From b7a18f2d1b7afe26d1816621178a52c527d773f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Diego=20=27Flameeyes=27=20Petten=C3=B2?= Date: Sat, 22 Dec 2007 23:24:00 +0100 Subject: Move libmpeg2 inside video_dec/libmpeg2. --HG-- rename : src/libmpeg2/Makefile.am => src/video_dec/libmpeg2/Makefile.am rename : src/libmpeg2/cpu_state.c => src/video_dec/libmpeg2/cpu_state.c rename : src/libmpeg2/decode.c => src/video_dec/libmpeg2/decode.c rename : src/libmpeg2/header.c => src/video_dec/libmpeg2/header.c rename : src/libmpeg2/idct.c => src/video_dec/libmpeg2/idct.c rename : src/libmpeg2/idct_altivec.c => src/video_dec/libmpeg2/idct_altivec.c rename : src/libmpeg2/idct_mlib.c => src/video_dec/libmpeg2/idct_mlib.c rename : src/libmpeg2/idct_mlib.h => src/video_dec/libmpeg2/idct_mlib.h rename : src/libmpeg2/idct_mmx.c => src/video_dec/libmpeg2/idct_mmx.c rename : src/libmpeg2/libmpeg2_accel.c => src/video_dec/libmpeg2/libmpeg2_accel.c rename : src/libmpeg2/libmpeg2_accel.h => src/video_dec/libmpeg2/libmpeg2_accel.h rename : src/libmpeg2/motion_comp.c => src/video_dec/libmpeg2/motion_comp.c rename : src/libmpeg2/motion_comp_altivec.c => src/video_dec/libmpeg2/motion_comp_altivec.c rename : src/libmpeg2/motion_comp_mlib.c => src/video_dec/libmpeg2/motion_comp_mlib.c rename : src/libmpeg2/motion_comp_mmx.c => src/video_dec/libmpeg2/motion_comp_mmx.c rename : src/libmpeg2/motion_comp_vis.c => src/video_dec/libmpeg2/motion_comp_vis.c rename : src/libmpeg2/mpeg2.h => src/video_dec/libmpeg2/mpeg2.h rename : src/libmpeg2/mpeg2_internal.h => src/video_dec/libmpeg2/mpeg2_internal.h rename : src/libmpeg2/slice.c => src/video_dec/libmpeg2/slice.c rename : src/libmpeg2/slice_xvmc.c => src/video_dec/libmpeg2/slice_xvmc.c rename : src/libmpeg2/slice_xvmc_vld.c => src/video_dec/libmpeg2/slice_xvmc_vld.c rename : src/libmpeg2/stats.c => src/video_dec/libmpeg2/stats.c rename : src/libmpeg2/vis.h => src/video_dec/libmpeg2/vis.h rename : src/libmpeg2/vlc.h => src/video_dec/libmpeg2/vlc.h rename : 
src/libmpeg2/xine_mpeg2_decoder.c => src/video_dec/libmpeg2/xine_mpeg2_decoder.c rename : src/libmpeg2/xvmc.h => src/video_dec/libmpeg2/xvmc.h rename : src/libmpeg2/xvmc_vld.h => src/video_dec/libmpeg2/xvmc_vld.h --- src/Makefile.am | 1 - src/libmpeg2/Makefile.am | 32 - src/libmpeg2/cpu_state.c | 183 --- src/libmpeg2/decode.c | 1009 ------------- src/libmpeg2/header.c | 411 ----- src/libmpeg2/idct.c | 348 ----- src/libmpeg2/idct_altivec.c | 233 --- src/libmpeg2/idct_mlib.c | 62 - src/libmpeg2/idct_mlib.h | 25 - src/libmpeg2/idct_mmx.c | 740 --------- src/libmpeg2/libmpeg2_accel.c | 223 --- src/libmpeg2/libmpeg2_accel.h | 48 - src/libmpeg2/motion_comp.c | 154 -- src/libmpeg2/motion_comp_altivec.c | 2031 ------------------------- src/libmpeg2/motion_comp_mlib.c | 181 --- src/libmpeg2/motion_comp_mmx.c | 1012 ------------- src/libmpeg2/motion_comp_vis.c | 2059 -------------------------- src/libmpeg2/mpeg2.h | 98 -- src/libmpeg2/mpeg2_internal.h | 294 ---- src/libmpeg2/slice.c | 1833 ----------------------- src/libmpeg2/slice_xvmc.c | 1988 ------------------------- src/libmpeg2/slice_xvmc_vld.c | 225 --- src/libmpeg2/stats.c | 317 ---- src/libmpeg2/vis.h | 328 ---- src/libmpeg2/vlc.h | 428 ------ src/libmpeg2/xine_mpeg2_decoder.c | 169 --- src/libmpeg2/xvmc.h | 32 - src/libmpeg2/xvmc_vld.h | 32 - src/video_dec/Makefile.am | 2 + src/video_dec/libmpeg2/Makefile.am | 32 + src/video_dec/libmpeg2/cpu_state.c | 183 +++ src/video_dec/libmpeg2/decode.c | 1009 +++++++++++++ src/video_dec/libmpeg2/header.c | 411 +++++ src/video_dec/libmpeg2/idct.c | 348 +++++ src/video_dec/libmpeg2/idct_altivec.c | 233 +++ src/video_dec/libmpeg2/idct_mlib.c | 62 + src/video_dec/libmpeg2/idct_mlib.h | 25 + src/video_dec/libmpeg2/idct_mmx.c | 740 +++++++++ src/video_dec/libmpeg2/libmpeg2_accel.c | 223 +++ src/video_dec/libmpeg2/libmpeg2_accel.h | 48 + src/video_dec/libmpeg2/motion_comp.c | 154 ++ src/video_dec/libmpeg2/motion_comp_altivec.c | 2031 +++++++++++++++++++++++++ 
src/video_dec/libmpeg2/motion_comp_mlib.c | 181 +++ src/video_dec/libmpeg2/motion_comp_mmx.c | 1012 +++++++++++++ src/video_dec/libmpeg2/motion_comp_vis.c | 2059 ++++++++++++++++++++++++++ src/video_dec/libmpeg2/mpeg2.h | 98 ++ src/video_dec/libmpeg2/mpeg2_internal.h | 294 ++++ src/video_dec/libmpeg2/slice.c | 1833 +++++++++++++++++++++++ src/video_dec/libmpeg2/slice_xvmc.c | 1988 +++++++++++++++++++++++++ src/video_dec/libmpeg2/slice_xvmc_vld.c | 225 +++ src/video_dec/libmpeg2/stats.c | 317 ++++ src/video_dec/libmpeg2/vis.h | 328 ++++ src/video_dec/libmpeg2/vlc.h | 428 ++++++ src/video_dec/libmpeg2/xine_mpeg2_decoder.c | 169 +++ src/video_dec/libmpeg2/xvmc.h | 32 + src/video_dec/libmpeg2/xvmc_vld.h | 32 + 56 files changed, 14497 insertions(+), 14496 deletions(-) delete mode 100644 src/libmpeg2/Makefile.am delete mode 100644 src/libmpeg2/cpu_state.c delete mode 100644 src/libmpeg2/decode.c delete mode 100644 src/libmpeg2/header.c delete mode 100644 src/libmpeg2/idct.c delete mode 100644 src/libmpeg2/idct_altivec.c delete mode 100644 src/libmpeg2/idct_mlib.c delete mode 100644 src/libmpeg2/idct_mlib.h delete mode 100644 src/libmpeg2/idct_mmx.c delete mode 100644 src/libmpeg2/libmpeg2_accel.c delete mode 100644 src/libmpeg2/libmpeg2_accel.h delete mode 100644 src/libmpeg2/motion_comp.c delete mode 100644 src/libmpeg2/motion_comp_altivec.c delete mode 100644 src/libmpeg2/motion_comp_mlib.c delete mode 100644 src/libmpeg2/motion_comp_mmx.c delete mode 100644 src/libmpeg2/motion_comp_vis.c delete mode 100644 src/libmpeg2/mpeg2.h delete mode 100644 src/libmpeg2/mpeg2_internal.h delete mode 100644 src/libmpeg2/slice.c delete mode 100644 src/libmpeg2/slice_xvmc.c delete mode 100644 src/libmpeg2/slice_xvmc_vld.c delete mode 100644 src/libmpeg2/stats.c delete mode 100644 src/libmpeg2/vis.h delete mode 100644 src/libmpeg2/vlc.h delete mode 100644 src/libmpeg2/xine_mpeg2_decoder.c delete mode 100644 src/libmpeg2/xvmc.h delete mode 100644 src/libmpeg2/xvmc_vld.h create mode 
100644 src/video_dec/libmpeg2/Makefile.am create mode 100644 src/video_dec/libmpeg2/cpu_state.c create mode 100644 src/video_dec/libmpeg2/decode.c create mode 100644 src/video_dec/libmpeg2/header.c create mode 100644 src/video_dec/libmpeg2/idct.c create mode 100644 src/video_dec/libmpeg2/idct_altivec.c create mode 100644 src/video_dec/libmpeg2/idct_mlib.c create mode 100644 src/video_dec/libmpeg2/idct_mlib.h create mode 100644 src/video_dec/libmpeg2/idct_mmx.c create mode 100644 src/video_dec/libmpeg2/libmpeg2_accel.c create mode 100644 src/video_dec/libmpeg2/libmpeg2_accel.h create mode 100644 src/video_dec/libmpeg2/motion_comp.c create mode 100644 src/video_dec/libmpeg2/motion_comp_altivec.c create mode 100644 src/video_dec/libmpeg2/motion_comp_mlib.c create mode 100644 src/video_dec/libmpeg2/motion_comp_mmx.c create mode 100644 src/video_dec/libmpeg2/motion_comp_vis.c create mode 100644 src/video_dec/libmpeg2/mpeg2.h create mode 100644 src/video_dec/libmpeg2/mpeg2_internal.h create mode 100644 src/video_dec/libmpeg2/slice.c create mode 100644 src/video_dec/libmpeg2/slice_xvmc.c create mode 100644 src/video_dec/libmpeg2/slice_xvmc_vld.c create mode 100644 src/video_dec/libmpeg2/stats.c create mode 100644 src/video_dec/libmpeg2/vis.h create mode 100644 src/video_dec/libmpeg2/vlc.h create mode 100644 src/video_dec/libmpeg2/xine_mpeg2_decoder.c create mode 100644 src/video_dec/libmpeg2/xvmc.h create mode 100644 src/video_dec/libmpeg2/xvmc_vld.h (limited to 'src') diff --git a/src/Makefile.am b/src/Makefile.am index 4b16acb35..a94673fb7 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -11,7 +11,6 @@ SUBDIRS = \ dxr3 \ input \ demuxers \ - libmpeg2 \ libspudec \ libspucc \ libspucmml \ diff --git a/src/libmpeg2/Makefile.am b/src/libmpeg2/Makefile.am deleted file mode 100644 index d772f0e09..000000000 --- a/src/libmpeg2/Makefile.am +++ /dev/null @@ -1,32 +0,0 @@ -include $(top_srcdir)/misc/Makefile.common - -AM_CFLAGS = $(DEFAULT_OCFLAGS) $(VISIBILITY_FLAG) 
-AM_LDFLAGS = $(xineplug_ldflags) - -noinst_HEADERS = vlc.h mpeg2.h xvmc.h xvmc_vld.h mpeg2_internal.h idct_mlib.h vis.h \ - libmpeg2_accel.h - -xineplug_LTLIBRARIES = xineplug_decode_mpeg2.la - -xineplug_decode_mpeg2_la_SOURCES = \ - cpu_state.c \ - decode.c \ - header.c \ - idct.c \ - idct_altivec.c \ - idct_mlib.c \ - idct_mmx.c \ - motion_comp.c \ - motion_comp_altivec.c \ - motion_comp_mmx.c \ - motion_comp_mlib.c \ - motion_comp_vis.c \ - slice.c \ - slice_xvmc.c \ - slice_xvmc_vld.c \ - stats.c \ - xine_mpeg2_decoder.c \ - libmpeg2_accel.c - -xineplug_decode_mpeg2_la_LIBADD = $(MLIB_LIBS) $(XINE_LIB) -lm -xineplug_decode_mpeg2_la_CFLAGS = $(AM_CFLAGS) $(MLIB_CFLAGS) diff --git a/src/libmpeg2/cpu_state.c b/src/libmpeg2/cpu_state.c deleted file mode 100644 index 12963644c..000000000 --- a/src/libmpeg2/cpu_state.c +++ /dev/null @@ -1,183 +0,0 @@ -/* - * cpu_state.c - * Copyright (C) 2000-2003 Michel Lespinasse - * Copyright (C) 1999-2000 Aaron Holtzman - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * See http://libmpeg2.sourceforge.net/ for updates. - * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include "config.h" - -#include -#include - -#include "mpeg2_internal.h" -#include - -void (* mpeg2_cpu_state_save) (cpu_state_t * state) = NULL; -void (* mpeg2_cpu_state_restore) (cpu_state_t * state) = NULL; - -#if defined(ARCH_X86) || defined(ARCH_X86_64) -static void state_restore_mmx (cpu_state_t * state) -{ - emms (); -} -#endif - -#if defined (ARCH_PPC) && defined (ENABLE_ALTIVEC) - -#ifndef HOST_OS_DARWIN - -static void state_save_altivec (cpu_state_t * state) -{ - asm (" \n" - " li %r9, 16 \n" - " stvx %v20, 0, %r3 \n" - " li %r11, 32 \n" - " stvx %v21, %r9, %r3 \n" - " li %r9, 48 \n" - " stvx %v22, %r11, %r3 \n" - " li %r11, 64 \n" - " stvx %v23, %r9, %r3 \n" - " li %r9, 80 \n" - " stvx %v24, %r11, %r3 \n" - " li %r11, 96 \n" - " stvx %v25, %r9, %r3 \n" - " li %r9, 112 \n" - " stvx %v26, %r11, %r3 \n" - " li %r11, 128 \n" - " stvx %v27, %r9, %r3 \n" - " li %r9, 144 \n" - " stvx %v28, %r11, %r3 \n" - " li %r11, 160 \n" - " stvx %v29, %r9, %r3 \n" - " li %r9, 176 \n" - " stvx %v30, %r11, %r3 \n" - " stvx %v31, %r9, %r3 \n" - ); -} - -static void state_restore_altivec (cpu_state_t * state) -{ - asm (" \n" - " li %r9, 16 \n" - " lvx %v20, 0, %r3 \n" - " li %r11, 32 \n" - " lvx %v21, %r9, %r3 \n" - " li %r9, 48 \n" - " lvx %v22, %r11, %r3 \n" - " li %r11, 64 \n" - " lvx %v23, %r9, %r3 \n" - " li %r9, 80 \n" - " lvx %v24, %r11, %r3 \n" - " li %r11, 96 \n" - " lvx %v25, %r9, %r3 \n" - " li %r9, 112 \n" - " lvx %v26, %r11, %r3 \n" - " li %r11, 128 \n" - " lvx %v27, %r9, %r3 \n" - " li %r9, 144 \n" - " lvx %v28, %r11, %r3 \n" - " li %r11, 160 \n" - " lvx %v29, %r9, %r3 \n" - " li %r9, 176 \n" - " lvx %v30, %r11, %r3 \n" - " lvx %v31, %r9, %r3 \n" - ); -} - -#else /* HOST_OS_DARWIN */ - -#define LI(a,b) "li r" #a "," #b "\n\t" -#define 
STVX0(a,b,c) "stvx v" #a ",0,r" #c "\n\t" -#define STVX(a,b,c) "stvx v" #a ",r" #b ",r" #c "\n\t" -#define LVX0(a,b,c) "lvx v" #a ",0,r" #c "\n\t" -#define LVX(a,b,c) "lvx v" #a ",r" #b ",r" #c "\n\t" - -static void state_save_altivec (cpu_state_t * state) -{ - asm (LI (9, 16) - STVX0 (20, 0, 3) - LI (11, 32) - STVX (21, 9, 3) - LI (9, 48) - STVX (22, 11, 3) - LI (11, 64) - STVX (23, 9, 3) - LI (9, 80) - STVX (24, 11, 3) - LI (11, 96) - STVX (25, 9, 3) - LI (9, 112) - STVX (26, 11, 3) - LI (11, 128) - STVX (27, 9, 3) - LI (9, 144) - STVX (28, 11, 3) - LI (11, 160) - STVX (29, 9, 3) - LI (9, 176) - STVX (30, 11, 3) - STVX (31, 9, 3)); -} - -static void state_restore_altivec (cpu_state_t * state) -{ - asm (LI (9, 16) - LVX0 (20, 0, 3) - LI (11, 32) - LVX (21, 9, 3) - LI (9, 48) - LVX (22, 11, 3) - LI (11, 64) - LVX (23, 9, 3) - LI (9, 80) - LVX (24, 11, 3) - LI (11, 96) - LVX (25, 9, 3) - LI (9, 112) - LVX (26, 11, 3) - LI (11, 128) - LVX (27, 9, 3) - LI (9, 144) - LVX (28, 11, 3) - LI (11, 160) - LVX (29, 9, 3) - LI (9, 176) - LVX (30, 11, 3) - LVX (31, 9, 3)); -} -#endif /* HOST_OS_DARWIN */ - -#endif /* defined (ARCH_PPC) && defined (ENABLE_ALTIVEC) */ - -void mpeg2_cpu_state_init (uint32_t mm_accel) -{ -#if defined(ARCH_X86) || defined(ARCH_X86_64) - if (mm_accel & MM_ACCEL_X86_MMX) { - mpeg2_cpu_state_restore = state_restore_mmx; - } -#endif -#if defined (ARCH_PPC) && defined (ENABLE_ALTIVEC) - if (mm_accel & MM_ACCEL_PPC_ALTIVEC) { - mpeg2_cpu_state_save = state_save_altivec; - mpeg2_cpu_state_restore = state_restore_altivec; - } -#endif -} - diff --git a/src/libmpeg2/decode.c b/src/libmpeg2/decode.c deleted file mode 100644 index 145d5f58b..000000000 --- a/src/libmpeg2/decode.c +++ /dev/null @@ -1,1009 +0,0 @@ -/* - * decode.c - * Copyright (C) 2000-2002 Michel Lespinasse - * Copyright (C) 1999-2000 Aaron Holtzman - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * See http://libmpeg2.sourceforge.net/ for updates. 
- * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * xine-specific version by G. Bartsch - * - */ - -#include "config.h" -#include -#include /* memcpy/memset, try to remove */ -#include -#include -#include - -#define LOG_MODULE "decode" -#define LOG_VERBOSE -/* -#define LOG -*/ - -#include -#include -#include "mpeg2.h" -#include "mpeg2_internal.h" -#include -#include "libmpeg2_accel.h" - -/* -#define LOG_PAN_SCAN -*/ - -/* #define BUFFER_SIZE (224 * 1024) */ -#define BUFFER_SIZE (1194 * 1024) /* new buffer size for mpeg2dec 0.2.1 */ - -static void process_userdata(mpeg2dec_t *mpeg2dec, uint8_t *buffer); - -void mpeg2_init (mpeg2dec_t * mpeg2dec, - xine_video_port_t * output) -{ - static int do_init = 1; - uint32_t mm_accel; - - if (do_init) { - do_init = 0; - mm_accel = xine_mm_accel(); - mpeg2_cpu_state_init (mm_accel); - mpeg2_idct_init (mm_accel); - mpeg2_mc_init (mm_accel); - libmpeg2_accel_scan(&mpeg2dec->accel, mpeg2_scan_norm, mpeg2_scan_alt); - } - - if( !mpeg2dec->chunk_buffer ) - mpeg2dec->chunk_buffer = xine_xmalloc_aligned (16, BUFFER_SIZE + 4, - &mpeg2dec->chunk_base); - if( !mpeg2dec->picture ) - mpeg2dec->picture = xine_xmalloc_aligned (16, sizeof (picture_t), - &mpeg2dec->picture_base); - - mpeg2dec->shift = 0xffffff00; - mpeg2dec->new_sequence = 0; - mpeg2dec->is_sequence_needed = 1; - 
mpeg2dec->is_wait_for_ip_frames = 2; - mpeg2dec->frames_to_drop = 0; - mpeg2dec->drop_frame = 0; - mpeg2dec->in_slice = 0; - mpeg2dec->output = output; - mpeg2dec->chunk_ptr = mpeg2dec->chunk_buffer; - mpeg2dec->code = 0xb4; - mpeg2dec->seek_mode = 0; - - /* initialize AFD storage */ - mpeg2dec->afd_value_seen = XINE_VIDEO_AFD_NOT_PRESENT; - mpeg2dec->afd_value_reported = (XINE_VIDEO_AFD_NOT_PRESENT - 1); - - memset (mpeg2dec->picture, 0, sizeof (picture_t)); - - /* initialize substructures */ - mpeg2_header_state_init (mpeg2dec->picture); - - if ( output->get_capabilities(output) & VO_CAP_XXMC) { - printf("libmpeg2: output port has XxMC capability\n"); - mpeg2dec->frame_format = XINE_IMGFMT_XXMC; - } else if( output->get_capabilities(output) & VO_CAP_XVMC_MOCOMP) { - printf("libmpeg2: output port has XvMC capability\n"); - mpeg2dec->frame_format = XINE_IMGFMT_XVMC; - } else { - mpeg2dec->frame_format = XINE_IMGFMT_YV12; - } -} - -static inline void get_frame_duration (mpeg2dec_t * mpeg2dec, vo_frame_t *frame) -{ - static const double durations[] = { - 0, /* invalid */ - 3753.75, /* 23.976 fps */ - 3750, /* 24 fps */ - 3600, /* 25 fps */ - 3003, /* 29.97 fps */ - 3000, /* 30 fps */ - 1800, /* 50 fps */ - 1501.5, /* 59.94 fps */ - 1500, /* 60 fps */ - }; - double duration = ((unsigned) mpeg2dec->picture->frame_rate_code > 8u) - ? 
0 : durations[mpeg2dec->picture->frame_rate_code]; - - duration = duration * (mpeg2dec->picture->frame_rate_ext_n + 1.0) / - (mpeg2dec->picture->frame_rate_ext_d + 1.0); - - /* this should be used to detect any special rff pattern */ - mpeg2dec->rff_pattern = mpeg2dec->rff_pattern << 1; - mpeg2dec->rff_pattern |= !!frame->repeat_first_field; - - if( ((mpeg2dec->rff_pattern & 0xff) == 0xaa || - (mpeg2dec->rff_pattern & 0xff) == 0x55) && - !mpeg2dec->picture->progressive_sequence ) { - /* special case for ntsc 3:2 pulldown */ - duration *= 5.0 / 4.0; - } - else - { - if( frame->repeat_first_field ) { - if( !mpeg2dec->picture->progressive_sequence && - frame->progressive_frame ) { - /* decoder should output 3 fields, so adjust duration to - count on this extra field time */ - duration *= 3.0 / 2.0; - } else if( mpeg2dec->picture->progressive_sequence ) { - /* for progressive sequences the output should repeat the - frame 1 or 2 times depending on top_field_first flag. */ - duration *= (frame->top_field_first) ? 3 : 2; - } - } - } - - frame->duration = (int) ceil (duration); - _x_stream_info_set(mpeg2dec->stream, XINE_STREAM_INFO_FRAME_DURATION, frame->duration); - /*printf("mpeg2dec: rff=%u\n",frame->repeat_first_field);*/ -} - -static double get_aspect_ratio(mpeg2dec_t *mpeg2dec) -{ - double ratio; - picture_t * picture = mpeg2dec->picture; - double mpeg1_pel_ratio[16] = {1.0 /* forbidden */, - 1.0, 0.6735, 0.7031, 0.7615, 0.8055, 0.8437, 0.8935, 0.9157, - 0.9815, 1.0255, 1.0695, 1.0950, 1.1575, 1.2015, 1.0 /*reserved*/ }; - - /* TODO: For slower machines the value of this function should be computed - * once and cached! - */ - - if( !picture->mpeg1 ) { - /* these hardcoded values are defined on mpeg2 standard for - * aspect ratio. other values are reserved or forbidden. 
*/ - switch(picture->aspect_ratio_information) { - case 2: - ratio = 4.0/3.0; - break; - case 3: - ratio = 16.0/9.0; - break; - case 4: - ratio = 2.11/1.0; - break; - case 1: - default: - ratio = (double)picture->coded_picture_width/(double)picture->coded_picture_height; - break; - } - } else { - /* mpeg1 constants refer to pixel aspect ratio */ - ratio = (double)picture->coded_picture_width/(double)picture->coded_picture_height; - ratio /= mpeg1_pel_ratio[picture->aspect_ratio_information]; - } - - return ratio; -} - -static void remember_metainfo (mpeg2dec_t *mpeg2dec) { - - picture_t * picture = mpeg2dec->picture; - - _x_stream_info_set(mpeg2dec->stream, XINE_STREAM_INFO_VIDEO_WIDTH, picture->display_width); - _x_stream_info_set(mpeg2dec->stream, XINE_STREAM_INFO_VIDEO_HEIGHT, picture->display_height); - _x_stream_info_set(mpeg2dec->stream, XINE_STREAM_INFO_VIDEO_RATIO, - ((double)10000 * get_aspect_ratio(mpeg2dec))); - - switch (mpeg2dec->picture->frame_rate_code) { - case 1: /* 23.976 fps */ - _x_stream_info_set(mpeg2dec->stream, XINE_STREAM_INFO_FRAME_DURATION, 3913); - break; - case 2: /* 24 fps */ - _x_stream_info_set(mpeg2dec->stream, XINE_STREAM_INFO_FRAME_DURATION, 3750); - break; - case 3: /* 25 fps */ - _x_stream_info_set(mpeg2dec->stream, XINE_STREAM_INFO_FRAME_DURATION, 3600); - break; - case 4: /* 29.97 fps */ - _x_stream_info_set(mpeg2dec->stream, XINE_STREAM_INFO_FRAME_DURATION, 3003); - break; - case 5: /* 30 fps */ - _x_stream_info_set(mpeg2dec->stream, XINE_STREAM_INFO_FRAME_DURATION, 3000); - break; - case 6: /* 50 fps */ - _x_stream_info_set(mpeg2dec->stream, XINE_STREAM_INFO_FRAME_DURATION, 1800); - break; - case 7: /* 59.94 fps */ - _x_stream_info_set(mpeg2dec->stream, XINE_STREAM_INFO_FRAME_DURATION, 1525); - break; - case 8: /* 60 fps */ - _x_stream_info_set(mpeg2dec->stream, XINE_STREAM_INFO_FRAME_DURATION, 1509); - break; - default: - /* printf ("invalid/unknown frame rate code : %d \n", - frame->frame_rate_code); */ - 
_x_stream_info_set(mpeg2dec->stream, XINE_STREAM_INFO_FRAME_DURATION, 3000); - } - - _x_meta_info_set_utf8(mpeg2dec->stream, XINE_META_INFO_VIDEOCODEC, "MPEG (libmpeg2)"); -} - -static inline int parse_chunk (mpeg2dec_t * mpeg2dec, int code, - uint8_t * buffer, int next_code) -{ - picture_t * picture; - int is_frame_done; - double ratio; - - /* wait for sequence_header_code */ - if (mpeg2dec->is_sequence_needed) { - if (code != 0xb3) { - /* printf ("libmpeg2: waiting for sequence header\n"); */ - mpeg2dec->pts = 0; - return 0; - } - } - if (mpeg2dec->is_frame_needed) { - /* printf ("libmpeg2: waiting for frame start\n"); */ - mpeg2dec->pts = 0; - if (mpeg2dec->picture->current_frame) - mpeg2dec->picture->current_frame->bad_frame = 1; - } - - mpeg2_stats (code, buffer); - - picture = mpeg2dec->picture; - is_frame_done = mpeg2dec->in_slice && ((!code) || (code >= 0xb0)); - - if (is_frame_done) - mpeg2dec->in_slice = 0; - - if (is_frame_done && picture->current_frame != NULL) { - - libmpeg2_accel_frame_completion(&mpeg2dec->accel, mpeg2dec->frame_format, - picture, code); - - if (((picture->picture_structure == FRAME_PICTURE) || - (picture->second_field)) ) { - - if (mpeg2dec->drop_frame) - picture->current_frame->bad_frame = 1; - - if (picture->picture_coding_type == B_TYPE) { - if( picture->current_frame && !picture->current_frame->drawn ) { - - /* hack against wrong mpeg1 pts */ - if (picture->mpeg1) - picture->current_frame->pts = 0; - - get_frame_duration(mpeg2dec, picture->current_frame); - mpeg2dec->frames_to_drop = picture->current_frame->draw (picture->current_frame, mpeg2dec->stream); - picture->current_frame->drawn = 1; - } - } else if (picture->forward_reference_frame && !picture->forward_reference_frame->drawn) { - get_frame_duration(mpeg2dec, picture->forward_reference_frame); - mpeg2dec->frames_to_drop = picture->forward_reference_frame->draw (picture->forward_reference_frame, - mpeg2dec->stream); - picture->forward_reference_frame->drawn = 1; - } - } - 
} - - switch (code) { - case 0x00: /* picture_start_code */ - if (mpeg2_header_picture (picture, buffer)) { - fprintf (stderr, "bad picture header\n"); - abort(); - } - - mpeg2dec->is_frame_needed=0; - - if (!picture->second_field) { - /* find out if we want to skip this frame */ - mpeg2dec->drop_frame = 0; - - /* picture->skip_non_intra_dct = (mpeg2dec->frames_to_drop>0) ; */ - - switch (picture->picture_coding_type) { - case B_TYPE: - - lprintf ("B-Frame\n"); - - if (mpeg2dec->frames_to_drop>1) { - lprintf ("dropping b-frame because frames_to_drop==%d\n", - mpeg2dec->frames_to_drop); - mpeg2dec->drop_frame = 1; - } else if (!picture->forward_reference_frame || picture->forward_reference_frame->bad_frame - || !picture->backward_reference_frame || picture->backward_reference_frame->bad_frame) { -#ifdef LOG - printf ("libmpeg2: dropping b-frame because ref is bad ("); - if (picture->forward_reference_frame) - printf ("fw ref frame %d, bad %d;", picture->forward_reference_frame->id, - picture->forward_reference_frame->bad_frame); - else - printf ("fw ref frame not there;"); - if (picture->backward_reference_frame) - printf ("bw ref frame %d, bad %d)\n", picture->backward_reference_frame->id, - picture->backward_reference_frame->bad_frame); - else - printf ("fw ref frame not there)\n"); -#endif - mpeg2dec->drop_frame = 1; - } else if (mpeg2dec->is_wait_for_ip_frames > 0) { - lprintf("dropping b-frame because refs are invalid\n"); - mpeg2dec->drop_frame = 1; - } - break; - - case P_TYPE: - - lprintf ("P-Frame\n"); - - if (mpeg2dec->frames_to_drop>2) { - mpeg2dec->drop_frame = 1; - lprintf ("dropping p-frame because frames_to_drop==%d\n", - mpeg2dec->frames_to_drop); - } else if (!picture->backward_reference_frame || picture->backward_reference_frame->bad_frame) { - mpeg2dec->drop_frame = 1; -#ifdef LOG - if (!picture->backward_reference_frame) - printf ("libmpeg2: dropping p-frame because no ref frame\n"); - else - printf ("libmpeg2: dropping p-frame because ref %d is 
bad\n", picture->backward_reference_frame->id); -#endif - } else if (mpeg2dec->is_wait_for_ip_frames > 1) { - lprintf("dropping p-frame because ref is invalid\n"); - mpeg2dec->drop_frame = 1; - } else if (mpeg2dec->is_wait_for_ip_frames) - mpeg2dec->is_wait_for_ip_frames--; - - break; - - case I_TYPE: - lprintf ("I-Frame\n"); - /* for the sake of dvd menus, never drop i-frames - if (mpeg2dec->frames_to_drop>4) { - mpeg2dec->drop_frame = 1; - } - */ - - if (mpeg2dec->is_wait_for_ip_frames) - mpeg2dec->is_wait_for_ip_frames--; - - break; - } - } - - break; - - case 0xb2: /* user data code */ - process_userdata(mpeg2dec, buffer); - break; - - case 0xb3: /* sequence_header_code */ - if (mpeg2_header_sequence (picture, buffer)) { - fprintf (stderr, "bad sequence header\n"); - /* abort(); */ - break; - } - - /* reset AFD value to detect absence */ - mpeg2dec->afd_value_seen = XINE_VIDEO_AFD_NOT_PRESENT; - - /* according to ISO/IEC 13818-2, an extension start code will follow. - * Otherwise the stream follows ISO/IEC 11172-2 which means MPEG1 */ - picture->mpeg1 = (next_code != 0xb5); - - if (mpeg2dec->force_aspect) picture->aspect_ratio_information = mpeg2dec->force_aspect; - - if (mpeg2dec->is_sequence_needed ) { - mpeg2dec->new_sequence = 1; - } - - if (mpeg2dec->is_sequence_needed - || (picture->aspect_ratio_information != picture->saved_aspect_ratio) - || (picture->frame_width != picture->coded_picture_width) - || (picture->frame_height != picture->coded_picture_height)) { - xine_event_t event; - xine_format_change_data_t data; - - remember_metainfo (mpeg2dec); - event.type = XINE_EVENT_FRAME_FORMAT_CHANGE; - event.stream = mpeg2dec->stream; - event.data = &data; - event.data_length = sizeof(data); - data.width = picture->coded_picture_width; - data.height = picture->coded_picture_height; - data.aspect = picture->aspect_ratio_information; - data.pan_scan = mpeg2dec->force_pan_scan; - xine_event_send(mpeg2dec->stream, &event); - - 
_x_stream_info_set(mpeg2dec->stream,XINE_STREAM_INFO_VIDEO_WIDTH, - picture->display_width); - _x_stream_info_set(mpeg2dec->stream,XINE_STREAM_INFO_VIDEO_HEIGHT, - picture->display_height); - - if (picture->forward_reference_frame && - picture->forward_reference_frame != picture->current_frame && - picture->forward_reference_frame != picture->backward_reference_frame) - picture->forward_reference_frame->free (picture->forward_reference_frame); - - if (picture->backward_reference_frame && - picture->backward_reference_frame != picture->current_frame) - picture->backward_reference_frame->free (picture->backward_reference_frame); - - mpeg2dec->is_sequence_needed = 0; - picture->forward_reference_frame = NULL; - picture->backward_reference_frame = NULL; - - picture->frame_width = picture->coded_picture_width; - picture->frame_height = picture->coded_picture_height; - picture->saved_aspect_ratio = picture->aspect_ratio_information; - } - break; - - case 0xb5: /* extension_start_code */ - if (mpeg2_header_extension (picture, buffer)) { - fprintf (stderr, "bad extension\n"); - abort(); - } - break; - - case 0xb7: /* sequence end code */ -#ifdef LOG_PAN_SCAN - printf ("libmpeg2: sequence end code not handled\n"); -#endif - case 0xb8: /* group of pictures start code */ - if (mpeg2_header_group_of_pictures (picture, buffer)) { - printf ("libmpeg2: bad group of pictures\n"); - abort(); - } - default: - if ((code >= 0xb9) && (code != 0xe4)) { - printf("Not multiplexed? 0x%x\n",code); - } - if (code >= 0xb0) - break; - - /* check for AFD change once per picture */ - if (mpeg2dec->afd_value_reported != mpeg2dec->afd_value_seen) { - /* AFD data should better be stored in current_frame to have it */ - /* ready and synchronous with other data like width or height. */ - /* An AFD change should then be detected when a new frame is emitted */ - /* from the decoder to report the AFD change in display order and not */ - /* in decoding order like it happens below for now. 
*/ - _x_stream_info_set(mpeg2dec->stream, XINE_STREAM_INFO_VIDEO_AFD, mpeg2dec->afd_value_seen); - lprintf ("AFD changed from %d to %d\n", mpeg2dec->afd_value_reported, mpeg2dec->afd_value_seen); - mpeg2dec->afd_value_reported = mpeg2dec->afd_value_seen; - } - - if (!(mpeg2dec->in_slice)) { - mpeg2dec->in_slice = 1; - - if (picture->second_field) { - if (picture->current_frame) - picture->current_frame->field(picture->current_frame, - picture->picture_structure); - else - mpeg2dec->drop_frame = 1; - } else { - int flags = picture->picture_structure; - - if (!picture->mpeg1) flags |= VO_INTERLACED_FLAG; - if (mpeg2dec->force_pan_scan) flags |= VO_PAN_SCAN_FLAG; - if (mpeg2dec->new_sequence) flags |= VO_NEW_SEQUENCE_FLAG; - - if ( picture->current_frame && - picture->current_frame != picture->backward_reference_frame && - picture->current_frame != picture->forward_reference_frame ) { - picture->current_frame->free (picture->current_frame); - } - if (picture->picture_coding_type == B_TYPE) { - ratio = get_aspect_ratio(mpeg2dec); - picture->current_frame = - mpeg2dec->stream->video_out->get_frame (mpeg2dec->stream->video_out, - picture->coded_picture_width, - picture->coded_picture_height, - ratio, - mpeg2dec->frame_format, - flags); - libmpeg2_accel_new_frame( &mpeg2dec->accel, mpeg2dec->frame_format, - picture, ratio, flags); - } else { - ratio = get_aspect_ratio(mpeg2dec); - picture->current_frame = - mpeg2dec->stream->video_out->get_frame (mpeg2dec->stream->video_out, - picture->coded_picture_width, - picture->coded_picture_height, - ratio, - mpeg2dec->frame_format, - flags); - - libmpeg2_accel_new_frame( &mpeg2dec->accel, mpeg2dec->frame_format, - picture, ratio, flags); - - if (picture->forward_reference_frame && - picture->forward_reference_frame != picture->backward_reference_frame) - picture->forward_reference_frame->free (picture->forward_reference_frame); - - picture->forward_reference_frame = - picture->backward_reference_frame; - 
picture->backward_reference_frame = picture->current_frame; - } - - if(mpeg2dec->new_sequence) - mpeg2dec->new_sequence = - libmpeg2_accel_new_sequence(&mpeg2dec->accel, mpeg2dec->frame_format, - picture); - - picture->current_frame->bad_frame = 1; - picture->current_frame->drawn = 0; - picture->current_frame->pts = mpeg2dec->pts; - picture->current_frame->top_field_first = picture->top_field_first; - picture->current_frame->repeat_first_field = picture->repeat_first_field; - picture->current_frame->progressive_frame = picture->progressive_frame; - picture->current_frame->crop_right = picture->coded_picture_width - picture->display_width; - picture->current_frame->crop_bottom = picture->coded_picture_height - picture->display_height; - - switch( picture->picture_coding_type ) { - case I_TYPE: - picture->current_frame->picture_coding_type = XINE_PICT_I_TYPE; - break; - case P_TYPE: - picture->current_frame->picture_coding_type = XINE_PICT_P_TYPE; - break; - case B_TYPE: - picture->current_frame->picture_coding_type = XINE_PICT_B_TYPE; - break; - case D_TYPE: - picture->current_frame->picture_coding_type = XINE_PICT_D_TYPE; - break; - } - - lprintf ("decoding frame %d, type %s\n", - picture->current_frame->id, picture->picture_coding_type == I_TYPE ? "I" : - picture->picture_coding_type == P_TYPE ? 
"P" : "B"); - mpeg2dec->pts = 0; - /*printf("Starting to decode frame %d\n",picture->current_frame->id);*/ - } - } - - if (!mpeg2dec->drop_frame && picture->current_frame != NULL) { -#ifdef DEBUG_LOG - printf("slice target %08x past %08x future %08x\n",picture->current_frame,picture->forward_reference_frame,picture->backward_reference_frame); - fflush(stdout); -#endif - libmpeg2_accel_slice(&mpeg2dec->accel, picture, code, buffer, mpeg2dec->chunk_size, - mpeg2dec->chunk_buffer); - - if( picture->v_offset > picture->limit_y || - picture->v_offset + 16 > picture->display_height ) { - picture->current_frame->bad_frame = 0; - } - } - } - - /* printf ("libmpeg2: parse_chunk %d completed\n", code); */ - return is_frame_done; -} - -static inline int find_start_code (mpeg2dec_t * mpeg2dec, - uint8_t ** current, uint8_t * limit) -{ - uint8_t * p; - - if (*current >= limit) - return 0; - if (mpeg2dec->shift == 0x00000100) - return 1; - - mpeg2dec->shift = (mpeg2dec->shift | *(*current)++) << 8; - - if (*current >= limit) - return 0; - if (mpeg2dec->shift == 0x00000100) - return 1; - - mpeg2dec->shift = (mpeg2dec->shift | *(*current)++) << 8; - - if (*current >= limit) - return 0; - if (mpeg2dec->shift == 0x00000100) - return 1; - - limit--; - - if (*current >= limit) { - mpeg2dec->shift = (mpeg2dec->shift | *(*current)++) << 8; - return 0; - } - - p = *current; - - while (p < limit && (p = (uint8_t *)memchr(p, 0x01, limit - p))) { - if (p[-2] || p[-1]) - p += 3; - else { - *current = ++p; - return 1; - } - } - - *current = ++limit; - p = limit - 3; - mpeg2dec->shift = (mpeg2dec->shift | *p++) << 8; - mpeg2dec->shift = (mpeg2dec->shift | *p++) << 8; - mpeg2dec->shift = (mpeg2dec->shift | *p++) << 8; - - return 0; -} - -static inline uint8_t * copy_chunk (mpeg2dec_t * mpeg2dec, - uint8_t * current, uint8_t * end) -{ - uint8_t * limit; - uint8_t * data = current; - int found, bite; - - /* sequence end code 0xb7 doesn't have any data and there might be the case - * that no start 
code will follow this code for quite some time (e. g. in case - * of a still image. - * Therefore, return immediately with a chunk_size of 0. Setting code to 0xb4 - * will eat up any trailing garbage next time. - */ - if (mpeg2dec->code == 0xb7) { - mpeg2dec->code = 0xb4; - mpeg2dec->chunk_size = 0; - return current; - } - - limit = current + (mpeg2dec->chunk_buffer + BUFFER_SIZE - mpeg2dec->chunk_ptr); - if (limit > end) - limit = end; - - found = find_start_code(mpeg2dec, ¤t, limit); - bite = current - data; - if (bite) { - xine_fast_memcpy(mpeg2dec->chunk_ptr, data, bite); - mpeg2dec->chunk_ptr += bite; - } - - if (found) { - mpeg2dec->code = *current++; - mpeg2dec->chunk_size = mpeg2dec->chunk_ptr - mpeg2dec->chunk_buffer - 3; - mpeg2dec->chunk_ptr = mpeg2dec->chunk_buffer; - mpeg2dec->shift = 0xffffff00; - return current; - } - - if (current == end) - return NULL; - - /* we filled the chunk buffer without finding a start code */ - mpeg2dec->code = 0xb4; /* sequence_error_code */ - mpeg2dec->chunk_ptr = mpeg2dec->chunk_buffer; - return current; -} - -int mpeg2_decode_data (mpeg2dec_t * mpeg2dec, uint8_t * current, uint8_t * end, - uint64_t pts) -{ - int ret; - uint8_t code; - - ret = 0; - if (mpeg2dec->seek_mode) { - mpeg2dec->chunk_ptr = mpeg2dec->chunk_buffer; - mpeg2dec->code = 0xb4; - mpeg2dec->seek_mode = 0; - mpeg2dec->shift = 0xffffff00; - mpeg2dec->is_frame_needed = 1; - } - - if (pts) - mpeg2dec->pts = pts; - - while (current != end || mpeg2dec->code == 0xb7) { - code = mpeg2dec->code; - current = copy_chunk (mpeg2dec, current, end); - if (current == NULL) - break; - ret += parse_chunk (mpeg2dec, code, mpeg2dec->chunk_buffer, mpeg2dec->code); - } - - libmpeg2_accel_frame_completion(&mpeg2dec->accel, mpeg2dec->frame_format, - mpeg2dec->picture, 0xff); - - return ret; -} - -void mpeg2_discontinuity (mpeg2dec_t * mpeg2dec) { - picture_t *picture = mpeg2dec->picture; - - if( !picture ) - return; - - mpeg2dec->in_slice = 0; - mpeg2dec->pts = 0; - if ( 
picture->current_frame ) - picture->current_frame->pts = 0; - if ( picture->forward_reference_frame ) - picture->forward_reference_frame->pts = 0; - if ( picture->backward_reference_frame ) - picture->backward_reference_frame->pts = 0; - - libmpeg2_accel_discontinuity(&mpeg2dec->accel, mpeg2dec->frame_format, picture); -} - -void mpeg2_reset (mpeg2dec_t * mpeg2dec) { - - picture_t *picture = mpeg2dec->picture; - - if( !picture ) - return; - - mpeg2_discontinuity(mpeg2dec); - - if( !picture->mpeg1 ) { - mpeg2dec->is_wait_for_ip_frames = 2; - - /* mark current frames as bad so they won't make to screen */ - if ( picture->current_frame ) - picture->current_frame->bad_frame=1; - if (picture->forward_reference_frame ) - picture->forward_reference_frame->bad_frame=1; - if (picture->backward_reference_frame) - picture->backward_reference_frame->bad_frame=1; - - } else { - /* to free reference frames one also needs to fix slice.c to - * abort when they are NULL. unfortunately it seems to break - * DVD menus. 
- * - * ...so let's do this for mpeg-1 only :) - */ - if ( picture->current_frame && - picture->current_frame != picture->backward_reference_frame && - picture->current_frame != picture->forward_reference_frame ) - picture->current_frame->free (picture->current_frame); - picture->current_frame = NULL; - - if (picture->forward_reference_frame && - picture->forward_reference_frame != picture->backward_reference_frame) - picture->forward_reference_frame->free (picture->forward_reference_frame); - picture->forward_reference_frame = NULL; - - if (picture->backward_reference_frame) - picture->backward_reference_frame->free (picture->backward_reference_frame); - picture->backward_reference_frame = NULL; - } - - mpeg2dec->in_slice = 0; - mpeg2dec->seek_mode = 1; - -} - -void mpeg2_flush (mpeg2dec_t * mpeg2dec) { - - picture_t *picture = mpeg2dec->picture; - - if (!picture) - return; - - if (picture->current_frame && !picture->current_frame->drawn && - !picture->current_frame->bad_frame) { - - lprintf ("blasting out current frame %d on flush\n", - picture->current_frame->id); - - picture->current_frame->drawn = 1; - get_frame_duration(mpeg2dec, picture->current_frame); - - picture->current_frame->pts = 0; - picture->current_frame->draw(picture->current_frame, mpeg2dec->stream); - } - -} - -void mpeg2_close (mpeg2dec_t * mpeg2dec) -{ - picture_t *picture = mpeg2dec->picture; - - /* - { - static uint8_t finalizer[] = {0,0,1,0xb4}; - mpeg2_decode_data (mpeg2dec, finalizer, finalizer+4, 0); - } - */ - - /* - dont remove any picture->*->free() below. doing so will cause buffer - leak, and we only have about 15 of them. 
- */ - - if ( picture->current_frame ) { - if( !picture->current_frame->drawn ) { - lprintf ("blasting out current frame on close\n"); - picture->current_frame->pts = 0; - get_frame_duration(mpeg2dec, picture->current_frame); - picture->current_frame->draw (picture->current_frame, mpeg2dec->stream); - picture->current_frame->drawn = 1; - } - - if( picture->current_frame != picture->backward_reference_frame && - picture->current_frame != picture->forward_reference_frame ) { - picture->current_frame->free (picture->current_frame); - } - picture->current_frame = NULL; - } - - if (picture->forward_reference_frame && - picture->forward_reference_frame != picture->backward_reference_frame) { - picture->forward_reference_frame->free (picture->forward_reference_frame); - picture->forward_reference_frame = NULL; - } - - if (picture->backward_reference_frame) { - if( !picture->backward_reference_frame->drawn) { - lprintf ("blasting out backward reference frame on close\n"); - picture->backward_reference_frame->pts = 0; - get_frame_duration(mpeg2dec, picture->backward_reference_frame); - picture->backward_reference_frame->draw (picture->backward_reference_frame, mpeg2dec->stream); - picture->backward_reference_frame->drawn = 1; - } - picture->backward_reference_frame->free (picture->backward_reference_frame); - picture->backward_reference_frame = NULL; - } - - if ( mpeg2dec->chunk_buffer ) { - free (mpeg2dec->chunk_base); - mpeg2dec->chunk_buffer = NULL; - } - - if ( mpeg2dec->picture ) { - free (mpeg2dec->picture_base); - mpeg2dec->picture = NULL; - } - - if ( mpeg2dec->cc_dec) { - /* dispose the closed caption decoder */ - mpeg2dec->cc_dec->dispose(mpeg2dec->cc_dec); - mpeg2dec->cc_dec = NULL; - } -} - -void mpeg2_find_sequence_header (mpeg2dec_t * mpeg2dec, - uint8_t * current, uint8_t * end){ - - uint8_t code, next_code; - picture_t *picture = mpeg2dec->picture; - - mpeg2dec->seek_mode = 1; - - while (current != end) { - code = mpeg2dec->code; - current = copy_chunk 
(mpeg2dec, current, end); - if (current == NULL) - return ; - next_code = mpeg2dec->code; - - /* printf ("looking for sequence header... %02x\n", code); */ - - mpeg2_stats (code, mpeg2dec->chunk_buffer); - - if (code == 0xb3) { /* sequence_header_code */ - if (mpeg2_header_sequence (picture, mpeg2dec->chunk_buffer)) { - printf ("libmpeg2: bad sequence header\n"); - continue; - } - - /* according to ISO/IEC 13818-2, an extension start code will follow. - * Otherwise the stream follows ISO/IEC 11172-2 which means MPEG1 */ - picture->mpeg1 = (next_code != 0xb5); - - if (mpeg2dec->force_aspect) picture->aspect_ratio_information = mpeg2dec->force_aspect; - - if (mpeg2dec->is_sequence_needed) { - xine_event_t event; - xine_format_change_data_t data; - - mpeg2dec->new_sequence = 1; - - mpeg2dec->is_sequence_needed = 0; - picture->frame_width = picture->coded_picture_width; - picture->frame_height = picture->coded_picture_height; - - remember_metainfo (mpeg2dec); - - event.type = XINE_EVENT_FRAME_FORMAT_CHANGE; - event.stream = mpeg2dec->stream; - event.data = &data; - event.data_length = sizeof(data); - data.width = picture->coded_picture_width; - data.height = picture->coded_picture_height; - data.aspect = picture->aspect_ratio_information; - data.pan_scan = mpeg2dec->force_pan_scan; - xine_event_send(mpeg2dec->stream, &event); - - _x_stream_info_set(mpeg2dec->stream, XINE_STREAM_INFO_VIDEO_WIDTH, - picture->display_width); - _x_stream_info_set(mpeg2dec->stream, XINE_STREAM_INFO_VIDEO_HEIGHT, - picture->display_height); - } - } else if (code == 0xb5) { /* extension_start_code */ - if (mpeg2_header_extension (picture, mpeg2dec->chunk_buffer)) { - printf ("libmpeg2: bad extension\n"); - continue ; - } - } - } -} - -/* Find the end of the userdata field in an MPEG-2 stream */ -static uint8_t *find_end(uint8_t *buffer) -{ - uint8_t *current = buffer; - while(1) { - if (current[0] == 0 && current[1] == 0 && current[2] == 1) - break; - current++; - } - return current; -} - 
-static void process_userdata(mpeg2dec_t *mpeg2dec, uint8_t *buffer) -{ - /* check if user data denotes closed captions */ - if (buffer[0] == 'C' && buffer[1] == 'C') { - - if (!mpeg2dec->cc_dec) { - xine_event_t event; - xine_format_change_data_t data; - - /* open the closed caption decoder first */ - mpeg2dec->cc_dec = _x_get_spu_decoder(mpeg2dec->stream, (BUF_SPU_CC >> 16) & 0xff); - - /* send a frame format event so that the CC decoder knows the initial image size */ - event.type = XINE_EVENT_FRAME_FORMAT_CHANGE; - event.stream = mpeg2dec->stream; - event.data = &data; - event.data_length = sizeof(data); - data.width = mpeg2dec->picture->coded_picture_width; - data.height = mpeg2dec->picture->coded_picture_height; - data.aspect = mpeg2dec->picture->aspect_ratio_information; - data.pan_scan = mpeg2dec->force_pan_scan; - xine_event_send(mpeg2dec->stream, &event); - - _x_stream_info_set(mpeg2dec->stream, XINE_STREAM_INFO_VIDEO_WIDTH, - mpeg2dec->picture->display_width); - _x_stream_info_set(mpeg2dec->stream, XINE_STREAM_INFO_VIDEO_HEIGHT, - mpeg2dec->picture->display_height); - } - - if (mpeg2dec->cc_dec) { - buf_element_t buf; - - buf.type = BUF_SPU_CC; - buf.content = &buffer[2]; - buf.pts = mpeg2dec->pts; - buf.size = find_end(buffer) - &buffer[2]; - buf.decoder_flags = 0; - - mpeg2dec->cc_dec->decode_data(mpeg2dec->cc_dec, &buf); - } - } - /* check Active Format Description ETSI TS 101 154 V1.5.1 */ - else if (buffer[0] == 0x44 && buffer[1] == 0x54 && buffer[2] == 0x47 && buffer[3] == 0x31) - mpeg2dec->afd_value_seen = (buffer[4] & 0x40) ? (buffer[5] & 0x0f) : XINE_VIDEO_AFD_NOT_PRESENT; -} diff --git a/src/libmpeg2/header.c b/src/libmpeg2/header.c deleted file mode 100644 index 12ba0ff8a..000000000 --- a/src/libmpeg2/header.c +++ /dev/null @@ -1,411 +0,0 @@ -/* - * header.c - * Copyright (C) 2000-2002 Michel Lespinasse - * Copyright (C) 1999-2000 Aaron Holtzman - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. 
- * See http://libmpeg2.sourceforge.net/ for updates. - * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -/* -#define LOG_PAN_SCAN -*/ - -#include "config.h" - -#include /* For printf debugging */ -#include - -#include "mpeg2_internal.h" -#include - -/* default intra quant matrix, in zig-zag order */ -static const uint8_t default_intra_quantizer_matrix[64] ATTR_ALIGN(16) = { - 8, - 16, 16, - 19, 16, 19, - 22, 22, 22, 22, - 22, 22, 26, 24, 26, - 27, 27, 27, 26, 26, 26, - 26, 27, 27, 27, 29, 29, 29, - 34, 34, 34, 29, 29, 29, 27, 27, - 29, 29, 32, 32, 34, 34, 37, - 38, 37, 35, 35, 34, 35, - 38, 38, 40, 40, 40, - 48, 48, 46, 46, - 56, 56, 58, - 69, 69, - 83 -}; - -uint8_t mpeg2_scan_norm[64] ATTR_ALIGN(16) = -{ - /* Zig-Zag scan pattern */ - 0, 1, 8,16, 9, 2, 3,10, - 17,24,32,25,18,11, 4, 5, - 12,19,26,33,40,48,41,34, - 27,20,13, 6, 7,14,21,28, - 35,42,49,56,57,50,43,36, - 29,22,15,23,30,37,44,51, - 58,59,52,45,38,31,39,46, - 53,60,61,54,47,55,62,63 -}; - -uint8_t mpeg2_scan_alt[64] ATTR_ALIGN(16) = -{ - /* Alternate scan pattern */ - 0,8,16,24,1,9,2,10,17,25,32,40,48,56,57,49, - 41,33,26,18,3,11,4,12,19,27,34,42,50,58,35,43, - 51,59,20,28,5,13,6,14,21,29,36,44,52,60,37,45, - 53,61,22,30,7,15,23,31,38,46,54,62,39,47,55,63 -}; - -/* count must be between 1 and 32 */ -static uint32_t get_bits(uint8_t *buffer, 
uint32_t count, uint32_t *bit_position) { - uint32_t byte_offset; - uint32_t bit_offset; - uint32_t bit_mask; - uint32_t bit_bite; - uint32_t result=0; - if (count == 0) return 0; - do { - byte_offset = *bit_position >> 3; /* Div 8 */ - bit_offset = 8 - (*bit_position & 0x7); /* Bits got 87654321 */ - bit_mask = ((1 << (bit_offset)) - 1); - bit_bite = bit_offset; - if (count < bit_offset) { - bit_mask ^= ((1 << (bit_offset-count)) - 1); - bit_bite = count; - } - /* - printf("Byte=0x%02x Bitmask=0x%04x byte_offset=%u bit_offset=%u bit_byte=%u count=%u\n",buffer[byte_offset], bit_mask, byte_offset, bit_offset, bit_bite,count); - */ - result = (result << bit_bite) | ((buffer[byte_offset] & bit_mask) >> (bit_offset-bit_bite)); - *bit_position+=bit_bite; - count-=bit_bite; - } while ((count > 0) && (byte_offset<50) ); - return result; -} - -void mpeg2_header_state_init (picture_t * picture) -{ - picture->scan = mpeg2_scan_norm; - picture->load_intra_quantizer_matrix = 1; - picture->load_non_intra_quantizer_matrix = 1; -} - -int mpeg2_header_sequence (picture_t * picture, uint8_t * buffer) -{ - int width, height; - int i; - - if ((buffer[6] & 0x20) != 0x20) - return 1; /* missing marker_bit */ - - height = (buffer[0] << 16) | (buffer[1] << 8) | buffer[2]; - - picture->display_width = width = (height >> 12); - picture->display_height = height = (height & 0xfff); - - width = (width + 15) & ~15; - height = (height + 15) & ~15; - - if ((width > 1920) || (height > 1152)) - return 1; /* size restrictions for MP@HL */ - - picture->coded_picture_width = width; - picture->coded_picture_height = height; - - /* this is not used by the decoder */ - picture->aspect_ratio_information = buffer[3] >> 4; - picture->frame_rate_code = buffer[3] & 15; - picture->bitrate = (buffer[4]<<10)|(buffer[5]<<2)|(buffer[6]>>6); - - if (buffer[7] & 2) { - for (i = 0; i < 64; i++) - picture->intra_quantizer_matrix[mpeg2_scan_norm[i]] = - (buffer[i+7] << 7) | (buffer[i+8] >> 1); - buffer += 64; - } else 
- for (i = 0; i < 64; i++) - picture->intra_quantizer_matrix[mpeg2_scan_norm[i]] = - default_intra_quantizer_matrix [i]; - - if (buffer[7] & 1) - for (i = 0; i < 64; i++) - picture->non_intra_quantizer_matrix[mpeg2_scan_norm[i]] = - buffer[i+8]; - else - for (i = 0; i < 64; i++) - picture->non_intra_quantizer_matrix[i] = 16; - picture->load_intra_quantizer_matrix = 1; - picture->load_non_intra_quantizer_matrix = 1; - /* MPEG1 - for testing only */ - picture->mpeg1 = 1; - picture->intra_dc_precision = 0; - picture->frame_pred_frame_dct = 1; - picture->q_scale_type = 0; - picture->concealment_motion_vectors = 0; - /* picture->alternate_scan = 0; */ - picture->picture_structure = FRAME_PICTURE; - /* picture->second_field = 0; */ - - return 0; -} - -static int sequence_extension (picture_t * picture, uint8_t * buffer) -{ - /* check chroma format, size extensions, marker bit */ - if (((buffer[1] & 0x07) != 0x02) || (buffer[2] & 0xe0) || - ((buffer[3] & 0x01) != 0x01)) - return 1; - - /* this is not used by the decoder */ - picture->progressive_sequence = (buffer[1] >> 3) & 1; - - picture->low_delay = buffer[5] & 0x80; - - if (!picture->progressive_sequence) - picture->coded_picture_height = - (picture->coded_picture_height + 31) & ~31; - - - /* printf ("libmpeg2: low_delay : %d\n", picture->low_delay); */ - -/* - printf ("libmpeg2: sequence extension+5 : %08x (%d)\n", - buffer[5], buffer[5] % 0x80); - */ - - picture->frame_rate_ext_n = buffer[5] & 0x31; - picture->frame_rate_ext_d = (buffer[5] >> 2) & 0x03; - - /* MPEG1 - for testing only */ - picture->mpeg1 = 0; - - return 0; -} - -static int quant_matrix_extension (picture_t * picture, uint8_t * buffer) -{ - int i; - - if (buffer[0] & 8) { - for (i = 0; i < 64; i++) - picture->intra_quantizer_matrix[mpeg2_scan_norm[i]] = - (buffer[i] << 5) | (buffer[i+1] >> 3); - buffer += 64; - } - - if (buffer[0] & 4) - for (i = 0; i < 64; i++) - picture->non_intra_quantizer_matrix[mpeg2_scan_norm[i]] = - (buffer[i] << 6) | 
(buffer[i+1] >> 2); - - return 0; -} - -static int picture_coding_extension (picture_t * picture, uint8_t * buffer) -{ - /* pre subtract 1 for use later in compute_motion_vector */ - picture->f_motion.f_code[0] = (buffer[0] & 15) - 1; - picture->f_motion.f_code[1] = (buffer[1] >> 4) - 1; - picture->b_motion.f_code[0] = (buffer[1] & 15) - 1; - picture->b_motion.f_code[1] = (buffer[2] >> 4) - 1; - - picture->intra_dc_precision = (buffer[2] >> 2) & 3; - picture->picture_structure = buffer[2] & 3; - picture->frame_pred_frame_dct = (buffer[3] >> 6) & 1; - picture->concealment_motion_vectors = (buffer[3] >> 5) & 1; - picture->q_scale_type = (buffer[3] >> 4) & 1; - picture->intra_vlc_format = (buffer[3] >> 3) & 1; - - if (buffer[3] & 4) /* alternate_scan */ - picture->scan = mpeg2_scan_alt; - else - picture->scan = mpeg2_scan_norm; - - /* these are not used by the decoder */ - picture->top_field_first = buffer[3] >> 7; - picture->repeat_first_field = (buffer[3] >> 1) & 1; - picture->progressive_frame = buffer[4] >> 7; - - return 0; -} - -static int sequence_display_extension (picture_t * picture, uint8_t * buffer) { - /* FIXME: implement. 
*/ - uint32_t bit_position; - uint32_t padding; - - bit_position = 0; - padding = get_bits(buffer, 4, &bit_position); - picture->video_format = get_bits(buffer, 3, &bit_position); - picture->colour_description = get_bits(buffer, 1, &bit_position); - if(picture->colour_description) { - picture->colour_primatives = get_bits(buffer, 8, &bit_position); - picture->transfer_characteristics = get_bits(buffer, 8, &bit_position); - picture->matrix_coefficients = get_bits(buffer, 8, &bit_position); - } - picture->display_horizontal_size = get_bits(buffer, 14, &bit_position); - padding = get_bits(buffer, 1, &bit_position); - picture->display_vertical_size = get_bits(buffer, 14, &bit_position); - -#ifdef LOG_PAN_SCAN - printf("Sequence_display_extension\n"); - printf(" video_format: %u\n", picture->video_format); - printf(" colour_description: %u\n", picture->colour_description); - if(picture->colour_description) { - printf(" colour_primatives: %u\n", picture->colour_primatives); - printf(" transfer_characteristics %u\n", picture->transfer_characteristics); - printf(" matrix_coefficients %u\n", picture->matrix_coefficients); - } - printf(" display_horizontal_size %u\n", picture->display_horizontal_size); - printf(" display_vertical_size %u\n", picture->display_vertical_size); -#endif - - return 0; -} - -static int picture_display_extension (picture_t * picture, uint8_t * buffer) { - uint32_t bit_position; - uint32_t padding; - -#ifdef LOG_PAN_SCAN - printf ("libmpeg2: picture_display_extension\n"); -#endif - - bit_position = 0; - padding = get_bits(buffer, 4, &bit_position); - picture->frame_centre_horizontal_offset = get_bits(buffer, 16, &bit_position); - padding = get_bits(buffer, 1, &bit_position); - picture->frame_centre_vertical_offset = get_bits(buffer, 16, &bit_position); - padding = get_bits(buffer, 1, &bit_position); - -#ifdef LOG_PAN_SCAN - printf("Pan & Scan centre (x,y) = (%u, %u)\n", - picture->frame_centre_horizontal_offset, - 
picture->frame_centre_vertical_offset); -#endif - - return 0; -} - -int mpeg2_header_extension (picture_t * picture, uint8_t * buffer) -{ - switch (buffer[0] & 0xf0) { - case 0x00: /* reserved */ - return 0; - - case 0x10: /* sequence extension */ - return sequence_extension (picture, buffer); - - case 0x20: /* sequence display extension for Pan & Scan */ - return sequence_display_extension (picture, buffer); - - case 0x30: /* quant matrix extension */ - return quant_matrix_extension (picture, buffer); - - case 0x40: /* copyright extension */ - return 0; - - case 0x50: /* sequence scalable extension */ - return 0; - - case 0x60: /* reserved */ - return 0; - - case 0x70: /* picture display extension for Pan & Scan */ - return picture_display_extension (picture, buffer); - - case 0x80: /* picture coding extension */ - return picture_coding_extension (picture, buffer); - - case 0x90: /* picture spacial scalable extension */ - return 0; - - case 0xA0: /* picture temporal scalable extension */ - return 0; - - case 0xB0: /* camera parameters extension */ - return 0; - - case 0xC0: /* ITU-T extension */ - return 0; - - case 0xD0: /* reserved */ - return 0; - - case 0xE0: /* reserved */ - return 0; - - case 0xF0: /* reserved */ - return 0; - } - - return 0; -} - -int mpeg2_header_group_of_pictures (picture_t * picture, uint8_t * buffer) { - uint32_t bit_position; - uint32_t padding; - bit_position = 0; - - picture->drop_frame_flag = get_bits(buffer, 1, &bit_position); - picture->time_code_hours = get_bits(buffer, 5, &bit_position); - picture->time_code_minutes = get_bits(buffer, 6, &bit_position); - padding = get_bits(buffer, 1, &bit_position); - picture->time_code_seconds = get_bits(buffer, 6, &bit_position); - picture->time_code_pictures = get_bits(buffer, 6, &bit_position); - picture->closed_gop = get_bits(buffer, 1, &bit_position); - picture->broken_link = get_bits(buffer, 1, &bit_position); - -#ifdef LOG_PAN_SCAN - printf("Group of pictures\n"); - printf(" 
drop_frame_flag: %u\n", picture->drop_frame_flag); - printf(" time_code: HH:MM:SS:Pictures %02u:%02u:%02u:%02u\n", - picture->time_code_hours, - picture->time_code_minutes, - picture->time_code_seconds, - picture->time_code_pictures); - printf(" closed_gop: %u\n", picture->closed_gop); - printf(" bloken_link: %u\n", picture->broken_link); -#endif - - return 0; -} - -int mpeg2_header_picture (picture_t * picture, uint8_t * buffer) -{ - picture->picture_coding_type = (buffer [1] >> 3) & 7; - picture->vbv_delay = ((buffer[1] << 13) | (buffer[2] << 5) | - (buffer[3] >> 3)) & 0xffff; - - /* forward_f_code and backward_f_code - used in mpeg1 only */ - picture->f_motion.f_code[1] = (buffer[3] >> 2) & 1; - picture->f_motion.f_code[0] = - (((buffer[3] << 1) | (buffer[4] >> 7)) & 7) - 1; - picture->b_motion.f_code[1] = (buffer[4] >> 6) & 1; - picture->b_motion.f_code[0] = ((buffer[4] >> 3) & 7) - 1; - - /* move in header_process_picture_header */ - picture->second_field = - (picture->picture_structure != FRAME_PICTURE) && - !(picture->second_field); - - return 0; -} diff --git a/src/libmpeg2/idct.c b/src/libmpeg2/idct.c deleted file mode 100644 index 9f216db58..000000000 --- a/src/libmpeg2/idct.c +++ /dev/null @@ -1,348 +0,0 @@ -/* - * idct.c - * Copyright (C) 2000-2002 Michel Lespinasse - * Copyright (C) 1999-2000 Aaron Holtzman - * - * Portions of this code are from the MPEG software simulation group - * idct implementation. This code will be replaced with a new - * implementation soon. - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * See http://libmpeg2.sourceforge.net/ for updates. - * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
- * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -/**********************************************************/ -/* inverse two dimensional DCT, Chen-Wang algorithm */ -/* (cf. IEEE ASSP-32, pp. 803-816, Aug. 1984) */ -/* 32-bit integer arithmetic (8 bit coefficients) */ -/* 11 mults, 29 adds per DCT */ -/* sE, 18.8.91 */ -/**********************************************************/ -/* coefficients extended to 12 bit for IEEE1180-1990 */ -/* compliance sE, 2.1.94 */ -/**********************************************************/ - -/* this code assumes >> to be a two's-complement arithmetic */ -/* right shift: (-2)>>1 == -1 , (-3)>>1 == -2 */ - -#include "config.h" - -#include -#include -#include - -#include "mpeg2_internal.h" -#include - -#define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */ -#define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */ -#define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */ -#define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */ -#define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */ -#define W7 565 /* 2048*sqrt (2)*cos (7*pi/16) */ - -/* idct main entry points */ -void (* mpeg2_idct_copy) (int16_t * block, uint8_t * dest, int stride); -void (* mpeg2_idct_add) (int16_t * block, uint8_t * dest, int stride); -void (* mpeg2_idct) (int16_t * block); -void (* mpeg2_zero_block) (int16_t * block); - -static uint8_t clip_lut[1024]; -#define CLIP(i) ((clip_lut+384)[ (i)]) - -/* row (horizontal) IDCT - * - * 7 pi 1 - * dst[k] = sum c[l] * src[l] * cos ( -- * ( k + - ) * l ) - * l=0 8 2 - * - * where: c[0] = 128 - * c[1..7] = 128*sqrt (2) - */ - -static void 
inline idct_row (int16_t * block) -{ - int x0, x1, x2, x3, x4, x5, x6, x7, x8; - - x1 = block[4] << 11; - x2 = block[6]; - x3 = block[2]; - x4 = block[1]; - x5 = block[7]; - x6 = block[5]; - x7 = block[3]; - - /* shortcut */ - if (! (x1 | x2 | x3 | x4 | x5 | x6 | x7 )) { - block[0] = block[1] = block[2] = block[3] = block[4] = - block[5] = block[6] = block[7] = block[0]<<3; - return; - } - - x0 = (block[0] << 11) + 128; /* for proper rounding in the fourth stage */ - - /* first stage */ - x8 = W7 * (x4 + x5); - x4 = x8 + (W1 - W7) * x4; - x5 = x8 - (W1 + W7) * x5; - x8 = W3 * (x6 + x7); - x6 = x8 - (W3 - W5) * x6; - x7 = x8 - (W3 + W5) * x7; - - /* second stage */ - x8 = x0 + x1; - x0 -= x1; - x1 = W6 * (x3 + x2); - x2 = x1 - (W2 + W6) * x2; - x3 = x1 + (W2 - W6) * x3; - x1 = x4 + x6; - x4 -= x6; - x6 = x5 + x7; - x5 -= x7; - - /* third stage */ - x7 = x8 + x3; - x8 -= x3; - x3 = x0 + x2; - x0 -= x2; - x2 = (181 * (x4 + x5) + 128) >> 8; - x4 = (181 * (x4 - x5) + 128) >> 8; - - /* fourth stage */ - block[0] = (x7 + x1) >> 8; - block[1] = (x3 + x2) >> 8; - block[2] = (x0 + x4) >> 8; - block[3] = (x8 + x6) >> 8; - block[4] = (x8 - x6) >> 8; - block[5] = (x0 - x4) >> 8; - block[6] = (x3 - x2) >> 8; - block[7] = (x7 - x1) >> 8; -} - -/* column (vertical) IDCT - * - * 7 pi 1 - * dst[8*k] = sum c[l] * src[8*l] * cos ( -- * ( k + - ) * l ) - * l=0 8 2 - * - * where: c[0] = 1/1024 - * c[1..7] = (1/1024)*sqrt (2) - */ - -static void inline idct_col (int16_t *block) -{ - int x0, x1, x2, x3, x4, x5, x6, x7, x8; - - /* shortcut */ - x1 = block [8*4] << 8; - x2 = block [8*6]; - x3 = block [8*2]; - x4 = block [8*1]; - x5 = block [8*7]; - x6 = block [8*5]; - x7 = block [8*3]; - -#if 0 - if (! 
(x1 | x2 | x3 | x4 | x5 | x6 | x7 )) { - block[8*0] = block[8*1] = block[8*2] = block[8*3] = block[8*4] = - block[8*5] = block[8*6] = block[8*7] = (block[8*0] + 32) >> 6; - return; - } -#endif - - x0 = (block[8*0] << 8) + 8192; - - /* first stage */ - x8 = W7 * (x4 + x5) + 4; - x4 = (x8 + (W1 - W7) * x4) >> 3; - x5 = (x8 - (W1 + W7) * x5) >> 3; - x8 = W3 * (x6 + x7) + 4; - x6 = (x8 - (W3 - W5) * x6) >> 3; - x7 = (x8 - (W3 + W5) * x7) >> 3; - - /* second stage */ - x8 = x0 + x1; - x0 -= x1; - x1 = W6 * (x3 + x2) + 4; - x2 = (x1 - (W2 + W6) * x2) >> 3; - x3 = (x1 + (W2 - W6) * x3) >> 3; - x1 = x4 + x6; - x4 -= x6; - x6 = x5 + x7; - x5 -= x7; - - /* third stage */ - x7 = x8 + x3; - x8 -= x3; - x3 = x0 + x2; - x0 -= x2; - x2 = (181 * (x4 + x5) + 128) >> 8; - x4 = (181 * (x4 - x5) + 128) >> 8; - - /* fourth stage */ - block[8*0] = (x7 + x1) >> 14; - block[8*1] = (x3 + x2) >> 14; - block[8*2] = (x0 + x4) >> 14; - block[8*3] = (x8 + x6) >> 14; - block[8*4] = (x8 - x6) >> 14; - block[8*5] = (x0 - x4) >> 14; - block[8*6] = (x3 - x2) >> 14; - block[8*7] = (x7 - x1) >> 14; -} - -static void mpeg2_idct_copy_c (int16_t * block, uint8_t * dest, int stride) -{ - int i; - - for (i = 0; i < 8; i++) - idct_row (block + 8 * i); - - for (i = 0; i < 8; i++) - idct_col (block + i); - - i = 8; - do { - dest[0] = CLIP (block[0]); - dest[1] = CLIP (block[1]); - dest[2] = CLIP (block[2]); - dest[3] = CLIP (block[3]); - dest[4] = CLIP (block[4]); - dest[5] = CLIP (block[5]); - dest[6] = CLIP (block[6]); - dest[7] = CLIP (block[7]); - - block[0] = 0; block[1] = 0; block[2] = 0; block[3] = 0; - block[4] = 0; block[5] = 0; block[6] = 0; block[7] = 0; - - dest += stride; - block += 8; - } while (--i); -} - -static void mpeg2_idct_add_c (int16_t * block, uint8_t * dest, int stride) -{ - int i; - - for (i = 0; i < 8; i++) - idct_row (block + 8 * i); - - for (i = 0; i < 8; i++) - idct_col (block + i); - - i = 8; - do { - dest[0] = CLIP (block[0] + dest[0]); - dest[1] = CLIP (block[1] + dest[1]); - 
dest[2] = CLIP (block[2] + dest[2]); - dest[3] = CLIP (block[3] + dest[3]); - dest[4] = CLIP (block[4] + dest[4]); - dest[5] = CLIP (block[5] + dest[5]); - dest[6] = CLIP (block[6] + dest[6]); - dest[7] = CLIP (block[7] + dest[7]); - - block[0] = 0; block[1] = 0; block[2] = 0; block[3] = 0; - block[4] = 0; block[5] = 0; block[6] = 0; block[7] = 0; - - dest += stride; - block += 8; - } while (--i); -} - -static void mpeg2_idct_c (int16_t * block) -{ - int i; - - for (i = 0; i < 8; i++) - idct_row (block + 8 * i); - - for (i = 0; i < 8; i++) - idct_col (block + i); -} - -static void mpeg2_zero_block_c (int16_t * wblock) -{ - memset( wblock, 0, sizeof(int16_t) * 64 ); -} - -void mpeg2_idct_init (uint32_t mm_accel) -{ - mpeg2_zero_block = mpeg2_zero_block_c; - -#if defined(ARCH_X86) || defined(ARCH_X86_64) - if (mm_accel & MM_ACCEL_X86_MMXEXT) { -#ifdef LOG - fprintf (stderr, "Using MMXEXT for IDCT transform\n"); -#endif - mpeg2_idct_copy = mpeg2_idct_copy_mmxext; - mpeg2_idct_add = mpeg2_idct_add_mmxext; - mpeg2_idct = mpeg2_idct_mmxext; - mpeg2_zero_block = mpeg2_zero_block_mmx; - mpeg2_idct_mmx_init (); - } else if (mm_accel & MM_ACCEL_X86_MMX) { -#ifdef LOG - fprintf (stderr, "Using MMX for IDCT transform\n"); -#endif - mpeg2_idct_copy = mpeg2_idct_copy_mmx; - mpeg2_idct_add = mpeg2_idct_add_mmx; - mpeg2_idct = mpeg2_idct_mmx; - mpeg2_zero_block = mpeg2_zero_block_mmx; - mpeg2_idct_mmx_init (); - } else -#endif -#if defined (ARCH_PPC) && defined (ENABLE_ALTIVEC) - if (mm_accel & MM_ACCEL_PPC_ALTIVEC) { -#ifdef LOG - fprintf (stderr, "Using altivec for IDCT transform\n"); -#endif - mpeg2_idct_copy = mpeg2_idct_copy_altivec; - mpeg2_idct_add = mpeg2_idct_add_altivec; - mpeg2_idct_altivec_init (); - mpeg2_idct = mpeg2_idct_c; - } else -#endif -#ifdef LIBMPEG2_MLIB - if (mm_accel & MM_ACCEL_MLIB) { - char * env_var; - - env_var = getenv ("MLIB_NON_IEEE"); - - mpeg2_idct = mpeg2_idct_mlib; - if (env_var == NULL) { -#ifdef LOG - fprintf (stderr, "Using mlib for IDCT 
transform\n"); -#endif - mpeg2_idct_add = mpeg2_idct_add_mlib; - } else { - fprintf (stderr, "Using non-IEEE mlib for IDCT transform\n"); - mpeg2_idct_add = mpeg2_idct_add_mlib_non_ieee; - } - mpeg2_idct_copy = mpeg2_idct_copy_mlib_non_ieee; - } else -#endif - { - int i; - -#ifdef LOG - fprintf (stderr, "No accelerated IDCT transform found\n"); -#endif - mpeg2_idct_copy = mpeg2_idct_copy_c; - mpeg2_idct_add = mpeg2_idct_add_c; - mpeg2_idct = mpeg2_idct_c; - for (i = -384; i < 640; i++) - clip_lut[i+384] = (i < 0) ? 0 : ((i > 255) ? 255 : i); - } -} diff --git a/src/libmpeg2/idct_altivec.c b/src/libmpeg2/idct_altivec.c deleted file mode 100644 index de396560b..000000000 --- a/src/libmpeg2/idct_altivec.c +++ /dev/null @@ -1,233 +0,0 @@ -/* - * idct_altivec.c - * Copyright (C) 2000-2002 Michel Lespinasse - * Copyright (C) 1999-2000 Aaron Holtzman - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * See http://libmpeg2.sourceforge.net/ for updates. - * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include "config.h" - -#if defined (ARCH_PPC) && defined (ENABLE_ALTIVEC) - -#include - -#include - -#include "mpeg2_internal.h" -#include - -#define vector_s16_t vector signed short -#define vector_u16_t vector unsigned short -#define vector_s8_t vector signed char -#define vector_u8_t vector unsigned char -#define vector_s32_t vector signed int -#define vector_u32_t vector unsigned int - -#define IDCT_HALF \ - /* 1st stage */ \ - t1 = vec_mradds (a1, vx7, vx1 ); \ - t8 = vec_mradds (a1, vx1, vec_subs (zero, vx7)); \ - t7 = vec_mradds (a2, vx5, vx3); \ - t3 = vec_mradds (ma2, vx3, vx5); \ - \ - /* 2nd stage */ \ - t5 = vec_adds (vx0, vx4); \ - t0 = vec_subs (vx0, vx4); \ - t2 = vec_mradds (a0, vx6, vx2); \ - t4 = vec_mradds (a0, vx2, vec_subs (zero,vx6)); \ - t6 = vec_adds (t8, t3); \ - t3 = vec_subs (t8, t3); \ - t8 = vec_subs (t1, t7); \ - t1 = vec_adds (t1, t7); \ - \ - /* 3rd stage */ \ - t7 = vec_adds (t5, t2); \ - t2 = vec_subs (t5, t2); \ - t5 = vec_adds (t0, t4); \ - t0 = vec_subs (t0, t4); \ - t4 = vec_subs (t8, t3); \ - t3 = vec_adds (t8, t3); \ - \ - /* 4th stage */ \ - vy0 = vec_adds (t7, t1); \ - vy7 = vec_subs (t7, t1); \ - vy1 = vec_mradds (c4, t3, t5); \ - vy6 = vec_mradds (mc4, t3, t5); \ - vy2 = vec_mradds (c4, t4, t0); \ - vy5 = vec_mradds (mc4, t4, t0); \ - vy3 = vec_adds (t2, t6); \ - vy4 = vec_subs (t2, t6); - -#define IDCT \ - vector_s16_t vx0, vx1, vx2, vx3, vx4, vx5, vx6, vx7; \ - vector_s16_t vy0, vy1, vy2, vy3, vy4, vy5, vy6, vy7; \ - vector_s16_t a0, a1, a2, ma2, c4, mc4, zero, bias; \ - vector_s16_t t0, t1, t2, t3, t4, t5, t6, t7, t8; \ - vector_u16_t shift; \ - \ - c4 = vec_splat (constants[0], 0); \ - a0 = vec_splat (constants[0], 1); \ - a1 = vec_splat (constants[0], 2); \ - a2 = vec_splat (constants[0], 3); \ - 
mc4 = vec_splat (constants[0], 4); \ - ma2 = vec_splat (constants[0], 5); \ - bias = (vector_s16_t)vec_splat ((vector_s32_t)constants[0], 3); \ - \ - zero = vec_splat_s16 (0); \ - shift = vec_splat_u16 (4); \ - \ - vx0 = vec_mradds (vec_sl (block[0], shift), constants[1], zero); \ - vx1 = vec_mradds (vec_sl (block[1], shift), constants[2], zero); \ - vx2 = vec_mradds (vec_sl (block[2], shift), constants[3], zero); \ - vx3 = vec_mradds (vec_sl (block[3], shift), constants[4], zero); \ - vx4 = vec_mradds (vec_sl (block[4], shift), constants[1], zero); \ - vx5 = vec_mradds (vec_sl (block[5], shift), constants[4], zero); \ - vx6 = vec_mradds (vec_sl (block[6], shift), constants[3], zero); \ - vx7 = vec_mradds (vec_sl (block[7], shift), constants[2], zero); \ - \ - IDCT_HALF \ - \ - vx0 = vec_mergeh (vy0, vy4); \ - vx1 = vec_mergel (vy0, vy4); \ - vx2 = vec_mergeh (vy1, vy5); \ - vx3 = vec_mergel (vy1, vy5); \ - vx4 = vec_mergeh (vy2, vy6); \ - vx5 = vec_mergel (vy2, vy6); \ - vx6 = vec_mergeh (vy3, vy7); \ - vx7 = vec_mergel (vy3, vy7); \ - \ - vy0 = vec_mergeh (vx0, vx4); \ - vy1 = vec_mergel (vx0, vx4); \ - vy2 = vec_mergeh (vx1, vx5); \ - vy3 = vec_mergel (vx1, vx5); \ - vy4 = vec_mergeh (vx2, vx6); \ - vy5 = vec_mergel (vx2, vx6); \ - vy6 = vec_mergeh (vx3, vx7); \ - vy7 = vec_mergel (vx3, vx7); \ - \ - vx0 = vec_adds (vec_mergeh (vy0, vy4), bias); \ - vx1 = vec_mergel (vy0, vy4); \ - vx2 = vec_mergeh (vy1, vy5); \ - vx3 = vec_mergel (vy1, vy5); \ - vx4 = vec_mergeh (vy2, vy6); \ - vx5 = vec_mergel (vy2, vy6); \ - vx6 = vec_mergeh (vy3, vy7); \ - vx7 = vec_mergel (vy3, vy7); \ - \ - IDCT_HALF \ - \ - shift = vec_splat_u16 (6); \ - vx0 = vec_sra (vy0, shift); \ - vx1 = vec_sra (vy1, shift); \ - vx2 = vec_sra (vy2, shift); \ - vx3 = vec_sra (vy3, shift); \ - vx4 = vec_sra (vy4, shift); \ - vx5 = vec_sra (vy5, shift); \ - vx6 = vec_sra (vy6, shift); \ - vx7 = vec_sra (vy7, shift); - -#if defined( __APPLE_CC__ ) && defined( __APPLE_ALTIVEC__ ) /* apple */ -#define 
VEC_S16(a,b,c,d,e,f,g,h) (vector_s16_t) (a, b, c, d, e, f, g, h) -#else /* gnu */ -#define VEC_S16(a,b,c,d,e,f,g,h) (vector_s16_t) {a, b, c, d, e, f, g, h} -#endif - -static vector_s16_t constants[5] = { - VEC_S16(23170, 13573, 6518, 21895, -23170, -21895, 32, 31), - VEC_S16(16384, 22725, 21407, 19266, 16384, 19266, 21407, 22725), - VEC_S16(22725, 31521, 29692, 26722, 22725, 26722, 29692, 31521), - VEC_S16(21407, 29692, 27969, 25172, 21407, 25172, 27969, 29692), - VEC_S16(19266, 26722, 25172, 22654, 19266, 22654, 25172, 26722) -}; - -void mpeg2_idct_copy_altivec (vector_s16_t * block, unsigned char * dest, - int stride) -{ - vector_u8_t tmp; - - IDCT - -#define COPY(dest,src) \ - tmp = vec_packsu (src, src); \ - vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); \ - vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); - - COPY (dest, vx0) dest += stride; - COPY (dest, vx1) dest += stride; - COPY (dest, vx2) dest += stride; - COPY (dest, vx3) dest += stride; - COPY (dest, vx4) dest += stride; - COPY (dest, vx5) dest += stride; - COPY (dest, vx6) dest += stride; - COPY (dest, vx7) - memset (block, 0, 64 * sizeof (signed short)); -} - -void mpeg2_idct_add_altivec (vector_s16_t * block, unsigned char * dest, - int stride) -{ - vector_u8_t tmp; - vector_s16_t tmp2, tmp3; - vector_u8_t perm0; - vector_u8_t perm1; - vector_u8_t p0, p1, p; - - IDCT - - p0 = vec_lvsl (0, dest); - p1 = vec_lvsl (stride, dest); - p = vec_splat_u8 (-1); - perm0 = vec_mergeh (p, p0); - perm1 = vec_mergeh (p, p1); - -#define ADD(dest,src,perm) \ - /* *(uint64_t *)&tmp = *(uint64_t *)dest; */ \ - tmp = vec_ld (0, dest); \ - tmp2 = (vector_s16_t)vec_perm (tmp, (vector_u8_t)zero, perm); \ - tmp3 = vec_adds (tmp2, src); \ - tmp = vec_packsu (tmp3, tmp3); \ - vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); \ - vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); - - ADD (dest, vx0, perm0) dest += stride; - ADD (dest, vx1, perm1) dest += stride; - ADD (dest, vx2, perm0) dest += stride; - 
ADD (dest, vx3, perm1) dest += stride; - ADD (dest, vx4, perm0) dest += stride; - ADD (dest, vx5, perm1) dest += stride; - ADD (dest, vx6, perm0) dest += stride; - ADD (dest, vx7, perm1) - memset (block, 0, 64 * sizeof (signed short)); -} - -void mpeg2_idct_altivec_init (void) -{ - int i, j; - - /* the altivec idct uses a transposed input, so we patch scan tables */ - for (i = 0; i < 64; i++) { - j = mpeg2_scan_norm[i]; - mpeg2_scan_norm[i] = (j >> 3) | ((j & 7) << 3); - j = mpeg2_scan_alt[i]; - mpeg2_scan_alt[i] = (j >> 3) | ((j & 7) << 3); - } -} - -#endif /* ARCH_PPC && ENABLED_ALTIVEC */ - diff --git a/src/libmpeg2/idct_mlib.c b/src/libmpeg2/idct_mlib.c deleted file mode 100644 index e573c9790..000000000 --- a/src/libmpeg2/idct_mlib.c +++ /dev/null @@ -1,62 +0,0 @@ -/* - * idct_mlib.c - * Copyright (C) 1999-2002 HÃ¥kan Hjort - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * See http://libmpeg2.sourceforge.net/ for updates. - * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include "config.h" - -#ifdef LIBMPEG2_MLIB - -#include -#include -#include -#include -#include -#include - -#include "mpeg2_internal.h" - -void mpeg2_idct_add_mlib (int16_t * block, uint8_t * dest, int stride) -{ - mlib_VideoIDCT_IEEE_S16_S16 (block, block); - mlib_VideoAddBlock_U8_S16 (dest, block, stride); - memset (block, 0, 64 * sizeof (uint16_t)); -} - -void mpeg2_idct_copy_mlib_non_ieee (int16_t * block, uint8_t * dest, - int stride) -{ - mlib_VideoIDCT8x8_U8_S16 (dest, block, stride); - memset (block, 0, 64 * sizeof (uint16_t)); -} - -void mpeg2_idct_add_mlib_non_ieee (int16_t * block, uint8_t * dest, int stride) -{ - mlib_VideoIDCT8x8_S16_S16 (block, block); - mlib_VideoAddBlock_U8_S16 (dest, block, stride); - memset (block, 0, 64 * sizeof (uint16_t)); -} - -void mpeg2_idct_mlib (int16_t * block) -{ - mlib_VideoIDCT_IEEE_S16_S16 (block, block); -} - -#endif diff --git a/src/libmpeg2/idct_mlib.h b/src/libmpeg2/idct_mlib.h deleted file mode 100644 index 1fb0787dd..000000000 --- a/src/libmpeg2/idct_mlib.h +++ /dev/null @@ -1,25 +0,0 @@ -/* - * idct_mlib.h - * - * Copyright (C) 1999, HÃ¥kan Hjort - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, - * - */ - -void idct_block_copy_mlib (int16_t * block, uint8_t * dest, int stride); -void idct_block_add_mlib (int16_t * block, uint8_t * dest, int stride); diff --git a/src/libmpeg2/idct_mmx.c b/src/libmpeg2/idct_mmx.c deleted file mode 100644 index 6bb4bfbf0..000000000 --- a/src/libmpeg2/idct_mmx.c +++ /dev/null @@ -1,740 +0,0 @@ -/* - * idct_mmx.c - * Copyright (C) 2000-2002 Michel Lespinasse - * Copyright (C) 1999-2000 Aaron Holtzman - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * See http://libmpeg2.sourceforge.net/ for updates. - * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include "config.h" - -#if defined(ARCH_X86) || defined(ARCH_X86_64) - -#include - -#include "mpeg2_internal.h" -#include - -#define ROW_SHIFT 11 -#define COL_SHIFT 6 - -#define round(bias) ((int)(((bias)+0.5) * (1<> ROW_SHIFT; - row[1] = (a1 + b1) >> ROW_SHIFT; - row[2] = (a2 + b2) >> ROW_SHIFT; - row[3] = (a3 + b3) >> ROW_SHIFT; - row[4] = (a3 - b3) >> ROW_SHIFT; - row[5] = (a2 - b2) >> ROW_SHIFT; - row[6] = (a1 - b1) >> ROW_SHIFT; - row[7] = (a0 - b0) >> ROW_SHIFT; -} -#endif - - -/* MMXEXT row IDCT */ - -#define mmxext_table(c1,c2,c3,c4,c5,c6,c7) { c4, c2, -c4, -c2, \ - c4, c6, c4, c6, \ - c1, c3, -c1, -c5, \ - c5, c7, c3, -c7, \ - c4, -c6, c4, -c6, \ - -c4, c2, c4, -c2, \ - c5, -c1, c3, -c1, \ - c7, c3, c7, -c5 } - -static inline void mmxext_row_head (int16_t * row, int offset, int16_t * table) -{ - movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0 - - movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1 - movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0 - - movq_m2r (*table, mm3); // mm3 = -C2 -C4 C2 C4 - movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1 - - movq_m2r (*(table+4), mm4); // mm4 = C6 C4 C6 C4 - pmaddwd_r2r (mm0, mm3); // mm3 = -C4*x4-C2*x6 C4*x0+C2*x2 - - pshufw_r2r (mm2, mm2, 0x4e); // mm2 = x2 x0 x6 x4 -} - -static inline void mmxext_row (int16_t * table, int32_t * rounder) -{ - movq_m2r (*(table+8), mm1); // mm1 = -C5 -C1 C3 C1 - pmaddwd_r2r (mm2, mm4); // mm4 = C4*x0+C6*x2 C4*x4+C6*x6 - - pmaddwd_m2r (*(table+16), mm0); // mm0 = C4*x4-C6*x6 C4*x0-C6*x2 - pshufw_r2r (mm6, mm6, 0x4e); // mm6 = x3 x1 x7 x5 - - movq_m2r (*(table+12), mm7); // mm7 = -C7 C3 C7 C5 - pmaddwd_r2r (mm5, mm1); // mm1 = -C1*x5-C5*x7 C1*x1+C3*x3 - - paddd_m2r (*rounder, mm3); // mm3 += rounder - pmaddwd_r2r (mm6, mm7); // mm7 = C3*x1-C7*x3 C5*x5+C7*x7 - - 
pmaddwd_m2r (*(table+20), mm2); // mm2 = C4*x0-C2*x2 -C4*x4+C2*x6 - paddd_r2r (mm4, mm3); // mm3 = a1 a0 + rounder - - pmaddwd_m2r (*(table+24), mm5); // mm5 = C3*x5-C1*x7 C5*x1-C1*x3 - movq_r2r (mm3, mm4); // mm4 = a1 a0 + rounder - - pmaddwd_m2r (*(table+28), mm6); // mm6 = C7*x1-C5*x3 C7*x5+C3*x7 - paddd_r2r (mm7, mm1); // mm1 = b1 b0 - - paddd_m2r (*rounder, mm0); // mm0 += rounder - psubd_r2r (mm1, mm3); // mm3 = a1-b1 a0-b0 + rounder - - psrad_i2r (ROW_SHIFT, mm3); // mm3 = y6 y7 - paddd_r2r (mm4, mm1); // mm1 = a1+b1 a0+b0 + rounder - - paddd_r2r (mm2, mm0); // mm0 = a3 a2 + rounder - psrad_i2r (ROW_SHIFT, mm1); // mm1 = y1 y0 - - paddd_r2r (mm6, mm5); // mm5 = b3 b2 - movq_r2r (mm0, mm4); // mm4 = a3 a2 + rounder - - paddd_r2r (mm5, mm0); // mm0 = a3+b3 a2+b2 + rounder - psubd_r2r (mm5, mm4); // mm4 = a3-b3 a2-b2 + rounder -} - -static inline void mmxext_row_tail (int16_t * row, int store) -{ - psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2 - - psrad_i2r (ROW_SHIFT, mm4); // mm4 = y4 y5 - - packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0 - - packssdw_r2r (mm3, mm4); // mm4 = y6 y7 y4 y5 - - movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0 - pshufw_r2r (mm4, mm4, 0xb1); // mm4 = y7 y6 y5 y4 - - /* slot */ - - movq_r2m (mm4, *(row+store+4)); // save y7 y6 y5 y4 -} - -static inline void mmxext_row_mid (int16_t * row, int store, - int offset, int16_t * table) -{ - movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0 - psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2 - - movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1 - psrad_i2r (ROW_SHIFT, mm4); // mm4 = y4 y5 - - packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0 - movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1 - - packssdw_r2r (mm3, mm4); // mm4 = y6 y7 y4 y5 - movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0 - - movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0 - pshufw_r2r (mm4, mm4, 0xb1); // mm4 = y7 y6 y5 y4 - - movq_m2r (*table, mm3); // mm3 = -C2 -C4 C2 C4 - movq_r2m (mm4, *(row+store+4)); // save y7 y6 y5 y4 - - 
pmaddwd_r2r (mm0, mm3); // mm3 = -C4*x4-C2*x6 C4*x0+C2*x2 - - movq_m2r (*(table+4), mm4); // mm4 = C6 C4 C6 C4 - pshufw_r2r (mm2, mm2, 0x4e); // mm2 = x2 x0 x6 x4 -} - - -/* MMX row IDCT */ - -#define mmx_table(c1,c2,c3,c4,c5,c6,c7) { c4, c2, c4, c6, \ - c4, c6, -c4, -c2, \ - c1, c3, c3, -c7, \ - c5, c7, -c1, -c5, \ - c4, -c6, c4, -c2, \ - -c4, c2, c4, -c6, \ - c5, -c1, c7, -c5, \ - c7, c3, c3, -c1 } - -static inline void mmx_row_head (int16_t * row, int offset, int16_t * table) -{ - movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0 - - movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1 - movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0 - - movq_m2r (*table, mm3); // mm3 = C6 C4 C2 C4 - movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1 - - punpckldq_r2r (mm0, mm0); // mm0 = x2 x0 x2 x0 - - movq_m2r (*(table+4), mm4); // mm4 = -C2 -C4 C6 C4 - pmaddwd_r2r (mm0, mm3); // mm3 = C4*x0+C6*x2 C4*x0+C2*x2 - - movq_m2r (*(table+8), mm1); // mm1 = -C7 C3 C3 C1 - punpckhdq_r2r (mm2, mm2); // mm2 = x6 x4 x6 x4 -} - -static inline void mmx_row (int16_t * table, int32_t * rounder) -{ - pmaddwd_r2r (mm2, mm4); // mm4 = -C4*x4-C2*x6 C4*x4+C6*x6 - punpckldq_r2r (mm5, mm5); // mm5 = x3 x1 x3 x1 - - pmaddwd_m2r (*(table+16), mm0); // mm0 = C4*x0-C2*x2 C4*x0-C6*x2 - punpckhdq_r2r (mm6, mm6); // mm6 = x7 x5 x7 x5 - - movq_m2r (*(table+12), mm7); // mm7 = -C5 -C1 C7 C5 - pmaddwd_r2r (mm5, mm1); // mm1 = C3*x1-C7*x3 C1*x1+C3*x3 - - paddd_m2r (*rounder, mm3); // mm3 += rounder - pmaddwd_r2r (mm6, mm7); // mm7 = -C1*x5-C5*x7 C5*x5+C7*x7 - - pmaddwd_m2r (*(table+20), mm2); // mm2 = C4*x4-C6*x6 -C4*x4+C2*x6 - paddd_r2r (mm4, mm3); // mm3 = a1 a0 + rounder - - pmaddwd_m2r (*(table+24), mm5); // mm5 = C7*x1-C5*x3 C5*x1-C1*x3 - movq_r2r (mm3, mm4); // mm4 = a1 a0 + rounder - - pmaddwd_m2r (*(table+28), mm6); // mm6 = C3*x5-C1*x7 C7*x5+C3*x7 - paddd_r2r (mm7, mm1); // mm1 = b1 b0 - - paddd_m2r (*rounder, mm0); // mm0 += rounder - psubd_r2r (mm1, mm3); // mm3 = a1-b1 a0-b0 + rounder - - psrad_i2r 
(ROW_SHIFT, mm3); // mm3 = y6 y7 - paddd_r2r (mm4, mm1); // mm1 = a1+b1 a0+b0 + rounder - - paddd_r2r (mm2, mm0); // mm0 = a3 a2 + rounder - psrad_i2r (ROW_SHIFT, mm1); // mm1 = y1 y0 - - paddd_r2r (mm6, mm5); // mm5 = b3 b2 - movq_r2r (mm0, mm7); // mm7 = a3 a2 + rounder - - paddd_r2r (mm5, mm0); // mm0 = a3+b3 a2+b2 + rounder - psubd_r2r (mm5, mm7); // mm7 = a3-b3 a2-b2 + rounder -} - -static inline void mmx_row_tail (int16_t * row, int store) -{ - psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2 - - psrad_i2r (ROW_SHIFT, mm7); // mm7 = y4 y5 - - packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0 - - packssdw_r2r (mm3, mm7); // mm7 = y6 y7 y4 y5 - - movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0 - movq_r2r (mm7, mm4); // mm4 = y6 y7 y4 y5 - - pslld_i2r (16, mm7); // mm7 = y7 0 y5 0 - - psrld_i2r (16, mm4); // mm4 = 0 y6 0 y4 - - por_r2r (mm4, mm7); // mm7 = y7 y6 y5 y4 - - /* slot */ - - movq_r2m (mm7, *(row+store+4)); // save y7 y6 y5 y4 -} - -static inline void mmx_row_mid (int16_t * row, int store, - int offset, int16_t * table) -{ - movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0 - psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2 - - movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1 - psrad_i2r (ROW_SHIFT, mm7); // mm7 = y4 y5 - - packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0 - movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1 - - packssdw_r2r (mm3, mm7); // mm7 = y6 y7 y4 y5 - movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0 - - movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0 - movq_r2r (mm7, mm1); // mm1 = y6 y7 y4 y5 - - punpckldq_r2r (mm0, mm0); // mm0 = x2 x0 x2 x0 - psrld_i2r (16, mm7); // mm7 = 0 y6 0 y4 - - movq_m2r (*table, mm3); // mm3 = C6 C4 C2 C4 - pslld_i2r (16, mm1); // mm1 = y7 0 y5 0 - - movq_m2r (*(table+4), mm4); // mm4 = -C2 -C4 C6 C4 - por_r2r (mm1, mm7); // mm7 = y7 y6 y5 y4 - - movq_m2r (*(table+8), mm1); // mm1 = -C7 C3 C3 C1 - punpckhdq_r2r (mm2, mm2); // mm2 = x6 x4 x6 x4 - - movq_r2m (mm7, *(row+store+4)); // save y7 y6 y5 y4 - pmaddwd_r2r (mm0, mm3); 
// mm3 = C4*x0+C6*x2 C4*x0+C2*x2 -} - - -#if 0 -// C column IDCT - its just here to document the MMXEXT and MMX versions -static inline void idct_col (int16_t * col, int offset) -{ -/* multiplication - as implemented on mmx */ -#define F(c,x) (((c) * (x)) >> 16) - -/* saturation - it helps us handle torture test cases */ -#define S(x) (((x)>32767) ? 32767 : ((x)<-32768) ? -32768 : (x)) - - int16_t x0, x1, x2, x3, x4, x5, x6, x7; - int16_t y0, y1, y2, y3, y4, y5, y6, y7; - int16_t a0, a1, a2, a3, b0, b1, b2, b3; - int16_t u04, v04, u26, v26, u17, v17, u35, v35, u12, v12; - - col += offset; - - x0 = col[0*8]; - x1 = col[1*8]; - x2 = col[2*8]; - x3 = col[3*8]; - x4 = col[4*8]; - x5 = col[5*8]; - x6 = col[6*8]; - x7 = col[7*8]; - - u04 = S (x0 + x4); - v04 = S (x0 - x4); - u26 = S (F (T2, x6) + x2); - v26 = S (F (T2, x2) - x6); - - a0 = S (u04 + u26); - a1 = S (v04 + v26); - a2 = S (v04 - v26); - a3 = S (u04 - u26); - - u17 = S (F (T1, x7) + x1); - v17 = S (F (T1, x1) - x7); - u35 = S (F (T3, x5) + x3); - v35 = S (F (T3, x3) - x5); - - b0 = S (u17 + u35); - b3 = S (v17 - v35); - u12 = S (u17 - u35); - v12 = S (v17 + v35); - u12 = S (2 * F (C4, u12)); - v12 = S (2 * F (C4, v12)); - b1 = S (u12 + v12); - b2 = S (u12 - v12); - - y0 = S (a0 + b0) >> COL_SHIFT; - y1 = S (a1 + b1) >> COL_SHIFT; - y2 = S (a2 + b2) >> COL_SHIFT; - y3 = S (a3 + b3) >> COL_SHIFT; - - y4 = S (a3 - b3) >> COL_SHIFT; - y5 = S (a2 - b2) >> COL_SHIFT; - y6 = S (a1 - b1) >> COL_SHIFT; - y7 = S (a0 - b0) >> COL_SHIFT; - - col[0*8] = y0; - col[1*8] = y1; - col[2*8] = y2; - col[3*8] = y3; - col[4*8] = y4; - col[5*8] = y5; - col[6*8] = y6; - col[7*8] = y7; -} -#endif - - -// MMX column IDCT -static inline void idct_col (int16_t * col, int offset) -{ -#define T1 13036 -#define T2 27146 -#define T3 43790 -#define C4 23170 - - static short _T1[] ATTR_ALIGN(8) = {T1,T1,T1,T1}; - static short _T2[] ATTR_ALIGN(8) = {T2,T2,T2,T2}; - static short _T3[] ATTR_ALIGN(8) = {T3,T3,T3,T3}; - static short _C4[] 
ATTR_ALIGN(8) = {C4,C4,C4,C4}; - - /* column code adapted from peter gubanov */ - /* http://www.elecard.com/peter/idct.shtml */ - - movq_m2r (*_T1, mm0); // mm0 = T1 - - movq_m2r (*(col+offset+1*8), mm1); // mm1 = x1 - movq_r2r (mm0, mm2); // mm2 = T1 - - movq_m2r (*(col+offset+7*8), mm4); // mm4 = x7 - pmulhw_r2r (mm1, mm0); // mm0 = T1*x1 - - movq_m2r (*_T3, mm5); // mm5 = T3 - pmulhw_r2r (mm4, mm2); // mm2 = T1*x7 - - movq_m2r (*(col+offset+5*8), mm6); // mm6 = x5 - movq_r2r (mm5, mm7); // mm7 = T3-1 - - movq_m2r (*(col+offset+3*8), mm3); // mm3 = x3 - psubsw_r2r (mm4, mm0); // mm0 = v17 - - movq_m2r (*_T2, mm4); // mm4 = T2 - pmulhw_r2r (mm3, mm5); // mm5 = (T3-1)*x3 - - paddsw_r2r (mm2, mm1); // mm1 = u17 - pmulhw_r2r (mm6, mm7); // mm7 = (T3-1)*x5 - - /* slot */ - - movq_r2r (mm4, mm2); // mm2 = T2 - paddsw_r2r (mm3, mm5); // mm5 = T3*x3 - - pmulhw_m2r (*(col+offset+2*8), mm4);// mm4 = T2*x2 - paddsw_r2r (mm6, mm7); // mm7 = T3*x5 - - psubsw_r2r (mm6, mm5); // mm5 = v35 - paddsw_r2r (mm3, mm7); // mm7 = u35 - - movq_m2r (*(col+offset+6*8), mm3); // mm3 = x6 - movq_r2r (mm0, mm6); // mm6 = v17 - - pmulhw_r2r (mm3, mm2); // mm2 = T2*x6 - psubsw_r2r (mm5, mm0); // mm0 = b3 - - psubsw_r2r (mm3, mm4); // mm4 = v26 - paddsw_r2r (mm6, mm5); // mm5 = v12 - - movq_r2m (mm0, *(col+offset+3*8)); // save b3 in scratch0 - movq_r2r (mm1, mm6); // mm6 = u17 - - paddsw_m2r (*(col+offset+2*8), mm2);// mm2 = u26 - paddsw_r2r (mm7, mm6); // mm6 = b0 - - psubsw_r2r (mm7, mm1); // mm1 = u12 - movq_r2r (mm1, mm7); // mm7 = u12 - - movq_m2r (*(col+offset+0*8), mm3); // mm3 = x0 - paddsw_r2r (mm5, mm1); // mm1 = u12+v12 - - movq_m2r (*_C4, mm0); // mm0 = C4/2 - psubsw_r2r (mm5, mm7); // mm7 = u12-v12 - - movq_r2m (mm6, *(col+offset+5*8)); // save b0 in scratch1 - pmulhw_r2r (mm0, mm1); // mm1 = b1/2 - - movq_r2r (mm4, mm6); // mm6 = v26 - pmulhw_r2r (mm0, mm7); // mm7 = b2/2 - - movq_m2r (*(col+offset+4*8), mm5); // mm5 = x4 - movq_r2r (mm3, mm0); // mm0 = x0 - - psubsw_r2r (mm5, 
mm3); // mm3 = v04 - paddsw_r2r (mm5, mm0); // mm0 = u04 - - paddsw_r2r (mm3, mm4); // mm4 = a1 - movq_r2r (mm0, mm5); // mm5 = u04 - - psubsw_r2r (mm6, mm3); // mm3 = a2 - paddsw_r2r (mm2, mm5); // mm5 = a0 - - paddsw_r2r (mm1, mm1); // mm1 = b1 - psubsw_r2r (mm2, mm0); // mm0 = a3 - - paddsw_r2r (mm7, mm7); // mm7 = b2 - movq_r2r (mm3, mm2); // mm2 = a2 - - movq_r2r (mm4, mm6); // mm6 = a1 - paddsw_r2r (mm7, mm3); // mm3 = a2+b2 - - psraw_i2r (COL_SHIFT, mm3); // mm3 = y2 - paddsw_r2r (mm1, mm4); // mm4 = a1+b1 - - psraw_i2r (COL_SHIFT, mm4); // mm4 = y1 - psubsw_r2r (mm1, mm6); // mm6 = a1-b1 - - movq_m2r (*(col+offset+5*8), mm1); // mm1 = b0 - psubsw_r2r (mm7, mm2); // mm2 = a2-b2 - - psraw_i2r (COL_SHIFT, mm6); // mm6 = y6 - movq_r2r (mm5, mm7); // mm7 = a0 - - movq_r2m (mm4, *(col+offset+1*8)); // save y1 - psraw_i2r (COL_SHIFT, mm2); // mm2 = y5 - - movq_r2m (mm3, *(col+offset+2*8)); // save y2 - paddsw_r2r (mm1, mm5); // mm5 = a0+b0 - - movq_m2r (*(col+offset+3*8), mm4); // mm4 = b3 - psubsw_r2r (mm1, mm7); // mm7 = a0-b0 - - psraw_i2r (COL_SHIFT, mm5); // mm5 = y0 - movq_r2r (mm0, mm3); // mm3 = a3 - - movq_r2m (mm2, *(col+offset+5*8)); // save y5 - psubsw_r2r (mm4, mm3); // mm3 = a3-b3 - - psraw_i2r (COL_SHIFT, mm7); // mm7 = y7 - paddsw_r2r (mm0, mm4); // mm4 = a3+b3 - - movq_r2m (mm5, *(col+offset+0*8)); // save y0 - psraw_i2r (COL_SHIFT, mm3); // mm3 = y4 - - movq_r2m (mm6, *(col+offset+6*8)); // save y6 - psraw_i2r (COL_SHIFT, mm4); // mm4 = y3 - - movq_r2m (mm7, *(col+offset+7*8)); // save y7 - - movq_r2m (mm3, *(col+offset+4*8)); // save y4 - - movq_r2m (mm4, *(col+offset+3*8)); // save y3 -} - - -static int32_t rounder0[] ATTR_ALIGN(8) = - rounder ((1 << (COL_SHIFT - 1)) - 0.5); -static int32_t rounder4[] ATTR_ALIGN(8) = rounder (0); -static int32_t rounder1[] ATTR_ALIGN(8) = - rounder (1.25683487303); /* C1*(C1/C4+C1+C7)/2 */ -static int32_t rounder7[] ATTR_ALIGN(8) = - rounder (-0.25); /* C1*(C7/C4+C7-C1)/2 */ -static int32_t rounder2[] 
ATTR_ALIGN(8) = - rounder (0.60355339059); /* C2 * (C6+C2)/2 */ -static int32_t rounder6[] ATTR_ALIGN(8) = - rounder (-0.25); /* C2 * (C6-C2)/2 */ -static int32_t rounder3[] ATTR_ALIGN(8) = - rounder (0.087788325588); /* C3*(-C3/C4+C3+C5)/2 */ -static int32_t rounder5[] ATTR_ALIGN(8) = - rounder (-0.441341716183); /* C3*(-C5/C4+C5-C3)/2 */ - - -#define declare_idct(idct,table,idct_row_head,idct_row,idct_row_tail,idct_row_mid) \ -static inline void idct (int16_t * block) \ -{ \ - static int16_t table04[] ATTR_ALIGN(16) = \ - table (22725, 21407, 19266, 16384, 12873, 8867, 4520); \ - static int16_t table17[] ATTR_ALIGN(16) = \ - table (31521, 29692, 26722, 22725, 17855, 12299, 6270); \ - static int16_t table26[] ATTR_ALIGN(16) = \ - table (29692, 27969, 25172, 21407, 16819, 11585, 5906); \ - static int16_t table35[] ATTR_ALIGN(16) = \ - table (26722, 25172, 22654, 19266, 15137, 10426, 5315); \ - \ - idct_row_head (block, 0*8, table04); \ - idct_row (table04, rounder0); \ - idct_row_mid (block, 0*8, 4*8, table04); \ - idct_row (table04, rounder4); \ - idct_row_mid (block, 4*8, 1*8, table17); \ - idct_row (table17, rounder1); \ - idct_row_mid (block, 1*8, 7*8, table17); \ - idct_row (table17, rounder7); \ - idct_row_mid (block, 7*8, 2*8, table26); \ - idct_row (table26, rounder2); \ - idct_row_mid (block, 2*8, 6*8, table26); \ - idct_row (table26, rounder6); \ - idct_row_mid (block, 6*8, 3*8, table35); \ - idct_row (table35, rounder3); \ - idct_row_mid (block, 3*8, 5*8, table35); \ - idct_row (table35, rounder5); \ - idct_row_tail (block, 5*8); \ - \ - idct_col (block, 0); \ - idct_col (block, 4); \ -} - - -#define COPY_MMX(offset,r0,r1,r2) \ -do { \ - movq_m2r (*(block+offset), r0); \ - dest += stride; \ - movq_m2r (*(block+offset+4), r1); \ - movq_r2m (r2, *dest); \ - packuswb_r2r (r1, r0); \ -} while (0) - -static void block_copy (int16_t * block, uint8_t * dest, int stride) -{ - movq_m2r (*(block+0*8), mm0); - movq_m2r (*(block+0*8+4), mm1); - movq_m2r 
(*(block+1*8), mm2); - packuswb_r2r (mm1, mm0); - movq_m2r (*(block+1*8+4), mm3); - movq_r2m (mm0, *dest); - packuswb_r2r (mm3, mm2); - COPY_MMX (2*8, mm0, mm1, mm2); - COPY_MMX (3*8, mm2, mm3, mm0); - COPY_MMX (4*8, mm0, mm1, mm2); - COPY_MMX (5*8, mm2, mm3, mm0); - COPY_MMX (6*8, mm0, mm1, mm2); - COPY_MMX (7*8, mm2, mm3, mm0); - movq_r2m (mm2, *(dest+stride)); -} - - -#define ADD_MMX(offset,r1,r2,r3,r4) \ -do { \ - movq_m2r (*(dest+2*stride), r1); \ - packuswb_r2r (r4, r3); \ - movq_r2r (r1, r2); \ - dest += stride; \ - movq_r2m (r3, *dest); \ - punpcklbw_r2r (mm0, r1); \ - paddsw_m2r (*(block+offset), r1); \ - punpckhbw_r2r (mm0, r2); \ - paddsw_m2r (*(block+offset+4), r2); \ -} while (0) - -static void block_add (int16_t * block, uint8_t * dest, int stride) -{ - movq_m2r (*dest, mm1); - pxor_r2r (mm0, mm0); - movq_m2r (*(dest+stride), mm3); - movq_r2r (mm1, mm2); - punpcklbw_r2r (mm0, mm1); - movq_r2r (mm3, mm4); - paddsw_m2r (*(block+0*8), mm1); - punpckhbw_r2r (mm0, mm2); - paddsw_m2r (*(block+0*8+4), mm2); - punpcklbw_r2r (mm0, mm3); - paddsw_m2r (*(block+1*8), mm3); - packuswb_r2r (mm2, mm1); - punpckhbw_r2r (mm0, mm4); - movq_r2m (mm1, *dest); - paddsw_m2r (*(block+1*8+4), mm4); - ADD_MMX (2*8, mm1, mm2, mm3, mm4); - ADD_MMX (3*8, mm3, mm4, mm1, mm2); - ADD_MMX (4*8, mm1, mm2, mm3, mm4); - ADD_MMX (5*8, mm3, mm4, mm1, mm2); - ADD_MMX (6*8, mm1, mm2, mm3, mm4); - ADD_MMX (7*8, mm3, mm4, mm1, mm2); - packuswb_r2r (mm4, mm3); - movq_r2m (mm3, *(dest+stride)); -} - -static inline void block_zero (int16_t * block) { - pxor_r2r (mm0, mm0); - movq_r2m (mm0, *(block+0*4)); - movq_r2m (mm0, *(block+1*4)); - movq_r2m (mm0, *(block+2*4)); - movq_r2m (mm0, *(block+3*4)); - movq_r2m (mm0, *(block+4*4)); - movq_r2m (mm0, *(block+5*4)); - movq_r2m (mm0, *(block+6*4)); - movq_r2m (mm0, *(block+7*4)); - movq_r2m (mm0, *(block+8*4)); - movq_r2m (mm0, *(block+9*4)); - movq_r2m (mm0, *(block+10*4)); - movq_r2m (mm0, *(block+11*4)); - movq_r2m (mm0, *(block+12*4)); - movq_r2m 
(mm0, *(block+13*4)); - movq_r2m (mm0, *(block+14*4)); - movq_r2m (mm0, *(block+15*4)); -} - -declare_idct (mmxext_idct, mmxext_table, - mmxext_row_head, mmxext_row, mmxext_row_tail, mmxext_row_mid) - -void mpeg2_idct_copy_mmxext (int16_t * block, uint8_t * dest, int stride) -{ - mmxext_idct (block); - block_copy (block, dest, stride); - block_zero (block); -} - -void mpeg2_idct_add_mmxext (int16_t * block, uint8_t * dest, int stride) -{ - mmxext_idct (block); - block_add (block, dest, stride); - block_zero (block); -} - -void mpeg2_idct_mmxext (int16_t * block) -{ - mmxext_idct (block); -} - -declare_idct (mmx_idct, mmx_table, - mmx_row_head, mmx_row, mmx_row_tail, mmx_row_mid) - -void mpeg2_idct_copy_mmx (int16_t * block, uint8_t * dest, int stride) -{ - mmx_idct (block); - block_copy (block, dest, stride); - block_zero (block); -} - -void mpeg2_idct_add_mmx (int16_t * block, uint8_t * dest, int stride) -{ - mmx_idct (block); - block_add (block, dest, stride); - block_zero (block); -} - -void mpeg2_idct_mmx (int16_t * block) -{ - mmx_idct (block); -} - -void mpeg2_zero_block_mmx (int16_t * block) -{ - block_zero (block); -} - -void mpeg2_idct_mmx_init (void) -{ - int i, j; - - /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */ - - for (i = 0; i < 64; i++) { - j = mpeg2_scan_norm[i]; - mpeg2_scan_norm[i] = (j & 0x38) | ((j & 6) >> 1) | ((j & 1) << 2); - j = mpeg2_scan_alt[i]; - mpeg2_scan_alt[i] = (j & 0x38) | ((j & 6) >> 1) | ((j & 1) << 2); - } -} - -#endif diff --git a/src/libmpeg2/libmpeg2_accel.c b/src/libmpeg2/libmpeg2_accel.c deleted file mode 100644 index 92c0e280b..000000000 --- a/src/libmpeg2/libmpeg2_accel.c +++ /dev/null @@ -1,223 +0,0 @@ -/* - * libmpeg2_accel.c - * Copyright (C) 2004 The Unichrome Project. - * Copyright (C) 2005 Thomas Hellstrom. - * - * This file is part of xine, a free video player. 
- * - * xine is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * xine is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - */ - -#include -#include "mpeg2.h" -#include "mpeg2_internal.h" -#include "xvmc_vld.h" -#include "libmpeg2_accel.h" - - -void -libmpeg2_accel_scan( mpeg2dec_accel_t *accel, uint8_t *scan_norm, uint8_t *scan_alt) -{ - xvmc_setup_scan_ptable(); -} - - -int -libmpeg2_accel_discontinuity(mpeg2dec_accel_t *accel, uint32_t frame_format, picture_t *picture) -{ - accel->xvmc_last_slice_code=-1; - if ( !picture->current_frame ) - return 0; - if (frame_format == XINE_IMGFMT_XXMC) { - xine_xxmc_t *xxmc = (xine_xxmc_t *) - picture->current_frame->accel_data; - switch(xxmc->acceleration) { - case XINE_XVMC_ACCEL_VLD: - case XINE_XVMC_ACCEL_IDCT: - case XINE_XVMC_ACCEL_MOCOMP: - xxmc->proc_xxmc_flush( picture->current_frame ); - break; - default: - break; - } - } - return 0; -} - -int -libmpeg2_accel_new_sequence(mpeg2dec_accel_t *accel, uint32_t frame_format, picture_t *picture) -{ - switch(frame_format) { - case XINE_IMGFMT_XXMC: - case XINE_IMGFMT_XVMC: { - xine_xvmc_t *xvmc = (xine_xvmc_t *) - picture->current_frame->accel_data; - picture->mc = xvmc->macroblocks; - return 0; - } - default: - break; - } - return 1; -} - -int -libmpeg2_accel_new_frame(mpeg2dec_accel_t *accel, uint32_t frame_format, - picture_t *picture, double ratio, uint32_t flags) -{ - if 
(picture->current_frame) { - if (XINE_IMGFMT_XXMC == frame_format) { - xine_xxmc_t *xxmc = (xine_xxmc_t *) - picture->current_frame->accel_data; - - /* - * Make a request for acceleration type and mpeg coding from - * the output plugin. - */ - - xxmc->fallback_format = XINE_IMGFMT_YV12; - xxmc->acceleration = XINE_XVMC_ACCEL_VLD| XINE_XVMC_ACCEL_IDCT - | XINE_XVMC_ACCEL_MOCOMP ; - - /* - * Standard MOCOMP / IDCT XvMC implementation for interlaced streams - * is buggy. The bug is inherited from the old XvMC driver. Don't use it until - * it has been fixed. (A volunteer ?) - */ - - if ( picture->picture_structure != 3 ) { - picture->top_field_first = (picture->picture_structure == 1); - xxmc->acceleration &= ~( XINE_XVMC_ACCEL_IDCT | XINE_XVMC_ACCEL_MOCOMP ); - } - - xxmc->mpeg = (picture->mpeg1) ? XINE_XVMC_MPEG_1:XINE_XVMC_MPEG_2; - xxmc->proc_xxmc_update_frame (picture->current_frame->driver, - picture->current_frame, - picture->coded_picture_width, - picture->coded_picture_height, - ratio, - XINE_IMGFMT_XXMC, flags); - } - } - return 0; -} - -void -libmpeg2_accel_frame_completion(mpeg2dec_accel_t * accel, uint32_t frame_format, picture_t *picture, - int code) -{ - - if ( !picture->current_frame ) return; - - if (frame_format == XINE_IMGFMT_XXMC) { - xine_xxmc_t *xxmc = (xine_xxmc_t *) - picture->current_frame->accel_data; - if (!xxmc->decoded) { - switch(picture->current_frame->format) { - case XINE_IMGFMT_XXMC: - switch(xxmc->acceleration) { - case XINE_XVMC_ACCEL_VLD: - mpeg2_xxmc_vld_frame_complete(accel, picture, code); - break; - case XINE_XVMC_ACCEL_IDCT: - case XINE_XVMC_ACCEL_MOCOMP: - xxmc->decoded = !picture->current_frame->bad_frame; - xxmc->proc_xxmc_flush( picture->current_frame ); - break; - default: - break; - } - default: - break; - } - } - } -} - - -int -libmpeg2_accel_slice(mpeg2dec_accel_t *accel, picture_t *picture, int code, char * buffer, - uint32_t chunk_size, uint8_t *chunk_buffer) -{ - /* - * Don't reference frames of other formats. 
They are invalid. This may happen if the - * xxmc plugin suddenly falls back to software decoding. - */ - - if (( picture->current_frame->picture_coding_type == XINE_PICT_P_TYPE ) || - ( picture->current_frame->picture_coding_type == XINE_PICT_B_TYPE )) { - if (! picture->forward_reference_frame) return 1; - if (picture->forward_reference_frame->format != picture->current_frame->format) { - picture->v_offset = 0; - return 1; - } - } - - if ( picture->current_frame->picture_coding_type == XINE_PICT_B_TYPE ) { - if (! picture->backward_reference_frame) return 1; - if (picture->backward_reference_frame->format != picture->current_frame->format) { - picture->v_offset = 0; - return 1; - } - } - - switch( picture->current_frame->format ) { - - case XINE_IMGFMT_XXMC: - { - xine_xxmc_t *xxmc = (xine_xxmc_t *) - picture->current_frame->accel_data; - - if ( xxmc->proc_xxmc_lock_valid( picture->current_frame, - picture->forward_reference_frame, - picture->backward_reference_frame, - picture->current_frame->picture_coding_type)) { - picture->v_offset = 0; - return 1; - } - - switch(picture->current_frame->format) { - case XINE_IMGFMT_XXMC: - switch(xxmc->acceleration) { - case XINE_XVMC_ACCEL_VLD: - mpeg2_xxmc_slice(accel, picture, code, buffer, chunk_size, chunk_buffer); - break; - case XINE_XVMC_ACCEL_IDCT: - case XINE_XVMC_ACCEL_MOCOMP: - mpeg2_xvmc_slice (accel, picture, code, buffer); - break; - default: - mpeg2_slice (picture, code, buffer); - break; - } - break; - default: - mpeg2_slice (picture, code, buffer); - break; - } - xxmc->proc_xxmc_unlock(picture->current_frame->driver); - break; - } - - case XINE_IMGFMT_XVMC: - mpeg2_xvmc_slice (accel, picture, code, buffer); - break; - - default: - mpeg2_slice (picture, code, buffer); - break; - } - return 0; -} diff --git a/src/libmpeg2/libmpeg2_accel.h b/src/libmpeg2/libmpeg2_accel.h deleted file mode 100644 index 5d0b37a78..000000000 --- a/src/libmpeg2/libmpeg2_accel.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * 
libmpeg2_accel.h - * Copyright (C) 2004 The Unichrome Project. - * Copyright (C) 2005 Thomas Hellstrom. - * - * This file is part of xine, a free video player. - * - * xine is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * xine is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - */ - -#ifndef LIBMPEG2_ACCEL_H -#define LIBMPEG2_ACCEL_H - -#include "mpeg2_internal.h" - -/* - * Internal context data type. - */ - -typedef struct { - int xvmc_last_slice_code; - int slices_per_row; - int row_slice_count; - unsigned xxmc_mb_pic_height; -} mpeg2dec_accel_t; - -extern int libmpeg2_accel_discontinuity(mpeg2dec_accel_t *accel, uint32_t frame_format, picture_t *picture); -extern int libmpeg2_accel_new_sequence(mpeg2dec_accel_t *accel, uint32_t frame_format, picture_t *picture); -extern int libmpeg2_accel_new_frame(mpeg2dec_accel_t *accel, uint32_t frame_format, picture_t *picture, double ratio, uint32_t flags); -extern void libmpeg2_accel_frame_completion(mpeg2dec_accel_t *accel, uint32_t frame_format, picture_t *picture, int code); - -extern int libmpeg2_accel_slice(mpeg2dec_accel_t *accel, picture_t *picture, int code, - char * buffer, uint32_t chunk_size, uint8_t *chunk_buffer); -extern void libmpeg2_accel_scan( mpeg2dec_accel_t *accel, uint8_t *scan_norm, uint8_t *scan_alt); - -#endif diff --git a/src/libmpeg2/motion_comp.c b/src/libmpeg2/motion_comp.c deleted file mode 100644 index 
9328dfb9f..000000000 --- a/src/libmpeg2/motion_comp.c +++ /dev/null @@ -1,154 +0,0 @@ -/* - * motion_comp.c - * Copyright (C) 2000-2002 Michel Lespinasse - * Copyright (C) 1999-2000 Aaron Holtzman - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * See http://libmpeg2.sourceforge.net/ for updates. - * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include "config.h" - -#include -#include - -#include "mpeg2_internal.h" -#include - -mpeg2_mc_t mpeg2_mc; - -void mpeg2_mc_init (uint32_t mm_accel) -{ -#ifdef LIBMPEG2_MLIB - if (mm_accel & MM_ACCEL_MLIB) { -#ifdef LOG - fprintf (stderr, "Using mediaLib for motion compensation\n"); -#endif - mpeg2_mc = mpeg2_mc_mlib; - } -#endif - -#if defined(ARCH_X86) || defined(ARCH_X86_64) - if (mm_accel & MM_ACCEL_X86_MMXEXT) { -#ifdef LOG - fprintf (stderr, "Using MMXEXT for motion compensation\n"); -#endif - mpeg2_mc = mpeg2_mc_mmxext; - } else if (mm_accel & MM_ACCEL_X86_3DNOW) { -#ifdef LOG - fprintf (stderr, "Using 3DNOW for motion compensation\n"); -#endif - mpeg2_mc = mpeg2_mc_3dnow; - } else if (mm_accel & MM_ACCEL_X86_MMX) { -#ifdef LOG - fprintf (stderr, "Using MMX for motion compensation\n"); -#endif - mpeg2_mc = mpeg2_mc_mmx; - } else -#endif -#if defined (ARCH_PPC) && defined (ENABLE_ALTIVEC) - if (mm_accel & MM_ACCEL_PPC_ALTIVEC) 
{ -#ifdef LOG - fprintf (stderr, "Using altivec for motion compensation\n"); -#endif - mpeg2_mc = mpeg2_mc_altivec; - } else -#endif -#ifdef ARCH_SPARC - if (mm_accel & MM_ACCEL_SPARC_VIS) { -#ifdef LOG - fprintf (stderr, "Using VIS for motion compensation\n"); -#endif - mpeg2_mc = mpeg2_mc_vis; - } else -#endif - { -#ifdef LOG - fprintf (stderr, "No accelerated motion compensation found\n"); -#endif - mpeg2_mc = mpeg2_mc_c; - } -} - -#define avg2(a,b) ((a+b+1)>>1) -#define avg4(a,b,c,d) ((a+b+c+d+2)>>2) - -#define predict_o(i) (ref[i]) -#define predict_x(i) (avg2 (ref[i], ref[i+1])) -#define predict_y(i) (avg2 (ref[i], (ref+stride)[i])) -#define predict_xy(i) (avg4 (ref[i], ref[i+1], \ - (ref+stride)[i], (ref+stride)[i+1])) - -#define put(predictor,i) dest[i] = predictor (i) -#define avg(predictor,i) dest[i] = avg2 (predictor (i), dest[i]) - -/* mc function template */ - -#define MC_FUNC(op,xy) \ -static void MC_##op##_##xy##_16_c (uint8_t * dest, uint8_t * ref, \ - int stride, int height) \ -{ \ - do { \ - op (predict_##xy, 0); \ - op (predict_##xy, 1); \ - op (predict_##xy, 2); \ - op (predict_##xy, 3); \ - op (predict_##xy, 4); \ - op (predict_##xy, 5); \ - op (predict_##xy, 6); \ - op (predict_##xy, 7); \ - op (predict_##xy, 8); \ - op (predict_##xy, 9); \ - op (predict_##xy, 10); \ - op (predict_##xy, 11); \ - op (predict_##xy, 12); \ - op (predict_##xy, 13); \ - op (predict_##xy, 14); \ - op (predict_##xy, 15); \ - ref += stride; \ - dest += stride; \ - } while (--height); \ -} \ -static void MC_##op##_##xy##_8_c (uint8_t * dest, uint8_t * ref, \ - int stride, int height) \ -{ \ - do { \ - op (predict_##xy, 0); \ - op (predict_##xy, 1); \ - op (predict_##xy, 2); \ - op (predict_##xy, 3); \ - op (predict_##xy, 4); \ - op (predict_##xy, 5); \ - op (predict_##xy, 6); \ - op (predict_##xy, 7); \ - ref += stride; \ - dest += stride; \ - } while (--height); \ -} - -/* definitions of the actual mc functions */ - -MC_FUNC (put,o) -MC_FUNC (avg,o) -MC_FUNC (put,x) 
-MC_FUNC (avg,x) -MC_FUNC (put,y) -MC_FUNC (avg,y) -MC_FUNC (put,xy) -MC_FUNC (avg,xy) - -MPEG2_MC_EXTERN (c) diff --git a/src/libmpeg2/motion_comp_altivec.c b/src/libmpeg2/motion_comp_altivec.c deleted file mode 100644 index 99719b7fb..000000000 --- a/src/libmpeg2/motion_comp_altivec.c +++ /dev/null @@ -1,2031 +0,0 @@ -/* - * motion_comp_altivec.c - * Copyright (C) 2000-2002 Michel Lespinasse - * Copyright (C) 1999-2000 Aaron Holtzman - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * See http://libmpeg2.sourceforge.net/ for updates. - * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include "config.h" - -#ifndef HOST_OS_DARWIN - -#if defined (ARCH_PPC) && defined (ENABLE_ALTIVEC) - -#include "mpeg2_internal.h" - -#include - -/* - * The asm code is generated with: - * - * gcc-2.95 -fvec -DHOST_OS_DARWIN -O9 -fomit-frame-pointer -mregnames -S - * motion_comp_altivec.c - * - * sed 's/.L/._L/g' motion_comp_altivec.s | - * awk '{args=""; len=split ($2, arg, ","); - * for (i=1; i<=len; i++) { a=arg[i]; if (i> 1) - 1; - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (15, ref); - ref += stride; - tmp = vec_perm (ref0, ref1, perm); - - do { - ref0 = vec_ld (0, ref); - ref1 = vec_ld (15, ref); - ref += stride; - vec_st (tmp, 0, dest); - tmp = vec_perm (ref0, ref1, perm); - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (15, ref); - ref += stride; - vec_st (tmp, stride, dest); - dest += 2*stride; - tmp = vec_perm (ref0, ref1, perm); - } while (--height); - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (15, ref); - vec_st (tmp, 0, dest); - tmp = vec_perm (ref0, ref1, perm); - vec_st (tmp, stride, dest); -} - -void MC_put_o_8_altivec (unsigned char * dest, unsigned char * ref, - int stride, int height) -{ - vector_u8_t perm0, perm1, tmp0, tmp1, ref0, ref1; - - tmp0 = vec_lvsl (0, ref); - tmp0 = vec_mergeh (tmp0, tmp0); - perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0); - tmp1 = vec_lvsl (stride, ref); - tmp1 = vec_mergeh (tmp1, tmp1); - perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1); - - height = (height >> 1) - 1; - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (7, ref); - ref += stride; - tmp0 = vec_perm (ref0, ref1, perm0); - - do { - ref0 = vec_ld (0, ref); - ref1 = vec_ld (7, ref); - ref += stride; - vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); - vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); - dest += stride; - 
tmp1 = vec_perm (ref0, ref1, perm1); - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (7, ref); - ref += stride; - vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); - vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); - dest += stride; - tmp0 = vec_perm (ref0, ref1, perm0); - } while (--height); - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (7, ref); - vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); - vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); - dest += stride; - tmp1 = vec_perm (ref0, ref1, perm1); - vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); - vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); -} - -void MC_put_x_16_altivec (unsigned char * dest, unsigned char * ref, - int stride, int height) -{ - vector_u8_t permA, permB, ref0, ref1, tmp; - - permA = vec_lvsl (0, ref); - permB = vec_add (permA, vec_splat_u8 (1)); - - height = (height >> 1) - 1; - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (16, ref); - ref += stride; - tmp = vec_avg (vec_perm (ref0, ref1, permA), - vec_perm (ref0, ref1, permB)); - - do { - ref0 = vec_ld (0, ref); - ref1 = vec_ld (16, ref); - ref += stride; - vec_st (tmp, 0, dest); - tmp = vec_avg (vec_perm (ref0, ref1, permA), - vec_perm (ref0, ref1, permB)); - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (16, ref); - ref += stride; - vec_st (tmp, stride, dest); - dest += 2*stride; - tmp = vec_avg (vec_perm (ref0, ref1, permA), - vec_perm (ref0, ref1, permB)); - } while (--height); - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (16, ref); - vec_st (tmp, 0, dest); - tmp = vec_avg (vec_perm (ref0, ref1, permA), - vec_perm (ref0, ref1, permB)); - vec_st (tmp, stride, dest); -} - -void MC_put_x_8_altivec (unsigned char * dest, unsigned char * ref, - int stride, int height) -{ - vector_u8_t perm0A, perm0B, perm1A, perm1B, ones, tmp0, tmp1, ref0, ref1; - - ones = vec_splat_u8 (1); - tmp0 = vec_lvsl (0, ref); - tmp0 = vec_mergeh (tmp0, tmp0); - perm0A = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0); - perm0B = vec_add 
(perm0A, ones); - tmp1 = vec_lvsl (stride, ref); - tmp1 = vec_mergeh (tmp1, tmp1); - perm1A = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1); - perm1B = vec_add (perm1A, ones); - - height = (height >> 1) - 1; - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (8, ref); - ref += stride; - tmp0 = vec_avg (vec_perm (ref0, ref1, perm0A), - vec_perm (ref0, ref1, perm0B)); - - do { - ref0 = vec_ld (0, ref); - ref1 = vec_ld (8, ref); - ref += stride; - vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); - vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); - dest += stride; - tmp1 = vec_avg (vec_perm (ref0, ref1, perm1A), - vec_perm (ref0, ref1, perm1B)); - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (8, ref); - ref += stride; - vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); - vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); - dest += stride; - tmp0 = vec_avg (vec_perm (ref0, ref1, perm0A), - vec_perm (ref0, ref1, perm0B)); - } while (--height); - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (8, ref); - vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); - vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); - dest += stride; - tmp1 = vec_avg (vec_perm (ref0, ref1, perm1A), - vec_perm (ref0, ref1, perm1B)); - vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); - vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); -} - -void MC_put_y_16_altivec (unsigned char * dest, unsigned char * ref, - int stride, int height) -{ - vector_u8_t perm, ref0, ref1, tmp0, tmp1, tmp; - - perm = vec_lvsl (0, ref); - - height = (height >> 1) - 1; - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (15, ref); - ref += stride; - tmp0 = vec_perm (ref0, ref1, perm); - ref0 = vec_ld (0, ref); - ref1 = vec_ld (15, ref); - ref += stride; - tmp1 = vec_perm (ref0, ref1, perm); - tmp = vec_avg (tmp0, tmp1); - - do { - ref0 = vec_ld (0, ref); - ref1 = vec_ld (15, ref); - ref += stride; - vec_st (tmp, 0, dest); - tmp0 = vec_perm (ref0, ref1, perm); - tmp = vec_avg (tmp0, tmp1); - - ref0 = 
vec_ld (0, ref); - ref1 = vec_ld (15, ref); - ref += stride; - vec_st (tmp, stride, dest); - dest += 2*stride; - tmp1 = vec_perm (ref0, ref1, perm); - tmp = vec_avg (tmp0, tmp1); - } while (--height); - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (15, ref); - vec_st (tmp, 0, dest); - tmp0 = vec_perm (ref0, ref1, perm); - tmp = vec_avg (tmp0, tmp1); - vec_st (tmp, stride, dest); -} - -void MC_put_y_8_altivec (unsigned char * dest, unsigned char * ref, - int stride, int height) -{ - vector_u8_t perm0, perm1, tmp0, tmp1, tmp, ref0, ref1; - - tmp0 = vec_lvsl (0, ref); - tmp0 = vec_mergeh (tmp0, tmp0); - perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0); - tmp1 = vec_lvsl (stride, ref); - tmp1 = vec_mergeh (tmp1, tmp1); - perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1); - - height = (height >> 1) - 1; - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (7, ref); - ref += stride; - tmp0 = vec_perm (ref0, ref1, perm0); - ref0 = vec_ld (0, ref); - ref1 = vec_ld (7, ref); - ref += stride; - tmp1 = vec_perm (ref0, ref1, perm1); - tmp = vec_avg (tmp0, tmp1); - - do { - ref0 = vec_ld (0, ref); - ref1 = vec_ld (7, ref); - ref += stride; - vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); - vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); - dest += stride; - tmp0 = vec_perm (ref0, ref1, perm0); - tmp = vec_avg (tmp0, tmp1); - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (7, ref); - ref += stride; - vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); - vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); - dest += stride; - tmp1 = vec_perm (ref0, ref1, perm1); - tmp = vec_avg (tmp0, tmp1); - } while (--height); - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (7, ref); - vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); - vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); - dest += stride; - tmp0 = vec_perm (ref0, ref1, perm0); - tmp = vec_avg (tmp0, tmp1); - vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); - vec_ste ((vector_u32_t)tmp, 4, (unsigned int 
*)dest); -} - -void MC_put_xy_16_altivec (unsigned char * dest, unsigned char * ref, - int stride, int height) -{ - vector_u8_t permA, permB, ref0, ref1, A, B, avg0, avg1, xor0, xor1, tmp; - vector_u8_t ones; - - ones = vec_splat_u8 (1); - permA = vec_lvsl (0, ref); - permB = vec_add (permA, ones); - - height = (height >> 1) - 1; - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (16, ref); - ref += stride; - A = vec_perm (ref0, ref1, permA); - B = vec_perm (ref0, ref1, permB); - avg0 = vec_avg (A, B); - xor0 = vec_xor (A, B); - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (16, ref); - ref += stride; - A = vec_perm (ref0, ref1, permA); - B = vec_perm (ref0, ref1, permB); - avg1 = vec_avg (A, B); - xor1 = vec_xor (A, B); - tmp = vec_sub (vec_avg (avg0, avg1), - vec_and (vec_and (ones, vec_or (xor0, xor1)), - vec_xor (avg0, avg1))); - - - do { - ref0 = vec_ld (0, ref); - ref1 = vec_ld (16, ref); - ref += stride; - vec_st (tmp, 0, dest); - A = vec_perm (ref0, ref1, permA); - B = vec_perm (ref0, ref1, permB); - avg0 = vec_avg (A, B); - xor0 = vec_xor (A, B); - tmp = vec_sub (vec_avg (avg0, avg1), - vec_and (vec_and (ones, vec_or (xor0, xor1)), - vec_xor (avg0, avg1))); - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (16, ref); - ref += stride; - vec_st (tmp, stride, dest); - dest += 2*stride; - A = vec_perm (ref0, ref1, permA); - B = vec_perm (ref0, ref1, permB); - avg1 = vec_avg (A, B); - xor1 = vec_xor (A, B); - tmp = vec_sub (vec_avg (avg0, avg1), - vec_and (vec_and (ones, vec_or (xor0, xor1)), - vec_xor (avg0, avg1))); - } while (--height); - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (16, ref); - vec_st (tmp, 0, dest); - A = vec_perm (ref0, ref1, permA); - B = vec_perm (ref0, ref1, permB); - avg0 = vec_avg (A, B); - xor0 = vec_xor (A, B); - tmp = vec_sub (vec_avg (avg0, avg1), - vec_and (vec_and (ones, vec_or (xor0, xor1)), - vec_xor (avg0, avg1))); - vec_st (tmp, stride, dest); -} - -void MC_put_xy_8_altivec (unsigned char * dest, unsigned char * ref, - int stride, int height) 
-{ - vector_u8_t perm0A, perm0B, perm1A, perm1B, ref0, ref1, A, B; - vector_u8_t avg0, avg1, xor0, xor1, tmp, ones; - - ones = vec_splat_u8 (1); - perm0A = vec_lvsl (0, ref); - perm0A = vec_mergeh (perm0A, perm0A); - perm0A = vec_pack ((vector_u16_t)perm0A, (vector_u16_t)perm0A); - perm0B = vec_add (perm0A, ones); - perm1A = vec_lvsl (stride, ref); - perm1A = vec_mergeh (perm1A, perm1A); - perm1A = vec_pack ((vector_u16_t)perm1A, (vector_u16_t)perm1A); - perm1B = vec_add (perm1A, ones); - - height = (height >> 1) - 1; - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (16, ref); - ref += stride; - A = vec_perm (ref0, ref1, perm0A); - B = vec_perm (ref0, ref1, perm0B); - avg0 = vec_avg (A, B); - xor0 = vec_xor (A, B); - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (16, ref); - ref += stride; - A = vec_perm (ref0, ref1, perm1A); - B = vec_perm (ref0, ref1, perm1B); - avg1 = vec_avg (A, B); - xor1 = vec_xor (A, B); - tmp = vec_sub (vec_avg (avg0, avg1), - vec_and (vec_and (ones, vec_or (xor0, xor1)), - vec_xor (avg0, avg1))); - - - do { - ref0 = vec_ld (0, ref); - ref1 = vec_ld (16, ref); - ref += stride; - vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); - vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); - dest += stride; - A = vec_perm (ref0, ref1, perm0A); - B = vec_perm (ref0, ref1, perm0B); - avg0 = vec_avg (A, B); - xor0 = vec_xor (A, B); - tmp = vec_sub (vec_avg (avg0, avg1), - vec_and (vec_and (ones, vec_or (xor0, xor1)), - vec_xor (avg0, avg1))); - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (16, ref); - ref += stride; - vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); - vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); - dest += stride; - A = vec_perm (ref0, ref1, perm1A); - B = vec_perm (ref0, ref1, perm1B); - avg1 = vec_avg (A, B); - xor1 = vec_xor (A, B); - tmp = vec_sub (vec_avg (avg0, avg1), - vec_and (vec_and (ones, vec_or (xor0, xor1)), - vec_xor (avg0, avg1))); - } while (--height); - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (16, ref); - 
vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); - vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); - dest += stride; - A = vec_perm (ref0, ref1, perm0A); - B = vec_perm (ref0, ref1, perm0B); - avg0 = vec_avg (A, B); - xor0 = vec_xor (A, B); - tmp = vec_sub (vec_avg (avg0, avg1), - vec_and (vec_and (ones, vec_or (xor0, xor1)), - vec_xor (avg0, avg1))); - vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); - vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); -} - -#if 0 -void MC_put_xy_8_altivec (unsigned char * dest, unsigned char * ref, - int stride, int height) -{ - vector_u8_t permA, permB, ref0, ref1, A, B, C, D, tmp, zero, ones; - vector_u16_t splat2, temp; - - ones = vec_splat_u8 (1); - permA = vec_lvsl (0, ref); - permB = vec_add (permA, ones); - - zero = vec_splat_u8 (0); - splat2 = vec_splat_u16 (2); - - do { - ref0 = vec_ld (0, ref); - ref1 = vec_ld (16, ref); - ref += stride; - A = vec_perm (ref0, ref1, permA); - B = vec_perm (ref0, ref1, permB); - ref0 = vec_ld (0, ref); - ref1 = vec_ld (16, ref); - C = vec_perm (ref0, ref1, permA); - D = vec_perm (ref0, ref1, permB); - - temp = vec_add (vec_add ((vector_u16_t)vec_mergeh (zero, A), - (vector_u16_t)vec_mergeh (zero, B)), - vec_add ((vector_u16_t)vec_mergeh (zero, C), - (vector_u16_t)vec_mergeh (zero, D))); - temp = vec_sr (vec_add (temp, splat2), splat2); - tmp = vec_pack (temp, temp); - - vec_st (tmp, 0, dest); - dest += stride; - tmp = vec_avg (vec_perm (ref0, ref1, permA), - vec_perm (ref0, ref1, permB)); - } while (--height); -} -#endif - -void MC_avg_o_16_altivec (unsigned char * dest, unsigned char * ref, - int stride, int height) -{ - vector_u8_t perm, ref0, ref1, tmp, prev; - - perm = vec_lvsl (0, ref); - - height = (height >> 1) - 1; - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (15, ref); - ref += stride; - prev = vec_ld (0, dest); - tmp = vec_avg (prev, vec_perm (ref0, ref1, perm)); - - do { - ref0 = vec_ld (0, ref); - ref1 = vec_ld (15, ref); - ref += stride; - prev = vec_ld 
(stride, dest); - vec_st (tmp, 0, dest); - tmp = vec_avg (prev, vec_perm (ref0, ref1, perm)); - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (15, ref); - ref += stride; - prev = vec_ld (2*stride, dest); - vec_st (tmp, stride, dest); - dest += 2*stride; - tmp = vec_avg (prev, vec_perm (ref0, ref1, perm)); - } while (--height); - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (15, ref); - prev = vec_ld (stride, dest); - vec_st (tmp, 0, dest); - tmp = vec_avg (prev, vec_perm (ref0, ref1, perm)); - vec_st (tmp, stride, dest); -} - -void MC_avg_o_8_altivec (unsigned char * dest, unsigned char * ref, - int stride, int height) -{ - vector_u8_t perm0, perm1, tmp0, tmp1, ref0, ref1, prev; - - tmp0 = vec_lvsl (0, ref); - tmp0 = vec_mergeh (tmp0, tmp0); - perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0); - tmp1 = vec_lvsl (stride, ref); - tmp1 = vec_mergeh (tmp1, tmp1); - perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1); - - height = (height >> 1) - 1; - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (7, ref); - ref += stride; - prev = vec_ld (0, dest); - tmp0 = vec_avg (prev, vec_perm (ref0, ref1, perm0)); - - do { - ref0 = vec_ld (0, ref); - ref1 = vec_ld (7, ref); - ref += stride; - prev = vec_ld (stride, dest); - vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); - vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); - dest += stride; - tmp1 = vec_avg (prev, vec_perm (ref0, ref1, perm1)); - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (7, ref); - ref += stride; - prev = vec_ld (stride, dest); - vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); - vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); - dest += stride; - tmp0 = vec_avg (prev, vec_perm (ref0, ref1, perm0)); - } while (--height); - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (7, ref); - prev = vec_ld (stride, dest); - vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); - vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); - dest += stride; - tmp1 = vec_avg (prev, vec_perm (ref0, ref1, 
perm1)); - vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); - vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); -} - -void MC_avg_x_16_altivec (unsigned char * dest, unsigned char * ref, - int stride, int height) -{ - vector_u8_t permA, permB, ref0, ref1, tmp, prev; - - permA = vec_lvsl (0, ref); - permB = vec_add (permA, vec_splat_u8 (1)); - - height = (height >> 1) - 1; - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (16, ref); - prev = vec_ld (0, dest); - ref += stride; - tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA), - vec_perm (ref0, ref1, permB))); - - do { - ref0 = vec_ld (0, ref); - ref1 = vec_ld (16, ref); - ref += stride; - prev = vec_ld (stride, dest); - vec_st (tmp, 0, dest); - tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA), - vec_perm (ref0, ref1, permB))); - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (16, ref); - ref += stride; - prev = vec_ld (2*stride, dest); - vec_st (tmp, stride, dest); - dest += 2*stride; - tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA), - vec_perm (ref0, ref1, permB))); - } while (--height); - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (16, ref); - prev = vec_ld (stride, dest); - vec_st (tmp, 0, dest); - tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA), - vec_perm (ref0, ref1, permB))); - vec_st (tmp, stride, dest); -} - -void MC_avg_x_8_altivec (unsigned char * dest, unsigned char * ref, - int stride, int height) -{ - vector_u8_t perm0A, perm0B, perm1A, perm1B, ones, tmp0, tmp1, ref0, ref1; - vector_u8_t prev; - - ones = vec_splat_u8 (1); - tmp0 = vec_lvsl (0, ref); - tmp0 = vec_mergeh (tmp0, tmp0); - perm0A = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0); - perm0B = vec_add (perm0A, ones); - tmp1 = vec_lvsl (stride, ref); - tmp1 = vec_mergeh (tmp1, tmp1); - perm1A = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1); - perm1B = vec_add (perm1A, ones); - - height = (height >> 1) - 1; - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (8, ref); - prev = vec_ld (0, dest); - ref += 
stride; - tmp0 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm0A), - vec_perm (ref0, ref1, perm0B))); - - do { - ref0 = vec_ld (0, ref); - ref1 = vec_ld (8, ref); - ref += stride; - prev = vec_ld (stride, dest); - vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); - vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); - dest += stride; - tmp1 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm1A), - vec_perm (ref0, ref1, perm1B))); - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (8, ref); - ref += stride; - prev = vec_ld (stride, dest); - vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); - vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); - dest += stride; - tmp0 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm0A), - vec_perm (ref0, ref1, perm0B))); - } while (--height); - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (8, ref); - prev = vec_ld (stride, dest); - vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); - vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); - dest += stride; - tmp1 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm1A), - vec_perm (ref0, ref1, perm1B))); - vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); - vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); -} - -void MC_avg_y_16_altivec (unsigned char * dest, unsigned char * ref, - int stride, int height) -{ - vector_u8_t perm, ref0, ref1, tmp0, tmp1, tmp, prev; - - perm = vec_lvsl (0, ref); - - height = (height >> 1) - 1; - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (15, ref); - ref += stride; - tmp0 = vec_perm (ref0, ref1, perm); - ref0 = vec_ld (0, ref); - ref1 = vec_ld (15, ref); - ref += stride; - prev = vec_ld (0, dest); - tmp1 = vec_perm (ref0, ref1, perm); - tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); - - do { - ref0 = vec_ld (0, ref); - ref1 = vec_ld (15, ref); - ref += stride; - prev = vec_ld (stride, dest); - vec_st (tmp, 0, dest); - tmp0 = vec_perm (ref0, ref1, perm); - tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); - - ref0 = vec_ld (0, ref); - 
ref1 = vec_ld (15, ref); - ref += stride; - prev = vec_ld (2*stride, dest); - vec_st (tmp, stride, dest); - dest += 2*stride; - tmp1 = vec_perm (ref0, ref1, perm); - tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); - } while (--height); - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (15, ref); - prev = vec_ld (stride, dest); - vec_st (tmp, 0, dest); - tmp0 = vec_perm (ref0, ref1, perm); - tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); - vec_st (tmp, stride, dest); -} - -void MC_avg_y_8_altivec (unsigned char * dest, unsigned char * ref, - int stride, int height) -{ - vector_u8_t perm0, perm1, tmp0, tmp1, tmp, ref0, ref1, prev; - - tmp0 = vec_lvsl (0, ref); - tmp0 = vec_mergeh (tmp0, tmp0); - perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0); - tmp1 = vec_lvsl (stride, ref); - tmp1 = vec_mergeh (tmp1, tmp1); - perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1); - - height = (height >> 1) - 1; - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (7, ref); - ref += stride; - tmp0 = vec_perm (ref0, ref1, perm0); - ref0 = vec_ld (0, ref); - ref1 = vec_ld (7, ref); - ref += stride; - prev = vec_ld (0, dest); - tmp1 = vec_perm (ref0, ref1, perm1); - tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); - - do { - ref0 = vec_ld (0, ref); - ref1 = vec_ld (7, ref); - ref += stride; - prev = vec_ld (stride, dest); - vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); - vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); - dest += stride; - tmp0 = vec_perm (ref0, ref1, perm0); - tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (7, ref); - ref += stride; - prev = vec_ld (stride, dest); - vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); - vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); - dest += stride; - tmp1 = vec_perm (ref0, ref1, perm1); - tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); - } while (--height); - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (7, ref); - prev = vec_ld (stride, dest); - vec_ste ((vector_u32_t)tmp, 0, (unsigned int 
*)dest); - vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); - dest += stride; - tmp0 = vec_perm (ref0, ref1, perm0); - tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); - vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); - vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); -} - -void MC_avg_xy_16_altivec (unsigned char * dest, unsigned char * ref, - int stride, int height) -{ - vector_u8_t permA, permB, ref0, ref1, A, B, avg0, avg1, xor0, xor1, tmp; - vector_u8_t ones, prev; - - ones = vec_splat_u8 (1); - permA = vec_lvsl (0, ref); - permB = vec_add (permA, ones); - - height = (height >> 1) - 1; - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (16, ref); - ref += stride; - A = vec_perm (ref0, ref1, permA); - B = vec_perm (ref0, ref1, permB); - avg0 = vec_avg (A, B); - xor0 = vec_xor (A, B); - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (16, ref); - ref += stride; - prev = vec_ld (0, dest); - A = vec_perm (ref0, ref1, permA); - B = vec_perm (ref0, ref1, permB); - avg1 = vec_avg (A, B); - xor1 = vec_xor (A, B); - tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1), - vec_and (vec_and (ones, vec_or (xor0, xor1)), - vec_xor (avg0, avg1)))); - - - do { - ref0 = vec_ld (0, ref); - ref1 = vec_ld (16, ref); - ref += stride; - prev = vec_ld (stride, dest); - vec_st (tmp, 0, dest); - A = vec_perm (ref0, ref1, permA); - B = vec_perm (ref0, ref1, permB); - avg0 = vec_avg (A, B); - xor0 = vec_xor (A, B); - tmp = vec_avg (prev, - vec_sub (vec_avg (avg0, avg1), - vec_and (vec_and (ones, vec_or (xor0, xor1)), - vec_xor (avg0, avg1)))); - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (16, ref); - ref += stride; - prev = vec_ld (2*stride, dest); - vec_st (tmp, stride, dest); - dest += 2*stride; - A = vec_perm (ref0, ref1, permA); - B = vec_perm (ref0, ref1, permB); - avg1 = vec_avg (A, B); - xor1 = vec_xor (A, B); - tmp = vec_avg (prev, - vec_sub (vec_avg (avg0, avg1), - vec_and (vec_and (ones, vec_or (xor0, xor1)), - vec_xor (avg0, avg1)))); - } while (--height); - - ref0 = vec_ld (0, 
ref); - ref1 = vec_ld (16, ref); - prev = vec_ld (stride, dest); - vec_st (tmp, 0, dest); - A = vec_perm (ref0, ref1, permA); - B = vec_perm (ref0, ref1, permB); - avg0 = vec_avg (A, B); - xor0 = vec_xor (A, B); - tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1), - vec_and (vec_and (ones, vec_or (xor0, xor1)), - vec_xor (avg0, avg1)))); - vec_st (tmp, stride, dest); -} - -void MC_avg_xy_8_altivec (unsigned char * dest, unsigned char * ref, - int stride, int height) -{ - vector_u8_t perm0A, perm0B, perm1A, perm1B, ref0, ref1, A, B; - vector_u8_t avg0, avg1, xor0, xor1, tmp, ones, prev; - - ones = vec_splat_u8 (1); - perm0A = vec_lvsl (0, ref); - perm0A = vec_mergeh (perm0A, perm0A); - perm0A = vec_pack ((vector_u16_t)perm0A, (vector_u16_t)perm0A); - perm0B = vec_add (perm0A, ones); - perm1A = vec_lvsl (stride, ref); - perm1A = vec_mergeh (perm1A, perm1A); - perm1A = vec_pack ((vector_u16_t)perm1A, (vector_u16_t)perm1A); - perm1B = vec_add (perm1A, ones); - - height = (height >> 1) - 1; - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (16, ref); - ref += stride; - A = vec_perm (ref0, ref1, perm0A); - B = vec_perm (ref0, ref1, perm0B); - avg0 = vec_avg (A, B); - xor0 = vec_xor (A, B); - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (16, ref); - ref += stride; - prev = vec_ld (0, dest); - A = vec_perm (ref0, ref1, perm1A); - B = vec_perm (ref0, ref1, perm1B); - avg1 = vec_avg (A, B); - xor1 = vec_xor (A, B); - tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1), - vec_and (vec_and (ones, vec_or (xor0, xor1)), - vec_xor (avg0, avg1)))); - - - do { - ref0 = vec_ld (0, ref); - ref1 = vec_ld (16, ref); - ref += stride; - prev = vec_ld (stride, dest); - vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); - vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); - dest += stride; - A = vec_perm (ref0, ref1, perm0A); - B = vec_perm (ref0, ref1, perm0B); - avg0 = vec_avg (A, B); - xor0 = vec_xor (A, B); - tmp = vec_avg (prev, - vec_sub (vec_avg (avg0, avg1), - vec_and (vec_and (ones, 
vec_or (xor0, xor1)), - vec_xor (avg0, avg1)))); - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (16, ref); - ref += stride; - prev = vec_ld (stride, dest); - vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); - vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); - dest += stride; - A = vec_perm (ref0, ref1, perm1A); - B = vec_perm (ref0, ref1, perm1B); - avg1 = vec_avg (A, B); - xor1 = vec_xor (A, B); - tmp = vec_avg (prev, - vec_sub (vec_avg (avg0, avg1), - vec_and (vec_and (ones, vec_or (xor0, xor1)), - vec_xor (avg0, avg1)))); - } while (--height); - - ref0 = vec_ld (0, ref); - ref1 = vec_ld (16, ref); - prev = vec_ld (stride, dest); - vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); - vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); - dest += stride; - A = vec_perm (ref0, ref1, perm0A); - B = vec_perm (ref0, ref1, perm0B); - avg0 = vec_avg (A, B); - xor0 = vec_xor (A, B); - tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1), - vec_and (vec_and (ones, vec_or (xor0, xor1)), - vec_xor (avg0, avg1)))); - vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); - vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); -} - -MPEG2_MC_EXTERN (altivec) - -#endif /* ENABLE_ALTIVEC */ - -#endif /* HOST_OS_DARWIN */ - diff --git a/src/libmpeg2/motion_comp_mlib.c b/src/libmpeg2/motion_comp_mlib.c deleted file mode 100644 index 1a37070ae..000000000 --- a/src/libmpeg2/motion_comp_mlib.c +++ /dev/null @@ -1,181 +0,0 @@ -/* - * motion_comp_mlib.c - * Copyright (C) 2000-2002 HÃ¥kan Hjort - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * See http://libmpeg2.sourceforge.net/ for updates. - * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
- * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include "config.h" - -#ifdef LIBMPEG2_MLIB - -#include -#include -#include -#include -#include - -#include "mpeg2_internal.h" - -static void MC_put_o_16_mlib (uint8_t * dest, uint8_t * ref, - int stride, int height) -{ - if (height == 16) - mlib_VideoCopyRef_U8_U8_16x16 (dest, ref, stride); - else - mlib_VideoCopyRef_U8_U8_16x8 (dest, ref, stride); -} - -static void MC_put_x_16_mlib (uint8_t * dest, uint8_t * ref, - int stride, int height) -{ - if (height == 16) - mlib_VideoInterpX_U8_U8_16x16 (dest, ref, stride, stride); - else - mlib_VideoInterpX_U8_U8_16x8 (dest, ref, stride, stride); -} - -static void MC_put_y_16_mlib (uint8_t * dest, uint8_t * ref, - int stride, int height) -{ - if (height == 16) - mlib_VideoInterpY_U8_U8_16x16 (dest, ref, stride, stride); - else - mlib_VideoInterpY_U8_U8_16x8 (dest, ref, stride, stride); -} - -static void MC_put_xy_16_mlib (uint8_t * dest, uint8_t * ref, - int stride, int height) -{ - if (height == 16) - mlib_VideoInterpXY_U8_U8_16x16 (dest, ref, stride, stride); - else - mlib_VideoInterpXY_U8_U8_16x8 (dest, ref, stride, stride); -} - -static void MC_put_o_8_mlib (uint8_t * dest, uint8_t * ref, - int stride, int height) -{ - if (height == 8) - mlib_VideoCopyRef_U8_U8_8x8 (dest, ref, stride); - else - mlib_VideoCopyRef_U8_U8_8x4 (dest, ref, stride); -} - -static void MC_put_x_8_mlib (uint8_t * dest, uint8_t * ref, - int stride, int height) -{ - if (height == 8) - mlib_VideoInterpX_U8_U8_8x8 (dest, ref, stride, stride); - else - mlib_VideoInterpX_U8_U8_8x4 (dest, 
ref, stride, stride); -} - -static void MC_put_y_8_mlib (uint8_t * dest, uint8_t * ref, - int stride, int height) -{ - if (height == 8) - mlib_VideoInterpY_U8_U8_8x8 (dest, ref, stride, stride); - else - mlib_VideoInterpY_U8_U8_8x4 (dest, ref, stride, stride); -} - -static void MC_put_xy_8_mlib (uint8_t * dest, uint8_t * ref, - int stride, int height) -{ - if (height == 8) - mlib_VideoInterpXY_U8_U8_8x8 (dest, ref, stride, stride); - else - mlib_VideoInterpXY_U8_U8_8x4 (dest, ref, stride, stride); -} - -static void MC_avg_o_16_mlib (uint8_t * dest, uint8_t * ref, - int stride, int height) -{ - if (height == 16) - mlib_VideoCopyRefAve_U8_U8_16x16 (dest, ref, stride); - else - mlib_VideoCopyRefAve_U8_U8_16x8 (dest, ref, stride); -} - -static void MC_avg_x_16_mlib (uint8_t * dest, uint8_t * ref, - int stride, int height) -{ - if (height == 16) - mlib_VideoInterpAveX_U8_U8_16x16 (dest, ref, stride, stride); - else - mlib_VideoInterpAveX_U8_U8_16x8 (dest, ref, stride, stride); -} - -static void MC_avg_y_16_mlib (uint8_t * dest, uint8_t * ref, - int stride, int height) -{ - if (height == 16) - mlib_VideoInterpAveY_U8_U8_16x16 (dest, ref, stride, stride); - else - mlib_VideoInterpAveY_U8_U8_16x8 (dest, ref, stride, stride); -} - -static void MC_avg_xy_16_mlib (uint8_t * dest, uint8_t * ref, - int stride, int height) -{ - if (height == 16) - mlib_VideoInterpAveXY_U8_U8_16x16 (dest, ref, stride, stride); - else - mlib_VideoInterpAveXY_U8_U8_16x8 (dest, ref, stride, stride); -} - -static void MC_avg_o_8_mlib (uint8_t * dest, uint8_t * ref, - int stride, int height) -{ - if (height == 8) - mlib_VideoCopyRefAve_U8_U8_8x8 (dest, ref, stride); - else - mlib_VideoCopyRefAve_U8_U8_8x4 (dest, ref, stride); -} - -static void MC_avg_x_8_mlib (uint8_t * dest, uint8_t * ref, - int stride, int height) -{ - if (height == 8) - mlib_VideoInterpAveX_U8_U8_8x8 (dest, ref, stride, stride); - else - mlib_VideoInterpAveX_U8_U8_8x4 (dest, ref, stride, stride); -} - -static void MC_avg_y_8_mlib 
(uint8_t * dest, uint8_t * ref, - int stride, int height) -{ - if (height == 8) - mlib_VideoInterpAveY_U8_U8_8x8 (dest, ref, stride, stride); - else - mlib_VideoInterpAveY_U8_U8_8x4 (dest, ref, stride, stride); -} - -static void MC_avg_xy_8_mlib (uint8_t * dest, uint8_t * ref, - int stride, int height) -{ - if (height == 8) - mlib_VideoInterpAveXY_U8_U8_8x8 (dest, ref, stride, stride); - else - mlib_VideoInterpAveXY_U8_U8_8x4 (dest, ref, stride, stride); -} - -MPEG2_MC_EXTERN (mlib) - -#endif diff --git a/src/libmpeg2/motion_comp_mmx.c b/src/libmpeg2/motion_comp_mmx.c deleted file mode 100644 index f9b1f085d..000000000 --- a/src/libmpeg2/motion_comp_mmx.c +++ /dev/null @@ -1,1012 +0,0 @@ -/* - * motion_comp_mmx.c - * Copyright (C) 2000-2002 Michel Lespinasse - * Copyright (C) 1999-2000 Aaron Holtzman - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * See http://libmpeg2.sourceforge.net/ for updates. - * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include "config.h" - -#if defined(ARCH_X86) || defined(ARCH_X86_64) - -#include - -#include "mpeg2_internal.h" -#include - -#define CPU_MMXEXT 0 -#define CPU_3DNOW 1 - - -/* MMX code - needs a rewrite */ - -/* some rounding constants */ -static mmx_t round1 = {0x0001000100010001LL}; -static mmx_t round4 = {0x0002000200020002LL}; - -/* - * This code should probably be compiled with loop unrolling - * (ie, -funroll-loops in gcc)becuase some of the loops - * use a small static number of iterations. This was written - * with the assumption the compiler knows best about when - * unrolling will help - */ - -static inline void mmx_zero_reg () -{ - /* load 0 into mm0 */ - pxor_r2r (mm0, mm0); -} - -static inline void mmx_average_2_U8 (uint8_t * dest, - uint8_t * src1, uint8_t * src2) -{ - /* *dest = (*src1 + *src2 + 1)/ 2; */ - - movq_m2r (*src1, mm1); // load 8 src1 bytes - movq_r2r (mm1, mm2); // copy 8 src1 bytes - - movq_m2r (*src2, mm3); // load 8 src2 bytes - movq_r2r (mm3, mm4); // copy 8 src2 bytes - - punpcklbw_r2r (mm0, mm1); // unpack low src1 bytes - punpckhbw_r2r (mm0, mm2); // unpack high src1 bytes - - punpcklbw_r2r (mm0, mm3); // unpack low src2 bytes - punpckhbw_r2r (mm0, mm4); // unpack high src2 bytes - - paddw_r2r (mm3, mm1); // add lows to mm1 - paddw_m2r (round1, mm1); - psraw_i2r (1, mm1); // /2 - - paddw_r2r (mm4, mm2); // add highs to mm2 - paddw_m2r (round1, mm2); - psraw_i2r (1, mm2); // /2 - - packuswb_r2r (mm2, mm1); // pack (w/ saturation) - movq_r2m (mm1, *dest); // store result in dest -} - -static inline void mmx_interp_average_2_U8 (uint8_t * dest, - uint8_t * src1, uint8_t * src2) -{ - /* *dest = (*dest + (*src1 + *src2 + 1)/ 2 + 1)/ 2; */ - - movq_m2r (*dest, mm1); // load 8 dest bytes - movq_r2r (mm1, mm2); // copy 8 
dest bytes - - movq_m2r (*src1, mm3); // load 8 src1 bytes - movq_r2r (mm3, mm4); // copy 8 src1 bytes - - movq_m2r (*src2, mm5); // load 8 src2 bytes - movq_r2r (mm5, mm6); // copy 8 src2 bytes - - punpcklbw_r2r (mm0, mm1); // unpack low dest bytes - punpckhbw_r2r (mm0, mm2); // unpack high dest bytes - - punpcklbw_r2r (mm0, mm3); // unpack low src1 bytes - punpckhbw_r2r (mm0, mm4); // unpack high src1 bytes - - punpcklbw_r2r (mm0, mm5); // unpack low src2 bytes - punpckhbw_r2r (mm0, mm6); // unpack high src2 bytes - - paddw_r2r (mm5, mm3); // add lows - paddw_m2r (round1, mm3); - psraw_i2r (1, mm3); // /2 - - paddw_r2r (mm6, mm4); // add highs - paddw_m2r (round1, mm4); - psraw_i2r (1, mm4); // /2 - - paddw_r2r (mm3, mm1); // add lows - paddw_m2r (round1, mm1); - psraw_i2r (1, mm1); // /2 - - paddw_r2r (mm4, mm2); // add highs - paddw_m2r (round1, mm2); - psraw_i2r (1, mm2); // /2 - - packuswb_r2r (mm2, mm1); // pack (w/ saturation) - movq_r2m (mm1, *dest); // store result in dest -} - -static inline void mmx_average_4_U8 (uint8_t * dest, - uint8_t * src1, uint8_t * src2, - uint8_t * src3, uint8_t * src4) -{ - /* *dest = (*src1 + *src2 + *src3 + *src4 + 2)/ 4; */ - - movq_m2r (*src1, mm1); // load 8 src1 bytes - movq_r2r (mm1, mm2); // copy 8 src1 bytes - - punpcklbw_r2r (mm0, mm1); // unpack low src1 bytes - punpckhbw_r2r (mm0, mm2); // unpack high src1 bytes - - movq_m2r (*src2, mm3); // load 8 src2 bytes - movq_r2r (mm3, mm4); // copy 8 src2 bytes - - punpcklbw_r2r (mm0, mm3); // unpack low src2 bytes - punpckhbw_r2r (mm0, mm4); // unpack high src2 bytes - - paddw_r2r (mm3, mm1); // add lows - paddw_r2r (mm4, mm2); // add highs - - /* now have partials in mm1 and mm2 */ - - movq_m2r (*src3, mm3); // load 8 src3 bytes - movq_r2r (mm3, mm4); // copy 8 src3 bytes - - punpcklbw_r2r (mm0, mm3); // unpack low src3 bytes - punpckhbw_r2r (mm0, mm4); // unpack high src3 bytes - - paddw_r2r (mm3, mm1); // add lows - paddw_r2r (mm4, mm2); // add highs - - movq_m2r 
(*src4, mm5); // load 8 src4 bytes - movq_r2r (mm5, mm6); // copy 8 src4 bytes - - punpcklbw_r2r (mm0, mm5); // unpack low src4 bytes - punpckhbw_r2r (mm0, mm6); // unpack high src4 bytes - - paddw_r2r (mm5, mm1); // add lows - paddw_r2r (mm6, mm2); // add highs - - /* now have subtotal in mm1 and mm2 */ - - paddw_m2r (round4, mm1); - psraw_i2r (2, mm1); // /4 - paddw_m2r (round4, mm2); - psraw_i2r (2, mm2); // /4 - - packuswb_r2r (mm2, mm1); // pack (w/ saturation) - movq_r2m (mm1, *dest); // store result in dest -} - -static inline void mmx_interp_average_4_U8 (uint8_t * dest, - uint8_t * src1, uint8_t * src2, - uint8_t * src3, uint8_t * src4) -{ - /* *dest = (*dest + (*src1 + *src2 + *src3 + *src4 + 2)/ 4 + 1)/ 2; */ - - movq_m2r (*src1, mm1); // load 8 src1 bytes - movq_r2r (mm1, mm2); // copy 8 src1 bytes - - punpcklbw_r2r (mm0, mm1); // unpack low src1 bytes - punpckhbw_r2r (mm0, mm2); // unpack high src1 bytes - - movq_m2r (*src2, mm3); // load 8 src2 bytes - movq_r2r (mm3, mm4); // copy 8 src2 bytes - - punpcklbw_r2r (mm0, mm3); // unpack low src2 bytes - punpckhbw_r2r (mm0, mm4); // unpack high src2 bytes - - paddw_r2r (mm3, mm1); // add lows - paddw_r2r (mm4, mm2); // add highs - - /* now have partials in mm1 and mm2 */ - - movq_m2r (*src3, mm3); // load 8 src3 bytes - movq_r2r (mm3, mm4); // copy 8 src3 bytes - - punpcklbw_r2r (mm0, mm3); // unpack low src3 bytes - punpckhbw_r2r (mm0, mm4); // unpack high src3 bytes - - paddw_r2r (mm3, mm1); // add lows - paddw_r2r (mm4, mm2); // add highs - - movq_m2r (*src4, mm5); // load 8 src4 bytes - movq_r2r (mm5, mm6); // copy 8 src4 bytes - - punpcklbw_r2r (mm0, mm5); // unpack low src4 bytes - punpckhbw_r2r (mm0, mm6); // unpack high src4 bytes - - paddw_r2r (mm5, mm1); // add lows - paddw_r2r (mm6, mm2); // add highs - - paddw_m2r (round4, mm1); - psraw_i2r (2, mm1); // /4 - paddw_m2r (round4, mm2); - psraw_i2r (2, mm2); // /4 - - /* now have subtotal/4 in mm1 and mm2 */ - - movq_m2r (*dest, mm3); // load 8 
dest bytes - movq_r2r (mm3, mm4); // copy 8 dest bytes - - punpcklbw_r2r (mm0, mm3); // unpack low dest bytes - punpckhbw_r2r (mm0, mm4); // unpack high dest bytes - - paddw_r2r (mm3, mm1); // add lows - paddw_r2r (mm4, mm2); // add highs - - paddw_m2r (round1, mm1); - psraw_i2r (1, mm1); // /2 - paddw_m2r (round1, mm2); - psraw_i2r (1, mm2); // /2 - - /* now have end value in mm1 and mm2 */ - - packuswb_r2r (mm2, mm1); // pack (w/ saturation) - movq_r2m (mm1,*dest); // store result in dest -} - -/*-----------------------------------------------------------------------*/ - -static inline void MC_avg_mmx (int width, int height, - uint8_t * dest, uint8_t * ref, int stride) -{ - mmx_zero_reg (); - - do { - mmx_average_2_U8 (dest, dest, ref); - - if (width == 16) - mmx_average_2_U8 (dest+8, dest+8, ref+8); - - dest += stride; - ref += stride; - } while (--height); -} - -static void MC_avg_o_16_mmx (uint8_t * dest, uint8_t * ref, - int stride, int height) -{ - MC_avg_mmx (16, height, dest, ref, stride); -} - -static void MC_avg_o_8_mmx (uint8_t * dest, uint8_t * ref, - int stride, int height) -{ - MC_avg_mmx (8, height, dest, ref, stride); -} - -/*-----------------------------------------------------------------------*/ - -static inline void MC_put_mmx (int width, int height, - uint8_t * dest, uint8_t * ref, int stride) -{ - mmx_zero_reg (); - - do { - movq_m2r (* ref, mm1); // load 8 ref bytes - movq_r2m (mm1,* dest); // store 8 bytes at curr - - if (width == 16) - { - movq_m2r (* (ref+8), mm1); // load 8 ref bytes - movq_r2m (mm1,* (dest+8)); // store 8 bytes at curr - } - - dest += stride; - ref += stride; - } while (--height); -} - -static void MC_put_o_16_mmx (uint8_t * dest, uint8_t * ref, - int stride, int height) -{ - MC_put_mmx (16, height, dest, ref, stride); -} - -static void MC_put_o_8_mmx (uint8_t * dest, uint8_t * ref, - int stride, int height) -{ - MC_put_mmx (8, height, dest, ref, stride); -} - 
-/*-----------------------------------------------------------------------*/ - -/* Half pixel interpolation in the x direction */ -static inline void MC_avg_x_mmx (int width, int height, - uint8_t * dest, uint8_t * ref, int stride) -{ - mmx_zero_reg (); - - do { - mmx_interp_average_2_U8 (dest, ref, ref+1); - - if (width == 16) - mmx_interp_average_2_U8 (dest+8, ref+8, ref+9); - - dest += stride; - ref += stride; - } while (--height); -} - -static void MC_avg_x_16_mmx (uint8_t * dest, uint8_t * ref, - int stride, int height) -{ - MC_avg_x_mmx (16, height, dest, ref, stride); -} - -static void MC_avg_x_8_mmx (uint8_t * dest, uint8_t * ref, - int stride, int height) -{ - MC_avg_x_mmx (8, height, dest, ref, stride); -} - -/*-----------------------------------------------------------------------*/ - -static inline void MC_put_x_mmx (int width, int height, - uint8_t * dest, uint8_t * ref, int stride) -{ - mmx_zero_reg (); - - do { - mmx_average_2_U8 (dest, ref, ref+1); - - if (width == 16) - mmx_average_2_U8 (dest+8, ref+8, ref+9); - - dest += stride; - ref += stride; - } while (--height); -} - -static void MC_put_x_16_mmx (uint8_t * dest, uint8_t * ref, - int stride, int height) -{ - MC_put_x_mmx (16, height, dest, ref, stride); -} - -static void MC_put_x_8_mmx (uint8_t * dest, uint8_t * ref, - int stride, int height) -{ - MC_put_x_mmx (8, height, dest, ref, stride); -} - -/*-----------------------------------------------------------------------*/ - -static inline void MC_avg_xy_mmx (int width, int height, - uint8_t * dest, uint8_t * ref, int stride) -{ - uint8_t * ref_next = ref+stride; - - mmx_zero_reg (); - - do { - mmx_interp_average_4_U8 (dest, ref, ref+1, ref_next, ref_next+1); - - if (width == 16) - mmx_interp_average_4_U8 (dest+8, ref+8, ref+9, - ref_next+8, ref_next+9); - - dest += stride; - ref += stride; - ref_next += stride; - } while (--height); -} - -static void MC_avg_xy_16_mmx (uint8_t * dest, uint8_t * ref, - int stride, int height) -{ - MC_avg_xy_mmx 
(16, height, dest, ref, stride); -} - -static void MC_avg_xy_8_mmx (uint8_t * dest, uint8_t * ref, - int stride, int height) -{ - MC_avg_xy_mmx (8, height, dest, ref, stride); -} - -/*-----------------------------------------------------------------------*/ - -static inline void MC_put_xy_mmx (int width, int height, - uint8_t * dest, uint8_t * ref, int stride) -{ - uint8_t * ref_next = ref+stride; - - mmx_zero_reg (); - - do { - mmx_average_4_U8 (dest, ref, ref+1, ref_next, ref_next+1); - - if (width == 16) - mmx_average_4_U8 (dest+8, ref+8, ref+9, ref_next+8, ref_next+9); - - dest += stride; - ref += stride; - ref_next += stride; - } while (--height); -} - -static void MC_put_xy_16_mmx (uint8_t * dest, uint8_t * ref, - int stride, int height) -{ - MC_put_xy_mmx (16, height, dest, ref, stride); -} - -static void MC_put_xy_8_mmx (uint8_t * dest, uint8_t * ref, - int stride, int height) -{ - MC_put_xy_mmx (8, height, dest, ref, stride); -} - -/*-----------------------------------------------------------------------*/ - -static inline void MC_avg_y_mmx (int width, int height, - uint8_t * dest, uint8_t * ref, int stride) -{ - uint8_t * ref_next = ref+stride; - - mmx_zero_reg (); - - do { - mmx_interp_average_2_U8 (dest, ref, ref_next); - - if (width == 16) - mmx_interp_average_2_U8 (dest+8, ref+8, ref_next+8); - - dest += stride; - ref += stride; - ref_next += stride; - } while (--height); -} - -static void MC_avg_y_16_mmx (uint8_t * dest, uint8_t * ref, - int stride, int height) -{ - MC_avg_y_mmx (16, height, dest, ref, stride); -} - -static void MC_avg_y_8_mmx (uint8_t * dest, uint8_t * ref, - int stride, int height) -{ - MC_avg_y_mmx (8, height, dest, ref, stride); -} - -/*-----------------------------------------------------------------------*/ - -static inline void MC_put_y_mmx (int width, int height, - uint8_t * dest, uint8_t * ref, int stride) -{ - uint8_t * ref_next = ref+stride; - - mmx_zero_reg (); - - do { - mmx_average_2_U8 (dest, ref, ref_next); - - if 
(width == 16) - mmx_average_2_U8 (dest+8, ref+8, ref_next+8); - - dest += stride; - ref += stride; - ref_next += stride; - } while (--height); -} - -static void MC_put_y_16_mmx (uint8_t * dest, uint8_t * ref, - int stride, int height) -{ - MC_put_y_mmx (16, height, dest, ref, stride); -} - -static void MC_put_y_8_mmx (uint8_t * dest, uint8_t * ref, - int stride, int height) -{ - MC_put_y_mmx (8, height, dest, ref, stride); -} - - -MPEG2_MC_EXTERN (mmx) - - - - - - - -/* CPU_MMXEXT/CPU_3DNOW adaptation layer */ - -#define pavg_r2r(src,dest) \ -do { \ - if (cpu == CPU_MMXEXT) \ - pavgb_r2r (src, dest); \ - else \ - pavgusb_r2r (src, dest); \ -} while (0) - -#define pavg_m2r(src,dest) \ -do { \ - if (cpu == CPU_MMXEXT) \ - pavgb_m2r (src, dest); \ - else \ - pavgusb_m2r (src, dest); \ -} while (0) - - -/* CPU_MMXEXT code */ - - -static inline void MC_put1_8 (int height, uint8_t * dest, uint8_t * ref, - int stride) -{ - do { - movq_m2r (*ref, mm0); - movq_r2m (mm0, *dest); - ref += stride; - dest += stride; - } while (--height); -} - -static inline void MC_put1_16 (int height, uint8_t * dest, uint8_t * ref, - int stride) -{ - do { - movq_m2r (*ref, mm0); - movq_m2r (*(ref+8), mm1); - ref += stride; - movq_r2m (mm0, *dest); - movq_r2m (mm1, *(dest+8)); - dest += stride; - } while (--height); -} - -static inline void MC_avg1_8 (int height, uint8_t * dest, uint8_t * ref, - int stride, int cpu) -{ - do { - movq_m2r (*ref, mm0); - pavg_m2r (*dest, mm0); - ref += stride; - movq_r2m (mm0, *dest); - dest += stride; - } while (--height); -} - -static inline void MC_avg1_16 (int height, uint8_t * dest, uint8_t * ref, - int stride, int cpu) -{ - do { - movq_m2r (*ref, mm0); - movq_m2r (*(ref+8), mm1); - pavg_m2r (*dest, mm0); - pavg_m2r (*(dest+8), mm1); - movq_r2m (mm0, *dest); - ref += stride; - movq_r2m (mm1, *(dest+8)); - dest += stride; - } while (--height); -} - -static inline void MC_put2_8 (int height, uint8_t * dest, uint8_t * ref, - int stride, int offset, int cpu) -{ - 
do { - movq_m2r (*ref, mm0); - pavg_m2r (*(ref+offset), mm0); - ref += stride; - movq_r2m (mm0, *dest); - dest += stride; - } while (--height); -} - -static inline void MC_put2_16 (int height, uint8_t * dest, uint8_t * ref, - int stride, int offset, int cpu) -{ - do { - movq_m2r (*ref, mm0); - movq_m2r (*(ref+8), mm1); - pavg_m2r (*(ref+offset), mm0); - pavg_m2r (*(ref+offset+8), mm1); - movq_r2m (mm0, *dest); - ref += stride; - movq_r2m (mm1, *(dest+8)); - dest += stride; - } while (--height); -} - -static inline void MC_avg2_8 (int height, uint8_t * dest, uint8_t * ref, - int stride, int offset, int cpu) -{ - do { - movq_m2r (*ref, mm0); - pavg_m2r (*(ref+offset), mm0); - pavg_m2r (*dest, mm0); - ref += stride; - movq_r2m (mm0, *dest); - dest += stride; - } while (--height); -} - -static inline void MC_avg2_16 (int height, uint8_t * dest, uint8_t * ref, - int stride, int offset, int cpu) -{ - do { - movq_m2r (*ref, mm0); - movq_m2r (*(ref+8), mm1); - pavg_m2r (*(ref+offset), mm0); - pavg_m2r (*(ref+offset+8), mm1); - pavg_m2r (*dest, mm0); - pavg_m2r (*(dest+8), mm1); - ref += stride; - movq_r2m (mm0, *dest); - movq_r2m (mm1, *(dest+8)); - dest += stride; - } while (--height); -} - -static mmx_t mask_one = {0x0101010101010101LL}; - -static inline void MC_put4_8 (int height, uint8_t * dest, uint8_t * ref, - int stride, int cpu) -{ - movq_m2r (*ref, mm0); - movq_m2r (*(ref+1), mm1); - movq_r2r (mm0, mm7); - pxor_r2r (mm1, mm7); - pavg_r2r (mm1, mm0); - ref += stride; - - do { - movq_m2r (*ref, mm2); - movq_r2r (mm0, mm5); - - movq_m2r (*(ref+1), mm3); - movq_r2r (mm2, mm6); - - pxor_r2r (mm3, mm6); - pavg_r2r (mm3, mm2); - - por_r2r (mm6, mm7); - pxor_r2r (mm2, mm5); - - pand_r2r (mm5, mm7); - pavg_r2r (mm2, mm0); - - pand_m2r (mask_one, mm7); - - psubusb_r2r (mm7, mm0); - - ref += stride; - movq_r2m (mm0, *dest); - dest += stride; - - movq_r2r (mm6, mm7); // unroll ! - movq_r2r (mm2, mm0); // unroll ! 
- } while (--height); -} - -static inline void MC_put4_16 (int height, uint8_t * dest, uint8_t * ref, - int stride, int cpu) -{ - do { - movq_m2r (*ref, mm0); - movq_m2r (*(ref+stride+1), mm1); - movq_r2r (mm0, mm7); - movq_m2r (*(ref+1), mm2); - pxor_r2r (mm1, mm7); - movq_m2r (*(ref+stride), mm3); - movq_r2r (mm2, mm6); - pxor_r2r (mm3, mm6); - pavg_r2r (mm1, mm0); - pavg_r2r (mm3, mm2); - por_r2r (mm6, mm7); - movq_r2r (mm0, mm6); - pxor_r2r (mm2, mm6); - pand_r2r (mm6, mm7); - pand_m2r (mask_one, mm7); - pavg_r2r (mm2, mm0); - psubusb_r2r (mm7, mm0); - movq_r2m (mm0, *dest); - - movq_m2r (*(ref+8), mm0); - movq_m2r (*(ref+stride+9), mm1); - movq_r2r (mm0, mm7); - movq_m2r (*(ref+9), mm2); - pxor_r2r (mm1, mm7); - movq_m2r (*(ref+stride+8), mm3); - movq_r2r (mm2, mm6); - pxor_r2r (mm3, mm6); - pavg_r2r (mm1, mm0); - pavg_r2r (mm3, mm2); - por_r2r (mm6, mm7); - movq_r2r (mm0, mm6); - pxor_r2r (mm2, mm6); - pand_r2r (mm6, mm7); - pand_m2r (mask_one, mm7); - pavg_r2r (mm2, mm0); - psubusb_r2r (mm7, mm0); - ref += stride; - movq_r2m (mm0, *(dest+8)); - dest += stride; - } while (--height); -} - -static inline void MC_avg4_8 (int height, uint8_t * dest, uint8_t * ref, - int stride, int cpu) -{ - do { - movq_m2r (*ref, mm0); - movq_m2r (*(ref+stride+1), mm1); - movq_r2r (mm0, mm7); - movq_m2r (*(ref+1), mm2); - pxor_r2r (mm1, mm7); - movq_m2r (*(ref+stride), mm3); - movq_r2r (mm2, mm6); - pxor_r2r (mm3, mm6); - pavg_r2r (mm1, mm0); - pavg_r2r (mm3, mm2); - por_r2r (mm6, mm7); - movq_r2r (mm0, mm6); - pxor_r2r (mm2, mm6); - pand_r2r (mm6, mm7); - pand_m2r (mask_one, mm7); - pavg_r2r (mm2, mm0); - psubusb_r2r (mm7, mm0); - movq_m2r (*dest, mm1); - pavg_r2r (mm1, mm0); - ref += stride; - movq_r2m (mm0, *dest); - dest += stride; - } while (--height); -} - -static inline void MC_avg4_16 (int height, uint8_t * dest, uint8_t * ref, - int stride, int cpu) -{ - do { - movq_m2r (*ref, mm0); - movq_m2r (*(ref+stride+1), mm1); - movq_r2r (mm0, mm7); - movq_m2r (*(ref+1), mm2); - 
pxor_r2r (mm1, mm7); - movq_m2r (*(ref+stride), mm3); - movq_r2r (mm2, mm6); - pxor_r2r (mm3, mm6); - pavg_r2r (mm1, mm0); - pavg_r2r (mm3, mm2); - por_r2r (mm6, mm7); - movq_r2r (mm0, mm6); - pxor_r2r (mm2, mm6); - pand_r2r (mm6, mm7); - pand_m2r (mask_one, mm7); - pavg_r2r (mm2, mm0); - psubusb_r2r (mm7, mm0); - movq_m2r (*dest, mm1); - pavg_r2r (mm1, mm0); - movq_r2m (mm0, *dest); - - movq_m2r (*(ref+8), mm0); - movq_m2r (*(ref+stride+9), mm1); - movq_r2r (mm0, mm7); - movq_m2r (*(ref+9), mm2); - pxor_r2r (mm1, mm7); - movq_m2r (*(ref+stride+8), mm3); - movq_r2r (mm2, mm6); - pxor_r2r (mm3, mm6); - pavg_r2r (mm1, mm0); - pavg_r2r (mm3, mm2); - por_r2r (mm6, mm7); - movq_r2r (mm0, mm6); - pxor_r2r (mm2, mm6); - pand_r2r (mm6, mm7); - pand_m2r (mask_one, mm7); - pavg_r2r (mm2, mm0); - psubusb_r2r (mm7, mm0); - movq_m2r (*(dest+8), mm1); - pavg_r2r (mm1, mm0); - ref += stride; - movq_r2m (mm0, *(dest+8)); - dest += stride; - } while (--height); -} - -static void MC_avg_o_16_mmxext (uint8_t * dest, uint8_t * ref, - int stride, int height) -{ - MC_avg1_16 (height, dest, ref, stride, CPU_MMXEXT); -} - -static void MC_avg_o_8_mmxext (uint8_t * dest, uint8_t * ref, - int stride, int height) -{ - MC_avg1_8 (height, dest, ref, stride, CPU_MMXEXT); -} - -static void MC_put_o_16_mmxext (uint8_t * dest, uint8_t * ref, - int stride, int height) -{ - MC_put1_16 (height, dest, ref, stride); -} - -static void MC_put_o_8_mmxext (uint8_t * dest, uint8_t * ref, - int stride, int height) -{ - MC_put1_8 (height, dest, ref, stride); -} - -static void MC_avg_x_16_mmxext (uint8_t * dest, uint8_t * ref, - int stride, int height) -{ - MC_avg2_16 (height, dest, ref, stride, 1, CPU_MMXEXT); -} - -static void MC_avg_x_8_mmxext (uint8_t * dest, uint8_t * ref, - int stride, int height) -{ - MC_avg2_8 (height, dest, ref, stride, 1, CPU_MMXEXT); -} - -static void MC_put_x_16_mmxext (uint8_t * dest, uint8_t * ref, - int stride, int height) -{ - MC_put2_16 (height, dest, ref, stride, 1, 
CPU_MMXEXT); -} - -static void MC_put_x_8_mmxext (uint8_t * dest, uint8_t * ref, - int stride, int height) -{ - MC_put2_8 (height, dest, ref, stride, 1, CPU_MMXEXT); -} - -static void MC_avg_y_16_mmxext (uint8_t * dest, uint8_t * ref, - int stride, int height) -{ - MC_avg2_16 (height, dest, ref, stride, stride, CPU_MMXEXT); -} - -static void MC_avg_y_8_mmxext (uint8_t * dest, uint8_t * ref, - int stride, int height) -{ - MC_avg2_8 (height, dest, ref, stride, stride, CPU_MMXEXT); -} - -static void MC_put_y_16_mmxext (uint8_t * dest, uint8_t * ref, - int stride, int height) -{ - MC_put2_16 (height, dest, ref, stride, stride, CPU_MMXEXT); -} - -static void MC_put_y_8_mmxext (uint8_t * dest, uint8_t * ref, - int stride, int height) -{ - MC_put2_8 (height, dest, ref, stride, stride, CPU_MMXEXT); -} - -static void MC_avg_xy_16_mmxext (uint8_t * dest, uint8_t * ref, - int stride, int height) -{ - MC_avg4_16 (height, dest, ref, stride, CPU_MMXEXT); -} - -static void MC_avg_xy_8_mmxext (uint8_t * dest, uint8_t * ref, - int stride, int height) -{ - MC_avg4_8 (height, dest, ref, stride, CPU_MMXEXT); -} - -static void MC_put_xy_16_mmxext (uint8_t * dest, uint8_t * ref, - int stride, int height) -{ - MC_put4_16 (height, dest, ref, stride, CPU_MMXEXT); -} - -static void MC_put_xy_8_mmxext (uint8_t * dest, uint8_t * ref, - int stride, int height) -{ - MC_put4_8 (height, dest, ref, stride, CPU_MMXEXT); -} - - -MPEG2_MC_EXTERN (mmxext) - - - -static void MC_avg_o_16_3dnow (uint8_t * dest, uint8_t * ref, - int stride, int height) -{ - MC_avg1_16 (height, dest, ref, stride, CPU_3DNOW); -} - -static void MC_avg_o_8_3dnow (uint8_t * dest, uint8_t * ref, - int stride, int height) -{ - MC_avg1_8 (height, dest, ref, stride, CPU_3DNOW); -} - -static void MC_put_o_16_3dnow (uint8_t * dest, uint8_t * ref, - int stride, int height) -{ - MC_put1_16 (height, dest, ref, stride); -} - -static void MC_put_o_8_3dnow (uint8_t * dest, uint8_t * ref, - int stride, int height) -{ - MC_put1_8 (height, 
dest, ref, stride); -} - -static void MC_avg_x_16_3dnow (uint8_t * dest, uint8_t * ref, - int stride, int height) -{ - MC_avg2_16 (height, dest, ref, stride, 1, CPU_3DNOW); -} - -static void MC_avg_x_8_3dnow (uint8_t * dest, uint8_t * ref, - int stride, int height) -{ - MC_avg2_8 (height, dest, ref, stride, 1, CPU_3DNOW); -} - -static void MC_put_x_16_3dnow (uint8_t * dest, uint8_t * ref, - int stride, int height) -{ - MC_put2_16 (height, dest, ref, stride, 1, CPU_3DNOW); -} - -static void MC_put_x_8_3dnow (uint8_t * dest, uint8_t * ref, - int stride, int height) -{ - MC_put2_8 (height, dest, ref, stride, 1, CPU_3DNOW); -} - -static void MC_avg_y_16_3dnow (uint8_t * dest, uint8_t * ref, - int stride, int height) -{ - MC_avg2_16 (height, dest, ref, stride, stride, CPU_3DNOW); -} - -static void MC_avg_y_8_3dnow (uint8_t * dest, uint8_t * ref, - int stride, int height) -{ - MC_avg2_8 (height, dest, ref, stride, stride, CPU_3DNOW); -} - -static void MC_put_y_16_3dnow (uint8_t * dest, uint8_t * ref, - int stride, int height) -{ - MC_put2_16 (height, dest, ref, stride, stride, CPU_3DNOW); -} - -static void MC_put_y_8_3dnow (uint8_t * dest, uint8_t * ref, - int stride, int height) -{ - MC_put2_8 (height, dest, ref, stride, stride, CPU_3DNOW); -} - -static void MC_avg_xy_16_3dnow (uint8_t * dest, uint8_t * ref, - int stride, int height) -{ - MC_avg4_16 (height, dest, ref, stride, CPU_3DNOW); -} - -static void MC_avg_xy_8_3dnow (uint8_t * dest, uint8_t * ref, - int stride, int height) -{ - MC_avg4_8 (height, dest, ref, stride, CPU_3DNOW); -} - -static void MC_put_xy_16_3dnow (uint8_t * dest, uint8_t * ref, - int stride, int height) -{ - MC_put4_16 (height, dest, ref, stride, CPU_3DNOW); -} - -static void MC_put_xy_8_3dnow (uint8_t * dest, uint8_t * ref, - int stride, int height) -{ - MC_put4_8 (height, dest, ref, stride, CPU_3DNOW); -} - - -MPEG2_MC_EXTERN (3dnow) - -#endif diff --git a/src/libmpeg2/motion_comp_vis.c b/src/libmpeg2/motion_comp_vis.c deleted file mode 100644 
index d0a6673d6..000000000 --- a/src/libmpeg2/motion_comp_vis.c +++ /dev/null @@ -1,2059 +0,0 @@ -/* - * motion_comp_vis.c - * Copyright (C) 2003 David S. Miller - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * See http://libmpeg2.sourceforge.net/ for updates. - * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include "config.h" - -#if defined(ARCH_SPARC) && defined(ENABLE_VIS) - -#include - -#include "mpeg2_internal.h" -#include "vis.h" - -/* The trick used in some of this file is the formula from the MMX - * motion comp code, which is: - * - * (x+y+1)>>1 == (x|y)-((x^y)>>1) - * - * This allows us to average 8 bytes at a time in a 64-bit FPU reg. - * We avoid overflows by masking before we do the shift, and we - * implement the shift by multiplying by 1/2 using mul8x16. 
So in - * VIS this is (assume 'x' is in f0, 'y' is in f2, a repeating mask - * of '0xfe' is in f4, a repeating mask of '0x7f' is in f6, and - * the value 0x80808080 is in f8): - * - * fxor f0, f2, f10 - * fand f10, f4, f10 - * fmul8x16 f8, f10, f10 - * fand f10, f6, f10 - * for f0, f2, f12 - * fpsub16 f12, f10, f10 - */ - -#define DUP4(x) {x, x, x, x} -#define DUP8(x) {x, x, x, x, x, x, x, x} -static const int16_t constants1[] ATTR_ALIGN(8) = DUP4 (1); -static const int16_t constants2[] ATTR_ALIGN(8) = DUP4 (2); -static const int16_t constants3[] ATTR_ALIGN(8) = DUP4 (3); -static const int16_t constants6[] ATTR_ALIGN(8) = DUP4 (6); -static const int8_t constants_fe[] ATTR_ALIGN(8) = DUP8 (0xfe); -static const int8_t constants_7f[] ATTR_ALIGN(8) = DUP8 (0x7f); -static const int8_t constants128[] ATTR_ALIGN(8) = DUP8 (128); -static const int16_t constants256_512[] ATTR_ALIGN(8) = - {256, 512, 256, 512}; -static const int16_t constants256_1024[] ATTR_ALIGN(8) = - {256, 1024, 256, 1024}; - -#define REF_0 0 -#define REF_0_1 1 -#define REF_2 2 -#define REF_2_1 3 -#define REF_4 4 -#define REF_4_1 5 -#define REF_6 6 -#define REF_6_1 7 -#define REF_S0 8 -#define REF_S0_1 9 -#define REF_S2 10 -#define REF_S2_1 11 -#define REF_S4 12 -#define REF_S4_1 13 -#define REF_S6 14 -#define REF_S6_1 15 -#define DST_0 16 -#define DST_1 17 -#define DST_2 18 -#define DST_3 19 -#define CONST_1 20 -#define CONST_2 20 -#define CONST_3 20 -#define CONST_6 20 -#define MASK_fe 20 -#define CONST_128 22 -#define CONST_256 22 -#define CONST_512 22 -#define CONST_1024 22 -#define TMP0 24 -#define TMP1 25 -#define TMP2 26 -#define TMP3 27 -#define TMP4 28 -#define TMP5 29 -#define ZERO 30 -#define MASK_7f 30 - -#define TMP6 32 -#define TMP8 34 -#define TMP10 36 -#define TMP12 38 -#define TMP14 40 -#define TMP16 42 -#define TMP18 44 -#define TMP20 46 -#define TMP22 48 -#define TMP24 50 -#define TMP26 52 -#define TMP28 54 -#define TMP30 56 -#define TMP32 58 - -static void MC_put_o_16_vis (uint8_t * 
dest, uint8_t * _ref, - int stride, int height) -{ - uint8_t *ref = (uint8_t *) _ref; - int offset; - - ref = vis_alignaddr(ref); - offset = (ref != _ref) ? 16 : 0; - do { /* 5 cycles */ - vis_ld64(ref[0], TMP0); - - vis_ld64_2(ref, 8, TMP2); - - vis_ld64_2(ref, offset, TMP4); - ref += stride; - - vis_faligndata(TMP0, TMP2, REF_0); - vis_st64(REF_0, dest[0]); - - vis_faligndata(TMP2, TMP4, REF_2); - vis_st64_2(REF_2, dest, 8); - dest += stride; - } while (--height); -} - -static void MC_put_o_8_vis (uint8_t * dest, uint8_t * _ref, - int stride, int height) -{ - uint8_t *ref = (uint8_t *) _ref; - int offset; - - ref = vis_alignaddr(ref); - offset = (ref != _ref) ? 8 : 0; - do { /* 4 cycles */ - vis_ld64(ref[0], TMP0); - - vis_ld64_2(ref, offset, TMP2); - ref += stride; - - /* stall */ - - vis_faligndata(TMP0, TMP2, REF_0); - vis_st64(REF_0, dest[0]); - dest += stride; - } while (--height); -} - - -static void MC_avg_o_16_vis (uint8_t * dest, uint8_t * _ref, - int stride, int height) -{ - uint8_t *ref = (uint8_t *) _ref; - int stride_8 = stride + 8; - int offset; - - ref = vis_alignaddr(ref); - offset = (ref != _ref) ? 
16 : 0; - - vis_ld64(ref[0], TMP0); - - vis_ld64(ref[8], TMP2); - - vis_ld64_2(ref, offset, TMP4); - - vis_ld64(dest[0], DST_0); - - vis_ld64(dest[8], DST_2); - - vis_ld64(constants_fe[0], MASK_fe); - vis_faligndata(TMP0, TMP2, REF_0); - - vis_ld64(constants_7f[0], MASK_7f); - vis_faligndata(TMP2, TMP4, REF_2); - - vis_ld64(constants128[0], CONST_128); - - ref += stride; - height = (height >> 1) - 1; - - do { /* 24 cycles */ - vis_ld64(ref[0], TMP0); - vis_xor(DST_0, REF_0, TMP6); - - vis_ld64_2(ref, 8, TMP2); - vis_and(TMP6, MASK_fe, TMP6); - - vis_ld64_2(ref, offset, TMP4); - ref += stride; - vis_mul8x16(CONST_128, TMP6, TMP6); - vis_xor(DST_2, REF_2, TMP8); - - vis_and(TMP8, MASK_fe, TMP8); - - vis_or(DST_0, REF_0, TMP10); - vis_ld64_2(dest, stride, DST_0); - vis_mul8x16(CONST_128, TMP8, TMP8); - - vis_or(DST_2, REF_2, TMP12); - vis_ld64_2(dest, stride_8, DST_2); - - vis_ld64(ref[0], TMP14); - vis_and(TMP6, MASK_7f, TMP6); - - vis_and(TMP8, MASK_7f, TMP8); - - vis_psub16(TMP10, TMP6, TMP6); - vis_st64(TMP6, dest[0]); - - vis_psub16(TMP12, TMP8, TMP8); - vis_st64_2(TMP8, dest, 8); - - dest += stride; - vis_ld64_2(ref, 8, TMP16); - vis_faligndata(TMP0, TMP2, REF_0); - - vis_ld64_2(ref, offset, TMP18); - vis_faligndata(TMP2, TMP4, REF_2); - ref += stride; - - vis_xor(DST_0, REF_0, TMP20); - - vis_and(TMP20, MASK_fe, TMP20); - - vis_xor(DST_2, REF_2, TMP22); - vis_mul8x16(CONST_128, TMP20, TMP20); - - vis_and(TMP22, MASK_fe, TMP22); - - vis_or(DST_0, REF_0, TMP24); - vis_mul8x16(CONST_128, TMP22, TMP22); - - vis_or(DST_2, REF_2, TMP26); - - vis_ld64_2(dest, stride, DST_0); - vis_faligndata(TMP14, TMP16, REF_0); - - vis_ld64_2(dest, stride_8, DST_2); - vis_faligndata(TMP16, TMP18, REF_2); - - vis_and(TMP20, MASK_7f, TMP20); - - vis_and(TMP22, MASK_7f, TMP22); - - vis_psub16(TMP24, TMP20, TMP20); - vis_st64(TMP20, dest[0]); - - vis_psub16(TMP26, TMP22, TMP22); - vis_st64_2(TMP22, dest, 8); - dest += stride; - } while (--height); - - vis_ld64(ref[0], TMP0); - 
vis_xor(DST_0, REF_0, TMP6); - - vis_ld64_2(ref, 8, TMP2); - vis_and(TMP6, MASK_fe, TMP6); - - vis_ld64_2(ref, offset, TMP4); - vis_mul8x16(CONST_128, TMP6, TMP6); - vis_xor(DST_2, REF_2, TMP8); - - vis_and(TMP8, MASK_fe, TMP8); - - vis_or(DST_0, REF_0, TMP10); - vis_ld64_2(dest, stride, DST_0); - vis_mul8x16(CONST_128, TMP8, TMP8); - - vis_or(DST_2, REF_2, TMP12); - vis_ld64_2(dest, stride_8, DST_2); - - vis_ld64(ref[0], TMP14); - vis_and(TMP6, MASK_7f, TMP6); - - vis_and(TMP8, MASK_7f, TMP8); - - vis_psub16(TMP10, TMP6, TMP6); - vis_st64(TMP6, dest[0]); - - vis_psub16(TMP12, TMP8, TMP8); - vis_st64_2(TMP8, dest, 8); - - dest += stride; - vis_faligndata(TMP0, TMP2, REF_0); - - vis_faligndata(TMP2, TMP4, REF_2); - - vis_xor(DST_0, REF_0, TMP20); - - vis_and(TMP20, MASK_fe, TMP20); - - vis_xor(DST_2, REF_2, TMP22); - vis_mul8x16(CONST_128, TMP20, TMP20); - - vis_and(TMP22, MASK_fe, TMP22); - - vis_or(DST_0, REF_0, TMP24); - vis_mul8x16(CONST_128, TMP22, TMP22); - - vis_or(DST_2, REF_2, TMP26); - - vis_and(TMP20, MASK_7f, TMP20); - - vis_and(TMP22, MASK_7f, TMP22); - - vis_psub16(TMP24, TMP20, TMP20); - vis_st64(TMP20, dest[0]); - - vis_psub16(TMP26, TMP22, TMP22); - vis_st64_2(TMP22, dest, 8); -} - -static void MC_avg_o_8_vis (uint8_t * dest, uint8_t * _ref, - int stride, int height) -{ - uint8_t *ref = (uint8_t *) _ref; - int offset; - - ref = vis_alignaddr(ref); - offset = (ref != _ref) ? 
8 : 0; - - vis_ld64(ref[0], TMP0); - - vis_ld64_2(ref, offset, TMP2); - - vis_ld64(dest[0], DST_0); - - vis_ld64(constants_fe[0], MASK_fe); - - vis_ld64(constants_7f[0], MASK_7f); - vis_faligndata(TMP0, TMP2, REF_0); - - vis_ld64(constants128[0], CONST_128); - - ref += stride; - height = (height >> 1) - 1; - - do { /* 12 cycles */ - vis_ld64(ref[0], TMP0); - vis_xor(DST_0, REF_0, TMP4); - - vis_ld64_2(ref, offset, TMP2); - vis_and(TMP4, MASK_fe, TMP4); - - vis_or(DST_0, REF_0, TMP6); - vis_ld64_2(dest, stride, DST_0); - ref += stride; - vis_mul8x16(CONST_128, TMP4, TMP4); - - vis_ld64(ref[0], TMP12); - vis_faligndata(TMP0, TMP2, REF_0); - - vis_ld64_2(ref, offset, TMP2); - vis_xor(DST_0, REF_0, TMP0); - ref += stride; - - vis_and(TMP0, MASK_fe, TMP0); - - vis_and(TMP4, MASK_7f, TMP4); - - vis_psub16(TMP6, TMP4, TMP4); - vis_st64(TMP4, dest[0]); - dest += stride; - vis_mul8x16(CONST_128, TMP0, TMP0); - - vis_or(DST_0, REF_0, TMP6); - vis_ld64_2(dest, stride, DST_0); - - vis_faligndata(TMP12, TMP2, REF_0); - - vis_and(TMP0, MASK_7f, TMP0); - - vis_psub16(TMP6, TMP0, TMP4); - vis_st64(TMP4, dest[0]); - dest += stride; - } while (--height); - - vis_ld64(ref[0], TMP0); - vis_xor(DST_0, REF_0, TMP4); - - vis_ld64_2(ref, offset, TMP2); - vis_and(TMP4, MASK_fe, TMP4); - - vis_or(DST_0, REF_0, TMP6); - vis_ld64_2(dest, stride, DST_0); - vis_mul8x16(CONST_128, TMP4, TMP4); - - vis_faligndata(TMP0, TMP2, REF_0); - - vis_xor(DST_0, REF_0, TMP0); - - vis_and(TMP0, MASK_fe, TMP0); - - vis_and(TMP4, MASK_7f, TMP4); - - vis_psub16(TMP6, TMP4, TMP4); - vis_st64(TMP4, dest[0]); - dest += stride; - vis_mul8x16(CONST_128, TMP0, TMP0); - - vis_or(DST_0, REF_0, TMP6); - - vis_and(TMP0, MASK_7f, TMP0); - - vis_psub16(TMP6, TMP0, TMP4); - vis_st64(TMP4, dest[0]); -} - -static void MC_put_x_16_vis (uint8_t * dest, uint8_t * _ref, - int stride, int height) -{ - uint8_t *ref = (uint8_t *) _ref; - unsigned long off = (unsigned long) ref & 0x7; - unsigned long off_plus_1 = off + 1; - - ref = 
vis_alignaddr(ref); - - vis_ld64(ref[0], TMP0); - - vis_ld64_2(ref, 8, TMP2); - - vis_ld64_2(ref, 16, TMP4); - - vis_ld64(constants_fe[0], MASK_fe); - - vis_ld64(constants_7f[0], MASK_7f); - vis_faligndata(TMP0, TMP2, REF_0); - - vis_ld64(constants128[0], CONST_128); - vis_faligndata(TMP2, TMP4, REF_4); - - if (off != 0x7) { - vis_alignaddr_g0((void *)off_plus_1); - vis_faligndata(TMP0, TMP2, REF_2); - vis_faligndata(TMP2, TMP4, REF_6); - } else { - vis_src1(TMP2, REF_2); - vis_src1(TMP4, REF_6); - } - - ref += stride; - height = (height >> 1) - 1; - - do { /* 34 cycles */ - vis_ld64(ref[0], TMP0); - vis_xor(REF_0, REF_2, TMP6); - - vis_ld64_2(ref, 8, TMP2); - vis_xor(REF_4, REF_6, TMP8); - - vis_ld64_2(ref, 16, TMP4); - vis_and(TMP6, MASK_fe, TMP6); - ref += stride; - - vis_ld64(ref[0], TMP14); - vis_mul8x16(CONST_128, TMP6, TMP6); - vis_and(TMP8, MASK_fe, TMP8); - - vis_ld64_2(ref, 8, TMP16); - vis_mul8x16(CONST_128, TMP8, TMP8); - vis_or(REF_0, REF_2, TMP10); - - vis_ld64_2(ref, 16, TMP18); - ref += stride; - vis_or(REF_4, REF_6, TMP12); - - vis_alignaddr_g0((void *)off); - - vis_faligndata(TMP0, TMP2, REF_0); - - vis_faligndata(TMP2, TMP4, REF_4); - - if (off != 0x7) { - vis_alignaddr_g0((void *)off_plus_1); - vis_faligndata(TMP0, TMP2, REF_2); - vis_faligndata(TMP2, TMP4, REF_6); - } else { - vis_src1(TMP2, REF_2); - vis_src1(TMP4, REF_6); - } - - vis_and(TMP6, MASK_7f, TMP6); - - vis_and(TMP8, MASK_7f, TMP8); - - vis_psub16(TMP10, TMP6, TMP6); - vis_st64(TMP6, dest[0]); - - vis_psub16(TMP12, TMP8, TMP8); - vis_st64_2(TMP8, dest, 8); - dest += stride; - - vis_xor(REF_0, REF_2, TMP6); - - vis_xor(REF_4, REF_6, TMP8); - - vis_and(TMP6, MASK_fe, TMP6); - - vis_mul8x16(CONST_128, TMP6, TMP6); - vis_and(TMP8, MASK_fe, TMP8); - - vis_mul8x16(CONST_128, TMP8, TMP8); - vis_or(REF_0, REF_2, TMP10); - - vis_or(REF_4, REF_6, TMP12); - - vis_alignaddr_g0((void *)off); - - vis_faligndata(TMP14, TMP16, REF_0); - - vis_faligndata(TMP16, TMP18, REF_4); - - if (off != 0x7) { - 
vis_alignaddr_g0((void *)off_plus_1); - vis_faligndata(TMP14, TMP16, REF_2); - vis_faligndata(TMP16, TMP18, REF_6); - } else { - vis_src1(TMP16, REF_2); - vis_src1(TMP18, REF_6); - } - - vis_and(TMP6, MASK_7f, TMP6); - - vis_and(TMP8, MASK_7f, TMP8); - - vis_psub16(TMP10, TMP6, TMP6); - vis_st64(TMP6, dest[0]); - - vis_psub16(TMP12, TMP8, TMP8); - vis_st64_2(TMP8, dest, 8); - dest += stride; - } while (--height); - - vis_ld64(ref[0], TMP0); - vis_xor(REF_0, REF_2, TMP6); - - vis_ld64_2(ref, 8, TMP2); - vis_xor(REF_4, REF_6, TMP8); - - vis_ld64_2(ref, 16, TMP4); - vis_and(TMP6, MASK_fe, TMP6); - - vis_mul8x16(CONST_128, TMP6, TMP6); - vis_and(TMP8, MASK_fe, TMP8); - - vis_mul8x16(CONST_128, TMP8, TMP8); - vis_or(REF_0, REF_2, TMP10); - - vis_or(REF_4, REF_6, TMP12); - - vis_alignaddr_g0((void *)off); - - vis_faligndata(TMP0, TMP2, REF_0); - - vis_faligndata(TMP2, TMP4, REF_4); - - if (off != 0x7) { - vis_alignaddr_g0((void *)off_plus_1); - vis_faligndata(TMP0, TMP2, REF_2); - vis_faligndata(TMP2, TMP4, REF_6); - } else { - vis_src1(TMP2, REF_2); - vis_src1(TMP4, REF_6); - } - - vis_and(TMP6, MASK_7f, TMP6); - - vis_and(TMP8, MASK_7f, TMP8); - - vis_psub16(TMP10, TMP6, TMP6); - vis_st64(TMP6, dest[0]); - - vis_psub16(TMP12, TMP8, TMP8); - vis_st64_2(TMP8, dest, 8); - dest += stride; - - vis_xor(REF_0, REF_2, TMP6); - - vis_xor(REF_4, REF_6, TMP8); - - vis_and(TMP6, MASK_fe, TMP6); - - vis_mul8x16(CONST_128, TMP6, TMP6); - vis_and(TMP8, MASK_fe, TMP8); - - vis_mul8x16(CONST_128, TMP8, TMP8); - vis_or(REF_0, REF_2, TMP10); - - vis_or(REF_4, REF_6, TMP12); - - vis_and(TMP6, MASK_7f, TMP6); - - vis_and(TMP8, MASK_7f, TMP8); - - vis_psub16(TMP10, TMP6, TMP6); - vis_st64(TMP6, dest[0]); - - vis_psub16(TMP12, TMP8, TMP8); - vis_st64_2(TMP8, dest, 8); -} - -static void MC_put_x_8_vis (uint8_t * dest, uint8_t * _ref, - int stride, int height) -{ - uint8_t *ref = (uint8_t *) _ref; - unsigned long off = (unsigned long) ref & 0x7; - unsigned long off_plus_1 = off + 1; - - ref = 
vis_alignaddr(ref); - - vis_ld64(ref[0], TMP0); - - vis_ld64(ref[8], TMP2); - - vis_ld64(constants_fe[0], MASK_fe); - - vis_ld64(constants_7f[0], MASK_7f); - - vis_ld64(constants128[0], CONST_128); - vis_faligndata(TMP0, TMP2, REF_0); - - if (off != 0x7) { - vis_alignaddr_g0((void *)off_plus_1); - vis_faligndata(TMP0, TMP2, REF_2); - } else { - vis_src1(TMP2, REF_2); - } - - ref += stride; - height = (height >> 1) - 1; - - do { /* 20 cycles */ - vis_ld64(ref[0], TMP0); - vis_xor(REF_0, REF_2, TMP4); - - vis_ld64_2(ref, 8, TMP2); - vis_and(TMP4, MASK_fe, TMP4); - ref += stride; - - vis_ld64(ref[0], TMP8); - vis_or(REF_0, REF_2, TMP6); - vis_mul8x16(CONST_128, TMP4, TMP4); - - vis_alignaddr_g0((void *)off); - - vis_ld64_2(ref, 8, TMP10); - ref += stride; - vis_faligndata(TMP0, TMP2, REF_0); - - if (off != 0x7) { - vis_alignaddr_g0((void *)off_plus_1); - vis_faligndata(TMP0, TMP2, REF_2); - } else { - vis_src1(TMP2, REF_2); - } - - vis_and(TMP4, MASK_7f, TMP4); - - vis_psub16(TMP6, TMP4, DST_0); - vis_st64(DST_0, dest[0]); - dest += stride; - - vis_xor(REF_0, REF_2, TMP12); - - vis_and(TMP12, MASK_fe, TMP12); - - vis_or(REF_0, REF_2, TMP14); - vis_mul8x16(CONST_128, TMP12, TMP12); - - vis_alignaddr_g0((void *)off); - vis_faligndata(TMP8, TMP10, REF_0); - if (off != 0x7) { - vis_alignaddr_g0((void *)off_plus_1); - vis_faligndata(TMP8, TMP10, REF_2); - } else { - vis_src1(TMP10, REF_2); - } - - vis_and(TMP12, MASK_7f, TMP12); - - vis_psub16(TMP14, TMP12, DST_0); - vis_st64(DST_0, dest[0]); - dest += stride; - } while (--height); - - vis_ld64(ref[0], TMP0); - vis_xor(REF_0, REF_2, TMP4); - - vis_ld64_2(ref, 8, TMP2); - vis_and(TMP4, MASK_fe, TMP4); - - vis_or(REF_0, REF_2, TMP6); - vis_mul8x16(CONST_128, TMP4, TMP4); - - vis_alignaddr_g0((void *)off); - - vis_faligndata(TMP0, TMP2, REF_0); - - if (off != 0x7) { - vis_alignaddr_g0((void *)off_plus_1); - vis_faligndata(TMP0, TMP2, REF_2); - } else { - vis_src1(TMP2, REF_2); - } - - vis_and(TMP4, MASK_7f, TMP4); - - 
vis_psub16(TMP6, TMP4, DST_0); - vis_st64(DST_0, dest[0]); - dest += stride; - - vis_xor(REF_0, REF_2, TMP12); - - vis_and(TMP12, MASK_fe, TMP12); - - vis_or(REF_0, REF_2, TMP14); - vis_mul8x16(CONST_128, TMP12, TMP12); - - vis_and(TMP12, MASK_7f, TMP12); - - vis_psub16(TMP14, TMP12, DST_0); - vis_st64(DST_0, dest[0]); - dest += stride; -} - -static void MC_avg_x_16_vis (uint8_t * dest, uint8_t * _ref, - int stride, int height) -{ - uint8_t *ref = (uint8_t *) _ref; - unsigned long off = (unsigned long) ref & 0x7; - unsigned long off_plus_1 = off + 1; - - vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT); - - vis_ld64(constants3[0], CONST_3); - vis_fzero(ZERO); - vis_ld64(constants256_512[0], CONST_256); - - ref = vis_alignaddr(ref); - do { /* 26 cycles */ - vis_ld64(ref[0], TMP0); - - vis_ld64(ref[8], TMP2); - - vis_alignaddr_g0((void *)off); - - vis_ld64(ref[16], TMP4); - - vis_ld64(dest[0], DST_0); - vis_faligndata(TMP0, TMP2, REF_0); - - vis_ld64(dest[8], DST_2); - vis_faligndata(TMP2, TMP4, REF_4); - - if (off != 0x7) { - vis_alignaddr_g0((void *)off_plus_1); - vis_faligndata(TMP0, TMP2, REF_2); - vis_faligndata(TMP2, TMP4, REF_6); - } else { - vis_src1(TMP2, REF_2); - vis_src1(TMP4, REF_6); - } - - vis_mul8x16au(REF_0, CONST_256, TMP0); - - vis_pmerge(ZERO, REF_2, TMP4); - vis_mul8x16au(REF_0_1, CONST_256, TMP2); - - vis_pmerge(ZERO, REF_2_1, TMP6); - - vis_padd16(TMP0, TMP4, TMP0); - - vis_mul8x16al(DST_0, CONST_512, TMP4); - vis_padd16(TMP2, TMP6, TMP2); - - vis_mul8x16al(DST_1, CONST_512, TMP6); - - vis_mul8x16au(REF_6, CONST_256, TMP12); - - vis_padd16(TMP0, TMP4, TMP0); - vis_mul8x16au(REF_6_1, CONST_256, TMP14); - - vis_padd16(TMP2, TMP6, TMP2); - vis_mul8x16au(REF_4, CONST_256, TMP16); - - vis_padd16(TMP0, CONST_3, TMP8); - vis_mul8x16au(REF_4_1, CONST_256, TMP18); - - vis_padd16(TMP2, CONST_3, TMP10); - vis_pack16(TMP8, DST_0); - - vis_pack16(TMP10, DST_1); - vis_padd16(TMP16, TMP12, TMP0); - - vis_st64(DST_0, dest[0]); - vis_mul8x16al(DST_2, CONST_512, TMP4); 
- vis_padd16(TMP18, TMP14, TMP2); - - vis_mul8x16al(DST_3, CONST_512, TMP6); - vis_padd16(TMP0, CONST_3, TMP0); - - vis_padd16(TMP2, CONST_3, TMP2); - - vis_padd16(TMP0, TMP4, TMP0); - - vis_padd16(TMP2, TMP6, TMP2); - vis_pack16(TMP0, DST_2); - - vis_pack16(TMP2, DST_3); - vis_st64(DST_2, dest[8]); - - ref += stride; - dest += stride; - } while (--height); -} - -static void MC_avg_x_8_vis (uint8_t * dest, uint8_t * _ref, - int stride, int height) -{ - uint8_t *ref = (uint8_t *) _ref; - unsigned long off = (unsigned long) ref & 0x7; - unsigned long off_plus_1 = off + 1; - int stride_times_2 = stride << 1; - - vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT); - - vis_ld64(constants3[0], CONST_3); - vis_fzero(ZERO); - vis_ld64(constants256_512[0], CONST_256); - - ref = vis_alignaddr(ref); - height >>= 2; - do { /* 47 cycles */ - vis_ld64(ref[0], TMP0); - - vis_ld64_2(ref, 8, TMP2); - ref += stride; - - vis_alignaddr_g0((void *)off); - - vis_ld64(ref[0], TMP4); - vis_faligndata(TMP0, TMP2, REF_0); - - vis_ld64_2(ref, 8, TMP6); - ref += stride; - - vis_ld64(ref[0], TMP8); - - vis_ld64_2(ref, 8, TMP10); - ref += stride; - vis_faligndata(TMP4, TMP6, REF_4); - - vis_ld64(ref[0], TMP12); - - vis_ld64_2(ref, 8, TMP14); - ref += stride; - vis_faligndata(TMP8, TMP10, REF_S0); - - vis_faligndata(TMP12, TMP14, REF_S4); - - if (off != 0x7) { - vis_alignaddr_g0((void *)off_plus_1); - - vis_ld64(dest[0], DST_0); - vis_faligndata(TMP0, TMP2, REF_2); - - vis_ld64_2(dest, stride, DST_2); - vis_faligndata(TMP4, TMP6, REF_6); - - vis_faligndata(TMP8, TMP10, REF_S2); - - vis_faligndata(TMP12, TMP14, REF_S6); - } else { - vis_ld64(dest[0], DST_0); - vis_src1(TMP2, REF_2); - - vis_ld64_2(dest, stride, DST_2); - vis_src1(TMP6, REF_6); - - vis_src1(TMP10, REF_S2); - - vis_src1(TMP14, REF_S6); - } - - vis_pmerge(ZERO, REF_0, TMP0); - vis_mul8x16au(REF_0_1, CONST_256, TMP2); - - vis_pmerge(ZERO, REF_2, TMP4); - vis_mul8x16au(REF_2_1, CONST_256, TMP6); - - vis_padd16(TMP0, CONST_3, TMP0); - 
vis_mul8x16al(DST_0, CONST_512, TMP16); - - vis_padd16(TMP2, CONST_3, TMP2); - vis_mul8x16al(DST_1, CONST_512, TMP18); - - vis_padd16(TMP0, TMP4, TMP0); - vis_mul8x16au(REF_4, CONST_256, TMP8); - - vis_padd16(TMP2, TMP6, TMP2); - vis_mul8x16au(REF_4_1, CONST_256, TMP10); - - vis_padd16(TMP0, TMP16, TMP0); - vis_mul8x16au(REF_6, CONST_256, TMP12); - - vis_padd16(TMP2, TMP18, TMP2); - vis_mul8x16au(REF_6_1, CONST_256, TMP14); - - vis_padd16(TMP8, CONST_3, TMP8); - vis_mul8x16al(DST_2, CONST_512, TMP16); - - vis_padd16(TMP8, TMP12, TMP8); - vis_mul8x16al(DST_3, CONST_512, TMP18); - - vis_padd16(TMP10, TMP14, TMP10); - vis_pack16(TMP0, DST_0); - - vis_pack16(TMP2, DST_1); - vis_st64(DST_0, dest[0]); - dest += stride; - vis_padd16(TMP10, CONST_3, TMP10); - - vis_ld64_2(dest, stride, DST_0); - vis_padd16(TMP8, TMP16, TMP8); - - vis_ld64_2(dest, stride_times_2, TMP4/*DST_2*/); - vis_padd16(TMP10, TMP18, TMP10); - vis_pack16(TMP8, DST_2); - - vis_pack16(TMP10, DST_3); - vis_st64(DST_2, dest[0]); - dest += stride; - - vis_mul8x16au(REF_S0_1, CONST_256, TMP2); - vis_pmerge(ZERO, REF_S0, TMP0); - - vis_pmerge(ZERO, REF_S2, TMP24); - vis_mul8x16au(REF_S2_1, CONST_256, TMP6); - - vis_padd16(TMP0, CONST_3, TMP0); - vis_mul8x16au(REF_S4, CONST_256, TMP8); - - vis_padd16(TMP2, CONST_3, TMP2); - vis_mul8x16au(REF_S4_1, CONST_256, TMP10); - - vis_padd16(TMP0, TMP24, TMP0); - vis_mul8x16au(REF_S6, CONST_256, TMP12); - - vis_padd16(TMP2, TMP6, TMP2); - vis_mul8x16au(REF_S6_1, CONST_256, TMP14); - - vis_padd16(TMP8, CONST_3, TMP8); - vis_mul8x16al(DST_0, CONST_512, TMP16); - - vis_padd16(TMP10, CONST_3, TMP10); - vis_mul8x16al(DST_1, CONST_512, TMP18); - - vis_padd16(TMP8, TMP12, TMP8); - vis_mul8x16al(TMP4/*DST_2*/, CONST_512, TMP20); - - vis_mul8x16al(TMP5/*DST_3*/, CONST_512, TMP22); - vis_padd16(TMP0, TMP16, TMP0); - - vis_padd16(TMP2, TMP18, TMP2); - vis_pack16(TMP0, DST_0); - - vis_padd16(TMP10, TMP14, TMP10); - vis_pack16(TMP2, DST_1); - vis_st64(DST_0, dest[0]); - dest += 
stride; - - vis_padd16(TMP8, TMP20, TMP8); - - vis_padd16(TMP10, TMP22, TMP10); - vis_pack16(TMP8, DST_2); - - vis_pack16(TMP10, DST_3); - vis_st64(DST_2, dest[0]); - dest += stride; - } while (--height); -} - -static void MC_put_y_16_vis (uint8_t * dest, uint8_t * _ref, - int stride, int height) -{ - uint8_t *ref = (uint8_t *) _ref; - int offset; - - ref = vis_alignaddr(ref); - offset = (ref != _ref) ? 16 : 0; - - vis_ld64(ref[0], TMP0); - - vis_ld64_2(ref, 8, TMP2); - - vis_ld64_2(ref, offset, TMP4); - ref += stride; - - vis_ld64(ref[0], TMP6); - vis_faligndata(TMP0, TMP2, REF_0); - - vis_ld64_2(ref, 8, TMP8); - vis_faligndata(TMP2, TMP4, REF_4); - - vis_ld64_2(ref, offset, TMP10); - ref += stride; - - vis_ld64(constants_fe[0], MASK_fe); - vis_faligndata(TMP6, TMP8, REF_2); - - vis_ld64(constants_7f[0], MASK_7f); - vis_faligndata(TMP8, TMP10, REF_6); - - vis_ld64(constants128[0], CONST_128); - height = (height >> 1) - 1; - do { /* 24 cycles */ - vis_ld64(ref[0], TMP0); - vis_xor(REF_0, REF_2, TMP12); - - vis_ld64_2(ref, 8, TMP2); - vis_xor(REF_4, REF_6, TMP16); - - vis_ld64_2(ref, offset, TMP4); - ref += stride; - vis_or(REF_0, REF_2, TMP14); - - vis_ld64(ref[0], TMP6); - vis_or(REF_4, REF_6, TMP18); - - vis_ld64_2(ref, 8, TMP8); - vis_faligndata(TMP0, TMP2, REF_0); - - vis_ld64_2(ref, offset, TMP10); - ref += stride; - vis_faligndata(TMP2, TMP4, REF_4); - - vis_and(TMP12, MASK_fe, TMP12); - - vis_and(TMP16, MASK_fe, TMP16); - vis_mul8x16(CONST_128, TMP12, TMP12); - - vis_mul8x16(CONST_128, TMP16, TMP16); - vis_xor(REF_0, REF_2, TMP0); - - vis_xor(REF_4, REF_6, TMP2); - - vis_or(REF_0, REF_2, TMP20); - - vis_and(TMP12, MASK_7f, TMP12); - - vis_and(TMP16, MASK_7f, TMP16); - - vis_psub16(TMP14, TMP12, TMP12); - vis_st64(TMP12, dest[0]); - - vis_psub16(TMP18, TMP16, TMP16); - vis_st64_2(TMP16, dest, 8); - dest += stride; - - vis_or(REF_4, REF_6, TMP18); - - vis_and(TMP0, MASK_fe, TMP0); - - vis_and(TMP2, MASK_fe, TMP2); - vis_mul8x16(CONST_128, TMP0, TMP0); - - 
vis_faligndata(TMP6, TMP8, REF_2); - vis_mul8x16(CONST_128, TMP2, TMP2); - - vis_faligndata(TMP8, TMP10, REF_6); - - vis_and(TMP0, MASK_7f, TMP0); - - vis_and(TMP2, MASK_7f, TMP2); - - vis_psub16(TMP20, TMP0, TMP0); - vis_st64(TMP0, dest[0]); - - vis_psub16(TMP18, TMP2, TMP2); - vis_st64_2(TMP2, dest, 8); - dest += stride; - } while (--height); - - vis_ld64(ref[0], TMP0); - vis_xor(REF_0, REF_2, TMP12); - - vis_ld64_2(ref, 8, TMP2); - vis_xor(REF_4, REF_6, TMP16); - - vis_ld64_2(ref, offset, TMP4); - vis_or(REF_0, REF_2, TMP14); - - vis_or(REF_4, REF_6, TMP18); - - vis_faligndata(TMP0, TMP2, REF_0); - - vis_faligndata(TMP2, TMP4, REF_4); - - vis_and(TMP12, MASK_fe, TMP12); - - vis_and(TMP16, MASK_fe, TMP16); - vis_mul8x16(CONST_128, TMP12, TMP12); - - vis_mul8x16(CONST_128, TMP16, TMP16); - vis_xor(REF_0, REF_2, TMP0); - - vis_xor(REF_4, REF_6, TMP2); - - vis_or(REF_0, REF_2, TMP20); - - vis_and(TMP12, MASK_7f, TMP12); - - vis_and(TMP16, MASK_7f, TMP16); - - vis_psub16(TMP14, TMP12, TMP12); - vis_st64(TMP12, dest[0]); - - vis_psub16(TMP18, TMP16, TMP16); - vis_st64_2(TMP16, dest, 8); - dest += stride; - - vis_or(REF_4, REF_6, TMP18); - - vis_and(TMP0, MASK_fe, TMP0); - - vis_and(TMP2, MASK_fe, TMP2); - vis_mul8x16(CONST_128, TMP0, TMP0); - - vis_mul8x16(CONST_128, TMP2, TMP2); - - vis_and(TMP0, MASK_7f, TMP0); - - vis_and(TMP2, MASK_7f, TMP2); - - vis_psub16(TMP20, TMP0, TMP0); - vis_st64(TMP0, dest[0]); - - vis_psub16(TMP18, TMP2, TMP2); - vis_st64_2(TMP2, dest, 8); -} - -static void MC_put_y_8_vis (uint8_t * dest, uint8_t * _ref, - int stride, int height) -{ - uint8_t *ref = (uint8_t *) _ref; - int offset; - - ref = vis_alignaddr(ref); - offset = (ref != _ref) ? 
8 : 0; - - vis_ld64(ref[0], TMP0); - - vis_ld64_2(ref, offset, TMP2); - ref += stride; - - vis_ld64(ref[0], TMP4); - - vis_ld64_2(ref, offset, TMP6); - ref += stride; - - vis_ld64(constants_fe[0], MASK_fe); - vis_faligndata(TMP0, TMP2, REF_0); - - vis_ld64(constants_7f[0], MASK_7f); - vis_faligndata(TMP4, TMP6, REF_2); - - vis_ld64(constants128[0], CONST_128); - height = (height >> 1) - 1; - do { /* 12 cycles */ - vis_ld64(ref[0], TMP0); - vis_xor(REF_0, REF_2, TMP4); - - vis_ld64_2(ref, offset, TMP2); - ref += stride; - vis_and(TMP4, MASK_fe, TMP4); - - vis_or(REF_0, REF_2, TMP6); - vis_mul8x16(CONST_128, TMP4, TMP4); - - vis_faligndata(TMP0, TMP2, REF_0); - vis_ld64(ref[0], TMP0); - - vis_ld64_2(ref, offset, TMP2); - ref += stride; - vis_xor(REF_0, REF_2, TMP12); - - vis_and(TMP4, MASK_7f, TMP4); - - vis_and(TMP12, MASK_fe, TMP12); - - vis_mul8x16(CONST_128, TMP12, TMP12); - vis_or(REF_0, REF_2, TMP14); - - vis_psub16(TMP6, TMP4, DST_0); - vis_st64(DST_0, dest[0]); - dest += stride; - - vis_faligndata(TMP0, TMP2, REF_2); - - vis_and(TMP12, MASK_7f, TMP12); - - vis_psub16(TMP14, TMP12, DST_0); - vis_st64(DST_0, dest[0]); - dest += stride; - } while (--height); - - vis_ld64(ref[0], TMP0); - vis_xor(REF_0, REF_2, TMP4); - - vis_ld64_2(ref, offset, TMP2); - vis_and(TMP4, MASK_fe, TMP4); - - vis_or(REF_0, REF_2, TMP6); - vis_mul8x16(CONST_128, TMP4, TMP4); - - vis_faligndata(TMP0, TMP2, REF_0); - - vis_xor(REF_0, REF_2, TMP12); - - vis_and(TMP4, MASK_7f, TMP4); - - vis_and(TMP12, MASK_fe, TMP12); - - vis_mul8x16(CONST_128, TMP12, TMP12); - vis_or(REF_0, REF_2, TMP14); - - vis_psub16(TMP6, TMP4, DST_0); - vis_st64(DST_0, dest[0]); - dest += stride; - - vis_and(TMP12, MASK_7f, TMP12); - - vis_psub16(TMP14, TMP12, DST_0); - vis_st64(DST_0, dest[0]); -} - -static void MC_avg_y_16_vis (uint8_t * dest, uint8_t * _ref, - int stride, int height) -{ - uint8_t *ref = (uint8_t *) _ref; - int stride_8 = stride + 8; - int stride_16; - int offset; - - vis_set_gsr(5 << 
VIS_GSR_SCALEFACT_SHIFT); - - ref = vis_alignaddr(ref); - offset = (ref != _ref) ? 16 : 0; - - vis_ld64(ref[ 0], TMP0); - vis_fzero(ZERO); - - vis_ld64(ref[ 8], TMP2); - - vis_ld64_2(ref, offset, TMP4); - stride_16 = stride + offset; - - vis_ld64(constants3[0], CONST_3); - vis_faligndata(TMP0, TMP2, REF_2); - - vis_ld64(constants256_512[0], CONST_256); - vis_faligndata(TMP2, TMP4, REF_6); - height >>= 1; - - do { /* 31 cycles */ - vis_ld64_2(ref, stride, TMP0); - vis_pmerge(ZERO, REF_2, TMP12); - vis_mul8x16au(REF_2_1, CONST_256, TMP14); - - vis_ld64_2(ref, stride_8, TMP2); - vis_pmerge(ZERO, REF_6, TMP16); - vis_mul8x16au(REF_6_1, CONST_256, TMP18); - - vis_ld64_2(ref, stride_16, TMP4); - ref += stride; - - vis_ld64(dest[0], DST_0); - vis_faligndata(TMP0, TMP2, REF_0); - - vis_ld64_2(dest, 8, DST_2); - vis_faligndata(TMP2, TMP4, REF_4); - - vis_ld64_2(ref, stride, TMP6); - vis_pmerge(ZERO, REF_0, TMP0); - vis_mul8x16au(REF_0_1, CONST_256, TMP2); - - vis_ld64_2(ref, stride_8, TMP8); - vis_pmerge(ZERO, REF_4, TMP4); - - vis_ld64_2(ref, stride_16, TMP10); - ref += stride; - - vis_ld64_2(dest, stride, REF_S0/*DST_4*/); - vis_faligndata(TMP6, TMP8, REF_2); - vis_mul8x16au(REF_4_1, CONST_256, TMP6); - - vis_ld64_2(dest, stride_8, REF_S2/*DST_6*/); - vis_faligndata(TMP8, TMP10, REF_6); - vis_mul8x16al(DST_0, CONST_512, TMP20); - - vis_padd16(TMP0, CONST_3, TMP0); - vis_mul8x16al(DST_1, CONST_512, TMP22); - - vis_padd16(TMP2, CONST_3, TMP2); - vis_mul8x16al(DST_2, CONST_512, TMP24); - - vis_padd16(TMP4, CONST_3, TMP4); - vis_mul8x16al(DST_3, CONST_512, TMP26); - - vis_padd16(TMP6, CONST_3, TMP6); - - vis_padd16(TMP12, TMP20, TMP12); - vis_mul8x16al(REF_S0, CONST_512, TMP20); - - vis_padd16(TMP14, TMP22, TMP14); - vis_mul8x16al(REF_S0_1, CONST_512, TMP22); - - vis_padd16(TMP16, TMP24, TMP16); - vis_mul8x16al(REF_S2, CONST_512, TMP24); - - vis_padd16(TMP18, TMP26, TMP18); - vis_mul8x16al(REF_S2_1, CONST_512, TMP26); - - vis_padd16(TMP12, TMP0, TMP12); - vis_mul8x16au(REF_2, 
CONST_256, TMP28); - - vis_padd16(TMP14, TMP2, TMP14); - vis_mul8x16au(REF_2_1, CONST_256, TMP30); - - vis_padd16(TMP16, TMP4, TMP16); - vis_mul8x16au(REF_6, CONST_256, REF_S4); - - vis_padd16(TMP18, TMP6, TMP18); - vis_mul8x16au(REF_6_1, CONST_256, REF_S6); - - vis_pack16(TMP12, DST_0); - vis_padd16(TMP28, TMP0, TMP12); - - vis_pack16(TMP14, DST_1); - vis_st64(DST_0, dest[0]); - vis_padd16(TMP30, TMP2, TMP14); - - vis_pack16(TMP16, DST_2); - vis_padd16(REF_S4, TMP4, TMP16); - - vis_pack16(TMP18, DST_3); - vis_st64_2(DST_2, dest, 8); - dest += stride; - vis_padd16(REF_S6, TMP6, TMP18); - - vis_padd16(TMP12, TMP20, TMP12); - - vis_padd16(TMP14, TMP22, TMP14); - vis_pack16(TMP12, DST_0); - - vis_padd16(TMP16, TMP24, TMP16); - vis_pack16(TMP14, DST_1); - vis_st64(DST_0, dest[0]); - - vis_padd16(TMP18, TMP26, TMP18); - vis_pack16(TMP16, DST_2); - - vis_pack16(TMP18, DST_3); - vis_st64_2(DST_2, dest, 8); - dest += stride; - } while (--height); -} - -static void MC_avg_y_8_vis (uint8_t * dest, uint8_t * _ref, - int stride, int height) -{ - uint8_t *ref = (uint8_t *) _ref; - int stride_8; - int offset; - - vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT); - - ref = vis_alignaddr(ref); - offset = (ref != _ref) ? 
8 : 0; - - vis_ld64(ref[ 0], TMP0); - vis_fzero(ZERO); - - vis_ld64_2(ref, offset, TMP2); - stride_8 = stride + offset; - - vis_ld64(constants3[0], CONST_3); - vis_faligndata(TMP0, TMP2, REF_2); - - vis_ld64(constants256_512[0], CONST_256); - - height >>= 1; - do { /* 20 cycles */ - vis_ld64_2(ref, stride, TMP0); - vis_pmerge(ZERO, REF_2, TMP8); - vis_mul8x16au(REF_2_1, CONST_256, TMP10); - - vis_ld64_2(ref, stride_8, TMP2); - ref += stride; - - vis_ld64(dest[0], DST_0); - - vis_ld64_2(dest, stride, DST_2); - vis_faligndata(TMP0, TMP2, REF_0); - - vis_ld64_2(ref, stride, TMP4); - vis_mul8x16al(DST_0, CONST_512, TMP16); - vis_pmerge(ZERO, REF_0, TMP12); - - vis_ld64_2(ref, stride_8, TMP6); - ref += stride; - vis_mul8x16al(DST_1, CONST_512, TMP18); - vis_pmerge(ZERO, REF_0_1, TMP14); - - vis_padd16(TMP12, CONST_3, TMP12); - vis_mul8x16al(DST_2, CONST_512, TMP24); - - vis_padd16(TMP14, CONST_3, TMP14); - vis_mul8x16al(DST_3, CONST_512, TMP26); - - vis_faligndata(TMP4, TMP6, REF_2); - - vis_padd16(TMP8, TMP12, TMP8); - - vis_padd16(TMP10, TMP14, TMP10); - vis_mul8x16au(REF_2, CONST_256, TMP20); - - vis_padd16(TMP8, TMP16, TMP0); - vis_mul8x16au(REF_2_1, CONST_256, TMP22); - - vis_padd16(TMP10, TMP18, TMP2); - vis_pack16(TMP0, DST_0); - - vis_pack16(TMP2, DST_1); - vis_st64(DST_0, dest[0]); - dest += stride; - vis_padd16(TMP12, TMP20, TMP12); - - vis_padd16(TMP14, TMP22, TMP14); - - vis_padd16(TMP12, TMP24, TMP0); - - vis_padd16(TMP14, TMP26, TMP2); - vis_pack16(TMP0, DST_2); - - vis_pack16(TMP2, DST_3); - vis_st64(DST_2, dest[0]); - dest += stride; - } while (--height); -} - -static void MC_put_xy_16_vis (uint8_t * dest, uint8_t * _ref, - int stride, int height) -{ - uint8_t *ref = (uint8_t *) _ref; - unsigned long off = (unsigned long) ref & 0x7; - unsigned long off_plus_1 = off + 1; - int stride_8 = stride + 8; - int stride_16 = stride + 16; - - vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT); - - ref = vis_alignaddr(ref); - - vis_ld64(ref[ 0], TMP0); - vis_fzero(ZERO); - 
- vis_ld64(ref[ 8], TMP2); - - vis_ld64(ref[16], TMP4); - - vis_ld64(constants2[0], CONST_2); - vis_faligndata(TMP0, TMP2, REF_S0); - - vis_ld64(constants256_512[0], CONST_256); - vis_faligndata(TMP2, TMP4, REF_S4); - - if (off != 0x7) { - vis_alignaddr_g0((void *)off_plus_1); - vis_faligndata(TMP0, TMP2, REF_S2); - vis_faligndata(TMP2, TMP4, REF_S6); - } else { - vis_src1(TMP2, REF_S2); - vis_src1(TMP4, REF_S6); - } - - height >>= 1; - do { - vis_ld64_2(ref, stride, TMP0); - vis_mul8x16au(REF_S0, CONST_256, TMP12); - vis_pmerge(ZERO, REF_S0_1, TMP14); - - vis_alignaddr_g0((void *)off); - - vis_ld64_2(ref, stride_8, TMP2); - vis_mul8x16au(REF_S2, CONST_256, TMP16); - vis_pmerge(ZERO, REF_S2_1, TMP18); - - vis_ld64_2(ref, stride_16, TMP4); - ref += stride; - vis_mul8x16au(REF_S4, CONST_256, TMP20); - vis_pmerge(ZERO, REF_S4_1, TMP22); - - vis_ld64_2(ref, stride, TMP6); - vis_mul8x16au(REF_S6, CONST_256, TMP24); - vis_pmerge(ZERO, REF_S6_1, TMP26); - - vis_ld64_2(ref, stride_8, TMP8); - vis_faligndata(TMP0, TMP2, REF_0); - - vis_ld64_2(ref, stride_16, TMP10); - ref += stride; - vis_faligndata(TMP2, TMP4, REF_4); - - vis_faligndata(TMP6, TMP8, REF_S0); - - vis_faligndata(TMP8, TMP10, REF_S4); - - if (off != 0x7) { - vis_alignaddr_g0((void *)off_plus_1); - vis_faligndata(TMP0, TMP2, REF_2); - vis_faligndata(TMP2, TMP4, REF_6); - vis_faligndata(TMP6, TMP8, REF_S2); - vis_faligndata(TMP8, TMP10, REF_S6); - } else { - vis_src1(TMP2, REF_2); - vis_src1(TMP4, REF_6); - vis_src1(TMP8, REF_S2); - vis_src1(TMP10, REF_S6); - } - - vis_mul8x16au(REF_0, CONST_256, TMP0); - vis_pmerge(ZERO, REF_0_1, TMP2); - - vis_mul8x16au(REF_2, CONST_256, TMP4); - vis_pmerge(ZERO, REF_2_1, TMP6); - - vis_padd16(TMP0, CONST_2, TMP8); - vis_mul8x16au(REF_4, CONST_256, TMP0); - - vis_padd16(TMP2, CONST_2, TMP10); - vis_mul8x16au(REF_4_1, CONST_256, TMP2); - - vis_padd16(TMP8, TMP4, TMP8); - vis_mul8x16au(REF_6, CONST_256, TMP4); - - vis_padd16(TMP10, TMP6, TMP10); - vis_mul8x16au(REF_6_1, 
CONST_256, TMP6); - - vis_padd16(TMP12, TMP8, TMP12); - - vis_padd16(TMP14, TMP10, TMP14); - - vis_padd16(TMP12, TMP16, TMP12); - - vis_padd16(TMP14, TMP18, TMP14); - vis_pack16(TMP12, DST_0); - - vis_pack16(TMP14, DST_1); - vis_st64(DST_0, dest[0]); - vis_padd16(TMP0, CONST_2, TMP12); - - vis_mul8x16au(REF_S0, CONST_256, TMP0); - vis_padd16(TMP2, CONST_2, TMP14); - - vis_mul8x16au(REF_S0_1, CONST_256, TMP2); - vis_padd16(TMP12, TMP4, TMP12); - - vis_mul8x16au(REF_S2, CONST_256, TMP4); - vis_padd16(TMP14, TMP6, TMP14); - - vis_mul8x16au(REF_S2_1, CONST_256, TMP6); - vis_padd16(TMP20, TMP12, TMP20); - - vis_padd16(TMP22, TMP14, TMP22); - - vis_padd16(TMP20, TMP24, TMP20); - - vis_padd16(TMP22, TMP26, TMP22); - vis_pack16(TMP20, DST_2); - - vis_pack16(TMP22, DST_3); - vis_st64_2(DST_2, dest, 8); - dest += stride; - vis_padd16(TMP0, TMP4, TMP24); - - vis_mul8x16au(REF_S4, CONST_256, TMP0); - vis_padd16(TMP2, TMP6, TMP26); - - vis_mul8x16au(REF_S4_1, CONST_256, TMP2); - vis_padd16(TMP24, TMP8, TMP24); - - vis_padd16(TMP26, TMP10, TMP26); - vis_pack16(TMP24, DST_0); - - vis_pack16(TMP26, DST_1); - vis_st64(DST_0, dest[0]); - vis_pmerge(ZERO, REF_S6, TMP4); - - vis_pmerge(ZERO, REF_S6_1, TMP6); - - vis_padd16(TMP0, TMP4, TMP0); - - vis_padd16(TMP2, TMP6, TMP2); - - vis_padd16(TMP0, TMP12, TMP0); - - vis_padd16(TMP2, TMP14, TMP2); - vis_pack16(TMP0, DST_2); - - vis_pack16(TMP2, DST_3); - vis_st64_2(DST_2, dest, 8); - dest += stride; - } while (--height); -} - -static void MC_put_xy_8_vis (uint8_t * dest, uint8_t * _ref, - int stride, int height) -{ - uint8_t *ref = (uint8_t *) _ref; - unsigned long off = (unsigned long) ref & 0x7; - unsigned long off_plus_1 = off + 1; - int stride_8 = stride + 8; - - vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT); - - ref = vis_alignaddr(ref); - - vis_ld64(ref[ 0], TMP0); - vis_fzero(ZERO); - - vis_ld64(ref[ 8], TMP2); - - vis_ld64(constants2[0], CONST_2); - - vis_ld64(constants256_512[0], CONST_256); - vis_faligndata(TMP0, TMP2, REF_S0); - - 
if (off != 0x7) { - vis_alignaddr_g0((void *)off_plus_1); - vis_faligndata(TMP0, TMP2, REF_S2); - } else { - vis_src1(TMP2, REF_S2); - } - - height >>= 1; - do { /* 26 cycles */ - vis_ld64_2(ref, stride, TMP0); - vis_mul8x16au(REF_S0, CONST_256, TMP8); - vis_pmerge(ZERO, REF_S2, TMP12); - - vis_alignaddr_g0((void *)off); - - vis_ld64_2(ref, stride_8, TMP2); - ref += stride; - vis_mul8x16au(REF_S0_1, CONST_256, TMP10); - vis_pmerge(ZERO, REF_S2_1, TMP14); - - vis_ld64_2(ref, stride, TMP4); - - vis_ld64_2(ref, stride_8, TMP6); - ref += stride; - vis_faligndata(TMP0, TMP2, REF_S4); - - vis_pmerge(ZERO, REF_S4, TMP18); - - vis_pmerge(ZERO, REF_S4_1, TMP20); - - vis_faligndata(TMP4, TMP6, REF_S0); - - if (off != 0x7) { - vis_alignaddr_g0((void *)off_plus_1); - vis_faligndata(TMP0, TMP2, REF_S6); - vis_faligndata(TMP4, TMP6, REF_S2); - } else { - vis_src1(TMP2, REF_S6); - vis_src1(TMP6, REF_S2); - } - - vis_padd16(TMP18, CONST_2, TMP18); - vis_mul8x16au(REF_S6, CONST_256, TMP22); - - vis_padd16(TMP20, CONST_2, TMP20); - vis_mul8x16au(REF_S6_1, CONST_256, TMP24); - - vis_mul8x16au(REF_S0, CONST_256, TMP26); - vis_pmerge(ZERO, REF_S0_1, TMP28); - - vis_mul8x16au(REF_S2, CONST_256, TMP30); - vis_padd16(TMP18, TMP22, TMP18); - - vis_mul8x16au(REF_S2_1, CONST_256, TMP32); - vis_padd16(TMP20, TMP24, TMP20); - - vis_padd16(TMP8, TMP18, TMP8); - - vis_padd16(TMP10, TMP20, TMP10); - - vis_padd16(TMP8, TMP12, TMP8); - - vis_padd16(TMP10, TMP14, TMP10); - vis_pack16(TMP8, DST_0); - - vis_pack16(TMP10, DST_1); - vis_st64(DST_0, dest[0]); - dest += stride; - vis_padd16(TMP18, TMP26, TMP18); - - vis_padd16(TMP20, TMP28, TMP20); - - vis_padd16(TMP18, TMP30, TMP18); - - vis_padd16(TMP20, TMP32, TMP20); - vis_pack16(TMP18, DST_2); - - vis_pack16(TMP20, DST_3); - vis_st64(DST_2, dest[0]); - dest += stride; - } while (--height); -} - -static void MC_avg_xy_16_vis (uint8_t * dest, uint8_t * _ref, - int stride, int height) -{ - uint8_t *ref = (uint8_t *) _ref; - unsigned long off = (unsigned 
long) ref & 0x7; - unsigned long off_plus_1 = off + 1; - int stride_8 = stride + 8; - int stride_16 = stride + 16; - - vis_set_gsr(4 << VIS_GSR_SCALEFACT_SHIFT); - - ref = vis_alignaddr(ref); - - vis_ld64(ref[ 0], TMP0); - vis_fzero(ZERO); - - vis_ld64(ref[ 8], TMP2); - - vis_ld64(ref[16], TMP4); - - vis_ld64(constants6[0], CONST_6); - vis_faligndata(TMP0, TMP2, REF_S0); - - vis_ld64(constants256_1024[0], CONST_256); - vis_faligndata(TMP2, TMP4, REF_S4); - - if (off != 0x7) { - vis_alignaddr_g0((void *)off_plus_1); - vis_faligndata(TMP0, TMP2, REF_S2); - vis_faligndata(TMP2, TMP4, REF_S6); - } else { - vis_src1(TMP2, REF_S2); - vis_src1(TMP4, REF_S6); - } - - height >>= 1; - do { /* 55 cycles */ - vis_ld64_2(ref, stride, TMP0); - vis_mul8x16au(REF_S0, CONST_256, TMP12); - vis_pmerge(ZERO, REF_S0_1, TMP14); - - vis_alignaddr_g0((void *)off); - - vis_ld64_2(ref, stride_8, TMP2); - vis_mul8x16au(REF_S2, CONST_256, TMP16); - vis_pmerge(ZERO, REF_S2_1, TMP18); - - vis_ld64_2(ref, stride_16, TMP4); - ref += stride; - vis_mul8x16au(REF_S4, CONST_256, TMP20); - vis_pmerge(ZERO, REF_S4_1, TMP22); - - vis_ld64_2(ref, stride, TMP6); - vis_mul8x16au(REF_S6, CONST_256, TMP24); - vis_pmerge(ZERO, REF_S6_1, TMP26); - - vis_ld64_2(ref, stride_8, TMP8); - vis_faligndata(TMP0, TMP2, REF_0); - - vis_ld64_2(ref, stride_16, TMP10); - ref += stride; - vis_faligndata(TMP2, TMP4, REF_4); - - vis_ld64(dest[0], DST_0); - vis_faligndata(TMP6, TMP8, REF_S0); - - vis_ld64_2(dest, 8, DST_2); - vis_faligndata(TMP8, TMP10, REF_S4); - - if (off != 0x7) { - vis_alignaddr_g0((void *)off_plus_1); - vis_faligndata(TMP0, TMP2, REF_2); - vis_faligndata(TMP2, TMP4, REF_6); - vis_faligndata(TMP6, TMP8, REF_S2); - vis_faligndata(TMP8, TMP10, REF_S6); - } else { - vis_src1(TMP2, REF_2); - vis_src1(TMP4, REF_6); - vis_src1(TMP8, REF_S2); - vis_src1(TMP10, REF_S6); - } - - vis_mul8x16al(DST_0, CONST_1024, TMP30); - vis_pmerge(ZERO, REF_0, TMP0); - - vis_mul8x16al(DST_1, CONST_1024, TMP32); - vis_pmerge(ZERO, 
REF_0_1, TMP2); - - vis_mul8x16au(REF_2, CONST_256, TMP4); - vis_pmerge(ZERO, REF_2_1, TMP6); - - vis_mul8x16al(DST_2, CONST_1024, REF_0); - vis_padd16(TMP0, CONST_6, TMP0); - - vis_mul8x16al(DST_3, CONST_1024, REF_2); - vis_padd16(TMP2, CONST_6, TMP2); - - vis_padd16(TMP0, TMP4, TMP0); - vis_mul8x16au(REF_4, CONST_256, TMP4); - - vis_padd16(TMP2, TMP6, TMP2); - vis_mul8x16au(REF_4_1, CONST_256, TMP6); - - vis_padd16(TMP12, TMP0, TMP12); - vis_mul8x16au(REF_6, CONST_256, TMP8); - - vis_padd16(TMP14, TMP2, TMP14); - vis_mul8x16au(REF_6_1, CONST_256, TMP10); - - vis_padd16(TMP12, TMP16, TMP12); - vis_mul8x16au(REF_S0, CONST_256, REF_4); - - vis_padd16(TMP14, TMP18, TMP14); - vis_mul8x16au(REF_S0_1, CONST_256, REF_6); - - vis_padd16(TMP12, TMP30, TMP12); - - vis_padd16(TMP14, TMP32, TMP14); - vis_pack16(TMP12, DST_0); - - vis_pack16(TMP14, DST_1); - vis_st64(DST_0, dest[0]); - vis_padd16(TMP4, CONST_6, TMP4); - - vis_ld64_2(dest, stride, DST_0); - vis_padd16(TMP6, CONST_6, TMP6); - vis_mul8x16au(REF_S2, CONST_256, TMP12); - - vis_padd16(TMP4, TMP8, TMP4); - vis_mul8x16au(REF_S2_1, CONST_256, TMP14); - - vis_padd16(TMP6, TMP10, TMP6); - - vis_padd16(TMP20, TMP4, TMP20); - - vis_padd16(TMP22, TMP6, TMP22); - - vis_padd16(TMP20, TMP24, TMP20); - - vis_padd16(TMP22, TMP26, TMP22); - - vis_padd16(TMP20, REF_0, TMP20); - vis_mul8x16au(REF_S4, CONST_256, REF_0); - - vis_padd16(TMP22, REF_2, TMP22); - vis_pack16(TMP20, DST_2); - - vis_pack16(TMP22, DST_3); - vis_st64_2(DST_2, dest, 8); - dest += stride; - - vis_ld64_2(dest, 8, DST_2); - vis_mul8x16al(DST_0, CONST_1024, TMP30); - vis_pmerge(ZERO, REF_S4_1, REF_2); - - vis_mul8x16al(DST_1, CONST_1024, TMP32); - vis_padd16(REF_4, TMP0, TMP8); - - vis_mul8x16au(REF_S6, CONST_256, REF_4); - vis_padd16(REF_6, TMP2, TMP10); - - vis_mul8x16au(REF_S6_1, CONST_256, REF_6); - vis_padd16(TMP8, TMP12, TMP8); - - vis_padd16(TMP10, TMP14, TMP10); - - vis_padd16(TMP8, TMP30, TMP8); - - vis_padd16(TMP10, TMP32, TMP10); - vis_pack16(TMP8, 
DST_0); - - vis_pack16(TMP10, DST_1); - vis_st64(DST_0, dest[0]); - - vis_padd16(REF_0, TMP4, REF_0); - - vis_mul8x16al(DST_2, CONST_1024, TMP30); - vis_padd16(REF_2, TMP6, REF_2); - - vis_mul8x16al(DST_3, CONST_1024, TMP32); - vis_padd16(REF_0, REF_4, REF_0); - - vis_padd16(REF_2, REF_6, REF_2); - - vis_padd16(REF_0, TMP30, REF_0); - - /* stall */ - - vis_padd16(REF_2, TMP32, REF_2); - vis_pack16(REF_0, DST_2); - - vis_pack16(REF_2, DST_3); - vis_st64_2(DST_2, dest, 8); - dest += stride; - } while (--height); -} - -static void MC_avg_xy_8_vis (uint8_t * dest, uint8_t * _ref, - int stride, int height) -{ - uint8_t *ref = (uint8_t *) _ref; - unsigned long off = (unsigned long) ref & 0x7; - unsigned long off_plus_1 = off + 1; - int stride_8 = stride + 8; - - vis_set_gsr(4 << VIS_GSR_SCALEFACT_SHIFT); - - ref = vis_alignaddr(ref); - - vis_ld64(ref[0], TMP0); - vis_fzero(ZERO); - - vis_ld64_2(ref, 8, TMP2); - - vis_ld64(constants6[0], CONST_6); - - vis_ld64(constants256_1024[0], CONST_256); - vis_faligndata(TMP0, TMP2, REF_S0); - - if (off != 0x7) { - vis_alignaddr_g0((void *)off_plus_1); - vis_faligndata(TMP0, TMP2, REF_S2); - } else { - vis_src1(TMP2, REF_S2); - } - - height >>= 1; - do { /* 31 cycles */ - vis_ld64_2(ref, stride, TMP0); - vis_mul8x16au(REF_S0, CONST_256, TMP8); - vis_pmerge(ZERO, REF_S0_1, TMP10); - - vis_ld64_2(ref, stride_8, TMP2); - ref += stride; - vis_mul8x16au(REF_S2, CONST_256, TMP12); - vis_pmerge(ZERO, REF_S2_1, TMP14); - - vis_alignaddr_g0((void *)off); - - vis_ld64_2(ref, stride, TMP4); - vis_faligndata(TMP0, TMP2, REF_S4); - - vis_ld64_2(ref, stride_8, TMP6); - ref += stride; - - vis_ld64(dest[0], DST_0); - vis_faligndata(TMP4, TMP6, REF_S0); - - vis_ld64_2(dest, stride, DST_2); - - if (off != 0x7) { - vis_alignaddr_g0((void *)off_plus_1); - vis_faligndata(TMP0, TMP2, REF_S6); - vis_faligndata(TMP4, TMP6, REF_S2); - } else { - vis_src1(TMP2, REF_S6); - vis_src1(TMP6, REF_S2); - } - - vis_mul8x16al(DST_0, CONST_1024, TMP30); - 
vis_pmerge(ZERO, REF_S4, TMP22); - - vis_mul8x16al(DST_1, CONST_1024, TMP32); - vis_pmerge(ZERO, REF_S4_1, TMP24); - - vis_mul8x16au(REF_S6, CONST_256, TMP26); - vis_pmerge(ZERO, REF_S6_1, TMP28); - - vis_mul8x16au(REF_S0, CONST_256, REF_S4); - vis_padd16(TMP22, CONST_6, TMP22); - - vis_mul8x16au(REF_S0_1, CONST_256, REF_S6); - vis_padd16(TMP24, CONST_6, TMP24); - - vis_mul8x16al(DST_2, CONST_1024, REF_0); - vis_padd16(TMP22, TMP26, TMP22); - - vis_mul8x16al(DST_3, CONST_1024, REF_2); - vis_padd16(TMP24, TMP28, TMP24); - - vis_mul8x16au(REF_S2, CONST_256, TMP26); - vis_padd16(TMP8, TMP22, TMP8); - - vis_mul8x16au(REF_S2_1, CONST_256, TMP28); - vis_padd16(TMP10, TMP24, TMP10); - - vis_padd16(TMP8, TMP12, TMP8); - - vis_padd16(TMP10, TMP14, TMP10); - - vis_padd16(TMP8, TMP30, TMP8); - - vis_padd16(TMP10, TMP32, TMP10); - vis_pack16(TMP8, DST_0); - - vis_pack16(TMP10, DST_1); - vis_st64(DST_0, dest[0]); - dest += stride; - - vis_padd16(REF_S4, TMP22, TMP12); - - vis_padd16(REF_S6, TMP24, TMP14); - - vis_padd16(TMP12, TMP26, TMP12); - - vis_padd16(TMP14, TMP28, TMP14); - - vis_padd16(TMP12, REF_0, TMP12); - - vis_padd16(TMP14, REF_2, TMP14); - vis_pack16(TMP12, DST_2); - - vis_pack16(TMP14, DST_3); - vis_st64(DST_2, dest[0]); - dest += stride; - } while (--height); -} - -MPEG2_MC_EXTERN(vis); - -#endif /* defined(ARCH_SPARC) && defined(ENABLE_VIS) */ diff --git a/src/libmpeg2/mpeg2.h b/src/libmpeg2/mpeg2.h deleted file mode 100644 index 253f300a2..000000000 --- a/src/libmpeg2/mpeg2.h +++ /dev/null @@ -1,98 +0,0 @@ -/* - * mpeg2.h - * Copyright (C) 1999-2001 Aaron Holtzman - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
- * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -/* Structure for the mpeg2dec decoder */ - -#ifndef MPEG2_H -#define MPEG2_H - -#include "libmpeg2_accel.h" - -typedef struct mpeg2dec_s { - xine_video_port_t * output; - uint32_t frame_format; - - /* this is where we keep the state of the decoder */ - struct picture_s * picture, *picture_base; - - uint32_t shift; - int new_sequence; - int is_sequence_needed; - int is_wait_for_ip_frames; - int frames_to_drop, drop_frame; - int in_slice; - int seek_mode, is_frame_needed; - - /* the maximum chunk size is determined by vbv_buffer_size */ - /* which is 224K for MP@ML streams. */ - /* (we make no pretenses of decoding anything more than that) */ - /* allocated in init - gcc has problems allocating such big structures */ - uint8_t * chunk_buffer, *chunk_base; - /* pointer to current position in chunk_buffer */ - uint8_t * chunk_ptr; - /* last start code ? */ - uint8_t code; - uint32_t chunk_size; - - int64_t pts; - uint32_t rff_pattern; - int force_aspect; - int force_pan_scan; - - /* AFD data can be found after a sequence, group or picture start code */ - /* and will be stored in afd_value_seen. 
Later it will be transfered to */ - /* a stream property and stored into afd_value_reported to detect changes */ - int afd_value_seen; - int afd_value_reported; - - xine_stream_t *stream; - - /* a spu decoder for possible closed captions */ - spu_decoder_t *cc_dec; - mpeg2dec_accel_t accel; - -} mpeg2dec_t ; - - -/* initialize mpegdec with a opaque user pointer */ -void mpeg2_init (mpeg2dec_t * mpeg2dec, - xine_video_port_t * output); - -/* destroy everything which was allocated, shutdown the output */ -void mpeg2_close (mpeg2dec_t * mpeg2dec); - -int mpeg2_decode_data (mpeg2dec_t * mpeg2dec, - uint8_t * data_start, uint8_t * data_end, - uint64_t pts); - -void mpeg2_find_sequence_header (mpeg2dec_t * mpeg2dec, - uint8_t * data_start, uint8_t * data_end); - -void mpeg2_flush (mpeg2dec_t * mpeg2dec); -void mpeg2_reset (mpeg2dec_t * mpeg2dec); -void mpeg2_discontinuity (mpeg2dec_t * mpeg2dec); - -/* Not needed, it is defined as static in decode.c, and no-one else called it - * currently - */ -/* void process_userdata(mpeg2dec_t *mpeg2dec, uint8_t *buffer); */ - -#endif diff --git a/src/libmpeg2/mpeg2_internal.h b/src/libmpeg2/mpeg2_internal.h deleted file mode 100644 index 2e42aace6..000000000 --- a/src/libmpeg2/mpeg2_internal.h +++ /dev/null @@ -1,294 +0,0 @@ -/* - * mpeg2_internal.h - * Copyright (C) 2000-2002 Michel Lespinasse - * Copyright (C) 1999-2000 Aaron Holtzman - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * See http://libmpeg2.sourceforge.net/ for updates. - * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#ifndef MPEG2_INTERNAL_H -#define MPEG2_INTERNAL_H - -#include -#include "accel_xvmc.h" - -#ifdef ENABLE_ALTIVEC -#include -#endif - -/* macroblock modes */ -#define MACROBLOCK_INTRA XINE_MACROBLOCK_INTRA -#define MACROBLOCK_PATTERN XINE_MACROBLOCK_PATTERN -#define MACROBLOCK_MOTION_BACKWARD XINE_MACROBLOCK_MOTION_BACKWARD -#define MACROBLOCK_MOTION_FORWARD XINE_MACROBLOCK_MOTION_FORWARD -#define MACROBLOCK_QUANT XINE_MACROBLOCK_QUANT -#define DCT_TYPE_INTERLACED XINE_MACROBLOCK_DCT_TYPE_INTERLACED - -/* motion_type */ -#define MOTION_TYPE_MASK (3*64) -#define MOTION_TYPE_BASE 64 -#define MC_FIELD (1*64) -#define MC_FRAME (2*64) -#define MC_16X8 (2*64) -#define MC_DMV (3*64) - -/* picture structure */ -#define TOP_FIELD VO_TOP_FIELD -#define BOTTOM_FIELD VO_BOTTOM_FIELD -#define FRAME_PICTURE VO_BOTH_FIELDS - -/* picture coding type (mpeg2 header) */ -#define I_TYPE 1 -#define P_TYPE 2 -#define B_TYPE 3 -#define D_TYPE 4 - -typedef struct motion_s { - uint8_t * ref[2][3]; - uint8_t ** ref2[2]; - int pmv[2][2]; - int f_code[2]; -} motion_t; - -typedef struct picture_s { - /* first, state that carries information from one macroblock to the */ - /* next inside a slice, and is never used outside of mpeg2_slice() */ - - /* DCT coefficients - should be kept aligned ! 
*/ - int16_t DCTblock[64]; - - /* XvMC DCT block and macroblock data for XvMC acceleration */ - xine_macroblocks_t *mc; - int XvMC_mb_type; - int XvMC_mv_field_sel[2][2]; - int XvMC_x; - int XvMC_y; - int XvMC_motion_type; - int XvMC_dmvector[2]; - int XvMC_cbp; - int XvMC_dct_type; - - /* bit parsing stuff */ - uint32_t bitstream_buf; /* current 32 bit working set of buffer */ - int bitstream_bits; /* used bits in working set */ - uint8_t * bitstream_ptr; /* buffer with stream data */ - - uint8_t * dest[3]; - int pitches[3]; - int offset; - unsigned int limit_x; - unsigned int limit_y_16; - unsigned int limit_y_8; - unsigned int limit_y; - - /* Motion vectors */ - /* The f_ and b_ correspond to the forward and backward motion */ - /* predictors */ - motion_t b_motion; - motion_t f_motion; - - /* predictor for DC coefficients in intra blocks */ - int16_t dc_dct_pred[3]; - - int quantizer_scale; /* remove */ - int current_field; /* remove */ - int dmv_offset; /* remove */ - unsigned int v_offset; /* remove */ - - - /* now non-slice-specific information */ - - /* sequence header stuff */ - uint8_t intra_quantizer_matrix [64]; - uint8_t non_intra_quantizer_matrix [64]; - int load_intra_quantizer_matrix; - int load_non_intra_quantizer_matrix; - - /* The width and height of the picture snapped to macroblock units */ - int coded_picture_width; - int coded_picture_height; - - /* The width and height as it appears on header sequence */ - unsigned int display_width, display_height; - - /* picture header stuff */ - - /* what type of picture this is (I, P, B, D) */ - int picture_coding_type; - - int vbv_delay; - int low_delay; - - /* picture coding extension stuff */ - - /* quantization factor for intra dc coefficients */ - int intra_dc_precision; - /* top/bottom/both fields */ - int picture_structure; - /* bool to indicate all predictions are frame based */ - int frame_pred_frame_dct; - /* bool to indicate whether intra blocks have motion vectors */ - /* (for concealment) */ 
- int concealment_motion_vectors; - /* bit to indicate which quantization table to use */ - int q_scale_type; - /* bool to use different vlc tables */ - int intra_vlc_format; - /* used for DMV MC */ - int top_field_first; - - /* stuff derived from bitstream */ - - /* pointer to the zigzag scan we're supposed to be using */ - uint8_t * scan; - - struct vo_frame_s * current_frame; - struct vo_frame_s * forward_reference_frame; - struct vo_frame_s * backward_reference_frame; - - int frame_width, frame_height; - - int second_field; - - int mpeg1; - - int skip_non_intra_dct; - - /* these things are not needed by the decoder */ - /* this is a temporary interface, we will build a better one later. */ - int aspect_ratio_information; - int saved_aspect_ratio; - int frame_rate_code; - int progressive_sequence; - int repeat_first_field; - int progressive_frame; - uint32_t frame_centre_horizontal_offset; - uint32_t frame_centre_vertical_offset; - uint32_t video_format; - uint32_t colour_description; - uint32_t colour_primatives; - uint32_t transfer_characteristics; - uint32_t matrix_coefficients; - uint32_t display_horizontal_size; - uint32_t display_vertical_size; - uint32_t drop_frame_flag; - uint32_t time_code_hours; - uint32_t time_code_minutes; - uint32_t time_code_seconds; - uint32_t time_code_pictures; - uint32_t closed_gop; - uint32_t broken_link; - - int bitrate; - int frame_rate_ext_n; - int frame_rate_ext_d; - -} picture_t; - -typedef struct cpu_state_s { -#ifdef ARCH_PPC - uint8_t regv[12*16]; -#endif - int dummy; -} cpu_state_t; - -/* cpu_state.c */ -extern void (* mpeg2_cpu_state_save) (cpu_state_t * state); -extern void (* mpeg2_cpu_state_restore) (cpu_state_t * state); -void mpeg2_cpu_state_init (uint32_t mm_accel); - -/* header.c */ -extern uint8_t mpeg2_scan_norm[64]; -extern uint8_t mpeg2_scan_alt[64]; -void mpeg2_header_state_init (picture_t * picture); -int mpeg2_header_picture (picture_t * picture, uint8_t * buffer); -int mpeg2_header_sequence (picture_t 
* picture, uint8_t * buffer); -int mpeg2_header_extension (picture_t * picture, uint8_t * buffer); -int mpeg2_header_group_of_pictures (picture_t * picture, uint8_t * buffer); - -/* idct.c */ -extern void (* mpeg2_idct_copy) (int16_t * block, uint8_t * dest, int stride); -extern void (* mpeg2_idct_add) (int16_t * block, uint8_t * dest, int stride); -extern void (* mpeg2_idct) (int16_t * block); -extern void (* mpeg2_zero_block) (int16_t * block); -void mpeg2_idct_init (uint32_t mm_accel); - -/* idct_mlib.c */ -void mpeg2_idct_add_mlib (int16_t * block, uint8_t * dest, int stride); -void mpeg2_idct_copy_mlib_non_ieee (int16_t * block, uint8_t * dest, - int stride); -void mpeg2_idct_add_mlib_non_ieee (int16_t * block, uint8_t * dest, - int stride); -void mpeg2_idct_mlib (int16_t * block); - -/* idct_mmx.c */ -void mpeg2_idct_copy_mmxext (int16_t * block, uint8_t * dest, int stride); -void mpeg2_idct_add_mmxext (int16_t * block, uint8_t * dest, int stride); -void mpeg2_idct_mmxext (int16_t * block); -void mpeg2_idct_copy_mmx (int16_t * block, uint8_t * dest, int stride); -void mpeg2_idct_add_mmx (int16_t * block, uint8_t * dest, int stride); -void mpeg2_idct_mmx (int16_t * block); -void mpeg2_zero_block_mmx (int16_t * block); -void mpeg2_idct_mmx_init (void); - -/* idct_altivec.c */ -# ifdef ENABLE_ALTIVEC -void mpeg2_idct_copy_altivec (vector signed short * block, unsigned char * dest, - int stride); -void mpeg2_idct_add_altivec (vector signed short * block, unsigned char * dest, - int stride); -# else /* ! 
ENABLE_ALTIVEC */ -void mpeg2_idct_copy_altivec (signed short * block, unsigned char * dest, - int stride); -void mpeg2_idct_add_altivec (signed short * block, unsigned char * dest, - int stride); -# endif /* ENABLE_ALTIVEC */ -void mpeg2_idct_altivec_init (void); - -/* motion_comp.c */ -void mpeg2_mc_init (uint32_t mm_accel); - -typedef struct mpeg2_mc_s { - void (* put [8]) (uint8_t * dst, uint8_t *, int32_t, int32_t); - void (* avg [8]) (uint8_t * dst, uint8_t *, int32_t, int32_t); -} mpeg2_mc_t; - -#define MPEG2_MC_EXTERN(x) mpeg2_mc_t mpeg2_mc_##x = { \ - {MC_put_o_16_##x, MC_put_x_16_##x, MC_put_y_16_##x, MC_put_xy_16_##x, \ - MC_put_o_8_##x, MC_put_x_8_##x, MC_put_y_8_##x, MC_put_xy_8_##x}, \ - {MC_avg_o_16_##x, MC_avg_x_16_##x, MC_avg_y_16_##x, MC_avg_xy_16_##x, \ - MC_avg_o_8_##x, MC_avg_x_8_##x, MC_avg_y_8_##x, MC_avg_xy_8_##x} \ -}; - -extern mpeg2_mc_t mpeg2_mc; -extern mpeg2_mc_t mpeg2_mc_c; -extern mpeg2_mc_t mpeg2_mc_mmx; -extern mpeg2_mc_t mpeg2_mc_mmxext; -extern mpeg2_mc_t mpeg2_mc_3dnow; -extern mpeg2_mc_t mpeg2_mc_altivec; -extern mpeg2_mc_t mpeg2_mc_mlib; -extern mpeg2_mc_t mpeg2_mc_vis; - -/* slice.c */ -void mpeg2_slice (picture_t * picture, int code, uint8_t * buffer); - -/* stats.c */ -void mpeg2_stats (int code, uint8_t * buffer); - - -#endif diff --git a/src/libmpeg2/slice.c b/src/libmpeg2/slice.c deleted file mode 100644 index 8247a9a24..000000000 --- a/src/libmpeg2/slice.c +++ /dev/null @@ -1,1833 +0,0 @@ -/* - * slice.c - * Copyright (C) 2000-2002 Michel Lespinasse - * Copyright (C) 1999-2000 Aaron Holtzman - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * See http://libmpeg2.sourceforge.net/ for updates. - * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
- * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include "config.h" - -#include - -#include -#include -#include "mpeg2_internal.h" -#include - -#include "vlc.h" - -static const int non_linear_quantizer_scale [] = { - 0, 1, 2, 3, 4, 5, 6, 7, - 8, 10, 12, 14, 16, 18, 20, 22, - 24, 28, 32, 36, 40, 44, 48, 52, - 56, 64, 72, 80, 88, 96, 104, 112 -}; - -static inline int get_macroblock_modes (picture_t * picture) -{ -#define bit_buf (picture->bitstream_buf) -#define bits (picture->bitstream_bits) -#define bit_ptr (picture->bitstream_ptr) - int macroblock_modes; - const MBtab * tab; - - switch (picture->picture_coding_type) { - case I_TYPE: - - tab = MB_I + UBITS (bit_buf, 1); - DUMPBITS (bit_buf, bits, tab->len); - macroblock_modes = tab->modes; - - if ((! 
(picture->frame_pred_frame_dct)) && - (picture->picture_structure == FRAME_PICTURE)) { - macroblock_modes |= UBITS (bit_buf, 1) * DCT_TYPE_INTERLACED; - DUMPBITS (bit_buf, bits, 1); - } - - return macroblock_modes; - - case P_TYPE: - - tab = MB_P + UBITS (bit_buf, 5); - DUMPBITS (bit_buf, bits, tab->len); - macroblock_modes = tab->modes; - - if (picture->picture_structure != FRAME_PICTURE) { - if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) { - macroblock_modes |= UBITS (bit_buf, 2) * MOTION_TYPE_BASE; - DUMPBITS (bit_buf, bits, 2); - } - return macroblock_modes; - } else if (picture->frame_pred_frame_dct) { - if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) - macroblock_modes |= MC_FRAME; - return macroblock_modes; - } else { - if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) { - macroblock_modes |= UBITS (bit_buf, 2) * MOTION_TYPE_BASE; - DUMPBITS (bit_buf, bits, 2); - } - if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN)) { - macroblock_modes |= UBITS (bit_buf, 1) * DCT_TYPE_INTERLACED; - DUMPBITS (bit_buf, bits, 1); - } - return macroblock_modes; - } - - case B_TYPE: - - tab = MB_B + UBITS (bit_buf, 6); - DUMPBITS (bit_buf, bits, tab->len); - macroblock_modes = tab->modes; - - if (picture->picture_structure != FRAME_PICTURE) { - if (! (macroblock_modes & MACROBLOCK_INTRA)) { - macroblock_modes |= UBITS (bit_buf, 2) * MOTION_TYPE_BASE; - DUMPBITS (bit_buf, bits, 2); - } - return macroblock_modes; - } else if (picture->frame_pred_frame_dct) { - /* if (! 
(macroblock_modes & MACROBLOCK_INTRA)) */ - macroblock_modes |= MC_FRAME; - return macroblock_modes; - } else { - if (macroblock_modes & MACROBLOCK_INTRA) - goto intra; - macroblock_modes |= UBITS (bit_buf, 2) * MOTION_TYPE_BASE; - DUMPBITS (bit_buf, bits, 2); - if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN)) { - intra: - macroblock_modes |= UBITS (bit_buf, 1) * DCT_TYPE_INTERLACED; - DUMPBITS (bit_buf, bits, 1); - } - return macroblock_modes; - } - - case D_TYPE: - - DUMPBITS (bit_buf, bits, 1); - return MACROBLOCK_INTRA; - - default: - return 0; - } -#undef bit_buf -#undef bits -#undef bit_ptr -} - -static inline int get_quantizer_scale (picture_t * picture) -{ -#define bit_buf (picture->bitstream_buf) -#define bits (picture->bitstream_bits) -#define bit_ptr (picture->bitstream_ptr) - - int quantizer_scale_code; - - quantizer_scale_code = UBITS (bit_buf, 5); - DUMPBITS (bit_buf, bits, 5); - - if (picture->q_scale_type) - return non_linear_quantizer_scale [quantizer_scale_code]; - else - return quantizer_scale_code << 1; -#undef bit_buf -#undef bits -#undef bit_ptr -} - -static inline int get_motion_delta (picture_t * picture, int f_code) -{ -#define bit_buf (picture->bitstream_buf) -#define bits (picture->bitstream_bits) -#define bit_ptr (picture->bitstream_ptr) - - int delta; - int sign; - const MVtab * tab; - - if (bit_buf & 0x80000000) { - DUMPBITS (bit_buf, bits, 1); - return 0; - } else if (bit_buf >= 0x0c000000) { - - tab = MV_4 + UBITS (bit_buf, 4); - delta = (tab->delta << f_code) + 1; - bits += tab->len + f_code + 1; - bit_buf <<= tab->len; - - sign = SBITS (bit_buf, 1); - bit_buf <<= 1; - - if (f_code) - delta += UBITS (bit_buf, f_code); - bit_buf <<= f_code; - - return (delta ^ sign) - sign; - - } else { - - tab = MV_10 + UBITS (bit_buf, 10); - delta = (tab->delta << f_code) + 1; - bits += tab->len + 1; - bit_buf <<= tab->len; - - sign = SBITS (bit_buf, 1); - bit_buf <<= 1; - - if (f_code) { - NEEDBITS (bit_buf, bits, bit_ptr); - delta 
+= UBITS (bit_buf, f_code); - DUMPBITS (bit_buf, bits, f_code); - } - - return (delta ^ sign) - sign; - - } -#undef bit_buf -#undef bits -#undef bit_ptr -} - -static inline int bound_motion_vector (int vec, int f_code) -{ -#if 1 - unsigned int limit; - int sign; - - limit = 16 << f_code; - - if ((unsigned int)(vec + limit) < 2 * limit) - return vec; - else { - sign = ((int32_t)vec) >> 31; - return vec - ((2 * limit) ^ sign) + sign; - } -#else - return ((int32_t)vector << (27 - f_code)) >> (27 - f_code); -#endif -} - -static inline int get_dmv (picture_t * picture) -{ -#define bit_buf (picture->bitstream_buf) -#define bits (picture->bitstream_bits) -#define bit_ptr (picture->bitstream_ptr) - - const DMVtab * tab; - - tab = DMV_2 + UBITS (bit_buf, 2); - DUMPBITS (bit_buf, bits, tab->len); - return tab->dmv; -#undef bit_buf -#undef bits -#undef bit_ptr -} - -static inline int get_coded_block_pattern (picture_t * picture) -{ -#define bit_buf (picture->bitstream_buf) -#define bits (picture->bitstream_bits) -#define bit_ptr (picture->bitstream_ptr) - - const CBPtab * tab; - - NEEDBITS (bit_buf, bits, bit_ptr); - - if (bit_buf >= 0x20000000) { - - tab = CBP_7 + (UBITS (bit_buf, 7) - 16); - DUMPBITS (bit_buf, bits, tab->len); - return tab->cbp; - - } else { - - tab = CBP_9 + UBITS (bit_buf, 9); - DUMPBITS (bit_buf, bits, tab->len); - return tab->cbp; - } - -#undef bit_buf -#undef bits -#undef bit_ptr -} - -static inline int get_luma_dc_dct_diff (picture_t * picture) -{ -#define bit_buf (picture->bitstream_buf) -#define bits (picture->bitstream_bits) -#define bit_ptr (picture->bitstream_ptr) - const DCtab * tab; - int size; - int dc_diff; - - if (bit_buf < 0xf8000000) { - tab = DC_lum_5 + UBITS (bit_buf, 5); - size = tab->size; - if (size) { - bits += tab->len + size; - bit_buf <<= tab->len; - dc_diff = - UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size); - bit_buf <<= size; - return dc_diff; - } else { - DUMPBITS (bit_buf, bits, 3); - return 0; - } - } else { - tab 
= DC_long + (UBITS (bit_buf, 9) - 0x1e0); - size = tab->size; - DUMPBITS (bit_buf, bits, tab->len); - NEEDBITS (bit_buf, bits, bit_ptr); - dc_diff = UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size); - DUMPBITS (bit_buf, bits, size); - return dc_diff; - } -#undef bit_buf -#undef bits -#undef bit_ptr -} - -static inline int get_chroma_dc_dct_diff (picture_t * picture) -{ -#define bit_buf (picture->bitstream_buf) -#define bits (picture->bitstream_bits) -#define bit_ptr (picture->bitstream_ptr) - const DCtab * tab; - int size; - int dc_diff; - - if (bit_buf < 0xf8000000) { - tab = DC_chrom_5 + UBITS (bit_buf, 5); - size = tab->size; - if (size) { - bits += tab->len + size; - bit_buf <<= tab->len; - dc_diff = - UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size); - bit_buf <<= size; - return dc_diff; - } else { - DUMPBITS (bit_buf, bits, 2); - return 0; - } - } else { - tab = DC_long + (UBITS (bit_buf, 10) - 0x3e0); - size = tab->size; - DUMPBITS (bit_buf, bits, tab->len + 1); - NEEDBITS (bit_buf, bits, bit_ptr); - dc_diff = UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size); - DUMPBITS (bit_buf, bits, size); - return dc_diff; - } -#undef bit_buf -#undef bits -#undef bit_ptr -} - -#define SATURATE(val) \ -do { \ - if ((uint32_t)(val + 2048) > 4095) \ - val = (val > 0) ? 
2047 : -2048; \ -} while (0) - -static void get_intra_block_B14 (picture_t * picture) -{ - int i; - int j; - int val; - uint8_t * scan = picture->scan; - uint8_t * quant_matrix = picture->intra_quantizer_matrix; - int quantizer_scale = picture->quantizer_scale; - int mismatch; - const DCTtab * tab; - uint32_t bit_buf; - int bits; - uint8_t * bit_ptr; - int16_t * dest; - - dest = picture->DCTblock; - i = 0; - mismatch = ~dest[0]; - - bit_buf = picture->bitstream_buf; - bits = picture->bitstream_bits; - bit_ptr = picture->bitstream_ptr; - - NEEDBITS (bit_buf, bits, bit_ptr); - - while (1) { - if (bit_buf >= 0x28000000) { - - tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5); - - i += tab->run; - if (i >= 64) - break; /* end of block */ - - normal_code: - j = scan[i]; - bit_buf <<= tab->len; - bits += tab->len + 1; - val = (tab->level * quantizer_scale * quant_matrix[j]) >> 4; - - /* if (bitstream_get (1)) val = -val; */ - val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1); - - SATURATE (val); - dest[j] = val; - mismatch ^= val; - - bit_buf <<= 1; - NEEDBITS (bit_buf, bits, bit_ptr); - - continue; - - } else if (bit_buf >= 0x04000000) { - - tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4); - - i += tab->run; - if (i < 64) - goto normal_code; - - /* escape code */ - - i += UBITS (bit_buf << 6, 6) - 64; - if (i >= 64) - break; /* illegal, check needed to avoid buffer overflow */ - - j = scan[i]; - - DUMPBITS (bit_buf, bits, 12); - NEEDBITS (bit_buf, bits, bit_ptr); - val = (SBITS (bit_buf, 12) * - quantizer_scale * quant_matrix[j]) / 16; - - SATURATE (val); - dest[j] = val; - mismatch ^= val; - - DUMPBITS (bit_buf, bits, 12); - NEEDBITS (bit_buf, bits, bit_ptr); - - continue; - - } else if (bit_buf >= 0x02000000) { - tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8); - i += tab->run; - if (i < 64) - goto normal_code; - } else if (bit_buf >= 0x00800000) { - tab = DCT_13 + (UBITS (bit_buf, 13) - 16); - i += tab->run; - if (i < 64) - goto normal_code; - } else if (bit_buf >= 
0x00200000) { - tab = DCT_15 + (UBITS (bit_buf, 15) - 16); - i += tab->run; - if (i < 64) - goto normal_code; - } else { - tab = DCT_16 + UBITS (bit_buf, 16); - bit_buf <<= 16; - GETWORD (bit_buf, bits + 16, bit_ptr); - i += tab->run; - if (i < 64) - goto normal_code; - } - break; /* illegal, check needed to avoid buffer overflow */ - } - dest[63] ^= mismatch & 1; - DUMPBITS (bit_buf, bits, 2); /* dump end of block code */ - picture->bitstream_buf = bit_buf; - picture->bitstream_bits = bits; - picture->bitstream_ptr = bit_ptr; -} - -static void get_intra_block_B15 (picture_t * picture) -{ - int i; - int j; - int val; - uint8_t * scan = picture->scan; - uint8_t * quant_matrix = picture->intra_quantizer_matrix; - int quantizer_scale = picture->quantizer_scale; - int mismatch; - const DCTtab * tab; - uint32_t bit_buf; - int bits; - uint8_t * bit_ptr; - int16_t * dest; - - dest = picture->DCTblock; - i = 0; - mismatch = ~dest[0]; - - bit_buf = picture->bitstream_buf; - bits = picture->bitstream_bits; - bit_ptr = picture->bitstream_ptr; - - NEEDBITS (bit_buf, bits, bit_ptr); - - while (1) { - if (bit_buf >= 0x04000000) { - - tab = DCT_B15_8 + (UBITS (bit_buf, 8) - 4); - - i += tab->run; - if (i < 64) { - - normal_code: - j = scan[i]; - bit_buf <<= tab->len; - bits += tab->len + 1; - val = (tab->level * quantizer_scale * quant_matrix[j]) >> 4; - - /* if (bitstream_get (1)) val = -val; */ - val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1); - - SATURATE (val); - dest[j] = val; - mismatch ^= val; - - bit_buf <<= 1; - NEEDBITS (bit_buf, bits, bit_ptr); - - continue; - - } else { - - /* end of block. 
I commented out this code because if we */ - /* dont exit here we will still exit at the later test :) */ - - /* if (i >= 128) break; */ /* end of block */ - - /* escape code */ - - i += UBITS (bit_buf << 6, 6) - 64; - if (i >= 64) - break; /* illegal, check against buffer overflow */ - - j = scan[i]; - - DUMPBITS (bit_buf, bits, 12); - NEEDBITS (bit_buf, bits, bit_ptr); - val = (SBITS (bit_buf, 12) * - quantizer_scale * quant_matrix[j]) / 16; - - SATURATE (val); - dest[j] = val; - mismatch ^= val; - - DUMPBITS (bit_buf, bits, 12); - NEEDBITS (bit_buf, bits, bit_ptr); - - continue; - - } - } else if (bit_buf >= 0x02000000) { - tab = DCT_B15_10 + (UBITS (bit_buf, 10) - 8); - i += tab->run; - if (i < 64) - goto normal_code; - } else if (bit_buf >= 0x00800000) { - tab = DCT_13 + (UBITS (bit_buf, 13) - 16); - i += tab->run; - if (i < 64) - goto normal_code; - } else if (bit_buf >= 0x00200000) { - tab = DCT_15 + (UBITS (bit_buf, 15) - 16); - i += tab->run; - if (i < 64) - goto normal_code; - } else { - tab = DCT_16 + UBITS (bit_buf, 16); - bit_buf <<= 16; - GETWORD (bit_buf, bits + 16, bit_ptr); - i += tab->run; - if (i < 64) - goto normal_code; - } - break; /* illegal, check needed to avoid buffer overflow */ - } - dest[63] ^= mismatch & 1; - DUMPBITS (bit_buf, bits, 4); /* dump end of block code */ - picture->bitstream_buf = bit_buf; - picture->bitstream_bits = bits; - picture->bitstream_ptr = bit_ptr; -} - -static void get_non_intra_block (picture_t * picture) -{ - int i; - int j; - int val; - uint8_t * scan = picture->scan; - uint8_t * quant_matrix = picture->non_intra_quantizer_matrix; - int quantizer_scale = picture->quantizer_scale; - int mismatch; - const DCTtab * tab; - uint32_t bit_buf; - int bits; - uint8_t * bit_ptr; - int16_t * dest; - - i = -1; - mismatch = 1; - dest = picture->DCTblock; - - bit_buf = picture->bitstream_buf; - bits = picture->bitstream_bits; - bit_ptr = picture->bitstream_ptr; - - NEEDBITS (bit_buf, bits, bit_ptr); - if (bit_buf >= 
0x28000000) { - tab = DCT_B14DC_5 + (UBITS (bit_buf, 5) - 5); - goto entry_1; - } else - goto entry_2; - - while (1) { - if (bit_buf >= 0x28000000) { - - tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5); - - entry_1: - i += tab->run; - if (i >= 64) - break; /* end of block */ - - normal_code: - j = scan[i]; - bit_buf <<= tab->len; - bits += tab->len + 1; - val = ((2*tab->level+1) * quantizer_scale * quant_matrix[j]) >> 5; - - /* if (bitstream_get (1)) val = -val; */ - val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1); - - SATURATE (val); - dest[j] = val; - mismatch ^= val; - - bit_buf <<= 1; - NEEDBITS (bit_buf, bits, bit_ptr); - - continue; - - } - - entry_2: - if (bit_buf >= 0x04000000) { - - tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4); - - i += tab->run; - if (i < 64) - goto normal_code; - - /* escape code */ - - i += UBITS (bit_buf << 6, 6) - 64; - if (i >= 64) - break; /* illegal, check needed to avoid buffer overflow */ - - j = scan[i]; - - DUMPBITS (bit_buf, bits, 12); - NEEDBITS (bit_buf, bits, bit_ptr); - val = 2 * (SBITS (bit_buf, 12) + SBITS (bit_buf, 1)) + 1; - val = (val * quantizer_scale * quant_matrix[j]) / 32; - - SATURATE (val); - dest[j] = val; - mismatch ^= val; - - DUMPBITS (bit_buf, bits, 12); - NEEDBITS (bit_buf, bits, bit_ptr); - - continue; - - } else if (bit_buf >= 0x02000000) { - tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8); - i += tab->run; - if (i < 64) - goto normal_code; - } else if (bit_buf >= 0x00800000) { - tab = DCT_13 + (UBITS (bit_buf, 13) - 16); - i += tab->run; - if (i < 64) - goto normal_code; - } else if (bit_buf >= 0x00200000) { - tab = DCT_15 + (UBITS (bit_buf, 15) - 16); - i += tab->run; - if (i < 64) - goto normal_code; - } else { - tab = DCT_16 + UBITS (bit_buf, 16); - bit_buf <<= 16; - GETWORD (bit_buf, bits + 16, bit_ptr); - i += tab->run; - if (i < 64) - goto normal_code; - } - break; /* illegal, check needed to avoid buffer overflow */ - } - dest[63] ^= mismatch & 1; - DUMPBITS (bit_buf, bits, 2); /* dump end of block 
code */ - picture->bitstream_buf = bit_buf; - picture->bitstream_bits = bits; - picture->bitstream_ptr = bit_ptr; -} - -static void get_mpeg1_intra_block (picture_t * picture) -{ - int i; - int j; - int val; - uint8_t * scan = picture->scan; - uint8_t * quant_matrix = picture->intra_quantizer_matrix; - int quantizer_scale = picture->quantizer_scale; - const DCTtab * tab; - uint32_t bit_buf; - int bits; - uint8_t * bit_ptr; - int16_t * dest; - - i = 0; - dest = picture->DCTblock; - - bit_buf = picture->bitstream_buf; - bits = picture->bitstream_bits; - bit_ptr = picture->bitstream_ptr; - - NEEDBITS (bit_buf, bits, bit_ptr); - - while (1) { - if (bit_buf >= 0x28000000) { - - tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5); - - i += tab->run; - if (i >= 64) - break; /* end of block */ - - normal_code: - j = scan[i]; - bit_buf <<= tab->len; - bits += tab->len + 1; - val = (tab->level * quantizer_scale * quant_matrix[j]) >> 4; - - /* oddification */ - val = (val - 1) | 1; - - /* if (bitstream_get (1)) val = -val; */ - val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1); - - SATURATE (val); - dest[j] = val; - - bit_buf <<= 1; - NEEDBITS (bit_buf, bits, bit_ptr); - - continue; - - } else if (bit_buf >= 0x04000000) { - - tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4); - - i += tab->run; - if (i < 64) - goto normal_code; - - /* escape code */ - - i += UBITS (bit_buf << 6, 6) - 64; - if (i >= 64) - break; /* illegal, check needed to avoid buffer overflow */ - - j = scan[i]; - - DUMPBITS (bit_buf, bits, 12); - NEEDBITS (bit_buf, bits, bit_ptr); - val = SBITS (bit_buf, 8); - if (! 
(val & 0x7f)) { - DUMPBITS (bit_buf, bits, 8); - val = UBITS (bit_buf, 8) + 2 * val; - } - val = (val * quantizer_scale * quant_matrix[j]) / 16; - - /* oddification */ - val = (val + ~SBITS (val, 1)) | 1; - - SATURATE (val); - dest[j] = val; - - DUMPBITS (bit_buf, bits, 8); - NEEDBITS (bit_buf, bits, bit_ptr); - - continue; - - } else if (bit_buf >= 0x02000000) { - tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8); - i += tab->run; - if (i < 64) - goto normal_code; - } else if (bit_buf >= 0x00800000) { - tab = DCT_13 + (UBITS (bit_buf, 13) - 16); - i += tab->run; - if (i < 64) - goto normal_code; - } else if (bit_buf >= 0x00200000) { - tab = DCT_15 + (UBITS (bit_buf, 15) - 16); - i += tab->run; - if (i < 64) - goto normal_code; - } else { - tab = DCT_16 + UBITS (bit_buf, 16); - bit_buf <<= 16; - GETWORD (bit_buf, bits + 16, bit_ptr); - i += tab->run; - if (i < 64) - goto normal_code; - } - break; /* illegal, check needed to avoid buffer overflow */ - } - DUMPBITS (bit_buf, bits, 2); /* dump end of block code */ - picture->bitstream_buf = bit_buf; - picture->bitstream_bits = bits; - picture->bitstream_ptr = bit_ptr; -} - -static void get_mpeg1_non_intra_block (picture_t * picture) -{ - int i; - int j; - int val; - uint8_t * scan = picture->scan; - uint8_t * quant_matrix = picture->non_intra_quantizer_matrix; - int quantizer_scale = picture->quantizer_scale; - const DCTtab * tab; - uint32_t bit_buf; - int bits; - uint8_t * bit_ptr; - int16_t * dest; - - i = -1; - dest = picture->DCTblock; - - bit_buf = picture->bitstream_buf; - bits = picture->bitstream_bits; - bit_ptr = picture->bitstream_ptr; - - NEEDBITS (bit_buf, bits, bit_ptr); - if (bit_buf >= 0x28000000) { - tab = DCT_B14DC_5 + (UBITS (bit_buf, 5) - 5); - goto entry_1; - } else - goto entry_2; - - while (1) { - if (bit_buf >= 0x28000000) { - - tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5); - - entry_1: - i += tab->run; - if (i >= 64) - break; /* end of block */ - - normal_code: - j = scan[i]; - bit_buf <<= tab->len; 
- bits += tab->len + 1; - val = ((2*tab->level+1) * quantizer_scale * quant_matrix[j]) >> 5; - - /* oddification */ - val = (val - 1) | 1; - - /* if (bitstream_get (1)) val = -val; */ - val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1); - - SATURATE (val); - dest[j] = val; - - bit_buf <<= 1; - NEEDBITS (bit_buf, bits, bit_ptr); - - continue; - - } - - entry_2: - if (bit_buf >= 0x04000000) { - - tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4); - - i += tab->run; - if (i < 64) - goto normal_code; - - /* escape code */ - - i += UBITS (bit_buf << 6, 6) - 64; - if (i >= 64) - break; /* illegal, check needed to avoid buffer overflow */ - - j = scan[i]; - - DUMPBITS (bit_buf, bits, 12); - NEEDBITS (bit_buf, bits, bit_ptr); - val = SBITS (bit_buf, 8); - if (! (val & 0x7f)) { - DUMPBITS (bit_buf, bits, 8); - val = UBITS (bit_buf, 8) + 2 * val; - } - val = 2 * (val + SBITS (val, 1)) + 1; - val = (val * quantizer_scale * quant_matrix[j]) / 32; - - /* oddification */ - val = (val + ~SBITS (val, 1)) | 1; - - SATURATE (val); - dest[j] = val; - - DUMPBITS (bit_buf, bits, 8); - NEEDBITS (bit_buf, bits, bit_ptr); - - continue; - - } else if (bit_buf >= 0x02000000) { - tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8); - i += tab->run; - if (i < 64) - goto normal_code; - } else if (bit_buf >= 0x00800000) { - tab = DCT_13 + (UBITS (bit_buf, 13) - 16); - i += tab->run; - if (i < 64) - goto normal_code; - } else if (bit_buf >= 0x00200000) { - tab = DCT_15 + (UBITS (bit_buf, 15) - 16); - i += tab->run; - if (i < 64) - goto normal_code; - } else { - tab = DCT_16 + UBITS (bit_buf, 16); - bit_buf <<= 16; - GETWORD (bit_buf, bits + 16, bit_ptr); - i += tab->run; - if (i < 64) - goto normal_code; - } - break; /* illegal, check needed to avoid buffer overflow */ - } - DUMPBITS (bit_buf, bits, 2); /* dump end of block code */ - picture->bitstream_buf = bit_buf; - picture->bitstream_bits = bits; - picture->bitstream_ptr = bit_ptr; -} - -static inline void slice_intra_DCT (picture_t * picture, int cc, 
- uint8_t * dest, int stride) -{ -#define bit_buf (picture->bitstream_buf) -#define bits (picture->bitstream_bits) -#define bit_ptr (picture->bitstream_ptr) - NEEDBITS (bit_buf, bits, bit_ptr); - /* Get the intra DC coefficient and inverse quantize it */ - if (cc == 0) - picture->dc_dct_pred[0] += get_luma_dc_dct_diff (picture); - else - picture->dc_dct_pred[cc] += get_chroma_dc_dct_diff (picture); - picture->DCTblock[0] = - picture->dc_dct_pred[cc] << (3 - picture->intra_dc_precision); - - if (picture->mpeg1) { - if (picture->picture_coding_type != D_TYPE) - get_mpeg1_intra_block (picture); - } else if (picture->intra_vlc_format) - get_intra_block_B15 (picture); - else - get_intra_block_B14 (picture); - mpeg2_idct_copy (picture->DCTblock, dest, stride); -#undef bit_buf -#undef bits -#undef bit_ptr -} - -static inline void slice_non_intra_DCT (picture_t * picture, uint8_t * dest, - int stride) -{ - if (picture->mpeg1) - get_mpeg1_non_intra_block (picture); - else - get_non_intra_block (picture); - mpeg2_idct_add (picture->DCTblock, dest, stride); -} - -#define MOTION(table,ref,motion_x,motion_y,size,y) \ - pos_x = 2 * picture->offset + motion_x; \ - pos_y = 2 * picture->v_offset + motion_y + 2 * y; \ - if (pos_x > picture->limit_x) { \ - pos_x = ((int)pos_x < 0) ? 0 : picture->limit_x; \ - motion_x = pos_x - 2 * picture->offset; \ - } \ - if (pos_y > picture->limit_y_ ## size){ \ - pos_y = ((int)pos_y < 0) ? 
0 : picture->limit_y_ ## size; \ - motion_y = pos_y - 2 * picture->v_offset - 2 * y; \ - } \ - xy_half = ((pos_y & 1) << 1) | (pos_x & 1); \ - table[xy_half] (picture->dest[0] + y * picture->pitches[0] + \ - picture->offset, ref[0] + (pos_x >> 1) + \ - (pos_y >> 1) * picture->pitches[0], picture->pitches[0], \ - size); \ - motion_x /= 2; motion_y /= 2; \ - xy_half = ((motion_y & 1) << 1) | (motion_x & 1); \ - table[4+xy_half] (picture->dest[1] + y/2 * picture->pitches[1] + \ - (picture->offset >> 1), ref[1] + \ - (((picture->offset + motion_x) >> 1) + \ - ((((picture->v_offset + motion_y) >> 1) + y/2) * \ - picture->pitches[1])), picture->pitches[1], size/2); \ - table[4+xy_half] (picture->dest[2] + y/2 * picture->pitches[2] + \ - (picture->offset >> 1), ref[2] + \ - (((picture->offset + motion_x) >> 1) + \ - ((((picture->v_offset + motion_y) >> 1) + y/2) * \ - picture->pitches[2])), picture->pitches[2], size/2) \ - -#define MOTION_FIELD(table,ref,motion_x,motion_y,dest_field,op,src_field) \ - pos_x = 2 * picture->offset + motion_x; \ - pos_y = picture->v_offset + motion_y; \ - if (pos_x > picture->limit_x) { \ - pos_x = ((int)pos_x < 0) ? 0 : picture->limit_x; \ - motion_x = pos_x - 2 * picture->offset; \ - } \ - if (pos_y > picture->limit_y){ \ - pos_y = ((int)pos_y < 0) ? 
0 : picture->limit_y; \ - motion_y = pos_y - picture->v_offset; \ - } \ - xy_half = ((pos_y & 1) << 1) | (pos_x & 1); \ - table[xy_half] (picture->dest[0] + dest_field * picture->pitches[0] + \ - picture->offset, \ - (ref[0] + (pos_x >> 1) + \ - ((pos_y op) + src_field) * picture->pitches[0]), \ - 2 * picture->pitches[0], 8); \ - motion_x /= 2; motion_y /= 2; \ - xy_half = ((motion_y & 1) << 1) | (motion_x & 1); \ - table[4+xy_half] (picture->dest[1] + dest_field * picture->pitches[1] + \ - (picture->offset >> 1), ref[1] + \ - (((picture->offset + motion_x) >> 1) + \ - (((picture->v_offset >> 1) + \ - (motion_y op) + src_field) * picture->pitches[1])), \ - 2 * picture->pitches[1], 4); \ - table[4+xy_half] (picture->dest[2] + dest_field * picture->pitches[2] + \ - (picture->offset >> 1), ref[2] + \ - (((picture->offset + motion_x) >> 1) + \ - (((picture->v_offset >> 1) + \ - (motion_y op) + src_field) * picture->pitches[2])), \ - 2 * picture->pitches[2], 4) - -static void motion_mp1 (picture_t * picture, motion_t * motion, - void (** table) (uint8_t *, uint8_t *, int, int)) -{ -#define bit_buf (picture->bitstream_buf) -#define bits (picture->bitstream_bits) -#define bit_ptr (picture->bitstream_ptr) - int motion_x, motion_y; - unsigned int pos_x, pos_y, xy_half; - - NEEDBITS (bit_buf, bits, bit_ptr); - motion_x = (motion->pmv[0][0] + - (get_motion_delta (picture, - motion->f_code[0]) << motion->f_code[1])); - motion_x = bound_motion_vector (motion_x, - motion->f_code[0] + motion->f_code[1]); - motion->pmv[0][0] = motion_x; - - NEEDBITS (bit_buf, bits, bit_ptr); - motion_y = (motion->pmv[0][1] + - (get_motion_delta (picture, - motion->f_code[0]) << motion->f_code[1])); - motion_y = bound_motion_vector (motion_y, - motion->f_code[0] + motion->f_code[1]); - motion->pmv[0][1] = motion_y; - - MOTION (table, motion->ref[0], motion_x, motion_y, 16, 0); -#undef bit_buf -#undef bits -#undef bit_ptr -} - -static void motion_fr_frame (picture_t * picture, motion_t * motion, - 
void (** table) (uint8_t *, uint8_t *, int, int)) -{ -#define bit_buf (picture->bitstream_buf) -#define bits (picture->bitstream_bits) -#define bit_ptr (picture->bitstream_ptr) - int motion_x, motion_y; - unsigned int pos_x, pos_y, xy_half; - - NEEDBITS (bit_buf, bits, bit_ptr); - motion_x = motion->pmv[0][0] + get_motion_delta (picture, - motion->f_code[0]); - motion_x = bound_motion_vector (motion_x, motion->f_code[0]); - motion->pmv[1][0] = motion->pmv[0][0] = motion_x; - - NEEDBITS (bit_buf, bits, bit_ptr); - motion_y = motion->pmv[0][1] + get_motion_delta (picture, - motion->f_code[1]); - motion_y = bound_motion_vector (motion_y, motion->f_code[1]); - motion->pmv[1][1] = motion->pmv[0][1] = motion_y; - - MOTION (table, motion->ref[0], motion_x, motion_y, 16, 0); -#undef bit_buf -#undef bits -#undef bit_ptr -} - -static void motion_fr_field (picture_t * picture, motion_t * motion, - void (** table) (uint8_t *, uint8_t *, int, int)) -{ -#define bit_buf (picture->bitstream_buf) -#define bits (picture->bitstream_bits) -#define bit_ptr (picture->bitstream_ptr) - int motion_x, motion_y, field; - unsigned int pos_x, pos_y, xy_half; - - NEEDBITS (bit_buf, bits, bit_ptr); - field = UBITS (bit_buf, 1); - DUMPBITS (bit_buf, bits, 1); - - motion_x = motion->pmv[0][0] + get_motion_delta (picture, - motion->f_code[0]); - motion_x = bound_motion_vector (motion_x, motion->f_code[0]); - motion->pmv[0][0] = motion_x; - - NEEDBITS (bit_buf, bits, bit_ptr); - motion_y = (motion->pmv[0][1] >> 1) + get_motion_delta (picture, - motion->f_code[1]); - /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */ - motion->pmv[0][1] = motion_y << 1; - - MOTION_FIELD (table, motion->ref[0], motion_x, motion_y, 0, & ~1, field); - - NEEDBITS (bit_buf, bits, bit_ptr); - field = UBITS (bit_buf, 1); - DUMPBITS (bit_buf, bits, 1); - - motion_x = motion->pmv[1][0] + get_motion_delta (picture, - motion->f_code[0]); - motion_x = bound_motion_vector (motion_x, motion->f_code[0]); - 
motion->pmv[1][0] = motion_x; - - NEEDBITS (bit_buf, bits, bit_ptr); - motion_y = (motion->pmv[1][1] >> 1) + get_motion_delta (picture, - motion->f_code[1]); - /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */ - motion->pmv[1][1] = motion_y << 1; - - MOTION_FIELD (table, motion->ref[0], motion_x, motion_y, 1, & ~1, field); -#undef bit_buf -#undef bits -#undef bit_ptr -} - -static void motion_fr_dmv (picture_t * picture, motion_t * motion, - void (** table) (uint8_t *, uint8_t *, int, int)) -{ -#define bit_buf (picture->bitstream_buf) -#define bits (picture->bitstream_bits) -#define bit_ptr (picture->bitstream_ptr) - int motion_x, motion_y, dmv_x, dmv_y, m, other_x, other_y; - unsigned int pos_x, pos_y, xy_half, offset; - - NEEDBITS (bit_buf, bits, bit_ptr); - motion_x = motion->pmv[0][0] + get_motion_delta (picture, - motion->f_code[0]); - motion_x = bound_motion_vector (motion_x, motion->f_code[0]); - motion->pmv[1][0] = motion->pmv[0][0] = motion_x; - NEEDBITS (bit_buf, bits, bit_ptr); - dmv_x = get_dmv (picture); - - motion_y = (motion->pmv[0][1] >> 1) + get_motion_delta (picture, - motion->f_code[1]); - /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */ - motion->pmv[1][1] = motion->pmv[0][1] = motion_y << 1; - dmv_y = get_dmv (picture); - - m = picture->top_field_first ? 1 : 3; - other_x = ((motion_x * m + (motion_x > 0)) >> 1) + dmv_x; - other_y = ((motion_y * m + (motion_y > 0)) >> 1) + dmv_y - 1; - MOTION_FIELD (mpeg2_mc.put, motion->ref[0], other_x, other_y, 0, | 1, 0); - - m = picture->top_field_first ? 3 : 1; - other_x = ((motion_x * m + (motion_x > 0)) >> 1) + dmv_x; - other_y = ((motion_y * m + (motion_y > 0)) >> 1) + dmv_y + 1; - MOTION_FIELD (mpeg2_mc.put, motion->ref[0], other_x, other_y, 1, & ~1, 0); - - pos_x = 2 * picture->offset + motion_x; - pos_y = picture->v_offset + motion_y; - if(pos_x > picture->limit_x){ - pos_x = ((int)pos_x < 0) ? 
0 : picture->limit_x; - motion_x = pos_x - 2 * picture->offset; - } - if(pos_y > picture->limit_y){ - pos_y = ((int)pos_y < 0) ? 0 : picture->limit_y; - motion_y = pos_y - picture->v_offset; - } - xy_half = ((pos_y & 1) << 1) | (pos_x & 1); - offset = (pos_x >> 1) + (pos_y & ~1) * picture->pitches[0]; - mpeg2_mc.avg[xy_half] - (picture->dest[0] + picture->offset, - motion->ref[0][0] + offset, 2 * picture->pitches[0], 8); - mpeg2_mc.avg[xy_half] - (picture->dest[0] + picture->pitches[0] + picture->offset, - motion->ref[0][0] + picture->pitches[0] + offset, - 2 * picture->pitches[0], 8); - motion_x /= 2; motion_y /= 2; - xy_half = ((motion_y & 1) << 1) | (motion_x & 1); - offset = (((picture->offset + motion_x) >> 1) + - (((picture->v_offset >> 1) + (motion_y & ~1)) * - picture->pitches[1])); - mpeg2_mc.avg[4+xy_half] - (picture->dest[1] + (picture->offset >> 1), - motion->ref[0][1] + offset, 2 * picture->pitches[1], 4); - mpeg2_mc.avg[4+xy_half] - (picture->dest[1] + picture->pitches[1] + (picture->offset >> 1), - motion->ref[0][1] + picture->pitches[1] + offset, - 2 * picture->pitches[1], 4); - offset = (((picture->offset + motion_x) >> 1) + - (((picture->v_offset >> 1) + (motion_y & ~1)) * - picture->pitches[2])); - mpeg2_mc.avg[4+xy_half] - (picture->dest[2] + (picture->offset >> 1), - motion->ref[0][2] + offset, 2 * picture->pitches[2], 4); - mpeg2_mc.avg[4+xy_half] - (picture->dest[2] + picture->pitches[2] + (picture->offset >> 1), - motion->ref[0][2] + picture->pitches[2] + offset, - 2 * picture->pitches[2], 4); -#undef bit_buf -#undef bits -#undef bit_ptr -} - -static void motion_reuse (picture_t * picture, motion_t * motion, - void (** table) (uint8_t *, uint8_t *, int, int)) -{ - int motion_x, motion_y; - unsigned int pos_x, pos_y, xy_half; - - motion_x = motion->pmv[0][0]; - motion_y = motion->pmv[0][1]; - - MOTION (table, motion->ref[0], motion_x, motion_y, 16, 0); -} - -static void motion_zero (picture_t * picture, motion_t * motion, - void (** table) 
(uint8_t *, uint8_t *, int, int)) -{ - table[0] (picture->dest[0] + picture->offset, - (motion->ref[0][0] + picture->offset + - picture->v_offset * picture->pitches[0]), - picture->pitches[0], 16); - - table[4] (picture->dest[1] + (picture->offset >> 1), - motion->ref[0][1] + (picture->offset >> 1) + - (picture->v_offset >> 1) * picture->pitches[1], - picture->pitches[1], 8); - table[4] (picture->dest[2] + (picture->offset >> 1), - motion->ref[0][2] + (picture->offset >> 1) + - (picture->v_offset >> 1) * picture->pitches[2], - picture->pitches[2], 8); -} - -/* like motion_frame, but parsing without actual motion compensation */ -static void motion_fr_conceal (picture_t * picture) -{ -#define bit_buf (picture->bitstream_buf) -#define bits (picture->bitstream_bits) -#define bit_ptr (picture->bitstream_ptr) - int tmp; - - NEEDBITS (bit_buf, bits, bit_ptr); - tmp = (picture->f_motion.pmv[0][0] + - get_motion_delta (picture, picture->f_motion.f_code[0])); - tmp = bound_motion_vector (tmp, picture->f_motion.f_code[0]); - picture->f_motion.pmv[1][0] = picture->f_motion.pmv[0][0] = tmp; - - NEEDBITS (bit_buf, bits, bit_ptr); - tmp = (picture->f_motion.pmv[0][1] + - get_motion_delta (picture, picture->f_motion.f_code[1])); - tmp = bound_motion_vector (tmp, picture->f_motion.f_code[1]); - picture->f_motion.pmv[1][1] = picture->f_motion.pmv[0][1] = tmp; - - DUMPBITS (bit_buf, bits, 1); /* remove marker_bit */ -#undef bit_buf -#undef bits -#undef bit_ptr -} - -static void motion_fi_field (picture_t * picture, motion_t * motion, - void (** table) (uint8_t *, uint8_t *, int, int)) -{ -#define bit_buf (picture->bitstream_buf) -#define bits (picture->bitstream_bits) -#define bit_ptr (picture->bitstream_ptr) - int motion_x, motion_y; - uint8_t ** ref_field; - unsigned int pos_x, pos_y, xy_half; - - NEEDBITS (bit_buf, bits, bit_ptr); - ref_field = motion->ref2[UBITS (bit_buf, 1)]; - DUMPBITS (bit_buf, bits, 1); - - motion_x = motion->pmv[0][0] + get_motion_delta (picture, - 
motion->f_code[0]); - motion_x = bound_motion_vector (motion_x, motion->f_code[0]); - motion->pmv[1][0] = motion->pmv[0][0] = motion_x; - - NEEDBITS (bit_buf, bits, bit_ptr); - motion_y = motion->pmv[0][1] + get_motion_delta (picture, - motion->f_code[1]); - motion_y = bound_motion_vector (motion_y, motion->f_code[1]); - motion->pmv[1][1] = motion->pmv[0][1] = motion_y; - - MOTION (table, ref_field, motion_x, motion_y, 16, 0); -#undef bit_buf -#undef bits -#undef bit_ptr -} - -static void motion_fi_16x8 (picture_t * picture, motion_t * motion, - void (** table) (uint8_t *, uint8_t *, int, int)) -{ -#define bit_buf (picture->bitstream_buf) -#define bits (picture->bitstream_bits) -#define bit_ptr (picture->bitstream_ptr) - int motion_x, motion_y; - uint8_t ** ref_field; - unsigned int pos_x, pos_y, xy_half; - - NEEDBITS (bit_buf, bits, bit_ptr); - ref_field = motion->ref2[UBITS (bit_buf, 1)]; - DUMPBITS (bit_buf, bits, 1); - - motion_x = motion->pmv[0][0] + get_motion_delta (picture, - motion->f_code[0]); - motion_x = bound_motion_vector (motion_x, motion->f_code[0]); - motion->pmv[0][0] = motion_x; - - NEEDBITS (bit_buf, bits, bit_ptr); - motion_y = motion->pmv[0][1] + get_motion_delta (picture, - motion->f_code[1]); - motion_y = bound_motion_vector (motion_y, motion->f_code[1]); - motion->pmv[0][1] = motion_y; - - MOTION (table, ref_field, motion_x, motion_y, 8, 0); - - NEEDBITS (bit_buf, bits, bit_ptr); - ref_field = motion->ref2[UBITS (bit_buf, 1)]; - DUMPBITS (bit_buf, bits, 1); - - motion_x = motion->pmv[1][0] + get_motion_delta (picture, - motion->f_code[0]); - motion_x = bound_motion_vector (motion_x, motion->f_code[0]); - motion->pmv[1][0] = motion_x; - - NEEDBITS (bit_buf, bits, bit_ptr); - motion_y = motion->pmv[1][1] + get_motion_delta (picture, - motion->f_code[1]); - motion_y = bound_motion_vector (motion_y, motion->f_code[1]); - motion->pmv[1][1] = motion_y; - - MOTION (table, ref_field, motion_x, motion_y, 8, 8); -#undef bit_buf -#undef bits -#undef 
bit_ptr -} - -static void motion_fi_dmv (picture_t * picture, motion_t * motion, - void (** table) (uint8_t *, uint8_t *, int, int)) -{ -#define bit_buf (picture->bitstream_buf) -#define bits (picture->bitstream_bits) -#define bit_ptr (picture->bitstream_ptr) - int motion_x, motion_y, other_x, other_y; - unsigned int pos_x, pos_y, xy_half; - - NEEDBITS (bit_buf, bits, bit_ptr); - motion_x = motion->pmv[0][0] + get_motion_delta (picture, - motion->f_code[0]); - motion_x = bound_motion_vector (motion_x, motion->f_code[0]); - motion->pmv[1][0] = motion->pmv[0][0] = motion_x; - NEEDBITS (bit_buf, bits, bit_ptr); - other_x = ((motion_x + (motion_x > 0)) >> 1) + get_dmv (picture); - - motion_y = motion->pmv[0][1] + get_motion_delta (picture, - motion->f_code[1]); - motion_y = bound_motion_vector (motion_y, motion->f_code[1]); - motion->pmv[1][1] = motion->pmv[0][1] = motion_y; - other_y = (((motion_y + (motion_y > 0)) >> 1) + get_dmv (picture) + - picture->dmv_offset); - - MOTION (mpeg2_mc.put, motion->ref[0], motion_x, motion_y, 16, 0); - MOTION (mpeg2_mc.avg, motion->ref[1], other_x, other_y, 16, 0); -#undef bit_buf -#undef bits -#undef bit_ptr -} - -static void motion_fi_conceal (picture_t * picture) -{ -#define bit_buf (picture->bitstream_buf) -#define bits (picture->bitstream_bits) -#define bit_ptr (picture->bitstream_ptr) - int tmp; - - NEEDBITS (bit_buf, bits, bit_ptr); - DUMPBITS (bit_buf, bits, 1); /* remove field_select */ - - tmp = (picture->f_motion.pmv[0][0] + - get_motion_delta (picture, picture->f_motion.f_code[0])); - tmp = bound_motion_vector (tmp, picture->f_motion.f_code[0]); - picture->f_motion.pmv[1][0] = picture->f_motion.pmv[0][0] = tmp; - - NEEDBITS (bit_buf, bits, bit_ptr); - tmp = (picture->f_motion.pmv[0][1] + - get_motion_delta (picture, picture->f_motion.f_code[1])); - tmp = bound_motion_vector (tmp, picture->f_motion.f_code[1]); - picture->f_motion.pmv[1][1] = picture->f_motion.pmv[0][1] = tmp; - - DUMPBITS (bit_buf, bits, 1); /* remove 
marker_bit */ -#undef bit_buf -#undef bits -#undef bit_ptr -} - -#define MOTION_CALL(routine,direction) \ -do { \ - if ((direction) & MACROBLOCK_MOTION_FORWARD) \ - routine (picture, &(picture->f_motion), mpeg2_mc.put); \ - if ((direction) & MACROBLOCK_MOTION_BACKWARD) \ - routine (picture, &(picture->b_motion), \ - ((direction) & MACROBLOCK_MOTION_FORWARD ? \ - mpeg2_mc.avg : mpeg2_mc.put)); \ -} while (0) - -#define NEXT_MACROBLOCK \ -do { \ - picture->offset += 16; \ - if (picture->offset == picture->coded_picture_width) { \ - do { /* just so we can use the break statement */ \ - if (picture->current_frame->proc_slice) { \ - picture->current_frame->proc_slice (picture->current_frame, \ - picture->dest); \ - } \ - picture->dest[0] += 16 * picture->pitches[0]; \ - picture->dest[1] += 8 * picture->pitches[1]; \ - picture->dest[2] += 8 * picture->pitches[2]; \ - } while (0); \ - picture->v_offset += 16; \ - if (picture->v_offset > picture->limit_y) { \ - if (mpeg2_cpu_state_restore) \ - mpeg2_cpu_state_restore (&cpu_state); \ - return; \ - } \ - picture->offset = 0; \ - } \ -} while (0) - -static inline int slice_init (picture_t * picture, int code) -{ -#define bit_buf (picture->bitstream_buf) -#define bits (picture->bitstream_bits) -#define bit_ptr (picture->bitstream_ptr) - int offset, height; - struct vo_frame_s * forward_reference_frame; - struct vo_frame_s * backward_reference_frame; - const MBAtab * mba; - - offset = picture->picture_structure == BOTTOM_FIELD; - picture->pitches[0] = picture->current_frame->pitches[0]; - picture->pitches[1] = picture->current_frame->pitches[1]; - picture->pitches[2] = picture->current_frame->pitches[2]; - - if( picture->forward_reference_frame ) { - forward_reference_frame = picture->forward_reference_frame; - } - else { - /* return 1; */ - forward_reference_frame = picture->current_frame; - } - - if( picture->backward_reference_frame ) { - backward_reference_frame = picture->backward_reference_frame; - } - else { - /* return 
1; */ - backward_reference_frame = picture->current_frame; - } - - picture->f_motion.ref[0][0] = - forward_reference_frame->base[0] + (offset ? picture->pitches[0] : 0); - picture->f_motion.ref[0][1] = - forward_reference_frame->base[1] + (offset ? picture->pitches[1] : 0); - picture->f_motion.ref[0][2] = - forward_reference_frame->base[2] + (offset ? picture->pitches[2] : 0); - - picture->b_motion.ref[0][0] = - backward_reference_frame->base[0] + (offset ? picture->pitches[0] : 0); - picture->b_motion.ref[0][1] = - backward_reference_frame->base[1] + (offset ? picture->pitches[1] : 0); - picture->b_motion.ref[0][2] = - backward_reference_frame->base[2] + (offset ? picture->pitches[2] : 0); - - if (picture->picture_structure != FRAME_PICTURE) { - uint8_t ** forward_ref; - int bottom_field; - - bottom_field = (picture->picture_structure == BOTTOM_FIELD); - picture->dmv_offset = bottom_field ? 1 : -1; - picture->f_motion.ref2[0] = picture->f_motion.ref[bottom_field]; - picture->f_motion.ref2[1] = picture->f_motion.ref[!bottom_field]; - picture->b_motion.ref2[0] = picture->b_motion.ref[bottom_field]; - picture->b_motion.ref2[1] = picture->b_motion.ref[!bottom_field]; - - forward_ref = forward_reference_frame->base; - if (picture->second_field && (picture->picture_coding_type != B_TYPE)) - forward_ref = picture->current_frame->base; - - picture->f_motion.ref[1][0] = forward_ref[0] + (bottom_field ? 0 : picture->pitches[0]); - picture->f_motion.ref[1][1] = forward_ref[1] + (bottom_field ? 0 : picture->pitches[1]); - picture->f_motion.ref[1][2] = forward_ref[2] + (bottom_field ? 0 : picture->pitches[2]); - - picture->b_motion.ref[1][0] = - backward_reference_frame->base[0] + (bottom_field ? 0 : picture->pitches[0]); - picture->b_motion.ref[1][1] = - backward_reference_frame->base[1] + (bottom_field ? 0 : picture->pitches[1]); - picture->b_motion.ref[1][2] = - backward_reference_frame->base[2] + (bottom_field ? 
0 : picture->pitches[2]); - } - - picture->f_motion.pmv[0][0] = picture->f_motion.pmv[0][1] = 0; - picture->f_motion.pmv[1][0] = picture->f_motion.pmv[1][1] = 0; - picture->b_motion.pmv[0][0] = picture->b_motion.pmv[0][1] = 0; - picture->b_motion.pmv[1][0] = picture->b_motion.pmv[1][1] = 0; - - picture->v_offset = (code - 1) * 16; - offset = (code - 1); - if (picture->picture_structure != FRAME_PICTURE) - offset = 2 * offset; - - picture->dest[0] = picture->current_frame->base[0] + picture->pitches[0] * offset * 16; - picture->dest[1] = picture->current_frame->base[1] + picture->pitches[1] * offset * 8; - picture->dest[2] = picture->current_frame->base[2] + picture->pitches[2] * offset * 8; - - height = picture->coded_picture_height; - switch (picture->picture_structure) { - case BOTTOM_FIELD: - picture->dest[0] += picture->pitches[0]; - picture->dest[1] += picture->pitches[1]; - picture->dest[2] += picture->pitches[2]; - /* follow thru */ - case TOP_FIELD: - picture->pitches[0] <<= 1; - picture->pitches[1] <<= 1; - picture->pitches[2] <<= 1; - height >>= 1; - } - picture->limit_x = 2 * picture->coded_picture_width - 32; - picture->limit_y_16 = 2 * height - 32; - picture->limit_y_8 = 2 * height - 16; - picture->limit_y = height - 16; - - picture->dc_dct_pred[0] = picture->dc_dct_pred[1] = - picture->dc_dct_pred[2] = 1 << (picture->intra_dc_precision + 7); - - picture->quantizer_scale = get_quantizer_scale (picture); - - /* ignore intra_slice and all the extra data */ - while (bit_buf & 0x80000000) { - DUMPBITS (bit_buf, bits, 9); - NEEDBITS (bit_buf, bits, bit_ptr); - } - - /* decode initial macroblock address increment */ - offset = 0; - while (1) { - if (bit_buf >= 0x08000000) { - mba = MBA_5 + (UBITS (bit_buf, 6) - 2); - break; - } else if (bit_buf >= 0x01800000) { - mba = MBA_11 + (UBITS (bit_buf, 12) - 24); - break; - } else switch (UBITS (bit_buf, 12)) { - case 8: /* macroblock_escape */ - offset += 33; - DUMPBITS (bit_buf, bits, 11); - NEEDBITS (bit_buf, 
bits, bit_ptr); - continue; - case 15: /* macroblock_stuffing (MPEG1 only) */ - bit_buf &= 0xfffff; - DUMPBITS (bit_buf, bits, 11); - NEEDBITS (bit_buf, bits, bit_ptr); - continue; - default: /* error */ - return 1; - } - } - DUMPBITS (bit_buf, bits, mba->len + 1); - picture->offset = (offset + mba->mba) << 4; - - while (picture->offset - picture->coded_picture_width >= 0) { - picture->offset -= picture->coded_picture_width; - if ((picture->current_frame->proc_slice == NULL) || - (picture->picture_coding_type != B_TYPE)) { - picture->dest[0] += 16 * picture->pitches[0]; - picture->dest[1] += 8 * picture->pitches[1]; - picture->dest[2] += 8 * picture->pitches[2]; - } - picture->v_offset += 16; - } - if (picture->v_offset > picture->limit_y) - return 1; - - return 0; -#undef bit_buf -#undef bits -#undef bit_ptr -} - -void mpeg2_slice (picture_t * picture, int code, uint8_t * buffer) -{ -#define bit_buf (picture->bitstream_buf) -#define bits (picture->bitstream_bits) -#define bit_ptr (picture->bitstream_ptr) - cpu_state_t cpu_state; - - bitstream_init (picture, buffer); - - if (slice_init (picture, code)) - return; - - if (mpeg2_cpu_state_save) - mpeg2_cpu_state_save (&cpu_state); - - while (1) { - int macroblock_modes; - int mba_inc; - const MBAtab * mba; - - NEEDBITS (bit_buf, bits, bit_ptr); - - macroblock_modes = get_macroblock_modes (picture); - - /* maybe integrate MACROBLOCK_QUANT test into get_macroblock_modes ? 
*/ - if (macroblock_modes & MACROBLOCK_QUANT) - picture->quantizer_scale = get_quantizer_scale (picture); - - if (macroblock_modes & MACROBLOCK_INTRA) { - - int DCT_offset, DCT_stride; - int offset; - uint8_t * dest_y; - - if (picture->concealment_motion_vectors) { - if (picture->picture_structure == FRAME_PICTURE) - motion_fr_conceal (picture); - else - motion_fi_conceal (picture); - } else { - picture->f_motion.pmv[0][0] = picture->f_motion.pmv[0][1] = 0; - picture->f_motion.pmv[1][0] = picture->f_motion.pmv[1][1] = 0; - picture->b_motion.pmv[0][0] = picture->b_motion.pmv[0][1] = 0; - picture->b_motion.pmv[1][0] = picture->b_motion.pmv[1][1] = 0; - } - - if (macroblock_modes & DCT_TYPE_INTERLACED) { - DCT_offset = picture->pitches[0]; - DCT_stride = picture->pitches[0] * 2; - } else { - DCT_offset = picture->pitches[0] * 8; - DCT_stride = picture->pitches[0]; - } - - offset = picture->offset; - dest_y = picture->dest[0] + offset; - slice_intra_DCT (picture, 0, dest_y, DCT_stride); - slice_intra_DCT (picture, 0, dest_y + 8, DCT_stride); - slice_intra_DCT (picture, 0, dest_y + DCT_offset, DCT_stride); - slice_intra_DCT (picture, 0, dest_y + DCT_offset + 8, DCT_stride); - slice_intra_DCT (picture, 1, picture->dest[1] + (offset >> 1), - picture->pitches[1]); - slice_intra_DCT (picture, 2, picture->dest[2] + (offset >> 1), - picture->pitches[2]); - - if (picture->picture_coding_type == D_TYPE) { - NEEDBITS (bit_buf, bits, bit_ptr); - DUMPBITS (bit_buf, bits, 1); - } - } else { - - if (picture->picture_structure == FRAME_PICTURE) - switch (macroblock_modes & MOTION_TYPE_MASK) { - case MC_FRAME: - if (picture->mpeg1) - MOTION_CALL (motion_mp1, macroblock_modes); - else - MOTION_CALL (motion_fr_frame, macroblock_modes); - break; - - case MC_FIELD: - MOTION_CALL (motion_fr_field, macroblock_modes); - break; - - case MC_DMV: - MOTION_CALL (motion_fr_dmv, MACROBLOCK_MOTION_FORWARD); - break; - - case 0: - /* non-intra mb without forward mv in a P picture */ - 
picture->f_motion.pmv[0][0] = 0; - picture->f_motion.pmv[0][1] = 0; - picture->f_motion.pmv[1][0] = 0; - picture->f_motion.pmv[1][1] = 0; - MOTION_CALL (motion_zero, MACROBLOCK_MOTION_FORWARD); - break; - } - else - switch (macroblock_modes & MOTION_TYPE_MASK) { - case MC_FIELD: - MOTION_CALL (motion_fi_field, macroblock_modes); - break; - - case MC_16X8: - MOTION_CALL (motion_fi_16x8, macroblock_modes); - break; - - case MC_DMV: - MOTION_CALL (motion_fi_dmv, MACROBLOCK_MOTION_FORWARD); - break; - - case 0: - /* non-intra mb without forward mv in a P picture */ - picture->f_motion.pmv[0][0] = 0; - picture->f_motion.pmv[0][1] = 0; - picture->f_motion.pmv[1][0] = 0; - picture->f_motion.pmv[1][1] = 0; - MOTION_CALL (motion_zero, MACROBLOCK_MOTION_FORWARD); - break; - } - - if (macroblock_modes & MACROBLOCK_PATTERN) { - int coded_block_pattern; - int DCT_offset, DCT_stride; - int offset; - uint8_t * dest_y; - - if (macroblock_modes & DCT_TYPE_INTERLACED) { - DCT_offset = picture->pitches[0]; - DCT_stride = picture->pitches[0] * 2; - } else { - DCT_offset = picture->pitches[0] * 8; - DCT_stride = picture->pitches[0]; - } - - coded_block_pattern = get_coded_block_pattern (picture); - - offset = picture->offset; - dest_y = picture->dest[0] + offset; - if (coded_block_pattern & 0x20) - slice_non_intra_DCT (picture, dest_y, DCT_stride); - if (coded_block_pattern & 0x10) - slice_non_intra_DCT (picture, dest_y + 8, DCT_stride); - if (coded_block_pattern & 0x08) - slice_non_intra_DCT (picture, dest_y + DCT_offset, - DCT_stride); - if (coded_block_pattern & 0x04) - slice_non_intra_DCT (picture, dest_y + DCT_offset + 8, - DCT_stride); - if (coded_block_pattern & 0x2) - slice_non_intra_DCT (picture, - picture->dest[1] + (offset >> 1), - picture->pitches[1]); - if (coded_block_pattern & 0x1) - slice_non_intra_DCT (picture, - picture->dest[2] + (offset >> 1), - picture->pitches[2]); - } - - picture->dc_dct_pred[0] = picture->dc_dct_pred[1] = - picture->dc_dct_pred[2] = 128 << 
picture->intra_dc_precision; - } - - NEXT_MACROBLOCK; - - NEEDBITS (bit_buf, bits, bit_ptr); - mba_inc = 0; - while (1) { - if (bit_buf >= 0x10000000) { - mba = MBA_5 + (UBITS (bit_buf, 5) - 2); - break; - } else if (bit_buf >= 0x03000000) { - mba = MBA_11 + (UBITS (bit_buf, 11) - 24); - break; - } else switch (UBITS (bit_buf, 11)) { - case 8: /* macroblock_escape */ - mba_inc += 33; - /* pass through */ - case 15: /* macroblock_stuffing (MPEG1 only) */ - DUMPBITS (bit_buf, bits, 11); - NEEDBITS (bit_buf, bits, bit_ptr); - continue; - default: /* end of slice, or error */ - if (mpeg2_cpu_state_restore) - mpeg2_cpu_state_restore (&cpu_state); - return; - } - } - DUMPBITS (bit_buf, bits, mba->len); - mba_inc += mba->mba; - - if (mba_inc) { - picture->dc_dct_pred[0] = picture->dc_dct_pred[1] = - picture->dc_dct_pred[2] = 128 << picture->intra_dc_precision; - - if (picture->picture_coding_type == P_TYPE) { - picture->f_motion.pmv[0][0] = picture->f_motion.pmv[0][1] = 0; - picture->f_motion.pmv[1][0] = picture->f_motion.pmv[1][1] = 0; - - do { - MOTION_CALL (motion_zero, MACROBLOCK_MOTION_FORWARD); - NEXT_MACROBLOCK; - } while (--mba_inc); - } else { - do { - MOTION_CALL (motion_reuse, macroblock_modes); - NEXT_MACROBLOCK; - } while (--mba_inc); - } - } - } -#undef bit_buf -#undef bits -#undef bit_ptr -} diff --git a/src/libmpeg2/slice_xvmc.c b/src/libmpeg2/slice_xvmc.c deleted file mode 100644 index 014ae7924..000000000 --- a/src/libmpeg2/slice_xvmc.c +++ /dev/null @@ -1,1988 +0,0 @@ -/* - * slice_xvmc.c - * Copyright (C) 2000-2002 Michel Lespinasse - * Copyright (C) 1999-2000 Aaron Holtzman - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * See http://libmpeg2.sourceforge.net/ for updates. - * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
- * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include "config.h" - -#include -#include /* memcpy/memset, try to remove */ -#include -#include - -#include -#include -#include "mpeg2_internal.h" -#include - -#include -#include "accel_xvmc.h" -#include "xvmc.h" - - -#define MOTION_ACCEL XINE_VO_MOTION_ACCEL -#define IDCT_ACCEL XINE_VO_IDCT_ACCEL -#define SIGNED_INTRA XINE_VO_SIGNED_INTRA -#define ACCEL (MOTION_ACCEL | IDCT_ACCEL) - -#include "vlc.h" -/* original (non-patched) scan tables */ - -static const uint8_t mpeg2_scan_norm_orig[64] ATTR_ALIGN(16) = -{ - /* Zig-Zag scan pattern */ - 0, 1, 8,16, 9, 2, 3,10, - 17,24,32,25,18,11, 4, 5, - 12,19,26,33,40,48,41,34, - 27,20,13, 6, 7,14,21,28, - 35,42,49,56,57,50,43,36, - 29,22,15,23,30,37,44,51, - 58,59,52,45,38,31,39,46, - 53,60,61,54,47,55,62,63 -}; - -static const uint8_t mpeg2_scan_alt_orig[64] ATTR_ALIGN(16) = -{ - /* Alternate scan pattern */ - 0,8,16,24,1,9,2,10,17,25,32,40,48,56,57,49, - 41,33,26,18,3,11,4,12,19,27,34,42,50,58,35,43, - 51,59,20,28,5,13,6,14,21,29,36,44,52,60,37,45, - 53,61,22,30,7,15,23,31,38,46,54,62,39,47,55,63 -}; - -static uint8_t mpeg2_scan_alt_ptable[64] ATTR_ALIGN(16); -static uint8_t mpeg2_scan_norm_ptable[64] ATTR_ALIGN(16); -static uint8_t mpeg2_scan_orig_ptable[64] ATTR_ALIGN(16); - -void xvmc_setup_scan_ptable( void ) -{ - int i; - for (i=0; i<64; ++i) { - mpeg2_scan_norm_ptable[mpeg2_scan_norm_orig[i]] = mpeg2_scan_norm[i]; - mpeg2_scan_alt_ptable[mpeg2_scan_alt_orig[i]] = mpeg2_scan_alt[i]; - mpeg2_scan_orig_ptable[i] = i; - } -} - - -static const int 
non_linear_quantizer_scale [] = { - 0, 1, 2, 3, 4, 5, 6, 7, - 8, 10, 12, 14, 16, 18, 20, 22, - 24, 28, 32, 36, 40, 44, 48, 52, - 56, 64, 72, 80, 88, 96, 104, 112 -}; - -static inline int get_xvmc_macroblock_modes (picture_t * picture) -{ -#define bit_buf (picture->bitstream_buf) -#define bits (picture->bitstream_bits) -#define bit_ptr (picture->bitstream_ptr) - int macroblock_modes; - const MBtab * tab; - - switch (picture->picture_coding_type) { - case I_TYPE: - - tab = MB_I + UBITS (bit_buf, 1); - DUMPBITS (bit_buf, bits, tab->len); - macroblock_modes = tab->modes; - - if ((! (picture->frame_pred_frame_dct)) && - (picture->picture_structure == FRAME_PICTURE)) { - macroblock_modes |= UBITS (bit_buf, 1) * DCT_TYPE_INTERLACED; - DUMPBITS (bit_buf, bits, 1); - } - - return macroblock_modes; - - case P_TYPE: - - tab = MB_P + UBITS (bit_buf, 5); - DUMPBITS (bit_buf, bits, tab->len); - macroblock_modes = tab->modes; - - if (picture->picture_structure != FRAME_PICTURE) { - if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) { - macroblock_modes |= UBITS (bit_buf, 2) * MOTION_TYPE_BASE; - DUMPBITS (bit_buf, bits, 2); - } - return macroblock_modes; - } else if (picture->frame_pred_frame_dct) { - if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) - macroblock_modes |= MC_FRAME; - return macroblock_modes; - } else { - if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) { - macroblock_modes |= UBITS (bit_buf, 2) * MOTION_TYPE_BASE; - DUMPBITS (bit_buf, bits, 2); - } - if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN)) { - macroblock_modes |= UBITS (bit_buf, 1) * DCT_TYPE_INTERLACED; - DUMPBITS (bit_buf, bits, 1); - } - return macroblock_modes; - } - - case B_TYPE: - - tab = MB_B + UBITS (bit_buf, 6); - DUMPBITS (bit_buf, bits, tab->len); - macroblock_modes = tab->modes; - - if (picture->picture_structure != FRAME_PICTURE) { - if (! 
(macroblock_modes & MACROBLOCK_INTRA)) { - macroblock_modes |= UBITS (bit_buf, 2) * MOTION_TYPE_BASE; - DUMPBITS (bit_buf, bits, 2); - } - return macroblock_modes; - } else if (picture->frame_pred_frame_dct) { - /* if (! (macroblock_modes & MACROBLOCK_INTRA)) */ - macroblock_modes |= MC_FRAME; - return macroblock_modes; - } else { - if (macroblock_modes & MACROBLOCK_INTRA) - goto intra; - macroblock_modes |= UBITS (bit_buf, 2) * MOTION_TYPE_BASE; - DUMPBITS (bit_buf, bits, 2); - if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN)) { - intra: - macroblock_modes |= UBITS (bit_buf, 1) * DCT_TYPE_INTERLACED; - DUMPBITS (bit_buf, bits, 1); - } - return macroblock_modes; - } - - case D_TYPE: - - DUMPBITS (bit_buf, bits, 1); - return MACROBLOCK_INTRA; - - default: - return 0; - } -#undef bit_buf -#undef bits -#undef bit_ptr -} - -static inline int get_xvmc_quantizer_scale (picture_t * picture) -{ -#define bit_buf (picture->bitstream_buf) -#define bits (picture->bitstream_bits) -#define bit_ptr (picture->bitstream_ptr) - - int quantizer_scale_code; - - quantizer_scale_code = UBITS (bit_buf, 5); - DUMPBITS (bit_buf, bits, 5); - - if (picture->q_scale_type) - return non_linear_quantizer_scale [quantizer_scale_code]; - else - return quantizer_scale_code << 1; -#undef bit_buf -#undef bits -#undef bit_ptr -} - -static inline int get_xvmc_motion_delta (picture_t * picture, int f_code) -{ -#define bit_buf (picture->bitstream_buf) -#define bits (picture->bitstream_bits) -#define bit_ptr (picture->bitstream_ptr) - - int delta; - int sign; - const MVtab * tab; - - if (bit_buf & 0x80000000) { - DUMPBITS (bit_buf, bits, 1); - return 0; - } else if (bit_buf >= 0x0c000000) { - - tab = MV_4 + UBITS (bit_buf, 4); - delta = (tab->delta << f_code) + 1; - bits += tab->len + f_code + 1; - bit_buf <<= tab->len; - - sign = SBITS (bit_buf, 1); - bit_buf <<= 1; - - if (f_code) - delta += UBITS (bit_buf, f_code); - bit_buf <<= f_code; - - return (delta ^ sign) - sign; - - } else { - - 
tab = MV_10 + UBITS (bit_buf, 10); - delta = (tab->delta << f_code) + 1; - bits += tab->len + 1; - bit_buf <<= tab->len; - - sign = SBITS (bit_buf, 1); - bit_buf <<= 1; - - if (f_code) { - NEEDBITS (bit_buf, bits, bit_ptr); - delta += UBITS (bit_buf, f_code); - DUMPBITS (bit_buf, bits, f_code); - } - - return (delta ^ sign) - sign; - - } -#undef bit_buf -#undef bits -#undef bit_ptr -} - -static inline int bound_motion_vector (int vec, int f_code) -{ -#if 1 - unsigned int limit; - int sign; - - limit = 16 << f_code; - - if ((unsigned int)(vec + limit) < 2 * limit) - return vec; - else { - sign = ((int32_t)vec) >> 31; - return vec - ((2 * limit) ^ sign) + sign; - } -#else - return ((int32_t)vec << (27 - f_code)) >> (27 - f_code); -#endif -} - -static inline int get_xvmc_dmv (picture_t * picture) -{ -#define bit_buf (picture->bitstream_buf) -#define bits (picture->bitstream_bits) -#define bit_ptr (picture->bitstream_ptr) - - const DMVtab * tab; - - tab = DMV_2 + UBITS (bit_buf, 2); - DUMPBITS (bit_buf, bits, tab->len); - return tab->dmv; -#undef bit_buf -#undef bits -#undef bit_ptr -} - -static inline int get_xvmc_coded_block_pattern (picture_t * picture) -{ -#define bit_buf (picture->bitstream_buf) -#define bits (picture->bitstream_bits) -#define bit_ptr (picture->bitstream_ptr) - - const CBPtab * tab; - - NEEDBITS (bit_buf, bits, bit_ptr); - - if (bit_buf >= 0x20000000) { - - tab = CBP_7 + (UBITS (bit_buf, 7) - 16); - DUMPBITS (bit_buf, bits, tab->len); - return tab->cbp; - - } else { - - tab = CBP_9 + UBITS (bit_buf, 9); - DUMPBITS (bit_buf, bits, tab->len); - return tab->cbp; - } - -#undef bit_buf -#undef bits -#undef bit_ptr -} - -static inline int get_xvmc_luma_dc_dct_diff (picture_t * picture) -{ -#define bit_buf (picture->bitstream_buf) -#define bits (picture->bitstream_bits) -#define bit_ptr (picture->bitstream_ptr) - const DCtab * tab; - int size; - int dc_diff; - - if (bit_buf < 0xf8000000) { - tab = DC_lum_5 + UBITS (bit_buf, 5); - size = tab->size; - if 
(size) { - bits += tab->len + size; - bit_buf <<= tab->len; - dc_diff = - UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size); - bit_buf <<= size; - return dc_diff; - } else { - DUMPBITS (bit_buf, bits, 3); - return 0; - } - } else { - tab = DC_long + (UBITS (bit_buf, 9) - 0x1e0); - size = tab->size; - DUMPBITS (bit_buf, bits, tab->len); - NEEDBITS (bit_buf, bits, bit_ptr); - dc_diff = UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size); - DUMPBITS (bit_buf, bits, size); - return dc_diff; - } -#undef bit_buf -#undef bits -#undef bit_ptr -} - -static inline int get_xvmc_chroma_dc_dct_diff (picture_t * picture) -{ -#define bit_buf (picture->bitstream_buf) -#define bits (picture->bitstream_bits) -#define bit_ptr (picture->bitstream_ptr) - const DCtab * tab; - int size; - int dc_diff; - - if (bit_buf < 0xf8000000) { - tab = DC_chrom_5 + UBITS (bit_buf, 5); - size = tab->size; - if (size) { - bits += tab->len + size; - bit_buf <<= tab->len; - dc_diff = - UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size); - bit_buf <<= size; - return dc_diff; - } else { - DUMPBITS (bit_buf, bits, 2); - return 0; - } - } else { - tab = DC_long + (UBITS (bit_buf, 10) - 0x3e0); - size = tab->size; - DUMPBITS (bit_buf, bits, tab->len + 1); - NEEDBITS (bit_buf, bits, bit_ptr); - dc_diff = UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size); - DUMPBITS (bit_buf, bits, size); - return dc_diff; - } -#undef bit_buf -#undef bits -#undef bit_ptr -} - -#define SATURATE(val) \ -do { \ - if ((uint32_t)(val + 2048) > 4095) \ - val = (val > 0) ? 
2047 : -2048; \ -} while (0) - -static void get_xvmc_intra_block_B14 (picture_t * picture) -{ - int i; - int j; - int l; - int val; - const uint8_t * scan = picture->scan; - uint8_t * scan_ptable = mpeg2_scan_orig_ptable; - uint8_t * quant_matrix = picture->intra_quantizer_matrix; - int quantizer_scale = picture->quantizer_scale; - int mismatch; - const DCTtab * tab; - uint32_t bit_buf; - int bits; - uint8_t * bit_ptr; - int16_t * dest; - - dest = picture->mc->blockptr; - - if( picture->mc->xvmc_accel & IDCT_ACCEL ) { - if ( scan == mpeg2_scan_norm ) { - scan = mpeg2_scan_norm_orig; - scan_ptable = mpeg2_scan_norm_ptable; - } else { - scan = mpeg2_scan_alt_orig; - scan_ptable = mpeg2_scan_alt_ptable; - } - } - - i = 0; - mismatch = ~dest[0]; - - bit_buf = picture->bitstream_buf; - bits = picture->bitstream_bits; - bit_ptr = picture->bitstream_ptr; - - NEEDBITS (bit_buf, bits, bit_ptr); - - while (1) { - if (bit_buf >= 0x28000000) { - - tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5); - - i += tab->run; - if (i >= 64) - break; /* end of block */ - - normal_code: - l = scan_ptable[j = scan[i]]; - - bit_buf <<= tab->len; - bits += tab->len + 1; - val = (tab->level * quantizer_scale * quant_matrix[l]) >> 4; - - /* if (bitstream_get (1)) val = -val; */ - val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1); - - SATURATE (val); - dest[j] = val; - mismatch ^= val; - - bit_buf <<= 1; - NEEDBITS (bit_buf, bits, bit_ptr); - - continue; - - } else if (bit_buf >= 0x04000000) { - - tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4); - - i += tab->run; - if (i < 64) - goto normal_code; - - /* escape code */ - - i += UBITS (bit_buf << 6, 6) - 64; - if (i >= 64) - break; /* illegal, check needed to avoid buffer overflow */ - - l = scan_ptable[j = scan[i]]; - - DUMPBITS (bit_buf, bits, 12); - NEEDBITS (bit_buf, bits, bit_ptr); - val = (SBITS (bit_buf, 12) * - quantizer_scale * quant_matrix[l]) / 16; - - SATURATE (val); - dest[j] = val; - mismatch ^= val; - - DUMPBITS (bit_buf, bits, 12); - 
NEEDBITS (bit_buf, bits, bit_ptr); - - continue; - - } else if (bit_buf >= 0x02000000) { - tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8); - i += tab->run; - if (i < 64) - goto normal_code; - } else if (bit_buf >= 0x00800000) { - tab = DCT_13 + (UBITS (bit_buf, 13) - 16); - i += tab->run; - if (i < 64) - goto normal_code; - } else if (bit_buf >= 0x00200000) { - tab = DCT_15 + (UBITS (bit_buf, 15) - 16); - i += tab->run; - if (i < 64) - goto normal_code; - } else { - tab = DCT_16 + UBITS (bit_buf, 16); - bit_buf <<= 16; - GETWORD (bit_buf, bits + 16, bit_ptr); - i += tab->run; - if (i < 64) - goto normal_code; - } - break; /* illegal, check needed to avoid buffer overflow */ - } - - dest[63] ^= mismatch & 1; - DUMPBITS (bit_buf, bits, 2); /* dump end of block code */ - picture->bitstream_buf = bit_buf; - picture->bitstream_bits = bits; - picture->bitstream_ptr = bit_ptr; -} - -static void get_xvmc_intra_block_B15 (picture_t * picture) -{ - int i; - int j; - int l; - int val; - const uint8_t * scan = picture->scan; - uint8_t * scan_ptable = mpeg2_scan_orig_ptable; - uint8_t * quant_matrix = picture->intra_quantizer_matrix; - int quantizer_scale = picture->quantizer_scale; - int mismatch; - const DCTtab * tab; - uint32_t bit_buf; - int bits; - uint8_t * bit_ptr; - int16_t * dest; - - dest = picture->mc->blockptr; - - if( picture->mc->xvmc_accel & IDCT_ACCEL ) { - if ( scan == mpeg2_scan_norm ) { - scan = mpeg2_scan_norm_orig; - scan_ptable = mpeg2_scan_norm_ptable; - } else { - scan = mpeg2_scan_alt_orig; - scan_ptable = mpeg2_scan_alt_ptable; - } - } - - i = 0; - mismatch = ~dest[0]; - - bit_buf = picture->bitstream_buf; - bits = picture->bitstream_bits; - bit_ptr = picture->bitstream_ptr; - - NEEDBITS (bit_buf, bits, bit_ptr); - - while (1) { - if (bit_buf >= 0x04000000) { - - tab = DCT_B15_8 + (UBITS (bit_buf, 8) - 4); - - i += tab->run; - if (i < 64) { - - normal_code: - l = scan_ptable[j = scan[i]]; - bit_buf <<= tab->len; - bits += tab->len + 1; - val = 
(tab->level * quantizer_scale * quant_matrix[l]) >> 4; - - /* if (bitstream_get (1)) val = -val; */ - val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1); - - SATURATE (val); - dest[j] = val; - mismatch ^= val; - - bit_buf <<= 1; - NEEDBITS (bit_buf, bits, bit_ptr); - - continue; - - } else { - - /* end of block. I commented out this code because if we */ - /* dont exit here we will still exit at the later test :) */ - - /* if (i >= 128) break; */ /* end of block */ - - /* escape code */ - - i += UBITS (bit_buf << 6, 6) - 64; - if (i >= 64) - break; /* illegal, check against buffer overflow */ - - l = scan_ptable[j = scan[i]]; - - DUMPBITS (bit_buf, bits, 12); - NEEDBITS (bit_buf, bits, bit_ptr); - val = (SBITS (bit_buf, 12) * - quantizer_scale * quant_matrix[l]) / 16; - - SATURATE (val); - dest[j] = val; - mismatch ^= val; - - DUMPBITS (bit_buf, bits, 12); - NEEDBITS (bit_buf, bits, bit_ptr); - - continue; - - } - } else if (bit_buf >= 0x02000000) { - tab = DCT_B15_10 + (UBITS (bit_buf, 10) - 8); - i += tab->run; - if (i < 64) - goto normal_code; - } else if (bit_buf >= 0x00800000) { - tab = DCT_13 + (UBITS (bit_buf, 13) - 16); - i += tab->run; - if (i < 64) - goto normal_code; - } else if (bit_buf >= 0x00200000) { - tab = DCT_15 + (UBITS (bit_buf, 15) - 16); - i += tab->run; - if (i < 64) - goto normal_code; - } else { - tab = DCT_16 + UBITS (bit_buf, 16); - bit_buf <<= 16; - GETWORD (bit_buf, bits + 16, bit_ptr); - i += tab->run; - if (i < 64) - goto normal_code; - } - break; /* illegal, check needed to avoid buffer overflow */ - } - - dest[63] ^= mismatch & 1; - DUMPBITS (bit_buf, bits, 4); /* dump end of block code */ - picture->bitstream_buf = bit_buf; - picture->bitstream_bits = bits; - picture->bitstream_ptr = bit_ptr; -} - -static void get_xvmc_non_intra_block (picture_t * picture) -{ - int i; - int j; - int l; - int val; - const uint8_t * scan = picture->scan; - uint8_t * scan_ptable = mpeg2_scan_orig_ptable; - uint8_t * quant_matrix = 
picture->non_intra_quantizer_matrix; - int quantizer_scale = picture->quantizer_scale; - int mismatch; - const DCTtab * tab; - uint32_t bit_buf; - int bits; - uint8_t * bit_ptr; - int16_t * dest; - - i = -1; - mismatch = 1; - - dest = picture->mc->blockptr; - - if( picture->mc->xvmc_accel & IDCT_ACCEL ) { - if ( scan == mpeg2_scan_norm ) { - scan = mpeg2_scan_norm_orig; - scan_ptable = mpeg2_scan_norm_ptable; - } else { - scan = mpeg2_scan_alt_orig; - scan_ptable = mpeg2_scan_alt_ptable; - } - } - - bit_buf = picture->bitstream_buf; - bits = picture->bitstream_bits; - bit_ptr = picture->bitstream_ptr; - - NEEDBITS (bit_buf, bits, bit_ptr); - if (bit_buf >= 0x28000000) { - tab = DCT_B14DC_5 + (UBITS (bit_buf, 5) - 5); - goto entry_1; - } else - goto entry_2; - - while (1) { - if (bit_buf >= 0x28000000) { - - tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5); - - entry_1: - i += tab->run; - if (i >= 64) - break; /* end of block */ - - normal_code: - l = scan_ptable[j = scan[i]]; - bit_buf <<= tab->len; - bits += tab->len + 1; - val = ((2*tab->level+1) * quantizer_scale * quant_matrix[l]) >> 5; - - /* if (bitstream_get (1)) val = -val; */ - val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1); - - SATURATE (val); - dest[j] = val; - mismatch ^= val; - - bit_buf <<= 1; - NEEDBITS (bit_buf, bits, bit_ptr); - - continue; - - } - - entry_2: - if (bit_buf >= 0x04000000) { - - tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4); - - i += tab->run; - if (i < 64) - goto normal_code; - - /* escape code */ - - i += UBITS (bit_buf << 6, 6) - 64; - if (i >= 64) - break; /* illegal, check needed to avoid buffer overflow */ - - l = scan_ptable[j = scan[i]]; - - DUMPBITS (bit_buf, bits, 12); - NEEDBITS (bit_buf, bits, bit_ptr); - val = 2 * (SBITS (bit_buf, 12) + SBITS (bit_buf, 1)) + 1; - val = (val * quantizer_scale * quant_matrix[l]) / 32; - - SATURATE (val); - dest[j] = val; - mismatch ^= val; - - DUMPBITS (bit_buf, bits, 12); - NEEDBITS (bit_buf, bits, bit_ptr); - - continue; - - } else if 
(bit_buf >= 0x02000000) { - tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8); - i += tab->run; - if (i < 64) - goto normal_code; - } else if (bit_buf >= 0x00800000) { - tab = DCT_13 + (UBITS (bit_buf, 13) - 16); - i += tab->run; - if (i < 64) - goto normal_code; - } else if (bit_buf >= 0x00200000) { - tab = DCT_15 + (UBITS (bit_buf, 15) - 16); - i += tab->run; - if (i < 64) - goto normal_code; - } else { - tab = DCT_16 + UBITS (bit_buf, 16); - bit_buf <<= 16; - GETWORD (bit_buf, bits + 16, bit_ptr); - i += tab->run; - if (i < 64) - goto normal_code; - } - break; /* illegal, check needed to avoid buffer overflow */ - } - dest[63] ^= mismatch & 1; - DUMPBITS (bit_buf, bits, 2); /* dump end of block code */ - picture->bitstream_buf = bit_buf; - picture->bitstream_bits = bits; - picture->bitstream_ptr = bit_ptr; -} - -static void get_xvmc_mpeg1_intra_block (picture_t * picture) -{ - int i; - int j; - int l; - int val; - const uint8_t * scan = picture->scan; - uint8_t * scan_ptable = mpeg2_scan_orig_ptable; - uint8_t * quant_matrix = picture->intra_quantizer_matrix; - int quantizer_scale = picture->quantizer_scale; - const DCTtab * tab; - uint32_t bit_buf; - int bits; - uint8_t * bit_ptr; - int16_t * dest; - - i = 0; - - dest = picture->mc->blockptr; - - if( picture->mc->xvmc_accel & IDCT_ACCEL ) { - if ( scan == mpeg2_scan_norm ) { - scan = mpeg2_scan_norm_orig; - scan_ptable = mpeg2_scan_norm_ptable; - } else { - scan = mpeg2_scan_alt_orig; - scan_ptable = mpeg2_scan_alt_ptable; - } - } - - bit_buf = picture->bitstream_buf; - bits = picture->bitstream_bits; - bit_ptr = picture->bitstream_ptr; - - NEEDBITS (bit_buf, bits, bit_ptr); - - while (1) { - if (bit_buf >= 0x28000000) { - - tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5); - - i += tab->run; - if (i >= 64) - break; /* end of block */ - - normal_code: - l = scan_ptable[j = scan[i]]; - bit_buf <<= tab->len; - bits += tab->len + 1; - val = (tab->level * quantizer_scale * quant_matrix[l]) >> 4; - - /* oddification */ - val 
= (val - 1) | 1; - - /* if (bitstream_get (1)) val = -val; */ - val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1); - - SATURATE (val); - dest[j] = val; - - bit_buf <<= 1; - NEEDBITS (bit_buf, bits, bit_ptr); - - continue; - - } else if (bit_buf >= 0x04000000) { - - tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4); - - i += tab->run; - if (i < 64) - goto normal_code; - - /* escape code */ - - i += UBITS (bit_buf << 6, 6) - 64; - if (i >= 64) - break; /* illegal, check needed to avoid buffer overflow */ - - l = scan_ptable[j = scan[i]]; - - DUMPBITS (bit_buf, bits, 12); - NEEDBITS (bit_buf, bits, bit_ptr); - val = SBITS (bit_buf, 8); - if (! (val & 0x7f)) { - DUMPBITS (bit_buf, bits, 8); - val = UBITS (bit_buf, 8) + 2 * val; - } - val = (val * quantizer_scale * quant_matrix[l]) / 16; - - /* oddification */ - val = (val + ~SBITS (val, 1)) | 1; - - SATURATE (val); - dest[j] = val; - - DUMPBITS (bit_buf, bits, 8); - NEEDBITS (bit_buf, bits, bit_ptr); - - continue; - - } else if (bit_buf >= 0x02000000) { - tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8); - i += tab->run; - if (i < 64) - goto normal_code; - } else if (bit_buf >= 0x00800000) { - tab = DCT_13 + (UBITS (bit_buf, 13) - 16); - i += tab->run; - if (i < 64) - goto normal_code; - } else if (bit_buf >= 0x00200000) { - tab = DCT_15 + (UBITS (bit_buf, 15) - 16); - i += tab->run; - if (i < 64) - goto normal_code; - } else { - tab = DCT_16 + UBITS (bit_buf, 16); - bit_buf <<= 16; - GETWORD (bit_buf, bits + 16, bit_ptr); - i += tab->run; - if (i < 64) - goto normal_code; - } - break; /* illegal, check needed to avoid buffer overflow */ - } - DUMPBITS (bit_buf, bits, 2); /* dump end of block code */ - picture->bitstream_buf = bit_buf; - picture->bitstream_bits = bits; - picture->bitstream_ptr = bit_ptr; -} - -static void get_xvmc_mpeg1_non_intra_block (picture_t * picture) -{ - int i; - int j; - int l; - int val; - const uint8_t * scan = picture->scan; - uint8_t * scan_ptable = mpeg2_scan_orig_ptable; - uint8_t * 
quant_matrix = picture->non_intra_quantizer_matrix; - int quantizer_scale = picture->quantizer_scale; - const DCTtab * tab; - uint32_t bit_buf; - int bits; - uint8_t * bit_ptr; - int16_t * dest; - - i = -1; - - dest = picture->mc->blockptr; - - if( picture->mc->xvmc_accel & IDCT_ACCEL ) { - if ( scan == mpeg2_scan_norm ) { - scan = mpeg2_scan_norm_orig; - scan_ptable = mpeg2_scan_norm_ptable; - } else { - scan = mpeg2_scan_alt_orig; - scan_ptable = mpeg2_scan_alt_ptable; - } - } - - bit_buf = picture->bitstream_buf; - bits = picture->bitstream_bits; - bit_ptr = picture->bitstream_ptr; - - NEEDBITS (bit_buf, bits, bit_ptr); - if (bit_buf >= 0x28000000) { - tab = DCT_B14DC_5 + (UBITS (bit_buf, 5) - 5); - goto entry_1; - } else - goto entry_2; - - while (1) { - if (bit_buf >= 0x28000000) { - - tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5); - - entry_1: - i += tab->run; - if (i >= 64) - break; /* end of block */ - - normal_code: - l = scan_ptable[j = scan[i]]; - bit_buf <<= tab->len; - bits += tab->len + 1; - val = ((2*tab->level+1) * quantizer_scale * quant_matrix[l]) >> 5; - - /* oddification */ - val = (val - 1) | 1; - - /* if (bitstream_get (1)) val = -val; */ - val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1); - - SATURATE (val); - dest[j] = val; - - bit_buf <<= 1; - NEEDBITS (bit_buf, bits, bit_ptr); - - continue; - - } - - entry_2: - if (bit_buf >= 0x04000000) { - - tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4); - - i += tab->run; - if (i < 64) - goto normal_code; - - /* escape code */ - - i += UBITS (bit_buf << 6, 6) - 64; - if (i >= 64) - break; /* illegal, check needed to avoid buffer overflow */ - - l = scan_ptable[j = scan[i]]; - - DUMPBITS (bit_buf, bits, 12); - NEEDBITS (bit_buf, bits, bit_ptr); - val = SBITS (bit_buf, 8); - if (! 
(val & 0x7f)) { - DUMPBITS (bit_buf, bits, 8); - val = UBITS (bit_buf, 8) + 2 * val; - } - val = 2 * (val + SBITS (val, 1)) + 1; - val = (val * quantizer_scale * quant_matrix[l]) / 32; - - /* oddification */ - val = (val + ~SBITS (val, 1)) | 1; - - SATURATE (val); - dest[j] = val; - - DUMPBITS (bit_buf, bits, 8); - NEEDBITS (bit_buf, bits, bit_ptr); - - continue; - - } else if (bit_buf >= 0x02000000) { - tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8); - i += tab->run; - if (i < 64) - goto normal_code; - } else if (bit_buf >= 0x00800000) { - tab = DCT_13 + (UBITS (bit_buf, 13) - 16); - i += tab->run; - if (i < 64) - goto normal_code; - } else if (bit_buf >= 0x00200000) { - tab = DCT_15 + (UBITS (bit_buf, 15) - 16); - i += tab->run; - if (i < 64) - goto normal_code; - } else { - tab = DCT_16 + UBITS (bit_buf, 16); - bit_buf <<= 16; - GETWORD (bit_buf, bits + 16, bit_ptr); - i += tab->run; - if (i < 64) - goto normal_code; - } - break; /* illegal, check needed to avoid buffer overflow */ - } - DUMPBITS (bit_buf, bits, 2); /* dump end of block code */ - picture->bitstream_buf = bit_buf; - picture->bitstream_bits = bits; - picture->bitstream_ptr = bit_ptr; -} - -static inline void slice_xvmc_intra_DCT (picture_t * picture, int cc, - uint8_t * dest, int stride) -{ -#define bit_buf (picture->bitstream_buf) -#define bits (picture->bitstream_bits) -#define bit_ptr (picture->bitstream_ptr) - NEEDBITS (bit_buf, bits, bit_ptr); - /* Get the intra DC coefficient and inverse quantize it */ - - // printf("slice: slice_xvmc_intra_DCT cc=%d pred[0]=%d\n",cc,picture->dc_dct_pred[0]); - if (cc == 0) - picture->dc_dct_pred[0] += get_xvmc_luma_dc_dct_diff (picture); - else - picture->dc_dct_pred[cc] += get_xvmc_chroma_dc_dct_diff (picture); - //TODO conversion to signed format - // printf("slice: pred[0]=%d presision=%d\n",picture->dc_dct_pred[0], - // picture->intra_dc_precision); - - mpeg2_zero_block(picture->mc->blockptr); - - picture->mc->blockptr[0] = picture->dc_dct_pred[cc] << (3 
- picture->intra_dc_precision); - - if (picture->mpeg1) { - if (picture->picture_coding_type != D_TYPE) - get_xvmc_mpeg1_intra_block (picture); - } else if (picture->intra_vlc_format) - get_xvmc_intra_block_B15 (picture); - else - get_xvmc_intra_block_B14 (picture); - - if((picture->mc->xvmc_accel & ACCEL) == MOTION_ACCEL) { - //motion_comp only no idct acceleration so do it in software - mpeg2_idct (picture->mc->blockptr); - } - picture->mc->blockptr += 64; -#undef bit_buf -#undef bits -#undef bit_ptr -} - -static inline void slice_xvmc_non_intra_DCT (picture_t * picture, uint8_t * dest, - int stride) -{ - mpeg2_zero_block(picture->mc->blockptr); - - if (picture->mpeg1) - get_xvmc_mpeg1_non_intra_block (picture); - else - get_xvmc_non_intra_block (picture); - - if((picture->mc->xvmc_accel & ACCEL) == MOTION_ACCEL) { - // motion comp only no idct acceleration so do it in sw - mpeg2_idct (picture->mc->blockptr); - } - picture->mc->blockptr += 64; -} - -static void motion_mp1 (picture_t * picture, motion_t * motion, - void (** table) (uint8_t *, uint8_t *, int, int)) -{ -#define bit_buf (picture->bitstream_buf) -#define bits (picture->bitstream_bits) -#define bit_ptr (picture->bitstream_ptr) - int motion_x, motion_y; - - NEEDBITS (bit_buf, bits, bit_ptr); - motion_x = (motion->pmv[0][0] + - (get_xvmc_motion_delta (picture, - motion->f_code[0]) << motion->f_code[1])); - motion_x = bound_motion_vector (motion_x, - motion->f_code[0] + motion->f_code[1]); - motion->pmv[0][0] = motion_x; - - NEEDBITS (bit_buf, bits, bit_ptr); - motion_y = (motion->pmv[0][1] + - (get_xvmc_motion_delta (picture, - motion->f_code[0]) << motion->f_code[1])); - motion_y = bound_motion_vector (motion_y, - motion->f_code[0] + motion->f_code[1]); - motion->pmv[0][1] = motion_y; - -#undef bit_buf -#undef bits -#undef bit_ptr -} - -static void motion_fr_frame (picture_t * picture, motion_t * motion, - void (** table) (uint8_t *, uint8_t *, int, int)) -{ -#define bit_buf (picture->bitstream_buf) 
-#define bits (picture->bitstream_bits) -#define bit_ptr (picture->bitstream_ptr) - int motion_x, motion_y; - - NEEDBITS (bit_buf, bits, bit_ptr); - motion_x = motion->pmv[0][0] + get_xvmc_motion_delta (picture, - motion->f_code[0]); - motion_x = bound_motion_vector (motion_x, motion->f_code[0]); - motion->pmv[1][0] = motion->pmv[0][0] = motion_x; - - NEEDBITS (bit_buf, bits, bit_ptr); - motion_y = motion->pmv[0][1] + get_xvmc_motion_delta (picture, - motion->f_code[1]); - motion_y = bound_motion_vector (motion_y, motion->f_code[1]); - motion->pmv[1][1] = motion->pmv[0][1] = motion_y; - -#undef bit_buf -#undef bits -#undef bit_ptr -} - -static void motion_fr_field (picture_t * picture, motion_t * motion, - void (** table) (uint8_t *, uint8_t *, int, int), - int dir) -{ -#define bit_buf (picture->bitstream_buf) -#define bits (picture->bitstream_bits) -#define bit_ptr (picture->bitstream_ptr) - int motion_x, motion_y, field; - // unsigned int pos_x, pos_y, xy_half; - - NEEDBITS (bit_buf, bits, bit_ptr); - field = UBITS (bit_buf, 1); - picture->XvMC_mv_field_sel[0][dir] = field; - DUMPBITS (bit_buf, bits, 1); - - motion_x = motion->pmv[0][0] + get_xvmc_motion_delta (picture, - motion->f_code[0]); - motion_x = bound_motion_vector (motion_x, motion->f_code[0]); - motion->pmv[0][0] = motion_x; - - NEEDBITS (bit_buf, bits, bit_ptr); - motion_y = (motion->pmv[0][1] >> 1) + get_xvmc_motion_delta (picture, - motion->f_code[1]); - /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */ - motion->pmv[0][1] = motion_y << 1; - - NEEDBITS (bit_buf, bits, bit_ptr); - field = UBITS (bit_buf, 1); - //TODO look at field select need bob (weave ok) - picture->XvMC_mv_field_sel[1][dir] = field; - DUMPBITS (bit_buf, bits, 1); - - motion_x = motion->pmv[1][0] + get_xvmc_motion_delta (picture, - motion->f_code[0]); - motion_x = bound_motion_vector (motion_x, motion->f_code[0]); - motion->pmv[1][0] = motion_x; - - NEEDBITS (bit_buf, bits, bit_ptr); - motion_y = 
(motion->pmv[1][1] >> 1) + get_xvmc_motion_delta (picture, - motion->f_code[1]); - /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */ - motion->pmv[1][1] = motion_y << 1; - -#undef bit_buf -#undef bits -#undef bit_ptr -} - -static void motion_fr_dmv (picture_t * picture, motion_t * motion, - void (** table) (uint8_t *, uint8_t *, int, int)) -{ -#define bit_buf (picture->bitstream_buf) -#define bits (picture->bitstream_bits) -#define bit_ptr (picture->bitstream_ptr) - int motion_x, motion_y; - - // TODO field select ?? possible need to be 0 - picture->XvMC_mv_field_sel[0][0] = picture->XvMC_mv_field_sel[1][0] = 0; - - NEEDBITS (bit_buf, bits, bit_ptr); - motion_x = motion->pmv[0][0] + get_xvmc_motion_delta (picture, - motion->f_code[0]); - motion_x = bound_motion_vector (motion_x, motion->f_code[0]); - motion->pmv[1][0] = motion->pmv[0][0] = motion_x; - NEEDBITS (bit_buf, bits, bit_ptr); - - motion_y = (motion->pmv[0][1] >> 1) + get_xvmc_motion_delta (picture, - motion->f_code[1]); - /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */ - motion->pmv[1][1] = motion->pmv[0][1] = motion_y << 1; - -#undef bit_buf -#undef bits -#undef bit_ptr -} - -static void motion_reuse (picture_t * picture, motion_t * motion, - void (** table) (uint8_t *, uint8_t *, int, int)) -{ - int motion_x, motion_y; - - motion_x = motion->pmv[0][0]; - motion_y = motion->pmv[0][1]; - -} - -/* like motion_frame, but parsing without actual motion compensation */ -static void motion_fr_conceal (picture_t * picture) -{ -#define bit_buf (picture->bitstream_buf) -#define bits (picture->bitstream_bits) -#define bit_ptr (picture->bitstream_ptr) - int tmp; - - NEEDBITS (bit_buf, bits, bit_ptr); - tmp = (picture->f_motion.pmv[0][0] + - get_xvmc_motion_delta (picture, picture->f_motion.f_code[0])); - tmp = bound_motion_vector (tmp, picture->f_motion.f_code[0]); - picture->f_motion.pmv[1][0] = picture->f_motion.pmv[0][0] = tmp; - - NEEDBITS (bit_buf, bits, bit_ptr); - tmp = 
(picture->f_motion.pmv[0][1] + - get_xvmc_motion_delta (picture, picture->f_motion.f_code[1])); - tmp = bound_motion_vector (tmp, picture->f_motion.f_code[1]); - picture->f_motion.pmv[1][1] = picture->f_motion.pmv[0][1] = tmp; - - DUMPBITS (bit_buf, bits, 1); /* remove marker_bit */ -#undef bit_buf -#undef bits -#undef bit_ptr -} - -static void motion_fi_field (picture_t * picture, motion_t * motion, - void (** table) (uint8_t *, uint8_t *, int, int)) -{ -#define bit_buf (picture->bitstream_buf) -#define bits (picture->bitstream_bits) -#define bit_ptr (picture->bitstream_ptr) - int motion_x, motion_y; - uint8_t ** ref_field; - - NEEDBITS (bit_buf, bits, bit_ptr); - ref_field = motion->ref2[UBITS (bit_buf, 1)]; - - // TODO field select may need to do something here for bob (weave ok) - picture->XvMC_mv_field_sel[0][0] = picture->XvMC_mv_field_sel[1][0] = 0; - - DUMPBITS (bit_buf, bits, 1); - - motion_x = motion->pmv[0][0] + get_xvmc_motion_delta (picture, - motion->f_code[0]); - motion_x = bound_motion_vector (motion_x, motion->f_code[0]); - motion->pmv[1][0] = motion->pmv[0][0] = motion_x; - - NEEDBITS (bit_buf, bits, bit_ptr); - motion_y = motion->pmv[0][1] + get_xvmc_motion_delta (picture, - motion->f_code[1]); - motion_y = bound_motion_vector (motion_y, motion->f_code[1]); - motion->pmv[1][1] = motion->pmv[0][1] = motion_y; - -#undef bit_buf -#undef bits -#undef bit_ptr -} - -static void motion_fi_16x8 (picture_t * picture, motion_t * motion, - void (** table) (uint8_t *, uint8_t *, int, int)) -{ -#define bit_buf (picture->bitstream_buf) -#define bits (picture->bitstream_bits) -#define bit_ptr (picture->bitstream_ptr) - int motion_x, motion_y; - uint8_t ** ref_field; - - NEEDBITS (bit_buf, bits, bit_ptr); - ref_field = motion->ref2[UBITS (bit_buf, 1)]; - - // TODO field select may need to do something here bob (weave ok) - picture->XvMC_mv_field_sel[0][0] = picture->XvMC_mv_field_sel[1][0] = 0; - - DUMPBITS (bit_buf, bits, 1); - - motion_x = motion->pmv[0][0] + 
get_xvmc_motion_delta (picture, - motion->f_code[0]); - motion_x = bound_motion_vector (motion_x, motion->f_code[0]); - motion->pmv[0][0] = motion_x; - - NEEDBITS (bit_buf, bits, bit_ptr); - motion_y = motion->pmv[0][1] + get_xvmc_motion_delta (picture, - motion->f_code[1]); - motion_y = bound_motion_vector (motion_y, motion->f_code[1]); - motion->pmv[0][1] = motion_y; - - - NEEDBITS (bit_buf, bits, bit_ptr); - ref_field = motion->ref2[UBITS (bit_buf, 1)]; - - // TODO field select may need to do something here for bob (weave ok) - picture->XvMC_mv_field_sel[0][0] = picture->XvMC_mv_field_sel[1][0] = 0; - - DUMPBITS (bit_buf, bits, 1); - - motion_x = motion->pmv[1][0] + get_xvmc_motion_delta (picture, - motion->f_code[0]); - motion_x = bound_motion_vector (motion_x, motion->f_code[0]); - motion->pmv[1][0] = motion_x; - - NEEDBITS (bit_buf, bits, bit_ptr); - motion_y = motion->pmv[1][1] + get_xvmc_motion_delta (picture, - motion->f_code[1]); - motion_y = bound_motion_vector (motion_y, motion->f_code[1]); - motion->pmv[1][1] = motion_y; - -#undef bit_buf -#undef bits -#undef bit_ptr -} - -static void motion_fi_dmv (picture_t * picture, motion_t * motion, - void (** table) (uint8_t *, uint8_t *, int, int)) -{ -#define bit_buf (picture->bitstream_buf) -#define bits (picture->bitstream_bits) -#define bit_ptr (picture->bitstream_ptr) - int motion_x, motion_y; - - NEEDBITS (bit_buf, bits, bit_ptr); - motion_x = motion->pmv[0][0] + get_xvmc_motion_delta (picture, - motion->f_code[0]); - motion_x = bound_motion_vector (motion_x, motion->f_code[0]); - motion->pmv[1][0] = motion->pmv[0][0] = motion_x; - NEEDBITS (bit_buf, bits, bit_ptr); - - motion_y = motion->pmv[0][1] + get_xvmc_motion_delta (picture, - motion->f_code[1]); - motion_y = bound_motion_vector (motion_y, motion->f_code[1]); - motion->pmv[1][1] = motion->pmv[0][1] = motion_y; - - // TODO field select may need to do something here for bob (weave ok) - picture->XvMC_mv_field_sel[0][0] = 
picture->XvMC_mv_field_sel[1][0] = 0; - -#undef bit_buf -#undef bits -#undef bit_ptr -} - - -static void motion_fi_conceal (picture_t * picture) -{ -#define bit_buf (picture->bitstream_buf) -#define bits (picture->bitstream_bits) -#define bit_ptr (picture->bitstream_ptr) - int tmp; - - NEEDBITS (bit_buf, bits, bit_ptr); - DUMPBITS (bit_buf, bits, 1); /* remove field_select */ - - tmp = (picture->f_motion.pmv[0][0] + - get_xvmc_motion_delta (picture, picture->f_motion.f_code[0])); - tmp = bound_motion_vector (tmp, picture->f_motion.f_code[0]); - picture->f_motion.pmv[1][0] = picture->f_motion.pmv[0][0] = tmp; - - NEEDBITS (bit_buf, bits, bit_ptr); - tmp = (picture->f_motion.pmv[0][1] + - get_xvmc_motion_delta (picture, picture->f_motion.f_code[1])); - tmp = bound_motion_vector (tmp, picture->f_motion.f_code[1]); - picture->f_motion.pmv[1][1] = picture->f_motion.pmv[0][1] = tmp; - - DUMPBITS (bit_buf, bits, 1); /* remove marker_bit */ -#undef bit_buf -#undef bits -#undef bit_ptr -} - -#define MOTION_CALL(routine,direction) \ -do { \ - if ((direction) & MACROBLOCK_MOTION_FORWARD) \ - routine (picture, &(picture->f_motion), mpeg2_mc.put); \ - if ((direction) & MACROBLOCK_MOTION_BACKWARD) \ - routine (picture, &(picture->b_motion), \ - ((direction) & MACROBLOCK_MOTION_FORWARD ? 
\ - mpeg2_mc.avg : mpeg2_mc.put)); \ -} while (0) - -#define NEXT_MACROBLOCK \ -do { \ - picture->offset += 16; \ - if (picture->offset == picture->coded_picture_width) { \ - do { /* just so we can use the break statement */ \ - if (picture->current_frame->proc_slice) { \ - picture->current_frame->proc_slice (picture->current_frame, \ - picture->dest); \ - if (picture->picture_coding_type == B_TYPE) \ - break; \ - } \ - picture->dest[0] += 16 * picture->pitches[0]; \ - picture->dest[1] += 8 * picture->pitches[1]; \ - picture->dest[2] += 8 * picture->pitches[2]; \ - } while (0); \ - picture->v_offset += 16; \ - if (picture->v_offset > picture->limit_y) { \ - if (mpeg2_cpu_state_restore) \ - mpeg2_cpu_state_restore (&cpu_state); \ - return; \ - } \ - picture->offset = 0; \ - } \ -} while (0) - -static inline int slice_xvmc_init (picture_t * picture, int code) -{ -#define bit_buf (picture->bitstream_buf) -#define bits (picture->bitstream_bits) -#define bit_ptr (picture->bitstream_ptr) - int offset, height; - struct vo_frame_s * forward_reference_frame; - struct vo_frame_s * backward_reference_frame; - const MBAtab * mba; - - offset = picture->picture_structure == BOTTOM_FIELD; - picture->pitches[0] = picture->current_frame->pitches[0]; - picture->pitches[1] = picture->current_frame->pitches[1]; - picture->pitches[2] = picture->current_frame->pitches[2]; - - if( picture->forward_reference_frame ) { - forward_reference_frame = picture->forward_reference_frame; - } - else { - /* return 1; */ - forward_reference_frame = picture->current_frame; - } - - if( picture->backward_reference_frame ) { - backward_reference_frame = picture->backward_reference_frame; - } - else { - /* return 1; */ - backward_reference_frame = picture->current_frame; - } - - picture->f_motion.ref[0][0] = - forward_reference_frame->base[0] + (offset ? picture->pitches[0] : 0); - picture->f_motion.ref[0][1] = - forward_reference_frame->base[1] + (offset ? 
picture->pitches[1] : 0); - picture->f_motion.ref[0][2] = - forward_reference_frame->base[2] + (offset ? picture->pitches[2] : 0); - - picture->b_motion.ref[0][0] = - backward_reference_frame->base[0] + (offset ? picture->pitches[0] : 0); - picture->b_motion.ref[0][1] = - backward_reference_frame->base[1] + (offset ? picture->pitches[1] : 0); - picture->b_motion.ref[0][2] = - backward_reference_frame->base[2] + (offset ? picture->pitches[2] : 0); - - if (picture->picture_structure != FRAME_PICTURE) { - uint8_t ** forward_ref; - int bottom_field; - - bottom_field = (picture->picture_structure == BOTTOM_FIELD); - picture->dmv_offset = bottom_field ? 1 : -1; - picture->f_motion.ref2[0] = picture->f_motion.ref[bottom_field]; - picture->f_motion.ref2[1] = picture->f_motion.ref[!bottom_field]; - picture->b_motion.ref2[0] = picture->b_motion.ref[bottom_field]; - picture->b_motion.ref2[1] = picture->b_motion.ref[!bottom_field]; - - forward_ref = forward_reference_frame->base; - if (picture->second_field && (picture->picture_coding_type != B_TYPE)) - forward_ref = picture->current_frame->base; - - picture->f_motion.ref[1][0] = forward_ref[0] + (bottom_field ? 0 : picture->pitches[0]); - picture->f_motion.ref[1][1] = forward_ref[1] + (bottom_field ? 0 : picture->pitches[1]); - picture->f_motion.ref[1][2] = forward_ref[2] + (bottom_field ? 0 : picture->pitches[2]); - - picture->b_motion.ref[1][0] = - backward_reference_frame->base[0] + (bottom_field ? 0 : picture->pitches[0]); - picture->b_motion.ref[1][1] = - backward_reference_frame->base[1] + (bottom_field ? 0 : picture->pitches[1]); - picture->b_motion.ref[1][2] = - backward_reference_frame->base[2] + (bottom_field ? 
0 : picture->pitches[2]); - } - - picture->f_motion.pmv[0][0] = picture->f_motion.pmv[0][1] = 0; - picture->f_motion.pmv[1][0] = picture->f_motion.pmv[1][1] = 0; - picture->b_motion.pmv[0][0] = picture->b_motion.pmv[0][1] = 0; - picture->b_motion.pmv[1][0] = picture->b_motion.pmv[1][1] = 0; - - picture->v_offset = (code - 1) * 16; - offset = (code - 1); - if (picture->current_frame->proc_slice && picture->picture_coding_type == B_TYPE) - offset = 0; - else if (picture->picture_structure != FRAME_PICTURE) - offset = 2 * offset; - - picture->dest[0] = picture->current_frame->base[0] + picture->pitches[0] * offset * 16; - picture->dest[1] = picture->current_frame->base[1] + picture->pitches[1] * offset * 8; - picture->dest[2] = picture->current_frame->base[2] + picture->pitches[2] * offset * 8; - - height = picture->coded_picture_height; - switch (picture->picture_structure) { - case BOTTOM_FIELD: - picture->dest[0] += picture->pitches[0]; - picture->dest[1] += picture->pitches[1]; - picture->dest[2] += picture->pitches[2]; - /* follow thru */ - case TOP_FIELD: - picture->pitches[0] <<= 1; - picture->pitches[1] <<= 1; - picture->pitches[2] <<= 1; - height >>= 1; - } - picture->limit_x = 2 * picture->coded_picture_width - 32; - picture->limit_y_16 = 2 * height - 32; - picture->limit_y_8 = 2 * height - 16; - picture->limit_y = height - 16; - - //TODO conversion to signed format signed format - if((picture->mc->xvmc_accel & ACCEL) == MOTION_ACCEL && - !(picture->mc->xvmc_accel & SIGNED_INTRA)) { - //Motion Comp only unsigned intra - // original: - picture->dc_dct_pred[0] = picture->dc_dct_pred[1] = - picture->dc_dct_pred[2] = 1 << (picture->intra_dc_precision + 7); - } else { - //Motion Comp only signed intra MOTION_ACCEL+SIGNED_INTRA - picture->dc_dct_pred[0] = picture->dc_dct_pred[1] = - picture->dc_dct_pred[2] = 0; - } - - picture->quantizer_scale = get_xvmc_quantizer_scale (picture); - - /* ignore intra_slice and all the extra data */ - while (bit_buf & 0x80000000) { 
- DUMPBITS (bit_buf, bits, 9); - NEEDBITS (bit_buf, bits, bit_ptr); - } - - /* decode initial macroblock address increment */ - offset = 0; - while (1) { - if (bit_buf >= 0x08000000) { - mba = MBA_5 + (UBITS (bit_buf, 6) - 2); - break; - } else if (bit_buf >= 0x01800000) { - mba = MBA_11 + (UBITS (bit_buf, 12) - 24); - break; - } else switch (UBITS (bit_buf, 12)) { - case 8: /* macroblock_escape */ - offset += 33; - DUMPBITS (bit_buf, bits, 11); - NEEDBITS (bit_buf, bits, bit_ptr); - continue; - case 15: /* macroblock_stuffing (MPEG1 only) */ - bit_buf &= 0xfffff; - DUMPBITS (bit_buf, bits, 11); - NEEDBITS (bit_buf, bits, bit_ptr); - continue; - default: /* error */ - return 1; - } - } - DUMPBITS (bit_buf, bits, mba->len + 1); - picture->offset = (offset + mba->mba) << 4; - - while (picture->offset - picture->coded_picture_width >= 0) { - picture->offset -= picture->coded_picture_width; - if ((picture->current_frame->proc_slice == NULL) || - (picture->picture_coding_type != B_TYPE)) { - picture->dest[0] += 16 * picture->pitches[0]; - picture->dest[1] += 8 * picture->pitches[1]; - picture->dest[2] += 8 * picture->pitches[2]; - } - picture->v_offset += 16; - } - if (picture->v_offset > picture->limit_y) - return 1; - - return 0; -#undef bit_buf -#undef bits -#undef bit_ptr -} - -void mpeg2_xvmc_slice (mpeg2dec_accel_t *accel, picture_t * picture, int code, uint8_t * buffer) -{ -#define bit_buf (picture->bitstream_buf) -#define bits (picture->bitstream_bits) -#define bit_ptr (picture->bitstream_ptr) - cpu_state_t cpu_state; - xine_xvmc_t *xvmc = (xine_xvmc_t *) picture->current_frame->accel_data; - - if (1 == code) { - accel->xvmc_last_slice_code = 0; - } - if ((code != accel->xvmc_last_slice_code + 1) && - (code != accel->xvmc_last_slice_code)) - return; - - bitstream_init (picture, buffer); - - if (slice_xvmc_init (picture, code)) - return; - - if (mpeg2_cpu_state_save) - mpeg2_cpu_state_save (&cpu_state); - - while (1) { - int macroblock_modes; - int mba_inc; - 
const MBAtab * mba; - - NEEDBITS (bit_buf, bits, bit_ptr); - - macroblock_modes = get_xvmc_macroblock_modes (picture); //macroblock_modes() - picture->XvMC_mb_type = macroblock_modes & 0x1F; - picture->XvMC_dct_type = (macroblock_modes & DCT_TYPE_INTERLACED)>>5; - picture->XvMC_motion_type = (macroblock_modes & MOTION_TYPE_MASK)>>6; - - picture->XvMC_x = picture->offset/16; - picture->XvMC_y = picture->v_offset/16; - - if((picture->XvMC_x == 0) && (picture->XvMC_y == 0)) { - picture->XvMC_mv_field_sel[0][0] = - picture->XvMC_mv_field_sel[1][0] = - picture->XvMC_mv_field_sel[0][1] = - picture->XvMC_mv_field_sel[1][1] = 0; - } - - picture->XvMC_cbp = 0x3f; //TODO set for intra 4:2:0 6 blocks yyyyuv all enabled - - /* maybe integrate MACROBLOCK_QUANT test into get_xvmc_macroblock_modes ? */ - if (macroblock_modes & MACROBLOCK_QUANT) - picture->quantizer_scale = get_xvmc_quantizer_scale (picture); - if (macroblock_modes & MACROBLOCK_INTRA) { - - int DCT_offset, DCT_stride; - int offset; - uint8_t * dest_y; - - if (picture->concealment_motion_vectors) { - if (picture->picture_structure == FRAME_PICTURE) - motion_fr_conceal (picture); - else - motion_fi_conceal (picture); - } else { - picture->f_motion.pmv[0][0] = picture->f_motion.pmv[0][1] = 0; - picture->f_motion.pmv[1][0] = picture->f_motion.pmv[1][1] = 0; - picture->b_motion.pmv[0][0] = picture->b_motion.pmv[0][1] = 0; - picture->b_motion.pmv[1][0] = picture->b_motion.pmv[1][1] = 0; - } - - if (macroblock_modes & DCT_TYPE_INTERLACED) { - DCT_offset = picture->pitches[0]; - DCT_stride = picture->pitches[0] * 2; - } else { - DCT_offset = picture->pitches[0] * 8; - DCT_stride = picture->pitches[0]; - } - offset = picture->offset; - dest_y = picture->dest[0] + offset; - // unravaled loop of 6 block(i) calls in macroblock() - slice_xvmc_intra_DCT (picture, 0, dest_y, DCT_stride); - slice_xvmc_intra_DCT (picture, 0, dest_y + 8, DCT_stride); - slice_xvmc_intra_DCT (picture, 0, dest_y + DCT_offset, DCT_stride); - 
slice_xvmc_intra_DCT (picture, 0, dest_y + DCT_offset + 8, DCT_stride); - slice_xvmc_intra_DCT (picture, 1, picture->dest[1] + (offset >> 1), - picture->pitches[1]); - slice_xvmc_intra_DCT (picture, 2, picture->dest[2] + (offset >> 1), - picture->pitches[2]); - - if (picture->picture_coding_type == D_TYPE) { - NEEDBITS (bit_buf, bits, bit_ptr); - DUMPBITS (bit_buf, bits, 1); - } - } else { - picture->XvMC_cbp = 0; - - if (picture->picture_structure == FRAME_PICTURE) - switch (macroblock_modes & MOTION_TYPE_MASK) { - case MC_FRAME: - if (picture->mpeg1) { - MOTION_CALL (motion_mp1, macroblock_modes); - } else { - MOTION_CALL (motion_fr_frame, macroblock_modes); - } - break; - - case MC_FIELD: - //MOTION_CALL (motion_fr_field, macroblock_modes); - - if ((macroblock_modes) & MACROBLOCK_MOTION_FORWARD) - motion_fr_field(picture, &(picture->f_motion), - mpeg2_mc.put,0); - if ((macroblock_modes) & MACROBLOCK_MOTION_BACKWARD) - motion_fr_field(picture, &(picture->b_motion), - ((macroblock_modes) & MACROBLOCK_MOTION_FORWARD ? 
- mpeg2_mc.avg : mpeg2_mc.put),1); - - break; - - case MC_DMV: - MOTION_CALL (motion_fr_dmv, MACROBLOCK_MOTION_FORWARD); - break; - - case 0: - /* non-intra mb without forward mv in a P picture */ - picture->f_motion.pmv[0][0] = 0; - picture->f_motion.pmv[0][1] = 0; - picture->f_motion.pmv[1][0] = 0; - picture->f_motion.pmv[1][1] = 0; - // MOTION_CALL (motion_zero, MACROBLOCK_MOTION_FORWARD); - break; - } - else - switch (macroblock_modes & MOTION_TYPE_MASK) { - case MC_FIELD: - MOTION_CALL (motion_fi_field, macroblock_modes); - break; - - case MC_16X8: - MOTION_CALL (motion_fi_16x8, macroblock_modes); - break; - - case MC_DMV: - MOTION_CALL (motion_fi_dmv, MACROBLOCK_MOTION_FORWARD); - break; - - case 0: - /* non-intra mb without forward mv in a P picture */ - picture->f_motion.pmv[0][0] = 0; - picture->f_motion.pmv[0][1] = 0; - picture->f_motion.pmv[1][0] = 0; - picture->f_motion.pmv[1][1] = 0; - // MOTION_CALL (motion_zero, MACROBLOCK_MOTION_FORWARD); - break; - } - - if (macroblock_modes & MACROBLOCK_PATTERN) { - int coded_block_pattern; - int DCT_offset, DCT_stride; - int offset; - uint8_t * dest_y; - - if (macroblock_modes & DCT_TYPE_INTERLACED) { - DCT_offset = picture->pitches[0]; - DCT_stride = picture->pitches[0] * 2; - } else { - DCT_offset = picture->pitches[0] * 8; - DCT_stride = picture->pitches[0]; - } - - picture->XvMC_cbp = coded_block_pattern = get_xvmc_coded_block_pattern (picture); - offset = picture->offset; - dest_y = picture->dest[0] + offset; - // TODO optimize not fully used for idct accel only mc. 
- if (coded_block_pattern & 0x20) - slice_xvmc_non_intra_DCT (picture, dest_y, DCT_stride); // cc0 luma 0 - if (coded_block_pattern & 0x10) - slice_xvmc_non_intra_DCT (picture, dest_y + 8, DCT_stride); // cc0 luma 1 - if (coded_block_pattern & 0x08) - slice_xvmc_non_intra_DCT (picture, dest_y + DCT_offset, - DCT_stride); // cc0 luma 2 - if (coded_block_pattern & 0x04) - slice_xvmc_non_intra_DCT (picture, dest_y + DCT_offset + 8, - DCT_stride); // cc0 luma 3 - if (coded_block_pattern & 0x2) - slice_xvmc_non_intra_DCT (picture, - picture->dest[1] + (offset >> 1), - picture->pitches[1]); // cc1 croma - if (coded_block_pattern & 0x1) - slice_xvmc_non_intra_DCT (picture, - picture->dest[2] + (offset >> 1), - picture->pitches[2]); // cc2 croma - } - - if((picture->mc->xvmc_accel & ACCEL) == MOTION_ACCEL && - !(picture->mc->xvmc_accel & SIGNED_INTRA)) { - // original: - picture->dc_dct_pred[0] = picture->dc_dct_pred[1] = - picture->dc_dct_pred[2] = 128 << picture->intra_dc_precision; - - } else { // MOTION_ACCEL+SIGNED_INTRA - picture->dc_dct_pred[0] = picture->dc_dct_pred[1] = - picture->dc_dct_pred[2] = 0; - } - - } - xvmc->proc_macro_block(picture->XvMC_x, picture->XvMC_y, - picture->XvMC_mb_type, - picture->XvMC_motion_type, - picture->XvMC_mv_field_sel, - picture->XvMC_dmvector, - picture->XvMC_cbp, - picture->XvMC_dct_type, - picture->current_frame, - picture->forward_reference_frame, - picture->backward_reference_frame, - picture->picture_structure, - picture->second_field, - picture->f_motion.pmv, - picture->b_motion.pmv); - - - NEXT_MACROBLOCK; - - NEEDBITS (bit_buf, bits, bit_ptr); - mba_inc = 0; - while (1) { - if (bit_buf >= 0x10000000) { - mba = MBA_5 + (UBITS (bit_buf, 5) - 2); - break; - } else if (bit_buf >= 0x03000000) { - mba = MBA_11 + (UBITS (bit_buf, 11) - 24); - break; - } else switch (UBITS (bit_buf, 11)) { - case 8: /* macroblock_escape */ - mba_inc += 33; - /* pass through */ - case 15: /* macroblock_stuffing (MPEG1 only) */ - DUMPBITS (bit_buf, 
bits, 11); - NEEDBITS (bit_buf, bits, bit_ptr); - continue; - default: /* end of slice, or error */ - if (mpeg2_cpu_state_restore) - mpeg2_cpu_state_restore (&cpu_state); - accel->xvmc_last_slice_code = code; - return; - } - } - DUMPBITS (bit_buf, bits, mba->len); - mba_inc += mba->mba; - if (mba_inc) { - //TODO conversion to signed format signed format - if((picture->mc->xvmc_accel & ACCEL) == MOTION_ACCEL && - !(picture->mc->xvmc_accel & SIGNED_INTRA)) { - // original: - picture->dc_dct_pred[0] = picture->dc_dct_pred[1] = - picture->dc_dct_pred[2] = 128 << picture->intra_dc_precision; - } else { // MOTION_ACCEL+SIGNED_INTRA - picture->dc_dct_pred[0] = picture->dc_dct_pred[1] = - picture->dc_dct_pred[2] = 0; - } - - picture->XvMC_cbp = 0; - if (picture->picture_coding_type == P_TYPE) { - picture->f_motion.pmv[0][0] = picture->f_motion.pmv[0][1] = 0; - picture->f_motion.pmv[1][0] = picture->f_motion.pmv[1][1] = 0; - - do { - if(picture->mc->xvmc_accel) { - - /* derive motion_type */ - if(picture->picture_structure == FRAME_PICTURE) { - picture->XvMC_motion_type = XINE_MC_FRAME; - } else { - picture->XvMC_motion_type = XINE_MC_FIELD; - /* predict from field of same parity */ - picture->XvMC_mv_field_sel[0][0] = - picture->XvMC_mv_field_sel[0][1] = - (picture->picture_structure==BOTTOM_FIELD); - } - picture->XvMC_mb_type = macroblock_modes & 0x1E; - picture->XvMC_x = picture->offset/16; - picture->XvMC_y = picture->v_offset/16; - - xvmc->proc_macro_block(picture->XvMC_x,picture->XvMC_y, - picture->XvMC_mb_type, - picture->XvMC_motion_type, - picture->XvMC_mv_field_sel, - picture->XvMC_dmvector, - picture->XvMC_cbp, - picture->XvMC_dct_type, - picture->current_frame, - picture->forward_reference_frame, - picture->backward_reference_frame, - picture->picture_structure, - picture->second_field, - picture->f_motion.pmv, - picture->b_motion.pmv); - } else { - // MOTION_CALL (motion_zero, MACROBLOCK_MOTION_FORWARD); - } - NEXT_MACROBLOCK; - } while (--mba_inc); - } else { 
- do { - if(picture->mc->xvmc_accel) { - - /* derive motion_type */ - if(picture->picture_structure == FRAME_PICTURE) { - picture->XvMC_motion_type = XINE_MC_FRAME; - } else { - picture->XvMC_motion_type = XINE_MC_FIELD; - /* predict from field of same parity */ - picture->XvMC_mv_field_sel[0][0] = - picture->XvMC_mv_field_sel[0][1] = - (picture->picture_structure==BOTTOM_FIELD); - } - - picture->XvMC_mb_type = macroblock_modes & 0x1E; - picture->XvMC_x = picture->offset/16; - picture->XvMC_y = picture->v_offset/16; - - xvmc->proc_macro_block(picture->XvMC_x,picture->XvMC_y, - picture->XvMC_mb_type, - picture->XvMC_motion_type, - picture->XvMC_mv_field_sel, - picture->XvMC_dmvector, - picture->XvMC_cbp, - picture->XvMC_dct_type, - picture->current_frame, - picture->forward_reference_frame, - picture->backward_reference_frame, - picture->picture_structure, - picture->second_field, - picture->f_motion.pmv, - picture->b_motion.pmv); - } else { - MOTION_CALL (motion_reuse, macroblock_modes); - } - NEXT_MACROBLOCK; - } while (--mba_inc); - } - } - } - accel->xvmc_last_slice_code = code; -#undef bit_buf -#undef bits -#undef bit_ptr -} - diff --git a/src/libmpeg2/slice_xvmc_vld.c b/src/libmpeg2/slice_xvmc_vld.c deleted file mode 100644 index 3606cf66b..000000000 --- a/src/libmpeg2/slice_xvmc_vld.c +++ /dev/null @@ -1,225 +0,0 @@ -/* - * Copyright (c) 2004 The Unichrome project. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it under - * the terms of the GNU General Public License as published by the Free Software - * Foundation; either version 2, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTIES OR REPRESENTATIONS; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - * See the GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * - * - */ - -#include -#include -#include "mpeg2.h" -#include "mpeg2_internal.h" -#include "xvmc_vld.h" - -static const uint8_t zig_zag_scan[64] ATTR_ALIGN(16) = -{ - /* Zig-Zag scan pattern */ - 0, 1, 8,16, 9, 2, 3,10, - 17,24,32,25,18,11, 4, 5, - 12,19,26,33,40,48,41,34, - 27,20,13, 6, 7,14,21,28, - 35,42,49,56,57,50,43,36, - 29,22,15,23,30,37,44,51, - 58,59,52,45,38,31,39,46, - 53,60,61,54,47,55,62,63 -}; - -static const uint8_t alternate_scan [64] ATTR_ALIGN(16) = -{ - /* Alternate scan pattern */ - 0,8,16,24,1,9,2,10,17,25,32,40,48,56,57,49, - 41,33,26,18,3,11,4,12,19,27,34,42,50,58,35,43, - 51,59,20,28,5,13,6,14,21,29,36,44,52,60,37,45, - 53,61,22,30,7,15,23,31,38,46,54,62,39,47,55,63 -}; - -void mpeg2_xxmc_slice( mpeg2dec_accel_t *accel, picture_t *picture, - int code, uint8_t *buffer, uint32_t chunk_size, - uint8_t *chunk_buffer) - -{ - vo_frame_t - *frame = picture->current_frame; - xine_xxmc_t - *xxmc = (xine_xxmc_t *) frame->accel_data; - xine_vld_frame_t - *vft = &xxmc->vld_frame; - unsigned - mb_frame_height; - int - i; - const uint8_t * - scan_pattern; - float - ms_per_slice; - - if (1 == code && accel->xvmc_last_slice_code != 1) { - frame->bad_frame = 1; - accel->slices_per_row = 1; - accel->row_slice_count = 1; - - /* - * Check that first field went through OK. Otherwise, - * indicate bad frame. - */ - - if (picture->second_field) { - accel->xvmc_last_slice_code = (xxmc->decoded) ? 0 : -1; - xxmc->decoded = 0; - } else { - accel->xvmc_last_slice_code = 0; - } - - mb_frame_height = - (!(picture->mpeg1) && (picture->progressive_sequence)) ? - 2*((picture->coded_picture_height+31) >> 5) : - (picture->coded_picture_height+15) >> 4; - accel->xxmc_mb_pic_height = (picture->picture_structure == FRAME_PICTURE ) ? 
- mb_frame_height : mb_frame_height >> 1; - - ms_per_slice = 1000. / (90000. * mb_frame_height) * frame->duration; - xxmc->sleep = 1. / (ms_per_slice * 0.45); - if (xxmc->sleep < 1.) xxmc->sleep = 1.; - - if (picture->mpeg1) { - vft->mv_ranges[0][0] = picture->b_motion.f_code[0]; - vft->mv_ranges[0][1] = picture->b_motion.f_code[0]; - vft->mv_ranges[1][0] = picture->f_motion.f_code[0]; - vft->mv_ranges[1][1] = picture->f_motion.f_code[0]; - } else { - vft->mv_ranges[0][0] = picture->b_motion.f_code[0]; - vft->mv_ranges[0][1] = picture->b_motion.f_code[1]; - vft->mv_ranges[1][0] = picture->f_motion.f_code[0]; - vft->mv_ranges[1][1] = picture->f_motion.f_code[1]; - } - - vft->picture_structure = picture->picture_structure; - vft->picture_coding_type = picture->picture_coding_type; - vft->mpeg_coding = (picture->mpeg1) ? 0 : 1; - vft->progressive_sequence = picture->progressive_sequence; - vft->scan = (picture->scan == mpeg2_scan_alt); - vft->pred_dct_frame = picture->frame_pred_frame_dct; - vft->concealment_motion_vectors = - picture->concealment_motion_vectors; - vft->q_scale_type = picture->q_scale_type; - vft->intra_vlc_format = picture->intra_vlc_format; - vft->intra_dc_precision = picture->intra_dc_precision; - vft->second_field = picture->second_field; - - /* - * Translation of libmpeg2's Q-matrix layout to VLD XvMC's. - * Errors here will give - * blocky artifacts and sometimes wrong colors. - */ - - scan_pattern = (vft->scan) ? 
alternate_scan : zig_zag_scan; - - if ((vft->load_intra_quantizer_matrix = picture->load_intra_quantizer_matrix)) { - for (i=0; i<64; ++i) { - vft->intra_quantizer_matrix[scan_pattern[i]] = - picture->intra_quantizer_matrix[picture->scan[i]]; - } - } - - if ((vft->load_non_intra_quantizer_matrix = picture->load_non_intra_quantizer_matrix)) { - for (i=0; i<64; ++i) { - vft->non_intra_quantizer_matrix[scan_pattern[i]] = - picture->non_intra_quantizer_matrix[picture->scan[i]]; - } - } - - picture->load_intra_quantizer_matrix = 0; - picture->load_non_intra_quantizer_matrix = 0; - vft->forward_reference_frame = picture->forward_reference_frame; - vft->backward_reference_frame = picture->backward_reference_frame; - xxmc->proc_xxmc_begin( frame ); - if (xxmc->result != 0) { - accel->xvmc_last_slice_code=-1; - } - } - - if (((code == accel->xvmc_last_slice_code + 1) || - (code == accel->xvmc_last_slice_code))) { - - /* - * Send this slice to the output plugin. May stall for a long - * time in proc_slice; - */ - - frame->bad_frame = 1; - xxmc->slice_data_size = chunk_size; - xxmc->slice_data = chunk_buffer; - xxmc->slice_code = code; - - xxmc->proc_xxmc_slice( frame ); - - if (xxmc->result != 0) { - accel->xvmc_last_slice_code=-1; - return; - } - /* - * Keep track of slices. - */ - - accel->row_slice_count = (accel->xvmc_last_slice_code == code) ? - accel->row_slice_count + 1 : 1; - accel->slices_per_row = (accel->row_slice_count > accel->slices_per_row) ? - accel->row_slice_count:accel->slices_per_row; - accel->xvmc_last_slice_code = code; - - } else { - - /* - * An error has occured. 
- */ - - lprintf("libmpeg2: VLD XvMC: Slice error.\n"); - accel->xvmc_last_slice_code = -1; - return; - } -} - -void mpeg2_xxmc_vld_frame_complete(mpeg2dec_accel_t *accel, picture_t *picture, int code) -{ - vo_frame_t - *frame = picture->current_frame; - xine_xxmc_t - *xxmc = (xine_xxmc_t *) frame->accel_data; - - if (xxmc->decoded) return; - if (accel->xvmc_last_slice_code == -1) { - xxmc->proc_xxmc_flush( frame ); - return; - } - - if ((code != 0xff) || ((accel->xvmc_last_slice_code == - accel->xxmc_mb_pic_height) && - accel->slices_per_row == accel->row_slice_count)) { - - xxmc->proc_xxmc_flush( frame ); - - if (xxmc->result) { - accel->xvmc_last_slice_code=-1; - frame->bad_frame = 1; - return; - } - xxmc->decoded = 1; - accel->xvmc_last_slice_code = 0; - if (picture->picture_structure == 3 || picture->second_field) { - if (xxmc->result == 0) - frame->bad_frame = 0; - } - } -} diff --git a/src/libmpeg2/stats.c b/src/libmpeg2/stats.c deleted file mode 100644 index 63c701179..000000000 --- a/src/libmpeg2/stats.c +++ /dev/null @@ -1,317 +0,0 @@ -/* - * stats.c - * Copyright (C) 2000-2002 Michel Lespinasse - * Copyright (C) 1999-2000 Aaron Holtzman - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * See http://libmpeg2.sourceforge.net/ for updates. - * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include "config.h" - -#include -#include -#include - -#include "mpeg2_internal.h" - -static int debug_level = -1; - -/* Determine is debug output is required. */ -/* We could potentially have multiple levels of debug info */ -static int debug_is_on (void) -{ - char * env_var; - - if (debug_level < 0) { - env_var = getenv ("MPEG2_DEBUG"); - - if (env_var) - debug_level = 1; - else - debug_level = 0; - } - - return debug_level; -} - -static void stats_picture (uint8_t * buffer) -{ - static const char *const picture_coding_type_str [8] = { - "Invalid picture type", - "I-type", - "P-type", - "B-type", - "D (very bad)", - "Invalid","Invalid","Invalid" - }; - - int picture_coding_type; - int temporal_reference; - int vbv_delay; - - temporal_reference = (buffer[0] << 2) | (buffer[1] >> 6); - picture_coding_type = (buffer [1] >> 3) & 7; - vbv_delay = ((buffer[1] << 13) | (buffer[2] << 5) | - (buffer[3] >> 3)) & 0xffff; - - fprintf (stderr, " (picture) %s temporal_reference %d, vbv_delay %d\n", - picture_coding_type_str [picture_coding_type], - temporal_reference, vbv_delay); -} - -static void stats_user_data (uint8_t * buffer) -{ - fprintf (stderr, " (user_data)\n"); -} - -static void stats_sequence (uint8_t * buffer) -{ - static const char *const aspect_ratio_information_str[8] = { - "Invalid Aspect Ratio", - "1:1", - "4:3", - "16:9", - "2.21:1", - "Invalid Aspect Ratio", - "Invalid Aspect Ratio", - "Invalid Aspect Ratio" - }; - static const char *const frame_rate_str[16] = { - "Invalid frame_rate_code", - "23.976", "24", "25" , "29.97", - "30" , "50", "59.94", "60" , - "Invalid frame_rate_code", "Invalid frame_rate_code", - "Invalid frame_rate_code", "Invalid frame_rate_code", - "Invalid frame_rate_code", "Invalid frame_rate_code", - "Invalid 
frame_rate_code" - }; - - int horizontal_size; - int vertical_size; - int aspect_ratio_information; - int frame_rate_code; - int bit_rate_value; - int vbv_buffer_size_value; - int constrained_parameters_flag; - int load_intra_quantizer_matrix; - int load_non_intra_quantizer_matrix; - - vertical_size = (buffer[0] << 16) | (buffer[1] << 8) | buffer[2]; - horizontal_size = vertical_size >> 12; - vertical_size &= 0xfff; - aspect_ratio_information = buffer[3] >> 4; - frame_rate_code = buffer[3] & 15; - bit_rate_value = (buffer[4] << 10) | (buffer[5] << 2) | (buffer[6] >> 6); - vbv_buffer_size_value = ((buffer[6] << 5) | (buffer[7] >> 3)) & 0x3ff; - constrained_parameters_flag = buffer[7] & 4; - load_intra_quantizer_matrix = buffer[7] & 2; - if (load_intra_quantizer_matrix) - buffer += 64; - load_non_intra_quantizer_matrix = buffer[7] & 1; - - fprintf (stderr, " (seq) %dx%d %s, %s fps, %5.0f kbps, VBV %d kB%s%s%s\n", - horizontal_size, vertical_size, - aspect_ratio_information_str [aspect_ratio_information], - frame_rate_str [frame_rate_code], - bit_rate_value * 400.0 / 1000.0, - 2 * vbv_buffer_size_value, - constrained_parameters_flag ? " , CP":"", - load_intra_quantizer_matrix ? " , Custom Intra Matrix":"", - load_non_intra_quantizer_matrix ? " , Custom Non-Intra Matrix":""); -} - -static void stats_sequence_error (uint8_t * buffer) -{ - fprintf (stderr, " (sequence_error)\n"); -} - -static void stats_sequence_end (uint8_t * buffer) -{ - fprintf (stderr, " (sequence_end)\n"); -} - -static void stats_group (uint8_t * buffer) -{ - fprintf (stderr, " (group)%s%s\n", - (buffer[4] & 0x40) ? " closed_gop" : "", - (buffer[4] & 0x20) ? 
" broken_link" : ""); -} - -static void stats_slice (int code, uint8_t * buffer) -{ - /* fprintf (stderr, " (slice %d)\n", code); */ -} - -static void stats_sequence_extension (uint8_t * buffer) -{ - static const char *const chroma_format_str[4] = { - "Invalid Chroma Format", - "4:2:0 Chroma", - "4:2:2 Chroma", - "4:4:4 Chroma" - }; - - int progressive_sequence; - int chroma_format; - - progressive_sequence = (buffer[1] >> 3) & 1; - chroma_format = (buffer[1] >> 1) & 3; - - fprintf (stderr, " (seq_ext) progressive_sequence %d, %s\n", - progressive_sequence, chroma_format_str [chroma_format]); -} - -static void stats_sequence_display_extension (uint8_t * buffer) -{ - fprintf (stderr, " (sequence_display_extension)\n"); -} - -static void stats_quant_matrix_extension (uint8_t * buffer) -{ - fprintf (stderr, " (quant_matrix_extension)\n"); -} - -static void stats_copyright_extension (uint8_t * buffer) -{ - fprintf (stderr, " (copyright_extension)\n"); -} - - -static void stats_sequence_scalable_extension (uint8_t * buffer) -{ - fprintf (stderr, " (sequence_scalable_extension)\n"); -} - -static void stats_picture_display_extension (uint8_t * buffer) -{ - fprintf (stderr, " (picture_display_extension)\n"); -} - -static void stats_picture_coding_extension (uint8_t * buffer) -{ - static const char *const picture_structure_str[4] = { - "Invalid Picture Structure", - "Top field", - "Bottom field", - "Frame Picture" - }; - - int f_code[2][2]; - int intra_dc_precision; - int picture_structure; - int top_field_first; - int frame_pred_frame_dct; - int concealment_motion_vectors; - int q_scale_type; - int intra_vlc_format; - int alternate_scan; - int repeat_first_field; - int progressive_frame; - - f_code[0][0] = buffer[0] & 15; - f_code[0][1] = buffer[1] >> 4; - f_code[1][0] = buffer[1] & 15; - f_code[1][1] = buffer[2] >> 4; - intra_dc_precision = (buffer[2] >> 2) & 3; - picture_structure = buffer[2] & 3; - top_field_first = buffer[3] >> 7; - frame_pred_frame_dct = (buffer[3] >> 
6) & 1; - concealment_motion_vectors = (buffer[3] >> 5) & 1; - q_scale_type = (buffer[3] >> 4) & 1; - intra_vlc_format = (buffer[3] >> 3) & 1; - alternate_scan = (buffer[3] >> 2) & 1; - repeat_first_field = (buffer[3] >> 1) & 1; - progressive_frame = buffer[4] >> 7; - - fprintf (stderr, - " (pic_ext) %s\n", picture_structure_str [picture_structure]); - fprintf (stderr, - " (pic_ext) forward horizontal f_code % d, forward vertical f_code % d\n", - f_code[0][0], f_code[0][1]); - fprintf (stderr, - " (pic_ext) backward horizontal f_code % d, backward vertical f_code % d\n", - f_code[1][0], f_code[1][1]); - fprintf (stderr, - " (pic_ext) intra_dc_precision %d, top_field_first %d, frame_pred_frame_dct %d\n", - intra_dc_precision, top_field_first, frame_pred_frame_dct); - fprintf (stderr, - " (pic_ext) concealment_motion_vectors %d, q_scale_type %d, intra_vlc_format %d\n", - concealment_motion_vectors, q_scale_type, intra_vlc_format); - fprintf (stderr, - " (pic_ext) alternate_scan %d, repeat_first_field %d, progressive_frame %d\n", - alternate_scan, repeat_first_field, progressive_frame); -} - -void mpeg2_stats (int code, uint8_t * buffer) -{ - if (! 
(debug_is_on ())) - return; - - switch (code) { - case 0x00: - stats_picture (buffer); - break; - case 0xb2: - stats_user_data (buffer); - break; - case 0xb3: - stats_sequence (buffer); - break; - case 0xb4: - stats_sequence_error (buffer); - break; - case 0xb5: - switch (buffer[0] >> 4) { - case 1: - stats_sequence_extension (buffer); - break; - case 2: - stats_sequence_display_extension (buffer); - break; - case 3: - stats_quant_matrix_extension (buffer); - break; - case 4: - stats_copyright_extension (buffer); - break; - case 5: - stats_sequence_scalable_extension (buffer); - break; - case 7: - stats_picture_display_extension (buffer); - break; - case 8: - stats_picture_coding_extension (buffer); - break; - default: - fprintf (stderr, " (unknown extension %#x)\n", buffer[0] >> 4); - } - break; - case 0xb7: - stats_sequence_end (buffer); - break; - case 0xb8: - stats_group (buffer); - break; - default: - if (code < 0xb0) - stats_slice (code, buffer); - else - fprintf (stderr, " (unknown start code %#02x)\n", code); - } -} diff --git a/src/libmpeg2/vis.h b/src/libmpeg2/vis.h deleted file mode 100644 index 69dd49075..000000000 --- a/src/libmpeg2/vis.h +++ /dev/null @@ -1,328 +0,0 @@ -/* - * vis.h - * Copyright (C) 2003 David S. Miller - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * See http://libmpeg2.sourceforge.net/ for updates. - * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -/* You may be asking why I hard-code the instruction opcodes and don't - * use the normal VIS assembler mnenomics for the VIS instructions. - * - * The reason is that Sun, in their infinite wisdom, decided that a binary - * using a VIS instruction will cause it to be marked (in the ELF headers) - * as doing so, and this prevents the OS from loading such binaries if the - * current cpu doesn't have VIS. There is no way to easily override this - * behavior of the assembler that I am aware of. - * - * This totally defeats what libmpeg2 is trying to do which is allow a - * single binary to be created, and then detect the availability of VIS - * at runtime. - * - * I'm not saying that tainting the binary by default is bad, rather I'm - * saying that not providing a way to override this easily unnecessarily - * ties people's hands. - * - * Thus, we do the opcode encoding by hand and output 32-bit words in - * the assembler to keep the binary from becoming tainted. 
- */ - -#define vis_opc_base ((0x1 << 31) | (0x36 << 19)) -#define vis_opf(X) ((X) << 5) -#define vis_sreg(X) (X) -#define vis_dreg(X) (((X)&0x1f)|((X)>>5)) -#define vis_rs1_s(X) (vis_sreg(X) << 14) -#define vis_rs1_d(X) (vis_dreg(X) << 14) -#define vis_rs2_s(X) (vis_sreg(X) << 0) -#define vis_rs2_d(X) (vis_dreg(X) << 0) -#define vis_rd_s(X) (vis_sreg(X) << 25) -#define vis_rd_d(X) (vis_dreg(X) << 25) - -#define vis_ss2s(opf,rs1,rs2,rd) \ - __asm__ __volatile__ (".word %0" \ - : : "i" (vis_opc_base | vis_opf(opf) | \ - vis_rs1_s(rs1) | \ - vis_rs2_s(rs2) | \ - vis_rd_s(rd))) - -#define vis_dd2d(opf,rs1,rs2,rd) \ - __asm__ __volatile__ (".word %0" \ - : : "i" (vis_opc_base | vis_opf(opf) | \ - vis_rs1_d(rs1) | \ - vis_rs2_d(rs2) | \ - vis_rd_d(rd))) - -#define vis_ss2d(opf,rs1,rs2,rd) \ - __asm__ __volatile__ (".word %0" \ - : : "i" (vis_opc_base | vis_opf(opf) | \ - vis_rs1_s(rs1) | \ - vis_rs2_s(rs2) | \ - vis_rd_d(rd))) - -#define vis_sd2d(opf,rs1,rs2,rd) \ - __asm__ __volatile__ (".word %0" \ - : : "i" (vis_opc_base | vis_opf(opf) | \ - vis_rs1_s(rs1) | \ - vis_rs2_d(rs2) | \ - vis_rd_d(rd))) - -#define vis_d2s(opf,rs2,rd) \ - __asm__ __volatile__ (".word %0" \ - : : "i" (vis_opc_base | vis_opf(opf) | \ - vis_rs2_d(rs2) | \ - vis_rd_s(rd))) - -#define vis_s2d(opf,rs2,rd) \ - __asm__ __volatile__ (".word %0" \ - : : "i" (vis_opc_base | vis_opf(opf) | \ - vis_rs2_s(rs2) | \ - vis_rd_d(rd))) - -#define vis_d12d(opf,rs1,rd) \ - __asm__ __volatile__ (".word %0" \ - : : "i" (vis_opc_base | vis_opf(opf) | \ - vis_rs1_d(rs1) | \ - vis_rd_d(rd))) - -#define vis_d22d(opf,rs2,rd) \ - __asm__ __volatile__ (".word %0" \ - : : "i" (vis_opc_base | vis_opf(opf) | \ - vis_rs2_d(rs2) | \ - vis_rd_d(rd))) - -#define vis_s12s(opf,rs1,rd) \ - __asm__ __volatile__ (".word %0" \ - : : "i" (vis_opc_base | vis_opf(opf) | \ - vis_rs1_s(rs1) | \ - vis_rd_s(rd))) - -#define vis_s22s(opf,rs2,rd) \ - __asm__ __volatile__ (".word %0" \ - : : "i" (vis_opc_base | vis_opf(opf) | \ - 
vis_rs2_s(rs2) | \ - vis_rd_s(rd))) - -#define vis_s(opf,rd) \ - __asm__ __volatile__ (".word %0" \ - : : "i" (vis_opc_base | vis_opf(opf) | \ - vis_rd_s(rd))) - -#define vis_d(opf,rd) \ - __asm__ __volatile__ (".word %0" \ - : : "i" (vis_opc_base | vis_opf(opf) | \ - vis_rd_d(rd))) - -#define vis_r2m(op,rd,mem) \ - __asm__ __volatile__ (#op "\t%%f" #rd ", [%0]" : : "r" (&(mem)) ) - -#define vis_r2m_2(op,rd,mem1,mem2) \ - __asm__ __volatile__ (#op "\t%%f" #rd ", [%0 + %1]" : : "r" (mem1), "r" (mem2) ) - -#define vis_m2r(op,mem,rd) \ - __asm__ __volatile__ (#op "\t[%0], %%f" #rd : : "r" (&(mem)) ) - -#define vis_m2r_2(op,mem1,mem2,rd) \ - __asm__ __volatile__ (#op "\t[%0 + %1], %%f" #rd : : "r" (mem1), "r" (mem2) ) - -static inline void vis_set_gsr(unsigned int _val) -{ - register unsigned int val asm("g1"); - - val = _val; - __asm__ __volatile__(".word 0xa7804000" - : : "r" (val)); -} - -#define VIS_GSR_ALIGNADDR_MASK 0x0000007 -#define VIS_GSR_ALIGNADDR_SHIFT 0 -#define VIS_GSR_SCALEFACT_MASK 0x0000078 -#define VIS_GSR_SCALEFACT_SHIFT 3 - -#define vis_ld32(mem,rs1) vis_m2r(ld, mem, rs1) -#define vis_ld32_2(mem1,mem2,rs1) vis_m2r_2(ld, mem1, mem2, rs1) -#define vis_st32(rs1,mem) vis_r2m(st, rs1, mem) -#define vis_st32_2(rs1,mem1,mem2) vis_r2m_2(st, rs1, mem1, mem2) -#define vis_ld64(mem,rs1) vis_m2r(ldd, mem, rs1) -#define vis_ld64_2(mem1,mem2,rs1) vis_m2r_2(ldd, mem1, mem2, rs1) -#define vis_st64(rs1,mem) vis_r2m(std, rs1, mem) -#define vis_st64_2(rs1,mem1,mem2) vis_r2m_2(std, rs1, mem1, mem2) - -#define vis_ldblk(mem, rd) \ -do { register void *__mem asm("g1"); \ - __mem = &(mem); \ - __asm__ __volatile__(".word 0xc1985e00 | %1" \ - : \ - : "r" (__mem), \ - "i" (vis_rd_d(rd)) \ - : "memory"); \ -} while (0) - -#define vis_stblk(rd, mem) \ -do { register void *__mem asm("g1"); \ - __mem = &(mem); \ - __asm__ __volatile__(".word 0xc1b85e00 | %1" \ - : \ - : "r" (__mem), \ - "i" (vis_rd_d(rd)) \ - : "memory"); \ -} while (0) - -#define vis_membar_storestore() \ - 
__asm__ __volatile__(".word 0x8143e008" : : : "memory") - -#define vis_membar_sync() \ - __asm__ __volatile__(".word 0x8143e040" : : : "memory") - -/* 16 and 32 bit partitioned addition and subtraction. The normal - * versions perform 4 16-bit or 2 32-bit additions or subtractions. - * The 's' versions perform 2 16-bit or 2 32-bit additions or - * subtractions. - */ - -#define vis_padd16(rs1,rs2,rd) vis_dd2d(0x50, rs1, rs2, rd) -#define vis_padd16s(rs1,rs2,rd) vis_ss2s(0x51, rs1, rs2, rd) -#define vis_padd32(rs1,rs2,rd) vis_dd2d(0x52, rs1, rs2, rd) -#define vis_padd32s(rs1,rs2,rd) vis_ss2s(0x53, rs1, rs2, rd) -#define vis_psub16(rs1,rs2,rd) vis_dd2d(0x54, rs1, rs2, rd) -#define vis_psub16s(rs1,rs2,rd) vis_ss2s(0x55, rs1, rs2, rd) -#define vis_psub32(rs1,rs2,rd) vis_dd2d(0x56, rs1, rs2, rd) -#define vis_psub32s(rs1,rs2,rd) vis_ss2s(0x57, rs1, rs2, rd) - -/* Pixel formatting instructions. */ - -#define vis_pack16(rs2,rd) vis_d2s( 0x3b, rs2, rd) -#define vis_pack32(rs1,rs2,rd) vis_dd2d(0x3a, rs1, rs2, rd) -#define vis_packfix(rs2,rd) vis_d2s( 0x3d, rs2, rd) -#define vis_expand(rs2,rd) vis_s2d( 0x4d, rs2, rd) -#define vis_pmerge(rs1,rs2,rd) vis_ss2d(0x4b, rs1, rs2, rd) - -/* Partitioned multiply instructions. */ - -#define vis_mul8x16(rs1,rs2,rd) vis_sd2d(0x31, rs1, rs2, rd) -#define vis_mul8x16au(rs1,rs2,rd) vis_ss2d(0x33, rs1, rs2, rd) -#define vis_mul8x16al(rs1,rs2,rd) vis_ss2d(0x35, rs1, rs2, rd) -#define vis_mul8sux16(rs1,rs2,rd) vis_dd2d(0x36, rs1, rs2, rd) -#define vis_mul8ulx16(rs1,rs2,rd) vis_dd2d(0x37, rs1, rs2, rd) -#define vis_muld8sux16(rs1,rs2,rd) vis_ss2d(0x38, rs1, rs2, rd) -#define vis_muld8ulx16(rs1,rs2,rd) vis_ss2d(0x39, rs1, rs2, rd) - -/* Alignment instructions. 
*/ - -static inline void *vis_alignaddr(void *_ptr) -{ - register void *ptr asm("g1"); - - ptr = _ptr; - - __asm__ __volatile__(".word %2" - : "=&r" (ptr) - : "0" (ptr), - "i" (vis_opc_base | vis_opf(0x18) | - vis_rs1_s(1) | - vis_rs2_s(0) | - vis_rd_s(1))); - - return ptr; -} - -static inline void vis_alignaddr_g0(void *_ptr) -{ - register void *ptr asm("g1"); - - ptr = _ptr; - - __asm__ __volatile__(".word %2" - : "=&r" (ptr) - : "0" (ptr), - "i" (vis_opc_base | vis_opf(0x18) | - vis_rs1_s(1) | - vis_rs2_s(0) | - vis_rd_s(0))); -} - -static inline void *vis_alignaddrl(void *_ptr) -{ - register void *ptr asm("g1"); - - ptr = _ptr; - - __asm__ __volatile__(".word %2" - : "=&r" (ptr) - : "0" (ptr), - "i" (vis_opc_base | vis_opf(0x19) | - vis_rs1_s(1) | - vis_rs2_s(0) | - vis_rd_s(1))); - - return ptr; -} - -static inline void vis_alignaddrl_g0(void *_ptr) -{ - register void *ptr asm("g1"); - - ptr = _ptr; - - __asm__ __volatile__(".word %2" - : "=&r" (ptr) - : "0" (ptr), - "i" (vis_opc_base | vis_opf(0x19) | - vis_rs1_s(1) | - vis_rs2_s(0) | - vis_rd_s(0))); -} - -#define vis_faligndata(rs1,rs2,rd) vis_dd2d(0x48, rs1, rs2, rd) - -/* Logical operate instructions. 
*/ - -#define vis_fzero(rd) vis_d( 0x60, rd) -#define vis_fzeros(rd) vis_s( 0x61, rd) -#define vis_fone(rd) vis_d( 0x7e, rd) -#define vis_fones(rd) vis_s( 0x7f, rd) -#define vis_src1(rs1,rd) vis_d12d(0x74, rs1, rd) -#define vis_src1s(rs1,rd) vis_s12s(0x75, rs1, rd) -#define vis_src2(rs2,rd) vis_d22d(0x78, rs2, rd) -#define vis_src2s(rs2,rd) vis_s22s(0x79, rs2, rd) -#define vis_not1(rs1,rd) vis_d12d(0x6a, rs1, rd) -#define vis_not1s(rs1,rd) vis_s12s(0x6b, rs1, rd) -#define vis_not2(rs2,rd) vis_d22d(0x66, rs2, rd) -#define vis_not2s(rs2,rd) vis_s22s(0x67, rs2, rd) -#define vis_or(rs1,rs2,rd) vis_dd2d(0x7c, rs1, rs2, rd) -#define vis_ors(rs1,rs2,rd) vis_ss2s(0x7d, rs1, rs2, rd) -#define vis_nor(rs1,rs2,rd) vis_dd2d(0x62, rs1, rs2, rd) -#define vis_nors(rs1,rs2,rd) vis_ss2s(0x63, rs1, rs2, rd) -#define vis_and(rs1,rs2,rd) vis_dd2d(0x70, rs1, rs2, rd) -#define vis_ands(rs1,rs2,rd) vis_ss2s(0x71, rs1, rs2, rd) -#define vis_nand(rs1,rs2,rd) vis_dd2d(0x6e, rs1, rs2, rd) -#define vis_nands(rs1,rs2,rd) vis_ss2s(0x6f, rs1, rs2, rd) -#define vis_xor(rs1,rs2,rd) vis_dd2d(0x6c, rs1, rs2, rd) -#define vis_xors(rs1,rs2,rd) vis_ss2s(0x6d, rs1, rs2, rd) -#define vis_xnor(rs1,rs2,rd) vis_dd2d(0x72, rs1, rs2, rd) -#define vis_xnors(rs1,rs2,rd) vis_ss2s(0x73, rs1, rs2, rd) -#define vis_ornot1(rs1,rs2,rd) vis_dd2d(0x7a, rs1, rs2, rd) -#define vis_ornot1s(rs1,rs2,rd) vis_ss2s(0x7b, rs1, rs2, rd) -#define vis_ornot2(rs1,rs2,rd) vis_dd2d(0x76, rs1, rs2, rd) -#define vis_ornot2s(rs1,rs2,rd) vis_ss2s(0x77, rs1, rs2, rd) -#define vis_andnot1(rs1,rs2,rd) vis_dd2d(0x68, rs1, rs2, rd) -#define vis_andnot1s(rs1,rs2,rd) vis_ss2s(0x69, rs1, rs2, rd) -#define vis_andnot2(rs1,rs2,rd) vis_dd2d(0x64, rs1, rs2, rd) -#define vis_andnot2s(rs1,rs2,rd) vis_ss2s(0x65, rs1, rs2, rd) - -/* Pixel component distance. 
*/ - -#define vis_pdist(rs1,rs2,rd) vis_dd2d(0x3e, rs1, rs2, rd) diff --git a/src/libmpeg2/vlc.h b/src/libmpeg2/vlc.h deleted file mode 100644 index 65de9a840..000000000 --- a/src/libmpeg2/vlc.h +++ /dev/null @@ -1,428 +0,0 @@ -/* - * vlc.h - * Copyright (C) 2000-2002 Michel Lespinasse - * Copyright (C) 1999-2000 Aaron Holtzman - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * See http://libmpeg2.sourceforge.net/ for updates. - * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#define GETWORD(bit_buf,shift,bit_ptr) \ -do { \ - bit_buf |= ((bit_ptr[0] << 8) | bit_ptr[1]) << (shift); \ - bit_ptr += 2; \ -} while (0) - -static inline void bitstream_init (picture_t * picture, uint8_t * start) -{ - picture->bitstream_buf = - (start[0] << 24) | (start[1] << 16) | (start[2] << 8) | start[3]; - picture->bitstream_ptr = start + 4; - picture->bitstream_bits = -16; -} - -/* make sure that there are at least 16 valid bits in bit_buf */ -#define NEEDBITS(bit_buf,bits,bit_ptr) \ -do { \ - if (bits > 0) { \ - GETWORD (bit_buf, bits, bit_ptr); \ - bits -= 16; \ - } \ -} while (0) - -/* remove num valid bits from bit_buf */ -#define DUMPBITS(bit_buf,bits,num) \ -do { \ - bit_buf <<= (num); \ - bits += (num); \ -} while (0) - -/* take num bits from the high part of bit_buf and zero extend 
them */ -#define UBITS(bit_buf,num) (((uint32_t)(bit_buf)) >> (32 - (num))) - -/* take num bits from the high part of bit_buf and sign extend them */ -#define SBITS(bit_buf,num) (((int32_t)(bit_buf)) >> (32 - (num))) - -typedef struct { - uint8_t modes; - uint8_t len; -} MBtab; - -typedef struct { - uint8_t delta; - uint8_t len; -} MVtab; - -typedef struct { - int8_t dmv; - uint8_t len; -} DMVtab; - -typedef struct { - uint8_t cbp; - uint8_t len; -} CBPtab; - -typedef struct { - uint8_t size; - uint8_t len; -} DCtab; - -typedef struct { - uint8_t run; - uint8_t level; - uint8_t len; -} DCTtab; - -typedef struct { - uint8_t mba; - uint8_t len; -} MBAtab; - - -#define INTRA MACROBLOCK_INTRA -#define QUANT MACROBLOCK_QUANT - -static const MBtab MB_I [] = { - {INTRA|QUANT, 2}, {INTRA, 1} -}; - -#define MC MACROBLOCK_MOTION_FORWARD -#define CODED MACROBLOCK_PATTERN - -static const MBtab MB_P [] = { - {INTRA|QUANT, 6}, {CODED|QUANT, 5}, {MC|CODED|QUANT, 5}, {INTRA, 5}, - {MC, 3}, {MC, 3}, {MC, 3}, {MC, 3}, - {CODED, 2}, {CODED, 2}, {CODED, 2}, {CODED, 2}, - {CODED, 2}, {CODED, 2}, {CODED, 2}, {CODED, 2}, - {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, - {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, - {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, - {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1} -}; - -#define FWD MACROBLOCK_MOTION_FORWARD -#define BWD MACROBLOCK_MOTION_BACKWARD -#define INTER MACROBLOCK_MOTION_FORWARD|MACROBLOCK_MOTION_BACKWARD - -static const MBtab MB_B [] = { - {0, 0}, {INTRA|QUANT, 6}, - {BWD|CODED|QUANT, 6}, {FWD|CODED|QUANT, 6}, - {INTER|CODED|QUANT, 5}, {INTER|CODED|QUANT, 5}, - {INTRA, 5}, {INTRA, 5}, - {FWD, 4}, {FWD, 4}, {FWD, 4}, {FWD, 4}, - {FWD|CODED, 4}, {FWD|CODED, 4}, {FWD|CODED, 4}, {FWD|CODED, 4}, - {BWD, 3}, {BWD, 3}, {BWD, 3}, {BWD, 3}, - {BWD, 3}, {BWD, 3}, {BWD, 3}, {BWD, 3}, - {BWD|CODED, 3}, {BWD|CODED, 3}, {BWD|CODED, 3}, {BWD|CODED, 3}, - {BWD|CODED, 3}, {BWD|CODED, 3}, 
{BWD|CODED, 3}, {BWD|CODED, 3}, - {INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2}, - {INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2}, - {INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2}, - {INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2}, - {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, - {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, - {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, - {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2} -}; - -#undef INTRA -#undef QUANT -#undef MC -#undef CODED -#undef FWD -#undef BWD -#undef INTER - - -static const MVtab MV_4 [] = { - { 3, 6}, { 2, 4}, { 1, 3}, { 1, 3}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2} -}; - -static const MVtab MV_10 [] = { - { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, - { 0,10}, { 0,10}, { 0,10}, { 0,10}, {15,10}, {14,10}, {13,10}, {12,10}, - {11,10}, {10,10}, { 9, 9}, { 9, 9}, { 8, 9}, { 8, 9}, { 7, 9}, { 7, 9}, - { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, - { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, - { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7} -}; - - -static const DMVtab DMV_2 [] = { - { 0, 1}, { 0, 1}, { 1, 2}, {-1, 2} -}; - - -static const CBPtab CBP_7 [] = { - {0x22, 7}, {0x12, 7}, {0x0a, 7}, {0x06, 7}, - {0x21, 7}, {0x11, 7}, {0x09, 7}, {0x05, 7}, - {0x3f, 6}, {0x3f, 6}, {0x03, 6}, {0x03, 6}, - {0x24, 6}, {0x24, 6}, {0x18, 6}, {0x18, 6}, - {0x3e, 5}, {0x3e, 5}, {0x3e, 5}, {0x3e, 5}, - {0x02, 5}, {0x02, 5}, {0x02, 5}, {0x02, 5}, - {0x3d, 5}, {0x3d, 5}, {0x3d, 5}, {0x3d, 5}, - {0x01, 5}, {0x01, 5}, {0x01, 5}, {0x01, 5}, - {0x38, 5}, {0x38, 5}, {0x38, 5}, {0x38, 5}, - {0x34, 5}, {0x34, 5}, {0x34, 5}, {0x34, 5}, - {0x2c, 5}, {0x2c, 5}, {0x2c, 5}, {0x2c, 5}, - {0x1c, 5}, {0x1c, 5}, {0x1c, 5}, {0x1c, 5}, - {0x28, 5}, {0x28, 5}, {0x28, 5}, {0x28, 5}, - {0x14, 5}, {0x14, 5}, {0x14, 5}, {0x14, 5}, - {0x30, 5}, {0x30, 
5}, {0x30, 5}, {0x30, 5}, - {0x0c, 5}, {0x0c, 5}, {0x0c, 5}, {0x0c, 5}, - {0x20, 4}, {0x20, 4}, {0x20, 4}, {0x20, 4}, - {0x20, 4}, {0x20, 4}, {0x20, 4}, {0x20, 4}, - {0x10, 4}, {0x10, 4}, {0x10, 4}, {0x10, 4}, - {0x10, 4}, {0x10, 4}, {0x10, 4}, {0x10, 4}, - {0x08, 4}, {0x08, 4}, {0x08, 4}, {0x08, 4}, - {0x08, 4}, {0x08, 4}, {0x08, 4}, {0x08, 4}, - {0x04, 4}, {0x04, 4}, {0x04, 4}, {0x04, 4}, - {0x04, 4}, {0x04, 4}, {0x04, 4}, {0x04, 4}, - {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, - {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, - {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, - {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3} -}; - -static const CBPtab CBP_9 [] = { - {0, 0}, {0x00, 9}, {0x27, 9}, {0x1b, 9}, - {0x3b, 9}, {0x37, 9}, {0x2f, 9}, {0x1f, 9}, - {0x3a, 8}, {0x3a, 8}, {0x36, 8}, {0x36, 8}, - {0x2e, 8}, {0x2e, 8}, {0x1e, 8}, {0x1e, 8}, - {0x39, 8}, {0x39, 8}, {0x35, 8}, {0x35, 8}, - {0x2d, 8}, {0x2d, 8}, {0x1d, 8}, {0x1d, 8}, - {0x26, 8}, {0x26, 8}, {0x1a, 8}, {0x1a, 8}, - {0x25, 8}, {0x25, 8}, {0x19, 8}, {0x19, 8}, - {0x2b, 8}, {0x2b, 8}, {0x17, 8}, {0x17, 8}, - {0x33, 8}, {0x33, 8}, {0x0f, 8}, {0x0f, 8}, - {0x2a, 8}, {0x2a, 8}, {0x16, 8}, {0x16, 8}, - {0x32, 8}, {0x32, 8}, {0x0e, 8}, {0x0e, 8}, - {0x29, 8}, {0x29, 8}, {0x15, 8}, {0x15, 8}, - {0x31, 8}, {0x31, 8}, {0x0d, 8}, {0x0d, 8}, - {0x23, 8}, {0x23, 8}, {0x13, 8}, {0x13, 8}, - {0x0b, 8}, {0x0b, 8}, {0x07, 8}, {0x07, 8} -}; - - -static const DCtab DC_lum_5 [] = { - {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, - {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, - {0, 3}, {0, 3}, {0, 3}, {0, 3}, {3, 3}, {3, 3}, {3, 3}, {3, 3}, - {4, 3}, {4, 3}, {4, 3}, {4, 3}, {5, 4}, {5, 4}, {6, 5} -}; - -static const DCtab DC_chrom_5 [] = { - {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, - {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, - {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, - {3, 3}, {3, 3}, {3, 3}, {3, 3}, {4, 4}, {4, 4}, {5, 5} -}; - 
-static const DCtab DC_long [] = { - {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, { 6, 5}, { 6, 5}, - {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, { 6, 5}, { 6, 5}, - {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, { 7, 6}, { 7, 6}, - {8, 7}, {8, 7}, {8, 7}, {8, 7}, {9, 8}, {9, 8}, {10, 9}, {11, 9} -}; - - -static const DCTtab DCT_16 [] = { - {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, - {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, - {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, - {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, - { 2,18, 0}, { 2,17, 0}, { 2,16, 0}, { 2,15, 0}, - { 7, 3, 0}, { 17, 2, 0}, { 16, 2, 0}, { 15, 2, 0}, - { 14, 2, 0}, { 13, 2, 0}, { 12, 2, 0}, { 32, 1, 0}, - { 31, 1, 0}, { 30, 1, 0}, { 29, 1, 0}, { 28, 1, 0} -}; - -static const DCTtab DCT_15 [] = { - { 1,40,15}, { 1,39,15}, { 1,38,15}, { 1,37,15}, - { 1,36,15}, { 1,35,15}, { 1,34,15}, { 1,33,15}, - { 1,32,15}, { 2,14,15}, { 2,13,15}, { 2,12,15}, - { 2,11,15}, { 2,10,15}, { 2, 9,15}, { 2, 8,15}, - { 1,31,14}, { 1,31,14}, { 1,30,14}, { 1,30,14}, - { 1,29,14}, { 1,29,14}, { 1,28,14}, { 1,28,14}, - { 1,27,14}, { 1,27,14}, { 1,26,14}, { 1,26,14}, - { 1,25,14}, { 1,25,14}, { 1,24,14}, { 1,24,14}, - { 1,23,14}, { 1,23,14}, { 1,22,14}, { 1,22,14}, - { 1,21,14}, { 1,21,14}, { 1,20,14}, { 1,20,14}, - { 1,19,14}, { 1,19,14}, { 1,18,14}, { 1,18,14}, - { 1,17,14}, { 1,17,14}, { 1,16,14}, { 1,16,14} -}; - -static const DCTtab DCT_13 [] = { - { 11, 2,13}, { 10, 2,13}, { 6, 3,13}, { 4, 4,13}, - { 3, 5,13}, { 2, 7,13}, { 2, 6,13}, { 1,15,13}, - { 1,14,13}, { 1,13,13}, { 1,12,13}, { 27, 1,13}, - { 26, 1,13}, { 25, 1,13}, { 24, 1,13}, { 23, 1,13}, - { 1,11,12}, { 1,11,12}, { 9, 2,12}, { 9, 2,12}, - { 5, 3,12}, { 5, 3,12}, { 1,10,12}, { 1,10,12}, - { 3, 4,12}, { 3, 4,12}, { 8, 2,12}, { 8, 2,12}, - { 22, 1,12}, { 22, 1,12}, { 21, 1,12}, { 21, 1,12}, - { 1, 9,12}, { 1, 9,12}, { 20, 1,12}, { 20, 1,12}, - { 19, 1,12}, { 19, 1,12}, { 2, 5,12}, { 2, 5,12}, - { 4, 3,12}, { 4, 3,12}, { 
1, 8,12}, { 1, 8,12}, - { 7, 2,12}, { 7, 2,12}, { 18, 1,12}, { 18, 1,12} -}; - -static const DCTtab DCT_B14_10 [] = { - { 17, 1,10}, { 6, 2,10}, { 1, 7,10}, { 3, 3,10}, - { 2, 4,10}, { 16, 1,10}, { 15, 1,10}, { 5, 2,10} -}; - -static const DCTtab DCT_B14_8 [] = { - { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6}, - { 3, 2, 7}, { 3, 2, 7}, { 10, 1, 7}, { 10, 1, 7}, - { 1, 4, 7}, { 1, 4, 7}, { 9, 1, 7}, { 9, 1, 7}, - { 8, 1, 6}, { 8, 1, 6}, { 8, 1, 6}, { 8, 1, 6}, - { 7, 1, 6}, { 7, 1, 6}, { 7, 1, 6}, { 7, 1, 6}, - { 2, 2, 6}, { 2, 2, 6}, { 2, 2, 6}, { 2, 2, 6}, - { 6, 1, 6}, { 6, 1, 6}, { 6, 1, 6}, { 6, 1, 6}, - { 14, 1, 8}, { 1, 6, 8}, { 13, 1, 8}, { 12, 1, 8}, - { 4, 2, 8}, { 2, 3, 8}, { 1, 5, 8}, { 11, 1, 8} -}; - -static const DCTtab DCT_B14AC_5 [] = { - { 1, 3, 5}, { 5, 1, 5}, { 4, 1, 5}, - { 1, 2, 4}, { 1, 2, 4}, { 3, 1, 4}, { 3, 1, 4}, - { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, - {129, 0, 2}, {129, 0, 2}, {129, 0, 2}, {129, 0, 2}, - {129, 0, 2}, {129, 0, 2}, {129, 0, 2}, {129, 0, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2} -}; - -static const DCTtab DCT_B14DC_5 [] = { - { 1, 3, 5}, { 5, 1, 5}, { 4, 1, 5}, - { 1, 2, 4}, { 1, 2, 4}, { 3, 1, 4}, { 3, 1, 4}, - { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, - { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, - { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, - { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, - { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1} -}; - -static const DCTtab DCT_B15_10 [] = { - { 6, 2, 9}, { 6, 2, 9}, { 15, 1, 9}, { 15, 1, 9}, - { 3, 4,10}, { 17, 1,10}, { 16, 1, 9}, { 16, 1, 9} -}; - -static const DCTtab DCT_B15_8 [] = { - { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6}, - { 8, 1, 7}, { 8, 1, 7}, { 9, 1, 7}, { 9, 1, 7}, - { 7, 1, 7}, { 7, 1, 7}, { 3, 2, 7}, { 3, 2, 7}, - { 1, 7, 6}, { 1, 7, 6}, { 1, 7, 6}, { 1, 7, 6}, - { 1, 6, 6}, { 1, 6, 6}, { 1, 6, 6}, { 1, 6, 6}, - { 5, 1, 6}, { 5, 1, 6}, { 5, 1, 6}, { 5, 1, 6}, - { 
6, 1, 6}, { 6, 1, 6}, { 6, 1, 6}, { 6, 1, 6}, - { 2, 5, 8}, { 12, 1, 8}, { 1,11, 8}, { 1,10, 8}, - { 14, 1, 8}, { 13, 1, 8}, { 4, 2, 8}, { 2, 4, 8}, - { 3, 1, 5}, { 3, 1, 5}, { 3, 1, 5}, { 3, 1, 5}, - { 3, 1, 5}, { 3, 1, 5}, { 3, 1, 5}, { 3, 1, 5}, - { 2, 2, 5}, { 2, 2, 5}, { 2, 2, 5}, { 2, 2, 5}, - { 2, 2, 5}, { 2, 2, 5}, { 2, 2, 5}, { 2, 2, 5}, - { 4, 1, 5}, { 4, 1, 5}, { 4, 1, 5}, { 4, 1, 5}, - { 4, 1, 5}, { 4, 1, 5}, { 4, 1, 5}, { 4, 1, 5}, - { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, - { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, - { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, - { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, - { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, - { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, - { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, - { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, - {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, - {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, - {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, - {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, - { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, - { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, - { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, - { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 
2}, { 1, 1, 2}, - { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, - { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, - { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, - { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, - { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, - { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, - { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, - { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, - { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, - { 1, 4, 5}, { 1, 4, 5}, { 1, 4, 5}, { 1, 4, 5}, - { 1, 4, 5}, { 1, 4, 5}, { 1, 4, 5}, { 1, 4, 5}, - { 1, 5, 5}, { 1, 5, 5}, { 1, 5, 5}, { 1, 5, 5}, - { 1, 5, 5}, { 1, 5, 5}, { 1, 5, 5}, { 1, 5, 5}, - { 10, 1, 7}, { 10, 1, 7}, { 2, 3, 7}, { 2, 3, 7}, - { 11, 1, 7}, { 11, 1, 7}, { 1, 8, 7}, { 1, 8, 7}, - { 1, 9, 7}, { 1, 9, 7}, { 1,12, 8}, { 1,13, 8}, - { 3, 3, 8}, { 5, 2, 8}, { 1,14, 8}, { 1,15, 8} -}; - - -static const MBAtab MBA_5 [] = { - {6, 5}, {5, 5}, {4, 4}, {4, 4}, {3, 4}, {3, 4}, - {2, 3}, {2, 3}, {2, 3}, {2, 3}, {1, 3}, {1, 3}, {1, 3}, {1, 3}, - {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, - {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1} -}; - -static const MBAtab MBA_11 [] = { - {32, 11}, {31, 11}, {30, 11}, {29, 11}, - {28, 11}, {27, 11}, {26, 11}, {25, 11}, - {24, 11}, {23, 11}, {22, 11}, {21, 11}, - {20, 10}, {20, 10}, {19, 10}, {19, 10}, - {18, 10}, {18, 10}, {17, 10}, {17, 10}, - {16, 10}, {16, 10}, {15, 10}, {15, 10}, - {14, 8}, {14, 8}, {14, 8}, {14, 8}, - {14, 8}, {14, 8}, {14, 8}, {14, 8}, - {13, 8}, {13, 8}, {13, 8}, {13, 8}, - {13, 8}, {13, 8}, {13, 8}, {13, 8}, - {12, 8}, {12, 8}, {12, 8}, {12, 8}, - {12, 8}, {12, 8}, {12, 8}, {12, 8}, - {11, 8}, {11, 8}, {11, 8}, {11, 8}, - {11, 8}, {11, 8}, {11, 8}, {11, 8}, - {10, 8}, {10, 8}, {10, 8}, {10, 8}, - {10, 8}, {10, 8}, {10, 8}, {10, 8}, - { 9, 8}, { 9, 8}, { 9, 8}, { 9, 8}, - { 9, 8}, { 9, 8}, { 9, 8}, { 9, 8}, - { 8, 7}, { 8, 7}, { 8, 7}, { 8, 7}, - { 8, 7}, { 8, 7}, { 8, 7}, { 8, 7}, - { 8, 7}, { 8, 
7}, { 8, 7}, { 8, 7}, - { 8, 7}, { 8, 7}, { 8, 7}, { 8, 7}, - { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7}, - { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7}, - { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7}, - { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7} -}; diff --git a/src/libmpeg2/xine_mpeg2_decoder.c b/src/libmpeg2/xine_mpeg2_decoder.c deleted file mode 100644 index 3a3e28452..000000000 --- a/src/libmpeg2/xine_mpeg2_decoder.c +++ /dev/null @@ -1,169 +0,0 @@ -/* - * Copyright (C) 2000-2003 the xine project - * - * This file is part of xine, a free video player. - * - * xine is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * xine is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110, USA - * - * stuff needed to turn libmpeg2 into a xine decoder plugin - */ - - -#include -#include -#include -#include -#include -#include - -#define LOG_MODULE "mpeg2_decoder" -#define LOG_VERBOSE -/* -#define LOG -*/ - -#include -#include -#include "mpeg2.h" -#include "mpeg2_internal.h" -#include - -typedef struct { - video_decoder_class_t decoder_class; -} mpeg2_class_t; - - -typedef struct mpeg2dec_decoder_s { - video_decoder_t video_decoder; - mpeg2dec_t mpeg2; - mpeg2_class_t *class; - xine_stream_t *stream; -} mpeg2dec_decoder_t; - -static void mpeg2dec_decode_data (video_decoder_t *this_gen, buf_element_t *buf) { - mpeg2dec_decoder_t *this = (mpeg2dec_decoder_t *) this_gen; - - lprintf ("decode_data, flags=0x%08x ...\n", buf->decoder_flags); - - /* handle aspect hints from xine-dvdnav */ - if (buf->decoder_flags & BUF_FLAG_SPECIAL) { - if (buf->decoder_info[1] == BUF_SPECIAL_ASPECT) { - this->mpeg2.force_aspect = buf->decoder_info[2]; - if (buf->decoder_info[3] == 0x1 && buf->decoder_info[2] == 3) - /* letterboxing is denied, we have to do pan&scan */ - this->mpeg2.force_pan_scan = 1; - else - this->mpeg2.force_pan_scan = 0; - } - return; - } - - if (buf->decoder_flags & BUF_FLAG_PREVIEW) { - mpeg2_find_sequence_header (&this->mpeg2, buf->content, buf->content + buf->size); - } else { - - mpeg2_decode_data (&this->mpeg2, buf->content, buf->content + buf->size, - buf->pts); - } - - lprintf ("decode_data...done\n"); -} - -static void mpeg2dec_flush (video_decoder_t *this_gen) { - mpeg2dec_decoder_t *this = (mpeg2dec_decoder_t *) this_gen; - - lprintf ("flush\n"); - - mpeg2_flush (&this->mpeg2); -} - -static void mpeg2dec_reset (video_decoder_t *this_gen) { - mpeg2dec_decoder_t *this = (mpeg2dec_decoder_t *) this_gen; - - mpeg2_reset (&this->mpeg2); -} - 
-static void mpeg2dec_discontinuity (video_decoder_t *this_gen) { - mpeg2dec_decoder_t *this = (mpeg2dec_decoder_t *) this_gen; - - mpeg2_discontinuity (&this->mpeg2); -} - -static void mpeg2dec_dispose (video_decoder_t *this_gen) { - - mpeg2dec_decoder_t *this = (mpeg2dec_decoder_t *) this_gen; - - lprintf ("close\n"); - - mpeg2_close (&this->mpeg2); - - this->stream->video_out->close(this->stream->video_out, this->stream); - - free (this); -} - -static video_decoder_t *open_plugin (video_decoder_class_t *class_gen, xine_stream_t *stream) { - mpeg2dec_decoder_t *this ; - - this = (mpeg2dec_decoder_t *) xine_xmalloc (sizeof (mpeg2dec_decoder_t)); - - this->video_decoder.decode_data = mpeg2dec_decode_data; - this->video_decoder.flush = mpeg2dec_flush; - this->video_decoder.reset = mpeg2dec_reset; - this->video_decoder.discontinuity = mpeg2dec_discontinuity; - this->video_decoder.dispose = mpeg2dec_dispose; - this->stream = stream; - this->class = (mpeg2_class_t *) class_gen; - this->mpeg2.stream = stream; - - mpeg2_init (&this->mpeg2, stream->video_out); - (stream->video_out->open) (stream->video_out, stream); - this->mpeg2.force_aspect = this->mpeg2.force_pan_scan = 0; - - return &this->video_decoder; -} - -/* - * mpeg2 plugin class - */ -static void *init_plugin (xine_t *xine, void *data) { - - mpeg2_class_t *this; - - this = (mpeg2_class_t *) xine_xmalloc (sizeof (mpeg2_class_t)); - - this->decoder_class.open_plugin = open_plugin; - this->decoder_class.identifier = "mpeg2dec"; - this->decoder_class.description = N_("mpeg2 based video decoder plugin"); - this->decoder_class.dispose = default_video_decoder_class_dispose; - - return this; -} -/* - * exported plugin catalog entry - */ - -static uint32_t supported_types[] = { BUF_VIDEO_MPEG, 0 }; - -static const decoder_info_t dec_info_mpeg2 = { - supported_types, /* supported types */ - 7 /* priority */ -}; - -const plugin_info_t xine_plugin_info[] EXPORTED = { - /* type, API, "name", version, special_info, 
init_function */ - { PLUGIN_VIDEO_DECODER, 19, "mpeg2", XINE_VERSION_CODE, &dec_info_mpeg2, init_plugin }, - { PLUGIN_NONE, 0, "", 0, NULL, NULL } -}; diff --git a/src/libmpeg2/xvmc.h b/src/libmpeg2/xvmc.h deleted file mode 100644 index 5d61bcf83..000000000 --- a/src/libmpeg2/xvmc.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * mpeg2_internal.h - * Copyright (C) 2000-2002 Michel Lespinasse - * Copyright (C) 1999-2000 Aaron Holtzman - * - * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. - * See http://libmpeg2.sourceforge.net/ for updates. - * - * mpeg2dec is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * mpeg2dec is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#ifndef _XVMC_H -#include "libmpeg2_accel.h" - -/* slice_xvmc.c */ - -void mpeg2_xvmc_slice (mpeg2dec_accel_t *accel, picture_t * picture, int code, uint8_t * buffer); -void xvmc_setup_scan_ptable( void ); - -#endif diff --git a/src/libmpeg2/xvmc_vld.h b/src/libmpeg2/xvmc_vld.h deleted file mode 100644 index 561d1789d..000000000 --- a/src/libmpeg2/xvmc_vld.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright (c) 2004 The Unichrome project. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it under - * the terms of the GNU General Public License as published by the Free Software - * Foundation; either version 2, or (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTIES OR REPRESENTATIONS; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - * See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * - * - */ - -#ifndef _XVMC_VLD_H -#define _XVMC_VLD_H - -#include "accel_xvmc.h" -#include "xvmc.h" - -extern void mpeg2_xxmc_slice( mpeg2dec_accel_t *accel, picture_t *picture, - int code, uint8_t *buffer, uint32_t chunk_size, - uint8_t *chunk_buffer); -extern void mpeg2_xxmc_vld_frame_complete(mpeg2dec_accel_t *accel, picture_t *picture, int code); - - -#endif diff --git a/src/video_dec/Makefile.am b/src/video_dec/Makefile.am index 321d70c1f..91246eb4d 100644 --- a/src/video_dec/Makefile.am +++ b/src/video_dec/Makefile.am @@ -1,3 +1,5 @@ +SUBDIRS = libmpeg2 + include $(top_srcdir)/misc/Makefile.common AM_CFLAGS = $(DEFAULT_OCFLAGS) $(VISIBILITY_FLAG) diff --git a/src/video_dec/libmpeg2/Makefile.am b/src/video_dec/libmpeg2/Makefile.am new file mode 100644 index 000000000..d772f0e09 --- /dev/null +++ b/src/video_dec/libmpeg2/Makefile.am @@ -0,0 +1,32 @@ +include $(top_srcdir)/misc/Makefile.common + +AM_CFLAGS = $(DEFAULT_OCFLAGS) $(VISIBILITY_FLAG) +AM_LDFLAGS = $(xineplug_ldflags) + +noinst_HEADERS = vlc.h mpeg2.h xvmc.h xvmc_vld.h mpeg2_internal.h idct_mlib.h vis.h \ + libmpeg2_accel.h + +xineplug_LTLIBRARIES = xineplug_decode_mpeg2.la + +xineplug_decode_mpeg2_la_SOURCES = \ + cpu_state.c \ + decode.c \ + header.c \ + idct.c \ + idct_altivec.c \ + idct_mlib.c \ + idct_mmx.c \ + motion_comp.c \ + motion_comp_altivec.c \ + motion_comp_mmx.c \ + motion_comp_mlib.c \ + motion_comp_vis.c \ + slice.c \ + slice_xvmc.c \ + slice_xvmc_vld.c \ + stats.c \ + xine_mpeg2_decoder.c \ + 
libmpeg2_accel.c + +xineplug_decode_mpeg2_la_LIBADD = $(MLIB_LIBS) $(XINE_LIB) -lm +xineplug_decode_mpeg2_la_CFLAGS = $(AM_CFLAGS) $(MLIB_CFLAGS) diff --git a/src/video_dec/libmpeg2/cpu_state.c b/src/video_dec/libmpeg2/cpu_state.c new file mode 100644 index 000000000..12963644c --- /dev/null +++ b/src/video_dec/libmpeg2/cpu_state.c @@ -0,0 +1,183 @@ +/* + * cpu_state.c + * Copyright (C) 2000-2003 Michel Lespinasse + * Copyright (C) 1999-2000 Aaron Holtzman + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#include +#include + +#include "mpeg2_internal.h" +#include + +void (* mpeg2_cpu_state_save) (cpu_state_t * state) = NULL; +void (* mpeg2_cpu_state_restore) (cpu_state_t * state) = NULL; + +#if defined(ARCH_X86) || defined(ARCH_X86_64) +static void state_restore_mmx (cpu_state_t * state) +{ + emms (); +} +#endif + +#if defined (ARCH_PPC) && defined (ENABLE_ALTIVEC) + +#ifndef HOST_OS_DARWIN + +static void state_save_altivec (cpu_state_t * state) +{ + asm (" \n" + " li %r9, 16 \n" + " stvx %v20, 0, %r3 \n" + " li %r11, 32 \n" + " stvx %v21, %r9, %r3 \n" + " li %r9, 48 \n" + " stvx %v22, %r11, %r3 \n" + " li %r11, 64 \n" + " stvx %v23, %r9, %r3 \n" + " li %r9, 80 \n" + " stvx %v24, %r11, %r3 \n" + " li %r11, 96 \n" + " stvx %v25, %r9, %r3 \n" + " li %r9, 112 \n" + " stvx %v26, %r11, %r3 \n" + " li %r11, 128 \n" + " stvx %v27, %r9, %r3 \n" + " li %r9, 144 \n" + " stvx %v28, %r11, %r3 \n" + " li %r11, 160 \n" + " stvx %v29, %r9, %r3 \n" + " li %r9, 176 \n" + " stvx %v30, %r11, %r3 \n" + " stvx %v31, %r9, %r3 \n" + ); +} + +static void state_restore_altivec (cpu_state_t * state) +{ + asm (" \n" + " li %r9, 16 \n" + " lvx %v20, 0, %r3 \n" + " li %r11, 32 \n" + " lvx %v21, %r9, %r3 \n" + " li %r9, 48 \n" + " lvx %v22, %r11, %r3 \n" + " li %r11, 64 \n" + " lvx %v23, %r9, %r3 \n" + " li %r9, 80 \n" + " lvx %v24, %r11, %r3 \n" + " li %r11, 96 \n" + " lvx %v25, %r9, %r3 \n" + " li %r9, 112 \n" + " lvx %v26, %r11, %r3 \n" + " li %r11, 128 \n" + " lvx %v27, %r9, %r3 \n" + " li %r9, 144 \n" + " lvx %v28, %r11, %r3 \n" + " li %r11, 160 \n" + " lvx %v29, %r9, %r3 \n" + " li %r9, 176 \n" + " lvx %v30, %r11, %r3 \n" + " lvx %v31, %r9, %r3 \n" + ); +} + +#else /* HOST_OS_DARWIN */ + +#define LI(a,b) "li r" #a "," #b "\n\t" +#define 
STVX0(a,b,c) "stvx v" #a ",0,r" #c "\n\t" +#define STVX(a,b,c) "stvx v" #a ",r" #b ",r" #c "\n\t" +#define LVX0(a,b,c) "lvx v" #a ",0,r" #c "\n\t" +#define LVX(a,b,c) "lvx v" #a ",r" #b ",r" #c "\n\t" + +static void state_save_altivec (cpu_state_t * state) +{ + asm (LI (9, 16) + STVX0 (20, 0, 3) + LI (11, 32) + STVX (21, 9, 3) + LI (9, 48) + STVX (22, 11, 3) + LI (11, 64) + STVX (23, 9, 3) + LI (9, 80) + STVX (24, 11, 3) + LI (11, 96) + STVX (25, 9, 3) + LI (9, 112) + STVX (26, 11, 3) + LI (11, 128) + STVX (27, 9, 3) + LI (9, 144) + STVX (28, 11, 3) + LI (11, 160) + STVX (29, 9, 3) + LI (9, 176) + STVX (30, 11, 3) + STVX (31, 9, 3)); +} + +static void state_restore_altivec (cpu_state_t * state) +{ + asm (LI (9, 16) + LVX0 (20, 0, 3) + LI (11, 32) + LVX (21, 9, 3) + LI (9, 48) + LVX (22, 11, 3) + LI (11, 64) + LVX (23, 9, 3) + LI (9, 80) + LVX (24, 11, 3) + LI (11, 96) + LVX (25, 9, 3) + LI (9, 112) + LVX (26, 11, 3) + LI (11, 128) + LVX (27, 9, 3) + LI (9, 144) + LVX (28, 11, 3) + LI (11, 160) + LVX (29, 9, 3) + LI (9, 176) + LVX (30, 11, 3) + LVX (31, 9, 3)); +} +#endif /* HOST_OS_DARWIN */ + +#endif /* defined (ARCH_PPC) && defined (ENABLE_ALTIVEC) */ + +void mpeg2_cpu_state_init (uint32_t mm_accel) +{ +#if defined(ARCH_X86) || defined(ARCH_X86_64) + if (mm_accel & MM_ACCEL_X86_MMX) { + mpeg2_cpu_state_restore = state_restore_mmx; + } +#endif +#if defined (ARCH_PPC) && defined (ENABLE_ALTIVEC) + if (mm_accel & MM_ACCEL_PPC_ALTIVEC) { + mpeg2_cpu_state_save = state_save_altivec; + mpeg2_cpu_state_restore = state_restore_altivec; + } +#endif +} + diff --git a/src/video_dec/libmpeg2/decode.c b/src/video_dec/libmpeg2/decode.c new file mode 100644 index 000000000..145d5f58b --- /dev/null +++ b/src/video_dec/libmpeg2/decode.c @@ -0,0 +1,1009 @@ +/* + * decode.c + * Copyright (C) 2000-2002 Michel Lespinasse + * Copyright (C) 1999-2000 Aaron Holtzman + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. 
+ * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * xine-specific version by G. Bartsch + * + */ + +#include "config.h" +#include +#include /* memcpy/memset, try to remove */ +#include +#include +#include + +#define LOG_MODULE "decode" +#define LOG_VERBOSE +/* +#define LOG +*/ + +#include +#include +#include "mpeg2.h" +#include "mpeg2_internal.h" +#include +#include "libmpeg2_accel.h" + +/* +#define LOG_PAN_SCAN +*/ + +/* #define BUFFER_SIZE (224 * 1024) */ +#define BUFFER_SIZE (1194 * 1024) /* new buffer size for mpeg2dec 0.2.1 */ + +static void process_userdata(mpeg2dec_t *mpeg2dec, uint8_t *buffer); + +void mpeg2_init (mpeg2dec_t * mpeg2dec, + xine_video_port_t * output) +{ + static int do_init = 1; + uint32_t mm_accel; + + if (do_init) { + do_init = 0; + mm_accel = xine_mm_accel(); + mpeg2_cpu_state_init (mm_accel); + mpeg2_idct_init (mm_accel); + mpeg2_mc_init (mm_accel); + libmpeg2_accel_scan(&mpeg2dec->accel, mpeg2_scan_norm, mpeg2_scan_alt); + } + + if( !mpeg2dec->chunk_buffer ) + mpeg2dec->chunk_buffer = xine_xmalloc_aligned (16, BUFFER_SIZE + 4, + &mpeg2dec->chunk_base); + if( !mpeg2dec->picture ) + mpeg2dec->picture = xine_xmalloc_aligned (16, sizeof (picture_t), + &mpeg2dec->picture_base); + + mpeg2dec->shift = 0xffffff00; + 
mpeg2dec->new_sequence = 0; + mpeg2dec->is_sequence_needed = 1; + mpeg2dec->is_wait_for_ip_frames = 2; + mpeg2dec->frames_to_drop = 0; + mpeg2dec->drop_frame = 0; + mpeg2dec->in_slice = 0; + mpeg2dec->output = output; + mpeg2dec->chunk_ptr = mpeg2dec->chunk_buffer; + mpeg2dec->code = 0xb4; + mpeg2dec->seek_mode = 0; + + /* initialize AFD storage */ + mpeg2dec->afd_value_seen = XINE_VIDEO_AFD_NOT_PRESENT; + mpeg2dec->afd_value_reported = (XINE_VIDEO_AFD_NOT_PRESENT - 1); + + memset (mpeg2dec->picture, 0, sizeof (picture_t)); + + /* initialize substructures */ + mpeg2_header_state_init (mpeg2dec->picture); + + if ( output->get_capabilities(output) & VO_CAP_XXMC) { + printf("libmpeg2: output port has XxMC capability\n"); + mpeg2dec->frame_format = XINE_IMGFMT_XXMC; + } else if( output->get_capabilities(output) & VO_CAP_XVMC_MOCOMP) { + printf("libmpeg2: output port has XvMC capability\n"); + mpeg2dec->frame_format = XINE_IMGFMT_XVMC; + } else { + mpeg2dec->frame_format = XINE_IMGFMT_YV12; + } +} + +static inline void get_frame_duration (mpeg2dec_t * mpeg2dec, vo_frame_t *frame) +{ + static const double durations[] = { + 0, /* invalid */ + 3753.75, /* 23.976 fps */ + 3750, /* 24 fps */ + 3600, /* 25 fps */ + 3003, /* 29.97 fps */ + 3000, /* 30 fps */ + 1800, /* 50 fps */ + 1501.5, /* 59.94 fps */ + 1500, /* 60 fps */ + }; + double duration = ((unsigned) mpeg2dec->picture->frame_rate_code > 8u) + ? 
0 : durations[mpeg2dec->picture->frame_rate_code]; + + duration = duration * (mpeg2dec->picture->frame_rate_ext_n + 1.0) / + (mpeg2dec->picture->frame_rate_ext_d + 1.0); + + /* this should be used to detect any special rff pattern */ + mpeg2dec->rff_pattern = mpeg2dec->rff_pattern << 1; + mpeg2dec->rff_pattern |= !!frame->repeat_first_field; + + if( ((mpeg2dec->rff_pattern & 0xff) == 0xaa || + (mpeg2dec->rff_pattern & 0xff) == 0x55) && + !mpeg2dec->picture->progressive_sequence ) { + /* special case for ntsc 3:2 pulldown */ + duration *= 5.0 / 4.0; + } + else + { + if( frame->repeat_first_field ) { + if( !mpeg2dec->picture->progressive_sequence && + frame->progressive_frame ) { + /* decoder should output 3 fields, so adjust duration to + count on this extra field time */ + duration *= 3.0 / 2.0; + } else if( mpeg2dec->picture->progressive_sequence ) { + /* for progressive sequences the output should repeat the + frame 1 or 2 times depending on top_field_first flag. */ + duration *= (frame->top_field_first) ? 3 : 2; + } + } + } + + frame->duration = (int) ceil (duration); + _x_stream_info_set(mpeg2dec->stream, XINE_STREAM_INFO_FRAME_DURATION, frame->duration); + /*printf("mpeg2dec: rff=%u\n",frame->repeat_first_field);*/ +} + +static double get_aspect_ratio(mpeg2dec_t *mpeg2dec) +{ + double ratio; + picture_t * picture = mpeg2dec->picture; + double mpeg1_pel_ratio[16] = {1.0 /* forbidden */, + 1.0, 0.6735, 0.7031, 0.7615, 0.8055, 0.8437, 0.8935, 0.9157, + 0.9815, 1.0255, 1.0695, 1.0950, 1.1575, 1.2015, 1.0 /*reserved*/ }; + + /* TODO: For slower machines the value of this function should be computed + * once and cached! + */ + + if( !picture->mpeg1 ) { + /* these hardcoded values are defined on mpeg2 standard for + * aspect ratio. other values are reserved or forbidden. 
*/ + switch(picture->aspect_ratio_information) { + case 2: + ratio = 4.0/3.0; + break; + case 3: + ratio = 16.0/9.0; + break; + case 4: + ratio = 2.11/1.0; + break; + case 1: + default: + ratio = (double)picture->coded_picture_width/(double)picture->coded_picture_height; + break; + } + } else { + /* mpeg1 constants refer to pixel aspect ratio */ + ratio = (double)picture->coded_picture_width/(double)picture->coded_picture_height; + ratio /= mpeg1_pel_ratio[picture->aspect_ratio_information]; + } + + return ratio; +} + +static void remember_metainfo (mpeg2dec_t *mpeg2dec) { + + picture_t * picture = mpeg2dec->picture; + + _x_stream_info_set(mpeg2dec->stream, XINE_STREAM_INFO_VIDEO_WIDTH, picture->display_width); + _x_stream_info_set(mpeg2dec->stream, XINE_STREAM_INFO_VIDEO_HEIGHT, picture->display_height); + _x_stream_info_set(mpeg2dec->stream, XINE_STREAM_INFO_VIDEO_RATIO, + ((double)10000 * get_aspect_ratio(mpeg2dec))); + + switch (mpeg2dec->picture->frame_rate_code) { + case 1: /* 23.976 fps */ + _x_stream_info_set(mpeg2dec->stream, XINE_STREAM_INFO_FRAME_DURATION, 3913); + break; + case 2: /* 24 fps */ + _x_stream_info_set(mpeg2dec->stream, XINE_STREAM_INFO_FRAME_DURATION, 3750); + break; + case 3: /* 25 fps */ + _x_stream_info_set(mpeg2dec->stream, XINE_STREAM_INFO_FRAME_DURATION, 3600); + break; + case 4: /* 29.97 fps */ + _x_stream_info_set(mpeg2dec->stream, XINE_STREAM_INFO_FRAME_DURATION, 3003); + break; + case 5: /* 30 fps */ + _x_stream_info_set(mpeg2dec->stream, XINE_STREAM_INFO_FRAME_DURATION, 3000); + break; + case 6: /* 50 fps */ + _x_stream_info_set(mpeg2dec->stream, XINE_STREAM_INFO_FRAME_DURATION, 1800); + break; + case 7: /* 59.94 fps */ + _x_stream_info_set(mpeg2dec->stream, XINE_STREAM_INFO_FRAME_DURATION, 1525); + break; + case 8: /* 60 fps */ + _x_stream_info_set(mpeg2dec->stream, XINE_STREAM_INFO_FRAME_DURATION, 1509); + break; + default: + /* printf ("invalid/unknown frame rate code : %d \n", + frame->frame_rate_code); */ + 
_x_stream_info_set(mpeg2dec->stream, XINE_STREAM_INFO_FRAME_DURATION, 3000); + } + + _x_meta_info_set_utf8(mpeg2dec->stream, XINE_META_INFO_VIDEOCODEC, "MPEG (libmpeg2)"); +} + +static inline int parse_chunk (mpeg2dec_t * mpeg2dec, int code, + uint8_t * buffer, int next_code) +{ + picture_t * picture; + int is_frame_done; + double ratio; + + /* wait for sequence_header_code */ + if (mpeg2dec->is_sequence_needed) { + if (code != 0xb3) { + /* printf ("libmpeg2: waiting for sequence header\n"); */ + mpeg2dec->pts = 0; + return 0; + } + } + if (mpeg2dec->is_frame_needed) { + /* printf ("libmpeg2: waiting for frame start\n"); */ + mpeg2dec->pts = 0; + if (mpeg2dec->picture->current_frame) + mpeg2dec->picture->current_frame->bad_frame = 1; + } + + mpeg2_stats (code, buffer); + + picture = mpeg2dec->picture; + is_frame_done = mpeg2dec->in_slice && ((!code) || (code >= 0xb0)); + + if (is_frame_done) + mpeg2dec->in_slice = 0; + + if (is_frame_done && picture->current_frame != NULL) { + + libmpeg2_accel_frame_completion(&mpeg2dec->accel, mpeg2dec->frame_format, + picture, code); + + if (((picture->picture_structure == FRAME_PICTURE) || + (picture->second_field)) ) { + + if (mpeg2dec->drop_frame) + picture->current_frame->bad_frame = 1; + + if (picture->picture_coding_type == B_TYPE) { + if( picture->current_frame && !picture->current_frame->drawn ) { + + /* hack against wrong mpeg1 pts */ + if (picture->mpeg1) + picture->current_frame->pts = 0; + + get_frame_duration(mpeg2dec, picture->current_frame); + mpeg2dec->frames_to_drop = picture->current_frame->draw (picture->current_frame, mpeg2dec->stream); + picture->current_frame->drawn = 1; + } + } else if (picture->forward_reference_frame && !picture->forward_reference_frame->drawn) { + get_frame_duration(mpeg2dec, picture->forward_reference_frame); + mpeg2dec->frames_to_drop = picture->forward_reference_frame->draw (picture->forward_reference_frame, + mpeg2dec->stream); + picture->forward_reference_frame->drawn = 1; + } + } + 
} + + /* dispatch on the start code that introduced this chunk */ + switch (code) { + case 0x00: /* picture_start_code */ + if (mpeg2_header_picture (picture, buffer)) { + fprintf (stderr, "bad picture header\n"); + /* skip the broken picture instead of abort()ing the host process, + * mirroring the handling of bad sequence headers */ + break; + } + + mpeg2dec->is_frame_needed=0; + + if (!picture->second_field) { + /* find out if we want to skip this frame */ + mpeg2dec->drop_frame = 0; + + /* picture->skip_non_intra_dct = (mpeg2dec->frames_to_drop>0) ; */ + + switch (picture->picture_coding_type) { + case B_TYPE: + + lprintf ("B-Frame\n"); + + /* B-frames are dropped when more than one frame is pending to be + * dropped, when either reference is missing or marked bad, or while + * is_wait_for_ip_frames is still counting down */ + if (mpeg2dec->frames_to_drop>1) { + lprintf ("dropping b-frame because frames_to_drop==%d\n", + mpeg2dec->frames_to_drop); + mpeg2dec->drop_frame = 1; + } else if (!picture->forward_reference_frame || picture->forward_reference_frame->bad_frame + || !picture->backward_reference_frame || picture->backward_reference_frame->bad_frame) { +#ifdef LOG + printf ("libmpeg2: dropping b-frame because ref is bad ("); + if (picture->forward_reference_frame) + printf ("fw ref frame %d, bad %d;", picture->forward_reference_frame->id, + picture->forward_reference_frame->bad_frame); + else + printf ("fw ref frame not there;"); + if (picture->backward_reference_frame) + printf ("bw ref frame %d, bad %d)\n", picture->backward_reference_frame->id, + picture->backward_reference_frame->bad_frame); + else + printf ("bw ref frame not there)\n"); +#endif + mpeg2dec->drop_frame = 1; + } else if (mpeg2dec->is_wait_for_ip_frames > 0) { + lprintf("dropping b-frame because refs are invalid\n"); + mpeg2dec->drop_frame = 1; + } + break; + + case P_TYPE: + + lprintf ("P-Frame\n"); + + /* P-frames only depend on the backward reference slot and tolerate one + * more pending drop than B-frames */ + if (mpeg2dec->frames_to_drop>2) { + mpeg2dec->drop_frame = 1; + lprintf ("dropping p-frame because frames_to_drop==%d\n", + mpeg2dec->frames_to_drop); + } else if (!picture->backward_reference_frame || picture->backward_reference_frame->bad_frame) { + mpeg2dec->drop_frame = 1; +#ifdef LOG + if (!picture->backward_reference_frame) + printf ("libmpeg2: dropping p-frame because no ref frame\n"); + else + printf ("libmpeg2: dropping p-frame because ref %d is 
bad\n", picture->backward_reference_frame->id); +#endif + } else if (mpeg2dec->is_wait_for_ip_frames > 1) { + lprintf("dropping p-frame because ref is invalid\n"); + mpeg2dec->drop_frame = 1; + } else if (mpeg2dec->is_wait_for_ip_frames) + mpeg2dec->is_wait_for_ip_frames--; + + break; + + case I_TYPE: + lprintf ("I-Frame\n"); + /* for the sake of dvd menus, never drop i-frames + if (mpeg2dec->frames_to_drop>4) { + mpeg2dec->drop_frame = 1; + } + */ + + if (mpeg2dec->is_wait_for_ip_frames) + mpeg2dec->is_wait_for_ip_frames--; + + break; + } + } + + break; + + case 0xb2: /* user data code */ + process_userdata(mpeg2dec, buffer); + break; + + case 0xb3: /* sequence_header_code */ + if (mpeg2_header_sequence (picture, buffer)) { + fprintf (stderr, "bad sequence header\n"); + /* abort(); */ + break; + } + + /* reset AFD value to detect absence */ + mpeg2dec->afd_value_seen = XINE_VIDEO_AFD_NOT_PRESENT; + + /* according to ISO/IEC 13818-2, an extension start code will follow. + * Otherwise the stream follows ISO/IEC 11172-2 which means MPEG1 */ + picture->mpeg1 = (next_code != 0xb5); + + if (mpeg2dec->force_aspect) picture->aspect_ratio_information = mpeg2dec->force_aspect; + + if (mpeg2dec->is_sequence_needed ) { + mpeg2dec->new_sequence = 1; + } + + if (mpeg2dec->is_sequence_needed + || (picture->aspect_ratio_information != picture->saved_aspect_ratio) + || (picture->frame_width != picture->coded_picture_width) + || (picture->frame_height != picture->coded_picture_height)) { + xine_event_t event; + xine_format_change_data_t data; + + remember_metainfo (mpeg2dec); + event.type = XINE_EVENT_FRAME_FORMAT_CHANGE; + event.stream = mpeg2dec->stream; + event.data = &data; + event.data_length = sizeof(data); + data.width = picture->coded_picture_width; + data.height = picture->coded_picture_height; + data.aspect = picture->aspect_ratio_information; + data.pan_scan = mpeg2dec->force_pan_scan; + xine_event_send(mpeg2dec->stream, &event); + + 
_x_stream_info_set(mpeg2dec->stream,XINE_STREAM_INFO_VIDEO_WIDTH, + picture->display_width); + _x_stream_info_set(mpeg2dec->stream,XINE_STREAM_INFO_VIDEO_HEIGHT, + picture->display_height); + + if (picture->forward_reference_frame && + picture->forward_reference_frame != picture->current_frame && + picture->forward_reference_frame != picture->backward_reference_frame) + picture->forward_reference_frame->free (picture->forward_reference_frame); + + if (picture->backward_reference_frame && + picture->backward_reference_frame != picture->current_frame) + picture->backward_reference_frame->free (picture->backward_reference_frame); + + mpeg2dec->is_sequence_needed = 0; + picture->forward_reference_frame = NULL; + picture->backward_reference_frame = NULL; + + picture->frame_width = picture->coded_picture_width; + picture->frame_height = picture->coded_picture_height; + picture->saved_aspect_ratio = picture->aspect_ratio_information; + } + break; + + case 0xb5: /* extension_start_code */ + if (mpeg2_header_extension (picture, buffer)) { + fprintf (stderr, "bad extension\n"); + abort(); + } + break; + + case 0xb7: /* sequence end code */ +#ifdef LOG_PAN_SCAN + printf ("libmpeg2: sequence end code not handled\n"); +#endif + case 0xb8: /* group of pictures start code */ + if (mpeg2_header_group_of_pictures (picture, buffer)) { + printf ("libmpeg2: bad group of pictures\n"); + abort(); + } + default: + if ((code >= 0xb9) && (code != 0xe4)) { + printf("Not multiplexed? 0x%x\n",code); + } + if (code >= 0xb0) + break; + + /* check for AFD change once per picture */ + if (mpeg2dec->afd_value_reported != mpeg2dec->afd_value_seen) { + /* AFD data should better be stored in current_frame to have it */ + /* ready and synchronous with other data like width or height. */ + /* An AFD change should then be detected when a new frame is emitted */ + /* from the decoder to report the AFD change in display order and not */ + /* in decoding order like it happens below for now. 
*/ + _x_stream_info_set(mpeg2dec->stream, XINE_STREAM_INFO_VIDEO_AFD, mpeg2dec->afd_value_seen); + lprintf ("AFD changed from %d to %d\n", mpeg2dec->afd_value_reported, mpeg2dec->afd_value_seen); + mpeg2dec->afd_value_reported = mpeg2dec->afd_value_seen; + } + + if (!(mpeg2dec->in_slice)) { + mpeg2dec->in_slice = 1; + + if (picture->second_field) { + if (picture->current_frame) + picture->current_frame->field(picture->current_frame, + picture->picture_structure); + else + mpeg2dec->drop_frame = 1; + } else { + int flags = picture->picture_structure; + + if (!picture->mpeg1) flags |= VO_INTERLACED_FLAG; + if (mpeg2dec->force_pan_scan) flags |= VO_PAN_SCAN_FLAG; + if (mpeg2dec->new_sequence) flags |= VO_NEW_SEQUENCE_FLAG; + + if ( picture->current_frame && + picture->current_frame != picture->backward_reference_frame && + picture->current_frame != picture->forward_reference_frame ) { + picture->current_frame->free (picture->current_frame); + } + if (picture->picture_coding_type == B_TYPE) { + ratio = get_aspect_ratio(mpeg2dec); + picture->current_frame = + mpeg2dec->stream->video_out->get_frame (mpeg2dec->stream->video_out, + picture->coded_picture_width, + picture->coded_picture_height, + ratio, + mpeg2dec->frame_format, + flags); + libmpeg2_accel_new_frame( &mpeg2dec->accel, mpeg2dec->frame_format, + picture, ratio, flags); + } else { + ratio = get_aspect_ratio(mpeg2dec); + picture->current_frame = + mpeg2dec->stream->video_out->get_frame (mpeg2dec->stream->video_out, + picture->coded_picture_width, + picture->coded_picture_height, + ratio, + mpeg2dec->frame_format, + flags); + + libmpeg2_accel_new_frame( &mpeg2dec->accel, mpeg2dec->frame_format, + picture, ratio, flags); + + if (picture->forward_reference_frame && + picture->forward_reference_frame != picture->backward_reference_frame) + picture->forward_reference_frame->free (picture->forward_reference_frame); + + picture->forward_reference_frame = + picture->backward_reference_frame; + 
picture->backward_reference_frame = picture->current_frame; + } + + if(mpeg2dec->new_sequence) + mpeg2dec->new_sequence = + libmpeg2_accel_new_sequence(&mpeg2dec->accel, mpeg2dec->frame_format, + picture); + + picture->current_frame->bad_frame = 1; + picture->current_frame->drawn = 0; + picture->current_frame->pts = mpeg2dec->pts; + picture->current_frame->top_field_first = picture->top_field_first; + picture->current_frame->repeat_first_field = picture->repeat_first_field; + picture->current_frame->progressive_frame = picture->progressive_frame; + picture->current_frame->crop_right = picture->coded_picture_width - picture->display_width; + picture->current_frame->crop_bottom = picture->coded_picture_height - picture->display_height; + + switch( picture->picture_coding_type ) { + case I_TYPE: + picture->current_frame->picture_coding_type = XINE_PICT_I_TYPE; + break; + case P_TYPE: + picture->current_frame->picture_coding_type = XINE_PICT_P_TYPE; + break; + case B_TYPE: + picture->current_frame->picture_coding_type = XINE_PICT_B_TYPE; + break; + case D_TYPE: + picture->current_frame->picture_coding_type = XINE_PICT_D_TYPE; + break; + } + + lprintf ("decoding frame %d, type %s\n", + picture->current_frame->id, picture->picture_coding_type == I_TYPE ? "I" : + picture->picture_coding_type == P_TYPE ? 
"P" : "B"); + mpeg2dec->pts = 0; + /*printf("Starting to decode frame %d\n",picture->current_frame->id);*/ + } + } + + if (!mpeg2dec->drop_frame && picture->current_frame != NULL) { +#ifdef DEBUG_LOG + printf("slice target %08x past %08x future %08x\n",picture->current_frame,picture->forward_reference_frame,picture->backward_reference_frame); + fflush(stdout); +#endif + libmpeg2_accel_slice(&mpeg2dec->accel, picture, code, buffer, mpeg2dec->chunk_size, + mpeg2dec->chunk_buffer); + + if( picture->v_offset > picture->limit_y || + picture->v_offset + 16 > picture->display_height ) { + picture->current_frame->bad_frame = 0; + } + } + } + + /* printf ("libmpeg2: parse_chunk %d completed\n", code); */ + return is_frame_done; +} + +static inline int find_start_code (mpeg2dec_t * mpeg2dec, + uint8_t ** current, uint8_t * limit) +{ + uint8_t * p; + + if (*current >= limit) + return 0; + if (mpeg2dec->shift == 0x00000100) + return 1; + + mpeg2dec->shift = (mpeg2dec->shift | *(*current)++) << 8; + + if (*current >= limit) + return 0; + if (mpeg2dec->shift == 0x00000100) + return 1; + + mpeg2dec->shift = (mpeg2dec->shift | *(*current)++) << 8; + + if (*current >= limit) + return 0; + if (mpeg2dec->shift == 0x00000100) + return 1; + + limit--; + + if (*current >= limit) { + mpeg2dec->shift = (mpeg2dec->shift | *(*current)++) << 8; + return 0; + } + + p = *current; + + while (p < limit && (p = (uint8_t *)memchr(p, 0x01, limit - p))) { + if (p[-2] || p[-1]) + p += 3; + else { + *current = ++p; + return 1; + } + } + + *current = ++limit; + p = limit - 3; + mpeg2dec->shift = (mpeg2dec->shift | *p++) << 8; + mpeg2dec->shift = (mpeg2dec->shift | *p++) << 8; + mpeg2dec->shift = (mpeg2dec->shift | *p++) << 8; + + return 0; +} + +static inline uint8_t * copy_chunk (mpeg2dec_t * mpeg2dec, + uint8_t * current, uint8_t * end) +{ + uint8_t * limit; + uint8_t * data = current; + int found, bite; + + /* sequence end code 0xb7 doesn't have any data and there might be the case + * that no start 
code will follow this code for quite some time (e. g. in case + * of a still image). + * Therefore, return immediately with a chunk_size of 0. Setting code to 0xb4 + * will eat up any trailing garbage next time. + */ + if (mpeg2dec->code == 0xb7) { + mpeg2dec->code = 0xb4; + mpeg2dec->chunk_size = 0; + return current; + } + + /* never scan further than the free space left in the chunk buffer */ + limit = current + (mpeg2dec->chunk_buffer + BUFFER_SIZE - mpeg2dec->chunk_ptr); + if (limit > end) + limit = end; + + /* find_start_code() takes the address of current and advances it */ + found = find_start_code(mpeg2dec, &current, limit); + bite = current - data; + if (bite) { + xine_fast_memcpy(mpeg2dec->chunk_ptr, data, bite); + mpeg2dec->chunk_ptr += bite; + } + + if (found) { + /* the byte after the 00 00 01 prefix is the code of the NEXT chunk; the + * 3 prefix bytes were copied too, hence the - 3 */ + mpeg2dec->code = *current++; + mpeg2dec->chunk_size = mpeg2dec->chunk_ptr - mpeg2dec->chunk_buffer - 3; + mpeg2dec->chunk_ptr = mpeg2dec->chunk_buffer; + mpeg2dec->shift = 0xffffff00; + return current; + } + + /* input exhausted: keep the partial chunk and wait for more data */ + if (current == end) + return NULL; + + /* we filled the chunk buffer without finding a start code */ + mpeg2dec->code = 0xb4; /* sequence_error_code */ + mpeg2dec->chunk_ptr = mpeg2dec->chunk_buffer; + return current; +} + +/* Feed the bytes in [current, end) to the decoder. + * A non-zero pts replaces mpeg2dec->pts. Each chunk completed by copy_chunk() + * is passed to parse_chunk() together with the code that introduced it and + * the code that follows it. + * Returns the sum of the parse_chunk() results, i.e. the number of frames + * reported as done. */ +int mpeg2_decode_data (mpeg2dec_t * mpeg2dec, uint8_t * current, uint8_t * end, + uint64_t pts) +{ + int ret; + uint8_t code; + + ret = 0; + /* after a reset: drop buffered data and restart the start code search */ + if (mpeg2dec->seek_mode) { + mpeg2dec->chunk_ptr = mpeg2dec->chunk_buffer; + mpeg2dec->code = 0xb4; + mpeg2dec->seek_mode = 0; + mpeg2dec->shift = 0xffffff00; + mpeg2dec->is_frame_needed = 1; + } + + if (pts) + mpeg2dec->pts = pts; + + /* a pending sequence end code (0xb7) is processed even without new input */ + while (current != end || mpeg2dec->code == 0xb7) { + code = mpeg2dec->code; + current = copy_chunk (mpeg2dec, current, end); + if (current == NULL) + break; + ret += parse_chunk (mpeg2dec, code, mpeg2dec->chunk_buffer, mpeg2dec->code); + } + + libmpeg2_accel_frame_completion(&mpeg2dec->accel, mpeg2dec->frame_format, + mpeg2dec->picture, 0xff); + + return ret; +} + +void mpeg2_discontinuity (mpeg2dec_t * mpeg2dec) { + picture_t *picture = mpeg2dec->picture; + + if( !picture ) + return; + + mpeg2dec->in_slice = 0; + mpeg2dec->pts = 0; + if ( 
picture->current_frame ) + picture->current_frame->pts = 0; + if ( picture->forward_reference_frame ) + picture->forward_reference_frame->pts = 0; + if ( picture->backward_reference_frame ) + picture->backward_reference_frame->pts = 0; + + libmpeg2_accel_discontinuity(&mpeg2dec->accel, mpeg2dec->frame_format, picture); +} + +void mpeg2_reset (mpeg2dec_t * mpeg2dec) { + + picture_t *picture = mpeg2dec->picture; + + if( !picture ) + return; + + mpeg2_discontinuity(mpeg2dec); + + if( !picture->mpeg1 ) { + mpeg2dec->is_wait_for_ip_frames = 2; + + /* mark current frames as bad so they won't make to screen */ + if ( picture->current_frame ) + picture->current_frame->bad_frame=1; + if (picture->forward_reference_frame ) + picture->forward_reference_frame->bad_frame=1; + if (picture->backward_reference_frame) + picture->backward_reference_frame->bad_frame=1; + + } else { + /* to free reference frames one also needs to fix slice.c to + * abort when they are NULL. unfortunately it seems to break + * DVD menus. 
+ * + * ...so let's do this for mpeg-1 only :) + */ + if ( picture->current_frame && + picture->current_frame != picture->backward_reference_frame && + picture->current_frame != picture->forward_reference_frame ) + picture->current_frame->free (picture->current_frame); + picture->current_frame = NULL; + + if (picture->forward_reference_frame && + picture->forward_reference_frame != picture->backward_reference_frame) + picture->forward_reference_frame->free (picture->forward_reference_frame); + picture->forward_reference_frame = NULL; + + if (picture->backward_reference_frame) + picture->backward_reference_frame->free (picture->backward_reference_frame); + picture->backward_reference_frame = NULL; + } + + mpeg2dec->in_slice = 0; + mpeg2dec->seek_mode = 1; + +} + +void mpeg2_flush (mpeg2dec_t * mpeg2dec) { + + picture_t *picture = mpeg2dec->picture; + + if (!picture) + return; + + if (picture->current_frame && !picture->current_frame->drawn && + !picture->current_frame->bad_frame) { + + lprintf ("blasting out current frame %d on flush\n", + picture->current_frame->id); + + picture->current_frame->drawn = 1; + get_frame_duration(mpeg2dec, picture->current_frame); + + picture->current_frame->pts = 0; + picture->current_frame->draw(picture->current_frame, mpeg2dec->stream); + } + +} + +void mpeg2_close (mpeg2dec_t * mpeg2dec) +{ + picture_t *picture = mpeg2dec->picture; + + /* + { + static uint8_t finalizer[] = {0,0,1,0xb4}; + mpeg2_decode_data (mpeg2dec, finalizer, finalizer+4, 0); + } + */ + + /* + dont remove any picture->*->free() below. doing so will cause buffer + leak, and we only have about 15 of them. 
+ */ + + if ( picture->current_frame ) { + if( !picture->current_frame->drawn ) { + lprintf ("blasting out current frame on close\n"); + picture->current_frame->pts = 0; + get_frame_duration(mpeg2dec, picture->current_frame); + picture->current_frame->draw (picture->current_frame, mpeg2dec->stream); + picture->current_frame->drawn = 1; + } + + if( picture->current_frame != picture->backward_reference_frame && + picture->current_frame != picture->forward_reference_frame ) { + picture->current_frame->free (picture->current_frame); + } + picture->current_frame = NULL; + } + + if (picture->forward_reference_frame && + picture->forward_reference_frame != picture->backward_reference_frame) { + picture->forward_reference_frame->free (picture->forward_reference_frame); + picture->forward_reference_frame = NULL; + } + + if (picture->backward_reference_frame) { + if( !picture->backward_reference_frame->drawn) { + lprintf ("blasting out backward reference frame on close\n"); + picture->backward_reference_frame->pts = 0; + get_frame_duration(mpeg2dec, picture->backward_reference_frame); + picture->backward_reference_frame->draw (picture->backward_reference_frame, mpeg2dec->stream); + picture->backward_reference_frame->drawn = 1; + } + picture->backward_reference_frame->free (picture->backward_reference_frame); + picture->backward_reference_frame = NULL; + } + + if ( mpeg2dec->chunk_buffer ) { + free (mpeg2dec->chunk_base); + mpeg2dec->chunk_buffer = NULL; + } + + if ( mpeg2dec->picture ) { + free (mpeg2dec->picture_base); + mpeg2dec->picture = NULL; + } + + if ( mpeg2dec->cc_dec) { + /* dispose the closed caption decoder */ + mpeg2dec->cc_dec->dispose(mpeg2dec->cc_dec); + mpeg2dec->cc_dec = NULL; + } +} + +void mpeg2_find_sequence_header (mpeg2dec_t * mpeg2dec, + uint8_t * current, uint8_t * end){ + + uint8_t code, next_code; + picture_t *picture = mpeg2dec->picture; + + mpeg2dec->seek_mode = 1; + + while (current != end) { + code = mpeg2dec->code; + current = copy_chunk 
(mpeg2dec, current, end); + if (current == NULL) + return ; + next_code = mpeg2dec->code; + + /* printf ("looking for sequence header... %02x\n", code); */ + + mpeg2_stats (code, mpeg2dec->chunk_buffer); + + if (code == 0xb3) { /* sequence_header_code */ + if (mpeg2_header_sequence (picture, mpeg2dec->chunk_buffer)) { + printf ("libmpeg2: bad sequence header\n"); + continue; + } + + /* according to ISO/IEC 13818-2, an extension start code will follow. + * Otherwise the stream follows ISO/IEC 11172-2 which means MPEG1 */ + picture->mpeg1 = (next_code != 0xb5); + + if (mpeg2dec->force_aspect) picture->aspect_ratio_information = mpeg2dec->force_aspect; + + /* first sequence header seen: record the frame size and announce the + * format; unlike parse_chunk() no reference frames are released here */ + if (mpeg2dec->is_sequence_needed) { + xine_event_t event; + xine_format_change_data_t data; + + mpeg2dec->new_sequence = 1; + + mpeg2dec->is_sequence_needed = 0; + picture->frame_width = picture->coded_picture_width; + picture->frame_height = picture->coded_picture_height; + + remember_metainfo (mpeg2dec); + + event.type = XINE_EVENT_FRAME_FORMAT_CHANGE; + event.stream = mpeg2dec->stream; + event.data = &data; + event.data_length = sizeof(data); + data.width = picture->coded_picture_width; + data.height = picture->coded_picture_height; + data.aspect = picture->aspect_ratio_information; + data.pan_scan = mpeg2dec->force_pan_scan; + xine_event_send(mpeg2dec->stream, &event); + + _x_stream_info_set(mpeg2dec->stream, XINE_STREAM_INFO_VIDEO_WIDTH, + picture->display_width); + _x_stream_info_set(mpeg2dec->stream, XINE_STREAM_INFO_VIDEO_HEIGHT, + picture->display_height); + } + } else if (code == 0xb5) { /* extension_start_code */ + if (mpeg2_header_extension (picture, mpeg2dec->chunk_buffer)) { + printf ("libmpeg2: bad extension\n"); + continue ; + } + } + } +} + +/* Find the end of the userdata field in an MPEG-2 stream: returns a pointer to + * the first 00 00 01 start code prefix at or after buffer. + * NOTE(review): the scan has no length bound; it presumably relies on a start + * code prefix always following the user data in the chunk buffer -- confirm + * against the caller before reusing it on other buffers. */ +static uint8_t *find_end(uint8_t *buffer) +{ + uint8_t *current = buffer; + while(1) { + if (current[0] == 0 && current[1] == 0 && current[2] == 1) + break; + current++; + } + return current; +} + 
+static void process_userdata(mpeg2dec_t *mpeg2dec, uint8_t *buffer) +{ + /* check if user data denotes closed captions */ + if (buffer[0] == 'C' && buffer[1] == 'C') { + + if (!mpeg2dec->cc_dec) { + xine_event_t event; + xine_format_change_data_t data; + + /* open the closed caption decoder first */ + mpeg2dec->cc_dec = _x_get_spu_decoder(mpeg2dec->stream, (BUF_SPU_CC >> 16) & 0xff); + + /* send a frame format event so that the CC decoder knows the initial image size */ + event.type = XINE_EVENT_FRAME_FORMAT_CHANGE; + event.stream = mpeg2dec->stream; + event.data = &data; + event.data_length = sizeof(data); + data.width = mpeg2dec->picture->coded_picture_width; + data.height = mpeg2dec->picture->coded_picture_height; + data.aspect = mpeg2dec->picture->aspect_ratio_information; + data.pan_scan = mpeg2dec->force_pan_scan; + xine_event_send(mpeg2dec->stream, &event); + + _x_stream_info_set(mpeg2dec->stream, XINE_STREAM_INFO_VIDEO_WIDTH, + mpeg2dec->picture->display_width); + _x_stream_info_set(mpeg2dec->stream, XINE_STREAM_INFO_VIDEO_HEIGHT, + mpeg2dec->picture->display_height); + } + + if (mpeg2dec->cc_dec) { + buf_element_t buf; + + buf.type = BUF_SPU_CC; + buf.content = &buffer[2]; + buf.pts = mpeg2dec->pts; + buf.size = find_end(buffer) - &buffer[2]; + buf.decoder_flags = 0; + + mpeg2dec->cc_dec->decode_data(mpeg2dec->cc_dec, &buf); + } + } + /* check Active Format Description ETSI TS 101 154 V1.5.1 */ + else if (buffer[0] == 0x44 && buffer[1] == 0x54 && buffer[2] == 0x47 && buffer[3] == 0x31) + mpeg2dec->afd_value_seen = (buffer[4] & 0x40) ? 
(buffer[5] & 0x0f) : XINE_VIDEO_AFD_NOT_PRESENT; +} diff --git a/src/video_dec/libmpeg2/header.c b/src/video_dec/libmpeg2/header.c new file mode 100644 index 000000000..12ba0ff8a --- /dev/null +++ b/src/video_dec/libmpeg2/header.c @@ -0,0 +1,411 @@ +/* + * header.c + * Copyright (C) 2000-2002 Michel Lespinasse + * Copyright (C) 1999-2000 Aaron Holtzman + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* +#define LOG_PAN_SCAN +*/ + +#include "config.h" + +#include /* For printf debugging */ +#include + +#include "mpeg2_internal.h" +#include + +/* default intra quant matrix, in zig-zag order */ +static const uint8_t default_intra_quantizer_matrix[64] ATTR_ALIGN(16) = { + 8, + 16, 16, + 19, 16, 19, + 22, 22, 22, 22, + 22, 22, 26, 24, 26, + 27, 27, 27, 26, 26, 26, + 26, 27, 27, 27, 29, 29, 29, + 34, 34, 34, 29, 29, 29, 27, 27, + 29, 29, 32, 32, 34, 34, 37, + 38, 37, 35, 35, 34, 35, + 38, 38, 40, 40, 40, + 48, 48, 46, 46, + 56, 56, 58, + 69, 69, + 83 +}; + +uint8_t mpeg2_scan_norm[64] ATTR_ALIGN(16) = +{ + /* Zig-Zag scan pattern */ + 0, 1, 8,16, 9, 2, 3,10, + 17,24,32,25,18,11, 4, 5, + 12,19,26,33,40,48,41,34, + 27,20,13, 6, 7,14,21,28, + 35,42,49,56,57,50,43,36, + 
29,22,15,23,30,37,44,51, + 58,59,52,45,38,31,39,46, + 53,60,61,54,47,55,62,63 +}; + +uint8_t mpeg2_scan_alt[64] ATTR_ALIGN(16) = +{ + /* Alternate scan pattern */ + 0,8,16,24,1,9,2,10,17,25,32,40,48,56,57,49, + 41,33,26,18,3,11,4,12,19,27,34,42,50,58,35,43, + 51,59,20,28,5,13,6,14,21,29,36,44,52,60,37,45, + 53,61,22,30,7,15,23,31,38,46,54,62,39,47,55,63 +}; + +/* count must be between 1 and 32 */ +static uint32_t get_bits(uint8_t *buffer, uint32_t count, uint32_t *bit_position) { + uint32_t byte_offset; + uint32_t bit_offset; + uint32_t bit_mask; + uint32_t bit_bite; + uint32_t result=0; + if (count == 0) return 0; + do { + byte_offset = *bit_position >> 3; /* Div 8 */ + bit_offset = 8 - (*bit_position & 0x7); /* Bits got 87654321 */ + bit_mask = ((1 << (bit_offset)) - 1); + bit_bite = bit_offset; + if (count < bit_offset) { + bit_mask ^= ((1 << (bit_offset-count)) - 1); + bit_bite = count; + } + /* + printf("Byte=0x%02x Bitmask=0x%04x byte_offset=%u bit_offset=%u bit_byte=%u count=%u\n",buffer[byte_offset], bit_mask, byte_offset, bit_offset, bit_bite,count); + */ + result = (result << bit_bite) | ((buffer[byte_offset] & bit_mask) >> (bit_offset-bit_bite)); + *bit_position+=bit_bite; + count-=bit_bite; + } while ((count > 0) && (byte_offset<50) ); + return result; +} + +void mpeg2_header_state_init (picture_t * picture) +{ + picture->scan = mpeg2_scan_norm; + picture->load_intra_quantizer_matrix = 1; + picture->load_non_intra_quantizer_matrix = 1; +} + +int mpeg2_header_sequence (picture_t * picture, uint8_t * buffer) +{ + int width, height; + int i; + + if ((buffer[6] & 0x20) != 0x20) + return 1; /* missing marker_bit */ + + height = (buffer[0] << 16) | (buffer[1] << 8) | buffer[2]; + + picture->display_width = width = (height >> 12); + picture->display_height = height = (height & 0xfff); + + width = (width + 15) & ~15; + height = (height + 15) & ~15; + + if ((width > 1920) || (height > 1152)) + return 1; /* size restrictions for MP@HL */ + + 
picture->coded_picture_width = width; + picture->coded_picture_height = height; + + /* this is not used by the decoder */ + picture->aspect_ratio_information = buffer[3] >> 4; + picture->frame_rate_code = buffer[3] & 15; + picture->bitrate = (buffer[4]<<10)|(buffer[5]<<2)|(buffer[6]>>6); + + /* load_intra_quantiser_matrix flag: the 64 entries start one bit before a + * byte boundary, so each entry is assembled from two bytes */ + if (buffer[7] & 2) { + for (i = 0; i < 64; i++) + picture->intra_quantizer_matrix[mpeg2_scan_norm[i]] = + (buffer[i+7] << 7) | (buffer[i+8] >> 1); + buffer += 64; + } else + for (i = 0; i < 64; i++) + picture->intra_quantizer_matrix[mpeg2_scan_norm[i]] = + default_intra_quantizer_matrix [i]; + + /* load_non_intra_quantiser_matrix flag: entries are byte aligned; without + * it every entry is 16 */ + if (buffer[7] & 1) + for (i = 0; i < 64; i++) + picture->non_intra_quantizer_matrix[mpeg2_scan_norm[i]] = + buffer[i+8]; + else + for (i = 0; i < 64; i++) + picture->non_intra_quantizer_matrix[i] = 16; + picture->load_intra_quantizer_matrix = 1; + picture->load_non_intra_quantizer_matrix = 1; + /* MPEG1 - for testing only */ + picture->mpeg1 = 1; + picture->intra_dc_precision = 0; + picture->frame_pred_frame_dct = 1; + picture->q_scale_type = 0; + picture->concealment_motion_vectors = 0; + /* picture->alternate_scan = 0; */ + picture->picture_structure = FRAME_PICTURE; + /* picture->second_field = 0; */ + + return 0; +} + +/* Parse a sequence extension; buffer points at the extension payload, whose + * first byte carries the extension id in its high nibble. + * Returns 1 when the chroma format / size extension / marker bit check + * fails, 0 otherwise. */ +static int sequence_extension (picture_t * picture, uint8_t * buffer) +{ + /* check chroma format, size extensions, marker bit */ + if (((buffer[1] & 0x07) != 0x02) || (buffer[2] & 0xe0) || + ((buffer[3] & 0x01) != 0x01)) + return 1; + + /* this is not used by the decoder */ + picture->progressive_sequence = (buffer[1] >> 3) & 1; + + picture->low_delay = buffer[5] & 0x80; + + /* interlaced sequences round the coded height up to a multiple of 32 */ + if (!picture->progressive_sequence) + picture->coded_picture_height = + (picture->coded_picture_height + 31) & ~31; + + + /* printf ("libmpeg2: low_delay : %d\n", picture->low_delay); */ + +/* + printf ("libmpeg2: sequence extension+5 : %08x (%d)\n", + buffer[5], buffer[5] % 0x80); + */ + + /* buffer[5] = low_delay (1 bit) | frame_rate_extension_n (2 bits) | + * frame_rate_extension_d (5 bits), see ISO/IEC 13818-2 6.2.2.3 */ + picture->frame_rate_ext_n = (buffer[5] >> 5) & 0x03; + picture->frame_rate_ext_d = buffer[5] & 0x1f; + + /* 
MPEG1 - for testing only */ + picture->mpeg1 = 0; + + return 0; +} + +static int quant_matrix_extension (picture_t * picture, uint8_t * buffer) +{ + int i; + + if (buffer[0] & 8) { + for (i = 0; i < 64; i++) + picture->intra_quantizer_matrix[mpeg2_scan_norm[i]] = + (buffer[i] << 5) | (buffer[i+1] >> 3); + buffer += 64; + } + + if (buffer[0] & 4) + for (i = 0; i < 64; i++) + picture->non_intra_quantizer_matrix[mpeg2_scan_norm[i]] = + (buffer[i] << 6) | (buffer[i+1] >> 2); + + return 0; +} + +static int picture_coding_extension (picture_t * picture, uint8_t * buffer) +{ + /* pre subtract 1 for use later in compute_motion_vector */ + picture->f_motion.f_code[0] = (buffer[0] & 15) - 1; + picture->f_motion.f_code[1] = (buffer[1] >> 4) - 1; + picture->b_motion.f_code[0] = (buffer[1] & 15) - 1; + picture->b_motion.f_code[1] = (buffer[2] >> 4) - 1; + + picture->intra_dc_precision = (buffer[2] >> 2) & 3; + picture->picture_structure = buffer[2] & 3; + picture->frame_pred_frame_dct = (buffer[3] >> 6) & 1; + picture->concealment_motion_vectors = (buffer[3] >> 5) & 1; + picture->q_scale_type = (buffer[3] >> 4) & 1; + picture->intra_vlc_format = (buffer[3] >> 3) & 1; + + if (buffer[3] & 4) /* alternate_scan */ + picture->scan = mpeg2_scan_alt; + else + picture->scan = mpeg2_scan_norm; + + /* these are not used by the decoder */ + picture->top_field_first = buffer[3] >> 7; + picture->repeat_first_field = (buffer[3] >> 1) & 1; + picture->progressive_frame = buffer[4] >> 7; + + return 0; +} + +static int sequence_display_extension (picture_t * picture, uint8_t * buffer) { + /* FIXME: implement. 
*/ + uint32_t bit_position; + uint32_t padding; + + bit_position = 0; + padding = get_bits(buffer, 4, &bit_position); + picture->video_format = get_bits(buffer, 3, &bit_position); + picture->colour_description = get_bits(buffer, 1, &bit_position); + if(picture->colour_description) { + picture->colour_primatives = get_bits(buffer, 8, &bit_position); + picture->transfer_characteristics = get_bits(buffer, 8, &bit_position); + picture->matrix_coefficients = get_bits(buffer, 8, &bit_position); + } + picture->display_horizontal_size = get_bits(buffer, 14, &bit_position); + padding = get_bits(buffer, 1, &bit_position); + picture->display_vertical_size = get_bits(buffer, 14, &bit_position); + +#ifdef LOG_PAN_SCAN + printf("Sequence_display_extension\n"); + printf(" video_format: %u\n", picture->video_format); + printf(" colour_description: %u\n", picture->colour_description); + if(picture->colour_description) { + printf(" colour_primatives: %u\n", picture->colour_primatives); + printf(" transfer_characteristics %u\n", picture->transfer_characteristics); + printf(" matrix_coefficients %u\n", picture->matrix_coefficients); + } + printf(" display_horizontal_size %u\n", picture->display_horizontal_size); + printf(" display_vertical_size %u\n", picture->display_vertical_size); +#endif + + return 0; +} + +static int picture_display_extension (picture_t * picture, uint8_t * buffer) { + uint32_t bit_position; + uint32_t padding; + +#ifdef LOG_PAN_SCAN + printf ("libmpeg2: picture_display_extension\n"); +#endif + + bit_position = 0; + padding = get_bits(buffer, 4, &bit_position); + picture->frame_centre_horizontal_offset = get_bits(buffer, 16, &bit_position); + padding = get_bits(buffer, 1, &bit_position); + picture->frame_centre_vertical_offset = get_bits(buffer, 16, &bit_position); + padding = get_bits(buffer, 1, &bit_position); + +#ifdef LOG_PAN_SCAN + printf("Pan & Scan centre (x,y) = (%u, %u)\n", + picture->frame_centre_horizontal_offset, + 
picture->frame_centre_vertical_offset); +#endif + + return 0; +} + +int mpeg2_header_extension (picture_t * picture, uint8_t * buffer) +{ + switch (buffer[0] & 0xf0) { + case 0x00: /* reserved */ + return 0; + + case 0x10: /* sequence extension */ + return sequence_extension (picture, buffer); + + case 0x20: /* sequence display extension for Pan & Scan */ + return sequence_display_extension (picture, buffer); + + case 0x30: /* quant matrix extension */ + return quant_matrix_extension (picture, buffer); + + case 0x40: /* copyright extension */ + return 0; + + case 0x50: /* sequence scalable extension */ + return 0; + + case 0x60: /* reserved */ + return 0; + + case 0x70: /* picture display extension for Pan & Scan */ + return picture_display_extension (picture, buffer); + + case 0x80: /* picture coding extension */ + return picture_coding_extension (picture, buffer); + + case 0x90: /* picture spacial scalable extension */ + return 0; + + case 0xA0: /* picture temporal scalable extension */ + return 0; + + case 0xB0: /* camera parameters extension */ + return 0; + + case 0xC0: /* ITU-T extension */ + return 0; + + case 0xD0: /* reserved */ + return 0; + + case 0xE0: /* reserved */ + return 0; + + case 0xF0: /* reserved */ + return 0; + } + + return 0; +} + +int mpeg2_header_group_of_pictures (picture_t * picture, uint8_t * buffer) { + uint32_t bit_position; + uint32_t padding; + bit_position = 0; + + picture->drop_frame_flag = get_bits(buffer, 1, &bit_position); + picture->time_code_hours = get_bits(buffer, 5, &bit_position); + picture->time_code_minutes = get_bits(buffer, 6, &bit_position); + padding = get_bits(buffer, 1, &bit_position); + picture->time_code_seconds = get_bits(buffer, 6, &bit_position); + picture->time_code_pictures = get_bits(buffer, 6, &bit_position); + picture->closed_gop = get_bits(buffer, 1, &bit_position); + picture->broken_link = get_bits(buffer, 1, &bit_position); + +#ifdef LOG_PAN_SCAN + printf("Group of pictures\n"); + printf(" 
drop_frame_flag: %u\n", picture->drop_frame_flag); + printf(" time_code: HH:MM:SS:Pictures %02u:%02u:%02u:%02u\n", + picture->time_code_hours, + picture->time_code_minutes, + picture->time_code_seconds, + picture->time_code_pictures); + printf(" closed_gop: %u\n", picture->closed_gop); + printf(" bloken_link: %u\n", picture->broken_link); +#endif + + return 0; +} + +int mpeg2_header_picture (picture_t * picture, uint8_t * buffer) +{ + picture->picture_coding_type = (buffer [1] >> 3) & 7; + picture->vbv_delay = ((buffer[1] << 13) | (buffer[2] << 5) | + (buffer[3] >> 3)) & 0xffff; + + /* forward_f_code and backward_f_code - used in mpeg1 only */ + picture->f_motion.f_code[1] = (buffer[3] >> 2) & 1; + picture->f_motion.f_code[0] = + (((buffer[3] << 1) | (buffer[4] >> 7)) & 7) - 1; + picture->b_motion.f_code[1] = (buffer[4] >> 6) & 1; + picture->b_motion.f_code[0] = ((buffer[4] >> 3) & 7) - 1; + + /* move in header_process_picture_header */ + picture->second_field = + (picture->picture_structure != FRAME_PICTURE) && + !(picture->second_field); + + return 0; +} diff --git a/src/video_dec/libmpeg2/idct.c b/src/video_dec/libmpeg2/idct.c new file mode 100644 index 000000000..9f216db58 --- /dev/null +++ b/src/video_dec/libmpeg2/idct.c @@ -0,0 +1,348 @@ +/* + * idct.c + * Copyright (C) 2000-2002 Michel Lespinasse + * Copyright (C) 1999-2000 Aaron Holtzman + * + * Portions of this code are from the MPEG software simulation group + * idct implementation. This code will be replaced with a new + * implementation soon. + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
/**********************************************************/
/* inverse two dimensional DCT, Chen-Wang algorithm       */
/* (cf. IEEE ASSP-32, pp. 803-816, Aug. 1984)             */
/* 32-bit integer arithmetic (8 bit coefficients)         */
/* 11 mults, 29 adds per DCT                              */
/*                                      sE, 18.8.91       */
/**********************************************************/
/* coefficients extended to 12 bit for IEEE1180-1990      */
/* compliance                           sE,  2.1.94       */
/**********************************************************/

/* this code assumes >> to be a two's-complement arithmetic */
/* right shift: (-2)>>1 == -1 , (-3)>>1 == -2               */

#define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */
#define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */
#define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */
#define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */
#define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */
#define W7 565  /* 2048*sqrt (2)*cos (7*pi/16) */

/* idct main entry points, bound by mpeg2_idct_init() */
void (* mpeg2_idct_copy) (int16_t * block, uint8_t * dest, int stride);
void (* mpeg2_idct_add) (int16_t * block, uint8_t * dest, int stride);
void (* mpeg2_idct) (int16_t * block);
void (* mpeg2_zero_block) (int16_t * block);

/* Saturation table for indices -384..639, filled by mpeg2_idct_init()
 * when the C implementation is selected. */
static uint8_t clip_lut[1024];
#define CLIP(i) ((clip_lut+384)[ (i)])

/* row (horizontal) IDCT
 *
 *           7                       pi         1
 * dst[k] = sum c[l] * src[l] * cos ( -- * ( k + - ) * l )
 *          l=0                      8          2
 *
 * where: c[0]    = 128
 *        c[1..7] = 128*sqrt (2)
 *
 * Transforms the 8 coefficients at block[0..7] in place.
 *
 * Coefficients may be negative, so scaling uses multiplication: a left
 * shift of a negative signed value is undefined behaviour in C.
 */
static inline void idct_row (int16_t * block)
{
    int x0, x1, x2, x3, x4, x5, x6, x7, x8;

    x1 = block[4] * 2048;
    x2 = block[6];
    x3 = block[2];
    x4 = block[1];
    x5 = block[7];
    x6 = block[5];
    x7 = block[3];

    /* shortcut: DC-only row */
    if (! (x1 | x2 | x3 | x4 | x5 | x6 | x7 )) {
        const int16_t dc = block[0] * 8;
        int k;

        for (k = 0; k < 8; k++)
            block[k] = dc;
        return;
    }

    x0 = block[0] * 2048 + 128; /* for proper rounding in the fourth stage */

    /* first stage */
    x8 = W7 * (x4 + x5);
    x4 = x8 + (W1 - W7) * x4;
    x5 = x8 - (W1 + W7) * x5;
    x8 = W3 * (x6 + x7);
    x6 = x8 - (W3 - W5) * x6;
    x7 = x8 - (W3 + W5) * x7;

    /* second stage */
    x8 = x0 + x1;
    x0 -= x1;
    x1 = W6 * (x3 + x2);
    x2 = x1 - (W2 + W6) * x2;
    x3 = x1 + (W2 - W6) * x3;
    x1 = x4 + x6;
    x4 -= x6;
    x6 = x5 + x7;
    x5 -= x7;

    /* third stage */
    x7 = x8 + x3;
    x8 -= x3;
    x3 = x0 + x2;
    x0 -= x2;
    x2 = (181 * (x4 + x5) + 128) >> 8;
    x4 = (181 * (x4 - x5) + 128) >> 8;

    /* fourth stage */
    block[0] = (x7 + x1) >> 8;
    block[1] = (x3 + x2) >> 8;
    block[2] = (x0 + x4) >> 8;
    block[3] = (x8 + x6) >> 8;
    block[4] = (x8 - x6) >> 8;
    block[5] = (x0 - x4) >> 8;
    block[6] = (x3 - x2) >> 8;
    block[7] = (x7 - x1) >> 8;
}

/* column (vertical) IDCT
 *
 *             7                         pi         1
 * dst[8*k] = sum c[l] * src[8*l] * cos ( -- * ( k + - ) * l )
 *            l=0                        8          2
 *
 * where: c[0]    = 1/1024
 *        c[1..7] = (1/1024)*sqrt (2)
 *
 * Transforms the 8 coefficients at block[0], block[8], ..., block[56]
 * in place.  As in idct_row(), scaling uses multiplication rather than
 * left shifts so that negative inputs are well defined.
 */
static inline void idct_col (int16_t *block)
{
    int x0, x1, x2, x3, x4, x5, x6, x7, x8;

    /* shortcut */
    x1 = block [8*4] * 256;
    x2 = block [8*6];
    x3 = block [8*2];
    x4 = block [8*1];
    x5 = block [8*7];
    x6 = block [8*5];
    x7 = block [8*3];

#if 0
    if (! (x1 | x2 | x3 | x4 | x5 | x6 | x7 )) {
        block[8*0] = block[8*1] = block[8*2] = block[8*3] = block[8*4] =
            block[8*5] = block[8*6] = block[8*7] = (block[8*0] + 32) >> 6;
        return;
    }
#endif

    x0 = block[8*0] * 256 + 8192;

    /* first stage */
    x8 = W7 * (x4 + x5) + 4;
    x4 = (x8 + (W1 - W7) * x4) >> 3;
    x5 = (x8 - (W1 + W7) * x5) >> 3;
    x8 = W3 * (x6 + x7) + 4;
    x6 = (x8 - (W3 - W5) * x6) >> 3;
    x7 = (x8 - (W3 + W5) * x7) >> 3;

    /* second stage */
    x8 = x0 + x1;
    x0 -= x1;
    x1 = W6 * (x3 + x2) + 4;
    x2 = (x1 - (W2 + W6) * x2) >> 3;
    x3 = (x1 + (W2 - W6) * x3) >> 3;
    x1 = x4 + x6;
    x4 -= x6;
    x6 = x5 + x7;
    x5 -= x7;

    /* third stage */
    x7 = x8 + x3;
    x8 -= x3;
    x3 = x0 + x2;
    x0 -= x2;
    x2 = (181 * (x4 + x5) + 128) >> 8;
    x4 = (181 * (x4 - x5) + 128) >> 8;

    /* fourth stage */
    block[8*0] = (x7 + x1) >> 14;
    block[8*1] = (x3 + x2) >> 14;
    block[8*2] = (x0 + x4) >> 14;
    block[8*3] = (x8 + x6) >> 14;
    block[8*4] = (x8 - x6) >> 14;
    block[8*5] = (x0 - x4) >> 14;
    block[8*6] = (x3 - x2) >> 14;
    block[8*7] = (x7 - x1) >> 14;
}

/*
 * Inverse-transform an 8x8 block, write the saturated result to dest
 * (8 bytes per row, rows `stride` bytes apart) and zero the block.
 */
static void mpeg2_idct_copy_c (int16_t * block, uint8_t * dest, int stride)
{
    int i;

    for (i = 0; i < 8; i++)
        idct_row (block + 8 * i);

    for (i = 0; i < 8; i++)
        idct_col (block + i);

    i = 8;
    do {
        dest[0] = CLIP (block[0]);
        dest[1] = CLIP (block[1]);
        dest[2] = CLIP (block[2]);
        dest[3] = CLIP (block[3]);
        dest[4] = CLIP (block[4]);
        dest[5] = CLIP (block[5]);
        dest[6] = CLIP (block[6]);
        dest[7] = CLIP (block[7]);

        block[0] = 0;   block[1] = 0;   block[2] = 0;   block[3] = 0;
        block[4] = 0;   block[5] = 0;   block[6] = 0;   block[7] = 0;

        dest += stride;
        block += 8;
    } while (--i);
}

/*
 * Inverse-transform an 8x8 block, add it to the pixels already in dest
 * with saturation and zero the block.
 */
static void mpeg2_idct_add_c (int16_t * block, uint8_t * dest, int stride)
{
    int i;

    for (i = 0; i < 8; i++)
        idct_row (block + 8 * i);

    for (i = 0; i < 8; i++)
        idct_col (block + i);

    i = 8;
    do {
        dest[0] = CLIP (block[0] + dest[0]);
        dest[1] = CLIP (block[1] + dest[1]);
        dest[2] = CLIP (block[2] + dest[2]);
        dest[3] = CLIP (block[3] + dest[3]);
        dest[4] = CLIP (block[4] + dest[4]);
        dest[5] = CLIP (block[5] + dest[5]);
        dest[6] = CLIP (block[6] + dest[6]);
        dest[7] = CLIP (block[7] + dest[7]);

        block[0] = 0;   block[1] = 0;   block[2] = 0;   block[3] = 0;
        block[4] = 0;   block[5] = 0;   block[6] = 0;   block[7] = 0;

        dest += stride;
        block += 8;
    } while (--i);
}

/* Inverse-transform an 8x8 block in place, without clipping or clearing. */
static void mpeg2_idct_c (int16_t * block)
{
    int i;

    for (i = 0; i < 8; i++)
        idct_row (block + 8 * i);

    for (i = 0; i < 8; i++)
        idct_col (block + i);
}

/* Zero all 64 coefficients of a block. */
static void mpeg2_zero_block_c (int16_t * wblock)
{
    memset (wblock, 0, 64 * sizeof (*wblock));
}

/*
 * Bind the IDCT entry points to the best implementation permitted by
 * mm_accel (MM_ACCEL_* bits) and compiled into this build.
 *
 * Without a matching accelerated implementation the C routines above are
 * used and clip_lut is filled.
 */
void mpeg2_idct_init (uint32_t mm_accel)
{
    (void) mm_accel;    /* unused when no accelerated path is compiled in */

    mpeg2_zero_block = mpeg2_zero_block_c;

#if defined(ARCH_X86) || defined(ARCH_X86_64)
    if (mm_accel & MM_ACCEL_X86_MMXEXT) {
#ifdef LOG
        fprintf (stderr, "Using MMXEXT for IDCT transform\n");
#endif
        mpeg2_idct_copy = mpeg2_idct_copy_mmxext;
        mpeg2_idct_add = mpeg2_idct_add_mmxext;
        mpeg2_idct = mpeg2_idct_mmxext;
        mpeg2_zero_block = mpeg2_zero_block_mmx;
        mpeg2_idct_mmx_init ();
    } else if (mm_accel & MM_ACCEL_X86_MMX) {
#ifdef LOG
        fprintf (stderr, "Using MMX for IDCT transform\n");
#endif
        mpeg2_idct_copy = mpeg2_idct_copy_mmx;
        mpeg2_idct_add = mpeg2_idct_add_mmx;
        mpeg2_idct = mpeg2_idct_mmx;
        mpeg2_zero_block = mpeg2_zero_block_mmx;
        mpeg2_idct_mmx_init ();
    } else
#endif
#if defined (ARCH_PPC) && defined (ENABLE_ALTIVEC)
    if (mm_accel & MM_ACCEL_PPC_ALTIVEC) {
#ifdef LOG
        fprintf (stderr, "Using altivec for IDCT transform\n");
#endif
        mpeg2_idct_copy = mpeg2_idct_copy_altivec;
        mpeg2_idct_add = mpeg2_idct_add_altivec;
        mpeg2_idct_altivec_init ();
        /* NOTE(review): the plain in-place IDCT stays on the C routine
         * here - confirm this is intended alongside the altivec init. */
        mpeg2_idct = mpeg2_idct_c;
    } else
#endif
#ifdef LIBMPEG2_MLIB
    if (mm_accel & MM_ACCEL_MLIB) {
        char * env_var;

        env_var = getenv ("MLIB_NON_IEEE");

        mpeg2_idct = mpeg2_idct_mlib;
        if (env_var == NULL) {
#ifdef LOG
            fprintf (stderr, "Using mlib for IDCT transform\n");
#endif
            mpeg2_idct_add = mpeg2_idct_add_mlib;
        } else {
            fprintf (stderr, "Using non-IEEE mlib for IDCT transform\n");
            mpeg2_idct_add = mpeg2_idct_add_mlib_non_ieee;
        }
        mpeg2_idct_copy = mpeg2_idct_copy_mlib_non_ieee;
    } else
#endif
    {
        int i;

#ifdef LOG
        fprintf (stderr, "No accelerated IDCT transform found\n");
#endif
        mpeg2_idct_copy = mpeg2_idct_copy_c;
        mpeg2_idct_add = mpeg2_idct_add_c;
        mpeg2_idct = mpeg2_idct_c;
        /* clip_lut[i+384] = i saturated to 0..255, for i in -384..639 */
        for (i = -384; i < 640; i++)
            clip_lut[i+384] = (i < 0) ? 0 : ((i > 255) ? 255 : i);
    }
}
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#if defined (ARCH_PPC) && defined (ENABLE_ALTIVEC) + +#include + +#include + +#include "mpeg2_internal.h" +#include + +#define vector_s16_t vector signed short +#define vector_u16_t vector unsigned short +#define vector_s8_t vector signed char +#define vector_u8_t vector unsigned char +#define vector_s32_t vector signed int +#define vector_u32_t vector unsigned int + +#define IDCT_HALF \ + /* 1st stage */ \ + t1 = vec_mradds (a1, vx7, vx1 ); \ + t8 = vec_mradds (a1, vx1, vec_subs (zero, vx7)); \ + t7 = vec_mradds (a2, vx5, vx3); \ + t3 = vec_mradds (ma2, vx3, vx5); \ + \ + /* 2nd stage */ \ + t5 = vec_adds (vx0, vx4); \ + t0 = vec_subs (vx0, vx4); \ + t2 = vec_mradds (a0, vx6, vx2); \ + t4 = vec_mradds (a0, vx2, vec_subs (zero,vx6)); \ + t6 = vec_adds (t8, t3); \ + t3 = vec_subs (t8, t3); \ + t8 = vec_subs (t1, t7); \ + t1 = vec_adds (t1, t7); \ + \ + /* 3rd stage */ \ + t7 = vec_adds (t5, t2); \ + t2 = vec_subs (t5, t2); \ + t5 = vec_adds (t0, t4); \ + t0 = vec_subs (t0, t4); \ + t4 = vec_subs (t8, t3); \ + t3 = vec_adds (t8, t3); \ + \ + /* 4th stage */ \ + vy0 = vec_adds (t7, t1); \ + vy7 = vec_subs (t7, t1); \ + vy1 = vec_mradds (c4, t3, t5); \ + vy6 = vec_mradds (mc4, t3, t5); \ + vy2 = vec_mradds (c4, t4, t0); \ + vy5 = vec_mradds (mc4, t4, t0); \ + vy3 = vec_adds (t2, t6); \ + vy4 = vec_subs (t2, t6); + +#define IDCT \ + vector_s16_t vx0, vx1, vx2, vx3, vx4, vx5, vx6, vx7; \ + vector_s16_t vy0, vy1, vy2, vy3, vy4, vy5, vy6, vy7; \ + vector_s16_t a0, a1, a2, ma2, c4, mc4, zero, bias; \ + vector_s16_t t0, t1, t2, t3, t4, t5, t6, t7, t8; \ + vector_u16_t shift; \ + \ + c4 = vec_splat (constants[0], 0); \ + a0 = vec_splat (constants[0], 1); \ + a1 = vec_splat (constants[0], 2); \ + a2 = vec_splat (constants[0], 3); \ + 
mc4 = vec_splat (constants[0], 4); \ + ma2 = vec_splat (constants[0], 5); \ + bias = (vector_s16_t)vec_splat ((vector_s32_t)constants[0], 3); \ + \ + zero = vec_splat_s16 (0); \ + shift = vec_splat_u16 (4); \ + \ + vx0 = vec_mradds (vec_sl (block[0], shift), constants[1], zero); \ + vx1 = vec_mradds (vec_sl (block[1], shift), constants[2], zero); \ + vx2 = vec_mradds (vec_sl (block[2], shift), constants[3], zero); \ + vx3 = vec_mradds (vec_sl (block[3], shift), constants[4], zero); \ + vx4 = vec_mradds (vec_sl (block[4], shift), constants[1], zero); \ + vx5 = vec_mradds (vec_sl (block[5], shift), constants[4], zero); \ + vx6 = vec_mradds (vec_sl (block[6], shift), constants[3], zero); \ + vx7 = vec_mradds (vec_sl (block[7], shift), constants[2], zero); \ + \ + IDCT_HALF \ + \ + vx0 = vec_mergeh (vy0, vy4); \ + vx1 = vec_mergel (vy0, vy4); \ + vx2 = vec_mergeh (vy1, vy5); \ + vx3 = vec_mergel (vy1, vy5); \ + vx4 = vec_mergeh (vy2, vy6); \ + vx5 = vec_mergel (vy2, vy6); \ + vx6 = vec_mergeh (vy3, vy7); \ + vx7 = vec_mergel (vy3, vy7); \ + \ + vy0 = vec_mergeh (vx0, vx4); \ + vy1 = vec_mergel (vx0, vx4); \ + vy2 = vec_mergeh (vx1, vx5); \ + vy3 = vec_mergel (vx1, vx5); \ + vy4 = vec_mergeh (vx2, vx6); \ + vy5 = vec_mergel (vx2, vx6); \ + vy6 = vec_mergeh (vx3, vx7); \ + vy7 = vec_mergel (vx3, vx7); \ + \ + vx0 = vec_adds (vec_mergeh (vy0, vy4), bias); \ + vx1 = vec_mergel (vy0, vy4); \ + vx2 = vec_mergeh (vy1, vy5); \ + vx3 = vec_mergel (vy1, vy5); \ + vx4 = vec_mergeh (vy2, vy6); \ + vx5 = vec_mergel (vy2, vy6); \ + vx6 = vec_mergeh (vy3, vy7); \ + vx7 = vec_mergel (vy3, vy7); \ + \ + IDCT_HALF \ + \ + shift = vec_splat_u16 (6); \ + vx0 = vec_sra (vy0, shift); \ + vx1 = vec_sra (vy1, shift); \ + vx2 = vec_sra (vy2, shift); \ + vx3 = vec_sra (vy3, shift); \ + vx4 = vec_sra (vy4, shift); \ + vx5 = vec_sra (vy5, shift); \ + vx6 = vec_sra (vy6, shift); \ + vx7 = vec_sra (vy7, shift); + +#if defined( __APPLE_CC__ ) && defined( __APPLE_ALTIVEC__ ) /* apple */ +#define 
VEC_S16(a,b,c,d,e,f,g,h) (vector_s16_t) (a, b, c, d, e, f, g, h) +#else /* gnu */ +#define VEC_S16(a,b,c,d,e,f,g,h) (vector_s16_t) {a, b, c, d, e, f, g, h} +#endif + +static vector_s16_t constants[5] = { + VEC_S16(23170, 13573, 6518, 21895, -23170, -21895, 32, 31), + VEC_S16(16384, 22725, 21407, 19266, 16384, 19266, 21407, 22725), + VEC_S16(22725, 31521, 29692, 26722, 22725, 26722, 29692, 31521), + VEC_S16(21407, 29692, 27969, 25172, 21407, 25172, 27969, 29692), + VEC_S16(19266, 26722, 25172, 22654, 19266, 22654, 25172, 26722) +}; + +void mpeg2_idct_copy_altivec (vector_s16_t * block, unsigned char * dest, + int stride) +{ + vector_u8_t tmp; + + IDCT + +#define COPY(dest,src) \ + tmp = vec_packsu (src, src); \ + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); \ + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); + + COPY (dest, vx0) dest += stride; + COPY (dest, vx1) dest += stride; + COPY (dest, vx2) dest += stride; + COPY (dest, vx3) dest += stride; + COPY (dest, vx4) dest += stride; + COPY (dest, vx5) dest += stride; + COPY (dest, vx6) dest += stride; + COPY (dest, vx7) + memset (block, 0, 64 * sizeof (signed short)); +} + +void mpeg2_idct_add_altivec (vector_s16_t * block, unsigned char * dest, + int stride) +{ + vector_u8_t tmp; + vector_s16_t tmp2, tmp3; + vector_u8_t perm0; + vector_u8_t perm1; + vector_u8_t p0, p1, p; + + IDCT + + p0 = vec_lvsl (0, dest); + p1 = vec_lvsl (stride, dest); + p = vec_splat_u8 (-1); + perm0 = vec_mergeh (p, p0); + perm1 = vec_mergeh (p, p1); + +#define ADD(dest,src,perm) \ + /* *(uint64_t *)&tmp = *(uint64_t *)dest; */ \ + tmp = vec_ld (0, dest); \ + tmp2 = (vector_s16_t)vec_perm (tmp, (vector_u8_t)zero, perm); \ + tmp3 = vec_adds (tmp2, src); \ + tmp = vec_packsu (tmp3, tmp3); \ + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); \ + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); + + ADD (dest, vx0, perm0) dest += stride; + ADD (dest, vx1, perm1) dest += stride; + ADD (dest, vx2, perm0) dest += stride; + 
ADD (dest, vx3, perm1) dest += stride; + ADD (dest, vx4, perm0) dest += stride; + ADD (dest, vx5, perm1) dest += stride; + ADD (dest, vx6, perm0) dest += stride; + ADD (dest, vx7, perm1) + memset (block, 0, 64 * sizeof (signed short)); +} + +void mpeg2_idct_altivec_init (void) +{ + int i, j; + + /* the altivec idct uses a transposed input, so we patch scan tables */ + for (i = 0; i < 64; i++) { + j = mpeg2_scan_norm[i]; + mpeg2_scan_norm[i] = (j >> 3) | ((j & 7) << 3); + j = mpeg2_scan_alt[i]; + mpeg2_scan_alt[i] = (j >> 3) | ((j & 7) << 3); + } +} + +#endif /* ARCH_PPC && ENABLED_ALTIVEC */ + diff --git a/src/video_dec/libmpeg2/idct_mlib.c b/src/video_dec/libmpeg2/idct_mlib.c new file mode 100644 index 000000000..e573c9790 --- /dev/null +++ b/src/video_dec/libmpeg2/idct_mlib.c @@ -0,0 +1,62 @@ +/* + * idct_mlib.c + * Copyright (C) 1999-2002 HÃ¥kan Hjort + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
/* Zero all 64 coefficients of a block once it has been consumed. */
static void mlib_clear_block (int16_t * block)
{
    memset (block, 0, 64 * sizeof (*block));
}

/*
 * IEEE-variant mediaLib IDCT of block in place, added to the 8x8 pixels
 * at dest; the block is zeroed afterwards.
 */
void mpeg2_idct_add_mlib (int16_t * block, uint8_t * dest, int stride)
{
    mlib_VideoIDCT_IEEE_S16_S16 (block, block);
    mlib_VideoAddBlock_U8_S16 (dest, block, stride);
    mlib_clear_block (block);
}

/*
 * mediaLib 8x8 IDCT of block written straight to dest as bytes; the
 * block is zeroed afterwards.
 */
void mpeg2_idct_copy_mlib_non_ieee (int16_t * block, uint8_t * dest,
                                    int stride)
{
    mlib_VideoIDCT8x8_U8_S16 (dest, block, stride);
    mlib_clear_block (block);
}

/*
 * Same as mpeg2_idct_add_mlib() but using the 8x8 (non-IEEE) mediaLib
 * IDCT.
 */
void mpeg2_idct_add_mlib_non_ieee (int16_t * block, uint8_t * dest, int stride)
{
    mlib_VideoIDCT8x8_S16_S16 (block, block);
    mlib_VideoAddBlock_U8_S16 (dest, block, stride);
    mlib_clear_block (block);
}

/* IEEE-variant mediaLib IDCT of block in place; the block is kept. */
void mpeg2_idct_mlib (int16_t * block)
{
    mlib_VideoIDCT_IEEE_S16_S16 (block, block);
}
+ * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, + * + */ + +void idct_block_copy_mlib (int16_t * block, uint8_t * dest, int stride); +void idct_block_add_mlib (int16_t * block, uint8_t * dest, int stride); diff --git a/src/video_dec/libmpeg2/idct_mmx.c b/src/video_dec/libmpeg2/idct_mmx.c new file mode 100644 index 000000000..6bb4bfbf0 --- /dev/null +++ b/src/video_dec/libmpeg2/idct_mmx.c @@ -0,0 +1,740 @@ +/* + * idct_mmx.c + * Copyright (C) 2000-2002 Michel Lespinasse + * Copyright (C) 1999-2000 Aaron Holtzman + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
/* MMXEXT row IDCT */

/*
 * Lays out the seven cosine constants c1..c7 as the 32-entry table the
 * MMXEXT row routines read at table+0, +4, +8, +12, +16, +20, +24, +28.
 */
#define mmxext_table(c1,c2,c3,c4,c5,c6,c7)	{  c4,  c2, -c4, -c2,	\
						   c4,  c6,  c4,  c6,	\
						   c1,  c3, -c1, -c5,	\
						   c5,  c7,  c3, -c7,	\
						   c4, -c6,  c4, -c6,	\
						  -c4,  c2,  c4, -c2,	\
						   c5, -c1,  c3, -c1,	\
						   c7,  c3,  c7, -c5 }

/*
 * Start a row: load the eight inputs at row+offset into mm2/mm5 (copies
 * in mm0/mm6), load the first two table quads and issue the first
 * multiply.  Register state is left for mmxext_row().
 */
static inline void mmxext_row_head (int16_t * row, int offset, int16_t * table)
{
    movq_m2r (*(row+offset), mm2);	// mm2 = x6 x4 x2 x0

    movq_m2r (*(row+offset+4), mm5);	// mm5 = x7 x5 x3 x1
    movq_r2r (mm2, mm0);		// mm0 = x6 x4 x2 x0

    movq_m2r (*table, mm3);		// mm3 = -C2 -C4 C2 C4
    movq_r2r (mm5, mm6);		// mm6 = x7 x5 x3 x1

    movq_m2r (*(table+4), mm4);		// mm4 = C6 C4 C6 C4
    pmaddwd_r2r (mm0, mm3);		// mm3 = -C4*x4-C2*x6 C4*x0+C2*x2

    pshufw_r2r (mm2, mm2, 0x4e);	// mm2 = x2 x0 x6 x4
}

/*
 * Core of a row: continues from the register state set up by
 * mmxext_row_head() or mmxext_row_mid().  Adds *rounder to the even-part
 * sums and leaves y0/y1 in mm1, y6/y7 in mm3 and the still unshifted
 * a2+b2/a3+b3 and a2-b2/a3-b3 in mm0 and mm4.
 */
static inline void mmxext_row (int16_t * table, int32_t * rounder)
{
    movq_m2r (*(table+8), mm1);		// mm1 = -C5 -C1 C3 C1
    pmaddwd_r2r (mm2, mm4);		// mm4 = C4*x0+C6*x2 C4*x4+C6*x6

    pmaddwd_m2r (*(table+16), mm0);	// mm0 = C4*x4-C6*x6 C4*x0-C6*x2
    pshufw_r2r (mm6, mm6, 0x4e);	// mm6 = x3 x1 x7 x5

    movq_m2r (*(table+12), mm7);	// mm7 = -C7 C3 C7 C5
    pmaddwd_r2r (mm5, mm1);		// mm1 = -C1*x5-C5*x7 C1*x1+C3*x3

    paddd_m2r (*rounder, mm3);		// mm3 += rounder
    pmaddwd_r2r (mm6, mm7);		// mm7 = C3*x1-C7*x3 C5*x5+C7*x7

    pmaddwd_m2r (*(table+20), mm2);	// mm2 = C4*x0-C2*x2 -C4*x4+C2*x6
    paddd_r2r (mm4, mm3);		// mm3 = a1 a0 + rounder

    pmaddwd_m2r (*(table+24), mm5);	// mm5 = C3*x5-C1*x7 C5*x1-C1*x3
    movq_r2r (mm3, mm4);		// mm4 = a1 a0 + rounder

    pmaddwd_m2r (*(table+28), mm6);	// mm6 = C7*x1-C5*x3 C7*x5+C3*x7
    paddd_r2r (mm7, mm1);		// mm1 = b1 b0

    paddd_m2r (*rounder, mm0);		// mm0 += rounder
    psubd_r2r (mm1, mm3);		// mm3 = a1-b1 a0-b0 + rounder

    psrad_i2r (ROW_SHIFT, mm3);		// mm3 = y6 y7
    paddd_r2r (mm4, mm1);		// mm1 = a1+b1 a0+b0 + rounder

    paddd_r2r (mm2, mm0);		// mm0 = a3 a2 + rounder
    psrad_i2r (ROW_SHIFT, mm1);		// mm1 = y1 y0

    paddd_r2r (mm6, mm5);		// mm5 = b3 b2
    movq_r2r (mm0, mm4);		// mm4 = a3 a2 + rounder

    paddd_r2r (mm5, mm0);		// mm0 = a3+b3 a2+b2 + rounder
    psubd_r2r (mm5, mm4);		// mm4 = a3-b3 a2-b2 + rounder
}

/*
 * Finish a row: shift the remaining sums, pack to 16 bits, put y4..y7
 * into order and store the eight outputs at row+store.
 */
static inline void mmxext_row_tail (int16_t * row, int store)
{
    psrad_i2r (ROW_SHIFT, mm0);		// mm0 = y3 y2

    psrad_i2r (ROW_SHIFT, mm4);		// mm4 = y4 y5

    packssdw_r2r (mm0, mm1);		// mm1 = y3 y2 y1 y0

    packssdw_r2r (mm3, mm4);		// mm4 = y6 y7 y4 y5

    movq_r2m (mm1, *(row+store));	// save y3 y2 y1 y0
    pshufw_r2r (mm4, mm4, 0xb1);	// mm4 = y7 y6 y5 y4

    /* slot */

    movq_r2m (mm4, *(row+store+4));	// save y7 y6 y5 y4
}

/*
 * mmxext_row_tail() for the row at row+store interleaved with
 * mmxext_row_head() for the row at row+offset.
 */
static inline void mmxext_row_mid (int16_t * row, int store,
				   int offset, int16_t * table)
{
    movq_m2r (*(row+offset), mm2);	// mm2 = x6 x4 x2 x0
    psrad_i2r (ROW_SHIFT, mm0);		// mm0 = y3 y2

    movq_m2r (*(row+offset+4), mm5);	// mm5 = x7 x5 x3 x1
    psrad_i2r (ROW_SHIFT, mm4);		// mm4 = y4 y5

    packssdw_r2r (mm0, mm1);		// mm1 = y3 y2 y1 y0
    movq_r2r (mm5, mm6);		// mm6 = x7 x5 x3 x1

    packssdw_r2r (mm3, mm4);		// mm4 = y6 y7 y4 y5
    movq_r2r (mm2, mm0);		// mm0 = x6 x4 x2 x0

    movq_r2m (mm1, *(row+store));	// save y3 y2 y1 y0
    pshufw_r2r (mm4, mm4, 0xb1);	// mm4 = y7 y6 y5 y4

    movq_m2r (*table, mm3);		// mm3 = -C2 -C4 C2 C4
    movq_r2m (mm4, *(row+store+4));	// save y7 y6 y5 y4

    pmaddwd_r2r (mm0, mm3);		// mm3 = -C4*x4-C2*x6 C4*x0+C2*x2

    movq_m2r (*(table+4), mm4);		// mm4 = C6 C4 C6 C4
    pshufw_r2r (mm2, mm2, 0x4e);	// mm2 = x2 x0 x6 x4
}


/* MMX row IDCT */

/*
 * Same constants as mmxext_table but in the order the plain-MMX row
 * routines read them.
 */
#define mmx_table(c1,c2,c3,c4,c5,c6,c7)	{  c4,  c2,  c4,  c6,	\
					   c4,  c6, -c4, -c2,	\
					   c1,  c3,  c3, -c7,	\
					   c5,  c7, -c1, -c5,	\
					   c4, -c6,  c4, -c2,	\
					  -c4,  c2,  c4, -c6,	\
					   c5, -c1,  c7, -c5,	\
					   c7,  c3,  c3, -c1 }

/*
 * Start a row (plain MMX): load the inputs at row+offset, duplicate the
 * x0/x2 and x4/x6 pairs with unpacks, load the first table quads and
 * issue the first multiply.  Register state is left for mmx_row().
 */
static inline void mmx_row_head (int16_t * row, int offset, int16_t * table)
{
    movq_m2r (*(row+offset), mm2);	// mm2 = x6 x4 x2 x0

    movq_m2r (*(row+offset+4), mm5);	// mm5 = x7 x5 x3 x1
    movq_r2r (mm2, mm0);		// mm0 = x6 x4 x2 x0

    movq_m2r (*table, mm3);		// mm3 = C6 C4 C2 C4
    movq_r2r (mm5, mm6);		// mm6 = x7 x5 x3 x1

    punpckldq_r2r (mm0, mm0);		// mm0 = x2 x0 x2 x0

    movq_m2r (*(table+4), mm4);		// mm4 = -C2 -C4 C6 C4
    pmaddwd_r2r (mm0, mm3);		// mm3 = C4*x0+C6*x2 C4*x0+C2*x2

    movq_m2r (*(table+8), mm1);		// mm1 = -C7 C3 C3 C1
    punpckhdq_r2r (mm2, mm2);		// mm2 = x6 x4 x6 x4
}

/*
 * Core of a row (plain MMX): continues from mmx_row_head() or
 * mmx_row_mid().  Leaves y0/y1 in mm1, y6/y7 in mm3 and the still
 * unshifted a2+b2/a3+b3 and a2-b2/a3-b3 in mm0 and mm7.
 */
static inline void mmx_row (int16_t * table, int32_t * rounder)
{
    pmaddwd_r2r (mm2, mm4);		// mm4 = -C4*x4-C2*x6 C4*x4+C6*x6
    punpckldq_r2r (mm5, mm5);		// mm5 = x3 x1 x3 x1

    pmaddwd_m2r (*(table+16), mm0);	// mm0 = C4*x0-C2*x2 C4*x0-C6*x2
    punpckhdq_r2r (mm6, mm6);		// mm6 = x7 x5 x7 x5

    movq_m2r (*(table+12), mm7);	// mm7 = -C5 -C1 C7 C5
    pmaddwd_r2r (mm5, mm1);		// mm1 = C3*x1-C7*x3 C1*x1+C3*x3

    paddd_m2r (*rounder, mm3);		// mm3 += rounder
    pmaddwd_r2r (mm6, mm7);		// mm7 = -C1*x5-C5*x7 C5*x5+C7*x7

    pmaddwd_m2r (*(table+20), mm2);	// mm2 = C4*x4-C6*x6 -C4*x4+C2*x6
    paddd_r2r (mm4, mm3);		// mm3 = a1 a0 + rounder

    pmaddwd_m2r (*(table+24), mm5);	// mm5 = C7*x1-C5*x3 C5*x1-C1*x3
    movq_r2r (mm3, mm4);		// mm4 = a1 a0 + rounder

    pmaddwd_m2r (*(table+28), mm6);	// mm6 = C3*x5-C1*x7 C7*x5+C3*x7
    paddd_r2r (mm7, mm1);		// mm1 = b1 b0

    paddd_m2r (*rounder, mm0);		// mm0 += rounder
    psubd_r2r (mm1, mm3);		// mm3 = a1-b1 a0-b0 + rounder

    psrad_i2r (ROW_SHIFT, mm3);		// mm3 = y6 y7
    paddd_r2r (mm4, mm1);		// mm1 = a1+b1 a0+b0 + rounder

    paddd_r2r (mm2, mm0);		// mm0 = a3 a2 + rounder
    psrad_i2r (ROW_SHIFT, mm1);		// mm1 = y1 y0

    paddd_r2r (mm6, mm5);		// mm5 = b3 b2
    movq_r2r (mm0, mm7);		// mm7 = a3 a2 + rounder

    paddd_r2r (mm5, mm0);		// mm0 = a3+b3 a2+b2 + rounder
    psubd_r2r (mm5, mm7);		// mm7 = a3-b3 a2-b2 + rounder
}

/*
 * Finish a row (plain MMX): shift and pack the sums, swap the 16-bit
 * halves of y6 y7 y4 y5 with a shift/or pair and store the eight outputs
 * at row+store.
 */
static inline void mmx_row_tail (int16_t * row, int store)
{
    psrad_i2r (ROW_SHIFT, mm0);		// mm0 = y3 y2

    psrad_i2r (ROW_SHIFT, mm7);		// mm7 = y4 y5

    packssdw_r2r (mm0, mm1);		// mm1 = y3 y2 y1 y0

    packssdw_r2r (mm3, mm7);		// mm7 = y6 y7 y4 y5

    movq_r2m (mm1, *(row+store));	// save y3 y2 y1 y0
    movq_r2r (mm7, mm4);		// mm4 = y6 y7 y4 y5

    pslld_i2r (16, mm7);		// mm7 = y7 0 y5 0

    psrld_i2r (16, mm4);		// mm4 = 0 y6 0 y4

    por_r2r (mm4, mm7);			// mm7 = y7 y6 y5 y4

    /* slot */

    movq_r2m (mm7, *(row+store+4));	// save y7 y6 y5 y4
}

/*
 * mmx_row_tail() for the row at row+store interleaved with
 * mmx_row_head() for the row at row+offset.
 */
static inline void mmx_row_mid (int16_t * row, int store,
				int offset, int16_t * table)
{
    movq_m2r (*(row+offset), mm2);	// mm2 = x6 x4 x2 x0
    psrad_i2r (ROW_SHIFT, mm0);		// mm0 = y3 y2

    movq_m2r (*(row+offset+4), mm5);	// mm5 = x7 x5 x3 x1
    psrad_i2r (ROW_SHIFT, mm7);		// mm7 = y4 y5

    packssdw_r2r (mm0, mm1);		// mm1 = y3 y2 y1 y0
    movq_r2r (mm5, mm6);		// mm6 = x7 x5 x3 x1

    packssdw_r2r (mm3, mm7);		// mm7 = y6 y7 y4 y5
    movq_r2r (mm2, mm0);		// mm0 = x6 x4 x2 x0

    movq_r2m (mm1, *(row+store));	// save y3 y2 y1 y0
    movq_r2r (mm7, mm1);		// mm1 = y6 y7 y4 y5

    punpckldq_r2r (mm0, mm0);		// mm0 = x2 x0 x2 x0
    psrld_i2r (16, mm7);		// mm7 = 0 y6 0 y4

    movq_m2r (*table, mm3);		// mm3 = C6 C4 C2 C4
    pslld_i2r (16, mm1);		// mm1 = y7 0 y5 0

    movq_m2r (*(table+4), mm4);		// mm4 = -C2 -C4 C6 C4
    por_r2r (mm1, mm7);			// mm7 = y7 y6 y5 y4

    movq_m2r (*(table+8), mm1);		// mm1 = -C7 C3 C3 C1
    punpckhdq_r2r (mm2, mm2);		// mm2 = x6 x4 x6 x4

    movq_r2m (mm7, *(row+store+4));	// save y7 y6 y5 y4
    pmaddwd_r2r (mm0, mm3);		// mm3 = C4*x0+C6*x2 C4*x0+C2*x2
}


#if 0
// C column IDCT - its just here to document the MMXEXT and MMX versions
// Disabled reference code: every intermediate is an int16_t passed
// through S(), and F() keeps the high 16 bits of each product.
static inline void idct_col (int16_t * col, int offset)
{
/* multiplication - as implemented on mmx */
#define F(c,x) (((c) * (x)) >> 16)

/* saturation - it helps us handle torture test cases */
#define S(x) (((x)>32767) ? 32767 : ((x)<-32768) ? -32768 : (x))

    int16_t x0, x1, x2, x3, x4, x5, x6, x7;
    int16_t y0, y1, y2, y3, y4, y5, y6, y7;
    int16_t a0, a1, a2, a3, b0, b1, b2, b3;
    int16_t u04, v04, u26, v26, u17, v17, u35, v35, u12, v12;

    col += offset;

    x0 = col[0*8];
    x1 = col[1*8];
    x2 = col[2*8];
    x3 = col[3*8];
    x4 = col[4*8];
    x5 = col[5*8];
    x6 = col[6*8];
    x7 = col[7*8];

    u04 = S (x0 + x4);
    v04 = S (x0 - x4);
    u26 = S (F (T2, x6) + x2);
    v26 = S (F (T2, x2) - x6);

    a0 = S (u04 + u26);
    a1 = S (v04 + v26);
    a2 = S (v04 - v26);
    a3 = S (u04 - u26);

    u17 = S (F (T1, x7) + x1);
    v17 = S (F (T1, x1) - x7);
    u35 = S (F (T3, x5) + x3);
    v35 = S (F (T3, x3) - x5);

    b0 = S (u17 + u35);
    b3 = S (v17 - v35);
    u12 = S (u17 - u35);
    v12 = S (v17 + v35);
    u12 = S (2 * F (C4, u12));
    v12 = S (2 * F (C4, v12));
    b1 = S (u12 + v12);
    b2 = S (u12 - v12);

    y0 = S (a0 + b0) >> COL_SHIFT;
    y1 = S (a1 + b1) >> COL_SHIFT;
    y2 = S (a2 + b2) >> COL_SHIFT;
    y3 = S (a3 + b3) >> COL_SHIFT;

    y4 = S (a3 - b3) >> COL_SHIFT;
    y5 = S (a2 - b2) >> COL_SHIFT;
    y6 = S (a1 - b1) >> COL_SHIFT;
    y7 = S (a0 - b0) >> COL_SHIFT;

    col[0*8] = y0;
    col[1*8] = y1;
    col[2*8] = y2;
    col[3*8] = y3;
    col[4*8] = y4;
    col[5*8] = y5;
    col[6*8] = y6;
    col[7*8] = y7;
}
#endif
ATTR_ALIGN(8) = {C4,C4,C4,C4}; + + /* column code adapted from peter gubanov */ + /* http://www.elecard.com/peter/idct.shtml */ + + movq_m2r (*_T1, mm0); // mm0 = T1 + + movq_m2r (*(col+offset+1*8), mm1); // mm1 = x1 + movq_r2r (mm0, mm2); // mm2 = T1 + + movq_m2r (*(col+offset+7*8), mm4); // mm4 = x7 + pmulhw_r2r (mm1, mm0); // mm0 = T1*x1 + + movq_m2r (*_T3, mm5); // mm5 = T3 + pmulhw_r2r (mm4, mm2); // mm2 = T1*x7 + + movq_m2r (*(col+offset+5*8), mm6); // mm6 = x5 + movq_r2r (mm5, mm7); // mm7 = T3-1 + + movq_m2r (*(col+offset+3*8), mm3); // mm3 = x3 + psubsw_r2r (mm4, mm0); // mm0 = v17 + + movq_m2r (*_T2, mm4); // mm4 = T2 + pmulhw_r2r (mm3, mm5); // mm5 = (T3-1)*x3 + + paddsw_r2r (mm2, mm1); // mm1 = u17 + pmulhw_r2r (mm6, mm7); // mm7 = (T3-1)*x5 + + /* slot */ + + movq_r2r (mm4, mm2); // mm2 = T2 + paddsw_r2r (mm3, mm5); // mm5 = T3*x3 + + pmulhw_m2r (*(col+offset+2*8), mm4);// mm4 = T2*x2 + paddsw_r2r (mm6, mm7); // mm7 = T3*x5 + + psubsw_r2r (mm6, mm5); // mm5 = v35 + paddsw_r2r (mm3, mm7); // mm7 = u35 + + movq_m2r (*(col+offset+6*8), mm3); // mm3 = x6 + movq_r2r (mm0, mm6); // mm6 = v17 + + pmulhw_r2r (mm3, mm2); // mm2 = T2*x6 + psubsw_r2r (mm5, mm0); // mm0 = b3 + + psubsw_r2r (mm3, mm4); // mm4 = v26 + paddsw_r2r (mm6, mm5); // mm5 = v12 + + movq_r2m (mm0, *(col+offset+3*8)); // save b3 in scratch0 + movq_r2r (mm1, mm6); // mm6 = u17 + + paddsw_m2r (*(col+offset+2*8), mm2);// mm2 = u26 + paddsw_r2r (mm7, mm6); // mm6 = b0 + + psubsw_r2r (mm7, mm1); // mm1 = u12 + movq_r2r (mm1, mm7); // mm7 = u12 + + movq_m2r (*(col+offset+0*8), mm3); // mm3 = x0 + paddsw_r2r (mm5, mm1); // mm1 = u12+v12 + + movq_m2r (*_C4, mm0); // mm0 = C4/2 + psubsw_r2r (mm5, mm7); // mm7 = u12-v12 + + movq_r2m (mm6, *(col+offset+5*8)); // save b0 in scratch1 + pmulhw_r2r (mm0, mm1); // mm1 = b1/2 + + movq_r2r (mm4, mm6); // mm6 = v26 + pmulhw_r2r (mm0, mm7); // mm7 = b2/2 + + movq_m2r (*(col+offset+4*8), mm5); // mm5 = x4 + movq_r2r (mm3, mm0); // mm0 = x0 + + psubsw_r2r (mm5, 
mm3); // mm3 = v04 + paddsw_r2r (mm5, mm0); // mm0 = u04 + + paddsw_r2r (mm3, mm4); // mm4 = a1 + movq_r2r (mm0, mm5); // mm5 = u04 + + psubsw_r2r (mm6, mm3); // mm3 = a2 + paddsw_r2r (mm2, mm5); // mm5 = a0 + + paddsw_r2r (mm1, mm1); // mm1 = b1 + psubsw_r2r (mm2, mm0); // mm0 = a3 + + paddsw_r2r (mm7, mm7); // mm7 = b2 + movq_r2r (mm3, mm2); // mm2 = a2 + + movq_r2r (mm4, mm6); // mm6 = a1 + paddsw_r2r (mm7, mm3); // mm3 = a2+b2 + + psraw_i2r (COL_SHIFT, mm3); // mm3 = y2 + paddsw_r2r (mm1, mm4); // mm4 = a1+b1 + + psraw_i2r (COL_SHIFT, mm4); // mm4 = y1 + psubsw_r2r (mm1, mm6); // mm6 = a1-b1 + + movq_m2r (*(col+offset+5*8), mm1); // mm1 = b0 + psubsw_r2r (mm7, mm2); // mm2 = a2-b2 + + psraw_i2r (COL_SHIFT, mm6); // mm6 = y6 + movq_r2r (mm5, mm7); // mm7 = a0 + + movq_r2m (mm4, *(col+offset+1*8)); // save y1 + psraw_i2r (COL_SHIFT, mm2); // mm2 = y5 + + movq_r2m (mm3, *(col+offset+2*8)); // save y2 + paddsw_r2r (mm1, mm5); // mm5 = a0+b0 + + movq_m2r (*(col+offset+3*8), mm4); // mm4 = b3 + psubsw_r2r (mm1, mm7); // mm7 = a0-b0 + + psraw_i2r (COL_SHIFT, mm5); // mm5 = y0 + movq_r2r (mm0, mm3); // mm3 = a3 + + movq_r2m (mm2, *(col+offset+5*8)); // save y5 + psubsw_r2r (mm4, mm3); // mm3 = a3-b3 + + psraw_i2r (COL_SHIFT, mm7); // mm7 = y7 + paddsw_r2r (mm0, mm4); // mm4 = a3+b3 + + movq_r2m (mm5, *(col+offset+0*8)); // save y0 + psraw_i2r (COL_SHIFT, mm3); // mm3 = y4 + + movq_r2m (mm6, *(col+offset+6*8)); // save y6 + psraw_i2r (COL_SHIFT, mm4); // mm4 = y3 + + movq_r2m (mm7, *(col+offset+7*8)); // save y7 + + movq_r2m (mm3, *(col+offset+4*8)); // save y4 + + movq_r2m (mm4, *(col+offset+3*8)); // save y3 +} + + +static int32_t rounder0[] ATTR_ALIGN(8) = + rounder ((1 << (COL_SHIFT - 1)) - 0.5); +static int32_t rounder4[] ATTR_ALIGN(8) = rounder (0); +static int32_t rounder1[] ATTR_ALIGN(8) = + rounder (1.25683487303); /* C1*(C1/C4+C1+C7)/2 */ +static int32_t rounder7[] ATTR_ALIGN(8) = + rounder (-0.25); /* C1*(C7/C4+C7-C1)/2 */ +static int32_t rounder2[] 
ATTR_ALIGN(8) = + rounder (0.60355339059); /* C2 * (C6+C2)/2 */ +static int32_t rounder6[] ATTR_ALIGN(8) = + rounder (-0.25); /* C2 * (C6-C2)/2 */ +static int32_t rounder3[] ATTR_ALIGN(8) = + rounder (0.087788325588); /* C3*(-C3/C4+C3+C5)/2 */ +static int32_t rounder5[] ATTR_ALIGN(8) = + rounder (-0.441341716183); /* C3*(-C5/C4+C5-C3)/2 */ + + +#define declare_idct(idct,table,idct_row_head,idct_row,idct_row_tail,idct_row_mid) \ +static inline void idct (int16_t * block) \ +{ \ + static int16_t table04[] ATTR_ALIGN(16) = \ + table (22725, 21407, 19266, 16384, 12873, 8867, 4520); \ + static int16_t table17[] ATTR_ALIGN(16) = \ + table (31521, 29692, 26722, 22725, 17855, 12299, 6270); \ + static int16_t table26[] ATTR_ALIGN(16) = \ + table (29692, 27969, 25172, 21407, 16819, 11585, 5906); \ + static int16_t table35[] ATTR_ALIGN(16) = \ + table (26722, 25172, 22654, 19266, 15137, 10426, 5315); \ + \ + idct_row_head (block, 0*8, table04); \ + idct_row (table04, rounder0); \ + idct_row_mid (block, 0*8, 4*8, table04); \ + idct_row (table04, rounder4); \ + idct_row_mid (block, 4*8, 1*8, table17); \ + idct_row (table17, rounder1); \ + idct_row_mid (block, 1*8, 7*8, table17); \ + idct_row (table17, rounder7); \ + idct_row_mid (block, 7*8, 2*8, table26); \ + idct_row (table26, rounder2); \ + idct_row_mid (block, 2*8, 6*8, table26); \ + idct_row (table26, rounder6); \ + idct_row_mid (block, 6*8, 3*8, table35); \ + idct_row (table35, rounder3); \ + idct_row_mid (block, 3*8, 5*8, table35); \ + idct_row (table35, rounder5); \ + idct_row_tail (block, 5*8); \ + \ + idct_col (block, 0); \ + idct_col (block, 4); \ +} + + +#define COPY_MMX(offset,r0,r1,r2) \ +do { \ + movq_m2r (*(block+offset), r0); \ + dest += stride; \ + movq_m2r (*(block+offset+4), r1); \ + movq_r2m (r2, *dest); \ + packuswb_r2r (r1, r0); \ +} while (0) + +static void block_copy (int16_t * block, uint8_t * dest, int stride) +{ + movq_m2r (*(block+0*8), mm0); + movq_m2r (*(block+0*8+4), mm1); + movq_m2r 
(*(block+1*8), mm2); + packuswb_r2r (mm1, mm0); + movq_m2r (*(block+1*8+4), mm3); + movq_r2m (mm0, *dest); + packuswb_r2r (mm3, mm2); + COPY_MMX (2*8, mm0, mm1, mm2); + COPY_MMX (3*8, mm2, mm3, mm0); + COPY_MMX (4*8, mm0, mm1, mm2); + COPY_MMX (5*8, mm2, mm3, mm0); + COPY_MMX (6*8, mm0, mm1, mm2); + COPY_MMX (7*8, mm2, mm3, mm0); + movq_r2m (mm2, *(dest+stride)); +} + + +#define ADD_MMX(offset,r1,r2,r3,r4) \ +do { \ + movq_m2r (*(dest+2*stride), r1); \ + packuswb_r2r (r4, r3); \ + movq_r2r (r1, r2); \ + dest += stride; \ + movq_r2m (r3, *dest); \ + punpcklbw_r2r (mm0, r1); \ + paddsw_m2r (*(block+offset), r1); \ + punpckhbw_r2r (mm0, r2); \ + paddsw_m2r (*(block+offset+4), r2); \ +} while (0) + +static void block_add (int16_t * block, uint8_t * dest, int stride) +{ + movq_m2r (*dest, mm1); + pxor_r2r (mm0, mm0); + movq_m2r (*(dest+stride), mm3); + movq_r2r (mm1, mm2); + punpcklbw_r2r (mm0, mm1); + movq_r2r (mm3, mm4); + paddsw_m2r (*(block+0*8), mm1); + punpckhbw_r2r (mm0, mm2); + paddsw_m2r (*(block+0*8+4), mm2); + punpcklbw_r2r (mm0, mm3); + paddsw_m2r (*(block+1*8), mm3); + packuswb_r2r (mm2, mm1); + punpckhbw_r2r (mm0, mm4); + movq_r2m (mm1, *dest); + paddsw_m2r (*(block+1*8+4), mm4); + ADD_MMX (2*8, mm1, mm2, mm3, mm4); + ADD_MMX (3*8, mm3, mm4, mm1, mm2); + ADD_MMX (4*8, mm1, mm2, mm3, mm4); + ADD_MMX (5*8, mm3, mm4, mm1, mm2); + ADD_MMX (6*8, mm1, mm2, mm3, mm4); + ADD_MMX (7*8, mm3, mm4, mm1, mm2); + packuswb_r2r (mm4, mm3); + movq_r2m (mm3, *(dest+stride)); +} + +static inline void block_zero (int16_t * block) { + pxor_r2r (mm0, mm0); + movq_r2m (mm0, *(block+0*4)); + movq_r2m (mm0, *(block+1*4)); + movq_r2m (mm0, *(block+2*4)); + movq_r2m (mm0, *(block+3*4)); + movq_r2m (mm0, *(block+4*4)); + movq_r2m (mm0, *(block+5*4)); + movq_r2m (mm0, *(block+6*4)); + movq_r2m (mm0, *(block+7*4)); + movq_r2m (mm0, *(block+8*4)); + movq_r2m (mm0, *(block+9*4)); + movq_r2m (mm0, *(block+10*4)); + movq_r2m (mm0, *(block+11*4)); + movq_r2m (mm0, *(block+12*4)); + movq_r2m 
(mm0, *(block+13*4)); + movq_r2m (mm0, *(block+14*4)); + movq_r2m (mm0, *(block+15*4)); +} + +declare_idct (mmxext_idct, mmxext_table, + mmxext_row_head, mmxext_row, mmxext_row_tail, mmxext_row_mid) + +void mpeg2_idct_copy_mmxext (int16_t * block, uint8_t * dest, int stride) +{ + mmxext_idct (block); + block_copy (block, dest, stride); + block_zero (block); +} + +void mpeg2_idct_add_mmxext (int16_t * block, uint8_t * dest, int stride) +{ + mmxext_idct (block); + block_add (block, dest, stride); + block_zero (block); +} + +void mpeg2_idct_mmxext (int16_t * block) +{ + mmxext_idct (block); +} + +declare_idct (mmx_idct, mmx_table, + mmx_row_head, mmx_row, mmx_row_tail, mmx_row_mid) + +void mpeg2_idct_copy_mmx (int16_t * block, uint8_t * dest, int stride) +{ + mmx_idct (block); + block_copy (block, dest, stride); + block_zero (block); +} + +void mpeg2_idct_add_mmx (int16_t * block, uint8_t * dest, int stride) +{ + mmx_idct (block); + block_add (block, dest, stride); + block_zero (block); +} + +void mpeg2_idct_mmx (int16_t * block) +{ + mmx_idct (block); +} + +void mpeg2_zero_block_mmx (int16_t * block) +{ + block_zero (block); +} + +void mpeg2_idct_mmx_init (void) +{ + int i, j; + + /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */ + + for (i = 0; i < 64; i++) { + j = mpeg2_scan_norm[i]; + mpeg2_scan_norm[i] = (j & 0x38) | ((j & 6) >> 1) | ((j & 1) << 2); + j = mpeg2_scan_alt[i]; + mpeg2_scan_alt[i] = (j & 0x38) | ((j & 6) >> 1) | ((j & 1) << 2); + } +} + +#endif diff --git a/src/video_dec/libmpeg2/libmpeg2_accel.c b/src/video_dec/libmpeg2/libmpeg2_accel.c new file mode 100644 index 000000000..92c0e280b --- /dev/null +++ b/src/video_dec/libmpeg2/libmpeg2_accel.c @@ -0,0 +1,223 @@ +/* + * libmpeg2_accel.c + * Copyright (C) 2004 The Unichrome Project. + * Copyright (C) 2005 Thomas Hellstrom. + * + * This file is part of xine, a free video player. 
+ * + * xine is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * xine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include +#include "mpeg2.h" +#include "mpeg2_internal.h" +#include "xvmc_vld.h" +#include "libmpeg2_accel.h" + + +void +libmpeg2_accel_scan( mpeg2dec_accel_t *accel, uint8_t *scan_norm, uint8_t *scan_alt) +{ + xvmc_setup_scan_ptable(); +} + + +int +libmpeg2_accel_discontinuity(mpeg2dec_accel_t *accel, uint32_t frame_format, picture_t *picture) +{ + accel->xvmc_last_slice_code=-1; + if ( !picture->current_frame ) + return 0; + if (frame_format == XINE_IMGFMT_XXMC) { + xine_xxmc_t *xxmc = (xine_xxmc_t *) + picture->current_frame->accel_data; + switch(xxmc->acceleration) { + case XINE_XVMC_ACCEL_VLD: + case XINE_XVMC_ACCEL_IDCT: + case XINE_XVMC_ACCEL_MOCOMP: + xxmc->proc_xxmc_flush( picture->current_frame ); + break; + default: + break; + } + } + return 0; +} + +int +libmpeg2_accel_new_sequence(mpeg2dec_accel_t *accel, uint32_t frame_format, picture_t *picture) +{ + switch(frame_format) { + case XINE_IMGFMT_XXMC: + case XINE_IMGFMT_XVMC: { + xine_xvmc_t *xvmc = (xine_xvmc_t *) + picture->current_frame->accel_data; + picture->mc = xvmc->macroblocks; + return 0; + } + default: + break; + } + return 1; +} + +int +libmpeg2_accel_new_frame(mpeg2dec_accel_t *accel, uint32_t frame_format, + picture_t *picture, double ratio, uint32_t flags) +{ + if 
(picture->current_frame) { + if (XINE_IMGFMT_XXMC == frame_format) { + xine_xxmc_t *xxmc = (xine_xxmc_t *) + picture->current_frame->accel_data; + + /* + * Make a request for acceleration type and mpeg coding from + * the output plugin. + */ + + xxmc->fallback_format = XINE_IMGFMT_YV12; + xxmc->acceleration = XINE_XVMC_ACCEL_VLD| XINE_XVMC_ACCEL_IDCT + | XINE_XVMC_ACCEL_MOCOMP ; + + /* + * Standard MOCOMP / IDCT XvMC implementation for interlaced streams + * is buggy. The bug is inherited from the old XvMC driver. Don't use it until + * it has been fixed. (A volunteer ?) + */ + + if ( picture->picture_structure != 3 ) { + picture->top_field_first = (picture->picture_structure == 1); + xxmc->acceleration &= ~( XINE_XVMC_ACCEL_IDCT | XINE_XVMC_ACCEL_MOCOMP ); + } + + xxmc->mpeg = (picture->mpeg1) ? XINE_XVMC_MPEG_1:XINE_XVMC_MPEG_2; + xxmc->proc_xxmc_update_frame (picture->current_frame->driver, + picture->current_frame, + picture->coded_picture_width, + picture->coded_picture_height, + ratio, + XINE_IMGFMT_XXMC, flags); + } + } + return 0; +} + +void +libmpeg2_accel_frame_completion(mpeg2dec_accel_t * accel, uint32_t frame_format, picture_t *picture, + int code) +{ + + if ( !picture->current_frame ) return; + + if (frame_format == XINE_IMGFMT_XXMC) { + xine_xxmc_t *xxmc = (xine_xxmc_t *) + picture->current_frame->accel_data; + if (!xxmc->decoded) { + switch(picture->current_frame->format) { + case XINE_IMGFMT_XXMC: + switch(xxmc->acceleration) { + case XINE_XVMC_ACCEL_VLD: + mpeg2_xxmc_vld_frame_complete(accel, picture, code); + break; + case XINE_XVMC_ACCEL_IDCT: + case XINE_XVMC_ACCEL_MOCOMP: + xxmc->decoded = !picture->current_frame->bad_frame; + xxmc->proc_xxmc_flush( picture->current_frame ); + break; + default: + break; + } + default: + break; + } + } + } +} + + +int +libmpeg2_accel_slice(mpeg2dec_accel_t *accel, picture_t *picture, int code, char * buffer, + uint32_t chunk_size, uint8_t *chunk_buffer) +{ + /* + * Don't reference frames of other formats. 
They are invalid. This may happen if the + * xxmc plugin suddenly falls back to software decoding. + */ + + if (( picture->current_frame->picture_coding_type == XINE_PICT_P_TYPE ) || + ( picture->current_frame->picture_coding_type == XINE_PICT_B_TYPE )) { + if (! picture->forward_reference_frame) return 1; + if (picture->forward_reference_frame->format != picture->current_frame->format) { + picture->v_offset = 0; + return 1; + } + } + + if ( picture->current_frame->picture_coding_type == XINE_PICT_B_TYPE ) { + if (! picture->backward_reference_frame) return 1; + if (picture->backward_reference_frame->format != picture->current_frame->format) { + picture->v_offset = 0; + return 1; + } + } + + switch( picture->current_frame->format ) { + + case XINE_IMGFMT_XXMC: + { + xine_xxmc_t *xxmc = (xine_xxmc_t *) + picture->current_frame->accel_data; + + if ( xxmc->proc_xxmc_lock_valid( picture->current_frame, + picture->forward_reference_frame, + picture->backward_reference_frame, + picture->current_frame->picture_coding_type)) { + picture->v_offset = 0; + return 1; + } + + switch(picture->current_frame->format) { + case XINE_IMGFMT_XXMC: + switch(xxmc->acceleration) { + case XINE_XVMC_ACCEL_VLD: + mpeg2_xxmc_slice(accel, picture, code, buffer, chunk_size, chunk_buffer); + break; + case XINE_XVMC_ACCEL_IDCT: + case XINE_XVMC_ACCEL_MOCOMP: + mpeg2_xvmc_slice (accel, picture, code, buffer); + break; + default: + mpeg2_slice (picture, code, buffer); + break; + } + break; + default: + mpeg2_slice (picture, code, buffer); + break; + } + xxmc->proc_xxmc_unlock(picture->current_frame->driver); + break; + } + + case XINE_IMGFMT_XVMC: + mpeg2_xvmc_slice (accel, picture, code, buffer); + break; + + default: + mpeg2_slice (picture, code, buffer); + break; + } + return 0; +} diff --git a/src/video_dec/libmpeg2/libmpeg2_accel.h b/src/video_dec/libmpeg2/libmpeg2_accel.h new file mode 100644 index 000000000..5d0b37a78 --- /dev/null +++ b/src/video_dec/libmpeg2/libmpeg2_accel.h @@ -0,0 +1,48 
@@ +/* + * libmpeg2_accel.h + * Copyright (C) 2004 The Unichrome Project. + * Copyright (C) 2005 Thomas Hellstrom. + * + * This file is part of xine, a free video player. + * + * xine is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * xine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#ifndef LIBMPEG2_ACCEL_H +#define LIBMPEG2_ACCEL_H + +#include "mpeg2_internal.h" + +/* + * Internal context data type. 
+ */ + +typedef struct { + int xvmc_last_slice_code; + int slices_per_row; + int row_slice_count; + unsigned xxmc_mb_pic_height; +} mpeg2dec_accel_t; + +extern int libmpeg2_accel_discontinuity(mpeg2dec_accel_t *accel, uint32_t frame_format, picture_t *picture); +extern int libmpeg2_accel_new_sequence(mpeg2dec_accel_t *accel, uint32_t frame_format, picture_t *picture); +extern int libmpeg2_accel_new_frame(mpeg2dec_accel_t *accel, uint32_t frame_format, picture_t *picture, double ratio, uint32_t flags); +extern void libmpeg2_accel_frame_completion(mpeg2dec_accel_t *accel, uint32_t frame_format, picture_t *picture, int code); + +extern int libmpeg2_accel_slice(mpeg2dec_accel_t *accel, picture_t *picture, int code, + char * buffer, uint32_t chunk_size, uint8_t *chunk_buffer); +extern void libmpeg2_accel_scan( mpeg2dec_accel_t *accel, uint8_t *scan_norm, uint8_t *scan_alt); + +#endif diff --git a/src/video_dec/libmpeg2/motion_comp.c b/src/video_dec/libmpeg2/motion_comp.c new file mode 100644 index 000000000..9328dfb9f --- /dev/null +++ b/src/video_dec/libmpeg2/motion_comp.c @@ -0,0 +1,154 @@ +/* + * motion_comp.c + * Copyright (C) 2000-2002 Michel Lespinasse + * Copyright (C) 1999-2000 Aaron Holtzman + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#include +#include + +#include "mpeg2_internal.h" +#include + +mpeg2_mc_t mpeg2_mc; + +void mpeg2_mc_init (uint32_t mm_accel) +{ +#ifdef LIBMPEG2_MLIB + if (mm_accel & MM_ACCEL_MLIB) { +#ifdef LOG + fprintf (stderr, "Using mediaLib for motion compensation\n"); +#endif + mpeg2_mc = mpeg2_mc_mlib; + } +#endif + +#if defined(ARCH_X86) || defined(ARCH_X86_64) + if (mm_accel & MM_ACCEL_X86_MMXEXT) { +#ifdef LOG + fprintf (stderr, "Using MMXEXT for motion compensation\n"); +#endif + mpeg2_mc = mpeg2_mc_mmxext; + } else if (mm_accel & MM_ACCEL_X86_3DNOW) { +#ifdef LOG + fprintf (stderr, "Using 3DNOW for motion compensation\n"); +#endif + mpeg2_mc = mpeg2_mc_3dnow; + } else if (mm_accel & MM_ACCEL_X86_MMX) { +#ifdef LOG + fprintf (stderr, "Using MMX for motion compensation\n"); +#endif + mpeg2_mc = mpeg2_mc_mmx; + } else +#endif +#if defined (ARCH_PPC) && defined (ENABLE_ALTIVEC) + if (mm_accel & MM_ACCEL_PPC_ALTIVEC) { +#ifdef LOG + fprintf (stderr, "Using altivec for motion compensation\n"); +#endif + mpeg2_mc = mpeg2_mc_altivec; + } else +#endif +#ifdef ARCH_SPARC + if (mm_accel & MM_ACCEL_SPARC_VIS) { +#ifdef LOG + fprintf (stderr, "Using VIS for motion compensation\n"); +#endif + mpeg2_mc = mpeg2_mc_vis; + } else +#endif + { +#ifdef LOG + fprintf (stderr, "No accelerated motion compensation found\n"); +#endif + mpeg2_mc = mpeg2_mc_c; + } +} + +#define avg2(a,b) ((a+b+1)>>1) +#define avg4(a,b,c,d) ((a+b+c+d+2)>>2) + +#define predict_o(i) (ref[i]) +#define predict_x(i) (avg2 (ref[i], ref[i+1])) +#define predict_y(i) (avg2 (ref[i], (ref+stride)[i])) +#define predict_xy(i) (avg4 (ref[i], ref[i+1], \ + (ref+stride)[i], (ref+stride)[i+1])) + +#define put(predictor,i) dest[i] = predictor (i) +#define avg(predictor,i) dest[i] = avg2 
(predictor (i), dest[i]) + +/* mc function template */ + +#define MC_FUNC(op,xy) \ +static void MC_##op##_##xy##_16_c (uint8_t * dest, uint8_t * ref, \ + int stride, int height) \ +{ \ + do { \ + op (predict_##xy, 0); \ + op (predict_##xy, 1); \ + op (predict_##xy, 2); \ + op (predict_##xy, 3); \ + op (predict_##xy, 4); \ + op (predict_##xy, 5); \ + op (predict_##xy, 6); \ + op (predict_##xy, 7); \ + op (predict_##xy, 8); \ + op (predict_##xy, 9); \ + op (predict_##xy, 10); \ + op (predict_##xy, 11); \ + op (predict_##xy, 12); \ + op (predict_##xy, 13); \ + op (predict_##xy, 14); \ + op (predict_##xy, 15); \ + ref += stride; \ + dest += stride; \ + } while (--height); \ +} \ +static void MC_##op##_##xy##_8_c (uint8_t * dest, uint8_t * ref, \ + int stride, int height) \ +{ \ + do { \ + op (predict_##xy, 0); \ + op (predict_##xy, 1); \ + op (predict_##xy, 2); \ + op (predict_##xy, 3); \ + op (predict_##xy, 4); \ + op (predict_##xy, 5); \ + op (predict_##xy, 6); \ + op (predict_##xy, 7); \ + ref += stride; \ + dest += stride; \ + } while (--height); \ +} + +/* definitions of the actual mc functions */ + +MC_FUNC (put,o) +MC_FUNC (avg,o) +MC_FUNC (put,x) +MC_FUNC (avg,x) +MC_FUNC (put,y) +MC_FUNC (avg,y) +MC_FUNC (put,xy) +MC_FUNC (avg,xy) + +MPEG2_MC_EXTERN (c) diff --git a/src/video_dec/libmpeg2/motion_comp_altivec.c b/src/video_dec/libmpeg2/motion_comp_altivec.c new file mode 100644 index 000000000..99719b7fb --- /dev/null +++ b/src/video_dec/libmpeg2/motion_comp_altivec.c @@ -0,0 +1,2031 @@ +/* + * motion_comp_altivec.c + * Copyright (C) 2000-2002 Michel Lespinasse + * Copyright (C) 1999-2000 Aaron Holtzman + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. 
+ * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#ifndef HOST_OS_DARWIN + +#if defined (ARCH_PPC) && defined (ENABLE_ALTIVEC) + +#include "mpeg2_internal.h" + +#include + +/* + * The asm code is generated with: + * + * gcc-2.95 -fvec -DHOST_OS_DARWIN -O9 -fomit-frame-pointer -mregnames -S + * motion_comp_altivec.c + * + * sed 's/.L/._L/g' motion_comp_altivec.s | + * awk '{args=""; len=split ($2, arg, ","); + * for (i=1; i<=len; i++) { a=arg[i]; if (i> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + ref += stride; + tmp = vec_perm (ref0, ref1, perm); + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + ref += stride; + vec_st (tmp, 0, dest); + tmp = vec_perm (ref0, ref1, perm); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + ref += stride; + vec_st (tmp, stride, dest); + dest += 2*stride; + tmp = vec_perm (ref0, ref1, perm); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + vec_st (tmp, 0, dest); + tmp = vec_perm (ref0, ref1, perm); + vec_st (tmp, stride, dest); +} + +void MC_put_o_8_altivec (unsigned char * dest, unsigned char * ref, + int stride, int height) +{ + vector_u8_t perm0, perm1, tmp0, tmp1, ref0, ref1; + + tmp0 = vec_lvsl (0, ref); + tmp0 = vec_mergeh (tmp0, tmp0); + perm0 = vec_pack 
((vector_u16_t)tmp0, (vector_u16_t)tmp0); + tmp1 = vec_lvsl (stride, ref); + tmp1 = vec_mergeh (tmp1, tmp1); + perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1); + + height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + ref += stride; + tmp0 = vec_perm (ref0, ref1, perm0); + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + ref += stride; + vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); + dest += stride; + tmp1 = vec_perm (ref0, ref1, perm1); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + ref += stride; + vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); + dest += stride; + tmp0 = vec_perm (ref0, ref1, perm0); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); + dest += stride; + tmp1 = vec_perm (ref0, ref1, perm1); + vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); +} + +void MC_put_x_16_altivec (unsigned char * dest, unsigned char * ref, + int stride, int height) +{ + vector_u8_t permA, permB, ref0, ref1, tmp; + + permA = vec_lvsl (0, ref); + permB = vec_add (permA, vec_splat_u8 (1)); + + height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + tmp = vec_avg (vec_perm (ref0, ref1, permA), + vec_perm (ref0, ref1, permB)); + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + vec_st (tmp, 0, dest); + tmp = vec_avg (vec_perm (ref0, ref1, permA), + vec_perm (ref0, ref1, permB)); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + vec_st (tmp, stride, dest); + dest += 2*stride; + tmp = vec_avg (vec_perm (ref0, ref1, permA), + vec_perm (ref0, ref1, permB)); + } while (--height); + + ref0 = vec_ld 
(0, ref); + ref1 = vec_ld (16, ref); + vec_st (tmp, 0, dest); + tmp = vec_avg (vec_perm (ref0, ref1, permA), + vec_perm (ref0, ref1, permB)); + vec_st (tmp, stride, dest); +} + +void MC_put_x_8_altivec (unsigned char * dest, unsigned char * ref, + int stride, int height) +{ + vector_u8_t perm0A, perm0B, perm1A, perm1B, ones, tmp0, tmp1, ref0, ref1; + + ones = vec_splat_u8 (1); + tmp0 = vec_lvsl (0, ref); + tmp0 = vec_mergeh (tmp0, tmp0); + perm0A = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0); + perm0B = vec_add (perm0A, ones); + tmp1 = vec_lvsl (stride, ref); + tmp1 = vec_mergeh (tmp1, tmp1); + perm1A = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1); + perm1B = vec_add (perm1A, ones); + + height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + ref += stride; + tmp0 = vec_avg (vec_perm (ref0, ref1, perm0A), + vec_perm (ref0, ref1, perm0B)); + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + ref += stride; + vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); + dest += stride; + tmp1 = vec_avg (vec_perm (ref0, ref1, perm1A), + vec_perm (ref0, ref1, perm1B)); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + ref += stride; + vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); + dest += stride; + tmp0 = vec_avg (vec_perm (ref0, ref1, perm0A), + vec_perm (ref0, ref1, perm0B)); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); + dest += stride; + tmp1 = vec_avg (vec_perm (ref0, ref1, perm1A), + vec_perm (ref0, ref1, perm1B)); + vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); +} + +void MC_put_y_16_altivec (unsigned char * dest, unsigned char * ref, + int stride, int height) +{ + vector_u8_t perm, 
ref0, ref1, tmp0, tmp1, tmp; + + perm = vec_lvsl (0, ref); + + height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + ref += stride; + tmp0 = vec_perm (ref0, ref1, perm); + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + ref += stride; + tmp1 = vec_perm (ref0, ref1, perm); + tmp = vec_avg (tmp0, tmp1); + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + ref += stride; + vec_st (tmp, 0, dest); + tmp0 = vec_perm (ref0, ref1, perm); + tmp = vec_avg (tmp0, tmp1); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + ref += stride; + vec_st (tmp, stride, dest); + dest += 2*stride; + tmp1 = vec_perm (ref0, ref1, perm); + tmp = vec_avg (tmp0, tmp1); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + vec_st (tmp, 0, dest); + tmp0 = vec_perm (ref0, ref1, perm); + tmp = vec_avg (tmp0, tmp1); + vec_st (tmp, stride, dest); +} + +void MC_put_y_8_altivec (unsigned char * dest, unsigned char * ref, + int stride, int height) +{ + vector_u8_t perm0, perm1, tmp0, tmp1, tmp, ref0, ref1; + + tmp0 = vec_lvsl (0, ref); + tmp0 = vec_mergeh (tmp0, tmp0); + perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0); + tmp1 = vec_lvsl (stride, ref); + tmp1 = vec_mergeh (tmp1, tmp1); + perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1); + + height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + ref += stride; + tmp0 = vec_perm (ref0, ref1, perm0); + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + ref += stride; + tmp1 = vec_perm (ref0, ref1, perm1); + tmp = vec_avg (tmp0, tmp1); + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + ref += stride; + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); + dest += stride; + tmp0 = vec_perm (ref0, ref1, perm0); + tmp = vec_avg (tmp0, tmp1); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + ref += stride; + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + 
vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); + dest += stride; + tmp1 = vec_perm (ref0, ref1, perm1); + tmp = vec_avg (tmp0, tmp1); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); + dest += stride; + tmp0 = vec_perm (ref0, ref1, perm0); + tmp = vec_avg (tmp0, tmp1); + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); +} + +void MC_put_xy_16_altivec (unsigned char * dest, unsigned char * ref, + int stride, int height) +{ + vector_u8_t permA, permB, ref0, ref1, A, B, avg0, avg1, xor0, xor1, tmp; + vector_u8_t ones; + + ones = vec_splat_u8 (1); + permA = vec_lvsl (0, ref); + permB = vec_add (permA, ones); + + height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + A = vec_perm (ref0, ref1, permA); + B = vec_perm (ref0, ref1, permB); + avg0 = vec_avg (A, B); + xor0 = vec_xor (A, B); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + A = vec_perm (ref0, ref1, permA); + B = vec_perm (ref0, ref1, permB); + avg1 = vec_avg (A, B); + xor1 = vec_xor (A, B); + tmp = vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1))); + + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + vec_st (tmp, 0, dest); + A = vec_perm (ref0, ref1, permA); + B = vec_perm (ref0, ref1, permB); + avg0 = vec_avg (A, B); + xor0 = vec_xor (A, B); + tmp = vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1))); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + vec_st (tmp, stride, dest); + dest += 2*stride; + A = vec_perm (ref0, ref1, permA); + B = vec_perm (ref0, ref1, permB); + avg1 = vec_avg (A, B); + xor1 = vec_xor (A, B); + tmp = vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and 
(ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1))); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + vec_st (tmp, 0, dest); + A = vec_perm (ref0, ref1, permA); + B = vec_perm (ref0, ref1, permB); + avg0 = vec_avg (A, B); + xor0 = vec_xor (A, B); + tmp = vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1))); + vec_st (tmp, stride, dest); +} + +void MC_put_xy_8_altivec (unsigned char * dest, unsigned char * ref, + int stride, int height) +{ + vector_u8_t perm0A, perm0B, perm1A, perm1B, ref0, ref1, A, B; + vector_u8_t avg0, avg1, xor0, xor1, tmp, ones; + + ones = vec_splat_u8 (1); + perm0A = vec_lvsl (0, ref); + perm0A = vec_mergeh (perm0A, perm0A); + perm0A = vec_pack ((vector_u16_t)perm0A, (vector_u16_t)perm0A); + perm0B = vec_add (perm0A, ones); + perm1A = vec_lvsl (stride, ref); + perm1A = vec_mergeh (perm1A, perm1A); + perm1A = vec_pack ((vector_u16_t)perm1A, (vector_u16_t)perm1A); + perm1B = vec_add (perm1A, ones); + + height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + A = vec_perm (ref0, ref1, perm0A); + B = vec_perm (ref0, ref1, perm0B); + avg0 = vec_avg (A, B); + xor0 = vec_xor (A, B); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + A = vec_perm (ref0, ref1, perm1A); + B = vec_perm (ref0, ref1, perm1B); + avg1 = vec_avg (A, B); + xor1 = vec_xor (A, B); + tmp = vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1))); + + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); + dest += stride; + A = vec_perm (ref0, ref1, perm0A); + B = vec_perm (ref0, ref1, perm0B); + avg0 = vec_avg (A, B); + xor0 = vec_xor (A, B); + tmp = vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1))); 
+ + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); + dest += stride; + A = vec_perm (ref0, ref1, perm1A); + B = vec_perm (ref0, ref1, perm1B); + avg1 = vec_avg (A, B); + xor1 = vec_xor (A, B); + tmp = vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1))); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); + dest += stride; + A = vec_perm (ref0, ref1, perm0A); + B = vec_perm (ref0, ref1, perm0B); + avg0 = vec_avg (A, B); + xor0 = vec_xor (A, B); + tmp = vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1))); + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); +} + +#if 0 +void MC_put_xy_8_altivec (unsigned char * dest, unsigned char * ref, + int stride, int height) +{ + vector_u8_t permA, permB, ref0, ref1, A, B, C, D, tmp, zero, ones; + vector_u16_t splat2, temp; + + ones = vec_splat_u8 (1); + permA = vec_lvsl (0, ref); + permB = vec_add (permA, ones); + + zero = vec_splat_u8 (0); + splat2 = vec_splat_u16 (2); + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + A = vec_perm (ref0, ref1, permA); + B = vec_perm (ref0, ref1, permB); + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + C = vec_perm (ref0, ref1, permA); + D = vec_perm (ref0, ref1, permB); + + temp = vec_add (vec_add ((vector_u16_t)vec_mergeh (zero, A), + (vector_u16_t)vec_mergeh (zero, B)), + vec_add ((vector_u16_t)vec_mergeh (zero, C), + (vector_u16_t)vec_mergeh (zero, D))); + temp = vec_sr (vec_add (temp, splat2), splat2); + tmp = vec_pack (temp, temp); + + vec_st (tmp, 0, dest); + dest += stride; + tmp = vec_avg (vec_perm (ref0, ref1, permA), + 
vec_perm (ref0, ref1, permB)); + } while (--height); +} +#endif + +void MC_avg_o_16_altivec (unsigned char * dest, unsigned char * ref, + int stride, int height) +{ + vector_u8_t perm, ref0, ref1, tmp, prev; + + perm = vec_lvsl (0, ref); + + height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + ref += stride; + prev = vec_ld (0, dest); + tmp = vec_avg (prev, vec_perm (ref0, ref1, perm)); + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + ref += stride; + prev = vec_ld (stride, dest); + vec_st (tmp, 0, dest); + tmp = vec_avg (prev, vec_perm (ref0, ref1, perm)); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + ref += stride; + prev = vec_ld (2*stride, dest); + vec_st (tmp, stride, dest); + dest += 2*stride; + tmp = vec_avg (prev, vec_perm (ref0, ref1, perm)); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + prev = vec_ld (stride, dest); + vec_st (tmp, 0, dest); + tmp = vec_avg (prev, vec_perm (ref0, ref1, perm)); + vec_st (tmp, stride, dest); +} + +void MC_avg_o_8_altivec (unsigned char * dest, unsigned char * ref, + int stride, int height) +{ + vector_u8_t perm0, perm1, tmp0, tmp1, ref0, ref1, prev; + + tmp0 = vec_lvsl (0, ref); + tmp0 = vec_mergeh (tmp0, tmp0); + perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0); + tmp1 = vec_lvsl (stride, ref); + tmp1 = vec_mergeh (tmp1, tmp1); + perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1); + + height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + ref += stride; + prev = vec_ld (0, dest); + tmp0 = vec_avg (prev, vec_perm (ref0, ref1, perm0)); + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + ref += stride; + prev = vec_ld (stride, dest); + vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); + dest += stride; + tmp1 = vec_avg (prev, vec_perm (ref0, ref1, perm1)); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + ref += 
stride; + prev = vec_ld (stride, dest); + vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); + dest += stride; + tmp0 = vec_avg (prev, vec_perm (ref0, ref1, perm0)); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + prev = vec_ld (stride, dest); + vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); + dest += stride; + tmp1 = vec_avg (prev, vec_perm (ref0, ref1, perm1)); + vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); +} + +void MC_avg_x_16_altivec (unsigned char * dest, unsigned char * ref, + int stride, int height) +{ + vector_u8_t permA, permB, ref0, ref1, tmp, prev; + + permA = vec_lvsl (0, ref); + permB = vec_add (permA, vec_splat_u8 (1)); + + height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + prev = vec_ld (0, dest); + ref += stride; + tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA), + vec_perm (ref0, ref1, permB))); + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + prev = vec_ld (stride, dest); + vec_st (tmp, 0, dest); + tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA), + vec_perm (ref0, ref1, permB))); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + prev = vec_ld (2*stride, dest); + vec_st (tmp, stride, dest); + dest += 2*stride; + tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA), + vec_perm (ref0, ref1, permB))); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + prev = vec_ld (stride, dest); + vec_st (tmp, 0, dest); + tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA), + vec_perm (ref0, ref1, permB))); + vec_st (tmp, stride, dest); +} + +void MC_avg_x_8_altivec (unsigned char * dest, unsigned char * ref, + int stride, int height) +{ + vector_u8_t perm0A, perm0B, perm1A, perm1B, ones, tmp0, 
tmp1, ref0, ref1; + vector_u8_t prev; + + ones = vec_splat_u8 (1); + tmp0 = vec_lvsl (0, ref); + tmp0 = vec_mergeh (tmp0, tmp0); + perm0A = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0); + perm0B = vec_add (perm0A, ones); + tmp1 = vec_lvsl (stride, ref); + tmp1 = vec_mergeh (tmp1, tmp1); + perm1A = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1); + perm1B = vec_add (perm1A, ones); + + height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + prev = vec_ld (0, dest); + ref += stride; + tmp0 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm0A), + vec_perm (ref0, ref1, perm0B))); + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + ref += stride; + prev = vec_ld (stride, dest); + vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); + dest += stride; + tmp1 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm1A), + vec_perm (ref0, ref1, perm1B))); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + ref += stride; + prev = vec_ld (stride, dest); + vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); + dest += stride; + tmp0 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm0A), + vec_perm (ref0, ref1, perm0B))); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + prev = vec_ld (stride, dest); + vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); + dest += stride; + tmp1 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm1A), + vec_perm (ref0, ref1, perm1B))); + vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); +} + +void MC_avg_y_16_altivec (unsigned char * dest, unsigned char * ref, + int stride, int height) +{ + vector_u8_t perm, ref0, ref1, tmp0, tmp1, tmp, prev; + + perm = vec_lvsl (0, ref); + + height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + 
ref1 = vec_ld (15, ref); + ref += stride; + tmp0 = vec_perm (ref0, ref1, perm); + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + ref += stride; + prev = vec_ld (0, dest); + tmp1 = vec_perm (ref0, ref1, perm); + tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + ref += stride; + prev = vec_ld (stride, dest); + vec_st (tmp, 0, dest); + tmp0 = vec_perm (ref0, ref1, perm); + tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + ref += stride; + prev = vec_ld (2*stride, dest); + vec_st (tmp, stride, dest); + dest += 2*stride; + tmp1 = vec_perm (ref0, ref1, perm); + tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + prev = vec_ld (stride, dest); + vec_st (tmp, 0, dest); + tmp0 = vec_perm (ref0, ref1, perm); + tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); + vec_st (tmp, stride, dest); +} + +void MC_avg_y_8_altivec (unsigned char * dest, unsigned char * ref, + int stride, int height) +{ + vector_u8_t perm0, perm1, tmp0, tmp1, tmp, ref0, ref1, prev; + + tmp0 = vec_lvsl (0, ref); + tmp0 = vec_mergeh (tmp0, tmp0); + perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0); + tmp1 = vec_lvsl (stride, ref); + tmp1 = vec_mergeh (tmp1, tmp1); + perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1); + + height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + ref += stride; + tmp0 = vec_perm (ref0, ref1, perm0); + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + ref += stride; + prev = vec_ld (0, dest); + tmp1 = vec_perm (ref0, ref1, perm1); + tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + ref += stride; + prev = vec_ld (stride, dest); + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); + dest += stride; + tmp0 = vec_perm (ref0, ref1, perm0); + tmp = 
vec_avg (prev, vec_avg (tmp0, tmp1)); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + ref += stride; + prev = vec_ld (stride, dest); + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); + dest += stride; + tmp1 = vec_perm (ref0, ref1, perm1); + tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + prev = vec_ld (stride, dest); + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); + dest += stride; + tmp0 = vec_perm (ref0, ref1, perm0); + tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); +} + +void MC_avg_xy_16_altivec (unsigned char * dest, unsigned char * ref, + int stride, int height) +{ + vector_u8_t permA, permB, ref0, ref1, A, B, avg0, avg1, xor0, xor1, tmp; + vector_u8_t ones, prev; + + ones = vec_splat_u8 (1); + permA = vec_lvsl (0, ref); + permB = vec_add (permA, ones); + + height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + A = vec_perm (ref0, ref1, permA); + B = vec_perm (ref0, ref1, permB); + avg0 = vec_avg (A, B); + xor0 = vec_xor (A, B); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + prev = vec_ld (0, dest); + A = vec_perm (ref0, ref1, permA); + B = vec_perm (ref0, ref1, permB); + avg1 = vec_avg (A, B); + xor1 = vec_xor (A, B); + tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1)))); + + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + prev = vec_ld (stride, dest); + vec_st (tmp, 0, dest); + A = vec_perm (ref0, ref1, permA); + B = vec_perm (ref0, ref1, permB); + avg0 = vec_avg (A, B); + xor0 = vec_xor (A, B); + tmp = vec_avg (prev, + vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and 
(ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1)))); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + prev = vec_ld (2*stride, dest); + vec_st (tmp, stride, dest); + dest += 2*stride; + A = vec_perm (ref0, ref1, permA); + B = vec_perm (ref0, ref1, permB); + avg1 = vec_avg (A, B); + xor1 = vec_xor (A, B); + tmp = vec_avg (prev, + vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1)))); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + prev = vec_ld (stride, dest); + vec_st (tmp, 0, dest); + A = vec_perm (ref0, ref1, permA); + B = vec_perm (ref0, ref1, permB); + avg0 = vec_avg (A, B); + xor0 = vec_xor (A, B); + tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1)))); + vec_st (tmp, stride, dest); +} + +void MC_avg_xy_8_altivec (unsigned char * dest, unsigned char * ref, + int stride, int height) +{ + vector_u8_t perm0A, perm0B, perm1A, perm1B, ref0, ref1, A, B; + vector_u8_t avg0, avg1, xor0, xor1, tmp, ones, prev; + + ones = vec_splat_u8 (1); + perm0A = vec_lvsl (0, ref); + perm0A = vec_mergeh (perm0A, perm0A); + perm0A = vec_pack ((vector_u16_t)perm0A, (vector_u16_t)perm0A); + perm0B = vec_add (perm0A, ones); + perm1A = vec_lvsl (stride, ref); + perm1A = vec_mergeh (perm1A, perm1A); + perm1A = vec_pack ((vector_u16_t)perm1A, (vector_u16_t)perm1A); + perm1B = vec_add (perm1A, ones); + + height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + A = vec_perm (ref0, ref1, perm0A); + B = vec_perm (ref0, ref1, perm0B); + avg0 = vec_avg (A, B); + xor0 = vec_xor (A, B); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + prev = vec_ld (0, dest); + A = vec_perm (ref0, ref1, perm1A); + B = vec_perm (ref0, ref1, perm1B); + avg1 = vec_avg (A, B); + xor1 = vec_xor (A, B); + tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1), + vec_and 
(vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1)))); + + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + prev = vec_ld (stride, dest); + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); + dest += stride; + A = vec_perm (ref0, ref1, perm0A); + B = vec_perm (ref0, ref1, perm0B); + avg0 = vec_avg (A, B); + xor0 = vec_xor (A, B); + tmp = vec_avg (prev, + vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1)))); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + prev = vec_ld (stride, dest); + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); + dest += stride; + A = vec_perm (ref0, ref1, perm1A); + B = vec_perm (ref0, ref1, perm1B); + avg1 = vec_avg (A, B); + xor1 = vec_xor (A, B); + tmp = vec_avg (prev, + vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1)))); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + prev = vec_ld (stride, dest); + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); + dest += stride; + A = vec_perm (ref0, ref1, perm0A); + B = vec_perm (ref0, ref1, perm0B); + avg0 = vec_avg (A, B); + xor0 = vec_xor (A, B); + tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1)))); + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); +} + +MPEG2_MC_EXTERN (altivec) + +#endif /* ENABLE_ALTIVEC */ + +#endif /* HOST_OS_DARWIN */ + diff --git a/src/video_dec/libmpeg2/motion_comp_mlib.c b/src/video_dec/libmpeg2/motion_comp_mlib.c new file mode 100644 index 000000000..1a37070ae --- /dev/null +++ b/src/video_dec/libmpeg2/motion_comp_mlib.c @@ -0,0 +1,181 @@ +/* + * 
motion_comp_mlib.c + * Copyright (C) 2000-2002 Håkan Hjort + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#ifdef LIBMPEG2_MLIB + +#include +#include +#include +#include +#include + +#include "mpeg2_internal.h" + +static void MC_put_o_16_mlib (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + if (height == 16) + mlib_VideoCopyRef_U8_U8_16x16 (dest, ref, stride); + else + mlib_VideoCopyRef_U8_U8_16x8 (dest, ref, stride); +} + +static void MC_put_x_16_mlib (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + if (height == 16) + mlib_VideoInterpX_U8_U8_16x16 (dest, ref, stride, stride); + else + mlib_VideoInterpX_U8_U8_16x8 (dest, ref, stride, stride); +} + +static void MC_put_y_16_mlib (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + if (height == 16) + mlib_VideoInterpY_U8_U8_16x16 (dest, ref, stride, stride); + else + mlib_VideoInterpY_U8_U8_16x8 (dest, ref, stride, stride); +} + +static void MC_put_xy_16_mlib (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + if (height == 16) + mlib_VideoInterpXY_U8_U8_16x16 (dest, ref, stride, stride); + else + mlib_VideoInterpXY_U8_U8_16x8 (dest, ref, stride, 
stride); +} + +static void MC_put_o_8_mlib (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + if (height == 8) + mlib_VideoCopyRef_U8_U8_8x8 (dest, ref, stride); + else + mlib_VideoCopyRef_U8_U8_8x4 (dest, ref, stride); +} + +static void MC_put_x_8_mlib (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + if (height == 8) + mlib_VideoInterpX_U8_U8_8x8 (dest, ref, stride, stride); + else + mlib_VideoInterpX_U8_U8_8x4 (dest, ref, stride, stride); +} + +static void MC_put_y_8_mlib (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + if (height == 8) + mlib_VideoInterpY_U8_U8_8x8 (dest, ref, stride, stride); + else + mlib_VideoInterpY_U8_U8_8x4 (dest, ref, stride, stride); +} + +static void MC_put_xy_8_mlib (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + if (height == 8) + mlib_VideoInterpXY_U8_U8_8x8 (dest, ref, stride, stride); + else + mlib_VideoInterpXY_U8_U8_8x4 (dest, ref, stride, stride); +} + +static void MC_avg_o_16_mlib (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + if (height == 16) + mlib_VideoCopyRefAve_U8_U8_16x16 (dest, ref, stride); + else + mlib_VideoCopyRefAve_U8_U8_16x8 (dest, ref, stride); +} + +static void MC_avg_x_16_mlib (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + if (height == 16) + mlib_VideoInterpAveX_U8_U8_16x16 (dest, ref, stride, stride); + else + mlib_VideoInterpAveX_U8_U8_16x8 (dest, ref, stride, stride); +} + +static void MC_avg_y_16_mlib (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + if (height == 16) + mlib_VideoInterpAveY_U8_U8_16x16 (dest, ref, stride, stride); + else + mlib_VideoInterpAveY_U8_U8_16x8 (dest, ref, stride, stride); +} + +static void MC_avg_xy_16_mlib (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + if (height == 16) + mlib_VideoInterpAveXY_U8_U8_16x16 (dest, ref, stride, stride); + else + mlib_VideoInterpAveXY_U8_U8_16x8 (dest, ref, stride, stride); +} + +static void MC_avg_o_8_mlib (uint8_t * dest, uint8_t * 
ref, + int stride, int height) +{ + if (height == 8) + mlib_VideoCopyRefAve_U8_U8_8x8 (dest, ref, stride); + else + mlib_VideoCopyRefAve_U8_U8_8x4 (dest, ref, stride); +} + +static void MC_avg_x_8_mlib (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + if (height == 8) + mlib_VideoInterpAveX_U8_U8_8x8 (dest, ref, stride, stride); + else + mlib_VideoInterpAveX_U8_U8_8x4 (dest, ref, stride, stride); +} + +static void MC_avg_y_8_mlib (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + if (height == 8) + mlib_VideoInterpAveY_U8_U8_8x8 (dest, ref, stride, stride); + else + mlib_VideoInterpAveY_U8_U8_8x4 (dest, ref, stride, stride); +} + +static void MC_avg_xy_8_mlib (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + if (height == 8) + mlib_VideoInterpAveXY_U8_U8_8x8 (dest, ref, stride, stride); + else + mlib_VideoInterpAveXY_U8_U8_8x4 (dest, ref, stride, stride); +} + +MPEG2_MC_EXTERN (mlib) + +#endif diff --git a/src/video_dec/libmpeg2/motion_comp_mmx.c b/src/video_dec/libmpeg2/motion_comp_mmx.c new file mode 100644 index 000000000..f9b1f085d --- /dev/null +++ b/src/video_dec/libmpeg2/motion_comp_mmx.c @@ -0,0 +1,1012 @@ +/* + * motion_comp_mmx.c + * Copyright (C) 2000-2002 Michel Lespinasse + * Copyright (C) 1999-2000 Aaron Holtzman + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#if defined(ARCH_X86) || defined(ARCH_X86_64) + +#include + +#include "mpeg2_internal.h" +#include + +#define CPU_MMXEXT 0 +#define CPU_3DNOW 1 + + +/* MMX code - needs a rewrite */ + +/* some rounding constants */ +static mmx_t round1 = {0x0001000100010001LL}; +static mmx_t round4 = {0x0002000200020002LL}; + +/* + * This code should probably be compiled with loop unrolling + * (i.e., -funroll-loops in gcc) because some of the loops + * use a small static number of iterations. This was written + * with the assumption the compiler knows best about when + * unrolling will help + */ + +static inline void mmx_zero_reg () +{ + /* load 0 into mm0 */ + pxor_r2r (mm0, mm0); +} + +static inline void mmx_average_2_U8 (uint8_t * dest, + uint8_t * src1, uint8_t * src2) +{ + /* *dest = (*src1 + *src2 + 1)/ 2; */ + + movq_m2r (*src1, mm1); // load 8 src1 bytes + movq_r2r (mm1, mm2); // copy 8 src1 bytes + + movq_m2r (*src2, mm3); // load 8 src2 bytes + movq_r2r (mm3, mm4); // copy 8 src2 bytes + + punpcklbw_r2r (mm0, mm1); // unpack low src1 bytes + punpckhbw_r2r (mm0, mm2); // unpack high src1 bytes + + punpcklbw_r2r (mm0, mm3); // unpack low src2 bytes + punpckhbw_r2r (mm0, mm4); // unpack high src2 bytes + + paddw_r2r (mm3, mm1); // add lows to mm1 + paddw_m2r (round1, mm1); + psraw_i2r (1, mm1); // /2 + + paddw_r2r (mm4, mm2); // add highs to mm2 + paddw_m2r (round1, mm2); + psraw_i2r (1, mm2); // /2 + + packuswb_r2r (mm2, mm1); // pack (w/ saturation) + movq_r2m (mm1, *dest); // store result in dest +} + +static inline void mmx_interp_average_2_U8 (uint8_t * dest, + uint8_t * src1, uint8_t * src2) +{ + /* *dest = (*dest + (*src1 + *src2 + 1)/ 2 + 1)/ 2; */ + + movq_m2r (*dest, mm1); // load 8 dest bytes + movq_r2r (mm1, mm2); // copy 8 
dest bytes + + movq_m2r (*src1, mm3); // load 8 src1 bytes + movq_r2r (mm3, mm4); // copy 8 src1 bytes + + movq_m2r (*src2, mm5); // load 8 src2 bytes + movq_r2r (mm5, mm6); // copy 8 src2 bytes + + punpcklbw_r2r (mm0, mm1); // unpack low dest bytes + punpckhbw_r2r (mm0, mm2); // unpack high dest bytes + + punpcklbw_r2r (mm0, mm3); // unpack low src1 bytes + punpckhbw_r2r (mm0, mm4); // unpack high src1 bytes + + punpcklbw_r2r (mm0, mm5); // unpack low src2 bytes + punpckhbw_r2r (mm0, mm6); // unpack high src2 bytes + + paddw_r2r (mm5, mm3); // add lows + paddw_m2r (round1, mm3); + psraw_i2r (1, mm3); // /2 + + paddw_r2r (mm6, mm4); // add highs + paddw_m2r (round1, mm4); + psraw_i2r (1, mm4); // /2 + + paddw_r2r (mm3, mm1); // add lows + paddw_m2r (round1, mm1); + psraw_i2r (1, mm1); // /2 + + paddw_r2r (mm4, mm2); // add highs + paddw_m2r (round1, mm2); + psraw_i2r (1, mm2); // /2 + + packuswb_r2r (mm2, mm1); // pack (w/ saturation) + movq_r2m (mm1, *dest); // store result in dest +} + +static inline void mmx_average_4_U8 (uint8_t * dest, + uint8_t * src1, uint8_t * src2, + uint8_t * src3, uint8_t * src4) +{ + /* *dest = (*src1 + *src2 + *src3 + *src4 + 2)/ 4; */ + + movq_m2r (*src1, mm1); // load 8 src1 bytes + movq_r2r (mm1, mm2); // copy 8 src1 bytes + + punpcklbw_r2r (mm0, mm1); // unpack low src1 bytes + punpckhbw_r2r (mm0, mm2); // unpack high src1 bytes + + movq_m2r (*src2, mm3); // load 8 src2 bytes + movq_r2r (mm3, mm4); // copy 8 src2 bytes + + punpcklbw_r2r (mm0, mm3); // unpack low src2 bytes + punpckhbw_r2r (mm0, mm4); // unpack high src2 bytes + + paddw_r2r (mm3, mm1); // add lows + paddw_r2r (mm4, mm2); // add highs + + /* now have partials in mm1 and mm2 */ + + movq_m2r (*src3, mm3); // load 8 src3 bytes + movq_r2r (mm3, mm4); // copy 8 src3 bytes + + punpcklbw_r2r (mm0, mm3); // unpack low src3 bytes + punpckhbw_r2r (mm0, mm4); // unpack high src3 bytes + + paddw_r2r (mm3, mm1); // add lows + paddw_r2r (mm4, mm2); // add highs + + movq_m2r 
(*src4, mm5); // load 8 src4 bytes + movq_r2r (mm5, mm6); // copy 8 src4 bytes + + punpcklbw_r2r (mm0, mm5); // unpack low src4 bytes + punpckhbw_r2r (mm0, mm6); // unpack high src4 bytes + + paddw_r2r (mm5, mm1); // add lows + paddw_r2r (mm6, mm2); // add highs + + /* now have subtotal in mm1 and mm2 */ + + paddw_m2r (round4, mm1); + psraw_i2r (2, mm1); // /4 + paddw_m2r (round4, mm2); + psraw_i2r (2, mm2); // /4 + + packuswb_r2r (mm2, mm1); // pack (w/ saturation) + movq_r2m (mm1, *dest); // store result in dest +} + +static inline void mmx_interp_average_4_U8 (uint8_t * dest, + uint8_t * src1, uint8_t * src2, + uint8_t * src3, uint8_t * src4) +{ + /* *dest = (*dest + (*src1 + *src2 + *src3 + *src4 + 2)/ 4 + 1)/ 2; */ + + movq_m2r (*src1, mm1); // load 8 src1 bytes + movq_r2r (mm1, mm2); // copy 8 src1 bytes + + punpcklbw_r2r (mm0, mm1); // unpack low src1 bytes + punpckhbw_r2r (mm0, mm2); // unpack high src1 bytes + + movq_m2r (*src2, mm3); // load 8 src2 bytes + movq_r2r (mm3, mm4); // copy 8 src2 bytes + + punpcklbw_r2r (mm0, mm3); // unpack low src2 bytes + punpckhbw_r2r (mm0, mm4); // unpack high src2 bytes + + paddw_r2r (mm3, mm1); // add lows + paddw_r2r (mm4, mm2); // add highs + + /* now have partials in mm1 and mm2 */ + + movq_m2r (*src3, mm3); // load 8 src3 bytes + movq_r2r (mm3, mm4); // copy 8 src3 bytes + + punpcklbw_r2r (mm0, mm3); // unpack low src3 bytes + punpckhbw_r2r (mm0, mm4); // unpack high src3 bytes + + paddw_r2r (mm3, mm1); // add lows + paddw_r2r (mm4, mm2); // add highs + + movq_m2r (*src4, mm5); // load 8 src4 bytes + movq_r2r (mm5, mm6); // copy 8 src4 bytes + + punpcklbw_r2r (mm0, mm5); // unpack low src4 bytes + punpckhbw_r2r (mm0, mm6); // unpack high src4 bytes + + paddw_r2r (mm5, mm1); // add lows + paddw_r2r (mm6, mm2); // add highs + + paddw_m2r (round4, mm1); + psraw_i2r (2, mm1); // /4 + paddw_m2r (round4, mm2); + psraw_i2r (2, mm2); // /4 + + /* now have subtotal/4 in mm1 and mm2 */ + + movq_m2r (*dest, mm3); // load 8 
dest bytes + movq_r2r (mm3, mm4); // copy 8 dest bytes + + punpcklbw_r2r (mm0, mm3); // unpack low dest bytes + punpckhbw_r2r (mm0, mm4); // unpack high dest bytes + + paddw_r2r (mm3, mm1); // add lows + paddw_r2r (mm4, mm2); // add highs + + paddw_m2r (round1, mm1); + psraw_i2r (1, mm1); // /2 + paddw_m2r (round1, mm2); + psraw_i2r (1, mm2); // /2 + + /* now have end value in mm1 and mm2 */ + + packuswb_r2r (mm2, mm1); // pack (w/ saturation) + movq_r2m (mm1,*dest); // store result in dest +} + +/*-----------------------------------------------------------------------*/ + +static inline void MC_avg_mmx (int width, int height, + uint8_t * dest, uint8_t * ref, int stride) +{ + mmx_zero_reg (); + + do { + mmx_average_2_U8 (dest, dest, ref); + + if (width == 16) + mmx_average_2_U8 (dest+8, dest+8, ref+8); + + dest += stride; + ref += stride; + } while (--height); +} + +static void MC_avg_o_16_mmx (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_avg_mmx (16, height, dest, ref, stride); +} + +static void MC_avg_o_8_mmx (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_avg_mmx (8, height, dest, ref, stride); +} + +/*-----------------------------------------------------------------------*/ + +static inline void MC_put_mmx (int width, int height, + uint8_t * dest, uint8_t * ref, int stride) +{ + mmx_zero_reg (); + + do { + movq_m2r (* ref, mm1); // load 8 ref bytes + movq_r2m (mm1,* dest); // store 8 bytes at curr + + if (width == 16) + { + movq_m2r (* (ref+8), mm1); // load 8 ref bytes + movq_r2m (mm1,* (dest+8)); // store 8 bytes at curr + } + + dest += stride; + ref += stride; + } while (--height); +} + +static void MC_put_o_16_mmx (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_put_mmx (16, height, dest, ref, stride); +} + +static void MC_put_o_8_mmx (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_put_mmx (8, height, dest, ref, stride); +} + 
+/*-----------------------------------------------------------------------*/ + +/* Half pixel interpolation in the x direction */ +static inline void MC_avg_x_mmx (int width, int height, + uint8_t * dest, uint8_t * ref, int stride) +{ + mmx_zero_reg (); + + do { + mmx_interp_average_2_U8 (dest, ref, ref+1); + + if (width == 16) + mmx_interp_average_2_U8 (dest+8, ref+8, ref+9); + + dest += stride; + ref += stride; + } while (--height); +} + +static void MC_avg_x_16_mmx (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_avg_x_mmx (16, height, dest, ref, stride); +} + +static void MC_avg_x_8_mmx (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_avg_x_mmx (8, height, dest, ref, stride); +} + +/*-----------------------------------------------------------------------*/ + +static inline void MC_put_x_mmx (int width, int height, + uint8_t * dest, uint8_t * ref, int stride) +{ + mmx_zero_reg (); + + do { + mmx_average_2_U8 (dest, ref, ref+1); + + if (width == 16) + mmx_average_2_U8 (dest+8, ref+8, ref+9); + + dest += stride; + ref += stride; + } while (--height); +} + +static void MC_put_x_16_mmx (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_put_x_mmx (16, height, dest, ref, stride); +} + +static void MC_put_x_8_mmx (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_put_x_mmx (8, height, dest, ref, stride); +} + +/*-----------------------------------------------------------------------*/ + +static inline void MC_avg_xy_mmx (int width, int height, + uint8_t * dest, uint8_t * ref, int stride) +{ + uint8_t * ref_next = ref+stride; + + mmx_zero_reg (); + + do { + mmx_interp_average_4_U8 (dest, ref, ref+1, ref_next, ref_next+1); + + if (width == 16) + mmx_interp_average_4_U8 (dest+8, ref+8, ref+9, + ref_next+8, ref_next+9); + + dest += stride; + ref += stride; + ref_next += stride; + } while (--height); +} + +static void MC_avg_xy_16_mmx (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_avg_xy_mmx 
(16, height, dest, ref, stride); +} + +static void MC_avg_xy_8_mmx (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_avg_xy_mmx (8, height, dest, ref, stride); +} + +/*-----------------------------------------------------------------------*/ + +static inline void MC_put_xy_mmx (int width, int height, + uint8_t * dest, uint8_t * ref, int stride) +{ + uint8_t * ref_next = ref+stride; + + mmx_zero_reg (); + + do { + mmx_average_4_U8 (dest, ref, ref+1, ref_next, ref_next+1); + + if (width == 16) + mmx_average_4_U8 (dest+8, ref+8, ref+9, ref_next+8, ref_next+9); + + dest += stride; + ref += stride; + ref_next += stride; + } while (--height); +} + +static void MC_put_xy_16_mmx (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_put_xy_mmx (16, height, dest, ref, stride); +} + +static void MC_put_xy_8_mmx (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_put_xy_mmx (8, height, dest, ref, stride); +} + +/*-----------------------------------------------------------------------*/ + +static inline void MC_avg_y_mmx (int width, int height, + uint8_t * dest, uint8_t * ref, int stride) +{ + uint8_t * ref_next = ref+stride; + + mmx_zero_reg (); + + do { + mmx_interp_average_2_U8 (dest, ref, ref_next); + + if (width == 16) + mmx_interp_average_2_U8 (dest+8, ref+8, ref_next+8); + + dest += stride; + ref += stride; + ref_next += stride; + } while (--height); +} + +static void MC_avg_y_16_mmx (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_avg_y_mmx (16, height, dest, ref, stride); +} + +static void MC_avg_y_8_mmx (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_avg_y_mmx (8, height, dest, ref, stride); +} + +/*-----------------------------------------------------------------------*/ + +static inline void MC_put_y_mmx (int width, int height, + uint8_t * dest, uint8_t * ref, int stride) +{ + uint8_t * ref_next = ref+stride; + + mmx_zero_reg (); + + do { + mmx_average_2_U8 (dest, ref, ref_next); + + if 
(width == 16) + mmx_average_2_U8 (dest+8, ref+8, ref_next+8); + + dest += stride; + ref += stride; + ref_next += stride; + } while (--height); +} + +static void MC_put_y_16_mmx (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_put_y_mmx (16, height, dest, ref, stride); +} + +static void MC_put_y_8_mmx (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_put_y_mmx (8, height, dest, ref, stride); +} + + +MPEG2_MC_EXTERN (mmx) + + + + + + + +/* CPU_MMXEXT/CPU_3DNOW adaptation layer */ + +#define pavg_r2r(src,dest) \ +do { \ + if (cpu == CPU_MMXEXT) \ + pavgb_r2r (src, dest); \ + else \ + pavgusb_r2r (src, dest); \ +} while (0) + +#define pavg_m2r(src,dest) \ +do { \ + if (cpu == CPU_MMXEXT) \ + pavgb_m2r (src, dest); \ + else \ + pavgusb_m2r (src, dest); \ +} while (0) + + +/* CPU_MMXEXT code */ + + +static inline void MC_put1_8 (int height, uint8_t * dest, uint8_t * ref, + int stride) +{ + do { + movq_m2r (*ref, mm0); + movq_r2m (mm0, *dest); + ref += stride; + dest += stride; + } while (--height); +} + +static inline void MC_put1_16 (int height, uint8_t * dest, uint8_t * ref, + int stride) +{ + do { + movq_m2r (*ref, mm0); + movq_m2r (*(ref+8), mm1); + ref += stride; + movq_r2m (mm0, *dest); + movq_r2m (mm1, *(dest+8)); + dest += stride; + } while (--height); +} + +static inline void MC_avg1_8 (int height, uint8_t * dest, uint8_t * ref, + int stride, int cpu) +{ + do { + movq_m2r (*ref, mm0); + pavg_m2r (*dest, mm0); + ref += stride; + movq_r2m (mm0, *dest); + dest += stride; + } while (--height); +} + +static inline void MC_avg1_16 (int height, uint8_t * dest, uint8_t * ref, + int stride, int cpu) +{ + do { + movq_m2r (*ref, mm0); + movq_m2r (*(ref+8), mm1); + pavg_m2r (*dest, mm0); + pavg_m2r (*(dest+8), mm1); + movq_r2m (mm0, *dest); + ref += stride; + movq_r2m (mm1, *(dest+8)); + dest += stride; + } while (--height); +} + +static inline void MC_put2_8 (int height, uint8_t * dest, uint8_t * ref, + int stride, int offset, int cpu) +{ + 
do { + movq_m2r (*ref, mm0); + pavg_m2r (*(ref+offset), mm0); + ref += stride; + movq_r2m (mm0, *dest); + dest += stride; + } while (--height); +} + +static inline void MC_put2_16 (int height, uint8_t * dest, uint8_t * ref, + int stride, int offset, int cpu) +{ + do { + movq_m2r (*ref, mm0); + movq_m2r (*(ref+8), mm1); + pavg_m2r (*(ref+offset), mm0); + pavg_m2r (*(ref+offset+8), mm1); + movq_r2m (mm0, *dest); + ref += stride; + movq_r2m (mm1, *(dest+8)); + dest += stride; + } while (--height); +} + +static inline void MC_avg2_8 (int height, uint8_t * dest, uint8_t * ref, + int stride, int offset, int cpu) +{ + do { + movq_m2r (*ref, mm0); + pavg_m2r (*(ref+offset), mm0); + pavg_m2r (*dest, mm0); + ref += stride; + movq_r2m (mm0, *dest); + dest += stride; + } while (--height); +} + +static inline void MC_avg2_16 (int height, uint8_t * dest, uint8_t * ref, + int stride, int offset, int cpu) +{ + do { + movq_m2r (*ref, mm0); + movq_m2r (*(ref+8), mm1); + pavg_m2r (*(ref+offset), mm0); + pavg_m2r (*(ref+offset+8), mm1); + pavg_m2r (*dest, mm0); + pavg_m2r (*(dest+8), mm1); + ref += stride; + movq_r2m (mm0, *dest); + movq_r2m (mm1, *(dest+8)); + dest += stride; + } while (--height); +} + +static mmx_t mask_one = {0x0101010101010101LL}; + +static inline void MC_put4_8 (int height, uint8_t * dest, uint8_t * ref, + int stride, int cpu) +{ + movq_m2r (*ref, mm0); + movq_m2r (*(ref+1), mm1); + movq_r2r (mm0, mm7); + pxor_r2r (mm1, mm7); + pavg_r2r (mm1, mm0); + ref += stride; + + do { + movq_m2r (*ref, mm2); + movq_r2r (mm0, mm5); + + movq_m2r (*(ref+1), mm3); + movq_r2r (mm2, mm6); + + pxor_r2r (mm3, mm6); + pavg_r2r (mm3, mm2); + + por_r2r (mm6, mm7); + pxor_r2r (mm2, mm5); + + pand_r2r (mm5, mm7); + pavg_r2r (mm2, mm0); + + pand_m2r (mask_one, mm7); + + psubusb_r2r (mm7, mm0); + + ref += stride; + movq_r2m (mm0, *dest); + dest += stride; + + movq_r2r (mm6, mm7); // unroll ! + movq_r2r (mm2, mm0); // unroll ! 
+ } while (--height); +} + +static inline void MC_put4_16 (int height, uint8_t * dest, uint8_t * ref, + int stride, int cpu) +{ + do { + movq_m2r (*ref, mm0); + movq_m2r (*(ref+stride+1), mm1); + movq_r2r (mm0, mm7); + movq_m2r (*(ref+1), mm2); + pxor_r2r (mm1, mm7); + movq_m2r (*(ref+stride), mm3); + movq_r2r (mm2, mm6); + pxor_r2r (mm3, mm6); + pavg_r2r (mm1, mm0); + pavg_r2r (mm3, mm2); + por_r2r (mm6, mm7); + movq_r2r (mm0, mm6); + pxor_r2r (mm2, mm6); + pand_r2r (mm6, mm7); + pand_m2r (mask_one, mm7); + pavg_r2r (mm2, mm0); + psubusb_r2r (mm7, mm0); + movq_r2m (mm0, *dest); + + movq_m2r (*(ref+8), mm0); + movq_m2r (*(ref+stride+9), mm1); + movq_r2r (mm0, mm7); + movq_m2r (*(ref+9), mm2); + pxor_r2r (mm1, mm7); + movq_m2r (*(ref+stride+8), mm3); + movq_r2r (mm2, mm6); + pxor_r2r (mm3, mm6); + pavg_r2r (mm1, mm0); + pavg_r2r (mm3, mm2); + por_r2r (mm6, mm7); + movq_r2r (mm0, mm6); + pxor_r2r (mm2, mm6); + pand_r2r (mm6, mm7); + pand_m2r (mask_one, mm7); + pavg_r2r (mm2, mm0); + psubusb_r2r (mm7, mm0); + ref += stride; + movq_r2m (mm0, *(dest+8)); + dest += stride; + } while (--height); +} + +static inline void MC_avg4_8 (int height, uint8_t * dest, uint8_t * ref, + int stride, int cpu) +{ + do { + movq_m2r (*ref, mm0); + movq_m2r (*(ref+stride+1), mm1); + movq_r2r (mm0, mm7); + movq_m2r (*(ref+1), mm2); + pxor_r2r (mm1, mm7); + movq_m2r (*(ref+stride), mm3); + movq_r2r (mm2, mm6); + pxor_r2r (mm3, mm6); + pavg_r2r (mm1, mm0); + pavg_r2r (mm3, mm2); + por_r2r (mm6, mm7); + movq_r2r (mm0, mm6); + pxor_r2r (mm2, mm6); + pand_r2r (mm6, mm7); + pand_m2r (mask_one, mm7); + pavg_r2r (mm2, mm0); + psubusb_r2r (mm7, mm0); + movq_m2r (*dest, mm1); + pavg_r2r (mm1, mm0); + ref += stride; + movq_r2m (mm0, *dest); + dest += stride; + } while (--height); +} + +static inline void MC_avg4_16 (int height, uint8_t * dest, uint8_t * ref, + int stride, int cpu) +{ + do { + movq_m2r (*ref, mm0); + movq_m2r (*(ref+stride+1), mm1); + movq_r2r (mm0, mm7); + movq_m2r (*(ref+1), mm2); + 
pxor_r2r (mm1, mm7); + movq_m2r (*(ref+stride), mm3); + movq_r2r (mm2, mm6); + pxor_r2r (mm3, mm6); + pavg_r2r (mm1, mm0); + pavg_r2r (mm3, mm2); + por_r2r (mm6, mm7); + movq_r2r (mm0, mm6); + pxor_r2r (mm2, mm6); + pand_r2r (mm6, mm7); + pand_m2r (mask_one, mm7); + pavg_r2r (mm2, mm0); + psubusb_r2r (mm7, mm0); + movq_m2r (*dest, mm1); + pavg_r2r (mm1, mm0); + movq_r2m (mm0, *dest); + + movq_m2r (*(ref+8), mm0); + movq_m2r (*(ref+stride+9), mm1); + movq_r2r (mm0, mm7); + movq_m2r (*(ref+9), mm2); + pxor_r2r (mm1, mm7); + movq_m2r (*(ref+stride+8), mm3); + movq_r2r (mm2, mm6); + pxor_r2r (mm3, mm6); + pavg_r2r (mm1, mm0); + pavg_r2r (mm3, mm2); + por_r2r (mm6, mm7); + movq_r2r (mm0, mm6); + pxor_r2r (mm2, mm6); + pand_r2r (mm6, mm7); + pand_m2r (mask_one, mm7); + pavg_r2r (mm2, mm0); + psubusb_r2r (mm7, mm0); + movq_m2r (*(dest+8), mm1); + pavg_r2r (mm1, mm0); + ref += stride; + movq_r2m (mm0, *(dest+8)); + dest += stride; + } while (--height); +} + +static void MC_avg_o_16_mmxext (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_avg1_16 (height, dest, ref, stride, CPU_MMXEXT); +} + +static void MC_avg_o_8_mmxext (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_avg1_8 (height, dest, ref, stride, CPU_MMXEXT); +} + +static void MC_put_o_16_mmxext (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_put1_16 (height, dest, ref, stride); +} + +static void MC_put_o_8_mmxext (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_put1_8 (height, dest, ref, stride); +} + +static void MC_avg_x_16_mmxext (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_avg2_16 (height, dest, ref, stride, 1, CPU_MMXEXT); +} + +static void MC_avg_x_8_mmxext (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_avg2_8 (height, dest, ref, stride, 1, CPU_MMXEXT); +} + +static void MC_put_x_16_mmxext (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_put2_16 (height, dest, ref, stride, 1, 
CPU_MMXEXT); +} + +static void MC_put_x_8_mmxext (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_put2_8 (height, dest, ref, stride, 1, CPU_MMXEXT); +} + +static void MC_avg_y_16_mmxext (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_avg2_16 (height, dest, ref, stride, stride, CPU_MMXEXT); +} + +static void MC_avg_y_8_mmxext (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_avg2_8 (height, dest, ref, stride, stride, CPU_MMXEXT); +} + +static void MC_put_y_16_mmxext (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_put2_16 (height, dest, ref, stride, stride, CPU_MMXEXT); +} + +static void MC_put_y_8_mmxext (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_put2_8 (height, dest, ref, stride, stride, CPU_MMXEXT); +} + +static void MC_avg_xy_16_mmxext (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_avg4_16 (height, dest, ref, stride, CPU_MMXEXT); +} + +static void MC_avg_xy_8_mmxext (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_avg4_8 (height, dest, ref, stride, CPU_MMXEXT); +} + +static void MC_put_xy_16_mmxext (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_put4_16 (height, dest, ref, stride, CPU_MMXEXT); +} + +static void MC_put_xy_8_mmxext (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_put4_8 (height, dest, ref, stride, CPU_MMXEXT); +} + + +MPEG2_MC_EXTERN (mmxext) + + + +static void MC_avg_o_16_3dnow (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_avg1_16 (height, dest, ref, stride, CPU_3DNOW); +} + +static void MC_avg_o_8_3dnow (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_avg1_8 (height, dest, ref, stride, CPU_3DNOW); +} + +static void MC_put_o_16_3dnow (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_put1_16 (height, dest, ref, stride); +} + +static void MC_put_o_8_3dnow (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_put1_8 (height, 
dest, ref, stride); +} + +static void MC_avg_x_16_3dnow (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_avg2_16 (height, dest, ref, stride, 1, CPU_3DNOW); +} + +static void MC_avg_x_8_3dnow (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_avg2_8 (height, dest, ref, stride, 1, CPU_3DNOW); +} + +static void MC_put_x_16_3dnow (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_put2_16 (height, dest, ref, stride, 1, CPU_3DNOW); +} + +static void MC_put_x_8_3dnow (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_put2_8 (height, dest, ref, stride, 1, CPU_3DNOW); +} + +static void MC_avg_y_16_3dnow (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_avg2_16 (height, dest, ref, stride, stride, CPU_3DNOW); +} + +static void MC_avg_y_8_3dnow (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_avg2_8 (height, dest, ref, stride, stride, CPU_3DNOW); +} + +static void MC_put_y_16_3dnow (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_put2_16 (height, dest, ref, stride, stride, CPU_3DNOW); +} + +static void MC_put_y_8_3dnow (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_put2_8 (height, dest, ref, stride, stride, CPU_3DNOW); +} + +static void MC_avg_xy_16_3dnow (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_avg4_16 (height, dest, ref, stride, CPU_3DNOW); +} + +static void MC_avg_xy_8_3dnow (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_avg4_8 (height, dest, ref, stride, CPU_3DNOW); +} + +static void MC_put_xy_16_3dnow (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_put4_16 (height, dest, ref, stride, CPU_3DNOW); +} + +static void MC_put_xy_8_3dnow (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_put4_8 (height, dest, ref, stride, CPU_3DNOW); +} + + +MPEG2_MC_EXTERN (3dnow) + +#endif diff --git a/src/video_dec/libmpeg2/motion_comp_vis.c b/src/video_dec/libmpeg2/motion_comp_vis.c new 
file mode 100644 index 000000000..d0a6673d6 --- /dev/null +++ b/src/video_dec/libmpeg2/motion_comp_vis.c @@ -0,0 +1,2059 @@ +/* + * motion_comp_vis.c + * Copyright (C) 2003 David S. Miller + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#if defined(ARCH_SPARC) && defined(ENABLE_VIS) + +#include <inttypes.h> + +#include "mpeg2_internal.h" +#include "vis.h" + +/* The trick used in some of this file is the formula from the MMX + * motion comp code, which is: + * + * (x+y+1)>>1 == (x|y)-((x^y)>>1) + * + * This allows us to average 8 bytes at a time in a 64-bit FPU reg. + * We avoid overflows by masking before we do the shift, and we + * implement the shift by multiplying by 1/2 using mul8x16.
So in + * VIS this is (assume 'x' is in f0, 'y' is in f2, a repeating mask + * of '0xfe' is in f4, a repeating mask of '0x7f' is in f6, and + * the value 0x80808080 is in f8): + * + * fxor f0, f2, f10 + * fand f10, f4, f10 + * fmul8x16 f8, f10, f10 + * fand f10, f6, f10 + * for f0, f2, f12 + * fpsub16 f12, f10, f10 + */ + +#define DUP4(x) {x, x, x, x} +#define DUP8(x) {x, x, x, x, x, x, x, x} +static const int16_t constants1[] ATTR_ALIGN(8) = DUP4 (1); +static const int16_t constants2[] ATTR_ALIGN(8) = DUP4 (2); +static const int16_t constants3[] ATTR_ALIGN(8) = DUP4 (3); +static const int16_t constants6[] ATTR_ALIGN(8) = DUP4 (6); +static const int8_t constants_fe[] ATTR_ALIGN(8) = DUP8 (0xfe); +static const int8_t constants_7f[] ATTR_ALIGN(8) = DUP8 (0x7f); +static const int8_t constants128[] ATTR_ALIGN(8) = DUP8 (128); +static const int16_t constants256_512[] ATTR_ALIGN(8) = + {256, 512, 256, 512}; +static const int16_t constants256_1024[] ATTR_ALIGN(8) = + {256, 1024, 256, 1024}; + +#define REF_0 0 +#define REF_0_1 1 +#define REF_2 2 +#define REF_2_1 3 +#define REF_4 4 +#define REF_4_1 5 +#define REF_6 6 +#define REF_6_1 7 +#define REF_S0 8 +#define REF_S0_1 9 +#define REF_S2 10 +#define REF_S2_1 11 +#define REF_S4 12 +#define REF_S4_1 13 +#define REF_S6 14 +#define REF_S6_1 15 +#define DST_0 16 +#define DST_1 17 +#define DST_2 18 +#define DST_3 19 +#define CONST_1 20 +#define CONST_2 20 +#define CONST_3 20 +#define CONST_6 20 +#define MASK_fe 20 +#define CONST_128 22 +#define CONST_256 22 +#define CONST_512 22 +#define CONST_1024 22 +#define TMP0 24 +#define TMP1 25 +#define TMP2 26 +#define TMP3 27 +#define TMP4 28 +#define TMP5 29 +#define ZERO 30 +#define MASK_7f 30 + +#define TMP6 32 +#define TMP8 34 +#define TMP10 36 +#define TMP12 38 +#define TMP14 40 +#define TMP16 42 +#define TMP18 44 +#define TMP20 46 +#define TMP22 48 +#define TMP24 50 +#define TMP26 52 +#define TMP28 54 +#define TMP30 56 +#define TMP32 58 + +static void MC_put_o_16_vis (uint8_t * 
dest, uint8_t * _ref, + int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + int offset; + + ref = vis_alignaddr(ref); + offset = (ref != _ref) ? 16 : 0; + do { /* 5 cycles */ + vis_ld64(ref[0], TMP0); + + vis_ld64_2(ref, 8, TMP2); + + vis_ld64_2(ref, offset, TMP4); + ref += stride; + + vis_faligndata(TMP0, TMP2, REF_0); + vis_st64(REF_0, dest[0]); + + vis_faligndata(TMP2, TMP4, REF_2); + vis_st64_2(REF_2, dest, 8); + dest += stride; + } while (--height); +} + +static void MC_put_o_8_vis (uint8_t * dest, uint8_t * _ref, + int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + int offset; + + ref = vis_alignaddr(ref); + offset = (ref != _ref) ? 8 : 0; + do { /* 4 cycles */ + vis_ld64(ref[0], TMP0); + + vis_ld64_2(ref, offset, TMP2); + ref += stride; + + /* stall */ + + vis_faligndata(TMP0, TMP2, REF_0); + vis_st64(REF_0, dest[0]); + dest += stride; + } while (--height); +} + + +static void MC_avg_o_16_vis (uint8_t * dest, uint8_t * _ref, + int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + int stride_8 = stride + 8; + int offset; + + ref = vis_alignaddr(ref); + offset = (ref != _ref) ? 
16 : 0; + + vis_ld64(ref[0], TMP0); + + vis_ld64(ref[8], TMP2); + + vis_ld64_2(ref, offset, TMP4); + + vis_ld64(dest[0], DST_0); + + vis_ld64(dest[8], DST_2); + + vis_ld64(constants_fe[0], MASK_fe); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64(constants_7f[0], MASK_7f); + vis_faligndata(TMP2, TMP4, REF_2); + + vis_ld64(constants128[0], CONST_128); + + ref += stride; + height = (height >> 1) - 1; + + do { /* 24 cycles */ + vis_ld64(ref[0], TMP0); + vis_xor(DST_0, REF_0, TMP6); + + vis_ld64_2(ref, 8, TMP2); + vis_and(TMP6, MASK_fe, TMP6); + + vis_ld64_2(ref, offset, TMP4); + ref += stride; + vis_mul8x16(CONST_128, TMP6, TMP6); + vis_xor(DST_2, REF_2, TMP8); + + vis_and(TMP8, MASK_fe, TMP8); + + vis_or(DST_0, REF_0, TMP10); + vis_ld64_2(dest, stride, DST_0); + vis_mul8x16(CONST_128, TMP8, TMP8); + + vis_or(DST_2, REF_2, TMP12); + vis_ld64_2(dest, stride_8, DST_2); + + vis_ld64(ref[0], TMP14); + vis_and(TMP6, MASK_7f, TMP6); + + vis_and(TMP8, MASK_7f, TMP8); + + vis_psub16(TMP10, TMP6, TMP6); + vis_st64(TMP6, dest[0]); + + vis_psub16(TMP12, TMP8, TMP8); + vis_st64_2(TMP8, dest, 8); + + dest += stride; + vis_ld64_2(ref, 8, TMP16); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64_2(ref, offset, TMP18); + vis_faligndata(TMP2, TMP4, REF_2); + ref += stride; + + vis_xor(DST_0, REF_0, TMP20); + + vis_and(TMP20, MASK_fe, TMP20); + + vis_xor(DST_2, REF_2, TMP22); + vis_mul8x16(CONST_128, TMP20, TMP20); + + vis_and(TMP22, MASK_fe, TMP22); + + vis_or(DST_0, REF_0, TMP24); + vis_mul8x16(CONST_128, TMP22, TMP22); + + vis_or(DST_2, REF_2, TMP26); + + vis_ld64_2(dest, stride, DST_0); + vis_faligndata(TMP14, TMP16, REF_0); + + vis_ld64_2(dest, stride_8, DST_2); + vis_faligndata(TMP16, TMP18, REF_2); + + vis_and(TMP20, MASK_7f, TMP20); + + vis_and(TMP22, MASK_7f, TMP22); + + vis_psub16(TMP24, TMP20, TMP20); + vis_st64(TMP20, dest[0]); + + vis_psub16(TMP26, TMP22, TMP22); + vis_st64_2(TMP22, dest, 8); + dest += stride; + } while (--height); + + vis_ld64(ref[0], TMP0); + 
vis_xor(DST_0, REF_0, TMP6); + + vis_ld64_2(ref, 8, TMP2); + vis_and(TMP6, MASK_fe, TMP6); + + vis_ld64_2(ref, offset, TMP4); + vis_mul8x16(CONST_128, TMP6, TMP6); + vis_xor(DST_2, REF_2, TMP8); + + vis_and(TMP8, MASK_fe, TMP8); + + vis_or(DST_0, REF_0, TMP10); + vis_ld64_2(dest, stride, DST_0); + vis_mul8x16(CONST_128, TMP8, TMP8); + + vis_or(DST_2, REF_2, TMP12); + vis_ld64_2(dest, stride_8, DST_2); + + vis_ld64(ref[0], TMP14); + vis_and(TMP6, MASK_7f, TMP6); + + vis_and(TMP8, MASK_7f, TMP8); + + vis_psub16(TMP10, TMP6, TMP6); + vis_st64(TMP6, dest[0]); + + vis_psub16(TMP12, TMP8, TMP8); + vis_st64_2(TMP8, dest, 8); + + dest += stride; + vis_faligndata(TMP0, TMP2, REF_0); + + vis_faligndata(TMP2, TMP4, REF_2); + + vis_xor(DST_0, REF_0, TMP20); + + vis_and(TMP20, MASK_fe, TMP20); + + vis_xor(DST_2, REF_2, TMP22); + vis_mul8x16(CONST_128, TMP20, TMP20); + + vis_and(TMP22, MASK_fe, TMP22); + + vis_or(DST_0, REF_0, TMP24); + vis_mul8x16(CONST_128, TMP22, TMP22); + + vis_or(DST_2, REF_2, TMP26); + + vis_and(TMP20, MASK_7f, TMP20); + + vis_and(TMP22, MASK_7f, TMP22); + + vis_psub16(TMP24, TMP20, TMP20); + vis_st64(TMP20, dest[0]); + + vis_psub16(TMP26, TMP22, TMP22); + vis_st64_2(TMP22, dest, 8); +} + +static void MC_avg_o_8_vis (uint8_t * dest, uint8_t * _ref, + int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + int offset; + + ref = vis_alignaddr(ref); + offset = (ref != _ref) ? 
8 : 0; + + vis_ld64(ref[0], TMP0); + + vis_ld64_2(ref, offset, TMP2); + + vis_ld64(dest[0], DST_0); + + vis_ld64(constants_fe[0], MASK_fe); + + vis_ld64(constants_7f[0], MASK_7f); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64(constants128[0], CONST_128); + + ref += stride; + height = (height >> 1) - 1; + + do { /* 12 cycles */ + vis_ld64(ref[0], TMP0); + vis_xor(DST_0, REF_0, TMP4); + + vis_ld64_2(ref, offset, TMP2); + vis_and(TMP4, MASK_fe, TMP4); + + vis_or(DST_0, REF_0, TMP6); + vis_ld64_2(dest, stride, DST_0); + ref += stride; + vis_mul8x16(CONST_128, TMP4, TMP4); + + vis_ld64(ref[0], TMP12); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64_2(ref, offset, TMP2); + vis_xor(DST_0, REF_0, TMP0); + ref += stride; + + vis_and(TMP0, MASK_fe, TMP0); + + vis_and(TMP4, MASK_7f, TMP4); + + vis_psub16(TMP6, TMP4, TMP4); + vis_st64(TMP4, dest[0]); + dest += stride; + vis_mul8x16(CONST_128, TMP0, TMP0); + + vis_or(DST_0, REF_0, TMP6); + vis_ld64_2(dest, stride, DST_0); + + vis_faligndata(TMP12, TMP2, REF_0); + + vis_and(TMP0, MASK_7f, TMP0); + + vis_psub16(TMP6, TMP0, TMP4); + vis_st64(TMP4, dest[0]); + dest += stride; + } while (--height); + + vis_ld64(ref[0], TMP0); + vis_xor(DST_0, REF_0, TMP4); + + vis_ld64_2(ref, offset, TMP2); + vis_and(TMP4, MASK_fe, TMP4); + + vis_or(DST_0, REF_0, TMP6); + vis_ld64_2(dest, stride, DST_0); + vis_mul8x16(CONST_128, TMP4, TMP4); + + vis_faligndata(TMP0, TMP2, REF_0); + + vis_xor(DST_0, REF_0, TMP0); + + vis_and(TMP0, MASK_fe, TMP0); + + vis_and(TMP4, MASK_7f, TMP4); + + vis_psub16(TMP6, TMP4, TMP4); + vis_st64(TMP4, dest[0]); + dest += stride; + vis_mul8x16(CONST_128, TMP0, TMP0); + + vis_or(DST_0, REF_0, TMP6); + + vis_and(TMP0, MASK_7f, TMP0); + + vis_psub16(TMP6, TMP0, TMP4); + vis_st64(TMP4, dest[0]); +} + +static void MC_put_x_16_vis (uint8_t * dest, uint8_t * _ref, + int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + unsigned long off = (unsigned long) ref & 0x7; + unsigned long off_plus_1 = off + 1; + + ref = 
vis_alignaddr(ref); + + vis_ld64(ref[0], TMP0); + + vis_ld64_2(ref, 8, TMP2); + + vis_ld64_2(ref, 16, TMP4); + + vis_ld64(constants_fe[0], MASK_fe); + + vis_ld64(constants_7f[0], MASK_7f); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64(constants128[0], CONST_128); + vis_faligndata(TMP2, TMP4, REF_4); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_2); + vis_faligndata(TMP2, TMP4, REF_6); + } else { + vis_src1(TMP2, REF_2); + vis_src1(TMP4, REF_6); + } + + ref += stride; + height = (height >> 1) - 1; + + do { /* 34 cycles */ + vis_ld64(ref[0], TMP0); + vis_xor(REF_0, REF_2, TMP6); + + vis_ld64_2(ref, 8, TMP2); + vis_xor(REF_4, REF_6, TMP8); + + vis_ld64_2(ref, 16, TMP4); + vis_and(TMP6, MASK_fe, TMP6); + ref += stride; + + vis_ld64(ref[0], TMP14); + vis_mul8x16(CONST_128, TMP6, TMP6); + vis_and(TMP8, MASK_fe, TMP8); + + vis_ld64_2(ref, 8, TMP16); + vis_mul8x16(CONST_128, TMP8, TMP8); + vis_or(REF_0, REF_2, TMP10); + + vis_ld64_2(ref, 16, TMP18); + ref += stride; + vis_or(REF_4, REF_6, TMP12); + + vis_alignaddr_g0((void *)off); + + vis_faligndata(TMP0, TMP2, REF_0); + + vis_faligndata(TMP2, TMP4, REF_4); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_2); + vis_faligndata(TMP2, TMP4, REF_6); + } else { + vis_src1(TMP2, REF_2); + vis_src1(TMP4, REF_6); + } + + vis_and(TMP6, MASK_7f, TMP6); + + vis_and(TMP8, MASK_7f, TMP8); + + vis_psub16(TMP10, TMP6, TMP6); + vis_st64(TMP6, dest[0]); + + vis_psub16(TMP12, TMP8, TMP8); + vis_st64_2(TMP8, dest, 8); + dest += stride; + + vis_xor(REF_0, REF_2, TMP6); + + vis_xor(REF_4, REF_6, TMP8); + + vis_and(TMP6, MASK_fe, TMP6); + + vis_mul8x16(CONST_128, TMP6, TMP6); + vis_and(TMP8, MASK_fe, TMP8); + + vis_mul8x16(CONST_128, TMP8, TMP8); + vis_or(REF_0, REF_2, TMP10); + + vis_or(REF_4, REF_6, TMP12); + + vis_alignaddr_g0((void *)off); + + vis_faligndata(TMP14, TMP16, REF_0); + + vis_faligndata(TMP16, TMP18, REF_4); + + if (off != 0x7) { + 
vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP14, TMP16, REF_2); + vis_faligndata(TMP16, TMP18, REF_6); + } else { + vis_src1(TMP16, REF_2); + vis_src1(TMP18, REF_6); + } + + vis_and(TMP6, MASK_7f, TMP6); + + vis_and(TMP8, MASK_7f, TMP8); + + vis_psub16(TMP10, TMP6, TMP6); + vis_st64(TMP6, dest[0]); + + vis_psub16(TMP12, TMP8, TMP8); + vis_st64_2(TMP8, dest, 8); + dest += stride; + } while (--height); + + vis_ld64(ref[0], TMP0); + vis_xor(REF_0, REF_2, TMP6); + + vis_ld64_2(ref, 8, TMP2); + vis_xor(REF_4, REF_6, TMP8); + + vis_ld64_2(ref, 16, TMP4); + vis_and(TMP6, MASK_fe, TMP6); + + vis_mul8x16(CONST_128, TMP6, TMP6); + vis_and(TMP8, MASK_fe, TMP8); + + vis_mul8x16(CONST_128, TMP8, TMP8); + vis_or(REF_0, REF_2, TMP10); + + vis_or(REF_4, REF_6, TMP12); + + vis_alignaddr_g0((void *)off); + + vis_faligndata(TMP0, TMP2, REF_0); + + vis_faligndata(TMP2, TMP4, REF_4); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_2); + vis_faligndata(TMP2, TMP4, REF_6); + } else { + vis_src1(TMP2, REF_2); + vis_src1(TMP4, REF_6); + } + + vis_and(TMP6, MASK_7f, TMP6); + + vis_and(TMP8, MASK_7f, TMP8); + + vis_psub16(TMP10, TMP6, TMP6); + vis_st64(TMP6, dest[0]); + + vis_psub16(TMP12, TMP8, TMP8); + vis_st64_2(TMP8, dest, 8); + dest += stride; + + vis_xor(REF_0, REF_2, TMP6); + + vis_xor(REF_4, REF_6, TMP8); + + vis_and(TMP6, MASK_fe, TMP6); + + vis_mul8x16(CONST_128, TMP6, TMP6); + vis_and(TMP8, MASK_fe, TMP8); + + vis_mul8x16(CONST_128, TMP8, TMP8); + vis_or(REF_0, REF_2, TMP10); + + vis_or(REF_4, REF_6, TMP12); + + vis_and(TMP6, MASK_7f, TMP6); + + vis_and(TMP8, MASK_7f, TMP8); + + vis_psub16(TMP10, TMP6, TMP6); + vis_st64(TMP6, dest[0]); + + vis_psub16(TMP12, TMP8, TMP8); + vis_st64_2(TMP8, dest, 8); +} + +static void MC_put_x_8_vis (uint8_t * dest, uint8_t * _ref, + int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + unsigned long off = (unsigned long) ref & 0x7; + unsigned long off_plus_1 = off + 1; + + ref = 
vis_alignaddr(ref); + + vis_ld64(ref[0], TMP0); + + vis_ld64(ref[8], TMP2); + + vis_ld64(constants_fe[0], MASK_fe); + + vis_ld64(constants_7f[0], MASK_7f); + + vis_ld64(constants128[0], CONST_128); + vis_faligndata(TMP0, TMP2, REF_0); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_2); + } else { + vis_src1(TMP2, REF_2); + } + + ref += stride; + height = (height >> 1) - 1; + + do { /* 20 cycles */ + vis_ld64(ref[0], TMP0); + vis_xor(REF_0, REF_2, TMP4); + + vis_ld64_2(ref, 8, TMP2); + vis_and(TMP4, MASK_fe, TMP4); + ref += stride; + + vis_ld64(ref[0], TMP8); + vis_or(REF_0, REF_2, TMP6); + vis_mul8x16(CONST_128, TMP4, TMP4); + + vis_alignaddr_g0((void *)off); + + vis_ld64_2(ref, 8, TMP10); + ref += stride; + vis_faligndata(TMP0, TMP2, REF_0); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_2); + } else { + vis_src1(TMP2, REF_2); + } + + vis_and(TMP4, MASK_7f, TMP4); + + vis_psub16(TMP6, TMP4, DST_0); + vis_st64(DST_0, dest[0]); + dest += stride; + + vis_xor(REF_0, REF_2, TMP12); + + vis_and(TMP12, MASK_fe, TMP12); + + vis_or(REF_0, REF_2, TMP14); + vis_mul8x16(CONST_128, TMP12, TMP12); + + vis_alignaddr_g0((void *)off); + vis_faligndata(TMP8, TMP10, REF_0); + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP8, TMP10, REF_2); + } else { + vis_src1(TMP10, REF_2); + } + + vis_and(TMP12, MASK_7f, TMP12); + + vis_psub16(TMP14, TMP12, DST_0); + vis_st64(DST_0, dest[0]); + dest += stride; + } while (--height); + + vis_ld64(ref[0], TMP0); + vis_xor(REF_0, REF_2, TMP4); + + vis_ld64_2(ref, 8, TMP2); + vis_and(TMP4, MASK_fe, TMP4); + + vis_or(REF_0, REF_2, TMP6); + vis_mul8x16(CONST_128, TMP4, TMP4); + + vis_alignaddr_g0((void *)off); + + vis_faligndata(TMP0, TMP2, REF_0); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_2); + } else { + vis_src1(TMP2, REF_2); + } + + vis_and(TMP4, MASK_7f, TMP4); + + 
vis_psub16(TMP6, TMP4, DST_0); + vis_st64(DST_0, dest[0]); + dest += stride; + + vis_xor(REF_0, REF_2, TMP12); + + vis_and(TMP12, MASK_fe, TMP12); + + vis_or(REF_0, REF_2, TMP14); + vis_mul8x16(CONST_128, TMP12, TMP12); + + vis_and(TMP12, MASK_7f, TMP12); + + vis_psub16(TMP14, TMP12, DST_0); + vis_st64(DST_0, dest[0]); + dest += stride; +} + +static void MC_avg_x_16_vis (uint8_t * dest, uint8_t * _ref, + int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + unsigned long off = (unsigned long) ref & 0x7; + unsigned long off_plus_1 = off + 1; + + vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT); + + vis_ld64(constants3[0], CONST_3); + vis_fzero(ZERO); + vis_ld64(constants256_512[0], CONST_256); + + ref = vis_alignaddr(ref); + do { /* 26 cycles */ + vis_ld64(ref[0], TMP0); + + vis_ld64(ref[8], TMP2); + + vis_alignaddr_g0((void *)off); + + vis_ld64(ref[16], TMP4); + + vis_ld64(dest[0], DST_0); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64(dest[8], DST_2); + vis_faligndata(TMP2, TMP4, REF_4); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_2); + vis_faligndata(TMP2, TMP4, REF_6); + } else { + vis_src1(TMP2, REF_2); + vis_src1(TMP4, REF_6); + } + + vis_mul8x16au(REF_0, CONST_256, TMP0); + + vis_pmerge(ZERO, REF_2, TMP4); + vis_mul8x16au(REF_0_1, CONST_256, TMP2); + + vis_pmerge(ZERO, REF_2_1, TMP6); + + vis_padd16(TMP0, TMP4, TMP0); + + vis_mul8x16al(DST_0, CONST_512, TMP4); + vis_padd16(TMP2, TMP6, TMP2); + + vis_mul8x16al(DST_1, CONST_512, TMP6); + + vis_mul8x16au(REF_6, CONST_256, TMP12); + + vis_padd16(TMP0, TMP4, TMP0); + vis_mul8x16au(REF_6_1, CONST_256, TMP14); + + vis_padd16(TMP2, TMP6, TMP2); + vis_mul8x16au(REF_4, CONST_256, TMP16); + + vis_padd16(TMP0, CONST_3, TMP8); + vis_mul8x16au(REF_4_1, CONST_256, TMP18); + + vis_padd16(TMP2, CONST_3, TMP10); + vis_pack16(TMP8, DST_0); + + vis_pack16(TMP10, DST_1); + vis_padd16(TMP16, TMP12, TMP0); + + vis_st64(DST_0, dest[0]); + vis_mul8x16al(DST_2, CONST_512, TMP4); 
+ vis_padd16(TMP18, TMP14, TMP2); + + vis_mul8x16al(DST_3, CONST_512, TMP6); + vis_padd16(TMP0, CONST_3, TMP0); + + vis_padd16(TMP2, CONST_3, TMP2); + + vis_padd16(TMP0, TMP4, TMP0); + + vis_padd16(TMP2, TMP6, TMP2); + vis_pack16(TMP0, DST_2); + + vis_pack16(TMP2, DST_3); + vis_st64(DST_2, dest[8]); + + ref += stride; + dest += stride; + } while (--height); +} + +static void MC_avg_x_8_vis (uint8_t * dest, uint8_t * _ref, + int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + unsigned long off = (unsigned long) ref & 0x7; + unsigned long off_plus_1 = off + 1; + int stride_times_2 = stride << 1; + + vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT); + + vis_ld64(constants3[0], CONST_3); + vis_fzero(ZERO); + vis_ld64(constants256_512[0], CONST_256); + + ref = vis_alignaddr(ref); + height >>= 2; + do { /* 47 cycles */ + vis_ld64(ref[0], TMP0); + + vis_ld64_2(ref, 8, TMP2); + ref += stride; + + vis_alignaddr_g0((void *)off); + + vis_ld64(ref[0], TMP4); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64_2(ref, 8, TMP6); + ref += stride; + + vis_ld64(ref[0], TMP8); + + vis_ld64_2(ref, 8, TMP10); + ref += stride; + vis_faligndata(TMP4, TMP6, REF_4); + + vis_ld64(ref[0], TMP12); + + vis_ld64_2(ref, 8, TMP14); + ref += stride; + vis_faligndata(TMP8, TMP10, REF_S0); + + vis_faligndata(TMP12, TMP14, REF_S4); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + + vis_ld64(dest[0], DST_0); + vis_faligndata(TMP0, TMP2, REF_2); + + vis_ld64_2(dest, stride, DST_2); + vis_faligndata(TMP4, TMP6, REF_6); + + vis_faligndata(TMP8, TMP10, REF_S2); + + vis_faligndata(TMP12, TMP14, REF_S6); + } else { + vis_ld64(dest[0], DST_0); + vis_src1(TMP2, REF_2); + + vis_ld64_2(dest, stride, DST_2); + vis_src1(TMP6, REF_6); + + vis_src1(TMP10, REF_S2); + + vis_src1(TMP14, REF_S6); + } + + vis_pmerge(ZERO, REF_0, TMP0); + vis_mul8x16au(REF_0_1, CONST_256, TMP2); + + vis_pmerge(ZERO, REF_2, TMP4); + vis_mul8x16au(REF_2_1, CONST_256, TMP6); + + vis_padd16(TMP0, CONST_3, TMP0); + 
vis_mul8x16al(DST_0, CONST_512, TMP16); + + vis_padd16(TMP2, CONST_3, TMP2); + vis_mul8x16al(DST_1, CONST_512, TMP18); + + vis_padd16(TMP0, TMP4, TMP0); + vis_mul8x16au(REF_4, CONST_256, TMP8); + + vis_padd16(TMP2, TMP6, TMP2); + vis_mul8x16au(REF_4_1, CONST_256, TMP10); + + vis_padd16(TMP0, TMP16, TMP0); + vis_mul8x16au(REF_6, CONST_256, TMP12); + + vis_padd16(TMP2, TMP18, TMP2); + vis_mul8x16au(REF_6_1, CONST_256, TMP14); + + vis_padd16(TMP8, CONST_3, TMP8); + vis_mul8x16al(DST_2, CONST_512, TMP16); + + vis_padd16(TMP8, TMP12, TMP8); + vis_mul8x16al(DST_3, CONST_512, TMP18); + + vis_padd16(TMP10, TMP14, TMP10); + vis_pack16(TMP0, DST_0); + + vis_pack16(TMP2, DST_1); + vis_st64(DST_0, dest[0]); + dest += stride; + vis_padd16(TMP10, CONST_3, TMP10); + + vis_ld64_2(dest, stride, DST_0); + vis_padd16(TMP8, TMP16, TMP8); + + vis_ld64_2(dest, stride_times_2, TMP4/*DST_2*/); + vis_padd16(TMP10, TMP18, TMP10); + vis_pack16(TMP8, DST_2); + + vis_pack16(TMP10, DST_3); + vis_st64(DST_2, dest[0]); + dest += stride; + + vis_mul8x16au(REF_S0_1, CONST_256, TMP2); + vis_pmerge(ZERO, REF_S0, TMP0); + + vis_pmerge(ZERO, REF_S2, TMP24); + vis_mul8x16au(REF_S2_1, CONST_256, TMP6); + + vis_padd16(TMP0, CONST_3, TMP0); + vis_mul8x16au(REF_S4, CONST_256, TMP8); + + vis_padd16(TMP2, CONST_3, TMP2); + vis_mul8x16au(REF_S4_1, CONST_256, TMP10); + + vis_padd16(TMP0, TMP24, TMP0); + vis_mul8x16au(REF_S6, CONST_256, TMP12); + + vis_padd16(TMP2, TMP6, TMP2); + vis_mul8x16au(REF_S6_1, CONST_256, TMP14); + + vis_padd16(TMP8, CONST_3, TMP8); + vis_mul8x16al(DST_0, CONST_512, TMP16); + + vis_padd16(TMP10, CONST_3, TMP10); + vis_mul8x16al(DST_1, CONST_512, TMP18); + + vis_padd16(TMP8, TMP12, TMP8); + vis_mul8x16al(TMP4/*DST_2*/, CONST_512, TMP20); + + vis_mul8x16al(TMP5/*DST_3*/, CONST_512, TMP22); + vis_padd16(TMP0, TMP16, TMP0); + + vis_padd16(TMP2, TMP18, TMP2); + vis_pack16(TMP0, DST_0); + + vis_padd16(TMP10, TMP14, TMP10); + vis_pack16(TMP2, DST_1); + vis_st64(DST_0, dest[0]); + dest += 
stride; + + vis_padd16(TMP8, TMP20, TMP8); + + vis_padd16(TMP10, TMP22, TMP10); + vis_pack16(TMP8, DST_2); + + vis_pack16(TMP10, DST_3); + vis_st64(DST_2, dest[0]); + dest += stride; + } while (--height); +} + +static void MC_put_y_16_vis (uint8_t * dest, uint8_t * _ref, + int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + int offset; + + ref = vis_alignaddr(ref); + offset = (ref != _ref) ? 16 : 0; + + vis_ld64(ref[0], TMP0); + + vis_ld64_2(ref, 8, TMP2); + + vis_ld64_2(ref, offset, TMP4); + ref += stride; + + vis_ld64(ref[0], TMP6); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64_2(ref, 8, TMP8); + vis_faligndata(TMP2, TMP4, REF_4); + + vis_ld64_2(ref, offset, TMP10); + ref += stride; + + vis_ld64(constants_fe[0], MASK_fe); + vis_faligndata(TMP6, TMP8, REF_2); + + vis_ld64(constants_7f[0], MASK_7f); + vis_faligndata(TMP8, TMP10, REF_6); + + vis_ld64(constants128[0], CONST_128); + height = (height >> 1) - 1; + do { /* 24 cycles */ + vis_ld64(ref[0], TMP0); + vis_xor(REF_0, REF_2, TMP12); + + vis_ld64_2(ref, 8, TMP2); + vis_xor(REF_4, REF_6, TMP16); + + vis_ld64_2(ref, offset, TMP4); + ref += stride; + vis_or(REF_0, REF_2, TMP14); + + vis_ld64(ref[0], TMP6); + vis_or(REF_4, REF_6, TMP18); + + vis_ld64_2(ref, 8, TMP8); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64_2(ref, offset, TMP10); + ref += stride; + vis_faligndata(TMP2, TMP4, REF_4); + + vis_and(TMP12, MASK_fe, TMP12); + + vis_and(TMP16, MASK_fe, TMP16); + vis_mul8x16(CONST_128, TMP12, TMP12); + + vis_mul8x16(CONST_128, TMP16, TMP16); + vis_xor(REF_0, REF_2, TMP0); + + vis_xor(REF_4, REF_6, TMP2); + + vis_or(REF_0, REF_2, TMP20); + + vis_and(TMP12, MASK_7f, TMP12); + + vis_and(TMP16, MASK_7f, TMP16); + + vis_psub16(TMP14, TMP12, TMP12); + vis_st64(TMP12, dest[0]); + + vis_psub16(TMP18, TMP16, TMP16); + vis_st64_2(TMP16, dest, 8); + dest += stride; + + vis_or(REF_4, REF_6, TMP18); + + vis_and(TMP0, MASK_fe, TMP0); + + vis_and(TMP2, MASK_fe, TMP2); + vis_mul8x16(CONST_128, TMP0, TMP0); + + 
vis_faligndata(TMP6, TMP8, REF_2); + vis_mul8x16(CONST_128, TMP2, TMP2); + + vis_faligndata(TMP8, TMP10, REF_6); + + vis_and(TMP0, MASK_7f, TMP0); + + vis_and(TMP2, MASK_7f, TMP2); + + vis_psub16(TMP20, TMP0, TMP0); + vis_st64(TMP0, dest[0]); + + vis_psub16(TMP18, TMP2, TMP2); + vis_st64_2(TMP2, dest, 8); + dest += stride; + } while (--height); + + vis_ld64(ref[0], TMP0); + vis_xor(REF_0, REF_2, TMP12); + + vis_ld64_2(ref, 8, TMP2); + vis_xor(REF_4, REF_6, TMP16); + + vis_ld64_2(ref, offset, TMP4); + vis_or(REF_0, REF_2, TMP14); + + vis_or(REF_4, REF_6, TMP18); + + vis_faligndata(TMP0, TMP2, REF_0); + + vis_faligndata(TMP2, TMP4, REF_4); + + vis_and(TMP12, MASK_fe, TMP12); + + vis_and(TMP16, MASK_fe, TMP16); + vis_mul8x16(CONST_128, TMP12, TMP12); + + vis_mul8x16(CONST_128, TMP16, TMP16); + vis_xor(REF_0, REF_2, TMP0); + + vis_xor(REF_4, REF_6, TMP2); + + vis_or(REF_0, REF_2, TMP20); + + vis_and(TMP12, MASK_7f, TMP12); + + vis_and(TMP16, MASK_7f, TMP16); + + vis_psub16(TMP14, TMP12, TMP12); + vis_st64(TMP12, dest[0]); + + vis_psub16(TMP18, TMP16, TMP16); + vis_st64_2(TMP16, dest, 8); + dest += stride; + + vis_or(REF_4, REF_6, TMP18); + + vis_and(TMP0, MASK_fe, TMP0); + + vis_and(TMP2, MASK_fe, TMP2); + vis_mul8x16(CONST_128, TMP0, TMP0); + + vis_mul8x16(CONST_128, TMP2, TMP2); + + vis_and(TMP0, MASK_7f, TMP0); + + vis_and(TMP2, MASK_7f, TMP2); + + vis_psub16(TMP20, TMP0, TMP0); + vis_st64(TMP0, dest[0]); + + vis_psub16(TMP18, TMP2, TMP2); + vis_st64_2(TMP2, dest, 8); +} + +static void MC_put_y_8_vis (uint8_t * dest, uint8_t * _ref, + int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + int offset; + + ref = vis_alignaddr(ref); + offset = (ref != _ref) ? 
8 : 0; + + vis_ld64(ref[0], TMP0); + + vis_ld64_2(ref, offset, TMP2); + ref += stride; + + vis_ld64(ref[0], TMP4); + + vis_ld64_2(ref, offset, TMP6); + ref += stride; + + vis_ld64(constants_fe[0], MASK_fe); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64(constants_7f[0], MASK_7f); + vis_faligndata(TMP4, TMP6, REF_2); + + vis_ld64(constants128[0], CONST_128); + height = (height >> 1) - 1; + do { /* 12 cycles */ + vis_ld64(ref[0], TMP0); + vis_xor(REF_0, REF_2, TMP4); + + vis_ld64_2(ref, offset, TMP2); + ref += stride; + vis_and(TMP4, MASK_fe, TMP4); + + vis_or(REF_0, REF_2, TMP6); + vis_mul8x16(CONST_128, TMP4, TMP4); + + vis_faligndata(TMP0, TMP2, REF_0); + vis_ld64(ref[0], TMP0); + + vis_ld64_2(ref, offset, TMP2); + ref += stride; + vis_xor(REF_0, REF_2, TMP12); + + vis_and(TMP4, MASK_7f, TMP4); + + vis_and(TMP12, MASK_fe, TMP12); + + vis_mul8x16(CONST_128, TMP12, TMP12); + vis_or(REF_0, REF_2, TMP14); + + vis_psub16(TMP6, TMP4, DST_0); + vis_st64(DST_0, dest[0]); + dest += stride; + + vis_faligndata(TMP0, TMP2, REF_2); + + vis_and(TMP12, MASK_7f, TMP12); + + vis_psub16(TMP14, TMP12, DST_0); + vis_st64(DST_0, dest[0]); + dest += stride; + } while (--height); + + vis_ld64(ref[0], TMP0); + vis_xor(REF_0, REF_2, TMP4); + + vis_ld64_2(ref, offset, TMP2); + vis_and(TMP4, MASK_fe, TMP4); + + vis_or(REF_0, REF_2, TMP6); + vis_mul8x16(CONST_128, TMP4, TMP4); + + vis_faligndata(TMP0, TMP2, REF_0); + + vis_xor(REF_0, REF_2, TMP12); + + vis_and(TMP4, MASK_7f, TMP4); + + vis_and(TMP12, MASK_fe, TMP12); + + vis_mul8x16(CONST_128, TMP12, TMP12); + vis_or(REF_0, REF_2, TMP14); + + vis_psub16(TMP6, TMP4, DST_0); + vis_st64(DST_0, dest[0]); + dest += stride; + + vis_and(TMP12, MASK_7f, TMP12); + + vis_psub16(TMP14, TMP12, DST_0); + vis_st64(DST_0, dest[0]); +} + +static void MC_avg_y_16_vis (uint8_t * dest, uint8_t * _ref, + int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + int stride_8 = stride + 8; + int stride_16; + int offset; + + vis_set_gsr(5 << 
VIS_GSR_SCALEFACT_SHIFT); + + ref = vis_alignaddr(ref); + offset = (ref != _ref) ? 16 : 0; + + vis_ld64(ref[ 0], TMP0); + vis_fzero(ZERO); + + vis_ld64(ref[ 8], TMP2); + + vis_ld64_2(ref, offset, TMP4); + stride_16 = stride + offset; + + vis_ld64(constants3[0], CONST_3); + vis_faligndata(TMP0, TMP2, REF_2); + + vis_ld64(constants256_512[0], CONST_256); + vis_faligndata(TMP2, TMP4, REF_6); + height >>= 1; + + do { /* 31 cycles */ + vis_ld64_2(ref, stride, TMP0); + vis_pmerge(ZERO, REF_2, TMP12); + vis_mul8x16au(REF_2_1, CONST_256, TMP14); + + vis_ld64_2(ref, stride_8, TMP2); + vis_pmerge(ZERO, REF_6, TMP16); + vis_mul8x16au(REF_6_1, CONST_256, TMP18); + + vis_ld64_2(ref, stride_16, TMP4); + ref += stride; + + vis_ld64(dest[0], DST_0); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64_2(dest, 8, DST_2); + vis_faligndata(TMP2, TMP4, REF_4); + + vis_ld64_2(ref, stride, TMP6); + vis_pmerge(ZERO, REF_0, TMP0); + vis_mul8x16au(REF_0_1, CONST_256, TMP2); + + vis_ld64_2(ref, stride_8, TMP8); + vis_pmerge(ZERO, REF_4, TMP4); + + vis_ld64_2(ref, stride_16, TMP10); + ref += stride; + + vis_ld64_2(dest, stride, REF_S0/*DST_4*/); + vis_faligndata(TMP6, TMP8, REF_2); + vis_mul8x16au(REF_4_1, CONST_256, TMP6); + + vis_ld64_2(dest, stride_8, REF_S2/*DST_6*/); + vis_faligndata(TMP8, TMP10, REF_6); + vis_mul8x16al(DST_0, CONST_512, TMP20); + + vis_padd16(TMP0, CONST_3, TMP0); + vis_mul8x16al(DST_1, CONST_512, TMP22); + + vis_padd16(TMP2, CONST_3, TMP2); + vis_mul8x16al(DST_2, CONST_512, TMP24); + + vis_padd16(TMP4, CONST_3, TMP4); + vis_mul8x16al(DST_3, CONST_512, TMP26); + + vis_padd16(TMP6, CONST_3, TMP6); + + vis_padd16(TMP12, TMP20, TMP12); + vis_mul8x16al(REF_S0, CONST_512, TMP20); + + vis_padd16(TMP14, TMP22, TMP14); + vis_mul8x16al(REF_S0_1, CONST_512, TMP22); + + vis_padd16(TMP16, TMP24, TMP16); + vis_mul8x16al(REF_S2, CONST_512, TMP24); + + vis_padd16(TMP18, TMP26, TMP18); + vis_mul8x16al(REF_S2_1, CONST_512, TMP26); + + vis_padd16(TMP12, TMP0, TMP12); + vis_mul8x16au(REF_2, 
CONST_256, TMP28); + + vis_padd16(TMP14, TMP2, TMP14); + vis_mul8x16au(REF_2_1, CONST_256, TMP30); + + vis_padd16(TMP16, TMP4, TMP16); + vis_mul8x16au(REF_6, CONST_256, REF_S4); + + vis_padd16(TMP18, TMP6, TMP18); + vis_mul8x16au(REF_6_1, CONST_256, REF_S6); + + vis_pack16(TMP12, DST_0); + vis_padd16(TMP28, TMP0, TMP12); + + vis_pack16(TMP14, DST_1); + vis_st64(DST_0, dest[0]); + vis_padd16(TMP30, TMP2, TMP14); + + vis_pack16(TMP16, DST_2); + vis_padd16(REF_S4, TMP4, TMP16); + + vis_pack16(TMP18, DST_3); + vis_st64_2(DST_2, dest, 8); + dest += stride; + vis_padd16(REF_S6, TMP6, TMP18); + + vis_padd16(TMP12, TMP20, TMP12); + + vis_padd16(TMP14, TMP22, TMP14); + vis_pack16(TMP12, DST_0); + + vis_padd16(TMP16, TMP24, TMP16); + vis_pack16(TMP14, DST_1); + vis_st64(DST_0, dest[0]); + + vis_padd16(TMP18, TMP26, TMP18); + vis_pack16(TMP16, DST_2); + + vis_pack16(TMP18, DST_3); + vis_st64_2(DST_2, dest, 8); + dest += stride; + } while (--height); +} + +static void MC_avg_y_8_vis (uint8_t * dest, uint8_t * _ref, + int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + int stride_8; + int offset; + + vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT); + + ref = vis_alignaddr(ref); + offset = (ref != _ref) ? 
8 : 0; + + vis_ld64(ref[ 0], TMP0); + vis_fzero(ZERO); + + vis_ld64_2(ref, offset, TMP2); + stride_8 = stride + offset; + + vis_ld64(constants3[0], CONST_3); + vis_faligndata(TMP0, TMP2, REF_2); + + vis_ld64(constants256_512[0], CONST_256); + + height >>= 1; + do { /* 20 cycles */ + vis_ld64_2(ref, stride, TMP0); + vis_pmerge(ZERO, REF_2, TMP8); + vis_mul8x16au(REF_2_1, CONST_256, TMP10); + + vis_ld64_2(ref, stride_8, TMP2); + ref += stride; + + vis_ld64(dest[0], DST_0); + + vis_ld64_2(dest, stride, DST_2); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64_2(ref, stride, TMP4); + vis_mul8x16al(DST_0, CONST_512, TMP16); + vis_pmerge(ZERO, REF_0, TMP12); + + vis_ld64_2(ref, stride_8, TMP6); + ref += stride; + vis_mul8x16al(DST_1, CONST_512, TMP18); + vis_pmerge(ZERO, REF_0_1, TMP14); + + vis_padd16(TMP12, CONST_3, TMP12); + vis_mul8x16al(DST_2, CONST_512, TMP24); + + vis_padd16(TMP14, CONST_3, TMP14); + vis_mul8x16al(DST_3, CONST_512, TMP26); + + vis_faligndata(TMP4, TMP6, REF_2); + + vis_padd16(TMP8, TMP12, TMP8); + + vis_padd16(TMP10, TMP14, TMP10); + vis_mul8x16au(REF_2, CONST_256, TMP20); + + vis_padd16(TMP8, TMP16, TMP0); + vis_mul8x16au(REF_2_1, CONST_256, TMP22); + + vis_padd16(TMP10, TMP18, TMP2); + vis_pack16(TMP0, DST_0); + + vis_pack16(TMP2, DST_1); + vis_st64(DST_0, dest[0]); + dest += stride; + vis_padd16(TMP12, TMP20, TMP12); + + vis_padd16(TMP14, TMP22, TMP14); + + vis_padd16(TMP12, TMP24, TMP0); + + vis_padd16(TMP14, TMP26, TMP2); + vis_pack16(TMP0, DST_2); + + vis_pack16(TMP2, DST_3); + vis_st64(DST_2, dest[0]); + dest += stride; + } while (--height); +} + +static void MC_put_xy_16_vis (uint8_t * dest, uint8_t * _ref, + int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + unsigned long off = (unsigned long) ref & 0x7; + unsigned long off_plus_1 = off + 1; + int stride_8 = stride + 8; + int stride_16 = stride + 16; + + vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT); + + ref = vis_alignaddr(ref); + + vis_ld64(ref[ 0], TMP0); + vis_fzero(ZERO); + 
+ vis_ld64(ref[ 8], TMP2); + + vis_ld64(ref[16], TMP4); + + vis_ld64(constants2[0], CONST_2); + vis_faligndata(TMP0, TMP2, REF_S0); + + vis_ld64(constants256_512[0], CONST_256); + vis_faligndata(TMP2, TMP4, REF_S4); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_S2); + vis_faligndata(TMP2, TMP4, REF_S6); + } else { + vis_src1(TMP2, REF_S2); + vis_src1(TMP4, REF_S6); + } + + height >>= 1; + do { + vis_ld64_2(ref, stride, TMP0); + vis_mul8x16au(REF_S0, CONST_256, TMP12); + vis_pmerge(ZERO, REF_S0_1, TMP14); + + vis_alignaddr_g0((void *)off); + + vis_ld64_2(ref, stride_8, TMP2); + vis_mul8x16au(REF_S2, CONST_256, TMP16); + vis_pmerge(ZERO, REF_S2_1, TMP18); + + vis_ld64_2(ref, stride_16, TMP4); + ref += stride; + vis_mul8x16au(REF_S4, CONST_256, TMP20); + vis_pmerge(ZERO, REF_S4_1, TMP22); + + vis_ld64_2(ref, stride, TMP6); + vis_mul8x16au(REF_S6, CONST_256, TMP24); + vis_pmerge(ZERO, REF_S6_1, TMP26); + + vis_ld64_2(ref, stride_8, TMP8); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64_2(ref, stride_16, TMP10); + ref += stride; + vis_faligndata(TMP2, TMP4, REF_4); + + vis_faligndata(TMP6, TMP8, REF_S0); + + vis_faligndata(TMP8, TMP10, REF_S4); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_2); + vis_faligndata(TMP2, TMP4, REF_6); + vis_faligndata(TMP6, TMP8, REF_S2); + vis_faligndata(TMP8, TMP10, REF_S6); + } else { + vis_src1(TMP2, REF_2); + vis_src1(TMP4, REF_6); + vis_src1(TMP8, REF_S2); + vis_src1(TMP10, REF_S6); + } + + vis_mul8x16au(REF_0, CONST_256, TMP0); + vis_pmerge(ZERO, REF_0_1, TMP2); + + vis_mul8x16au(REF_2, CONST_256, TMP4); + vis_pmerge(ZERO, REF_2_1, TMP6); + + vis_padd16(TMP0, CONST_2, TMP8); + vis_mul8x16au(REF_4, CONST_256, TMP0); + + vis_padd16(TMP2, CONST_2, TMP10); + vis_mul8x16au(REF_4_1, CONST_256, TMP2); + + vis_padd16(TMP8, TMP4, TMP8); + vis_mul8x16au(REF_6, CONST_256, TMP4); + + vis_padd16(TMP10, TMP6, TMP10); + vis_mul8x16au(REF_6_1, 
CONST_256, TMP6); + + vis_padd16(TMP12, TMP8, TMP12); + + vis_padd16(TMP14, TMP10, TMP14); + + vis_padd16(TMP12, TMP16, TMP12); + + vis_padd16(TMP14, TMP18, TMP14); + vis_pack16(TMP12, DST_0); + + vis_pack16(TMP14, DST_1); + vis_st64(DST_0, dest[0]); + vis_padd16(TMP0, CONST_2, TMP12); + + vis_mul8x16au(REF_S0, CONST_256, TMP0); + vis_padd16(TMP2, CONST_2, TMP14); + + vis_mul8x16au(REF_S0_1, CONST_256, TMP2); + vis_padd16(TMP12, TMP4, TMP12); + + vis_mul8x16au(REF_S2, CONST_256, TMP4); + vis_padd16(TMP14, TMP6, TMP14); + + vis_mul8x16au(REF_S2_1, CONST_256, TMP6); + vis_padd16(TMP20, TMP12, TMP20); + + vis_padd16(TMP22, TMP14, TMP22); + + vis_padd16(TMP20, TMP24, TMP20); + + vis_padd16(TMP22, TMP26, TMP22); + vis_pack16(TMP20, DST_2); + + vis_pack16(TMP22, DST_3); + vis_st64_2(DST_2, dest, 8); + dest += stride; + vis_padd16(TMP0, TMP4, TMP24); + + vis_mul8x16au(REF_S4, CONST_256, TMP0); + vis_padd16(TMP2, TMP6, TMP26); + + vis_mul8x16au(REF_S4_1, CONST_256, TMP2); + vis_padd16(TMP24, TMP8, TMP24); + + vis_padd16(TMP26, TMP10, TMP26); + vis_pack16(TMP24, DST_0); + + vis_pack16(TMP26, DST_1); + vis_st64(DST_0, dest[0]); + vis_pmerge(ZERO, REF_S6, TMP4); + + vis_pmerge(ZERO, REF_S6_1, TMP6); + + vis_padd16(TMP0, TMP4, TMP0); + + vis_padd16(TMP2, TMP6, TMP2); + + vis_padd16(TMP0, TMP12, TMP0); + + vis_padd16(TMP2, TMP14, TMP2); + vis_pack16(TMP0, DST_2); + + vis_pack16(TMP2, DST_3); + vis_st64_2(DST_2, dest, 8); + dest += stride; + } while (--height); +} + +static void MC_put_xy_8_vis (uint8_t * dest, uint8_t * _ref, + int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + unsigned long off = (unsigned long) ref & 0x7; + unsigned long off_plus_1 = off + 1; + int stride_8 = stride + 8; + + vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT); + + ref = vis_alignaddr(ref); + + vis_ld64(ref[ 0], TMP0); + vis_fzero(ZERO); + + vis_ld64(ref[ 8], TMP2); + + vis_ld64(constants2[0], CONST_2); + + vis_ld64(constants256_512[0], CONST_256); + vis_faligndata(TMP0, TMP2, REF_S0); + + 
if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_S2); + } else { + vis_src1(TMP2, REF_S2); + } + + height >>= 1; + do { /* 26 cycles */ + vis_ld64_2(ref, stride, TMP0); + vis_mul8x16au(REF_S0, CONST_256, TMP8); + vis_pmerge(ZERO, REF_S2, TMP12); + + vis_alignaddr_g0((void *)off); + + vis_ld64_2(ref, stride_8, TMP2); + ref += stride; + vis_mul8x16au(REF_S0_1, CONST_256, TMP10); + vis_pmerge(ZERO, REF_S2_1, TMP14); + + vis_ld64_2(ref, stride, TMP4); + + vis_ld64_2(ref, stride_8, TMP6); + ref += stride; + vis_faligndata(TMP0, TMP2, REF_S4); + + vis_pmerge(ZERO, REF_S4, TMP18); + + vis_pmerge(ZERO, REF_S4_1, TMP20); + + vis_faligndata(TMP4, TMP6, REF_S0); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_S6); + vis_faligndata(TMP4, TMP6, REF_S2); + } else { + vis_src1(TMP2, REF_S6); + vis_src1(TMP6, REF_S2); + } + + vis_padd16(TMP18, CONST_2, TMP18); + vis_mul8x16au(REF_S6, CONST_256, TMP22); + + vis_padd16(TMP20, CONST_2, TMP20); + vis_mul8x16au(REF_S6_1, CONST_256, TMP24); + + vis_mul8x16au(REF_S0, CONST_256, TMP26); + vis_pmerge(ZERO, REF_S0_1, TMP28); + + vis_mul8x16au(REF_S2, CONST_256, TMP30); + vis_padd16(TMP18, TMP22, TMP18); + + vis_mul8x16au(REF_S2_1, CONST_256, TMP32); + vis_padd16(TMP20, TMP24, TMP20); + + vis_padd16(TMP8, TMP18, TMP8); + + vis_padd16(TMP10, TMP20, TMP10); + + vis_padd16(TMP8, TMP12, TMP8); + + vis_padd16(TMP10, TMP14, TMP10); + vis_pack16(TMP8, DST_0); + + vis_pack16(TMP10, DST_1); + vis_st64(DST_0, dest[0]); + dest += stride; + vis_padd16(TMP18, TMP26, TMP18); + + vis_padd16(TMP20, TMP28, TMP20); + + vis_padd16(TMP18, TMP30, TMP18); + + vis_padd16(TMP20, TMP32, TMP20); + vis_pack16(TMP18, DST_2); + + vis_pack16(TMP20, DST_3); + vis_st64(DST_2, dest[0]); + dest += stride; + } while (--height); +} + +static void MC_avg_xy_16_vis (uint8_t * dest, uint8_t * _ref, + int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + unsigned long off = (unsigned 
long) ref & 0x7; + unsigned long off_plus_1 = off + 1; + int stride_8 = stride + 8; + int stride_16 = stride + 16; + + vis_set_gsr(4 << VIS_GSR_SCALEFACT_SHIFT); + + ref = vis_alignaddr(ref); + + vis_ld64(ref[ 0], TMP0); + vis_fzero(ZERO); + + vis_ld64(ref[ 8], TMP2); + + vis_ld64(ref[16], TMP4); + + vis_ld64(constants6[0], CONST_6); + vis_faligndata(TMP0, TMP2, REF_S0); + + vis_ld64(constants256_1024[0], CONST_256); + vis_faligndata(TMP2, TMP4, REF_S4); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_S2); + vis_faligndata(TMP2, TMP4, REF_S6); + } else { + vis_src1(TMP2, REF_S2); + vis_src1(TMP4, REF_S6); + } + + height >>= 1; + do { /* 55 cycles */ + vis_ld64_2(ref, stride, TMP0); + vis_mul8x16au(REF_S0, CONST_256, TMP12); + vis_pmerge(ZERO, REF_S0_1, TMP14); + + vis_alignaddr_g0((void *)off); + + vis_ld64_2(ref, stride_8, TMP2); + vis_mul8x16au(REF_S2, CONST_256, TMP16); + vis_pmerge(ZERO, REF_S2_1, TMP18); + + vis_ld64_2(ref, stride_16, TMP4); + ref += stride; + vis_mul8x16au(REF_S4, CONST_256, TMP20); + vis_pmerge(ZERO, REF_S4_1, TMP22); + + vis_ld64_2(ref, stride, TMP6); + vis_mul8x16au(REF_S6, CONST_256, TMP24); + vis_pmerge(ZERO, REF_S6_1, TMP26); + + vis_ld64_2(ref, stride_8, TMP8); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64_2(ref, stride_16, TMP10); + ref += stride; + vis_faligndata(TMP2, TMP4, REF_4); + + vis_ld64(dest[0], DST_0); + vis_faligndata(TMP6, TMP8, REF_S0); + + vis_ld64_2(dest, 8, DST_2); + vis_faligndata(TMP8, TMP10, REF_S4); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_2); + vis_faligndata(TMP2, TMP4, REF_6); + vis_faligndata(TMP6, TMP8, REF_S2); + vis_faligndata(TMP8, TMP10, REF_S6); + } else { + vis_src1(TMP2, REF_2); + vis_src1(TMP4, REF_6); + vis_src1(TMP8, REF_S2); + vis_src1(TMP10, REF_S6); + } + + vis_mul8x16al(DST_0, CONST_1024, TMP30); + vis_pmerge(ZERO, REF_0, TMP0); + + vis_mul8x16al(DST_1, CONST_1024, TMP32); + vis_pmerge(ZERO, 
REF_0_1, TMP2); + + vis_mul8x16au(REF_2, CONST_256, TMP4); + vis_pmerge(ZERO, REF_2_1, TMP6); + + vis_mul8x16al(DST_2, CONST_1024, REF_0); + vis_padd16(TMP0, CONST_6, TMP0); + + vis_mul8x16al(DST_3, CONST_1024, REF_2); + vis_padd16(TMP2, CONST_6, TMP2); + + vis_padd16(TMP0, TMP4, TMP0); + vis_mul8x16au(REF_4, CONST_256, TMP4); + + vis_padd16(TMP2, TMP6, TMP2); + vis_mul8x16au(REF_4_1, CONST_256, TMP6); + + vis_padd16(TMP12, TMP0, TMP12); + vis_mul8x16au(REF_6, CONST_256, TMP8); + + vis_padd16(TMP14, TMP2, TMP14); + vis_mul8x16au(REF_6_1, CONST_256, TMP10); + + vis_padd16(TMP12, TMP16, TMP12); + vis_mul8x16au(REF_S0, CONST_256, REF_4); + + vis_padd16(TMP14, TMP18, TMP14); + vis_mul8x16au(REF_S0_1, CONST_256, REF_6); + + vis_padd16(TMP12, TMP30, TMP12); + + vis_padd16(TMP14, TMP32, TMP14); + vis_pack16(TMP12, DST_0); + + vis_pack16(TMP14, DST_1); + vis_st64(DST_0, dest[0]); + vis_padd16(TMP4, CONST_6, TMP4); + + vis_ld64_2(dest, stride, DST_0); + vis_padd16(TMP6, CONST_6, TMP6); + vis_mul8x16au(REF_S2, CONST_256, TMP12); + + vis_padd16(TMP4, TMP8, TMP4); + vis_mul8x16au(REF_S2_1, CONST_256, TMP14); + + vis_padd16(TMP6, TMP10, TMP6); + + vis_padd16(TMP20, TMP4, TMP20); + + vis_padd16(TMP22, TMP6, TMP22); + + vis_padd16(TMP20, TMP24, TMP20); + + vis_padd16(TMP22, TMP26, TMP22); + + vis_padd16(TMP20, REF_0, TMP20); + vis_mul8x16au(REF_S4, CONST_256, REF_0); + + vis_padd16(TMP22, REF_2, TMP22); + vis_pack16(TMP20, DST_2); + + vis_pack16(TMP22, DST_3); + vis_st64_2(DST_2, dest, 8); + dest += stride; + + vis_ld64_2(dest, 8, DST_2); + vis_mul8x16al(DST_0, CONST_1024, TMP30); + vis_pmerge(ZERO, REF_S4_1, REF_2); + + vis_mul8x16al(DST_1, CONST_1024, TMP32); + vis_padd16(REF_4, TMP0, TMP8); + + vis_mul8x16au(REF_S6, CONST_256, REF_4); + vis_padd16(REF_6, TMP2, TMP10); + + vis_mul8x16au(REF_S6_1, CONST_256, REF_6); + vis_padd16(TMP8, TMP12, TMP8); + + vis_padd16(TMP10, TMP14, TMP10); + + vis_padd16(TMP8, TMP30, TMP8); + + vis_padd16(TMP10, TMP32, TMP10); + vis_pack16(TMP8, 
DST_0); + + vis_pack16(TMP10, DST_1); + vis_st64(DST_0, dest[0]); + + vis_padd16(REF_0, TMP4, REF_0); + + vis_mul8x16al(DST_2, CONST_1024, TMP30); + vis_padd16(REF_2, TMP6, REF_2); + + vis_mul8x16al(DST_3, CONST_1024, TMP32); + vis_padd16(REF_0, REF_4, REF_0); + + vis_padd16(REF_2, REF_6, REF_2); + + vis_padd16(REF_0, TMP30, REF_0); + + /* stall */ + + vis_padd16(REF_2, TMP32, REF_2); + vis_pack16(REF_0, DST_2); + + vis_pack16(REF_2, DST_3); + vis_st64_2(DST_2, dest, 8); + dest += stride; + } while (--height); +} + +static void MC_avg_xy_8_vis (uint8_t * dest, uint8_t * _ref, + int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + unsigned long off = (unsigned long) ref & 0x7; + unsigned long off_plus_1 = off + 1; + int stride_8 = stride + 8; + + vis_set_gsr(4 << VIS_GSR_SCALEFACT_SHIFT); + + ref = vis_alignaddr(ref); + + vis_ld64(ref[0], TMP0); + vis_fzero(ZERO); + + vis_ld64_2(ref, 8, TMP2); + + vis_ld64(constants6[0], CONST_6); + + vis_ld64(constants256_1024[0], CONST_256); + vis_faligndata(TMP0, TMP2, REF_S0); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_S2); + } else { + vis_src1(TMP2, REF_S2); + } + + height >>= 1; + do { /* 31 cycles */ + vis_ld64_2(ref, stride, TMP0); + vis_mul8x16au(REF_S0, CONST_256, TMP8); + vis_pmerge(ZERO, REF_S0_1, TMP10); + + vis_ld64_2(ref, stride_8, TMP2); + ref += stride; + vis_mul8x16au(REF_S2, CONST_256, TMP12); + vis_pmerge(ZERO, REF_S2_1, TMP14); + + vis_alignaddr_g0((void *)off); + + vis_ld64_2(ref, stride, TMP4); + vis_faligndata(TMP0, TMP2, REF_S4); + + vis_ld64_2(ref, stride_8, TMP6); + ref += stride; + + vis_ld64(dest[0], DST_0); + vis_faligndata(TMP4, TMP6, REF_S0); + + vis_ld64_2(dest, stride, DST_2); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_S6); + vis_faligndata(TMP4, TMP6, REF_S2); + } else { + vis_src1(TMP2, REF_S6); + vis_src1(TMP6, REF_S2); + } + + vis_mul8x16al(DST_0, CONST_1024, TMP30); + 
vis_pmerge(ZERO, REF_S4, TMP22); + + vis_mul8x16al(DST_1, CONST_1024, TMP32); + vis_pmerge(ZERO, REF_S4_1, TMP24); + + vis_mul8x16au(REF_S6, CONST_256, TMP26); + vis_pmerge(ZERO, REF_S6_1, TMP28); + + vis_mul8x16au(REF_S0, CONST_256, REF_S4); + vis_padd16(TMP22, CONST_6, TMP22); + + vis_mul8x16au(REF_S0_1, CONST_256, REF_S6); + vis_padd16(TMP24, CONST_6, TMP24); + + vis_mul8x16al(DST_2, CONST_1024, REF_0); + vis_padd16(TMP22, TMP26, TMP22); + + vis_mul8x16al(DST_3, CONST_1024, REF_2); + vis_padd16(TMP24, TMP28, TMP24); + + vis_mul8x16au(REF_S2, CONST_256, TMP26); + vis_padd16(TMP8, TMP22, TMP8); + + vis_mul8x16au(REF_S2_1, CONST_256, TMP28); + vis_padd16(TMP10, TMP24, TMP10); + + vis_padd16(TMP8, TMP12, TMP8); + + vis_padd16(TMP10, TMP14, TMP10); + + vis_padd16(TMP8, TMP30, TMP8); + + vis_padd16(TMP10, TMP32, TMP10); + vis_pack16(TMP8, DST_0); + + vis_pack16(TMP10, DST_1); + vis_st64(DST_0, dest[0]); + dest += stride; + + vis_padd16(REF_S4, TMP22, TMP12); + + vis_padd16(REF_S6, TMP24, TMP14); + + vis_padd16(TMP12, TMP26, TMP12); + + vis_padd16(TMP14, TMP28, TMP14); + + vis_padd16(TMP12, REF_0, TMP12); + + vis_padd16(TMP14, REF_2, TMP14); + vis_pack16(TMP12, DST_2); + + vis_pack16(TMP14, DST_3); + vis_st64(DST_2, dest[0]); + dest += stride; + } while (--height); +} + +MPEG2_MC_EXTERN(vis); + +#endif /* defined(ARCH_SPARC) && defined(ENABLE_VIS) */ diff --git a/src/video_dec/libmpeg2/mpeg2.h b/src/video_dec/libmpeg2/mpeg2.h new file mode 100644 index 000000000..253f300a2 --- /dev/null +++ b/src/video_dec/libmpeg2/mpeg2.h @@ -0,0 +1,98 @@ +/* + * mpeg2.h + * Copyright (C) 1999-2001 Aaron Holtzman + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* Structure for the mpeg2dec decoder */ + +#ifndef MPEG2_H +#define MPEG2_H + +#include "libmpeg2_accel.h" + +typedef struct mpeg2dec_s { + xine_video_port_t * output; + uint32_t frame_format; + + /* this is where we keep the state of the decoder */ + struct picture_s * picture, *picture_base; + + uint32_t shift; + int new_sequence; + int is_sequence_needed; + int is_wait_for_ip_frames; + int frames_to_drop, drop_frame; + int in_slice; + int seek_mode, is_frame_needed; + + /* the maximum chunk size is determined by vbv_buffer_size */ + /* which is 224K for MP@ML streams. */ + /* (we make no pretenses of decoding anything more than that) */ + /* allocated in init - gcc has problems allocating such big structures */ + uint8_t * chunk_buffer, *chunk_base; + /* pointer to current position in chunk_buffer */ + uint8_t * chunk_ptr; + /* last start code ? */ + uint8_t code; + uint32_t chunk_size; + + int64_t pts; + uint32_t rff_pattern; + int force_aspect; + int force_pan_scan; + + /* AFD data can be found after a sequence, group or picture start code */ + /* and will be stored in afd_value_seen. 
Later it will be transferred to */ + /* a stream property and stored into afd_value_reported to detect changes */ + int afd_value_seen; + int afd_value_reported; + + xine_stream_t *stream; + + /* a spu decoder for possible closed captions */ + spu_decoder_t *cc_dec; + mpeg2dec_accel_t accel; + +} mpeg2dec_t ; + + +/* initialize mpeg2dec with an opaque user pointer */ +void mpeg2_init (mpeg2dec_t * mpeg2dec, + xine_video_port_t * output); + +/* destroy everything which was allocated, shutdown the output */ +void mpeg2_close (mpeg2dec_t * mpeg2dec); + +int mpeg2_decode_data (mpeg2dec_t * mpeg2dec, + uint8_t * data_start, uint8_t * data_end, + uint64_t pts); + +void mpeg2_find_sequence_header (mpeg2dec_t * mpeg2dec, + uint8_t * data_start, uint8_t * data_end); + +void mpeg2_flush (mpeg2dec_t * mpeg2dec); +void mpeg2_reset (mpeg2dec_t * mpeg2dec); +void mpeg2_discontinuity (mpeg2dec_t * mpeg2dec); + +/* Not needed, it is defined as static in decode.c, and no-one else called it + * currently + */ +/* void process_userdata(mpeg2dec_t *mpeg2dec, uint8_t *buffer); */ + +#endif diff --git a/src/video_dec/libmpeg2/mpeg2_internal.h b/src/video_dec/libmpeg2/mpeg2_internal.h new file mode 100644 index 000000000..2e42aace6 --- /dev/null +++ b/src/video_dec/libmpeg2/mpeg2_internal.h @@ -0,0 +1,294 @@ +/* + * mpeg2_internal.h + * Copyright (C) 2000-2002 Michel Lespinasse + * Copyright (C) 1999-2000 Aaron Holtzman + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef MPEG2_INTERNAL_H +#define MPEG2_INTERNAL_H + +#include +#include "accel_xvmc.h" + +#ifdef ENABLE_ALTIVEC +#include +#endif + +/* macroblock modes */ +#define MACROBLOCK_INTRA XINE_MACROBLOCK_INTRA +#define MACROBLOCK_PATTERN XINE_MACROBLOCK_PATTERN +#define MACROBLOCK_MOTION_BACKWARD XINE_MACROBLOCK_MOTION_BACKWARD +#define MACROBLOCK_MOTION_FORWARD XINE_MACROBLOCK_MOTION_FORWARD +#define MACROBLOCK_QUANT XINE_MACROBLOCK_QUANT +#define DCT_TYPE_INTERLACED XINE_MACROBLOCK_DCT_TYPE_INTERLACED + +/* motion_type */ +#define MOTION_TYPE_MASK (3*64) +#define MOTION_TYPE_BASE 64 +#define MC_FIELD (1*64) +#define MC_FRAME (2*64) +#define MC_16X8 (2*64) +#define MC_DMV (3*64) + +/* picture structure */ +#define TOP_FIELD VO_TOP_FIELD +#define BOTTOM_FIELD VO_BOTTOM_FIELD +#define FRAME_PICTURE VO_BOTH_FIELDS + +/* picture coding type (mpeg2 header) */ +#define I_TYPE 1 +#define P_TYPE 2 +#define B_TYPE 3 +#define D_TYPE 4 + +typedef struct motion_s { + uint8_t * ref[2][3]; + uint8_t ** ref2[2]; + int pmv[2][2]; + int f_code[2]; +} motion_t; + +typedef struct picture_s { + /* first, state that carries information from one macroblock to the */ + /* next inside a slice, and is never used outside of mpeg2_slice() */ + + /* DCT coefficients - should be kept aligned ! 
*/ + int16_t DCTblock[64]; + + /* XvMC DCT block and macroblock data for XvMC acceleration */ + xine_macroblocks_t *mc; + int XvMC_mb_type; + int XvMC_mv_field_sel[2][2]; + int XvMC_x; + int XvMC_y; + int XvMC_motion_type; + int XvMC_dmvector[2]; + int XvMC_cbp; + int XvMC_dct_type; + + /* bit parsing stuff */ + uint32_t bitstream_buf; /* current 32 bit working set of buffer */ + int bitstream_bits; /* used bits in working set */ + uint8_t * bitstream_ptr; /* buffer with stream data */ + + uint8_t * dest[3]; + int pitches[3]; + int offset; + unsigned int limit_x; + unsigned int limit_y_16; + unsigned int limit_y_8; + unsigned int limit_y; + + /* Motion vectors */ + /* The f_ and b_ correspond to the forward and backward motion */ + /* predictors */ + motion_t b_motion; + motion_t f_motion; + + /* predictor for DC coefficients in intra blocks */ + int16_t dc_dct_pred[3]; + + int quantizer_scale; /* remove */ + int current_field; /* remove */ + int dmv_offset; /* remove */ + unsigned int v_offset; /* remove */ + + + /* now non-slice-specific information */ + + /* sequence header stuff */ + uint8_t intra_quantizer_matrix [64]; + uint8_t non_intra_quantizer_matrix [64]; + int load_intra_quantizer_matrix; + int load_non_intra_quantizer_matrix; + + /* The width and height of the picture snapped to macroblock units */ + int coded_picture_width; + int coded_picture_height; + + /* The width and height as it appears on header sequence */ + unsigned int display_width, display_height; + + /* picture header stuff */ + + /* what type of picture this is (I, P, B, D) */ + int picture_coding_type; + + int vbv_delay; + int low_delay; + + /* picture coding extension stuff */ + + /* quantization factor for intra dc coefficients */ + int intra_dc_precision; + /* top/bottom/both fields */ + int picture_structure; + /* bool to indicate all predictions are frame based */ + int frame_pred_frame_dct; + /* bool to indicate whether intra blocks have motion vectors */ + /* (for concealment) */ 
+ int concealment_motion_vectors; + /* bit to indicate which quantization table to use */ + int q_scale_type; + /* bool to use different vlc tables */ + int intra_vlc_format; + /* used for DMV MC */ + int top_field_first; + + /* stuff derived from bitstream */ + + /* pointer to the zigzag scan we're supposed to be using */ + uint8_t * scan; + + struct vo_frame_s * current_frame; + struct vo_frame_s * forward_reference_frame; + struct vo_frame_s * backward_reference_frame; + + int frame_width, frame_height; + + int second_field; + + int mpeg1; + + int skip_non_intra_dct; + + /* these things are not needed by the decoder */ + /* this is a temporary interface, we will build a better one later. */ + int aspect_ratio_information; + int saved_aspect_ratio; + int frame_rate_code; + int progressive_sequence; + int repeat_first_field; + int progressive_frame; + uint32_t frame_centre_horizontal_offset; + uint32_t frame_centre_vertical_offset; + uint32_t video_format; + uint32_t colour_description; + uint32_t colour_primatives; + uint32_t transfer_characteristics; + uint32_t matrix_coefficients; + uint32_t display_horizontal_size; + uint32_t display_vertical_size; + uint32_t drop_frame_flag; + uint32_t time_code_hours; + uint32_t time_code_minutes; + uint32_t time_code_seconds; + uint32_t time_code_pictures; + uint32_t closed_gop; + uint32_t broken_link; + + int bitrate; + int frame_rate_ext_n; + int frame_rate_ext_d; + +} picture_t; + +typedef struct cpu_state_s { +#ifdef ARCH_PPC + uint8_t regv[12*16]; +#endif + int dummy; +} cpu_state_t; + +/* cpu_state.c */ +extern void (* mpeg2_cpu_state_save) (cpu_state_t * state); +extern void (* mpeg2_cpu_state_restore) (cpu_state_t * state); +void mpeg2_cpu_state_init (uint32_t mm_accel); + +/* header.c */ +extern uint8_t mpeg2_scan_norm[64]; +extern uint8_t mpeg2_scan_alt[64]; +void mpeg2_header_state_init (picture_t * picture); +int mpeg2_header_picture (picture_t * picture, uint8_t * buffer); +int mpeg2_header_sequence (picture_t 
* picture, uint8_t * buffer); +int mpeg2_header_extension (picture_t * picture, uint8_t * buffer); +int mpeg2_header_group_of_pictures (picture_t * picture, uint8_t * buffer); + +/* idct.c */ +extern void (* mpeg2_idct_copy) (int16_t * block, uint8_t * dest, int stride); +extern void (* mpeg2_idct_add) (int16_t * block, uint8_t * dest, int stride); +extern void (* mpeg2_idct) (int16_t * block); +extern void (* mpeg2_zero_block) (int16_t * block); +void mpeg2_idct_init (uint32_t mm_accel); + +/* idct_mlib.c */ +void mpeg2_idct_add_mlib (int16_t * block, uint8_t * dest, int stride); +void mpeg2_idct_copy_mlib_non_ieee (int16_t * block, uint8_t * dest, + int stride); +void mpeg2_idct_add_mlib_non_ieee (int16_t * block, uint8_t * dest, + int stride); +void mpeg2_idct_mlib (int16_t * block); + +/* idct_mmx.c */ +void mpeg2_idct_copy_mmxext (int16_t * block, uint8_t * dest, int stride); +void mpeg2_idct_add_mmxext (int16_t * block, uint8_t * dest, int stride); +void mpeg2_idct_mmxext (int16_t * block); +void mpeg2_idct_copy_mmx (int16_t * block, uint8_t * dest, int stride); +void mpeg2_idct_add_mmx (int16_t * block, uint8_t * dest, int stride); +void mpeg2_idct_mmx (int16_t * block); +void mpeg2_zero_block_mmx (int16_t * block); +void mpeg2_idct_mmx_init (void); + +/* idct_altivec.c */ +# ifdef ENABLE_ALTIVEC +void mpeg2_idct_copy_altivec (vector signed short * block, unsigned char * dest, + int stride); +void mpeg2_idct_add_altivec (vector signed short * block, unsigned char * dest, + int stride); +# else /* ! 
ENABLE_ALTIVEC */ +void mpeg2_idct_copy_altivec (signed short * block, unsigned char * dest, + int stride); +void mpeg2_idct_add_altivec (signed short * block, unsigned char * dest, + int stride); +# endif /* ENABLE_ALTIVEC */ +void mpeg2_idct_altivec_init (void); + +/* motion_comp.c */ +void mpeg2_mc_init (uint32_t mm_accel); + +typedef struct mpeg2_mc_s { + void (* put [8]) (uint8_t * dst, uint8_t *, int32_t, int32_t); + void (* avg [8]) (uint8_t * dst, uint8_t *, int32_t, int32_t); +} mpeg2_mc_t; + +#define MPEG2_MC_EXTERN(x) mpeg2_mc_t mpeg2_mc_##x = { \ + {MC_put_o_16_##x, MC_put_x_16_##x, MC_put_y_16_##x, MC_put_xy_16_##x, \ + MC_put_o_8_##x, MC_put_x_8_##x, MC_put_y_8_##x, MC_put_xy_8_##x}, \ + {MC_avg_o_16_##x, MC_avg_x_16_##x, MC_avg_y_16_##x, MC_avg_xy_16_##x, \ + MC_avg_o_8_##x, MC_avg_x_8_##x, MC_avg_y_8_##x, MC_avg_xy_8_##x} \ +}; + +extern mpeg2_mc_t mpeg2_mc; +extern mpeg2_mc_t mpeg2_mc_c; +extern mpeg2_mc_t mpeg2_mc_mmx; +extern mpeg2_mc_t mpeg2_mc_mmxext; +extern mpeg2_mc_t mpeg2_mc_3dnow; +extern mpeg2_mc_t mpeg2_mc_altivec; +extern mpeg2_mc_t mpeg2_mc_mlib; +extern mpeg2_mc_t mpeg2_mc_vis; + +/* slice.c */ +void mpeg2_slice (picture_t * picture, int code, uint8_t * buffer); + +/* stats.c */ +void mpeg2_stats (int code, uint8_t * buffer); + + +#endif diff --git a/src/video_dec/libmpeg2/slice.c b/src/video_dec/libmpeg2/slice.c new file mode 100644 index 000000000..8247a9a24 --- /dev/null +++ b/src/video_dec/libmpeg2/slice.c @@ -0,0 +1,1833 @@ +/* + * slice.c + * Copyright (C) 2000-2002 Michel Lespinasse + * Copyright (C) 1999-2000 Aaron Holtzman + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#include + +#include +#include +#include "mpeg2_internal.h" +#include + +#include "vlc.h" + +static const int non_linear_quantizer_scale [] = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 10, 12, 14, 16, 18, 20, 22, + 24, 28, 32, 36, 40, 44, 48, 52, + 56, 64, 72, 80, 88, 96, 104, 112 +}; + +static inline int get_macroblock_modes (picture_t * picture) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + int macroblock_modes; + const MBtab * tab; + + switch (picture->picture_coding_type) { + case I_TYPE: + + tab = MB_I + UBITS (bit_buf, 1); + DUMPBITS (bit_buf, bits, tab->len); + macroblock_modes = tab->modes; + + if ((! 
(picture->frame_pred_frame_dct)) && + (picture->picture_structure == FRAME_PICTURE)) { + macroblock_modes |= UBITS (bit_buf, 1) * DCT_TYPE_INTERLACED; + DUMPBITS (bit_buf, bits, 1); + } + + return macroblock_modes; + + case P_TYPE: + + tab = MB_P + UBITS (bit_buf, 5); + DUMPBITS (bit_buf, bits, tab->len); + macroblock_modes = tab->modes; + + if (picture->picture_structure != FRAME_PICTURE) { + if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) { + macroblock_modes |= UBITS (bit_buf, 2) * MOTION_TYPE_BASE; + DUMPBITS (bit_buf, bits, 2); + } + return macroblock_modes; + } else if (picture->frame_pred_frame_dct) { + if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) + macroblock_modes |= MC_FRAME; + return macroblock_modes; + } else { + if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) { + macroblock_modes |= UBITS (bit_buf, 2) * MOTION_TYPE_BASE; + DUMPBITS (bit_buf, bits, 2); + } + if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN)) { + macroblock_modes |= UBITS (bit_buf, 1) * DCT_TYPE_INTERLACED; + DUMPBITS (bit_buf, bits, 1); + } + return macroblock_modes; + } + + case B_TYPE: + + tab = MB_B + UBITS (bit_buf, 6); + DUMPBITS (bit_buf, bits, tab->len); + macroblock_modes = tab->modes; + + if (picture->picture_structure != FRAME_PICTURE) { + if (! (macroblock_modes & MACROBLOCK_INTRA)) { + macroblock_modes |= UBITS (bit_buf, 2) * MOTION_TYPE_BASE; + DUMPBITS (bit_buf, bits, 2); + } + return macroblock_modes; + } else if (picture->frame_pred_frame_dct) { + /* if (! 
(macroblock_modes & MACROBLOCK_INTRA)) */ + macroblock_modes |= MC_FRAME; + return macroblock_modes; + } else { + if (macroblock_modes & MACROBLOCK_INTRA) + goto intra; + macroblock_modes |= UBITS (bit_buf, 2) * MOTION_TYPE_BASE; + DUMPBITS (bit_buf, bits, 2); + if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN)) { + intra: + macroblock_modes |= UBITS (bit_buf, 1) * DCT_TYPE_INTERLACED; + DUMPBITS (bit_buf, bits, 1); + } + return macroblock_modes; + } + + case D_TYPE: + + DUMPBITS (bit_buf, bits, 1); + return MACROBLOCK_INTRA; + + default: + return 0; + } +#undef bit_buf +#undef bits +#undef bit_ptr +} + +static inline int get_quantizer_scale (picture_t * picture) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + + int quantizer_scale_code; + + quantizer_scale_code = UBITS (bit_buf, 5); + DUMPBITS (bit_buf, bits, 5); + + if (picture->q_scale_type) + return non_linear_quantizer_scale [quantizer_scale_code]; + else + return quantizer_scale_code << 1; +#undef bit_buf +#undef bits +#undef bit_ptr +} + +static inline int get_motion_delta (picture_t * picture, int f_code) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + + int delta; + int sign; + const MVtab * tab; + + if (bit_buf & 0x80000000) { + DUMPBITS (bit_buf, bits, 1); + return 0; + } else if (bit_buf >= 0x0c000000) { + + tab = MV_4 + UBITS (bit_buf, 4); + delta = (tab->delta << f_code) + 1; + bits += tab->len + f_code + 1; + bit_buf <<= tab->len; + + sign = SBITS (bit_buf, 1); + bit_buf <<= 1; + + if (f_code) + delta += UBITS (bit_buf, f_code); + bit_buf <<= f_code; + + return (delta ^ sign) - sign; + + } else { + + tab = MV_10 + UBITS (bit_buf, 10); + delta = (tab->delta << f_code) + 1; + bits += tab->len + 1; + bit_buf <<= tab->len; + + sign = SBITS (bit_buf, 1); + bit_buf <<= 1; + + if (f_code) { + NEEDBITS (bit_buf, bits, bit_ptr); + delta 
+= UBITS (bit_buf, f_code); + DUMPBITS (bit_buf, bits, f_code); + } + + return (delta ^ sign) - sign; + + } +#undef bit_buf +#undef bits +#undef bit_ptr +} + +static inline int bound_motion_vector (int vec, int f_code) +{ +#if 1 + unsigned int limit; + int sign; + + limit = 16 << f_code; + + if ((unsigned int)(vec + limit) < 2 * limit) + return vec; + else { + sign = ((int32_t)vec) >> 31; + return vec - ((2 * limit) ^ sign) + sign; + } +#else + return ((int32_t)vector << (27 - f_code)) >> (27 - f_code); +#endif +} + +static inline int get_dmv (picture_t * picture) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + + const DMVtab * tab; + + tab = DMV_2 + UBITS (bit_buf, 2); + DUMPBITS (bit_buf, bits, tab->len); + return tab->dmv; +#undef bit_buf +#undef bits +#undef bit_ptr +} + +static inline int get_coded_block_pattern (picture_t * picture) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + + const CBPtab * tab; + + NEEDBITS (bit_buf, bits, bit_ptr); + + if (bit_buf >= 0x20000000) { + + tab = CBP_7 + (UBITS (bit_buf, 7) - 16); + DUMPBITS (bit_buf, bits, tab->len); + return tab->cbp; + + } else { + + tab = CBP_9 + UBITS (bit_buf, 9); + DUMPBITS (bit_buf, bits, tab->len); + return tab->cbp; + } + +#undef bit_buf +#undef bits +#undef bit_ptr +} + +static inline int get_luma_dc_dct_diff (picture_t * picture) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + const DCtab * tab; + int size; + int dc_diff; + + if (bit_buf < 0xf8000000) { + tab = DC_lum_5 + UBITS (bit_buf, 5); + size = tab->size; + if (size) { + bits += tab->len + size; + bit_buf <<= tab->len; + dc_diff = + UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size); + bit_buf <<= size; + return dc_diff; + } else { + DUMPBITS (bit_buf, bits, 3); + return 0; + } + } else { + tab 
= DC_long + (UBITS (bit_buf, 9) - 0x1e0); + size = tab->size; + DUMPBITS (bit_buf, bits, tab->len); + NEEDBITS (bit_buf, bits, bit_ptr); + dc_diff = UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size); + DUMPBITS (bit_buf, bits, size); + return dc_diff; + } +#undef bit_buf +#undef bits +#undef bit_ptr +} + +static inline int get_chroma_dc_dct_diff (picture_t * picture) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + const DCtab * tab; + int size; + int dc_diff; + + if (bit_buf < 0xf8000000) { + tab = DC_chrom_5 + UBITS (bit_buf, 5); + size = tab->size; + if (size) { + bits += tab->len + size; + bit_buf <<= tab->len; + dc_diff = + UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size); + bit_buf <<= size; + return dc_diff; + } else { + DUMPBITS (bit_buf, bits, 2); + return 0; + } + } else { + tab = DC_long + (UBITS (bit_buf, 10) - 0x3e0); + size = tab->size; + DUMPBITS (bit_buf, bits, tab->len + 1); + NEEDBITS (bit_buf, bits, bit_ptr); + dc_diff = UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size); + DUMPBITS (bit_buf, bits, size); + return dc_diff; + } +#undef bit_buf +#undef bits +#undef bit_ptr +} + +#define SATURATE(val) \ +do { \ + if ((uint32_t)(val + 2048) > 4095) \ + val = (val > 0) ? 
2047 : -2048; \ +} while (0) + +static void get_intra_block_B14 (picture_t * picture) +{ + int i; + int j; + int val; + uint8_t * scan = picture->scan; + uint8_t * quant_matrix = picture->intra_quantizer_matrix; + int quantizer_scale = picture->quantizer_scale; + int mismatch; + const DCTtab * tab; + uint32_t bit_buf; + int bits; + uint8_t * bit_ptr; + int16_t * dest; + + dest = picture->DCTblock; + i = 0; + mismatch = ~dest[0]; + + bit_buf = picture->bitstream_buf; + bits = picture->bitstream_bits; + bit_ptr = picture->bitstream_ptr; + + NEEDBITS (bit_buf, bits, bit_ptr); + + while (1) { + if (bit_buf >= 0x28000000) { + + tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5); + + i += tab->run; + if (i >= 64) + break; /* end of block */ + + normal_code: + j = scan[i]; + bit_buf <<= tab->len; + bits += tab->len + 1; + val = (tab->level * quantizer_scale * quant_matrix[j]) >> 4; + + /* if (bitstream_get (1)) val = -val; */ + val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1); + + SATURATE (val); + dest[j] = val; + mismatch ^= val; + + bit_buf <<= 1; + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } else if (bit_buf >= 0x04000000) { + + tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4); + + i += tab->run; + if (i < 64) + goto normal_code; + + /* escape code */ + + i += UBITS (bit_buf << 6, 6) - 64; + if (i >= 64) + break; /* illegal, check needed to avoid buffer overflow */ + + j = scan[i]; + + DUMPBITS (bit_buf, bits, 12); + NEEDBITS (bit_buf, bits, bit_ptr); + val = (SBITS (bit_buf, 12) * + quantizer_scale * quant_matrix[j]) / 16; + + SATURATE (val); + dest[j] = val; + mismatch ^= val; + + DUMPBITS (bit_buf, bits, 12); + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } else if (bit_buf >= 0x02000000) { + tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00800000) { + tab = DCT_13 + (UBITS (bit_buf, 13) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 
0x00200000) { + tab = DCT_15 + (UBITS (bit_buf, 15) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else { + tab = DCT_16 + UBITS (bit_buf, 16); + bit_buf <<= 16; + GETWORD (bit_buf, bits + 16, bit_ptr); + i += tab->run; + if (i < 64) + goto normal_code; + } + break; /* illegal, check needed to avoid buffer overflow */ + } + dest[63] ^= mismatch & 1; + DUMPBITS (bit_buf, bits, 2); /* dump end of block code */ + picture->bitstream_buf = bit_buf; + picture->bitstream_bits = bits; + picture->bitstream_ptr = bit_ptr; +} + +static void get_intra_block_B15 (picture_t * picture) +{ + int i; + int j; + int val; + uint8_t * scan = picture->scan; + uint8_t * quant_matrix = picture->intra_quantizer_matrix; + int quantizer_scale = picture->quantizer_scale; + int mismatch; + const DCTtab * tab; + uint32_t bit_buf; + int bits; + uint8_t * bit_ptr; + int16_t * dest; + + dest = picture->DCTblock; + i = 0; + mismatch = ~dest[0]; + + bit_buf = picture->bitstream_buf; + bits = picture->bitstream_bits; + bit_ptr = picture->bitstream_ptr; + + NEEDBITS (bit_buf, bits, bit_ptr); + + while (1) { + if (bit_buf >= 0x04000000) { + + tab = DCT_B15_8 + (UBITS (bit_buf, 8) - 4); + + i += tab->run; + if (i < 64) { + + normal_code: + j = scan[i]; + bit_buf <<= tab->len; + bits += tab->len + 1; + val = (tab->level * quantizer_scale * quant_matrix[j]) >> 4; + + /* if (bitstream_get (1)) val = -val; */ + val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1); + + SATURATE (val); + dest[j] = val; + mismatch ^= val; + + bit_buf <<= 1; + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } else { + + /* end of block. 
I commented out this code because if we */ + /* dont exit here we will still exit at the later test :) */ + + /* if (i >= 128) break; */ /* end of block */ + + /* escape code */ + + i += UBITS (bit_buf << 6, 6) - 64; + if (i >= 64) + break; /* illegal, check against buffer overflow */ + + j = scan[i]; + + DUMPBITS (bit_buf, bits, 12); + NEEDBITS (bit_buf, bits, bit_ptr); + val = (SBITS (bit_buf, 12) * + quantizer_scale * quant_matrix[j]) / 16; + + SATURATE (val); + dest[j] = val; + mismatch ^= val; + + DUMPBITS (bit_buf, bits, 12); + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } + } else if (bit_buf >= 0x02000000) { + tab = DCT_B15_10 + (UBITS (bit_buf, 10) - 8); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00800000) { + tab = DCT_13 + (UBITS (bit_buf, 13) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00200000) { + tab = DCT_15 + (UBITS (bit_buf, 15) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else { + tab = DCT_16 + UBITS (bit_buf, 16); + bit_buf <<= 16; + GETWORD (bit_buf, bits + 16, bit_ptr); + i += tab->run; + if (i < 64) + goto normal_code; + } + break; /* illegal, check needed to avoid buffer overflow */ + } + dest[63] ^= mismatch & 1; + DUMPBITS (bit_buf, bits, 4); /* dump end of block code */ + picture->bitstream_buf = bit_buf; + picture->bitstream_bits = bits; + picture->bitstream_ptr = bit_ptr; +} + +static void get_non_intra_block (picture_t * picture) +{ + int i; + int j; + int val; + uint8_t * scan = picture->scan; + uint8_t * quant_matrix = picture->non_intra_quantizer_matrix; + int quantizer_scale = picture->quantizer_scale; + int mismatch; + const DCTtab * tab; + uint32_t bit_buf; + int bits; + uint8_t * bit_ptr; + int16_t * dest; + + i = -1; + mismatch = 1; + dest = picture->DCTblock; + + bit_buf = picture->bitstream_buf; + bits = picture->bitstream_bits; + bit_ptr = picture->bitstream_ptr; + + NEEDBITS (bit_buf, bits, bit_ptr); + if (bit_buf >= 
0x28000000) { + tab = DCT_B14DC_5 + (UBITS (bit_buf, 5) - 5); + goto entry_1; + } else + goto entry_2; + + while (1) { + if (bit_buf >= 0x28000000) { + + tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5); + + entry_1: + i += tab->run; + if (i >= 64) + break; /* end of block */ + + normal_code: + j = scan[i]; + bit_buf <<= tab->len; + bits += tab->len + 1; + val = ((2*tab->level+1) * quantizer_scale * quant_matrix[j]) >> 5; + + /* if (bitstream_get (1)) val = -val; */ + val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1); + + SATURATE (val); + dest[j] = val; + mismatch ^= val; + + bit_buf <<= 1; + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } + + entry_2: + if (bit_buf >= 0x04000000) { + + tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4); + + i += tab->run; + if (i < 64) + goto normal_code; + + /* escape code */ + + i += UBITS (bit_buf << 6, 6) - 64; + if (i >= 64) + break; /* illegal, check needed to avoid buffer overflow */ + + j = scan[i]; + + DUMPBITS (bit_buf, bits, 12); + NEEDBITS (bit_buf, bits, bit_ptr); + val = 2 * (SBITS (bit_buf, 12) + SBITS (bit_buf, 1)) + 1; + val = (val * quantizer_scale * quant_matrix[j]) / 32; + + SATURATE (val); + dest[j] = val; + mismatch ^= val; + + DUMPBITS (bit_buf, bits, 12); + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } else if (bit_buf >= 0x02000000) { + tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00800000) { + tab = DCT_13 + (UBITS (bit_buf, 13) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00200000) { + tab = DCT_15 + (UBITS (bit_buf, 15) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else { + tab = DCT_16 + UBITS (bit_buf, 16); + bit_buf <<= 16; + GETWORD (bit_buf, bits + 16, bit_ptr); + i += tab->run; + if (i < 64) + goto normal_code; + } + break; /* illegal, check needed to avoid buffer overflow */ + } + dest[63] ^= mismatch & 1; + DUMPBITS (bit_buf, bits, 2); /* dump end of block 
code */ + picture->bitstream_buf = bit_buf; + picture->bitstream_bits = bits; + picture->bitstream_ptr = bit_ptr; +} + +static void get_mpeg1_intra_block (picture_t * picture) +{ + int i; + int j; + int val; + uint8_t * scan = picture->scan; + uint8_t * quant_matrix = picture->intra_quantizer_matrix; + int quantizer_scale = picture->quantizer_scale; + const DCTtab * tab; + uint32_t bit_buf; + int bits; + uint8_t * bit_ptr; + int16_t * dest; + + i = 0; + dest = picture->DCTblock; + + bit_buf = picture->bitstream_buf; + bits = picture->bitstream_bits; + bit_ptr = picture->bitstream_ptr; + + NEEDBITS (bit_buf, bits, bit_ptr); + + while (1) { + if (bit_buf >= 0x28000000) { + + tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5); + + i += tab->run; + if (i >= 64) + break; /* end of block */ + + normal_code: + j = scan[i]; + bit_buf <<= tab->len; + bits += tab->len + 1; + val = (tab->level * quantizer_scale * quant_matrix[j]) >> 4; + + /* oddification */ + val = (val - 1) | 1; + + /* if (bitstream_get (1)) val = -val; */ + val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1); + + SATURATE (val); + dest[j] = val; + + bit_buf <<= 1; + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } else if (bit_buf >= 0x04000000) { + + tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4); + + i += tab->run; + if (i < 64) + goto normal_code; + + /* escape code */ + + i += UBITS (bit_buf << 6, 6) - 64; + if (i >= 64) + break; /* illegal, check needed to avoid buffer overflow */ + + j = scan[i]; + + DUMPBITS (bit_buf, bits, 12); + NEEDBITS (bit_buf, bits, bit_ptr); + val = SBITS (bit_buf, 8); + if (! 
(val & 0x7f)) { + DUMPBITS (bit_buf, bits, 8); + val = UBITS (bit_buf, 8) + 2 * val; + } + val = (val * quantizer_scale * quant_matrix[j]) / 16; + + /* oddification */ + val = (val + ~SBITS (val, 1)) | 1; + + SATURATE (val); + dest[j] = val; + + DUMPBITS (bit_buf, bits, 8); + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } else if (bit_buf >= 0x02000000) { + tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00800000) { + tab = DCT_13 + (UBITS (bit_buf, 13) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00200000) { + tab = DCT_15 + (UBITS (bit_buf, 15) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else { + tab = DCT_16 + UBITS (bit_buf, 16); + bit_buf <<= 16; + GETWORD (bit_buf, bits + 16, bit_ptr); + i += tab->run; + if (i < 64) + goto normal_code; + } + break; /* illegal, check needed to avoid buffer overflow */ + } + DUMPBITS (bit_buf, bits, 2); /* dump end of block code */ + picture->bitstream_buf = bit_buf; + picture->bitstream_bits = bits; + picture->bitstream_ptr = bit_ptr; +} + +static void get_mpeg1_non_intra_block (picture_t * picture) +{ + int i; + int j; + int val; + uint8_t * scan = picture->scan; + uint8_t * quant_matrix = picture->non_intra_quantizer_matrix; + int quantizer_scale = picture->quantizer_scale; + const DCTtab * tab; + uint32_t bit_buf; + int bits; + uint8_t * bit_ptr; + int16_t * dest; + + i = -1; + dest = picture->DCTblock; + + bit_buf = picture->bitstream_buf; + bits = picture->bitstream_bits; + bit_ptr = picture->bitstream_ptr; + + NEEDBITS (bit_buf, bits, bit_ptr); + if (bit_buf >= 0x28000000) { + tab = DCT_B14DC_5 + (UBITS (bit_buf, 5) - 5); + goto entry_1; + } else + goto entry_2; + + while (1) { + if (bit_buf >= 0x28000000) { + + tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5); + + entry_1: + i += tab->run; + if (i >= 64) + break; /* end of block */ + + normal_code: + j = scan[i]; + bit_buf <<= tab->len; 
+ bits += tab->len + 1; + val = ((2*tab->level+1) * quantizer_scale * quant_matrix[j]) >> 5; + + /* oddification */ + val = (val - 1) | 1; + + /* if (bitstream_get (1)) val = -val; */ + val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1); + + SATURATE (val); + dest[j] = val; + + bit_buf <<= 1; + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } + + entry_2: + if (bit_buf >= 0x04000000) { + + tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4); + + i += tab->run; + if (i < 64) + goto normal_code; + + /* escape code */ + + i += UBITS (bit_buf << 6, 6) - 64; + if (i >= 64) + break; /* illegal, check needed to avoid buffer overflow */ + + j = scan[i]; + + DUMPBITS (bit_buf, bits, 12); + NEEDBITS (bit_buf, bits, bit_ptr); + val = SBITS (bit_buf, 8); + if (! (val & 0x7f)) { + DUMPBITS (bit_buf, bits, 8); + val = UBITS (bit_buf, 8) + 2 * val; + } + val = 2 * (val + SBITS (val, 1)) + 1; + val = (val * quantizer_scale * quant_matrix[j]) / 32; + + /* oddification */ + val = (val + ~SBITS (val, 1)) | 1; + + SATURATE (val); + dest[j] = val; + + DUMPBITS (bit_buf, bits, 8); + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } else if (bit_buf >= 0x02000000) { + tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00800000) { + tab = DCT_13 + (UBITS (bit_buf, 13) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00200000) { + tab = DCT_15 + (UBITS (bit_buf, 15) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else { + tab = DCT_16 + UBITS (bit_buf, 16); + bit_buf <<= 16; + GETWORD (bit_buf, bits + 16, bit_ptr); + i += tab->run; + if (i < 64) + goto normal_code; + } + break; /* illegal, check needed to avoid buffer overflow */ + } + DUMPBITS (bit_buf, bits, 2); /* dump end of block code */ + picture->bitstream_buf = bit_buf; + picture->bitstream_bits = bits; + picture->bitstream_ptr = bit_ptr; +} + +static inline void slice_intra_DCT (picture_t * picture, int cc, 
+ uint8_t * dest, int stride) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + NEEDBITS (bit_buf, bits, bit_ptr); + /* Get the intra DC coefficient and inverse quantize it */ + if (cc == 0) + picture->dc_dct_pred[0] += get_luma_dc_dct_diff (picture); + else + picture->dc_dct_pred[cc] += get_chroma_dc_dct_diff (picture); + picture->DCTblock[0] = + picture->dc_dct_pred[cc] << (3 - picture->intra_dc_precision); + + if (picture->mpeg1) { + if (picture->picture_coding_type != D_TYPE) + get_mpeg1_intra_block (picture); + } else if (picture->intra_vlc_format) + get_intra_block_B15 (picture); + else + get_intra_block_B14 (picture); + mpeg2_idct_copy (picture->DCTblock, dest, stride); +#undef bit_buf +#undef bits +#undef bit_ptr +} + +static inline void slice_non_intra_DCT (picture_t * picture, uint8_t * dest, + int stride) +{ + if (picture->mpeg1) + get_mpeg1_non_intra_block (picture); + else + get_non_intra_block (picture); + mpeg2_idct_add (picture->DCTblock, dest, stride); +} + +#define MOTION(table,ref,motion_x,motion_y,size,y) \ + pos_x = 2 * picture->offset + motion_x; \ + pos_y = 2 * picture->v_offset + motion_y + 2 * y; \ + if (pos_x > picture->limit_x) { \ + pos_x = ((int)pos_x < 0) ? 0 : picture->limit_x; \ + motion_x = pos_x - 2 * picture->offset; \ + } \ + if (pos_y > picture->limit_y_ ## size){ \ + pos_y = ((int)pos_y < 0) ? 
0 : picture->limit_y_ ## size; \ + motion_y = pos_y - 2 * picture->v_offset - 2 * y; \ + } \ + xy_half = ((pos_y & 1) << 1) | (pos_x & 1); \ + table[xy_half] (picture->dest[0] + y * picture->pitches[0] + \ + picture->offset, ref[0] + (pos_x >> 1) + \ + (pos_y >> 1) * picture->pitches[0], picture->pitches[0], \ + size); \ + motion_x /= 2; motion_y /= 2; \ + xy_half = ((motion_y & 1) << 1) | (motion_x & 1); \ + table[4+xy_half] (picture->dest[1] + y/2 * picture->pitches[1] + \ + (picture->offset >> 1), ref[1] + \ + (((picture->offset + motion_x) >> 1) + \ + ((((picture->v_offset + motion_y) >> 1) + y/2) * \ + picture->pitches[1])), picture->pitches[1], size/2); \ + table[4+xy_half] (picture->dest[2] + y/2 * picture->pitches[2] + \ + (picture->offset >> 1), ref[2] + \ + (((picture->offset + motion_x) >> 1) + \ + ((((picture->v_offset + motion_y) >> 1) + y/2) * \ + picture->pitches[2])), picture->pitches[2], size/2) \ + +#define MOTION_FIELD(table,ref,motion_x,motion_y,dest_field,op,src_field) \ + pos_x = 2 * picture->offset + motion_x; \ + pos_y = picture->v_offset + motion_y; \ + if (pos_x > picture->limit_x) { \ + pos_x = ((int)pos_x < 0) ? 0 : picture->limit_x; \ + motion_x = pos_x - 2 * picture->offset; \ + } \ + if (pos_y > picture->limit_y){ \ + pos_y = ((int)pos_y < 0) ? 
0 : picture->limit_y; \ + motion_y = pos_y - picture->v_offset; \ + } \ + xy_half = ((pos_y & 1) << 1) | (pos_x & 1); \ + table[xy_half] (picture->dest[0] + dest_field * picture->pitches[0] + \ + picture->offset, \ + (ref[0] + (pos_x >> 1) + \ + ((pos_y op) + src_field) * picture->pitches[0]), \ + 2 * picture->pitches[0], 8); \ + motion_x /= 2; motion_y /= 2; \ + xy_half = ((motion_y & 1) << 1) | (motion_x & 1); \ + table[4+xy_half] (picture->dest[1] + dest_field * picture->pitches[1] + \ + (picture->offset >> 1), ref[1] + \ + (((picture->offset + motion_x) >> 1) + \ + (((picture->v_offset >> 1) + \ + (motion_y op) + src_field) * picture->pitches[1])), \ + 2 * picture->pitches[1], 4); \ + table[4+xy_half] (picture->dest[2] + dest_field * picture->pitches[2] + \ + (picture->offset >> 1), ref[2] + \ + (((picture->offset + motion_x) >> 1) + \ + (((picture->v_offset >> 1) + \ + (motion_y op) + src_field) * picture->pitches[2])), \ + 2 * picture->pitches[2], 4) + +static void motion_mp1 (picture_t * picture, motion_t * motion, + void (** table) (uint8_t *, uint8_t *, int, int)) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + int motion_x, motion_y; + unsigned int pos_x, pos_y, xy_half; + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_x = (motion->pmv[0][0] + + (get_motion_delta (picture, + motion->f_code[0]) << motion->f_code[1])); + motion_x = bound_motion_vector (motion_x, + motion->f_code[0] + motion->f_code[1]); + motion->pmv[0][0] = motion_x; + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_y = (motion->pmv[0][1] + + (get_motion_delta (picture, + motion->f_code[0]) << motion->f_code[1])); + motion_y = bound_motion_vector (motion_y, + motion->f_code[0] + motion->f_code[1]); + motion->pmv[0][1] = motion_y; + + MOTION (table, motion->ref[0], motion_x, motion_y, 16, 0); +#undef bit_buf +#undef bits +#undef bit_ptr +} + +static void motion_fr_frame (picture_t * picture, motion_t * motion, + 
void (** table) (uint8_t *, uint8_t *, int, int)) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + int motion_x, motion_y; + unsigned int pos_x, pos_y, xy_half; + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_x = motion->pmv[0][0] + get_motion_delta (picture, + motion->f_code[0]); + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); + motion->pmv[1][0] = motion->pmv[0][0] = motion_x; + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_y = motion->pmv[0][1] + get_motion_delta (picture, + motion->f_code[1]); + motion_y = bound_motion_vector (motion_y, motion->f_code[1]); + motion->pmv[1][1] = motion->pmv[0][1] = motion_y; + + MOTION (table, motion->ref[0], motion_x, motion_y, 16, 0); +#undef bit_buf +#undef bits +#undef bit_ptr +} + +static void motion_fr_field (picture_t * picture, motion_t * motion, + void (** table) (uint8_t *, uint8_t *, int, int)) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + int motion_x, motion_y, field; + unsigned int pos_x, pos_y, xy_half; + + NEEDBITS (bit_buf, bits, bit_ptr); + field = UBITS (bit_buf, 1); + DUMPBITS (bit_buf, bits, 1); + + motion_x = motion->pmv[0][0] + get_motion_delta (picture, + motion->f_code[0]); + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); + motion->pmv[0][0] = motion_x; + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_y = (motion->pmv[0][1] >> 1) + get_motion_delta (picture, + motion->f_code[1]); + /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */ + motion->pmv[0][1] = motion_y << 1; + + MOTION_FIELD (table, motion->ref[0], motion_x, motion_y, 0, & ~1, field); + + NEEDBITS (bit_buf, bits, bit_ptr); + field = UBITS (bit_buf, 1); + DUMPBITS (bit_buf, bits, 1); + + motion_x = motion->pmv[1][0] + get_motion_delta (picture, + motion->f_code[0]); + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); + 
motion->pmv[1][0] = motion_x; + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_y = (motion->pmv[1][1] >> 1) + get_motion_delta (picture, + motion->f_code[1]); + /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */ + motion->pmv[1][1] = motion_y << 1; + + MOTION_FIELD (table, motion->ref[0], motion_x, motion_y, 1, & ~1, field); +#undef bit_buf +#undef bits +#undef bit_ptr +} + +static void motion_fr_dmv (picture_t * picture, motion_t * motion, + void (** table) (uint8_t *, uint8_t *, int, int)) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + int motion_x, motion_y, dmv_x, dmv_y, m, other_x, other_y; + unsigned int pos_x, pos_y, xy_half, offset; + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_x = motion->pmv[0][0] + get_motion_delta (picture, + motion->f_code[0]); + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); + motion->pmv[1][0] = motion->pmv[0][0] = motion_x; + NEEDBITS (bit_buf, bits, bit_ptr); + dmv_x = get_dmv (picture); + + motion_y = (motion->pmv[0][1] >> 1) + get_motion_delta (picture, + motion->f_code[1]); + /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */ + motion->pmv[1][1] = motion->pmv[0][1] = motion_y << 1; + dmv_y = get_dmv (picture); + + m = picture->top_field_first ? 1 : 3; + other_x = ((motion_x * m + (motion_x > 0)) >> 1) + dmv_x; + other_y = ((motion_y * m + (motion_y > 0)) >> 1) + dmv_y - 1; + MOTION_FIELD (mpeg2_mc.put, motion->ref[0], other_x, other_y, 0, | 1, 0); + + m = picture->top_field_first ? 3 : 1; + other_x = ((motion_x * m + (motion_x > 0)) >> 1) + dmv_x; + other_y = ((motion_y * m + (motion_y > 0)) >> 1) + dmv_y + 1; + MOTION_FIELD (mpeg2_mc.put, motion->ref[0], other_x, other_y, 1, & ~1, 0); + + pos_x = 2 * picture->offset + motion_x; + pos_y = picture->v_offset + motion_y; + if(pos_x > picture->limit_x){ + pos_x = ((int)pos_x < 0) ? 
0 : picture->limit_x; + motion_x = pos_x - 2 * picture->offset; + } + if(pos_y > picture->limit_y){ + pos_y = ((int)pos_y < 0) ? 0 : picture->limit_y; + motion_y = pos_y - picture->v_offset; + } + xy_half = ((pos_y & 1) << 1) | (pos_x & 1); + offset = (pos_x >> 1) + (pos_y & ~1) * picture->pitches[0]; + mpeg2_mc.avg[xy_half] + (picture->dest[0] + picture->offset, + motion->ref[0][0] + offset, 2 * picture->pitches[0], 8); + mpeg2_mc.avg[xy_half] + (picture->dest[0] + picture->pitches[0] + picture->offset, + motion->ref[0][0] + picture->pitches[0] + offset, + 2 * picture->pitches[0], 8); + motion_x /= 2; motion_y /= 2; + xy_half = ((motion_y & 1) << 1) | (motion_x & 1); + offset = (((picture->offset + motion_x) >> 1) + + (((picture->v_offset >> 1) + (motion_y & ~1)) * + picture->pitches[1])); + mpeg2_mc.avg[4+xy_half] + (picture->dest[1] + (picture->offset >> 1), + motion->ref[0][1] + offset, 2 * picture->pitches[1], 4); + mpeg2_mc.avg[4+xy_half] + (picture->dest[1] + picture->pitches[1] + (picture->offset >> 1), + motion->ref[0][1] + picture->pitches[1] + offset, + 2 * picture->pitches[1], 4); + offset = (((picture->offset + motion_x) >> 1) + + (((picture->v_offset >> 1) + (motion_y & ~1)) * + picture->pitches[2])); + mpeg2_mc.avg[4+xy_half] + (picture->dest[2] + (picture->offset >> 1), + motion->ref[0][2] + offset, 2 * picture->pitches[2], 4); + mpeg2_mc.avg[4+xy_half] + (picture->dest[2] + picture->pitches[2] + (picture->offset >> 1), + motion->ref[0][2] + picture->pitches[2] + offset, + 2 * picture->pitches[2], 4); +#undef bit_buf +#undef bits +#undef bit_ptr +} + +static void motion_reuse (picture_t * picture, motion_t * motion, + void (** table) (uint8_t *, uint8_t *, int, int)) +{ + int motion_x, motion_y; + unsigned int pos_x, pos_y, xy_half; + + motion_x = motion->pmv[0][0]; + motion_y = motion->pmv[0][1]; + + MOTION (table, motion->ref[0], motion_x, motion_y, 16, 0); +} + +static void motion_zero (picture_t * picture, motion_t * motion, + void (** table) 
(uint8_t *, uint8_t *, int, int)) +{ + table[0] (picture->dest[0] + picture->offset, + (motion->ref[0][0] + picture->offset + + picture->v_offset * picture->pitches[0]), + picture->pitches[0], 16); + + table[4] (picture->dest[1] + (picture->offset >> 1), + motion->ref[0][1] + (picture->offset >> 1) + + (picture->v_offset >> 1) * picture->pitches[1], + picture->pitches[1], 8); + table[4] (picture->dest[2] + (picture->offset >> 1), + motion->ref[0][2] + (picture->offset >> 1) + + (picture->v_offset >> 1) * picture->pitches[2], + picture->pitches[2], 8); +} + +/* like motion_frame, but parsing without actual motion compensation */ +static void motion_fr_conceal (picture_t * picture) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + int tmp; + + NEEDBITS (bit_buf, bits, bit_ptr); + tmp = (picture->f_motion.pmv[0][0] + + get_motion_delta (picture, picture->f_motion.f_code[0])); + tmp = bound_motion_vector (tmp, picture->f_motion.f_code[0]); + picture->f_motion.pmv[1][0] = picture->f_motion.pmv[0][0] = tmp; + + NEEDBITS (bit_buf, bits, bit_ptr); + tmp = (picture->f_motion.pmv[0][1] + + get_motion_delta (picture, picture->f_motion.f_code[1])); + tmp = bound_motion_vector (tmp, picture->f_motion.f_code[1]); + picture->f_motion.pmv[1][1] = picture->f_motion.pmv[0][1] = tmp; + + DUMPBITS (bit_buf, bits, 1); /* remove marker_bit */ +#undef bit_buf +#undef bits +#undef bit_ptr +} + +static void motion_fi_field (picture_t * picture, motion_t * motion, + void (** table) (uint8_t *, uint8_t *, int, int)) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + int motion_x, motion_y; + uint8_t ** ref_field; + unsigned int pos_x, pos_y, xy_half; + + NEEDBITS (bit_buf, bits, bit_ptr); + ref_field = motion->ref2[UBITS (bit_buf, 1)]; + DUMPBITS (bit_buf, bits, 1); + + motion_x = motion->pmv[0][0] + get_motion_delta (picture, + 
motion->f_code[0]); + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); + motion->pmv[1][0] = motion->pmv[0][0] = motion_x; + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_y = motion->pmv[0][1] + get_motion_delta (picture, + motion->f_code[1]); + motion_y = bound_motion_vector (motion_y, motion->f_code[1]); + motion->pmv[1][1] = motion->pmv[0][1] = motion_y; + + MOTION (table, ref_field, motion_x, motion_y, 16, 0); +#undef bit_buf +#undef bits +#undef bit_ptr +} + +static void motion_fi_16x8 (picture_t * picture, motion_t * motion, + void (** table) (uint8_t *, uint8_t *, int, int)) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + int motion_x, motion_y; + uint8_t ** ref_field; + unsigned int pos_x, pos_y, xy_half; + + NEEDBITS (bit_buf, bits, bit_ptr); + ref_field = motion->ref2[UBITS (bit_buf, 1)]; + DUMPBITS (bit_buf, bits, 1); + + motion_x = motion->pmv[0][0] + get_motion_delta (picture, + motion->f_code[0]); + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); + motion->pmv[0][0] = motion_x; + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_y = motion->pmv[0][1] + get_motion_delta (picture, + motion->f_code[1]); + motion_y = bound_motion_vector (motion_y, motion->f_code[1]); + motion->pmv[0][1] = motion_y; + + MOTION (table, ref_field, motion_x, motion_y, 8, 0); + + NEEDBITS (bit_buf, bits, bit_ptr); + ref_field = motion->ref2[UBITS (bit_buf, 1)]; + DUMPBITS (bit_buf, bits, 1); + + motion_x = motion->pmv[1][0] + get_motion_delta (picture, + motion->f_code[0]); + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); + motion->pmv[1][0] = motion_x; + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_y = motion->pmv[1][1] + get_motion_delta (picture, + motion->f_code[1]); + motion_y = bound_motion_vector (motion_y, motion->f_code[1]); + motion->pmv[1][1] = motion_y; + + MOTION (table, ref_field, motion_x, motion_y, 8, 8); +#undef bit_buf +#undef bits +#undef 
bit_ptr +} + +static void motion_fi_dmv (picture_t * picture, motion_t * motion, + void (** table) (uint8_t *, uint8_t *, int, int)) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + int motion_x, motion_y, other_x, other_y; + unsigned int pos_x, pos_y, xy_half; + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_x = motion->pmv[0][0] + get_motion_delta (picture, + motion->f_code[0]); + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); + motion->pmv[1][0] = motion->pmv[0][0] = motion_x; + NEEDBITS (bit_buf, bits, bit_ptr); + other_x = ((motion_x + (motion_x > 0)) >> 1) + get_dmv (picture); + + motion_y = motion->pmv[0][1] + get_motion_delta (picture, + motion->f_code[1]); + motion_y = bound_motion_vector (motion_y, motion->f_code[1]); + motion->pmv[1][1] = motion->pmv[0][1] = motion_y; + other_y = (((motion_y + (motion_y > 0)) >> 1) + get_dmv (picture) + + picture->dmv_offset); + + MOTION (mpeg2_mc.put, motion->ref[0], motion_x, motion_y, 16, 0); + MOTION (mpeg2_mc.avg, motion->ref[1], other_x, other_y, 16, 0); +#undef bit_buf +#undef bits +#undef bit_ptr +} + +static void motion_fi_conceal (picture_t * picture) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + int tmp; + + NEEDBITS (bit_buf, bits, bit_ptr); + DUMPBITS (bit_buf, bits, 1); /* remove field_select */ + + tmp = (picture->f_motion.pmv[0][0] + + get_motion_delta (picture, picture->f_motion.f_code[0])); + tmp = bound_motion_vector (tmp, picture->f_motion.f_code[0]); + picture->f_motion.pmv[1][0] = picture->f_motion.pmv[0][0] = tmp; + + NEEDBITS (bit_buf, bits, bit_ptr); + tmp = (picture->f_motion.pmv[0][1] + + get_motion_delta (picture, picture->f_motion.f_code[1])); + tmp = bound_motion_vector (tmp, picture->f_motion.f_code[1]); + picture->f_motion.pmv[1][1] = picture->f_motion.pmv[0][1] = tmp; + + DUMPBITS (bit_buf, bits, 1); /* remove 
marker_bit */ +#undef bit_buf +#undef bits +#undef bit_ptr +} + +#define MOTION_CALL(routine,direction) \ +do { \ + if ((direction) & MACROBLOCK_MOTION_FORWARD) \ + routine (picture, &(picture->f_motion), mpeg2_mc.put); \ + if ((direction) & MACROBLOCK_MOTION_BACKWARD) \ + routine (picture, &(picture->b_motion), \ + ((direction) & MACROBLOCK_MOTION_FORWARD ? \ + mpeg2_mc.avg : mpeg2_mc.put)); \ +} while (0) + +#define NEXT_MACROBLOCK \ +do { \ + picture->offset += 16; \ + if (picture->offset == picture->coded_picture_width) { \ + do { /* just so we can use the break statement */ \ + if (picture->current_frame->proc_slice) { \ + picture->current_frame->proc_slice (picture->current_frame, \ + picture->dest); \ + } \ + picture->dest[0] += 16 * picture->pitches[0]; \ + picture->dest[1] += 8 * picture->pitches[1]; \ + picture->dest[2] += 8 * picture->pitches[2]; \ + } while (0); \ + picture->v_offset += 16; \ + if (picture->v_offset > picture->limit_y) { \ + if (mpeg2_cpu_state_restore) \ + mpeg2_cpu_state_restore (&cpu_state); \ + return; \ + } \ + picture->offset = 0; \ + } \ +} while (0) + +static inline int slice_init (picture_t * picture, int code) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + int offset, height; + struct vo_frame_s * forward_reference_frame; + struct vo_frame_s * backward_reference_frame; + const MBAtab * mba; + + offset = picture->picture_structure == BOTTOM_FIELD; + picture->pitches[0] = picture->current_frame->pitches[0]; + picture->pitches[1] = picture->current_frame->pitches[1]; + picture->pitches[2] = picture->current_frame->pitches[2]; + + if( picture->forward_reference_frame ) { + forward_reference_frame = picture->forward_reference_frame; + } + else { + /* return 1; */ + forward_reference_frame = picture->current_frame; + } + + if( picture->backward_reference_frame ) { + backward_reference_frame = picture->backward_reference_frame; + } + else { + /* return 
1; */ + backward_reference_frame = picture->current_frame; + } + + picture->f_motion.ref[0][0] = + forward_reference_frame->base[0] + (offset ? picture->pitches[0] : 0); + picture->f_motion.ref[0][1] = + forward_reference_frame->base[1] + (offset ? picture->pitches[1] : 0); + picture->f_motion.ref[0][2] = + forward_reference_frame->base[2] + (offset ? picture->pitches[2] : 0); + + picture->b_motion.ref[0][0] = + backward_reference_frame->base[0] + (offset ? picture->pitches[0] : 0); + picture->b_motion.ref[0][1] = + backward_reference_frame->base[1] + (offset ? picture->pitches[1] : 0); + picture->b_motion.ref[0][2] = + backward_reference_frame->base[2] + (offset ? picture->pitches[2] : 0); + + if (picture->picture_structure != FRAME_PICTURE) { + uint8_t ** forward_ref; + int bottom_field; + + bottom_field = (picture->picture_structure == BOTTOM_FIELD); + picture->dmv_offset = bottom_field ? 1 : -1; + picture->f_motion.ref2[0] = picture->f_motion.ref[bottom_field]; + picture->f_motion.ref2[1] = picture->f_motion.ref[!bottom_field]; + picture->b_motion.ref2[0] = picture->b_motion.ref[bottom_field]; + picture->b_motion.ref2[1] = picture->b_motion.ref[!bottom_field]; + + forward_ref = forward_reference_frame->base; + if (picture->second_field && (picture->picture_coding_type != B_TYPE)) + forward_ref = picture->current_frame->base; + + picture->f_motion.ref[1][0] = forward_ref[0] + (bottom_field ? 0 : picture->pitches[0]); + picture->f_motion.ref[1][1] = forward_ref[1] + (bottom_field ? 0 : picture->pitches[1]); + picture->f_motion.ref[1][2] = forward_ref[2] + (bottom_field ? 0 : picture->pitches[2]); + + picture->b_motion.ref[1][0] = + backward_reference_frame->base[0] + (bottom_field ? 0 : picture->pitches[0]); + picture->b_motion.ref[1][1] = + backward_reference_frame->base[1] + (bottom_field ? 0 : picture->pitches[1]); + picture->b_motion.ref[1][2] = + backward_reference_frame->base[2] + (bottom_field ? 
0 : picture->pitches[2]); + } + + picture->f_motion.pmv[0][0] = picture->f_motion.pmv[0][1] = 0; + picture->f_motion.pmv[1][0] = picture->f_motion.pmv[1][1] = 0; + picture->b_motion.pmv[0][0] = picture->b_motion.pmv[0][1] = 0; + picture->b_motion.pmv[1][0] = picture->b_motion.pmv[1][1] = 0; + + picture->v_offset = (code - 1) * 16; + offset = (code - 1); + if (picture->picture_structure != FRAME_PICTURE) + offset = 2 * offset; + + picture->dest[0] = picture->current_frame->base[0] + picture->pitches[0] * offset * 16; + picture->dest[1] = picture->current_frame->base[1] + picture->pitches[1] * offset * 8; + picture->dest[2] = picture->current_frame->base[2] + picture->pitches[2] * offset * 8; + + height = picture->coded_picture_height; + switch (picture->picture_structure) { + case BOTTOM_FIELD: + picture->dest[0] += picture->pitches[0]; + picture->dest[1] += picture->pitches[1]; + picture->dest[2] += picture->pitches[2]; + /* follow thru */ + case TOP_FIELD: + picture->pitches[0] <<= 1; + picture->pitches[1] <<= 1; + picture->pitches[2] <<= 1; + height >>= 1; + } + picture->limit_x = 2 * picture->coded_picture_width - 32; + picture->limit_y_16 = 2 * height - 32; + picture->limit_y_8 = 2 * height - 16; + picture->limit_y = height - 16; + + picture->dc_dct_pred[0] = picture->dc_dct_pred[1] = + picture->dc_dct_pred[2] = 1 << (picture->intra_dc_precision + 7); + + picture->quantizer_scale = get_quantizer_scale (picture); + + /* ignore intra_slice and all the extra data */ + while (bit_buf & 0x80000000) { + DUMPBITS (bit_buf, bits, 9); + NEEDBITS (bit_buf, bits, bit_ptr); + } + + /* decode initial macroblock address increment */ + offset = 0; + while (1) { + if (bit_buf >= 0x08000000) { + mba = MBA_5 + (UBITS (bit_buf, 6) - 2); + break; + } else if (bit_buf >= 0x01800000) { + mba = MBA_11 + (UBITS (bit_buf, 12) - 24); + break; + } else switch (UBITS (bit_buf, 12)) { + case 8: /* macroblock_escape */ + offset += 33; + DUMPBITS (bit_buf, bits, 11); + NEEDBITS (bit_buf, 
bits, bit_ptr); + continue; + case 15: /* macroblock_stuffing (MPEG1 only) */ + bit_buf &= 0xfffff; + DUMPBITS (bit_buf, bits, 11); + NEEDBITS (bit_buf, bits, bit_ptr); + continue; + default: /* error */ + return 1; + } + } + DUMPBITS (bit_buf, bits, mba->len + 1); + picture->offset = (offset + mba->mba) << 4; + + while (picture->offset - picture->coded_picture_width >= 0) { + picture->offset -= picture->coded_picture_width; + if ((picture->current_frame->proc_slice == NULL) || + (picture->picture_coding_type != B_TYPE)) { + picture->dest[0] += 16 * picture->pitches[0]; + picture->dest[1] += 8 * picture->pitches[1]; + picture->dest[2] += 8 * picture->pitches[2]; + } + picture->v_offset += 16; + } + if (picture->v_offset > picture->limit_y) + return 1; + + return 0; +#undef bit_buf +#undef bits +#undef bit_ptr +} + +void mpeg2_slice (picture_t * picture, int code, uint8_t * buffer) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + cpu_state_t cpu_state; + + bitstream_init (picture, buffer); + + if (slice_init (picture, code)) + return; + + if (mpeg2_cpu_state_save) + mpeg2_cpu_state_save (&cpu_state); + + while (1) { + int macroblock_modes; + int mba_inc; + const MBAtab * mba; + + NEEDBITS (bit_buf, bits, bit_ptr); + + macroblock_modes = get_macroblock_modes (picture); + + /* maybe integrate MACROBLOCK_QUANT test into get_macroblock_modes ? 
*/ + if (macroblock_modes & MACROBLOCK_QUANT) + picture->quantizer_scale = get_quantizer_scale (picture); + + if (macroblock_modes & MACROBLOCK_INTRA) { + + int DCT_offset, DCT_stride; + int offset; + uint8_t * dest_y; + + if (picture->concealment_motion_vectors) { + if (picture->picture_structure == FRAME_PICTURE) + motion_fr_conceal (picture); + else + motion_fi_conceal (picture); + } else { + picture->f_motion.pmv[0][0] = picture->f_motion.pmv[0][1] = 0; + picture->f_motion.pmv[1][0] = picture->f_motion.pmv[1][1] = 0; + picture->b_motion.pmv[0][0] = picture->b_motion.pmv[0][1] = 0; + picture->b_motion.pmv[1][0] = picture->b_motion.pmv[1][1] = 0; + } + + if (macroblock_modes & DCT_TYPE_INTERLACED) { + DCT_offset = picture->pitches[0]; + DCT_stride = picture->pitches[0] * 2; + } else { + DCT_offset = picture->pitches[0] * 8; + DCT_stride = picture->pitches[0]; + } + + offset = picture->offset; + dest_y = picture->dest[0] + offset; + slice_intra_DCT (picture, 0, dest_y, DCT_stride); + slice_intra_DCT (picture, 0, dest_y + 8, DCT_stride); + slice_intra_DCT (picture, 0, dest_y + DCT_offset, DCT_stride); + slice_intra_DCT (picture, 0, dest_y + DCT_offset + 8, DCT_stride); + slice_intra_DCT (picture, 1, picture->dest[1] + (offset >> 1), + picture->pitches[1]); + slice_intra_DCT (picture, 2, picture->dest[2] + (offset >> 1), + picture->pitches[2]); + + if (picture->picture_coding_type == D_TYPE) { + NEEDBITS (bit_buf, bits, bit_ptr); + DUMPBITS (bit_buf, bits, 1); + } + } else { + + if (picture->picture_structure == FRAME_PICTURE) + switch (macroblock_modes & MOTION_TYPE_MASK) { + case MC_FRAME: + if (picture->mpeg1) + MOTION_CALL (motion_mp1, macroblock_modes); + else + MOTION_CALL (motion_fr_frame, macroblock_modes); + break; + + case MC_FIELD: + MOTION_CALL (motion_fr_field, macroblock_modes); + break; + + case MC_DMV: + MOTION_CALL (motion_fr_dmv, MACROBLOCK_MOTION_FORWARD); + break; + + case 0: + /* non-intra mb without forward mv in a P picture */ + 
picture->f_motion.pmv[0][0] = 0; + picture->f_motion.pmv[0][1] = 0; + picture->f_motion.pmv[1][0] = 0; + picture->f_motion.pmv[1][1] = 0; + MOTION_CALL (motion_zero, MACROBLOCK_MOTION_FORWARD); + break; + } + else + switch (macroblock_modes & MOTION_TYPE_MASK) { + case MC_FIELD: + MOTION_CALL (motion_fi_field, macroblock_modes); + break; + + case MC_16X8: + MOTION_CALL (motion_fi_16x8, macroblock_modes); + break; + + case MC_DMV: + MOTION_CALL (motion_fi_dmv, MACROBLOCK_MOTION_FORWARD); + break; + + case 0: + /* non-intra mb without forward mv in a P picture */ + picture->f_motion.pmv[0][0] = 0; + picture->f_motion.pmv[0][1] = 0; + picture->f_motion.pmv[1][0] = 0; + picture->f_motion.pmv[1][1] = 0; + MOTION_CALL (motion_zero, MACROBLOCK_MOTION_FORWARD); + break; + } + + if (macroblock_modes & MACROBLOCK_PATTERN) { + int coded_block_pattern; + int DCT_offset, DCT_stride; + int offset; + uint8_t * dest_y; + + if (macroblock_modes & DCT_TYPE_INTERLACED) { + DCT_offset = picture->pitches[0]; + DCT_stride = picture->pitches[0] * 2; + } else { + DCT_offset = picture->pitches[0] * 8; + DCT_stride = picture->pitches[0]; + } + + coded_block_pattern = get_coded_block_pattern (picture); + + offset = picture->offset; + dest_y = picture->dest[0] + offset; + if (coded_block_pattern & 0x20) + slice_non_intra_DCT (picture, dest_y, DCT_stride); + if (coded_block_pattern & 0x10) + slice_non_intra_DCT (picture, dest_y + 8, DCT_stride); + if (coded_block_pattern & 0x08) + slice_non_intra_DCT (picture, dest_y + DCT_offset, + DCT_stride); + if (coded_block_pattern & 0x04) + slice_non_intra_DCT (picture, dest_y + DCT_offset + 8, + DCT_stride); + if (coded_block_pattern & 0x2) + slice_non_intra_DCT (picture, + picture->dest[1] + (offset >> 1), + picture->pitches[1]); + if (coded_block_pattern & 0x1) + slice_non_intra_DCT (picture, + picture->dest[2] + (offset >> 1), + picture->pitches[2]); + } + + picture->dc_dct_pred[0] = picture->dc_dct_pred[1] = + picture->dc_dct_pred[2] = 128 << 
picture->intra_dc_precision; + } + + NEXT_MACROBLOCK; + + NEEDBITS (bit_buf, bits, bit_ptr); + mba_inc = 0; + while (1) { + if (bit_buf >= 0x10000000) { + mba = MBA_5 + (UBITS (bit_buf, 5) - 2); + break; + } else if (bit_buf >= 0x03000000) { + mba = MBA_11 + (UBITS (bit_buf, 11) - 24); + break; + } else switch (UBITS (bit_buf, 11)) { + case 8: /* macroblock_escape */ + mba_inc += 33; + /* pass through */ + case 15: /* macroblock_stuffing (MPEG1 only) */ + DUMPBITS (bit_buf, bits, 11); + NEEDBITS (bit_buf, bits, bit_ptr); + continue; + default: /* end of slice, or error */ + if (mpeg2_cpu_state_restore) + mpeg2_cpu_state_restore (&cpu_state); + return; + } + } + DUMPBITS (bit_buf, bits, mba->len); + mba_inc += mba->mba; + + if (mba_inc) { + picture->dc_dct_pred[0] = picture->dc_dct_pred[1] = + picture->dc_dct_pred[2] = 128 << picture->intra_dc_precision; + + if (picture->picture_coding_type == P_TYPE) { + picture->f_motion.pmv[0][0] = picture->f_motion.pmv[0][1] = 0; + picture->f_motion.pmv[1][0] = picture->f_motion.pmv[1][1] = 0; + + do { + MOTION_CALL (motion_zero, MACROBLOCK_MOTION_FORWARD); + NEXT_MACROBLOCK; + } while (--mba_inc); + } else { + do { + MOTION_CALL (motion_reuse, macroblock_modes); + NEXT_MACROBLOCK; + } while (--mba_inc); + } + } + } +#undef bit_buf +#undef bits +#undef bit_ptr +} diff --git a/src/video_dec/libmpeg2/slice_xvmc.c b/src/video_dec/libmpeg2/slice_xvmc.c new file mode 100644 index 000000000..014ae7924 --- /dev/null +++ b/src/video_dec/libmpeg2/slice_xvmc.c @@ -0,0 +1,1988 @@ +/* + * slice_xvmc.c + * Copyright (C) 2000-2002 Michel Lespinasse + * Copyright (C) 1999-2000 Aaron Holtzman + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. 
+ * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#include +#include /* memcpy/memset, try to remove */ +#include +#include + +#include +#include +#include "mpeg2_internal.h" +#include + +#include +#include "accel_xvmc.h" +#include "xvmc.h" + + +#define MOTION_ACCEL XINE_VO_MOTION_ACCEL +#define IDCT_ACCEL XINE_VO_IDCT_ACCEL +#define SIGNED_INTRA XINE_VO_SIGNED_INTRA +#define ACCEL (MOTION_ACCEL | IDCT_ACCEL) + +#include "vlc.h" +/* original (non-patched) scan tables */ + +static const uint8_t mpeg2_scan_norm_orig[64] ATTR_ALIGN(16) = +{ + /* Zig-Zag scan pattern */ + 0, 1, 8,16, 9, 2, 3,10, + 17,24,32,25,18,11, 4, 5, + 12,19,26,33,40,48,41,34, + 27,20,13, 6, 7,14,21,28, + 35,42,49,56,57,50,43,36, + 29,22,15,23,30,37,44,51, + 58,59,52,45,38,31,39,46, + 53,60,61,54,47,55,62,63 +}; + +static const uint8_t mpeg2_scan_alt_orig[64] ATTR_ALIGN(16) = +{ + /* Alternate scan pattern */ + 0,8,16,24,1,9,2,10,17,25,32,40,48,56,57,49, + 41,33,26,18,3,11,4,12,19,27,34,42,50,58,35,43, + 51,59,20,28,5,13,6,14,21,29,36,44,52,60,37,45, + 53,61,22,30,7,15,23,31,38,46,54,62,39,47,55,63 +}; + +static uint8_t mpeg2_scan_alt_ptable[64] ATTR_ALIGN(16); +static uint8_t mpeg2_scan_norm_ptable[64] ATTR_ALIGN(16); +static uint8_t mpeg2_scan_orig_ptable[64] ATTR_ALIGN(16); + +void xvmc_setup_scan_ptable( 
void ) +{ + int i; + for (i=0; i<64; ++i) { + mpeg2_scan_norm_ptable[mpeg2_scan_norm_orig[i]] = mpeg2_scan_norm[i]; + mpeg2_scan_alt_ptable[mpeg2_scan_alt_orig[i]] = mpeg2_scan_alt[i]; + mpeg2_scan_orig_ptable[i] = i; + } +} + + +static const int non_linear_quantizer_scale [] = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 10, 12, 14, 16, 18, 20, 22, + 24, 28, 32, 36, 40, 44, 48, 52, + 56, 64, 72, 80, 88, 96, 104, 112 +}; + +static inline int get_xvmc_macroblock_modes (picture_t * picture) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + int macroblock_modes; + const MBtab * tab; + + switch (picture->picture_coding_type) { + case I_TYPE: + + tab = MB_I + UBITS (bit_buf, 1); + DUMPBITS (bit_buf, bits, tab->len); + macroblock_modes = tab->modes; + + if ((! (picture->frame_pred_frame_dct)) && + (picture->picture_structure == FRAME_PICTURE)) { + macroblock_modes |= UBITS (bit_buf, 1) * DCT_TYPE_INTERLACED; + DUMPBITS (bit_buf, bits, 1); + } + + return macroblock_modes; + + case P_TYPE: + + tab = MB_P + UBITS (bit_buf, 5); + DUMPBITS (bit_buf, bits, tab->len); + macroblock_modes = tab->modes; + + if (picture->picture_structure != FRAME_PICTURE) { + if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) { + macroblock_modes |= UBITS (bit_buf, 2) * MOTION_TYPE_BASE; + DUMPBITS (bit_buf, bits, 2); + } + return macroblock_modes; + } else if (picture->frame_pred_frame_dct) { + if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) + macroblock_modes |= MC_FRAME; + return macroblock_modes; + } else { + if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) { + macroblock_modes |= UBITS (bit_buf, 2) * MOTION_TYPE_BASE; + DUMPBITS (bit_buf, bits, 2); + } + if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN)) { + macroblock_modes |= UBITS (bit_buf, 1) * DCT_TYPE_INTERLACED; + DUMPBITS (bit_buf, bits, 1); + } + return macroblock_modes; + } + + case B_TYPE: + + tab = MB_B + UBITS (bit_buf, 6); + DUMPBITS (bit_buf, 
bits, tab->len); + macroblock_modes = tab->modes; + + if (picture->picture_structure != FRAME_PICTURE) { + if (! (macroblock_modes & MACROBLOCK_INTRA)) { + macroblock_modes |= UBITS (bit_buf, 2) * MOTION_TYPE_BASE; + DUMPBITS (bit_buf, bits, 2); + } + return macroblock_modes; + } else if (picture->frame_pred_frame_dct) { + /* if (! (macroblock_modes & MACROBLOCK_INTRA)) */ + macroblock_modes |= MC_FRAME; + return macroblock_modes; + } else { + if (macroblock_modes & MACROBLOCK_INTRA) + goto intra; + macroblock_modes |= UBITS (bit_buf, 2) * MOTION_TYPE_BASE; + DUMPBITS (bit_buf, bits, 2); + if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN)) { + intra: + macroblock_modes |= UBITS (bit_buf, 1) * DCT_TYPE_INTERLACED; + DUMPBITS (bit_buf, bits, 1); + } + return macroblock_modes; + } + + case D_TYPE: + + DUMPBITS (bit_buf, bits, 1); + return MACROBLOCK_INTRA; + + default: + return 0; + } +#undef bit_buf +#undef bits +#undef bit_ptr +} + +static inline int get_xvmc_quantizer_scale (picture_t * picture) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + + int quantizer_scale_code; + + quantizer_scale_code = UBITS (bit_buf, 5); + DUMPBITS (bit_buf, bits, 5); + + if (picture->q_scale_type) + return non_linear_quantizer_scale [quantizer_scale_code]; + else + return quantizer_scale_code << 1; +#undef bit_buf +#undef bits +#undef bit_ptr +} + +static inline int get_xvmc_motion_delta (picture_t * picture, int f_code) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + + int delta; + int sign; + const MVtab * tab; + + if (bit_buf & 0x80000000) { + DUMPBITS (bit_buf, bits, 1); + return 0; + } else if (bit_buf >= 0x0c000000) { + + tab = MV_4 + UBITS (bit_buf, 4); + delta = (tab->delta << f_code) + 1; + bits += tab->len + f_code + 1; + bit_buf <<= tab->len; + + sign = SBITS (bit_buf, 1); + bit_buf <<= 1; + + if 
(f_code) + delta += UBITS (bit_buf, f_code); + bit_buf <<= f_code; + + return (delta ^ sign) - sign; + + } else { + + tab = MV_10 + UBITS (bit_buf, 10); + delta = (tab->delta << f_code) + 1; + bits += tab->len + 1; + bit_buf <<= tab->len; + + sign = SBITS (bit_buf, 1); + bit_buf <<= 1; + + if (f_code) { + NEEDBITS (bit_buf, bits, bit_ptr); + delta += UBITS (bit_buf, f_code); + DUMPBITS (bit_buf, bits, f_code); + } + + return (delta ^ sign) - sign; + + } +#undef bit_buf +#undef bits +#undef bit_ptr +} + +static inline int bound_motion_vector (int vec, int f_code) +{ +#if 1 + unsigned int limit; + int sign; + + limit = 16 << f_code; + + if ((unsigned int)(vec + limit) < 2 * limit) + return vec; + else { + sign = ((int32_t)vec) >> 31; + return vec - ((2 * limit) ^ sign) + sign; + } +#else + return ((int32_t)vec << (27 - f_code)) >> (27 - f_code); +#endif +} + +static inline int get_xvmc_dmv (picture_t * picture) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + + const DMVtab * tab; + + tab = DMV_2 + UBITS (bit_buf, 2); + DUMPBITS (bit_buf, bits, tab->len); + return tab->dmv; +#undef bit_buf +#undef bits +#undef bit_ptr +} + +static inline int get_xvmc_coded_block_pattern (picture_t * picture) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + + const CBPtab * tab; + + NEEDBITS (bit_buf, bits, bit_ptr); + + if (bit_buf >= 0x20000000) { + + tab = CBP_7 + (UBITS (bit_buf, 7) - 16); + DUMPBITS (bit_buf, bits, tab->len); + return tab->cbp; + + } else { + + tab = CBP_9 + UBITS (bit_buf, 9); + DUMPBITS (bit_buf, bits, tab->len); + return tab->cbp; + } + +#undef bit_buf +#undef bits +#undef bit_ptr +} + +static inline int get_xvmc_luma_dc_dct_diff (picture_t * picture) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + const DCtab * tab; + int 
size; + int dc_diff; + + if (bit_buf < 0xf8000000) { + tab = DC_lum_5 + UBITS (bit_buf, 5); + size = tab->size; + if (size) { + bits += tab->len + size; + bit_buf <<= tab->len; + dc_diff = + UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size); + bit_buf <<= size; + return dc_diff; + } else { + DUMPBITS (bit_buf, bits, 3); + return 0; + } + } else { + tab = DC_long + (UBITS (bit_buf, 9) - 0x1e0); + size = tab->size; + DUMPBITS (bit_buf, bits, tab->len); + NEEDBITS (bit_buf, bits, bit_ptr); + dc_diff = UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size); + DUMPBITS (bit_buf, bits, size); + return dc_diff; + } +#undef bit_buf +#undef bits +#undef bit_ptr +} + +static inline int get_xvmc_chroma_dc_dct_diff (picture_t * picture) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + const DCtab * tab; + int size; + int dc_diff; + + if (bit_buf < 0xf8000000) { + tab = DC_chrom_5 + UBITS (bit_buf, 5); + size = tab->size; + if (size) { + bits += tab->len + size; + bit_buf <<= tab->len; + dc_diff = + UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size); + bit_buf <<= size; + return dc_diff; + } else { + DUMPBITS (bit_buf, bits, 2); + return 0; + } + } else { + tab = DC_long + (UBITS (bit_buf, 10) - 0x3e0); + size = tab->size; + DUMPBITS (bit_buf, bits, tab->len + 1); + NEEDBITS (bit_buf, bits, bit_ptr); + dc_diff = UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size); + DUMPBITS (bit_buf, bits, size); + return dc_diff; + } +#undef bit_buf +#undef bits +#undef bit_ptr +} + +#define SATURATE(val) \ +do { \ + if ((uint32_t)(val + 2048) > 4095) \ + val = (val > 0) ? 
2047 : -2048; \ +} while (0) + +static void get_xvmc_intra_block_B14 (picture_t * picture) +{ + int i; + int j; + int l; + int val; + const uint8_t * scan = picture->scan; + uint8_t * scan_ptable = mpeg2_scan_orig_ptable; + uint8_t * quant_matrix = picture->intra_quantizer_matrix; + int quantizer_scale = picture->quantizer_scale; + int mismatch; + const DCTtab * tab; + uint32_t bit_buf; + int bits; + uint8_t * bit_ptr; + int16_t * dest; + + dest = picture->mc->blockptr; + + if( picture->mc->xvmc_accel & IDCT_ACCEL ) { + if ( scan == mpeg2_scan_norm ) { + scan = mpeg2_scan_norm_orig; + scan_ptable = mpeg2_scan_norm_ptable; + } else { + scan = mpeg2_scan_alt_orig; + scan_ptable = mpeg2_scan_alt_ptable; + } + } + + i = 0; + mismatch = ~dest[0]; + + bit_buf = picture->bitstream_buf; + bits = picture->bitstream_bits; + bit_ptr = picture->bitstream_ptr; + + NEEDBITS (bit_buf, bits, bit_ptr); + + while (1) { + if (bit_buf >= 0x28000000) { + + tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5); + + i += tab->run; + if (i >= 64) + break; /* end of block */ + + normal_code: + l = scan_ptable[j = scan[i]]; + + bit_buf <<= tab->len; + bits += tab->len + 1; + val = (tab->level * quantizer_scale * quant_matrix[l]) >> 4; + + /* if (bitstream_get (1)) val = -val; */ + val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1); + + SATURATE (val); + dest[j] = val; + mismatch ^= val; + + bit_buf <<= 1; + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } else if (bit_buf >= 0x04000000) { + + tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4); + + i += tab->run; + if (i < 64) + goto normal_code; + + /* escape code */ + + i += UBITS (bit_buf << 6, 6) - 64; + if (i >= 64) + break; /* illegal, check needed to avoid buffer overflow */ + + l = scan_ptable[j = scan[i]]; + + DUMPBITS (bit_buf, bits, 12); + NEEDBITS (bit_buf, bits, bit_ptr); + val = (SBITS (bit_buf, 12) * + quantizer_scale * quant_matrix[l]) / 16; + + SATURATE (val); + dest[j] = val; + mismatch ^= val; + + DUMPBITS (bit_buf, bits, 12); + 
NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } else if (bit_buf >= 0x02000000) { + tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00800000) { + tab = DCT_13 + (UBITS (bit_buf, 13) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00200000) { + tab = DCT_15 + (UBITS (bit_buf, 15) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else { + tab = DCT_16 + UBITS (bit_buf, 16); + bit_buf <<= 16; + GETWORD (bit_buf, bits + 16, bit_ptr); + i += tab->run; + if (i < 64) + goto normal_code; + } + break; /* illegal, check needed to avoid buffer overflow */ + } + + dest[63] ^= mismatch & 1; + DUMPBITS (bit_buf, bits, 2); /* dump end of block code */ + picture->bitstream_buf = bit_buf; + picture->bitstream_bits = bits; + picture->bitstream_ptr = bit_ptr; +} + +static void get_xvmc_intra_block_B15 (picture_t * picture) +{ + int i; + int j; + int l; + int val; + const uint8_t * scan = picture->scan; + uint8_t * scan_ptable = mpeg2_scan_orig_ptable; + uint8_t * quant_matrix = picture->intra_quantizer_matrix; + int quantizer_scale = picture->quantizer_scale; + int mismatch; + const DCTtab * tab; + uint32_t bit_buf; + int bits; + uint8_t * bit_ptr; + int16_t * dest; + + dest = picture->mc->blockptr; + + if( picture->mc->xvmc_accel & IDCT_ACCEL ) { + if ( scan == mpeg2_scan_norm ) { + scan = mpeg2_scan_norm_orig; + scan_ptable = mpeg2_scan_norm_ptable; + } else { + scan = mpeg2_scan_alt_orig; + scan_ptable = mpeg2_scan_alt_ptable; + } + } + + i = 0; + mismatch = ~dest[0]; + + bit_buf = picture->bitstream_buf; + bits = picture->bitstream_bits; + bit_ptr = picture->bitstream_ptr; + + NEEDBITS (bit_buf, bits, bit_ptr); + + while (1) { + if (bit_buf >= 0x04000000) { + + tab = DCT_B15_8 + (UBITS (bit_buf, 8) - 4); + + i += tab->run; + if (i < 64) { + + normal_code: + l = scan_ptable[j = scan[i]]; + bit_buf <<= tab->len; + bits += tab->len + 1; + val = 
(tab->level * quantizer_scale * quant_matrix[l]) >> 4; + + /* if (bitstream_get (1)) val = -val; */ + val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1); + + SATURATE (val); + dest[j] = val; + mismatch ^= val; + + bit_buf <<= 1; + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } else { + + /* end of block. I commented out this code because if we */ + /* dont exit here we will still exit at the later test :) */ + + /* if (i >= 128) break; */ /* end of block */ + + /* escape code */ + + i += UBITS (bit_buf << 6, 6) - 64; + if (i >= 64) + break; /* illegal, check against buffer overflow */ + + l = scan_ptable[j = scan[i]]; + + DUMPBITS (bit_buf, bits, 12); + NEEDBITS (bit_buf, bits, bit_ptr); + val = (SBITS (bit_buf, 12) * + quantizer_scale * quant_matrix[l]) / 16; + + SATURATE (val); + dest[j] = val; + mismatch ^= val; + + DUMPBITS (bit_buf, bits, 12); + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } + } else if (bit_buf >= 0x02000000) { + tab = DCT_B15_10 + (UBITS (bit_buf, 10) - 8); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00800000) { + tab = DCT_13 + (UBITS (bit_buf, 13) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00200000) { + tab = DCT_15 + (UBITS (bit_buf, 15) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else { + tab = DCT_16 + UBITS (bit_buf, 16); + bit_buf <<= 16; + GETWORD (bit_buf, bits + 16, bit_ptr); + i += tab->run; + if (i < 64) + goto normal_code; + } + break; /* illegal, check needed to avoid buffer overflow */ + } + + dest[63] ^= mismatch & 1; + DUMPBITS (bit_buf, bits, 4); /* dump end of block code */ + picture->bitstream_buf = bit_buf; + picture->bitstream_bits = bits; + picture->bitstream_ptr = bit_ptr; +} + +static void get_xvmc_non_intra_block (picture_t * picture) +{ + int i; + int j; + int l; + int val; + const uint8_t * scan = picture->scan; + uint8_t * scan_ptable = mpeg2_scan_orig_ptable; + uint8_t * quant_matrix = 
picture->non_intra_quantizer_matrix; + int quantizer_scale = picture->quantizer_scale; + int mismatch; + const DCTtab * tab; + uint32_t bit_buf; + int bits; + uint8_t * bit_ptr; + int16_t * dest; + + i = -1; + mismatch = 1; + + dest = picture->mc->blockptr; + + if( picture->mc->xvmc_accel & IDCT_ACCEL ) { + if ( scan == mpeg2_scan_norm ) { + scan = mpeg2_scan_norm_orig; + scan_ptable = mpeg2_scan_norm_ptable; + } else { + scan = mpeg2_scan_alt_orig; + scan_ptable = mpeg2_scan_alt_ptable; + } + } + + bit_buf = picture->bitstream_buf; + bits = picture->bitstream_bits; + bit_ptr = picture->bitstream_ptr; + + NEEDBITS (bit_buf, bits, bit_ptr); + if (bit_buf >= 0x28000000) { + tab = DCT_B14DC_5 + (UBITS (bit_buf, 5) - 5); + goto entry_1; + } else + goto entry_2; + + while (1) { + if (bit_buf >= 0x28000000) { + + tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5); + + entry_1: + i += tab->run; + if (i >= 64) + break; /* end of block */ + + normal_code: + l = scan_ptable[j = scan[i]]; + bit_buf <<= tab->len; + bits += tab->len + 1; + val = ((2*tab->level+1) * quantizer_scale * quant_matrix[l]) >> 5; + + /* if (bitstream_get (1)) val = -val; */ + val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1); + + SATURATE (val); + dest[j] = val; + mismatch ^= val; + + bit_buf <<= 1; + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } + + entry_2: + if (bit_buf >= 0x04000000) { + + tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4); + + i += tab->run; + if (i < 64) + goto normal_code; + + /* escape code */ + + i += UBITS (bit_buf << 6, 6) - 64; + if (i >= 64) + break; /* illegal, check needed to avoid buffer overflow */ + + l = scan_ptable[j = scan[i]]; + + DUMPBITS (bit_buf, bits, 12); + NEEDBITS (bit_buf, bits, bit_ptr); + val = 2 * (SBITS (bit_buf, 12) + SBITS (bit_buf, 1)) + 1; + val = (val * quantizer_scale * quant_matrix[l]) / 32; + + SATURATE (val); + dest[j] = val; + mismatch ^= val; + + DUMPBITS (bit_buf, bits, 12); + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } else if 
(bit_buf >= 0x02000000) { + tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00800000) { + tab = DCT_13 + (UBITS (bit_buf, 13) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00200000) { + tab = DCT_15 + (UBITS (bit_buf, 15) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else { + tab = DCT_16 + UBITS (bit_buf, 16); + bit_buf <<= 16; + GETWORD (bit_buf, bits + 16, bit_ptr); + i += tab->run; + if (i < 64) + goto normal_code; + } + break; /* illegal, check needed to avoid buffer overflow */ + } + dest[63] ^= mismatch & 1; + DUMPBITS (bit_buf, bits, 2); /* dump end of block code */ + picture->bitstream_buf = bit_buf; + picture->bitstream_bits = bits; + picture->bitstream_ptr = bit_ptr; +} + +static void get_xvmc_mpeg1_intra_block (picture_t * picture) +{ + int i; + int j; + int l; + int val; + const uint8_t * scan = picture->scan; + uint8_t * scan_ptable = mpeg2_scan_orig_ptable; + uint8_t * quant_matrix = picture->intra_quantizer_matrix; + int quantizer_scale = picture->quantizer_scale; + const DCTtab * tab; + uint32_t bit_buf; + int bits; + uint8_t * bit_ptr; + int16_t * dest; + + i = 0; + + dest = picture->mc->blockptr; + + if( picture->mc->xvmc_accel & IDCT_ACCEL ) { + if ( scan == mpeg2_scan_norm ) { + scan = mpeg2_scan_norm_orig; + scan_ptable = mpeg2_scan_norm_ptable; + } else { + scan = mpeg2_scan_alt_orig; + scan_ptable = mpeg2_scan_alt_ptable; + } + } + + bit_buf = picture->bitstream_buf; + bits = picture->bitstream_bits; + bit_ptr = picture->bitstream_ptr; + + NEEDBITS (bit_buf, bits, bit_ptr); + + while (1) { + if (bit_buf >= 0x28000000) { + + tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5); + + i += tab->run; + if (i >= 64) + break; /* end of block */ + + normal_code: + l = scan_ptable[j = scan[i]]; + bit_buf <<= tab->len; + bits += tab->len + 1; + val = (tab->level * quantizer_scale * quant_matrix[l]) >> 4; + + /* oddification */ + val 
= (val - 1) | 1; + + /* if (bitstream_get (1)) val = -val; */ + val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1); + + SATURATE (val); + dest[j] = val; + + bit_buf <<= 1; + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } else if (bit_buf >= 0x04000000) { + + tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4); + + i += tab->run; + if (i < 64) + goto normal_code; + + /* escape code */ + + i += UBITS (bit_buf << 6, 6) - 64; + if (i >= 64) + break; /* illegal, check needed to avoid buffer overflow */ + + l = scan_ptable[j = scan[i]]; + + DUMPBITS (bit_buf, bits, 12); + NEEDBITS (bit_buf, bits, bit_ptr); + val = SBITS (bit_buf, 8); + if (! (val & 0x7f)) { + DUMPBITS (bit_buf, bits, 8); + val = UBITS (bit_buf, 8) + 2 * val; + } + val = (val * quantizer_scale * quant_matrix[l]) / 16; + + /* oddification */ + val = (val + ~SBITS (val, 1)) | 1; + + SATURATE (val); + dest[j] = val; + + DUMPBITS (bit_buf, bits, 8); + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } else if (bit_buf >= 0x02000000) { + tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00800000) { + tab = DCT_13 + (UBITS (bit_buf, 13) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00200000) { + tab = DCT_15 + (UBITS (bit_buf, 15) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else { + tab = DCT_16 + UBITS (bit_buf, 16); + bit_buf <<= 16; + GETWORD (bit_buf, bits + 16, bit_ptr); + i += tab->run; + if (i < 64) + goto normal_code; + } + break; /* illegal, check needed to avoid buffer overflow */ + } + DUMPBITS (bit_buf, bits, 2); /* dump end of block code */ + picture->bitstream_buf = bit_buf; + picture->bitstream_bits = bits; + picture->bitstream_ptr = bit_ptr; +} + +static void get_xvmc_mpeg1_non_intra_block (picture_t * picture) +{ + int i; + int j; + int l; + int val; + const uint8_t * scan = picture->scan; + uint8_t * scan_ptable = mpeg2_scan_orig_ptable; + uint8_t * 
quant_matrix = picture->non_intra_quantizer_matrix; + int quantizer_scale = picture->quantizer_scale; + const DCTtab * tab; + uint32_t bit_buf; + int bits; + uint8_t * bit_ptr; + int16_t * dest; + + i = -1; + + dest = picture->mc->blockptr; + + if( picture->mc->xvmc_accel & IDCT_ACCEL ) { + if ( scan == mpeg2_scan_norm ) { + scan = mpeg2_scan_norm_orig; + scan_ptable = mpeg2_scan_norm_ptable; + } else { + scan = mpeg2_scan_alt_orig; + scan_ptable = mpeg2_scan_alt_ptable; + } + } + + bit_buf = picture->bitstream_buf; + bits = picture->bitstream_bits; + bit_ptr = picture->bitstream_ptr; + + NEEDBITS (bit_buf, bits, bit_ptr); + if (bit_buf >= 0x28000000) { + tab = DCT_B14DC_5 + (UBITS (bit_buf, 5) - 5); + goto entry_1; + } else + goto entry_2; + + while (1) { + if (bit_buf >= 0x28000000) { + + tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5); + + entry_1: + i += tab->run; + if (i >= 64) + break; /* end of block */ + + normal_code: + l = scan_ptable[j = scan[i]]; + bit_buf <<= tab->len; + bits += tab->len + 1; + val = ((2*tab->level+1) * quantizer_scale * quant_matrix[l]) >> 5; + + /* oddification */ + val = (val - 1) | 1; + + /* if (bitstream_get (1)) val = -val; */ + val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1); + + SATURATE (val); + dest[j] = val; + + bit_buf <<= 1; + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } + + entry_2: + if (bit_buf >= 0x04000000) { + + tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4); + + i += tab->run; + if (i < 64) + goto normal_code; + + /* escape code */ + + i += UBITS (bit_buf << 6, 6) - 64; + if (i >= 64) + break; /* illegal, check needed to avoid buffer overflow */ + + l = scan_ptable[j = scan[i]]; + + DUMPBITS (bit_buf, bits, 12); + NEEDBITS (bit_buf, bits, bit_ptr); + val = SBITS (bit_buf, 8); + if (! 
(val & 0x7f)) { + DUMPBITS (bit_buf, bits, 8); + val = UBITS (bit_buf, 8) + 2 * val; + } + val = 2 * (val + SBITS (val, 1)) + 1; + val = (val * quantizer_scale * quant_matrix[l]) / 32; + + /* oddification */ + val = (val + ~SBITS (val, 1)) | 1; + + SATURATE (val); + dest[j] = val; + + DUMPBITS (bit_buf, bits, 8); + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } else if (bit_buf >= 0x02000000) { + tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00800000) { + tab = DCT_13 + (UBITS (bit_buf, 13) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00200000) { + tab = DCT_15 + (UBITS (bit_buf, 15) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else { + tab = DCT_16 + UBITS (bit_buf, 16); + bit_buf <<= 16; + GETWORD (bit_buf, bits + 16, bit_ptr); + i += tab->run; + if (i < 64) + goto normal_code; + } + break; /* illegal, check needed to avoid buffer overflow */ + } + DUMPBITS (bit_buf, bits, 2); /* dump end of block code */ + picture->bitstream_buf = bit_buf; + picture->bitstream_bits = bits; + picture->bitstream_ptr = bit_ptr; +} + +static inline void slice_xvmc_intra_DCT (picture_t * picture, int cc, + uint8_t * dest, int stride) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + NEEDBITS (bit_buf, bits, bit_ptr); + /* Get the intra DC coefficient and inverse quantize it */ + + // printf("slice: slice_xvmc_intra_DCT cc=%d pred[0]=%d\n",cc,picture->dc_dct_pred[0]); + if (cc == 0) + picture->dc_dct_pred[0] += get_xvmc_luma_dc_dct_diff (picture); + else + picture->dc_dct_pred[cc] += get_xvmc_chroma_dc_dct_diff (picture); + //TODO conversion to signed format + // printf("slice: pred[0]=%d presision=%d\n",picture->dc_dct_pred[0], + // picture->intra_dc_precision); + + mpeg2_zero_block(picture->mc->blockptr); + + picture->mc->blockptr[0] = picture->dc_dct_pred[cc] << (3 
- picture->intra_dc_precision); + + if (picture->mpeg1) { + if (picture->picture_coding_type != D_TYPE) + get_xvmc_mpeg1_intra_block (picture); + } else if (picture->intra_vlc_format) + get_xvmc_intra_block_B15 (picture); + else + get_xvmc_intra_block_B14 (picture); + + if((picture->mc->xvmc_accel & ACCEL) == MOTION_ACCEL) { + //motion_comp only no idct acceleration so do it in software + mpeg2_idct (picture->mc->blockptr); + } + picture->mc->blockptr += 64; +#undef bit_buf +#undef bits +#undef bit_ptr +} + +static inline void slice_xvmc_non_intra_DCT (picture_t * picture, uint8_t * dest, + int stride) +{ + mpeg2_zero_block(picture->mc->blockptr); + + if (picture->mpeg1) + get_xvmc_mpeg1_non_intra_block (picture); + else + get_xvmc_non_intra_block (picture); + + if((picture->mc->xvmc_accel & ACCEL) == MOTION_ACCEL) { + // motion comp only no idct acceleration so do it in sw + mpeg2_idct (picture->mc->blockptr); + } + picture->mc->blockptr += 64; +} + +static void motion_mp1 (picture_t * picture, motion_t * motion, + void (** table) (uint8_t *, uint8_t *, int, int)) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + int motion_x, motion_y; + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_x = (motion->pmv[0][0] + + (get_xvmc_motion_delta (picture, + motion->f_code[0]) << motion->f_code[1])); + motion_x = bound_motion_vector (motion_x, + motion->f_code[0] + motion->f_code[1]); + motion->pmv[0][0] = motion_x; + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_y = (motion->pmv[0][1] + + (get_xvmc_motion_delta (picture, + motion->f_code[0]) << motion->f_code[1])); + motion_y = bound_motion_vector (motion_y, + motion->f_code[0] + motion->f_code[1]); + motion->pmv[0][1] = motion_y; + +#undef bit_buf +#undef bits +#undef bit_ptr +} + +static void motion_fr_frame (picture_t * picture, motion_t * motion, + void (** table) (uint8_t *, uint8_t *, int, int)) +{ +#define bit_buf (picture->bitstream_buf) 
+#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + int motion_x, motion_y; + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_x = motion->pmv[0][0] + get_xvmc_motion_delta (picture, + motion->f_code[0]); + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); + motion->pmv[1][0] = motion->pmv[0][0] = motion_x; + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_y = motion->pmv[0][1] + get_xvmc_motion_delta (picture, + motion->f_code[1]); + motion_y = bound_motion_vector (motion_y, motion->f_code[1]); + motion->pmv[1][1] = motion->pmv[0][1] = motion_y; + +#undef bit_buf +#undef bits +#undef bit_ptr +} + +static void motion_fr_field (picture_t * picture, motion_t * motion, + void (** table) (uint8_t *, uint8_t *, int, int), + int dir) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + int motion_x, motion_y, field; + // unsigned int pos_x, pos_y, xy_half; + + NEEDBITS (bit_buf, bits, bit_ptr); + field = UBITS (bit_buf, 1); + picture->XvMC_mv_field_sel[0][dir] = field; + DUMPBITS (bit_buf, bits, 1); + + motion_x = motion->pmv[0][0] + get_xvmc_motion_delta (picture, + motion->f_code[0]); + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); + motion->pmv[0][0] = motion_x; + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_y = (motion->pmv[0][1] >> 1) + get_xvmc_motion_delta (picture, + motion->f_code[1]); + /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */ + motion->pmv[0][1] = motion_y << 1; + + NEEDBITS (bit_buf, bits, bit_ptr); + field = UBITS (bit_buf, 1); + //TODO look at field select need bob (weave ok) + picture->XvMC_mv_field_sel[1][dir] = field; + DUMPBITS (bit_buf, bits, 1); + + motion_x = motion->pmv[1][0] + get_xvmc_motion_delta (picture, + motion->f_code[0]); + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); + motion->pmv[1][0] = motion_x; + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_y = 
(motion->pmv[1][1] >> 1) + get_xvmc_motion_delta (picture, + motion->f_code[1]); + /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */ + motion->pmv[1][1] = motion_y << 1; + +#undef bit_buf +#undef bits +#undef bit_ptr +} + +static void motion_fr_dmv (picture_t * picture, motion_t * motion, + void (** table) (uint8_t *, uint8_t *, int, int)) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + int motion_x, motion_y; + + // TODO field select ?? possible need to be 0 + picture->XvMC_mv_field_sel[0][0] = picture->XvMC_mv_field_sel[1][0] = 0; + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_x = motion->pmv[0][0] + get_xvmc_motion_delta (picture, + motion->f_code[0]); + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); + motion->pmv[1][0] = motion->pmv[0][0] = motion_x; + NEEDBITS (bit_buf, bits, bit_ptr); + + motion_y = (motion->pmv[0][1] >> 1) + get_xvmc_motion_delta (picture, + motion->f_code[1]); + /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */ + motion->pmv[1][1] = motion->pmv[0][1] = motion_y << 1; + +#undef bit_buf +#undef bits +#undef bit_ptr +} + +static void motion_reuse (picture_t * picture, motion_t * motion, + void (** table) (uint8_t *, uint8_t *, int, int)) +{ + int motion_x, motion_y; + + motion_x = motion->pmv[0][0]; + motion_y = motion->pmv[0][1]; + +} + +/* like motion_frame, but parsing without actual motion compensation */ +static void motion_fr_conceal (picture_t * picture) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + int tmp; + + NEEDBITS (bit_buf, bits, bit_ptr); + tmp = (picture->f_motion.pmv[0][0] + + get_xvmc_motion_delta (picture, picture->f_motion.f_code[0])); + tmp = bound_motion_vector (tmp, picture->f_motion.f_code[0]); + picture->f_motion.pmv[1][0] = picture->f_motion.pmv[0][0] = tmp; + + NEEDBITS (bit_buf, bits, bit_ptr); + tmp = 
(picture->f_motion.pmv[0][1] +
+           get_xvmc_motion_delta (picture, picture->f_motion.f_code[1]));
+    tmp = bound_motion_vector (tmp, picture->f_motion.f_code[1]);
+    picture->f_motion.pmv[1][1] = picture->f_motion.pmv[0][1] = tmp;
+
+    DUMPBITS (bit_buf, bits, 1); /* remove marker_bit */
+#undef bit_buf
+#undef bits
+#undef bit_ptr
+}
+/* MC_FIELD in a field picture: consume the 1-bit field select, then decode one (x, y) vector into both pmv rows. 'table' is unused. */
+static void motion_fi_field (picture_t * picture, motion_t * motion,
+                             void (** table) (uint8_t *, uint8_t *, int, int))
+{
+#define bit_buf (picture->bitstream_buf)
+#define bits (picture->bitstream_bits)
+#define bit_ptr (picture->bitstream_ptr)
+    int motion_x, motion_y;
+    uint8_t ** ref_field; /* assigned below but never read in this function */
+
+    NEEDBITS (bit_buf, bits, bit_ptr);
+    ref_field = motion->ref2[UBITS (bit_buf, 1)];
+
+    // TODO field select may need to do something here for bob (weave ok)
+    picture->XvMC_mv_field_sel[0][0] = picture->XvMC_mv_field_sel[1][0] = 0;
+
+    DUMPBITS (bit_buf, bits, 1);
+
+    motion_x = motion->pmv[0][0] + get_xvmc_motion_delta (picture,
+                                                           motion->f_code[0]);
+    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);
+    motion->pmv[1][0] = motion->pmv[0][0] = motion_x;
+
+    NEEDBITS (bit_buf, bits, bit_ptr);
+    motion_y = motion->pmv[0][1] + get_xvmc_motion_delta (picture,
+                                                           motion->f_code[1]);
+    motion_y = bound_motion_vector (motion_y, motion->f_code[1]);
+    motion->pmv[1][1] = motion->pmv[0][1] = motion_y;
+
+#undef bit_buf
+#undef bits
+#undef bit_ptr
+}
+/* MC_16X8 in a field picture: two (field select, vector) pairs are read; the first updates pmv[0], the second pmv[1]. 'table' is unused. */
+static void motion_fi_16x8 (picture_t * picture, motion_t * motion,
+                            void (** table) (uint8_t *, uint8_t *, int, int))
+{
+#define bit_buf (picture->bitstream_buf)
+#define bits (picture->bitstream_bits)
+#define bit_ptr (picture->bitstream_ptr)
+    int motion_x, motion_y;
+    uint8_t ** ref_field; /* assigned below but never read in this function */
+
+    NEEDBITS (bit_buf, bits, bit_ptr);
+    ref_field = motion->ref2[UBITS (bit_buf, 1)];
+
+    // TODO field select may need to do something here for bob (weave ok)
+    picture->XvMC_mv_field_sel[0][0] = picture->XvMC_mv_field_sel[1][0] = 0;
+
+    DUMPBITS (bit_buf, bits, 1);
+
+    motion_x = motion->pmv[0][0] + get_xvmc_motion_delta (picture,
+                                                           motion->f_code[0]);
+    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);
+    motion->pmv[0][0] = motion_x;
+
+    NEEDBITS (bit_buf, bits, bit_ptr);
+    motion_y = motion->pmv[0][1] + get_xvmc_motion_delta (picture,
+                                                           motion->f_code[1]);
+    motion_y = bound_motion_vector (motion_y, motion->f_code[1]);
+    motion->pmv[0][1] = motion_y;
+
+    /* second vector: same parsing steps, result stored in pmv[1] */
+    NEEDBITS (bit_buf, bits, bit_ptr);
+    ref_field = motion->ref2[UBITS (bit_buf, 1)];
+
+    // TODO field select may need to do something here for bob (weave ok)
+    picture->XvMC_mv_field_sel[0][0] = picture->XvMC_mv_field_sel[1][0] = 0;
+
+    DUMPBITS (bit_buf, bits, 1);
+
+    motion_x = motion->pmv[1][0] + get_xvmc_motion_delta (picture,
+                                                           motion->f_code[0]);
+    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);
+    motion->pmv[1][0] = motion_x;
+
+    NEEDBITS (bit_buf, bits, bit_ptr);
+    motion_y = motion->pmv[1][1] + get_xvmc_motion_delta (picture,
+                                                           motion->f_code[1]);
+    motion_y = bound_motion_vector (motion_y, motion->f_code[1]);
+    motion->pmv[1][1] = motion_y;
+
+#undef bit_buf
+#undef bits
+#undef bit_ptr
+}
+/* MC_DMV in a field picture: decode one vector into both pmv rows. NOTE(review): no differential vector is parsed here - confirm this is intended. */
+static void motion_fi_dmv (picture_t * picture, motion_t * motion,
+                           void (** table) (uint8_t *, uint8_t *, int, int))
+{
+#define bit_buf (picture->bitstream_buf)
+#define bits (picture->bitstream_bits)
+#define bit_ptr (picture->bitstream_ptr)
+    int motion_x, motion_y;
+
+    NEEDBITS (bit_buf, bits, bit_ptr);
+    motion_x = motion->pmv[0][0] + get_xvmc_motion_delta (picture,
+                                                           motion->f_code[0]);
+    motion_x = bound_motion_vector (motion_x, motion->f_code[0]);
+    motion->pmv[1][0] = motion->pmv[0][0] = motion_x;
+    NEEDBITS (bit_buf, bits, bit_ptr);
+
+    motion_y = motion->pmv[0][1] + get_xvmc_motion_delta (picture,
+                                                           motion->f_code[1]);
+    motion_y = bound_motion_vector (motion_y, motion->f_code[1]);
+    motion->pmv[1][1] = motion->pmv[0][1] = motion_y;
+
+    // TODO field select may need to do something here for bob (weave ok)
+    picture->XvMC_mv_field_sel[0][0] = picture->XvMC_mv_field_sel[1][0] = 0;
+
+#undef bit_buf
+#undef bits
+#undef bit_ptr
+}
+
+/* Concealment vector of an intra macroblock in a non-frame picture: skip field_select, update f_motion.pmv, skip the marker bit. */
+static void motion_fi_conceal (picture_t * picture)
+{
+#define bit_buf (picture->bitstream_buf)
+#define bits (picture->bitstream_bits)
+#define bit_ptr (picture->bitstream_ptr)
+    int tmp;
+
+    NEEDBITS (bit_buf, bits, bit_ptr);
+    DUMPBITS (bit_buf, bits, 1); /* remove field_select */
+
+    tmp = (picture->f_motion.pmv[0][0] +
+           get_xvmc_motion_delta (picture, picture->f_motion.f_code[0]));
+    tmp = bound_motion_vector (tmp, picture->f_motion.f_code[0]);
+    picture->f_motion.pmv[1][0] = picture->f_motion.pmv[0][0] = tmp;
+
+    NEEDBITS (bit_buf, bits, bit_ptr);
+    tmp = (picture->f_motion.pmv[0][1] +
+           get_xvmc_motion_delta (picture, picture->f_motion.f_code[1]));
+    tmp = bound_motion_vector (tmp, picture->f_motion.f_code[1]);
+    picture->f_motion.pmv[1][1] = picture->f_motion.pmv[0][1] = tmp;
+
+    DUMPBITS (bit_buf, bits, 1); /* remove marker_bit */
+#undef bit_buf
+#undef bits
+#undef bit_ptr
+}
+/* Run 'routine' for every direction set in 'direction'; the backward pass gets mpeg2_mc.avg when forward is also set, else mpeg2_mc.put. Uses 'picture' from the caller's scope. */
+#define MOTION_CALL(routine,direction) \
+do { \
+    if ((direction) & MACROBLOCK_MOTION_FORWARD) \
+        routine (picture, &(picture->f_motion), mpeg2_mc.put); \
+    if ((direction) & MACROBLOCK_MOTION_BACKWARD) \
+        routine (picture, &(picture->b_motion), \
+                 ((direction) & MACROBLOCK_MOTION_FORWARD ? \
+                  mpeg2_mc.avg : mpeg2_mc.put)); \
+} while (0)
+/* Step to the next macroblock. At the end of a row: call proc_slice if set, advance dest/v_offset, and return from the enclosing function (restoring 'cpu_state') once v_offset exceeds limit_y. */
+#define NEXT_MACROBLOCK \
+do { \
+    picture->offset += 16; \
+    if (picture->offset == picture->coded_picture_width) { \
+        do { /* just so we can use the break statement */ \
+            if (picture->current_frame->proc_slice) { \
+                picture->current_frame->proc_slice (picture->current_frame, \
+                                                    picture->dest); \
+                if (picture->picture_coding_type == B_TYPE) \
+                    break; \
+            } \
+            picture->dest[0] += 16 * picture->pitches[0]; \
+            picture->dest[1] += 8 * picture->pitches[1]; \
+            picture->dest[2] += 8 * picture->pitches[2]; \
+        } while (0); \
+        picture->v_offset += 16; \
+        if (picture->v_offset > picture->limit_y) { \
+            if (mpeg2_cpu_state_restore) \
+                mpeg2_cpu_state_restore (&cpu_state); \
+            return; \
+        } \
+        picture->offset = 0; \
+    } \
+} while (0)
+/* Prepare 'picture' for slice number 'code' and parse the first macroblock address increment. Returns 0 on success, 1 on an invalid increment code or a start row beyond limit_y. */
+static inline int slice_xvmc_init (picture_t * picture, int code)
+{
+#define bit_buf (picture->bitstream_buf)
+#define bits (picture->bitstream_bits)
+#define bit_ptr (picture->bitstream_ptr)
+    int offset, height;
+    struct vo_frame_s * forward_reference_frame;
+    struct vo_frame_s * backward_reference_frame;
+    const MBAtab * mba;
+
+    offset = picture->picture_structure == BOTTOM_FIELD;
+    picture->pitches[0] = picture->current_frame->pitches[0];
+    picture->pitches[1] = picture->current_frame->pitches[1];
+    picture->pitches[2] = picture->current_frame->pitches[2];
+    /* a missing reference frame is replaced by the current frame instead of failing */
+    if( picture->forward_reference_frame ) {
+        forward_reference_frame = picture->forward_reference_frame;
+    }
+    else {
+        /* return 1; */
+        forward_reference_frame = picture->current_frame;
+    }
+
+    if( picture->backward_reference_frame ) {
+        backward_reference_frame = picture->backward_reference_frame;
+    }
+    else {
+        /* return 1; */
+        backward_reference_frame = picture->current_frame;
+    }
+
+    picture->f_motion.ref[0][0] =
+        forward_reference_frame->base[0] + (offset ? picture->pitches[0] : 0);
+    picture->f_motion.ref[0][1] =
+        forward_reference_frame->base[1] + (offset ? picture->pitches[1] : 0);
+    picture->f_motion.ref[0][2] =
+        forward_reference_frame->base[2] + (offset ? picture->pitches[2] : 0);
+
+    picture->b_motion.ref[0][0] =
+        backward_reference_frame->base[0] + (offset ? picture->pitches[0] : 0);
+    picture->b_motion.ref[0][1] =
+        backward_reference_frame->base[1] + (offset ? picture->pitches[1] : 0);
+    picture->b_motion.ref[0][2] =
+        backward_reference_frame->base[2] + (offset ? picture->pitches[2] : 0);
+
+    if (picture->picture_structure != FRAME_PICTURE) {
+        uint8_t ** forward_ref;
+        int bottom_field;
+
+        bottom_field = (picture->picture_structure == BOTTOM_FIELD);
+        picture->dmv_offset = bottom_field ? 1 : -1;
+        picture->f_motion.ref2[0] = picture->f_motion.ref[bottom_field];
+        picture->f_motion.ref2[1] = picture->f_motion.ref[!bottom_field];
+        picture->b_motion.ref2[0] = picture->b_motion.ref[bottom_field];
+        picture->b_motion.ref2[1] = picture->b_motion.ref[!bottom_field];
+        /* the second field of a non-B picture takes its forward reference from the current frame */
+        forward_ref = forward_reference_frame->base;
+        if (picture->second_field && (picture->picture_coding_type != B_TYPE))
+            forward_ref = picture->current_frame->base;
+
+        picture->f_motion.ref[1][0] = forward_ref[0] + (bottom_field ? 0 : picture->pitches[0]);
+        picture->f_motion.ref[1][1] = forward_ref[1] + (bottom_field ? 0 : picture->pitches[1]);
+        picture->f_motion.ref[1][2] = forward_ref[2] + (bottom_field ? 0 : picture->pitches[2]);
+
+        picture->b_motion.ref[1][0] =
+            backward_reference_frame->base[0] + (bottom_field ? 0 : picture->pitches[0]);
+        picture->b_motion.ref[1][1] =
+            backward_reference_frame->base[1] + (bottom_field ? 0 : picture->pitches[1]);
+        picture->b_motion.ref[1][2] =
+            backward_reference_frame->base[2] + (bottom_field ? 0 : picture->pitches[2]);
+    }
+
+    picture->f_motion.pmv[0][0] = picture->f_motion.pmv[0][1] = 0;
+    picture->f_motion.pmv[1][0] = picture->f_motion.pmv[1][1] = 0;
+    picture->b_motion.pmv[0][0] = picture->b_motion.pmv[0][1] = 0;
+    picture->b_motion.pmv[1][0] = picture->b_motion.pmv[1][1] = 0;
+
+    picture->v_offset = (code - 1) * 16;
+    offset = (code - 1);
+    if (picture->current_frame->proc_slice && picture->picture_coding_type == B_TYPE)
+        offset = 0;
+    else if (picture->picture_structure != FRAME_PICTURE)
+        offset = 2 * offset;
+
+    picture->dest[0] = picture->current_frame->base[0] + picture->pitches[0] * offset * 16;
+    picture->dest[1] = picture->current_frame->base[1] + picture->pitches[1] * offset * 8;
+    picture->dest[2] = picture->current_frame->base[2] + picture->pitches[2] * offset * 8;
+
+    height = picture->coded_picture_height;
+    switch (picture->picture_structure) {
+    case BOTTOM_FIELD:
+        picture->dest[0] += picture->pitches[0];
+        picture->dest[1] += picture->pitches[1];
+        picture->dest[2] += picture->pitches[2];
+        /* fall through */
+    case TOP_FIELD:
+        picture->pitches[0] <<= 1;
+        picture->pitches[1] <<= 1;
+        picture->pitches[2] <<= 1;
+        height >>= 1;
+    }
+    picture->limit_x = 2 * picture->coded_picture_width - 32;
+    picture->limit_y_16 = 2 * height - 32;
+    picture->limit_y_8 = 2 * height - 16;
+    picture->limit_y = height - 16;
+
+    //TODO conversion to signed format
+    if((picture->mc->xvmc_accel & ACCEL) == MOTION_ACCEL &&
+       !(picture->mc->xvmc_accel & SIGNED_INTRA)) {
+        //Motion Comp only unsigned intra
+        // original:
+        picture->dc_dct_pred[0] = picture->dc_dct_pred[1] =
+            picture->dc_dct_pred[2] = 1 << (picture->intra_dc_precision + 7);
+    } else {
+        //Motion Comp only signed intra MOTION_ACCEL+SIGNED_INTRA
+        picture->dc_dct_pred[0] = picture->dc_dct_pred[1] =
+            picture->dc_dct_pred[2] = 0;
+    }
+
+    picture->quantizer_scale = get_xvmc_quantizer_scale (picture);
+
+    /* ignore intra_slice and all the extra data */
+    while (bit_buf & 0x80000000) {
+        DUMPBITS (bit_buf, bits, 9);
+        NEEDBITS (bit_buf, bits, bit_ptr);
+    }
+
+    /* decode initial macroblock address increment */
+    offset = 0;
+    while (1) {
+        if (bit_buf >= 0x08000000) {
+            mba = MBA_5 + (UBITS (bit_buf, 6) - 2);
+            break;
+        } else if (bit_buf >= 0x01800000) {
+            mba = MBA_11 + (UBITS (bit_buf, 12) - 24);
+            break;
+        } else switch (UBITS (bit_buf, 12)) {
+        case 8:         /* macroblock_escape */
+            offset += 33;
+            DUMPBITS (bit_buf, bits, 11);
+            NEEDBITS (bit_buf, bits, bit_ptr);
+            continue;
+        case 15:        /* macroblock_stuffing (MPEG1 only) */
+            bit_buf &= 0xfffff;
+            DUMPBITS (bit_buf, bits, 11);
+            NEEDBITS (bit_buf, bits, bit_ptr);
+            continue;
+        default:        /* error */
+            return 1;
+        }
+    }
+    DUMPBITS (bit_buf, bits, mba->len + 1);
+    picture->offset = (offset + mba->mba) << 4;
+    /* wrap a horizontal offset that exceeds the picture width onto the following rows */
+    while (picture->offset - picture->coded_picture_width >= 0) {
+        picture->offset -= picture->coded_picture_width;
+        if ((picture->current_frame->proc_slice == NULL) ||
+            (picture->picture_coding_type != B_TYPE)) {
+            picture->dest[0] += 16 * picture->pitches[0];
+            picture->dest[1] += 8 * picture->pitches[1];
+            picture->dest[2] += 8 * picture->pitches[2];
+        }
+        picture->v_offset += 16;
+    }
+    if (picture->v_offset > picture->limit_y)
+        return 1;
+
+    return 0;
+#undef bit_buf
+#undef bits
+#undef bit_ptr
+}
+/* Decode one slice and pass each macroblock to xvmc->proc_macro_block. A slice is processed only if 'code' equals the last accepted slice code or the one after it. */
+void mpeg2_xvmc_slice (mpeg2dec_accel_t *accel, picture_t * picture, int code, uint8_t * buffer)
+{
+#define bit_buf (picture->bitstream_buf)
+#define bits (picture->bitstream_bits)
+#define bit_ptr (picture->bitstream_ptr)
+    cpu_state_t cpu_state; /* referenced by NEXT_MACROBLOCK */
+    xine_xvmc_t *xvmc = (xine_xvmc_t *) picture->current_frame->accel_data;
+
+    if (1 == code) {
+        accel->xvmc_last_slice_code = 0;
+    }
+    if ((code != accel->xvmc_last_slice_code + 1) &&
+        (code != accel->xvmc_last_slice_code))
+        return;
+
+    bitstream_init (picture, buffer);
+
+    if (slice_xvmc_init (picture, code))
+        return;
+
+    if (mpeg2_cpu_state_save)
+        mpeg2_cpu_state_save (&cpu_state);
+    /* one iteration per coded macroblock; the loop is left only through 'return' (here or inside NEXT_MACROBLOCK) */
+    while (1) {
+        int macroblock_modes;
+        int mba_inc;
+        const MBAtab * mba;
+
+        NEEDBITS (bit_buf, bits, bit_ptr);
+
+        macroblock_modes = get_xvmc_macroblock_modes (picture); //macroblock_modes()
+        picture->XvMC_mb_type = macroblock_modes & 0x1F;
+        picture->XvMC_dct_type = (macroblock_modes & DCT_TYPE_INTERLACED)>>5;
+        picture->XvMC_motion_type = (macroblock_modes & MOTION_TYPE_MASK)>>6;
+
+        picture->XvMC_x = picture->offset/16;
+        picture->XvMC_y = picture->v_offset/16;
+        /* the field selects are cleared only for the macroblock at position (0, 0) */
+        if((picture->XvMC_x == 0) && (picture->XvMC_y == 0)) {
+            picture->XvMC_mv_field_sel[0][0] =
+                picture->XvMC_mv_field_sel[1][0] =
+                picture->XvMC_mv_field_sel[0][1] =
+                picture->XvMC_mv_field_sel[1][1] = 0;
+        }
+
+        picture->XvMC_cbp = 0x3f; //TODO set for intra 4:2:0 6 blocks yyyyuv all enabled
+
+        /* maybe integrate MACROBLOCK_QUANT test into get_xvmc_macroblock_modes ? */
+        if (macroblock_modes & MACROBLOCK_QUANT)
+            picture->quantizer_scale = get_xvmc_quantizer_scale (picture);
+        if (macroblock_modes & MACROBLOCK_INTRA) {
+
+            int DCT_offset, DCT_stride;
+            int offset;
+            uint8_t * dest_y;
+
+            if (picture->concealment_motion_vectors) {
+                if (picture->picture_structure == FRAME_PICTURE)
+                    motion_fr_conceal (picture);
+                else
+                    motion_fi_conceal (picture);
+            } else {
+                picture->f_motion.pmv[0][0] = picture->f_motion.pmv[0][1] = 0;
+                picture->f_motion.pmv[1][0] = picture->f_motion.pmv[1][1] = 0;
+                picture->b_motion.pmv[0][0] = picture->b_motion.pmv[0][1] = 0;
+                picture->b_motion.pmv[1][0] = picture->b_motion.pmv[1][1] = 0;
+            }
+
+            if (macroblock_modes & DCT_TYPE_INTERLACED) {
+                DCT_offset = picture->pitches[0];
+                DCT_stride = picture->pitches[0] * 2;
+            } else {
+                DCT_offset = picture->pitches[0] * 8;
+                DCT_stride = picture->pitches[0];
+            }
+            offset = picture->offset;
+            dest_y = picture->dest[0] + offset;
+            // unrolled loop of 6 block(i) calls in macroblock()
+            slice_xvmc_intra_DCT (picture, 0, dest_y, DCT_stride);
+            slice_xvmc_intra_DCT (picture, 0, dest_y + 8, DCT_stride);
+            slice_xvmc_intra_DCT (picture, 0, dest_y + DCT_offset, DCT_stride);
+            slice_xvmc_intra_DCT (picture, 0, dest_y + DCT_offset + 8, DCT_stride);
+            slice_xvmc_intra_DCT (picture, 1, picture->dest[1] + (offset >> 1),
+                                  picture->pitches[1]);
+            slice_xvmc_intra_DCT (picture, 2, picture->dest[2] + (offset >> 1),
+                                  picture->pitches[2]);
+
+            if (picture->picture_coding_type == D_TYPE) {
+                NEEDBITS (bit_buf, bits, bit_ptr);
+                DUMPBITS (bit_buf, bits, 1);
+            }
+        } else {
+            picture->XvMC_cbp = 0;
+
+            if (picture->picture_structure == FRAME_PICTURE)
+                switch (macroblock_modes & MOTION_TYPE_MASK) {
+                case MC_FRAME:
+                    if (picture->mpeg1) {
+                        MOTION_CALL (motion_mp1, macroblock_modes);
+                    } else {
+                        MOTION_CALL (motion_fr_frame, macroblock_modes);
+                    }
+                    break;
+
+                case MC_FIELD:
+                    //MOTION_CALL (motion_fr_field, macroblock_modes);
+                    /* expanded by hand because motion_fr_field takes an extra trailing argument (0 forward, 1 backward) */
+                    if ((macroblock_modes) & MACROBLOCK_MOTION_FORWARD)
+                        motion_fr_field(picture, &(picture->f_motion),
+                                        mpeg2_mc.put,0);
+                    if ((macroblock_modes) & MACROBLOCK_MOTION_BACKWARD)
+                        motion_fr_field(picture, &(picture->b_motion),
+                                        ((macroblock_modes) & MACROBLOCK_MOTION_FORWARD ?
+                                         mpeg2_mc.avg : mpeg2_mc.put),1);
+
+                    break;
+
+                case MC_DMV:
+                    MOTION_CALL (motion_fr_dmv, MACROBLOCK_MOTION_FORWARD);
+                    break;
+
+                case 0:
+                    /* non-intra mb without forward mv in a P picture */
+                    picture->f_motion.pmv[0][0] = 0;
+                    picture->f_motion.pmv[0][1] = 0;
+                    picture->f_motion.pmv[1][0] = 0;
+                    picture->f_motion.pmv[1][1] = 0;
+                    // MOTION_CALL (motion_zero, MACROBLOCK_MOTION_FORWARD);
+                    break;
+                }
+            else
+                switch (macroblock_modes & MOTION_TYPE_MASK) {
+                case MC_FIELD:
+                    MOTION_CALL (motion_fi_field, macroblock_modes);
+                    break;
+
+                case MC_16X8:
+                    MOTION_CALL (motion_fi_16x8, macroblock_modes);
+                    break;
+
+                case MC_DMV:
+                    MOTION_CALL (motion_fi_dmv, MACROBLOCK_MOTION_FORWARD);
+                    break;
+
+                case 0:
+                    /* non-intra mb without forward mv in a P picture */
+                    picture->f_motion.pmv[0][0] = 0;
+                    picture->f_motion.pmv[0][1] = 0;
+                    picture->f_motion.pmv[1][0] = 0;
+                    picture->f_motion.pmv[1][1] = 0;
+                    // MOTION_CALL (motion_zero, MACROBLOCK_MOTION_FORWARD);
+                    break;
+                }
+
+            if (macroblock_modes & MACROBLOCK_PATTERN) {
+                int coded_block_pattern;
+                int DCT_offset, DCT_stride;
+                int offset;
+                uint8_t * dest_y;
+
+                if (macroblock_modes & DCT_TYPE_INTERLACED) {
+                    DCT_offset = picture->pitches[0];
+                    DCT_stride = picture->pitches[0] * 2;
+                } else {
+                    DCT_offset = picture->pitches[0] * 8;
+                    DCT_stride = picture->pitches[0];
+                }
+
+                picture->XvMC_cbp = coded_block_pattern = get_xvmc_coded_block_pattern (picture);
+                offset = picture->offset;
+                dest_y = picture->dest[0] + offset;
+                // TODO optimize not fully used for idct accel only mc.
+                if (coded_block_pattern & 0x20)
+                    slice_xvmc_non_intra_DCT (picture, dest_y, DCT_stride); // cc0 luma 0
+                if (coded_block_pattern & 0x10)
+                    slice_xvmc_non_intra_DCT (picture, dest_y + 8, DCT_stride); // cc0 luma 1
+                if (coded_block_pattern & 0x08)
+                    slice_xvmc_non_intra_DCT (picture, dest_y + DCT_offset,
+                                              DCT_stride); // cc0 luma 2
+                if (coded_block_pattern & 0x04)
+                    slice_xvmc_non_intra_DCT (picture, dest_y + DCT_offset + 8,
+                                              DCT_stride); // cc0 luma 3
+                if (coded_block_pattern & 0x2)
+                    slice_xvmc_non_intra_DCT (picture,
+                                              picture->dest[1] + (offset >> 1),
+                                              picture->pitches[1]); // cc1 chroma
+                if (coded_block_pattern & 0x1)
+                    slice_xvmc_non_intra_DCT (picture,
+                                              picture->dest[2] + (offset >> 1),
+                                              picture->pitches[2]); // cc2 chroma
+            }
+            /* a non-intra macroblock resets the DC predictors */
+            if((picture->mc->xvmc_accel & ACCEL) == MOTION_ACCEL &&
+               !(picture->mc->xvmc_accel & SIGNED_INTRA)) {
+                // original:
+                picture->dc_dct_pred[0] = picture->dc_dct_pred[1] =
+                    picture->dc_dct_pred[2] = 128 << picture->intra_dc_precision;
+
+            } else { // MOTION_ACCEL+SIGNED_INTRA
+                picture->dc_dct_pred[0] = picture->dc_dct_pred[1] =
+                    picture->dc_dct_pred[2] = 0;
+            }
+
+        }
+        xvmc->proc_macro_block(picture->XvMC_x, picture->XvMC_y,
+                               picture->XvMC_mb_type,
+                               picture->XvMC_motion_type,
+                               picture->XvMC_mv_field_sel,
+                               picture->XvMC_dmvector,
+                               picture->XvMC_cbp,
+                               picture->XvMC_dct_type,
+                               picture->current_frame,
+                               picture->forward_reference_frame,
+                               picture->backward_reference_frame,
+                               picture->picture_structure,
+                               picture->second_field,
+                               picture->f_motion.pmv,
+                               picture->b_motion.pmv);
+
+
+        NEXT_MACROBLOCK;
+        /* parse the next macroblock address increment; the 'default' case below is the normal exit at the end of the slice */
+        NEEDBITS (bit_buf, bits, bit_ptr);
+        mba_inc = 0;
+        while (1) {
+            if (bit_buf >= 0x10000000) {
+                mba = MBA_5 + (UBITS (bit_buf, 5) - 2);
+                break;
+            } else if (bit_buf >= 0x03000000) {
+                mba = MBA_11 + (UBITS (bit_buf, 11) - 24);
+                break;
+            } else switch (UBITS (bit_buf, 11)) {
+            case 8:             /* macroblock_escape */
+                mba_inc += 33;
+                /* pass through */
+            case 15:    /* macroblock_stuffing (MPEG1 only) */
+                DUMPBITS (bit_buf, bits, 11);
+                NEEDBITS (bit_buf, bits, bit_ptr);
+                continue;
+            default:    /* end of slice, or error */
+                if (mpeg2_cpu_state_restore)
+                    mpeg2_cpu_state_restore (&cpu_state);
+                accel->xvmc_last_slice_code = code;
+                return;
+            }
+        }
+        DUMPBITS (bit_buf, bits, mba->len);
+        mba_inc += mba->mba;
+        if (mba_inc) { /* mba_inc macroblocks are emitted below with XvMC_cbp = 0 */
+            //TODO conversion to signed format
+            if((picture->mc->xvmc_accel & ACCEL) == MOTION_ACCEL &&
+               !(picture->mc->xvmc_accel & SIGNED_INTRA)) {
+                // original:
+                picture->dc_dct_pred[0] = picture->dc_dct_pred[1] =
+                    picture->dc_dct_pred[2] = 128 << picture->intra_dc_precision;
+            } else { // MOTION_ACCEL+SIGNED_INTRA
+                picture->dc_dct_pred[0] = picture->dc_dct_pred[1] =
+                    picture->dc_dct_pred[2] = 0;
+            }
+
+            picture->XvMC_cbp = 0;
+            if (picture->picture_coding_type == P_TYPE) {
+                picture->f_motion.pmv[0][0] = picture->f_motion.pmv[0][1] = 0;
+                picture->f_motion.pmv[1][0] = picture->f_motion.pmv[1][1] = 0;
+
+                do {
+                    if(picture->mc->xvmc_accel) {
+
+                        /* derive motion_type */
+                        if(picture->picture_structure == FRAME_PICTURE) {
+                            picture->XvMC_motion_type = XINE_MC_FRAME;
+                        } else {
+                            picture->XvMC_motion_type = XINE_MC_FIELD;
+                            /* predict from field of same parity */
+                            picture->XvMC_mv_field_sel[0][0] =
+                                picture->XvMC_mv_field_sel[0][1] =
+                                (picture->picture_structure==BOTTOM_FIELD);
+                        }
+                        picture->XvMC_mb_type = macroblock_modes & 0x1E;
+                        picture->XvMC_x = picture->offset/16;
+                        picture->XvMC_y = picture->v_offset/16;
+
+                        xvmc->proc_macro_block(picture->XvMC_x,picture->XvMC_y,
+                                               picture->XvMC_mb_type,
+                                               picture->XvMC_motion_type,
+                                               picture->XvMC_mv_field_sel,
+                                               picture->XvMC_dmvector,
+                                               picture->XvMC_cbp,
+                                               picture->XvMC_dct_type,
+                                               picture->current_frame,
+                                               picture->forward_reference_frame,
+                                               picture->backward_reference_frame,
+                                               picture->picture_structure,
+                                               picture->second_field,
+                                               picture->f_motion.pmv,
+                                               picture->b_motion.pmv);
+                    } else {
+                        // MOTION_CALL (motion_zero, MACROBLOCK_MOTION_FORWARD);
+                    }
+                    NEXT_MACROBLOCK;
+                } while (--mba_inc);
+            } else {
+                do {
+                    if(picture->mc->xvmc_accel) {
+
+                        /* derive motion_type */
+                        if(picture->picture_structure == FRAME_PICTURE) {
+                            picture->XvMC_motion_type = XINE_MC_FRAME;
+                        } else {
+                            picture->XvMC_motion_type = XINE_MC_FIELD;
+                            /* predict from field of same parity */
+                            picture->XvMC_mv_field_sel[0][0] =
+                                picture->XvMC_mv_field_sel[0][1] =
+                                (picture->picture_structure==BOTTOM_FIELD);
+                        }
+
+                        picture->XvMC_mb_type = macroblock_modes & 0x1E;
+                        picture->XvMC_x = picture->offset/16;
+                        picture->XvMC_y = picture->v_offset/16;
+
+                        xvmc->proc_macro_block(picture->XvMC_x,picture->XvMC_y,
+                                               picture->XvMC_mb_type,
+                                               picture->XvMC_motion_type,
+                                               picture->XvMC_mv_field_sel,
+                                               picture->XvMC_dmvector,
+                                               picture->XvMC_cbp,
+                                               picture->XvMC_dct_type,
+                                               picture->current_frame,
+                                               picture->forward_reference_frame,
+                                               picture->backward_reference_frame,
+                                               picture->picture_structure,
+                                               picture->second_field,
+                                               picture->f_motion.pmv,
+                                               picture->b_motion.pmv);
+                    } else {
+                        MOTION_CALL (motion_reuse, macroblock_modes);
+                    }
+                    NEXT_MACROBLOCK;
+                } while (--mba_inc);
+            }
+        }
+    }
+    accel->xvmc_last_slice_code = code;
+#undef bit_buf
+#undef bits
+#undef bit_ptr
+}
+
diff --git a/src/video_dec/libmpeg2/slice_xvmc_vld.c b/src/video_dec/libmpeg2/slice_xvmc_vld.c
new file mode 100644
index 000000000..3606cf66b
--- /dev/null
+++ b/src/video_dec/libmpeg2/slice_xvmc_vld.c
@@ -0,0 +1,225 @@
+/*
+ * Copyright (c) 2004 The Unichrome project. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License as published by the Free Software
+ * Foundation; either version 2, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTIES OR REPRESENTATIONS; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ *
+ */
+
+#include /* NOTE(review): header name missing here - presumably lost when the patch was extracted; restore from the upstream file */
+#include /* NOTE(review): header name missing here as well - restore from the upstream file */
+#include "mpeg2.h"
+#include "mpeg2_internal.h"
+#include "xvmc_vld.h"
+
+static const uint8_t zig_zag_scan[64] ATTR_ALIGN(16) =
+{
+  /* Zig-Zag scan pattern; used below as the destination index when quantizer matrices are re-ordered */
+   0, 1, 8,16, 9, 2, 3,10,
+  17,24,32,25,18,11, 4, 5,
+  12,19,26,33,40,48,41,34,
+  27,20,13, 6, 7,14,21,28,
+  35,42,49,56,57,50,43,36,
+  29,22,15,23,30,37,44,51,
+  58,59,52,45,38,31,39,46,
+  53,60,61,54,47,55,62,63
+};
+
+static const uint8_t alternate_scan [64] ATTR_ALIGN(16) =
+{
+  /* Alternate scan pattern; selected instead of zig_zag_scan when vft->scan is non-zero */
+  0,8,16,24,1,9,2,10,17,25,32,40,48,56,57,49,
+  41,33,26,18,3,11,4,12,19,27,34,42,50,58,35,43,
+  51,59,20,28,5,13,6,14,21,29,36,44,52,60,37,45,
+  53,61,22,30,7,15,23,31,38,46,54,62,39,47,55,63
+};
+/* Pass one slice (chunk_buffer, chunk_size) to the VLD backend; on the first slice of a picture, first fill xxmc->vld_frame from 'picture' and call proc_xxmc_begin. 'buffer' is unused. */
+void mpeg2_xxmc_slice( mpeg2dec_accel_t *accel, picture_t *picture,
+                       int code, uint8_t *buffer, uint32_t chunk_size,
+                       uint8_t *chunk_buffer)
+
+{
+  vo_frame_t
+    *frame = picture->current_frame;
+  xine_xxmc_t
+    *xxmc = (xine_xxmc_t *) frame->accel_data;
+  xine_vld_frame_t
+    *vft = &xxmc->vld_frame;
+  unsigned
+    mb_frame_height;
+  int
+    i;
+  const uint8_t *
+    scan_pattern;
+  float
+    ms_per_slice;
+  /* first slice of a picture: reset slice tracking and publish the picture parameters */
+  if (1 == code && accel->xvmc_last_slice_code != 1) {
+    frame->bad_frame = 1;
+    accel->slices_per_row = 1;
+    accel->row_slice_count = 1;
+
+    /*
+     * Check that first field went through OK. Otherwise,
+     * indicate bad frame.
+     */
+
+    if (picture->second_field) {
+      accel->xvmc_last_slice_code = (xxmc->decoded) ? 0 : -1;
+      xxmc->decoded = 0;
+    } else {
+      accel->xvmc_last_slice_code = 0;
+    }
+
+    mb_frame_height =
+      (!(picture->mpeg1) && (picture->progressive_sequence)) ?
+      2*((picture->coded_picture_height+31) >> 5) :
+      (picture->coded_picture_height+15) >> 4;
+    accel->xxmc_mb_pic_height = (picture->picture_structure == FRAME_PICTURE ) ?
+      mb_frame_height : mb_frame_height >> 1;
+    /* NOTE(review): a zero frame->duration makes ms_per_slice 0 and the division below non-finite - confirm callers never pass 0 */
+    ms_per_slice = 1000. / (90000. * mb_frame_height) * frame->duration;
+    xxmc->sleep = 1. / (ms_per_slice * 0.45);
+    if (xxmc->sleep < 1.) xxmc->sleep = 1.;
+    /* MPEG-1 reuses f_code[0] for both entries of a row */
+    if (picture->mpeg1) {
+      vft->mv_ranges[0][0] = picture->b_motion.f_code[0];
+      vft->mv_ranges[0][1] = picture->b_motion.f_code[0];
+      vft->mv_ranges[1][0] = picture->f_motion.f_code[0];
+      vft->mv_ranges[1][1] = picture->f_motion.f_code[0];
+    } else {
+      vft->mv_ranges[0][0] = picture->b_motion.f_code[0];
+      vft->mv_ranges[0][1] = picture->b_motion.f_code[1];
+      vft->mv_ranges[1][0] = picture->f_motion.f_code[0];
+      vft->mv_ranges[1][1] = picture->f_motion.f_code[1];
+    }
+
+    vft->picture_structure = picture->picture_structure;
+    vft->picture_coding_type = picture->picture_coding_type;
+    vft->mpeg_coding = (picture->mpeg1) ? 0 : 1;
+    vft->progressive_sequence = picture->progressive_sequence;
+    vft->scan = (picture->scan == mpeg2_scan_alt);
+    vft->pred_dct_frame = picture->frame_pred_frame_dct;
+    vft->concealment_motion_vectors =
+      picture->concealment_motion_vectors;
+    vft->q_scale_type = picture->q_scale_type;
+    vft->intra_vlc_format = picture->intra_vlc_format;
+    vft->intra_dc_precision = picture->intra_dc_precision;
+    vft->second_field = picture->second_field;
+
+    /*
+     * Translation of libmpeg2's Q-matrix layout to VLD XvMC's.
+     * Errors here will give
+     * blocky artifacts and sometimes wrong colors.
+     */
+
+    scan_pattern = (vft->scan) ? alternate_scan : zig_zag_scan;
+
+    if ((vft->load_intra_quantizer_matrix = picture->load_intra_quantizer_matrix)) {
+      for (i=0; i<64; ++i) {
+        vft->intra_quantizer_matrix[scan_pattern[i]] =
+          picture->intra_quantizer_matrix[picture->scan[i]];
+      }
+    }
+
+    if ((vft->load_non_intra_quantizer_matrix = picture->load_non_intra_quantizer_matrix)) {
+      for (i=0; i<64; ++i) {
+        vft->non_intra_quantizer_matrix[scan_pattern[i]] =
+          picture->non_intra_quantizer_matrix[picture->scan[i]];
+      }
+    }
+    /* the load flags are cleared once their state has been copied into vft */
+    picture->load_intra_quantizer_matrix = 0;
+    picture->load_non_intra_quantizer_matrix = 0;
+    vft->forward_reference_frame = picture->forward_reference_frame;
+    vft->backward_reference_frame = picture->backward_reference_frame;
+    xxmc->proc_xxmc_begin( frame );
+    if (xxmc->result != 0) {
+      accel->xvmc_last_slice_code=-1;
+    }
+  }
+  /* accept only a slice on the same row as the previous one or on the next row */
+  if (((code == accel->xvmc_last_slice_code + 1) ||
+       (code == accel->xvmc_last_slice_code))) {
+
+    /*
+     * Send this slice to the output plugin. May stall for a long
+     * time in proc_slice;
+     */
+
+    frame->bad_frame = 1;
+    xxmc->slice_data_size = chunk_size;
+    xxmc->slice_data = chunk_buffer;
+    xxmc->slice_code = code;
+
+    xxmc->proc_xxmc_slice( frame );
+
+    if (xxmc->result != 0) {
+      accel->xvmc_last_slice_code=-1;
+      return;
+    }
+    /*
+     * Keep track of slices.
+     */
+
+    accel->row_slice_count = (accel->xvmc_last_slice_code == code) ?
+      accel->row_slice_count + 1 : 1;
+    accel->slices_per_row = (accel->row_slice_count > accel->slices_per_row) ?
+      accel->row_slice_count:accel->slices_per_row;
+    accel->xvmc_last_slice_code = code;
+
+  } else {
+
+    /*
+     * An error has occurred.
+     */
+
+    lprintf("libmpeg2: VLD XvMC: Slice error.\n");
+    accel->xvmc_last_slice_code = -1;
+    return;
+  }
+}
+/* Flush the frame via proc_xxmc_flush and update xxmc->decoded / frame->bad_frame; -1 in xvmc_last_slice_code marks an earlier slice error. */
+void mpeg2_xxmc_vld_frame_complete(mpeg2dec_accel_t *accel, picture_t *picture, int code)
+{
+  vo_frame_t
+    *frame = picture->current_frame;
+  xine_xxmc_t
+    *xxmc = (xine_xxmc_t *) frame->accel_data;
+
+  if (xxmc->decoded) return;
+  if (accel->xvmc_last_slice_code == -1) {
+    xxmc->proc_xxmc_flush( frame );
+    return;
+  }
+  /* for code 0xff, flush only when the last row was reached and it holds as many slices as the fullest row seen */
+  if ((code != 0xff) || ((accel->xvmc_last_slice_code ==
+                          accel->xxmc_mb_pic_height) &&
+                         accel->slices_per_row == accel->row_slice_count)) {
+
+    xxmc->proc_xxmc_flush( frame );
+
+    if (xxmc->result) {
+      accel->xvmc_last_slice_code=-1;
+      frame->bad_frame = 1;
+      return;
+    }
+    xxmc->decoded = 1;
+    accel->xvmc_last_slice_code = 0;
+    if (picture->picture_structure == 3 || picture->second_field) { /* 3 is presumably FRAME_PICTURE - TODO confirm */
+      if (xxmc->result == 0) /* always true here: a non-zero result returned above */
+        frame->bad_frame = 0;
+    }
+  }
+}
diff --git a/src/video_dec/libmpeg2/stats.c b/src/video_dec/libmpeg2/stats.c
new file mode 100644
index 000000000..63c701179
--- /dev/null
+++ b/src/video_dec/libmpeg2/stats.c
@@ -0,0 +1,317 @@
+/*
+ * stats.c
+ * Copyright (C) 2000-2002 Michel Lespinasse
+ * Copyright (C) 1999-2000 Aaron Holtzman
+ *
+ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
+ * See http://libmpeg2.sourceforge.net/ for updates.
+ *
+ * mpeg2dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpeg2dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#include +#include +#include + +#include "mpeg2_internal.h" + +static int debug_level = -1; + +/* Determine is debug output is required. */ +/* We could potentially have multiple levels of debug info */ +static int debug_is_on (void) +{ + char * env_var; + + if (debug_level < 0) { + env_var = getenv ("MPEG2_DEBUG"); + + if (env_var) + debug_level = 1; + else + debug_level = 0; + } + + return debug_level; +} + +static void stats_picture (uint8_t * buffer) +{ + static const char *const picture_coding_type_str [8] = { + "Invalid picture type", + "I-type", + "P-type", + "B-type", + "D (very bad)", + "Invalid","Invalid","Invalid" + }; + + int picture_coding_type; + int temporal_reference; + int vbv_delay; + + temporal_reference = (buffer[0] << 2) | (buffer[1] >> 6); + picture_coding_type = (buffer [1] >> 3) & 7; + vbv_delay = ((buffer[1] << 13) | (buffer[2] << 5) | + (buffer[3] >> 3)) & 0xffff; + + fprintf (stderr, " (picture) %s temporal_reference %d, vbv_delay %d\n", + picture_coding_type_str [picture_coding_type], + temporal_reference, vbv_delay); +} + +static void stats_user_data (uint8_t * buffer) +{ + fprintf (stderr, " (user_data)\n"); +} + +static void stats_sequence (uint8_t * buffer) +{ + static const char *const aspect_ratio_information_str[8] = { + "Invalid Aspect Ratio", + "1:1", + "4:3", + "16:9", + "2.21:1", + "Invalid Aspect Ratio", + "Invalid Aspect Ratio", + "Invalid Aspect Ratio" + }; + static const char *const frame_rate_str[16] = { + "Invalid frame_rate_code", + "23.976", "24", "25" , "29.97", + "30" , "50", "59.94", "60" , + "Invalid frame_rate_code", "Invalid frame_rate_code", + "Invalid frame_rate_code", "Invalid frame_rate_code", + "Invalid frame_rate_code", "Invalid frame_rate_code", + "Invalid 
frame_rate_code" + }; + + int horizontal_size; + int vertical_size; + int aspect_ratio_information; + int frame_rate_code; + int bit_rate_value; + int vbv_buffer_size_value; + int constrained_parameters_flag; + int load_intra_quantizer_matrix; + int load_non_intra_quantizer_matrix; + + vertical_size = (buffer[0] << 16) | (buffer[1] << 8) | buffer[2]; + horizontal_size = vertical_size >> 12; + vertical_size &= 0xfff; + aspect_ratio_information = buffer[3] >> 4; + frame_rate_code = buffer[3] & 15; + bit_rate_value = (buffer[4] << 10) | (buffer[5] << 2) | (buffer[6] >> 6); + vbv_buffer_size_value = ((buffer[6] << 5) | (buffer[7] >> 3)) & 0x3ff; + constrained_parameters_flag = buffer[7] & 4; + load_intra_quantizer_matrix = buffer[7] & 2; + if (load_intra_quantizer_matrix) + buffer += 64; + load_non_intra_quantizer_matrix = buffer[7] & 1; + + fprintf (stderr, " (seq) %dx%d %s, %s fps, %5.0f kbps, VBV %d kB%s%s%s\n", + horizontal_size, vertical_size, + aspect_ratio_information_str [aspect_ratio_information], + frame_rate_str [frame_rate_code], + bit_rate_value * 400.0 / 1000.0, + 2 * vbv_buffer_size_value, + constrained_parameters_flag ? " , CP":"", + load_intra_quantizer_matrix ? " , Custom Intra Matrix":"", + load_non_intra_quantizer_matrix ? " , Custom Non-Intra Matrix":""); +} + +static void stats_sequence_error (uint8_t * buffer) +{ + fprintf (stderr, " (sequence_error)\n"); +} + +static void stats_sequence_end (uint8_t * buffer) +{ + fprintf (stderr, " (sequence_end)\n"); +} + +static void stats_group (uint8_t * buffer) +{ + fprintf (stderr, " (group)%s%s\n", + (buffer[4] & 0x40) ? " closed_gop" : "", + (buffer[4] & 0x20) ? 
" broken_link" : ""); +} + /* Slice hook: prints nothing, because the only statement in its body is commented out. */ +static void stats_slice (int code, uint8_t * buffer) +{ + /* fprintf (stderr, " (slice %d)\n", code); */ +} + /* Prints progressive_sequence (bit 3 of buffer[1]) and the chroma format name selected by bits 2..1 of buffer[1]. */ +static void stats_sequence_extension (uint8_t * buffer) +{ + static const char *const chroma_format_str[4] = { + "Invalid Chroma Format", + "4:2:0 Chroma", + "4:2:2 Chroma", + "4:4:4 Chroma" + }; + + int progressive_sequence; + int chroma_format; + + progressive_sequence = (buffer[1] >> 3) & 1; + chroma_format = (buffer[1] >> 1) & 3; + + fprintf (stderr, " (seq_ext) progressive_sequence %d, %s\n", + progressive_sequence, chroma_format_str [chroma_format]); +} + /* The next five printers write a fixed tag line to stderr and never read buffer. */ +static void stats_sequence_display_extension (uint8_t * buffer) +{ + fprintf (stderr, " (sequence_display_extension)\n"); +} + +static void stats_quant_matrix_extension (uint8_t * buffer) +{ + fprintf (stderr, " (quant_matrix_extension)\n"); +} + +static void stats_copyright_extension (uint8_t * buffer) +{ + fprintf (stderr, " (copyright_extension)\n"); +} + + +static void stats_sequence_scalable_extension (uint8_t * buffer) +{ + fprintf (stderr, " (sequence_scalable_extension)\n"); +} + +static void stats_picture_display_extension (uint8_t * buffer) +{ + fprintf (stderr, " (picture_display_extension)\n"); +} + /* Unpacks the picture coding extension fields from buffer[0] through buffer[4] and prints them to stderr. */ +static void stats_picture_coding_extension (uint8_t * buffer) +{ + static const char *const picture_structure_str[4] = { + "Invalid Picture Structure", + "Top field", + "Bottom field", + "Frame Picture" + }; + + int f_code[2][2]; /* first index: 0 forward, 1 backward; second index: 0 horizontal, 1 vertical (matches the labels printed below) */ + int intra_dc_precision; + int picture_structure; + int top_field_first; + int frame_pred_frame_dct; + int concealment_motion_vectors; + int q_scale_type; + int intra_vlc_format; + int alternate_scan; + int repeat_first_field; + int progressive_frame; + + f_code[0][0] = buffer[0] & 15; + f_code[0][1] = buffer[1] >> 4; + f_code[1][0] = buffer[1] & 15; + f_code[1][1] = buffer[2] >> 4; + intra_dc_precision = (buffer[2] >> 2) & 3; + picture_structure = buffer[2] & 3; + top_field_first = buffer[3] >> 7; + frame_pred_frame_dct = (buffer[3] >>
6) & 1; + concealment_motion_vectors = (buffer[3] >> 5) & 1; + q_scale_type = (buffer[3] >> 4) & 1; + intra_vlc_format = (buffer[3] >> 3) & 1; + alternate_scan = (buffer[3] >> 2) & 1; + repeat_first_field = (buffer[3] >> 1) & 1; + progressive_frame = buffer[4] >> 7; + /* the f_code conversions below use the printf space flag, so non-negative values are printed with a leading blank */ + fprintf (stderr, + " (pic_ext) %s\n", picture_structure_str [picture_structure]); + fprintf (stderr, + " (pic_ext) forward horizontal f_code % d, forward vertical f_code % d\n", + f_code[0][0], f_code[0][1]); + fprintf (stderr, + " (pic_ext) backward horizontal f_code % d, backward vertical f_code % d\n", + f_code[1][0], f_code[1][1]); + fprintf (stderr, + " (pic_ext) intra_dc_precision %d, top_field_first %d, frame_pred_frame_dct %d\n", + intra_dc_precision, top_field_first, frame_pred_frame_dct); + fprintf (stderr, + " (pic_ext) concealment_motion_vectors %d, q_scale_type %d, intra_vlc_format %d\n", + concealment_motion_vectors, q_scale_type, intra_vlc_format); + fprintf (stderr, + " (pic_ext) alternate_scan %d, repeat_first_field %d, progressive_frame %d\n", + alternate_scan, repeat_first_field, progressive_frame); +} + /* Entry point: returns at once unless debug_is_on() is nonzero; otherwise dispatches on the start code to the matching stats printer. */ +void mpeg2_stats (int code, uint8_t * buffer) +{ + if (!
(debug_is_on ())) + return; + + switch (code) { + case 0x00: + stats_picture (buffer); + break; + case 0xb2: + stats_user_data (buffer); + break; + case 0xb3: + stats_sequence (buffer); + break; + case 0xb4: + stats_sequence_error (buffer); + break; + case 0xb5: /* extension start code: the high nibble of buffer[0] selects the printer; values with no case (such as 6) are reported as unknown */ + switch (buffer[0] >> 4) { + case 1: + stats_sequence_extension (buffer); + break; + case 2: + stats_sequence_display_extension (buffer); + break; + case 3: + stats_quant_matrix_extension (buffer); + break; + case 4: + stats_copyright_extension (buffer); + break; + case 5: + stats_sequence_scalable_extension (buffer); + break; + case 7: + stats_picture_display_extension (buffer); + break; + case 8: + stats_picture_coding_extension (buffer); + break; + default: + fprintf (stderr, " (unknown extension %#x)\n", buffer[0] >> 4); + } + break; + case 0xb7: + stats_sequence_end (buffer); + break; + case 0xb8: + stats_group (buffer); + break; + default: /* every remaining code below 0xb0 is handed to the slice hook */ + if (code < 0xb0) + stats_slice (code, buffer); + else + fprintf (stderr, " (unknown start code %#02x)\n", code); + } +} diff --git a/src/video_dec/libmpeg2/vis.h b/src/video_dec/libmpeg2/vis.h new file mode 100644 index 000000000..69dd49075 --- /dev/null +++ b/src/video_dec/libmpeg2/vis.h @@ -0,0 +1,328 @@ +/* + * vis.h + * Copyright (C) 2003 David S. Miller + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* You may be asking why I hard-code the instruction opcodes and don't + * use the normal VIS assembler mnemonics for the VIS instructions. + * + * The reason is that Sun, in their infinite wisdom, decided that a binary + * using a VIS instruction will cause it to be marked (in the ELF headers) + * as doing so, and this prevents the OS from loading such binaries if the + * current cpu doesn't have VIS. There is no way to easily override this + * behavior of the assembler that I am aware of. + * + * This totally defeats what libmpeg2 is trying to do which is allow a + * single binary to be created, and then detect the availability of VIS + * at runtime. + * + * I'm not saying that tainting the binary by default is bad, rather I'm + * saying that not providing a way to override this easily unnecessarily + * ties people's hands. + * + * Thus, we do the opcode encoding by hand and output 32-bit words in + * the assembler to keep the binary from becoming tainted.
+ */ + /* Field builders for the hand-encoded instruction words: opf is placed at bit 5, rs1 at bit 14, rs2 at bit 0 and rd at bit 25. The _s builders use the register number as given; the _d builders first remap it with vis_dreg (low five bits ORed with the number shifted right by five). */ +#define vis_opc_base ((0x1 << 31) | (0x36 << 19)) +#define vis_opf(X) ((X) << 5) +#define vis_sreg(X) (X) +#define vis_dreg(X) (((X)&0x1f)|((X)>>5)) +#define vis_rs1_s(X) (vis_sreg(X) << 14) +#define vis_rs1_d(X) (vis_dreg(X) << 14) +#define vis_rs2_s(X) (vis_sreg(X) << 0) +#define vis_rs2_d(X) (vis_dreg(X) << 0) +#define vis_rd_s(X) (vis_sreg(X) << 25) +#define vis_rd_d(X) (vis_dreg(X) << 25) + /* Encoder macros: each emits one .word built from vis_opc_base, the opf value and the listed register fields. In the names, the letters before the 2 say whether each source goes through the _s or _d builder and the letter after it says the same for the destination; a digit 1 or 2 right after a lone source letter tells whether rs1 or rs2 is filled. */ +#define vis_ss2s(opf,rs1,rs2,rd) \ + __asm__ __volatile__ (".word %0" \ + : : "i" (vis_opc_base | vis_opf(opf) | \ + vis_rs1_s(rs1) | \ + vis_rs2_s(rs2) | \ + vis_rd_s(rd))) + +#define vis_dd2d(opf,rs1,rs2,rd) \ + __asm__ __volatile__ (".word %0" \ + : : "i" (vis_opc_base | vis_opf(opf) | \ + vis_rs1_d(rs1) | \ + vis_rs2_d(rs2) | \ + vis_rd_d(rd))) + +#define vis_ss2d(opf,rs1,rs2,rd) \ + __asm__ __volatile__ (".word %0" \ + : : "i" (vis_opc_base | vis_opf(opf) | \ + vis_rs1_s(rs1) | \ + vis_rs2_s(rs2) | \ + vis_rd_d(rd))) + +#define vis_sd2d(opf,rs1,rs2,rd) \ + __asm__ __volatile__ (".word %0" \ + : : "i" (vis_opc_base | vis_opf(opf) | \ + vis_rs1_s(rs1) | \ + vis_rs2_d(rs2) | \ + vis_rd_d(rd))) + +#define vis_d2s(opf,rs2,rd) \ + __asm__ __volatile__ (".word %0" \ + : : "i" (vis_opc_base | vis_opf(opf) | \ + vis_rs2_d(rs2) | \ + vis_rd_s(rd))) + +#define vis_s2d(opf,rs2,rd) \ + __asm__ __volatile__ (".word %0" \ + : : "i" (vis_opc_base | vis_opf(opf) | \ + vis_rs2_s(rs2) | \ + vis_rd_d(rd))) + +#define vis_d12d(opf,rs1,rd) \ + __asm__ __volatile__ (".word %0" \ + : : "i" (vis_opc_base | vis_opf(opf) | \ + vis_rs1_d(rs1) | \ + vis_rd_d(rd))) + +#define vis_d22d(opf,rs2,rd) \ + __asm__ __volatile__ (".word %0" \ + : : "i" (vis_opc_base | vis_opf(opf) | \ + vis_rs2_d(rs2) | \ + vis_rd_d(rd))) + +#define vis_s12s(opf,rs1,rd) \ + __asm__ __volatile__ (".word %0" \ + : : "i" (vis_opc_base | vis_opf(opf) | \ + vis_rs1_s(rs1) | \ + vis_rd_s(rd))) + +#define vis_s22s(opf,rs2,rd) \ + __asm__ __volatile__ (".word %0" \ + : : "i" (vis_opc_base | vis_opf(opf) | \ +
vis_rs2_s(rs2) | \ + vis_rd_s(rd))) + +#define vis_s(opf,rd) \ + __asm__ __volatile__ (".word %0" \ + : : "i" (vis_opc_base | vis_opf(opf) | \ + vis_rd_s(rd))) + +#define vis_d(opf,rd) \ + __asm__ __volatile__ (".word %0" \ + : : "i" (vis_opc_base | vis_opf(opf) | \ + vis_rd_d(rd))) + /* Memory access helpers: unlike the encoders above these paste the real mnemonic passed as op. The r2m forms put float register rd first and the memory operand second (used below with st and std); the m2r forms put the memory operand first (used with ld and ldd). The _2 forms address memory as the sum of two register operands instead of the address of mem. */ +#define vis_r2m(op,rd,mem) \ + __asm__ __volatile__ (#op "\t%%f" #rd ", [%0]" : : "r" (&(mem)) ) + +#define vis_r2m_2(op,rd,mem1,mem2) \ + __asm__ __volatile__ (#op "\t%%f" #rd ", [%0 + %1]" : : "r" (mem1), "r" (mem2) ) + +#define vis_m2r(op,mem,rd) \ + __asm__ __volatile__ (#op "\t[%0], %%f" #rd : : "r" (&(mem)) ) + +#define vis_m2r_2(op,mem1,mem2,rd) \ + __asm__ __volatile__ (#op "\t[%0 + %1], %%f" #rd : : "r" (mem1), "r" (mem2) ) + /* Copies _val into a variable bound to %g1 and emits a fixed instruction word that takes that register as input. NOTE(review): presumably this word writes %g1 to the GSR, going by the function name - confirm against the SPARC V9 encoding. */ +static inline void vis_set_gsr(unsigned int _val) +{ + register unsigned int val asm("g1"); + + val = _val; + __asm__ __volatile__(".word 0xa7804000" + : : "r" (val)); +} + +#define VIS_GSR_ALIGNADDR_MASK 0x0000007 +#define VIS_GSR_ALIGNADDR_SHIFT 0 +#define VIS_GSR_SCALEFACT_MASK 0x0000078 +#define VIS_GSR_SCALEFACT_SHIFT 3 + +#define vis_ld32(mem,rs1) vis_m2r(ld, mem, rs1) +#define vis_ld32_2(mem1,mem2,rs1) vis_m2r_2(ld, mem1, mem2, rs1) +#define vis_st32(rs1,mem) vis_r2m(st, rs1, mem) +#define vis_st32_2(rs1,mem1,mem2) vis_r2m_2(st, rs1, mem1, mem2) +#define vis_ld64(mem,rs1) vis_m2r(ldd, mem, rs1) +#define vis_ld64_2(mem1,mem2,rs1) vis_m2r_2(ldd, mem1, mem2, rs1) +#define vis_st64(rs1,mem) vis_r2m(std, rs1, mem) +#define vis_st64_2(rs1,mem1,mem2) vis_r2m_2(std, rs1, mem1, mem2) + /* Block transfer helpers: the address of mem is bound to %g1 and the vis_rd_d(rd) field is ORed into a fixed instruction word; both declare a memory clobber. */ +#define vis_ldblk(mem, rd) \ +do { register void *__mem asm("g1"); \ + __mem = &(mem); \ + __asm__ __volatile__(".word 0xc1985e00 | %1" \ + : \ + : "r" (__mem), \ + "i" (vis_rd_d(rd)) \ + : "memory"); \ +} while (0) + +#define vis_stblk(rd, mem) \ +do { register void *__mem asm("g1"); \ + __mem = &(mem); \ + __asm__ __volatile__(".word 0xc1b85e00 | %1" \ + : \ + : "r" (__mem), \ + "i" (vis_rd_d(rd)) \ + : "memory"); \ +} while (0) + /* Both membar macros emit a fixed instruction word and declare a memory clobber. */ +#define vis_membar_storestore() \ +
__asm__ __volatile__(".word 0x8143e008" : : : "memory") + +#define vis_membar_sync() \ + __asm__ __volatile__(".word 0x8143e040" : : : "memory") + +/* 16 and 32 bit partitioned addition and subtraction. The normal + * versions perform 4 16-bit or 2 32-bit additions or subtractions. + * The 's' versions perform 2 16-bit or 1 32-bit additions or + * subtractions. + */ + +#define vis_padd16(rs1,rs2,rd) vis_dd2d(0x50, rs1, rs2, rd) +#define vis_padd16s(rs1,rs2,rd) vis_ss2s(0x51, rs1, rs2, rd) +#define vis_padd32(rs1,rs2,rd) vis_dd2d(0x52, rs1, rs2, rd) +#define vis_padd32s(rs1,rs2,rd) vis_ss2s(0x53, rs1, rs2, rd) +#define vis_psub16(rs1,rs2,rd) vis_dd2d(0x54, rs1, rs2, rd) +#define vis_psub16s(rs1,rs2,rd) vis_ss2s(0x55, rs1, rs2, rd) +#define vis_psub32(rs1,rs2,rd) vis_dd2d(0x56, rs1, rs2, rd) +#define vis_psub32s(rs1,rs2,rd) vis_ss2s(0x57, rs1, rs2, rd) + +/* Pixel formatting instructions. */ + +#define vis_pack16(rs2,rd) vis_d2s( 0x3b, rs2, rd) +#define vis_pack32(rs1,rs2,rd) vis_dd2d(0x3a, rs1, rs2, rd) +#define vis_packfix(rs2,rd) vis_d2s( 0x3d, rs2, rd) +#define vis_expand(rs2,rd) vis_s2d( 0x4d, rs2, rd) +#define vis_pmerge(rs1,rs2,rd) vis_ss2d(0x4b, rs1, rs2, rd) + +/* Partitioned multiply instructions. */ + +#define vis_mul8x16(rs1,rs2,rd) vis_sd2d(0x31, rs1, rs2, rd) +#define vis_mul8x16au(rs1,rs2,rd) vis_ss2d(0x33, rs1, rs2, rd) +#define vis_mul8x16al(rs1,rs2,rd) vis_ss2d(0x35, rs1, rs2, rd) +#define vis_mul8sux16(rs1,rs2,rd) vis_dd2d(0x36, rs1, rs2, rd) +#define vis_mul8ulx16(rs1,rs2,rd) vis_dd2d(0x37, rs1, rs2, rd) +#define vis_muld8sux16(rs1,rs2,rd) vis_ss2d(0x38, rs1, rs2, rd) +#define vis_muld8ulx16(rs1,rs2,rd) vis_ss2d(0x39, rs1, rs2, rd) + +/* Alignment instructions.
*/ + /* Emits the opf 0x18 word with register 1 in the rs1 and rd fields and register 0 in rs2. ptr is bound to %g1 and is both the input and the output operand of the asm; the updated value is returned. */ +static inline void *vis_alignaddr(void *_ptr) +{ + register void *ptr asm("g1"); + + ptr = _ptr; + + __asm__ __volatile__(".word %2" + : "=&r" (ptr) + : "0" (ptr), + "i" (vis_opc_base | vis_opf(0x18) | + vis_rs1_s(1) | + vis_rs2_s(0) | + vis_rd_s(1))); + + return ptr; +} + /* Same word as vis_alignaddr except that the rd field is 0; nothing is returned. */ +static inline void vis_alignaddr_g0(void *_ptr) +{ + register void *ptr asm("g1"); + + ptr = _ptr; + + __asm__ __volatile__(".word %2" + : "=&r" (ptr) + : "0" (ptr), + "i" (vis_opc_base | vis_opf(0x18) | + vis_rs1_s(1) | + vis_rs2_s(0) | + vis_rd_s(0))); +} + /* Same shape as vis_alignaddr but encoded with opf 0x19; returns the updated ptr. */ +static inline void *vis_alignaddrl(void *_ptr) +{ + register void *ptr asm("g1"); + + ptr = _ptr; + + __asm__ __volatile__(".word %2" + : "=&r" (ptr) + : "0" (ptr), + "i" (vis_opc_base | vis_opf(0x19) | + vis_rs1_s(1) | + vis_rs2_s(0) | + vis_rd_s(1))); + + return ptr; +} + /* The opf 0x19 word with the rd field set to 0; nothing is returned. */ +static inline void vis_alignaddrl_g0(void *_ptr) +{ + register void *ptr asm("g1"); + + ptr = _ptr; + + __asm__ __volatile__(".word %2" + : "=&r" (ptr) + : "0" (ptr), + "i" (vis_opc_base | vis_opf(0x19) | + vis_rs1_s(1) | + vis_rs2_s(0) | + vis_rd_s(0))); +} + +#define vis_faligndata(rs1,rs2,rd) vis_dd2d(0x48, rs1, rs2, rd) + +/* Logical operate instructions.
*/ + /* The aliases come in pairs: the plain name goes through a double-register encoder and the s-suffixed name through the matching single-register encoder, with an opf value one higher. */ +#define vis_fzero(rd) vis_d( 0x60, rd) +#define vis_fzeros(rd) vis_s( 0x61, rd) +#define vis_fone(rd) vis_d( 0x7e, rd) +#define vis_fones(rd) vis_s( 0x7f, rd) +#define vis_src1(rs1,rd) vis_d12d(0x74, rs1, rd) +#define vis_src1s(rs1,rd) vis_s12s(0x75, rs1, rd) +#define vis_src2(rs2,rd) vis_d22d(0x78, rs2, rd) +#define vis_src2s(rs2,rd) vis_s22s(0x79, rs2, rd) +#define vis_not1(rs1,rd) vis_d12d(0x6a, rs1, rd) +#define vis_not1s(rs1,rd) vis_s12s(0x6b, rs1, rd) +#define vis_not2(rs2,rd) vis_d22d(0x66, rs2, rd) +#define vis_not2s(rs2,rd) vis_s22s(0x67, rs2, rd) +#define vis_or(rs1,rs2,rd) vis_dd2d(0x7c, rs1, rs2, rd) +#define vis_ors(rs1,rs2,rd) vis_ss2s(0x7d, rs1, rs2, rd) +#define vis_nor(rs1,rs2,rd) vis_dd2d(0x62, rs1, rs2, rd) +#define vis_nors(rs1,rs2,rd) vis_ss2s(0x63, rs1, rs2, rd) +#define vis_and(rs1,rs2,rd) vis_dd2d(0x70, rs1, rs2, rd) +#define vis_ands(rs1,rs2,rd) vis_ss2s(0x71, rs1, rs2, rd) +#define vis_nand(rs1,rs2,rd) vis_dd2d(0x6e, rs1, rs2, rd) +#define vis_nands(rs1,rs2,rd) vis_ss2s(0x6f, rs1, rs2, rd) +#define vis_xor(rs1,rs2,rd) vis_dd2d(0x6c, rs1, rs2, rd) +#define vis_xors(rs1,rs2,rd) vis_ss2s(0x6d, rs1, rs2, rd) +#define vis_xnor(rs1,rs2,rd) vis_dd2d(0x72, rs1, rs2, rd) +#define vis_xnors(rs1,rs2,rd) vis_ss2s(0x73, rs1, rs2, rd) +#define vis_ornot1(rs1,rs2,rd) vis_dd2d(0x7a, rs1, rs2, rd) +#define vis_ornot1s(rs1,rs2,rd) vis_ss2s(0x7b, rs1, rs2, rd) +#define vis_ornot2(rs1,rs2,rd) vis_dd2d(0x76, rs1, rs2, rd) +#define vis_ornot2s(rs1,rs2,rd) vis_ss2s(0x77, rs1, rs2, rd) +#define vis_andnot1(rs1,rs2,rd) vis_dd2d(0x68, rs1, rs2, rd) +#define vis_andnot1s(rs1,rs2,rd) vis_ss2s(0x69, rs1, rs2, rd) +#define vis_andnot2(rs1,rs2,rd) vis_dd2d(0x64, rs1, rs2, rd) +#define vis_andnot2s(rs1,rs2,rd) vis_ss2s(0x65, rs1, rs2, rd) + +/* Pixel component distance.
*/ + +#define vis_pdist(rs1,rs2,rd) vis_dd2d(0x3e, rs1, rs2, rd) diff --git a/src/video_dec/libmpeg2/vlc.h b/src/video_dec/libmpeg2/vlc.h new file mode 100644 index 000000000..65de9a840 --- /dev/null +++ b/src/video_dec/libmpeg2/vlc.h @@ -0,0 +1,428 @@ +/* + * vlc.h + * Copyright (C) 2000-2002 Michel Lespinasse + * Copyright (C) 1999-2000 Aaron Holtzman + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + /* ORs the next two bytes at bit_ptr, first byte in the higher position, into bit_buf after shifting them left by shift, then advances bit_ptr by two. */ +#define GETWORD(bit_buf,shift,bit_ptr) \ +do { \ + bit_buf |= ((bit_ptr[0] << 8) | bit_ptr[1]) << (shift); \ + bit_ptr += 2; \ +} while (0) + /* Primes the bit reader: loads the first four bytes of start, most significant byte first, into bitstream_buf, points bitstream_ptr at the fifth byte and sets bitstream_bits to -16. The first byte is widened to uint32_t before the 24-bit shift so that a byte of 0x80 or more never overflows a signed int. */ +static inline void bitstream_init (picture_t * picture, uint8_t * start) +{ + picture->bitstream_buf = + ((uint32_t) start[0] << 24) | (start[1] << 16) | (start[2] << 8) | start[3]; + picture->bitstream_ptr = start + 4; + picture->bitstream_bits = -16; +} + +/* make sure that there are at least 16 valid bits in bit_buf */ +#define NEEDBITS(bit_buf,bits,bit_ptr) \ +do { \ + if (bits > 0) { \ + GETWORD (bit_buf, bits, bit_ptr); \ + bits -= 16; \ + } \ +} while (0) + +/* remove num valid bits from bit_buf */ +#define DUMPBITS(bit_buf,bits,num) \ +do { \ + bit_buf <<= (num); \ + bits += (num); \ +} while (0) + +/* take num bits from the high part of
bit_buf and zero extend them */ +#define UBITS(bit_buf,num) (((uint32_t)(bit_buf)) >> (32 - (num))) + +/* take num bits from the high part of bit_buf and sign extend them */ +#define SBITS(bit_buf,num) (((int32_t)(bit_buf)) >> (32 - (num))) + /* NOTE(review): SBITS depends on right-shifting a negative int32_t, a result C leaves implementation-defined. The entry types below each pair the decoded field(s) with a len member, presumably the code length in bits - confirm against the decoders that index these tables. */ +typedef struct { + uint8_t modes; + uint8_t len; +} MBtab; + +typedef struct { + uint8_t delta; + uint8_t len; +} MVtab; + +typedef struct { + int8_t dmv; + uint8_t len; +} DMVtab; + +typedef struct { + uint8_t cbp; + uint8_t len; +} CBPtab; + +typedef struct { + uint8_t size; + uint8_t len; +} DCtab; + +typedef struct { + uint8_t run; + uint8_t level; + uint8_t len; +} DCTtab; + +typedef struct { + uint8_t mba; + uint8_t len; +} MBAtab; + /* Short aliases for the MACROBLOCK_ flags that keep the table rows compact; they are removed again with #undef once MB_B has been defined. */ + +#define INTRA MACROBLOCK_INTRA +#define QUANT MACROBLOCK_QUANT + +static const MBtab MB_I [] = { + {INTRA|QUANT, 2}, {INTRA, 1} +}; + +#define MC MACROBLOCK_MOTION_FORWARD +#define CODED MACROBLOCK_PATTERN + +static const MBtab MB_P [] = { + {INTRA|QUANT, 6}, {CODED|QUANT, 5}, {MC|CODED|QUANT, 5}, {INTRA, 5}, + {MC, 3}, {MC, 3}, {MC, 3}, {MC, 3}, + {CODED, 2}, {CODED, 2}, {CODED, 2}, {CODED, 2}, + {CODED, 2}, {CODED, 2}, {CODED, 2}, {CODED, 2}, + {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, + {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, + {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, + {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1} +}; + +#define FWD MACROBLOCK_MOTION_FORWARD +#define BWD MACROBLOCK_MOTION_BACKWARD +#define INTER MACROBLOCK_MOTION_FORWARD|MACROBLOCK_MOTION_BACKWARD + /* NOTE(review): INTER expands without surrounding parentheses; the MB_B rows only ever combine it with the bitwise OR operator, so operator precedence cannot change the value here. */ +static const MBtab MB_B [] = { + {0, 0}, {INTRA|QUANT, 6}, + {BWD|CODED|QUANT, 6}, {FWD|CODED|QUANT, 6}, + {INTER|CODED|QUANT, 5}, {INTER|CODED|QUANT, 5}, + {INTRA, 5}, {INTRA, 5}, + {FWD, 4}, {FWD, 4}, {FWD, 4}, {FWD, 4}, + {FWD|CODED, 4}, {FWD|CODED, 4}, {FWD|CODED, 4}, {FWD|CODED, 4}, + {BWD, 3}, {BWD, 3}, {BWD, 3}, {BWD, 3}, + {BWD, 3}, {BWD, 3}, {BWD, 3}, {BWD, 3}, + {BWD|CODED, 3}, {BWD|CODED, 3}, {BWD|CODED, 3}, {BWD|CODED, 3}, + {BWD|CODED, 3},
{BWD|CODED, 3}, {BWD|CODED, 3}, {BWD|CODED, 3}, + {INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2}, + {INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2}, + {INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2}, + {INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2}, + {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, + {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, + {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, + {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2} +}; + +#undef INTRA +#undef QUANT +#undef MC +#undef CODED +#undef FWD +#undef BWD +#undef INTER + + +static const MVtab MV_4 [] = { + { 3, 6}, { 2, 4}, { 1, 3}, { 1, 3}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2} +}; + +static const MVtab MV_10 [] = { + { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, + { 0,10}, { 0,10}, { 0,10}, { 0,10}, {15,10}, {14,10}, {13,10}, {12,10}, + {11,10}, {10,10}, { 9, 9}, { 9, 9}, { 8, 9}, { 8, 9}, { 7, 9}, { 7, 9}, + { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, + { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, + { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7} +}; + + +static const DMVtab DMV_2 [] = { + { 0, 1}, { 0, 1}, { 1, 2}, {-1, 2} +}; + + +static const CBPtab CBP_7 [] = { + {0x22, 7}, {0x12, 7}, {0x0a, 7}, {0x06, 7}, + {0x21, 7}, {0x11, 7}, {0x09, 7}, {0x05, 7}, + {0x3f, 6}, {0x3f, 6}, {0x03, 6}, {0x03, 6}, + {0x24, 6}, {0x24, 6}, {0x18, 6}, {0x18, 6}, + {0x3e, 5}, {0x3e, 5}, {0x3e, 5}, {0x3e, 5}, + {0x02, 5}, {0x02, 5}, {0x02, 5}, {0x02, 5}, + {0x3d, 5}, {0x3d, 5}, {0x3d, 5}, {0x3d, 5}, + {0x01, 5}, {0x01, 5}, {0x01, 5}, {0x01, 5}, + {0x38, 5}, {0x38, 5}, {0x38, 5}, {0x38, 5}, + {0x34, 5}, {0x34, 5}, {0x34, 5}, {0x34, 5}, + {0x2c, 5}, {0x2c, 5}, {0x2c, 5}, {0x2c, 5}, + {0x1c, 5}, {0x1c, 5}, {0x1c, 5}, {0x1c, 5}, + {0x28, 5}, {0x28, 5}, {0x28, 5}, {0x28, 5}, + {0x14, 5}, {0x14, 5}, {0x14, 5}, {0x14, 5}, + 
{0x30, 5}, {0x30, 5}, {0x30, 5}, {0x30, 5}, + {0x0c, 5}, {0x0c, 5}, {0x0c, 5}, {0x0c, 5}, + {0x20, 4}, {0x20, 4}, {0x20, 4}, {0x20, 4}, + {0x20, 4}, {0x20, 4}, {0x20, 4}, {0x20, 4}, + {0x10, 4}, {0x10, 4}, {0x10, 4}, {0x10, 4}, + {0x10, 4}, {0x10, 4}, {0x10, 4}, {0x10, 4}, + {0x08, 4}, {0x08, 4}, {0x08, 4}, {0x08, 4}, + {0x08, 4}, {0x08, 4}, {0x08, 4}, {0x08, 4}, + {0x04, 4}, {0x04, 4}, {0x04, 4}, {0x04, 4}, + {0x04, 4}, {0x04, 4}, {0x04, 4}, {0x04, 4}, + {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, + {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, + {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, + {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3} +}; + +static const CBPtab CBP_9 [] = { + {0, 0}, {0x00, 9}, {0x27, 9}, {0x1b, 9}, + {0x3b, 9}, {0x37, 9}, {0x2f, 9}, {0x1f, 9}, + {0x3a, 8}, {0x3a, 8}, {0x36, 8}, {0x36, 8}, + {0x2e, 8}, {0x2e, 8}, {0x1e, 8}, {0x1e, 8}, + {0x39, 8}, {0x39, 8}, {0x35, 8}, {0x35, 8}, + {0x2d, 8}, {0x2d, 8}, {0x1d, 8}, {0x1d, 8}, + {0x26, 8}, {0x26, 8}, {0x1a, 8}, {0x1a, 8}, + {0x25, 8}, {0x25, 8}, {0x19, 8}, {0x19, 8}, + {0x2b, 8}, {0x2b, 8}, {0x17, 8}, {0x17, 8}, + {0x33, 8}, {0x33, 8}, {0x0f, 8}, {0x0f, 8}, + {0x2a, 8}, {0x2a, 8}, {0x16, 8}, {0x16, 8}, + {0x32, 8}, {0x32, 8}, {0x0e, 8}, {0x0e, 8}, + {0x29, 8}, {0x29, 8}, {0x15, 8}, {0x15, 8}, + {0x31, 8}, {0x31, 8}, {0x0d, 8}, {0x0d, 8}, + {0x23, 8}, {0x23, 8}, {0x13, 8}, {0x13, 8}, + {0x0b, 8}, {0x0b, 8}, {0x07, 8}, {0x07, 8} +}; + + +static const DCtab DC_lum_5 [] = { + {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, + {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, + {0, 3}, {0, 3}, {0, 3}, {0, 3}, {3, 3}, {3, 3}, {3, 3}, {3, 3}, + {4, 3}, {4, 3}, {4, 3}, {4, 3}, {5, 4}, {5, 4}, {6, 5} +}; + +static const DCtab DC_chrom_5 [] = { + {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, + {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, + {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, + {3, 3}, {3, 3}, {3, 3}, {3, 3}, {4, 4}, {4, 
4}, {5, 5} +}; + +static const DCtab DC_long [] = { + {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, { 6, 5}, { 6, 5}, + {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, { 6, 5}, { 6, 5}, + {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, { 7, 6}, { 7, 6}, + {8, 7}, {8, 7}, {8, 7}, {8, 7}, {9, 8}, {9, 8}, {10, 9}, {11, 9} +}; + + +static const DCTtab DCT_16 [] = { + {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, + {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, + {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, + {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, + { 2,18, 0}, { 2,17, 0}, { 2,16, 0}, { 2,15, 0}, + { 7, 3, 0}, { 17, 2, 0}, { 16, 2, 0}, { 15, 2, 0}, + { 14, 2, 0}, { 13, 2, 0}, { 12, 2, 0}, { 32, 1, 0}, + { 31, 1, 0}, { 30, 1, 0}, { 29, 1, 0}, { 28, 1, 0} +}; + +static const DCTtab DCT_15 [] = { + { 1,40,15}, { 1,39,15}, { 1,38,15}, { 1,37,15}, + { 1,36,15}, { 1,35,15}, { 1,34,15}, { 1,33,15}, + { 1,32,15}, { 2,14,15}, { 2,13,15}, { 2,12,15}, + { 2,11,15}, { 2,10,15}, { 2, 9,15}, { 2, 8,15}, + { 1,31,14}, { 1,31,14}, { 1,30,14}, { 1,30,14}, + { 1,29,14}, { 1,29,14}, { 1,28,14}, { 1,28,14}, + { 1,27,14}, { 1,27,14}, { 1,26,14}, { 1,26,14}, + { 1,25,14}, { 1,25,14}, { 1,24,14}, { 1,24,14}, + { 1,23,14}, { 1,23,14}, { 1,22,14}, { 1,22,14}, + { 1,21,14}, { 1,21,14}, { 1,20,14}, { 1,20,14}, + { 1,19,14}, { 1,19,14}, { 1,18,14}, { 1,18,14}, + { 1,17,14}, { 1,17,14}, { 1,16,14}, { 1,16,14} +}; + +static const DCTtab DCT_13 [] = { + { 11, 2,13}, { 10, 2,13}, { 6, 3,13}, { 4, 4,13}, + { 3, 5,13}, { 2, 7,13}, { 2, 6,13}, { 1,15,13}, + { 1,14,13}, { 1,13,13}, { 1,12,13}, { 27, 1,13}, + { 26, 1,13}, { 25, 1,13}, { 24, 1,13}, { 23, 1,13}, + { 1,11,12}, { 1,11,12}, { 9, 2,12}, { 9, 2,12}, + { 5, 3,12}, { 5, 3,12}, { 1,10,12}, { 1,10,12}, + { 3, 4,12}, { 3, 4,12}, { 8, 2,12}, { 8, 2,12}, + { 22, 1,12}, { 22, 1,12}, { 21, 1,12}, { 21, 1,12}, + { 1, 9,12}, { 1, 9,12}, { 20, 1,12}, { 20, 1,12}, + { 19, 1,12}, { 19, 1,12}, { 2, 5,12}, { 2, 5,12}, + { 4, 
3,12}, { 4, 3,12}, { 1, 8,12}, { 1, 8,12}, + { 7, 2,12}, { 7, 2,12}, { 18, 1,12}, { 18, 1,12} +}; + +static const DCTtab DCT_B14_10 [] = { + { 17, 1,10}, { 6, 2,10}, { 1, 7,10}, { 3, 3,10}, + { 2, 4,10}, { 16, 1,10}, { 15, 1,10}, { 5, 2,10} +}; + +static const DCTtab DCT_B14_8 [] = { + { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6}, + { 3, 2, 7}, { 3, 2, 7}, { 10, 1, 7}, { 10, 1, 7}, + { 1, 4, 7}, { 1, 4, 7}, { 9, 1, 7}, { 9, 1, 7}, + { 8, 1, 6}, { 8, 1, 6}, { 8, 1, 6}, { 8, 1, 6}, + { 7, 1, 6}, { 7, 1, 6}, { 7, 1, 6}, { 7, 1, 6}, + { 2, 2, 6}, { 2, 2, 6}, { 2, 2, 6}, { 2, 2, 6}, + { 6, 1, 6}, { 6, 1, 6}, { 6, 1, 6}, { 6, 1, 6}, + { 14, 1, 8}, { 1, 6, 8}, { 13, 1, 8}, { 12, 1, 8}, + { 4, 2, 8}, { 2, 3, 8}, { 1, 5, 8}, { 11, 1, 8} +}; + +static const DCTtab DCT_B14AC_5 [] = { + { 1, 3, 5}, { 5, 1, 5}, { 4, 1, 5}, + { 1, 2, 4}, { 1, 2, 4}, { 3, 1, 4}, { 3, 1, 4}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + {129, 0, 2}, {129, 0, 2}, {129, 0, 2}, {129, 0, 2}, + {129, 0, 2}, {129, 0, 2}, {129, 0, 2}, {129, 0, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2} +}; + +static const DCTtab DCT_B14DC_5 [] = { + { 1, 3, 5}, { 5, 1, 5}, { 4, 1, 5}, + { 1, 2, 4}, { 1, 2, 4}, { 3, 1, 4}, { 3, 1, 4}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, + { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, + { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, + { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1} +}; + +static const DCTtab DCT_B15_10 [] = { + { 6, 2, 9}, { 6, 2, 9}, { 15, 1, 9}, { 15, 1, 9}, + { 3, 4,10}, { 17, 1,10}, { 16, 1, 9}, { 16, 1, 9} +}; + +static const DCTtab DCT_B15_8 [] = { + { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6}, + { 8, 1, 7}, { 8, 1, 7}, { 9, 1, 7}, { 9, 1, 7}, + { 7, 1, 7}, { 7, 1, 7}, { 3, 2, 7}, { 3, 2, 7}, + { 1, 7, 6}, { 1, 7, 6}, { 1, 7, 6}, { 1, 7, 6}, + { 1, 6, 6}, { 1, 6, 6}, { 1, 6, 6}, { 1, 6, 6}, + { 5, 1, 6}, { 5, 1, 6}, { 5, 1, 
6}, { 5, 1, 6}, + { 6, 1, 6}, { 6, 1, 6}, { 6, 1, 6}, { 6, 1, 6}, + { 2, 5, 8}, { 12, 1, 8}, { 1,11, 8}, { 1,10, 8}, + { 14, 1, 8}, { 13, 1, 8}, { 4, 2, 8}, { 2, 4, 8}, + { 3, 1, 5}, { 3, 1, 5}, { 3, 1, 5}, { 3, 1, 5}, + { 3, 1, 5}, { 3, 1, 5}, { 3, 1, 5}, { 3, 1, 5}, + { 2, 2, 5}, { 2, 2, 5}, { 2, 2, 5}, { 2, 2, 5}, + { 2, 2, 5}, { 2, 2, 5}, { 2, 2, 5}, { 2, 2, 5}, + { 4, 1, 5}, { 4, 1, 5}, { 4, 1, 5}, { 4, 1, 5}, + { 4, 1, 5}, { 4, 1, 5}, { 4, 1, 5}, { 4, 1, 5}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, + {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, + {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, + {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, + { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, + { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, + { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, + { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, 
{ 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, + { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, + { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, + { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, + { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, + { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, + { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, + { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, + { 1, 4, 5}, { 1, 4, 5}, { 1, 4, 5}, { 1, 4, 5}, + { 1, 4, 5}, { 1, 4, 5}, { 1, 4, 5}, { 1, 4, 5}, + { 1, 5, 5}, { 1, 5, 5}, { 1, 5, 5}, { 1, 5, 5}, + { 1, 5, 5}, { 1, 5, 5}, { 1, 5, 5}, { 1, 5, 5}, + { 10, 1, 7}, { 10, 1, 7}, { 2, 3, 7}, { 2, 3, 7}, + { 11, 1, 7}, { 11, 1, 7}, { 1, 8, 7}, { 1, 8, 7}, + { 1, 9, 7}, { 1, 9, 7}, { 1,12, 8}, { 1,13, 8}, + { 3, 3, 8}, { 5, 2, 8}, { 1,14, 8}, { 1,15, 8} +}; + + +static const MBAtab MBA_5 [] = { + {6, 5}, {5, 5}, {4, 4}, {4, 4}, {3, 4}, {3, 4}, + {2, 3}, {2, 3}, {2, 3}, {2, 3}, {1, 3}, {1, 3}, {1, 3}, {1, 3}, + {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, + {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1} +}; + +static const MBAtab MBA_11 [] = { + {32, 11}, {31, 11}, {30, 11}, {29, 11}, + {28, 11}, {27, 11}, {26, 11}, {25, 11}, + {24, 11}, {23, 11}, {22, 11}, {21, 11}, + {20, 10}, {20, 10}, {19, 10}, {19, 10}, + {18, 10}, {18, 10}, {17, 10}, {17, 10}, + {16, 10}, {16, 10}, {15, 10}, {15, 10}, + {14, 8}, {14, 8}, {14, 8}, {14, 8}, + {14, 8}, {14, 8}, {14, 8}, {14, 8}, + {13, 8}, {13, 8}, {13, 8}, {13, 8}, + {13, 8}, {13, 8}, {13, 8}, {13, 8}, + {12, 8}, {12, 8}, {12, 8}, {12, 8}, + {12, 8}, {12, 8}, {12, 8}, {12, 8}, + {11, 8}, {11, 8}, {11, 8}, {11, 8}, + {11, 8}, {11, 8}, {11, 8}, {11, 8}, + {10, 8}, {10, 8}, {10, 8}, {10, 8}, + {10, 8}, {10, 8}, {10, 8}, {10, 8}, + { 9, 8}, { 9, 8}, { 9, 8}, { 9, 8}, + { 9, 8}, { 9, 8}, { 9, 8}, { 9, 8}, + { 8, 7}, { 8, 7}, { 8, 7}, { 8, 7}, + { 8, 7}, { 8, 7}, { 8, 7}, { 8, 
7}, + { 8, 7}, { 8, 7}, { 8, 7}, { 8, 7}, + { 8, 7}, { 8, 7}, { 8, 7}, { 8, 7}, + { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7}, + { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7}, + { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7}, + { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7} +}; diff --git a/src/video_dec/libmpeg2/xine_mpeg2_decoder.c b/src/video_dec/libmpeg2/xine_mpeg2_decoder.c new file mode 100644 index 000000000..3a3e28452 --- /dev/null +++ b/src/video_dec/libmpeg2/xine_mpeg2_decoder.c @@ -0,0 +1,169 @@ +/* + * Copyright (C) 2000-2003 the xine project + * + * This file is part of xine, a free video player. + * + * xine is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * xine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110, USA + * + * stuff needed to turn libmpeg2 into a xine decoder plugin + */ + + +#include +#include +#include +#include +#include +#include + +#define LOG_MODULE "mpeg2_decoder" +#define LOG_VERBOSE +/* +#define LOG +*/ + +#include +#include +#include "mpeg2.h" +#include "mpeg2_internal.h" +#include + +typedef struct { + video_decoder_class_t decoder_class; +} mpeg2_class_t; + /* Decoder instance. video_decoder is the first member, which is what allows the callbacks below to cast their video_decoder_t pointer back to this type. */ + +typedef struct mpeg2dec_decoder_s { + video_decoder_t video_decoder; + mpeg2dec_t mpeg2; + mpeg2_class_t *class; + xine_stream_t *stream; +} mpeg2dec_decoder_t; + /* Handles one buffer. A BUF_FLAG_SPECIAL buffer only updates the forced aspect and pan and scan settings (when it carries BUF_SPECIAL_ASPECT) and returns; a preview buffer goes to mpeg2_find_sequence_header(); anything else goes to mpeg2_decode_data() together with buf->pts. */ +static void mpeg2dec_decode_data (video_decoder_t *this_gen, buf_element_t *buf) { + mpeg2dec_decoder_t *this = (mpeg2dec_decoder_t *) this_gen; + + lprintf ("decode_data, flags=0x%08x ...\n", buf->decoder_flags); + + /* handle aspect hints from xine-dvdnav */ + if (buf->decoder_flags & BUF_FLAG_SPECIAL) { + if (buf->decoder_info[1] == BUF_SPECIAL_ASPECT) { + this->mpeg2.force_aspect = buf->decoder_info[2]; + if (buf->decoder_info[3] == 0x1 && buf->decoder_info[2] == 3) + /* letterboxing is denied, we have to do pan&scan */ + this->mpeg2.force_pan_scan = 1; + else + this->mpeg2.force_pan_scan = 0; + } + return; + } + + if (buf->decoder_flags & BUF_FLAG_PREVIEW) { + mpeg2_find_sequence_header (&this->mpeg2, buf->content, buf->content + buf->size); + } else { + + mpeg2_decode_data (&this->mpeg2, buf->content, buf->content + buf->size, + buf->pts); + } + + lprintf ("decode_data...done\n"); +} + /* Forwards the flush request to mpeg2_flush(). */ +static void mpeg2dec_flush (video_decoder_t *this_gen) { + mpeg2dec_decoder_t *this = (mpeg2dec_decoder_t *) this_gen; + + lprintf ("flush\n"); + + mpeg2_flush (&this->mpeg2); +} + /* Forwards the reset request to mpeg2_reset(). */ +static void mpeg2dec_reset (video_decoder_t *this_gen) { + mpeg2dec_decoder_t *this = (mpeg2dec_decoder_t *) this_gen; + + mpeg2_reset (&this->mpeg2); +} +
+static void mpeg2dec_discontinuity (video_decoder_t *this_gen) { + mpeg2dec_decoder_t *this = (mpeg2dec_decoder_t *) this_gen; + + mpeg2_discontinuity (&this->mpeg2); +} + /* Tears the instance down: closes the libmpeg2 state, closes video_out for this stream, then frees the instance itself. */ +static void mpeg2dec_dispose (video_decoder_t *this_gen) { + + mpeg2dec_decoder_t *this = (mpeg2dec_decoder_t *) this_gen; + + lprintf ("close\n"); + + mpeg2_close (&this->mpeg2); + + this->stream->video_out->close(this->stream->video_out, this->stream); + + free (this); +} + /* Allocates a decoder instance for stream, installs the five callbacks, initialises libmpeg2 on the stream video_out, opens that video_out and clears the forced aspect and pan and scan settings. NOTE(review): the xine_xmalloc() result is dereferenced without a NULL check - confirm the allocator cannot return NULL. */ +static video_decoder_t *open_plugin (video_decoder_class_t *class_gen, xine_stream_t *stream) { + mpeg2dec_decoder_t *this ; + + this = (mpeg2dec_decoder_t *) xine_xmalloc (sizeof (mpeg2dec_decoder_t)); + + this->video_decoder.decode_data = mpeg2dec_decode_data; + this->video_decoder.flush = mpeg2dec_flush; + this->video_decoder.reset = mpeg2dec_reset; + this->video_decoder.discontinuity = mpeg2dec_discontinuity; + this->video_decoder.dispose = mpeg2dec_dispose; + this->stream = stream; + this->class = (mpeg2_class_t *) class_gen; + this->mpeg2.stream = stream; + + mpeg2_init (&this->mpeg2, stream->video_out); + (stream->video_out->open) (stream->video_out, stream); + this->mpeg2.force_aspect = this->mpeg2.force_pan_scan = 0; + + return &this->video_decoder; +} + +/* + * mpeg2 plugin class + */ +static void *init_plugin (xine_t *xine, void *data) { + /* Builds the class object and fills in its four members; xine and data are not used. NOTE(review): same unchecked xine_xmalloc() pattern as in open_plugin. */ + mpeg2_class_t *this; + + this = (mpeg2_class_t *) xine_xmalloc (sizeof (mpeg2_class_t)); + + this->decoder_class.open_plugin = open_plugin; + this->decoder_class.identifier = "mpeg2dec"; + this->decoder_class.description = N_("mpeg2 based video decoder plugin"); + this->decoder_class.dispose = default_video_decoder_class_dispose; + + return this; +} +/* + * exported plugin catalog entry + */ + +static uint32_t supported_types[] = { BUF_VIDEO_MPEG, 0 }; + +static const decoder_info_t dec_info_mpeg2 = { + supported_types, /* supported types */ + 7 /* priority */ +}; + +const plugin_info_t xine_plugin_info[] EXPORTED = { + /* type, API, "name", version, special_info,
init_function */ + { PLUGIN_VIDEO_DECODER, 19, "mpeg2", XINE_VERSION_CODE, &dec_info_mpeg2, init_plugin }, + { PLUGIN_NONE, 0, "", 0, NULL, NULL } +}; diff --git a/src/video_dec/libmpeg2/xvmc.h b/src/video_dec/libmpeg2/xvmc.h new file mode 100644 index 000000000..5d61bcf83 --- /dev/null +++ b/src/video_dec/libmpeg2/xvmc.h @@ -0,0 +1,32 @@ +/* + * mpeg2_internal.h + * Copyright (C) 2000-2002 Michel Lespinasse + * Copyright (C) 1999-2000 Aaron Holtzman + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _XVMC_H +#include "libmpeg2_accel.h" + +/* slice_xvmc.c */ + +void mpeg2_xvmc_slice (mpeg2dec_accel_t *accel, picture_t * picture, int code, uint8_t * buffer); +void xvmc_setup_scan_ptable( void ); + +#endif diff --git a/src/video_dec/libmpeg2/xvmc_vld.h b/src/video_dec/libmpeg2/xvmc_vld.h new file mode 100644 index 000000000..561d1789d --- /dev/null +++ b/src/video_dec/libmpeg2/xvmc_vld.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2004 The Unichrome project. All rights reserved. 
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License as published by the Free Software
+ * Foundation; either version 2, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTIES OR REPRESENTATIONS; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ *
+ */
+
+#ifndef _XVMC_VLD_H
+#define _XVMC_VLD_H
+
+#include "accel_xvmc.h"
+#include "xvmc.h"  /* xvmc.h in turn includes libmpeg2_accel.h */
+/* NOTE(review): both functions are presumably implemented in slice_xvmc_vld.c -- confirm */
+extern void mpeg2_xxmc_slice( mpeg2dec_accel_t *accel, picture_t *picture,
+                              int code, uint8_t *buffer, uint32_t chunk_size,
+                              uint8_t *chunk_buffer);
+extern void mpeg2_xxmc_vld_frame_complete(mpeg2dec_accel_t *accel, picture_t *picture, int code);
+
+
+#endif /* _XVMC_VLD_H */
-- cgit v1.2.3