diff options
author | Daniel Caujolle-Bert <f1rmb@users.sourceforge.net> | 2001-04-18 22:33:39 +0000 |
---|---|---|
committer | Daniel Caujolle-Bert <f1rmb@users.sourceforge.net> | 2001-04-18 22:33:39 +0000 |
commit | db6b7c2e1c52c536a7f9690a410bf69817e0b2c5 (patch) | |
tree | fdaf7537abca3d4875ad21322c54888914ed15a2 /src/libmpeg2 | |
download | xine-lib-db6b7c2e1c52c536a7f9690a410bf69817e0b2c5.tar.gz xine-lib-db6b7c2e1c52c536a7f9690a410bf69817e0b2c5.tar.bz2 |
Initial revision
CVS patchset: 1
CVS date: 2001/04/18 22:33:39
Diffstat (limited to 'src/libmpeg2')
-rw-r--r-- | src/libmpeg2/Makefile.am | 23 | ||||
-rw-r--r-- | src/libmpeg2/decode.c | 323 | ||||
-rw-r--r-- | src/libmpeg2/header.c | 235 | ||||
-rw-r--r-- | src/libmpeg2/idct.c | 290 | ||||
-rw-r--r-- | src/libmpeg2/idct_mlib.c | 47 | ||||
-rw-r--r-- | src/libmpeg2/idct_mlib.h | 25 | ||||
-rw-r--r-- | src/libmpeg2/idct_mmx.c | 705 | ||||
-rw-r--r-- | src/libmpeg2/motion_comp.c | 125 | ||||
-rw-r--r-- | src/libmpeg2/motion_comp_mlib.c | 180 | ||||
-rw-r--r-- | src/libmpeg2/motion_comp_mmx.c | 1017 | ||||
-rw-r--r-- | src/libmpeg2/mpeg2.h | 67 | ||||
-rw-r--r-- | src/libmpeg2/mpeg2_internal.h | 194 | ||||
-rw-r--r-- | src/libmpeg2/slice.c | 1799 | ||||
-rw-r--r-- | src/libmpeg2/stats.c | 315 | ||||
-rw-r--r-- | src/libmpeg2/vlc.h | 425 |
15 files changed, 5770 insertions, 0 deletions
diff --git a/src/libmpeg2/Makefile.am b/src/libmpeg2/Makefile.am new file mode 100644 index 000000000..a7031295c --- /dev/null +++ b/src/libmpeg2/Makefile.am @@ -0,0 +1,23 @@ +CFLAGS = @BUILD_LIB_STATIC@ @LIBMPEG2_CFLAGS@ @GLOBAL_CFLAGS@ + +EXTRA_DIST = idct_mlib.c idct_mlib.h motion_comp_mlib.c + +noinst_LTLIBRARIES = libmpeg2.la + +#libmpeg2_la_SOURCES = slice.c header.c stats.c idct.c motion_comp.c\ +# decode.c idct_mmx.c motion_comp_mmx.c +libmpeg2_la_SOURCES = slice.c header.c stats.c idct.c motion_comp.c\ + decode.c idct_mmx.c motion_comp_mmx.c + +noinst_HEADERS = vlc.h mpeg2.h mpeg2_internal.h + +debug: + $(MAKE) CFLAGS="$(DEBUG_CFLAGS) @BUILD_LIB_STATIC@" + +mostlyclean-generic: + -rm -f *~ \#* .*~ .\#* + +maintainer-clean-generic: + -@echo "This command is intended for maintainers to use;" + -@echo "it deletes files that may require special tools to rebuild." + -rm -f Makefile.in diff --git a/src/libmpeg2/decode.c b/src/libmpeg2/decode.c new file mode 100644 index 000000000..77e198fbf --- /dev/null +++ b/src/libmpeg2/decode.c @@ -0,0 +1,323 @@ +/* + * decode.c + * Copyright (C) 1999-2001 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#include <stdio.h> +#include <string.h> /* memcpy/memset, try to remove */ +#include <stdlib.h> +#include <inttypes.h> + +/* Xine specific */ +#include "buffer.h" +#include "video_decoder.h" +/* */ + + +#include "video_out.h" +#include "mpeg2.h" +#include "mpeg2_internal.h" +#include "cpu_accel.h" +#include "attributes.h" + +#ifdef HAVE_MEMALIGN +/* some systems have memalign() but no declaration for it */ +void * memalign (size_t align, size_t size); +#else +/* assume malloc alignment is sufficient */ +#define memalign(align,size) malloc (size) +#endif + +#define BUFFER_SIZE (224 * 1024) + +mpeg2_config_t config; + +void mpeg2_init (mpeg2dec_t * mpeg2dec, uint32_t mm_accel, + vo_instance_t * output) +{ + static int do_init = 1; + + if (do_init) { + do_init = 0; + config.flags = mm_accel; + idct_init (); + motion_comp_init (); + } + + mpeg2dec->chunk_buffer = memalign (16, BUFFER_SIZE + 4); + mpeg2dec->picture = memalign (16, sizeof (picture_t)); + + mpeg2dec->shift = 0xffffff00; + mpeg2dec->is_sequence_needed = 1; + mpeg2dec->drop_flag = 0; + mpeg2dec->drop_frame = 0; + mpeg2dec->in_slice = 0; + mpeg2dec->output = output; + mpeg2dec->chunk_ptr = mpeg2dec->chunk_buffer; + mpeg2dec->code = 0xb4; + + memset (mpeg2dec->picture, 0, sizeof (picture_t)); + + /* initialize supstructures */ + header_state_init (mpeg2dec->picture); +} + +static inline int parse_chunk (mpeg2dec_t * mpeg2dec, int code, + uint8_t * buffer, uint32_t pts) +{ + picture_t * picture; + int is_frame_done; + + /* wait for sequence_header_code */ + if (mpeg2dec->is_sequence_needed && (code != 0xb3)) + return 0; + + stats_header (code, buffer); + + picture = mpeg2dec->picture; + is_frame_done = mpeg2dec->in_slice && ((!code) || (code >= 0xb0)); + + if (is_frame_done) { + mpeg2dec->in_slice = 0; + + if (((picture->picture_structure == FRAME_PICTURE) || + (picture->second_field)) && + (!(mpeg2dec->drop_frame))) { + vo_draw ((picture->picture_coding_type == B_TYPE) ? + picture->current_frame : + picture->forward_reference_frame); +#ifdef ARCH_X86 + if (config.flags & MM_ACCEL_X86_MMX) + emms (); +#endif + } + } + + switch (code) { + case 0x00: /* picture_start_code */ + if (header_process_picture_header (picture, buffer)) { + fprintf (stderr, "bad picture header\n"); + exit (1); + } + + if (mpeg2dec->pts) { + picture->current_frame->PTS = mpeg2dec->pts; + mpeg2dec->pts = 0; + } + + mpeg2dec->drop_frame = + mpeg2dec->drop_flag && (picture->picture_coding_type == B_TYPE); + break; + + case 0xb3: /* sequence_header_code */ + if (header_process_sequence_header (picture, buffer)) { + fprintf (stderr, "bad sequence header\n"); + exit (1); + } + if (mpeg2dec->is_sequence_needed) { + mpeg2dec->is_sequence_needed = 0; + if (vo_setup (mpeg2dec->output, picture->coded_picture_width, + picture->coded_picture_height)) { + fprintf (stderr, "display setup failed\n"); + exit (1); + } + picture->forward_reference_frame = + vo_get_frame (mpeg2dec->output, + VO_PREDICTION_FLAG | VO_BOTH_FIELDS); + picture->backward_reference_frame = + vo_get_frame (mpeg2dec->output, + VO_PREDICTION_FLAG | VO_BOTH_FIELDS); + } + mpeg2dec->frame_rate_code = picture->frame_rate_code; /* FIXME */ + break; + + case 0xb5: /* extension_start_code */ + if (header_process_extension (picture, buffer)) { + fprintf (stderr, "bad extension\n"); + exit (1); + } + break; + + default: + if (code >= 0xb9) + fprintf (stderr, "stream not demultiplexed ?\n"); + + if (code >= 0xb0) + break; + + if (!(mpeg2dec->in_slice)) { + mpeg2dec->in_slice = 1; + + if (picture->second_field) + vo_field (picture->current_frame, picture->picture_structure); + /* + else { + if (picture->picture_coding_type == B_TYPE) + picture->current_frame = + vo_get_frame (mpeg2dec->output, + picture->picture_structure); + else { + picture->current_frame = + vo_get_frame (mpeg2dec->output, + (VO_PREDICTION_FLAG | + picture->picture_structure)); + picture->forward_reference_frame = + picture->backward_reference_frame; + picture->backward_reference_frame = picture->current_frame; + } + }*/ + } + + if (!(mpeg2dec->drop_frame)) { + slice_process (picture, code, buffer); + +#ifdef ARCH_X86 + if (config.flags & MM_ACCEL_X86_MMX) + emms (); +#endif + } + } + + return is_frame_done; +} + +static inline uint8_t * copy_chunk (mpeg2dec_t * mpeg2dec, + uint8_t * current, uint8_t * end) +{ + uint32_t shift; + uint8_t * chunk_ptr; + uint8_t * limit; + uint8_t byte; + + shift = mpeg2dec->shift; + chunk_ptr = mpeg2dec->chunk_ptr; + limit = current + (mpeg2dec->chunk_buffer + BUFFER_SIZE - chunk_ptr); + if (limit > end) + limit = end; + + while (1) { + byte = *current++; + if (shift != 0x00000100) { + shift = (shift | byte) << 8; + *chunk_ptr++ = byte; + if (current < limit) + continue; + if (current == end) { + mpeg2dec->chunk_ptr = chunk_ptr; + mpeg2dec->shift = shift; + return NULL; + } else { + /* we filled the chunk buffer without finding a start code */ + mpeg2dec->code = 0xb4; /* sequence_error_code */ + mpeg2dec->chunk_ptr = mpeg2dec->chunk_buffer; + return current; + } + } + mpeg2dec->code = byte; + mpeg2dec->chunk_ptr = mpeg2dec->chunk_buffer; + mpeg2dec->shift = 0xffffff00; + return current; + } +} + +int mpeg2_decode_data (mpeg2dec_t * mpeg2dec, uint8_t * current, + uint8_t * end, uint32_t pts) +{ + int ret; + uint8_t code; + + ret = 0; + + mpeg2dec->pts = pts; + while (current != end) { + code = mpeg2dec->code; + current = copy_chunk (mpeg2dec, current, end); + if (current == NULL) + return ret; + ret += parse_chunk (mpeg2dec, code, mpeg2dec->chunk_buffer, pts); + } + return ret; +} + +void mpeg2_close (mpeg2dec_t * mpeg2dec) +{ + static uint8_t finalizer[] = {0,0,1,0}; + + mpeg2_decode_data (mpeg2dec, finalizer, finalizer+4, mpeg2dec->pts); + + if (! (mpeg2dec->is_sequence_needed)) + vo_draw (mpeg2dec->picture->backward_reference_frame); + + free (mpeg2dec->chunk_buffer); + free (mpeg2dec->picture); +} + +void mpeg2_drop (mpeg2dec_t * mpeg2dec, int flag) +{ + mpeg2dec->drop_flag = flag; +} + +/* + * xine specific stuff + */ + +int mpeg2dec_get_version () { + return 1; +} + +int mpeg2dec_can_handle (int buf_type) { + return (buf_type == BUF_VIDEO_MPEG) ; +} + + +static mpeg2dec_t gMpeg2; + +void mpeg2dec_init (vo_instance_t *video_out) { + uint32_t mmacc = mm_accel(); + + mpeg2_init (&gMpeg2, mmacc, video_out); +} + +void mpeg2dec_decode_data (buf_element_t *buf) { + mpeg2_decode_data (&gMpeg2, buf->content, buf->content + buf->size, + buf->PTS); +} + +void mpeg2dec_release_img_buffers () { + // decode_free_image_buffers (&gMpeg2); +} + +void mpeg2dec_close () { + mpeg2_close (&gMpeg2); +} + +static video_decoder_t vd_mpeg2dec = { + mpeg2dec_get_version, + mpeg2dec_can_handle, + mpeg2dec_init, + mpeg2dec_decode_data, + mpeg2dec_release_img_buffers, + mpeg2dec_close +}; + +video_decoder_t *init_video_decoder_mpeg2dec () { + return &vd_mpeg2dec; +} diff --git a/src/libmpeg2/header.c b/src/libmpeg2/header.c new file mode 100644 index 000000000..e021b2f8e --- /dev/null +++ b/src/libmpeg2/header.c @@ -0,0 +1,235 @@ +/* + * slice.c + * Copyright (C) 1999-2001 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#include <inttypes.h> + +#include "mpeg2_internal.h" +#include "attributes.h" + +/* default intra quant matrix, in zig-zag order */ +static uint8_t default_intra_quantizer_matrix[64] ATTR_ALIGN(16) = { + 8, + 16, 16, + 19, 16, 19, + 22, 22, 22, 22, + 22, 22, 26, 24, 26, + 27, 27, 27, 26, 26, 26, + 26, 27, 27, 27, 29, 29, 29, + 34, 34, 34, 29, 29, 29, 27, 27, + 29, 29, 32, 32, 34, 34, 37, + 38, 37, 35, 35, 34, 35, + 38, 38, 40, 40, 40, + 48, 48, 46, 46, + 56, 56, 58, + 69, 69, + 83 +}; + +uint8_t scan_norm[64] ATTR_ALIGN(16) = +{ + /* Zig-Zag scan pattern */ + 0, 1, 8,16, 9, 2, 3,10, + 17,24,32,25,18,11, 4, 5, + 12,19,26,33,40,48,41,34, + 27,20,13, 6, 7,14,21,28, + 35,42,49,56,57,50,43,36, + 29,22,15,23,30,37,44,51, + 58,59,52,45,38,31,39,46, + 53,60,61,54,47,55,62,63 +}; + +uint8_t scan_alt[64] ATTR_ALIGN(16) = +{ + /* Alternate scan pattern */ + 0,8,16,24,1,9,2,10,17,25,32,40,48,56,57,49, + 41,33,26,18,3,11,4,12,19,27,34,42,50,58,35,43, + 51,59,20,28,5,13,6,14,21,29,36,44,52,60,37,45, + 53,61,22,30,7,15,23,31,38,46,54,62,39,47,55,63 +}; + +void header_state_init (picture_t * picture) +{ + picture->scan = scan_norm; +} + +int header_process_sequence_header (picture_t * picture, uint8_t * buffer) +{ + int width, height; + int i; + + if ((buffer[6] & 0x20) != 0x20) + return 1; /* missing marker_bit */ + + height = (buffer[0] << 16) | (buffer[1] << 8) | buffer[2]; + + width = ((height >> 12) + 15) & ~15; + height = ((height & 0xfff) + 15) & ~15; + + if ((width > 768) || (height > 576)) + return 1; /* size restrictions for MP@ML or MPEG1 */ + + picture->coded_picture_width = width; + picture->coded_picture_height = height; + + /* this is not used by the decoder */ + picture->aspect_ratio_information = buffer[3] >> 4; + picture->frame_rate_code = buffer[3] & 15; + picture->bitrate = (buffer[4]<<10)|(buffer[5]<<2)|(buffer[6]>>6); + + if (buffer[7] & 2) { + for (i = 0; i < 64; i++) + picture->intra_quantizer_matrix[scan_norm[i]] = + (buffer[i+7] << 7) | (buffer[i+8] >> 1); + buffer += 64; + } else { + for (i = 0; i < 64; i++) + picture->intra_quantizer_matrix[scan_norm[i]] = + default_intra_quantizer_matrix [i]; + } + + if (buffer[7] & 1) { + for (i = 0; i < 64; i++) + picture->non_intra_quantizer_matrix[scan_norm[i]] = + buffer[i+8]; + } else { + for (i = 0; i < 64; i++) + picture->non_intra_quantizer_matrix[i] = 16; + } + + /* MPEG1 - for testing only */ + picture->mpeg1 = 1; + picture->intra_dc_precision = 0; + picture->frame_pred_frame_dct = 1; + picture->q_scale_type = 0; + picture->concealment_motion_vectors = 0; + /* picture->alternate_scan = 0; */ + picture->picture_structure = FRAME_PICTURE; + /* picture->second_field = 0; */ + + return 0; +} + +static int header_process_sequence_extension (picture_t * picture, + uint8_t * buffer) +{ + /* check chroma format, size extensions, marker bit */ + if (((buffer[1] & 0x07) != 0x02) || (buffer[2] & 0xe0) || + ((buffer[3] & 0x01) != 0x01)) + return 1; + + /* this is not used by the decoder */ + picture->progressive_sequence = (buffer[1] >> 3) & 1; + + if (picture->progressive_sequence) + picture->coded_picture_height = + (picture->coded_picture_height + 31) & ~31; + + /* MPEG1 - for testing only */ + picture->mpeg1 = 0; + + return 0; +} + +static int header_process_quant_matrix_extension (picture_t * picture, + uint8_t * buffer) +{ + int i; + + if (buffer[0] & 8) { + for (i = 0; i < 64; i++) + picture->intra_quantizer_matrix[scan_norm[i]] = + (buffer[i] << 5) | (buffer[i+1] >> 3); + buffer += 64; + } + + if (buffer[0] & 4) { + for (i = 0; i < 64; i++) + picture->non_intra_quantizer_matrix[scan_norm[i]] = + (buffer[i] << 6) | (buffer[i+1] >> 2); + } + + return 0; +} + +static int header_process_picture_coding_extension (picture_t * picture, uint8_t * buffer) +{ + /* pre subtract 1 for use later in compute_motion_vector */ + picture->f_motion.f_code[0] = (buffer[0] & 15) - 1; + picture->f_motion.f_code[1] = (buffer[1] >> 4) - 1; + picture->b_motion.f_code[0] = (buffer[1] & 15) - 1; + picture->b_motion.f_code[1] = (buffer[2] >> 4) - 1; + + picture->intra_dc_precision = (buffer[2] >> 2) & 3; + picture->picture_structure = buffer[2] & 3; + picture->frame_pred_frame_dct = (buffer[3] >> 6) & 1; + picture->concealment_motion_vectors = (buffer[3] >> 5) & 1; + picture->q_scale_type = (buffer[3] >> 4) & 1; + picture->intra_vlc_format = (buffer[3] >> 3) & 1; + + if (buffer[3] & 4) /* alternate_scan */ + picture->scan = scan_alt; + else + picture->scan = scan_norm; + + /* these are not used by the decoder */ + picture->top_field_first = buffer[3] >> 7; + picture->repeat_first_field = (buffer[3] >> 1) & 1; + picture->progressive_frame = buffer[4] >> 7; + + return 0; +} + +int header_process_extension (picture_t * picture, uint8_t * buffer) +{ + switch (buffer[0] & 0xf0) { + case 0x10: /* sequence extension */ + return header_process_sequence_extension (picture, buffer); + + case 0x30: /* quant matrix extension */ + return header_process_quant_matrix_extension (picture, buffer); + + case 0x80: /* picture coding extension */ + return header_process_picture_coding_extension (picture, buffer); + } + + return 0; +} + +int header_process_picture_header (picture_t *picture, uint8_t * buffer) +{ + picture->picture_coding_type = (buffer [1] >> 3) & 7; + + /* forward_f_code and backward_f_code - used in mpeg1 only */ + picture->f_motion.f_code[1] = (buffer[3] >> 2) & 1; + picture->f_motion.f_code[0] = + (((buffer[3] << 1) | (buffer[4] >> 7)) & 7) - 1; + picture->b_motion.f_code[1] = (buffer[4] >> 6) & 1; + picture->b_motion.f_code[0] = ((buffer[4] >> 3) & 7) - 1; + + /* move in header_process_picture_header */ + picture->second_field = + (picture->picture_structure != FRAME_PICTURE) && + !(picture->second_field); + + return 0; +} diff --git a/src/libmpeg2/idct.c b/src/libmpeg2/idct.c new file mode 100644 index 000000000..21d33dc8c --- /dev/null +++ b/src/libmpeg2/idct.c @@ -0,0 +1,290 @@ +/* + * idct.c + * Copyright (C) 1999-2001 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> + * + * Portions of this code are from the MPEG software simulation group + * idct implementation. This code will be replaced with a new + * implementation soon. + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/**********************************************************/ +/* inverse two dimensional DCT, Chen-Wang algorithm */ +/* (cf. IEEE ASSP-32, pp. 803-816, Aug. 1984) */ +/* 32-bit integer arithmetic (8 bit coefficients) */ +/* 11 mults, 29 adds per DCT */ +/* sE, 18.8.91 */ +/**********************************************************/ +/* coefficients extended to 12 bit for IEEE1180-1990 */ +/* compliance sE, 2.1.94 */ +/**********************************************************/ + +/* this code assumes >> to be a two's-complement arithmetic */ +/* right shift: (-2)>>1 == -1 , (-3)>>1 == -2 */ + +#include "config.h" + +#include <stdio.h> +#include <inttypes.h> + +#include "mpeg2_internal.h" +#include "xine_internal.h" +#include "xine.h" +#include "cpu_accel.h" + +#define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */ +#define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */ +#define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */ +#define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */ +#define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */ +#define W7 565 /* 2048*sqrt (2)*cos (7*pi/16) */ + +/* idct main entry point */ +void (*idct_block_copy) (int16_t * block, uint8_t * dest, int stride); +void (*idct_block_add) (int16_t * block, uint8_t * dest, int stride); + +static void idct_block_copy_c (int16_t *block, uint8_t * dest, int stride); +static void idct_block_add_c (int16_t *block, uint8_t * dest, int stride); + +static uint8_t clip_lut[1024]; +#define CLIP(i) ((clip_lut+384)[ (i)]) + +void idct_init (void) +{ +#ifdef ARCH_X86 + if (config.flags & MM_ACCEL_X86_MMXEXT) { + fprintf (stderr, "Using MMXEXT for IDCT transform\n"); + idct_block_copy = idct_block_copy_mmxext; + idct_block_add = idct_block_add_mmxext; + idct_mmx_init (); + } else if (config.flags & MM_ACCEL_X86_MMX) { + fprintf (stderr, "Using MMX for IDCT transform\n"); + idct_block_copy = idct_block_copy_mmx; + idct_block_add = idct_block_add_mmx; + idct_mmx_init (); + } else +#endif +#ifdef LIBMPEG2_MLIB + if (config.flags & MM_ACCEL_MLIB) { + fprintf (stderr, "Using mlib for IDCT transform\n"); + idct_block_copy = idct_block_copy_mlib; + idct_block_add = idct_block_add_mlib; + } else +#endif + { + int i; + + fprintf (stderr, "No accelerated IDCT transform found\n"); + idct_block_copy = idct_block_copy_c; + idct_block_add = idct_block_add_c; + for (i = -384; i < 640; i++) + clip_lut[i+384] = (i < 0) ? 0 : ((i > 255) ? 255 : i); + } +} + +/* row (horizontal) IDCT + * + * 7 pi 1 + * dst[k] = sum c[l] * src[l] * cos ( -- * ( k + - ) * l ) + * l=0 8 2 + * + * where: c[0] = 128 + * c[1..7] = 128*sqrt (2) + */ + +static void inline idct_row (int16_t * block) +{ + int x0, x1, x2, x3, x4, x5, x6, x7, x8; + + x1 = block[4] << 11; + x2 = block[6]; + x3 = block[2]; + x4 = block[1]; + x5 = block[7]; + x6 = block[5]; + x7 = block[3]; + + /* shortcut */ + if (! (x1 | x2 | x3 | x4 | x5 | x6 | x7 )) { + block[0] = block[1] = block[2] = block[3] = block[4] = + block[5] = block[6] = block[7] = block[0]<<3; + return; + } + + x0 = (block[0] << 11) + 128; /* for proper rounding in the fourth stage */ + + /* first stage */ + x8 = W7 * (x4 + x5); + x4 = x8 + (W1 - W7) * x4; + x5 = x8 - (W1 + W7) * x5; + x8 = W3 * (x6 + x7); + x6 = x8 - (W3 - W5) * x6; + x7 = x8 - (W3 + W5) * x7; + + /* second stage */ + x8 = x0 + x1; + x0 -= x1; + x1 = W6 * (x3 + x2); + x2 = x1 - (W2 + W6) * x2; + x3 = x1 + (W2 - W6) * x3; + x1 = x4 + x6; + x4 -= x6; + x6 = x5 + x7; + x5 -= x7; + + /* third stage */ + x7 = x8 + x3; + x8 -= x3; + x3 = x0 + x2; + x0 -= x2; + x2 = (181 * (x4 + x5) + 128) >> 8; + x4 = (181 * (x4 - x5) + 128) >> 8; + + /* fourth stage */ + block[0] = (x7 + x1) >> 8; + block[1] = (x3 + x2) >> 8; + block[2] = (x0 + x4) >> 8; + block[3] = (x8 + x6) >> 8; + block[4] = (x8 - x6) >> 8; + block[5] = (x0 - x4) >> 8; + block[6] = (x3 - x2) >> 8; + block[7] = (x7 - x1) >> 8; +} + +/* column (vertical) IDCT + * + * 7 pi 1 + * dst[8*k] = sum c[l] * src[8*l] * cos ( -- * ( k + - ) * l ) + * l=0 8 2 + * + * where: c[0] = 1/1024 + * c[1..7] = (1/1024)*sqrt (2) + */ + +static void inline idct_col (int16_t *block) +{ + int x0, x1, x2, x3, x4, x5, x6, x7, x8; + + /* shortcut */ + x1 = block [8*4] << 8; + x2 = block [8*6]; + x3 = block [8*2]; + x4 = block [8*1]; + x5 = block [8*7]; + x6 = block [8*5]; + x7 = block [8*3]; + +#if 0 + if (! (x1 | x2 | x3 | x4 | x5 | x6 | x7 )) { + block[8*0] = block[8*1] = block[8*2] = block[8*3] = block[8*4] = + block[8*5] = block[8*6] = block[8*7] = (block[8*0] + 32) >> 6; + return; + } +#endif + + x0 = (block[8*0] << 8) + 8192; + + /* first stage */ + x8 = W7 * (x4 + x5) + 4; + x4 = (x8 + (W1 - W7) * x4) >> 3; + x5 = (x8 - (W1 + W7) * x5) >> 3; + x8 = W3 * (x6 + x7) + 4; + x6 = (x8 - (W3 - W5) * x6) >> 3; + x7 = (x8 - (W3 + W5) * x7) >> 3; + + /* second stage */ + x8 = x0 + x1; + x0 -= x1; + x1 = W6 * (x3 + x2) + 4; + x2 = (x1 - (W2 + W6) * x2) >> 3; + x3 = (x1 + (W2 - W6) * x3) >> 3; + x1 = x4 + x6; + x4 -= x6; + x6 = x5 + x7; + x5 -= x7; + + /* third stage */ + x7 = x8 + x3; + x8 -= x3; + x3 = x0 + x2; + x0 -= x2; + x2 = (181 * (x4 + x5) + 128) >> 8; + x4 = (181 * (x4 - x5) + 128) >> 8; + + /* fourth stage */ + block[8*0] = (x7 + x1) >> 14; + block[8*1] = (x3 + x2) >> 14; + block[8*2] = (x0 + x4) >> 14; + block[8*3] = (x8 + x6) >> 14; + block[8*4] = (x8 - x6) >> 14; + block[8*5] = (x0 - x4) >> 14; + block[8*6] = (x3 - x2) >> 14; + block[8*7] = (x7 - x1) >> 14; +} + +void idct_block_copy_c (int16_t * block, uint8_t * dest, int stride) +{ + int i; + + for (i = 0; i < 8; i++) + idct_row (block + 8 * i); + + for (i = 0; i < 8; i++) + idct_col (block + i); + + i = 8; + do { + dest[0] = CLIP (block[0]); + dest[1] = CLIP (block[1]); + dest[2] = CLIP (block[2]); + dest[3] = CLIP (block[3]); + dest[4] = CLIP (block[4]); + dest[5] = CLIP (block[5]); + dest[6] = CLIP (block[6]); + dest[7] = CLIP (block[7]); + + dest += stride; + block += 8; + } while (--i); +} + +void idct_block_add_c (int16_t * block, uint8_t * dest, int stride) +{ + int i; + + for (i = 0; i < 8; i++) + idct_row (block + 8 * i); + + for (i = 0; i < 8; i++) + idct_col (block + i); + + i = 8; + do { + dest[0] = CLIP (block[0] + dest[0]); + dest[1] = CLIP (block[1] + dest[1]); + dest[2] = CLIP (block[2] + dest[2]); + dest[3] = CLIP (block[3] + dest[3]); + dest[4] = CLIP (block[4] + dest[4]); + dest[5] = CLIP (block[5] + dest[5]); + dest[6] = CLIP (block[6] + dest[6]); + dest[7] = CLIP (block[7] + dest[7]); + + dest += stride; + block += 8; + } while (--i); +} diff --git a/src/libmpeg2/idct_mlib.c b/src/libmpeg2/idct_mlib.c new file mode 100644 index 000000000..876ab574a --- /dev/null +++ b/src/libmpeg2/idct_mlib.c @@ -0,0 +1,47 @@ +/* + * idct_mlib.c + * Copyright (C) 1999-2001 Håkan Hjort <d95hjort@dtek.chalmers.se> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#ifdef LIBMPEG2_MLIB + +#include <inttypes.h> +#include <mlib_types.h> +#include <mlib_status.h> +#include <mlib_sys.h> +#include <mlib_video.h> + +#include "mpeg2_internal.h" + +void idct_block_copy_mlib (int16_t * block, uint8_t * dest, int stride) +{ + mlib_VideoIDCT8x8_U8_S16 (dest, block, stride); +} + +void idct_block_add_mlib (int16_t * block, uint8_t * dest, int stride) +{ + /* Should we use mlib_VideoIDCT_IEEE_S16_S16 here ?? */ + /* it's ~30% slower. */ + mlib_VideoIDCT8x8_S16_S16 (block, block); + mlib_VideoAddBlock_U8_S16 (dest, block, stride); +} + +#endif diff --git a/src/libmpeg2/idct_mlib.h b/src/libmpeg2/idct_mlib.h new file mode 100644 index 000000000..4a5b92919 --- /dev/null +++ b/src/libmpeg2/idct_mlib.h @@ -0,0 +1,25 @@ +/* + * idct_mlib.h + * + * Copyright (C) 1999, Håkan Hjort <d95hjort@dtek.chalmers.se> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, + * + */ + +void idct_block_copy_mlib (int16_t * block, uint8_t * dest, int stride); +void idct_block_add_mlib (int16_t * block, uint8_t * dest, int stride); diff --git a/src/libmpeg2/idct_mmx.c b/src/libmpeg2/idct_mmx.c new file mode 100644 index 000000000..927a78996 --- /dev/null +++ b/src/libmpeg2/idct_mmx.c @@ -0,0 +1,705 @@ +/* + * idct_mmx.c + * Copyright (C) 1999-2001 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#ifdef ARCH_X86 + +#include <inttypes.h> + +#include "mpeg2_internal.h" +#include "attributes.h" +#include "cpu_accel.h" + +#define ROW_SHIFT 11 +#define COL_SHIFT 6 + +#define round(bias) ((int)(((bias)+0.5) * (1<<ROW_SHIFT))) +#define rounder(bias) {round (bias), round (bias)} + + +#if 0 +/* C row IDCT - its just here to document the MMXEXT and MMX versions */ +static inline void idct_row (int16_t * row, int offset, + int16_t * table, int32_t * rounder) +{ + int C1, C2, C3, C4, C5, C6, C7; + int a0, a1, a2, a3, b0, b1, b2, b3; + + row += offset; + + C1 = table[1]; + C2 = table[2]; + C3 = table[3]; + C4 = table[4]; + C5 = table[5]; + C6 = table[6]; + C7 = table[7]; + + a0 = C4*row[0] + C2*row[2] + C4*row[4] + C6*row[6] + *rounder; + a1 = C4*row[0] + C6*row[2] - C4*row[4] - C2*row[6] + *rounder; + a2 = C4*row[0] - C6*row[2] - C4*row[4] + C2*row[6] + *rounder; + a3 = C4*row[0] - C2*row[2] + C4*row[4] - C6*row[6] + *rounder; + + b0 = C1*row[1] + C3*row[3] + C5*row[5] + C7*row[7]; + b1 = C3*row[1] - C7*row[3] - C1*row[5] - C5*row[7]; + b2 = C5*row[1] - C1*row[3] + C7*row[5] + C3*row[7]; + b3 = C7*row[1] - C5*row[3] + C3*row[5] - C1*row[7]; + + row[0] = (a0 + b0) >> ROW_SHIFT; + row[1] = (a1 + b1) >> ROW_SHIFT; + row[2] = (a2 + b2) >> ROW_SHIFT; + row[3] = (a3 + b3) >> ROW_SHIFT; + row[4] = (a3 - b3) >> ROW_SHIFT; + row[5] = (a2 - b2) >> ROW_SHIFT; + row[6] = (a1 - b1) >> ROW_SHIFT; + row[7] = (a0 - b0) >> ROW_SHIFT; +} +#endif + + +/* MMXEXT row IDCT */ + +#define mmxext_table(c1,c2,c3,c4,c5,c6,c7) { c4, c2, -c4, -c2, \ + c4, c6, c4, c6, \ + c1, c3, -c1, -c5, \ + c5, c7, c3, -c7, \ + c4, -c6, c4, -c6, \ + -c4, c2, c4, -c2, \ + c5, -c1, c3, -c1, \ + c7, c3, c7, -c5 } + +static inline void mmxext_row_head (int16_t * row, int offset, int16_t * table) +{ + movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0 + + movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1 + movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0 + + movq_m2r (*table, mm3); // mm3 = -C2 -C4 C2 C4 + movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1 + + movq_m2r (*(table+4), mm4); // mm4 = C6 C4 C6 C4 + pmaddwd_r2r (mm0, mm3); // mm3 = -C4*x4-C2*x6 C4*x0+C2*x2 + + pshufw_r2r (mm2, mm2, 0x4e); // mm2 = x2 x0 x6 x4 +} + +static inline void mmxext_row (int16_t * table, int32_t * rounder) +{ + movq_m2r (*(table+8), mm1); // mm1 = -C5 -C1 C3 C1 + pmaddwd_r2r (mm2, mm4); // mm4 = C4*x0+C6*x2 C4*x4+C6*x6 + + pmaddwd_m2r (*(table+16), mm0); // mm0 = C4*x4-C6*x6 C4*x0-C6*x2 + pshufw_r2r (mm6, mm6, 0x4e); // mm6 = x3 x1 x7 x5 + + movq_m2r (*(table+12), mm7); // mm7 = -C7 C3 C7 C5 + pmaddwd_r2r (mm5, mm1); // mm1 = -C1*x5-C5*x7 C1*x1+C3*x3 + + paddd_m2r (*rounder, mm3); // mm3 += rounder + pmaddwd_r2r (mm6, mm7); // mm7 = C3*x1-C7*x3 C5*x5+C7*x7 + + pmaddwd_m2r (*(table+20), mm2); // mm2 = C4*x0-C2*x2 -C4*x4+C2*x6 + paddd_r2r (mm4, mm3); // mm3 = a1 a0 + rounder + + pmaddwd_m2r (*(table+24), mm5); // mm5 = C3*x5-C1*x7 C5*x1-C1*x3 + movq_r2r (mm3, mm4); // mm4 = a1 a0 + rounder + + pmaddwd_m2r (*(table+28), mm6); // mm6 = C7*x1-C5*x3 C7*x5+C3*x7 + paddd_r2r (mm7, mm1); // mm1 = b1 b0 + + paddd_m2r (*rounder, mm0); // mm0 += rounder + psubd_r2r (mm1, mm3); // mm3 = a1-b1 a0-b0 + rounder + + psrad_i2r (ROW_SHIFT, mm3); // mm3 = y6 y7 + paddd_r2r (mm4, mm1); // mm1 = a1+b1 a0+b0 + rounder + + paddd_r2r (mm2, mm0); // mm0 = a3 a2 + rounder + psrad_i2r (ROW_SHIFT, mm1); // mm1 = y1 y0 + + paddd_r2r (mm6, mm5); // mm5 = b3 b2 + movq_r2r (mm0, mm4); // mm4 = a3 a2 + rounder + + paddd_r2r (mm5, mm0); // mm0 = a3+b3 a2+b2 + rounder + psubd_r2r (mm5, mm4); // mm4 = a3-b3 a2-b2 + rounder +} + +static inline void mmxext_row_tail (int16_t * row, int store) +{ + psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2 + + psrad_i2r (ROW_SHIFT, mm4); // mm4 = y4 y5 + + packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0 + + packssdw_r2r (mm3, mm4); // mm4 = y6 y7 y4 y5 + + movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0 + pshufw_r2r (mm4, mm4, 0xb1); // mm4 = y7 y6 y5 y4 + + /* slot */ + + movq_r2m (mm4, *(row+store+4)); // save y7 y6 y5 y4 +} + +static inline void mmxext_row_mid (int16_t * row, int store, + int offset, int16_t * table) +{ + movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0 + psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2 + + movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1 + psrad_i2r (ROW_SHIFT, mm4); // mm4 = y4 y5 + + packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0 + movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1 + + packssdw_r2r (mm3, mm4); // mm4 = y6 y7 y4 y5 + movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0 + + movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0 + pshufw_r2r (mm4, mm4, 0xb1); // mm4 = y7 y6 y5 y4 + + movq_m2r (*table, mm3); // mm3 = -C2 -C4 C2 C4 + movq_r2m (mm4, *(row+store+4)); // save y7 y6 y5 y4 + + pmaddwd_r2r (mm0, mm3); // mm3 = -C4*x4-C2*x6 C4*x0+C2*x2 + + movq_m2r (*(table+4), mm4); // mm4 = C6 C4 C6 C4 + pshufw_r2r (mm2, mm2, 0x4e); // mm2 = x2 x0 x6 x4 +} + + +/* MMX row IDCT */ + +#define mmx_table(c1,c2,c3,c4,c5,c6,c7) { c4, c2, c4, c6, \ + c4, c6, -c4, -c2, \ + c1, c3, c3, -c7, \ + c5, c7, -c1, -c5, \ + c4, -c6, c4, -c2, \ + -c4, c2, c4, -c6, \ + c5, -c1, c7, -c5, \ + c7, c3, c3, -c1 } + +static inline void mmx_row_head (int16_t * row, int offset, int16_t * table) +{ + movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0 + + movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1 + movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0 + + movq_m2r (*table, mm3); // mm3 = C6 C4 C2 C4 + movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1 + + punpckldq_r2r (mm0, mm0); // mm0 = x2 x0 x2 x0 + + movq_m2r (*(table+4), mm4); // mm4 = -C2 -C4 C6 C4 + pmaddwd_r2r (mm0, mm3); // mm3 = C4*x0+C6*x2 C4*x0+C2*x2 + + movq_m2r (*(table+8), mm1); // mm1 = -C7 C3 C3 C1 + punpckhdq_r2r (mm2, mm2); // mm2 = x6 x4 x6 x4 +} + +static inline void mmx_row (int16_t * table, int32_t * rounder) +{ + pmaddwd_r2r (mm2, mm4); // mm4 = -C4*x4-C2*x6 C4*x4+C6*x6 + punpckldq_r2r (mm5, mm5); // mm5 = x3 x1 x3 x1 + + pmaddwd_m2r (*(table+16), mm0); // mm0 = C4*x0-C2*x2 C4*x0-C6*x2 + punpckhdq_r2r (mm6, mm6); // mm6 = x7 x5 x7 x5 + + movq_m2r (*(table+12), mm7); // mm7 = -C5 -C1 C7 C5 + pmaddwd_r2r (mm5, mm1); // mm1 = C3*x1-C7*x3 C1*x1+C3*x3 + + paddd_m2r (*rounder, mm3); // mm3 += rounder + pmaddwd_r2r (mm6, mm7); // mm7 = -C1*x5-C5*x7 C5*x5+C7*x7 + + pmaddwd_m2r (*(table+20), mm2); // mm2 = C4*x4-C6*x6 -C4*x4+C2*x6 + paddd_r2r (mm4, mm3); // mm3 = a1 a0 + rounder + + pmaddwd_m2r (*(table+24), mm5); // mm5 = C7*x1-C5*x3 C5*x1-C1*x3 + movq_r2r (mm3, mm4); // mm4 = a1 a0 + rounder + + pmaddwd_m2r (*(table+28), mm6); // mm6 = C3*x5-C1*x7 C7*x5+C3*x7 + paddd_r2r (mm7, mm1); // mm1 = b1 b0 + + paddd_m2r (*rounder, mm0); // mm0 += rounder + psubd_r2r (mm1, mm3); // mm3 = a1-b1 a0-b0 + rounder + + psrad_i2r (ROW_SHIFT, mm3); // mm3 = y6 y7 + paddd_r2r (mm4, mm1); // mm1 = a1+b1 a0+b0 + rounder + + paddd_r2r (mm2, mm0); // mm0 = a3 a2 + rounder + psrad_i2r (ROW_SHIFT, mm1); // mm1 = y1 y0 + + paddd_r2r (mm6, mm5); // mm5 = b3 b2 + movq_r2r (mm0, mm7); // mm7 = a3 a2 + rounder + + paddd_r2r (mm5, mm0); // mm0 = a3+b3 a2+b2 + rounder + psubd_r2r (mm5, mm7); // mm7 = a3-b3 a2-b2 + rounder +} + +static inline void mmx_row_tail (int16_t * row, int store) +{ + psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2 + + psrad_i2r (ROW_SHIFT, mm7); // mm7 = y4 y5 + + packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0 + + packssdw_r2r (mm3, mm7); // mm7 = y6 y7 y4 y5 + + movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0 + movq_r2r (mm7, mm4); // mm4 = y6 y7 y4 y5 + + pslld_i2r (16, mm7); // mm7 = y7 0 y5 0 + + psrld_i2r (16, mm4); // mm4 = 0 y6 0 y4 + + por_r2r (mm4, mm7); // mm7 = y7 y6 y5 y4 + + /* slot */ + + movq_r2m (mm7, *(row+store+4)); // save y7 y6 y5 y4 +} + +static inline void mmx_row_mid (int16_t * row, int store, + int offset, int16_t * table) +{ + movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0 + psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2 + + movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1 + psrad_i2r (ROW_SHIFT, mm7); // mm7 = y4 y5 + + packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0 + movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1 + + packssdw_r2r (mm3, mm7); // mm7 = y6 y7 y4 y5 + movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0 + + movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0 + movq_r2r (mm7, mm1); // mm1 = y6 y7 y4 y5 + + punpckldq_r2r (mm0, mm0); // mm0 = x2 x0 x2 x0 + psrld_i2r (16, mm7); // mm7 = 0 y6 0 y4 + + movq_m2r (*table, mm3); // mm3 = C6 C4 C2 C4 + pslld_i2r (16, mm1); // mm1 = y7 0 y5 0 + + movq_m2r (*(table+4), mm4); // mm4 = -C2 -C4 C6 C4 + por_r2r (mm1, mm7); // mm7 = y7 y6 y5 y4 + + movq_m2r (*(table+8), mm1); // mm1 = -C7 C3 C3 C1 + punpckhdq_r2r (mm2, mm2); // mm2 = x6 x4 x6 x4 + + movq_r2m (mm7, *(row+store+4)); // save y7 y6 y5 y4 + pmaddwd_r2r (mm0, mm3); // mm3 = C4*x0+C6*x2 C4*x0+C2*x2 +} + + +#if 0 +// C column IDCT - its just here to document the MMXEXT and MMX versions +static inline void idct_col (int16_t * col, int offset) +{ +/* multiplication - as implemented on mmx */ +#define F(c,x) (((c) * (x)) >> 16) + +/* saturation - it helps us handle torture test cases */ +#define S(x) (((x)>32767) ? 32767 : ((x)<-32768) ? -32768 : (x)) + + int16_t x0, x1, x2, x3, x4, x5, x6, x7; + int16_t y0, y1, y2, y3, y4, y5, y6, y7; + int16_t a0, a1, a2, a3, b0, b1, b2, b3; + int16_t u04, v04, u26, v26, u17, v17, u35, v35, u12, v12; + + col += offset; + + x0 = col[0*8]; + x1 = col[1*8]; + x2 = col[2*8]; + x3 = col[3*8]; + x4 = col[4*8]; + x5 = col[5*8]; + x6 = col[6*8]; + x7 = col[7*8]; + + u04 = S (x0 + x4); + v04 = S (x0 - x4); + u26 = S (F (T2, x6) + x2); + v26 = S (F (T2, x2) - x6); + + a0 = S (u04 + u26); + a1 = S (v04 + v26); + a2 = S (v04 - v26); + a3 = S (u04 - u26); + + u17 = S (F (T1, x7) + x1); + v17 = S (F (T1, x1) - x7); + u35 = S (F (T3, x5) + x3); + v35 = S (F (T3, x3) - x5); + + b0 = S (u17 + u35); + b3 = S (v17 - v35); + u12 = S (u17 - u35); + v12 = S (v17 + v35); + u12 = S (2 * F (C4, u12)); + v12 = S (2 * F (C4, v12)); + b1 = S (u12 + v12); + b2 = S (u12 - v12); + + y0 = S (a0 + b0) >> COL_SHIFT; + y1 = S (a1 + b1) >> COL_SHIFT; + y2 = S (a2 + b2) >> COL_SHIFT; + y3 = S (a3 + b3) >> COL_SHIFT; + + y4 = S (a3 - b3) >> COL_SHIFT; + y5 = S (a2 - b2) >> COL_SHIFT; + y6 = S (a1 - b1) >> COL_SHIFT; + y7 = S (a0 - b0) >> COL_SHIFT; + + col[0*8] = y0; + col[1*8] = y1; + col[2*8] = y2; + col[3*8] = y3; + col[4*8] = y4; + col[5*8] = y5; + col[6*8] = y6; + col[7*8] = y7; +} +#endif + + +// MMX column IDCT +static inline void idct_col (int16_t * col, int offset) +{ +#define T1 13036 +#define T2 27146 +#define T3 43790 +#define C4 23170 + + static short _T1[] ATTR_ALIGN(8) = {T1,T1,T1,T1}; + static short _T2[] ATTR_ALIGN(8) = {T2,T2,T2,T2}; + static short _T3[] ATTR_ALIGN(8) = {T3,T3,T3,T3}; + static short _C4[] ATTR_ALIGN(8) = {C4,C4,C4,C4}; + + /* column code adapted from peter gubanov */ + /* http://www.elecard.com/peter/idct.shtml */ + + movq_m2r (*_T1, mm0); // mm0 = T1 + + movq_m2r (*(col+offset+1*8), mm1); // mm1 = x1 + movq_r2r (mm0, mm2); // mm2 = T1 + + movq_m2r (*(col+offset+7*8), mm4); // mm4 = x7 + pmulhw_r2r (mm1, mm0); // mm0 = T1*x1 + + movq_m2r (*_T3, mm5); // mm5 = T3 + pmulhw_r2r (mm4, mm2); // mm2 = T1*x7 + + movq_m2r (*(col+offset+5*8), mm6); // mm6 = x5 + movq_r2r (mm5, mm7); // mm7 = T3-1 + + movq_m2r (*(col+offset+3*8), mm3); // mm3 = x3 + psubsw_r2r (mm4, mm0); // mm0 = v17 + + movq_m2r (*_T2, mm4); // mm4 = T2 + pmulhw_r2r (mm3, mm5); // mm5 = (T3-1)*x3 + + paddsw_r2r (mm2, mm1); // mm1 = u17 + pmulhw_r2r (mm6, mm7); // mm7 = (T3-1)*x5 + + /* slot */ + + movq_r2r (mm4, mm2); // mm2 = T2 + paddsw_r2r (mm3, mm5); // mm5 = T3*x3 + + pmulhw_m2r (*(col+offset+2*8), mm4);// mm4 = T2*x2 + paddsw_r2r (mm6, mm7); // mm7 = T3*x5 + + psubsw_r2r (mm6, mm5); // mm5 = v35 + paddsw_r2r (mm3, mm7); // mm7 = u35 + + movq_m2r (*(col+offset+6*8), mm3); // mm3 = x6 + movq_r2r (mm0, mm6); // mm6 = v17 + + pmulhw_r2r (mm3, mm2); // mm2 = T2*x6 + psubsw_r2r (mm5, mm0); // mm0 = b3 + + psubsw_r2r (mm3, mm4); // mm4 = v26 + paddsw_r2r (mm6, mm5); // mm5 = v12 + + movq_r2m (mm0, *(col+offset+3*8)); // save b3 in scratch0 + movq_r2r (mm1, mm6); // mm6 = u17 + + paddsw_m2r (*(col+offset+2*8), mm2);// mm2 = u26 + paddsw_r2r (mm7, mm6); // mm6 = b0 + + psubsw_r2r (mm7, mm1); // mm1 = u12 + movq_r2r (mm1, mm7); // mm7 = u12 + + movq_m2r (*(col+offset+0*8), mm3); // mm3 = x0 + paddsw_r2r (mm5, mm1); // mm1 = u12+v12 + + movq_m2r (*_C4, mm0); // mm0 = C4/2 + psubsw_r2r (mm5, mm7); // mm7 = u12-v12 + + movq_r2m (mm6, *(col+offset+5*8)); // save b0 in scratch1 + pmulhw_r2r (mm0, mm1); // mm1 = b1/2 + + movq_r2r (mm4, mm6); // mm6 = v26 + pmulhw_r2r (mm0, mm7); // mm7 = b2/2 + + movq_m2r (*(col+offset+4*8), mm5); // mm5 = x4 + movq_r2r (mm3, mm0); // mm0 = x0 + + psubsw_r2r (mm5, mm3); // mm3 = v04 + paddsw_r2r (mm5, mm0); // mm0 = u04 + + paddsw_r2r (mm3, mm4); // mm4 = a1 + movq_r2r (mm0, mm5); // mm5 = u04 + + psubsw_r2r (mm6, mm3); // mm3 = a2 + paddsw_r2r (mm2, mm5); // mm5 = a0 + + paddsw_r2r (mm1, mm1); // mm1 = b1 + psubsw_r2r (mm2, mm0); // mm0 = a3 + + paddsw_r2r (mm7, mm7); // mm7 = b2 + movq_r2r (mm3, mm2); // mm2 = a2 + + movq_r2r (mm4, mm6); // mm6 = a1 + paddsw_r2r (mm7, mm3); // mm3 = a2+b2 + + psraw_i2r (COL_SHIFT, mm3); // mm3 = y2 + paddsw_r2r (mm1, mm4); // mm4 = a1+b1 + + psraw_i2r (COL_SHIFT, mm4); // mm4 = y1 + psubsw_r2r (mm1, mm6); // mm6 = a1-b1 + + movq_m2r (*(col+offset+5*8), mm1); // mm1 = b0 + psubsw_r2r (mm7, mm2); // mm2 = a2-b2 + + psraw_i2r (COL_SHIFT, mm6); // mm6 = y6 + movq_r2r (mm5, mm7); // mm7 = a0 + + movq_r2m (mm4, *(col+offset+1*8)); // save y1 + psraw_i2r (COL_SHIFT, mm2); // mm2 = y5 + + movq_r2m (mm3, *(col+offset+2*8)); // save y2 + paddsw_r2r (mm1, mm5); // mm5 = a0+b0 + + movq_m2r (*(col+offset+3*8), mm4); // mm4 = b3 + psubsw_r2r (mm1, mm7); // mm7 = a0-b0 + + psraw_i2r (COL_SHIFT, mm5); // mm5 = y0 + movq_r2r (mm0, mm3); // mm3 = a3 + + movq_r2m (mm2, *(col+offset+5*8)); // save y5 + psubsw_r2r (mm4, mm3); // mm3 = a3-b3 + + psraw_i2r (COL_SHIFT, mm7); // mm7 = y7 + paddsw_r2r (mm0, mm4); // mm4 = a3+b3 + + movq_r2m (mm5, *(col+offset+0*8)); // save y0 + psraw_i2r (COL_SHIFT, mm3); // mm3 = y4 + + movq_r2m (mm6, *(col+offset+6*8)); // save y6 + psraw_i2r (COL_SHIFT, mm4); // mm4 = y3 + + movq_r2m (mm7, *(col+offset+7*8)); // save y7 + + movq_r2m (mm3, *(col+offset+4*8)); // save y4 + + movq_r2m (mm4, *(col+offset+3*8)); // save y3 +} + + +static int32_t rounder0[] ATTR_ALIGN(8) = + rounder ((1 << (COL_SHIFT - 1)) - 0.5); +static int32_t rounder4[] ATTR_ALIGN(8) = rounder (0); +static int32_t rounder1[] ATTR_ALIGN(8) = + rounder (1.25683487303); /* C1*(C1/C4+C1+C7)/2 */ +static int32_t rounder7[] ATTR_ALIGN(8) = + rounder (-0.25); /* C1*(C7/C4+C7-C1)/2 */ +static int32_t rounder2[] ATTR_ALIGN(8) = + rounder (0.60355339059); /* C2 * (C6+C2)/2 */ +static int32_t rounder6[] ATTR_ALIGN(8) = + rounder (-0.25); /* C2 * (C6-C2)/2 */ +static int32_t rounder3[] ATTR_ALIGN(8) = + rounder (0.087788325588); /* C3*(-C3/C4+C3+C5)/2 */ +static int32_t rounder5[] ATTR_ALIGN(8) = + rounder (-0.441341716183); /* C3*(-C5/C4+C5-C3)/2 */ + + +#define declare_idct(idct,table,idct_row_head,idct_row,idct_row_tail,idct_row_mid) \ +static inline void idct (int16_t * block) \ +{ \ + static int16_t table04[] ATTR_ALIGN(16) = \ + table (22725, 21407, 19266, 16384, 12873, 8867, 4520); \ + static int16_t table17[] ATTR_ALIGN(16) = \ + table (31521, 29692, 26722, 22725, 17855, 12299, 6270); \ + static int16_t table26[] ATTR_ALIGN(16) = \ + table (29692, 27969, 25172, 21407, 16819, 11585, 5906); \ + static int16_t table35[] ATTR_ALIGN(16) = \ + table (26722, 25172, 22654, 19266, 15137, 10426, 5315); \ + \ + idct_row_head (block, 0*8, table04); \ + idct_row (table04, rounder0); \ + idct_row_mid (block, 0*8, 4*8, table04); \ + idct_row (table04, rounder4); \ + idct_row_mid (block, 4*8, 1*8, table17); \ + idct_row (table17, rounder1); \ + idct_row_mid (block, 1*8, 7*8, table17); \ + idct_row (table17, rounder7); \ + idct_row_mid (block, 7*8, 2*8, table26); \ + idct_row (table26, rounder2); \ + idct_row_mid (block, 2*8, 6*8, table26); \ + idct_row (table26, rounder6); \ + idct_row_mid (block, 6*8, 3*8, table35); \ + idct_row (table35, rounder3); \ + idct_row_mid (block, 3*8, 5*8, table35); \ + idct_row (table35, rounder5); \ + idct_row_tail (block, 5*8); \ + \ + idct_col (block, 0); \ + idct_col (block, 4); \ +} + + +#define COPY_MMX(offset,r0,r1,r2) \ +do { \ + movq_m2r (*(block+offset), r0); \ + dest += stride; \ + movq_m2r (*(block+offset+4), r1); \ + movq_r2m (r2, *dest); \ + packuswb_r2r (r1, r0); \ +} while (0) + +static void block_copy (int16_t * block, uint8_t * dest, int stride) +{ + movq_m2r (*(block+0*8), mm0); + movq_m2r (*(block+0*8+4), mm1); + movq_m2r (*(block+1*8), mm2); + packuswb_r2r (mm1, mm0); + movq_m2r (*(block+1*8+4), mm3); + movq_r2m (mm0, *dest); + packuswb_r2r (mm3, mm2); + COPY_MMX (2*8, mm0, mm1, mm2); + COPY_MMX (3*8, mm2, mm3, mm0); + COPY_MMX (4*8, mm0, mm1, mm2); + COPY_MMX (5*8, mm2, mm3, mm0); + COPY_MMX (6*8, mm0, mm1, mm2); + COPY_MMX (7*8, mm2, mm3, mm0); + movq_r2m (mm2, *(dest+stride)); +} + + +#define ADD_MMX(offset,r1,r2,r3,r4) \ +do { \ + movq_m2r (*(dest+2*stride), r1); \ + packuswb_r2r (r4, r3); \ + movq_r2r (r1, r2); \ + dest += stride; \ + movq_r2m (r3, *dest); \ + punpcklbw_r2r (mm0, r1); \ + paddsw_m2r (*(block+offset), r1); \ + punpckhbw_r2r (mm0, r2); \ + paddsw_m2r (*(block+offset+4), r2); \ +} while (0) + +static void block_add (int16_t * block, uint8_t * dest, int stride) +{ + movq_m2r (*dest, mm1); + pxor_r2r (mm0, mm0); + movq_m2r (*(dest+stride), mm3); + movq_r2r (mm1, mm2); + punpcklbw_r2r (mm0, mm1); + movq_r2r (mm3, mm4); + paddsw_m2r (*(block+0*8), mm1); + punpckhbw_r2r (mm0, mm2); + paddsw_m2r (*(block+0*8+4), mm2); + punpcklbw_r2r (mm0, mm3); + paddsw_m2r (*(block+1*8), mm3); + packuswb_r2r (mm2, mm1); + punpckhbw_r2r (mm0, mm4); + movq_r2m (mm1, *dest); + paddsw_m2r (*(block+1*8+4), mm4); + ADD_MMX (2*8, mm1, mm2, mm3, mm4); + ADD_MMX (3*8, mm3, mm4, mm1, mm2); + ADD_MMX (4*8, mm1, mm2, mm3, mm4); + ADD_MMX (5*8, mm3, mm4, mm1, mm2); + ADD_MMX (6*8, mm1, mm2, mm3, mm4); + ADD_MMX (7*8, mm3, mm4, mm1, mm2); + packuswb_r2r (mm4, mm3); + movq_r2m (mm3, *(dest+stride)); +} + + +declare_idct (mmxext_idct, mmxext_table, + mmxext_row_head, mmxext_row, mmxext_row_tail, mmxext_row_mid) + +void idct_block_copy_mmxext (int16_t * block, uint8_t * dest, int stride) +{ + mmxext_idct (block); + block_copy (block, dest, stride); +} + +void idct_block_add_mmxext (int16_t * block, uint8_t * dest, int stride) +{ + mmxext_idct (block); + block_add (block, dest, stride); +} + + +declare_idct (mmx_idct, mmx_table, + mmx_row_head, mmx_row, mmx_row_tail, mmx_row_mid) + +void idct_block_copy_mmx (int16_t * block, uint8_t * dest, int stride) +{ + mmx_idct (block); + block_copy (block, dest, stride); +} + +void idct_block_add_mmx (int16_t * block, uint8_t * dest, int stride) +{ + mmx_idct (block); + block_add (block, dest, stride); +} + + +void idct_mmx_init (void) +{ + extern uint8_t scan_norm[64]; + extern uint8_t scan_alt[64]; + int i, j; + + /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */ + + for (i = 0; i < 64; i++) { + j = scan_norm[i]; + scan_norm[i] = (j & 0x38) | ((j & 6) >> 1) | ((j & 1) << 2); + j = scan_alt[i]; + scan_alt[i] = (j & 0x38) | ((j & 6) >> 1) | ((j & 1) << 2); + } +} + +#endif diff --git a/src/libmpeg2/motion_comp.c b/src/libmpeg2/motion_comp.c new file mode 100644 index 000000000..fd4055265 --- /dev/null +++ b/src/libmpeg2/motion_comp.c @@ -0,0 +1,125 @@ +/* + * motion_comp.c + * Copyright (C) 1999-2001 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#include <stdio.h> +#include <inttypes.h> + +#include "mpeg2_internal.h" +#include "cpu_accel.h" + +mc_functions_t mc_functions; + +void motion_comp_init (void) +{ + +#ifdef ARCH_X86 + if (config.flags & MM_ACCEL_X86_MMXEXT) { + fprintf (stderr, "Using MMXEXT for motion compensation\n"); + mc_functions = mc_functions_mmxext; + } else if (config.flags & MM_ACCEL_X86_3DNOW) { + fprintf (stderr, "Using 3DNOW for motion compensation\n"); + mc_functions = mc_functions_3dnow; + } else if (config.flags & MM_ACCEL_X86_MMX) { + fprintf (stderr, "Using MMX for motion compensation\n"); + mc_functions = mc_functions_mmx; + } else +#endif +#ifdef LIBMPEG2_MLIB + if (config.flags & MM_ACCEL_MLIB) { + fprintf (stderr, "Using mlib for motion compensation\n"); + mc_functions = mc_functions_mlib; + } else +#endif + { + fprintf (stderr, "No accelerated motion compensation found\n"); + mc_functions = mc_functions_c; + } +} + +#define avg2(a,b) ((a+b+1)>>1) +#define avg4(a,b,c,d) ((a+b+c+d+2)>>2) + +#define predict_(i) (ref[i]) +#define predict_x(i) (avg2 (ref[i], ref[i+1])) +#define predict_y(i) (avg2 (ref[i], (ref+stride)[i])) +#define predict_xy(i) (avg4 (ref[i], ref[i+1], (ref+stride)[i], (ref+stride)[i+1])) + +#define put(predictor,i) dest[i] = predictor (i) +#define avg(predictor,i) dest[i] = avg2 (predictor (i), dest[i]) + +/* mc function template */ + +#define MC_FUNC(op,xy) \ +static void MC_##op##_##xy##16_c (uint8_t * dest, uint8_t * ref,\ + int stride, int height) \ +{ \ + do { \ + op (predict_##xy, 0); \ + op (predict_##xy, 1); \ + op (predict_##xy, 2); \ + op (predict_##xy, 3); \ + op (predict_##xy, 4); \ + op (predict_##xy, 5); \ + op (predict_##xy, 6); \ + op (predict_##xy, 7); \ + op (predict_##xy, 8); \ + op (predict_##xy, 9); \ + op (predict_##xy, 10); \ + op (predict_##xy, 11); \ + op (predict_##xy, 12); \ + op (predict_##xy, 13); \ + op (predict_##xy, 14); \ + op (predict_##xy, 15); \ + ref += stride; \ + dest += stride; \ + } while (--height); \ +} \ +static void MC_##op##_##xy##8_c (uint8_t * dest, uint8_t * ref, \ + int stride, int height) \ +{ \ + do { \ + op (predict_##xy, 0); \ + op (predict_##xy, 1); \ + op (predict_##xy, 2); \ + op (predict_##xy, 3); \ + op (predict_##xy, 4); \ + op (predict_##xy, 5); \ + op (predict_##xy, 6); \ + op (predict_##xy, 7); \ + ref += stride; \ + dest += stride; \ + } while (--height); \ +} + +/* definitions of the actual mc functions */ + +MC_FUNC (put,) +MC_FUNC (avg,) +MC_FUNC (put,x) +MC_FUNC (avg,x) +MC_FUNC (put,y) +MC_FUNC (avg,y) +MC_FUNC (put,xy) +MC_FUNC (avg,xy) + +MOTION_COMP_EXTERN (c) diff --git a/src/libmpeg2/motion_comp_mlib.c b/src/libmpeg2/motion_comp_mlib.c new file mode 100644 index 000000000..91c0fb5a8 --- /dev/null +++ b/src/libmpeg2/motion_comp_mlib.c @@ -0,0 +1,180 @@ +/* + * motion_comp_mlib.c + * Copyright (C) 2000-2001 Håkan Hjort <d95hjort@dtek.chalmers.se> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#ifdef LIBMPEG2_MLIB + +#include <inttypes.h> +#include <mlib_types.h> +#include <mlib_status.h> +#include <mlib_sys.h> +#include <mlib_video.h> + +#include "mpeg2_internal.h" + +static void MC_put_16_mlib (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + if (height == 16) + mlib_VideoCopyRef_U8_U8_16x16 (dest, ref, stride); + else + mlib_VideoCopyRef_U8_U8_16x8 (dest, ref, stride); +} + +static void MC_put_x16_mlib (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + if (height == 16) + mlib_VideoInterpX_U8_U8_16x16 (dest, ref, stride, stride); + else + mlib_VideoInterpX_U8_U8_16x8 (dest, ref, stride, stride); +} + +static void MC_put_y16_mlib (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + if (height == 16) + mlib_VideoInterpY_U8_U8_16x16 (dest, ref, stride, stride); + else + mlib_VideoInterpY_U8_U8_16x8 (dest, ref, stride, stride); +} + +static void MC_put_xy16_mlib (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + if (height == 16) + mlib_VideoInterpXY_U8_U8_16x16 (dest, ref, stride, stride); + else + mlib_VideoInterpXY_U8_U8_16x8 (dest, ref, stride, stride); +} + +static void MC_put_8_mlib (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + if (height == 8) + mlib_VideoCopyRef_U8_U8_8x8 (dest, ref, stride); + else + mlib_VideoCopyRef_U8_U8_8x4 (dest, ref, stride); +} + +static void MC_put_x8_mlib (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + if (height == 8) + mlib_VideoInterpX_U8_U8_8x8 (dest, ref, stride, stride); + else + mlib_VideoInterpX_U8_U8_8x4 (dest, ref, stride, stride); +} + +static void MC_put_y8_mlib (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + if (height == 8) + mlib_VideoInterpY_U8_U8_8x8 (dest, ref, stride, stride); + else + mlib_VideoInterpY_U8_U8_8x4 (dest, ref, stride, stride); +} + +static void MC_put_xy8_mlib (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + if (height == 8) + mlib_VideoInterpXY_U8_U8_8x8 (dest, ref, stride, stride); + else + mlib_VideoInterpXY_U8_U8_8x4 (dest, ref, stride, stride); +} + +static void MC_avg_16_mlib (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + if (height == 16) + mlib_VideoCopyRefAve_U8_U8_16x16 (dest, ref, stride); + else + mlib_VideoCopyRefAve_U8_U8_16x8 (dest, ref, stride); +} + +static void MC_avg_x16_mlib (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + if (height == 16) + mlib_VideoInterpAveX_U8_U8_16x16 (dest, ref, stride, stride); + else + mlib_VideoInterpAveX_U8_U8_16x8 (dest, ref, stride, stride); +} + +static void MC_avg_y16_mlib (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + if (height == 16) + mlib_VideoInterpAveY_U8_U8_16x16 (dest, ref, stride, stride); + else + mlib_VideoInterpAveY_U8_U8_16x8 (dest, ref, stride, stride); +} + +static void MC_avg_xy16_mlib (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + if (height == 16) + mlib_VideoInterpAveXY_U8_U8_16x16 (dest, ref, stride, stride); + else + mlib_VideoInterpAveXY_U8_U8_16x8 (dest, ref, stride, stride); +} + +static void MC_avg_8_mlib (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + if (height == 8) + mlib_VideoCopyRefAve_U8_U8_8x8 (dest, ref, stride); + else + mlib_VideoCopyRefAve_U8_U8_8x4 (dest, ref, stride); +} + +static void MC_avg_x8_mlib (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + if (height == 8) + mlib_VideoInterpAveX_U8_U8_8x8 (dest, ref, stride, stride); + else + mlib_VideoInterpAveX_U8_U8_8x4 (dest, ref, stride, stride); +} + +static void MC_avg_y8_mlib (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + if (height == 8) + mlib_VideoInterpAveY_U8_U8_8x8 (dest, ref, stride, stride); + else + mlib_VideoInterpAveY_U8_U8_8x4 (dest, ref, stride, stride); +} + +static void MC_avg_xy8_mlib (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + if (height == 8) + mlib_VideoInterpAveXY_U8_U8_8x8 (dest, ref, stride, stride); + else + mlib_VideoInterpAveXY_U8_U8_8x4 (dest, ref, stride, stride); +} + +MOTION_COMP_EXTERN (mlib) + +#endif diff --git a/src/libmpeg2/motion_comp_mmx.c b/src/libmpeg2/motion_comp_mmx.c new file mode 100644 index 000000000..049546b1f --- /dev/null +++ b/src/libmpeg2/motion_comp_mmx.c @@ -0,0 +1,1017 @@ +/* + * motion_comp_mmx.c + * Copyright (C) 1999-2001 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#ifdef ARCH_X86 + +#include <inttypes.h> + +#include "mpeg2_internal.h" +#include "attributes.h" +#include "cpu_accel.h" + +#define CPU_MMXEXT 0 +#define CPU_3DNOW 1 + + +/* MMX code - needs a rewrite */ + + + + + + + +/* some rounding constants */ +mmx_t round1 = {0x0001000100010001LL}; +mmx_t round4 = {0x0002000200020002LL}; + +/* + * This code should probably be compiled with loop unrolling + * (ie, -funroll-loops in gcc)becuase some of the loops + * use a small static number of iterations. This was written + * with the assumption the compiler knows best about when + * unrolling will help + */ + +static inline void mmx_zero_reg () +{ + /* load 0 into mm0 */ + pxor_r2r (mm0, mm0); +} + +static inline void mmx_average_2_U8 (uint8_t * dest, + uint8_t * src1, uint8_t * src2) +{ + /* *dest = (*src1 + *src2 + 1)/ 2; */ + + movq_m2r (*src1, mm1); // load 8 src1 bytes + movq_r2r (mm1, mm2); // copy 8 src1 bytes + + movq_m2r (*src2, mm3); // load 8 src2 bytes + movq_r2r (mm3, mm4); // copy 8 src2 bytes + + punpcklbw_r2r (mm0, mm1); // unpack low src1 bytes + punpckhbw_r2r (mm0, mm2); // unpack high src1 bytes + + punpcklbw_r2r (mm0, mm3); // unpack low src2 bytes + punpckhbw_r2r (mm0, mm4); // unpack high src2 bytes + + paddw_r2r (mm3, mm1); // add lows to mm1 + paddw_m2r (round1, mm1); + psraw_i2r (1, mm1); // /2 + + paddw_r2r (mm4, mm2); // add highs to mm2 + paddw_m2r (round1, mm2); + psraw_i2r (1, mm2); // /2 + + packuswb_r2r (mm2, mm1); // pack (w/ saturation) + movq_r2m (mm1, *dest); // store result in dest +} + +static inline void mmx_interp_average_2_U8 (uint8_t * dest, + uint8_t * src1, uint8_t * src2) +{ + /* *dest = (*dest + (*src1 + *src2 + 1)/ 2 + 1)/ 2; */ + + movq_m2r (*dest, mm1); // load 8 dest bytes + movq_r2r (mm1, mm2); // copy 8 dest bytes + + movq_m2r (*src1, mm3); // load 8 src1 bytes + movq_r2r (mm3, mm4); // copy 8 src1 bytes + + movq_m2r (*src2, mm5); // load 8 src2 bytes + movq_r2r (mm5, mm6); // copy 8 src2 bytes + + punpcklbw_r2r (mm0, mm1); // unpack low dest bytes + punpckhbw_r2r (mm0, mm2); // unpack high dest bytes + + punpcklbw_r2r (mm0, mm3); // unpack low src1 bytes + punpckhbw_r2r (mm0, mm4); // unpack high src1 bytes + + punpcklbw_r2r (mm0, mm5); // unpack low src2 bytes + punpckhbw_r2r (mm0, mm6); // unpack high src2 bytes + + paddw_r2r (mm5, mm3); // add lows + paddw_m2r (round1, mm3); + psraw_i2r (1, mm3); // /2 + + paddw_r2r (mm6, mm4); // add highs + paddw_m2r (round1, mm4); + psraw_i2r (1, mm4); // /2 + + paddw_r2r (mm3, mm1); // add lows + paddw_m2r (round1, mm1); + psraw_i2r (1, mm1); // /2 + + paddw_r2r (mm4, mm2); // add highs + paddw_m2r (round1, mm2); + psraw_i2r (1, mm2); // /2 + + packuswb_r2r (mm2, mm1); // pack (w/ saturation) + movq_r2m (mm1, *dest); // store result in dest +} + +static inline void mmx_average_4_U8 (uint8_t * dest, + uint8_t * src1, uint8_t * src2, + uint8_t * src3, uint8_t * src4) +{ + /* *dest = (*src1 + *src2 + *src3 + *src4 + 2)/ 4; */ + + movq_m2r (*src1, mm1); // load 8 src1 bytes + movq_r2r (mm1, mm2); // copy 8 src1 bytes + + punpcklbw_r2r (mm0, mm1); // unpack low src1 bytes + punpckhbw_r2r (mm0, mm2); // unpack high src1 bytes + + movq_m2r (*src2, mm3); // load 8 src2 bytes + movq_r2r (mm3, mm4); // copy 8 src2 bytes + + punpcklbw_r2r (mm0, mm3); // unpack low src2 bytes + punpckhbw_r2r (mm0, mm4); // unpack high src2 bytes + + paddw_r2r (mm3, mm1); // add lows + paddw_r2r (mm4, mm2); // add highs + + /* now have partials in mm1 and mm2 */ + + movq_m2r (*src3, mm3); // load 8 src3 bytes + movq_r2r (mm3, mm4); // copy 8 src3 bytes + + punpcklbw_r2r (mm0, mm3); // unpack low src3 bytes + punpckhbw_r2r (mm0, mm4); // unpack high src3 bytes + + paddw_r2r (mm3, mm1); // add lows + paddw_r2r (mm4, mm2); // add highs + + movq_m2r (*src4, mm5); // load 8 src4 bytes + movq_r2r (mm5, mm6); // copy 8 src4 bytes + + punpcklbw_r2r (mm0, mm5); // unpack low src4 bytes + punpckhbw_r2r (mm0, mm6); // unpack high src4 bytes + + paddw_r2r (mm5, mm1); // add lows + paddw_r2r (mm6, mm2); // add highs + + /* now have subtotal in mm1 and mm2 */ + + paddw_m2r (round4, mm1); + psraw_i2r (2, mm1); // /4 + paddw_m2r (round4, mm2); + psraw_i2r (2, mm2); // /4 + + packuswb_r2r (mm2, mm1); // pack (w/ saturation) + movq_r2m (mm1, *dest); // store result in dest +} + +static inline void mmx_interp_average_4_U8 (uint8_t * dest, + uint8_t * src1, uint8_t * src2, + uint8_t * src3, uint8_t * src4) +{ + /* *dest = (*dest + (*src1 + *src2 + *src3 + *src4 + 2)/ 4 + 1)/ 2; */ + + movq_m2r (*src1, mm1); // load 8 src1 bytes + movq_r2r (mm1, mm2); // copy 8 src1 bytes + + punpcklbw_r2r (mm0, mm1); // unpack low src1 bytes + punpckhbw_r2r (mm0, mm2); // unpack high src1 bytes + + movq_m2r (*src2, mm3); // load 8 src2 bytes + movq_r2r (mm3, mm4); // copy 8 src2 bytes + + punpcklbw_r2r (mm0, mm3); // unpack low src2 bytes + punpckhbw_r2r (mm0, mm4); // unpack high src2 bytes + + paddw_r2r (mm3, mm1); // add lows + paddw_r2r (mm4, mm2); // add highs + + /* now have partials in mm1 and mm2 */ + + movq_m2r (*src3, mm3); // load 8 src3 bytes + movq_r2r (mm3, mm4); // copy 8 src3 bytes + + punpcklbw_r2r (mm0, mm3); // unpack low src3 bytes + punpckhbw_r2r (mm0, mm4); // unpack high src3 bytes + + paddw_r2r (mm3, mm1); // add lows + paddw_r2r (mm4, mm2); // add highs + + movq_m2r (*src4, mm5); // load 8 src4 bytes + movq_r2r (mm5, mm6); // copy 8 src4 bytes + + punpcklbw_r2r (mm0, mm5); // unpack low src4 bytes + punpckhbw_r2r (mm0, mm6); // unpack high src4 bytes + + paddw_r2r (mm5, mm1); // add lows + paddw_r2r (mm6, mm2); // add highs + + paddw_m2r (round4, mm1); + psraw_i2r (2, mm1); // /4 + paddw_m2r (round4, mm2); + psraw_i2r (2, mm2); // /4 + + /* now have subtotal/4 in mm1 and mm2 */ + + movq_m2r (*dest, mm3); // load 8 dest bytes + movq_r2r (mm3, mm4); // copy 8 dest bytes + + punpcklbw_r2r (mm0, mm3); // unpack low dest bytes + punpckhbw_r2r (mm0, mm4); // unpack high dest bytes + + paddw_r2r (mm3, mm1); // add lows + paddw_r2r (mm4, mm2); // add highs + + paddw_m2r (round1, mm1); + psraw_i2r (1, mm1); // /2 + paddw_m2r (round1, mm2); + psraw_i2r (1, mm2); // /2 + + /* now have end value in mm1 and mm2 */ + + packuswb_r2r (mm2, mm1); // pack (w/ saturation) + movq_r2m (mm1,*dest); // store result in dest +} + +/*-----------------------------------------------------------------------*/ + +static inline void MC_avg_mmx (int width, int height, + uint8_t * dest, uint8_t * ref, int stride) +{ + mmx_zero_reg (); + + do { + mmx_average_2_U8 (dest, dest, ref); + + if (width == 16) + mmx_average_2_U8 (dest+8, dest+8, ref+8); + + dest += stride; + ref += stride; + } while (--height); +} + +static void MC_avg_16_mmx (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_avg_mmx (16, height, dest, ref, stride); +} + +static void MC_avg_8_mmx (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_avg_mmx (8, height, dest, ref, stride); +} + +/*-----------------------------------------------------------------------*/ + +static inline void MC_put_mmx (int width, int height, + uint8_t * dest, uint8_t * ref, int stride) +{ + mmx_zero_reg (); + + do { + movq_m2r (* ref, mm1); // load 8 ref bytes + movq_r2m (mm1,* dest); // store 8 bytes at curr + + if (width == 16) + { + movq_m2r (* (ref+8), mm1); // load 8 ref bytes + movq_r2m (mm1,* (dest+8)); // store 8 bytes at curr + } + + dest += stride; + ref += stride; + } while (--height); +} + +static void MC_put_16_mmx (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_put_mmx (16, height, dest, ref, stride); +} + +static void MC_put_8_mmx (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_put_mmx (8, height, dest, ref, stride); +} + +/*-----------------------------------------------------------------------*/ + +/* Half pixel interpolation in the x direction */ +static inline void MC_avg_x_mmx (int width, int height, + uint8_t * dest, uint8_t * ref, int stride) +{ + mmx_zero_reg (); + + do { + mmx_interp_average_2_U8 (dest, ref, ref+1); + + if (width == 16) + mmx_interp_average_2_U8 (dest+8, ref+8, ref+9); + + dest += stride; + ref += stride; + } while (--height); +} + +static void MC_avg_x16_mmx (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_avg_x_mmx (16, height, dest, ref, stride); +} + +static void MC_avg_x8_mmx (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_avg_x_mmx (8, height, dest, ref, stride); +} + +/*-----------------------------------------------------------------------*/ + +static inline void MC_put_x_mmx (int width, int height, + uint8_t * dest, uint8_t * ref, int stride) +{ + mmx_zero_reg (); + + do { + mmx_average_2_U8 (dest, ref, ref+1); + + if (width == 16) + mmx_average_2_U8 (dest+8, ref+8, ref+9); + + dest += stride; + ref += stride; + } while (--height); +} + +static void MC_put_x16_mmx (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_put_x_mmx (16, height, dest, ref, stride); +} + +static void MC_put_x8_mmx (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_put_x_mmx (8, height, dest, ref, stride); +} + +/*-----------------------------------------------------------------------*/ + +static inline void MC_avg_xy_mmx (int width, int height, + uint8_t * dest, uint8_t * ref, int stride) +{ + uint8_t * ref_next = ref+stride; + + mmx_zero_reg (); + + do { + mmx_interp_average_4_U8 (dest, ref, ref+1, ref_next, ref_next+1); + + if (width == 16) + mmx_interp_average_4_U8 (dest+8, ref+8, ref+9, + ref_next+8, ref_next+9); + + dest += stride; + ref += stride; + ref_next += stride; + } while (--height); +} + +static void MC_avg_xy16_mmx (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_avg_xy_mmx (16, height, dest, ref, stride); +} + +static void MC_avg_xy8_mmx (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_avg_xy_mmx (8, height, dest, ref, stride); +} + +/*-----------------------------------------------------------------------*/ + +static inline void MC_put_xy_mmx (int width, int height, + uint8_t * dest, uint8_t * ref, int stride) +{ + uint8_t * ref_next = ref+stride; + + mmx_zero_reg (); + + do { + mmx_average_4_U8 (dest, ref, ref+1, ref_next, ref_next+1); + + if (width == 16) + mmx_average_4_U8 (dest+8, ref+8, ref+9, ref_next+8, ref_next+9); + + dest += stride; + ref += stride; + ref_next += stride; + } while (--height); +} + +static void MC_put_xy16_mmx (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_put_xy_mmx (16, height, dest, ref, stride); +} + +static void MC_put_xy8_mmx (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_put_xy_mmx (8, height, dest, ref, stride); +} + +/*-----------------------------------------------------------------------*/ + +static inline void MC_avg_y_mmx (int width, int height, + uint8_t * dest, uint8_t * ref, int stride) +{ + uint8_t * ref_next = ref+stride; + + mmx_zero_reg (); + + do { + mmx_interp_average_2_U8 (dest, ref, ref_next); + + if (width == 16) + mmx_interp_average_2_U8 (dest+8, ref+8, ref_next+8); + + dest += stride; + ref += stride; + ref_next += stride; + } while (--height); +} + +static void MC_avg_y16_mmx (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_avg_y_mmx (16, height, dest, ref, stride); +} + +static void MC_avg_y8_mmx (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_avg_y_mmx (8, height, dest, ref, stride); +} + +/*-----------------------------------------------------------------------*/ + +static inline void MC_put_y_mmx (int width, int height, + uint8_t * dest, uint8_t * ref, int stride) +{ + uint8_t * ref_next = ref+stride; + + mmx_zero_reg (); + + do { + mmx_average_2_U8 (dest, ref, ref_next); + + if (width == 16) + mmx_average_2_U8 (dest+8, ref+8, ref_next+8); + + dest += stride; + ref += stride; + ref_next += stride; + } while (--height); +} + +static void MC_put_y16_mmx (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_put_y_mmx (16, height, dest, ref, stride); +} + +static void MC_put_y8_mmx (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_put_y_mmx (8, height, dest, ref, stride); +} + + +MOTION_COMP_EXTERN (mmx) + + + + + + + +/* CPU_MMXEXT/CPU_3DNOW adaptation layer */ + +#define pavg_r2r(src,dest) \ +do { \ + if (cpu == CPU_MMXEXT) \ + pavgb_r2r (src, dest); \ + else \ + pavgusb_r2r (src, dest); \ +} while (0) + +#define pavg_m2r(src,dest) \ +do { \ + if (cpu == CPU_MMXEXT) \ + pavgb_m2r (src, dest); \ + else \ + pavgusb_m2r (src, dest); \ +} while (0) + + +/* CPU_MMXEXT code */ + + +static inline void MC_put1_8 (int height, uint8_t * dest, uint8_t * ref, + int stride) +{ + do { + movq_m2r (*ref, mm0); + movq_r2m (mm0, *dest); + ref += stride; + dest += stride; + } while (--height); +} + +static inline void MC_put1_16 (int height, uint8_t * dest, uint8_t * ref, + int stride) +{ + do { + movq_m2r (*ref, mm0); + movq_m2r (*(ref+8), mm1); + ref += stride; + movq_r2m (mm0, *dest); + movq_r2m (mm1, *(dest+8)); + dest += stride; + } while (--height); +} + +static inline void MC_avg1_8 (int height, uint8_t * dest, uint8_t * ref, + int stride, int cpu) +{ + do { + movq_m2r (*ref, mm0); + pavg_m2r (*dest, mm0); + ref += stride; + movq_r2m (mm0, *dest); + dest += stride; + } while (--height); +} + +static inline void MC_avg1_16 (int height, uint8_t * dest, uint8_t * ref, + int stride, int cpu) +{ + do { + movq_m2r (*ref, mm0); + movq_m2r (*(ref+8), mm1); + pavg_m2r (*dest, mm0); + pavg_m2r (*(dest+8), mm1); + movq_r2m (mm0, *dest); + ref += stride; + movq_r2m (mm1, *(dest+8)); + dest += stride; + } while (--height); +} + +static inline void MC_put2_8 (int height, uint8_t * dest, uint8_t * ref, + int stride, int offset, int cpu) +{ + do { + movq_m2r (*ref, mm0); + pavg_m2r (*(ref+offset), mm0); + ref += stride; + movq_r2m (mm0, *dest); + dest += stride; + } while (--height); +} + +static inline void MC_put2_16 (int height, uint8_t * dest, uint8_t * ref, + int stride, int offset, int cpu) +{ + do { + movq_m2r (*ref, mm0); + movq_m2r (*(ref+8), mm1); + pavg_m2r (*(ref+offset), mm0); + pavg_m2r (*(ref+offset+8), mm1); + movq_r2m (mm0, *dest); + ref += stride; + movq_r2m (mm1, *(dest+8)); + dest += stride; + } while (--height); +} + +static inline void MC_avg2_8 (int height, uint8_t * dest, uint8_t * ref, + int stride, int offset, int cpu) +{ + do { + movq_m2r (*ref, mm0); + pavg_m2r (*(ref+offset), mm0); + pavg_m2r (*dest, mm0); + ref += stride; + movq_r2m (mm0, *dest); + dest += stride; + } while (--height); +} + +static inline void MC_avg2_16 (int height, uint8_t * dest, uint8_t * ref, + int stride, int offset, int cpu) +{ + do { + movq_m2r (*ref, mm0); + movq_m2r (*(ref+8), mm1); + pavg_m2r (*(ref+offset), mm0); + pavg_m2r (*(ref+offset+8), mm1); + pavg_m2r (*dest, mm0); + pavg_m2r (*(dest+8), mm1); + ref += stride; + movq_r2m (mm0, *dest); + movq_r2m (mm1, *(dest+8)); + dest += stride; + } while (--height); +} + +static mmx_t mask_one = {0x0101010101010101LL}; + +static inline void MC_put4_8 (int height, uint8_t * dest, uint8_t * ref, + int stride, int cpu) +{ + movq_m2r (*ref, mm0); + movq_m2r (*(ref+1), mm1); + movq_r2r (mm0, mm7); + pxor_r2r (mm1, mm7); + pavg_r2r (mm1, mm0); + ref += stride; + + do { + movq_m2r (*ref, mm2); + movq_r2r (mm0, mm5); + + movq_m2r (*(ref+1), mm3); + movq_r2r (mm2, mm6); + + pxor_r2r (mm3, mm6); + pavg_r2r (mm3, mm2); + + por_r2r (mm6, mm7); + pxor_r2r (mm2, mm5); + + pand_r2r (mm5, mm7); + pavg_r2r (mm2, mm0); + + pand_m2r (mask_one, mm7); + + psubusb_r2r (mm7, mm0); + + ref += stride; + movq_r2m (mm0, *dest); + dest += stride; + + movq_r2r (mm6, mm7); // unroll ! + movq_r2r (mm2, mm0); // unroll ! + } while (--height); +} + +static inline void MC_put4_16 (int height, uint8_t * dest, uint8_t * ref, + int stride, int cpu) +{ + do { + movq_m2r (*ref, mm0); + movq_m2r (*(ref+stride+1), mm1); + movq_r2r (mm0, mm7); + movq_m2r (*(ref+1), mm2); + pxor_r2r (mm1, mm7); + movq_m2r (*(ref+stride), mm3); + movq_r2r (mm2, mm6); + pxor_r2r (mm3, mm6); + pavg_r2r (mm1, mm0); + pavg_r2r (mm3, mm2); + por_r2r (mm6, mm7); + movq_r2r (mm0, mm6); + pxor_r2r (mm2, mm6); + pand_r2r (mm6, mm7); + pand_m2r (mask_one, mm7); + pavg_r2r (mm2, mm0); + psubusb_r2r (mm7, mm0); + movq_r2m (mm0, *dest); + + movq_m2r (*(ref+8), mm0); + movq_m2r (*(ref+stride+9), mm1); + movq_r2r (mm0, mm7); + movq_m2r (*(ref+9), mm2); + pxor_r2r (mm1, mm7); + movq_m2r (*(ref+stride+8), mm3); + movq_r2r (mm2, mm6); + pxor_r2r (mm3, mm6); + pavg_r2r (mm1, mm0); + pavg_r2r (mm3, mm2); + por_r2r (mm6, mm7); + movq_r2r (mm0, mm6); + pxor_r2r (mm2, mm6); + pand_r2r (mm6, mm7); + pand_m2r (mask_one, mm7); + pavg_r2r (mm2, mm0); + psubusb_r2r (mm7, mm0); + ref += stride; + movq_r2m (mm0, *(dest+8)); + dest += stride; + } while (--height); +} + +static inline void MC_avg4_8 (int height, uint8_t * dest, uint8_t * ref, + int stride, int cpu) +{ + do { + movq_m2r (*ref, mm0); + movq_m2r (*(ref+stride+1), mm1); + movq_r2r (mm0, mm7); + movq_m2r (*(ref+1), mm2); + pxor_r2r (mm1, mm7); + movq_m2r (*(ref+stride), mm3); + movq_r2r (mm2, mm6); + pxor_r2r (mm3, mm6); + pavg_r2r (mm1, mm0); + pavg_r2r (mm3, mm2); + por_r2r (mm6, mm7); + movq_r2r (mm0, mm6); + pxor_r2r (mm2, mm6); + pand_r2r (mm6, mm7); + pand_m2r (mask_one, mm7); + pavg_r2r (mm2, mm0); + psubusb_r2r (mm7, mm0); + movq_m2r (*dest, mm1); + pavg_r2r (mm1, mm0); + ref += stride; + movq_r2m (mm0, *dest); + dest += stride; + } while (--height); +} + +static inline void MC_avg4_16 (int height, uint8_t * dest, uint8_t * ref, + int stride, int cpu) +{ + do { + movq_m2r (*ref, mm0); + movq_m2r (*(ref+stride+1), mm1); + movq_r2r (mm0, mm7); + movq_m2r (*(ref+1), mm2); + pxor_r2r (mm1, mm7); + movq_m2r (*(ref+stride), mm3); + movq_r2r (mm2, mm6); + pxor_r2r (mm3, mm6); + pavg_r2r (mm1, mm0); + pavg_r2r (mm3, mm2); + por_r2r (mm6, mm7); + movq_r2r (mm0, mm6); + pxor_r2r (mm2, mm6); + pand_r2r (mm6, mm7); + pand_m2r (mask_one, mm7); + pavg_r2r (mm2, mm0); + psubusb_r2r (mm7, mm0); + movq_m2r (*dest, mm1); + pavg_r2r (mm1, mm0); + movq_r2m (mm0, *dest); + + movq_m2r (*(ref+8), mm0); + movq_m2r (*(ref+stride+9), mm1); + movq_r2r (mm0, mm7); + movq_m2r (*(ref+9), mm2); + pxor_r2r (mm1, mm7); + movq_m2r (*(ref+stride+8), mm3); + movq_r2r (mm2, mm6); + pxor_r2r (mm3, mm6); + pavg_r2r (mm1, mm0); + pavg_r2r (mm3, mm2); + por_r2r (mm6, mm7); + movq_r2r (mm0, mm6); + pxor_r2r (mm2, mm6); + pand_r2r (mm6, mm7); + pand_m2r (mask_one, mm7); + pavg_r2r (mm2, mm0); + psubusb_r2r (mm7, mm0); + movq_m2r (*(dest+8), mm1); + pavg_r2r (mm1, mm0); + ref += stride; + movq_r2m (mm0, *(dest+8)); + dest += stride; + } while (--height); +} + +static void MC_avg_16_mmxext (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_avg1_16 (height, dest, ref, stride, CPU_MMXEXT); +} + +static void MC_avg_8_mmxext (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_avg1_8 (height, dest, ref, stride, CPU_MMXEXT); +} + +static void MC_put_16_mmxext (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_put1_16 (height, dest, ref, stride); +} + +static void MC_put_8_mmxext (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_put1_8 (height, dest, ref, stride); +} + +static void MC_avg_x16_mmxext (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_avg2_16 (height, dest, ref, stride, 1, CPU_MMXEXT); +} + +static void MC_avg_x8_mmxext (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_avg2_8 (height, dest, ref, stride, 1, CPU_MMXEXT); +} + +static void MC_put_x16_mmxext (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_put2_16 (height, dest, ref, stride, 1, CPU_MMXEXT); +} + +static void MC_put_x8_mmxext (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_put2_8 (height, dest, ref, stride, 1, CPU_MMXEXT); +} + +static void MC_avg_y16_mmxext (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_avg2_16 (height, dest, ref, stride, stride, CPU_MMXEXT); +} + +static void MC_avg_y8_mmxext (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_avg2_8 (height, dest, ref, stride, stride, CPU_MMXEXT); +} + +static void MC_put_y16_mmxext (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_put2_16 (height, dest, ref, stride, stride, CPU_MMXEXT); +} + +static void MC_put_y8_mmxext (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_put2_8 (height, dest, ref, stride, stride, CPU_MMXEXT); +} + +static void MC_avg_xy16_mmxext (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_avg4_16 (height, dest, ref, stride, CPU_MMXEXT); +} + +static void MC_avg_xy8_mmxext (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_avg4_8 (height, dest, ref, stride, CPU_MMXEXT); +} + +static void MC_put_xy16_mmxext (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_put4_16 (height, dest, ref, stride, CPU_MMXEXT); +} + +static void MC_put_xy8_mmxext (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_put4_8 (height, dest, ref, stride, CPU_MMXEXT); +} + + +MOTION_COMP_EXTERN (mmxext) + + + +static void MC_avg_16_3dnow (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_avg1_16 (height, dest, ref, stride, CPU_3DNOW); +} + +static void MC_avg_8_3dnow (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_avg1_8 (height, dest, ref, stride, CPU_3DNOW); +} + +static void MC_put_16_3dnow (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_put1_16 (height, dest, ref, stride); +} + +static void MC_put_8_3dnow (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_put1_8 (height, dest, ref, stride); +} + +static void MC_avg_x16_3dnow (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_avg2_16 (height, dest, ref, stride, 1, CPU_3DNOW); +} + +static void MC_avg_x8_3dnow (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_avg2_8 (height, dest, ref, stride, 1, CPU_3DNOW); +} + +static void MC_put_x16_3dnow (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_put2_16 (height, dest, ref, stride, 1, CPU_3DNOW); +} + +static void MC_put_x8_3dnow (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_put2_8 (height, dest, ref, stride, 1, CPU_3DNOW); +} + +static void MC_avg_y16_3dnow (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_avg2_16 (height, dest, ref, stride, stride, CPU_3DNOW); +} + +static void MC_avg_y8_3dnow (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_avg2_8 (height, dest, ref, stride, stride, CPU_3DNOW); +} + +static void MC_put_y16_3dnow (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_put2_16 (height, dest, ref, stride, stride, CPU_3DNOW); +} + +static void MC_put_y8_3dnow (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_put2_8 (height, dest, ref, stride, stride, CPU_3DNOW); +} + +static void MC_avg_xy16_3dnow (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_avg4_16 (height, dest, ref, stride, CPU_3DNOW); +} + +static void MC_avg_xy8_3dnow (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_avg4_8 (height, dest, ref, stride, CPU_3DNOW); +} + +static void MC_put_xy16_3dnow (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_put4_16 (height, dest, ref, stride, CPU_3DNOW); +} + +static void MC_put_xy8_3dnow (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_put4_8 (height, dest, ref, stride, CPU_3DNOW); +} + + +MOTION_COMP_EXTERN (3dnow) + +#endif diff --git a/src/libmpeg2/mpeg2.h b/src/libmpeg2/mpeg2.h new file mode 100644 index 000000000..c83a61e7e --- /dev/null +++ b/src/libmpeg2/mpeg2.h @@ -0,0 +1,67 @@ +/* + * mpeg2.h + * Copyright (C) 1999-2001 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* Structure for the mpeg2dec decoder */ + +typedef struct mpeg2dec_s { + vo_instance_t * output; + + /* this is where we keep the state of the decoder */ + struct picture_s * picture; + + uint32_t shift; + int is_display_initialized; + int is_sequence_needed; + int drop_flag; + int drop_frame; + int in_slice; + + /* the maximum chunk size is determined by vbv_buffer_size */ + /* which is 224K for MP@ML streams. */ + /* (we make no pretenses of decoding anything more than that) */ + /* allocated in init - gcc has problems allocating such big structures */ + uint8_t * chunk_buffer; + /* pointer to current position in chunk_buffer */ + uint8_t * chunk_ptr; + /* last start code ? */ + uint8_t code; + + uint32_t pts; + + /* ONLY for 0.2.0 release - will not stay there later */ + int frame_rate_code; +} mpeg2dec_t ; + + + + + +/* initialize mpegdec with a opaque user pointer */ +void mpeg2_init (mpeg2dec_t * mpeg2dec, uint32_t mm_accel, + vo_instance_t * output); + +/* destroy everything which was allocated, shutdown the output */ +void mpeg2_close (mpeg2dec_t * mpeg2dec); + +int mpeg2_decode_data (mpeg2dec_t * mpeg2dec, + uint8_t * data_start, uint8_t * data_end, uint32_t pts); + +void mpeg2_drop (mpeg2dec_t * mpeg2dec, int flag); diff --git a/src/libmpeg2/mpeg2_internal.h b/src/libmpeg2/mpeg2_internal.h new file mode 100644 index 000000000..d3a92eb74 --- /dev/null +++ b/src/libmpeg2/mpeg2_internal.h @@ -0,0 +1,194 @@ +/* + * mpeg2_internal.h + * Copyright (C) 1999-2001 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* macroblock modes */ +#define MACROBLOCK_INTRA 1 +#define MACROBLOCK_PATTERN 2 +#define MACROBLOCK_MOTION_BACKWARD 4 +#define MACROBLOCK_MOTION_FORWARD 8 +#define MACROBLOCK_QUANT 16 +#define DCT_TYPE_INTERLACED 32 +/* motion_type */ +#define MOTION_TYPE_MASK (3*64) +#define MOTION_TYPE_BASE 64 +#define MC_FIELD (1*64) +#define MC_FRAME (2*64) +#define MC_16X8 (2*64) +#define MC_DMV (3*64) + +/* picture structure */ +#define TOP_FIELD 1 +#define BOTTOM_FIELD 2 +#define FRAME_PICTURE 3 + +/* picture coding type */ +#define I_TYPE 1 +#define P_TYPE 2 +#define B_TYPE 3 +#define D_TYPE 4 + +typedef struct motion_s { + uint8_t * ref[2][3]; + int pmv[2][2]; + int f_code[2]; +} motion_t; + +typedef struct picture_s { + /* first, state that carries information from one macroblock to the */ + /* next inside a slice, and is never used outside of slice_process() */ + + /* DCT coefficients - should be kept aligned ! */ + int16_t DCTblock[64]; + + /* bit parsing stuff */ + uint32_t bitstream_buf; /* current 32 bit working set of buffer */ + int bitstream_bits; /* used bits in working set */ + uint8_t * bitstream_ptr; /* buffer with stream data */ + + /* Motion vectors */ + /* The f_ and b_ correspond to the forward and backward motion */ + /* predictors */ + motion_t b_motion; + motion_t f_motion; + + /* predictor for DC coefficients in intra blocks */ + int16_t dc_dct_pred[3]; + + int quantizer_scale; /* remove */ + int current_field; /* remove */ + int v_offset; /* remove */ + + + /* now non-slice-specific information */ + + /* sequence header stuff */ + uint8_t intra_quantizer_matrix [64]; + uint8_t non_intra_quantizer_matrix [64]; + + /* The width and height of the picture snapped to macroblock units */ + int coded_picture_width; + int coded_picture_height; + + /* picture header stuff */ + + /* what type of picture this is (I, P, B, D) */ + int picture_coding_type; + + /* picture coding extension stuff */ + + /* quantization factor for intra dc coefficients */ + int intra_dc_precision; + /* top/bottom/both fields */ + int picture_structure; + /* bool to indicate all predictions are frame based */ + int frame_pred_frame_dct; + /* bool to indicate whether intra blocks have motion vectors */ + /* (for concealment) */ + int concealment_motion_vectors; + /* bit to indicate which quantization table to use */ + int q_scale_type; + /* bool to use different vlc tables */ + int intra_vlc_format; + /* used for DMV MC */ + int top_field_first; + + /* stuff derived from bitstream */ + + /* pointer to the zigzag scan we're supposed to be using */ + uint8_t * scan; + + struct vo_frame_s * current_frame; + struct vo_frame_s * forward_reference_frame; + struct vo_frame_s * backward_reference_frame; + + int second_field; + + int mpeg1; + + /* these things are not needed by the decoder */ + /* this is a temporary interface, we will build a better one later. */ + int aspect_ratio_information; + int frame_rate_code; + int progressive_sequence; + int repeat_first_field; + int progressive_frame; + int bitrate; +} picture_t; + +typedef struct mpeg2_config_s { + /* Bit flags that enable various things */ + uint32_t flags; +} mpeg2_config_t; + +/* The only global variable, */ +/* the config struct */ +extern mpeg2_config_t config; + + + +/* slice.c */ +void header_state_init (picture_t * picture); +int header_process_picture_header (picture_t * picture, uint8_t * buffer); +int header_process_sequence_header (picture_t * picture, uint8_t * buffer); +int header_process_extension (picture_t * picture, uint8_t * buffer); + +/* idct.c */ +void idct_init (void); + +/* idct_mlib.c */ +void idct_block_copy_mlib (int16_t * block, uint8_t * dest, int stride); +void idct_block_add_mlib (int16_t * block, uint8_t * dest, int stride); + +/* idct_mmx.c */ +void idct_block_copy_mmxext (int16_t *block, uint8_t * dest, int stride); +void idct_block_add_mmxext (int16_t *block, uint8_t * dest, int stride); +void idct_block_copy_mmx (int16_t *block, uint8_t * dest, int stride); +void idct_block_add_mmx (int16_t *block, uint8_t * dest, int stride); +void idct_mmx_init (void); + +/* motion_comp.c */ +void motion_comp_init (void); + +typedef struct mc_functions_s +{ + void (* put [8]) (uint8_t *dst, uint8_t *, int32_t, int32_t); + void (* avg [8]) (uint8_t *dst, uint8_t *, int32_t, int32_t); +} mc_functions_t; + +#define MOTION_COMP_EXTERN(x) mc_functions_t mc_functions_##x = \ +{ \ + {MC_put_16_##x, MC_put_x16_##x, MC_put_y16_##x, MC_put_xy16_##x, \ + MC_put_8_##x, MC_put_x8_##x, MC_put_y8_##x, MC_put_xy8_##x}, \ + {MC_avg_16_##x, MC_avg_x16_##x, MC_avg_y16_##x, MC_avg_xy16_##x, \ + MC_avg_8_##x, MC_avg_x8_##x, MC_avg_y8_##x, MC_avg_xy8_##x} \ +}; + +extern mc_functions_t mc_functions_c; +extern mc_functions_t mc_functions_mmx; +extern mc_functions_t mc_functions_mmxext; +extern mc_functions_t mc_functions_3dnow; +extern mc_functions_t mc_functions_mlib; + +/* slice.c */ +int slice_process (picture_t *picture, uint8_t code, uint8_t * buffer); + +/* stats.c */ +void stats_header (uint8_t code, uint8_t * buffer); diff --git a/src/libmpeg2/slice.c b/src/libmpeg2/slice.c new file mode 100644 index 000000000..727adb3d9 --- /dev/null +++ b/src/libmpeg2/slice.c @@ -0,0 +1,1799 @@ +/* + * slice.c + * Copyright (C) 1999-2001 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#include <string.h> +#include <inttypes.h> + +#include "video_out.h" +#include "mpeg2_internal.h" +#include "attributes.h" + +extern mc_functions_t mc_functions; +extern void (* idct_block_copy) (int16_t * block, uint8_t * dest, int stride); +extern void (* idct_block_add) (int16_t * block, uint8_t * dest, int stride); + +#include "vlc.h" + +static int non_linear_quantizer_scale [] = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 10, 12, 14, 16, 18, 20, 22, + 24, 28, 32, 36, 40, 44, 48, 52, + 56, 64, 72, 80, 88, 96, 104, 112 +}; + +static inline int get_macroblock_modes (picture_t * picture) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + int macroblock_modes; + MBtab * tab; + + switch (picture->picture_coding_type) { + case I_TYPE: + + tab = MB_I + UBITS (bit_buf, 1); + DUMPBITS (bit_buf, bits, tab->len); + macroblock_modes = tab->modes; + + if ((! (picture->frame_pred_frame_dct)) && + (picture->picture_structure == FRAME_PICTURE)) { + macroblock_modes |= UBITS (bit_buf, 1) * DCT_TYPE_INTERLACED; + DUMPBITS (bit_buf, bits, 1); + } + + return macroblock_modes; + + case P_TYPE: + + tab = MB_P + UBITS (bit_buf, 5); + DUMPBITS (bit_buf, bits, tab->len); + macroblock_modes = tab->modes; + + if (picture->picture_structure != FRAME_PICTURE) { + if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) { + macroblock_modes |= UBITS (bit_buf, 2) * MOTION_TYPE_BASE; + DUMPBITS (bit_buf, bits, 2); + } + return macroblock_modes; + } else if (picture->frame_pred_frame_dct) { + if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) + macroblock_modes |= MC_FRAME; + return macroblock_modes; + } else { + if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) { + macroblock_modes |= UBITS (bit_buf, 2) * MOTION_TYPE_BASE; + DUMPBITS (bit_buf, bits, 2); + } + if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN)) { + macroblock_modes |= UBITS (bit_buf, 1) * DCT_TYPE_INTERLACED; + DUMPBITS (bit_buf, bits, 1); + } + return macroblock_modes; + } + + case B_TYPE: + + tab = MB_B + UBITS (bit_buf, 6); + DUMPBITS (bit_buf, bits, tab->len); + macroblock_modes = tab->modes; + + if (picture->picture_structure != FRAME_PICTURE) { + if (! (macroblock_modes & MACROBLOCK_INTRA)) { + macroblock_modes |= UBITS (bit_buf, 2) * MOTION_TYPE_BASE; + DUMPBITS (bit_buf, bits, 2); + } + return macroblock_modes; + } else if (picture->frame_pred_frame_dct) { + /* if (! (macroblock_modes & MACROBLOCK_INTRA)) */ + macroblock_modes |= MC_FRAME; + return macroblock_modes; + } else { + if (macroblock_modes & MACROBLOCK_INTRA) + goto intra; + macroblock_modes |= UBITS (bit_buf, 2) * MOTION_TYPE_BASE; + DUMPBITS (bit_buf, bits, 2); + if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN)) { + intra: + macroblock_modes |= UBITS (bit_buf, 1) * DCT_TYPE_INTERLACED; + DUMPBITS (bit_buf, bits, 1); + } + return macroblock_modes; + } + + case D_TYPE: + + DUMPBITS (bit_buf, bits, 1); + return MACROBLOCK_INTRA; + + default: + return 0; + } +#undef bit_buf +#undef bits +#undef bit_ptr +} + +static inline int get_quantizer_scale (picture_t * picture) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + + int quantizer_scale_code; + + quantizer_scale_code = UBITS (bit_buf, 5); + DUMPBITS (bit_buf, bits, 5); + + if (picture->q_scale_type) + return non_linear_quantizer_scale [quantizer_scale_code]; + else + return quantizer_scale_code << 1; +#undef bit_buf +#undef bits +#undef bit_ptr +} + +static inline int get_motion_delta (picture_t * picture, int f_code) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + + int delta; + int sign; + MVtab * tab; + + if (bit_buf & 0x80000000) { + DUMPBITS (bit_buf, bits, 1); + return 0; + } else if (bit_buf >= 0x0c000000) { + + tab = MV_4 + UBITS (bit_buf, 4); + delta = (tab->delta << f_code) + 1; + bits += tab->len + f_code + 1; + bit_buf <<= tab->len; + + sign = SBITS (bit_buf, 1); + bit_buf <<= 1; + + if (f_code) + delta += UBITS (bit_buf, f_code); + bit_buf <<= f_code; + + return (delta ^ sign) - sign; + + } else { + + tab = MV_10 + UBITS (bit_buf, 10); + delta = (tab->delta << f_code) + 1; + bits += tab->len + 1; + bit_buf <<= tab->len; + + sign = SBITS (bit_buf, 1); + bit_buf <<= 1; + + if (f_code) { + NEEDBITS (bit_buf, bits, bit_ptr); + delta += UBITS (bit_buf, f_code); + DUMPBITS (bit_buf, bits, f_code); + } + + return (delta ^ sign) - sign; + + } +#undef bit_buf +#undef bits +#undef bit_ptr +} + +static inline int bound_motion_vector (int vector, int f_code) +{ +#if 1 + int limit; + + limit = 16 << f_code; + + if (vector >= limit) + return vector - 2*limit; + else if (vector < -limit) + return vector + 2*limit; + else return vector; +#else + return (vector << (27 - f_code)) >> (27 - f_code); +#endif +} + +static inline int get_dmv (picture_t * picture) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + + DMVtab * tab; + + tab = DMV_2 + UBITS (bit_buf, 2); + DUMPBITS (bit_buf, bits, tab->len); + return tab->dmv; +#undef bit_buf +#undef bits +#undef bit_ptr +} + +static inline int get_coded_block_pattern (picture_t * picture) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + + CBPtab * tab; + + NEEDBITS (bit_buf, bits, bit_ptr); + + if (bit_buf >= 0x20000000) { + + tab = CBP_7 - 16 + UBITS (bit_buf, 7); + DUMPBITS (bit_buf, bits, tab->len); + return tab->cbp; + + } else { + + tab = CBP_9 + UBITS (bit_buf, 9); + DUMPBITS (bit_buf, bits, tab->len); + return tab->cbp; + } + +#undef bit_buf +#undef bits +#undef bit_ptr +} + +static inline int get_luma_dc_dct_diff (picture_t * picture) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + DCtab * tab; + int size; + int dc_diff; + + if (bit_buf < 0xf8000000) { + tab = DC_lum_5 + UBITS (bit_buf, 5); + size = tab->size; + if (size) { + bits += tab->len + size; + bit_buf <<= tab->len; + dc_diff = + UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size); + bit_buf <<= size; + return dc_diff; + } else { + DUMPBITS (bit_buf, bits, 3); + return 0; + } + } else { + tab = DC_long - 0x1e0 + UBITS (bit_buf, 9); + size = tab->size; + DUMPBITS (bit_buf, bits, tab->len); + NEEDBITS (bit_buf, bits, bit_ptr); + dc_diff = UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size); + DUMPBITS (bit_buf, bits, size); + return dc_diff; + } +#undef bit_buf +#undef bits +#undef bit_ptr +} + +static inline int get_chroma_dc_dct_diff (picture_t * picture) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + DCtab * tab; + int size; + int dc_diff; + + if (bit_buf < 0xf8000000) { + tab = DC_chrom_5 + UBITS (bit_buf, 5); + size = tab->size; + if (size) { + bits += tab->len + size; + bit_buf <<= tab->len; + dc_diff = + UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size); + bit_buf <<= size; + return dc_diff; + } else { + DUMPBITS (bit_buf, bits, 2); + return 0; + } + } else { + tab = DC_long - 0x3e0 + UBITS (bit_buf, 10); + size = tab->size; + DUMPBITS (bit_buf, bits, tab->len + 1); + NEEDBITS (bit_buf, bits, bit_ptr); + dc_diff = UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size); + DUMPBITS (bit_buf, bits, size); + return dc_diff; + } +#undef bit_buf +#undef bits +#undef bit_ptr +} + +#define SATURATE(val) \ +do { \ + if ((uint32_t)(val + 2048) > 4095) \ + val = (val > 0) ? 2047 : -2048; \ +} while (0) + +static void get_intra_block_B14 (picture_t * picture) +{ + int i; + int j; + int val; + uint8_t * scan = picture->scan; + uint8_t * quant_matrix = picture->intra_quantizer_matrix; + int quantizer_scale = picture->quantizer_scale; + int mismatch; + DCTtab * tab; + uint32_t bit_buf; + int bits; + uint8_t * bit_ptr; + int16_t * dest; + + dest = picture->DCTblock; + i = 0; + mismatch = ~dest[0]; + + bit_buf = picture->bitstream_buf; + bits = picture->bitstream_bits; + bit_ptr = picture->bitstream_ptr; + + NEEDBITS (bit_buf, bits, bit_ptr); + + while (1) { + if (bit_buf >= 0x28000000) { + + tab = DCT_B14AC_5 - 5 + UBITS (bit_buf, 5); + + i += tab->run; + if (i >= 64) + break; /* end of block */ + + normal_code: + j = scan[i]; + bit_buf <<= tab->len; + bits += tab->len + 1; + val = (tab->level * quantizer_scale * quant_matrix[j]) >> 4; + + /* if (bitstream_get (1)) val = -val; */ + val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1); + + SATURATE (val); + dest[j] = val; + mismatch ^= val; + + bit_buf <<= 1; + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } else if (bit_buf >= 0x04000000) { + + tab = DCT_B14_8 - 4 + UBITS (bit_buf, 8); + + i += tab->run; + if (i < 64) + goto normal_code; + + /* escape code */ + + i += UBITS (bit_buf << 6, 6) - 64; + if (i >= 64) + break; /* illegal, check needed to avoid buffer overflow */ + + j = scan[i]; + + DUMPBITS (bit_buf, bits, 12); + NEEDBITS (bit_buf, bits, bit_ptr); + val = (SBITS (bit_buf, 12) * + quantizer_scale * quant_matrix[j]) / 16; + + SATURATE (val); + dest[j] = val; + mismatch ^= val; + + DUMPBITS (bit_buf, bits, 12); + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } else if (bit_buf >= 0x02000000) { + tab = DCT_B14_10 - 8 + UBITS (bit_buf, 10); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00800000) { + tab = DCT_13 - 16 + UBITS (bit_buf, 13); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00200000) { + tab = DCT_15 - 16 + UBITS (bit_buf, 15); + i += tab->run; + if (i < 64) + goto normal_code; + } else { + tab = DCT_16 + UBITS (bit_buf, 16); + bit_buf <<= 16; + GETWORD (bit_buf, bits + 16, bit_ptr); + i += tab->run; + if (i < 64) + goto normal_code; + } + break; /* illegal, check needed to avoid buffer overflow */ + } + dest[63] ^= mismatch & 1; + DUMPBITS (bit_buf, bits, 2); /* dump end of block code */ + picture->bitstream_buf = bit_buf; + picture->bitstream_bits = bits; + picture->bitstream_ptr = bit_ptr; +} + +static void get_intra_block_B15 (picture_t * picture) +{ + int i; + int j; + int val; + uint8_t * scan = picture->scan; + uint8_t * quant_matrix = picture->intra_quantizer_matrix; + int quantizer_scale = picture->quantizer_scale; + int mismatch; + DCTtab * tab; + uint32_t bit_buf; + int bits; + uint8_t * bit_ptr; + int16_t * dest; + + dest = picture->DCTblock; + i = 0; + mismatch = ~dest[0]; + + bit_buf = picture->bitstream_buf; + bits = picture->bitstream_bits; + bit_ptr = picture->bitstream_ptr; + + NEEDBITS (bit_buf, bits, bit_ptr); + + while (1) { + if (bit_buf >= 0x04000000) { + + tab = DCT_B15_8 - 4 + UBITS (bit_buf, 8); + + i += tab->run; + if (i < 64) { + + normal_code: + j = scan[i]; + bit_buf <<= tab->len; + bits += tab->len + 1; + val = (tab->level * quantizer_scale * quant_matrix[j]) >> 4; + + /* if (bitstream_get (1)) val = -val; */ + val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1); + + SATURATE (val); + dest[j] = val; + mismatch ^= val; + + bit_buf <<= 1; + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } else { + + /* end of block. I commented out this code because if we */ + /* dont exit here we will still exit at the later test :) */ + + /* if (i >= 128) break; */ /* end of block */ + + /* escape code */ + + i += UBITS (bit_buf << 6, 6) - 64; + if (i >= 64) + break; /* illegal, check against buffer overflow */ + + j = scan[i]; + + DUMPBITS (bit_buf, bits, 12); + NEEDBITS (bit_buf, bits, bit_ptr); + val = (SBITS (bit_buf, 12) * + quantizer_scale * quant_matrix[j]) / 16; + + SATURATE (val); + dest[j] = val; + mismatch ^= val; + + DUMPBITS (bit_buf, bits, 12); + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } + } else if (bit_buf >= 0x02000000) { + tab = DCT_B15_10 - 8 + UBITS (bit_buf, 10); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00800000) { + tab = DCT_13 - 16 + UBITS (bit_buf, 13); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00200000) { + tab = DCT_15 - 16 + UBITS (bit_buf, 15); + i += tab->run; + if (i < 64) + goto normal_code; + } else { + tab = DCT_16 + UBITS (bit_buf, 16); + bit_buf <<= 16; + GETWORD (bit_buf, bits + 16, bit_ptr); + i += tab->run; + if (i < 64) + goto normal_code; + } + break; /* illegal, check needed to avoid buffer overflow */ + } + dest[63] ^= mismatch & 1; + DUMPBITS (bit_buf, bits, 4); /* dump end of block code */ + picture->bitstream_buf = bit_buf; + picture->bitstream_bits = bits; + picture->bitstream_ptr = bit_ptr; +} + +static void get_non_intra_block (picture_t * picture) +{ + int i; + int j; + int val; + uint8_t * scan = picture->scan; + uint8_t * quant_matrix = picture->non_intra_quantizer_matrix; + int quantizer_scale = picture->quantizer_scale; + int mismatch; + DCTtab * tab; + uint32_t bit_buf; + int bits; + uint8_t * bit_ptr; + int16_t * dest; + + i = -1; + mismatch = 1; + dest = picture->DCTblock; + + bit_buf = picture->bitstream_buf; + bits = picture->bitstream_bits; + bit_ptr = picture->bitstream_ptr; + + NEEDBITS (bit_buf, bits, bit_ptr); + if (bit_buf >= 0x28000000) { + tab = DCT_B14DC_5 - 5 + UBITS (bit_buf, 5); + goto entry_1; + } else + goto entry_2; + + while (1) { + if (bit_buf >= 0x28000000) { + + tab = DCT_B14AC_5 - 5 + UBITS (bit_buf, 5); + + entry_1: + i += tab->run; + if (i >= 64) + break; /* end of block */ + + normal_code: + j = scan[i]; + bit_buf <<= tab->len; + bits += tab->len + 1; + val = ((2*tab->level+1) * quantizer_scale * quant_matrix[j]) >> 5; + + /* if (bitstream_get (1)) val = -val; */ + val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1); + + SATURATE (val); + dest[j] = val; + mismatch ^= val; + + bit_buf <<= 1; + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } + + entry_2: + if (bit_buf >= 0x04000000) { + + tab = DCT_B14_8 - 4 + UBITS (bit_buf, 8); + + i += tab->run; + if (i < 64) + goto normal_code; + + /* escape code */ + + i += UBITS (bit_buf << 6, 6) - 64; + if (i >= 64) + break; /* illegal, check needed to avoid buffer overflow */ + + j = scan[i]; + + DUMPBITS (bit_buf, bits, 12); + NEEDBITS (bit_buf, bits, bit_ptr); + val = 2 * (SBITS (bit_buf, 12) + SBITS (bit_buf, 1)) + 1; + val = (val * quantizer_scale * quant_matrix[j]) / 32; + + SATURATE (val); + dest[j] = val; + mismatch ^= val; + + DUMPBITS (bit_buf, bits, 12); + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } else if (bit_buf >= 0x02000000) { + tab = DCT_B14_10 - 8 + UBITS (bit_buf, 10); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00800000) { + tab = DCT_13 - 16 + UBITS (bit_buf, 13); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00200000) { + tab = DCT_15 - 16 + UBITS (bit_buf, 15); + i += tab->run; + if (i < 64) + goto normal_code; + } else { + tab = DCT_16 + UBITS (bit_buf, 16); + bit_buf <<= 16; + GETWORD (bit_buf, bits + 16, bit_ptr); + i += tab->run; + if (i < 64) + goto normal_code; + } + break; /* illegal, check needed to avoid buffer overflow */ + } + dest[63] ^= mismatch & 1; + DUMPBITS (bit_buf, bits, 2); /* dump end of block code */ + picture->bitstream_buf = bit_buf; + picture->bitstream_bits = bits; + picture->bitstream_ptr = bit_ptr; +} + +static void get_mpeg1_intra_block (picture_t * picture) +{ + int i; + int j; + int val; + uint8_t * scan = picture->scan; + uint8_t * quant_matrix = picture->intra_quantizer_matrix; + int quantizer_scale = picture->quantizer_scale; + DCTtab * tab; + uint32_t bit_buf; + int bits; + uint8_t * bit_ptr; + int16_t * dest; + + i = 0; + dest = picture->DCTblock; + + bit_buf = picture->bitstream_buf; + bits = picture->bitstream_bits; + bit_ptr = picture->bitstream_ptr; + + NEEDBITS (bit_buf, bits, bit_ptr); + + while (1) { + if (bit_buf >= 0x28000000) { + + tab = DCT_B14AC_5 - 5 + UBITS (bit_buf, 5); + + i += tab->run; + if (i >= 64) + break; /* end of block */ + + normal_code: + j = scan[i]; + bit_buf <<= tab->len; + bits += tab->len + 1; + val = (tab->level * quantizer_scale * quant_matrix[j]) >> 4; + + /* oddification */ + val = (val - 1) | 1; + + /* if (bitstream_get (1)) val = -val; */ + val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1); + + SATURATE (val); + dest[j] = val; + + bit_buf <<= 1; + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } else if (bit_buf >= 0x04000000) { + + tab = DCT_B14_8 - 4 + UBITS (bit_buf, 8); + + i += tab->run; + if (i < 64) + goto normal_code; + + /* escape code */ + + i += UBITS (bit_buf << 6, 6) - 64; + if (i >= 64) + break; /* illegal, check needed to avoid buffer overflow */ + + j = scan[i]; + + DUMPBITS (bit_buf, bits, 12); + NEEDBITS (bit_buf, bits, bit_ptr); + val = SBITS (bit_buf, 8); + if (! (val & 0x7f)) { + DUMPBITS (bit_buf, bits, 8); + val = UBITS (bit_buf, 8) + 2 * val; + } + val = (val * quantizer_scale * quant_matrix[j]) / 16; + + /* oddification */ + val = (val + ~SBITS (val, 1)) | 1; + + SATURATE (val); + dest[j] = val; + + DUMPBITS (bit_buf, bits, 8); + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } else if (bit_buf >= 0x02000000) { + tab = DCT_B14_10 - 8 + UBITS (bit_buf, 10); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00800000) { + tab = DCT_13 - 16 + UBITS (bit_buf, 13); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00200000) { + tab = DCT_15 - 16 + UBITS (bit_buf, 15); + i += tab->run; + if (i < 64) + goto normal_code; + } else { + tab = DCT_16 + UBITS (bit_buf, 16); + bit_buf <<= 16; + GETWORD (bit_buf, bits + 16, bit_ptr); + i += tab->run; + if (i < 64) + goto normal_code; + } + break; /* illegal, check needed to avoid buffer overflow */ + } + DUMPBITS (bit_buf, bits, 2); /* dump end of block code */ + picture->bitstream_buf = bit_buf; + picture->bitstream_bits = bits; + picture->bitstream_ptr = bit_ptr; +} + +static void get_mpeg1_non_intra_block (picture_t * picture) +{ + int i; + int j; + int val; + uint8_t * scan = picture->scan; + uint8_t * quant_matrix = picture->non_intra_quantizer_matrix; + int quantizer_scale = picture->quantizer_scale; + DCTtab * tab; + uint32_t bit_buf; + int bits; + uint8_t * bit_ptr; + int16_t * dest; + + i = -1; + dest = picture->DCTblock; + + bit_buf = picture->bitstream_buf; + bits = picture->bitstream_bits; + bit_ptr = picture->bitstream_ptr; + + NEEDBITS (bit_buf, bits, bit_ptr); + if (bit_buf >= 0x28000000) { + tab = DCT_B14DC_5 - 5 + UBITS (bit_buf, 5); + goto entry_1; + } else + goto entry_2; + + while (1) { + if (bit_buf >= 0x28000000) { + + tab = DCT_B14AC_5 - 5 + UBITS (bit_buf, 5); + + entry_1: + i += tab->run; + if (i >= 64) + break; /* end of block */ + + normal_code: + j = scan[i]; + bit_buf <<= tab->len; + bits += tab->len + 1; + val = ((2*tab->level+1) * quantizer_scale * quant_matrix[j]) >> 5; + + /* oddification */ + val = (val - 1) | 1; + + /* if (bitstream_get (1)) val = -val; */ + val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1); + + SATURATE (val); + dest[j] = val; + + bit_buf <<= 1; + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } + + entry_2: + if (bit_buf >= 0x04000000) { + + tab = DCT_B14_8 - 4 + UBITS (bit_buf, 8); + + i += tab->run; + if (i < 64) + goto normal_code; + + /* escape code */ + + i += UBITS (bit_buf << 6, 6) - 64; + if (i >= 64) + break; /* illegal, check needed to avoid buffer overflow */ + + j = scan[i]; + + DUMPBITS (bit_buf, bits, 12); + NEEDBITS (bit_buf, bits, bit_ptr); + val = SBITS (bit_buf, 8); + if (! (val & 0x7f)) { + DUMPBITS (bit_buf, bits, 8); + val = UBITS (bit_buf, 8) + 2 * val; + } + val = 2 * (val + SBITS (val, 1)) + 1; + val = (val * quantizer_scale * quant_matrix[j]) / 32; + + /* oddification */ + val = (val + ~SBITS (val, 1)) | 1; + + SATURATE (val); + dest[j] = val; + + DUMPBITS (bit_buf, bits, 8); + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } else if (bit_buf >= 0x02000000) { + tab = DCT_B14_10 - 8 + UBITS (bit_buf, 10); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00800000) { + tab = DCT_13 - 16 + UBITS (bit_buf, 13); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00200000) { + tab = DCT_15 - 16 + UBITS (bit_buf, 15); + i += tab->run; + if (i < 64) + goto normal_code; + } else { + tab = DCT_16 + UBITS (bit_buf, 16); + bit_buf <<= 16; + GETWORD (bit_buf, bits + 16, bit_ptr); + i += tab->run; + if (i < 64) + goto normal_code; + } + break; /* illegal, check needed to avoid buffer overflow */ + } + DUMPBITS (bit_buf, bits, 2); /* dump end of block code */ + picture->bitstream_buf = bit_buf; + picture->bitstream_bits = bits; + picture->bitstream_ptr = bit_ptr; +} + +static inline int get_macroblock_address_increment (picture_t * picture) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + + MBAtab * tab; + int mba; + + mba = 0; + + while (1) { + if (bit_buf >= 0x10000000) { + tab = MBA_5 - 2 + UBITS (bit_buf, 5); + DUMPBITS (bit_buf, bits, tab->len); + return mba + tab->mba; + } else if (bit_buf >= 0x03000000) { + tab = MBA_11 - 24 + UBITS (bit_buf, 11); + DUMPBITS (bit_buf, bits, tab->len); + return mba + tab->mba; + } else switch (UBITS (bit_buf, 11)) { + case 8: /* macroblock_escape */ + mba += 33; + /* no break here on purpose */ + case 15: /* macroblock_stuffing (MPEG1 only) */ + DUMPBITS (bit_buf, bits, 11); + NEEDBITS (bit_buf, bits, bit_ptr); + break; + default: /* end of slice, or error */ + return 0; + } + } + +#undef bit_buf +#undef bits +#undef bit_ptr +} + +static inline void slice_intra_DCT (picture_t * picture, int cc, + uint8_t * dest, int stride) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + NEEDBITS (bit_buf, bits, bit_ptr); + /* Get the intra DC coefficient and inverse quantize it */ + if (cc == 0) + picture->dc_dct_pred[0] += get_luma_dc_dct_diff (picture); + else + picture->dc_dct_pred[cc] += get_chroma_dc_dct_diff (picture); + picture->DCTblock[0] = + picture->dc_dct_pred[cc] << (3 - picture->intra_dc_precision); + memset (picture->DCTblock + 1, 0, 63 * sizeof (int16_t)); + + if (picture->mpeg1) { + if (picture->picture_coding_type != D_TYPE) + get_mpeg1_intra_block (picture); + } else if (picture->intra_vlc_format) + get_intra_block_B15 (picture); + else + get_intra_block_B14 (picture); + idct_block_copy (picture->DCTblock, dest, stride); +#undef bit_buf +#undef bits +#undef bit_ptr +} + +static inline void slice_non_intra_DCT (picture_t * picture, uint8_t * dest, + int stride) +{ + memset (picture->DCTblock, 0, 64 * sizeof (int16_t)); + if (picture->mpeg1) + get_mpeg1_non_intra_block (picture); + else + get_non_intra_block (picture); + idct_block_add (picture->DCTblock, dest, stride); +} + +#define MOTION_Y(table,offset_x,offset_y,motion_x,motion_y, \ + dest,src,offset_dest,offset_src,stride,height) \ +do { \ + int xy_half; \ + int total_offset; \ + \ + xy_half = ((motion_y & 1) << 1) | (motion_x & 1); \ + total_offset = ((offset_y + (motion_y >> 1)) * stride + \ + offset_x + (motion_x >> 1) + (offset_src)); \ + table[xy_half] (dest[0] + offset_x + (offset_dest), \ + src[0] + total_offset, stride, height); \ +} while (0) + +#define MOTION_UV(table,offset_x,offset_y,motion_x,motion_y, \ + dest,src,offset_dest,offset_src,stride,height) \ +do { \ + int xy_half; \ + int total_offset; \ + \ + xy_half = ((motion_y & 1) << 1) | (motion_x & 1); \ + total_offset = (((offset_y + motion_y) >> 1) * (stride) + \ + ((offset_x + motion_x) >> 1) + (offset_src)); \ + table[4+xy_half] (dest[1] + (offset_x >> 1) + (offset_dest), \ + src[1] + total_offset, stride, height); \ + table[4+xy_half] (dest[2] + (offset_x >> 1) + (offset_dest), \ + src[2] + total_offset, stride, height); \ +} while (0) + +static inline void motion_block (void (** table) (uint8_t *, uint8_t *, + int32_t, int32_t), + int x_offset, int y_offset, int mb_y_8_offset, + int src_field, int dest_field, + int x_pred, int y_pred, + uint8_t * dest[3], uint8_t * src[3], + int stride, int height) +{ + MOTION_Y (table, x_offset, y_offset, x_pred, y_pred, dest, src, + dest_field + mb_y_8_offset*8*stride, src_field, stride, height); + + x_pred /= 2; + y_pred /= 2; + stride >>= 1; + height >>= 1; + + MOTION_UV (table, x_offset, y_offset, x_pred, y_pred, dest, src, + (dest_field >> 1) + mb_y_8_offset*4*stride, src_field >> 1, + stride, height); +} + +static void motion_mp1 (picture_t * picture, motion_t * motion, + uint8_t * dest[3], int offset, int stride, + void (** table) (uint8_t *, uint8_t *, int, int)) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + int motion_x, motion_y; + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_x = motion->pmv[0][0] + get_motion_delta (picture, + motion->f_code[0]); + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); + motion->pmv[0][0] = motion_x; + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_y = motion->pmv[0][1] + get_motion_delta (picture, + motion->f_code[0]); + motion_y = bound_motion_vector (motion_y, motion->f_code[0]); + motion->pmv[0][1] = motion_y; + + if (motion->f_code[1]) { + motion_x <<= 1; + motion_y <<= 1; + } + + motion_block (table, offset, picture->v_offset, 0, 0, 0, + motion_x, motion_y, dest, motion->ref[0], stride, 16); +#undef bit_buf +#undef bits +#undef bit_ptr +} + +static void motion_mp1_reuse (picture_t * picture, motion_t * motion, + uint8_t * dest[3], int offset, int stride, + void (** table) (uint8_t *, uint8_t *, int, int)) +{ + int motion_x, motion_y; + + motion_x = motion->pmv[0][0]; + motion_y = motion->pmv[0][1]; + + if (motion->f_code[1]) { + motion_x <<= 1; + motion_y <<= 1; + } + + motion_block (table, offset, picture->v_offset, 0, 0, 0, + motion_x, motion_y, dest, motion->ref[0], stride, 16); +} + +static void motion_fr_frame (picture_t * picture, motion_t * motion, + uint8_t * dest[3], int offset, int stride, + void (** table) (uint8_t *, uint8_t *, int, int)) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + int motion_x, motion_y; + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_x = motion->pmv[0][0] + get_motion_delta (picture, + motion->f_code[0]); + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); + motion->pmv[1][0] = motion->pmv[0][0] = motion_x; + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_y = motion->pmv[0][1] + get_motion_delta (picture, + motion->f_code[1]); + motion_y = bound_motion_vector (motion_y, motion->f_code[1]); + motion->pmv[1][1] = motion->pmv[0][1] = motion_y; + + motion_block (table, offset, picture->v_offset, 0, 0, 0, + motion_x, motion_y, dest, motion->ref[0], stride, 16); +#undef bit_buf +#undef bits +#undef bit_ptr +} + +static void motion_fr_field (picture_t * picture, motion_t * motion, + uint8_t * dest[3], int offset, int stride, + void (** table) (uint8_t *, uint8_t *, int, int)) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + int motion_x, motion_y; + int field_select; + + NEEDBITS (bit_buf, bits, bit_ptr); + field_select = SBITS (bit_buf, 1); + DUMPBITS (bit_buf, bits, 1); + + motion_x = motion->pmv[0][0] + get_motion_delta (picture, + motion->f_code[0]); + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); + motion->pmv[0][0] = motion_x; + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_y = (motion->pmv[0][1] >> 1) + get_motion_delta (picture, + motion->f_code[1]); + /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */ + motion->pmv[0][1] = motion_y << 1; + + motion_block (table, offset, picture->v_offset >> 1, + 0, (field_select & stride), 0, + motion_x, motion_y, dest, motion->ref[0], stride * 2, 8); + + NEEDBITS (bit_buf, bits, bit_ptr); + field_select = SBITS (bit_buf, 1); + DUMPBITS (bit_buf, bits, 1); + + motion_x = motion->pmv[1][0] + get_motion_delta (picture, + motion->f_code[0]); + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); + motion->pmv[1][0] = motion_x; + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_y = (motion->pmv[1][1] >> 1) + get_motion_delta (picture, + motion->f_code[1]); + /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */ + motion->pmv[1][1] = motion_y << 1; + + motion_block (table, offset, picture->v_offset >> 1, + 0, (field_select & stride), stride, + motion_x, motion_y, dest, motion->ref[0], stride * 2, 8); +#undef bit_buf +#undef bits +#undef bit_ptr +} + +static void motion_fr_dmv (picture_t * picture, motion_t * motion, + uint8_t * dest[3], int offset, int stride, + void (** table) (uint8_t *, uint8_t *, int, int)) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + int motion_x, motion_y; + int dmv_x, dmv_y; + int m; + int other_x, other_y; + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_x = motion->pmv[0][0] + get_motion_delta (picture, + motion->f_code[0]); + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); + motion->pmv[1][0] = motion->pmv[0][0] = motion_x; + + NEEDBITS (bit_buf, bits, bit_ptr); + dmv_x = get_dmv (picture); + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_y = (motion->pmv[0][1] >> 1) + get_motion_delta (picture, + motion->f_code[1]); + /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */ + motion->pmv[1][1] = motion->pmv[0][1] = motion_y << 1; + + NEEDBITS (bit_buf, bits, bit_ptr); + dmv_y = get_dmv (picture); + + motion_block (mc_functions.put, offset, picture->v_offset >> 1, 0, 0, 0, + motion_x, motion_y, dest, motion->ref[0], stride * 2, 8); + + m = picture->top_field_first ? 1 : 3; + other_x = ((motion_x * m + (motion_x > 0)) >> 1) + dmv_x; + other_y = ((motion_y * m + (motion_y > 0)) >> 1) + dmv_y - 1; + motion_block (mc_functions.avg, offset, picture->v_offset >> 1, 0, stride, 0, + other_x, other_y, dest, motion->ref[0], stride * 2, 8); + + motion_block (mc_functions.put, offset, picture->v_offset >> 1, + 0, stride, stride, + motion_x, motion_y, dest, motion->ref[0], stride * 2, 8); + + m = picture->top_field_first ? 3 : 1; + other_x = ((motion_x * m + (motion_x > 0)) >> 1) + dmv_x; + other_y = ((motion_y * m + (motion_y > 0)) >> 1) + dmv_y + 1; + motion_block (mc_functions.avg, offset, picture->v_offset >> 1, 0, 0, stride, + other_x, other_y, dest, motion->ref[0], stride * 2, 8); +#undef bit_buf +#undef bits +#undef bit_ptr +} + +/* like motion_frame, but reuse previous motion vectors */ +static void motion_fr_reuse (picture_t * picture, motion_t * motion, + uint8_t * dest[3], int offset, int stride, + void (** table) (uint8_t *, uint8_t *, int, int)) +{ + motion_block (table, offset, picture->v_offset, 0, 0, 0, + motion->pmv[0][0], motion->pmv[0][1], + dest, motion->ref[0], stride, 16); +} + +/* like motion_frame, but use null motion vectors */ +static void motion_fr_zero (picture_t * picture, motion_t * motion, + uint8_t * dest[3], int offset, int stride, + void (** table) (uint8_t *, uint8_t *, int, int)) +{ + motion_block (table, offset, picture->v_offset, 0, 0, 0, 0, 0, + dest, motion->ref[0], stride, 16); +} + +/* like motion_frame, but parsing without actual motion compensation */ +static void motion_fr_conceal (picture_t * picture) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + int tmp; + + NEEDBITS (bit_buf, bits, bit_ptr); + tmp = (picture->f_motion.pmv[0][0] + + get_motion_delta (picture, picture->f_motion.f_code[0])); + tmp = bound_motion_vector (tmp, picture->f_motion.f_code[0]); + picture->f_motion.pmv[1][0] = picture->f_motion.pmv[0][0] = tmp; + + NEEDBITS (bit_buf, bits, bit_ptr); + tmp = (picture->f_motion.pmv[0][1] + + get_motion_delta (picture, picture->f_motion.f_code[1])); + tmp = bound_motion_vector (tmp, picture->f_motion.f_code[1]); + picture->f_motion.pmv[1][1] = picture->f_motion.pmv[0][1] = tmp; + + DUMPBITS (bit_buf, bits, 1); /* remove marker_bit */ +#undef bit_buf +#undef bits +#undef bit_ptr +} + +static void motion_fi_field (picture_t * picture, motion_t * motion, + uint8_t * dest[3], int offset, int stride, + void (** table) (uint8_t *, uint8_t *, int, int)) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + int motion_x, motion_y; + int field_select; + + NEEDBITS (bit_buf, bits, bit_ptr); + field_select = UBITS (bit_buf, 1); + DUMPBITS (bit_buf, bits, 1); + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_x = motion->pmv[0][0] + get_motion_delta (picture, + motion->f_code[0]); + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); + motion->pmv[1][0] = motion->pmv[0][0] = motion_x; + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_y = motion->pmv[0][1] + get_motion_delta (picture, + motion->f_code[1]); + motion_y = bound_motion_vector (motion_y, motion->f_code[1]); + motion->pmv[1][1] = motion->pmv[0][1] = motion_y; + + motion_block (table, offset, picture->v_offset, 0, 0, 0, + motion_x, motion_y, + dest, motion->ref[field_select], stride, 16); +#undef bit_buf +#undef bits +#undef bit_ptr +} + +static void motion_fi_16x8 (picture_t * picture, motion_t * motion, + uint8_t * dest[3], int offset, int stride, + void (** table) (uint8_t *, uint8_t *, int, int)) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + int motion_x, motion_y; + int field_select; + + NEEDBITS (bit_buf, bits, bit_ptr); + field_select = UBITS (bit_buf, 1); + DUMPBITS (bit_buf, bits, 1); + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_x = motion->pmv[0][0] + get_motion_delta (picture, + motion->f_code[0]); + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); + motion->pmv[0][0] = motion_x; + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_y = motion->pmv[0][1] + get_motion_delta (picture, + motion->f_code[1]); + motion_y = bound_motion_vector (motion_y, motion->f_code[1]); + motion->pmv[0][1] = motion_y; + + motion_block (table, offset, picture->v_offset, 0, 0, 0, + motion_x, motion_y, + dest, motion->ref[field_select], stride, 8); + + NEEDBITS (bit_buf, bits, bit_ptr); + field_select = UBITS (bit_buf, 1); + DUMPBITS (bit_buf, bits, 1); + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_x = motion->pmv[1][0] + get_motion_delta (picture, + motion->f_code[0]); + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); + motion->pmv[1][0] = motion_x; + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_y = motion->pmv[1][1] + get_motion_delta (picture, + motion->f_code[1]); + motion_y = bound_motion_vector (motion_y, motion->f_code[1]); + motion->pmv[1][1] = motion_y; + + motion_block (table, offset, picture->v_offset+8, 1, 0, 0, + motion_x, motion_y, + dest, motion->ref[field_select], stride, 8); +#undef bit_buf +#undef bits +#undef bit_ptr +} + +static void motion_fi_dmv (picture_t * picture, motion_t * motion, + uint8_t * dest[3], int offset, int stride, + void (** table) (uint8_t *, uint8_t *, int, int)) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + int motion_x, motion_y; + int dmv_x, dmv_y; + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_x = motion->pmv[0][0] + get_motion_delta (picture, + motion->f_code[0]); + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); + motion->pmv[1][0] = motion->pmv[0][0] = motion_x; + + NEEDBITS (bit_buf, bits, bit_ptr); + dmv_x = get_dmv (picture); + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_y = motion->pmv[0][1] + get_motion_delta (picture, + motion->f_code[1]); + motion_y = bound_motion_vector (motion_y, motion->f_code[1]); + motion->pmv[1][1] = motion->pmv[0][1] = motion_y; + + NEEDBITS (bit_buf, bits, bit_ptr); + dmv_y = get_dmv (picture); + + motion_block (mc_functions.put, offset, picture->v_offset, 0, 0, 0, + motion_x, motion_y, + dest, motion->ref[picture->current_field], stride, 16); + + motion_x = ((motion_x + (motion_x > 0)) >> 1) + dmv_x; + motion_y = ((motion_y + (motion_y > 0)) >> 1) + dmv_y + + 2 * picture->current_field - 1; + motion_block (mc_functions.avg, offset, picture->v_offset, 0, 0, 0, + motion_x, motion_y, + dest, motion->ref[!picture->current_field], stride, 16); +#undef bit_buf +#undef bits +#undef bit_ptr +} + +static void motion_fi_reuse (picture_t * picture, motion_t * motion, + uint8_t * dest[3], int offset, int stride, + void (** table) (uint8_t *, uint8_t *, int, int)) +{ + motion_block (table, offset, picture->v_offset, 0, 0, 0, + motion->pmv[0][0], motion->pmv[0][1], + dest, motion->ref[picture->current_field], stride, 16); +} + +static void motion_fi_zero (picture_t * picture, motion_t * motion, + uint8_t * dest[3], int offset, int stride, + void (** table) (uint8_t *, uint8_t *, int, int)) +{ + motion_block (table, offset, picture->v_offset, 0, 0, 0, 0, 0, + dest, motion->ref[picture->current_field], stride, 16); +} + +static void motion_fi_conceal (picture_t * picture) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + int tmp; + + NEEDBITS (bit_buf, bits, bit_ptr); + DUMPBITS (bit_buf, bits, 1); /* remove field_select */ + + NEEDBITS (bit_buf, bits, bit_ptr); + tmp = (picture->f_motion.pmv[0][0] + + get_motion_delta (picture, picture->f_motion.f_code[0])); + tmp = bound_motion_vector (tmp, picture->f_motion.f_code[0]); + picture->f_motion.pmv[1][0] = picture->f_motion.pmv[0][0] = tmp; + + NEEDBITS (bit_buf, bits, bit_ptr); + tmp = (picture->f_motion.pmv[0][1] + + get_motion_delta (picture, picture->f_motion.f_code[1])); + tmp = bound_motion_vector (tmp, picture->f_motion.f_code[1]); + picture->f_motion.pmv[1][1] = picture->f_motion.pmv[0][1] = tmp; + + DUMPBITS (bit_buf, bits, 1); /* remove marker_bit */ +#undef bit_buf +#undef bits +#undef bit_ptr +} + +#define MOTION(routine,direction) \ +do { \ + if ((direction) & MACROBLOCK_MOTION_FORWARD) \ + routine (picture, &(picture->f_motion), dest, offset, stride, \ + mc_functions.put); \ + if ((direction) & MACROBLOCK_MOTION_BACKWARD) \ + routine (picture, &(picture->b_motion), dest, offset, stride, \ + ((direction) & MACROBLOCK_MOTION_FORWARD ? \ + mc_functions.avg : mc_functions.put)); \ +} while (0) + +#define CHECK_DISPLAY \ +do { \ + if (offset == picture->coded_picture_width) { \ + do { /* just so we can use the break statement */ \ + if (picture->current_frame->copy) { \ + picture->current_frame->copy (picture->current_frame, \ + dest); \ + if (picture->picture_coding_type == B_TYPE) \ + break; \ + } \ + dest[0] += 16 * stride; \ + dest[1] += 4 * stride; \ + dest[2] += 4 * stride; \ + } while (0); \ + if (! (picture->mpeg1)) \ + return 0; \ + picture->v_offset += 16; \ + if (picture->v_offset >= picture->coded_picture_height) \ + return 0; \ + offset = 0; \ + } \ +} while (0) + +int slice_process (picture_t * picture, uint8_t code, uint8_t * buffer) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + int macroblock_modes; + int stride; + uint8_t * dest[3]; + int offset; + uint8_t ** forward_ref[2]; + + stride = picture->coded_picture_width; + offset = (code - 1) * stride * 4; + picture->v_offset = (code - 1) * 16; + + forward_ref[0] = picture->forward_reference_frame->base; + if (picture->picture_structure != FRAME_PICTURE) { + forward_ref[1] = picture->forward_reference_frame->base; + offset <<= 1; + picture->current_field = (picture->picture_structure == BOTTOM_FIELD); + if ((picture->second_field) && + (picture->picture_coding_type != B_TYPE)) + forward_ref[picture->picture_structure == TOP_FIELD] = + picture->current_frame->base; + + picture->f_motion.ref[1][0] = forward_ref[1][0] + stride; + picture->f_motion.ref[1][1] = forward_ref[1][1] + (stride >> 1); + picture->f_motion.ref[1][2] = forward_ref[1][2] + (stride >> 1); + + picture->b_motion.ref[1][0] = + picture->backward_reference_frame->base[0] + stride; + picture->b_motion.ref[1][1] = + picture->backward_reference_frame->base[1] + (stride >> 1); + picture->b_motion.ref[1][2] = + picture->backward_reference_frame->base[2] + (stride >> 1); + } + + picture->f_motion.ref[0][0] = forward_ref[0][0]; + picture->f_motion.ref[0][1] = forward_ref[0][1]; + picture->f_motion.ref[0][2] = forward_ref[0][2]; + + picture->f_motion.pmv[0][0] = picture->f_motion.pmv[0][1] = 0; + picture->f_motion.pmv[1][0] = picture->f_motion.pmv[1][1] = 0; + + picture->b_motion.ref[0][0] = picture->backward_reference_frame->base[0]; + picture->b_motion.ref[0][1] = picture->backward_reference_frame->base[1]; + picture->b_motion.ref[0][2] = picture->backward_reference_frame->base[2]; + + picture->b_motion.pmv[0][0] = picture->b_motion.pmv[0][1] = 0; + picture->b_motion.pmv[1][0] = picture->b_motion.pmv[1][1] = 0; + + if ((picture->current_frame->copy) && + (picture->picture_coding_type == B_TYPE)) + offset = 0; + + dest[0] = picture->current_frame->base[0] + offset * 4; + dest[1] = picture->current_frame->base[1] + offset; + dest[2] = picture->current_frame->base[2] + offset; + + switch (picture->picture_structure) { + case BOTTOM_FIELD: + dest[0] += stride; + dest[1] += stride >> 1; + dest[2] += stride >> 1; + /* follow thru */ + case TOP_FIELD: + stride <<= 1; + } + + picture->dc_dct_pred[0] = picture->dc_dct_pred[1] = + picture->dc_dct_pred[2] = 1 << (picture->intra_dc_precision + 7); + + bitstream_init (picture, buffer); + + picture->quantizer_scale = get_quantizer_scale (picture); + + /* ignore intra_slice and all the extra data */ + while (bit_buf & 0x80000000) { + DUMPBITS (bit_buf, bits, 9); + NEEDBITS (bit_buf, bits, bit_ptr); + } + DUMPBITS (bit_buf, bits, 1); + + NEEDBITS (bit_buf, bits, bit_ptr); + offset = get_macroblock_address_increment (picture) << 4; + + while (1) { + NEEDBITS (bit_buf, bits, bit_ptr); + + macroblock_modes = get_macroblock_modes (picture); + + /* maybe integrate MACROBLOCK_QUANT test into get_macroblock_modes ? */ + if (macroblock_modes & MACROBLOCK_QUANT) + picture->quantizer_scale = get_quantizer_scale (picture); + + if (macroblock_modes & MACROBLOCK_INTRA) { + + int DCT_offset, DCT_stride; + + if (picture->concealment_motion_vectors) { + if (picture->picture_structure == FRAME_PICTURE) + motion_fr_conceal (picture); + else + motion_fi_conceal (picture); + } else { + picture->f_motion.pmv[0][0] = picture->f_motion.pmv[0][1] = 0; + picture->f_motion.pmv[1][0] = picture->f_motion.pmv[1][1] = 0; + picture->b_motion.pmv[0][0] = picture->b_motion.pmv[0][1] = 0; + picture->b_motion.pmv[1][0] = picture->b_motion.pmv[1][1] = 0; + } + + if (macroblock_modes & DCT_TYPE_INTERLACED) { + DCT_offset = stride; + DCT_stride = stride * 2; + } else { + DCT_offset = stride * 8; + DCT_stride = stride; + } + + slice_intra_DCT (picture, 0, dest[0] + offset, DCT_stride); + slice_intra_DCT (picture, 0, dest[0] + offset + 8, DCT_stride); + slice_intra_DCT (picture, 0, dest[0] + offset + DCT_offset, + DCT_stride); + slice_intra_DCT (picture, 0, dest[0] + offset + DCT_offset + 8, + DCT_stride); + + slice_intra_DCT (picture, 1, dest[1] + (offset >> 1), stride >> 1); + slice_intra_DCT (picture, 2, dest[2] + (offset >> 1), stride >> 1); + + if (picture->picture_coding_type == D_TYPE) { + NEEDBITS (bit_buf, bits, bit_ptr); + DUMPBITS (bit_buf, bits, 1); + } + } else { + + if (picture->mpeg1) { + if ((macroblock_modes & MOTION_TYPE_MASK) == MC_FRAME) + MOTION (motion_mp1, macroblock_modes); + else { + /* non-intra mb without forward mv in a P picture */ + picture->f_motion.pmv[0][0] = 0; + picture->f_motion.pmv[0][1] = 0; + picture->f_motion.pmv[1][0] = 0; + picture->f_motion.pmv[1][1] = 0; + MOTION (motion_fr_zero, MACROBLOCK_MOTION_FORWARD); + } + } else if (picture->picture_structure == FRAME_PICTURE) + switch (macroblock_modes & MOTION_TYPE_MASK) { + case MC_FRAME: + MOTION (motion_fr_frame, macroblock_modes); + break; + + case MC_FIELD: + MOTION (motion_fr_field, macroblock_modes); + break; + + case MC_DMV: + MOTION (motion_fr_dmv, MACROBLOCK_MOTION_FORWARD); + break; + + case 0: + /* non-intra mb without forward mv in a P picture */ + picture->f_motion.pmv[0][0] = 0; + picture->f_motion.pmv[0][1] = 0; + picture->f_motion.pmv[1][0] = 0; + picture->f_motion.pmv[1][1] = 0; + MOTION (motion_fr_zero, MACROBLOCK_MOTION_FORWARD); + break; + } + else + switch (macroblock_modes & MOTION_TYPE_MASK) { + case MC_FIELD: + MOTION (motion_fi_field, macroblock_modes); + break; + + case MC_16X8: + MOTION (motion_fi_16x8, macroblock_modes); + break; + + case MC_DMV: + MOTION (motion_fi_dmv, MACROBLOCK_MOTION_FORWARD); + break; + + case 0: + /* non-intra mb without forward mv in a P picture */ + picture->f_motion.pmv[0][0] = 0; + picture->f_motion.pmv[0][1] = 0; + picture->f_motion.pmv[1][0] = 0; + picture->f_motion.pmv[1][1] = 0; + MOTION (motion_fi_zero, MACROBLOCK_MOTION_FORWARD); + break; + } + + if (macroblock_modes & MACROBLOCK_PATTERN) { + int coded_block_pattern; + int DCT_offset, DCT_stride; + + if (macroblock_modes & DCT_TYPE_INTERLACED) { + DCT_offset = stride; + DCT_stride = stride * 2; + } else { + DCT_offset = stride * 8; + DCT_stride = stride; + } + + coded_block_pattern = get_coded_block_pattern (picture); + + if (coded_block_pattern & 0x20) + slice_non_intra_DCT (picture, dest[0] + offset, + DCT_stride); + if (coded_block_pattern & 0x10) + slice_non_intra_DCT (picture, dest[0] + offset + 8, + DCT_stride); + if (coded_block_pattern & 0x08) + slice_non_intra_DCT (picture, + dest[0] + offset + DCT_offset, + DCT_stride); + if (coded_block_pattern & 0x04) + slice_non_intra_DCT (picture, + dest[0] + offset + DCT_offset + 8, + DCT_stride); + + if (coded_block_pattern & 0x2) + slice_non_intra_DCT (picture, dest[1] + (offset >> 1), + stride >> 1); + if (coded_block_pattern & 0x1) + slice_non_intra_DCT (picture, dest[2] + (offset >> 1), + stride >> 1); + } + + picture->dc_dct_pred[0] = picture->dc_dct_pred[1] = + picture->dc_dct_pred[2] = 1 << (picture->intra_dc_precision+7); + } + + offset += 16; + CHECK_DISPLAY; + + NEEDBITS (bit_buf, bits, bit_ptr); + + if (bit_buf & 0x80000000) { + DUMPBITS (bit_buf, bits, 1); + } else { + int mba_inc; + + mba_inc = get_macroblock_address_increment (picture); + if (!mba_inc) + break; + + picture->dc_dct_pred[0] = picture->dc_dct_pred[1] = + picture->dc_dct_pred[2] = 1 << (picture->intra_dc_precision+7); + + if (picture->picture_coding_type == P_TYPE) { + picture->f_motion.pmv[0][0] = picture->f_motion.pmv[0][1] = 0; + picture->f_motion.pmv[1][0] = picture->f_motion.pmv[1][1] = 0; + + do { + if (picture->picture_structure == FRAME_PICTURE) + MOTION (motion_fr_zero, MACROBLOCK_MOTION_FORWARD); + else + MOTION (motion_fi_zero, MACROBLOCK_MOTION_FORWARD); + + offset += 16; + CHECK_DISPLAY; + } while (--mba_inc); + } else { + do { + if (picture->mpeg1) + MOTION (motion_mp1_reuse, macroblock_modes); + else if (picture->picture_structure == FRAME_PICTURE) + MOTION (motion_fr_reuse, macroblock_modes); + else + MOTION (motion_fi_reuse, macroblock_modes); + + offset += 16; + CHECK_DISPLAY; + } while (--mba_inc); + } + } + } + + return 0; +#undef bit_buf +#undef bits +#undef bit_ptr +} diff --git a/src/libmpeg2/stats.c b/src/libmpeg2/stats.c new file mode 100644 index 000000000..f3456058d --- /dev/null +++ b/src/libmpeg2/stats.c @@ -0,0 +1,315 @@ +/* + * stats.c + * Copyright (C) 1999-2001 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#include <stdio.h> +#include <stdlib.h> +#include <inttypes.h> + +#include "mpeg2_internal.h" + +static int debug_level = -1; + +/* Determine is debug output is required. */ +/* We could potentially have multiple levels of debug info */ +static int debug_is_on (void) +{ + char * env_var; + + if (debug_level < 0) { + env_var = getenv ("MPEG2_DEBUG"); + + if (env_var) + debug_level = 1; + else + debug_level = 0; + } + + return debug_level; +} + +static void stats_picture (uint8_t * buffer) +{ + static char * picture_coding_type_str [8] = { + "Invalid picture type", + "I-type", + "P-type", + "B-type", + "D (very bad)", + "Invalid","Invalid","Invalid" + }; + + int picture_coding_type; + int temporal_reference; + int vbv_delay; + + temporal_reference = (buffer[0] << 2) | (buffer[1] >> 6); + picture_coding_type = (buffer [1] >> 3) & 7; + vbv_delay = ((buffer[1] << 13) | (buffer[2] << 5) | + (buffer[3] >> 3)) & 0xffff; + + fprintf (stderr, " (picture) %s temporal_reference %d, vbv_delay %d\n", + picture_coding_type_str [picture_coding_type], + temporal_reference, vbv_delay); +} + +static void stats_user_data (uint8_t * buffer) +{ + fprintf (stderr, " (user_data)\n"); +} + +static void stats_sequence (uint8_t * buffer) +{ + static char * aspect_ratio_information_str[8] = { + "Invalid Aspect Ratio", + "1:1", + "4:3", + "16:9", + "2.21:1", + "Invalid Aspect Ratio", + "Invalid Aspect Ratio", + "Invalid Aspect Ratio" + }; + static char * frame_rate_str[16] = { + "Invalid frame_rate_code", + "23.976", "24", "25" , "29.97", + "30" , "50", "59.94", "60" , + "Invalid frame_rate_code", "Invalid frame_rate_code", + "Invalid frame_rate_code", "Invalid frame_rate_code", + "Invalid frame_rate_code", "Invalid frame_rate_code", + "Invalid frame_rate_code" + }; + + int horizontal_size; + int vertical_size; + int aspect_ratio_information; + int frame_rate_code; + int bit_rate_value; + int vbv_buffer_size_value; + int constrained_parameters_flag; + int load_intra_quantizer_matrix; + int load_non_intra_quantizer_matrix; + + vertical_size = (buffer[0] << 16) | (buffer[1] << 8) | buffer[2]; + horizontal_size = vertical_size >> 12; + vertical_size &= 0xfff; + aspect_ratio_information = buffer[3] >> 4; + frame_rate_code = buffer[3] & 15; + bit_rate_value = (buffer[4] << 10) | (buffer[5] << 2) | (buffer[6] >> 6); + vbv_buffer_size_value = ((buffer[6] << 5) | (buffer[7] >> 3)) & 0x3ff; + constrained_parameters_flag = buffer[7] & 4; + load_intra_quantizer_matrix = buffer[7] & 2; + if (load_intra_quantizer_matrix) + buffer += 64; + load_non_intra_quantizer_matrix = buffer[7] & 1; + + fprintf (stderr, " (seq) %dx%d %s, %s fps, %5.0f kbps, VBV %d kB%s%s%s\n", + horizontal_size, vertical_size, + aspect_ratio_information_str [aspect_ratio_information], + frame_rate_str [frame_rate_code], + bit_rate_value * 400.0 / 1000.0, + 2 * vbv_buffer_size_value, + constrained_parameters_flag ? " , CP":"", + load_intra_quantizer_matrix ? " , Custom Intra Matrix":"", + load_non_intra_quantizer_matrix ? " , Custom Non-Intra Matrix":""); +} + +static void stats_sequence_error (uint8_t * buffer) +{ + fprintf (stderr, " (sequence_error)\n"); +} + +static void stats_sequence_end (uint8_t * buffer) +{ + fprintf (stderr, " (sequence_end)\n"); +} + +static void stats_group (uint8_t * buffer) +{ + fprintf (stderr, " (group)%s%s\n", + (buffer[4] & 0x40) ? " closed_gop" : "", + (buffer[4] & 0x20) ? " broken_link" : ""); +} + +static void stats_slice (uint8_t code, uint8_t * buffer) +{ + /* fprintf (stderr, " (slice %d)\n", code); */ +} + +static void stats_sequence_extension (uint8_t * buffer) +{ + static char * chroma_format_str[4] = { + "Invalid Chroma Format", + "4:2:0 Chroma", + "4:2:2 Chroma", + "4:4:4 Chroma" + }; + + int progressive_sequence; + int chroma_format; + + progressive_sequence = (buffer[1] >> 3) & 1; + chroma_format = (buffer[1] >> 1) & 3; + + fprintf (stderr, " (seq_ext) progressive_sequence %d, %s\n", + progressive_sequence, chroma_format_str [chroma_format]); +} + +static void stats_sequence_display_extension (uint8_t * buffer) +{ + fprintf (stderr, " (sequence_display_extension)\n"); +} + +static void stats_quant_matrix_extension (uint8_t * buffer) +{ + fprintf (stderr, " (quant_matrix_extension)\n"); +} + +static void stats_copyright_extension (uint8_t * buffer) +{ + fprintf (stderr, " (copyright_extension)\n"); +} + + +static void stats_sequence_scalable_extension (uint8_t * buffer) +{ + fprintf (stderr, " (sequence_scalable_extension)\n"); +} + +static void stats_picture_display_extension (uint8_t * buffer) +{ + fprintf (stderr, " (picture_display_extension)\n"); +} + +static void stats_picture_coding_extension (uint8_t * buffer) +{ + static char * picture_structure_str[4] = { + "Invalid Picture Structure", + "Top field", + "Bottom field", + "Frame Picture" + }; + + int f_code[2][2]; + int intra_dc_precision; + int picture_structure; + int top_field_first; + int frame_pred_frame_dct; + int concealment_motion_vectors; + int q_scale_type; + int intra_vlc_format; + int alternate_scan; + int repeat_first_field; + int progressive_frame; + + f_code[0][0] = buffer[0] & 15; + f_code[0][1] = buffer[1] >> 4; + f_code[1][0] = buffer[1] & 15; + f_code[1][1] = buffer[2] >> 4; + intra_dc_precision = (buffer[2] >> 2) & 3; + picture_structure = buffer[2] & 3; + top_field_first = buffer[3] >> 7; + frame_pred_frame_dct = (buffer[3] >> 6) & 1; + concealment_motion_vectors = (buffer[3] >> 5) & 1; + q_scale_type = (buffer[3] >> 4) & 1; + intra_vlc_format = (buffer[3] >> 3) & 1; + alternate_scan = (buffer[3] >> 2) & 1; + repeat_first_field = (buffer[3] >> 1) & 1; + progressive_frame = buffer[4] >> 7; + + fprintf (stderr, + " (pic_ext) %s\n", picture_structure_str [picture_structure]); + fprintf (stderr, + " (pic_ext) forward horizontal f_code % d, forward vertical f_code % d\n", + f_code[0][0], f_code[0][1]); + fprintf (stderr, + " (pic_ext) backward horizontal f_code % d, backward vertical f_code % d\n", + f_code[1][0], f_code[1][1]); + fprintf (stderr, + " (pic_ext) intra_dc_precision %d, top_field_first %d, frame_pred_frame_dct %d\n", + intra_dc_precision, top_field_first, frame_pred_frame_dct); + fprintf (stderr, + " (pic_ext) concealment_motion_vectors %d, q_scale_type %d, intra_vlc_format %d\n", + concealment_motion_vectors, q_scale_type, intra_vlc_format); + fprintf (stderr, + " (pic_ext) alternate_scan %d, repeat_first_field %d, progressive_frame %d\n", + alternate_scan, repeat_first_field, progressive_frame); +} + +void stats_header (uint8_t code, uint8_t * buffer) +{ + if (! (debug_is_on ())) + return; + + switch (code) { + case 0x00: + stats_picture (buffer); + break; + case 0xb2: + stats_user_data (buffer); + break; + case 0xb3: + stats_sequence (buffer); + break; + case 0xb4: + stats_sequence_error (buffer); + break; + case 0xb5: + switch (buffer[0] >> 4) { + case 1: + stats_sequence_extension (buffer); + break; + case 2: + stats_sequence_display_extension (buffer); + break; + case 3: + stats_quant_matrix_extension (buffer); + break; + case 4: + stats_copyright_extension (buffer); + break; + case 5: + stats_sequence_scalable_extension (buffer); + break; + case 7: + stats_picture_display_extension (buffer); + break; + case 8: + stats_picture_coding_extension (buffer); + break; + default: + fprintf (stderr, " (unknown extension %#x)\n", buffer[0] >> 4); + } + break; + case 0xb7: + stats_sequence_end (buffer); + break; + case 0xb8: + stats_group (buffer); + break; + default: + if (code < 0xb0) + stats_slice (code, buffer); + else + fprintf (stderr, " (unknown start code %#02x)\n", code); + } +} diff --git a/src/libmpeg2/vlc.h b/src/libmpeg2/vlc.h new file mode 100644 index 000000000..ed2e04f88 --- /dev/null +++ b/src/libmpeg2/vlc.h @@ -0,0 +1,425 @@ +/* + * vlc.h + * Copyright (C) 1999-2001 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#define GETWORD(bit_buf,shift,bit_ptr) \ +do { \ + bit_buf |= ((bit_ptr[0] << 8) | bit_ptr[1]) << (shift); \ + bit_ptr += 2; \ +} while (0) + +static inline void bitstream_init (picture_t * picture, uint8_t * start) +{ + picture->bitstream_buf = 0; GETWORD (picture->bitstream_buf, 16, start); + picture->bitstream_ptr = start; + picture->bitstream_bits = 0; +} + +/* make sure that there are at least 16 valid bits in bit_buf */ +#define NEEDBITS(bit_buf,bits,bit_ptr) \ +do { \ + if (bits > 0) { \ + GETWORD (bit_buf, bits, bit_ptr); \ + bits -= 16; \ + } \ +} while (0) + +/* remove num valid bits from bit_buf */ +#define DUMPBITS(bit_buf,bits,num) \ +do { \ + bit_buf <<= (num); \ + bits += (num); \ +} while (0) + +/* take num bits from the high part of bit_buf and zero extend them */ +#define UBITS(bit_buf,num) (((uint32_t)(bit_buf)) >> (32 - (num))) + +/* take num bits from the high part of bit_buf and sign extend them */ +#define SBITS(bit_buf,num) (((int32_t)(bit_buf)) >> (32 - (num))) + +typedef struct { + uint8_t modes; + uint8_t len; +} MBtab; + +typedef struct { + uint8_t delta; + uint8_t len; +} MVtab; + +typedef struct { + int8_t dmv; + uint8_t len; +} DMVtab; + +typedef struct { + uint8_t cbp; + uint8_t len; +} CBPtab; + +typedef struct { + uint8_t size; + uint8_t len; +} DCtab; + +typedef struct { + uint8_t run; + uint8_t level; + uint8_t len; +} DCTtab; + +typedef struct { + uint8_t mba; + uint8_t len; +} MBAtab; + + +#define INTRA MACROBLOCK_INTRA +#define QUANT MACROBLOCK_QUANT + +static MBtab MB_I [] = { + {INTRA|QUANT, 2}, {INTRA, 1} +}; + +#define MC MACROBLOCK_MOTION_FORWARD +#define CODED MACROBLOCK_PATTERN + +static MBtab MB_P [] = { + {INTRA|QUANT, 6}, {CODED|QUANT, 5}, {MC|CODED|QUANT, 5}, {INTRA, 5}, + {MC, 3}, {MC, 3}, {MC, 3}, {MC, 3}, + {CODED, 2}, {CODED, 2}, {CODED, 2}, {CODED, 2}, + {CODED, 2}, {CODED, 2}, {CODED, 2}, {CODED, 2}, + {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, + {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, + {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, + {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1} +}; + +#define FWD MACROBLOCK_MOTION_FORWARD +#define BWD MACROBLOCK_MOTION_BACKWARD +#define INTER MACROBLOCK_MOTION_FORWARD|MACROBLOCK_MOTION_BACKWARD + +static MBtab MB_B [] = { + {0, 0}, {INTRA|QUANT, 6}, + {BWD|CODED|QUANT, 6}, {FWD|CODED|QUANT, 6}, + {INTER|CODED|QUANT, 5}, {INTER|CODED|QUANT, 5}, + {INTRA, 5}, {INTRA, 5}, + {FWD, 4}, {FWD, 4}, {FWD, 4}, {FWD, 4}, + {FWD|CODED, 4}, {FWD|CODED, 4}, {FWD|CODED, 4}, {FWD|CODED, 4}, + {BWD, 3}, {BWD, 3}, {BWD, 3}, {BWD, 3}, + {BWD, 3}, {BWD, 3}, {BWD, 3}, {BWD, 3}, + {BWD|CODED, 3}, {BWD|CODED, 3}, {BWD|CODED, 3}, {BWD|CODED, 3}, + {BWD|CODED, 3}, {BWD|CODED, 3}, {BWD|CODED, 3}, {BWD|CODED, 3}, + {INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2}, + {INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2}, + {INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2}, + {INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2}, + {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, + {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, + {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, + {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2} +}; + +#undef INTRA +#undef QUANT +#undef MC +#undef CODED +#undef FWD +#undef BWD +#undef INTER + + +static MVtab MV_4 [] = { + { 3, 6}, { 2, 4}, { 1, 3}, { 1, 3}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2} +}; + +static MVtab MV_10 [] = { + { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, + { 0,10}, { 0,10}, { 0,10}, { 0,10}, {15,10}, {14,10}, {13,10}, {12,10}, + {11,10}, {10,10}, { 9, 9}, { 9, 9}, { 8, 9}, { 8, 9}, { 7, 9}, { 7, 9}, + { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, + { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, + { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7} +}; + + +static DMVtab DMV_2 [] = { + { 0, 1}, { 0, 1}, { 1, 2}, {-1, 2} +}; + + +static CBPtab CBP_7 [] = { + {0x22, 7}, {0x12, 7}, {0x0a, 7}, {0x06, 7}, + {0x21, 7}, {0x11, 7}, {0x09, 7}, {0x05, 7}, + {0x3f, 6}, {0x3f, 6}, {0x03, 6}, {0x03, 6}, + {0x24, 6}, {0x24, 6}, {0x18, 6}, {0x18, 6}, + {0x3e, 5}, {0x3e, 5}, {0x3e, 5}, {0x3e, 5}, + {0x02, 5}, {0x02, 5}, {0x02, 5}, {0x02, 5}, + {0x3d, 5}, {0x3d, 5}, {0x3d, 5}, {0x3d, 5}, + {0x01, 5}, {0x01, 5}, {0x01, 5}, {0x01, 5}, + {0x38, 5}, {0x38, 5}, {0x38, 5}, {0x38, 5}, + {0x34, 5}, {0x34, 5}, {0x34, 5}, {0x34, 5}, + {0x2c, 5}, {0x2c, 5}, {0x2c, 5}, {0x2c, 5}, + {0x1c, 5}, {0x1c, 5}, {0x1c, 5}, {0x1c, 5}, + {0x28, 5}, {0x28, 5}, {0x28, 5}, {0x28, 5}, + {0x14, 5}, {0x14, 5}, {0x14, 5}, {0x14, 5}, + {0x30, 5}, {0x30, 5}, {0x30, 5}, {0x30, 5}, + {0x0c, 5}, {0x0c, 5}, {0x0c, 5}, {0x0c, 5}, + {0x20, 4}, {0x20, 4}, {0x20, 4}, {0x20, 4}, + {0x20, 4}, {0x20, 4}, {0x20, 4}, {0x20, 4}, + {0x10, 4}, {0x10, 4}, {0x10, 4}, {0x10, 4}, + {0x10, 4}, {0x10, 4}, {0x10, 4}, {0x10, 4}, + {0x08, 4}, {0x08, 4}, {0x08, 4}, {0x08, 4}, + {0x08, 4}, {0x08, 4}, {0x08, 4}, {0x08, 4}, + {0x04, 4}, {0x04, 4}, {0x04, 4}, {0x04, 4}, + {0x04, 4}, {0x04, 4}, {0x04, 4}, {0x04, 4}, + {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, + {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, + {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, + {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3} +}; + +static CBPtab CBP_9 [] = { + {0, 0}, {0x00, 9}, {0x27, 9}, {0x1b, 9}, + {0x3b, 9}, {0x37, 9}, {0x2f, 9}, {0x1f, 9}, + {0x3a, 8}, {0x3a, 8}, {0x36, 8}, {0x36, 8}, + {0x2e, 8}, {0x2e, 8}, {0x1e, 8}, {0x1e, 8}, + {0x39, 8}, {0x39, 8}, {0x35, 8}, {0x35, 8}, + {0x2d, 8}, {0x2d, 8}, {0x1d, 8}, {0x1d, 8}, + {0x26, 8}, {0x26, 8}, {0x1a, 8}, {0x1a, 8}, + {0x25, 8}, {0x25, 8}, {0x19, 8}, {0x19, 8}, + {0x2b, 8}, {0x2b, 8}, {0x17, 8}, {0x17, 8}, + {0x33, 8}, {0x33, 8}, {0x0f, 8}, {0x0f, 8}, + {0x2a, 8}, {0x2a, 8}, {0x16, 8}, {0x16, 8}, + {0x32, 8}, {0x32, 8}, {0x0e, 8}, {0x0e, 8}, + {0x29, 8}, {0x29, 8}, {0x15, 8}, {0x15, 8}, + {0x31, 8}, {0x31, 8}, {0x0d, 8}, {0x0d, 8}, + {0x23, 8}, {0x23, 8}, {0x13, 8}, {0x13, 8}, + {0x0b, 8}, {0x0b, 8}, {0x07, 8}, {0x07, 8} +}; + + +static DCtab DC_lum_5 [] = { + {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, + {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, + {0, 3}, {0, 3}, {0, 3}, {0, 3}, {3, 3}, {3, 3}, {3, 3}, {3, 3}, + {4, 3}, {4, 3}, {4, 3}, {4, 3}, {5, 4}, {5, 4}, {6, 5} +}; + +static DCtab DC_chrom_5 [] = { + {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, + {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, + {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, + {3, 3}, {3, 3}, {3, 3}, {3, 3}, {4, 4}, {4, 4}, {5, 5} +}; + +static DCtab DC_long [] = { + {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, { 6, 5}, { 6, 5}, + {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, { 6, 5}, { 6, 5}, + {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, { 7, 6}, { 7, 6}, + {8, 7}, {8, 7}, {8, 7}, {8, 7}, {9, 8}, {9, 8}, {10, 9}, {11, 9} +}; + + +static DCTtab DCT_16 [] = { + {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, + {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, + {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, + {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, + { 2,18, 0}, { 2,17, 0}, { 2,16, 0}, { 2,15, 0}, + { 7, 3, 0}, { 17, 2, 0}, { 16, 2, 0}, { 15, 2, 0}, + { 14, 2, 0}, { 13, 2, 0}, { 12, 2, 0}, { 32, 1, 0}, + { 31, 1, 0}, { 30, 1, 0}, { 29, 1, 0}, { 28, 1, 0} +}; + +static DCTtab DCT_15 [] = { + { 1,40,15}, { 1,39,15}, { 1,38,15}, { 1,37,15}, + { 1,36,15}, { 1,35,15}, { 1,34,15}, { 1,33,15}, + { 1,32,15}, { 2,14,15}, { 2,13,15}, { 2,12,15}, + { 2,11,15}, { 2,10,15}, { 2, 9,15}, { 2, 8,15}, + { 1,31,14}, { 1,31,14}, { 1,30,14}, { 1,30,14}, + { 1,29,14}, { 1,29,14}, { 1,28,14}, { 1,28,14}, + { 1,27,14}, { 1,27,14}, { 1,26,14}, { 1,26,14}, + { 1,25,14}, { 1,25,14}, { 1,24,14}, { 1,24,14}, + { 1,23,14}, { 1,23,14}, { 1,22,14}, { 1,22,14}, + { 1,21,14}, { 1,21,14}, { 1,20,14}, { 1,20,14}, + { 1,19,14}, { 1,19,14}, { 1,18,14}, { 1,18,14}, + { 1,17,14}, { 1,17,14}, { 1,16,14}, { 1,16,14} +}; + +static DCTtab DCT_13 [] = { + { 11, 2,13}, { 10, 2,13}, { 6, 3,13}, { 4, 4,13}, + { 3, 5,13}, { 2, 7,13}, { 2, 6,13}, { 1,15,13}, + { 1,14,13}, { 1,13,13}, { 1,12,13}, { 27, 1,13}, + { 26, 1,13}, { 25, 1,13}, { 24, 1,13}, { 23, 1,13}, + { 1,11,12}, { 1,11,12}, { 9, 2,12}, { 9, 2,12}, + { 5, 3,12}, { 5, 3,12}, { 1,10,12}, { 1,10,12}, + { 3, 4,12}, { 3, 4,12}, { 8, 2,12}, { 8, 2,12}, + { 22, 1,12}, { 22, 1,12}, { 21, 1,12}, { 21, 1,12}, + { 1, 9,12}, { 1, 9,12}, { 20, 1,12}, { 20, 1,12}, + { 19, 1,12}, { 19, 1,12}, { 2, 5,12}, { 2, 5,12}, + { 4, 3,12}, { 4, 3,12}, { 1, 8,12}, { 1, 8,12}, + { 7, 2,12}, { 7, 2,12}, { 18, 1,12}, { 18, 1,12} +}; + +static DCTtab DCT_B14_10 [] = { + { 17, 1,10}, { 6, 2,10}, { 1, 7,10}, { 3, 3,10}, + { 2, 4,10}, { 16, 1,10}, { 15, 1,10}, { 5, 2,10} +}; + +static DCTtab DCT_B14_8 [] = { + { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6}, + { 3, 2, 7}, { 3, 2, 7}, { 10, 1, 7}, { 10, 1, 7}, + { 1, 4, 7}, { 1, 4, 7}, { 9, 1, 7}, { 9, 1, 7}, + { 8, 1, 6}, { 8, 1, 6}, { 8, 1, 6}, { 8, 1, 6}, + { 7, 1, 6}, { 7, 1, 6}, { 7, 1, 6}, { 7, 1, 6}, + { 2, 2, 6}, { 2, 2, 6}, { 2, 2, 6}, { 2, 2, 6}, + { 6, 1, 6}, { 6, 1, 6}, { 6, 1, 6}, { 6, 1, 6}, + { 14, 1, 8}, { 1, 6, 8}, { 13, 1, 8}, { 12, 1, 8}, + { 4, 2, 8}, { 2, 3, 8}, { 1, 5, 8}, { 11, 1, 8} +}; + +static DCTtab DCT_B14AC_5 [] = { + { 1, 3, 5}, { 5, 1, 5}, { 4, 1, 5}, + { 1, 2, 4}, { 1, 2, 4}, { 3, 1, 4}, { 3, 1, 4}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + {129, 0, 2}, {129, 0, 2}, {129, 0, 2}, {129, 0, 2}, + {129, 0, 2}, {129, 0, 2}, {129, 0, 2}, {129, 0, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2} +}; + +static DCTtab DCT_B14DC_5 [] = { + { 1, 3, 5}, { 5, 1, 5}, { 4, 1, 5}, + { 1, 2, 4}, { 1, 2, 4}, { 3, 1, 4}, { 3, 1, 4}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, + { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, + { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, + { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1} +}; + +static DCTtab DCT_B15_10 [] = { + { 6, 2, 9}, { 6, 2, 9}, { 15, 1, 9}, { 15, 1, 9}, + { 3, 4,10}, { 17, 1,10}, { 16, 1, 9}, { 16, 1, 9} +}; + +static DCTtab DCT_B15_8 [] = { + { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6}, + { 8, 1, 7}, { 8, 1, 7}, { 9, 1, 7}, { 9, 1, 7}, + { 7, 1, 7}, { 7, 1, 7}, { 3, 2, 7}, { 3, 2, 7}, + { 1, 7, 6}, { 1, 7, 6}, { 1, 7, 6}, { 1, 7, 6}, + { 1, 6, 6}, { 1, 6, 6}, { 1, 6, 6}, { 1, 6, 6}, + { 5, 1, 6}, { 5, 1, 6}, { 5, 1, 6}, { 5, 1, 6}, + { 6, 1, 6}, { 6, 1, 6}, { 6, 1, 6}, { 6, 1, 6}, + { 2, 5, 8}, { 12, 1, 8}, { 1,11, 8}, { 1,10, 8}, + { 14, 1, 8}, { 13, 1, 8}, { 4, 2, 8}, { 2, 4, 8}, + { 3, 1, 5}, { 3, 1, 5}, { 3, 1, 5}, { 3, 1, 5}, + { 3, 1, 5}, { 3, 1, 5}, { 3, 1, 5}, { 3, 1, 5}, + { 2, 2, 5}, { 2, 2, 5}, { 2, 2, 5}, { 2, 2, 5}, + { 2, 2, 5}, { 2, 2, 5}, { 2, 2, 5}, { 2, 2, 5}, + { 4, 1, 5}, { 4, 1, 5}, { 4, 1, 5}, { 4, 1, 5}, + { 4, 1, 5}, { 4, 1, 5}, { 4, 1, 5}, { 4, 1, 5}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, + {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, + {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, + {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, + { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, + { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, + { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, + { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, + { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, + { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, + { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, + { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, + { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, + { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, + { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, + { 1, 4, 5}, { 1, 4, 5}, { 1, 4, 5}, { 1, 4, 5}, + { 1, 4, 5}, { 1, 4, 5}, { 1, 4, 5}, { 1, 4, 5}, + { 1, 5, 5}, { 1, 5, 5}, { 1, 5, 5}, { 1, 5, 5}, + { 1, 5, 5}, { 1, 5, 5}, { 1, 5, 5}, { 1, 5, 5}, + { 10, 1, 7}, { 10, 1, 7}, { 2, 3, 7}, { 2, 3, 7}, + { 11, 1, 7}, { 11, 1, 7}, { 1, 8, 7}, { 1, 8, 7}, + { 1, 9, 7}, { 1, 9, 7}, { 1,12, 8}, { 1,13, 8}, + { 3, 3, 8}, { 5, 2, 8}, { 1,14, 8}, { 1,15, 8} +}; + + +static MBAtab MBA_5 [] = { + {6, 5}, {5, 5}, {4, 4}, {4, 4}, {3, 4}, {3, 4}, + {2, 3}, {2, 3}, {2, 3}, {2, 3}, {1, 3}, {1, 3}, {1, 3}, {1, 3}, + {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, + {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1} +}; + +static MBAtab MBA_11 [] = { + {32, 11}, {31, 11}, {30, 11}, {29, 11}, + {28, 11}, {27, 11}, {26, 11}, {25, 11}, + {24, 11}, {23, 11}, {22, 11}, {21, 11}, + {20, 10}, {20, 10}, {19, 10}, {19, 10}, + {18, 10}, {18, 10}, {17, 10}, {17, 10}, + {16, 10}, {16, 10}, {15, 10}, {15, 10}, + {14, 8}, {14, 8}, {14, 8}, {14, 8}, + {14, 8}, {14, 8}, {14, 8}, {14, 8}, + {13, 8}, {13, 8}, {13, 8}, {13, 8}, + {13, 8}, {13, 8}, {13, 8}, {13, 8}, + {12, 8}, {12, 8}, {12, 8}, {12, 8}, + {12, 8}, {12, 8}, {12, 8}, {12, 8}, + {11, 8}, {11, 8}, {11, 8}, {11, 8}, + {11, 8}, {11, 8}, {11, 8}, {11, 8}, + {10, 8}, {10, 8}, {10, 8}, {10, 8}, + {10, 8}, {10, 8}, {10, 8}, {10, 8}, + { 9, 8}, { 9, 8}, { 9, 8}, { 9, 8}, + { 9, 8}, { 9, 8}, { 9, 8}, { 9, 8}, + { 8, 7}, { 8, 7}, { 8, 7}, { 8, 7}, + { 8, 7}, { 8, 7}, { 8, 7}, { 8, 7}, + { 8, 7}, { 8, 7}, { 8, 7}, { 8, 7}, + { 8, 7}, { 8, 7}, { 8, 7}, { 8, 7}, + { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7}, + { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7}, + { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7}, + { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7} +}; |