diff options
Diffstat (limited to 'src/video_dec')
91 files changed, 43654 insertions, 0 deletions
diff --git a/src/video_dec/Makefile.am b/src/video_dec/Makefile.am new file mode 100644 index 000000000..f20a88bad --- /dev/null +++ b/src/video_dec/Makefile.am @@ -0,0 +1,45 @@ +include $(top_srcdir)/misc/Makefile.quiet +SUBDIRS = \ + libmpeg2 \ + libmpeg2new \ + libvdpau + +include $(top_builddir)/misc/Makefile.plugins +include $(top_srcdir)/misc/Makefile.common + +AM_CFLAGS = $(DEFAULT_OCFLAGS) $(VISIBILITY_FLAG) +AM_LDFLAGS = $(xineplug_ldflags) + +EXTRA_DIST = foovideo.c + +if ENABLE_IMAGEMAGICK +image_module = xineplug_decode_image.la +endif + +if ENABLE_GDK_PIXBUF +gdkpixbuf_module = xineplug_decode_gdk_pixbuf.la +endif + +xineplug_LTLIBRARIES = $(image_module) \ + $(gdkpixbuf_module) \ + $(theora_module) \ + xineplug_decode_bitplane.la \ + xineplug_decode_rgb.la \ + xineplug_decode_yuv.la + +xineplug_decode_bitplane_la_SOURCES = bitplane.c +xineplug_decode_bitplane_la_LIBADD = $(XINE_LIB) $(LTLIBINTL) + +xineplug_decode_rgb_la_SOURCES = rgb.c +xineplug_decode_rgb_la_LIBADD = $(XINE_LIB) + +xineplug_decode_yuv_la_SOURCES = yuv.c +xineplug_decode_yuv_la_LIBADD = $(XINE_LIB) + +xineplug_decode_image_la_SOURCES = image.c +xineplug_decode_image_la_LIBADD = $(XINE_LIB) $(DYNAMIC_LD_LIBS) $(WAND_LIBS) +xineplug_decode_image_la_CFLAGS = $(AM_CFLAGS) $(WAND_CFLAGS) + +xineplug_decode_gdk_pixbuf_la_SOURCES = gdkpixbuf.c +xineplug_decode_gdk_pixbuf_la_LIBADD = $(XINE_LIB) $(DYNAMIC_LD_LIBS) $(GDK_PIXBUF_LIBS) +xineplug_decode_gdk_pixbuf_la_CFLAGS = $(AM_CFLAGS) $(GDK_PIXBUF_CFLAGS) diff --git a/src/video_dec/bitplane.c b/src/video_dec/bitplane.c new file mode 100644 index 000000000..fa9f0ffba --- /dev/null +++ b/src/video_dec/bitplane.c @@ -0,0 +1,1550 @@ +/* + * Copyright (C) 2004 the xine project + * + * This file is part of xine, a free video player. 
+ * + * xine is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * xine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110, USA + * + * Bitplane "Decoder" by Manfred Tremmel (Manfred.Tremmel@iiv.de) + * Converts Amiga typical bitplane pictures to a YUV2 map + * suitable for display under xine. It's based on the rgb-decoder + * and the development documentation from the Amiga Developer CD + * + * Supported formats: + * - uncompressed and byterun1 compressed ILBM data + * - IFF ANIM compression methods OPT 5, 7 (long and short) and + * 8 (long and short) + * - untested (found no testfiles) IFF-ANIM OPT 3, 4 and 6 + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> +#include <unistd.h> + +#include <xine/xine_internal.h> +#include <xine/video_out.h> +#include <xine/buffer.h> +#include <xine/xineutils.h> +#include "bswap.h" + +#include "demuxers/iff.h" + +#define IFF_REPLACE_BYTE_SIMPLE(ptr, old_data, new_data, colorindexx ) { \ + register uint8_t *index_ptr = ptr; \ + register uint8_t colorindex = colorindexx; \ + *index_ptr -= ((old_data & 0x80) ? colorindex : 0); \ + *index_ptr++ += ((new_data & 0x80) ? colorindex : 0); \ + *index_ptr -= ((old_data & 0x40) ? colorindex : 0); \ + *index_ptr++ += ((new_data & 0x40) ? colorindex : 0); \ + *index_ptr -= ((old_data & 0x20) ? 
colorindex : 0); \ + *index_ptr++ += ((new_data & 0x20) ? colorindex : 0); \ + *index_ptr -= ((old_data & 0x10) ? colorindex : 0); \ + *index_ptr++ += ((new_data & 0x10) ? colorindex : 0); \ + *index_ptr -= ((old_data & 0x08) ? colorindex : 0); \ + *index_ptr++ += ((new_data & 0x08) ? colorindex : 0); \ + *index_ptr -= ((old_data & 0x04) ? colorindex : 0); \ + *index_ptr++ += ((new_data & 0x04) ? colorindex : 0); \ + *index_ptr -= ((old_data & 0x02) ? colorindex : 0); \ + *index_ptr++ += ((new_data & 0x02) ? colorindex : 0); \ + *index_ptr -= ((old_data & 0x01) ? colorindex : 0); \ + *index_ptr += ((new_data & 0x01) ? colorindex : 0); \ + old_data = new_data; \ +} + +#define IFF_REPLACE_BYTE(ptr, yuvy, yuvu, yuvv, yuv_palette, old_data, new_data, colorindexx ) { \ + register uint8_t *index_ptr = ptr; \ + register uint8_t colorindex = colorindexx; \ + register uint8_t *yuv_y = yuvy; \ + register uint8_t *yuv_u = yuvu; \ + register uint8_t *yuv_v = yuvv; \ + *index_ptr -= ((old_data & 0x80) ? colorindex : 0); \ + *index_ptr += ((new_data & 0x80) ? colorindex : 0); \ + yuv_index = *index_ptr++ * 4; \ + *yuv_y++ = yuv_palette[yuv_index++]; \ + *yuv_u++ = yuv_palette[yuv_index++]; \ + *yuv_v++ = yuv_palette[yuv_index]; \ + *index_ptr -= ((old_data & 0x40) ? colorindex : 0); \ + *index_ptr += ((new_data & 0x40) ? colorindex : 0); \ + yuv_index = *index_ptr++ * 4; \ + *yuv_y++ = yuv_palette[yuv_index++]; \ + *yuv_u++ = yuv_palette[yuv_index++]; \ + *yuv_v++ = yuv_palette[yuv_index]; \ + *index_ptr -= ((old_data & 0x20) ? colorindex : 0); \ + *index_ptr += ((new_data & 0x20) ? colorindex : 0); \ + yuv_index = *index_ptr++ * 4; \ + *yuv_y++ = yuv_palette[yuv_index++]; \ + *yuv_u++ = yuv_palette[yuv_index++]; \ + *yuv_v++ = yuv_palette[yuv_index]; \ + *index_ptr -= ((old_data & 0x10) ? colorindex : 0); \ + *index_ptr += ((new_data & 0x10) ? 
colorindex : 0); \ + yuv_index = *index_ptr++ * 4; \ + *yuv_y++ = yuv_palette[yuv_index++]; \ + *yuv_u++ = yuv_palette[yuv_index++]; \ + *yuv_v++ = yuv_palette[yuv_index]; \ + *index_ptr -= ((old_data & 0x08) ? colorindex : 0); \ + *index_ptr += ((new_data & 0x08) ? colorindex : 0); \ + yuv_index = *index_ptr++ * 4; \ + *yuv_y++ = yuv_palette[yuv_index++]; \ + *yuv_u++ = yuv_palette[yuv_index++]; \ + *yuv_v++ = yuv_palette[yuv_index]; \ + *index_ptr -= ((old_data & 0x04) ? colorindex : 0); \ + *index_ptr += ((new_data & 0x04) ? colorindex : 0); \ + yuv_index = *index_ptr++ * 4; \ + *yuv_y++ = yuv_palette[yuv_index++]; \ + *yuv_u++ = yuv_palette[yuv_index++]; \ + *yuv_v++ = yuv_palette[yuv_index]; \ + *index_ptr -= ((old_data & 0x02) ? colorindex : 0); \ + *index_ptr += ((new_data & 0x02) ? colorindex : 0); \ + yuv_index = *index_ptr++ * 4; \ + *yuv_y++ = yuv_palette[yuv_index++]; \ + *yuv_u++ = yuv_palette[yuv_index++]; \ + *yuv_v++ = yuv_palette[yuv_index]; \ + *index_ptr -= ((old_data & 0x01) ? colorindex : 0); \ + *index_ptr += ((new_data & 0x01) ? 
colorindex : 0); \ + yuv_index = *index_ptr * 4; \ + *yuv_y = yuv_palette[yuv_index++]; \ + *yuv_u = yuv_palette[yuv_index++]; \ + *yuv_v = yuv_palette[yuv_index]; \ + old_data = new_data; \ +} + +#define IFF_REPLACE_SHORT_SIMPLE(ptr_s, old_data_s, new_data_s, colorindexx_s ) { \ + uint8_t *xindex_ptr = (uint8_t *)ptr_s; \ + uint8_t *xold_data = (uint8_t *)old_data_s; \ + uint8_t *xnew_data = (uint8_t *)new_data_s; \ + IFF_REPLACE_BYTE_SIMPLE(xindex_ptr, *xold_data, *xnew_data, colorindexx_s ); \ + xindex_ptr += 8; \ + xold_data++; \ + xnew_data++; \ + IFF_REPLACE_BYTE_SIMPLE(xindex_ptr, *xold_data, *xnew_data, colorindexx_s ); \ +} + +#define IFF_REPLACE_SHORT(ptr_s, yuvy_s, yuvu_s, yuvv_s, yuv_palette_s, old_data_s, new_data_s, colorindexx_s ) { \ + uint8_t *xindex_ptr = (uint8_t *)ptr_s; \ + uint8_t *xold_data = (uint8_t *)old_data_s; \ + uint8_t *xnew_data = (uint8_t *)new_data_s; \ + uint8_t *xyuv_y = yuvy_s; \ + uint8_t *xyuv_u = yuvu_s; \ + uint8_t *xyuv_v = yuvv_s; \ + IFF_REPLACE_BYTE(xindex_ptr, xyuv_y, xyuv_u, xyuv_v, yuv_palette_s, *xold_data, *xnew_data, colorindexx_s ); \ + xindex_ptr += 8; \ + xold_data++; \ + xnew_data++; \ + xyuv_y += 8; \ + xyuv_u += 8; \ + xyuv_v += 8; \ + IFF_REPLACE_BYTE(xindex_ptr, xyuv_y, xyuv_u, xyuv_v, yuv_palette_s, *xold_data, *xnew_data, colorindexx_s ); \ +} + +#define IFF_REPLACE_LONG_SIMPLE(ptr_l, old_data_l, new_data_l, colorindexx_l ) { \ + uint8_t *xindex_ptr = (uint8_t *)ptr_l; \ + uint8_t *xold_data = (uint8_t *)old_data_l; \ + uint8_t *xnew_data = (uint8_t *)new_data_l; \ + IFF_REPLACE_BYTE_SIMPLE(xindex_ptr, *xold_data, *xnew_data, colorindexx_l ); \ + xindex_ptr += 8; \ + xold_data++; \ + xnew_data++; \ + IFF_REPLACE_BYTE_SIMPLE(xindex_ptr, *xold_data, *xnew_data, colorindexx_l ); \ + xindex_ptr += 8; \ + xold_data++; \ + xnew_data++; \ + IFF_REPLACE_BYTE_SIMPLE(xindex_ptr, *xold_data, *xnew_data, colorindexx_l ); \ + xindex_ptr += 8; \ + xold_data++; \ + xnew_data++; \ + IFF_REPLACE_BYTE_SIMPLE(xindex_ptr, 
*xold_data, *xnew_data, colorindexx_l ); \ +} + +#define IFF_REPLACE_LONG(ptr_l, yuvy_l, yuvu_l, yuvv_l, yuv_palette_l, old_data_l, new_data_l, colorindexx_l ) { \ + uint8_t *xindex_ptr = (uint8_t *)ptr_l; \ + uint8_t *xold_data = (uint8_t *)old_data_l; \ + uint8_t *xnew_data = (uint8_t *)new_data_l; \ + uint8_t *xyuv_y = yuvy_l; \ + uint8_t *xyuv_u = yuvu_l; \ + uint8_t *xyuv_v = yuvv_l; \ + IFF_REPLACE_BYTE(xindex_ptr, xyuv_y, xyuv_u, xyuv_v, yuv_palette_l, *xold_data, *xnew_data, colorindexx_l ); \ + xindex_ptr += 8; \ + xold_data++; \ + xnew_data++; \ + xyuv_y += 8; \ + xyuv_u += 8; \ + xyuv_v += 8; \ + IFF_REPLACE_BYTE(xindex_ptr, xyuv_y, xyuv_u, xyuv_v, yuv_palette_l, *xold_data, *xnew_data, colorindexx_l ); \ + xindex_ptr += 8; \ + xold_data++; \ + xnew_data++; \ + xyuv_y += 8; \ + xyuv_u += 8; \ + xyuv_v += 8; \ + IFF_REPLACE_BYTE(xindex_ptr, xyuv_y, xyuv_u, xyuv_v, yuv_palette_l, *xold_data, *xnew_data, colorindexx_l ); \ + xindex_ptr += 8; \ + xold_data++; \ + xnew_data++; \ + xyuv_y += 8; \ + xyuv_u += 8; \ + xyuv_v += 8; \ + IFF_REPLACE_BYTE(xindex_ptr, xyuv_y, xyuv_u, xyuv_v, yuv_palette_l, *xold_data, *xnew_data, colorindexx_l ); \ +} + +typedef struct { + video_decoder_class_t decoder_class; +} bitplane_class_t; + +typedef struct bitplane_decoder_s { + video_decoder_t video_decoder; /* parent video decoder structure */ + + bitplane_class_t *class; + xine_stream_t *stream; + + /* these are traditional variables in a video decoder object */ + uint64_t video_step; /* frame duration in pts units */ + int decoder_ok; /* current decoder status */ + int skipframes; /* 0 = draw picture, 1 = skip it */ + int framenumber; + + unsigned char *buf; /* the accumulated buffer data */ + int bufsize; /* the maximum size of buf */ + int size; /* the current size of buf */ + int size_uk; /* size of unkompressed bitplane */ + + int width; /* the width of a video frame */ + int height; /* the height of a video frame */ + int num_pixel; /* number pixel */ + double ratio; 
/* the width to height ratio */ + int bytes_per_pixel; + int num_bitplanes; + int camg_mode; + int is_ham; + + unsigned char yuv_palette[256 * 4]; + unsigned char rgb_palette[256 * 4]; + yuv_planes_t yuv_planes; + yuv_planes_t yuv_planes_hist; + + uint8_t *buf_uk; /* uncompressed buffer */ + uint8_t *buf_uk_hist; /* uncompressed buffer historic */ + uint8_t *index_buf; /* index buffer (for indexed pics) */ + uint8_t *index_buf_hist;/* index buffer historic */ + +} bitplane_decoder_t; + +/* create a new buffer and decde a byterun1 decoded buffer into it */ +static uint8_t *bitplane_decode_byterun1 (uint8_t *compressed, + int size_compressed, + int size_uncompressed) { + + /* BytRun1 decompression */ + int pixel_ptr = 0; + int i = 0; + int j = 0; + + uint8_t *uncompressed = calloc(1, size_uncompressed ); + + while ( i < size_compressed && + pixel_ptr < size_uncompressed ) { + if( compressed[i] <= 127 ) { + j = compressed[i++]; + if( (i+j) > size_compressed ) { + free(uncompressed); + return NULL; + } + for( ; (j >= 0) && (pixel_ptr < size_uncompressed); j-- ) { + uncompressed[pixel_ptr++] = compressed[i++]; + } + } else if ( compressed[i] > 128 ) { + j = 256 - compressed[i++]; + if( i >= size_compressed ) { + free(uncompressed); + return NULL; + } + for( ; (j >= 0) && (pixel_ptr < size_uncompressed); j-- ) { + uncompressed[pixel_ptr++] = compressed[i]; + } + i++; + } + } + return uncompressed; +} + +/* create a new buffer with "normal" index or rgb numbers out of a bitplane */ +static void bitplane_decode_bitplane (uint8_t *bitplane_buffer, + uint8_t *index_buf, + int width, + int height, + int num_bitplanes, + int bytes_per_pixel ) { + + int rowsize = width / 8; + int pixel_ptr = 0; + int row_ptr = 0; + int palette_index = 0; + int i = 0; + int j = 0; + int row_i = 0; + int row_j = 0; + int palette_offset = 0; + int palette_index_rowsize = 0; + uint8_t color = 0; + uint8_t data = 0; + int bytes_per_pixel_8 = bytes_per_pixel * 8; + int rowsize_num_bitplanes = rowsize 
* num_bitplanes; + int width_bytes_per_pixel = width * bytes_per_pixel; + + for (i = 0; i < (height * width_bytes_per_pixel); index_buf[i++] = 0); + + /* decode Bitplanes to RGB/Index Numbers */ + for (row_ptr = 0; row_ptr < height; row_ptr++) { + + row_i = row_ptr * width_bytes_per_pixel; + row_j = row_ptr * rowsize_num_bitplanes; + + for (palette_index = 0; palette_index < num_bitplanes; palette_index++) { + + palette_offset = ((palette_index > 15) ? 2 : (palette_index > 7) ? 1 : 0); + color = bitplainoffeset[palette_index]; + palette_index_rowsize = palette_index * rowsize; + + for (pixel_ptr = 0; pixel_ptr < rowsize; pixel_ptr++) { + i = row_i + + (pixel_ptr * bytes_per_pixel_8) + + palette_offset; + j = row_j + palette_index_rowsize + pixel_ptr; + + data = bitplane_buffer[j]; + + index_buf[i] += ((data & 0x80) ? color : 0); + i += bytes_per_pixel; + index_buf[i] += ((data & 0x40) ? color : 0); + i += bytes_per_pixel; + index_buf[i] += ((data & 0x20) ? color : 0); + i += bytes_per_pixel; + index_buf[i] += ((data & 0x10) ? color : 0); + i += bytes_per_pixel; + index_buf[i] += ((data & 0x08) ? color : 0); + i += bytes_per_pixel; + index_buf[i] += ((data & 0x04) ? color : 0); + i += bytes_per_pixel; + index_buf[i] += ((data & 0x02) ? color : 0); + i += bytes_per_pixel; + index_buf[i] += ((data & 0x01) ? 
color : 0); + } + } + } +} + +/* create Buffer decode HAM6 and HAM8 to YUV color */ +static void bitplane_decode_ham (uint8_t *ham_buffer, + yuv_planes_t *yuv_planes, + int width, + int height, + int num_bitplanes, + int bytes_per_pixel, + unsigned char *rgb_palette ) { + + uint8_t *ham_buffer_work = ham_buffer; + uint8_t *ham_buffer_end = &ham_buffer[(width * height)]; + uint8_t *yuv_ptr_y = yuv_planes->y; + uint8_t *yuv_ptr_u = yuv_planes->u; + uint8_t *yuv_ptr_v = yuv_planes->v; + int i = 0; + int j = 0; + uint8_t r = 0; + uint8_t g = 0; + uint8_t b = 0; + /* position of special HAM-Bits differs in HAM6 and HAM8, detect them */ + int hambits = num_bitplanes > 6 ? 6 : 4; + /* the other bits contain the real data, dreate a mask out of it */ + int maskbits = 8 - hambits; + int mask = ( 1 << hambits ) - 1; + + for(; ham_buffer_work < ham_buffer_end; j = *ham_buffer_work++) { + i = (j & mask); + switch ( j >> hambits ) { + case HAMBITS_CMAP: + /* Take colors from palette */ + r = rgb_palette[i * 4 + 0]; + g = rgb_palette[i * 4 + 1]; + b = rgb_palette[i * 4 + 2]; + break; + case HAMBITS_BLUE: + /* keep red and green and modify blue */ + b = i << maskbits; + b |= b >> hambits; + break; + case HAMBITS_RED: + /* keep green and blue and modify red */ + r = i << maskbits; + r |= r >> hambits; + break; + case HAMBITS_GREEN: + /* keep red and blue and modify green */ + g = i << maskbits; + g |= g >> hambits; + break; + default: + break; + } + *yuv_ptr_y++ = COMPUTE_Y(r, g, b); + *yuv_ptr_u++ = COMPUTE_U(r, g, b); + *yuv_ptr_v++ = COMPUTE_V(r, g, b); + } +} + +/* decoding method 3 */ +static void bitplane_sdelta_opt_3 (bitplane_decoder_t *this) { + + uint32_t rowsize = this->width / 16; + uint32_t rowsize_all_planes = rowsize * this->num_bitplanes; + + uint32_t palette_index = 0; + uint32_t *deltadata = (uint32_t *)this->buf; + uint16_t *ptr = NULL; + uint16_t *planeptr = NULL; + uint16_t *picture_end = (uint16_t *)(&this->buf_uk[(rowsize_all_planes * 2 * this->height)]); + 
uint16_t *data = NULL; + uint16_t *data_end = (uint16_t *)(&this->buf[this->size]); + uint16_t *rowworkptr = NULL; + int16_t s = 0; + int16_t size = 0; + uint32_t pixel_ptr_bit = 0; + uint32_t row_ptr = 0; + uint32_t yuv_index = 0; + + /* Repeat for each plane */ + for(palette_index = 0; palette_index < this->num_bitplanes; palette_index++) { + + planeptr = (uint16_t *)(&this->buf_uk[(palette_index * rowsize * 2)]); + /* data starts at beginn of delta-Buffer + offset of the first */ + /* 32 Bit long word in the buffer. The buffer starts with 8 */ + /* of this Offset, for every bitplane (max 8) one */ + data = (uint16_t *)(&this->buf[_X_BE_32(&deltadata[palette_index])]); + if( data != (uint16_t *)this->buf ) { + /* This 8 Pointers are followd by another 8 */ + ptr = (uint16_t *)(&this->buf[_X_BE_32(&deltadata[(palette_index+8)])]); + + /* in this case, I think big/little endian is not important ;-) */ + while( *data != 0xFFFF) { + row_ptr = 0; + size = _X_BE_16(data); + data++; + if( size >= 0 ) { + rowworkptr = planeptr + size; + pixel_ptr_bit = size * 16; + if( this->is_ham ) { + IFF_REPLACE_SHORT_SIMPLE(&this->index_buf[pixel_ptr_bit], + rowworkptr, data, bitplainoffeset[palette_index] ); + } else { + IFF_REPLACE_SHORT( &this->index_buf[pixel_ptr_bit], + &this->yuv_planes.y[pixel_ptr_bit], &this->yuv_planes.u[pixel_ptr_bit], + &this->yuv_planes.v[pixel_ptr_bit], this->yuv_palette, + rowworkptr, data, bitplainoffeset[palette_index] ); + } + data++; + } else { + size = 0 - size + 2; + rowworkptr = planeptr + size; + pixel_ptr_bit = size * 16; + s = _X_BE_16(data); + data++; + while( s--) { + if( this->is_ham ) { + IFF_REPLACE_SHORT_SIMPLE(&this->index_buf[pixel_ptr_bit], + rowworkptr, data, bitplainoffeset[palette_index] ); + } else { + IFF_REPLACE_SHORT( &this->index_buf[pixel_ptr_bit], + &this->yuv_planes.y[pixel_ptr_bit], &this->yuv_planes.u[pixel_ptr_bit], + &this->yuv_planes.v[pixel_ptr_bit], this->yuv_palette, + rowworkptr, data, 
bitplainoffeset[palette_index] ); + } + rowworkptr++; + data++; + } + } + + + + + size = _X_BE_16(ptr); + ptr++; + if (size < 0) { + for (s = size; s < 0; s++) { + if (data > data_end || rowworkptr > picture_end) + return; + yuv_index = ((row_ptr * this->width) + pixel_ptr_bit); + if( this->is_ham ) { + IFF_REPLACE_SHORT_SIMPLE(&this->index_buf[yuv_index], + rowworkptr, data, bitplainoffeset[palette_index] ); + } else { + IFF_REPLACE_SHORT( &this->index_buf[yuv_index], + &this->yuv_planes.y[yuv_index], &this->yuv_planes.u[yuv_index], + &this->yuv_planes.v[yuv_index], this->yuv_palette, + rowworkptr, data, bitplainoffeset[palette_index] ); + } + rowworkptr += rowsize_all_planes; + row_ptr++; + } + data++; + } + else { + for (s = 0; s < size; s++) { + if (data > data_end || rowworkptr > picture_end) + return; + yuv_index = ((row_ptr * this->width) + pixel_ptr_bit); + if( this->is_ham ) { + IFF_REPLACE_SHORT_SIMPLE(&this->index_buf[yuv_index], + rowworkptr, data, bitplainoffeset[palette_index] ); + } else { + IFF_REPLACE_SHORT( &this->index_buf[yuv_index], + &this->yuv_planes.y[yuv_index], &this->yuv_planes.u[yuv_index], + &this->yuv_planes.v[yuv_index], this->yuv_palette, + rowworkptr, data, bitplainoffeset[palette_index] ); + } + data++; + rowworkptr += rowsize_all_planes; + row_ptr++; + } + } + } + } + } +} + +/* decoding method 4 */ +static void bitplane_set_dlta_short (bitplane_decoder_t *this) { + + uint32_t rowsize = this->width / 16; + uint32_t rowsize_all_planes = rowsize * this->num_bitplanes; + + uint32_t palette_index = 0; + uint32_t *deltadata = (uint32_t *)this->buf; + uint16_t *ptr = NULL; + uint16_t *planeptr = NULL; + uint16_t *picture_end = (uint16_t *)(&this->buf_uk[(rowsize_all_planes * 2 * this->height)]); + uint16_t *data = NULL; + uint16_t *data_end = (uint16_t *)(&this->buf[this->size]); + uint16_t *rowworkptr = NULL; + int16_t s = 0; + int16_t size = 0; + uint16_t pixel_ptr = 0; + uint32_t pixel_ptr_bit = 0; + uint32_t row_ptr = 0; + uint32_t 
yuv_index = 0; + + /* Repeat for each plane */ + for(palette_index = 0; palette_index < this->num_bitplanes; palette_index++) { + + planeptr = (uint16_t *)(&this->buf_uk[(palette_index * rowsize * 2)]); + /* data starts at beginn of delta-Buffer + offset of the first */ + /* 32 Bit long word in the buffer. The buffer starts with 8 */ + /* of this Offset, for every bitplane (max 8) one */ + data = (uint16_t *)(&this->buf[_X_BE_32(&deltadata[palette_index])]); + if( data != (uint16_t *)this->buf ) { + /* This 8 Pointers are followd by another 8 */ + ptr = (uint16_t *)(&this->buf[_X_BE_32(&deltadata[(palette_index+8)])]); + + /* in this case, I think big/little endian is not important ;-) */ + while( *ptr != 0xFFFF) { + pixel_ptr = _X_BE_16(ptr); + pixel_ptr_bit = pixel_ptr * 16; + row_ptr = 0; + rowworkptr = planeptr + pixel_ptr; + ptr++; + size = _X_BE_16(ptr); + ptr++; + if (size < 0) { + for (s = size; s < 0; s++) { + if (data > data_end || rowworkptr > picture_end) + return; + yuv_index = ((row_ptr * this->width) + pixel_ptr_bit); + if( this->is_ham ) { + IFF_REPLACE_SHORT_SIMPLE(&this->index_buf[yuv_index], + rowworkptr, data, bitplainoffeset[palette_index] ); + } else { + IFF_REPLACE_SHORT( &this->index_buf[yuv_index], + &this->yuv_planes.y[yuv_index], &this->yuv_planes.u[yuv_index], + &this->yuv_planes.v[yuv_index], this->yuv_palette, + rowworkptr, data, bitplainoffeset[palette_index] ); + } + rowworkptr += rowsize_all_planes; + row_ptr++; + } + data++; + } else { + for (s = 0; s < size; s++) { + if (data > data_end || rowworkptr > picture_end) + return; + yuv_index = ((row_ptr * this->width) + pixel_ptr_bit); + if( this->is_ham ) { + IFF_REPLACE_SHORT_SIMPLE(&this->index_buf[yuv_index], + rowworkptr, data, bitplainoffeset[palette_index] ); + } else { + IFF_REPLACE_SHORT( &this->index_buf[yuv_index], + &this->yuv_planes.y[yuv_index], &this->yuv_planes.u[yuv_index], + &this->yuv_planes.v[yuv_index], this->yuv_palette, + rowworkptr, data, 
bitplainoffeset[palette_index] ); + } + data++; + rowworkptr += rowsize_all_planes; + row_ptr++; + } + } + } + } + } +} + +/* decoding method 5 */ +static void bitplane_dlta_5 (bitplane_decoder_t *this) { + + uint32_t rowsize = this->width / 8; + uint32_t rowsize_all_planes = rowsize * this->num_bitplanes; + + uint32_t yuv_index = 0; + uint32_t delta_offset = 0; + uint32_t palette_index = 0; + uint32_t pixel_ptr = 0; + uint32_t pixel_ptr_bit = 0; + uint32_t row_ptr = 0; + uint32_t *deltadata = (uint32_t *)this->buf; + uint8_t *planeptr = NULL; + uint8_t *rowworkptr = NULL; + uint8_t *picture_end = this->buf_uk + (rowsize_all_planes * this->height); + uint8_t *data = NULL; + uint8_t *data_end = this->buf + this->size; + uint8_t op_count = 0; + uint8_t op = 0; + uint8_t count = 0; + + /* Repeat for each plane */ + for(palette_index = 0; palette_index < this->num_bitplanes; palette_index++) { + + planeptr = &this->buf_uk[(palette_index * rowsize)]; + /* data starts at beginn of delta-Buffer + offset of the first */ + /* 32 Bit long word in the buffer. 
The buffer starts with 8 */ + /* of this Offset, for every bitplane (max 8) one */ + delta_offset = _X_BE_32(&deltadata[palette_index]); + + if (delta_offset > 0) { + data = this->buf + delta_offset; + for( pixel_ptr = 0; pixel_ptr < rowsize; pixel_ptr++) { + rowworkptr = planeptr + pixel_ptr; + pixel_ptr_bit = pixel_ptr * 8; + row_ptr = 0; + /* execute ops */ + for( op_count = *data++; op_count; op_count--) { + op = *data++; + if (op & 0x80) { + /* Uniq ops */ + count = op & 0x7f; /* get count */ + while(count--) { + if (data > data_end || rowworkptr > picture_end) + return; + yuv_index = ((row_ptr * this->width) + pixel_ptr_bit); + if( this->is_ham ) { + IFF_REPLACE_BYTE_SIMPLE(&this->index_buf[yuv_index], + *rowworkptr, *data, bitplainoffeset[palette_index] ); + } else { + IFF_REPLACE_BYTE( &this->index_buf[yuv_index], + &this->yuv_planes.y[yuv_index], &this->yuv_planes.u[yuv_index], + &this->yuv_planes.v[yuv_index], this->yuv_palette, + *rowworkptr, *data, bitplainoffeset[palette_index] ); + } + data++; + rowworkptr += rowsize_all_planes; + row_ptr++; + } + } else { + if (op == 0) { + /* Same ops */ + count = *data++; + while(count--) { + if (data > data_end || rowworkptr > picture_end) + return; + yuv_index = ((row_ptr * this->width) + pixel_ptr_bit); + if( this->is_ham ) { + IFF_REPLACE_BYTE_SIMPLE(&this->index_buf[yuv_index], + *rowworkptr, *data, bitplainoffeset[palette_index] ); + } else { + IFF_REPLACE_BYTE( &this->index_buf[yuv_index], + &this->yuv_planes.y[yuv_index], &this->yuv_planes.u[yuv_index], + &this->yuv_planes.v[yuv_index], this->yuv_palette, + *rowworkptr, *data, bitplainoffeset[palette_index] ); + } + rowworkptr += rowsize_all_planes; + row_ptr++; + } + data++; + } else { + /* Skip ops */ + rowworkptr += (rowsize_all_planes * op); + row_ptr += op; + } + } + } + } + } + } +} + +/* decoding method 7 (short version) */ +static void bitplane_dlta_7_short (bitplane_decoder_t *this) { + + uint32_t rowsize = this->width / 16; + uint32_t 
rowsize_all_planes = rowsize * this->num_bitplanes; + + uint32_t yuv_index = 0; + uint32_t opcode_offset = 0; + uint32_t data_offset = 0; + uint32_t palette_index = 0; + uint32_t pixel_ptr = 0; + uint32_t pixel_ptr_bit = 0; + uint32_t row_ptr = 0; + uint32_t *deltadata = (uint32_t *)this->buf; + uint8_t *planeptr = NULL; + uint16_t *rowworkptr = NULL; + uint16_t *picture_end = (uint16_t *)(&this->buf_uk[(rowsize_all_planes * 2 * this->height)]); + uint16_t *data = NULL; + uint16_t *data_end = (uint16_t *)(&this->buf[this->size]); + uint8_t *op_ptr = NULL; + uint8_t op_count = 0; + uint8_t op = 0; + uint8_t count = 0; + + /* Repeat for each plane */ + for(palette_index = 0; palette_index < this->num_bitplanes; palette_index++) { + + planeptr = &this->buf_uk[(palette_index * rowsize * 2)]; + /* find opcode and data offset (up to 8 pointers, one for every bitplane */ + opcode_offset = _X_BE_32(&deltadata[palette_index]); + data_offset = _X_BE_32(&deltadata[palette_index + 8]); + + if (opcode_offset > 0 && data_offset > 0) { + data = (uint16_t *)(&this->buf[data_offset]); + op_ptr = this->buf + opcode_offset; + for( pixel_ptr = 0; pixel_ptr < rowsize; pixel_ptr++) { + rowworkptr = (uint16_t *)(&planeptr[pixel_ptr * 2]); + pixel_ptr_bit = pixel_ptr * 16; + row_ptr = 0; + /* execute ops */ + for( op_count = *op_ptr++; op_count; op_count--) { + op = *op_ptr++; + if (op & 0x80) { + /* Uniq ops */ + count = op & 0x7f; /* get count */ + while(count--) { + if (data > data_end || rowworkptr > picture_end) + return; + yuv_index = ((row_ptr * this->width) + pixel_ptr_bit); + if( this->is_ham ) { + IFF_REPLACE_SHORT_SIMPLE(&this->index_buf[yuv_index], + rowworkptr, data, bitplainoffeset[palette_index] ); + } else { + IFF_REPLACE_SHORT( &this->index_buf[yuv_index], + &this->yuv_planes.y[yuv_index], &this->yuv_planes.u[yuv_index], + &this->yuv_planes.v[yuv_index], this->yuv_palette, + rowworkptr, data, bitplainoffeset[palette_index] ); + } + data++; + rowworkptr += 
rowsize_all_planes; + row_ptr++; + } + } else { + if (op == 0) { + /* Same ops */ + count = *op_ptr++; + while(count--) { + if (data > data_end || rowworkptr > picture_end) + return; + yuv_index = ((row_ptr * this->width) + pixel_ptr_bit); + if( this->is_ham ) { + IFF_REPLACE_SHORT_SIMPLE(&this->index_buf[yuv_index], + rowworkptr, data, bitplainoffeset[palette_index] ); + } else { + IFF_REPLACE_SHORT( &this->index_buf[yuv_index], + &this->yuv_planes.y[yuv_index], &this->yuv_planes.u[yuv_index], + &this->yuv_planes.v[yuv_index], this->yuv_palette, + rowworkptr, data, bitplainoffeset[palette_index] ); + } + rowworkptr += rowsize_all_planes; + row_ptr++; + } + data++; + } else { + /* Skip ops */ + rowworkptr += (rowsize_all_planes * op); + row_ptr += op; + } + } + } + } + } + } +} + +/* decoding method 7 (long version) */ +static void bitplane_dlta_7_long (bitplane_decoder_t *this) { + + uint32_t rowsize = this->width / 32; + uint32_t rowsize_all_planes = rowsize * this->num_bitplanes; + + uint32_t yuv_index = 0; + uint32_t opcode_offset = 0; + uint32_t data_offset = 0; + uint32_t palette_index = 0; + uint32_t pixel_ptr = 0; + uint32_t pixel_ptr_bit = 0; + uint32_t row_ptr = 0; + uint32_t *deltadata = (uint32_t *)this->buf; + uint8_t *planeptr = NULL; + uint32_t *rowworkptr = NULL; + uint32_t *picture_end = (uint32_t *)(&this->buf_uk[(rowsize_all_planes * 4 * this->height)]); + uint32_t *data = NULL; + uint32_t *data_end = (uint32_t *)(&this->buf[this->size]); + uint8_t *op_ptr = NULL; + uint8_t op_count = 0; + uint8_t op = 0; + uint8_t count = 0; + + /* Repeat for each plane */ + for(palette_index = 0; palette_index < this->num_bitplanes; palette_index++) { + planeptr = &this->buf_uk[(palette_index * rowsize * 4)]; + /* find opcode and data offset (up to 8 pointers, one for every bitplane */ + opcode_offset = _X_BE_32(&deltadata[palette_index]); + data_offset = _X_BE_32(&deltadata[palette_index + 8]); + + if (opcode_offset > 0 && data_offset > 0) { + data = (uint32_t 
*)(&this->buf[data_offset]); + op_ptr = this->buf + opcode_offset; + for( pixel_ptr = 0; pixel_ptr < rowsize; pixel_ptr++) { + rowworkptr = (uint32_t *)(&planeptr[pixel_ptr * 4]); + pixel_ptr_bit = pixel_ptr * 32; + row_ptr = 0; + /* execute ops */ + for( op_count = *op_ptr++; op_count; op_count--) { + op = *op_ptr++; + if (op & 0x80) { + /* Uniq ops */ + count = op & 0x7f; /* get count */ + while(count--) { + if (data > data_end || rowworkptr > picture_end) + return; + yuv_index = ((row_ptr * this->width) + pixel_ptr_bit); + if( this->is_ham ) { + IFF_REPLACE_LONG_SIMPLE(&this->index_buf[yuv_index], + rowworkptr, data, bitplainoffeset[palette_index] ); + } else { + IFF_REPLACE_LONG( &this->index_buf[yuv_index], + &this->yuv_planes.y[yuv_index], &this->yuv_planes.u[yuv_index], + &this->yuv_planes.v[yuv_index], this->yuv_palette, + rowworkptr, data, bitplainoffeset[palette_index] ); + } + data++; + rowworkptr += rowsize_all_planes; + row_ptr++; + } + } else { + if (op == 0) { + /* Same ops */ + count = *op_ptr++; + while(count--) { + if (data > data_end || rowworkptr > picture_end) + return; + yuv_index = ((row_ptr * this->width) + pixel_ptr_bit); + if( this->is_ham ) { + IFF_REPLACE_LONG_SIMPLE(&this->index_buf[yuv_index], + rowworkptr, data, bitplainoffeset[palette_index] ); + } else { + IFF_REPLACE_LONG( &this->index_buf[yuv_index], + &this->yuv_planes.y[yuv_index], &this->yuv_planes.u[yuv_index], + &this->yuv_planes.v[yuv_index], this->yuv_palette, + rowworkptr, data, bitplainoffeset[palette_index] ); + } + rowworkptr += rowsize_all_planes; + row_ptr++; + } + data++; + } else { + /* Skip ops */ + rowworkptr += (rowsize_all_planes * op); + row_ptr += op; + } + } + } + } + } + } +} + +/* decoding method 8 short */ +static void bitplane_dlta_8_short (bitplane_decoder_t *this) { + + uint32_t rowsize = this->width / 16; + uint32_t rowsize_all_planes = rowsize * this->num_bitplanes; + + uint32_t yuv_index = 0; + uint32_t delta_offset = 0; + uint32_t palette_index = 0; 
+ uint32_t pixel_ptr = 0; + uint32_t row_ptr = 0; + uint32_t pixel_ptr_bit = 0; + uint32_t *deltadata = (uint32_t *)this->buf; + uint16_t *planeptr = NULL; + uint16_t *rowworkptr = NULL; + uint16_t *picture_end = (uint16_t *)(&this->buf_uk[(rowsize_all_planes * 2 * this->height)]); + uint16_t *data = NULL; + uint16_t *data_end = (uint16_t *)(&this->buf[this->size]); + uint16_t op_count = 0; + uint16_t op = 0; + uint16_t count = 0; + + /* Repeat for each plane */ + for(palette_index = 0; palette_index < this->num_bitplanes; palette_index++) { + + planeptr = (uint16_t *)(&this->buf_uk[(palette_index * rowsize * 2)]); + /* data starts at beginn of delta-Buffer + offset of the first */ + /* 32 Bit long word in the buffer. The buffer starts with 8 */ + /* of this Offset, for every bitplane (max 8) one */ + delta_offset = _X_BE_32(&deltadata[palette_index]); + + if (delta_offset > 0) { + data = (uint16_t *)(&this->buf[delta_offset]); + for( pixel_ptr = 0; pixel_ptr < rowsize; pixel_ptr++) { + rowworkptr = planeptr + pixel_ptr; + pixel_ptr_bit = pixel_ptr * 16; + row_ptr = 0; + /* execute ops */ + op_count = _X_BE_16(data); + data++; + for( ; op_count; op_count--) { + op = _X_BE_16(data); + data++; + if (op & 0x8000) { + /* Uniq ops */ + count = op & 0x7fff; /* get count */ + while(count--) { + if (data > data_end || rowworkptr > picture_end) + return; + yuv_index = ((row_ptr * this->width) + pixel_ptr_bit); + if( this->is_ham ) { + IFF_REPLACE_SHORT_SIMPLE(&this->index_buf[yuv_index], + rowworkptr, data, bitplainoffeset[palette_index] ); + } else { + IFF_REPLACE_SHORT( &this->index_buf[yuv_index], + &this->yuv_planes.y[yuv_index], &this->yuv_planes.u[yuv_index], + &this->yuv_planes.v[yuv_index], this->yuv_palette, + rowworkptr, data, bitplainoffeset[palette_index] ); + } + data++; + rowworkptr += rowsize_all_planes; + row_ptr++; + } + } else { + if (op == 0) { + /* Same ops */ + count = _X_BE_16(data); + data++; + while(count--) { + if (data > data_end || rowworkptr > 
picture_end) + return; + yuv_index = ((row_ptr * this->width) + pixel_ptr_bit); + if( this->is_ham ) { + IFF_REPLACE_SHORT_SIMPLE(&this->index_buf[yuv_index], + rowworkptr, data, bitplainoffeset[palette_index] ); + } else { + IFF_REPLACE_SHORT( &this->index_buf[yuv_index], + &this->yuv_planes.y[yuv_index], &this->yuv_planes.u[yuv_index], + &this->yuv_planes.v[yuv_index], this->yuv_palette, + rowworkptr, data, bitplainoffeset[palette_index] ); + } + rowworkptr += rowsize_all_planes; + row_ptr++; + } + data++; + } else { + /* Skip ops */ + rowworkptr += (rowsize_all_planes * op); + row_ptr += op; + } + } + } + } + } + } +} + +/* decoding method 8 long */ +static void bitplane_dlta_8_long (bitplane_decoder_t *this) { + + uint32_t rowsize = this->width / 32; + uint32_t rowsize_all_planes = rowsize * this->num_bitplanes; + + uint32_t yuv_index = 0; + uint32_t delta_offset = 0; + uint32_t palette_index = 0; + uint32_t pixel_ptr = 0; + uint32_t pixel_ptr_bit = 0; + uint32_t row_ptr = 0; + uint32_t *deltadata = (uint32_t *)this->buf; + uint32_t *planeptr = NULL; + uint32_t *rowworkptr = NULL; + uint32_t *picture_end = (uint32_t *)(&this->buf_uk[(rowsize_all_planes * 4 * this->height)]); + uint32_t *data = NULL; + uint32_t *data_end = (uint32_t *)(&this->buf[this->size]); + uint32_t op_count = 0; + uint32_t op = 0; + uint32_t count = 0; + + /* Repeat for each plane */ + for(palette_index = 0; palette_index < this->num_bitplanes; palette_index++) { + + planeptr = (uint32_t *)(&this->buf_uk[(palette_index * rowsize * 4)]); + /* data starts at beginn of delta-Buffer + offset of the first */ + /* 32 Bit long word in the buffer. 
The buffer starts with 8 */ + /* of this Offset, for every bitplane (max 8) one */ + delta_offset = _X_BE_32(&deltadata[palette_index]); + + if (delta_offset > 0) { + data = (uint32_t *)(&this->buf[delta_offset]); + for( pixel_ptr = 0; pixel_ptr < rowsize; pixel_ptr++) { + rowworkptr = planeptr + pixel_ptr; + pixel_ptr_bit = pixel_ptr * 32; + row_ptr = 0; + /* execute ops */ + op_count = _X_BE_32(data); + data++; + for( ; op_count; op_count--) { + op = _X_BE_32(data); + data++; + if (op & 0x80000000) { + /* Uniq ops */ + count = op & 0x7fffffff; /* get count */ + while(count--) { + /* replace a long word only while BOTH the delta stream and the */ + /* picture still have room; data_end and picture_end point one */ + /* element past the last valid one, so equality is out of range */ + if (data < data_end && rowworkptr < picture_end) { + yuv_index = ((row_ptr * this->width) + pixel_ptr_bit); + if( this->is_ham ) { + IFF_REPLACE_LONG_SIMPLE(&this->index_buf[yuv_index], + rowworkptr, data, bitplainoffeset[palette_index] ); + } else { + IFF_REPLACE_LONG( &this->index_buf[((row_ptr * this->width) + pixel_ptr_bit)], + &this->yuv_planes.y[yuv_index], &this->yuv_planes.u[yuv_index], + &this->yuv_planes.v[yuv_index], this->yuv_palette, + rowworkptr, data, bitplainoffeset[palette_index] ); + } + } + data++; + rowworkptr += rowsize_all_planes; + row_ptr++; + } + } else { + if (op == 0) { + /* Same ops */ + count = _X_BE_32(data); + data++; + while(count--) { + /* same range rule as for the Uniq ops above */ + if (data < data_end && rowworkptr < picture_end) { + yuv_index = ((row_ptr * this->width) + pixel_ptr_bit); + if( this->is_ham ) { + IFF_REPLACE_LONG_SIMPLE(&this->index_buf[yuv_index], + rowworkptr, data, bitplainoffeset[palette_index] ); + } else { + IFF_REPLACE_LONG( &this->index_buf[yuv_index], + &this->yuv_planes.y[yuv_index], &this->yuv_planes.u[yuv_index], + &this->yuv_planes.v[yuv_index], this->yuv_palette, + rowworkptr, data, bitplainoffeset[palette_index] ); + } + } + rowworkptr += rowsize_all_planes; + row_ptr++; + } + data++; + } else { + /* Skip ops */ + rowworkptr += (rowsize_all_planes * op); + row_ptr += op; + } + } + } + } + } + } +/* bitplane_decode_bitplane(this->buf_uk, this->index_buf, this->width, 
this->height, this->num_bitplanes, 1);*/ +} + +static void bitplane_decode_data (video_decoder_t *this_gen, + buf_element_t *buf) { + + bitplane_decoder_t *this = (bitplane_decoder_t *) this_gen; + xine_bmiheader *bih = 0; + palette_entry_t *palette = 0; + AnimHeader *anhd = NULL; + int i = 0; + int j = 0; + int buf_ptr = 0; + unsigned char r = 0; + unsigned char g = 0; + unsigned char b = 0; + uint8_t *buf_exchange = NULL; + + vo_frame_t *img = 0; /* video out frame */ + + /* a video decoder does not care about this flag (?) */ + if (buf->decoder_flags & BUF_FLAG_PREVIEW) + return; + + if ((buf->decoder_flags & BUF_FLAG_SPECIAL) && + (buf->decoder_info[1] == BUF_SPECIAL_PALETTE)) { + palette = (palette_entry_t *)buf->decoder_info_ptr[2]; + + for (i = 0; i < buf->decoder_info[2]; i++) { + this->yuv_palette[i * 4 + 0] = + COMPUTE_Y(palette[i].r, palette[i].g, palette[i].b); + this->yuv_palette[i * 4 + 1] = + COMPUTE_U(palette[i].r, palette[i].g, palette[i].b); + this->yuv_palette[i * 4 + 2] = + COMPUTE_V(palette[i].r, palette[i].g, palette[i].b); + this->rgb_palette[i * 4 + 0] = palette[i].r; + this->rgb_palette[i * 4 + 1] = palette[i].g; + this->rgb_palette[i * 4 + 2] = palette[i].b; + } + + /* EHB Pictures not allways contain all 64 colors, sometimes only */ + /* the first 32 are included and sometimes all 64 colors are provide,*/ + /* but second 32 are only stupid dirt, so recalculate them */ + if (((this->num_bitplanes == 6) && + (buf->decoder_info[2] == 32)) || + (this->camg_mode & CAMG_EHB)) { + for (i = 32; i < 64; i++) { + this->rgb_palette[i * 4 + 0] = palette[(i-32)].r / 2; + this->rgb_palette[i * 4 + 1] = palette[(i-32)].g / 2; + this->rgb_palette[i * 4 + 2] = palette[(i-32)].b / 2; + this->yuv_palette[i * 4 + 0] = + COMPUTE_Y(this->rgb_palette[i*4+0], this->rgb_palette[i*4+1], this->rgb_palette[i*4+2]); + this->yuv_palette[i * 4 + 1] = + COMPUTE_U(this->rgb_palette[i*4+0], this->rgb_palette[i*4+1], this->rgb_palette[i*4+2]); + this->yuv_palette[i * 4 + 
2] = + COMPUTE_V(this->rgb_palette[i*4+0], this->rgb_palette[i*4+1], this->rgb_palette[i*4+2]); + } + } + + return; + } + + if (buf->decoder_flags & BUF_FLAG_STDHEADER) { /* need to initialize */ + (this->stream->video_out->open) (this->stream->video_out, this->stream); + + bih = (xine_bmiheader *) buf->content; + this->width = (bih->biWidth + 15) & ~0x0f; + this->height = bih->biHeight; + this->num_pixel = this->width * this->height; + this->ratio = (double)this->width/(double)this->height; + this->video_step = buf->decoder_info[1]; + /* Palette based Formates use up to 8 Bit per pixel, always use 8 Bit if less */ + this->bytes_per_pixel = (bih->biBitCount + 1) / 8; + if( this->bytes_per_pixel < 1 ) + this->bytes_per_pixel = 1; + + /* New Buffer for indexes (palette based formats) */ + this->index_buf = calloc( this->num_pixel, this->bytes_per_pixel ); + this->index_buf_hist = calloc( this->num_pixel, this->bytes_per_pixel ); + + this->num_bitplanes = bih->biPlanes; + this->camg_mode = bih->biCompression; + if( this->camg_mode & CAMG_HAM ) + this->is_ham = 1; + else + this->is_ham = 0; + + if( buf->decoder_info[2] != buf->decoder_info[3] && + buf->decoder_info[3] > 0 ) { + this->ratio *= buf->decoder_info[2]; + this->ratio /= buf->decoder_info[3]; + } + + if( (bih->biCompression & CAMG_HIRES) && + !(bih->biCompression & CAMG_LACE) ) { + if( (buf->decoder_info[2] * 16) > (buf->decoder_info[3] * 10) ) + this->ratio /= 2.0; + } + + if( !(bih->biCompression & CAMG_HIRES) && + (bih->biCompression & CAMG_LACE) ) { + if( (buf->decoder_info[2] * 10) < (buf->decoder_info[3] * 16) ) + this->ratio *= 2.0; + } + + free (this->buf); + this->bufsize = VIDEOBUFSIZE; + this->buf = calloc(1, this->bufsize); + this->size = 0; + this->framenumber = 0; + + init_yuv_planes(&this->yuv_planes, this->width, this->height); + init_yuv_planes(&this->yuv_planes_hist, this->width, this->height); + + (this->stream->video_out->open) (this->stream->video_out, this->stream); + this->decoder_ok = 
1; + + /* load the stream/meta info */ + switch( buf->type ) { + case BUF_VIDEO_BITPLANE: + _x_meta_info_set_utf8(this->stream, XINE_META_INFO_VIDEOCODEC, "Uncompressed bitplane"); + break; + case BUF_VIDEO_BITPLANE_BR1: + _x_meta_info_set_utf8(this->stream, XINE_META_INFO_VIDEOCODEC, "ByteRun1 bitplane"); + break; + default: + _x_meta_info_set_utf8(this->stream, XINE_META_INFO_VIDEOCODEC, "Unknown bitplane"); + break; + } + + return; + } else if (this->decoder_ok) { + + this->skipframes = 0; + this->framenumber++; + if (this->size + buf->size > this->bufsize) { + this->bufsize = this->size + 2 * buf->size; + this->buf = realloc (this->buf, this->bufsize); + } + + xine_fast_memcpy (&this->buf[this->size], buf->content, buf->size); + + this->size += buf->size; + + if (buf->decoder_flags & BUF_FLAG_FRAMERATE) + this->video_step = buf->decoder_info[0]; + + if (buf->decoder_flags & BUF_FLAG_FRAME_END) { + + img = this->stream->video_out->get_frame (this->stream->video_out, + this->width, this->height, + this->ratio, XINE_IMGFMT_YUY2, + VO_BOTH_FIELDS); + + img->duration = this->video_step; + img->pts = buf->pts; + img->bad_frame = 0; + anhd = (AnimHeader *)(buf->decoder_info_ptr[0]); + + if( (this->buf_uk == NULL) || + (anhd == NULL) || + (anhd->operation == IFF_ANHD_ILBM) ) { + + /* iterate through each row */ + buf_ptr = 0; + this->size_uk = (((this->num_pixel) / 8) * this->num_bitplanes); + + if( this->buf_uk_hist != NULL ) + xine_fast_memcpy (this->buf_uk_hist, this->buf_uk, this->size_uk); + switch( buf->type ) { + case BUF_VIDEO_BITPLANE: + /* uncompressed Buffer, set decoded_buf pointer direct to input stream */ + if( this->buf_uk == NULL ) + this->buf_uk = malloc(this->size); + xine_fast_memcpy (this->buf_uk, this->buf, this->size); + break; + case BUF_VIDEO_BITPLANE_BR1: + /* create Buffer for decompressed bitmap */ + this->buf_uk = bitplane_decode_byterun1( + this->buf, /* compressed buffer */ + this->size, /* size of compressed data */ + this->size_uk ); /* 
size of uncompressed data */ + + if( this->buf_uk == NULL ) { + xine_log(this->stream->xine, XINE_LOG_MSG, + _("bitplane: error doing ByteRun1 decompression\n")); + _x_stream_info_set(this->stream, XINE_STREAM_INFO_VIDEO_HANDLED, 0); + return; + } + /* set pointer to decompressed Buffer */ + break; + default: + break; + } + bitplane_decode_bitplane( this->buf_uk, /* bitplane buffer */ + this->index_buf, /* index buffer */ + this->width, /* width */ + this->height, /* hight */ + this->num_bitplanes, /* number bitplanes */ + this->bytes_per_pixel); /* used Bytes per pixel */ + + if ((this->bytes_per_pixel == 1) && + (this->is_ham == 0) ) { + buf_exchange = this->index_buf; + for (i = 0; i < (this->height * this->width); i++) { + j = *buf_exchange++ * 4; + this->yuv_planes.y[i] = this->yuv_palette[j++]; + this->yuv_planes.u[i] = this->yuv_palette[j++]; + this->yuv_planes.v[i] = this->yuv_palette[j]; + } + } + if( this->buf_uk_hist == NULL ) { + this->buf_uk_hist = malloc(this->size_uk); + xine_fast_memcpy (this->buf_uk_hist, this->buf_uk, this->size_uk); + xine_fast_memcpy (this->index_buf_hist, this->index_buf, + (this->num_pixel * this->bytes_per_pixel)); + xine_fast_memcpy (this->yuv_planes_hist.y, this->yuv_planes.y, (this->num_pixel)); + xine_fast_memcpy (this->yuv_planes_hist.u, this->yuv_planes.u, (this->num_pixel)); + xine_fast_memcpy (this->yuv_planes_hist.v, this->yuv_planes.v, (this->num_pixel)); + } + } else { + /* when no start-picture is given, create a empty one */ + if( this->buf_uk_hist == NULL ) { + this->size_uk = (((this->num_pixel) / 8) * this->num_bitplanes); + this->buf_uk = calloc(this->num_bitplanes, ((this->num_pixel) / 8)); + this->buf_uk_hist = calloc(this->num_bitplanes, ((this->num_pixel) / 8)); + } + if( this->index_buf == NULL ) { + this->index_buf = calloc( this->num_pixel, this->bytes_per_pixel ); + this->index_buf_hist = calloc( this->num_pixel, this->bytes_per_pixel ); + } + + switch( anhd->operation ) { + /* also known as IFF-ANIM 
OPT1 (never seen in real world) */ + case IFF_ANHD_XOR: + xine_log(this->stream->xine, XINE_LOG_MSG, + _("bitplane: Anim Opt 1 is not supported at the moment\n")); + _x_stream_info_set(this->stream, XINE_STREAM_INFO_VIDEO_HANDLED, 0); + return; + break; + /* also known as IFF-ANIM OPT2 (never seen in real world) */ + case IFF_ANHD_LDELTA: + xine_log(this->stream->xine, XINE_LOG_MSG, + _("bitplane: Anim Opt 2 is not supported at the moment\n")); + _x_stream_info_set(this->stream, XINE_STREAM_INFO_VIDEO_HANDLED, 0); + return; + break; + /* also known as IFF-ANIM OPT3 */ + case IFF_ANHD_SDELTA: + _x_meta_info_set_utf8(this->stream, XINE_META_INFO_VIDEOCODEC, "Anim OPT3"); + bitplane_sdelta_opt_3 ( this ); + return; + break; + /* also known as IFF-ANIM OPT4 (never seen in real world) */ + case IFF_ANHD_SLDELTA: + _x_meta_info_set_utf8(this->stream, XINE_META_INFO_VIDEOCODEC, "Anim OPT4 (SLDELTA)"); + bitplane_set_dlta_short ( this ); + break; + /* also known as IFF-ANIM OPT5 */ + case IFF_ANHD_BVDELTA: + _x_meta_info_set_utf8(this->stream, XINE_META_INFO_VIDEOCODEC, "Anim OPT5 (BVDELTA)"); + bitplane_dlta_5(this); + break; + /* IFF-ANIM OPT6 is exactly the same as OPT5, but for stereo-displays */ + /* first picture is on the left display, second on the right, third on */ + /* the left, forth on right, ... 
Only display left picture on mono display*/ + case IFF_ANHD_STEREOO5: + _x_meta_info_set_utf8(this->stream, XINE_META_INFO_VIDEOCODEC, "Anim OPT6 (BVDELTA STEREO)"); + bitplane_dlta_5(this); + if( this->framenumber % 2 == 0 ) + this->skipframes = 1; + return; + break; + case IFF_ANHD_OPT7: + if(anhd->bits == 0) { + _x_meta_info_set_utf8(this->stream, XINE_META_INFO_VIDEOCODEC, "Anim OPT7 (SHORT)"); + bitplane_dlta_7_short(this); + } else { + _x_meta_info_set_utf8(this->stream, XINE_META_INFO_VIDEOCODEC, "Anim OPT7 (LONG)"); + bitplane_dlta_7_long(this); + } + break; + case IFF_ANHD_OPT8: + if(anhd->bits == 0) { + _x_meta_info_set_utf8(this->stream, XINE_META_INFO_VIDEOCODEC, "Anim OPT8 (SHORT)"); + bitplane_dlta_8_short(this); + } else { + _x_meta_info_set_utf8(this->stream, XINE_META_INFO_VIDEOCODEC, "Anim OPT8 (LONG)"); + bitplane_dlta_8_long(this); + } + break; + case IFF_ANHD_ASCIIJ: + xine_log(this->stream->xine, XINE_LOG_MSG, + _("bitplane: Anim ASCIIJ is not supported at the moment\n")); + _x_stream_info_set(this->stream, XINE_STREAM_INFO_VIDEO_HANDLED, 0); + return; + break; + default: + xine_log(this->stream->xine, XINE_LOG_MSG, + _("bitplane: This anim-type is not supported at the moment\n")); + _x_stream_info_set(this->stream, XINE_STREAM_INFO_VIDEO_HANDLED, 0); + return; + break; + } + /* change old bitmap buffer (which now is the new one) with new buffer */ + buf_exchange = this->buf_uk; + this->buf_uk = this->buf_uk_hist; + this->buf_uk_hist = buf_exchange; + /* do the same with the index buffer */ + buf_exchange = this->index_buf; + this->index_buf = this->index_buf_hist; + this->index_buf_hist = buf_exchange; + /* and also with yuv buffer */ + buf_exchange = this->yuv_planes.y; + this->yuv_planes.y = this->yuv_planes_hist.y; + this->yuv_planes_hist.y = buf_exchange; + buf_exchange = this->yuv_planes.u; + this->yuv_planes.u = this->yuv_planes_hist.u; + this->yuv_planes_hist.u = buf_exchange; + buf_exchange = this->yuv_planes.v; + this->yuv_planes.v = 
this->yuv_planes_hist.v; + this->yuv_planes_hist.v = buf_exchange; + } + + if( this->skipframes == 0 ) { + switch (this->bytes_per_pixel) { + case 1: + /* HAM-pictrues need special handling */ + if( this->is_ham ) { + /* Decode HAM-Pictures to YUV */ + bitplane_decode_ham( this->index_buf, /* HAM-bitplane buffer */ + &(this->yuv_planes), /* YUV buffer */ + this->width, /* width */ + this->height, /* hight */ + this->num_bitplanes, /* number bitplanes */ + this->bytes_per_pixel, /* used Bytes per pixel */ + this->rgb_palette); /* Palette (RGB) */ + } + break; + case 3: + buf_exchange = this->index_buf; + for (i = 0; i < (this->height * this->width); i++) { + r = *buf_exchange++; + g = *buf_exchange++; + b = *buf_exchange++; + + this->yuv_planes.y[i] = COMPUTE_Y(r, g, b); + this->yuv_planes.u[i] = COMPUTE_U(r, g, b); + this->yuv_planes.v[i] = COMPUTE_V(r, g, b); + } + break; + default: + break; + } + + yuv444_to_yuy2(&this->yuv_planes, img->base[0], img->pitches[0]); + + img->draw(img, this->stream); + } + img->free(img); + + this->size = 0; + if ( buf->decoder_info[1] > 90000 ) + xine_usec_sleep(buf->decoder_info[1]); + } + } +} + +/* + * This function is called when xine needs to flush the system. Not + * sure when or if this is used or even if it needs to do anything. + */ +static void bitplane_flush (video_decoder_t *this_gen) { +} + +/* + * This function resets the video decoder. + */ +static void bitplane_reset (video_decoder_t *this_gen) { + bitplane_decoder_t *this = (bitplane_decoder_t *) this_gen; + + this->size = 0; +} + +static void bitplane_discontinuity (video_decoder_t *this_gen) { +} + +/* + * This function frees the video decoder instance allocated to the decoder. 
+ */ +static void bitplane_dispose (video_decoder_t *this_gen) { + bitplane_decoder_t *this = (bitplane_decoder_t *) this_gen; + + /* each buffer is released exactly once; free(NULL) is a no-op for */ + /* the ones that were never allocated */ + free (this->buf); + free (this->buf_uk); + free (this->buf_uk_hist); + free (this->index_buf); + free (this->index_buf_hist); + + if (this->decoder_ok) { + /* decoder_ok is only set after init_yuv_planes() ran for both sets */ + free_yuv_planes (&this->yuv_planes); + free_yuv_planes (&this->yuv_planes_hist); + this->decoder_ok = 0; + this->stream->video_out->close(this->stream->video_out, this->stream); + } + + free (this_gen); +} + +/* + * Allocates a zeroed decoder instance, wires up its callbacks and + * returns it; returns NULL when the allocation fails. + */ +static video_decoder_t *open_plugin (video_decoder_class_t *class_gen, xine_stream_t *stream) { + + bitplane_decoder_t *this = (bitplane_decoder_t *) calloc(1, sizeof(bitplane_decoder_t)); + + if (!this) + return NULL; + + this->video_decoder.decode_data = bitplane_decode_data; + this->video_decoder.flush = bitplane_flush; + this->video_decoder.reset = bitplane_reset; + this->video_decoder.discontinuity = bitplane_discontinuity; + this->video_decoder.dispose = bitplane_dispose; + this->size = 0; + + this->stream = stream; + this->class = (bitplane_class_t *) class_gen; + + this->decoder_ok = 0; + this->buf = NULL; + this->buf_uk = NULL; + this->index_buf = NULL; + this->index_buf_hist = NULL; + + return &this->video_decoder; +} + +/* + * Allocates the decoder class and fills in its entry points; + * returns NULL when the allocation fails. + */ +static void *init_plugin (xine_t *xine, void *data) { + + bitplane_class_t *this = (bitplane_class_t *) calloc(1, sizeof(bitplane_class_t)); + + if (!this) + return NULL; + + this->decoder_class.open_plugin = open_plugin; + this->decoder_class.identifier = "bitplane"; + this->decoder_class.description = N_("Raw bitplane video decoder plugin"); + this->decoder_class.dispose = default_video_decoder_class_dispose; + + return this; +} + +/* + * exported plugin catalog entry + */ + +static const uint32_t video_types[] = { + BUF_VIDEO_BITPLANE, + BUF_VIDEO_BITPLANE_BR1, + 0 +}; + +static const decoder_info_t dec_info_video = { + video_types, /* supported types */ + 1 /* priority */ +}; + +const plugin_info_t xine_plugin_info[] EXPORTED = { + /* type, API, "name", version, special_info, init_function */ + { PLUGIN_VIDEO_DECODER, 19, "bitplane", XINE_VERSION_CODE, 
&dec_info_video, init_plugin }, + { PLUGIN_NONE, 0, "", 0, NULL, NULL } +}; diff --git a/src/video_dec/foovideo.c b/src/video_dec/foovideo.c new file mode 100644 index 000000000..99ec1287b --- /dev/null +++ b/src/video_dec/foovideo.c @@ -0,0 +1,285 @@ +/* + * Copyright (C) 2000-2003 the xine project + * + * This file is part of xine, a free video player. + * + * xine is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * xine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110, USA + * + * foovideo.c: This is a reference video decoder for the xine multimedia + * player. It really works too! It will output frames of packed YUY2 data + * where each byte in the map is the same value, which is 3 larger than the + * value from the last frame. This creates a slowly rotating solid color + * frame when the frames are played in succession. 
+ */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> +#include <unistd.h> + +#include <xine/xine_internal.h> +#include <xine/video_out.h> +#include <xine/buffer.h> +#include <xine/xineutils.h> +#include "bswap.h" + +#define VIDEOBUFSIZE 128*1024 + +typedef struct { + video_decoder_class_t decoder_class; +} foovideo_class_t; + +typedef struct foovideo_decoder_s { + video_decoder_t video_decoder; /* parent video decoder structure */ + + foovideo_class_t *class; + xine_stream_t *stream; + + /* these are traditional variables in a video decoder object */ + uint64_t video_step; /* frame duration in pts units */ + int decoder_ok; /* current decoder status */ + int skipframes; + + unsigned char *buf; /* the accumulated buffer data */ + int bufsize; /* the maximum size of buf */ + int size; /* the current size of buf */ + + int width; /* the width of a video frame */ + int height; /* the height of a video frame */ + double ratio; /* the width to height ratio */ + + /* these are variables exclusive to the foo video decoder */ + unsigned char current_yuv_byte; + +} foovideo_decoder_t; + +/************************************************************************** + * foovideo specific decode functions + *************************************************************************/ + +/************************************************************************** + * xine video plugin functions + *************************************************************************/ + +/* + * This function receives a buffer of data from the demuxer layer and + * figures out how to handle it based on its header flags. + */ +static void foovideo_decode_data (video_decoder_t *this_gen, + buf_element_t *buf) { + + foovideo_decoder_t *this = (foovideo_decoder_t *) this_gen; + xine_bmiheader *bih; + + vo_frame_t *img; /* video out frame */ + + /* a video decoder does not care about this flag (?) 
*/ + if (buf->decoder_flags & BUF_FLAG_PREVIEW) + return; + + if (buf->decoder_flags & BUF_FLAG_FRAMERATE) { + this->video_step = buf->decoder_info[0]; + _x_stream_info_set(this->stream, XINE_STREAM_INFO_FRAME_DURATION, this->video_step); + } + + if (buf->decoder_flags & BUF_FLAG_STDHEADER) { /* need to initialize */ + (this->stream->video_out->open) (this->stream->video_out, this->stream); + + /* drop the buffer of a previous initialization; it is freed only */ + /* here and replaced by the malloc() below (free(NULL) is a no-op) */ + free(this->buf); + + bih = (xine_bmiheader *) buf->content; + this->width = bih->biWidth; + this->height = bih->biHeight; + this->ratio = (double)this->width/(double)this->height; + + this->bufsize = VIDEOBUFSIZE; + this->buf = malloc(this->bufsize); + this->size = 0; + + /* take this opportunity to load the stream/meta info */ + _x_meta_info_set_utf8(this->stream, XINE_META_INFO_VIDEOCODEC, "foovideo"); + + /* do anything else relating to initializing this decoder */ + this->current_yuv_byte = 0; + + this->decoder_ok = 1; + + return; + } else if (this->decoder_ok) { + + if (this->size + buf->size > this->bufsize) { + /* grow through a temporary so the old buffer is not lost when */ + /* realloc() fails; in that case drop the partial frame */ + int grown_size = this->size + 2 * buf->size; + unsigned char *grown = realloc (this->buf, grown_size); + if (!grown) { + this->size = 0; + return; + } + this->buf = grown; + this->bufsize = grown_size; + } + + xine_fast_memcpy (&this->buf[this->size], buf->content, buf->size); + + this->size += buf->size; + + if (buf->decoder_flags & BUF_FLAG_FRAME_END) { + + img = this->stream->video_out->get_frame (this->stream->video_out, + this->width, this->height, + this->ratio, + XINE_IMGFMT_YUY2, VO_BOTH_FIELDS); + + img->duration = this->video_step; + img->pts = buf->pts; + img->bad_frame = 0; + + memset(img->base[0], this->current_yuv_byte, + this->width * this->height * 2); + this->current_yuv_byte += 3; + + img->draw(img, this->stream); + img->free(img); + + this->size = 0; + } + } +} + +/* + * This function is called when xine needs to flush the system. + */ +static void foovideo_flush (video_decoder_t *this_gen) { +} + +/* + * This function resets the video decoder. 
+ */ +static void foovideo_reset (video_decoder_t *this_gen) { + foovideo_decoder_t *this = (foovideo_decoder_t *) this_gen; + + this->size = 0; +} + +/* + * The decoder should forget any stored pts values here. + */ +static void foovideo_discontinuity (video_decoder_t *this_gen) { + foovideo_decoder_t *this = (foovideo_decoder_t *) this_gen; + +} + +/* + * This function frees the video decoder instance allocated to the decoder. + */ +static void foovideo_dispose (video_decoder_t *this_gen) { + + foovideo_decoder_t *this = (foovideo_decoder_t *) this_gen; + + free (this->buf); + + if (this->decoder_ok) { + this->decoder_ok = 0; + this->stream->video_out->close(this->stream->video_out, this->stream); + } + + free (this_gen); +} + +/* + * This function allocates, initializes, and returns a private video + * decoder structure. + */ +static video_decoder_t *open_plugin (video_decoder_class_t *class_gen, xine_stream_t *stream) { + + foovideo_decoder_t *this ; + + this = (foovideo_decoder_t *) calloc(1, sizeof(foovideo_decoder_t)); + + this->video_decoder.decode_data = foovideo_decode_data; + this->video_decoder.flush = foovideo_flush; + this->video_decoder.reset = foovideo_reset; + this->video_decoder.discontinuity = foovideo_discontinuity; + this->video_decoder.dispose = foovideo_dispose; + this->size = 0; + + this->stream = stream; + this->class = (foovideo_class_t *) class_gen; + + this->decoder_ok = 0; + this->buf = NULL; + + return &this->video_decoder; +} + +/* + * This function frees the video decoder class and any other memory that was + * allocated. + */ +static void dispose_class (video_decoder_class_t *this) { + free (this); +} + +/* + * This function allocates a private video decoder class and initializes + * the class's member functions. 
+ */ +static void *init_plugin (xine_t *xine, void *data) { + + foovideo_class_t *this; + + this = (foovideo_class_t *) calloc(1, sizeof(foovideo_class_t)); + + this->decoder_class.open_plugin = open_plugin; + this->decoder_class.identifier = "foovideo"; + this->decoder_class.description = N_("foovideo: reference xine video decoder plugin"); + this->decoder_class.dispose = dispose_class; + + return this; +} + +/* + * This is a list of all of the internal xine video buffer types that + * this decoder is able to handle. Check src/xine-engine/buffer.h for a + * list of valid buffer types (and add a new one if the one you need does + * not exist). Terminate the list with a 0. + */ +static const uint32_t video_types[] = { + /* BUF_VIDEO_FOOVIDEO, */ + BUF_VIDEO_VQA, + BUF_VIDEO_SORENSON_V3, + 0 +}; + +/* + * This data structure combines the list of supported xine buffer types and + * the priority that the plugin should be given with respect to other + * plugins that handle the same buffer type. A plugin with priority (n+1) + * will be used instead of a plugin with priority (n). + */ +static const decoder_info_t dec_info_video = { + video_types, /* supported types */ + 5 /* priority */ +}; + +/* + * The plugin catalog entry. This is the only information that this plugin + * will export to the public. + */ +const plugin_info_t xine_plugin_info[] EXPORTED = { + /* { type, API, "name", version, special_info, init_function } */ + { PLUGIN_VIDEO_DECODER, 19, "foovideo", XINE_VERSION_CODE, &dec_info_video, init_plugin }, + { PLUGIN_NONE, 0, "", 0, NULL, NULL } +}; diff --git a/src/video_dec/gdkpixbuf.c b/src/video_dec/gdkpixbuf.c new file mode 100644 index 000000000..ed88323fb --- /dev/null +++ b/src/video_dec/gdkpixbuf.c @@ -0,0 +1,299 @@ +/* + * Copyright (C) 2006 the xine project + * + * This file is part of xine, a free video player. 
+ * + * xine is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * xine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110, USA + * + * a gdk-pixbuf-based image video decoder + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + + + +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <stdio.h> + +#define LOG_MODULE "gdkpixbuf_video_decoder" +#define LOG_VERBOSE +/* +#define LOG +*/ + +#include <xine/xine_internal.h> +#include <xine/video_out.h> +#include <xine/buffer.h> +#include <xine/xineutils.h> +#include "bswap.h" + +#include <gdk-pixbuf/gdk-pixbuf.h> + +typedef struct { + video_decoder_class_t decoder_class; + + /* + * private variables + */ + +} image_class_t; + + +typedef struct image_decoder_s { + video_decoder_t video_decoder; + + image_class_t *cls; + + xine_stream_t *stream; + int video_open; + + GdkPixbufLoader *loader; + +} image_decoder_t; + + +static void image_decode_data (video_decoder_t *this_gen, buf_element_t *buf) { + image_decoder_t *this = (image_decoder_t *) this_gen; + GError *error = NULL; + + if (!this->video_open) { + lprintf("opening video\n"); + (this->stream->video_out->open) (this->stream->video_out, this->stream); + this->video_open = 1; + } + + if (this->loader == NULL) { + this->loader = gdk_pixbuf_loader_new (); + } + + if (gdk_pixbuf_loader_write (this->loader, 
buf->mem, buf->size, &error) == FALSE) { + lprintf("error loading image: %s\n", error->message); + g_error_free (error); + gdk_pixbuf_loader_close (this->loader, NULL); + g_object_unref (G_OBJECT (this->loader)); + this->loader = NULL; + return; + } + + if (buf->decoder_flags & BUF_FLAG_FRAME_END) { + GdkPixbuf *pixbuf; + int width, height, x, y, rowstride, n_channels, i; + guchar *img_buf; + yuv_planes_t yuv_planes; + vo_frame_t *img; + + /* + * this->image -> rgb data + */ + if (gdk_pixbuf_loader_close (this->loader, &error) == FALSE) { + lprintf("error loading image: %s\n", error->message); + g_error_free (error); + g_object_unref (G_OBJECT (this->loader)); + this->loader = NULL; + return; + } + + pixbuf = gdk_pixbuf_loader_get_pixbuf (this->loader); + if (pixbuf != NULL) + g_object_ref (G_OBJECT (pixbuf)); + g_object_unref (this->loader); + this->loader = NULL; + + if (pixbuf == NULL) { + lprintf("error loading image\n"); + return; + } + + width = gdk_pixbuf_get_width (pixbuf) & ~1; /* must be even for init_yuv_planes */ + height = gdk_pixbuf_get_height (pixbuf); + img_buf = gdk_pixbuf_get_pixels (pixbuf); + + _x_stream_info_set(this->stream, XINE_STREAM_INFO_VIDEO_WIDTH, width); + _x_stream_info_set(this->stream, XINE_STREAM_INFO_VIDEO_HEIGHT, height); + + lprintf("image loaded successfully\n"); + + /* + * rgb data -> yuv_planes + */ + init_yuv_planes(&yuv_planes, width, height); + + n_channels = gdk_pixbuf_get_n_channels (pixbuf); + rowstride = gdk_pixbuf_get_rowstride (pixbuf); + i = 0; + for (y = 0; y < height; y++) { + for (x = 0; x < width; x++) { + guchar *p; + p = img_buf + y * rowstride + x * n_channels; + + yuv_planes.y[i] = COMPUTE_Y (p[0], p[1], p[2]); + yuv_planes.u[i] = COMPUTE_U (p[0], p[1], p[2]); + yuv_planes.v[i] = COMPUTE_V (p[0], p[1], p[2]); + + i++; + } + } + gdk_pixbuf_unref (pixbuf); + + /* + * alloc and draw video frame + */ + img = this->stream->video_out->get_frame (this->stream->video_out, width, + height, 
(double)width/(double)height, + XINE_IMGFMT_YUY2, + VO_BOTH_FIELDS); + img->pts = buf->pts; + img->duration = 3600; + img->bad_frame = 0; + + yuv444_to_yuy2(&yuv_planes, img->base[0], img->pitches[0]); + free_yuv_planes(&yuv_planes); + + _x_stream_info_set(this->stream, XINE_STREAM_INFO_FRAME_DURATION, img->duration); + + img->draw(img, this->stream); + img->free(img); + } +} + + +static void image_flush (video_decoder_t *this_gen) { + /* image_decoder_t *this = (image_decoder_t *) this_gen; */ + + /* + * flush out any frames that are still stored in the decoder + */ +} + + +static void image_reset (video_decoder_t *this_gen) { + image_decoder_t *this = (image_decoder_t *) this_gen; + + /* + * reset decoder after engine flush (prepare for new + * video data not related to recently decoded data) + */ + + if (this->loader != NULL) { + gdk_pixbuf_loader_close (this->loader, NULL); + g_object_unref (G_OBJECT (this->loader)); + this->loader = NULL; + } +} + + +static void image_discontinuity (video_decoder_t *this_gen) { + /* image_decoder_t *this = (image_decoder_t *) this_gen; */ + + /* + * a time reference discontinuity has happened. 
+ * that is, it must forget any currently held pts value + */ +} + +static void image_dispose (video_decoder_t *this_gen) { + image_decoder_t *this = (image_decoder_t *) this_gen; + + if (this->video_open) { + lprintf("closing video\n"); + + this->stream->video_out->close(this->stream->video_out, this->stream); + this->video_open = 0; + } + + if (this->loader != NULL) { + gdk_pixbuf_loader_close (this->loader, NULL); + g_object_unref (G_OBJECT (this->loader)); + this->loader = NULL; + } + + lprintf("closed\n"); + free (this); +} + + +static video_decoder_t *open_plugin (video_decoder_class_t *class_gen, + xine_stream_t *stream) { + + image_class_t *cls = (image_class_t *) class_gen; + image_decoder_t *this; + + lprintf("opened\n"); + + g_type_init (); + + this = (image_decoder_t *) calloc(1, sizeof(image_decoder_t)); + + this->video_decoder.decode_data = image_decode_data; + this->video_decoder.flush = image_flush; + this->video_decoder.reset = image_reset; + this->video_decoder.discontinuity = image_discontinuity; + this->video_decoder.dispose = image_dispose; + this->cls = cls; + this->stream = stream; + + /* + * initialisation of privates + */ + + return &this->video_decoder; +} + +/* + * image plugin class + */ +static void *init_class (xine_t *xine, void *data) { + + image_class_t *this; + + this = (image_class_t *) calloc(1, sizeof(image_class_t)); + + this->decoder_class.open_plugin = open_plugin; + this->decoder_class.identifier = "gdkpixbuf"; + this->decoder_class.description = N_("gdk-pixbuf image video decoder plugin"); + this->decoder_class.dispose = default_video_decoder_class_dispose; + + /* + * initialisation of privates + */ + + lprintf("class opened\n"); + + return this; +} + +/* + * exported plugin catalog entry + */ + +static const uint32_t supported_types[] = { BUF_VIDEO_IMAGE, BUF_VIDEO_JPEG, 0 }; + +static const decoder_info_t dec_info_image = { + supported_types, /* supported types */ + 7 /* priority */ +}; + +const plugin_info_t 
xine_plugin_info[] EXPORTED = { + /* type, API, "name", version, special_info, init_function */ + { PLUGIN_VIDEO_DECODER, 19, "gdkpixbuf", XINE_VERSION_CODE, &dec_info_image, init_class }, + { PLUGIN_NONE, 0, "", 0, NULL, NULL } +}; diff --git a/src/video_dec/image.c b/src/video_dec/image.c new file mode 100644 index 000000000..e91588702 --- /dev/null +++ b/src/video_dec/image.c @@ -0,0 +1,306 @@ +/* + * Copyright (C) 2003-2005 the xine project + * + * This file is part of xine, a free video player. + * + * xine is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * xine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110, USA + * + * a image video decoder + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + + +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <stdio.h> + +#define LOG_MODULE "image_video_decoder" +#define LOG_VERBOSE +/* +#define LOG +*/ + +#include <wand/magick_wand.h> +#ifdef PACKAGE_NAME +#undef PACKAGE_BUGREPORT +#undef PACKAGE_NAME +#undef PACKAGE_STRING +#undef PACKAGE_TARNAME +#undef PACKAGE_VERSION +#endif + +#include <xine/xine_internal.h> +#include <xine/video_out.h> +#include <xine/buffer.h> +#include <xine/xineutils.h> +#include "bswap.h" + +#ifdef HAVE_GRAPHICSMAGICK +# define MAGICK_VERSION 0x670 +#else +# if !defined(MagickLibVersion) || MagickLibVersion < 0x671 +# define MAGICK_VERSION 0x670 +#else +# define MAGICK_VERSION MagickLibVersion +# endif +#endif + + +typedef struct { + video_decoder_class_t decoder_class; + + /* + * private variables + */ + +} image_class_t; + + +typedef struct image_decoder_s { + video_decoder_t video_decoder; + + image_class_t *cls; + + xine_stream_t *stream; + int video_open; + + unsigned char *image; + int index; + +} image_decoder_t; + + +static void image_decode_data (video_decoder_t *this_gen, buf_element_t *buf) { + image_decoder_t *this = (image_decoder_t *) this_gen; + + if (!this->video_open) { + lprintf("opening video\n"); + (this->stream->video_out->open) (this->stream->video_out, this->stream); + this->video_open = 1; + } + + xine_buffer_copyin(this->image, this->index, buf->mem, buf->size); + this->index += buf->size; + + if (buf->decoder_flags & BUF_FLAG_FRAME_END) { + int width, height, i; + int status; + MagickWand *wand; + uint8_t *img_buf, *img_buf_ptr; + yuv_planes_t yuv_planes; + vo_frame_t *img; + + 
/* + * this->image -> rgb data + */ +#if MAGICK_VERSION < 0x671 + InitializeMagick(NULL); +#else + MagickWandGenesis(); +#endif + wand = NewMagickWand(); + status = MagickReadImageBlob(wand, this->image, this->index); + + this->index = 0; + + if (!status) { + DestroyMagickWand(wand); +#if MAGICK_VERSION < 0x671 + DestroyMagick(); +#else + MagickWandTerminus(); +#endif + lprintf("error loading image\n"); + return; + } + + width = MagickGetImageWidth(wand) & ~1; /* must be even for init_yuv_planes */ + height = MagickGetImageHeight(wand); + img_buf = malloc(width * height * 3); +#if MAGICK_VERSION < 0x671 + MagickGetImagePixels(wand, 0, 0, width, height, "RGB", CharPixel, img_buf); + DestroyMagickWand(wand); + DestroyMagick(); +#else + MagickExportImagePixels(wand, 0, 0, width, height, "RGB", CharPixel, img_buf); + DestroyMagickWand(wand); + MagickWandTerminus(); +#endif + + _x_stream_info_set(this->stream, XINE_STREAM_INFO_VIDEO_WIDTH, width); + _x_stream_info_set(this->stream, XINE_STREAM_INFO_VIDEO_HEIGHT, height); + + lprintf("image loaded successfully\n"); + + /* + * rgb data -> yuv_planes + */ + init_yuv_planes(&yuv_planes, width, height); + + img_buf_ptr = img_buf; + for (i=0; i < width*height; i++) { + uint8_t r = *(img_buf_ptr++); + uint8_t g = *(img_buf_ptr++); + uint8_t b = *(img_buf_ptr++); + + yuv_planes.y[i] = COMPUTE_Y(r, g, b); + yuv_planes.u[i] = COMPUTE_U(r, g, b); + yuv_planes.v[i] = COMPUTE_V(r, g, b); + } + free(img_buf); + + /* + * alloc and draw video frame + */ + img = this->stream->video_out->get_frame (this->stream->video_out, width, + height, (double)width/(double)height, + XINE_IMGFMT_YUY2, + VO_BOTH_FIELDS); + img->pts = buf->pts; + img->duration = 3600; + img->bad_frame = 0; + + yuv444_to_yuy2(&yuv_planes, img->base[0], img->pitches[0]); + free_yuv_planes(&yuv_planes); + + _x_stream_info_set(this->stream, XINE_STREAM_INFO_FRAME_DURATION, img->duration); + + img->draw(img, this->stream); + img->free(img); + } +} + + +static void 
image_flush (video_decoder_t *this_gen) { + /* image_decoder_t *this = (image_decoder_t *) this_gen; */ + + /* + * flush out any frames that are still stored in the decoder + */ +} + + +static void image_reset (video_decoder_t *this_gen) { + image_decoder_t *this = (image_decoder_t *) this_gen; + + /* + * reset decoder after engine flush (prepare for new + * video data not related to recently decoded data) + */ + + this->index = 0; +} + + +static void image_discontinuity (video_decoder_t *this_gen) { + /* image_decoder_t *this = (image_decoder_t *) this_gen; */ + + /* + * a time reference discontinuity has happened. + * that is, it must forget any currently held pts value + */ +} + +static void image_dispose (video_decoder_t *this_gen) { + image_decoder_t *this = (image_decoder_t *) this_gen; + + if (this->video_open) { + lprintf("closing video\n"); + + this->stream->video_out->close(this->stream->video_out, this->stream); + this->video_open = 0; + } + + xine_buffer_free(this->image); + + lprintf("closed\n"); + free (this); +} + + +static video_decoder_t *open_plugin (video_decoder_class_t *class_gen, + xine_stream_t *stream) { + + image_class_t *cls = (image_class_t *) class_gen; + image_decoder_t *this; + + lprintf("opened\n"); + + this = (image_decoder_t *) calloc(1, sizeof(image_decoder_t)); + + this->video_decoder.decode_data = image_decode_data; + this->video_decoder.flush = image_flush; + this->video_decoder.reset = image_reset; + this->video_decoder.discontinuity = image_discontinuity; + this->video_decoder.dispose = image_dispose; + this->cls = cls; + this->stream = stream; + + /* + * initialisation of privates + */ + + this->image = xine_buffer_init(10240); + + return &this->video_decoder; +} + +/* + * image plugin class + */ +static void *init_class (xine_t *xine, void *data) { + + image_class_t *this; + + this = (image_class_t *) calloc(1, sizeof(image_class_t)); + + this->decoder_class.open_plugin = open_plugin; + this->decoder_class.identifier = 
"imagevdec"; + this->decoder_class.description = N_("image video decoder plugin"); + this->decoder_class.dispose = default_video_decoder_class_dispose; + + /* + * initialisation of privates + */ + + lprintf("class opened\n"); + + return this; +} + +/* + * exported plugin catalog entry + */ + +static const uint32_t supported_types[] = { BUF_VIDEO_IMAGE, + 0 }; + +static const decoder_info_t dec_info_image = { + supported_types, /* supported types */ + 6 /* priority */ +}; + +const plugin_info_t xine_plugin_info[] EXPORTED = { + /* type, API, "name", version, special_info, init_function */ + { PLUGIN_VIDEO_DECODER, 19, "image", XINE_VERSION_CODE, &dec_info_image, init_class }, + { PLUGIN_NONE, 0, "", 0, NULL, NULL } +}; diff --git a/src/video_dec/libmpeg2/Makefile.am b/src/video_dec/libmpeg2/Makefile.am new file mode 100644 index 000000000..a6aab2a72 --- /dev/null +++ b/src/video_dec/libmpeg2/Makefile.am @@ -0,0 +1,34 @@ +include $(top_srcdir)/misc/Makefile.quiet +include $(top_builddir)/misc/Makefile.plugins +include $(top_srcdir)/misc/Makefile.common + +AM_CFLAGS = $(DEFAULT_OCFLAGS) $(VISIBILITY_FLAG) +AM_LDFLAGS = $(xineplug_ldflags) + +noinst_HEADERS = vlc.h mpeg2.h xvmc.h xvmc_vld.h mpeg2_internal.h idct_mlib.h vis.h \ + libmpeg2_accel.h + +xineplug_LTLIBRARIES = xineplug_decode_mpeg2.la + +xineplug_decode_mpeg2_la_SOURCES = \ + cpu_state.c \ + decode.c \ + header.c \ + idct.c \ + idct_altivec.c \ + idct_mlib.c \ + idct_mmx.c \ + motion_comp.c \ + motion_comp_altivec.c \ + motion_comp_mmx.c \ + motion_comp_mlib.c \ + motion_comp_vis.c \ + slice.c \ + slice_xvmc.c \ + slice_xvmc_vld.c \ + stats.c \ + xine_mpeg2_decoder.c \ + libmpeg2_accel.c + +xineplug_decode_mpeg2_la_LIBADD = $(XINE_LIB) $(MLIB_LIBS) $(LTLIBINTL) $(AVUTIL_LIBS) -lm +xineplug_decode_mpeg2_la_CFLAGS = $(AM_CFLAGS) $(MLIB_CFLAGS) $(AVUTIL_CFLAGS) diff --git a/src/video_dec/libmpeg2/cpu_state.c b/src/video_dec/libmpeg2/cpu_state.c new file mode 100644 index 000000000..d1507eec1 --- /dev/null +++ 
/*
 * cpu_state.c
 * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
 * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
 *
 * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
 * See http://libmpeg2.sourceforge.net/ for updates.
 *
 * mpeg2dec is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * mpeg2dec is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#include "config.h"

#include <stdlib.h>
#include <inttypes.h>

#include "mpeg2_internal.h"
#include <xine/xineutils.h>
#include "xine_mmx.h"

/*
 * Optional hooks for saving / restoring CPU state.  Both start out as NULL
 * and are only filled in by mpeg2_cpu_state_init() when a matching
 * acceleration flag is set, so they may still be NULL afterwards.
 */
void (* mpeg2_cpu_state_save) (cpu_state_t * state) = NULL;
void (* mpeg2_cpu_state_restore) (cpu_state_t * state) = NULL;

#if defined(ARCH_X86) || defined(ARCH_X86_64)
/*
 * x86 restore hook: only calls emms(); `state` is not used.
 * NOTE(review): emms() presumably comes from xine_mmx.h and issues the
 * EMMS instruction - confirm.
 */
static void state_restore_mmx (cpu_state_t * state)
{
    emms ();
}
#endif

#if defined (ARCH_PPC) && defined (ENABLE_ALTIVEC)

#ifndef HOST_OS_DARWIN

/*
 * Store vector registers v20..v31 at byte offsets 0, 16, ..., 176 from the
 * address held in r3, using r9 and r11 alternately as offset registers.
 *
 * NOTE(review): the asm names r3 directly instead of using an operand for
 * `state`, and declares no clobbers for r9/r11 or memory.  It looks like it
 * relies on `state` arriving in r3 and on the compiler not reordering
 * around it - confirm before touching.
 */
static void state_save_altivec (cpu_state_t * state)
{
    asm (" \n"
         " li %r9, 16 \n"
         " stvx %v20, 0, %r3 \n"
         " li %r11, 32 \n"
         " stvx %v21, %r9, %r3 \n"
         " li %r9, 48 \n"
         " stvx %v22, %r11, %r3 \n"
         " li %r11, 64 \n"
         " stvx %v23, %r9, %r3 \n"
         " li %r9, 80 \n"
         " stvx %v24, %r11, %r3 \n"
         " li %r11, 96 \n"
         " stvx %v25, %r9, %r3 \n"
         " li %r9, 112 \n"
         " stvx %v26, %r11, %r3 \n"
         " li %r11, 128 \n"
         " stvx %v27, %r9, %r3 \n"
         " li %r9, 144 \n"
         " stvx %v28, %r11, %r3 \n"
         " li %r11, 160 \n"
         " stvx %v29, %r9, %r3 \n"
         " li %r9, 176 \n"
         " stvx %v30, %r11, %r3 \n"
         " stvx %v31, %r9, %r3 \n"
         );
}

/*
 * Counterpart of state_save_altivec(): load v20..v31 from byte offsets
 * 0, 16, ..., 176 relative to the address in r3 (same register
 * assumptions as the save routine).
 */
static void state_restore_altivec (cpu_state_t * state)
{
    asm (" \n"
         " li %r9, 16 \n"
         " lvx %v20, 0, %r3 \n"
         " li %r11, 32 \n"
         " lvx %v21, %r9, %r3 \n"
         " li %r9, 48 \n"
         " lvx %v22, %r11, %r3 \n"
         " li %r11, 64 \n"
         " lvx %v23, %r9, %r3 \n"
         " li %r9, 80 \n"
         " lvx %v24, %r11, %r3 \n"
         " li %r11, 96 \n"
         " lvx %v25, %r9, %r3 \n"
         " li %r9, 112 \n"
         " lvx %v26, %r11, %r3 \n"
         " li %r11, 128 \n"
         " lvx %v27, %r9, %r3 \n"
         " li %r9, 144 \n"
         " lvx %v28, %r11, %r3 \n"
         " li %r11, 160 \n"
         " lvx %v29, %r9, %r3 \n"
         " li %r9, 176 \n"
         " lvx %v30, %r11, %r3 \n"
         " lvx %v31, %r9, %r3 \n"
         );
}

#else /* HOST_OS_DARWIN */

/*
 * Darwin variant: the same instruction sequences, spelled with bare
 * register names (r9, v20, ...) and assembled from string-pasting macros.
 * LI(a,b) emits "li rA,B"; STVX/LVX(a,b,c) emit "stvx|lvx vA,rB,rC";
 * the *0 forms ignore their second argument and emit a literal 0 offset.
 */
#define LI(a,b) "li r" #a "," #b "\n\t"
#define STVX0(a,b,c) "stvx v" #a ",0,r" #c "\n\t"
#define STVX(a,b,c) "stvx v" #a ",r" #b ",r" #c "\n\t"
#define LVX0(a,b,c) "lvx v" #a ",0,r" #c "\n\t"
#define LVX(a,b,c) "lvx v" #a ",r" #b ",r" #c "\n\t"

/* Store v20..v31 at offsets 0..176 from r3 (see the register note on the non-Darwin variant). */
static void state_save_altivec (cpu_state_t * state)
{
    asm (LI (9, 16)
         STVX0 (20, 0, 3)
         LI (11, 32)
         STVX (21, 9, 3)
         LI (9, 48)
         STVX (22, 11, 3)
         LI (11, 64)
         STVX (23, 9, 3)
         LI (9, 80)
         STVX (24, 11, 3)
         LI (11, 96)
         STVX (25, 9, 3)
         LI (9, 112)
         STVX (26, 11, 3)
         LI (11, 128)
         STVX (27, 9, 3)
         LI (9, 144)
         STVX (28, 11, 3)
         LI (11, 160)
         STVX (29, 9, 3)
         LI (9, 176)
         STVX (30, 11, 3)
         STVX (31, 9, 3));
}

/* Load v20..v31 from offsets 0..176 relative to r3. */
static void state_restore_altivec (cpu_state_t * state)
{
    asm (LI (9, 16)
         LVX0 (20, 0, 3)
         LI (11, 32)
         LVX (21, 9, 3)
         LI (9, 48)
         LVX (22, 11, 3)
         LI (11, 64)
         LVX (23, 9, 3)
         LI (9, 80)
         LVX (24, 11, 3)
         LI (11, 96)
         LVX (25, 9, 3)
         LI (9, 112)
         LVX (26, 11, 3)
         LI (11, 128)
         LVX (27, 9, 3)
         LI (9, 144)
         LVX (28, 11, 3)
         LI (11, 160)
         LVX (29, 9, 3)
         LI (9, 176)
         LVX (30, 11, 3)
         LVX (31, 9, 3));
}
#endif /* HOST_OS_DARWIN */

#endif /* defined (ARCH_PPC) && defined (ENABLE_ALTIVEC) */

/*
 * Install the CPU state hooks that match the acceleration flags in
 * mm_accel:
 *  - x86 builds with MM_ACCEL_X86_MMX: restore hook only (no save hook);
 *  - PPC AltiVec builds with MM_ACCEL_PPC_ALTIVEC: both save and restore.
 * Hooks that do not apply keep their previous value (initially NULL).
 */
void mpeg2_cpu_state_init (uint32_t mm_accel)
{
#if defined(ARCH_X86) || defined(ARCH_X86_64)
    if (mm_accel & MM_ACCEL_X86_MMX) {
        mpeg2_cpu_state_restore = state_restore_mmx;
    }
#endif
#if defined (ARCH_PPC) && defined (ENABLE_ALTIVEC)
    if (mm_accel & MM_ACCEL_PPC_ALTIVEC) {
        mpeg2_cpu_state_save = state_save_altivec;
        mpeg2_cpu_state_restore = state_restore_altivec;
    }
#endif
}
Bartsch + * + */ + +#include "config.h" +#include <stdio.h> +#include <string.h> /* memcpy/memset, try to remove */ +#include <stdlib.h> +#include <inttypes.h> +#include <math.h> + +#define LOG_MODULE "decode" +#define LOG_VERBOSE +/* +#define LOG +*/ + +#ifdef HAVE_FFMPEG_AVUTIL_H +# include <mem.h> +#else +# include <libavutil/mem.h> +#endif + +#include <xine/xine_internal.h> +#include <xine/video_out.h> + +#include "mpeg2.h" +#include "mpeg2_internal.h" +#include <xine/xineutils.h> +#include "libmpeg2_accel.h" + +/* +#define LOG_PAN_SCAN +*/ + +/* #define BUFFER_SIZE (224 * 1024) */ +#define BUFFER_SIZE (1194 * 1024) /* new buffer size for mpeg2dec 0.2.1 */ + +static void process_userdata(mpeg2dec_t *mpeg2dec, uint8_t *buffer); + +void mpeg2_init (mpeg2dec_t * mpeg2dec, + xine_video_port_t * output) +{ + static int do_init = 1; + uint32_t mm_accel; + + if (do_init) { + do_init = 0; + mm_accel = xine_mm_accel(); + mpeg2_cpu_state_init (mm_accel); + mpeg2_idct_init (mm_accel); + mpeg2_mc_init (mm_accel); + libmpeg2_accel_scan(&mpeg2dec->accel, mpeg2_scan_norm, mpeg2_scan_alt); + } + + if( !mpeg2dec->chunk_buffer ) + mpeg2dec->chunk_buffer = av_mallocz(BUFFER_SIZE + 4); + if( !mpeg2dec->picture ) + mpeg2dec->picture = av_mallocz(sizeof(picture_t)); + + mpeg2dec->shift = 0xffffff00; + mpeg2dec->new_sequence = 0; + mpeg2dec->is_sequence_needed = 1; + mpeg2dec->is_wait_for_ip_frames = 2; + mpeg2dec->frames_to_drop = 0; + mpeg2dec->drop_frame = 0; + mpeg2dec->in_slice = 0; + mpeg2dec->output = output; + mpeg2dec->chunk_ptr = mpeg2dec->chunk_buffer; + mpeg2dec->code = 0xb4; + mpeg2dec->seek_mode = 0; + + /* initialize AFD storage */ + mpeg2dec->afd_value_seen = XINE_VIDEO_AFD_NOT_PRESENT; + mpeg2dec->afd_value_reported = (XINE_VIDEO_AFD_NOT_PRESENT - 1); + + /* initialize substructures */ + mpeg2_header_state_init (mpeg2dec->picture); + + if ( output->get_capabilities(output) & VO_CAP_XXMC) { + printf("libmpeg2: output port has XxMC capability\n"); + 
mpeg2dec->frame_format = XINE_IMGFMT_XXMC; + } else if( output->get_capabilities(output) & VO_CAP_XVMC_MOCOMP) { + printf("libmpeg2: output port has XvMC capability\n"); + mpeg2dec->frame_format = XINE_IMGFMT_XVMC; + } else { + mpeg2dec->frame_format = XINE_IMGFMT_YV12; + } +} + +static inline void get_frame_duration (mpeg2dec_t * mpeg2dec, vo_frame_t *frame) +{ + static const double durations[] = { + 0, /* invalid */ + 3753.75, /* 23.976 fps */ + 3750, /* 24 fps */ + 3600, /* 25 fps */ + 3003, /* 29.97 fps */ + 3000, /* 30 fps */ + 1800, /* 50 fps */ + 1501.5, /* 59.94 fps */ + 1500, /* 60 fps */ + }; + double duration = ((unsigned) mpeg2dec->picture->frame_rate_code > 8u) + ? 0 : durations[mpeg2dec->picture->frame_rate_code]; + + duration = duration * (mpeg2dec->picture->frame_rate_ext_n + 1.0) / + (mpeg2dec->picture->frame_rate_ext_d + 1.0); + + /* this should be used to detect any special rff pattern */ + mpeg2dec->rff_pattern = mpeg2dec->rff_pattern << 1; + mpeg2dec->rff_pattern |= !!frame->repeat_first_field; + + if( ((mpeg2dec->rff_pattern & 0xff) == 0xaa || + (mpeg2dec->rff_pattern & 0xff) == 0x55) && + !mpeg2dec->picture->progressive_sequence ) { + /* special case for ntsc 3:2 pulldown */ + duration *= 5.0 / 4.0; + } + else + { + if( frame->repeat_first_field ) { + if( !mpeg2dec->picture->progressive_sequence && + frame->progressive_frame ) { + /* decoder should output 3 fields, so adjust duration to + count on this extra field time */ + duration *= 3.0 / 2.0; + } else if( mpeg2dec->picture->progressive_sequence ) { + /* for progressive sequences the output should repeat the + frame 1 or 2 times depending on top_field_first flag. */ + duration *= (frame->top_field_first) ? 
3 : 2; + } + } + } + + frame->duration = (int) ceil (duration); + _x_stream_info_set(mpeg2dec->stream, XINE_STREAM_INFO_FRAME_DURATION, frame->duration); + /*printf("mpeg2dec: rff=%u\n",frame->repeat_first_field);*/ +} + +static double get_aspect_ratio(mpeg2dec_t *mpeg2dec) +{ + double ratio; + picture_t * picture = mpeg2dec->picture; + double mpeg1_pel_ratio[16] = {1.0 /* forbidden */, + 1.0, 0.6735, 0.7031, 0.7615, 0.8055, 0.8437, 0.8935, 0.9157, + 0.9815, 1.0255, 1.0695, 1.0950, 1.1575, 1.2015, 1.0 /*reserved*/ }; + + /* TODO: For slower machines the value of this function should be computed + * once and cached! + */ + + if( !picture->mpeg1 ) { + /* these hardcoded values are defined on mpeg2 standard for + * aspect ratio. other values are reserved or forbidden. */ + switch(picture->aspect_ratio_information) { + case 2: + ratio = 4.0/3.0; + break; + case 3: + ratio = 16.0/9.0; + break; + case 4: + ratio = 2.11/1.0; + break; + case 1: + default: + ratio = (double)picture->coded_picture_width/(double)picture->coded_picture_height; + break; + } + } else { + /* mpeg1 constants refer to pixel aspect ratio */ + ratio = (double)picture->coded_picture_width/(double)picture->coded_picture_height; + ratio /= mpeg1_pel_ratio[picture->aspect_ratio_information]; + } + + return ratio; +} + +static void remember_metainfo (mpeg2dec_t *mpeg2dec) { + + picture_t * picture = mpeg2dec->picture; + + _x_stream_info_set(mpeg2dec->stream, XINE_STREAM_INFO_VIDEO_WIDTH, picture->display_width); + _x_stream_info_set(mpeg2dec->stream, XINE_STREAM_INFO_VIDEO_HEIGHT, picture->display_height); + _x_stream_info_set(mpeg2dec->stream, XINE_STREAM_INFO_VIDEO_RATIO, + ((double)10000 * get_aspect_ratio(mpeg2dec))); + + switch (mpeg2dec->picture->frame_rate_code) { + case 1: /* 23.976 fps */ + _x_stream_info_set(mpeg2dec->stream, XINE_STREAM_INFO_FRAME_DURATION, 3913); + break; + case 2: /* 24 fps */ + _x_stream_info_set(mpeg2dec->stream, XINE_STREAM_INFO_FRAME_DURATION, 3750); + break; + case 3: /* 
25 fps */ + _x_stream_info_set(mpeg2dec->stream, XINE_STREAM_INFO_FRAME_DURATION, 3600); + break; + case 4: /* 29.97 fps */ + _x_stream_info_set(mpeg2dec->stream, XINE_STREAM_INFO_FRAME_DURATION, 3003); + break; + case 5: /* 30 fps */ + _x_stream_info_set(mpeg2dec->stream, XINE_STREAM_INFO_FRAME_DURATION, 3000); + break; + case 6: /* 50 fps */ + _x_stream_info_set(mpeg2dec->stream, XINE_STREAM_INFO_FRAME_DURATION, 1800); + break; + case 7: /* 59.94 fps */ + _x_stream_info_set(mpeg2dec->stream, XINE_STREAM_INFO_FRAME_DURATION, 1525); + break; + case 8: /* 60 fps */ + _x_stream_info_set(mpeg2dec->stream, XINE_STREAM_INFO_FRAME_DURATION, 1509); + break; + default: + /* printf ("invalid/unknown frame rate code : %d \n", + frame->frame_rate_code); */ + _x_stream_info_set(mpeg2dec->stream, XINE_STREAM_INFO_FRAME_DURATION, 3000); + } + + _x_meta_info_set_utf8(mpeg2dec->stream, XINE_META_INFO_VIDEOCODEC, "MPEG (libmpeg2)"); +} + +static inline int parse_chunk (mpeg2dec_t * mpeg2dec, int code, + uint8_t * buffer, int next_code) +{ + picture_t * picture; + int is_frame_done; + double ratio; + + /* wait for sequence_header_code */ + if (mpeg2dec->is_sequence_needed) { + if (code != 0xb3) { + /* printf ("libmpeg2: waiting for sequence header\n"); */ + mpeg2dec->pts = 0; + return 0; + } + } + if (mpeg2dec->is_frame_needed) { + /* printf ("libmpeg2: waiting for frame start\n"); */ + mpeg2dec->pts = 0; + if (mpeg2dec->picture->current_frame) + mpeg2dec->picture->current_frame->bad_frame = 1; + } + + mpeg2_stats (code, buffer); + + picture = mpeg2dec->picture; + is_frame_done = mpeg2dec->in_slice && ((!code) || (code >= 0xb0)); + + if (is_frame_done) + mpeg2dec->in_slice = 0; + + if (is_frame_done && picture->current_frame != NULL) { + + libmpeg2_accel_frame_completion(&mpeg2dec->accel, mpeg2dec->frame_format, + picture, code); + + if (((picture->picture_structure == FRAME_PICTURE) || + (picture->second_field)) ) { + + if (mpeg2dec->drop_frame) + picture->current_frame->bad_frame 
= 1; + + if (picture->picture_coding_type == B_TYPE) { + if( picture->current_frame && !picture->current_frame->drawn ) { + + /* hack against wrong mpeg1 pts */ + if (picture->mpeg1) + picture->current_frame->pts = 0; + + get_frame_duration(mpeg2dec, picture->current_frame); + mpeg2dec->frames_to_drop = picture->current_frame->draw (picture->current_frame, mpeg2dec->stream); + picture->current_frame->drawn = 1; + } + } else if (picture->forward_reference_frame && !picture->forward_reference_frame->drawn) { + get_frame_duration(mpeg2dec, picture->forward_reference_frame); + mpeg2dec->frames_to_drop = picture->forward_reference_frame->draw (picture->forward_reference_frame, + mpeg2dec->stream); + picture->forward_reference_frame->drawn = 1; + } + } + } + + switch (code) { + case 0x00: /* picture_start_code */ + if (mpeg2_header_picture (picture, buffer)) { + fprintf (stderr, "bad picture header\n"); + abort(); + } + + mpeg2dec->is_frame_needed=0; + + if (!picture->second_field) { + /* find out if we want to skip this frame */ + mpeg2dec->drop_frame = 0; + + /* picture->skip_non_intra_dct = (mpeg2dec->frames_to_drop>0) ; */ + + switch (picture->picture_coding_type) { + case B_TYPE: + + lprintf ("B-Frame\n"); + + if (mpeg2dec->frames_to_drop>1) { + lprintf ("dropping b-frame because frames_to_drop==%d\n", + mpeg2dec->frames_to_drop); + mpeg2dec->drop_frame = 1; + } else if (!picture->forward_reference_frame || picture->forward_reference_frame->bad_frame + || !picture->backward_reference_frame || picture->backward_reference_frame->bad_frame) { +#ifdef LOG + printf ("libmpeg2: dropping b-frame because ref is bad ("); + if (picture->forward_reference_frame) + printf ("fw ref frame %d, bad %d;", picture->forward_reference_frame->id, + picture->forward_reference_frame->bad_frame); + else + printf ("fw ref frame not there;"); + if (picture->backward_reference_frame) + printf ("bw ref frame %d, bad %d)\n", picture->backward_reference_frame->id, + 
picture->backward_reference_frame->bad_frame); + else + printf ("fw ref frame not there)\n"); +#endif + mpeg2dec->drop_frame = 1; + } else if (mpeg2dec->is_wait_for_ip_frames > 0) { + lprintf("dropping b-frame because refs are invalid\n"); + mpeg2dec->drop_frame = 1; + } + break; + + case P_TYPE: + + lprintf ("P-Frame\n"); + + if (mpeg2dec->frames_to_drop>2) { + mpeg2dec->drop_frame = 1; + lprintf ("dropping p-frame because frames_to_drop==%d\n", + mpeg2dec->frames_to_drop); + } else if (!picture->backward_reference_frame || picture->backward_reference_frame->bad_frame) { + mpeg2dec->drop_frame = 1; +#ifdef LOG + if (!picture->backward_reference_frame) + printf ("libmpeg2: dropping p-frame because no ref frame\n"); + else + printf ("libmpeg2: dropping p-frame because ref %d is bad\n", picture->backward_reference_frame->id); +#endif + } else if (mpeg2dec->is_wait_for_ip_frames > 1) { + lprintf("dropping p-frame because ref is invalid\n"); + mpeg2dec->drop_frame = 1; + } else if (mpeg2dec->is_wait_for_ip_frames) + mpeg2dec->is_wait_for_ip_frames--; + + break; + + case I_TYPE: + lprintf ("I-Frame\n"); + /* for the sake of dvd menus, never drop i-frames + if (mpeg2dec->frames_to_drop>4) { + mpeg2dec->drop_frame = 1; + } + */ + + if (mpeg2dec->is_wait_for_ip_frames) + mpeg2dec->is_wait_for_ip_frames--; + + break; + } + } + + break; + + case 0xb2: /* user data code */ + process_userdata(mpeg2dec, buffer); + break; + + case 0xb3: /* sequence_header_code */ + if (mpeg2_header_sequence (picture, buffer)) { + fprintf (stderr, "bad sequence header\n"); + /* abort(); */ + break; + } + + /* reset AFD value to detect absence */ + mpeg2dec->afd_value_seen = XINE_VIDEO_AFD_NOT_PRESENT; + + /* according to ISO/IEC 13818-2, an extension start code will follow. 
+ * Otherwise the stream follows ISO/IEC 11172-2 which means MPEG1 */ + picture->mpeg1 = (next_code != 0xb5); + + if (mpeg2dec->force_aspect) picture->aspect_ratio_information = mpeg2dec->force_aspect; + + if (mpeg2dec->is_sequence_needed ) { + mpeg2dec->new_sequence = 1; + } + + if (mpeg2dec->is_sequence_needed + || (picture->aspect_ratio_information != picture->saved_aspect_ratio) + || (picture->frame_width != picture->coded_picture_width) + || (picture->frame_height != picture->coded_picture_height)) { + xine_event_t event; + xine_format_change_data_t data; + + remember_metainfo (mpeg2dec); + event.type = XINE_EVENT_FRAME_FORMAT_CHANGE; + event.stream = mpeg2dec->stream; + event.data = &data; + event.data_length = sizeof(data); + data.width = picture->coded_picture_width; + data.height = picture->coded_picture_height; + data.aspect = picture->aspect_ratio_information; + data.pan_scan = mpeg2dec->force_pan_scan; + xine_event_send(mpeg2dec->stream, &event); + + _x_stream_info_set(mpeg2dec->stream,XINE_STREAM_INFO_VIDEO_WIDTH, + picture->display_width); + _x_stream_info_set(mpeg2dec->stream,XINE_STREAM_INFO_VIDEO_HEIGHT, + picture->display_height); + + if (picture->forward_reference_frame && + picture->forward_reference_frame != picture->current_frame && + picture->forward_reference_frame != picture->backward_reference_frame) + picture->forward_reference_frame->free (picture->forward_reference_frame); + + if (picture->backward_reference_frame && + picture->backward_reference_frame != picture->current_frame) + picture->backward_reference_frame->free (picture->backward_reference_frame); + + mpeg2dec->is_sequence_needed = 0; + picture->forward_reference_frame = NULL; + picture->backward_reference_frame = NULL; + + picture->frame_width = picture->coded_picture_width; + picture->frame_height = picture->coded_picture_height; + picture->saved_aspect_ratio = picture->aspect_ratio_information; + } + break; + + case 0xb5: /* extension_start_code */ + if 
(mpeg2_header_extension (picture, buffer)) { + fprintf (stderr, "bad extension\n"); + abort(); + } + break; + + case 0xb7: /* sequence end code */ + mpeg2_flush(mpeg2dec); + mpeg2dec->is_sequence_needed = 1; + break; + + case 0xb8: /* group of pictures start code */ + if (mpeg2_header_group_of_pictures (picture, buffer)) { + printf ("libmpeg2: bad group of pictures\n"); + abort(); + } + default: + if ((code >= 0xb9) && (code != 0xe4)) { + printf("Not multiplexed? 0x%x\n",code); + } + if (code >= 0xb0) + break; + + /* check for AFD change once per picture */ + if (mpeg2dec->afd_value_reported != mpeg2dec->afd_value_seen) { + /* AFD data should better be stored in current_frame to have it */ + /* ready and synchronous with other data like width or height. */ + /* An AFD change should then be detected when a new frame is emitted */ + /* from the decoder to report the AFD change in display order and not */ + /* in decoding order like it happens below for now. */ + _x_stream_info_set(mpeg2dec->stream, XINE_STREAM_INFO_VIDEO_AFD, mpeg2dec->afd_value_seen); + lprintf ("AFD changed from %d to %d\n", mpeg2dec->afd_value_reported, mpeg2dec->afd_value_seen); + mpeg2dec->afd_value_reported = mpeg2dec->afd_value_seen; + } + + if (!(mpeg2dec->in_slice)) { + mpeg2dec->in_slice = 1; + + if (picture->second_field) { + if (picture->current_frame) + picture->current_frame->field(picture->current_frame, + picture->picture_structure); + else + mpeg2dec->drop_frame = 1; + } else { + int flags = picture->picture_structure; + + if (!picture->mpeg1) flags |= VO_INTERLACED_FLAG; + if (mpeg2dec->force_pan_scan) flags |= VO_PAN_SCAN_FLAG; + if (mpeg2dec->new_sequence) flags |= VO_NEW_SEQUENCE_FLAG; + + if ( picture->current_frame && + picture->current_frame != picture->backward_reference_frame && + picture->current_frame != picture->forward_reference_frame ) { + picture->current_frame->free (picture->current_frame); + } + if (picture->picture_coding_type == B_TYPE) { + ratio = 
get_aspect_ratio(mpeg2dec); + picture->current_frame = + mpeg2dec->stream->video_out->get_frame (mpeg2dec->stream->video_out, + picture->coded_picture_width, + picture->coded_picture_height, + ratio, + mpeg2dec->frame_format, + flags); + libmpeg2_accel_new_frame( &mpeg2dec->accel, mpeg2dec->frame_format, + picture, ratio, flags); + } else { + ratio = get_aspect_ratio(mpeg2dec); + picture->current_frame = + mpeg2dec->stream->video_out->get_frame (mpeg2dec->stream->video_out, + picture->coded_picture_width, + picture->coded_picture_height, + ratio, + mpeg2dec->frame_format, + flags); + + libmpeg2_accel_new_frame( &mpeg2dec->accel, mpeg2dec->frame_format, + picture, ratio, flags); + + if (picture->forward_reference_frame && + picture->forward_reference_frame != picture->backward_reference_frame) + picture->forward_reference_frame->free (picture->forward_reference_frame); + + picture->forward_reference_frame = + picture->backward_reference_frame; + picture->backward_reference_frame = picture->current_frame; + } + + if(mpeg2dec->new_sequence) + mpeg2dec->new_sequence = + libmpeg2_accel_new_sequence(&mpeg2dec->accel, mpeg2dec->frame_format, + picture); + + picture->current_frame->bad_frame = 1; + picture->current_frame->drawn = 0; + picture->current_frame->pts = mpeg2dec->pts; + picture->current_frame->top_field_first = picture->top_field_first; + picture->current_frame->repeat_first_field = picture->repeat_first_field; + picture->current_frame->progressive_frame = picture->progressive_frame; + picture->current_frame->crop_right = picture->coded_picture_width - picture->display_width; + picture->current_frame->crop_bottom = picture->coded_picture_height - picture->display_height; + + switch( picture->picture_coding_type ) { + case I_TYPE: + picture->current_frame->picture_coding_type = XINE_PICT_I_TYPE; + break; + case P_TYPE: + picture->current_frame->picture_coding_type = XINE_PICT_P_TYPE; + break; + case B_TYPE: + picture->current_frame->picture_coding_type = 
XINE_PICT_B_TYPE; + break; + case D_TYPE: + picture->current_frame->picture_coding_type = XINE_PICT_D_TYPE; + break; + } + + lprintf ("decoding frame %d, type %s\n", + picture->current_frame->id, picture->picture_coding_type == I_TYPE ? "I" : + picture->picture_coding_type == P_TYPE ? "P" : "B"); + mpeg2dec->pts = 0; + /*printf("Starting to decode frame %d\n",picture->current_frame->id);*/ + } + } + + if (!mpeg2dec->drop_frame && picture->current_frame != NULL) { +#ifdef DEBUG_LOG + printf("slice target %08x past %08x future %08x\n",picture->current_frame,picture->forward_reference_frame,picture->backward_reference_frame); + fflush(stdout); +#endif + libmpeg2_accel_slice(&mpeg2dec->accel, picture, code, buffer, mpeg2dec->chunk_size, + mpeg2dec->chunk_buffer); + + if( picture->v_offset > picture->limit_y || + picture->v_offset + 16 > picture->display_height ) { + picture->current_frame->bad_frame = 0; + } + } + } + + /* printf ("libmpeg2: parse_chunk %d completed\n", code); */ + return is_frame_done; +}

/*
 * Scan [*current, limit) for the start code prefix 00 00 01.
 *
 * mpeg2dec->shift carries the most recently seen bytes across calls, so a
 * prefix split over two input buffers is still recognised; the value
 * 0x00000100 means "00 00 01 has just been consumed".
 *
 * Returns 1 with *current pointing just past the 0x01 byte when a prefix is
 * found, otherwise 0 with *current advanced over the bytes consumed.
 */
static inline int find_start_code (mpeg2dec_t * mpeg2dec,
                                   uint8_t ** current, uint8_t * limit)
{
    uint8_t * p;

    /* Feed up to two bytes through the shift register one at a time so the
     * memchr() scan below may safely look at p[-2] and p[-1]. */
    if (*current >= limit)
        return 0;
    if (mpeg2dec->shift == 0x00000100)
        return 1;

    mpeg2dec->shift = (mpeg2dec->shift | *(*current)++) << 8;

    if (*current >= limit)
        return 0;
    if (mpeg2dec->shift == 0x00000100)
        return 1;

    mpeg2dec->shift = (mpeg2dec->shift | *(*current)++) << 8;

    if (*current >= limit)
        return 0;
    if (mpeg2dec->shift == 0x00000100)
        return 1;

    limit--;

    if (*current >= limit) {
        mpeg2dec->shift = (mpeg2dec->shift | *(*current)++) << 8;
        return 0;
    }

    p = *current;

    /* Jump from one 0x01 byte to the next and check the two bytes before it. */
    while (p < limit && (p = (uint8_t *)memchr(p, 0x01, limit - p))) {
        if (p[-2] || p[-1])
            p += 3;
        else {
            *current = ++p;
            return 1;
        }
    }

    /* Nothing found: consume everything and reload the shift register with
     * the last three bytes so the next call can continue the match. */
    *current = ++limit;
    p = limit - 3;
    mpeg2dec->shift = (mpeg2dec->shift | *p++) << 8;
    mpeg2dec->shift = (mpeg2dec->shift | *p++) << 8;
    mpeg2dec->shift = (mpeg2dec->shift | *p++) << 8;

    return 0;
}

/*
 * Append input bytes to mpeg2dec->chunk_buffer until the next start code.
 *
 * On a start code: mpeg2dec->code is set to the new code byte, chunk_size to
 * the payload length (without the 3 prefix bytes), chunk_ptr is rewound and
 * the position after the code byte is returned.
 * Returns NULL when the input is exhausted without finding a start code.
 * When the chunk buffer fills up first, the chunk is discarded and the code
 * is forced to 0xb4 (sequence_error_code).
 */
static inline uint8_t * copy_chunk (mpeg2dec_t * mpeg2dec,
                                    uint8_t * current, uint8_t * end)
{
    uint8_t * limit;
    uint8_t * data = current;
    int found, bite;

    /* sequence end code 0xb7 doesn't have any data and there might be the case
     * that no start code will follow this code for quite some time (e. g. in
     * case of a still image).
     * Therefore, return immediately with a chunk_size of 0. Setting code to
     * 0xb4 will eat up any trailing garbage next time.
     */
    if (mpeg2dec->code == 0xb7) {
        mpeg2dec->code = 0xb4;
        mpeg2dec->chunk_size = 0;
        return current;
    }

    /* never copy more than what is left in the chunk buffer */
    limit = current + (mpeg2dec->chunk_buffer + BUFFER_SIZE - mpeg2dec->chunk_ptr);
    if (limit > end)
        limit = end;

    found = find_start_code(mpeg2dec, &current, limit);
    bite = current - data;
    if (bite) {
        xine_fast_memcpy(mpeg2dec->chunk_ptr, data, bite);
        mpeg2dec->chunk_ptr += bite;
    }

    if (found) {
        mpeg2dec->code = *current++;
        mpeg2dec->chunk_size = mpeg2dec->chunk_ptr - mpeg2dec->chunk_buffer - 3;
        mpeg2dec->chunk_ptr = mpeg2dec->chunk_buffer;
        mpeg2dec->shift = 0xffffff00;
        return current;
    }

    if (current == end)
        return NULL;

    /* we filled the chunk buffer without finding a start code */
    mpeg2dec->code = 0xb4;	/* sequence_error_code */
    mpeg2dec->chunk_ptr = mpeg2dec->chunk_buffer;
    return current;
}

/*
 * Feed the bytes [current, end) to the decoder.
 *
 * A non-zero pts replaces mpeg2dec->pts. Every completed chunk is handed to
 * parse_chunk() together with the code of the chunk that follows it.
 * Returns the sum of the parse_chunk() results.
 */
int mpeg2_decode_data (mpeg2dec_t * mpeg2dec, uint8_t * current, uint8_t * end,
                       uint64_t pts)
{
    int ret;
    uint8_t code;

    ret = 0;
    if (mpeg2dec->seek_mode) {
        /* restart chunk assembly from a clean state after a seek/reset */
        mpeg2dec->chunk_ptr = mpeg2dec->chunk_buffer;
        mpeg2dec->code = 0xb4;
        mpeg2dec->seek_mode = 0;
        mpeg2dec->shift = 0xffffff00;
        mpeg2dec->is_frame_needed = 1;
    }

    if (pts)
        mpeg2dec->pts = pts;

    /* a pending 0xb7 (sequence end) is processed even without further input */
    while (current != end || mpeg2dec->code == 0xb7) {
        code = mpeg2dec->code;
        current = copy_chunk (mpeg2dec, current, end);
        if (current == NULL)
            break;
        ret += parse_chunk (mpeg2dec, code, mpeg2dec->chunk_buffer, mpeg2dec->code);
    }

    libmpeg2_accel_frame_completion(&mpeg2dec->accel, mpeg2dec->frame_format,
                                    mpeg2dec->picture, 0xff);

    return ret;
}

/* Forget all timestamps: clears the decoder pts and the pts of every frame
 * currently held, and leaves slice state. No-op without a picture. */
void mpeg2_discontinuity (mpeg2dec_t * mpeg2dec) {
    picture_t *picture = mpeg2dec->picture;

    if( !picture )
        return;

    mpeg2dec->in_slice = 0;
    mpeg2dec->pts = 0;
    if ( picture->current_frame )
        picture->current_frame->pts = 0;
    if ( picture->forward_reference_frame )
        picture->forward_reference_frame->pts = 0;
    if ( picture->backward_reference_frame )
        picture->backward_reference_frame->pts = 0;

    libmpeg2_accel_discontinuity(&mpeg2dec->accel, mpeg2dec->frame_format, picture);
}

/* Reset the decoder (e.g. after a seek). MPEG-2 streams keep their frames
 * but mark them bad; MPEG-1 streams release all frames. The next
 * mpeg2_decode_data() call restarts chunk assembly (seek_mode). */
void mpeg2_reset (mpeg2dec_t * mpeg2dec) {

    picture_t *picture = mpeg2dec->picture;

    if( !picture )
        return;

    mpeg2_discontinuity(mpeg2dec);

    if( !picture->mpeg1 ) {
        mpeg2dec->is_wait_for_ip_frames = 2;

        /* mark current frames as bad so they won't make to screen */
        if ( picture->current_frame )
            picture->current_frame->bad_frame=1;
        if (picture->forward_reference_frame )
            picture->forward_reference_frame->bad_frame=1;
        if (picture->backward_reference_frame)
            picture->backward_reference_frame->bad_frame=1;

    } else {
        /* to free reference frames one also needs to fix slice.c to
         * abort when they are NULL. unfortunately it seems to break
         * DVD menus.
         *
         * ...so let's do this for mpeg-1 only :)
         */
        if ( picture->current_frame &&
             picture->current_frame != picture->backward_reference_frame &&
             picture->current_frame != picture->forward_reference_frame )
            picture->current_frame->free (picture->current_frame);
        picture->current_frame = NULL;

        if (picture->forward_reference_frame &&
            picture->forward_reference_frame != picture->backward_reference_frame)
            picture->forward_reference_frame->free (picture->forward_reference_frame);
        picture->forward_reference_frame = NULL;

        if (picture->backward_reference_frame)
            picture->backward_reference_frame->free (picture->backward_reference_frame);
        picture->backward_reference_frame = NULL;
    }

    mpeg2dec->in_slice = 0;
    mpeg2dec->seek_mode = 1;

}

/* Draw the current frame if it is neither drawn yet nor marked bad. */
void mpeg2_flush (mpeg2dec_t * mpeg2dec) {

    picture_t *picture = mpeg2dec->picture;

    if (!picture)
        return;

    if (picture->current_frame && !picture->current_frame->drawn &&
        !picture->current_frame->bad_frame) {

        lprintf ("blasting out current frame %d on flush\n",
                 picture->current_frame->id);

        picture->current_frame->drawn = 1;
        get_frame_duration(mpeg2dec, picture->current_frame);

        picture->current_frame->pts = 0;
        picture->current_frame->draw(picture->current_frame, mpeg2dec->stream);
    }

}

/* Shut the decoder down: draw what is still pending, release every frame,
 * free the chunk and picture buffers and dispose the closed caption decoder. */
void mpeg2_close (mpeg2dec_t * mpeg2dec)
{
    picture_t *picture = mpeg2dec->picture;

    /*
      {
      static uint8_t finalizer[] = {0,0,1,0xb4};
      mpeg2_decode_data (mpeg2dec, finalizer, finalizer+4, 0);
      }
    */

    /*
      don't remove any picture->*->free() below. doing so will cause buffer
      leak, and we only have about 15 of them.
    */

    /* Same guard as mpeg2_discontinuity/mpeg2_reset/mpeg2_flush: nothing to
     * release when no picture was ever set up. */
    if ( picture ) {
        if ( picture->current_frame ) {
            if( !picture->current_frame->drawn ) {
                lprintf ("blasting out current frame on close\n");
                picture->current_frame->pts = 0;
                get_frame_duration(mpeg2dec, picture->current_frame);
                picture->current_frame->draw (picture->current_frame, mpeg2dec->stream);
                picture->current_frame->drawn = 1;
            }

            if( picture->current_frame != picture->backward_reference_frame &&
                picture->current_frame != picture->forward_reference_frame ) {
                picture->current_frame->free (picture->current_frame);
            }
            picture->current_frame = NULL;
        }

        if (picture->forward_reference_frame &&
            picture->forward_reference_frame != picture->backward_reference_frame) {
            picture->forward_reference_frame->free (picture->forward_reference_frame);
        }
        /* cleared unconditionally: when it aliases the backward frame it
         * would otherwise dangle after the free() below */
        picture->forward_reference_frame = NULL;

        if (picture->backward_reference_frame) {
            if( !picture->backward_reference_frame->drawn) {
                lprintf ("blasting out backward reference frame on close\n");
                picture->backward_reference_frame->pts = 0;
                get_frame_duration(mpeg2dec, picture->backward_reference_frame);
                picture->backward_reference_frame->draw (picture->backward_reference_frame, mpeg2dec->stream);
                picture->backward_reference_frame->drawn = 1;
            }
            picture->backward_reference_frame->free (picture->backward_reference_frame);
            picture->backward_reference_frame = NULL;
        }
    }

    av_freep(&mpeg2dec->chunk_buffer);
    av_freep(&mpeg2dec->picture_base);

    if ( mpeg2dec->cc_dec) {
        /* dispose the closed caption decoder */
        mpeg2dec->cc_dec->dispose(mpeg2dec->cc_dec);
        mpeg2dec->cc_dec = NULL;
    }
}

/*
 * Scan [current, end) for sequence headers (0xb3) and extensions (0xb5)
 * only, without decoding pictures. The first sequence header seen while
 * is_sequence_needed is set publishes the frame format (event + stream info).
 * Leaves the decoder in seek_mode.
 */
void mpeg2_find_sequence_header (mpeg2dec_t * mpeg2dec,
                                 uint8_t * current, uint8_t * end){

    uint8_t code, next_code;
    picture_t *picture = mpeg2dec->picture;

    mpeg2dec->seek_mode = 1;

    while (current != end) {
        code = mpeg2dec->code;
        current = copy_chunk (mpeg2dec, current, end);
        if (current == NULL)
            return ;
        next_code = mpeg2dec->code;

        /* printf ("looking for sequence header... %02x\n", code); */

        mpeg2_stats (code, mpeg2dec->chunk_buffer);

        if (code == 0xb3) {	/* sequence_header_code */
            if (mpeg2_header_sequence (picture, mpeg2dec->chunk_buffer)) {
                printf ("libmpeg2: bad sequence header\n");
                continue;
            }

            /* according to ISO/IEC 13818-2, an extension start code will follow.
             * Otherwise the stream follows ISO/IEC 11172-2 which means MPEG1 */
            picture->mpeg1 = (next_code != 0xb5);

            if (mpeg2dec->force_aspect) picture->aspect_ratio_information = mpeg2dec->force_aspect;

            if (mpeg2dec->is_sequence_needed) {
                xine_event_t event;
                xine_format_change_data_t data;

                mpeg2dec->new_sequence = 1;

                mpeg2dec->is_sequence_needed = 0;
                picture->frame_width = picture->coded_picture_width;
                picture->frame_height = picture->coded_picture_height;

                remember_metainfo (mpeg2dec);

                event.type = XINE_EVENT_FRAME_FORMAT_CHANGE;
                event.stream = mpeg2dec->stream;
                event.data = &data;
                event.data_length = sizeof(data);
                data.width = picture->coded_picture_width;
                data.height = picture->coded_picture_height;
                data.aspect = picture->aspect_ratio_information;
                data.pan_scan = mpeg2dec->force_pan_scan;
                xine_event_send(mpeg2dec->stream, &event);

                _x_stream_info_set(mpeg2dec->stream, XINE_STREAM_INFO_VIDEO_WIDTH,
                                   picture->display_width);
                _x_stream_info_set(mpeg2dec->stream, XINE_STREAM_INFO_VIDEO_HEIGHT,
                                   picture->display_height);
            }
        } else if (code == 0xb5) {	/* extension_start_code */
            if (mpeg2_header_extension (picture, mpeg2dec->chunk_buffer)) {
                printf ("libmpeg2: bad extension\n");
                continue ;
            }
        }
    }
}

/* Find the end of the userdata field in an MPEG-2 stream.
 * NOTE(review): the scan is unbounded and relies on a 00 00 01 prefix
 * following the data -- presumably the prefix of the next chunk that
 * copy_chunk() leaves in chunk_buffer; confirm for every caller. */
static uint8_t *find_end(uint8_t *buffer)
{
    uint8_t *current = buffer;
    while(1) {
        if (current[0] == 0 && current[1] == 0 && current[2] == 1)
            break;
        current++;
    }
    return current;
}

/* Interpret a user_data chunk: "CC" payloads go to the closed caption
 * decoder (opened on first use), "DTG1" payloads update afd_value_seen. */
static void process_userdata(mpeg2dec_t *mpeg2dec, uint8_t *buffer)
{
    /* check if user data denotes closed captions */
    if (buffer[0] == 'C' && buffer[1] == 'C') {

        if (!mpeg2dec->cc_dec) {
            xine_event_t event;
            xine_format_change_data_t data;

            /* open the closed caption decoder first */
            mpeg2dec->cc_dec = _x_get_spu_decoder(mpeg2dec->stream, (BUF_SPU_CC >> 16) & 0xff);

            /* send a frame format event so that the CC decoder knows the initial image size */
            event.type = XINE_EVENT_FRAME_FORMAT_CHANGE;
            event.stream = mpeg2dec->stream;
            event.data = &data;
            event.data_length = sizeof(data);
            data.width = mpeg2dec->picture->coded_picture_width;
            data.height = mpeg2dec->picture->coded_picture_height;
            data.aspect = mpeg2dec->picture->aspect_ratio_information;
            data.pan_scan = mpeg2dec->force_pan_scan;
            xine_event_send(mpeg2dec->stream, &event);

            _x_stream_info_set(mpeg2dec->stream, XINE_STREAM_INFO_VIDEO_WIDTH,
                               mpeg2dec->picture->display_width);
            _x_stream_info_set(mpeg2dec->stream, XINE_STREAM_INFO_VIDEO_HEIGHT,
                               mpeg2dec->picture->display_height);
        }

        if (mpeg2dec->cc_dec) {
            buf_element_t buf;

            buf.type = BUF_SPU_CC;
            buf.content = &buffer[2];
            buf.pts = mpeg2dec->pts;
            buf.size = find_end(buffer) - &buffer[2];
            buf.decoder_flags = 0;

            mpeg2dec->cc_dec->decode_data(mpeg2dec->cc_dec, &buf);
        }
    }
    /* check Active Format Description ETSI TS 101 154 V1.5.1 */
    else if (buffer[0] == 0x44 && buffer[1] == 0x54 && buffer[2] == 0x47 && buffer[3] == 0x31)
        mpeg2dec->afd_value_seen = (buffer[4] & 0x40) ? (buffer[5] & 0x0f) : XINE_VIDEO_AFD_NOT_PRESENT;
}
diff --git a/src/video_dec/libmpeg2/header.c b/src/video_dec/libmpeg2/header.c new file mode 100644 index 000000000..0c2b76891 --- /dev/null +++ b/src/video_dec/libmpeg2/header.c @@ -0,0 +1,419 @@ +/* + * header.c + * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org> + * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates.
+ * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* +#define LOG_PAN_SCAN +*/ + +#include "config.h" + +#include <stdio.h> /* For printf debugging */ +#include <inttypes.h> + +#include "mpeg2_internal.h" +#include <xine/attributes.h> + +/* default intra quant matrix, in zig-zag order */ +static const uint8_t default_intra_quantizer_matrix[64] ATTR_ALIGN(16) = { + 8, + 16, 16, + 19, 16, 19, + 22, 22, 22, 22, + 22, 22, 26, 24, 26, + 27, 27, 27, 26, 26, 26, + 26, 27, 27, 27, 29, 29, 29, + 34, 34, 34, 29, 29, 29, 27, 27, + 29, 29, 32, 32, 34, 34, 37, + 38, 37, 35, 35, 34, 35, + 38, 38, 40, 40, 40, + 48, 48, 46, 46, + 56, 56, 58, + 69, 69, + 83 +}; + +uint8_t mpeg2_scan_norm[64] ATTR_ALIGN(16) = +{ + /* Zig-Zag scan pattern */ + 0, 1, 8,16, 9, 2, 3,10, + 17,24,32,25,18,11, 4, 5, + 12,19,26,33,40,48,41,34, + 27,20,13, 6, 7,14,21,28, + 35,42,49,56,57,50,43,36, + 29,22,15,23,30,37,44,51, + 58,59,52,45,38,31,39,46, + 53,60,61,54,47,55,62,63 +}; + +uint8_t mpeg2_scan_alt[64] ATTR_ALIGN(16) = +{ + /* Alternate scan pattern */ + 0,8,16,24,1,9,2,10,17,25,32,40,48,56,57,49, + 41,33,26,18,3,11,4,12,19,27,34,42,50,58,35,43, + 51,59,20,28,5,13,6,14,21,29,36,44,52,60,37,45, + 53,61,22,30,7,15,23,31,38,46,54,62,39,47,55,63 +}; + +/* count must be between 1 and 32 */ +static uint32_t get_bits(uint8_t *buffer, uint32_t 
count, uint32_t *bit_position) { + uint32_t byte_offset; + uint32_t bit_offset; + uint32_t bit_mask; + uint32_t bit_bite; + uint32_t result=0; + if (count == 0) return 0; + do { + byte_offset = *bit_position >> 3; /* Div 8 */ + bit_offset = 8 - (*bit_position & 0x7); /* Bits got 87654321 */ + bit_mask = ((1 << (bit_offset)) - 1); + bit_bite = bit_offset; + if (count < bit_offset) { + bit_mask ^= ((1 << (bit_offset-count)) - 1); + bit_bite = count; + } + /* + printf("Byte=0x%02x Bitmask=0x%04x byte_offset=%u bit_offset=%u bit_byte=%u count=%u\n",buffer[byte_offset], bit_mask, byte_offset, bit_offset, bit_bite,count); + */ + result = (result << bit_bite) | ((buffer[byte_offset] & bit_mask) >> (bit_offset-bit_bite)); + *bit_position+=bit_bite; + count-=bit_bite; + } while ((count > 0) && (byte_offset<50) ); + return result; +} + +static int32_t get_bits_signed(uint8_t *buffer, uint32_t count, uint32_t *bit_position) { + uint32_t value = get_bits(buffer, count, bit_position); + uint32_t sign_mask = (uint32_t)(-1 << (count - 1)); + if (value & sign_mask) + value |= sign_mask; /* sign-extend value */ + return (int32_t)value; +} + +void mpeg2_header_state_init (picture_t * picture) +{ + picture->scan = mpeg2_scan_norm; + picture->load_intra_quantizer_matrix = 1; + picture->load_non_intra_quantizer_matrix = 1; +} + +int mpeg2_header_sequence (picture_t * picture, uint8_t * buffer) +{ + int width, height; + int i; + + if ((buffer[6] & 0x20) != 0x20) + return 1; /* missing marker_bit */ + + height = (buffer[0] << 16) | (buffer[1] << 8) | buffer[2]; + + picture->display_width = width = (height >> 12); + picture->display_height = height = (height & 0xfff); + + width = (width + 15) & ~15; + height = (height + 15) & ~15; + + if ((width > 1920) || (height > 1152)) + return 1; /* size restrictions for MP@HL */ + + picture->coded_picture_width = width; + picture->coded_picture_height = height; + + /* this is not used by the decoder */ + picture->aspect_ratio_information = buffer[3] 
>> 4; + picture->frame_rate_code = buffer[3] & 15; + picture->bitrate = (buffer[4]<<10)|(buffer[5]<<2)|(buffer[6]>>6); + + if (buffer[7] & 2) { + for (i = 0; i < 64; i++) + picture->intra_quantizer_matrix[mpeg2_scan_norm[i]] = + (buffer[i+7] << 7) | (buffer[i+8] >> 1); + buffer += 64; + } else + for (i = 0; i < 64; i++) + picture->intra_quantizer_matrix[mpeg2_scan_norm[i]] = + default_intra_quantizer_matrix [i]; + + if (buffer[7] & 1) + for (i = 0; i < 64; i++) + picture->non_intra_quantizer_matrix[mpeg2_scan_norm[i]] = + buffer[i+8]; + else + for (i = 0; i < 64; i++) + picture->non_intra_quantizer_matrix[i] = 16; + picture->load_intra_quantizer_matrix = 1; + picture->load_non_intra_quantizer_matrix = 1; + /* MPEG1 - for testing only */ + picture->mpeg1 = 1; + picture->intra_dc_precision = 0; + picture->frame_pred_frame_dct = 1; + picture->q_scale_type = 0; + picture->concealment_motion_vectors = 0; + /* picture->alternate_scan = 0; */ + picture->picture_structure = FRAME_PICTURE; + /* picture->second_field = 0; */ + + return 0; +} + +static int sequence_extension (picture_t * picture, uint8_t * buffer) +{ + /* check chroma format, size extensions, marker bit */ + if (((buffer[1] & 0x07) != 0x02) || (buffer[2] & 0xe0) || + ((buffer[3] & 0x01) != 0x01)) + return 1; + + /* this is not used by the decoder */ + picture->progressive_sequence = (buffer[1] >> 3) & 1; + + picture->low_delay = buffer[5] & 0x80; + + if (!picture->progressive_sequence) + picture->coded_picture_height = + (picture->coded_picture_height + 31) & ~31; + + + /* printf ("libmpeg2: low_delay : %d\n", picture->low_delay); */ + +/* + printf ("libmpeg2: sequence extension+5 : %08x (%d)\n", + buffer[5], buffer[5] % 0x80); + */ + + picture->frame_rate_ext_n = buffer[5] & 0x31; + picture->frame_rate_ext_d = (buffer[5] >> 2) & 0x03; + + /* MPEG1 - for testing only */ + picture->mpeg1 = 0; + + return 0; +} + +static int quant_matrix_extension (picture_t * picture, uint8_t * buffer) +{ + int i; + + if 
(buffer[0] & 8) { + for (i = 0; i < 64; i++) + picture->intra_quantizer_matrix[mpeg2_scan_norm[i]] = + (buffer[i] << 5) | (buffer[i+1] >> 3); + buffer += 64; + } + + if (buffer[0] & 4) + for (i = 0; i < 64; i++) + picture->non_intra_quantizer_matrix[mpeg2_scan_norm[i]] = + (buffer[i] << 6) | (buffer[i+1] >> 2); + + return 0; +} + +static int picture_coding_extension (picture_t * picture, uint8_t * buffer) +{ + /* pre subtract 1 for use later in compute_motion_vector */ + picture->f_motion.f_code[0] = (buffer[0] & 15) - 1; + picture->f_motion.f_code[1] = (buffer[1] >> 4) - 1; + picture->b_motion.f_code[0] = (buffer[1] & 15) - 1; + picture->b_motion.f_code[1] = (buffer[2] >> 4) - 1; + + picture->intra_dc_precision = (buffer[2] >> 2) & 3; + picture->picture_structure = buffer[2] & 3; + picture->frame_pred_frame_dct = (buffer[3] >> 6) & 1; + picture->concealment_motion_vectors = (buffer[3] >> 5) & 1; + picture->q_scale_type = (buffer[3] >> 4) & 1; + picture->intra_vlc_format = (buffer[3] >> 3) & 1; + + if (buffer[3] & 4) /* alternate_scan */ + picture->scan = mpeg2_scan_alt; + else + picture->scan = mpeg2_scan_norm; + + /* these are not used by the decoder */ + picture->top_field_first = buffer[3] >> 7; + picture->repeat_first_field = (buffer[3] >> 1) & 1; + picture->progressive_frame = buffer[4] >> 7; + + return 0; +} + +static int sequence_display_extension (picture_t * picture, uint8_t * buffer) { + /* FIXME: implement. 
*/ + uint32_t bit_position; + uint32_t padding; + + bit_position = 0; + padding = get_bits(buffer, 4, &bit_position); + picture->video_format = get_bits(buffer, 3, &bit_position); + picture->colour_description = get_bits(buffer, 1, &bit_position); + if(picture->colour_description) { + picture->colour_primatives = get_bits(buffer, 8, &bit_position); + picture->transfer_characteristics = get_bits(buffer, 8, &bit_position); + picture->matrix_coefficients = get_bits(buffer, 8, &bit_position); + } + picture->display_horizontal_size = get_bits(buffer, 14, &bit_position); + padding = get_bits(buffer, 1, &bit_position); + picture->display_vertical_size = get_bits(buffer, 14, &bit_position); + +#ifdef LOG_PAN_SCAN + printf("Sequence_display_extension\n"); + printf(" video_format: %u\n", picture->video_format); + printf(" colour_description: %u\n", picture->colour_description); + if(picture->colour_description) { + printf(" colour_primatives: %u\n", picture->colour_primatives); + printf(" transfer_characteristics %u\n", picture->transfer_characteristics); + printf(" matrix_coefficients %u\n", picture->matrix_coefficients); + } + printf(" display_horizontal_size %u\n", picture->display_horizontal_size); + printf(" display_vertical_size %u\n", picture->display_vertical_size); +#endif + + return 0; +} + +static int picture_display_extension (picture_t * picture, uint8_t * buffer) { + uint32_t bit_position; + uint32_t padding; + +#ifdef LOG_PAN_SCAN + printf ("libmpeg2: picture_display_extension\n"); +#endif + + bit_position = 0; + padding = get_bits(buffer, 4, &bit_position); + picture->frame_centre_horizontal_offset = get_bits_signed(buffer, 16, &bit_position); + padding = get_bits(buffer, 1, &bit_position); + picture->frame_centre_vertical_offset = get_bits_signed(buffer, 16, &bit_position); + padding = get_bits(buffer, 1, &bit_position); + +#ifdef LOG_PAN_SCAN + printf("Pan & Scan centre (x,y) = (%d, %d)\n", + picture->frame_centre_horizontal_offset, + 
picture->frame_centre_vertical_offset); +#endif + + return 0; +} + +int mpeg2_header_extension (picture_t * picture, uint8_t * buffer) +{ + switch (buffer[0] & 0xf0) { + case 0x00: /* reserved */ + return 0; + + case 0x10: /* sequence extension */ + return sequence_extension (picture, buffer); + + case 0x20: /* sequence display extension for Pan & Scan */ + return sequence_display_extension (picture, buffer); + + case 0x30: /* quant matrix extension */ + return quant_matrix_extension (picture, buffer); + + case 0x40: /* copyright extension */ + return 0; + + case 0x50: /* sequence scalable extension */ + return 0; + + case 0x60: /* reserved */ + return 0; + + case 0x70: /* picture display extension for Pan & Scan */ + return picture_display_extension (picture, buffer); + + case 0x80: /* picture coding extension */ + return picture_coding_extension (picture, buffer); + + case 0x90: /* picture spacial scalable extension */ + return 0; + + case 0xA0: /* picture temporal scalable extension */ + return 0; + + case 0xB0: /* camera parameters extension */ + return 0; + + case 0xC0: /* ITU-T extension */ + return 0; + + case 0xD0: /* reserved */ + return 0; + + case 0xE0: /* reserved */ + return 0; + + case 0xF0: /* reserved */ + return 0; + } + + return 0; +} + +int mpeg2_header_group_of_pictures (picture_t * picture, uint8_t * buffer) { + uint32_t bit_position; + uint32_t padding; + bit_position = 0; + + picture->drop_frame_flag = get_bits(buffer, 1, &bit_position); + picture->time_code_hours = get_bits(buffer, 5, &bit_position); + picture->time_code_minutes = get_bits(buffer, 6, &bit_position); + padding = get_bits(buffer, 1, &bit_position); + picture->time_code_seconds = get_bits(buffer, 6, &bit_position); + picture->time_code_pictures = get_bits(buffer, 6, &bit_position); + picture->closed_gop = get_bits(buffer, 1, &bit_position); + picture->broken_link = get_bits(buffer, 1, &bit_position); + +#ifdef LOG_PAN_SCAN + printf("Group of pictures\n"); + printf(" 
drop_frame_flag: %u\n", picture->drop_frame_flag); + printf(" time_code: HH:MM:SS:Pictures %02u:%02u:%02u:%02u\n", + picture->time_code_hours, + picture->time_code_minutes, + picture->time_code_seconds, + picture->time_code_pictures); + printf(" closed_gop: %u\n", picture->closed_gop); + printf(" bloken_link: %u\n", picture->broken_link); +#endif + + return 0; +} + +int mpeg2_header_picture (picture_t * picture, uint8_t * buffer) +{ + picture->picture_coding_type = (buffer [1] >> 3) & 7; + picture->vbv_delay = ((buffer[1] << 13) | (buffer[2] << 5) | + (buffer[3] >> 3)) & 0xffff; + + /* forward_f_code and backward_f_code - used in mpeg1 only */ + picture->f_motion.f_code[1] = (buffer[3] >> 2) & 1; + picture->f_motion.f_code[0] = + (((buffer[3] << 1) | (buffer[4] >> 7)) & 7) - 1; + picture->b_motion.f_code[1] = (buffer[4] >> 6) & 1; + picture->b_motion.f_code[0] = ((buffer[4] >> 3) & 7) - 1; + + /* move in header_process_picture_header */ + picture->second_field = + (picture->picture_structure != FRAME_PICTURE) && + !(picture->second_field); + + return 0; +} diff --git a/src/video_dec/libmpeg2/idct.c b/src/video_dec/libmpeg2/idct.c new file mode 100644 index 000000000..9f216db58 --- /dev/null +++ b/src/video_dec/libmpeg2/idct.c @@ -0,0 +1,348 @@ +/* + * idct.c + * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org> + * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> + * + * Portions of this code are from the MPEG software simulation group + * idct implementation. This code will be replaced with a new + * implementation soon. + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/**********************************************************/ +/* inverse two dimensional DCT, Chen-Wang algorithm */ +/* (cf. IEEE ASSP-32, pp. 803-816, Aug. 1984) */ +/* 32-bit integer arithmetic (8 bit coefficients) */ +/* 11 mults, 29 adds per DCT */ +/* sE, 18.8.91 */ +/**********************************************************/ +/* coefficients extended to 12 bit for IEEE1180-1990 */ +/* compliance sE, 2.1.94 */ +/**********************************************************/ + +/* this code assumes >> to be a two's-complement arithmetic */ +/* right shift: (-2)>>1 == -1 , (-3)>>1 == -2 */ + +#include "config.h" + +#include <stdio.h> +#include <stdlib.h> +#include <inttypes.h> + +#include "mpeg2_internal.h" +#include <xine/xineutils.h> + +#define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */ +#define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */ +#define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */ +#define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */ +#define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */ +#define W7 565 /* 2048*sqrt (2)*cos (7*pi/16) */ + +/* idct main entry points */ +void (* mpeg2_idct_copy) (int16_t * block, uint8_t * dest, int stride); +void (* mpeg2_idct_add) (int16_t * block, uint8_t * dest, int stride); +void (* mpeg2_idct) (int16_t * block); +void (* mpeg2_zero_block) (int16_t * block); + +static uint8_t clip_lut[1024]; +#define CLIP(i) ((clip_lut+384)[ (i)]) + +/* row (horizontal) IDCT + * + * 7 pi 1 + * dst[k] = sum c[l] * src[l] * cos ( -- * ( k + - ) * l ) + * l=0 8 2 + * + * where: c[0] = 
128 + * c[1..7] = 128*sqrt (2) + */ + +static void inline idct_row (int16_t * block) +{ + int x0, x1, x2, x3, x4, x5, x6, x7, x8; + + x1 = block[4] << 11; + x2 = block[6]; + x3 = block[2]; + x4 = block[1]; + x5 = block[7]; + x6 = block[5]; + x7 = block[3]; + + /* shortcut */ + if (! (x1 | x2 | x3 | x4 | x5 | x6 | x7 )) { + block[0] = block[1] = block[2] = block[3] = block[4] = + block[5] = block[6] = block[7] = block[0]<<3; + return; + } + + x0 = (block[0] << 11) + 128; /* for proper rounding in the fourth stage */ + + /* first stage */ + x8 = W7 * (x4 + x5); + x4 = x8 + (W1 - W7) * x4; + x5 = x8 - (W1 + W7) * x5; + x8 = W3 * (x6 + x7); + x6 = x8 - (W3 - W5) * x6; + x7 = x8 - (W3 + W5) * x7; + + /* second stage */ + x8 = x0 + x1; + x0 -= x1; + x1 = W6 * (x3 + x2); + x2 = x1 - (W2 + W6) * x2; + x3 = x1 + (W2 - W6) * x3; + x1 = x4 + x6; + x4 -= x6; + x6 = x5 + x7; + x5 -= x7; + + /* third stage */ + x7 = x8 + x3; + x8 -= x3; + x3 = x0 + x2; + x0 -= x2; + x2 = (181 * (x4 + x5) + 128) >> 8; + x4 = (181 * (x4 - x5) + 128) >> 8; + + /* fourth stage */ + block[0] = (x7 + x1) >> 8; + block[1] = (x3 + x2) >> 8; + block[2] = (x0 + x4) >> 8; + block[3] = (x8 + x6) >> 8; + block[4] = (x8 - x6) >> 8; + block[5] = (x0 - x4) >> 8; + block[6] = (x3 - x2) >> 8; + block[7] = (x7 - x1) >> 8; +} + +/* column (vertical) IDCT + * + * 7 pi 1 + * dst[8*k] = sum c[l] * src[8*l] * cos ( -- * ( k + - ) * l ) + * l=0 8 2 + * + * where: c[0] = 1/1024 + * c[1..7] = (1/1024)*sqrt (2) + */ + +static void inline idct_col (int16_t *block) +{ + int x0, x1, x2, x3, x4, x5, x6, x7, x8; + + /* shortcut */ + x1 = block [8*4] << 8; + x2 = block [8*6]; + x3 = block [8*2]; + x4 = block [8*1]; + x5 = block [8*7]; + x6 = block [8*5]; + x7 = block [8*3]; + +#if 0 + if (! 
(x1 | x2 | x3 | x4 | x5 | x6 | x7 )) { + block[8*0] = block[8*1] = block[8*2] = block[8*3] = block[8*4] = + block[8*5] = block[8*6] = block[8*7] = (block[8*0] + 32) >> 6; + return; + } +#endif + + x0 = (block[8*0] << 8) + 8192; + + /* first stage */ + x8 = W7 * (x4 + x5) + 4; + x4 = (x8 + (W1 - W7) * x4) >> 3; + x5 = (x8 - (W1 + W7) * x5) >> 3; + x8 = W3 * (x6 + x7) + 4; + x6 = (x8 - (W3 - W5) * x6) >> 3; + x7 = (x8 - (W3 + W5) * x7) >> 3; + + /* second stage */ + x8 = x0 + x1; + x0 -= x1; + x1 = W6 * (x3 + x2) + 4; + x2 = (x1 - (W2 + W6) * x2) >> 3; + x3 = (x1 + (W2 - W6) * x3) >> 3; + x1 = x4 + x6; + x4 -= x6; + x6 = x5 + x7; + x5 -= x7; + + /* third stage */ + x7 = x8 + x3; + x8 -= x3; + x3 = x0 + x2; + x0 -= x2; + x2 = (181 * (x4 + x5) + 128) >> 8; + x4 = (181 * (x4 - x5) + 128) >> 8; + + /* fourth stage */ + block[8*0] = (x7 + x1) >> 14; + block[8*1] = (x3 + x2) >> 14; + block[8*2] = (x0 + x4) >> 14; + block[8*3] = (x8 + x6) >> 14; + block[8*4] = (x8 - x6) >> 14; + block[8*5] = (x0 - x4) >> 14; + block[8*6] = (x3 - x2) >> 14; + block[8*7] = (x7 - x1) >> 14; +} + +static void mpeg2_idct_copy_c (int16_t * block, uint8_t * dest, int stride) +{ + int i; + + for (i = 0; i < 8; i++) + idct_row (block + 8 * i); + + for (i = 0; i < 8; i++) + idct_col (block + i); + + i = 8; + do { + dest[0] = CLIP (block[0]); + dest[1] = CLIP (block[1]); + dest[2] = CLIP (block[2]); + dest[3] = CLIP (block[3]); + dest[4] = CLIP (block[4]); + dest[5] = CLIP (block[5]); + dest[6] = CLIP (block[6]); + dest[7] = CLIP (block[7]); + + block[0] = 0; block[1] = 0; block[2] = 0; block[3] = 0; + block[4] = 0; block[5] = 0; block[6] = 0; block[7] = 0; + + dest += stride; + block += 8; + } while (--i); +} + +static void mpeg2_idct_add_c (int16_t * block, uint8_t * dest, int stride) +{ + int i; + + for (i = 0; i < 8; i++) + idct_row (block + 8 * i); + + for (i = 0; i < 8; i++) + idct_col (block + i); + + i = 8; + do { + dest[0] = CLIP (block[0] + dest[0]); + dest[1] = CLIP (block[1] + dest[1]); + 
dest[2] = CLIP (block[2] + dest[2]); + dest[3] = CLIP (block[3] + dest[3]); + dest[4] = CLIP (block[4] + dest[4]); + dest[5] = CLIP (block[5] + dest[5]); + dest[6] = CLIP (block[6] + dest[6]); + dest[7] = CLIP (block[7] + dest[7]); + + block[0] = 0; block[1] = 0; block[2] = 0; block[3] = 0; + block[4] = 0; block[5] = 0; block[6] = 0; block[7] = 0; + + dest += stride; + block += 8; + } while (--i); +} + +static void mpeg2_idct_c (int16_t * block) +{ + int i; + + for (i = 0; i < 8; i++) + idct_row (block + 8 * i); + + for (i = 0; i < 8; i++) + idct_col (block + i); +} + +static void mpeg2_zero_block_c (int16_t * wblock) +{ + memset( wblock, 0, sizeof(int16_t) * 64 ); +} + +void mpeg2_idct_init (uint32_t mm_accel) +{ + mpeg2_zero_block = mpeg2_zero_block_c; + +#if defined(ARCH_X86) || defined(ARCH_X86_64) + if (mm_accel & MM_ACCEL_X86_MMXEXT) { +#ifdef LOG + fprintf (stderr, "Using MMXEXT for IDCT transform\n"); +#endif + mpeg2_idct_copy = mpeg2_idct_copy_mmxext; + mpeg2_idct_add = mpeg2_idct_add_mmxext; + mpeg2_idct = mpeg2_idct_mmxext; + mpeg2_zero_block = mpeg2_zero_block_mmx; + mpeg2_idct_mmx_init (); + } else if (mm_accel & MM_ACCEL_X86_MMX) { +#ifdef LOG + fprintf (stderr, "Using MMX for IDCT transform\n"); +#endif + mpeg2_idct_copy = mpeg2_idct_copy_mmx; + mpeg2_idct_add = mpeg2_idct_add_mmx; + mpeg2_idct = mpeg2_idct_mmx; + mpeg2_zero_block = mpeg2_zero_block_mmx; + mpeg2_idct_mmx_init (); + } else +#endif +#if defined (ARCH_PPC) && defined (ENABLE_ALTIVEC) + if (mm_accel & MM_ACCEL_PPC_ALTIVEC) { +#ifdef LOG + fprintf (stderr, "Using altivec for IDCT transform\n"); +#endif + mpeg2_idct_copy = mpeg2_idct_copy_altivec; + mpeg2_idct_add = mpeg2_idct_add_altivec; + mpeg2_idct_altivec_init (); + mpeg2_idct = mpeg2_idct_c; + } else +#endif +#ifdef LIBMPEG2_MLIB + if (mm_accel & MM_ACCEL_MLIB) { + char * env_var; + + env_var = getenv ("MLIB_NON_IEEE"); + + mpeg2_idct = mpeg2_idct_mlib; + if (env_var == NULL) { +#ifdef LOG + fprintf (stderr, "Using mlib for IDCT 
transform\n"); +#endif + mpeg2_idct_add = mpeg2_idct_add_mlib; + } else { + fprintf (stderr, "Using non-IEEE mlib for IDCT transform\n"); + mpeg2_idct_add = mpeg2_idct_add_mlib_non_ieee; + } + mpeg2_idct_copy = mpeg2_idct_copy_mlib_non_ieee; + } else +#endif + { + int i; + +#ifdef LOG + fprintf (stderr, "No accelerated IDCT transform found\n"); +#endif + mpeg2_idct_copy = mpeg2_idct_copy_c; + mpeg2_idct_add = mpeg2_idct_add_c; + mpeg2_idct = mpeg2_idct_c; + for (i = -384; i < 640; i++) + clip_lut[i+384] = (i < 0) ? 0 : ((i > 255) ? 255 : i); + } +} diff --git a/src/video_dec/libmpeg2/idct_altivec.c b/src/video_dec/libmpeg2/idct_altivec.c new file mode 100644 index 000000000..de396560b --- /dev/null +++ b/src/video_dec/libmpeg2/idct_altivec.c @@ -0,0 +1,233 @@ +/* + * idct_altivec.c + * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org> + * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
/*
 * One 1-D IDCT pass over eight vectors.
 *
 * Reads vx0..vx7 and the constants a0, a1, a2, ma2, c4, mc4 and zero;
 * writes vy0..vy7, using t0..t8 as temporaries.  All of these names must
 * already be declared in the enclosing scope (the IDCT macro below does so).
 */
#define IDCT_HALF                                       \
    /* 1st stage */                                     \
    t1 = vec_mradds (a1, vx7, vx1 );                    \
    t8 = vec_mradds (a1, vx1, vec_subs (zero, vx7));    \
    t7 = vec_mradds (a2, vx5, vx3);                     \
    t3 = vec_mradds (ma2, vx3, vx5);                    \
                                                        \
    /* 2nd stage */                                     \
    t5 = vec_adds (vx0, vx4);                           \
    t0 = vec_subs (vx0, vx4);                           \
    t2 = vec_mradds (a0, vx6, vx2);                     \
    t4 = vec_mradds (a0, vx2, vec_subs (zero,vx6));     \
    t6 = vec_adds (t8, t3);                             \
    t3 = vec_subs (t8, t3);                             \
    t8 = vec_subs (t1, t7);                             \
    t1 = vec_adds (t1, t7);                             \
                                                        \
    /* 3rd stage */                                     \
    t7 = vec_adds (t5, t2);                             \
    t2 = vec_subs (t5, t2);                             \
    t5 = vec_adds (t0, t4);                             \
    t0 = vec_subs (t0, t4);                             \
    t4 = vec_subs (t8, t3);                             \
    t3 = vec_adds (t8, t3);                             \
                                                        \
    /* 4th stage */                                     \
    vy0 = vec_adds (t7, t1);                            \
    vy7 = vec_subs (t7, t1);                            \
    vy1 = vec_mradds (c4, t3, t5);                      \
    vy6 = vec_mradds (mc4, t3, t5);                     \
    vy2 = vec_mradds (c4, t4, t0);                      \
    vy5 = vec_mradds (mc4, t4, t0);                     \
    vy3 = vec_adds (t2, t6);                            \
    vy4 = vec_subs (t2, t6);

/*
 * Complete 8x8 IDCT, expanded at the top of a function body.
 *
 * Expects a "vector_s16_t * block" and the file-scope "constants" table to
 * be in scope.  The macro declares every local it needs, so it must come
 * before the first statement of the function.  Steps, in order:
 *   - splat the scalar constants out of constants[0]
 *   - shift each input vector left by 4 and scale it with constants[1..4]
 *   - run IDCT_HALF
 *   - three rounds of vec_mergeh/vec_mergel reshuffle the eight vectors
 *     (presumably an 8x8 transpose -- confirm); bias is added to vx0
 *   - run IDCT_HALF again
 *   - arithmetic shift right by 6
 * The result is left in vx0..vx7.
 */
#define IDCT                                                            \
    vector_s16_t vx0, vx1, vx2, vx3, vx4, vx5, vx6, vx7;                \
    vector_s16_t vy0, vy1, vy2, vy3, vy4, vy5, vy6, vy7;                \
    vector_s16_t a0, a1, a2, ma2, c4, mc4, zero, bias;                  \
    vector_s16_t t0, t1, t2, t3, t4, t5, t6, t7, t8;                    \
    vector_u16_t shift;                                                 \
                                                                        \
    c4 = vec_splat (constants[0], 0);                                   \
    a0 = vec_splat (constants[0], 1);                                   \
    a1 = vec_splat (constants[0], 2);                                   \
    a2 = vec_splat (constants[0], 3);                                   \
    mc4 = vec_splat (constants[0], 4);                                  \
    ma2 = vec_splat (constants[0], 5);                                  \
    bias = (vector_s16_t)vec_splat ((vector_s32_t)constants[0], 3);     \
                                                                        \
    zero = vec_splat_s16 (0);                                           \
    shift = vec_splat_u16 (4);                                          \
                                                                        \
    vx0 = vec_mradds (vec_sl (block[0], shift), constants[1], zero);    \
    vx1 = vec_mradds (vec_sl (block[1], shift), constants[2], zero);    \
    vx2 = vec_mradds (vec_sl (block[2], shift), constants[3], zero);    \
    vx3 = vec_mradds (vec_sl (block[3], shift), constants[4], zero);    \
    vx4 = vec_mradds (vec_sl (block[4], shift), constants[1], zero);    \
    vx5 = vec_mradds (vec_sl (block[5], shift), constants[4], zero);    \
    vx6 = vec_mradds (vec_sl (block[6], shift), constants[3], zero);    \
    vx7 = vec_mradds (vec_sl (block[7], shift), constants[2], zero);    \
                                                                        \
    IDCT_HALF                                                           \
                                                                        \
    vx0 = vec_mergeh (vy0, vy4);                                        \
    vx1 = vec_mergel (vy0, vy4);                                        \
    vx2 = vec_mergeh (vy1, vy5);                                        \
    vx3 = vec_mergel (vy1, vy5);                                        \
    vx4 = vec_mergeh (vy2, vy6);                                        \
    vx5 = vec_mergel (vy2, vy6);                                        \
    vx6 = vec_mergeh (vy3, vy7);                                        \
    vx7 = vec_mergel (vy3, vy7);                                        \
                                                                        \
    vy0 = vec_mergeh (vx0, vx4);                                        \
    vy1 = vec_mergel (vx0, vx4);                                        \
    vy2 = vec_mergeh (vx1, vx5);                                        \
    vy3 = vec_mergel (vx1, vx5);                                        \
    vy4 = vec_mergeh (vx2, vx6);                                        \
    vy5 = vec_mergel (vx2, vx6);                                        \
    vy6 = vec_mergeh (vx3, vx7);                                        \
    vy7 = vec_mergel (vx3, vx7);                                        \
                                                                        \
    vx0 = vec_adds (vec_mergeh (vy0, vy4), bias);                       \
    vx1 = vec_mergel (vy0, vy4);                                        \
    vx2 = vec_mergeh (vy1, vy5);                                        \
    vx3 = vec_mergel (vy1, vy5);                                        \
    vx4 = vec_mergeh (vy2, vy6);                                        \
    vx5 = vec_mergel (vy2, vy6);                                        \
    vx6 = vec_mergeh (vy3, vy7);                                        \
    vx7 = vec_mergel (vy3, vy7);                                        \
                                                                        \
    IDCT_HALF                                                           \
                                                                        \
    shift = vec_splat_u16 (6);                                          \
    vx0 = vec_sra (vy0, shift);                                         \
    vx1 = vec_sra (vy1, shift);                                         \
    vx2 = vec_sra (vy2, shift);                                         \
    vx3 = vec_sra (vy3, shift);                                         \
    vx4 = vec_sra (vy4, shift);                                         \
    vx5 = vec_sra (vy5, shift);                                         \
    vx6 = vec_sra (vy6, shift);                                         \
    vx7 = vec_sra (vy7, shift);
/*
 * AltiVec IDCT of one 8x8 block, stored as 8-bit samples.
 *
 * block   eight vectors of eight 16-bit coefficients; zeroed on return
 * dest    top-left destination sample
 * stride  distance in bytes between two destination rows
 *
 * The IDCT macro declares the working vectors and leaves the transformed
 * rows in vx0..vx7.
 * NOTE(review): the scan tables are patched for a transposed input layout
 * by mpeg2_idct_altivec_init, so block is presumably transposed -- confirm.
 */
void mpeg2_idct_copy_altivec (vector_s16_t * block, unsigned char * dest,
                              int stride)
{
    vector_u8_t tmp;

    IDCT

/*
 * Pack one row with vec_packsu and write it out as two 32-bit element
 * stores at byte offsets 0 and 4, i.e. eight samples.
 * NOTE(review): presumably requires dest to be at least 4-byte aligned
 * for vec_ste -- confirm.
 */
#define COPY(dest,src)                                          \
    tmp = vec_packsu (src, src);                                \
    vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);       \
    vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);

    COPY (dest, vx0)    dest += stride;
    COPY (dest, vx1)    dest += stride;
    COPY (dest, vx2)    dest += stride;
    COPY (dest, vx3)    dest += stride;
    COPY (dest, vx4)    dest += stride;
    COPY (dest, vx5)    dest += stride;
    COPY (dest, vx6)    dest += stride;
    COPY (dest, vx7)
    /* clear all 64 coefficients for the caller */
    memset (block, 0, 64 * sizeof (signed short));
}
ADD (dest, vx2, perm0) dest += stride; + ADD (dest, vx3, perm1) dest += stride; + ADD (dest, vx4, perm0) dest += stride; + ADD (dest, vx5, perm1) dest += stride; + ADD (dest, vx6, perm0) dest += stride; + ADD (dest, vx7, perm1) + memset (block, 0, 64 * sizeof (signed short)); +} + +void mpeg2_idct_altivec_init (void) +{ + int i, j; + + /* the altivec idct uses a transposed input, so we patch scan tables */ + for (i = 0; i < 64; i++) { + j = mpeg2_scan_norm[i]; + mpeg2_scan_norm[i] = (j >> 3) | ((j & 7) << 3); + j = mpeg2_scan_alt[i]; + mpeg2_scan_alt[i] = (j >> 3) | ((j & 7) << 3); + } +} + +#endif /* ARCH_PPC && ENABLED_ALTIVEC */ + diff --git a/src/video_dec/libmpeg2/idct_mlib.c b/src/video_dec/libmpeg2/idct_mlib.c new file mode 100644 index 000000000..e573c9790 --- /dev/null +++ b/src/video_dec/libmpeg2/idct_mlib.c @@ -0,0 +1,62 @@ +/* + * idct_mlib.c + * Copyright (C) 1999-2002 HÃ¥kan Hjort <d95hjort@dtek.chalmers.se> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
/* Reset all 64 coefficients of an 8x8 block to zero. */
static void mlib_clear_block (int16_t * block)
{
    memset (block, 0, 64 * sizeof (uint16_t));
}

/* In-place 8x8 IDCT using the IEEE-compliant mediaLib routine. */
void mpeg2_idct_mlib (int16_t * block)
{
    mlib_VideoIDCT_IEEE_S16_S16 (block, block);
}

/*
 * IEEE-compliant IDCT of block, added to the samples at dest
 * (rows stride bytes apart); block is cleared afterwards.
 */
void mpeg2_idct_add_mlib (int16_t * block, uint8_t * dest, int stride)
{
    mpeg2_idct_mlib (block);
    mlib_VideoAddBlock_U8_S16 (dest, block, stride);
    mlib_clear_block (block);
}

/*
 * Non-IEEE IDCT of block written straight to dest as 8-bit samples;
 * block is cleared afterwards.
 */
void mpeg2_idct_copy_mlib_non_ieee (int16_t * block, uint8_t * dest,
                                    int stride)
{
    mlib_VideoIDCT8x8_U8_S16 (dest, block, stride);
    mlib_clear_block (block);
}

/*
 * Non-IEEE IDCT of block, added to the samples at dest;
 * block is cleared afterwards.
 */
void mpeg2_idct_add_mlib_non_ieee (int16_t * block, uint8_t * dest, int stride)
{
    mlib_VideoIDCT8x8_S16_S16 (block, block);
    mlib_VideoAddBlock_U8_S16 (dest, block, stride);
    mlib_clear_block (block);
}
/*
 * mediaLib IDCT entry points.
 *
 * NOTE(review): idct_mlib.c as added in this change defines
 * mpeg2_idct_add_mlib, mpeg2_idct_copy_mlib_non_ieee,
 * mpeg2_idct_add_mlib_non_ieee and mpeg2_idct_mlib; it defines nothing
 * with the two names declared here.  These prototypes look stale --
 * confirm whether this header is still included anywhere.
 */
void idct_block_copy_mlib (int16_t * block, uint8_t * dest, int stride);
void idct_block_add_mlib (int16_t * block, uint8_t * dest, int stride);
#if 0
/*
 * C row IDCT - it's just here to document the MMXEXT and MMX versions.
 *
 * row      base of the coefficient block
 * offset   index of the first coefficient of the row to transform
 * table    cosine table; entries 1..7 are read as C1..C7
 * rounder  value added to the even-part sums a0..a3 before the shift
 *
 * The eight results overwrite row[offset .. offset+7], each shifted right
 * by ROW_SHIFT.
 */
static inline void idct_row (int16_t * row, int offset,
                             int16_t * table, int32_t * rounder)
{
    int C1, C2, C3, C4, C5, C6, C7;
    int a0, a1, a2, a3, b0, b1, b2, b3;

    row += offset;

    C1 = table[1];
    C2 = table[2];
    C3 = table[3];
    C4 = table[4];
    C5 = table[5];
    C6 = table[6];
    C7 = table[7];

    /* even part: built from coefficients 0, 2, 4 and 6 */
    a0 = C4*row[0] + C2*row[2] + C4*row[4] + C6*row[6] + *rounder;
    a1 = C4*row[0] + C6*row[2] - C4*row[4] - C2*row[6] + *rounder;
    a2 = C4*row[0] - C6*row[2] - C4*row[4] + C2*row[6] + *rounder;
    a3 = C4*row[0] - C2*row[2] + C4*row[4] - C6*row[6] + *rounder;

    /* odd part: built from coefficients 1, 3, 5 and 7 */
    b0 = C1*row[1] + C3*row[3] + C5*row[5] + C7*row[7];
    b1 = C3*row[1] - C7*row[3] - C1*row[5] - C5*row[7];
    b2 = C5*row[1] - C1*row[3] + C7*row[5] + C3*row[7];
    b3 = C7*row[1] - C5*row[3] + C3*row[5] - C1*row[7];

    /* butterfly: outputs k and 7-k are the sum and difference of a and b */
    row[0] = (a0 + b0) >> ROW_SHIFT;
    row[1] = (a1 + b1) >> ROW_SHIFT;
    row[2] = (a2 + b2) >> ROW_SHIFT;
    row[3] = (a3 + b3) >> ROW_SHIFT;
    row[4] = (a3 - b3) >> ROW_SHIFT;
    row[5] = (a2 - b2) >> ROW_SHIFT;
    row[6] = (a1 - b1) >> ROW_SHIFT;
    row[7] = (a0 - b0) >> ROW_SHIFT;
}
#endif
/*
 * MMXEXT row IDCT, main computation.
 *
 * Must run after mmxext_row_head or mmxext_row_mid, which leave:
 *   mm0 = x6 x4 x2 x0          mm2 = x2 x0 x6 x4
 *   mm3 = first even product   mm4 = table[4..7]
 *   mm5 = mm6 = x7 x5 x3 x1
 * On return:
 *   mm1 = y1 y0 and mm3 = y6 y7, already shifted by ROW_SHIFT
 *   mm0 = a3+b3 a2+b2 and mm4 = a3-b3 a2-b2, rounder added, not yet shifted
 * mmxext_row_tail or mmxext_row_mid shifts, packs and stores the row.
 *
 * table    cosine table of the row being processed
 * rounder  pair of 32-bit rounding constants added to the even sums
 */
static inline void mmxext_row (int16_t * table, int32_t * rounder)
{
    movq_m2r (*(table+8), mm1);         // mm1 = -C5 -C1 C3 C1
    pmaddwd_r2r (mm2, mm4);             // mm4 = C4*x0+C6*x2 C4*x4+C6*x6

    pmaddwd_m2r (*(table+16), mm0);     // mm0 = C4*x4-C6*x6 C4*x0-C6*x2
    pshufw_r2r (mm6, mm6, 0x4e);        // mm6 = x3 x1 x7 x5

    movq_m2r (*(table+12), mm7);        // mm7 = -C7 C3 C7 C5
    pmaddwd_r2r (mm5, mm1);             // mm1 = -C1*x5-C5*x7 C1*x1+C3*x3

    paddd_m2r (*rounder, mm3);          // mm3 += rounder
    pmaddwd_r2r (mm6, mm7);             // mm7 = C3*x1-C7*x3 C5*x5+C7*x7

    pmaddwd_m2r (*(table+20), mm2);     // mm2 = C4*x0-C2*x2 -C4*x4+C2*x6
    paddd_r2r (mm4, mm3);               // mm3 = a1 a0 + rounder

    pmaddwd_m2r (*(table+24), mm5);     // mm5 = C3*x5-C1*x7 C5*x1-C1*x3
    movq_r2r (mm3, mm4);                // mm4 = a1 a0 + rounder

    pmaddwd_m2r (*(table+28), mm6);     // mm6 = C7*x1-C5*x3 C7*x5+C3*x7
    paddd_r2r (mm7, mm1);               // mm1 = b1 b0

    paddd_m2r (*rounder, mm0);          // mm0 += rounder
    psubd_r2r (mm1, mm3);               // mm3 = a1-b1 a0-b0 + rounder

    psrad_i2r (ROW_SHIFT, mm3);         // mm3 = y6 y7
    paddd_r2r (mm4, mm1);               // mm1 = a1+b1 a0+b0 + rounder

    paddd_r2r (mm2, mm0);               // mm0 = a3 a2 + rounder
    psrad_i2r (ROW_SHIFT, mm1);         // mm1 = y1 y0

    paddd_r2r (mm6, mm5);               // mm5 = b3 b2
    movq_r2r (mm0, mm4);                // mm4 = a3 a2 + rounder

    paddd_r2r (mm5, mm0);               // mm0 = a3+b3 a2+b2 + rounder
    psubd_r2r (mm5, mm4);               // mm4 = a3-b3 a2-b2 + rounder
}
/*
 * MMXEXT row IDCT, fused "tail of the previous row + head of the next".
 *
 * Finishes the row left in mm0/mm1/mm3/mm4 by mmxext_row (shift, pack,
 * store to row+store) and, interleaved with that, loads the row at
 * row+offset and starts its first products, so that mmxext_row can be
 * called again immediately.
 *
 * row     base of the coefficient block
 * store   index at which the finished row is written
 * offset  index of the next row to load
 * table   cosine table for the next row
 */
static inline void mmxext_row_mid (int16_t * row, int store,
                                   int offset, int16_t * table)
{
    movq_m2r (*(row+offset), mm2);      // mm2 = x6 x4 x2 x0
    psrad_i2r (ROW_SHIFT, mm0);         // mm0 = y3 y2

    movq_m2r (*(row+offset+4), mm5);    // mm5 = x7 x5 x3 x1
    psrad_i2r (ROW_SHIFT, mm4);         // mm4 = y4 y5

    packssdw_r2r (mm0, mm1);            // mm1 = y3 y2 y1 y0
    movq_r2r (mm5, mm6);                // mm6 = x7 x5 x3 x1

    packssdw_r2r (mm3, mm4);            // mm4 = y6 y7 y4 y5
    movq_r2r (mm2, mm0);                // mm0 = x6 x4 x2 x0

    movq_r2m (mm1, *(row+store));       // save y3 y2 y1 y0
    pshufw_r2r (mm4, mm4, 0xb1);        // mm4 = y7 y6 y5 y4

    movq_m2r (*table, mm3);             // mm3 = -C2 -C4 C2 C4
    movq_r2m (mm4, *(row+store+4));     // save y7 y6 y5 y4

    pmaddwd_r2r (mm0, mm3);             // mm3 = -C4*x4-C2*x6 C4*x0+C2*x2

    movq_m2r (*(table+4), mm4);         // mm4 = C6 C4 C6 C4
    pshufw_r2r (mm2, mm2, 0x4e);        // mm2 = x2 x0 x6 x4
}


/* MMX row IDCT */

/*
 * Cosine table layout for the plain MMX row code: 32 entries read by
 * mmx_row_head / mmx_row / mmx_row_mid in groups of four (table+0, +4, ...,
 * +28).  The ordering differs from mmxext_table.
 */
#define mmx_table(c1,c2,c3,c4,c5,c6,c7) {  c4,  c2,  c4,  c6,   \
                                           c4,  c6, -c4, -c2,   \
                                           c1,  c3,  c3, -c7,   \
                                           c5,  c7, -c1, -c5,   \
                                           c4, -c6,  c4, -c2,   \
                                          -c4,  c2,  c4, -c6,   \
                                           c5, -c1,  c7, -c5,   \
                                           c7,  c3,  c3, -c1 }
// mm2 = x6 x4 x6 x4 +} + +static inline void mmx_row (int16_t * table, int32_t * rounder) +{ + pmaddwd_r2r (mm2, mm4); // mm4 = -C4*x4-C2*x6 C4*x4+C6*x6 + punpckldq_r2r (mm5, mm5); // mm5 = x3 x1 x3 x1 + + pmaddwd_m2r (*(table+16), mm0); // mm0 = C4*x0-C2*x2 C4*x0-C6*x2 + punpckhdq_r2r (mm6, mm6); // mm6 = x7 x5 x7 x5 + + movq_m2r (*(table+12), mm7); // mm7 = -C5 -C1 C7 C5 + pmaddwd_r2r (mm5, mm1); // mm1 = C3*x1-C7*x3 C1*x1+C3*x3 + + paddd_m2r (*rounder, mm3); // mm3 += rounder + pmaddwd_r2r (mm6, mm7); // mm7 = -C1*x5-C5*x7 C5*x5+C7*x7 + + pmaddwd_m2r (*(table+20), mm2); // mm2 = C4*x4-C6*x6 -C4*x4+C2*x6 + paddd_r2r (mm4, mm3); // mm3 = a1 a0 + rounder + + pmaddwd_m2r (*(table+24), mm5); // mm5 = C7*x1-C5*x3 C5*x1-C1*x3 + movq_r2r (mm3, mm4); // mm4 = a1 a0 + rounder + + pmaddwd_m2r (*(table+28), mm6); // mm6 = C3*x5-C1*x7 C7*x5+C3*x7 + paddd_r2r (mm7, mm1); // mm1 = b1 b0 + + paddd_m2r (*rounder, mm0); // mm0 += rounder + psubd_r2r (mm1, mm3); // mm3 = a1-b1 a0-b0 + rounder + + psrad_i2r (ROW_SHIFT, mm3); // mm3 = y6 y7 + paddd_r2r (mm4, mm1); // mm1 = a1+b1 a0+b0 + rounder + + paddd_r2r (mm2, mm0); // mm0 = a3 a2 + rounder + psrad_i2r (ROW_SHIFT, mm1); // mm1 = y1 y0 + + paddd_r2r (mm6, mm5); // mm5 = b3 b2 + movq_r2r (mm0, mm7); // mm7 = a3 a2 + rounder + + paddd_r2r (mm5, mm0); // mm0 = a3+b3 a2+b2 + rounder + psubd_r2r (mm5, mm7); // mm7 = a3-b3 a2-b2 + rounder +} + +static inline void mmx_row_tail (int16_t * row, int store) +{ + psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2 + + psrad_i2r (ROW_SHIFT, mm7); // mm7 = y4 y5 + + packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0 + + packssdw_r2r (mm3, mm7); // mm7 = y6 y7 y4 y5 + + movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0 + movq_r2r (mm7, mm4); // mm4 = y6 y7 y4 y5 + + pslld_i2r (16, mm7); // mm7 = y7 0 y5 0 + + psrld_i2r (16, mm4); // mm4 = 0 y6 0 y4 + + por_r2r (mm4, mm7); // mm7 = y7 y6 y5 y4 + + /* slot */ + + movq_r2m (mm7, *(row+store+4)); // save y7 y6 y5 y4 +} + +static inline void mmx_row_mid (int16_t 
* row, int store, + int offset, int16_t * table) +{ + movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0 + psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2 + + movq_m2r (*(row+offset+4), mm5); // mm5 = x7 x5 x3 x1 + psrad_i2r (ROW_SHIFT, mm7); // mm7 = y4 y5 + + packssdw_r2r (mm0, mm1); // mm1 = y3 y2 y1 y0 + movq_r2r (mm5, mm6); // mm6 = x7 x5 x3 x1 + + packssdw_r2r (mm3, mm7); // mm7 = y6 y7 y4 y5 + movq_r2r (mm2, mm0); // mm0 = x6 x4 x2 x0 + + movq_r2m (mm1, *(row+store)); // save y3 y2 y1 y0 + movq_r2r (mm7, mm1); // mm1 = y6 y7 y4 y5 + + punpckldq_r2r (mm0, mm0); // mm0 = x2 x0 x2 x0 + psrld_i2r (16, mm7); // mm7 = 0 y6 0 y4 + + movq_m2r (*table, mm3); // mm3 = C6 C4 C2 C4 + pslld_i2r (16, mm1); // mm1 = y7 0 y5 0 + + movq_m2r (*(table+4), mm4); // mm4 = -C2 -C4 C6 C4 + por_r2r (mm1, mm7); // mm7 = y7 y6 y5 y4 + + movq_m2r (*(table+8), mm1); // mm1 = -C7 C3 C3 C1 + punpckhdq_r2r (mm2, mm2); // mm2 = x6 x4 x6 x4 + + movq_r2m (mm7, *(row+store+4)); // save y7 y6 y5 y4 + pmaddwd_r2r (mm0, mm3); // mm3 = C4*x0+C6*x2 C4*x0+C2*x2 +} + + +#if 0 +// C column IDCT - its just here to document the MMXEXT and MMX versions +static inline void idct_col (int16_t * col, int offset) +{ +/* multiplication - as implemented on mmx */ +#define F(c,x) (((c) * (x)) >> 16) + +/* saturation - it helps us handle torture test cases */ +#define S(x) (((x)>32767) ? 32767 : ((x)<-32768) ? 
/*
 * MMX column IDCT.
 *
 * Each movq moves four 16-bit values, so one call transforms the four
 * adjacent columns starting at col+offset; declare_idct calls it with
 * offsets 0 and 4 to cover the whole block.  The eight rows of those
 * columns are read from col+offset+k*8 and overwritten with the results,
 * shifted right by COL_SHIFT.  Arithmetic is 16-bit saturating
 * (paddsw/psubsw).  Rows 3 and 5 of the block double as scratch storage
 * for b3 and b0 while the other outputs are computed.
 *
 * NOTE(review): T3 (43790) does not fit in a short, so the stored value
 * depends on the implementation-defined conversion of the initializer.
 * The register comments ("mm7 = T3-1") and the paddsw that adds x back
 * after the pmulhw show the wrapped value is what the code expects.
 * NOTE(review): _T1, _T2, _T3 and _C4 start with an underscore followed by
 * an upper-case letter, which is reserved for the implementation; the
 * T1..C4 macros also stay defined after the function.
 */
static inline void idct_col (int16_t * col, int offset)
{
#define T1 13036
#define T2 27146
#define T3 43790
#define C4 23170

    static short _T1[] ATTR_ALIGN(8) = {T1,T1,T1,T1};
    static short _T2[] ATTR_ALIGN(8) = {T2,T2,T2,T2};
    static short _T3[] ATTR_ALIGN(8) = {T3,T3,T3,T3};
    static short _C4[] ATTR_ALIGN(8) = {C4,C4,C4,C4};

    /* column code adapted from peter gubanov */
    /* http://www.elecard.com/peter/idct.shtml */

    movq_m2r (*_T1, mm0);               // mm0 = T1

    movq_m2r (*(col+offset+1*8), mm1);  // mm1 = x1
    movq_r2r (mm0, mm2);                // mm2 = T1

    movq_m2r (*(col+offset+7*8), mm4);  // mm4 = x7
    pmulhw_r2r (mm1, mm0);              // mm0 = T1*x1

    movq_m2r (*_T3, mm5);               // mm5 = T3
    pmulhw_r2r (mm4, mm2);              // mm2 = T1*x7

    movq_m2r (*(col+offset+5*8), mm6);  // mm6 = x5
    movq_r2r (mm5, mm7);                // mm7 = T3-1

    movq_m2r (*(col+offset+3*8), mm3);  // mm3 = x3
    psubsw_r2r (mm4, mm0);              // mm0 = v17

    movq_m2r (*_T2, mm4);               // mm4 = T2
    pmulhw_r2r (mm3, mm5);              // mm5 = (T3-1)*x3

    paddsw_r2r (mm2, mm1);              // mm1 = u17
    pmulhw_r2r (mm6, mm7);              // mm7 = (T3-1)*x5

    /* slot */

    movq_r2r (mm4, mm2);                // mm2 = T2
    paddsw_r2r (mm3, mm5);              // mm5 = T3*x3

    pmulhw_m2r (*(col+offset+2*8), mm4);// mm4 = T2*x2
    paddsw_r2r (mm6, mm7);              // mm7 = T3*x5

    psubsw_r2r (mm6, mm5);              // mm5 = v35
    paddsw_r2r (mm3, mm7);              // mm7 = u35

    movq_m2r (*(col+offset+6*8), mm3);  // mm3 = x6
    movq_r2r (mm0, mm6);                // mm6 = v17

    pmulhw_r2r (mm3, mm2);              // mm2 = T2*x6
    psubsw_r2r (mm5, mm0);              // mm0 = b3

    psubsw_r2r (mm3, mm4);              // mm4 = v26
    paddsw_r2r (mm6, mm5);              // mm5 = v12

    movq_r2m (mm0, *(col+offset+3*8));  // save b3 in scratch0
    movq_r2r (mm1, mm6);                // mm6 = u17

    paddsw_m2r (*(col+offset+2*8), mm2);// mm2 = u26
    paddsw_r2r (mm7, mm6);              // mm6 = b0

    psubsw_r2r (mm7, mm1);              // mm1 = u12
    movq_r2r (mm1, mm7);                // mm7 = u12

    movq_m2r (*(col+offset+0*8), mm3);  // mm3 = x0
    paddsw_r2r (mm5, mm1);              // mm1 = u12+v12

    movq_m2r (*_C4, mm0);               // mm0 = C4/2
    psubsw_r2r (mm5, mm7);              // mm7 = u12-v12

    movq_r2m (mm6, *(col+offset+5*8));  // save b0 in scratch1
    pmulhw_r2r (mm0, mm1);              // mm1 = b1/2

    movq_r2r (mm4, mm6);                // mm6 = v26
    pmulhw_r2r (mm0, mm7);              // mm7 = b2/2

    movq_m2r (*(col+offset+4*8), mm5);  // mm5 = x4
    movq_r2r (mm3, mm0);                // mm0 = x0

    psubsw_r2r (mm5, mm3);              // mm3 = v04
    paddsw_r2r (mm5, mm0);              // mm0 = u04

    paddsw_r2r (mm3, mm4);              // mm4 = a1
    movq_r2r (mm0, mm5);                // mm5 = u04

    psubsw_r2r (mm6, mm3);              // mm3 = a2
    paddsw_r2r (mm2, mm5);              // mm5 = a0

    paddsw_r2r (mm1, mm1);              // mm1 = b1
    psubsw_r2r (mm2, mm0);              // mm0 = a3

    paddsw_r2r (mm7, mm7);              // mm7 = b2
    movq_r2r (mm3, mm2);                // mm2 = a2

    movq_r2r (mm4, mm6);                // mm6 = a1
    paddsw_r2r (mm7, mm3);              // mm3 = a2+b2

    psraw_i2r (COL_SHIFT, mm3);         // mm3 = y2
    paddsw_r2r (mm1, mm4);              // mm4 = a1+b1

    psraw_i2r (COL_SHIFT, mm4);         // mm4 = y1
    psubsw_r2r (mm1, mm6);              // mm6 = a1-b1

    movq_m2r (*(col+offset+5*8), mm1);  // mm1 = b0
    psubsw_r2r (mm7, mm2);              // mm2 = a2-b2

    psraw_i2r (COL_SHIFT, mm6);         // mm6 = y6
    movq_r2r (mm5, mm7);                // mm7 = a0

    movq_r2m (mm4, *(col+offset+1*8));  // save y1
    psraw_i2r (COL_SHIFT, mm2);         // mm2 = y5

    movq_r2m (mm3, *(col+offset+2*8));  // save y2
    paddsw_r2r (mm1, mm5);              // mm5 = a0+b0

    movq_m2r (*(col+offset+3*8), mm4);  // mm4 = b3
    psubsw_r2r (mm1, mm7);              // mm7 = a0-b0

    psraw_i2r (COL_SHIFT, mm5);         // mm5 = y0
    movq_r2r (mm0, mm3);                // mm3 = a3

    movq_r2m (mm2, *(col+offset+5*8));  // save y5
    psubsw_r2r (mm4, mm3);              // mm3 = a3-b3

    psraw_i2r (COL_SHIFT, mm7);         // mm7 = y7
    paddsw_r2r (mm0, mm4);              // mm4 = a3+b3

    movq_r2m (mm5, *(col+offset+0*8));  // save y0
    psraw_i2r (COL_SHIFT, mm3);         // mm3 = y4

    movq_r2m (mm6, *(col+offset+6*8));  // save y6
    psraw_i2r (COL_SHIFT, mm4);         // mm4 = y3

    movq_r2m (mm7, *(col+offset+7*8));  // save y7

    movq_r2m (mm3, *(col+offset+4*8));  // save y4

    movq_r2m (mm4, *(col+offset+3*8));  // save y3
}
/*
 * One pipelined step of block_copy.
 *
 * Loads the eight coefficients of the row at block+offset into r0 (low
 * four) and r1 (high four), advances dest by one line, stores the row
 * already packed in r2 there, then packs r1:r0 to eight unsigned bytes in
 * r0 for the next step.  Expects block, dest and stride in the enclosing
 * scope and modifies dest.
 */
#define COPY_MMX(offset,r0,r1,r2)       \
do {                                    \
    movq_m2r (*(block+offset), r0);     \
    dest += stride;                     \
    movq_m2r (*(block+offset+4), r1);   \
    movq_r2m (r2, *dest);               \
    packuswb_r2r (r1, r0);              \
} while (0)
+} + + +#define ADD_MMX(offset,r1,r2,r3,r4) \ +do { \ + movq_m2r (*(dest+2*stride), r1); \ + packuswb_r2r (r4, r3); \ + movq_r2r (r1, r2); \ + dest += stride; \ + movq_r2m (r3, *dest); \ + punpcklbw_r2r (mm0, r1); \ + paddsw_m2r (*(block+offset), r1); \ + punpckhbw_r2r (mm0, r2); \ + paddsw_m2r (*(block+offset+4), r2); \ +} while (0) + +static void block_add (int16_t * block, uint8_t * dest, int stride) +{ + movq_m2r (*dest, mm1); + pxor_r2r (mm0, mm0); + movq_m2r (*(dest+stride), mm3); + movq_r2r (mm1, mm2); + punpcklbw_r2r (mm0, mm1); + movq_r2r (mm3, mm4); + paddsw_m2r (*(block+0*8), mm1); + punpckhbw_r2r (mm0, mm2); + paddsw_m2r (*(block+0*8+4), mm2); + punpcklbw_r2r (mm0, mm3); + paddsw_m2r (*(block+1*8), mm3); + packuswb_r2r (mm2, mm1); + punpckhbw_r2r (mm0, mm4); + movq_r2m (mm1, *dest); + paddsw_m2r (*(block+1*8+4), mm4); + ADD_MMX (2*8, mm1, mm2, mm3, mm4); + ADD_MMX (3*8, mm3, mm4, mm1, mm2); + ADD_MMX (4*8, mm1, mm2, mm3, mm4); + ADD_MMX (5*8, mm3, mm4, mm1, mm2); + ADD_MMX (6*8, mm1, mm2, mm3, mm4); + ADD_MMX (7*8, mm3, mm4, mm1, mm2); + packuswb_r2r (mm4, mm3); + movq_r2m (mm3, *(dest+stride)); +} + +static inline void block_zero (int16_t * block) { + pxor_r2r (mm0, mm0); + movq_r2m (mm0, *(block+0*4)); + movq_r2m (mm0, *(block+1*4)); + movq_r2m (mm0, *(block+2*4)); + movq_r2m (mm0, *(block+3*4)); + movq_r2m (mm0, *(block+4*4)); + movq_r2m (mm0, *(block+5*4)); + movq_r2m (mm0, *(block+6*4)); + movq_r2m (mm0, *(block+7*4)); + movq_r2m (mm0, *(block+8*4)); + movq_r2m (mm0, *(block+9*4)); + movq_r2m (mm0, *(block+10*4)); + movq_r2m (mm0, *(block+11*4)); + movq_r2m (mm0, *(block+12*4)); + movq_r2m (mm0, *(block+13*4)); + movq_r2m (mm0, *(block+14*4)); + movq_r2m (mm0, *(block+15*4)); +} + +declare_idct (mmxext_idct, mmxext_table, + mmxext_row_head, mmxext_row, mmxext_row_tail, mmxext_row_mid) + +void mpeg2_idct_copy_mmxext (int16_t * block, uint8_t * dest, int stride) +{ + mmxext_idct (block); + block_copy (block, dest, stride); + block_zero (block); +} + 
+void mpeg2_idct_add_mmxext (int16_t * block, uint8_t * dest, int stride) +{ + mmxext_idct (block); + block_add (block, dest, stride); + block_zero (block); +} + +void mpeg2_idct_mmxext (int16_t * block) +{ + mmxext_idct (block); +} + +declare_idct (mmx_idct, mmx_table, + mmx_row_head, mmx_row, mmx_row_tail, mmx_row_mid) + +void mpeg2_idct_copy_mmx (int16_t * block, uint8_t * dest, int stride) +{ + mmx_idct (block); + block_copy (block, dest, stride); + block_zero (block); +} + +void mpeg2_idct_add_mmx (int16_t * block, uint8_t * dest, int stride) +{ + mmx_idct (block); + block_add (block, dest, stride); + block_zero (block); +} + +void mpeg2_idct_mmx (int16_t * block) +{ + mmx_idct (block); +} + +void mpeg2_zero_block_mmx (int16_t * block) +{ + block_zero (block); +} + +void mpeg2_idct_mmx_init (void) +{ + int i, j; + + /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */ + + for (i = 0; i < 64; i++) { + j = mpeg2_scan_norm[i]; + mpeg2_scan_norm[i] = (j & 0x38) | ((j & 6) >> 1) | ((j & 1) << 2); + j = mpeg2_scan_alt[i]; + mpeg2_scan_alt[i] = (j & 0x38) | ((j & 6) >> 1) | ((j & 1) << 2); + } +} + +#endif diff --git a/src/video_dec/libmpeg2/libmpeg2_accel.c b/src/video_dec/libmpeg2/libmpeg2_accel.c new file mode 100644 index 000000000..92c0e280b --- /dev/null +++ b/src/video_dec/libmpeg2/libmpeg2_accel.c @@ -0,0 +1,223 @@ +/* + * libmpeg2_accel.c + * Copyright (C) 2004 The Unichrome Project. + * Copyright (C) 2005 Thomas Hellstrom. + * + * This file is part of xine, a free video player. + * + * xine is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * xine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include <xine/xine_internal.h> +#include "mpeg2.h" +#include "mpeg2_internal.h" +#include "xvmc_vld.h" +#include "libmpeg2_accel.h" + + +void +libmpeg2_accel_scan( mpeg2dec_accel_t *accel, uint8_t *scan_norm, uint8_t *scan_alt) +{ + xvmc_setup_scan_ptable(); +} + + +int +libmpeg2_accel_discontinuity(mpeg2dec_accel_t *accel, uint32_t frame_format, picture_t *picture) +{ + accel->xvmc_last_slice_code=-1; + if ( !picture->current_frame ) + return 0; + if (frame_format == XINE_IMGFMT_XXMC) { + xine_xxmc_t *xxmc = (xine_xxmc_t *) + picture->current_frame->accel_data; + switch(xxmc->acceleration) { + case XINE_XVMC_ACCEL_VLD: + case XINE_XVMC_ACCEL_IDCT: + case XINE_XVMC_ACCEL_MOCOMP: + xxmc->proc_xxmc_flush( picture->current_frame ); + break; + default: + break; + } + } + return 0; +} + +int +libmpeg2_accel_new_sequence(mpeg2dec_accel_t *accel, uint32_t frame_format, picture_t *picture) +{ + switch(frame_format) { + case XINE_IMGFMT_XXMC: + case XINE_IMGFMT_XVMC: { + xine_xvmc_t *xvmc = (xine_xvmc_t *) + picture->current_frame->accel_data; + picture->mc = xvmc->macroblocks; + return 0; + } + default: + break; + } + return 1; +} + +int +libmpeg2_accel_new_frame(mpeg2dec_accel_t *accel, uint32_t frame_format, + picture_t *picture, double ratio, uint32_t flags) +{ + if (picture->current_frame) { + if (XINE_IMGFMT_XXMC == frame_format) { + xine_xxmc_t *xxmc = (xine_xxmc_t *) + picture->current_frame->accel_data; + + /* + * Make a request for acceleration type and mpeg coding from + * the output plugin. 
+ */ + + xxmc->fallback_format = XINE_IMGFMT_YV12; + xxmc->acceleration = XINE_XVMC_ACCEL_VLD| XINE_XVMC_ACCEL_IDCT + | XINE_XVMC_ACCEL_MOCOMP ; + + /* + * Standard MOCOMP / IDCT XvMC implementation for interlaced streams + * is buggy. The bug is inherited from the old XvMC driver. Don't use it until + * it has been fixed. (A volunteer ?) + */ + + if ( picture->picture_structure != 3 ) { + picture->top_field_first = (picture->picture_structure == 1); + xxmc->acceleration &= ~( XINE_XVMC_ACCEL_IDCT | XINE_XVMC_ACCEL_MOCOMP ); + } + + xxmc->mpeg = (picture->mpeg1) ? XINE_XVMC_MPEG_1:XINE_XVMC_MPEG_2; + xxmc->proc_xxmc_update_frame (picture->current_frame->driver, + picture->current_frame, + picture->coded_picture_width, + picture->coded_picture_height, + ratio, + XINE_IMGFMT_XXMC, flags); + } + } + return 0; +} + +void +libmpeg2_accel_frame_completion(mpeg2dec_accel_t * accel, uint32_t frame_format, picture_t *picture, + int code) +{ + + if ( !picture->current_frame ) return; + + if (frame_format == XINE_IMGFMT_XXMC) { + xine_xxmc_t *xxmc = (xine_xxmc_t *) + picture->current_frame->accel_data; + if (!xxmc->decoded) { + switch(picture->current_frame->format) { + case XINE_IMGFMT_XXMC: + switch(xxmc->acceleration) { + case XINE_XVMC_ACCEL_VLD: + mpeg2_xxmc_vld_frame_complete(accel, picture, code); + break; + case XINE_XVMC_ACCEL_IDCT: + case XINE_XVMC_ACCEL_MOCOMP: + xxmc->decoded = !picture->current_frame->bad_frame; + xxmc->proc_xxmc_flush( picture->current_frame ); + break; + default: + break; + } + default: + break; + } + } + } +} + + +int +libmpeg2_accel_slice(mpeg2dec_accel_t *accel, picture_t *picture, int code, char * buffer, + uint32_t chunk_size, uint8_t *chunk_buffer) +{ + /* + * Don't reference frames of other formats. They are invalid. This may happen if the + * xxmc plugin suddenly falls back to software decoding. 
+ */ + + if (( picture->current_frame->picture_coding_type == XINE_PICT_P_TYPE ) || + ( picture->current_frame->picture_coding_type == XINE_PICT_B_TYPE )) { + if (! picture->forward_reference_frame) return 1; + if (picture->forward_reference_frame->format != picture->current_frame->format) { + picture->v_offset = 0; + return 1; + } + } + + if ( picture->current_frame->picture_coding_type == XINE_PICT_B_TYPE ) { + if (! picture->backward_reference_frame) return 1; + if (picture->backward_reference_frame->format != picture->current_frame->format) { + picture->v_offset = 0; + return 1; + } + } + + switch( picture->current_frame->format ) { + + case XINE_IMGFMT_XXMC: + { + xine_xxmc_t *xxmc = (xine_xxmc_t *) + picture->current_frame->accel_data; + + if ( xxmc->proc_xxmc_lock_valid( picture->current_frame, + picture->forward_reference_frame, + picture->backward_reference_frame, + picture->current_frame->picture_coding_type)) { + picture->v_offset = 0; + return 1; + } + + switch(picture->current_frame->format) { + case XINE_IMGFMT_XXMC: + switch(xxmc->acceleration) { + case XINE_XVMC_ACCEL_VLD: + mpeg2_xxmc_slice(accel, picture, code, buffer, chunk_size, chunk_buffer); + break; + case XINE_XVMC_ACCEL_IDCT: + case XINE_XVMC_ACCEL_MOCOMP: + mpeg2_xvmc_slice (accel, picture, code, buffer); + break; + default: + mpeg2_slice (picture, code, buffer); + break; + } + break; + default: + mpeg2_slice (picture, code, buffer); + break; + } + xxmc->proc_xxmc_unlock(picture->current_frame->driver); + break; + } + + case XINE_IMGFMT_XVMC: + mpeg2_xvmc_slice (accel, picture, code, buffer); + break; + + default: + mpeg2_slice (picture, code, buffer); + break; + } + return 0; +} diff --git a/src/video_dec/libmpeg2/libmpeg2_accel.h b/src/video_dec/libmpeg2/libmpeg2_accel.h new file mode 100644 index 000000000..5d0b37a78 --- /dev/null +++ b/src/video_dec/libmpeg2/libmpeg2_accel.h @@ -0,0 +1,48 @@ +/* + * libmpeg2_accel.h + * Copyright (C) 2004 The Unichrome Project. 
+ * Copyright (C) 2005 Thomas Hellstrom. + * + * This file is part of xine, a free video player. + * + * xine is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * xine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#ifndef LIBMPEG2_ACCEL_H +#define LIBMPEG2_ACCEL_H + +#include "mpeg2_internal.h" + +/* + * Internal context data type. + */ + +typedef struct { + int xvmc_last_slice_code; + int slices_per_row; + int row_slice_count; + unsigned xxmc_mb_pic_height; +} mpeg2dec_accel_t; + +extern int libmpeg2_accel_discontinuity(mpeg2dec_accel_t *accel, uint32_t frame_format, picture_t *picture); +extern int libmpeg2_accel_new_sequence(mpeg2dec_accel_t *accel, uint32_t frame_format, picture_t *picture); +extern int libmpeg2_accel_new_frame(mpeg2dec_accel_t *accel, uint32_t frame_format, picture_t *picture, double ratio, uint32_t flags); +extern void libmpeg2_accel_frame_completion(mpeg2dec_accel_t *accel, uint32_t frame_format, picture_t *picture, int code); + +extern int libmpeg2_accel_slice(mpeg2dec_accel_t *accel, picture_t *picture, int code, + char * buffer, uint32_t chunk_size, uint8_t *chunk_buffer); +extern void libmpeg2_accel_scan( mpeg2dec_accel_t *accel, uint8_t *scan_norm, uint8_t *scan_alt); + +#endif diff --git a/src/video_dec/libmpeg2/motion_comp.c b/src/video_dec/libmpeg2/motion_comp.c new file mode 100644 index 000000000..4a324b8ea --- /dev/null +++ 
b/src/video_dec/libmpeg2/motion_comp.c @@ -0,0 +1,154 @@ +/* + * motion_comp.c + * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org> + * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#include <stdio.h> +#include <inttypes.h> + +#include "mpeg2_internal.h" +#include <xine/xineutils.h> + +mpeg2_mc_t mpeg2_mc; + +void mpeg2_mc_init (uint32_t mm_accel) +{ +#ifdef LIBMPEG2_MLIB + if (mm_accel & MM_ACCEL_MLIB) { +#ifdef LOG + fprintf (stderr, "Using mediaLib for motion compensation\n"); +#endif + mpeg2_mc = mpeg2_mc_mlib; + } +#endif + +#if defined(ARCH_X86) || defined(ARCH_X86_64) + if (mm_accel & MM_ACCEL_X86_MMXEXT) { +#ifdef LOG + fprintf (stderr, "Using MMXEXT for motion compensation\n"); +#endif + mpeg2_mc = mpeg2_mc_mmxext; + } else if (mm_accel & MM_ACCEL_X86_3DNOW) { +#ifdef LOG + fprintf (stderr, "Using 3DNOW for motion compensation\n"); +#endif + mpeg2_mc = mpeg2_mc_3dnow; + } else if (mm_accel & MM_ACCEL_X86_MMX) { +#ifdef LOG + fprintf (stderr, "Using MMX for motion compensation\n"); +#endif + mpeg2_mc = mpeg2_mc_mmx; + } else +#endif +#if defined (ARCH_PPC) && 
defined (ENABLE_ALTIVEC) + if (mm_accel & MM_ACCEL_PPC_ALTIVEC) { +#ifdef LOG + fprintf (stderr, "Using altivec for motion compensation\n"); +#endif + mpeg2_mc = mpeg2_mc_altivec; + } else +#endif +#if defined(ARCH_SPARC) && defined(ENABLE_VIS) + if (mm_accel & MM_ACCEL_SPARC_VIS) { +#ifdef LOG + fprintf (stderr, "Using VIS for motion compensation\n"); +#endif + mpeg2_mc = mpeg2_mc_vis; + } else +#endif + { +#ifdef LOG + fprintf (stderr, "No accelerated motion compensation found\n"); +#endif + mpeg2_mc = mpeg2_mc_c; + } +} + +#define avg2(a,b) ((a+b+1)>>1) +#define avg4(a,b,c,d) ((a+b+c+d+2)>>2) + +#define predict_o(i) (ref[i]) +#define predict_x(i) (avg2 (ref[i], ref[i+1])) +#define predict_y(i) (avg2 (ref[i], (ref+stride)[i])) +#define predict_xy(i) (avg4 (ref[i], ref[i+1], \ + (ref+stride)[i], (ref+stride)[i+1])) + +#define put(predictor,i) dest[i] = predictor (i) +#define avg(predictor,i) dest[i] = avg2 (predictor (i), dest[i]) + +/* mc function template */ + +#define MC_FUNC(op,xy) \ +static void MC_##op##_##xy##_16_c (uint8_t * dest, uint8_t * ref, \ + int stride, int height) \ +{ \ + do { \ + op (predict_##xy, 0); \ + op (predict_##xy, 1); \ + op (predict_##xy, 2); \ + op (predict_##xy, 3); \ + op (predict_##xy, 4); \ + op (predict_##xy, 5); \ + op (predict_##xy, 6); \ + op (predict_##xy, 7); \ + op (predict_##xy, 8); \ + op (predict_##xy, 9); \ + op (predict_##xy, 10); \ + op (predict_##xy, 11); \ + op (predict_##xy, 12); \ + op (predict_##xy, 13); \ + op (predict_##xy, 14); \ + op (predict_##xy, 15); \ + ref += stride; \ + dest += stride; \ + } while (--height); \ +} \ +static void MC_##op##_##xy##_8_c (uint8_t * dest, uint8_t * ref, \ + int stride, int height) \ +{ \ + do { \ + op (predict_##xy, 0); \ + op (predict_##xy, 1); \ + op (predict_##xy, 2); \ + op (predict_##xy, 3); \ + op (predict_##xy, 4); \ + op (predict_##xy, 5); \ + op (predict_##xy, 6); \ + op (predict_##xy, 7); \ + ref += stride; \ + dest += stride; \ + } while (--height); \ +} + +/* 
definitions of the actual mc functions */ + +MC_FUNC (put,o) +MC_FUNC (avg,o) +MC_FUNC (put,x) +MC_FUNC (avg,x) +MC_FUNC (put,y) +MC_FUNC (avg,y) +MC_FUNC (put,xy) +MC_FUNC (avg,xy) + +MPEG2_MC_EXTERN (c) diff --git a/src/video_dec/libmpeg2/motion_comp_altivec.c b/src/video_dec/libmpeg2/motion_comp_altivec.c new file mode 100644 index 000000000..99719b7fb --- /dev/null +++ b/src/video_dec/libmpeg2/motion_comp_altivec.c @@ -0,0 +1,2031 @@ +/* + * motion_comp_altivec.c + * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org> + * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#ifndef HOST_OS_DARWIN + +#if defined (ARCH_PPC) && defined (ENABLE_ALTIVEC) + +#include "mpeg2_internal.h" + +#include <inttypes.h> + +/* + * The asm code is generated with: + * + * gcc-2.95 -fvec -DHOST_OS_DARWIN -O9 -fomit-frame-pointer -mregnames -S + * motion_comp_altivec.c + * + * sed 's/.L/._L/g' motion_comp_altivec.s | + * awk '{args=""; len=split ($2, arg, ","); + * for (i=1; i<=len; i++) { a=arg[i]; if (i<len) a=a","; + * args = args sprintf ("%-6s", a) } + * printf ("\t\"\t%-16s%-24s\\n\"\n", $1, args) }' | + * unexpand -a + */ + +static void MC_put_o_16_altivec (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + asm (" \n" + " srawi %r6, %r6, 1 \n" + " li %r9, 15 \n" + " addi %r6, %r6, -1 \n" + " lvsl %v12, 0, %r4 \n" + " mtctr %r6 \n" + " lvx %v1, 0, %r4 \n" + " lvx %v0, %r9, %r4 \n" + " add %r0, %r5, %r5 \n" + " vperm %v13, %v1, %v0, %v12 \n" + " add %r4, %r4, %r5 \n" + "._L6: \n" + " li %r9, 15 \n" + " lvx %v1, 0, %r4 \n" + " lvx %v0, %r9, %r4 \n" + " stvx %v13, 0, %r3 \n" + " vperm %v13, %v1, %v0, %v12 \n" + " add %r4, %r4, %r5 \n" + " lvx %v1, 0, %r4 \n" + " lvx %v0, %r9, %r4 \n" + " stvx %v13, %r5, %r3 \n" + " vperm %v13, %v1, %v0, %v12 \n" + " add %r4, %r4, %r5 \n" + " add %r3, %r3, %r0 \n" + " bdnz ._L6 \n" + " lvx %v0, %r9, %r4 \n" + " lvx %v1, 0, %r4 \n" + " stvx %v13, 0, %r3 \n" + " vperm %v13, %v1, %v0, %v12 \n" + " stvx %v13, %r5, %r3 \n" + ); +} + +static void MC_put_o_8_altivec (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + asm (" \n" + " lvsl %v12, 0, %r4 \n" + " lvsl %v1, %r5, %r4 \n" + " vmrghb %v12, %v12, %v12 \n" + " srawi %r6, %r6, 1 \n" + " li %r9, 7 \n" + " vmrghb %v1, %v1, %v1 \n" + " addi %r6, %r6, -1 \n" + " vpkuhum %v10, %v12, %v12 \n" + " lvx %v13, 0, %r4 \n" 
+ " mtctr %r6 \n" + " vpkuhum %v11, %v1, %v1 \n" + " lvx %v0, %r9, %r4 \n" + " add %r4, %r4, %r5 \n" + " vperm %v12, %v13, %v0, %v10 \n" + "._L11: \n" + " li %r9, 7 \n" + " lvx %v0, %r9, %r4 \n" + " lvx %v13, 0, %r4 \n" + " stvewx %v12, 0, %r3 \n" + " li %r9, 4 \n" + " vperm %v1, %v13, %v0, %v11 \n" + " stvewx %v12, %r9, %r3 \n" + " add %r4, %r4, %r5 \n" + " li %r9, 7 \n" + " lvx %v0, %r9, %r4 \n" + " lvx %v13, 0, %r4 \n" + " add %r3, %r3, %r5 \n" + " stvewx %v1, 0, %r3 \n" + " vperm %v12, %v13, %v0, %v10 \n" + " li %r9, 4 \n" + " stvewx %v1, %r9, %r3 \n" + " add %r4, %r4, %r5 \n" + " add %r3, %r3, %r5 \n" + " bdnz ._L11 \n" + " li %r9, 7 \n" + " lvx %v0, %r9, %r4 \n" + " lvx %v13, 0, %r4 \n" + " stvewx %v12, 0, %r3 \n" + " li %r9, 4 \n" + " vperm %v1, %v13, %v0, %v11 \n" + " stvewx %v12, %r9, %r3 \n" + " add %r3, %r3, %r5 \n" + " stvewx %v1, 0, %r3 \n" + " stvewx %v1, %r9, %r3 \n" + ); +} + +static void MC_put_x_16_altivec (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + asm (" \n" + " lvsl %v11, 0, %r4 \n" + " vspltisb %v0, 1 \n" + " li %r9, 16 \n" + " lvx %v12, 0, %r4 \n" + " vaddubm %v10, %v11, %v0 \n" + " lvx %v13, %r9, %r4 \n" + " srawi %r6, %r6, 1 \n" + " addi %r6, %r6, -1 \n" + " vperm %v1, %v12, %v13, %v10 \n" + " vperm %v0, %v12, %v13, %v11 \n" + " mtctr %r6 \n" + " add %r0, %r5, %r5 \n" + " add %r4, %r4, %r5 \n" + " vavgub %v0, %v0, %v1 \n" + "._L16: \n" + " li %r9, 16 \n" + " lvx %v12, 0, %r4 \n" + " lvx %v13, %r9, %r4 \n" + " stvx %v0, 0, %r3 \n" + " vperm %v1, %v12, %v13, %v10 \n" + " add %r4, %r4, %r5 \n" + " vperm %v0, %v12, %v13, %v11 \n" + " lvx %v12, 0, %r4 \n" + " lvx %v13, %r9, %r4 \n" + " vavgub %v0, %v0, %v1 \n" + " stvx %v0, %r5, %r3 \n" + " vperm %v1, %v12, %v13, %v10 \n" + " add %r4, %r4, %r5 \n" + " vperm %v0, %v12, %v13, %v11 \n" + " add %r3, %r3, %r0 \n" + " vavgub %v0, %v0, %v1 \n" + " bdnz ._L16 \n" + " lvx %v13, %r9, %r4 \n" + " lvx %v12, 0, %r4 \n" + " stvx %v0, 0, %r3 \n" + " vperm %v1, %v12, %v13, %v10 \n" + " vperm 
%v0, %v12, %v13, %v11 \n" + " vavgub %v0, %v0, %v1 \n" + " stvx %v0, %r5, %r3 \n" + ); +} + +static void MC_put_x_8_altivec (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + asm (" \n" + " lvsl %v0, 0, %r4 \n" + " vspltisb %v13, 1 \n" + " lvsl %v10, %r5, %r4 \n" + " vmrghb %v0, %v0, %v0 \n" + " li %r9, 8 \n" + " lvx %v11, 0, %r4 \n" + " vmrghb %v10, %v10, %v10 \n" + " vpkuhum %v8, %v0, %v0 \n" + " lvx %v12, %r9, %r4 \n" + " srawi %r6, %r6, 1 \n" + " vpkuhum %v9, %v10, %v10 \n" + " vaddubm %v7, %v8, %v13 \n" + " addi %r6, %r6, -1 \n" + " vperm %v1, %v11, %v12, %v8 \n" + " mtctr %r6 \n" + " vaddubm %v13, %v9, %v13 \n" + " add %r4, %r4, %r5 \n" + " vperm %v0, %v11, %v12, %v7 \n" + " vavgub %v0, %v1, %v0 \n" + "._L21: \n" + " li %r9, 8 \n" + " lvx %v12, %r9, %r4 \n" + " lvx %v11, 0, %r4 \n" + " stvewx %v0, 0, %r3 \n" + " li %r9, 4 \n" + " vperm %v1, %v11, %v12, %v13 \n" + " stvewx %v0, %r9, %r3 \n" + " vperm %v0, %v11, %v12, %v9 \n" + " add %r4, %r4, %r5 \n" + " li %r9, 8 \n" + " lvx %v12, %r9, %r4 \n" + " vavgub %v10, %v0, %v1 \n" + " lvx %v11, 0, %r4 \n" + " add %r3, %r3, %r5 \n" + " stvewx %v10, 0, %r3 \n" + " vperm %v1, %v11, %v12, %v7 \n" + " vperm %v0, %v11, %v12, %v8 \n" + " li %r9, 4 \n" + " stvewx %v10, %r9, %r3 \n" + " add %r4, %r4, %r5 \n" + " vavgub %v0, %v0, %v1 \n" + " add %r3, %r3, %r5 \n" + " bdnz ._L21 \n" + " li %r9, 8 \n" + " lvx %v12, %r9, %r4 \n" + " lvx %v11, 0, %r4 \n" + " stvewx %v0, 0, %r3 \n" + " li %r9, 4 \n" + " vperm %v1, %v11, %v12, %v13 \n" + " stvewx %v0, %r9, %r3 \n" + " vperm %v0, %v11, %v12, %v9 \n" + " add %r3, %r3, %r5 \n" + " vavgub %v10, %v0, %v1 \n" + " stvewx %v10, 0, %r3 \n" + " stvewx %v10, %r9, %r3 \n" + ); +} + +static void MC_put_y_16_altivec (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + asm (" \n" + " li %r9, 15 \n" + " lvsl %v10, 0, %r4 \n" + " lvx %v13, 0, %r4 \n" + " lvx %v1, %r9, %r4 \n" + " add %r4, %r4, %r5 \n" + " vperm %v12, %v13, %v1, %v10 \n" + " srawi %r6, %r6, 1 \n" + " lvx %v13, 
0, %r4 \n" + " lvx %v1, %r9, %r4 \n" + " addi %r6, %r6, -1 \n" + " vperm %v11, %v13, %v1, %v10 \n" + " mtctr %r6 \n" + " add %r0, %r5, %r5 \n" + " add %r4, %r4, %r5 \n" + " vavgub %v0, %v12, %v11 \n" + "._L26: \n" + " li %r9, 15 \n" + " lvx %v13, 0, %r4 \n" + " lvx %v1, %r9, %r4 \n" + " stvx %v0, 0, %r3 \n" + " vperm %v12, %v13, %v1, %v10 \n" + " add %r4, %r4, %r5 \n" + " lvx %v13, 0, %r4 \n" + " lvx %v1, %r9, %r4 \n" + " vavgub %v0, %v12, %v11 \n" + " stvx %v0, %r5, %r3 \n" + " vperm %v11, %v13, %v1, %v10 \n" + " add %r4, %r4, %r5 \n" + " add %r3, %r3, %r0 \n" + " vavgub %v0, %v12, %v11 \n" + " bdnz ._L26 \n" + " lvx %v1, %r9, %r4 \n" + " lvx %v13, 0, %r4 \n" + " stvx %v0, 0, %r3 \n" + " vperm %v12, %v13, %v1, %v10 \n" + " vavgub %v0, %v12, %v11 \n" + " stvx %v0, %r5, %r3 \n" + ); +} + +static void MC_put_y_8_altivec (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + asm (" \n" + " lvsl %v13, 0, %r4 \n" + " lvsl %v11, %r5, %r4 \n" + " vmrghb %v13, %v13, %v13 \n" + " li %r9, 7 \n" + " lvx %v12, 0, %r4 \n" + " vmrghb %v11, %v11, %v11 \n" + " lvx %v1, %r9, %r4 \n" + " vpkuhum %v9, %v13, %v13 \n" + " add %r4, %r4, %r5 \n" + " vpkuhum %v10, %v11, %v11 \n" + " vperm %v13, %v12, %v1, %v9 \n" + " srawi %r6, %r6, 1 \n" + " lvx %v12, 0, %r4 \n" + " lvx %v1, %r9, %r4 \n" + " addi %r6, %r6, -1 \n" + " vperm %v11, %v12, %v1, %v10 \n" + " mtctr %r6 \n" + " add %r4, %r4, %r5 \n" + " vavgub %v0, %v13, %v11 \n" + "._L31: \n" + " li %r9, 7 \n" + " lvx %v1, %r9, %r4 \n" + " lvx %v12, 0, %r4 \n" + " stvewx %v0, 0, %r3 \n" + " li %r9, 4 \n" + " vperm %v13, %v12, %v1, %v9 \n" + " stvewx %v0, %r9, %r3 \n" + " add %r4, %r4, %r5 \n" + " vavgub %v0, %v13, %v11 \n" + " li %r9, 7 \n" + " lvx %v1, %r9, %r4 \n" + " lvx %v12, 0, %r4 \n" + " add %r3, %r3, %r5 \n" + " stvewx %v0, 0, %r3 \n" + " vperm %v11, %v12, %v1, %v10 \n" + " li %r9, 4 \n" + " stvewx %v0, %r9, %r3 \n" + " vavgub %v0, %v13, %v11 \n" + " add %r4, %r4, %r5 \n" + " add %r3, %r3, %r5 \n" + " bdnz ._L31 \n" + " li %r9, 
7 \n" + " lvx %v1, %r9, %r4 \n" + " lvx %v12, 0, %r4 \n" + " stvewx %v0, 0, %r3 \n" + " li %r9, 4 \n" + " vperm %v13, %v12, %v1, %v9 \n" + " stvewx %v0, %r9, %r3 \n" + " add %r3, %r3, %r5 \n" + " vavgub %v0, %v13, %v11 \n" + " stvewx %v0, 0, %r3 \n" + " stvewx %v0, %r9, %r3 \n" + ); +} + +static void MC_put_xy_16_altivec (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + asm (" \n" + " lvsl %v5, 0, %r4 \n" + " vspltisb %v3, 1 \n" + " li %r9, 16 \n" + " lvx %v1, 0, %r4 \n" + " vaddubm %v4, %v5, %v3 \n" + " lvx %v0, %r9, %r4 \n" + " add %r4, %r4, %r5 \n" + " vperm %v10, %v1, %v0, %v4 \n" + " srawi %r6, %r6, 1 \n" + " vperm %v11, %v1, %v0, %v5 \n" + " addi %r6, %r6, -1 \n" + " lvx %v1, 0, %r4 \n" + " mtctr %r6 \n" + " lvx %v0, %r9, %r4 \n" + " vavgub %v9, %v11, %v10 \n" + " vxor %v8, %v11, %v10 \n" + " add %r0, %r5, %r5 \n" + " vperm %v10, %v1, %v0, %v4 \n" + " add %r4, %r4, %r5 \n" + " vperm %v11, %v1, %v0, %v5 \n" + " vxor %v6, %v11, %v10 \n" + " vavgub %v7, %v11, %v10 \n" + " vor %v0, %v8, %v6 \n" + " vxor %v13, %v9, %v7 \n" + " vand %v0, %v3, %v0 \n" + " vavgub %v1, %v9, %v7 \n" + " vand %v0, %v0, %v13 \n" + " vsububm %v13, %v1, %v0 \n" + "._L36: \n" + " li %r9, 16 \n" + " lvx %v1, 0, %r4 \n" + " lvx %v0, %r9, %r4 \n" + " stvx %v13, 0, %r3 \n" + " vperm %v10, %v1, %v0, %v4 \n" + " add %r4, %r4, %r5 \n" + " vperm %v11, %v1, %v0, %v5 \n" + " lvx %v1, 0, %r4 \n" + " lvx %v0, %r9, %r4 \n" + " vavgub %v9, %v11, %v10 \n" + " vxor %v8, %v11, %v10 \n" + " add %r4, %r4, %r5 \n" + " vperm %v10, %v1, %v0, %v4 \n" + " vavgub %v12, %v9, %v7 \n" + " vperm %v11, %v1, %v0, %v5 \n" + " vor %v13, %v8, %v6 \n" + " vxor %v0, %v9, %v7 \n" + " vxor %v6, %v11, %v10 \n" + " vand %v13, %v3, %v13 \n" + " vavgub %v7, %v11, %v10 \n" + " vor %v1, %v8, %v6 \n" + " vand %v13, %v13, %v0 \n" + " vxor %v0, %v9, %v7 \n" + " vand %v1, %v3, %v1 \n" + " vsububm %v13, %v12, %v13 \n" + " vand %v1, %v1, %v0 \n" + " stvx %v13, %r5, %r3 \n" + " vavgub %v0, %v9, %v7 \n" + " add %r3, %r3, %r0 \n" 
+ " vsububm %v13, %v0, %v1 \n" + " bdnz ._L36 \n" + " lvx %v0, %r9, %r4 \n" + " lvx %v1, 0, %r4 \n" + " stvx %v13, 0, %r3 \n" + " vperm %v10, %v1, %v0, %v4 \n" + " vperm %v11, %v1, %v0, %v5 \n" + " vxor %v8, %v11, %v10 \n" + " vavgub %v9, %v11, %v10 \n" + " vor %v0, %v8, %v6 \n" + " vxor %v13, %v9, %v7 \n" + " vand %v0, %v3, %v0 \n" + " vavgub %v1, %v9, %v7 \n" + " vand %v0, %v0, %v13 \n" + " vsububm %v13, %v1, %v0 \n" + " stvx %v13, %r5, %r3 \n" + ); +} + +static void MC_put_xy_8_altivec (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + asm (" \n" + " lvsl %v4, 0, %r4 \n" + " vspltisb %v3, 1 \n" + " lvsl %v5, %r5, %r4 \n" + " vmrghb %v4, %v4, %v4 \n" + " li %r9, 16 \n" + " vmrghb %v5, %v5, %v5 \n" + " lvx %v1, 0, %r4 \n" + " vpkuhum %v4, %v4, %v4 \n" + " lvx %v0, %r9, %r4 \n" + " vpkuhum %v5, %v5, %v5 \n" + " add %r4, %r4, %r5 \n" + " vaddubm %v2, %v4, %v3 \n" + " vperm %v11, %v1, %v0, %v4 \n" + " srawi %r6, %r6, 1 \n" + " vaddubm %v19, %v5, %v3 \n" + " addi %r6, %r6, -1 \n" + " vperm %v10, %v1, %v0, %v2 \n" + " mtctr %r6 \n" + " lvx %v1, 0, %r4 \n" + " lvx %v0, %r9, %r4 \n" + " vavgub %v9, %v11, %v10 \n" + " vxor %v8, %v11, %v10 \n" + " add %r4, %r4, %r5 \n" + " vperm %v10, %v1, %v0, %v19 \n" + " vperm %v11, %v1, %v0, %v5 \n" + " vxor %v6, %v11, %v10 \n" + " vavgub %v7, %v11, %v10 \n" + " vor %v0, %v8, %v6 \n" + " vxor %v13, %v9, %v7 \n" + " vand %v0, %v3, %v0 \n" + " vavgub %v1, %v9, %v7 \n" + " vand %v0, %v0, %v13 \n" + " vsububm %v13, %v1, %v0 \n" + "._L41: \n" + " li %r9, 16 \n" + " lvx %v0, %r9, %r4 \n" + " lvx %v1, 0, %r4 \n" + " stvewx %v13, 0, %r3 \n" + " li %r9, 4 \n" + " vperm %v10, %v1, %v0, %v2 \n" + " stvewx %v13, %r9, %r3 \n" + " vperm %v11, %v1, %v0, %v4 \n" + " add %r4, %r4, %r5 \n" + " li %r9, 16 \n" + " vavgub %v9, %v11, %v10 \n" + " lvx %v0, %r9, %r4 \n" + " vxor %v8, %v11, %v10 \n" + " lvx %v1, 0, %r4 \n" + " vavgub %v12, %v9, %v7 \n" + " vor %v13, %v8, %v6 \n" + " add %r3, %r3, %r5 \n" + " vperm %v10, %v1, %v0, %v19 \n" + " li 
%r9, 4 \n" + " vperm %v11, %v1, %v0, %v5 \n" + " vand %v13, %v3, %v13 \n" + " add %r4, %r4, %r5 \n" + " vxor %v0, %v9, %v7 \n" + " vxor %v6, %v11, %v10 \n" + " vavgub %v7, %v11, %v10 \n" + " vor %v1, %v8, %v6 \n" + " vand %v13, %v13, %v0 \n" + " vxor %v0, %v9, %v7 \n" + " vand %v1, %v3, %v1 \n" + " vsububm %v13, %v12, %v13 \n" + " vand %v1, %v1, %v0 \n" + " stvewx %v13, 0, %r3 \n" + " vavgub %v0, %v9, %v7 \n" + " stvewx %v13, %r9, %r3 \n" + " add %r3, %r3, %r5 \n" + " vsububm %v13, %v0, %v1 \n" + " bdnz ._L41 \n" + " li %r9, 16 \n" + " lvx %v0, %r9, %r4 \n" + " lvx %v1, 0, %r4 \n" + " stvewx %v13, 0, %r3 \n" + " vperm %v10, %v1, %v0, %v2 \n" + " li %r9, 4 \n" + " vperm %v11, %v1, %v0, %v4 \n" + " stvewx %v13, %r9, %r3 \n" + " add %r3, %r3, %r5 \n" + " vxor %v8, %v11, %v10 \n" + " vavgub %v9, %v11, %v10 \n" + " vor %v0, %v8, %v6 \n" + " vxor %v13, %v9, %v7 \n" + " vand %v0, %v3, %v0 \n" + " vavgub %v1, %v9, %v7 \n" + " vand %v0, %v0, %v13 \n" + " vsububm %v13, %v1, %v0 \n" + " stvewx %v13, 0, %r3 \n" + " stvewx %v13, %r9, %r3 \n" + ); +} + +static void MC_avg_o_16_altivec (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + asm (" \n" + " li %r9, 15 \n" + " lvx %v0, %r9, %r4 \n" + " lvsl %v11, 0, %r4 \n" + " lvx %v1, 0, %r4 \n" + " srawi %r6, %r6, 1 \n" + " addi %r6, %r6, -1 \n" + " vperm %v0, %v1, %v0, %v11 \n" + " lvx %v13, 0, %r3 \n" + " mtctr %r6 \n" + " add %r9, %r5, %r5 \n" + " vavgub %v12, %v13, %v0 \n" + " add %r4, %r4, %r5 \n" + "._L46: \n" + " li %r11, 15 \n" + " lvx %v1, 0, %r4 \n" + " lvx %v0, %r11, %r4 \n" + " lvx %v13, %r5, %r3 \n" + " vperm %v0, %v1, %v0, %v11 \n" + " stvx %v12, 0, %r3 \n" + " add %r4, %r4, %r5 \n" + " vavgub %v12, %v13, %v0 \n" + " lvx %v1, 0, %r4 \n" + " lvx %v0, %r11, %r4 \n" + " lvx %v13, %r9, %r3 \n" + " vperm %v0, %v1, %v0, %v11 \n" + " stvx %v12, %r5, %r3 \n" + " add %r4, %r4, %r5 \n" + " vavgub %v12, %v13, %v0 \n" + " add %r3, %r3, %r9 \n" + " bdnz ._L46 \n" + " lvx %v0, %r11, %r4 \n" + " lvx %v1, 0, %r4 \n" + " lvx 
%v13, %r5, %r3 \n" + " vperm %v0, %v1, %v0, %v11 \n" + " stvx %v12, 0, %r3 \n" + " vavgub %v12, %v13, %v0 \n" + " stvx %v12, %r5, %r3 \n" + ); +} + +static void MC_avg_o_8_altivec (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + asm (" \n" + " lvsl %v12, 0, %r4 \n" + " li %r9, 7 \n" + " vmrghb %v12, %v12, %v12 \n" + " lvsl %v1, %r5, %r4 \n" + " lvx %v13, 0, %r4 \n" + " vpkuhum %v9, %v12, %v12 \n" + " lvx %v0, %r9, %r4 \n" + " srawi %r6, %r6, 1 \n" + " vmrghb %v1, %v1, %v1 \n" + " addi %r6, %r6, -1 \n" + " vperm %v0, %v13, %v0, %v9 \n" + " lvx %v11, 0, %r3 \n" + " mtctr %r6 \n" + " vpkuhum %v10, %v1, %v1 \n" + " add %r4, %r4, %r5 \n" + " vavgub %v12, %v11, %v0 \n" + "._L51: \n" + " li %r9, 7 \n" + " lvx %v0, %r9, %r4 \n" + " lvx %v13, 0, %r4 \n" + " lvx %v11, %r5, %r3 \n" + " stvewx %v12, 0, %r3 \n" + " vperm %v0, %v13, %v0, %v10 \n" + " li %r9, 4 \n" + " stvewx %v12, %r9, %r3 \n" + " vavgub %v1, %v11, %v0 \n" + " add %r4, %r4, %r5 \n" + " li %r9, 7 \n" + " lvx %v0, %r9, %r4 \n" + " add %r3, %r3, %r5 \n" + " lvx %v13, 0, %r4 \n" + " lvx %v11, %r5, %r3 \n" + " stvewx %v1, 0, %r3 \n" + " vperm %v0, %v13, %v0, %v9 \n" + " li %r9, 4 \n" + " stvewx %v1, %r9, %r3 \n" + " vavgub %v12, %v11, %v0 \n" + " add %r4, %r4, %r5 \n" + " add %r3, %r3, %r5 \n" + " bdnz ._L51 \n" + " li %r9, 7 \n" + " lvx %v0, %r9, %r4 \n" + " lvx %v13, 0, %r4 \n" + " lvx %v11, %r5, %r3 \n" + " stvewx %v12, 0, %r3 \n" + " vperm %v0, %v13, %v0, %v10 \n" + " li %r9, 4 \n" + " stvewx %v12, %r9, %r3 \n" + " vavgub %v1, %v11, %v0 \n" + " add %r3, %r3, %r5 \n" + " stvewx %v1, 0, %r3 \n" + " stvewx %v1, %r9, %r3 \n" + ); +} + +static void MC_avg_x_16_altivec (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + asm (" \n" + " lvsl %v8, 0, %r4 \n" + " vspltisb %v0, 1 \n" + " li %r9, 16 \n" + " lvx %v12, %r9, %r4 \n" + " vaddubm %v7, %v8, %v0 \n" + " lvx %v11, 0, %r4 \n" + " srawi %r6, %r6, 1 \n" + " vperm %v1, %v11, %v12, %v7 \n" + " addi %r6, %r6, -1 \n" + " vperm %v0, %v11, %v12, %v8 
\n" + " lvx %v9, 0, %r3 \n" + " mtctr %r6 \n" + " add %r9, %r5, %r5 \n" + " vavgub %v0, %v0, %v1 \n" + " add %r4, %r4, %r5 \n" + " vavgub %v10, %v9, %v0 \n" + "._L56: \n" + " li %r11, 16 \n" + " lvx %v11, 0, %r4 \n" + " lvx %v12, %r11, %r4 \n" + " lvx %v9, %r5, %r3 \n" + " stvx %v10, 0, %r3 \n" + " vperm %v0, %v11, %v12, %v7 \n" + " add %r4, %r4, %r5 \n" + " vperm %v1, %v11, %v12, %v8 \n" + " lvx %v11, 0, %r4 \n" + " lvx %v12, %r11, %r4 \n" + " vavgub %v1, %v1, %v0 \n" + " add %r4, %r4, %r5 \n" + " vperm %v13, %v11, %v12, %v7 \n" + " vavgub %v10, %v9, %v1 \n" + " vperm %v0, %v11, %v12, %v8 \n" + " lvx %v9, %r9, %r3 \n" + " stvx %v10, %r5, %r3 \n" + " vavgub %v0, %v0, %v13 \n" + " add %r3, %r3, %r9 \n" + " vavgub %v10, %v9, %v0 \n" + " bdnz ._L56 \n" + " lvx %v12, %r11, %r4 \n" + " lvx %v11, 0, %r4 \n" + " lvx %v9, %r5, %r3 \n" + " vperm %v1, %v11, %v12, %v7 \n" + " stvx %v10, 0, %r3 \n" + " vperm %v0, %v11, %v12, %v8 \n" + " vavgub %v0, %v0, %v1 \n" + " vavgub %v10, %v9, %v0 \n" + " stvx %v10, %r5, %r3 \n" + ); +} + +static void MC_avg_x_8_altivec (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + asm (" \n" + " lvsl %v10, 0, %r4 \n" + " vspltisb %v13, 1 \n" + " li %r9, 8 \n" + " vmrghb %v10, %v10, %v10 \n" + " lvx %v11, 0, %r4 \n" + " lvx %v12, %r9, %r4 \n" + " vpkuhum %v7, %v10, %v10 \n" + " srawi %r6, %r6, 1 \n" + " lvsl %v10, %r5, %r4 \n" + " vaddubm %v6, %v7, %v13 \n" + " vperm %v0, %v11, %v12, %v7 \n" + " addi %r6, %r6, -1 \n" + " vmrghb %v10, %v10, %v10 \n" + " lvx %v9, 0, %r3 \n" + " mtctr %r6 \n" + " vperm %v1, %v11, %v12, %v6 \n" + " add %r4, %r4, %r5 \n" + " vpkuhum %v8, %v10, %v10 \n" + " vavgub %v0, %v0, %v1 \n" + " vaddubm %v13, %v8, %v13 \n" + " vavgub %v10, %v9, %v0 \n" + "._L61: \n" + " li %r9, 8 \n" + " lvx %v12, %r9, %r4 \n" + " lvx %v11, 0, %r4 \n" + " lvx %v9, %r5, %r3 \n" + " stvewx %v10, 0, %r3 \n" + " vperm %v1, %v11, %v12, %v13 \n" + " vperm %v0, %v11, %v12, %v8 \n" + " li %r9, 4 \n" + " stvewx %v10, %r9, %r3 \n" + " add %r4, 
%r4, %r5 \n" + " vavgub %v0, %v0, %v1 \n" + " li %r9, 8 \n" + " lvx %v12, %r9, %r4 \n" + " vavgub %v10, %v9, %v0 \n" + " lvx %v11, 0, %r4 \n" + " add %r3, %r3, %r5 \n" + " vperm %v1, %v11, %v12, %v6 \n" + " lvx %v9, %r5, %r3 \n" + " vperm %v0, %v11, %v12, %v7 \n" + " stvewx %v10, 0, %r3 \n" + " li %r9, 4 \n" + " vavgub %v0, %v0, %v1 \n" + " stvewx %v10, %r9, %r3 \n" + " add %r4, %r4, %r5 \n" + " add %r3, %r3, %r5 \n" + " vavgub %v10, %v9, %v0 \n" + " bdnz ._L61 \n" + " li %r9, 8 \n" + " lvx %v12, %r9, %r4 \n" + " lvx %v11, 0, %r4 \n" + " lvx %v9, %r5, %r3 \n" + " vperm %v1, %v11, %v12, %v13 \n" + " stvewx %v10, 0, %r3 \n" + " vperm %v0, %v11, %v12, %v8 \n" + " li %r9, 4 \n" + " stvewx %v10, %r9, %r3 \n" + " vavgub %v0, %v0, %v1 \n" + " add %r3, %r3, %r5 \n" + " vavgub %v10, %v9, %v0 \n" + " stvewx %v10, 0, %r3 \n" + " stvewx %v10, %r9, %r3 \n" + ); +} + +static void MC_avg_y_16_altivec (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + asm (" \n" + " li %r9, 15 \n" + " lvx %v1, %r9, %r4 \n" + " lvsl %v9, 0, %r4 \n" + " lvx %v13, 0, %r4 \n" + " add %r4, %r4, %r5 \n" + " vperm %v11, %v13, %v1, %v9 \n" + " li %r11, 15 \n" + " lvx %v13, 0, %r4 \n" + " lvx %v1, %r11, %r4 \n" + " srawi %r6, %r6, 1 \n" + " vperm %v10, %v13, %v1, %v9 \n" + " addi %r6, %r6, -1 \n" + " lvx %v12, 0, %r3 \n" + " mtctr %r6 \n" + " vavgub %v0, %v11, %v10 \n" + " add %r9, %r5, %r5 \n" + " add %r4, %r4, %r5 \n" + " vavgub %v0, %v12, %v0 \n" + "._L66: \n" + " li %r11, 15 \n" + " lvx %v13, 0, %r4 \n" + " lvx %v1, %r11, %r4 \n" + " lvx %v12, %r5, %r3 \n" + " vperm %v11, %v13, %v1, %v9 \n" + " stvx %v0, 0, %r3 \n" + " add %r4, %r4, %r5 \n" + " vavgub %v0, %v11, %v10 \n" + " lvx %v13, 0, %r4 \n" + " lvx %v1, %r11, %r4 \n" + " vavgub %v0, %v12, %v0 \n" + " add %r4, %r4, %r5 \n" + " lvx %v12, %r9, %r3 \n" + " vperm %v10, %v13, %v1, %v9 \n" + " stvx %v0, %r5, %r3 \n" + " vavgub %v0, %v11, %v10 \n" + " add %r3, %r3, %r9 \n" + " vavgub %v0, %v12, %v0 \n" + " bdnz ._L66 \n" + " lvx %v1, %r11, %r4 
\n" + " lvx %v13, 0, %r4 \n" + " lvx %v12, %r5, %r3 \n" + " vperm %v11, %v13, %v1, %v9 \n" + " stvx %v0, 0, %r3 \n" + " vavgub %v0, %v11, %v10 \n" + " vavgub %v0, %v12, %v0 \n" + " stvx %v0, %r5, %r3 \n" + ); +} + +static void MC_avg_y_8_altivec (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + asm (" \n" + " lvsl %v12, 0, %r4 \n" + " lvsl %v9, %r5, %r4 \n" + " vmrghb %v12, %v12, %v12 \n" + " li %r9, 7 \n" + " lvx %v11, 0, %r4 \n" + " vmrghb %v9, %v9, %v9 \n" + " lvx %v13, %r9, %r4 \n" + " vpkuhum %v7, %v12, %v12 \n" + " add %r4, %r4, %r5 \n" + " vpkuhum %v8, %v9, %v9 \n" + " vperm %v12, %v11, %v13, %v7 \n" + " srawi %r6, %r6, 1 \n" + " lvx %v11, 0, %r4 \n" + " lvx %v13, %r9, %r4 \n" + " addi %r6, %r6, -1 \n" + " vperm %v9, %v11, %v13, %v8 \n" + " lvx %v10, 0, %r3 \n" + " mtctr %r6 \n" + " add %r4, %r4, %r5 \n" + " vavgub %v0, %v12, %v9 \n" + " vavgub %v1, %v10, %v0 \n" + "._L71: \n" + " li %r9, 7 \n" + " lvx %v13, %r9, %r4 \n" + " lvx %v11, 0, %r4 \n" + " lvx %v10, %r5, %r3 \n" + " stvewx %v1, 0, %r3 \n" + " vperm %v12, %v11, %v13, %v7 \n" + " li %r9, 4 \n" + " stvewx %v1, %r9, %r3 \n" + " vavgub %v0, %v12, %v9 \n" + " add %r4, %r4, %r5 \n" + " li %r9, 7 \n" + " vavgub %v1, %v10, %v0 \n" + " lvx %v13, %r9, %r4 \n" + " lvx %v11, 0, %r4 \n" + " add %r3, %r3, %r5 \n" + " vperm %v9, %v11, %v13, %v8 \n" + " lvx %v10, %r5, %r3 \n" + " stvewx %v1, 0, %r3 \n" + " vavgub %v0, %v12, %v9 \n" + " li %r9, 4 \n" + " stvewx %v1, %r9, %r3 \n" + " add %r4, %r4, %r5 \n" + " vavgub %v1, %v10, %v0 \n" + " add %r3, %r3, %r5 \n" + " bdnz ._L71 \n" + " li %r9, 7 \n" + " lvx %v13, %r9, %r4 \n" + " lvx %v11, 0, %r4 \n" + " lvx %v10, %r5, %r3 \n" + " vperm %v12, %v11, %v13, %v7 \n" + " stvewx %v1, 0, %r3 \n" + " li %r9, 4 \n" + " vavgub %v0, %v12, %v9 \n" + " stvewx %v1, %r9, %r3 \n" + " add %r3, %r3, %r5 \n" + " vavgub %v1, %v10, %v0 \n" + " stvewx %v1, 0, %r3 \n" + " stvewx %v1, %r9, %r3 \n" + ); +} + +static void MC_avg_xy_16_altivec (uint8_t * dest, uint8_t * ref, + int 
stride, int height) +{ + asm (" \n" + " lvsl %v4, 0, %r4 \n" + " vspltisb %v2, 1 \n" + " li %r9, 16 \n" + " lvx %v1, %r9, %r4 \n" + " vaddubm %v3, %v4, %v2 \n" + " lvx %v13, 0, %r4 \n" + " add %r4, %r4, %r5 \n" + " vperm %v10, %v13, %v1, %v3 \n" + " li %r11, 16 \n" + " vperm %v11, %v13, %v1, %v4 \n" + " srawi %r6, %r6, 1 \n" + " lvx %v13, 0, %r4 \n" + " lvx %v1, %r11, %r4 \n" + " vavgub %v9, %v11, %v10 \n" + " vxor %v8, %v11, %v10 \n" + " addi %r6, %r6, -1 \n" + " vperm %v10, %v13, %v1, %v3 \n" + " lvx %v6, 0, %r3 \n" + " mtctr %r6 \n" + " vperm %v11, %v13, %v1, %v4 \n" + " add %r9, %r5, %r5 \n" + " add %r4, %r4, %r5 \n" + " vxor %v5, %v11, %v10 \n" + " vavgub %v7, %v11, %v10 \n" + " vor %v1, %v8, %v5 \n" + " vxor %v13, %v9, %v7 \n" + " vand %v1, %v2, %v1 \n" + " vavgub %v0, %v9, %v7 \n" + " vand %v1, %v1, %v13 \n" + " vsububm %v0, %v0, %v1 \n" + " vavgub %v12, %v6, %v0 \n" + "._L76: \n" + " li %r11, 16 \n" + " lvx %v13, 0, %r4 \n" + " lvx %v1, %r11, %r4 \n" + " lvx %v6, %r5, %r3 \n" + " stvx %v12, 0, %r3 \n" + " vperm %v10, %v13, %v1, %v3 \n" + " vperm %v11, %v13, %v1, %v4 \n" + " add %r4, %r4, %r5 \n" + " lvx %v13, 0, %r4 \n" + " lvx %v1, %r11, %r4 \n" + " vavgub %v9, %v11, %v10 \n" + " vxor %v8, %v11, %v10 \n" + " add %r4, %r4, %r5 \n" + " vperm %v10, %v13, %v1, %v3 \n" + " vavgub %v12, %v9, %v7 \n" + " vperm %v11, %v13, %v1, %v4 \n" + " vor %v0, %v8, %v5 \n" + " vxor %v13, %v9, %v7 \n" + " vxor %v5, %v11, %v10 \n" + " vand %v0, %v2, %v0 \n" + " vavgub %v7, %v11, %v10 \n" + " vor %v1, %v8, %v5 \n" + " vand %v0, %v0, %v13 \n" + " vand %v1, %v2, %v1 \n" + " vxor %v13, %v9, %v7 \n" + " vsububm %v12, %v12, %v0 \n" + " vand %v1, %v1, %v13 \n" + " vavgub %v0, %v9, %v7 \n" + " vavgub %v12, %v6, %v12 \n" + " lvx %v6, %r9, %r3 \n" + " vsububm %v0, %v0, %v1 \n" + " stvx %v12, %r5, %r3 \n" + " vavgub %v12, %v6, %v0 \n" + " add %r3, %r3, %r9 \n" + " bdnz ._L76 \n" + " lvx %v1, %r11, %r4 \n" + " lvx %v13, 0, %r4 \n" + " lvx %v6, %r5, %r3 \n" + " vperm %v10, %v13, %v1, %v3 
\n" + " stvx %v12, 0, %r3 \n" + " vperm %v11, %v13, %v1, %v4 \n" + " vxor %v8, %v11, %v10 \n" + " vavgub %v9, %v11, %v10 \n" + " vor %v0, %v8, %v5 \n" + " vxor %v13, %v9, %v7 \n" + " vand %v0, %v2, %v0 \n" + " vavgub %v1, %v9, %v7 \n" + " vand %v0, %v0, %v13 \n" + " vsububm %v1, %v1, %v0 \n" + " vavgub %v12, %v6, %v1 \n" + " stvx %v12, %r5, %r3 \n" + ); +} + +static void MC_avg_xy_8_altivec (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + asm (" \n" + " lvsl %v2, 0, %r4 \n" + " vspltisb %v19, 1 \n" + " lvsl %v3, %r5, %r4 \n" + " vmrghb %v2, %v2, %v2 \n" + " li %r9, 16 \n" + " vmrghb %v3, %v3, %v3 \n" + " lvx %v9, 0, %r4 \n" + " vpkuhum %v2, %v2, %v2 \n" + " lvx %v1, %r9, %r4 \n" + " vpkuhum %v3, %v3, %v3 \n" + " add %r4, %r4, %r5 \n" + " vaddubm %v18, %v2, %v19 \n" + " vperm %v11, %v9, %v1, %v2 \n" + " srawi %r6, %r6, 1 \n" + " vaddubm %v17, %v3, %v19 \n" + " addi %r6, %r6, -1 \n" + " vperm %v10, %v9, %v1, %v18 \n" + " lvx %v4, 0, %r3 \n" + " mtctr %r6 \n" + " lvx %v1, %r9, %r4 \n" + " lvx %v9, 0, %r4 \n" + " vavgub %v8, %v11, %v10 \n" + " vxor %v7, %v11, %v10 \n" + " add %r4, %r4, %r5 \n" + " vperm %v10, %v9, %v1, %v17 \n" + " vperm %v11, %v9, %v1, %v3 \n" + " vxor %v5, %v11, %v10 \n" + " vavgub %v6, %v11, %v10 \n" + " vor %v1, %v7, %v5 \n" + " vxor %v13, %v8, %v6 \n" + " vand %v1, %v19, %v1 \n" + " vavgub %v0, %v8, %v6 \n" + " vand %v1, %v1, %v13 \n" + " vsububm %v0, %v0, %v1 \n" + " vavgub %v13, %v4, %v0 \n" + "._L81: \n" + " li %r9, 16 \n" + " lvx %v1, %r9, %r4 \n" + " lvx %v9, 0, %r4 \n" + " lvx %v4, %r5, %r3 \n" + " stvewx %v13, 0, %r3 \n" + " vperm %v10, %v9, %v1, %v18 \n" + " vperm %v11, %v9, %v1, %v2 \n" + " li %r9, 4 \n" + " stvewx %v13, %r9, %r3 \n" + " vxor %v7, %v11, %v10 \n" + " add %r4, %r4, %r5 \n" + " li %r9, 16 \n" + " vavgub %v8, %v11, %v10 \n" + " lvx %v1, %r9, %r4 \n" + " vor %v0, %v7, %v5 \n" + " lvx %v9, 0, %r4 \n" + " vxor %v12, %v8, %v6 \n" + " vand %v0, %v19, %v0 \n" + " add %r3, %r3, %r5 \n" + " vperm %v10, %v9, %v1, %v17 
\n" + " vavgub %v13, %v8, %v6 \n" + " li %r9, 4 \n" + " vperm %v11, %v9, %v1, %v3 \n" + " vand %v0, %v0, %v12 \n" + " add %r4, %r4, %r5 \n" + " vxor %v5, %v11, %v10 \n" + " vavgub %v6, %v11, %v10 \n" + " vor %v1, %v7, %v5 \n" + " vsububm %v13, %v13, %v0 \n" + " vxor %v0, %v8, %v6 \n" + " vand %v1, %v19, %v1 \n" + " vavgub %v13, %v4, %v13 \n" + " vand %v1, %v1, %v0 \n" + " lvx %v4, %r5, %r3 \n" + " vavgub %v0, %v8, %v6 \n" + " stvewx %v13, 0, %r3 \n" + " stvewx %v13, %r9, %r3 \n" + " vsububm %v0, %v0, %v1 \n" + " add %r3, %r3, %r5 \n" + " vavgub %v13, %v4, %v0 \n" + " bdnz ._L81 \n" + " li %r9, 16 \n" + " lvx %v1, %r9, %r4 \n" + " lvx %v9, 0, %r4 \n" + " lvx %v4, %r5, %r3 \n" + " vperm %v10, %v9, %v1, %v18 \n" + " stvewx %v13, 0, %r3 \n" + " vperm %v11, %v9, %v1, %v2 \n" + " li %r9, 4 \n" + " stvewx %v13, %r9, %r3 \n" + " vxor %v7, %v11, %v10 \n" + " add %r3, %r3, %r5 \n" + " vavgub %v8, %v11, %v10 \n" + " vor %v0, %v7, %v5 \n" + " vxor %v13, %v8, %v6 \n" + " vand %v0, %v19, %v0 \n" + " vavgub %v1, %v8, %v6 \n" + " vand %v0, %v0, %v13 \n" + " vsububm %v1, %v1, %v0 \n" + " vavgub %v13, %v4, %v1 \n" + " stvewx %v13, 0, %r3 \n" + " stvewx %v13, %r9, %r3 \n" + ); +} + +MPEG2_MC_EXTERN (altivec) + +#endif /* ARCH_PPC */ + +#else /* HOST_OS_DARWIN */ + +#ifdef ENABLE_ALTIVEC + +#include "mpeg2_internal.h" + +#define vector_s16_t vector signed short +#define vector_u16_t vector unsigned short +#define vector_s8_t vector signed char +#define vector_u8_t vector unsigned char +#define vector_s32_t vector signed int +#define vector_u32_t vector unsigned int + +void MC_put_o_16_altivec (unsigned char * dest, unsigned char * ref, + int stride, int height) +{ + vector_u8_t perm, ref0, ref1, tmp; + + perm = vec_lvsl (0, ref); + + height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + ref += stride; + tmp = vec_perm (ref0, ref1, perm); + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + ref += stride; + vec_st (tmp, 0, dest); + tmp = 
vec_perm (ref0, ref1, perm); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + ref += stride; + vec_st (tmp, stride, dest); + dest += 2*stride; + tmp = vec_perm (ref0, ref1, perm); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + vec_st (tmp, 0, dest); + tmp = vec_perm (ref0, ref1, perm); + vec_st (tmp, stride, dest); +} + +void MC_put_o_8_altivec (unsigned char * dest, unsigned char * ref, + int stride, int height) +{ + vector_u8_t perm0, perm1, tmp0, tmp1, ref0, ref1; + + tmp0 = vec_lvsl (0, ref); + tmp0 = vec_mergeh (tmp0, tmp0); + perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0); + tmp1 = vec_lvsl (stride, ref); + tmp1 = vec_mergeh (tmp1, tmp1); + perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1); + + height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + ref += stride; + tmp0 = vec_perm (ref0, ref1, perm0); + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + ref += stride; + vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); + dest += stride; + tmp1 = vec_perm (ref0, ref1, perm1); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + ref += stride; + vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); + dest += stride; + tmp0 = vec_perm (ref0, ref1, perm0); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); + dest += stride; + tmp1 = vec_perm (ref0, ref1, perm1); + vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); +} + +void MC_put_x_16_altivec (unsigned char * dest, unsigned char * ref, + int stride, int height) +{ + vector_u8_t permA, permB, ref0, ref1, tmp; + + permA = vec_lvsl (0, ref); + permB = vec_add (permA, vec_splat_u8 (1)); + + height = 
(height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + tmp = vec_avg (vec_perm (ref0, ref1, permA), + vec_perm (ref0, ref1, permB)); + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + vec_st (tmp, 0, dest); + tmp = vec_avg (vec_perm (ref0, ref1, permA), + vec_perm (ref0, ref1, permB)); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + vec_st (tmp, stride, dest); + dest += 2*stride; + tmp = vec_avg (vec_perm (ref0, ref1, permA), + vec_perm (ref0, ref1, permB)); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + vec_st (tmp, 0, dest); + tmp = vec_avg (vec_perm (ref0, ref1, permA), + vec_perm (ref0, ref1, permB)); + vec_st (tmp, stride, dest); +} + +void MC_put_x_8_altivec (unsigned char * dest, unsigned char * ref, + int stride, int height) +{ + vector_u8_t perm0A, perm0B, perm1A, perm1B, ones, tmp0, tmp1, ref0, ref1; + + ones = vec_splat_u8 (1); + tmp0 = vec_lvsl (0, ref); + tmp0 = vec_mergeh (tmp0, tmp0); + perm0A = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0); + perm0B = vec_add (perm0A, ones); + tmp1 = vec_lvsl (stride, ref); + tmp1 = vec_mergeh (tmp1, tmp1); + perm1A = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1); + perm1B = vec_add (perm1A, ones); + + height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + ref += stride; + tmp0 = vec_avg (vec_perm (ref0, ref1, perm0A), + vec_perm (ref0, ref1, perm0B)); + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + ref += stride; + vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); + dest += stride; + tmp1 = vec_avg (vec_perm (ref0, ref1, perm1A), + vec_perm (ref0, ref1, perm1B)); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + ref += stride; + vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); + dest += stride; + tmp0 = vec_avg 
(vec_perm (ref0, ref1, perm0A), + vec_perm (ref0, ref1, perm0B)); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); + dest += stride; + tmp1 = vec_avg (vec_perm (ref0, ref1, perm1A), + vec_perm (ref0, ref1, perm1B)); + vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); +} + +void MC_put_y_16_altivec (unsigned char * dest, unsigned char * ref, + int stride, int height) +{ + vector_u8_t perm, ref0, ref1, tmp0, tmp1, tmp; + + perm = vec_lvsl (0, ref); + + height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + ref += stride; + tmp0 = vec_perm (ref0, ref1, perm); + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + ref += stride; + tmp1 = vec_perm (ref0, ref1, perm); + tmp = vec_avg (tmp0, tmp1); + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + ref += stride; + vec_st (tmp, 0, dest); + tmp0 = vec_perm (ref0, ref1, perm); + tmp = vec_avg (tmp0, tmp1); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + ref += stride; + vec_st (tmp, stride, dest); + dest += 2*stride; + tmp1 = vec_perm (ref0, ref1, perm); + tmp = vec_avg (tmp0, tmp1); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + vec_st (tmp, 0, dest); + tmp0 = vec_perm (ref0, ref1, perm); + tmp = vec_avg (tmp0, tmp1); + vec_st (tmp, stride, dest); +} + +void MC_put_y_8_altivec (unsigned char * dest, unsigned char * ref, + int stride, int height) +{ + vector_u8_t perm0, perm1, tmp0, tmp1, tmp, ref0, ref1; + + tmp0 = vec_lvsl (0, ref); + tmp0 = vec_mergeh (tmp0, tmp0); + perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0); + tmp1 = vec_lvsl (stride, ref); + tmp1 = vec_mergeh (tmp1, tmp1); + perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1); + + height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + 
ref += stride; + tmp0 = vec_perm (ref0, ref1, perm0); + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + ref += stride; + tmp1 = vec_perm (ref0, ref1, perm1); + tmp = vec_avg (tmp0, tmp1); + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + ref += stride; + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); + dest += stride; + tmp0 = vec_perm (ref0, ref1, perm0); + tmp = vec_avg (tmp0, tmp1); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + ref += stride; + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); + dest += stride; + tmp1 = vec_perm (ref0, ref1, perm1); + tmp = vec_avg (tmp0, tmp1); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); + dest += stride; + tmp0 = vec_perm (ref0, ref1, perm0); + tmp = vec_avg (tmp0, tmp1); + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); +} + +void MC_put_xy_16_altivec (unsigned char * dest, unsigned char * ref, + int stride, int height) +{ + vector_u8_t permA, permB, ref0, ref1, A, B, avg0, avg1, xor0, xor1, tmp; + vector_u8_t ones; + + ones = vec_splat_u8 (1); + permA = vec_lvsl (0, ref); + permB = vec_add (permA, ones); + + height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + A = vec_perm (ref0, ref1, permA); + B = vec_perm (ref0, ref1, permB); + avg0 = vec_avg (A, B); + xor0 = vec_xor (A, B); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + A = vec_perm (ref0, ref1, permA); + B = vec_perm (ref0, ref1, permB); + avg1 = vec_avg (A, B); + xor1 = vec_xor (A, B); + tmp = vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1))); + + + do { + ref0 = vec_ld (0, ref); + ref1 = 
vec_ld (16, ref); + ref += stride; + vec_st (tmp, 0, dest); + A = vec_perm (ref0, ref1, permA); + B = vec_perm (ref0, ref1, permB); + avg0 = vec_avg (A, B); + xor0 = vec_xor (A, B); + tmp = vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1))); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + vec_st (tmp, stride, dest); + dest += 2*stride; + A = vec_perm (ref0, ref1, permA); + B = vec_perm (ref0, ref1, permB); + avg1 = vec_avg (A, B); + xor1 = vec_xor (A, B); + tmp = vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1))); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + vec_st (tmp, 0, dest); + A = vec_perm (ref0, ref1, permA); + B = vec_perm (ref0, ref1, permB); + avg0 = vec_avg (A, B); + xor0 = vec_xor (A, B); + tmp = vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1))); + vec_st (tmp, stride, dest); +} + +void MC_put_xy_8_altivec (unsigned char * dest, unsigned char * ref, + int stride, int height) +{ + vector_u8_t perm0A, perm0B, perm1A, perm1B, ref0, ref1, A, B; + vector_u8_t avg0, avg1, xor0, xor1, tmp, ones; + + ones = vec_splat_u8 (1); + perm0A = vec_lvsl (0, ref); + perm0A = vec_mergeh (perm0A, perm0A); + perm0A = vec_pack ((vector_u16_t)perm0A, (vector_u16_t)perm0A); + perm0B = vec_add (perm0A, ones); + perm1A = vec_lvsl (stride, ref); + perm1A = vec_mergeh (perm1A, perm1A); + perm1A = vec_pack ((vector_u16_t)perm1A, (vector_u16_t)perm1A); + perm1B = vec_add (perm1A, ones); + + height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + A = vec_perm (ref0, ref1, perm0A); + B = vec_perm (ref0, ref1, perm0B); + avg0 = vec_avg (A, B); + xor0 = vec_xor (A, B); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + A = vec_perm (ref0, ref1, perm1A); + B = vec_perm (ref0, ref1, perm1B); + avg1 = 
vec_avg (A, B); + xor1 = vec_xor (A, B); + tmp = vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1))); + + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); + dest += stride; + A = vec_perm (ref0, ref1, perm0A); + B = vec_perm (ref0, ref1, perm0B); + avg0 = vec_avg (A, B); + xor0 = vec_xor (A, B); + tmp = vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1))); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); + dest += stride; + A = vec_perm (ref0, ref1, perm1A); + B = vec_perm (ref0, ref1, perm1B); + avg1 = vec_avg (A, B); + xor1 = vec_xor (A, B); + tmp = vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1))); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); + dest += stride; + A = vec_perm (ref0, ref1, perm0A); + B = vec_perm (ref0, ref1, perm0B); + avg0 = vec_avg (A, B); + xor0 = vec_xor (A, B); + tmp = vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1))); + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); +} + +#if 0 +void MC_put_xy_8_altivec (unsigned char * dest, unsigned char * ref, + int stride, int height) +{ + vector_u8_t permA, permB, ref0, ref1, A, B, C, D, tmp, zero, ones; + vector_u16_t splat2, temp; + + ones = vec_splat_u8 (1); + permA = vec_lvsl (0, ref); + permB = vec_add (permA, ones); + + zero = vec_splat_u8 (0); + splat2 = vec_splat_u16 (2); + + do { + ref0 = vec_ld (0, ref); + ref1 = 
vec_ld (16, ref); + ref += stride; + A = vec_perm (ref0, ref1, permA); + B = vec_perm (ref0, ref1, permB); + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + C = vec_perm (ref0, ref1, permA); + D = vec_perm (ref0, ref1, permB); + + temp = vec_add (vec_add ((vector_u16_t)vec_mergeh (zero, A), + (vector_u16_t)vec_mergeh (zero, B)), + vec_add ((vector_u16_t)vec_mergeh (zero, C), + (vector_u16_t)vec_mergeh (zero, D))); + temp = vec_sr (vec_add (temp, splat2), splat2); + tmp = vec_pack (temp, temp); + + vec_st (tmp, 0, dest); + dest += stride; + tmp = vec_avg (vec_perm (ref0, ref1, permA), + vec_perm (ref0, ref1, permB)); + } while (--height); +} +#endif + +void MC_avg_o_16_altivec (unsigned char * dest, unsigned char * ref, + int stride, int height) +{ + vector_u8_t perm, ref0, ref1, tmp, prev; + + perm = vec_lvsl (0, ref); + + height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + ref += stride; + prev = vec_ld (0, dest); + tmp = vec_avg (prev, vec_perm (ref0, ref1, perm)); + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + ref += stride; + prev = vec_ld (stride, dest); + vec_st (tmp, 0, dest); + tmp = vec_avg (prev, vec_perm (ref0, ref1, perm)); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + ref += stride; + prev = vec_ld (2*stride, dest); + vec_st (tmp, stride, dest); + dest += 2*stride; + tmp = vec_avg (prev, vec_perm (ref0, ref1, perm)); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + prev = vec_ld (stride, dest); + vec_st (tmp, 0, dest); + tmp = vec_avg (prev, vec_perm (ref0, ref1, perm)); + vec_st (tmp, stride, dest); +} + +void MC_avg_o_8_altivec (unsigned char * dest, unsigned char * ref, + int stride, int height) +{ + vector_u8_t perm0, perm1, tmp0, tmp1, ref0, ref1, prev; + + tmp0 = vec_lvsl (0, ref); + tmp0 = vec_mergeh (tmp0, tmp0); + perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0); + tmp1 = vec_lvsl (stride, ref); + tmp1 = vec_mergeh (tmp1, tmp1); + perm1 = 
vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1); + + height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + ref += stride; + prev = vec_ld (0, dest); + tmp0 = vec_avg (prev, vec_perm (ref0, ref1, perm0)); + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + ref += stride; + prev = vec_ld (stride, dest); + vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); + dest += stride; + tmp1 = vec_avg (prev, vec_perm (ref0, ref1, perm1)); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + ref += stride; + prev = vec_ld (stride, dest); + vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); + dest += stride; + tmp0 = vec_avg (prev, vec_perm (ref0, ref1, perm0)); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + prev = vec_ld (stride, dest); + vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); + dest += stride; + tmp1 = vec_avg (prev, vec_perm (ref0, ref1, perm1)); + vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); +} + +void MC_avg_x_16_altivec (unsigned char * dest, unsigned char * ref, + int stride, int height) +{ + vector_u8_t permA, permB, ref0, ref1, tmp, prev; + + permA = vec_lvsl (0, ref); + permB = vec_add (permA, vec_splat_u8 (1)); + + height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + prev = vec_ld (0, dest); + ref += stride; + tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA), + vec_perm (ref0, ref1, permB))); + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + prev = vec_ld (stride, dest); + vec_st (tmp, 0, dest); + tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA), + vec_perm (ref0, ref1, permB))); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + 
prev = vec_ld (2*stride, dest); + vec_st (tmp, stride, dest); + dest += 2*stride; + tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA), + vec_perm (ref0, ref1, permB))); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + prev = vec_ld (stride, dest); + vec_st (tmp, 0, dest); + tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA), + vec_perm (ref0, ref1, permB))); + vec_st (tmp, stride, dest); +} + +void MC_avg_x_8_altivec (unsigned char * dest, unsigned char * ref, + int stride, int height) +{ + vector_u8_t perm0A, perm0B, perm1A, perm1B, ones, tmp0, tmp1, ref0, ref1; + vector_u8_t prev; + + ones = vec_splat_u8 (1); + tmp0 = vec_lvsl (0, ref); + tmp0 = vec_mergeh (tmp0, tmp0); + perm0A = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0); + perm0B = vec_add (perm0A, ones); + tmp1 = vec_lvsl (stride, ref); + tmp1 = vec_mergeh (tmp1, tmp1); + perm1A = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1); + perm1B = vec_add (perm1A, ones); + + height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + prev = vec_ld (0, dest); + ref += stride; + tmp0 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm0A), + vec_perm (ref0, ref1, perm0B))); + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + ref += stride; + prev = vec_ld (stride, dest); + vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); + dest += stride; + tmp1 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm1A), + vec_perm (ref0, ref1, perm1B))); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + ref += stride; + prev = vec_ld (stride, dest); + vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); + dest += stride; + tmp0 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm0A), + vec_perm (ref0, ref1, perm0B))); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + prev = vec_ld (stride, 
dest); + vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); + dest += stride; + tmp1 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm1A), + vec_perm (ref0, ref1, perm1B))); + vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); +} + +void MC_avg_y_16_altivec (unsigned char * dest, unsigned char * ref, + int stride, int height) +{ + vector_u8_t perm, ref0, ref1, tmp0, tmp1, tmp, prev; + + perm = vec_lvsl (0, ref); + + height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + ref += stride; + tmp0 = vec_perm (ref0, ref1, perm); + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + ref += stride; + prev = vec_ld (0, dest); + tmp1 = vec_perm (ref0, ref1, perm); + tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + ref += stride; + prev = vec_ld (stride, dest); + vec_st (tmp, 0, dest); + tmp0 = vec_perm (ref0, ref1, perm); + tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + ref += stride; + prev = vec_ld (2*stride, dest); + vec_st (tmp, stride, dest); + dest += 2*stride; + tmp1 = vec_perm (ref0, ref1, perm); + tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + prev = vec_ld (stride, dest); + vec_st (tmp, 0, dest); + tmp0 = vec_perm (ref0, ref1, perm); + tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); + vec_st (tmp, stride, dest); +} + +void MC_avg_y_8_altivec (unsigned char * dest, unsigned char * ref, + int stride, int height) +{ + vector_u8_t perm0, perm1, tmp0, tmp1, tmp, ref0, ref1, prev; + + tmp0 = vec_lvsl (0, ref); + tmp0 = vec_mergeh (tmp0, tmp0); + perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0); + tmp1 = vec_lvsl (stride, ref); + tmp1 = vec_mergeh (tmp1, tmp1); + perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1); + + 
height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + ref += stride; + tmp0 = vec_perm (ref0, ref1, perm0); + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + ref += stride; + prev = vec_ld (0, dest); + tmp1 = vec_perm (ref0, ref1, perm1); + tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + ref += stride; + prev = vec_ld (stride, dest); + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); + dest += stride; + tmp0 = vec_perm (ref0, ref1, perm0); + tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + ref += stride; + prev = vec_ld (stride, dest); + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); + dest += stride; + tmp1 = vec_perm (ref0, ref1, perm1); + tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + prev = vec_ld (stride, dest); + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); + dest += stride; + tmp0 = vec_perm (ref0, ref1, perm0); + tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); +} + +void MC_avg_xy_16_altivec (unsigned char * dest, unsigned char * ref, + int stride, int height) +{ + vector_u8_t permA, permB, ref0, ref1, A, B, avg0, avg1, xor0, xor1, tmp; + vector_u8_t ones, prev; + + ones = vec_splat_u8 (1); + permA = vec_lvsl (0, ref); + permB = vec_add (permA, ones); + + height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + A = vec_perm (ref0, ref1, permA); + B = vec_perm (ref0, ref1, permB); + avg0 = vec_avg (A, B); + xor0 = vec_xor (A, B); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + prev 
= vec_ld (0, dest); + A = vec_perm (ref0, ref1, permA); + B = vec_perm (ref0, ref1, permB); + avg1 = vec_avg (A, B); + xor1 = vec_xor (A, B); + tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1)))); + + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + prev = vec_ld (stride, dest); + vec_st (tmp, 0, dest); + A = vec_perm (ref0, ref1, permA); + B = vec_perm (ref0, ref1, permB); + avg0 = vec_avg (A, B); + xor0 = vec_xor (A, B); + tmp = vec_avg (prev, + vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1)))); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + prev = vec_ld (2*stride, dest); + vec_st (tmp, stride, dest); + dest += 2*stride; + A = vec_perm (ref0, ref1, permA); + B = vec_perm (ref0, ref1, permB); + avg1 = vec_avg (A, B); + xor1 = vec_xor (A, B); + tmp = vec_avg (prev, + vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1)))); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + prev = vec_ld (stride, dest); + vec_st (tmp, 0, dest); + A = vec_perm (ref0, ref1, permA); + B = vec_perm (ref0, ref1, permB); + avg0 = vec_avg (A, B); + xor0 = vec_xor (A, B); + tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1)))); + vec_st (tmp, stride, dest); +} + +void MC_avg_xy_8_altivec (unsigned char * dest, unsigned char * ref, + int stride, int height) +{ + vector_u8_t perm0A, perm0B, perm1A, perm1B, ref0, ref1, A, B; + vector_u8_t avg0, avg1, xor0, xor1, tmp, ones, prev; + + ones = vec_splat_u8 (1); + perm0A = vec_lvsl (0, ref); + perm0A = vec_mergeh (perm0A, perm0A); + perm0A = vec_pack ((vector_u16_t)perm0A, (vector_u16_t)perm0A); + perm0B = vec_add (perm0A, ones); + perm1A = vec_lvsl (stride, ref); + perm1A = vec_mergeh (perm1A, perm1A); + perm1A = 
vec_pack ((vector_u16_t)perm1A, (vector_u16_t)perm1A); + perm1B = vec_add (perm1A, ones); + + height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + A = vec_perm (ref0, ref1, perm0A); + B = vec_perm (ref0, ref1, perm0B); + avg0 = vec_avg (A, B); + xor0 = vec_xor (A, B); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + prev = vec_ld (0, dest); + A = vec_perm (ref0, ref1, perm1A); + B = vec_perm (ref0, ref1, perm1B); + avg1 = vec_avg (A, B); + xor1 = vec_xor (A, B); + tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1)))); + + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + prev = vec_ld (stride, dest); + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); + dest += stride; + A = vec_perm (ref0, ref1, perm0A); + B = vec_perm (ref0, ref1, perm0B); + avg0 = vec_avg (A, B); + xor0 = vec_xor (A, B); + tmp = vec_avg (prev, + vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1)))); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + prev = vec_ld (stride, dest); + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); + dest += stride; + A = vec_perm (ref0, ref1, perm1A); + B = vec_perm (ref0, ref1, perm1B); + avg1 = vec_avg (A, B); + xor1 = vec_xor (A, B); + tmp = vec_avg (prev, + vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1)))); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + prev = vec_ld (stride, dest); + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); + dest += stride; + A = vec_perm (ref0, ref1, perm0A); + B = vec_perm (ref0, ref1, perm0B); + avg0 = vec_avg (A, B); + 
xor0 = vec_xor (A, B); + tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1)))); + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); +} + +MPEG2_MC_EXTERN (altivec) + +#endif /* ENABLE_ALTIVEC */ + +#endif /* HOST_OS_DARWIN */ + diff --git a/src/video_dec/libmpeg2/motion_comp_mlib.c b/src/video_dec/libmpeg2/motion_comp_mlib.c new file mode 100644 index 000000000..1a37070ae --- /dev/null +++ b/src/video_dec/libmpeg2/motion_comp_mlib.c @@ -0,0 +1,181 @@ +/* + * motion_comp_mlib.c + * Copyright (C) 2000-2002 Håkan Hjort <d95hjort@dtek.chalmers.se> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#ifdef LIBMPEG2_MLIB + +#include <mlib_types.h> +#include <mlib_status.h> +#include <mlib_sys.h> +#include <mlib_video.h> +#include <inttypes.h> + +#include "mpeg2_internal.h" + +static void MC_put_o_16_mlib (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + if (height == 16) + mlib_VideoCopyRef_U8_U8_16x16 (dest, ref, stride); + else + mlib_VideoCopyRef_U8_U8_16x8 (dest, ref, stride); +} + +static void MC_put_x_16_mlib (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + if (height == 16) + mlib_VideoInterpX_U8_U8_16x16 (dest, ref, stride, stride); + else + mlib_VideoInterpX_U8_U8_16x8 (dest, ref, stride, stride); +} + +static void MC_put_y_16_mlib (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + if (height == 16) + mlib_VideoInterpY_U8_U8_16x16 (dest, ref, stride, stride); + else + mlib_VideoInterpY_U8_U8_16x8 (dest, ref, stride, stride); +} + +static void MC_put_xy_16_mlib (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + if (height == 16) + mlib_VideoInterpXY_U8_U8_16x16 (dest, ref, stride, stride); + else + mlib_VideoInterpXY_U8_U8_16x8 (dest, ref, stride, stride); +} + +static void MC_put_o_8_mlib (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + if (height == 8) + mlib_VideoCopyRef_U8_U8_8x8 (dest, ref, stride); + else + mlib_VideoCopyRef_U8_U8_8x4 (dest, ref, stride); +} + +static void MC_put_x_8_mlib (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + if (height == 8) + mlib_VideoInterpX_U8_U8_8x8 (dest, ref, stride, stride); + else + mlib_VideoInterpX_U8_U8_8x4 (dest, ref, stride, stride); +} + +static void MC_put_y_8_mlib (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + if (height == 8) + mlib_VideoInterpY_U8_U8_8x8 (dest, ref, 
stride, stride); + else + mlib_VideoInterpY_U8_U8_8x4 (dest, ref, stride, stride); +} + +static void MC_put_xy_8_mlib (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + if (height == 8) + mlib_VideoInterpXY_U8_U8_8x8 (dest, ref, stride, stride); + else + mlib_VideoInterpXY_U8_U8_8x4 (dest, ref, stride, stride); +} + +static void MC_avg_o_16_mlib (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + if (height == 16) + mlib_VideoCopyRefAve_U8_U8_16x16 (dest, ref, stride); + else + mlib_VideoCopyRefAve_U8_U8_16x8 (dest, ref, stride); +} + +static void MC_avg_x_16_mlib (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + if (height == 16) + mlib_VideoInterpAveX_U8_U8_16x16 (dest, ref, stride, stride); + else + mlib_VideoInterpAveX_U8_U8_16x8 (dest, ref, stride, stride); +} + +static void MC_avg_y_16_mlib (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + if (height == 16) + mlib_VideoInterpAveY_U8_U8_16x16 (dest, ref, stride, stride); + else + mlib_VideoInterpAveY_U8_U8_16x8 (dest, ref, stride, stride); +} + +static void MC_avg_xy_16_mlib (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + if (height == 16) + mlib_VideoInterpAveXY_U8_U8_16x16 (dest, ref, stride, stride); + else + mlib_VideoInterpAveXY_U8_U8_16x8 (dest, ref, stride, stride); +} + +static void MC_avg_o_8_mlib (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + if (height == 8) + mlib_VideoCopyRefAve_U8_U8_8x8 (dest, ref, stride); + else + mlib_VideoCopyRefAve_U8_U8_8x4 (dest, ref, stride); +} + +static void MC_avg_x_8_mlib (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + if (height == 8) + mlib_VideoInterpAveX_U8_U8_8x8 (dest, ref, stride, stride); + else + mlib_VideoInterpAveX_U8_U8_8x4 (dest, ref, stride, stride); +} + +static void MC_avg_y_8_mlib (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + if (height == 8) + mlib_VideoInterpAveY_U8_U8_8x8 (dest, ref, stride, stride); + else + 
mlib_VideoInterpAveY_U8_U8_8x4 (dest, ref, stride, stride); +} + +static void MC_avg_xy_8_mlib (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + if (height == 8) + mlib_VideoInterpAveXY_U8_U8_8x8 (dest, ref, stride, stride); + else + mlib_VideoInterpAveXY_U8_U8_8x4 (dest, ref, stride, stride); +} + +MPEG2_MC_EXTERN (mlib) + +#endif diff --git a/src/video_dec/libmpeg2/motion_comp_mmx.c b/src/video_dec/libmpeg2/motion_comp_mmx.c new file mode 100644 index 000000000..9c5ab455d --- /dev/null +++ b/src/video_dec/libmpeg2/motion_comp_mmx.c @@ -0,0 +1,1013 @@ +/* + * motion_comp_mmx.c + * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org> + * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#if defined(ARCH_X86) || defined(ARCH_X86_64) + +#include <inttypes.h> + +#include "mpeg2_internal.h" +#include <xine/xineutils.h> +#include "xine_mmx.h" + +#define CPU_MMXEXT 0 +#define CPU_3DNOW 1 + + +/* MMX code - needs a rewrite */ + +/* some rounding constants */ +static mmx_t round1 = {0x0001000100010001LL}; +static mmx_t round4 = {0x0002000200020002LL}; + +/* + * This code should probably be compiled with loop unrolling + * (i.e., -funroll-loops in gcc) because some of the loops + * use a small static number of iterations. This was written + * with the assumption the compiler knows best about when + * unrolling will help + */ + +static inline void mmx_zero_reg () +{ + /* load 0 into mm0 */ + pxor_r2r (mm0, mm0); +} + +static inline void mmx_average_2_U8 (uint8_t * dest, + uint8_t * src1, uint8_t * src2) +{ + /* *dest = (*src1 + *src2 + 1)/ 2; */ + + movq_m2r (*src1, mm1); // load 8 src1 bytes + movq_r2r (mm1, mm2); // copy 8 src1 bytes + + movq_m2r (*src2, mm3); // load 8 src2 bytes + movq_r2r (mm3, mm4); // copy 8 src2 bytes + + punpcklbw_r2r (mm0, mm1); // unpack low src1 bytes + punpckhbw_r2r (mm0, mm2); // unpack high src1 bytes + + punpcklbw_r2r (mm0, mm3); // unpack low src2 bytes + punpckhbw_r2r (mm0, mm4); // unpack high src2 bytes + + paddw_r2r (mm3, mm1); // add lows to mm1 + paddw_m2r (round1, mm1); + psraw_i2r (1, mm1); // /2 + + paddw_r2r (mm4, mm2); // add highs to mm2 + paddw_m2r (round1, mm2); + psraw_i2r (1, mm2); // /2 + + packuswb_r2r (mm2, mm1); // pack (w/ saturation) + movq_r2m (mm1, *dest); // store result in dest +} + +static inline void mmx_interp_average_2_U8 (uint8_t * dest, + uint8_t * src1, uint8_t * src2) +{ + /* *dest = (*dest + (*src1 + *src2 + 1)/ 2 + 1)/ 2; */ + + movq_m2r (*dest, mm1);
// load 8 dest bytes + movq_r2r (mm1, mm2); // copy 8 dest bytes + + movq_m2r (*src1, mm3); // load 8 src1 bytes + movq_r2r (mm3, mm4); // copy 8 src1 bytes + + movq_m2r (*src2, mm5); // load 8 src2 bytes + movq_r2r (mm5, mm6); // copy 8 src2 bytes + + punpcklbw_r2r (mm0, mm1); // unpack low dest bytes + punpckhbw_r2r (mm0, mm2); // unpack high dest bytes + + punpcklbw_r2r (mm0, mm3); // unpack low src1 bytes + punpckhbw_r2r (mm0, mm4); // unpack high src1 bytes + + punpcklbw_r2r (mm0, mm5); // unpack low src2 bytes + punpckhbw_r2r (mm0, mm6); // unpack high src2 bytes + + paddw_r2r (mm5, mm3); // add lows + paddw_m2r (round1, mm3); + psraw_i2r (1, mm3); // /2 + + paddw_r2r (mm6, mm4); // add highs + paddw_m2r (round1, mm4); + psraw_i2r (1, mm4); // /2 + + paddw_r2r (mm3, mm1); // add lows + paddw_m2r (round1, mm1); + psraw_i2r (1, mm1); // /2 + + paddw_r2r (mm4, mm2); // add highs + paddw_m2r (round1, mm2); + psraw_i2r (1, mm2); // /2 + + packuswb_r2r (mm2, mm1); // pack (w/ saturation) + movq_r2m (mm1, *dest); // store result in dest +} + +static inline void mmx_average_4_U8 (uint8_t * dest, + uint8_t * src1, uint8_t * src2, + uint8_t * src3, uint8_t * src4) +{ + /* *dest = (*src1 + *src2 + *src3 + *src4 + 2)/ 4; */ + + movq_m2r (*src1, mm1); // load 8 src1 bytes + movq_r2r (mm1, mm2); // copy 8 src1 bytes + + punpcklbw_r2r (mm0, mm1); // unpack low src1 bytes + punpckhbw_r2r (mm0, mm2); // unpack high src1 bytes + + movq_m2r (*src2, mm3); // load 8 src2 bytes + movq_r2r (mm3, mm4); // copy 8 src2 bytes + + punpcklbw_r2r (mm0, mm3); // unpack low src2 bytes + punpckhbw_r2r (mm0, mm4); // unpack high src2 bytes + + paddw_r2r (mm3, mm1); // add lows + paddw_r2r (mm4, mm2); // add highs + + /* now have partials in mm1 and mm2 */ + + movq_m2r (*src3, mm3); // load 8 src3 bytes + movq_r2r (mm3, mm4); // copy 8 src3 bytes + + punpcklbw_r2r (mm0, mm3); // unpack low src3 bytes + punpckhbw_r2r (mm0, mm4); // unpack high src3 bytes + + paddw_r2r (mm3, mm1); // add lows + 
paddw_r2r (mm4, mm2); // add highs + + movq_m2r (*src4, mm5); // load 8 src4 bytes + movq_r2r (mm5, mm6); // copy 8 src4 bytes + + punpcklbw_r2r (mm0, mm5); // unpack low src4 bytes + punpckhbw_r2r (mm0, mm6); // unpack high src4 bytes + + paddw_r2r (mm5, mm1); // add lows + paddw_r2r (mm6, mm2); // add highs + + /* now have subtotal in mm1 and mm2 */ + + paddw_m2r (round4, mm1); + psraw_i2r (2, mm1); // /4 + paddw_m2r (round4, mm2); + psraw_i2r (2, mm2); // /4 + + packuswb_r2r (mm2, mm1); // pack (w/ saturation) + movq_r2m (mm1, *dest); // store result in dest +} + +static inline void mmx_interp_average_4_U8 (uint8_t * dest, + uint8_t * src1, uint8_t * src2, + uint8_t * src3, uint8_t * src4) +{ + /* *dest = (*dest + (*src1 + *src2 + *src3 + *src4 + 2)/ 4 + 1)/ 2; */ + + movq_m2r (*src1, mm1); // load 8 src1 bytes + movq_r2r (mm1, mm2); // copy 8 src1 bytes + + punpcklbw_r2r (mm0, mm1); // unpack low src1 bytes + punpckhbw_r2r (mm0, mm2); // unpack high src1 bytes + + movq_m2r (*src2, mm3); // load 8 src2 bytes + movq_r2r (mm3, mm4); // copy 8 src2 bytes + + punpcklbw_r2r (mm0, mm3); // unpack low src2 bytes + punpckhbw_r2r (mm0, mm4); // unpack high src2 bytes + + paddw_r2r (mm3, mm1); // add lows + paddw_r2r (mm4, mm2); // add highs + + /* now have partials in mm1 and mm2 */ + + movq_m2r (*src3, mm3); // load 8 src3 bytes + movq_r2r (mm3, mm4); // copy 8 src3 bytes + + punpcklbw_r2r (mm0, mm3); // unpack low src3 bytes + punpckhbw_r2r (mm0, mm4); // unpack high src3 bytes + + paddw_r2r (mm3, mm1); // add lows + paddw_r2r (mm4, mm2); // add highs + + movq_m2r (*src4, mm5); // load 8 src4 bytes + movq_r2r (mm5, mm6); // copy 8 src4 bytes + + punpcklbw_r2r (mm0, mm5); // unpack low src4 bytes + punpckhbw_r2r (mm0, mm6); // unpack high src4 bytes + + paddw_r2r (mm5, mm1); // add lows + paddw_r2r (mm6, mm2); // add highs + + paddw_m2r (round4, mm1); + psraw_i2r (2, mm1); // /4 + paddw_m2r (round4, mm2); + psraw_i2r (2, mm2); // /4 + + /* now have subtotal/4 in mm1 and 
mm2 */ + + movq_m2r (*dest, mm3); // load 8 dest bytes + movq_r2r (mm3, mm4); // copy 8 dest bytes + + punpcklbw_r2r (mm0, mm3); // unpack low dest bytes + punpckhbw_r2r (mm0, mm4); // unpack high dest bytes + + paddw_r2r (mm3, mm1); // add lows + paddw_r2r (mm4, mm2); // add highs + + paddw_m2r (round1, mm1); + psraw_i2r (1, mm1); // /2 + paddw_m2r (round1, mm2); + psraw_i2r (1, mm2); // /2 + + /* now have end value in mm1 and mm2 */ + + packuswb_r2r (mm2, mm1); // pack (w/ saturation) + movq_r2m (mm1,*dest); // store result in dest +} + +/*-----------------------------------------------------------------------*/ + +static inline void MC_avg_mmx (int width, int height, + uint8_t * dest, uint8_t * ref, int stride) +{ + mmx_zero_reg (); + + do { + mmx_average_2_U8 (dest, dest, ref); + + if (width == 16) + mmx_average_2_U8 (dest+8, dest+8, ref+8); + + dest += stride; + ref += stride; + } while (--height); +} + +static void MC_avg_o_16_mmx (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_avg_mmx (16, height, dest, ref, stride); +} + +static void MC_avg_o_8_mmx (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_avg_mmx (8, height, dest, ref, stride); +} + +/*-----------------------------------------------------------------------*/ + +static inline void MC_put_mmx (int width, int height, + uint8_t * dest, uint8_t * ref, int stride) +{ + mmx_zero_reg (); + + do { + movq_m2r (* ref, mm1); // load 8 ref bytes + movq_r2m (mm1,* dest); // store 8 bytes at curr + + if (width == 16) + { + movq_m2r (* (ref+8), mm1); // load 8 ref bytes + movq_r2m (mm1,* (dest+8)); // store 8 bytes at curr + } + + dest += stride; + ref += stride; + } while (--height); +} + +static void MC_put_o_16_mmx (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_put_mmx (16, height, dest, ref, stride); +} + +static void MC_put_o_8_mmx (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_put_mmx (8, height, dest, ref, stride); +} + 
+/*-----------------------------------------------------------------------*/ + +/* Half pixel interpolation in the x direction */ +static inline void MC_avg_x_mmx (int width, int height, + uint8_t * dest, uint8_t * ref, int stride) +{ + mmx_zero_reg (); + + do { + mmx_interp_average_2_U8 (dest, ref, ref+1); + + if (width == 16) + mmx_interp_average_2_U8 (dest+8, ref+8, ref+9); + + dest += stride; + ref += stride; + } while (--height); +} + +static void MC_avg_x_16_mmx (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_avg_x_mmx (16, height, dest, ref, stride); +} + +static void MC_avg_x_8_mmx (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_avg_x_mmx (8, height, dest, ref, stride); +} + +/*-----------------------------------------------------------------------*/ + +static inline void MC_put_x_mmx (int width, int height, + uint8_t * dest, uint8_t * ref, int stride) +{ + mmx_zero_reg (); + + do { + mmx_average_2_U8 (dest, ref, ref+1); + + if (width == 16) + mmx_average_2_U8 (dest+8, ref+8, ref+9); + + dest += stride; + ref += stride; + } while (--height); +} + +static void MC_put_x_16_mmx (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_put_x_mmx (16, height, dest, ref, stride); +} + +static void MC_put_x_8_mmx (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_put_x_mmx (8, height, dest, ref, stride); +} + +/*-----------------------------------------------------------------------*/ + +static inline void MC_avg_xy_mmx (int width, int height, + uint8_t * dest, uint8_t * ref, int stride) +{ + uint8_t * ref_next = ref+stride; + + mmx_zero_reg (); + + do { + mmx_interp_average_4_U8 (dest, ref, ref+1, ref_next, ref_next+1); + + if (width == 16) + mmx_interp_average_4_U8 (dest+8, ref+8, ref+9, + ref_next+8, ref_next+9); + + dest += stride; + ref += stride; + ref_next += stride; + } while (--height); +} + +static void MC_avg_xy_16_mmx (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_avg_xy_mmx 
(16, height, dest, ref, stride); +} + +static void MC_avg_xy_8_mmx (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_avg_xy_mmx (8, height, dest, ref, stride); +} + +/*-----------------------------------------------------------------------*/ + +static inline void MC_put_xy_mmx (int width, int height, + uint8_t * dest, uint8_t * ref, int stride) +{ + uint8_t * ref_next = ref+stride; + + mmx_zero_reg (); + + do { + mmx_average_4_U8 (dest, ref, ref+1, ref_next, ref_next+1); + + if (width == 16) + mmx_average_4_U8 (dest+8, ref+8, ref+9, ref_next+8, ref_next+9); + + dest += stride; + ref += stride; + ref_next += stride; + } while (--height); +} + +static void MC_put_xy_16_mmx (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_put_xy_mmx (16, height, dest, ref, stride); +} + +static void MC_put_xy_8_mmx (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_put_xy_mmx (8, height, dest, ref, stride); +} + +/*-----------------------------------------------------------------------*/ + +static inline void MC_avg_y_mmx (int width, int height, + uint8_t * dest, uint8_t * ref, int stride) +{ + uint8_t * ref_next = ref+stride; + + mmx_zero_reg (); + + do { + mmx_interp_average_2_U8 (dest, ref, ref_next); + + if (width == 16) + mmx_interp_average_2_U8 (dest+8, ref+8, ref_next+8); + + dest += stride; + ref += stride; + ref_next += stride; + } while (--height); +} + +static void MC_avg_y_16_mmx (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_avg_y_mmx (16, height, dest, ref, stride); +} + +static void MC_avg_y_8_mmx (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_avg_y_mmx (8, height, dest, ref, stride); +} + +/*-----------------------------------------------------------------------*/ + +static inline void MC_put_y_mmx (int width, int height, + uint8_t * dest, uint8_t * ref, int stride) +{ + uint8_t * ref_next = ref+stride; + + mmx_zero_reg (); + + do { + mmx_average_2_U8 (dest, ref, ref_next); + + if 
(width == 16) + mmx_average_2_U8 (dest+8, ref+8, ref_next+8); + + dest += stride; + ref += stride; + ref_next += stride; + } while (--height); +} + +static void MC_put_y_16_mmx (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_put_y_mmx (16, height, dest, ref, stride); +} + +static void MC_put_y_8_mmx (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_put_y_mmx (8, height, dest, ref, stride); +} + + +MPEG2_MC_EXTERN (mmx) + + + + + + + +/* CPU_MMXEXT/CPU_3DNOW adaptation layer */ + +#define pavg_r2r(src,dest) \ +do { \ + if (cpu == CPU_MMXEXT) \ + pavgb_r2r (src, dest); \ + else \ + pavgusb_r2r (src, dest); \ +} while (0) + +#define pavg_m2r(src,dest) \ +do { \ + if (cpu == CPU_MMXEXT) \ + pavgb_m2r (src, dest); \ + else \ + pavgusb_m2r (src, dest); \ +} while (0) + + +/* CPU_MMXEXT code */ + + +static inline void MC_put1_8 (int height, uint8_t * dest, uint8_t * ref, + int stride) +{ + do { + movq_m2r (*ref, mm0); + movq_r2m (mm0, *dest); + ref += stride; + dest += stride; + } while (--height); +} + +static inline void MC_put1_16 (int height, uint8_t * dest, uint8_t * ref, + int stride) +{ + do { + movq_m2r (*ref, mm0); + movq_m2r (*(ref+8), mm1); + ref += stride; + movq_r2m (mm0, *dest); + movq_r2m (mm1, *(dest+8)); + dest += stride; + } while (--height); +} + +static inline void MC_avg1_8 (int height, uint8_t * dest, uint8_t * ref, + int stride, int cpu) +{ + do { + movq_m2r (*ref, mm0); + pavg_m2r (*dest, mm0); + ref += stride; + movq_r2m (mm0, *dest); + dest += stride; + } while (--height); +} + +static inline void MC_avg1_16 (int height, uint8_t * dest, uint8_t * ref, + int stride, int cpu) +{ + do { + movq_m2r (*ref, mm0); + movq_m2r (*(ref+8), mm1); + pavg_m2r (*dest, mm0); + pavg_m2r (*(dest+8), mm1); + movq_r2m (mm0, *dest); + ref += stride; + movq_r2m (mm1, *(dest+8)); + dest += stride; + } while (--height); +} + +static inline void MC_put2_8 (int height, uint8_t * dest, uint8_t * ref, + int stride, int offset, int cpu) +{ + 
do { + movq_m2r (*ref, mm0); + pavg_m2r (*(ref+offset), mm0); + ref += stride; + movq_r2m (mm0, *dest); + dest += stride; + } while (--height); +} + +static inline void MC_put2_16 (int height, uint8_t * dest, uint8_t * ref, + int stride, int offset, int cpu) +{ + do { + movq_m2r (*ref, mm0); + movq_m2r (*(ref+8), mm1); + pavg_m2r (*(ref+offset), mm0); + pavg_m2r (*(ref+offset+8), mm1); + movq_r2m (mm0, *dest); + ref += stride; + movq_r2m (mm1, *(dest+8)); + dest += stride; + } while (--height); +} + +static inline void MC_avg2_8 (int height, uint8_t * dest, uint8_t * ref, + int stride, int offset, int cpu) +{ + do { + movq_m2r (*ref, mm0); + pavg_m2r (*(ref+offset), mm0); + pavg_m2r (*dest, mm0); + ref += stride; + movq_r2m (mm0, *dest); + dest += stride; + } while (--height); +} + +static inline void MC_avg2_16 (int height, uint8_t * dest, uint8_t * ref, + int stride, int offset, int cpu) +{ + do { + movq_m2r (*ref, mm0); + movq_m2r (*(ref+8), mm1); + pavg_m2r (*(ref+offset), mm0); + pavg_m2r (*(ref+offset+8), mm1); + pavg_m2r (*dest, mm0); + pavg_m2r (*(dest+8), mm1); + ref += stride; + movq_r2m (mm0, *dest); + movq_r2m (mm1, *(dest+8)); + dest += stride; + } while (--height); +} + +static mmx_t mask_one = {0x0101010101010101LL}; + +static inline void MC_put4_8 (int height, uint8_t * dest, uint8_t * ref, + int stride, int cpu) +{ + movq_m2r (*ref, mm0); + movq_m2r (*(ref+1), mm1); + movq_r2r (mm0, mm7); + pxor_r2r (mm1, mm7); + pavg_r2r (mm1, mm0); + ref += stride; + + do { + movq_m2r (*ref, mm2); + movq_r2r (mm0, mm5); + + movq_m2r (*(ref+1), mm3); + movq_r2r (mm2, mm6); + + pxor_r2r (mm3, mm6); + pavg_r2r (mm3, mm2); + + por_r2r (mm6, mm7); + pxor_r2r (mm2, mm5); + + pand_r2r (mm5, mm7); + pavg_r2r (mm2, mm0); + + pand_m2r (mask_one, mm7); + + psubusb_r2r (mm7, mm0); + + ref += stride; + movq_r2m (mm0, *dest); + dest += stride; + + movq_r2r (mm6, mm7); // unroll ! + movq_r2r (mm2, mm0); // unroll ! 
+ } while (--height); +} + +static inline void MC_put4_16 (int height, uint8_t * dest, uint8_t * ref, + int stride, int cpu) +{ + do { + movq_m2r (*ref, mm0); + movq_m2r (*(ref+stride+1), mm1); + movq_r2r (mm0, mm7); + movq_m2r (*(ref+1), mm2); + pxor_r2r (mm1, mm7); + movq_m2r (*(ref+stride), mm3); + movq_r2r (mm2, mm6); + pxor_r2r (mm3, mm6); + pavg_r2r (mm1, mm0); + pavg_r2r (mm3, mm2); + por_r2r (mm6, mm7); + movq_r2r (mm0, mm6); + pxor_r2r (mm2, mm6); + pand_r2r (mm6, mm7); + pand_m2r (mask_one, mm7); + pavg_r2r (mm2, mm0); + psubusb_r2r (mm7, mm0); + movq_r2m (mm0, *dest); + + movq_m2r (*(ref+8), mm0); + movq_m2r (*(ref+stride+9), mm1); + movq_r2r (mm0, mm7); + movq_m2r (*(ref+9), mm2); + pxor_r2r (mm1, mm7); + movq_m2r (*(ref+stride+8), mm3); + movq_r2r (mm2, mm6); + pxor_r2r (mm3, mm6); + pavg_r2r (mm1, mm0); + pavg_r2r (mm3, mm2); + por_r2r (mm6, mm7); + movq_r2r (mm0, mm6); + pxor_r2r (mm2, mm6); + pand_r2r (mm6, mm7); + pand_m2r (mask_one, mm7); + pavg_r2r (mm2, mm0); + psubusb_r2r (mm7, mm0); + ref += stride; + movq_r2m (mm0, *(dest+8)); + dest += stride; + } while (--height); +} + +static inline void MC_avg4_8 (int height, uint8_t * dest, uint8_t * ref, + int stride, int cpu) +{ + do { + movq_m2r (*ref, mm0); + movq_m2r (*(ref+stride+1), mm1); + movq_r2r (mm0, mm7); + movq_m2r (*(ref+1), mm2); + pxor_r2r (mm1, mm7); + movq_m2r (*(ref+stride), mm3); + movq_r2r (mm2, mm6); + pxor_r2r (mm3, mm6); + pavg_r2r (mm1, mm0); + pavg_r2r (mm3, mm2); + por_r2r (mm6, mm7); + movq_r2r (mm0, mm6); + pxor_r2r (mm2, mm6); + pand_r2r (mm6, mm7); + pand_m2r (mask_one, mm7); + pavg_r2r (mm2, mm0); + psubusb_r2r (mm7, mm0); + movq_m2r (*dest, mm1); + pavg_r2r (mm1, mm0); + ref += stride; + movq_r2m (mm0, *dest); + dest += stride; + } while (--height); +} + +static inline void MC_avg4_16 (int height, uint8_t * dest, uint8_t * ref, + int stride, int cpu) +{ + do { + movq_m2r (*ref, mm0); + movq_m2r (*(ref+stride+1), mm1); + movq_r2r (mm0, mm7); + movq_m2r (*(ref+1), mm2); + 
pxor_r2r (mm1, mm7); + movq_m2r (*(ref+stride), mm3); + movq_r2r (mm2, mm6); + pxor_r2r (mm3, mm6); + pavg_r2r (mm1, mm0); + pavg_r2r (mm3, mm2); + por_r2r (mm6, mm7); + movq_r2r (mm0, mm6); + pxor_r2r (mm2, mm6); + pand_r2r (mm6, mm7); + pand_m2r (mask_one, mm7); + pavg_r2r (mm2, mm0); + psubusb_r2r (mm7, mm0); + movq_m2r (*dest, mm1); + pavg_r2r (mm1, mm0); + movq_r2m (mm0, *dest); + + movq_m2r (*(ref+8), mm0); + movq_m2r (*(ref+stride+9), mm1); + movq_r2r (mm0, mm7); + movq_m2r (*(ref+9), mm2); + pxor_r2r (mm1, mm7); + movq_m2r (*(ref+stride+8), mm3); + movq_r2r (mm2, mm6); + pxor_r2r (mm3, mm6); + pavg_r2r (mm1, mm0); + pavg_r2r (mm3, mm2); + por_r2r (mm6, mm7); + movq_r2r (mm0, mm6); + pxor_r2r (mm2, mm6); + pand_r2r (mm6, mm7); + pand_m2r (mask_one, mm7); + pavg_r2r (mm2, mm0); + psubusb_r2r (mm7, mm0); + movq_m2r (*(dest+8), mm1); + pavg_r2r (mm1, mm0); + ref += stride; + movq_r2m (mm0, *(dest+8)); + dest += stride; + } while (--height); +} + +static void MC_avg_o_16_mmxext (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_avg1_16 (height, dest, ref, stride, CPU_MMXEXT); +} + +static void MC_avg_o_8_mmxext (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_avg1_8 (height, dest, ref, stride, CPU_MMXEXT); +} + +static void MC_put_o_16_mmxext (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_put1_16 (height, dest, ref, stride); +} + +static void MC_put_o_8_mmxext (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_put1_8 (height, dest, ref, stride); +} + +static void MC_avg_x_16_mmxext (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_avg2_16 (height, dest, ref, stride, 1, CPU_MMXEXT); +} + +static void MC_avg_x_8_mmxext (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_avg2_8 (height, dest, ref, stride, 1, CPU_MMXEXT); +} + +static void MC_put_x_16_mmxext (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_put2_16 (height, dest, ref, stride, 1, 
CPU_MMXEXT); +} + +static void MC_put_x_8_mmxext (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_put2_8 (height, dest, ref, stride, 1, CPU_MMXEXT); +} + +static void MC_avg_y_16_mmxext (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_avg2_16 (height, dest, ref, stride, stride, CPU_MMXEXT); +} + +static void MC_avg_y_8_mmxext (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_avg2_8 (height, dest, ref, stride, stride, CPU_MMXEXT); +} + +static void MC_put_y_16_mmxext (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_put2_16 (height, dest, ref, stride, stride, CPU_MMXEXT); +} + +static void MC_put_y_8_mmxext (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_put2_8 (height, dest, ref, stride, stride, CPU_MMXEXT); +} + +static void MC_avg_xy_16_mmxext (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_avg4_16 (height, dest, ref, stride, CPU_MMXEXT); +} + +static void MC_avg_xy_8_mmxext (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_avg4_8 (height, dest, ref, stride, CPU_MMXEXT); +} + +static void MC_put_xy_16_mmxext (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_put4_16 (height, dest, ref, stride, CPU_MMXEXT); +} + +static void MC_put_xy_8_mmxext (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_put4_8 (height, dest, ref, stride, CPU_MMXEXT); +} + + +MPEG2_MC_EXTERN (mmxext) + + + +static void MC_avg_o_16_3dnow (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_avg1_16 (height, dest, ref, stride, CPU_3DNOW); +} + +static void MC_avg_o_8_3dnow (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_avg1_8 (height, dest, ref, stride, CPU_3DNOW); +} + +static void MC_put_o_16_3dnow (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_put1_16 (height, dest, ref, stride); +} + +static void MC_put_o_8_3dnow (uint8_t * dest, uint8_t * ref, + int stride, int height) +{ + MC_put1_8 (height, 
dest, ref, stride);
+}
+/* 3DNow! wrappers, continued.  Each one only forwards its
+ * (dest, ref, stride, height) arguments to a shared MC_* helper together
+ * with CPU_3DNOW.  "x" variants pass 1 and "y" variants pass stride as the
+ * fifth argument; "xy" variants use the MC_*4 helpers.
+ * NOTE(review): presumably the fifth argument is the byte offset of the
+ * second sample being interpolated -- confirm at the helper definitions.
+ *
+ * 16-wide "avg", "x" case: forwards to MC_avg2_16 with 1. */
+static void MC_avg_x_16_3dnow (uint8_t * dest, uint8_t * ref,
+                               int stride, int height)
+{
+    MC_avg2_16 (height, dest, ref, stride, 1, CPU_3DNOW);
+}
+/* 8-wide "avg", "x" case: forwards to MC_avg2_8 with 1. */
+static void MC_avg_x_8_3dnow (uint8_t * dest, uint8_t * ref,
+                              int stride, int height)
+{
+    MC_avg2_8 (height, dest, ref, stride, 1, CPU_3DNOW);
+}
+/* 16-wide "put", "x" case: forwards to MC_put2_16 with 1. */
+static void MC_put_x_16_3dnow (uint8_t * dest, uint8_t * ref,
+                               int stride, int height)
+{
+    MC_put2_16 (height, dest, ref, stride, 1, CPU_3DNOW);
+}
+/* 8-wide "put", "x" case: forwards to MC_put2_8 with 1. */
+static void MC_put_x_8_3dnow (uint8_t * dest, uint8_t * ref,
+                              int stride, int height)
+{
+    MC_put2_8 (height, dest, ref, stride, 1, CPU_3DNOW);
+}
+/* 16-wide "avg", "y" case: forwards to MC_avg2_16 with stride. */
+static void MC_avg_y_16_3dnow (uint8_t * dest, uint8_t * ref,
+                               int stride, int height)
+{
+    MC_avg2_16 (height, dest, ref, stride, stride, CPU_3DNOW);
+}
+/* 8-wide "avg", "y" case: forwards to MC_avg2_8 with stride. */
+static void MC_avg_y_8_3dnow (uint8_t * dest, uint8_t * ref,
+                              int stride, int height)
+{
+    MC_avg2_8 (height, dest, ref, stride, stride, CPU_3DNOW);
+}
+/* 16-wide "put", "y" case: forwards to MC_put2_16 with stride. */
+static void MC_put_y_16_3dnow (uint8_t * dest, uint8_t * ref,
+                               int stride, int height)
+{
+    MC_put2_16 (height, dest, ref, stride, stride, CPU_3DNOW);
+}
+/* 8-wide "put", "y" case: forwards to MC_put2_8 with stride. */
+static void MC_put_y_8_3dnow (uint8_t * dest, uint8_t * ref,
+                              int stride, int height)
+{
+    MC_put2_8 (height, dest, ref, stride, stride, CPU_3DNOW);
+}
+/* 16-wide "avg", "xy" case: forwards to MC_avg4_16. */
+static void MC_avg_xy_16_3dnow (uint8_t * dest, uint8_t * ref,
+                                int stride, int height)
+{
+    MC_avg4_16 (height, dest, ref, stride, CPU_3DNOW);
+}
+/* 8-wide "avg", "xy" case: forwards to MC_avg4_8. */
+static void MC_avg_xy_8_3dnow (uint8_t * dest, uint8_t * ref,
+                               int stride, int height)
+{
+    MC_avg4_8 (height, dest, ref, stride, CPU_3DNOW);
+}
+/* 16-wide "put", "xy" case: forwards to MC_put4_16. */
+static void MC_put_xy_16_3dnow (uint8_t * dest, uint8_t * ref,
+                                int stride, int height)
+{
+    MC_put4_16 (height, dest, ref, stride, CPU_3DNOW);
+}
+/* 8-wide "put", "xy" case: forwards to MC_put4_8. */
+static void MC_put_xy_8_3dnow (uint8_t * dest, uint8_t * ref,
+                               int stride, int height)
+{
+    MC_put4_8 (height, dest, ref, stride, CPU_3DNOW);
+}
+
+/* NOTE(review): MPEG2_MC_EXTERN is defined outside this view; presumably it
+ * collects the sixteen MC_*_3dnow functions into one table -- confirm. */
+MPEG2_MC_EXTERN (3dnow)
+
+#endif
diff --git a/src/video_dec/libmpeg2/motion_comp_vis.c b/src/video_dec/libmpeg2/motion_comp_vis.c
new
file mode 100644 index 000000000..d0a6673d6 --- /dev/null +++ b/src/video_dec/libmpeg2/motion_comp_vis.c @@ -0,0 +1,2059 @@ +/* + * motion_comp_vis.c + * Copyright (C) 2003 David S. Miller <davem@redhat.com> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#if defined(ARCH_SPARC) && defined(ENABLE_VIS) + +#include <inttypes.h> + +#include "mpeg2_internal.h" +#include "vis.h" + +/* The trick used in some of this file is the formula from the MMX + * motion comp code, which is: + * + * (x+y+1)>>1 == (x|y)-((x^y)>>1) + * + * This allows us to average 8 bytes at a time in a 64-bit FPU reg. + * We avoid overflows by masking before we do the shift, and we + * implement the shift by multiplying by 1/2 using mul8x16. 
So in + * VIS this is (assume 'x' is in f0, 'y' is in f2, a repeating mask + * of '0xfe' is in f4, a repeating mask of '0x7f' is in f6, and + * the value 0x80808080 is in f8): + * + * fxor f0, f2, f10 + * fand f10, f4, f10 + * fmul8x16 f8, f10, f10 + * fand f10, f6, f10 + * for f0, f2, f12 + * fpsub16 f12, f10, f10 + */ + +#define DUP4(x) {x, x, x, x} +#define DUP8(x) {x, x, x, x, x, x, x, x} +static const int16_t constants1[] ATTR_ALIGN(8) = DUP4 (1); +static const int16_t constants2[] ATTR_ALIGN(8) = DUP4 (2); +static const int16_t constants3[] ATTR_ALIGN(8) = DUP4 (3); +static const int16_t constants6[] ATTR_ALIGN(8) = DUP4 (6); +static const int8_t constants_fe[] ATTR_ALIGN(8) = DUP8 (0xfe); +static const int8_t constants_7f[] ATTR_ALIGN(8) = DUP8 (0x7f); +static const int8_t constants128[] ATTR_ALIGN(8) = DUP8 (128); +static const int16_t constants256_512[] ATTR_ALIGN(8) = + {256, 512, 256, 512}; +static const int16_t constants256_1024[] ATTR_ALIGN(8) = + {256, 1024, 256, 1024}; + +#define REF_0 0 +#define REF_0_1 1 +#define REF_2 2 +#define REF_2_1 3 +#define REF_4 4 +#define REF_4_1 5 +#define REF_6 6 +#define REF_6_1 7 +#define REF_S0 8 +#define REF_S0_1 9 +#define REF_S2 10 +#define REF_S2_1 11 +#define REF_S4 12 +#define REF_S4_1 13 +#define REF_S6 14 +#define REF_S6_1 15 +#define DST_0 16 +#define DST_1 17 +#define DST_2 18 +#define DST_3 19 +#define CONST_1 20 +#define CONST_2 20 +#define CONST_3 20 +#define CONST_6 20 +#define MASK_fe 20 +#define CONST_128 22 +#define CONST_256 22 +#define CONST_512 22 +#define CONST_1024 22 +#define TMP0 24 +#define TMP1 25 +#define TMP2 26 +#define TMP3 27 +#define TMP4 28 +#define TMP5 29 +#define ZERO 30 +#define MASK_7f 30 + +#define TMP6 32 +#define TMP8 34 +#define TMP10 36 +#define TMP12 38 +#define TMP14 40 +#define TMP16 42 +#define TMP18 44 +#define TMP20 46 +#define TMP22 48 +#define TMP24 50 +#define TMP26 52 +#define TMP28 54 +#define TMP30 56 +#define TMP32 58 + +static void MC_put_o_16_vis (uint8_t * 
dest, uint8_t * _ref,
+			     int stride, int height)
+{
+    uint8_t *ref = (uint8_t *) _ref;
+    int offset;
+
+    ref = vis_alignaddr(ref);	/* may return a pointer different from _ref; presumably _ref rounded down to 8 bytes -- confirm in vis.h */
+    offset = (ref != _ref) ? 16 : 0;	/* the third load below reads ref + 16 only when vis_alignaddr moved ref; otherwise it re-reads ref + 0 */
+    do {	/* 5 cycles */
+	vis_ld64(ref[0], TMP0);
+
+	vis_ld64_2(ref, 8, TMP2);
+
+	vis_ld64_2(ref, offset, TMP4);
+	ref += stride;
+
+	vis_faligndata(TMP0, TMP2, REF_0);	/* combine the first two loads into the first output word */
+	vis_st64(REF_0, dest[0]);
+
+	vis_faligndata(TMP2, TMP4, REF_2);	/* combine the last two loads into the second output word */
+	vis_st64_2(REF_2, dest, 8);
+	dest += stride;
+    } while (--height);	/* runs `height` times; height must be >= 1 */
+}
+/* "put", "o" case, 8 wide: same scheme as MC_put_o_16_vis with a single
+ * store per row.  The second load reads ref + 8 only when vis_alignaddr
+ * returned a pointer different from _ref; otherwise it re-reads ref + 0. */
+static void MC_put_o_8_vis (uint8_t * dest, uint8_t * _ref,
+			    int stride, int height)
+{
+    uint8_t *ref = (uint8_t *) _ref;
+    int offset;
+
+    ref = vis_alignaddr(ref);
+    offset = (ref != _ref) ? 8 : 0;
+    do {	/* 4 cycles */
+	vis_ld64(ref[0], TMP0);
+
+	vis_ld64_2(ref, offset, TMP2);
+	ref += stride;
+
+	/* stall */
+
+	vis_faligndata(TMP0, TMP2, REF_0);
+	vis_st64(REF_0, dest[0]);
+	dest += stride;
+    } while (--height);	/* runs `height` times; height must be >= 1 */
+}
+
+/* "avg", "o" case, 16 wide: combines the existing dest row with the
+ * realigned ref row using the xor / and MASK_fe / mul8x16 CONST_128 /
+ * and MASK_7f / or / psub16 sequence given in the comment at the top of
+ * this file.  The main loop handles two rows per iteration and runs
+ * (height >> 1) - 1 times; the last two rows are handled after it. */
+static void MC_avg_o_16_vis (uint8_t * dest, uint8_t * _ref,
+			     int stride, int height)
+{
+    uint8_t *ref = (uint8_t *) _ref;
+    int stride_8 = stride + 8;	/* byte offset of the second 8-byte half of the next dest row */
+    int offset;
+
+    ref = vis_alignaddr(ref);
+    offset = (ref != _ref) ?
16 : 0; + + vis_ld64(ref[0], TMP0); + + vis_ld64(ref[8], TMP2); + + vis_ld64_2(ref, offset, TMP4); + + vis_ld64(dest[0], DST_0); + + vis_ld64(dest[8], DST_2); + + vis_ld64(constants_fe[0], MASK_fe); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64(constants_7f[0], MASK_7f); + vis_faligndata(TMP2, TMP4, REF_2); + + vis_ld64(constants128[0], CONST_128); + + ref += stride; + height = (height >> 1) - 1; + + do { /* 24 cycles */ + vis_ld64(ref[0], TMP0); + vis_xor(DST_0, REF_0, TMP6); + + vis_ld64_2(ref, 8, TMP2); + vis_and(TMP6, MASK_fe, TMP6); + + vis_ld64_2(ref, offset, TMP4); + ref += stride; + vis_mul8x16(CONST_128, TMP6, TMP6); + vis_xor(DST_2, REF_2, TMP8); + + vis_and(TMP8, MASK_fe, TMP8); + + vis_or(DST_0, REF_0, TMP10); + vis_ld64_2(dest, stride, DST_0); + vis_mul8x16(CONST_128, TMP8, TMP8); + + vis_or(DST_2, REF_2, TMP12); + vis_ld64_2(dest, stride_8, DST_2); + + vis_ld64(ref[0], TMP14); + vis_and(TMP6, MASK_7f, TMP6); + + vis_and(TMP8, MASK_7f, TMP8); + + vis_psub16(TMP10, TMP6, TMP6); + vis_st64(TMP6, dest[0]); + + vis_psub16(TMP12, TMP8, TMP8); + vis_st64_2(TMP8, dest, 8); + + dest += stride; + vis_ld64_2(ref, 8, TMP16); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64_2(ref, offset, TMP18); + vis_faligndata(TMP2, TMP4, REF_2); + ref += stride; + + vis_xor(DST_0, REF_0, TMP20); + + vis_and(TMP20, MASK_fe, TMP20); + + vis_xor(DST_2, REF_2, TMP22); + vis_mul8x16(CONST_128, TMP20, TMP20); + + vis_and(TMP22, MASK_fe, TMP22); + + vis_or(DST_0, REF_0, TMP24); + vis_mul8x16(CONST_128, TMP22, TMP22); + + vis_or(DST_2, REF_2, TMP26); + + vis_ld64_2(dest, stride, DST_0); + vis_faligndata(TMP14, TMP16, REF_0); + + vis_ld64_2(dest, stride_8, DST_2); + vis_faligndata(TMP16, TMP18, REF_2); + + vis_and(TMP20, MASK_7f, TMP20); + + vis_and(TMP22, MASK_7f, TMP22); + + vis_psub16(TMP24, TMP20, TMP20); + vis_st64(TMP20, dest[0]); + + vis_psub16(TMP26, TMP22, TMP22); + vis_st64_2(TMP22, dest, 8); + dest += stride; + } while (--height); + + vis_ld64(ref[0], TMP0); + 
vis_xor(DST_0, REF_0, TMP6); + + vis_ld64_2(ref, 8, TMP2); + vis_and(TMP6, MASK_fe, TMP6); + + vis_ld64_2(ref, offset, TMP4); + vis_mul8x16(CONST_128, TMP6, TMP6); + vis_xor(DST_2, REF_2, TMP8); + + vis_and(TMP8, MASK_fe, TMP8); + + vis_or(DST_0, REF_0, TMP10); + vis_ld64_2(dest, stride, DST_0); + vis_mul8x16(CONST_128, TMP8, TMP8); + + vis_or(DST_2, REF_2, TMP12); + vis_ld64_2(dest, stride_8, DST_2); + + vis_ld64(ref[0], TMP14); + vis_and(TMP6, MASK_7f, TMP6); + + vis_and(TMP8, MASK_7f, TMP8); + + vis_psub16(TMP10, TMP6, TMP6); + vis_st64(TMP6, dest[0]); + + vis_psub16(TMP12, TMP8, TMP8); + vis_st64_2(TMP8, dest, 8); + + dest += stride; + vis_faligndata(TMP0, TMP2, REF_0); + + vis_faligndata(TMP2, TMP4, REF_2); + + vis_xor(DST_0, REF_0, TMP20); + + vis_and(TMP20, MASK_fe, TMP20); + + vis_xor(DST_2, REF_2, TMP22); + vis_mul8x16(CONST_128, TMP20, TMP20); + + vis_and(TMP22, MASK_fe, TMP22); + + vis_or(DST_0, REF_0, TMP24); + vis_mul8x16(CONST_128, TMP22, TMP22); + + vis_or(DST_2, REF_2, TMP26); + + vis_and(TMP20, MASK_7f, TMP20); + + vis_and(TMP22, MASK_7f, TMP22); + + vis_psub16(TMP24, TMP20, TMP20); + vis_st64(TMP20, dest[0]); + + vis_psub16(TMP26, TMP22, TMP22); + vis_st64_2(TMP22, dest, 8); +} + +static void MC_avg_o_8_vis (uint8_t * dest, uint8_t * _ref, + int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + int offset; + + ref = vis_alignaddr(ref); + offset = (ref != _ref) ? 
8 : 0; + + vis_ld64(ref[0], TMP0); + + vis_ld64_2(ref, offset, TMP2); + + vis_ld64(dest[0], DST_0); + + vis_ld64(constants_fe[0], MASK_fe); + + vis_ld64(constants_7f[0], MASK_7f); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64(constants128[0], CONST_128); + + ref += stride; + height = (height >> 1) - 1; + + do { /* 12 cycles */ + vis_ld64(ref[0], TMP0); + vis_xor(DST_0, REF_0, TMP4); + + vis_ld64_2(ref, offset, TMP2); + vis_and(TMP4, MASK_fe, TMP4); + + vis_or(DST_0, REF_0, TMP6); + vis_ld64_2(dest, stride, DST_0); + ref += stride; + vis_mul8x16(CONST_128, TMP4, TMP4); + + vis_ld64(ref[0], TMP12); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64_2(ref, offset, TMP2); + vis_xor(DST_0, REF_0, TMP0); + ref += stride; + + vis_and(TMP0, MASK_fe, TMP0); + + vis_and(TMP4, MASK_7f, TMP4); + + vis_psub16(TMP6, TMP4, TMP4); + vis_st64(TMP4, dest[0]); + dest += stride; + vis_mul8x16(CONST_128, TMP0, TMP0); + + vis_or(DST_0, REF_0, TMP6); + vis_ld64_2(dest, stride, DST_0); + + vis_faligndata(TMP12, TMP2, REF_0); + + vis_and(TMP0, MASK_7f, TMP0); + + vis_psub16(TMP6, TMP0, TMP4); + vis_st64(TMP4, dest[0]); + dest += stride; + } while (--height); + + vis_ld64(ref[0], TMP0); + vis_xor(DST_0, REF_0, TMP4); + + vis_ld64_2(ref, offset, TMP2); + vis_and(TMP4, MASK_fe, TMP4); + + vis_or(DST_0, REF_0, TMP6); + vis_ld64_2(dest, stride, DST_0); + vis_mul8x16(CONST_128, TMP4, TMP4); + + vis_faligndata(TMP0, TMP2, REF_0); + + vis_xor(DST_0, REF_0, TMP0); + + vis_and(TMP0, MASK_fe, TMP0); + + vis_and(TMP4, MASK_7f, TMP4); + + vis_psub16(TMP6, TMP4, TMP4); + vis_st64(TMP4, dest[0]); + dest += stride; + vis_mul8x16(CONST_128, TMP0, TMP0); + + vis_or(DST_0, REF_0, TMP6); + + vis_and(TMP0, MASK_7f, TMP0); + + vis_psub16(TMP6, TMP0, TMP4); + vis_st64(TMP4, dest[0]); +} + +static void MC_put_x_16_vis (uint8_t * dest, uint8_t * _ref, + int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + unsigned long off = (unsigned long) ref & 0x7; + unsigned long off_plus_1 = off + 1; + + ref = 
vis_alignaddr(ref); + + vis_ld64(ref[0], TMP0); + + vis_ld64_2(ref, 8, TMP2); + + vis_ld64_2(ref, 16, TMP4); + + vis_ld64(constants_fe[0], MASK_fe); + + vis_ld64(constants_7f[0], MASK_7f); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64(constants128[0], CONST_128); + vis_faligndata(TMP2, TMP4, REF_4); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_2); + vis_faligndata(TMP2, TMP4, REF_6); + } else { + vis_src1(TMP2, REF_2); + vis_src1(TMP4, REF_6); + } + + ref += stride; + height = (height >> 1) - 1; + + do { /* 34 cycles */ + vis_ld64(ref[0], TMP0); + vis_xor(REF_0, REF_2, TMP6); + + vis_ld64_2(ref, 8, TMP2); + vis_xor(REF_4, REF_6, TMP8); + + vis_ld64_2(ref, 16, TMP4); + vis_and(TMP6, MASK_fe, TMP6); + ref += stride; + + vis_ld64(ref[0], TMP14); + vis_mul8x16(CONST_128, TMP6, TMP6); + vis_and(TMP8, MASK_fe, TMP8); + + vis_ld64_2(ref, 8, TMP16); + vis_mul8x16(CONST_128, TMP8, TMP8); + vis_or(REF_0, REF_2, TMP10); + + vis_ld64_2(ref, 16, TMP18); + ref += stride; + vis_or(REF_4, REF_6, TMP12); + + vis_alignaddr_g0((void *)off); + + vis_faligndata(TMP0, TMP2, REF_0); + + vis_faligndata(TMP2, TMP4, REF_4); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_2); + vis_faligndata(TMP2, TMP4, REF_6); + } else { + vis_src1(TMP2, REF_2); + vis_src1(TMP4, REF_6); + } + + vis_and(TMP6, MASK_7f, TMP6); + + vis_and(TMP8, MASK_7f, TMP8); + + vis_psub16(TMP10, TMP6, TMP6); + vis_st64(TMP6, dest[0]); + + vis_psub16(TMP12, TMP8, TMP8); + vis_st64_2(TMP8, dest, 8); + dest += stride; + + vis_xor(REF_0, REF_2, TMP6); + + vis_xor(REF_4, REF_6, TMP8); + + vis_and(TMP6, MASK_fe, TMP6); + + vis_mul8x16(CONST_128, TMP6, TMP6); + vis_and(TMP8, MASK_fe, TMP8); + + vis_mul8x16(CONST_128, TMP8, TMP8); + vis_or(REF_0, REF_2, TMP10); + + vis_or(REF_4, REF_6, TMP12); + + vis_alignaddr_g0((void *)off); + + vis_faligndata(TMP14, TMP16, REF_0); + + vis_faligndata(TMP16, TMP18, REF_4); + + if (off != 0x7) { + 
vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP14, TMP16, REF_2); + vis_faligndata(TMP16, TMP18, REF_6); + } else { + vis_src1(TMP16, REF_2); + vis_src1(TMP18, REF_6); + } + + vis_and(TMP6, MASK_7f, TMP6); + + vis_and(TMP8, MASK_7f, TMP8); + + vis_psub16(TMP10, TMP6, TMP6); + vis_st64(TMP6, dest[0]); + + vis_psub16(TMP12, TMP8, TMP8); + vis_st64_2(TMP8, dest, 8); + dest += stride; + } while (--height); + + vis_ld64(ref[0], TMP0); + vis_xor(REF_0, REF_2, TMP6); + + vis_ld64_2(ref, 8, TMP2); + vis_xor(REF_4, REF_6, TMP8); + + vis_ld64_2(ref, 16, TMP4); + vis_and(TMP6, MASK_fe, TMP6); + + vis_mul8x16(CONST_128, TMP6, TMP6); + vis_and(TMP8, MASK_fe, TMP8); + + vis_mul8x16(CONST_128, TMP8, TMP8); + vis_or(REF_0, REF_2, TMP10); + + vis_or(REF_4, REF_6, TMP12); + + vis_alignaddr_g0((void *)off); + + vis_faligndata(TMP0, TMP2, REF_0); + + vis_faligndata(TMP2, TMP4, REF_4); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_2); + vis_faligndata(TMP2, TMP4, REF_6); + } else { + vis_src1(TMP2, REF_2); + vis_src1(TMP4, REF_6); + } + + vis_and(TMP6, MASK_7f, TMP6); + + vis_and(TMP8, MASK_7f, TMP8); + + vis_psub16(TMP10, TMP6, TMP6); + vis_st64(TMP6, dest[0]); + + vis_psub16(TMP12, TMP8, TMP8); + vis_st64_2(TMP8, dest, 8); + dest += stride; + + vis_xor(REF_0, REF_2, TMP6); + + vis_xor(REF_4, REF_6, TMP8); + + vis_and(TMP6, MASK_fe, TMP6); + + vis_mul8x16(CONST_128, TMP6, TMP6); + vis_and(TMP8, MASK_fe, TMP8); + + vis_mul8x16(CONST_128, TMP8, TMP8); + vis_or(REF_0, REF_2, TMP10); + + vis_or(REF_4, REF_6, TMP12); + + vis_and(TMP6, MASK_7f, TMP6); + + vis_and(TMP8, MASK_7f, TMP8); + + vis_psub16(TMP10, TMP6, TMP6); + vis_st64(TMP6, dest[0]); + + vis_psub16(TMP12, TMP8, TMP8); + vis_st64_2(TMP8, dest, 8); +} + +static void MC_put_x_8_vis (uint8_t * dest, uint8_t * _ref, + int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + unsigned long off = (unsigned long) ref & 0x7; + unsigned long off_plus_1 = off + 1; + + ref = 
vis_alignaddr(ref); + + vis_ld64(ref[0], TMP0); + + vis_ld64(ref[8], TMP2); + + vis_ld64(constants_fe[0], MASK_fe); + + vis_ld64(constants_7f[0], MASK_7f); + + vis_ld64(constants128[0], CONST_128); + vis_faligndata(TMP0, TMP2, REF_0); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_2); + } else { + vis_src1(TMP2, REF_2); + } + + ref += stride; + height = (height >> 1) - 1; + + do { /* 20 cycles */ + vis_ld64(ref[0], TMP0); + vis_xor(REF_0, REF_2, TMP4); + + vis_ld64_2(ref, 8, TMP2); + vis_and(TMP4, MASK_fe, TMP4); + ref += stride; + + vis_ld64(ref[0], TMP8); + vis_or(REF_0, REF_2, TMP6); + vis_mul8x16(CONST_128, TMP4, TMP4); + + vis_alignaddr_g0((void *)off); + + vis_ld64_2(ref, 8, TMP10); + ref += stride; + vis_faligndata(TMP0, TMP2, REF_0); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_2); + } else { + vis_src1(TMP2, REF_2); + } + + vis_and(TMP4, MASK_7f, TMP4); + + vis_psub16(TMP6, TMP4, DST_0); + vis_st64(DST_0, dest[0]); + dest += stride; + + vis_xor(REF_0, REF_2, TMP12); + + vis_and(TMP12, MASK_fe, TMP12); + + vis_or(REF_0, REF_2, TMP14); + vis_mul8x16(CONST_128, TMP12, TMP12); + + vis_alignaddr_g0((void *)off); + vis_faligndata(TMP8, TMP10, REF_0); + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP8, TMP10, REF_2); + } else { + vis_src1(TMP10, REF_2); + } + + vis_and(TMP12, MASK_7f, TMP12); + + vis_psub16(TMP14, TMP12, DST_0); + vis_st64(DST_0, dest[0]); + dest += stride; + } while (--height); + + vis_ld64(ref[0], TMP0); + vis_xor(REF_0, REF_2, TMP4); + + vis_ld64_2(ref, 8, TMP2); + vis_and(TMP4, MASK_fe, TMP4); + + vis_or(REF_0, REF_2, TMP6); + vis_mul8x16(CONST_128, TMP4, TMP4); + + vis_alignaddr_g0((void *)off); + + vis_faligndata(TMP0, TMP2, REF_0); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_2); + } else { + vis_src1(TMP2, REF_2); + } + + vis_and(TMP4, MASK_7f, TMP4); + + 
vis_psub16(TMP6, TMP4, DST_0); + vis_st64(DST_0, dest[0]); + dest += stride; + + vis_xor(REF_0, REF_2, TMP12); + + vis_and(TMP12, MASK_fe, TMP12); + + vis_or(REF_0, REF_2, TMP14); + vis_mul8x16(CONST_128, TMP12, TMP12); + + vis_and(TMP12, MASK_7f, TMP12); + + vis_psub16(TMP14, TMP12, DST_0); + vis_st64(DST_0, dest[0]); + dest += stride; +} + +static void MC_avg_x_16_vis (uint8_t * dest, uint8_t * _ref, + int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + unsigned long off = (unsigned long) ref & 0x7; + unsigned long off_plus_1 = off + 1; + + vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT); + + vis_ld64(constants3[0], CONST_3); + vis_fzero(ZERO); + vis_ld64(constants256_512[0], CONST_256); + + ref = vis_alignaddr(ref); + do { /* 26 cycles */ + vis_ld64(ref[0], TMP0); + + vis_ld64(ref[8], TMP2); + + vis_alignaddr_g0((void *)off); + + vis_ld64(ref[16], TMP4); + + vis_ld64(dest[0], DST_0); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64(dest[8], DST_2); + vis_faligndata(TMP2, TMP4, REF_4); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_2); + vis_faligndata(TMP2, TMP4, REF_6); + } else { + vis_src1(TMP2, REF_2); + vis_src1(TMP4, REF_6); + } + + vis_mul8x16au(REF_0, CONST_256, TMP0); + + vis_pmerge(ZERO, REF_2, TMP4); + vis_mul8x16au(REF_0_1, CONST_256, TMP2); + + vis_pmerge(ZERO, REF_2_1, TMP6); + + vis_padd16(TMP0, TMP4, TMP0); + + vis_mul8x16al(DST_0, CONST_512, TMP4); + vis_padd16(TMP2, TMP6, TMP2); + + vis_mul8x16al(DST_1, CONST_512, TMP6); + + vis_mul8x16au(REF_6, CONST_256, TMP12); + + vis_padd16(TMP0, TMP4, TMP0); + vis_mul8x16au(REF_6_1, CONST_256, TMP14); + + vis_padd16(TMP2, TMP6, TMP2); + vis_mul8x16au(REF_4, CONST_256, TMP16); + + vis_padd16(TMP0, CONST_3, TMP8); + vis_mul8x16au(REF_4_1, CONST_256, TMP18); + + vis_padd16(TMP2, CONST_3, TMP10); + vis_pack16(TMP8, DST_0); + + vis_pack16(TMP10, DST_1); + vis_padd16(TMP16, TMP12, TMP0); + + vis_st64(DST_0, dest[0]); + vis_mul8x16al(DST_2, CONST_512, TMP4); 
+ vis_padd16(TMP18, TMP14, TMP2); + + vis_mul8x16al(DST_3, CONST_512, TMP6); + vis_padd16(TMP0, CONST_3, TMP0); + + vis_padd16(TMP2, CONST_3, TMP2); + + vis_padd16(TMP0, TMP4, TMP0); + + vis_padd16(TMP2, TMP6, TMP2); + vis_pack16(TMP0, DST_2); + + vis_pack16(TMP2, DST_3); + vis_st64(DST_2, dest[8]); + + ref += stride; + dest += stride; + } while (--height); +} + +static void MC_avg_x_8_vis (uint8_t * dest, uint8_t * _ref, + int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + unsigned long off = (unsigned long) ref & 0x7; + unsigned long off_plus_1 = off + 1; + int stride_times_2 = stride << 1; + + vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT); + + vis_ld64(constants3[0], CONST_3); + vis_fzero(ZERO); + vis_ld64(constants256_512[0], CONST_256); + + ref = vis_alignaddr(ref); + height >>= 2; + do { /* 47 cycles */ + vis_ld64(ref[0], TMP0); + + vis_ld64_2(ref, 8, TMP2); + ref += stride; + + vis_alignaddr_g0((void *)off); + + vis_ld64(ref[0], TMP4); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64_2(ref, 8, TMP6); + ref += stride; + + vis_ld64(ref[0], TMP8); + + vis_ld64_2(ref, 8, TMP10); + ref += stride; + vis_faligndata(TMP4, TMP6, REF_4); + + vis_ld64(ref[0], TMP12); + + vis_ld64_2(ref, 8, TMP14); + ref += stride; + vis_faligndata(TMP8, TMP10, REF_S0); + + vis_faligndata(TMP12, TMP14, REF_S4); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + + vis_ld64(dest[0], DST_0); + vis_faligndata(TMP0, TMP2, REF_2); + + vis_ld64_2(dest, stride, DST_2); + vis_faligndata(TMP4, TMP6, REF_6); + + vis_faligndata(TMP8, TMP10, REF_S2); + + vis_faligndata(TMP12, TMP14, REF_S6); + } else { + vis_ld64(dest[0], DST_0); + vis_src1(TMP2, REF_2); + + vis_ld64_2(dest, stride, DST_2); + vis_src1(TMP6, REF_6); + + vis_src1(TMP10, REF_S2); + + vis_src1(TMP14, REF_S6); + } + + vis_pmerge(ZERO, REF_0, TMP0); + vis_mul8x16au(REF_0_1, CONST_256, TMP2); + + vis_pmerge(ZERO, REF_2, TMP4); + vis_mul8x16au(REF_2_1, CONST_256, TMP6); + + vis_padd16(TMP0, CONST_3, TMP0); + 
vis_mul8x16al(DST_0, CONST_512, TMP16); + + vis_padd16(TMP2, CONST_3, TMP2); + vis_mul8x16al(DST_1, CONST_512, TMP18); + + vis_padd16(TMP0, TMP4, TMP0); + vis_mul8x16au(REF_4, CONST_256, TMP8); + + vis_padd16(TMP2, TMP6, TMP2); + vis_mul8x16au(REF_4_1, CONST_256, TMP10); + + vis_padd16(TMP0, TMP16, TMP0); + vis_mul8x16au(REF_6, CONST_256, TMP12); + + vis_padd16(TMP2, TMP18, TMP2); + vis_mul8x16au(REF_6_1, CONST_256, TMP14); + + vis_padd16(TMP8, CONST_3, TMP8); + vis_mul8x16al(DST_2, CONST_512, TMP16); + + vis_padd16(TMP8, TMP12, TMP8); + vis_mul8x16al(DST_3, CONST_512, TMP18); + + vis_padd16(TMP10, TMP14, TMP10); + vis_pack16(TMP0, DST_0); + + vis_pack16(TMP2, DST_1); + vis_st64(DST_0, dest[0]); + dest += stride; + vis_padd16(TMP10, CONST_3, TMP10); + + vis_ld64_2(dest, stride, DST_0); + vis_padd16(TMP8, TMP16, TMP8); + + vis_ld64_2(dest, stride_times_2, TMP4/*DST_2*/); + vis_padd16(TMP10, TMP18, TMP10); + vis_pack16(TMP8, DST_2); + + vis_pack16(TMP10, DST_3); + vis_st64(DST_2, dest[0]); + dest += stride; + + vis_mul8x16au(REF_S0_1, CONST_256, TMP2); + vis_pmerge(ZERO, REF_S0, TMP0); + + vis_pmerge(ZERO, REF_S2, TMP24); + vis_mul8x16au(REF_S2_1, CONST_256, TMP6); + + vis_padd16(TMP0, CONST_3, TMP0); + vis_mul8x16au(REF_S4, CONST_256, TMP8); + + vis_padd16(TMP2, CONST_3, TMP2); + vis_mul8x16au(REF_S4_1, CONST_256, TMP10); + + vis_padd16(TMP0, TMP24, TMP0); + vis_mul8x16au(REF_S6, CONST_256, TMP12); + + vis_padd16(TMP2, TMP6, TMP2); + vis_mul8x16au(REF_S6_1, CONST_256, TMP14); + + vis_padd16(TMP8, CONST_3, TMP8); + vis_mul8x16al(DST_0, CONST_512, TMP16); + + vis_padd16(TMP10, CONST_3, TMP10); + vis_mul8x16al(DST_1, CONST_512, TMP18); + + vis_padd16(TMP8, TMP12, TMP8); + vis_mul8x16al(TMP4/*DST_2*/, CONST_512, TMP20); + + vis_mul8x16al(TMP5/*DST_3*/, CONST_512, TMP22); + vis_padd16(TMP0, TMP16, TMP0); + + vis_padd16(TMP2, TMP18, TMP2); + vis_pack16(TMP0, DST_0); + + vis_padd16(TMP10, TMP14, TMP10); + vis_pack16(TMP2, DST_1); + vis_st64(DST_0, dest[0]); + dest += 
stride; + + vis_padd16(TMP8, TMP20, TMP8); + + vis_padd16(TMP10, TMP22, TMP10); + vis_pack16(TMP8, DST_2); + + vis_pack16(TMP10, DST_3); + vis_st64(DST_2, dest[0]); + dest += stride; + } while (--height); +} + +static void MC_put_y_16_vis (uint8_t * dest, uint8_t * _ref, + int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + int offset; + + ref = vis_alignaddr(ref); + offset = (ref != _ref) ? 16 : 0; + + vis_ld64(ref[0], TMP0); + + vis_ld64_2(ref, 8, TMP2); + + vis_ld64_2(ref, offset, TMP4); + ref += stride; + + vis_ld64(ref[0], TMP6); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64_2(ref, 8, TMP8); + vis_faligndata(TMP2, TMP4, REF_4); + + vis_ld64_2(ref, offset, TMP10); + ref += stride; + + vis_ld64(constants_fe[0], MASK_fe); + vis_faligndata(TMP6, TMP8, REF_2); + + vis_ld64(constants_7f[0], MASK_7f); + vis_faligndata(TMP8, TMP10, REF_6); + + vis_ld64(constants128[0], CONST_128); + height = (height >> 1) - 1; + do { /* 24 cycles */ + vis_ld64(ref[0], TMP0); + vis_xor(REF_0, REF_2, TMP12); + + vis_ld64_2(ref, 8, TMP2); + vis_xor(REF_4, REF_6, TMP16); + + vis_ld64_2(ref, offset, TMP4); + ref += stride; + vis_or(REF_0, REF_2, TMP14); + + vis_ld64(ref[0], TMP6); + vis_or(REF_4, REF_6, TMP18); + + vis_ld64_2(ref, 8, TMP8); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64_2(ref, offset, TMP10); + ref += stride; + vis_faligndata(TMP2, TMP4, REF_4); + + vis_and(TMP12, MASK_fe, TMP12); + + vis_and(TMP16, MASK_fe, TMP16); + vis_mul8x16(CONST_128, TMP12, TMP12); + + vis_mul8x16(CONST_128, TMP16, TMP16); + vis_xor(REF_0, REF_2, TMP0); + + vis_xor(REF_4, REF_6, TMP2); + + vis_or(REF_0, REF_2, TMP20); + + vis_and(TMP12, MASK_7f, TMP12); + + vis_and(TMP16, MASK_7f, TMP16); + + vis_psub16(TMP14, TMP12, TMP12); + vis_st64(TMP12, dest[0]); + + vis_psub16(TMP18, TMP16, TMP16); + vis_st64_2(TMP16, dest, 8); + dest += stride; + + vis_or(REF_4, REF_6, TMP18); + + vis_and(TMP0, MASK_fe, TMP0); + + vis_and(TMP2, MASK_fe, TMP2); + vis_mul8x16(CONST_128, TMP0, TMP0); + + 
vis_faligndata(TMP6, TMP8, REF_2); + vis_mul8x16(CONST_128, TMP2, TMP2); + + vis_faligndata(TMP8, TMP10, REF_6); + + vis_and(TMP0, MASK_7f, TMP0); + + vis_and(TMP2, MASK_7f, TMP2); + + vis_psub16(TMP20, TMP0, TMP0); + vis_st64(TMP0, dest[0]); + + vis_psub16(TMP18, TMP2, TMP2); + vis_st64_2(TMP2, dest, 8); + dest += stride; + } while (--height); + + vis_ld64(ref[0], TMP0); + vis_xor(REF_0, REF_2, TMP12); + + vis_ld64_2(ref, 8, TMP2); + vis_xor(REF_4, REF_6, TMP16); + + vis_ld64_2(ref, offset, TMP4); + vis_or(REF_0, REF_2, TMP14); + + vis_or(REF_4, REF_6, TMP18); + + vis_faligndata(TMP0, TMP2, REF_0); + + vis_faligndata(TMP2, TMP4, REF_4); + + vis_and(TMP12, MASK_fe, TMP12); + + vis_and(TMP16, MASK_fe, TMP16); + vis_mul8x16(CONST_128, TMP12, TMP12); + + vis_mul8x16(CONST_128, TMP16, TMP16); + vis_xor(REF_0, REF_2, TMP0); + + vis_xor(REF_4, REF_6, TMP2); + + vis_or(REF_0, REF_2, TMP20); + + vis_and(TMP12, MASK_7f, TMP12); + + vis_and(TMP16, MASK_7f, TMP16); + + vis_psub16(TMP14, TMP12, TMP12); + vis_st64(TMP12, dest[0]); + + vis_psub16(TMP18, TMP16, TMP16); + vis_st64_2(TMP16, dest, 8); + dest += stride; + + vis_or(REF_4, REF_6, TMP18); + + vis_and(TMP0, MASK_fe, TMP0); + + vis_and(TMP2, MASK_fe, TMP2); + vis_mul8x16(CONST_128, TMP0, TMP0); + + vis_mul8x16(CONST_128, TMP2, TMP2); + + vis_and(TMP0, MASK_7f, TMP0); + + vis_and(TMP2, MASK_7f, TMP2); + + vis_psub16(TMP20, TMP0, TMP0); + vis_st64(TMP0, dest[0]); + + vis_psub16(TMP18, TMP2, TMP2); + vis_st64_2(TMP2, dest, 8); +} + +static void MC_put_y_8_vis (uint8_t * dest, uint8_t * _ref, + int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + int offset; + + ref = vis_alignaddr(ref); + offset = (ref != _ref) ? 
8 : 0; + + vis_ld64(ref[0], TMP0); + + vis_ld64_2(ref, offset, TMP2); + ref += stride; + + vis_ld64(ref[0], TMP4); + + vis_ld64_2(ref, offset, TMP6); + ref += stride; + + vis_ld64(constants_fe[0], MASK_fe); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64(constants_7f[0], MASK_7f); + vis_faligndata(TMP4, TMP6, REF_2); + + vis_ld64(constants128[0], CONST_128); + height = (height >> 1) - 1; + do { /* 12 cycles */ + vis_ld64(ref[0], TMP0); + vis_xor(REF_0, REF_2, TMP4); + + vis_ld64_2(ref, offset, TMP2); + ref += stride; + vis_and(TMP4, MASK_fe, TMP4); + + vis_or(REF_0, REF_2, TMP6); + vis_mul8x16(CONST_128, TMP4, TMP4); + + vis_faligndata(TMP0, TMP2, REF_0); + vis_ld64(ref[0], TMP0); + + vis_ld64_2(ref, offset, TMP2); + ref += stride; + vis_xor(REF_0, REF_2, TMP12); + + vis_and(TMP4, MASK_7f, TMP4); + + vis_and(TMP12, MASK_fe, TMP12); + + vis_mul8x16(CONST_128, TMP12, TMP12); + vis_or(REF_0, REF_2, TMP14); + + vis_psub16(TMP6, TMP4, DST_0); + vis_st64(DST_0, dest[0]); + dest += stride; + + vis_faligndata(TMP0, TMP2, REF_2); + + vis_and(TMP12, MASK_7f, TMP12); + + vis_psub16(TMP14, TMP12, DST_0); + vis_st64(DST_0, dest[0]); + dest += stride; + } while (--height); + + vis_ld64(ref[0], TMP0); + vis_xor(REF_0, REF_2, TMP4); + + vis_ld64_2(ref, offset, TMP2); + vis_and(TMP4, MASK_fe, TMP4); + + vis_or(REF_0, REF_2, TMP6); + vis_mul8x16(CONST_128, TMP4, TMP4); + + vis_faligndata(TMP0, TMP2, REF_0); + + vis_xor(REF_0, REF_2, TMP12); + + vis_and(TMP4, MASK_7f, TMP4); + + vis_and(TMP12, MASK_fe, TMP12); + + vis_mul8x16(CONST_128, TMP12, TMP12); + vis_or(REF_0, REF_2, TMP14); + + vis_psub16(TMP6, TMP4, DST_0); + vis_st64(DST_0, dest[0]); + dest += stride; + + vis_and(TMP12, MASK_7f, TMP12); + + vis_psub16(TMP14, TMP12, DST_0); + vis_st64(DST_0, dest[0]); +} + +static void MC_avg_y_16_vis (uint8_t * dest, uint8_t * _ref, + int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + int stride_8 = stride + 8; + int stride_16; + int offset; + + vis_set_gsr(5 << 
VIS_GSR_SCALEFACT_SHIFT); + + ref = vis_alignaddr(ref); + offset = (ref != _ref) ? 16 : 0; + + vis_ld64(ref[ 0], TMP0); + vis_fzero(ZERO); + + vis_ld64(ref[ 8], TMP2); + + vis_ld64_2(ref, offset, TMP4); + stride_16 = stride + offset; + + vis_ld64(constants3[0], CONST_3); + vis_faligndata(TMP0, TMP2, REF_2); + + vis_ld64(constants256_512[0], CONST_256); + vis_faligndata(TMP2, TMP4, REF_6); + height >>= 1; + + do { /* 31 cycles */ + vis_ld64_2(ref, stride, TMP0); + vis_pmerge(ZERO, REF_2, TMP12); + vis_mul8x16au(REF_2_1, CONST_256, TMP14); + + vis_ld64_2(ref, stride_8, TMP2); + vis_pmerge(ZERO, REF_6, TMP16); + vis_mul8x16au(REF_6_1, CONST_256, TMP18); + + vis_ld64_2(ref, stride_16, TMP4); + ref += stride; + + vis_ld64(dest[0], DST_0); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64_2(dest, 8, DST_2); + vis_faligndata(TMP2, TMP4, REF_4); + + vis_ld64_2(ref, stride, TMP6); + vis_pmerge(ZERO, REF_0, TMP0); + vis_mul8x16au(REF_0_1, CONST_256, TMP2); + + vis_ld64_2(ref, stride_8, TMP8); + vis_pmerge(ZERO, REF_4, TMP4); + + vis_ld64_2(ref, stride_16, TMP10); + ref += stride; + + vis_ld64_2(dest, stride, REF_S0/*DST_4*/); + vis_faligndata(TMP6, TMP8, REF_2); + vis_mul8x16au(REF_4_1, CONST_256, TMP6); + + vis_ld64_2(dest, stride_8, REF_S2/*DST_6*/); + vis_faligndata(TMP8, TMP10, REF_6); + vis_mul8x16al(DST_0, CONST_512, TMP20); + + vis_padd16(TMP0, CONST_3, TMP0); + vis_mul8x16al(DST_1, CONST_512, TMP22); + + vis_padd16(TMP2, CONST_3, TMP2); + vis_mul8x16al(DST_2, CONST_512, TMP24); + + vis_padd16(TMP4, CONST_3, TMP4); + vis_mul8x16al(DST_3, CONST_512, TMP26); + + vis_padd16(TMP6, CONST_3, TMP6); + + vis_padd16(TMP12, TMP20, TMP12); + vis_mul8x16al(REF_S0, CONST_512, TMP20); + + vis_padd16(TMP14, TMP22, TMP14); + vis_mul8x16al(REF_S0_1, CONST_512, TMP22); + + vis_padd16(TMP16, TMP24, TMP16); + vis_mul8x16al(REF_S2, CONST_512, TMP24); + + vis_padd16(TMP18, TMP26, TMP18); + vis_mul8x16al(REF_S2_1, CONST_512, TMP26); + + vis_padd16(TMP12, TMP0, TMP12); + vis_mul8x16au(REF_2, 
CONST_256, TMP28); + + vis_padd16(TMP14, TMP2, TMP14); + vis_mul8x16au(REF_2_1, CONST_256, TMP30); + + vis_padd16(TMP16, TMP4, TMP16); + vis_mul8x16au(REF_6, CONST_256, REF_S4); + + vis_padd16(TMP18, TMP6, TMP18); + vis_mul8x16au(REF_6_1, CONST_256, REF_S6); + + vis_pack16(TMP12, DST_0); + vis_padd16(TMP28, TMP0, TMP12); + + vis_pack16(TMP14, DST_1); + vis_st64(DST_0, dest[0]); + vis_padd16(TMP30, TMP2, TMP14); + + vis_pack16(TMP16, DST_2); + vis_padd16(REF_S4, TMP4, TMP16); + + vis_pack16(TMP18, DST_3); + vis_st64_2(DST_2, dest, 8); + dest += stride; + vis_padd16(REF_S6, TMP6, TMP18); + + vis_padd16(TMP12, TMP20, TMP12); + + vis_padd16(TMP14, TMP22, TMP14); + vis_pack16(TMP12, DST_0); + + vis_padd16(TMP16, TMP24, TMP16); + vis_pack16(TMP14, DST_1); + vis_st64(DST_0, dest[0]); + + vis_padd16(TMP18, TMP26, TMP18); + vis_pack16(TMP16, DST_2); + + vis_pack16(TMP18, DST_3); + vis_st64_2(DST_2, dest, 8); + dest += stride; + } while (--height); +} + +static void MC_avg_y_8_vis (uint8_t * dest, uint8_t * _ref, + int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + int stride_8; + int offset; + + vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT); + + ref = vis_alignaddr(ref); + offset = (ref != _ref) ? 
8 : 0; + + vis_ld64(ref[ 0], TMP0); + vis_fzero(ZERO); + + vis_ld64_2(ref, offset, TMP2); + stride_8 = stride + offset; + + vis_ld64(constants3[0], CONST_3); + vis_faligndata(TMP0, TMP2, REF_2); + + vis_ld64(constants256_512[0], CONST_256); + + height >>= 1; + do { /* 20 cycles */ + vis_ld64_2(ref, stride, TMP0); + vis_pmerge(ZERO, REF_2, TMP8); + vis_mul8x16au(REF_2_1, CONST_256, TMP10); + + vis_ld64_2(ref, stride_8, TMP2); + ref += stride; + + vis_ld64(dest[0], DST_0); + + vis_ld64_2(dest, stride, DST_2); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64_2(ref, stride, TMP4); + vis_mul8x16al(DST_0, CONST_512, TMP16); + vis_pmerge(ZERO, REF_0, TMP12); + + vis_ld64_2(ref, stride_8, TMP6); + ref += stride; + vis_mul8x16al(DST_1, CONST_512, TMP18); + vis_pmerge(ZERO, REF_0_1, TMP14); + + vis_padd16(TMP12, CONST_3, TMP12); + vis_mul8x16al(DST_2, CONST_512, TMP24); + + vis_padd16(TMP14, CONST_3, TMP14); + vis_mul8x16al(DST_3, CONST_512, TMP26); + + vis_faligndata(TMP4, TMP6, REF_2); + + vis_padd16(TMP8, TMP12, TMP8); + + vis_padd16(TMP10, TMP14, TMP10); + vis_mul8x16au(REF_2, CONST_256, TMP20); + + vis_padd16(TMP8, TMP16, TMP0); + vis_mul8x16au(REF_2_1, CONST_256, TMP22); + + vis_padd16(TMP10, TMP18, TMP2); + vis_pack16(TMP0, DST_0); + + vis_pack16(TMP2, DST_1); + vis_st64(DST_0, dest[0]); + dest += stride; + vis_padd16(TMP12, TMP20, TMP12); + + vis_padd16(TMP14, TMP22, TMP14); + + vis_padd16(TMP12, TMP24, TMP0); + + vis_padd16(TMP14, TMP26, TMP2); + vis_pack16(TMP0, DST_2); + + vis_pack16(TMP2, DST_3); + vis_st64(DST_2, dest[0]); + dest += stride; + } while (--height); +} + +static void MC_put_xy_16_vis (uint8_t * dest, uint8_t * _ref, + int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + unsigned long off = (unsigned long) ref & 0x7; + unsigned long off_plus_1 = off + 1; + int stride_8 = stride + 8; + int stride_16 = stride + 16; + + vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT); + + ref = vis_alignaddr(ref); + + vis_ld64(ref[ 0], TMP0); + vis_fzero(ZERO); + 
+ vis_ld64(ref[ 8], TMP2); + + vis_ld64(ref[16], TMP4); + + vis_ld64(constants2[0], CONST_2); + vis_faligndata(TMP0, TMP2, REF_S0); + + vis_ld64(constants256_512[0], CONST_256); + vis_faligndata(TMP2, TMP4, REF_S4); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_S2); + vis_faligndata(TMP2, TMP4, REF_S6); + } else { + vis_src1(TMP2, REF_S2); + vis_src1(TMP4, REF_S6); + } + + height >>= 1; + do { + vis_ld64_2(ref, stride, TMP0); + vis_mul8x16au(REF_S0, CONST_256, TMP12); + vis_pmerge(ZERO, REF_S0_1, TMP14); + + vis_alignaddr_g0((void *)off); + + vis_ld64_2(ref, stride_8, TMP2); + vis_mul8x16au(REF_S2, CONST_256, TMP16); + vis_pmerge(ZERO, REF_S2_1, TMP18); + + vis_ld64_2(ref, stride_16, TMP4); + ref += stride; + vis_mul8x16au(REF_S4, CONST_256, TMP20); + vis_pmerge(ZERO, REF_S4_1, TMP22); + + vis_ld64_2(ref, stride, TMP6); + vis_mul8x16au(REF_S6, CONST_256, TMP24); + vis_pmerge(ZERO, REF_S6_1, TMP26); + + vis_ld64_2(ref, stride_8, TMP8); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64_2(ref, stride_16, TMP10); + ref += stride; + vis_faligndata(TMP2, TMP4, REF_4); + + vis_faligndata(TMP6, TMP8, REF_S0); + + vis_faligndata(TMP8, TMP10, REF_S4); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_2); + vis_faligndata(TMP2, TMP4, REF_6); + vis_faligndata(TMP6, TMP8, REF_S2); + vis_faligndata(TMP8, TMP10, REF_S6); + } else { + vis_src1(TMP2, REF_2); + vis_src1(TMP4, REF_6); + vis_src1(TMP8, REF_S2); + vis_src1(TMP10, REF_S6); + } + + vis_mul8x16au(REF_0, CONST_256, TMP0); + vis_pmerge(ZERO, REF_0_1, TMP2); + + vis_mul8x16au(REF_2, CONST_256, TMP4); + vis_pmerge(ZERO, REF_2_1, TMP6); + + vis_padd16(TMP0, CONST_2, TMP8); + vis_mul8x16au(REF_4, CONST_256, TMP0); + + vis_padd16(TMP2, CONST_2, TMP10); + vis_mul8x16au(REF_4_1, CONST_256, TMP2); + + vis_padd16(TMP8, TMP4, TMP8); + vis_mul8x16au(REF_6, CONST_256, TMP4); + + vis_padd16(TMP10, TMP6, TMP10); + vis_mul8x16au(REF_6_1, 
CONST_256, TMP6); + + vis_padd16(TMP12, TMP8, TMP12); + + vis_padd16(TMP14, TMP10, TMP14); + + vis_padd16(TMP12, TMP16, TMP12); + + vis_padd16(TMP14, TMP18, TMP14); + vis_pack16(TMP12, DST_0); + + vis_pack16(TMP14, DST_1); + vis_st64(DST_0, dest[0]); + vis_padd16(TMP0, CONST_2, TMP12); + + vis_mul8x16au(REF_S0, CONST_256, TMP0); + vis_padd16(TMP2, CONST_2, TMP14); + + vis_mul8x16au(REF_S0_1, CONST_256, TMP2); + vis_padd16(TMP12, TMP4, TMP12); + + vis_mul8x16au(REF_S2, CONST_256, TMP4); + vis_padd16(TMP14, TMP6, TMP14); + + vis_mul8x16au(REF_S2_1, CONST_256, TMP6); + vis_padd16(TMP20, TMP12, TMP20); + + vis_padd16(TMP22, TMP14, TMP22); + + vis_padd16(TMP20, TMP24, TMP20); + + vis_padd16(TMP22, TMP26, TMP22); + vis_pack16(TMP20, DST_2); + + vis_pack16(TMP22, DST_3); + vis_st64_2(DST_2, dest, 8); + dest += stride; + vis_padd16(TMP0, TMP4, TMP24); + + vis_mul8x16au(REF_S4, CONST_256, TMP0); + vis_padd16(TMP2, TMP6, TMP26); + + vis_mul8x16au(REF_S4_1, CONST_256, TMP2); + vis_padd16(TMP24, TMP8, TMP24); + + vis_padd16(TMP26, TMP10, TMP26); + vis_pack16(TMP24, DST_0); + + vis_pack16(TMP26, DST_1); + vis_st64(DST_0, dest[0]); + vis_pmerge(ZERO, REF_S6, TMP4); + + vis_pmerge(ZERO, REF_S6_1, TMP6); + + vis_padd16(TMP0, TMP4, TMP0); + + vis_padd16(TMP2, TMP6, TMP2); + + vis_padd16(TMP0, TMP12, TMP0); + + vis_padd16(TMP2, TMP14, TMP2); + vis_pack16(TMP0, DST_2); + + vis_pack16(TMP2, DST_3); + vis_st64_2(DST_2, dest, 8); + dest += stride; + } while (--height); +} + +static void MC_put_xy_8_vis (uint8_t * dest, uint8_t * _ref, + int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + unsigned long off = (unsigned long) ref & 0x7; + unsigned long off_plus_1 = off + 1; + int stride_8 = stride + 8; + + vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT); + + ref = vis_alignaddr(ref); + + vis_ld64(ref[ 0], TMP0); + vis_fzero(ZERO); + + vis_ld64(ref[ 8], TMP2); + + vis_ld64(constants2[0], CONST_2); + + vis_ld64(constants256_512[0], CONST_256); + vis_faligndata(TMP0, TMP2, REF_S0); + + 
if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_S2); + } else { + vis_src1(TMP2, REF_S2); + } + + height >>= 1; + do { /* 26 cycles */ + vis_ld64_2(ref, stride, TMP0); + vis_mul8x16au(REF_S0, CONST_256, TMP8); + vis_pmerge(ZERO, REF_S2, TMP12); + + vis_alignaddr_g0((void *)off); + + vis_ld64_2(ref, stride_8, TMP2); + ref += stride; + vis_mul8x16au(REF_S0_1, CONST_256, TMP10); + vis_pmerge(ZERO, REF_S2_1, TMP14); + + vis_ld64_2(ref, stride, TMP4); + + vis_ld64_2(ref, stride_8, TMP6); + ref += stride; + vis_faligndata(TMP0, TMP2, REF_S4); + + vis_pmerge(ZERO, REF_S4, TMP18); + + vis_pmerge(ZERO, REF_S4_1, TMP20); + + vis_faligndata(TMP4, TMP6, REF_S0); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_S6); + vis_faligndata(TMP4, TMP6, REF_S2); + } else { + vis_src1(TMP2, REF_S6); + vis_src1(TMP6, REF_S2); + } + + vis_padd16(TMP18, CONST_2, TMP18); + vis_mul8x16au(REF_S6, CONST_256, TMP22); + + vis_padd16(TMP20, CONST_2, TMP20); + vis_mul8x16au(REF_S6_1, CONST_256, TMP24); + + vis_mul8x16au(REF_S0, CONST_256, TMP26); + vis_pmerge(ZERO, REF_S0_1, TMP28); + + vis_mul8x16au(REF_S2, CONST_256, TMP30); + vis_padd16(TMP18, TMP22, TMP18); + + vis_mul8x16au(REF_S2_1, CONST_256, TMP32); + vis_padd16(TMP20, TMP24, TMP20); + + vis_padd16(TMP8, TMP18, TMP8); + + vis_padd16(TMP10, TMP20, TMP10); + + vis_padd16(TMP8, TMP12, TMP8); + + vis_padd16(TMP10, TMP14, TMP10); + vis_pack16(TMP8, DST_0); + + vis_pack16(TMP10, DST_1); + vis_st64(DST_0, dest[0]); + dest += stride; + vis_padd16(TMP18, TMP26, TMP18); + + vis_padd16(TMP20, TMP28, TMP20); + + vis_padd16(TMP18, TMP30, TMP18); + + vis_padd16(TMP20, TMP32, TMP20); + vis_pack16(TMP18, DST_2); + + vis_pack16(TMP20, DST_3); + vis_st64(DST_2, dest[0]); + dest += stride; + } while (--height); +} + +static void MC_avg_xy_16_vis (uint8_t * dest, uint8_t * _ref, + int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + unsigned long off = (unsigned 
long) ref & 0x7; + unsigned long off_plus_1 = off + 1; + int stride_8 = stride + 8; + int stride_16 = stride + 16; + + vis_set_gsr(4 << VIS_GSR_SCALEFACT_SHIFT); + + ref = vis_alignaddr(ref); + + vis_ld64(ref[ 0], TMP0); + vis_fzero(ZERO); + + vis_ld64(ref[ 8], TMP2); + + vis_ld64(ref[16], TMP4); + + vis_ld64(constants6[0], CONST_6); + vis_faligndata(TMP0, TMP2, REF_S0); + + vis_ld64(constants256_1024[0], CONST_256); + vis_faligndata(TMP2, TMP4, REF_S4); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_S2); + vis_faligndata(TMP2, TMP4, REF_S6); + } else { + vis_src1(TMP2, REF_S2); + vis_src1(TMP4, REF_S6); + } + + height >>= 1; + do { /* 55 cycles */ + vis_ld64_2(ref, stride, TMP0); + vis_mul8x16au(REF_S0, CONST_256, TMP12); + vis_pmerge(ZERO, REF_S0_1, TMP14); + + vis_alignaddr_g0((void *)off); + + vis_ld64_2(ref, stride_8, TMP2); + vis_mul8x16au(REF_S2, CONST_256, TMP16); + vis_pmerge(ZERO, REF_S2_1, TMP18); + + vis_ld64_2(ref, stride_16, TMP4); + ref += stride; + vis_mul8x16au(REF_S4, CONST_256, TMP20); + vis_pmerge(ZERO, REF_S4_1, TMP22); + + vis_ld64_2(ref, stride, TMP6); + vis_mul8x16au(REF_S6, CONST_256, TMP24); + vis_pmerge(ZERO, REF_S6_1, TMP26); + + vis_ld64_2(ref, stride_8, TMP8); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64_2(ref, stride_16, TMP10); + ref += stride; + vis_faligndata(TMP2, TMP4, REF_4); + + vis_ld64(dest[0], DST_0); + vis_faligndata(TMP6, TMP8, REF_S0); + + vis_ld64_2(dest, 8, DST_2); + vis_faligndata(TMP8, TMP10, REF_S4); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_2); + vis_faligndata(TMP2, TMP4, REF_6); + vis_faligndata(TMP6, TMP8, REF_S2); + vis_faligndata(TMP8, TMP10, REF_S6); + } else { + vis_src1(TMP2, REF_2); + vis_src1(TMP4, REF_6); + vis_src1(TMP8, REF_S2); + vis_src1(TMP10, REF_S6); + } + + vis_mul8x16al(DST_0, CONST_1024, TMP30); + vis_pmerge(ZERO, REF_0, TMP0); + + vis_mul8x16al(DST_1, CONST_1024, TMP32); + vis_pmerge(ZERO, 
REF_0_1, TMP2); + + vis_mul8x16au(REF_2, CONST_256, TMP4); + vis_pmerge(ZERO, REF_2_1, TMP6); + + vis_mul8x16al(DST_2, CONST_1024, REF_0); + vis_padd16(TMP0, CONST_6, TMP0); + + vis_mul8x16al(DST_3, CONST_1024, REF_2); + vis_padd16(TMP2, CONST_6, TMP2); + + vis_padd16(TMP0, TMP4, TMP0); + vis_mul8x16au(REF_4, CONST_256, TMP4); + + vis_padd16(TMP2, TMP6, TMP2); + vis_mul8x16au(REF_4_1, CONST_256, TMP6); + + vis_padd16(TMP12, TMP0, TMP12); + vis_mul8x16au(REF_6, CONST_256, TMP8); + + vis_padd16(TMP14, TMP2, TMP14); + vis_mul8x16au(REF_6_1, CONST_256, TMP10); + + vis_padd16(TMP12, TMP16, TMP12); + vis_mul8x16au(REF_S0, CONST_256, REF_4); + + vis_padd16(TMP14, TMP18, TMP14); + vis_mul8x16au(REF_S0_1, CONST_256, REF_6); + + vis_padd16(TMP12, TMP30, TMP12); + + vis_padd16(TMP14, TMP32, TMP14); + vis_pack16(TMP12, DST_0); + + vis_pack16(TMP14, DST_1); + vis_st64(DST_0, dest[0]); + vis_padd16(TMP4, CONST_6, TMP4); + + vis_ld64_2(dest, stride, DST_0); + vis_padd16(TMP6, CONST_6, TMP6); + vis_mul8x16au(REF_S2, CONST_256, TMP12); + + vis_padd16(TMP4, TMP8, TMP4); + vis_mul8x16au(REF_S2_1, CONST_256, TMP14); + + vis_padd16(TMP6, TMP10, TMP6); + + vis_padd16(TMP20, TMP4, TMP20); + + vis_padd16(TMP22, TMP6, TMP22); + + vis_padd16(TMP20, TMP24, TMP20); + + vis_padd16(TMP22, TMP26, TMP22); + + vis_padd16(TMP20, REF_0, TMP20); + vis_mul8x16au(REF_S4, CONST_256, REF_0); + + vis_padd16(TMP22, REF_2, TMP22); + vis_pack16(TMP20, DST_2); + + vis_pack16(TMP22, DST_3); + vis_st64_2(DST_2, dest, 8); + dest += stride; + + vis_ld64_2(dest, 8, DST_2); + vis_mul8x16al(DST_0, CONST_1024, TMP30); + vis_pmerge(ZERO, REF_S4_1, REF_2); + + vis_mul8x16al(DST_1, CONST_1024, TMP32); + vis_padd16(REF_4, TMP0, TMP8); + + vis_mul8x16au(REF_S6, CONST_256, REF_4); + vis_padd16(REF_6, TMP2, TMP10); + + vis_mul8x16au(REF_S6_1, CONST_256, REF_6); + vis_padd16(TMP8, TMP12, TMP8); + + vis_padd16(TMP10, TMP14, TMP10); + + vis_padd16(TMP8, TMP30, TMP8); + + vis_padd16(TMP10, TMP32, TMP10); + vis_pack16(TMP8, 
DST_0); + + vis_pack16(TMP10, DST_1); + vis_st64(DST_0, dest[0]); + + vis_padd16(REF_0, TMP4, REF_0); + + vis_mul8x16al(DST_2, CONST_1024, TMP30); + vis_padd16(REF_2, TMP6, REF_2); + + vis_mul8x16al(DST_3, CONST_1024, TMP32); + vis_padd16(REF_0, REF_4, REF_0); + + vis_padd16(REF_2, REF_6, REF_2); + + vis_padd16(REF_0, TMP30, REF_0); + + /* stall */ + + vis_padd16(REF_2, TMP32, REF_2); + vis_pack16(REF_0, DST_2); + + vis_pack16(REF_2, DST_3); + vis_st64_2(DST_2, dest, 8); + dest += stride; + } while (--height); +} + +static void MC_avg_xy_8_vis (uint8_t * dest, uint8_t * _ref, + int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + unsigned long off = (unsigned long) ref & 0x7; + unsigned long off_plus_1 = off + 1; + int stride_8 = stride + 8; + + vis_set_gsr(4 << VIS_GSR_SCALEFACT_SHIFT); + + ref = vis_alignaddr(ref); + + vis_ld64(ref[0], TMP0); + vis_fzero(ZERO); + + vis_ld64_2(ref, 8, TMP2); + + vis_ld64(constants6[0], CONST_6); + + vis_ld64(constants256_1024[0], CONST_256); + vis_faligndata(TMP0, TMP2, REF_S0); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_S2); + } else { + vis_src1(TMP2, REF_S2); + } + + height >>= 1; + do { /* 31 cycles */ + vis_ld64_2(ref, stride, TMP0); + vis_mul8x16au(REF_S0, CONST_256, TMP8); + vis_pmerge(ZERO, REF_S0_1, TMP10); + + vis_ld64_2(ref, stride_8, TMP2); + ref += stride; + vis_mul8x16au(REF_S2, CONST_256, TMP12); + vis_pmerge(ZERO, REF_S2_1, TMP14); + + vis_alignaddr_g0((void *)off); + + vis_ld64_2(ref, stride, TMP4); + vis_faligndata(TMP0, TMP2, REF_S4); + + vis_ld64_2(ref, stride_8, TMP6); + ref += stride; + + vis_ld64(dest[0], DST_0); + vis_faligndata(TMP4, TMP6, REF_S0); + + vis_ld64_2(dest, stride, DST_2); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_S6); + vis_faligndata(TMP4, TMP6, REF_S2); + } else { + vis_src1(TMP2, REF_S6); + vis_src1(TMP6, REF_S2); + } + + vis_mul8x16al(DST_0, CONST_1024, TMP30); + 
vis_pmerge(ZERO, REF_S4, TMP22); + + vis_mul8x16al(DST_1, CONST_1024, TMP32); + vis_pmerge(ZERO, REF_S4_1, TMP24); + + vis_mul8x16au(REF_S6, CONST_256, TMP26); + vis_pmerge(ZERO, REF_S6_1, TMP28); + + vis_mul8x16au(REF_S0, CONST_256, REF_S4); + vis_padd16(TMP22, CONST_6, TMP22); + + vis_mul8x16au(REF_S0_1, CONST_256, REF_S6); + vis_padd16(TMP24, CONST_6, TMP24); + + vis_mul8x16al(DST_2, CONST_1024, REF_0); + vis_padd16(TMP22, TMP26, TMP22); + + vis_mul8x16al(DST_3, CONST_1024, REF_2); + vis_padd16(TMP24, TMP28, TMP24); + + vis_mul8x16au(REF_S2, CONST_256, TMP26); + vis_padd16(TMP8, TMP22, TMP8); + + vis_mul8x16au(REF_S2_1, CONST_256, TMP28); + vis_padd16(TMP10, TMP24, TMP10); + + vis_padd16(TMP8, TMP12, TMP8); + + vis_padd16(TMP10, TMP14, TMP10); + + vis_padd16(TMP8, TMP30, TMP8); + + vis_padd16(TMP10, TMP32, TMP10); + vis_pack16(TMP8, DST_0); + + vis_pack16(TMP10, DST_1); + vis_st64(DST_0, dest[0]); + dest += stride; + + vis_padd16(REF_S4, TMP22, TMP12); + + vis_padd16(REF_S6, TMP24, TMP14); + + vis_padd16(TMP12, TMP26, TMP12); + + vis_padd16(TMP14, TMP28, TMP14); + + vis_padd16(TMP12, REF_0, TMP12); + + vis_padd16(TMP14, REF_2, TMP14); + vis_pack16(TMP12, DST_2); + + vis_pack16(TMP14, DST_3); + vis_st64(DST_2, dest[0]); + dest += stride; + } while (--height); +} + +MPEG2_MC_EXTERN(vis); + +#endif /* defined(ARCH_SPARC) && defined(ENABLE_VIS) */ diff --git a/src/video_dec/libmpeg2/mpeg2.h b/src/video_dec/libmpeg2/mpeg2.h new file mode 100644 index 000000000..ae69688f5 --- /dev/null +++ b/src/video_dec/libmpeg2/mpeg2.h @@ -0,0 +1,100 @@ +/* + * mpeg2.h + * Copyright (C) 1999-2001 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* Structure for the mpeg2dec decoder */ + +#ifndef MPEG2_H +#define MPEG2_H + +#include "libmpeg2_accel.h" + +typedef struct mpeg2dec_s { + xine_video_port_t * output; + uint32_t frame_format; + + /* this is where we keep the state of the decoder */ + struct picture_s * picture; + void *picture_base; + + uint32_t shift; + int new_sequence; + int is_sequence_needed; + int is_wait_for_ip_frames; + int frames_to_drop, drop_frame; + int in_slice; + int seek_mode, is_frame_needed; + + /* the maximum chunk size is determined by vbv_buffer_size */ + /* which is 224K for MP@ML streams. */ + /* (we make no pretenses of decoding anything more than that) */ + /* allocated in init - gcc has problems allocating such big structures */ + uint8_t * chunk_buffer; + void *chunk_base; + /* pointer to current position in chunk_buffer */ + uint8_t * chunk_ptr; + /* last start code ? */ + uint8_t code; + uint32_t chunk_size; + + int64_t pts; + uint32_t rff_pattern; + int force_aspect; + int force_pan_scan; + + /* AFD data can be found after a sequence, group or picture start code */ + /* and will be stored in afd_value_seen. 
Later it will be transferred to */ + /* a stream property and stored into afd_value_reported to detect changes */ + int afd_value_seen; + int afd_value_reported; + + xine_stream_t *stream; + + /* a spu decoder for possible closed captions */ + spu_decoder_t *cc_dec; + mpeg2dec_accel_t accel; + +} mpeg2dec_t ; + + +/* initialize mpegdec with an opaque user pointer */ +void mpeg2_init (mpeg2dec_t * mpeg2dec, + xine_video_port_t * output); + +/* destroy everything which was allocated, shutdown the output */ +void mpeg2_close (mpeg2dec_t * mpeg2dec); + +int mpeg2_decode_data (mpeg2dec_t * mpeg2dec, + uint8_t * data_start, uint8_t * data_end, + uint64_t pts); + +void mpeg2_find_sequence_header (mpeg2dec_t * mpeg2dec, + uint8_t * data_start, uint8_t * data_end); + +void mpeg2_flush (mpeg2dec_t * mpeg2dec); +void mpeg2_reset (mpeg2dec_t * mpeg2dec); +void mpeg2_discontinuity (mpeg2dec_t * mpeg2dec); + +/* Not needed, it is defined as static in decode.c, and no-one else called it + * currently + */ +/* void process_userdata(mpeg2dec_t *mpeg2dec, uint8_t *buffer); */ + +#endif diff --git a/src/video_dec/libmpeg2/mpeg2_internal.h b/src/video_dec/libmpeg2/mpeg2_internal.h new file mode 100644 index 000000000..eeaa16227 --- /dev/null +++ b/src/video_dec/libmpeg2/mpeg2_internal.h @@ -0,0 +1,294 @@ +/* + * mpeg2_internal.h + * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org> + * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef MPEG2_INTERNAL_H +#define MPEG2_INTERNAL_H + +#include <xine/video_out.h> +#include "accel_xvmc.h" + +#ifdef ENABLE_ALTIVEC +#include <altivec.h> +#endif + +/* macroblock modes */ +#define MACROBLOCK_INTRA XINE_MACROBLOCK_INTRA +#define MACROBLOCK_PATTERN XINE_MACROBLOCK_PATTERN +#define MACROBLOCK_MOTION_BACKWARD XINE_MACROBLOCK_MOTION_BACKWARD +#define MACROBLOCK_MOTION_FORWARD XINE_MACROBLOCK_MOTION_FORWARD +#define MACROBLOCK_QUANT XINE_MACROBLOCK_QUANT +#define DCT_TYPE_INTERLACED XINE_MACROBLOCK_DCT_TYPE_INTERLACED + +/* motion_type */ +#define MOTION_TYPE_MASK (3*64) +#define MOTION_TYPE_BASE 64 +#define MC_FIELD (1*64) +#define MC_FRAME (2*64) +#define MC_16X8 (2*64) +#define MC_DMV (3*64) + +/* picture structure */ +#define TOP_FIELD VO_TOP_FIELD +#define BOTTOM_FIELD VO_BOTTOM_FIELD +#define FRAME_PICTURE VO_BOTH_FIELDS + +/* picture coding type (mpeg2 header) */ +#define I_TYPE 1 +#define P_TYPE 2 +#define B_TYPE 3 +#define D_TYPE 4 + +typedef struct motion_s { + uint8_t * ref[2][3]; + uint8_t ** ref2[2]; + int pmv[2][2]; + int f_code[2]; +} motion_t; + +typedef struct picture_s { + /* first, state that carries information from one macroblock to the */ + /* next inside a slice, and is never used outside of mpeg2_slice() */ + + /* DCT coefficients - should be kept aligned ! 
*/ + int16_t DCTblock[64]; + + /* XvMC DCT block and macroblock data for XvMC acceleration */ + xine_macroblocks_t *mc; + int XvMC_mb_type; + int XvMC_mv_field_sel[2][2]; + int XvMC_x; + int XvMC_y; + int XvMC_motion_type; + int XvMC_dmvector[2]; + int XvMC_cbp; + int XvMC_dct_type; + + /* bit parsing stuff */ + uint32_t bitstream_buf; /* current 32 bit working set of buffer */ + int bitstream_bits; /* used bits in working set */ + uint8_t * bitstream_ptr; /* buffer with stream data */ + + uint8_t * dest[3]; + int pitches[3]; + int offset; + unsigned int limit_x; + unsigned int limit_y_16; + unsigned int limit_y_8; + unsigned int limit_y; + + /* Motion vectors */ + /* The f_ and b_ correspond to the forward and backward motion */ + /* predictors */ + motion_t b_motion; + motion_t f_motion; + + /* predictor for DC coefficients in intra blocks */ + int16_t dc_dct_pred[3]; + + int quantizer_scale; /* remove */ + int current_field; /* remove */ + int dmv_offset; /* remove */ + unsigned int v_offset; /* remove */ + + + /* now non-slice-specific information */ + + /* sequence header stuff */ + uint8_t intra_quantizer_matrix [64]; + uint8_t non_intra_quantizer_matrix [64]; + int load_intra_quantizer_matrix; + int load_non_intra_quantizer_matrix; + + /* The width and height of the picture snapped to macroblock units */ + int coded_picture_width; + int coded_picture_height; + + /* The width and height as it appears on header sequence */ + unsigned int display_width, display_height; + + /* picture header stuff */ + + /* what type of picture this is (I, P, B, D) */ + int picture_coding_type; + + int vbv_delay; + int low_delay; + + /* picture coding extension stuff */ + + /* quantization factor for intra dc coefficients */ + int intra_dc_precision; + /* top/bottom/both fields */ + int picture_structure; + /* bool to indicate all predictions are frame based */ + int frame_pred_frame_dct; + /* bool to indicate whether intra blocks have motion vectors */ + /* (for concealment) */ 
+ int concealment_motion_vectors; + /* bit to indicate which quantization table to use */ + int q_scale_type; + /* bool to use different vlc tables */ + int intra_vlc_format; + /* used for DMV MC */ + int top_field_first; + + /* stuff derived from bitstream */ + + /* pointer to the zigzag scan we're supposed to be using */ + uint8_t * scan; + + struct vo_frame_s * current_frame; + struct vo_frame_s * forward_reference_frame; + struct vo_frame_s * backward_reference_frame; + + int frame_width, frame_height; + + int second_field; + + int mpeg1; + + int skip_non_intra_dct; + + /* these things are not needed by the decoder */ + /* this is a temporary interface, we will build a better one later. */ + int aspect_ratio_information; + int saved_aspect_ratio; + int frame_rate_code; + int progressive_sequence; + int repeat_first_field; + int progressive_frame; + int32_t frame_centre_horizontal_offset; + int32_t frame_centre_vertical_offset; + uint32_t video_format; + uint32_t colour_description; + uint32_t colour_primatives; + uint32_t transfer_characteristics; + uint32_t matrix_coefficients; + uint32_t display_horizontal_size; + uint32_t display_vertical_size; + uint32_t drop_frame_flag; + uint32_t time_code_hours; + uint32_t time_code_minutes; + uint32_t time_code_seconds; + uint32_t time_code_pictures; + uint32_t closed_gop; + uint32_t broken_link; + + int bitrate; + int frame_rate_ext_n; + int frame_rate_ext_d; + +} picture_t; + +typedef struct cpu_state_s { +#ifdef ARCH_PPC + uint8_t regv[12*16]; +#endif + int dummy; +} cpu_state_t; + +/* cpu_state.c */ +extern void (* mpeg2_cpu_state_save) (cpu_state_t * state); +extern void (* mpeg2_cpu_state_restore) (cpu_state_t * state); +void mpeg2_cpu_state_init (uint32_t mm_accel); + +/* header.c */ +extern uint8_t mpeg2_scan_norm[64]; +extern uint8_t mpeg2_scan_alt[64]; +void mpeg2_header_state_init (picture_t * picture); +int mpeg2_header_picture (picture_t * picture, uint8_t * buffer); +int mpeg2_header_sequence (picture_t * 
picture, uint8_t * buffer); +int mpeg2_header_extension (picture_t * picture, uint8_t * buffer); +int mpeg2_header_group_of_pictures (picture_t * picture, uint8_t * buffer); + +/* idct.c */ +extern void (* mpeg2_idct_copy) (int16_t * block, uint8_t * dest, int stride); +extern void (* mpeg2_idct_add) (int16_t * block, uint8_t * dest, int stride); +extern void (* mpeg2_idct) (int16_t * block); +extern void (* mpeg2_zero_block) (int16_t * block); +void mpeg2_idct_init (uint32_t mm_accel); + +/* idct_mlib.c */ +void mpeg2_idct_add_mlib (int16_t * block, uint8_t * dest, int stride); +void mpeg2_idct_copy_mlib_non_ieee (int16_t * block, uint8_t * dest, + int stride); +void mpeg2_idct_add_mlib_non_ieee (int16_t * block, uint8_t * dest, + int stride); +void mpeg2_idct_mlib (int16_t * block); + +/* idct_mmx.c */ +void mpeg2_idct_copy_mmxext (int16_t * block, uint8_t * dest, int stride); +void mpeg2_idct_add_mmxext (int16_t * block, uint8_t * dest, int stride); +void mpeg2_idct_mmxext (int16_t * block); +void mpeg2_idct_copy_mmx (int16_t * block, uint8_t * dest, int stride); +void mpeg2_idct_add_mmx (int16_t * block, uint8_t * dest, int stride); +void mpeg2_idct_mmx (int16_t * block); +void mpeg2_zero_block_mmx (int16_t * block); +void mpeg2_idct_mmx_init (void); + +/* idct_altivec.c */ +# ifdef ENABLE_ALTIVEC +void mpeg2_idct_copy_altivec (vector signed short * block, unsigned char * dest, + int stride); +void mpeg2_idct_add_altivec (vector signed short * block, unsigned char * dest, + int stride); +# else /* ! 
ENABLE_ALTIVEC */ +void mpeg2_idct_copy_altivec (signed short * block, unsigned char * dest, + int stride); +void mpeg2_idct_add_altivec (signed short * block, unsigned char * dest, + int stride); +# endif /* ENABLE_ALTIVEC */ +void mpeg2_idct_altivec_init (void); + +/* motion_comp.c */ +void mpeg2_mc_init (uint32_t mm_accel); + +typedef struct mpeg2_mc_s { + void (* put [8]) (uint8_t * dst, uint8_t *, int32_t, int32_t); + void (* avg [8]) (uint8_t * dst, uint8_t *, int32_t, int32_t); +} mpeg2_mc_t; + +#define MPEG2_MC_EXTERN(x) mpeg2_mc_t mpeg2_mc_##x = { \ + {MC_put_o_16_##x, MC_put_x_16_##x, MC_put_y_16_##x, MC_put_xy_16_##x, \ + MC_put_o_8_##x, MC_put_x_8_##x, MC_put_y_8_##x, MC_put_xy_8_##x}, \ + {MC_avg_o_16_##x, MC_avg_x_16_##x, MC_avg_y_16_##x, MC_avg_xy_16_##x, \ + MC_avg_o_8_##x, MC_avg_x_8_##x, MC_avg_y_8_##x, MC_avg_xy_8_##x} \ +}; + +extern mpeg2_mc_t mpeg2_mc; +extern mpeg2_mc_t mpeg2_mc_c; +extern mpeg2_mc_t mpeg2_mc_mmx; +extern mpeg2_mc_t mpeg2_mc_mmxext; +extern mpeg2_mc_t mpeg2_mc_3dnow; +extern mpeg2_mc_t mpeg2_mc_altivec; +extern mpeg2_mc_t mpeg2_mc_mlib; +extern mpeg2_mc_t mpeg2_mc_vis; + +/* slice.c */ +void mpeg2_slice (picture_t * picture, int code, uint8_t * buffer); + +/* stats.c */ +void mpeg2_stats (int code, uint8_t * buffer); + + +#endif diff --git a/src/video_dec/libmpeg2/slice.c b/src/video_dec/libmpeg2/slice.c new file mode 100644 index 000000000..8247a9a24 --- /dev/null +++ b/src/video_dec/libmpeg2/slice.c @@ -0,0 +1,1833 @@ +/* + * slice.c + * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org> + * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. 
+ * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#include <inttypes.h> + +#include <xine/xine_internal.h> +#include <xine/video_out.h> +#include "mpeg2_internal.h" +#include <xine/attributes.h> + +#include "vlc.h" + +static const int non_linear_quantizer_scale [] = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 10, 12, 14, 16, 18, 20, 22, + 24, 28, 32, 36, 40, 44, 48, 52, + 56, 64, 72, 80, 88, 96, 104, 112 +}; + +static inline int get_macroblock_modes (picture_t * picture) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + int macroblock_modes; + const MBtab * tab; + + switch (picture->picture_coding_type) { + case I_TYPE: + + tab = MB_I + UBITS (bit_buf, 1); + DUMPBITS (bit_buf, bits, tab->len); + macroblock_modes = tab->modes; + + if ((! 
(picture->frame_pred_frame_dct)) && + (picture->picture_structure == FRAME_PICTURE)) { + macroblock_modes |= UBITS (bit_buf, 1) * DCT_TYPE_INTERLACED; + DUMPBITS (bit_buf, bits, 1); + } + + return macroblock_modes; + + case P_TYPE: + + tab = MB_P + UBITS (bit_buf, 5); + DUMPBITS (bit_buf, bits, tab->len); + macroblock_modes = tab->modes; + + if (picture->picture_structure != FRAME_PICTURE) { + if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) { + macroblock_modes |= UBITS (bit_buf, 2) * MOTION_TYPE_BASE; + DUMPBITS (bit_buf, bits, 2); + } + return macroblock_modes; + } else if (picture->frame_pred_frame_dct) { + if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) + macroblock_modes |= MC_FRAME; + return macroblock_modes; + } else { + if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) { + macroblock_modes |= UBITS (bit_buf, 2) * MOTION_TYPE_BASE; + DUMPBITS (bit_buf, bits, 2); + } + if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN)) { + macroblock_modes |= UBITS (bit_buf, 1) * DCT_TYPE_INTERLACED; + DUMPBITS (bit_buf, bits, 1); + } + return macroblock_modes; + } + + case B_TYPE: + + tab = MB_B + UBITS (bit_buf, 6); + DUMPBITS (bit_buf, bits, tab->len); + macroblock_modes = tab->modes; + + if (picture->picture_structure != FRAME_PICTURE) { + if (! (macroblock_modes & MACROBLOCK_INTRA)) { + macroblock_modes |= UBITS (bit_buf, 2) * MOTION_TYPE_BASE; + DUMPBITS (bit_buf, bits, 2); + } + return macroblock_modes; + } else if (picture->frame_pred_frame_dct) { + /* if (! 
(macroblock_modes & MACROBLOCK_INTRA)) */ + macroblock_modes |= MC_FRAME; + return macroblock_modes; + } else { + if (macroblock_modes & MACROBLOCK_INTRA) + goto intra; + macroblock_modes |= UBITS (bit_buf, 2) * MOTION_TYPE_BASE; + DUMPBITS (bit_buf, bits, 2); + if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN)) { + intra: + macroblock_modes |= UBITS (bit_buf, 1) * DCT_TYPE_INTERLACED; + DUMPBITS (bit_buf, bits, 1); + } + return macroblock_modes; + } + + case D_TYPE: + + DUMPBITS (bit_buf, bits, 1); + return MACROBLOCK_INTRA; + + default: + return 0; + } +#undef bit_buf +#undef bits +#undef bit_ptr +} + +static inline int get_quantizer_scale (picture_t * picture) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + + int quantizer_scale_code; + + quantizer_scale_code = UBITS (bit_buf, 5); + DUMPBITS (bit_buf, bits, 5); + + if (picture->q_scale_type) + return non_linear_quantizer_scale [quantizer_scale_code]; + else + return quantizer_scale_code << 1; +#undef bit_buf +#undef bits +#undef bit_ptr +} + +static inline int get_motion_delta (picture_t * picture, int f_code) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + + int delta; + int sign; + const MVtab * tab; + + if (bit_buf & 0x80000000) { + DUMPBITS (bit_buf, bits, 1); + return 0; + } else if (bit_buf >= 0x0c000000) { + + tab = MV_4 + UBITS (bit_buf, 4); + delta = (tab->delta << f_code) + 1; + bits += tab->len + f_code + 1; + bit_buf <<= tab->len; + + sign = SBITS (bit_buf, 1); + bit_buf <<= 1; + + if (f_code) + delta += UBITS (bit_buf, f_code); + bit_buf <<= f_code; + + return (delta ^ sign) - sign; + + } else { + + tab = MV_10 + UBITS (bit_buf, 10); + delta = (tab->delta << f_code) + 1; + bits += tab->len + 1; + bit_buf <<= tab->len; + + sign = SBITS (bit_buf, 1); + bit_buf <<= 1; + + if (f_code) { + NEEDBITS (bit_buf, bits, bit_ptr); + delta 
+= UBITS (bit_buf, f_code); + DUMPBITS (bit_buf, bits, f_code); + } + + return (delta ^ sign) - sign; + + } +#undef bit_buf +#undef bits +#undef bit_ptr +} + /* Keep a motion vector within the range allowed by f_code. With limit = 16 << f_code, vec is returned unchanged when -limit <= vec < limit; otherwise 2 * limit is subtracted from a non-negative vec or added to a negative one (assuming >> on a negative int is an arithmetic shift). NOTE(review): the disabled #else branch uses the name vector, which is not declared in this function (the parameter is vec), so it would not compile if the #if were flipped. */ +static inline int bound_motion_vector (int vec, int f_code) +{ +#if 1 + unsigned int limit; + int sign; + + limit = 16 << f_code; + + if ((unsigned int)(vec + limit) < 2 * limit) + return vec; + else { + sign = ((int32_t)vec) >> 31; + return vec - ((2 * limit) ^ sign) + sign; + } +#else + return ((int32_t)vector << (27 - f_code)) >> (27 - f_code); +#endif +} + /* Decode one dmv value: DMV_2 is indexed with the next two bits, tab->len bits are consumed and tab->dmv is returned. */ +static inline int get_dmv (picture_t * picture) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + + const DMVtab * tab; + + tab = DMV_2 + UBITS (bit_buf, 2); + DUMPBITS (bit_buf, bits, tab->len); + return tab->dmv; +#undef bit_buf +#undef bits +#undef bit_ptr +} + /* Decode the coded block pattern. After refilling the bit buffer, codes with bit_buf >= 0x20000000 are looked up in CBP_7 (index: top 7 bits minus 16), all others in CBP_9 (top 9 bits); tab->len bits are consumed and tab->cbp is returned. */ +static inline int get_coded_block_pattern (picture_t * picture) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + + const CBPtab * tab; + + NEEDBITS (bit_buf, bits, bit_ptr); + + if (bit_buf >= 0x20000000) { + + tab = CBP_7 + (UBITS (bit_buf, 7) - 16); + DUMPBITS (bit_buf, bits, tab->len); + return tab->cbp; + + } else { + + tab = CBP_9 + UBITS (bit_buf, 9); + DUMPBITS (bit_buf, bits, tab->len); + return tab->cbp; + } + +#undef bit_buf +#undef bits +#undef bit_ptr +} + /* Decode the DC differential of a luma block. For bit_buf < 0xf8000000 the size comes from DC_lum_5 (a size of 0 consumes 3 bits and returns 0); otherwise it comes from DC_long. The following size bits are converted to a signed differential. */ +static inline int get_luma_dc_dct_diff (picture_t * picture) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + const DCtab * tab; + int size; + int dc_diff; + + if (bit_buf < 0xf8000000) { + tab = DC_lum_5 + UBITS (bit_buf, 5); + size = tab->size; + if (size) { + bits += tab->len + size; + bit_buf <<= tab->len; + dc_diff = + UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size); + bit_buf <<= size; + return dc_diff; + } else { + DUMPBITS (bit_buf, bits, 3); + return 0; + } + } else { + tab
= DC_long + (UBITS (bit_buf, 9) - 0x1e0); + size = tab->size; + DUMPBITS (bit_buf, bits, tab->len); + NEEDBITS (bit_buf, bits, bit_ptr); + dc_diff = UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size); + DUMPBITS (bit_buf, bits, size); + return dc_diff; + } +#undef bit_buf +#undef bits +#undef bit_ptr +} + /* Decode the DC differential of a chroma block. Same scheme as get_luma_dc_dct_diff, but with DC_chrom_5 for the short codes (a size of 0 consumes 2 bits) and DC_long indexed by the top 10 bits minus 0x3e0 for the long ones. */ +static inline int get_chroma_dc_dct_diff (picture_t * picture) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + const DCtab * tab; + int size; + int dc_diff; + + if (bit_buf < 0xf8000000) { + tab = DC_chrom_5 + UBITS (bit_buf, 5); + size = tab->size; + if (size) { + bits += tab->len + size; + bit_buf <<= tab->len; + dc_diff = + UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size); + bit_buf <<= size; + return dc_diff; + } else { + DUMPBITS (bit_buf, bits, 2); + return 0; + } + } else { + tab = DC_long + (UBITS (bit_buf, 10) - 0x3e0); + size = tab->size; + DUMPBITS (bit_buf, bits, tab->len + 1); + NEEDBITS (bit_buf, bits, bit_ptr); + dc_diff = UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size); + DUMPBITS (bit_buf, bits, size); + return dc_diff; + } +#undef bit_buf +#undef bits +#undef bit_ptr +} + /* Clamp val to the range -2048..2047. val is evaluated several times and assigned to, so the argument has to be a side-effect-free lvalue. */ +#define SATURATE(val) \ +do { \ + if ((uint32_t)(val + 2048) > 4095) \ + val = (val > 0) ?
2047 : -2048; \ +} while (0) + /* Decode the remaining coefficients of an intra block coded with the B14 tables (DCT_B14AC_5, DCT_B14_8, DCT_B14_10, DCT_13, DCT_15, DCT_16) into picture->DCTblock. Each level is scaled by quantizer_scale and the intra quantizer matrix entry for its scan position, then saturated. The bitstream state is copied into locals on entry and written back on exit. mismatch starts as ~dest[0], is XORed with every stored value, and its low bit is XORed into dest[63] at the end. Decoding stops at the end of block code or as soon as the coefficient index would reach 64. */ +static void get_intra_block_B14 (picture_t * picture) +{ + int i; + int j; + int val; + uint8_t * scan = picture->scan; + uint8_t * quant_matrix = picture->intra_quantizer_matrix; + int quantizer_scale = picture->quantizer_scale; + int mismatch; + const DCTtab * tab; + uint32_t bit_buf; + int bits; + uint8_t * bit_ptr; + int16_t * dest; + + dest = picture->DCTblock; + i = 0; + mismatch = ~dest[0]; + + bit_buf = picture->bitstream_buf; + bits = picture->bitstream_bits; + bit_ptr = picture->bitstream_ptr; + + NEEDBITS (bit_buf, bits, bit_ptr); + + while (1) { + if (bit_buf >= 0x28000000) { + + tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5); + + i += tab->run; + if (i >= 64) + break; /* end of block */ + + normal_code: + j = scan[i]; + bit_buf <<= tab->len; + bits += tab->len + 1; + val = (tab->level * quantizer_scale * quant_matrix[j]) >> 4; + + /* if (bitstream_get (1)) val = -val; */ + val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1); + + SATURATE (val); + dest[j] = val; + mismatch ^= val; + + bit_buf <<= 1; + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } else if (bit_buf >= 0x04000000) { + + tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4); + + i += tab->run; + if (i < 64) + goto normal_code; + + /* escape code */ + + i += UBITS (bit_buf << 6, 6) - 64; + if (i >= 64) + break; /* illegal, check needed to avoid buffer overflow */ + + j = scan[i]; + + DUMPBITS (bit_buf, bits, 12); + NEEDBITS (bit_buf, bits, bit_ptr); + val = (SBITS (bit_buf, 12) * + quantizer_scale * quant_matrix[j]) / 16; + + SATURATE (val); + dest[j] = val; + mismatch ^= val; + + DUMPBITS (bit_buf, bits, 12); + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } else if (bit_buf >= 0x02000000) { + tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00800000) { + tab = DCT_13 + (UBITS (bit_buf, 13) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >=
0x00200000) { + tab = DCT_15 + (UBITS (bit_buf, 15) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else { + tab = DCT_16 + UBITS (bit_buf, 16); + bit_buf <<= 16; + GETWORD (bit_buf, bits + 16, bit_ptr); + i += tab->run; + if (i < 64) + goto normal_code; + } + break; /* illegal, check needed to avoid buffer overflow */ + } + dest[63] ^= mismatch & 1; + DUMPBITS (bit_buf, bits, 2); /* dump end of block code */ + picture->bitstream_buf = bit_buf; + picture->bitstream_bits = bits; + picture->bitstream_ptr = bit_ptr; +} + /* Decode the remaining coefficients of an intra block coded with the B15 tables (DCT_B15_8 and DCT_B15_10, plus the shared DCT_13, DCT_15 and DCT_16). Dequantisation, saturation and mismatch handling follow the same steps as get_intra_block_B14; the end of block code dumped on exit is 4 bits long here. */ +static void get_intra_block_B15 (picture_t * picture) +{ + int i; + int j; + int val; + uint8_t * scan = picture->scan; + uint8_t * quant_matrix = picture->intra_quantizer_matrix; + int quantizer_scale = picture->quantizer_scale; + int mismatch; + const DCTtab * tab; + uint32_t bit_buf; + int bits; + uint8_t * bit_ptr; + int16_t * dest; + + dest = picture->DCTblock; + i = 0; + mismatch = ~dest[0]; + + bit_buf = picture->bitstream_buf; + bits = picture->bitstream_bits; + bit_ptr = picture->bitstream_ptr; + + NEEDBITS (bit_buf, bits, bit_ptr); + + while (1) { + if (bit_buf >= 0x04000000) { + + tab = DCT_B15_8 + (UBITS (bit_buf, 8) - 4); + + i += tab->run; + if (i < 64) { + + normal_code: + j = scan[i]; + bit_buf <<= tab->len; + bits += tab->len + 1; + val = (tab->level * quantizer_scale * quant_matrix[j]) >> 4; + + /* if (bitstream_get (1)) val = -val; */ + val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1); + + SATURATE (val); + dest[j] = val; + mismatch ^= val; + + bit_buf <<= 1; + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } else { + + /* end of block.
I commented out this code because if we */ + /* dont exit here we will still exit at the later test :) */ + + /* if (i >= 128) break; */ /* end of block */ + + /* escape code */ + + i += UBITS (bit_buf << 6, 6) - 64; + if (i >= 64) + break; /* illegal, check against buffer overflow */ + + j = scan[i]; + + DUMPBITS (bit_buf, bits, 12); + NEEDBITS (bit_buf, bits, bit_ptr); + val = (SBITS (bit_buf, 12) * + quantizer_scale * quant_matrix[j]) / 16; + + SATURATE (val); + dest[j] = val; + mismatch ^= val; + + DUMPBITS (bit_buf, bits, 12); + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } + } else if (bit_buf >= 0x02000000) { + tab = DCT_B15_10 + (UBITS (bit_buf, 10) - 8); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00800000) { + tab = DCT_13 + (UBITS (bit_buf, 13) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00200000) { + tab = DCT_15 + (UBITS (bit_buf, 15) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else { + tab = DCT_16 + UBITS (bit_buf, 16); + bit_buf <<= 16; + GETWORD (bit_buf, bits + 16, bit_ptr); + i += tab->run; + if (i < 64) + goto normal_code; + } + break; /* illegal, check needed to avoid buffer overflow */ + } + dest[63] ^= mismatch & 1; + DUMPBITS (bit_buf, bits, 4); /* dump end of block code */ + picture->bitstream_buf = bit_buf; + picture->bitstream_bits = bits; + picture->bitstream_ptr = bit_ptr; +} + /* Decode the coefficients of a non-intra block into picture->DCTblock using the non-intra quantizer matrix. Table levels are reconstructed as ((2 * level + 1) * quantizer_scale * matrix) >> 5. The index starts at -1 and mismatch at 1; the low bit of mismatch is XORed into dest[63] at the end. */ +static void get_non_intra_block (picture_t * picture) +{ + int i; + int j; + int val; + uint8_t * scan = picture->scan; + uint8_t * quant_matrix = picture->non_intra_quantizer_matrix; + int quantizer_scale = picture->quantizer_scale; + int mismatch; + const DCTtab * tab; + uint32_t bit_buf; + int bits; + uint8_t * bit_ptr; + int16_t * dest; + + i = -1; + mismatch = 1; + dest = picture->DCTblock; + + bit_buf = picture->bitstream_buf; + bits = picture->bitstream_bits; + bit_ptr = picture->bitstream_ptr; + + NEEDBITS (bit_buf, bits, bit_ptr); + if (bit_buf >=
0x28000000) { + tab = DCT_B14DC_5 + (UBITS (bit_buf, 5) - 5); + goto entry_1; + } else + goto entry_2; + /* The first code is looked up in DCT_B14DC_5 instead of DCT_B14AC_5, which is why the loop below is entered through the entry_1 / entry_2 labels rather than from its top. */ + while (1) { + if (bit_buf >= 0x28000000) { + + tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5); + + entry_1: + i += tab->run; + if (i >= 64) + break; /* end of block */ + + normal_code: + j = scan[i]; + bit_buf <<= tab->len; + bits += tab->len + 1; + val = ((2*tab->level+1) * quantizer_scale * quant_matrix[j]) >> 5; + + /* if (bitstream_get (1)) val = -val; */ + val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1); + + SATURATE (val); + dest[j] = val; + mismatch ^= val; + + bit_buf <<= 1; + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } + + entry_2: + if (bit_buf >= 0x04000000) { + + tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4); + + i += tab->run; + if (i < 64) + goto normal_code; + + /* escape code */ + + i += UBITS (bit_buf << 6, 6) - 64; + if (i >= 64) + break; /* illegal, check needed to avoid buffer overflow */ + + j = scan[i]; + + DUMPBITS (bit_buf, bits, 12); + NEEDBITS (bit_buf, bits, bit_ptr); + val = 2 * (SBITS (bit_buf, 12) + SBITS (bit_buf, 1)) + 1; + val = (val * quantizer_scale * quant_matrix[j]) / 32; + + SATURATE (val); + dest[j] = val; + mismatch ^= val; + + DUMPBITS (bit_buf, bits, 12); + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } else if (bit_buf >= 0x02000000) { + tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00800000) { + tab = DCT_13 + (UBITS (bit_buf, 13) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00200000) { + tab = DCT_15 + (UBITS (bit_buf, 15) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else { + tab = DCT_16 + UBITS (bit_buf, 16); + bit_buf <<= 16; + GETWORD (bit_buf, bits + 16, bit_ptr); + i += tab->run; + if (i < 64) + goto normal_code; + } + break; /* illegal, check needed to avoid buffer overflow */ + } + dest[63] ^= mismatch & 1; + DUMPBITS (bit_buf, bits, 2); /* dump end of block
code */ + picture->bitstream_buf = bit_buf; + picture->bitstream_bits = bits; + picture->bitstream_ptr = bit_ptr; +} + /* MPEG-1 variant of the intra coefficient decoder. It uses the B14 tables, but every reconstructed value is forced odd (the steps marked oddification) and no mismatch value is tracked. In the escape path the level is read as a signed 8-bit value and, when its low 7 bits are zero, extended with a further 8 bits. */ +static void get_mpeg1_intra_block (picture_t * picture) +{ + int i; + int j; + int val; + uint8_t * scan = picture->scan; + uint8_t * quant_matrix = picture->intra_quantizer_matrix; + int quantizer_scale = picture->quantizer_scale; + const DCTtab * tab; + uint32_t bit_buf; + int bits; + uint8_t * bit_ptr; + int16_t * dest; + + i = 0; + dest = picture->DCTblock; + + bit_buf = picture->bitstream_buf; + bits = picture->bitstream_bits; + bit_ptr = picture->bitstream_ptr; + + NEEDBITS (bit_buf, bits, bit_ptr); + + while (1) { + if (bit_buf >= 0x28000000) { + + tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5); + + i += tab->run; + if (i >= 64) + break; /* end of block */ + + normal_code: + j = scan[i]; + bit_buf <<= tab->len; + bits += tab->len + 1; + val = (tab->level * quantizer_scale * quant_matrix[j]) >> 4; + + /* oddification */ + val = (val - 1) | 1; + + /* if (bitstream_get (1)) val = -val; */ + val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1); + + SATURATE (val); + dest[j] = val; + + bit_buf <<= 1; + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } else if (bit_buf >= 0x04000000) { + + tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4); + + i += tab->run; + if (i < 64) + goto normal_code; + + /* escape code */ + + i += UBITS (bit_buf << 6, 6) - 64; + if (i >= 64) + break; /* illegal, check needed to avoid buffer overflow */ + + j = scan[i]; + + DUMPBITS (bit_buf, bits, 12); + NEEDBITS (bit_buf, bits, bit_ptr); + val = SBITS (bit_buf, 8); + if (!
(val & 0x7f)) { + DUMPBITS (bit_buf, bits, 8); + val = UBITS (bit_buf, 8) + 2 * val; + } + val = (val * quantizer_scale * quant_matrix[j]) / 16; + + /* oddification */ + val = (val + ~SBITS (val, 1)) | 1; + + SATURATE (val); + dest[j] = val; + + DUMPBITS (bit_buf, bits, 8); + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } else if (bit_buf >= 0x02000000) { + tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00800000) { + tab = DCT_13 + (UBITS (bit_buf, 13) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00200000) { + tab = DCT_15 + (UBITS (bit_buf, 15) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else { + tab = DCT_16 + UBITS (bit_buf, 16); + bit_buf <<= 16; + GETWORD (bit_buf, bits + 16, bit_ptr); + i += tab->run; + if (i < 64) + goto normal_code; + } + break; /* illegal, check needed to avoid buffer overflow */ + } + DUMPBITS (bit_buf, bits, 2); /* dump end of block code */ + picture->bitstream_buf = bit_buf; + picture->bitstream_bits = bits; + picture->bitstream_ptr = bit_ptr; +} + /* MPEG-1 variant of the non-intra coefficient decoder: same entry scheme as get_non_intra_block (first code from DCT_B14DC_5, index starting at -1), with the oddification step applied to every value and no mismatch tracking. */ +static void get_mpeg1_non_intra_block (picture_t * picture) +{ + int i; + int j; + int val; + uint8_t * scan = picture->scan; + uint8_t * quant_matrix = picture->non_intra_quantizer_matrix; + int quantizer_scale = picture->quantizer_scale; + const DCTtab * tab; + uint32_t bit_buf; + int bits; + uint8_t * bit_ptr; + int16_t * dest; + + i = -1; + dest = picture->DCTblock; + + bit_buf = picture->bitstream_buf; + bits = picture->bitstream_bits; + bit_ptr = picture->bitstream_ptr; + + NEEDBITS (bit_buf, bits, bit_ptr); + if (bit_buf >= 0x28000000) { + tab = DCT_B14DC_5 + (UBITS (bit_buf, 5) - 5); + goto entry_1; + } else + goto entry_2; + + while (1) { + if (bit_buf >= 0x28000000) { + + tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5); + + entry_1: + i += tab->run; + if (i >= 64) + break; /* end of block */ + + normal_code: + j = scan[i]; + bit_buf <<= tab->len;
+ bits += tab->len + 1; + val = ((2*tab->level+1) * quantizer_scale * quant_matrix[j]) >> 5; + + /* oddification */ + val = (val - 1) | 1; + + /* if (bitstream_get (1)) val = -val; */ + val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1); + + SATURATE (val); + dest[j] = val; + + bit_buf <<= 1; + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } + + entry_2: + if (bit_buf >= 0x04000000) { + + tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4); + + i += tab->run; + if (i < 64) + goto normal_code; + + /* escape code */ + + i += UBITS (bit_buf << 6, 6) - 64; + if (i >= 64) + break; /* illegal, check needed to avoid buffer overflow */ + + j = scan[i]; + + DUMPBITS (bit_buf, bits, 12); + NEEDBITS (bit_buf, bits, bit_ptr); + val = SBITS (bit_buf, 8); + if (! (val & 0x7f)) { + DUMPBITS (bit_buf, bits, 8); + val = UBITS (bit_buf, 8) + 2 * val; + } + val = 2 * (val + SBITS (val, 1)) + 1; + val = (val * quantizer_scale * quant_matrix[j]) / 32; + + /* oddification */ + val = (val + ~SBITS (val, 1)) | 1; + + SATURATE (val); + dest[j] = val; + + DUMPBITS (bit_buf, bits, 8); + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } else if (bit_buf >= 0x02000000) { + tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00800000) { + tab = DCT_13 + (UBITS (bit_buf, 13) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00200000) { + tab = DCT_15 + (UBITS (bit_buf, 15) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else { + tab = DCT_16 + UBITS (bit_buf, 16); + bit_buf <<= 16; + GETWORD (bit_buf, bits + 16, bit_ptr); + i += tab->run; + if (i < 64) + goto normal_code; + } + break; /* illegal, check needed to avoid buffer overflow */ + } + DUMPBITS (bit_buf, bits, 2); /* dump end of block code */ + picture->bitstream_buf = bit_buf; + picture->bitstream_bits = bits; + picture->bitstream_ptr = bit_ptr; +} + /* Decode one intra block of component cc and write it to dest. The DC predictor dc_dct_pred[cc] is updated with the luma (cc == 0) or chroma differential and stored in DCTblock[0] shifted left by 3 - intra_dc_precision. The remaining coefficients are decoded by get_mpeg1_intra_block (MPEG-1, skipped for D_TYPE pictures), get_intra_block_B15 (intra_vlc_format set) or get_intra_block_B14, and the block is handed to mpeg2_idct_copy. */ +static inline void slice_intra_DCT (picture_t * picture, int cc,
+ uint8_t * dest, int stride) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + NEEDBITS (bit_buf, bits, bit_ptr); + /* Get the intra DC coefficient and inverse quantize it */ + if (cc == 0) + picture->dc_dct_pred[0] += get_luma_dc_dct_diff (picture); + else + picture->dc_dct_pred[cc] += get_chroma_dc_dct_diff (picture); + picture->DCTblock[0] = + picture->dc_dct_pred[cc] << (3 - picture->intra_dc_precision); + + if (picture->mpeg1) { + if (picture->picture_coding_type != D_TYPE) + get_mpeg1_intra_block (picture); + } else if (picture->intra_vlc_format) + get_intra_block_B15 (picture); + else + get_intra_block_B14 (picture); + mpeg2_idct_copy (picture->DCTblock, dest, stride); +#undef bit_buf +#undef bits +#undef bit_ptr +} + /* Decode one non-intra block with the MPEG-1 or MPEG-2 coefficient decoder and pass it to mpeg2_idct_add together with dest and stride. */ +static inline void slice_non_intra_DCT (picture_t * picture, uint8_t * dest, + int stride) +{ + if (picture->mpeg1) + get_mpeg1_non_intra_block (picture); + else + get_non_intra_block (picture); + mpeg2_idct_add (picture->DCTblock, dest, stride); +} + /* Run motion compensation for one prediction: clamp the luma position to limit_x and limit_y_<size>, call table[xy_half] for luma, halve motion_x and motion_y, then call table[4 + xy_half] for both chroma planes. The macro assigns to motion_x and motion_y and relies on picture, pos_x, pos_y and xy_half being declared by the caller. */ +#define MOTION(table,ref,motion_x,motion_y,size,y) \ + pos_x = 2 * picture->offset + motion_x; \ + pos_y = 2 * picture->v_offset + motion_y + 2 * y; \ + if (pos_x > picture->limit_x) { \ + pos_x = ((int)pos_x < 0) ? 0 : picture->limit_x; \ + motion_x = pos_x - 2 * picture->offset; \ + } \ + if (pos_y > picture->limit_y_ ## size){ \ + pos_y = ((int)pos_y < 0) ?
0 : picture->limit_y_ ## size; \ + motion_y = pos_y - 2 * picture->v_offset - 2 * y; \ + } \ + xy_half = ((pos_y & 1) << 1) | (pos_x & 1); \ + table[xy_half] (picture->dest[0] + y * picture->pitches[0] + \ + picture->offset, ref[0] + (pos_x >> 1) + \ + (pos_y >> 1) * picture->pitches[0], picture->pitches[0], \ + size); \ + motion_x /= 2; motion_y /= 2; \ + xy_half = ((motion_y & 1) << 1) | (motion_x & 1); \ + table[4+xy_half] (picture->dest[1] + y/2 * picture->pitches[1] + \ + (picture->offset >> 1), ref[1] + \ + (((picture->offset + motion_x) >> 1) + \ + ((((picture->v_offset + motion_y) >> 1) + y/2) * \ + picture->pitches[1])), picture->pitches[1], size/2); \ + table[4+xy_half] (picture->dest[2] + y/2 * picture->pitches[2] + \ + (picture->offset >> 1), ref[2] + \ + (((picture->offset + motion_x) >> 1) + \ + ((((picture->v_offset + motion_y) >> 1) + y/2) * \ + picture->pitches[2])), picture->pitches[2], size/2) \ + /* Field-based counterpart of MOTION: the destination starts at line dest_field, the source line is (pos_y op) + src_field, and both are stepped with twice the pitch, passing 8 (luma) and 4 (chroma) as the last argument. It assigns to motion_x and motion_y and uses picture, pos_x, pos_y and xy_half from the caller. */ +#define MOTION_FIELD(table,ref,motion_x,motion_y,dest_field,op,src_field) \ + pos_x = 2 * picture->offset + motion_x; \ + pos_y = picture->v_offset + motion_y; \ + if (pos_x > picture->limit_x) { \ + pos_x = ((int)pos_x < 0) ? 0 : picture->limit_x; \ + motion_x = pos_x - 2 * picture->offset; \ + } \ + if (pos_y > picture->limit_y){ \ + pos_y = ((int)pos_y < 0) ?
0 : picture->limit_y; \ + motion_y = pos_y - picture->v_offset; \ + } \ + xy_half = ((pos_y & 1) << 1) | (pos_x & 1); \ + table[xy_half] (picture->dest[0] + dest_field * picture->pitches[0] + \ + picture->offset, \ + (ref[0] + (pos_x >> 1) + \ + ((pos_y op) + src_field) * picture->pitches[0]), \ + 2 * picture->pitches[0], 8); \ + motion_x /= 2; motion_y /= 2; \ + xy_half = ((motion_y & 1) << 1) | (motion_x & 1); \ + table[4+xy_half] (picture->dest[1] + dest_field * picture->pitches[1] + \ + (picture->offset >> 1), ref[1] + \ + (((picture->offset + motion_x) >> 1) + \ + (((picture->v_offset >> 1) + \ + (motion_y op) + src_field) * picture->pitches[1])), \ + 2 * picture->pitches[1], 4); \ + table[4+xy_half] (picture->dest[2] + dest_field * picture->pitches[2] + \ + (picture->offset >> 1), ref[2] + \ + (((picture->offset + motion_x) >> 1) + \ + (((picture->v_offset >> 1) + \ + (motion_y op) + src_field) * picture->pitches[2])), \ + 2 * picture->pitches[2], 4) + /* MPEG-1 motion compensation: parse the horizontal and vertical deltas with f_code[0], shift them left by f_code[1], add them to pmv[0], bound the results with f_code[0] + f_code[1], store them back in pmv[0] and run MOTION with size 16 against ref[0]. */ +static void motion_mp1 (picture_t * picture, motion_t * motion, + void (** table) (uint8_t *, uint8_t *, int, int)) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + int motion_x, motion_y; + unsigned int pos_x, pos_y, xy_half; + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_x = (motion->pmv[0][0] + + (get_motion_delta (picture, + motion->f_code[0]) << motion->f_code[1])); + motion_x = bound_motion_vector (motion_x, + motion->f_code[0] + motion->f_code[1]); + motion->pmv[0][0] = motion_x; + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_y = (motion->pmv[0][1] + + (get_motion_delta (picture, + motion->f_code[0]) << motion->f_code[1])); + motion_y = bound_motion_vector (motion_y, + motion->f_code[0] + motion->f_code[1]); + motion->pmv[0][1] = motion_y; + + MOTION (table, motion->ref[0], motion_x, motion_y, 16, 0); +#undef bit_buf +#undef bits +#undef bit_ptr +} + /* Parse one motion vector (f_code[0] for x, f_code[1] for y), store it in both pmv[0] and pmv[1], and run MOTION with size 16 against ref[0]. */ +static void motion_fr_frame (picture_t * picture, motion_t * motion, +
void (** table) (uint8_t *, uint8_t *, int, int)) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + int motion_x, motion_y; + unsigned int pos_x, pos_y, xy_half; + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_x = motion->pmv[0][0] + get_motion_delta (picture, + motion->f_code[0]); + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); + motion->pmv[1][0] = motion->pmv[0][0] = motion_x; + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_y = motion->pmv[0][1] + get_motion_delta (picture, + motion->f_code[1]); + motion_y = bound_motion_vector (motion_y, motion->f_code[1]); + motion->pmv[1][1] = motion->pmv[0][1] = motion_y; + + MOTION (table, motion->ref[0], motion_x, motion_y, 16, 0); +#undef bit_buf +#undef bits +#undef bit_ptr +} + /* Parse two (field select bit, motion vector) pairs and run MOTION_FIELD for destination field 0 and then 1. The vertical predictors are halved before the delta is added and stored back doubled; the bound_motion_vector call for them is commented out. The first vector updates pmv[0], the second pmv[1]. */ +static void motion_fr_field (picture_t * picture, motion_t * motion, + void (** table) (uint8_t *, uint8_t *, int, int)) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + int motion_x, motion_y, field; + unsigned int pos_x, pos_y, xy_half; + + NEEDBITS (bit_buf, bits, bit_ptr); + field = UBITS (bit_buf, 1); + DUMPBITS (bit_buf, bits, 1); + + motion_x = motion->pmv[0][0] + get_motion_delta (picture, + motion->f_code[0]); + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); + motion->pmv[0][0] = motion_x; + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_y = (motion->pmv[0][1] >> 1) + get_motion_delta (picture, + motion->f_code[1]); + /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */ + motion->pmv[0][1] = motion_y << 1; + + MOTION_FIELD (table, motion->ref[0], motion_x, motion_y, 0, & ~1, field); + + NEEDBITS (bit_buf, bits, bit_ptr); + field = UBITS (bit_buf, 1); + DUMPBITS (bit_buf, bits, 1); + + motion_x = motion->pmv[1][0] + get_motion_delta (picture, + motion->f_code[0]); + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); +
motion->pmv[1][0] = motion_x; + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_y = (motion->pmv[1][1] >> 1) + get_motion_delta (picture, + motion->f_code[1]); + /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */ + motion->pmv[1][1] = motion_y << 1; + + MOTION_FIELD (table, motion->ref[0], motion_x, motion_y, 1, & ~1, field); +#undef bit_buf +#undef bits +#undef bit_ptr +} + /* Parse one motion vector plus a dmv pair, build two derived vectors (other_x, other_y) scaled by 1 or 3 depending on top_field_first, write their predictions with mpeg2_mc.put through MOTION_FIELD, and then blend in the prediction of the parsed vector for both fields with mpeg2_mc.avg. The table argument is not used. */ +static void motion_fr_dmv (picture_t * picture, motion_t * motion, + void (** table) (uint8_t *, uint8_t *, int, int)) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + int motion_x, motion_y, dmv_x, dmv_y, m, other_x, other_y; + unsigned int pos_x, pos_y, xy_half, offset; + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_x = motion->pmv[0][0] + get_motion_delta (picture, + motion->f_code[0]); + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); + motion->pmv[1][0] = motion->pmv[0][0] = motion_x; + NEEDBITS (bit_buf, bits, bit_ptr); + dmv_x = get_dmv (picture); + + motion_y = (motion->pmv[0][1] >> 1) + get_motion_delta (picture, + motion->f_code[1]); + /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */ + motion->pmv[1][1] = motion->pmv[0][1] = motion_y << 1; + dmv_y = get_dmv (picture); + + m = picture->top_field_first ? 1 : 3; + other_x = ((motion_x * m + (motion_x > 0)) >> 1) + dmv_x; + other_y = ((motion_y * m + (motion_y > 0)) >> 1) + dmv_y - 1; + MOTION_FIELD (mpeg2_mc.put, motion->ref[0], other_x, other_y, 0, | 1, 0); + + m = picture->top_field_first ? 3 : 1; + other_x = ((motion_x * m + (motion_x > 0)) >> 1) + dmv_x; + other_y = ((motion_y * m + (motion_y > 0)) >> 1) + dmv_y + 1; + MOTION_FIELD (mpeg2_mc.put, motion->ref[0], other_x, other_y, 1, & ~1, 0); + + pos_x = 2 * picture->offset + motion_x; + pos_y = picture->v_offset + motion_y; + if(pos_x > picture->limit_x){ + pos_x = ((int)pos_x < 0) ?
0 : picture->limit_x; + motion_x = pos_x - 2 * picture->offset; + } + if(pos_y > picture->limit_y){ + pos_y = ((int)pos_y < 0) ? 0 : picture->limit_y; + motion_y = pos_y - picture->v_offset; + } + xy_half = ((pos_y & 1) << 1) | (pos_x & 1); + offset = (pos_x >> 1) + (pos_y & ~1) * picture->pitches[0]; + mpeg2_mc.avg[xy_half] + (picture->dest[0] + picture->offset, + motion->ref[0][0] + offset, 2 * picture->pitches[0], 8); + mpeg2_mc.avg[xy_half] + (picture->dest[0] + picture->pitches[0] + picture->offset, + motion->ref[0][0] + picture->pitches[0] + offset, + 2 * picture->pitches[0], 8); + motion_x /= 2; motion_y /= 2; + xy_half = ((motion_y & 1) << 1) | (motion_x & 1); + offset = (((picture->offset + motion_x) >> 1) + + (((picture->v_offset >> 1) + (motion_y & ~1)) * + picture->pitches[1])); + mpeg2_mc.avg[4+xy_half] + (picture->dest[1] + (picture->offset >> 1), + motion->ref[0][1] + offset, 2 * picture->pitches[1], 4); + mpeg2_mc.avg[4+xy_half] + (picture->dest[1] + picture->pitches[1] + (picture->offset >> 1), + motion->ref[0][1] + picture->pitches[1] + offset, + 2 * picture->pitches[1], 4); + offset = (((picture->offset + motion_x) >> 1) + + (((picture->v_offset >> 1) + (motion_y & ~1)) * + picture->pitches[2])); + mpeg2_mc.avg[4+xy_half] + (picture->dest[2] + (picture->offset >> 1), + motion->ref[0][2] + offset, 2 * picture->pitches[2], 4); + mpeg2_mc.avg[4+xy_half] + (picture->dest[2] + picture->pitches[2] + (picture->offset >> 1), + motion->ref[0][2] + picture->pitches[2] + offset, + 2 * picture->pitches[2], 4); +#undef bit_buf +#undef bits +#undef bit_ptr +} + /* Run MOTION with size 16 using the vector already stored in pmv[0]; nothing is read from the bitstream. */ +static void motion_reuse (picture_t * picture, motion_t * motion, + void (** table) (uint8_t *, uint8_t *, int, int)) +{ + int motion_x, motion_y; + unsigned int pos_x, pos_y, xy_half; + + motion_x = motion->pmv[0][0]; + motion_y = motion->pmv[0][1]; + + MOTION (table, motion->ref[0], motion_x, motion_y, 16, 0); +} + /* Call table[0] for luma and table[4] for both chroma planes with source and destination at the same offset / v_offset position, i.e. without any motion vector. */ +static void motion_zero (picture_t * picture, motion_t * motion, + void (** table)
(uint8_t *, uint8_t *, int, int)) +{ + table[0] (picture->dest[0] + picture->offset, + (motion->ref[0][0] + picture->offset + + picture->v_offset * picture->pitches[0]), + picture->pitches[0], 16); + + table[4] (picture->dest[1] + (picture->offset >> 1), + motion->ref[0][1] + (picture->offset >> 1) + + (picture->v_offset >> 1) * picture->pitches[1], + picture->pitches[1], 8); + table[4] (picture->dest[2] + (picture->offset >> 1), + motion->ref[0][2] + (picture->offset >> 1) + + (picture->v_offset >> 1) * picture->pitches[2], + picture->pitches[2], 8); +} + +/* like motion_frame, but parsing without actual motion compensation: the vector is stored in f_motion.pmv[0] and pmv[1] and the trailing marker bit is dumped */ +static void motion_fr_conceal (picture_t * picture) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + int tmp; + + NEEDBITS (bit_buf, bits, bit_ptr); + tmp = (picture->f_motion.pmv[0][0] + + get_motion_delta (picture, picture->f_motion.f_code[0])); + tmp = bound_motion_vector (tmp, picture->f_motion.f_code[0]); + picture->f_motion.pmv[1][0] = picture->f_motion.pmv[0][0] = tmp; + + NEEDBITS (bit_buf, bits, bit_ptr); + tmp = (picture->f_motion.pmv[0][1] + + get_motion_delta (picture, picture->f_motion.f_code[1])); + tmp = bound_motion_vector (tmp, picture->f_motion.f_code[1]); + picture->f_motion.pmv[1][1] = picture->f_motion.pmv[0][1] = tmp; + + DUMPBITS (bit_buf, bits, 1); /* remove marker_bit */ +#undef bit_buf +#undef bits +#undef bit_ptr +} + /* Read one bit selecting the reference from motion->ref2, parse a motion vector, store it in pmv[0] and pmv[1], and run MOTION with size 16 against the selected reference. */ +static void motion_fi_field (picture_t * picture, motion_t * motion, + void (** table) (uint8_t *, uint8_t *, int, int)) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + int motion_x, motion_y; + uint8_t ** ref_field; + unsigned int pos_x, pos_y, xy_half; + + NEEDBITS (bit_buf, bits, bit_ptr); + ref_field = motion->ref2[UBITS (bit_buf, 1)]; + DUMPBITS (bit_buf, bits, 1); + + motion_x = motion->pmv[0][0] + get_motion_delta (picture, +
motion->f_code[0]); + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); + motion->pmv[1][0] = motion->pmv[0][0] = motion_x; + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_y = motion->pmv[0][1] + get_motion_delta (picture, + motion->f_code[1]); + motion_y = bound_motion_vector (motion_y, motion->f_code[1]); + motion->pmv[1][1] = motion->pmv[0][1] = motion_y; + + MOTION (table, ref_field, motion_x, motion_y, 16, 0); +#undef bit_buf +#undef bits +#undef bit_ptr +} + /* Parse two (reference select bit, motion vector) pairs. The first updates pmv[0] and runs MOTION with size 8 at y = 0, the second updates pmv[1] and runs MOTION with size 8 at y = 8. */ +static void motion_fi_16x8 (picture_t * picture, motion_t * motion, + void (** table) (uint8_t *, uint8_t *, int, int)) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + int motion_x, motion_y; + uint8_t ** ref_field; + unsigned int pos_x, pos_y, xy_half; + + NEEDBITS (bit_buf, bits, bit_ptr); + ref_field = motion->ref2[UBITS (bit_buf, 1)]; + DUMPBITS (bit_buf, bits, 1); + + motion_x = motion->pmv[0][0] + get_motion_delta (picture, + motion->f_code[0]); + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); + motion->pmv[0][0] = motion_x; + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_y = motion->pmv[0][1] + get_motion_delta (picture, + motion->f_code[1]); + motion_y = bound_motion_vector (motion_y, motion->f_code[1]); + motion->pmv[0][1] = motion_y; + + MOTION (table, ref_field, motion_x, motion_y, 8, 0); + + NEEDBITS (bit_buf, bits, bit_ptr); + ref_field = motion->ref2[UBITS (bit_buf, 1)]; + DUMPBITS (bit_buf, bits, 1); + + motion_x = motion->pmv[1][0] + get_motion_delta (picture, + motion->f_code[0]); + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); + motion->pmv[1][0] = motion_x; + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_y = motion->pmv[1][1] + get_motion_delta (picture, + motion->f_code[1]); + motion_y = bound_motion_vector (motion_y, motion->f_code[1]); + motion->pmv[1][1] = motion_y; + + MOTION (table, ref_field, motion_x, motion_y, 8, 8); +#undef bit_buf +#undef bits +#undef
bit_ptr +} + /* Parse one motion vector and two dmv values. The parsed vector is applied with mpeg2_mc.put against ref[0]; a second vector derived from it, the dmv values and picture->dmv_offset is applied with mpeg2_mc.avg against ref[1]. The table argument is not used. */ +static void motion_fi_dmv (picture_t * picture, motion_t * motion, + void (** table) (uint8_t *, uint8_t *, int, int)) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + int motion_x, motion_y, other_x, other_y; + unsigned int pos_x, pos_y, xy_half; + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_x = motion->pmv[0][0] + get_motion_delta (picture, + motion->f_code[0]); + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); + motion->pmv[1][0] = motion->pmv[0][0] = motion_x; + NEEDBITS (bit_buf, bits, bit_ptr); + other_x = ((motion_x + (motion_x > 0)) >> 1) + get_dmv (picture); + + motion_y = motion->pmv[0][1] + get_motion_delta (picture, + motion->f_code[1]); + motion_y = bound_motion_vector (motion_y, motion->f_code[1]); + motion->pmv[1][1] = motion->pmv[0][1] = motion_y; + other_y = (((motion_y + (motion_y > 0)) >> 1) + get_dmv (picture) + + picture->dmv_offset); + + MOTION (mpeg2_mc.put, motion->ref[0], motion_x, motion_y, 16, 0); + MOTION (mpeg2_mc.avg, motion->ref[1], other_x, other_y, 16, 0); +#undef bit_buf +#undef bits +#undef bit_ptr +} + /* Parse a motion vector without doing motion compensation: skips the field_select bit, stores the vector in f_motion.pmv[0] and pmv[1], and dumps the marker bit. */ +static void motion_fi_conceal (picture_t * picture) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + int tmp; + + NEEDBITS (bit_buf, bits, bit_ptr); + DUMPBITS (bit_buf, bits, 1); /* remove field_select */ + + tmp = (picture->f_motion.pmv[0][0] + + get_motion_delta (picture, picture->f_motion.f_code[0])); + tmp = bound_motion_vector (tmp, picture->f_motion.f_code[0]); + picture->f_motion.pmv[1][0] = picture->f_motion.pmv[0][0] = tmp; + + NEEDBITS (bit_buf, bits, bit_ptr); + tmp = (picture->f_motion.pmv[0][1] + + get_motion_delta (picture, picture->f_motion.f_code[1])); + tmp = bound_motion_vector (tmp, picture->f_motion.f_code[1]); + picture->f_motion.pmv[1][1] = picture->f_motion.pmv[0][1] = tmp; + + DUMPBITS (bit_buf, bits, 1); /* remove
/*
 * Prepare the decoder state for decoding one slice.
 *
 * Sets up pitches, reference plane pointers, motion vector predictors,
 * destination pointers, clipping limits and DC predictors in *picture, then
 * reads the quantizer scale and the first macroblock address increment from
 * the bitstream.
 *
 * picture: decoder state; its bitstream must already be initialised.
 * code:    slice code; (code - 1) is used as the macroblock row of the slice.
 *
 * Returns 0 on success.  Returns 1 when the initial macroblock address
 * increment cannot be decoded or when the resulting vertical offset lies
 * beyond picture->limit_y; the caller then skips the slice.
 */
static inline int slice_init (picture_t * picture, int code)
{
#define bit_buf (picture->bitstream_buf)
#define bits (picture->bitstream_bits)
#define bit_ptr (picture->bitstream_ptr)
    int offset, height;
    struct vo_frame_s * forward_reference_frame;
    struct vo_frame_s * backward_reference_frame;
    const MBAtab * mba;

    /* here "offset" is a flag: 1 for a bottom field, 0 otherwise */
    offset = picture->picture_structure == BOTTOM_FIELD;
    picture->pitches[0] = picture->current_frame->pitches[0];
    picture->pitches[1] = picture->current_frame->pitches[1];
    picture->pitches[2] = picture->current_frame->pitches[2];

    if( picture->forward_reference_frame ) {
      forward_reference_frame = picture->forward_reference_frame;
    }
    else {
      /* no forward reference: use the current frame instead of failing
       * (an earlier "return 1" is disabled here) */
      forward_reference_frame = picture->current_frame;
    }

    if( picture->backward_reference_frame ) {
      backward_reference_frame = picture->backward_reference_frame;
    }
    else {
      /* no backward reference: use the current frame instead of failing
       * (an earlier "return 1" is disabled here) */
      backward_reference_frame = picture->current_frame;
    }

    /* ref[0]: reference planes, shifted down by one line for a bottom field */
    picture->f_motion.ref[0][0] =
        forward_reference_frame->base[0] + (offset ? picture->pitches[0] : 0);
    picture->f_motion.ref[0][1] =
        forward_reference_frame->base[1] + (offset ? picture->pitches[1] : 0);
    picture->f_motion.ref[0][2] =
        forward_reference_frame->base[2] + (offset ? picture->pitches[2] : 0);

    picture->b_motion.ref[0][0] =
        backward_reference_frame->base[0] + (offset ? picture->pitches[0] : 0);
    picture->b_motion.ref[0][1] =
        backward_reference_frame->base[1] + (offset ? picture->pitches[1] : 0);
    picture->b_motion.ref[0][2] =
        backward_reference_frame->base[2] + (offset ? picture->pitches[2] : 0);

    if (picture->picture_structure != FRAME_PICTURE) {
	uint8_t ** forward_ref;
	int bottom_field;

	bottom_field = (picture->picture_structure == BOTTOM_FIELD);
	picture->dmv_offset = bottom_field ? 1 : -1;
	/* ref2[] is ref[] reordered so that index 0 selects ref[bottom_field]
	 * and index 1 the other one */
	picture->f_motion.ref2[0] = picture->f_motion.ref[bottom_field];
	picture->f_motion.ref2[1] = picture->f_motion.ref[!bottom_field];
	picture->b_motion.ref2[0] = picture->b_motion.ref[bottom_field];
	picture->b_motion.ref2[1] = picture->b_motion.ref[!bottom_field];

	/* for the second field of a non-B picture the opposite-parity forward
	 * reference is taken from the frame currently being decoded */
	forward_ref = forward_reference_frame->base;
	if (picture->second_field && (picture->picture_coding_type != B_TYPE))
	    forward_ref = picture->current_frame->base;

	/* ref[1]: the lines of the opposite parity to ref[0] */
	picture->f_motion.ref[1][0] = forward_ref[0] + (bottom_field ? 0 : picture->pitches[0]);
	picture->f_motion.ref[1][1] = forward_ref[1] + (bottom_field ? 0 : picture->pitches[1]);
	picture->f_motion.ref[1][2] = forward_ref[2] + (bottom_field ? 0 : picture->pitches[2]);

	picture->b_motion.ref[1][0] =
	    backward_reference_frame->base[0] + (bottom_field ? 0 : picture->pitches[0]);
	picture->b_motion.ref[1][1] =
	    backward_reference_frame->base[1] + (bottom_field ? 0 : picture->pitches[1]);
	picture->b_motion.ref[1][2] =
	    backward_reference_frame->base[2] + (bottom_field ? 0 : picture->pitches[2]);
    }

    /* motion vector predictors start at zero for every slice */
    picture->f_motion.pmv[0][0] = picture->f_motion.pmv[0][1] = 0;
    picture->f_motion.pmv[1][0] = picture->f_motion.pmv[1][1] = 0;
    picture->b_motion.pmv[0][0] = picture->b_motion.pmv[0][1] = 0;
    picture->b_motion.pmv[1][0] = picture->b_motion.pmv[1][1] = 0;

    /* from here on "offset" is the macroblock row, doubled for field
     * pictures because pitches[] still hold the frame pitch at this point */
    picture->v_offset = (code - 1) * 16;
    offset = (code - 1);
    if (picture->picture_structure != FRAME_PICTURE)
	offset = 2 * offset;

    picture->dest[0] = picture->current_frame->base[0] + picture->pitches[0] * offset * 16;
    picture->dest[1] = picture->current_frame->base[1] + picture->pitches[1] * offset * 8;
    picture->dest[2] = picture->current_frame->base[2] + picture->pitches[2] * offset * 8;

    height = picture->coded_picture_height;
    switch (picture->picture_structure) {
    case BOTTOM_FIELD:
	/* bottom field starts one line further down */
	picture->dest[0] += picture->pitches[0];
	picture->dest[1] += picture->pitches[1];
	picture->dest[2] += picture->pitches[2];
	/* fall through */
    case TOP_FIELD:
	/* field pictures: double the pitches, halve the height */
	picture->pitches[0] <<= 1;
	picture->pitches[1] <<= 1;
	picture->pitches[2] <<= 1;
	height >>= 1;
    }
    picture->limit_x = 2 * picture->coded_picture_width - 32;
    picture->limit_y_16 = 2 * height - 32;
    picture->limit_y_8 = 2 * height - 16;
    picture->limit_y = height - 16;

    picture->dc_dct_pred[0] = picture->dc_dct_pred[1] =
	picture->dc_dct_pred[2] = 1 << (picture->intra_dc_precision + 7);

    picture->quantizer_scale = get_quantizer_scale (picture);

    /* ignore intra_slice and all the extra data */
    while (bit_buf & 0x80000000) {
	DUMPBITS (bit_buf, bits, 9);
	NEEDBITS (bit_buf, bits, bit_ptr);
    }

    /* decode initial macroblock address increment */
    /* The lookups below use 6/12 bits and mba->len + 1 bits are dropped
     * afterwards, one bit more than the 5/11-bit lookups in mpeg2_slice.
     * Presumably the extra bit is the zero bit that terminated the loop
     * above and has not been dumped yet -- confirm. */
    offset = 0;
    while (1) {
	if (bit_buf >= 0x08000000) {
	    mba = MBA_5 + (UBITS (bit_buf, 6) - 2);
	    break;
	} else if (bit_buf >= 0x01800000) {
	    mba = MBA_11 + (UBITS (bit_buf, 12) - 24);
	    break;
	} else switch (UBITS (bit_buf, 12)) {
	case 8:		/* macroblock_escape */
	    offset += 33;
	    DUMPBITS (bit_buf, bits, 11);
	    NEEDBITS (bit_buf, bits, bit_ptr);
	    continue;
	case 15:	/* macroblock_stuffing (MPEG1 only) */
	    bit_buf &= 0xfffff;
	    DUMPBITS (bit_buf, bits, 11);
	    NEEDBITS (bit_buf, bits, bit_ptr);
	    continue;
	default:	/* error */
	    return 1;
	}
    }
    DUMPBITS (bit_buf, bits, mba->len + 1);
    /* horizontal position in pixels: 16 per macroblock */
    picture->offset = (offset + mba->mba) << 4;

    /* wrap the horizontal position into following macroblock rows */
    while (picture->offset - picture->coded_picture_width >= 0) {
	picture->offset -= picture->coded_picture_width;
	/* dest is advanced only when the frame has no proc_slice callback or
	 * the picture is not a B picture */
	if ((picture->current_frame->proc_slice == NULL) ||
	    (picture->picture_coding_type != B_TYPE)) {
	  picture->dest[0] += 16 * picture->pitches[0];
	  picture->dest[1] += 8 * picture->pitches[1];
	  picture->dest[2] += 8 * picture->pitches[2];
	}
	picture->v_offset += 16;
    }
    /* slice starts below the last macroblock row: reject it */
    if (picture->v_offset > picture->limit_y)
	return 1;

    return 0;
#undef bit_buf
#undef bits
#undef bit_ptr
}
*/ + if (macroblock_modes & MACROBLOCK_QUANT) + picture->quantizer_scale = get_quantizer_scale (picture); + + if (macroblock_modes & MACROBLOCK_INTRA) { + + int DCT_offset, DCT_stride; + int offset; + uint8_t * dest_y; + + if (picture->concealment_motion_vectors) { + if (picture->picture_structure == FRAME_PICTURE) + motion_fr_conceal (picture); + else + motion_fi_conceal (picture); + } else { + picture->f_motion.pmv[0][0] = picture->f_motion.pmv[0][1] = 0; + picture->f_motion.pmv[1][0] = picture->f_motion.pmv[1][1] = 0; + picture->b_motion.pmv[0][0] = picture->b_motion.pmv[0][1] = 0; + picture->b_motion.pmv[1][0] = picture->b_motion.pmv[1][1] = 0; + } + + if (macroblock_modes & DCT_TYPE_INTERLACED) { + DCT_offset = picture->pitches[0]; + DCT_stride = picture->pitches[0] * 2; + } else { + DCT_offset = picture->pitches[0] * 8; + DCT_stride = picture->pitches[0]; + } + + offset = picture->offset; + dest_y = picture->dest[0] + offset; + slice_intra_DCT (picture, 0, dest_y, DCT_stride); + slice_intra_DCT (picture, 0, dest_y + 8, DCT_stride); + slice_intra_DCT (picture, 0, dest_y + DCT_offset, DCT_stride); + slice_intra_DCT (picture, 0, dest_y + DCT_offset + 8, DCT_stride); + slice_intra_DCT (picture, 1, picture->dest[1] + (offset >> 1), + picture->pitches[1]); + slice_intra_DCT (picture, 2, picture->dest[2] + (offset >> 1), + picture->pitches[2]); + + if (picture->picture_coding_type == D_TYPE) { + NEEDBITS (bit_buf, bits, bit_ptr); + DUMPBITS (bit_buf, bits, 1); + } + } else { + + if (picture->picture_structure == FRAME_PICTURE) + switch (macroblock_modes & MOTION_TYPE_MASK) { + case MC_FRAME: + if (picture->mpeg1) + MOTION_CALL (motion_mp1, macroblock_modes); + else + MOTION_CALL (motion_fr_frame, macroblock_modes); + break; + + case MC_FIELD: + MOTION_CALL (motion_fr_field, macroblock_modes); + break; + + case MC_DMV: + MOTION_CALL (motion_fr_dmv, MACROBLOCK_MOTION_FORWARD); + break; + + case 0: + /* non-intra mb without forward mv in a P picture */ + 
picture->f_motion.pmv[0][0] = 0; + picture->f_motion.pmv[0][1] = 0; + picture->f_motion.pmv[1][0] = 0; + picture->f_motion.pmv[1][1] = 0; + MOTION_CALL (motion_zero, MACROBLOCK_MOTION_FORWARD); + break; + } + else + switch (macroblock_modes & MOTION_TYPE_MASK) { + case MC_FIELD: + MOTION_CALL (motion_fi_field, macroblock_modes); + break; + + case MC_16X8: + MOTION_CALL (motion_fi_16x8, macroblock_modes); + break; + + case MC_DMV: + MOTION_CALL (motion_fi_dmv, MACROBLOCK_MOTION_FORWARD); + break; + + case 0: + /* non-intra mb without forward mv in a P picture */ + picture->f_motion.pmv[0][0] = 0; + picture->f_motion.pmv[0][1] = 0; + picture->f_motion.pmv[1][0] = 0; + picture->f_motion.pmv[1][1] = 0; + MOTION_CALL (motion_zero, MACROBLOCK_MOTION_FORWARD); + break; + } + + if (macroblock_modes & MACROBLOCK_PATTERN) { + int coded_block_pattern; + int DCT_offset, DCT_stride; + int offset; + uint8_t * dest_y; + + if (macroblock_modes & DCT_TYPE_INTERLACED) { + DCT_offset = picture->pitches[0]; + DCT_stride = picture->pitches[0] * 2; + } else { + DCT_offset = picture->pitches[0] * 8; + DCT_stride = picture->pitches[0]; + } + + coded_block_pattern = get_coded_block_pattern (picture); + + offset = picture->offset; + dest_y = picture->dest[0] + offset; + if (coded_block_pattern & 0x20) + slice_non_intra_DCT (picture, dest_y, DCT_stride); + if (coded_block_pattern & 0x10) + slice_non_intra_DCT (picture, dest_y + 8, DCT_stride); + if (coded_block_pattern & 0x08) + slice_non_intra_DCT (picture, dest_y + DCT_offset, + DCT_stride); + if (coded_block_pattern & 0x04) + slice_non_intra_DCT (picture, dest_y + DCT_offset + 8, + DCT_stride); + if (coded_block_pattern & 0x2) + slice_non_intra_DCT (picture, + picture->dest[1] + (offset >> 1), + picture->pitches[1]); + if (coded_block_pattern & 0x1) + slice_non_intra_DCT (picture, + picture->dest[2] + (offset >> 1), + picture->pitches[2]); + } + + picture->dc_dct_pred[0] = picture->dc_dct_pred[1] = + picture->dc_dct_pred[2] = 128 << 
picture->intra_dc_precision; + } + + NEXT_MACROBLOCK; + + NEEDBITS (bit_buf, bits, bit_ptr); + mba_inc = 0; + while (1) { + if (bit_buf >= 0x10000000) { + mba = MBA_5 + (UBITS (bit_buf, 5) - 2); + break; + } else if (bit_buf >= 0x03000000) { + mba = MBA_11 + (UBITS (bit_buf, 11) - 24); + break; + } else switch (UBITS (bit_buf, 11)) { + case 8: /* macroblock_escape */ + mba_inc += 33; + /* pass through */ + case 15: /* macroblock_stuffing (MPEG1 only) */ + DUMPBITS (bit_buf, bits, 11); + NEEDBITS (bit_buf, bits, bit_ptr); + continue; + default: /* end of slice, or error */ + if (mpeg2_cpu_state_restore) + mpeg2_cpu_state_restore (&cpu_state); + return; + } + } + DUMPBITS (bit_buf, bits, mba->len); + mba_inc += mba->mba; + + if (mba_inc) { + picture->dc_dct_pred[0] = picture->dc_dct_pred[1] = + picture->dc_dct_pred[2] = 128 << picture->intra_dc_precision; + + if (picture->picture_coding_type == P_TYPE) { + picture->f_motion.pmv[0][0] = picture->f_motion.pmv[0][1] = 0; + picture->f_motion.pmv[1][0] = picture->f_motion.pmv[1][1] = 0; + + do { + MOTION_CALL (motion_zero, MACROBLOCK_MOTION_FORWARD); + NEXT_MACROBLOCK; + } while (--mba_inc); + } else { + do { + MOTION_CALL (motion_reuse, macroblock_modes); + NEXT_MACROBLOCK; + } while (--mba_inc); + } + } + } +#undef bit_buf +#undef bits +#undef bit_ptr +} diff --git a/src/video_dec/libmpeg2/slice_xvmc.c b/src/video_dec/libmpeg2/slice_xvmc.c new file mode 100644 index 000000000..014ae7924 --- /dev/null +++ b/src/video_dec/libmpeg2/slice_xvmc.c @@ -0,0 +1,1988 @@ +/* + * slice_xvmc.c + * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org> + * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. 
+ * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#include <stdio.h> +#include <string.h> /* memcpy/memset, try to remove */ +#include <stdlib.h> +#include <inttypes.h> + +#include <xine/xine_internal.h> +#include <xine/video_out.h> +#include "mpeg2_internal.h" +#include <xine/xineutils.h> + +#include <xine/attributes.h> +#include "accel_xvmc.h" +#include "xvmc.h" + + +#define MOTION_ACCEL XINE_VO_MOTION_ACCEL +#define IDCT_ACCEL XINE_VO_IDCT_ACCEL +#define SIGNED_INTRA XINE_VO_SIGNED_INTRA +#define ACCEL (MOTION_ACCEL | IDCT_ACCEL) + +#include "vlc.h" +/* original (non-patched) scan tables */ + +static const uint8_t mpeg2_scan_norm_orig[64] ATTR_ALIGN(16) = +{ + /* Zig-Zag scan pattern */ + 0, 1, 8,16, 9, 2, 3,10, + 17,24,32,25,18,11, 4, 5, + 12,19,26,33,40,48,41,34, + 27,20,13, 6, 7,14,21,28, + 35,42,49,56,57,50,43,36, + 29,22,15,23,30,37,44,51, + 58,59,52,45,38,31,39,46, + 53,60,61,54,47,55,62,63 +}; + +static const uint8_t mpeg2_scan_alt_orig[64] ATTR_ALIGN(16) = +{ + /* Alternate scan pattern */ + 0,8,16,24,1,9,2,10,17,25,32,40,48,56,57,49, + 41,33,26,18,3,11,4,12,19,27,34,42,50,58,35,43, + 51,59,20,28,5,13,6,14,21,29,36,44,52,60,37,45, + 53,61,22,30,7,15,23,31,38,46,54,62,39,47,55,63 +}; + +static uint8_t mpeg2_scan_alt_ptable[64] ATTR_ALIGN(16); +static uint8_t 
mpeg2_scan_norm_ptable[64] ATTR_ALIGN(16); +static uint8_t mpeg2_scan_orig_ptable[64] ATTR_ALIGN(16); + +void xvmc_setup_scan_ptable( void ) +{ + int i; + for (i=0; i<64; ++i) { + mpeg2_scan_norm_ptable[mpeg2_scan_norm_orig[i]] = mpeg2_scan_norm[i]; + mpeg2_scan_alt_ptable[mpeg2_scan_alt_orig[i]] = mpeg2_scan_alt[i]; + mpeg2_scan_orig_ptable[i] = i; + } +} + + +static const int non_linear_quantizer_scale [] = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 10, 12, 14, 16, 18, 20, 22, + 24, 28, 32, 36, 40, 44, 48, 52, + 56, 64, 72, 80, 88, 96, 104, 112 +}; + +static inline int get_xvmc_macroblock_modes (picture_t * picture) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + int macroblock_modes; + const MBtab * tab; + + switch (picture->picture_coding_type) { + case I_TYPE: + + tab = MB_I + UBITS (bit_buf, 1); + DUMPBITS (bit_buf, bits, tab->len); + macroblock_modes = tab->modes; + + if ((! (picture->frame_pred_frame_dct)) && + (picture->picture_structure == FRAME_PICTURE)) { + macroblock_modes |= UBITS (bit_buf, 1) * DCT_TYPE_INTERLACED; + DUMPBITS (bit_buf, bits, 1); + } + + return macroblock_modes; + + case P_TYPE: + + tab = MB_P + UBITS (bit_buf, 5); + DUMPBITS (bit_buf, bits, tab->len); + macroblock_modes = tab->modes; + + if (picture->picture_structure != FRAME_PICTURE) { + if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) { + macroblock_modes |= UBITS (bit_buf, 2) * MOTION_TYPE_BASE; + DUMPBITS (bit_buf, bits, 2); + } + return macroblock_modes; + } else if (picture->frame_pred_frame_dct) { + if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) + macroblock_modes |= MC_FRAME; + return macroblock_modes; + } else { + if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) { + macroblock_modes |= UBITS (bit_buf, 2) * MOTION_TYPE_BASE; + DUMPBITS (bit_buf, bits, 2); + } + if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN)) { + macroblock_modes |= UBITS (bit_buf, 1) * DCT_TYPE_INTERLACED; + DUMPBITS 
(bit_buf, bits, 1); + } + return macroblock_modes; + } + + case B_TYPE: + + tab = MB_B + UBITS (bit_buf, 6); + DUMPBITS (bit_buf, bits, tab->len); + macroblock_modes = tab->modes; + + if (picture->picture_structure != FRAME_PICTURE) { + if (! (macroblock_modes & MACROBLOCK_INTRA)) { + macroblock_modes |= UBITS (bit_buf, 2) * MOTION_TYPE_BASE; + DUMPBITS (bit_buf, bits, 2); + } + return macroblock_modes; + } else if (picture->frame_pred_frame_dct) { + /* if (! (macroblock_modes & MACROBLOCK_INTRA)) */ + macroblock_modes |= MC_FRAME; + return macroblock_modes; + } else { + if (macroblock_modes & MACROBLOCK_INTRA) + goto intra; + macroblock_modes |= UBITS (bit_buf, 2) * MOTION_TYPE_BASE; + DUMPBITS (bit_buf, bits, 2); + if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN)) { + intra: + macroblock_modes |= UBITS (bit_buf, 1) * DCT_TYPE_INTERLACED; + DUMPBITS (bit_buf, bits, 1); + } + return macroblock_modes; + } + + case D_TYPE: + + DUMPBITS (bit_buf, bits, 1); + return MACROBLOCK_INTRA; + + default: + return 0; + } +#undef bit_buf +#undef bits +#undef bit_ptr +} + +static inline int get_xvmc_quantizer_scale (picture_t * picture) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + + int quantizer_scale_code; + + quantizer_scale_code = UBITS (bit_buf, 5); + DUMPBITS (bit_buf, bits, 5); + + if (picture->q_scale_type) + return non_linear_quantizer_scale [quantizer_scale_code]; + else + return quantizer_scale_code << 1; +#undef bit_buf +#undef bits +#undef bit_ptr +} + +static inline int get_xvmc_motion_delta (picture_t * picture, int f_code) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + + int delta; + int sign; + const MVtab * tab; + + if (bit_buf & 0x80000000) { + DUMPBITS (bit_buf, bits, 1); + return 0; + } else if (bit_buf >= 0x0c000000) { + + tab = MV_4 + UBITS (bit_buf, 4); + delta = (tab->delta << 
/*
 * Wrap a motion vector component into the range permitted by f_code.
 *
 * vec:    motion vector component (predictor plus decoded delta).
 * f_code: range selector; the permitted range is [-limit, limit) with
 *         limit = 16 << f_code.
 *
 * Returns vec unchanged when it already lies in [-limit, limit); otherwise
 * 2 * limit is subtracted (vec too large) or added (vec too small) once.
 *
 * The previous implementation obtained the same result from an arithmetic
 * right shift of a negative int (implementation-defined) combined with
 * signed/unsigned wraparound, and kept an unused alternative that
 * left-shifted a possibly negative value (undefined behaviour).  Plain
 * comparisons give identical results without relying on either.
 */
static inline int bound_motion_vector (int vec, int f_code)
{
    const int limit = 16 << f_code;

    if (vec >= limit)
	return vec - 2 * limit;
    if (vec < -limit)
	return vec + 2 * limit;
    return vec;
}
/*
 * Decode one chroma DC differential from the bitstream.
 *
 * The size of the differential is looked up first (DC_chrom_5 for codes
 * below 0xf8000000, DC_long otherwise); then "size" further bits are read
 * and turned into a signed value.
 *
 * Returns the decoded differential, or 0 when the looked-up size is 0.
 *
 * NOTE(review): the short path consumes bits without calling NEEDBITS, so
 * it relies on the caller having enough bits buffered -- confirm against
 * the call sites.
 */
static inline int get_xvmc_chroma_dc_dct_diff (picture_t * picture)
{
#define bit_buf (picture->bitstream_buf)
#define bits (picture->bitstream_bits)
#define bit_ptr (picture->bitstream_ptr)
    const DCtab * tab;
    int size;
    int dc_diff;

    if (bit_buf < 0xf8000000) {
	/* short code: table indexed by UBITS (bit_buf, 5) */
	tab = DC_chrom_5 + UBITS (bit_buf, 5);
	size = tab->size;
	if (size) {
	    /* account for code and value bits at once, then shift by hand */
	    bits += tab->len + size;
	    bit_buf <<= tab->len;
	    /* Assuming UBITS/SBITS extract the leading n bits as an
	     * unsigned/signed value: when the leading bit is 0 the second
	     * term is all ones in "size" bits, making the result negative;
	     * when it is 1 the second term is 0 -- TODO confirm against the
	     * macro definitions. */
	    dc_diff =
		UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size);
	    bit_buf <<= size;
	    return dc_diff;
	} else {
	    /* size 0: only the 2-bit code is consumed */
	    DUMPBITS (bit_buf, bits, 2);
	    return 0;
	}
    } else {
	/* long code: table indexed by UBITS (bit_buf, 10), one bit more than
	 * tab->len is dropped */
	tab = DC_long + (UBITS (bit_buf, 10) - 0x3e0);
	size = tab->size;
	DUMPBITS (bit_buf, bits, tab->len + 1);
	NEEDBITS (bit_buf, bits, bit_ptr);
	dc_diff = UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size);
	DUMPBITS (bit_buf, bits, size);
	return dc_diff;
    }
#undef bit_buf
#undef bits
#undef bit_ptr
}
/*
 * Decode the AC coefficients of one intra block using the B14 DCT tables
 * and store the dequantized values in picture->mc->blockptr.
 *
 * The bitstream state is copied into locals for the duration of the loop
 * and written back to *picture at the end.  dest[0] is only read here (it
 * seeds the mismatch accumulator); decoding starts at scan position 0 and
 * every code advances the position by its run.
 *
 * When IDCT acceleration is enabled (picture->mc->xvmc_accel & IDCT_ACCEL)
 * coefficients are stored in the original (non-patched) scan order and the
 * quantizer matrix is indexed through the matching *_ptable; otherwise
 * picture->scan is used with the identity table.
 */
static void get_xvmc_intra_block_B14 (picture_t * picture)
{
    int i;	/* current scan position */
    int j;	/* index into dest for position i */
    int l;	/* index into quant_matrix for position i */
    int val;
    const uint8_t * scan = picture->scan;
    uint8_t * scan_ptable = mpeg2_scan_orig_ptable;
    uint8_t * quant_matrix = picture->intra_quantizer_matrix;
    int quantizer_scale = picture->quantizer_scale;
    int mismatch;
    const DCTtab * tab;
    uint32_t bit_buf;
    int bits;
    uint8_t * bit_ptr;
    int16_t * dest;

    dest = picture->mc->blockptr;

    if( picture->mc->xvmc_accel & IDCT_ACCEL ) {
      if ( scan == mpeg2_scan_norm ) {
        scan = mpeg2_scan_norm_orig;
        scan_ptable = mpeg2_scan_norm_ptable;
      } else {
        scan = mpeg2_scan_alt_orig;
        scan_ptable = mpeg2_scan_alt_ptable;
      }
    }

    i = 0;
    mismatch = ~dest[0];

    /* work on local copies of the bitstream state */
    bit_buf = picture->bitstream_buf;
    bits = picture->bitstream_bits;
    bit_ptr = picture->bitstream_ptr;

    NEEDBITS (bit_buf, bits, bit_ptr);

    while (1) {
	if (bit_buf >= 0x28000000) {

	    tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5);

	    i += tab->run;
	    if (i >= 64)
		break;	/* end of block */

	/* shared tail for every table hit that stays inside the block */
	normal_code:
	    l = scan_ptable[j = scan[i]];

	    /* "bits" accounts for the code plus the sign bit shifted out
	     * further down */
	    bit_buf <<= tab->len;
	    bits += tab->len + 1;
	    val = (tab->level * quantizer_scale * quant_matrix[l]) >> 4;

	    /* if (bitstream_get (1)) val = -val; */
	    val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1);

	    SATURATE (val);
	    dest[j] = val;
	    mismatch ^= val;

	    bit_buf <<= 1;
	    NEEDBITS (bit_buf, bits, bit_ptr);

	    continue;

	} else if (bit_buf >= 0x04000000) {

	    tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4);

	    i += tab->run;
	    if (i < 64)
		goto normal_code;

	    /* escape code */

	    /* replace the run added above by the explicitly coded one */
	    i += UBITS (bit_buf << 6, 6) - 64;
	    if (i >= 64)
		break;	/* illegal, check needed to avoid buffer overflow */

	    l = scan_ptable[j = scan[i]];

	    DUMPBITS (bit_buf, bits, 12);
	    NEEDBITS (bit_buf, bits, bit_ptr);
	    /* the level is read as SBITS (bit_buf, 12) */
	    val = (SBITS (bit_buf, 12) *
		   quantizer_scale * quant_matrix[l]) / 16;

	    SATURATE (val);
	    dest[j] = val;
	    mismatch ^= val;

	    DUMPBITS (bit_buf, bits, 12);
	    NEEDBITS (bit_buf, bits, bit_ptr);

	    continue;

	} else if (bit_buf >= 0x02000000) {
	    tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8);
	    i += tab->run;
	    if (i < 64)
		goto normal_code;
	} else if (bit_buf >= 0x00800000) {
	    tab = DCT_13 + (UBITS (bit_buf, 13) - 16);
	    i += tab->run;
	    if (i < 64)
		goto normal_code;
	} else if (bit_buf >= 0x00200000) {
	    tab = DCT_15 + (UBITS (bit_buf, 15) - 16);
	    i += tab->run;
	    if (i < 64)
		goto normal_code;
	} else {
	    tab = DCT_16 + UBITS (bit_buf, 16);
	    bit_buf <<= 16;
	    GETWORD (bit_buf, bits + 16, bit_ptr);
	    i += tab->run;
	    if (i < 64)
		goto normal_code;
	}
	break;	/* illegal, check needed to avoid buffer overflow */
    }

    /* fold the parity accumulated in "mismatch" into the last coefficient */
    dest[63] ^= mismatch & 1;
    DUMPBITS (bit_buf, bits, 2);	/* dump end of block code */
    /* publish the updated bitstream state */
    picture->bitstream_buf = bit_buf;
    picture->bitstream_bits = bits;
    picture->bitstream_ptr = bit_ptr;
}
(tab->level * quantizer_scale * quant_matrix[l]) >> 4; + + /* if (bitstream_get (1)) val = -val; */ + val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1); + + SATURATE (val); + dest[j] = val; + mismatch ^= val; + + bit_buf <<= 1; + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } else { + + /* end of block. I commented out this code because if we */ + /* dont exit here we will still exit at the later test :) */ + + /* if (i >= 128) break; */ /* end of block */ + + /* escape code */ + + i += UBITS (bit_buf << 6, 6) - 64; + if (i >= 64) + break; /* illegal, check against buffer overflow */ + + l = scan_ptable[j = scan[i]]; + + DUMPBITS (bit_buf, bits, 12); + NEEDBITS (bit_buf, bits, bit_ptr); + val = (SBITS (bit_buf, 12) * + quantizer_scale * quant_matrix[l]) / 16; + + SATURATE (val); + dest[j] = val; + mismatch ^= val; + + DUMPBITS (bit_buf, bits, 12); + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } + } else if (bit_buf >= 0x02000000) { + tab = DCT_B15_10 + (UBITS (bit_buf, 10) - 8); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00800000) { + tab = DCT_13 + (UBITS (bit_buf, 13) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00200000) { + tab = DCT_15 + (UBITS (bit_buf, 15) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else { + tab = DCT_16 + UBITS (bit_buf, 16); + bit_buf <<= 16; + GETWORD (bit_buf, bits + 16, bit_ptr); + i += tab->run; + if (i < 64) + goto normal_code; + } + break; /* illegal, check needed to avoid buffer overflow */ + } + + dest[63] ^= mismatch & 1; + DUMPBITS (bit_buf, bits, 4); /* dump end of block code */ + picture->bitstream_buf = bit_buf; + picture->bitstream_bits = bits; + picture->bitstream_ptr = bit_ptr; +} + +static void get_xvmc_non_intra_block (picture_t * picture) +{ + int i; + int j; + int l; + int val; + const uint8_t * scan = picture->scan; + uint8_t * scan_ptable = mpeg2_scan_orig_ptable; + uint8_t * quant_matrix = 
picture->non_intra_quantizer_matrix; + int quantizer_scale = picture->quantizer_scale; + int mismatch; + const DCTtab * tab; + uint32_t bit_buf; + int bits; + uint8_t * bit_ptr; + int16_t * dest; + + i = -1; + mismatch = 1; + + dest = picture->mc->blockptr; + + if( picture->mc->xvmc_accel & IDCT_ACCEL ) { + if ( scan == mpeg2_scan_norm ) { + scan = mpeg2_scan_norm_orig; + scan_ptable = mpeg2_scan_norm_ptable; + } else { + scan = mpeg2_scan_alt_orig; + scan_ptable = mpeg2_scan_alt_ptable; + } + } + + bit_buf = picture->bitstream_buf; + bits = picture->bitstream_bits; + bit_ptr = picture->bitstream_ptr; + + NEEDBITS (bit_buf, bits, bit_ptr); + if (bit_buf >= 0x28000000) { + tab = DCT_B14DC_5 + (UBITS (bit_buf, 5) - 5); + goto entry_1; + } else + goto entry_2; + + while (1) { + if (bit_buf >= 0x28000000) { + + tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5); + + entry_1: + i += tab->run; + if (i >= 64) + break; /* end of block */ + + normal_code: + l = scan_ptable[j = scan[i]]; + bit_buf <<= tab->len; + bits += tab->len + 1; + val = ((2*tab->level+1) * quantizer_scale * quant_matrix[l]) >> 5; + + /* if (bitstream_get (1)) val = -val; */ + val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1); + + SATURATE (val); + dest[j] = val; + mismatch ^= val; + + bit_buf <<= 1; + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } + + entry_2: + if (bit_buf >= 0x04000000) { + + tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4); + + i += tab->run; + if (i < 64) + goto normal_code; + + /* escape code */ + + i += UBITS (bit_buf << 6, 6) - 64; + if (i >= 64) + break; /* illegal, check needed to avoid buffer overflow */ + + l = scan_ptable[j = scan[i]]; + + DUMPBITS (bit_buf, bits, 12); + NEEDBITS (bit_buf, bits, bit_ptr); + val = 2 * (SBITS (bit_buf, 12) + SBITS (bit_buf, 1)) + 1; + val = (val * quantizer_scale * quant_matrix[l]) / 32; + + SATURATE (val); + dest[j] = val; + mismatch ^= val; + + DUMPBITS (bit_buf, bits, 12); + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } else if 
(bit_buf >= 0x02000000) { + tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00800000) { + tab = DCT_13 + (UBITS (bit_buf, 13) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00200000) { + tab = DCT_15 + (UBITS (bit_buf, 15) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else { + tab = DCT_16 + UBITS (bit_buf, 16); + bit_buf <<= 16; + GETWORD (bit_buf, bits + 16, bit_ptr); + i += tab->run; + if (i < 64) + goto normal_code; + } + break; /* illegal, check needed to avoid buffer overflow */ + } + dest[63] ^= mismatch & 1; + DUMPBITS (bit_buf, bits, 2); /* dump end of block code */ + picture->bitstream_buf = bit_buf; + picture->bitstream_bits = bits; + picture->bitstream_ptr = bit_ptr; +} + +static void get_xvmc_mpeg1_intra_block (picture_t * picture) +{ + int i; + int j; + int l; + int val; + const uint8_t * scan = picture->scan; + uint8_t * scan_ptable = mpeg2_scan_orig_ptable; + uint8_t * quant_matrix = picture->intra_quantizer_matrix; + int quantizer_scale = picture->quantizer_scale; + const DCTtab * tab; + uint32_t bit_buf; + int bits; + uint8_t * bit_ptr; + int16_t * dest; + + i = 0; + + dest = picture->mc->blockptr; + + if( picture->mc->xvmc_accel & IDCT_ACCEL ) { + if ( scan == mpeg2_scan_norm ) { + scan = mpeg2_scan_norm_orig; + scan_ptable = mpeg2_scan_norm_ptable; + } else { + scan = mpeg2_scan_alt_orig; + scan_ptable = mpeg2_scan_alt_ptable; + } + } + + bit_buf = picture->bitstream_buf; + bits = picture->bitstream_bits; + bit_ptr = picture->bitstream_ptr; + + NEEDBITS (bit_buf, bits, bit_ptr); + + while (1) { + if (bit_buf >= 0x28000000) { + + tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5); + + i += tab->run; + if (i >= 64) + break; /* end of block */ + + normal_code: + l = scan_ptable[j = scan[i]]; + bit_buf <<= tab->len; + bits += tab->len + 1; + val = (tab->level * quantizer_scale * quant_matrix[l]) >> 4; + + /* oddification */ + val 
= (val - 1) | 1; + + /* if (bitstream_get (1)) val = -val; */ + val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1); + + SATURATE (val); + dest[j] = val; + + bit_buf <<= 1; + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } else if (bit_buf >= 0x04000000) { + + tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4); + + i += tab->run; + if (i < 64) + goto normal_code; + + /* escape code */ + + i += UBITS (bit_buf << 6, 6) - 64; + if (i >= 64) + break; /* illegal, check needed to avoid buffer overflow */ + + l = scan_ptable[j = scan[i]]; + + DUMPBITS (bit_buf, bits, 12); + NEEDBITS (bit_buf, bits, bit_ptr); + val = SBITS (bit_buf, 8); + if (! (val & 0x7f)) { + DUMPBITS (bit_buf, bits, 8); + val = UBITS (bit_buf, 8) + 2 * val; + } + val = (val * quantizer_scale * quant_matrix[l]) / 16; + + /* oddification */ + val = (val + ~SBITS (val, 1)) | 1; + + SATURATE (val); + dest[j] = val; + + DUMPBITS (bit_buf, bits, 8); + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } else if (bit_buf >= 0x02000000) { + tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00800000) { + tab = DCT_13 + (UBITS (bit_buf, 13) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00200000) { + tab = DCT_15 + (UBITS (bit_buf, 15) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else { + tab = DCT_16 + UBITS (bit_buf, 16); + bit_buf <<= 16; + GETWORD (bit_buf, bits + 16, bit_ptr); + i += tab->run; + if (i < 64) + goto normal_code; + } + break; /* illegal, check needed to avoid buffer overflow */ + } + DUMPBITS (bit_buf, bits, 2); /* dump end of block code */ + picture->bitstream_buf = bit_buf; + picture->bitstream_bits = bits; + picture->bitstream_ptr = bit_ptr; +} + +static void get_xvmc_mpeg1_non_intra_block (picture_t * picture) +{ + int i; + int j; + int l; + int val; + const uint8_t * scan = picture->scan; + uint8_t * scan_ptable = mpeg2_scan_orig_ptable; + uint8_t * 
quant_matrix = picture->non_intra_quantizer_matrix; + int quantizer_scale = picture->quantizer_scale; + const DCTtab * tab; + uint32_t bit_buf; + int bits; + uint8_t * bit_ptr; + int16_t * dest; + + i = -1; + + dest = picture->mc->blockptr; + + if( picture->mc->xvmc_accel & IDCT_ACCEL ) { + if ( scan == mpeg2_scan_norm ) { + scan = mpeg2_scan_norm_orig; + scan_ptable = mpeg2_scan_norm_ptable; + } else { + scan = mpeg2_scan_alt_orig; + scan_ptable = mpeg2_scan_alt_ptable; + } + } + + bit_buf = picture->bitstream_buf; + bits = picture->bitstream_bits; + bit_ptr = picture->bitstream_ptr; + + NEEDBITS (bit_buf, bits, bit_ptr); + if (bit_buf >= 0x28000000) { + tab = DCT_B14DC_5 + (UBITS (bit_buf, 5) - 5); + goto entry_1; + } else + goto entry_2; + + while (1) { + if (bit_buf >= 0x28000000) { + + tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5); + + entry_1: + i += tab->run; + if (i >= 64) + break; /* end of block */ + + normal_code: + l = scan_ptable[j = scan[i]]; + bit_buf <<= tab->len; + bits += tab->len + 1; + val = ((2*tab->level+1) * quantizer_scale * quant_matrix[l]) >> 5; + + /* oddification */ + val = (val - 1) | 1; + + /* if (bitstream_get (1)) val = -val; */ + val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1); + + SATURATE (val); + dest[j] = val; + + bit_buf <<= 1; + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } + + entry_2: + if (bit_buf >= 0x04000000) { + + tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4); + + i += tab->run; + if (i < 64) + goto normal_code; + + /* escape code */ + + i += UBITS (bit_buf << 6, 6) - 64; + if (i >= 64) + break; /* illegal, check needed to avoid buffer overflow */ + + l = scan_ptable[j = scan[i]]; + + DUMPBITS (bit_buf, bits, 12); + NEEDBITS (bit_buf, bits, bit_ptr); + val = SBITS (bit_buf, 8); + if (! 
(val & 0x7f)) { + DUMPBITS (bit_buf, bits, 8); + val = UBITS (bit_buf, 8) + 2 * val; + } + val = 2 * (val + SBITS (val, 1)) + 1; + val = (val * quantizer_scale * quant_matrix[l]) / 32; + + /* oddification */ + val = (val + ~SBITS (val, 1)) | 1; + + SATURATE (val); + dest[j] = val; + + DUMPBITS (bit_buf, bits, 8); + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } else if (bit_buf >= 0x02000000) { + tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00800000) { + tab = DCT_13 + (UBITS (bit_buf, 13) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00200000) { + tab = DCT_15 + (UBITS (bit_buf, 15) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else { + tab = DCT_16 + UBITS (bit_buf, 16); + bit_buf <<= 16; + GETWORD (bit_buf, bits + 16, bit_ptr); + i += tab->run; + if (i < 64) + goto normal_code; + } + break; /* illegal, check needed to avoid buffer overflow */ + } + DUMPBITS (bit_buf, bits, 2); /* dump end of block code */ + picture->bitstream_buf = bit_buf; + picture->bitstream_bits = bits; + picture->bitstream_ptr = bit_ptr; +} + +static inline void slice_xvmc_intra_DCT (picture_t * picture, int cc, + uint8_t * dest, int stride) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + NEEDBITS (bit_buf, bits, bit_ptr); + /* Get the intra DC coefficient and inverse quantize it */ + + // printf("slice: slice_xvmc_intra_DCT cc=%d pred[0]=%d\n",cc,picture->dc_dct_pred[0]); + if (cc == 0) + picture->dc_dct_pred[0] += get_xvmc_luma_dc_dct_diff (picture); + else + picture->dc_dct_pred[cc] += get_xvmc_chroma_dc_dct_diff (picture); + //TODO conversion to signed format + // printf("slice: pred[0]=%d presision=%d\n",picture->dc_dct_pred[0], + // picture->intra_dc_precision); + + mpeg2_zero_block(picture->mc->blockptr); + + picture->mc->blockptr[0] = picture->dc_dct_pred[cc] << (3 
- picture->intra_dc_precision); + + if (picture->mpeg1) { + if (picture->picture_coding_type != D_TYPE) + get_xvmc_mpeg1_intra_block (picture); + } else if (picture->intra_vlc_format) + get_xvmc_intra_block_B15 (picture); + else + get_xvmc_intra_block_B14 (picture); + + if((picture->mc->xvmc_accel & ACCEL) == MOTION_ACCEL) { + //motion_comp only no idct acceleration so do it in software + mpeg2_idct (picture->mc->blockptr); + } + picture->mc->blockptr += 64; +#undef bit_buf +#undef bits +#undef bit_ptr +} + +static inline void slice_xvmc_non_intra_DCT (picture_t * picture, uint8_t * dest, + int stride) +{ + mpeg2_zero_block(picture->mc->blockptr); + + if (picture->mpeg1) + get_xvmc_mpeg1_non_intra_block (picture); + else + get_xvmc_non_intra_block (picture); + + if((picture->mc->xvmc_accel & ACCEL) == MOTION_ACCEL) { + // motion comp only no idct acceleration so do it in sw + mpeg2_idct (picture->mc->blockptr); + } + picture->mc->blockptr += 64; +} + +static void motion_mp1 (picture_t * picture, motion_t * motion, + void (** table) (uint8_t *, uint8_t *, int, int)) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + int motion_x, motion_y; + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_x = (motion->pmv[0][0] + + (get_xvmc_motion_delta (picture, + motion->f_code[0]) << motion->f_code[1])); + motion_x = bound_motion_vector (motion_x, + motion->f_code[0] + motion->f_code[1]); + motion->pmv[0][0] = motion_x; + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_y = (motion->pmv[0][1] + + (get_xvmc_motion_delta (picture, + motion->f_code[0]) << motion->f_code[1])); + motion_y = bound_motion_vector (motion_y, + motion->f_code[0] + motion->f_code[1]); + motion->pmv[0][1] = motion_y; + +#undef bit_buf +#undef bits +#undef bit_ptr +} + +static void motion_fr_frame (picture_t * picture, motion_t * motion, + void (** table) (uint8_t *, uint8_t *, int, int)) +{ +#define bit_buf (picture->bitstream_buf) 
+#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + int motion_x, motion_y; + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_x = motion->pmv[0][0] + get_xvmc_motion_delta (picture, + motion->f_code[0]); + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); + motion->pmv[1][0] = motion->pmv[0][0] = motion_x; + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_y = motion->pmv[0][1] + get_xvmc_motion_delta (picture, + motion->f_code[1]); + motion_y = bound_motion_vector (motion_y, motion->f_code[1]); + motion->pmv[1][1] = motion->pmv[0][1] = motion_y; + +#undef bit_buf +#undef bits +#undef bit_ptr +} + +static void motion_fr_field (picture_t * picture, motion_t * motion, + void (** table) (uint8_t *, uint8_t *, int, int), + int dir) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + int motion_x, motion_y, field; + // unsigned int pos_x, pos_y, xy_half; + + NEEDBITS (bit_buf, bits, bit_ptr); + field = UBITS (bit_buf, 1); + picture->XvMC_mv_field_sel[0][dir] = field; + DUMPBITS (bit_buf, bits, 1); + + motion_x = motion->pmv[0][0] + get_xvmc_motion_delta (picture, + motion->f_code[0]); + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); + motion->pmv[0][0] = motion_x; + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_y = (motion->pmv[0][1] >> 1) + get_xvmc_motion_delta (picture, + motion->f_code[1]); + /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */ + motion->pmv[0][1] = motion_y << 1; + + NEEDBITS (bit_buf, bits, bit_ptr); + field = UBITS (bit_buf, 1); + //TODO look at field select need bob (weave ok) + picture->XvMC_mv_field_sel[1][dir] = field; + DUMPBITS (bit_buf, bits, 1); + + motion_x = motion->pmv[1][0] + get_xvmc_motion_delta (picture, + motion->f_code[0]); + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); + motion->pmv[1][0] = motion_x; + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_y = 
(motion->pmv[1][1] >> 1) + get_xvmc_motion_delta (picture, + motion->f_code[1]); + /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */ + motion->pmv[1][1] = motion_y << 1; + +#undef bit_buf +#undef bits +#undef bit_ptr +} + +static void motion_fr_dmv (picture_t * picture, motion_t * motion, + void (** table) (uint8_t *, uint8_t *, int, int)) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + int motion_x, motion_y; + + // TODO field select ?? possible need to be 0 + picture->XvMC_mv_field_sel[0][0] = picture->XvMC_mv_field_sel[1][0] = 0; + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_x = motion->pmv[0][0] + get_xvmc_motion_delta (picture, + motion->f_code[0]); + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); + motion->pmv[1][0] = motion->pmv[0][0] = motion_x; + NEEDBITS (bit_buf, bits, bit_ptr); + + motion_y = (motion->pmv[0][1] >> 1) + get_xvmc_motion_delta (picture, + motion->f_code[1]); + /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */ + motion->pmv[1][1] = motion->pmv[0][1] = motion_y << 1; + +#undef bit_buf +#undef bits +#undef bit_ptr +} + +static void motion_reuse (picture_t * picture, motion_t * motion, + void (** table) (uint8_t *, uint8_t *, int, int)) +{ + int motion_x, motion_y; + + motion_x = motion->pmv[0][0]; + motion_y = motion->pmv[0][1]; + +} + +/* like motion_frame, but parsing without actual motion compensation */ +static void motion_fr_conceal (picture_t * picture) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + int tmp; + + NEEDBITS (bit_buf, bits, bit_ptr); + tmp = (picture->f_motion.pmv[0][0] + + get_xvmc_motion_delta (picture, picture->f_motion.f_code[0])); + tmp = bound_motion_vector (tmp, picture->f_motion.f_code[0]); + picture->f_motion.pmv[1][0] = picture->f_motion.pmv[0][0] = tmp; + + NEEDBITS (bit_buf, bits, bit_ptr); + tmp = 
(picture->f_motion.pmv[0][1] + + get_xvmc_motion_delta (picture, picture->f_motion.f_code[1])); + tmp = bound_motion_vector (tmp, picture->f_motion.f_code[1]); + picture->f_motion.pmv[1][1] = picture->f_motion.pmv[0][1] = tmp; + + DUMPBITS (bit_buf, bits, 1); /* remove marker_bit */ +#undef bit_buf +#undef bits +#undef bit_ptr +} + +static void motion_fi_field (picture_t * picture, motion_t * motion, + void (** table) (uint8_t *, uint8_t *, int, int)) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + int motion_x, motion_y; + uint8_t ** ref_field; + + NEEDBITS (bit_buf, bits, bit_ptr); + ref_field = motion->ref2[UBITS (bit_buf, 1)]; + + // TODO field select may need to do something here for bob (weave ok) + picture->XvMC_mv_field_sel[0][0] = picture->XvMC_mv_field_sel[1][0] = 0; + + DUMPBITS (bit_buf, bits, 1); + + motion_x = motion->pmv[0][0] + get_xvmc_motion_delta (picture, + motion->f_code[0]); + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); + motion->pmv[1][0] = motion->pmv[0][0] = motion_x; + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_y = motion->pmv[0][1] + get_xvmc_motion_delta (picture, + motion->f_code[1]); + motion_y = bound_motion_vector (motion_y, motion->f_code[1]); + motion->pmv[1][1] = motion->pmv[0][1] = motion_y; + +#undef bit_buf +#undef bits +#undef bit_ptr +} + +static void motion_fi_16x8 (picture_t * picture, motion_t * motion, + void (** table) (uint8_t *, uint8_t *, int, int)) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + int motion_x, motion_y; + uint8_t ** ref_field; + + NEEDBITS (bit_buf, bits, bit_ptr); + ref_field = motion->ref2[UBITS (bit_buf, 1)]; + + // TODO field select may need to do something here bob (weave ok) + picture->XvMC_mv_field_sel[0][0] = picture->XvMC_mv_field_sel[1][0] = 0; + + DUMPBITS (bit_buf, bits, 1); + + motion_x = motion->pmv[0][0] + 
get_xvmc_motion_delta (picture, + motion->f_code[0]); + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); + motion->pmv[0][0] = motion_x; + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_y = motion->pmv[0][1] + get_xvmc_motion_delta (picture, + motion->f_code[1]); + motion_y = bound_motion_vector (motion_y, motion->f_code[1]); + motion->pmv[0][1] = motion_y; + + + NEEDBITS (bit_buf, bits, bit_ptr); + ref_field = motion->ref2[UBITS (bit_buf, 1)]; + + // TODO field select may need to do something here for bob (weave ok) + picture->XvMC_mv_field_sel[0][0] = picture->XvMC_mv_field_sel[1][0] = 0; + + DUMPBITS (bit_buf, bits, 1); + + motion_x = motion->pmv[1][0] + get_xvmc_motion_delta (picture, + motion->f_code[0]); + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); + motion->pmv[1][0] = motion_x; + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_y = motion->pmv[1][1] + get_xvmc_motion_delta (picture, + motion->f_code[1]); + motion_y = bound_motion_vector (motion_y, motion->f_code[1]); + motion->pmv[1][1] = motion_y; + +#undef bit_buf +#undef bits +#undef bit_ptr +} + +static void motion_fi_dmv (picture_t * picture, motion_t * motion, + void (** table) (uint8_t *, uint8_t *, int, int)) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + int motion_x, motion_y; + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_x = motion->pmv[0][0] + get_xvmc_motion_delta (picture, + motion->f_code[0]); + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); + motion->pmv[1][0] = motion->pmv[0][0] = motion_x; + NEEDBITS (bit_buf, bits, bit_ptr); + + motion_y = motion->pmv[0][1] + get_xvmc_motion_delta (picture, + motion->f_code[1]); + motion_y = bound_motion_vector (motion_y, motion->f_code[1]); + motion->pmv[1][1] = motion->pmv[0][1] = motion_y; + + // TODO field select may need to do something here for bob (weave ok) + picture->XvMC_mv_field_sel[0][0] = 
picture->XvMC_mv_field_sel[1][0] = 0; + +#undef bit_buf +#undef bits +#undef bit_ptr +} + + +static void motion_fi_conceal (picture_t * picture) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + int tmp; + + NEEDBITS (bit_buf, bits, bit_ptr); + DUMPBITS (bit_buf, bits, 1); /* remove field_select */ + + tmp = (picture->f_motion.pmv[0][0] + + get_xvmc_motion_delta (picture, picture->f_motion.f_code[0])); + tmp = bound_motion_vector (tmp, picture->f_motion.f_code[0]); + picture->f_motion.pmv[1][0] = picture->f_motion.pmv[0][0] = tmp; + + NEEDBITS (bit_buf, bits, bit_ptr); + tmp = (picture->f_motion.pmv[0][1] + + get_xvmc_motion_delta (picture, picture->f_motion.f_code[1])); + tmp = bound_motion_vector (tmp, picture->f_motion.f_code[1]); + picture->f_motion.pmv[1][1] = picture->f_motion.pmv[0][1] = tmp; + + DUMPBITS (bit_buf, bits, 1); /* remove marker_bit */ +#undef bit_buf +#undef bits +#undef bit_ptr +} + +#define MOTION_CALL(routine,direction) \ +do { \ + if ((direction) & MACROBLOCK_MOTION_FORWARD) \ + routine (picture, &(picture->f_motion), mpeg2_mc.put); \ + if ((direction) & MACROBLOCK_MOTION_BACKWARD) \ + routine (picture, &(picture->b_motion), \ + ((direction) & MACROBLOCK_MOTION_FORWARD ? 
\ + mpeg2_mc.avg : mpeg2_mc.put)); \ +} while (0) + +#define NEXT_MACROBLOCK \ +do { \ + picture->offset += 16; \ + if (picture->offset == picture->coded_picture_width) { \ + do { /* just so we can use the break statement */ \ + if (picture->current_frame->proc_slice) { \ + picture->current_frame->proc_slice (picture->current_frame, \ + picture->dest); \ + if (picture->picture_coding_type == B_TYPE) \ + break; \ + } \ + picture->dest[0] += 16 * picture->pitches[0]; \ + picture->dest[1] += 8 * picture->pitches[1]; \ + picture->dest[2] += 8 * picture->pitches[2]; \ + } while (0); \ + picture->v_offset += 16; \ + if (picture->v_offset > picture->limit_y) { \ + if (mpeg2_cpu_state_restore) \ + mpeg2_cpu_state_restore (&cpu_state); \ + return; \ + } \ + picture->offset = 0; \ + } \ +} while (0) + +static inline int slice_xvmc_init (picture_t * picture, int code) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + int offset, height; + struct vo_frame_s * forward_reference_frame; + struct vo_frame_s * backward_reference_frame; + const MBAtab * mba; + + offset = picture->picture_structure == BOTTOM_FIELD; + picture->pitches[0] = picture->current_frame->pitches[0]; + picture->pitches[1] = picture->current_frame->pitches[1]; + picture->pitches[2] = picture->current_frame->pitches[2]; + + if( picture->forward_reference_frame ) { + forward_reference_frame = picture->forward_reference_frame; + } + else { + /* return 1; */ + forward_reference_frame = picture->current_frame; + } + + if( picture->backward_reference_frame ) { + backward_reference_frame = picture->backward_reference_frame; + } + else { + /* return 1; */ + backward_reference_frame = picture->current_frame; + } + + picture->f_motion.ref[0][0] = + forward_reference_frame->base[0] + (offset ? picture->pitches[0] : 0); + picture->f_motion.ref[0][1] = + forward_reference_frame->base[1] + (offset ? 
picture->pitches[1] : 0); + picture->f_motion.ref[0][2] = + forward_reference_frame->base[2] + (offset ? picture->pitches[2] : 0); + + picture->b_motion.ref[0][0] = + backward_reference_frame->base[0] + (offset ? picture->pitches[0] : 0); + picture->b_motion.ref[0][1] = + backward_reference_frame->base[1] + (offset ? picture->pitches[1] : 0); + picture->b_motion.ref[0][2] = + backward_reference_frame->base[2] + (offset ? picture->pitches[2] : 0); + + if (picture->picture_structure != FRAME_PICTURE) { + uint8_t ** forward_ref; + int bottom_field; + + bottom_field = (picture->picture_structure == BOTTOM_FIELD); + picture->dmv_offset = bottom_field ? 1 : -1; + picture->f_motion.ref2[0] = picture->f_motion.ref[bottom_field]; + picture->f_motion.ref2[1] = picture->f_motion.ref[!bottom_field]; + picture->b_motion.ref2[0] = picture->b_motion.ref[bottom_field]; + picture->b_motion.ref2[1] = picture->b_motion.ref[!bottom_field]; + + forward_ref = forward_reference_frame->base; + if (picture->second_field && (picture->picture_coding_type != B_TYPE)) + forward_ref = picture->current_frame->base; + + picture->f_motion.ref[1][0] = forward_ref[0] + (bottom_field ? 0 : picture->pitches[0]); + picture->f_motion.ref[1][1] = forward_ref[1] + (bottom_field ? 0 : picture->pitches[1]); + picture->f_motion.ref[1][2] = forward_ref[2] + (bottom_field ? 0 : picture->pitches[2]); + + picture->b_motion.ref[1][0] = + backward_reference_frame->base[0] + (bottom_field ? 0 : picture->pitches[0]); + picture->b_motion.ref[1][1] = + backward_reference_frame->base[1] + (bottom_field ? 0 : picture->pitches[1]); + picture->b_motion.ref[1][2] = + backward_reference_frame->base[2] + (bottom_field ? 
0 : picture->pitches[2]); + } + + picture->f_motion.pmv[0][0] = picture->f_motion.pmv[0][1] = 0; + picture->f_motion.pmv[1][0] = picture->f_motion.pmv[1][1] = 0; + picture->b_motion.pmv[0][0] = picture->b_motion.pmv[0][1] = 0; + picture->b_motion.pmv[1][0] = picture->b_motion.pmv[1][1] = 0; + + picture->v_offset = (code - 1) * 16; + offset = (code - 1); + if (picture->current_frame->proc_slice && picture->picture_coding_type == B_TYPE) + offset = 0; + else if (picture->picture_structure != FRAME_PICTURE) + offset = 2 * offset; + + picture->dest[0] = picture->current_frame->base[0] + picture->pitches[0] * offset * 16; + picture->dest[1] = picture->current_frame->base[1] + picture->pitches[1] * offset * 8; + picture->dest[2] = picture->current_frame->base[2] + picture->pitches[2] * offset * 8; + + height = picture->coded_picture_height; + switch (picture->picture_structure) { + case BOTTOM_FIELD: + picture->dest[0] += picture->pitches[0]; + picture->dest[1] += picture->pitches[1]; + picture->dest[2] += picture->pitches[2]; + /* follow thru */ + case TOP_FIELD: + picture->pitches[0] <<= 1; + picture->pitches[1] <<= 1; + picture->pitches[2] <<= 1; + height >>= 1; + } + picture->limit_x = 2 * picture->coded_picture_width - 32; + picture->limit_y_16 = 2 * height - 32; + picture->limit_y_8 = 2 * height - 16; + picture->limit_y = height - 16; + + //TODO conversion to signed format signed format + if((picture->mc->xvmc_accel & ACCEL) == MOTION_ACCEL && + !(picture->mc->xvmc_accel & SIGNED_INTRA)) { + //Motion Comp only unsigned intra + // original: + picture->dc_dct_pred[0] = picture->dc_dct_pred[1] = + picture->dc_dct_pred[2] = 1 << (picture->intra_dc_precision + 7); + } else { + //Motion Comp only signed intra MOTION_ACCEL+SIGNED_INTRA + picture->dc_dct_pred[0] = picture->dc_dct_pred[1] = + picture->dc_dct_pred[2] = 0; + } + + picture->quantizer_scale = get_xvmc_quantizer_scale (picture); + + /* ignore intra_slice and all the extra data */ + while (bit_buf & 0x80000000) { 
+ DUMPBITS (bit_buf, bits, 9); + NEEDBITS (bit_buf, bits, bit_ptr); + } + + /* decode initial macroblock address increment */ + offset = 0; + while (1) { + if (bit_buf >= 0x08000000) { + mba = MBA_5 + (UBITS (bit_buf, 6) - 2); + break; + } else if (bit_buf >= 0x01800000) { + mba = MBA_11 + (UBITS (bit_buf, 12) - 24); + break; + } else switch (UBITS (bit_buf, 12)) { + case 8: /* macroblock_escape */ + offset += 33; + DUMPBITS (bit_buf, bits, 11); + NEEDBITS (bit_buf, bits, bit_ptr); + continue; + case 15: /* macroblock_stuffing (MPEG1 only) */ + bit_buf &= 0xfffff; + DUMPBITS (bit_buf, bits, 11); + NEEDBITS (bit_buf, bits, bit_ptr); + continue; + default: /* error */ + return 1; + } + } + DUMPBITS (bit_buf, bits, mba->len + 1); + picture->offset = (offset + mba->mba) << 4; + + while (picture->offset - picture->coded_picture_width >= 0) { + picture->offset -= picture->coded_picture_width; + if ((picture->current_frame->proc_slice == NULL) || + (picture->picture_coding_type != B_TYPE)) { + picture->dest[0] += 16 * picture->pitches[0]; + picture->dest[1] += 8 * picture->pitches[1]; + picture->dest[2] += 8 * picture->pitches[2]; + } + picture->v_offset += 16; + } + if (picture->v_offset > picture->limit_y) + return 1; + + return 0; +#undef bit_buf +#undef bits +#undef bit_ptr +} + +void mpeg2_xvmc_slice (mpeg2dec_accel_t *accel, picture_t * picture, int code, uint8_t * buffer) +{ +#define bit_buf (picture->bitstream_buf) +#define bits (picture->bitstream_bits) +#define bit_ptr (picture->bitstream_ptr) + cpu_state_t cpu_state; + xine_xvmc_t *xvmc = (xine_xvmc_t *) picture->current_frame->accel_data; + + if (1 == code) { + accel->xvmc_last_slice_code = 0; + } + if ((code != accel->xvmc_last_slice_code + 1) && + (code != accel->xvmc_last_slice_code)) + return; + + bitstream_init (picture, buffer); + + if (slice_xvmc_init (picture, code)) + return; + + if (mpeg2_cpu_state_save) + mpeg2_cpu_state_save (&cpu_state); + + while (1) { + int macroblock_modes; + int mba_inc; + 
const MBAtab * mba; + + NEEDBITS (bit_buf, bits, bit_ptr); + + macroblock_modes = get_xvmc_macroblock_modes (picture); //macroblock_modes() + picture->XvMC_mb_type = macroblock_modes & 0x1F; + picture->XvMC_dct_type = (macroblock_modes & DCT_TYPE_INTERLACED)>>5; + picture->XvMC_motion_type = (macroblock_modes & MOTION_TYPE_MASK)>>6; + + picture->XvMC_x = picture->offset/16; + picture->XvMC_y = picture->v_offset/16; + + if((picture->XvMC_x == 0) && (picture->XvMC_y == 0)) { + picture->XvMC_mv_field_sel[0][0] = + picture->XvMC_mv_field_sel[1][0] = + picture->XvMC_mv_field_sel[0][1] = + picture->XvMC_mv_field_sel[1][1] = 0; + } + + picture->XvMC_cbp = 0x3f; //TODO set for intra 4:2:0 6 blocks yyyyuv all enabled + + /* maybe integrate MACROBLOCK_QUANT test into get_xvmc_macroblock_modes ? */ + if (macroblock_modes & MACROBLOCK_QUANT) + picture->quantizer_scale = get_xvmc_quantizer_scale (picture); + if (macroblock_modes & MACROBLOCK_INTRA) { + + int DCT_offset, DCT_stride; + int offset; + uint8_t * dest_y; + + if (picture->concealment_motion_vectors) { + if (picture->picture_structure == FRAME_PICTURE) + motion_fr_conceal (picture); + else + motion_fi_conceal (picture); + } else { + picture->f_motion.pmv[0][0] = picture->f_motion.pmv[0][1] = 0; + picture->f_motion.pmv[1][0] = picture->f_motion.pmv[1][1] = 0; + picture->b_motion.pmv[0][0] = picture->b_motion.pmv[0][1] = 0; + picture->b_motion.pmv[1][0] = picture->b_motion.pmv[1][1] = 0; + } + + if (macroblock_modes & DCT_TYPE_INTERLACED) { + DCT_offset = picture->pitches[0]; + DCT_stride = picture->pitches[0] * 2; + } else { + DCT_offset = picture->pitches[0] * 8; + DCT_stride = picture->pitches[0]; + } + offset = picture->offset; + dest_y = picture->dest[0] + offset; + // unravaled loop of 6 block(i) calls in macroblock() + slice_xvmc_intra_DCT (picture, 0, dest_y, DCT_stride); + slice_xvmc_intra_DCT (picture, 0, dest_y + 8, DCT_stride); + slice_xvmc_intra_DCT (picture, 0, dest_y + DCT_offset, DCT_stride); + 
slice_xvmc_intra_DCT (picture, 0, dest_y + DCT_offset + 8, DCT_stride); + slice_xvmc_intra_DCT (picture, 1, picture->dest[1] + (offset >> 1), + picture->pitches[1]); + slice_xvmc_intra_DCT (picture, 2, picture->dest[2] + (offset >> 1), + picture->pitches[2]); + + if (picture->picture_coding_type == D_TYPE) { + NEEDBITS (bit_buf, bits, bit_ptr); + DUMPBITS (bit_buf, bits, 1); + } + } else { + picture->XvMC_cbp = 0; + + if (picture->picture_structure == FRAME_PICTURE) + switch (macroblock_modes & MOTION_TYPE_MASK) { + case MC_FRAME: + if (picture->mpeg1) { + MOTION_CALL (motion_mp1, macroblock_modes); + } else { + MOTION_CALL (motion_fr_frame, macroblock_modes); + } + break; + + case MC_FIELD: + //MOTION_CALL (motion_fr_field, macroblock_modes); + + if ((macroblock_modes) & MACROBLOCK_MOTION_FORWARD) + motion_fr_field(picture, &(picture->f_motion), + mpeg2_mc.put,0); + if ((macroblock_modes) & MACROBLOCK_MOTION_BACKWARD) + motion_fr_field(picture, &(picture->b_motion), + ((macroblock_modes) & MACROBLOCK_MOTION_FORWARD ? 
+ mpeg2_mc.avg : mpeg2_mc.put),1); + + break; + + case MC_DMV: + MOTION_CALL (motion_fr_dmv, MACROBLOCK_MOTION_FORWARD); + break; + + case 0: + /* non-intra mb without forward mv in a P picture */ + picture->f_motion.pmv[0][0] = 0; + picture->f_motion.pmv[0][1] = 0; + picture->f_motion.pmv[1][0] = 0; + picture->f_motion.pmv[1][1] = 0; + // MOTION_CALL (motion_zero, MACROBLOCK_MOTION_FORWARD); + break; + } + else + switch (macroblock_modes & MOTION_TYPE_MASK) { + case MC_FIELD: + MOTION_CALL (motion_fi_field, macroblock_modes); + break; + + case MC_16X8: + MOTION_CALL (motion_fi_16x8, macroblock_modes); + break; + + case MC_DMV: + MOTION_CALL (motion_fi_dmv, MACROBLOCK_MOTION_FORWARD); + break; + + case 0: + /* non-intra mb without forward mv in a P picture */ + picture->f_motion.pmv[0][0] = 0; + picture->f_motion.pmv[0][1] = 0; + picture->f_motion.pmv[1][0] = 0; + picture->f_motion.pmv[1][1] = 0; + // MOTION_CALL (motion_zero, MACROBLOCK_MOTION_FORWARD); + break; + } + + if (macroblock_modes & MACROBLOCK_PATTERN) { + int coded_block_pattern; + int DCT_offset, DCT_stride; + int offset; + uint8_t * dest_y; + + if (macroblock_modes & DCT_TYPE_INTERLACED) { + DCT_offset = picture->pitches[0]; + DCT_stride = picture->pitches[0] * 2; + } else { + DCT_offset = picture->pitches[0] * 8; + DCT_stride = picture->pitches[0]; + } + + picture->XvMC_cbp = coded_block_pattern = get_xvmc_coded_block_pattern (picture); + offset = picture->offset; + dest_y = picture->dest[0] + offset; + // TODO optimize not fully used for idct accel only mc. 
+ if (coded_block_pattern & 0x20) + slice_xvmc_non_intra_DCT (picture, dest_y, DCT_stride); // cc0 luma 0 + if (coded_block_pattern & 0x10) + slice_xvmc_non_intra_DCT (picture, dest_y + 8, DCT_stride); // cc0 luma 1 + if (coded_block_pattern & 0x08) + slice_xvmc_non_intra_DCT (picture, dest_y + DCT_offset, + DCT_stride); // cc0 luma 2 + if (coded_block_pattern & 0x04) + slice_xvmc_non_intra_DCT (picture, dest_y + DCT_offset + 8, + DCT_stride); // cc0 luma 3 + if (coded_block_pattern & 0x2) + slice_xvmc_non_intra_DCT (picture, + picture->dest[1] + (offset >> 1), + picture->pitches[1]); // cc1 croma + if (coded_block_pattern & 0x1) + slice_xvmc_non_intra_DCT (picture, + picture->dest[2] + (offset >> 1), + picture->pitches[2]); // cc2 croma + } + + if((picture->mc->xvmc_accel & ACCEL) == MOTION_ACCEL && + !(picture->mc->xvmc_accel & SIGNED_INTRA)) { + // original: + picture->dc_dct_pred[0] = picture->dc_dct_pred[1] = + picture->dc_dct_pred[2] = 128 << picture->intra_dc_precision; + + } else { // MOTION_ACCEL+SIGNED_INTRA + picture->dc_dct_pred[0] = picture->dc_dct_pred[1] = + picture->dc_dct_pred[2] = 0; + } + + } + xvmc->proc_macro_block(picture->XvMC_x, picture->XvMC_y, + picture->XvMC_mb_type, + picture->XvMC_motion_type, + picture->XvMC_mv_field_sel, + picture->XvMC_dmvector, + picture->XvMC_cbp, + picture->XvMC_dct_type, + picture->current_frame, + picture->forward_reference_frame, + picture->backward_reference_frame, + picture->picture_structure, + picture->second_field, + picture->f_motion.pmv, + picture->b_motion.pmv); + + + NEXT_MACROBLOCK; + + NEEDBITS (bit_buf, bits, bit_ptr); + mba_inc = 0; + while (1) { + if (bit_buf >= 0x10000000) { + mba = MBA_5 + (UBITS (bit_buf, 5) - 2); + break; + } else if (bit_buf >= 0x03000000) { + mba = MBA_11 + (UBITS (bit_buf, 11) - 24); + break; + } else switch (UBITS (bit_buf, 11)) { + case 8: /* macroblock_escape */ + mba_inc += 33; + /* pass through */ + case 15: /* macroblock_stuffing (MPEG1 only) */ + DUMPBITS (bit_buf, 
bits, 11); + NEEDBITS (bit_buf, bits, bit_ptr); + continue; + default: /* end of slice, or error */ + if (mpeg2_cpu_state_restore) + mpeg2_cpu_state_restore (&cpu_state); + accel->xvmc_last_slice_code = code; + return; + } + } + DUMPBITS (bit_buf, bits, mba->len); + mba_inc += mba->mba; + if (mba_inc) { + //TODO conversion to signed format signed format + if((picture->mc->xvmc_accel & ACCEL) == MOTION_ACCEL && + !(picture->mc->xvmc_accel & SIGNED_INTRA)) { + // original: + picture->dc_dct_pred[0] = picture->dc_dct_pred[1] = + picture->dc_dct_pred[2] = 128 << picture->intra_dc_precision; + } else { // MOTION_ACCEL+SIGNED_INTRA + picture->dc_dct_pred[0] = picture->dc_dct_pred[1] = + picture->dc_dct_pred[2] = 0; + } + + picture->XvMC_cbp = 0; + if (picture->picture_coding_type == P_TYPE) { + picture->f_motion.pmv[0][0] = picture->f_motion.pmv[0][1] = 0; + picture->f_motion.pmv[1][0] = picture->f_motion.pmv[1][1] = 0; + + do { + if(picture->mc->xvmc_accel) { + + /* derive motion_type */ + if(picture->picture_structure == FRAME_PICTURE) { + picture->XvMC_motion_type = XINE_MC_FRAME; + } else { + picture->XvMC_motion_type = XINE_MC_FIELD; + /* predict from field of same parity */ + picture->XvMC_mv_field_sel[0][0] = + picture->XvMC_mv_field_sel[0][1] = + (picture->picture_structure==BOTTOM_FIELD); + } + picture->XvMC_mb_type = macroblock_modes & 0x1E; + picture->XvMC_x = picture->offset/16; + picture->XvMC_y = picture->v_offset/16; + + xvmc->proc_macro_block(picture->XvMC_x,picture->XvMC_y, + picture->XvMC_mb_type, + picture->XvMC_motion_type, + picture->XvMC_mv_field_sel, + picture->XvMC_dmvector, + picture->XvMC_cbp, + picture->XvMC_dct_type, + picture->current_frame, + picture->forward_reference_frame, + picture->backward_reference_frame, + picture->picture_structure, + picture->second_field, + picture->f_motion.pmv, + picture->b_motion.pmv); + } else { + // MOTION_CALL (motion_zero, MACROBLOCK_MOTION_FORWARD); + } + NEXT_MACROBLOCK; + } while (--mba_inc); + } else { 
+ do { + if(picture->mc->xvmc_accel) { + + /* derive motion_type */ + if(picture->picture_structure == FRAME_PICTURE) { + picture->XvMC_motion_type = XINE_MC_FRAME; + } else { + picture->XvMC_motion_type = XINE_MC_FIELD; + /* predict from field of same parity */ + picture->XvMC_mv_field_sel[0][0] = + picture->XvMC_mv_field_sel[0][1] = + (picture->picture_structure==BOTTOM_FIELD); + } + + picture->XvMC_mb_type = macroblock_modes & 0x1E; + picture->XvMC_x = picture->offset/16; + picture->XvMC_y = picture->v_offset/16; + + xvmc->proc_macro_block(picture->XvMC_x,picture->XvMC_y, + picture->XvMC_mb_type, + picture->XvMC_motion_type, + picture->XvMC_mv_field_sel, + picture->XvMC_dmvector, + picture->XvMC_cbp, + picture->XvMC_dct_type, + picture->current_frame, + picture->forward_reference_frame, + picture->backward_reference_frame, + picture->picture_structure, + picture->second_field, + picture->f_motion.pmv, + picture->b_motion.pmv); + } else { + MOTION_CALL (motion_reuse, macroblock_modes); + } + NEXT_MACROBLOCK; + } while (--mba_inc); + } + } + } + accel->xvmc_last_slice_code = code; +#undef bit_buf +#undef bits +#undef bit_ptr +} + diff --git a/src/video_dec/libmpeg2/slice_xvmc_vld.c b/src/video_dec/libmpeg2/slice_xvmc_vld.c new file mode 100644 index 000000000..60fa8b4f8 --- /dev/null +++ b/src/video_dec/libmpeg2/slice_xvmc_vld.c @@ -0,0 +1,229 @@ +/* + * Copyright (c) 2004 The Unichrome project. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free Software + * Foundation; either version 2, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTIES OR REPRESENTATIONS; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <xine/xine_internal.h> +#include <xine/video_out.h> +#include "mpeg2.h" +#include "mpeg2_internal.h" +#include "xvmc_vld.h" + +static const uint8_t zig_zag_scan[64] ATTR_ALIGN(16) = +{ + /* Zig-Zag scan pattern */ + 0, 1, 8,16, 9, 2, 3,10, + 17,24,32,25,18,11, 4, 5, + 12,19,26,33,40,48,41,34, + 27,20,13, 6, 7,14,21,28, + 35,42,49,56,57,50,43,36, + 29,22,15,23,30,37,44,51, + 58,59,52,45,38,31,39,46, + 53,60,61,54,47,55,62,63 +}; + +static const uint8_t alternate_scan [64] ATTR_ALIGN(16) = +{ + /* Alternate scan pattern */ + 0,8,16,24,1,9,2,10,17,25,32,40,48,56,57,49, + 41,33,26,18,3,11,4,12,19,27,34,42,50,58,35,43, + 51,59,20,28,5,13,6,14,21,29,36,44,52,60,37,45, + 53,61,22,30,7,15,23,31,38,46,54,62,39,47,55,63 +}; + +void mpeg2_xxmc_slice( mpeg2dec_accel_t *accel, picture_t *picture, + int code, uint8_t *buffer, uint32_t chunk_size, + uint8_t *chunk_buffer) + +{ + vo_frame_t + *frame = picture->current_frame; + xine_xxmc_t + *xxmc = (xine_xxmc_t *) frame->accel_data; + xine_vld_frame_t + *vft = &xxmc->vld_frame; + unsigned + mb_frame_height; + int + i; + const uint8_t * + scan_pattern; + float + ms_per_slice; + + if (1 == code && accel->xvmc_last_slice_code != 1) { + frame->bad_frame = 1; + accel->slices_per_row = 1; + accel->row_slice_count = 1; + + /* + * Check that first field went through OK. Otherwise, + * indicate bad frame. + */ + + if (picture->second_field) { + accel->xvmc_last_slice_code = (xxmc->decoded) ? 0 : -1; + xxmc->decoded = 0; + } else { + accel->xvmc_last_slice_code = 0; + } + + mb_frame_height = + (!(picture->mpeg1) && (picture->progressive_sequence)) ? 
+ 2*((picture->coded_picture_height+31) >> 5) : + (picture->coded_picture_height+15) >> 4; + accel->xxmc_mb_pic_height = (picture->picture_structure == FRAME_PICTURE ) ? + mb_frame_height : mb_frame_height >> 1; + + ms_per_slice = 1000. / (90000. * mb_frame_height) * frame->duration; + xxmc->sleep = 1. / (ms_per_slice * 0.45); + if (xxmc->sleep < 1.) xxmc->sleep = 1.; + + if (picture->mpeg1) { + vft->mv_ranges[0][0] = picture->b_motion.f_code[0]; + vft->mv_ranges[0][1] = picture->b_motion.f_code[0]; + vft->mv_ranges[1][0] = picture->f_motion.f_code[0]; + vft->mv_ranges[1][1] = picture->f_motion.f_code[0]; + } else { + vft->mv_ranges[0][0] = picture->b_motion.f_code[0]; + vft->mv_ranges[0][1] = picture->b_motion.f_code[1]; + vft->mv_ranges[1][0] = picture->f_motion.f_code[0]; + vft->mv_ranges[1][1] = picture->f_motion.f_code[1]; + } + + vft->picture_structure = picture->picture_structure; + vft->picture_coding_type = picture->picture_coding_type; + vft->mpeg_coding = (picture->mpeg1) ? 0 : 1; + vft->progressive_sequence = picture->progressive_sequence; + vft->scan = (picture->scan == mpeg2_scan_alt); + vft->pred_dct_frame = picture->frame_pred_frame_dct; + vft->concealment_motion_vectors = + picture->concealment_motion_vectors; + vft->q_scale_type = picture->q_scale_type; + vft->intra_vlc_format = picture->intra_vlc_format; + vft->intra_dc_precision = picture->intra_dc_precision; + vft->second_field = picture->second_field; + + /* + * Translation of libmpeg2's Q-matrix layout to VLD XvMC's. + * Errors here will give + * blocky artifacts and sometimes wrong colors. + */ + + scan_pattern = (vft->scan) ? 
alternate_scan : zig_zag_scan; + + if ((vft->load_intra_quantizer_matrix = picture->load_intra_quantizer_matrix)) { + for (i=0; i<64; ++i) { + vft->intra_quantizer_matrix[scan_pattern[i]] = + picture->intra_quantizer_matrix[picture->scan[i]]; + } + } + + if ((vft->load_non_intra_quantizer_matrix = picture->load_non_intra_quantizer_matrix)) { + for (i=0; i<64; ++i) { + vft->non_intra_quantizer_matrix[scan_pattern[i]] = + picture->non_intra_quantizer_matrix[picture->scan[i]]; + } + } + + picture->load_intra_quantizer_matrix = 0; + picture->load_non_intra_quantizer_matrix = 0; + vft->forward_reference_frame = picture->forward_reference_frame; + vft->backward_reference_frame = picture->backward_reference_frame; + xxmc->proc_xxmc_begin( frame ); + if (xxmc->result != 0) { + accel->xvmc_last_slice_code=-1; + } + } + + if (((code == accel->xvmc_last_slice_code + 1) || + (code == accel->xvmc_last_slice_code))) { + + /* + * Send this slice to the output plugin. May stall for a long + * time in proc_slice; + */ + + frame->bad_frame = 1; + xxmc->slice_data_size = chunk_size; + xxmc->slice_data = chunk_buffer; + xxmc->slice_code = code; + + xxmc->proc_xxmc_slice( frame ); + + if (xxmc->result != 0) { + accel->xvmc_last_slice_code=-1; + return; + } + /* + * Keep track of slices. + */ + + accel->row_slice_count = (accel->xvmc_last_slice_code == code) ? + accel->row_slice_count + 1 : 1; + accel->slices_per_row = (accel->row_slice_count > accel->slices_per_row) ? + accel->row_slice_count:accel->slices_per_row; + accel->xvmc_last_slice_code = code; + + } else { + + /* + * An error has occured. 
+ */ + + lprintf("libmpeg2: VLD XvMC: Slice error.\n"); + accel->xvmc_last_slice_code = -1; + return; + } +} + +void mpeg2_xxmc_vld_frame_complete(mpeg2dec_accel_t *accel, picture_t *picture, int code) +{ + vo_frame_t + *frame = picture->current_frame; + xine_xxmc_t + *xxmc = (xine_xxmc_t *) frame->accel_data; + + if (xxmc->decoded) return; + if (accel->xvmc_last_slice_code == -1) { + xxmc->proc_xxmc_flush( frame ); + return; + } + + if ((code != 0xff) || ((accel->xvmc_last_slice_code == + accel->xxmc_mb_pic_height) && + accel->slices_per_row == accel->row_slice_count)) { + + xxmc->proc_xxmc_flush( frame ); + + if (xxmc->result) { + accel->xvmc_last_slice_code=-1; + frame->bad_frame = 1; + return; + } + xxmc->decoded = 1; + accel->xvmc_last_slice_code = 0; + if (picture->picture_structure == 3 || picture->second_field) { + if (xxmc->result == 0) + frame->bad_frame = 0; + } + } +} diff --git a/src/video_dec/libmpeg2/stats.c b/src/video_dec/libmpeg2/stats.c new file mode 100644 index 000000000..63c701179 --- /dev/null +++ b/src/video_dec/libmpeg2/stats.c @@ -0,0 +1,317 @@ +/* + * stats.c + * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org> + * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#include <stdio.h> +#include <stdlib.h> +#include <inttypes.h> + +#include "mpeg2_internal.h" + +static int debug_level = -1; + +/* Determine if debug output is required. */ +/* We could potentially have multiple levels of debug info */ +static int debug_is_on (void) +{ + char * env_var; + + if (debug_level < 0) { + env_var = getenv ("MPEG2_DEBUG"); + + if (env_var) + debug_level = 1; + else + debug_level = 0; + } + + return debug_level; +} + +static void stats_picture (uint8_t * buffer) +{ + static const char *const picture_coding_type_str [8] = { + "Invalid picture type", + "I-type", + "P-type", + "B-type", + "D (very bad)", + "Invalid","Invalid","Invalid" + }; + + int picture_coding_type; + int temporal_reference; + int vbv_delay; + + temporal_reference = (buffer[0] << 2) | (buffer[1] >> 6); + picture_coding_type = (buffer [1] >> 3) & 7; + vbv_delay = ((buffer[1] << 13) | (buffer[2] << 5) | + (buffer[3] >> 3)) & 0xffff; + + fprintf (stderr, " (picture) %s temporal_reference %d, vbv_delay %d\n", + picture_coding_type_str [picture_coding_type], + temporal_reference, vbv_delay); +} + +static void stats_user_data (uint8_t * buffer) +{ + fprintf (stderr, " (user_data)\n"); +} + +static void stats_sequence (uint8_t * buffer) +{ + static const char *const aspect_ratio_information_str[8] = { + "Invalid Aspect Ratio", + "1:1", + "4:3", + "16:9", + "2.21:1", + "Invalid Aspect Ratio", + "Invalid Aspect Ratio", + "Invalid Aspect Ratio" + }; + static const char *const frame_rate_str[16] = { + "Invalid frame_rate_code", + "23.976", "24", "25" , "29.97", + "30" , "50", "59.94", "60" , + "Invalid frame_rate_code", "Invalid frame_rate_code", + "Invalid frame_rate_code", "Invalid frame_rate_code", + "Invalid frame_rate_code", "Invalid 
frame_rate_code", + "Invalid frame_rate_code" + }; + + int horizontal_size; + int vertical_size; + int aspect_ratio_information; + int frame_rate_code; + int bit_rate_value; + int vbv_buffer_size_value; + int constrained_parameters_flag; + int load_intra_quantizer_matrix; + int load_non_intra_quantizer_matrix; + + vertical_size = (buffer[0] << 16) | (buffer[1] << 8) | buffer[2]; + horizontal_size = vertical_size >> 12; + vertical_size &= 0xfff; + aspect_ratio_information = buffer[3] >> 4; + frame_rate_code = buffer[3] & 15; + bit_rate_value = (buffer[4] << 10) | (buffer[5] << 2) | (buffer[6] >> 6); + vbv_buffer_size_value = ((buffer[6] << 5) | (buffer[7] >> 3)) & 0x3ff; + constrained_parameters_flag = buffer[7] & 4; + load_intra_quantizer_matrix = buffer[7] & 2; + if (load_intra_quantizer_matrix) + buffer += 64; + load_non_intra_quantizer_matrix = buffer[7] & 1; + + fprintf (stderr, " (seq) %dx%d %s, %s fps, %5.0f kbps, VBV %d kB%s%s%s\n", + horizontal_size, vertical_size, + aspect_ratio_information_str [aspect_ratio_information], + frame_rate_str [frame_rate_code], + bit_rate_value * 400.0 / 1000.0, + 2 * vbv_buffer_size_value, + constrained_parameters_flag ? " , CP":"", + load_intra_quantizer_matrix ? " , Custom Intra Matrix":"", + load_non_intra_quantizer_matrix ? " , Custom Non-Intra Matrix":""); +} + +static void stats_sequence_error (uint8_t * buffer) +{ + fprintf (stderr, " (sequence_error)\n"); +} + +static void stats_sequence_end (uint8_t * buffer) +{ + fprintf (stderr, " (sequence_end)\n"); +} + +static void stats_group (uint8_t * buffer) +{ + fprintf (stderr, " (group)%s%s\n", + (buffer[4] & 0x40) ? " closed_gop" : "", + (buffer[4] & 0x20) ? 
" broken_link" : ""); +} + +static void stats_slice (int code, uint8_t * buffer) +{ + /* fprintf (stderr, " (slice %d)\n", code); */ +} + +static void stats_sequence_extension (uint8_t * buffer) +{ + static const char *const chroma_format_str[4] = { + "Invalid Chroma Format", + "4:2:0 Chroma", + "4:2:2 Chroma", + "4:4:4 Chroma" + }; + + int progressive_sequence; + int chroma_format; + + progressive_sequence = (buffer[1] >> 3) & 1; + chroma_format = (buffer[1] >> 1) & 3; + + fprintf (stderr, " (seq_ext) progressive_sequence %d, %s\n", + progressive_sequence, chroma_format_str [chroma_format]); +} + +static void stats_sequence_display_extension (uint8_t * buffer) +{ + fprintf (stderr, " (sequence_display_extension)\n"); +} + +static void stats_quant_matrix_extension (uint8_t * buffer) +{ + fprintf (stderr, " (quant_matrix_extension)\n"); +} + +static void stats_copyright_extension (uint8_t * buffer) +{ + fprintf (stderr, " (copyright_extension)\n"); +} + + +static void stats_sequence_scalable_extension (uint8_t * buffer) +{ + fprintf (stderr, " (sequence_scalable_extension)\n"); +} + +static void stats_picture_display_extension (uint8_t * buffer) +{ + fprintf (stderr, " (picture_display_extension)\n"); +} + +static void stats_picture_coding_extension (uint8_t * buffer) +{ + static const char *const picture_structure_str[4] = { + "Invalid Picture Structure", + "Top field", + "Bottom field", + "Frame Picture" + }; + + int f_code[2][2]; + int intra_dc_precision; + int picture_structure; + int top_field_first; + int frame_pred_frame_dct; + int concealment_motion_vectors; + int q_scale_type; + int intra_vlc_format; + int alternate_scan; + int repeat_first_field; + int progressive_frame; + + f_code[0][0] = buffer[0] & 15; + f_code[0][1] = buffer[1] >> 4; + f_code[1][0] = buffer[1] & 15; + f_code[1][1] = buffer[2] >> 4; + intra_dc_precision = (buffer[2] >> 2) & 3; + picture_structure = buffer[2] & 3; + top_field_first = buffer[3] >> 7; + frame_pred_frame_dct = (buffer[3] >> 
6) & 1; + concealment_motion_vectors = (buffer[3] >> 5) & 1; + q_scale_type = (buffer[3] >> 4) & 1; + intra_vlc_format = (buffer[3] >> 3) & 1; + alternate_scan = (buffer[3] >> 2) & 1; + repeat_first_field = (buffer[3] >> 1) & 1; + progressive_frame = buffer[4] >> 7; + + fprintf (stderr, + " (pic_ext) %s\n", picture_structure_str [picture_structure]); + fprintf (stderr, + " (pic_ext) forward horizontal f_code % d, forward vertical f_code % d\n", + f_code[0][0], f_code[0][1]); + fprintf (stderr, + " (pic_ext) backward horizontal f_code % d, backward vertical f_code % d\n", + f_code[1][0], f_code[1][1]); + fprintf (stderr, + " (pic_ext) intra_dc_precision %d, top_field_first %d, frame_pred_frame_dct %d\n", + intra_dc_precision, top_field_first, frame_pred_frame_dct); + fprintf (stderr, + " (pic_ext) concealment_motion_vectors %d, q_scale_type %d, intra_vlc_format %d\n", + concealment_motion_vectors, q_scale_type, intra_vlc_format); + fprintf (stderr, + " (pic_ext) alternate_scan %d, repeat_first_field %d, progressive_frame %d\n", + alternate_scan, repeat_first_field, progressive_frame); +} + +void mpeg2_stats (int code, uint8_t * buffer) +{ + if (! 
(debug_is_on ())) + return; + + switch (code) { + case 0x00: + stats_picture (buffer); + break; + case 0xb2: + stats_user_data (buffer); + break; + case 0xb3: + stats_sequence (buffer); + break; + case 0xb4: + stats_sequence_error (buffer); + break; + case 0xb5: + switch (buffer[0] >> 4) { + case 1: + stats_sequence_extension (buffer); + break; + case 2: + stats_sequence_display_extension (buffer); + break; + case 3: + stats_quant_matrix_extension (buffer); + break; + case 4: + stats_copyright_extension (buffer); + break; + case 5: + stats_sequence_scalable_extension (buffer); + break; + case 7: + stats_picture_display_extension (buffer); + break; + case 8: + stats_picture_coding_extension (buffer); + break; + default: + fprintf (stderr, " (unknown extension %#x)\n", buffer[0] >> 4); + } + break; + case 0xb7: + stats_sequence_end (buffer); + break; + case 0xb8: + stats_group (buffer); + break; + default: + if (code < 0xb0) + stats_slice (code, buffer); + else + fprintf (stderr, " (unknown start code %#02x)\n", code); + } +} diff --git a/src/video_dec/libmpeg2/vis.h b/src/video_dec/libmpeg2/vis.h new file mode 100644 index 000000000..69dd49075 --- /dev/null +++ b/src/video_dec/libmpeg2/vis.h @@ -0,0 +1,328 @@ +/* + * vis.h + * Copyright (C) 2003 David S. Miller <davem@redhat.com> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* You may be asking why I hard-code the instruction opcodes and don't + * use the normal VIS assembler mnemonics for the VIS instructions. + * + * The reason is that Sun, in their infinite wisdom, decided that a binary + * using a VIS instruction will cause it to be marked (in the ELF headers) + * as doing so, and this prevents the OS from loading such binaries if the + * current cpu doesn't have VIS. There is no way to easily override this + * behavior of the assembler that I am aware of. + * + * This totally defeats what libmpeg2 is trying to do which is allow a + * single binary to be created, and then detect the availability of VIS + * at runtime. + * + * I'm not saying that tainting the binary by default is bad, rather I'm + * saying that not providing a way to override this easily unnecessarily + * ties people's hands. + * + * Thus, we do the opcode encoding by hand and output 32-bit words in + * the assembler to keep the binary from becoming tainted. 
+ */ + +#define vis_opc_base ((0x1 << 31) | (0x36 << 19)) +#define vis_opf(X) ((X) << 5) +#define vis_sreg(X) (X) +#define vis_dreg(X) (((X)&0x1f)|((X)>>5)) +#define vis_rs1_s(X) (vis_sreg(X) << 14) +#define vis_rs1_d(X) (vis_dreg(X) << 14) +#define vis_rs2_s(X) (vis_sreg(X) << 0) +#define vis_rs2_d(X) (vis_dreg(X) << 0) +#define vis_rd_s(X) (vis_sreg(X) << 25) +#define vis_rd_d(X) (vis_dreg(X) << 25) + +#define vis_ss2s(opf,rs1,rs2,rd) \ + __asm__ __volatile__ (".word %0" \ + : : "i" (vis_opc_base | vis_opf(opf) | \ + vis_rs1_s(rs1) | \ + vis_rs2_s(rs2) | \ + vis_rd_s(rd))) + +#define vis_dd2d(opf,rs1,rs2,rd) \ + __asm__ __volatile__ (".word %0" \ + : : "i" (vis_opc_base | vis_opf(opf) | \ + vis_rs1_d(rs1) | \ + vis_rs2_d(rs2) | \ + vis_rd_d(rd))) + +#define vis_ss2d(opf,rs1,rs2,rd) \ + __asm__ __volatile__ (".word %0" \ + : : "i" (vis_opc_base | vis_opf(opf) | \ + vis_rs1_s(rs1) | \ + vis_rs2_s(rs2) | \ + vis_rd_d(rd))) + +#define vis_sd2d(opf,rs1,rs2,rd) \ + __asm__ __volatile__ (".word %0" \ + : : "i" (vis_opc_base | vis_opf(opf) | \ + vis_rs1_s(rs1) | \ + vis_rs2_d(rs2) | \ + vis_rd_d(rd))) + +#define vis_d2s(opf,rs2,rd) \ + __asm__ __volatile__ (".word %0" \ + : : "i" (vis_opc_base | vis_opf(opf) | \ + vis_rs2_d(rs2) | \ + vis_rd_s(rd))) + +#define vis_s2d(opf,rs2,rd) \ + __asm__ __volatile__ (".word %0" \ + : : "i" (vis_opc_base | vis_opf(opf) | \ + vis_rs2_s(rs2) | \ + vis_rd_d(rd))) + +#define vis_d12d(opf,rs1,rd) \ + __asm__ __volatile__ (".word %0" \ + : : "i" (vis_opc_base | vis_opf(opf) | \ + vis_rs1_d(rs1) | \ + vis_rd_d(rd))) + +#define vis_d22d(opf,rs2,rd) \ + __asm__ __volatile__ (".word %0" \ + : : "i" (vis_opc_base | vis_opf(opf) | \ + vis_rs2_d(rs2) | \ + vis_rd_d(rd))) + +#define vis_s12s(opf,rs1,rd) \ + __asm__ __volatile__ (".word %0" \ + : : "i" (vis_opc_base | vis_opf(opf) | \ + vis_rs1_s(rs1) | \ + vis_rd_s(rd))) + +#define vis_s22s(opf,rs2,rd) \ + __asm__ __volatile__ (".word %0" \ + : : "i" (vis_opc_base | vis_opf(opf) | \ + 
vis_rs2_s(rs2) | \ + vis_rd_s(rd))) + +#define vis_s(opf,rd) \ + __asm__ __volatile__ (".word %0" \ + : : "i" (vis_opc_base | vis_opf(opf) | \ + vis_rd_s(rd))) + +#define vis_d(opf,rd) \ + __asm__ __volatile__ (".word %0" \ + : : "i" (vis_opc_base | vis_opf(opf) | \ + vis_rd_d(rd))) + +#define vis_r2m(op,rd,mem) \ + __asm__ __volatile__ (#op "\t%%f" #rd ", [%0]" : : "r" (&(mem)) ) + +#define vis_r2m_2(op,rd,mem1,mem2) \ + __asm__ __volatile__ (#op "\t%%f" #rd ", [%0 + %1]" : : "r" (mem1), "r" (mem2) ) + +#define vis_m2r(op,mem,rd) \ + __asm__ __volatile__ (#op "\t[%0], %%f" #rd : : "r" (&(mem)) ) + +#define vis_m2r_2(op,mem1,mem2,rd) \ + __asm__ __volatile__ (#op "\t[%0 + %1], %%f" #rd : : "r" (mem1), "r" (mem2) ) + +static inline void vis_set_gsr(unsigned int _val) +{ + register unsigned int val asm("g1"); + + val = _val; + __asm__ __volatile__(".word 0xa7804000" + : : "r" (val)); +} + +#define VIS_GSR_ALIGNADDR_MASK 0x0000007 +#define VIS_GSR_ALIGNADDR_SHIFT 0 +#define VIS_GSR_SCALEFACT_MASK 0x0000078 +#define VIS_GSR_SCALEFACT_SHIFT 3 + +#define vis_ld32(mem,rs1) vis_m2r(ld, mem, rs1) +#define vis_ld32_2(mem1,mem2,rs1) vis_m2r_2(ld, mem1, mem2, rs1) +#define vis_st32(rs1,mem) vis_r2m(st, rs1, mem) +#define vis_st32_2(rs1,mem1,mem2) vis_r2m_2(st, rs1, mem1, mem2) +#define vis_ld64(mem,rs1) vis_m2r(ldd, mem, rs1) +#define vis_ld64_2(mem1,mem2,rs1) vis_m2r_2(ldd, mem1, mem2, rs1) +#define vis_st64(rs1,mem) vis_r2m(std, rs1, mem) +#define vis_st64_2(rs1,mem1,mem2) vis_r2m_2(std, rs1, mem1, mem2) + +#define vis_ldblk(mem, rd) \ +do { register void *__mem asm("g1"); \ + __mem = &(mem); \ + __asm__ __volatile__(".word 0xc1985e00 | %1" \ + : \ + : "r" (__mem), \ + "i" (vis_rd_d(rd)) \ + : "memory"); \ +} while (0) + +#define vis_stblk(rd, mem) \ +do { register void *__mem asm("g1"); \ + __mem = &(mem); \ + __asm__ __volatile__(".word 0xc1b85e00 | %1" \ + : \ + : "r" (__mem), \ + "i" (vis_rd_d(rd)) \ + : "memory"); \ +} while (0) + +#define vis_membar_storestore() \ + 
__asm__ __volatile__(".word 0x8143e008" : : : "memory") + +#define vis_membar_sync() \ + __asm__ __volatile__(".word 0x8143e040" : : : "memory") + +/* 16 and 32 bit partitioned addition and subtraction. The normal + * versions perform 4 16-bit or 2 32-bit additions or subtractions. + * The 's' versions perform 2 16-bit or 1 32-bit additions or + * subtractions. + */ + +#define vis_padd16(rs1,rs2,rd) vis_dd2d(0x50, rs1, rs2, rd) +#define vis_padd16s(rs1,rs2,rd) vis_ss2s(0x51, rs1, rs2, rd) +#define vis_padd32(rs1,rs2,rd) vis_dd2d(0x52, rs1, rs2, rd) +#define vis_padd32s(rs1,rs2,rd) vis_ss2s(0x53, rs1, rs2, rd) +#define vis_psub16(rs1,rs2,rd) vis_dd2d(0x54, rs1, rs2, rd) +#define vis_psub16s(rs1,rs2,rd) vis_ss2s(0x55, rs1, rs2, rd) +#define vis_psub32(rs1,rs2,rd) vis_dd2d(0x56, rs1, rs2, rd) +#define vis_psub32s(rs1,rs2,rd) vis_ss2s(0x57, rs1, rs2, rd) + +/* Pixel formatting instructions. */ + +#define vis_pack16(rs2,rd) vis_d2s( 0x3b, rs2, rd) +#define vis_pack32(rs1,rs2,rd) vis_dd2d(0x3a, rs1, rs2, rd) +#define vis_packfix(rs2,rd) vis_d2s( 0x3d, rs2, rd) +#define vis_expand(rs2,rd) vis_s2d( 0x4d, rs2, rd) +#define vis_pmerge(rs1,rs2,rd) vis_ss2d(0x4b, rs1, rs2, rd) + +/* Partitioned multiply instructions. */ + +#define vis_mul8x16(rs1,rs2,rd) vis_sd2d(0x31, rs1, rs2, rd) +#define vis_mul8x16au(rs1,rs2,rd) vis_ss2d(0x33, rs1, rs2, rd) +#define vis_mul8x16al(rs1,rs2,rd) vis_ss2d(0x35, rs1, rs2, rd) +#define vis_mul8sux16(rs1,rs2,rd) vis_dd2d(0x36, rs1, rs2, rd) +#define vis_mul8ulx16(rs1,rs2,rd) vis_dd2d(0x37, rs1, rs2, rd) +#define vis_muld8sux16(rs1,rs2,rd) vis_ss2d(0x38, rs1, rs2, rd) +#define vis_muld8ulx16(rs1,rs2,rd) vis_ss2d(0x39, rs1, rs2, rd) + +/* Alignment instructions. 
*/ + +static inline void *vis_alignaddr(void *_ptr) +{ + register void *ptr asm("g1"); + + ptr = _ptr; + + __asm__ __volatile__(".word %2" + : "=&r" (ptr) + : "0" (ptr), + "i" (vis_opc_base | vis_opf(0x18) | + vis_rs1_s(1) | + vis_rs2_s(0) | + vis_rd_s(1))); + + return ptr; +} + +static inline void vis_alignaddr_g0(void *_ptr) +{ + register void *ptr asm("g1"); + + ptr = _ptr; + + __asm__ __volatile__(".word %2" + : "=&r" (ptr) + : "0" (ptr), + "i" (vis_opc_base | vis_opf(0x18) | + vis_rs1_s(1) | + vis_rs2_s(0) | + vis_rd_s(0))); +} + +static inline void *vis_alignaddrl(void *_ptr) +{ + register void *ptr asm("g1"); + + ptr = _ptr; + + __asm__ __volatile__(".word %2" + : "=&r" (ptr) + : "0" (ptr), + "i" (vis_opc_base | vis_opf(0x19) | + vis_rs1_s(1) | + vis_rs2_s(0) | + vis_rd_s(1))); + + return ptr; +} + +static inline void vis_alignaddrl_g0(void *_ptr) +{ + register void *ptr asm("g1"); + + ptr = _ptr; + + __asm__ __volatile__(".word %2" + : "=&r" (ptr) + : "0" (ptr), + "i" (vis_opc_base | vis_opf(0x19) | + vis_rs1_s(1) | + vis_rs2_s(0) | + vis_rd_s(0))); +} + +#define vis_faligndata(rs1,rs2,rd) vis_dd2d(0x48, rs1, rs2, rd) + +/* Logical operate instructions. 
*/ + +#define vis_fzero(rd) vis_d( 0x60, rd) +#define vis_fzeros(rd) vis_s( 0x61, rd) +#define vis_fone(rd) vis_d( 0x7e, rd) +#define vis_fones(rd) vis_s( 0x7f, rd) +#define vis_src1(rs1,rd) vis_d12d(0x74, rs1, rd) +#define vis_src1s(rs1,rd) vis_s12s(0x75, rs1, rd) +#define vis_src2(rs2,rd) vis_d22d(0x78, rs2, rd) +#define vis_src2s(rs2,rd) vis_s22s(0x79, rs2, rd) +#define vis_not1(rs1,rd) vis_d12d(0x6a, rs1, rd) +#define vis_not1s(rs1,rd) vis_s12s(0x6b, rs1, rd) +#define vis_not2(rs2,rd) vis_d22d(0x66, rs2, rd) +#define vis_not2s(rs2,rd) vis_s22s(0x67, rs2, rd) +#define vis_or(rs1,rs2,rd) vis_dd2d(0x7c, rs1, rs2, rd) +#define vis_ors(rs1,rs2,rd) vis_ss2s(0x7d, rs1, rs2, rd) +#define vis_nor(rs1,rs2,rd) vis_dd2d(0x62, rs1, rs2, rd) +#define vis_nors(rs1,rs2,rd) vis_ss2s(0x63, rs1, rs2, rd) +#define vis_and(rs1,rs2,rd) vis_dd2d(0x70, rs1, rs2, rd) +#define vis_ands(rs1,rs2,rd) vis_ss2s(0x71, rs1, rs2, rd) +#define vis_nand(rs1,rs2,rd) vis_dd2d(0x6e, rs1, rs2, rd) +#define vis_nands(rs1,rs2,rd) vis_ss2s(0x6f, rs1, rs2, rd) +#define vis_xor(rs1,rs2,rd) vis_dd2d(0x6c, rs1, rs2, rd) +#define vis_xors(rs1,rs2,rd) vis_ss2s(0x6d, rs1, rs2, rd) +#define vis_xnor(rs1,rs2,rd) vis_dd2d(0x72, rs1, rs2, rd) +#define vis_xnors(rs1,rs2,rd) vis_ss2s(0x73, rs1, rs2, rd) +#define vis_ornot1(rs1,rs2,rd) vis_dd2d(0x7a, rs1, rs2, rd) +#define vis_ornot1s(rs1,rs2,rd) vis_ss2s(0x7b, rs1, rs2, rd) +#define vis_ornot2(rs1,rs2,rd) vis_dd2d(0x76, rs1, rs2, rd) +#define vis_ornot2s(rs1,rs2,rd) vis_ss2s(0x77, rs1, rs2, rd) +#define vis_andnot1(rs1,rs2,rd) vis_dd2d(0x68, rs1, rs2, rd) +#define vis_andnot1s(rs1,rs2,rd) vis_ss2s(0x69, rs1, rs2, rd) +#define vis_andnot2(rs1,rs2,rd) vis_dd2d(0x64, rs1, rs2, rd) +#define vis_andnot2s(rs1,rs2,rd) vis_ss2s(0x65, rs1, rs2, rd) + +/* Pixel component distance. 
*/ + +#define vis_pdist(rs1,rs2,rd) vis_dd2d(0x3e, rs1, rs2, rd) diff --git a/src/video_dec/libmpeg2/vlc.h b/src/video_dec/libmpeg2/vlc.h new file mode 100644 index 000000000..65de9a840 --- /dev/null +++ b/src/video_dec/libmpeg2/vlc.h @@ -0,0 +1,428 @@ +/* + * vlc.h + * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org> + * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#define GETWORD(bit_buf,shift,bit_ptr) \ +do { \ + bit_buf |= ((bit_ptr[0] << 8) | bit_ptr[1]) << (shift); \ + bit_ptr += 2; \ +} while (0) + +static inline void bitstream_init (picture_t * picture, uint8_t * start) +{ + picture->bitstream_buf = + (start[0] << 24) | (start[1] << 16) | (start[2] << 8) | start[3]; + picture->bitstream_ptr = start + 4; + picture->bitstream_bits = -16; +} + +/* make sure that there are at least 16 valid bits in bit_buf */ +#define NEEDBITS(bit_buf,bits,bit_ptr) \ +do { \ + if (bits > 0) { \ + GETWORD (bit_buf, bits, bit_ptr); \ + bits -= 16; \ + } \ +} while (0) + +/* remove num valid bits from bit_buf */ +#define DUMPBITS(bit_buf,bits,num) \ +do { \ + bit_buf <<= (num); \ + bits += (num); \ +} while (0) + +/* take num bits from the high part of bit_buf and zero extend them */ +#define UBITS(bit_buf,num) (((uint32_t)(bit_buf)) >> (32 - (num))) + +/* take num bits from the high part of bit_buf and sign extend them */ +#define SBITS(bit_buf,num) (((int32_t)(bit_buf)) >> (32 - (num))) + +typedef struct { + uint8_t modes; + uint8_t len; +} MBtab; + +typedef struct { + uint8_t delta; + uint8_t len; +} MVtab; + +typedef struct { + int8_t dmv; + uint8_t len; +} DMVtab; + +typedef struct { + uint8_t cbp; + uint8_t len; +} CBPtab; + +typedef struct { + uint8_t size; + uint8_t len; +} DCtab; + +typedef struct { + uint8_t run; + uint8_t level; + uint8_t len; +} DCTtab; + +typedef struct { + uint8_t mba; + uint8_t len; +} MBAtab; + + +#define INTRA MACROBLOCK_INTRA +#define QUANT MACROBLOCK_QUANT + +static const MBtab MB_I [] = { + {INTRA|QUANT, 2}, {INTRA, 1} +}; + +#define MC MACROBLOCK_MOTION_FORWARD +#define CODED MACROBLOCK_PATTERN + +static const MBtab MB_P [] = { + {INTRA|QUANT, 6}, {CODED|QUANT, 5}, {MC|CODED|QUANT, 5}, 
{INTRA, 5}, + {MC, 3}, {MC, 3}, {MC, 3}, {MC, 3}, + {CODED, 2}, {CODED, 2}, {CODED, 2}, {CODED, 2}, + {CODED, 2}, {CODED, 2}, {CODED, 2}, {CODED, 2}, + {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, + {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, + {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, + {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1} +}; + +#define FWD MACROBLOCK_MOTION_FORWARD +#define BWD MACROBLOCK_MOTION_BACKWARD +#define INTER MACROBLOCK_MOTION_FORWARD|MACROBLOCK_MOTION_BACKWARD + +static const MBtab MB_B [] = { + {0, 0}, {INTRA|QUANT, 6}, + {BWD|CODED|QUANT, 6}, {FWD|CODED|QUANT, 6}, + {INTER|CODED|QUANT, 5}, {INTER|CODED|QUANT, 5}, + {INTRA, 5}, {INTRA, 5}, + {FWD, 4}, {FWD, 4}, {FWD, 4}, {FWD, 4}, + {FWD|CODED, 4}, {FWD|CODED, 4}, {FWD|CODED, 4}, {FWD|CODED, 4}, + {BWD, 3}, {BWD, 3}, {BWD, 3}, {BWD, 3}, + {BWD, 3}, {BWD, 3}, {BWD, 3}, {BWD, 3}, + {BWD|CODED, 3}, {BWD|CODED, 3}, {BWD|CODED, 3}, {BWD|CODED, 3}, + {BWD|CODED, 3}, {BWD|CODED, 3}, {BWD|CODED, 3}, {BWD|CODED, 3}, + {INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2}, + {INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2}, + {INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2}, + {INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2}, + {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, + {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, + {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, + {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2} +}; + +#undef INTRA +#undef QUANT +#undef MC +#undef CODED +#undef FWD +#undef BWD +#undef INTER + + +static const MVtab MV_4 [] = { + { 3, 6}, { 2, 4}, { 1, 3}, { 1, 3}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2} +}; + +static const MVtab MV_10 [] = { + { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, + { 0,10}, { 0,10}, { 0,10}, { 0,10}, {15,10}, {14,10}, {13,10}, {12,10}, + {11,10}, {10,10}, { 9, 9}, { 9, 9}, { 8, 9}, { 8, 
9}, { 7, 9}, { 7, 9}, + { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, + { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, + { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7} +}; + + +static const DMVtab DMV_2 [] = { + { 0, 1}, { 0, 1}, { 1, 2}, {-1, 2} +}; + + +static const CBPtab CBP_7 [] = { + {0x22, 7}, {0x12, 7}, {0x0a, 7}, {0x06, 7}, + {0x21, 7}, {0x11, 7}, {0x09, 7}, {0x05, 7}, + {0x3f, 6}, {0x3f, 6}, {0x03, 6}, {0x03, 6}, + {0x24, 6}, {0x24, 6}, {0x18, 6}, {0x18, 6}, + {0x3e, 5}, {0x3e, 5}, {0x3e, 5}, {0x3e, 5}, + {0x02, 5}, {0x02, 5}, {0x02, 5}, {0x02, 5}, + {0x3d, 5}, {0x3d, 5}, {0x3d, 5}, {0x3d, 5}, + {0x01, 5}, {0x01, 5}, {0x01, 5}, {0x01, 5}, + {0x38, 5}, {0x38, 5}, {0x38, 5}, {0x38, 5}, + {0x34, 5}, {0x34, 5}, {0x34, 5}, {0x34, 5}, + {0x2c, 5}, {0x2c, 5}, {0x2c, 5}, {0x2c, 5}, + {0x1c, 5}, {0x1c, 5}, {0x1c, 5}, {0x1c, 5}, + {0x28, 5}, {0x28, 5}, {0x28, 5}, {0x28, 5}, + {0x14, 5}, {0x14, 5}, {0x14, 5}, {0x14, 5}, + {0x30, 5}, {0x30, 5}, {0x30, 5}, {0x30, 5}, + {0x0c, 5}, {0x0c, 5}, {0x0c, 5}, {0x0c, 5}, + {0x20, 4}, {0x20, 4}, {0x20, 4}, {0x20, 4}, + {0x20, 4}, {0x20, 4}, {0x20, 4}, {0x20, 4}, + {0x10, 4}, {0x10, 4}, {0x10, 4}, {0x10, 4}, + {0x10, 4}, {0x10, 4}, {0x10, 4}, {0x10, 4}, + {0x08, 4}, {0x08, 4}, {0x08, 4}, {0x08, 4}, + {0x08, 4}, {0x08, 4}, {0x08, 4}, {0x08, 4}, + {0x04, 4}, {0x04, 4}, {0x04, 4}, {0x04, 4}, + {0x04, 4}, {0x04, 4}, {0x04, 4}, {0x04, 4}, + {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, + {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, + {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, + {0x3c, 3}, {0x3c, 3}, {0x3c, 3}, {0x3c, 3} +}; + +static const CBPtab CBP_9 [] = { + {0, 0}, {0x00, 9}, {0x27, 9}, {0x1b, 9}, + {0x3b, 9}, {0x37, 9}, {0x2f, 9}, {0x1f, 9}, + {0x3a, 8}, {0x3a, 8}, {0x36, 8}, {0x36, 8}, + {0x2e, 8}, {0x2e, 8}, {0x1e, 8}, {0x1e, 8}, + {0x39, 8}, {0x39, 8}, {0x35, 8}, {0x35, 8}, + {0x2d, 8}, {0x2d, 8}, {0x1d, 8}, {0x1d, 8}, + {0x26, 8}, {0x26, 8}, {0x1a, 8}, 
{0x1a, 8}, + {0x25, 8}, {0x25, 8}, {0x19, 8}, {0x19, 8}, + {0x2b, 8}, {0x2b, 8}, {0x17, 8}, {0x17, 8}, + {0x33, 8}, {0x33, 8}, {0x0f, 8}, {0x0f, 8}, + {0x2a, 8}, {0x2a, 8}, {0x16, 8}, {0x16, 8}, + {0x32, 8}, {0x32, 8}, {0x0e, 8}, {0x0e, 8}, + {0x29, 8}, {0x29, 8}, {0x15, 8}, {0x15, 8}, + {0x31, 8}, {0x31, 8}, {0x0d, 8}, {0x0d, 8}, + {0x23, 8}, {0x23, 8}, {0x13, 8}, {0x13, 8}, + {0x0b, 8}, {0x0b, 8}, {0x07, 8}, {0x07, 8} +}; + + +static const DCtab DC_lum_5 [] = { + {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, + {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, + {0, 3}, {0, 3}, {0, 3}, {0, 3}, {3, 3}, {3, 3}, {3, 3}, {3, 3}, + {4, 3}, {4, 3}, {4, 3}, {4, 3}, {5, 4}, {5, 4}, {6, 5} +}; + +static const DCtab DC_chrom_5 [] = { + {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, + {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, + {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, + {3, 3}, {3, 3}, {3, 3}, {3, 3}, {4, 4}, {4, 4}, {5, 5} +}; + +static const DCtab DC_long [] = { + {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, { 6, 5}, { 6, 5}, + {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, { 6, 5}, { 6, 5}, + {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, { 7, 6}, { 7, 6}, + {8, 7}, {8, 7}, {8, 7}, {8, 7}, {9, 8}, {9, 8}, {10, 9}, {11, 9} +}; + + +static const DCTtab DCT_16 [] = { + {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, + {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, + {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, + {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, + { 2,18, 0}, { 2,17, 0}, { 2,16, 0}, { 2,15, 0}, + { 7, 3, 0}, { 17, 2, 0}, { 16, 2, 0}, { 15, 2, 0}, + { 14, 2, 0}, { 13, 2, 0}, { 12, 2, 0}, { 32, 1, 0}, + { 31, 1, 0}, { 30, 1, 0}, { 29, 1, 0}, { 28, 1, 0} +}; + +static const DCTtab DCT_15 [] = { + { 1,40,15}, { 1,39,15}, { 1,38,15}, { 1,37,15}, + { 1,36,15}, { 1,35,15}, { 1,34,15}, { 1,33,15}, + { 1,32,15}, { 2,14,15}, { 2,13,15}, { 2,12,15}, + { 
2,11,15}, { 2,10,15}, { 2, 9,15}, { 2, 8,15}, + { 1,31,14}, { 1,31,14}, { 1,30,14}, { 1,30,14}, + { 1,29,14}, { 1,29,14}, { 1,28,14}, { 1,28,14}, + { 1,27,14}, { 1,27,14}, { 1,26,14}, { 1,26,14}, + { 1,25,14}, { 1,25,14}, { 1,24,14}, { 1,24,14}, + { 1,23,14}, { 1,23,14}, { 1,22,14}, { 1,22,14}, + { 1,21,14}, { 1,21,14}, { 1,20,14}, { 1,20,14}, + { 1,19,14}, { 1,19,14}, { 1,18,14}, { 1,18,14}, + { 1,17,14}, { 1,17,14}, { 1,16,14}, { 1,16,14} +}; + +static const DCTtab DCT_13 [] = { + { 11, 2,13}, { 10, 2,13}, { 6, 3,13}, { 4, 4,13}, + { 3, 5,13}, { 2, 7,13}, { 2, 6,13}, { 1,15,13}, + { 1,14,13}, { 1,13,13}, { 1,12,13}, { 27, 1,13}, + { 26, 1,13}, { 25, 1,13}, { 24, 1,13}, { 23, 1,13}, + { 1,11,12}, { 1,11,12}, { 9, 2,12}, { 9, 2,12}, + { 5, 3,12}, { 5, 3,12}, { 1,10,12}, { 1,10,12}, + { 3, 4,12}, { 3, 4,12}, { 8, 2,12}, { 8, 2,12}, + { 22, 1,12}, { 22, 1,12}, { 21, 1,12}, { 21, 1,12}, + { 1, 9,12}, { 1, 9,12}, { 20, 1,12}, { 20, 1,12}, + { 19, 1,12}, { 19, 1,12}, { 2, 5,12}, { 2, 5,12}, + { 4, 3,12}, { 4, 3,12}, { 1, 8,12}, { 1, 8,12}, + { 7, 2,12}, { 7, 2,12}, { 18, 1,12}, { 18, 1,12} +}; + +static const DCTtab DCT_B14_10 [] = { + { 17, 1,10}, { 6, 2,10}, { 1, 7,10}, { 3, 3,10}, + { 2, 4,10}, { 16, 1,10}, { 15, 1,10}, { 5, 2,10} +}; + +static const DCTtab DCT_B14_8 [] = { + { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6}, + { 3, 2, 7}, { 3, 2, 7}, { 10, 1, 7}, { 10, 1, 7}, + { 1, 4, 7}, { 1, 4, 7}, { 9, 1, 7}, { 9, 1, 7}, + { 8, 1, 6}, { 8, 1, 6}, { 8, 1, 6}, { 8, 1, 6}, + { 7, 1, 6}, { 7, 1, 6}, { 7, 1, 6}, { 7, 1, 6}, + { 2, 2, 6}, { 2, 2, 6}, { 2, 2, 6}, { 2, 2, 6}, + { 6, 1, 6}, { 6, 1, 6}, { 6, 1, 6}, { 6, 1, 6}, + { 14, 1, 8}, { 1, 6, 8}, { 13, 1, 8}, { 12, 1, 8}, + { 4, 2, 8}, { 2, 3, 8}, { 1, 5, 8}, { 11, 1, 8} +}; + +static const DCTtab DCT_B14AC_5 [] = { + { 1, 3, 5}, { 5, 1, 5}, { 4, 1, 5}, + { 1, 2, 4}, { 1, 2, 4}, { 3, 1, 4}, { 3, 1, 4}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + {129, 0, 2}, {129, 0, 2}, {129, 0, 2}, {129, 0, 2}, + {129, 
0, 2}, {129, 0, 2}, {129, 0, 2}, {129, 0, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2} +}; + +static const DCTtab DCT_B14DC_5 [] = { + { 1, 3, 5}, { 5, 1, 5}, { 4, 1, 5}, + { 1, 2, 4}, { 1, 2, 4}, { 3, 1, 4}, { 3, 1, 4}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, + { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, + { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, + { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1} +}; + +static const DCTtab DCT_B15_10 [] = { + { 6, 2, 9}, { 6, 2, 9}, { 15, 1, 9}, { 15, 1, 9}, + { 3, 4,10}, { 17, 1,10}, { 16, 1, 9}, { 16, 1, 9} +}; + +static const DCTtab DCT_B15_8 [] = { + { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6}, { 65, 0, 6}, + { 8, 1, 7}, { 8, 1, 7}, { 9, 1, 7}, { 9, 1, 7}, + { 7, 1, 7}, { 7, 1, 7}, { 3, 2, 7}, { 3, 2, 7}, + { 1, 7, 6}, { 1, 7, 6}, { 1, 7, 6}, { 1, 7, 6}, + { 1, 6, 6}, { 1, 6, 6}, { 1, 6, 6}, { 1, 6, 6}, + { 5, 1, 6}, { 5, 1, 6}, { 5, 1, 6}, { 5, 1, 6}, + { 6, 1, 6}, { 6, 1, 6}, { 6, 1, 6}, { 6, 1, 6}, + { 2, 5, 8}, { 12, 1, 8}, { 1,11, 8}, { 1,10, 8}, + { 14, 1, 8}, { 13, 1, 8}, { 4, 2, 8}, { 2, 4, 8}, + { 3, 1, 5}, { 3, 1, 5}, { 3, 1, 5}, { 3, 1, 5}, + { 3, 1, 5}, { 3, 1, 5}, { 3, 1, 5}, { 3, 1, 5}, + { 2, 2, 5}, { 2, 2, 5}, { 2, 2, 5}, { 2, 2, 5}, + { 2, 2, 5}, { 2, 2, 5}, { 2, 2, 5}, { 2, 2, 5}, + { 4, 1, 5}, { 4, 1, 5}, { 4, 1, 5}, { 4, 1, 5}, + { 4, 1, 5}, { 4, 1, 5}, { 4, 1, 5}, { 4, 1, 5}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, + {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, + {129, 0, 
4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, + {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, + { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, + { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, + { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, + { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, + { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, + { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, + { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, + { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, + { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, + { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, + { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, + { 1, 4, 5}, { 1, 4, 5}, { 1, 4, 5}, { 1, 4, 5}, + { 1, 4, 5}, { 1, 4, 5}, { 1, 4, 5}, { 1, 4, 5}, + { 1, 5, 5}, { 1, 5, 5}, { 1, 5, 5}, { 1, 5, 5}, + { 1, 5, 5}, { 1, 5, 5}, { 1, 5, 5}, { 1, 5, 5}, + { 10, 1, 7}, { 10, 1, 7}, { 2, 3, 7}, { 2, 3, 7}, + { 11, 1, 7}, { 11, 1, 7}, { 1, 8, 7}, { 1, 8, 7}, + { 1, 9, 7}, { 1, 9, 7}, { 1,12, 8}, { 1,13, 8}, + { 3, 3, 8}, { 5, 2, 8}, { 1,14, 8}, { 1,15, 8} +}; + + +static const MBAtab MBA_5 [] = { + {6, 5}, {5, 5}, {4, 4}, {4, 4}, {3, 4}, {3, 4}, + {2, 
3}, {2, 3}, {2, 3}, {2, 3}, {1, 3}, {1, 3}, {1, 3}, {1, 3}, + {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, + {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1} +}; + +static const MBAtab MBA_11 [] = { + {32, 11}, {31, 11}, {30, 11}, {29, 11}, + {28, 11}, {27, 11}, {26, 11}, {25, 11}, + {24, 11}, {23, 11}, {22, 11}, {21, 11}, + {20, 10}, {20, 10}, {19, 10}, {19, 10}, + {18, 10}, {18, 10}, {17, 10}, {17, 10}, + {16, 10}, {16, 10}, {15, 10}, {15, 10}, + {14, 8}, {14, 8}, {14, 8}, {14, 8}, + {14, 8}, {14, 8}, {14, 8}, {14, 8}, + {13, 8}, {13, 8}, {13, 8}, {13, 8}, + {13, 8}, {13, 8}, {13, 8}, {13, 8}, + {12, 8}, {12, 8}, {12, 8}, {12, 8}, + {12, 8}, {12, 8}, {12, 8}, {12, 8}, + {11, 8}, {11, 8}, {11, 8}, {11, 8}, + {11, 8}, {11, 8}, {11, 8}, {11, 8}, + {10, 8}, {10, 8}, {10, 8}, {10, 8}, + {10, 8}, {10, 8}, {10, 8}, {10, 8}, + { 9, 8}, { 9, 8}, { 9, 8}, { 9, 8}, + { 9, 8}, { 9, 8}, { 9, 8}, { 9, 8}, + { 8, 7}, { 8, 7}, { 8, 7}, { 8, 7}, + { 8, 7}, { 8, 7}, { 8, 7}, { 8, 7}, + { 8, 7}, { 8, 7}, { 8, 7}, { 8, 7}, + { 8, 7}, { 8, 7}, { 8, 7}, { 8, 7}, + { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7}, + { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7}, + { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7}, + { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7} +}; diff --git a/src/video_dec/libmpeg2/xine_mpeg2_decoder.c b/src/video_dec/libmpeg2/xine_mpeg2_decoder.c new file mode 100644 index 000000000..c4c7fac2d --- /dev/null +++ b/src/video_dec/libmpeg2/xine_mpeg2_decoder.c @@ -0,0 +1,173 @@ +/* + * Copyright (C) 2000-2003 the xine project + * + * This file is part of xine, a free video player. + * + * xine is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * xine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110, USA + * + * stuff needed to turn libmpeg2 into a xine decoder plugin + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + + +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> + +#define LOG_MODULE "mpeg2_decoder" +#define LOG_VERBOSE +/* +#define LOG +*/ + +#include <xine/xine_internal.h> +#include <xine/video_out.h> +#include "mpeg2.h" +#include "mpeg2_internal.h" +#include <xine/buffer.h> + +typedef struct { + video_decoder_class_t decoder_class; +} mpeg2_class_t; + + +typedef struct mpeg2dec_decoder_s { + video_decoder_t video_decoder; + mpeg2dec_t mpeg2; + mpeg2_class_t *class; + xine_stream_t *stream; +} mpeg2dec_decoder_t; + +static void mpeg2dec_decode_data (video_decoder_t *this_gen, buf_element_t *buf) { + mpeg2dec_decoder_t *this = (mpeg2dec_decoder_t *) this_gen; + + lprintf ("decode_data, flags=0x%08x ...\n", buf->decoder_flags); + + /* handle aspect hints from xine-dvdnav */ + if (buf->decoder_flags & BUF_FLAG_SPECIAL) { + if (buf->decoder_info[1] == BUF_SPECIAL_ASPECT) { + this->mpeg2.force_aspect = buf->decoder_info[2]; + if (buf->decoder_info[3] == 0x1 && buf->decoder_info[2] == 3) + /* letterboxing is denied, we have to do pan&scan */ + this->mpeg2.force_pan_scan = 1; + else + this->mpeg2.force_pan_scan = 0; + } + return; + } + + if (buf->decoder_flags & BUF_FLAG_PREVIEW) { + mpeg2_find_sequence_header (&this->mpeg2, buf->content, buf->content + buf->size); + } else { + + mpeg2_decode_data (&this->mpeg2, 
buf->content, buf->content + buf->size, + buf->pts); + } + + lprintf ("decode_data...done\n"); +} + +static void mpeg2dec_flush (video_decoder_t *this_gen) { + mpeg2dec_decoder_t *this = (mpeg2dec_decoder_t *) this_gen; + + lprintf ("flush\n"); + + mpeg2_flush (&this->mpeg2); +} + +static void mpeg2dec_reset (video_decoder_t *this_gen) { + mpeg2dec_decoder_t *this = (mpeg2dec_decoder_t *) this_gen; + + mpeg2_reset (&this->mpeg2); +} + +static void mpeg2dec_discontinuity (video_decoder_t *this_gen) { + mpeg2dec_decoder_t *this = (mpeg2dec_decoder_t *) this_gen; + + mpeg2_discontinuity (&this->mpeg2); +} + +static void mpeg2dec_dispose (video_decoder_t *this_gen) { + + mpeg2dec_decoder_t *this = (mpeg2dec_decoder_t *) this_gen; + + lprintf ("close\n"); + + mpeg2_close (&this->mpeg2); + + this->stream->video_out->close(this->stream->video_out, this->stream); + + free (this); +} + +static video_decoder_t *open_plugin (video_decoder_class_t *class_gen, xine_stream_t *stream) { + mpeg2dec_decoder_t *this ; + + this = (mpeg2dec_decoder_t *) calloc(1, sizeof(mpeg2dec_decoder_t)); + + this->video_decoder.decode_data = mpeg2dec_decode_data; + this->video_decoder.flush = mpeg2dec_flush; + this->video_decoder.reset = mpeg2dec_reset; + this->video_decoder.discontinuity = mpeg2dec_discontinuity; + this->video_decoder.dispose = mpeg2dec_dispose; + this->stream = stream; + this->class = (mpeg2_class_t *) class_gen; + this->mpeg2.stream = stream; + + mpeg2_init (&this->mpeg2, stream->video_out); + (stream->video_out->open) (stream->video_out, stream); + this->mpeg2.force_aspect = this->mpeg2.force_pan_scan = 0; + + return &this->video_decoder; +} + +/* + * mpeg2 plugin class + */ +static void *init_plugin (xine_t *xine, void *data) { + + mpeg2_class_t *this; + + this = (mpeg2_class_t *) calloc(1, sizeof(mpeg2_class_t)); + + this->decoder_class.open_plugin = open_plugin; + this->decoder_class.identifier = "mpeg2dec"; + this->decoder_class.description = N_("mpeg2 based video decoder 
plugin"); + this->decoder_class.dispose = default_video_decoder_class_dispose; + + return this; +} +/* + * exported plugin catalog entry + */ + +static const uint32_t supported_types[] = { BUF_VIDEO_MPEG, 0 }; + +static const decoder_info_t dec_info_mpeg2 = { + supported_types, /* supported types */ + 7 /* priority */ +}; + +const plugin_info_t xine_plugin_info[] EXPORTED = { + /* type, API, "name", version, special_info, init_function */ + { PLUGIN_VIDEO_DECODER, 19, "mpeg2", XINE_VERSION_CODE, &dec_info_mpeg2, init_plugin }, + { PLUGIN_NONE, 0, "", 0, NULL, NULL } +}; diff --git a/src/video_dec/libmpeg2/xvmc.h b/src/video_dec/libmpeg2/xvmc.h new file mode 100644 index 000000000..5d61bcf83 --- /dev/null +++ b/src/video_dec/libmpeg2/xvmc.h @@ -0,0 +1,32 @@ +/* + * mpeg2_internal.h + * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org> + * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _XVMC_H +#include "libmpeg2_accel.h" + +/* slice_xvmc.c */ + +void mpeg2_xvmc_slice (mpeg2dec_accel_t *accel, picture_t * picture, int code, uint8_t * buffer); +void xvmc_setup_scan_ptable( void ); + +#endif diff --git a/src/video_dec/libmpeg2/xvmc_vld.h b/src/video_dec/libmpeg2/xvmc_vld.h new file mode 100644 index 000000000..561d1789d --- /dev/null +++ b/src/video_dec/libmpeg2/xvmc_vld.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2004 The Unichrome project. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free Software + * Foundation; either version 2, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTIES OR REPRESENTATIONS; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ * + * + */ + +#ifndef _XVMC_VLD_H +#define _XVMC_VLD_H + +#include "accel_xvmc.h" +#include "xvmc.h" + +extern void mpeg2_xxmc_slice( mpeg2dec_accel_t *accel, picture_t *picture, + int code, uint8_t *buffer, uint32_t chunk_size, + uint8_t *chunk_buffer); +extern void mpeg2_xxmc_vld_frame_complete(mpeg2dec_accel_t *accel, picture_t *picture, int code); + + +#endif diff --git a/src/video_dec/libmpeg2new/Makefile.am b/src/video_dec/libmpeg2new/Makefile.am new file mode 100644 index 000000000..2ff66d089 --- /dev/null +++ b/src/video_dec/libmpeg2new/Makefile.am @@ -0,0 +1,20 @@ +include $(top_srcdir)/misc/Makefile.quiet +include $(top_builddir)/misc/Makefile.plugins +include $(top_srcdir)/misc/Makefile.common + +AM_CFLAGS = $(DEFAULT_OCFLAGS) $(VISIBILITY_FLAG) +AM_LDFLAGS = $(xineplug_ldflags) + +SUBDIRS = include libmpeg2 + +if ENABLE_MPEG2NEW +mpeg2new_module = xineplug_decode_mpeg2new.la +endif + +xineplug_LTLIBRARIES = $(mpeg2new_module) + +xineplug_decode_mpeg2new_la_SOURCES = \ + xine_mpeg2new_decoder.c + +xineplug_decode_mpeg2new_la_LIBADD = $(XINE_LIB) libmpeg2/libmpeg2.la +xineplug_decode_mpeg2new_la_CFLAGS = $(AM_CFLAGS) $(MLIB_CFLAGS) diff --git a/src/video_dec/libmpeg2new/include/Makefile.am b/src/video_dec/libmpeg2new/include/Makefile.am new file mode 100644 index 000000000..d9c7a4df6 --- /dev/null +++ b/src/video_dec/libmpeg2new/include/Makefile.am @@ -0,0 +1,5 @@ +include $(top_srcdir)/misc/Makefile.quiet +include $(top_builddir)/misc/Makefile.plugins +include $(top_srcdir)/misc/Makefile.common + +EXTRA_DIST = video_out.h mmx.h alpha_asm.h vis.h attributes.h tendra.h mpeg2.h mpeg2convert.h diff --git a/src/video_dec/libmpeg2new/include/alpha_asm.h b/src/video_dec/libmpeg2new/include/alpha_asm.h new file mode 100644 index 000000000..bf1081f24 --- /dev/null +++ b/src/video_dec/libmpeg2new/include/alpha_asm.h @@ -0,0 +1,181 @@ +/* + * Alpha assembly macros + * Copyright (c) 2002-2003 Falk Hueffner <falk@debian.org> + * + * This file is part of mpeg2dec, a 
free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef ALPHA_ASM_H +#define ALPHA_ASM_H + +#include <inttypes.h> + +#if defined __GNUC__ +# define GNUC_PREREQ(maj, min) \ + ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min)) +#else +# define GNUC_PREREQ(maj, min) 0 +#endif + +#define AMASK_BWX (1 << 0) +#define AMASK_FIX (1 << 1) +#define AMASK_CIX (1 << 2) +#define AMASK_MVI (1 << 8) + +#ifdef __alpha_bwx__ +# define HAVE_BWX() 1 +#else +# define HAVE_BWX() (amask(AMASK_BWX) == 0) +#endif +#ifdef __alpha_fix__ +# define HAVE_FIX() 1 +#else +# define HAVE_FIX() (amask(AMASK_FIX) == 0) +#endif +#ifdef __alpha_max__ +# define HAVE_MVI() 1 +#else +# define HAVE_MVI() (amask(AMASK_MVI) == 0) +#endif +#ifdef __alpha_cix__ +# define HAVE_CIX() 1 +#else +# define HAVE_CIX() (amask(AMASK_CIX) == 0) +#endif + +inline static uint64_t BYTE_VEC(uint64_t x) +{ + x |= x << 8; + x |= x << 16; + x |= x << 32; + return x; +} +inline static uint64_t WORD_VEC(uint64_t x) +{ + x |= x << 16; + x |= x << 32; + return x; +} + +#define ldq(p) (*(const uint64_t *) (p)) +#define ldl(p) (*(const int32_t *) (p)) +#define stl(l, p) do { *(uint32_t *) (p) = (l); } while (0) +#define stq(l, p) do { *(uint64_t *) (p) = (l); } while 
(0) +#define sextw(x) ((int16_t) (x)) + +#ifdef __GNUC__ +struct unaligned_long { uint64_t l; } __attribute__((packed)); +#define ldq_u(p) (*(const uint64_t *) (((uint64_t) (p)) & ~7ul)) +#define uldq(a) (((const struct unaligned_long *) (a))->l) + +#if GNUC_PREREQ(3,3) +#define prefetch(p) __builtin_prefetch((p), 0, 1) +#define prefetch_en(p) __builtin_prefetch((p), 0, 0) +#define prefetch_m(p) __builtin_prefetch((p), 1, 1) +#define prefetch_men(p) __builtin_prefetch((p), 1, 0) +#define cmpbge __builtin_alpha_cmpbge +/* Avoid warnings. */ +#define extql(a, b) __builtin_alpha_extql(a, (uint64_t) (b)) +#define extwl(a, b) __builtin_alpha_extwl(a, (uint64_t) (b)) +#define extqh(a, b) __builtin_alpha_extqh(a, (uint64_t) (b)) +#define zap __builtin_alpha_zap +#define zapnot __builtin_alpha_zapnot +#define amask __builtin_alpha_amask +#define implver __builtin_alpha_implver +#define rpcc __builtin_alpha_rpcc +#else +#define prefetch(p) asm volatile("ldl $31,%0" : : "m"(*(const char *) (p)) : "memory") +#define prefetch_en(p) asm volatile("ldq $31,%0" : : "m"(*(const char *) (p)) : "memory") +#define prefetch_m(p) asm volatile("lds $f31,%0" : : "m"(*(const char *) (p)) : "memory") +#define prefetch_men(p) asm volatile("ldt $f31,%0" : : "m"(*(const char *) (p)) : "memory") +#define cmpbge(a, b) ({ uint64_t __r; asm ("cmpbge %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) +#define extql(a, b) ({ uint64_t __r; asm ("extql %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) +#define extwl(a, b) ({ uint64_t __r; asm ("extwl %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) +#define extqh(a, b) ({ uint64_t __r; asm ("extqh %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) +#define zap(a, b) ({ uint64_t __r; asm ("zap %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) +#define zapnot(a, b) ({ uint64_t __r; asm ("zapnot %r1,%2,%0" : "=r" (__r) : "rJ" (a), "rI" (b)); __r; }) +#define amask(a) ({ uint64_t __r; asm ("amask %1,%0" : "=r" (__r) : "rI" (a)); 
__r; }) +#define implver() ({ uint64_t __r; asm ("implver %0" : "=r" (__r)); __r; }) +#define rpcc() ({ uint64_t __r; asm volatile ("rpcc %0" : "=r" (__r)); __r; }) +#endif +#define wh64(p) asm volatile("wh64 (%0)" : : "r"(p) : "memory") + +#if GNUC_PREREQ(3,3) && defined(__alpha_max__) +#define minub8 __builtin_alpha_minub8 +#define minsb8 __builtin_alpha_minsb8 +#define minuw4 __builtin_alpha_minuw4 +#define minsw4 __builtin_alpha_minsw4 +#define maxub8 __builtin_alpha_maxub8 +#define maxsb8 __builtin_alpha_maxsb8 +#define maxuw4 __builtin_alpha_maxuw4 +#define maxsw4 __builtin_alpha_maxsw4 +#define perr __builtin_alpha_perr +#define pklb __builtin_alpha_pklb +#define pkwb __builtin_alpha_pkwb +#define unpkbl __builtin_alpha_unpkbl +#define unpkbw __builtin_alpha_unpkbw +#else +#define minub8(a, b) ({ uint64_t __r; asm (".arch ev6; minub8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) +#define minsb8(a, b) ({ uint64_t __r; asm (".arch ev6; minsb8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) +#define minuw4(a, b) ({ uint64_t __r; asm (".arch ev6; minuw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) +#define minsw4(a, b) ({ uint64_t __r; asm (".arch ev6; minsw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) +#define maxub8(a, b) ({ uint64_t __r; asm (".arch ev6; maxub8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) +#define maxsb8(a, b) ({ uint64_t __r; asm (".arch ev6; maxsb8 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) +#define maxuw4(a, b) ({ uint64_t __r; asm (".arch ev6; maxuw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) +#define maxsw4(a, b) ({ uint64_t __r; asm (".arch ev6; maxsw4 %r1,%2,%0" : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; }) +#define perr(a, b) ({ uint64_t __r; asm (".arch ev6; perr %r1,%r2,%0" : "=r" (__r) : "%rJ" (a), "rJ" (b)); __r; }) +#define pklb(a) ({ uint64_t __r; asm (".arch ev6; pklb %r1,%0" : "=r" (__r) : "rJ" (a)); __r; }) +#define pkwb(a) ({ uint64_t __r; asm 
(".arch ev6; pkwb %r1,%0" : "=r" (__r) : "rJ" (a)); __r; }) +#define unpkbl(a) ({ uint64_t __r; asm (".arch ev6; unpkbl %r1,%0" : "=r" (__r) : "rJ" (a)); __r; }) +#define unpkbw(a) ({ uint64_t __r; asm (".arch ev6; unpkbw %r1,%0" : "=r" (__r) : "rJ" (a)); __r; }) +#endif + +#elif defined(__DECC) /* Digital/Compaq/hp "ccc" compiler */ + +#include <c_asm.h> +#define ldq_u(a) asm ("ldq_u %v0,0(%a0)", a) +#define uldq(a) (*(const __unaligned uint64_t *) (a)) +#define cmpbge(a, b) asm ("cmpbge %a0,%a1,%v0", a, b) +#define extql(a, b) asm ("extql %a0,%a1,%v0", a, b) +#define extwl(a, b) asm ("extwl %a0,%a1,%v0", a, b) +#define extqh(a, b) asm ("extqh %a0,%a1,%v0", a, b) +#define zap(a, b) asm ("zap %a0,%a1,%v0", a, b) +#define zapnot(a, b) asm ("zapnot %a0,%a1,%v0", a, b) +#define amask(a) asm ("amask %a0,%v0", a) +#define implver() asm ("implver %v0") +#define rpcc() asm ("rpcc %v0") +#define minub8(a, b) asm ("minub8 %a0,%a1,%v0", a, b) +#define minsb8(a, b) asm ("minsb8 %a0,%a1,%v0", a, b) +#define minuw4(a, b) asm ("minuw4 %a0,%a1,%v0", a, b) +#define minsw4(a, b) asm ("minsw4 %a0,%a1,%v0", a, b) +#define maxub8(a, b) asm ("maxub8 %a0,%a1,%v0", a, b) +#define maxsb8(a, b) asm ("maxsb8 %a0,%a1,%v0", a, b) +#define maxuw4(a, b) asm ("maxuw4 %a0,%a1,%v0", a, b) +#define maxsw4(a, b) asm ("maxsw4 %a0,%a1,%v0", a, b) +#define perr(a, b) asm ("perr %a0,%a1,%v0", a, b) +#define pklb(a) asm ("pklb %a0,%v0", a) +#define pkwb(a) asm ("pkwb %a0,%v0", a) +#define unpkbl(a) asm ("unpkbl %a0,%v0", a) +#define unpkbw(a) asm ("unpkbw %a0,%v0", a) +#define wh64(a) asm ("wh64 %a0", a) + +#else +#error "Unknown compiler!" 
+#endif + +#endif /* ALPHA_ASM_H */ diff --git a/src/video_dec/libmpeg2new/include/attributes.h b/src/video_dec/libmpeg2new/include/attributes.h new file mode 100644 index 000000000..83f1364a2 --- /dev/null +++ b/src/video_dec/libmpeg2new/include/attributes.h @@ -0,0 +1,33 @@ +/* + * attributes.h + * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org> + * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* use gcc attribs to align critical data structures */ +#include <xine/attributes.h> + +#ifdef HAVE_BUILTIN_EXPECT +#define likely(x) __builtin_expect ((x) != 0, 1) +#define unlikely(x) __builtin_expect ((x) != 0, 0) +#else +#define likely(x) (x) +#define unlikely(x) (x) +#endif diff --git a/src/video_dec/libmpeg2new/include/mmx.h b/src/video_dec/libmpeg2new/include/mmx.h new file mode 100644 index 000000000..08b4d4776 --- /dev/null +++ b/src/video_dec/libmpeg2new/include/mmx.h @@ -0,0 +1,263 @@ +/* + * mmx.h + * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org> + * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* + * The type of an value that fits in an MMX register (note that long + * long constant values MUST be suffixed by LL and unsigned long long + * values by ULL, lest they be truncated by the compiler) + */ + +typedef union { + long long q; /* Quadword (64-bit) value */ + unsigned long long uq; /* Unsigned Quadword */ + int d[2]; /* 2 Doubleword (32-bit) values */ + unsigned int ud[2]; /* 2 Unsigned Doubleword */ + short w[4]; /* 4 Word (16-bit) values */ + unsigned short uw[4]; /* 4 Unsigned Word */ + char b[8]; /* 8 Byte (8-bit) values */ + unsigned char ub[8]; /* 8 Unsigned Byte */ + float s[2]; /* Single-precision (32-bit) value */ +} ATTR_ALIGN(8) mmx_t; /* On an 8-byte (64-bit) boundary */ + + +#define mmx_i2r(op,imm,reg) \ + __asm__ __volatile__ (#op " %0, %%" #reg \ + : /* nothing */ \ + : "i" (imm) ) + +#define mmx_m2r(op,mem,reg) \ + __asm__ __volatile__ (#op " %0, %%" #reg \ + : /* nothing */ \ + : "m" (mem)) + +#define mmx_r2m(op,reg,mem) \ + __asm__ __volatile__ (#op " %%" #reg ", %0" \ + : "=m" (mem) \ + : /* nothing */ ) + +#define mmx_r2r(op,regs,regd) \ + __asm__ __volatile__ (#op " %" #regs ", %" #regd) + + +#define emms() __asm__ __volatile__ ("emms") + +#define movd_m2r(var,reg) mmx_m2r (movd, var, reg) +#define movd_r2m(reg,var) mmx_r2m (movd, reg, var) +#define movd_v2r(var,reg) __asm__ __volatile__ ("movd %0, %%" #reg \ + : /* nothing */ \ + : "rm" (var)) +#define movd_r2v(reg,var) __asm__ __volatile__ ("movd %%" #reg ", %0" \ + : "=rm" (var) \ + : /* nothing */ ) + +#define movq_m2r(var,reg) mmx_m2r (movq, var, reg) +#define movq_r2m(reg,var) mmx_r2m (movq, reg, var) +#define movq_r2r(regs,regd) mmx_r2r (movq, regs, regd) + +#define packssdw_m2r(var,reg) mmx_m2r (packssdw, var, reg) +#define packssdw_r2r(regs,regd) mmx_r2r 
(packssdw, regs, regd) +#define packsswb_m2r(var,reg) mmx_m2r (packsswb, var, reg) +#define packsswb_r2r(regs,regd) mmx_r2r (packsswb, regs, regd) + +#define packuswb_m2r(var,reg) mmx_m2r (packuswb, var, reg) +#define packuswb_r2r(regs,regd) mmx_r2r (packuswb, regs, regd) + +#define paddb_m2r(var,reg) mmx_m2r (paddb, var, reg) +#define paddb_r2r(regs,regd) mmx_r2r (paddb, regs, regd) +#define paddd_m2r(var,reg) mmx_m2r (paddd, var, reg) +#define paddd_r2r(regs,regd) mmx_r2r (paddd, regs, regd) +#define paddw_m2r(var,reg) mmx_m2r (paddw, var, reg) +#define paddw_r2r(regs,regd) mmx_r2r (paddw, regs, regd) + +#define paddsb_m2r(var,reg) mmx_m2r (paddsb, var, reg) +#define paddsb_r2r(regs,regd) mmx_r2r (paddsb, regs, regd) +#define paddsw_m2r(var,reg) mmx_m2r (paddsw, var, reg) +#define paddsw_r2r(regs,regd) mmx_r2r (paddsw, regs, regd) + +#define paddusb_m2r(var,reg) mmx_m2r (paddusb, var, reg) +#define paddusb_r2r(regs,regd) mmx_r2r (paddusb, regs, regd) +#define paddusw_m2r(var,reg) mmx_m2r (paddusw, var, reg) +#define paddusw_r2r(regs,regd) mmx_r2r (paddusw, regs, regd) + +#define pand_m2r(var,reg) mmx_m2r (pand, var, reg) +#define pand_r2r(regs,regd) mmx_r2r (pand, regs, regd) + +#define pandn_m2r(var,reg) mmx_m2r (pandn, var, reg) +#define pandn_r2r(regs,regd) mmx_r2r (pandn, regs, regd) + +#define pcmpeqb_m2r(var,reg) mmx_m2r (pcmpeqb, var, reg) +#define pcmpeqb_r2r(regs,regd) mmx_r2r (pcmpeqb, regs, regd) +#define pcmpeqd_m2r(var,reg) mmx_m2r (pcmpeqd, var, reg) +#define pcmpeqd_r2r(regs,regd) mmx_r2r (pcmpeqd, regs, regd) +#define pcmpeqw_m2r(var,reg) mmx_m2r (pcmpeqw, var, reg) +#define pcmpeqw_r2r(regs,regd) mmx_r2r (pcmpeqw, regs, regd) + +#define pcmpgtb_m2r(var,reg) mmx_m2r (pcmpgtb, var, reg) +#define pcmpgtb_r2r(regs,regd) mmx_r2r (pcmpgtb, regs, regd) +#define pcmpgtd_m2r(var,reg) mmx_m2r (pcmpgtd, var, reg) +#define pcmpgtd_r2r(regs,regd) mmx_r2r (pcmpgtd, regs, regd) +#define pcmpgtw_m2r(var,reg) mmx_m2r (pcmpgtw, var, reg) +#define 
pcmpgtw_r2r(regs,regd) mmx_r2r (pcmpgtw, regs, regd) + +#define pmaddwd_m2r(var,reg) mmx_m2r (pmaddwd, var, reg) +#define pmaddwd_r2r(regs,regd) mmx_r2r (pmaddwd, regs, regd) + +#define pmulhw_m2r(var,reg) mmx_m2r (pmulhw, var, reg) +#define pmulhw_r2r(regs,regd) mmx_r2r (pmulhw, regs, regd) + +#define pmullw_m2r(var,reg) mmx_m2r (pmullw, var, reg) +#define pmullw_r2r(regs,regd) mmx_r2r (pmullw, regs, regd) + +#define por_m2r(var,reg) mmx_m2r (por, var, reg) +#define por_r2r(regs,regd) mmx_r2r (por, regs, regd) + +#define pslld_i2r(imm,reg) mmx_i2r (pslld, imm, reg) +#define pslld_m2r(var,reg) mmx_m2r (pslld, var, reg) +#define pslld_r2r(regs,regd) mmx_r2r (pslld, regs, regd) +#define psllq_i2r(imm,reg) mmx_i2r (psllq, imm, reg) +#define psllq_m2r(var,reg) mmx_m2r (psllq, var, reg) +#define psllq_r2r(regs,regd) mmx_r2r (psllq, regs, regd) +#define psllw_i2r(imm,reg) mmx_i2r (psllw, imm, reg) +#define psllw_m2r(var,reg) mmx_m2r (psllw, var, reg) +#define psllw_r2r(regs,regd) mmx_r2r (psllw, regs, regd) + +#define psrad_i2r(imm,reg) mmx_i2r (psrad, imm, reg) +#define psrad_m2r(var,reg) mmx_m2r (psrad, var, reg) +#define psrad_r2r(regs,regd) mmx_r2r (psrad, regs, regd) +#define psraw_i2r(imm,reg) mmx_i2r (psraw, imm, reg) +#define psraw_m2r(var,reg) mmx_m2r (psraw, var, reg) +#define psraw_r2r(regs,regd) mmx_r2r (psraw, regs, regd) + +#define psrld_i2r(imm,reg) mmx_i2r (psrld, imm, reg) +#define psrld_m2r(var,reg) mmx_m2r (psrld, var, reg) +#define psrld_r2r(regs,regd) mmx_r2r (psrld, regs, regd) +#define psrlq_i2r(imm,reg) mmx_i2r (psrlq, imm, reg) +#define psrlq_m2r(var,reg) mmx_m2r (psrlq, var, reg) +#define psrlq_r2r(regs,regd) mmx_r2r (psrlq, regs, regd) +#define psrlw_i2r(imm,reg) mmx_i2r (psrlw, imm, reg) +#define psrlw_m2r(var,reg) mmx_m2r (psrlw, var, reg) +#define psrlw_r2r(regs,regd) mmx_r2r (psrlw, regs, regd) + +#define psubb_m2r(var,reg) mmx_m2r (psubb, var, reg) +#define psubb_r2r(regs,regd) mmx_r2r (psubb, regs, regd) +#define psubd_m2r(var,reg) 
mmx_m2r (psubd, var, reg) +#define psubd_r2r(regs,regd) mmx_r2r (psubd, regs, regd) +#define psubw_m2r(var,reg) mmx_m2r (psubw, var, reg) +#define psubw_r2r(regs,regd) mmx_r2r (psubw, regs, regd) + +#define psubsb_m2r(var,reg) mmx_m2r (psubsb, var, reg) +#define psubsb_r2r(regs,regd) mmx_r2r (psubsb, regs, regd) +#define psubsw_m2r(var,reg) mmx_m2r (psubsw, var, reg) +#define psubsw_r2r(regs,regd) mmx_r2r (psubsw, regs, regd) + +#define psubusb_m2r(var,reg) mmx_m2r (psubusb, var, reg) +#define psubusb_r2r(regs,regd) mmx_r2r (psubusb, regs, regd) +#define psubusw_m2r(var,reg) mmx_m2r (psubusw, var, reg) +#define psubusw_r2r(regs,regd) mmx_r2r (psubusw, regs, regd) + +#define punpckhbw_m2r(var,reg) mmx_m2r (punpckhbw, var, reg) +#define punpckhbw_r2r(regs,regd) mmx_r2r (punpckhbw, regs, regd) +#define punpckhdq_m2r(var,reg) mmx_m2r (punpckhdq, var, reg) +#define punpckhdq_r2r(regs,regd) mmx_r2r (punpckhdq, regs, regd) +#define punpckhwd_m2r(var,reg) mmx_m2r (punpckhwd, var, reg) +#define punpckhwd_r2r(regs,regd) mmx_r2r (punpckhwd, regs, regd) + +#define punpcklbw_m2r(var,reg) mmx_m2r (punpcklbw, var, reg) +#define punpcklbw_r2r(regs,regd) mmx_r2r (punpcklbw, regs, regd) +#define punpckldq_m2r(var,reg) mmx_m2r (punpckldq, var, reg) +#define punpckldq_r2r(regs,regd) mmx_r2r (punpckldq, regs, regd) +#define punpcklwd_m2r(var,reg) mmx_m2r (punpcklwd, var, reg) +#define punpcklwd_r2r(regs,regd) mmx_r2r (punpcklwd, regs, regd) + +#define pxor_m2r(var,reg) mmx_m2r (pxor, var, reg) +#define pxor_r2r(regs,regd) mmx_r2r (pxor, regs, regd) + + +/* 3DNOW extensions */ + +#define pavgusb_m2r(var,reg) mmx_m2r (pavgusb, var, reg) +#define pavgusb_r2r(regs,regd) mmx_r2r (pavgusb, regs, regd) + + +/* AMD MMX extensions - also available in intel SSE */ + + +#define mmx_m2ri(op,mem,reg,imm) \ + __asm__ __volatile__ (#op " %1, %0, %%" #reg \ + : /* nothing */ \ + : "m" (mem), "i" (imm)) + +#define mmx_r2ri(op,regs,regd,imm) \ + __asm__ __volatile__ (#op " %0, %%" #regs ", %%" #regd \ + 
: /* nothing */ \ + : "i" (imm) ) + +#define mmx_fetch(mem,hint) \ + __asm__ __volatile__ ("prefetch" #hint " %0" \ + : /* nothing */ \ + : "m" (mem)) + + +#define maskmovq(regs,maskreg) mmx_r2r (maskmovq, regs, maskreg) /* two-register form, no immediate; NOTE(review): confirm AT&T operand order */ + +#define movntq_r2m(mmreg,var) mmx_r2m (movntq, mmreg, var) + +#define pavgb_m2r(var,reg) mmx_m2r (pavgb, var, reg) +#define pavgb_r2r(regs,regd) mmx_r2r (pavgb, regs, regd) +#define pavgw_m2r(var,reg) mmx_m2r (pavgw, var, reg) +#define pavgw_r2r(regs,regd) mmx_r2r (pavgw, regs, regd) + +#define pextrw_r2r(mmreg,reg,imm) mmx_r2ri (pextrw, mmreg, reg, imm) + +#define pinsrw_r2r(reg,mmreg,imm) mmx_r2ri (pinsrw, reg, mmreg, imm) + +#define pmaxsw_m2r(var,reg) mmx_m2r (pmaxsw, var, reg) +#define pmaxsw_r2r(regs,regd) mmx_r2r (pmaxsw, regs, regd) + +#define pmaxub_m2r(var,reg) mmx_m2r (pmaxub, var, reg) +#define pmaxub_r2r(regs,regd) mmx_r2r (pmaxub, regs, regd) + +#define pminsw_m2r(var,reg) mmx_m2r (pminsw, var, reg) +#define pminsw_r2r(regs,regd) mmx_r2r (pminsw, regs, regd) + +#define pminub_m2r(var,reg) mmx_m2r (pminub, var, reg) +#define pminub_r2r(regs,regd) mmx_r2r (pminub, regs, regd) + +#define pmovmskb(mmreg,reg) \ + __asm__ __volatile__ ("pmovmskb %" #mmreg ", %" #reg) /* emit the MMX opcode this macro is named after, not SSE movmskps */ + +#define pmulhuw_m2r(var,reg) mmx_m2r (pmulhuw, var, reg) +#define pmulhuw_r2r(regs,regd) mmx_r2r (pmulhuw, regs, regd) + +#define prefetcht0(mem) mmx_fetch (mem, t0) +#define prefetcht1(mem) mmx_fetch (mem, t1) +#define prefetcht2(mem) mmx_fetch (mem, t2) +#define prefetchnta(mem) mmx_fetch (mem, nta) + +#define psadbw_m2r(var,reg) mmx_m2r (psadbw, var, reg) +#define psadbw_r2r(regs,regd) mmx_r2r (psadbw, regs, regd) + +#define pshufw_m2r(var,reg,imm) mmx_m2ri(pshufw, var, reg, imm) +#define pshufw_r2r(regs,regd,imm) mmx_r2ri(pshufw, regs, regd, imm) + +#define sfence() __asm__ __volatile__ ("sfence\n\t") diff --git a/src/video_dec/libmpeg2new/include/mpeg2.h b/src/video_dec/libmpeg2new/include/mpeg2.h new file mode 100644 index 000000000..6c1a3805b --- /dev/null +++
b/src/video_dec/libmpeg2new/include/mpeg2.h @@ -0,0 +1,202 @@ +/* + * mpeg2.h + * Copyright (C) 2000-2004 Michel Lespinasse <walken@zoy.org> + * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef MPEG2_H +#define MPEG2_H + +#define MPEG2_VERSION(a,b,c) (((a)<<16)|((b)<<8)|(c)) +#define MPEG2_RELEASE MPEG2_VERSION (0, 4, 1) /* 0.4.1 */ + +#define SEQ_FLAG_MPEG2 1 +#define SEQ_FLAG_CONSTRAINED_PARAMETERS 2 +#define SEQ_FLAG_PROGRESSIVE_SEQUENCE 4 +#define SEQ_FLAG_LOW_DELAY 8 +#define SEQ_FLAG_COLOUR_DESCRIPTION 16 + +#define SEQ_MASK_VIDEO_FORMAT 0xe0 +#define SEQ_VIDEO_FORMAT_COMPONENT 0 +#define SEQ_VIDEO_FORMAT_PAL 0x20 +#define SEQ_VIDEO_FORMAT_NTSC 0x40 +#define SEQ_VIDEO_FORMAT_SECAM 0x60 +#define SEQ_VIDEO_FORMAT_MAC 0x80 +#define SEQ_VIDEO_FORMAT_UNSPECIFIED 0xa0 + +typedef struct mpeg2_sequence_s { + unsigned int width, height; + unsigned int chroma_width, chroma_height; + unsigned int byte_rate; + unsigned int vbv_buffer_size; + uint32_t flags; + + unsigned int picture_width, picture_height; + unsigned int display_width, display_height; + unsigned int pixel_width, pixel_height; + unsigned int 
frame_period; + + uint8_t profile_level_id; + uint8_t colour_primaries; + uint8_t transfer_characteristics; + uint8_t matrix_coefficients; +} mpeg2_sequence_t; + +#define GOP_FLAG_DROP_FRAME 1 +#define GOP_FLAG_BROKEN_LINK 2 +#define GOP_FLAG_CLOSED_GOP 4 + +typedef struct mpeg2_gop_s { + uint8_t hours; + uint8_t minutes; + uint8_t seconds; + uint8_t pictures; + uint32_t flags; +} mpeg2_gop_t; + +#define PIC_MASK_CODING_TYPE 7 +#define PIC_FLAG_CODING_TYPE_I 1 +#define PIC_FLAG_CODING_TYPE_P 2 +#define PIC_FLAG_CODING_TYPE_B 3 +#define PIC_FLAG_CODING_TYPE_D 4 + +#define PIC_FLAG_TOP_FIELD_FIRST 8 +#define PIC_FLAG_PROGRESSIVE_FRAME 16 +#define PIC_FLAG_COMPOSITE_DISPLAY 32 +#define PIC_FLAG_SKIP 64 +#define PIC_FLAG_TAGS 128 +#define PIC_MASK_COMPOSITE_DISPLAY 0xfffff000 + +typedef struct mpeg2_picture_s { + unsigned int temporal_reference; + unsigned int nb_fields; + uint32_t tag, tag2; + uint32_t flags; + struct { + int x, y; + } display_offset[3]; +} mpeg2_picture_t; + +typedef struct mpeg2_fbuf_s { + uint8_t * buf[3]; + void * id; +} mpeg2_fbuf_t; + +typedef struct mpeg2_info_s { + const mpeg2_sequence_t * sequence; + const mpeg2_gop_t * gop; + const mpeg2_picture_t * current_picture; + const mpeg2_picture_t * current_picture_2nd; + const mpeg2_fbuf_t * current_fbuf; + const mpeg2_picture_t * display_picture; + const mpeg2_picture_t * display_picture_2nd; + const mpeg2_fbuf_t * display_fbuf; + const mpeg2_fbuf_t * discard_fbuf; + const uint8_t * user_data; + unsigned int user_data_len; +} mpeg2_info_t; + +typedef struct mpeg2dec_s mpeg2dec_t; +typedef struct mpeg2_decoder_s mpeg2_decoder_t; + +typedef enum { + STATE_BUFFER = 0, + STATE_SEQUENCE = 1, + STATE_SEQUENCE_REPEATED = 2, + STATE_SEQUENCE_MODIFIED = 3, + STATE_GOP = 4, + STATE_PICTURE = 5, + STATE_SLICE_1ST = 6, + STATE_PICTURE_2ND = 7, + STATE_SLICE = 8, + STATE_END = 9, + STATE_INVALID = 10, + STATE_INVALID_END = 11 +} mpeg2_state_t; + +typedef struct mpeg2_convert_init_s { + unsigned int id_size; + 
unsigned int buf_size[3]; + void (* start) (void * id, const mpeg2_fbuf_t * fbuf, + const mpeg2_picture_t * picture, const mpeg2_gop_t * gop); + void (* copy) (void * id, uint8_t * const * src, unsigned int v_offset); +} mpeg2_convert_init_t; +typedef enum { + MPEG2_CONVERT_SET = 0, + MPEG2_CONVERT_STRIDE = 1, + MPEG2_CONVERT_START = 2 +} mpeg2_convert_stage_t; +typedef int mpeg2_convert_t (int stage, void * id, + const mpeg2_sequence_t * sequence, int stride, + uint32_t accel, void * arg, + mpeg2_convert_init_t * result); +int mpeg2_convert (mpeg2dec_t * mpeg2dec, mpeg2_convert_t convert, void * arg); +int mpeg2_stride (mpeg2dec_t * mpeg2dec, int stride); +void mpeg2_set_buf (mpeg2dec_t * mpeg2dec, uint8_t * buf[3], void * id); +void mpeg2_custom_fbuf (mpeg2dec_t * mpeg2dec, int custom_fbuf); + +#define MPEG2_ACCEL_X86_MMX 1 +#define MPEG2_ACCEL_X86_3DNOW 2 +#define MPEG2_ACCEL_X86_MMXEXT 4 +#define MPEG2_ACCEL_X86_SSE2 8 +#define MPEG2_ACCEL_X86_SSE3 16 +#define MPEG2_ACCEL_PPC_ALTIVEC 1 +#define MPEG2_ACCEL_ALPHA 1 +#define MPEG2_ACCEL_ALPHA_MVI 2 +#define MPEG2_ACCEL_SPARC_VIS 1 +#define MPEG2_ACCEL_SPARC_VIS2 2 +#define MPEG2_ACCEL_DETECT 0x80000000 + +uint32_t mpeg2_accel (uint32_t accel); +mpeg2dec_t * mpeg2_init (void); +const mpeg2_info_t * mpeg2_info (mpeg2dec_t * mpeg2dec); +void mpeg2_close (mpeg2dec_t * mpeg2dec); + +void mpeg2_buffer (mpeg2dec_t * mpeg2dec, uint8_t * start, uint8_t * end); +int mpeg2_getpos (mpeg2dec_t * mpeg2dec); +mpeg2_state_t mpeg2_parse (mpeg2dec_t * mpeg2dec); + +void mpeg2_reset (mpeg2dec_t * mpeg2dec, int full_reset); +void mpeg2_skip (mpeg2dec_t * mpeg2dec, int skip); +void mpeg2_slice_region (mpeg2dec_t * mpeg2dec, int start, int end); + +void mpeg2_tag_picture (mpeg2dec_t * mpeg2dec, uint32_t tag, uint32_t tag2); + +void mpeg2_init_fbuf (mpeg2_decoder_t * decoder, uint8_t * current_fbuf[3], + uint8_t * forward_fbuf[3], uint8_t * backward_fbuf[3]); +void mpeg2_slice (mpeg2_decoder_t * decoder, int code, const uint8_t * 
buffer); +int mpeg2_guess_aspect (const mpeg2_sequence_t * sequence, + unsigned int * pixel_width, + unsigned int * pixel_height); + +typedef enum { + MPEG2_ALLOC_MPEG2DEC = 0, + MPEG2_ALLOC_CHUNK = 1, + MPEG2_ALLOC_YUV = 2, + MPEG2_ALLOC_CONVERT_ID = 3, + MPEG2_ALLOC_CONVERTED = 4 +} mpeg2_alloc_t; + +void * mpeg2_malloc (unsigned size, mpeg2_alloc_t reason); +void mpeg2_free (void * buf); +void mpeg2_malloc_hooks (void * malloc (unsigned, mpeg2_alloc_t), + int free (void *)); + +#endif /* MPEG2_H */ diff --git a/src/video_dec/libmpeg2new/include/mpeg2convert.h b/src/video_dec/libmpeg2new/include/mpeg2convert.h new file mode 100644 index 000000000..aac5d1991 --- /dev/null +++ b/src/video_dec/libmpeg2new/include/mpeg2convert.h @@ -0,0 +1,48 @@ +/* + * mpeg2convert.h + * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org> + * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef MPEG2CONVERT_H +#define MPEG2CONVERT_H + +mpeg2_convert_t mpeg2convert_rgb32; +mpeg2_convert_t mpeg2convert_rgb24; +mpeg2_convert_t mpeg2convert_rgb16; +mpeg2_convert_t mpeg2convert_rgb15; +mpeg2_convert_t mpeg2convert_rgb8; +mpeg2_convert_t mpeg2convert_bgr32; +mpeg2_convert_t mpeg2convert_bgr24; +mpeg2_convert_t mpeg2convert_bgr16; +mpeg2_convert_t mpeg2convert_bgr15; +mpeg2_convert_t mpeg2convert_bgr8; + +typedef enum { + MPEG2CONVERT_RGB = 0, + MPEG2CONVERT_BGR = 1 +} mpeg2convert_rgb_order_t; + +mpeg2_convert_t * mpeg2convert_rgb (mpeg2convert_rgb_order_t order, + unsigned int bpp); + +mpeg2_convert_t mpeg2convert_uyvy; + +#endif /* MPEG2CONVERT_H */ diff --git a/src/video_dec/libmpeg2new/include/sse.h b/src/video_dec/libmpeg2new/include/sse.h new file mode 100644 index 000000000..4bd853f8b --- /dev/null +++ b/src/video_dec/libmpeg2new/include/sse.h @@ -0,0 +1,256 @@ +/* + * sse.h + * Copyright (C) 1999-2003 R. Fisher + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +typedef union { + float sf[4]; /* Single-precision (32-bit) value */ +} ATTR_ALIGN(16) sse_t; /* On a 16 byte (128-bit) boundary */ + + +#define sse_i2r(op, imm, reg) \ + __asm__ __volatile__ (#op " %0, %%" #reg \ + : /* nothing */ \ + : "X" (imm) ) + +#define sse_m2r(op, mem, reg) \ + __asm__ __volatile__ (#op " %0, %%" #reg \ + : /* nothing */ \ + : "X" (mem)) + +#define sse_r2m(op, reg, mem) \ + __asm__ __volatile__ (#op " %%" #reg ", %0" \ + : "=X" (mem) \ + : /* nothing */ ) + +#define sse_r2r(op, regs, regd) \ + __asm__ __volatile__ (#op " %" #regs ", %" #regd) + +#define sse_r2ri(op, regs, regd, imm) \ + __asm__ __volatile__ (#op " %0, %%" #regs ", %%" #regd \ + : /* nothing */ \ + : "X" (imm) ) + +#define sse_m2ri(op, mem, reg, subop) \ + __asm__ __volatile__ (#op " %0, %%" #reg ", " #subop \ + : /* nothing */ \ + : "X" (mem)) + + +#define movaps_m2r(var, reg) sse_m2r(movaps, var, reg) +#define movaps_r2m(reg, var) sse_r2m(movaps, reg, var) +#define movaps_r2r(regs, regd) sse_r2r(movaps, regs, regd) + +#define movntps_r2m(xmmreg, var) sse_r2m(movntps, xmmreg, var) + +#define movups_m2r(var, reg) sse_m2r(movups, var, reg) +#define movups_r2m(reg, var) sse_r2m(movups, reg, var) +#define movups_r2r(regs, regd) sse_r2r(movups, regs, regd) + +#define movhlps_r2r(regs, regd) sse_r2r(movhlps, regs, regd) + +#define movlhps_r2r(regs, regd) sse_r2r(movlhps, regs, regd) + +#define movhps_m2r(var, reg) sse_m2r(movhps, var, reg) +#define movhps_r2m(reg, var) sse_r2m(movhps, reg, var) + +#define movlps_m2r(var, reg) sse_m2r(movlps, var, reg) +#define movlps_r2m(reg, var) sse_r2m(movlps, reg, var) + +#define movss_m2r(var, reg) sse_m2r(movss, var, reg) +#define movss_r2m(reg, var) sse_r2m(movss, reg, var) +#define movss_r2r(regs, regd) sse_r2r(movss, 
regs, regd) + +#define shufps_m2r(var, reg, index) sse_m2ri(shufps, var, reg, index) +#define shufps_r2r(regs, regd, index) sse_r2ri(shufps, regs, regd, index) + +#define cvtpi2ps_m2r(var, xmmreg) sse_m2r(cvtpi2ps, var, xmmreg) +#define cvtpi2ps_r2r(mmreg, xmmreg) sse_r2r(cvtpi2ps, mmreg, xmmreg) + +#define cvtps2pi_m2r(var, mmreg) sse_m2r(cvtps2pi, var, mmreg) +#define cvtps2pi_r2r(xmmreg, mmreg) sse_r2r(cvtps2pi, xmmreg, mmreg) /* sse_r2r takes the source register first */ + +#define cvttps2pi_m2r(var, mmreg) sse_m2r(cvttps2pi, var, mmreg) +#define cvttps2pi_r2r(xmmreg, mmreg) sse_r2r(cvttps2pi, xmmreg, mmreg) /* sse_r2r takes the source register first */ + +#define cvtsi2ss_m2r(var, xmmreg) sse_m2r(cvtsi2ss, var, xmmreg) +#define cvtsi2ss_r2r(reg, xmmreg) sse_r2r(cvtsi2ss, reg, xmmreg) + +#define cvtss2si_m2r(var, reg) sse_m2r(cvtss2si, var, reg) +#define cvtss2si_r2r(xmmreg, reg) sse_r2r(cvtss2si, xmmreg, reg) + +#define cvttss2si_m2r(var, reg) sse_m2r(cvttss2si, var, reg) /* truncating variant, distinct from cvtss2si */ +#define cvttss2si_r2r(xmmreg, reg) sse_r2r(cvttss2si, xmmreg, reg) /* truncating variant, distinct from cvtss2si */ + +#define movmskps(xmmreg, reg) \ + __asm__ __volatile__ ("movmskps %" #xmmreg ", %" #reg) + +#define addps_m2r(var, reg) sse_m2r(addps, var, reg) +#define addps_r2r(regs, regd) sse_r2r(addps, regs, regd) + +#define addss_m2r(var, reg) sse_m2r(addss, var, reg) +#define addss_r2r(regs, regd) sse_r2r(addss, regs, regd) + +#define subps_m2r(var, reg) sse_m2r(subps, var, reg) +#define subps_r2r(regs, regd) sse_r2r(subps, regs, regd) + +#define subss_m2r(var, reg) sse_m2r(subss, var, reg) +#define subss_r2r(regs, regd) sse_r2r(subss, regs, regd) + +#define mulps_m2r(var, reg) sse_m2r(mulps, var, reg) +#define mulps_r2r(regs, regd) sse_r2r(mulps, regs, regd) + +#define mulss_m2r(var, reg) sse_m2r(mulss, var, reg) +#define mulss_r2r(regs, regd) sse_r2r(mulss, regs, regd) + +#define divps_m2r(var, reg) sse_m2r(divps, var, reg) +#define divps_r2r(regs, regd) sse_r2r(divps, regs, regd) + +#define divss_m2r(var, reg) sse_m2r(divss, var, reg) +#define divss_r2r(regs, regd) sse_r2r(divss, regs, regd) + +#define rcpps_m2r(var,
reg) sse_m2r(rcpps, var, reg) +#define rcpps_r2r(regs, regd) sse_r2r(rcpps, regs, regd) + +#define rcpss_m2r(var, reg) sse_m2r(rcpss, var, reg) +#define rcpss_r2r(regs, regd) sse_r2r(rcpss, regs, regd) + +#define rsqrtps_m2r(var, reg) sse_m2r(rsqrtps, var, reg) +#define rsqrtps_r2r(regs, regd) sse_r2r(rsqrtps, regs, regd) + +#define rsqrtss_m2r(var, reg) sse_m2r(rsqrtss, var, reg) +#define rsqrtss_r2r(regs, regd) sse_r2r(rsqrtss, regs, regd) + +#define sqrtps_m2r(var, reg) sse_m2r(sqrtps, var, reg) +#define sqrtps_r2r(regs, regd) sse_r2r(sqrtps, regs, regd) + +#define sqrtss_m2r(var, reg) sse_m2r(sqrtss, var, reg) +#define sqrtss_r2r(regs, regd) sse_r2r(sqrtss, regs, regd) + +#define andps_m2r(var, reg) sse_m2r(andps, var, reg) +#define andps_r2r(regs, regd) sse_r2r(andps, regs, regd) + +#define andnps_m2r(var, reg) sse_m2r(andnps, var, reg) +#define andnps_r2r(regs, regd) sse_r2r(andnps, regs, regd) + +#define orps_m2r(var, reg) sse_m2r(orps, var, reg) +#define orps_r2r(regs, regd) sse_r2r(orps, regs, regd) + +#define xorps_m2r(var, reg) sse_m2r(xorps, var, reg) +#define xorps_r2r(regs, regd) sse_r2r(xorps, regs, regd) + +#define maxps_m2r(var, reg) sse_m2r(maxps, var, reg) +#define maxps_r2r(regs, regd) sse_r2r(maxps, regs, regd) + +#define maxss_m2r(var, reg) sse_m2r(maxss, var, reg) +#define maxss_r2r(regs, regd) sse_r2r(maxss, regs, regd) + +#define minps_m2r(var, reg) sse_m2r(minps, var, reg) +#define minps_r2r(regs, regd) sse_r2r(minps, regs, regd) + +#define minss_m2r(var, reg) sse_m2r(minss, var, reg) +#define minss_r2r(regs, regd) sse_r2r(minss, regs, regd) + +#define cmpps_m2r(var, reg, op) sse_m2ri(cmpps, var, reg, op) +#define cmpps_r2r(regs, regd, op) sse_r2ri(cmpps, regs, regd, op) + +#define cmpeqps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 0) +#define cmpeqps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 0) + +#define cmpltps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 1) +#define cmpltps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 1) + +#define 
cmpleps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 2) +#define cmpleps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 2) + +#define cmpunordps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 3) +#define cmpunordps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 3) + +#define cmpneqps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 4) +#define cmpneqps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 4) + +#define cmpnltps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 5) +#define cmpnltps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 5) + +#define cmpnleps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 6) +#define cmpnleps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 6) + +#define cmpordps_m2r(var, reg) sse_m2ri(cmpps, var, reg, 7) +#define cmpordps_r2r(regs, regd) sse_r2ri(cmpps, regs, regd, 7) + +#define cmpss_m2r(var, reg, op) sse_m2ri(cmpss, var, reg, op) +#define cmpss_r2r(regs, regd, op) sse_r2ri(cmpss, regs, regd, op) + +#define cmpeqss_m2r(var, reg) sse_m2ri(cmpss, var, reg, 0) +#define cmpeqss_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 0) + +#define cmpltss_m2r(var, reg) sse_m2ri(cmpss, var, reg, 1) +#define cmpltss_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 1) + +#define cmpless_m2r(var, reg) sse_m2ri(cmpss, var, reg, 2) +#define cmpless_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 2) + +#define cmpunordss_m2r(var, reg) sse_m2ri(cmpss, var, reg, 3) +#define cmpunordss_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 3) + +#define cmpneqss_m2r(var, reg) sse_m2ri(cmpss, var, reg, 4) +#define cmpneqss_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 4) + +#define cmpnltss_m2r(var, reg) sse_m2ri(cmpss, var, reg, 5) +#define cmpnltss_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 5) + +#define cmpnless_m2r(var, reg) sse_m2ri(cmpss, var, reg, 6) +#define cmpnless_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 6) + +#define cmpordss_m2r(var, reg) sse_m2ri(cmpss, var, reg, 7) +#define cmpordss_r2r(regs, regd) sse_r2ri(cmpss, regs, regd, 7) + +#define comiss_m2r(var, reg) sse_m2r(comiss, var, reg) +#define comiss_r2r(regs, 
regd) sse_r2r(comiss, regs, regd) + +#define ucomiss_m2r(var, reg) sse_m2r(ucomiss, var, reg) +#define ucomiss_r2r(regs, regd) sse_r2r(ucomiss, regs, regd) + +#define unpcklps_m2r(var, reg) sse_m2r(unpcklps, var, reg) +#define unpcklps_r2r(regs, regd) sse_r2r(unpcklps, regs, regd) + +#define unpckhps_m2r(var, reg) sse_m2r(unpckhps, var, reg) +#define unpckhps_r2r(regs, regd) sse_r2r(unpckhps, regs, regd) + +#define fxrstor(mem) \ + __asm__ __volatile__ ("fxrstor %0" \ + : /* nothing */ \ + : "m" (mem)) /* reads mem: memory input operand */ + +#define fxsave(mem) \ + __asm__ __volatile__ ("fxsave %0" \ + : "=m" (mem) \ + : /* nothing */ ) /* writes mem: must be an output operand */ + +#define stmxcsr(mem) \ + __asm__ __volatile__ ("stmxcsr %0" \ + : "=m" (mem) \ + : /* nothing */ ) /* writes mem: must be an output operand */ + +#define ldmxcsr(mem) \ + __asm__ __volatile__ ("ldmxcsr %0" \ + : /* nothing */ \ + : "m" (mem)) /* reads mem: memory input operand */ + diff --git a/src/video_dec/libmpeg2new/include/tendra.h b/src/video_dec/libmpeg2new/include/tendra.h new file mode 100644 index 000000000..09900916a --- /dev/null +++ b/src/video_dec/libmpeg2new/include/tendra.h @@ -0,0 +1,35 @@ +/* + * tendra.h + * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org> + * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#pragma TenDRA begin +#pragma TenDRA longlong type warning + +#ifdef TenDRA_check + +#pragma TenDRA conversion analysis (pointer-int explicit) off +#pragma TenDRA implicit function declaration off + +/* avoid the "No declarations in translation unit" problem */ +int TenDRA; + +#endif /* TenDRA_check */ diff --git a/src/video_dec/libmpeg2new/include/video_out.h b/src/video_dec/libmpeg2new/include/video_out.h new file mode 100644 index 000000000..342c55197 --- /dev/null +++ b/src/video_dec/libmpeg2new/include/video_out.h @@ -0,0 +1,58 @@ +/* + * video_out.h + * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org> + * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +struct mpeg2_sequence_s; +struct mpeg2_convert_init_s; +typedef struct { + int (* convert) (int stage, void * id, + const struct mpeg2_sequence_s * sequence, + int stride, uint32_t accel, void * arg, + struct mpeg2_convert_init_s * result); +} vo_setup_result_t; + +typedef struct vo_instance_s vo_instance_t; +struct vo_instance_s { + int (* setup) (vo_instance_t * instance, unsigned int width, + unsigned int height, unsigned int chroma_width, + unsigned int chroma_height, vo_setup_result_t * result); + void (* setup_fbuf) (vo_instance_t * instance, uint8_t ** buf, void ** id); + void (* set_fbuf) (vo_instance_t * instance, uint8_t ** buf, void ** id); + void (* start_fbuf) (vo_instance_t * instance, + uint8_t * const * buf, void * id); + void (* draw) (vo_instance_t * instance, uint8_t * const * buf, void * id); + void (* discard) (vo_instance_t * instance, + uint8_t * const * buf, void * id); + void (* close) (vo_instance_t * instance); +}; + +typedef vo_instance_t * vo_open_t (void); + +typedef struct { + char * name; + vo_open_t * open; +} vo_driver_t; + +void vo_accel (uint32_t accel); + +/* return NULL terminated array of all drivers */ +vo_driver_t const * vo_drivers (void); diff --git a/src/video_dec/libmpeg2new/include/vis.h b/src/video_dec/libmpeg2new/include/vis.h new file mode 100644 index 000000000..69dd49075 --- /dev/null +++ b/src/video_dec/libmpeg2new/include/vis.h @@ -0,0 +1,328 @@ +/* + * vis.h + * Copyright (C) 2003 David S. Miller <davem@redhat.com> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. 
+ * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* You may be asking why I hard-code the instruction opcodes and don't + * use the normal VIS assembler mnemonics for the VIS instructions. + * + * The reason is that Sun, in their infinite wisdom, decided that a binary + * using a VIS instruction will cause it to be marked (in the ELF headers) + * as doing so, and this prevents the OS from loading such binaries if the + * current cpu doesn't have VIS. There is no way to easily override this + * behavior of the assembler that I am aware of. + * + * This totally defeats what libmpeg2 is trying to do which is to allow a + * single binary to be created, and then detect the availability of VIS + * at runtime. + * + * I'm not saying that tainting the binary by default is bad, rather I'm + * saying that not providing a way to override this easily unnecessarily + * ties people's hands. + * + * Thus, we do the opcode encoding by hand and output 32-bit words in + * the assembler to keep the binary from becoming tainted.
 */

/* Bit-field builders for the hand-assembled 32-bit instruction word.
 * vis_opc_base is the part common to every encoded instruction; the other
 * macros shift the opf code and the register numbers into position. */
#define vis_opc_base    ((0x1 << 31) | (0x36 << 19))
#define vis_opf(X)      ((X) << 5)
#define vis_sreg(X)     (X)
/* keeps the low five bits and folds bit 5 of the register number into bit 0 */
#define vis_dreg(X)     (((X)&0x1f)|((X)>>5))
#define vis_rs1_s(X)    (vis_sreg(X) << 14)
#define vis_rs1_d(X)    (vis_dreg(X) << 14)
#define vis_rs2_s(X)    (vis_sreg(X) << 0)
#define vis_rs2_d(X)    (vis_dreg(X) << 0)
#define vis_rd_s(X)     (vis_sreg(X) << 25)
#define vis_rd_d(X)     (vis_dreg(X) << 25)

/* Instruction emitters.  Each one emits a single ".word" whose value is
 * computed at compile time ("i" constraint).  The name lists the operand
 * encodings in the order rs1, rs2 -> rd: 's' uses vis_sreg, 'd' uses
 * vis_dreg; "12"/"22" variants take only rs1 or only rs2. */
#define vis_ss2s(opf,rs1,rs2,rd) \
        __asm__ __volatile__ (".word %0" \
                              : : "i" (vis_opc_base | vis_opf(opf) | \
                                       vis_rs1_s(rs1) | \
                                       vis_rs2_s(rs2) | \
                                       vis_rd_s(rd)))

#define vis_dd2d(opf,rs1,rs2,rd) \
        __asm__ __volatile__ (".word %0" \
                              : : "i" (vis_opc_base | vis_opf(opf) | \
                                       vis_rs1_d(rs1) | \
                                       vis_rs2_d(rs2) | \
                                       vis_rd_d(rd)))

#define vis_ss2d(opf,rs1,rs2,rd) \
        __asm__ __volatile__ (".word %0" \
                              : : "i" (vis_opc_base | vis_opf(opf) | \
                                       vis_rs1_s(rs1) | \
                                       vis_rs2_s(rs2) | \
                                       vis_rd_d(rd)))

#define vis_sd2d(opf,rs1,rs2,rd) \
        __asm__ __volatile__ (".word %0" \
                              : : "i" (vis_opc_base | vis_opf(opf) | \
                                       vis_rs1_s(rs1) | \
                                       vis_rs2_d(rs2) | \
                                       vis_rd_d(rd)))

#define vis_d2s(opf,rs2,rd) \
        __asm__ __volatile__ (".word %0" \
                              : : "i" (vis_opc_base | vis_opf(opf) | \
                                       vis_rs2_d(rs2) | \
                                       vis_rd_s(rd)))

#define vis_s2d(opf,rs2,rd) \
        __asm__ __volatile__ (".word %0" \
                              : : "i" (vis_opc_base | vis_opf(opf) | \
                                       vis_rs2_s(rs2) | \
                                       vis_rd_d(rd)))

#define vis_d12d(opf,rs1,rd) \
        __asm__ __volatile__ (".word %0" \
                              : : "i" (vis_opc_base | vis_opf(opf) | \
                                       vis_rs1_d(rs1) | \
                                       vis_rd_d(rd)))

#define vis_d22d(opf,rs2,rd) \
        __asm__ __volatile__ (".word %0" \
                              : : "i" (vis_opc_base | vis_opf(opf) | \
                                       vis_rs2_d(rs2) | \
                                       vis_rd_d(rd)))

#define vis_s12s(opf,rs1,rd) \
        __asm__ __volatile__ (".word %0" \
                              : : "i" (vis_opc_base | vis_opf(opf) | \
                                       vis_rs1_s(rs1) | \
                                       vis_rd_s(rd)))

#define vis_s22s(opf,rs2,rd) \
        __asm__ __volatile__ (".word %0" \
                              : : "i" (vis_opc_base | vis_opf(opf) | \
                                       vis_rs2_s(rs2) | \
                                       vis_rd_s(rd)))

#define vis_s(opf,rd) \
        __asm__ __volatile__ (".word %0" \
                              : : "i" (vis_opc_base | vis_opf(opf) | \
                                       vis_rd_s(rd)))

#define vis_d(opf,rd) \
        __asm__ __volatile__ (".word %0" \
                              : : "i" (vis_opc_base | vis_opf(opf) | \
                                       vis_rd_d(rd)))

/* Register <-> memory moves.  These use real mnemonics: `op` and the
 * register number `rd` are pasted into the asm string, the address is
 * passed in a general register (the _2 forms add two registers). */
#define vis_r2m(op,rd,mem) \
        __asm__ __volatile__ (#op "\t%%f" #rd ", [%0]" : : "r" (&(mem)) )

#define vis_r2m_2(op,rd,mem1,mem2) \
        __asm__ __volatile__ (#op "\t%%f" #rd ", [%0 + %1]" : : "r" (mem1), "r" (mem2) )

#define vis_m2r(op,mem,rd) \
        __asm__ __volatile__ (#op "\t[%0], %%f" #rd : : "r" (&(mem)) )

#define vis_m2r_2(op,mem1,mem2,rd) \
        __asm__ __volatile__ (#op "\t[%0 + %1], %%f" #rd : : "r" (mem1), "r" (mem2) )

/* Emits a fixed instruction word with _val pinned to register g1.
 * NOTE(review): by its name this writes the GSR register - confirm the
 * 0xa7804000 encoding against the SPARC manual. */
static inline void vis_set_gsr(unsigned int _val)
{
        register unsigned int val asm("g1");

        val = _val;
        __asm__ __volatile__(".word 0xa7804000"
                             : : "r" (val));
}

#define VIS_GSR_ALIGNADDR_MASK  0x0000007
#define VIS_GSR_ALIGNADDR_SHIFT 0
#define VIS_GSR_SCALEFACT_MASK  0x0000078
#define VIS_GSR_SCALEFACT_SHIFT 3

/* 32-bit (ld/st) and 64-bit (ldd/std) loads and stores built on the
 * register <-> memory macros above. */
#define vis_ld32(mem,rs1)               vis_m2r(ld, mem, rs1)
#define vis_ld32_2(mem1,mem2,rs1)       vis_m2r_2(ld, mem1, mem2, rs1)
#define vis_st32(rs1,mem)               vis_r2m(st, rs1, mem)
#define vis_st32_2(rs1,mem1,mem2)       vis_r2m_2(st, rs1, mem1, mem2)
#define vis_ld64(mem,rs1)               vis_m2r(ldd, mem, rs1)
#define vis_ld64_2(mem1,mem2,rs1)       vis_m2r_2(ldd, mem1, mem2, rs1)
#define vis_st64(rs1,mem)               vis_r2m(std, rs1, mem)
#define vis_st64_2(rs1,mem1,mem2)       vis_r2m_2(std, rs1, mem1, mem2)

/* Block load/store: the address is pinned to g1 because the base encoding
 * is a literal word; only the rd field is OR-ed in.  The "memory" clobber
 * keeps the compiler from caching memory across the access. */
#define vis_ldblk(mem, rd) \
do {    register void *__mem asm("g1"); \
        __mem = &(mem); \
        __asm__ __volatile__(".word 0xc1985e00 | %1" \
                             : \
                             : "r" (__mem), \
                               "i" (vis_rd_d(rd)) \
                             : "memory"); \
} while (0)

#define vis_stblk(rd, mem) \
do {    register void *__mem asm("g1"); \
        __mem = &(mem); \
        __asm__ __volatile__(".word 0xc1b85e00 | %1" \
                             : \
                             : "r" (__mem), \
                               "i" (vis_rd_d(rd)) \
                             : "memory"); \
} while (0)

/* Memory barriers, emitted as literal words with a compiler-level
 * "memory" clobber. */
#define vis_membar_storestore() \
        __asm__ __volatile__(".word 0x8143e008" : : : "memory")

#define vis_membar_sync() \
        __asm__ __volatile__(".word 0x8143e040" : : : "memory")

/* 16 and 32 bit partitioned addition and subtraction.  The normal
 * versions perform 4 16-bit or 2 32-bit additions or subtractions.
 * The 's' versions perform 2 16-bit or 1 32-bit additions or
 * subtractions.
 */

#define vis_padd16(rs1,rs2,rd)          vis_dd2d(0x50, rs1, rs2, rd)
#define vis_padd16s(rs1,rs2,rd)         vis_ss2s(0x51, rs1, rs2, rd)
#define vis_padd32(rs1,rs2,rd)          vis_dd2d(0x52, rs1, rs2, rd)
#define vis_padd32s(rs1,rs2,rd)         vis_ss2s(0x53, rs1, rs2, rd)
#define vis_psub16(rs1,rs2,rd)          vis_dd2d(0x54, rs1, rs2, rd)
#define vis_psub16s(rs1,rs2,rd)         vis_ss2s(0x55, rs1, rs2, rd)
#define vis_psub32(rs1,rs2,rd)          vis_dd2d(0x56, rs1, rs2, rd)
#define vis_psub32s(rs1,rs2,rd)         vis_ss2s(0x57, rs1, rs2, rd)

/* Pixel formatting instructions.  */

#define vis_pack16(rs2,rd)              vis_d2s( 0x3b,      rs2, rd)
#define vis_pack32(rs1,rs2,rd)          vis_dd2d(0x3a, rs1, rs2, rd)
#define vis_packfix(rs2,rd)             vis_d2s( 0x3d,      rs2, rd)
#define vis_expand(rs2,rd)              vis_s2d( 0x4d,      rs2, rd)
#define vis_pmerge(rs1,rs2,rd)          vis_ss2d(0x4b, rs1, rs2, rd)

/* Partitioned multiply instructions.  */

#define vis_mul8x16(rs1,rs2,rd)         vis_sd2d(0x31, rs1, rs2, rd)
#define vis_mul8x16au(rs1,rs2,rd)       vis_ss2d(0x33, rs1, rs2, rd)
#define vis_mul8x16al(rs1,rs2,rd)       vis_ss2d(0x35, rs1, rs2, rd)
#define vis_mul8sux16(rs1,rs2,rd)       vis_dd2d(0x36, rs1, rs2, rd)
#define vis_mul8ulx16(rs1,rs2,rd)       vis_dd2d(0x37, rs1, rs2, rd)
#define vis_muld8sux16(rs1,rs2,rd)      vis_ss2d(0x38, rs1, rs2, rd)
#define vis_muld8ulx16(rs1,rs2,rd)      vis_ss2d(0x39, rs1, rs2, rd)

/* Alignment instructions.
*/

/* Emits the opf 0x18 instruction with rs1 = g1, rs2 = g0, rd = g1.
 * _ptr is pinned to g1 so the hand-encoded register fields match; the
 * value left in g1 by the instruction is returned. */
static inline void *vis_alignaddr(void *_ptr)
{
        register void *ptr asm("g1");

        ptr = _ptr;

        __asm__ __volatile__(".word %2"
                             : "=&r" (ptr)
                             : "0" (ptr),
                               "i" (vis_opc_base | vis_opf(0x18) |
                                    vis_rs1_s(1) |
                                    vis_rs2_s(0) |
                                    vis_rd_s(1)));

        return ptr;
}

/* Same instruction as vis_alignaddr() but encoded with rd = g0, so no
 * result is returned to the caller. */
static inline void vis_alignaddr_g0(void *_ptr)
{
        register void *ptr asm("g1");

        ptr = _ptr;

        __asm__ __volatile__(".word %2"
                             : "=&r" (ptr)
                             : "0" (ptr),
                               "i" (vis_opc_base | vis_opf(0x18) |
                                    vis_rs1_s(1) |
                                    vis_rs2_s(0) |
                                    vis_rd_s(0)));
}

/* Variant of vis_alignaddr() using opf 0x19 instead of 0x18. */
static inline void *vis_alignaddrl(void *_ptr)
{
        register void *ptr asm("g1");

        ptr = _ptr;

        __asm__ __volatile__(".word %2"
                             : "=&r" (ptr)
                             : "0" (ptr),
                               "i" (vis_opc_base | vis_opf(0x19) |
                                    vis_rs1_s(1) |
                                    vis_rs2_s(0) |
                                    vis_rd_s(1)));

        return ptr;
}

/* Variant of vis_alignaddr_g0() using opf 0x19 instead of 0x18. */
static inline void vis_alignaddrl_g0(void *_ptr)
{
        register void *ptr asm("g1");

        ptr = _ptr;

        __asm__ __volatile__(".word %2"
                             : "=&r" (ptr)
                             : "0" (ptr),
                               "i" (vis_opc_base | vis_opf(0x19) |
                                    vis_rs1_s(1) |
                                    vis_rs2_s(0) |
                                    vis_rd_s(0)));
}

#define vis_faligndata(rs1,rs2,rd)      vis_dd2d(0x48, rs1, rs2, rd)

/* Logical operate instructions.
*/

/* Each operation comes as a pair: the plain name is encoded through a
 * vis_d* emitter and the name with an 's' suffix through a vis_s*
 * emitter; the opf code of the 's' form is always the plain code plus
 * one. */
#define vis_fzero(rd)                   vis_d(   0x60,           rd)
#define vis_fzeros(rd)                  vis_s(   0x61,           rd)
#define vis_fone(rd)                    vis_d(   0x7e,           rd)
#define vis_fones(rd)                   vis_s(   0x7f,           rd)
#define vis_src1(rs1,rd)                vis_d12d(0x74, rs1,      rd)
#define vis_src1s(rs1,rd)               vis_s12s(0x75, rs1,      rd)
#define vis_src2(rs2,rd)                vis_d22d(0x78,      rs2, rd)
#define vis_src2s(rs2,rd)               vis_s22s(0x79,      rs2, rd)
#define vis_not1(rs1,rd)                vis_d12d(0x6a, rs1,      rd)
#define vis_not1s(rs1,rd)               vis_s12s(0x6b, rs1,      rd)
#define vis_not2(rs2,rd)                vis_d22d(0x66,      rs2, rd)
#define vis_not2s(rs2,rd)               vis_s22s(0x67,      rs2, rd)
#define vis_or(rs1,rs2,rd)              vis_dd2d(0x7c, rs1, rs2, rd)
#define vis_ors(rs1,rs2,rd)             vis_ss2s(0x7d, rs1, rs2, rd)
#define vis_nor(rs1,rs2,rd)             vis_dd2d(0x62, rs1, rs2, rd)
#define vis_nors(rs1,rs2,rd)            vis_ss2s(0x63, rs1, rs2, rd)
#define vis_and(rs1,rs2,rd)             vis_dd2d(0x70, rs1, rs2, rd)
#define vis_ands(rs1,rs2,rd)            vis_ss2s(0x71, rs1, rs2, rd)
#define vis_nand(rs1,rs2,rd)            vis_dd2d(0x6e, rs1, rs2, rd)
#define vis_nands(rs1,rs2,rd)           vis_ss2s(0x6f, rs1, rs2, rd)
#define vis_xor(rs1,rs2,rd)             vis_dd2d(0x6c, rs1, rs2, rd)
#define vis_xors(rs1,rs2,rd)            vis_ss2s(0x6d, rs1, rs2, rd)
#define vis_xnor(rs1,rs2,rd)            vis_dd2d(0x72, rs1, rs2, rd)
#define vis_xnors(rs1,rs2,rd)           vis_ss2s(0x73, rs1, rs2, rd)
#define vis_ornot1(rs1,rs2,rd)          vis_dd2d(0x7a, rs1, rs2, rd)
#define vis_ornot1s(rs1,rs2,rd)         vis_ss2s(0x7b, rs1, rs2, rd)
#define vis_ornot2(rs1,rs2,rd)          vis_dd2d(0x76, rs1, rs2, rd)
#define vis_ornot2s(rs1,rs2,rd)         vis_ss2s(0x77, rs1, rs2, rd)
#define vis_andnot1(rs1,rs2,rd)         vis_dd2d(0x68, rs1, rs2, rd)
#define vis_andnot1s(rs1,rs2,rd)        vis_ss2s(0x69, rs1, rs2, rd)
#define vis_andnot2(rs1,rs2,rd)         vis_dd2d(0x64, rs1, rs2, rd)
#define vis_andnot2s(rs1,rs2,rd)        vis_ss2s(0x65, rs1, rs2, rd)

/* Pixel component distance.
*/ + +#define vis_pdist(rs1,rs2,rd) vis_dd2d(0x3e, rs1, rs2, rd) diff --git a/src/video_dec/libmpeg2new/libmpeg2/Makefile.am b/src/video_dec/libmpeg2new/libmpeg2/Makefile.am new file mode 100644 index 000000000..3a69cd1b4 --- /dev/null +++ b/src/video_dec/libmpeg2new/libmpeg2/Makefile.am @@ -0,0 +1,23 @@ +include $(top_srcdir)/misc/Makefile.quiet +include $(top_srcdir)/misc/Makefile.common + +AM_CFLAGS = $(DEFAULT_OCFLAGS) $(VISIBILITY_FLAG) + +if ENABLE_MPEG2NEW +mpeg2new_libs = libmpeg2.la libmpeg2arch.la +endif + +noinst_LTLIBRARIES = $(mpeg2new_libs) + +libmpeg2_la_SOURCES = alloc.c header.c decode.c slice.c motion_comp.c idct.c +libmpeg2_la_LIBADD = libmpeg2arch.la + +AM_CPPFLAGS = -I$(srcdir)/../include + +libmpeg2arch_la_SOURCES = motion_comp_mmx.c idct_mmx.c \ + motion_comp_altivec.c idct_altivec.c \ + motion_comp_alpha.c idct_alpha.c \ + motion_comp_vis.c \ + cpu_accel.c cpu_state.c + +EXTRA_DIST = mpeg2_internal.h vlc.h diff --git a/src/video_dec/libmpeg2new/libmpeg2/alloc.c b/src/video_dec/libmpeg2new/libmpeg2/alloc.c new file mode 100644 index 000000000..f1a7afa1c --- /dev/null +++ b/src/video_dec/libmpeg2new/libmpeg2/alloc.c @@ -0,0 +1,70 @@ +/* + * alloc.c + * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org> + * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <stdlib.h> +#include <inttypes.h> + +#include "../include/mpeg2.h" + +static void * (* malloc_hook) (unsigned size, mpeg2_alloc_t reason) = NULL; +static int (* free_hook) (void * buf) = NULL; + +void * mpeg2_malloc (unsigned size, mpeg2_alloc_t reason) +{ + char * buf; + + if (malloc_hook) { + buf = (char *) malloc_hook (size, reason); + if (buf) + return buf; + } + + if (size) { + buf = (char *) malloc (size + 63 + sizeof (void **)); + if (buf) { + char * align_buf; + + align_buf = buf + 63 + sizeof (void **); + align_buf -= (long)align_buf & 63; + *(((void **)align_buf) - 1) = buf; + return align_buf; + } + } + return NULL; +} + +void mpeg2_free (void * buf) +{ + if (free_hook && free_hook (buf)) + return; + + if (buf) + free (*(((void **)buf) - 1)); +} + +void mpeg2_malloc_hooks (void * malloc (unsigned, mpeg2_alloc_t), + int free (void *)) +{ + malloc_hook = malloc; + free_hook = free; +} diff --git a/src/video_dec/libmpeg2new/libmpeg2/configure.incl b/src/video_dec/libmpeg2new/libmpeg2/configure.incl new file mode 100644 index 000000000..f8dbd5aef --- /dev/null +++ b/src/video_dec/libmpeg2new/libmpeg2/configure.incl @@ -0,0 +1,11 @@ +AC_SUBST([LIBMPEG2_CFLAGS]) + +dnl avoid -fPIC when possible +AC_LIBTOOL_NON_PIC([LIBMPEG2_CFLAGS="$LIBMPEG2_CFLAGS -prefer-non-pic"]) + +dnl check for cpudetect +AC_ARG_ENABLE([accel-detect], + [ --disable-accel-detect make a version without accel detection code]) +if test x"$enable_accel_detect" != x"no"; then + AC_DEFINE([ACCEL_DETECT],,[autodetect accelerations]) +fi diff --git a/src/video_dec/libmpeg2new/libmpeg2/convert_internal.h b/src/video_dec/libmpeg2new/libmpeg2/convert_internal.h new file mode 100644 index 000000000..d1e63d5e3 --- /dev/null +++ 
b/src/video_dec/libmpeg2new/libmpeg2/convert_internal.h @@ -0,0 +1,42 @@
/*
 * convert_internal.h
 * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
 * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
 *
 * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
 * See http://libmpeg2.sourceforge.net/ for updates.
 *
 * mpeg2dec is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * mpeg2dec is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

/* State shared by the YUV -> RGB conversion routines.
 * NOTE(review): field meanings are inferred from their names only (strides
 * and increments for the Y, UV and RGB planes, dithering position) - confirm
 * against the converter implementation before relying on them. */
typedef struct {
    uint8_t * rgb_ptr;
    int width;
    int field;
    int y_stride, rgb_stride, y_increm, uv_increm, rgb_increm, rgb_slice;
    int chroma420, convert420;
    int dither_offset, dither_stride;
    int y_stride_frame, uv_stride_frame, rgb_stride_frame, rgb_stride_min;
} convert_rgb_t;

/* Signature of a copy routine: id is the opaque converter state, src an
 * array of source plane pointers and v_offset a vertical offset. */
typedef void mpeg2convert_copy_t (void * id, uint8_t * const * src,
                                  unsigned int v_offset);

/* Per-architecture selectors; each returns a copy routine for the given
 * bpp/mode/sequence.
 * NOTE(review): presumably NULL when no accelerated routine applies -
 * confirm in the implementations. */
mpeg2convert_copy_t * mpeg2convert_rgb_mmxext (int bpp, int mode,
                                               const mpeg2_sequence_t * seq);
mpeg2convert_copy_t * mpeg2convert_rgb_mmx (int bpp, int mode,
                                            const mpeg2_sequence_t * seq);
mpeg2convert_copy_t * mpeg2convert_rgb_vis (int bpp, int mode,
                                            const mpeg2_sequence_t * seq);
diff --git a/src/video_dec/libmpeg2new/libmpeg2/cpu_accel.c b/src/video_dec/libmpeg2new/libmpeg2/cpu_accel.c new file mode 100644 index 000000000..7846f1e88 --- /dev/null +++ b/src/video_dec/libmpeg2new/libmpeg2/cpu_accel.c
@@ -0,0 +1,258 @@ +/* + * cpu_accel.c + * Copyright (C) 2000-2004 Michel Lespinasse <walken@zoy.org> + * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#include <inttypes.h> + +#include "../include/mpeg2.h" +#include "../include/attributes.h" +#include "mpeg2_internal.h" + +#ifdef ARCH_X86 +static inline uint32_t arch_accel (uint32_t accel) +{ + if (accel & (MPEG2_ACCEL_X86_3DNOW | MPEG2_ACCEL_X86_MMXEXT)) + accel |= MPEG2_ACCEL_X86_MMX; + + if (accel & (MPEG2_ACCEL_X86_SSE2 | MPEG2_ACCEL_X86_SSE3)) + accel |= MPEG2_ACCEL_X86_MMXEXT; + + if (accel & (MPEG2_ACCEL_X86_SSE3)) + accel |= MPEG2_ACCEL_X86_SSE2; + +#ifdef ACCEL_DETECT + if (accel & MPEG2_ACCEL_DETECT) { + uint32_t eax, ebx, ecx, edx; + int AMD; + +#if !defined(PIC) && !defined(__PIC__) +#define cpuid(op,eax,ebx,ecx,edx) \ + __asm__ ("cpuid" \ + : "=a" (eax), \ + "=b" (ebx), \ + "=c" (ecx), \ + "=d" (edx) \ + : "a" (op) \ + : "cc") +#else /* PIC version : save ebx */ +#define cpuid(op,eax,ebx,ecx,edx) \ + __asm__ ("push %%ebx\n\t" \ + "cpuid\n\t" \ + "movl %%ebx,%1\n\t" \ + "pop %%ebx" \ + : "=a" (eax), \ + "=r" (ebx), \ + 
"=c" (ecx), \ + "=d" (edx) \ + : "a" (op) \ + : "cc") +#endif + + __asm__ ("pushf\n\t" + "pushf\n\t" + "pop %0\n\t" + "movl %0,%1\n\t" + "xorl $0x200000,%0\n\t" + "push %0\n\t" + "popf\n\t" + "pushf\n\t" + "pop %0\n\t" + "popf" + : "=r" (eax), + "=r" (ebx) + : + : "cc"); + + if (eax == ebx) /* no cpuid */ + return accel; + + cpuid (0x00000000, eax, ebx, ecx, edx); + if (!eax) /* vendor string only */ + return accel; + + AMD = (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65); + + cpuid (0x00000001, eax, ebx, ecx, edx); + if (! (edx & 0x00800000)) /* no MMX */ + return accel; + + accel |= MPEG2_ACCEL_X86_MMX; + if (edx & 0x02000000) /* SSE - identical to AMD MMX extensions */ + accel |= MPEG2_ACCEL_X86_MMXEXT; + + if (edx & 0x04000000) /* SSE2 */ + accel |= MPEG2_ACCEL_X86_SSE2; + + if (ecx & 0x00000001) /* SSE3 */ + accel |= MPEG2_ACCEL_X86_SSE3; + + cpuid (0x80000000, eax, ebx, ecx, edx); + if (eax < 0x80000001) /* no extended capabilities */ + return accel; + + cpuid (0x80000001, eax, ebx, ecx, edx); + + if (edx & 0x80000000) + accel |= MPEG2_ACCEL_X86_3DNOW; + + if (AMD && (edx & 0x00400000)) /* AMD MMX extensions */ + accel |= MPEG2_ACCEL_X86_MMXEXT; + } +#endif /* ACCEL_DETECT */ + + return accel; +} +#endif /* ARCH_X86 */ + +#if defined(ACCEL_DETECT) && (defined(ARCH_PPC) || defined(ARCH_SPARC)) +#include <signal.h> +#include <setjmp.h> + +static sigjmp_buf jmpbuf; +static volatile sig_atomic_t canjump = 0; + +static RETSIGTYPE sigill_handler (int sig) +{ + if (!canjump) { + signal (sig, SIG_DFL); + raise (sig); + } + + canjump = 0; + siglongjmp (jmpbuf, 1); +} +#endif /* ACCEL_DETECT && (ARCH_PPC || ARCH_SPARC) */ + +#ifdef ARCH_PPC +static inline uint32_t arch_accel (uint32_t accel) +{ +#ifdef ACCEL_DETECT + if (accel & (MPEG2_ACCEL_PPC_ALTIVEC | MPEG2_ACCEL_DETECT) == + MPEG2_ACCEL_DETECT) { + static RETSIGTYPE (* oldsig) (int); + + oldsig = signal (SIGILL, sigill_handler); + if (sigsetjmp (jmpbuf, 1)) { + signal (SIGILL, oldsig); + return 
accel; + } + + canjump = 1; + +#ifdef HAVE_ALTIVEC_H /* gnu */ +#define VAND(a,b,c) "vand " #a "," #b "," #c "\n\t" +#else /* apple */ +#define VAND(a,b,c) "vand v" #a ",v" #b ",v" #c "\n\t" +#endif + asm volatile ("mtspr 256, %0\n\t" + VAND (0, 0, 0) + : + : "r" (-1)); + + canjump = 0; + accel |= MPEG2_ACCEL_PPC_ALTIVEC; + + signal (SIGILL, oldsig); + } +#endif /* ACCEL_DETECT */ + + return accel; +} +#endif /* ARCH_PPC */ + +#ifdef ARCH_SPARC +static inline uint32_t arch_accel (uint32_t accel) +{ + if (accel & MPEG2_ACCEL_SPARC_VIS2) + accel |= MPEG2_ACCEL_SPARC_VIS; + +#ifdef ACCEL_DETECT + if (accel & (MPEG2_ACCEL_SPARC_VIS2 | MPEG2_ACCEL_DETECT) == + MPEG2_ACCEL_DETECT) { + static RETSIGTYPE (* oldsig) (int); + + oldsig = signal (SIGILL, sigill_handler); + if (sigsetjmp (jmpbuf, 1)) { + signal (SIGILL, oldsig); + return accel; + } + + canjump = 1; + + /* pdist %f0, %f0, %f0 */ + __asm__ __volatile__(".word\t0x81b007c0"); + + canjump = 0; + accel |= MPEG2_ACCEL_SPARC_VIS; + + if (sigsetjmp (jmpbuf, 1)) { + signal (SIGILL, oldsig); + return accel; + } + + canjump = 1; + + /* edge8n %g0, %g0, %g0 */ + __asm__ __volatile__(".word\t0x81b00020"); + + canjump = 0; + accel |= MPEG2_ACCEL_SPARC_VIS2; + + signal (SIGILL, oldsig); + } +#endif /* ACCEL_DETECT */ + + return accel; +} +#endif /* ARCH_SPARC */ + +#ifdef ARCH_ALPHA +static inline uint32_t arch_accel (uint32_t accel) +{ + if (accel & MPEG2_ACCEL_ALPHA_MVI) + accel |= MPEG2_ACCEL_ALPHA; + +#ifdef ACCEL_DETECT + if (accel & MPEG2_ACCEL_DETECT) { + uint64_t no_mvi; + + asm volatile ("amask %1, %0" + : "=r" (no_mvi) + : "rI" (256)); /* AMASK_MVI */ + accel |= no_mvi ? 
MPEG2_ACCEL_ALPHA : (MPEG2_ACCEL_ALPHA | + MPEG2_ACCEL_ALPHA_MVI); + } +#endif /* ACCEL_DETECT */ + + return accel; +} +#endif /* ARCH_ALPHA */ + +uint32_t mpeg2_detect_accel (uint32_t accel) +{ +#if defined (ARCH_X86) || defined (ARCH_PPC) || defined (ARCH_ALPHA) || defined (ARCH_SPARC) + accel = arch_accel (accel); +#endif + return accel; +} diff --git a/src/video_dec/libmpeg2new/libmpeg2/cpu_state.c b/src/video_dec/libmpeg2new/libmpeg2/cpu_state.c new file mode 100644 index 000000000..edbf2dd28 --- /dev/null +++ b/src/video_dec/libmpeg2new/libmpeg2/cpu_state.c @@ -0,0 +1,129 @@ +/* + * cpu_state.c + * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org> + * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include "config.h"

#include <stdlib.h>
#include <inttypes.h>

#include "../include/mpeg2.h"
#include "../include/attributes.h"
#include "mpeg2_internal.h"
#ifdef ARCH_X86
#include "../include/mmx.h"
#endif

/* Optional save/restore callbacks, NULL until mpeg2_cpu_state_init()
 * installs an implementation for the detected acceleration. */
void (* mpeg2_cpu_state_save) (cpu_state_t * state) = NULL;
void (* mpeg2_cpu_state_restore) (cpu_state_t * state) = NULL;

#ifdef ARCH_X86
/* MMX needs no saved state: restoring only issues emms(); `state` is
 * unused. */
static void state_restore_mmx (cpu_state_t * state)
{
    emms ();
}
#endif

#ifdef ARCH_PPC
/* Assembler syntax differs between toolchains: the gnu form takes bare
 * register numbers, the apple form wants r/v prefixes. */
#ifdef HAVE_ALTIVEC_H   /* gnu */
#define LI(a,b) "li " #a "," #b "\n\t"
#define STVX0(a,b,c) "stvx " #a ",0," #c "\n\t"
#define STVX(a,b,c) "stvx " #a "," #b "," #c "\n\t"
#define LVX0(a,b,c) "lvx " #a ",0," #c "\n\t"
#define LVX(a,b,c) "lvx " #a "," #b "," #c "\n\t"
#else                   /* apple */
#define LI(a,b) "li r" #a "," #b "\n\t"
#define STVX0(a,b,c) "stvx v" #a ",0,r" #c "\n\t"
#define STVX(a,b,c) "stvx v" #a ",r" #b ",r" #c "\n\t"
#define LVX0(a,b,c) "lvx v" #a ",0,r" #c "\n\t"
#define LVX(a,b,c) "lvx v" #a ",r" #b ",r" #c "\n\t"
#endif

/* Stores vector registers 20..31 at offsets 0, 16, ..., 176 from the
 * address in register 3, alternating registers 9 and 11 as the offset.
 * NOTE(review): the asm declares no operands or clobbers, so it presumably
 * relies on `state` arriving in r3 and on r9/r11 being free to overwrite -
 * confirm against the PowerPC calling convention in use. */
static void state_save_altivec (cpu_state_t * state)
{
    asm (LI (9, 16)
         STVX0 (20, 0, 3)
         LI (11, 32)
         STVX (21, 9, 3)
         LI (9, 48)
         STVX (22, 11, 3)
         LI (11, 64)
         STVX (23, 9, 3)
         LI (9, 80)
         STVX (24, 11, 3)
         LI (11, 96)
         STVX (25, 9, 3)
         LI (9, 112)
         STVX (26, 11, 3)
         LI (11, 128)
         STVX (27, 9, 3)
         LI (9, 144)
         STVX (28, 11, 3)
         LI (11, 160)
         STVX (29, 9, 3)
         LI (9, 176)
         STVX (30, 11, 3)
         STVX (31, 9, 3));
}

/* Mirror of state_save_altivec(): reloads vector registers 20..31 from
 * the same offsets relative to register 3. */
static void state_restore_altivec (cpu_state_t * state)
{
    asm (LI (9, 16)
         LVX0 (20, 0, 3)
         LI (11, 32)
         LVX (21, 9, 3)
         LI (9, 48)
         LVX (22, 11, 3)
         LI (11, 64)
         LVX (23, 9, 3)
         LI (9, 80)
         LVX (24, 11, 3)
         LI (11, 96)
         LVX (25, 9, 3)
         LI (9, 112)
         LVX (26, 11, 3)
         LI (11, 128)
         LVX (27, 9, 3)
         LI (9, 144)
         LVX (28, 11, 3)
         LI (11, 160)
         LVX (29, 9, 3)
         LI (9, 176)
         LVX (30, 11, 3)
         LVX (31, 9, 3));
}
#endif

/* Install the save/restore callbacks matching the acceleration flags:
 * MMX gets only a restore callback, AltiVec gets both.  With neither flag
 * the callbacks are left as they were. */
void mpeg2_cpu_state_init (uint32_t accel)
{
#ifdef ARCH_X86
    if (accel & MPEG2_ACCEL_X86_MMX) {
        mpeg2_cpu_state_restore = state_restore_mmx;
    }
#endif
#ifdef ARCH_PPC
    if (accel & MPEG2_ACCEL_PPC_ALTIVEC) {
        mpeg2_cpu_state_save = state_save_altivec;
        mpeg2_cpu_state_restore = state_restore_altivec;
    }
#endif
}
diff --git a/src/video_dec/libmpeg2new/libmpeg2/decode.c b/src/video_dec/libmpeg2new/libmpeg2/decode.c new file mode 100644 index 000000000..337ba4466 --- /dev/null +++ b/src/video_dec/libmpeg2new/libmpeg2/decode.c @@ -0,0 +1,439 @@
/*
 * decode.c
 * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
 * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
 *
 * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
 * See http://libmpeg2.sourceforge.net/ for updates.
 *
 * mpeg2dec is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * mpeg2dec is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#include <string.h> /* memcmp/memset, try to remove */ +#include <stdlib.h> +#include <inttypes.h> + +#include "../include/mpeg2.h" +#include "../include/attributes.h" +#include "mpeg2_internal.h" + +static int mpeg2_accels = 0; + +#define BUFFER_SIZE (1194 * 1024) + +const mpeg2_info_t * mpeg2_info (mpeg2dec_t * mpeg2dec) +{ + return &(mpeg2dec->info); +} + +static inline int skip_chunk (mpeg2dec_t * mpeg2dec, int bytes) +{ + uint8_t * current; + uint32_t shift; + uint8_t * limit; + uint8_t byte; + + if (!bytes) + return 0; + + current = mpeg2dec->buf_start; + shift = mpeg2dec->shift; + limit = current + bytes; + + do { + byte = *current++; + if (shift == 0x00000100) { + int skipped; + + mpeg2dec->shift = 0xffffff00; + skipped = current - mpeg2dec->buf_start; + mpeg2dec->buf_start = current; + return skipped; + } + shift = (shift | byte) << 8; + } while (current < limit); + + mpeg2dec->shift = shift; + mpeg2dec->buf_start = current; + return 0; +} + +static inline int copy_chunk (mpeg2dec_t * mpeg2dec, int bytes) +{ + uint8_t * current; + uint32_t shift; + uint8_t * chunk_ptr; + uint8_t * limit; + uint8_t byte; + + if (!bytes) + return 0; + + current = mpeg2dec->buf_start; + shift = mpeg2dec->shift; + chunk_ptr = mpeg2dec->chunk_ptr; + limit = current + bytes; + + do { + byte = *current++; + if (shift == 0x00000100) { + int copied; + + mpeg2dec->shift = 0xffffff00; + mpeg2dec->chunk_ptr = chunk_ptr + 1; + copied = current - mpeg2dec->buf_start; + mpeg2dec->buf_start = current; + return copied; + } + shift = (shift | byte) << 8; + *chunk_ptr++ = byte; + } while (current < limit); + + mpeg2dec->shift = shift; + mpeg2dec->buf_start = current; + return 0; +} + +void mpeg2_buffer (mpeg2dec_t * mpeg2dec, uint8_t * start, uint8_t 
* end) +{ + mpeg2dec->buf_start = start; + mpeg2dec->buf_end = end; +} + +int mpeg2_getpos (mpeg2dec_t * mpeg2dec) +{ + return mpeg2dec->buf_end - mpeg2dec->buf_start; +} + +static inline mpeg2_state_t seek_chunk (mpeg2dec_t * mpeg2dec) +{ + int size, skipped; + + size = mpeg2dec->buf_end - mpeg2dec->buf_start; + skipped = skip_chunk (mpeg2dec, size); + if (!skipped) { + mpeg2dec->bytes_since_tag += size; + return STATE_BUFFER; + } + mpeg2dec->bytes_since_tag += skipped; + mpeg2dec->code = mpeg2dec->buf_start[-1]; + return STATE_INTERNAL_NORETURN; +} + +mpeg2_state_t mpeg2_seek_header (mpeg2dec_t * mpeg2dec) +{ + while (!(mpeg2dec->code == 0xb3 || + ((mpeg2dec->code == 0xb7 || mpeg2dec->code == 0xb8 || + !mpeg2dec->code) && mpeg2dec->sequence.width != (unsigned)-1))) + if (seek_chunk (mpeg2dec) == STATE_BUFFER) + return STATE_BUFFER; + mpeg2dec->chunk_start = mpeg2dec->chunk_ptr = mpeg2dec->chunk_buffer; + mpeg2dec->user_data_len = 0; + return ((mpeg2dec->code == 0xb7) ? + mpeg2_header_end (mpeg2dec) : mpeg2_parse_header (mpeg2dec)); +} + +#define RECEIVED(code,state) (((state) << 8) + (code)) + +mpeg2_state_t mpeg2_parse (mpeg2dec_t * mpeg2dec) +{ + int size_buffer, size_chunk, copied; + + if (mpeg2dec->action) { + mpeg2_state_t state; + + state = mpeg2dec->action (mpeg2dec); + if ((int)state > (int)STATE_INTERNAL_NORETURN) + return state; + } + + while (1) { + while ((unsigned) (mpeg2dec->code - mpeg2dec->first_decode_slice) < + mpeg2dec->nb_decode_slices) { + size_buffer = mpeg2dec->buf_end - mpeg2dec->buf_start; + size_chunk = (mpeg2dec->chunk_buffer + BUFFER_SIZE - + mpeg2dec->chunk_ptr); + if (size_buffer <= size_chunk) { + copied = copy_chunk (mpeg2dec, size_buffer); + if (!copied) { + mpeg2dec->bytes_since_tag += size_buffer; + mpeg2dec->chunk_ptr += size_buffer; + return STATE_BUFFER; + } + } else { + copied = copy_chunk (mpeg2dec, size_chunk); + if (!copied) { + /* filled the chunk buffer without finding a start code */ + mpeg2dec->bytes_since_tag += 
size_chunk; + mpeg2dec->action = seek_chunk; + return STATE_INVALID; + } + } + mpeg2dec->bytes_since_tag += copied; + + mpeg2_slice (&(mpeg2dec->decoder), mpeg2dec->code, + mpeg2dec->chunk_start); + mpeg2dec->code = mpeg2dec->buf_start[-1]; + mpeg2dec->chunk_ptr = mpeg2dec->chunk_start; + } + if ((unsigned) (mpeg2dec->code - 1) >= 0xb0 - 1) + break; + if (seek_chunk (mpeg2dec) == STATE_BUFFER) + return STATE_BUFFER; + } + + mpeg2dec->action = mpeg2_seek_header; + switch (mpeg2dec->code) { + case 0x00: + return mpeg2dec->state; + case 0xb3: + case 0xb7: + case 0xb8: + return (mpeg2dec->state == STATE_SLICE) ? STATE_SLICE : STATE_INVALID; + default: + mpeg2dec->action = seek_chunk; + return STATE_INVALID; + } +} + +mpeg2_state_t mpeg2_parse_header (mpeg2dec_t * mpeg2dec) +{ + static int (* process_header[]) (mpeg2dec_t * mpeg2dec) = { + mpeg2_header_picture, mpeg2_header_extension, mpeg2_header_user_data, + mpeg2_header_sequence, NULL, NULL, NULL, NULL, mpeg2_header_gop + }; + int size_buffer, size_chunk, copied; + + mpeg2dec->action = mpeg2_parse_header; + mpeg2dec->info.user_data = NULL; mpeg2dec->info.user_data_len = 0; + while (1) { + size_buffer = mpeg2dec->buf_end - mpeg2dec->buf_start; + size_chunk = (mpeg2dec->chunk_buffer + BUFFER_SIZE - + mpeg2dec->chunk_ptr); + if (size_buffer <= size_chunk) { + copied = copy_chunk (mpeg2dec, size_buffer); + if (!copied) { + mpeg2dec->bytes_since_tag += size_buffer; + mpeg2dec->chunk_ptr += size_buffer; + return STATE_BUFFER; + } + } else { + copied = copy_chunk (mpeg2dec, size_chunk); + if (!copied) { + /* filled the chunk buffer without finding a start code */ + mpeg2dec->bytes_since_tag += size_chunk; + mpeg2dec->code = 0xb4; + mpeg2dec->action = mpeg2_seek_header; + return STATE_INVALID; + } + } + mpeg2dec->bytes_since_tag += copied; + + if (process_header[mpeg2dec->code & 0x0b] (mpeg2dec)) { + mpeg2dec->code = mpeg2dec->buf_start[-1]; + mpeg2dec->action = mpeg2_seek_header; + return STATE_INVALID; + } + + 
mpeg2dec->code = mpeg2dec->buf_start[-1]; + switch (RECEIVED (mpeg2dec->code, mpeg2dec->state)) { + + /* state transition after a sequence header */ + case RECEIVED (0x00, STATE_SEQUENCE): + case RECEIVED (0xb8, STATE_SEQUENCE): + mpeg2_header_sequence_finalize (mpeg2dec); + break; + + /* other legal state transitions */ + case RECEIVED (0x00, STATE_GOP): + mpeg2_header_gop_finalize (mpeg2dec); + break; + case RECEIVED (0x01, STATE_PICTURE): + case RECEIVED (0x01, STATE_PICTURE_2ND): + mpeg2_header_picture_finalize (mpeg2dec, mpeg2_accels); + mpeg2dec->action = mpeg2_header_slice_start; + break; + + /* legal headers within a given state */ + case RECEIVED (0xb2, STATE_SEQUENCE): + case RECEIVED (0xb2, STATE_GOP): + case RECEIVED (0xb2, STATE_PICTURE): + case RECEIVED (0xb2, STATE_PICTURE_2ND): + case RECEIVED (0xb5, STATE_SEQUENCE): + case RECEIVED (0xb5, STATE_PICTURE): + case RECEIVED (0xb5, STATE_PICTURE_2ND): + mpeg2dec->chunk_ptr = mpeg2dec->chunk_start; + continue; + + default: + mpeg2dec->action = mpeg2_seek_header; + return STATE_INVALID; + } + + mpeg2dec->chunk_start = mpeg2dec->chunk_ptr = mpeg2dec->chunk_buffer; + mpeg2dec->user_data_len = 0; + return mpeg2dec->state; + } +} + +int mpeg2_convert (mpeg2dec_t * mpeg2dec, mpeg2_convert_t convert, void * arg) +{ + mpeg2_convert_init_t convert_init; + int error; + + error = convert (MPEG2_CONVERT_SET, NULL, &(mpeg2dec->sequence), 0, + mpeg2_accels, arg, &convert_init); + if (!error) { + mpeg2dec->convert = convert; + mpeg2dec->convert_arg = arg; + mpeg2dec->convert_id_size = convert_init.id_size; + mpeg2dec->convert_stride = 0; + } + return error; +} + +int mpeg2_stride (mpeg2dec_t * mpeg2dec, int stride) +{ + if (!mpeg2dec->convert) { + if (stride < (int) mpeg2dec->sequence.width) + stride = mpeg2dec->sequence.width; + mpeg2dec->decoder.stride_frame = stride; + } else { + mpeg2_convert_init_t convert_init; + + stride = mpeg2dec->convert (MPEG2_CONVERT_STRIDE, NULL, + &(mpeg2dec->sequence), stride, + 
mpeg2_accels, mpeg2dec->convert_arg, + &convert_init); + mpeg2dec->convert_id_size = convert_init.id_size; + mpeg2dec->convert_stride = stride; + } + return stride; +} + +void mpeg2_set_buf (mpeg2dec_t * mpeg2dec, uint8_t * buf[3], void * id) +{ + mpeg2_fbuf_t * fbuf; + + if (mpeg2dec->custom_fbuf) { + if (mpeg2dec->state == STATE_SEQUENCE) { + mpeg2dec->fbuf[2] = mpeg2dec->fbuf[1]; + mpeg2dec->fbuf[1] = mpeg2dec->fbuf[0]; + } + mpeg2_set_fbuf (mpeg2dec, (mpeg2dec->decoder.coding_type == + PIC_FLAG_CODING_TYPE_B)); + fbuf = mpeg2dec->fbuf[0]; + } else { + fbuf = &(mpeg2dec->fbuf_alloc[mpeg2dec->alloc_index].fbuf); + mpeg2dec->alloc_index_user = ++mpeg2dec->alloc_index; + } + fbuf->buf[0] = buf[0]; + fbuf->buf[1] = buf[1]; + fbuf->buf[2] = buf[2]; + fbuf->id = id; +} + +void mpeg2_custom_fbuf (mpeg2dec_t * mpeg2dec, int custom_fbuf) +{ + mpeg2dec->custom_fbuf = custom_fbuf; +} + +void mpeg2_skip (mpeg2dec_t * mpeg2dec, int skip) +{ + mpeg2dec->first_decode_slice = 1; + mpeg2dec->nb_decode_slices = skip ? 0 : (0xb0 - 1); +} + +void mpeg2_slice_region (mpeg2dec_t * mpeg2dec, int start, int end) +{ + start = (start < 1) ? 1 : (start > 0xb0) ? 0xb0 : start; + end = (end < start) ? start : (end > 0xb0) ? 
0xb0 : end; + mpeg2dec->first_decode_slice = start; + mpeg2dec->nb_decode_slices = end - start; +} + +void mpeg2_tag_picture (mpeg2dec_t * mpeg2dec, uint32_t tag, uint32_t tag2) +{ + mpeg2dec->tag_previous = mpeg2dec->tag_current; + mpeg2dec->tag2_previous = mpeg2dec->tag2_current; + mpeg2dec->tag_current = tag; + mpeg2dec->tag2_current = tag2; + mpeg2dec->num_tags++; + mpeg2dec->bytes_since_tag = 0; +} + +uint32_t mpeg2_accel (uint32_t accel) +{ + if (!mpeg2_accels) { + mpeg2_accels = mpeg2_detect_accel (accel) | MPEG2_ACCEL_DETECT; + mpeg2_cpu_state_init (mpeg2_accels); + mpeg2_idct_init (mpeg2_accels); + mpeg2_mc_init (mpeg2_accels); + } + return mpeg2_accels & ~MPEG2_ACCEL_DETECT; +} + +void mpeg2_reset (mpeg2dec_t * mpeg2dec, int full_reset) +{ + mpeg2dec->buf_start = mpeg2dec->buf_end = NULL; + mpeg2dec->num_tags = 0; + mpeg2dec->shift = 0xffffff00; + mpeg2dec->code = 0xb4; + mpeg2dec->action = mpeg2_seek_header; + mpeg2dec->state = STATE_INVALID; + mpeg2dec->first = 1; + + mpeg2_reset_info(&(mpeg2dec->info)); + mpeg2dec->info.gop = NULL; + mpeg2dec->info.user_data = NULL; + mpeg2dec->info.user_data_len = 0; + if (full_reset) { + mpeg2dec->info.sequence = NULL; + mpeg2_header_state_init (mpeg2dec); + } + +} + +mpeg2dec_t * mpeg2_init (void) +{ + mpeg2dec_t * mpeg2dec; + + mpeg2_accel (MPEG2_ACCEL_DETECT); + + mpeg2dec = (mpeg2dec_t *) mpeg2_malloc (sizeof (mpeg2dec_t), + MPEG2_ALLOC_MPEG2DEC); + if (mpeg2dec == NULL) + return NULL; + + memset (mpeg2dec->decoder.DCTblock, 0, 64 * sizeof (int16_t)); + memset (mpeg2dec->quantizer_matrix, 0, 4 * 64 * sizeof (uint8_t)); + + mpeg2dec->chunk_buffer = (uint8_t *) mpeg2_malloc (BUFFER_SIZE + 4, + MPEG2_ALLOC_CHUNK); + + mpeg2dec->sequence.width = (unsigned)-1; + mpeg2_reset (mpeg2dec, 1); + + return mpeg2dec; +} + +void mpeg2_close (mpeg2dec_t * mpeg2dec) +{ + mpeg2_header_state_init (mpeg2dec); + mpeg2_free (mpeg2dec->chunk_buffer); + mpeg2_free (mpeg2dec); +} diff --git a/src/video_dec/libmpeg2new/libmpeg2/header.c 
b/src/video_dec/libmpeg2new/libmpeg2/header.c new file mode 100644 index 000000000..935a50aa3 --- /dev/null +++ b/src/video_dec/libmpeg2new/libmpeg2/header.c @@ -0,0 +1,961 @@ +/* + * header.c + * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org> + * Copyright (C) 2003 Regis Duchesne <hpreg@zoy.org> + * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#include <inttypes.h> +#include <stdlib.h> /* defines NULL */ +#include <string.h> /* memcmp */ + +#include "../include/mpeg2.h" +#include "../include/attributes.h" +#include "mpeg2_internal.h" + +#define SEQ_EXT 2 +#define SEQ_DISPLAY_EXT 4 +#define QUANT_MATRIX_EXT 8 +#define COPYRIGHT_EXT 0x10 +#define PIC_DISPLAY_EXT 0x80 +#define PIC_CODING_EXT 0x100 + +/* default intra quant matrix, in zig-zag order */ +static const uint8_t default_intra_quantizer_matrix[64] ATTR_ALIGN(16) = { + 8, + 16, 16, + 19, 16, 19, + 22, 22, 22, 22, + 22, 22, 26, 24, 26, + 27, 27, 27, 26, 26, 26, + 26, 27, 27, 27, 29, 29, 29, + 34, 34, 34, 29, 29, 29, 27, 27, + 29, 29, 32, 32, 34, 34, 37, + 38, 37, 35, 35, 34, 35, + 38, 38, 40, 40, 40, + 48, 48, 46, 46, + 56, 56, 58, + 69, 69, + 83 +}; + +uint8_t mpeg2_scan_norm[64] ATTR_ALIGN(16) = { + /* Zig-Zag scan pattern */ + 0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5, + 12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13, 6, 7, 14, 21, 28, + 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51, + 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63 +}; + +uint8_t mpeg2_scan_alt[64] ATTR_ALIGN(16) = { + /* Alternate scan pattern */ + 0, 8, 16, 24, 1, 9, 2, 10, 17, 25, 32, 40, 48, 56, 57, 49, + 41, 33, 26, 18, 3, 11, 4, 12, 19, 27, 34, 42, 50, 58, 35, 43, + 51, 59, 20, 28, 5, 13, 6, 14, 21, 29, 36, 44, 52, 60, 37, 45, + 53, 61, 22, 30, 7, 15, 23, 31, 38, 46, 54, 62, 39, 47, 55, 63 +}; + +void mpeg2_header_state_init (mpeg2dec_t * mpeg2dec) +{ + if (mpeg2dec->sequence.width != (unsigned)-1) { + int i; + + mpeg2dec->sequence.width = (unsigned)-1; + if (!mpeg2dec->custom_fbuf) + for (i = mpeg2dec->alloc_index_user; + i < mpeg2dec->alloc_index; i++) { + mpeg2_free 
(mpeg2dec->fbuf_alloc[i].fbuf.buf[0]); + mpeg2_free (mpeg2dec->fbuf_alloc[i].fbuf.buf[1]); + mpeg2_free (mpeg2dec->fbuf_alloc[i].fbuf.buf[2]); + } + if (mpeg2dec->convert_start) + for (i = 0; i < 3; i++) { + mpeg2_free (mpeg2dec->yuv_buf[i][0]); + mpeg2_free (mpeg2dec->yuv_buf[i][1]); + mpeg2_free (mpeg2dec->yuv_buf[i][2]); + } + if (mpeg2dec->decoder.convert_id) + mpeg2_free (mpeg2dec->decoder.convert_id); + } + mpeg2dec->decoder.coding_type = I_TYPE; + mpeg2dec->decoder.convert = NULL; + mpeg2dec->decoder.convert_id = NULL; + mpeg2dec->picture = mpeg2dec->pictures; + mpeg2dec->fbuf[0] = &mpeg2dec->fbuf_alloc[0].fbuf; + mpeg2dec->fbuf[1] = &mpeg2dec->fbuf_alloc[1].fbuf; + mpeg2dec->fbuf[2] = &mpeg2dec->fbuf_alloc[2].fbuf; + mpeg2dec->first = 1; + mpeg2dec->alloc_index = 0; + mpeg2dec->alloc_index_user = 0; + mpeg2dec->first_decode_slice = 1; + mpeg2dec->nb_decode_slices = 0xb0 - 1; + mpeg2dec->convert = NULL; + mpeg2dec->convert_start = NULL; + mpeg2dec->custom_fbuf = 0; + mpeg2dec->yuv_index = 0; +} + +void mpeg2_reset_info (mpeg2_info_t * info) +{ + info->current_picture = info->current_picture_2nd = NULL; + info->display_picture = info->display_picture_2nd = NULL; + info->current_fbuf = info->display_fbuf = info->discard_fbuf = NULL; +} + +static void info_user_data (mpeg2dec_t * mpeg2dec) +{ + if (mpeg2dec->user_data_len) { + mpeg2dec->info.user_data = mpeg2dec->chunk_buffer; + mpeg2dec->info.user_data_len = mpeg2dec->user_data_len - 3; + } +} + +int mpeg2_header_sequence (mpeg2dec_t * mpeg2dec) +{ + uint8_t * buffer = mpeg2dec->chunk_start; + mpeg2_sequence_t * sequence = &(mpeg2dec->new_sequence); + static unsigned int frame_period[16] = { + 0, 1126125, 1125000, 1080000, 900900, 900000, 540000, 450450, 450000, + /* unofficial: xing 15 fps */ + 1800000, + /* unofficial: libmpeg3 "Unofficial economy rates" 5/10/12/15 fps */ + 5400000, 2700000, 2250000, 1800000, 0, 0 + }; + int i; + + if ((buffer[6] & 0x20) != 0x20) /* missing marker_bit */ + return 1; + + i = 
(buffer[0] << 16) | (buffer[1] << 8) | buffer[2]; + if (! (sequence->display_width = sequence->picture_width = i >> 12)) + return 1; + if (! (sequence->display_height = sequence->picture_height = i & 0xfff)) + return 1; + sequence->width = (sequence->picture_width + 15) & ~15; + sequence->height = (sequence->picture_height + 15) & ~15; + sequence->chroma_width = sequence->width >> 1; + sequence->chroma_height = sequence->height >> 1; + + sequence->flags = (SEQ_FLAG_PROGRESSIVE_SEQUENCE | + SEQ_VIDEO_FORMAT_UNSPECIFIED); + + sequence->pixel_width = buffer[3] >> 4; /* aspect ratio */ + sequence->frame_period = frame_period[buffer[3] & 15]; + + sequence->byte_rate = (buffer[4]<<10) | (buffer[5]<<2) | (buffer[6]>>6); + + sequence->vbv_buffer_size = ((buffer[6]<<16)|(buffer[7]<<8))&0x1ff800; + + if (buffer[7] & 4) + sequence->flags |= SEQ_FLAG_CONSTRAINED_PARAMETERS; + + mpeg2dec->copy_matrix = 3; + if (buffer[7] & 2) { + for (i = 0; i < 64; i++) + mpeg2dec->new_quantizer_matrix[0][mpeg2_scan_norm[i]] = + (buffer[i+7] << 7) | (buffer[i+8] >> 1); + buffer += 64; + } else + for (i = 0; i < 64; i++) + mpeg2dec->new_quantizer_matrix[0][mpeg2_scan_norm[i]] = + default_intra_quantizer_matrix[i]; + + if (buffer[7] & 1) + for (i = 0; i < 64; i++) + mpeg2dec->new_quantizer_matrix[1][mpeg2_scan_norm[i]] = + buffer[i+8]; + else + memset (mpeg2dec->new_quantizer_matrix[1], 16, 64); + + sequence->profile_level_id = 0x80; + sequence->colour_primaries = 0; + sequence->transfer_characteristics = 0; + sequence->matrix_coefficients = 0; + + mpeg2dec->ext_state = SEQ_EXT; + mpeg2dec->state = STATE_SEQUENCE; + mpeg2dec->display_offset_x = mpeg2dec->display_offset_y = 0; + + return 0; +} + +static int sequence_ext (mpeg2dec_t * mpeg2dec) +{ + uint8_t * buffer = mpeg2dec->chunk_start; + mpeg2_sequence_t * sequence = &(mpeg2dec->new_sequence); + uint32_t flags; + + if (!(buffer[3] & 1)) + return 1; + + sequence->profile_level_id = (buffer[0] << 4) | (buffer[1] >> 4); + + 
sequence->display_width = sequence->picture_width += + ((buffer[1] << 13) | (buffer[2] << 5)) & 0x3000; + sequence->display_height = sequence->picture_height += + (buffer[2] << 7) & 0x3000; + sequence->width = (sequence->picture_width + 15) & ~15; + sequence->height = (sequence->picture_height + 15) & ~15; + flags = sequence->flags | SEQ_FLAG_MPEG2; + if (!(buffer[1] & 8)) { + flags &= ~SEQ_FLAG_PROGRESSIVE_SEQUENCE; + sequence->height = (sequence->height + 31) & ~31; + } + if (buffer[5] & 0x80) + flags |= SEQ_FLAG_LOW_DELAY; + sequence->flags = flags; + sequence->chroma_width = sequence->width; + sequence->chroma_height = sequence->height; + switch (buffer[1] & 6) { + case 0: /* invalid */ + return 1; + case 2: /* 4:2:0 */ + sequence->chroma_height >>= 1; + case 4: /* 4:2:2 */ + sequence->chroma_width >>= 1; + } + + sequence->byte_rate += ((buffer[2]<<25) | (buffer[3]<<17)) & 0x3ffc0000; + + sequence->vbv_buffer_size |= buffer[4] << 21; + + sequence->frame_period = + sequence->frame_period * ((buffer[5]&31)+1) / (((buffer[5]>>2)&3)+1); + + mpeg2dec->ext_state = SEQ_DISPLAY_EXT; + + return 0; +} + +static int sequence_display_ext (mpeg2dec_t * mpeg2dec) +{ + uint8_t * buffer = mpeg2dec->chunk_start; + mpeg2_sequence_t * sequence = &(mpeg2dec->new_sequence); + + sequence->flags = ((sequence->flags & ~SEQ_MASK_VIDEO_FORMAT) | + ((buffer[0]<<4) & SEQ_MASK_VIDEO_FORMAT)); + if (buffer[0] & 1) { + sequence->flags |= SEQ_FLAG_COLOUR_DESCRIPTION; + sequence->colour_primaries = buffer[1]; + sequence->transfer_characteristics = buffer[2]; + sequence->matrix_coefficients = buffer[3]; + buffer += 3; + } + + if (!(buffer[2] & 2)) /* missing marker_bit */ + return 1; + + sequence->display_width = (buffer[1] << 6) | (buffer[2] >> 2); + sequence->display_height = + ((buffer[2]& 1 ) << 13) | (buffer[3] << 5) | (buffer[4] >> 3); + + return 0; +} + +static inline void simplify (unsigned int * u, unsigned int * v) +{ + unsigned int a, b, tmp; + + a = *u; b = *v; + while (a) { /* find 
/*
 * Guess the pixel aspect ratio of a standard-definition stream from its
 * picture size.
 *
 * *pixel_width / *pixel_height are first set to the values stored in the
 * sequence.  If the picture size matches one of the known video modes,
 * the stored ratio is not 1:1 and the display size equals the picture
 * size, they are replaced by a ratio derived from the mode.
 *
 * Returns 0 when no guess was made (outputs hold the stored ratio),
 * 1 when the mode was treated as 576 lines, 2 otherwise.
 */
int mpeg2_guess_aspect (const mpeg2_sequence_t * sequence,
                        unsigned int * pixel_width,
                        unsigned int * pixel_height)
{
    static struct {
        unsigned int width, height;
    } video_modes[] = {
        {720, 576}, /* 625 lines, 13.5 MHz (D1, DV, DVB, DVD) */
        {704, 576}, /* 625 lines, 13.5 MHz (1/1 D1, DVB, DVD, 4CIF) */
        {544, 576}, /* 625 lines, 10.125 MHz (DVB, laserdisc) */
        {528, 576}, /* 625 lines, 10.125 MHz (3/4 D1, DVB, laserdisc) */
        {480, 576}, /* 625 lines, 9 MHz (2/3 D1, DVB, SVCD) */
        {352, 576}, /* 625 lines, 6.75 MHz (D2, 1/2 D1, CVD, DVB, DVD) */
        {352, 288}, /* 625 lines, 6.75 MHz, 1 field (D4, VCD, DVB, DVD, CIF) */
        {176, 144}, /* 625 lines, 3.375 MHz, half field (QCIF) */
        {720, 486}, /* 525 lines, 13.5 MHz (D1) */
        {704, 486}, /* 525 lines, 13.5 MHz */
        {720, 480}, /* 525 lines, 13.5 MHz (DV, DSS, DVD) */
        {704, 480}, /* 525 lines, 13.5 MHz (1/1 D1, ATSC, DVD) */
        {544, 480}, /* 525 lines, 10.125 MHz (DSS, laserdisc) */
        {528, 480}, /* 525 lines, 10.125 MHz (3/4 D1, laserdisc) */
        {480, 480}, /* 525 lines, 9 MHz (2/3 D1, SVCD) */
        {352, 480}, /* 525 lines, 6.75 MHz (D2, 1/2 D1, CVD, DVD) */
        {352, 240}  /* 525 lines, 6.75 MHz, 1 field (D4, VCD, DSS, DVD) */
    };
    unsigned int width, height, pix_width, pix_height, i, DAR_16_9;

    /* default answer: whatever the sequence header said */
    *pixel_width = sequence->pixel_width;
    *pixel_height = sequence->pixel_height;
    width = sequence->picture_width;
    height = sequence->picture_height;
    for (i = 0; i < sizeof (video_modes) / sizeof (video_modes[0]); i++)
        if (width == video_modes[i].width && height == video_modes[i].height)
            break;
    /* give up on unknown sizes, square pixels, or a cropped display area */
    if (i == sizeof (video_modes) / sizeof (video_modes[0]) ||
        (sequence->pixel_width == 1 && sequence->pixel_height == 1) ||
        width != sequence->display_width || height != sequence->display_height)
        return 0;

    /* scale reduced modes up by powers of two: height to at least 480,
     * width to more than 352; the factors seed the pixel ratio */
    for (pix_height = 1; height * pix_height < 480; pix_height <<= 1);
    height *= pix_height;
    for (pix_width = 1; width * pix_width <= 352; pix_width <<= 1);
    width *= pix_width;

    if (! (sequence->flags & SEQ_FLAG_MPEG2)) {
        /* expected stored pixel_height, indexed [16:9?][576 lines?] */
        static unsigned int mpeg1_check[2][2] = {{11, 54}, {27, 45}};
        DAR_16_9 = (sequence->pixel_height == 27 ||
                    sequence->pixel_height == 45);
        if (width < 704 ||
            sequence->pixel_height != mpeg1_check[DAR_16_9][height == 576])
            return 0;
    } else {
        /* wider than 4:3 when picture size times stored pixel ratio says so */
        DAR_16_9 = (3 * sequence->picture_width * sequence->pixel_width >
                    4 * sequence->picture_height * sequence->pixel_height);
        /* compensate for the narrower horizontal sampling of these widths */
        switch (width) {
        case 528: case 544: pix_width *= 4; pix_height *= 3; break;
        case 480: pix_width *= 3; pix_height *= 2; break;
        }
    }
    if (DAR_16_9) {
        pix_width *= 4; pix_height *= 3;
    }
    /* base pixel ratio: 59:54 for 576-line modes, 10:11 for the rest */
    if (height == 576) {
        pix_width *= 59; pix_height *= 54;
    } else {
        pix_width *= 10; pix_height *= 11;
    }
    *pixel_width = pix_width;
    *pixel_height = pix_height;
    simplify (pixel_width, pixel_height);
    return (height == 576) ? 1 : 2;
}
mpeg2_seek_header; + mpeg2dec->state = STATE_SEQUENCE; + return STATE_SEQUENCE; +} + +void mpeg2_header_sequence_finalize (mpeg2dec_t * mpeg2dec) +{ + mpeg2_sequence_t * sequence = &(mpeg2dec->new_sequence); + mpeg2_decoder_t * decoder = &(mpeg2dec->decoder); + + finalize_sequence (sequence); + finalize_matrix (mpeg2dec); + + decoder->mpeg1 = !(sequence->flags & SEQ_FLAG_MPEG2); + decoder->width = sequence->width; + decoder->height = sequence->height; + decoder->vertical_position_extension = (sequence->picture_height > 2800); + decoder->chroma_format = ((sequence->chroma_width == sequence->width) + + (sequence->chroma_height == sequence->height)); + + if (mpeg2dec->sequence.width != (unsigned)-1) { + /* + * According to 6.1.1.6, repeat sequence headers should be + * identical to the original. However some encoders dont + * respect that and change various fields (including bitrate + * and aspect ratio) in the repeat sequence headers. So we + * choose to be as conservative as possible and only restart + * the decoder if the width, height, chroma_width, + * chroma_height or low_delay flag are modified. + */ + if (sequence->width != mpeg2dec->sequence.width || + sequence->height != mpeg2dec->sequence.height || + sequence->chroma_width != mpeg2dec->sequence.chroma_width || + sequence->chroma_height != mpeg2dec->sequence.chroma_height || + ((sequence->flags ^ mpeg2dec->sequence.flags) & + SEQ_FLAG_LOW_DELAY)) { + decoder->stride_frame = sequence->width; + mpeg2_header_end (mpeg2dec); + mpeg2dec->action = invalid_end_action; + mpeg2dec->state = STATE_INVALID_END; + return; + } + mpeg2dec->state = (memcmp (&(mpeg2dec->sequence), sequence, + sizeof (mpeg2_sequence_t)) ? 
+ STATE_SEQUENCE_MODIFIED : STATE_SEQUENCE_REPEATED); + } else + decoder->stride_frame = sequence->width; + mpeg2dec->sequence = *sequence; + mpeg2_reset_info (&(mpeg2dec->info)); + mpeg2dec->info.sequence = &(mpeg2dec->sequence); + mpeg2dec->info.gop = NULL; + info_user_data (mpeg2dec); +} + +int mpeg2_header_gop (mpeg2dec_t * mpeg2dec) +{ + uint8_t * buffer = mpeg2dec->chunk_start; + mpeg2_gop_t * gop = &(mpeg2dec->new_gop); + + if (! (buffer[1] & 8)) + return 1; + gop->hours = (buffer[0] >> 2) & 31; + gop->minutes = ((buffer[0] << 4) | (buffer[1] >> 4)) & 63; + gop->seconds = ((buffer[1] << 3) | (buffer[2] >> 5)) & 63; + gop->pictures = ((buffer[2] << 1) | (buffer[3] >> 7)) & 63; + gop->flags = (buffer[0] >> 7) | ((buffer[3] >> 4) & 6); + mpeg2dec->state = STATE_GOP; + return 0; +} + +void mpeg2_header_gop_finalize (mpeg2dec_t * mpeg2dec) +{ + mpeg2dec->gop = mpeg2dec->new_gop; + mpeg2_reset_info (&(mpeg2dec->info)); + mpeg2dec->info.gop = &(mpeg2dec->gop); + info_user_data (mpeg2dec); +} + +void mpeg2_set_fbuf (mpeg2dec_t * mpeg2dec, int b_type) +{ + int i; + + for (i = 0; i < 3; i++) + if (mpeg2dec->fbuf[1] != &mpeg2dec->fbuf_alloc[i].fbuf && + mpeg2dec->fbuf[2] != &mpeg2dec->fbuf_alloc[i].fbuf) { + mpeg2dec->fbuf[0] = &mpeg2dec->fbuf_alloc[i].fbuf; + mpeg2dec->info.current_fbuf = mpeg2dec->fbuf[0]; + if (b_type || (mpeg2dec->sequence.flags & SEQ_FLAG_LOW_DELAY)) { + if (b_type || mpeg2dec->convert) + mpeg2dec->info.discard_fbuf = mpeg2dec->fbuf[0]; + mpeg2dec->info.display_fbuf = mpeg2dec->fbuf[0]; + } + break; + } +} + +int mpeg2_header_picture (mpeg2dec_t * mpeg2dec) +{ + uint8_t * buffer = mpeg2dec->chunk_start; + mpeg2_picture_t * picture = &(mpeg2dec->new_picture); + mpeg2_decoder_t * decoder = &(mpeg2dec->decoder); + int type; + + mpeg2dec->state = ((mpeg2dec->state != STATE_SLICE_1ST) ? 
+ STATE_PICTURE : STATE_PICTURE_2ND); + mpeg2dec->ext_state = PIC_CODING_EXT; + + picture->temporal_reference = (buffer[0] << 2) | (buffer[1] >> 6); + + type = (buffer [1] >> 3) & 7; + if (type == PIC_FLAG_CODING_TYPE_P || type == PIC_FLAG_CODING_TYPE_B) { + /* forward_f_code and backward_f_code - used in mpeg1 only */ + decoder->f_motion.f_code[1] = (buffer[3] >> 2) & 1; + decoder->f_motion.f_code[0] = + (((buffer[3] << 1) | (buffer[4] >> 7)) & 7) - 1; + decoder->b_motion.f_code[1] = (buffer[4] >> 6) & 1; + decoder->b_motion.f_code[0] = ((buffer[4] >> 3) & 7) - 1; + } + + picture->flags = PIC_FLAG_PROGRESSIVE_FRAME | type; + picture->tag = picture->tag2 = 0; + if (mpeg2dec->num_tags) { + if (mpeg2dec->bytes_since_tag >= mpeg2dec->chunk_ptr - buffer + 4) { + mpeg2dec->num_tags = 0; + picture->tag = mpeg2dec->tag_current; + picture->tag2 = mpeg2dec->tag2_current; + picture->flags |= PIC_FLAG_TAGS; + } else if (mpeg2dec->num_tags > 1) { + mpeg2dec->num_tags = 1; + picture->tag = mpeg2dec->tag_previous; + picture->tag2 = mpeg2dec->tag2_previous; + picture->flags |= PIC_FLAG_TAGS; + } + } + picture->nb_fields = 2; + picture->display_offset[0].x = picture->display_offset[1].x = + picture->display_offset[2].x = mpeg2dec->display_offset_x; + picture->display_offset[0].y = picture->display_offset[1].y = + picture->display_offset[2].y = mpeg2dec->display_offset_y; + + /* XXXXXX decode extra_information_picture as well */ + + mpeg2dec->q_scale_type = 0; + decoder->intra_dc_precision = 7; + decoder->frame_pred_frame_dct = 1; + decoder->concealment_motion_vectors = 0; + decoder->scan = mpeg2_scan_norm; + decoder->picture_structure = FRAME_PICTURE; + mpeg2dec->copy_matrix = 0; + + return 0; +} + +static int picture_coding_ext (mpeg2dec_t * mpeg2dec) +{ + uint8_t * buffer = mpeg2dec->chunk_start; + mpeg2_picture_t * picture = &(mpeg2dec->new_picture); + mpeg2_decoder_t * decoder = &(mpeg2dec->decoder); + uint32_t flags; + + /* pre subtract 1 for use later in 
compute_motion_vector */ + decoder->f_motion.f_code[0] = (buffer[0] & 15) - 1; + decoder->f_motion.f_code[1] = (buffer[1] >> 4) - 1; + decoder->b_motion.f_code[0] = (buffer[1] & 15) - 1; + decoder->b_motion.f_code[1] = (buffer[2] >> 4) - 1; + + flags = picture->flags; + decoder->intra_dc_precision = 7 - ((buffer[2] >> 2) & 3); + decoder->picture_structure = buffer[2] & 3; + switch (decoder->picture_structure) { + case TOP_FIELD: + flags |= PIC_FLAG_TOP_FIELD_FIRST; + case BOTTOM_FIELD: + picture->nb_fields = 1; + break; + case FRAME_PICTURE: + if (!(mpeg2dec->sequence.flags & SEQ_FLAG_PROGRESSIVE_SEQUENCE)) { + picture->nb_fields = (buffer[3] & 2) ? 3 : 2; + flags |= (buffer[3] & 128) ? PIC_FLAG_TOP_FIELD_FIRST : 0; + } else + picture->nb_fields = (buffer[3]&2) ? ((buffer[3]&128) ? 6 : 4) : 2; + break; + default: + return 1; + } + decoder->top_field_first = buffer[3] >> 7; + decoder->frame_pred_frame_dct = (buffer[3] >> 6) & 1; + decoder->concealment_motion_vectors = (buffer[3] >> 5) & 1; + mpeg2dec->q_scale_type = buffer[3] & 16; + decoder->intra_vlc_format = (buffer[3] >> 3) & 1; + decoder->scan = (buffer[3] & 4) ? 
mpeg2_scan_alt : mpeg2_scan_norm; + if (!(buffer[4] & 0x80)) + flags &= ~PIC_FLAG_PROGRESSIVE_FRAME; + if (buffer[4] & 0x40) + flags |= (((buffer[4]<<26) | (buffer[5]<<18) | (buffer[6]<<10)) & + PIC_MASK_COMPOSITE_DISPLAY) | PIC_FLAG_COMPOSITE_DISPLAY; + picture->flags = flags; + + mpeg2dec->ext_state = PIC_DISPLAY_EXT | COPYRIGHT_EXT | QUANT_MATRIX_EXT; + + return 0; +} + +static int picture_display_ext (mpeg2dec_t * mpeg2dec) +{ + uint8_t * buffer = mpeg2dec->chunk_start; + mpeg2_picture_t * picture = &(mpeg2dec->new_picture); + int i, nb_pos; + + nb_pos = picture->nb_fields; + if (mpeg2dec->sequence.flags & SEQ_FLAG_PROGRESSIVE_SEQUENCE) + nb_pos >>= 1; + + for (i = 0; i < nb_pos; i++) { + int x, y; + + x = ((buffer[4*i] << 24) | (buffer[4*i+1] << 16) | + (buffer[4*i+2] << 8) | buffer[4*i+3]) >> (11-2*i); + y = ((buffer[4*i+2] << 24) | (buffer[4*i+3] << 16) | + (buffer[4*i+4] << 8) | buffer[4*i+5]) >> (10-2*i); + if (! (x & y & 1)) + return 1; + picture->display_offset[i].x = mpeg2dec->display_offset_x = x >> 1; + picture->display_offset[i].y = mpeg2dec->display_offset_y = y >> 1; + } + for (; i < 3; i++) { + picture->display_offset[i].x = mpeg2dec->display_offset_x; + picture->display_offset[i].y = mpeg2dec->display_offset_y; + } + return 0; +} + +void mpeg2_header_picture_finalize (mpeg2dec_t * mpeg2dec, uint32_t accels) +{ + mpeg2_decoder_t * decoder = &(mpeg2dec->decoder); + int old_type_b = (decoder->coding_type == B_TYPE); + int low_delay = mpeg2dec->sequence.flags & SEQ_FLAG_LOW_DELAY; + + finalize_matrix (mpeg2dec); + decoder->coding_type = mpeg2dec->new_picture.flags & PIC_MASK_CODING_TYPE; + + if (mpeg2dec->state == STATE_PICTURE) { + mpeg2_picture_t * picture; + mpeg2_picture_t * other; + + decoder->second_field = 0; + + picture = other = mpeg2dec->pictures; + if (old_type_b ^ (mpeg2dec->picture < mpeg2dec->pictures + 2)) + picture += 2; + else + other += 2; + mpeg2dec->picture = picture; + *picture = mpeg2dec->new_picture; + + if (!old_type_b) { + 
mpeg2dec->fbuf[2] = mpeg2dec->fbuf[1]; + mpeg2dec->fbuf[1] = mpeg2dec->fbuf[0]; + } + mpeg2dec->fbuf[0] = NULL; + mpeg2_reset_info (&(mpeg2dec->info)); + mpeg2dec->info.current_picture = picture; + mpeg2dec->info.display_picture = picture; + if (decoder->coding_type != B_TYPE) { + if (!low_delay) { + if (mpeg2dec->first) { + mpeg2dec->info.display_picture = NULL; + mpeg2dec->first = 0; + } else { + mpeg2dec->info.display_picture = other; + if (other->nb_fields == 1) + mpeg2dec->info.display_picture_2nd = other + 1; + mpeg2dec->info.display_fbuf = mpeg2dec->fbuf[1]; + } + } + if (!low_delay + !mpeg2dec->convert) + mpeg2dec->info.discard_fbuf = + mpeg2dec->fbuf[!low_delay + !mpeg2dec->convert]; + } + if (mpeg2dec->convert) { + mpeg2_convert_init_t convert_init; + if (!mpeg2dec->convert_start) { + int y_size, uv_size; + + mpeg2dec->decoder.convert_id = + mpeg2_malloc (mpeg2dec->convert_id_size, + MPEG2_ALLOC_CONVERT_ID); + mpeg2dec->convert (MPEG2_CONVERT_START, + mpeg2dec->decoder.convert_id, + &(mpeg2dec->sequence), + mpeg2dec->convert_stride, accels, + mpeg2dec->convert_arg, &convert_init); + mpeg2dec->convert_start = convert_init.start; + mpeg2dec->decoder.convert = convert_init.copy; + + y_size = decoder->stride_frame * mpeg2dec->sequence.height; + uv_size = y_size >> (2 - mpeg2dec->decoder.chroma_format); + mpeg2dec->yuv_buf[0][0] = + (uint8_t *) mpeg2_malloc (y_size, MPEG2_ALLOC_YUV); + mpeg2dec->yuv_buf[0][1] = + (uint8_t *) mpeg2_malloc (uv_size, MPEG2_ALLOC_YUV); + mpeg2dec->yuv_buf[0][2] = + (uint8_t *) mpeg2_malloc (uv_size, MPEG2_ALLOC_YUV); + mpeg2dec->yuv_buf[1][0] = + (uint8_t *) mpeg2_malloc (y_size, MPEG2_ALLOC_YUV); + mpeg2dec->yuv_buf[1][1] = + (uint8_t *) mpeg2_malloc (uv_size, MPEG2_ALLOC_YUV); + mpeg2dec->yuv_buf[1][2] = + (uint8_t *) mpeg2_malloc (uv_size, MPEG2_ALLOC_YUV); + y_size = decoder->stride_frame * 32; + uv_size = y_size >> (2 - mpeg2dec->decoder.chroma_format); + mpeg2dec->yuv_buf[2][0] = + (uint8_t *) mpeg2_malloc (y_size, 
MPEG2_ALLOC_YUV); + mpeg2dec->yuv_buf[2][1] = + (uint8_t *) mpeg2_malloc (uv_size, MPEG2_ALLOC_YUV); + mpeg2dec->yuv_buf[2][2] = + (uint8_t *) mpeg2_malloc (uv_size, MPEG2_ALLOC_YUV); + } + if (!mpeg2dec->custom_fbuf) { + while (mpeg2dec->alloc_index < 3) { + mpeg2_fbuf_t * fbuf; + + fbuf = &mpeg2dec->fbuf_alloc[mpeg2dec->alloc_index++].fbuf; + fbuf->id = NULL; + fbuf->buf[0] = + (uint8_t *) mpeg2_malloc (convert_init.buf_size[0], + MPEG2_ALLOC_CONVERTED); + fbuf->buf[1] = + (uint8_t *) mpeg2_malloc (convert_init.buf_size[1], + MPEG2_ALLOC_CONVERTED); + fbuf->buf[2] = + (uint8_t *) mpeg2_malloc (convert_init.buf_size[2], + MPEG2_ALLOC_CONVERTED); + } + mpeg2_set_fbuf (mpeg2dec, (decoder->coding_type == B_TYPE)); + } + } else if (!mpeg2dec->custom_fbuf) { + while (mpeg2dec->alloc_index < 3) { + mpeg2_fbuf_t * fbuf; + int y_size, uv_size; + + fbuf = &(mpeg2dec->fbuf_alloc[mpeg2dec->alloc_index++].fbuf); + fbuf->id = NULL; + y_size = decoder->stride_frame * mpeg2dec->sequence.height; + uv_size = y_size >> (2 - decoder->chroma_format); + fbuf->buf[0] = (uint8_t *) mpeg2_malloc (y_size, + MPEG2_ALLOC_YUV); + fbuf->buf[1] = (uint8_t *) mpeg2_malloc (uv_size, + MPEG2_ALLOC_YUV); + fbuf->buf[2] = (uint8_t *) mpeg2_malloc (uv_size, + MPEG2_ALLOC_YUV); + } + mpeg2_set_fbuf (mpeg2dec, (decoder->coding_type == B_TYPE)); + } + } else { + decoder->second_field = 1; + mpeg2dec->picture++; /* second field picture */ + *(mpeg2dec->picture) = mpeg2dec->new_picture; + mpeg2dec->info.current_picture_2nd = mpeg2dec->picture; + if (low_delay || decoder->coding_type == B_TYPE) + mpeg2dec->info.display_picture_2nd = mpeg2dec->picture; + } + + info_user_data (mpeg2dec); +} + +static int copyright_ext (mpeg2dec_t * mpeg2dec) +{ + return 0; +} + +static int quant_matrix_ext (mpeg2dec_t * mpeg2dec) +{ + uint8_t * buffer = mpeg2dec->chunk_start; + int i, j; + + for (i = 0; i < 4; i++) + if (buffer[0] & (8 >> i)) { + for (j = 0; j < 64; j++) + 
mpeg2dec->new_quantizer_matrix[i][mpeg2_scan_norm[j]] = + (buffer[j] << (i+5)) | (buffer[j+1] >> (3-i)); + mpeg2dec->copy_matrix |= 1 << i; + buffer += 64; + } + + return 0; +} + +int mpeg2_header_extension (mpeg2dec_t * mpeg2dec) +{ + static int (* parser[]) (mpeg2dec_t *) = { + 0, sequence_ext, sequence_display_ext, quant_matrix_ext, + copyright_ext, 0, 0, picture_display_ext, picture_coding_ext + }; + int ext, ext_bit; + + ext = mpeg2dec->chunk_start[0] >> 4; + ext_bit = 1 << ext; + + if (!(mpeg2dec->ext_state & ext_bit)) + return 0; /* ignore illegal extensions */ + mpeg2dec->ext_state &= ~ext_bit; + return parser[ext] (mpeg2dec); +} + +int mpeg2_header_user_data (mpeg2dec_t * mpeg2dec) +{ + mpeg2dec->user_data_len += mpeg2dec->chunk_ptr - 1 - mpeg2dec->chunk_start; + mpeg2dec->chunk_start = mpeg2dec->chunk_ptr - 1; + + return 0; +} + +static void prescale (mpeg2dec_t * mpeg2dec, int index) +{ + static int non_linear_scale [] = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 10, 12, 14, 16, 18, 20, 22, + 24, 28, 32, 36, 40, 44, 48, 52, + 56, 64, 72, 80, 88, 96, 104, 112 + }; + int i, j, k; + mpeg2_decoder_t * decoder = &(mpeg2dec->decoder); + + if (mpeg2dec->scaled[index] != mpeg2dec->q_scale_type) { + mpeg2dec->scaled[index] = mpeg2dec->q_scale_type; + for (i = 0; i < 32; i++) { + k = mpeg2dec->q_scale_type ? non_linear_scale[i] : (i << 1); + for (j = 0; j < 64; j++) + decoder->quantizer_prescale[index][i][j] = + k * mpeg2dec->quantizer_matrix[index][j]; + } + } +} + +mpeg2_state_t mpeg2_header_slice_start (mpeg2dec_t * mpeg2dec) +{ + mpeg2_decoder_t * decoder = &(mpeg2dec->decoder); + + mpeg2dec->info.user_data = NULL; mpeg2dec->info.user_data_len = 0; + mpeg2dec->state = ((mpeg2dec->picture->nb_fields > 1 || + mpeg2dec->state == STATE_PICTURE_2ND) ? 
+ STATE_SLICE : STATE_SLICE_1ST); + + if (mpeg2dec->decoder.coding_type != D_TYPE) { /* no prescale refresh is done for D_TYPE */ + prescale (mpeg2dec, 0); + if (decoder->chroma_quantizer[0] == decoder->quantizer_prescale[2]) + prescale (mpeg2dec, 2); /* table 2 is refreshed only while chroma_quantizer[0] points at it */ + if (mpeg2dec->decoder.coding_type != I_TYPE) { + prescale (mpeg2dec, 1); + if (decoder->chroma_quantizer[1] == decoder->quantizer_prescale[3]) + prescale (mpeg2dec, 3); /* likewise table 3 and chroma_quantizer[1] */ + } + } + + if (!(mpeg2dec->nb_decode_slices)) + mpeg2dec->picture->flags |= PIC_FLAG_SKIP; /* nb_decode_slices is zero: flag the picture as skipped */ + else if (mpeg2dec->convert_start) { + mpeg2dec->convert_start (decoder->convert_id, mpeg2dec->fbuf[0], + mpeg2dec->picture, mpeg2dec->info.gop); + + if (mpeg2dec->decoder.coding_type == B_TYPE) + mpeg2_init_fbuf (&(mpeg2dec->decoder), mpeg2dec->yuv_buf[2], + mpeg2dec->yuv_buf[mpeg2dec->yuv_index ^ 1], + mpeg2dec->yuv_buf[mpeg2dec->yuv_index]); + else { + mpeg2_init_fbuf (&(mpeg2dec->decoder), + mpeg2dec->yuv_buf[mpeg2dec->yuv_index ^ 1], + mpeg2dec->yuv_buf[mpeg2dec->yuv_index], + mpeg2dec->yuv_buf[mpeg2dec->yuv_index]); + if (mpeg2dec->state == STATE_SLICE) + mpeg2dec->yuv_index ^= 1; /* toggle between yuv_buf[0] and yuv_buf[1]; not done while state is STATE_SLICE_1ST */ + } + } else { + int b_type; + + b_type = (mpeg2dec->decoder.coding_type == B_TYPE); /* 1 for B_TYPE, else 0; used as an fbuf index offset */ + mpeg2_init_fbuf (&(mpeg2dec->decoder), mpeg2dec->fbuf[0]->buf, + mpeg2dec->fbuf[b_type + 1]->buf, + mpeg2dec->fbuf[b_type]->buf); + } + mpeg2dec->action = NULL; + return STATE_INTERNAL_NORETURN; +} + +static mpeg2_state_t seek_sequence (mpeg2dec_t * mpeg2dec) /* resets info, drops the sequence and gop pointers, re-initialises header state and continues through mpeg2_seek_header */ +{ + mpeg2_reset_info (&(mpeg2dec->info)); + mpeg2dec->info.sequence = NULL; + mpeg2dec->info.gop = NULL; + mpeg2_header_state_init (mpeg2dec); + mpeg2dec->action = mpeg2_seek_header; /* later calls go straight to mpeg2_seek_header */ + return mpeg2_seek_header (mpeg2dec); +} + +mpeg2_state_t mpeg2_header_end (mpeg2dec_t * mpeg2dec) /* fills info with the picture and buffers to display or discard, arms seek_sequence as the next action and returns STATE_END */ +{ + mpeg2_picture_t * picture; + int b_type; + + b_type = (mpeg2dec->decoder.coding_type == B_TYPE); + picture = mpeg2dec->pictures; + if ((mpeg2dec->picture >= picture + 2) ^ b_type) + picture = mpeg2dec->pictures + 2; /* choose pictures[0] or pictures[2] from the current picture position, inverted for B_TYPE */ + + mpeg2_reset_info (&(mpeg2dec->info)); + if (!(mpeg2dec->sequence.flags & 
SEQ_FLAG_LOW_DELAY)) { + mpeg2dec->info.display_picture = picture; + if (picture->nb_fields == 1) + mpeg2dec->info.display_picture_2nd = picture + 1; /* one-field picture: the following entry is reported as its second field */ + mpeg2dec->info.display_fbuf = mpeg2dec->fbuf[b_type]; + if (!mpeg2dec->convert) + mpeg2dec->info.discard_fbuf = mpeg2dec->fbuf[b_type + 1]; + } else if (!mpeg2dec->convert) + mpeg2dec->info.discard_fbuf = mpeg2dec->fbuf[b_type]; /* low-delay case: nothing is queued for display here, only a buffer to discard */ + mpeg2dec->action = seek_sequence; /* the next action restarts the search for a sequence header */ + return STATE_END; +} diff --git a/src/video_dec/libmpeg2new/libmpeg2/idct.c b/src/video_dec/libmpeg2new/libmpeg2/idct.c new file mode 100644 index 000000000..8b982bb33 --- /dev/null +++ b/src/video_dec/libmpeg2new/libmpeg2/idct.c @@ -0,0 +1,287 @@ +/* + * idct.c + * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org> + * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#include <stdlib.h> +#include <inttypes.h> + +#include "../include/mpeg2.h" +#include "../include/attributes.h" +#include "mpeg2_internal.h" + +#define W1 2841 /* 2048 * sqrt (2) * cos (1 * pi / 16) */ +#define W2 2676 /* 2048 * sqrt (2) * cos (2 * pi / 16) */ +#define W3 2408 /* 2048 * sqrt (2) * cos (3 * pi / 16) */ +#define W5 1609 /* 2048 * sqrt (2) * cos (5 * pi / 16) */ +#define W6 1108 /* 2048 * sqrt (2) * cos (6 * pi / 16) */ +#define W7 565 /* 2048 * sqrt (2) * cos (7 * pi / 16) */ + +/* idct main entry point */ +void (* mpeg2_idct_copy) (int16_t * block, uint8_t * dest, int stride); /* assigned by mpeg2_idct_init; the C version overwrites dest with the clipped result */ +void (* mpeg2_idct_add) (int last, int16_t * block, + uint8_t * dest, int stride); /* assigned by mpeg2_idct_init; the C version adds the result to what dest already holds */ + +/* + * In legal streams, the IDCT output should be between -384 and +384. + * In corrupted streams, it is possible to force the IDCT output to go + * to +-3826 - this is the worst case for a column IDCT where the + * column inputs are 16-bit values. 
+ */ +uint8_t mpeg2_clip[3840 * 2 + 256]; /* lookup table reached through CLIP; mpeg2_idct_init fills it for indices -3840 .. 3840 + 255 */ +#define CLIP(i) ((mpeg2_clip + 3840)[i]) /* index is biased by 3840 so that negative i stays inside the table */ + +#if 0 +#define BUTTERFLY(t0,t1,W0,W1,d0,d1) \ +do { \ + t0 = W0 * d0 + W1 * d1; \ + t1 = W0 * d1 - W1 * d0; \ +} while (0) +#else +#define BUTTERFLY(t0,t1,W0,W1,d0,d1) \ +do { \ + int tmp = W0 * (d0 + d1); \ + t0 = tmp + (W1 - W0) * d1; \ + t1 = tmp - (W1 + W0) * d0; \ +} while (0) +#endif + +static void inline idct_row (int16_t * const block) /* transforms the eight coefficients block[0..7] in place */ +{ + int d0, d1, d2, d3; + int a0, a1, a2, a3, b0, b1, b2, b3; + int t0, t1, t2, t3; + + /* shortcut */ + if (likely (!(block[1] | ((int32_t *)block)[1] | ((int32_t *)block)[2] | + ((int32_t *)block)[3]))) { /* true when block[1..7] are all zero */ + uint32_t tmp = (uint16_t) (block[0] >> 1); /* every output then equals block[0] >> 1 */ + tmp |= tmp << 16; /* duplicate the 16-bit value into both halves of the word */ + ((int32_t *)block)[0] = tmp; /* NOTE(review): int16_t storage is written through int32_t pointers here - confirm the build tolerates this aliasing */ + ((int32_t *)block)[1] = tmp; + ((int32_t *)block)[2] = tmp; + ((int32_t *)block)[3] = tmp; + return; + } + + d0 = (block[0] << 11) + 2048; /* 2048 is half of 1 << 12, i.e. rounding for the final >> 12 */ + d1 = block[1]; + d2 = block[2] << 11; + d3 = block[3]; + t0 = d0 + d2; + t1 = d0 - d2; + BUTTERFLY (t2, t3, W6, W2, d3, d1); + a0 = t0 + t2; + a1 = t1 + t3; + a2 = t1 - t3; + a3 = t0 - t2; + + d0 = block[4]; + d1 = block[5]; + d2 = block[6]; + d3 = block[7]; + BUTTERFLY (t0, t1, W7, W1, d3, d0); + BUTTERFLY (t2, t3, W3, W5, d1, d2); + b0 = t0 + t2; + b3 = t1 + t3; + t0 -= t2; + t1 -= t3; + b1 = ((t0 + t1) >> 8) * 181; /* scales by 181/256 (presumably approximating 1/sqrt (2)) */ + b2 = ((t0 - t1) >> 8) * 181; + + block[0] = (a0 + b0) >> 12; /* outputs k and 7 - k share the same a and b terms with opposite sign on b */ + block[1] = (a1 + b1) >> 12; + block[2] = (a2 + b2) >> 12; + block[3] = (a3 + b3) >> 12; + block[4] = (a3 - b3) >> 12; + block[5] = (a2 - b2) >> 12; + block[6] = (a1 - b1) >> 12; + block[7] = (a0 - b0) >> 12; +} + +static void inline idct_col (int16_t * const block) /* same flow as idct_row applied to one column: elements are 8 apart, and there is no all-zero shortcut */ +{ + int d0, d1, d2, d3; + int a0, a1, a2, a3, b0, b1, b2, b3; + int t0, t1, t2, t3; + + d0 = (block[8*0] << 11) + 65536; /* 65536 is half of 1 << 17, i.e. rounding for the final >> 17 */ + d1 = block[8*1]; + d2 = block[8*2] << 11; + d3 = block[8*3]; + t0 = d0 + d2; + t1 = d0 - d2; + BUTTERFLY (t2, t3, W6, W2, d3, d1); + a0 = t0 + t2; + a1 = t1 + t3; + a2 = t1 - t3; + a3 = t0 - t2; + + d0 = block[8*4]; + d1 = block[8*5]; + d2 = 
block[8*6]; + d3 = block[8*7]; + BUTTERFLY (t0, t1, W7, W1, d3, d0); + BUTTERFLY (t2, t3, W3, W5, d1, d2); + b0 = t0 + t2; + b3 = t1 + t3; + t0 -= t2; + t1 -= t3; + b1 = ((t0 + t1) >> 8) * 181; + b2 = ((t0 - t1) >> 8) * 181; + + block[8*0] = (a0 + b0) >> 17; /* column results are written back in place with a >> 17 scale */ + block[8*1] = (a1 + b1) >> 17; + block[8*2] = (a2 + b2) >> 17; + block[8*3] = (a3 + b3) >> 17; + block[8*4] = (a3 - b3) >> 17; + block[8*5] = (a2 - b2) >> 17; + block[8*6] = (a1 - b1) >> 17; + block[8*7] = (a0 - b0) >> 17; +} + +static void mpeg2_idct_copy_c (int16_t * block, uint8_t * dest, + const int stride) /* runs the row pass then the column pass on the 64-entry block, stores the clipped result to dest (rows 'stride' bytes apart) and zeroes the block */ +{ + int i; + + for (i = 0; i < 8; i++) + idct_row (block + 8 * i); + for (i = 0; i < 8; i++) + idct_col (block + i); + do { /* i is 8 here, left over from the loop above, so this body runs once per row */ + dest[0] = CLIP (block[0]); + dest[1] = CLIP (block[1]); + dest[2] = CLIP (block[2]); + dest[3] = CLIP (block[3]); + dest[4] = CLIP (block[4]); + dest[5] = CLIP (block[5]); + dest[6] = CLIP (block[6]); + dest[7] = CLIP (block[7]); + + ((int32_t *)block)[0] = 0; ((int32_t *)block)[1] = 0; /* clear the row just stored: eight int16_t via four int32_t writes */ + ((int32_t *)block)[2] = 0; ((int32_t *)block)[3] = 0; + + dest += stride; + block += 8; + } while (--i); +} + +static void mpeg2_idct_add_c (const int last, int16_t * block, + uint8_t * dest, const int stride) /* adds the transformed block to the pixels already in dest, with clipping; takes a DC-only path when the test below fails */ +{ + int i; + + if (last != 129 || (block[0] & (7 << 4)) == (4 << 4)) { /* full transform unless last == 129 and bits 4..6 of block[0] differ from 100b */ + for (i = 0; i < 8; i++) + idct_row (block + 8 * i); + for (i = 0; i < 8; i++) + idct_col (block + i); + do { /* i is 8 again at this point */ + dest[0] = CLIP (block[0] + dest[0]); + dest[1] = CLIP (block[1] + dest[1]); + dest[2] = CLIP (block[2] + dest[2]); + dest[3] = CLIP (block[3] + dest[3]); + dest[4] = CLIP (block[4] + dest[4]); + dest[5] = CLIP (block[5] + dest[5]); + dest[6] = CLIP (block[6] + dest[6]); + dest[7] = CLIP (block[7] + dest[7]); + + ((int32_t *)block)[0] = 0; ((int32_t *)block)[1] = 0; + ((int32_t *)block)[2] = 0; ((int32_t *)block)[3] = 0; + + dest += stride; + block += 8; + } while (--i); + } else { + int DC; + + DC = (block[0] + 64) >> 7; /* block[0] / 128 with rounding; the same value is added to every pixel */ + block[0] = block[63] = 0; /* only these two entries are cleared on this path */ + i = 8; + do { + dest[0] = CLIP (DC + dest[0]); + dest[1] 
= CLIP (DC + dest[1]); + dest[2] = CLIP (DC + dest[2]); + dest[3] = CLIP (DC + dest[3]); + dest[4] = CLIP (DC + dest[4]); + dest[5] = CLIP (DC + dest[5]); + dest[6] = CLIP (DC + dest[6]); + dest[7] = CLIP (DC + dest[7]); + dest += stride; + } while (--i); + } +} + +void mpeg2_idct_init (uint32_t accel) /* points mpeg2_idct_copy / mpeg2_idct_add at the implementation matching the accel flags, with the C versions as the fallback */ +{ +#ifdef ARCH_X86 + if (accel & MPEG2_ACCEL_X86_MMXEXT) { + mpeg2_idct_copy = mpeg2_idct_copy_mmxext; + mpeg2_idct_add = mpeg2_idct_add_mmxext; + mpeg2_idct_mmx_init (); + } else if (accel & MPEG2_ACCEL_X86_MMX) { + mpeg2_idct_copy = mpeg2_idct_copy_mmx; + mpeg2_idct_add = mpeg2_idct_add_mmx; + mpeg2_idct_mmx_init (); + } else +#endif +#ifdef ARCH_PPC + if (accel & MPEG2_ACCEL_PPC_ALTIVEC) { + mpeg2_idct_copy = mpeg2_idct_copy_altivec; + mpeg2_idct_add = mpeg2_idct_add_altivec; + mpeg2_idct_altivec_init (); + } else +#endif +#ifdef ARCH_ALPHA + if (accel & MPEG2_ACCEL_ALPHA_MVI) { + mpeg2_idct_copy = mpeg2_idct_copy_mvi; + mpeg2_idct_add = mpeg2_idct_add_mvi; + mpeg2_idct_alpha_init (); + } else if (accel & MPEG2_ACCEL_ALPHA) { + int i; + + mpeg2_idct_copy = mpeg2_idct_copy_alpha; + mpeg2_idct_add = mpeg2_idct_add_alpha; + mpeg2_idct_alpha_init (); + for (i = -3840; i < 3840 + 256; i++) + CLIP(i) = (i < 0) ? 0 : ((i > 255) ? 255 : i); /* the non-MVI Alpha routines index CLIP, so the table is filled on this branch too */ + } else +#endif + { /* fallback when no branch above was compiled in or selected */ + extern uint8_t mpeg2_scan_norm[64]; + extern uint8_t mpeg2_scan_alt[64]; + int i, j; + + mpeg2_idct_copy = mpeg2_idct_copy_c; + mpeg2_idct_add = mpeg2_idct_add_c; + for (i = -3840; i < 3840 + 256; i++) + CLIP(i) = (i < 0) ? 0 : ((i > 255) ? 
255 : i); /* each table entry is its index saturated to 0..255 */ + for (i = 0; i < 64; i++) { /* NOTE(review): both scan tables are rewritten in place, so a second call would permute them again - presumably callers run this once; confirm */ + j = mpeg2_scan_norm[i]; + mpeg2_scan_norm[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2); /* bits 1,2,4,5 move down one place, bits 0 and 3 move up two */ + j = mpeg2_scan_alt[i]; + mpeg2_scan_alt[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2); + } + } +} diff --git a/src/video_dec/libmpeg2new/libmpeg2/idct_alpha.c b/src/video_dec/libmpeg2new/libmpeg2/idct_alpha.c new file mode 100644 index 000000000..1d8fd08ee --- /dev/null +++ b/src/video_dec/libmpeg2new/libmpeg2/idct_alpha.c @@ -0,0 +1,379 @@ +/* + * idct_alpha.c + * Copyright (C) 2002-2003 Falk Hueffner <falk@debian.org> + * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org> + * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#ifdef ARCH_ALPHA + +#include <stdlib.h> +#include <inttypes.h> + +#include "mpeg2.h" +#include <xine/attributes.h> +#include "mpeg2_internal.h" +#include "alpha_asm.h" + +#define W1 2841 /* 2048 * sqrt (2) * cos (1 * pi / 16) */ +#define W2 2676 /* 2048 * sqrt (2) * cos (2 * pi / 16) */ +#define W3 2408 /* 2048 * sqrt (2) * cos (3 * pi / 16) */ +#define W5 1609 /* 2048 * sqrt (2) * cos (5 * pi / 16) */ +#define W6 1108 /* 2048 * sqrt (2) * cos (6 * pi / 16) */ +#define W7 565 /* 2048 * sqrt (2) * cos (7 * pi / 16) */ + +extern uint8_t mpeg2_clip[3840 * 2 + 256]; /* the table itself is defined in idct.c */ +#define CLIP(i) ((mpeg2_clip + 3840)[i]) + +#if 0 +#define BUTTERFLY(t0,t1,W0,W1,d0,d1) \ +do { \ + t0 = W0 * d0 + W1 * d1; \ + t1 = W0 * d1 - W1 * d0; \ +} while (0) +#else +#define BUTTERFLY(t0,t1,W0,W1,d0,d1) \ +do { \ + int_fast32_t tmp = W0 * (d0 + d1); \ + t0 = tmp + (W1 - W0) * d1; \ + t1 = tmp - (W1 + W0) * d0; \ +} while (0) +#endif + +static void inline idct_row (int16_t * const block) /* Alpha variant of the row pass: reads the row as two 64-bit words and writes block[0..7] in place */ +{ + uint64_t l, r; + int_fast32_t d0, d1, d2, d3; + int_fast32_t a0, a1, a2, a3, b0, b1, b2, b3; + int_fast32_t t0, t1, t2, t3; + + l = ldq (block); /* coefficients 0..3 (ldq comes from alpha_asm.h; presumably a plain 64-bit load) */ + r = ldq (block + 4); /* coefficients 4..7 */ + + /* shortcut */ + if (likely (!((l & ~0xffffUL) | r))) { /* true when everything except the low 16 bits of l is zero */ + uint64_t tmp = (uint16_t) (l >> 1); /* NOTE(review): l is unsigned, so this shift is logical, whereas idct_row in idct.c shifts the signed block[0] - confirm both agree when block[0] is negative */ + tmp |= tmp << 16; + tmp |= tmp << 32; + ((int32_t *)block)[0] = tmp; + ((int32_t *)block)[1] = tmp; + ((int32_t *)block)[2] = tmp; + ((int32_t *)block)[3] = tmp; + return; + } + + d0 = (sextw (l) << 11) + 2048; /* same arithmetic as the C row pass, with the operands pulled out of l and r */ + d1 = sextw (extwl (l, 2)); + d2 = sextw (extwl (l, 4)) << 11; + d3 = sextw (extwl (l, 6)); + t0 = d0 + d2; + t1 = d0 - d2; + BUTTERFLY (t2, t3, W6, W2, d3, d1); + a0 = t0 + t2; + a1 = t1 + t3; + a2 = t1 - t3; + a3 = t0 - t2; + + d0 = sextw (r); + d1 = sextw (extwl (r, 2)); + d2 = sextw (extwl (r, 
4)); + d3 = sextw (extwl (r, 6)); + BUTTERFLY (t0, t1, W7, W1, d3, d0); + BUTTERFLY (t2, t3, W3, W5, d1, d2); + b0 = t0 + t2; + b3 = t1 + t3; + t0 -= t2; + t1 -= t3; + b1 = ((t0 + t1) >> 8) * 181; + b2 = ((t0 - t1) >> 8) * 181; + + block[0] = (a0 + b0) >> 12; /* results go back through ordinary int16_t stores */ + block[1] = (a1 + b1) >> 12; + block[2] = (a2 + b2) >> 12; + block[3] = (a3 + b3) >> 12; + block[4] = (a3 - b3) >> 12; + block[5] = (a2 - b2) >> 12; + block[6] = (a1 - b1) >> 12; + block[7] = (a0 - b0) >> 12; +} + +static void inline idct_col (int16_t * const block) /* column pass: statement for statement the same as idct_col in idct.c, with int_fast32_t temporaries */ +{ + int_fast32_t d0, d1, d2, d3; + int_fast32_t a0, a1, a2, a3, b0, b1, b2, b3; + int_fast32_t t0, t1, t2, t3; + + d0 = (block[8*0] << 11) + 65536; /* 65536 is half of 1 << 17, i.e. rounding for the final >> 17 */ + d1 = block[8*1]; + d2 = block[8*2] << 11; + d3 = block[8*3]; + t0 = d0 + d2; + t1 = d0 - d2; + BUTTERFLY (t2, t3, W6, W2, d3, d1); + a0 = t0 + t2; + a1 = t1 + t3; + a2 = t1 - t3; + a3 = t0 - t2; + + d0 = block[8*4]; + d1 = block[8*5]; + d2 = block[8*6]; + d3 = block[8*7]; + BUTTERFLY (t0, t1, W7, W1, d3, d0); + BUTTERFLY (t2, t3, W3, W5, d1, d2); + b0 = t0 + t2; + b3 = t1 + t3; + t0 -= t2; + t1 -= t3; + b1 = ((t0 + t1) >> 8) * 181; + b2 = ((t0 - t1) >> 8) * 181; + + block[8*0] = (a0 + b0) >> 17; + block[8*1] = (a1 + b1) >> 17; + block[8*2] = (a2 + b2) >> 17; + block[8*3] = (a3 + b3) >> 17; + block[8*4] = (a3 - b3) >> 17; + block[8*5] = (a2 - b2) >> 17; + block[8*6] = (a1 - b1) >> 17; + block[8*7] = (a0 - b0) >> 17; +} + +void mpeg2_idct_copy_mvi (int16_t * block, uint8_t * dest, const int stride) /* MVI copy variant: row and column passes, then per row clamps four 16-bit lanes at a time, packs them to bytes in dest and zeroes the block */ +{ + uint64_t clampmask; + int i; + + for (i = 0; i < 8; i++) + idct_row (block + 8 * i); + + for (i = 0; i < 8; i++) + idct_col (block + i); + + clampmask = zap (-1, 0xaa); /* 0x00ff00ff00ff00ff */ + do { /* i is 8 here, left over from the loop above */ + uint64_t shorts0, shorts1; + + shorts0 = ldq (block); + shorts0 = maxsw4 (shorts0, 0); /* lower bound 0 ... */ + shorts0 = minsw4 (shorts0, clampmask); /* ... and upper bound 255 in each 16-bit lane */ + stl (pkwb (shorts0), dest); + + shorts1 = ldq (block + 4); + shorts1 = maxsw4 (shorts1, 0); + shorts1 = minsw4 (shorts1, clampmask); + stl (pkwb (shorts1), dest + 4); + + stq 
(0, block); + stq (0, block + 4); /* stores 0 over the row just consumed (stq is presumably a 64-bit store; see alpha_asm.h) */ + + dest += stride; + block += 8; + } while (--i); +} + +void mpeg2_idct_add_mvi (const int last, int16_t * block, + uint8_t * dest, const int stride) /* MVI add variant: adds the transformed block to dest with clamping, or a single DC value to all eight rows when the test below fails */ +{ + uint64_t clampmask; + uint64_t signmask; + int i; + + if (last != 129 || (block[0] & (7 << 4)) == (4 << 4)) { /* same selection test as the C and non-MVI add routines */ + for (i = 0; i < 8; i++) + idct_row (block + 8 * i); + for (i = 0; i < 8; i++) + idct_col (block + i); + clampmask = zap (-1, 0xaa); /* 0x00ff00ff00ff00ff */ + signmask = zap (-1, 0x33); + signmask ^= signmask >> 1; /* 0x8000800080008000 */ + + do { + uint64_t shorts0, pix0, signs0; + uint64_t shorts1, pix1, signs1; + + shorts0 = ldq (block); + shorts1 = ldq (block + 4); + + pix0 = unpkbw (ldl (dest)); + /* signed subword add (MMX paddw). */ + signs0 = shorts0 & signmask; /* sign bits are set aside ... */ + shorts0 &= ~signmask; + shorts0 += pix0; + shorts0 ^= signs0; /* ... and folded back in after the 64-bit add */ + /* clamp. */ + shorts0 = maxsw4 (shorts0, 0); + shorts0 = minsw4 (shorts0, clampmask); + + /* next 4. */ + pix1 = unpkbw (ldl (dest + 4)); + signs1 = shorts1 & signmask; + shorts1 &= ~signmask; + shorts1 += pix1; + shorts1 ^= signs1; + shorts1 = maxsw4 (shorts1, 0); + shorts1 = minsw4 (shorts1, clampmask); + + stl (pkwb (shorts0), dest); + stl (pkwb (shorts1), dest + 4); + stq (0, block); + stq (0, block + 4); + + dest += stride; + block += 8; + } while (--i); + } else { + int DC; + uint64_t p0, p1, p2, p3, p4, p5, p6, p7; + uint64_t DCs; + + DC = (block[0] + 64) >> 7; /* block[0] / 128 with rounding */ + block[0] = block[63] = 0; /* only these two entries are cleared on this path */ + + p0 = ldq (dest + 0 * stride); /* all eight destination rows are loaded before any is stored */ + p1 = ldq (dest + 1 * stride); + p2 = ldq (dest + 2 * stride); + p3 = ldq (dest + 3 * stride); + p4 = ldq (dest + 4 * stride); + p5 = ldq (dest + 5 * stride); + p6 = ldq (dest + 6 * stride); + p7 = ldq (dest + 7 * stride); + + if (DC > 0) { + DCs = BYTE_VEC (likely (DC <= 255) ? 
DC : 255); + p0 += minub8 (DCs, ~p0); /* adds at most 255 - p per byte, so no byte wraps (assuming minub8 is a bytewise unsigned minimum - confirm in alpha_asm.h) */ + p1 += minub8 (DCs, ~p1); + p2 += minub8 (DCs, ~p2); + p3 += minub8 (DCs, ~p3); + p4 += minub8 (DCs, ~p4); + p5 += minub8 (DCs, ~p5); + p6 += minub8 (DCs, ~p6); + p7 += minub8 (DCs, ~p7); + } else { + DCs = BYTE_VEC (likely (-DC <= 255) ? -DC : 255); /* magnitude of a zero or negative DC, capped at 255 */ + p0 -= minub8 (DCs, p0); /* subtracts at most p per byte, so no byte goes below zero (same assumption about minub8) */ + p1 -= minub8 (DCs, p1); + p2 -= minub8 (DCs, p2); + p3 -= minub8 (DCs, p3); + p4 -= minub8 (DCs, p4); + p5 -= minub8 (DCs, p5); + p6 -= minub8 (DCs, p6); + p7 -= minub8 (DCs, p7); + } + + stq (p0, dest + 0 * stride); + stq (p1, dest + 1 * stride); + stq (p2, dest + 2 * stride); + stq (p3, dest + 3 * stride); + stq (p4, dest + 4 * stride); + stq (p5, dest + 5 * stride); + stq (p6, dest + 6 * stride); + stq (p7, dest + 7 * stride); + } +} + +void mpeg2_idct_copy_alpha (int16_t * block, uint8_t * dest, const int stride) /* non-MVI copy variant: same flow as mpeg2_idct_copy_c in idct.c, except that each row is zeroed with two stq calls */ +{ + int i; + + for (i = 0; i < 8; i++) + idct_row (block + 8 * i); + for (i = 0; i < 8; i++) + idct_col (block + i); + do { /* i is 8 here, left over from the loop above */ + dest[0] = CLIP (block[0]); + dest[1] = CLIP (block[1]); + dest[2] = CLIP (block[2]); + dest[3] = CLIP (block[3]); + dest[4] = CLIP (block[4]); + dest[5] = CLIP (block[5]); + dest[6] = CLIP (block[6]); + dest[7] = CLIP (block[7]); + + stq(0, block); + stq(0, block + 4); + + dest += stride; + block += 8; + } while (--i); +} + +void mpeg2_idct_add_alpha (const int last, int16_t * block, + uint8_t * dest, const int stride) /* non-MVI add variant: same flow as mpeg2_idct_add_c in idct.c, with stq used to zero each row */ +{ + int i; + + if (last != 129 || (block[0] & (7 << 4)) == (4 << 4)) { + for (i = 0; i < 8; i++) + idct_row (block + 8 * i); + for (i = 0; i < 8; i++) + idct_col (block + i); + do { + dest[0] = CLIP (block[0] + dest[0]); + dest[1] = CLIP (block[1] + dest[1]); + dest[2] = CLIP (block[2] + dest[2]); + dest[3] = CLIP (block[3] + dest[3]); + dest[4] = CLIP (block[4] + dest[4]); + dest[5] = CLIP (block[5] + dest[5]); + dest[6] = CLIP (block[6] + dest[6]); + dest[7] = CLIP (block[7] + dest[7]); + + stq(0, block); + stq(0, block + 4); + + dest += stride; + block += 8; + } while (--i); + } else { + int DC; + 
+ DC = (block[0] + 64) >> 7; /* block[0] / 128 with rounding; the same value is added to every pixel */ + block[0] = block[63] = 0; /* only these two entries are cleared on this path */ + i = 8; + do { + dest[0] = CLIP (DC + dest[0]); + dest[1] = CLIP (DC + dest[1]); + dest[2] = CLIP (DC + dest[2]); + dest[3] = CLIP (DC + dest[3]); + dest[4] = CLIP (DC + dest[4]); + dest[5] = CLIP (DC + dest[5]); + dest[6] = CLIP (DC + dest[6]); + dest[7] = CLIP (DC + dest[7]); + dest += stride; + } while (--i); + } +} + +void mpeg2_idct_alpha_init (void) /* applies to both scan tables the same in-place bit permutation that the C fallback of mpeg2_idct_init in idct.c uses */ +{ + extern uint8_t mpeg2_scan_norm[64]; + extern uint8_t mpeg2_scan_alt[64]; + int i, j; + + for (i = 0; i < 64; i++) { /* NOTE(review): the tables are rewritten in place, so calling this twice would permute them twice - presumably it is run once; confirm */ + j = mpeg2_scan_norm[i]; + mpeg2_scan_norm[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2); /* bits 1,2,4,5 move down one place, bits 0 and 3 move up two */ + j = mpeg2_scan_alt[i]; + mpeg2_scan_alt[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2); + } +} + +#endif /* ARCH_ALPHA */ diff --git a/src/video_dec/libmpeg2new/libmpeg2/idct_altivec.c b/src/video_dec/libmpeg2new/libmpeg2/idct_altivec.c new file mode 100644 index 000000000..f15bca165 --- /dev/null +++ b/src/video_dec/libmpeg2new/libmpeg2/idct_altivec.c @@ -0,0 +1,288 @@ +/* + * idct_altivec.c + * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org> + * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#ifdef ARCH_PPC + +#ifdef HAVE_ALTIVEC_H +#include <altivec.h> +#endif +#include <inttypes.h> + +#include "mpeg2.h" +#include <xine/attributes.h> +#include "mpeg2_internal.h" + +typedef vector signed char vector_s8_t; +typedef vector unsigned char vector_u8_t; +typedef vector signed short vector_s16_t; +typedef vector unsigned short vector_u16_t; +typedef vector signed int vector_s32_t; +typedef vector unsigned int vector_u32_t; + +#if defined(HAVE_ALTIVEC_H) && (__GNUC__ * 100 + __GNUC_MINOR__ < 303) +/* work around gcc <3.3 vec_mergel bug */ +static inline vector_s16_t my_vec_mergel (vector_s16_t const A, + vector_s16_t const B) /* replacement for vec_mergel built on vec_perm; returns the permuted vector */ +{ + static const vector_u8_t mergel = { + 0x08, 0x09, 0x18, 0x19, 0x0a, 0x0b, 0x1a, 0x1b, + 0x0c, 0x0d, 0x1c, 0x1d, 0x0e, 0x0f, 0x1e, 0x1f + }; /* byte pairs 8..15 taken alternately with index 0x0_ and 0x1_ (presumably A and B respectively under vec_perm's index convention) */ + return vec_perm (A, B, mergel); +} +#undef vec_mergel +#define vec_mergel my_vec_mergel +#endif + +#ifdef HAVE_ALTIVEC_H /* gnu */ +#define VEC_S16(a,b,c,d,e,f,g,h) {a, b, c, d, e, f, g, h} +#else /* apple */ +#define VEC_S16(a,b,c,d,e,f,g,h) (vector_s16_t) (a, b, c, d, e, f, g, h) +#endif + +static const vector_s16_t constants ATTR_ALIGN(16) = + VEC_S16 (23170, 13573, 6518, 21895, -23170, -21895, 32, 31); /* IDCT splats elements 0..5 into c4, a0, a1, a2, mc4, ma2 and the 32-bit pair (32, 31) into bias */ +static const vector_s16_t constants_1 ATTR_ALIGN(16) = + VEC_S16 (16384, 22725, 21407, 19266, 16384, 19266, 21407, 22725); /* multiplier for the block[0] +/- block[4] terms in IDCT */ +static const vector_s16_t constants_2 ATTR_ALIGN(16) = + VEC_S16 (16069, 22289, 20995, 18895, 16069, 18895, 20995, 22289); /* multiplier for the terms built from block[1] and block[7] */ +static const vector_s16_t constants_3 ATTR_ALIGN(16) = + VEC_S16 (21407, 29692, 27969, 25172, 21407, 25172, 27969, 29692); /* multiplier for the terms built from block[2] and block[6] */ +static const vector_s16_t constants_4 ATTR_ALIGN(16) = + VEC_S16 (13623, 18895, 17799, 16019, 13623, 16019, 17799, 18895); /* multiplier applied directly to block[3] and block[5] */ + +#define IDCT \ + vector_s16_t vx0, 
vx6, vx7; \ + vector_s16_t vy0, vy1, vy2, vy3, vy4, vy5, vy6, vy7; \ + vector_s16_t a0, a1, a2, ma2, c4, mc4, zero, bias; \ + vector_s16_t t0, t1, t2, t3, t4, t5, t6, t7, t8; \ + vector_u16_t shift; \ + \ + c4 = vec_splat (constants, 0); /* the macro expects a vector_s16_t pointer named block in the expanding scope */ \ + a0 = vec_splat (constants, 1); \ + a1 = vec_splat (constants, 2); \ + a2 = vec_splat (constants, 3); \ + mc4 = vec_splat (constants, 4); \ + ma2 = vec_splat (constants, 5); \ + bias = (vector_s16_t)vec_splat ((vector_s32_t)constants, 3); \ + \ + zero = vec_splat_s16 (0); /* used as the addend of vec_mradds and for negation via vec_subs (zero, x) */ \ + \ + vx0 = vec_adds (block[0], block[4]); \ + vx4 = vec_subs (block[0], block[4]); \ + t5 = vec_mradds (vx0, constants_1, zero); \ + t0 = vec_mradds (vx4, constants_1, zero); \ + \ + vx1 = vec_mradds (a1, block[7], block[1]); \ + vx7 = vec_mradds (a1, block[1], vec_subs (zero, block[7])); \ + t1 = vec_mradds (vx1, constants_2, zero); \ + t8 = vec_mradds (vx7, constants_2, zero); \ + \ + vx2 = vec_mradds (a0, block[6], block[2]); \ + vx6 = vec_mradds (a0, block[2], vec_subs (zero, block[6])); \ + t2 = vec_mradds (vx2, constants_3, zero); \ + t4 = vec_mradds (vx6, constants_3, zero); \ + \ + vx3 = vec_mradds (block[3], constants_4, zero); \ + vx5 = vec_mradds (block[5], constants_4, zero); \ + t7 = vec_mradds (a2, vx5, vx3); \ + t3 = vec_mradds (ma2, vx3, vx5); \ + \ + t6 = vec_adds (t8, t3); \ + t3 = vec_subs (t8, t3); \ + t8 = vec_subs (t1, t7); \ + t1 = vec_adds (t1, t7); \ + t6 = vec_mradds (a0, t6, t6); /* a0+1 == 2*c4 */ \ + t1 = vec_mradds (a0, t1, t1); /* a0+1 == 2*c4 */ \ + \ + t7 = vec_adds (t5, t2); \ + t2 = vec_subs (t5, t2); \ + t5 = vec_adds (t0, t4); \ + t0 = vec_subs (t0, t4); \ + t4 = vec_subs (t8, t3); \ + t3 = vec_adds (t8, t3); \ + \ + vy0 = vec_adds (t7, t1); /* vy0..vy7 receive the results of the first set of butterflies */ \ + vy7 = vec_subs (t7, t1); \ + vy1 = vec_adds (t5, t3); \ + vy6 = vec_subs (t5, t3); \ + vy2 = vec_adds (t0, t4); \ + vy5 = vec_subs (t0, t4); \ + vy3 = vec_adds (t2, t6); \ + vy4 = vec_subs (t2, t6); \ + \ + vx0 = vec_mergeh (vy0, vy4); /* three rounds of mergeh/mergel move vy into vx (presumably an 8x8 transpose between the two passes) */ \ + vx1 = vec_mergel (vy0, vy4); \ + vx2 = 
vec_mergeh (vy1, vy5); \ + vx3 = vec_mergel (vy1, vy5); \ + vx4 = vec_mergeh (vy2, vy6); \ + vx5 = vec_mergel (vy2, vy6); \ + vx6 = vec_mergeh (vy3, vy7); \ + vx7 = vec_mergel (vy3, vy7); \ + \ + vy0 = vec_mergeh (vx0, vx4); \ + vy1 = vec_mergel (vx0, vx4); \ + vy2 = vec_mergeh (vx1, vx5); \ + vy3 = vec_mergel (vx1, vx5); \ + vy4 = vec_mergeh (vx2, vx6); \ + vy5 = vec_mergel (vx2, vx6); \ + vy6 = vec_mergeh (vx3, vx7); \ + vy7 = vec_mergel (vx3, vx7); \ + \ + vx0 = vec_mergeh (vy0, vy4); \ + vx1 = vec_mergel (vy0, vy4); \ + vx2 = vec_mergeh (vy1, vy5); \ + vx3 = vec_mergel (vy1, vy5); \ + vx4 = vec_mergeh (vy2, vy6); \ + vx5 = vec_mergel (vy2, vy6); \ + vx6 = vec_mergeh (vy3, vy7); \ + vx7 = vec_mergel (vy3, vy7); \ + \ + vx0 = vec_adds (vx0, bias); /* bias is added to vx0 only, before the second set of butterflies */ \ + t5 = vec_adds (vx0, vx4); \ + t0 = vec_subs (vx0, vx4); \ + \ + t1 = vec_mradds (a1, vx7, vx1); \ + t8 = vec_mradds (a1, vx1, vec_subs (zero, vx7)); \ + \ + t2 = vec_mradds (a0, vx6, vx2); \ + t4 = vec_mradds (a0, vx2, vec_subs (zero, vx6)); \ + \ + t7 = vec_mradds (a2, vx5, vx3); \ + t3 = vec_mradds (ma2, vx3, vx5); \ + \ + t6 = vec_adds (t8, t3); \ + t3 = vec_subs (t8, t3); \ + t8 = vec_subs (t1, t7); \ + t1 = vec_adds (t1, t7); \ + \ + t7 = vec_adds (t5, t2); \ + t2 = vec_subs (t5, t2); \ + t5 = vec_adds (t0, t4); \ + t0 = vec_subs (t0, t4); \ + t4 = vec_subs (t8, t3); \ + t3 = vec_adds (t8, t3); \ + \ + vy0 = vec_adds (t7, t1); \ + vy7 = vec_subs (t7, t1); \ + vy1 = vec_mradds (c4, t3, t5); \ + vy6 = vec_mradds (mc4, t3, t5); \ + vy2 = vec_mradds (c4, t4, t0); \ + vy5 = vec_mradds (mc4, t4, t0); \ + vy3 = vec_adds (t2, t6); \ + vy4 = vec_subs (t2, t6); \ + \ + shift = vec_splat_u16 (6); /* every result is shifted right by 6 below */ \ + vx0 = vec_sra (vy0, shift); \ + vx1 = vec_sra (vy1, shift); \ + vx2 = vec_sra (vy2, shift); \ + vx3 = vec_sra (vy3, shift); \ + vx4 = vec_sra (vy4, shift); \ + vx5 = vec_sra (vy5, shift); \ + vx6 = vec_sra (vy6, shift); \ + vx7 = vec_sra (vy7, shift); /* last line of the macro: the results are left in vx0..vx7 for the caller */ + +void mpeg2_idct_copy_altivec (int16_t * const _block, uint8_t * dest, + 
const int stride) +{ + vector_s16_t * const block = (vector_s16_t *)_block; + vector_u8_t tmp; + + IDCT + +#define COPY(dest,src) \ + tmp = vec_packsu (src, src); \ + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); \ + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); + + COPY (dest, vx0) dest += stride; + COPY (dest, vx1) dest += stride; + COPY (dest, vx2) dest += stride; + COPY (dest, vx3) dest += stride; + COPY (dest, vx4) dest += stride; + COPY (dest, vx5) dest += stride; + COPY (dest, vx6) dest += stride; + COPY (dest, vx7) + + block[0] = block[1] = block[2] = block[3] = zero; + block[4] = block[5] = block[6] = block[7] = zero; +} + +void mpeg2_idct_add_altivec (const int last, int16_t * const _block, + uint8_t * dest, const int stride) +{ + vector_s16_t * const block = (vector_s16_t *)_block; + vector_u8_t tmp; + vector_s16_t tmp2, tmp3; + vector_u8_t perm0; + vector_u8_t perm1; + vector_u8_t p0, p1, p; + + IDCT + + p0 = vec_lvsl (0, dest); + p1 = vec_lvsl (stride, dest); + p = vec_splat_u8 (-1); + perm0 = vec_mergeh (p, p0); + perm1 = vec_mergeh (p, p1); + +#define ADD(dest,src,perm) \ + /* *(uint64_t *)&tmp = *(uint64_t *)dest; */ \ + tmp = vec_ld (0, dest); \ + tmp2 = (vector_s16_t)vec_perm (tmp, (vector_u8_t)zero, perm); \ + tmp3 = vec_adds (tmp2, src); \ + tmp = vec_packsu (tmp3, tmp3); \ + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); \ + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); + + ADD (dest, vx0, perm0) dest += stride; + ADD (dest, vx1, perm1) dest += stride; + ADD (dest, vx2, perm0) dest += stride; + ADD (dest, vx3, perm1) dest += stride; + ADD (dest, vx4, perm0) dest += stride; + ADD (dest, vx5, perm1) dest += stride; + ADD (dest, vx6, perm0) dest += stride; + ADD (dest, vx7, perm1) + + block[0] = block[1] = block[2] = block[3] = zero; + block[4] = block[5] = block[6] = block[7] = zero; +} + +void mpeg2_idct_altivec_init (void) +{ + extern uint8_t mpeg2_scan_norm[64]; + extern uint8_t mpeg2_scan_alt[64]; + int i, j; + 
+ /* the altivec idct uses a transposed input, so we patch scan tables */ + for (i = 0; i < 64; i++) { + j = mpeg2_scan_norm[i]; + mpeg2_scan_norm[i] = (j >> 3) | ((j & 7) << 3); + j = mpeg2_scan_alt[i]; + mpeg2_scan_alt[i] = (j >> 3) | ((j & 7) << 3); + } +} + +#endif diff --git a/src/video_dec/libmpeg2new/libmpeg2/idct_mlib.c b/src/video_dec/libmpeg2new/libmpeg2/idct_mlib.c new file mode 100644 index 000000000..55a2e9b64 --- /dev/null +++ b/src/video_dec/libmpeg2new/libmpeg2/idct_mlib.c @@ -0,0 +1,60 @@ +/* + * idct_mlib.c + * Copyright (C) 1999-2003 HÃ¥kan Hjort <d95hjort@dtek.chalmers.se> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#ifdef LIBMPEG2_MLIB + +#include <mlib_types.h> +#include <mlib_status.h> +#include <mlib_sys.h> +#include <mlib_video.h> +#include <string.h> +#include <inttypes.h> + +#include "../include/mpeg2.h" +#include "mpeg2_internal.h" + +void mpeg2_idct_add_mlib (const int last, int16_t * const block, + uint8_t * const dest, const int stride) +{ + mlib_VideoIDCT_IEEE_S16_S16 (block, block); + mlib_VideoAddBlock_U8_S16 (dest, block, stride); + memset (block, 0, 64 * sizeof (uint16_t)); +} + +void mpeg2_idct_copy_mlib_non_ieee (int16_t * const block, + uint8_t * const dest, const int stride) +{ + mlib_VideoIDCT8x8_U8_S16 (dest, block, stride); + memset (block, 0, 64 * sizeof (uint16_t)); +} + +void mpeg2_idct_add_mlib_non_ieee (const int last, int16_t * const block, + uint8_t * const dest, const int stride) +{ + mlib_VideoIDCT8x8_S16_S16 (block, block); + mlib_VideoAddBlock_U8_S16 (dest, block, stride); + memset (block, 0, 64 * sizeof (uint16_t)); +} + +#endif diff --git a/src/video_dec/libmpeg2new/libmpeg2/idct_mmx.c b/src/video_dec/libmpeg2new/libmpeg2/idct_mmx.c new file mode 100644 index 000000000..d5a5c08a4 --- /dev/null +++ b/src/video_dec/libmpeg2new/libmpeg2/idct_mmx.c @@ -0,0 +1,814 @@ +/* + * idct_mmx.c + * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org> + * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
#include "config.h"

#ifdef ARCH_X86

#include <inttypes.h>

#include "../include/mpeg2.h"
#include "../include/attributes.h"
#include "mpeg2_internal.h"
#include "../include/mmx.h"

/* Right shift applied to every output of the row pass. */
#define ROW_SHIFT 15
/* Right shift applied to every output of the column pass. */
#define COL_SHIFT 6

/*
 * round(bias):   (bias + 0.5) scaled by 2^ROW_SHIFT and truncated to int.
 * rounder(bias): brace initializer holding that value twice.
 * Note that this function-like macro reuses the name of the C99 <math.h>
 * round() function.
 */
#define round(bias) ((int)(((bias)+0.5) * (1<<ROW_SHIFT)))
#define rounder(bias) {round (bias), round (bias)}


#if 0
/* C row IDCT - it's just here to document the MMXEXT and MMX versions */
/*
 * Reference (disabled) row transform for the eight coefficients starting at
 * row + offset.
 *
 *   table   - supplies the constants C1..C7 as table[1]..table[7];
 *             table[0] is not read.
 *   rounder - *rounder is added to each of the even-part sums a0..a3.
 *
 * The even-indexed inputs form a0..a3, the odd-indexed inputs form b0..b3,
 * and the outputs are written back over the inputs as
 *   row[k]   = (a_k + b_k) >> ROW_SHIFT
 *   row[7-k] = (a_k - b_k) >> ROW_SHIFT      for k = 0..3.
 */
static inline void idct_row (int16_t * row, int offset,
                             int16_t * table, int32_t * rounder)
{
    int C1, C2, C3, C4, C5, C6, C7;
    int a0, a1, a2, a3, b0, b1, b2, b3;

    row += offset;

    C1 = table[1];
    C2 = table[2];
    C3 = table[3];
    C4 = table[4];
    C5 = table[5];
    C6 = table[6];
    C7 = table[7];

    /* even part: inputs 0, 2, 4, 6 plus the rounding term */
    a0 = C4*row[0] + C2*row[2] + C4*row[4] + C6*row[6] + *rounder;
    a1 = C4*row[0] + C6*row[2] - C4*row[4] - C2*row[6] + *rounder;
    a2 = C4*row[0] - C6*row[2] - C4*row[4] + C2*row[6] + *rounder;
    a3 = C4*row[0] - C2*row[2] + C4*row[4] - C6*row[6] + *rounder;

    /* odd part: inputs 1, 3, 5, 7 */
    b0 = C1*row[1] + C3*row[3] + C5*row[5] + C7*row[7];
    b1 = C3*row[1] - C7*row[3] - C1*row[5] - C5*row[7];
    b2 = C5*row[1] - C1*row[3] + C7*row[5] + C3*row[7];
    b3 = C7*row[1] - C5*row[3] + C3*row[5] - C1*row[7];

    /* butterfly: sums fill row[0..3], differences fill row[7..4] */
    row[0] = (a0 + b0) >> ROW_SHIFT;
    row[1] = (a1 + b1) >> ROW_SHIFT;
    row[2] = (a2 + b2) >> ROW_SHIFT;
    row[3] = (a3 + b3) >> ROW_SHIFT;
    row[4] = (a3 - b3) >> ROW_SHIFT;
    row[5] = (a2 - b2) >> ROW_SHIFT;
    row[6] = (a1 - b1) >> ROW_SHIFT;
    row[7] = (a0 - b0) >> ROW_SHIFT;
}
#endif
MMXEXT row IDCT */ + +#define mmxext_table(c1,c2,c3,c4,c5,c6,c7) { c4, c2, -c4, -c2, \ + c4, c6, c4, c6, \ + c1, c3, -c1, -c5, \ + c5, c7, c3, -c7, \ + c4, -c6, c4, -c6, \ + -c4, c2, c4, -c2, \ + c5, -c1, c3, -c1, \ + c7, c3, c7, -c5 } + +static inline void mmxext_row_head (int16_t * const row, const int offset, + const int16_t * const table) +{ + movq_m2r (*(row+offset), mm2); /* mm2 = x6 x4 x2 x0 */ + + movq_m2r (*(row+offset+4), mm5); /* mm5 = x7 x5 x3 x1 */ + movq_r2r (mm2, mm0); /* mm0 = x6 x4 x2 x0 */ + + movq_m2r (*table, mm3); /* mm3 = -C2 -C4 C2 C4 */ + movq_r2r (mm5, mm6); /* mm6 = x7 x5 x3 x1 */ + + movq_m2r (*(table+4), mm4); /* mm4 = C6 C4 C6 C4 */ + pmaddwd_r2r (mm0, mm3); /* mm3 = -C4*x4-C2*x6 C4*x0+C2*x2 */ + + pshufw_r2r (mm2, mm2, 0x4e); /* mm2 = x2 x0 x6 x4 */ +} + +static inline void mmxext_row (const int16_t * const table, + const int32_t * const rounder) +{ + movq_m2r (*(table+8), mm1); /* mm1 = -C5 -C1 C3 C1 */ + pmaddwd_r2r (mm2, mm4); /* mm4 = C4*x0+C6*x2 C4*x4+C6*x6 */ + + pmaddwd_m2r (*(table+16), mm0); /* mm0 = C4*x4-C6*x6 C4*x0-C6*x2 */ + pshufw_r2r (mm6, mm6, 0x4e); /* mm6 = x3 x1 x7 x5 */ + + movq_m2r (*(table+12), mm7); /* mm7 = -C7 C3 C7 C5 */ + pmaddwd_r2r (mm5, mm1); /* mm1 = -C1*x5-C5*x7 C1*x1+C3*x3 */ + + paddd_m2r (*rounder, mm3); /* mm3 += rounder */ + pmaddwd_r2r (mm6, mm7); /* mm7 = C3*x1-C7*x3 C5*x5+C7*x7 */ + + pmaddwd_m2r (*(table+20), mm2); /* mm2 = C4*x0-C2*x2 -C4*x4+C2*x6 */ + paddd_r2r (mm4, mm3); /* mm3 = a1 a0 + rounder */ + + pmaddwd_m2r (*(table+24), mm5); /* mm5 = C3*x5-C1*x7 C5*x1-C1*x3 */ + movq_r2r (mm3, mm4); /* mm4 = a1 a0 + rounder */ + + pmaddwd_m2r (*(table+28), mm6); /* mm6 = C7*x1-C5*x3 C7*x5+C3*x7 */ + paddd_r2r (mm7, mm1); /* mm1 = b1 b0 */ + + paddd_m2r (*rounder, mm0); /* mm0 += rounder */ + psubd_r2r (mm1, mm3); /* mm3 = a1-b1 a0-b0 + rounder */ + + psrad_i2r (ROW_SHIFT, mm3); /* mm3 = y6 y7 */ + paddd_r2r (mm4, mm1); /* mm1 = a1+b1 a0+b0 + rounder */ + + paddd_r2r (mm2, mm0); /* mm0 = a3 a2 + 
rounder */ + psrad_i2r (ROW_SHIFT, mm1); /* mm1 = y1 y0 */ + + paddd_r2r (mm6, mm5); /* mm5 = b3 b2 */ + movq_r2r (mm0, mm4); /* mm4 = a3 a2 + rounder */ + + paddd_r2r (mm5, mm0); /* mm0 = a3+b3 a2+b2 + rounder */ + psubd_r2r (mm5, mm4); /* mm4 = a3-b3 a2-b2 + rounder */ +} + +static inline void mmxext_row_tail (int16_t * const row, const int store) +{ + psrad_i2r (ROW_SHIFT, mm0); /* mm0 = y3 y2 */ + + psrad_i2r (ROW_SHIFT, mm4); /* mm4 = y4 y5 */ + + packssdw_r2r (mm0, mm1); /* mm1 = y3 y2 y1 y0 */ + + packssdw_r2r (mm3, mm4); /* mm4 = y6 y7 y4 y5 */ + + movq_r2m (mm1, *(row+store)); /* save y3 y2 y1 y0 */ + pshufw_r2r (mm4, mm4, 0xb1); /* mm4 = y7 y6 y5 y4 */ + + /* slot */ + + movq_r2m (mm4, *(row+store+4)); /* save y7 y6 y5 y4 */ +} + +static inline void mmxext_row_mid (int16_t * const row, const int store, + const int offset, + const int16_t * const table) +{ + movq_m2r (*(row+offset), mm2); /* mm2 = x6 x4 x2 x0 */ + psrad_i2r (ROW_SHIFT, mm0); /* mm0 = y3 y2 */ + + movq_m2r (*(row+offset+4), mm5); /* mm5 = x7 x5 x3 x1 */ + psrad_i2r (ROW_SHIFT, mm4); /* mm4 = y4 y5 */ + + packssdw_r2r (mm0, mm1); /* mm1 = y3 y2 y1 y0 */ + movq_r2r (mm5, mm6); /* mm6 = x7 x5 x3 x1 */ + + packssdw_r2r (mm3, mm4); /* mm4 = y6 y7 y4 y5 */ + movq_r2r (mm2, mm0); /* mm0 = x6 x4 x2 x0 */ + + movq_r2m (mm1, *(row+store)); /* save y3 y2 y1 y0 */ + pshufw_r2r (mm4, mm4, 0xb1); /* mm4 = y7 y6 y5 y4 */ + + movq_m2r (*table, mm3); /* mm3 = -C2 -C4 C2 C4 */ + movq_r2m (mm4, *(row+store+4)); /* save y7 y6 y5 y4 */ + + pmaddwd_r2r (mm0, mm3); /* mm3 = -C4*x4-C2*x6 C4*x0+C2*x2 */ + + movq_m2r (*(table+4), mm4); /* mm4 = C6 C4 C6 C4 */ + pshufw_r2r (mm2, mm2, 0x4e); /* mm2 = x2 x0 x6 x4 */ +} + + +/* MMX row IDCT */ + +#define mmx_table(c1,c2,c3,c4,c5,c6,c7) { c4, c2, c4, c6, \ + c4, c6, -c4, -c2, \ + c1, c3, c3, -c7, \ + c5, c7, -c1, -c5, \ + c4, -c6, c4, -c2, \ + -c4, c2, c4, -c6, \ + c5, -c1, c7, -c5, \ + c7, c3, c3, -c1 } + +static inline void mmx_row_head (int16_t * const row, const int 
offset, + const int16_t * const table) +{ + movq_m2r (*(row+offset), mm2); /* mm2 = x6 x4 x2 x0 */ + + movq_m2r (*(row+offset+4), mm5); /* mm5 = x7 x5 x3 x1 */ + movq_r2r (mm2, mm0); /* mm0 = x6 x4 x2 x0 */ + + movq_m2r (*table, mm3); /* mm3 = C6 C4 C2 C4 */ + movq_r2r (mm5, mm6); /* mm6 = x7 x5 x3 x1 */ + + punpckldq_r2r (mm0, mm0); /* mm0 = x2 x0 x2 x0 */ + + movq_m2r (*(table+4), mm4); /* mm4 = -C2 -C4 C6 C4 */ + pmaddwd_r2r (mm0, mm3); /* mm3 = C4*x0+C6*x2 C4*x0+C2*x2 */ + + movq_m2r (*(table+8), mm1); /* mm1 = -C7 C3 C3 C1 */ + punpckhdq_r2r (mm2, mm2); /* mm2 = x6 x4 x6 x4 */ +} + +static inline void mmx_row (const int16_t * const table, + const int32_t * const rounder) +{ + pmaddwd_r2r (mm2, mm4); /* mm4 = -C4*x4-C2*x6 C4*x4+C6*x6 */ + punpckldq_r2r (mm5, mm5); /* mm5 = x3 x1 x3 x1 */ + + pmaddwd_m2r (*(table+16), mm0); /* mm0 = C4*x0-C2*x2 C4*x0-C6*x2 */ + punpckhdq_r2r (mm6, mm6); /* mm6 = x7 x5 x7 x5 */ + + movq_m2r (*(table+12), mm7); /* mm7 = -C5 -C1 C7 C5 */ + pmaddwd_r2r (mm5, mm1); /* mm1 = C3*x1-C7*x3 C1*x1+C3*x3 */ + + paddd_m2r (*rounder, mm3); /* mm3 += rounder */ + pmaddwd_r2r (mm6, mm7); /* mm7 = -C1*x5-C5*x7 C5*x5+C7*x7 */ + + pmaddwd_m2r (*(table+20), mm2); /* mm2 = C4*x4-C6*x6 -C4*x4+C2*x6 */ + paddd_r2r (mm4, mm3); /* mm3 = a1 a0 + rounder */ + + pmaddwd_m2r (*(table+24), mm5); /* mm5 = C7*x1-C5*x3 C5*x1-C1*x3 */ + movq_r2r (mm3, mm4); /* mm4 = a1 a0 + rounder */ + + pmaddwd_m2r (*(table+28), mm6); /* mm6 = C3*x5-C1*x7 C7*x5+C3*x7 */ + paddd_r2r (mm7, mm1); /* mm1 = b1 b0 */ + + paddd_m2r (*rounder, mm0); /* mm0 += rounder */ + psubd_r2r (mm1, mm3); /* mm3 = a1-b1 a0-b0 + rounder */ + + psrad_i2r (ROW_SHIFT, mm3); /* mm3 = y6 y7 */ + paddd_r2r (mm4, mm1); /* mm1 = a1+b1 a0+b0 + rounder */ + + paddd_r2r (mm2, mm0); /* mm0 = a3 a2 + rounder */ + psrad_i2r (ROW_SHIFT, mm1); /* mm1 = y1 y0 */ + + paddd_r2r (mm6, mm5); /* mm5 = b3 b2 */ + movq_r2r (mm0, mm7); /* mm7 = a3 a2 + rounder */ + + paddd_r2r (mm5, mm0); /* mm0 = a3+b3 a2+b2 + rounder */ 
+ psubd_r2r (mm5, mm7); /* mm7 = a3-b3 a2-b2 + rounder */ +} + +static inline void mmx_row_tail (int16_t * const row, const int store) +{ + psrad_i2r (ROW_SHIFT, mm0); /* mm0 = y3 y2 */ + + psrad_i2r (ROW_SHIFT, mm7); /* mm7 = y4 y5 */ + + packssdw_r2r (mm0, mm1); /* mm1 = y3 y2 y1 y0 */ + + packssdw_r2r (mm3, mm7); /* mm7 = y6 y7 y4 y5 */ + + movq_r2m (mm1, *(row+store)); /* save y3 y2 y1 y0 */ + movq_r2r (mm7, mm4); /* mm4 = y6 y7 y4 y5 */ + + pslld_i2r (16, mm7); /* mm7 = y7 0 y5 0 */ + + psrld_i2r (16, mm4); /* mm4 = 0 y6 0 y4 */ + + por_r2r (mm4, mm7); /* mm7 = y7 y6 y5 y4 */ + + /* slot */ + + movq_r2m (mm7, *(row+store+4)); /* save y7 y6 y5 y4 */ +} + +static inline void mmx_row_mid (int16_t * const row, const int store, + const int offset, const int16_t * const table) +{ + movq_m2r (*(row+offset), mm2); /* mm2 = x6 x4 x2 x0 */ + psrad_i2r (ROW_SHIFT, mm0); /* mm0 = y3 y2 */ + + movq_m2r (*(row+offset+4), mm5); /* mm5 = x7 x5 x3 x1 */ + psrad_i2r (ROW_SHIFT, mm7); /* mm7 = y4 y5 */ + + packssdw_r2r (mm0, mm1); /* mm1 = y3 y2 y1 y0 */ + movq_r2r (mm5, mm6); /* mm6 = x7 x5 x3 x1 */ + + packssdw_r2r (mm3, mm7); /* mm7 = y6 y7 y4 y5 */ + movq_r2r (mm2, mm0); /* mm0 = x6 x4 x2 x0 */ + + movq_r2m (mm1, *(row+store)); /* save y3 y2 y1 y0 */ + movq_r2r (mm7, mm1); /* mm1 = y6 y7 y4 y5 */ + + punpckldq_r2r (mm0, mm0); /* mm0 = x2 x0 x2 x0 */ + psrld_i2r (16, mm7); /* mm7 = 0 y6 0 y4 */ + + movq_m2r (*table, mm3); /* mm3 = C6 C4 C2 C4 */ + pslld_i2r (16, mm1); /* mm1 = y7 0 y5 0 */ + + movq_m2r (*(table+4), mm4); /* mm4 = -C2 -C4 C6 C4 */ + por_r2r (mm1, mm7); /* mm7 = y7 y6 y5 y4 */ + + movq_m2r (*(table+8), mm1); /* mm1 = -C7 C3 C3 C1 */ + punpckhdq_r2r (mm2, mm2); /* mm2 = x6 x4 x6 x4 */ + + movq_r2m (mm7, *(row+store+4)); /* save y7 y6 y5 y4 */ + pmaddwd_r2r (mm0, mm3); /* mm3 = C4*x0+C6*x2 C4*x0+C2*x2 */ +} + + +#if 0 +/* C column IDCT - its just here to document the MMXEXT and MMX versions */ +static inline void idct_col (int16_t * col, int offset) +{ +/* 
multiplication - as implemented on mmx */ +#define F(c,x) (((c) * (x)) >> 16) + +/* saturation - it helps us handle torture test cases */ +#define S(x) (((x)>32767) ? 32767 : ((x)<-32768) ? -32768 : (x)) + + int16_t x0, x1, x2, x3, x4, x5, x6, x7; + int16_t y0, y1, y2, y3, y4, y5, y6, y7; + int16_t a0, a1, a2, a3, b0, b1, b2, b3; + int16_t u04, v04, u26, v26, u17, v17, u35, v35, u12, v12; + + col += offset; + + x0 = col[0*8]; + x1 = col[1*8]; + x2 = col[2*8]; + x3 = col[3*8]; + x4 = col[4*8]; + x5 = col[5*8]; + x6 = col[6*8]; + x7 = col[7*8]; + + u04 = S (x0 + x4); + v04 = S (x0 - x4); + u26 = S (F (T2, x6) + x2); + v26 = S (F (T2, x2) - x6); + + a0 = S (u04 + u26); + a1 = S (v04 + v26); + a2 = S (v04 - v26); + a3 = S (u04 - u26); + + u17 = S (F (T1, x7) + x1); + v17 = S (F (T1, x1) - x7); + u35 = S (F (T3, x5) + x3); + v35 = S (F (T3, x3) - x5); + + b0 = S (u17 + u35); + b3 = S (v17 - v35); + u12 = S (u17 - u35); + v12 = S (v17 + v35); + u12 = S (2 * F (C4, u12)); + v12 = S (2 * F (C4, v12)); + b1 = S (u12 + v12); + b2 = S (u12 - v12); + + y0 = S (a0 + b0) >> COL_SHIFT; + y1 = S (a1 + b1) >> COL_SHIFT; + y2 = S (a2 + b2) >> COL_SHIFT; + y3 = S (a3 + b3) >> COL_SHIFT; + + y4 = S (a3 - b3) >> COL_SHIFT; + y5 = S (a2 - b2) >> COL_SHIFT; + y6 = S (a1 - b1) >> COL_SHIFT; + y7 = S (a0 - b0) >> COL_SHIFT; + + col[0*8] = y0; + col[1*8] = y1; + col[2*8] = y2; + col[3*8] = y3; + col[4*8] = y4; + col[5*8] = y5; + col[6*8] = y6; + col[7*8] = y7; +} +#endif + + +/* MMX column IDCT */ +static inline void idct_col (int16_t * const col, const int offset) +{ +#define T1 13036 +#define T2 27146 +#define T3 43790 +#define C4 23170 + + static const short _T1[] ATTR_ALIGN(8) = {T1,T1,T1,T1}; + static const short _T2[] ATTR_ALIGN(8) = {T2,T2,T2,T2}; + static const short _T3[] ATTR_ALIGN(8) = {T3,T3,T3,T3}; + static const short _C4[] ATTR_ALIGN(8) = {C4,C4,C4,C4}; + + /* column code adapted from peter gubanov */ + /* http://www.elecard.com/peter/idct.shtml */ + + movq_m2r (*_T1, mm0); 
/* mm0 = T1 */ + + movq_m2r (*(col+offset+1*8), mm1); /* mm1 = x1 */ + movq_r2r (mm0, mm2); /* mm2 = T1 */ + + movq_m2r (*(col+offset+7*8), mm4); /* mm4 = x7 */ + pmulhw_r2r (mm1, mm0); /* mm0 = T1*x1 */ + + movq_m2r (*_T3, mm5); /* mm5 = T3 */ + pmulhw_r2r (mm4, mm2); /* mm2 = T1*x7 */ + + movq_m2r (*(col+offset+5*8), mm6); /* mm6 = x5 */ + movq_r2r (mm5, mm7); /* mm7 = T3-1 */ + + movq_m2r (*(col+offset+3*8), mm3); /* mm3 = x3 */ + psubsw_r2r (mm4, mm0); /* mm0 = v17 */ + + movq_m2r (*_T2, mm4); /* mm4 = T2 */ + pmulhw_r2r (mm3, mm5); /* mm5 = (T3-1)*x3 */ + + paddsw_r2r (mm2, mm1); /* mm1 = u17 */ + pmulhw_r2r (mm6, mm7); /* mm7 = (T3-1)*x5 */ + + /* slot */ + + movq_r2r (mm4, mm2); /* mm2 = T2 */ + paddsw_r2r (mm3, mm5); /* mm5 = T3*x3 */ + + pmulhw_m2r (*(col+offset+2*8), mm4);/* mm4 = T2*x2 */ + paddsw_r2r (mm6, mm7); /* mm7 = T3*x5 */ + + psubsw_r2r (mm6, mm5); /* mm5 = v35 */ + paddsw_r2r (mm3, mm7); /* mm7 = u35 */ + + movq_m2r (*(col+offset+6*8), mm3); /* mm3 = x6 */ + movq_r2r (mm0, mm6); /* mm6 = v17 */ + + pmulhw_r2r (mm3, mm2); /* mm2 = T2*x6 */ + psubsw_r2r (mm5, mm0); /* mm0 = b3 */ + + psubsw_r2r (mm3, mm4); /* mm4 = v26 */ + paddsw_r2r (mm6, mm5); /* mm5 = v12 */ + + movq_r2m (mm0, *(col+offset+3*8)); /* save b3 in scratch0 */ + movq_r2r (mm1, mm6); /* mm6 = u17 */ + + paddsw_m2r (*(col+offset+2*8), mm2);/* mm2 = u26 */ + paddsw_r2r (mm7, mm6); /* mm6 = b0 */ + + psubsw_r2r (mm7, mm1); /* mm1 = u12 */ + movq_r2r (mm1, mm7); /* mm7 = u12 */ + + movq_m2r (*(col+offset+0*8), mm3); /* mm3 = x0 */ + paddsw_r2r (mm5, mm1); /* mm1 = u12+v12 */ + + movq_m2r (*_C4, mm0); /* mm0 = C4/2 */ + psubsw_r2r (mm5, mm7); /* mm7 = u12-v12 */ + + movq_r2m (mm6, *(col+offset+5*8)); /* save b0 in scratch1 */ + pmulhw_r2r (mm0, mm1); /* mm1 = b1/2 */ + + movq_r2r (mm4, mm6); /* mm6 = v26 */ + pmulhw_r2r (mm0, mm7); /* mm7 = b2/2 */ + + movq_m2r (*(col+offset+4*8), mm5); /* mm5 = x4 */ + movq_r2r (mm3, mm0); /* mm0 = x0 */ + + psubsw_r2r (mm5, mm3); /* mm3 = v04 */ + 
paddsw_r2r (mm5, mm0); /* mm0 = u04 */ + + paddsw_r2r (mm3, mm4); /* mm4 = a1 */ + movq_r2r (mm0, mm5); /* mm5 = u04 */ + + psubsw_r2r (mm6, mm3); /* mm3 = a2 */ + paddsw_r2r (mm2, mm5); /* mm5 = a0 */ + + paddsw_r2r (mm1, mm1); /* mm1 = b1 */ + psubsw_r2r (mm2, mm0); /* mm0 = a3 */ + + paddsw_r2r (mm7, mm7); /* mm7 = b2 */ + movq_r2r (mm3, mm2); /* mm2 = a2 */ + + movq_r2r (mm4, mm6); /* mm6 = a1 */ + paddsw_r2r (mm7, mm3); /* mm3 = a2+b2 */ + + psraw_i2r (COL_SHIFT, mm3); /* mm3 = y2 */ + paddsw_r2r (mm1, mm4); /* mm4 = a1+b1 */ + + psraw_i2r (COL_SHIFT, mm4); /* mm4 = y1 */ + psubsw_r2r (mm1, mm6); /* mm6 = a1-b1 */ + + movq_m2r (*(col+offset+5*8), mm1); /* mm1 = b0 */ + psubsw_r2r (mm7, mm2); /* mm2 = a2-b2 */ + + psraw_i2r (COL_SHIFT, mm6); /* mm6 = y6 */ + movq_r2r (mm5, mm7); /* mm7 = a0 */ + + movq_r2m (mm4, *(col+offset+1*8)); /* save y1 */ + psraw_i2r (COL_SHIFT, mm2); /* mm2 = y5 */ + + movq_r2m (mm3, *(col+offset+2*8)); /* save y2 */ + paddsw_r2r (mm1, mm5); /* mm5 = a0+b0 */ + + movq_m2r (*(col+offset+3*8), mm4); /* mm4 = b3 */ + psubsw_r2r (mm1, mm7); /* mm7 = a0-b0 */ + + psraw_i2r (COL_SHIFT, mm5); /* mm5 = y0 */ + movq_r2r (mm0, mm3); /* mm3 = a3 */ + + movq_r2m (mm2, *(col+offset+5*8)); /* save y5 */ + psubsw_r2r (mm4, mm3); /* mm3 = a3-b3 */ + + psraw_i2r (COL_SHIFT, mm7); /* mm7 = y7 */ + paddsw_r2r (mm0, mm4); /* mm4 = a3+b3 */ + + movq_r2m (mm5, *(col+offset+0*8)); /* save y0 */ + psraw_i2r (COL_SHIFT, mm3); /* mm3 = y4 */ + + movq_r2m (mm6, *(col+offset+6*8)); /* save y6 */ + psraw_i2r (COL_SHIFT, mm4); /* mm4 = y3 */ + + movq_r2m (mm7, *(col+offset+7*8)); /* save y7 */ + + movq_r2m (mm3, *(col+offset+4*8)); /* save y4 */ + + movq_r2m (mm4, *(col+offset+3*8)); /* save y3 */ +} + + +static const int32_t rounder0[] ATTR_ALIGN(8) = + rounder ((1 << (COL_SHIFT - 1)) - 0.5); +static const int32_t rounder4[] ATTR_ALIGN(8) = rounder (0); +static const int32_t rounder1[] ATTR_ALIGN(8) = + rounder (1.25683487303); /* C1*(C1/C4+C1+C7)/2 */ +static 
const int32_t rounder7[] ATTR_ALIGN(8) = + rounder (-0.25); /* C1*(C7/C4+C7-C1)/2 */ +static const int32_t rounder2[] ATTR_ALIGN(8) = + rounder (0.60355339059); /* C2 * (C6+C2)/2 */ +static const int32_t rounder6[] ATTR_ALIGN(8) = + rounder (-0.25); /* C2 * (C6-C2)/2 */ +static const int32_t rounder3[] ATTR_ALIGN(8) = + rounder (0.087788325588); /* C3*(-C3/C4+C3+C5)/2 */ +static const int32_t rounder5[] ATTR_ALIGN(8) = + rounder (-0.441341716183); /* C3*(-C5/C4+C5-C3)/2 */ + + +#define declare_idct(idct,table,idct_row_head,idct_row,idct_row_tail,idct_row_mid) \ +static inline void idct (int16_t * const block) \ +{ \ + static const int16_t table04[] ATTR_ALIGN(16) = \ + table (22725, 21407, 19266, 16384, 12873, 8867, 4520); \ + static const int16_t table17[] ATTR_ALIGN(16) = \ + table (31521, 29692, 26722, 22725, 17855, 12299, 6270); \ + static const int16_t table26[] ATTR_ALIGN(16) = \ + table (29692, 27969, 25172, 21407, 16819, 11585, 5906); \ + static const int16_t table35[] ATTR_ALIGN(16) = \ + table (26722, 25172, 22654, 19266, 15137, 10426, 5315); \ + \ + idct_row_head (block, 0*8, table04); \ + idct_row (table04, rounder0); \ + idct_row_mid (block, 0*8, 4*8, table04); \ + idct_row (table04, rounder4); \ + idct_row_mid (block, 4*8, 1*8, table17); \ + idct_row (table17, rounder1); \ + idct_row_mid (block, 1*8, 7*8, table17); \ + idct_row (table17, rounder7); \ + idct_row_mid (block, 7*8, 2*8, table26); \ + idct_row (table26, rounder2); \ + idct_row_mid (block, 2*8, 6*8, table26); \ + idct_row (table26, rounder6); \ + idct_row_mid (block, 6*8, 3*8, table35); \ + idct_row (table35, rounder3); \ + idct_row_mid (block, 3*8, 5*8, table35); \ + idct_row (table35, rounder5); \ + idct_row_tail (block, 5*8); \ + \ + idct_col (block, 0); \ + idct_col (block, 4); \ +} + + +#define COPY_MMX(offset,r0,r1,r2) \ +do { \ + movq_m2r (*(block+offset), r0); \ + dest += stride; \ + movq_m2r (*(block+offset+4), r1); \ + movq_r2m (r2, *dest); \ + packuswb_r2r (r1, r0); \ +} while 
(0) + +static inline void block_copy (int16_t * const block, uint8_t * dest, + const int stride) +{ + movq_m2r (*(block+0*8), mm0); + movq_m2r (*(block+0*8+4), mm1); + movq_m2r (*(block+1*8), mm2); + packuswb_r2r (mm1, mm0); + movq_m2r (*(block+1*8+4), mm3); + movq_r2m (mm0, *dest); + packuswb_r2r (mm3, mm2); + COPY_MMX (2*8, mm0, mm1, mm2); + COPY_MMX (3*8, mm2, mm3, mm0); + COPY_MMX (4*8, mm0, mm1, mm2); + COPY_MMX (5*8, mm2, mm3, mm0); + COPY_MMX (6*8, mm0, mm1, mm2); + COPY_MMX (7*8, mm2, mm3, mm0); + movq_r2m (mm2, *(dest+stride)); +} + + +#define ADD_MMX(offset,r1,r2,r3,r4) \ +do { \ + movq_m2r (*(dest+2*stride), r1); \ + packuswb_r2r (r4, r3); \ + movq_r2r (r1, r2); \ + dest += stride; \ + movq_r2m (r3, *dest); \ + punpcklbw_r2r (mm0, r1); \ + paddsw_m2r (*(block+offset), r1); \ + punpckhbw_r2r (mm0, r2); \ + paddsw_m2r (*(block+offset+4), r2); \ +} while (0) + +static inline void block_add (int16_t * const block, uint8_t * dest, + const int stride) +{ + movq_m2r (*dest, mm1); + pxor_r2r (mm0, mm0); + movq_m2r (*(dest+stride), mm3); + movq_r2r (mm1, mm2); + punpcklbw_r2r (mm0, mm1); + movq_r2r (mm3, mm4); + paddsw_m2r (*(block+0*8), mm1); + punpckhbw_r2r (mm0, mm2); + paddsw_m2r (*(block+0*8+4), mm2); + punpcklbw_r2r (mm0, mm3); + paddsw_m2r (*(block+1*8), mm3); + packuswb_r2r (mm2, mm1); + punpckhbw_r2r (mm0, mm4); + movq_r2m (mm1, *dest); + paddsw_m2r (*(block+1*8+4), mm4); + ADD_MMX (2*8, mm1, mm2, mm3, mm4); + ADD_MMX (3*8, mm3, mm4, mm1, mm2); + ADD_MMX (4*8, mm1, mm2, mm3, mm4); + ADD_MMX (5*8, mm3, mm4, mm1, mm2); + ADD_MMX (6*8, mm1, mm2, mm3, mm4); + ADD_MMX (7*8, mm3, mm4, mm1, mm2); + packuswb_r2r (mm4, mm3); + movq_r2m (mm3, *(dest+stride)); +} + + +static inline void block_zero (int16_t * const block) +{ + pxor_r2r (mm0, mm0); + movq_r2m (mm0, *(block+0*4)); + movq_r2m (mm0, *(block+1*4)); + movq_r2m (mm0, *(block+2*4)); + movq_r2m (mm0, *(block+3*4)); + movq_r2m (mm0, *(block+4*4)); + movq_r2m (mm0, *(block+5*4)); + movq_r2m (mm0, 
*(block+6*4)); + movq_r2m (mm0, *(block+7*4)); + movq_r2m (mm0, *(block+8*4)); + movq_r2m (mm0, *(block+9*4)); + movq_r2m (mm0, *(block+10*4)); + movq_r2m (mm0, *(block+11*4)); + movq_r2m (mm0, *(block+12*4)); + movq_r2m (mm0, *(block+13*4)); + movq_r2m (mm0, *(block+14*4)); + movq_r2m (mm0, *(block+15*4)); +} + + +#define CPU_MMXEXT 0 +#define CPU_MMX 1 + +#define dup4(reg) \ +do { \ + if (cpu != CPU_MMXEXT) { \ + punpcklwd_r2r (reg, reg); \ + punpckldq_r2r (reg, reg); \ + } else \ + pshufw_r2r (reg, reg, 0x00); \ +} while (0) + +static inline void block_add_DC (int16_t * const block, uint8_t * dest, + const int stride, const int cpu) +{ + movd_v2r ((block[0] + 64) >> 7, mm0); + pxor_r2r (mm1, mm1); + movq_m2r (*dest, mm2); + dup4 (mm0); + psubsw_r2r (mm0, mm1); + packuswb_r2r (mm0, mm0); + paddusb_r2r (mm0, mm2); + packuswb_r2r (mm1, mm1); + movq_m2r (*(dest + stride), mm3); + psubusb_r2r (mm1, mm2); + block[0] = 0; + paddusb_r2r (mm0, mm3); + movq_r2m (mm2, *dest); + psubusb_r2r (mm1, mm3); + movq_m2r (*(dest + 2*stride), mm2); + dest += stride; + movq_r2m (mm3, *dest); + paddusb_r2r (mm0, mm2); + movq_m2r (*(dest + 2*stride), mm3); + psubusb_r2r (mm1, mm2); + dest += stride; + paddusb_r2r (mm0, mm3); + movq_r2m (mm2, *dest); + psubusb_r2r (mm1, mm3); + movq_m2r (*(dest + 2*stride), mm2); + dest += stride; + movq_r2m (mm3, *dest); + paddusb_r2r (mm0, mm2); + movq_m2r (*(dest + 2*stride), mm3); + psubusb_r2r (mm1, mm2); + dest += stride; + paddusb_r2r (mm0, mm3); + movq_r2m (mm2, *dest); + psubusb_r2r (mm1, mm3); + movq_m2r (*(dest + 2*stride), mm2); + dest += stride; + movq_r2m (mm3, *dest); + paddusb_r2r (mm0, mm2); + movq_m2r (*(dest + 2*stride), mm3); + psubusb_r2r (mm1, mm2); + block[63] = 0; + paddusb_r2r (mm0, mm3); + movq_r2m (mm2, *(dest + stride)); + psubusb_r2r (mm1, mm3); + movq_r2m (mm3, *(dest + 2*stride)); +} + + +declare_idct (mmxext_idct, mmxext_table, + mmxext_row_head, mmxext_row, mmxext_row_tail, mmxext_row_mid) + +void mpeg2_idct_copy_mmxext 
(int16_t * const block, uint8_t * const dest, + const int stride) +{ + mmxext_idct (block); + block_copy (block, dest, stride); + block_zero (block); +} + +void mpeg2_idct_add_mmxext (const int last, int16_t * const block, + uint8_t * const dest, const int stride) +{ + if (last != 129 || (block[0] & (7 << 4)) == (4 << 4)) { + mmxext_idct (block); + block_add (block, dest, stride); + block_zero (block); + } else + block_add_DC (block, dest, stride, CPU_MMXEXT); +} + + +declare_idct (mmx_idct, mmx_table, + mmx_row_head, mmx_row, mmx_row_tail, mmx_row_mid) + +void mpeg2_idct_copy_mmx (int16_t * const block, uint8_t * const dest, + const int stride) +{ + mmx_idct (block); + block_copy (block, dest, stride); + block_zero (block); +} + +void mpeg2_idct_add_mmx (const int last, int16_t * const block, + uint8_t * const dest, const int stride) +{ + if (last != 129 || (block[0] & (7 << 4)) == (4 << 4)) { + mmx_idct (block); + block_add (block, dest, stride); + block_zero (block); + } else + block_add_DC (block, dest, stride, CPU_MMX); +} + + +void mpeg2_idct_mmx_init (void) +{ + extern uint8_t mpeg2_scan_norm[64]; + extern uint8_t mpeg2_scan_alt[64]; + int i, j; + + /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */ + + for (i = 0; i < 64; i++) { + j = mpeg2_scan_norm[i]; + mpeg2_scan_norm[i] = (j & 0x38) | ((j & 6) >> 1) | ((j & 1) << 2); + j = mpeg2_scan_alt[i]; + mpeg2_scan_alt[i] = (j & 0x38) | ((j & 6) >> 1) | ((j & 1) << 2); + } +} + +#endif diff --git a/src/video_dec/libmpeg2new/libmpeg2/libmpeg2.pc.in b/src/video_dec/libmpeg2new/libmpeg2/libmpeg2.pc.in new file mode 100644 index 000000000..d54500b0e --- /dev/null +++ b/src/video_dec/libmpeg2new/libmpeg2/libmpeg2.pc.in @@ -0,0 +1,10 @@ +prefix=@prefix@ +exec_prefix=@exec_prefix@ +libdir=@libdir@ +includedir=@includedir@ + +Name: libmpeg2 +Description: A decoding library for MPEG-1 and MPEG-2 streams. 
+Version: @VERSION@ +Libs: -L${libdir} -lmpeg2 +Cflags: -I${includedir}/@PACKAGE@ diff --git a/src/video_dec/libmpeg2new/libmpeg2/libmpeg2convert.pc.in b/src/video_dec/libmpeg2new/libmpeg2/libmpeg2convert.pc.in new file mode 100644 index 000000000..42383a6e2 --- /dev/null +++ b/src/video_dec/libmpeg2new/libmpeg2/libmpeg2convert.pc.in @@ -0,0 +1,10 @@ +prefix=@prefix@ +exec_prefix=@exec_prefix@ +libdir=@libdir@ +includedir=@includedir@ + +Name: libmpeg2convert +Description: libmpeg2 helper functions for converting to various formats. +Version: @VERSION@ +Libs: -L${libdir} -lmpeg2convert +Cflags: -I${includedir}/@PACKAGE@ diff --git a/src/video_dec/libmpeg2new/libmpeg2/motion_comp.c b/src/video_dec/libmpeg2new/libmpeg2/motion_comp.c new file mode 100644 index 000000000..d5a265d5c --- /dev/null +++ b/src/video_dec/libmpeg2new/libmpeg2/motion_comp.c @@ -0,0 +1,130 @@ +/* + * motion_comp.c + * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org> + * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#include <inttypes.h> + +#include "../include/mpeg2.h" +#include "../include/attributes.h" +#include "mpeg2_internal.h" +/* Motion-compensation function table currently in use; assigned by mpeg2_mc_init(). */ +mpeg2_mc_t mpeg2_mc; +/* Pick the motion-compensation table matching the accel bit mask. Inside each architecture's #ifdef the flags are tested in the order written (on x86: MMXEXT, then 3DNOW, then MMX). Every branch ends in a dangling else, so whichever #ifdef blocks are compiled in, the last fallback is the plain C table mpeg2_mc_c. */ +void mpeg2_mc_init (uint32_t accel) +{ +#ifdef ARCH_X86 + if (accel & MPEG2_ACCEL_X86_MMXEXT) + mpeg2_mc = mpeg2_mc_mmxext; + else if (accel & MPEG2_ACCEL_X86_3DNOW) + mpeg2_mc = mpeg2_mc_3dnow; + else if (accel & MPEG2_ACCEL_X86_MMX) + mpeg2_mc = mpeg2_mc_mmx; + else +#endif +#ifdef ARCH_PPC + if (accel & MPEG2_ACCEL_PPC_ALTIVEC) + mpeg2_mc = mpeg2_mc_altivec; + else +#endif +#ifdef ARCH_ALPHA + if (accel & MPEG2_ACCEL_ALPHA) + mpeg2_mc = mpeg2_mc_alpha; + else +#endif +#ifdef ARCH_SPARC + if (accel & MPEG2_ACCEL_SPARC_VIS) + mpeg2_mc = mpeg2_mc_vis; + else +#endif + mpeg2_mc = mpeg2_mc_c; +} +/* Rounded averages of two and of four samples. NOTE(review): the macro arguments are not parenthesized, so only simple operands such as the array elements used below should be passed. */ +#define avg2(a,b) ((a+b+1)>>1) +#define avg4(a,b,c,d) ((a+b+c+d+2)>>2) +/* Predictors for sample i of the current row: o takes ref[i] unchanged, x averages it with ref[i+1], y averages it with the sample one stride further on, xy averages all four of those samples. They rely on ref and stride being in scope where they are expanded. */ +#define predict_o(i) (ref[i]) +#define predict_x(i) (avg2 (ref[i], ref[i+1])) +#define predict_y(i) (avg2 (ref[i], (ref+stride)[i])) +#define predict_xy(i) (avg4 (ref[i], ref[i+1], \ + (ref+stride)[i], (ref+stride)[i+1])) +/* Store operations: put overwrites dest[i] with the prediction, avg averages the prediction with the value already in dest[i]. */ +#define put(predictor,i) dest[i] = predictor (i) +#define avg(predictor,i) dest[i] = avg2 (predictor (i), dest[i]) + +/* mc function template: MC_FUNC(op,xy) defines MC_<op>_<xy>_16_c and MC_<op>_<xy>_8_c, which apply op to 16 or 8 samples of a row and then advance ref and dest by stride, once per row. The do/while (--height) form means height must be at least 1 on entry. */ + +#define MC_FUNC(op,xy) \ +static void MC_##op##_##xy##_16_c (uint8_t * dest, const uint8_t * ref, \ + const int stride, int height) \ +{ \ + do { \ + op (predict_##xy, 0); \ + op (predict_##xy, 1); \ + op (predict_##xy, 2); \ + op (predict_##xy, 3); \ + op (predict_##xy, 4); \ + op (predict_##xy, 5); \ + op (predict_##xy, 6); \ + op (predict_##xy, 7); \ + op (predict_##xy, 8); \ + op (predict_##xy, 9); \ + op (predict_##xy, 10); \ + op (predict_##xy, 11); \ + op (predict_##xy, 12); \ + op (predict_##xy, 13); \ + op (predict_##xy, 14); \ + op
(predict_##xy, 15); \ + ref += stride; \ + dest += stride; \ + } while (--height); \ +} \ +static void MC_##op##_##xy##_8_c (uint8_t * dest, const uint8_t * ref, \ + const int stride, int height) \ +{ \ + do { \ + op (predict_##xy, 0); \ + op (predict_##xy, 1); \ + op (predict_##xy, 2); \ + op (predict_##xy, 3); \ + op (predict_##xy, 4); \ + op (predict_##xy, 5); \ + op (predict_##xy, 6); \ + op (predict_##xy, 7); \ + ref += stride; \ + dest += stride; \ + } while (--height); \ +} + +/* definitions of the actual mc functions: one 16-wide and one 8-wide function for every put/avg and o/x/y/xy combination */ + +MC_FUNC (put,o) +MC_FUNC (avg,o) +MC_FUNC (put,x) +MC_FUNC (avg,x) +MC_FUNC (put,y) +MC_FUNC (avg,y) +MC_FUNC (put,xy) +MC_FUNC (avg,xy) +/* MPEG2_MC_EXTERN is defined outside this file; presumably it emits the mpeg2_mc_c table referenced by mpeg2_mc_init() from the functions above - TODO confirm. */ +MPEG2_MC_EXTERN (c) diff --git a/src/video_dec/libmpeg2new/libmpeg2/motion_comp_alpha.c b/src/video_dec/libmpeg2new/libmpeg2/motion_comp_alpha.c new file mode 100644 index 000000000..73f6625d2 --- /dev/null +++ b/src/video_dec/libmpeg2new/libmpeg2/motion_comp_alpha.c @@ -0,0 +1,253 @@ +/* + * motion_comp_alpha.c + * Copyright (C) 2002-2003 Falk Hueffner <falk@debian.org> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#ifdef ARCH_ALPHA + +#include <inttypes.h> + +#include "../include/mpeg2.h" +#include "../include/attributes.h" +#include "mpeg2_internal.h" +#include "../include/alpha_asm.h" +/* Rounded-up average of the packed bytes of a and b, computed as (a | b) - ((a ^ b) >> 1); the 0xfe mask clears the low bit of every byte before the shift so nothing moves into the neighbouring byte. Assumes BYTE_VEC (declared outside this file) replicates its argument into every byte - TODO confirm. */ +static inline uint64_t avg2 (uint64_t a, uint64_t b) +{ + return (a | b) - (((a ^ b) & BYTE_VEC (0xfe)) >> 1); +} + +// Load two unaligned quadwords from addr. This macro only works if +// addr is actually unaligned. +#define ULOAD16(ret_l,ret_r,addr) \ + do { \ + uint64_t _l = ldq_u (addr + 0); \ + uint64_t _m = ldq_u (addr + 8); \ + uint64_t _r = ldq_u (addr + 16); \ + ret_l = extql (_l, addr) | extqh (_m, addr); \ + ret_r = extql (_m, addr) | extqh (_r, addr); \ + } while (0) + +// Load two aligned quadwords from addr. +#define ALOAD16(ret_l,ret_r,addr) \ + do { \ + ret_l = ldq (addr); \ + ret_r = ldq (addr + 8); \ + } while (0) +/* Row-loop bodies expanded inside the functions generated by MAKE_OP; they use that function's block, pixels, line_size and h. LOAD reads one quadword, LOAD16 reads two, STORE writes one quadword. OP8 and OP16 transfer 8 or 16 bytes per row without interpolation. */ +#define OP8(LOAD,LOAD16,STORE) \ + do { \ + STORE (LOAD (pixels), block); \ + pixels += line_size; \ + block += line_size; \ + } while (--h) + +#define OP16(LOAD,LOAD16,STORE) \ + do { \ + uint64_t l, r; \ + LOAD16 (l, r, pixels); \ + STORE (l, block); \ + STORE (r, block + 8); \ + pixels += line_size; \ + block += line_size; \ + } while (--h) +/* Horizontal interpolation: the row is averaged with a copy of itself shifted down by one byte, the byte that falls off the end being fetched separately from pixels[8] (pixels[16] in the 16-wide form). */ +#define OP8_X2(LOAD,LOAD16,STORE) \ + do { \ + uint64_t p0, p1; \ + \ + p0 = LOAD (pixels); \ + p1 = p0 >> 8 | ((uint64_t) pixels[8] << 56); \ + STORE (avg2 (p0, p1), block); \ + pixels += line_size; \ + block += line_size; \ + } while (--h) + +#define OP16_X2(LOAD,LOAD16,STORE) \ + do { \ + uint64_t p0, p1; \ + \ + LOAD16 (p0, p1, pixels); \ + STORE (avg2(p0, p0 >> 8 | p1 << 56), block); \ + STORE (avg2(p1, p1 >> 8 | (uint64_t) pixels[16] << 56), \ + block + 8); \ + pixels += line_size; \ + block += line_size; \ + } while (--h) +/* Vertical interpolation: each row is averaged with the row after it. line_size is set to 0 on the final pass, so the look-ahead load re-reads the row already fetched instead of stepping one row further. */ +#define OP8_Y2(LOAD,LOAD16,STORE) \ + do { \
+ uint64_t p0, p1; \ + p0 = LOAD (pixels); \ + pixels += line_size; \ + p1 = LOAD (pixels); \ + do { \ + uint64_t av = avg2 (p0, p1); \ + if (--h == 0) line_size = 0; \ + pixels += line_size; \ + p0 = p1; \ + p1 = LOAD (pixels); \ + STORE (av, block); \ + block += line_size; \ + } while (h); \ + } while (0) +/* 16-byte-wide form of OP8_Y2, handling a left and a right quadword per row. */ +#define OP16_Y2(LOAD,LOAD16,STORE) \ + do { \ + uint64_t p0l, p0r, p1l, p1r; \ + LOAD16 (p0l, p0r, pixels); \ + pixels += line_size; \ + LOAD16 (p1l, p1r, pixels); \ + do { \ + uint64_t avl, avr; \ + if (--h == 0) line_size = 0; \ + avl = avg2 (p0l, p1l); \ + avr = avg2 (p0r, p1r); \ + p0l = p1l; \ + p0r = p1r; \ + pixels += line_size; \ + LOAD16 (p1l, p1r, pixels); \ + STORE (avl, block); \ + STORE (avr, block + 8); \ + block += line_size; \ + } while (h); \ + } while (0) +/* Combined horizontal and vertical interpolation over a 2x2 neighbourhood. Every byte is split into its upper six bits (already shifted right by 2, kept in ph/nph) and its lower two bits (pl/npl) so that the four contributions can be summed per byte without carrying into the next byte; BYTE_VEC (0x02) is the rounding term added before the low parts are divided by 4. */ +#define OP8_XY2(LOAD,LOAD16,STORE) \ + do { \ + uint64_t pl, ph; \ + uint64_t p1 = LOAD (pixels); \ + uint64_t p2 = p1 >> 8 | ((uint64_t) pixels[8] << 56); \ + \ + ph = (((p1 & ~BYTE_VEC (0x03)) >> 2) + \ + ((p2 & ~BYTE_VEC (0x03)) >> 2)); \ + pl = ((p1 & BYTE_VEC (0x03)) + \ + (p2 & BYTE_VEC (0x03))); \ + \ + do { \ + uint64_t npl, nph; \ + \ + pixels += line_size; \ + p1 = LOAD (pixels); \ + p2 = (p1 >> 8) | ((uint64_t) pixels[8] << 56); \ + nph = (((p1 & ~BYTE_VEC (0x03)) >> 2) + \ + ((p2 & ~BYTE_VEC (0x03)) >> 2)); \ + npl = ((p1 & BYTE_VEC (0x03)) + \ + (p2 & BYTE_VEC (0x03))); \ + \ + STORE (ph + nph + \ + (((pl + npl + BYTE_VEC (0x02)) >> 2) & \ + BYTE_VEC (0x03)), block); \ + \ + block += line_size; \ + pl = npl; \ + ph = nph; \ + } while (--h); \ + } while (0) +/* 16-byte-wide form of OP8_XY2; the _l and _r variables track the left and right quadword of each row. */ +#define OP16_XY2(LOAD,LOAD16,STORE) \ + do { \ + uint64_t p0, p1, p2, p3, pl_l, ph_l, pl_r, ph_r; \ + LOAD16 (p0, p2, pixels); \ + p1 = p0 >> 8 | (p2 << 56); \ + p3 = p2 >> 8 | ((uint64_t)pixels[16] << 56); \ + \ + ph_l = (((p0 & ~BYTE_VEC (0x03)) >> 2) + \ + ((p1 & ~BYTE_VEC (0x03)) >> 2)); \ + pl_l = ((p0 & BYTE_VEC (0x03)) + \ + (p1 & BYTE_VEC(0x03))); \ + ph_r = (((p2 & ~BYTE_VEC (0x03)) >> 2) + \ + ((p3 & ~BYTE_VEC
(0x03)) >> 2)); \ + pl_r = ((p2 & BYTE_VEC (0x03)) + \ + (p3 & BYTE_VEC (0x03))); \ + \ + do { \ + uint64_t npl_l, nph_l, npl_r, nph_r; \ + \ + pixels += line_size; \ + LOAD16 (p0, p2, pixels); \ + p1 = p0 >> 8 | (p2 << 56); \ + p3 = p2 >> 8 | ((uint64_t)pixels[16] << 56); \ + nph_l = (((p0 & ~BYTE_VEC (0x03)) >> 2) + \ + ((p1 & ~BYTE_VEC (0x03)) >> 2)); \ + npl_l = ((p0 & BYTE_VEC (0x03)) + \ + (p1 & BYTE_VEC (0x03))); \ + nph_r = (((p2 & ~BYTE_VEC (0x03)) >> 2) + \ + ((p3 & ~BYTE_VEC (0x03)) >> 2)); \ + npl_r = ((p2 & BYTE_VEC (0x03)) + \ + (p3 & BYTE_VEC (0x03))); \ + \ + STORE (ph_l + nph_l + \ + (((pl_l + npl_l + BYTE_VEC (0x02)) >> 2) & \ + BYTE_VEC(0x03)), block); \ + STORE (ph_r + nph_r + \ + (((pl_r + npl_r + BYTE_VEC (0x02)) >> 2) & \ + BYTE_VEC(0x03)), block + 8); \ + \ + block += line_size; \ + pl_l = npl_l; \ + ph_l = nph_l; \ + pl_r = npl_r; \ + ph_r = nph_r; \ + } while (--h); \ + } while (0) +/* Define MC_<OPNAME>_<SUFF>_<SIZE>_alpha. The generated function expands OPKIND with the unaligned loaders when pixels is not 8-byte aligned and with the aligned loaders otherwise. */ +#define MAKE_OP(OPNAME,SIZE,SUFF,OPKIND,STORE) \ +static void MC_ ## OPNAME ## _ ## SUFF ## _ ## SIZE ## _alpha \ + (uint8_t *restrict block, const uint8_t *restrict pixels, \ + int line_size, int h) \ +{ \ + if ((uint64_t) pixels & 0x7) { \ + OPKIND (uldq, ULOAD16, STORE); \ + } else { \ + OPKIND (ldq, ALOAD16, STORE); \ + } \ +} +/* Instantiate all eight width/interpolation variants for one store operation. */ +#define PIXOP(OPNAME,STORE) \ + MAKE_OP (OPNAME, 8, o, OP8, STORE); \ + MAKE_OP (OPNAME, 8, x, OP8_X2, STORE); \ + MAKE_OP (OPNAME, 8, y, OP8_Y2, STORE); \ + MAKE_OP (OPNAME, 8, xy, OP8_XY2, STORE); \ + MAKE_OP (OPNAME, 16, o, OP16, STORE); \ + MAKE_OP (OPNAME, 16, x, OP16_X2, STORE); \ + MAKE_OP (OPNAME, 16, y, OP16_Y2, STORE); \ + MAKE_OP (OPNAME, 16, xy, OP16_XY2, STORE); +/* put uses a plain stq; avg stores the avg2 of the new value and the quadword already at b. NOTE(review): the second STORE definition carries its own trailing semicolon. */ +#define STORE(l,b) stq (l, b) +PIXOP (put, STORE); +#undef STORE +#define STORE(l,b) stq (avg2 (l, ldq (b)), b); +PIXOP (avg, STORE); +/* Function table for Alpha: the put group first, then the avg group; inside each group the 16-wide o, x, y, xy entries come before the 8-wide ones. */ +mpeg2_mc_t mpeg2_mc_alpha = { + { MC_put_o_16_alpha, MC_put_x_16_alpha, + MC_put_y_16_alpha, MC_put_xy_16_alpha, + MC_put_o_8_alpha, MC_put_x_8_alpha, + MC_put_y_8_alpha, MC_put_xy_8_alpha }, + { MC_avg_o_16_alpha,
MC_avg_x_16_alpha, + MC_avg_y_16_alpha, MC_avg_xy_16_alpha, + MC_avg_o_8_alpha, MC_avg_x_8_alpha, + MC_avg_y_8_alpha, MC_avg_xy_8_alpha } +}; + +#endif diff --git a/src/video_dec/libmpeg2new/libmpeg2/motion_comp_altivec.c b/src/video_dec/libmpeg2new/libmpeg2/motion_comp_altivec.c new file mode 100644 index 000000000..cc1b72f56 --- /dev/null +++ b/src/video_dec/libmpeg2new/libmpeg2/motion_comp_altivec.c @@ -0,0 +1,1010 @@ +/* + * motion_comp_altivec.c + * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org> + * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#ifdef ARCH_PPC + +#ifdef HAVE_ALTIVEC_H +#include <altivec.h> +#endif +#include <inttypes.h> + +#include "../include/mpeg2.h" +#include "../include/attributes.h" +#include "mpeg2_internal.h" +/* Short names for the AltiVec vector types used in this file. */ +typedef vector signed char vector_s8_t; +typedef vector unsigned char vector_u8_t; +typedef vector signed short vector_s16_t; +typedef vector unsigned short vector_u16_t; +typedef vector signed int vector_s32_t; +typedef vector unsigned int vector_u32_t; + +#ifndef COFFEE_BREAK /* Workarounds for gcc suckage: vec_ld, vec_and and vec_avg are redirected to wrappers with fixed vector_u8_t signatures; the vec_ld wrapper also casts away the const of its pointer argument */ + +static inline vector_u8_t my_vec_ld (int const A, const uint8_t * const B) +{ + return vec_ld (A, (uint8_t *)B); +} +#undef vec_ld +#define vec_ld my_vec_ld + +static inline vector_u8_t my_vec_and (vector_u8_t const A, vector_u8_t const B) +{ + return vec_and (A, B); +} +#undef vec_and +#define vec_and my_vec_and + +static inline vector_u8_t my_vec_avg (vector_u8_t const A, vector_u8_t const B) +{ + return vec_avg (A, B); +} +#undef vec_avg +#define vec_avg my_vec_avg + +#endif +/* Copy a 16-byte-wide block from ref to dest. Each source row is assembled from two vec_ld loads (offsets 0 and 15) merged by vec_perm with the vec_lvsl vector computed once from ref; rows are written with vec_st. The loop handles two rows per pass with the loads running one row ahead of the stores. NOTE(review): height = (height >> 1) - 1 followed by do/while (--height) appears to require an even height of at least 4; a height of 2 would enter the loop with a counter of 0. */ +static void MC_put_o_16_altivec (uint8_t * dest, const uint8_t * ref, + const int stride, int height) +{ + vector_u8_t perm, ref0, ref1, tmp; + + perm = vec_lvsl (0, ref); + + height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + ref += stride; + tmp = vec_perm (ref0, ref1, perm); + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + ref += stride; + vec_st (tmp, 0, dest); + tmp = vec_perm (ref0, ref1, perm); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + ref += stride; + vec_st (tmp, stride, dest); + dest += 2*stride; + tmp = vec_perm (ref0, ref1, perm); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + vec_st (tmp, 0, dest); + tmp = vec_perm (ref0, ref1, perm); +
vec_st (tmp, stride, dest); +} +/* 8-byte-wide copy from ref to dest. Separate permute vectors are prepared for even rows (perm0, from ref) and odd rows (perm1, from ref + stride) and used alternately. Each row is written with two 32-bit vec_ste element stores at byte offsets 0 and 4. Same two-rows-per-pass loop shape as the 16-wide copy, so it likewise appears to need an even height of at least 4. */ +static void MC_put_o_8_altivec (uint8_t * dest, const uint8_t * ref, + const int stride, int height) +{ + vector_u8_t perm0, perm1, tmp0, tmp1, ref0, ref1; + + tmp0 = vec_lvsl (0, ref); + tmp0 = vec_mergeh (tmp0, tmp0); + perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0); + tmp1 = vec_lvsl (stride, ref); + tmp1 = vec_mergeh (tmp1, tmp1); + perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1); + + height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + ref += stride; + tmp0 = vec_perm (ref0, ref1, perm0); + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + ref += stride; + vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); + dest += stride; + tmp1 = vec_perm (ref0, ref1, perm1); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + ref += stride; + vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); + dest += stride; + tmp0 = vec_perm (ref0, ref1, perm0); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); + dest += stride; + tmp1 = vec_perm (ref0, ref1, perm1); + vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); +} +/* 16-wide put with horizontal interpolation: each stored row is the vec_avg of the source row selected by permA and the same row selected by permB, where permB is permA with 1 added to every element. The second load uses offset 16 rather than 15 because one more source byte is needed. */ +static void MC_put_x_16_altivec (uint8_t * dest, const uint8_t * ref, + const int stride, int height) +{ + vector_u8_t permA, permB, ref0, ref1, tmp; + + permA = vec_lvsl (0, ref); + permB = vec_add (permA, vec_splat_u8 (1)); + + height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + tmp = vec_avg (vec_perm (ref0, ref1, permA), + vec_perm (ref0, ref1, permB)); + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + vec_st (tmp, 0, dest); + tmp = vec_avg (vec_perm (ref0, ref1,
permA), + vec_perm (ref0, ref1, permB)); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + vec_st (tmp, stride, dest); + dest += 2*stride; + tmp = vec_avg (vec_perm (ref0, ref1, permA), + vec_perm (ref0, ref1, permB)); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + vec_st (tmp, 0, dest); + tmp = vec_avg (vec_perm (ref0, ref1, permA), + vec_perm (ref0, ref1, permB)); + vec_st (tmp, stride, dest); +} +/* 8-wide put with horizontal interpolation. perm0A/perm0B serve even rows and perm1A/perm1B odd rows; each B vector is its A vector with 1 added to every element, and the stored row is the vec_avg of the two selections. Rows are written with two 32-bit vec_ste stores at offsets 0 and 4. The (height >> 1) - 1 loop count again appears to need an even height of at least 4. */ +static void MC_put_x_8_altivec (uint8_t * dest, const uint8_t * ref, + const int stride, int height) +{ + vector_u8_t perm0A, perm0B, perm1A, perm1B, ones, tmp0, tmp1, ref0, ref1; + + ones = vec_splat_u8 (1); + tmp0 = vec_lvsl (0, ref); + tmp0 = vec_mergeh (tmp0, tmp0); + perm0A = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0); + perm0B = vec_add (perm0A, ones); + tmp1 = vec_lvsl (stride, ref); + tmp1 = vec_mergeh (tmp1, tmp1); + perm1A = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1); + perm1B = vec_add (perm1A, ones); + + height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + ref += stride; + tmp0 = vec_avg (vec_perm (ref0, ref1, perm0A), + vec_perm (ref0, ref1, perm0B)); + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + ref += stride; + vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); + dest += stride; + tmp1 = vec_avg (vec_perm (ref0, ref1, perm1A), + vec_perm (ref0, ref1, perm1B)); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + ref += stride; + vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); + dest += stride; + tmp0 = vec_avg (vec_perm (ref0, ref1, perm0A), + vec_perm (ref0, ref1, perm0B)); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); + dest += stride; + tmp1 = vec_avg (vec_perm (ref0,
ref1, perm1A), + vec_perm (ref0, ref1, perm1B)); + vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); +} +/* 16-wide put with vertical interpolation: each stored row is the vec_avg of two consecutive source rows. tmp0 and tmp1 hold the two most recent rows and are refreshed alternately, so every source row is loaded only once. The (height >> 1) - 1 loop count appears to need an even height of at least 4. */ +static void MC_put_y_16_altivec (uint8_t * dest, const uint8_t * ref, + const int stride, int height) +{ + vector_u8_t perm, ref0, ref1, tmp0, tmp1, tmp; + + perm = vec_lvsl (0, ref); + + height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + ref += stride; + tmp0 = vec_perm (ref0, ref1, perm); + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + ref += stride; + tmp1 = vec_perm (ref0, ref1, perm); + tmp = vec_avg (tmp0, tmp1); + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + ref += stride; + vec_st (tmp, 0, dest); + tmp0 = vec_perm (ref0, ref1, perm); + tmp = vec_avg (tmp0, tmp1); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + ref += stride; + vec_st (tmp, stride, dest); + dest += 2*stride; + tmp1 = vec_perm (ref0, ref1, perm); + tmp = vec_avg (tmp0, tmp1); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + vec_st (tmp, 0, dest); + tmp0 = vec_perm (ref0, ref1, perm); + tmp = vec_avg (tmp0, tmp1); + vec_st (tmp, stride, dest); +} +/* 8-wide put with vertical interpolation: same alternating tmp0/tmp1 row averaging as the 16-wide version, with perm0 used for even source rows and perm1 for odd ones, and each result written by two 32-bit vec_ste stores at offsets 0 and 4. */ +static void MC_put_y_8_altivec (uint8_t * dest, const uint8_t * ref, + const int stride, int height) +{ + vector_u8_t perm0, perm1, tmp0, tmp1, tmp, ref0, ref1; + + tmp0 = vec_lvsl (0, ref); + tmp0 = vec_mergeh (tmp0, tmp0); + perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0); + tmp1 = vec_lvsl (stride, ref); + tmp1 = vec_mergeh (tmp1, tmp1); + perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1); + + height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + ref += stride; + tmp0 = vec_perm (ref0, ref1, perm0); + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + ref += stride; + tmp1 = vec_perm (ref0, ref1, perm1); + tmp = vec_avg (tmp0, tmp1); + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + ref += stride; + vec_ste
((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); + dest += stride; + tmp0 = vec_perm (ref0, ref1, perm0); + tmp = vec_avg (tmp0, tmp1); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + ref += stride; + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); + dest += stride; + tmp1 = vec_perm (ref0, ref1, perm1); + tmp = vec_avg (tmp0, tmp1); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); + dest += stride; + tmp0 = vec_perm (ref0, ref1, perm0); + tmp = vec_avg (tmp0, tmp1); + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); +} +/* 16-wide put with horizontal and vertical interpolation. For every source row the horizontal pair (A, B) is reduced to avgN = vec_avg (A, B) and xorN = vec_xor (A, B); two consecutive rows are then combined as vec_avg (avg0, avg1) minus a one-bit correction, ones & (xor0 | xor1) & (avg0 ^ avg1). The correction presumably removes the extra rounding of the nested averages so the result matches a single rounded four-sample average - TODO confirm against the C reference. The (height >> 1) - 1 loop count appears to need an even height of at least 4. */ +static void MC_put_xy_16_altivec (uint8_t * dest, const uint8_t * ref, + const int stride, int height) +{ + vector_u8_t permA, permB, ref0, ref1, A, B, avg0, avg1, xor0, xor1, tmp; + vector_u8_t ones; + + ones = vec_splat_u8 (1); + permA = vec_lvsl (0, ref); + permB = vec_add (permA, ones); + + height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + A = vec_perm (ref0, ref1, permA); + B = vec_perm (ref0, ref1, permB); + avg0 = vec_avg (A, B); + xor0 = vec_xor (A, B); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + A = vec_perm (ref0, ref1, permA); + B = vec_perm (ref0, ref1, permB); + avg1 = vec_avg (A, B); + xor1 = vec_xor (A, B); + tmp = vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1))); + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + vec_st (tmp, 0, dest); + A = vec_perm (ref0, ref1, permA); + B = vec_perm (ref0, ref1, permB); + avg0 = vec_avg (A, B); + xor0 = vec_xor (A, B); + tmp = vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor
(avg0, avg1))); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + vec_st (tmp, stride, dest); + dest += 2*stride; + A = vec_perm (ref0, ref1, permA); + B = vec_perm (ref0, ref1, permB); + avg1 = vec_avg (A, B); + xor1 = vec_xor (A, B); + tmp = vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1))); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + vec_st (tmp, 0, dest); + A = vec_perm (ref0, ref1, permA); + B = vec_perm (ref0, ref1, permB); + avg0 = vec_avg (A, B); + xor0 = vec_xor (A, B); + tmp = vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1))); + vec_st (tmp, stride, dest); +} +/* 8-wide put with horizontal and vertical interpolation. Uses the same avg/xor bookkeeping and one-bit correction as the 16-wide xy function, with perm0A/perm0B for even source rows and perm1A/perm1B for odd ones, and writes each row with two 32-bit vec_ste stores at offsets 0 and 4. */ +static void MC_put_xy_8_altivec (uint8_t * dest, const uint8_t * ref, + const int stride, int height) +{ + vector_u8_t perm0A, perm0B, perm1A, perm1B, ref0, ref1, A, B; + vector_u8_t avg0, avg1, xor0, xor1, tmp, ones; + + ones = vec_splat_u8 (1); + perm0A = vec_lvsl (0, ref); + perm0A = vec_mergeh (perm0A, perm0A); + perm0A = vec_pack ((vector_u16_t)perm0A, (vector_u16_t)perm0A); + perm0B = vec_add (perm0A, ones); + perm1A = vec_lvsl (stride, ref); + perm1A = vec_mergeh (perm1A, perm1A); + perm1A = vec_pack ((vector_u16_t)perm1A, (vector_u16_t)perm1A); + perm1B = vec_add (perm1A, ones); + + height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + ref += stride; + A = vec_perm (ref0, ref1, perm0A); + B = vec_perm (ref0, ref1, perm0B); + avg0 = vec_avg (A, B); + xor0 = vec_xor (A, B); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + ref += stride; + A = vec_perm (ref0, ref1, perm1A); + B = vec_perm (ref0, ref1, perm1B); + avg1 = vec_avg (A, B); + xor1 = vec_xor (A, B); + tmp = vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1))); + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + ref += stride; + vec_ste ((vector_u32_t)tmp, 0, (unsigned int
*)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); + dest += stride; + A = vec_perm (ref0, ref1, perm0A); + B = vec_perm (ref0, ref1, perm0B); + avg0 = vec_avg (A, B); + xor0 = vec_xor (A, B); + tmp = vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1))); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + ref += stride; + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); + dest += stride; + A = vec_perm (ref0, ref1, perm1A); + B = vec_perm (ref0, ref1, perm1B); + avg1 = vec_avg (A, B); + xor1 = vec_xor (A, B); + tmp = vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1))); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); + dest += stride; + A = vec_perm (ref0, ref1, perm0A); + B = vec_perm (ref0, ref1, perm0B); + avg0 = vec_avg (A, B); + xor0 = vec_xor (A, B); + tmp = vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1))); + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); +} +/* Compiled-out alternative MC_put_xy_8_altivec, kept under #if 0. It sums the four neighbours as 16-bit elements, adds 2, shifts right by 2 and packs back to bytes, processing one row per loop pass. NOTE(review): it stores a full vector with vec_st and its last tmp assignment in the loop is never used, so it does not look like a drop-in replacement for the active version. */ +#if 0 +static void MC_put_xy_8_altivec (uint8_t * dest, const uint8_t * ref, + const int stride, int height) +{ + vector_u8_t permA, permB, ref0, ref1, A, B, C, D, tmp, zero, ones; + vector_u16_t splat2, temp; + + ones = vec_splat_u8 (1); + permA = vec_lvsl (0, ref); + permB = vec_add (permA, ones); + + zero = vec_splat_u8 (0); + splat2 = vec_splat_u16 (2); + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + ref += stride; + A = vec_perm (ref0, ref1, permA); + B = vec_perm (ref0, ref1, permB); + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + C = vec_perm (ref0, ref1, permA); + D = vec_perm (ref0, ref1, permB); + + temp = vec_add (vec_add
((vector_u16_t)vec_mergeh (zero, A), + (vector_u16_t)vec_mergeh (zero, B)), + vec_add ((vector_u16_t)vec_mergeh (zero, C), + (vector_u16_t)vec_mergeh (zero, D))); + temp = vec_sr (vec_add (temp, splat2), splat2); + tmp = vec_pack (temp, temp); + + vec_st (tmp, 0, dest); + dest += stride; + tmp = vec_avg (vec_perm (ref0, ref1, permA), + vec_perm (ref0, ref1, permB)); + } while (--height); +} +#endif +/* 16-wide avg without interpolation: each stored row is the vec_avg of the source row and the row already in dest (prev), which is loaded before the store that would overwrite it. Two rows per loop pass; the (height >> 1) - 1 loop count appears to need an even height of at least 4. */ +static void MC_avg_o_16_altivec (uint8_t * dest, const uint8_t * ref, + const int stride, int height) +{ + vector_u8_t perm, ref0, ref1, tmp, prev; + + perm = vec_lvsl (0, ref); + + height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + ref += stride; + prev = vec_ld (0, dest); + tmp = vec_avg (prev, vec_perm (ref0, ref1, perm)); + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + ref += stride; + prev = vec_ld (stride, dest); + vec_st (tmp, 0, dest); + tmp = vec_avg (prev, vec_perm (ref0, ref1, perm)); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + ref += stride; + prev = vec_ld (2*stride, dest); + vec_st (tmp, stride, dest); + dest += 2*stride; + tmp = vec_avg (prev, vec_perm (ref0, ref1, perm)); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + prev = vec_ld (stride, dest); + vec_st (tmp, 0, dest); + tmp = vec_avg (prev, vec_perm (ref0, ref1, perm)); + vec_st (tmp, stride, dest); +} +/* 8-wide avg without interpolation: the source row (perm0 for even rows, perm1 for odd rows) is averaged with the existing dest row and written back with two 32-bit vec_ste stores at offsets 0 and 4. */ +static void MC_avg_o_8_altivec (uint8_t * dest, const uint8_t * ref, + const int stride, int height) +{ + vector_u8_t perm0, perm1, tmp0, tmp1, ref0, ref1, prev; + + tmp0 = vec_lvsl (0, ref); + tmp0 = vec_mergeh (tmp0, tmp0); + perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0); + tmp1 = vec_lvsl (stride, ref); + tmp1 = vec_mergeh (tmp1, tmp1); + perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1); + + height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + ref += stride; + prev = vec_ld (0, dest); + tmp0 = vec_avg (prev, vec_perm (ref0, ref1, perm0)); + + do { +
ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + ref += stride; + prev = vec_ld (stride, dest); + vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); + dest += stride; + tmp1 = vec_avg (prev, vec_perm (ref0, ref1, perm1)); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + ref += stride; + prev = vec_ld (stride, dest); + vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); + dest += stride; + tmp0 = vec_avg (prev, vec_perm (ref0, ref1, perm0)); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + prev = vec_ld (stride, dest); + vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); + dest += stride; + tmp1 = vec_avg (prev, vec_perm (ref0, ref1, perm1)); + vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); +} +/* 16-wide avg with horizontal interpolation: the vec_avg of the permA and permB selections of a source row (permB is permA plus 1 in every element) is averaged once more with the existing dest row (prev) before being stored. The (height >> 1) - 1 loop count appears to need an even height of at least 4. */ +static void MC_avg_x_16_altivec (uint8_t * dest, const uint8_t * ref, + const int stride, int height) +{ + vector_u8_t permA, permB, ref0, ref1, tmp, prev; + + permA = vec_lvsl (0, ref); + permB = vec_add (permA, vec_splat_u8 (1)); + + height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + prev = vec_ld (0, dest); + ref += stride; + tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA), + vec_perm (ref0, ref1, permB))); + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + prev = vec_ld (stride, dest); + vec_st (tmp, 0, dest); + tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA), + vec_perm (ref0, ref1, permB))); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + prev = vec_ld (2*stride, dest); + vec_st (tmp, stride, dest); + dest += 2*stride; + tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA), + vec_perm (ref0, ref1, permB))); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld
(16, ref); + prev = vec_ld (stride, dest); + vec_st (tmp, 0, dest); + tmp = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, permA), + vec_perm (ref0, ref1, permB))); + vec_st (tmp, stride, dest); +} +/* 8-wide avg with horizontal interpolation: perm0A/perm0B are used for even source rows and perm1A/perm1B for odd ones; the averaged pair is averaged again with the existing dest row (prev) and written with two 32-bit vec_ste stores at offsets 0 and 4. The (height >> 1) - 1 loop count appears to need an even height of at least 4. */ +static void MC_avg_x_8_altivec (uint8_t * dest, const uint8_t * ref, + const int stride, int height) +{ + vector_u8_t perm0A, perm0B, perm1A, perm1B, ones, tmp0, tmp1, ref0, ref1; + vector_u8_t prev; + + ones = vec_splat_u8 (1); + tmp0 = vec_lvsl (0, ref); + tmp0 = vec_mergeh (tmp0, tmp0); + perm0A = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0); + perm0B = vec_add (perm0A, ones); + tmp1 = vec_lvsl (stride, ref); + tmp1 = vec_mergeh (tmp1, tmp1); + perm1A = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1); + perm1B = vec_add (perm1A, ones); + + height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + prev = vec_ld (0, dest); + ref += stride; + tmp0 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm0A), + vec_perm (ref0, ref1, perm0B))); + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + ref += stride; + prev = vec_ld (stride, dest); + vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); + dest += stride; + tmp1 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm1A), + vec_perm (ref0, ref1, perm1B))); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + ref += stride; + prev = vec_ld (stride, dest); + vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); + dest += stride; + tmp0 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm0A), + vec_perm (ref0, ref1, perm0B))); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + prev = vec_ld (stride, dest); + vec_ste ((vector_u32_t)tmp0, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp0, 4, (unsigned int *)dest); + dest += stride; + tmp1 = vec_avg (prev, vec_avg (vec_perm (ref0, ref1, perm1A), + vec_perm (ref0, ref1, perm1B))); +
vec_ste ((vector_u32_t)tmp1, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp1, 4, (unsigned int *)dest); +} +/* 16-wide avg with vertical interpolation: the vec_avg of two consecutive source rows (held alternately in tmp0 and tmp1) is averaged again with the existing dest row (prev) before being stored. The (height >> 1) - 1 loop count appears to need an even height of at least 4. */ +static void MC_avg_y_16_altivec (uint8_t * dest, const uint8_t * ref, + const int stride, int height) +{ + vector_u8_t perm, ref0, ref1, tmp0, tmp1, tmp, prev; + + perm = vec_lvsl (0, ref); + + height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + ref += stride; + tmp0 = vec_perm (ref0, ref1, perm); + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + ref += stride; + prev = vec_ld (0, dest); + tmp1 = vec_perm (ref0, ref1, perm); + tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + ref += stride; + prev = vec_ld (stride, dest); + vec_st (tmp, 0, dest); + tmp0 = vec_perm (ref0, ref1, perm); + tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + ref += stride; + prev = vec_ld (2*stride, dest); + vec_st (tmp, stride, dest); + dest += 2*stride; + tmp1 = vec_perm (ref0, ref1, perm); + tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (15, ref); + prev = vec_ld (stride, dest); + vec_st (tmp, 0, dest); + tmp0 = vec_perm (ref0, ref1, perm); + tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); + vec_st (tmp, stride, dest); +} +/* 8-wide avg with vertical interpolation: same alternating tmp0/tmp1 scheme as the 16-wide version, with perm0 for even source rows and perm1 for odd ones, the result being averaged with the existing dest row and written with two 32-bit vec_ste stores at offsets 0 and 4. */ +static void MC_avg_y_8_altivec (uint8_t * dest, const uint8_t * ref, + const int stride, int height) +{ + vector_u8_t perm0, perm1, tmp0, tmp1, tmp, ref0, ref1, prev; + + tmp0 = vec_lvsl (0, ref); + tmp0 = vec_mergeh (tmp0, tmp0); + perm0 = vec_pack ((vector_u16_t)tmp0, (vector_u16_t)tmp0); + tmp1 = vec_lvsl (stride, ref); + tmp1 = vec_mergeh (tmp1, tmp1); + perm1 = vec_pack ((vector_u16_t)tmp1, (vector_u16_t)tmp1); + + height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + ref += stride; + tmp0 = vec_perm (ref0, ref1, perm0); + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + ref += stride; + prev = vec_ld (0,
dest); + tmp1 = vec_perm (ref0, ref1, perm1); + tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + ref += stride; + prev = vec_ld (stride, dest); + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); + dest += stride; + tmp0 = vec_perm (ref0, ref1, perm0); + tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + ref += stride; + prev = vec_ld (stride, dest); + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); + dest += stride; + tmp1 = vec_perm (ref0, ref1, perm1); + tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (7, ref); + prev = vec_ld (stride, dest); + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); + dest += stride; + tmp0 = vec_perm (ref0, ref1, perm0); + tmp = vec_avg (prev, vec_avg (tmp0, tmp1)); + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); +} +/* 16-wide avg with horizontal and vertical interpolation: the interpolated value is built as vec_avg (avg0, avg1) minus the one-bit correction ones & (xor0 | xor1) & (avg0 ^ avg1), where avgN/xorN are the vec_avg/vec_xor of the horizontal pair of row N, and is then averaged with the existing dest row (prev). The correction presumably compensates for the rounding of the nested averages - TODO confirm. The (height >> 1) - 1 loop count appears to need an even height of at least 4. */ +static void MC_avg_xy_16_altivec (uint8_t * dest, const uint8_t * ref, + const int stride, int height) +{ + vector_u8_t permA, permB, ref0, ref1, A, B, avg0, avg1, xor0, xor1, tmp; + vector_u8_t ones, prev; + + ones = vec_splat_u8 (1); + permA = vec_lvsl (0, ref); + permB = vec_add (permA, ones); + + height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + A = vec_perm (ref0, ref1, permA); + B = vec_perm (ref0, ref1, permB); + avg0 = vec_avg (A, B); + xor0 = vec_xor (A, B); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + prev = vec_ld (0, dest); + A = vec_perm (ref0, ref1, permA); + B = vec_perm (ref0, ref1, permB); + avg1 = vec_avg (A, B); + xor1 = vec_xor (A, B); + tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and
(ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1)))); + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + prev = vec_ld (stride, dest); + vec_st (tmp, 0, dest); + A = vec_perm (ref0, ref1, permA); + B = vec_perm (ref0, ref1, permB); + avg0 = vec_avg (A, B); + xor0 = vec_xor (A, B); + tmp = vec_avg (prev, + vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1)))); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + ref += stride; + prev = vec_ld (2*stride, dest); + vec_st (tmp, stride, dest); + dest += 2*stride; + A = vec_perm (ref0, ref1, permA); + B = vec_perm (ref0, ref1, permB); + avg1 = vec_avg (A, B); + xor1 = vec_xor (A, B); + tmp = vec_avg (prev, + vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1)))); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (16, ref); + prev = vec_ld (stride, dest); + vec_st (tmp, 0, dest); + A = vec_perm (ref0, ref1, permA); + B = vec_perm (ref0, ref1, permB); + avg0 = vec_avg (A, B); + xor0 = vec_xor (A, B); + tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1)))); + vec_st (tmp, stride, dest); +} + +static void MC_avg_xy_8_altivec (uint8_t * dest, const uint8_t * ref, + const int stride, int height) +{ + vector_u8_t perm0A, perm0B, perm1A, perm1B, ref0, ref1, A, B; + vector_u8_t avg0, avg1, xor0, xor1, tmp, ones, prev; + + ones = vec_splat_u8 (1); + perm0A = vec_lvsl (0, ref); + perm0A = vec_mergeh (perm0A, perm0A); + perm0A = vec_pack ((vector_u16_t)perm0A, (vector_u16_t)perm0A); + perm0B = vec_add (perm0A, ones); + perm1A = vec_lvsl (stride, ref); + perm1A = vec_mergeh (perm1A, perm1A); + perm1A = vec_pack ((vector_u16_t)perm1A, (vector_u16_t)perm1A); + perm1B = vec_add (perm1A, ones); + + height = (height >> 1) - 1; + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + ref += stride; + A = vec_perm (ref0, 
ref1, perm0A); + B = vec_perm (ref0, ref1, perm0B); + avg0 = vec_avg (A, B); + xor0 = vec_xor (A, B); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + ref += stride; + prev = vec_ld (0, dest); + A = vec_perm (ref0, ref1, perm1A); + B = vec_perm (ref0, ref1, perm1B); + avg1 = vec_avg (A, B); + xor1 = vec_xor (A, B); + tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1)))); + + do { + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + ref += stride; + prev = vec_ld (stride, dest); + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); + dest += stride; + A = vec_perm (ref0, ref1, perm0A); + B = vec_perm (ref0, ref1, perm0B); + avg0 = vec_avg (A, B); + xor0 = vec_xor (A, B); + tmp = vec_avg (prev, + vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1)))); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + ref += stride; + prev = vec_ld (stride, dest); + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); + dest += stride; + A = vec_perm (ref0, ref1, perm1A); + B = vec_perm (ref0, ref1, perm1B); + avg1 = vec_avg (A, B); + xor1 = vec_xor (A, B); + tmp = vec_avg (prev, + vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1)))); + } while (--height); + + ref0 = vec_ld (0, ref); + ref1 = vec_ld (8, ref); + prev = vec_ld (stride, dest); + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest); + dest += stride; + A = vec_perm (ref0, ref1, perm0A); + B = vec_perm (ref0, ref1, perm0B); + avg0 = vec_avg (A, B); + xor0 = vec_xor (A, B); + tmp = vec_avg (prev, vec_sub (vec_avg (avg0, avg1), + vec_and (vec_and (ones, vec_or (xor0, xor1)), + vec_xor (avg0, avg1)))); + vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); + vec_ste 
((vector_u32_t)tmp, 4, (unsigned int *)dest); +} + +MPEG2_MC_EXTERN (altivec) + +#endif diff --git a/src/video_dec/libmpeg2new/libmpeg2/motion_comp_mlib.c b/src/video_dec/libmpeg2new/libmpeg2/motion_comp_mlib.c new file mode 100644 index 000000000..71c085029 --- /dev/null +++ b/src/video_dec/libmpeg2new/libmpeg2/motion_comp_mlib.c @@ -0,0 +1,190 @@ +/* + * motion_comp_mlib.c + * Copyright (C) 2000-2003 Håkan Hjort <d95hjort@dtek.chalmers.se> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#ifdef LIBMPEG2_MLIB + +#include <mlib_types.h> +#include <mlib_status.h> +#include <mlib_sys.h> +#include <mlib_video.h> +#include <inttypes.h> + +#include "../include/mpeg2.h" +#include "mpeg2_internal.h" + +static void MC_put_o_16_mlib (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + if (height == 16) + mlib_VideoCopyRef_U8_U8_16x16 (dest, (uint8_t *) ref, stride); + else + mlib_VideoCopyRef_U8_U8_16x8 (dest, (uint8_t *) ref, stride); +} + +static void MC_put_x_16_mlib (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + if (height == 16) + mlib_VideoInterpX_U8_U8_16x16 (dest, (uint8_t *) ref, stride, stride); + else + mlib_VideoInterpX_U8_U8_16x8 (dest, (uint8_t *) ref, stride, stride); +} + +static void MC_put_y_16_mlib (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + if (height == 16) + mlib_VideoInterpY_U8_U8_16x16 (dest, (uint8_t *) ref, stride, stride); + else + mlib_VideoInterpY_U8_U8_16x8 (dest, (uint8_t *) ref, stride, stride); +} + +static void MC_put_xy_16_mlib (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + if (height == 16) + mlib_VideoInterpXY_U8_U8_16x16 (dest, (uint8_t *) ref, stride, stride); + else + mlib_VideoInterpXY_U8_U8_16x8 (dest, (uint8_t *) ref, stride, stride); +} + +static void MC_put_o_8_mlib (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + if (height == 8) + mlib_VideoCopyRef_U8_U8_8x8 (dest, (uint8_t *) ref, stride); + else + mlib_VideoCopyRef_U8_U8_8x4 (dest, (uint8_t *) ref, stride); +} + +static void MC_put_x_8_mlib (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + if (height == 8) + mlib_VideoInterpX_U8_U8_8x8 (dest, (uint8_t *) ref, stride, stride); + else + 
mlib_VideoInterpX_U8_U8_8x4 (dest, (uint8_t *) ref, stride, stride); +} + +static void MC_put_y_8_mlib (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + if (height == 8) + mlib_VideoInterpY_U8_U8_8x8 (dest, (uint8_t *) ref, stride, stride); + else + mlib_VideoInterpY_U8_U8_8x4 (dest, (uint8_t *) ref, stride, stride); +} + +static void MC_put_xy_8_mlib (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + if (height == 8) + mlib_VideoInterpXY_U8_U8_8x8 (dest, (uint8_t *) ref, stride, stride); + else + mlib_VideoInterpXY_U8_U8_8x4 (dest, (uint8_t *) ref, stride, stride); +} + +static void MC_avg_o_16_mlib (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + if (height == 16) + mlib_VideoCopyRefAve_U8_U8_16x16 (dest, (uint8_t *) ref, stride); + else + mlib_VideoCopyRefAve_U8_U8_16x8 (dest, (uint8_t *) ref, stride); +} + +static void MC_avg_x_16_mlib (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + if (height == 16) + mlib_VideoInterpAveX_U8_U8_16x16 (dest, (uint8_t *) ref, + stride, stride); + else + mlib_VideoInterpAveX_U8_U8_16x8 (dest, (uint8_t *) ref, + stride, stride); +} + +static void MC_avg_y_16_mlib (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + if (height == 16) + mlib_VideoInterpAveY_U8_U8_16x16 (dest, (uint8_t *) ref, + stride, stride); + else + mlib_VideoInterpAveY_U8_U8_16x8 (dest, (uint8_t *) ref, + stride, stride); +} + +static void MC_avg_xy_16_mlib (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + if (height == 16) + mlib_VideoInterpAveXY_U8_U8_16x16 (dest, (uint8_t *) ref, + stride, stride); + else + mlib_VideoInterpAveXY_U8_U8_16x8 (dest, (uint8_t *) ref, + stride, stride); +} + +static void MC_avg_o_8_mlib (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + if (height == 8) + mlib_VideoCopyRefAve_U8_U8_8x8 (dest, (uint8_t *) ref, stride); + else + mlib_VideoCopyRefAve_U8_U8_8x4 (dest, (uint8_t *) ref, stride); +} + 
+static void MC_avg_x_8_mlib (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + if (height == 8) + mlib_VideoInterpAveX_U8_U8_8x8 (dest, (uint8_t *) ref, stride, stride); + else + mlib_VideoInterpAveX_U8_U8_8x4 (dest, (uint8_t *) ref, stride, stride); +} + +static void MC_avg_y_8_mlib (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + if (height == 8) + mlib_VideoInterpAveY_U8_U8_8x8 (dest, (uint8_t *) ref, stride, stride); + else + mlib_VideoInterpAveY_U8_U8_8x4 (dest, (uint8_t *) ref, stride, stride); +} + +static void MC_avg_xy_8_mlib (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + if (height == 8) + mlib_VideoInterpAveXY_U8_U8_8x8 (dest, (uint8_t *) ref, + stride, stride); + else + mlib_VideoInterpAveXY_U8_U8_8x4 (dest, (uint8_t *) ref, + stride, stride); +} + +MPEG2_MC_EXTERN (mlib) + +#endif diff --git a/src/video_dec/libmpeg2new/libmpeg2/motion_comp_mmx.c b/src/video_dec/libmpeg2new/libmpeg2/motion_comp_mmx.c new file mode 100644 index 000000000..8694bdfea --- /dev/null +++ b/src/video_dec/libmpeg2new/libmpeg2/motion_comp_mmx.c @@ -0,0 +1,1005 @@ +/* + * motion_comp_mmx.c + * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org> + * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#ifdef ARCH_X86 + +#include <inttypes.h> + +#include "../include/mpeg2.h" +#include "../include/attributes.h" +#include "mpeg2_internal.h" +#include "../include/mmx.h" + +#define CPU_MMXEXT 0 +#define CPU_3DNOW 1 + + +/* MMX code - needs a rewrite */ + +/* + * Motion Compensation frequently needs to average values using the + * formula (x+y+1)>>1. Both MMXEXT and 3Dnow include one instruction + * to compute this, but it's been left out of classic MMX. + * + * We need to be careful of overflows when doing this computation. + * Rather than unpacking data to 16-bits, which reduces parallelism, + * we use the following formulas: + * + * (x+y)>>1 == (x&y)+((x^y)>>1) + * (x+y+1)>>1 == (x|y)-((x^y)>>1) + */ + +/* some rounding constants */ +static mmx_t mask1 = {0xfefefefefefefefeLL}; +static mmx_t round4 = {0x0002000200020002LL}; + +/* + * This code should probably be compiled with loop unrolling + * (i.e., -funroll-loops in gcc) because some of the loops + * use a small static number of iterations. 
This was written + * with the assumption the compiler knows best about when + * unrolling will help + */ + +static inline void mmx_zero_reg () +{ + /* load 0 into mm0 */ + pxor_r2r (mm0, mm0); +} + +static inline void mmx_average_2_U8 (uint8_t * dest, const uint8_t * src1, + const uint8_t * src2) +{ + /* *dest = (*src1 + *src2 + 1)/ 2; */ + + movq_m2r (*src1, mm1); /* load 8 src1 bytes */ + movq_r2r (mm1, mm2); /* copy 8 src1 bytes */ + + movq_m2r (*src2, mm3); /* load 8 src2 bytes */ + movq_r2r (mm3, mm4); /* copy 8 src2 bytes */ + + pxor_r2r (mm1, mm3); /* xor src1 and src2 */ + pand_m2r (mask1, mm3); /* mask lower bits */ + psrlq_i2r (1, mm3); /* /2 */ + por_r2r (mm2, mm4); /* or src1 and src2 */ + psubb_r2r (mm3, mm4); /* subtract subresults */ + movq_r2m (mm4, *dest); /* store result in dest */ +} + +static inline void mmx_interp_average_2_U8 (uint8_t * dest, + const uint8_t * src1, + const uint8_t * src2) +{ + /* *dest = (*dest + (*src1 + *src2 + 1)/ 2 + 1)/ 2; */ + + movq_m2r (*dest, mm1); /* load 8 dest bytes */ + movq_r2r (mm1, mm2); /* copy 8 dest bytes */ + + movq_m2r (*src1, mm3); /* load 8 src1 bytes */ + movq_r2r (mm3, mm4); /* copy 8 src1 bytes */ + + movq_m2r (*src2, mm5); /* load 8 src2 bytes */ + movq_r2r (mm5, mm6); /* copy 8 src2 bytes */ + + pxor_r2r (mm3, mm5); /* xor src1 and src2 */ + pand_m2r (mask1, mm5); /* mask lower bits */ + psrlq_i2r (1, mm5); /* /2 */ + por_r2r (mm4, mm6); /* or src1 and src2 */ + psubb_r2r (mm5, mm6); /* subtract subresults */ + movq_r2r (mm6, mm5); /* copy subresult */ + + pxor_r2r (mm1, mm5); /* xor srcavg and dest */ + pand_m2r (mask1, mm5); /* mask lower bits */ + psrlq_i2r (1, mm5); /* /2 */ + por_r2r (mm2, mm6); /* or srcavg and dest */ + psubb_r2r (mm5, mm6); /* subtract subresults */ + movq_r2m (mm6, *dest); /* store result in dest */ +} + +static inline void mmx_average_4_U8 (uint8_t * dest, const uint8_t * src1, + const uint8_t * src2, + const uint8_t * src3, + const uint8_t * src4) +{ + /* *dest = (*src1 
+ *src2 + *src3 + *src4 + 2)/ 4; */ + + movq_m2r (*src1, mm1); /* load 8 src1 bytes */ + movq_r2r (mm1, mm2); /* copy 8 src1 bytes */ + + punpcklbw_r2r (mm0, mm1); /* unpack low src1 bytes */ + punpckhbw_r2r (mm0, mm2); /* unpack high src1 bytes */ + + movq_m2r (*src2, mm3); /* load 8 src2 bytes */ + movq_r2r (mm3, mm4); /* copy 8 src2 bytes */ + + punpcklbw_r2r (mm0, mm3); /* unpack low src2 bytes */ + punpckhbw_r2r (mm0, mm4); /* unpack high src2 bytes */ + + paddw_r2r (mm3, mm1); /* add lows */ + paddw_r2r (mm4, mm2); /* add highs */ + + /* now have partials in mm1 and mm2 */ + + movq_m2r (*src3, mm3); /* load 8 src3 bytes */ + movq_r2r (mm3, mm4); /* copy 8 src3 bytes */ + + punpcklbw_r2r (mm0, mm3); /* unpack low src3 bytes */ + punpckhbw_r2r (mm0, mm4); /* unpack high src3 bytes */ + + paddw_r2r (mm3, mm1); /* add lows */ + paddw_r2r (mm4, mm2); /* add highs */ + + movq_m2r (*src4, mm5); /* load 8 src4 bytes */ + movq_r2r (mm5, mm6); /* copy 8 src4 bytes */ + + punpcklbw_r2r (mm0, mm5); /* unpack low src4 bytes */ + punpckhbw_r2r (mm0, mm6); /* unpack high src4 bytes */ + + paddw_r2r (mm5, mm1); /* add lows */ + paddw_r2r (mm6, mm2); /* add highs */ + + /* now have subtotal in mm1 and mm2 */ + + paddw_m2r (round4, mm1); + psraw_i2r (2, mm1); /* /4 */ + paddw_m2r (round4, mm2); + psraw_i2r (2, mm2); /* /4 */ + + packuswb_r2r (mm2, mm1); /* pack (w/ saturation) */ + movq_r2m (mm1, *dest); /* store result in dest */ +} + +static inline void mmx_interp_average_4_U8 (uint8_t * dest, + const uint8_t * src1, + const uint8_t * src2, + const uint8_t * src3, + const uint8_t * src4) +{ + /* *dest = (*dest + (*src1 + *src2 + *src3 + *src4 + 2)/ 4 + 1)/ 2; */ + + movq_m2r (*src1, mm1); /* load 8 src1 bytes */ + movq_r2r (mm1, mm2); /* copy 8 src1 bytes */ + + punpcklbw_r2r (mm0, mm1); /* unpack low src1 bytes */ + punpckhbw_r2r (mm0, mm2); /* unpack high src1 bytes */ + + movq_m2r (*src2, mm3); /* load 8 src2 bytes */ + movq_r2r (mm3, mm4); /* copy 8 src2 bytes */ + + 
punpcklbw_r2r (mm0, mm3); /* unpack low src2 bytes */ + punpckhbw_r2r (mm0, mm4); /* unpack high src2 bytes */ + + paddw_r2r (mm3, mm1); /* add lows */ + paddw_r2r (mm4, mm2); /* add highs */ + + /* now have partials in mm1 and mm2 */ + + movq_m2r (*src3, mm3); /* load 8 src3 bytes */ + movq_r2r (mm3, mm4); /* copy 8 src3 bytes */ + + punpcklbw_r2r (mm0, mm3); /* unpack low src3 bytes */ + punpckhbw_r2r (mm0, mm4); /* unpack high src3 bytes */ + + paddw_r2r (mm3, mm1); /* add lows */ + paddw_r2r (mm4, mm2); /* add highs */ + + movq_m2r (*src4, mm5); /* load 8 src4 bytes */ + movq_r2r (mm5, mm6); /* copy 8 src4 bytes */ + + punpcklbw_r2r (mm0, mm5); /* unpack low src4 bytes */ + punpckhbw_r2r (mm0, mm6); /* unpack high src4 bytes */ + + paddw_r2r (mm5, mm1); /* add lows */ + paddw_r2r (mm6, mm2); /* add highs */ + + paddw_m2r (round4, mm1); + psraw_i2r (2, mm1); /* /4 */ + paddw_m2r (round4, mm2); + psraw_i2r (2, mm2); /* /4 */ + + /* now have subtotal/4 in mm1 and mm2 */ + + movq_m2r (*dest, mm3); /* load 8 dest bytes */ + movq_r2r (mm3, mm4); /* copy 8 dest bytes */ + + packuswb_r2r (mm2, mm1); /* pack (w/ saturation) */ + movq_r2r (mm1,mm2); /* copy subresult */ + + pxor_r2r (mm1, mm3); /* xor srcavg and dest */ + pand_m2r (mask1, mm3); /* mask lower bits */ + psrlq_i2r (1, mm3); /* /2 */ + por_r2r (mm2, mm4); /* or srcavg and dest */ + psubb_r2r (mm3, mm4); /* subtract subresults */ + movq_r2m (mm4, *dest); /* store result in dest */ +} + +/*-----------------------------------------------------------------------*/ + +static inline void MC_avg_mmx (const int width, int height, uint8_t * dest, + const uint8_t * ref, const int stride) +{ + mmx_zero_reg (); + + do { + mmx_average_2_U8 (dest, dest, ref); + + if (width == 16) + mmx_average_2_U8 (dest+8, dest+8, ref+8); + + dest += stride; + ref += stride; + } while (--height); +} + +static void MC_avg_o_16_mmx (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg_mmx (16, height, dest, ref, 
stride); +} + +static void MC_avg_o_8_mmx (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg_mmx (8, height, dest, ref, stride); +} + +/*-----------------------------------------------------------------------*/ + +static inline void MC_put_mmx (const int width, int height, uint8_t * dest, + const uint8_t * ref, const int stride) +{ + mmx_zero_reg (); + + do { + movq_m2r (* ref, mm1); /* load 8 ref bytes */ + movq_r2m (mm1,* dest); /* store 8 bytes at curr */ + + if (width == 16) + { + movq_m2r (* (ref+8), mm1); /* load 8 ref bytes */ + movq_r2m (mm1,* (dest+8)); /* store 8 bytes at curr */ + } + + dest += stride; + ref += stride; + } while (--height); +} + +static void MC_put_o_16_mmx (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_put_mmx (16, height, dest, ref, stride); +} + +static void MC_put_o_8_mmx (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_put_mmx (8, height, dest, ref, stride); +} + +/*-----------------------------------------------------------------------*/ + +/* Half pixel interpolation in the x direction */ +static inline void MC_avg_x_mmx (const int width, int height, uint8_t * dest, + const uint8_t * ref, const int stride) +{ + mmx_zero_reg (); + + do { + mmx_interp_average_2_U8 (dest, ref, ref+1); + + if (width == 16) + mmx_interp_average_2_U8 (dest+8, ref+8, ref+9); + + dest += stride; + ref += stride; + } while (--height); +} + +static void MC_avg_x_16_mmx (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg_x_mmx (16, height, dest, ref, stride); +} + +static void MC_avg_x_8_mmx (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg_x_mmx (8, height, dest, ref, stride); +} + +/*-----------------------------------------------------------------------*/ + +static inline void MC_put_x_mmx (const int width, int height, uint8_t * dest, + const uint8_t * ref, const int stride) +{ + mmx_zero_reg (); + + do { + mmx_average_2_U8 (dest, 
ref, ref+1); + + if (width == 16) + mmx_average_2_U8 (dest+8, ref+8, ref+9); + + dest += stride; + ref += stride; + } while (--height); +} + +static void MC_put_x_16_mmx (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_put_x_mmx (16, height, dest, ref, stride); +} + +static void MC_put_x_8_mmx (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_put_x_mmx (8, height, dest, ref, stride); +} + +/*-----------------------------------------------------------------------*/ + +static inline void MC_avg_xy_mmx (const int width, int height, uint8_t * dest, + const uint8_t * ref, const int stride) +{ + const uint8_t * ref_next = ref + stride; + + mmx_zero_reg (); + + do { + mmx_interp_average_4_U8 (dest, ref, ref+1, ref_next, ref_next+1); + + if (width == 16) + mmx_interp_average_4_U8 (dest+8, ref+8, ref+9, + ref_next+8, ref_next+9); + + dest += stride; + ref += stride; + ref_next += stride; + } while (--height); +} + +static void MC_avg_xy_16_mmx (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg_xy_mmx (16, height, dest, ref, stride); +} + +static void MC_avg_xy_8_mmx (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg_xy_mmx (8, height, dest, ref, stride); +} + +/*-----------------------------------------------------------------------*/ + +static inline void MC_put_xy_mmx (const int width, int height, uint8_t * dest, + const uint8_t * ref, const int stride) +{ + const uint8_t * ref_next = ref + stride; + + mmx_zero_reg (); + + do { + mmx_average_4_U8 (dest, ref, ref+1, ref_next, ref_next+1); + + if (width == 16) + mmx_average_4_U8 (dest+8, ref+8, ref+9, ref_next+8, ref_next+9); + + dest += stride; + ref += stride; + ref_next += stride; + } while (--height); +} + +static void MC_put_xy_16_mmx (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_put_xy_mmx (16, height, dest, ref, stride); +} + +static void MC_put_xy_8_mmx (uint8_t * dest, const uint8_t * ref, + int 
stride, int height) +{ + MC_put_xy_mmx (8, height, dest, ref, stride); +} + +/*-----------------------------------------------------------------------*/ + +static inline void MC_avg_y_mmx (const int width, int height, uint8_t * dest, + const uint8_t * ref, const int stride) +{ + const uint8_t * ref_next = ref + stride; + + mmx_zero_reg (); + + do { + mmx_interp_average_2_U8 (dest, ref, ref_next); + + if (width == 16) + mmx_interp_average_2_U8 (dest+8, ref+8, ref_next+8); + + dest += stride; + ref += stride; + ref_next += stride; + } while (--height); +} + +static void MC_avg_y_16_mmx (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg_y_mmx (16, height, dest, ref, stride); +} + +static void MC_avg_y_8_mmx (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg_y_mmx (8, height, dest, ref, stride); +} + +/*-----------------------------------------------------------------------*/ + +static inline void MC_put_y_mmx (const int width, int height, uint8_t * dest, + const uint8_t * ref, const int stride) +{ + const uint8_t * ref_next = ref + stride; + + mmx_zero_reg (); + + do { + mmx_average_2_U8 (dest, ref, ref_next); + + if (width == 16) + mmx_average_2_U8 (dest+8, ref+8, ref_next+8); + + dest += stride; + ref += stride; + ref_next += stride; + } while (--height); +} + +static void MC_put_y_16_mmx (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_put_y_mmx (16, height, dest, ref, stride); +} + +static void MC_put_y_8_mmx (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_put_y_mmx (8, height, dest, ref, stride); +} + + +MPEG2_MC_EXTERN (mmx) + + + + + + + +/* CPU_MMXEXT/CPU_3DNOW adaptation layer */ + +#define pavg_r2r(src,dest) \ +do { \ + if (cpu == CPU_MMXEXT) \ + pavgb_r2r (src, dest); \ + else \ + pavgusb_r2r (src, dest); \ +} while (0) + +#define pavg_m2r(src,dest) \ +do { \ + if (cpu == CPU_MMXEXT) \ + pavgb_m2r (src, dest); \ + else \ + pavgusb_m2r (src, dest); \ +} while (0) 
+ + +/* CPU_MMXEXT code */ + + +static inline void MC_put1_8 (int height, uint8_t * dest, const uint8_t * ref, + const int stride) +{ + do { + movq_m2r (*ref, mm0); + movq_r2m (mm0, *dest); + ref += stride; + dest += stride; + } while (--height); +} + +static inline void MC_put1_16 (int height, uint8_t * dest, const uint8_t * ref, + const int stride) +{ + do { + movq_m2r (*ref, mm0); + movq_m2r (*(ref+8), mm1); + ref += stride; + movq_r2m (mm0, *dest); + movq_r2m (mm1, *(dest+8)); + dest += stride; + } while (--height); +} + +static inline void MC_avg1_8 (int height, uint8_t * dest, const uint8_t * ref, + const int stride, const int cpu) +{ + do { + movq_m2r (*ref, mm0); + pavg_m2r (*dest, mm0); + ref += stride; + movq_r2m (mm0, *dest); + dest += stride; + } while (--height); +} + +static inline void MC_avg1_16 (int height, uint8_t * dest, const uint8_t * ref, + const int stride, const int cpu) +{ + do { + movq_m2r (*ref, mm0); + movq_m2r (*(ref+8), mm1); + pavg_m2r (*dest, mm0); + pavg_m2r (*(dest+8), mm1); + movq_r2m (mm0, *dest); + ref += stride; + movq_r2m (mm1, *(dest+8)); + dest += stride; + } while (--height); +} + +static inline void MC_put2_8 (int height, uint8_t * dest, const uint8_t * ref, + const int stride, const int offset, + const int cpu) +{ + do { + movq_m2r (*ref, mm0); + pavg_m2r (*(ref+offset), mm0); + ref += stride; + movq_r2m (mm0, *dest); + dest += stride; + } while (--height); +} + +static inline void MC_put2_16 (int height, uint8_t * dest, const uint8_t * ref, + const int stride, const int offset, + const int cpu) +{ + do { + movq_m2r (*ref, mm0); + movq_m2r (*(ref+8), mm1); + pavg_m2r (*(ref+offset), mm0); + pavg_m2r (*(ref+offset+8), mm1); + movq_r2m (mm0, *dest); + ref += stride; + movq_r2m (mm1, *(dest+8)); + dest += stride; + } while (--height); +} + +static inline void MC_avg2_8 (int height, uint8_t * dest, const uint8_t * ref, + const int stride, const int offset, + const int cpu) +{ + do { + movq_m2r (*ref, mm0); + pavg_m2r 
(*(ref+offset), mm0); + pavg_m2r (*dest, mm0); + ref += stride; + movq_r2m (mm0, *dest); + dest += stride; + } while (--height); +} + +static inline void MC_avg2_16 (int height, uint8_t * dest, const uint8_t * ref, + const int stride, const int offset, + const int cpu) +{ + do { + movq_m2r (*ref, mm0); + movq_m2r (*(ref+8), mm1); + pavg_m2r (*(ref+offset), mm0); + pavg_m2r (*(ref+offset+8), mm1); + pavg_m2r (*dest, mm0); + pavg_m2r (*(dest+8), mm1); + ref += stride; + movq_r2m (mm0, *dest); + movq_r2m (mm1, *(dest+8)); + dest += stride; + } while (--height); +} + +static mmx_t mask_one = {0x0101010101010101LL}; + +static inline void MC_put4_8 (int height, uint8_t * dest, const uint8_t * ref, + const int stride, const int cpu) +{ + movq_m2r (*ref, mm0); + movq_m2r (*(ref+1), mm1); + movq_r2r (mm0, mm7); + pxor_r2r (mm1, mm7); + pavg_r2r (mm1, mm0); + ref += stride; + + do { + movq_m2r (*ref, mm2); + movq_r2r (mm0, mm5); + + movq_m2r (*(ref+1), mm3); + movq_r2r (mm2, mm6); + + pxor_r2r (mm3, mm6); + pavg_r2r (mm3, mm2); + + por_r2r (mm6, mm7); + pxor_r2r (mm2, mm5); + + pand_r2r (mm5, mm7); + pavg_r2r (mm2, mm0); + + pand_m2r (mask_one, mm7); + + psubusb_r2r (mm7, mm0); + + ref += stride; + movq_r2m (mm0, *dest); + dest += stride; + + movq_r2r (mm6, mm7); /* unroll ! */ + movq_r2r (mm2, mm0); /* unroll ! 
*/ + } while (--height); +} + +static inline void MC_put4_16 (int height, uint8_t * dest, const uint8_t * ref, + const int stride, const int cpu) +{ + do { + movq_m2r (*ref, mm0); + movq_m2r (*(ref+stride+1), mm1); + movq_r2r (mm0, mm7); + movq_m2r (*(ref+1), mm2); + pxor_r2r (mm1, mm7); + movq_m2r (*(ref+stride), mm3); + movq_r2r (mm2, mm6); + pxor_r2r (mm3, mm6); + pavg_r2r (mm1, mm0); + pavg_r2r (mm3, mm2); + por_r2r (mm6, mm7); + movq_r2r (mm0, mm6); + pxor_r2r (mm2, mm6); + pand_r2r (mm6, mm7); + pand_m2r (mask_one, mm7); + pavg_r2r (mm2, mm0); + psubusb_r2r (mm7, mm0); + movq_r2m (mm0, *dest); + + movq_m2r (*(ref+8), mm0); + movq_m2r (*(ref+stride+9), mm1); + movq_r2r (mm0, mm7); + movq_m2r (*(ref+9), mm2); + pxor_r2r (mm1, mm7); + movq_m2r (*(ref+stride+8), mm3); + movq_r2r (mm2, mm6); + pxor_r2r (mm3, mm6); + pavg_r2r (mm1, mm0); + pavg_r2r (mm3, mm2); + por_r2r (mm6, mm7); + movq_r2r (mm0, mm6); + pxor_r2r (mm2, mm6); + pand_r2r (mm6, mm7); + pand_m2r (mask_one, mm7); + pavg_r2r (mm2, mm0); + psubusb_r2r (mm7, mm0); + ref += stride; + movq_r2m (mm0, *(dest+8)); + dest += stride; + } while (--height); +} + +static inline void MC_avg4_8 (int height, uint8_t * dest, const uint8_t * ref, + const int stride, const int cpu) +{ + do { + movq_m2r (*ref, mm0); + movq_m2r (*(ref+stride+1), mm1); + movq_r2r (mm0, mm7); + movq_m2r (*(ref+1), mm2); + pxor_r2r (mm1, mm7); + movq_m2r (*(ref+stride), mm3); + movq_r2r (mm2, mm6); + pxor_r2r (mm3, mm6); + pavg_r2r (mm1, mm0); + pavg_r2r (mm3, mm2); + por_r2r (mm6, mm7); + movq_r2r (mm0, mm6); + pxor_r2r (mm2, mm6); + pand_r2r (mm6, mm7); + pand_m2r (mask_one, mm7); + pavg_r2r (mm2, mm0); + psubusb_r2r (mm7, mm0); + movq_m2r (*dest, mm1); + pavg_r2r (mm1, mm0); + ref += stride; + movq_r2m (mm0, *dest); + dest += stride; + } while (--height); +} + +static inline void MC_avg4_16 (int height, uint8_t * dest, const uint8_t * ref, + const int stride, const int cpu) +{ + do { + movq_m2r (*ref, mm0); + movq_m2r (*(ref+stride+1), 
mm1); + movq_r2r (mm0, mm7); + movq_m2r (*(ref+1), mm2); + pxor_r2r (mm1, mm7); + movq_m2r (*(ref+stride), mm3); + movq_r2r (mm2, mm6); + pxor_r2r (mm3, mm6); + pavg_r2r (mm1, mm0); + pavg_r2r (mm3, mm2); + por_r2r (mm6, mm7); + movq_r2r (mm0, mm6); + pxor_r2r (mm2, mm6); + pand_r2r (mm6, mm7); + pand_m2r (mask_one, mm7); + pavg_r2r (mm2, mm0); + psubusb_r2r (mm7, mm0); + movq_m2r (*dest, mm1); + pavg_r2r (mm1, mm0); + movq_r2m (mm0, *dest); + + movq_m2r (*(ref+8), mm0); + movq_m2r (*(ref+stride+9), mm1); + movq_r2r (mm0, mm7); + movq_m2r (*(ref+9), mm2); + pxor_r2r (mm1, mm7); + movq_m2r (*(ref+stride+8), mm3); + movq_r2r (mm2, mm6); + pxor_r2r (mm3, mm6); + pavg_r2r (mm1, mm0); + pavg_r2r (mm3, mm2); + por_r2r (mm6, mm7); + movq_r2r (mm0, mm6); + pxor_r2r (mm2, mm6); + pand_r2r (mm6, mm7); + pand_m2r (mask_one, mm7); + pavg_r2r (mm2, mm0); + psubusb_r2r (mm7, mm0); + movq_m2r (*(dest+8), mm1); + pavg_r2r (mm1, mm0); + ref += stride; + movq_r2m (mm0, *(dest+8)); + dest += stride; + } while (--height); +} + +static void MC_avg_o_16_mmxext (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg1_16 (height, dest, ref, stride, CPU_MMXEXT); +} + +static void MC_avg_o_8_mmxext (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg1_8 (height, dest, ref, stride, CPU_MMXEXT); +} + +static void MC_put_o_16_mmxext (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_put1_16 (height, dest, ref, stride); +} + +static void MC_put_o_8_mmxext (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_put1_8 (height, dest, ref, stride); +} + +static void MC_avg_x_16_mmxext (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg2_16 (height, dest, ref, stride, 1, CPU_MMXEXT); +} + +static void MC_avg_x_8_mmxext (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg2_8 (height, dest, ref, stride, 1, CPU_MMXEXT); +} + +static void MC_put_x_16_mmxext (uint8_t * dest, 
const uint8_t * ref, + int stride, int height) +{ + MC_put2_16 (height, dest, ref, stride, 1, CPU_MMXEXT); +} + +static void MC_put_x_8_mmxext (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_put2_8 (height, dest, ref, stride, 1, CPU_MMXEXT); +} + +static void MC_avg_y_16_mmxext (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg2_16 (height, dest, ref, stride, stride, CPU_MMXEXT); +} + +static void MC_avg_y_8_mmxext (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg2_8 (height, dest, ref, stride, stride, CPU_MMXEXT); +} + +static void MC_put_y_16_mmxext (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_put2_16 (height, dest, ref, stride, stride, CPU_MMXEXT); +} + +static void MC_put_y_8_mmxext (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_put2_8 (height, dest, ref, stride, stride, CPU_MMXEXT); +} + +static void MC_avg_xy_16_mmxext (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg4_16 (height, dest, ref, stride, CPU_MMXEXT); +} + +static void MC_avg_xy_8_mmxext (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg4_8 (height, dest, ref, stride, CPU_MMXEXT); +} + +static void MC_put_xy_16_mmxext (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_put4_16 (height, dest, ref, stride, CPU_MMXEXT); +} + +static void MC_put_xy_8_mmxext (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_put4_8 (height, dest, ref, stride, CPU_MMXEXT); +} + + +MPEG2_MC_EXTERN (mmxext) + + + +static void MC_avg_o_16_3dnow (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg1_16 (height, dest, ref, stride, CPU_3DNOW); +} + +static void MC_avg_o_8_3dnow (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg1_8 (height, dest, ref, stride, CPU_3DNOW); +} + +static void MC_put_o_16_3dnow (uint8_t * dest, const uint8_t * ref, + int stride, int 
height) +{ + MC_put1_16 (height, dest, ref, stride); +} + +static void MC_put_o_8_3dnow (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_put1_8 (height, dest, ref, stride); +} + +static void MC_avg_x_16_3dnow (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg2_16 (height, dest, ref, stride, 1, CPU_3DNOW); +} + +static void MC_avg_x_8_3dnow (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg2_8 (height, dest, ref, stride, 1, CPU_3DNOW); +} + +static void MC_put_x_16_3dnow (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_put2_16 (height, dest, ref, stride, 1, CPU_3DNOW); +} + +static void MC_put_x_8_3dnow (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_put2_8 (height, dest, ref, stride, 1, CPU_3DNOW); +} + +static void MC_avg_y_16_3dnow (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg2_16 (height, dest, ref, stride, stride, CPU_3DNOW); +} + +static void MC_avg_y_8_3dnow (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg2_8 (height, dest, ref, stride, stride, CPU_3DNOW); +} + +static void MC_put_y_16_3dnow (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_put2_16 (height, dest, ref, stride, stride, CPU_3DNOW); +} + +static void MC_put_y_8_3dnow (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_put2_8 (height, dest, ref, stride, stride, CPU_3DNOW); +} + +static void MC_avg_xy_16_3dnow (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg4_16 (height, dest, ref, stride, CPU_3DNOW); +} + +static void MC_avg_xy_8_3dnow (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_avg4_8 (height, dest, ref, stride, CPU_3DNOW); +} + +static void MC_put_xy_16_3dnow (uint8_t * dest, const uint8_t * ref, + int stride, int height) +{ + MC_put4_16 (height, dest, ref, stride, CPU_3DNOW); +} + +static void MC_put_xy_8_3dnow (uint8_t * 
dest, const uint8_t * ref, + int stride, int height) +{ + MC_put4_8 (height, dest, ref, stride, CPU_3DNOW); +} + + +MPEG2_MC_EXTERN (3dnow) + +#endif diff --git a/src/video_dec/libmpeg2new/libmpeg2/motion_comp_vis.c b/src/video_dec/libmpeg2new/libmpeg2/motion_comp_vis.c new file mode 100644 index 000000000..e4b61aaa7 --- /dev/null +++ b/src/video_dec/libmpeg2new/libmpeg2/motion_comp_vis.c @@ -0,0 +1,2061 @@ +/* + * motion_comp_vis.c + * Copyright (C) 2003 David S. Miller <davem@redhat.com> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#if defined(ARCH_SPARC) && defined(ENABLE_VIS) + +#include <inttypes.h> + +#include "../include/mpeg2.h" +#include "../include/attributes.h" +#include "mpeg2_internal.h" +#include "../include/vis.h" + +/* The trick used in some of this file is the formula from the MMX + * motion comp code, which is: + * + * (x+y+1)>>1 == (x|y)-((x^y)>>1) + * + * This allows us to average 8 bytes at a time in a 64-bit FPU reg. + * We avoid overflows by masking before we do the shift, and we + * implement the shift by multiplying by 1/2 using mul8x16. 
So in + * VIS this is (assume 'x' is in f0, 'y' is in f2, a repeating mask + * of '0xfe' is in f4, a repeating mask of '0x7f' is in f6, and + * the value 0x80808080 is in f8): + * + * fxor f0, f2, f10 + * fand f10, f4, f10 + * fmul8x16 f8, f10, f10 + * fand f10, f6, f10 + * for f0, f2, f12 + * fpsub16 f12, f10, f10 + */ + +#define DUP4(x) {x, x, x, x} +#define DUP8(x) {x, x, x, x, x, x, x, x} +static const int16_t constants1[] ATTR_ALIGN(8) = DUP4 (1); +static const int16_t constants2[] ATTR_ALIGN(8) = DUP4 (2); +static const int16_t constants3[] ATTR_ALIGN(8) = DUP4 (3); +static const int16_t constants6[] ATTR_ALIGN(8) = DUP4 (6); +static const int8_t constants_fe[] ATTR_ALIGN(8) = DUP8 (0xfe); +static const int8_t constants_7f[] ATTR_ALIGN(8) = DUP8 (0x7f); +static const int8_t constants128[] ATTR_ALIGN(8) = DUP8 (128); +static const int16_t constants256_512[] ATTR_ALIGN(8) = + {256, 512, 256, 512}; +static const int16_t constants256_1024[] ATTR_ALIGN(8) = + {256, 1024, 256, 1024}; + +#define REF_0 0 +#define REF_0_1 1 +#define REF_2 2 +#define REF_2_1 3 +#define REF_4 4 +#define REF_4_1 5 +#define REF_6 6 +#define REF_6_1 7 +#define REF_S0 8 +#define REF_S0_1 9 +#define REF_S2 10 +#define REF_S2_1 11 +#define REF_S4 12 +#define REF_S4_1 13 +#define REF_S6 14 +#define REF_S6_1 15 +#define DST_0 16 +#define DST_1 17 +#define DST_2 18 +#define DST_3 19 +#define CONST_1 20 +#define CONST_2 20 +#define CONST_3 20 +#define CONST_6 20 +#define MASK_fe 20 +#define CONST_128 22 +#define CONST_256 22 +#define CONST_512 22 +#define CONST_1024 22 +#define TMP0 24 +#define TMP1 25 +#define TMP2 26 +#define TMP3 27 +#define TMP4 28 +#define TMP5 29 +#define ZERO 30 +#define MASK_7f 30 + +#define TMP6 32 +#define TMP8 34 +#define TMP10 36 +#define TMP12 38 +#define TMP14 40 +#define TMP16 42 +#define TMP18 44 +#define TMP20 46 +#define TMP22 48 +#define TMP24 50 +#define TMP26 52 +#define TMP28 54 +#define TMP30 56 +#define TMP32 58 + +static void MC_put_o_16_vis (uint8_t * 
dest, const uint8_t * _ref, + const int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + int offset; + + ref = vis_alignaddr(ref); + offset = (ref != _ref) ? 16 : 0; + do { /* 5 cycles */ + vis_ld64(ref[0], TMP0); + + vis_ld64_2(ref, 8, TMP2); + + vis_ld64_2(ref, offset, TMP4); + ref += stride; + + vis_faligndata(TMP0, TMP2, REF_0); + vis_st64(REF_0, dest[0]); + + vis_faligndata(TMP2, TMP4, REF_2); + vis_st64_2(REF_2, dest, 8); + dest += stride; + } while (--height); +} + +static void MC_put_o_8_vis (uint8_t * dest, const uint8_t * _ref, + const int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + int offset; + + ref = vis_alignaddr(ref); + offset = (ref != _ref) ? 8 : 0; + do { /* 4 cycles */ + vis_ld64(ref[0], TMP0); + + vis_ld64_2(ref, offset, TMP2); + ref += stride; + + /* stall */ + + vis_faligndata(TMP0, TMP2, REF_0); + vis_st64(REF_0, dest[0]); + dest += stride; + } while (--height); +} + + +static void MC_avg_o_16_vis (uint8_t * dest, const uint8_t * _ref, + const int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + int stride_8 = stride + 8; + int offset; + + ref = vis_alignaddr(ref); + offset = (ref != _ref) ? 
16 : 0; + + vis_ld64(ref[0], TMP0); + + vis_ld64(ref[8], TMP2); + + vis_ld64_2(ref, offset, TMP4); + + vis_ld64(dest[0], DST_0); + + vis_ld64(dest[8], DST_2); + + vis_ld64(constants_fe[0], MASK_fe); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64(constants_7f[0], MASK_7f); + vis_faligndata(TMP2, TMP4, REF_2); + + vis_ld64(constants128[0], CONST_128); + + ref += stride; + height = (height >> 1) - 1; + + do { /* 24 cycles */ + vis_ld64(ref[0], TMP0); + vis_xor(DST_0, REF_0, TMP6); + + vis_ld64_2(ref, 8, TMP2); + vis_and(TMP6, MASK_fe, TMP6); + + vis_ld64_2(ref, offset, TMP4); + ref += stride; + vis_mul8x16(CONST_128, TMP6, TMP6); + vis_xor(DST_2, REF_2, TMP8); + + vis_and(TMP8, MASK_fe, TMP8); + + vis_or(DST_0, REF_0, TMP10); + vis_ld64_2(dest, stride, DST_0); + vis_mul8x16(CONST_128, TMP8, TMP8); + + vis_or(DST_2, REF_2, TMP12); + vis_ld64_2(dest, stride_8, DST_2); + + vis_ld64(ref[0], TMP14); + vis_and(TMP6, MASK_7f, TMP6); + + vis_and(TMP8, MASK_7f, TMP8); + + vis_psub16(TMP10, TMP6, TMP6); + vis_st64(TMP6, dest[0]); + + vis_psub16(TMP12, TMP8, TMP8); + vis_st64_2(TMP8, dest, 8); + + dest += stride; + vis_ld64_2(ref, 8, TMP16); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64_2(ref, offset, TMP18); + vis_faligndata(TMP2, TMP4, REF_2); + ref += stride; + + vis_xor(DST_0, REF_0, TMP20); + + vis_and(TMP20, MASK_fe, TMP20); + + vis_xor(DST_2, REF_2, TMP22); + vis_mul8x16(CONST_128, TMP20, TMP20); + + vis_and(TMP22, MASK_fe, TMP22); + + vis_or(DST_0, REF_0, TMP24); + vis_mul8x16(CONST_128, TMP22, TMP22); + + vis_or(DST_2, REF_2, TMP26); + + vis_ld64_2(dest, stride, DST_0); + vis_faligndata(TMP14, TMP16, REF_0); + + vis_ld64_2(dest, stride_8, DST_2); + vis_faligndata(TMP16, TMP18, REF_2); + + vis_and(TMP20, MASK_7f, TMP20); + + vis_and(TMP22, MASK_7f, TMP22); + + vis_psub16(TMP24, TMP20, TMP20); + vis_st64(TMP20, dest[0]); + + vis_psub16(TMP26, TMP22, TMP22); + vis_st64_2(TMP22, dest, 8); + dest += stride; + } while (--height); + + vis_ld64(ref[0], TMP0); + 
vis_xor(DST_0, REF_0, TMP6); + + vis_ld64_2(ref, 8, TMP2); + vis_and(TMP6, MASK_fe, TMP6); + + vis_ld64_2(ref, offset, TMP4); + vis_mul8x16(CONST_128, TMP6, TMP6); + vis_xor(DST_2, REF_2, TMP8); + + vis_and(TMP8, MASK_fe, TMP8); + + vis_or(DST_0, REF_0, TMP10); + vis_ld64_2(dest, stride, DST_0); + vis_mul8x16(CONST_128, TMP8, TMP8); + + vis_or(DST_2, REF_2, TMP12); + vis_ld64_2(dest, stride_8, DST_2); + + vis_ld64(ref[0], TMP14); + vis_and(TMP6, MASK_7f, TMP6); + + vis_and(TMP8, MASK_7f, TMP8); + + vis_psub16(TMP10, TMP6, TMP6); + vis_st64(TMP6, dest[0]); + + vis_psub16(TMP12, TMP8, TMP8); + vis_st64_2(TMP8, dest, 8); + + dest += stride; + vis_faligndata(TMP0, TMP2, REF_0); + + vis_faligndata(TMP2, TMP4, REF_2); + + vis_xor(DST_0, REF_0, TMP20); + + vis_and(TMP20, MASK_fe, TMP20); + + vis_xor(DST_2, REF_2, TMP22); + vis_mul8x16(CONST_128, TMP20, TMP20); + + vis_and(TMP22, MASK_fe, TMP22); + + vis_or(DST_0, REF_0, TMP24); + vis_mul8x16(CONST_128, TMP22, TMP22); + + vis_or(DST_2, REF_2, TMP26); + + vis_and(TMP20, MASK_7f, TMP20); + + vis_and(TMP22, MASK_7f, TMP22); + + vis_psub16(TMP24, TMP20, TMP20); + vis_st64(TMP20, dest[0]); + + vis_psub16(TMP26, TMP22, TMP22); + vis_st64_2(TMP22, dest, 8); +} + +static void MC_avg_o_8_vis (uint8_t * dest, const uint8_t * _ref, + const int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + int offset; + + ref = vis_alignaddr(ref); + offset = (ref != _ref) ? 
8 : 0; + + vis_ld64(ref[0], TMP0); + + vis_ld64_2(ref, offset, TMP2); + + vis_ld64(dest[0], DST_0); + + vis_ld64(constants_fe[0], MASK_fe); + + vis_ld64(constants_7f[0], MASK_7f); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64(constants128[0], CONST_128); + + ref += stride; + height = (height >> 1) - 1; + + do { /* 12 cycles */ + vis_ld64(ref[0], TMP0); + vis_xor(DST_0, REF_0, TMP4); + + vis_ld64_2(ref, offset, TMP2); + vis_and(TMP4, MASK_fe, TMP4); + + vis_or(DST_0, REF_0, TMP6); + vis_ld64_2(dest, stride, DST_0); + ref += stride; + vis_mul8x16(CONST_128, TMP4, TMP4); + + vis_ld64(ref[0], TMP12); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64_2(ref, offset, TMP2); + vis_xor(DST_0, REF_0, TMP0); + ref += stride; + + vis_and(TMP0, MASK_fe, TMP0); + + vis_and(TMP4, MASK_7f, TMP4); + + vis_psub16(TMP6, TMP4, TMP4); + vis_st64(TMP4, dest[0]); + dest += stride; + vis_mul8x16(CONST_128, TMP0, TMP0); + + vis_or(DST_0, REF_0, TMP6); + vis_ld64_2(dest, stride, DST_0); + + vis_faligndata(TMP12, TMP2, REF_0); + + vis_and(TMP0, MASK_7f, TMP0); + + vis_psub16(TMP6, TMP0, TMP4); + vis_st64(TMP4, dest[0]); + dest += stride; + } while (--height); + + vis_ld64(ref[0], TMP0); + vis_xor(DST_0, REF_0, TMP4); + + vis_ld64_2(ref, offset, TMP2); + vis_and(TMP4, MASK_fe, TMP4); + + vis_or(DST_0, REF_0, TMP6); + vis_ld64_2(dest, stride, DST_0); + vis_mul8x16(CONST_128, TMP4, TMP4); + + vis_faligndata(TMP0, TMP2, REF_0); + + vis_xor(DST_0, REF_0, TMP0); + + vis_and(TMP0, MASK_fe, TMP0); + + vis_and(TMP4, MASK_7f, TMP4); + + vis_psub16(TMP6, TMP4, TMP4); + vis_st64(TMP4, dest[0]); + dest += stride; + vis_mul8x16(CONST_128, TMP0, TMP0); + + vis_or(DST_0, REF_0, TMP6); + + vis_and(TMP0, MASK_7f, TMP0); + + vis_psub16(TMP6, TMP0, TMP4); + vis_st64(TMP4, dest[0]); +} + +static void MC_put_x_16_vis (uint8_t * dest, const uint8_t * _ref, + const int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + unsigned long off = (unsigned long) ref & 0x7; + unsigned long off_plus_1 = off + 1; 
+ + ref = vis_alignaddr(ref); + + vis_ld64(ref[0], TMP0); + + vis_ld64_2(ref, 8, TMP2); + + vis_ld64_2(ref, 16, TMP4); + + vis_ld64(constants_fe[0], MASK_fe); + + vis_ld64(constants_7f[0], MASK_7f); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64(constants128[0], CONST_128); + vis_faligndata(TMP2, TMP4, REF_4); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_2); + vis_faligndata(TMP2, TMP4, REF_6); + } else { + vis_src1(TMP2, REF_2); + vis_src1(TMP4, REF_6); + } + + ref += stride; + height = (height >> 1) - 1; + + do { /* 34 cycles */ + vis_ld64(ref[0], TMP0); + vis_xor(REF_0, REF_2, TMP6); + + vis_ld64_2(ref, 8, TMP2); + vis_xor(REF_4, REF_6, TMP8); + + vis_ld64_2(ref, 16, TMP4); + vis_and(TMP6, MASK_fe, TMP6); + ref += stride; + + vis_ld64(ref[0], TMP14); + vis_mul8x16(CONST_128, TMP6, TMP6); + vis_and(TMP8, MASK_fe, TMP8); + + vis_ld64_2(ref, 8, TMP16); + vis_mul8x16(CONST_128, TMP8, TMP8); + vis_or(REF_0, REF_2, TMP10); + + vis_ld64_2(ref, 16, TMP18); + ref += stride; + vis_or(REF_4, REF_6, TMP12); + + vis_alignaddr_g0((void *)off); + + vis_faligndata(TMP0, TMP2, REF_0); + + vis_faligndata(TMP2, TMP4, REF_4); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_2); + vis_faligndata(TMP2, TMP4, REF_6); + } else { + vis_src1(TMP2, REF_2); + vis_src1(TMP4, REF_6); + } + + vis_and(TMP6, MASK_7f, TMP6); + + vis_and(TMP8, MASK_7f, TMP8); + + vis_psub16(TMP10, TMP6, TMP6); + vis_st64(TMP6, dest[0]); + + vis_psub16(TMP12, TMP8, TMP8); + vis_st64_2(TMP8, dest, 8); + dest += stride; + + vis_xor(REF_0, REF_2, TMP6); + + vis_xor(REF_4, REF_6, TMP8); + + vis_and(TMP6, MASK_fe, TMP6); + + vis_mul8x16(CONST_128, TMP6, TMP6); + vis_and(TMP8, MASK_fe, TMP8); + + vis_mul8x16(CONST_128, TMP8, TMP8); + vis_or(REF_0, REF_2, TMP10); + + vis_or(REF_4, REF_6, TMP12); + + vis_alignaddr_g0((void *)off); + + vis_faligndata(TMP14, TMP16, REF_0); + + vis_faligndata(TMP16, TMP18, REF_4); + + if (off 
!= 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP14, TMP16, REF_2); + vis_faligndata(TMP16, TMP18, REF_6); + } else { + vis_src1(TMP16, REF_2); + vis_src1(TMP18, REF_6); + } + + vis_and(TMP6, MASK_7f, TMP6); + + vis_and(TMP8, MASK_7f, TMP8); + + vis_psub16(TMP10, TMP6, TMP6); + vis_st64(TMP6, dest[0]); + + vis_psub16(TMP12, TMP8, TMP8); + vis_st64_2(TMP8, dest, 8); + dest += stride; + } while (--height); + + vis_ld64(ref[0], TMP0); + vis_xor(REF_0, REF_2, TMP6); + + vis_ld64_2(ref, 8, TMP2); + vis_xor(REF_4, REF_6, TMP8); + + vis_ld64_2(ref, 16, TMP4); + vis_and(TMP6, MASK_fe, TMP6); + + vis_mul8x16(CONST_128, TMP6, TMP6); + vis_and(TMP8, MASK_fe, TMP8); + + vis_mul8x16(CONST_128, TMP8, TMP8); + vis_or(REF_0, REF_2, TMP10); + + vis_or(REF_4, REF_6, TMP12); + + vis_alignaddr_g0((void *)off); + + vis_faligndata(TMP0, TMP2, REF_0); + + vis_faligndata(TMP2, TMP4, REF_4); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_2); + vis_faligndata(TMP2, TMP4, REF_6); + } else { + vis_src1(TMP2, REF_2); + vis_src1(TMP4, REF_6); + } + + vis_and(TMP6, MASK_7f, TMP6); + + vis_and(TMP8, MASK_7f, TMP8); + + vis_psub16(TMP10, TMP6, TMP6); + vis_st64(TMP6, dest[0]); + + vis_psub16(TMP12, TMP8, TMP8); + vis_st64_2(TMP8, dest, 8); + dest += stride; + + vis_xor(REF_0, REF_2, TMP6); + + vis_xor(REF_4, REF_6, TMP8); + + vis_and(TMP6, MASK_fe, TMP6); + + vis_mul8x16(CONST_128, TMP6, TMP6); + vis_and(TMP8, MASK_fe, TMP8); + + vis_mul8x16(CONST_128, TMP8, TMP8); + vis_or(REF_0, REF_2, TMP10); + + vis_or(REF_4, REF_6, TMP12); + + vis_and(TMP6, MASK_7f, TMP6); + + vis_and(TMP8, MASK_7f, TMP8); + + vis_psub16(TMP10, TMP6, TMP6); + vis_st64(TMP6, dest[0]); + + vis_psub16(TMP12, TMP8, TMP8); + vis_st64_2(TMP8, dest, 8); +} + +static void MC_put_x_8_vis (uint8_t * dest, const uint8_t * _ref, + const int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + unsigned long off = (unsigned long) ref & 0x7; + unsigned long 
off_plus_1 = off + 1; + + ref = vis_alignaddr(ref); + + vis_ld64(ref[0], TMP0); + + vis_ld64(ref[8], TMP2); + + vis_ld64(constants_fe[0], MASK_fe); + + vis_ld64(constants_7f[0], MASK_7f); + + vis_ld64(constants128[0], CONST_128); + vis_faligndata(TMP0, TMP2, REF_0); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_2); + } else { + vis_src1(TMP2, REF_2); + } + + ref += stride; + height = (height >> 1) - 1; + + do { /* 20 cycles */ + vis_ld64(ref[0], TMP0); + vis_xor(REF_0, REF_2, TMP4); + + vis_ld64_2(ref, 8, TMP2); + vis_and(TMP4, MASK_fe, TMP4); + ref += stride; + + vis_ld64(ref[0], TMP8); + vis_or(REF_0, REF_2, TMP6); + vis_mul8x16(CONST_128, TMP4, TMP4); + + vis_alignaddr_g0((void *)off); + + vis_ld64_2(ref, 8, TMP10); + ref += stride; + vis_faligndata(TMP0, TMP2, REF_0); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_2); + } else { + vis_src1(TMP2, REF_2); + } + + vis_and(TMP4, MASK_7f, TMP4); + + vis_psub16(TMP6, TMP4, DST_0); + vis_st64(DST_0, dest[0]); + dest += stride; + + vis_xor(REF_0, REF_2, TMP12); + + vis_and(TMP12, MASK_fe, TMP12); + + vis_or(REF_0, REF_2, TMP14); + vis_mul8x16(CONST_128, TMP12, TMP12); + + vis_alignaddr_g0((void *)off); + vis_faligndata(TMP8, TMP10, REF_0); + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP8, TMP10, REF_2); + } else { + vis_src1(TMP10, REF_2); + } + + vis_and(TMP12, MASK_7f, TMP12); + + vis_psub16(TMP14, TMP12, DST_0); + vis_st64(DST_0, dest[0]); + dest += stride; + } while (--height); + + vis_ld64(ref[0], TMP0); + vis_xor(REF_0, REF_2, TMP4); + + vis_ld64_2(ref, 8, TMP2); + vis_and(TMP4, MASK_fe, TMP4); + + vis_or(REF_0, REF_2, TMP6); + vis_mul8x16(CONST_128, TMP4, TMP4); + + vis_alignaddr_g0((void *)off); + + vis_faligndata(TMP0, TMP2, REF_0); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_2); + } else { + vis_src1(TMP2, REF_2); + } + + 
vis_and(TMP4, MASK_7f, TMP4); + + vis_psub16(TMP6, TMP4, DST_0); + vis_st64(DST_0, dest[0]); + dest += stride; + + vis_xor(REF_0, REF_2, TMP12); + + vis_and(TMP12, MASK_fe, TMP12); + + vis_or(REF_0, REF_2, TMP14); + vis_mul8x16(CONST_128, TMP12, TMP12); + + vis_and(TMP12, MASK_7f, TMP12); + + vis_psub16(TMP14, TMP12, DST_0); + vis_st64(DST_0, dest[0]); + dest += stride; +} + +static void MC_avg_x_16_vis (uint8_t * dest, const uint8_t * _ref, + const int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + unsigned long off = (unsigned long) ref & 0x7; + unsigned long off_plus_1 = off + 1; + + vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT); + + vis_ld64(constants3[0], CONST_3); + vis_fzero(ZERO); + vis_ld64(constants256_512[0], CONST_256); + + ref = vis_alignaddr(ref); + do { /* 26 cycles */ + vis_ld64(ref[0], TMP0); + + vis_ld64(ref[8], TMP2); + + vis_alignaddr_g0((void *)off); + + vis_ld64(ref[16], TMP4); + + vis_ld64(dest[0], DST_0); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64(dest[8], DST_2); + vis_faligndata(TMP2, TMP4, REF_4); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_2); + vis_faligndata(TMP2, TMP4, REF_6); + } else { + vis_src1(TMP2, REF_2); + vis_src1(TMP4, REF_6); + } + + vis_mul8x16au(REF_0, CONST_256, TMP0); + + vis_pmerge(ZERO, REF_2, TMP4); + vis_mul8x16au(REF_0_1, CONST_256, TMP2); + + vis_pmerge(ZERO, REF_2_1, TMP6); + + vis_padd16(TMP0, TMP4, TMP0); + + vis_mul8x16al(DST_0, CONST_512, TMP4); + vis_padd16(TMP2, TMP6, TMP2); + + vis_mul8x16al(DST_1, CONST_512, TMP6); + + vis_mul8x16au(REF_6, CONST_256, TMP12); + + vis_padd16(TMP0, TMP4, TMP0); + vis_mul8x16au(REF_6_1, CONST_256, TMP14); + + vis_padd16(TMP2, TMP6, TMP2); + vis_mul8x16au(REF_4, CONST_256, TMP16); + + vis_padd16(TMP0, CONST_3, TMP8); + vis_mul8x16au(REF_4_1, CONST_256, TMP18); + + vis_padd16(TMP2, CONST_3, TMP10); + vis_pack16(TMP8, DST_0); + + vis_pack16(TMP10, DST_1); + vis_padd16(TMP16, TMP12, TMP0); + + vis_st64(DST_0, 
dest[0]); + vis_mul8x16al(DST_2, CONST_512, TMP4); + vis_padd16(TMP18, TMP14, TMP2); + + vis_mul8x16al(DST_3, CONST_512, TMP6); + vis_padd16(TMP0, CONST_3, TMP0); + + vis_padd16(TMP2, CONST_3, TMP2); + + vis_padd16(TMP0, TMP4, TMP0); + + vis_padd16(TMP2, TMP6, TMP2); + vis_pack16(TMP0, DST_2); + + vis_pack16(TMP2, DST_3); + vis_st64(DST_2, dest[8]); + + ref += stride; + dest += stride; + } while (--height); +} + +static void MC_avg_x_8_vis (uint8_t * dest, const uint8_t * _ref, + const int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + unsigned long off = (unsigned long) ref & 0x7; + unsigned long off_plus_1 = off + 1; + int stride_times_2 = stride << 1; + + vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT); + + vis_ld64(constants3[0], CONST_3); + vis_fzero(ZERO); + vis_ld64(constants256_512[0], CONST_256); + + ref = vis_alignaddr(ref); + height >>= 2; + do { /* 47 cycles */ + vis_ld64(ref[0], TMP0); + + vis_ld64_2(ref, 8, TMP2); + ref += stride; + + vis_alignaddr_g0((void *)off); + + vis_ld64(ref[0], TMP4); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64_2(ref, 8, TMP6); + ref += stride; + + vis_ld64(ref[0], TMP8); + + vis_ld64_2(ref, 8, TMP10); + ref += stride; + vis_faligndata(TMP4, TMP6, REF_4); + + vis_ld64(ref[0], TMP12); + + vis_ld64_2(ref, 8, TMP14); + ref += stride; + vis_faligndata(TMP8, TMP10, REF_S0); + + vis_faligndata(TMP12, TMP14, REF_S4); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + + vis_ld64(dest[0], DST_0); + vis_faligndata(TMP0, TMP2, REF_2); + + vis_ld64_2(dest, stride, DST_2); + vis_faligndata(TMP4, TMP6, REF_6); + + vis_faligndata(TMP8, TMP10, REF_S2); + + vis_faligndata(TMP12, TMP14, REF_S6); + } else { + vis_ld64(dest[0], DST_0); + vis_src1(TMP2, REF_2); + + vis_ld64_2(dest, stride, DST_2); + vis_src1(TMP6, REF_6); + + vis_src1(TMP10, REF_S2); + + vis_src1(TMP14, REF_S6); + } + + vis_pmerge(ZERO, REF_0, TMP0); + vis_mul8x16au(REF_0_1, CONST_256, TMP2); + + vis_pmerge(ZERO, REF_2, TMP4); + vis_mul8x16au(REF_2_1, 
CONST_256, TMP6); + + vis_padd16(TMP0, CONST_3, TMP0); + vis_mul8x16al(DST_0, CONST_512, TMP16); + + vis_padd16(TMP2, CONST_3, TMP2); + vis_mul8x16al(DST_1, CONST_512, TMP18); + + vis_padd16(TMP0, TMP4, TMP0); + vis_mul8x16au(REF_4, CONST_256, TMP8); + + vis_padd16(TMP2, TMP6, TMP2); + vis_mul8x16au(REF_4_1, CONST_256, TMP10); + + vis_padd16(TMP0, TMP16, TMP0); + vis_mul8x16au(REF_6, CONST_256, TMP12); + + vis_padd16(TMP2, TMP18, TMP2); + vis_mul8x16au(REF_6_1, CONST_256, TMP14); + + vis_padd16(TMP8, CONST_3, TMP8); + vis_mul8x16al(DST_2, CONST_512, TMP16); + + vis_padd16(TMP8, TMP12, TMP8); + vis_mul8x16al(DST_3, CONST_512, TMP18); + + vis_padd16(TMP10, TMP14, TMP10); + vis_pack16(TMP0, DST_0); + + vis_pack16(TMP2, DST_1); + vis_st64(DST_0, dest[0]); + dest += stride; + vis_padd16(TMP10, CONST_3, TMP10); + + vis_ld64_2(dest, stride, DST_0); + vis_padd16(TMP8, TMP16, TMP8); + + vis_ld64_2(dest, stride_times_2, TMP4/*DST_2*/); + vis_padd16(TMP10, TMP18, TMP10); + vis_pack16(TMP8, DST_2); + + vis_pack16(TMP10, DST_3); + vis_st64(DST_2, dest[0]); + dest += stride; + + vis_mul8x16au(REF_S0_1, CONST_256, TMP2); + vis_pmerge(ZERO, REF_S0, TMP0); + + vis_pmerge(ZERO, REF_S2, TMP24); + vis_mul8x16au(REF_S2_1, CONST_256, TMP6); + + vis_padd16(TMP0, CONST_3, TMP0); + vis_mul8x16au(REF_S4, CONST_256, TMP8); + + vis_padd16(TMP2, CONST_3, TMP2); + vis_mul8x16au(REF_S4_1, CONST_256, TMP10); + + vis_padd16(TMP0, TMP24, TMP0); + vis_mul8x16au(REF_S6, CONST_256, TMP12); + + vis_padd16(TMP2, TMP6, TMP2); + vis_mul8x16au(REF_S6_1, CONST_256, TMP14); + + vis_padd16(TMP8, CONST_3, TMP8); + vis_mul8x16al(DST_0, CONST_512, TMP16); + + vis_padd16(TMP10, CONST_3, TMP10); + vis_mul8x16al(DST_1, CONST_512, TMP18); + + vis_padd16(TMP8, TMP12, TMP8); + vis_mul8x16al(TMP4/*DST_2*/, CONST_512, TMP20); + + vis_mul8x16al(TMP5/*DST_3*/, CONST_512, TMP22); + vis_padd16(TMP0, TMP16, TMP0); + + vis_padd16(TMP2, TMP18, TMP2); + vis_pack16(TMP0, DST_0); + + vis_padd16(TMP10, TMP14, TMP10); + 
vis_pack16(TMP2, DST_1); + vis_st64(DST_0, dest[0]); + dest += stride; + + vis_padd16(TMP8, TMP20, TMP8); + + vis_padd16(TMP10, TMP22, TMP10); + vis_pack16(TMP8, DST_2); + + vis_pack16(TMP10, DST_3); + vis_st64(DST_2, dest[0]); + dest += stride; + } while (--height); +} + +static void MC_put_y_16_vis (uint8_t * dest, const uint8_t * _ref, + const int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + int offset; + + ref = vis_alignaddr(ref); + offset = (ref != _ref) ? 16 : 0; + + vis_ld64(ref[0], TMP0); + + vis_ld64_2(ref, 8, TMP2); + + vis_ld64_2(ref, offset, TMP4); + ref += stride; + + vis_ld64(ref[0], TMP6); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64_2(ref, 8, TMP8); + vis_faligndata(TMP2, TMP4, REF_4); + + vis_ld64_2(ref, offset, TMP10); + ref += stride; + + vis_ld64(constants_fe[0], MASK_fe); + vis_faligndata(TMP6, TMP8, REF_2); + + vis_ld64(constants_7f[0], MASK_7f); + vis_faligndata(TMP8, TMP10, REF_6); + + vis_ld64(constants128[0], CONST_128); + height = (height >> 1) - 1; + do { /* 24 cycles */ + vis_ld64(ref[0], TMP0); + vis_xor(REF_0, REF_2, TMP12); + + vis_ld64_2(ref, 8, TMP2); + vis_xor(REF_4, REF_6, TMP16); + + vis_ld64_2(ref, offset, TMP4); + ref += stride; + vis_or(REF_0, REF_2, TMP14); + + vis_ld64(ref[0], TMP6); + vis_or(REF_4, REF_6, TMP18); + + vis_ld64_2(ref, 8, TMP8); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64_2(ref, offset, TMP10); + ref += stride; + vis_faligndata(TMP2, TMP4, REF_4); + + vis_and(TMP12, MASK_fe, TMP12); + + vis_and(TMP16, MASK_fe, TMP16); + vis_mul8x16(CONST_128, TMP12, TMP12); + + vis_mul8x16(CONST_128, TMP16, TMP16); + vis_xor(REF_0, REF_2, TMP0); + + vis_xor(REF_4, REF_6, TMP2); + + vis_or(REF_0, REF_2, TMP20); + + vis_and(TMP12, MASK_7f, TMP12); + + vis_and(TMP16, MASK_7f, TMP16); + + vis_psub16(TMP14, TMP12, TMP12); + vis_st64(TMP12, dest[0]); + + vis_psub16(TMP18, TMP16, TMP16); + vis_st64_2(TMP16, dest, 8); + dest += stride; + + vis_or(REF_4, REF_6, TMP18); + + vis_and(TMP0, MASK_fe, TMP0); + + 
vis_and(TMP2, MASK_fe, TMP2); + vis_mul8x16(CONST_128, TMP0, TMP0); + + vis_faligndata(TMP6, TMP8, REF_2); + vis_mul8x16(CONST_128, TMP2, TMP2); + + vis_faligndata(TMP8, TMP10, REF_6); + + vis_and(TMP0, MASK_7f, TMP0); + + vis_and(TMP2, MASK_7f, TMP2); + + vis_psub16(TMP20, TMP0, TMP0); + vis_st64(TMP0, dest[0]); + + vis_psub16(TMP18, TMP2, TMP2); + vis_st64_2(TMP2, dest, 8); + dest += stride; + } while (--height); + + vis_ld64(ref[0], TMP0); + vis_xor(REF_0, REF_2, TMP12); + + vis_ld64_2(ref, 8, TMP2); + vis_xor(REF_4, REF_6, TMP16); + + vis_ld64_2(ref, offset, TMP4); + vis_or(REF_0, REF_2, TMP14); + + vis_or(REF_4, REF_6, TMP18); + + vis_faligndata(TMP0, TMP2, REF_0); + + vis_faligndata(TMP2, TMP4, REF_4); + + vis_and(TMP12, MASK_fe, TMP12); + + vis_and(TMP16, MASK_fe, TMP16); + vis_mul8x16(CONST_128, TMP12, TMP12); + + vis_mul8x16(CONST_128, TMP16, TMP16); + vis_xor(REF_0, REF_2, TMP0); + + vis_xor(REF_4, REF_6, TMP2); + + vis_or(REF_0, REF_2, TMP20); + + vis_and(TMP12, MASK_7f, TMP12); + + vis_and(TMP16, MASK_7f, TMP16); + + vis_psub16(TMP14, TMP12, TMP12); + vis_st64(TMP12, dest[0]); + + vis_psub16(TMP18, TMP16, TMP16); + vis_st64_2(TMP16, dest, 8); + dest += stride; + + vis_or(REF_4, REF_6, TMP18); + + vis_and(TMP0, MASK_fe, TMP0); + + vis_and(TMP2, MASK_fe, TMP2); + vis_mul8x16(CONST_128, TMP0, TMP0); + + vis_mul8x16(CONST_128, TMP2, TMP2); + + vis_and(TMP0, MASK_7f, TMP0); + + vis_and(TMP2, MASK_7f, TMP2); + + vis_psub16(TMP20, TMP0, TMP0); + vis_st64(TMP0, dest[0]); + + vis_psub16(TMP18, TMP2, TMP2); + vis_st64_2(TMP2, dest, 8); +} + +static void MC_put_y_8_vis (uint8_t * dest, const uint8_t * _ref, + const int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + int offset; + + ref = vis_alignaddr(ref); + offset = (ref != _ref) ? 
8 : 0; + + vis_ld64(ref[0], TMP0); + + vis_ld64_2(ref, offset, TMP2); + ref += stride; + + vis_ld64(ref[0], TMP4); + + vis_ld64_2(ref, offset, TMP6); + ref += stride; + + vis_ld64(constants_fe[0], MASK_fe); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64(constants_7f[0], MASK_7f); + vis_faligndata(TMP4, TMP6, REF_2); + + vis_ld64(constants128[0], CONST_128); + height = (height >> 1) - 1; + do { /* 12 cycles */ + vis_ld64(ref[0], TMP0); + vis_xor(REF_0, REF_2, TMP4); + + vis_ld64_2(ref, offset, TMP2); + ref += stride; + vis_and(TMP4, MASK_fe, TMP4); + + vis_or(REF_0, REF_2, TMP6); + vis_mul8x16(CONST_128, TMP4, TMP4); + + vis_faligndata(TMP0, TMP2, REF_0); + vis_ld64(ref[0], TMP0); + + vis_ld64_2(ref, offset, TMP2); + ref += stride; + vis_xor(REF_0, REF_2, TMP12); + + vis_and(TMP4, MASK_7f, TMP4); + + vis_and(TMP12, MASK_fe, TMP12); + + vis_mul8x16(CONST_128, TMP12, TMP12); + vis_or(REF_0, REF_2, TMP14); + + vis_psub16(TMP6, TMP4, DST_0); + vis_st64(DST_0, dest[0]); + dest += stride; + + vis_faligndata(TMP0, TMP2, REF_2); + + vis_and(TMP12, MASK_7f, TMP12); + + vis_psub16(TMP14, TMP12, DST_0); + vis_st64(DST_0, dest[0]); + dest += stride; + } while (--height); + + vis_ld64(ref[0], TMP0); + vis_xor(REF_0, REF_2, TMP4); + + vis_ld64_2(ref, offset, TMP2); + vis_and(TMP4, MASK_fe, TMP4); + + vis_or(REF_0, REF_2, TMP6); + vis_mul8x16(CONST_128, TMP4, TMP4); + + vis_faligndata(TMP0, TMP2, REF_0); + + vis_xor(REF_0, REF_2, TMP12); + + vis_and(TMP4, MASK_7f, TMP4); + + vis_and(TMP12, MASK_fe, TMP12); + + vis_mul8x16(CONST_128, TMP12, TMP12); + vis_or(REF_0, REF_2, TMP14); + + vis_psub16(TMP6, TMP4, DST_0); + vis_st64(DST_0, dest[0]); + dest += stride; + + vis_and(TMP12, MASK_7f, TMP12); + + vis_psub16(TMP14, TMP12, DST_0); + vis_st64(DST_0, dest[0]); +} + +static void MC_avg_y_16_vis (uint8_t * dest, const uint8_t * _ref, + const int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + int stride_8 = stride + 8; + int stride_16; + int offset; + + vis_set_gsr(5 << 
VIS_GSR_SCALEFACT_SHIFT); + + ref = vis_alignaddr(ref); + offset = (ref != _ref) ? 16 : 0; + + vis_ld64(ref[ 0], TMP0); + vis_fzero(ZERO); + + vis_ld64(ref[ 8], TMP2); + + vis_ld64_2(ref, offset, TMP4); + stride_16 = stride + offset; + + vis_ld64(constants3[0], CONST_3); + vis_faligndata(TMP0, TMP2, REF_2); + + vis_ld64(constants256_512[0], CONST_256); + vis_faligndata(TMP2, TMP4, REF_6); + height >>= 1; + + do { /* 31 cycles */ + vis_ld64_2(ref, stride, TMP0); + vis_pmerge(ZERO, REF_2, TMP12); + vis_mul8x16au(REF_2_1, CONST_256, TMP14); + + vis_ld64_2(ref, stride_8, TMP2); + vis_pmerge(ZERO, REF_6, TMP16); + vis_mul8x16au(REF_6_1, CONST_256, TMP18); + + vis_ld64_2(ref, stride_16, TMP4); + ref += stride; + + vis_ld64(dest[0], DST_0); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64_2(dest, 8, DST_2); + vis_faligndata(TMP2, TMP4, REF_4); + + vis_ld64_2(ref, stride, TMP6); + vis_pmerge(ZERO, REF_0, TMP0); + vis_mul8x16au(REF_0_1, CONST_256, TMP2); + + vis_ld64_2(ref, stride_8, TMP8); + vis_pmerge(ZERO, REF_4, TMP4); + + vis_ld64_2(ref, stride_16, TMP10); + ref += stride; + + vis_ld64_2(dest, stride, REF_S0/*DST_4*/); + vis_faligndata(TMP6, TMP8, REF_2); + vis_mul8x16au(REF_4_1, CONST_256, TMP6); + + vis_ld64_2(dest, stride_8, REF_S2/*DST_6*/); + vis_faligndata(TMP8, TMP10, REF_6); + vis_mul8x16al(DST_0, CONST_512, TMP20); + + vis_padd16(TMP0, CONST_3, TMP0); + vis_mul8x16al(DST_1, CONST_512, TMP22); + + vis_padd16(TMP2, CONST_3, TMP2); + vis_mul8x16al(DST_2, CONST_512, TMP24); + + vis_padd16(TMP4, CONST_3, TMP4); + vis_mul8x16al(DST_3, CONST_512, TMP26); + + vis_padd16(TMP6, CONST_3, TMP6); + + vis_padd16(TMP12, TMP20, TMP12); + vis_mul8x16al(REF_S0, CONST_512, TMP20); + + vis_padd16(TMP14, TMP22, TMP14); + vis_mul8x16al(REF_S0_1, CONST_512, TMP22); + + vis_padd16(TMP16, TMP24, TMP16); + vis_mul8x16al(REF_S2, CONST_512, TMP24); + + vis_padd16(TMP18, TMP26, TMP18); + vis_mul8x16al(REF_S2_1, CONST_512, TMP26); + + vis_padd16(TMP12, TMP0, TMP12); + vis_mul8x16au(REF_2, 
CONST_256, TMP28); + + vis_padd16(TMP14, TMP2, TMP14); + vis_mul8x16au(REF_2_1, CONST_256, TMP30); + + vis_padd16(TMP16, TMP4, TMP16); + vis_mul8x16au(REF_6, CONST_256, REF_S4); + + vis_padd16(TMP18, TMP6, TMP18); + vis_mul8x16au(REF_6_1, CONST_256, REF_S6); + + vis_pack16(TMP12, DST_0); + vis_padd16(TMP28, TMP0, TMP12); + + vis_pack16(TMP14, DST_1); + vis_st64(DST_0, dest[0]); + vis_padd16(TMP30, TMP2, TMP14); + + vis_pack16(TMP16, DST_2); + vis_padd16(REF_S4, TMP4, TMP16); + + vis_pack16(TMP18, DST_3); + vis_st64_2(DST_2, dest, 8); + dest += stride; + vis_padd16(REF_S6, TMP6, TMP18); + + vis_padd16(TMP12, TMP20, TMP12); + + vis_padd16(TMP14, TMP22, TMP14); + vis_pack16(TMP12, DST_0); + + vis_padd16(TMP16, TMP24, TMP16); + vis_pack16(TMP14, DST_1); + vis_st64(DST_0, dest[0]); + + vis_padd16(TMP18, TMP26, TMP18); + vis_pack16(TMP16, DST_2); + + vis_pack16(TMP18, DST_3); + vis_st64_2(DST_2, dest, 8); + dest += stride; + } while (--height); +} + /* MC_avg_y_8_vis: 8-byte-wide counterpart of MC_avg_y_16_vis. Each iteration loads two more reference rows (merged with vis_faligndata) and two dest rows (dest[0] and dest + stride), adds the widened bytes of two consecutive reference rows, CONST_3 and the dest bytes scaled by vis_mul8x16al(..., CONST_512), narrows with vis_pack16 (scale factor 5) and stores one doubleword per row. stride_8 is stride + offset, where offset is 8 only for an unaligned _ref. height is halved and counted with do ... while (--height): it must be at least 2 and only 2 * (height >> 1) rows are produced. */ +static void MC_avg_y_8_vis (uint8_t * dest, const uint8_t * _ref, + const int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + int stride_8; + int offset; + + vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT); + + ref = vis_alignaddr(ref); + offset = (ref != _ref) ? 
8 : 0; + + vis_ld64(ref[ 0], TMP0); + vis_fzero(ZERO); + + vis_ld64_2(ref, offset, TMP2); + stride_8 = stride + offset; + + vis_ld64(constants3[0], CONST_3); + vis_faligndata(TMP0, TMP2, REF_2); + + vis_ld64(constants256_512[0], CONST_256); + + height >>= 1; + do { /* 20 cycles */ + vis_ld64_2(ref, stride, TMP0); + vis_pmerge(ZERO, REF_2, TMP8); + vis_mul8x16au(REF_2_1, CONST_256, TMP10); + + vis_ld64_2(ref, stride_8, TMP2); + ref += stride; + + vis_ld64(dest[0], DST_0); + + vis_ld64_2(dest, stride, DST_2); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64_2(ref, stride, TMP4); + vis_mul8x16al(DST_0, CONST_512, TMP16); + vis_pmerge(ZERO, REF_0, TMP12); + + vis_ld64_2(ref, stride_8, TMP6); + ref += stride; + vis_mul8x16al(DST_1, CONST_512, TMP18); + vis_pmerge(ZERO, REF_0_1, TMP14); + + vis_padd16(TMP12, CONST_3, TMP12); + vis_mul8x16al(DST_2, CONST_512, TMP24); + + vis_padd16(TMP14, CONST_3, TMP14); + vis_mul8x16al(DST_3, CONST_512, TMP26); + + vis_faligndata(TMP4, TMP6, REF_2); + + vis_padd16(TMP8, TMP12, TMP8); + + vis_padd16(TMP10, TMP14, TMP10); + vis_mul8x16au(REF_2, CONST_256, TMP20); + + vis_padd16(TMP8, TMP16, TMP0); + vis_mul8x16au(REF_2_1, CONST_256, TMP22); + + vis_padd16(TMP10, TMP18, TMP2); + vis_pack16(TMP0, DST_0); + + vis_pack16(TMP2, DST_1); + vis_st64(DST_0, dest[0]); + dest += stride; + vis_padd16(TMP12, TMP20, TMP12); + + vis_padd16(TMP14, TMP22, TMP14); + + vis_padd16(TMP12, TMP24, TMP0); + + vis_padd16(TMP14, TMP26, TMP2); + vis_pack16(TMP0, DST_2); + + vis_pack16(TMP2, DST_3); + vis_st64(DST_2, dest[0]); + dest += stride; + } while (--height); +} + /* MC_put_xy_16_vis: writes a 16-byte-wide block in which every output byte is packed from the sum of four widened reference values -- the row data aligned at byte offset off and at off + 1, taken from two consecutive rows -- plus CONST_2, narrowed by vis_pack16 under the scale factor set by vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT). dest is only written, never read. Three doublewords (ref[0], ref[8], ref[16]) are loaded per row regardless of alignment. When off == 7 the off + 1 data is taken with vis_src1 copies of the following doubleword instead of vis_faligndata. Two rows are stored per iteration; height is halved and counted with do ... while (--height), so it must be at least 2. */ +static void MC_put_xy_16_vis (uint8_t * dest, const uint8_t * _ref, + const int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + unsigned long off = (unsigned long) ref & 0x7; + unsigned long off_plus_1 = off + 1; + int stride_8 = stride + 8; + int stride_16 = stride + 16; + + vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT); + + ref = vis_alignaddr(ref); + + vis_ld64(ref[ 0], TMP0); + 
vis_fzero(ZERO); + + vis_ld64(ref[ 8], TMP2); + + vis_ld64(ref[16], TMP4); + + vis_ld64(constants2[0], CONST_2); + vis_faligndata(TMP0, TMP2, REF_S0); + + vis_ld64(constants256_512[0], CONST_256); + vis_faligndata(TMP2, TMP4, REF_S4); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_S2); + vis_faligndata(TMP2, TMP4, REF_S6); + } else { + vis_src1(TMP2, REF_S2); + vis_src1(TMP4, REF_S6); + } + + height >>= 1; + do { + vis_ld64_2(ref, stride, TMP0); + vis_mul8x16au(REF_S0, CONST_256, TMP12); + vis_pmerge(ZERO, REF_S0_1, TMP14); + + vis_alignaddr_g0((void *)off); + + vis_ld64_2(ref, stride_8, TMP2); + vis_mul8x16au(REF_S2, CONST_256, TMP16); + vis_pmerge(ZERO, REF_S2_1, TMP18); + + vis_ld64_2(ref, stride_16, TMP4); + ref += stride; + vis_mul8x16au(REF_S4, CONST_256, TMP20); + vis_pmerge(ZERO, REF_S4_1, TMP22); + + vis_ld64_2(ref, stride, TMP6); + vis_mul8x16au(REF_S6, CONST_256, TMP24); + vis_pmerge(ZERO, REF_S6_1, TMP26); + + vis_ld64_2(ref, stride_8, TMP8); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64_2(ref, stride_16, TMP10); + ref += stride; + vis_faligndata(TMP2, TMP4, REF_4); + + vis_faligndata(TMP6, TMP8, REF_S0); + + vis_faligndata(TMP8, TMP10, REF_S4); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_2); + vis_faligndata(TMP2, TMP4, REF_6); + vis_faligndata(TMP6, TMP8, REF_S2); + vis_faligndata(TMP8, TMP10, REF_S6); + } else { + vis_src1(TMP2, REF_2); + vis_src1(TMP4, REF_6); + vis_src1(TMP8, REF_S2); + vis_src1(TMP10, REF_S6); + } + + vis_mul8x16au(REF_0, CONST_256, TMP0); + vis_pmerge(ZERO, REF_0_1, TMP2); + + vis_mul8x16au(REF_2, CONST_256, TMP4); + vis_pmerge(ZERO, REF_2_1, TMP6); + + vis_padd16(TMP0, CONST_2, TMP8); + vis_mul8x16au(REF_4, CONST_256, TMP0); + + vis_padd16(TMP2, CONST_2, TMP10); + vis_mul8x16au(REF_4_1, CONST_256, TMP2); + + vis_padd16(TMP8, TMP4, TMP8); + vis_mul8x16au(REF_6, CONST_256, TMP4); + + vis_padd16(TMP10, TMP6, TMP10); + 
vis_mul8x16au(REF_6_1, CONST_256, TMP6); + + vis_padd16(TMP12, TMP8, TMP12); + + vis_padd16(TMP14, TMP10, TMP14); + + vis_padd16(TMP12, TMP16, TMP12); + + vis_padd16(TMP14, TMP18, TMP14); + vis_pack16(TMP12, DST_0); + + vis_pack16(TMP14, DST_1); + vis_st64(DST_0, dest[0]); + vis_padd16(TMP0, CONST_2, TMP12); + + vis_mul8x16au(REF_S0, CONST_256, TMP0); + vis_padd16(TMP2, CONST_2, TMP14); + + vis_mul8x16au(REF_S0_1, CONST_256, TMP2); + vis_padd16(TMP12, TMP4, TMP12); + + vis_mul8x16au(REF_S2, CONST_256, TMP4); + vis_padd16(TMP14, TMP6, TMP14); + + vis_mul8x16au(REF_S2_1, CONST_256, TMP6); + vis_padd16(TMP20, TMP12, TMP20); + + vis_padd16(TMP22, TMP14, TMP22); + + vis_padd16(TMP20, TMP24, TMP20); + + vis_padd16(TMP22, TMP26, TMP22); + vis_pack16(TMP20, DST_2); + + vis_pack16(TMP22, DST_3); + vis_st64_2(DST_2, dest, 8); + dest += stride; + vis_padd16(TMP0, TMP4, TMP24); + + vis_mul8x16au(REF_S4, CONST_256, TMP0); + vis_padd16(TMP2, TMP6, TMP26); + + vis_mul8x16au(REF_S4_1, CONST_256, TMP2); + vis_padd16(TMP24, TMP8, TMP24); + + vis_padd16(TMP26, TMP10, TMP26); + vis_pack16(TMP24, DST_0); + + vis_pack16(TMP26, DST_1); + vis_st64(DST_0, dest[0]); + vis_pmerge(ZERO, REF_S6, TMP4); + + vis_pmerge(ZERO, REF_S6_1, TMP6); + + vis_padd16(TMP0, TMP4, TMP0); + + vis_padd16(TMP2, TMP6, TMP2); + + vis_padd16(TMP0, TMP12, TMP0); + + vis_padd16(TMP2, TMP14, TMP2); + vis_pack16(TMP0, DST_2); + + vis_pack16(TMP2, DST_3); + vis_st64_2(DST_2, dest, 8); + dest += stride; + } while (--height); +} + /* MC_put_xy_8_vis: 8-byte-wide version of MC_put_xy_16_vis. Two doublewords per row are loaded and aligned at off and at off + 1 (vis_src1 replaces vis_faligndata when off == 7); each output byte is packed from the sum of both alignments of two consecutive reference rows plus CONST_2, with the pack scale factor set to 5. Two rows are stored per iteration, dest is never read, and height must be at least 2 because of the height >>= 1 / do ... while (--height) loop. */ +static void MC_put_xy_8_vis (uint8_t * dest, const uint8_t * _ref, + const int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + unsigned long off = (unsigned long) ref & 0x7; + unsigned long off_plus_1 = off + 1; + int stride_8 = stride + 8; + + vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT); + + ref = vis_alignaddr(ref); + + vis_ld64(ref[ 0], TMP0); + vis_fzero(ZERO); + + vis_ld64(ref[ 8], TMP2); + + vis_ld64(constants2[0], CONST_2); + + vis_ld64(constants256_512[0], CONST_256); + 
vis_faligndata(TMP0, TMP2, REF_S0); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_S2); + } else { + vis_src1(TMP2, REF_S2); + } + + height >>= 1; + do { /* 26 cycles */ + vis_ld64_2(ref, stride, TMP0); + vis_mul8x16au(REF_S0, CONST_256, TMP8); + vis_pmerge(ZERO, REF_S2, TMP12); + + vis_alignaddr_g0((void *)off); + + vis_ld64_2(ref, stride_8, TMP2); + ref += stride; + vis_mul8x16au(REF_S0_1, CONST_256, TMP10); + vis_pmerge(ZERO, REF_S2_1, TMP14); + + vis_ld64_2(ref, stride, TMP4); + + vis_ld64_2(ref, stride_8, TMP6); + ref += stride; + vis_faligndata(TMP0, TMP2, REF_S4); + + vis_pmerge(ZERO, REF_S4, TMP18); + + vis_pmerge(ZERO, REF_S4_1, TMP20); + + vis_faligndata(TMP4, TMP6, REF_S0); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_S6); + vis_faligndata(TMP4, TMP6, REF_S2); + } else { + vis_src1(TMP2, REF_S6); + vis_src1(TMP6, REF_S2); + } + + vis_padd16(TMP18, CONST_2, TMP18); + vis_mul8x16au(REF_S6, CONST_256, TMP22); + + vis_padd16(TMP20, CONST_2, TMP20); + vis_mul8x16au(REF_S6_1, CONST_256, TMP24); + + vis_mul8x16au(REF_S0, CONST_256, TMP26); + vis_pmerge(ZERO, REF_S0_1, TMP28); + + vis_mul8x16au(REF_S2, CONST_256, TMP30); + vis_padd16(TMP18, TMP22, TMP18); + + vis_mul8x16au(REF_S2_1, CONST_256, TMP32); + vis_padd16(TMP20, TMP24, TMP20); + + vis_padd16(TMP8, TMP18, TMP8); + + vis_padd16(TMP10, TMP20, TMP10); + + vis_padd16(TMP8, TMP12, TMP8); + + vis_padd16(TMP10, TMP14, TMP10); + vis_pack16(TMP8, DST_0); + + vis_pack16(TMP10, DST_1); + vis_st64(DST_0, dest[0]); + dest += stride; + vis_padd16(TMP18, TMP26, TMP18); + + vis_padd16(TMP20, TMP28, TMP20); + + vis_padd16(TMP18, TMP30, TMP18); + + vis_padd16(TMP20, TMP32, TMP20); + vis_pack16(TMP18, DST_2); + + vis_pack16(TMP20, DST_3); + vis_st64(DST_2, dest[0]); + dest += stride; + } while (--height); +} + /* MC_avg_xy_16_vis: like MC_put_xy_16_vis but also folds in the current dest contents: dest doublewords are loaded, scaled with vis_mul8x16al(..., CONST_1024) and added to the four-value reference sum, which here uses CONST_6 as the additive constant and a pack scale factor of 4 (vis_set_gsr(4 << VIS_GSR_SCALEFACT_SHIFT)). REF_0 .. REF_6 are reused as scratch accumulators inside the loop. Two 16-byte rows are stored per iteration; height is halved and counted with do ... while (--height), so it must be at least 2. NOTE(review): the resulting weighting depends on constant tables defined outside this chunk -- confirm. */ +static void MC_avg_xy_16_vis (uint8_t * dest, const uint8_t * _ref, + const int stride, int height) +{ + uint8_t *ref 
= (uint8_t *) _ref; + unsigned long off = (unsigned long) ref & 0x7; + unsigned long off_plus_1 = off + 1; + int stride_8 = stride + 8; + int stride_16 = stride + 16; + + vis_set_gsr(4 << VIS_GSR_SCALEFACT_SHIFT); + + ref = vis_alignaddr(ref); + + vis_ld64(ref[ 0], TMP0); + vis_fzero(ZERO); + + vis_ld64(ref[ 8], TMP2); + + vis_ld64(ref[16], TMP4); + + vis_ld64(constants6[0], CONST_6); + vis_faligndata(TMP0, TMP2, REF_S0); + + vis_ld64(constants256_1024[0], CONST_256); + vis_faligndata(TMP2, TMP4, REF_S4); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_S2); + vis_faligndata(TMP2, TMP4, REF_S6); + } else { + vis_src1(TMP2, REF_S2); + vis_src1(TMP4, REF_S6); + } + + height >>= 1; + do { /* 55 cycles */ + vis_ld64_2(ref, stride, TMP0); + vis_mul8x16au(REF_S0, CONST_256, TMP12); + vis_pmerge(ZERO, REF_S0_1, TMP14); + + vis_alignaddr_g0((void *)off); + + vis_ld64_2(ref, stride_8, TMP2); + vis_mul8x16au(REF_S2, CONST_256, TMP16); + vis_pmerge(ZERO, REF_S2_1, TMP18); + + vis_ld64_2(ref, stride_16, TMP4); + ref += stride; + vis_mul8x16au(REF_S4, CONST_256, TMP20); + vis_pmerge(ZERO, REF_S4_1, TMP22); + + vis_ld64_2(ref, stride, TMP6); + vis_mul8x16au(REF_S6, CONST_256, TMP24); + vis_pmerge(ZERO, REF_S6_1, TMP26); + + vis_ld64_2(ref, stride_8, TMP8); + vis_faligndata(TMP0, TMP2, REF_0); + + vis_ld64_2(ref, stride_16, TMP10); + ref += stride; + vis_faligndata(TMP2, TMP4, REF_4); + + vis_ld64(dest[0], DST_0); + vis_faligndata(TMP6, TMP8, REF_S0); + + vis_ld64_2(dest, 8, DST_2); + vis_faligndata(TMP8, TMP10, REF_S4); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_2); + vis_faligndata(TMP2, TMP4, REF_6); + vis_faligndata(TMP6, TMP8, REF_S2); + vis_faligndata(TMP8, TMP10, REF_S6); + } else { + vis_src1(TMP2, REF_2); + vis_src1(TMP4, REF_6); + vis_src1(TMP8, REF_S2); + vis_src1(TMP10, REF_S6); + } + + vis_mul8x16al(DST_0, CONST_1024, TMP30); + vis_pmerge(ZERO, REF_0, TMP0); + + 
vis_mul8x16al(DST_1, CONST_1024, TMP32); + vis_pmerge(ZERO, REF_0_1, TMP2); + + vis_mul8x16au(REF_2, CONST_256, TMP4); + vis_pmerge(ZERO, REF_2_1, TMP6); + + vis_mul8x16al(DST_2, CONST_1024, REF_0); + vis_padd16(TMP0, CONST_6, TMP0); + + vis_mul8x16al(DST_3, CONST_1024, REF_2); + vis_padd16(TMP2, CONST_6, TMP2); + + vis_padd16(TMP0, TMP4, TMP0); + vis_mul8x16au(REF_4, CONST_256, TMP4); + + vis_padd16(TMP2, TMP6, TMP2); + vis_mul8x16au(REF_4_1, CONST_256, TMP6); + + vis_padd16(TMP12, TMP0, TMP12); + vis_mul8x16au(REF_6, CONST_256, TMP8); + + vis_padd16(TMP14, TMP2, TMP14); + vis_mul8x16au(REF_6_1, CONST_256, TMP10); + + vis_padd16(TMP12, TMP16, TMP12); + vis_mul8x16au(REF_S0, CONST_256, REF_4); + + vis_padd16(TMP14, TMP18, TMP14); + vis_mul8x16au(REF_S0_1, CONST_256, REF_6); + + vis_padd16(TMP12, TMP30, TMP12); + + vis_padd16(TMP14, TMP32, TMP14); + vis_pack16(TMP12, DST_0); + + vis_pack16(TMP14, DST_1); + vis_st64(DST_0, dest[0]); + vis_padd16(TMP4, CONST_6, TMP4); + + vis_ld64_2(dest, stride, DST_0); + vis_padd16(TMP6, CONST_6, TMP6); + vis_mul8x16au(REF_S2, CONST_256, TMP12); + + vis_padd16(TMP4, TMP8, TMP4); + vis_mul8x16au(REF_S2_1, CONST_256, TMP14); + + vis_padd16(TMP6, TMP10, TMP6); + + vis_padd16(TMP20, TMP4, TMP20); + + vis_padd16(TMP22, TMP6, TMP22); + + vis_padd16(TMP20, TMP24, TMP20); + + vis_padd16(TMP22, TMP26, TMP22); + + vis_padd16(TMP20, REF_0, TMP20); + vis_mul8x16au(REF_S4, CONST_256, REF_0); + + vis_padd16(TMP22, REF_2, TMP22); + vis_pack16(TMP20, DST_2); + + vis_pack16(TMP22, DST_3); + vis_st64_2(DST_2, dest, 8); + dest += stride; + + vis_ld64_2(dest, 8, DST_2); + vis_mul8x16al(DST_0, CONST_1024, TMP30); + vis_pmerge(ZERO, REF_S4_1, REF_2); + + vis_mul8x16al(DST_1, CONST_1024, TMP32); + vis_padd16(REF_4, TMP0, TMP8); + + vis_mul8x16au(REF_S6, CONST_256, REF_4); + vis_padd16(REF_6, TMP2, TMP10); + + vis_mul8x16au(REF_S6_1, CONST_256, REF_6); + vis_padd16(TMP8, TMP12, TMP8); + + vis_padd16(TMP10, TMP14, TMP10); + + vis_padd16(TMP8, TMP30, TMP8); 
+ + vis_padd16(TMP10, TMP32, TMP10); + vis_pack16(TMP8, DST_0); + + vis_pack16(TMP10, DST_1); + vis_st64(DST_0, dest[0]); + + vis_padd16(REF_0, TMP4, REF_0); + + vis_mul8x16al(DST_2, CONST_1024, TMP30); + vis_padd16(REF_2, TMP6, REF_2); + + vis_mul8x16al(DST_3, CONST_1024, TMP32); + vis_padd16(REF_0, REF_4, REF_0); + + vis_padd16(REF_2, REF_6, REF_2); + + vis_padd16(REF_0, TMP30, REF_0); + + /* stall */ + + vis_padd16(REF_2, TMP32, REF_2); + vis_pack16(REF_0, DST_2); + + vis_pack16(REF_2, DST_3); + vis_st64_2(DST_2, dest, 8); + dest += stride; + } while (--height); +} + /* MC_avg_xy_8_vis: 8-byte-wide version of MC_avg_xy_16_vis. Two reference doublewords per row are loaded and aligned at off and at off + 1 (vis_src1 replaces vis_faligndata when off == 7). For each of the two rows handled per iteration the code adds both alignments of two consecutive reference rows, CONST_6 and the dest bytes scaled by vis_mul8x16al(..., CONST_1024), then narrows with vis_pack16 under scale factor 4. The dest rows are read (dest[0] and dest + stride) before they are overwritten. height is halved and counted with do ... while (--height), so it must be at least 2. */ +static void MC_avg_xy_8_vis (uint8_t * dest, const uint8_t * _ref, + const int stride, int height) +{ + uint8_t *ref = (uint8_t *) _ref; + unsigned long off = (unsigned long) ref & 0x7; + unsigned long off_plus_1 = off + 1; + int stride_8 = stride + 8; + + vis_set_gsr(4 << VIS_GSR_SCALEFACT_SHIFT); + + ref = vis_alignaddr(ref); + + vis_ld64(ref[0], TMP0); + vis_fzero(ZERO); + + vis_ld64_2(ref, 8, TMP2); + + vis_ld64(constants6[0], CONST_6); + + vis_ld64(constants256_1024[0], CONST_256); + vis_faligndata(TMP0, TMP2, REF_S0); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_S2); + } else { + vis_src1(TMP2, REF_S2); + } + + height >>= 1; + do { /* 31 cycles */ + vis_ld64_2(ref, stride, TMP0); + vis_mul8x16au(REF_S0, CONST_256, TMP8); + vis_pmerge(ZERO, REF_S0_1, TMP10); + + vis_ld64_2(ref, stride_8, TMP2); + ref += stride; + vis_mul8x16au(REF_S2, CONST_256, TMP12); + vis_pmerge(ZERO, REF_S2_1, TMP14); + + vis_alignaddr_g0((void *)off); + + vis_ld64_2(ref, stride, TMP4); + vis_faligndata(TMP0, TMP2, REF_S4); + + vis_ld64_2(ref, stride_8, TMP6); + ref += stride; + + vis_ld64(dest[0], DST_0); + vis_faligndata(TMP4, TMP6, REF_S0); + + vis_ld64_2(dest, stride, DST_2); + + if (off != 0x7) { + vis_alignaddr_g0((void *)off_plus_1); + vis_faligndata(TMP0, TMP2, REF_S6); + vis_faligndata(TMP4, TMP6, REF_S2); + } else { + vis_src1(TMP2, REF_S6); + vis_src1(TMP6, 
REF_S2); + } + + vis_mul8x16al(DST_0, CONST_1024, TMP30); + vis_pmerge(ZERO, REF_S4, TMP22); + + vis_mul8x16al(DST_1, CONST_1024, TMP32); + vis_pmerge(ZERO, REF_S4_1, TMP24); + + vis_mul8x16au(REF_S6, CONST_256, TMP26); + vis_pmerge(ZERO, REF_S6_1, TMP28); + + vis_mul8x16au(REF_S0, CONST_256, REF_S4); + vis_padd16(TMP22, CONST_6, TMP22); + + vis_mul8x16au(REF_S0_1, CONST_256, REF_S6); + vis_padd16(TMP24, CONST_6, TMP24); + + vis_mul8x16al(DST_2, CONST_1024, REF_0); + vis_padd16(TMP22, TMP26, TMP22); + + vis_mul8x16al(DST_3, CONST_1024, REF_2); + vis_padd16(TMP24, TMP28, TMP24); + + vis_mul8x16au(REF_S2, CONST_256, TMP26); + vis_padd16(TMP8, TMP22, TMP8); + + vis_mul8x16au(REF_S2_1, CONST_256, TMP28); + vis_padd16(TMP10, TMP24, TMP10); + + vis_padd16(TMP8, TMP12, TMP8); + + vis_padd16(TMP10, TMP14, TMP10); + + vis_padd16(TMP8, TMP30, TMP8); + + vis_padd16(TMP10, TMP32, TMP10); + vis_pack16(TMP8, DST_0); + + vis_pack16(TMP10, DST_1); + vis_st64(DST_0, dest[0]); + dest += stride; + + vis_padd16(REF_S4, TMP22, TMP12); + + vis_padd16(REF_S6, TMP24, TMP14); + + vis_padd16(TMP12, TMP26, TMP12); + + vis_padd16(TMP14, TMP28, TMP14); + + vis_padd16(TMP12, REF_0, TMP12); + + vis_padd16(TMP14, REF_2, TMP14); + vis_pack16(TMP12, DST_2); + + vis_pack16(TMP14, DST_3); + vis_st64(DST_2, dest[0]); + dest += stride; + } while (--height); +} + +MPEG2_MC_EXTERN(vis); + +#endif /* defined(ARCH_SPARC) && defined(ENABLE_VIS) */ diff --git a/src/video_dec/libmpeg2new/libmpeg2/mpeg2_internal.h b/src/video_dec/libmpeg2new/libmpeg2/mpeg2_internal.h new file mode 100644 index 000000000..fec7d4744 --- /dev/null +++ b/src/video_dec/libmpeg2new/libmpeg2/mpeg2_internal.h @@ -0,0 +1,302 @@ +/* + * mpeg2_internal.h + * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org> + * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. 
+ * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#define STATE_INTERNAL_NORETURN ((mpeg2_state_t)-1) + +/* macroblock modes */ +#define MACROBLOCK_INTRA 1 +#define MACROBLOCK_PATTERN 2 +#define MACROBLOCK_MOTION_BACKWARD 4 +#define MACROBLOCK_MOTION_FORWARD 8 +#define MACROBLOCK_QUANT 16 +#define DCT_TYPE_INTERLACED 32 +/* motion_type */ +#define MOTION_TYPE_SHIFT 6 +#define MC_FIELD 1 +#define MC_FRAME 2 +#define MC_16X8 2 +#define MC_DMV 3 + +/* picture structure */ +#define TOP_FIELD 1 +#define BOTTOM_FIELD 2 +#define FRAME_PICTURE 3 + +/* picture coding type */ +#define I_TYPE 1 +#define P_TYPE 2 +#define B_TYPE 3 +#define D_TYPE 4 + +typedef void mpeg2_mc_fct (uint8_t *, const uint8_t *, int, int); + +typedef struct { + uint8_t * ref[2][3]; + uint8_t ** ref2[2]; + int pmv[2][2]; + int f_code[2]; +} motion_t; + +typedef void motion_parser_t (mpeg2_decoder_t * decoder, + motion_t * motion, + mpeg2_mc_fct * const * table); + +struct mpeg2_decoder_s { + /* first, state that carries information from one macroblock to the */ + /* next inside a slice, and is never used outside of mpeg2_slice() */ + + /* bit parsing stuff */ + uint32_t bitstream_buf; /* current 32 bit working set */ + int bitstream_bits; /* used bits in working set */ + const uint8_t * bitstream_ptr; /* buffer with stream data */ + + 
uint8_t * dest[3]; + + int offset; + int stride; + int uv_stride; + int slice_stride; + int slice_uv_stride; + int stride_frame; + unsigned int limit_x; + unsigned int limit_y_16; + unsigned int limit_y_8; + unsigned int limit_y; + + /* Motion vectors */ + /* The f_ and b_ correspond to the forward and backward motion */ + /* predictors */ + motion_t b_motion; + motion_t f_motion; + motion_parser_t * motion_parser[5]; + + /* predictor for DC coefficients in intra blocks */ + int16_t dc_dct_pred[3]; + + /* DCT coefficients */ + int16_t DCTblock[64] ATTR_ALIGN(64); + + uint8_t * picture_dest[3]; + void (* convert) (void * convert_id, uint8_t * const * src, + unsigned int v_offset); + void * convert_id; + + int dmv_offset; + unsigned int v_offset; + + /* now non-slice-specific information */ + + /* sequence header stuff */ + uint16_t * quantizer_matrix[4]; + uint16_t (* chroma_quantizer[2])[64]; + uint16_t quantizer_prescale[4][32][64]; + + /* The width and height of the picture snapped to macroblock units */ + int width; + int height; + int vertical_position_extension; + int chroma_format; + + /* picture header stuff */ + + /* what type of picture this is (I, P, B, D) */ + int coding_type; + + /* picture coding extension stuff */ + + /* quantization factor for intra dc coefficients */ + int intra_dc_precision; + /* top/bottom/both fields */ + int picture_structure; + /* bool to indicate all predictions are frame based */ + int frame_pred_frame_dct; + /* bool to indicate whether intra blocks have motion vectors */ + /* (for concealment) */ + int concealment_motion_vectors; + /* bool to use different vlc tables */ + int intra_vlc_format; + /* used for DMV MC */ + int top_field_first; + + /* stuff derived from bitstream */ + + /* pointer to the zigzag scan we're supposed to be using */ + const uint8_t * scan; + + int second_field; + + int mpeg1; +}; + +typedef struct { + mpeg2_fbuf_t fbuf; +} fbuf_alloc_t; + +struct mpeg2dec_s { + mpeg2_decoder_t decoder; + + 
mpeg2_info_t info; + + uint32_t shift; + int is_display_initialized; + mpeg2_state_t (* action) (struct mpeg2dec_s * mpeg2dec); + mpeg2_state_t state; + uint32_t ext_state; + + /* allocated in init - gcc has problems allocating such big structures */ + uint8_t * chunk_buffer; + /* pointer to start of the current chunk */ + uint8_t * chunk_start; + /* pointer to current position in chunk_buffer */ + uint8_t * chunk_ptr; + /* last start code ? */ + uint8_t code; + + /* picture tags */ + uint32_t tag_current, tag2_current, tag_previous, tag2_previous; + int num_tags; + int bytes_since_tag; + + int first; + int alloc_index_user; + int alloc_index; + uint8_t first_decode_slice; + uint8_t nb_decode_slices; + + unsigned int user_data_len; + + mpeg2_sequence_t new_sequence; + mpeg2_sequence_t sequence; + mpeg2_gop_t new_gop; + mpeg2_gop_t gop; + mpeg2_picture_t new_picture; + mpeg2_picture_t pictures[4]; + mpeg2_picture_t * picture; + /*const*/ mpeg2_fbuf_t * fbuf[3]; /* 0: current fbuf, 1-2: prediction fbufs */ + + fbuf_alloc_t fbuf_alloc[3]; + int custom_fbuf; + + uint8_t * yuv_buf[3][3]; + int yuv_index; + mpeg2_convert_t * convert; + void * convert_arg; + unsigned int convert_id_size; + int convert_stride; + void (* convert_start) (void * id, const mpeg2_fbuf_t * fbuf, + const mpeg2_picture_t * picture, + const mpeg2_gop_t * gop); + + uint8_t * buf_start; + uint8_t * buf_end; + + int16_t display_offset_x, display_offset_y; + + int copy_matrix; + int8_t q_scale_type, scaled[4]; + uint8_t quantizer_matrix[4][64]; + uint8_t new_quantizer_matrix[4][64]; +}; + +typedef struct { +#ifdef ARCH_PPC + uint8_t regv[12*16]; +#endif + int dummy; +} cpu_state_t; + +/* cpu_accel.c */ +uint32_t mpeg2_detect_accel (uint32_t accel); + +/* cpu_state.c */ +void mpeg2_cpu_state_init (uint32_t accel); + +/* decode.c */ +mpeg2_state_t mpeg2_seek_header (mpeg2dec_t * mpeg2dec); +mpeg2_state_t mpeg2_parse_header (mpeg2dec_t * mpeg2dec); + +/* header.c */ +void mpeg2_header_state_init 
(mpeg2dec_t * mpeg2dec); +void mpeg2_reset_info (mpeg2_info_t * info); +int mpeg2_header_sequence (mpeg2dec_t * mpeg2dec); +int mpeg2_header_gop (mpeg2dec_t * mpeg2dec); +mpeg2_state_t mpeg2_header_picture_start (mpeg2dec_t * mpeg2dec); +int mpeg2_header_picture (mpeg2dec_t * mpeg2dec); +int mpeg2_header_extension (mpeg2dec_t * mpeg2dec); +int mpeg2_header_user_data (mpeg2dec_t * mpeg2dec); +void mpeg2_header_sequence_finalize (mpeg2dec_t * mpeg2dec); +void mpeg2_header_gop_finalize (mpeg2dec_t * mpeg2dec); +void mpeg2_header_picture_finalize (mpeg2dec_t * mpeg2dec, uint32_t accels); +mpeg2_state_t mpeg2_header_slice_start (mpeg2dec_t * mpeg2dec); +mpeg2_state_t mpeg2_header_end (mpeg2dec_t * mpeg2dec); +void mpeg2_set_fbuf (mpeg2dec_t * mpeg2dec, int b_type); + +/* idct.c */ +void mpeg2_idct_init (uint32_t accel); + +/* idct_mmx.c */ +void mpeg2_idct_copy_mmxext (int16_t * block, uint8_t * dest, int stride); +void mpeg2_idct_add_mmxext (int last, int16_t * block, + uint8_t * dest, int stride); +void mpeg2_idct_copy_mmx (int16_t * block, uint8_t * dest, int stride); +void mpeg2_idct_add_mmx (int last, int16_t * block, + uint8_t * dest, int stride); +void mpeg2_idct_mmx_init (void); + +/* idct_altivec.c */ +void mpeg2_idct_copy_altivec (int16_t * block, uint8_t * dest, int stride); +void mpeg2_idct_add_altivec (int last, int16_t * block, + uint8_t * dest, int stride); +void mpeg2_idct_altivec_init (void); + +/* idct_alpha.c */ +void mpeg2_idct_copy_mvi (int16_t * block, uint8_t * dest, int stride); +void mpeg2_idct_add_mvi (int last, int16_t * block, + uint8_t * dest, int stride); +void mpeg2_idct_copy_alpha (int16_t * block, uint8_t * dest, int stride); +void mpeg2_idct_add_alpha (int last, int16_t * block, + uint8_t * dest, int stride); +void mpeg2_idct_alpha_init (void); + +/* motion_comp.c */ +void mpeg2_mc_init (uint32_t accel); + /* Table of motion compensation routines. put[] and avg[] each hold eight mpeg2_mc_fct pointers; MPEG2_MC_EXTERN below fills both in the order o_16, x_16, y_16, xy_16, o_8, x_8, y_8, xy_8. */ +typedef struct { + mpeg2_mc_fct * put [8]; + mpeg2_mc_fct * avg [8]; +} mpeg2_mc_t; + /* MPEG2_MC_EXTERN(x) defines the table mpeg2_mc_<x> from the sixteen functions MC_put_*_<x> / MC_avg_*_<x>, which must be visible where the macro is used. The expansion already ends with its own semicolon. */ +#define MPEG2_MC_EXTERN(x) mpeg2_mc_t 
mpeg2_mc_##x = { \ + {MC_put_o_16_##x, MC_put_x_16_##x, MC_put_y_16_##x, MC_put_xy_16_##x, \ + MC_put_o_8_##x, MC_put_x_8_##x, MC_put_y_8_##x, MC_put_xy_8_##x}, \ + {MC_avg_o_16_##x, MC_avg_x_16_##x, MC_avg_y_16_##x, MC_avg_xy_16_##x, \ + MC_avg_o_8_##x, MC_avg_x_8_##x, MC_avg_y_8_##x, MC_avg_xy_8_##x} \ +}; + +extern mpeg2_mc_t mpeg2_mc_c; +extern mpeg2_mc_t mpeg2_mc_mmx; +extern mpeg2_mc_t mpeg2_mc_mmxext; +extern mpeg2_mc_t mpeg2_mc_3dnow; +extern mpeg2_mc_t mpeg2_mc_altivec; +extern mpeg2_mc_t mpeg2_mc_alpha; +extern mpeg2_mc_t mpeg2_mc_vis; diff --git a/src/video_dec/libmpeg2new/libmpeg2/rgb.c b/src/video_dec/libmpeg2new/libmpeg2/rgb.c new file mode 100644 index 000000000..e4abcacc2 --- /dev/null +++ b/src/video_dec/libmpeg2new/libmpeg2/rgb.c @@ -0,0 +1,598 @@ +/* + * rgb.c + * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org> + * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" +#include <xine/attributes.h> + +#include <inttypes.h> + +#include "mpeg2.h" +#include "mpeg2convert.h" +#include "convert_internal.h" + +static int matrix_coefficients = 6; + +static const int Inverse_Table_6_9[8][4] = { + {117504, 138453, 13954, 34903}, /* no sequence_display_extension */ + {117504, 138453, 13954, 34903}, /* ITU-R Rec. 709 (1990) */ + {104597, 132201, 25675, 53279}, /* unspecified */ + {104597, 132201, 25675, 53279}, /* reserved */ + {104448, 132798, 24759, 53109}, /* FCC */ + {104597, 132201, 25675, 53279}, /* ITU-R Rec. 624-4 System B, G */ + {104597, 132201, 25675, 53279}, /* SMPTE 170M */ + {117579, 136230, 16907, 35559} /* SMPTE 240M (1987) */ +}; + +static const uint8_t dither[] ATTR_ALIGN(32) = { + 0, 0, 23, 54, 5, 13, 29, 68, 1, 3, 24, 58, 7, 17, 30, 71, + 0, 0, 23, 54, 5, 13, 29, 68, 1, 3, 24, 58, 7, 17, 30, 71, + 0, 0, 23, 54, 5, 13, 29, 68, 1, 3, 24, 58, 7, 17, 30, 71, + 0, 0, 23, 54, 5, 13, 29, 68, 1, 3, 24, 58, 7, 17, 30, 71, + 15, 36, 7, 18, 21, 50, 13, 31, 17, 39, 9, 21, 22, 53, 15, 35, + 15, 36, 7, 18, 21, 50, 13, 31, 17, 39, 9, 21, 22, 53, 15, 35, + 15, 36, 7, 18, 21, 50, 13, 31, 17, 39, 9, 21, 22, 53, 15, 35, + 15, 36, 7, 18, 21, 50, 13, 31, 17, 39, 9, 21, 22, 53, 15, 35, + 3, 9, 27, 63, 1, 4, 25, 59, 5, 12, 28, 67, 3, 7, 26, 62, + 3, 9, 27, 63, 1, 4, 25, 59, 5, 12, 28, 67, 3, 7, 26, 62, + 3, 9, 27, 63, 1, 4, 25, 59, 5, 12, 28, 67, 3, 7, 26, 62, + 3, 9, 27, 63, 1, 4, 25, 59, 5, 12, 28, 67, 3, 7, 26, 62, + 19, 45, 11, 27, 17, 41, 9, 22, 21, 49, 13, 30, 19, 44, 11, 26, + 19, 45, 11, 27, 17, 41, 9, 22, 21, 49, 13, 30, 19, 44, 11, 26, + 19, 45, 11, 27, 17, 41, 9, 22, 21, 49, 13, 30, 19, 44, 11, 26, + 19, 45, 11, 27, 17, 41, 9, 22, 21, 49, 13, 30, 19, 44, 11, 26, + 0, 2, 24, 57, 6, 15, 30, 70, 0, 
1, 23, 55, 6, 14, 29, 69, + 0, 2, 24, 57, 6, 15, 30, 70, 0, 1, 23, 55, 6, 14, 29, 69, + 0, 2, 24, 57, 6, 15, 30, 70, 0, 1, 23, 55, 6, 14, 29, 69, + 0, 2, 24, 57, 6, 15, 30, 70, 0, 1, 23, 55, 6, 14, 29, 69, + 16, 38, 8, 20, 22, 52, 14, 34, 16, 37, 8, 19, 21, 51, 14, 33, + 16, 38, 8, 20, 22, 52, 14, 34, 16, 37, 8, 19, 21, 51, 14, 33, + 16, 38, 8, 20, 22, 52, 14, 34, 16, 37, 8, 19, 21, 51, 14, 33, + 16, 38, 8, 20, 22, 52, 14, 34, 16, 37, 8, 19, 21, 51, 14, 33, + 4, 11, 28, 66, 2, 6, 26, 61, 4, 10, 27, 65, 2, 5, 25, 60, + 4, 11, 28, 66, 2, 6, 26, 61, 4, 10, 27, 65, 2, 5, 25, 60, + 4, 11, 28, 66, 2, 6, 26, 61, 4, 10, 27, 65, 2, 5, 25, 60, + 4, 11, 28, 66, 2, 6, 26, 61, 4, 10, 27, 65, 2, 5, 25, 60, + 20, 47, 12, 29, 18, 43, 10, 25, 20, 46, 12, 28, 18, 42, 10, 23, + 20, 47, 12, 29, 18, 43, 10, 25, 20, 46, 12, 28, 18, 42, 10, 23, + 20, 47, 12, 29, 18, 43, 10, 25, 20, 46, 12, 28, 18, 42, 10, 23, + 20, 47, 12, 29, 18, 43, 10, 25, 20, 46, 12, 28, 18, 42, 10, 23, + 0, 0, 23, 54, 5, 13, 29, 68, 1, 3, 24, 58, 7, 17, 30, 71, + 0, 0, 23, 54, 5, 13, 29, 68, 1, 3, 24, 58, 7, 17, 30, 71, + 0, 0, 23, 54, 5, 13, 29, 68, 1, 3, 24, 58, 7, 17, 30, 71, + 0, 0, 23, 54, 5, 13, 29, 68, 1, 3, 24, 58, 7, 17, 30, 71, + 15, 36, 7, 18, 21, 50, 13, 31, 17, 39, 9, 21, 22, 53, 15, 35, + 15, 36, 7, 18, 21, 50, 13, 31, 17, 39, 9, 21, 22, 53, 15, 35 +}; + +static const uint8_t dither_temporal[64] = { + 0x00, 0x20, 0x21, 0x01, 0x40, 0x60, 0x61, 0x41, + 0x42, 0x62, 0x63, 0x43, 0x02, 0x22, 0x23, 0x03, + 0x80, 0xa0, 0xa1, 0x81, 0xc0, 0xe0, 0xe1, 0xc1, + 0xc2, 0xe2, 0xe3, 0xc3, 0x82, 0xa2, 0xa3, 0x83, + 0x84, 0xa4, 0xa5, 0x85, 0xc4, 0xe4, 0xe5, 0xc5, + 0xc6, 0xe6, 0xe7, 0xc7, 0x86, 0xa6, 0xa7, 0x87, + 0x04, 0x24, 0x25, 0x05, 0x44, 0x64, 0x65, 0x45, + 0x46, 0x66, 0x67, 0x47, 0x06, 0x26, 0x27, 0x07 +}; + +typedef struct { + convert_rgb_t base; + void * table_rV[256]; + void * table_gU[256]; + int table_gV[256]; + void * table_bU[256]; +} convert_rgb_c_t; + +#define RGB(type,i) \ + U = pu[i]; \ + V = pv[i]; \ + 
r = (type *) id->table_rV[V]; \ + g = (type *) (((uint8_t *)id->table_gU[U]) + id->table_gV[V]); \ + b = (type *) id->table_bU[U]; + +#define DST(py,dst,i,j) \ + Y = py[i]; \ + dst[i] = r[Y] + g[Y] + b[Y]; + +#define DSTRGB(py,dst,i,j) \ + Y = py[i]; \ + dst[3*i] = r[Y]; dst[3*i+1] = g[Y]; dst[3*i+2] = b[Y]; + +#define DSTBGR(py,dst,i,j) \ + Y = py[i]; \ + dst[3*i] = b[Y]; dst[3*i+1] = g[Y]; dst[3*i+2] = r[Y]; + +#define DSTDITHER(py,dst,i,j) \ + Y = py[i]; \ + dst[i] = r[Y+pd[2*i+96*j]] + g[Y-pd[2*i+96*j]] + b[Y+pd[2*i+1+96*j]]; + +#define DO(x) x +#define SKIP(x) + +#define DECLARE_420(func,type,num,DST,DITHER) \ +static void func (void * _id, uint8_t * const * src, \ + unsigned int v_offset) \ +{ \ + const convert_rgb_c_t * const id = (convert_rgb_c_t *) _id; \ + type * dst_1; \ + const uint8_t * py_1, * pu, * pv; \ + int i; \ + DITHER(uint8_t dithpos = id->base.dither_offset;) \ + \ + dst_1 = (type *)(id->base.rgb_ptr + id->base.rgb_slice * v_offset); \ + py_1 = src[0]; pu = src[1]; pv = src[2]; \ + \ + i = 8; \ + do { \ + const uint8_t * py_2; \ + int j, U, V, Y; \ + const type * r, * g, * b; \ + type * dst_2; \ + DITHER(const uint8_t * const pd = dither + 2 * dithpos;) \ + \ + dst_2 = (type *)((char *)dst_1 + id->base.rgb_stride); \ + py_2 = py_1 + id->base.y_stride; \ + j = id->base.width; \ + do { \ + RGB (type, 0) \ + DST (py_1, dst_1, 0, 0) \ + DST (py_1, dst_1, 1, 0) \ + DST (py_2, dst_2, 0, 1) \ + DST (py_2, dst_2, 1, 1) \ + \ + RGB (type, 1) \ + DST (py_2, dst_2, 2, 1) \ + DST (py_2, dst_2, 3, 1) \ + DST (py_1, dst_1, 2, 0) \ + DST (py_1, dst_1, 3, 0) \ + \ + RGB (type, 2) \ + DST (py_1, dst_1, 4, 0) \ + DST (py_1, dst_1, 5, 0) \ + DST (py_2, dst_2, 4, 1) \ + DST (py_2, dst_2, 5, 1) \ + \ + RGB (type, 3) \ + DST (py_2, dst_2, 6, 1) \ + DST (py_2, dst_2, 7, 1) \ + DST (py_1, dst_1, 6, 0) \ + DST (py_1, dst_1, 7, 0) \ + \ + pu += 4; \ + pv += 4; \ + py_1 += 8; \ + py_2 += 8; \ + dst_1 += 8 * num; \ + dst_2 += 8 * num; \ + } while (--j); \ + if (--i == 
id->base.field) { \ + dst_1 = (type *)(id->base.rgb_ptr + \ + id->base.rgb_slice * (v_offset + 1)); \ + py_1 = src[0] + id->base.y_stride_frame; \ + pu = src[1] + id->base.uv_stride_frame; \ + pv = src[2] + id->base.uv_stride_frame; \ + } else { \ + py_1 += id->base.y_increm; \ + pu += id->base.uv_increm; \ + pv += id->base.uv_increm; \ + dst_1 = (type *)((char *)dst_1 + id->base.rgb_increm); \ + DITHER(dithpos += id->base.dither_stride;) \ + } \ + } while (i); \ +} + +DECLARE_420 (rgb_c_32_420, uint32_t, 1, DST, SKIP) +DECLARE_420 (rgb_c_24_rgb_420, uint8_t, 3, DSTRGB, SKIP) +DECLARE_420 (rgb_c_24_bgr_420, uint8_t, 3, DSTBGR, SKIP) +DECLARE_420 (rgb_c_16_420, uint16_t, 1, DST, SKIP) +DECLARE_420 (rgb_c_8_420, uint8_t, 1, DSTDITHER, DO) + +#define DECLARE_422(func,type,num,DST,DITHER) \ +static void func (void * _id, uint8_t * const * src, \ + unsigned int v_offset) \ +{ \ + const convert_rgb_c_t * const id = (convert_rgb_c_t *) _id; \ + type * dst; \ + const uint8_t * py, * pu, * pv; \ + int i; \ + DITHER(uint8_t dithpos = id->base.dither_offset;) \ + \ + dst = (type *)(id->base.rgb_ptr + id->base.rgb_stride * v_offset); \ + py = src[0]; pu = src[1]; pv = src[2]; \ + \ + i = 16; \ + do { \ + int j, U, V, Y; \ + const type * r, * g, * b; \ + DITHER(const uint8_t * const pd = dither + 2 * dithpos;) \ + \ + j = id->base.width; \ + do { \ + RGB (type, 0) \ + DST (py, dst, 0, 0) \ + DST (py, dst, 1, 0) \ + \ + RGB (type, 1) \ + DST (py, dst, 2, 0) \ + DST (py, dst, 3, 0) \ + \ + RGB (type, 2) \ + DST (py, dst, 4, 0) \ + DST (py, dst, 5, 0) \ + \ + RGB (type, 3) \ + DST (py, dst, 6, 0) \ + DST (py, dst, 7, 0) \ + \ + pu += 4; \ + pv += 4; \ + py += 8; \ + dst += 8 * num; \ + } while (--j); \ + py += id->base.y_increm; \ + pu += id->base.uv_increm; \ + pv += id->base.uv_increm; \ + dst = (type *)((char *)dst + id->base.rgb_increm); \ + DITHER(dithpos += id->base.dither_stride;) \ + } while (--i); \ +} + +DECLARE_422 (rgb_c_32_422, uint32_t, 1, DST, SKIP) +DECLARE_422 
(rgb_c_24_rgb_422, uint8_t, 3, DSTRGB, SKIP) +DECLARE_422 (rgb_c_24_bgr_422, uint8_t, 3, DSTBGR, SKIP) +DECLARE_422 (rgb_c_16_422, uint16_t, 1, DST, SKIP) +DECLARE_422 (rgb_c_8_422, uint8_t, 1, DSTDITHER, DO) + +#define DECLARE_444(func,type,num,DST,DITHER) \ +static void func (void * _id, uint8_t * const * src, \ + unsigned int v_offset) \ +{ \ + const convert_rgb_c_t * const id = (convert_rgb_c_t *) _id; \ + type * dst; \ + const uint8_t * py, * pu, * pv; \ + int i; \ + DITHER(uint8_t dithpos = id->base.dither_offset;) \ + \ + dst = (type *)(id->base.rgb_ptr + id->base.rgb_stride * v_offset); \ + py = src[0]; pu = src[1]; pv = src[2]; \ + \ + i = 16; \ + do { \ + int j, U, V, Y; \ + const type * r, * g, * b; \ + DITHER(const uint8_t * const pd = dither + 2 * dithpos;) \ + \ + j = id->base.width; \ + do { \ + RGB (type, 0) \ + DST (py, dst, 0, 0) \ + RGB (type, 1) \ + DST (py, dst, 1, 0) \ + RGB (type, 2) \ + DST (py, dst, 2, 0) \ + RGB (type, 3) \ + DST (py, dst, 3, 0) \ + RGB (type, 4) \ + DST (py, dst, 4, 0) \ + RGB (type, 5) \ + DST (py, dst, 5, 0) \ + RGB (type, 6) \ + DST (py, dst, 6, 0) \ + RGB (type, 7) \ + DST (py, dst, 7, 0) \ + \ + pu += 8; \ + pv += 8; \ + py += 8; \ + dst += 8 * num; \ + } while (--j); \ + py += id->base.y_increm; \ + pu += id->base.y_increm; \ + pv += id->base.y_increm; \ + dst = (type *)((char *)dst + id->base.rgb_increm); \ + DITHER(dithpos += id->base.dither_stride;) \ + } while (--i); \ +} + +DECLARE_444 (rgb_c_32_444, uint32_t, 1, DST, SKIP) +DECLARE_444 (rgb_c_24_rgb_444, uint8_t, 3, DSTRGB, SKIP) +DECLARE_444 (rgb_c_24_bgr_444, uint8_t, 3, DSTBGR, SKIP) +DECLARE_444 (rgb_c_16_444, uint16_t, 1, DST, SKIP) +DECLARE_444 (rgb_c_8_444, uint8_t, 1, DSTDITHER, DO) + +static void rgb_start (void * _id, const mpeg2_fbuf_t * fbuf, + const mpeg2_picture_t * picture, + const mpeg2_gop_t * gop) +{ + convert_rgb_t * id = (convert_rgb_t *) _id; + int uv_stride = id->uv_stride_frame; + id->y_stride = id->y_stride_frame; + id->rgb_ptr = 
fbuf->buf[0]; + id->rgb_slice = id->rgb_stride = id->rgb_stride_frame; + id->dither_stride = 32; + id->dither_offset = dither_temporal[picture->temporal_reference & 63]; + id->field = 0; + if ((picture->nb_fields == 1) || + (id->chroma420 && !(picture->flags & PIC_FLAG_PROGRESSIVE_FRAME))) { + uv_stride <<= 1; + id->y_stride <<= 1; + id->rgb_stride <<= 1; + id->dither_stride <<= 1; + id->dither_offset += 16; + if (picture->nb_fields == 1) { + id->rgb_slice <<= 1; + if (!(picture->flags & PIC_FLAG_TOP_FIELD_FIRST)) { + id->rgb_ptr += id->rgb_stride_frame; + id->dither_offset += 32; + } + } else + id->field = 8 >> id->convert420; + } + id->y_increm = (id->y_stride << id->convert420) - id->y_stride_frame; + id->uv_increm = uv_stride - id->uv_stride_frame; + id->rgb_increm = (id->rgb_stride << id->convert420) - id->rgb_stride_min; + id->dither_stride <<= id->convert420; +} + +static inline int div_round (int dividend, int divisor) +{ + if (dividend > 0) + return (dividend + (divisor>>1)) / divisor; + else + return -((-dividend + (divisor>>1)) / divisor); +} + +static unsigned int rgb_c_init (convert_rgb_c_t * id, + mpeg2convert_rgb_order_t order, + unsigned int bpp) +{ + int i; + uint8_t table_Y[1024]; + uint32_t * table_32 = 0; + uint16_t * table_16 = 0; + uint8_t * table_8 = 0; + uint8_t * table_332 = 0; + int entry_size = 0; + void * table_r = 0; + void * table_g = 0; + void * table_b = 0; + + int crv = Inverse_Table_6_9[matrix_coefficients][0]; + int cbu = Inverse_Table_6_9[matrix_coefficients][1]; + int cgu = -Inverse_Table_6_9[matrix_coefficients][2]; + int cgv = -Inverse_Table_6_9[matrix_coefficients][3]; + + for (i = 0; i < 1024; i++) { + int j; + + j = (76309 * (i - 384 - 16) + 32768) >> 16; + table_Y[i] = (j < 0) ? 0 : ((j > 255) ? 
255 : j); + } + + switch (bpp) { + case 32: + if (!id) + return (197 + 2*682 + 256 + 132) * sizeof (uint32_t); + table_32 = (uint32_t *) (id + 1); + entry_size = sizeof (uint32_t); + table_r = table_32 + 197; + table_b = table_32 + 197 + 685; + table_g = table_32 + 197 + 2*682; + + for (i = -197; i < 256+197; i++) + ((uint32_t *) table_r)[i] = + table_Y[i+384] << ((order == MPEG2CONVERT_RGB) ? 16 : 0); + for (i = -132; i < 256+132; i++) + ((uint32_t *) table_g)[i] = table_Y[i+384] << 8; + for (i = -232; i < 256+232; i++) + ((uint32_t *) table_b)[i] = + table_Y[i+384] << ((order == MPEG2CONVERT_RGB) ? 0 : 16); + break; + + case 24: + if (!id) + return (256 + 2*232) * sizeof (uint8_t); + table_8 = (uint8_t *) (id + 1); + entry_size = sizeof (uint8_t); + table_r = table_g = table_b = table_8 + 232; + + for (i = -232; i < 256+232; i++) + ((uint8_t * )table_b)[i] = table_Y[i+384]; + break; + + case 15: + case 16: + if (!id) + return (197 + 2*682 + 256 + 132) * sizeof (uint16_t); + table_16 = (uint16_t *) (id + 1); + entry_size = sizeof (uint16_t); + table_r = table_16 + 197; + table_b = table_16 + 197 + 685; + table_g = table_16 + 197 + 2*682; + + for (i = -197; i < 256+197; i++) { + int j = table_Y[i+384] >> 3; + + if (order == MPEG2CONVERT_RGB) + j <<= ((bpp==16) ? 11 : 10); + + ((uint16_t *)table_r)[i] = j; + } + for (i = -132; i < 256+132; i++) { + int j = table_Y[i+384] >> ((bpp==16) ? 2 : 3); + + ((uint16_t *)table_g)[i] = j << 5; + } + for (i = -232; i < 256+232; i++) { + int j = table_Y[i+384] >> 3; + + if (order == MPEG2CONVERT_BGR) + j <<= ((bpp==16) ? 
11 : 10); + + ((uint16_t *)table_b)[i] = j; + } + break; + + case 8: + if (!id) + return (197 + 2*682 + 256 + 232 + 71) * sizeof (uint8_t); + table_332 = (uint8_t *) (id + 1); + entry_size = sizeof (uint8_t); + table_r = table_332 + 197; + table_g = table_332 + 197 + 682 + 30; + table_b = table_332 + 197 + 2*682; + + for (i = -197; i < 256+197+30; i++) + ((uint8_t *)table_r)[i] = ((table_Y[i+384] * 7 / 255) << + (order == MPEG2CONVERT_RGB ? 5 : 0)); + for (i = -132; i < 256+132+30; i++) + ((uint8_t *)table_g)[i-30] = ((table_Y[i+384] * 7 / 255) << + (order == MPEG2CONVERT_RGB ? 2 : 3)); + for (i = -232; i < 256+232+71; i++) + ((uint8_t *)table_b)[i] = ((table_Y[i+384] / 85) << + (order == MPEG2CONVERT_RGB ? 0 : 6)); + break; + } + + for (i = 0; i < 256; i++) { + id->table_rV[i] = (((uint8_t *)table_r) + + entry_size * div_round (crv * (i-128), 76309)); + id->table_gU[i] = (((uint8_t *)table_g) + + entry_size * div_round (cgu * (i-128), 76309)); + id->table_gV[i] = entry_size * div_round (cgv * (i-128), 76309); + id->table_bU[i] = (((uint8_t *)table_b) + + entry_size * div_round (cbu * (i-128), 76309)); + } + + return 0; +} + +static int rgb_internal (mpeg2convert_rgb_order_t order, unsigned int bpp, + int stage, void * _id, const mpeg2_sequence_t * seq, + int stride, uint32_t accel, void * arg, + mpeg2_convert_init_t * result) +{ + convert_rgb_t * id = (convert_rgb_t *) _id; + mpeg2convert_copy_t * copy = (mpeg2convert_copy_t *) 0; + unsigned int id_size = sizeof (convert_rgb_t); + int chroma420 = (seq->chroma_height < seq->height); + int convert420 = 0; + int rgb_stride_min = ((bpp + 7) >> 3) * seq->width; + +#ifdef ARCH_X86 + if (!copy && (accel & MPEG2_ACCEL_X86_MMXEXT)) { + convert420 = 0; + copy = mpeg2convert_rgb_mmxext (order, bpp, seq); + } + if (!copy && (accel & MPEG2_ACCEL_X86_MMX)) { + convert420 = 0; + copy = mpeg2convert_rgb_mmx (order, bpp, seq); + } +#endif +#ifdef ARCH_SPARC + if (!copy && (accel & MPEG2_ACCEL_SPARC_VIS)) { + convert420 = 
chroma420; + copy = mpeg2convert_rgb_vis (order, bpp, seq); + } +#endif + if (!copy) { + int src, dest; + static void (* rgb_c[3][5]) (void *, uint8_t * const *, + unsigned int) = + {{rgb_c_24_bgr_420, rgb_c_8_420, rgb_c_16_420, + rgb_c_24_rgb_420, rgb_c_32_420}, + {rgb_c_24_bgr_422, rgb_c_8_422, rgb_c_16_422, + rgb_c_24_rgb_422, rgb_c_32_422}, + {rgb_c_24_bgr_444, rgb_c_8_444, rgb_c_16_444, + rgb_c_24_rgb_444, rgb_c_32_444}}; + + convert420 = chroma420; + id_size = (sizeof (convert_rgb_c_t) + + rgb_c_init ((convert_rgb_c_t *) id, order, bpp)); + src = ((seq->chroma_width == seq->width) + + (seq->chroma_height == seq->height)); + dest = ((bpp == 24 && order == MPEG2CONVERT_BGR) ? 0 : (bpp + 7) >> 3); + copy = rgb_c[src][dest]; + } + + result->id_size = id_size; + + if (stride < rgb_stride_min) + stride = rgb_stride_min; + + if (stage == MPEG2_CONVERT_STRIDE) + return stride; + else if (stage == MPEG2_CONVERT_START) { + id->width = seq->width >> 3; + id->y_stride_frame = seq->width; + id->uv_stride_frame = seq->chroma_width; + id->rgb_stride_frame = stride; + id->rgb_stride_min = rgb_stride_min; + id->chroma420 = chroma420; + id->convert420 = convert420; + result->buf_size[0] = stride * seq->height; + result->buf_size[1] = result->buf_size[2] = 0; + result->start = rgb_start; + result->copy = copy; + } + return 0; +} + +#define DECLARE(func,order,bpp) \ +int func (int stage, void * id, \ + const mpeg2_sequence_t * sequence, int stride, \ + uint32_t accel, void * arg, mpeg2_convert_init_t * result) \ +{ \ + return rgb_internal (order, bpp, stage, id, sequence, stride, \ + accel, arg, result); \ +} + +DECLARE (mpeg2convert_rgb32, MPEG2CONVERT_RGB, 32) +DECLARE (mpeg2convert_rgb24, MPEG2CONVERT_RGB, 24) +DECLARE (mpeg2convert_rgb16, MPEG2CONVERT_RGB, 16) +DECLARE (mpeg2convert_rgb15, MPEG2CONVERT_RGB, 15) +DECLARE (mpeg2convert_rgb8, MPEG2CONVERT_RGB, 8) +DECLARE (mpeg2convert_bgr32, MPEG2CONVERT_BGR, 32) +DECLARE (mpeg2convert_bgr24, MPEG2CONVERT_BGR, 24) +DECLARE 
(mpeg2convert_bgr16, MPEG2CONVERT_BGR, 16) +DECLARE (mpeg2convert_bgr15, MPEG2CONVERT_BGR, 15) +DECLARE (mpeg2convert_bgr8, MPEG2CONVERT_BGR, 8) + +mpeg2_convert_t * mpeg2convert_rgb (mpeg2convert_rgb_order_t order, + unsigned int bpp) +{ + static mpeg2_convert_t * table[5][2] = + {{mpeg2convert_rgb15, mpeg2convert_bgr15}, + {mpeg2convert_rgb8, mpeg2convert_bgr8}, + {mpeg2convert_rgb16, mpeg2convert_bgr16}, + {mpeg2convert_rgb24, mpeg2convert_bgr24}, + {mpeg2convert_rgb32, mpeg2convert_bgr32}}; + + if (order == MPEG2CONVERT_RGB || order == MPEG2CONVERT_BGR) { + if (bpp == 15) + return table[0][order == MPEG2CONVERT_BGR]; + else if (bpp >= 8 && bpp <= 32 && (bpp & 7) == 0) + return table[bpp >> 3][order == MPEG2CONVERT_BGR]; + } + return (mpeg2_convert_t *) 0; +} diff --git a/src/video_dec/libmpeg2new/libmpeg2/rgb_mmx.c b/src/video_dec/libmpeg2new/libmpeg2/rgb_mmx.c new file mode 100644 index 000000000..6ca7e65a8 --- /dev/null +++ b/src/video_dec/libmpeg2new/libmpeg2/rgb_mmx.c @@ -0,0 +1,321 @@ +/* + * rgb_mmx.c + * Copyright (C) 2000-2003 Silicon Integrated System Corp. + * All Rights Reserved. + * + * Author: Olie Lho <ollie@sis.com.tw> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#ifdef ARCH_X86 + +#include <stdio.h> +#include <stdlib.h> +#include <inttypes.h> + +#include "mpeg2.h" +#include "mpeg2convert.h" +#include "convert_internal.h" +#include <xine/attributes.h> +#include "mmx.h" + +#define CPU_MMXEXT 0 +#define CPU_MMX 1 + +/* CPU_MMXEXT/CPU_MMX adaptation layer */ + +#define movntq(src,dest) \ +do { \ + if (cpu == CPU_MMXEXT) \ + movntq_r2m (src, dest); \ + else \ + movq_r2m (src, dest); \ +} while (0) + +static inline void mmx_yuv2rgb (uint8_t * py, uint8_t * pu, uint8_t * pv) +{ + static mmx_t mmx_80w = {0x0080008000800080LL}; + static mmx_t mmx_U_green = {0xf37df37df37df37dLL}; + static mmx_t mmx_U_blue = {0x4093409340934093LL}; + static mmx_t mmx_V_red = {0x3312331233123312LL}; + static mmx_t mmx_V_green = {0xe5fce5fce5fce5fcLL}; + static mmx_t mmx_10w = {0x1010101010101010LL}; + static mmx_t mmx_00ffw = {0x00ff00ff00ff00ffLL}; + static mmx_t mmx_Y_coeff = {0x253f253f253f253fLL}; + + movd_m2r (*pu, mm0); /* mm0 = 00 00 00 00 u3 u2 u1 u0 */ + movd_m2r (*pv, mm1); /* mm1 = 00 00 00 00 v3 v2 v1 v0 */ + movq_m2r (*py, mm6); /* mm6 = Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */ + pxor_r2r (mm4, mm4); /* mm4 = 0 */ + /* XXX might do cache preload for image here */ + + /* + * Do the multiply part of the conversion for even and odd pixels + * register usage: + * mm0 -> Cblue, mm1 -> Cred, mm2 -> Cgreen even pixels + * mm3 -> Cblue, mm4 -> Cred, mm5 -> Cgreen odd pixels + * mm6 -> Y even, mm7 -> Y odd + */ + + punpcklbw_r2r (mm4, mm0); /* mm0 = u3 u2 u1 u0 */ + punpcklbw_r2r (mm4, mm1); /* mm1 = v3 v2 v1 v0 */ + psubsw_m2r (mmx_80w, mm0); /* u -= 128 */ + psubsw_m2r (mmx_80w, mm1); /* v -= 128 */ + psllw_i2r (3, mm0); /* promote precision */ + psllw_i2r (3, mm1); /* promote precision */ + movq_r2r (mm0, mm2); /* 
mm2 = u3 u2 u1 u0 */ + movq_r2r (mm1, mm3); /* mm3 = v3 v2 v1 v0 */ + pmulhw_m2r (mmx_U_green, mm2); /* mm2 = u * u_green */ + pmulhw_m2r (mmx_V_green, mm3); /* mm3 = v * v_green */ + pmulhw_m2r (mmx_U_blue, mm0); /* mm0 = chroma_b */ + pmulhw_m2r (mmx_V_red, mm1); /* mm1 = chroma_r */ + paddsw_r2r (mm3, mm2); /* mm2 = chroma_g */ + + psubusb_m2r (mmx_10w, mm6); /* Y -= 16 */ + movq_r2r (mm6, mm7); /* mm7 = Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */ + pand_m2r (mmx_00ffw, mm6); /* mm6 = Y6 Y4 Y2 Y0 */ + psrlw_i2r (8, mm7); /* mm7 = Y7 Y5 Y3 Y1 */ + psllw_i2r (3, mm6); /* promote precision */ + psllw_i2r (3, mm7); /* promote precision */ + pmulhw_m2r (mmx_Y_coeff, mm6); /* mm6 = luma_rgb even */ + pmulhw_m2r (mmx_Y_coeff, mm7); /* mm7 = luma_rgb odd */ + + /* + * Do the addition part of the conversion for even and odd pixels + * register usage: + * mm0 -> Cblue, mm1 -> Cred, mm2 -> Cgreen even pixels + * mm3 -> Cblue, mm4 -> Cred, mm5 -> Cgreen odd pixels + * mm6 -> Y even, mm7 -> Y odd + */ + + movq_r2r (mm0, mm3); /* mm3 = chroma_b */ + movq_r2r (mm1, mm4); /* mm4 = chroma_r */ + movq_r2r (mm2, mm5); /* mm5 = chroma_g */ + paddsw_r2r (mm6, mm0); /* mm0 = B6 B4 B2 B0 */ + paddsw_r2r (mm7, mm3); /* mm3 = B7 B5 B3 B1 */ + paddsw_r2r (mm6, mm1); /* mm1 = R6 R4 R2 R0 */ + paddsw_r2r (mm7, mm4); /* mm4 = R7 R5 R3 R1 */ + paddsw_r2r (mm6, mm2); /* mm2 = G6 G4 G2 G0 */ + paddsw_r2r (mm7, mm5); /* mm5 = G7 G5 G3 G1 */ + packuswb_r2r (mm0, mm0); /* saturate to 0-255 */ + packuswb_r2r (mm1, mm1); /* saturate to 0-255 */ + packuswb_r2r (mm2, mm2); /* saturate to 0-255 */ + packuswb_r2r (mm3, mm3); /* saturate to 0-255 */ + packuswb_r2r (mm4, mm4); /* saturate to 0-255 */ + packuswb_r2r (mm5, mm5); /* saturate to 0-255 */ + punpcklbw_r2r (mm3, mm0); /* mm0 = B7 B6 B5 B4 B3 B2 B1 B0 */ + punpcklbw_r2r (mm4, mm1); /* mm1 = R7 R6 R5 R4 R3 R2 R1 R0 */ + punpcklbw_r2r (mm5, mm2); /* mm2 = G7 G6 G5 G4 G3 G2 G1 G0 */ +} + +static inline void mmx_unpack_16rgb (uint8_t * image, const int cpu) +{ + 
static mmx_t mmx_bluemask = {0xf8f8f8f8f8f8f8f8LL}; + static mmx_t mmx_greenmask = {0xfcfcfcfcfcfcfcfcLL}; + static mmx_t mmx_redmask = {0xf8f8f8f8f8f8f8f8LL}; + + /* + * convert RGB plane to RGB 16 bits + * mm0 -> B, mm1 -> R, mm2 -> G + * mm4 -> GB, mm5 -> AR pixel 4-7 + * mm6 -> GB, mm7 -> AR pixel 0-3 + */ + + pand_m2r (mmx_bluemask, mm0); /* mm0 = b7b6b5b4b3______ */ + pand_m2r (mmx_greenmask, mm2); /* mm2 = g7g6g5g4g3g2____ */ + pand_m2r (mmx_redmask, mm1); /* mm1 = r7r6r5r4r3______ */ + psrlq_i2r (3, mm0); /* mm0 = ______b7b6b5b4b3 */ + pxor_r2r (mm4, mm4); /* mm4 = 0 */ + movq_r2r (mm0, mm5); /* mm5 = ______b7b6b5b4b3 */ + movq_r2r (mm2, mm7); /* mm7 = g7g6g5g4g3g2____ */ + + punpcklbw_r2r (mm4, mm2); + punpcklbw_r2r (mm1, mm0); + psllq_i2r (3, mm2); + por_r2r (mm2, mm0); + movntq (mm0, *image); + + punpckhbw_r2r (mm4, mm7); + punpckhbw_r2r (mm1, mm5); + psllq_i2r (3, mm7); + por_r2r (mm7, mm5); + movntq (mm5, *(image+8)); +} + +static inline void mmx_unpack_32rgb (uint8_t * image, const int cpu) +{ + /* + * convert RGB plane to RGB packed format, + * mm0 -> B, mm1 -> R, mm2 -> G, mm3 -> 0, + * mm4 -> GB, mm5 -> AR pixel 4-7, + * mm6 -> GB, mm7 -> AR pixel 0-3 + */ + + pxor_r2r (mm3, mm3); + movq_r2r (mm0, mm6); + movq_r2r (mm1, mm7); + movq_r2r (mm0, mm4); + movq_r2r (mm1, mm5); + punpcklbw_r2r (mm2, mm6); + punpcklbw_r2r (mm3, mm7); + punpcklwd_r2r (mm7, mm6); + movntq (mm6, *image); + movq_r2r (mm0, mm6); + punpcklbw_r2r (mm2, mm6); + punpckhwd_r2r (mm7, mm6); + movntq (mm6, *(image+8)); + punpckhbw_r2r (mm2, mm4); + punpckhbw_r2r (mm3, mm5); + punpcklwd_r2r (mm5, mm4); + movntq (mm4, *(image+16)); + movq_r2r (mm0, mm4); + punpckhbw_r2r (mm2, mm4); + punpckhwd_r2r (mm5, mm4); + movntq (mm4, *(image+24)); +} + +static inline void rgb16 (void * const _id, uint8_t * const * src, + const unsigned int v_offset, const int cpu) +{ + convert_rgb_t * const id = (convert_rgb_t *) _id; + uint8_t * dst; + uint8_t * py, * pu, * pv; + int i, j; + + dst = id->rgb_ptr + 
id->rgb_slice * v_offset; + py = src[0]; pu = src[1]; pv = src[2]; + + i = 16; + do { + j = id->width; + do { + mmx_yuv2rgb (py, pu, pv); + mmx_unpack_16rgb (dst, cpu); + py += 8; + pu += 4; + pv += 4; + dst += 16; + } while (--j); + + dst += id->rgb_increm; + py += id->y_increm; + if (--i == id->field) { + dst = id->rgb_ptr + id->rgb_slice * (v_offset + 1); + py = src[0] + id->y_stride_frame; + pu = src[1] + id->uv_stride_frame; + pv = src[2] + id->uv_stride_frame; + } else if (! (i & id->chroma420)) { + pu += id->uv_increm; + pv += id->uv_increm; + } else { + pu -= id->uv_stride_frame; + pv -= id->uv_stride_frame; + } + } while (i); +} + +static inline void argb32 (void * const _id, uint8_t * const * src, + const unsigned int v_offset, const int cpu) +{ + convert_rgb_t * const id = (convert_rgb_t *) _id; + uint8_t * dst; + uint8_t * py, * pu, * pv; + int i, j; + + dst = id->rgb_ptr + id->rgb_slice * v_offset; + py = src[0]; pu = src[1]; pv = src[2]; + + i = 16; + do { + j = id->width; + do { + mmx_yuv2rgb (py, pu, pv); + mmx_unpack_32rgb (dst, cpu); + py += 8; + pu += 4; + pv += 4; + dst += 32; + } while (--j); + + dst += id->rgb_increm; + py += id->y_increm; + if (--i == id->field) { + dst = id->rgb_ptr + id->rgb_slice * (v_offset + 1); + py = src[0] + id->y_stride_frame; + pu = src[1] + id->uv_stride_frame; + pv = src[2] + id->uv_stride_frame; + } else if (! 
(i & id->chroma420)) { + pu += id->uv_increm; + pv += id->uv_increm; + } else { + pu -= id->uv_stride_frame; + pv -= id->uv_stride_frame; + } + } while (i); +} + +static void mmxext_rgb16 (void * id, uint8_t * const * src, + unsigned int v_offset) +{ + rgb16 (id, src, v_offset, CPU_MMXEXT); +} + +static void mmxext_argb32 (void * id, uint8_t * const * src, + unsigned int v_offset) +{ + argb32 (id, src, v_offset, CPU_MMXEXT); +} + +static void mmx_rgb16 (void * id, uint8_t * const * src, unsigned int v_offset) +{ + rgb16 (id, src, v_offset, CPU_MMX); +} + +static void mmx_argb32 (void * id, uint8_t * const * src, + unsigned int v_offset) +{ + argb32 (id, src, v_offset, CPU_MMX); +} + +mpeg2convert_copy_t * mpeg2convert_rgb_mmxext (int order, int bpp, + const mpeg2_sequence_t * seq) +{ + if (order == MPEG2CONVERT_RGB && seq->chroma_width < seq->width) { + if (bpp == 16) + return mmxext_rgb16; + else if (bpp == 32) + return mmxext_argb32; + } + return NULL; /* Fallback to C */ +} + +mpeg2convert_copy_t * mpeg2convert_rgb_mmx (int order, int bpp, + const mpeg2_sequence_t * seq) +{ + if (order == MPEG2CONVERT_RGB && seq->chroma_width < seq->width) { + if (bpp == 16) + return mmx_rgb16; + else if (bpp == 32) + return mmx_argb32; + } + return NULL; /* Fallback to C */ +} +#endif diff --git a/src/video_dec/libmpeg2new/libmpeg2/rgb_vis.c b/src/video_dec/libmpeg2new/libmpeg2/rgb_vis.c new file mode 100644 index 000000000..cbd7c7072 --- /dev/null +++ b/src/video_dec/libmpeg2new/libmpeg2/rgb_vis.c @@ -0,0 +1,384 @@ +/* + * rgb_vis.c + * Copyright (C) 2003 David S. Miller <davem@redhat.com> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#ifdef ARCH_SPARC + +#include <stddef.h> +#include <inttypes.h> + +#include "mpeg2.h" +#include "mpeg2convert.h" +#include "convert_internal.h" +#include <xine/attributes.h> +#include "vis.h" + +/* Based partially upon the MMX yuv2rgb code, see there for credits. + * + * The difference here is that since we have enough registers we + * process both even and odd scanlines in one pass. + */ + +static const uint16_t const_2048[] ATTR_ALIGN(8) = {2048, 2048, 2048, 2048}; +static const uint16_t const_1024[] ATTR_ALIGN(8) = {1024, 1024, 1024, 1024}; +static const uint16_t const_128[] ATTR_ALIGN(8) = {128, 128, 128, 128}; +static const uint8_t const_Ugreen[] ATTR_ALIGN(8) = + {0xf3, 0x00, 0xf3, 0x00, 0xf3, 0x00, 0xf3, 0x00}; +static const uint8_t const_Vgreen[] ATTR_ALIGN(8) = + {0xe6, 0x00, 0xe6, 0x00, 0xe6, 0x00, 0xe6, 0x00}; +static const uint8_t const_Ublue_Vred[] ATTR_ALIGN(8) = + {0x41, 0x41, 0x41, 0x41, 0x33, 0x33, 0x33, 0x33}; +static const uint8_t const_Ycoeff[] ATTR_ALIGN(4) = {0x25, 0x25, 0x25, 0x25}; + +#define TMP0 0 +#define TMP1 1 +#define TMP2 2 +#define TMP3 3 +#define TMP4 4 +#define TMP5 5 +#define TMP6 6 +#define TMP7 7 +#define TMP8 8 +#define TMP9 9 +#define TMP10 10 +#define TMP11 11 +#define TMP12 12 +#define TMP13 13 + +#define CONST_UBLUE 14 +#define CONST_VRED 15 +#define CONST_2048 16 + +#define BLUE8_EVEN 18 +#define BLUE8_ODD 19 +#define RED8_EVEN 20 +#define RED8_ODD 21 +#define GREEN8_EVEN 22 +#define GREEN8_ODD 23 + +#define BLUE8_2_EVEN 24 +#define 
BLUE8_2_ODD 25 +#define RED8_2_EVEN 26 +#define RED8_2_ODD 27 +#define GREEN8_2_EVEN 28 +#define GREEN8_2_ODD 29 + +#define CONST_YCOEFF 30 +#define ZEROS 31 + +#define PU_0 32 +#define PU_2 34 +#define PV_0 36 +#define PV_2 38 +#define PY_0 40 +#define PY_2 42 +#define PY_4 44 +#define PY_6 46 + +#define CONST_128 56 +#define CONST_1024 58 +#define CONST_VGREEN 60 +#define CONST_UGREEN 62 + +static inline void vis_init_consts(void) +{ + vis_set_gsr(7 << VIS_GSR_SCALEFACT_SHIFT); + + vis_ld64(const_2048[0], CONST_2048); + vis_ld64(const_1024[0], CONST_1024); + vis_ld64(const_Ugreen[0], CONST_UGREEN); + vis_ld64(const_Vgreen[0], CONST_VGREEN); + vis_fzeros(ZEROS); + vis_ld64(const_Ublue_Vred[0], CONST_UBLUE); + vis_ld32(const_Ycoeff[0], CONST_YCOEFF); + vis_ld64(const_128[0], CONST_128); +} + +static inline void vis_yuv2rgb(uint8_t *py, uint8_t *pu, uint8_t *pv, + int y_stride) +{ + vis_ld32(pu[0], TMP0); + + vis_ld32(pv[0], TMP2); + + vis_ld64(py[0], TMP4); + vis_mul8x16au(TMP0, CONST_2048, PU_0); + + vis_ld64_2(py, y_stride, TMP8); + vis_mul8x16au(TMP2, CONST_2048, PV_0); + + vis_pmerge(TMP4, TMP5, TMP6); + + vis_pmerge(TMP6, TMP7, TMP4); + + vis_pmerge(TMP8, TMP9, TMP10); + + vis_pmerge(TMP10, TMP11, TMP8); + vis_mul8x16au(TMP4, CONST_2048, PY_0); + + vis_psub16(PU_0, CONST_1024, PU_0); + vis_mul8x16au(TMP5, CONST_2048, PY_2); + + vis_psub16(PV_0, CONST_1024, PV_0); + vis_mul8x16au(TMP8, CONST_2048, PY_4); + + vis_psub16(PY_0, CONST_128, PY_0); + vis_mul8x16au(TMP9, CONST_2048, PY_6); + + vis_psub16(PY_2, CONST_128, PY_2); + vis_mul8x16(CONST_YCOEFF, PY_0, PY_0); + + vis_psub16(PY_4, CONST_128, PY_4); + vis_mul8x16(CONST_YCOEFF, PY_2, PY_2); + + vis_psub16(PY_6, CONST_128, PY_6); + vis_mul8x16(CONST_YCOEFF, PY_4, PY_4); + + vis_mul8x16(CONST_YCOEFF, PY_6, PY_6); + + vis_mul8sux16(CONST_UGREEN, PU_0, TMP0); + + vis_mul8sux16(CONST_VGREEN, PV_0, TMP2); + + vis_mul8x16(CONST_UBLUE, PU_0, TMP4); + + vis_mul8x16(CONST_VRED, PV_0, TMP6); + vis_padd16(TMP0, TMP2, 
TMP10); + + vis_padd16(PY_0, TMP4, TMP0); + + vis_padd16(PY_2, TMP4, TMP2); + vis_pack16(TMP0, BLUE8_EVEN); + + vis_padd16(PY_4, TMP4, TMP0); + vis_pack16(TMP2, BLUE8_ODD); + + vis_padd16(PY_6, TMP4, TMP2); + vis_pack16(TMP0, BLUE8_2_EVEN); + + vis_padd16(PY_0, TMP6, TMP0); + vis_pack16(TMP2, BLUE8_2_ODD); + + vis_padd16(PY_2, TMP6, TMP2); + vis_pack16(TMP0, RED8_EVEN); + + vis_padd16(PY_4, TMP6, TMP0); + vis_pack16(TMP2, RED8_ODD); + + vis_padd16(PY_6, TMP6, TMP2); + vis_pack16(TMP0, RED8_2_EVEN); + + vis_padd16(PY_0, TMP10, TMP0); + vis_pack16(TMP2, RED8_2_ODD); + + vis_padd16(PY_2, TMP10, TMP2); + vis_pack16(TMP0, GREEN8_EVEN); + + vis_padd16(PY_4, TMP10, TMP0); + vis_pack16(TMP2, GREEN8_ODD); + + vis_padd16(PY_6, TMP10, TMP2); + vis_pack16(TMP0, GREEN8_2_EVEN); + + vis_pack16(TMP2, GREEN8_2_ODD); + vis_pmerge(BLUE8_EVEN, BLUE8_ODD, BLUE8_EVEN); + + vis_pmerge(BLUE8_2_EVEN, BLUE8_2_ODD, BLUE8_2_EVEN); + + vis_pmerge(RED8_EVEN, RED8_ODD, RED8_EVEN); + + vis_pmerge(RED8_2_EVEN, RED8_2_ODD, RED8_2_EVEN); + + vis_pmerge(GREEN8_EVEN, GREEN8_ODD, GREEN8_EVEN); + + vis_pmerge(GREEN8_2_EVEN, GREEN8_2_ODD, GREEN8_2_EVEN); +} + +static inline void vis_unpack_32rgb(uint8_t *image, int stride) +{ + vis_pmerge(ZEROS, GREEN8_EVEN, TMP0); + vis_pmerge(RED8_EVEN, BLUE8_EVEN, TMP2); + + vis_pmerge(TMP0, TMP2, TMP4); + vis_st64(TMP4, image[0]); + + vis_pmerge(TMP1, TMP3, TMP6); + vis_st64_2(TMP6, image, 8); + + vis_pmerge(ZEROS, GREEN8_ODD, TMP8); + vis_pmerge(RED8_ODD, BLUE8_ODD, TMP10); + + vis_pmerge(TMP8, TMP10, TMP0); + vis_st64_2(TMP0, image, 16); + + vis_pmerge(TMP9, TMP11, TMP2); + vis_st64_2(TMP2, image, 24); + + image += stride; + + vis_pmerge(ZEROS, GREEN8_2_EVEN, TMP0); + vis_pmerge(RED8_2_EVEN, BLUE8_2_EVEN, TMP2); + + vis_pmerge(TMP0, TMP2, TMP4); + vis_st64(TMP4, image[0]); + + vis_pmerge(TMP1, TMP3, TMP6); + vis_st64_2(TMP6, image, 8); + + vis_pmerge(ZEROS, GREEN8_2_ODD, TMP8); + vis_pmerge(RED8_2_ODD, BLUE8_2_ODD, TMP10); + + vis_pmerge(TMP8, TMP10, TMP0); + 
vis_st64_2(TMP0, image, 16); + + vis_pmerge(TMP9, TMP11, TMP2); + vis_st64_2(TMP2, image, 24); +} + +static inline void vis_unpack_32bgr(uint8_t *image, int stride) +{ + vis_pmerge(ZEROS, GREEN8_EVEN, TMP0); + vis_pmerge(BLUE8_EVEN, RED8_EVEN, TMP2); + + vis_pmerge(TMP0, TMP2, TMP4); + vis_st64(TMP4, image[0]); + + vis_pmerge(TMP1, TMP3, TMP6); + vis_st64_2(TMP6, image, 8); + + vis_pmerge(ZEROS, GREEN8_ODD, TMP8); + vis_pmerge(BLUE8_ODD, RED8_ODD, TMP10); + + vis_pmerge(TMP8, TMP10, TMP0); + vis_st64_2(TMP0, image, 16); + + vis_pmerge(TMP9, TMP11, TMP2); + vis_st64_2(TMP2, image, 24); + + image += stride; + + vis_pmerge(ZEROS, GREEN8_2_EVEN, TMP0); + vis_pmerge(BLUE8_2_EVEN, RED8_2_EVEN, TMP2); + + vis_pmerge(TMP0, TMP2, TMP4); + vis_st64(TMP4, image[0]); + + vis_pmerge(TMP1, TMP3, TMP6); + vis_st64_2(TMP6, image, 8); + + vis_pmerge(ZEROS, GREEN8_2_ODD, TMP8); + vis_pmerge(BLUE8_2_ODD, RED8_2_ODD, TMP10); + + vis_pmerge(TMP8, TMP10, TMP0); + vis_st64_2(TMP0, image, 16); + + vis_pmerge(TMP9, TMP11, TMP2); + vis_st64_2(TMP2, image, 24); +} + +static inline void vis_yuv420_argb32(uint8_t *image, + uint8_t *py, uint8_t *pu, uint8_t *pv, + int width, int height, int rgb_stride, + int y_stride, int uv_stride) +{ + height >>= 1; + uv_stride -= width >> 1; + do { + int i = width >> 3; + do { + vis_yuv2rgb(py, pu, pv, y_stride); + vis_unpack_32rgb(image, rgb_stride); + py += 8; + pu += 4; + pv += 4; + image += 32; + } while (--i); + + py += (y_stride << 1) - width; + image += (rgb_stride << 1) - 4 * width; + pu += uv_stride; + pv += uv_stride; + } while (--height); +} + +static inline void vis_yuv420_abgr32(uint8_t *image, + uint8_t *py, uint8_t *pu, uint8_t *pv, + int width, int height, int rgb_stride, + int y_stride, int uv_stride) +{ + height >>= 1; + uv_stride -= width >> 1; + do { + int i = width >> 3; + do { + vis_yuv2rgb(py, pu, pv, y_stride); + vis_unpack_32bgr(image, rgb_stride); + py += 8; + pu += 4; + pv += 4; + image += 32; + } while (--i); + + py += (y_stride 
<< 1) - width; + image += (rgb_stride << 1) - 4 * width; + pu += uv_stride; + pv += uv_stride; + } while (--height); +} + +static void vis_argb32(void *_id, uint8_t * const *src, + unsigned int v_offset) +{ + convert_rgb_t *id = (convert_rgb_t *) _id; + + vis_init_consts(); + vis_yuv420_argb32(id->rgb_ptr + id->rgb_stride * v_offset, + src[0], src[1], src[2], id->width, 16, + id->rgb_stride, id->y_stride, id->y_stride >> 1); +} + +static void vis_abgr32(void *_id, uint8_t * const *src, + unsigned int v_offset) +{ + convert_rgb_t *id = (convert_rgb_t *) _id; + + vis_init_consts(); + vis_yuv420_abgr32(id->rgb_ptr + id->rgb_stride * v_offset, + src[0], src[1], src[2], id->width, 16, + id->rgb_stride, id->y_stride, id->y_stride >> 1); +} + +mpeg2convert_copy_t *mpeg2convert_rgb_vis(int order, int bpp, + const mpeg2_sequence_t * seq) +{ + if (bpp == 32 && seq->chroma_height < seq->height) { + if (order == MPEG2CONVERT_RGB) + return vis_argb32; + if (order == MPEG2CONVERT_BGR) + return vis_abgr32; + } + + return NULL; /* Fallback to C */ +} + +#endif /* ARCH_SPARC */ diff --git a/src/video_dec/libmpeg2new/libmpeg2/slice.c b/src/video_dec/libmpeg2new/libmpeg2/slice.c new file mode 100644 index 000000000..ce4508639 --- /dev/null +++ b/src/video_dec/libmpeg2new/libmpeg2/slice.c @@ -0,0 +1,2058 @@ +/* + * slice.c + * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org> + * Copyright (C) 2003 Peter Gubanov <peter@elecard.net.ru> + * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#include <inttypes.h> + +#include "../include/mpeg2.h" +#include "../include/attributes.h" +#include "mpeg2_internal.h" + +extern mpeg2_mc_t mpeg2_mc; +extern void (* mpeg2_idct_copy) (int16_t * block, uint8_t * dest, int stride); +extern void (* mpeg2_idct_add) (int last, int16_t * block, + uint8_t * dest, int stride); +extern void (* mpeg2_cpu_state_save) (cpu_state_t * state); +extern void (* mpeg2_cpu_state_restore) (cpu_state_t * state); + +#include "vlc.h" + +static inline int get_macroblock_modes (mpeg2_decoder_t * const decoder) +{ +#define bit_buf (decoder->bitstream_buf) +#define bits (decoder->bitstream_bits) +#define bit_ptr (decoder->bitstream_ptr) + int macroblock_modes; + const MBtab * tab; + + switch (decoder->coding_type) { + case I_TYPE: + + tab = MB_I + UBITS (bit_buf, 1); + DUMPBITS (bit_buf, bits, tab->len); + macroblock_modes = tab->modes; + + if ((! 
(decoder->frame_pred_frame_dct)) && + (decoder->picture_structure == FRAME_PICTURE)) { + macroblock_modes |= UBITS (bit_buf, 1) * DCT_TYPE_INTERLACED; + DUMPBITS (bit_buf, bits, 1); + } + + return macroblock_modes; + + case P_TYPE: + + tab = MB_P + UBITS (bit_buf, 5); + DUMPBITS (bit_buf, bits, tab->len); + macroblock_modes = tab->modes; + + if (decoder->picture_structure != FRAME_PICTURE) { + if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) { + macroblock_modes |= UBITS (bit_buf, 2) << MOTION_TYPE_SHIFT; + DUMPBITS (bit_buf, bits, 2); + } + return macroblock_modes | MACROBLOCK_MOTION_FORWARD; + } else if (decoder->frame_pred_frame_dct) { + if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) + macroblock_modes |= MC_FRAME << MOTION_TYPE_SHIFT; + return macroblock_modes | MACROBLOCK_MOTION_FORWARD; + } else { + if (macroblock_modes & MACROBLOCK_MOTION_FORWARD) { + macroblock_modes |= UBITS (bit_buf, 2) << MOTION_TYPE_SHIFT; + DUMPBITS (bit_buf, bits, 2); + } + if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN)) { + macroblock_modes |= UBITS (bit_buf, 1) * DCT_TYPE_INTERLACED; + DUMPBITS (bit_buf, bits, 1); + } + return macroblock_modes | MACROBLOCK_MOTION_FORWARD; + } + + case B_TYPE: + + tab = MB_B + UBITS (bit_buf, 6); + DUMPBITS (bit_buf, bits, tab->len); + macroblock_modes = tab->modes; + + if (decoder->picture_structure != FRAME_PICTURE) { + if (! (macroblock_modes & MACROBLOCK_INTRA)) { + macroblock_modes |= UBITS (bit_buf, 2) << MOTION_TYPE_SHIFT; + DUMPBITS (bit_buf, bits, 2); + } + return macroblock_modes; + } else if (decoder->frame_pred_frame_dct) { + /* if (! 
(macroblock_modes & MACROBLOCK_INTRA)) */ + macroblock_modes |= MC_FRAME << MOTION_TYPE_SHIFT; + return macroblock_modes; + } else { + if (macroblock_modes & MACROBLOCK_INTRA) + goto intra; + macroblock_modes |= UBITS (bit_buf, 2) << MOTION_TYPE_SHIFT; + DUMPBITS (bit_buf, bits, 2); + if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN)) { + intra: + macroblock_modes |= UBITS (bit_buf, 1) * DCT_TYPE_INTERLACED; + DUMPBITS (bit_buf, bits, 1); + } + return macroblock_modes; + } + + case D_TYPE: + + DUMPBITS (bit_buf, bits, 1); + return MACROBLOCK_INTRA; + + default: + return 0; + } +#undef bit_buf +#undef bits +#undef bit_ptr +} + +static inline void get_quantizer_scale (mpeg2_decoder_t * const decoder) +{ +#define bit_buf (decoder->bitstream_buf) +#define bits (decoder->bitstream_bits) +#define bit_ptr (decoder->bitstream_ptr) + + int quantizer_scale_code; + + quantizer_scale_code = UBITS (bit_buf, 5); + DUMPBITS (bit_buf, bits, 5); + + decoder->quantizer_matrix[0] = + decoder->quantizer_prescale[0][quantizer_scale_code]; + decoder->quantizer_matrix[1] = + decoder->quantizer_prescale[1][quantizer_scale_code]; + decoder->quantizer_matrix[2] = + decoder->chroma_quantizer[0][quantizer_scale_code]; + decoder->quantizer_matrix[3] = + decoder->chroma_quantizer[1][quantizer_scale_code]; +#undef bit_buf +#undef bits +#undef bit_ptr +} + +static inline int get_motion_delta (mpeg2_decoder_t * const decoder, + const int f_code) +{ +#define bit_buf (decoder->bitstream_buf) +#define bits (decoder->bitstream_bits) +#define bit_ptr (decoder->bitstream_ptr) + + int delta; + int sign; + const MVtab * tab; + + if (bit_buf & 0x80000000) { + DUMPBITS (bit_buf, bits, 1); + return 0; + } else if (bit_buf >= 0x0c000000) { + + tab = MV_4 + UBITS (bit_buf, 4); + delta = (tab->delta << f_code) + 1; + bits += tab->len + f_code + 1; + bit_buf <<= tab->len; + + sign = SBITS (bit_buf, 1); + bit_buf <<= 1; + + if (f_code) + delta += UBITS (bit_buf, f_code); + bit_buf <<= f_code; + + 
return (delta ^ sign) - sign; + + } else { + + tab = MV_10 + UBITS (bit_buf, 10); + delta = (tab->delta << f_code) + 1; + bits += tab->len + 1; + bit_buf <<= tab->len; + + sign = SBITS (bit_buf, 1); + bit_buf <<= 1; + + if (f_code) { + NEEDBITS (bit_buf, bits, bit_ptr); + delta += UBITS (bit_buf, f_code); + DUMPBITS (bit_buf, bits, f_code); + } + + return (delta ^ sign) - sign; + + } +#undef bit_buf +#undef bits +#undef bit_ptr +} + +static inline int bound_motion_vector (const int vector, const int f_code) +{ + return ((int32_t)vector << (27 - f_code)) >> (27 - f_code); +} + +static inline int get_dmv (mpeg2_decoder_t * const decoder) +{ +#define bit_buf (decoder->bitstream_buf) +#define bits (decoder->bitstream_bits) +#define bit_ptr (decoder->bitstream_ptr) + + const DMVtab * tab; + + tab = DMV_2 + UBITS (bit_buf, 2); + DUMPBITS (bit_buf, bits, tab->len); + return tab->dmv; +#undef bit_buf +#undef bits +#undef bit_ptr +} + +static inline int get_coded_block_pattern (mpeg2_decoder_t * const decoder) +{ +#define bit_buf (decoder->bitstream_buf) +#define bits (decoder->bitstream_bits) +#define bit_ptr (decoder->bitstream_ptr) + + const CBPtab * tab; + + NEEDBITS (bit_buf, bits, bit_ptr); + + if (bit_buf >= 0x20000000) { + + tab = CBP_7 + (UBITS (bit_buf, 7) - 16); + DUMPBITS (bit_buf, bits, tab->len); + return tab->cbp; + + } else { + + tab = CBP_9 + UBITS (bit_buf, 9); + DUMPBITS (bit_buf, bits, tab->len); + return tab->cbp; + } + +#undef bit_buf +#undef bits +#undef bit_ptr +} + +static inline int get_luma_dc_dct_diff (mpeg2_decoder_t * const decoder) +{ +#define bit_buf (decoder->bitstream_buf) +#define bits (decoder->bitstream_bits) +#define bit_ptr (decoder->bitstream_ptr) + const DCtab * tab; + int size; + int dc_diff; + + if (bit_buf < 0xf8000000) { + tab = DC_lum_5 + UBITS (bit_buf, 5); + size = tab->size; + if (size) { + bits += tab->len + size; + bit_buf <<= tab->len; + dc_diff = + UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size); + bit_buf <<= 
size; + return dc_diff << decoder->intra_dc_precision; + } else { + DUMPBITS (bit_buf, bits, 3); + return 0; + } + } else { + tab = DC_long + (UBITS (bit_buf, 9) - 0x1e0); + size = tab->size; + DUMPBITS (bit_buf, bits, tab->len); + NEEDBITS (bit_buf, bits, bit_ptr); + dc_diff = UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size); + DUMPBITS (bit_buf, bits, size); + return dc_diff << decoder->intra_dc_precision; + } +#undef bit_buf +#undef bits +#undef bit_ptr +} + +static inline int get_chroma_dc_dct_diff (mpeg2_decoder_t * const decoder) +{ +#define bit_buf (decoder->bitstream_buf) +#define bits (decoder->bitstream_bits) +#define bit_ptr (decoder->bitstream_ptr) + const DCtab * tab; + int size; + int dc_diff; + + if (bit_buf < 0xf8000000) { + tab = DC_chrom_5 + UBITS (bit_buf, 5); + size = tab->size; + if (size) { + bits += tab->len + size; + bit_buf <<= tab->len; + dc_diff = + UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size); + bit_buf <<= size; + return dc_diff << decoder->intra_dc_precision; + } else { + DUMPBITS (bit_buf, bits, 2); + return 0; + } + } else { + tab = DC_long + (UBITS (bit_buf, 10) - 0x3e0); + size = tab->size; + DUMPBITS (bit_buf, bits, tab->len + 1); + NEEDBITS (bit_buf, bits, bit_ptr); + dc_diff = UBITS (bit_buf, size) - UBITS (SBITS (~bit_buf, 1), size); + DUMPBITS (bit_buf, bits, size); + return dc_diff << decoder->intra_dc_precision; + } +#undef bit_buf +#undef bits +#undef bit_ptr +} + +#define SATURATE(val) \ +do { \ + val <<= 4; \ + if (unlikely (val != (int16_t) val)) \ + val = (SBITS (val, 1) ^ 2047) << 4; \ +} while (0) + +static void get_intra_block_B14 (mpeg2_decoder_t * const decoder, + const uint16_t * const quant_matrix) +{ + int i; + int j; + int val; + const uint8_t * const scan = decoder->scan; + int mismatch; + const DCTtab * tab; + uint32_t bit_buf; + int bits; + const uint8_t * bit_ptr; + int16_t * const dest = decoder->DCTblock; + + i = 0; + mismatch = ~dest[0]; + + bit_buf = decoder->bitstream_buf; + bits 
= decoder->bitstream_bits; + bit_ptr = decoder->bitstream_ptr; + + NEEDBITS (bit_buf, bits, bit_ptr); + + while (1) { + if (bit_buf >= 0x28000000) { + + tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5); + + i += tab->run; + if (i >= 64) + break; /* end of block */ + + normal_code: + j = scan[i]; + bit_buf <<= tab->len; + bits += tab->len + 1; + val = (tab->level * quant_matrix[j]) >> 4; + + /* if (bitstream_get (1)) val = -val; */ + val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1); + + SATURATE (val); + dest[j] = val; + mismatch ^= val; + + bit_buf <<= 1; + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } else if (bit_buf >= 0x04000000) { + + tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4); + + i += tab->run; + if (i < 64) + goto normal_code; + + /* escape code */ + + i += UBITS (bit_buf << 6, 6) - 64; + if (i >= 64) + break; /* illegal, check needed to avoid buffer overflow */ + + j = scan[i]; + + DUMPBITS (bit_buf, bits, 12); + NEEDBITS (bit_buf, bits, bit_ptr); + val = (SBITS (bit_buf, 12) * quant_matrix[j]) / 16; + + SATURATE (val); + dest[j] = val; + mismatch ^= val; + + DUMPBITS (bit_buf, bits, 12); + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } else if (bit_buf >= 0x02000000) { + tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00800000) { + tab = DCT_13 + (UBITS (bit_buf, 13) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00200000) { + tab = DCT_15 + (UBITS (bit_buf, 15) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else { + tab = DCT_16 + UBITS (bit_buf, 16); + bit_buf <<= 16; + GETWORD (bit_buf, bits + 16, bit_ptr); + i += tab->run; + if (i < 64) + goto normal_code; + } + break; /* illegal, check needed to avoid buffer overflow */ + } + dest[63] ^= mismatch & 16; + DUMPBITS (bit_buf, bits, tab->len); /* dump end of block code */ + decoder->bitstream_buf = bit_buf; + decoder->bitstream_bits = bits; + 
decoder->bitstream_ptr = bit_ptr; +} + +static void get_intra_block_B15 (mpeg2_decoder_t * const decoder, + const uint16_t * const quant_matrix) +{ + int i; + int j; + int val; + const uint8_t * const scan = decoder->scan; + int mismatch; + const DCTtab * tab; + uint32_t bit_buf; + int bits; + const uint8_t * bit_ptr; + int16_t * const dest = decoder->DCTblock; + + i = 0; + mismatch = ~dest[0]; + + bit_buf = decoder->bitstream_buf; + bits = decoder->bitstream_bits; + bit_ptr = decoder->bitstream_ptr; + + NEEDBITS (bit_buf, bits, bit_ptr); + + while (1) { + if (bit_buf >= 0x04000000) { + + tab = DCT_B15_8 + (UBITS (bit_buf, 8) - 4); + + i += tab->run; + if (i < 64) { + + normal_code: + j = scan[i]; + bit_buf <<= tab->len; + bits += tab->len + 1; + val = (tab->level * quant_matrix[j]) >> 4; + + /* if (bitstream_get (1)) val = -val; */ + val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1); + + SATURATE (val); + dest[j] = val; + mismatch ^= val; + + bit_buf <<= 1; + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } else { + + /* end of block. 
I commented out this code because if we */ + /* dont exit here we will still exit at the later test :) */ + + /* if (i >= 128) break; */ /* end of block */ + + /* escape code */ + + i += UBITS (bit_buf << 6, 6) - 64; + if (i >= 64) + break; /* illegal, check against buffer overflow */ + + j = scan[i]; + + DUMPBITS (bit_buf, bits, 12); + NEEDBITS (bit_buf, bits, bit_ptr); + val = (SBITS (bit_buf, 12) * quant_matrix[j]) / 16; + + SATURATE (val); + dest[j] = val; + mismatch ^= val; + + DUMPBITS (bit_buf, bits, 12); + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } + } else if (bit_buf >= 0x02000000) { + tab = DCT_B15_10 + (UBITS (bit_buf, 10) - 8); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00800000) { + tab = DCT_13 + (UBITS (bit_buf, 13) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00200000) { + tab = DCT_15 + (UBITS (bit_buf, 15) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else { + tab = DCT_16 + UBITS (bit_buf, 16); + bit_buf <<= 16; + GETWORD (bit_buf, bits + 16, bit_ptr); + i += tab->run; + if (i < 64) + goto normal_code; + } + break; /* illegal, check needed to avoid buffer overflow */ + } + dest[63] ^= mismatch & 16; + DUMPBITS (bit_buf, bits, tab->len); /* dump end of block code */ + decoder->bitstream_buf = bit_buf; + decoder->bitstream_bits = bits; + decoder->bitstream_ptr = bit_ptr; +} + +static int get_non_intra_block (mpeg2_decoder_t * const decoder, + const uint16_t * const quant_matrix) +{ + int i; + int j; + int val; + const uint8_t * const scan = decoder->scan; + int mismatch; + const DCTtab * tab; + uint32_t bit_buf; + int bits; + const uint8_t * bit_ptr; + int16_t * const dest = decoder->DCTblock; + + i = -1; + mismatch = -1; + + bit_buf = decoder->bitstream_buf; + bits = decoder->bitstream_bits; + bit_ptr = decoder->bitstream_ptr; + + NEEDBITS (bit_buf, bits, bit_ptr); + if (bit_buf >= 0x28000000) { + tab = DCT_B14DC_5 + (UBITS (bit_buf, 5) - 5); + 
goto entry_1; + } else + goto entry_2; + + while (1) { + if (bit_buf >= 0x28000000) { + + tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5); + + entry_1: + i += tab->run; + if (i >= 64) + break; /* end of block */ + + normal_code: + j = scan[i]; + bit_buf <<= tab->len; + bits += tab->len + 1; + val = ((2 * tab->level + 1) * quant_matrix[j]) >> 5; + + /* if (bitstream_get (1)) val = -val; */ + val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1); + + SATURATE (val); + dest[j] = val; + mismatch ^= val; + + bit_buf <<= 1; + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } + + entry_2: + if (bit_buf >= 0x04000000) { + + tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4); + + i += tab->run; + if (i < 64) + goto normal_code; + + /* escape code */ + + i += UBITS (bit_buf << 6, 6) - 64; + if (i >= 64) + break; /* illegal, check needed to avoid buffer overflow */ + + j = scan[i]; + + DUMPBITS (bit_buf, bits, 12); + NEEDBITS (bit_buf, bits, bit_ptr); + val = 2 * (SBITS (bit_buf, 12) + SBITS (bit_buf, 1)) + 1; + val = (val * quant_matrix[j]) / 32; + + SATURATE (val); + dest[j] = val; + mismatch ^= val; + + DUMPBITS (bit_buf, bits, 12); + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } else if (bit_buf >= 0x02000000) { + tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00800000) { + tab = DCT_13 + (UBITS (bit_buf, 13) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00200000) { + tab = DCT_15 + (UBITS (bit_buf, 15) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else { + tab = DCT_16 + UBITS (bit_buf, 16); + bit_buf <<= 16; + GETWORD (bit_buf, bits + 16, bit_ptr); + i += tab->run; + if (i < 64) + goto normal_code; + } + break; /* illegal, check needed to avoid buffer overflow */ + } + dest[63] ^= mismatch & 16; + DUMPBITS (bit_buf, bits, tab->len); /* dump end of block code */ + decoder->bitstream_buf = bit_buf; + decoder->bitstream_bits = bits; + 
decoder->bitstream_ptr = bit_ptr; + return i; +} + +static void get_mpeg1_intra_block (mpeg2_decoder_t * const decoder) +{ + int i; + int j; + int val; + const uint8_t * const scan = decoder->scan; + const uint16_t * const quant_matrix = decoder->quantizer_matrix[0]; + const DCTtab * tab; + uint32_t bit_buf; + int bits; + const uint8_t * bit_ptr; + int16_t * const dest = decoder->DCTblock; + + i = 0; + + bit_buf = decoder->bitstream_buf; + bits = decoder->bitstream_bits; + bit_ptr = decoder->bitstream_ptr; + + NEEDBITS (bit_buf, bits, bit_ptr); + + while (1) { + if (bit_buf >= 0x28000000) { + + tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5); + + i += tab->run; + if (i >= 64) + break; /* end of block */ + + normal_code: + j = scan[i]; + bit_buf <<= tab->len; + bits += tab->len + 1; + val = (tab->level * quant_matrix[j]) >> 4; + + /* oddification */ + val = (val - 1) | 1; + + /* if (bitstream_get (1)) val = -val; */ + val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1); + + SATURATE (val); + dest[j] = val; + + bit_buf <<= 1; + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } else if (bit_buf >= 0x04000000) { + + tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4); + + i += tab->run; + if (i < 64) + goto normal_code; + + /* escape code */ + + i += UBITS (bit_buf << 6, 6) - 64; + if (i >= 64) + break; /* illegal, check needed to avoid buffer overflow */ + + j = scan[i]; + + DUMPBITS (bit_buf, bits, 12); + NEEDBITS (bit_buf, bits, bit_ptr); + val = SBITS (bit_buf, 8); + if (! 
(val & 0x7f)) { + DUMPBITS (bit_buf, bits, 8); + val = UBITS (bit_buf, 8) + 2 * val; + } + val = (val * quant_matrix[j]) / 16; + + /* oddification */ + val = (val + ~SBITS (val, 1)) | 1; + + SATURATE (val); + dest[j] = val; + + DUMPBITS (bit_buf, bits, 8); + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } else if (bit_buf >= 0x02000000) { + tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00800000) { + tab = DCT_13 + (UBITS (bit_buf, 13) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00200000) { + tab = DCT_15 + (UBITS (bit_buf, 15) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else { + tab = DCT_16 + UBITS (bit_buf, 16); + bit_buf <<= 16; + GETWORD (bit_buf, bits + 16, bit_ptr); + i += tab->run; + if (i < 64) + goto normal_code; + } + break; /* illegal, check needed to avoid buffer overflow */ + } + DUMPBITS (bit_buf, bits, tab->len); /* dump end of block code */ + decoder->bitstream_buf = bit_buf; + decoder->bitstream_bits = bits; + decoder->bitstream_ptr = bit_ptr; +} + +static int get_mpeg1_non_intra_block (mpeg2_decoder_t * const decoder) +{ + int i; + int j; + int val; + const uint8_t * const scan = decoder->scan; + const uint16_t * const quant_matrix = decoder->quantizer_matrix[1]; + const DCTtab * tab; + uint32_t bit_buf; + int bits; + const uint8_t * bit_ptr; + int16_t * const dest = decoder->DCTblock; + + i = -1; + + bit_buf = decoder->bitstream_buf; + bits = decoder->bitstream_bits; + bit_ptr = decoder->bitstream_ptr; + + NEEDBITS (bit_buf, bits, bit_ptr); + if (bit_buf >= 0x28000000) { + tab = DCT_B14DC_5 + (UBITS (bit_buf, 5) - 5); + goto entry_1; + } else + goto entry_2; + + while (1) { + if (bit_buf >= 0x28000000) { + + tab = DCT_B14AC_5 + (UBITS (bit_buf, 5) - 5); + + entry_1: + i += tab->run; + if (i >= 64) + break; /* end of block */ + + normal_code: + j = scan[i]; + bit_buf <<= tab->len; + bits += tab->len + 1; + 
val = ((2 * tab->level + 1) * quant_matrix[j]) >> 5; + + /* oddification */ + val = (val - 1) | 1; + + /* if (bitstream_get (1)) val = -val; */ + val = (val ^ SBITS (bit_buf, 1)) - SBITS (bit_buf, 1); + + SATURATE (val); + dest[j] = val; + + bit_buf <<= 1; + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } + + entry_2: + if (bit_buf >= 0x04000000) { + + tab = DCT_B14_8 + (UBITS (bit_buf, 8) - 4); + + i += tab->run; + if (i < 64) + goto normal_code; + + /* escape code */ + + i += UBITS (bit_buf << 6, 6) - 64; + if (i >= 64) + break; /* illegal, check needed to avoid buffer overflow */ + + j = scan[i]; + + DUMPBITS (bit_buf, bits, 12); + NEEDBITS (bit_buf, bits, bit_ptr); + val = SBITS (bit_buf, 8); + if (! (val & 0x7f)) { + DUMPBITS (bit_buf, bits, 8); + val = UBITS (bit_buf, 8) + 2 * val; + } + val = 2 * (val + SBITS (val, 1)) + 1; + val = (val * quant_matrix[j]) / 32; + + /* oddification */ + val = (val + ~SBITS (val, 1)) | 1; + + SATURATE (val); + dest[j] = val; + + DUMPBITS (bit_buf, bits, 8); + NEEDBITS (bit_buf, bits, bit_ptr); + + continue; + + } else if (bit_buf >= 0x02000000) { + tab = DCT_B14_10 + (UBITS (bit_buf, 10) - 8); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00800000) { + tab = DCT_13 + (UBITS (bit_buf, 13) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else if (bit_buf >= 0x00200000) { + tab = DCT_15 + (UBITS (bit_buf, 15) - 16); + i += tab->run; + if (i < 64) + goto normal_code; + } else { + tab = DCT_16 + UBITS (bit_buf, 16); + bit_buf <<= 16; + GETWORD (bit_buf, bits + 16, bit_ptr); + i += tab->run; + if (i < 64) + goto normal_code; + } + break; /* illegal, check needed to avoid buffer overflow */ + } + DUMPBITS (bit_buf, bits, tab->len); /* dump end of block code */ + decoder->bitstream_buf = bit_buf; + decoder->bitstream_bits = bits; + decoder->bitstream_ptr = bit_ptr; + return i; +} + +static inline void slice_intra_DCT (mpeg2_decoder_t * const decoder, + const int cc, + uint8_t * const 
dest, const int stride) +{ +#define bit_buf (decoder->bitstream_buf) +#define bits (decoder->bitstream_bits) +#define bit_ptr (decoder->bitstream_ptr) + NEEDBITS (bit_buf, bits, bit_ptr); + /* Get the intra DC coefficient and inverse quantize it */ + if (cc == 0) + decoder->DCTblock[0] = + decoder->dc_dct_pred[0] += get_luma_dc_dct_diff (decoder); + else + decoder->DCTblock[0] = + decoder->dc_dct_pred[cc] += get_chroma_dc_dct_diff (decoder); + + if (decoder->mpeg1) { + if (decoder->coding_type != D_TYPE) + get_mpeg1_intra_block (decoder); + } else if (decoder->intra_vlc_format) + get_intra_block_B15 (decoder, decoder->quantizer_matrix[cc ? 2 : 0]); + else + get_intra_block_B14 (decoder, decoder->quantizer_matrix[cc ? 2 : 0]); + mpeg2_idct_copy (decoder->DCTblock, dest, stride); +#undef bit_buf +#undef bits +#undef bit_ptr +} + +static inline void slice_non_intra_DCT (mpeg2_decoder_t * const decoder, + const int cc, + uint8_t * const dest, const int stride) +{ + int last; + + if (decoder->mpeg1) + last = get_mpeg1_non_intra_block (decoder); + else + last = get_non_intra_block (decoder, + decoder->quantizer_matrix[cc ? 3 : 1]); + mpeg2_idct_add (last, decoder->DCTblock, dest, stride); +} + +#define MOTION_420(table,ref,motion_x,motion_y,size,y) \ + pos_x = 2 * decoder->offset + motion_x; \ + pos_y = 2 * decoder->v_offset + motion_y + 2 * y; \ + if (unlikely (pos_x > decoder->limit_x)) { \ + pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x; \ + motion_x = pos_x - 2 * decoder->offset; \ + } \ + if (unlikely (pos_y > decoder->limit_y_ ## size)) { \ + pos_y = ((int)pos_y < 0) ? 
0 : decoder->limit_y_ ## size; \ + motion_y = pos_y - 2 * decoder->v_offset - 2 * y; \ + } \ + xy_half = ((pos_y & 1) << 1) | (pos_x & 1); \ + table[xy_half] (decoder->dest[0] + y * decoder->stride + decoder->offset, \ + ref[0] + (pos_x >> 1) + (pos_y >> 1) * decoder->stride, \ + decoder->stride, size); \ + motion_x /= 2; motion_y /= 2; \ + xy_half = ((motion_y & 1) << 1) | (motion_x & 1); \ + offset = (((decoder->offset + motion_x) >> 1) + \ + ((((decoder->v_offset + motion_y) >> 1) + y/2) * \ + decoder->uv_stride)); \ + table[4+xy_half] (decoder->dest[1] + y/2 * decoder->uv_stride + \ + (decoder->offset >> 1), ref[1] + offset, \ + decoder->uv_stride, size/2); \ + table[4+xy_half] (decoder->dest[2] + y/2 * decoder->uv_stride + \ + (decoder->offset >> 1), ref[2] + offset, \ + decoder->uv_stride, size/2) + +#define MOTION_FIELD_420(table,ref,motion_x,motion_y,dest_field,op,src_field) \ + pos_x = 2 * decoder->offset + motion_x; \ + pos_y = decoder->v_offset + motion_y; \ + if (unlikely (pos_x > decoder->limit_x)) { \ + pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x; \ + motion_x = pos_x - 2 * decoder->offset; \ + } \ + if (unlikely (pos_y > decoder->limit_y)) { \ + pos_y = ((int)pos_y < 0) ? 
0 : decoder->limit_y; \ + motion_y = pos_y - decoder->v_offset; \ + } \ + xy_half = ((pos_y & 1) << 1) | (pos_x & 1); \ + table[xy_half] (decoder->dest[0] + dest_field * decoder->stride + \ + decoder->offset, \ + (ref[0] + (pos_x >> 1) + \ + ((pos_y op) + src_field) * decoder->stride), \ + 2 * decoder->stride, 8); \ + motion_x /= 2; motion_y /= 2; \ + xy_half = ((motion_y & 1) << 1) | (motion_x & 1); \ + offset = (((decoder->offset + motion_x) >> 1) + \ + (((decoder->v_offset >> 1) + (motion_y op) + src_field) * \ + decoder->uv_stride)); \ + table[4+xy_half] (decoder->dest[1] + dest_field * decoder->uv_stride + \ + (decoder->offset >> 1), ref[1] + offset, \ + 2 * decoder->uv_stride, 4); \ + table[4+xy_half] (decoder->dest[2] + dest_field * decoder->uv_stride + \ + (decoder->offset >> 1), ref[2] + offset, \ + 2 * decoder->uv_stride, 4) + +#define MOTION_DMV_420(table,ref,motion_x,motion_y) \ + pos_x = 2 * decoder->offset + motion_x; \ + pos_y = decoder->v_offset + motion_y; \ + if (unlikely (pos_x > decoder->limit_x)) { \ + pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x; \ + motion_x = pos_x - 2 * decoder->offset; \ + } \ + if (unlikely (pos_y > decoder->limit_y)) { \ + pos_y = ((int)pos_y < 0) ? 
0 : decoder->limit_y; \ + motion_y = pos_y - decoder->v_offset; \ + } \ + xy_half = ((pos_y & 1) << 1) | (pos_x & 1); \ + offset = (pos_x >> 1) + (pos_y & ~1) * decoder->stride; \ + table[xy_half] (decoder->dest[0] + decoder->offset, \ + ref[0] + offset, 2 * decoder->stride, 8); \ + table[xy_half] (decoder->dest[0] + decoder->stride + decoder->offset, \ + ref[0] + decoder->stride + offset, \ + 2 * decoder->stride, 8); \ + motion_x /= 2; motion_y /= 2; \ + xy_half = ((motion_y & 1) << 1) | (motion_x & 1); \ + offset = (((decoder->offset + motion_x) >> 1) + \ + (((decoder->v_offset >> 1) + (motion_y & ~1)) * \ + decoder->uv_stride)); \ + table[4+xy_half] (decoder->dest[1] + (decoder->offset >> 1), \ + ref[1] + offset, 2 * decoder->uv_stride, 4); \ + table[4+xy_half] (decoder->dest[1] + decoder->uv_stride + \ + (decoder->offset >> 1), \ + ref[1] + decoder->uv_stride + offset, \ + 2 * decoder->uv_stride, 4); \ + table[4+xy_half] (decoder->dest[2] + (decoder->offset >> 1), \ + ref[2] + offset, 2 * decoder->uv_stride, 4); \ + table[4+xy_half] (decoder->dest[2] + decoder->uv_stride + \ + (decoder->offset >> 1), \ + ref[2] + decoder->uv_stride + offset, \ + 2 * decoder->uv_stride, 4) + +#define MOTION_ZERO_420(table,ref) \ + table[0] (decoder->dest[0] + decoder->offset, \ + (ref[0] + decoder->offset + \ + decoder->v_offset * decoder->stride), decoder->stride, 16); \ + offset = ((decoder->offset >> 1) + \ + (decoder->v_offset >> 1) * decoder->uv_stride); \ + table[4] (decoder->dest[1] + (decoder->offset >> 1), \ + ref[1] + offset, decoder->uv_stride, 8); \ + table[4] (decoder->dest[2] + (decoder->offset >> 1), \ + ref[2] + offset, decoder->uv_stride, 8) + +#define MOTION_422(table,ref,motion_x,motion_y,size,y) \ + pos_x = 2 * decoder->offset + motion_x; \ + pos_y = 2 * decoder->v_offset + motion_y + 2 * y; \ + if (unlikely (pos_x > decoder->limit_x)) { \ + pos_x = ((int)pos_x < 0) ? 
0 : decoder->limit_x; \ + motion_x = pos_x - 2 * decoder->offset; \ + } \ + if (unlikely (pos_y > decoder->limit_y_ ## size)) { \ + pos_y = ((int)pos_y < 0) ? 0 : decoder->limit_y_ ## size; \ + motion_y = pos_y - 2 * decoder->v_offset - 2 * y; \ + } \ + xy_half = ((pos_y & 1) << 1) | (pos_x & 1); \ + offset = (pos_x >> 1) + (pos_y >> 1) * decoder->stride; \ + table[xy_half] (decoder->dest[0] + y * decoder->stride + decoder->offset, \ + ref[0] + offset, decoder->stride, size); \ + offset = (offset + (motion_x & (motion_x < 0))) >> 1; \ + motion_x /= 2; \ + xy_half = ((pos_y & 1) << 1) | (motion_x & 1); \ + table[4+xy_half] (decoder->dest[1] + y * decoder->uv_stride + \ + (decoder->offset >> 1), ref[1] + offset, \ + decoder->uv_stride, size); \ + table[4+xy_half] (decoder->dest[2] + y * decoder->uv_stride + \ + (decoder->offset >> 1), ref[2] + offset, \ + decoder->uv_stride, size) + +#define MOTION_FIELD_422(table,ref,motion_x,motion_y,dest_field,op,src_field) \ + pos_x = 2 * decoder->offset + motion_x; \ + pos_y = decoder->v_offset + motion_y; \ + if (unlikely (pos_x > decoder->limit_x)) { \ + pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x; \ + motion_x = pos_x - 2 * decoder->offset; \ + } \ + if (unlikely (pos_y > decoder->limit_y)) { \ + pos_y = ((int)pos_y < 0) ? 
0 : decoder->limit_y; \ + motion_y = pos_y - decoder->v_offset; \ + } \ + xy_half = ((pos_y & 1) << 1) | (pos_x & 1); \ + offset = (pos_x >> 1) + ((pos_y op) + src_field) * decoder->stride; \ + table[xy_half] (decoder->dest[0] + dest_field * decoder->stride + \ + decoder->offset, ref[0] + offset, \ + 2 * decoder->stride, 8); \ + offset = (offset + (motion_x & (motion_x < 0))) >> 1; \ + motion_x /= 2; \ + xy_half = ((pos_y & 1) << 1) | (motion_x & 1); \ + table[4+xy_half] (decoder->dest[1] + dest_field * decoder->uv_stride + \ + (decoder->offset >> 1), ref[1] + offset, \ + 2 * decoder->uv_stride, 8); \ + table[4+xy_half] (decoder->dest[2] + dest_field * decoder->uv_stride + \ + (decoder->offset >> 1), ref[2] + offset, \ + 2 * decoder->uv_stride, 8) + +#define MOTION_DMV_422(table,ref,motion_x,motion_y) \ + pos_x = 2 * decoder->offset + motion_x; \ + pos_y = decoder->v_offset + motion_y; \ + if (unlikely (pos_x > decoder->limit_x)) { \ + pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x; \ + motion_x = pos_x - 2 * decoder->offset; \ + } \ + if (unlikely (pos_y > decoder->limit_y)) { \ + pos_y = ((int)pos_y < 0) ? 
0 : decoder->limit_y; \ + motion_y = pos_y - decoder->v_offset; \ + } \ + xy_half = ((pos_y & 1) << 1) | (pos_x & 1); \ + offset = (pos_x >> 1) + (pos_y & ~1) * decoder->stride; \ + table[xy_half] (decoder->dest[0] + decoder->offset, \ + ref[0] + offset, 2 * decoder->stride, 8); \ + table[xy_half] (decoder->dest[0] + decoder->stride + decoder->offset, \ + ref[0] + decoder->stride + offset, \ + 2 * decoder->stride, 8); \ + offset = (offset + (motion_x & (motion_x < 0))) >> 1; \ + motion_x /= 2; \ + xy_half = ((pos_y & 1) << 1) | (motion_x & 1); \ + table[4+xy_half] (decoder->dest[1] + (decoder->offset >> 1), \ + ref[1] + offset, 2 * decoder->uv_stride, 8); \ + table[4+xy_half] (decoder->dest[1] + decoder->uv_stride + \ + (decoder->offset >> 1), \ + ref[1] + decoder->uv_stride + offset, \ + 2 * decoder->uv_stride, 8); \ + table[4+xy_half] (decoder->dest[2] + (decoder->offset >> 1), \ + ref[2] + offset, 2 * decoder->uv_stride, 8); \ + table[4+xy_half] (decoder->dest[2] + decoder->uv_stride + \ + (decoder->offset >> 1), \ + ref[2] + decoder->uv_stride + offset, \ + 2 * decoder->uv_stride, 8) + +#define MOTION_ZERO_422(table,ref) \ + offset = decoder->offset + decoder->v_offset * decoder->stride; \ + table[0] (decoder->dest[0] + decoder->offset, \ + ref[0] + offset, decoder->stride, 16); \ + offset >>= 1; \ + table[4] (decoder->dest[1] + (decoder->offset >> 1), \ + ref[1] + offset, decoder->uv_stride, 16); \ + table[4] (decoder->dest[2] + (decoder->offset >> 1), \ + ref[2] + offset, decoder->uv_stride, 16) + +#define MOTION_444(table,ref,motion_x,motion_y,size,y) \ + pos_x = 2 * decoder->offset + motion_x; \ + pos_y = 2 * decoder->v_offset + motion_y + 2 * y; \ + if (unlikely (pos_x > decoder->limit_x)) { \ + pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x; \ + motion_x = pos_x - 2 * decoder->offset; \ + } \ + if (unlikely (pos_y > decoder->limit_y_ ## size)) { \ + pos_y = ((int)pos_y < 0) ? 
0 : decoder->limit_y_ ## size; \ + motion_y = pos_y - 2 * decoder->v_offset - 2 * y; \ + } \ + xy_half = ((pos_y & 1) << 1) | (pos_x & 1); \ + offset = (pos_x >> 1) + (pos_y >> 1) * decoder->stride; \ + table[xy_half] (decoder->dest[0] + y * decoder->stride + decoder->offset, \ + ref[0] + offset, decoder->stride, size); \ + table[xy_half] (decoder->dest[1] + y * decoder->stride + decoder->offset, \ + ref[1] + offset, decoder->stride, size); \ + table[xy_half] (decoder->dest[2] + y * decoder->stride + decoder->offset, \ + ref[2] + offset, decoder->stride, size) + +#define MOTION_FIELD_444(table,ref,motion_x,motion_y,dest_field,op,src_field) \ + pos_x = 2 * decoder->offset + motion_x; \ + pos_y = decoder->v_offset + motion_y; \ + if (unlikely (pos_x > decoder->limit_x)) { \ + pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x; \ + motion_x = pos_x - 2 * decoder->offset; \ + } \ + if (unlikely (pos_y > decoder->limit_y)) { \ + pos_y = ((int)pos_y < 0) ? 0 : decoder->limit_y; \ + motion_y = pos_y - decoder->v_offset; \ + } \ + xy_half = ((pos_y & 1) << 1) | (pos_x & 1); \ + offset = (pos_x >> 1) + ((pos_y op) + src_field) * decoder->stride; \ + table[xy_half] (decoder->dest[0] + dest_field * decoder->stride + \ + decoder->offset, ref[0] + offset, \ + 2 * decoder->stride, 8); \ + table[xy_half] (decoder->dest[1] + dest_field * decoder->stride + \ + decoder->offset, ref[1] + offset, \ + 2 * decoder->stride, 8); \ + table[xy_half] (decoder->dest[2] + dest_field * decoder->stride + \ + decoder->offset, ref[2] + offset, \ + 2 * decoder->stride, 8) + +#define MOTION_DMV_444(table,ref,motion_x,motion_y) \ + pos_x = 2 * decoder->offset + motion_x; \ + pos_y = decoder->v_offset + motion_y; \ + if (unlikely (pos_x > decoder->limit_x)) { \ + pos_x = ((int)pos_x < 0) ? 0 : decoder->limit_x; \ + motion_x = pos_x - 2 * decoder->offset; \ + } \ + if (unlikely (pos_y > decoder->limit_y)) { \ + pos_y = ((int)pos_y < 0) ? 
0 : decoder->limit_y; \ + motion_y = pos_y - decoder->v_offset; \ + } \ + xy_half = ((pos_y & 1) << 1) | (pos_x & 1); \ + offset = (pos_x >> 1) + (pos_y & ~1) * decoder->stride; \ + table[xy_half] (decoder->dest[0] + decoder->offset, \ + ref[0] + offset, 2 * decoder->stride, 8); \ + table[xy_half] (decoder->dest[0] + decoder->stride + decoder->offset, \ + ref[0] + decoder->stride + offset, \ + 2 * decoder->stride, 8); \ + table[xy_half] (decoder->dest[1] + decoder->offset, \ + ref[1] + offset, 2 * decoder->stride, 8); \ + table[xy_half] (decoder->dest[1] + decoder->stride + decoder->offset, \ + ref[1] + decoder->stride + offset, \ + 2 * decoder->stride, 8); \ + table[xy_half] (decoder->dest[2] + decoder->offset, \ + ref[2] + offset, 2 * decoder->stride, 8); \ + table[xy_half] (decoder->dest[2] + decoder->stride + decoder->offset, \ + ref[2] + decoder->stride + offset, \ + 2 * decoder->stride, 8) + +/* + * Zero-vector prediction for 4:4:4 macroblocks. + * The chroma planes are addressed exactly like luma here (same stride, + * same horizontal offset), so all three planes use table[0] and the full + * decoder->offset -- the same calls MOTION_444 makes for a (0,0) vector. + * table[4] is the entry the 4:2:2 macros use for half-width chroma and + * must not be used for 4:4:4 chroma. + */ +#define MOTION_ZERO_444(table,ref) \ + offset = decoder->offset + decoder->v_offset * decoder->stride; \ + table[0] (decoder->dest[0] + decoder->offset, \ + ref[0] + offset, decoder->stride, 16); \ + table[0] (decoder->dest[1] + decoder->offset, \ + ref[1] + offset, decoder->stride, 16); \ + table[0] (decoder->dest[2] + decoder->offset, \ + ref[2] + offset, decoder->stride, 16) + +#define bit_buf (decoder->bitstream_buf) +#define bits (decoder->bitstream_bits) +#define bit_ptr (decoder->bitstream_ptr) + +static void motion_mp1 (mpeg2_decoder_t * const decoder, + motion_t * const motion, + mpeg2_mc_fct * const * const table) +{ + int motion_x, motion_y; + unsigned int pos_x, pos_y, xy_half, offset; + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_x = (motion->pmv[0][0] + + (get_motion_delta (decoder, + motion->f_code[0]) << motion->f_code[1])); + motion_x = bound_motion_vector (motion_x, + motion->f_code[0] + motion->f_code[1]); + motion->pmv[0][0] = motion_x; + + NEEDBITS (bit_buf, bits, bit_ptr); + motion_y = (motion->pmv[0][1] + + (get_motion_delta (decoder, + 
motion->f_code[0]) << motion->f_code[1])); + motion_y = bound_motion_vector (motion_y, + motion->f_code[0] + motion->f_code[1]); + motion->pmv[0][1] = motion_y; + + MOTION_420 (table, motion->ref[0], motion_x, motion_y, 16, 0); +} + +#define MOTION_FUNCTIONS(FORMAT,MOTION,MOTION_FIELD,MOTION_DMV,MOTION_ZERO) \ + \ +static void motion_fr_frame_##FORMAT (mpeg2_decoder_t * const decoder, \ + motion_t * const motion, \ + mpeg2_mc_fct * const * const table) \ +{ \ + int motion_x, motion_y; \ + unsigned int pos_x, pos_y, xy_half, offset; \ + \ + NEEDBITS (bit_buf, bits, bit_ptr); \ + motion_x = motion->pmv[0][0] + get_motion_delta (decoder, \ + motion->f_code[0]); \ + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); \ + motion->pmv[1][0] = motion->pmv[0][0] = motion_x; \ + \ + NEEDBITS (bit_buf, bits, bit_ptr); \ + motion_y = motion->pmv[0][1] + get_motion_delta (decoder, \ + motion->f_code[1]); \ + motion_y = bound_motion_vector (motion_y, motion->f_code[1]); \ + motion->pmv[1][1] = motion->pmv[0][1] = motion_y; \ + \ + MOTION (table, motion->ref[0], motion_x, motion_y, 16, 0); \ +} \ + \ +static void motion_fr_field_##FORMAT (mpeg2_decoder_t * const decoder, \ + motion_t * const motion, \ + mpeg2_mc_fct * const * const table) \ +{ \ + int motion_x, motion_y, field; \ + unsigned int pos_x, pos_y, xy_half, offset; \ + \ + NEEDBITS (bit_buf, bits, bit_ptr); \ + field = UBITS (bit_buf, 1); \ + DUMPBITS (bit_buf, bits, 1); \ + \ + motion_x = motion->pmv[0][0] + get_motion_delta (decoder, \ + motion->f_code[0]); \ + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); \ + motion->pmv[0][0] = motion_x; \ + \ + NEEDBITS (bit_buf, bits, bit_ptr); \ + motion_y = ((motion->pmv[0][1] >> 1) + \ + get_motion_delta (decoder, motion->f_code[1])); \ + /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */ \ + motion->pmv[0][1] = motion_y << 1; \ + \ + MOTION_FIELD (table, motion->ref[0], motion_x, motion_y, 0, & ~1, field); \ + \ + NEEDBITS (bit_buf, 
bits, bit_ptr); \ + field = UBITS (bit_buf, 1); \ + DUMPBITS (bit_buf, bits, 1); \ + \ + motion_x = motion->pmv[1][0] + get_motion_delta (decoder, \ + motion->f_code[0]); \ + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); \ + motion->pmv[1][0] = motion_x; \ + \ + NEEDBITS (bit_buf, bits, bit_ptr); \ + motion_y = ((motion->pmv[1][1] >> 1) + \ + get_motion_delta (decoder, motion->f_code[1])); \ + /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */ \ + motion->pmv[1][1] = motion_y << 1; \ + \ + MOTION_FIELD (table, motion->ref[0], motion_x, motion_y, 1, & ~1, field); \ +} \ + \ +static void motion_fr_dmv_##FORMAT (mpeg2_decoder_t * const decoder, \ + motion_t * const motion, \ + mpeg2_mc_fct * const * const table) \ +{ \ + int motion_x, motion_y, dmv_x, dmv_y, m, other_x, other_y; \ + unsigned int pos_x, pos_y, xy_half, offset; \ + \ + NEEDBITS (bit_buf, bits, bit_ptr); \ + motion_x = motion->pmv[0][0] + get_motion_delta (decoder, \ + motion->f_code[0]); \ + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); \ + motion->pmv[1][0] = motion->pmv[0][0] = motion_x; \ + NEEDBITS (bit_buf, bits, bit_ptr); \ + dmv_x = get_dmv (decoder); \ + \ + motion_y = ((motion->pmv[0][1] >> 1) + \ + get_motion_delta (decoder, motion->f_code[1])); \ + /* motion_y = bound_motion_vector (motion_y, motion->f_code[1]); */ \ + motion->pmv[1][1] = motion->pmv[0][1] = motion_y << 1; \ + dmv_y = get_dmv (decoder); \ + \ + m = decoder->top_field_first ? 1 : 3; \ + other_x = ((motion_x * m + (motion_x > 0)) >> 1) + dmv_x; \ + other_y = ((motion_y * m + (motion_y > 0)) >> 1) + dmv_y - 1; \ + MOTION_FIELD (mpeg2_mc.put, motion->ref[0], other_x, other_y, 0, | 1, 0); \ + \ + m = decoder->top_field_first ? 
3 : 1; \ + other_x = ((motion_x * m + (motion_x > 0)) >> 1) + dmv_x; \ + other_y = ((motion_y * m + (motion_y > 0)) >> 1) + dmv_y + 1; \ + MOTION_FIELD (mpeg2_mc.put, motion->ref[0], other_x, other_y, 1, & ~1, 0);\ + \ + MOTION_DMV (mpeg2_mc.avg, motion->ref[0], motion_x, motion_y); \ +} \ + \ +static void motion_reuse_##FORMAT (mpeg2_decoder_t * const decoder, \ + motion_t * const motion, \ + mpeg2_mc_fct * const * const table) \ +{ \ + int motion_x, motion_y; \ + unsigned int pos_x, pos_y, xy_half, offset; \ + \ + motion_x = motion->pmv[0][0]; \ + motion_y = motion->pmv[0][1]; \ + \ + MOTION (table, motion->ref[0], motion_x, motion_y, 16, 0); \ +} \ + \ +static void motion_zero_##FORMAT (mpeg2_decoder_t * const decoder, \ + motion_t * const motion, \ + mpeg2_mc_fct * const * const table) \ +{ \ + unsigned int offset; \ + \ + motion->pmv[0][0] = motion->pmv[0][1] = 0; \ + motion->pmv[1][0] = motion->pmv[1][1] = 0; \ + \ + MOTION_ZERO (table, motion->ref[0]); \ +} \ + \ +static void motion_fi_field_##FORMAT (mpeg2_decoder_t * const decoder, \ + motion_t * const motion, \ + mpeg2_mc_fct * const * const table) \ +{ \ + int motion_x, motion_y; \ + uint8_t ** ref_field; \ + unsigned int pos_x, pos_y, xy_half, offset; \ + \ + NEEDBITS (bit_buf, bits, bit_ptr); \ + ref_field = motion->ref2[UBITS (bit_buf, 1)]; \ + DUMPBITS (bit_buf, bits, 1); \ + \ + motion_x = motion->pmv[0][0] + get_motion_delta (decoder, \ + motion->f_code[0]); \ + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); \ + motion->pmv[1][0] = motion->pmv[0][0] = motion_x; \ + \ + NEEDBITS (bit_buf, bits, bit_ptr); \ + motion_y = motion->pmv[0][1] + get_motion_delta (decoder, \ + motion->f_code[1]); \ + motion_y = bound_motion_vector (motion_y, motion->f_code[1]); \ + motion->pmv[1][1] = motion->pmv[0][1] = motion_y; \ + \ + MOTION (table, ref_field, motion_x, motion_y, 16, 0); \ +} \ + \ +static void motion_fi_16x8_##FORMAT (mpeg2_decoder_t * const decoder, \ + motion_t * const motion, \ + 
mpeg2_mc_fct * const * const table) \ +{ \ + int motion_x, motion_y; \ + uint8_t ** ref_field; \ + unsigned int pos_x, pos_y, xy_half, offset; \ + \ + NEEDBITS (bit_buf, bits, bit_ptr); \ + ref_field = motion->ref2[UBITS (bit_buf, 1)]; \ + DUMPBITS (bit_buf, bits, 1); \ + \ + motion_x = motion->pmv[0][0] + get_motion_delta (decoder, \ + motion->f_code[0]); \ + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); \ + motion->pmv[0][0] = motion_x; \ + \ + NEEDBITS (bit_buf, bits, bit_ptr); \ + motion_y = motion->pmv[0][1] + get_motion_delta (decoder, \ + motion->f_code[1]); \ + motion_y = bound_motion_vector (motion_y, motion->f_code[1]); \ + motion->pmv[0][1] = motion_y; \ + \ + MOTION (table, ref_field, motion_x, motion_y, 8, 0); \ + \ + NEEDBITS (bit_buf, bits, bit_ptr); \ + ref_field = motion->ref2[UBITS (bit_buf, 1)]; \ + DUMPBITS (bit_buf, bits, 1); \ + \ + motion_x = motion->pmv[1][0] + get_motion_delta (decoder, \ + motion->f_code[0]); \ + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); \ + motion->pmv[1][0] = motion_x; \ + \ + NEEDBITS (bit_buf, bits, bit_ptr); \ + motion_y = motion->pmv[1][1] + get_motion_delta (decoder, \ + motion->f_code[1]); \ + motion_y = bound_motion_vector (motion_y, motion->f_code[1]); \ + motion->pmv[1][1] = motion_y; \ + \ + MOTION (table, ref_field, motion_x, motion_y, 8, 8); \ +} \ + \ +static void motion_fi_dmv_##FORMAT (mpeg2_decoder_t * const decoder, \ + motion_t * const motion, \ + mpeg2_mc_fct * const * const table) \ +{ \ + int motion_x, motion_y, other_x, other_y; \ + unsigned int pos_x, pos_y, xy_half, offset; \ + \ + NEEDBITS (bit_buf, bits, bit_ptr); \ + motion_x = motion->pmv[0][0] + get_motion_delta (decoder, \ + motion->f_code[0]); \ + motion_x = bound_motion_vector (motion_x, motion->f_code[0]); \ + motion->pmv[1][0] = motion->pmv[0][0] = motion_x; \ + NEEDBITS (bit_buf, bits, bit_ptr); \ + other_x = ((motion_x + (motion_x > 0)) >> 1) + get_dmv (decoder); \ + \ + motion_y = motion->pmv[0][1] 
+ get_motion_delta (decoder, \ + motion->f_code[1]); \ + motion_y = bound_motion_vector (motion_y, motion->f_code[1]); \ + motion->pmv[1][1] = motion->pmv[0][1] = motion_y; \ + other_y = (((motion_y + (motion_y > 0)) >> 1) + get_dmv (decoder) + \ + decoder->dmv_offset); \ + \ + MOTION (mpeg2_mc.put, motion->ref[0], motion_x, motion_y, 16, 0); \ + MOTION (mpeg2_mc.avg, motion->ref[1], other_x, other_y, 16, 0); \ +} \ + +MOTION_FUNCTIONS (420, MOTION_420, MOTION_FIELD_420, MOTION_DMV_420, + MOTION_ZERO_420) +MOTION_FUNCTIONS (422, MOTION_422, MOTION_FIELD_422, MOTION_DMV_422, + MOTION_ZERO_422) +MOTION_FUNCTIONS (444, MOTION_444, MOTION_FIELD_444, MOTION_DMV_444, + MOTION_ZERO_444) + +/* like motion_frame, but parsing without actual motion compensation */ +static void motion_fr_conceal (mpeg2_decoder_t * const decoder) +{ + int tmp; + + NEEDBITS (bit_buf, bits, bit_ptr); + tmp = (decoder->f_motion.pmv[0][0] + + get_motion_delta (decoder, decoder->f_motion.f_code[0])); + tmp = bound_motion_vector (tmp, decoder->f_motion.f_code[0]); + decoder->f_motion.pmv[1][0] = decoder->f_motion.pmv[0][0] = tmp; + + NEEDBITS (bit_buf, bits, bit_ptr); + tmp = (decoder->f_motion.pmv[0][1] + + get_motion_delta (decoder, decoder->f_motion.f_code[1])); + tmp = bound_motion_vector (tmp, decoder->f_motion.f_code[1]); + decoder->f_motion.pmv[1][1] = decoder->f_motion.pmv[0][1] = tmp; + + DUMPBITS (bit_buf, bits, 1); /* remove marker_bit */ +} + +static void motion_fi_conceal (mpeg2_decoder_t * const decoder) +{ + int tmp; + + NEEDBITS (bit_buf, bits, bit_ptr); + DUMPBITS (bit_buf, bits, 1); /* remove field_select */ + + tmp = (decoder->f_motion.pmv[0][0] + + get_motion_delta (decoder, decoder->f_motion.f_code[0])); + tmp = bound_motion_vector (tmp, decoder->f_motion.f_code[0]); + decoder->f_motion.pmv[1][0] = decoder->f_motion.pmv[0][0] = tmp; + + NEEDBITS (bit_buf, bits, bit_ptr); + tmp = (decoder->f_motion.pmv[0][1] + + get_motion_delta (decoder, decoder->f_motion.f_code[1])); + tmp = 
bound_motion_vector (tmp, decoder->f_motion.f_code[1]); + decoder->f_motion.pmv[1][1] = decoder->f_motion.pmv[0][1] = tmp; + + DUMPBITS (bit_buf, bits, 1); /* remove marker_bit */ +} + +#undef bit_buf +#undef bits +#undef bit_ptr + +#define MOTION_CALL(routine,direction) \ +do { \ + if ((direction) & MACROBLOCK_MOTION_FORWARD) \ + routine (decoder, &(decoder->f_motion), mpeg2_mc.put); \ + if ((direction) & MACROBLOCK_MOTION_BACKWARD) \ + routine (decoder, &(decoder->b_motion), \ + ((direction) & MACROBLOCK_MOTION_FORWARD ? \ + mpeg2_mc.avg : mpeg2_mc.put)); \ +} while (0) + +#define NEXT_MACROBLOCK \ +do { \ + decoder->offset += 16; \ + if (decoder->offset == decoder->width) { \ + do { /* just so we can use the break statement */ \ + if (decoder->convert) { \ + decoder->convert (decoder->convert_id, decoder->dest, \ + decoder->v_offset); \ + if (decoder->coding_type == B_TYPE) \ + break; \ + } \ + decoder->dest[0] += decoder->slice_stride; \ + decoder->dest[1] += decoder->slice_uv_stride; \ + decoder->dest[2] += decoder->slice_uv_stride; \ + } while (0); \ + decoder->v_offset += 16; \ + if (decoder->v_offset > decoder->limit_y) { \ + if (mpeg2_cpu_state_restore) \ + mpeg2_cpu_state_restore (&cpu_state); \ + return; \ + } \ + decoder->offset = 0; \ + } \ +} while (0) + +void mpeg2_init_fbuf (mpeg2_decoder_t * decoder, uint8_t * current_fbuf[3], + uint8_t * forward_fbuf[3], uint8_t * backward_fbuf[3]) +{ + int offset, stride, height, bottom_field; + + stride = decoder->stride_frame; + bottom_field = (decoder->picture_structure == BOTTOM_FIELD); + offset = bottom_field ? 
stride : 0; + height = decoder->height; + + decoder->picture_dest[0] = current_fbuf[0] + offset; + decoder->picture_dest[1] = current_fbuf[1] + (offset >> 1); + decoder->picture_dest[2] = current_fbuf[2] + (offset >> 1); + + decoder->f_motion.ref[0][0] = forward_fbuf[0] + offset; + decoder->f_motion.ref[0][1] = forward_fbuf[1] + (offset >> 1); + decoder->f_motion.ref[0][2] = forward_fbuf[2] + (offset >> 1); + + decoder->b_motion.ref[0][0] = backward_fbuf[0] + offset; + decoder->b_motion.ref[0][1] = backward_fbuf[1] + (offset >> 1); + decoder->b_motion.ref[0][2] = backward_fbuf[2] + (offset >> 1); + + if (decoder->picture_structure != FRAME_PICTURE) { + decoder->dmv_offset = bottom_field ? 1 : -1; + decoder->f_motion.ref2[0] = decoder->f_motion.ref[bottom_field]; + decoder->f_motion.ref2[1] = decoder->f_motion.ref[!bottom_field]; + decoder->b_motion.ref2[0] = decoder->b_motion.ref[bottom_field]; + decoder->b_motion.ref2[1] = decoder->b_motion.ref[!bottom_field]; + offset = stride - offset; + + if (decoder->second_field && (decoder->coding_type != B_TYPE)) + forward_fbuf = current_fbuf; + + decoder->f_motion.ref[1][0] = forward_fbuf[0] + offset; + decoder->f_motion.ref[1][1] = forward_fbuf[1] + (offset >> 1); + decoder->f_motion.ref[1][2] = forward_fbuf[2] + (offset >> 1); + + decoder->b_motion.ref[1][0] = backward_fbuf[0] + offset; + decoder->b_motion.ref[1][1] = backward_fbuf[1] + (offset >> 1); + decoder->b_motion.ref[1][2] = backward_fbuf[2] + (offset >> 1); + + stride <<= 1; + height >>= 1; + } + + decoder->stride = stride; + decoder->uv_stride = stride >> 1; + decoder->slice_stride = 16 * stride; + decoder->slice_uv_stride = + decoder->slice_stride >> (2 - decoder->chroma_format); + decoder->limit_x = 2 * decoder->width - 32; + decoder->limit_y_16 = 2 * height - 32; + decoder->limit_y_8 = 2 * height - 16; + decoder->limit_y = height - 16; + + if (decoder->mpeg1) { + decoder->motion_parser[0] = motion_zero_420; + decoder->motion_parser[MC_FRAME] = motion_mp1; + 
decoder->motion_parser[4] = motion_reuse_420; + } else if (decoder->picture_structure == FRAME_PICTURE) { + if (decoder->chroma_format == 0) { + decoder->motion_parser[0] = motion_zero_420; + decoder->motion_parser[MC_FIELD] = motion_fr_field_420; + decoder->motion_parser[MC_FRAME] = motion_fr_frame_420; + decoder->motion_parser[MC_DMV] = motion_fr_dmv_420; + decoder->motion_parser[4] = motion_reuse_420; + } else if (decoder->chroma_format == 1) { + decoder->motion_parser[0] = motion_zero_422; + decoder->motion_parser[MC_FIELD] = motion_fr_field_422; + decoder->motion_parser[MC_FRAME] = motion_fr_frame_422; + decoder->motion_parser[MC_DMV] = motion_fr_dmv_422; + decoder->motion_parser[4] = motion_reuse_422; + } else { + decoder->motion_parser[0] = motion_zero_444; + decoder->motion_parser[MC_FIELD] = motion_fr_field_444; + decoder->motion_parser[MC_FRAME] = motion_fr_frame_444; + decoder->motion_parser[MC_DMV] = motion_fr_dmv_444; + decoder->motion_parser[4] = motion_reuse_444; + } + } else { + if (decoder->chroma_format == 0) { + decoder->motion_parser[0] = motion_zero_420; + decoder->motion_parser[MC_FIELD] = motion_fi_field_420; + decoder->motion_parser[MC_16X8] = motion_fi_16x8_420; + decoder->motion_parser[MC_DMV] = motion_fi_dmv_420; + decoder->motion_parser[4] = motion_reuse_420; + } else if (decoder->chroma_format == 1) { + decoder->motion_parser[0] = motion_zero_422; + decoder->motion_parser[MC_FIELD] = motion_fi_field_422; + decoder->motion_parser[MC_16X8] = motion_fi_16x8_422; + decoder->motion_parser[MC_DMV] = motion_fi_dmv_422; + decoder->motion_parser[4] = motion_reuse_422; + } else { + decoder->motion_parser[0] = motion_zero_444; + decoder->motion_parser[MC_FIELD] = motion_fi_field_444; + decoder->motion_parser[MC_16X8] = motion_fi_16x8_444; + decoder->motion_parser[MC_DMV] = motion_fi_dmv_444; + decoder->motion_parser[4] = motion_reuse_444; + } + } +} + +static inline int slice_init (mpeg2_decoder_t * const decoder, int code) +{ +#define bit_buf 
(decoder->bitstream_buf) +#define bits (decoder->bitstream_bits) +#define bit_ptr (decoder->bitstream_ptr) + int offset; + const MBAtab * mba; + + decoder->dc_dct_pred[0] = decoder->dc_dct_pred[1] = + decoder->dc_dct_pred[2] = 16384; + + decoder->f_motion.pmv[0][0] = decoder->f_motion.pmv[0][1] = 0; + decoder->f_motion.pmv[1][0] = decoder->f_motion.pmv[1][1] = 0; + decoder->b_motion.pmv[0][0] = decoder->b_motion.pmv[0][1] = 0; + decoder->b_motion.pmv[1][0] = decoder->b_motion.pmv[1][1] = 0; + + if (decoder->vertical_position_extension) { + code += UBITS (bit_buf, 3) << 7; + DUMPBITS (bit_buf, bits, 3); + } + decoder->v_offset = (code - 1) * 16; + offset = 0; + if (!(decoder->convert) || decoder->coding_type != B_TYPE) + offset = (code - 1) * decoder->slice_stride; + + decoder->dest[0] = decoder->picture_dest[0] + offset; + offset >>= (2 - decoder->chroma_format); + decoder->dest[1] = decoder->picture_dest[1] + offset; + decoder->dest[2] = decoder->picture_dest[2] + offset; + + get_quantizer_scale (decoder); + + /* ignore intra_slice and all the extra data */ + while (bit_buf & 0x80000000) { + DUMPBITS (bit_buf, bits, 9); + NEEDBITS (bit_buf, bits, bit_ptr); + } + + /* decode initial macroblock address increment */ + offset = 0; + while (1) { + if (bit_buf >= 0x08000000) { + mba = MBA_5 + (UBITS (bit_buf, 6) - 2); + break; + } else if (bit_buf >= 0x01800000) { + mba = MBA_11 + (UBITS (bit_buf, 12) - 24); + break; + } else switch (UBITS (bit_buf, 12)) { + case 8: /* macroblock_escape */ + offset += 33; + DUMPBITS (bit_buf, bits, 11); + NEEDBITS (bit_buf, bits, bit_ptr); + continue; + case 15: /* macroblock_stuffing (MPEG1 only) */ + bit_buf &= 0xfffff; + DUMPBITS (bit_buf, bits, 11); + NEEDBITS (bit_buf, bits, bit_ptr); + continue; + default: /* error */ + return 1; + } + } + DUMPBITS (bit_buf, bits, mba->len + 1); + decoder->offset = (offset + mba->mba) << 4; + + while (decoder->offset - decoder->width >= 0) { + decoder->offset -= decoder->width; + if 
(!(decoder->convert) || decoder->coding_type != B_TYPE) { + decoder->dest[0] += decoder->slice_stride; + decoder->dest[1] += decoder->slice_uv_stride; + decoder->dest[2] += decoder->slice_uv_stride; + } + decoder->v_offset += 16; + } + if (decoder->v_offset > decoder->limit_y) + return 1; + + return 0; +#undef bit_buf +#undef bits +#undef bit_ptr +} + +void mpeg2_slice (mpeg2_decoder_t * const decoder, const int code, + const uint8_t * const buffer) +{ +#define bit_buf (decoder->bitstream_buf) +#define bits (decoder->bitstream_bits) +#define bit_ptr (decoder->bitstream_ptr) + cpu_state_t cpu_state; + + bitstream_init (decoder, buffer); + + if (slice_init (decoder, code)) + return; + + if (mpeg2_cpu_state_save) + mpeg2_cpu_state_save (&cpu_state); + + while (1) { + int macroblock_modes; + int mba_inc; + const MBAtab * mba; + + NEEDBITS (bit_buf, bits, bit_ptr); + + macroblock_modes = get_macroblock_modes (decoder); + + /* maybe integrate MACROBLOCK_QUANT test into get_macroblock_modes ? */ + if (macroblock_modes & MACROBLOCK_QUANT) + get_quantizer_scale (decoder); + + if (macroblock_modes & MACROBLOCK_INTRA) { + + int DCT_offset, DCT_stride; + int offset; + uint8_t * dest_y; + + if (decoder->concealment_motion_vectors) { + if (decoder->picture_structure == FRAME_PICTURE) + motion_fr_conceal (decoder); + else + motion_fi_conceal (decoder); + } else { + decoder->f_motion.pmv[0][0] = decoder->f_motion.pmv[0][1] = 0; + decoder->f_motion.pmv[1][0] = decoder->f_motion.pmv[1][1] = 0; + decoder->b_motion.pmv[0][0] = decoder->b_motion.pmv[0][1] = 0; + decoder->b_motion.pmv[1][0] = decoder->b_motion.pmv[1][1] = 0; + } + + if (macroblock_modes & DCT_TYPE_INTERLACED) { + DCT_offset = decoder->stride; + DCT_stride = decoder->stride * 2; + } else { + DCT_offset = decoder->stride * 8; + DCT_stride = decoder->stride; + } + + offset = decoder->offset; + dest_y = decoder->dest[0] + offset; + slice_intra_DCT (decoder, 0, dest_y, DCT_stride); + slice_intra_DCT (decoder, 0, dest_y + 8, 
DCT_stride); + slice_intra_DCT (decoder, 0, dest_y + DCT_offset, DCT_stride); + slice_intra_DCT (decoder, 0, dest_y + DCT_offset + 8, DCT_stride); + if (likely (decoder->chroma_format == 0)) { + slice_intra_DCT (decoder, 1, decoder->dest[1] + (offset >> 1), + decoder->uv_stride); + slice_intra_DCT (decoder, 2, decoder->dest[2] + (offset >> 1), + decoder->uv_stride); + if (decoder->coding_type == D_TYPE) { + NEEDBITS (bit_buf, bits, bit_ptr); + DUMPBITS (bit_buf, bits, 1); + } + } else if (likely (decoder->chroma_format == 1)) { + uint8_t * dest_u = decoder->dest[1] + (offset >> 1); + uint8_t * dest_v = decoder->dest[2] + (offset >> 1); + DCT_stride >>= 1; + DCT_offset >>= 1; + slice_intra_DCT (decoder, 1, dest_u, DCT_stride); + slice_intra_DCT (decoder, 2, dest_v, DCT_stride); + slice_intra_DCT (decoder, 1, dest_u + DCT_offset, DCT_stride); + slice_intra_DCT (decoder, 2, dest_v + DCT_offset, DCT_stride); + } else { + uint8_t * dest_u = decoder->dest[1] + offset; + uint8_t * dest_v = decoder->dest[2] + offset; + slice_intra_DCT (decoder, 1, dest_u, DCT_stride); + slice_intra_DCT (decoder, 2, dest_v, DCT_stride); + slice_intra_DCT (decoder, 1, dest_u + DCT_offset, DCT_stride); + slice_intra_DCT (decoder, 2, dest_v + DCT_offset, DCT_stride); + slice_intra_DCT (decoder, 1, dest_u + 8, DCT_stride); + slice_intra_DCT (decoder, 2, dest_v + 8, DCT_stride); + slice_intra_DCT (decoder, 1, dest_u + DCT_offset + 8, + DCT_stride); + slice_intra_DCT (decoder, 2, dest_v + DCT_offset + 8, + DCT_stride); + } + } else { + + motion_parser_t * parser; + + parser = + decoder->motion_parser[macroblock_modes >> MOTION_TYPE_SHIFT]; + MOTION_CALL (parser, macroblock_modes); + + if (macroblock_modes & MACROBLOCK_PATTERN) { + int coded_block_pattern; + int DCT_offset, DCT_stride; + + if (macroblock_modes & DCT_TYPE_INTERLACED) { + DCT_offset = decoder->stride; + DCT_stride = decoder->stride * 2; + } else { + DCT_offset = decoder->stride * 8; + DCT_stride = decoder->stride; + } + + 
coded_block_pattern = get_coded_block_pattern (decoder); + + if (likely (decoder->chroma_format == 0)) { + int offset = decoder->offset; + uint8_t * dest_y = decoder->dest[0] + offset; + if (coded_block_pattern & 1) + slice_non_intra_DCT (decoder, 0, dest_y, DCT_stride); + if (coded_block_pattern & 2) + slice_non_intra_DCT (decoder, 0, dest_y + 8, + DCT_stride); + if (coded_block_pattern & 4) + slice_non_intra_DCT (decoder, 0, dest_y + DCT_offset, + DCT_stride); + if (coded_block_pattern & 8) + slice_non_intra_DCT (decoder, 0, + dest_y + DCT_offset + 8, + DCT_stride); + if (coded_block_pattern & 16) + slice_non_intra_DCT (decoder, 1, + decoder->dest[1] + (offset >> 1), + decoder->uv_stride); + if (coded_block_pattern & 32) + slice_non_intra_DCT (decoder, 2, + decoder->dest[2] + (offset >> 1), + decoder->uv_stride); + } else if (likely (decoder->chroma_format == 1)) { + int offset; + uint8_t * dest_y; + + coded_block_pattern |= bit_buf & (3 << 30); + DUMPBITS (bit_buf, bits, 2); + + offset = decoder->offset; + dest_y = decoder->dest[0] + offset; + if (coded_block_pattern & 1) + slice_non_intra_DCT (decoder, 0, dest_y, DCT_stride); + if (coded_block_pattern & 2) + slice_non_intra_DCT (decoder, 0, dest_y + 8, + DCT_stride); + if (coded_block_pattern & 4) + slice_non_intra_DCT (decoder, 0, dest_y + DCT_offset, + DCT_stride); + if (coded_block_pattern & 8) + slice_non_intra_DCT (decoder, 0, + dest_y + DCT_offset + 8, + DCT_stride); + + DCT_stride >>= 1; + DCT_offset = (DCT_offset + offset) >> 1; + if (coded_block_pattern & 16) + slice_non_intra_DCT (decoder, 1, + decoder->dest[1] + (offset >> 1), + DCT_stride); + if (coded_block_pattern & 32) + slice_non_intra_DCT (decoder, 2, + decoder->dest[2] + (offset >> 1), + DCT_stride); + if (coded_block_pattern & (2 << 30)) + slice_non_intra_DCT (decoder, 1, + decoder->dest[1] + DCT_offset, + DCT_stride); + if (coded_block_pattern & (1 << 30)) + slice_non_intra_DCT (decoder, 2, + decoder->dest[2] + DCT_offset, + DCT_stride); + } 
else { + int offset; + uint8_t * dest_y, * dest_u, * dest_v; + + coded_block_pattern |= bit_buf & (63 << 26); + DUMPBITS (bit_buf, bits, 6); + + offset = decoder->offset; + dest_y = decoder->dest[0] + offset; + dest_u = decoder->dest[1] + offset; + dest_v = decoder->dest[2] + offset; + + if (coded_block_pattern & 1) + slice_non_intra_DCT (decoder, 0, dest_y, DCT_stride); + if (coded_block_pattern & 2) + slice_non_intra_DCT (decoder, 0, dest_y + 8, + DCT_stride); + if (coded_block_pattern & 4) + slice_non_intra_DCT (decoder, 0, dest_y + DCT_offset, + DCT_stride); + if (coded_block_pattern & 8) + slice_non_intra_DCT (decoder, 0, + dest_y + DCT_offset + 8, + DCT_stride); + + if (coded_block_pattern & 16) + slice_non_intra_DCT (decoder, 1, dest_u, DCT_stride); + if (coded_block_pattern & 32) + slice_non_intra_DCT (decoder, 2, dest_v, DCT_stride); + if (coded_block_pattern & (32 << 26)) + slice_non_intra_DCT (decoder, 1, dest_u + DCT_offset, + DCT_stride); + if (coded_block_pattern & (16 << 26)) + slice_non_intra_DCT (decoder, 2, dest_v + DCT_offset, + DCT_stride); + if (coded_block_pattern & (8 << 26)) + slice_non_intra_DCT (decoder, 1, dest_u + 8, + DCT_stride); + if (coded_block_pattern & (4 << 26)) + slice_non_intra_DCT (decoder, 2, dest_v + 8, + DCT_stride); + if (coded_block_pattern & (2 << 26)) + slice_non_intra_DCT (decoder, 1, + dest_u + DCT_offset + 8, + DCT_stride); + if (coded_block_pattern & (1 << 26)) + slice_non_intra_DCT (decoder, 2, + dest_v + DCT_offset + 8, + DCT_stride); + } + } + + decoder->dc_dct_pred[0] = decoder->dc_dct_pred[1] = + decoder->dc_dct_pred[2] = 16384; + } + + NEXT_MACROBLOCK; + + NEEDBITS (bit_buf, bits, bit_ptr); + mba_inc = 0; + while (1) { + if (bit_buf >= 0x10000000) { + mba = MBA_5 + (UBITS (bit_buf, 5) - 2); + break; + } else if (bit_buf >= 0x03000000) { + mba = MBA_11 + (UBITS (bit_buf, 11) - 24); + break; + } else switch (UBITS (bit_buf, 11)) { + case 8: /* macroblock_escape */ + mba_inc += 33; + /* pass through */ + case 15: 
/* macroblock_stuffing (MPEG1 only) */ + DUMPBITS (bit_buf, bits, 11); + NEEDBITS (bit_buf, bits, bit_ptr); + continue; + default: /* end of slice, or error */ + if (mpeg2_cpu_state_restore) + mpeg2_cpu_state_restore (&cpu_state); + return; + } + } + DUMPBITS (bit_buf, bits, mba->len); + mba_inc += mba->mba; + + if (mba_inc) { + decoder->dc_dct_pred[0] = decoder->dc_dct_pred[1] = + decoder->dc_dct_pred[2] = 16384; + + if (decoder->coding_type == P_TYPE) { + do { + MOTION_CALL (decoder->motion_parser[0], + MACROBLOCK_MOTION_FORWARD); + NEXT_MACROBLOCK; + } while (--mba_inc); + } else { + do { + MOTION_CALL (decoder->motion_parser[4], macroblock_modes); + NEXT_MACROBLOCK; + } while (--mba_inc); + } + } + } +#undef bit_buf +#undef bits +#undef bit_ptr +} diff --git a/src/video_dec/libmpeg2new/libmpeg2/uyvy.c b/src/video_dec/libmpeg2new/libmpeg2/uyvy.c new file mode 100644 index 000000000..7f107ffad --- /dev/null +++ b/src/video_dec/libmpeg2new/libmpeg2/uyvy.c @@ -0,0 +1,123 @@ +/* + * uyvy.c + * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org> + * Copyright (C) 2003 Regis Duchesne <hpreg@zoy.org> + * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "config.h" + +#include <inttypes.h> + +#include "mpeg2.h" +#include "mpeg2convert.h" + +typedef struct { + int width; + int stride; + int chroma420; + uint8_t * out; +} convert_uyvy_t; + +static void uyvy_start (void * _id, const mpeg2_fbuf_t * fbuf, + const mpeg2_picture_t * picture, + const mpeg2_gop_t * gop) +{ + convert_uyvy_t * instance = (convert_uyvy_t *) _id; + + instance->out = fbuf->buf[0]; + instance->stride = instance->width; + if (picture->nb_fields == 1) { + if (! (picture->flags & PIC_FLAG_TOP_FIELD_FIRST)) + instance->out += 2 * instance->stride; + instance->stride <<= 1; + } +} + +#ifdef WORDS_BIGENDIAN +#define PACK(a,b,c,d) (((a) << 24) | ((b) << 16) | ((c) << 8) | (d)) +#else +#define PACK(a,b,c,d) (((d) << 24) | ((c) << 16) | ((b) << 8) | (a)) +#endif + +static void uyvy_copy (void * const _id, uint8_t * const * src, + const unsigned int v_offset) +{ + const convert_uyvy_t * const id = (convert_uyvy_t *) _id; + uint8_t * _dst; + uint8_t * py, * pu, * pv; + int i, j; + + _dst = id->out + 2 * id->stride * v_offset; + py = src[0]; pu = src[1]; pv = src[2]; + + i = 16; + do { + uint32_t * dst = (uint32_t *) _dst; + + j = id->width >> 4; + do { + dst[0] = PACK (pu[0], py[0], pv[0], py[1]); + dst[1] = PACK (pu[1], py[2], pv[1], py[3]); + dst[2] = PACK (pu[2], py[4], pv[2], py[5]); + dst[3] = PACK (pu[3], py[6], pv[3], py[7]); + dst[4] = PACK (pu[4], py[8], pv[4], py[9]); + dst[5] = PACK (pu[5], py[10], pv[5], py[11]); + dst[6] = PACK (pu[6], py[12], pv[6], py[13]); + dst[7] = PACK (pu[7], py[14], pv[7], py[15]); + py += 16; + pu += 8; + pv += 8; + dst += 8; + } while (--j); + py -= id->width; + pu -= id->width >> 1; + pv -= id->width >> 1; + _dst += 2 * id->stride; + py += id->stride; + if (! 
(--i & id->chroma420)) { + pu += id->stride >> 1; + pv += id->stride >> 1; + } + } while (i); +} + +int mpeg2convert_uyvy (int stage, void * _id, const mpeg2_sequence_t * seq, + int stride, uint32_t accel, void * arg, + mpeg2_convert_init_t * result) +{ + convert_uyvy_t * instance = (convert_uyvy_t *) _id; + + if (seq->chroma_width == seq->width) + return 1; + + if (instance) { + instance->width = seq->width; + instance->chroma420 = (seq->chroma_height < seq->height); + result->buf_size[0] = seq->width * seq->height * 2; + result->buf_size[1] = result->buf_size[2] = 0; + result->start = uyvy_start; + result->copy = uyvy_copy; + } else { + result->id_size = sizeof (convert_uyvy_t); + } + + return 0; +} diff --git a/src/video_dec/libmpeg2new/libmpeg2/vlc.h b/src/video_dec/libmpeg2new/libmpeg2/vlc.h new file mode 100644 index 000000000..57448ce04 --- /dev/null +++ b/src/video_dec/libmpeg2new/libmpeg2/vlc.h @@ -0,0 +1,429 @@ +/* + * vlc.h + * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org> + * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> + * + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. + * See http://libmpeg2.sourceforge.net/ for updates. + * + * mpeg2dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpeg2dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#define GETWORD(bit_buf,shift,bit_ptr) \ +do { \ + bit_buf |= ((bit_ptr[0] << 8) | bit_ptr[1]) << (shift); \ + bit_ptr += 2; \ +} while (0) + +static inline void bitstream_init (mpeg2_decoder_t * decoder, + const uint8_t * start) +{ + decoder->bitstream_buf = + (start[0] << 24) | (start[1] << 16) | (start[2] << 8) | start[3]; + decoder->bitstream_ptr = start + 4; + decoder->bitstream_bits = -16; +} + +/* make sure that there are at least 16 valid bits in bit_buf */ +#define NEEDBITS(bit_buf,bits,bit_ptr) \ +do { \ + if (unlikely (bits > 0)) { \ + GETWORD (bit_buf, bits, bit_ptr); \ + bits -= 16; \ + } \ +} while (0) + +/* remove num valid bits from bit_buf */ +#define DUMPBITS(bit_buf,bits,num) \ +do { \ + bit_buf <<= (num); \ + bits += (num); \ +} while (0) + +/* take num bits from the high part of bit_buf and zero extend them */ +#define UBITS(bit_buf,num) (((uint32_t)(bit_buf)) >> (32 - (num))) + +/* take num bits from the high part of bit_buf and sign extend them */ +#define SBITS(bit_buf,num) (((int32_t)(bit_buf)) >> (32 - (num))) + +typedef struct { + uint8_t modes; + uint8_t len; +} MBtab; + +typedef struct { + uint8_t delta; + uint8_t len; +} MVtab; + +typedef struct { + int8_t dmv; + uint8_t len; +} DMVtab; + +typedef struct { + uint8_t cbp; + uint8_t len; +} CBPtab; + +typedef struct { + uint8_t size; + uint8_t len; +} DCtab; + +typedef struct { + uint8_t run; + uint8_t level; + uint8_t len; +} DCTtab; + +typedef struct { + uint8_t mba; + uint8_t len; +} MBAtab; + + +#define INTRA MACROBLOCK_INTRA +#define QUANT MACROBLOCK_QUANT + +static const MBtab MB_I [] = { + {INTRA|QUANT, 2}, {INTRA, 1} +}; + +#define MC MACROBLOCK_MOTION_FORWARD +#define CODED MACROBLOCK_PATTERN + +static const MBtab MB_P [] = { + {INTRA|QUANT, 6}, {CODED|QUANT, 
5}, {MC|CODED|QUANT, 5}, {INTRA, 5}, + {MC, 3}, {MC, 3}, {MC, 3}, {MC, 3}, + {CODED, 2}, {CODED, 2}, {CODED, 2}, {CODED, 2}, + {CODED, 2}, {CODED, 2}, {CODED, 2}, {CODED, 2}, + {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, + {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, + {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, + {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1}, {MC|CODED, 1} +}; + +#define FWD MACROBLOCK_MOTION_FORWARD +#define BWD MACROBLOCK_MOTION_BACKWARD +#define INTER MACROBLOCK_MOTION_FORWARD|MACROBLOCK_MOTION_BACKWARD + +static const MBtab MB_B [] = { + {0, 6}, {INTRA|QUANT, 6}, + {BWD|CODED|QUANT, 6}, {FWD|CODED|QUANT, 6}, + {INTER|CODED|QUANT, 5}, {INTER|CODED|QUANT, 5}, + {INTRA, 5}, {INTRA, 5}, + {FWD, 4}, {FWD, 4}, {FWD, 4}, {FWD, 4}, + {FWD|CODED, 4}, {FWD|CODED, 4}, {FWD|CODED, 4}, {FWD|CODED, 4}, + {BWD, 3}, {BWD, 3}, {BWD, 3}, {BWD, 3}, + {BWD, 3}, {BWD, 3}, {BWD, 3}, {BWD, 3}, + {BWD|CODED, 3}, {BWD|CODED, 3}, {BWD|CODED, 3}, {BWD|CODED, 3}, + {BWD|CODED, 3}, {BWD|CODED, 3}, {BWD|CODED, 3}, {BWD|CODED, 3}, + {INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2}, + {INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2}, + {INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2}, + {INTER, 2}, {INTER, 2}, {INTER, 2}, {INTER, 2}, + {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, + {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, + {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, + {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2}, {INTER|CODED, 2} +}; + +#undef INTRA +#undef QUANT +#undef MC +#undef CODED +#undef FWD +#undef BWD +#undef INTER + + +static const MVtab MV_4 [] = { + { 3, 6}, { 2, 4}, { 1, 3}, { 1, 3}, { 0, 2}, { 0, 2}, { 0, 2}, { 0, 2} +}; + +static const MVtab MV_10 [] = { + { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, { 0,10}, + { 0,10}, { 0,10}, { 0,10}, { 0,10}, {15,10}, {14,10}, {13,10}, {12,10}, + {11,10}, {10,10}, { 9, 9}, 
{ 9, 9}, { 8, 9}, { 8, 9}, { 7, 9}, { 7, 9}, + { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, { 6, 7}, + { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, { 5, 7}, + { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7}, { 4, 7} +}; + + +static const DMVtab DMV_2 [] = { + { 0, 1}, { 0, 1}, { 1, 2}, {-1, 2} +}; + + +static const CBPtab CBP_7 [] = { + {0x11, 7}, {0x12, 7}, {0x14, 7}, {0x18, 7}, + {0x21, 7}, {0x22, 7}, {0x24, 7}, {0x28, 7}, + {0x3f, 6}, {0x3f, 6}, {0x30, 6}, {0x30, 6}, + {0x09, 6}, {0x09, 6}, {0x06, 6}, {0x06, 6}, + {0x1f, 5}, {0x1f, 5}, {0x1f, 5}, {0x1f, 5}, + {0x10, 5}, {0x10, 5}, {0x10, 5}, {0x10, 5}, + {0x2f, 5}, {0x2f, 5}, {0x2f, 5}, {0x2f, 5}, + {0x20, 5}, {0x20, 5}, {0x20, 5}, {0x20, 5}, + {0x07, 5}, {0x07, 5}, {0x07, 5}, {0x07, 5}, + {0x0b, 5}, {0x0b, 5}, {0x0b, 5}, {0x0b, 5}, + {0x0d, 5}, {0x0d, 5}, {0x0d, 5}, {0x0d, 5}, + {0x0e, 5}, {0x0e, 5}, {0x0e, 5}, {0x0e, 5}, + {0x05, 5}, {0x05, 5}, {0x05, 5}, {0x05, 5}, + {0x0a, 5}, {0x0a, 5}, {0x0a, 5}, {0x0a, 5}, + {0x03, 5}, {0x03, 5}, {0x03, 5}, {0x03, 5}, + {0x0c, 5}, {0x0c, 5}, {0x0c, 5}, {0x0c, 5}, + {0x01, 4}, {0x01, 4}, {0x01, 4}, {0x01, 4}, + {0x01, 4}, {0x01, 4}, {0x01, 4}, {0x01, 4}, + {0x02, 4}, {0x02, 4}, {0x02, 4}, {0x02, 4}, + {0x02, 4}, {0x02, 4}, {0x02, 4}, {0x02, 4}, + {0x04, 4}, {0x04, 4}, {0x04, 4}, {0x04, 4}, + {0x04, 4}, {0x04, 4}, {0x04, 4}, {0x04, 4}, + {0x08, 4}, {0x08, 4}, {0x08, 4}, {0x08, 4}, + {0x08, 4}, {0x08, 4}, {0x08, 4}, {0x08, 4}, + {0x0f, 3}, {0x0f, 3}, {0x0f, 3}, {0x0f, 3}, + {0x0f, 3}, {0x0f, 3}, {0x0f, 3}, {0x0f, 3}, + {0x0f, 3}, {0x0f, 3}, {0x0f, 3}, {0x0f, 3}, + {0x0f, 3}, {0x0f, 3}, {0x0f, 3}, {0x0f, 3} +}; + +static const CBPtab CBP_9 [] = { + {0, 9}, {0x00, 9}, {0x39, 9}, {0x36, 9}, + {0x37, 9}, {0x3b, 9}, {0x3d, 9}, {0x3e, 9}, + {0x17, 8}, {0x17, 8}, {0x1b, 8}, {0x1b, 8}, + {0x1d, 8}, {0x1d, 8}, {0x1e, 8}, {0x1e, 8}, + {0x27, 8}, {0x27, 8}, {0x2b, 8}, {0x2b, 8}, + {0x2d, 8}, {0x2d, 8}, {0x2e, 8}, {0x2e, 8}, + {0x19, 8}, 
{0x19, 8}, {0x16, 8}, {0x16, 8}, + {0x29, 8}, {0x29, 8}, {0x26, 8}, {0x26, 8}, + {0x35, 8}, {0x35, 8}, {0x3a, 8}, {0x3a, 8}, + {0x33, 8}, {0x33, 8}, {0x3c, 8}, {0x3c, 8}, + {0x15, 8}, {0x15, 8}, {0x1a, 8}, {0x1a, 8}, + {0x13, 8}, {0x13, 8}, {0x1c, 8}, {0x1c, 8}, + {0x25, 8}, {0x25, 8}, {0x2a, 8}, {0x2a, 8}, + {0x23, 8}, {0x23, 8}, {0x2c, 8}, {0x2c, 8}, + {0x31, 8}, {0x31, 8}, {0x32, 8}, {0x32, 8}, + {0x34, 8}, {0x34, 8}, {0x38, 8}, {0x38, 8} +}; + + +static const DCtab DC_lum_5 [] = { + {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, + {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, + {0, 3}, {0, 3}, {0, 3}, {0, 3}, {3, 3}, {3, 3}, {3, 3}, {3, 3}, + {4, 3}, {4, 3}, {4, 3}, {4, 3}, {5, 4}, {5, 4}, {6, 5} +}; + +static const DCtab DC_chrom_5 [] = { + {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, {0, 2}, + {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, {1, 2}, + {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, + {3, 3}, {3, 3}, {3, 3}, {3, 3}, {4, 4}, {4, 4}, {5, 5} +}; + +static const DCtab DC_long [] = { + {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, { 6, 5}, { 6, 5}, + {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, {6, 5}, { 6, 5}, { 6, 5}, + {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, {7, 6}, { 7, 6}, { 7, 6}, + {8, 7}, {8, 7}, {8, 7}, {8, 7}, {9, 8}, {9, 8}, {10, 9}, {11, 9} +}; + + +static const DCTtab DCT_16 [] = { + {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, + {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, + {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, + {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, {129, 0, 0}, + { 2,18, 0}, { 2,17, 0}, { 2,16, 0}, { 2,15, 0}, + { 7, 3, 0}, { 17, 2, 0}, { 16, 2, 0}, { 15, 2, 0}, + { 14, 2, 0}, { 13, 2, 0}, { 12, 2, 0}, { 32, 1, 0}, + { 31, 1, 0}, { 30, 1, 0}, { 29, 1, 0}, { 28, 1, 0} +}; + +static const DCTtab DCT_15 [] = { + { 1,40,15}, { 1,39,15}, { 1,38,15}, { 1,37,15}, + { 1,36,15}, { 1,35,15}, { 1,34,15}, { 1,33,15}, + { 1,32,15}, { 2,14,15}, { 
2,13,15}, { 2,12,15}, + { 2,11,15}, { 2,10,15}, { 2, 9,15}, { 2, 8,15}, + { 1,31,14}, { 1,31,14}, { 1,30,14}, { 1,30,14}, + { 1,29,14}, { 1,29,14}, { 1,28,14}, { 1,28,14}, + { 1,27,14}, { 1,27,14}, { 1,26,14}, { 1,26,14}, + { 1,25,14}, { 1,25,14}, { 1,24,14}, { 1,24,14}, + { 1,23,14}, { 1,23,14}, { 1,22,14}, { 1,22,14}, + { 1,21,14}, { 1,21,14}, { 1,20,14}, { 1,20,14}, + { 1,19,14}, { 1,19,14}, { 1,18,14}, { 1,18,14}, + { 1,17,14}, { 1,17,14}, { 1,16,14}, { 1,16,14} +}; + +static const DCTtab DCT_13 [] = { + { 11, 2,13}, { 10, 2,13}, { 6, 3,13}, { 4, 4,13}, + { 3, 5,13}, { 2, 7,13}, { 2, 6,13}, { 1,15,13}, + { 1,14,13}, { 1,13,13}, { 1,12,13}, { 27, 1,13}, + { 26, 1,13}, { 25, 1,13}, { 24, 1,13}, { 23, 1,13}, + { 1,11,12}, { 1,11,12}, { 9, 2,12}, { 9, 2,12}, + { 5, 3,12}, { 5, 3,12}, { 1,10,12}, { 1,10,12}, + { 3, 4,12}, { 3, 4,12}, { 8, 2,12}, { 8, 2,12}, + { 22, 1,12}, { 22, 1,12}, { 21, 1,12}, { 21, 1,12}, + { 1, 9,12}, { 1, 9,12}, { 20, 1,12}, { 20, 1,12}, + { 19, 1,12}, { 19, 1,12}, { 2, 5,12}, { 2, 5,12}, + { 4, 3,12}, { 4, 3,12}, { 1, 8,12}, { 1, 8,12}, + { 7, 2,12}, { 7, 2,12}, { 18, 1,12}, { 18, 1,12} +}; + +static const DCTtab DCT_B14_10 [] = { + { 17, 1,10}, { 6, 2,10}, { 1, 7,10}, { 3, 3,10}, + { 2, 4,10}, { 16, 1,10}, { 15, 1,10}, { 5, 2,10} +}; + +static const DCTtab DCT_B14_8 [] = { + { 65, 0,12}, { 65, 0,12}, { 65, 0,12}, { 65, 0,12}, + { 3, 2, 7}, { 3, 2, 7}, { 10, 1, 7}, { 10, 1, 7}, + { 1, 4, 7}, { 1, 4, 7}, { 9, 1, 7}, { 9, 1, 7}, + { 8, 1, 6}, { 8, 1, 6}, { 8, 1, 6}, { 8, 1, 6}, + { 7, 1, 6}, { 7, 1, 6}, { 7, 1, 6}, { 7, 1, 6}, + { 2, 2, 6}, { 2, 2, 6}, { 2, 2, 6}, { 2, 2, 6}, + { 6, 1, 6}, { 6, 1, 6}, { 6, 1, 6}, { 6, 1, 6}, + { 14, 1, 8}, { 1, 6, 8}, { 13, 1, 8}, { 12, 1, 8}, + { 4, 2, 8}, { 2, 3, 8}, { 1, 5, 8}, { 11, 1, 8} +}; + +static const DCTtab DCT_B14AC_5 [] = { + { 1, 3, 5}, { 5, 1, 5}, { 4, 1, 5}, + { 1, 2, 4}, { 1, 2, 4}, { 3, 1, 4}, { 3, 1, 4}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + {129, 0, 2}, {129, 0, 2}, {129, 0, 
2}, {129, 0, 2}, + {129, 0, 2}, {129, 0, 2}, {129, 0, 2}, {129, 0, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2} +}; + +static const DCTtab DCT_B14DC_5 [] = { + { 1, 3, 5}, { 5, 1, 5}, { 4, 1, 5}, + { 1, 2, 4}, { 1, 2, 4}, { 3, 1, 4}, { 3, 1, 4}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, + { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, + { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, + { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1}, { 1, 1, 1} +}; + +static const DCTtab DCT_B15_10 [] = { + { 6, 2, 9}, { 6, 2, 9}, { 15, 1, 9}, { 15, 1, 9}, + { 3, 4,10}, { 17, 1,10}, { 16, 1, 9}, { 16, 1, 9} +}; + +static const DCTtab DCT_B15_8 [] = { + { 65, 0,12}, { 65, 0,12}, { 65, 0,12}, { 65, 0,12}, + { 8, 1, 7}, { 8, 1, 7}, { 9, 1, 7}, { 9, 1, 7}, + { 7, 1, 7}, { 7, 1, 7}, { 3, 2, 7}, { 3, 2, 7}, + { 1, 7, 6}, { 1, 7, 6}, { 1, 7, 6}, { 1, 7, 6}, + { 1, 6, 6}, { 1, 6, 6}, { 1, 6, 6}, { 1, 6, 6}, + { 5, 1, 6}, { 5, 1, 6}, { 5, 1, 6}, { 5, 1, 6}, + { 6, 1, 6}, { 6, 1, 6}, { 6, 1, 6}, { 6, 1, 6}, + { 2, 5, 8}, { 12, 1, 8}, { 1,11, 8}, { 1,10, 8}, + { 14, 1, 8}, { 13, 1, 8}, { 4, 2, 8}, { 2, 4, 8}, + { 3, 1, 5}, { 3, 1, 5}, { 3, 1, 5}, { 3, 1, 5}, + { 3, 1, 5}, { 3, 1, 5}, { 3, 1, 5}, { 3, 1, 5}, + { 2, 2, 5}, { 2, 2, 5}, { 2, 2, 5}, { 2, 2, 5}, + { 2, 2, 5}, { 2, 2, 5}, { 2, 2, 5}, { 2, 2, 5}, + { 4, 1, 5}, { 4, 1, 5}, { 4, 1, 5}, { 4, 1, 5}, + { 4, 1, 5}, { 4, 1, 5}, { 4, 1, 5}, { 4, 1, 5}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, { 2, 1, 3}, + {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, + {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, 
{129, 0, 4}, + {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, + {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, {129, 0, 4}, + { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, + { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, + { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, + { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, { 1, 3, 4}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, { 1, 1, 2}, + { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, + { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, + { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, + { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, + { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, + { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, + { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, + { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, { 1, 2, 3}, + { 1, 4, 5}, { 1, 4, 5}, { 1, 4, 5}, { 1, 4, 5}, + { 1, 4, 5}, { 1, 4, 5}, { 1, 4, 5}, { 1, 4, 5}, + { 1, 5, 5}, { 1, 5, 5}, { 1, 5, 5}, { 1, 5, 5}, + { 1, 5, 5}, { 1, 5, 5}, { 1, 5, 5}, { 1, 5, 5}, + { 10, 1, 7}, { 10, 1, 7}, { 2, 3, 7}, { 2, 3, 7}, + { 11, 1, 7}, { 11, 1, 7}, { 1, 8, 7}, { 1, 8, 7}, + { 1, 9, 7}, { 1, 9, 7}, { 1,12, 8}, { 1,13, 8}, + { 3, 3, 8}, { 5, 2, 8}, { 1,14, 8}, { 1,15, 8} +}; + + +static const MBAtab MBA_5 [] = { + {6, 5}, {5, 5}, {4, 4}, {4, 4}, 
{3, 4}, {3, 4}, + {2, 3}, {2, 3}, {2, 3}, {2, 3}, {1, 3}, {1, 3}, {1, 3}, {1, 3}, + {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, + {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1} +}; + +static const MBAtab MBA_11 [] = { + {32, 11}, {31, 11}, {30, 11}, {29, 11}, + {28, 11}, {27, 11}, {26, 11}, {25, 11}, + {24, 11}, {23, 11}, {22, 11}, {21, 11}, + {20, 10}, {20, 10}, {19, 10}, {19, 10}, + {18, 10}, {18, 10}, {17, 10}, {17, 10}, + {16, 10}, {16, 10}, {15, 10}, {15, 10}, + {14, 8}, {14, 8}, {14, 8}, {14, 8}, + {14, 8}, {14, 8}, {14, 8}, {14, 8}, + {13, 8}, {13, 8}, {13, 8}, {13, 8}, + {13, 8}, {13, 8}, {13, 8}, {13, 8}, + {12, 8}, {12, 8}, {12, 8}, {12, 8}, + {12, 8}, {12, 8}, {12, 8}, {12, 8}, + {11, 8}, {11, 8}, {11, 8}, {11, 8}, + {11, 8}, {11, 8}, {11, 8}, {11, 8}, + {10, 8}, {10, 8}, {10, 8}, {10, 8}, + {10, 8}, {10, 8}, {10, 8}, {10, 8}, + { 9, 8}, { 9, 8}, { 9, 8}, { 9, 8}, + { 9, 8}, { 9, 8}, { 9, 8}, { 9, 8}, + { 8, 7}, { 8, 7}, { 8, 7}, { 8, 7}, + { 8, 7}, { 8, 7}, { 8, 7}, { 8, 7}, + { 8, 7}, { 8, 7}, { 8, 7}, { 8, 7}, + { 8, 7}, { 8, 7}, { 8, 7}, { 8, 7}, + { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7}, + { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7}, + { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7}, + { 7, 7}, { 7, 7}, { 7, 7}, { 7, 7} +}; diff --git a/src/video_dec/libmpeg2new/xine_mpeg2new_decoder.c b/src/video_dec/libmpeg2new/xine_mpeg2new_decoder.c new file mode 100644 index 000000000..2678168e2 --- /dev/null +++ b/src/video_dec/libmpeg2new/xine_mpeg2new_decoder.c @@ -0,0 +1,512 @@ +/* + * Copyright (C) 2000-2004 the xine project + * + * This file is part of xine, a free video player. + * + * xine is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * xine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * stuff needed to turn libmpeg2 into a xine decoder plugin + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + + +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <inttypes.h> +#include <assert.h> + +#include "./include/mpeg2.h" +#include <xine/xine_internal.h> +#include <xine/video_out.h> +#include <xine/buffer.h> + +/* +#define LOG +#define LOG_FRAME_ALLOC_FREE +#define LOG_ENTRY +#define LOG_FRAME_COUNTER +*/ + +#define _x_abort() do {} while (0) + +typedef struct { + video_decoder_class_t decoder_class; +} mpeg2_class_t; + +typedef struct { + uint32_t id; + vo_frame_t * img; +} img_state_t; + +typedef struct mpeg2_video_decoder_s { + video_decoder_t video_decoder; + mpeg2dec_t *mpeg2dec; + mpeg2_class_t *class; + xine_stream_t *stream; + int32_t force_aspect; + int force_pan_scan; + double ratio; + img_state_t img_state[30]; + uint32_t frame_number; + uint32_t rff_pattern; + +} mpeg2_video_decoder_t; + +#ifndef LOG_FRAME_ALLOC_FREE +inline static void mpeg2_video_print_bad_state(img_state_t * img_state) {} +#else +static void mpeg2_video_print_bad_state(img_state_t * img_state) { + int32_t n,m; + m=0; + for(n=0;n<30;n++) { + if (img_state[n].id>0) { + printf("%d = %u\n",n, img_state[n].id); + m++; + } + } + if (m > 3) _x_abort(); + if (m == 0) printf("NO FRAMES\n"); +} +#endif + +static void mpeg2_video_free_all(img_state_t * img_state) { + int32_t n,m; + vo_frame_t * img; + 
printf("libmpeg2new:free_all\n"); + for(n=0;n<30;n++) { + if (img_state[n].id>0) { + img = img_state[n].img; + img->free(img); + img_state[n].id = 0; + } + } +} + + +static void mpeg2_video_print_fbuf(const mpeg2_fbuf_t * fbuf) { + printf("%p",fbuf); + vo_frame_t * img; + if (fbuf) { + img = (vo_frame_t *) fbuf->id; + if (img) { + printf (", img=%p, (id=%d)\n", + img, img->id); + } else { + printf (", img=NULL\n"); + } + } else { + printf ("\n"); + } +} + +static void mpeg2_video_decode_data (video_decoder_t *this_gen, buf_element_t *buf_element) { + mpeg2_video_decoder_t *this = (mpeg2_video_decoder_t *) this_gen; + uint8_t * current = buf_element->content; + uint8_t * end = buf_element->content + buf_element->size; + const mpeg2_info_t * info; + mpeg2_state_t state; + vo_frame_t * img; + uint32_t picture_structure; + int32_t frame_skipping; + + /* handle aspect hints from xine-dvdnav */ + if (buf_element->decoder_flags & BUF_FLAG_SPECIAL) { + if (buf_element->decoder_info[1] == BUF_SPECIAL_ASPECT) { + this->force_aspect = buf_element->decoder_info[2]; + if (buf_element->decoder_info[3] == 0x1 && buf_element->decoder_info[2] == 3) + /* letterboxing is denied, we have to do pan&scan */ + this->force_pan_scan = 1; + else + this->force_pan_scan = 0; + } + + return; + } + + if (buf_element->decoder_flags != 0) return; + +#ifdef LOG_ENTRY + printf ("libmpeg2: decode_data: enter\n"); +#endif + + mpeg2_buffer (this->mpeg2dec, current, end); + + info = mpeg2_info (this->mpeg2dec); + + while ((state = mpeg2_parse (this->mpeg2dec)) != STATE_BUFFER) { + switch (state) { + case STATE_SEQUENCE: + /* might set nb fbuf, convert format, stride */ + /* might set fbufs */ + _x_stream_info_set(this->stream, XINE_STREAM_INFO_VIDEO_BITRATE, info->sequence->byte_rate * 8); + _x_stream_info_set(this->stream, XINE_STREAM_INFO_VIDEO_WIDTH, info->sequence->picture_width); + _x_stream_info_set(this->stream, XINE_STREAM_INFO_VIDEO_HEIGHT, info->sequence->picture_height); + 
_x_stream_info_set(this->stream, XINE_STREAM_INFO_FRAME_DURATION, info->sequence->frame_period / 300); + if (this->force_aspect) ((mpeg2_sequence_t *)info->sequence)->pixel_width = this->force_aspect; /* ugly... */ + switch (info->sequence->pixel_width) { + case 3: + this->ratio = 16.0 / 9.0; + break; + case 4: + this->ratio = 2.11; + break; + case 2: + this->ratio = 4.0 / 3.0; + break; + case 1: + default: + this->ratio = (double)info->sequence->picture_width/(double)info->sequence->picture_height; + break; + } + _x_stream_info_set(this->stream, XINE_STREAM_INFO_VIDEO_RATIO, (int)(10000*this->ratio)); + + if (info->sequence->flags & SEQ_FLAG_MPEG2) { + _x_meta_info_set_utf8(this->stream, XINE_META_INFO_VIDEOCODEC, "MPEG 2 (libmpeg2new)"); + } else { + _x_meta_info_set_utf8(this->stream, XINE_META_INFO_VIDEOCODEC, "MPEG 1 (libmpeg2new)"); + } + + break; + case STATE_PICTURE: + /* might skip */ + /* might set fbuf */ + if (info->current_picture->nb_fields == 1) { + picture_structure = info->current_picture->flags & PIC_FLAG_TOP_FIELD_FIRST ? VO_TOP_FIELD : VO_BOTTOM_FIELD; + } else { + picture_structure = VO_BOTH_FIELDS; + } + + img = this->stream->video_out->get_frame (this->stream->video_out, + info->sequence->picture_width, + info->sequence->picture_height, + this->ratio, + XINE_IMGFMT_YV12, + picture_structure); + this->frame_number++; +#ifdef LOG_FRAME_COUNTER + printf("libmpeg2:frame_number=%d\n",this->frame_number); +#endif + img->top_field_first = info->current_picture->flags & PIC_FLAG_TOP_FIELD_FIRST ? 1 : 0; + img->repeat_first_field = (info->current_picture->nb_fields > 2) ? 
1 : 0; + img->duration=info->sequence->frame_period / 300; + if( ((this->rff_pattern & 0xff) == 0xaa || + (this->rff_pattern & 0xff) == 0x55) ) { + /* special case for ntsc 3:2 pulldown */ + img->duration += img->duration/4; + } else { + if( img->repeat_first_field ) { + img->duration = (img->duration * info->current_picture->nb_fields) / 2; + } + } + + if ((info->current_picture->flags & 7) == 1) { + img->pts=buf_element->pts; /* If an I frame, use PTS */ + } else { + img->pts=0; + } + + +#ifdef LOG_FRAME_ALLOC_FREE + printf ("libmpeg2:decode_data:get_frame xine=%p (id=%d)\n", img,img->id); +#endif + if (this->img_state[img->id].id != 0) { + printf ("libmpeg2:decode_data:get_frame id=%d BAD STATE:%d\n", img->id, this->img_state[img->id].id); + _x_abort(); + } + + this->img_state[img->id].id = 1; + this->img_state[img->id].img = img; + + mpeg2_set_buf (this->mpeg2dec, img->base, img); + break; + case STATE_SLICE: + case STATE_END: +#if 0 + printf("libmpeg2:decode_data:current_fbuf="); + mpeg2_video_print_fbuf(info->current_fbuf); + printf("libmpeg2:decode_data:display_fbuf="); + mpeg2_video_print_fbuf(info->display_fbuf); + printf("libmpeg2:decode_data:discard_fbuf="); + mpeg2_video_print_fbuf(info->discard_fbuf); +#endif + /* draw current picture */ + /* might free frame buffer */ + if (info->display_fbuf && info->display_fbuf->id) { + img = (vo_frame_t *) info->display_fbuf->id; + /* this should be used to detect any special rff pattern */ + this->rff_pattern = this->rff_pattern << 1; + this->rff_pattern |= img->repeat_first_field; + +#ifdef LOG_FRAME_ALLOC_FREE + printf ("libmpeg2:decode_data:draw_frame xine=%p, fbuf=%p, id=%d \n", img, info->display_fbuf, img->id); +#endif + if (this->img_state[img->id].id != 1) { + printf ("libmpeg2:decode_data:draw_frame id=%d BAD STATE:%d\n", img->id, this->img_state[img->id].id); + _x_abort(); + } + if (this->img_state[img->id].id == 1) { + frame_skipping = img->draw (img, this->stream); + /* FIXME: Handle skipping */ + 
this->img_state[img->id].id = 2; + } + + } + if (info->discard_fbuf && !info->discard_fbuf->id) { + printf ("libmpeg2:decode_data:BAD free_frame discard: xine=%p, fbuf=%p\n", info->discard_fbuf->id, info->discard_fbuf); + //_x_abort(); + } + if (info->discard_fbuf && info->discard_fbuf->id) { + img = (vo_frame_t *) info->discard_fbuf->id; +#ifdef LOG_FRAME_ALLOC_FREE + printf ("libmpeg2:decode_data:free_frame xine=%p, fbuf=%p,id=%d\n", img, info->discard_fbuf, img->id); +#endif + if (this->img_state[img->id].id != 2) { + printf ("libmpeg2:decode_data:free_frame id=%d BAD STATE:%d\n", img->id, this->img_state[img->id].id); + _x_abort(); + } + if (this->img_state[img->id].id == 2) { + img->free(img); + this->img_state[img->id].id = 0; + } + } +#ifdef LOG_FRAME_ALLOC_FREE + mpeg2_video_print_bad_state(this->img_state); +#endif + break; + case STATE_GOP: + break; + default: + printf("libmpeg2new: STATE unknown %d\n",state); + break; + } + + } +#ifdef LOG_ENTRY + printf ("libmpeg2: decode_data: exit\n"); +#endif + +} + +static void mpeg2_video_flush (video_decoder_t *this_gen) { + mpeg2_video_decoder_t *this = (mpeg2_video_decoder_t *) this_gen; + +#ifdef LOG_ENTRY + printf ("libmpeg2: flush\n"); +#endif + +/* mpeg2_flush (&this->mpeg2); */ +} + +static void mpeg2_video_reset (video_decoder_t *this_gen) { + mpeg2_video_decoder_t *this = (mpeg2_video_decoder_t *) this_gen; + int32_t state; + const mpeg2_info_t * info; + vo_frame_t * img; + int32_t frame_skipping; + +#ifdef LOG_ENTRY + printf ("libmpeg2: reset\n"); +#endif + mpeg2_reset (this->mpeg2dec, 1); /* 1 for full reset */ + mpeg2_video_free_all(this->img_state); + + +#if 0 /* This bit of code does not work yet. 
*/ + info = mpeg2_info (this->mpeg2dec); + state = mpeg2_reset (this->mpeg2dec); + printf("reset state1:%d\n",state); + if (info->display_fbuf && info->display_fbuf->id) { + img = (vo_frame_t *) info->display_fbuf->id; + + if (this->img_state[img->id] != 1) { + printf ("libmpeg2:decode_data:draw_frame id=%d BAD STATE:%d\n", img->id, this->img_state[img->id]); + _x_abort(); + } + if (this->img_state[img->id] == 1) { + frame_skipping = img->draw (img, this->stream); + /* FIXME: Handle skipping */ + this->img_state[img->id] = 2; + } + } + + if (info->discard_fbuf && !info->discard_fbuf->id) { + printf ("libmpeg2:decode_data:BAD free_frame discard_fbuf=%p\n", info->discard_fbuf); + _x_abort(); + } + if (info->discard_fbuf && info->discard_fbuf->id) { + img = (vo_frame_t *) info->discard_fbuf->id; + if (this->img_state[img->id] != 2) { + printf ("libmpeg2:decode_data:free_frame id=%d BAD STATE:%d\n", img->id, this->img_state[img->id]); + _x_abort(); + } + if (this->img_state[img->id] == 2) { + img->free(img); + this->img_state[img->id] = 0; + } + } + state = mpeg2_parse (this->mpeg2dec); + printf("reset state2:%d\n",state); + if (info->display_fbuf && info->display_fbuf->id) { + img = (vo_frame_t *) info->display_fbuf->id; + + if (this->img_state[img->id] != 1) { + printf ("libmpeg2:decode_data:draw_frame id=%d BAD STATE:%d\n", img->id, this->img_state[img->id]); + _x_abort(); + } + if (this->img_state[img->id] == 1) { + frame_skipping = img->draw (img, this->stream); + /* FIXME: Handle skipping */ + this->img_state[img->id] = 2; + } + } + + if (info->discard_fbuf && !info->discard_fbuf->id) { + printf ("libmpeg2:decode_data:BAD free_frame discard_fbuf=%p\n", info->discard_fbuf); + _x_abort(); + } + if (info->discard_fbuf && info->discard_fbuf->id) { + img = (vo_frame_t *) info->discard_fbuf->id; + if (this->img_state[img->id] != 2) { + printf ("libmpeg2:decode_data:free_frame id=%d BAD STATE:%d\n", img->id, this->img_state[img->id]); + _x_abort(); + } + if 
(this->img_state[img->id] == 2) { + img->free(img); + this->img_state[img->id] = 0; + } + } + state = mpeg2_parse (this->mpeg2dec); + printf("reset state3:%d\n",state); + if (info->display_fbuf && info->display_fbuf->id) { + img = (vo_frame_t *) info->display_fbuf->id; + + if (this->img_state[img->id] != 1) { + printf ("libmpeg2:decode_data:draw_frame id=%d BAD STATE:%d\n", img->id, this->img_state[img->id]); + _x_abort(); + } + if (this->img_state[img->id] == 1) { + frame_skipping = img->draw (img, this->stream); + /* FIXME: Handle skipping */ + this->img_state[img->id] = 2; + } + } + + if (info->discard_fbuf && !info->discard_fbuf->id) { + printf ("libmpeg2:decode_data:BAD free_frame discard_fbuf=%p\n", info->discard_fbuf); + _x_abort(); + } + if (info->discard_fbuf && info->discard_fbuf->id) { + img = (vo_frame_t *) info->discard_fbuf->id; + if (this->img_state[img->id] != 2) { + printf ("libmpeg2:decode_data:free_frame id=%d BAD STATE:%d\n", img->id, this->img_state[img->id]); + _x_abort(); + } + if (this->img_state[img->id] == 2) { + img->free(img); + this->img_state[img->id] = 0; + } + } +#endif + +} + +static void mpeg2_video_discontinuity (video_decoder_t *this_gen) { + mpeg2_video_decoder_t *this = (mpeg2_video_decoder_t *) this_gen; + +#ifdef LOG_ENTRY + printf ("libmpeg2: dicontinuity\n"); +#endif +/* mpeg2_discontinuity (&this->mpeg2dec); */ +} + +static void mpeg2_video_dispose (video_decoder_t *this_gen) { + + mpeg2_video_decoder_t *this = (mpeg2_video_decoder_t *) this_gen; + +#ifdef LOG_ENTRY + printf ("libmpeg2: close\n"); +#endif + + mpeg2_close (this->mpeg2dec); + + this->stream->video_out->close(this->stream->video_out, this->stream); + + free (this); +} + +static video_decoder_t *open_plugin (video_decoder_class_t *class_gen, xine_stream_t *stream) { + mpeg2_video_decoder_t *this ; + int32_t n; + + this = (mpeg2_video_decoder_t *) calloc(1, sizeof(mpeg2_video_decoder_t)); + + this->video_decoder.decode_data = mpeg2_video_decode_data; + 
this->video_decoder.flush = mpeg2_video_flush; + this->video_decoder.reset = mpeg2_video_reset; + this->video_decoder.discontinuity = mpeg2_video_discontinuity; + this->video_decoder.dispose = mpeg2_video_dispose; + this->stream = stream; + this->class = (mpeg2_class_t *) class_gen; + this->frame_number=0; + this->rff_pattern=0; + + this->mpeg2dec = mpeg2_init (); + mpeg2_custom_fbuf (this->mpeg2dec, 1); /* <- Force libmpeg2 to use xine frame buffers. */ + (stream->video_out->open) (stream->video_out, stream); + this->force_aspect = this->force_pan_scan = 0; + for(n=0;n<30;n++) this->img_state[n].id=0; + + return &this->video_decoder; +} + +/* + * mpeg2 plugin class + */ +static void *init_plugin (xine_t *xine, void *data) { + + mpeg2_class_t *this; + + this = (mpeg2_class_t *) calloc(1, sizeof(mpeg2_class_t)); + + this->decoder_class.open_plugin = open_plugin; + this->decoder_class.identifier = "mpeg2new"; + this->decoder_class.description = N_("mpeg2 based video decoder plugin"); + this->decoder_class.dispose = default_video_decoder_class_dispose; + + return this; +} +/* + * exported plugin catalog entry + */ + +static const uint32_t supported_types[] = { BUF_VIDEO_MPEG, 0 }; + +static const decoder_info_t dec_info_mpeg2 = { + supported_types, /* supported types */ + 6 /* priority */ +}; + +const plugin_info_t xine_plugin_info[] EXPORTED = { + /* type, API, "name", version, special_info, init_function */ + { PLUGIN_VIDEO_DECODER, 19, "mpeg2new", XINE_VERSION_CODE, &dec_info_mpeg2, init_plugin }, + { PLUGIN_NONE, 0, "", 0, NULL, NULL } +}; diff --git a/src/video_dec/libvdpau/Makefile.am b/src/video_dec/libvdpau/Makefile.am new file mode 100644 index 000000000..781001a04 --- /dev/null +++ b/src/video_dec/libvdpau/Makefile.am @@ -0,0 +1,42 @@ +include $(top_srcdir)/misc/Makefile.quiet +include $(top_srcdir)/misc/Makefile.common + +AM_CFLAGS = $(DEFAULT_OCFLAGS) $(VISIBILITY_FLAG) +AM_LDFLAGS = $(xineplug_ldflags) + +noinst_HEADERS = alterh264_decode.h 
alterh264_bits_reader.h bits_reader.h dpb.h cpb.h h264_parser.h nal.h + +if ENABLE_VDPAU +vdpau_h264_module = xineplug_decode_vdpau_h264.la +VDPAU_CFLAGS += -D_ISOC99_SOURCE + +vdpau_h264_alter_module = xineplug_decode_vdpau_h264_alter.la + +vdpau_mpeg12_module = xineplug_decode_vdpau_mpeg12.la + +vdpau_vc1_module = xineplug_decode_vdpau_vc1.la + +vdpau_mpeg4_module = xineplug_decode_vdpau_mpeg4.la +endif + +xineplug_LTLIBRARIES = $(vdpau_h264_module) $(vdpau_h264_alter_module) $(vdpau_mpeg12_module) $(vdpau_vc1_module) $(vdpau_mpeg4_module) + +xineplug_decode_vdpau_h264_alter_la_SOURCES = alterh264_decode.c +xineplug_decode_vdpau_h264_alter_la_CFLAGS = $(AM_CFLAGS) -fno-strict-aliasing +xineplug_decode_vdpau_h264_alter_la_LIBADD = $(XINE_LIB) $(DYNAMIC_LD_LIBS) + +xineplug_decode_vdpau_h264_la_SOURCES = nal.c dpb.c cpb.c h264_parser.c vdpau_h264.c +xineplug_decode_vdpau_h264_la_CFLAGS = $(AM_CFLAGS) $(VDPAU_CFLAGS) -fno-strict-aliasing +xineplug_decode_vdpau_h264_la_LIBADD = $(XINE_LIB) $(DYNAMIC_LD_LIBS) -lm + +xineplug_decode_vdpau_mpeg12_la_SOURCES = vdpau_mpeg12.c +xineplug_decode_vdpau_mpeg12_la_CFLAGS = $(AM_CFLAGS) -fno-strict-aliasing +xineplug_decode_vdpau_mpeg12_la_LIBADD = $(XINE_LIB) $(DYNAMIC_LD_LIBS) + +xineplug_decode_vdpau_vc1_la_SOURCES = vdpau_vc1.c +xineplug_decode_vdpau_vc1_la_CFLAGS = $(AM_CFLAGS) -fno-strict-aliasing +xineplug_decode_vdpau_vc1_la_LIBADD = $(XINE_LIB) $(DYNAMIC_LD_LIBS) + +xineplug_decode_vdpau_mpeg4_la_SOURCES = vdpau_mpeg4.c +xineplug_decode_vdpau_mpeg4_la_CFLAGS = $(AM_CFLAGS) -fno-strict-aliasing +xineplug_decode_vdpau_mpeg4_la_LIBADD = $(XINE_LIB) $(DYNAMIC_LD_LIBS) diff --git a/src/video_dec/libvdpau/alterh264_bits_reader.h b/src/video_dec/libvdpau/alterh264_bits_reader.h new file mode 100644 index 000000000..47a26aca1 --- /dev/null +++ b/src/video_dec/libvdpau/alterh264_bits_reader.h @@ -0,0 +1,127 @@ +/* kate: tab-indent on; indent-width 4; mixedindent off; indent-mode cstyle; remove-trailing-space on; */ +#ifndef 
ALTERH264_BITS_READER_H +#define ALTERH264_BITS_READER_H +#include <sys/types.h> +#include <inttypes.h> +#include <stdio.h> + + + +typedef struct { + uint8_t *buffer, *start; + int offbits, length, oflow; +} bits_reader_t; + + + +static void +bits_reader_set (bits_reader_t * br, uint8_t * buf, int len) +{ + br->buffer = br->start = buf; + br->offbits = 0; + br->length = len; + br->oflow = 0; +} + + + +static inline uint32_t +more_rbsp_data (bits_reader_t * br) +{ + uint8_t val[8] = { 0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01 }; + uint8_t *buf = br->start + br->length; + int bit; + + while (--buf >= br->buffer) + { + for (bit = 7; bit > -1; bit--) + if (*buf & val[bit]) + return ((buf - br->buffer) * 8) - br->offbits + bit; + } + return 0; +} + + + +static inline uint8_t +bits_reader_shift (bits_reader_t * br) +{ + br->offbits = 0; + if ((br->buffer + 1) > (br->start + br->length - 1)) + { + br->oflow = 1; + //printf("!!!!! buffer overflow !!!!!\n"); + return 0; + } + ++br->buffer; + if ((*(br->buffer) == 3) && ((br->buffer - br->start) > 2) + && (*(br->buffer - 2) == 0) && (*(br->buffer - 1) == 0)) + { + if ((br->buffer + 1) > (br->start + br->length - 1)) + { + br->oflow = 1; + //printf("!!!!! buffer overflow !!!!!\n"); + return 0; + } + ++br->buffer; + } + return 1; +} + + + +static inline uint32_t +read_bits (bits_reader_t * br, int nbits) +{ + uint8_t val[8] = { 0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01 }; + uint32_t res = 0; + + while (nbits) + { + res = (res << 1) + ((*br->buffer & val[br->offbits]) ? 
1 : 0); + --nbits; + ++br->offbits; + if (br->offbits > 7) + if (!bits_reader_shift (br)) + return 1; + } + return res; +} + + + +static inline void +skip_bits (bits_reader_t * br, int nbits) +{ + while (nbits) + { + --nbits; + ++br->offbits; + if (br->offbits > 7) + bits_reader_shift (br); + } +} + + + +static inline uint32_t +read_exp_ue (bits_reader_t * br) +{ + int leading = -1; + uint8_t b; + + for (b = 0; !b; leading++) + b = read_bits (br, 1); + + return (1 << leading) - 1 + read_bits (br, leading); +} + + + +static inline int32_t +read_exp_se (bits_reader_t * br) +{ + uint32_t res = read_exp_ue (br); + return (res & 0x01) ? (res + 1) / 2 : -(res / 2); +} +#endif /* ALTERH264_BITS_READER_H */ diff --git a/src/video_dec/libvdpau/alterh264_decode.c b/src/video_dec/libvdpau/alterh264_decode.c new file mode 100644 index 000000000..f11162f3e --- /dev/null +++ b/src/video_dec/libvdpau/alterh264_decode.c @@ -0,0 +1,2448 @@ +/* kate: space-indent on; indent-width 2; mixedindent off; indent-mode cstyle; remove-trailing-space on; + * Copyright (C) 2008 the xine project + * Copyright (C) 2008 Christophe Thommeret <hftom@free.fr> + * + * This file is part of xine, a free video player. + * + * xine is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * xine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110, USA + * + * alterh264_decode.c, a H264 video stream parser using VDPAU hardware decoder + * + */ + +#include "alterh264_decode.h" + + +#define MAX_DPB_SIZE 16 +#define MIN_BUFFER_SIZE 10000 +#define MAX_BUFFER_SIZE 3145728 + +#define NAL_UNSPECIFIED 0 +#define NAL_SLICE_NO_IDR 1 +#define NAL_SLICE_IDR 5 +#define NAL_SEI 6 +#define NAL_SEQUENCE 7 +#define NAL_PICTURE 8 +#define NAL_ACCES 9 +#define NAL_END_SEQUENCE 10 +#define NAL_END_STREAM 11 +#define NAL_SEQUENCE_EXT 13 + +#define SLICE_TYPE_P 0 +#define SLICE_TYPE_B 1 +#define SLICE_TYPE_I 2 +#define SLICE_TYPE_SP 3 +#define SLICE_TYPE_SI 4 + +#define START_IDR_FLAG 1000 + +#define MAX_POC 2147483647 + +#define DPB_DRAW_CLEAR 1 +#define DPB_DRAW_REFS 2 +#define DPB_DRAW_CURRENT 3 + +//#define MAKE_DAT /*do NOT define this, unless you know what you do */ +#ifdef MAKE_DAT +static int nframes; +static FILE *outfile; +#endif + + +/*-------- DPB -------------------------------------------*/ +static void +dpb_print (sequence_t * sequence) +{ + int i; + dpb_frame_t *frame; + uint32_t sf; + + for (i = 0; i < MAX_DPB_SIZE; i++) + { + frame = sequence->dpb[i]; + if (!frame->used) + break; + vo_frame_t *vo = (vo_frame_t *) frame->videoSurface; + vdpau_accel_t *accel; + if (vo) + accel = (vdpau_accel_t *) vo->accel_data; + sf = (vo) ? 
accel->surface : -1; + fprintf (stderr, + "{ i:%d u:%d c:%d pn:%d-%d ir:%d-%d tpoc:%d bpoc:%d sf:%u }\n", + i, frame->used, frame->completed, frame->PicNum[0], + frame->PicNum[1], frame->is_reference[0], frame->is_reference[1], + frame->TopFieldOrderCnt, frame->BottomFieldOrderCnt, sf); + } +} + + + +static void +dpb_clear_all_pts (sequence_t * sequence) +{ + int i; + + for (i = 0; i < MAX_DPB_SIZE; i++) + { + if (!sequence->dpb[i]->used) + break; + sequence->dpb[i]->pts = 0; + } + sequence->cur_pic.pts = 0; + sequence->cur_pic.drop_pts = 1; +} + + +static void +dpb_reset (sequence_t * sequence) +{ + int i; + + for (i = 0; i < MAX_DPB_SIZE; i++) + { + if (sequence->dpb[i]->videoSurface) + sequence->dpb[i]->videoSurface->free (sequence->dpb[i]->videoSurface); + memset (sequence->dpb[i], 0, sizeof (dpb_frame_t)); + } + if (sequence->cur_pic.videoSurface && !sequence->cur_pic.is_reference[0] + && !sequence->cur_pic.is_reference[1]) + { + //fprintf(stderr, "freeing cur_pic\n"); + sequence->cur_pic.videoSurface->free (sequence->cur_pic.videoSurface); + } +} + + + +static void +dpb_remove (sequence_t * sequence, int index) +{ + lprintf ("|||||||||||||||||||||||||||||||||||||||| dbp_remove\n"); + int i; + + dpb_frame_t *frame = sequence->dpb[index]; + if (frame->videoSurface) + frame->videoSurface->free (frame->videoSurface); + memset (frame, 0, sizeof (dpb_frame_t)); + for (i = index; i < (MAX_DPB_SIZE - 1); i++) + { + sequence->dpb[i] = sequence->dpb[i + 1]; + if (!sequence->dpb[i]->used) + { + sequence->dpb[i + 1] = frame; + break; + } + } + if (i == (MAX_DPB_SIZE - 1)) + sequence->dpb[i] = frame; +} + + + +static dpb_frame_t * +dpb_get_prev_ref (sequence_t * sequence) +{ + int i = MAX_DPB_SIZE - 1; + + while (i > -1) + { + if (sequence->dpb[i]->used) + return sequence->dpb[i]; + --i; + } + + return NULL; +} + + + +static void +dpb_draw_frames (vdpau_h264_alter_decoder_t * this_gen, int32_t curpoc, + int draw_mode) +{ + sequence_t *seq = (sequence_t *) & 
this_gen->sequence; + int i, index = 0; + int32_t poc, tpoc; + dpb_frame_t *frame; + + while (index > -1) + { + index = -1; + poc = curpoc; + for (i = 0; i < MAX_DPB_SIZE; i++) + { + frame = seq->dpb[i]; + if (!frame->used) + break; + tpoc = + (frame->TopFieldOrderCnt > + frame->BottomFieldOrderCnt) ? frame->TopFieldOrderCnt : frame-> + BottomFieldOrderCnt; + if (!frame->videoSurface->drawn && (tpoc <= poc)) + { + poc = tpoc; + index = i; + } + } + if ((index > -1) && (poc <= curpoc)) + { + //fprintf(stderr,"|||||||||||||||||||||||||||||||||||||||| dpb_draw_frame = %d\n", poc); + frame = seq->dpb[index]; + frame->videoSurface->pts = frame->pts; + //fprintf(stderr,"H264 PTS = %llu\n", frame->pts); + frame->videoSurface->top_field_first = frame->top_field_first; + frame->videoSurface->draw (frame->videoSurface, this_gen->stream); + frame->videoSurface->drawn++; + if ((draw_mode != DPB_DRAW_CLEAR) && !frame->is_reference[0] + && !frame->is_reference[1]) + dpb_remove (seq, index); + } + else + index = -1; + } + + if (draw_mode == DPB_DRAW_CURRENT) + { + //fprintf(stderr,"|||||||||||||||||||||||||||||||||||||||| dpb_draw_frame = %d\n", curpoc); + frame = &seq->cur_pic; + frame->videoSurface->pts = frame->pts; + //fprintf(stderr,"H264 PTS = %llu\n", frame->pts); + frame->videoSurface->top_field_first = frame->top_field_first; + frame->videoSurface->draw (frame->videoSurface, this_gen->stream); + frame->videoSurface->free (frame->videoSurface); + } + else if (draw_mode == DPB_DRAW_CLEAR) + dpb_reset (seq); +} + + + +static dpb_frame_t * +dpb_get_PicNum (sequence_t * sequence, int32_t pic_num, int *index) +{ + dpb_frame_t *frame; + int i = 0; + + for (i = 0; i < MAX_DPB_SIZE; i++) + { + frame = sequence->dpb[i]; + if (!frame->used) + break; + if ((frame->PicNum[0] == pic_num) || (frame->PicNum[1] == pic_num)) + { + *index = i; + return frame; + } + } + return 0; +} + + + +static void +dpb_mmc1 (vdpau_h264_alter_decoder_t * this_gen, int32_t picnum) +{ + sequence_t *seq = 
(sequence_t *) & this_gen->sequence; + int index; + + lprintf ("dpb_mmc1\n"); + + dpb_frame_t *frame = dpb_get_PicNum (seq, picnum, &index); + + if (frame) + { + frame->is_reference[0] = frame->is_reference[1] = 0; + if (frame->videoSurface->drawn) + dpb_remove (seq, index); + else + dpb_draw_frames (this_gen, + (frame->TopFieldOrderCnt > + frame->BottomFieldOrderCnt) ? frame-> + TopFieldOrderCnt : frame->BottomFieldOrderCnt, + DPB_DRAW_REFS); + } +} + + + +static void +dbp_append (vdpau_h264_alter_decoder_t * this_gen, int second_field) +{ + sequence_t *sequence = (sequence_t *) & this_gen->sequence; + int i, index = 0, refs = 0; + int32_t fnw = MAX_POC; + slice_param_t *sl = &sequence->slice_param; + pic_param_t *pic = sequence->pic_param[sl->pic_parameter_set_id]; + seq_param_t *sp = sequence->seq_param[pic->seq_parameter_set_id]; + dpb_frame_t *tmp = 0, *cur_pic = &sequence->cur_pic; + int max = sp->num_ref_frames ? sp->num_ref_frames : 1; + max = (max > MAX_DPB_SIZE) ? MAX_DPB_SIZE : max; + + vo_frame_t *vo = (vo_frame_t *) cur_pic->videoSurface; + vdpau_accel_t *accel = (vdpau_accel_t *) vo->accel_data; + lprintf + ("|||||||||||||||||||||||||||||||||||||||| dbp_append surface = %d\n", + accel->surface); + + if (second_field) + { + tmp = dpb_get_prev_ref (sequence); + if (tmp) + { + memcpy (tmp, cur_pic, sizeof (dpb_frame_t)); + cur_pic->videoSurface = NULL; + } + else + fprintf (stderr, "OOPS, no frame to store the second field ?!\n"); + return; + } + + for (i = 0; i < MAX_DPB_SIZE; i++) + { + if (!sequence->dpb[i]->used) + break; + if (sequence->dpb[i]->FrameNumWrap < fnw) + { + fnw = sequence->dpb[i]->FrameNumWrap; + index = i; + } + refs++; + } + + if (refs >= max) + { + lprintf ("sliding window\n"); + tmp = sequence->dpb[index], + tmp->is_reference[0] = tmp->is_reference[1] = 0; + if (tmp->videoSurface->drawn) + dpb_remove (sequence, index); + else + dpb_draw_frames (this_gen, + (tmp->TopFieldOrderCnt > + tmp->BottomFieldOrderCnt) ? 
tmp-> + TopFieldOrderCnt : tmp->BottomFieldOrderCnt, + DPB_DRAW_REFS); + + for (i = 0; i < MAX_DPB_SIZE; i++) + { + if (!sequence->dpb[i]->used) + break; + } + } + + if (i < MAX_DPB_SIZE) + { + memcpy (sequence->dpb[i], cur_pic, sizeof (dpb_frame_t)); + if (!cur_pic->field_pic_flag) + cur_pic->videoSurface = NULL; + } +} + +/*--------------------------------------------------------*/ + + + +static void +reset_slices (sequence_t * sequence) +{ + sequence->slices_count = 0; + sequence->slice_mode = 0; +} + + + +static void +reset_sequence (sequence_t * sequence) +{ + sequence->prevFrameNum = 0; + sequence->prevFrameNumOffset = 0; + sequence->prevMMC5 = 0; + + sequence->startup_frame = 0; + sequence->reset = 0; + sequence->chroma = 0; + sequence->pic_pts = 0; + sequence->bufpos = 0; + sequence->bufseek = 0; + sequence->start = -1; + reset_slices (sequence); + dpb_reset (sequence); + memset (&sequence->cur_pic, 0, sizeof (dpb_frame_t)); + sequence->reset = VO_NEW_SEQUENCE_FLAG; + sequence->color_standard = VDP_COLOR_STANDARD_ITUR_BT_601; +} + + + +static void +set_ratio (sequence_t * seq, seq_param_t * sp) +{ + if (seq->mode_frame && seq->ratio) + return; + if (!seq->coded_height) + seq->coded_height = 1; + seq->ratio = (double) seq->coded_width / (double) seq->coded_height; + if (sp->vui.aspect_ratio_info) + { + switch (sp->vui.aspect_ratio_idc) + { + case ASPECT_1_1: + seq->ratio = 1 * seq->ratio; + break; + case ASPECT_12_11: + seq->ratio *= 12.0 / 11.0; + break; + case ASPECT_10_11: + seq->ratio *= 10.0 / 11.0; + break; + case ASPECT_16_11: + seq->ratio *= 16.0 / 11.0; + break; + case ASPECT_40_33: + seq->ratio *= 40.0 / 33.0; + break; + case ASPECT_24_11: + seq->ratio *= 24.0 / 11.0; + break; + case ASPECT_20_11: + seq->ratio *= 20.0 / 11.0; + break; + case ASPECT_32_11: + seq->ratio *= 32.0 / 11.0; + break; + case ASPECT_80_33: + seq->ratio *= 80.0 / 33.0; + break; + case ASPECT_18_11: + seq->ratio *= 18.0 / 11.0; + break; + case ASPECT_15_11: + seq->ratio *= 
15.0 / 11.0; + break; + case ASPECT_64_33: + seq->ratio *= 64.0 / 33.0; + break; + case ASPECT_160_99: + seq->ratio *= 160.0 / 99.0; + break; + case ASPECT_4_3: + seq->ratio *= 4.0 / 3.0; + break; + case ASPECT_3_2: + seq->ratio *= 3.0 / 2.0; + break; + case ASPECT_2_1: + seq->ratio *= 2.0 / 1.0; + break; + case ASPECT_EXTENDED_SAR: + if (sp->vui.sar_height) + seq->ratio *= (double) sp->vui.sar_width / sp->vui.sar_height; + break; + } + } +} + + + +static void +parse_scaling_list (bits_reader_t * br, uint8_t * scaling_list, int len, + int index) +{ + int last_scale = 8; + int next_scale = 8; + int32_t delta_scale; + uint8_t use_default_scaling_matrix_flag = 0; + int i; + + const uint8_t *zigzag = (len == 64) ? zigzag_8x8 : zigzag_4x4; + + for (i = 0; i < len; i++) + { + if (next_scale != 0) + { + delta_scale = read_exp_se (br); + next_scale = (last_scale + delta_scale + 256) % 256; + if (i == 0 && next_scale == 0) + { + use_default_scaling_matrix_flag = 1; + break; + } + } + scaling_list[zigzag[i]] = last_scale = + (next_scale == 0) ? 
last_scale : next_scale; + } + + if (use_default_scaling_matrix_flag) + { + switch (index) + { + case 0: + case 1: + case 2: + { + for (i = 0; i < sizeof (default_4x4_intra); i++) + scaling_list[zigzag_4x4[i]] = default_4x4_intra[i]; + break; + } + case 3: + case 4: + case 5: + { + for (i = 0; i < sizeof (default_4x4_inter); i++) + scaling_list[zigzag_4x4[i]] = default_4x4_inter[i]; + break; + } + case 6: + { + for (i = 0; i < sizeof (default_8x8_intra); i++) + scaling_list[zigzag_8x8[i]] = default_8x8_intra[i]; + break; + } + case 7: + { + for (i = 0; i < sizeof (default_8x8_inter); i++) + scaling_list[zigzag_8x8[i]] = default_8x8_inter[i]; + break; + } + } + } +} + + + +static void +scaling_list_fallback_A (uint8_t * scaling_lists_4x4, + uint8_t * scaling_lists_8x8, int i) +{ + int j; + switch (i) + { + case 0: + { + for (j = 0; j < sizeof (default_4x4_intra); j++) + scaling_lists_4x4[(i * 16) + zigzag_4x4[j]] = default_4x4_intra[j]; + break; + } + case 3: + { + for (j = 0; j < sizeof (default_4x4_inter); j++) + scaling_lists_4x4[(i * 16) + zigzag_4x4[j]] = default_4x4_inter[j]; + break; + } + case 1: + case 2: + case 4: + case 5: + memcpy (&scaling_lists_4x4[i * 16], &scaling_lists_4x4[(i - 1) * 16], + 6 * 16); + break; + case 6: + { + for (j = 0; j < sizeof (default_8x8_intra); j++) + scaling_lists_8x8[(i - 6) * 64 + zigzag_8x8[j]] = + default_8x8_intra[j]; + break; + } + case 7: + { + for (j = 0; j < sizeof (default_8x8_inter); j++) + scaling_lists_8x8[(i - 6) * 64 + zigzag_8x8[j]] = + default_8x8_inter[j]; + break; + } + + } +} + + + +static void +scaling_list_fallback_B (seq_param_t * sp, pic_param_t * pic, int i) +{ + switch (i) + { + case 0: + case 3: + memcpy (pic->scaling_lists_4x4[i], sp->scaling_lists_4x4[i], + sizeof (pic->scaling_lists_4x4[i])); + break; + case 1: + case 2: + case 4: + case 5: + memcpy (pic->scaling_lists_4x4[i], pic->scaling_lists_4x4[i - 1], + sizeof (pic->scaling_lists_4x4[i])); + break; + case 6: + case 7: + memcpy 
(pic->scaling_lists_8x8[i - 6], sp->scaling_lists_8x8[i - 6], + sizeof (pic->scaling_lists_8x8[i - 6])); + break; + } +} + + + +static void +vui_parameters (sequence_t * seq, vui_param_t * vui) +{ + bits_reader_t *br = &seq->br; + + vui->aspect_ratio_info = read_bits (br, 1); + lprintf ("aspect_ratio_info_present_flag = %d\n", vui->aspect_ratio_info); + if (vui->aspect_ratio_info) + { + vui->aspect_ratio_idc = read_bits (br, 8); + lprintf ("aspect_ratio_idc = %d\n", vui->aspect_ratio_idc); + if (vui->aspect_ratio_idc == 255) + { + vui->sar_width = read_bits (br, 16); + lprintf ("sar_width = %d\n", vui->sar_width); + vui->sar_height = read_bits (br, 16); + lprintf ("sar_height = %d\n", vui->sar_height); + } + } + if (read_bits (br, 1)) /* overscan_info_present_flag */ + skip_bits (br, 1); /* overscan_appropriate_falg */ + if (read_bits (br, 1)) + { /* video_signal_type_present_flag */ + skip_bits (br, 3); /*video_format */ + skip_bits (br, 1); /*video_full_range_flag */ + vui->colour_desc = read_bits (br, 1); + lprintf ("colour_desc = %d\n", vui->colour_desc); + if (vui->colour_desc) + { + vui->colour_primaries = read_bits (br, 8); + lprintf ("colour_primaries = %d\n", vui->colour_primaries); + skip_bits (br, 8); /* transfer_characteristics */ + skip_bits (br, 8); /* matrix_coefficients */ + switch (vui->colour_primaries) + { + case 1: + seq->color_standard = VDP_COLOR_STANDARD_ITUR_BT_709; + break; + case 6: + case 7: + seq->color_standard = VDP_COLOR_STANDARD_SMPTE_240M; + break; + } + } + } + if (read_bits (br, 1)) + { /* chroma_loc_info_present_flag */ + read_exp_ue (br); /* chroma_sample_loc_type_top_field */ + read_exp_ue (br); /* chroma_sample_loc_type_bottom_field */ + } + vui->timing_info = read_bits (br, 1); + lprintf ("timing_info = %d\n", vui->timing_info); + if (vui->timing_info) + { + vui->num_units_in_tick = read_bits (br, 32); + lprintf ("num_units_in_tick = %u\n", vui->num_units_in_tick); + vui->time_scale = read_bits (br, 32); + lprintf 
("time_scale = %u\n", vui->time_scale); + if (vui->time_scale > 0) + seq->video_step = + 180000. * (double) vui->num_units_in_tick / (double) vui->time_scale; + } +} + + + +static void +seq_parameter_set_data (vdpau_h264_alter_decoder_t * this_gen) +{ + sequence_t *seq = (sequence_t *) & this_gen->sequence; + seq_param_t *sp; + int i; + + uint8_t profile_idc = read_bits (&seq->br, 8); + lprintf ("profile_idc = %d\n", profile_idc); + uint8_t constraint_set0_flag = read_bits (&seq->br, 1); + lprintf ("constraint_set0_flag = %d\n", constraint_set0_flag); + uint8_t constraint_set1_flag = read_bits (&seq->br, 1); + lprintf ("constraint_set1_flag = %d\n", constraint_set1_flag); + uint8_t constraint_set2_flag = read_bits (&seq->br, 1); + lprintf ("constraint_set2_flag = %d\n", constraint_set2_flag); + uint8_t constraint_set3_flag = read_bits (&seq->br, 1); + lprintf ("constraint_set3_flag = %d\n", constraint_set3_flag); + skip_bits (&seq->br, 4); + uint8_t level_idc = read_bits (&seq->br, 8); + lprintf ("level_idc = %d\n", level_idc); + + uint8_t seq_parameter_set_id = read_exp_ue (&seq->br); + lprintf ("seq_parameter_set_id = %d\n", seq_parameter_set_id); + if (seq_parameter_set_id > 31) + { + lprintf ("OOPS : seq_parameter_set_id > 31 !!\n"); + return; + } + + if (!seq->seq_param[seq_parameter_set_id]) + seq->seq_param[seq_parameter_set_id] = + (seq_param_t *) calloc (1, sizeof (seq_param_t)); + if (!seq->seq_param[seq_parameter_set_id]) + { + lprintf ("OOPS : can't allocate SPS %d !!\n", seq_parameter_set_id); + return; + } + + sp = seq->seq_param[seq_parameter_set_id]; + sp->profile_idc = profile_idc; + switch (profile_idc) + { + case 100: + seq->profile = VDP_DECODER_PROFILE_H264_HIGH; + break; + case 77: + seq->profile = VDP_DECODER_PROFILE_H264_MAIN; + break; + case 66: /* nvidia's vdpau doesn't suppot baseline, force main */ + default: + seq->profile = VDP_DECODER_PROFILE_H264_MAIN; + } + sp->constraint_set0_flag = constraint_set0_flag; + sp->constraint_set1_flag 
= constraint_set1_flag; + sp->constraint_set2_flag = constraint_set2_flag; + sp->constraint_set3_flag = constraint_set3_flag; + sp->level_idc = level_idc; + + memset (&sp->scaling_lists_4x4, 16, sizeof (sp->scaling_lists_4x4)); + memset (&sp->scaling_lists_8x8, 16, sizeof (sp->scaling_lists_8x8)); + + sp->chroma_format_idc = 1; + sp->separate_colour_plane_flag = 0; + if (sp->profile_idc == 100 || sp->profile_idc == 110 + || sp->profile_idc == 122 || sp->profile_idc == 244 + || sp->profile_idc == 44 || sp->profile_idc == 83 + || sp->profile_idc == 86) + { + sp->chroma_format_idc = read_exp_ue (&seq->br); + lprintf ("chroma_format_idc = %u\n", sp->chroma_format_idc); + if (sp->chroma_format_idc == 3) + { + sp->separate_colour_plane_flag = read_bits (&seq->br, 1); + lprintf ("separate_colour_plane_flag = %d\n", + sp->separate_colour_plane_flag); + } + sp->bit_depth_luma_minus8 = read_exp_ue (&seq->br); + lprintf ("bit_depth_luma_minus8 = %u\n", sp->bit_depth_luma_minus8); + sp->bit_depth_chroma_minus8 = read_exp_ue (&seq->br); + lprintf ("bit_depth_chroma_minus8 = %u\n", sp->bit_depth_chroma_minus8); + sp->qpprime_y_zero_transform_bypass_flag = read_bits (&seq->br, 1); + lprintf ("qpprime_y_zero_transform_bypass_flag = %u\n", + sp->qpprime_y_zero_transform_bypass_flag); + sp->seq_scaling_matrix_present_flag = read_bits (&seq->br, 1); + lprintf ("seq_scaling_matrix_present_flag = %u\n", + sp->seq_scaling_matrix_present_flag); + if (sp->seq_scaling_matrix_present_flag) + { + for (i = 0; i < 8; i++) + { + int scaling_flag = read_bits (&seq->br, 1); + if (scaling_flag) + { + if (i < 6) + parse_scaling_list (&seq->br, &sp->scaling_lists_4x4[i][0], 16, + i); + else + parse_scaling_list (&seq->br, &sp->scaling_lists_8x8[i - 6][0], + 64, i); + } + else + scaling_list_fallback_A ((uint8_t *) sp->scaling_lists_4x4, + (uint8_t *) sp->scaling_lists_8x8, i); + } + } + } + sp->log2_max_frame_num_minus4 = read_exp_ue (&seq->br); + lprintf ("log2_max_frame_num_minus4 = %u\n", 
sp->log2_max_frame_num_minus4); + sp->pic_order_cnt_type = read_exp_ue (&seq->br); + lprintf ("pic_order_cnt_type = %u\n", sp->pic_order_cnt_type); + if (sp->pic_order_cnt_type == 0) + { + sp->log2_max_pic_order_cnt_lsb_minus4 = read_exp_ue (&seq->br); + lprintf ("log2_max_pic_order_cnt_lsb_minus4 = %u\n", + sp->log2_max_pic_order_cnt_lsb_minus4); + } + else if (sp->pic_order_cnt_type == 1) + { + sp->delta_pic_order_always_zero_flag = read_bits (&seq->br, 1); + lprintf ("delta_pic_order_always_zero_flag = %u\n", + sp->delta_pic_order_always_zero_flag); + sp->offset_for_non_ref_pic = read_exp_se (&seq->br); + lprintf ("offset_for_non_ref_pic = %d\n", sp->offset_for_non_ref_pic); + sp->offset_for_top_to_bottom_field = read_exp_se (&seq->br); + lprintf ("offset_for_top_to_bottom_field = %d\n", + sp->offset_for_top_to_bottom_field); + sp->num_ref_frames_in_pic_order_cnt_cycle = read_exp_ue (&seq->br); + lprintf ("num_ref_frames_in_pic_order_cnt_cycle = %u\n", + sp->num_ref_frames_in_pic_order_cnt_cycle); + for (i = 0; i < sp->num_ref_frames_in_pic_order_cnt_cycle; i++) + { + sp->offset_for_ref_frame[i] = read_exp_se (&seq->br); + lprintf ("offset_for_ref_frame[%d] = %d\n", i, + sp->offset_for_ref_frame[i]); + } + } + sp->num_ref_frames = read_exp_ue (&seq->br); + if (sp->num_ref_frames > 16) + sp->num_ref_frames = 16; + lprintf ("num_ref_frames = %u\n", sp->num_ref_frames); + sp->gaps_in_frame_num_value_allowed_flag = read_bits (&seq->br, 1); + lprintf ("gaps_in_frame_num_value_allowed_flag = %u\n", + sp->gaps_in_frame_num_value_allowed_flag); + sp->pic_width_in_mbs_minus1 = read_exp_ue (&seq->br); + lprintf ("pic_width_in_mbs_minus1 = %u\n", sp->pic_width_in_mbs_minus1); + sp->pic_height_in_map_units_minus1 = read_exp_ue (&seq->br); + lprintf ("pic_height_in_map_units_minus1 = %u\n", + sp->pic_height_in_map_units_minus1); + sp->frame_mbs_only_flag = read_bits (&seq->br, 1); + lprintf ("frame_mbs_only_flag = %u\n", sp->frame_mbs_only_flag); + + seq->coded_width = 
(sp->pic_width_in_mbs_minus1 + 1) * 16; + seq->coded_height = + (2 - sp->frame_mbs_only_flag) * (sp->pic_height_in_map_units_minus1 + + 1) * 16; + + if (!sp->frame_mbs_only_flag) + { + sp->mb_adaptive_frame_field_flag = read_bits (&seq->br, 1); + lprintf ("mb_adaptive_frame_field_flag = %u\n", + sp->mb_adaptive_frame_field_flag); + } + sp->direct_8x8_inference_flag = read_bits (&seq->br, 1); + lprintf ("direct_8x8_inference_flag = %u\n", sp->direct_8x8_inference_flag); + sp->frame_cropping_flag = read_bits (&seq->br, 1); + lprintf ("frame_cropping_flag = %u\n", sp->frame_cropping_flag); + if (sp->frame_cropping_flag) + { + sp->frame_crop_left_offset = read_exp_ue (&seq->br); + lprintf ("frame_crop_left_offset = %u\n", sp->frame_crop_left_offset); + sp->frame_crop_right_offset = read_exp_ue (&seq->br); + lprintf ("frame_crop_right_offset = %u\n", sp->frame_crop_right_offset); + sp->frame_crop_top_offset = read_exp_ue (&seq->br); + lprintf ("frame_crop_top_offset = %u\n", sp->frame_crop_top_offset); + sp->frame_crop_bottom_offset = read_exp_ue (&seq->br); + lprintf ("frame_crop_bottom_offset = %u\n", sp->frame_crop_bottom_offset); + seq->coded_height -= + (2 - sp->frame_mbs_only_flag) * 2 * sp->frame_crop_bottom_offset; + } + if (seq->coded_height == 1088) + seq->coded_height = 1080; + sp->vui_parameters_present_flag = read_bits (&seq->br, 1); + lprintf ("vui_parameters_present_flag = %u\n", + sp->vui_parameters_present_flag); + if (sp->vui_parameters_present_flag) + vui_parameters (seq, &sp->vui); + set_ratio (seq, sp); +} + + + +static void +pic_parameter_set (vdpau_h264_alter_decoder_t * this_gen) +{ + sequence_t *seq = (sequence_t *) & this_gen->sequence; + pic_param_t *pic; + seq_param_t *sp; + int i; + + uint8_t pic_parameter_set_id = read_exp_ue (&seq->br); + lprintf ("pic_parameter_set_id = %u\n", pic_parameter_set_id); + if (!seq->pic_param[pic_parameter_set_id]) + seq->pic_param[pic_parameter_set_id] = + (pic_param_t *) calloc (1, sizeof (pic_param_t)); + 
if (!seq->pic_param[pic_parameter_set_id]) + { + lprintf ("OOPS : can't allocate PPS %d !!\n", pic_parameter_set_id); + return; + } + pic = seq->pic_param[pic_parameter_set_id]; + + uint8_t seq_parameter_set_id = read_exp_ue (&seq->br); + lprintf ("seq_parameter_set_id = %u\n", seq_parameter_set_id); + if (seq_parameter_set_id > 31) + { + lprintf ("OOPS : referenced SPS (%d) does not exist !!\n", + seq_parameter_set_id); + return; + } + if (!seq->seq_param[seq_parameter_set_id]) + { + lprintf ("OOPS : referenced SPS (%d) does not exist !!\n", + seq_parameter_set_id); + return; + } + + pic->seq_parameter_set_id = seq_parameter_set_id; + sp = seq->seq_param[pic->seq_parameter_set_id]; + pic->entropy_coding_mode_flag = read_bits (&seq->br, 1); + lprintf ("entropy_coding_mode_flag = %u\n", pic->entropy_coding_mode_flag); + pic->pic_order_present_flag = read_bits (&seq->br, 1); + lprintf ("pic_order_present_flag = %u\n", pic->pic_order_present_flag); + uint8_t num_slice_groups_minus1 = read_exp_ue (&seq->br); + lprintf ("num_slice_groups_minus1 = %u\n", num_slice_groups_minus1); + if (num_slice_groups_minus1 > 0) + { + uint8_t slice_group_map_type = read_exp_ue (&seq->br); + lprintf ("slice_group_map_type = %u\n", slice_group_map_type); + if (!slice_group_map_type) + { + for (i = 0; i < num_slice_groups_minus1; i++) + read_exp_ue (&seq->br); + } + else if (slice_group_map_type == 2) + { + for (i = 0; i < num_slice_groups_minus1; i++) + { + read_exp_ue (&seq->br); + read_exp_ue (&seq->br); + } + } + else if (slice_group_map_type == 3 || slice_group_map_type == 4 + || slice_group_map_type == 5) + { + read_bits (&seq->br, 1); + read_exp_ue (&seq->br); + } + else if (slice_group_map_type == 6) + { + read_exp_ue (&seq->br); + } + } + pic->num_ref_idx_l0_active_minus1 = read_exp_ue (&seq->br); + lprintf ("num_ref_idx_l0_active_minus1 = %u\n", + pic->num_ref_idx_l0_active_minus1); + pic->num_ref_idx_l1_active_minus1 = read_exp_ue (&seq->br); + lprintf 
("num_ref_idx_l1_active_minus1 = %u\n", + pic->num_ref_idx_l1_active_minus1); + pic->weighted_pred_flag = read_bits (&seq->br, 1); + lprintf ("weighted_pred_flag = %u\n", pic->weighted_pred_flag); + pic->weighted_bipred_idc = read_bits (&seq->br, 2); + lprintf ("weighted_bipred_idc = %u\n", pic->weighted_bipred_idc); + pic->pic_init_qp_minus26 = read_exp_se (&seq->br); + lprintf ("pic_init_qp_minus26 = %d\n", pic->pic_init_qp_minus26); + pic->pic_init_qs_minus26 = read_exp_se (&seq->br); + lprintf ("pic_init_qs_minus26 = %d\n", pic->pic_init_qs_minus26); + pic->chroma_qp_index_offset = read_exp_se (&seq->br); + lprintf ("chroma_qp_index_offset = %d\n", pic->chroma_qp_index_offset); + pic->deblocking_filter_control_present_flag = read_bits (&seq->br, 1); + lprintf ("deblocking_filter_control_present_flag = %u\n", + pic->deblocking_filter_control_present_flag); + pic->constrained_intra_pred_flag = read_bits (&seq->br, 1); + lprintf ("constrained_intra_pred_flag = %u\n", + pic->constrained_intra_pred_flag); + pic->redundant_pic_cnt_present_flag = read_bits (&seq->br, 1); + lprintf ("redundant_pic_cnt_present_flag = %u\n", + pic->redundant_pic_cnt_present_flag); + + uint32_t more = more_rbsp_data (&seq->br); + lprintf ("more bits = %u (buflen = %d) (still = %d)\n", more, + seq->br.length, seq->br.start + seq->br.length - seq->br.buffer); + if (more) + { + pic->transform_8x8_mode_flag = read_bits (&seq->br, 1); + lprintf ("transform_8x8_mode_flag = %u\n", pic->transform_8x8_mode_flag); + pic->pic_scaling_matrix_present_flag = read_bits (&seq->br, 1); + lprintf ("pic_scaling_matrix_present_flag = %u\n", + pic->pic_scaling_matrix_present_flag); + if (pic->pic_scaling_matrix_present_flag) + { + for (i = 0; i < 8; i++) + { + if (i < 6 || pic->transform_8x8_mode_flag) + pic->pic_scaling_list_present_flag[i] = read_bits (&seq->br, 1); + else + pic->pic_scaling_list_present_flag[i] = 0; + + if (pic->pic_scaling_list_present_flag[i]) + { + if (i < 6) + parse_scaling_list 
(&seq->br, &pic->scaling_lists_4x4[i][0], 16, + i); + else + parse_scaling_list (&seq->br, &pic->scaling_lists_8x8[i - 6][0], + 64, i); + } + else + { + if (!sp->seq_scaling_matrix_present_flag) + scaling_list_fallback_A ((uint8_t *) pic->scaling_lists_4x4, + (uint8_t *) pic->scaling_lists_8x8, i); + else + scaling_list_fallback_B (sp, pic, i); + } + } + } + pic->second_chroma_qp_index_offset = read_exp_se (&seq->br); + lprintf ("second_chroma_qp_index_offset = %d\n", + pic->second_chroma_qp_index_offset); + } + else + { + pic->transform_8x8_mode_flag = 0; + pic->pic_scaling_matrix_present_flag = 0; + pic->second_chroma_qp_index_offset = pic->chroma_qp_index_offset; + } +} + + + +static void +pred_weight_table (vdpau_h264_alter_decoder_t * this_gen, uint8_t slice_type, + uint8_t ChromaArrayType, uint8_t l0, uint8_t l1) +{ + sequence_t *seq = (sequence_t *) & this_gen->sequence; + int i; + + read_exp_ue (&seq->br); + if (ChromaArrayType) + read_exp_ue (&seq->br); + for (i = 0; i <= l0; i++) + { + if (read_bits (&seq->br, 1)) + { + read_exp_se (&seq->br); + read_exp_se (&seq->br); + } + if (ChromaArrayType && read_bits (&seq->br, 1)) + { + read_exp_se (&seq->br); + read_exp_se (&seq->br); + read_exp_se (&seq->br); + read_exp_se (&seq->br); + } + } + if (slice_type == SLICE_TYPE_B) + { + for (i = 0; i <= l1; i++) + { + if (read_bits (&seq->br, 1)) + { + read_exp_se (&seq->br); + read_exp_se (&seq->br); + } + if (ChromaArrayType) + { + if (read_bits (&seq->br, 1)) + { + read_exp_se (&seq->br); + read_exp_se (&seq->br); + read_exp_se (&seq->br); + read_exp_se (&seq->br); + } + } + } + } +} + + + +static void +ref_pic_list_reordering (vdpau_h264_alter_decoder_t * this_gen) +{ + sequence_t *seq = (sequence_t *) & this_gen->sequence; + slice_param_t *sl = &seq->slice_param; + + if ((sl->slice_type != SLICE_TYPE_I) && (sl->slice_type != SLICE_TYPE_SI)) + { + if (read_bits (&seq->br, 1)) + { + uint32_t tmp, diff; + do + { + tmp = read_exp_ue (&seq->br); + if (tmp == 0 || tmp 
== 1) + diff = read_exp_ue (&seq->br); + else if (tmp == 2) + diff = read_exp_ue (&seq->br); + } + while (tmp != 3 && !seq->br.oflow); + } + } + if (sl->slice_type == SLICE_TYPE_B) + { + if (read_bits (&seq->br, 1)) + { + uint32_t tmp2, diff2; + do + { + tmp2 = read_exp_ue (&seq->br); + if (tmp2 == 0 || tmp2 == 1) + diff2 = read_exp_ue (&seq->br); + else if (tmp2 == 2) + diff2 = read_exp_ue (&seq->br); + } + while (tmp2 != 3 && !seq->br.oflow); + } + } +} + + + +static void +dec_ref_pic_marking (vdpau_h264_alter_decoder_t * this_gen, uint8_t idr) +{ + sequence_t *seq = (sequence_t *) & this_gen->sequence; + int32_t pic_num; + + if (idr) + { + uint8_t no_output_of_prior_pics_flag = read_bits (&seq->br, 1); + lprintf ("no_output_of_prior_pics_flag = %u\n", + no_output_of_prior_pics_flag); + uint8_t long_term_reference_flag = read_bits (&seq->br, 1); + lprintf ("long_term_reference_flag = %u\n", long_term_reference_flag); + } + else + { + uint8_t adaptive_ref_pic_marking_mode_flag = read_bits (&seq->br, 1); + lprintf ("adaptive_ref_pic_marking_mode_flag = %u\n", + adaptive_ref_pic_marking_mode_flag); + if (!adaptive_ref_pic_marking_mode_flag) + { + if (seq->cur_pic.field_pic_flag + && (seq->cur_pic.completed == PICTURE_DONE) + && (seq->cur_pic.is_reference[0] || seq->cur_pic.is_reference[1])) + { + seq->cur_pic.is_reference[0] = seq->cur_pic.is_reference[1] = + SHORT_TERM_REF; + lprintf ("short_ref marking\n"); + } + // sliding window is always performed in dpb_append() + } + else + { + uint8_t memory_management_control_operation; + do + { + memory_management_control_operation = read_exp_ue (&seq->br); + lprintf ("memory_management_control_operation = %u\n", + memory_management_control_operation); + if (memory_management_control_operation == 1 + || memory_management_control_operation == 3) + { + uint32_t difference_of_pic_nums_minus1 = read_exp_ue (&seq->br); + lprintf ("difference_of_pic_nums_minus1 = %u\n", + difference_of_pic_nums_minus1); + pic_num = + 
seq->cur_pic.PicNum[0] - (difference_of_pic_nums_minus1 + 1); + dpb_mmc1 (this_gen, pic_num); + } + if (memory_management_control_operation == 2) + { + uint32_t long_term_pic_num = read_exp_ue (&seq->br); + lprintf ("long_term_pic_num = %u\n", long_term_pic_num); + } + if (memory_management_control_operation == 3 + || memory_management_control_operation == 6) + { + uint32_t long_term_frame_idx = read_exp_ue (&seq->br); + lprintf ("long_term_frame_idx = %u\n", long_term_frame_idx); + } + if (memory_management_control_operation == 4) + { + uint32_t max_long_term_frame_idx_plus1 = read_exp_ue (&seq->br); + lprintf ("max_long_term_frame_idx_plus1 = %u\n", + max_long_term_frame_idx_plus1); + } + } + while (memory_management_control_operation && !seq->br.oflow); + } + } +} + + + +static void +slice_header (vdpau_h264_alter_decoder_t * this_gen, uint8_t nal_ref_idc, + uint8_t nal_unit_type) +{ + sequence_t *seq = (sequence_t *) & this_gen->sequence; + slice_param_t *sl = &seq->slice_param; + pic_param_t *pic; + seq_param_t *sp; + + sl->nal_ref_idc = nal_ref_idc; + sl->nal_unit_type = nal_unit_type; + + read_exp_ue (&seq->br); /* first_mb_in_slice */ + sl->slice_type = read_exp_ue (&seq->br) % 5; + lprintf ("slice_type = %u\n", sl->slice_type); + sl->pic_parameter_set_id = read_exp_ue (&seq->br); + lprintf ("pic_parameter_set_id = %u\n", sl->pic_parameter_set_id); + if (!seq->pic_param[sl->pic_parameter_set_id]) + { + lprintf ("OOPS : referenced PPS (%d) does not exist !!\n", + sl->pic_parameter_set_id); + seq->cur_pic.missing_header = 1; + return; + } + pic = seq->pic_param[sl->pic_parameter_set_id]; + if (!seq->seq_param[pic->seq_parameter_set_id]) + { + lprintf ("OOPS : referenced SPS (%d) does not exist !!\n", + pic->seq_parameter_set_id); + seq->cur_pic.missing_header = 1; + return; + } + + if (!seq->startup_frame && (sl->slice_type == SLICE_TYPE_I) + && !seq->cur_pic.completed) + seq->startup_frame = 1; + + sp = seq->seq_param[pic->seq_parameter_set_id]; + if 
(sp->separate_colour_plane_flag) + read_bits (&seq->br, 2); /* colour_plane_id */ + sl->frame_num = read_bits (&seq->br, sp->log2_max_frame_num_minus4 + 4); + lprintf ("frame_num = %u\n", sl->frame_num); + sl->MaxFrameNum = 1 << (sp->log2_max_frame_num_minus4 + 4); + + sl->field_pic_flag = sl->bottom_field_flag = + sl->delta_pic_order_cnt_bottom = 0; + sl->delta_pic_order_cnt[0] = sl->delta_pic_order_cnt[1] = 0; + + if (!sp->frame_mbs_only_flag) + { + sl->field_pic_flag = read_bits (&seq->br, 1); + lprintf ("field_pic_flag = %u\n", sl->field_pic_flag); + if (sl->field_pic_flag) + { + sl->bottom_field_flag = read_bits (&seq->br, 1); + lprintf ("bottom_field_flag = %u\n", sl->bottom_field_flag); + } + } + if (nal_unit_type == NAL_SLICE_IDR) + { + sl->idr_pic_id = read_exp_ue (&seq->br); + lprintf ("idr_pic_id = %u\n", sl->idr_pic_id); + } + if (sp->pic_order_cnt_type == 0) + { + sl->pic_order_cnt_lsb = + read_bits (&seq->br, sp->log2_max_pic_order_cnt_lsb_minus4 + 4); + lprintf ("pic_order_cnt_lsb = %u\n", sl->pic_order_cnt_lsb); + if (pic->pic_order_present_flag && !sl->field_pic_flag) + { + sl->delta_pic_order_cnt_bottom = read_exp_se (&seq->br); + lprintf ("delta_pic_order_cnt_bottom = %d\n", + sl->delta_pic_order_cnt_bottom); + } + } + if (sp->pic_order_cnt_type == 1 && !sp->delta_pic_order_always_zero_flag) + { + sl->delta_pic_order_cnt[0] = read_exp_se (&seq->br); + lprintf ("delta_pic_order_cnt[0] = %d\n", sl->delta_pic_order_cnt[0]); + if (pic->pic_order_present_flag && !sl->field_pic_flag) + { + sl->delta_pic_order_cnt[1] = read_exp_se (&seq->br); + lprintf ("delta_pic_order_cnt[1] = %d\n", sl->delta_pic_order_cnt[1]); + } + } + if (pic->redundant_pic_cnt_present_flag) + { + sl->redundant_pic_cnt = read_exp_ue (&seq->br); + lprintf ("redundant_pic_cnt = %u\n", sl->redundant_pic_cnt); + } + if (sl->slice_type == SLICE_TYPE_B) + skip_bits (&seq->br, 1); /* direct_spatial_mv_pred_flag */ + + sl->num_ref_idx_l0_active_minus1 = pic->num_ref_idx_l0_active_minus1; 
+ sl->num_ref_idx_l1_active_minus1 = pic->num_ref_idx_l1_active_minus1; + + if (sl->slice_type == SLICE_TYPE_P || sl->slice_type == SLICE_TYPE_SP + || sl->slice_type == SLICE_TYPE_B) + { + if (read_bits (&seq->br, 1)) + { + lprintf ("num_ref_idx_active_override_flag = 1\n"); + sl->num_ref_idx_l0_active_minus1 = read_exp_ue (&seq->br); + if (sl->slice_type == SLICE_TYPE_B) + sl->num_ref_idx_l1_active_minus1 = read_exp_ue (&seq->br); + lprintf ("num_ref_idx_l0_active_minus1 = %u\n", + sl->num_ref_idx_l0_active_minus1); + lprintf ("num_ref_idx_l1_active_minus1 = %u\n", + sl->num_ref_idx_l1_active_minus1); + } + } +} + + + +static void +slice_header_post (vdpau_h264_alter_decoder_t * this_gen) +{ + sequence_t *seq = (sequence_t *) & this_gen->sequence; + slice_param_t *sl = &seq->slice_param; + + if (!sl->nal_ref_idc) + return; + + pic_param_t *pic = seq->pic_param[sl->pic_parameter_set_id]; + seq_param_t *sp = seq->seq_param[pic->seq_parameter_set_id]; + + if ((pic->weighted_pred_flag + && ((sl->slice_type == SLICE_TYPE_P) + || (sl->slice_type == SLICE_TYPE_SP))) + || ((pic->weighted_bipred_idc == 1) + && (sl->slice_type == SLICE_TYPE_B))) + { + uint8_t chroma = + (sp->separate_colour_plane_flag) ? 0 : sp->chroma_format_idc; + pred_weight_table (this_gen, sl->slice_type, chroma, + sl->num_ref_idx_l0_active_minus1, + sl->num_ref_idx_l1_active_minus1); + } + + dec_ref_pic_marking (this_gen, (sl->nal_unit_type == 5) ? 1 : 0); +} + + + +static void +decode_poc (vdpau_h264_alter_decoder_t * this_gen) +{ + sequence_t *seq = (sequence_t *) & this_gen->sequence; + slice_param_t *sl = &seq->slice_param; + pic_param_t *pic = seq->pic_param[sl->pic_parameter_set_id]; + seq_param_t *sp = seq->seq_param[pic->seq_parameter_set_id]; + int parity = sl->bottom_field_flag ? 
1 : 0; + + seq->cur_pic.used = 1; + seq->cur_pic.FrameNum = sl->frame_num; + seq->cur_pic.is_reference[parity] = sl->nal_ref_idc; + seq->cur_pic.field_pic_flag = sl->field_pic_flag; + + if (sl->field_pic_flag) + { + if (!seq->cur_pic.completed) + seq->cur_pic.top_field_first = !parity; + seq->cur_pic.completed |= + (parity ? PICTURE_BOTTOM_DONE : PICTURE_TOP_DONE); + } + else + { + seq->cur_pic.is_reference[!parity] = seq->cur_pic.is_reference[parity]; + seq->cur_pic.completed = PICTURE_DONE; + } + + if (sp->pic_order_cnt_type == 0) + { + dpb_frame_t *prev_pic = dpb_get_prev_ref (seq); + int32_t prevPicOrderCntMsb, prevPicOrderCntLsb; + uint32_t MaxPicOrderCntLsb = + 1 << (sp->log2_max_pic_order_cnt_lsb_minus4 + 4); + + seq->cur_pic.pic_order_cnt_lsb = sl->pic_order_cnt_lsb; + seq->cur_pic.top_field_first = + (sl->delta_pic_order_cnt_bottom < 0) ? 0 : 1; + + if (!prev_pic) + { + seq->cur_pic.PicOrderCntMsb = seq->cur_pic.TopFieldOrderCnt = + seq->cur_pic.BottomFieldOrderCnt = 0; + return; + } + if (sl->nal_unit_type == NAL_SLICE_IDR) + prevPicOrderCntMsb = prevPicOrderCntLsb = 0; + else if (prev_pic->mmc5) + { + if (!sl->bottom_field_flag) + { + prevPicOrderCntMsb = 0; + prevPicOrderCntLsb = prev_pic->TopFieldOrderCnt; + } + else + prevPicOrderCntMsb = prevPicOrderCntLsb = 0; + } + else + { + prevPicOrderCntMsb = prev_pic->PicOrderCntMsb; + prevPicOrderCntLsb = prev_pic->pic_order_cnt_lsb; + } + + if ((sl->pic_order_cnt_lsb < prevPicOrderCntLsb) + && ((prevPicOrderCntLsb - sl->pic_order_cnt_lsb) >= + (MaxPicOrderCntLsb / 2))) + seq->cur_pic.PicOrderCntMsb = prevPicOrderCntMsb + MaxPicOrderCntLsb; + else if ((sl->pic_order_cnt_lsb > prevPicOrderCntLsb) + && ((sl->pic_order_cnt_lsb - prevPicOrderCntLsb) > + (MaxPicOrderCntLsb / 2))) + seq->cur_pic.PicOrderCntMsb = prevPicOrderCntMsb - MaxPicOrderCntLsb; + else + seq->cur_pic.PicOrderCntMsb = prevPicOrderCntMsb; + + if (!sl->field_pic_flag) + { + seq->cur_pic.TopFieldOrderCnt = + seq->cur_pic.PicOrderCntMsb + 
sl->pic_order_cnt_lsb; + seq->cur_pic.BottomFieldOrderCnt = + seq->cur_pic.TopFieldOrderCnt + sl->delta_pic_order_cnt_bottom; + } + else + { + if (sl->bottom_field_flag) + seq->cur_pic.BottomFieldOrderCnt = + seq->cur_pic.PicOrderCntMsb + sl->pic_order_cnt_lsb; + else + seq->cur_pic.TopFieldOrderCnt = + seq->cur_pic.PicOrderCntMsb + sl->pic_order_cnt_lsb; + } + } + else + { + int16_t FrameNumOffset, prevFrameNumOffset; + uint16_t MaxFrameNum = 1 << (sp->log2_max_frame_num_minus4 + 4); + + if (sl->nal_unit_type == NAL_SLICE_IDR) + { + FrameNumOffset = 0; + } + else + { + if (seq->prevMMC5) + prevFrameNumOffset = 0; + else + prevFrameNumOffset = seq->prevFrameNumOffset; + + if (seq->prevFrameNum > sl->frame_num) + FrameNumOffset = prevFrameNumOffset + MaxFrameNum; + else + FrameNumOffset = prevFrameNumOffset; + } + + if (sp->pic_order_cnt_type == 1) + { + int16_t absFrameNum = 0, picOrderCntCycleCnt = + 0, frameNumInPicOrderCntCycle = 0, expectedDeltaPerPicOrderCntCycle = + 0, expectedPicOrderCnt = 0; + int i; + if (sp->num_ref_frames_in_pic_order_cnt_cycle) + absFrameNum = FrameNumOffset + sl->frame_num; + if (!sl->nal_ref_idc && (absFrameNum > 0)) + --absFrameNum; + + for (i = 0; i < sp->num_ref_frames_in_pic_order_cnt_cycle; i++) + expectedDeltaPerPicOrderCntCycle += sp->offset_for_ref_frame[i]; + + if (absFrameNum > 0) + { + picOrderCntCycleCnt = + (absFrameNum - 1) / sp->num_ref_frames_in_pic_order_cnt_cycle; + frameNumInPicOrderCntCycle = + (absFrameNum - 1) % sp->num_ref_frames_in_pic_order_cnt_cycle; + expectedPicOrderCnt = + picOrderCntCycleCnt * expectedDeltaPerPicOrderCntCycle; + for (i = 0; i < frameNumInPicOrderCntCycle; i++) + expectedPicOrderCnt += sp->offset_for_ref_frame[i]; + } + if (!sl->nal_ref_idc) + expectedPicOrderCnt += sp->offset_for_non_ref_pic; + + if (!sl->field_pic_flag) + { + seq->cur_pic.TopFieldOrderCnt = + expectedPicOrderCnt + sl->delta_pic_order_cnt[0]; + seq->cur_pic.BottomFieldOrderCnt = + seq->cur_pic.TopFieldOrderCnt + 
sp->offset_for_top_to_bottom_field + + sl->delta_pic_order_cnt[1]; + } + else if (!sl->bottom_field_flag) + seq->cur_pic.TopFieldOrderCnt = + expectedPicOrderCnt + sl->delta_pic_order_cnt[0]; + else + seq->cur_pic.BottomFieldOrderCnt = + expectedPicOrderCnt + sp->offset_for_top_to_bottom_field + + sl->delta_pic_order_cnt[1]; + } + else + { + int32_t tmpPicOrderCnt; + if (sl->nal_unit_type == NAL_SLICE_IDR) + tmpPicOrderCnt = 0; + else if (!sl->nal_ref_idc) + tmpPicOrderCnt = 2 * (FrameNumOffset + sl->frame_num) - 1; + else + tmpPicOrderCnt = 2 * (FrameNumOffset + sl->frame_num); + + if (!sl->field_pic_flag) + seq->cur_pic.TopFieldOrderCnt = seq->cur_pic.BottomFieldOrderCnt = + tmpPicOrderCnt; + else if (sl->bottom_field_flag) + seq->cur_pic.BottomFieldOrderCnt = tmpPicOrderCnt; + else + seq->cur_pic.TopFieldOrderCnt = tmpPicOrderCnt; + } + seq->prevFrameNum = seq->cur_pic.FrameNum; + seq->prevFrameNumOffset = FrameNumOffset; + } + + if (seq->cur_pic.completed < PICTURE_DONE) + { + if (sl->bottom_field_flag) + seq->cur_pic.TopFieldOrderCnt = seq->cur_pic.BottomFieldOrderCnt; + else + seq->cur_pic.BottomFieldOrderCnt = seq->cur_pic.TopFieldOrderCnt; + } +} + + + +static void +decode_picnum (vdpau_h264_alter_decoder_t * this_gen) +{ + sequence_t *seq = (sequence_t *) & this_gen->sequence; + slice_param_t *sl = &seq->slice_param; + dpb_frame_t *frame; + int i = 0; + + int parity = sl->bottom_field_flag ? 1 : 0; + + if (!seq->cur_pic.field_pic_flag) + seq->cur_pic.PicNum[0] = seq->cur_pic.FrameNum; + else + seq->cur_pic.PicNum[parity] = 2 * seq->cur_pic.FrameNum + 1; + + while (i < MAX_DPB_SIZE) + { + frame = seq->dpb[i]; + if (!frame->used) + break; + if (frame->FrameNum > seq->cur_pic.FrameNum) + frame->FrameNumWrap = frame->FrameNum - sl->MaxFrameNum; + else + frame->FrameNumWrap = frame->FrameNum; + + if (!sl->field_pic_flag) + { + frame->PicNum[0] = frame->PicNum[1] = frame->FrameNumWrap; + } + else + { + frame->PicNum[0] = 2 * frame->FrameNumWrap + (parity ? 
0 : 1); + frame->PicNum[1] = 2 * frame->FrameNumWrap + (parity ? 1 : 0); + } + ++i; + } +} + + + +static int +check_ref_list (vdpau_h264_alter_decoder_t * this_gen) +{ + int i, j, bad_frame = 0; + dpb_frame_t *frame; + sequence_t *seq = (sequence_t *) & this_gen->sequence; + slice_param_t *sl = &seq->slice_param; + pic_param_t *pic = seq->pic_param[sl->pic_parameter_set_id]; + seq_param_t *sp = seq->seq_param[pic->seq_parameter_set_id]; + int prefs = 0; + int brefs = 0; + int poc, curpoc; + + //int fps = (double)sp->vui.time_scale / (double)sp->vui.num_units_in_tick / ( 2 - sl->field_pic_flag ); + int fps = (1 + sl->field_pic_flag) * 2 * sp->num_ref_frames; + + if (seq->startup_frame >= fps) + return 0; + + curpoc = + (seq->cur_pic.TopFieldOrderCnt > + seq->cur_pic.BottomFieldOrderCnt) ? seq->cur_pic.TopFieldOrderCnt : seq-> + cur_pic.BottomFieldOrderCnt; + + for (i = 15; i > -1; i--) + { + frame = seq->dpb[i]; + if (!frame->used) + continue; + poc = + (frame->TopFieldOrderCnt > + frame->BottomFieldOrderCnt) ? frame->TopFieldOrderCnt : frame-> + BottomFieldOrderCnt; + if (seq->cur_pic.field_pic_flag) + { + if (!frame->videoSurface->bad_frame) + { + for (j = 0; j < 2; j++) + { + if (frame->is_reference[j]) + { + if (poc <= curpoc) + ++prefs; + else + ++brefs; + } + } + } + } + else + { + if (!frame->videoSurface->bad_frame) + { + if (poc <= curpoc) + ++prefs; + else + ++brefs; + } + } + } + + if (sl->slice_type != SLICE_TYPE_I) + { + if (prefs < (sl->num_ref_idx_l0_active_minus1 + 1)) + bad_frame = 1; + if (sl->slice_type == SLICE_TYPE_B) + { + if (brefs < (sl->num_ref_idx_l1_active_minus1 + 1)) + bad_frame = 1; + } + } + + if (bad_frame) + fprintf (stderr, + "******** Missing refframes, dropping. nrf=%d lo=%d prefs=%d l1=%d brefs=%d type=%d (%d fps)\n", + sp->num_ref_frames, sl->num_ref_idx_l0_active_minus1 + 1, prefs, + sl->num_ref_idx_l1_active_minus1 + 1, brefs, sl->slice_type, + fps); + //else + //fprintf(stderr,"******** GOOD ! 
nrf=%d lo=%d prefs=%d l1=%d brefs=%d type=%d (%d fps)\n", sp->num_ref_frames, sl->num_ref_idx_l0_active_minus1 + 1, prefs, sl->num_ref_idx_l1_active_minus1 + 1, brefs, sl->slice_type, fps ); + + if (seq->cur_pic.is_reference[0] || seq->cur_pic.is_reference[1]) + ++seq->startup_frame; + + return bad_frame; +} + + + +static void +decode_render (vdpau_h264_alter_decoder_t * vd, int bad_frame) +{ + int i, j; + VdpPictureInfoH264 info; + seq_param_t *sp; + pic_param_t *pic; + slice_param_t *sl; + sequence_t *seq = (sequence_t *) & vd->sequence; + vo_frame_t *img; + + if (!seq->cur_pic.field_pic_flag || (seq->cur_pic.completed < PICTURE_DONE)) + { + img = + vd->stream->video_out->get_frame (vd->stream->video_out, + seq->coded_width, seq->coded_height, + seq->ratio, XINE_IMGFMT_VDPAU, + VO_BOTH_FIELDS | seq->chroma | seq-> + reset); + seq->reset = 0; + img->drawn = 0; + } + else + img = seq->cur_pic.videoSurface; + + if (!img) + { /* should not happen */ + fprintf (stderr, + "vdpau_h264_alter : !!!!!!!!!!!!!!!!!!!!!! 
No vo_frame_t !!!!!!!!!!!!!!!!!!!!!!!\n"); + return; + } + + vdpau_accel_t *accel = (vdpau_accel_t *) img->accel_data; + if (!seq->accel_vdpau) + seq->accel_vdpau = accel; + + if (seq->vdp_runtime_nr != *(seq->accel_vdpau->current_vdp_runtime_nr)) + vd->decoder = VDP_INVALID_HANDLE; + + sl = &vd->sequence.slice_param; + pic = vd->sequence.pic_param[sl->pic_parameter_set_id]; + sp = vd->sequence.seq_param[pic->seq_parameter_set_id]; + + VdpStatus st; + if (vd->decoder == VDP_INVALID_HANDLE || vd->decoder_profile != seq->profile + || vd->decoder_width != seq->coded_width + || vd->decoder_height != seq->coded_height) + { + if (vd->decoder != VDP_INVALID_HANDLE) + { + accel->vdp_decoder_destroy (vd->decoder); + vd->decoder = VDP_INVALID_HANDLE; + } + st = + accel->vdp_decoder_create (accel->vdp_device, seq->profile, + seq->coded_width, seq->coded_height, + sp->num_ref_frames, &vd->decoder); + if (st != VDP_STATUS_OK) + fprintf (stderr, "vdpau_h264_alter : failed to create decoder !! %s\n", + accel->vdp_get_error_string (st)); + else + { + vd->decoder_profile = seq->profile; + vd->decoder_width = seq->coded_width; + vd->decoder_height = seq->coded_height; + seq->vdp_runtime_nr = seq->accel_vdpau->vdp_runtime_nr; + } + } + + info.slice_count = seq->slices_count; + info.field_order_cnt[0] = seq->cur_pic.TopFieldOrderCnt; + info.field_order_cnt[1] = seq->cur_pic.BottomFieldOrderCnt; + info.is_reference = sl->nal_ref_idc ? 
VDP_TRUE : VDP_FALSE; + info.frame_num = sl->frame_num; + info.field_pic_flag = sl->field_pic_flag; + info.bottom_field_flag = sl->bottom_field_flag; + info.num_ref_frames = sp->num_ref_frames; + info.mb_adaptive_frame_field_flag = sp->mb_adaptive_frame_field_flag + && !sl->field_pic_flag; + info.constrained_intra_pred_flag = pic->constrained_intra_pred_flag; + info.weighted_pred_flag = pic->weighted_pred_flag; + info.weighted_bipred_idc = pic->weighted_bipred_idc; + info.frame_mbs_only_flag = sp->frame_mbs_only_flag; + info.transform_8x8_mode_flag = pic->transform_8x8_mode_flag; + info.chroma_qp_index_offset = pic->chroma_qp_index_offset; + info.second_chroma_qp_index_offset = pic->second_chroma_qp_index_offset; + info.pic_init_qp_minus26 = pic->pic_init_qp_minus26; + info.num_ref_idx_l0_active_minus1 = pic->num_ref_idx_l0_active_minus1; + info.num_ref_idx_l1_active_minus1 = pic->num_ref_idx_l1_active_minus1; + info.log2_max_frame_num_minus4 = sp->log2_max_frame_num_minus4; + info.pic_order_cnt_type = sp->pic_order_cnt_type; + info.log2_max_pic_order_cnt_lsb_minus4 = + sp->log2_max_pic_order_cnt_lsb_minus4; + info.delta_pic_order_always_zero_flag = + sp->delta_pic_order_always_zero_flag; + info.direct_8x8_inference_flag = sp->direct_8x8_inference_flag; + info.entropy_coding_mode_flag = pic->entropy_coding_mode_flag; + info.pic_order_present_flag = pic->pic_order_present_flag; + info.deblocking_filter_control_present_flag = + pic->deblocking_filter_control_present_flag; + info.redundant_pic_cnt_present_flag = pic->redundant_pic_cnt_present_flag; + + if (!pic->pic_scaling_matrix_present_flag) + { + xine_fast_memcpy (info.scaling_lists_4x4, sp->scaling_lists_4x4, + sizeof (info.scaling_lists_4x4)); + xine_fast_memcpy (info.scaling_lists_8x8, sp->scaling_lists_8x8, + sizeof (info.scaling_lists_8x8)); + } + else + { + xine_fast_memcpy (info.scaling_lists_4x4, pic->scaling_lists_4x4, + sizeof (info.scaling_lists_4x4)); + xine_fast_memcpy (info.scaling_lists_8x8, 
pic->scaling_lists_8x8, + sizeof (info.scaling_lists_8x8)); + } + + j = 0; + for (i = (MAX_DPB_SIZE - 1); i > -1; i--) + { + if (!seq->dpb[i]->used) + continue; + vdpau_accel_t *accel = + (vdpau_accel_t *) seq->dpb[i]->videoSurface->accel_data; + info.referenceFrames[j].surface = accel->surface; + info.referenceFrames[j].is_long_term = 0; + info.referenceFrames[j].frame_idx = seq->dpb[i]->FrameNum; + info.referenceFrames[j].top_is_reference = + seq->dpb[i]->is_reference[0] ? VDP_TRUE : VDP_FALSE; + info.referenceFrames[j].bottom_is_reference = + seq->dpb[i]->is_reference[1] ? VDP_TRUE : VDP_FALSE; + info.referenceFrames[j].field_order_cnt[0] = + seq->dpb[i]->TopFieldOrderCnt; + info.referenceFrames[j].field_order_cnt[1] = + seq->dpb[i]->BottomFieldOrderCnt; + ++j; + } + for (; j < MAX_DPB_SIZE; j++) + { + info.referenceFrames[j].surface = VDP_INVALID_HANDLE; + info.referenceFrames[j].is_long_term = 0; + info.referenceFrames[j].frame_idx = 0; + info.referenceFrames[j].top_is_reference = 0; + info.referenceFrames[j].bottom_is_reference = 0; + info.referenceFrames[j].field_order_cnt[0] = 0; + info.referenceFrames[j].field_order_cnt[1] = 0; + } + + uint8_t sc[3] = { 0, 0, 1 }; + VdpBitstreamBuffer vbits[seq->slices_count * 2]; + for (i = 0; i < seq->slices_count; i++) + { + vbits[i * 2].struct_version = VDP_BITSTREAM_BUFFER_VERSION; + vbits[i * 2].bitstream = sc; + vbits[i * 2].bitstream_bytes = 3; + vbits[(i * 2) + 1].struct_version = VDP_BITSTREAM_BUFFER_VERSION; + vbits[(i * 2) + 1].bitstream = seq->buf + seq->slices[i].buf_offset; + vbits[(i * 2) + 1].bitstream_bytes = seq->slices[i].len; + } + st = + accel->vdp_decoder_render (vd->decoder, accel->surface, + (VdpPictureInfo *) & info, + seq->slices_count * 2, vbits); + if (st != VDP_STATUS_OK) + lprintf ("**********************DECODING failed! - surface = %d - %s\n", + accel->surface, accel->vdp_get_error_string (st)); + else + lprintf ("**********************DECODING success! 
- surface = %d\n", + accel->surface); + + if ((seq->ratio != seq->reported_ratio) + || (seq->coded_width != seq->reported_coded_width) + || (seq->coded_height != seq->reported_coded_height) + || (seq->video_step != seq->reported_video_step)) + { + seq->reported_ratio = seq->ratio; + seq->reported_coded_width = seq->coded_width; + seq->reported_coded_height = seq->coded_height; + seq->reported_video_step = seq->video_step; + _x_stream_info_set (vd->stream, XINE_STREAM_INFO_VIDEO_WIDTH, + seq->coded_width); + _x_stream_info_set (vd->stream, XINE_STREAM_INFO_VIDEO_HEIGHT, + seq->coded_height); + _x_stream_info_set (vd->stream, XINE_STREAM_INFO_VIDEO_RATIO, + ((double) 10000 * seq->ratio)); + _x_stream_info_set (vd->stream, XINE_STREAM_INFO_FRAME_DURATION, + seq->video_step); + _x_meta_info_set_utf8 (vd->stream, XINE_META_INFO_VIDEOCODEC, + "H264/AVC (vdpau_alter)"); + xine_event_t event; + xine_format_change_data_t data; + event.type = XINE_EVENT_FRAME_FORMAT_CHANGE; + event.stream = vd->stream; + event.data = &data; + event.data_length = sizeof (data); + data.width = seq->coded_width; + data.height = seq->coded_height; + data.aspect = seq->ratio; + xine_event_send (vd->stream, &event); + } + + accel->color_standard = seq->color_standard; + + if (seq->cur_pic.completed == PICTURE_DONE) + { + seq->cur_pic.pts = seq->pic_pts; + seq->pic_pts = 0; + } + if (seq->cur_pic.drop_pts) + seq->cur_pic.pts = 0; + if (sp->frame_mbs_only_flag) + img->progressive_frame = -1; + img->bad_frame = bad_frame; + img->duration = seq->video_step; + seq->cur_pic.videoSurface = img; +} + + + +static void +decode_picture (vdpau_h264_alter_decoder_t * decoder) +{ + if (decoder->sequence.cur_pic.missing_header + || !decoder->sequence.startup_frame) + { + memset (&decoder->sequence.cur_pic, 0, sizeof (dpb_frame_t)); + lprintf ("MISSING_HEADER or !startup_frame\n\n"); + return; + } + + slice_param_t *sl = &decoder->sequence.slice_param; + dpb_frame_t *cur_pic = &decoder->sequence.cur_pic; + + if 
(cur_pic->completed && cur_pic->field_pic_flag) + { + int wrong_field = 0; + if ((sl->frame_num != cur_pic->FrameNum) + || (sl->bottom_field_flag + && (cur_pic->completed == PICTURE_BOTTOM_DONE)) + || (!sl->bottom_field_flag + && (cur_pic->completed == PICTURE_TOP_DONE)) + || !sl->field_pic_flag) + { + wrong_field = 1; + } + if (wrong_field) + { + fprintf (stderr, "vdpau_h264_alter : Wrong field, skipping.\n"); + memset (cur_pic, 0, sizeof (dpb_frame_t)); + dpb_reset (&decoder->sequence); + cur_pic->missing_header = 1; + decoder->sequence.startup_frame = 0; + return; + } + } + + /* picture decoding */ + decode_poc (decoder); + lprintf ("TopFieldOrderCnt = %d - BottomFieldOrderCnt = %d\n", + cur_pic->TopFieldOrderCnt, cur_pic->BottomFieldOrderCnt); + if (sl->nal_unit_type == 5) + { + dpb_draw_frames (decoder, MAX_POC, DPB_DRAW_CLEAR); + decoder->sequence.startup_frame = START_IDR_FLAG; + } + decode_picnum (decoder); + ref_pic_list_reordering (decoder); + lprintf ("............................. slices_count = %d\n", + decoder->sequence.slices_count); + + decode_render (decoder, check_ref_list (decoder)); + + /* dec_ref_pic_marking */ + slice_header_post (decoder); + + if (!cur_pic->is_reference[0] && !cur_pic->is_reference[1]) + { + if (cur_pic->completed == PICTURE_DONE) + { + dpb_draw_frames (decoder, + (cur_pic->TopFieldOrderCnt > + cur_pic->BottomFieldOrderCnt) ? cur_pic-> + TopFieldOrderCnt : cur_pic->BottomFieldOrderCnt, + DPB_DRAW_CURRENT); + } + } + else + { + if (decoder->sequence. + seq_param[decoder->sequence.pic_param[sl->pic_parameter_set_id]-> + seq_parameter_set_id]->pic_order_cnt_type == 2) + dpb_draw_frames (decoder, + (cur_pic->TopFieldOrderCnt > + cur_pic->BottomFieldOrderCnt) ? 
cur_pic-> + TopFieldOrderCnt : cur_pic->BottomFieldOrderCnt, + DPB_DRAW_REFS); + + if (!sl->field_pic_flag || cur_pic->completed < PICTURE_DONE) + dbp_append (decoder, 0); + else + dbp_append (decoder, 1); + } + + if (cur_pic->completed == PICTURE_DONE) + memset (cur_pic, 0, sizeof (dpb_frame_t)); + + + lprintf + ("\n___________________________________________________________________________________________\n\n"); +} + + + +static int +parse_startcodes (vdpau_h264_alter_decoder_t * this_gen, uint8_t * buf, + uint32_t len) +{ + sequence_t *sequence = (sequence_t *) & this_gen->sequence; + bits_reader_set (&sequence->br, buf, len); + int ret = 0; + + skip_bits (&sequence->br, 1); /* forbidden_zero_bit */ + uint8_t nal_ref_idc = read_bits (&sequence->br, 2); + uint8_t nal_unit_type = read_bits (&sequence->br, 5); + lprintf ("NAL size = %d, nal_ref_idc = %d, nal_unit_type = %d\n", len, + nal_ref_idc, nal_unit_type); + + switch (nal_unit_type) + { + case NAL_END_SEQUENCE: + break; + case NAL_SEQUENCE: + seq_parameter_set_data (this_gen); + break; + case NAL_PICTURE: + pic_parameter_set (this_gen); + break; + case NAL_SLICE_IDR: + slice_header (this_gen, nal_ref_idc, nal_unit_type); + sequence->slices[sequence->slices_count].buf_offset = buf - sequence->buf; + sequence->slices[sequence->slices_count].len = len; + ++sequence->slices_count; + sequence->slice_mode = NAL_SLICE_IDR; + break; + case NAL_SLICE_NO_IDR: + slice_header (this_gen, nal_ref_idc, nal_unit_type); + sequence->slices[sequence->slices_count].buf_offset = buf - sequence->buf; + sequence->slices[sequence->slices_count].len = len; + ++sequence->slices_count; + sequence->slice_mode = NAL_SLICE_NO_IDR; + break; + } + + return ret; +} + + + +static void +parse_codec_private (vdpau_h264_alter_decoder_t * this_gen, uint8_t * buf, + uint32_t len) +{ + sequence_t *sequence = (sequence_t *) & this_gen->sequence; + bits_reader_set (&sequence->br, buf, len); + uint8_t *buffer = buf; + int i; + + lprintf 
("parse_codec_private\n"); + + sequence->mode_frame = 1; + + // reserved + skip_bits (&sequence->br, 8); + skip_bits (&sequence->br, 8); /* profile_idc */ + skip_bits (&sequence->br, 8); + skip_bits (&sequence->br, 8); /* level_idc */ + skip_bits (&sequence->br, 6); + + sequence->frame_header_size = read_bits (&sequence->br, 2) + 1; + //sequence->frame_header_size = 3; + skip_bits (&sequence->br, 3); + uint8_t count = read_bits (&sequence->br, 5); + buffer += 6; + for (i = 0; i < count; i++) + { + bits_reader_set (&sequence->br, buffer, len - (buffer - buf)); + uint16_t sps_size = read_bits (&sequence->br, 16); + skip_bits (&sequence->br, 8); + seq_parameter_set_data (this_gen); + buffer += sps_size + 2; + } + count = buffer[0]; + ++buffer; + for (i = 0; i < count; i++) + { + bits_reader_set (&sequence->br, buffer, len - (buffer - buf)); + uint16_t pps_size = read_bits (&sequence->br, 16); + skip_bits (&sequence->br, 8); + pic_parameter_set (this_gen); + buffer += pps_size + 2; + } +} + + + +static void +flush_buffer (sequence_t * seq) +{ + if ((seq->bufpos - seq->bufseek) >= seq->bufseek) + { + seq->bufsize = (seq->bufpos - seq->bufseek) + MIN_BUFFER_SIZE; + lprintf ("buffer too short, have to allocate a new one.\n"); + uint8_t *tmp = (uint8_t *) malloc (seq->bufsize); + xine_fast_memcpy (tmp, seq->buf + seq->bufseek, + seq->bufpos - seq->bufseek); + free (seq->buf); + seq->buf = tmp; + } + else + xine_fast_memcpy (seq->buf, seq->buf + seq->bufseek, + seq->bufpos - seq->bufseek); + + seq->bufpos -= seq->bufseek; + seq->start = -1; + seq->bufseek = 0; + reset_slices (seq); +} + + + +/* + * This function receives a buffer of data from the demuxer layer and + * figures out how to handle it based on its header flags. 
+ */ +static void +vdpau_h264_alter_decode_data (video_decoder_t * this_gen, buf_element_t * buf) +{ + vdpau_h264_alter_decoder_t *this = (vdpau_h264_alter_decoder_t *) this_gen; + sequence_t *seq = (sequence_t *) & this->sequence; + +#ifdef MAKE_DAT + fwrite (buf->content, 1, buf->size, outfile); +#endif + + if (buf->decoder_flags & BUF_FLAG_FRAMERATE) + { + lprintf ("BUF_FLAG_FRAMERATE\n"); + seq->video_step = buf->decoder_info[0]; + } + + if (buf->decoder_flags & BUF_FLAG_ASPECT) + { + lprintf ("BUF_FLAG_ASPECT\n"); + seq->ratio = + (double) buf->decoder_info[1] / (double) buf->decoder_info[2]; + } + + if (buf->decoder_flags & BUF_FLAG_STDHEADER) + { + lprintf ("BUF_FLAG_STDHEADER\n"); + seq->flag_header = 1; + xine_bmiheader *bih = (xine_bmiheader *) buf->content; + seq->coded_width = bih->biWidth; + seq->coded_height = bih->biHeight; + uint8_t *codec_private = buf->content + sizeof (xine_bmiheader); + uint32_t codec_private_len = bih->biSize - sizeof (xine_bmiheader); + if (codec_private_len > 0) + parse_codec_private (this, codec_private, codec_private_len); + return; + } + + if (buf->decoder_flags & BUF_FLAG_SPECIAL) + { + if (buf->decoder_info[1] == BUF_SPECIAL_DECODER_CONFIG) + { + lprintf ("BUF_SPECIAL_DECODER_CONFIG\n"); + seq->flag_header = 1; + uint8_t *codec_private = buf->decoder_info_ptr[2]; + uint32_t codec_private_len = buf->decoder_info[2]; + if (codec_private_len > 0) + parse_codec_private (this, codec_private, codec_private_len); + } + return; + } + + if (!buf->size) + return; + + int size = seq->bufpos + buf->size; + if (seq->bufsize < size) + { + if (seq->bufsize > MAX_BUFFER_SIZE) + { + fprintf (stderr, + "vdpau_h264_alter : sorry, can't accumulate so much data, broken stream?\n"); + reset_sequence (seq); + return; + } + seq->bufsize = size + MIN_BUFFER_SIZE; + seq->buf = (uint8_t *) realloc (seq->buf, seq->bufsize); + lprintf ("realloc new size = %d\n", seq->bufsize); + } + xine_fast_memcpy (seq->buf + seq->bufpos, buf->content, buf->size); 
+ seq->bufpos += buf->size; + + if (buf->decoder_flags & BUF_FLAG_FRAME_START) + seq->pic_pts = buf->pts; + + int frame_end = buf->decoder_flags & BUF_FLAG_FRAME_END; + + if (seq->mode_frame) + { + if (!seq->pic_pts) + seq->pic_pts = buf->pts; + if (frame_end) + { + if (buf->pts) + seq->pic_pts = buf->pts; + lprintf ("frame_end && seq->mode_frame\n"); + int fhs; + uint8_t tb; + uint32_t j = 0; + while (j < seq->bufpos) + { + uint32_t s = 0; + for (fhs = 0; fhs < seq->frame_header_size; fhs++) + s |= seq->buf[j + fhs] << (8 * (seq->frame_header_size - 1 - fhs)); + tb = *(seq->buf + j + seq->frame_header_size) & 0x1F; + if (seq->slice_mode && (tb != seq->slice_mode)) + { + decode_picture (this); + reset_slices (seq); + } + parse_startcodes (this, seq->buf + j + seq->frame_header_size, s); + j += seq->frame_header_size + s; + } + if (seq->slice_mode) + { + decode_picture (this); + reset_slices (seq); + } + seq->bufpos = 0; + } + return; + } + + while (seq->bufseek <= seq->bufpos - 4) + { + uint8_t *buffer = seq->buf + seq->bufseek; + if (buffer[0] == 0 && buffer[1] == 0 && buffer[2] == 1) + { + if (seq->start < 0) + { + seq->start = seq->bufseek; + uint8_t tb = buffer[3] & 0x1F; + if (((tb == NAL_SLICE_NO_IDR) || (tb == NAL_SLICE_IDR)) + && !seq->pic_pts) + seq->pic_pts = buf->pts; + if (seq->slice_mode && (tb != seq->slice_mode)) + { + decode_picture (this); + flush_buffer (seq); + } + if ((tb & 0x1F) == NAL_END_SEQUENCE) + { + dpb_print (seq); + dpb_draw_frames (this, MAX_POC, DPB_DRAW_CLEAR); + lprintf ("NAL_END_SEQUENCE\n"); + dpb_print (seq); + } + } + else + { + parse_startcodes (this, seq->buf + seq->start + 3, + seq->bufseek - seq->start - 3); + seq->start = -1; + --seq->bufseek; + } + } + ++seq->bufseek; + } + + if (frame_end && seq->flag_header && (seq->start > -1) + && (seq->bufseek > seq->start)) + { + lprintf ("frame_end && seq->start\n"); + seq->bufseek = seq->bufpos; + parse_startcodes (this, seq->buf + seq->start + 3, + seq->bufseek - seq->start - 3); 
+ if (seq->slice_mode) + decode_picture (this); + flush_buffer (seq); + } +} + + +/* + * This function is called when xine needs to flush the system. + */ +static void +vdpau_h264_alter_flush (video_decoder_t * this_gen) +{ + vdpau_h264_alter_decoder_t *this = (vdpau_h264_alter_decoder_t *) this_gen; + + printf ("vdpau_h264_alter_flush\n"); + dpb_draw_frames (this, MAX_POC, DPB_DRAW_REFS); +} + + +/* + * This function resets the video decoder. + */ +static void +vdpau_h264_alter_reset (video_decoder_t * this_gen) +{ + vdpau_h264_alter_decoder_t *this = (vdpau_h264_alter_decoder_t *) this_gen; + + lprintf ("vdpau_h264_alter_reset\n"); + reset_sequence (&this->sequence); +} + + +/* + * The decoder should forget any stored pts values here. + */ +static void +vdpau_h264_alter_discontinuity (video_decoder_t * this_gen) +{ + vdpau_h264_alter_decoder_t *this = (vdpau_h264_alter_decoder_t *) this_gen; + printf ("vdpau_h264_alter_discontinuity\n"); + + dpb_clear_all_pts (&this->sequence); + this->sequence.reset = VO_NEW_SEQUENCE_FLAG; +} + + +/* + * This function frees the video decoder instance allocated to the decoder. 
+ */ +static void +vdpau_h264_alter_dispose (video_decoder_t * this_gen) +{ + + vdpau_h264_alter_decoder_t *this = (vdpau_h264_alter_decoder_t *) this_gen; + + lprintf ("vdpau_h264_alter_dispose\n"); + + if ((this->decoder != VDP_INVALID_HANDLE) && this->sequence.accel_vdpau) + { + this->sequence.accel_vdpau->vdp_decoder_destroy (this->decoder); + this->decoder = VDP_INVALID_HANDLE; + } + reset_sequence (&this->sequence); + + int i; + for (i = 0; i < MAX_DPB_SIZE; i++) + free (this->sequence.dpb[i]); + for (i = 0; i < 32; i++) + if (this->sequence.seq_param[i]) + free (this->sequence.seq_param[i]); + for (i = 0; i < 255; i++) + if (this->sequence.pic_param[i]) + free (this->sequence.pic_param[i]); + + this->stream->video_out->close (this->stream->video_out, this->stream); + + free (this->sequence.buf); + free (this_gen); +} + + +/* + * This function allocates, initializes, and returns a private video + * decoder structure. + */ +static video_decoder_t * +open_plugin (video_decoder_class_t * class_gen, xine_stream_t * stream) +{ + + vdpau_h264_alter_decoder_t *this; + + /* the videoout must be vdpau-capable to support this decoder */ + if (! 
+ (stream->video_driver-> + get_capabilities (stream->video_driver) & VO_CAP_VDPAU_H264)) + return NULL; + + /* now check if vdpau has free decoder resource */ + vo_frame_t *img = + stream->video_out->get_frame (stream->video_out, 1920, 1080, 1, + XINE_IMGFMT_VDPAU, VO_BOTH_FIELDS); + vdpau_accel_t *accel = (vdpau_accel_t *) img->accel_data; + int runtime_nr = accel->vdp_runtime_nr; + img->free (img); + VdpDecoder decoder; + VdpStatus st = + accel->vdp_decoder_create (accel->vdp_device, + VDP_DECODER_PROFILE_H264_MAIN, 1920, 1080, 16, + &decoder); + if (st != VDP_STATUS_OK) + { + fprintf (stderr, "can't create vdpau decoder!\n"); + return NULL; + } + + accel->vdp_decoder_destroy (decoder); + + this = + (vdpau_h264_alter_decoder_t *) calloc (1, + sizeof + (vdpau_h264_alter_decoder_t)); + + this->video_decoder.decode_data = vdpau_h264_alter_decode_data; + this->video_decoder.flush = vdpau_h264_alter_flush; + this->video_decoder.reset = vdpau_h264_alter_reset; + this->video_decoder.discontinuity = vdpau_h264_alter_discontinuity; + this->video_decoder.dispose = vdpau_h264_alter_dispose; + + this->stream = stream; + this->class = (vdpau_h264_alter_class_t *) class_gen; + + int i; + for (i = 0; i < 16; i++) + this->sequence.dpb[i] = (dpb_frame_t *) calloc (1, sizeof (dpb_frame_t)); + this->sequence.bufsize = MIN_BUFFER_SIZE; + this->sequence.buf = (uint8_t *) malloc (this->sequence.bufsize); + this->sequence.vdp_runtime_nr = runtime_nr; + this->sequence.reset = VO_NEW_SEQUENCE_FLAG; + this->sequence.reset = VO_NEW_SEQUENCE_FLAG; + this->sequence.ratio = 0.0; + this->sequence.video_step = 3600; + this->sequence.coded_width = 1280; + this->sequence.coded_height = 720; + this->sequence.reported_ratio = 0.0; + this->sequence.reported_video_step = 0; + this->sequence.reported_coded_width = 0; + this->sequence.reported_coded_height = 0; + this->sequence.frame_header_size = 4; + this->sequence.flag_header = 0; + this->sequence.mode_frame = 0; + reset_sequence (&this->sequence); 
+ + this->decoder = VDP_INVALID_HANDLE; + this->sequence.accel_vdpau = NULL; + + (stream->video_out->open) (stream->video_out, stream); + +#ifdef MAKE_DAT + outfile = fopen ("/tmp/h264.dat", "w"); + nframes = 0; +#endif + + return &this->video_decoder; +} + + +/* + * This function allocates a private video decoder class and initializes + * the class's member functions. + */ +static void * +init_plugin (xine_t * xine, void *data) +{ + + vdpau_h264_alter_class_t *this; + + this = + (vdpau_h264_alter_class_t *) calloc (1, + sizeof (vdpau_h264_alter_class_t)); + + this->decoder_class.open_plugin = open_plugin; + this->decoder_class.identifier = "vdpau_h264_alter"; + this->decoder_class.description = + N_ + ("vdpau_h264_alter: H264 decoder plugin using VDPAU hardware decoding.\n" + "Must be used along with video_out_vdpau."); + this->decoder_class.dispose = default_video_decoder_class_dispose; + + return this; +} + + +/* + * This is a list of all of the internal xine video buffer types that + * this decoder is able to handle. Check src/xine-engine/buffer.h for a + * list of valid buffer types (and add a new one if the one you need does + * not exist). Terminate the list with a 0. + */ +static const uint32_t video_types[] = { + BUF_VIDEO_H264, + 0 +}; + + +/* + * This data structure combines the list of supported xine buffer types and + * the priority that the plugin should be given with respect to other + * plugins that handle the same buffer type. A plugin with priority (n+1) + * will be used instead of a plugin with priority (n). + */ +static const decoder_info_t dec_info_video = { + video_types, /* supported types */ + 9 /* priority */ +}; + + +/* + * The plugin catalog entry. This is the only information that this plugin + * will export to the public. 
+ */ +const plugin_info_t xine_plugin_info[] EXPORTED = { + /* { type, API, "name", version, special_info, init_function } */ + {PLUGIN_VIDEO_DECODER, 19, "vdpau_h264_alter", XINE_VERSION_CODE, + &dec_info_video, init_plugin}, + {PLUGIN_NONE, 0, "", 0, NULL, NULL} +}; diff --git a/src/video_dec/libvdpau/alterh264_decode.h b/src/video_dec/libvdpau/alterh264_decode.h new file mode 100644 index 000000000..88f5e638f --- /dev/null +++ b/src/video_dec/libvdpau/alterh264_decode.h @@ -0,0 +1,339 @@ +/* kate: tab-indent on; indent-width 4; mixedindent off; indent-mode cstyle; remove-trailing-space on; */ +#ifndef ALTERH264_DECODE_H +#define ALTERH264_DECODE_H + +//#define LOG +#define LOG_MODULE "vdpau_h264" + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> +#include <unistd.h> + +#include <xine/xine_internal.h> +#include <xine/video_out.h> +#include <xine/buffer.h> +#include <xine/xineutils.h> +#include "accel_vdpau.h" +#include <vdpau/vdpau.h> + +#include "alterh264_bits_reader.h" + + + +enum aspect_ratio +{ + ASPECT_UNSPECIFIED = 0, + ASPECT_1_1, + ASPECT_12_11, + ASPECT_10_11, + ASPECT_16_11, + ASPECT_40_33, + ASPECT_24_11, + ASPECT_20_11, + ASPECT_32_11, + ASPECT_80_33, + ASPECT_18_11, + ASPECT_15_11, + ASPECT_64_33, + ASPECT_160_99, + ASPECT_4_3, + ASPECT_3_2, + ASPECT_2_1, + ASPECT_RESERVED, + ASPECT_EXTENDED_SAR = 255 +}; + + + +static const uint8_t zigzag_4x4[16] = { + 0, 1, 4, 8, + 5, 2, 3, 6, + 9, 12, 13, 10, + 7, 11, 14, 15 +}; + +static const uint8_t zigzag_8x8[64] = { + 0, 1, 8, 16, 9, 2, 3, 10, + 17, 24, 32, 25, 18, 11, 4, 5, + 12, 19, 26, 33, 40, 48, 41, 34, + 27, 20, 13, 6, 7, 14, 21, 28, + 35, 42, 49, 56, 57, 50, 43, 36, + 29, 22, 15, 23, 30, 37, 44, 51, + 58, 59, 52, 45, 38, 31, 39, 46, + 53, 60, 61, 54, 47, 55, 62, 63 +}; + +static const uint8_t default_4x4_intra[16] = { + 6, 13, 13, 20, + 20, 20, 28, 28, + 28, 28, 32, 32, + 32, 37, 37, 42 +}; + +static const 
uint8_t default_4x4_inter[16] = { + 10, 14, 14, 20, + 20, 20, 24, 24, + 24, 24, 27, 27, + 27, 30, 30, 34 +}; + +static const uint8_t default_8x8_intra[64] = { + 6, 10, 10, 13, 11, 13, 16, 16, + 16, 16, 18, 18, 18, 18, 18, 23, + 23, 23, 23, 23, 23, 25, 25, 25, + 25, 25, 25, 25, 27, 27, 27, 27, + 27, 27, 27, 27, 29, 29, 29, 29, + 29, 29, 29, 31, 31, 31, 31, 31, + 31, 33, 33, 33, 33, 33, 36, 36, + 36, 36, 38, 38, 38, 40, 40, 42 +}; + +static const uint8_t default_8x8_inter[64] = { + 9, 13, 13, 15, 13, 15, 17, 17, + 17, 17, 19, 19, 19, 19, 19, 21, + 21, 21, 21, 21, 21, 22, 22, 22, + 22, 22, 22, 22, 24, 24, 24, 24, + 24, 24, 24, 24, 25, 25, 25, 25, + 25, 25, 25, 27, 27, 27, 27, 27, + 27, 28, 28, 28, 28, 28, 30, 30, + 30, 30, 32, 32, 32, 33, 33, 35 +}; + + + +typedef struct +{ + uint8_t aspect_ratio_info; + uint8_t aspect_ratio_idc; + uint16_t sar_width; + uint16_t sar_height; + uint8_t colour_desc; + uint8_t colour_primaries; + uint8_t timing_info; + uint32_t num_units_in_tick; + uint32_t time_scale; +} vui_param_t; + + + +typedef struct +{ + uint8_t profile_idc; + uint8_t level_idc; + uint8_t seq_parameter_set_id; + uint8_t constraint_set0_flag; + uint8_t constraint_set1_flag; + uint8_t constraint_set2_flag; + uint8_t constraint_set3_flag; + uint8_t chroma_format_idc; + uint8_t separate_colour_plane_flag; + uint8_t bit_depth_luma_minus8; + uint8_t bit_depth_chroma_minus8; + uint8_t qpprime_y_zero_transform_bypass_flag; + uint8_t seq_scaling_matrix_present_flag; + uint8_t scaling_lists_4x4[6][16]; + uint8_t scaling_lists_8x8[2][64]; + uint8_t log2_max_frame_num_minus4; + uint8_t pic_order_cnt_type; + uint8_t log2_max_pic_order_cnt_lsb_minus4; + uint8_t delta_pic_order_always_zero_flag; + int32_t offset_for_non_ref_pic; + int32_t offset_for_top_to_bottom_field; + uint8_t num_ref_frames_in_pic_order_cnt_cycle; + int32_t offset_for_ref_frame[256]; + uint8_t num_ref_frames; + uint8_t gaps_in_frame_num_value_allowed_flag; + uint8_t pic_width_in_mbs_minus1; + uint8_t 
pic_height_in_map_units_minus1; + uint8_t frame_mbs_only_flag; + uint8_t mb_adaptive_frame_field_flag; + uint8_t direct_8x8_inference_flag; + uint8_t frame_cropping_flag; + uint16_t frame_crop_left_offset; + uint16_t frame_crop_right_offset; + uint16_t frame_crop_top_offset; + uint16_t frame_crop_bottom_offset; + uint8_t vui_parameters_present_flag; + vui_param_t vui; +} seq_param_t; + + + +typedef struct +{ + uint8_t pic_parameter_set_id; + uint8_t seq_parameter_set_id; + uint8_t entropy_coding_mode_flag; + uint8_t pic_order_present_flag; + /*uint8_t num_slice_groups_minus1; + uint8_t slice_group_map_type; + uint16_t run_length_minus1[64]; + uint16_t top_left[64]; + uint16_t bottom_right[64]; + uint8_t slice_group_change_direction_flag; + uint16_t slice_group_change_rate_minus1; + uint16_t pic_size_in_map_units_minus1; + uint8_t slice_group_id[64]; */ + uint8_t num_ref_idx_l0_active_minus1; + uint8_t num_ref_idx_l1_active_minus1; + uint8_t weighted_pred_flag; + uint8_t weighted_bipred_idc; + int8_t pic_init_qp_minus26; + int8_t pic_init_qs_minus26; + int8_t chroma_qp_index_offset; + uint8_t deblocking_filter_control_present_flag; + uint8_t constrained_intra_pred_flag; + uint8_t redundant_pic_cnt_present_flag; + uint8_t transform_8x8_mode_flag; + uint8_t pic_scaling_matrix_present_flag; + uint8_t pic_scaling_list_present_flag[8]; + uint8_t scaling_lists_4x4[6][16]; + uint8_t scaling_lists_8x8[2][64]; + int8_t second_chroma_qp_index_offset; +} pic_param_t; + + + +typedef struct +{ + uint8_t nal_ref_idc; + uint8_t nal_unit_type; + uint8_t slice_type; + uint8_t pic_parameter_set_id; + uint16_t frame_num; + uint32_t MaxFrameNum; + uint8_t field_pic_flag; + uint8_t bottom_field_flag; + uint16_t idr_pic_id; + uint16_t pic_order_cnt_lsb; + int32_t delta_pic_order_cnt_bottom; + int32_t delta_pic_order_cnt[2]; + uint8_t redundant_pic_cnt; + uint8_t num_ref_idx_l0_active_minus1; + uint8_t num_ref_idx_l1_active_minus1; +} slice_param_t; + + +#define PICTURE_TOP_DONE 1 
+#define PICTURE_BOTTOM_DONE 2 +#define PICTURE_DONE 3 + +#define SHORT_TERM_REF 1 +#define LONG_TERM_REF 2 + +typedef struct +{ + uint8_t used; + uint8_t missing_header; + int64_t pts; + uint8_t drop_pts; + uint8_t completed; + uint8_t top_field_first; + uint16_t FrameNum; + int32_t FrameNumWrap; + int32_t PicNum[2]; /* 0:top, 1:bottom */ + uint8_t is_reference[2]; /* 0:top, 1:bottom, short or long term */ + uint8_t field_pic_flag; + int32_t PicOrderCntMsb; + int32_t TopFieldOrderCnt; + int32_t BottomFieldOrderCnt; + uint16_t pic_order_cnt_lsb; + uint8_t mmc5; + + vo_frame_t *videoSurface; +} dpb_frame_t; + + + +typedef struct +{ + uint32_t buf_offset; + uint32_t len; +} slice_t; + + + +typedef struct +{ + uint32_t coded_width; + uint32_t reported_coded_width; + uint32_t coded_height; + uint32_t reported_coded_height; + uint64_t video_step; /* frame duration in pts units */ + uint64_t reported_video_step; /* frame duration in pts units */ + double ratio; + double reported_ratio; + + slice_t slices[68]; + int slices_count; + int slice_mode; + + seq_param_t *seq_param[32]; + pic_param_t *pic_param[256]; + slice_param_t slice_param; + + dpb_frame_t *dpb[16]; + dpb_frame_t cur_pic; + uint16_t prevFrameNum; + uint16_t prevFrameNumOffset; + uint8_t prevMMC5; + + VdpColorStandard color_standard; + int chroma; + int top_field_first; + VdpDecoderProfile profile; + + uint8_t *buf; /* accumulate data */ + int bufseek; + uint32_t bufsize; + uint32_t bufpos; + int start; + + int64_t pic_pts; + + bits_reader_t br; + + int vdp_runtime_nr; + vdpau_accel_t *accel_vdpau; + + int reset; + int startup_frame; + + uint8_t mode_frame; + uint8_t flag_header; + uint32_t frame_header_size; + +} sequence_t; + + + +typedef struct +{ + video_decoder_class_t decoder_class; +} vdpau_h264_alter_class_t; + + + +typedef struct vdpau_mpeg12_decoder_s +{ + video_decoder_t video_decoder; /* parent video decoder structure */ + + vdpau_h264_alter_class_t *class; + xine_stream_t *stream; + + sequence_t 
sequence; + + VdpDecoder decoder; + VdpDecoderProfile decoder_profile; + uint32_t decoder_width; + uint32_t decoder_height; + +} vdpau_h264_alter_decoder_t; + +#endif /* ALTERH264_DECODE_H */ diff --git a/src/video_dec/libvdpau/bits_reader.h b/src/video_dec/libvdpau/bits_reader.h new file mode 100644 index 000000000..db7cdfc7e --- /dev/null +++ b/src/video_dec/libvdpau/bits_reader.h @@ -0,0 +1,82 @@ +#include <sys/types.h> + + + +typedef struct { + uint8_t *buffer, *start; + int offbits, length, oflow; +} bits_reader_t; + + + +static void bits_reader_set( bits_reader_t *br, uint8_t *buf, int len ) +{ + br->buffer = br->start = buf; + br->offbits = 0; + br->length = len; + br->oflow = 0; +} + + + +static uint32_t read_bits( bits_reader_t *br, int nbits ) +{ + int i, nbytes; + uint32_t ret = 0; + uint8_t *buf; + + buf = br->buffer; + nbytes = (br->offbits + nbits)/8; + if ( ((br->offbits + nbits) %8 ) > 0 ) + nbytes++; + if ( (buf + nbytes) > (br->start + br->length) ) { + br->oflow = 1; + return 0; + } + for ( i=0; i<nbytes; i++ ) + ret += buf[i]<<((nbytes-i-1)*8); + i = (4-nbytes)*8+br->offbits; + ret = ((ret<<i)>>i)>>((nbytes*8)-nbits-br->offbits); + + br->offbits += nbits; + br->buffer += br->offbits / 8; + br->offbits %= 8; + + return ret; +} + + + +static void skip_bits( bits_reader_t *br, int nbits ) +{ + br->offbits += nbits; + br->buffer += br->offbits / 8; + br->offbits %= 8; + if ( br->buffer > (br->start + br->length) ) { + br->oflow = 1; + } +} + + + +static uint32_t get_bits( bits_reader_t *br, int nbits ) +{ + int i, nbytes; + uint32_t ret = 0; + uint8_t *buf; + + buf = br->buffer; + nbytes = (br->offbits + nbits)/8; + if ( ((br->offbits + nbits) %8 ) > 0 ) + nbytes++; + if ( (buf + nbytes) > (br->start + br->length) ) { + br->oflow = 1; + return 0; + } + for ( i=0; i<nbytes; i++ ) + ret += buf[i]<<((nbytes-i-1)*8); + i = (4-nbytes)*8+br->offbits; + ret = ((ret<<i)>>i)>>((nbytes*8)-nbits-br->offbits); + + return ret; +} diff --git 
a/src/video_dec/libvdpau/cpb.c b/src/video_dec/libvdpau/cpb.c new file mode 100644 index 000000000..d06238e5c --- /dev/null +++ b/src/video_dec/libvdpau/cpb.c @@ -0,0 +1,49 @@ +/* + * Copyright (C) 2009 Julian Scheel + * + * This file is part of xine, a free video player. + * + * xine is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * xine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110, USA + * + * cpb.c: Coded Picture Buffer + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "cpb.h" + +#include <stdlib.h> + +struct coded_picture* create_coded_picture() +{ + struct coded_picture* pic = calloc(1, sizeof(struct coded_picture)); + return pic; +} + +void free_coded_picture(struct coded_picture *pic) +{ + if(!pic) + return; + + release_nal_unit(pic->sei_nal); + release_nal_unit(pic->sps_nal); + release_nal_unit(pic->pps_nal); + release_nal_unit(pic->slc_nal); + + free(pic); +} + diff --git a/src/video_dec/libvdpau/cpb.h b/src/video_dec/libvdpau/cpb.h new file mode 100644 index 000000000..37dbd94cf --- /dev/null +++ b/src/video_dec/libvdpau/cpb.h @@ -0,0 +1,67 @@ +/* + * Copyright (C) 2009 Julian Scheel + * + * This file is part of xine, a free video player. 
+ * + * xine is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * xine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110, USA + * + * cpb.h: Coded Picture Buffer + */ + +#ifndef CPB_H_ +#define CPB_H_ + +#include "nal.h" + +enum picture_flags { + IDR_PIC = 0x01, + REFERENCE = 0x02, + NOT_EXISTING = 0x04, + INTERLACED = 0x08 +}; + +struct coded_picture +{ + uint32_t flag_mask; + + uint32_t max_pic_num; + int32_t pic_num; + + uint8_t used_for_long_term_ref; + uint32_t long_term_pic_num; + uint32_t long_term_frame_idx; + + int32_t top_field_order_cnt; + int32_t bottom_field_order_cnt; + + uint8_t repeat_pic; + + /* buffer data for the image slices, which + * are passed to the decoder + */ + uint32_t slice_cnt; + + int64_t pts; + + struct nal_unit *sei_nal; + struct nal_unit *sps_nal; + struct nal_unit *pps_nal; + struct nal_unit *slc_nal; +}; + +struct coded_picture* create_coded_picture(void); +void free_coded_picture(struct coded_picture *pic); + +#endif /* CPB_H_ */ diff --git a/src/video_dec/libvdpau/dpb.c b/src/video_dec/libvdpau/dpb.c new file mode 100644 index 000000000..c2afd42ba --- /dev/null +++ b/src/video_dec/libvdpau/dpb.c @@ -0,0 +1,622 @@ +/* + * Copyright (C) 2008 Julian Scheel + * + * This file is part of xine, a free video player. 
+ * + * xine is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * xine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110, USA + * + * dpb.c: Implementing Decoded Picture Buffer + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "cpb.h" +#include "dpb.h" +#include "nal.h" + +#include "h264_parser.h" + +#include "accel_vdpau.h" + +#include <xine/video_out.h> + +//#define DEBUG_DPB + +int dp_top_field_first(struct decoded_picture *decoded_pic) +{ + int top_field_first = 1; + + + if (decoded_pic->coded_pic[1] != NULL) { + if (!decoded_pic->coded_pic[0]->slc_nal->slc.bottom_field_flag && + decoded_pic->coded_pic[1]->slc_nal->slc.bottom_field_flag && + decoded_pic->coded_pic[0]->top_field_order_cnt != + decoded_pic->coded_pic[1]->bottom_field_order_cnt) { + top_field_first = decoded_pic->coded_pic[0]->top_field_order_cnt < decoded_pic->coded_pic[1]->bottom_field_order_cnt; + } else if (decoded_pic->coded_pic[0]->slc_nal->slc.bottom_field_flag && + !decoded_pic->coded_pic[1]->slc_nal->slc.bottom_field_flag && + decoded_pic->coded_pic[0]->bottom_field_order_cnt != + decoded_pic->coded_pic[1]->top_field_order_cnt) { + top_field_first = decoded_pic->coded_pic[0]->bottom_field_order_cnt > decoded_pic->coded_pic[1]->top_field_order_cnt; + } + } + + if (decoded_pic->coded_pic[0]->flag_mask & PIC_STRUCT_PRESENT && 
decoded_pic->coded_pic[0]->sei_nal != NULL) { + uint8_t pic_struct = decoded_pic->coded_pic[0]->sei_nal->sei.pic_timing.pic_struct; + if(pic_struct == DISP_TOP_BOTTOM || + pic_struct == DISP_TOP_BOTTOM_TOP) { + top_field_first = 1; + } else if (pic_struct == DISP_BOTTOM_TOP || + pic_struct == DISP_BOTTOM_TOP_BOTTOM) { + top_field_first = 0; + } else if (pic_struct == DISP_FRAME) { + top_field_first = 1; + } + } + + return top_field_first; +} + +/** + * ---------------------------------------------------------------------------- + * decoded picture + * ---------------------------------------------------------------------------- + */ + +void free_decoded_picture(struct decoded_picture *pic); + +struct decoded_picture* init_decoded_picture(struct coded_picture *cpic, vo_frame_t *img) +{ + struct decoded_picture *pic = calloc(1, sizeof(struct decoded_picture)); + + pic->coded_pic[0] = cpic; + + decoded_pic_check_reference(pic); + pic->img = img; + pic->lock_counter = 1; + + return pic; +} + +void decoded_pic_check_reference(struct decoded_picture *pic) +{ + int i; + for(i = 0; i < 2; i++) { + struct coded_picture *cpic = pic->coded_pic[i]; + if(cpic && (cpic->flag_mask & REFERENCE)) { + // FIXME: this assumes Top Field First! + if(i == 0) { + pic->top_is_reference = cpic->slc_nal->slc.field_pic_flag + ? (cpic->slc_nal->slc.bottom_field_flag ? 0 : 1) : 1; + } + + pic->bottom_is_reference = cpic->slc_nal->slc.field_pic_flag + ? (cpic->slc_nal->slc.bottom_field_flag ? 
1 : 0) : 1; + } + } +} + +void decoded_pic_add_field(struct decoded_picture *pic, + struct coded_picture *cpic) +{ + pic->coded_pic[1] = cpic; + + decoded_pic_check_reference(pic); +} + +void release_decoded_picture(struct decoded_picture *pic) +{ + if(!pic) + return; + + pic->lock_counter--; + //printf("release decoded picture: %p (%d)\n", pic, pic->lock_counter); + + if(pic->lock_counter <= 0) { + free_decoded_picture(pic); + } +} + +void lock_decoded_picture(struct decoded_picture *pic) +{ + if(!pic) + return; + + pic->lock_counter++; + //printf("lock decoded picture: %p (%d)\n", pic, pic->lock_counter); +} + +void free_decoded_picture(struct decoded_picture *pic) +{ + if(!pic) + return; + + if(pic->img != NULL) { + pic->img->free(pic->img); + } + + free_coded_picture(pic->coded_pic[1]); + free_coded_picture(pic->coded_pic[0]); + pic->coded_pic[0] = NULL; + pic->coded_pic[1] = NULL; + free(pic); +} + + + + +/** + * ---------------------------------------------------------------------------- + * dpb code starting here + * ---------------------------------------------------------------------------- + */ + +struct dpb* create_dpb(void) +{ + struct dpb *dpb = calloc(1, sizeof(struct dpb)); + + dpb->output_list = xine_list_new(); + dpb->reference_list = xine_list_new(); + + dpb->max_reorder_frames = MAX_DPB_COUNT; + dpb->max_dpb_frames = MAX_DPB_COUNT; + + return dpb; +} + +int dpb_total_frames(struct dpb *dpb) +{ + int num_frames = xine_list_size(dpb->output_list); + + xine_list_iterator_t ite = xine_list_front(dpb->reference_list); + while(ite) { + struct decoded_picture *pic = xine_list_get_value(dpb->reference_list, ite); + if (xine_list_find(dpb->output_list, pic) == NULL) { + num_frames++; + } + + ite = xine_list_next(dpb->reference_list, ite); + } + + return num_frames; +} + +void release_dpb(struct dpb *dpb) +{ + if(!dpb) + return; + + dpb_free_all(dpb); + + xine_list_delete(dpb->output_list); + xine_list_delete(dpb->reference_list); + + free(dpb); +} + 
+struct decoded_picture* dpb_get_next_out_picture(struct dpb *dpb, int do_flush) +{ + struct decoded_picture *pic = NULL;; + struct decoded_picture *outpic = NULL; + + if(!do_flush && + xine_list_size(dpb->output_list) < dpb->max_reorder_frames && + dpb_total_frames(dpb) < dpb->max_dpb_frames) { + return NULL; + } + + xine_list_iterator_t ite = xine_list_back(dpb->output_list); + while (ite) { + pic = xine_list_get_value(dpb->output_list, ite); + + int32_t out_top_field_order_cnt = outpic != NULL ? + outpic->coded_pic[0]->top_field_order_cnt : 0; + int32_t top_field_order_cnt = pic->coded_pic[0]->top_field_order_cnt; + + int32_t out_bottom_field_order_cnt = outpic != NULL ? + (outpic->coded_pic[1] != NULL ? + outpic->coded_pic[1]->bottom_field_order_cnt : + outpic->coded_pic[0]->top_field_order_cnt) : 0; + int32_t bottom_field_order_cnt = pic->coded_pic[1] != NULL ? + pic->coded_pic[1]->bottom_field_order_cnt : + pic->coded_pic[0]->top_field_order_cnt; + + if (outpic == NULL || + (top_field_order_cnt <= out_top_field_order_cnt && + bottom_field_order_cnt <= out_bottom_field_order_cnt) || + (out_top_field_order_cnt <= 0 && top_field_order_cnt > 0 && + out_bottom_field_order_cnt <= 0 && bottom_field_order_cnt > 0) || + outpic->coded_pic[0]->flag_mask & IDR_PIC) { + outpic = pic; + } + + ite = xine_list_prev(dpb->output_list, ite); + } + + return outpic; +} + +struct decoded_picture* dpb_get_picture(struct dpb *dpb, uint32_t picnum) +{ + struct decoded_picture *pic = NULL; + + xine_list_iterator_t ite = xine_list_front(dpb->reference_list); + while (ite) { + pic = xine_list_get_value(dpb->reference_list, ite); + + if ((pic->coded_pic[0]->pic_num == picnum || + (pic->coded_pic[1] != NULL && + pic->coded_pic[1]->pic_num == picnum))) { + return pic; + } + + ite = xine_list_next(dpb->reference_list, ite); + } + + return NULL; +} + +struct decoded_picture* dpb_get_picture_by_ltpn(struct dpb *dpb, + uint32_t longterm_picnum) +{ + struct decoded_picture *pic = NULL; + + 
xine_list_iterator_t ite = xine_list_front(dpb->reference_list); + while (ite) { + pic = xine_list_get_value(dpb->reference_list, ite); + + if (pic->coded_pic[0]->long_term_pic_num == longterm_picnum || + (pic->coded_pic[1] != NULL && + pic->coded_pic[1]->long_term_pic_num == longterm_picnum)) { + return pic; + } + + ite = xine_list_next(dpb->reference_list, ite); + } + + return NULL; +} + +struct decoded_picture* dpb_get_picture_by_ltidx(struct dpb *dpb, + uint32_t longterm_idx) +{ + struct decoded_picture *pic = NULL; + + xine_list_iterator_t ite = xine_list_front(dpb->reference_list); + while (ite) { + pic = xine_list_get_value(dpb->reference_list, ite); + + if (pic->coded_pic[0]->long_term_frame_idx == longterm_idx || + (pic->coded_pic[1] != NULL && + pic->coded_pic[1]->long_term_frame_idx == longterm_idx)) { + return pic; + } + + ite = xine_list_next(dpb->reference_list, ite); + } + + return NULL; +} + +int dpb_set_unused_ref_picture_byltpn(struct dpb *dpb, uint32_t longterm_picnum) +{ + struct decoded_picture *pic = NULL; + + xine_list_iterator_t ite = xine_list_front(dpb->reference_list); + while (ite) { + pic = xine_list_get_value(dpb->reference_list, ite); + + uint8_t found = 0; + + if (pic->coded_pic[0]->long_term_pic_num == longterm_picnum) { + pic->coded_pic[0]->used_for_long_term_ref = 0; + found = 1; + } + + if ((pic->coded_pic[1] != NULL && + pic->coded_pic[1]->long_term_pic_num == longterm_picnum)) { + pic->coded_pic[1]->used_for_long_term_ref = 0; + found = 1; + } + + if(found && !pic->coded_pic[0]->used_for_long_term_ref && + (pic->coded_pic[1] == NULL || + !pic->coded_pic[1]->used_for_long_term_ref)) { + dpb_unmark_reference_picture(dpb, pic); + } + + if (found) + return 0; + + ite = xine_list_next(dpb->reference_list, ite); + } + + return -1; +} + +int dpb_set_unused_ref_picture_bylidx(struct dpb *dpb, uint32_t longterm_idx) +{ + struct decoded_picture *pic = NULL; + + xine_list_iterator_t ite = xine_list_front(dpb->reference_list); + while 
(ite) { + pic = xine_list_get_value(dpb->reference_list, ite); + + uint8_t found = 0; + + if (pic->coded_pic[0]->long_term_frame_idx == longterm_idx) { + pic->coded_pic[0]->used_for_long_term_ref = 0; + found = 1; + } + + if ((pic->coded_pic[1] != NULL && + pic->coded_pic[1]->long_term_frame_idx == longterm_idx)) { + pic->coded_pic[1]->used_for_long_term_ref = 0; + found = 1; + } + + if(found && !pic->coded_pic[0]->used_for_long_term_ref && + (pic->coded_pic[1] == NULL || + !pic->coded_pic[1]->used_for_long_term_ref)) { + dpb_unmark_reference_picture(dpb, pic); + } + + if (found) + return 0; + + ite = xine_list_next(dpb->reference_list, ite); + } + + return -1; +} + +int dpb_set_unused_ref_picture_lidx_gt(struct dpb *dpb, int32_t longterm_idx) +{ + struct decoded_picture *pic = NULL; + + xine_list_iterator_t ite = xine_list_front(dpb->reference_list); + while (ite) { + pic = xine_list_get_value(dpb->reference_list, ite); + + uint8_t found = 0; + + if (pic->coded_pic[0]->long_term_frame_idx >= longterm_idx) { + pic->coded_pic[0]->used_for_long_term_ref = 0; + found = 1; + } + + if ((pic->coded_pic[1] != NULL && + pic->coded_pic[1]->long_term_frame_idx >= longterm_idx)) { + pic->coded_pic[1]->used_for_long_term_ref = 0; + found = 1; + } + + if(found && !pic->coded_pic[0]->used_for_long_term_ref && + (pic->coded_pic[1] == NULL || + !pic->coded_pic[1]->used_for_long_term_ref)) { + dpb_unmark_reference_picture(dpb, pic); + } + + ite = xine_list_next(dpb->reference_list, ite); + } + + return -1; +} + + +int dpb_unmark_picture_delayed(struct dpb *dpb, struct decoded_picture *pic) +{ + if(!pic) + return -1; + + xine_list_iterator_t ite = xine_list_find(dpb->output_list, pic); + if (ite) { + xine_list_remove(dpb->output_list, ite); + release_decoded_picture(pic); + + return 0; + } + + return -1; +} + +int dpb_unmark_reference_picture(struct dpb *dpb, struct decoded_picture *pic) +{ + if(!pic) + return -1; + + xine_list_iterator_t ite = xine_list_find(dpb->reference_list, 
pic); + if (ite) { + xine_list_remove(dpb->reference_list, ite); + release_decoded_picture(pic); + + return 0; + } + + return -1; +} + +/*static int dpb_remove_picture_by_img(struct dpb *dpb, vo_frame_t *remimg) +{ + int retval = -1; + struct decoded_picture *pic = NULL; + + xine_list_iterator_t ite = xine_list_front(dpb->output_list); + while (ite) { + pic = xine_list_get_value(dpb->output_list, ite); + + if (pic->img == remimg) { + dpb_unmark_picture_delayed(dpb, pic); + dpb->used--; + retval = 0; + } + + ite = xine_list_next(dpb->output_list, ite); + } + + return retval; +}*/ + + +int dpb_add_picture(struct dpb *dpb, struct decoded_picture *pic, uint32_t num_ref_frames) +{ +#if 0 + /* this should never happen */ + pic->img->lock(pic->img); + if (0 == dpb_remove_picture_by_img(dpb, pic->img)) + lprintf("H264/DPB broken stream: current img was already in dpb -- freed it\n"); + else + pic->img->free(pic->img); +#endif + + /* add the pic to the output picture list, as no + * pic would be immediately drawn. + * acquire a lock for this list + */ + lock_decoded_picture(pic); + xine_list_push_back(dpb->output_list, pic); + + + /* check if the pic is a reference pic, + * if it is it should be added to the reference + * list. another lock has to be acquired in that case + */ + if (pic->coded_pic[0]->flag_mask & REFERENCE || + (pic->coded_pic[1] != NULL && + pic->coded_pic[1]->flag_mask & REFERENCE)) { + lock_decoded_picture(pic); + xine_list_push_back(dpb->reference_list, pic); + + /* + * always apply the sliding window reference removal, if more reference + * frames than expected are in the list. 
we will always remove the oldest + * reference frame + */ + if(xine_list_size(dpb->reference_list) > num_ref_frames) { + struct decoded_picture *discard = xine_list_get_value(dpb->reference_list, xine_list_front(dpb->reference_list)); + dpb_unmark_reference_picture(dpb, discard); + } + } + +#if DEBUG_DPB + printf("DPB list sizes: Total: %2d, Output: %2d, Reference: %2d\n", + dpb_total_frames(dpb), xine_list_size(dpb->output_list), + xine_list_size(dpb->reference_list)); +#endif + + return 0; +} + +int dpb_flush(struct dpb *dpb) +{ + struct decoded_picture *pic = NULL; + + xine_list_iterator_t ite = xine_list_front(dpb->reference_list); + while (ite) { + pic = xine_list_get_value(dpb->reference_list, ite); + + dpb_unmark_reference_picture(dpb, pic); + + /* CAUTION: xine_list_next would return an item, but not the one we + * expect, as the current one was deleted + */ + ite = xine_list_front(dpb->reference_list); + } + + return 0; +} + +void dpb_free_all(struct dpb *dpb) +{ + xine_list_iterator_t ite = xine_list_front(dpb->output_list); + while(ite) { + dpb_unmark_picture_delayed(dpb, xine_list_get_value(dpb->output_list, ite)); + /* CAUTION: xine_list_next would return an item, but not the one we + * expect, as the current one was deleted + */ + ite = xine_list_front(dpb->output_list); + } + + ite = xine_list_front(dpb->reference_list); + while(ite) { + dpb_unmark_reference_picture(dpb, xine_list_get_value(dpb->reference_list, ite)); + /* CAUTION: xine_list_next would return an item, but not the one we + * expect, as the current one was deleted + */ + ite = xine_list_front(dpb->reference_list); + } +} + +void dpb_clear_all_pts(struct dpb *dpb) +{ + xine_list_iterator_t ite = xine_list_front(dpb->output_list); + while(ite) { + struct decoded_picture *pic = xine_list_get_value(dpb->output_list, ite); + pic->img->pts = 0; + + ite = xine_list_next(dpb->output_list, ite); + } +} + +int fill_vdpau_reference_list(struct dpb *dpb, VdpReferenceFrameH264 *reflist) +{ + struct 
decoded_picture *pic = NULL; + + int i = 0; + int used_refframes = 0; + + xine_list_iterator_t ite = xine_list_back(dpb->reference_list); + while (ite) { + pic = xine_list_get_value(dpb->reference_list, ite); + reflist[i].surface = ((vdpau_accel_t*)pic->img->accel_data)->surface; + reflist[i].is_long_term = pic->coded_pic[0]->used_for_long_term_ref || + (pic->coded_pic[1] != NULL && pic->coded_pic[1]->used_for_long_term_ref); + + reflist[i].frame_idx = pic->coded_pic[0]->used_for_long_term_ref ? + pic->coded_pic[0]->long_term_pic_num : + pic->coded_pic[0]->slc_nal->slc.frame_num; + reflist[i].top_is_reference = pic->top_is_reference; + reflist[i].bottom_is_reference = pic->bottom_is_reference; + reflist[i].field_order_cnt[0] = pic->coded_pic[0]->top_field_order_cnt; + reflist[i].field_order_cnt[1] = pic->coded_pic[1] != NULL ? + pic->coded_pic[1]->bottom_field_order_cnt : + pic->coded_pic[0]->bottom_field_order_cnt; + i++; + + ite = xine_list_prev(dpb->reference_list, ite); + } + + used_refframes = i; + + // fill all other frames with invalid handles + while(i < 16) { + reflist[i].bottom_is_reference = VDP_FALSE; + reflist[i].top_is_reference = VDP_FALSE; + reflist[i].frame_idx = 0; + reflist[i].is_long_term = VDP_FALSE; + reflist[i].surface = VDP_INVALID_HANDLE; + reflist[i].field_order_cnt[0] = 0; + reflist[i].field_order_cnt[1] = 0; + i++; + } + + return used_refframes; +} diff --git a/src/video_dec/libvdpau/dpb.h b/src/video_dec/libvdpau/dpb.h new file mode 100644 index 000000000..356bcbf70 --- /dev/null +++ b/src/video_dec/libvdpau/dpb.h @@ -0,0 +1,117 @@ +/* + * Copyright (C) 2008 Julian Scheel + * + * This file is part of xine, a free video player. + * + * xine is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * xine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110, USA + * + * dpb.h: Decoded Picture Buffer + */ + +#ifndef DPB_H_ +#define DPB_H_ + +#define MAX_DPB_COUNT 16 + +#include "nal.h" +#include "cpb.h" +#include <xine/video_out.h> +#include <xine/list.h> + +#define USED_FOR_REF (top_is_reference || bottom_is_reference) + +/** + * ---------------------------------------------------------------------------- + * decoded picture + * ---------------------------------------------------------------------------- + */ + +struct decoded_picture { + vo_frame_t *img; /* this is the image we block, to make sure + * the surface is not double-used */ + + /** + * a decoded picture always contains a whole frame, + * respective a field pair, so it can contain up to + * 2 coded pics + */ + struct coded_picture *coded_pic[2]; + + int32_t frame_num_wrap; + + uint8_t top_is_reference; + uint8_t bottom_is_reference; + + uint32_t lock_counter; +}; + +struct decoded_picture* init_decoded_picture(struct coded_picture *cpic, + vo_frame_t *img); +void release_decoded_picture(struct decoded_picture *pic); +void lock_decoded_picture(struct decoded_picture *pic); +void decoded_pic_check_reference(struct decoded_picture *pic); +void decoded_pic_add_field(struct decoded_picture *pic, + struct coded_picture *cpic); + + +/** + * ---------------------------------------------------------------------------- + * dpb code starting here + * ---------------------------------------------------------------------------- + */ + +/* Decoded Picture Buffer */ +struct dpb { + xine_list_t *reference_list; + xine_list_t 
*output_list; + + int max_reorder_frames; + int max_dpb_frames; +}; + +struct dpb* create_dpb(void); +void release_dpb(struct dpb *dpb); + +/** + * calculates the total number of frames in the dpb + * when frames are used for reference and are not drawn + * yet the result would be less then reference_list-size+ + * output_list-size + */ +int dpb_total_frames(struct dpb *dpb); + +struct decoded_picture* dpb_get_next_out_picture(struct dpb *dpb, int do_flush); + +struct decoded_picture* dpb_get_picture(struct dpb *dpb, uint32_t picnum); +struct decoded_picture* dpb_get_picture_by_ltpn(struct dpb *dpb, uint32_t longterm_picnum); +struct decoded_picture* dpb_get_picture_by_ltidx(struct dpb *dpb, uint32_t longterm_idx); + +int dpb_set_unused_ref_picture_byltpn(struct dpb *dpb, uint32_t longterm_picnum); +int dpb_set_unused_ref_picture_bylidx(struct dpb *dpb, uint32_t longterm_idx); +int dpb_set_unused_ref_picture_lidx_gt(struct dpb *dpb, int32_t longterm_idx); + +int dpb_unmark_picture_delayed(struct dpb *dpb, struct decoded_picture *pic); +int dpb_unmark_reference_picture(struct dpb *dpb, struct decoded_picture *pic); + +int dpb_add_picture(struct dpb *dpb, struct decoded_picture *pic, uint32_t num_ref_frames); +int dpb_flush(struct dpb *dpb); +void dpb_free_all(struct dpb *dpb); +void dpb_clear_all_pts(struct dpb *dpb); + +int fill_vdpau_reference_list(struct dpb *dpb, VdpReferenceFrameH264 *reflist); + +int dp_top_field_first(struct decoded_picture *decoded_pic); + +#endif /* DPB_H_ */ diff --git a/src/video_dec/libvdpau/h264_parser.c b/src/video_dec/libvdpau/h264_parser.c new file mode 100644 index 000000000..d495bf483 --- /dev/null +++ b/src/video_dec/libvdpau/h264_parser.c @@ -0,0 +1,2038 @@ +/* + * Copyright (C) 2008 Julian Scheel + * + * This file is part of xine, a free video player. 
+ * + * xine is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * xine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110, USA + * + * h264_parser.c: Almost full-features H264 NAL-Parser + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdio.h> +#include <stdlib.h> +#include <math.h> + +#include "h264_parser.h" +#include "nal.h" +#include "cpb.h" + +/* default scaling_lists according to Table 7-2 */ +uint8_t default_4x4_intra[16] = { 6, 13, 13, 20, 20, 20, 28, 28, 28, 28, 32, + 32, 32, 37, 37, 42 }; + +uint8_t default_4x4_inter[16] = { 10, 14, 14, 20, 20, 20, 24, 24, 24, 24, 27, + 27, 27, 30, 30, 34 }; + +uint8_t default_8x8_intra[64] = { 6, 10, 10, 13, 11, 13, 16, 16, 16, 16, 18, + 18, 18, 18, 18, 23, 23, 23, 23, 23, 23, 25, 25, 25, 25, 25, 25, 25, 27, 27, + 27, 27, 27, 27, 27, 27, 29, 29, 29, 29, 29, 29, 29, 31, 31, 31, 31, 31, 31, + 33, 33, 33, 33, 33, 36, 36, 36, 36, 38, 38, 38, 40, 40, 42 }; + +uint8_t default_8x8_inter[64] = { 9, 13, 13, 15, 13, 15, 17, 17, 17, 17, 19, + 19, 19, 19, 19, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 24, 24, + 24, 24, 24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 27, 27, 27, 27, 27, 27, + 28, 28, 28, 28, 28, 30, 30, 30, 30, 32, 32, 32, 33, 33, 35 }; + +struct buf_reader +{ + uint8_t *buf; + uint8_t *cur_pos; + int len; + int cur_offset; +}; + +struct h264_parser* init_parser(); + +static inline uint32_t read_bits(struct 
buf_reader *buf, int len); +uint32_t read_exp_golomb(struct buf_reader *buf); +int32_t read_exp_golomb_s(struct buf_reader *buf); + +void calculate_pic_order(struct h264_parser *parser, struct coded_picture *pic, + struct slice_header *slc); +void skip_scaling_list(struct buf_reader *buf, int size); +void parse_scaling_list(struct buf_reader *buf, uint8_t *scaling_list, + int length, int index); + +struct nal_unit* parse_nal_header(struct buf_reader *buf, + struct coded_picture *pic, struct h264_parser *parser); +static void sps_scaling_list_fallback(struct seq_parameter_set_rbsp *sps, + int i); +static void pps_scaling_list_fallback(struct seq_parameter_set_rbsp *sps, + struct pic_parameter_set_rbsp *pps, int i); + +uint8_t parse_sps(struct buf_reader *buf, struct seq_parameter_set_rbsp *sps); +void interpret_sps(struct coded_picture *pic, struct h264_parser *parser); + +void parse_vui_parameters(struct buf_reader *buf, + struct seq_parameter_set_rbsp *sps); +void parse_hrd_parameters(struct buf_reader *buf, struct hrd_parameters *hrd); + +uint8_t parse_pps(struct buf_reader *buf, struct pic_parameter_set_rbsp *pps); +void interpret_pps(struct coded_picture *pic); + +void parse_sei(struct buf_reader *buf, struct sei_message *sei, + struct h264_parser *parser); +void interpret_sei(struct coded_picture *pic); + +uint8_t parse_slice_header(struct buf_reader *buf, struct nal_unit *slc_nal, + struct h264_parser *parser); +void interpret_slice_header(struct h264_parser *parser, struct nal_unit *slc_nal); + +void parse_ref_pic_list_reordering(struct buf_reader *buf, + struct slice_header *slc); + +void calculate_pic_nums(struct h264_parser *parser, struct coded_picture *cpic); +void execute_ref_pic_marking(struct coded_picture *cpic, + uint32_t memory_management_control_operation, + uint32_t marking_nr, + struct h264_parser *parser); +void parse_pred_weight_table(struct buf_reader *buf, struct slice_header *slc, + struct h264_parser *parser); +void 
parse_dec_ref_pic_marking(struct buf_reader *buf, + struct nal_unit *slc_nal); + +/* here goes the parser implementation */ + +static void decode_nal(uint8_t **ret, int *len_ret, uint8_t *buf, int buf_len) +{ + // TODO: rework without copying + uint8_t *end = &buf[buf_len]; + uint8_t *pos = malloc(buf_len); + + *ret = pos; + while (buf < end) { + if (buf < end - 3 && buf[0] == 0x00 && buf[1] == 0x00 && buf[2] == 0x03) { + + *pos++ = 0x00; + *pos++ = 0x00; + + buf += 3; + continue; + } + *pos++ = *buf++; + } + + *len_ret = pos - *ret; +} + +#if 0 +static inline void dump_bits(const char *label, const struct buf_reader *buf, int bits) +{ + struct buf_reader lbuf; + memcpy(&lbuf, buf, sizeof(struct buf_reader)); + + int i; + printf("%s: 0b", label); + for(i=0; i < bits; i++) + printf("%d", read_bits(&lbuf, 1)); + printf("\n"); +} +#endif + +/** + * @return total number of bits read by the buf_reader + */ +static inline uint32_t bits_read(struct buf_reader *buf) +{ + int bits_read = 0; + bits_read = (buf->cur_pos - buf->buf)*8; + bits_read += (8-buf->cur_offset); + + return bits_read; +} + +/* skips stuffing bytes in the buf_reader */ +static inline void skip_emulation_prevention_three_byte(struct buf_reader *buf) +{ + if(buf->cur_pos - buf->buf > 2 && + *(buf->cur_pos-2) == 0x00 && + *(buf->cur_pos-1) == 0x00 && + *buf->cur_pos == 0x03) { + buf->cur_pos++; + } +} + +/* + * read len bits from the buffer and return them + * @return right aligned bits + */ +static inline uint32_t read_bits(struct buf_reader *buf, int len) +{ + static uint32_t i_mask[33] = { 0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, + 0x7f, 0xff, 0x1ff, 0x3ff, 0x7ff, 0xfff, 0x1fff, 0x3fff, 0x7fff, 0xffff, + 0x1ffff, 0x3ffff, 0x7ffff, 0xfffff, 0x1fffff, 0x3fffff, 0x7fffff, + 0xffffff, 0x1ffffff, 0x3ffffff, 0x7ffffff, 0xfffffff, 0x1fffffff, + 0x3fffffff, 0x7fffffff, 0xffffffff }; + + int i_shr; + uint32_t bits = 0; + + while (len > 0 && (buf->cur_pos - buf->buf) < buf->len) { + if ((i_shr = buf->cur_offset 
- len) >= 0) { + bits |= (*buf->cur_pos >> i_shr) & i_mask[len]; + buf->cur_offset -= len; + if (buf->cur_offset == 0) { + buf->cur_pos++; + buf->cur_offset = 8; + + skip_emulation_prevention_three_byte(buf); + } + return bits; + } + else { + bits |= (*buf->cur_pos & i_mask[buf->cur_offset]) << -i_shr; + len -= buf->cur_offset; + buf->cur_pos++; + buf->cur_offset = 8; + + skip_emulation_prevention_three_byte(buf); + } + } + return bits; +} + +/* determines if following bits are rtsb_trailing_bits */ +static inline int rbsp_trailing_bits(uint8_t *buf, int buf_len) +{ + uint8_t *cur_buf = buf+(buf_len-1); + uint8_t cur_val; + int parsed_bits = 0; + int i; + + while(buf_len > 0) { + cur_val = *cur_buf; + for(i = 0; i < 9; i++) { + if (cur_val&1) + return parsed_bits+i; + cur_val>>=1; + } + parsed_bits += 8; + cur_buf--; + } + + lprintf("rbsp trailing bits could not be found\n"); + return 0; +} + +uint32_t read_exp_golomb(struct buf_reader *buf) +{ + int leading_zero_bits = 0; + + while (read_bits(buf, 1) == 0 && leading_zero_bits < 32) + leading_zero_bits++; + + uint32_t code = (1 << leading_zero_bits) - 1 + read_bits(buf, + leading_zero_bits); + return code; +} + +int32_t read_exp_golomb_s(struct buf_reader *buf) +{ + uint32_t ue = read_exp_golomb(buf); + int32_t code = ue & 0x01 ? 
(ue + 1) / 2 : -(ue / 2); + return code; +} + + +/** + * parses the NAL header data and calls the subsequent + * parser methods that handle specific NAL units + */ +struct nal_unit* parse_nal_header(struct buf_reader *buf, + struct coded_picture *pic, struct h264_parser *parser) +{ + if (buf->len < 1) + return NULL; + + + struct nal_unit *nal = create_nal_unit(); + + nal->nal_ref_idc = (buf->buf[0] >> 5) & 0x03; + nal->nal_unit_type = buf->buf[0] & 0x1f; + + buf->cur_pos = buf->buf + 1; + //lprintf("NAL: %d\n", nal->nal_unit_type); + + struct buf_reader ibuf; + ibuf.cur_offset = 8; + + switch (nal->nal_unit_type) { + case NAL_SPS: + parse_sps(buf, &nal->sps); + break; + case NAL_PPS: + parse_pps(buf, &nal->pps); + break; + case NAL_SLICE: + case NAL_PART_A: + case NAL_PART_B: + case NAL_PART_C: + case NAL_SLICE_IDR: + parse_slice_header(buf, nal, parser); + break; + case NAL_SEI: + memset(&(nal->sei), 0x00, sizeof(struct sei_message)); + parse_sei(buf, &nal->sei, parser); + break; + default: + break; + } + + return nal; +} + +/** + * calculates the picture order count according to ITU-T Rec. 
H.264 (11/2007) + * chapter 8.2.1, p104f + */ +void calculate_pic_order(struct h264_parser *parser, struct coded_picture *pic, + struct slice_header *slc) +{ + /* retrieve sps and pps from the buffers */ + struct nal_unit *pps_nal = + nal_buffer_get_by_pps_id(parser->pps_buffer, slc->pic_parameter_set_id); + + if (pps_nal == NULL) { + xprintf(parser->xine, XINE_VERBOSITY_DEBUG, + "ERR: calculate_pic_order: pic_parameter_set_id %d not found in buffers\n", + slc->pic_parameter_set_id); + return; + } + + struct pic_parameter_set_rbsp *pps = &pps_nal->pps; + + struct nal_unit *sps_nal = + nal_buffer_get_by_sps_id(parser->sps_buffer, pps->seq_parameter_set_id); + + if (sps_nal == NULL) { + xprintf(parser->xine, XINE_VERBOSITY_DEBUG, + "ERR: calculate_pic_order: seq_parameter_set_id %d not found in buffers\n", + pps->seq_parameter_set_id); + return; + } + + struct seq_parameter_set_rbsp *sps = &sps_nal->sps; + + if (sps->pic_order_cnt_type == 0) { + + if (pic->flag_mask & IDR_PIC) { + parser->prev_pic_order_cnt_lsb = 0; + parser->prev_pic_order_cnt_msb = 0; + + + // FIXME + parser->frame_num_offset = 0; + } + + const int max_poc_lsb = 1 << (sps->log2_max_pic_order_cnt_lsb_minus4 + 4); + + uint32_t pic_order_cnt_msb = 0; + + if (slc->pic_order_cnt_lsb < parser->prev_pic_order_cnt_lsb + && parser->prev_pic_order_cnt_lsb - slc->pic_order_cnt_lsb + >= max_poc_lsb / 2) + pic_order_cnt_msb = parser->prev_pic_order_cnt_msb + max_poc_lsb; + else if (slc->pic_order_cnt_lsb > parser->prev_pic_order_cnt_lsb + && parser->prev_pic_order_cnt_lsb - slc->pic_order_cnt_lsb + < -max_poc_lsb / 2) + pic_order_cnt_msb = parser->prev_pic_order_cnt_msb - max_poc_lsb; + else + pic_order_cnt_msb = parser->prev_pic_order_cnt_msb; + + if(!slc->field_pic_flag || !slc->bottom_field_flag) { + pic->top_field_order_cnt = pic_order_cnt_msb + slc->pic_order_cnt_lsb; + parser->prev_top_field_order_cnt = pic->top_field_order_cnt; + } + + if (pic->flag_mask & REFERENCE) { + parser->prev_pic_order_cnt_msb = 
pic_order_cnt_msb; + } + + pic->bottom_field_order_cnt = 0; + + if(!slc->field_pic_flag) + pic->bottom_field_order_cnt = pic->top_field_order_cnt + slc->delta_pic_order_cnt_bottom; + else //if(slc->bottom_field_flag) //TODO: this is not spec compliant, but works... + pic->bottom_field_order_cnt = pic_order_cnt_msb + slc->pic_order_cnt_lsb; + + if(slc->field_pic_flag && slc->bottom_field_flag) + pic->top_field_order_cnt = parser->prev_top_field_order_cnt; + + } else if (sps->pic_order_cnt_type == 2) { + uint32_t prev_frame_num = parser->last_vcl_nal ? parser->last_vcl_nal->slc.frame_num : 0; + uint32_t prev_frame_num_offset = parser->frame_num_offset; + uint32_t temp_pic_order_cnt = 0; + + if (parser->pic->flag_mask & IDR_PIC) + parser->frame_num_offset = 0; + else if (prev_frame_num > slc->frame_num) + parser->frame_num_offset = prev_frame_num_offset + sps->max_frame_num; + else + parser->frame_num_offset = prev_frame_num_offset; + + if(parser->pic->flag_mask & IDR_PIC) + temp_pic_order_cnt = 0; + else if(!(parser->pic->flag_mask & REFERENCE)) + temp_pic_order_cnt = 2 * (parser->frame_num_offset + slc->frame_num)-1; + else + temp_pic_order_cnt = 2 * (parser->frame_num_offset + slc->frame_num); + + if(!slc->field_pic_flag) + pic->top_field_order_cnt = pic->bottom_field_order_cnt = temp_pic_order_cnt; + else if(slc->bottom_field_flag) + pic->bottom_field_order_cnt = temp_pic_order_cnt; + else + pic->top_field_order_cnt = temp_pic_order_cnt; + + } else { + xprintf(parser->xine, XINE_VERBOSITY_DEBUG, + "FIXME: Unsupported poc_type: %d\n", sps->pic_order_cnt_type); + } +} + +void skip_scaling_list(struct buf_reader *buf, int size) +{ + int i; + for (i = 0; i < size; i++) { + read_exp_golomb_s(buf); + } +} + +void parse_scaling_list(struct buf_reader *buf, uint8_t *scaling_list, + int length, int index) +{ + int last_scale = 8; + int next_scale = 8; + int32_t delta_scale; + uint8_t use_default_scaling_matrix_flag = 0; + int i; + + const uint8_t *zigzag = (length==64) ? 
zigzag_8x8 : zigzag_4x4; + + for (i = 0; i < length; i++) { + if (next_scale != 0) { + delta_scale = read_exp_golomb_s(buf); + next_scale = (last_scale + delta_scale + 256) % 256; + if (i == 0 && next_scale == 0) { + use_default_scaling_matrix_flag = 1; + break; + } + } + scaling_list[zigzag[i]] = last_scale = (next_scale == 0) ? last_scale : next_scale; + } + + if (use_default_scaling_matrix_flag) { + switch (index) { + case 0: + case 1: + case 2: { + for(i = 0; i < sizeof(default_4x4_intra); i++) { + scaling_list[zigzag_4x4[i]] = default_4x4_intra[i]; + } + //memcpy(scaling_list, default_4x4_intra, sizeof(default_4x4_intra)); + break; + } + case 3: + case 4: + case 5: { + for(i = 0; i < sizeof(default_4x4_inter); i++) { + scaling_list[zigzag_4x4[i]] = default_4x4_inter[i]; + } + //memcpy(scaling_list, default_4x4_inter, sizeof(default_4x4_inter)); + break; + } + case 6: { + for(i = 0; i < sizeof(default_8x8_intra); i++) { + scaling_list[zigzag_8x8[i]] = default_8x8_intra[i]; + } + //memcpy(scaling_list, default_8x8_intra, sizeof(default_8x8_intra)); + break; + } + case 7: { + for(i = 0; i < sizeof(default_8x8_inter); i++) { + scaling_list[zigzag_8x8[i]] = default_8x8_inter[i]; + } + //memcpy(scaling_list, default_8x8_inter, sizeof(default_8x8_inter)); + break; + } + } + } +} + +static void sps_scaling_list_fallback(struct seq_parameter_set_rbsp *sps, int i) +{ + int j; + switch (i) { + case 0: { + for(j = 0; j < sizeof(default_4x4_intra); j++) { + sps->scaling_lists_4x4[i][zigzag_4x4[j]] = default_4x4_intra[j]; + } + //memcpy(sps->scaling_lists_4x4[i], default_4x4_intra, sizeof(sps->scaling_lists_4x4[i])); + break; + } + case 3: { + for(j = 0; j < sizeof(default_4x4_inter); j++) { + sps->scaling_lists_4x4[i][zigzag_4x4[j]] = default_4x4_inter[j]; + } + //memcpy(sps->scaling_lists_4x4[i], default_4x4_inter, sizeof(sps->scaling_lists_4x4[i])); + break; + } + case 1: + case 2: + case 4: + case 5: + memcpy(sps->scaling_lists_4x4[i], sps->scaling_lists_4x4[i-1], 
sizeof(sps->scaling_lists_4x4[i])); + break; + case 6: { + for(j = 0; j < sizeof(default_8x8_intra); j++) { + sps->scaling_lists_8x8[i-6][zigzag_8x8[j]] = default_8x8_intra[j]; + } + //memcpy(sps->scaling_lists_8x8[i-6], default_8x8_intra, sizeof(sps->scaling_lists_8x8[i-6])); + break; + } + case 7: { + for(j = 0; j < sizeof(default_8x8_inter); j++) { + sps->scaling_lists_8x8[i-6][zigzag_8x8[j]] = default_8x8_inter[j]; + } + //memcpy(sps->scaling_lists_8x8[i-6], default_8x8_inter, sizeof(sps->scaling_lists_8x8[i-6])); + break; + } + + } +} + +static void pps_scaling_list_fallback(struct seq_parameter_set_rbsp *sps, struct pic_parameter_set_rbsp *pps, int i) +{ + switch (i) { + case 0: + case 3: + memcpy(pps->scaling_lists_4x4[i], sps->scaling_lists_4x4[i], sizeof(pps->scaling_lists_4x4[i])); + break; + case 1: + case 2: + case 4: + case 5: + memcpy(pps->scaling_lists_4x4[i], pps->scaling_lists_4x4[i-1], sizeof(pps->scaling_lists_4x4[i])); + break; + case 6: + case 7: + memcpy(pps->scaling_lists_8x8[i-6], sps->scaling_lists_8x8[i-6], sizeof(pps->scaling_lists_8x8[i-6])); + break; + + } +} + + +uint8_t parse_sps(struct buf_reader *buf, struct seq_parameter_set_rbsp *sps) +{ + sps->profile_idc = read_bits(buf, 8); + sps->constraint_setN_flag = read_bits(buf, 4); + read_bits(buf, 4); + sps->level_idc = read_bits(buf, 8); + + sps->seq_parameter_set_id = read_exp_golomb(buf); + + memset(sps->scaling_lists_4x4, 16, sizeof(sps->scaling_lists_4x4)); + memset(sps->scaling_lists_8x8, 16, sizeof(sps->scaling_lists_8x8)); + if (sps->profile_idc == 100 || sps->profile_idc == 110 || sps->profile_idc + == 122 || sps->profile_idc == 244 || sps->profile_idc == 44 || + sps->profile_idc == 83 || sps->profile_idc == 86) { + sps->chroma_format_idc = read_exp_golomb(buf); + if (sps->chroma_format_idc == 3) { + sps->separate_colour_plane_flag = read_bits(buf, 1); + } + + sps->bit_depth_luma_minus8 = read_exp_golomb(buf); + sps->bit_depth_chroma_minus8 = read_exp_golomb(buf); + 
sps->qpprime_y_zero_transform_bypass_flag = read_bits(buf, 1); + sps->seq_scaling_matrix_present_flag = read_bits(buf, 1); + if (sps->seq_scaling_matrix_present_flag) { + int i; + for (i = 0; i < 8; i++) { + sps->seq_scaling_list_present_flag[i] = read_bits(buf, 1); + + if (sps->seq_scaling_list_present_flag[i]) { + if (i < 6) + parse_scaling_list(buf, sps->scaling_lists_4x4[i], 16, i); + else + parse_scaling_list(buf, sps->scaling_lists_8x8[i - 6], 64, i); + } else { + sps_scaling_list_fallback(sps, i); + } + } + } + } else + sps->chroma_format_idc = 1; + + sps->log2_max_frame_num_minus4 = read_exp_golomb(buf); + sps->max_frame_num = 1 << (sps->log2_max_frame_num_minus4 + 4); + + sps->pic_order_cnt_type = read_exp_golomb(buf); + if (!sps->pic_order_cnt_type) + sps->log2_max_pic_order_cnt_lsb_minus4 = read_exp_golomb(buf); + else if(sps->pic_order_cnt_type == 1) { + sps->delta_pic_order_always_zero_flag = read_bits(buf, 1); + sps->offset_for_non_ref_pic = read_exp_golomb_s(buf); + sps->offset_for_top_to_bottom_field = read_exp_golomb_s(buf); + sps->num_ref_frames_in_pic_order_cnt_cycle = read_exp_golomb(buf); + int i; + for (i = 0; i < sps->num_ref_frames_in_pic_order_cnt_cycle; i++) { + sps->offset_for_ref_frame[i] = read_exp_golomb_s(buf); + } + } + + sps->num_ref_frames = read_exp_golomb(buf); + sps->gaps_in_frame_num_value_allowed_flag = read_bits(buf, 1); + + /*sps->pic_width_in_mbs_minus1 = read_exp_golomb(buf); + sps->pic_height_in_map_units_minus1 = read_exp_golomb(buf);*/ + sps->pic_width = 16 * (read_exp_golomb(buf) + 1); + sps->pic_height = 16 * (read_exp_golomb(buf) + 1); + + sps->frame_mbs_only_flag = read_bits(buf, 1); + + /* compute the height correctly even for interlaced material */ + sps->pic_height = (2 - sps->frame_mbs_only_flag) * sps->pic_height; + if (sps->pic_height == 1088) + sps->pic_height = 1080; + + if (!sps->frame_mbs_only_flag) + sps->mb_adaptive_frame_field_flag = read_bits(buf, 1); + + sps->direct_8x8_inference_flag = read_bits(buf, 
1); + sps->frame_cropping_flag = read_bits(buf, 1); + if (sps->frame_cropping_flag) { + sps->frame_crop_left_offset = read_exp_golomb(buf); + sps->frame_crop_right_offset = read_exp_golomb(buf); + sps->frame_crop_top_offset = read_exp_golomb(buf); + sps->frame_crop_bottom_offset = read_exp_golomb(buf); + } + sps->vui_parameters_present_flag = read_bits(buf, 1); + if (sps->vui_parameters_present_flag) { + parse_vui_parameters(buf, sps); + } + + return 0; +} + +/* evaluates values parsed by sps and modifies the current + * picture according to them + */ +void interpret_sps(struct coded_picture *pic, struct h264_parser *parser) +{ + if(pic->sps_nal == NULL) { + xprintf(parser->xine, XINE_VERBOSITY_DEBUG, + "WARNING: Picture contains no seq_parameter_set\n"); + return; + } + + struct seq_parameter_set_rbsp *sps = &pic->sps_nal->sps; + + if(sps->vui_parameters_present_flag && + sps->vui_parameters.pic_struct_present_flag) { + parser->flag_mask |= PIC_STRUCT_PRESENT; + } else { + parser->flag_mask &= ~PIC_STRUCT_PRESENT; + } + + if(sps->vui_parameters_present_flag && + (sps->vui_parameters.nal_hrd_parameters_present_flag || + sps->vui_parameters.vc1_hrd_parameters_present_flag)) { + parser->flag_mask |= CPB_DPB_DELAYS_PRESENT; + } else { + parser->flag_mask &= ~(CPB_DPB_DELAYS_PRESENT); + } + + if(pic->slc_nal != NULL) { + struct slice_header *slc = &pic->slc_nal->slc; + if (slc->field_pic_flag == 0) { + pic->max_pic_num = sps->max_frame_num; + parser->curr_pic_num = slc->frame_num; + } else { + pic->max_pic_num = 2 * sps->max_frame_num; + parser->curr_pic_num = 2 * slc->frame_num + 1; + } + } +} + +void parse_sei(struct buf_reader *buf, struct sei_message *sei, + struct h264_parser *parser) +{ + uint8_t tmp; + + struct nal_unit *sps_nal = + nal_buffer_get_last(parser->sps_buffer); + + if (sps_nal == NULL) { + xprintf(parser->xine, XINE_VERBOSITY_DEBUG, + "ERR: parse_sei: seq_parameter_set_id not found in buffers\n"); + return; + } + + struct seq_parameter_set_rbsp *sps 
= &sps_nal->sps; + + sei->payload_type = 0; + while((tmp = read_bits(buf, 8)) == 0xff) { + sei->payload_type += 255; + } + sei->last_payload_type_byte = tmp; + sei->payload_type += sei->last_payload_type_byte; + + sei->payload_size = 0; + while((tmp = read_bits(buf, 8)) == 0xff) { + sei->payload_size += 255; + } + sei->last_payload_size_byte = tmp; + sei->payload_size += sei->last_payload_size_byte; + + /* pic_timing */ + if(sei->payload_type == 1) { + if(parser->flag_mask & CPB_DPB_DELAYS_PRESENT) { + sei->pic_timing.cpb_removal_delay = read_bits(buf, 5); + sei->pic_timing.dpb_output_delay = read_bits(buf, 5); + } + + if(parser->flag_mask & PIC_STRUCT_PRESENT) { + sei->pic_timing.pic_struct = read_bits(buf, 4); + + uint8_t NumClockTs = 0; + switch(sei->pic_timing.pic_struct) { + case 0: + case 1: + case 2: + NumClockTs = 1; + break; + case 3: + case 4: + case 7: + NumClockTs = 2; + break; + case 5: + case 6: + case 8: + NumClockTs = 3; + break; + } + + int i; + for(i = 0; i < NumClockTs; i++) { + if(read_bits(buf, 1)) { /* clock_timestamp_flag == 1 */ + sei->pic_timing.ct_type = read_bits(buf, 2); + sei->pic_timing.nuit_field_based_flag = read_bits(buf, 1); + sei->pic_timing.counting_type = read_bits(buf, 5); + sei->pic_timing.full_timestamp_flag = read_bits(buf, 1); + sei->pic_timing.discontinuity_flag = read_bits(buf, 1); + sei->pic_timing.cnt_dropped_flag = read_bits(buf, 1); + sei->pic_timing.n_frames = read_bits(buf, 8); + if(sei->pic_timing.full_timestamp_flag) { + sei->pic_timing.seconds_value = read_bits(buf, 6); + sei->pic_timing.minutes_value = read_bits(buf, 6); + sei->pic_timing.hours_value = read_bits(buf, 5); + } else { + if(read_bits(buf, 1)) { + sei->pic_timing.seconds_value = read_bits(buf, 6); + + if(read_bits(buf, 1)) { + sei->pic_timing.minutes_value = read_bits(buf, 6); + + if(read_bits(buf, 1)) { + sei->pic_timing.hours_value = read_bits(buf, 5); + } + } + } + } + + if(sps->vui_parameters_present_flag && + 
sps->vui_parameters.nal_hrd_parameters_present_flag) { + sei->pic_timing.time_offset = + read_bits(buf, + sps->vui_parameters.nal_hrd_parameters.time_offset_length); + } + } + } + } + } /*else { + fprintf(stderr, "Unimplemented SEI payload: %d\n", sei->payload_type); + }*/ + +} + +void interpret_sei(struct coded_picture *pic) +{ + if(!pic->sps_nal || !pic->sei_nal) + return; + + struct seq_parameter_set_rbsp *sps = &pic->sps_nal->sps; + struct sei_message *sei = &pic->sei_nal->sei; + + if(sps && sps->vui_parameters_present_flag && + sps->vui_parameters.pic_struct_present_flag) { + switch(sei->pic_timing.pic_struct) { + case DISP_FRAME: + pic->flag_mask &= ~INTERLACED; + pic->repeat_pic = 0; + break; + case DISP_TOP: + case DISP_BOTTOM: + case DISP_TOP_BOTTOM: + case DISP_BOTTOM_TOP: + pic->flag_mask |= INTERLACED; + break; + case DISP_TOP_BOTTOM_TOP: + case DISP_BOTTOM_TOP_BOTTOM: + pic->flag_mask |= INTERLACED; + pic->repeat_pic = 1; + break; + case DISP_FRAME_DOUBLING: + pic->flag_mask &= ~INTERLACED; + pic->repeat_pic = 2; + break; + case DISP_FRAME_TRIPLING: + pic->flag_mask &= ~INTERLACED; + pic->repeat_pic = 3; + } + } +} + +void parse_vui_parameters(struct buf_reader *buf, + struct seq_parameter_set_rbsp *sps) +{ + sps->vui_parameters.aspect_ration_info_present_flag = read_bits(buf, 1); + if (sps->vui_parameters.aspect_ration_info_present_flag == 1) { + sps->vui_parameters.aspect_ratio_idc = read_bits(buf, 8); + if (sps->vui_parameters.aspect_ratio_idc == ASPECT_EXTENDED_SAR) { + sps->vui_parameters.sar_width = read_bits(buf, 16); + sps->vui_parameters.sar_height = read_bits(buf, 16); + } + } + + sps->vui_parameters.overscan_info_present_flag = read_bits(buf, 1); + if (sps->vui_parameters.overscan_info_present_flag) { + sps->vui_parameters.overscan_appropriate_flag = read_bits(buf, 1); + } + + sps->vui_parameters.video_signal_type_present_flag = read_bits(buf, 1); + if (sps->vui_parameters.video_signal_type_present_flag) { + sps->vui_parameters.video_format 
= read_bits(buf, 3); + sps->vui_parameters.video_full_range_flag = read_bits(buf, 1); + sps->vui_parameters.colour_description_present = read_bits(buf, 1); + if (sps->vui_parameters.colour_description_present) { + sps->vui_parameters.colour_primaries = read_bits(buf, 8); + sps->vui_parameters.transfer_characteristics = read_bits(buf, 8); + sps->vui_parameters.matrix_coefficients = read_bits(buf, 8); + } + } + + sps->vui_parameters.chroma_loc_info_present_flag = read_bits(buf, 1); + if (sps->vui_parameters.chroma_loc_info_present_flag) { + sps->vui_parameters.chroma_sample_loc_type_top_field = read_exp_golomb(buf); + sps->vui_parameters.chroma_sample_loc_type_bottom_field = read_exp_golomb( + buf); + } + + sps->vui_parameters.timing_info_present_flag = read_bits(buf, 1); + if (sps->vui_parameters.timing_info_present_flag) { + uint32_t num_units_in_tick = read_bits(buf, 32); + uint32_t time_scale = read_bits(buf, 32); + sps->vui_parameters.num_units_in_tick = num_units_in_tick; + sps->vui_parameters.time_scale = time_scale; + sps->vui_parameters.fixed_frame_rate_flag = read_bits(buf, 1); + } + + sps->vui_parameters.nal_hrd_parameters_present_flag = read_bits(buf, 1); + if (sps->vui_parameters.nal_hrd_parameters_present_flag) + parse_hrd_parameters(buf, &sps->vui_parameters.nal_hrd_parameters); + + sps->vui_parameters.vc1_hrd_parameters_present_flag = read_bits(buf, 1); + if (sps->vui_parameters.vc1_hrd_parameters_present_flag) + parse_hrd_parameters(buf, &sps->vui_parameters.vc1_hrd_parameters); + + if (sps->vui_parameters.nal_hrd_parameters_present_flag + || sps->vui_parameters.vc1_hrd_parameters_present_flag) + sps->vui_parameters.low_delay_hrd_flag = read_bits(buf, 1); + + sps->vui_parameters.pic_struct_present_flag = read_bits(buf, 1); + sps->vui_parameters.bitstream_restriction_flag = read_bits(buf, 1); + + if (sps->vui_parameters.bitstream_restriction_flag) { + sps->vui_parameters.motion_vectors_over_pic_boundaries = read_bits(buf, 1); + 
sps->vui_parameters.max_bytes_per_pic_denom = read_exp_golomb(buf); + sps->vui_parameters.max_bits_per_mb_denom = read_exp_golomb(buf); + sps->vui_parameters.log2_max_mv_length_horizontal = read_exp_golomb(buf); + sps->vui_parameters.log2_max_mv_length_vertical = read_exp_golomb(buf); + sps->vui_parameters.num_reorder_frames = read_exp_golomb(buf); + sps->vui_parameters.max_dec_frame_buffering = read_exp_golomb(buf); + } +} + +void parse_hrd_parameters(struct buf_reader *buf, struct hrd_parameters *hrd) +{ + hrd->cpb_cnt_minus1 = read_exp_golomb(buf); + hrd->bit_rate_scale = read_bits(buf, 4); + hrd->cpb_size_scale = read_bits(buf, 4); + + int i; + for (i = 0; i <= hrd->cpb_cnt_minus1; i++) { + hrd->bit_rate_value_minus1[i] = read_exp_golomb(buf); + hrd->cpb_size_value_minus1[i] = read_exp_golomb(buf); + hrd->cbr_flag[i] = read_bits(buf, 1); + } + + hrd->initial_cpb_removal_delay_length_minus1 = read_bits(buf, 5); + hrd->cpb_removal_delay_length_minus1 = read_bits(buf, 5); + hrd->dpb_output_delay_length_minus1 = read_bits(buf, 5); + hrd->time_offset_length = read_bits(buf, 5); +} + +uint8_t parse_pps(struct buf_reader *buf, struct pic_parameter_set_rbsp *pps) +{ + pps->pic_parameter_set_id = read_exp_golomb(buf); + pps->seq_parameter_set_id = read_exp_golomb(buf); + pps->entropy_coding_mode_flag = read_bits(buf, 1); + pps->pic_order_present_flag = read_bits(buf, 1); + + pps->num_slice_groups_minus1 = read_exp_golomb(buf); + if (pps->num_slice_groups_minus1 > 0) { + pps->slice_group_map_type = read_exp_golomb(buf); + if (pps->slice_group_map_type == 0) { + int i_group; + for (i_group = 0; i_group <= pps->num_slice_groups_minus1; i_group++) { + if (i_group < 64) + pps->run_length_minus1[i_group] = read_exp_golomb(buf); + else { // FIXME: skips if more than 64 groups exist + lprintf("Error: Only 64 slice_groups are supported\n"); + read_exp_golomb(buf); + } + } + } + else if (pps->slice_group_map_type == 3 || pps->slice_group_map_type == 4 + || 
pps->slice_group_map_type == 5) { + pps->slice_group_change_direction_flag = read_bits(buf, 1); + pps->slice_group_change_rate_minus1 = read_exp_golomb(buf); + } + else if (pps->slice_group_map_type == 6) { + pps->pic_size_in_map_units_minus1 = read_exp_golomb(buf); + int i_group; + for (i_group = 0; i_group <= pps->num_slice_groups_minus1; i_group++) { + pps->slice_group_id[i_group] = read_bits(buf, ceil(log( + pps->num_slice_groups_minus1 + 1))); + } + } + } + + pps->num_ref_idx_l0_active_minus1 = read_exp_golomb(buf); + pps->num_ref_idx_l1_active_minus1 = read_exp_golomb(buf); + pps->weighted_pred_flag = read_bits(buf, 1); + pps->weighted_bipred_idc = read_bits(buf, 2); + pps->pic_init_qp_minus26 = read_exp_golomb_s(buf); + pps->pic_init_qs_minus26 = read_exp_golomb_s(buf); + pps->chroma_qp_index_offset = read_exp_golomb_s(buf); + pps->deblocking_filter_control_present_flag = read_bits(buf, 1); + pps->constrained_intra_pred_flag = read_bits(buf, 1); + pps->redundant_pic_cnt_present_flag = read_bits(buf, 1); + + int bit_length = (buf->len*8)-rbsp_trailing_bits(buf->buf, buf->len); + int bit_read = bits_read(buf); + + memset(pps->scaling_lists_4x4, 16, sizeof(pps->scaling_lists_4x4)); + memset(pps->scaling_lists_8x8, 16, sizeof(pps->scaling_lists_8x8)); + if (bit_length-bit_read > 1) { + pps->transform_8x8_mode_flag = read_bits(buf, 1); + pps->pic_scaling_matrix_present_flag = read_bits(buf, 1); + if (pps->pic_scaling_matrix_present_flag) { + int i; + for (i = 0; i < 8; i++) { + if(i < 6 || pps->transform_8x8_mode_flag) + pps->pic_scaling_list_present_flag[i] = read_bits(buf, 1); + else + pps->pic_scaling_list_present_flag[i] = 0; + + if (pps->pic_scaling_list_present_flag[i]) { + if (i < 6) + parse_scaling_list(buf, pps->scaling_lists_4x4[i], 16, i); + else + parse_scaling_list(buf, pps->scaling_lists_8x8[i - 6], 64, i); + } + } + } + + pps->second_chroma_qp_index_offset = read_exp_golomb_s(buf); + } else + pps->second_chroma_qp_index_offset = 
pps->chroma_qp_index_offset; + + return 0; +} + +void interpret_pps(struct coded_picture *pic) +{ + if(pic->sps_nal == NULL) { + lprintf("WARNING: Picture contains no seq_parameter_set\n"); + return; + } else if(pic->pps_nal == NULL) { + lprintf("WARNING: Picture contains no pic_parameter_set\n"); + return; + } + + struct seq_parameter_set_rbsp *sps = &pic->sps_nal->sps; + struct pic_parameter_set_rbsp *pps = &pic->pps_nal->pps; + + int i; + for (i = 0; i < 8; i++) { + if (!pps->pic_scaling_list_present_flag[i]) { + pps_scaling_list_fallback(sps, pps, i); + } + } + + if (!pps->pic_scaling_matrix_present_flag && sps != NULL) { + memcpy(pps->scaling_lists_4x4, sps->scaling_lists_4x4, + sizeof(pps->scaling_lists_4x4)); + memcpy(pps->scaling_lists_8x8, sps->scaling_lists_8x8, + sizeof(pps->scaling_lists_8x8)); + } +} + +uint8_t parse_slice_header(struct buf_reader *buf, struct nal_unit *slc_nal, + struct h264_parser *parser) +{ + struct slice_header *slc = &slc_nal->slc; + + slc->first_mb_in_slice = read_exp_golomb(buf); + /* we do some parsing on the slice type, because the list is doubled */ + slc->slice_type = slice_type(read_exp_golomb(buf)); + + //print_slice_type(slc->slice_type); + slc->pic_parameter_set_id = read_exp_golomb(buf); + + /* retrieve sps and pps from the buffers */ + struct nal_unit *pps_nal = + nal_buffer_get_by_pps_id(parser->pps_buffer, slc->pic_parameter_set_id); + + if (pps_nal == NULL) { + xprintf(parser->xine, XINE_VERBOSITY_DEBUG, + "ERR: parse_slice_header: pic_parameter_set_id %d not found in buffers\n", + slc->pic_parameter_set_id); + return -1; + } + + struct pic_parameter_set_rbsp *pps = &pps_nal->pps; + + struct nal_unit *sps_nal = + nal_buffer_get_by_sps_id(parser->sps_buffer, pps->seq_parameter_set_id); + + if (sps_nal == NULL) { + xprintf(parser->xine, XINE_VERBOSITY_DEBUG, + "ERR: parse_slice_header: seq_parameter_set_id %d not found in buffers\n", + pps->seq_parameter_set_id); + return -1; + } + + struct seq_parameter_set_rbsp 
*sps = &sps_nal->sps; + + if(sps->separate_colour_plane_flag) + slc->colour_plane_id = read_bits(buf, 2); + + slc->frame_num = read_bits(buf, sps->log2_max_frame_num_minus4 + 4); + if (!sps->frame_mbs_only_flag) { + slc->field_pic_flag = read_bits(buf, 1); + if (slc->field_pic_flag) + slc->bottom_field_flag = read_bits(buf, 1); + else + slc->bottom_field_flag = 0; + } + else { + slc->field_pic_flag = 0; + slc->bottom_field_flag = 0; + } + + if (slc_nal->nal_unit_type == NAL_SLICE_IDR) + slc->idr_pic_id = read_exp_golomb(buf); + + if (!sps->pic_order_cnt_type) { + slc->pic_order_cnt_lsb = read_bits(buf, + sps->log2_max_pic_order_cnt_lsb_minus4 + 4); + if (pps->pic_order_present_flag && !slc->field_pic_flag) + slc->delta_pic_order_cnt_bottom = read_exp_golomb_s(buf); + } + + if (sps->pic_order_cnt_type == 1 && !sps->delta_pic_order_always_zero_flag) { + slc->delta_pic_order_cnt[0] = read_exp_golomb_s(buf); + if (pps->pic_order_present_flag && !slc->field_pic_flag) + slc->delta_pic_order_cnt[1] = read_exp_golomb_s(buf); + } + + if (pps->redundant_pic_cnt_present_flag == 1) { + slc->redundant_pic_cnt = read_exp_golomb(buf); + } + + if (slc->slice_type == SLICE_B) + slc->direct_spatial_mv_pred_flag = read_bits(buf, 1); + + /* take default values in case they are not set here */ + slc->num_ref_idx_l0_active_minus1 = pps->num_ref_idx_l0_active_minus1; + slc->num_ref_idx_l1_active_minus1 = pps->num_ref_idx_l1_active_minus1; + + if (slc->slice_type == SLICE_P || slc->slice_type == SLICE_SP + || slc->slice_type == SLICE_B) { + slc->num_ref_idx_active_override_flag = read_bits(buf, 1); + + if (slc->num_ref_idx_active_override_flag == 1) { + slc->num_ref_idx_l0_active_minus1 = read_exp_golomb(buf); + + if (slc->slice_type == SLICE_B) { + slc->num_ref_idx_l1_active_minus1 = read_exp_golomb(buf); + } + } + } + + /* --- ref_pic_list_reordering --- */ + parse_ref_pic_list_reordering(buf, slc); + + /* --- pred_weight_table --- */ + if ((pps->weighted_pred_flag && (slc->slice_type 
== SLICE_P + || slc->slice_type == SLICE_SP)) || (pps->weighted_bipred_idc == 1 + && slc->slice_type == SLICE_B)) { + parse_pred_weight_table(buf, slc, parser); + } + + /* --- dec_ref_pic_marking --- */ + if (slc_nal->nal_ref_idc != 0) + parse_dec_ref_pic_marking(buf, slc_nal); + else + slc->dec_ref_pic_marking_count = 0; + + return 0; +} + +void interpret_slice_header(struct h264_parser *parser, struct nal_unit *slc_nal) +{ + struct coded_picture *pic = parser->pic; + struct slice_header *slc = &slc_nal->slc; + + /* retrieve sps and pps from the buffers */ + struct nal_unit *pps_nal = + nal_buffer_get_by_pps_id(parser->pps_buffer, slc->pic_parameter_set_id); + + if (pps_nal == NULL) { + xprintf(parser->xine, XINE_VERBOSITY_DEBUG, + "ERR: interpret_slice_header: pic_parameter_set_id %d not found in buffers\n", + slc->pic_parameter_set_id); + return; + } + + struct nal_unit *sps_nal = + nal_buffer_get_by_sps_id(parser->sps_buffer, pps_nal->pps.seq_parameter_set_id); + + if (sps_nal == NULL) { + xprintf(parser->xine, XINE_VERBOSITY_DEBUG, + "ERR: interpret_slice_header: seq_parameter_set_id %d not found in buffers\n", + pps_nal->pps.seq_parameter_set_id); + return; + } + + if (pic->sps_nal) { + release_nal_unit(pic->sps_nal); + } + if (pic->pps_nal) { + release_nal_unit(pic->pps_nal); + } + lock_nal_unit(sps_nal); + pic->sps_nal = sps_nal; + lock_nal_unit(pps_nal); + pic->pps_nal = pps_nal; +} + +void parse_ref_pic_list_reordering(struct buf_reader *buf, struct slice_header *slc) +{ + if (slc->slice_type != SLICE_I && slc->slice_type != SLICE_SI) { + slc->ref_pic_list_reordering.ref_pic_list_reordering_flag_l0 = read_bits( + buf, 1); + + if (slc->ref_pic_list_reordering.ref_pic_list_reordering_flag_l0 == 1) { + do { + slc->ref_pic_list_reordering.reordering_of_pic_nums_idc + = read_exp_golomb(buf); + + if (slc->ref_pic_list_reordering.reordering_of_pic_nums_idc == 0 + || slc->ref_pic_list_reordering.reordering_of_pic_nums_idc == 1) { + 
slc->ref_pic_list_reordering.abs_diff_pic_num_minus1 + = read_exp_golomb(buf); + } + else if (slc->ref_pic_list_reordering.reordering_of_pic_nums_idc == 2) { + slc->ref_pic_list_reordering.long_term_pic_num = read_exp_golomb(buf); + } + } while (slc->ref_pic_list_reordering.reordering_of_pic_nums_idc != 3); + } + } + + if (slc->slice_type == SLICE_B) { + slc->ref_pic_list_reordering.ref_pic_list_reordering_flag_l1 = read_bits( + buf, 1); + + if (slc->ref_pic_list_reordering.ref_pic_list_reordering_flag_l1 == 1) { + do { + slc->ref_pic_list_reordering.reordering_of_pic_nums_idc + = read_exp_golomb(buf); + + if (slc->ref_pic_list_reordering.reordering_of_pic_nums_idc == 0 + || slc->ref_pic_list_reordering.reordering_of_pic_nums_idc == 1) { + slc->ref_pic_list_reordering.abs_diff_pic_num_minus1 + = read_exp_golomb(buf); + } + else if (slc->ref_pic_list_reordering.reordering_of_pic_nums_idc == 2) { + slc->ref_pic_list_reordering.long_term_pic_num = read_exp_golomb(buf); + } + } while (slc->ref_pic_list_reordering.reordering_of_pic_nums_idc != 3); + } + } +} + +void parse_pred_weight_table(struct buf_reader *buf, struct slice_header *slc, + struct h264_parser *parser) +{ + /* retrieve sps and pps from the buffers */ + struct pic_parameter_set_rbsp *pps = + &nal_buffer_get_by_pps_id(parser->pps_buffer, slc->pic_parameter_set_id) + ->pps; + + struct seq_parameter_set_rbsp *sps = + &nal_buffer_get_by_sps_id(parser->sps_buffer, pps->seq_parameter_set_id) + ->sps; + + slc->pred_weight_table.luma_log2_weight_denom = read_exp_golomb(buf); + + uint32_t ChromaArrayType = sps->chroma_format_idc; + if(sps->separate_colour_plane_flag) + ChromaArrayType = 0; + + if (ChromaArrayType != 0) + slc->pred_weight_table.chroma_log2_weight_denom = read_exp_golomb(buf); + + int i; + for (i = 0; i <= slc->num_ref_idx_l0_active_minus1; i++) { + uint8_t luma_weight_l0_flag = read_bits(buf, 1); + + if (luma_weight_l0_flag == 1) { + slc->pred_weight_table.luma_weight_l0[i] = 
read_exp_golomb_s(buf); + slc->pred_weight_table.luma_offset_l0[i] = read_exp_golomb_s(buf); + } + + if (ChromaArrayType != 0) { + uint8_t chroma_weight_l0_flag = read_bits(buf, 1); + + if (chroma_weight_l0_flag == 1) { + int j; + for (j = 0; j < 2; j++) { + slc->pred_weight_table.chroma_weight_l0[i][j] + = read_exp_golomb_s(buf); + slc->pred_weight_table.chroma_offset_l0[i][j] + = read_exp_golomb_s(buf); + } + } + } + } + + if ((slc->slice_type % 5) == SLICE_B) { + /* FIXME: Being spec-compliant here and loop to num_ref_idx_l0_active_minus1 + * will break Divx7 files. Keep this in mind if any other streams are broken + */ + for (i = 0; i <= slc->num_ref_idx_l1_active_minus1; i++) { + uint8_t luma_weight_l1_flag = read_bits(buf, 1); + + if (luma_weight_l1_flag == 1) { + slc->pred_weight_table.luma_weight_l1[i] = read_exp_golomb_s(buf); + slc->pred_weight_table.luma_offset_l1[i] = read_exp_golomb_s(buf); + } + + if (ChromaArrayType != 0) { + uint8_t chroma_weight_l1_flag = read_bits(buf, 1); + + if (chroma_weight_l1_flag == 1) { + int j; + for (j = 0; j < 2; j++) { + slc->pred_weight_table.chroma_weight_l1[i][j] + = read_exp_golomb_s(buf); + slc->pred_weight_table.chroma_offset_l1[i][j] + = read_exp_golomb_s(buf); + } + } + } + } + } +} + +/** + * PicNum calculation following ITU-T H264 11/2007 + * 8.2.4.1 p112f + */ +void calculate_pic_nums(struct h264_parser *parser, struct coded_picture *cpic) +{ + struct decoded_picture *pic = NULL; + struct slice_header *cslc = &cpic->slc_nal->slc; + + xine_list_iterator_t ite = xine_list_front(parser->dpb->reference_list); + while (ite) { + pic = xine_list_get_value(parser->dpb->reference_list, ite); + + int i; + for (i=0; i<2; i++) { + if(pic->coded_pic[i] == NULL) + continue; + + struct slice_header *slc = &pic->coded_pic[i]->slc_nal->slc; + struct seq_parameter_set_rbsp *sps = &pic->coded_pic[i]->sps_nal->sps; + + if (!pic->coded_pic[i]->used_for_long_term_ref) { + int32_t frame_num_wrap = 0; + if (slc->frame_num > 
cslc->frame_num) + frame_num_wrap = slc->frame_num - sps->max_frame_num; + else + frame_num_wrap = slc->frame_num; + + if(i == 0) { + pic->frame_num_wrap = frame_num_wrap; + } + + if (cslc->field_pic_flag == 0) { + pic->coded_pic[i]->pic_num = frame_num_wrap; + } else { + pic->coded_pic[i]->pic_num = 2 * frame_num_wrap; + if((slc->field_pic_flag == 1 && + cslc->bottom_field_flag == slc->bottom_field_flag) || + (slc->field_pic_flag == 0 && !cslc->bottom_field_flag)) + pic->coded_pic[i]->pic_num++; + } + } else { + pic->coded_pic[i]->long_term_pic_num = pic->coded_pic[i]->long_term_frame_idx; + if(slc->bottom_field_flag == cslc->bottom_field_flag) + pic->coded_pic[i]->long_term_pic_num++; + } + } + + ite = xine_list_next(parser->dpb->reference_list, ite); + } +} + +void execute_ref_pic_marking(struct coded_picture *cpic, + uint32_t memory_management_control_operation, + uint32_t marking_nr, + struct h264_parser *parser) +{ + /** + * according to NOTE 6, p83 the dec_ref_pic_marking + * structure is identical for all slice headers within + * a coded picture, so we can simply use the last + * slice_header we saw in the pic + */ + if (!cpic->slc_nal) + return; + struct slice_header *slc = &cpic->slc_nal->slc; + struct dpb *dpb = parser->dpb; + + calculate_pic_nums(parser, cpic); + + if (cpic->flag_mask & IDR_PIC) { + if(slc->dec_ref_pic_marking[marking_nr].long_term_reference_flag) { + cpic->used_for_long_term_ref = 1; + dpb_set_unused_ref_picture_lidx_gt(dpb, 0); + } else { + dpb_set_unused_ref_picture_lidx_gt(dpb, -1); + } + return; + } + + /* MMC operation == 1 : 8.2.5.4.1, p. 
120 */ + if (memory_management_control_operation == 1) { + // short-term -> unused for reference + int32_t pic_num_x = (parser->curr_pic_num + - (slc->dec_ref_pic_marking[marking_nr].difference_of_pic_nums_minus1 + 1)); + //% cpic->max_pic_num; + struct decoded_picture* pic = NULL; + if ((pic = dpb_get_picture(dpb, pic_num_x)) != NULL) { + if (cpic->slc_nal->slc.field_pic_flag == 0) { + dpb_unmark_reference_picture(dpb, pic); + } else { + + if (pic->coded_pic[0]->slc_nal->slc.field_pic_flag == 1) { + if (pic->top_is_reference) + pic->top_is_reference = 0; + else if (pic->bottom_is_reference) + pic->bottom_is_reference = 0; + + if(!pic->top_is_reference && !pic->bottom_is_reference) + dpb_unmark_reference_picture(dpb, pic); + } else { + pic->top_is_reference = pic->bottom_is_reference = 0; + dpb_unmark_reference_picture(dpb, pic); + } + } + } else { + xprintf(parser->xine, XINE_VERBOSITY_DEBUG, + "H264: mmc 1 failed: %d not existent - curr_pic: %d\n", + pic_num_x, parser->curr_pic_num); + } + } else if (memory_management_control_operation == 2) { + // long-term -> unused for reference + struct decoded_picture* pic = dpb_get_picture_by_ltpn(dpb, + slc->dec_ref_pic_marking[marking_nr].long_term_pic_num); + if (pic != NULL) { + if (cpic->slc_nal->slc.field_pic_flag == 0) + dpb_set_unused_ref_picture_byltpn(dpb, + slc->dec_ref_pic_marking[marking_nr].long_term_pic_num); + else { + + if (pic->coded_pic[0]->slc_nal->slc.field_pic_flag == 1) { + if (pic->top_is_reference) + pic->top_is_reference = 0; + else if (pic->bottom_is_reference) + pic->bottom_is_reference = 0; + + if(!pic->top_is_reference && !pic->bottom_is_reference) { + dpb_set_unused_ref_picture_byltpn(dpb, + slc->dec_ref_pic_marking[marking_nr].long_term_pic_num); + } + } else { + pic->top_is_reference = pic->bottom_is_reference = 0; + dpb_set_unused_ref_picture_byltpn(dpb, + slc->dec_ref_pic_marking[marking_nr].long_term_pic_num); + } + } + } + } else if (memory_management_control_operation == 3) { + // 
short-term -> long-term, set long-term frame index + uint32_t pic_num_x = parser->curr_pic_num + - (slc->dec_ref_pic_marking[marking_nr].difference_of_pic_nums_minus1 + 1); + struct decoded_picture* pic = dpb_get_picture_by_ltidx(dpb, + slc->dec_ref_pic_marking[marking_nr].long_term_pic_num); + if (pic != NULL) + dpb_set_unused_ref_picture_bylidx(dpb, + slc->dec_ref_pic_marking[marking_nr].long_term_frame_idx); + + pic = dpb_get_picture(dpb, pic_num_x); + if (pic) { + pic = dpb_get_picture(dpb, pic_num_x); + + if (pic->coded_pic[0]->slc_nal->slc.field_pic_flag == 0) { + pic->coded_pic[0]->long_term_frame_idx + = slc->dec_ref_pic_marking[marking_nr].long_term_frame_idx; + pic->coded_pic[0]->long_term_pic_num = pic->coded_pic[0]->long_term_frame_idx; + } + else { + if(pic->coded_pic[0]->pic_num == pic_num_x) { + pic->coded_pic[0]->long_term_frame_idx + = slc->dec_ref_pic_marking[marking_nr].long_term_frame_idx; + pic->coded_pic[0]->long_term_pic_num = pic->coded_pic[0]->long_term_frame_idx * 2 + 1; + } else if(pic->coded_pic[1] != NULL && + pic->coded_pic[1]->pic_num == pic_num_x) { + pic->coded_pic[1]->long_term_frame_idx + = slc->dec_ref_pic_marking[marking_nr].long_term_frame_idx; + pic->coded_pic[1]->long_term_pic_num = pic->coded_pic[1]->long_term_frame_idx * 2 + 1; + } + } + } + else { + xprintf(parser->xine, XINE_VERBOSITY_DEBUG, + "memory_management_control_operation: 3 failed. 
No such picture.\n"); + } + + } else if (memory_management_control_operation == 4) { + /* set max-long-term frame index, + * mark all long-term pictures with long-term frame idx + * greater max-long-term farme idx as unused for ref */ + if (slc->dec_ref_pic_marking[marking_nr].max_long_term_frame_idx_plus1 == 0) + dpb_set_unused_ref_picture_lidx_gt(dpb, 0); + else + dpb_set_unused_ref_picture_lidx_gt(dpb, + slc->dec_ref_pic_marking[marking_nr].max_long_term_frame_idx_plus1 - 1); + } else if (memory_management_control_operation == 5) { + /* mark all ref pics as unused for reference, + * set max-long-term frame index = no long-term frame idxs */ + dpb_flush(dpb); + + if (!slc->bottom_field_flag) { + parser->prev_pic_order_cnt_lsb = cpic->top_field_order_cnt; + parser->prev_pic_order_cnt_msb = 0; + } else { + parser->prev_pic_order_cnt_lsb = 0; + parser->prev_pic_order_cnt_msb = 0; + } + } else if (memory_management_control_operation == 6) { + /* mark current picture as used for long-term ref, + * assing long-term frame idx to it */ + struct decoded_picture* pic = dpb_get_picture_by_ltidx(dpb, + slc->dec_ref_pic_marking[marking_nr].long_term_frame_idx); + if (pic != NULL) + dpb_set_unused_ref_picture_bylidx(dpb, + slc->dec_ref_pic_marking[marking_nr].long_term_frame_idx); + + cpic->long_term_frame_idx = slc->dec_ref_pic_marking[marking_nr].long_term_frame_idx; + cpic->used_for_long_term_ref = 1; + + if (slc->field_pic_flag == 0) { + cpic->long_term_pic_num = cpic->long_term_frame_idx; + } + else { + cpic->long_term_pic_num = cpic->long_term_frame_idx * 2 + 1; + } + + } +} + +void parse_dec_ref_pic_marking(struct buf_reader *buf, + struct nal_unit *slc_nal) +{ + struct slice_header *slc = &slc_nal->slc; + + if (!slc) + return; + + slc->dec_ref_pic_marking_count = 0; + int i = slc->dec_ref_pic_marking_count; + + if (slc_nal->nal_unit_type == NAL_SLICE_IDR) { + slc->dec_ref_pic_marking[i].no_output_of_prior_pics_flag = read_bits(buf, 1); + 
slc->dec_ref_pic_marking[i].long_term_reference_flag = read_bits(buf, 1); + i+=2; + } else { + slc->dec_ref_pic_marking[i].adaptive_ref_pic_marking_mode_flag = read_bits( + buf, 1); + + if (slc->dec_ref_pic_marking[i].adaptive_ref_pic_marking_mode_flag) { + do { + slc->dec_ref_pic_marking[i].memory_management_control_operation + = read_exp_golomb(buf); + + if (slc->dec_ref_pic_marking[i].memory_management_control_operation == 1 + || slc->dec_ref_pic_marking[i].memory_management_control_operation + == 3) + slc->dec_ref_pic_marking[i].difference_of_pic_nums_minus1 + = read_exp_golomb(buf); + + if (slc->dec_ref_pic_marking[i].memory_management_control_operation == 2) + slc->dec_ref_pic_marking[i].long_term_pic_num = read_exp_golomb(buf); + + if (slc->dec_ref_pic_marking[i].memory_management_control_operation == 3 + || slc->dec_ref_pic_marking[i].memory_management_control_operation + == 6) + slc->dec_ref_pic_marking[i].long_term_frame_idx = read_exp_golomb(buf); + + if (slc->dec_ref_pic_marking[i].memory_management_control_operation == 4) + slc->dec_ref_pic_marking[i].max_long_term_frame_idx_plus1 + = read_exp_golomb(buf); + + i++; + if(i >= 10) { + lprintf("Error: Not more than 10 MMC operations supported per slice. Dropping some.\n"); + i = 0; + } + } while (slc->dec_ref_pic_marking[i-1].memory_management_control_operation + != 0); + } + } + + slc->dec_ref_pic_marking_count = (i>0) ? 
(i-1) : 0; +} + +/* ----------------- NAL parser ----------------- */ + +struct h264_parser* init_parser(xine_t *xine) +{ + struct h264_parser *parser = calloc(1, sizeof(struct h264_parser)); + parser->pic = create_coded_picture(); + parser->position = NON_VCL; + parser->last_vcl_nal = NULL; + parser->sps_buffer = create_nal_buffer(32); + parser->pps_buffer = create_nal_buffer(32); + parser->xine = xine; + parser->dpb = create_dpb(); + + return parser; +} + +void reset_parser(struct h264_parser *parser) +{ + parser->position = NON_VCL; + parser->buf_len = parser->prebuf_len = 0; + parser->next_nal_position = 0; + parser->last_nal_res = 0; + + if(parser->last_vcl_nal) { + release_nal_unit(parser->last_vcl_nal); + } + parser->last_vcl_nal = NULL; + + parser->prev_pic_order_cnt_msb = 0; + parser->prev_pic_order_cnt_lsb = 0; + parser->frame_num_offset = 0; + parser->prev_top_field_order_cnt = 0; + parser->curr_pic_num = 0; + parser->flag_mask = 0; + + if(parser->pic != NULL) { + free_coded_picture(parser->pic); + parser->pic = create_coded_picture(); + } +} + +void free_parser(struct h264_parser *parser) +{ + dpb_free_all(parser->dpb); + release_dpb(parser->dpb); + free_nal_buffer(parser->pps_buffer); + free_nal_buffer(parser->sps_buffer); + free(parser); +} + +void parse_codec_private(struct h264_parser *parser, uint8_t *inbuf, int inbuf_len) +{ + struct buf_reader bufr; + + bufr.buf = inbuf; + bufr.cur_pos = inbuf; + bufr.cur_offset = 8; + bufr.len = inbuf_len; + + // FIXME: Might be broken! 
+ struct nal_unit *nal = calloc(1, sizeof(struct nal_unit)); + + + /* reserved */ + read_bits(&bufr, 8); + nal->sps.profile_idc = read_bits(&bufr, 8); + read_bits(&bufr, 8); + nal->sps.level_idc = read_bits(&bufr, 8); + read_bits(&bufr, 6); + + parser->nal_size_length = read_bits(&bufr, 2) + 1; + parser->nal_size_length_buf = calloc(1, parser->nal_size_length); + read_bits(&bufr, 3); + uint8_t sps_count = read_bits(&bufr, 5); + + inbuf += 6; + inbuf_len -= 6; + int i; + + struct coded_picture *dummy = NULL; + for(i = 0; i < sps_count; i++) { + uint16_t sps_size = read_bits(&bufr, 16); + inbuf += 2; + inbuf_len -= 2; + parse_nal(inbuf, sps_size, parser, &dummy); + inbuf += sps_size; + inbuf_len -= sps_size; + } + + bufr.buf = inbuf; + bufr.cur_pos = inbuf; + bufr.cur_offset = 8; + bufr.len = inbuf_len; + + uint8_t pps_count = read_bits(&bufr, 8); + inbuf += 1; + for(i = 0; i < pps_count; i++) { + uint16_t pps_size = read_bits(&bufr, 16); + inbuf += 2; + inbuf_len -= 2; + parse_nal(inbuf, pps_size, parser, &dummy); + inbuf += pps_size; + inbuf_len -= pps_size; + } + + nal_buffer_append(parser->sps_buffer, nal); +} + +void process_mmc_operations(struct h264_parser *parser, struct coded_picture *picture) +{ + if (picture->flag_mask & REFERENCE) { + parser->prev_pic_order_cnt_lsb + = picture->slc_nal->slc.pic_order_cnt_lsb; + } + + int i; + for(i = 0; i < picture->slc_nal->slc. + dec_ref_pic_marking_count; i++) { + execute_ref_pic_marking( + picture, + picture->slc_nal->slc.dec_ref_pic_marking[i]. 
+ memory_management_control_operation, + i, + parser); + } +} + +int parse_frame(struct h264_parser *parser, uint8_t *inbuf, int inbuf_len, + int64_t pts, + uint8_t **ret_buf, uint32_t *ret_len, struct coded_picture **ret_pic) +{ + int32_t next_nal = 0; + int32_t offset = 0; + int start_seq_len = 3; + + *ret_pic = NULL; + *ret_buf = NULL; + *ret_len = 0; + + if(parser->nal_size_length > 0) + start_seq_len = offset = parser->nal_size_length; + + if (parser->prebuf_len + inbuf_len > MAX_FRAME_SIZE) { + xprintf(parser->xine, XINE_VERBOSITY_LOG,"h264_parser: prebuf underrun\n"); + *ret_len = 0; + *ret_buf = NULL; + parser->prebuf_len = 0; + return inbuf_len; + } + + /* copy the whole inbuf to the prebuf, + * then search for a nal-start sequence in the prebuf, + * if it's in there, parse the nal and append to parser->buf + * or return a frame */ + + xine_fast_memcpy(parser->prebuf + parser->prebuf_len, inbuf, inbuf_len); + parser->prebuf_len += inbuf_len; + + while((next_nal = seek_for_nal(parser->prebuf+start_seq_len-offset, parser->prebuf_len-start_seq_len+offset, parser)) > 0) { + + struct coded_picture *completed_pic = NULL; + + if(!parser->nal_size_length && + (parser->prebuf[0] != 0x00 || parser->prebuf[1] != 0x00 || + parser->prebuf[2] != 0x01)) { + xprintf(parser->xine, XINE_VERBOSITY_LOG, "Broken NAL, skip it.\n"); + parser->last_nal_res = 2; + } else { + parser->last_nal_res = parse_nal(parser->prebuf+start_seq_len, + next_nal, parser, &completed_pic); + } + + if (completed_pic != NULL && + completed_pic->slice_cnt > 0 && + parser->buf_len > 0) { + + //lprintf("Frame complete: %d bytes\n", parser->buf_len); + *ret_len = parser->buf_len; + *ret_buf = malloc(parser->buf_len); + xine_fast_memcpy(*ret_buf, parser->buf, parser->buf_len); + + *ret_pic = completed_pic; + + parser->buf_len = 0; + + if (pts != 0 && (parser->pic->pts == 0 || parser->pic->pts != pts)) { + parser->pic->pts = pts; + } + + /** + * if the new coded picture started with a VCL nal + * we have 
to copy this to buffer for the next picture + * now. + */ + if(parser->last_nal_res == 1) { + if(parser->nal_size_length > 0) { + static const uint8_t start_seq[3] = { 0x00, 0x00, 0x01 }; + xine_fast_memcpy(parser->buf, start_seq, 3); + parser->buf_len += 3; + } + + xine_fast_memcpy(parser->buf+parser->buf_len, parser->prebuf+offset, next_nal+start_seq_len-2*offset); + parser->buf_len += next_nal+start_seq_len-2*offset; + } + + memmove(parser->prebuf, parser->prebuf+(next_nal+start_seq_len-offset), parser->prebuf_len-(next_nal+start_seq_len-offset)); + parser->prebuf_len -= next_nal+start_seq_len-offset; + + return inbuf_len; + } + + /* got a new nal, which is part of the current + * coded picture. add it to buf + */ + if (parser->last_nal_res < 2) { + if (parser->buf_len + next_nal+start_seq_len-offset > MAX_FRAME_SIZE) { + xprintf(parser->xine, XINE_VERBOSITY_LOG, "h264_parser: buf underrun!\n"); + parser->buf_len = 0; + *ret_len = 0; + *ret_buf = NULL; + return inbuf_len; + } + + if(parser->nal_size_length > 0) { + static const uint8_t start_seq[3] = { 0x00, 0x00, 0x01 }; + xine_fast_memcpy(parser->buf+parser->buf_len, start_seq, 3); + parser->buf_len += 3; + } + + xine_fast_memcpy(parser->buf+parser->buf_len, parser->prebuf+offset, next_nal+start_seq_len-2*offset); + parser->buf_len += next_nal+start_seq_len-2*offset; + + memmove(parser->prebuf, parser->prebuf+(next_nal+start_seq_len-offset), parser->prebuf_len-(next_nal+start_seq_len-offset)); + parser->prebuf_len -= next_nal+start_seq_len-offset; + } else { + /* got a non-relevant nal, just remove it */ + memmove(parser->prebuf, parser->prebuf+(next_nal+start_seq_len-offset), parser->prebuf_len-(next_nal+start_seq_len-offset)); + parser->prebuf_len -= next_nal+start_seq_len-offset; + } + } + + if (pts != 0 && (parser->pic->pts == 0 || parser->pic->pts != pts)) { + parser->pic->pts = pts; + } + + *ret_buf = NULL; + *ret_len = 0; + return inbuf_len; +} + + +/** + * @return 0: NAL is part of coded picture + * 2: 
NAL is not part of coded picture + * 1: NAL is the beginning of a new coded picture + * 3: NAL is marked as END_OF_SEQUENCE + */ +int parse_nal(uint8_t *buf, int buf_len, struct h264_parser *parser, + struct coded_picture **completed_picture) +{ + int ret = 0; + + struct buf_reader bufr; + + bufr.buf = buf; + bufr.cur_pos = buf; + bufr.cur_offset = 8; + bufr.len = buf_len; + + *completed_picture = NULL; + + struct nal_unit *nal = parse_nal_header(&bufr, parser->pic, parser); + + /** + * we detect the start of a new access unit if + * a non-vcl nal unit is received after a vcl + * nal unit + * NAL_END_OF_SEQUENCE terminates the current + * access unit + */ + if (nal->nal_unit_type >= NAL_SLICE && + nal->nal_unit_type <= NAL_SLICE_IDR) { + parser->position = VCL; + } else if ((parser->position == VCL && + nal->nal_unit_type >= NAL_SEI && + nal->nal_unit_type <= NAL_PPS) || + nal->nal_unit_type == NAL_AU_DELIMITER || + nal->nal_unit_type == NAL_END_OF_SEQUENCE) { + /* start of a new access unit! 
*/ + *completed_picture = parser->pic; + parser->pic = create_coded_picture(); + + if(parser->last_vcl_nal != NULL) { + release_nal_unit(parser->last_vcl_nal); + parser->last_vcl_nal = NULL; + } + parser->position = NON_VCL; + } else { + parser->position = NON_VCL; + } + + switch(nal->nal_unit_type) { + case NAL_SPS: + nal_buffer_append(parser->sps_buffer, nal); + break; + case NAL_PPS: + nal_buffer_append(parser->pps_buffer, nal); + break; + case NAL_SEI: { + if (parser->pic != NULL) { + if(parser->pic->sei_nal) { + release_nal_unit(parser->pic->sei_nal); + } + lock_nal_unit(nal); + parser->pic->sei_nal = nal; + interpret_sei(parser->pic); + } + } + default: + break; + } + + /** + * in case of an access unit which does not contain any + * non-vcl nal units we have to detect the new access + * unit through the algorithm for detecting first vcl nal + * units of a primary coded picture + */ + if (parser->position == VCL && parser->last_vcl_nal != NULL && + nal->nal_unit_type >= NAL_SLICE && nal->nal_unit_type <= NAL_SLICE_IDR) { + /** + * frame boundary detection according to + * ITU-T Rec. 
H264 (11/2007) chapt 7.4.1.2.4, p65 + */ + struct nal_unit* last_nal = parser->last_vcl_nal; + + if (nal == NULL || last_nal == NULL) { + ret = 1; + } else if (nal->slc.frame_num != last_nal->slc.frame_num) { + ret = 1; + } else if (nal->slc.pic_parameter_set_id + != last_nal->slc.pic_parameter_set_id) { + ret = 1; + } else if (nal->slc.field_pic_flag + != last_nal->slc.field_pic_flag) { + ret = 1; + } else if (nal->slc.bottom_field_flag + != last_nal->slc.bottom_field_flag) { + ret = 1; + } else if (nal->nal_ref_idc != last_nal->nal_ref_idc && + (nal->nal_ref_idc == 0 || last_nal->nal_ref_idc == 0)) { + ret = 1; + } else if (nal->sps.pic_order_cnt_type == 0 + && last_nal->sps.pic_order_cnt_type == 0 + && (nal->slc.pic_order_cnt_lsb != last_nal->slc.pic_order_cnt_lsb + || nal->slc.delta_pic_order_cnt_bottom + != last_nal->slc.delta_pic_order_cnt_bottom)) { + ret = 1; + } else if (nal->sps.pic_order_cnt_type == 1 + && last_nal->sps.pic_order_cnt_type == 1 + && (nal->slc.delta_pic_order_cnt[0] + != last_nal->slc.delta_pic_order_cnt[0] + || nal->slc.delta_pic_order_cnt[1] + != last_nal->slc.delta_pic_order_cnt[1])) { + ret = 1; + } else if (nal->nal_unit_type != last_nal->nal_unit_type && (nal->nal_unit_type + == NAL_SLICE_IDR || last_nal->nal_unit_type == NAL_SLICE_IDR)) { + ret = 1; + } else if (nal->nal_unit_type == NAL_SLICE_IDR + && last_nal->nal_unit_type == NAL_SLICE_IDR && nal->slc.idr_pic_id + != last_nal->slc.idr_pic_id) { + ret = 1; + } + + /* increase the slice_cnt until a new frame is detected */ + if (ret && *completed_picture == NULL) { + *completed_picture = parser->pic; + parser->pic = create_coded_picture(); + } + + } else if (nal->nal_unit_type == NAL_PPS || nal->nal_unit_type == NAL_SPS) { + ret = 2; + } else if (nal->nal_unit_type == NAL_AU_DELIMITER) { + ret = 2; + } else if (nal->nal_unit_type == NAL_END_OF_SEQUENCE) { + ret = 3; + } else if (nal->nal_unit_type >= NAL_SEI) { + ret = 2; + } + + if (parser->pic) { + + if (nal->nal_unit_type == 
NAL_SLICE_IDR) { + parser->pic->flag_mask |= IDR_PIC; + } + + /* reference flag is only set for slice NALs, + * as PPS/SPS/SEI only references are not relevant + * for the vdpau decoder. + */ + if (nal->nal_ref_idc && + nal->nal_unit_type <= NAL_SLICE_IDR) { + parser->pic->flag_mask |= REFERENCE; + } else if (!nal->nal_ref_idc && + nal->nal_unit_type >= NAL_SLICE && + nal->nal_unit_type <= NAL_PART_C) { + /* remove reference flag if a picture is not + * continously flagged as reference. */ + parser->pic->flag_mask &= ~REFERENCE; + } + + if (nal->nal_unit_type >= NAL_SLICE && + nal->nal_unit_type <= NAL_SLICE_IDR) { + lock_nal_unit(nal); + if(parser->last_vcl_nal) { + release_nal_unit(parser->last_vcl_nal); + } + parser->last_vcl_nal = nal; + + parser->pic->slice_cnt++; + if(parser->pic->slc_nal) { + release_nal_unit(parser->pic->slc_nal); + } + lock_nal_unit(nal); + parser->pic->slc_nal = nal; + + interpret_slice_header(parser, nal); + } + + if (*completed_picture != NULL && + (*completed_picture)->slice_cnt > 0) { + calculate_pic_order(parser, *completed_picture, + &((*completed_picture)->slc_nal->slc)); + interpret_sps(*completed_picture, parser); + interpret_pps(*completed_picture); + } + } + + release_nal_unit(nal); + return ret; +} + +int seek_for_nal(uint8_t *buf, int buf_len, struct h264_parser *parser) +{ + if(buf_len <= 0) + return -1; + + if(parser->nal_size_length > 0) { + if(buf_len < parser->nal_size_length) { + return -1; + } + + uint32_t next_nal = parser->next_nal_position; + if(!next_nal) { + struct buf_reader bufr; + + bufr.buf = buf; + bufr.cur_pos = buf; + bufr.cur_offset = 8; + bufr.len = buf_len; + + next_nal = read_bits(&bufr, parser->nal_size_length*8)+parser->nal_size_length; + } + + if(next_nal > buf_len) { + parser->next_nal_position = next_nal; + return -1; + } else + parser->next_nal_position = 0; + + return next_nal; + } + + /* NAL_END_OF_SEQUENCE has only 1 byte, so + * we do not need to search for the next start sequence */ + 
if(buf[0] == NAL_END_OF_SEQUENCE) + return 1; + + int i; + for (i = 0; i < buf_len - 2; i++) { + if (buf[i] == 0x00 && buf[i + 1] == 0x00 && buf[i + 2] == 0x01) { + //lprintf("found nal at: %d\n", i); + return i; + } + } + + return -1; +} diff --git a/src/video_dec/libvdpau/h264_parser.h b/src/video_dec/libvdpau/h264_parser.h new file mode 100644 index 000000000..49bc56bab --- /dev/null +++ b/src/video_dec/libvdpau/h264_parser.h @@ -0,0 +1,110 @@ +/* + * Copyright (C) 2008 Julian Scheel + * + * This file is part of xine, a free video player. + * + * xine is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * xine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110, USA + * + * h264_parser.h: Almost full-features H264 NAL-Parser + */ + +#ifndef NAL_PARSER_H_ +#define NAL_PARSER_H_ + +#include <stdlib.h> + +#include <xine/xine_internal.h> +#include "nal.h" +#include "dpb.h" + +#define MAX_FRAME_SIZE 1024*1024 + +/* specifies wether the parser last parsed + * non-vcl or vcl nal units. 
depending on + * this the access unit boundaries are detected + */ +enum parser_position { + NON_VCL, + VCL +}; + +enum parser_flags { + CPB_DPB_DELAYS_PRESENT = 0x01, + PIC_STRUCT_PRESENT = 0x02 +}; + +struct h264_parser { + uint8_t buf[MAX_FRAME_SIZE]; + uint32_t buf_len; + + /* prebuf is used to store the currently + * processed nal unit */ + uint8_t prebuf[MAX_FRAME_SIZE]; + uint32_t prebuf_len; + uint32_t next_nal_position; + + uint8_t last_nal_res; + + uint8_t nal_size_length; + uint32_t next_nal_size; + uint8_t *nal_size_length_buf; + uint8_t have_nal_size_length_buf; + + enum parser_position position; + + struct coded_picture *pic; + + struct nal_unit *last_vcl_nal; + struct nal_buffer *sps_buffer; + struct nal_buffer *pps_buffer; + + uint32_t prev_pic_order_cnt_lsb; + uint32_t prev_pic_order_cnt_msb; + uint32_t frame_num_offset; + + int32_t prev_top_field_order_cnt; + + uint32_t curr_pic_num; + + uint16_t flag_mask; + + /* this is dpb used for reference frame + * heading to vdpau + unordered frames + */ + struct dpb *dpb; + + xine_t *xine; +}; + +int parse_nal(uint8_t *buf, int buf_len, struct h264_parser *parser, + struct coded_picture **completed_picture); + +int seek_for_nal(uint8_t *buf, int buf_len, struct h264_parser *parser); + +struct h264_parser* init_parser(xine_t *xine); +void reset_parser(struct h264_parser *parser); +void free_parser(struct h264_parser *parser); +int parse_frame(struct h264_parser *parser, uint8_t *inbuf, int inbuf_len, + int64_t pts, + uint8_t **ret_buf, uint32_t *ret_len, struct coded_picture **ret_pic); + +/* this has to be called after decoding the frame delivered by parse_frame, + * but before adding a decoded frame to the dpb. 
+ */ +void process_mmc_operations(struct h264_parser *parser, struct coded_picture *picture); + +void parse_codec_private(struct h264_parser *parser, uint8_t *inbuf, int inbuf_len); + +#endif diff --git a/src/video_dec/libvdpau/nal.c b/src/video_dec/libvdpau/nal.c new file mode 100644 index 000000000..c3693c5f2 --- /dev/null +++ b/src/video_dec/libvdpau/nal.c @@ -0,0 +1,213 @@ +/* + * Copyright (C) 2008 Julian Scheel + * + * This file is part of xine, a free video player. + * + * xine is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * xine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110, USA + * + * nal.c: nal-structure utility functions + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "nal.h" +#include <xine/xine_internal.h> + +struct nal_buffer* create_nal_buffer(uint8_t max_size) +{ + struct nal_buffer *nal_buffer = calloc(1, sizeof(struct nal_buffer)); + nal_buffer->max_size = max_size; + + return nal_buffer; +} + +/** + * destroys a nal buffer. 
all referenced nals are released + */ +void free_nal_buffer(struct nal_buffer *nal_buffer) +{ + struct nal_unit *nal = nal_buffer->first; + + while (nal) { + struct nal_unit *delete = nal; + nal = nal->next; + release_nal_unit(delete); + } + + free(nal_buffer); +} + +/** + * appends a nal unit to the end of the buffer + */ +void nal_buffer_append(struct nal_buffer *nal_buffer, struct nal_unit *nal) +{ + if(nal_buffer->used == nal_buffer->max_size) { + nal_buffer_remove(nal_buffer, nal_buffer->first); + } + + if (nal_buffer->first == NULL) { + nal_buffer->first = nal_buffer->last = nal; + nal->prev = nal->next = NULL; + + lock_nal_unit(nal); + nal_buffer->used++; + } else if (nal_buffer->last != NULL) { + nal_buffer->last->next = nal; + nal->prev = nal_buffer->last; + nal_buffer->last = nal; + + lock_nal_unit(nal); + nal_buffer->used++; + } else { + lprintf("ERR: nal_buffer is in a broken state\n"); + } +} + +void nal_buffer_remove(struct nal_buffer *nal_buffer, struct nal_unit *nal) +{ + if (nal == nal_buffer->first && nal == nal_buffer->last) { + nal_buffer->first = nal_buffer->last = NULL; + } else { + if (nal == nal_buffer->first) { + nal_buffer->first = nal->next; + nal_buffer->first->prev = NULL; + } else { + nal->prev->next = nal->next; + } + + if (nal == nal_buffer->last) { + nal_buffer->last = nal->prev; + nal_buffer->last->next = NULL; + } else { + nal->next->prev = nal->prev; + } + } + + nal->next = nal->prev = NULL; + release_nal_unit(nal); + + nal_buffer->used--; +} + +void nal_buffer_flush(struct nal_buffer *nal_buffer) +{ + while(nal_buffer->used > 0) { + nal_buffer_remove(nal_buffer, nal_buffer->first); + } +} + +/** + * returns the last element in the buffer + */ +struct nal_unit *nal_buffer_get_last(struct nal_buffer *nal_buffer) +{ + return nal_buffer->last; +} + +/** + * get a nal unit from a nal_buffer from it's + * seq parameter_set_id + */ +struct nal_unit* nal_buffer_get_by_sps_id(struct nal_buffer *nal_buffer, + uint32_t 
seq_parameter_set_id) +{ + struct nal_unit *nal = nal_buffer->last; + + if (nal != NULL) { + do { + if(nal->nal_unit_type == NAL_SPS) { + if(nal->sps.seq_parameter_set_id == seq_parameter_set_id) { + return nal; + } + } + + nal = nal->prev; + } while(nal != NULL); + } + + return NULL; +} + +/** + * get a nal unit from a nal_buffer from it's + * pic parameter_set_id + */ +struct nal_unit* nal_buffer_get_by_pps_id(struct nal_buffer *nal_buffer, + uint32_t pic_parameter_set_id) +{ + struct nal_unit *nal = nal_buffer->last; + + if (nal != NULL) { + do { + if(nal->nal_unit_type == NAL_PPS) { + if(nal->pps.pic_parameter_set_id == pic_parameter_set_id) { + return nal; + } + } + + nal = nal->prev; + } while(nal != NULL); + } + + return NULL; +} + +/** + * create a new nal unit, with a lock_counter of 1 + */ +struct nal_unit* create_nal_unit() +{ + struct nal_unit *nal = calloc(1, sizeof(struct nal_unit)); + nal->lock_counter = 1; + + return nal; +} + +void lock_nal_unit(struct nal_unit *nal) +{ + nal->lock_counter++; +} + +void release_nal_unit(struct nal_unit *nal) +{ + if(!nal) + return; + + nal->lock_counter--; + + if(nal->lock_counter <= 0) { + free(nal); + } +} + +/** + * creates a copy of a nal unit with a single lock + */ +void copy_nal_unit(struct nal_unit *dest, struct nal_unit *src) +{ + /* size without pps, sps and slc units: */ + int size = sizeof(struct nal_unit); + + xine_fast_memcpy(dest, src, size); + dest->lock_counter = 1; + dest->prev = dest->next = NULL; +} diff --git a/src/video_dec/libvdpau/nal.h b/src/video_dec/libvdpau/nal.h new file mode 100644 index 000000000..f40617cd0 --- /dev/null +++ b/src/video_dec/libvdpau/nal.h @@ -0,0 +1,501 @@ +/* + * Copyright (C) 2008 Julian Scheel + * + * This file is part of xine, a free video player. 
+ * + * xine is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * xine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110, USA + * + * nal.h: H264 NAL structures + */ + +#ifndef NAL_H_ +#define NAL_H_ +#include <stdint.h> +#include <vdpau/vdpau.h> + +enum nal_unit_types +{ + NAL_UNSPECIFIED = 0, + NAL_SLICE, + NAL_PART_A, + NAL_PART_B, + NAL_PART_C, + NAL_SLICE_IDR, + NAL_SEI, + NAL_SPS, + NAL_PPS, + NAL_AU_DELIMITER, + NAL_END_OF_SEQUENCE, + NAL_END_OF_STREAM, + NAL_FILLER_DATA, + NAL_SPS_EXT +}; + +enum pic_struct { + DISP_FRAME = 0, + DISP_TOP, + DISP_BOTTOM, + DISP_TOP_BOTTOM, + DISP_BOTTOM_TOP, + DISP_TOP_BOTTOM_TOP, + DISP_BOTTOM_TOP_BOTTOM, + DISP_FRAME_DOUBLING, + DISP_FRAME_TRIPLING +}; + +enum ct_type { + CT_PROGRESSIVE = 0, + CT_INTERLACED, + CT_UNKNOWN, + CT_RESERVED +}; + +/* slice types repeat from 5-9, we + * need a helper function for comparison + */ +enum slice_types +{ + SLICE_P = 0, SLICE_B, SLICE_I, SLICE_SP, SLICE_SI +}; + +enum aspect_ratio +{ + ASPECT_UNSPECIFIED = 0, + ASPECT_1_1, + ASPECT_12_11, + ASPECT_10_11, + ASPECT_16_11, + ASPECT_40_33, + ASPECT_24_11, + ASPECT_20_11, + ASPECT_32_11, + ASPECT_80_33, + ASPECT_18_11, + ASPECT_15_11, + ASPECT_64_33, + ASPECT_160_99, + ASPECT_4_3, + ASPECT_3_2, + ASPECT_2_1, + ASPECT_RESERVED, + ASPECT_EXTENDED_SAR=255 +}; + +static const uint8_t zigzag_4x4[16] = { + 0+0*4, 1+0*4, 0+1*4, 0+2*4, + 1+1*4, 2+0*4, 3+0*4, 2+1*4, + 
1+2*4, 0+3*4, 1+3*4, 2+2*4, + 3+1*4, 3+2*4, 2+3*4, 3+3*4, +}; + +static const uint8_t zigzag_8x8[64] = { + 0+0*8, 1+0*8, 0+1*8, 0+2*8, + 1+1*8, 2+0*8, 3+0*8, 2+1*8, + 1+2*8, 0+3*8, 0+4*8, 1+3*8, + 2+2*8, 3+1*8, 4+0*8, 5+0*8, + 4+1*8, 3+2*8, 2+3*8, 1+4*8, + 0+5*8, 0+6*8, 1+5*8, 2+4*8, + 3+3*8, 4+2*8, 5+1*8, 6+0*8, + 7+0*8, 6+1*8, 5+2*8, 4+3*8, + 3+4*8, 2+5*8, 1+6*8, 0+7*8, + 1+7*8, 2+6*8, 3+5*8, 4+4*8, + 5+3*8, 6+2*8, 7+1*8, 7+2*8, + 6+3*8, 5+4*8, 4+5*8, 3+6*8, + 2+7*8, 3+7*8, 4+6*8, 5+5*8, + 6+4*8, 7+3*8, 7+4*8, 6+5*8, + 5+6*8, 4+7*8, 5+7*8, 6+6*8, + 7+5*8, 7+6*8, 6+7*8, 7+7*8, +}; + +static inline uint32_t slice_type(uint32_t slice_type) +{ + return (slice_type < 10 ? slice_type % 5 : slice_type); +} + +#if 0 +static inline void print_slice_type(uint32_t slice_type) +{ + switch(slice_type) { + case SLICE_P: + printf("SLICE_P\n"); + break; + case SLICE_B: + printf("SLICE_B\n"); + break; + case SLICE_I: + printf("SLICE_I\n"); + break; + case SLICE_SP: + printf("SLICE_SP\n"); + break; + case SLICE_SI: + printf("SLICE_SI\n"); + break; + default: + printf("Unknown SLICE\n"); + } +} +#endif + +struct hrd_parameters +{ + uint32_t cpb_cnt_minus1; + uint8_t bit_rate_scale; + uint8_t cpb_size_scale; + + uint32_t bit_rate_value_minus1[32]; + uint32_t cpb_size_value_minus1[32]; + uint8_t cbr_flag[32]; + + uint8_t initial_cpb_removal_delay_length_minus1; + uint8_t cpb_removal_delay_length_minus1; + uint8_t dpb_output_delay_length_minus1; + uint8_t time_offset_length; +}; + +struct seq_parameter_set_rbsp +{ + uint8_t profile_idc; // 0xff + uint8_t constraint_setN_flag; // 0x0f + uint8_t level_idc; // 0xff + uint32_t seq_parameter_set_id; + uint32_t chroma_format_idc; + uint8_t separate_colour_plane_flag; // 0x01 + uint32_t bit_depth_luma_minus8; + uint32_t bit_depth_chroma_minus8; + uint8_t qpprime_y_zero_transform_bypass_flag; + uint8_t seq_scaling_matrix_present_flag; + + /* if(seq_scaling_matrix_present_flag) */ + uint8_t seq_scaling_list_present_flag[8]; + + uint8_t 
scaling_lists_4x4[6][16]; + uint8_t scaling_lists_8x8[2][64]; + /* endif */ + + uint32_t log2_max_frame_num_minus4; + uint32_t max_frame_num; + uint32_t pic_order_cnt_type; + // if pic_order_cnt_type==0 + uint32_t log2_max_pic_order_cnt_lsb_minus4; + // else + uint8_t delta_pic_order_always_zero_flag; + int32_t offset_for_non_ref_pic; + int32_t offset_for_top_to_bottom_field; + uint8_t num_ref_frames_in_pic_order_cnt_cycle; + int32_t offset_for_ref_frame[256]; + // TODO: some more ignored here + uint32_t num_ref_frames; + uint8_t gaps_in_frame_num_value_allowed_flag; + /*uint32_t pic_width_in_mbs_minus1; + uint32_t pic_height_in_map_units_minus1;*/ + uint32_t pic_width; + uint32_t pic_height; + uint8_t frame_mbs_only_flag; + uint8_t mb_adaptive_frame_field_flag; + uint8_t direct_8x8_inference_flag; + uint8_t frame_cropping_flag; + uint32_t frame_crop_left_offset; + uint32_t frame_crop_right_offset; + uint32_t frame_crop_top_offset; + uint32_t frame_crop_bottom_offset; + uint8_t vui_parameters_present_flag; + + /* vui_parameters */ + struct + { + uint8_t aspect_ration_info_present_flag; + + /* aspect_ration_info_present_flag == 1 */ + uint8_t aspect_ratio_idc; + uint16_t sar_width; + uint16_t sar_height; + + uint8_t overscan_info_present_flag; + /* overscan_info_present_flag == 1 */ + uint8_t overscan_appropriate_flag; + + uint8_t video_signal_type_present_flag; + /* video_signal_type_present_flag == 1 */ + uint8_t video_format; + uint8_t video_full_range_flag; + uint8_t colour_description_present; + /* colour_description_present == 1 */ + uint8_t colour_primaries; + uint8_t transfer_characteristics; + uint8_t matrix_coefficients; + + uint8_t chroma_loc_info_present_flag; + /* chroma_loc_info_present_flag == 1 */ + uint8_t chroma_sample_loc_type_top_field; + uint8_t chroma_sample_loc_type_bottom_field; + + uint8_t timing_info_present_flag; + /* timing_info_present_flag == 1 */ + uint32_t num_units_in_tick; + uint32_t time_scale; + uint8_t fixed_frame_rate_flag; + + 
uint8_t nal_hrd_parameters_present_flag; + struct hrd_parameters nal_hrd_parameters; + + uint8_t vc1_hrd_parameters_present_flag; + struct hrd_parameters vc1_hrd_parameters; + + uint8_t low_delay_hrd_flag; + + uint8_t pic_struct_present_flag; + uint8_t bitstream_restriction_flag; + + /* bitstream_restriction_flag == 1 */ + uint8_t motion_vectors_over_pic_boundaries; + uint32_t max_bytes_per_pic_denom; + uint32_t max_bits_per_mb_denom; + uint32_t log2_max_mv_length_horizontal; + uint32_t log2_max_mv_length_vertical; + uint32_t num_reorder_frames; + uint32_t max_dec_frame_buffering; + } vui_parameters; + +}; + +struct pic_parameter_set_rbsp +{ + uint32_t pic_parameter_set_id; + uint32_t seq_parameter_set_id; + uint8_t entropy_coding_mode_flag; + uint8_t pic_order_present_flag; + + uint32_t num_slice_groups_minus1; + + /* num_slice_groups_minus1 > 0 */ + uint32_t slice_group_map_type; + + /* slice_group_map_type == 1 */ + uint32_t run_length_minus1[64]; + + /* slice_group_map_type == 2 */ + uint32_t top_left[64]; + uint32_t bottom_right[64]; + + /* slice_group_map_type == 3,4,5 */ + uint8_t slice_group_change_direction_flag; + uint32_t slice_group_change_rate_minus1; + + /* slice_group_map_type == 6 */ + uint32_t pic_size_in_map_units_minus1; + uint8_t slice_group_id[64]; + + uint32_t num_ref_idx_l0_active_minus1; + uint32_t num_ref_idx_l1_active_minus1; + uint8_t weighted_pred_flag; + uint8_t weighted_bipred_idc; + int32_t pic_init_qp_minus26; + int32_t pic_init_qs_minus26; + int32_t chroma_qp_index_offset; + uint8_t deblocking_filter_control_present_flag; + uint8_t constrained_intra_pred_flag; + uint8_t redundant_pic_cnt_present_flag; + + /* if(more_rbsp_data) */ + uint8_t transform_8x8_mode_flag; + uint8_t pic_scaling_matrix_present_flag; + + /* if(pic_scaling_matrix_present_flag) */ + uint8_t pic_scaling_list_present_flag[8]; + + uint8_t scaling_lists_4x4[6][16]; + uint8_t scaling_lists_8x8[2][64]; + + int32_t second_chroma_qp_index_offset; +}; + +/*struct 
clock_timestamp { + uint8_t ct_type; + uint8_t nuit_fiel_based_flag; + uint8_t counting_type; + uint8_t full_timestamp_flag; + uint8_t discontinuity_flag; + uint8_t cnt_dropped_flag; + uint8_t n_frames +};*/ + +/* sei contains several additional info, we do + * only care for pic_timing, to handle display + * reordering + */ +struct sei_message +{ + uint32_t payload_type; + uint8_t last_payload_type_byte; + uint32_t payload_size; + uint8_t last_payload_size_byte; + + struct + { + /* cpb_dpb_delays_present_flag == 1 */ + uint8_t cpb_removal_delay; + uint8_t dpb_output_delay; + + uint8_t pic_struct; + uint8_t ct_type : 1; + uint8_t nuit_field_based_flag : 1; + uint8_t counting_type : 5; + uint8_t full_timestamp_flag : 1; + uint8_t discontinuity_flag : 1; + uint8_t cnt_dropped_flag : 1; + uint8_t n_frames; + + uint8_t seconds_value : 6; + uint8_t minutes_value : 6; + uint8_t hours_value : 5; + + int32_t time_offset; + } pic_timing; +}; + +struct slice_header +{ + uint32_t first_mb_in_slice; + uint32_t slice_type; + uint32_t pic_parameter_set_id; + uint8_t colour_plane_id; + uint32_t frame_num; + uint8_t field_pic_flag; + uint8_t bottom_field_flag; + uint32_t idr_pic_id; + + /* sps->pic_order_cnt_type == 0 */ + uint32_t pic_order_cnt_lsb; + int32_t delta_pic_order_cnt_bottom; + /* sps->pic_order_cnt_type == 1 && !sps->delta_pic_order_always_zero_flag */ + int32_t delta_pic_order_cnt[2]; + + /* pps->redundant_pic_cnt_present_flag == 1 */ + int32_t redundant_pic_cnt; + + /* slice_type == B */ + uint8_t direct_spatial_mv_pred_flag; + + /* slice_type == P, SP, B */ + uint8_t num_ref_idx_active_override_flag; + /* num_ref_idx_active_override_flag == 1 */ + uint32_t num_ref_idx_l0_active_minus1; + /* slice type == B */ + uint32_t num_ref_idx_l1_active_minus1; + + /* ref_pic_list_reordering */ + struct + { + /* slice_type != I && slice_type != SI */ + uint8_t ref_pic_list_reordering_flag_l0; + + /* slice_type == B */ + uint8_t ref_pic_list_reordering_flag_l1; + + /* 
ref_pic_list_reordering_flag_l0 == 1 */ + uint32_t reordering_of_pic_nums_idc; + + /* reordering_of_pic_nums_idc == 0, 1 */ + uint32_t abs_diff_pic_num_minus1; + + /* reordering_of_pic_nums_idc == 2) */ + uint32_t long_term_pic_num; + } ref_pic_list_reordering; + + /* pred_weight_table */ + struct + { + uint32_t luma_log2_weight_denom; + + /* chroma_format_idc != 0 */ + uint32_t chroma_log2_weight_denom; + + int32_t luma_weight_l0[32]; + int32_t luma_offset_l0[32]; + + int32_t chroma_weight_l0[32][2]; + int32_t chroma_offset_l0[32][2]; + + int32_t luma_weight_l1[32]; + int32_t luma_offset_l1[32]; + + int32_t chroma_weight_l1[32][2]; + int32_t chroma_offset_l1[32][2]; + } pred_weight_table; + + /* def_rec_pic_marking */ + struct + { + + /* nal_unit_type == NAL_SLICE_IDR */ + uint8_t no_output_of_prior_pics_flag; + uint8_t long_term_reference_flag; + + /* else */ + uint8_t adaptive_ref_pic_marking_mode_flag; + uint32_t memory_management_control_operation; + + uint32_t difference_of_pic_nums_minus1; + uint32_t long_term_pic_num; + uint32_t long_term_frame_idx; + uint32_t max_long_term_frame_idx_plus1; + } dec_ref_pic_marking[10]; + uint32_t dec_ref_pic_marking_count; +}; + +struct nal_unit { + uint8_t nal_ref_idc; // 0x03 + enum nal_unit_types nal_unit_type; // 0x1f + + //union { + struct sei_message sei; + struct seq_parameter_set_rbsp sps; + struct pic_parameter_set_rbsp pps; + struct slice_header slc; + //}; + + struct nal_unit *prev; + struct nal_unit *next; + + uint32_t lock_counter; +}; + +struct nal_buffer { + struct nal_unit *first; + struct nal_unit *last; + + uint8_t max_size; + uint8_t used; +}; + +struct nal_buffer* create_nal_buffer(uint8_t max_size); +void free_nal_buffer(struct nal_buffer *nal_buffer); +void nal_buffer_append(struct nal_buffer *nal_buffer, struct nal_unit *nal); +void nal_buffer_remove(struct nal_buffer *nal_buffer, struct nal_unit *nal); +void nal_buffer_flush(struct nal_buffer *nal_buffer); + +struct nal_unit* 
nal_buffer_get_by_sps_id(struct nal_buffer *nal_buffer, + uint32_t seq_parameter_set_id); +struct nal_unit* nal_buffer_get_by_pps_id(struct nal_buffer *nal_buffer, + uint32_t pic_parameter_set_id); +struct nal_unit* nal_buffer_get_last(struct nal_buffer *nal_buffer); + +struct nal_unit* create_nal_unit(void); +void lock_nal_unit(struct nal_unit *nal); +void release_nal_unit(struct nal_unit *nal); +void copy_nal_unit(struct nal_unit *dest, struct nal_unit *src); + +#endif /* NAL_H_ */ diff --git a/src/video_dec/libvdpau/vdpau_h264.c b/src/video_dec/libvdpau/vdpau_h264.c new file mode 100644 index 000000000..25ed62295 --- /dev/null +++ b/src/video_dec/libvdpau/vdpau_h264.c @@ -0,0 +1,1014 @@ +/* + * Copyright (C) 2008 Julian Scheel + * + * kate: space-indent on; indent-width 2; mixedindent off; indent-mode cstyle; remove-trailing-space on; + * + * This file is part of xine, a free video player. + * + * xine is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * xine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110, USA + * + * vdpau_h264.c: H264 Video Decoder utilizing nvidia VDPAU engine + */ + +#define LOG_MODULE "vdpau_h264" + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> +#include <unistd.h> +#include <vdpau/vdpau.h> + +#include <xine/xine_internal.h> +#include <xine/video_out.h> +#include <xine/buffer.h> +#include <xine/xineutils.h> +#include "bswap.h" +#include "accel_vdpau.h" +#include "h264_parser.h" +#include "dpb.h" +#include "cpb.h" + +//#define DEBUG_H264 + +#define VIDEOBUFSIZE 128*1024 + +typedef struct { + video_decoder_class_t decoder_class; +} vdpau_h264_class_t; + +typedef struct vdpau_h264_decoder_s { + video_decoder_t video_decoder; /* parent video decoder structure */ + + vdpau_h264_class_t *class; + xine_stream_t *stream; + + /* these are traditional variables in a video decoder object */ + uint64_t video_step; /* frame duration in pts units */ + uint64_t reported_video_step; /* frame duration in pts units */ + + int width; /* the width of a video frame */ + int height; /* the height of a video frame */ + double ratio; /* the width to height ratio */ + + + struct h264_parser *nal_parser; /* h264 nal parser. 
extracts stream data for vdpau */
+
+  struct decoded_picture *incomplete_pic;
+  uint32_t last_top_field_order_cnt;
+
+  int have_frame_boundary_marks;
+  int wait_for_frame_start;
+
+  VdpDecoder decoder;
+  int decoder_started;
+  int progressive_cnt; /* count of progressive marked frames in line */
+
+  VdpColorStandard color_standard;
+  VdpDecoderProfile profile;
+  vdpau_accel_t *vdpau_accel;
+
+  xine_t *xine;
+
+  struct coded_picture *completed_pic;
+  vo_frame_t *dangling_img;
+
+  uint8_t *codec_private;
+  uint32_t codec_private_len;
+
+  int vdp_runtime_nr;
+
+  int reset;
+
+} vdpau_h264_decoder_t;
+
+static void vdpau_h264_reset (video_decoder_t *this_gen);
+static void vdpau_h264_flush (video_decoder_t *this_gen);
+
+/**************************************************************************
+ * vdpau_h264 specific decode functions
+ *************************************************************************/
+
+/**************************************************************************
+ * xine video plugin functions
+ *************************************************************************/
+
+#ifdef DEBUG_H264
+/*
+ * Debug helper: print every field of a VdpPictureInfoH264, its scaling
+ * lists and all valid reference frame entries to stdout.
+ */
+static inline void dump_pictureinfo_h264(VdpPictureInfoH264 *pic)
+{
+  int i, j;
+
+  printf("C: slice_count: %d\n", pic->slice_count);
+  printf("C: field_order_cnt[0]: %d\n", pic->field_order_cnt[0]);
+  printf("C: field_order_cnt[1]: %d\n", pic->field_order_cnt[1]);
+  printf("C: is_reference: %d\n", pic->is_reference);
+  printf("C: frame_num: %d\n", pic->frame_num);
+  printf("C: field_pic_flag: %d\n", pic->field_pic_flag);
+  printf("C: bottom_field_flag: %d\n", pic->bottom_field_flag);
+  printf("C: num_ref_frames: %d\n", pic->num_ref_frames);
+  printf("C: mb_adaptive_frame_field_flag: %d\n", pic->mb_adaptive_frame_field_flag);
+  printf("C: constrained_intra_pred_flag: %d\n", pic->constrained_intra_pred_flag);
+  printf("C: weighted_pred_flag: %d\n", pic->weighted_pred_flag);
+  printf("C: weighted_bipred_idc: %d\n", pic->weighted_bipred_idc);
+  printf("C: frame_mbs_only_flag: %d\n", pic->frame_mbs_only_flag);
+  printf("C: transform_8x8_mode_flag: %d\n", pic->transform_8x8_mode_flag);
+  printf("C: chroma_qp_index_offset: %d\n", pic->chroma_qp_index_offset);
+  printf("C: second_chroma_qp_index_offset: %d\n", pic->second_chroma_qp_index_offset);
+  printf("C: pic_init_qp_minus26: %d\n", pic->pic_init_qp_minus26);
+  printf("C: num_ref_idx_l0_active_minus1: %d\n", pic->num_ref_idx_l0_active_minus1);
+  printf("C: num_ref_idx_l1_active_minus1: %d\n", pic->num_ref_idx_l1_active_minus1);
+  printf("C: log2_max_frame_num_minus4: %d\n", pic->log2_max_frame_num_minus4);
+  printf("C: pic_order_cnt_type: %d\n", pic->pic_order_cnt_type);
+  printf("C: log2_max_pic_order_cnt_lsb_minus4: %d\n", pic->log2_max_pic_order_cnt_lsb_minus4);
+  printf("C: delta_pic_order_always_zero_flag: %d\n", pic->delta_pic_order_always_zero_flag);
+  printf("C: direct_8x8_inference_flag: %d\n", pic->direct_8x8_inference_flag);
+  printf("C: entropy_coding_mode_flag: %d\n", pic->entropy_coding_mode_flag);
+  printf("C: pic_order_present_flag: %d\n", pic->pic_order_present_flag);
+  printf("C: deblocking_filter_control_present_flag: %d\n", pic->deblocking_filter_control_present_flag);
+  printf("C: redundant_pic_cnt_present_flag: %d\n", pic->redundant_pic_cnt_present_flag);
+
+  for(i = 0; i < 6; i++) {
+    printf("C: scalint_list4x4[%d]:\nC:", i);
+    for(j = 0; j < 16; j++) {
+      printf(" [%d]", pic->scaling_lists_4x4[i][j]);
+      /* break the row after every 8th entry (was: after entries 0 and 8) */
+      if((j + 1) % 8 == 0)
+        printf("\nC:");
+    }
+    printf("C: \n");
+  }
+  for(i = 0; i < 2; i++) {
+    printf("C: scalint_list8x8[%d]:\nC:", i);
+    for(j = 0; j < 64; j++) {
+      printf(" [%d] ", pic->scaling_lists_8x8[i][j]);
+      if((j + 1) % 8 == 0)
+        printf("\nC:");
+    }
+    printf("C: \n");
+  }
+
+  for(i = 0; i < 16; i++) {
+    if(pic->referenceFrames[i].surface != VDP_INVALID_HANDLE) {
+      printf("C: -------------------\n");
+      printf("C: Reference Frame %d:\n", i);
+      printf("C: frame_idx: %d\n", pic->referenceFrames[i].frame_idx);
+      printf("C: field_order_cnt[0]: %d\n", pic->referenceFrames[i].field_order_cnt[0]);
+      /* fixed: this line used to print index [0] under the [1] label */
+      printf("C: field_order_cnt[1]: %d\n", pic->referenceFrames[i].field_order_cnt[1]);
+      printf("C: is_long_term: %d\n", pic->referenceFrames[i].is_long_term);
+      printf("C: top_is_reference: %d\n", pic->referenceFrames[i].top_is_reference);
+      printf("C: bottom_is_reference: %d\n", pic->referenceFrames[i].bottom_is_reference);
+    }
+  }
+  printf("C: ---------------------------------------------------------------\n");
+}
+#endif
+
+/*
+ * Compute this->ratio from the stored width/height and, if the SPS of the
+ * completed picture signals aspect ratio information in its VUI, scale it
+ * by the signalled sample aspect ratio.
+ */
+static void set_ratio(video_decoder_t *this_gen)
+{
+  vdpau_h264_decoder_t *this = (vdpau_h264_decoder_t *)this_gen;
+  struct seq_parameter_set_rbsp *sps = &this->completed_pic->sps_nal->sps;
+
+  this->ratio = (double)this->width / (double)this->height;
+  if(!sps->vui_parameters.aspect_ration_info_present_flag)
+    return;
+
+  switch(sps->vui_parameters.aspect_ratio_idc) {
+    case ASPECT_1_1:
+      /* square samples, nothing to scale */
+      break;
+    case ASPECT_12_11:
+      this->ratio *= 12.0/11.0;
+      break;
+    case ASPECT_10_11:
+      this->ratio *= 10.0/11.0;
+      break;
+    case ASPECT_16_11:
+      this->ratio *= 16.0/11.0;
+      break;
+    case ASPECT_40_33:
+      this->ratio *= 40.0/33.0;
+      break;
+    case ASPECT_24_11:
+      this->ratio *= 24.0/11.0;
+      break;
+    case ASPECT_20_11:
+      this->ratio *= 20.0/11.0;
+      break;
+    case ASPECT_32_11:
+      this->ratio *= 32.0/11.0;
+      break;
+    case ASPECT_80_33:
+      this->ratio *= 80.0/33.0;
+      break;
+    case ASPECT_18_11:
+      this->ratio *= 18.0/11.0;
+      break;
+    case ASPECT_15_11:
+      this->ratio *= 15.0/11.0;
+      break;
+    case ASPECT_64_33:
+      this->ratio *= 64.0/33.0;
+      break;
+    case ASPECT_160_99:
+      this->ratio *= 160.0/99.0;
+      break;
+    case ASPECT_4_3:
+      this->ratio *= 4.0/3.0;
+      break;
+    case ASPECT_3_2:
+      this->ratio *= 3.0/2.0;
+      break;
+    case ASPECT_2_1:
+      this->ratio *= 2.0/1.0;
+      break;
+    case ASPECT_EXTENDED_SAR:
+      /* a zero sar_width or sar_height would turn the ratio into 0, inf
+       * or NaN; keep the plain width/height ratio in that case */
+      if(sps->vui_parameters.sar_width != 0 &&
+         sps->vui_parameters.sar_height != 0) {
+        this->ratio *=
+          (double)sps->vui_parameters.sar_width/
+          (double)sps->vui_parameters.sar_height;
+      }
+      break;
+    default:
+      break;
+  }
+}
+
+/*
+ * Fill a VdpPictureInfoH264 from the SPS, PPS and slice header attached to
+ * this->completed_pic; the reference frame list is filled from the dpb.
+ */
+static void fill_vdpau_pictureinfo_h264(video_decoder_t *this_gen, uint32_t slice_count, VdpPictureInfoH264 *pic)
+{
+  vdpau_h264_decoder_t *this = (vdpau_h264_decoder_t *)this_gen;
+
+  struct pic_parameter_set_rbsp *pps = &this->completed_pic->pps_nal->pps;
+  struct seq_parameter_set_rbsp *sps = &this->completed_pic->sps_nal->sps;
+  struct slice_header *slc = &this->completed_pic->slc_nal->slc;
+
+  pic->slice_count = slice_count;
+  pic->field_order_cnt[0] = this->completed_pic->top_field_order_cnt;
+  pic->field_order_cnt[1] = this->completed_pic->bottom_field_order_cnt;
+  pic->is_reference =
+    (this->completed_pic->flag_mask & REFERENCE) ? VDP_TRUE : VDP_FALSE;
+  pic->frame_num = slc->frame_num;
+  pic->field_pic_flag = slc->field_pic_flag;
+  pic->bottom_field_flag = slc->bottom_field_flag;
+  pic->num_ref_frames = sps->num_ref_frames;
+  pic->mb_adaptive_frame_field_flag = sps->mb_adaptive_frame_field_flag && !slc->field_pic_flag;
+  pic->constrained_intra_pred_flag = pps->constrained_intra_pred_flag;
+  pic->weighted_pred_flag = pps->weighted_pred_flag;
+  pic->weighted_bipred_idc = pps->weighted_bipred_idc;
+  pic->frame_mbs_only_flag = sps->frame_mbs_only_flag;
+  pic->transform_8x8_mode_flag = pps->transform_8x8_mode_flag;
+  pic->chroma_qp_index_offset = pps->chroma_qp_index_offset;
+  pic->second_chroma_qp_index_offset = pps->second_chroma_qp_index_offset;
+  pic->pic_init_qp_minus26 = pps->pic_init_qp_minus26;
+  pic->num_ref_idx_l0_active_minus1 = pps->num_ref_idx_l0_active_minus1;
+  pic->num_ref_idx_l1_active_minus1 = pps->num_ref_idx_l1_active_minus1;
+  pic->log2_max_frame_num_minus4 = sps->log2_max_frame_num_minus4;
+  pic->pic_order_cnt_type = sps->pic_order_cnt_type;
+  pic->log2_max_pic_order_cnt_lsb_minus4 = sps->log2_max_pic_order_cnt_lsb_minus4;
+  pic->delta_pic_order_always_zero_flag = sps->delta_pic_order_always_zero_flag;
+  pic->direct_8x8_inference_flag = sps->direct_8x8_inference_flag;
+  pic->entropy_coding_mode_flag = pps->entropy_coding_mode_flag;
+  pic->pic_order_present_flag = pps->pic_order_present_flag;
+  pic->deblocking_filter_control_present_flag = pps->deblocking_filter_control_present_flag;
+  pic->redundant_pic_cnt_present_flag = pps->redundant_pic_cnt_present_flag;
+
+  memcpy(pic->scaling_lists_4x4, pps->scaling_lists_4x4, sizeof(pic->scaling_lists_4x4));
+  memcpy(pic->scaling_lists_8x8, pps->scaling_lists_8x8, sizeof(pic->scaling_lists_8x8));
+
+  /* set num_ref_frames to the number of actually available reference frames,
+   * if this is not set generation 3 decoders will fail. */
+  /*pic->num_ref_frames =*/
+  fill_vdpau_reference_list(this->nal_parser->dpb, pic->referenceFrames);
+
+}
+
+/*
+ * Inspect both coded fields of a decoded picture, update the running count
+ * of consecutive progressive pictures and return non-zero once at least
+ * five pictures in a row were classified as progressive.
+ */
+static int check_progressive(video_decoder_t *this_gen, struct decoded_picture *dpic)
+{
+  vdpau_h264_decoder_t *this = (vdpau_h264_decoder_t *)this_gen;
+  int progressive = 0;
+  int i;
+
+  for(i = 0; i < 2; i++) {
+    struct coded_picture *pic = dpic->coded_pic[i];
+    if (!pic) {
+      continue;
+    }
+
+    if (pic->flag_mask & PIC_STRUCT_PRESENT && pic->sei_nal != NULL) {
+      uint8_t pic_struct = pic->sei_nal->sei.pic_timing.pic_struct;
+
+      if (pic_struct == DISP_FRAME) {
+        progressive = 1;
+        continue;
+      } else if (pic_struct == DISP_TOP_BOTTOM ||
+          pic_struct == DISP_BOTTOM_TOP) {
+        progressive = 0;
+        break;
+      }
+
+      /* FIXME: seems unreliable, maybe it's has to be interpreted more complex */
+      /*if (pic->sei_nal->sei.pic_timing.ct_type == CT_INTERLACED) {
+        return 0;
+      } else if (pic->sei_nal->sei.pic_timing.ct_type == CT_PROGRESSIVE) {
+        return 1;
+      } */
+    }
+
+    if (pic->slc_nal->slc.field_pic_flag && pic->pps_nal->pps.pic_order_present_flag) {
+      if(pic->slc_nal->slc.delta_pic_order_cnt_bottom == 1 ||
+          pic->slc_nal->slc.delta_pic_order_cnt_bottom == -1) {
+        progressive = 0;
+        break;
+      } else {
+        progressive = 1;
+        continue;
+      }
+    }
+    if (!pic->slc_nal->slc.field_pic_flag && pic->sps_nal->sps.frame_mbs_only_flag) {
+      progressive = 1;
+      continue;
+    }
+  }
+
+  if (progressive) {
+    this->progressive_cnt++;
+  } else {
+    this->progressive_cnt = 0;
+  }
+
+  /* only switch to progressive mode if at least 5
+   * frames in order were marked as progressive */
+  return (this->progressive_cnt >= 5);
+}
+
+/*
+ * Publish stream size/ratio/duration, pick the VDPAU profile from the SPS
+ * and create the VdpDecoder.
+ * Returns 1 on success (or when no VDPAU runtime is active), 0 if
+ * VdpDecoderCreate failed.
+ */
+static int vdpau_decoder_init(video_decoder_t *this_gen)
+{
+  vdpau_h264_decoder_t *this = (vdpau_h264_decoder_t *)this_gen;
+  vo_frame_t *img;
+
+  if(this->width == 0) {
+    this->width = this->completed_pic->sps_nal->sps.pic_width;
+    this->height = this->completed_pic->sps_nal->sps.pic_height;
+  }
+
+  set_ratio(this_gen);
+
+  _x_stream_info_set( this->stream, XINE_STREAM_INFO_VIDEO_WIDTH, this->width );
+  _x_stream_info_set( this->stream, XINE_STREAM_INFO_VIDEO_HEIGHT, this->height );
+  _x_stream_info_set( this->stream, XINE_STREAM_INFO_VIDEO_RATIO, ((double)10000*this->ratio) );
+  _x_stream_info_set( this->stream, XINE_STREAM_INFO_FRAME_DURATION, (this->reported_video_step = this->video_step) );
+  _x_meta_info_set_utf8( this->stream, XINE_META_INFO_VIDEOCODEC, "H264/AVC (vdpau)" );
+  xine_event_t event;
+  xine_format_change_data_t data;
+  /* zero both structs so that members not assigned below are not sent
+   * to event listeners as stack garbage */
+  memset(&event, 0, sizeof(event));
+  memset(&data, 0, sizeof(data));
+  event.type = XINE_EVENT_FRAME_FORMAT_CHANGE;
+  event.stream = this->stream;
+  event.data = &data;
+  event.data_length = sizeof(data);
+  data.width = this->width;
+  data.height = this->height;
+  data.aspect = this->ratio;
+  xine_event_send( this->stream, &event );
+
+  switch(this->completed_pic->sps_nal->sps.profile_idc) {
+    case 100:
+      this->profile = VDP_DECODER_PROFILE_H264_HIGH;
+      break;
+    case 77:
+      this->profile = VDP_DECODER_PROFILE_H264_MAIN;
+      break;
+    case 66:
+    default:
+      // nvidia's VDPAU doesn't support BASELINE. But most (every?) streams marked BASELINE do not use BASELINE specifics,
+      // so, just force MAIN.
+      //this->profile = VDP_DECODER_PROFILE_H264_BASELINE;
+      this->profile = VDP_DECODER_PROFILE_H264_MAIN;
+      break;
+  }
+
+  // Level 4.1 limits:
+  int ref_frames = 0;
+  if(this->completed_pic->sps_nal->sps.num_ref_frames) {
+    ref_frames = this->completed_pic->sps_nal->sps.num_ref_frames;
+  } else {
+    uint32_t round_width = (this->width + 15) & ~15;
+    uint32_t round_height = (this->height + 15) & ~15;
+    uint32_t surf_size = (round_width * round_height * 3) / 2;
+    /* width or height of 0 gives surf_size 0: avoid dividing by zero
+     * and fall through to the cap of 16 below */
+    ref_frames = (surf_size > 0) ? (int)((12 * 1024 * 1024) / surf_size) : 16;
+  }
+
+  if (ref_frames > 16) {
+    ref_frames = 16;
+  }
+
+  /* NOTE(review): ref_frames is only logged; vdp_decoder_create below is
+   * still called with a fixed 16 -- confirm whether that is intended. */
+  xprintf(this->xine, XINE_VERBOSITY_LOG, "Allocate %d reference frames\n",
+      ref_frames);
+  /* get the vdpau context from vo */
+  //(this->stream->video_out->open) (this->stream->video_out, this->stream);
+  img = this->stream->video_out->get_frame (this->stream->video_out,
+                                            this->width, this->height,
+                                            this->ratio,
+                                            XINE_IMGFMT_VDPAU, VO_BOTH_FIELDS | this->reset);
+  this->reset = 0;
+
+  this->vdpau_accel = (vdpau_accel_t*)img->accel_data;
+
+  img->free(img);
+  img = NULL;
+
+  /*VdpBool is_supported;
+  uint32_t max_level, max_references, max_width, max_height;*/
+  if(this->vdpau_accel->vdp_runtime_nr > 0) {
+    xprintf(this->xine, XINE_VERBOSITY_LOG,
+        "Create decoder: vdp_device: %d, profile: %d, res: %dx%d\n",
+        this->vdpau_accel->vdp_device, this->profile, this->width, this->height);
+
+    VdpStatus status = this->vdpau_accel->vdp_decoder_create(this->vdpau_accel->vdp_device,
+        this->profile, this->width, this->height, 16, &this->decoder);
+
+    if(status != VDP_STATUS_OK) {
+      xprintf(this->xine, XINE_VERBOSITY_LOG, "vdpau_h264: ERROR: VdpDecoderCreate returned status != OK (%s)\n", this->vdpau_accel->vdp_get_error_string(status));
+      /* callers test this->decoder against VDP_INVALID_HANDLE, so make
+       * sure a failed create cannot leave some other value behind */
+      this->decoder = VDP_INVALID_HANDLE;
+      return 0;
+    }
+  }
+  return 1;
+}
+
+/*
+ * Draw every picture the dpb hands out next. A non-zero flush is passed
+ * through to dpb_get_next_out_picture().
+ */
+static void draw_frames(video_decoder_t *this_gen, int flush)
+{
+  vdpau_h264_decoder_t *this = (vdpau_h264_decoder_t *)this_gen;
+
+  struct decoded_picture *decoded_pic = NULL;
+  while ((decoded_pic = dpb_get_next_out_picture(this->nal_parser->dpb, flush)) != NULL) {
+    decoded_pic->img->top_field_first = dp_top_field_first(decoded_pic);
+    decoded_pic->img->progressive_frame = check_progressive(this_gen, decoded_pic);
+#ifdef DEBUG_H264
+    printf("progressive: %d\n", decoded_pic->img->progressive_frame);
+#endif
+    if (flush) {
+      xprintf(this->xine, XINE_VERBOSITY_DEBUG,
+          "h264 flush, draw pts: %"PRId64"\n", decoded_pic->img->pts);
+    }
+
+    decoded_pic->img->draw(decoded_pic->img, this->stream);
+    dpb_unmark_picture_delayed(this->nal_parser->dpb, decoded_pic);
+    decoded_pic = NULL;
+  }
+}
+
+/*
+ * Free the bitstream copy referenced by vdp_buffer and clear the
+ * descriptor, so the same pointer cannot be freed a second time.
+ */
+static void free_bitstream_buffer(VdpBitstreamBuffer *vdp_buffer)
+{
+  free((void *)vdp_buffer->bitstream);
+  vdp_buffer->bitstream = NULL;
+  vdp_buffer->bitstream_bytes = 0;
+}
+
+/*
+ * Decode this->completed_pic from vdp_buffer into a VDPAU surface and add
+ * the result to the dpb.
+ *
+ * vdp_buffer->bitstream is freed on every path of this function.
+ * this->completed_pic is either handed to a decoded picture or freed and
+ * set to NULL before returning.
+ *
+ * Returns 0 if the picture was dropped before decoding (decoder not yet
+ * started on a reference picture, or VDPAU was preempted), 1 otherwise.
+ */
+static int vdpau_decoder_render(video_decoder_t *this_gen, VdpBitstreamBuffer *vdp_buffer, uint32_t slice_count)
+{
+  vdpau_h264_decoder_t *this = (vdpau_h264_decoder_t *)this_gen;
+  vo_frame_t *img = NULL;
+
+  /* if we wait for a second field for this frame, we
+   * have to render to the same surface again.
+   */
+  if (this->incomplete_pic) {
+    img = this->incomplete_pic->img;
+  }
+
+  // FIXME: what is if this is the second field of a field coded
+  // picture? - should we keep the first field in dpb?
+  /* NOTE(review): img keeps pointing at the released picture's frame
+   * here -- confirm the frame is still valid when it is reused below. */
+  if(this->completed_pic->flag_mask & IDR_PIC) {
+    dpb_flush(this->nal_parser->dpb);
+    if(this->incomplete_pic) {
+      release_decoded_picture(this->incomplete_pic);
+      this->incomplete_pic = NULL;
+    }
+  }
+
+  struct seq_parameter_set_rbsp *sps = &this->completed_pic->sps_nal->sps;
+  struct slice_header *slc = &this->completed_pic->slc_nal->slc;
+
+  /* a zero num_units_in_tick or time_scale cannot yield a usable frame
+   * duration (the division would produce 0 or inf), so skip those */
+  if(sps->vui_parameters_present_flag &&
+      sps->vui_parameters.timing_info_present_flag &&
+      sps->vui_parameters.num_units_in_tick != 0 &&
+      sps->vui_parameters.time_scale != 0 &&
+      this->video_step == 0) {
+    this->video_step = 2*90000/(1/((double)sps->vui_parameters.num_units_in_tick/(double)sps->vui_parameters.time_scale));
+  }
+
+  /* go and decode a frame */
+
+  /* check if we expect a second field, but got a frame */
+  if (this->incomplete_pic && img) {
+    if ((this->completed_pic->slc_nal->slc.frame_num !=
+        this->incomplete_pic->coded_pic[0]->slc_nal->slc.frame_num) ||
+        !slc->field_pic_flag) {
+      xprintf(this->xine, XINE_VERBOSITY_DEBUG, "H264 warning: Expected a second field, stream might be broken\n");
+
+      /* remove this pic from dpb, as it is not complete */
+      dpb_unmark_picture_delayed(this->nal_parser->dpb, this->incomplete_pic);
+      dpb_unmark_reference_picture(this->nal_parser->dpb, this->incomplete_pic);
+
+      release_decoded_picture(this->incomplete_pic);
+      this->incomplete_pic = NULL;
+      img = NULL;
+    }
+  }
+
+
+  VdpPictureInfoH264 pic;
+
+  fill_vdpau_pictureinfo_h264(this_gen, slice_count, &pic);
+
+#ifdef DEBUG_H264
+  dump_pictureinfo_h264(&pic);
+
+  uint32_t i;
+  uint32_t head_len = (vdp_buffer->bitstream_bytes < 20) ? vdp_buffer->bitstream_bytes : 20;
+  /* start of the last (up to) 20 bytes; never before the buffer start */
+  uint32_t tail_start = vdp_buffer->bitstream_bytes - head_len;
+  printf("E: Bytes used: %d\n", vdp_buffer->bitstream_bytes);
+  printf("E: Decode data: \nE:");
+  for(i = 0; i < head_len; i++) {
+    printf("%02x ", ((uint8_t*)vdp_buffer->bitstream)[i]);
+    if((i+1) % 10 == 0)
+      printf("\nE:");
+  }
+  printf("\n...\n");
+  for(i = tail_start; i < vdp_buffer->bitstream_bytes; i++) {
+    printf("%02x ", ((uint8_t*)vdp_buffer->bitstream)[i]);
+    if((i+1) % 10 == 0)
+      printf("\nE:");
+  }
+  printf("\nE: ---------------------------------------------------------------\n");
+#endif
+
+  if(!this->decoder_started && !pic.is_reference) {
+    /* the picture is dropped: release what would otherwise leak */
+    free_bitstream_buffer(vdp_buffer);
+    free_coded_picture(this->completed_pic);
+    this->completed_pic = NULL;
+    return 0;
+  }
+
+  this->decoder_started = 1;
+
+  if(img == NULL) {
+    img = this->stream->video_out->get_frame (this->stream->video_out,
+                                              this->width, this->height,
+                                              this->ratio,
+                                              XINE_IMGFMT_VDPAU, VO_BOTH_FIELDS);
+    this->vdpau_accel = (vdpau_accel_t*)img->accel_data;
+
+    img->duration = this->video_step;
+    img->pts = this->completed_pic->pts;
+
+    if (this->dangling_img) {
+      xprintf(this->xine, XINE_VERBOSITY_LOG,
+          "broken stream: current img wasn't processed -- freeing it!\n");
+      this->dangling_img->free(this->dangling_img);
+    }
+    this->dangling_img = img;
+  } else {
+    if (img->pts == 0) {
+      img->pts = this->completed_pic->pts;
+    }
+  }
+
+  if(this->vdp_runtime_nr != *(this->vdpau_accel->current_vdp_runtime_nr)) {
+    xprintf(this->xine, XINE_VERBOSITY_LOG,
+        "VDPAU was preempted. Reinitialise the decoder.\n");
+    /* drop the pending picture before the parser is re-created by the
+     * reset below; nothing else would free it afterwards */
+    free_bitstream_buffer(vdp_buffer);
+    free_coded_picture(this->completed_pic);
+    this->completed_pic = NULL;
+    this->decoder = VDP_INVALID_HANDLE;
+    vdpau_h264_reset(this_gen);
+    this->vdp_runtime_nr = this->vdpau_accel->vdp_runtime_nr;
+    return 0;
+  }
+
+  VdpVideoSurface surface = this->vdpau_accel->surface;
+
+  /*xprintf(this->xine, XINE_VERBOSITY_DEBUG,
+      "Decode: NUM: %d, REF: %d, BYTES: %d, PTS: %lld\n", pic.frame_num, pic.is_reference, vdp_buffer->bitstream_bytes, this->completed_pic->pts);*/
+  VdpStatus status = this->vdpau_accel->vdp_decoder_render(this->decoder,
+      surface, (VdpPictureInfo*)&pic, 1, vdp_buffer);
+
+  /* free the image data */
+  free_bitstream_buffer(vdp_buffer);
+
+  process_mmc_operations(this->nal_parser, this->completed_pic);
+
+  if(status != VDP_STATUS_OK)
+  {
+    xprintf(this->xine, XINE_VERBOSITY_LOG, "vdpau_h264: Decoder failure: %s\n", this->vdpau_accel->vdp_get_error_string(status));
+    if (this->dangling_img)
+      this->dangling_img->free(this->dangling_img);
+    img = NULL;
+    this->dangling_img = NULL;
+    free_coded_picture(this->completed_pic);
+    this->completed_pic = NULL;
+  }
+  else {
+    img->bad_frame = 0;
+
+    if(!img->progressive_frame && this->completed_pic->repeat_pic)
+      img->repeat_first_field = 1;
+    //else if(img->progressive_frame && this->nal_parser->current_nal->repeat_pic)
+    //  img->duration *= this->nal_parser->current_nal->repeat_pic;
+
+    /* only bt601 and bt701 handled so far. others seem to be rarely used */
+    if(sps->vui_parameters.colour_description_present) {
+      switch (sps->vui_parameters.colour_primaries) {
+        case 1:
+          this->color_standard = VDP_COLOR_STANDARD_ITUR_BT_709;
+          break;
+        case 5:
+        case 6:
+        default:
+          this->color_standard = VDP_COLOR_STANDARD_ITUR_BT_601;
+          break;
+      }
+    }
+
+    this->vdpau_accel->color_standard = this->color_standard;
+
+    struct decoded_picture *decoded_pic = NULL;
+
+
+    uint8_t draw_frame = 0;
+    if (!slc->field_pic_flag) { /* frame coded: simply add to dpb */
+      decoded_pic = init_decoded_picture(this->completed_pic, img);
+      this->completed_pic = NULL;
+      this->dangling_img = NULL;
+
+      dpb_add_picture(this->nal_parser->dpb, decoded_pic, sps->num_ref_frames);
+
+      draw_frame = 1;
+    } else { /* field coded: check for second field */
+      if (!this->incomplete_pic) {
+        decoded_pic = init_decoded_picture(this->completed_pic, img);
+        this->completed_pic = NULL;
+        this->dangling_img = NULL;
+        this->incomplete_pic = decoded_pic;
+        lock_decoded_picture(this->incomplete_pic);
+
+        dpb_add_picture(this->nal_parser->dpb, decoded_pic, sps->num_ref_frames);
+
+        /* don't do a draw yet as the field was incomplete */
+        draw_frame = 0;
+      } else {
+        decoded_pic = this->incomplete_pic;
+        lock_decoded_picture(decoded_pic);
+
+        /* picture is complete now */
+        release_decoded_picture(this->incomplete_pic);
+        this->incomplete_pic = NULL;
+        this->dangling_img = NULL;
+
+        decoded_pic_add_field(decoded_pic, this->completed_pic);
+        this->completed_pic = NULL;
+
+        draw_frame = 1;
+      }
+    }
+
+    release_decoded_picture(decoded_pic);
+
+    /* draw the next frame in display order */
+    if (draw_frame) {
+      draw_frames(this_gen, 0);
+    }
+  }
+
+  return 1;
+}
+
+/*
+ * This function receives a buffer of data from the demuxer layer and
+ * figures out how to handle it based on its header flags.
+ */
+
+/*
+ * Replace the decoder's private copy of the codec configuration data.
+ * Any previous copy is freed first; on allocation failure, or when no
+ * data is given, the decoder ends up with no stored configuration.
+ */
+static void store_codec_private (vdpau_h264_decoder_t *this,
+    const uint8_t *data, uint32_t len) {
+
+  free(this->codec_private);
+  this->codec_private = NULL;
+  this->codec_private_len = 0;
+
+  if (data == NULL || len == 0)
+    return;
+
+  this->codec_private = malloc(len);
+  if (this->codec_private == NULL) {
+    xprintf(this->xine, XINE_VERBOSITY_LOG,
+        "vdpau_h264: out of memory, codec private data not stored\n");
+    return;
+  }
+
+  memcpy(this->codec_private, data, len);
+  this->codec_private_len = len;
+}
+
+/*
+ * Decoder entry point, see the comment above: header buffers update the
+ * stored size and codec configuration, everything else is fed to the NAL
+ * parser and every completed picture is rendered.
+ */
+static void vdpau_h264_decode_data (video_decoder_t *this_gen,
+  buf_element_t *buf) {
+
+  vdpau_h264_decoder_t *this = (vdpau_h264_decoder_t *) this_gen;
+
+  VdpBitstreamBuffer vdp_buffer;
+  vdp_buffer.struct_version = VDP_BITSTREAM_BUFFER_VERSION;
+  vdp_buffer.bitstream = NULL;
+  vdp_buffer.bitstream_bytes = 0;
+
+  /* a video decoder does not care about this flag (?) */
+  if (buf->decoder_flags & BUF_FLAG_PREVIEW)
+    return;
+
+  if(buf->decoder_flags & BUF_FLAG_FRAME_START || buf->decoder_flags & BUF_FLAG_FRAME_END)
+    this->have_frame_boundary_marks = 1;
+
+  if (buf->decoder_flags & BUF_FLAG_FRAMERATE) {
+    this->video_step = buf->decoder_info[0];
+    _x_stream_info_set(this->stream, XINE_STREAM_INFO_FRAME_DURATION, this->video_step);
+  }
+
+  if (this->video_step != this->reported_video_step){
+    _x_stream_info_set(this->stream, XINE_STREAM_INFO_FRAME_DURATION, (this->reported_video_step = this->video_step));
+  }
+
+  if (buf->decoder_flags & BUF_FLAG_STDHEADER) { /* need to initialize */
+    this->have_frame_boundary_marks = 0;
+
+    xine_bmiheader *bih = (xine_bmiheader*)buf->content;
+    this->width = bih->biWidth;
+    this->height = bih->biHeight;
+
+    uint8_t *codec_private = buf->content + sizeof(xine_bmiheader);
+    /* a biSize smaller than the header itself must not wrap around to a
+     * huge length */
+    /* NOTE(review): the length is not checked against buf->size --
+     * confirm the demuxers guarantee biSize <= buf->size. */
+    int64_t extra_len = (int64_t)bih->biSize - (int64_t)sizeof(xine_bmiheader);
+    uint32_t codec_private_len = (extra_len > 0) ? (uint32_t)extra_len : 0;
+
+    store_codec_private(this, codec_private, codec_private_len);
+
+    if(codec_private_len > 0) {
+      parse_codec_private(this->nal_parser, codec_private, codec_private_len);
+    }
+  } else if (buf->decoder_flags & BUF_FLAG_SPECIAL) {
+    this->have_frame_boundary_marks = 0;
+
+    if(buf->decoder_info[1] == BUF_SPECIAL_DECODER_CONFIG) {
+      uint8_t *codec_private = buf->decoder_info_ptr[2];
+      uint32_t codec_private_len = buf->decoder_info[2];
+
+      store_codec_private(this, codec_private, codec_private_len);
+
+      if(codec_private != NULL && codec_private_len > 0) {
+        parse_codec_private(this->nal_parser, codec_private, codec_private_len);
+      }
+    } else if (buf->decoder_info[1] == BUF_SPECIAL_PALETTE) {
+      xprintf(this->xine, XINE_VERBOSITY_LOG,
+          "SPECIAL PALETTE is not yet handled\n");
+    } else
+      xprintf(this->xine, XINE_VERBOSITY_LOG,
+          "UNKNOWN SPECIAL HEADER\n");
+
+  } else {
+    /* parse the first nal packages to retrieve profile type */
+    int len = 0;
+
+    while(len < buf->size && !(this->wait_for_frame_start && !(buf->decoder_flags & BUF_FLAG_FRAME_START))) {
+      this->wait_for_frame_start = 0;
+
+      /* start every round with an empty descriptor so that a pointer
+       * already freed by the renderer is never looked at again */
+      vdp_buffer.bitstream = NULL;
+      vdp_buffer.bitstream_bytes = 0;
+
+      len += parse_frame(this->nal_parser, buf->content + len, buf->size - len,
+          buf->pts,
+          (uint8_t**)&vdp_buffer.bitstream, &vdp_buffer.bitstream_bytes, &this->completed_pic);
+
+      if(this->decoder == VDP_INVALID_HANDLE &&
+          this->completed_pic &&
+          this->completed_pic->sps_nal != NULL &&
+          this->completed_pic->sps_nal->sps.pic_width > 0 &&
+          this->completed_pic->sps_nal->sps.pic_height > 0) {
+
+        vdpau_decoder_init(this_gen);
+      }
+
+      if(this->completed_pic &&
+          this->completed_pic->sps_nal != NULL &&
+          this->completed_pic->sps_nal->sps.vui_parameters_present_flag &&
+          this->completed_pic->sps_nal->sps.vui_parameters.bitstream_restriction_flag) {
+
+        this->nal_parser->dpb->max_reorder_frames =
+            this->completed_pic->sps_nal->sps.vui_parameters.num_reorder_frames + 1;
+        this->nal_parser->dpb->max_dpb_frames = this->completed_pic->sps_nal->sps.vui_parameters.max_dec_frame_buffering + 1;
+
+        xprintf(this->xine, XINE_VERBOSITY_DEBUG,
+                    "max reorder count: %d, max dpb count %d\n",
+                    this->nal_parser->dpb->max_reorder_frames,
+                    this->nal_parser->dpb->max_dpb_frames);
+      }
+
+      /* completed_pic may be NULL here (the else branch already tests for
+       * it), so check it before looking at its NAL units */
+      if(this->decoder != VDP_INVALID_HANDLE &&
+          vdp_buffer.bitstream_bytes > 0 &&
+          this->completed_pic != NULL &&
+          this->completed_pic->slc_nal != NULL &&
+          this->completed_pic->pps_nal != NULL) {
+        vdpau_decoder_render(this_gen, &vdp_buffer, this->completed_pic->slice_cnt);
+      } else {
+        if (this->completed_pic != NULL) {
+          free_coded_picture(this->completed_pic);
+          this->completed_pic = NULL;
+        }
+        /* nobody rendered this picture, so nobody freed its bitstream
+         * copy either.
+         * NOTE(review): assumes a non-empty buffer returned by
+         * parse_frame is always heap memory owned by the caller, as the
+         * renderer's free() of it implies -- confirm in the parser. */
+        if (vdp_buffer.bitstream_bytes > 0) {
+          free((void *)vdp_buffer.bitstream);
+          vdp_buffer.bitstream = NULL;
+          vdp_buffer.bitstream_bytes = 0;
+        }
+      }
+
+      /* in case the last nal was detected as END_OF_SEQUENCE
+       * we will flush the dpb, so that all pictures get drawn
+       */
+      if(this->nal_parser->last_nal_res == 3) {
+        xprintf(this->xine, XINE_VERBOSITY_DEBUG,
+            "END_OF_SEQUENCE, flush buffers\n");
+        vdpau_h264_flush(this_gen);
+      }
+    }
+  }
+
+  if(buf->decoder_flags & BUF_FLAG_FRAME_END)
+    this->wait_for_frame_start = 0;
+}
+
+/*
+ * This function is called when xine needs to flush the system.
+ * Pending images are dropped, every picture still held by the dpb is
+ * drawn, and the dpb is emptied.
+ */
+static void vdpau_h264_flush (video_decoder_t *this_gen) {
+  vdpau_h264_decoder_t *this = (vdpau_h264_decoder_t*) this_gen;
+
+  if(this->dangling_img){
+    this->dangling_img->free(this->dangling_img);
+    this->dangling_img = NULL;
+  }
+
+  if (this->incomplete_pic) {
+    release_decoded_picture(this->incomplete_pic);
+    this->incomplete_pic = NULL;
+  }
+
+  draw_frames(this_gen, 1);
+  dpb_free_all(this->nal_parser->dpb);
+  this->reset = VO_NEW_SEQUENCE_FLAG;
+}
+
+/*
+ * This function resets the video decoder.
+ * The VdpDecoder is destroyed, the NAL parser is re-created and the
+ * stored codec configuration (if any) is parsed again.
+ */
+static void vdpau_h264_reset (video_decoder_t *this_gen) {
+  vdpau_h264_decoder_t *this = (vdpau_h264_decoder_t *) this_gen;
+
+  dpb_free_all(this->nal_parser->dpb);
+
+  if (this->decoder != VDP_INVALID_HANDLE) {
+    this->vdpau_accel->vdp_decoder_destroy( this->decoder );
+    this->decoder = VDP_INVALID_HANDLE;
+  }
+
+  // Doing a full parser reinit here works more reliable than
+  // resetting
+
+  //reset_parser(this->nal_parser);
+  free_parser(this->nal_parser);
+  this->nal_parser = init_parser(this->xine);
+
+  this->color_standard = VDP_COLOR_STANDARD_ITUR_BT_601;
+  this->video_step = 0;
+
+  if(this->codec_private_len > 0) {
+    parse_codec_private(this->nal_parser, this->codec_private, this->codec_private_len);
+
+    /* if the stream does not contain frame boundary marks we
+     * have to hope that the next nal will start with the next
+     * incoming buf... seems to work, though...
+     */
+    this->wait_for_frame_start = this->have_frame_boundary_marks;
+  }
+
+  if (this->incomplete_pic) {
+    release_decoded_picture(this->incomplete_pic);
+    this->incomplete_pic = NULL;
+  }
+
+  if (this->dangling_img) {
+    this->dangling_img->free(this->dangling_img);
+    this->dangling_img = NULL;
+  }
+
+  this->progressive_cnt = 0;
+  this->reset = VO_NEW_SEQUENCE_FLAG;
+}
+
+/*
+ * The decoder should forget any stored pts values here.
+ */
+static void vdpau_h264_discontinuity (video_decoder_t *this_gen) {
+  vdpau_h264_decoder_t *this = (vdpau_h264_decoder_t *) this_gen;
+
+  dpb_clear_all_pts(this->nal_parser->dpb);
+  this->reset = VO_NEW_SEQUENCE_FLAG;
+}
+
+/*
+ * This function frees the video decoder instance allocated to the decoder.
+ */
+static void vdpau_h264_dispose (video_decoder_t *this_gen) {
+
+  vdpau_h264_decoder_t *this = (vdpau_h264_decoder_t *) this_gen;
+
+  if (this->incomplete_pic) {
+    release_decoded_picture(this->incomplete_pic);
+    this->incomplete_pic = NULL;
+  }
+
+  if (this->dangling_img) {
+    this->dangling_img->free(this->dangling_img);
+    this->dangling_img = NULL;
+  }
+
+  dpb_free_all(this->nal_parser->dpb);
+
+  if (this->decoder != VDP_INVALID_HANDLE) {
+    this->vdpau_accel->vdp_decoder_destroy( this->decoder );
+    this->decoder = VDP_INVALID_HANDLE;
+  }
+
+  this->stream->video_out->close( this->stream->video_out, this->stream );
+
+  free_parser (this->nal_parser);
+  /* the copy of the codec configuration was never released before */
+  free (this->codec_private);
+  free (this_gen);
+}
+
+/*
+ * This function allocates, initializes, and returns a private video
+ * decoder structure.
+ *
+ * A throw-away 1920x1080 MAIN profile decoder is created first to check
+ * that VDPAU has a free decoder; NULL is returned if the video output
+ * lacks VO_CAP_VDPAU_H264, if that check fails, or if memory runs out.
+ */
+static video_decoder_t *open_plugin (video_decoder_class_t *class_gen, xine_stream_t *stream) {
+
+  vdpau_h264_decoder_t *this;
+
+  /* the videoout must be vdpau-capable to support this decoder */
+  if ( !(stream->video_driver->get_capabilities(stream->video_driver) & VO_CAP_VDPAU_H264) )
+    return NULL;
+
+  /* now check if vdpau has free decoder resource */
+  vo_frame_t *img = stream->video_out->get_frame( stream->video_out, 1920, 1080, 1, XINE_IMGFMT_VDPAU, VO_BOTH_FIELDS );
+  if ( !img )
+    return NULL;
+  vdpau_accel_t *accel = (vdpau_accel_t*)img->accel_data;
+  int runtime_nr = accel->vdp_runtime_nr;
+  img->free(img);
+  VdpDecoder decoder;
+  VdpStatus st = accel->vdp_decoder_create( accel->vdp_device, VDP_DECODER_PROFILE_H264_MAIN, 1920, 1080, 16, &decoder );
+  if ( st!=VDP_STATUS_OK ) {
+    lprintf( "can't create vdpau decoder.\n" );
+    return NULL;
+  }
+
+  accel->vdp_decoder_destroy( decoder );
+
+  this = calloc(1, sizeof(*this));
+  if ( !this )
+    return NULL;
+
+  this->nal_parser = init_parser(stream->xine);
+  if ( !this->nal_parser ) {
+    /* every other entry point dereferences nal_parser unconditionally */
+    free( this );
+    return NULL;
+  }
+
+  this->video_decoder.decode_data = vdpau_h264_decode_data;
+  this->video_decoder.flush = vdpau_h264_flush;
+  this->video_decoder.reset = vdpau_h264_reset;
+  this->video_decoder.discontinuity = vdpau_h264_discontinuity;
+  this->video_decoder.dispose = vdpau_h264_dispose;
+
+  this->stream = stream;
+  this->xine = stream->xine;
+  this->class = (vdpau_h264_class_t *) class_gen;
+
+  this->decoder = VDP_INVALID_HANDLE;
+  this->vdp_runtime_nr = runtime_nr;
+  this->color_standard = VDP_COLOR_STANDARD_ITUR_BT_601;
+  this->progressive_cnt = 0;
+
+  this->reset = VO_NEW_SEQUENCE_FLAG;
+
+  (this->stream->video_out->open) (this->stream->video_out, this->stream);
+
+  return &this->video_decoder;
+}
+
+/*
+ * This function allocates a private video decoder class and initializes
+ * the class's member functions.
+ * Returns NULL if the class cannot be allocated.
+ */
+static void *init_plugin (xine_t *xine, void *data) {
+
+  vdpau_h264_class_t *this;
+
+  /* neither argument is needed to set up the class */
+  (void)xine;
+  (void)data;
+
+  this = calloc(1, sizeof(*this));
+  if (!this)
+    return NULL;
+
+  this->decoder_class.open_plugin = open_plugin;
+  this->decoder_class.identifier = "vdpau_h264";
+  this->decoder_class.description =
+    N_("vdpau_h264: h264 decoder plugin using VDPAU hardware decoding.\n"
+       "Must be used along with video_out_vdpau.");
+  this->decoder_class.dispose = default_video_decoder_class_dispose;
+
+  return this;
+}
+
+/*
+ * This is a list of all of the internal xine video buffer types that
+ * this decoder is able to handle. Check src/xine-engine/buffer.h for a
+ * list of valid buffer types (and add a new one if the one you need does
+ * not exist). Terminate the list with a 0.
+ */
+static const uint32_t video_types[] = {
+  /* BUF_VIDEO_FOOVIDEO, */
+  BUF_VIDEO_H264,
+  0
+};
+
+/*
+ * This data structure combines the list of supported xine buffer types and
+ * the priority that the plugin should be given with respect to other
+ * plugins that handle the same buffer type. A plugin with priority (n+1)
+ * will be used instead of a plugin with priority (n).
+ */
+static const decoder_info_t dec_info_video = {
+  video_types, /* supported types */
+  7 /* priority */
+};
+
+/*
+ * The plugin catalog entry. This is the only information that this plugin
+ * will export to the public.
+ */
+const plugin_info_t xine_plugin_info[] EXPORTED = {
+  /* { type, API, "name", version, special_info, init_function } */
+  { PLUGIN_VIDEO_DECODER | PLUGIN_MUST_PRELOAD, 19, "vdpau_h264", XINE_VERSION_CODE, &dec_info_video, init_plugin },
+  /* closing PLUGIN_NONE entry -- presumably the end-of-list marker for the plugin loader; TODO confirm */
+  { PLUGIN_NONE, 0, "", 0, NULL, NULL }
+};
diff --git a/src/video_dec/libvdpau/vdpau_mpeg12.c b/src/video_dec/libvdpau/vdpau_mpeg12.c
new file mode 100644
index 000000000..1067f8634
--- /dev/null
+++ b/src/video_dec/libvdpau/vdpau_mpeg12.c
@@ -0,0 +1,1101 @@
+/*
+ * kate: space-indent on; indent-width 2; mixedindent off; indent-mode cstyle; remove-trailing-space on;
+ * Copyright (C) 2008 the xine project
+ * Copyright (C) 2008 Christophe Thommeret <hftom@free.fr>
+ *
+ * This file is part of xine, a free video player.
+ *
+ * xine is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * xine is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110, USA
+ *
+ * vdpau_mpeg12.c, a mpeg1/2 video stream parser using VDPAU hardware decoder
+ *
+ */
+
+/*#define LOG*/
+#define LOG_MODULE "vdpau_mpeg12"
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <xine/xine_internal.h>
+#include <xine/video_out.h>
+#include <xine/buffer.h>
+#include <xine/xineutils.h>
+#include "accel_vdpau.h"
+#include "bits_reader.h"
+
+#include <vdpau/vdpau.h>
+
+#define sequence_header_code 0xb3
+#define sequence_error_code 0xb4
+#define sequence_end_code 0xb7
+#define group_start_code 0xb8
+#define extension_start_code 0xb5
+#define user_data_start_code 0xb2
+#define picture_start_code 0x00
+#define begin_slice_start_code 0x01
+#define end_slice_start_code 0xaf
+
+#define sequence_ext_sc 1
+#define quant_matrix_ext_sc 3
+#define picture_coding_ext_sc 8
+#define sequence_display_ext_sc 2
+
+#define I_FRAME 1
+#define P_FRAME 2
+#define B_FRAME 3
+
+#define PICTURE_TOP 1
+#define PICTURE_BOTTOM 2
+#define PICTURE_FRAME 3
+
+/*#define MAKE_DAT*/ /*do NOT define this, unless you know what you do */
+#ifdef MAKE_DAT
+static int nframes;
+static FILE *outfile;
+#endif
+
+
+
+/* default intra quant matrix, in zig-zag order */
+static const uint8_t default_intra_quantizer_matrix[64] = {
+  8,
+  16, 16,
+  19, 16, 19,
+  22, 22, 22, 22,
+  22, 22, 26, 24, 26,
+  27, 27, 27, 26, 26, 26,
+  26, 27, 27, 27, 29, 29, 29,
+  34, 34, 34, 29, 29, 29, 27, 27,
+  29, 29, 32, 32, 34, 34, 37,
+  38, 37, 35, 35, 34, 35,
+  38, 38, 40, 40, 40,
+  48, 48, 46, 46,
+  56, 56, 58,
+  69, 69,
+  83
+};
+
+uint8_t mpeg2_scan_norm[64] = {
+  /* Zig-Zag scan pattern */
+  0, 1, 8,16, 9, 2, 3,10,
+  17,24,32,25,18,11, 4, 5,
+  12,19,26,33,40,48,41,34,
+  27,20,13, 6, 7,14,21,28,
+  35,42,49,56,57,50,43,36,
+  29,22,15,23,30,37,44,51,
+  58,59,52,45,38,31,39,46,
+  53,60,61,54,47,55,62,63
+};
+
+
+
+typedef struct {
+  VdpPictureInfoMPEG1Or2 vdp_infos; /* first field, also used for frame */
+  VdpPictureInfoMPEG1Or2 vdp_infos2; /* second field */
+  int slices_count, slices_count2;
+  uint8_t *slices;
+  int slices_size;
+  int slices_pos, slices_pos_top;
+
+  int progressive_frame;
+  int repeat_first_field;
+} picture_t;
+
+
+
+typedef struct {
+  uint32_t coded_width;
+  uint32_t coded_height;
+
+  double video_step; /* frame duration in pts units */
+  double reported_video_step; /* frame duration in pts units */
+  double ratio;
+
+  VdpDecoderProfile profile;
+  int horizontal_size_value;
+  int vertical_size_value;
+  int aspect_ratio_information;
+  int frame_rate_code;
+  int progressive_sequence;
+  int chroma;
+  int horizontal_size_extension;
+  int vertical_size_extension;
+  int frame_rate_extension_n;
+  int frame_rate_extension_d;
+  int display_horizontal_size;
+  int display_vertical_size;
+  int top_field_first;
+
+  int have_header;
+  int have_display_extension;
+
+  uint8_t *buf; /* accumulate data */
+  int bufseek;
+  uint32_t bufsize;
+  uint32_t bufpos;
+  int start;
+
+  picture_t picture;
+  vo_frame_t *forward_ref;
+  vo_frame_t *backward_ref;
+
+  int64_t cur_pts, seq_pts;
+
+  vdpau_accel_t *accel_vdpau;
+
+  bits_reader_t br;
+
+  int vdp_runtime_nr;
+  int reset;
+
+} sequence_t;
+
+
+
+typedef struct {
+  video_decoder_class_t decoder_class;
+} vdpau_mpeg12_class_t;
+
+
+
+typedef struct vdpau_mpeg12_decoder_s {
+  video_decoder_t video_decoder; /* parent video decoder structure */
+
+  vdpau_mpeg12_class_t *class;
+  xine_stream_t *stream;
+
+  sequence_t sequence;
+
+  VdpDecoder decoder;
+  VdpDecoderProfile decoder_profile;
+  uint32_t decoder_width;
+  uint32_t decoder_height;
+
+} vdpau_mpeg12_decoder_t;
+
+
+static void picture_ready( vdpau_mpeg12_decoder_t *vd, uint8_t end_of_sequence );
+
+
+
+/*
+ * Put both per-field picture infos and the slice bookkeeping of pic back
+ * to their start values. The slice buffer itself is kept.
+ */
+static void reset_picture( picture_t *pic )
+{
+  lprintf( "reset_picture\n" );
+  pic->vdp_infos.picture_structure = pic->vdp_infos2.picture_structure = 0;
+  pic->vdp_infos2.intra_dc_precision = pic->vdp_infos.intra_dc_precision = 0;
+  pic->vdp_infos2.frame_pred_frame_dct = pic->vdp_infos.frame_pred_frame_dct = 1;
+  pic->vdp_infos2.concealment_motion_vectors = pic->vdp_infos.concealment_motion_vectors = 0;
+  pic->vdp_infos2.intra_vlc_format = pic->vdp_infos.intra_vlc_format = 0;
+  pic->vdp_infos2.alternate_scan = pic->vdp_infos.alternate_scan = 0;
+  pic->vdp_infos2.q_scale_type = pic->vdp_infos.q_scale_type = 0;
+  pic->vdp_infos2.top_field_first = pic->vdp_infos.top_field_first = 1;
+  pic->slices_count = 0;
+  pic->slices_count2 = 0;
+  pic->slices_pos = 0;
+  pic->slices_pos_top = 0;
+  pic->progressive_frame = 0;
+  pic->repeat_first_field = 0;
+}
+
+
+
+/*
+ * Allocate the initial 2048 byte slice buffer of pic and reset it.
+ * If the allocation fails, slices is NULL and slices_size is 0, so the
+ * recorded capacity never claims memory that does not exist.
+ */
+static void init_picture( picture_t *pic )
+{
+  pic->slices = malloc(2048);
+  /* NOTE(review): users of slices are outside this view -- confirm they
+   * grow the buffer based on slices_size before writing to it. */
+  pic->slices_size = pic->slices ? 2048 : 0;
+  reset_picture( pic );
+}
+
+
+
+/*
+ * Clear the stored pts values of the sequence and its reference frames.
+ * With free_refs set, additionally rewind the accumulation buffer, free
+ * both reference frames and flag the start of a new sequence.
+ */
+static void reset_sequence( sequence_t *sequence, int free_refs )
+{
+  sequence->cur_pts = sequence->seq_pts = 0;
+  if ( sequence->forward_ref )
+    sequence->forward_ref->pts = 0;
+  if ( sequence->backward_ref )
+    sequence->backward_ref->pts = 0;
+
+  if ( !free_refs )
+    return;
+
+  sequence->bufpos = 0;
+  sequence->bufseek = 0;
+  sequence->start = -1;
+  if ( sequence->forward_ref )
+    sequence->forward_ref->free( sequence->forward_ref );
+  sequence->forward_ref = NULL;
+  if ( sequence->backward_ref )
+    sequence->backward_ref->free( sequence->backward_ref );
+  sequence->backward_ref = NULL;
+  sequence->top_field_first = 0;
+  sequence->reset = VO_NEW_SEQUENCE_FLAG;
+}
+
+
+
+/*
+ * Return the sequence to its no-header-seen defaults and free its
+ * reference frames via reset_sequence().
+ */
+static void free_sequence( sequence_t *sequence )
+{
+  /* the trace message used to say "init_sequence" */
+  lprintf( "free_sequence\n" );
+  sequence->have_header = 0;
+  sequence->profile = VDP_DECODER_PROFILE_MPEG1;
+  sequence->chroma = 0;
+  sequence->video_step = 3600;
+  reset_sequence( sequence, 1 );
+}
+
+
+
+static void sequence_header( vdpau_mpeg12_decoder_t *this_gen, 
uint8_t *buf, int len ) +{ + sequence_t *sequence = (sequence_t*)&this_gen->sequence; + + int i, j; + + if ( !sequence->have_header ) + sequence->have_header = 1; + + sequence->profile = VDP_DECODER_PROFILE_MPEG1; + sequence->horizontal_size_extension = 0; + sequence->vertical_size_extension = 0; + sequence->have_display_extension = 0; + + bits_reader_set( &sequence->br, buf, len ); + sequence->horizontal_size_value = read_bits( &sequence->br, 12 ); + lprintf( "horizontal_size_value: %d\n", sequence->horizontal_size_value ); + sequence->vertical_size_value = read_bits( &sequence->br, 12 ); + lprintf( "vertical_size_value: %d\n", sequence->vertical_size_value ); + sequence->aspect_ratio_information = read_bits( &sequence->br, 4 ); + lprintf( "aspect_ratio_information: %d\n", sequence->aspect_ratio_information ); + sequence->frame_rate_code = read_bits( &sequence->br, 4 ); + lprintf( "frame_rate_code: %d\n", sequence->frame_rate_code ); + int tmp; + tmp = read_bits( &sequence->br, 18 ); + lprintf( "bit_rate_value: %d\n", tmp ); + tmp = read_bits( &sequence->br, 1 ); + lprintf( "marker_bit: %d\n", tmp ); + tmp = read_bits( &sequence->br, 10 ); + lprintf( "vbv_buffer_size_value: %d\n", tmp ); + tmp = read_bits( &sequence->br, 1 ); + lprintf( "constrained_parameters_flag: %d\n", tmp ); + i = read_bits( &sequence->br, 1 ); + lprintf( "load_intra_quantizer_matrix: %d\n", i ); + if ( i ) { + for ( j=0; j<64; ++j ) { + sequence->picture.vdp_infos2.intra_quantizer_matrix[mpeg2_scan_norm[j]] = sequence->picture.vdp_infos.intra_quantizer_matrix[mpeg2_scan_norm[j]] = read_bits( &sequence->br, 8 ); + } + } + else { + for ( j=0; j<64; ++j ) { + sequence->picture.vdp_infos2.intra_quantizer_matrix[mpeg2_scan_norm[j]] = sequence->picture.vdp_infos.intra_quantizer_matrix[mpeg2_scan_norm[j]] = default_intra_quantizer_matrix[j]; + } + } + + i = read_bits( &sequence->br, 1 ); + lprintf( "load_non_intra_quantizer_matrix: %d\n", i ); + if ( i ) { + for ( j=0; j<64; ++j ) { + 
sequence->picture.vdp_infos2.non_intra_quantizer_matrix[mpeg2_scan_norm[j]] = sequence->picture.vdp_infos.non_intra_quantizer_matrix[mpeg2_scan_norm[j]] = read_bits( &sequence->br, 8 ); + } + } + else { + memset( sequence->picture.vdp_infos.non_intra_quantizer_matrix, 16, 64 ); + memset( sequence->picture.vdp_infos2.non_intra_quantizer_matrix, 16, 64 ); + } +} + + + +static void process_sequence_mpeg12_dependent_data( vdpau_mpeg12_decoder_t *this_gen ) +{ + sequence_t *sequence = (sequence_t*)&this_gen->sequence; + + int frame_rate_value_n, frame_rate_value_d; + + sequence->coded_width = sequence->horizontal_size_value | (sequence->horizontal_size_extension << 14); + sequence->coded_height = sequence->vertical_size_value | (sequence->vertical_size_extension << 14); + + switch ( sequence->frame_rate_code ) { + case 1: frame_rate_value_n = 24; frame_rate_value_d = 1001; break; /* 23.976.. */ + case 2: frame_rate_value_n = 24; frame_rate_value_d = 1000; break; /* 24 */ + case 3: frame_rate_value_n = 25; frame_rate_value_d = 1000; break; /* 25 */ + case 4: frame_rate_value_n = 30; frame_rate_value_d = 1001; break; /* 29.97.. */ + case 5: frame_rate_value_n = 30; frame_rate_value_d = 1000; break; /* 30 */ + case 6: frame_rate_value_n = 50; frame_rate_value_d = 1000; break; /* 50 */ + case 7: frame_rate_value_n = 60; frame_rate_value_d = 1001; break; /* 59.94.. 
*/ + case 8: frame_rate_value_n = 60; frame_rate_value_d = 1000; break; /* 60 */ + default: frame_rate_value_n = 50; frame_rate_value_d = 1000; /* assume 50 */ + } + + sequence->video_step = 90.0 * (frame_rate_value_d * (sequence->frame_rate_extension_d + 1)) + / (frame_rate_value_n * (sequence->frame_rate_extension_n + 1)); + + if ( sequence->profile==VDP_DECODER_PROFILE_MPEG1 ) { + double pel_aspect_ratio; /* height / width */ + + switch ( sequence->aspect_ratio_information ) { + case 1: pel_aspect_ratio = 1.0000; + case 2: pel_aspect_ratio = 0.6735; + case 3: pel_aspect_ratio = 0.7031; + case 4: pel_aspect_ratio = 0.7615; + case 5: pel_aspect_ratio = 0.8055; + case 6: pel_aspect_ratio = 0.8437; + case 7: pel_aspect_ratio = 0.8935; + case 8: pel_aspect_ratio = 0.9157; + case 9: pel_aspect_ratio = 0.9815; + case 10: pel_aspect_ratio = 1.0255; + case 11: pel_aspect_ratio = 1.0695; + case 12: pel_aspect_ratio = 1.0950; + case 13: pel_aspect_ratio = 1.1575; + case 14: pel_aspect_ratio = 1.2015; + default: pel_aspect_ratio = 1.0000; /* fallback */ + } + + sequence->ratio = ((double)sequence->coded_width/(double)sequence->coded_height)/pel_aspect_ratio; + } + else { + switch ( sequence->aspect_ratio_information ) { + case 1: sequence->ratio = sequence->have_display_extension + ? 
((double)sequence->display_horizontal_size/(double)sequence->display_vertical_size)/1.0 + : ((double)sequence->coded_width/(double)sequence->coded_height)/1.0; + break; + case 2: sequence->ratio = 4.0/3.0; break; + case 3: sequence->ratio = 16.0/9.0; break; + case 4: sequence->ratio = 2.21; break; + default: sequence->ratio = ((double)sequence->coded_width/(double)sequence->coded_height)/1.0; + } + } + + if ( sequence->have_header == 1 ) { + sequence->have_header = 2; + _x_stream_info_set( this_gen->stream, XINE_STREAM_INFO_VIDEO_WIDTH, sequence->coded_width ); + _x_stream_info_set( this_gen->stream, XINE_STREAM_INFO_VIDEO_HEIGHT, sequence->coded_height ); + _x_stream_info_set( this_gen->stream, XINE_STREAM_INFO_VIDEO_RATIO, ((double)10000*sequence->ratio) ); + _x_stream_info_set( this_gen->stream, XINE_STREAM_INFO_FRAME_DURATION, (sequence->reported_video_step = sequence->video_step) ); + _x_meta_info_set_utf8( this_gen->stream, XINE_META_INFO_VIDEOCODEC, "MPEG1/2 (vdpau)" ); + xine_event_t event; + xine_format_change_data_t data; + event.type = XINE_EVENT_FRAME_FORMAT_CHANGE; + event.stream = this_gen->stream; + event.data = &data; + event.data_length = sizeof(data); + data.width = sequence->coded_width; + data.height = sequence->coded_height; + data.aspect = sequence->ratio; + xine_event_send( this_gen->stream, &event ); + } + else if ( sequence->have_header == 2 && sequence->reported_video_step != sequence->video_step ) { + _x_stream_info_set( this_gen->stream, XINE_STREAM_INFO_FRAME_DURATION, (sequence->reported_video_step = sequence->video_step) ); + } +} + + + +static void picture_header( vdpau_mpeg12_decoder_t *this_gen, uint8_t *buf, int len ) +{ + sequence_t *sequence = (sequence_t*)&this_gen->sequence; + + process_sequence_mpeg12_dependent_data(this_gen); + + if ( sequence->profile==VDP_DECODER_PROFILE_MPEG1 ) + sequence->picture.vdp_infos.picture_structure = PICTURE_FRAME; + + VdpPictureInfoMPEG1Or2 *infos = &sequence->picture.vdp_infos; + + if ( 
sequence->picture.vdp_infos.picture_structure==PICTURE_FRAME ) { + picture_ready( this_gen, 0 ); + reset_picture( &sequence->picture ); + } + else if ( sequence->picture.vdp_infos.picture_structure && sequence->picture.vdp_infos2.picture_structure ) { + picture_ready( this_gen, 0 ); + reset_picture( &sequence->picture ); + } + else if ( sequence->picture.vdp_infos.picture_structure ) { + infos = &sequence->picture.vdp_infos2; + sequence->picture.slices_pos_top = sequence->picture.slices_pos; + + sequence->cur_pts = 0; /* ignore pts of second field */ + } + + /* take over pts for next issued image */ + if ( sequence->cur_pts ) { + sequence->seq_pts = sequence->cur_pts; + sequence->cur_pts = 0; + } + + bits_reader_set( &sequence->br, buf, len ); + int tmp = read_bits( &sequence->br, 10 ); + lprintf( "temporal_reference: %d\n", tmp ); + infos->picture_coding_type = read_bits( &sequence->br, 3 ); + lprintf( "picture_coding_type: %d\n", infos->picture_coding_type ); + infos->forward_reference = VDP_INVALID_HANDLE; + infos->backward_reference = VDP_INVALID_HANDLE; + skip_bits( &sequence->br, 16 ); + if ( infos->picture_coding_type > I_FRAME ) { + infos->full_pel_forward_vector = read_bits( &sequence->br, 1 ); + infos->f_code[0][0] = infos->f_code[0][1] = read_bits( &sequence->br, 3 ); + if ( infos->picture_coding_type==B_FRAME ) { + infos->full_pel_backward_vector = read_bits( &sequence->br, 1 ); + infos->f_code[1][0] = infos->f_code[1][1] = read_bits( &sequence->br, 3 ); + } + } + else { + infos->full_pel_forward_vector = 0; + infos->full_pel_backward_vector = 0; + } +} + + + +static void sequence_extension( sequence_t *sequence, uint8_t *buf, int len ) +{ + bits_reader_set( &sequence->br, buf, len ); + int tmp = read_bits( &sequence->br, 4 ); + lprintf( "extension_start_code_identifier: %d\n", tmp ); + skip_bits( &sequence->br, 1 ); + switch ( read_bits( &sequence->br, 3 ) ) { + case 5: sequence->profile = VDP_DECODER_PROFILE_MPEG2_SIMPLE; break; + default: 
sequence->profile = VDP_DECODER_PROFILE_MPEG2_MAIN; + } + skip_bits( &sequence->br, 4 ); + sequence->progressive_sequence = read_bits( &sequence->br, 1 ); + lprintf( "progressive_sequence: %d\n", sequence->progressive_sequence ); + if ( read_bits( &sequence->br, 2 ) == 2 ) + sequence->chroma = VO_CHROMA_422; + tmp = read_bits( &sequence->br, 2 ); + lprintf( "horizontal_size_extension: %d\n", tmp ); + tmp = read_bits( &sequence->br, 2 ); + lprintf( "vertical_size_extension: %d\n", tmp ); + tmp = read_bits( &sequence->br, 12 ); + lprintf( "bit_rate_extension: %d\n", tmp ); + tmp = read_bits( &sequence->br, 1 ); + lprintf( "marker_bit: %d\n", tmp ); + tmp = read_bits( &sequence->br, 8 ); + lprintf( "vbv_buffer_size_extension: %d\n", tmp ); + tmp = read_bits( &sequence->br, 1 ); + lprintf( "low_delay: %d\n", tmp ); + sequence->frame_rate_extension_n = read_bits( &sequence->br, 2 ); + lprintf( "frame_rate_extension_n: %d\n", sequence->frame_rate_extension_n ); + sequence->frame_rate_extension_d = read_bits( &sequence->br, 5 ); + lprintf( "frame_rate_extension_d: %d\n", sequence->frame_rate_extension_d ); +} + + + +static void picture_coding_extension( sequence_t *sequence, uint8_t *buf, int len ) +{ + VdpPictureInfoMPEG1Or2 *infos = &sequence->picture.vdp_infos; + if ( infos->picture_structure && infos->picture_structure!=PICTURE_FRAME ) + infos = &sequence->picture.vdp_infos2; + + bits_reader_set( &sequence->br, buf, len ); + int tmp = read_bits( &sequence->br, 4 ); + lprintf( "extension_start_code_identifier: %d\n", tmp ); + infos->f_code[0][0] = read_bits( &sequence->br, 4 ); + infos->f_code[0][1] = read_bits( &sequence->br, 4 ); + infos->f_code[1][0] = read_bits( &sequence->br, 4 ); + infos->f_code[1][1] = read_bits( &sequence->br, 4 ); + lprintf( "f_code_0_0: %d\n", infos->f_code[0][0] ); + lprintf( "f_code_0_1: %d\n", infos->f_code[0][1] ); + lprintf( "f_code_1_0: %d\n", infos->f_code[1][0] ); + lprintf( "f_code_1_1: %d\n", infos->f_code[1][1] ); + 
infos->intra_dc_precision = read_bits( &sequence->br, 2 ); + lprintf( "intra_dc_precision: %d\n", infos->intra_dc_precision ); + infos->picture_structure = read_bits( &sequence->br, 2 ); + lprintf( "picture_structure: %d\n", infos->picture_structure ); + infos->top_field_first = read_bits( &sequence->br, 1 ); + lprintf( "top_field_first: %d\n", infos->top_field_first ); + infos->frame_pred_frame_dct = read_bits( &sequence->br, 1 ); + lprintf( "frame_pred_frame_dct: %d\n", infos->frame_pred_frame_dct ); + infos->concealment_motion_vectors = read_bits( &sequence->br, 1 ); + lprintf( "concealment_motion_vectors: %d\n", infos->concealment_motion_vectors ); + infos->q_scale_type = read_bits( &sequence->br, 1 ); + lprintf( "q_scale_type: %d\n", infos->q_scale_type ); + infos->intra_vlc_format = read_bits( &sequence->br, 1 ); + lprintf( "intra_vlc_format: %d\n", infos->intra_vlc_format ); + infos->alternate_scan = read_bits( &sequence->br, 1 ); + lprintf( "alternate_scan: %d\n", infos->alternate_scan ); + sequence->picture.repeat_first_field = read_bits( &sequence->br, 1 ); + lprintf( "repeat_first_field: %d\n", sequence->picture.repeat_first_field ); + tmp = read_bits( &sequence->br, 1 ); + lprintf( "chroma_420_type: %d\n", tmp ); + sequence->picture.progressive_frame = read_bits( &sequence->br, 1 ); + lprintf( "progressive_frame: %d\n", sequence->picture.progressive_frame ); +} + + + +static void quant_matrix_extension( sequence_t *sequence, uint8_t *buf, int len ) +{ + int i, j; + + bits_reader_set( &sequence->br, buf, len ); + skip_bits( &sequence->br, 4 ); + i = read_bits( &sequence->br, 1 ); + lprintf( "load_intra_quantizer_matrix: %d\n", i ); + if ( i ) { + for ( j=0; j<64; ++j ) { + sequence->picture.vdp_infos2.intra_quantizer_matrix[mpeg2_scan_norm[j]] = sequence->picture.vdp_infos.intra_quantizer_matrix[mpeg2_scan_norm[j]] = read_bits( &sequence->br, 8 ); + } + } + else { + for ( j=0; j<64; ++j ) { + 
sequence->picture.vdp_infos2.intra_quantizer_matrix[mpeg2_scan_norm[j]] = sequence->picture.vdp_infos.intra_quantizer_matrix[mpeg2_scan_norm[j]] = default_intra_quantizer_matrix[j]; + } + } + + i = read_bits( &sequence->br, 1 ); + lprintf( "load_non_intra_quantizer_matrix: %d\n", i ); + if ( i ) { + for ( j=0; j<64; ++j ) { + sequence->picture.vdp_infos2.non_intra_quantizer_matrix[mpeg2_scan_norm[j]] = sequence->picture.vdp_infos.non_intra_quantizer_matrix[mpeg2_scan_norm[j]] = read_bits( &sequence->br, 8 ); + } + } + else { + memset( sequence->picture.vdp_infos.non_intra_quantizer_matrix, 16, 64 ); + memset( sequence->picture.vdp_infos2.non_intra_quantizer_matrix, 16, 64 ); + } +} + + + +static void copy_slice( sequence_t *sequence, uint8_t *buf, int len ) +{ + int size = sequence->picture.slices_pos+len; + if ( sequence->picture.slices_size < size ) { + sequence->picture.slices_size = size+1024; + sequence->picture.slices = realloc( sequence->picture.slices, sequence->picture.slices_size ); + } + xine_fast_memcpy( sequence->picture.slices+sequence->picture.slices_pos, buf, len ); + sequence->picture.slices_pos += len; + if ( sequence->picture.slices_pos_top ) + sequence->picture.slices_count2++; + else + sequence->picture.slices_count++; +} + + + +static int parse_code( vdpau_mpeg12_decoder_t *this_gen, uint8_t *buf, int len ) +{ + sequence_t *sequence = (sequence_t*)&this_gen->sequence; + + if ( !sequence->have_header && buf[3]!=sequence_header_code ) { + lprintf( " ----------- no sequence header yet.\n" ); + return 0; + } + + if ( (buf[3] >= begin_slice_start_code) && (buf[3] <= end_slice_start_code) ) { + lprintf( " ----------- slice_start_code\n" ); + copy_slice( sequence, buf, len ); + return 0; + } + + switch ( buf[3] ) { + case sequence_header_code: + lprintf( " ----------- sequence_header_code\n" ); + sequence_header( this_gen, buf+4, len-4 ); + break; + case extension_start_code: { + switch ( buf[4]>>4 ) { + case sequence_ext_sc: + lprintf( " ----------- 
sequence_extension_start_code\n" ); + sequence_extension( sequence, buf+4, len-4 ); + break; + case quant_matrix_ext_sc: + lprintf( " ----------- quant_matrix_extension_start_code\n" ); + quant_matrix_extension( sequence, buf+4, len-4 ); + break; + case picture_coding_ext_sc: + lprintf( " ----------- picture_coding_extension_start_code\n" ); + picture_coding_extension( sequence, buf+4, len-4 ); + break; + case sequence_display_ext_sc: + lprintf( " ----------- sequence_display_extension_start_code\n" ); + break; + } + break; + } + case user_data_start_code: + lprintf( " ----------- user_data_start_code\n" ); + break; + case group_start_code: + lprintf( " ----------- group_start_code\n" ); + break; + case picture_start_code: + lprintf( " ----------- picture_start_code\n" ); + picture_header( this_gen, buf+4, len-4 ); + break; + case sequence_error_code: + lprintf( " ----------- sequence_error_code\n" ); + break; + case sequence_end_code: + lprintf( " ----------- sequence_end_code\n" ); + break; + } + return 0; +} + + + +static void decode_render( vdpau_mpeg12_decoder_t *vd, vdpau_accel_t *accel ) +{ + sequence_t *seq = (sequence_t*)&vd->sequence; + picture_t *pic = (picture_t*)&seq->picture; + + pic->vdp_infos.slice_count = pic->slices_count; + pic->vdp_infos2.slice_count = pic->slices_count2; + + VdpStatus st; + if ( vd->decoder==VDP_INVALID_HANDLE || vd->decoder_profile!=seq->profile || vd->decoder_width!=seq->coded_width || vd->decoder_height!=seq->coded_height ) { + if ( vd->decoder!=VDP_INVALID_HANDLE ) { + accel->vdp_decoder_destroy( vd->decoder ); + vd->decoder = VDP_INVALID_HANDLE; + } + st = accel->vdp_decoder_create( accel->vdp_device, seq->profile, seq->coded_width, seq->coded_height, 2, &vd->decoder); + if ( st!=VDP_STATUS_OK ) + lprintf( "failed to create decoder !! 
%s\n", accel->vdp_get_error_string( st ) ); + else { + vd->decoder_profile = seq->profile; + vd->decoder_width = seq->coded_width; + vd->decoder_height = seq->coded_height; + seq->vdp_runtime_nr = accel->vdp_runtime_nr; + } + } + + VdpBitstreamBuffer vbit; + vbit.struct_version = VDP_BITSTREAM_BUFFER_VERSION; + vbit.bitstream = pic->slices; + vbit.bitstream_bytes = (pic->vdp_infos.picture_structure==PICTURE_FRAME)? pic->slices_pos : pic->slices_pos_top; + st = accel->vdp_decoder_render( vd->decoder, accel->surface, (VdpPictureInfo*)&pic->vdp_infos, 1, &vbit ); + if ( st!=VDP_STATUS_OK ) + lprintf( "decoder failed : %d!! %s\n", st, accel->vdp_get_error_string( st ) ); + else { + lprintf( "DECODER SUCCESS : frame_type:%d, slices=%d, slices_bytes=%d, current=%d, forwref:%d, backref:%d, pts:%lld\n", + pic->vdp_infos.picture_coding_type, pic->vdp_infos.slice_count, vbit.bitstream_bytes, accel->surface, pic->vdp_infos.forward_reference, pic->vdp_infos.backward_reference, seq->cur_pts ); + VdpPictureInfoMPEG1Or2 *info = &pic->vdp_infos; + lprintf("%d %d %d %d %d %d %d %d %d %d %d %d %d\n", info->intra_dc_precision, info->frame_pred_frame_dct, info->concealment_motion_vectors, + info->intra_vlc_format, info->alternate_scan, info->q_scale_type, info->top_field_first, info->full_pel_forward_vector, + info->full_pel_backward_vector, info->f_code[0][0], info->f_code[0][1], info->f_code[1][0], info->f_code[1][1] ); + } + + if ( pic->vdp_infos.picture_structure != PICTURE_FRAME ) { + pic->vdp_infos2.backward_reference = VDP_INVALID_HANDLE; + pic->vdp_infos2.forward_reference = VDP_INVALID_HANDLE; + if ( pic->vdp_infos2.picture_coding_type==P_FRAME ) { + if ( pic->vdp_infos.picture_coding_type==I_FRAME ) + pic->vdp_infos2.forward_reference = accel->surface; + else + pic->vdp_infos2.forward_reference = pic->vdp_infos.forward_reference; + } + else if ( pic->vdp_infos.picture_coding_type==B_FRAME ) { + pic->vdp_infos2.forward_reference = pic->vdp_infos.forward_reference; + 
pic->vdp_infos2.backward_reference = pic->vdp_infos.backward_reference; + } + vbit.struct_version = VDP_BITSTREAM_BUFFER_VERSION; + vbit.bitstream = pic->slices+pic->slices_pos_top; + vbit.bitstream_bytes = pic->slices_pos-pic->slices_pos_top; + st = accel->vdp_decoder_render( vd->decoder, accel->surface, (VdpPictureInfo*)&pic->vdp_infos2, 1, &vbit ); + if ( st!=VDP_STATUS_OK ) + lprintf( "decoder failed : %d!! %s\n", st, accel->vdp_get_error_string( st ) ); + else + lprintf( "DECODER SUCCESS : frame_type:%d, slices=%d, current=%d, forwref:%d, backref:%d, pts:%lld\n", + pic->vdp_infos2.picture_coding_type, pic->vdp_infos2.slice_count, accel->surface, pic->vdp_infos2.forward_reference, pic->vdp_infos2.backward_reference, seq->cur_pts ); + } +} + + + +static void decode_picture( vdpau_mpeg12_decoder_t *vd, uint8_t end_of_sequence ) +{ + sequence_t *seq = (sequence_t*)&vd->sequence; + picture_t *pic = (picture_t*)&seq->picture; + vdpau_accel_t *ref_accel; + + if ( seq->profile == VDP_DECODER_PROFILE_MPEG1 ) + pic->vdp_infos.picture_structure=PICTURE_FRAME; + + if ( pic->vdp_infos.picture_coding_type==P_FRAME ) { + if ( seq->backward_ref ) { + ref_accel = (vdpau_accel_t*)seq->backward_ref->accel_data; + pic->vdp_infos.forward_reference = ref_accel->surface; + } + else + return; + } + else if ( pic->vdp_infos.picture_coding_type==B_FRAME ) { + if ( seq->forward_ref ) { + ref_accel = (vdpau_accel_t*)seq->forward_ref->accel_data; + pic->vdp_infos.forward_reference = ref_accel->surface; + } + else + return; + if ( seq->backward_ref ) { + ref_accel = (vdpau_accel_t*)seq->backward_ref->accel_data; + pic->vdp_infos.backward_reference = ref_accel->surface; + } + else + return; + } + + int still_image = (end_of_sequence) ? 
VO_STILL_IMAGE : 0; + vo_frame_t *img = vd->stream->video_out->get_frame( vd->stream->video_out, seq->coded_width, seq->coded_height, + seq->ratio, XINE_IMGFMT_VDPAU, VO_BOTH_FIELDS | seq->chroma | seq->reset | still_image ); + seq->reset = 0; + vdpau_accel_t *accel = (vdpau_accel_t*)img->accel_data; + if ( !seq->accel_vdpau ) + seq->accel_vdpau = accel; + + if( seq->vdp_runtime_nr != *(seq->accel_vdpau->current_vdp_runtime_nr) ) { + seq->accel_vdpau = accel; + if ( seq->forward_ref ) + seq->forward_ref->free( seq->forward_ref ); + seq->forward_ref = NULL; + if ( seq->backward_ref ) + seq->backward_ref->free( seq->backward_ref ); + seq->backward_ref = NULL; + vd->decoder = VDP_INVALID_HANDLE; + } + + decode_render( vd, accel ); + +#ifdef MAKE_DAT + if ( nframes==0 ) { + fwrite( &seq->coded_width, 1, sizeof(seq->coded_width), outfile ); + fwrite( &seq->coded_height, 1, sizeof(seq->coded_height), outfile ); + fwrite( &seq->ratio, 1, sizeof(seq->ratio), outfile ); + fwrite( &seq->profile, 1, sizeof(seq->profile), outfile ); + } + + if ( nframes++ < 25 ) { + fwrite( &pic->vdp_infos, 1, sizeof(pic->vdp_infos), outfile ); + fwrite( &pic->slices_pos, 1, sizeof(pic->slices_pos), outfile ); + fwrite( pic->slices, 1, pic->slices_pos, outfile ); + } +#endif + + img->drawn = 0; + img->pts = seq->seq_pts; + seq->seq_pts = 0; /* reset */ + img->bad_frame = 0; + + if ( end_of_sequence ) { + if ( seq->backward_ref ) + seq->backward_ref->free( seq->backward_ref ); + seq->backward_ref = NULL; + } + +#if 0 + /* trying to deal with (french) buggy streams that randomly set bottom_field_first + while stream is top_field_first. So we assume that when top_field_first + is set one time, the stream _is_ top_field_first. 
*/ + lprintf("pic->vdp_infos.top_field_first = %d\n", pic->vdp_infos.top_field_first); + if ( pic->vdp_infos.top_field_first ) + seq->top_field_first = 1; + img->top_field_first = seq->top_field_first; +#else + img->top_field_first = pic->vdp_infos.top_field_first; +#endif + + /* progressive_frame is unreliable with most mpeg2 streams */ + if ( pic->vdp_infos.picture_structure!=PICTURE_FRAME ) + img->progressive_frame = 0; + else + img->progressive_frame = pic->progressive_frame; + + img->repeat_first_field = pic->repeat_first_field; + + double duration = seq->video_step; + + if ( img->repeat_first_field ) { + if( !seq->progressive_sequence && pic->progressive_frame ) { + /* decoder should output 3 fields, so adjust duration to + count on this extra field time */ + duration *= 3; + duration /= 2; + } else if ( seq->progressive_sequence ) { + /* for progressive sequences the output should repeat the + frame 1 or 2 times depending on top_field_first flag. */ + duration *= (pic->vdp_infos.top_field_first ? 
3 : 2); + } + } + + img->duration = (int)(duration + .5); + + if ( pic->vdp_infos.picture_coding_type!=B_FRAME ) { + if ( pic->vdp_infos.picture_coding_type==I_FRAME && !seq->backward_ref ) { + img->pts = 0; + img->draw( img, vd->stream ); + ++img->drawn; + } + if ( seq->forward_ref ) { + seq->forward_ref->drawn = 0; + seq->forward_ref->free( seq->forward_ref ); + } + seq->forward_ref = seq->backward_ref; + if ( seq->forward_ref && !seq->forward_ref->drawn ) { + seq->forward_ref->draw( seq->forward_ref, vd->stream ); + } + seq->backward_ref = img; + } + else { + img->draw( img, vd->stream ); + img->free( img ); + } +} + + + +static void picture_ready( vdpau_mpeg12_decoder_t *vd, uint8_t end_of_sequence ) +{ + picture_t *pic = (picture_t*)&vd->sequence.picture; + if ( !pic->slices_count ) + return; + if ( pic->vdp_infos2.picture_structure && !pic->slices_count2 ) + return; + decode_picture( vd, end_of_sequence ); +} + + + +/* + * This function receives a buffer of data from the demuxer layer and + * figures out how to handle it based on its header flags. 
+ */ +static void vdpau_mpeg12_decode_data (video_decoder_t *this_gen, buf_element_t *buf) +{ + vdpau_mpeg12_decoder_t *this = (vdpau_mpeg12_decoder_t *) this_gen; + sequence_t *seq = (sequence_t*)&this->sequence; + + /* preview buffers shall not be decoded and drawn -- use them only to supply stream information */ + if (buf->decoder_flags & BUF_FLAG_PREVIEW) + return; + + if ( !buf->size ) + return; + + if ( buf->pts ) + seq->cur_pts = buf->pts; + + int size = seq->bufpos+buf->size; + if ( seq->bufsize < size ) { + seq->bufsize = size+1024; + seq->buf = realloc( seq->buf, seq->bufsize ); + } + xine_fast_memcpy( seq->buf+seq->bufpos, buf->content, buf->size ); + seq->bufpos += buf->size; + + while ( seq->bufseek <= seq->bufpos-4 ) { + uint8_t *buffer = seq->buf+seq->bufseek; + if ( buffer[0]==0 && buffer[1]==0 && buffer[2]==1 ) { + if ( seq->start<0 ) { + seq->start = seq->bufseek; + } + else { + parse_code( this, seq->buf+seq->start, seq->bufseek-seq->start ); + uint8_t *tmp = (uint8_t*)malloc(seq->bufsize); + xine_fast_memcpy( tmp, seq->buf+seq->bufseek, seq->bufpos-seq->bufseek ); + seq->bufpos -= seq->bufseek; + seq->start = -1; + seq->bufseek = -1; + free( seq->buf ); + seq->buf = tmp; + } + } + ++seq->bufseek; + } + + /* still image detection -- don't wait for further data if buffer ends in sequence end code */ + if (seq->start >= 0 && seq->buf[seq->start + 3] == sequence_end_code) { + decode_picture(this, 1); + parse_code(this, seq->buf+seq->start, 4); + seq->start = -1; + } +} + +/* + * This function is called when xine needs to flush the system. + */ +static void vdpau_mpeg12_flush (video_decoder_t *this_gen) { + vdpau_mpeg12_decoder_t *this = (vdpau_mpeg12_decoder_t *) this_gen; + + lprintf( "vdpau_mpeg12_flush\n" ); +} + +/* + * This function resets the video decoder. 
+ */ +static void vdpau_mpeg12_reset (video_decoder_t *this_gen) { + vdpau_mpeg12_decoder_t *this = (vdpau_mpeg12_decoder_t *) this_gen; + + lprintf( "vdpau_mpeg12_reset\n" ); + reset_sequence( &this->sequence, 1 ); +} + +/* + * The decoder should forget any stored pts values here. + */ +static void vdpau_mpeg12_discontinuity (video_decoder_t *this_gen) { + vdpau_mpeg12_decoder_t *this = (vdpau_mpeg12_decoder_t *) this_gen; + + lprintf( "vdpau_mpeg12_discontinuity\n" ); + reset_sequence( &this->sequence, 0 ); +} + +/* + * This function frees the video decoder instance allocated to the decoder. + */ +static void vdpau_mpeg12_dispose (video_decoder_t *this_gen) { + + vdpau_mpeg12_decoder_t *this = (vdpau_mpeg12_decoder_t *) this_gen; + + lprintf( "vdpau_mpeg12_dispose\n" ); + + if ( this->decoder!=VDP_INVALID_HANDLE && this->sequence.accel_vdpau ) { + this->sequence.accel_vdpau->vdp_decoder_destroy( this->decoder ); + this->decoder = VDP_INVALID_HANDLE; + } + + free_sequence( &this->sequence ); + + this->stream->video_out->close( this->stream->video_out, this->stream ); + + free( this->sequence.picture.slices ); + free( this->sequence.buf ); + free( this_gen ); +} + +/* + * This function allocates, initializes, and returns a private video + * decoder structure. 
+ */ +static video_decoder_t *open_plugin (video_decoder_class_t *class_gen, xine_stream_t *stream) { + + vdpau_mpeg12_decoder_t *this ; + + lprintf( "open_plugin\n" ); + + /* the videoout must be vdpau-capable to support this decoder */ + if ( !(stream->video_driver->get_capabilities(stream->video_driver) & VO_CAP_VDPAU_MPEG12) ) + return NULL; + + /* now check if vdpau has free decoder resource */ + vo_frame_t *img = stream->video_out->get_frame( stream->video_out, 1920, 1080, 1, XINE_IMGFMT_VDPAU, VO_BOTH_FIELDS ); + vdpau_accel_t *accel = (vdpau_accel_t*)img->accel_data; + int runtime_nr = accel->vdp_runtime_nr; + img->free(img); + VdpDecoder decoder; + VdpStatus st = accel->vdp_decoder_create( accel->vdp_device, VDP_DECODER_PROFILE_MPEG2_MAIN, 1920, 1080, 2, &decoder ); + if ( st!=VDP_STATUS_OK ) { + lprintf( "can't create vdpau decoder.\n" ); + return NULL; + } + + accel->vdp_decoder_destroy( decoder ); + + this = (vdpau_mpeg12_decoder_t *) calloc(1, sizeof(vdpau_mpeg12_decoder_t)); + + this->video_decoder.decode_data = vdpau_mpeg12_decode_data; + this->video_decoder.flush = vdpau_mpeg12_flush; + this->video_decoder.reset = vdpau_mpeg12_reset; + this->video_decoder.discontinuity = vdpau_mpeg12_discontinuity; + this->video_decoder.dispose = vdpau_mpeg12_dispose; + + this->stream = stream; + this->class = (vdpau_mpeg12_class_t *) class_gen; + + this->sequence.bufsize = 1024; + this->sequence.buf = (uint8_t*)malloc(this->sequence.bufsize); + this->sequence.forward_ref = 0; + this->sequence.backward_ref = 0; + this->sequence.vdp_runtime_nr = runtime_nr; + free_sequence( &this->sequence ); + this->sequence.ratio = 1; + this->sequence.reset = VO_NEW_SEQUENCE_FLAG; + + init_picture( &this->sequence.picture ); + + this->decoder = VDP_INVALID_HANDLE; + this->sequence.accel_vdpau = NULL; + + (stream->video_out->open)(stream->video_out, stream); + +#ifdef MAKE_DAT + outfile = fopen( "/tmp/mpg.dat","w"); + nframes = 0; +#endif + + return &this->video_decoder; +} + +/* + 
* This function allocates a private video decoder class and initializes + * the class's member functions. + */ +static void *init_plugin (xine_t *xine, void *data) { + + vdpau_mpeg12_class_t *this; + + this = (vdpau_mpeg12_class_t *) calloc(1, sizeof(vdpau_mpeg12_class_t)); + + this->decoder_class.open_plugin = open_plugin; + this->decoder_class.identifier = "vdpau_mpeg12"; + this->decoder_class.description = + N_("vdpau_mpeg12: mpeg1/2 decoder plugin using VDPAU hardware decoding.\n" + "Must be used along with video_out_vdpau."); + this->decoder_class.dispose = default_video_decoder_class_dispose; + + return this; +} + +/* + * This is a list of all of the internal xine video buffer types that + * this decoder is able to handle. Check src/xine-engine/buffer.h for a + * list of valid buffer types (and add a new one if the one you need does + * not exist). Terminate the list with a 0. + */ +static const uint32_t video_types[] = { + BUF_VIDEO_MPEG, + 0 +}; + +/* + * This data structure combines the list of supported xine buffer types and + * the priority that the plugin should be given with respect to other + * plugins that handle the same buffer type. A plugin with priority (n+1) + * will be used instead of a plugin with priority (n). + */ +static const decoder_info_t dec_info_video = { + video_types, /* supported types */ + 8 /* priority */ +}; + +/* + * The plugin catalog entry. This is the only information that this plugin + * will export to the public. 
+ */ +const plugin_info_t xine_plugin_info[] EXPORTED = { + /* { type, API, "name", version, special_info, init_function } */ + { PLUGIN_VIDEO_DECODER, 19, "vdpau_mpeg12", XINE_VERSION_CODE, &dec_info_video, init_plugin }, + { PLUGIN_NONE, 0, "", 0, NULL, NULL } +}; diff --git a/src/video_dec/libvdpau/vdpau_mpeg4.c b/src/video_dec/libvdpau/vdpau_mpeg4.c new file mode 100644 index 000000000..4d7dee1ed --- /dev/null +++ b/src/video_dec/libvdpau/vdpau_mpeg4.c @@ -0,0 +1,1194 @@ +/* + * kate: space-indent on; indent-width 2; mixedindent off; indent-mode cstyle; remove-trailing-space on; + * + * Copyright (C) 2010 the xine project + * Copyright (C) 2010 Christophe Thommeret <hftom@free.fr> + * + * This file is part of xine, a free video player. + * + * xine is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * xine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110, USA + * + * vdpau_mpeg4.c, a mpeg4-part-2 video stream parser using VDPAU hardware decoder + * + */ + +/*#define LOG*/ +#define LOG_MODULE "vdpau_mpeg4" + + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> +#include <unistd.h> + +#include <xine/xine_internal.h> +#include <xine/video_out.h> +#include <xine/buffer.h> +#include <xine/xineutils.h> +#include "accel_vdpau.h" +#include "bits_reader.h" + +#include <vdpau/vdpau.h> + +#define begin_vo_start_code 0x00 +#define end_vo_start_code 0x1f +#define begin_vol_start_code 0x20 +#define end_vol_start_code 0x2f +#define viso_sequence_start_code 0xb0 +#define viso_sequence_end_code 0xb1 +#define viso_start_code 0xb5 +#define group_start_code 0xb3 +#define user_data_start_code 0xb2 +#define vop_start_code 0xb6 + +#define I_FRAME 0 +#define P_FRAME 1 +#define B_FRAME 2 + +#define PICTURE_TOP 1 +#define PICTURE_BOTTOM 2 +#define PICTURE_FRAME 3 + +#define SHAPE_RECT 0 +#define SHAPE_BIN 1 +#define SHAPE_BINONLY 2 +#define SHAPE_GRAY 3 + +#define SPRITE_STATIC 1 +#define SPRITE_GMC 2 + +static int nframe; + +/*#define MAKE_DAT*/ /*do NOT define this, unless you know what you do */ +#ifdef MAKE_DAT +static int nframes; +static FILE *outfile; +#endif + + + +/* default intra quant matrix, in zig-zag order */ +static const uint8_t default_intra_quantizer_matrix[64] = { + 8, + 17, 17, + 20, 18, 18, + 19, 19, 21, 21, + 22, 22, 22, 21, 21, + 23, 23, 23, 23, 23, 23, + 25, 24, 24, 24, 24, 25, 25, + 27, 27, 26, 26, 26, 26, 26, 27, + 28, 28, 28, 28, 28, 28, 28, + 30, 30, 30, 30, 30, 30, + 32, 32, 32, 32, 32, + 35, 35, 35, 35, + 38, 38, 38, + 41, 41, + 45 +}; + +/* default non intra quant matrix, in zig-zag order */ +static const uint8_t 
default_non_intra_quantizer_matrix[64] = { + 16, + 17, 17, + 18, 18, 18, + 19, 19, 19, 19, + 20, 20, 20, 20, 20, + 21, 21, 21, 21, 21, 21, + 22, 22, 22, 22, 22, 22, 22, + 23, 23, 23, 23, 23, 23, 23, 23, + 24, 24, 24, 25, 24, 24, 24, + 25, 26, 26, 26, 26, 25, + 27, 27, 27, 27, 27, + 28, 28, 28, 28, + 30, 30, 30, + 31, 31, + 33 +}; + +uint8_t mpeg_scan_norm[64] = { + /* Zig-Zag scan pattern */ + 0, 1, 8,16, 9, 2, 3,10, + 17,24,32,25,18,11, 4, 5, + 12,19,26,33,40,48,41,34, + 27,20,13, 6, 7,14,21,28, + 35,42,49,56,57,50,43,36, + 29,22,15,23,30,37,44,51, + 58,59,52,45,38,31,39,46, + 53,60,61,54,47,55,62,63 +}; + + + +typedef struct { + VdpPictureInfoMPEG4Part2 vdp_infos; /* first field, also used for frame */ + + int viso_verid; + int newpred_enable; + int reduced_resolution_vop_enable; + int vol_shape; + int complexity_estimation_disable; + int sprite_enable; + int quant_precision; + + int progressive_frame; +} picture_t; + + + +typedef struct { + uint32_t coded_width; + uint32_t coded_height; + + uint64_t video_step; /* frame duration in pts units */ + double ratio; + VdpDecoderProfile profile; + int chroma; + int top_field_first; + + int have_header; + + uint8_t *buf; /* accumulate data */ + int bufseek; + uint32_t bufsize; + uint32_t bufpos; + int start; + + picture_t picture; + vo_frame_t *forward_ref; + vo_frame_t *backward_ref; + + int64_t cur_pts, seq_pts; + + vdpau_accel_t *accel_vdpau; + + VdpColorStandard color_standard; + + bits_reader_t br; + + int vdp_runtime_nr; + int reset; + + int have_codec_name; + char codec_name[256]; + + int fixed_vop_time_increment; + int time_increment_bits; + int last_time_base; + int time_base; + int time; + int last_non_b_time; + int t_frame; + +} sequence_t; + + + +typedef struct { + video_decoder_class_t decoder_class; +} vdpau_mpeg4_class_t; + + + +typedef struct vdpau_mpeg4_decoder_s { + video_decoder_t video_decoder; /* parent video decoder structure */ + + vdpau_mpeg4_class_t *class; + xine_stream_t *stream; + + 
sequence_t sequence; + + VdpDecoder decoder; + VdpDecoderProfile decoder_profile; + uint32_t decoder_width; + uint32_t decoder_height; + +} vdpau_mpeg4_decoder_t; + + + +static void reset_picture( picture_t *pic ) +{ + lprintf( "reset_picture\n" ); + pic->vdp_infos.vop_coding_type = 0; + pic->vdp_infos.alternate_vertical_scan_flag = 0; + pic->vdp_infos.quant_type = 0; + pic->vdp_infos.vop_time_increment_resolution = 0; + pic->vdp_infos.vop_fcode_forward = 1; + pic->vdp_infos.vop_fcode_backward = 1; + pic->vdp_infos.resync_marker_disable = 0; + pic->vdp_infos.interlaced = 0; + pic->vdp_infos.quarter_sample = 0; + pic->vdp_infos.short_video_header = 0; + pic->vdp_infos.rounding_control = 0; + pic->vdp_infos.top_field_first = 1; + pic->progressive_frame = 1; + pic->viso_verid = 1; + pic->newpred_enable = 0; + pic->reduced_resolution_vop_enable = 0; + pic->complexity_estimation_disable = 1; + pic->vol_shape = SHAPE_RECT; + pic->quant_precision = 5; + pic->vdp_infos.trd[0] = pic->vdp_infos.trd[1] = 0; + pic->vdp_infos.trb[0] = pic->vdp_infos.trb[1] = 0; +} + + + +static void init_picture( picture_t *pic ) +{ + reset_picture( pic ); +} + + + +static void reset_sequence( sequence_t *sequence, int free_refs ) +{ + sequence->cur_pts = sequence->seq_pts = 0; + if ( sequence->forward_ref ) + sequence->forward_ref->pts = 0; + if ( sequence->backward_ref ) + sequence->backward_ref->pts = 0; + + if ( !free_refs ) + return; + + sequence->bufpos = 0; + sequence->bufseek = 0; + sequence->start = -1; + if ( sequence->forward_ref ) + sequence->forward_ref->free( sequence->forward_ref ); + sequence->forward_ref = NULL; + if ( sequence->backward_ref ) + sequence->backward_ref->free( sequence->backward_ref ); + sequence->backward_ref = NULL; + sequence->top_field_first = 0; + sequence->reset = VO_NEW_SEQUENCE_FLAG; + sequence->color_standard = VDP_COLOR_STANDARD_ITUR_BT_709; + + sequence->last_time_base = 0; + sequence->time_base = 0; + sequence->time = 0; + sequence->last_non_b_time = 
0; + sequence->t_frame = 0; +} + + + +static void free_sequence( sequence_t *sequence ) +{ + lprintf( "init_sequence\n" ); + sequence->have_header = 0; + sequence->profile = VDP_DECODER_PROFILE_MPEG4_PART2_ASP; + sequence->chroma = 0; + sequence->video_step = 3600; + sequence->have_codec_name = 0; + strcpy( sequence->codec_name, "MPEG4 / XviD / DivX (vdpau)" ); + reset_sequence( sequence, 1 ); +} + + + +static void update_metadata( vdpau_mpeg4_decoder_t *this_gen ) +{ + sequence_t *sequence = (sequence_t*)&this_gen->sequence; + + _x_stream_info_set( this_gen->stream, XINE_STREAM_INFO_VIDEO_WIDTH, sequence->coded_width ); + _x_stream_info_set( this_gen->stream, XINE_STREAM_INFO_VIDEO_HEIGHT, sequence->coded_height ); + _x_stream_info_set( this_gen->stream, XINE_STREAM_INFO_VIDEO_RATIO, ((double)10000*sequence->ratio) ); + _x_stream_info_set( this_gen->stream, XINE_STREAM_INFO_FRAME_DURATION, sequence->video_step ); + _x_meta_info_set_utf8( this_gen->stream, XINE_META_INFO_VIDEOCODEC, sequence->codec_name ); + xine_event_t event; + xine_format_change_data_t data; + event.type = XINE_EVENT_FRAME_FORMAT_CHANGE; + event.stream = this_gen->stream; + event.data = &data; + event.data_length = sizeof(data); + data.width = sequence->coded_width; + data.height = sequence->coded_height; + data.aspect = sequence->ratio; + xine_event_send( this_gen->stream, &event ); +} + + + +static void visual_object( vdpau_mpeg4_decoder_t *this_gen, uint8_t *buf, int len ) +{ + sequence_t *sequence = (sequence_t*)&this_gen->sequence; + picture_t *picture = (picture_t*)&sequence->picture; + bits_reader_set( &sequence->br, buf, len ); + + if ( read_bits( &sequence->br, 1 ) ) { + picture->viso_verid = read_bits( &sequence->br, 4 ); + lprintf("visual_object_verid: %d\n", picture->viso_verid); + skip_bits( &sequence->br, 3 ); + } + if ( read_bits( &sequence->br, 4 ) == 1 ) { + if ( read_bits( &sequence->br, 1 ) ) { + skip_bits( &sequence->br, 4 ); + if ( read_bits( &sequence->br, 1 ) ) { + if ( 
read_bits( &sequence->br, 8 ) == 7 ) { + lprintf("color_standard: smpte_240M\n"); + sequence->color_standard = VDP_COLOR_STANDARD_SMPTE_240M; + } + skip_bits( &sequence->br, 16 ); + } + } + } +} + + + +static void video_object_layer( vdpau_mpeg4_decoder_t *this_gen, uint8_t *buf, int len ) +{ + sequence_t *sequence = (sequence_t*)&this_gen->sequence; + picture_t *picture = (picture_t*)&sequence->picture; + bits_reader_set( &sequence->br, buf, len ); + + int vol_verid = 1; + + picture->vdp_infos.short_video_header = 0; + sequence->t_frame = 0; + + skip_bits( &sequence->br, 9 ); + if ( read_bits( &sequence->br, 1 ) ) { + vol_verid = read_bits( &sequence->br, 4 ); + lprintf("video_object_layer_verid: %d\n", vol_verid); + skip_bits( &sequence->br, 3 ); + } + double parw=1, parh=1; + int ar = read_bits( &sequence->br, 4 ); + lprintf("aspect_ratio_info: %d\n", ar); + switch ( ar ) { + case 1: parw = parh = 1; break; + case 2: parw = 12; parh = 11; break; + case 3: parw = 10; parh = 11; break; + case 4: parw = 16; parh = 11; break; + case 5: parw = 40; parh = 33; break; + case 15: { + parw = read_bits( &sequence->br, 8 ); + parh = read_bits( &sequence->br, 8 ); + break; + } + } + lprintf("parw: %f, parh: %f\n", parw, parh); + if ( read_bits( &sequence->br, 1 ) ) { + skip_bits( &sequence->br, 3 ); + if ( read_bits( &sequence->br, 1 ) ) { + read_bits( &sequence->br, 16 ); + read_bits( &sequence->br, 16 ); + read_bits( &sequence->br, 16 ); + read_bits( &sequence->br, 15 ); + read_bits( &sequence->br, 16 ); + } + } + + picture->vol_shape = read_bits( &sequence->br, 2 ); + if ( (picture->vol_shape == SHAPE_GRAY) && (vol_verid != 1) ) { + skip_bits( &sequence->br, 4 ); + fprintf(stderr, "vdpau_mpeg4: unsupported SHAPE_GRAY!\n"); + } + skip_bits( &sequence->br, 1 ); + picture->vdp_infos.vop_time_increment_resolution = read_bits( &sequence->br, 16 ); + lprintf("vop_time_increment_resolution: %d\n", picture->vdp_infos.vop_time_increment_resolution); + int length=1, max=2; + while 
( (max - 1) < picture->vdp_infos.vop_time_increment_resolution ) { + ++length; + max *= 2; + } + sequence->time_increment_bits = length; + if ( sequence->time_increment_bits < 1 ) + sequence->time_increment_bits = 1; + skip_bits( &sequence->br, 1 ); + + if ( read_bits( &sequence->br, 1 ) ) { + sequence->fixed_vop_time_increment = read_bits( &sequence->br, sequence->time_increment_bits ); + } + else + sequence->fixed_vop_time_increment = 1; + + sequence->video_step = 90000 / (picture->vdp_infos.vop_time_increment_resolution / sequence->fixed_vop_time_increment); + lprintf("fixed_vop_time_increment: %d\n", sequence->fixed_vop_time_increment); + lprintf("video_step: %d\n", (int)sequence->video_step); + + if ( picture->vol_shape != SHAPE_BINONLY ) { + if ( picture->vol_shape == SHAPE_RECT ) { + skip_bits( &sequence->br, 1 ); + sequence->coded_width = read_bits( &sequence->br, 13 ); + lprintf("vol_width: %d\n", sequence->coded_width); + skip_bits( &sequence->br, 1 ); + sequence->coded_height = read_bits( &sequence->br, 13 ); + lprintf("vol_height: %d\n", sequence->coded_height); + skip_bits( &sequence->br, 1 ); + } + sequence->ratio = ((double)sequence->coded_width * parw) / ((double)sequence->coded_height * parh); + lprintf("aspect_ratio: %f\n", sequence->ratio); + picture->vdp_infos.interlaced = read_bits( &sequence->br, 1 ); + skip_bits( &sequence->br, 1 ); + + picture->sprite_enable = 0; + if ( vol_verid == 1 ) + picture->sprite_enable = read_bits( &sequence->br, 1 ); + else + picture->sprite_enable = read_bits( &sequence->br, 2 ); + + if ( (picture->sprite_enable == SPRITE_STATIC) || (picture->sprite_enable == SPRITE_GMC) ) { + if ( picture->sprite_enable != SPRITE_GMC ) { + skip_bits( &sequence->br, 14 ); + skip_bits( &sequence->br, 14 ); + skip_bits( &sequence->br, 14 ); + skip_bits( &sequence->br, 14 ); + } + skip_bits( &sequence->br, 9 ); + if ( picture->sprite_enable != SPRITE_GMC ) + skip_bits( &sequence->br, 1 ); + } + if ( (vol_verid != 1) && 
(picture->vol_shape != SHAPE_RECT) ) + skip_bits( &sequence->br, 1 ); + + if ( read_bits( &sequence->br, 1 ) ) { + picture->quant_precision = read_bits( &sequence->br, 4 ); + skip_bits( &sequence->br, 4 ); + } + else + picture->quant_precision = 5; + + if ( picture->vol_shape == SHAPE_GRAY ) + skip_bits( &sequence->br, 3 ); + + picture->vdp_infos.quant_type = read_bits( &sequence->br, 1 ); + + /* load default matrices */ + int j; + for ( j=0; j<64; ++j ) { + sequence->picture.vdp_infos.intra_quantizer_matrix[mpeg_scan_norm[j]] = default_intra_quantizer_matrix[j]; + sequence->picture.vdp_infos.non_intra_quantizer_matrix[mpeg_scan_norm[j]] = default_non_intra_quantizer_matrix[j]; + } + if ( picture->vdp_infos.quant_type ) { + int val, last = 0; + if ( read_bits( &sequence->br, 1 ) ) { /* load_intra_quant_matrix */ + lprintf("load_intra_quant_matrix\n"); + for ( j=0; j<64; ++j ) { + val = read_bits( &sequence->br, 8 ); + if ( !val ) + break; + last = sequence->picture.vdp_infos.intra_quantizer_matrix[j] = val; + } + for ( ; j<64; ++j ) + sequence->picture.vdp_infos.intra_quantizer_matrix[j] = last; + } + if ( read_bits( &sequence->br, 1 ) ) { /* load_non_intra_quant_matrix */ + lprintf("load_non_intra_quant_matrix\n"); + for ( j=0; j<64; ++j ) { + val = read_bits( &sequence->br, 8 ); + if ( !val ) + break; + last = sequence->picture.vdp_infos.non_intra_quantizer_matrix[j] = val; + } + for ( ; j<64; ++j ) + sequence->picture.vdp_infos.non_intra_quantizer_matrix[j] = last; + } + if ( picture->vol_shape == SHAPE_GRAY ) { /* FIXME */ + fprintf(stderr, "vdpau_mpeg4: grayscale shape not supported!\n"); + return; + } + } + if ( vol_verid != 1 ) + sequence->picture.vdp_infos.quarter_sample = read_bits( &sequence->br, 1 ); + else + sequence->picture.vdp_infos.quarter_sample = 0; + + picture->complexity_estimation_disable = read_bits( &sequence->br, 1 ); + if ( !picture->complexity_estimation_disable ) { /* define_vop_complexity_estimation_header */ + int estimation_method = 
read_bits( &sequence->br, 2 ); + if ( (estimation_method == 0) || (estimation_method == 1) ){ + if ( !read_bits( &sequence->br, 1 ) ) + skip_bits( &sequence->br, 6 ); + if ( !read_bits( &sequence->br, 1 ) ) + skip_bits( &sequence->br, 4 ); + skip_bits( &sequence->br, 1 ); + if ( !read_bits( &sequence->br, 1 ) ) + skip_bits( &sequence->br, 4 ); + if ( !read_bits( &sequence->br, 1 ) ) + skip_bits( &sequence->br, 6 ); + skip_bits( &sequence->br, 1 ); + if ( estimation_method == 1 ) { + if ( !read_bits( &sequence->br, 1 ) ) + skip_bits( &sequence->br, 2 ); + } + } + } + + picture->vdp_infos.resync_marker_disable = read_bits( &sequence->br, 1 ); + + if ( read_bits( &sequence->br, 1 ) ) + skip_bits( &sequence->br, 1 ); + if ( vol_verid != 1 ) { + picture->newpred_enable = read_bits( &sequence->br, 1 ); + if ( picture->newpred_enable ) + skip_bits( &sequence->br, 3 ); + picture->reduced_resolution_vop_enable = read_bits( &sequence->br, 1 ); + } + else { + picture->newpred_enable = 0; + picture->reduced_resolution_vop_enable = 0; + } + /* .... */ + } + else { + if ( vol_verid != 1 ) { + if ( read_bits( &sequence->br, 1 ) ) + skip_bits( &sequence->br, 24 ); + } + picture->vdp_infos.resync_marker_disable = read_bits( &sequence->br, 1 ); + } + + if ( !sequence->have_header ) { + update_metadata( this_gen ); + sequence->have_header = 1; + } +} + + +#define ROUNDED_DIV(a,b) (((a)>0 ? 
(a) + ((b)>>1) : (a) - ((b)>>1))/(b)) + +static void video_object_plane( vdpau_mpeg4_decoder_t *this_gen, uint8_t *buf, int len ) +{ + sequence_t *sequence = (sequence_t*)&this_gen->sequence; + picture_t *picture = (picture_t*)&sequence->picture; + bits_reader_set( &sequence->br, buf, len ); + int time_inc=0, time_increment; + + sequence->seq_pts = sequence->cur_pts; + sequence->cur_pts = 0; + + picture->vdp_infos.vop_coding_type = read_bits( &sequence->br, 2 ); + while ( read_bits( &sequence->br, 1 ) ) + ++time_inc; + + skip_bits( &sequence->br, 1 ); + + if ( sequence->time_increment_bits == 0 || !(get_bits( &sequence->br, sequence->time_increment_bits + 1) & 1) ) { + for ( sequence->time_increment_bits = 1; sequence->time_increment_bits < 16; ++sequence->time_increment_bits ) { + if ( picture->vdp_infos.vop_coding_type == P_FRAME ) { + if ( (get_bits( &sequence->br, sequence->time_increment_bits + 6 ) & 0x37) == 0x30 ) + break; + } + else { + if ( (get_bits( &sequence->br, sequence->time_increment_bits + 5 ) & 0x1f) == 0x18 ) + break; + } + fprintf(stderr, "Headers are not complete, guessing time_increment_bits: %d\n", sequence->time_increment_bits); + } + } + + time_increment = read_bits( &sequence->br, sequence->time_increment_bits ); + + if ( picture->vdp_infos.vop_coding_type != B_FRAME ) { + sequence->last_time_base = sequence->time_base; + sequence->time_base += time_inc; + sequence->time = sequence->time_base * picture->vdp_infos.vop_time_increment_resolution + time_increment; + if ( sequence->time < sequence->last_non_b_time ) { + ++sequence->time_base; + sequence->time += picture->vdp_infos.vop_time_increment_resolution; + } + picture->vdp_infos.trd[0] = sequence->time - sequence->last_non_b_time; + sequence->last_non_b_time = sequence->time; + } + else { + sequence->time = (sequence->last_time_base + time_inc) * picture->vdp_infos.vop_time_increment_resolution + time_increment; + picture->vdp_infos.trb[0] = picture->vdp_infos.trd[0] - 
(sequence->last_non_b_time - sequence->time); + if ( (picture->vdp_infos.trd[0] <= picture->vdp_infos.trb[0] ) || (picture->vdp_infos.trd[0] <= (picture->vdp_infos.trd[0] - picture->vdp_infos.trb[0])) || (picture->vdp_infos.trd[0] <= 0) ) { + /* FIXME */ + } + if ( sequence->t_frame == 0 ) + sequence->t_frame = picture->vdp_infos.trb[0]; + if ( sequence->t_frame == 0 ) + sequence->t_frame = 1; + picture->vdp_infos.trd[1] = ( ROUNDED_DIV(sequence->last_non_b_time, sequence->t_frame) - ROUNDED_DIV(sequence->last_non_b_time - picture->vdp_infos.trd[0], sequence->t_frame)); + picture->vdp_infos.trb[1] = ( ROUNDED_DIV(sequence->time, sequence->t_frame) - ROUNDED_DIV(sequence->last_non_b_time - picture->vdp_infos.trd[0], sequence->t_frame)); + if ( picture->vdp_infos.interlaced ) { + /* FIXME */ + } + } + + /*if ( sequence->fixed_vop_time_increment ) + sequence->seq_pts = ( sequence->time + sequence->fixed_vop_time_increment/2 ) / sequence->fixed_vop_time_increment;*/ + + skip_bits( &sequence->br, 1 ); + if ( !read_bits( &sequence->br, 1 ) ) + return; /* vop_coded == 0 */ + + if ( picture->newpred_enable ) { /* FIXME */ + fprintf(stderr, "vdpau_mpeg4: newpred_enable, dunno what to do !!!\n"); + return; + } + + if ( (picture->vol_shape != SHAPE_BINONLY) && (picture->vdp_infos.vop_coding_type == P_FRAME) ) + picture->vdp_infos.rounding_control = read_bits( &sequence->br, 1 ); + else + picture->vdp_infos.rounding_control = 0; + + if ( picture->reduced_resolution_vop_enable && (picture->vol_shape == SHAPE_RECT) && (picture->vdp_infos.vop_coding_type != B_FRAME) ) + skip_bits( &sequence->br, 1 ); + if ( picture->vol_shape != SHAPE_RECT ) { /* FIXME */ + fprintf(stderr, "vdpau_mpeg4: vol_shape != SHAPE_RECT, return\n"); + return; + } + + if ( picture->vol_shape != SHAPE_BINONLY ) { + if ( !picture->complexity_estimation_disable ) { /* FIXME */ + fprintf(stderr, "vdpau_mpeg4: TODO: read_vop_complexity_estimation_header\n"); + return; + } + } + + if ( picture->vol_shape != 
SHAPE_BINONLY ) { + skip_bits( &sequence->br, 3 ); + if ( picture->vdp_infos.interlaced ) { + picture->vdp_infos.top_field_first = read_bits( &sequence->br, 1 ); + picture->vdp_infos.alternate_vertical_scan_flag = read_bits( &sequence->br, 1 ); + } + } + + if ( picture->vol_shape != SHAPE_BINONLY ) { + skip_bits( &sequence->br, picture->quant_precision ); + if ( picture->vol_shape == SHAPE_GRAY ) { /* FIXME */ + fprintf(stderr, "vdpau_mpeg4: unsupported SHAPE_GRAY!\n"); + return; + } + if ( picture->vdp_infos.vop_coding_type != I_FRAME ) + picture->vdp_infos.vop_fcode_forward = read_bits( &sequence->br, 3 ); + if ( picture->vdp_infos.vop_coding_type == B_FRAME ) + picture->vdp_infos.vop_fcode_backward = read_bits( &sequence->br, 3 ); + } +} + + + +static void gop_header( vdpau_mpeg4_decoder_t *this_gen, uint8_t *buf, int len ) +{ + int h, m, s; + + sequence_t *sequence = (sequence_t*)&this_gen->sequence; + bits_reader_set( &sequence->br, buf, len ); + + h = read_bits( &sequence->br, 5 ); + m = read_bits( &sequence->br, 6 ); + skip_bits( &sequence->br, 1 ); + s = read_bits( &sequence->br, 6 ); + + sequence->time_base = s + (60 * (m + (60 * h))); +} + + + +static void user_data( vdpau_mpeg4_decoder_t *this_gen, uint8_t *buffer, int len ) +{ + /* code from ffmpeg's mpeg4videodec.c */ + + char buf[256]; + int i, e, ver = 0, build = 0, ver2 = 0, ver3 = 0; + char last; + + if ( this_gen->sequence.have_codec_name ) + return; + + for( i=0; i<255 && i<len; i++ ) { + if ( buffer[i] == 0 ) + break; + buf[i]= buffer[i]; + } + buf[i]=0; + + /* divx detection */ + e = sscanf(buf, "DivX%dBuild%d%c", &ver, &build, &last); + if ( e < 2 ) + e=sscanf(buf, "DivX%db%d%c", &ver, &build, &last); + if ( e >= 2 ) { + strcpy( this_gen->sequence.codec_name, "MPEG4 / DivX " ); + sprintf( buf, "%d", ver ); + strcat( this_gen->sequence.codec_name, " (vdpau)" ); + this_gen->sequence.have_codec_name = 1; + } + + /* ffmpeg detection */ + e = sscanf(buf, "FFmpe%*[^b]b%d", &build) + 3; + if ( e != 4 
) + e=sscanf(buf, "FFmpeg v%d.%d.%d / libavcodec build: %d", &ver, &ver2, &ver3, &build); + if ( e != 4 ) { + e=sscanf(buf, "Lavc%d.%d.%d", &ver, &ver2, &ver3)+1; + if ( e > 1 ) + build= (ver<<16) + (ver2<<8) + ver3; + } + if ( e == 4 ) { + strcpy( this_gen->sequence.codec_name, "MPEG4 / FFmpeg " ); + sprintf( buf, "%d", build ); + strcat( this_gen->sequence.codec_name, " (vdpau)" ); + this_gen->sequence.have_codec_name = 1; + } + else { + if(strcmp(buf, "ffmpeg")==0) { + strcpy( this_gen->sequence.codec_name, "MPEG4 / FFmpeg " ); + strcpy( this_gen->sequence.codec_name, "4600" ); + strcat( this_gen->sequence.codec_name, " (vdpau)" ); + this_gen->sequence.have_codec_name = 1; + } + } + + /* Xvid detection */ + e = sscanf(buf, "XviD%d", &build); + if ( e == 1 ) { + strcpy( this_gen->sequence.codec_name, "MPEG4 / XviD " ); + sprintf( buf, "%d", build ); + strcat( this_gen->sequence.codec_name, " (vdpau)" ); + this_gen->sequence.have_codec_name = 1; + } + + update_metadata( this_gen ); +} + + + +static int parse_code( vdpau_mpeg4_decoder_t *this_gen, uint8_t *buf, int len ) +{ + sequence_t *sequence = (sequence_t*)&this_gen->sequence; + + if ( (buf[3] >= begin_vo_start_code) && (buf[3] <= end_vo_start_code) ) { + lprintf( " ----------- vo_start_code\n" ); + return 0; + } + + if ( (buf[3] >= begin_vol_start_code) && (buf[3] <= end_vol_start_code) ) { + lprintf( " ----------- vol_start_code\n" ); + video_object_layer( this_gen, buf+4, len-4); + return 0; + } + + switch ( buf[3] ) { + case viso_sequence_start_code: + lprintf( " ----------- viso_sequence_start_code\n" ); + break; + case viso_sequence_end_code: + lprintf( " ----------- viso_sequence_end_code\n" ); + break; + case viso_start_code: + lprintf( " ----------- viso_start_code\n" ); + visual_object( this_gen, buf+4, len-4 ); + break; + } + + if ( !sequence->have_header ) + return 0; + + switch ( buf[3] ) { + case group_start_code: + lprintf( " ----------- group_start_code\n" ); + gop_header( this_gen, buf+4, 
len-4 ); + break; + case user_data_start_code: + lprintf( " ----------- user_data_start_code\n" ); + user_data( this_gen, buf+4, len-4 ); + break; + case vop_start_code: + lprintf( " ----------- vop_start_code\n" ); + video_object_plane( this_gen, buf+4, len-4 ); + return 1; + break; + } + return 0; +} + + + +static void decode_render( vdpau_mpeg4_decoder_t *vd, vdpau_accel_t *accel, uint8_t *buf, int len ) +{ + sequence_t *seq = (sequence_t*)&vd->sequence; + picture_t *pic = (picture_t*)&seq->picture; + + VdpStatus st; + if ( vd->decoder==VDP_INVALID_HANDLE || vd->decoder_profile!=seq->profile || vd->decoder_width!=seq->coded_width || vd->decoder_height!=seq->coded_height ) { + if ( vd->decoder!=VDP_INVALID_HANDLE ) { + accel->vdp_decoder_destroy( vd->decoder ); + vd->decoder = VDP_INVALID_HANDLE; + } + st = accel->vdp_decoder_create( accel->vdp_device, seq->profile, seq->coded_width, seq->coded_height, 2, &vd->decoder); + if ( st!=VDP_STATUS_OK ) + fprintf(stderr, "vdpau_mpeg4: failed to create decoder !! 
%s\n", accel->vdp_get_error_string( st ) ); + else { + lprintf( "decoder created.\n" ); + vd->decoder_profile = seq->profile; + vd->decoder_width = seq->coded_width; + vd->decoder_height = seq->coded_height; + seq->vdp_runtime_nr = accel->vdp_runtime_nr; + } + } + + VdpPictureInfoMPEG4Part2 *infos = (VdpPictureInfoMPEG4Part2*)&pic->vdp_infos; + printf("%d: %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d\n", ++nframe, infos->vop_coding_type,infos->vop_time_increment_resolution, infos->vop_fcode_forward, infos->vop_fcode_backward, infos->resync_marker_disable, infos->interlaced, infos->quant_type, infos->quarter_sample, infos->short_video_header, infos->rounding_control, infos->alternate_vertical_scan_flag, len, infos->trd[0], infos->trd[1], infos->trb[0], infos->trb[1]); + + VdpBitstreamBuffer vbit; + vbit.struct_version = VDP_BITSTREAM_BUFFER_VERSION; + vbit.bitstream = buf; + vbit.bitstream_bytes = len; + st = accel->vdp_decoder_render( vd->decoder, accel->surface, (VdpPictureInfo*)&pic->vdp_infos, 1, &vbit ); + if ( st!=VDP_STATUS_OK ) + fprintf(stderr, "vdpau_mpeg4: decoder failed : %d!! 
%s\n", st, accel->vdp_get_error_string( st ) ); + else { + lprintf( "DECODER SUCCESS : vop_coding_type=%d, bytes=%d, current=%d, forwref:%d, backref:%d, pts:%lld\n", + pic->vdp_infos.vop_coding_type, vbit.bitstream_bytes, accel->surface, pic->vdp_infos.forward_reference, pic->vdp_infos.backward_reference, seq->seq_pts ); + } +} + + + +static void decode_picture( vdpau_mpeg4_decoder_t *vd ) +{ + sequence_t *seq = (sequence_t*)&vd->sequence; + picture_t *pic = (picture_t*)&seq->picture; + vdpau_accel_t *ref_accel; + + uint8_t *buf = seq->buf; + int len = seq->bufpos; + + pic->vdp_infos.forward_reference = VDP_INVALID_HANDLE; + pic->vdp_infos.backward_reference = VDP_INVALID_HANDLE; + + if ( pic->vdp_infos.vop_coding_type == P_FRAME ) { + if ( seq->backward_ref ) { + ref_accel = (vdpau_accel_t*)seq->backward_ref->accel_data; + pic->vdp_infos.forward_reference = ref_accel->surface; + } + else { + /* reset_picture( &seq->picture ); */ + return; + } + } + else if ( pic->vdp_infos.vop_coding_type == B_FRAME ) { + if ( seq->forward_ref ) { + ref_accel = (vdpau_accel_t*)seq->forward_ref->accel_data; + pic->vdp_infos.forward_reference = ref_accel->surface; + } + else { + /* reset_picture( &seq->picture ); */ + return; + } + if ( seq->backward_ref ) { + ref_accel = (vdpau_accel_t*)seq->backward_ref->accel_data; + pic->vdp_infos.backward_reference = ref_accel->surface; + } + else { + /* reset_picture( &seq->picture );*/ + return; + } + } + + vo_frame_t *img = vd->stream->video_out->get_frame( vd->stream->video_out, seq->coded_width, seq->coded_height, seq->ratio, XINE_IMGFMT_VDPAU, VO_BOTH_FIELDS ); + vdpau_accel_t *accel = (vdpau_accel_t*)img->accel_data; + if ( !seq->accel_vdpau ) + seq->accel_vdpau = accel; + + if( seq->vdp_runtime_nr != *(seq->accel_vdpau->current_vdp_runtime_nr) ) { + seq->accel_vdpau = accel; + if ( seq->forward_ref ) + seq->forward_ref->free( seq->forward_ref ); + seq->forward_ref = NULL; + if ( seq->backward_ref ) + seq->backward_ref->free( 
seq->backward_ref ); + seq->backward_ref = NULL; + vd->decoder = VDP_INVALID_HANDLE; + } + + decode_render( vd, accel, buf, len ); + + +#ifdef MAKE_DAT + if ( nframes==0 ) { + fwrite( &seq->coded_width, 1, sizeof(seq->coded_width), outfile ); + fwrite( &seq->coded_height, 1, sizeof(seq->coded_height), outfile ); + fwrite( &seq->ratio, 1, sizeof(seq->ratio), outfile ); + fwrite( &seq->profile, 1, sizeof(seq->profile), outfile ); + } + + if ( nframes++ < 25 ) { + fwrite( &pic->vdp_infos, 1, sizeof(pic->vdp_infos), outfile ); + fwrite( &len, 1, sizeof(len), outfile ); + fwrite( buf, 1, len, outfile ); + printf( "picture_type = %d\n", pic->vdp_infos.picture_type); + } +#endif + + if ( pic->vdp_infos.interlaced ) { + img->progressive_frame = 0; + img->top_field_first = pic->vdp_infos.top_field_first; + } + else { + img->progressive_frame = -1; /* set to -1 to let the vo know that it MUST NOT deinterlace */ + img->top_field_first = 1; + } + img->pts = seq->seq_pts; + img->bad_frame = 0; + if ( seq->video_step > 900 ) /* some buggy streams */ + img->duration = seq->video_step; + accel->color_standard = seq->color_standard; + + if ( pic->vdp_infos.vop_coding_type < B_FRAME ) { + if ( pic->vdp_infos.vop_coding_type == I_FRAME && !seq->backward_ref ) { + img->pts = 0; + img->draw( img, vd->stream ); + ++img->drawn; + } + if ( seq->forward_ref ) { + seq->forward_ref->drawn = 0; + seq->forward_ref->free( seq->forward_ref ); + } + seq->forward_ref = seq->backward_ref; + if ( seq->forward_ref && !seq->forward_ref->drawn ) { + seq->forward_ref->draw( seq->forward_ref, vd->stream ); + } + seq->backward_ref = img; + } + else { + img->draw( img, vd->stream ); + img->free( img ); + } +} + + + + +/* + * This function receives a buffer of data from the demuxer layer and + * figures out how to handle it based on its header flags. 
+ */ +static void vdpau_mpeg4_decode_data (video_decoder_t *this_gen, buf_element_t *buf) +{ + vdpau_mpeg4_decoder_t *this = (vdpau_mpeg4_decoder_t *) this_gen; + sequence_t *seq = (sequence_t*)&this->sequence; + + /* preview buffers shall not be decoded and drawn -- use them only to supply stream information */ + if (buf->decoder_flags & BUF_FLAG_PREVIEW) + return; + + if ( !buf->size ) + return; + + if ( buf->pts ) + seq->cur_pts = buf->pts; + + int size = seq->bufpos+buf->size; + if ( seq->bufsize < size ) { + seq->bufsize = size+1024; + seq->buf = realloc( seq->buf, seq->bufsize ); + } + xine_fast_memcpy( seq->buf+seq->bufpos, buf->content, buf->size ); + seq->bufpos += buf->size; + + while ( seq->bufseek <= seq->bufpos-4 ) { + uint8_t *buffer = seq->buf+seq->bufseek; + if ( buffer[0]==0 && buffer[1]==0 && buffer[2]==1 ) { + if ( seq->start<0 ) { + seq->start = seq->bufseek; + } + else { + if ( parse_code( this, seq->buf+seq->start, seq->bufseek-seq->start ) ) { + decode_picture( this ); + } + uint8_t *tmp = (uint8_t*)malloc(seq->bufsize); + xine_fast_memcpy( tmp, seq->buf+seq->bufseek, seq->bufpos-seq->bufseek ); + seq->bufpos -= seq->bufseek; + seq->start = -1; + seq->bufseek = -1; + free( seq->buf ); + seq->buf = tmp; + } + } + ++seq->bufseek; + } +} + +/* + * This function is called when xine needs to flush the system. + */ +static void vdpau_mpeg4_flush (video_decoder_t *this_gen) { + vdpau_mpeg4_decoder_t *this = (vdpau_mpeg4_decoder_t *) this_gen; + + lprintf( "vdpau_mpeg4_flush\n" ); +} + +/* + * This function resets the video decoder. + */ +static void vdpau_mpeg4_reset (video_decoder_t *this_gen) { + vdpau_mpeg4_decoder_t *this = (vdpau_mpeg4_decoder_t *) this_gen; + + lprintf( "vdpau_mpeg4_reset\n" ); + reset_sequence( &this->sequence, 1 ); +} + +/* + * The decoder should forget any stored pts values here. 
+ */ +static void vdpau_mpeg4_discontinuity (video_decoder_t *this_gen) { + vdpau_mpeg4_decoder_t *this = (vdpau_mpeg4_decoder_t *) this_gen; + + lprintf( "vdpau_mpeg4_discontinuity\n" ); + reset_sequence( &this->sequence, 0 ); +} + +/* + * This function frees the video decoder instance allocated to the decoder. + */ +static void vdpau_mpeg4_dispose (video_decoder_t *this_gen) { + + vdpau_mpeg4_decoder_t *this = (vdpau_mpeg4_decoder_t *) this_gen; + + lprintf( "vdpau_mpeg4_dispose\n" ); + + if ( this->decoder!=VDP_INVALID_HANDLE && this->sequence.accel_vdpau ) { + this->sequence.accel_vdpau->vdp_decoder_destroy( this->decoder ); + this->decoder = VDP_INVALID_HANDLE; + } + + free_sequence( &this->sequence ); + + this->stream->video_out->close( this->stream->video_out, this->stream ); + + free( this->sequence.buf ); + free( this_gen ); +} + +/* + * This function allocates, initializes, and returns a private video + * decoder structure. + */ +static video_decoder_t *open_plugin (video_decoder_class_t *class_gen, xine_stream_t *stream) { + + vdpau_mpeg4_decoder_t *this ; + + lprintf( "open_plugin\n" ); + + /* the videoout must be vdpau-capable to support this decoder */ + if ( !(stream->video_driver->get_capabilities(stream->video_driver) & VO_CAP_VDPAU_MPEG4) ) + return NULL; + + /* now check if vdpau has free decoder resource */ + vo_frame_t *img = stream->video_out->get_frame( stream->video_out, 1920, 1080, 1, XINE_IMGFMT_VDPAU, VO_BOTH_FIELDS ); + vdpau_accel_t *accel = (vdpau_accel_t*)img->accel_data; + int runtime_nr = accel->vdp_runtime_nr; + img->free(img); + VdpDecoder decoder; + VdpStatus st = accel->vdp_decoder_create( accel->vdp_device, VDP_DECODER_PROFILE_MPEG4_PART2_ASP, 1920, 1080, 2, &decoder ); + if ( st!=VDP_STATUS_OK ) { + lprintf( "can't create vdpau decoder.\n" ); + return NULL; + } + + accel->vdp_decoder_destroy( decoder ); + + this = (vdpau_mpeg4_decoder_t *) calloc(1, sizeof(vdpau_mpeg4_decoder_t)); + + this->video_decoder.decode_data = 
vdpau_mpeg4_decode_data; + this->video_decoder.flush = vdpau_mpeg4_flush; + this->video_decoder.reset = vdpau_mpeg4_reset; + this->video_decoder.discontinuity = vdpau_mpeg4_discontinuity; + this->video_decoder.dispose = vdpau_mpeg4_dispose; + + this->stream = stream; + this->class = (vdpau_mpeg4_class_t *) class_gen; + + this->sequence.bufsize = 1024; + this->sequence.buf = (uint8_t*)malloc(this->sequence.bufsize); + this->sequence.forward_ref = 0; + this->sequence.backward_ref = 0; + this->sequence.vdp_runtime_nr = runtime_nr; + free_sequence( &this->sequence ); + this->sequence.ratio = 1; + this->sequence.reset = VO_NEW_SEQUENCE_FLAG; + + init_picture( &this->sequence.picture ); + + this->decoder = VDP_INVALID_HANDLE; + this->sequence.accel_vdpau = NULL; + + (stream->video_out->open)(stream->video_out, stream); + +#ifdef MAKE_DAT + outfile = fopen( "/tmp/mpeg4.dat","w"); + nframes = 0; +#endif + nframe = 0; + + return &this->video_decoder; +} + +/* + * This function allocates a private video decoder class and initializes + * the class's member functions. + */ +static void *init_plugin (xine_t *xine, void *data) { + + vdpau_mpeg4_class_t *this; + + this = (vdpau_mpeg4_class_t *) calloc(1, sizeof(vdpau_mpeg4_class_t)); + + this->decoder_class.open_plugin = open_plugin; + this->decoder_class.identifier = "vdpau_mpeg4"; + this->decoder_class.description = + N_("vdpau_mpeg4: mpeg4 part 2 decoder plugin using VDPAU hardware decoding.\n" + "Must be used along with video_out_vdpau."); + this->decoder_class.dispose = default_video_decoder_class_dispose; + + return this; +} + +/* + * This is a list of all of the internal xine video buffer types that + * this decoder is able to handle. Check src/xine-engine/buffer.h for a + * list of valid buffer types (and add a new one if the one you need does + * not exist). Terminate the list with a 0. 
+ */ +static const uint32_t video_types[] = { + BUF_VIDEO_MPEG4, + BUF_VIDEO_XVID, + BUF_VIDEO_DIVX5, + BUF_VIDEO_3IVX, + 0 +}; + +/* + * This data structure combines the list of supported xine buffer types and + * the priority that the plugin should be given with respect to other + * plugins that handle the same buffer type. A plugin with priority (n+1) + * will be used instead of a plugin with priority (n). + */ +static const decoder_info_t dec_info_video = { + video_types, /* supported types */ + 0 /* priority */ +}; + +/* + * The plugin catalog entry. This is the only information that this plugin + * will export to the public. + */ +const plugin_info_t xine_plugin_info[] EXPORTED = { + /* { type, API, "name", version, special_info, init_function } */ + { PLUGIN_VIDEO_DECODER, 19, "vdpau_mpeg4", XINE_VERSION_CODE, &dec_info_video, init_plugin }, + { PLUGIN_NONE, 0, "", 0, NULL, NULL } +}; diff --git a/src/video_dec/libvdpau/vdpau_vc1.c b/src/video_dec/libvdpau/vdpau_vc1.c new file mode 100644 index 000000000..fe6ce26b4 --- /dev/null +++ b/src/video_dec/libvdpau/vdpau_vc1.c @@ -0,0 +1,1176 @@ +/* + * Copyright (C) 2008 the xine project + * Copyright (C) 2008 Christophe Thommeret <hftom@free.fr> + * + * This file is part of xine, a free video player. + * + * xine is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * xine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110, USA + * + * vdpau_vc1.c, a vc1 video stream parser using VDPAU hardware decoder + * + */ + +/*#define LOG*/ +#define LOG_MODULE "vdpau_vc1" + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> +#include <unistd.h> + +#include <xine/xine_internal.h> +#include <xine/video_out.h> +#include <xine/buffer.h> +#include <xine/xineutils.h> +#include "accel_vdpau.h" +#include "bits_reader.h" + +#include <vdpau/vdpau.h> + +#define sequence_header_code 0x0f +#define sequence_end_code 0x0a +#define entry_point_code 0x0e +#define frame_start_code 0x0d +#define field_start_code 0x0c +#define slice_start_code 0x0b + +#define PICTURE_FRAME 0 +#define PICTURE_FRAME_INTERLACE 2 +#define PICTURE_FIELD_INTERLACE 3 + +#define I_FRAME 0 +#define P_FRAME 1 +#define B_FRAME 3 +#define BI_FRAME 4 + +#define FIELDS_I_I 0 +#define FIELDS_I_P 1 +#define FIELDS_P_I 2 +#define FIELDS_P_P 3 +#define FIELDS_B_B 4 +#define FIELDS_B_BI 5 +#define FIELDS_BI_B 6 +#define FIELDS_BI_BI 7 + +#define MODE_STARTCODE 0 +#define MODE_FRAME 1 + +/*#define MAKE_DAT*/ /*do NOT define this, unless you know what you do */ +#ifdef MAKE_DAT +static int nframes; +static FILE *outfile; +#endif + + + +const double aspect_ratio[] = { + 0.0, + 1.0, + 12./11., + 10./11., + 16./11., + 40./33., + 24./11., + 20./11., + 32./11., + 80./33., + 18./11., + 15./11., + 64./33., + 160./99. 
+}; + + + +typedef struct { + VdpPictureInfoVC1 vdp_infos; + int slices; + int fptype; + int field; + int header_size; + int hrd_param_flag; + int hrd_num_leaky_buckets; + int repeat_first_field; + int top_field_first; + int skipped; +} picture_t; + + + +typedef struct { + uint32_t coded_width; + uint32_t coded_height; + + uint64_t video_step; /* frame duration in pts units */ + uint64_t reported_video_step; /* frame duration in pts units */ + double ratio; + VdpDecoderProfile profile; + + int mode; + int have_header; + + uint8_t *buf; /* accumulate data */ + int bufseek; + int start; + int code_start, current_code; + uint32_t bufsize; + uint32_t bufpos; + + picture_t picture; + vo_frame_t *forward_ref; + vo_frame_t *backward_ref; + + int64_t seq_pts; + int64_t cur_pts; + + vdpau_accel_t *accel_vdpau; + + bits_reader_t br; + + int vdp_runtime_nr; + +} sequence_t; + + + +typedef struct { + video_decoder_class_t decoder_class; +} vdpau_vc1_class_t; + + + +typedef struct vdpau_vc1_decoder_s { + video_decoder_t video_decoder; /* parent video decoder structure */ + + vdpau_vc1_class_t *class; + xine_stream_t *stream; + + sequence_t sequence; + + VdpDecoder decoder; + VdpDecoderProfile decoder_profile; + uint32_t decoder_width; + uint32_t decoder_height; + +} vdpau_vc1_decoder_t; + + + +static void init_picture( picture_t *pic ) +{ + memset( pic, 0, sizeof( picture_t ) ); +} + + + +static void reset_picture( picture_t *pic ) +{ + pic->slices = 1; +} + + + +static void reset_sequence( sequence_t *sequence ) +{ + lprintf( "reset_sequence\n" ); + sequence->bufpos = 0; + sequence->bufseek = 0; + sequence->start = -1; + sequence->code_start = sequence->current_code = 0; + sequence->seq_pts = sequence->cur_pts = 0; + if ( sequence->forward_ref ) + sequence->forward_ref->free( sequence->forward_ref ); + sequence->forward_ref = NULL; + if ( sequence->backward_ref ) + sequence->backward_ref->free( sequence->backward_ref ); + sequence->backward_ref = NULL; + reset_picture( 
&sequence->picture ); +} + + + +static void init_sequence( sequence_t *sequence ) +{ + lprintf( "init_sequence\n" ); + sequence->have_header = 0; + sequence->profile = VDP_DECODER_PROFILE_VC1_SIMPLE; + sequence->ratio = 0; + sequence->video_step = 0; + sequence->picture.hrd_param_flag = 0; + reset_sequence( sequence ); +} + + + +static void update_metadata( vdpau_vc1_decoder_t *this_gen ) +{ + sequence_t *sequence = (sequence_t*)&this_gen->sequence; + + if ( !sequence->have_header ) { + sequence->have_header = 1; + _x_stream_info_set( this_gen->stream, XINE_STREAM_INFO_VIDEO_WIDTH, sequence->coded_width ); + _x_stream_info_set( this_gen->stream, XINE_STREAM_INFO_VIDEO_HEIGHT, sequence->coded_height ); + _x_stream_info_set( this_gen->stream, XINE_STREAM_INFO_VIDEO_RATIO, ((double)10000*sequence->ratio) ); + _x_stream_info_set( this_gen->stream, XINE_STREAM_INFO_FRAME_DURATION, (sequence->reported_video_step = sequence->video_step) ); + _x_meta_info_set_utf8( this_gen->stream, XINE_META_INFO_VIDEOCODEC, "VC1/WMV9 (vdpau)" ); + xine_event_t event; + xine_format_change_data_t data; + event.type = XINE_EVENT_FRAME_FORMAT_CHANGE; + event.stream = this_gen->stream; + event.data = &data; + event.data_length = sizeof(data); + data.width = sequence->coded_width; + data.height = sequence->coded_height; + data.aspect = sequence->ratio; + xine_event_send( this_gen->stream, &event ); + } +} + + + +static void sequence_header_advanced( vdpau_vc1_decoder_t *this_gen, uint8_t *buf, int len ) +{ + lprintf( "sequence_header_advanced\n" ); + sequence_t *sequence = (sequence_t*)&this_gen->sequence; + + if ( len < 5 ) + return; + + sequence->profile = VDP_DECODER_PROFILE_VC1_ADVANCED; + lprintf("VDP_DECODER_PROFILE_VC1_ADVANCED\n"); + bits_reader_set( &sequence->br, buf, len ); + skip_bits( &sequence->br, 15 ); + sequence->picture.vdp_infos.postprocflag = read_bits( &sequence->br, 1 ); + sequence->coded_width = (read_bits( &sequence->br, 12 )+1)<<1; + sequence->coded_height = 
(read_bits( &sequence->br, 12 )+1)<<1; + sequence->picture.vdp_infos.pulldown = read_bits( &sequence->br, 1 ); + sequence->picture.vdp_infos.interlace = read_bits( &sequence->br, 1 ); + sequence->picture.vdp_infos.tfcntrflag = read_bits( &sequence->br, 1 ); + sequence->picture.vdp_infos.finterpflag = read_bits( &sequence->br, 1 ); + skip_bits( &sequence->br, 1 ); + sequence->picture.vdp_infos.psf = read_bits( &sequence->br, 1 ); + sequence->picture.vdp_infos.maxbframes = 7; + if ( read_bits( &sequence->br, 1 ) ) { + double w, h; + int ar=0; + w = read_bits( &sequence->br, 14 )+1; + h = read_bits( &sequence->br, 14 )+1; + if ( read_bits( &sequence->br, 1 ) ) { + ar = read_bits( &sequence->br, 4 ); + } + if ( ar==15 ) { + w = read_bits( &sequence->br, 8 ); + h = read_bits( &sequence->br, 8 ); + sequence->ratio = w/h; + lprintf("aspect_ratio (w/h) = %f\n", sequence->ratio); + } + else if ( ar && ar<14 ) { + sequence->ratio = sequence->coded_width*aspect_ratio[ar]/sequence->coded_height; + lprintf("aspect_ratio = %f\n", sequence->ratio); + } + + if ( read_bits( &sequence->br, 1 ) ) { + if ( read_bits( &sequence->br, 1 ) ) { + int exp = read_bits( &sequence->br, 16 ); + lprintf("framerate exp = %d\n", exp); + } + else { + double nr = read_bits( &sequence->br, 8 ); + switch ((int)nr) { + case 1: nr = 24000; break; + case 2: nr = 25000; break; + case 3: nr = 30000; break; + case 4: nr = 50000; break; + case 5: nr = 60000; break; + default: nr = 0; + } + double dr = read_bits( &sequence->br, 4 ); + switch ((int)dr) { + case 2: dr = 1001; break; + default: dr = 1000; + } + sequence->video_step = 90000/(nr/dr); + lprintf("framerate = %f video_step = %d\n", nr/dr, sequence->video_step); + } + } + if ( read_bits( &sequence->br, 1 ) ) { + int col = read_bits( &sequence->br, 8 ); + lprintf("color_standard = %d\n", col); + skip_bits( &sequence->br, 16 ); + } + } + sequence->picture.hrd_param_flag = read_bits( &sequence->br, 1 ); + if ( sequence->picture.hrd_param_flag ) + 
sequence->picture.hrd_num_leaky_buckets = read_bits( &sequence->br, 5 ); + + update_metadata( this_gen ); +} + + + +static void sequence_header( vdpau_vc1_decoder_t *this_gen, uint8_t *buf, int len ) +{ + lprintf( "sequence_header\n" ); + sequence_t *sequence = (sequence_t*)&this_gen->sequence; + + if ( len < 4 ) + return; + + bits_reader_set( &sequence->br, buf, len ); + switch ( read_bits( &sequence->br, 2 ) ) { + case 0: sequence->profile = VDP_DECODER_PROFILE_VC1_SIMPLE; lprintf("VDP_DECODER_PROFILE_VC1_SIMPLE\n"); break; + case 1: sequence->profile = VDP_DECODER_PROFILE_VC1_MAIN; lprintf("VDP_DECODER_PROFILE_VC1_MAIN\n"); break; + case 2: sequence->profile = VDP_DECODER_PROFILE_VC1_MAIN; fprintf(stderr, "vc1_complex profile not supported by vdpau, forcing vc1_main, expect corruption!.\n"); break; + case 3: return sequence_header_advanced( this_gen, buf, len ); break; + default: return; /* illegal value, broken header? */ + } + skip_bits( &sequence->br, 10 ); + sequence->picture.vdp_infos.loopfilter = read_bits( &sequence->br, 1 ); + skip_bits( &sequence->br, 1 ); + sequence->picture.vdp_infos.multires = read_bits( &sequence->br, 1 ); + skip_bits( &sequence->br, 1 ); + sequence->picture.vdp_infos.fastuvmc = read_bits( &sequence->br, 1 ); + sequence->picture.vdp_infos.extended_mv = read_bits( &sequence->br, 1 ); + sequence->picture.vdp_infos.dquant = read_bits( &sequence->br, 2 ); + sequence->picture.vdp_infos.vstransform = read_bits( &sequence->br, 1 ); + skip_bits( &sequence->br, 1 ); + sequence->picture.vdp_infos.overlap = read_bits( &sequence->br, 1 ); + sequence->picture.vdp_infos.syncmarker = read_bits( &sequence->br, 1 ); + sequence->picture.vdp_infos.rangered = read_bits( &sequence->br, 1 ); + sequence->picture.vdp_infos.maxbframes = read_bits( &sequence->br, 3 ); + sequence->picture.vdp_infos.quantizer = read_bits( &sequence->br, 2 ); + sequence->picture.vdp_infos.finterpflag = read_bits( &sequence->br, 1 ); + + update_metadata( this_gen ); +} + + + 
+static void entry_point( vdpau_vc1_decoder_t *this_gen, uint8_t *buf, int len ) +{ + lprintf( "entry_point\n" ); + sequence_t *sequence = (sequence_t*)&this_gen->sequence; + + bits_reader_set( &sequence->br, buf, len ); + skip_bits( &sequence->br, 2 ); + sequence->picture.vdp_infos.panscan_flag = read_bits( &sequence->br, 1 ); + sequence->picture.vdp_infos.refdist_flag = read_bits( &sequence->br, 1 ); + sequence->picture.vdp_infos.loopfilter = read_bits( &sequence->br, 1 ); + sequence->picture.vdp_infos.fastuvmc = read_bits( &sequence->br, 1 ); + sequence->picture.vdp_infos.extended_mv = read_bits( &sequence->br, 1 ); + sequence->picture.vdp_infos.dquant = read_bits( &sequence->br, 2 ); + sequence->picture.vdp_infos.vstransform = read_bits( &sequence->br, 1 ); + sequence->picture.vdp_infos.overlap = read_bits( &sequence->br, 1 ); + sequence->picture.vdp_infos.quantizer = read_bits( &sequence->br, 2 ); + + if ( sequence->picture.hrd_param_flag ) { + int i; + for ( i=0; i<sequence->picture.hrd_num_leaky_buckets; ++i ) + skip_bits( &sequence->br, 8 ); + } + + if ( read_bits( &sequence->br, 1 ) ) { + sequence->coded_width = (read_bits( &sequence->br, 12 )+1)<<1; + sequence->coded_height = (read_bits( &sequence->br, 12 )+1)<<1; + } + + if ( sequence->picture.vdp_infos.extended_mv ) + sequence->picture.vdp_infos.extended_dmv = read_bits( &sequence->br, 1 ); + + sequence->picture.vdp_infos.range_mapy_flag = read_bits( &sequence->br, 1 ); + if ( sequence->picture.vdp_infos.range_mapy_flag ) { + sequence->picture.vdp_infos.range_mapy = read_bits( &sequence->br, 3 ); + } + sequence->picture.vdp_infos.range_mapuv_flag = read_bits( &sequence->br, 1 ); + if ( sequence->picture.vdp_infos.range_mapuv_flag ) { + sequence->picture.vdp_infos.range_mapuv = read_bits( &sequence->br, 3 ); + } +} + + + +static void picture_header( vdpau_vc1_decoder_t *this_gen, uint8_t *buf, int len ) +{ + sequence_t *sequence = (sequence_t*)&this_gen->sequence; + picture_t *pic = 
(picture_t*)&sequence->picture; + VdpPictureInfoVC1 *info = &(sequence->picture.vdp_infos); + int tmp; + + lprintf("picture_header\n"); + + bits_reader_set( &sequence->br, buf, len ); + skip_bits( &sequence->br, 2 ); + + if ( info->finterpflag ) + skip_bits( &sequence->br, 1 ); + if ( info->rangered ) { + /*info->rangered &= ~2; + info->rangered |= get_bits( buf,off++,1 ) << 1;*/ + info->rangered = (read_bits( &sequence->br, 1 ) << 1) +1; + } + if ( !info->maxbframes ) { + if ( read_bits( &sequence->br, 1 ) ) + info->picture_type = P_FRAME; + else + info->picture_type = I_FRAME; + } + else { + if ( read_bits( &sequence->br, 1 ) ) + info->picture_type = P_FRAME; + else { + if ( read_bits( &sequence->br, 1 ) ) + info->picture_type = I_FRAME; + else + info->picture_type = B_FRAME; + } + } + if ( info->picture_type == B_FRAME ) { + tmp = read_bits( &sequence->br, 3 ); + if ( tmp==7 ) { + tmp = (tmp<<4) | read_bits( &sequence->br, 4 ); + if ( tmp==127 ) + info->picture_type = BI_FRAME; + } + } +} + + + +static void picture_header_advanced( vdpau_vc1_decoder_t *this_gen, uint8_t *buf, int len ) +{ + sequence_t *sequence = (sequence_t*)&this_gen->sequence; + picture_t *pic = (picture_t*)&sequence->picture; + VdpPictureInfoVC1 *info = &(sequence->picture.vdp_infos); + + lprintf("picture_header_advanced\n"); + + bits_reader_set( &sequence->br, buf, len ); + + if ( info->interlace ) { + lprintf("frame->interlace=1\n"); + if ( !read_bits( &sequence->br, 1 ) ) { + lprintf("progressive frame\n"); + info->frame_coding_mode = PICTURE_FRAME; + } + else { + if ( !read_bits( &sequence->br, 1 ) ) { + lprintf("frame interlaced\n"); + info->frame_coding_mode = PICTURE_FRAME_INTERLACE; + } + else { + lprintf("field interlaced\n"); + info->frame_coding_mode = PICTURE_FIELD_INTERLACE; + } + } + } + if ( info->interlace && info->frame_coding_mode == PICTURE_FIELD_INTERLACE ) { + pic->fptype = read_bits( &sequence->br, 3 ); + switch ( pic->fptype ) { + case FIELDS_I_I: + case FIELDS_I_P: + 
info->picture_type = I_FRAME; break; + case FIELDS_P_I: + case FIELDS_P_P: + info->picture_type = P_FRAME; break; + case FIELDS_B_B: + case FIELDS_B_BI: + info->picture_type = B_FRAME; break; + default: + info->picture_type = BI_FRAME; + } + } + else { + if ( !read_bits( &sequence->br, 1 ) ) + info->picture_type = P_FRAME; + else { + if ( !read_bits( &sequence->br, 1 ) ) + info->picture_type = B_FRAME; + else { + if ( !read_bits( &sequence->br, 1 ) ) + info->picture_type = I_FRAME; + else { + if ( !read_bits( &sequence->br, 1 ) ) + info->picture_type = BI_FRAME; + else { + info->picture_type = P_FRAME; + pic->skipped = 1; + } + } + } + } + } + if ( info->tfcntrflag ) { + lprintf("tfcntrflag=1\n"); + skip_bits( &sequence->br, 8 ); + } + if ( info->pulldown && info->interlace ) { + pic->top_field_first = read_bits( &sequence->br, 1 ); + pic->repeat_first_field = read_bits( &sequence->br, 1 ); + } +} + + + +static void parse_header( vdpau_vc1_decoder_t *this_gen, uint8_t *buf, int len ) +{ + sequence_t *sequence = (sequence_t*)&this_gen->sequence; + int off=0; + + while ( off < (len-4) ) { + uint8_t *buffer = buf+off; + if ( buffer[0]==0 && buffer[1]==0 && buffer[2]==1 ) { + switch ( buffer[3] ) { + case sequence_header_code: sequence_header( this_gen, buf+off+4, len-off-4 ); break; + case entry_point_code: entry_point( this_gen, buf+off+4, len-off-4 ); break; + } + } + ++off; + } + if ( !sequence->have_header ) + sequence_header( this_gen, buf, len ); +} + + + +static void remove_emulation_prevention( uint8_t *src, uint8_t *dst, int src_len, int *dst_len ) +{ + int i; + int len = 0; + int removed = 0; + + for ( i=0; i<src_len-3; ++i ) { + if ( src[i]==0 && src[i+1]==0 && src[i+2]==3 ) { + lprintf("removed emulation prevention byte\n"); + dst[len++] = src[i]; + dst[len++] = src[i+1]; + i += 2; + ++removed; + } + else { + memcpy( dst+len, src+i, 4 ); + ++len; + } + } + for ( ; i<src_len; ++i ) + dst[len++] = src[i]; + *dst_len = src_len-removed; +} + + + +static int 
parse_code( vdpau_vc1_decoder_t *this_gen, uint8_t *buf, int len ) +{ + sequence_t *sequence = (sequence_t*)&this_gen->sequence; + + if ( !sequence->have_header && buf[3]!=sequence_header_code ) + return 0; + + if ( sequence->code_start == frame_start_code ) { + if ( sequence->current_code==field_start_code || sequence->current_code==slice_start_code ) { + sequence->picture.slices++; + return -1; + } + return 1; /* frame complete, decode */ + } + + switch ( buf[3] ) { + int dst_len; + uint8_t *tmp; + case sequence_header_code: + lprintf("sequence_header_code\n"); + tmp = malloc( len ); + remove_emulation_prevention( buf, tmp, len, &dst_len ); + sequence_header( this_gen, tmp+4, dst_len-4 ); + free( tmp ); + break; + case entry_point_code: + lprintf("entry_point_code\n"); + tmp = malloc( len ); + remove_emulation_prevention( buf, tmp, len, &dst_len ); + entry_point( this_gen, tmp+4, dst_len-4 ); + free( tmp ); + break; + case sequence_end_code: + lprintf("sequence_end_code\n"); + break; + case frame_start_code: + lprintf("frame_start_code, len=%d\n", len); + break; + case field_start_code: + lprintf("field_start_code\n"); + break; + case slice_start_code: + lprintf("slice_start_code, len=%d\n", len); + break; + } + return 0; +} + + + +static void decode_render( vdpau_vc1_decoder_t *vd, vdpau_accel_t *accel, uint8_t *buf, int len ) +{ + sequence_t *seq = (sequence_t*)&vd->sequence; + picture_t *pic = (picture_t*)&seq->picture; + + VdpStatus st; + if ( vd->decoder==VDP_INVALID_HANDLE || vd->decoder_profile!=seq->profile || vd->decoder_width!=seq->coded_width || vd->decoder_height!=seq->coded_height ) { + if ( vd->decoder!=VDP_INVALID_HANDLE ) { + accel->vdp_decoder_destroy( vd->decoder ); + vd->decoder = VDP_INVALID_HANDLE; + } + st = accel->vdp_decoder_create( accel->vdp_device, seq->profile, seq->coded_width, seq->coded_height, 2, &vd->decoder); + if ( st!=VDP_STATUS_OK ) + fprintf(stderr, "vdpau_vc1: failed to create decoder !! 
%s\n", accel->vdp_get_error_string( st ) ); + else { + lprintf( "decoder created.\n" ); + vd->decoder_profile = seq->profile; + vd->decoder_width = seq->coded_width; + vd->decoder_height = seq->coded_height; + seq->vdp_runtime_nr = accel->vdp_runtime_nr; + } + } + + VdpBitstreamBuffer vbit; + vbit.struct_version = VDP_BITSTREAM_BUFFER_VERSION; + vbit.bitstream = buf; + vbit.bitstream_bytes = len; + if ( pic->field ) + vbit.bitstream_bytes = pic->field; + st = accel->vdp_decoder_render( vd->decoder, accel->surface, (VdpPictureInfo*)&pic->vdp_infos, 1, &vbit ); + if ( st!=VDP_STATUS_OK ) + fprintf(stderr, "vdpau_vc1: decoder failed : %d!! %s\n", st, accel->vdp_get_error_string( st ) ); + else { + lprintf( "DECODER SUCCESS : slices=%d, slices_bytes=%d, current=%d, forwref:%d, backref:%d, pts:%lld\n", + pic->vdp_infos.slice_count, vbit.bitstream_bytes, accel->surface, pic->vdp_infos.forward_reference, pic->vdp_infos.backward_reference, seq->seq_pts ); + } + VdpPictureInfoVC1 *info = &(seq->picture.vdp_infos); + lprintf("%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d\n", info->slice_count, info->picture_type, info->frame_coding_mode, + info->postprocflag, info->pulldown, info->interlace, info->tfcntrflag, info->finterpflag, info->psf, info->dquant, info->panscan_flag, info->refdist_flag, + info->quantizer, info->extended_mv, info->extended_dmv, info->overlap, info->vstransform, info->loopfilter, info->fastuvmc, info->range_mapy_flag, info->range_mapy, + info->range_mapuv_flag, info->range_mapuv, info->multires, info->syncmarker, info->rangered, info->maxbframes, info->deblockEnable, info->pquant ); + + if ( pic->field ) { + int old_type = pic->vdp_infos.picture_type; + switch ( pic->fptype ) { + case FIELDS_I_I: + case FIELDS_P_I: + pic->vdp_infos.picture_type = I_FRAME; + pic->vdp_infos.backward_reference = VDP_INVALID_HANDLE; + pic->vdp_infos.forward_reference = VDP_INVALID_HANDLE; + break; + case FIELDS_I_P: + 
pic->vdp_infos.forward_reference = accel->surface; + pic->vdp_infos.picture_type = P_FRAME; + break; + case FIELDS_P_P: + if ( seq->backward_ref ) + pic->vdp_infos.forward_reference = ((vdpau_accel_t*)seq->backward_ref->accel_data)->surface; + pic->vdp_infos.picture_type = P_FRAME; + break; + case FIELDS_B_B: + case FIELDS_BI_B: + pic->vdp_infos.picture_type = B_FRAME; + break; + default: + pic->vdp_infos.picture_type = BI_FRAME; + } + vbit.bitstream = buf+pic->field+4; + vbit.bitstream_bytes = len-pic->field-4; + st = accel->vdp_decoder_render( vd->decoder, accel->surface, (VdpPictureInfo*)&pic->vdp_infos, 1, &vbit ); + if ( st!=VDP_STATUS_OK ) + fprintf(stderr, "vdpau_vc1: decoder failed : %d!! %s\n", st, accel->vdp_get_error_string( st ) ); + else { + lprintf( "DECODER SUCCESS (second field): slices=%d, slices_bytes=%d, current=%d, forwref:%d, backref:%d, pts:%lld\n", + pic->vdp_infos.slice_count, vbit.bitstream_bytes, accel->surface, pic->vdp_infos.forward_reference, pic->vdp_infos.backward_reference, seq->seq_pts ); + } + VdpPictureInfoVC1 *info = &(seq->picture.vdp_infos); + lprintf("%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d\n", info->slice_count, info->picture_type, info->frame_coding_mode, + info->postprocflag, info->pulldown, info->interlace, info->tfcntrflag, info->finterpflag, info->psf, info->dquant, info->panscan_flag, info->refdist_flag, + info->quantizer, info->extended_mv, info->extended_dmv, info->overlap, info->vstransform, info->loopfilter, info->fastuvmc, info->range_mapy_flag, info->range_mapy, + info->range_mapuv_flag, info->range_mapuv, info->multires, info->syncmarker, info->rangered, info->maxbframes, info->deblockEnable, info->pquant ); + + pic->vdp_infos.picture_type = old_type; + } +} + + + +static int search_field( vdpau_vc1_decoder_t *vd, uint8_t *buf, int len ) +{ + int i; + lprintf("search_fields, len=%d\n", len); + for ( i=0; i<len-4; ++i ) { + if ( buf[i]==0 && buf[i+1]==0 && buf[i+2]==1 
&& buf[i+3]==field_start_code ) { + lprintf("found field_start_code at %d\n", i); + return i; + } + } + return 0; +} + + + +static void decode_picture( vdpau_vc1_decoder_t *vd ) +{ + sequence_t *seq = (sequence_t*)&vd->sequence; + picture_t *pic = (picture_t*)&seq->picture; + vdpau_accel_t *ref_accel; + int field; + + uint8_t *buf; + int len; + + pic->skipped = 0; + pic->field = 0; + + if ( seq->mode == MODE_FRAME ) { + buf = seq->buf; + len = seq->bufpos; + if ( seq->profile==VDP_DECODER_PROFILE_VC1_ADVANCED ) + picture_header_advanced( vd, buf, len ); + else + picture_header( vd, buf, len ); + + if ( len < 2 ) + pic->skipped = 1; + } + else { + seq->picture.vdp_infos.slice_count = seq->picture.slices; + buf = seq->buf+seq->start+4; + len = seq->bufseek-seq->start-4; + if ( seq->profile==VDP_DECODER_PROFILE_VC1_ADVANCED ) { + int tmplen = (len>50) ? 50 : len; + uint8_t *tmp = malloc( tmplen ); + remove_emulation_prevention( buf, tmp, tmplen, &tmplen ); + picture_header_advanced( vd, tmp, tmplen ); + free( tmp ); + } + else + picture_header( vd, buf, len ); + + if ( len < 2 ) + pic->skipped = 1; + } + + if ( pic->skipped ) + pic->vdp_infos.picture_type = P_FRAME; + + if ( pic->vdp_infos.interlace && pic->vdp_infos.frame_coding_mode == PICTURE_FIELD_INTERLACE ) { + if ( !(field = search_field( vd, buf, len )) ) + lprintf("error, no fields found!\n"); + else + pic->field = field; + } + + pic->vdp_infos.forward_reference = VDP_INVALID_HANDLE; + pic->vdp_infos.backward_reference = VDP_INVALID_HANDLE; + + if ( pic->vdp_infos.picture_type==P_FRAME ) { + if ( seq->backward_ref ) { + ref_accel = (vdpau_accel_t*)seq->backward_ref->accel_data; + pic->vdp_infos.forward_reference = ref_accel->surface; + } + else { + reset_picture( &seq->picture ); + return; + } + } + else if ( pic->vdp_infos.picture_type>=B_FRAME ) { + if ( seq->forward_ref ) { + ref_accel = (vdpau_accel_t*)seq->forward_ref->accel_data; + pic->vdp_infos.forward_reference = ref_accel->surface; + } + else { + 
reset_picture( &seq->picture ); + return; + } + if ( seq->backward_ref ) { + ref_accel = (vdpau_accel_t*)seq->backward_ref->accel_data; + pic->vdp_infos.backward_reference = ref_accel->surface; + } + else { + reset_picture( &seq->picture ); + return; + } + } + + vo_frame_t *img = vd->stream->video_out->get_frame( vd->stream->video_out, seq->coded_width, seq->coded_height, + seq->ratio, XINE_IMGFMT_VDPAU, VO_BOTH_FIELDS ); + vdpau_accel_t *accel = (vdpau_accel_t*)img->accel_data; + if ( !seq->accel_vdpau ) + seq->accel_vdpau = accel; + + if( seq->vdp_runtime_nr != *(seq->accel_vdpau->current_vdp_runtime_nr) ) { + seq->accel_vdpau = accel; + if ( seq->forward_ref ) + seq->forward_ref->free( seq->forward_ref ); + seq->forward_ref = NULL; + if ( seq->backward_ref ) + seq->backward_ref->free( seq->backward_ref ); + seq->backward_ref = NULL; + vd->decoder = VDP_INVALID_HANDLE; + } + + decode_render( vd, accel, buf, len ); + + +#ifdef MAKE_DAT + if ( nframes==0 ) { + fwrite( &seq->coded_width, 1, sizeof(seq->coded_width), outfile ); + fwrite( &seq->coded_height, 1, sizeof(seq->coded_height), outfile ); + fwrite( &seq->ratio, 1, sizeof(seq->ratio), outfile ); + fwrite( &seq->profile, 1, sizeof(seq->profile), outfile ); + } + + if ( nframes++ < 25 ) { + fwrite( &pic->vdp_infos, 1, sizeof(pic->vdp_infos), outfile ); + fwrite( &len, 1, sizeof(len), outfile ); + fwrite( buf, 1, len, outfile ); + printf( "picture_type = %d\n", pic->vdp_infos.picture_type); + } +#endif + + if ( pic->vdp_infos.interlace && pic->vdp_infos.frame_coding_mode ) { + img->progressive_frame = 0; + img->top_field_first = pic->top_field_first; + } + else { + img->progressive_frame = 1; + img->top_field_first = 1; + } + img->pts = seq->seq_pts; + img->bad_frame = 0; + img->duration = seq->video_step; + accel->color_standard = VDP_COLOR_STANDARD_ITUR_BT_709; + + if ( pic->vdp_infos.picture_type<B_FRAME ) { + if ( pic->vdp_infos.picture_type==I_FRAME && !seq->backward_ref ) { + img->pts = 0; + img->draw( 
img, vd->stream ); + ++img->drawn; + } + if ( seq->forward_ref ) { + seq->forward_ref->drawn = 0; + seq->forward_ref->free( seq->forward_ref ); + } + seq->forward_ref = seq->backward_ref; + if ( seq->forward_ref && !seq->forward_ref->drawn ) { + seq->forward_ref->draw( seq->forward_ref, vd->stream ); + } + seq->backward_ref = img; + } + else { + img->draw( img, vd->stream ); + img->free( img ); + } + + seq->seq_pts +=seq->video_step; + + reset_picture( &seq->picture ); +} + + + +/* + * This function receives a buffer of data from the demuxer layer and + * figures out how to handle it based on its header flags. + */ +static void vdpau_vc1_decode_data (video_decoder_t *this_gen, buf_element_t *buf) +{ + vdpau_vc1_decoder_t *this = (vdpau_vc1_decoder_t *) this_gen; + sequence_t *seq = (sequence_t*)&this->sequence; + + /* a video decoder does not care about this flag (?) */ + if (buf->decoder_flags & BUF_FLAG_PREVIEW) { + lprintf("BUF_FLAG_PREVIEW\n"); + } + + if (buf->decoder_flags & BUF_FLAG_FRAMERATE) { + lprintf("BUF_FLAG_FRAMERATE=%d\n", buf->decoder_info[0]); + if ( buf->decoder_info[0] > 0 ) { + this->sequence.video_step = buf->decoder_info[0]; + _x_stream_info_set(this->stream, XINE_STREAM_INFO_FRAME_DURATION, this->sequence.video_step); + } + } + + if (this->sequence.reported_video_step != this->sequence.video_step){ + _x_stream_info_set(this->stream, XINE_STREAM_INFO_FRAME_DURATION, (this->sequence.reported_video_step = this->sequence.video_step)); + } + + if (buf->decoder_flags & BUF_FLAG_HEADER) { + lprintf("BUF_FLAG_HEADER\n"); + } + + if (buf->decoder_flags & BUF_FLAG_ASPECT) { + lprintf("BUF_FLAG_ASPECT\n"); + seq->ratio = (double)buf->decoder_info[1]/(double)buf->decoder_info[2]; + lprintf("arx=%d ary=%d ratio=%f\n", buf->decoder_info[1], buf->decoder_info[2], seq->ratio); + } + + if ( !buf->size ) + return; + + seq->cur_pts = buf->pts; + + if (buf->decoder_flags & BUF_FLAG_STDHEADER) { + lprintf("BUF_FLAG_STDHEADER\n"); + xine_bmiheader *bih = 
(xine_bmiheader *) buf->content; + int bs = sizeof( xine_bmiheader ); + seq->coded_width = bih->biWidth; + seq->coded_height = bih->biHeight; + lprintf( "width=%d height=%d\n", bih->biWidth, bih->biHeight ); + if ( buf->size > bs ) { + seq->mode = MODE_FRAME; + parse_header( this, buf->content+bs, buf->size-bs ); + } + return; + } + + int size = seq->bufpos+buf->size; + if ( seq->bufsize < size ) { + seq->bufsize = size+10000; + seq->buf = realloc( seq->buf, seq->bufsize ); + lprintf("sequence buffer realloced = %d\n", seq->bufsize ); + } + xine_fast_memcpy( seq->buf+seq->bufpos, buf->content, buf->size ); + seq->bufpos += buf->size; + + if (buf->decoder_flags & BUF_FLAG_FRAME_START) { + lprintf("BUF_FLAG_FRAME_START\n"); + seq->seq_pts = buf->pts; + seq->mode = MODE_FRAME; + if ( seq->bufpos > 3 ) { + if ( seq->buf[0]==0 && seq->buf[1]==0 && seq->buf[2]==1 ) { + seq->mode = MODE_STARTCODE; + } + } + } + + if ( seq->mode == MODE_FRAME ) { + if ( buf->decoder_flags & BUF_FLAG_FRAME_END ) { + lprintf("BUF_FLAG_FRAME_END\n"); + decode_picture( this ); + seq->bufpos = 0; + } + return; + } + + int res, startcode=0; + while ( seq->bufseek <= seq->bufpos-4 ) { + uint8_t *buffer = seq->buf+seq->bufseek; + if ( buffer[0]==0 && buffer[1]==0 && buffer[2]==1 ) { + startcode = 1; + seq->current_code = buffer[3]; + lprintf("current_code = %d\n", seq->current_code); + if ( seq->start<0 ) { + seq->start = seq->bufseek; + seq->code_start = buffer[3]; + lprintf("code_start = %d\n", seq->code_start); + if ( seq->cur_pts ) + seq->seq_pts = seq->cur_pts; + } + else { + res = parse_code( this, seq->buf+seq->start, seq->bufseek-seq->start ); + if ( res==1 ) { + seq->mode = MODE_STARTCODE; + decode_picture( this ); + parse_code( this, seq->buf+seq->start, seq->bufseek-seq->start ); + } + if ( res!=-1 ) { + uint8_t *tmp = (uint8_t*)malloc(seq->bufsize); + xine_fast_memcpy( tmp, seq->buf+seq->bufseek, seq->bufpos-seq->bufseek ); + seq->bufpos -= seq->bufseek; + seq->start = -1; + 
seq->bufseek = -1; + free( seq->buf ); + seq->buf = tmp; + } + } + } + ++seq->bufseek; + } +} + + + +/* + * This function is called when xine needs to flush the system. + */ +static void vdpau_vc1_flush (video_decoder_t *this_gen) { + vdpau_vc1_decoder_t *this = (vdpau_vc1_decoder_t *) this_gen; + + lprintf( "vdpau_vc1_flush\n" ); +} + +/* + * This function resets the video decoder. + */ +static void vdpau_vc1_reset (video_decoder_t *this_gen) { + vdpau_vc1_decoder_t *this = (vdpau_vc1_decoder_t *) this_gen; + + lprintf( "vdpau_vc1_reset\n" ); + reset_sequence( &this->sequence ); +} + +/* + * The decoder should forget any stored pts values here. + */ +static void vdpau_vc1_discontinuity (video_decoder_t *this_gen) { + vdpau_vc1_decoder_t *this = (vdpau_vc1_decoder_t *) this_gen; + + lprintf( "vdpau_vc1_discontinuity\n" ); +} + +/* + * This function frees the video decoder instance allocated to the decoder. + */ +static void vdpau_vc1_dispose (video_decoder_t *this_gen) { + + vdpau_vc1_decoder_t *this = (vdpau_vc1_decoder_t *) this_gen; + + lprintf( "vdpau_vc1_dispose\n" ); + + if ( this->decoder!=VDP_INVALID_HANDLE && this->sequence.accel_vdpau ) { + this->sequence.accel_vdpau->vdp_decoder_destroy( this->decoder ); + this->decoder = VDP_INVALID_HANDLE; + } + + reset_sequence( &this->sequence ); + + this->stream->video_out->close( this->stream->video_out, this->stream ); + + free( this->sequence.buf ); + free( this_gen ); +} + +/* + * This function allocates, initializes, and returns a private video + * decoder structure. 
+ */ +static video_decoder_t *open_plugin (video_decoder_class_t *class_gen, xine_stream_t *stream) { + + vdpau_vc1_decoder_t *this ; + + lprintf( "open_plugin\n" ); + + /* the videoout must be vdpau-capable to support this decoder */ + if ( !(stream->video_driver->get_capabilities(stream->video_driver) & VO_CAP_VDPAU_VC1) ) + return NULL; + + /* now check if vdpau has free decoder resource */ + vo_frame_t *img = stream->video_out->get_frame( stream->video_out, 1920, 1080, 1, XINE_IMGFMT_VDPAU, VO_BOTH_FIELDS ); + vdpau_accel_t *accel = (vdpau_accel_t*)img->accel_data; + int runtime_nr = accel->vdp_runtime_nr; + img->free(img); + VdpDecoder decoder; + VdpStatus st = accel->vdp_decoder_create( accel->vdp_device, VDP_DECODER_PROFILE_VC1_MAIN, 1920, 1080, 2, &decoder ); + if ( st!=VDP_STATUS_OK ) { + lprintf( "can't create vdpau decoder.\n" ); + return NULL; + } + + accel->vdp_decoder_destroy( decoder ); + + this = (vdpau_vc1_decoder_t *) calloc(1, sizeof(vdpau_vc1_decoder_t)); + + this->video_decoder.decode_data = vdpau_vc1_decode_data; + this->video_decoder.flush = vdpau_vc1_flush; + this->video_decoder.reset = vdpau_vc1_reset; + this->video_decoder.discontinuity = vdpau_vc1_discontinuity; + this->video_decoder.dispose = vdpau_vc1_dispose; + + this->stream = stream; + this->class = (vdpau_vc1_class_t *) class_gen; + + this->sequence.bufsize = 10000; + this->sequence.buf = (uint8_t*)malloc(this->sequence.bufsize); + this->sequence.forward_ref = 0; + this->sequence.backward_ref = 0; + this->sequence.vdp_runtime_nr = runtime_nr; + init_sequence( &this->sequence ); + + init_picture( &this->sequence.picture ); + + this->decoder = VDP_INVALID_HANDLE; + this->sequence.accel_vdpau = NULL; + this->sequence.mode = MODE_STARTCODE; + + (stream->video_out->open)(stream->video_out, stream); + +#ifdef MAKE_DAT + outfile = fopen( "/tmp/vc1.dat","w"); + nframes = 0; +#endif + + return &this->video_decoder; +} + +/* + * This function allocates a private video decoder class and 
initializes + * the class's member functions. + */ +static void *init_plugin (xine_t *xine, void *data) { + + vdpau_vc1_class_t *this; + + this = (vdpau_vc1_class_t *) calloc(1, sizeof(vdpau_vc1_class_t)); + + this->decoder_class.open_plugin = open_plugin; + this->decoder_class.identifier = "vdpau_vc1"; + this->decoder_class.description = + N_("vdpau_vc1: vc1 decoder plugin using VDPAU hardware decoding.\n" + "Must be used along with video_out_vdpau."); + this->decoder_class.dispose = default_video_decoder_class_dispose; + + return this; +} + +/* + * This is a list of all of the internal xine video buffer types that + * this decoder is able to handle. Check src/xine-engine/buffer.h for a + * list of valid buffer types (and add a new one if the one you need does + * not exist). Terminate the list with a 0. + */ +static const uint32_t video_types[] = { + BUF_VIDEO_VC1, BUF_VIDEO_WMV9, + 0 +}; + +/* + * This data structure combines the list of supported xine buffer types and + * the priority that the plugin should be given with respect to other + * plugins that handle the same buffer type. A plugin with priority (n+1) + * will be used instead of a plugin with priority (n). + */ +static const decoder_info_t dec_info_video = { + video_types, /* supported types */ + 8 /* priority */ +}; + +/* + * The plugin catalog entry. This is the only information that this plugin + * will export to the public. + */ +const plugin_info_t xine_plugin_info[] EXPORTED = { + /* { type, API, "name", version, special_info, init_function } */ + { PLUGIN_VIDEO_DECODER, 19, "vdpau_vc1", XINE_VERSION_CODE, &dec_info_video, init_plugin }, + { PLUGIN_NONE, 0, "", 0, NULL, NULL } +}; diff --git a/src/video_dec/rgb.c b/src/video_dec/rgb.c new file mode 100644 index 000000000..678ce8194 --- /dev/null +++ b/src/video_dec/rgb.c @@ -0,0 +1,451 @@ +/* + * Copyright (C) 2000-2003 the xine project + * + * This file is part of xine, a free video player. 
+ * + * xine is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * xine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110, USA + * + * Raw RGB "Decoder" by Mike Melanson (melanson@pcisys.net) + * Actually, this decoder just converts a raw RGB image to a YUY2 map + * suitable for display under xine. + * + * This decoder deals with raw RGB data from Microsoft and Quicktime files. + * Data from a MS file can be 32-, 24-, 16-, or 8-bit. The latter can also + * be grayscale, depending on whether a palette is present. Data from a QT + * file can be 32-, 24-, 16-, 8-, 4-, 2-, or 1-bit. Any resolutions <= 8 + * can also be greyscale depending on what the QT file specifies. + * + * One more catch: Raw RGB from a Microsoft file is upside down. This is + * indicated by a negative height parameter. 
+ */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> +#include <unistd.h> + +#define LOG_MODULE "rgb" +#define LOG_VERBOSE +/* +#define LOG +*/ +#include <xine/xine_internal.h> +#include <xine/video_out.h> +#include <xine/buffer.h> +#include <xine/xineutils.h> +#include "bswap.h" + +typedef struct { + video_decoder_class_t decoder_class; +} rgb_class_t; + +typedef struct rgb_decoder_s { + video_decoder_t video_decoder; /* parent video decoder structure */ + + rgb_class_t *class; + xine_stream_t *stream; + + /* these are traditional variables in a video decoder object */ + uint64_t video_step; /* frame duration in pts units */ + int decoder_ok; /* current decoder status */ + int skipframes; + + unsigned char *buf; /* the accumulated buffer data */ + int bufsize; /* the maximum size of buf */ + int size; /* the current size of buf */ + + int width; /* the width of a video frame */ + int height; /* the height of a video frame */ + double ratio; /* the width to height ratio */ + int bytes_per_pixel; + int bit_depth; + int upside_down; + + unsigned char yuv_palette[256 * 4]; + yuv_planes_t yuv_planes; + +} rgb_decoder_t; + +static void rgb_decode_data (video_decoder_t *this_gen, + buf_element_t *buf) { + + rgb_decoder_t *this = (rgb_decoder_t *) this_gen; + xine_bmiheader *bih; + palette_entry_t *palette; + int i; + int pixel_ptr, row_ptr; + int palette_index; + int buf_ptr; + unsigned int packed_pixel; + unsigned char r, g, b; + int pixels_left; + unsigned char pixel_byte = 0; + + vo_frame_t *img; /* video out frame */ + + /* a video decoder does not care about this flag (?) 
*/ + if (buf->decoder_flags & BUF_FLAG_PREVIEW) + return; + + if ((buf->decoder_flags & BUF_FLAG_SPECIAL) && + (buf->decoder_info[1] == BUF_SPECIAL_PALETTE)) { + palette = (palette_entry_t *)buf->decoder_info_ptr[2]; + for (i = 0; i < buf->decoder_info[2]; i++) { + this->yuv_palette[i * 4 + 0] = + COMPUTE_Y(palette[i].r, palette[i].g, palette[i].b); + this->yuv_palette[i * 4 + 1] = + COMPUTE_U(palette[i].r, palette[i].g, palette[i].b); + this->yuv_palette[i * 4 + 2] = + COMPUTE_V(palette[i].r, palette[i].g, palette[i].b); + } + } + + if (buf->decoder_flags & BUF_FLAG_FRAMERATE) { + this->video_step = buf->decoder_info[0]; + _x_stream_info_set(this->stream, XINE_STREAM_INFO_FRAME_DURATION, this->video_step); + } + + if (buf->decoder_flags & BUF_FLAG_STDHEADER) { /* need to initialize */ + (this->stream->video_out->open) (this->stream->video_out, this->stream); + + bih = (xine_bmiheader *) buf->content; + this->width = (bih->biWidth + 3) & ~0x03; + this->height = (bih->biHeight + 3) & ~0x03; + if (this->height < 0) { + this->upside_down = 1; + this->height = -this->height; + } else { + this->upside_down = 0; + } + this->ratio = (double)this->width/(double)this->height; + + this->bit_depth = bih->biBitCount; + if (this->bit_depth > 32) + this->bit_depth &= 0x1F; + /* round this number up in case of 15 */ + lprintf("width = %d, height = %d, bit_depth = %d\n", this->width, this->height, this->bit_depth); + + this->bytes_per_pixel = (this->bit_depth + 1) / 8; + + free (this->buf); + + /* minimal buffer size */ + this->bufsize = this->width * this->height * this->bytes_per_pixel; + this->buf = calloc(1, this->bufsize); + this->size = 0; + + init_yuv_planes(&this->yuv_planes, this->width, this->height); + + (this->stream->video_out->open) (this->stream->video_out, this->stream); + this->decoder_ok = 1; + + /* load the stream/meta info */ + _x_meta_info_set_utf8(this->stream, XINE_META_INFO_VIDEOCODEC, "Raw RGB"); + + return; + } else if (this->decoder_ok) { + + if 
(this->size + buf->size > this->bufsize) { + this->bufsize = this->size + 2 * buf->size; + this->buf = realloc (this->buf, this->bufsize); + } + xine_fast_memcpy (&this->buf[this->size], buf->content, buf->size); + + this->size += buf->size; + + if (buf->decoder_flags & BUF_FLAG_FRAME_END) { + + img = this->stream->video_out->get_frame (this->stream->video_out, + this->width, this->height, + this->ratio, XINE_IMGFMT_YUY2, + VO_BOTH_FIELDS); + + img->duration = this->video_step; + img->pts = buf->pts; + img->bad_frame = 0; + + + /* iterate through each row */ + buf_ptr = 0; + + if (this->upside_down) { + for (row_ptr = this->yuv_planes.row_width * (this->yuv_planes.row_count - 1); + row_ptr >= 0; row_ptr -= this->yuv_planes.row_width) { + for (pixel_ptr = 0; pixel_ptr < this->width; pixel_ptr++) { + + if (this->bytes_per_pixel == 1) { + + palette_index = this->buf[buf_ptr++]; + + this->yuv_planes.y[row_ptr + pixel_ptr] = + this->yuv_palette[palette_index * 4 + 0]; + this->yuv_planes.u[row_ptr + pixel_ptr] = + this->yuv_palette[palette_index * 4 + 1]; + this->yuv_planes.v[row_ptr + pixel_ptr] = + this->yuv_palette[palette_index * 4 + 2]; + + } else if (this->bytes_per_pixel == 2) { + + /* ABGR1555 format, little-endian order */ + packed_pixel = _X_LE_16(&this->buf[buf_ptr]); + buf_ptr += 2; + UNPACK_BGR15(packed_pixel, r, g, b); + + this->yuv_planes.y[row_ptr + pixel_ptr] = + COMPUTE_Y(r, g, b); + this->yuv_planes.u[row_ptr + pixel_ptr] = + COMPUTE_U(r, g, b); + this->yuv_planes.v[row_ptr + pixel_ptr] = + COMPUTE_V(r, g, b); + + } else { + + /* BGR24 or BGRA32 */ + b = this->buf[buf_ptr++]; + g = this->buf[buf_ptr++]; + r = this->buf[buf_ptr++]; + + /* the next line takes care of 'A' in the 32-bit case */ + buf_ptr += this->bytes_per_pixel - 3; + + this->yuv_planes.y[row_ptr + pixel_ptr] = + COMPUTE_Y(r, g, b); + this->yuv_planes.u[row_ptr + pixel_ptr] = + COMPUTE_U(r, g, b); + this->yuv_planes.v[row_ptr + pixel_ptr] = + COMPUTE_V(r, g, b); + + } + } + } + } else { + 
+ for (row_ptr = 0; row_ptr < this->yuv_planes.row_width * this->yuv_planes.row_count; row_ptr += this->yuv_planes.row_width) { + pixels_left = 0; + for (pixel_ptr = 0; pixel_ptr < this->width; pixel_ptr++) { + + if (this->bit_depth == 1) { + + if (pixels_left == 0) { + pixels_left = 8; + pixel_byte = *this->buf++; + } + + if (pixel_byte & 0x80) { + this->yuv_planes.y[row_ptr + pixel_ptr] = + this->yuv_palette[1 * 4 + 0]; + this->yuv_planes.u[row_ptr + pixel_ptr] = + this->yuv_palette[1 * 4 + 1]; + this->yuv_planes.v[row_ptr + pixel_ptr] = + this->yuv_palette[1 * 4 + 2]; + } else { + this->yuv_planes.y[row_ptr + pixel_ptr] = + this->yuv_palette[0 * 4 + 0]; + this->yuv_planes.u[row_ptr + pixel_ptr] = + this->yuv_palette[0 * 4 + 1]; + this->yuv_planes.v[row_ptr + pixel_ptr] = + this->yuv_palette[0 * 4 + 2]; + } + pixels_left--; + pixel_byte <<= 1; + + } else if (this->bit_depth == 2) { + + if (pixels_left == 0) { + pixels_left = 4; + pixel_byte = *this->buf++; + } + + palette_index = (pixel_byte & 0xC0) >> 6; + this->yuv_planes.y[row_ptr + pixel_ptr] = + this->yuv_palette[palette_index * 4 + 0]; + this->yuv_planes.u[row_ptr + pixel_ptr] = + this->yuv_palette[palette_index * 4 + 1]; + this->yuv_planes.v[row_ptr + pixel_ptr] = + this->yuv_palette[palette_index * 4 + 2]; + + pixels_left--; + pixel_byte <<= 2; + + } else if (this->bit_depth == 4) { + + if (pixels_left == 0) { + pixels_left = 2; + pixel_byte = *this->buf++; + } + + palette_index = (pixel_byte & 0xF0) >> 4; + this->yuv_planes.y[row_ptr + pixel_ptr] = + this->yuv_palette[palette_index * 4 + 0]; + this->yuv_planes.u[row_ptr + pixel_ptr] = + this->yuv_palette[palette_index * 4 + 1]; + this->yuv_planes.v[row_ptr + pixel_ptr] = + this->yuv_palette[palette_index * 4 + 2]; + + pixels_left--; + pixel_byte <<= 4; + + } else if (this->bytes_per_pixel == 1) { + + palette_index = this->buf[buf_ptr++]; + + this->yuv_planes.y[row_ptr + pixel_ptr] = + this->yuv_palette[palette_index * 4 + 0]; + this->yuv_planes.u[row_ptr 
+ pixel_ptr] = + this->yuv_palette[palette_index * 4 + 1]; + this->yuv_planes.v[row_ptr + pixel_ptr] = + this->yuv_palette[palette_index * 4 + 2]; + + } else if (this->bytes_per_pixel == 2) { + + /* ARGB1555 format, big-endian order */ + packed_pixel = _X_BE_16(&this->buf[buf_ptr]); + buf_ptr += 2; + UNPACK_RGB15(packed_pixel, r, g, b); + + this->yuv_planes.y[row_ptr + pixel_ptr] = + COMPUTE_Y(r, g, b); + this->yuv_planes.u[row_ptr + pixel_ptr] = + COMPUTE_U(r, g, b); + this->yuv_planes.v[row_ptr + pixel_ptr] = + COMPUTE_V(r, g, b); + + } else { + + /* RGB24 or ARGB32; the next line takes care of 'A' in the + * 32-bit case */ + buf_ptr += this->bytes_per_pixel - 3; + + r = this->buf[buf_ptr++]; + g = this->buf[buf_ptr++]; + b = this->buf[buf_ptr++]; + + this->yuv_planes.y[row_ptr + pixel_ptr] = + COMPUTE_Y(r, g, b); + this->yuv_planes.u[row_ptr + pixel_ptr] = + COMPUTE_U(r, g, b); + this->yuv_planes.v[row_ptr + pixel_ptr] = + COMPUTE_V(r, g, b); + + } + } + } + } + + yuv444_to_yuy2(&this->yuv_planes, img->base[0], img->pitches[0]); + + img->draw(img, this->stream); + img->free(img); + + this->size = 0; + } + } +} + +/* + * This function is called when xine needs to flush the system. Not + * sure when or if this is used or even if it needs to do anything. + */ +static void rgb_flush (video_decoder_t *this_gen) { +} + +/* + * This function resets the video decoder. + */ +static void rgb_reset (video_decoder_t *this_gen) { + rgb_decoder_t *this = (rgb_decoder_t *) this_gen; + + this->size = 0; +} + +static void rgb_discontinuity (video_decoder_t *this_gen) { +} + +/* + * This function frees the video decoder instance allocated to the decoder. 
+ */ +static void rgb_dispose (video_decoder_t *this_gen) { + rgb_decoder_t *this = (rgb_decoder_t *) this_gen; + + free (this->buf); + + if (this->decoder_ok) { + this->decoder_ok = 0; + this->stream->video_out->close(this->stream->video_out, this->stream); + } + + free (this_gen); +} + +static video_decoder_t *open_plugin (video_decoder_class_t *class_gen, xine_stream_t *stream) { + + rgb_decoder_t *this ; + + this = (rgb_decoder_t *) calloc(1, sizeof(rgb_decoder_t)); + + this->video_decoder.decode_data = rgb_decode_data; + this->video_decoder.flush = rgb_flush; + this->video_decoder.reset = rgb_reset; + this->video_decoder.discontinuity = rgb_discontinuity; + this->video_decoder.dispose = rgb_dispose; + this->size = 0; + + this->stream = stream; + this->class = (rgb_class_t *) class_gen; + + this->decoder_ok = 0; + this->buf = NULL; + + return &this->video_decoder; +} + +static void *init_plugin (xine_t *xine, void *data) { + + rgb_class_t *this; + + this = (rgb_class_t *) calloc(1, sizeof(rgb_class_t)); + + this->decoder_class.open_plugin = open_plugin; + this->decoder_class.identifier = "RGB"; + this->decoder_class.description = N_("Raw RGB video decoder plugin"); + this->decoder_class.dispose = default_video_decoder_class_dispose; + + return this; +} + +/* + * exported plugin catalog entry + */ + +static const uint32_t video_types[] = { + BUF_VIDEO_RGB, + 0 + }; + +static const decoder_info_t dec_info_video = { + video_types, /* supported types */ + 1 /* priority */ +}; + +const plugin_info_t xine_plugin_info[] EXPORTED = { + /* type, API, "name", version, special_info, init_function */ + { PLUGIN_VIDEO_DECODER, 19, "rgb", XINE_VERSION_CODE, &dec_info_video, init_plugin }, + { PLUGIN_NONE, 0, "", 0, NULL, NULL } +}; diff --git a/src/video_dec/yuv.c b/src/video_dec/yuv.c new file mode 100644 index 000000000..c1a8b1829 --- /dev/null +++ b/src/video_dec/yuv.c @@ -0,0 +1,377 @@ +/* + * Copyright (C) 2000-2004 the xine project + * + * This file is part of xine, a 
free video player. + * + * xine is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * xine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110, USA + * + * YUV "Decoder" by Mike Melanson (melanson@pcisys.net) + * Actually, this decoder just reorganizes chunks of raw YUV data in such + * a way that xine can display them. + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> +#include <unistd.h> + +#include <xine/xine_internal.h> +#include <xine/video_out.h> +#include <xine/buffer.h> +#include <xine/xineutils.h> +#include "bswap.h" + +#define VIDEOBUFSIZE 128*1024 + +typedef struct { + video_decoder_class_t decoder_class; +} yuv_class_t; + +typedef struct yuv_decoder_s { + video_decoder_t video_decoder; /* parent video decoder structure */ + + yuv_class_t *class; + xine_stream_t *stream; + + /* these are traditional variables in a video decoder object */ + uint64_t video_step; /* frame duration in pts units */ + int decoder_ok; /* current decoder status */ + int skipframes; + + unsigned char *buf; /* the accumulated buffer data */ + int bufsize; /* the maximum size of buf */ + int size; /* the current size of buf */ + + int width; /* the width of a video frame */ + int height; /* the height of a video frame */ + double ratio; /* the width to height ratio */ + + int progressive; + int top_field_first; + +} 
yuv_decoder_t; + +/************************************************************************** + * xine video plugin functions + *************************************************************************/ + +/* + * This function receives a buffer of data from the demuxer layer and + * figures out how to handle it based on its header flags. + */ +static void yuv_decode_data (video_decoder_t *this_gen, + buf_element_t *buf) { + + yuv_decoder_t *this = (yuv_decoder_t *) this_gen; + xine_bmiheader *bih; + + vo_frame_t *img; /* video out frame */ + + /* a video decoder does not care about this flag (?) */ + if (buf->decoder_flags & BUF_FLAG_PREVIEW) + return; + + if (buf->decoder_flags & BUF_FLAG_FRAMERATE) { + this->video_step = buf->decoder_info[0]; + _x_stream_info_set(this->stream, XINE_STREAM_INFO_FRAME_DURATION, this->video_step); + } + + if (buf->decoder_flags & BUF_FLAG_STDHEADER) { /* need to initialize */ + (this->stream->video_out->open) (this->stream->video_out, this->stream); + + bih = (xine_bmiheader *) buf->content; + this->width = (bih->biWidth + 3) & ~0x03; + this->height = (bih->biHeight + 3) & ~0x03; + + if (buf->decoder_flags & BUF_FLAG_ASPECT) + this->ratio = (double)buf->decoder_info[1] / (double)buf->decoder_info[2]; + else + this->ratio = (double)this->width / (double)this->height; + + this->progressive = buf->decoder_info[3]; + this->top_field_first = buf->decoder_info[4]; + + free (this->buf); + this->buf = NULL; + + this->bufsize = VIDEOBUFSIZE; + this->buf = malloc(this->bufsize); + this->size = 0; + + this->decoder_ok = 1; + + /* load the stream/meta info */ + switch (buf->type) { + + case BUF_VIDEO_YUY2: + _x_meta_info_set_utf8(this->stream, XINE_META_INFO_VIDEOCODEC, "Raw YUY2"); + break; + + case BUF_VIDEO_YV12: + _x_meta_info_set_utf8(this->stream, XINE_META_INFO_VIDEOCODEC, "Raw YV12"); + break; + + case BUF_VIDEO_YVU9: + _x_meta_info_set_utf8(this->stream, XINE_META_INFO_VIDEOCODEC, "Raw YVU9"); + break; + + case BUF_VIDEO_GREY: + 
_x_meta_info_set_utf8(this->stream, XINE_META_INFO_VIDEOCODEC, "Greyscale YUV"); + break; + + case BUF_VIDEO_I420: + _x_meta_info_set_utf8(this->stream, XINE_META_INFO_VIDEOCODEC, "Raw I420"); + break; + + } + + _x_stream_info_set(this->stream, XINE_STREAM_INFO_VIDEO_WIDTH, this->width); + _x_stream_info_set(this->stream, XINE_STREAM_INFO_VIDEO_HEIGHT, this->height); + _x_stream_info_set(this->stream, XINE_STREAM_INFO_VIDEO_RATIO, this->ratio*10000); + + return; + } else if (this->decoder_ok && !(buf->decoder_flags & BUF_FLAG_SPECIAL)) { + uint8_t *src; + + /* if buffer contains an entire frame then there's no need to copy it + * into our internal buffer */ + if ((buf->decoder_flags & BUF_FLAG_FRAME_START) && + (buf->decoder_flags & BUF_FLAG_FRAME_END)) + src = buf->content; + else { + if (this->size + buf->size > this->bufsize) { + this->bufsize = this->size + 2 * buf->size; + this->buf = realloc (this->buf, this->bufsize); + } + + xine_fast_memcpy (&this->buf[this->size], buf->content, buf->size); + + this->size += buf->size; + + src = this->buf; + } + + if (buf->decoder_flags & BUF_FLAG_FRAME_END) { + + if (buf->type == BUF_VIDEO_YUY2) { + + img = this->stream->video_out->get_frame (this->stream->video_out, + this->width, this->height, + this->ratio, XINE_IMGFMT_YUY2, VO_BOTH_FIELDS); + + yuy2_to_yuy2( + /* src */ + src, this->width*2, + /* dst */ + img->base[0], img->pitches[0], + /* width x height */ + this->width, this->height); + + } else if (buf->type == BUF_VIDEO_YV12) { + + img = this->stream->video_out->get_frame (this->stream->video_out, + this->width, this->height, + this->ratio, XINE_IMGFMT_YV12, VO_BOTH_FIELDS); + + yv12_to_yv12( + /* Y */ + src, this->width, + img->base[0], img->pitches[0], + /* U */ + src + (this->width * this->height * 5/4), this->width/2, + img->base[1], img->pitches[1], + /* V */ + src + (this->width * this->height), this->width/2, + img->base[2], img->pitches[2], + /* width x height */ + this->width, this->height); + + } else 
if (buf->type == BUF_VIDEO_I420) { + + img = this->stream->video_out->get_frame (this->stream->video_out, + this->width, this->height, + this->ratio, XINE_IMGFMT_YV12, VO_BOTH_FIELDS); + + yv12_to_yv12( + /* Y */ + src, this->width, + img->base[0], img->pitches[0], + /* U */ + src + (this->width * this->height), this->width/2, + img->base[1], img->pitches[1], + /* V */ + src + (this->width * this->height * 5/4), this->width/2, + img->base[2], img->pitches[2], + /* width x height */ + this->width, this->height); + + } else if (buf->type == BUF_VIDEO_YVU9) { + + img = this->stream->video_out->get_frame (this->stream->video_out, + this->width, this->height, + this->ratio, XINE_IMGFMT_YV12, VO_BOTH_FIELDS); + + + yuv9_to_yv12( + /* Y */ + src, + this->width, + img->base[0], + img->pitches[0], + /* U */ + src + (this->width * this->height), + this->width / 4, + img->base[1], + img->pitches[1], + /* V */ + src + (this->width * this->height) + + (this->width * this->height / 16), + this->width / 4, + img->base[2], + img->pitches[2], + /* width x height */ + this->width, + this->height); + + } else if (buf->type == BUF_VIDEO_GREY) { + + img = this->stream->video_out->get_frame (this->stream->video_out, + this->width, this->height, + this->ratio, XINE_IMGFMT_YV12, VO_BOTH_FIELDS); + + xine_fast_memcpy(img->base[0], src, this->width * this->height); + memset( img->base[1], 0x80, this->width * this->height / 4 ); + memset( img->base[2], 0x80, this->width * this->height / 4 ); + + } else { + + /* just allocate something to avoid compiler warnings */ + img = this->stream->video_out->get_frame (this->stream->video_out, + this->width, this->height, + this->ratio, XINE_IMGFMT_YV12, VO_BOTH_FIELDS); + + } + + img->duration = this->video_step; + img->pts = buf->pts; + img->bad_frame = 0; + + img->draw(img, this->stream); + img->free(img); + + this->size = 0; + } + } +} + +/* + * This function is called when xine needs to flush the system. 
Not + * sure when or if this is used or even if it needs to do anything. + */ +static void yuv_flush (video_decoder_t *this_gen) { +} + +/* + * This function resets the video decoder. + */ +static void yuv_reset (video_decoder_t *this_gen) { + yuv_decoder_t *this = (yuv_decoder_t *) this_gen; + + this->size = 0; +} + +static void yuv_discontinuity (video_decoder_t *this_gen) { +} + +/* + * This function frees the video decoder instance allocated to the decoder. + */ +static void yuv_dispose (video_decoder_t *this_gen) { + yuv_decoder_t *this = (yuv_decoder_t *) this_gen; + + free (this->buf); + + if (this->decoder_ok) { + this->decoder_ok = 0; + this->stream->video_out->close(this->stream->video_out, this->stream); + } + + free (this_gen); +} + +static video_decoder_t *open_plugin (video_decoder_class_t *class_gen, xine_stream_t *stream) { + + yuv_decoder_t *this ; + + this = (yuv_decoder_t *) calloc(1, sizeof(yuv_decoder_t)); + + this->video_decoder.decode_data = yuv_decode_data; + this->video_decoder.flush = yuv_flush; + this->video_decoder.reset = yuv_reset; + this->video_decoder.discontinuity = yuv_discontinuity; + this->video_decoder.dispose = yuv_dispose; + this->size = 0; + + this->stream = stream; + this->class = (yuv_class_t *) class_gen; + + this->decoder_ok = 0; + this->buf = NULL; + + return &this->video_decoder; +} + +static void *init_plugin (xine_t *xine, void *data) { + + yuv_class_t *this; + + this = (yuv_class_t *) calloc(1, sizeof(yuv_class_t)); + + this->decoder_class.open_plugin = open_plugin; + this->decoder_class.identifier = "YUV"; + this->decoder_class.description = N_("Raw YUV video decoder plugin"); + this->decoder_class.dispose = default_video_decoder_class_dispose; + + return this; +} + +/* + * exported plugin catalog entry + */ + +static const uint32_t video_types[] = { + BUF_VIDEO_YUY2, + BUF_VIDEO_YV12, + BUF_VIDEO_YVU9, + BUF_VIDEO_GREY, + BUF_VIDEO_I420, + 0 + }; + +static const decoder_info_t dec_info_video = { + video_types, /* 
supported types */ + 1 /* priority */ +}; + +const plugin_info_t xine_plugin_info[] EXPORTED = { + /* type, API, "name", version, special_info, init_function */ + { PLUGIN_VIDEO_DECODER, 19, "yuv", XINE_VERSION_CODE, &dec_info_video, init_plugin }, + { PLUGIN_NONE, 0, "", 0, NULL, NULL } +}; |