diff options
115 files changed, 27562 insertions, 6551 deletions
@@ -12,7 +12,7 @@ updates (the word 'maintainer' is intentionally avoided here). project version mediator ----------------------------------------------------------------------- -ffmpeg build 4715 Mike Melanson +ffmpeg build 4752 Mike Melanson goom 2k4-dev21 gsm610 1.0.10 Mike Melanson liba52 0.7.4 diff --git a/src/libffmpeg/libavcodec/4xm.c b/src/libffmpeg/libavcodec/4xm.c index fd84f8968..6932d52ab 100644 --- a/src/libffmpeg/libavcodec/4xm.c +++ b/src/libffmpeg/libavcodec/4xm.c @@ -220,17 +220,12 @@ static void idct(DCTELEM block[64]){ } static void init_vlcs(FourXContext *f){ - static int done = 0; int i; - if (!done) { - done = 1; - - for(i=0; i<4; i++){ - init_vlc(&block_type_vlc[i], BLOCK_TYPE_VLC_BITS, 7, - &block_type_tab[i][0][1], 2, 1, - &block_type_tab[i][0][0], 2, 1); - } + for(i=0; i<4; i++){ + init_vlc(&block_type_vlc[i], BLOCK_TYPE_VLC_BITS, 7, + &block_type_tab[i][0][1], 2, 1, + &block_type_tab[i][0][0], 2, 1, 1); } } @@ -328,13 +323,19 @@ static int decode_p_frame(FourXContext *f, uint8_t *buf, int length){ uint16_t *src= (uint16_t*)f->last_picture.data[0]; uint16_t *dst= (uint16_t*)f->current_picture.data[0]; const int stride= f->current_picture.linesize[0]>>1; - const int bitstream_size= get32(buf+8); - const int bytestream_size= get32(buf+16); - const int wordstream_size= get32(buf+12); + const unsigned int bitstream_size= get32(buf+8); + const unsigned int bytestream_size= get32(buf+16); + const unsigned int wordstream_size= get32(buf+12); - if(bitstream_size+ bytestream_size+ wordstream_size + 20 != length) + if(bitstream_size+ bytestream_size+ wordstream_size + 20 != length + || bitstream_size > (1<<26) + || bytestream_size > (1<<26) + || wordstream_size > (1<<26) + ){ av_log(f->avctx, AV_LOG_ERROR, "lengths %d %d %d %d\n", bitstream_size, bytestream_size, wordstream_size, bitstream_size+ bytestream_size+ wordstream_size - length); + return -1; + } f->bitstream_buffer= av_fast_realloc(f->bitstream_buffer, &f->bitstream_buffer_size, 
bitstream_size + FF_INPUT_BUFFER_PADDING_SIZE); f->dsp.bswap_buf((uint32_t*)f->bitstream_buffer, (uint32_t*)(buf + 20), bitstream_size/4); @@ -544,7 +545,7 @@ static uint8_t *read_huffman_tables(FourXContext *f, uint8_t * const buf){ init_vlc(&f->pre_vlc, ACDC_VLC_BITS, 257, len_tab , 1, 1, - bits_tab, 4, 4); + bits_tab, 4, 4, 0); return ptr; } @@ -555,13 +556,17 @@ static int decode_i_frame(FourXContext *f, uint8_t *buf, int length){ const int height= f->avctx->height; uint16_t *dst= (uint16_t*)f->current_picture.data[0]; const int stride= f->current_picture.linesize[0]>>1; - const int bitstream_size= get32(buf); + const unsigned int bitstream_size= get32(buf); const int token_count __attribute__((unused)) = get32(buf + bitstream_size + 8); - int prestream_size= 4*get32(buf + bitstream_size + 4); + unsigned int prestream_size= 4*get32(buf + bitstream_size + 4); uint8_t *prestream= buf + bitstream_size + 12; - if(prestream_size + bitstream_size + 12 != length) + if(prestream_size + bitstream_size + 12 != length + || bitstream_size > (1<<26) + || prestream_size > (1<<26)){ av_log(f->avctx, AV_LOG_ERROR, "size missmatch %d %d %d\n", prestream_size, bitstream_size, length); + return -1; + } prestream= read_huffman_tables(f, prestream); @@ -600,11 +605,6 @@ static int decode_frame(AVCodecContext *avctx, AVFrame *p, temp; int i, frame_4cc, frame_size; - /* special case for last picture */ - if (buf_size == 0) { - return 0; - } - frame_4cc= get32(buf); if(buf_size != get32(buf+4)+8){ av_log(f->avctx, AV_LOG_ERROR, "size missmatch %d %d\n", buf_size, get32(buf+4)); diff --git a/src/libffmpeg/libavcodec/8bps.c b/src/libffmpeg/libavcodec/8bps.c index 9509f42ad..3898ac5dd 100644 --- a/src/libffmpeg/libavcodec/8bps.c +++ b/src/libffmpeg/libavcodec/8bps.c @@ -61,7 +61,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8 { EightBpsContext * const c = (EightBpsContext *)avctx->priv_data; unsigned char *encoded = (unsigned char *)buf; - unsigned 
char *pixptr; + unsigned char *pixptr, *pixptr_end; unsigned int height = avctx->height; // Real image height unsigned int dlen, p, row; unsigned char *lp, *dp; @@ -70,11 +70,6 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8 unsigned int planes = c->planes; unsigned char *planemap = c->planemap; - - /* no supplementary picture */ - if (buf_size == 0) - return 0; - if(c->pic.data[0]) avctx->release_buffer(avctx, &c->pic); @@ -101,18 +96,23 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8 /* Decode a plane */ for(row = 0; row < height; row++) { pixptr = c->pic.data[0] + row * c->pic.linesize[0] + planemap[p]; + pixptr_end = pixptr + c->pic.linesize[0]; dlen = be2me_16(*(unsigned short *)(lp+row*2)); /* Decode a row of this plane */ while(dlen > 0) { if ((count = *dp++) <= 127) { count++; dlen -= count + 1; + if (pixptr + count * px_inc > pixptr_end) + break; while(count--) { *pixptr = *dp++; pixptr += px_inc; } } else { count = 257 - count; + if (pixptr + count * px_inc > pixptr_end) + break; while(count--) { *pixptr = *dp; pixptr += px_inc; @@ -155,6 +155,10 @@ static int decode_init(AVCodecContext *avctx) c->pic.data[0] = NULL; + if (avcodec_check_dimensions(avctx, avctx->width, avctx->height) < 0) { + return 1; + } + switch (avctx->bits_per_sample) { case 8: avctx->pix_fmt = PIX_FMT_PAL8; diff --git a/src/libffmpeg/libavcodec/Makefile.am b/src/libffmpeg/libavcodec/Makefile.am index 5d9c0d2d0..8b4ae4fef 100644 --- a/src/libffmpeg/libavcodec/Makefile.am +++ b/src/libffmpeg/libavcodec/Makefile.am @@ -17,11 +17,13 @@ noinst_LTLIBRARIES = libavcodec.la libavcodec_la_SOURCES = \ 4xm.c \ 8bps.c \ + aasc.c \ adpcm.c \ + alac.c \ asv1.c \ + bitstream.c \ cabac.c \ cinepak.c \ - common.c \ cyuv.c \ dpcm.c \ dsputil.c \ @@ -33,9 +35,11 @@ libavcodec_la_SOURCES = \ flicvideo.c \ fft.c \ golomb.c \ + h261.c \ h263.c \ h263dec.c \ h264.c \ + h264idct.c \ huffyuv.c \ idcinvideo.c \ imgconvert.c \ @@ -46,6 
+50,7 @@ libavcodec_la_SOURCES = \ jfdctint.c \ jrevdct.c \ lcl.c \ + loco.c \ mdct.c \ mace.c \ mem.c \ @@ -57,21 +62,27 @@ libavcodec_la_SOURCES = \ msmpeg4.c \ msrle.c \ msvideo1.c \ - opts.c \ parser.c \ pcm.c \ + qdrw.c \ + qpeg.c \ qtrle.c \ ra144.c \ ra288.c \ + rangecoder.c \ ratecontrol.c \ rational.c \ roqvideo.c \ rpza.c \ rv10.c \ + shorten.c \ simple_idct.c \ smc.c \ + snow.c \ svq1.c \ + tscc.c \ truemotion1.c \ + ulti.c \ utils.c \ vcr1.c \ vmdav.c \ @@ -79,7 +90,9 @@ libavcodec_la_SOURCES = \ vp3dsp.c \ vqavideo.c \ wmadec.c \ - xan.c + wnv1.c \ + xan.c \ + xl.c libavcodec_la_LDFLAGS = \ $(top_builddir)/src/libffmpeg/libavcodec/armv4l/libavcodec_armv4l.la \ @@ -92,6 +105,7 @@ libavcodec_la_LDFLAGS = \ noinst_HEADERS = \ avcodec.h \ + bitstream.h \ bswap.h \ cabac.h \ common.h \ @@ -103,6 +117,7 @@ noinst_HEADERS = \ imgconvert_template.h \ indeo3data.h \ integer.h \ + h261data.h \ h263data.h \ h264data.h \ mpeg4data.h \ @@ -114,11 +129,13 @@ noinst_HEADERS = \ msmpeg4data.h \ ra144.h \ ra288.h \ + rangecoder.h \ rational.h \ simple_idct.h \ sp5x.h \ svq1_cb.h \ svq1_vlc.h \ truemotion1data.h \ + ulti_cb.h \ vp3data.h \ wmadata.h diff --git a/src/libffmpeg/libavcodec/aasc.c b/src/libffmpeg/libavcodec/aasc.c new file mode 100644 index 000000000..d2419e98c --- /dev/null +++ b/src/libffmpeg/libavcodec/aasc.c @@ -0,0 +1,174 @@ +/* + * Autodesc RLE Decoder + * Copyright (C) 2005 the ffmpeg project + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/** + * @file aasc.c + * Autodesc RLE Video Decoder by Konstantin Shishkov + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "common.h" +#include "avcodec.h" +#include "dsputil.h" + +typedef struct AascContext { + AVCodecContext *avctx; + AVFrame frame; +} AascContext; + +#define FETCH_NEXT_STREAM_BYTE() \ + if (stream_ptr >= buf_size) \ + { \ + av_log(s->avctx, AV_LOG_ERROR, " AASC: stream ptr just went out of bounds (fetch)\n"); \ + break; \ + } \ + stream_byte = buf[stream_ptr++]; + +static int aasc_decode_init(AVCodecContext *avctx) +{ + AascContext *s = (AascContext *)avctx->priv_data; + + s->avctx = avctx; + + avctx->pix_fmt = PIX_FMT_BGR24; + avctx->has_b_frames = 0; + s->frame.data[0] = NULL; + + return 0; +} + +static int aasc_decode_frame(AVCodecContext *avctx, + void *data, int *data_size, + uint8_t *buf, int buf_size) +{ + AascContext *s = (AascContext *)avctx->priv_data; + int stream_ptr = 4; + unsigned char rle_code; + unsigned char stream_byte; + int pixel_ptr = 0; + int row_dec, row_ptr; + int frame_size; + int i; + + s->frame.reference = 1; + s->frame.buffer_hints = FF_BUFFER_HINTS_VALID | FF_BUFFER_HINTS_PRESERVE | FF_BUFFER_HINTS_REUSABLE; + if (avctx->reget_buffer(avctx, &s->frame)) { + av_log(avctx, AV_LOG_ERROR, "reget_buffer() failed\n"); + return -1; + } + + row_dec = s->frame.linesize[0]; + row_ptr = (s->avctx->height - 1) * row_dec; + frame_size = row_dec * s->avctx->height; + + while (row_ptr >= 0) { + FETCH_NEXT_STREAM_BYTE(); + rle_code = stream_byte; + if (rle_code == 0) { + /* fetch the next byte to see how to handle escape code */ + FETCH_NEXT_STREAM_BYTE(); + if (stream_byte == 0) { + /* line is done, goto the next one */ + row_ptr -= row_dec; + pixel_ptr = 0; + } else if 
(stream_byte == 1) { + /* decode is done */ + break; + } else if (stream_byte == 2) { + /* reposition frame decode coordinates */ + FETCH_NEXT_STREAM_BYTE(); + pixel_ptr += stream_byte; + FETCH_NEXT_STREAM_BYTE(); + row_ptr -= stream_byte * row_dec; + } else { + /* copy pixels from encoded stream */ + if ((pixel_ptr + stream_byte > avctx->width * 3) || + (row_ptr < 0)) { + av_log(s->avctx, AV_LOG_ERROR, " AASC: frame ptr just went out of bounds (copy1)\n"); + break; + } + + rle_code = stream_byte; + if (stream_ptr + rle_code > buf_size) { + av_log(s->avctx, AV_LOG_ERROR, " AASC: stream ptr just went out of bounds (copy2)\n"); + break; + } + + for (i = 0; i < rle_code; i++) { + FETCH_NEXT_STREAM_BYTE(); + s->frame.data[0][row_ptr + pixel_ptr] = stream_byte; + pixel_ptr++; + } + if (rle_code & 1) + stream_ptr++; + } + } else { + /* decode a run of data */ + if ((pixel_ptr + rle_code > avctx->width * 3) || + (row_ptr < 0)) { + av_log(s->avctx, AV_LOG_ERROR, " AASC: frame ptr just went out of bounds (run1)\n"); + break; + } + + FETCH_NEXT_STREAM_BYTE(); + + while(rle_code--) { + s->frame.data[0][row_ptr + pixel_ptr] = stream_byte; + pixel_ptr++; + } + } + } + + /* one last sanity check on the way out */ + if (stream_ptr < buf_size) + av_log(s->avctx, AV_LOG_ERROR, " AASC: ended frame decode with bytes left over (%d < %d)\n", + stream_ptr, buf_size); + + *data_size = sizeof(AVFrame); + *(AVFrame*)data = s->frame; + + /* report that the buffer was completely consumed */ + return buf_size; +} + +static int aasc_decode_end(AVCodecContext *avctx) +{ + AascContext *s = (AascContext *)avctx->priv_data; + + /* release the last frame */ + if (s->frame.data[0]) + avctx->release_buffer(avctx, &s->frame); + + return 0; +} + +AVCodec aasc_decoder = { + "aasc", + CODEC_TYPE_VIDEO, + CODEC_ID_AASC, + sizeof(AascContext), + aasc_decode_init, + NULL, + aasc_decode_end, + aasc_decode_frame, + CODEC_CAP_DR1, +}; diff --git a/src/libffmpeg/libavcodec/adpcm.c 
b/src/libffmpeg/libavcodec/adpcm.c index 0755e24fe..043c4d4b2 100644 --- a/src/libffmpeg/libavcodec/adpcm.c +++ b/src/libffmpeg/libavcodec/adpcm.c @@ -17,6 +17,7 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include "avcodec.h" +#include "bitstream.h" /** * @file adpcm.c @@ -103,6 +104,19 @@ static int ea_adpcm_table[] = { 3, 4, 7, 8, 10, 11, 0, -1, -3, -4 }; +static int ct_adpcm_table[8] = { + 0x00E6, 0x00E6, 0x00E6, 0x00E6, + 0x0133, 0x0199, 0x0200, 0x0266 +}; + +// padded to zero where table size is less then 16 +static int swf_index_tables[4][16] = { + /*2*/ { -1, 2 }, + /*3*/ { -1, -1, 2, 4 }, + /*4*/ { -1, -1, -1, -1, 2, 4, 6, 8 }, + /*5*/ { -1, -1, -1, -1, -1, -1, -1, -1, 1, 2, 4, 6, 8, 10, 13, 16 } +}; + /* end of tables */ typedef struct ADPCMChannelStatus { @@ -124,6 +138,10 @@ typedef struct ADPCMContext { int channel; /* for stereo MOVs, decode left, then decode right, then tell it's decoded */ ADPCMChannelStatus status[2]; short sample_buffer[32]; /* hold left samples while waiting for right samples */ + + /* SWF only */ + int nb_bits; + int nb_samples; } ADPCMContext; /* XXX: implement encoding */ @@ -361,6 +379,9 @@ static int adpcm_decode_init(AVCodecContext * avctx) c->status[0].step = c->status[1].step = 0; switch(avctx->codec->id) { + case CODEC_ID_ADPCM_CT: + c->status[0].step = c->status[1].step = 511; + break; default: break; } @@ -411,6 +432,37 @@ static inline short adpcm_ms_expand_nibble(ADPCMChannelStatus *c, char nibble) return (short)predictor; } +static inline short adpcm_ct_expand_nibble(ADPCMChannelStatus *c, char nibble) +{ + int predictor; + int sign, delta, diff; + int new_step; + + sign = nibble & 8; + delta = nibble & 7; + /* perform direct multiplication instead of series of jumps proposed by + * the reference ADPCM implementation since modern CPUs can do the mults + * quickly enough */ + diff = ((2 * delta + 1) * c->step) >> 3; + predictor = c->predictor; + /* predictor update is not so 
trivial: predictor is multiplied on 254/256 before updating */ + if(sign) + predictor = ((predictor * 254) >> 8) - diff; + else + predictor = ((predictor * 254) >> 8) + diff; + /* calculate new step and clamp it to range 511..32767 */ + new_step = (ct_adpcm_table[nibble & 7] * c->step) >> 8; + c->step = new_step; + if(c->step < 511) + c->step = 511; + if(c->step > 32767) + c->step = 32767; + + CLAMP_TO_SHORT(predictor); + c->predictor = predictor; + return (short)predictor; +} + static void xa_decode(short *out, const unsigned char *in, ADPCMChannelStatus *left, ADPCMChannelStatus *right, int inc) { @@ -840,6 +892,92 @@ static int adpcm_decode_frame(AVCodecContext *avctx, src++; } break; + case CODEC_ID_ADPCM_CT: + while (src < buf + buf_size) { + if (st) { + *samples++ = adpcm_ct_expand_nibble(&c->status[0], + (src[0] >> 4) & 0x0F); + *samples++ = adpcm_ct_expand_nibble(&c->status[1], + src[0] & 0x0F); + } else { + *samples++ = adpcm_ct_expand_nibble(&c->status[0], + (src[0] >> 4) & 0x0F); + *samples++ = adpcm_ct_expand_nibble(&c->status[0], + src[0] & 0x0F); + } + src++; + } + break; + case CODEC_ID_ADPCM_SWF: + { + GetBitContext gb; + int *table; + int k0, signmask; + int size = buf_size*8; + + init_get_bits(&gb, buf, size); + + // first frame, read bits & inital values + if (!c->nb_bits) + { + c->nb_bits = get_bits(&gb, 2)+2; +// av_log(NULL,AV_LOG_INFO,"nb_bits: %d\n", c->nb_bits); + } + + table = swf_index_tables[c->nb_bits-2]; + k0 = 1 << (c->nb_bits-2); + signmask = 1 << (c->nb_bits-1); + + while (get_bits_count(&gb) <= size) + { + int i; + + c->nb_samples++; + // wrap around at every 4096 samples... 
+ if ((c->nb_samples & 0xfff) == 1) + { + for (i = 0; i <= st; i++) + { + *samples++ = c->status[i].predictor = get_sbits(&gb, 16); + c->status[i].step_index = get_bits(&gb, 6); + } + } + + // similar to IMA adpcm + for (i = 0; i <= st; i++) + { + int delta = get_bits(&gb, c->nb_bits); + int step = step_table[c->status[i].step_index]; + long vpdiff = 0; // vpdiff = (delta+0.5)*step/4 + int k = k0; + + do { + if (delta & k) + vpdiff += step; + step >>= 1; + k >>= 1; + } while(k); + vpdiff += step; + + if (delta & signmask) + c->status[i].predictor -= vpdiff; + else + c->status[i].predictor += vpdiff; + + c->status[i].step_index += table[delta & (~signmask)]; + + c->status[i].step_index = clip(c->status[i].step_index, 0, 88); + c->status[i].predictor = clip(c->status[i].predictor, -32768, 32767); + + *samples++ = c->status[i].predictor; + } + } + +// src += get_bits_count(&gb)*8; + src += size; + + break; + } default: return -1; } @@ -895,5 +1033,7 @@ ADPCM_CODEC(CODEC_ID_ADPCM_4XM, adpcm_4xm); ADPCM_CODEC(CODEC_ID_ADPCM_XA, adpcm_xa); ADPCM_CODEC(CODEC_ID_ADPCM_ADX, adpcm_adx); ADPCM_CODEC(CODEC_ID_ADPCM_EA, adpcm_ea); +ADPCM_CODEC(CODEC_ID_ADPCM_CT, adpcm_ct); +ADPCM_CODEC(CODEC_ID_ADPCM_SWF, adpcm_swf); #undef ADPCM_CODEC diff --git a/src/libffmpeg/libavcodec/adx.c b/src/libffmpeg/libavcodec/adx.c index e41a75726..a52575c13 100644 --- a/src/libffmpeg/libavcodec/adx.c +++ b/src/libffmpeg/libavcodec/adx.c @@ -314,6 +314,7 @@ static int adx_decode_init(AVCodecContext * avctx) return 0; } +#if 0 static void dump(unsigned char *buf,size_t len) { int i; @@ -324,6 +325,8 @@ static void dump(unsigned char *buf,size_t len) } av_log(NULL, AV_LOG_ERROR, "\n"); } +#endif + static int adx_decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf0, int buf_size) diff --git a/src/libffmpeg/libavcodec/alac.c b/src/libffmpeg/libavcodec/alac.c new file mode 100644 index 000000000..5ae2e00f4 --- /dev/null +++ b/src/libffmpeg/libavcodec/alac.c @@ -0,0 +1,833 @@ +/* 
+ * ALAC (Apple Lossless Audio Codec) decoder + * Copyright (c) 2005 David Hammerton + * All rights reserved. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/** + * @file alac.c + * ALAC (Apple Lossless Audio Codec) decoder + * @author 2005 David Hammerton + * + * For more information on the ALAC format, visit: + * http://crazney.net/programs/itunes/alac.html + * + * Note: This decoder expects a 36- (0x24-)byte QuickTime atom to be + * passed through the extradata[_size] fields. 
This atom is tacked onto + * the end of an 'alac' stsd atom and has the following format: + * bytes 0-3 atom size (0x24), big-endian + * bytes 4-7 atom type ('alac', not the 'alac' tag from start of stsd) + * bytes 8-35 data bytes needed by decoder + */ + + +#include "avcodec.h" +#include "bitstream.h" + +#define ALAC_EXTRADATA_SIZE 36 + +typedef struct { + + AVCodecContext *avctx; + GetBitContext gb; + /* init to 0; first frame decode should initialize from extradata and + * set this to 1 */ + int context_initialized; + + int samplesize; + int numchannels; + int bytespersample; + + /* buffers */ + int32_t *predicterror_buffer_a; + int32_t *predicterror_buffer_b; + + int32_t *outputsamples_buffer_a; + int32_t *outputsamples_buffer_b; + + /* stuff from setinfo */ + uint32_t setinfo_max_samples_per_frame; /* 0x1000 = 4096 */ /* max samples per frame? */ + uint8_t setinfo_7a; /* 0x00 */ + uint8_t setinfo_sample_size; /* 0x10 */ + uint8_t setinfo_rice_historymult; /* 0x28 */ + uint8_t setinfo_rice_initialhistory; /* 0x0a */ + uint8_t setinfo_rice_kmodifier; /* 0x0e */ + uint8_t setinfo_7f; /* 0x02 */ + uint16_t setinfo_80; /* 0x00ff */ + uint32_t setinfo_82; /* 0x000020e7 */ + uint32_t setinfo_86; /* 0x00069fe4 */ + uint32_t setinfo_8a_rate; /* 0x0000ac44 */ + /* end setinfo stuff */ + +} ALACContext; + +static void allocate_buffers(ALACContext *alac) +{ + alac->predicterror_buffer_a = av_malloc(alac->setinfo_max_samples_per_frame * 4); + alac->predicterror_buffer_b = av_malloc(alac->setinfo_max_samples_per_frame * 4); + + alac->outputsamples_buffer_a = av_malloc(alac->setinfo_max_samples_per_frame * 4); + alac->outputsamples_buffer_b = av_malloc(alac->setinfo_max_samples_per_frame * 4); +} + +void alac_set_info(ALACContext *alac) +{ + unsigned char *ptr = alac->avctx->extradata; + + ptr += 4; /* size */ + ptr += 4; /* alac */ + ptr += 4; /* 0 ? */ + + alac->setinfo_max_samples_per_frame = BE_32(ptr); /* buffer size / 2 ? 
*/ + ptr += 4; + alac->setinfo_7a = *ptr++; + alac->setinfo_sample_size = *ptr++; + alac->setinfo_rice_historymult = *ptr++; + alac->setinfo_rice_initialhistory = *ptr++; + alac->setinfo_rice_kmodifier = *ptr++; + alac->setinfo_7f = *ptr++; + alac->setinfo_80 = BE_16(ptr); + ptr += 2; + alac->setinfo_82 = BE_32(ptr); + ptr += 4; + alac->setinfo_86 = BE_32(ptr); + ptr += 4; + alac->setinfo_8a_rate = BE_32(ptr); + ptr += 4; + + allocate_buffers(alac); +} + +/* hideously inefficient. could use a bitmask search, + * alternatively bsr on x86, + */ +static int count_leading_zeros(int32_t input) +{ + int i = 0; + while (!(0x80000000 & input) && i < 32) { + i++; + input = input << 1; + } + return i; +} + +void bastardized_rice_decompress(ALACContext *alac, + int32_t *output_buffer, + int output_size, + int readsamplesize, /* arg_10 */ + int rice_initialhistory, /* arg424->b */ + int rice_kmodifier, /* arg424->d */ + int rice_historymult, /* arg424->c */ + int rice_kmodifier_mask /* arg424->e */ + ) +{ + int output_count; + unsigned int history = rice_initialhistory; + int sign_modifier = 0; + + for (output_count = 0; output_count < output_size; output_count++) { + int32_t x = 0; + int32_t x_modified; + int32_t final_val; + + /* read x - number of 1s before 0 represent the rice */ + while (x <= 8 && get_bits1(&alac->gb)) { + x++; + } + + + if (x > 8) { /* RICE THRESHOLD */ + /* use alternative encoding */ + int32_t value; + + value = get_bits(&alac->gb, readsamplesize); + + /* mask value to readsamplesize size */ + if (readsamplesize != 32) + value &= (0xffffffff >> (32 - readsamplesize)); + + x = value; + } else { + /* standard rice encoding */ + int extrabits; + int k; /* size of extra bits */ + + /* read k, that is bits as is */ + k = 31 - rice_kmodifier - count_leading_zeros((history >> 9) + 3); + + if (k < 0) + k += rice_kmodifier; + else + k = rice_kmodifier; + + if (k != 1) { + extrabits = show_bits(&alac->gb, k); + + /* multiply x by 2^k - 1, as part of their 
strange algorithm */ + x = (x << k) - x; + + if (extrabits > 1) { + x += extrabits - 1; + get_bits(&alac->gb, k); + } else { + get_bits(&alac->gb, k - 1); + } + } + } + + x_modified = sign_modifier + x; + final_val = (x_modified + 1) / 2; + if (x_modified & 1) final_val *= -1; + + output_buffer[output_count] = final_val; + + sign_modifier = 0; + + /* now update the history */ + history += (x_modified * rice_historymult) + - ((history * rice_historymult) >> 9); + + if (x_modified > 0xffff) + history = 0xffff; + + /* special case: there may be compressed blocks of 0 */ + if ((history < 128) && (output_count+1 < output_size)) { + int block_size; + + sign_modifier = 1; + + x = 0; + while (x <= 8 && get_bits1(&alac->gb)) { + x++; + } + + if (x > 8) { + block_size = get_bits(&alac->gb, 16); + block_size &= 0xffff; + } else { + int k; + int extrabits; + + k = count_leading_zeros(history) + ((history + 16) >> 6 /* / 64 */) - 24; + + extrabits = show_bits(&alac->gb, k); + + block_size = (((1 << k) - 1) & rice_kmodifier_mask) * x + + extrabits - 1; + + if (extrabits < 2) { + x = 1 - extrabits; + block_size += x; + get_bits(&alac->gb, k - 1); + } else { + get_bits(&alac->gb, k); + } + } + + if (block_size > 0) { + memset(&output_buffer[output_count+1], 0, block_size * 4); + output_count += block_size; + + } + + if (block_size > 0xffff) + sign_modifier = 0; + + history = 0; + } + } +} + +#define SIGN_EXTENDED32(val, bits) ((val << (32 - bits)) >> (32 - bits)) + +#define SIGN_ONLY(v) \ + ((v < 0) ? (-1) : \ + ((v > 0) ? 
(1) : \ + (0))) + +static void predictor_decompress_fir_adapt(int32_t *error_buffer, + int32_t *buffer_out, + int output_size, + int readsamplesize, + int16_t *predictor_coef_table, + int predictor_coef_num, + int predictor_quantitization) +{ + int i; + + /* first sample always copies */ + *buffer_out = *error_buffer; + + if (!predictor_coef_num) { + if (output_size <= 1) return; + memcpy(buffer_out+1, error_buffer+1, (output_size-1) * 4); + return; + } + + if (predictor_coef_num == 0x1f) { /* 11111 - max value of predictor_coef_num */ + /* second-best case scenario for fir decompression, + * error describes a small difference from the previous sample only + */ + if (output_size <= 1) return; + for (i = 0; i < output_size - 1; i++) { + int32_t prev_value; + int32_t error_value; + + prev_value = buffer_out[i]; + error_value = error_buffer[i+1]; + buffer_out[i+1] = SIGN_EXTENDED32((prev_value + error_value), readsamplesize); + } + return; + } + + /* read warm-up samples */ + if (predictor_coef_num > 0) { + int i; + for (i = 0; i < predictor_coef_num; i++) { + int32_t val; + + val = buffer_out[i] + error_buffer[i+1]; + + val = SIGN_EXTENDED32(val, readsamplesize); + + buffer_out[i+1] = val; + } + } + +#if 0 + /* 4 and 8 are very common cases (the only ones i've seen). 
these + * should be unrolled and optimised + */ + if (predictor_coef_num == 4) { + /* FIXME: optimised general case */ + return; + } + + if (predictor_coef_table == 8) { + /* FIXME: optimised general case */ + return; + } +#endif + + + /* general case */ + if (predictor_coef_num > 0) { + for (i = predictor_coef_num + 1; + i < output_size; + i++) { + int j; + int sum = 0; + int outval; + int error_val = error_buffer[i]; + + for (j = 0; j < predictor_coef_num; j++) { + sum += (buffer_out[predictor_coef_num-j] - buffer_out[0]) * + predictor_coef_table[j]; + } + + outval = (1 << (predictor_quantitization-1)) + sum; + outval = outval >> predictor_quantitization; + outval = outval + buffer_out[0] + error_val; + outval = SIGN_EXTENDED32(outval, readsamplesize); + + buffer_out[predictor_coef_num+1] = outval; + + if (error_val > 0) { + int predictor_num = predictor_coef_num - 1; + + while (predictor_num >= 0 && error_val > 0) { + int val = buffer_out[0] - buffer_out[predictor_coef_num - predictor_num]; + int sign = SIGN_ONLY(val); + + predictor_coef_table[predictor_num] -= sign; + + val *= sign; /* absolute value */ + + error_val -= ((val >> predictor_quantitization) * + (predictor_coef_num - predictor_num)); + + predictor_num--; + } + } else if (error_val < 0) { + int predictor_num = predictor_coef_num - 1; + + while (predictor_num >= 0 && error_val < 0) { + int val = buffer_out[0] - buffer_out[predictor_coef_num - predictor_num]; + int sign = - SIGN_ONLY(val); + + predictor_coef_table[predictor_num] -= sign; + + val *= sign; /* neg value */ + + error_val -= ((val >> predictor_quantitization) * + (predictor_coef_num - predictor_num)); + + predictor_num--; + } + } + + buffer_out++; + } + } +} + +void deinterlace_16(int32_t *buffer_a, int32_t *buffer_b, + int16_t *buffer_out, + int numchannels, int numsamples, + uint8_t interlacing_shift, + uint8_t interlacing_leftweight) +{ + int i; + if (numsamples <= 0) return; + + /* weighted interlacing */ + if (interlacing_leftweight) 
{ + for (i = 0; i < numsamples; i++) { + int32_t difference, midright; + int16_t left; + int16_t right; + + midright = buffer_a[i]; + difference = buffer_b[i]; + + + right = midright - ((difference * interlacing_leftweight) >> interlacing_shift); + left = (midright - ((difference * interlacing_leftweight) >> interlacing_shift)) + + difference; + + buffer_out[i*numchannels] = left; + buffer_out[i*numchannels + 1] = right; + } + + return; + } + + /* otherwise basic interlacing took place */ + for (i = 0; i < numsamples; i++) { + int16_t left, right; + + left = buffer_a[i]; + right = buffer_b[i]; + + buffer_out[i*numchannels] = left; + buffer_out[i*numchannels + 1] = right; + } +} + +static int alac_decode_frame(AVCodecContext *avctx, + void *outbuffer, int *outputsize, + uint8_t *inbuffer, int input_buffer_size) +{ + ALACContext *alac = avctx->priv_data; + + int channels; + int32_t outputsamples; + + /* short-circuit null buffers */ + if (!inbuffer || !input_buffer_size) + return input_buffer_size; + + /* initialize from the extradata */ + if (!alac->context_initialized) { + if (alac->avctx->extradata_size != ALAC_EXTRADATA_SIZE) { + av_log(NULL, AV_LOG_ERROR, "alac: expected %d extradata bytes\n", + ALAC_EXTRADATA_SIZE); + return input_buffer_size; + } + alac_set_info(alac); + alac->context_initialized = 1; + } + + outputsamples = alac->setinfo_max_samples_per_frame; + + init_get_bits(&alac->gb, inbuffer, input_buffer_size * 8); + + channels = get_bits(&alac->gb, 3); + + *outputsize = outputsamples * alac->bytespersample; + + switch(channels) { + case 0: { /* 1 channel */ + int hassize; + int isnotcompressed; + int readsamplesize; + + int wasted_bytes; + int ricemodifier; + + + /* 2^result = something to do with output waiting. + * perhaps matters if we read > 1 frame in a pass? 
+ */ + get_bits(&alac->gb, 4); + + get_bits(&alac->gb, 12); /* unknown, skip 12 bits */ + + hassize = get_bits(&alac->gb, 1); /* the output sample size is stored soon */ + + wasted_bytes = get_bits(&alac->gb, 2); /* unknown ? */ + + isnotcompressed = get_bits(&alac->gb, 1); /* whether the frame is compressed */ + + if (hassize) { + /* now read the number of samples, + * as a 32bit integer */ + outputsamples = get_bits(&alac->gb, 32); + *outputsize = outputsamples * alac->bytespersample; + } + + readsamplesize = alac->setinfo_sample_size - (wasted_bytes * 8); + + if (!isnotcompressed) { + /* so it is compressed */ + int16_t predictor_coef_table[32]; + int predictor_coef_num; + int prediction_type; + int prediction_quantitization; + int i; + + /* skip 16 bits, not sure what they are. seem to be used in + * two channel case */ + get_bits(&alac->gb, 8); + get_bits(&alac->gb, 8); + + prediction_type = get_bits(&alac->gb, 4); + prediction_quantitization = get_bits(&alac->gb, 4); + + ricemodifier = get_bits(&alac->gb, 3); + predictor_coef_num = get_bits(&alac->gb, 5); + + /* read the predictor table */ + for (i = 0; i < predictor_coef_num; i++) { + predictor_coef_table[i] = (int16_t)get_bits(&alac->gb, 16); + } + + if (wasted_bytes) { + /* these bytes seem to have something to do with + * > 2 channel files. 
+ */ + av_log(NULL, AV_LOG_ERROR, "FIXME: unimplemented, unhandling of wasted_bytes\n"); + } + + bastardized_rice_decompress(alac, + alac->predicterror_buffer_a, + outputsamples, + readsamplesize, + alac->setinfo_rice_initialhistory, + alac->setinfo_rice_kmodifier, + ricemodifier * alac->setinfo_rice_historymult / 4, + (1 << alac->setinfo_rice_kmodifier) - 1); + + if (prediction_type == 0) { + /* adaptive fir */ + predictor_decompress_fir_adapt(alac->predicterror_buffer_a, + alac->outputsamples_buffer_a, + outputsamples, + readsamplesize, + predictor_coef_table, + predictor_coef_num, + prediction_quantitization); + } else { + av_log(NULL, AV_LOG_ERROR, "FIXME: unhandled prediction type: %i\n", prediction_type); + /* i think the only other prediction type (or perhaps this is just a + * boolean?) runs adaptive fir twice.. like: + * predictor_decompress_fir_adapt(predictor_error, tempout, ...) + * predictor_decompress_fir_adapt(predictor_error, outputsamples ...) + * little strange.. + */ + } + + } else { + /* not compressed, easy case */ + if (readsamplesize <= 16) { + int i; + for (i = 0; i < outputsamples; i++) { + int32_t audiobits = get_bits(&alac->gb, readsamplesize); + + audiobits = SIGN_EXTENDED32(audiobits, readsamplesize); + + alac->outputsamples_buffer_a[i] = audiobits; + } + } else { + int i; + for (i = 0; i < outputsamples; i++) { + int32_t audiobits; + + audiobits = get_bits(&alac->gb, 16); + /* special case of sign extension.. 
+ * as we'll be ORing the low 16bits into this */ + audiobits = audiobits << 16; + audiobits = audiobits >> (32 - readsamplesize); + + audiobits |= get_bits(&alac->gb, readsamplesize - 16); + + alac->outputsamples_buffer_a[i] = audiobits; + } + } + /* wasted_bytes = 0; // unused */ + } + + switch(alac->setinfo_sample_size) { + case 16: { + int i; + for (i = 0; i < outputsamples; i++) { + int16_t sample = alac->outputsamples_buffer_a[i]; + be2me_16(sample); + ((int16_t*)outbuffer)[i * alac->numchannels] = sample; + } + break; + } + case 20: + case 24: + case 32: + av_log(NULL, AV_LOG_ERROR, "FIXME: unimplemented sample size %i\n", alac->setinfo_sample_size); + break; + default: + break; + } + break; + } + case 1: { /* 2 channels */ + int hassize; + int isnotcompressed; + int readsamplesize; + + int wasted_bytes; + + uint8_t interlacing_shift; + uint8_t interlacing_leftweight; + + /* 2^result = something to do with output waiting. + * perhaps matters if we read > 1 frame in a pass? + */ + get_bits(&alac->gb, 4); + + get_bits(&alac->gb, 12); /* unknown, skip 12 bits */ + + hassize = get_bits(&alac->gb, 1); /* the output sample size is stored soon */ + + wasted_bytes = get_bits(&alac->gb, 2); /* unknown ? 
*/ + + isnotcompressed = get_bits(&alac->gb, 1); /* whether the frame is compressed */ + + if (hassize) { + /* now read the number of samples, + * as a 32bit integer */ + outputsamples = get_bits(&alac->gb, 32); + *outputsize = outputsamples * alac->bytespersample; + } + + readsamplesize = alac->setinfo_sample_size - (wasted_bytes * 8) + 1; + + if (!isnotcompressed) { + /* compressed */ + int16_t predictor_coef_table_a[32]; + int predictor_coef_num_a; + int prediction_type_a; + int prediction_quantitization_a; + int ricemodifier_a; + + int16_t predictor_coef_table_b[32]; + int predictor_coef_num_b; + int prediction_type_b; + int prediction_quantitization_b; + int ricemodifier_b; + + int i; + + interlacing_shift = get_bits(&alac->gb, 8); + interlacing_leftweight = get_bits(&alac->gb, 8); + + /******** channel 1 ***********/ + prediction_type_a = get_bits(&alac->gb, 4); + prediction_quantitization_a = get_bits(&alac->gb, 4); + + ricemodifier_a = get_bits(&alac->gb, 3); + predictor_coef_num_a = get_bits(&alac->gb, 5); + + /* read the predictor table */ + for (i = 0; i < predictor_coef_num_a; i++) { + predictor_coef_table_a[i] = (int16_t)get_bits(&alac->gb, 16); + } + + /******** channel 2 *********/ + prediction_type_b = get_bits(&alac->gb, 4); + prediction_quantitization_b = get_bits(&alac->gb, 4); + + ricemodifier_b = get_bits(&alac->gb, 3); + predictor_coef_num_b = get_bits(&alac->gb, 5); + + /* read the predictor table */ + for (i = 0; i < predictor_coef_num_b; i++) { + predictor_coef_table_b[i] = (int16_t)get_bits(&alac->gb, 16); + } + + /*********************/ + if (wasted_bytes) { + /* see mono case */ + av_log(NULL, AV_LOG_ERROR, "FIXME: unimplemented, unhandling of wasted_bytes\n"); + } + + /* channel 1 */ + bastardized_rice_decompress(alac, + alac->predicterror_buffer_a, + outputsamples, + readsamplesize, + alac->setinfo_rice_initialhistory, + alac->setinfo_rice_kmodifier, + ricemodifier_a * alac->setinfo_rice_historymult / 4, + (1 << 
alac->setinfo_rice_kmodifier) - 1); + + if (prediction_type_a == 0) { + /* adaptive fir */ + predictor_decompress_fir_adapt(alac->predicterror_buffer_a, + alac->outputsamples_buffer_a, + outputsamples, + readsamplesize, + predictor_coef_table_a, + predictor_coef_num_a, + prediction_quantitization_a); + } else { + /* see mono case */ + av_log(NULL, AV_LOG_ERROR, "FIXME: unhandled prediction type: %i\n", prediction_type_a); + } + + /* channel 2 */ + bastardized_rice_decompress(alac, + alac->predicterror_buffer_b, + outputsamples, + readsamplesize, + alac->setinfo_rice_initialhistory, + alac->setinfo_rice_kmodifier, + ricemodifier_b * alac->setinfo_rice_historymult / 4, + (1 << alac->setinfo_rice_kmodifier) - 1); + + if (prediction_type_b == 0) { + /* adaptive fir */ + predictor_decompress_fir_adapt(alac->predicterror_buffer_b, + alac->outputsamples_buffer_b, + outputsamples, + readsamplesize, + predictor_coef_table_b, + predictor_coef_num_b, + prediction_quantitization_b); + } else { + av_log(NULL, AV_LOG_ERROR, "FIXME: unhandled prediction type: %i\n", prediction_type_b); + } + } else { + /* not compressed, easy case */ + if (alac->setinfo_sample_size <= 16) { + int i; + for (i = 0; i < outputsamples; i++) { + int32_t audiobits_a, audiobits_b; + + audiobits_a = get_bits(&alac->gb, alac->setinfo_sample_size); + audiobits_b = get_bits(&alac->gb, alac->setinfo_sample_size); + + audiobits_a = SIGN_EXTENDED32(audiobits_a, alac->setinfo_sample_size); + audiobits_b = SIGN_EXTENDED32(audiobits_b, alac->setinfo_sample_size); + + alac->outputsamples_buffer_a[i] = audiobits_a; + alac->outputsamples_buffer_b[i] = audiobits_b; + } + } else { + int i; + for (i = 0; i < outputsamples; i++) { + int32_t audiobits_a, audiobits_b; + + audiobits_a = get_bits(&alac->gb, 16); + audiobits_a = audiobits_a << 16; + audiobits_a = audiobits_a >> (32 - alac->setinfo_sample_size); + audiobits_a |= get_bits(&alac->gb, alac->setinfo_sample_size - 16); + + audiobits_b = get_bits(&alac->gb, 16); + 
audiobits_b = audiobits_b << 16; + audiobits_b = audiobits_b >> (32 - alac->setinfo_sample_size); + audiobits_b |= get_bits(&alac->gb, alac->setinfo_sample_size - 16); + + alac->outputsamples_buffer_a[i] = audiobits_a; + alac->outputsamples_buffer_b[i] = audiobits_b; + } + } + /* wasted_bytes = 0; */ + interlacing_shift = 0; + interlacing_leftweight = 0; + } + + switch(alac->setinfo_sample_size) { + case 16: { + deinterlace_16(alac->outputsamples_buffer_a, + alac->outputsamples_buffer_b, + (int16_t*)outbuffer, + alac->numchannels, + outputsamples, + interlacing_shift, + interlacing_leftweight); + break; + } + case 20: + case 24: + case 32: + av_log(NULL, AV_LOG_ERROR, "FIXME: unimplemented sample size %i\n", alac->setinfo_sample_size); + break; + default: + break; + } + + break; + } + } + + return input_buffer_size; +} + +static int alac_decode_init(AVCodecContext * avctx) +{ + ALACContext *alac = avctx->priv_data; + alac->avctx = avctx; + alac->context_initialized = 0; + + alac->samplesize = alac->avctx->bits_per_sample; + alac->numchannels = alac->avctx->channels; + alac->bytespersample = (alac->samplesize / 8) * alac->numchannels; + + return 0; +} + +static int alac_decode_close(AVCodecContext *avctx) +{ + ALACContext *alac = avctx->priv_data; + + av_free(alac->predicterror_buffer_a); + av_free(alac->predicterror_buffer_b); + + av_free(alac->outputsamples_buffer_a); + av_free(alac->outputsamples_buffer_b); + + return 0; +} + +AVCodec alac_decoder = { + "alac", + CODEC_TYPE_AUDIO, + CODEC_ID_ALAC, + sizeof(ALACContext), + alac_decode_init, + NULL, + alac_decode_close, + alac_decode_frame, +}; diff --git a/src/libffmpeg/libavcodec/alpha/dsputil_alpha_asm.S b/src/libffmpeg/libavcodec/alpha/dsputil_alpha_asm.S index 7ec6757d7..6519a9590 100644 --- a/src/libffmpeg/libavcodec/alpha/dsputil_alpha_asm.S +++ b/src/libffmpeg/libavcodec/alpha/dsputil_alpha_asm.S @@ -23,9 +23,6 @@ */ #include "regdef.h" -#ifdef HAVE_AV_CONFIG_H -#include "config.h" -#endif /* Some nicer 
register names. */ #define ta t10 diff --git a/src/libffmpeg/libavcodec/alpha/motion_est_mvi_asm.S b/src/libffmpeg/libavcodec/alpha/motion_est_mvi_asm.S index 0042e7e82..9e6b75f53 100644 --- a/src/libffmpeg/libavcodec/alpha/motion_est_mvi_asm.S +++ b/src/libffmpeg/libavcodec/alpha/motion_est_mvi_asm.S @@ -18,9 +18,6 @@ */ #include "regdef.h" -#ifdef HAVE_AV_CONFIG_H -#include "config.h" -#endif /* Some nicer register names. */ #define ta t10 diff --git a/src/libffmpeg/libavcodec/asv1.c b/src/libffmpeg/libavcodec/asv1.c index 2ab729c17..4ab2518ab 100644 --- a/src/libffmpeg/libavcodec/asv1.c +++ b/src/libffmpeg/libavcodec/asv1.c @@ -63,7 +63,7 @@ static const uint8_t scantab[64]={ }; -static const uint8_t reverse[256]={ +const uint8_t ff_reverse[256]={ 0x00,0x80,0x40,0xC0,0x20,0xA0,0x60,0xE0,0x10,0x90,0x50,0xD0,0x30,0xB0,0x70,0xF0, 0x08,0x88,0x48,0xC8,0x28,0xA8,0x68,0xE8,0x18,0x98,0x58,0xD8,0x38,0xB8,0x78,0xF8, 0x04,0x84,0x44,0xC4,0x24,0xA4,0x64,0xE4,0x14,0x94,0x54,0xD4,0x34,0xB4,0x74,0xF4, @@ -137,29 +137,29 @@ static void init_vlcs(ASV1Context *a){ init_vlc(&ccp_vlc, VLC_BITS, 17, &ccp_tab[0][1], 2, 1, - &ccp_tab[0][0], 2, 1); + &ccp_tab[0][0], 2, 1, 1); init_vlc(&dc_ccp_vlc, VLC_BITS, 8, &dc_ccp_tab[0][1], 2, 1, - &dc_ccp_tab[0][0], 2, 1); + &dc_ccp_tab[0][0], 2, 1, 1); init_vlc(&ac_ccp_vlc, VLC_BITS, 16, &ac_ccp_tab[0][1], 2, 1, - &ac_ccp_tab[0][0], 2, 1); + &ac_ccp_tab[0][0], 2, 1, 1); init_vlc(&level_vlc, VLC_BITS, 7, &level_tab[0][1], 2, 1, - &level_tab[0][0], 2, 1); + &level_tab[0][0], 2, 1, 1); init_vlc(&asv2_level_vlc, ASV2_LEVEL_VLC_BITS, 63, &asv2_level_tab[0][1], 2, 1, - &asv2_level_tab[0][0], 2, 1); + &asv2_level_tab[0][0], 2, 1, 1); } } //FIXME write a reversed bitstream reader to avoid the double reverse static inline int asv2_get_bits(GetBitContext *gb, int n){ - return reverse[ get_bits(gb, n) << (8-n) ]; + return ff_reverse[ get_bits(gb, n) << (8-n) ]; } static inline void asv2_put_bits(PutBitContext *pb, int n, int v){ - put_bits(pb, n, reverse[ v 
<< (8-n) ]); + put_bits(pb, n, ff_reverse[ v << (8-n) ]); } static inline int asv1_get_level(GetBitContext *gb){ @@ -339,8 +339,13 @@ static inline int decode_mb(ASV1Context *a, DCTELEM block[6][64]){ return 0; } -static inline void encode_mb(ASV1Context *a, DCTELEM block[6][64]){ +static inline int encode_mb(ASV1Context *a, DCTELEM block[6][64]){ int i; + + if(a->pb.buf_end - a->pb.buf - (put_bits_count(&a->pb)>>3) < 30*16*16*3/2/8){ + av_log(a->avctx, AV_LOG_ERROR, "encoded frame too large\n"); + return -1; + } if(a->avctx->codec_id == CODEC_ID_ASV1){ for(i=0; i<6; i++) @@ -349,6 +354,7 @@ static inline void encode_mb(ASV1Context *a, DCTELEM block[6][64]){ for(i=0; i<6; i++) asv2_encode_block(a, block[i]); } + return 0; } static inline void idct_put(ASV1Context *a, int mb_x, int mb_y){ @@ -403,11 +409,6 @@ static int decode_frame(AVCodecContext *avctx, AVFrame * const p= (AVFrame*)&a->picture; int mb_x, mb_y; - /* special case for last picture */ - if (buf_size == 0) { - return 0; - } - if(p->data[0]) avctx->release_buffer(avctx, p); @@ -426,7 +427,7 @@ static int decode_frame(AVCodecContext *avctx, else{ int i; for(i=0; i<buf_size; i++) - a->bitstream_buffer[i]= reverse[ buf[i] ]; + a->bitstream_buffer[i]= ff_reverse[ buf[i] ]; } init_get_bits(&a->gb, a->bitstream_buffer, buf_size*8); @@ -527,7 +528,7 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, else{ int i; for(i=0; i<4*size; i++) - buf[i]= reverse[ buf[i] ]; + buf[i]= ff_reverse[ buf[i] ]; } return size*4; @@ -573,7 +574,7 @@ static int decode_init(AVCodecContext *avctx){ } p->qstride= a->mb_width; - p->qscale_table= av_mallocz( p->qstride * a->mb_height); + p->qscale_table= av_malloc( p->qstride * a->mb_height); p->quality= (32*scale + a->inv_qscale/2)/a->inv_qscale; memset(p->qscale_table, p->quality, p->qstride*a->mb_height); diff --git a/src/libffmpeg/libavcodec/avcodec.h b/src/libffmpeg/libavcodec/avcodec.h index 26fee27ef..064f58df2 100644 --- 
a/src/libffmpeg/libavcodec/avcodec.h +++ b/src/libffmpeg/libavcodec/avcodec.h @@ -15,16 +15,9 @@ extern "C" { #include "rational.h" #include <sys/types.h> /* size_t */ -/* FIXME: We cannot use ffmpeg's XvMC capabilities, since that would require - * linking the ffmpeg plugin against XvMC libraries, which is a bad thing, - * since they are output dependend. - * The correct fix would be to reimplement the XvMC functions libavcodec uses - * and do the necessary talking with our XvMC output plugin there. */ -#undef HAVE_XVMC - -#define FFMPEG_VERSION_INT 0x000408 -#define FFMPEG_VERSION "0.4.8" -#define LIBAVCODEC_BUILD 4715 +#define FFMPEG_VERSION_INT 0x000409 +#define FFMPEG_VERSION "0.4.9-pre1" +#define LIBAVCODEC_BUILD 4752 #define LIBAVCODEC_VERSION_INT FFMPEG_VERSION_INT #define LIBAVCODEC_VERSION FFMPEG_VERSION @@ -36,18 +29,22 @@ extern "C" { #define AV_NOPTS_VALUE int64_t_C(0x8000000000000000) #define AV_TIME_BASE 1000000 +/* FIXME: We cannot use ffmpeg's XvMC capabilities, since that would require + * linking the ffmpeg plugin against XvMC libraries, which is a bad thing, + * since they are output dependend. + * The correct fix would be to reimplement the XvMC functions libavcodec uses + * and do the necessary talking with our XvMC output plugin there. 
*/ +#undef HAVE_XVMC + enum CodecID { CODEC_ID_NONE, CODEC_ID_MPEG1VIDEO, CODEC_ID_MPEG2VIDEO, /* prefered ID for MPEG Video 1 or 2 decoding */ CODEC_ID_MPEG2VIDEO_XVMC, + CODEC_ID_H261, CODEC_ID_H263, CODEC_ID_RV10, CODEC_ID_RV20, - CODEC_ID_MP2, - CODEC_ID_MP3, /* prefered ID for MPEG Audio layer 1, 2 or3 decoding */ - CODEC_ID_VORBIS, - CODEC_ID_AC3, CODEC_ID_MJPEG, CODEC_ID_MJPEGB, CODEC_ID_LJPEG, @@ -65,19 +62,12 @@ enum CodecID { CODEC_ID_SVQ1, CODEC_ID_SVQ3, CODEC_ID_DVVIDEO, - CODEC_ID_DVAUDIO, - CODEC_ID_WMAV1, - CODEC_ID_WMAV2, - CODEC_ID_MACE3, - CODEC_ID_MACE6, CODEC_ID_HUFFYUV, CODEC_ID_CYUV, CODEC_ID_H264, CODEC_ID_INDEO3, CODEC_ID_VP3, CODEC_ID_THEORA, - CODEC_ID_AAC, - CODEC_ID_MPEG4AAC, CODEC_ID_ASV1, CODEC_ID_ASV2, CODEC_ID_FFV1, @@ -100,13 +90,33 @@ enum CodecID { CODEC_ID_FLIC, CODEC_ID_TRUEMOTION1, CODEC_ID_VMDVIDEO, - CODEC_ID_VMDAUDIO, CODEC_ID_MSZH, CODEC_ID_ZLIB, CODEC_ID_QTRLE, + CODEC_ID_SNOW, + CODEC_ID_TSCC, + CODEC_ID_ULTI, + CODEC_ID_QDRAW, + CODEC_ID_VIXL, + CODEC_ID_QPEG, + CODEC_ID_XVID, + CODEC_ID_PNG, + CODEC_ID_PPM, + CODEC_ID_PBM, + CODEC_ID_PGM, + CODEC_ID_PGMYUV, + CODEC_ID_PAM, + CODEC_ID_FFVHUFF, + CODEC_ID_RV30, + CODEC_ID_RV40, + CODEC_ID_VC9, + CODEC_ID_WMV3, + CODEC_ID_LOCO, + CODEC_ID_WNV1, + CODEC_ID_AASC, /* various pcm "codecs" */ - CODEC_ID_PCM_S16LE, + CODEC_ID_PCM_S16LE= 0x10000, CODEC_ID_PCM_S16BE, CODEC_ID_PCM_U16LE, CODEC_ID_PCM_U16BE, @@ -116,7 +126,7 @@ enum CodecID { CODEC_ID_PCM_ALAW, /* various adpcm codecs */ - CODEC_ID_ADPCM_IMA_QT, + CODEC_ID_ADPCM_IMA_QT= 0x11000, CODEC_ID_ADPCM_IMA_WAV, CODEC_ID_ADPCM_IMA_DK3, CODEC_ID_ADPCM_IMA_DK4, @@ -128,23 +138,48 @@ enum CodecID { CODEC_ID_ADPCM_ADX, CODEC_ID_ADPCM_EA, CODEC_ID_ADPCM_G726, + CODEC_ID_ADPCM_CT, + CODEC_ID_ADPCM_SWF, - /* AMR */ - CODEC_ID_AMR_NB, + /* AMR */ + CODEC_ID_AMR_NB= 0x12000, CODEC_ID_AMR_WB, /* RealAudio codecs*/ - CODEC_ID_RA_144, + CODEC_ID_RA_144= 0x13000, CODEC_ID_RA_288, /* various DPCM codecs */ - CODEC_ID_ROQ_DPCM, + 
CODEC_ID_ROQ_DPCM= 0x14000, CODEC_ID_INTERPLAY_DPCM, CODEC_ID_XAN_DPCM, + CODEC_ID_SOL_DPCM, + CODEC_ID_MP2= 0x15000, + CODEC_ID_MP3, /* prefered ID for MPEG Audio layer 1, 2 or3 decoding */ + CODEC_ID_AAC, + CODEC_ID_MPEG4AAC, + CODEC_ID_AC3, + CODEC_ID_DTS, + CODEC_ID_VORBIS, + CODEC_ID_DVAUDIO, + CODEC_ID_WMAV1, + CODEC_ID_WMAV2, + CODEC_ID_MACE3, + CODEC_ID_MACE6, + CODEC_ID_VMDAUDIO, + CODEC_ID_SONIC, + CODEC_ID_SONIC_LS, CODEC_ID_FLAC, + CODEC_ID_MP3ADU, + CODEC_ID_MP3ON4, + CODEC_ID_SHORTEN, + CODEC_ID_ALAC, + CODEC_ID_WESTWOOD_SND1, + + CODEC_ID_OGGTHEORA= 0x16000, - CODEC_ID_MPEG2TS, /* _FAKE_ codec to indicate a raw MPEG2 transport + CODEC_ID_MPEG2TS= 0x20000, /* _FAKE_ codec to indicate a raw MPEG2 transport stream (only used by libavformat) */ }; @@ -178,7 +213,7 @@ enum CodecType { */ enum PixelFormat { PIX_FMT_YUV420P, ///< Planar YUV 4:2:0 (1 Cr & Cb sample per 2x2 Y samples) - PIX_FMT_YUV422, + PIX_FMT_YUV422, ///< Packed pixel, Y0 Cb Y1 Cr PIX_FMT_RGB24, ///< Packed pixel, 3 bytes per pixel, RGBRGB... PIX_FMT_BGR24, ///< Packed pixel, 3 bytes per pixel, BGRBGR... PIX_FMT_YUV422P, ///< Planar YUV 4:2:2 (1 Cr & Cb sample per 2x1 Y samples) @@ -197,12 +232,17 @@ enum PixelFormat { PIX_FMT_YUVJ444P, ///< Planar YUV 4:4:4 full scale (jpeg) PIX_FMT_XVMC_MPEG2_MC,///< XVideo Motion Acceleration via common packet passing(xvmc_render.h) PIX_FMT_XVMC_MPEG2_IDCT, + PIX_FMT_UYVY422, ///< Packed pixel, Cb Y0 Cr Y1 + PIX_FMT_UYVY411, ///< Packed pixel, Cb Y0 Y1 Cr Y2 Y3 PIX_FMT_NB, }; /* currently unused, may be used if 24/32 bits samples ever supported */ enum SampleFormat { SAMPLE_FMT_S16 = 0, ///< signed 16 bits + SAMPLE_FMT_S32, ///< signed 32 bits + SAMPLE_FMT_FLT, ///< float + SAMPLE_FMT_DBL, ///< double }; /* in bytes */ @@ -217,6 +257,12 @@ enum SampleFormat { */ #define FF_INPUT_BUFFER_PADDING_SIZE 8 +/** + * minimum encoding buffer size. 
+ * used to avoid some checks during header writing + */ +#define FF_MIN_BUFFER_SIZE 16384 + /* motion estimation type, EPZS by default */ enum Motion_Est_ID { ME_ZERO = 1, @@ -227,6 +273,14 @@ enum Motion_Est_ID { ME_X1 }; +enum AVRounding { + AV_ROUND_ZERO = 0, ///< round toward zero + AV_ROUND_INF = 1, ///< round away from zero + AV_ROUND_DOWN = 2, ///< round toward -infinity + AV_ROUND_UP = 3, ///< round toward +infinity + AV_ROUND_NEAR_INF = 5, ///< round to nearest and halfway cases away from zero +}; + typedef struct RcOverride{ int start_frame; int end_frame; @@ -281,6 +335,10 @@ extern int motion_estimation_method; #define CODEC_FLAG_INTERLACED_ME 0x20000000 ///< interlaced motion estimation #define CODEC_FLAG_SVCD_SCAN_OFFSET 0x40000000 ///< will reserve space for SVCD scan offset user data #define CODEC_FLAG_CLOSED_GOP 0x80000000 +#define CODEC_FLAG2_FAST 0x00000001 ///< allow non spec compliant speedup tricks +#define CODEC_FLAG2_STRICT_GOP 0x00000002 ///< strictly enforce GOP size +#define CODEC_FLAG2_NO_OUTPUT 0x00000004 ///< skip bitstream encoding + /* Unsupported options : * Syntax Arithmetic coding (SAC) * Reference Picture Selection @@ -300,6 +358,11 @@ extern int motion_estimation_method; #define CODEC_CAP_TRUNCATED 0x0008 /* codec can export data for HW decoding (XvMC) */ #define CODEC_CAP_HWACCEL 0x0010 +/** + * codec has a non zero delay and needs to be feeded with NULL at the end to get the delayed data. 
+ * if this is not set, the codec is guranteed to never be feeded with NULL data + */ +#define CODEC_CAP_DELAY 0x0020 //the following defines might change, so dont expect compatibility if u use them #define MB_TYPE_INTRA4x4 0x0001 @@ -448,7 +511,14 @@ typedef struct AVPanScan{ uint8_t *mbskip_table;\ \ /**\ - * Motion vector table\ + * Motion vector table.\ + * @code\ + * example:\ + * int mv_sample_log2= 4 - motion_subsample_log2;\ + * int mb_width= (width+15)>>4;\ + * int mv_stride= (mb_width << mv_sample_log2) + 1;\ + * motion_val[direction][x + y*mv_stride][0->mv_x, 1->mv_y];\ + * @endcode\ * - encoding: set by user\ * - decoding: set by lavc\ */\ @@ -463,7 +533,8 @@ typedef struct AVPanScan{ uint32_t *mb_type;\ \ /**\ - * Macroblock size: (0->16x16, 1->8x8, 2-> 4x4, 3-> 2x2)\ + * log2 of the size of the block which a single vector in motion_val represents: \ + * (4->16x16, 3->8x8, 2-> 4x4, 1-> 2x2)\ * - encoding: unused\ * - decoding: set by lavc\ */\ @@ -645,6 +716,8 @@ typedef struct AVCodecContext { * mjpeg: huffman tables * rv10: additional flags * mpeg4: global headers (they can be in the bitstream or here) + * the allocated memory should be FF_INPUT_BUFFER_PADDING_SIZE bytes larger + * then extradata_size to avoid prolems if its read with the bitstream reader * - encoding: set/allocated/freed by lavc. * - decoding: set/allocated/freed by user. */ @@ -662,9 +735,11 @@ typedef struct AVCodecContext { int frame_rate; /** - * width / height. + * picture width / height. * - encoding: MUST be set by user. - * - decoding: set by user if known, codec should override / dynamically change if needed + * - decoding: set by lavc. + * Note, for compatibility its possible to set this instead of + * coded_width/height before decoding */ int width, height; @@ -679,10 +754,7 @@ typedef struct AVCodecContext { /** * pixel format, see PIX_FMT_xxx. - * - encoding: FIXME: used by ffmpeg to decide whether an pix_fmt - * conversion is in order. 
This only works for - * codecs with one supported pix_fmt, we should - * do something for a generic case as well. + * - encoding: set by user. * - decoding: set by lavc. */ enum PixelFormat pix_fmt; @@ -714,7 +786,13 @@ typedef struct AVCodecContext { /* audio only */ int sample_rate; ///< samples per sec int channels; - int sample_fmt; ///< sample format, currenly unused + + /** + * audio sample format. + * - encoding: set by user. + * - decoding: set by lavc. + */ + enum SampleFormat sample_fmt; ///< sample format, currenly unused /* the following data should not be initialized */ int frame_size; ///< in samples, initialized when calling 'init' @@ -798,8 +876,10 @@ typedef struct AVCodecContext { /* The RTP callcack: This function is called */ /* every time the encoder as a packet to send */ /* Depends on the encoder if the data starts */ - /* with a Start Code (it should) H.263 does */ - void (*rtp_callback)(struct AVCodecContext *avctx, void *data, int size, int packet_number); + /* with a Start Code (it should) H.263 does. */ + /* mb_nb contains the number of macroblocks */ + /* encoded in the RTP payload */ + void (*rtp_callback)(struct AVCodecContext *avctx, void *data, int size, int mb_nb); /* statistics, used for 2-pass encoding */ int mv_bits; @@ -839,7 +919,7 @@ typedef struct AVCodecContext { /** * workaround bugs in encoders which sometimes cannot be detected automatically. - * - encoding: unused + * - encoding: set by user * - decoding: set by user */ int workaround_bugs; @@ -857,6 +937,7 @@ typedef struct AVCodecContext { #define FF_BUG_EDGE 1024 #define FF_BUG_HPEL_CHROMA 2048 #define FF_BUG_DC_CLIP 4096 +#define FF_BUG_MS 8192 ///< workaround various bugs in microsofts broken decoders //#define FF_BUG_FAKE_SCALABILITY 16 //autodetection should work 100% /** @@ -921,8 +1002,8 @@ typedef struct AVCodecContext { void (*release_buffer)(struct AVCodecContext *c, AVFrame *pic); /** - * is 1 if the decoded stream contains b frames, 0 otherwise. 
- * - encoding: unused + * if 1 the stream has a 1 frame delay during decoding. + * - encoding: set by lavc * - decoding: set by lavc */ int has_b_frames; @@ -1096,6 +1177,7 @@ typedef struct AVCodecContext { #define FF_IDCT_ALTIVEC 8 #define FF_IDCT_SH4 9 #define FF_IDCT_SIMPLEARM 10 +#define FF_IDCT_H264 11 /** * slice count. @@ -1136,6 +1218,7 @@ typedef struct AVCodecContext { #define FF_MM_MMXEXT 0x0002 /* SSE integer functions or AMD MMX ext */ #define FF_MM_SSE 0x0008 /* SSE functions */ #define FF_MM_SSE2 0x0010 /* PIV SSE2 functions */ +#define FF_MM_3DNOWEXT 0x0020 /* AMD 3DNowExt */ #endif /* HAVE_MMX */ /** @@ -1211,14 +1294,14 @@ typedef struct AVCodecContext { /** * minimum MB quantizer. - * - encoding: set by user. + * - encoding: unused * - decoding: unused */ int mb_qmin; /** * maximum MB quantizer. - * - encoding: set by user. + * - encoding: unused * - decoding: unused */ int mb_qmax; @@ -1257,6 +1340,10 @@ typedef struct AVCodecContext { #define FF_CMP_ZERO 7 #define FF_CMP_VSAD 8 #define FF_CMP_VSSE 9 +#define FF_CMP_NSSE 10 +#define FF_CMP_W53 11 +#define FF_CMP_W97 12 +#define FF_CMP_DCTMAX 13 #define FF_CMP_CHROMA 256 /** @@ -1596,11 +1683,120 @@ typedef struct AVCodecContext { int mb_threshold; /** - * + * precision of the intra dc coefficient - 8. * - encoding: set by user * - decoding: unused */ int intra_dc_precision; + + /** + * noise vs. sse weight for the nsse comparsion function. + * - encoding: set by user + * - decoding: unused + */ + int nsse_weight; + + /** + * number of macroblock rows at the top which are skiped. + * - encoding: unused + * - decoding: set by user + */ + int skip_top; + + /** + * number of macroblock rows at the bottom which are skiped. 
+ * - encoding: unused + * - decoding: set by user + */ + int skip_bottom; + + /** + * profile + * - encoding: set by user + * - decoding: set by lavc + */ + int profile; +#define FF_PROFILE_UNKNOWN -99 + + /** + * level + * - encoding: set by user + * - decoding: set by lavc + */ + int level; +#define FF_LEVEL_UNKNOWN -99 + + /** + * low resolution decoding. 1-> 1/2 size, 2->1/4 size + * - encoding: unused + * - decoding: set by user + */ + int lowres; + + /** + * bitsream width / height. may be different from width/height if lowres + * or other things are used + * - encoding: unused + * - decoding: set by user before init if known, codec should override / dynamically change if needed + */ + int coded_width, coded_height; + + /** + * frame skip threshold + * - encoding: set by user + * - decoding: unused + */ + int frame_skip_threshold; + + /** + * frame skip factor + * - encoding: set by user + * - decoding: unused + */ + int frame_skip_factor; + + /** + * frame skip exponent + * - encoding: set by user + * - decoding: unused + */ + int frame_skip_exp; + + /** + * frame skip comparission function + * - encoding: set by user. + * - decoding: unused + */ + int frame_skip_cmp; + + /** + * border processing masking. raises the quantizer for mbs on the borders + * of the picture. + * - encoding: set by user + * - decoding: unused + */ + float border_masking; + + /** + * minimum MB lagrange multipler. + * - encoding: set by user. + * - decoding: unused + */ + int mb_lmin; + + /** + * maximum MB lagrange multipler. + * - encoding: set by user. + * - decoding: unused + */ + int mb_lmax; + + /** + * + * - encoding: set by user. 
+ * - decoding: unused + */ + int me_penalty_compensation; } AVCodecContext; @@ -1640,21 +1836,12 @@ typedef struct AVOption { } AVOption; /** - * Parse option(s) and sets fields in passed structure - * @param strct structure where the parsed results will be written - * @param list list with AVOptions - * @param opts string with options for parsing - */ -int avoption_parse(void* strct, const AVOption* list, const char* opts); - - -/** * AVCodec. */ typedef struct AVCodec { const char *name; enum CodecType type; - int id; + enum CodecID id; int priv_data_size; int (*init)(AVCodecContext *); int (*encode)(AVCodecContext *, uint8_t *buf, int buf_size, void *data); @@ -1662,7 +1849,7 @@ typedef struct AVCodec { int (*decode)(AVCodecContext *, void *outdata, int *outdata_size, uint8_t *buf, int buf_size); int capabilities; - const AVOption *options; + void *dummy; // FIXME remove next time we break binary compatibility struct AVCodec *next; void (*flush)(AVCodecContext *); const AVRational *supported_framerates; ///array of supported framerates, or NULL if any, array is terminated by {0,0} @@ -1703,9 +1890,12 @@ extern AVCodec ac3_encoder; extern AVCodec mp2_encoder; extern AVCodec mp3lame_encoder; extern AVCodec oggvorbis_encoder; +extern AVCodec oggtheora_encoder; extern AVCodec faac_encoder; +extern AVCodec xvid_encoder; extern AVCodec mpeg1video_encoder; extern AVCodec mpeg2video_encoder; +extern AVCodec h261_encoder; extern AVCodec h263_encoder; extern AVCodec h263p_encoder; extern AVCodec flv_encoder; @@ -1714,6 +1904,12 @@ extern AVCodec rv20_encoder; extern AVCodec dvvideo_encoder; extern AVCodec mjpeg_encoder; extern AVCodec ljpeg_encoder; +extern AVCodec png_encoder; +extern AVCodec ppm_encoder; +extern AVCodec pgm_encoder; +extern AVCodec pgmyuv_encoder; +extern AVCodec pbm_encoder; +extern AVCodec pam_encoder; extern AVCodec mpeg4_encoder; extern AVCodec msmpeg4v1_encoder; extern AVCodec msmpeg4v2_encoder; @@ -1721,22 +1917,30 @@ extern AVCodec 
msmpeg4v3_encoder; extern AVCodec wmv1_encoder; extern AVCodec wmv2_encoder; extern AVCodec huffyuv_encoder; +extern AVCodec ffvhuff_encoder; extern AVCodec h264_encoder; extern AVCodec asv1_encoder; extern AVCodec asv2_encoder; extern AVCodec vcr1_encoder; extern AVCodec ffv1_encoder; +extern AVCodec snow_encoder; extern AVCodec mdec_encoder; extern AVCodec zlib_encoder; +extern AVCodec sonic_encoder; +extern AVCodec sonic_ls_encoder; extern AVCodec svq1_encoder; +extern AVCodec x264_encoder; extern AVCodec h263_decoder; +extern AVCodec h261_decoder; extern AVCodec mpeg4_decoder; extern AVCodec msmpeg4v1_decoder; extern AVCodec msmpeg4v2_decoder; extern AVCodec msmpeg4v3_decoder; extern AVCodec wmv1_decoder; extern AVCodec wmv2_decoder; +extern AVCodec vc9_decoder; +extern AVCodec wmv3_decoder; extern AVCodec mpeg1video_decoder; extern AVCodec mpeg2video_decoder; extern AVCodec mpegvideo_decoder; @@ -1745,6 +1949,8 @@ extern AVCodec h263i_decoder; extern AVCodec flv_decoder; extern AVCodec rv10_decoder; extern AVCodec rv20_decoder; +extern AVCodec rv30_decoder; +extern AVCodec rv40_decoder; extern AVCodec svq1_decoder; extern AVCodec svq3_decoder; extern AVCodec dvvideo_decoder; @@ -1753,12 +1959,17 @@ extern AVCodec wmav2_decoder; extern AVCodec mjpeg_decoder; extern AVCodec mjpegb_decoder; extern AVCodec sp5x_decoder; +extern AVCodec png_decoder; extern AVCodec mp2_decoder; extern AVCodec mp3_decoder; +extern AVCodec mp3adu_decoder; +extern AVCodec mp3on4_decoder; extern AVCodec mace3_decoder; extern AVCodec mace6_decoder; extern AVCodec huffyuv_decoder; +extern AVCodec ffvhuff_decoder; extern AVCodec oggvorbis_decoder; +extern AVCodec oggtheora_decoder; extern AVCodec cyuv_decoder; extern AVCodec h264_decoder; extern AVCodec indeo3_decoder; @@ -1775,6 +1986,7 @@ extern AVCodec asv2_decoder; extern AVCodec vcr1_decoder; extern AVCodec cljr_decoder; extern AVCodec ffv1_decoder; +extern AVCodec snow_decoder; extern AVCodec fourxm_decoder; extern AVCodec 
mdec_decoder; extern AVCodec roq_decoder; @@ -1799,8 +2011,21 @@ extern AVCodec ra_288_decoder; extern AVCodec roq_dpcm_decoder; extern AVCodec interplay_dpcm_decoder; extern AVCodec xan_dpcm_decoder; +extern AVCodec sol_dpcm_decoder; +extern AVCodec sonic_decoder; extern AVCodec qtrle_decoder; extern AVCodec flac_decoder; +extern AVCodec tscc_decoder; +extern AVCodec ulti_decoder; +extern AVCodec qdraw_decoder; +extern AVCodec xl_decoder; +extern AVCodec qpeg_decoder; +extern AVCodec shorten_decoder; +extern AVCodec loco_decoder; +extern AVCodec wnv1_decoder; +extern AVCodec aasc_decoder; +extern AVCodec alac_decoder; +extern AVCodec ws_snd1_decoder; /* pcm codecs */ #define PCM_CODEC(id, name) \ @@ -1830,6 +2055,8 @@ PCM_CODEC(CODEC_ID_ADPCM_XA, adpcm_xa); PCM_CODEC(CODEC_ID_ADPCM_ADX, adpcm_adx); PCM_CODEC(CODEC_ID_ADPCM_EA, adpcm_ea); PCM_CODEC(CODEC_ID_ADPCM_G726, adpcm_g726); +PCM_CODEC(CODEC_ID_ADPCM_CT, adpcm_ct); +PCM_CODEC(CODEC_ID_ADPCM_SWF, adpcm_swf); #undef PCM_CODEC @@ -1839,10 +2066,12 @@ extern AVCodec rawvideo_decoder; /* the following codecs use external GPL libs */ extern AVCodec ac3_decoder; +extern AVCodec dts_decoder; /* resample.c */ struct ReSampleContext; +struct AVResampleContext; typedef struct ReSampleContext ReSampleContext; @@ -1851,6 +2080,11 @@ ReSampleContext *audio_resample_init(int output_channels, int input_channels, int audio_resample(ReSampleContext *s, short *output, short *input, int nb_samples); void audio_resample_close(ReSampleContext *s); +struct AVResampleContext *av_resample_init(int out_rate, int in_rate, int filter_length, int log2_phase_count, int linear, double cutoff); +int av_resample(struct AVResampleContext *c, short *dst, short *src, int *consumed, int src_size, int dst_size, int update_ctx); +void av_resample_compensate(struct AVResampleContext *c, int sample_delta, int compensation_distance); +void av_resample_close(struct AVResampleContext *c); + /* YUV420 format is assumed ! 
*/ struct ImgReSampleContext; @@ -1894,7 +2128,9 @@ int avpicture_layout(const AVPicture* src, int pix_fmt, int width, int height, int avpicture_get_size(int pix_fmt, int width, int height); void avcodec_get_chroma_sub_sample(int pix_fmt, int *h_shift, int *v_shift); const char *avcodec_get_pix_fmt_name(int pix_fmt); +void avcodec_set_dimensions(AVCodecContext *s, int width, int height); enum PixelFormat avcodec_get_pix_fmt(const char* name); +unsigned int avcodec_pix_fmt_to_codec_tag(enum PixelFormat p); #define FF_LOSS_RESOLUTION 0x0001 /* loss due to resolution change */ #define FF_LOSS_DEPTH 0x0002 /* loss due to color depth change */ @@ -1948,6 +2184,7 @@ int avcodec_default_get_buffer(AVCodecContext *s, AVFrame *pic); void avcodec_default_release_buffer(AVCodecContext *s, AVFrame *pic); int avcodec_default_reget_buffer(AVCodecContext *s, AVFrame *pic); void avcodec_align_dimensions(AVCodecContext *s, int *width, int *height); +int avcodec_check_dimensions(void *av_log_ctx, unsigned int w, unsigned int h); enum PixelFormat avcodec_default_get_format(struct AVCodecContext *s, const enum PixelFormat * fmt); int avcodec_thread_init(AVCodecContext *s, int thread_count); @@ -1982,6 +2219,8 @@ void avcodec_register_all(void); void avcodec_flush_buffers(AVCodecContext *avctx); +void avcodec_default_free_buffers(AVCodecContext *s); + /* misc usefull functions */ /** @@ -1998,87 +2237,16 @@ char av_get_pict_type_char(int pict_type); int av_reduce(int *dst_nom, int *dst_den, int64_t nom, int64_t den, int64_t max); /** - * rescale a 64bit integer. + * rescale a 64bit integer with rounding to nearest. 
* a simple a*b/c isnt possible as it can overflow */ int64_t av_rescale(int64_t a, int64_t b, int64_t c); - /** - * Interface for 0.5.0 version - * - * do not even think about it's usage for this moment - */ - -typedef struct { - /// compressed size used from given memory buffer - int size; - /// I/P/B frame type - int frame_type; -} avc_enc_result_t; - -/** - * Commands - * order can't be changed - once it was defined - */ -typedef enum { - // general commands - AVC_OPEN_BY_NAME = 0xACA000, - AVC_OPEN_BY_CODEC_ID, - AVC_OPEN_BY_FOURCC, - AVC_CLOSE, - - AVC_FLUSH, - // pin - struct { uint8_t* src, uint_t src_size } - // pout - struct { AVPicture* img, consumed_bytes, - AVC_DECODE, - // pin - struct { AVPicture* img, uint8_t* dest, uint_t dest_size } - // pout - uint_t used_from_dest_size - AVC_ENCODE, - - // query/get video commands - AVC_GET_VERSION = 0xACB000, - AVC_GET_WIDTH, - AVC_GET_HEIGHT, - AVC_GET_DELAY, - AVC_GET_QUANT_TABLE, - // ... - - // query/get audio commands - AVC_GET_FRAME_SIZE = 0xABC000, - - // maybe define some simple structure which - // might be passed to the user - but they can't - // contain any codec specific parts and these - // calls are usualy necessary only few times - - // set video commands - AVC_SET_WIDTH = 0xACD000, - AVC_SET_HEIGHT, - - // set video encoding commands - AVC_SET_FRAME_RATE = 0xACD800, - AVC_SET_QUALITY, - AVC_SET_HURRY_UP, - - // set audio commands - AVC_SET_SAMPLE_RATE = 0xACE000, - AVC_SET_CHANNELS, - -} avc_cmd_t; - -/** - * \param handle allocated private structure by libavcodec - * for initialization pass NULL - will be returned pout - * user is supposed to know nothing about its structure - * \param cmd type of operation to be performed - * \param pint input parameter - * \param pout output parameter - * - * \returns command status - eventually for query command it might return - * integer resulting value + * rescale a 64bit integer with specified rounding. 
+ * a simple a*b/c isnt possible as it can overflow */ -int avcodec(void* handle, avc_cmd_t cmd, void* pin, void* pout); +int64_t av_rescale_rnd(int64_t a, int64_t b, int64_t c, enum AVRounding); /* frame parsing */ typedef struct AVCodecParserContext { @@ -2107,7 +2275,7 @@ typedef struct AVCodecParserContext { } AVCodecParserContext; typedef struct AVCodecParser { - int codec_ids[3]; /* several codec IDs are permitted */ + int codec_ids[5]; /* several codec IDs are permitted */ int priv_data_size; int (*parser_init)(AVCodecParserContext *s); int (*parser_parse)(AVCodecParserContext *s, @@ -2131,8 +2299,11 @@ void av_parser_close(AVCodecParserContext *s); extern AVCodecParser mpegvideo_parser; extern AVCodecParser mpeg4video_parser; +extern AVCodecParser h261_parser; extern AVCodecParser h263_parser; extern AVCodecParser h264_parser; +extern AVCodecParser mjpeg_parser; +extern AVCodecParser pnm_parser; extern AVCodecParser mpegaudio_parser; extern AVCodecParser ac3_parser; @@ -2148,6 +2319,7 @@ void *av_fast_realloc(void *ptr, unsigned int *size, unsigned int min_size); /* call av_free_static to release all staticaly allocated tables */ void av_free_static(void); void *av_mallocz_static(unsigned int size); +void *av_realloc_static(void *ptr, unsigned int size); /* add by bero : in adx.c */ int is_adx(const unsigned char *buf,size_t bufsize); @@ -2164,7 +2336,12 @@ void img_copy(AVPicture *dst, const AVPicture *src, #define AV_LOG_INFO 1 #define AV_LOG_DEBUG 2 +#ifdef __GNUC__ extern void av_log(void*, int level, const char *fmt, ...) 
__attribute__ ((__format__ (__printf__, 3, 4))); +#else +extern void av_log(void*, int level, const char *fmt, ...); +#endif + extern void av_vlog(void*, int level, const char *fmt, va_list); extern int av_log_get_level(void); extern void av_log_set_level(int); diff --git a/src/libffmpeg/libavcodec/bitstream.c b/src/libffmpeg/libavcodec/bitstream.c new file mode 100755 index 000000000..2678772c4 --- /dev/null +++ b/src/libffmpeg/libavcodec/bitstream.c @@ -0,0 +1,287 @@ +/* + * Common bit i/o utils + * Copyright (c) 2000, 2001 Fabrice Bellard. + * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * alternative bitstream reader & writer by Michael Niedermayer <michaelni@gmx.at> + */ + +/** + * @file bitstream.c + * bitstream api. + */ + +#include "avcodec.h" +#include "bitstream.h" + +void align_put_bits(PutBitContext *s) +{ +#ifdef ALT_BITSTREAM_WRITER + put_bits(s,( - s->index) & 7,0); +#else + put_bits(s,s->bit_left & 7,0); +#endif +} + +void put_string(PutBitContext * pbc, char *s, int put_zero) +{ + while(*s){ + put_bits(pbc, 8, *s); + s++; + } + if(put_zero) + put_bits(pbc, 8, 0); +} + +/* bit input functions */ + +/** + * reads 0-32 bits. 
+ */ +unsigned int get_bits_long(GetBitContext *s, int n){ + if(n<=17) return get_bits(s, n); + else{ + int ret= get_bits(s, 16) << (n-16); + return ret | get_bits(s, n-16); + } +} + +/** + * shows 0-32 bits. + */ +unsigned int show_bits_long(GetBitContext *s, int n){ + if(n<=17) return show_bits(s, n); + else{ + GetBitContext gb= *s; + int ret= get_bits_long(s, n); + *s= gb; + return ret; + } +} + +void align_get_bits(GetBitContext *s) +{ + int n= (-get_bits_count(s)) & 7; + if(n) skip_bits(s, n); +} + +int check_marker(GetBitContext *s, const char *msg) +{ + int bit= get_bits1(s); + if(!bit) + av_log(NULL, AV_LOG_INFO, "Marker bit missing %s\n", msg); + + return bit; +} + +/* VLC decoding */ + +//#define DEBUG_VLC + +#define GET_DATA(v, table, i, wrap, size) \ +{\ + const uint8_t *ptr = (const uint8_t *)table + i * wrap;\ + switch(size) {\ + case 1:\ + v = *(const uint8_t *)ptr;\ + break;\ + case 2:\ + v = *(const uint16_t *)ptr;\ + break;\ + default:\ + v = *(const uint32_t *)ptr;\ + break;\ + }\ +} + + +static int alloc_table(VLC *vlc, int size, int use_static) +{ + int index; + index = vlc->table_size; + vlc->table_size += size; + if (vlc->table_size > vlc->table_allocated) { + vlc->table_allocated += (1 << vlc->bits); + if(use_static) + vlc->table = av_realloc_static(vlc->table, + sizeof(VLC_TYPE) * 2 * vlc->table_allocated); + else + vlc->table = av_realloc(vlc->table, + sizeof(VLC_TYPE) * 2 * vlc->table_allocated); + if (!vlc->table) + return -1; + } + return index; +} + +static int build_table(VLC *vlc, int table_nb_bits, + int nb_codes, + const void *bits, int bits_wrap, int bits_size, + const void *codes, int codes_wrap, int codes_size, + uint32_t code_prefix, int n_prefix, int use_static) +{ + int i, j, k, n, table_size, table_index, nb, n1, index; + uint32_t code; + VLC_TYPE (*table)[2]; + + table_size = 1 << table_nb_bits; + table_index = alloc_table(vlc, table_size, use_static); +#ifdef DEBUG_VLC + printf("new table index=%d size=%d code_prefix=%x 
n=%d\n", + table_index, table_size, code_prefix, n_prefix); +#endif + if (table_index < 0) + return -1; + table = &vlc->table[table_index]; + + for(i=0;i<table_size;i++) { + table[i][1] = 0; //bits + table[i][0] = -1; //codes + } + + /* first pass: map codes and compute auxillary table sizes */ + for(i=0;i<nb_codes;i++) { + GET_DATA(n, bits, i, bits_wrap, bits_size); + GET_DATA(code, codes, i, codes_wrap, codes_size); + /* we accept tables with holes */ + if (n <= 0) + continue; +#if defined(DEBUG_VLC) && 0 + printf("i=%d n=%d code=0x%x\n", i, n, code); +#endif + /* if code matches the prefix, it is in the table */ + n -= n_prefix; + if (n > 0 && (code >> n) == code_prefix) { + if (n <= table_nb_bits) { + /* no need to add another table */ + j = (code << (table_nb_bits - n)) & (table_size - 1); + nb = 1 << (table_nb_bits - n); + for(k=0;k<nb;k++) { +#ifdef DEBUG_VLC + av_log(NULL, AV_LOG_DEBUG, "%4x: code=%d n=%d\n", + j, i, n); +#endif + if (table[j][1] /*bits*/ != 0) { + av_log(NULL, AV_LOG_ERROR, "incorrect codes\n"); + return -1; + } + table[j][1] = n; //bits + table[j][0] = i; //code + j++; + } + } else { + n -= table_nb_bits; + j = (code >> n) & ((1 << table_nb_bits) - 1); +#ifdef DEBUG_VLC + printf("%4x: n=%d (subtable)\n", + j, n); +#endif + /* compute table size */ + n1 = -table[j][1]; //bits + if (n > n1) + n1 = n; + table[j][1] = -n1; //bits + } + } + } + + /* second pass : fill auxillary tables recursively */ + for(i=0;i<table_size;i++) { + n = table[i][1]; //bits + if (n < 0) { + n = -n; + if (n > table_nb_bits) { + n = table_nb_bits; + table[i][1] = -n; //bits + } + index = build_table(vlc, n, nb_codes, + bits, bits_wrap, bits_size, + codes, codes_wrap, codes_size, + (code_prefix << table_nb_bits) | i, + n_prefix + table_nb_bits, use_static); + if (index < 0) + return -1; + /* note: realloc has been done, so reload tables */ + table = &vlc->table[table_index]; + table[i][0] = index; //code + } + } + return table_index; +} + + +/* Build VLC decoding 
tables suitable for use with get_vlc(). + + 'nb_bits' sets the decoding table size (2^nb_bits entries). The + bigger it is, the faster the decoding is. But it should not be too + big, in order to save memory and L1 cache. '9' is a good compromise. + + 'nb_codes' : number of vlc codes + + 'bits' : table which gives the size (in bits) of each vlc code. + + 'codes' : table which gives the bit pattern of each vlc code. + + 'xxx_wrap' : gives the number of bytes between each entry of the + 'bits' or 'codes' tables. + + 'xxx_size' : gives the number of bytes of each entry of the 'bits' + or 'codes' tables. + + 'wrap' and 'size' allow any memory configuration and types + (byte/word/long) to be used to store the 'bits' and 'codes' tables. + + 'use_static' should be set to 1 for tables which should be freed + with av_free_static(), 0 if free_vlc() will be used. +*/ +int init_vlc(VLC *vlc, int nb_bits, int nb_codes, + const void *bits, int bits_wrap, int bits_size, + const void *codes, int codes_wrap, int codes_size, + int use_static) +{ + vlc->bits = nb_bits; + if(!use_static) { + vlc->table = NULL; + vlc->table_allocated = 0; + vlc->table_size = 0; + } else { + /* Static tables are initially always NULL, return + if vlc->table != NULL to avoid double allocation */ + if(vlc->table) + return 0; + } + +#ifdef DEBUG_VLC + printf("build table nb_codes=%d\n", nb_codes); +#endif + + if (build_table(vlc, nb_bits, nb_codes, + bits, bits_wrap, bits_size, + codes, codes_wrap, codes_size, + 0, 0, use_static) < 0) { + av_free(vlc->table); + return -1; + } + return 0; +} + + +void free_vlc(VLC *vlc) +{ + av_free(vlc->table); +} + diff --git a/src/libffmpeg/libavcodec/bitstream.h b/src/libffmpeg/libavcodec/bitstream.h new file mode 100644 index 000000000..fd69915d8 --- /dev/null +++ b/src/libffmpeg/libavcodec/bitstream.h @@ -0,0 +1,854 @@ +/** + * @file bitstream.h + * bitstream api header. 
+ */ + +#ifndef BITSTREAM_H +#define BITSTREAM_H + +//#define ALT_BITSTREAM_WRITER +//#define ALIGNED_BITSTREAM_WRITER + +#define ALT_BITSTREAM_READER +//#define LIBMPEG2_BITSTREAM_READER +//#define A32_BITSTREAM_READER +#define LIBMPEG2_BITSTREAM_READER_HACK //add BERO + +extern const uint8_t ff_reverse[256]; + +#if defined(ARCH_X86) || defined(ARCH_X86_64) +// avoid +32 for shift optimization (gcc should do that ...) +static inline int32_t NEG_SSR32( int32_t a, int8_t s){ + asm ("sarl %1, %0\n\t" + : "+r" (a) + : "ic" ((uint8_t)(-s)) + ); + return a; +} +static inline uint32_t NEG_USR32(uint32_t a, int8_t s){ + asm ("shrl %1, %0\n\t" + : "+r" (a) + : "ic" ((uint8_t)(-s)) + ); + return a; +} +#else +# define NEG_SSR32(a,s) ((( int32_t)(a))>>(32-(s))) +# define NEG_USR32(a,s) (((uint32_t)(a))>>(32-(s))) +#endif + +/* bit output */ + +/* buf and buf_end must be present and used by every alternative writer. */ +typedef struct PutBitContext { +#ifdef ALT_BITSTREAM_WRITER + uint8_t *buf, *buf_end; + int index; +#else + uint32_t bit_buf; + int bit_left; + uint8_t *buf, *buf_ptr, *buf_end; +#endif +} PutBitContext; + +static inline void init_put_bits(PutBitContext *s, uint8_t *buffer, int buffer_size) +{ + s->buf = buffer; + s->buf_end = s->buf + buffer_size; +#ifdef ALT_BITSTREAM_WRITER + s->index=0; + ((uint32_t*)(s->buf))[0]=0; +// memset(buffer, 0, buffer_size); +#else + s->buf_ptr = s->buf; + s->bit_left=32; + s->bit_buf=0; +#endif +} + +/* return the number of bits output */ +static inline int put_bits_count(PutBitContext *s) +{ +#ifdef ALT_BITSTREAM_WRITER + return s->index; +#else + return (s->buf_ptr - s->buf) * 8 + 32 - s->bit_left; +#endif +} + +/* pad the end of the output stream with zeros */ +static inline void flush_put_bits(PutBitContext *s) +{ +#ifdef ALT_BITSTREAM_WRITER + align_put_bits(s); +#else + s->bit_buf<<= s->bit_left; + while (s->bit_left < 32) { + /* XXX: should test end of buffer */ + *s->buf_ptr++=s->bit_buf >> 24; + s->bit_buf<<=8; + 
s->bit_left+=8; + } + s->bit_left=32; + s->bit_buf=0; +#endif +} + +void align_put_bits(PutBitContext *s); +void put_string(PutBitContext * pbc, char *s, int put_zero); + +/* bit input */ +/* buffer, buffer_end and size_in_bits must be present and used by every reader */ +typedef struct GetBitContext { + const uint8_t *buffer, *buffer_end; +#ifdef ALT_BITSTREAM_READER + int index; +#elif defined LIBMPEG2_BITSTREAM_READER + uint8_t *buffer_ptr; + uint32_t cache; + int bit_count; +#elif defined A32_BITSTREAM_READER + uint32_t *buffer_ptr; + uint32_t cache0; + uint32_t cache1; + int bit_count; +#endif + int size_in_bits; +} GetBitContext; + +#define VLC_TYPE int16_t + +typedef struct VLC { + int bits; + VLC_TYPE (*table)[2]; ///< code, bits + int table_size, table_allocated; +} VLC; + +typedef struct RL_VLC_ELEM { + int16_t level; + int8_t len; + uint8_t run; +} RL_VLC_ELEM; + +#ifdef ARCH_SPARC +#define UNALIGNED_STORES_ARE_BAD +#endif + +/* used to avoid missaligned exceptions on some archs (alpha, ...) 
*/ +#if defined(ARCH_X86) || defined(ARCH_X86_64) +# define unaligned32(a) (*(const uint32_t*)(a)) +#else +# ifdef __GNUC__ +static inline uint32_t unaligned32(const void *v) { + struct Unaligned { + uint32_t i; + } __attribute__((packed)); + + return ((const struct Unaligned *) v)->i; +} +# elif defined(__DECC) +static inline uint32_t unaligned32(const void *v) { + return *(const __unaligned uint32_t *) v; +} +# else +static inline uint32_t unaligned32(const void *v) { + return *(const uint32_t *) v; +} +# endif +#endif //!ARCH_X86 + +#ifndef ALT_BITSTREAM_WRITER +static inline void put_bits(PutBitContext *s, int n, unsigned int value) +{ + unsigned int bit_buf; + int bit_left; + +#ifdef STATS + st_out_bit_counts[st_current_index] += n; +#endif + // printf("put_bits=%d %x\n", n, value); + assert(n == 32 || value < (1U << n)); + + bit_buf = s->bit_buf; + bit_left = s->bit_left; + + // printf("n=%d value=%x cnt=%d buf=%x\n", n, value, bit_cnt, bit_buf); + /* XXX: optimize */ + if (n < bit_left) { + bit_buf = (bit_buf<<n) | value; + bit_left-=n; + } else { + bit_buf<<=bit_left; + bit_buf |= value >> (n - bit_left); +#ifdef UNALIGNED_STORES_ARE_BAD + if (3 & (intptr_t) s->buf_ptr) { + s->buf_ptr[0] = bit_buf >> 24; + s->buf_ptr[1] = bit_buf >> 16; + s->buf_ptr[2] = bit_buf >> 8; + s->buf_ptr[3] = bit_buf ; + } else +#endif + *(uint32_t *)s->buf_ptr = be2me_32(bit_buf); + //printf("bitbuf = %08x\n", bit_buf); + s->buf_ptr+=4; + bit_left+=32 - n; + bit_buf = value; + } + + s->bit_buf = bit_buf; + s->bit_left = bit_left; +} +#endif + + +#ifdef ALT_BITSTREAM_WRITER +static inline void put_bits(PutBitContext *s, int n, unsigned int value) +{ +# ifdef ALIGNED_BITSTREAM_WRITER +# if defined(ARCH_X86) || defined(ARCH_X86_64) + asm volatile( + "movl %0, %%ecx \n\t" + "xorl %%eax, %%eax \n\t" + "shrdl %%cl, %1, %%eax \n\t" + "shrl %%cl, %1 \n\t" + "movl %0, %%ecx \n\t" + "shrl $3, %%ecx \n\t" + "andl $0xFFFFFFFC, %%ecx \n\t" + "bswapl %1 \n\t" + "orl %1, (%2, %%ecx) \n\t" + 
"bswapl %%eax \n\t" + "addl %3, %0 \n\t" + "movl %%eax, 4(%2, %%ecx) \n\t" + : "=&r" (s->index), "=&r" (value) + : "r" (s->buf), "r" (n), "0" (s->index), "1" (value<<(-n)) + : "%eax", "%ecx" + ); +# else + int index= s->index; + uint32_t *ptr= ((uint32_t *)s->buf)+(index>>5); + + value<<= 32-n; + + ptr[0] |= be2me_32(value>>(index&31)); + ptr[1] = be2me_32(value<<(32-(index&31))); +//if(n>24) printf("%d %d\n", n, value); + index+= n; + s->index= index; +# endif +# else //ALIGNED_BITSTREAM_WRITER +# if defined(ARCH_X86) || defined(ARCH_X86_64) + asm volatile( + "movl $7, %%ecx \n\t" + "andl %0, %%ecx \n\t" + "addl %3, %%ecx \n\t" + "negl %%ecx \n\t" + "shll %%cl, %1 \n\t" + "bswapl %1 \n\t" + "movl %0, %%ecx \n\t" + "shrl $3, %%ecx \n\t" + "orl %1, (%%ecx, %2) \n\t" + "addl %3, %0 \n\t" + "movl $0, 4(%%ecx, %2) \n\t" + : "=&r" (s->index), "=&r" (value) + : "r" (s->buf), "r" (n), "0" (s->index), "1" (value) + : "%ecx" + ); +# else + int index= s->index; + uint32_t *ptr= (uint32_t*)(((uint8_t *)s->buf)+(index>>3)); + + ptr[0] |= be2me_32(value<<(32-n-(index&7) )); + ptr[1] = 0; +//if(n>24) printf("%d %d\n", n, value); + index+= n; + s->index= index; +# endif +# endif //!ALIGNED_BITSTREAM_WRITER +} +#endif + + +static inline uint8_t* pbBufPtr(PutBitContext *s) +{ +#ifdef ALT_BITSTREAM_WRITER + return s->buf + (s->index>>3); +#else + return s->buf_ptr; +#endif +} + +/** + * + * PutBitContext must be flushed & aligned to a byte boundary before calling this. + */ +static inline void skip_put_bytes(PutBitContext *s, int n){ + assert((put_bits_count(s)&7)==0); +#ifdef ALT_BITSTREAM_WRITER + FIXME may need some cleaning of the buffer + s->index += n<<3; +#else + assert(s->bit_left==32); + s->buf_ptr += n; +#endif +} + +/** + * skips the given number of bits. 
+ * must only be used if the actual values in the bitstream don't matter + */ +static inline void skip_put_bits(PutBitContext *s, int n){ +#ifdef ALT_BITSTREAM_WRITER + s->index += n; +#else + s->bit_left -= n; + s->buf_ptr-= s->bit_left>>5; + s->bit_left &= 31; +#endif +} + +/** + * Changes the end of the buffer. + */ +static inline void set_put_bits_buffer_size(PutBitContext *s, int size){ + s->buf_end= s->buf + size; +} + +/* Bitstream reader API docs: +name + arbitrary name which is used as prefix for the internal variables + +gb + the GetBitContext + +OPEN_READER(name, gb) + loads gb into local variables + +CLOSE_READER(name, gb) + stores local vars in gb + +UPDATE_CACHE(name, gb) + refills the internal cache from the bitstream + after this call at least MIN_CACHE_BITS will be available + +GET_CACHE(name, gb) + will output the contents of the internal cache, next bit is MSB of 32 or 64 bit (FIXME 64bit) + +SHOW_UBITS(name, gb, num) + will return the next num bits + +SHOW_SBITS(name, gb, num) + will return the next num bits and do sign extension + +SKIP_BITS(name, gb, num) + will skip over the next num bits + note, this is equivalent to SKIP_CACHE; SKIP_COUNTER + +SKIP_CACHE(name, gb, num) + will remove the next num bits from the cache (note SKIP_COUNTER MUST be called before UPDATE_CACHE / CLOSE_READER) + +SKIP_COUNTER(name, gb, num) + will increment the internal bit counter (see SKIP_CACHE & SKIP_BITS) + +LAST_SKIP_CACHE(name, gb, num) + will remove the next num bits from the cache if it is needed for UPDATE_CACHE otherwise it will do nothing + +LAST_SKIP_BITS(name, gb, num) + is equivalent to LAST_SKIP_CACHE; SKIP_COUNTER + +for examples see get_bits, show_bits, skip_bits, get_vlc +*/ + +static inline int unaligned32_be(const void *v) +{ +#ifdef CONFIG_ALIGN + const uint8_t *p=v; + return (((p[0]<<8) | p[1])<<16) | (p[2]<<8) | (p[3]); +#else + return be2me_32( unaligned32(v)); //original +#endif +} + +#ifdef ALT_BITSTREAM_READER +# define MIN_CACHE_BITS 25 + +# 
define OPEN_READER(name, gb)\ + int name##_index= (gb)->index;\ + int name##_cache= 0;\ + +# define CLOSE_READER(name, gb)\ + (gb)->index= name##_index;\ + +# define UPDATE_CACHE(name, gb)\ + name##_cache= unaligned32_be( ((const uint8_t *)(gb)->buffer)+(name##_index>>3) ) << (name##_index&0x07);\ + +# define SKIP_CACHE(name, gb, num)\ + name##_cache <<= (num);\ + +// FIXME name? +# define SKIP_COUNTER(name, gb, num)\ + name##_index += (num);\ + +# define SKIP_BITS(name, gb, num)\ + {\ + SKIP_CACHE(name, gb, num)\ + SKIP_COUNTER(name, gb, num)\ + }\ + +# define LAST_SKIP_BITS(name, gb, num) SKIP_COUNTER(name, gb, num) +# define LAST_SKIP_CACHE(name, gb, num) ; + +# define SHOW_UBITS(name, gb, num)\ + NEG_USR32(name##_cache, num) + +# define SHOW_SBITS(name, gb, num)\ + NEG_SSR32(name##_cache, num) + +# define GET_CACHE(name, gb)\ + ((uint32_t)name##_cache) + +static inline int get_bits_count(GetBitContext *s){ + return s->index; +} +#elif defined LIBMPEG2_BITSTREAM_READER +//libmpeg2 like reader + +# define MIN_CACHE_BITS 17 + +# define OPEN_READER(name, gb)\ + int name##_bit_count=(gb)->bit_count;\ + int name##_cache= (gb)->cache;\ + uint8_t * name##_buffer_ptr=(gb)->buffer_ptr;\ + +# define CLOSE_READER(name, gb)\ + (gb)->bit_count= name##_bit_count;\ + (gb)->cache= name##_cache;\ + (gb)->buffer_ptr= name##_buffer_ptr;\ + +#ifdef LIBMPEG2_BITSTREAM_READER_HACK + +# define UPDATE_CACHE(name, gb)\ + if(name##_bit_count >= 0){\ + name##_cache+= (int)be2me_16(*(uint16_t*)name##_buffer_ptr) << name##_bit_count;\ + name##_buffer_ptr += 2;\ + name##_bit_count-= 16;\ + }\ + +#else + +# define UPDATE_CACHE(name, gb)\ + if(name##_bit_count >= 0){\ + name##_cache+= ((name##_buffer_ptr[0]<<8) + name##_buffer_ptr[1]) << name##_bit_count;\ + name##_buffer_ptr+=2;\ + name##_bit_count-= 16;\ + }\ + +#endif + +# define SKIP_CACHE(name, gb, num)\ + name##_cache <<= (num);\ + +# define SKIP_COUNTER(name, gb, num)\ + name##_bit_count += (num);\ + +# define SKIP_BITS(name, gb, num)\ 
+ {\ + SKIP_CACHE(name, gb, num)\ + SKIP_COUNTER(name, gb, num)\ + }\ + +# define LAST_SKIP_BITS(name, gb, num) SKIP_BITS(name, gb, num) +# define LAST_SKIP_CACHE(name, gb, num) SKIP_CACHE(name, gb, num) + +# define SHOW_UBITS(name, gb, num)\ + NEG_USR32(name##_cache, num) + +# define SHOW_SBITS(name, gb, num)\ + NEG_SSR32(name##_cache, num) + +# define GET_CACHE(name, gb)\ + ((uint32_t)name##_cache) + +static inline int get_bits_count(GetBitContext *s){ + return (s->buffer_ptr - s->buffer)*8 - 16 + s->bit_count; +} + +#elif defined A32_BITSTREAM_READER + +# define MIN_CACHE_BITS 32 + +# define OPEN_READER(name, gb)\ + int name##_bit_count=(gb)->bit_count;\ + uint32_t name##_cache0= (gb)->cache0;\ + uint32_t name##_cache1= (gb)->cache1;\ + uint32_t * name##_buffer_ptr=(gb)->buffer_ptr;\ + +# define CLOSE_READER(name, gb)\ + (gb)->bit_count= name##_bit_count;\ + (gb)->cache0= name##_cache0;\ + (gb)->cache1= name##_cache1;\ + (gb)->buffer_ptr= name##_buffer_ptr;\ + +# define UPDATE_CACHE(name, gb)\ + if(name##_bit_count > 0){\ + const uint32_t next= be2me_32( *name##_buffer_ptr );\ + name##_cache0 |= NEG_USR32(next,name##_bit_count);\ + name##_cache1 |= next<<name##_bit_count;\ + name##_buffer_ptr++;\ + name##_bit_count-= 32;\ + }\ + +#if defined(ARCH_X86) || defined(ARCH_X86_64) +# define SKIP_CACHE(name, gb, num)\ + asm(\ + "shldl %2, %1, %0 \n\t"\ + "shll %2, %1 \n\t"\ + : "+r" (name##_cache0), "+r" (name##_cache1)\ + : "Ic" ((uint8_t)num)\ + ); +#else +# define SKIP_CACHE(name, gb, num)\ + name##_cache0 <<= (num);\ + name##_cache0 |= NEG_USR32(name##_cache1,num);\ + name##_cache1 <<= (num); +#endif + +# define SKIP_COUNTER(name, gb, num)\ + name##_bit_count += (num);\ + +# define SKIP_BITS(name, gb, num)\ + {\ + SKIP_CACHE(name, gb, num)\ + SKIP_COUNTER(name, gb, num)\ + }\ + +# define LAST_SKIP_BITS(name, gb, num) SKIP_BITS(name, gb, num) +# define LAST_SKIP_CACHE(name, gb, num) SKIP_CACHE(name, gb, num) + +# define SHOW_UBITS(name, gb, num)\ + 
NEG_USR32(name##_cache0, num) + +# define SHOW_SBITS(name, gb, num)\ + NEG_SSR32(name##_cache0, num) + +# define GET_CACHE(name, gb)\ + (name##_cache0) + +static inline int get_bits_count(GetBitContext *s){ + return ((uint8_t*)s->buffer_ptr - s->buffer)*8 - 32 + s->bit_count; +} + +#endif + +/** + * read mpeg1 dc style vlc (sign bit + mantissa with no MSB). + * if MSB not set it is negative + * @param n length in bits + * @author BERO + */ +static inline int get_xbits(GetBitContext *s, int n){ + register int tmp; + register int32_t cache; + OPEN_READER(re, s) + UPDATE_CACHE(re, s) + cache = GET_CACHE(re,s); + if ((int32_t)cache<0) { //MSB=1 + tmp = NEG_USR32(cache,n); + } else { + // tmp = (-1<<n) | NEG_USR32(cache,n) + 1; mpeg12.c algo + // tmp = - (NEG_USR32(cache,n) ^ ((1 << n) - 1)); h263.c algo + tmp = - NEG_USR32(~cache,n); + } + LAST_SKIP_BITS(re, s, n) + CLOSE_READER(re, s) + return tmp; +} + +static inline int get_sbits(GetBitContext *s, int n){ + register int tmp; + OPEN_READER(re, s) + UPDATE_CACHE(re, s) + tmp= SHOW_SBITS(re, s, n); + LAST_SKIP_BITS(re, s, n) + CLOSE_READER(re, s) + return tmp; +} + +/** + * reads 0-17 bits. + * Note, the alt bitstream reader can read up to 25 bits, but the libmpeg2 reader can't + */ +static inline unsigned int get_bits(GetBitContext *s, int n){ + register int tmp; + OPEN_READER(re, s) + UPDATE_CACHE(re, s) + tmp= SHOW_UBITS(re, s, n); + LAST_SKIP_BITS(re, s, n) + CLOSE_READER(re, s) + return tmp; +} + +unsigned int get_bits_long(GetBitContext *s, int n); + +/** + * shows 0-17 bits. 
+ * Note, the alt bitstream reader can read up to 25 bits, but the libmpeg2 reader can't + */ +static inline unsigned int show_bits(GetBitContext *s, int n){ + register int tmp; + OPEN_READER(re, s) + UPDATE_CACHE(re, s) + tmp= SHOW_UBITS(re, s, n); +// CLOSE_READER(re, s) + return tmp; +} + +unsigned int show_bits_long(GetBitContext *s, int n); + +static inline void skip_bits(GetBitContext *s, int n){ + //Note gcc seems to optimize this to s->index+=n for the ALT_READER :)) + OPEN_READER(re, s) + UPDATE_CACHE(re, s) + LAST_SKIP_BITS(re, s, n) + CLOSE_READER(re, s) +} + +static inline unsigned int get_bits1(GetBitContext *s){ +#ifdef ALT_BITSTREAM_READER + int index= s->index; + uint8_t result= s->buffer[ index>>3 ]; + result<<= (index&0x07); + result>>= 8 - 1; + index++; + s->index= index; + + return result; +#else + return get_bits(s, 1); +#endif +} + +static inline unsigned int show_bits1(GetBitContext *s){ + return show_bits(s, 1); +} + +static inline void skip_bits1(GetBitContext *s){ + skip_bits(s, 1); +} + +/** + * init GetBitContext. 
+ * @param buffer bitstream buffer, must be FF_INPUT_BUFFER_PADDING_SIZE bytes larger than the actual read bits + * because some optimized bitstream readers read 32 or 64 bit at once and could read over the end + * @param bit_size the size of the buffer in bits + */ +static inline void init_get_bits(GetBitContext *s, + const uint8_t *buffer, int bit_size) +{ + const int buffer_size= (bit_size+7)>>3; + + s->buffer= buffer; + s->size_in_bits= bit_size; + s->buffer_end= buffer + buffer_size; +#ifdef ALT_BITSTREAM_READER + s->index=0; +#elif defined LIBMPEG2_BITSTREAM_READER +#ifdef LIBMPEG2_BITSTREAM_READER_HACK + if ((int)buffer&1) { + /* word alignment */ + s->cache = (*buffer++)<<24; + s->buffer_ptr = buffer; + s->bit_count = 16-8; + } else +#endif + { + s->buffer_ptr = buffer; + s->bit_count = 16; + s->cache = 0; + } +#elif defined A32_BITSTREAM_READER + s->buffer_ptr = (uint32_t*)buffer; + s->bit_count = 32; + s->cache0 = 0; + s->cache1 = 0; +#endif + { + OPEN_READER(re, s) + UPDATE_CACHE(re, s) + UPDATE_CACHE(re, s) + CLOSE_READER(re, s) + } +#ifdef A32_BITSTREAM_READER + s->cache1 = 0; +#endif +} + +int check_marker(GetBitContext *s, const char *msg); +void align_get_bits(GetBitContext *s); +int init_vlc(VLC *vlc, int nb_bits, int nb_codes, + const void *bits, int bits_wrap, int bits_size, + const void *codes, int codes_wrap, int codes_size, + int use_static); +void free_vlc(VLC *vlc); + +/** + * + * if the vlc code is invalid and max_depth=1 then no bits will be removed + * if the vlc code is invalid and max_depth>1 then the number of bits removed + * is undefined + */ +#define GET_VLC(code, name, gb, table, bits, max_depth)\ +{\ + int n, index, nb_bits;\ +\ + index= SHOW_UBITS(name, gb, bits);\ + code = table[index][0];\ + n = table[index][1];\ +\ + if(max_depth > 1 && n < 0){\ + LAST_SKIP_BITS(name, gb, bits)\ + UPDATE_CACHE(name, gb)\ +\ + nb_bits = -n;\ +\ + index= SHOW_UBITS(name, gb, nb_bits) + code;\ + code = table[index][0];\ + n = table[index][1];\ + 
if(max_depth > 2 && n < 0){\ + LAST_SKIP_BITS(name, gb, nb_bits)\ + UPDATE_CACHE(name, gb)\ +\ + nb_bits = -n;\ +\ + index= SHOW_UBITS(name, gb, nb_bits) + code;\ + code = table[index][0];\ + n = table[index][1];\ + }\ + }\ + SKIP_BITS(name, gb, n)\ +} + +#define GET_RL_VLC(level, run, name, gb, table, bits, max_depth, need_update)\ +{\ + int n, index, nb_bits;\ +\ + index= SHOW_UBITS(name, gb, bits);\ + level = table[index].level;\ + n = table[index].len;\ +\ + if(max_depth > 1 && n < 0){\ + SKIP_BITS(name, gb, bits)\ + if(need_update){\ + UPDATE_CACHE(name, gb)\ + }\ +\ + nb_bits = -n;\ +\ + index= SHOW_UBITS(name, gb, nb_bits) + level;\ + level = table[index].level;\ + n = table[index].len;\ + }\ + run= table[index].run;\ + SKIP_BITS(name, gb, n)\ +} + +// deprecated, don't use get_vlc for new code, use get_vlc2 instead or use GET_VLC directly +static inline int get_vlc(GetBitContext *s, VLC *vlc) +{ + int code; + VLC_TYPE (*table)[2]= vlc->table; + + OPEN_READER(re, s) + UPDATE_CACHE(re, s) + + GET_VLC(code, re, s, table, vlc->bits, 3) + + CLOSE_READER(re, s) + return code; +} + +/** + * parses a vlc code, faster than get_vlc() + * @param bits is the number of bits which will be read at once, must be + * identical to nb_bits in init_vlc() + * @param max_depth is the number of times 'bits' bits must be read to completely + * read the longest vlc code + * = (max_vlc_length + bits - 1) / bits + */ +static always_inline int get_vlc2(GetBitContext *s, VLC_TYPE (*table)[2], + int bits, int max_depth) +{ + int code; + + OPEN_READER(re, s) + UPDATE_CACHE(re, s) + + GET_VLC(code, re, s, table, bits, max_depth) + + CLOSE_READER(re, s) + return code; +} + +//#define TRACE + +#ifdef TRACE +#include "avcodec.h" +static inline void print_bin(int bits, int n){ + int i; + + for(i=n-1; i>=0; i--){ + av_log(NULL, AV_LOG_DEBUG, "%d", (bits>>i)&1); + } + for(i=n; i<24; i++) + av_log(NULL, AV_LOG_DEBUG, " "); +} + +static inline int get_bits_trace(GetBitContext *s, int n, char *file, 
const char *func, int line){ + int r= get_bits(s, n); + + print_bin(r, n); + av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d bit @%5d in %s %s:%d\n", r, n, r, get_bits_count(s)-n, file, func, line); + return r; +} +static inline int get_vlc_trace(GetBitContext *s, VLC_TYPE (*table)[2], int bits, int max_depth, char *file, const char *func, int line){ + int show= show_bits(s, 24); + int pos= get_bits_count(s); + int r= get_vlc2(s, table, bits, max_depth); + int len= get_bits_count(s) - pos; + int bits2= show>>(24-len); + + print_bin(bits2, len); + + av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d vlc @%5d in %s %s:%d\n", bits2, len, r, pos, file, func, line); + return r; +} +static inline int get_xbits_trace(GetBitContext *s, int n, char *file, const char *func, int line){ + int show= show_bits(s, n); + int r= get_xbits(s, n); + + print_bin(show, n); + av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d xbt @%5d in %s %s:%d\n", show, n, r, get_bits_count(s)-n, file, func, line); + return r; +} + +#define get_bits(s, n) get_bits_trace(s, n, __FILE__, __PRETTY_FUNCTION__, __LINE__) +#define get_bits1(s) get_bits_trace(s, 1, __FILE__, __PRETTY_FUNCTION__, __LINE__) +#define get_xbits(s, n) get_xbits_trace(s, n, __FILE__, __PRETTY_FUNCTION__, __LINE__) +#define get_vlc(s, vlc) get_vlc_trace(s, (vlc)->table, (vlc)->bits, 3, __FILE__, __PRETTY_FUNCTION__, __LINE__) +#define get_vlc2(s, tab, bits, max) get_vlc_trace(s, tab, bits, max, __FILE__, __PRETTY_FUNCTION__, __LINE__) + +#define tprintf(...) av_log(NULL, AV_LOG_DEBUG, __VA_ARGS__) + +#else //TRACE +#define tprintf(...) 
{} +#endif + +static inline int decode012(GetBitContext *gb){ + int n; + n = get_bits1(gb); + if (n == 0) + return 0; + else + return get_bits1(gb) + 1; +} + +#endif /* BITSTREAM_H */ diff --git a/src/libffmpeg/libavcodec/bswap.h b/src/libffmpeg/libavcodec/bswap.h index 460f7abd4..eb1d87a55 100644 --- a/src/libffmpeg/libavcodec/bswap.h +++ b/src/libffmpeg/libavcodec/bswap.h @@ -10,17 +10,23 @@ #include <byteswap.h> #else -#ifdef ARCH_X86 -static inline unsigned short ByteSwap16(unsigned short x) +#ifdef ARCH_X86_64 +# define LEGACY_REGS "=Q" +#else +# define LEGACY_REGS "=q" +#endif + +#if defined(ARCH_X86) || defined(ARCH_X86_64) +static inline uint16_t ByteSwap16(uint16_t x) { __asm("xchgb %b0,%h0" : - "=q" (x) : + LEGACY_REGS (x) : "0" (x)); return x; } #define bswap_16(x) ByteSwap16(x) -static inline unsigned int ByteSwap32(unsigned int x) +static inline uint32_t ByteSwap32(uint32_t x) { #if __CPU__ > 386 __asm("bswap %0": @@ -29,21 +35,28 @@ static inline unsigned int ByteSwap32(unsigned int x) __asm("xchgb %b0,%h0\n" " rorl $16,%0\n" " xchgb %b0,%h0": - "=q" (x) : + LEGACY_REGS (x) : #endif "0" (x)); return x; } #define bswap_32(x) ByteSwap32(x) -static inline unsigned long long int ByteSwap64(unsigned long long int x) +static inline uint64_t ByteSwap64(uint64_t x) { +#ifdef ARCH_X86_64 + __asm("bswap %0": + "=r" (x) : + "0" (x)); + return x; +#else register union { __extension__ uint64_t __ll; uint32_t __l[2]; } __x; asm("xchgl %0,%1": "=r"(__x.__l[0]),"=r"(__x.__l[1]): - "0"(bswap_32((unsigned long)x)),"1"(bswap_32((unsigned long)(x>>32)))); + "0"(bswap_32((uint32_t)x)),"1"(bswap_32((uint32_t)(x>>32)))); return __x.__ll; +#endif } #define bswap_64(x) ByteSwap64(x) diff --git a/src/libffmpeg/libavcodec/cabac.c b/src/libffmpeg/libavcodec/cabac.c index 9d56e23fc..9a598fa47 100644 --- a/src/libffmpeg/libavcodec/cabac.c +++ b/src/libffmpeg/libavcodec/cabac.c @@ -26,6 +26,7 @@ #include <string.h> #include "common.h" +#include "bitstream.h" #include "cabac.h" 
const uint8_t ff_h264_lps_range[64][4]= { @@ -69,6 +70,25 @@ const uint8_t ff_h264_lps_state[64]= { 36,36,37,37,37,38,38,63, }; +const uint8_t ff_h264_norm_shift[256]= { + 8,7,6,6,5,5,5,5,4,4,4,4,4,4,4,4, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +}; + /** * * @param buf_size size of buf in bits @@ -95,10 +115,14 @@ void ff_init_cabac_decoder(CABACContext *c, const uint8_t *buf, int buf_size){ c->bytestream= buf; c->bytestream_end= buf + buf_size; - c->low= *c->bytestream++; - c->low= (c->low<<9) + ((*c->bytestream++)<<1); - c->range= 0x1FE00; - c->bits_left= 7; +#if CABAC_BITS == 16 + c->low = (*c->bytestream++)<<18; + c->low+= (*c->bytestream++)<<10; +#else + c->low = (*c->bytestream++)<<10; +#endif + c->low+= ((*c->bytestream++)<<2) + 2; + c->range= 0x1FE<<(CABAC_BITS + 1); } void ff_init_cabac_states(CABACContext *c, uint8_t const (*lps_range)[4], @@ -107,8 +131,8 @@ void ff_init_cabac_states(CABACContext *c, uint8_t const (*lps_range)[4], for(i=0; i<state_count; i++){ for(j=0; j<4; j++){ //FIXME check if this is worth the 1 shift we save - c->lps_range[2*i+0][j]= - c->lps_range[2*i+1][j]= lps_range[i][j]; + c->lps_range[2*i+0][j+4]= + c->lps_range[2*i+1][j+4]= lps_range[i][j]; } c->mps_state[2*i+0]= 2*mps_state[i]; @@ -126,6 +150,9 @@ void ff_init_cabac_states(CABACContext *c, uint8_t const (*lps_range)[4], #if 0 //selftest #define SIZE 10240 + +#include "avcodec.h" + int main(){ CABACContext c; uint8_t b[9*SIZE]; @@ -173,33 +200,33 @@ STOP_TIMER("put_cabac_ueg") for(i=0; i<SIZE; i++){ 
START_TIMER if( (r[i]&1) != get_cabac_bypass(&c) ) - printf("CABAC bypass failure at %d\n", i); + av_log(NULL, AV_LOG_ERROR, "CABAC bypass failure at %d\n", i); STOP_TIMER("get_cabac_bypass") } for(i=0; i<SIZE; i++){ START_TIMER if( (r[i]&1) != get_cabac(&c, state) ) - printf("CABAC failure at %d\n", i); + av_log(NULL, AV_LOG_ERROR, "CABAC failure at %d\n", i); STOP_TIMER("get_cabac") } - +#if 0 for(i=0; i<SIZE; i++){ START_TIMER if( r[i] != get_cabac_u(&c, state, (i&1) ? 6 : 7, 3, i&1) ) - printf("CABAC unary (truncated) binarization failure at %d\n", i); + av_log(NULL, AV_LOG_ERROR, "CABAC unary (truncated) binarization failure at %d\n", i); STOP_TIMER("get_cabac_u") } for(i=0; i<SIZE; i++){ START_TIMER if( r[i] != get_cabac_ueg(&c, state, 3, 0, 1, 2)) - printf("CABAC unary (truncated) binarization failure at %d\n", i); + av_log(NULL, AV_LOG_ERROR, "CABAC unary (truncated) binarization failure at %d\n", i); STOP_TIMER("get_cabac_ueg") } - +#endif if(!get_cabac_terminate(&c)) - printf("where's the Terminator?\n"); + av_log(NULL, AV_LOG_ERROR, "where's the Terminator?\n"); return 0; } diff --git a/src/libffmpeg/libavcodec/cabac.h b/src/libffmpeg/libavcodec/cabac.h index 21085b21e..cc03eba96 100644 --- a/src/libffmpeg/libavcodec/cabac.h +++ b/src/libffmpeg/libavcodec/cabac.h @@ -27,6 +27,9 @@ #undef NDEBUG #include <assert.h> +#define CABAC_BITS 8 +#define CABAC_MASK ((1<<CABAC_BITS)-1) + typedef struct CABACContext{ int low; int range; @@ -34,19 +37,20 @@ typedef struct CABACContext{ #ifdef STRICT_LIMITS int symCount; #endif - uint8_t lps_range[2*64][4]; ///< rangeTabLPS + uint8_t lps_range[2*65][4]; ///< rangeTabLPS uint8_t lps_state[2*64]; ///< transIdxLPS uint8_t mps_state[2*64]; ///< transIdxMPS const uint8_t *bytestream_start; const uint8_t *bytestream; const uint8_t *bytestream_end; - int bits_left; ///< PutBitContext pb; }CABACContext; extern const uint8_t ff_h264_lps_range[64][4]; extern const uint8_t ff_h264_mps_state[64]; extern const uint8_t 
ff_h264_lps_state[64]; +extern const uint8_t ff_h264_norm_shift[256]; + void ff_init_cabac_encoder(CABACContext *c, uint8_t *buf, int buf_size); void ff_init_cabac_decoder(CABACContext *c, const uint8_t *buf, int buf_size); @@ -80,7 +84,7 @@ static inline void renorm_cabac_encoder(CABACContext *c){ } static inline void put_cabac(CABACContext *c, uint8_t * const state, int bit){ - int RangeLPS= c->lps_range[*state][((c->range)>>6)&3]; + int RangeLPS= c->lps_range[*state][c->range>>6]; if(bit == ((*state)&1)){ c->range -= RangeLPS; @@ -249,63 +253,102 @@ static inline void put_cabac_ueg(CABACContext *c, uint8_t * state, int v, int ma } } +static void refill(CABACContext *c){ + if(c->bytestream < c->bytestream_end) +#if CABAC_BITS == 16 + c->low+= ((c->bytestream[0]<<9) + (c->bytestream[1])<<1); +#else + c->low+= c->bytestream[0]<<1; +#endif + c->low -= CABAC_MASK; + c->bytestream+= CABAC_BITS/8; +} + +#if 0 /* all use commented */ +static void refill2(CABACContext *c){ + int i, x; + + x= c->low ^ (c->low-1); + i= 8 - ff_h264_norm_shift[x>>(CABAC_BITS+1)]; + + x= -CABAC_MASK; + + if(c->bytestream < c->bytestream_end) +#if CABAC_BITS == 16 + x+= (c->bytestream[0]<<9) + (c->bytestream[1]<<1); +#else + x+= c->bytestream[0]<<1; +#endif + + c->low += x<<i; + c->bytestream+= CABAC_BITS/8; +} +#endif + static inline void renorm_cabac_decoder(CABACContext *c){ - while(c->range < 0x10000){ + while(c->range < (0x200 << CABAC_BITS)){ c->range+= c->range; c->low+= c->low; - if(--c->bits_left == 0){ - if(c->bytestream < c->bytestream_end) - c->low+= *c->bytestream; - c->bytestream++; - c->bits_left= 8; - } + if(!(c->low & CABAC_MASK)) + refill(c); } } +static inline void renorm_cabac_decoder_once(CABACContext *c){ + int mask= (c->range - (0x200 << CABAC_BITS))>>31; + c->range+= c->range&mask; + c->low += c->low &mask; + if(!(c->low & CABAC_MASK)) + refill(c); +} + static inline int get_cabac(CABACContext *c, uint8_t * const state){ - int RangeLPS= 
c->lps_range[*state][((c->range)>>14)&3]<<8; - int bit; + int RangeLPS= c->lps_range[*state][c->range>>(CABAC_BITS+7)]<<(CABAC_BITS+1); + int bit, lps_mask attribute_unused; c->range -= RangeLPS; +#if 1 if(c->low < c->range){ bit= (*state)&1; *state= c->mps_state[*state]; + renorm_cabac_decoder_once(c); }else{ +// int shift= ff_h264_norm_shift[RangeLPS>>17]; bit= ((*state)&1)^1; c->low -= c->range; - c->range = RangeLPS; *state= c->lps_state[*state]; + c->range = RangeLPS; + renorm_cabac_decoder(c); +/* c->range = RangeLPS<<shift; + c->low <<= shift; + if(!(c->low & 0xFFFF)){ + refill2(c); + }*/ } - renorm_cabac_decoder(c); +#else + lps_mask= (c->range - c->low)>>31; - return bit; -} - -static inline int get_cabac_static(CABACContext *c, int RangeLPS){ - int bit; + c->low -= c->range & lps_mask; + c->range += (RangeLPS - c->range) & lps_mask; - c->range -= RangeLPS; - if(c->low < c->range){ - bit= 0; - }else{ - bit= 1; - c->low -= c->range; - c->range = RangeLPS; - } - renorm_cabac_decoder(c); + bit= ((*state)^lps_mask)&1; + *state= c->mps_state[(*state) - (128&lps_mask)]; + lps_mask= ff_h264_norm_shift[c->range>>(CABAC_BITS+2)]; + c->range<<= lps_mask; + c->low <<= lps_mask; + if(!(c->low & CABAC_MASK)) + refill2(c); +#endif + return bit; } static inline int get_cabac_bypass(CABACContext *c){ c->low += c->low; - if(--c->bits_left == 0){ - if(c->bytestream < c->bytestream_end) - c->low+= *c->bytestream; - c->bytestream++; - c->bits_left= 8; - } + if(!(c->low & CABAC_MASK)) + refill(c); if(c->low < c->range){ return 0; @@ -320,9 +363,9 @@ static inline int get_cabac_bypass(CABACContext *c){ * @return the number of bytes read or 0 if no end */ static inline int get_cabac_terminate(CABACContext *c){ - c->range -= 2<<8; + c->range -= 4<<CABAC_BITS; if(c->low < c->range){ - renorm_cabac_decoder(c); + renorm_cabac_decoder_once(c); return 0; }else{ return c->bytestream - c->bytestream_start; diff --git a/src/libffmpeg/libavcodec/cinepak.c 
b/src/libffmpeg/libavcodec/cinepak.c index da9a8127f..3c560fdc5 100644 --- a/src/libffmpeg/libavcodec/cinepak.c +++ b/src/libffmpeg/libavcodec/cinepak.c @@ -35,7 +35,6 @@ #include "avcodec.h" #include "dsputil.h" -#define PALETTE_COUNT 256 typedef struct { uint8_t y0, y1, y2, y3; @@ -63,7 +62,6 @@ typedef struct CinepakContext { int width, height; - unsigned char palette[PALETTE_COUNT * 4]; int palette_video; cvid_strip_t strips[MAX_STRIPS]; @@ -177,28 +175,28 @@ static int cinepak_decode_vectors (CinepakContext *s, cvid_strip_t *strip, s->frame.data[2][iv[0]] = codebook->v; } - s->frame.data[0][iy[0] + 2] = codebook->y0; - s->frame.data[0][iy[0] + 3] = codebook->y0; - s->frame.data[0][iy[1] + 2] = codebook->y0; - s->frame.data[0][iy[1] + 3] = codebook->y0; + s->frame.data[0][iy[0] + 2] = codebook->y1; + s->frame.data[0][iy[0] + 3] = codebook->y1; + s->frame.data[0][iy[1] + 2] = codebook->y1; + s->frame.data[0][iy[1] + 3] = codebook->y1; if (!s->palette_video) { s->frame.data[1][iu[0] + 1] = codebook->u; s->frame.data[2][iv[0] + 1] = codebook->v; } - s->frame.data[0][iy[2] + 0] = codebook->y0; - s->frame.data[0][iy[2] + 1] = codebook->y0; - s->frame.data[0][iy[3] + 0] = codebook->y0; - s->frame.data[0][iy[3] + 1] = codebook->y0; + s->frame.data[0][iy[2] + 0] = codebook->y2; + s->frame.data[0][iy[2] + 1] = codebook->y2; + s->frame.data[0][iy[3] + 0] = codebook->y2; + s->frame.data[0][iy[3] + 1] = codebook->y2; if (!s->palette_video) { s->frame.data[1][iu[1]] = codebook->u; s->frame.data[2][iv[1]] = codebook->v; } - s->frame.data[0][iy[2] + 2] = codebook->y0; - s->frame.data[0][iy[2] + 3] = codebook->y0; - s->frame.data[0][iy[3] + 2] = codebook->y0; - s->frame.data[0][iy[3] + 3] = codebook->y0; + s->frame.data[0][iy[2] + 2] = codebook->y3; + s->frame.data[0][iy[2] + 3] = codebook->y3; + s->frame.data[0][iy[3] + 2] = codebook->y3; + s->frame.data[0][iy[3] + 3] = codebook->y3; if (!s->palette_video) { s->frame.data[1][iu[1] + 1] = codebook->u; s->frame.data[2][iv[1] + 
1] = codebook->v; @@ -361,22 +359,20 @@ static int cinepak_decode (CinepakContext *s) static int cinepak_decode_init(AVCodecContext *avctx) { CinepakContext *s = (CinepakContext *)avctx->priv_data; -/* - int i; - unsigned char r, g, b; - unsigned char *raw_palette; - unsigned int *palette32; -*/ s->avctx = avctx; s->width = (avctx->width + 3) & ~3; s->height = (avctx->height + 3) & ~3; -// check for paletted data -s->palette_video = 0; - + // check for paletted data + if ((avctx->palctrl == NULL) || (avctx->bits_per_sample == 40)) { + s->palette_video = 0; + avctx->pix_fmt = PIX_FMT_YUV420P; + } else { + s->palette_video = 1; + avctx->pix_fmt = PIX_FMT_PAL8; + } - avctx->pix_fmt = PIX_FMT_YUV420P; avctx->has_b_frames = 0; dsputil_init(&s->dsp, avctx); @@ -404,6 +400,15 @@ static int cinepak_decode_frame(AVCodecContext *avctx, cinepak_decode(s); + if (s->palette_video) { + memcpy (s->frame.data[1], avctx->palctrl->palette, AVPALETTE_SIZE); + if (avctx->palctrl->palette_changed) { + s->frame.palette_has_changed = 1; + avctx->palctrl->palette_changed = 0; + } else + s->frame.palette_has_changed = 0; + } + *data_size = sizeof(AVFrame); *(AVFrame*)data = s->frame; diff --git a/src/libffmpeg/libavcodec/cljr.c b/src/libffmpeg/libavcodec/cljr.c index df1f79851..8072eee18 100644 --- a/src/libffmpeg/libavcodec/cljr.c +++ b/src/libffmpeg/libavcodec/cljr.c @@ -43,11 +43,6 @@ static int decode_frame(AVCodecContext *avctx, AVFrame * const p= (AVFrame*)&a->picture; int x, y; - /* special case for last picture */ - if (buf_size == 0) { - return 0; - } - if(p->data[0]) avctx->release_buffer(avctx, p); @@ -124,12 +119,14 @@ static int decode_init(AVCodecContext *avctx){ return 0; } +#if 0 static int encode_init(AVCodecContext *avctx){ common_init(avctx); return 0; } +#endif AVCodec cljr_decoder = { "cljr", diff --git a/src/libffmpeg/libavcodec/common.h b/src/libffmpeg/libavcodec/common.h index 5cf9f2c73..b9e89be46 100644 --- a/src/libffmpeg/libavcodec/common.h +++ 
b/src/libffmpeg/libavcodec/common.h @@ -15,14 +15,6 @@ # define CONFIG_WIN32 #endif -//#define ALT_BITSTREAM_WRITER -//#define ALIGNED_BITSTREAM_WRITER - -#define ALT_BITSTREAM_READER -//#define LIBMPEG2_BITSTREAM_READER -//#define A32_BITSTREAM_READER -#define LIBMPEG2_BITSTREAM_READER_HACK //add BERO - #ifndef M_PI #define M_PI 3.14159265358979323846 #endif @@ -35,6 +27,7 @@ # include <stdio.h> # include <string.h> # include <ctype.h> +# include <limits.h> # ifndef __BEOS__ # include <errno.h> # else @@ -66,14 +59,6 @@ #define AVOPTION_SUB(ptr) { .name = NULL, .help = (const char*)ptr } #define AVOPTION_END() AVOPTION_SUB(NULL) -struct AVOption; -#ifdef HAVE_MMX -extern const struct AVOption avoptions_common[3 + 5]; -#else -extern const struct AVOption avoptions_common[3]; -#endif -extern const struct AVOption avoptions_workaround_bug[11]; - #endif /* HAVE_AV_CONFIG_H */ /* Suppress restrict if it was not defined in config.h. */ @@ -97,6 +82,14 @@ extern const struct AVOption avoptions_workaround_bug[11]; #endif #endif +#ifndef attribute_unused +#if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 0) +# define attribute_unused __attribute__((unused)) +#else +# define attribute_unused +#endif +#endif + #ifndef EMULATE_INTTYPES # include <inttypes.h> #else @@ -116,6 +109,18 @@ extern const struct AVOption avoptions_workaround_bug[11]; # endif /* other OS */ #endif /* HAVE_INTTYPES_H */ +#ifndef INT16_MIN +#define INT16_MIN (-0x7fff-1) +#endif + +#ifndef INT16_MAX +#define INT16_MAX 0x7fff +#endif + +#ifndef INT64_MIN +#define INT64_MIN (-0x7fffffffffffffffLL-1) +#endif + #ifndef INT64_MAX #define INT64_MAX int64_t_C(9223372036854775807) #endif @@ -134,6 +139,14 @@ typedef unsigned int uint_fast16_t; typedef unsigned int uint_fast32_t; #endif +#ifndef INT_BIT +# if INT_MAX != 2147483647 +# define INT_BIT 64 +# else +# define INT_BIT 32 +# endif +#endif + #if defined(CONFIG_OS2) || defined(CONFIG_SUNOS) static inline float floorf(float f) { 
return floor(f); @@ -208,11 +221,20 @@ static inline float floorf(float f) { # include "bswap.h" +// Use rip-relative addressing if compiling PIC code on x86-64. # if defined(__MINGW32__) || defined(__CYGWIN__) || \ defined(__OS2__) || (defined (__OpenBSD__) && !defined(__ELF__)) -# define MANGLE(a) "_" #a +# if defined(ARCH_X86_64) && defined(PIC) +# define MANGLE(a) "_" #a"(%%rip)" +# else +# define MANGLE(a) "_" #a +# endif # else -# define MANGLE(a) #a +# if defined(ARCH_X86_64) && defined(PIC) +# define MANGLE(a) #a"(%%rip)" +# else +# define MANGLE(a) #a +# endif # endif /* debug stuff */ @@ -252,7 +274,7 @@ inline void dprintf(const char* fmt,...) {} extern const uint32_t inverse[256]; -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) # define FASTDIV(a,b) \ ({\ int ret,dmy;\ @@ -269,819 +291,6 @@ extern const uint32_t inverse[256]; # define FASTDIV(a,b) ((a)/(b)) #endif -#ifdef ARCH_X86 -// avoid +32 for shift optimization (gcc should do that ...) -static inline int32_t NEG_SSR32( int32_t a, int8_t s){ - asm ("sarl %1, %0\n\t" - : "+r" (a) - : "ic" ((uint8_t)(-s)) - ); - return a; -} -static inline uint32_t NEG_USR32(uint32_t a, int8_t s){ - asm ("shrl %1, %0\n\t" - : "+r" (a) - : "ic" ((uint8_t)(-s)) - ); - return a; -} -#else -# define NEG_SSR32(a,s) ((( int32_t)(a))>>(32-(s))) -# define NEG_USR32(a,s) (((uint32_t)(a))>>(32-(s))) -#endif - -/* bit output */ - -struct PutBitContext; - -typedef void (*WriteDataFunc)(void *, uint8_t *, int); - -/* buf and buf_end must be present and used by every alternative writer. 
*/ -typedef struct PutBitContext { -#ifdef ALT_BITSTREAM_WRITER - uint8_t *buf, *buf_end; - int index; -#else - uint32_t bit_buf; - int bit_left; - uint8_t *buf, *buf_ptr, *buf_end; -#endif -} PutBitContext; - -static inline void init_put_bits(PutBitContext *s, uint8_t *buffer, int buffer_size) -{ - s->buf = buffer; - s->buf_end = s->buf + buffer_size; -#ifdef ALT_BITSTREAM_WRITER - s->index=0; - ((uint32_t*)(s->buf))[0]=0; -// memset(buffer, 0, buffer_size); -#else - s->buf_ptr = s->buf; - s->bit_left=32; - s->bit_buf=0; -#endif -} - -/* return the number of bits output */ -static inline int put_bits_count(PutBitContext *s) -{ -#ifdef ALT_BITSTREAM_WRITER - return s->index; -#else - return (s->buf_ptr - s->buf) * 8 + 32 - s->bit_left; -#endif -} - -/* pad the end of the output stream with zeros */ -static inline void flush_put_bits(PutBitContext *s) -{ -#ifdef ALT_BITSTREAM_WRITER - align_put_bits(s); -#else - s->bit_buf<<= s->bit_left; - while (s->bit_left < 32) { - /* XXX: should test end of buffer */ - *s->buf_ptr++=s->bit_buf >> 24; - s->bit_buf<<=8; - s->bit_left+=8; - } - s->bit_left=32; - s->bit_buf=0; -#endif -} - -void align_put_bits(PutBitContext *s); -void put_string(PutBitContext * pbc, char *s, int put_zero); - -/* bit input */ -/* buffer, buffer_end and size_in_bits must be present and used by every reader */ -typedef struct GetBitContext { - const uint8_t *buffer, *buffer_end; -#ifdef ALT_BITSTREAM_READER - int index; -#elif defined LIBMPEG2_BITSTREAM_READER - uint8_t *buffer_ptr; - uint32_t cache; - int bit_count; -#elif defined A32_BITSTREAM_READER - uint32_t *buffer_ptr; - uint32_t cache0; - uint32_t cache1; - int bit_count; -#endif - int size_in_bits; -} GetBitContext; - -#define VLC_TYPE int16_t - -typedef struct VLC { - int bits; - VLC_TYPE (*table)[2]; ///< code, bits - int table_size, table_allocated; -} VLC; - -typedef struct RL_VLC_ELEM { - int16_t level; - int8_t len; - uint8_t run; -} RL_VLC_ELEM; - -#ifdef ARCH_SPARC -#define 
UNALIGNED_STORES_ARE_BAD -#endif - -/* used to avoid missaligned exceptions on some archs (alpha, ...) */ -#ifdef ARCH_X86 -# define unaligned32(a) (*(uint32_t*)(a)) -#else -# ifdef __GNUC__ -static inline uint32_t unaligned32(const void *v) { - struct Unaligned { - uint32_t i; - } __attribute__((packed)); - - return ((const struct Unaligned *) v)->i; -} -# elif defined(__DECC) -static inline uint32_t unaligned32(const void *v) { - return *(const __unaligned uint32_t *) v; -} -# else -static inline uint32_t unaligned32(const void *v) { - return *(const uint32_t *) v; -} -# endif -#endif //!ARCH_X86 - -#ifndef ALT_BITSTREAM_WRITER -static inline void put_bits(PutBitContext *s, int n, unsigned int value) -{ - unsigned int bit_buf; - int bit_left; - -#ifdef STATS - st_out_bit_counts[st_current_index] += n; -#endif - // printf("put_bits=%d %x\n", n, value); - assert(n == 32 || value < (1U << n)); - - bit_buf = s->bit_buf; - bit_left = s->bit_left; - - // printf("n=%d value=%x cnt=%d buf=%x\n", n, value, bit_cnt, bit_buf); - /* XXX: optimize */ - if (n < bit_left) { - bit_buf = (bit_buf<<n) | value; - bit_left-=n; - } else { - bit_buf<<=bit_left; - bit_buf |= value >> (n - bit_left); -#ifdef UNALIGNED_STORES_ARE_BAD - if (3 & (intptr_t) s->buf_ptr) { - s->buf_ptr[0] = bit_buf >> 24; - s->buf_ptr[1] = bit_buf >> 16; - s->buf_ptr[2] = bit_buf >> 8; - s->buf_ptr[3] = bit_buf ; - } else -#endif - *(uint32_t *)s->buf_ptr = be2me_32(bit_buf); - //printf("bitbuf = %08x\n", bit_buf); - s->buf_ptr+=4; - bit_left+=32 - n; - bit_buf = value; - } - - s->bit_buf = bit_buf; - s->bit_left = bit_left; -} -#endif - - -#ifdef ALT_BITSTREAM_WRITER -static inline void put_bits(PutBitContext *s, int n, unsigned int value) -{ -# ifdef ALIGNED_BITSTREAM_WRITER -# ifdef ARCH_X86 - asm volatile( - "movl %0, %%ecx \n\t" - "xorl %%eax, %%eax \n\t" - "shrdl %%cl, %1, %%eax \n\t" - "shrl %%cl, %1 \n\t" - "movl %0, %%ecx \n\t" - "shrl $3, %%ecx \n\t" - "andl $0xFFFFFFFC, %%ecx \n\t" - "bswapl %1 
\n\t" - "orl %1, (%2, %%ecx) \n\t" - "bswapl %%eax \n\t" - "addl %3, %0 \n\t" - "movl %%eax, 4(%2, %%ecx) \n\t" - : "=&r" (s->index), "=&r" (value) - : "r" (s->buf), "r" (n), "0" (s->index), "1" (value<<(-n)) - : "%eax", "%ecx" - ); -# else - int index= s->index; - uint32_t *ptr= ((uint32_t *)s->buf)+(index>>5); - - value<<= 32-n; - - ptr[0] |= be2me_32(value>>(index&31)); - ptr[1] = be2me_32(value<<(32-(index&31))); -//if(n>24) printf("%d %d\n", n, value); - index+= n; - s->index= index; -# endif -# else //ALIGNED_BITSTREAM_WRITER -# ifdef ARCH_X86 - asm volatile( - "movl $7, %%ecx \n\t" - "andl %0, %%ecx \n\t" - "addl %3, %%ecx \n\t" - "negl %%ecx \n\t" - "shll %%cl, %1 \n\t" - "bswapl %1 \n\t" - "movl %0, %%ecx \n\t" - "shrl $3, %%ecx \n\t" - "orl %1, (%%ecx, %2) \n\t" - "addl %3, %0 \n\t" - "movl $0, 4(%%ecx, %2) \n\t" - : "=&r" (s->index), "=&r" (value) - : "r" (s->buf), "r" (n), "0" (s->index), "1" (value) - : "%ecx" - ); -# else - int index= s->index; - uint32_t *ptr= (uint32_t*)(((uint8_t *)s->buf)+(index>>3)); - - ptr[0] |= be2me_32(value<<(32-n-(index&7) )); - ptr[1] = 0; -//if(n>24) printf("%d %d\n", n, value); - index+= n; - s->index= index; -# endif -# endif //!ALIGNED_BITSTREAM_WRITER -} -#endif - - -static inline uint8_t* pbBufPtr(PutBitContext *s) -{ -#ifdef ALT_BITSTREAM_WRITER - return s->buf + (s->index>>3); -#else - return s->buf_ptr; -#endif -} - -/** - * - * PutBitContext must be flushed & aligned to a byte boundary before calling this. - */ -static inline void skip_put_bytes(PutBitContext *s, int n){ - assert((put_bits_count(s)&7)==0); -#ifdef ALT_BITSTREAM_WRITER - FIXME may need some cleaning of the buffer - s->index += n<<3; -#else - assert(s->bit_left==32); - s->buf_ptr += n; -#endif -} - -/** - * Changes the end of the buffer. 
- */ -static inline void set_put_bits_buffer_size(PutBitContext *s, int size){ - s->buf_end= s->buf + size; -} - -/* Bitstream reader API docs: -name - abritary name which is used as prefix for the internal variables - -gb - getbitcontext - -OPEN_READER(name, gb) - loads gb into local variables - -CLOSE_READER(name, gb) - stores local vars in gb - -UPDATE_CACHE(name, gb) - refills the internal cache from the bitstream - after this call at least MIN_CACHE_BITS will be available, - -GET_CACHE(name, gb) - will output the contents of the internal cache, next bit is MSB of 32 or 64 bit (FIXME 64bit) - -SHOW_UBITS(name, gb, num) - will return the nest num bits - -SHOW_SBITS(name, gb, num) - will return the nest num bits and do sign extension - -SKIP_BITS(name, gb, num) - will skip over the next num bits - note, this is equinvalent to SKIP_CACHE; SKIP_COUNTER - -SKIP_CACHE(name, gb, num) - will remove the next num bits from the cache (note SKIP_COUNTER MUST be called before UPDATE_CACHE / CLOSE_READER) - -SKIP_COUNTER(name, gb, num) - will increment the internal bit counter (see SKIP_CACHE & SKIP_BITS) - -LAST_SKIP_CACHE(name, gb, num) - will remove the next num bits from the cache if it is needed for UPDATE_CACHE otherwise it will do nothing - -LAST_SKIP_BITS(name, gb, num) - is equinvalent to SKIP_LAST_CACHE; SKIP_COUNTER - -for examples see get_bits, show_bits, skip_bits, get_vlc -*/ - -static inline int unaligned32_be(const void *v) -{ -#ifdef CONFIG_ALIGN - const uint8_t *p=v; - return (((p[0]<<8) | p[1])<<16) | (p[2]<<8) | (p[3]); -#else - return be2me_32( unaligned32(v)); //original -#endif -} - -#ifdef ALT_BITSTREAM_READER -# define MIN_CACHE_BITS 25 - -# define OPEN_READER(name, gb)\ - int name##_index= (gb)->index;\ - int name##_cache= 0;\ - -# define CLOSE_READER(name, gb)\ - (gb)->index= name##_index;\ - -# define UPDATE_CACHE(name, gb)\ - name##_cache= unaligned32_be( ((uint8_t *)(gb)->buffer)+(name##_index>>3) ) << (name##_index&0x07);\ - -# define 
SKIP_CACHE(name, gb, num)\ - name##_cache <<= (num);\ - -// FIXME name? -# define SKIP_COUNTER(name, gb, num)\ - name##_index += (num);\ - -# define SKIP_BITS(name, gb, num)\ - {\ - SKIP_CACHE(name, gb, num)\ - SKIP_COUNTER(name, gb, num)\ - }\ - -# define LAST_SKIP_BITS(name, gb, num) SKIP_COUNTER(name, gb, num) -# define LAST_SKIP_CACHE(name, gb, num) ; - -# define SHOW_UBITS(name, gb, num)\ - NEG_USR32(name##_cache, num) - -# define SHOW_SBITS(name, gb, num)\ - NEG_SSR32(name##_cache, num) - -# define GET_CACHE(name, gb)\ - ((uint32_t)name##_cache) - -static inline int get_bits_count(GetBitContext *s){ - return s->index; -} -#elif defined LIBMPEG2_BITSTREAM_READER -//libmpeg2 like reader - -# define MIN_CACHE_BITS 17 - -# define OPEN_READER(name, gb)\ - int name##_bit_count=(gb)->bit_count;\ - int name##_cache= (gb)->cache;\ - uint8_t * name##_buffer_ptr=(gb)->buffer_ptr;\ - -# define CLOSE_READER(name, gb)\ - (gb)->bit_count= name##_bit_count;\ - (gb)->cache= name##_cache;\ - (gb)->buffer_ptr= name##_buffer_ptr;\ - -#ifdef LIBMPEG2_BITSTREAM_READER_HACK - -# define UPDATE_CACHE(name, gb)\ - if(name##_bit_count >= 0){\ - name##_cache+= (int)be2me_16(*(uint16_t*)name##_buffer_ptr) << name##_bit_count;\ - ((uint16_t*)name##_buffer_ptr)++;\ - name##_bit_count-= 16;\ - }\ - -#else - -# define UPDATE_CACHE(name, gb)\ - if(name##_bit_count >= 0){\ - name##_cache+= ((name##_buffer_ptr[0]<<8) + name##_buffer_ptr[1]) << name##_bit_count;\ - name##_buffer_ptr+=2;\ - name##_bit_count-= 16;\ - }\ - -#endif - -# define SKIP_CACHE(name, gb, num)\ - name##_cache <<= (num);\ - -# define SKIP_COUNTER(name, gb, num)\ - name##_bit_count += (num);\ - -# define SKIP_BITS(name, gb, num)\ - {\ - SKIP_CACHE(name, gb, num)\ - SKIP_COUNTER(name, gb, num)\ - }\ - -# define LAST_SKIP_BITS(name, gb, num) SKIP_BITS(name, gb, num) -# define LAST_SKIP_CACHE(name, gb, num) SKIP_CACHE(name, gb, num) - -# define SHOW_UBITS(name, gb, num)\ - NEG_USR32(name##_cache, num) - -# define 
SHOW_SBITS(name, gb, num)\ - NEG_SSR32(name##_cache, num) - -# define GET_CACHE(name, gb)\ - ((uint32_t)name##_cache) - -static inline int get_bits_count(GetBitContext *s){ - return (s->buffer_ptr - s->buffer)*8 - 16 + s->bit_count; -} - -#elif defined A32_BITSTREAM_READER - -# define MIN_CACHE_BITS 32 - -# define OPEN_READER(name, gb)\ - int name##_bit_count=(gb)->bit_count;\ - uint32_t name##_cache0= (gb)->cache0;\ - uint32_t name##_cache1= (gb)->cache1;\ - uint32_t * name##_buffer_ptr=(gb)->buffer_ptr;\ - -# define CLOSE_READER(name, gb)\ - (gb)->bit_count= name##_bit_count;\ - (gb)->cache0= name##_cache0;\ - (gb)->cache1= name##_cache1;\ - (gb)->buffer_ptr= name##_buffer_ptr;\ - -# define UPDATE_CACHE(name, gb)\ - if(name##_bit_count > 0){\ - const uint32_t next= be2me_32( *name##_buffer_ptr );\ - name##_cache0 |= NEG_USR32(next,name##_bit_count);\ - name##_cache1 |= next<<name##_bit_count;\ - name##_buffer_ptr++;\ - name##_bit_count-= 32;\ - }\ - -#ifdef ARCH_X86 -# define SKIP_CACHE(name, gb, num)\ - asm(\ - "shldl %2, %1, %0 \n\t"\ - "shll %2, %1 \n\t"\ - : "+r" (name##_cache0), "+r" (name##_cache1)\ - : "Ic" ((uint8_t)num)\ - ); -#else -# define SKIP_CACHE(name, gb, num)\ - name##_cache0 <<= (num);\ - name##_cache0 |= NEG_USR32(name##_cache1,num);\ - name##_cache1 <<= (num); -#endif - -# define SKIP_COUNTER(name, gb, num)\ - name##_bit_count += (num);\ - -# define SKIP_BITS(name, gb, num)\ - {\ - SKIP_CACHE(name, gb, num)\ - SKIP_COUNTER(name, gb, num)\ - }\ - -# define LAST_SKIP_BITS(name, gb, num) SKIP_BITS(name, gb, num) -# define LAST_SKIP_CACHE(name, gb, num) SKIP_CACHE(name, gb, num) - -# define SHOW_UBITS(name, gb, num)\ - NEG_USR32(name##_cache0, num) - -# define SHOW_SBITS(name, gb, num)\ - NEG_SSR32(name##_cache0, num) - -# define GET_CACHE(name, gb)\ - (name##_cache0) - -static inline int get_bits_count(GetBitContext *s){ - return ((uint8_t*)s->buffer_ptr - s->buffer)*8 - 32 + s->bit_count; -} - -#endif - -/** - * read mpeg1 dc style vlc (sign 
bit + mantisse with no MSB). - * if MSB not set it is negative - * @param n length in bits - * @author BERO - */ -static inline int get_xbits(GetBitContext *s, int n){ - register int tmp; - register int32_t cache; - OPEN_READER(re, s) - UPDATE_CACHE(re, s) - cache = GET_CACHE(re,s); - if ((int32_t)cache<0) { //MSB=1 - tmp = NEG_USR32(cache,n); - } else { - // tmp = (-1<<n) | NEG_USR32(cache,n) + 1; mpeg12.c algo - // tmp = - (NEG_USR32(cache,n) ^ ((1 << n) - 1)); h263.c algo - tmp = - NEG_USR32(~cache,n); - } - LAST_SKIP_BITS(re, s, n) - CLOSE_READER(re, s) - return tmp; -} - -static inline int get_sbits(GetBitContext *s, int n){ - register int tmp; - OPEN_READER(re, s) - UPDATE_CACHE(re, s) - tmp= SHOW_SBITS(re, s, n); - LAST_SKIP_BITS(re, s, n) - CLOSE_READER(re, s) - return tmp; -} - -/** - * reads 0-17 bits. - * Note, the alt bitstream reader can read upto 25 bits, but the libmpeg2 reader cant - */ -static inline unsigned int get_bits(GetBitContext *s, int n){ - register int tmp; - OPEN_READER(re, s) - UPDATE_CACHE(re, s) - tmp= SHOW_UBITS(re, s, n); - LAST_SKIP_BITS(re, s, n) - CLOSE_READER(re, s) - return tmp; -} - -unsigned int get_bits_long(GetBitContext *s, int n); - -/** - * shows 0-17 bits. 
- * Note, the alt bitstream reader can read upto 25 bits, but the libmpeg2 reader cant - */ -static inline unsigned int show_bits(GetBitContext *s, int n){ - register int tmp; - OPEN_READER(re, s) - UPDATE_CACHE(re, s) - tmp= SHOW_UBITS(re, s, n); -// CLOSE_READER(re, s) - return tmp; -} - -unsigned int show_bits_long(GetBitContext *s, int n); - -static inline void skip_bits(GetBitContext *s, int n){ - //Note gcc seems to optimize this to s->index+=n for the ALT_READER :)) - OPEN_READER(re, s) - UPDATE_CACHE(re, s) - LAST_SKIP_BITS(re, s, n) - CLOSE_READER(re, s) -} - -static inline unsigned int get_bits1(GetBitContext *s){ -#ifdef ALT_BITSTREAM_READER - int index= s->index; - uint8_t result= s->buffer[ index>>3 ]; - result<<= (index&0x07); - result>>= 8 - 1; - index++; - s->index= index; - - return result; -#else - return get_bits(s, 1); -#endif -} - -static inline unsigned int show_bits1(GetBitContext *s){ - return show_bits(s, 1); -} - -static inline void skip_bits1(GetBitContext *s){ - skip_bits(s, 1); -} - -/** - * init GetBitContext. 
- * @param buffer bitstream buffer, must be FF_INPUT_BUFFER_PADDING_SIZE bytes larger then the actual read bits - * because some optimized bitstream readers read 32 or 64 bit at once and could read over the end - * @param bit_size the size of the buffer in bits - */ -static inline void init_get_bits(GetBitContext *s, - const uint8_t *buffer, int bit_size) -{ - const int buffer_size= (bit_size+7)>>3; - - s->buffer= buffer; - s->size_in_bits= bit_size; - s->buffer_end= buffer + buffer_size; -#ifdef ALT_BITSTREAM_READER - s->index=0; -#elif defined LIBMPEG2_BITSTREAM_READER -#ifdef LIBMPEG2_BITSTREAM_READER_HACK - if ((int)buffer&1) { - /* word alignment */ - s->cache = (*buffer++)<<24; - s->buffer_ptr = buffer; - s->bit_count = 16-8; - } else -#endif - { - s->buffer_ptr = buffer; - s->bit_count = 16; - s->cache = 0; - } -#elif defined A32_BITSTREAM_READER - s->buffer_ptr = (uint32_t*)buffer; - s->bit_count = 32; - s->cache0 = 0; - s->cache1 = 0; -#endif - { - OPEN_READER(re, s) - UPDATE_CACHE(re, s) - UPDATE_CACHE(re, s) - CLOSE_READER(re, s) - } -#ifdef A32_BITSTREAM_READER - s->cache1 = 0; -#endif -} - -int check_marker(GetBitContext *s, const char *msg); -void align_get_bits(GetBitContext *s); -int init_vlc(VLC *vlc, int nb_bits, int nb_codes, - const void *bits, int bits_wrap, int bits_size, - const void *codes, int codes_wrap, int codes_size); -void free_vlc(VLC *vlc); - -/** - * - * if the vlc code is invalid and max_depth=1 than no bits will be removed - * if the vlc code is invalid and max_depth>1 than the number of bits removed - * is undefined - */ -#define GET_VLC(code, name, gb, table, bits, max_depth)\ -{\ - int n, index, nb_bits;\ -\ - index= SHOW_UBITS(name, gb, bits);\ - code = table[index][0];\ - n = table[index][1];\ -\ - if(max_depth > 1 && n < 0){\ - LAST_SKIP_BITS(name, gb, bits)\ - UPDATE_CACHE(name, gb)\ -\ - nb_bits = -n;\ -\ - index= SHOW_UBITS(name, gb, nb_bits) + code;\ - code = table[index][0];\ - n = table[index][1];\ - if(max_depth > 2 
&& n < 0){\ - LAST_SKIP_BITS(name, gb, nb_bits)\ - UPDATE_CACHE(name, gb)\ -\ - nb_bits = -n;\ -\ - index= SHOW_UBITS(name, gb, nb_bits) + code;\ - code = table[index][0];\ - n = table[index][1];\ - }\ - }\ - SKIP_BITS(name, gb, n)\ -} - -#define GET_RL_VLC(level, run, name, gb, table, bits, max_depth)\ -{\ - int n, index, nb_bits;\ -\ - index= SHOW_UBITS(name, gb, bits);\ - level = table[index].level;\ - n = table[index].len;\ -\ - if(max_depth > 1 && n < 0){\ - LAST_SKIP_BITS(name, gb, bits)\ - UPDATE_CACHE(name, gb)\ -\ - nb_bits = -n;\ -\ - index= SHOW_UBITS(name, gb, nb_bits) + level;\ - level = table[index].level;\ - n = table[index].len;\ - }\ - run= table[index].run;\ - SKIP_BITS(name, gb, n)\ -} - -// deprecated, dont use get_vlc for new code, use get_vlc2 instead or use GET_VLC directly -static inline int get_vlc(GetBitContext *s, VLC *vlc) -{ - int code; - VLC_TYPE (*table)[2]= vlc->table; - - OPEN_READER(re, s) - UPDATE_CACHE(re, s) - - GET_VLC(code, re, s, table, vlc->bits, 3) - - CLOSE_READER(re, s) - return code; -} - -/** - * parses a vlc code, faster then get_vlc() - * @param bits is the number of bits which will be read at once, must be - * identical to nb_bits in init_vlc() - * @param max_depth is the number of times bits bits must be readed to completly - * read the longest vlc code - * = (max_vlc_length + bits - 1) / bits - */ -static always_inline int get_vlc2(GetBitContext *s, VLC_TYPE (*table)[2], - int bits, int max_depth) -{ - int code; - - OPEN_READER(re, s) - UPDATE_CACHE(re, s) - - GET_VLC(code, re, s, table, bits, max_depth) - - CLOSE_READER(re, s) - return code; -} - -//#define TRACE - -#ifdef TRACE - -static inline void print_bin(int bits, int n){ - int i; - - for(i=n-1; i>=0; i--){ - printf("%d", (bits>>i)&1); - } - for(i=n; i<24; i++) - printf(" "); -} - -static inline int get_bits_trace(GetBitContext *s, int n, char *file, char *func, int line){ - int r= get_bits(s, n); - - print_bin(r, n); - printf("%5d %2d %3d bit @%5d in %s 
%s:%d\n", r, n, r, get_bits_count(s)-n, file, func, line); - return r; -} -static inline int get_vlc_trace(GetBitContext *s, VLC_TYPE (*table)[2], int bits, int max_depth, char *file, char *func, int line){ - int show= show_bits(s, 24); - int pos= get_bits_count(s); - int r= get_vlc2(s, table, bits, max_depth); - int len= get_bits_count(s) - pos; - int bits2= show>>(24-len); - - print_bin(bits2, len); - - printf("%5d %2d %3d vlc @%5d in %s %s:%d\n", bits2, len, r, pos, file, func, line); - return r; -} -static inline int get_xbits_trace(GetBitContext *s, int n, char *file, char *func, int line){ - int show= show_bits(s, n); - int r= get_xbits(s, n); - - print_bin(show, n); - printf("%5d %2d %3d xbt @%5d in %s %s:%d\n", show, n, r, get_bits_count(s)-n, file, func, line); - return r; -} - -#define get_bits(s, n) get_bits_trace(s, n, __FILE__, __PRETTY_FUNCTION__, __LINE__) -#define get_bits1(s) get_bits_trace(s, 1, __FILE__, __PRETTY_FUNCTION__, __LINE__) -#define get_xbits(s, n) get_xbits_trace(s, n, __FILE__, __PRETTY_FUNCTION__, __LINE__) -#define get_vlc(s, vlc) get_vlc_trace(s, (vlc)->table, (vlc)->bits, 3, __FILE__, __PRETTY_FUNCTION__, __LINE__) -#define get_vlc2(s, tab, bits, max) get_vlc_trace(s, tab, bits, max, __FILE__, __PRETTY_FUNCTION__, __LINE__) - -#define tprintf(...) av_log(NULL, AV_LOG_DEBUG, __VA_ARGS__) - -#else //TRACE -#define tprintf(...) 
{} -#endif - /* define it to include statistics code (useful only for optimizing codec efficiency */ //#define STATS @@ -1220,7 +429,7 @@ static inline int ff_get_fourcc(const char *s){ #define MKBETAG(a,b,c,d) (d | (c << 8) | (b << 16) | (a << 24)) -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) #define MASK_ABS(mask, level)\ asm volatile(\ "cdq \n\t"\ @@ -1254,8 +463,18 @@ if((y)<(x)){\ } #endif -#ifdef ARCH_X86 -static inline long long rdtsc() +#if defined(ARCH_X86) || defined(ARCH_X86_64) +#if defined(ARCH_X86_64) +static inline uint64_t rdtsc(void) +{ + uint64_t a, d; + asm volatile( "rdtsc\n\t" + : "=a" (a), "=d" (d) + ); + return (d << 32) | (a & 0xffffffff); +} +#else +static inline long long rdtsc(void) { long long l; asm volatile( "rdtsc\n\t" @@ -1263,6 +482,7 @@ static inline long long rdtsc() ); return l; } +#endif #define START_TIMER \ uint64_t tend;\ @@ -1283,6 +503,9 @@ tend= rdtsc();\ av_log(NULL, AV_LOG_DEBUG, "%Ld dezicycles in %s, %d runs, %d skips\n", tsum*10/tcount, id, tcount, tskip_count);\ }\ } +#else +#define START_TIMER +#define STOP_TIMER(id) {} #endif #define CLAMP_TO_8BIT(d) ((d > 0xff) ? 0xff : (d < 0) ? 
0 : d) @@ -1294,6 +517,8 @@ tend= rdtsc();\ #define time time_is_forbidden_due_to_security_issues #define rand rand_is_forbidden_due_to_state_trashing #define srand srand_is_forbidden_due_to_state_trashing +#define sprintf sprintf_is_forbidden_due_to_security_issues_use_snprintf +#define strcat strcat_is_forbidden_due_to_security_issues_use_pstrcat #if !(defined(LIBAVFORMAT_BUILD) || defined(_FRAMEHOOK_H)) #define printf please_use_av_log #define fprintf please_use_av_log diff --git a/src/libffmpeg/libavcodec/dpcm.c b/src/libffmpeg/libavcodec/dpcm.c index b80604e5f..487203ae9 100644 --- a/src/libffmpeg/libavcodec/dpcm.c +++ b/src/libffmpeg/libavcodec/dpcm.c @@ -24,6 +24,7 @@ * Xan DPCM decoder by Mario Brito (mbrito@student.dei.uc.pt) * for more information on the specific data formats, visit: * http://www.pcisys.net/~melanson/codecs/simpleaudio.html + * SOL DPCMs implemented by Konstantin Shishkov * * Note about using the Xan DPCM decoder: Xan DPCM is used in AVI files * found in the Wing Commander IV computer game. 
These AVI files contain @@ -39,6 +40,8 @@ typedef struct DPCMContext { int channels; short roq_square_array[256]; + long sample[2];//for SOL_DPCM + int *sol_table;//for SOL_DPCM } DPCMContext; #define SATURATE_S16(x) if (x < -32768) x = -32768; \ @@ -81,6 +84,32 @@ static int interplay_delta_table[] = { }; +static int sol_table_old[16] = + { 0x0, 0x1, 0x2 , 0x3, 0x6, 0xA, 0xF, 0x15, + -0x15, -0xF, -0xA, -0x6, -0x3, -0x2, -0x1, 0x0}; + +static int sol_table_new[16] = + { 0x0, 0x1, 0x2, 0x3, 0x6, 0xA, 0xF, 0x15, + 0x0, -0x1, -0x2, -0x3, -0x6, -0xA, -0xF, -0x15}; + +static int sol_table_16[128] = { + 0x000, 0x008, 0x010, 0x020, 0x030, 0x040, 0x050, 0x060, 0x070, 0x080, + 0x090, 0x0A0, 0x0B0, 0x0C0, 0x0D0, 0x0E0, 0x0F0, 0x100, 0x110, 0x120, + 0x130, 0x140, 0x150, 0x160, 0x170, 0x180, 0x190, 0x1A0, 0x1B0, 0x1C0, + 0x1D0, 0x1E0, 0x1F0, 0x200, 0x208, 0x210, 0x218, 0x220, 0x228, 0x230, + 0x238, 0x240, 0x248, 0x250, 0x258, 0x260, 0x268, 0x270, 0x278, 0x280, + 0x288, 0x290, 0x298, 0x2A0, 0x2A8, 0x2B0, 0x2B8, 0x2C0, 0x2C8, 0x2D0, + 0x2D8, 0x2E0, 0x2E8, 0x2F0, 0x2F8, 0x300, 0x308, 0x310, 0x318, 0x320, + 0x328, 0x330, 0x338, 0x340, 0x348, 0x350, 0x358, 0x360, 0x368, 0x370, + 0x378, 0x380, 0x388, 0x390, 0x398, 0x3A0, 0x3A8, 0x3B0, 0x3B8, 0x3C0, + 0x3C8, 0x3D0, 0x3D8, 0x3E0, 0x3E8, 0x3F0, 0x3F8, 0x400, 0x440, 0x480, + 0x4C0, 0x500, 0x540, 0x580, 0x5C0, 0x600, 0x640, 0x680, 0x6C0, 0x700, + 0x740, 0x780, 0x7C0, 0x800, 0x900, 0xA00, 0xB00, 0xC00, 0xD00, 0xE00, + 0xF00, 0x1000, 0x1400, 0x1800, 0x1C00, 0x2000, 0x3000, 0x4000 +}; + + + static int dpcm_decode_init(AVCodecContext *avctx) { DPCMContext *s = avctx->priv_data; @@ -88,6 +117,7 @@ static int dpcm_decode_init(AVCodecContext *avctx) short square; s->channels = avctx->channels; + s->sample[0] = s->sample[1] = 0; switch(avctx->codec->id) { @@ -100,6 +130,26 @@ static int dpcm_decode_init(AVCodecContext *avctx) } break; + + case CODEC_ID_SOL_DPCM: + switch(avctx->codec_tag){ + case 1: + s->sol_table=sol_table_old; + s->sample[0] = 
s->sample[1] = 0x80; + break; + case 2: + s->sol_table=sol_table_new; + s->sample[0] = s->sample[1] = 0x80; + break; + case 3: + s->sol_table=sol_table_16; + break; + default: + av_log(avctx, AV_LOG_ERROR, "Unknown SOL subcodec\n"); + return -1; + } + break; + default: break; } @@ -203,6 +253,38 @@ static int dpcm_decode_frame(AVCodecContext *avctx, channel_number ^= s->channels - 1; } break; + case CODEC_ID_SOL_DPCM: + in = 0; + if (avctx->codec_tag != 3) { + while (in < buf_size) { + int n1, n2; + n1 = (buf[in] >> 4) & 0xF; + n2 = buf[in++] & 0xF; + s->sample[0] += s->sol_table[n1]; + if (s->sample[0] < 0) s->sample[0] = 0; + if (s->sample[0] > 255) s->sample[0] = 255; + output_samples[out++] = (s->sample[0] - 128) << 8; + s->sample[s->channels - 1] += s->sol_table[n2]; + if (s->sample[s->channels - 1] < 0) s->sample[s->channels - 1] = 0; + if (s->sample[s->channels - 1] > 255) s->sample[s->channels - 1] = 255; + output_samples[out++] = (s->sample[s->channels - 1] - 128) << 8; + } + } else { + while (in < buf_size) { + int n; + n = buf[in++]; + if (n & 0x80) s->sample[channel_number] -= s->sol_table[n & 0x7F]; + else s->sample[channel_number] += s->sol_table[n & 0x7F]; + SATURATE_S16(s->sample[channel_number]); + output_samples[out++] = s->sample[channel_number]; + /* toggle channel */ + channel_number ^= s->channels - 1; + } + } + break; + + default: + break; } *data_size = out * sizeof(short); @@ -241,3 +323,14 @@ AVCodec xan_dpcm_decoder = { NULL, dpcm_decode_frame, }; + +AVCodec sol_dpcm_decoder = { + "sol_dpcm", + CODEC_TYPE_AUDIO, + CODEC_ID_SOL_DPCM, + sizeof(DPCMContext), + dpcm_decode_init, + NULL, + NULL, + dpcm_decode_frame, +}; diff --git a/src/libffmpeg/libavcodec/dsputil.c b/src/libffmpeg/libavcodec/dsputil.c index b1252251a..926832ff1 100644 --- a/src/libffmpeg/libavcodec/dsputil.c +++ b/src/libffmpeg/libavcodec/dsputil.c @@ -31,8 +31,11 @@ #include "simple_idct.h" #include "faandct.h" -uint8_t cropTbl[256 + 2 * MAX_NEG_CROP]; -uint32_t 
squareTbl[512]; +/* snow.c */ +void ff_spatial_dwt(int *buffer, int width, int height, int stride, int type, int decomposition_count); + +uint8_t cropTbl[256 + 2 * MAX_NEG_CROP] = {0, }; +uint32_t squareTbl[512] = {0, }; const uint8_t ff_zigzag_direct[64] = { 0, 1, 8, 16, 9, 2, 3, 10, @@ -59,7 +62,7 @@ const uint8_t ff_zigzag248_direct[64] = { }; /* not permutated inverse zigzag_direct + 1 for MMX quantizer */ -uint16_t __align8 inv_zigzag_direct16[64]; +uint16_t __align8 inv_zigzag_direct16[64] = {0, }; const uint8_t ff_alternate_horizontal_scan[64] = { 0, 1, 2, 3, 8, 9, 16, 17, @@ -219,6 +222,23 @@ static void bswap_buf(uint32_t *dst, uint32_t *src, int w){ } } +static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) +{ + int s, i; + uint32_t *sq = squareTbl + 256; + + s = 0; + for (i = 0; i < h; i++) { + s += sq[pix1[0] - pix2[0]]; + s += sq[pix1[1] - pix2[1]]; + s += sq[pix1[2] - pix2[2]]; + s += sq[pix1[3] - pix2[3]]; + pix1 += line_size; + pix2 += line_size; + } + return s; +} + static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) { int s, i; @@ -270,6 +290,103 @@ static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) return s; } + +static inline int w_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int w, int h, int type){ + int s, i, j; + const int dec_count= w==8 ? 
3 : 4; + int tmp[16*16]; +#if 0 + int level, ori; + static const int scale[2][2][4][4]={ + { + { + //8x8 dec=3 + {268, 239, 239, 213}, + { 0, 224, 224, 152}, + { 0, 135, 135, 110}, + },{ + //16x16 dec=4 + {344, 310, 310, 280}, + { 0, 320, 320, 228}, + { 0, 175, 175, 136}, + { 0, 129, 129, 102}, + } + },{ + {//FIXME 5/3 + //8x8 dec=3 + {275, 245, 245, 218}, + { 0, 230, 230, 156}, + { 0, 138, 138, 113}, + },{ + //16x16 dec=4 + {352, 317, 317, 286}, + { 0, 328, 328, 233}, + { 0, 180, 180, 140}, + { 0, 132, 132, 105}, + } + } + }; +#endif + + for (i = 0; i < h; i++) { + for (j = 0; j < w; j+=4) { + tmp[16*i+j+0] = (pix1[j+0] - pix2[j+0])<<4; + tmp[16*i+j+1] = (pix1[j+1] - pix2[j+1])<<4; + tmp[16*i+j+2] = (pix1[j+2] - pix2[j+2])<<4; + tmp[16*i+j+3] = (pix1[j+3] - pix2[j+3])<<4; + } + pix1 += line_size; + pix2 += line_size; + } + ff_spatial_dwt(tmp, w, h, 16, type, dec_count); + + s=0; +#if 0 + for(level=0; level<dec_count; level++){ + for(ori= level ? 1 : 0; ori<4; ori++){ + int sx= (ori&1) ? 1<<level: 0; + int stride= 16<<(dec_count-level); + int sy= (ori&2) ? 
stride>>1 : 0; + int size= 1<<level; + + for(i=0; i<size; i++){ + for(j=0; j<size; j++){ + int v= tmp[sx + sy + i*stride + j] * scale[type][dec_count-3][level][ori]; + s += ABS(v); + } + } + } + } +#endif + for (i = 0; i < h; i++) { + for (j = 0; j < w; j+=4) { + s+= ABS(tmp[16*i+j+0]); + s+= ABS(tmp[16*i+j+1]); + s+= ABS(tmp[16*i+j+2]); + s+= ABS(tmp[16*i+j+3]); + } + } + assert(s>=0); + + return s>>2; +} + +static int w53_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){ + return w_c(v, pix1, pix2, line_size, 8, h, 1); +} + +static int w97_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){ + return w_c(v, pix1, pix2, line_size, 8, h, 0); +} + +static int w53_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){ + return w_c(v, pix1, pix2, line_size, 16, h, 1); +} + +static int w97_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){ + return w_c(v, pix1, pix2, line_size, 16, h, 0); +} + static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size) { int i; @@ -332,6 +449,40 @@ static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels, } } +static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels, + int line_size) +{ + int i; + uint8_t *cm = cropTbl + MAX_NEG_CROP; + + /* read the pixels */ + for(i=0;i<4;i++) { + pixels[0] = cm[block[0]]; + pixels[1] = cm[block[1]]; + pixels[2] = cm[block[2]]; + pixels[3] = cm[block[3]]; + + pixels += line_size; + block += 8; + } +} + +static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels, + int line_size) +{ + int i; + uint8_t *cm = cropTbl + MAX_NEG_CROP; + + /* read the pixels */ + for(i=0;i<2;i++) { + pixels[0] = cm[block[0]]; + pixels[1] = cm[block[1]]; + + pixels += line_size; + block += 8; + } +} + static void put_signed_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels, int line_size) @@ -373,6 +524,38 @@ static void add_pixels_clamped_c(const 
DCTELEM *block, uint8_t *restrict pixels, block += 8; } } + +static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels, + int line_size) +{ + int i; + uint8_t *cm = cropTbl + MAX_NEG_CROP; + + /* read the pixels */ + for(i=0;i<4;i++) { + pixels[0] = cm[pixels[0] + block[0]]; + pixels[1] = cm[pixels[1] + block[1]]; + pixels[2] = cm[pixels[2] + block[2]]; + pixels[3] = cm[pixels[3] + block[3]]; + pixels += line_size; + block += 8; + } +} + +static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels, + int line_size) +{ + int i; + uint8_t *cm = cropTbl + MAX_NEG_CROP; + + /* read the pixels */ + for(i=0;i<2;i++) { + pixels[0] = cm[pixels[0] + block[0]]; + pixels[1] = cm[pixels[1] + block[1]]; + pixels += line_size; + block += 8; + } +} #if 0 #define PIXOP2(OPNAME, OP) \ @@ -2031,7 +2214,6 @@ static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t OPNAME ## h264_qpel8_hv_lowpass(dst , tmp , src , dstStride, tmpStride, srcStride);\ OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\ src += 8*srcStride;\ - tmp += 8*tmpStride;\ dst += 8*dstStride;\ OPNAME ## h264_qpel8_hv_lowpass(dst , tmp , src , dstStride, tmpStride, srcStride);\ OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\ @@ -2195,6 +2377,77 @@ H264_MC(avg_, 16) #undef op2_put #endif +#define op_scale1(x) block[x] = clip_uint8( (block[x]*weight + offset) >> log2_denom ) +#define op_scale2(x) dst[x] = clip_uint8( (src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1)) +#define H264_WEIGHT(W,H) \ +static void weight_h264_pixels ## W ## x ## H ## _c(uint8_t *block, int stride, int log2_denom, int weight, int offset){ \ + int attribute_unused x, y; \ + offset <<= log2_denom; \ + if(log2_denom) offset += 1<<(log2_denom-1); \ + for(y=0; y<H; y++, block += stride){ \ + op_scale1(0); \ + op_scale1(1); \ + if(W==2) continue; \ + op_scale1(2); \ + op_scale1(3); \ + if(W==4) 
continue; \ + op_scale1(4); \ + op_scale1(5); \ + op_scale1(6); \ + op_scale1(7); \ + if(W==8) continue; \ + op_scale1(8); \ + op_scale1(9); \ + op_scale1(10); \ + op_scale1(11); \ + op_scale1(12); \ + op_scale1(13); \ + op_scale1(14); \ + op_scale1(15); \ + } \ +} \ +static void biweight_h264_pixels ## W ## x ## H ## _c(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offsetd, int offsets){ \ + int attribute_unused x, y; \ + int offset = (offsets + offsetd + 1) >> 1; \ + offset = ((offset << 1) + 1) << log2_denom; \ + for(y=0; y<H; y++, dst += stride, src += stride){ \ + op_scale2(0); \ + op_scale2(1); \ + if(W==2) continue; \ + op_scale2(2); \ + op_scale2(3); \ + if(W==4) continue; \ + op_scale2(4); \ + op_scale2(5); \ + op_scale2(6); \ + op_scale2(7); \ + if(W==8) continue; \ + op_scale2(8); \ + op_scale2(9); \ + op_scale2(10); \ + op_scale2(11); \ + op_scale2(12); \ + op_scale2(13); \ + op_scale2(14); \ + op_scale2(15); \ + } \ +} + +H264_WEIGHT(16,16) +H264_WEIGHT(16,8) +H264_WEIGHT(8,16) +H264_WEIGHT(8,8) +H264_WEIGHT(8,4) +H264_WEIGHT(4,8) +H264_WEIGHT(4,4) +H264_WEIGHT(4,2) +H264_WEIGHT(2,4) +H264_WEIGHT(2,2) + +#undef op_scale1 +#undef op_scale2 +#undef H264_WEIGHT + static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){ uint8_t *cm = cropTbl + MAX_NEG_CROP; int i; @@ -2360,6 +2613,33 @@ static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){ } } +static void h261_loop_filter_c(uint8_t *src, int stride){ + int x,y,xy,yz; + int temp[64]; + + for(x=0; x<8; x++){ + temp[x ] = 4*src[x ]; + temp[x + 7*8] = 4*src[x + 7*stride]; + } + for(y=1; y<7; y++){ + for(x=0; x<8; x++){ + xy = y * stride + x; + yz = y * 8 + x; + temp[yz] = src[xy - stride] + 2*src[xy] + src[xy + stride]; + } + } + + for(y=0; y<8; y++){ + src[ y*stride] = (temp[ y*8] + 2)>>2; + src[7+y*stride] = (temp[7+y*8] + 2)>>2; + for(x=1; x<7; x++){ + xy = y * stride + x; + yz = y * 8 + x; + src[xy] = 
(temp[yz-1] + 2*temp[yz] + temp[yz+1] + 8)>>4; + } + } +} + static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) { int s, i; @@ -2560,6 +2840,56 @@ static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, return s; } +static int nsse16_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2, int stride, int h){ + int score1=0; + int score2=0; + int x,y; + + for(y=0; y<h; y++){ + for(x=0; x<16; x++){ + score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]); + } + if(y+1<h){ + for(x=0; x<15; x++){ + score2+= ABS( s1[x ] - s1[x +stride] + - s1[x+1] + s1[x+1+stride]) + -ABS( s2[x ] - s2[x +stride] + - s2[x+1] + s2[x+1+stride]); + } + } + s1+= stride; + s2+= stride; + } + + if(c) return score1 + ABS(score2)*c->avctx->nsse_weight; + else return score1 + ABS(score2)*8; +} + +static int nsse8_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2, int stride, int h){ + int score1=0; + int score2=0; + int x,y; + + for(y=0; y<h; y++){ + for(x=0; x<8; x++){ + score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]); + } + if(y+1<h){ + for(x=0; x<7; x++){ + score2+= ABS( s1[x ] - s1[x +stride] + - s1[x+1] + s1[x+1+stride]) + -ABS( s2[x ] - s2[x +stride] + - s2[x+1] + s2[x+1+stride]); + } + } + s1+= stride; + s2+= stride; + } + + if(c) return score1 + ABS(score2)*c->avctx->nsse_weight; + else return score1 + ABS(score2)*8; +} + static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){ int i; unsigned int sum=0; @@ -2635,6 +2965,9 @@ void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){ case FF_CMP_DCT: cmp[i]= c->dct_sad[i]; break; + case FF_CMP_DCTMAX: + cmp[i]= c->dct_max[i]; + break; case FF_CMP_PSNR: cmp[i]= c->quant_psnr[i]; break; @@ -2653,6 +2986,15 @@ void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){ case FF_CMP_ZERO: cmp[i]= zero_cmp; break; + case FF_CMP_NSSE: + cmp[i]= c->nsse[i]; + break; + case FF_CMP_W53: + cmp[i]= c->w53[i]; + break; + case FF_CMP_W97: + cmp[i]= c->w97[i]; + break; default: 
av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n"); } @@ -2849,6 +3191,23 @@ static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2 return sum; } +static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ + MpegEncContext * const s= (MpegEncContext *)c; + uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8]; + DCTELEM * const temp= (DCTELEM*)aligned_temp; + int sum=0, i; + + assert(h==8); + + s->dsp.diff_pixels(temp, src1, src2, stride); + s->dsp.fdct(temp); + + for(i=0; i<64; i++) + sum= FFMAX(sum, ABS(temp[i])); + + return sum; +} + void simple_idct(DCTELEM *block); //FIXME static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ @@ -3078,6 +3437,7 @@ static int vsse16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int st WARPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c) WARPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c) WARPER8_16_SQ(dct_sad8x8_c, dct_sad16_c) +WARPER8_16_SQ(dct_max8x8_c, dct_max16_c) WARPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c) WARPER8_16_SQ(rd8x8_c, rd16_c) WARPER8_16_SQ(bit8x8_c, bit16_c) @@ -3095,6 +3455,41 @@ static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block) add_pixels_clamped_c(block, dest, line_size); } +static void ff_jref_idct4_put(uint8_t *dest, int line_size, DCTELEM *block) +{ + j_rev_dct4 (block); + put_pixels_clamped4_c(block, dest, line_size); +} +static void ff_jref_idct4_add(uint8_t *dest, int line_size, DCTELEM *block) +{ + j_rev_dct4 (block); + add_pixels_clamped4_c(block, dest, line_size); +} + +static void ff_jref_idct2_put(uint8_t *dest, int line_size, DCTELEM *block) +{ + j_rev_dct2 (block); + put_pixels_clamped2_c(block, dest, line_size); +} +static void ff_jref_idct2_add(uint8_t *dest, int line_size, DCTELEM *block) +{ + j_rev_dct2 (block); + add_pixels_clamped2_c(block, dest, line_size); +} + +static void ff_jref_idct1_put(uint8_t *dest, 
int line_size, DCTELEM *block) +{ + uint8_t *cm = cropTbl + MAX_NEG_CROP; + + dest[0] = cm[(block[0] + 4)>>3]; +} +static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block) +{ + uint8_t *cm = cropTbl + MAX_NEG_CROP; + + dest[0] = cm[dest[0] + ((block[0] + 4)>>3)]; +} + /* init static data */ void dsputil_static_init(void) { @@ -3133,18 +3528,42 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx) } #endif //CONFIG_ENCODERS - if(avctx->idct_algo==FF_IDCT_INT){ - c->idct_put= ff_jref_idct_put; - c->idct_add= ff_jref_idct_add; - c->idct = j_rev_dct; - c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM; - }else{ //accurate/default - c->idct_put= simple_idct_put; - c->idct_add= simple_idct_add; - c->idct = simple_idct; + if(avctx->lowres==1){ + if(avctx->idct_algo==FF_IDCT_INT || avctx->idct_algo==FF_IDCT_AUTO){ + c->idct_put= ff_jref_idct4_put; + c->idct_add= ff_jref_idct4_add; + }else{ + c->idct_put= ff_h264_lowres_idct_put_c; + c->idct_add= ff_h264_lowres_idct_add_c; + } + c->idct = j_rev_dct4; + c->idct_permutation_type= FF_NO_IDCT_PERM; + }else if(avctx->lowres==2){ + c->idct_put= ff_jref_idct2_put; + c->idct_add= ff_jref_idct2_add; + c->idct = j_rev_dct2; + c->idct_permutation_type= FF_NO_IDCT_PERM; + }else if(avctx->lowres==3){ + c->idct_put= ff_jref_idct1_put; + c->idct_add= ff_jref_idct1_add; + c->idct = j_rev_dct1; c->idct_permutation_type= FF_NO_IDCT_PERM; + }else{ + if(avctx->idct_algo==FF_IDCT_INT){ + c->idct_put= ff_jref_idct_put; + c->idct_add= ff_jref_idct_add; + c->idct = j_rev_dct; + c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM; + }else{ //accurate/default + c->idct_put= simple_idct_put; + c->idct_add= simple_idct_add; + c->idct = simple_idct; + c->idct_permutation_type= FF_NO_IDCT_PERM; + } } + c->h264_idct_add= ff_h264_idct_add_c; + /* VP3 DSP support */ c->vp3_dsp_init = vp3_dsp_init_c; c->vp3_idct = vp3_idct_c; @@ -3259,6 +3678,27 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx) c->avg_h264_chroma_pixels_tab[1]= 
avg_h264_chroma_mc4_c; c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_c; + c->weight_h264_pixels_tab[0]= weight_h264_pixels16x16_c; + c->weight_h264_pixels_tab[1]= weight_h264_pixels16x8_c; + c->weight_h264_pixels_tab[2]= weight_h264_pixels8x16_c; + c->weight_h264_pixels_tab[3]= weight_h264_pixels8x8_c; + c->weight_h264_pixels_tab[4]= weight_h264_pixels8x4_c; + c->weight_h264_pixels_tab[5]= weight_h264_pixels4x8_c; + c->weight_h264_pixels_tab[6]= weight_h264_pixels4x4_c; + c->weight_h264_pixels_tab[7]= weight_h264_pixels4x2_c; + c->weight_h264_pixels_tab[8]= weight_h264_pixels2x4_c; + c->weight_h264_pixels_tab[9]= weight_h264_pixels2x2_c; + c->biweight_h264_pixels_tab[0]= biweight_h264_pixels16x16_c; + c->biweight_h264_pixels_tab[1]= biweight_h264_pixels16x8_c; + c->biweight_h264_pixels_tab[2]= biweight_h264_pixels8x16_c; + c->biweight_h264_pixels_tab[3]= biweight_h264_pixels8x8_c; + c->biweight_h264_pixels_tab[4]= biweight_h264_pixels8x4_c; + c->biweight_h264_pixels_tab[5]= biweight_h264_pixels4x8_c; + c->biweight_h264_pixels_tab[6]= biweight_h264_pixels4x4_c; + c->biweight_h264_pixels_tab[7]= biweight_h264_pixels4x2_c; + c->biweight_h264_pixels_tab[8]= biweight_h264_pixels2x4_c; + c->biweight_h264_pixels_tab[9]= biweight_h264_pixels2x2_c; + c->put_mspel_pixels_tab[0]= put_mspel8_mc00_c; c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c; c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c; @@ -3275,10 +3715,12 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx) SET_CMP_FUNC(hadamard8_diff) c->hadamard8_diff[4]= hadamard8_intra16_c; SET_CMP_FUNC(dct_sad) + SET_CMP_FUNC(dct_max) c->sad[0]= pix_abs16_c; c->sad[1]= pix_abs8_c; c->sse[0]= sse16_c; c->sse[1]= sse8_c; + c->sse[2]= sse4_c; SET_CMP_FUNC(quant_psnr) SET_CMP_FUNC(rd) SET_CMP_FUNC(bit) @@ -3286,7 +3728,13 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx) c->vsad[4]= vsad_intra16_c; c->vsse[0]= vsse16_c; c->vsse[4]= vsse_intra16_c; - + c->nsse[0]= nsse16_c; + c->nsse[1]= nsse8_c; + c->w53[0]= 
w53_16_c; + c->w53[1]= w53_8_c; + c->w97[0]= w97_16_c; + c->w97[1]= w97_8_c; + c->add_bytes= add_bytes_c; c->diff_bytes= diff_bytes_c; c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c; @@ -3295,6 +3743,8 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx) c->h263_h_loop_filter= h263_h_loop_filter_c; c->h263_v_loop_filter= h263_v_loop_filter_c; + c->h261_loop_filter= h261_loop_filter_c; + c->try_8x8basis= try_8x8basis_c; c->add_8x8basis= add_8x8basis_c; diff --git a/src/libffmpeg/libavcodec/dsputil.h b/src/libffmpeg/libavcodec/dsputil.h index adb4679e0..c728a24d6 100644 --- a/src/libffmpeg/libavcodec/dsputil.h +++ b/src/libffmpeg/libavcodec/dsputil.h @@ -45,11 +45,18 @@ void ff_jpeg_fdct_islow (DCTELEM *data); void ff_fdct248_islow (DCTELEM *data); void j_rev_dct (DCTELEM *data); +void j_rev_dct4 (DCTELEM *data); +void j_rev_dct2 (DCTELEM *data); +void j_rev_dct1 (DCTELEM *data); void ff_fdct_mmx(DCTELEM *block); void ff_fdct_mmx2(DCTELEM *block); void ff_fdct_sse2(DCTELEM *block); +void ff_h264_idct_add_c(uint8_t *dst, DCTELEM *block, int stride); +void ff_h264_lowres_idct_add_c(uint8_t *dst, int stride, DCTELEM *block); +void ff_h264_lowres_idct_put_c(uint8_t *dst, int stride, DCTELEM *block); + /* encoding scans */ extern const uint8_t ff_alternate_horizontal_scan[64]; extern const uint8_t ff_alternate_vertical_scan[64]; @@ -57,7 +64,7 @@ extern const uint8_t ff_zigzag_direct[64]; extern const uint8_t ff_zigzag248_direct[64]; /* pixel operations */ -#define MAX_NEG_CROP 384 +#define MAX_NEG_CROP 1024 /* temporary */ extern uint32_t squareTbl[512]; @@ -101,6 +108,8 @@ typedef void (*op_pixels_func)(uint8_t *block/*align width (8 or 16)*/, const ui typedef void (*tpel_mc_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, int line_size, int w, int h); typedef void (*qpel_mc_func)(uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride); typedef void (*h264_chroma_mc_func)(uint8_t *dst/*align 8*/, 
uint8_t *src/*align 1*/, int srcStride, int h, int x, int y); +typedef void (*h264_weight_func)(uint8_t *block, int stride, int log2_denom, int weight, int offset); +typedef void (*h264_biweight_func)(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offsetd, int offsets); #define DEF_OLD_QPEL(name)\ void ff_put_ ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);\ @@ -165,12 +174,17 @@ typedef struct DSPContext { me_cmp_func rd[5]; me_cmp_func vsad[5]; me_cmp_func vsse[5]; + me_cmp_func nsse[5]; + me_cmp_func w53[5]; + me_cmp_func w97[5]; + me_cmp_func dct_max[5]; me_cmp_func me_pre_cmp[5]; me_cmp_func me_cmp[5]; me_cmp_func me_sub_cmp[5]; me_cmp_func mb_cmp[5]; me_cmp_func ildct_cmp[5]; //only width 16 used + me_cmp_func frame_skip_cmp[5]; //only width 8 used /** * Halfpel motion compensation with rounding (a+b+1)>>1. @@ -206,7 +220,7 @@ typedef struct DSPContext { * @param line_size number of bytes in a horizontal line of block * @param h height */ - op_pixels_func put_no_rnd_pixels_tab[2][4]; + op_pixels_func put_no_rnd_pixels_tab[4][4]; /** * Halfpel motion compensation with no rounding (a+b)>>1. 
@@ -218,7 +232,7 @@ typedef struct DSPContext { * @param line_size number of bytes in a horizontal line of block * @param h height */ - op_pixels_func avg_no_rnd_pixels_tab[2][4]; + op_pixels_func avg_no_rnd_pixels_tab[4][4]; void (*put_no_rnd_pixels_l2[2])(uint8_t *block/*align width (8 or 16)*/, const uint8_t *a/*align 1*/, const uint8_t *b/*align 1*/, int line_size, int h); @@ -249,6 +263,9 @@ typedef struct DSPContext { qpel_mc_func put_h264_qpel_pixels_tab[3][16]; qpel_mc_func avg_h264_qpel_pixels_tab[3][16]; + h264_weight_func weight_h264_pixels_tab[10]; + h264_biweight_func biweight_h264_pixels_tab[10]; + me_cmp_func pix_abs[2][4]; /* huffyuv specific */ @@ -264,6 +281,8 @@ typedef struct DSPContext { void (*h263_v_loop_filter)(uint8_t *src, int stride, int qscale); void (*h263_h_loop_filter)(uint8_t *src, int stride, int qscale); + void (*h261_loop_filter)(uint8_t *src, int stride); + /* (I)DCT */ void (*fdct)(DCTELEM *block/* align 16*/); void (*fdct248)(DCTELEM *block/* align 16*/); @@ -325,7 +344,8 @@ typedef struct DSPContext { */ void (*vp3_idct)(int16_t *input_data, int16_t *dequant_matrix, int coeff_count, DCTELEM *output_samples); - + + void (*h264_idct_add)(uint8_t *dst, DCTELEM *block, int stride); } DSPContext; void dsputil_static_init(void); @@ -351,6 +371,29 @@ static inline uint32_t no_rnd_avg32(uint32_t a, uint32_t b) return (a & b) + (((a ^ b) & ~BYTE_VEC32(0x01)) >> 1); } +static inline int get_penalty_factor(int lambda, int lambda2, int type){ + switch(type&0xFF){ + default: + case FF_CMP_SAD: + return lambda>>FF_LAMBDA_SHIFT; + case FF_CMP_DCT: + return (3*lambda)>>(FF_LAMBDA_SHIFT+1); + case FF_CMP_W53: + return (4*lambda)>>(FF_LAMBDA_SHIFT); + case FF_CMP_W97: + return (2*lambda)>>(FF_LAMBDA_SHIFT); + case FF_CMP_SATD: + return (2*lambda)>>FF_LAMBDA_SHIFT; + case FF_CMP_RD: + case FF_CMP_PSNR: + case FF_CMP_SSE: + case FF_CMP_NSSE: + return lambda2>>FF_LAMBDA_SHIFT; + case FF_CMP_BIT: + return 1; + } +} + /** * Empty mmx state. 
* this must be called between any dsp function and float/double code. @@ -373,6 +416,7 @@ int mm_support(void); #define MM_MMXEXT 0x0002 /* SSE integer functions or AMD MMX ext */ #define MM_SSE 0x0008 /* SSE functions */ #define MM_SSE2 0x0010 /* PIV SSE2 functions */ +#define MM_3DNOWEXT 0x0020 /* AMD 3DNowExt */ extern int mm_flags; @@ -393,6 +437,7 @@ static inline void emms(void) } #define __align8 __attribute__ ((aligned (8))) +#define STRIDE_ALIGN 8 void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx); void dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx); @@ -402,6 +447,7 @@ void dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx); /* This is to use 4 bytes read to the IDCT pointers for some 'zero' line optimizations */ #define __align8 __attribute__ ((aligned (4))) +#define STRIDE_ALIGN 4 void dsputil_init_armv4l(DSPContext* c, AVCodecContext *avctx); @@ -409,6 +455,7 @@ void dsputil_init_armv4l(DSPContext* c, AVCodecContext *avctx); /* SPARC/VIS IDCT needs 8-byte aligned DCT blocks */ #define __align8 __attribute__ ((aligned (8))) +#define STRIDE_ALIGN 8 void dsputil_init_mlib(DSPContext* c, AVCodecContext *avctx); @@ -416,11 +463,13 @@ void dsputil_init_mlib(DSPContext* c, AVCodecContext *avctx); /* SPARC/VIS IDCT needs 8-byte aligned DCT blocks */ #define __align8 __attribute__ ((aligned (8))) +#define STRIDE_ALIGN 8 void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx); #elif defined(ARCH_ALPHA) #define __align8 __attribute__ ((aligned (8))) +#define STRIDE_ALIGN 8 void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx); @@ -437,24 +486,28 @@ extern int mm_flags; #endif #define __align8 __attribute__ ((aligned (16))) +#define STRIDE_ALIGN 16 void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx); #elif defined(HAVE_MMI) #define __align8 __attribute__ ((aligned (16))) +#define STRIDE_ALIGN 16 void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx); #elif defined(ARCH_SH4) #define __align8 __attribute__ 
((aligned (8))) +#define STRIDE_ALIGN 8 void dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx); #else -#define __align8 +#define __align8 __attribute__ ((aligned (8))) +#define STRIDE_ALIGN 8 #endif @@ -558,11 +611,20 @@ static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int st /* XXX: add ISOC specific test to avoid specific BSD testing. */ /* better than nothing implementation. */ /* btw, rintf() is existing on fbsd too -- alex */ -static inline long int lrintf(float x) +static always_inline long int lrintf(float x) { #ifdef CONFIG_WIN32 +# ifdef ARCH_X86 + int32_t i; + asm volatile( + "fistpl %0\n\t" + : "=m" (i) : "t" (x) : "st" + ); + return i; +# else /* XXX: incorrect, but make it compile */ - return (int)(x); + return (int)(x + (x < 0 ? -0.5 : 0.5)); +# endif #else return (int)(rint(x)); #endif diff --git a/src/libffmpeg/libavcodec/dv.c b/src/libffmpeg/libavcodec/dv.c index 8e359e361..94440ed5c 100644 --- a/src/libffmpeg/libavcodec/dv.c +++ b/src/libffmpeg/libavcodec/dv.c @@ -144,7 +144,7 @@ static int dvvideo_init(AVCodecContext *avctx) /* NOTE: as a trick, we use the fact the no codes are unused to accelerate the parsing of partial codes */ init_vlc(&dv_vlc, TEX_VLC_BITS, j, - new_dv_vlc_len, 1, 1, new_dv_vlc_bits, 2, 2); + new_dv_vlc_len, 1, 1, new_dv_vlc_bits, 2, 2, 0); dv_rl_vlc = av_malloc(dv_vlc.table_size * sizeof(RL_VLC_ELEM)); if (!dv_rl_vlc) { @@ -263,6 +263,7 @@ static const int mb_area_start[5] = { 1, 6, 21, 43, 64 }; #ifndef ALT_BITSTREAM_READER #warning only works with ALT_BITSTREAM_READER +static int re_index; //Hack to make it compile #endif static inline int get_bits_left(GetBitContext *s) @@ -394,8 +395,7 @@ static inline void dv_decode_video_segment(DVVideoContext *s, init_get_bits(&gb, buf_ptr, last_index); /* get the dc */ - dc = get_bits(&gb, 9); - dc = (dc << (32 - 9)) >> (32 - 9); + dc = get_sbits(&gb, 9); dct_mode = get_bits1(&gb); mb->dct_mode = dct_mode; mb->scan_table = s->dv_zigzag[dct_mode]; @@ 
-889,10 +889,6 @@ static int dvvideo_decode_frame(AVCodecContext *avctx, { DVVideoContext *s = avctx->priv_data; - /* special case for last picture */ - if(buf_size==0) - return 0; - s->sys = dv_frame_profile(buf); if (!s->sys || buf_size < s->sys->frame_size) return -1; /* NOTE: we only accept several full frames */ @@ -932,7 +928,9 @@ static int dvvideo_encode_frame(AVCodecContext *c, uint8_t *buf, int buf_size, s->sys = dv_codec_profile(c); if (!s->sys) return -1; - + if(buf_size < s->sys->frame_size) + return -1; + c->pix_fmt = s->sys->pix_fmt; s->picture = *((AVFrame *)data); diff --git a/src/libffmpeg/libavcodec/dvdata.h b/src/libffmpeg/libavcodec/dvdata.h index e60d99448..acda751d6 100644 --- a/src/libffmpeg/libavcodec/dvdata.h +++ b/src/libffmpeg/libavcodec/dvdata.h @@ -1299,7 +1299,7 @@ static const DVprofile dv_profiles[] = { .frame_rate_base = 1001, .height = 480, .width = 720, - .sar = {{72, 79}, {96, 79}}, + .sar = {{10, 11}, {40, 33}}, .video_place = dv_place_411, .pix_fmt = PIX_FMT_YUV411P, .audio_stride = 90, @@ -1315,7 +1315,7 @@ static const DVprofile dv_profiles[] = { .ltc_divisor = 25, .height = 576, .width = 720, - .sar = {{128, 117}, {512, 351}}, + .sar = {{59, 54}, {118, 81}}, .video_place = dv_place_420, .pix_fmt = PIX_FMT_YUV420P, .audio_stride = 108, @@ -1331,7 +1331,7 @@ static const DVprofile dv_profiles[] = { .ltc_divisor = 25, .height = 576, .width = 720, - .sar = {{128, 117}, {512, 351}}, + .sar = {{59, 54}, {118, 81}}, .video_place = dv_place_411P, .pix_fmt = PIX_FMT_YUV411P, .audio_stride = 108, diff --git a/src/libffmpeg/libavcodec/error_resilience.c b/src/libffmpeg/libavcodec/error_resilience.c index b7aeebddf..b0d22ddf9 100644 --- a/src/libffmpeg/libavcodec/error_resilience.c +++ b/src/libffmpeg/libavcodec/error_resilience.c @@ -652,7 +652,7 @@ void ff_er_add_slice(MpegEncContext *s, int startx, int starty, int endx, int en s->error_status_table[start_xy] |= VP_START; - if(start_xy > 0 && s->avctx->thread_count <= 1){ + 
if(start_xy > 0 && s->avctx->thread_count <= 1 && s->avctx->skip_top*s->mb_width < start_i){ int prev_status= s->error_status_table[ s->mb_index2xy[start_i - 1] ]; prev_status &= ~ VP_START; @@ -661,31 +661,34 @@ void ff_er_add_slice(MpegEncContext *s, int startx, int starty, int endx, int en } void ff_er_frame_end(MpegEncContext *s){ - int i, mb_x, mb_y, error, error_type; + int i, mb_x, mb_y, error, error_type, dc_error, mv_error, ac_error; int distance; int threshold_part[4]= {100,100,100}; int threshold= 50; int is_intra_likely; + int size = s->b8_stride * 2 * s->mb_height; + Picture *pic= s->current_picture_ptr; - if(!s->error_resilience || s->error_count==0) return; + if(!s->error_resilience || s->error_count==0 || + s->error_count==3*s->mb_width*(s->avctx->skip_top + s->avctx->skip_bottom)) return; - av_log(s->avctx, AV_LOG_INFO, "concealing errors\n"); - if(s->current_picture.motion_val[0] == NULL){ - int size = s->b8_stride * 2 * s->mb_height; - Picture *pic= s->current_picture_ptr; - av_log(s->avctx, AV_LOG_ERROR, "Warning MVs not available\n"); for(i=0; i<2; i++){ pic->ref_index[i]= av_mallocz(size * sizeof(uint8_t)); - pic->motion_val_base[i]= av_mallocz((size+2) * 2 * sizeof(uint16_t)); - pic->motion_val[i]= pic->motion_val_base[i]+2; + pic->motion_val_base[i]= av_mallocz((size+4) * 2 * sizeof(uint16_t)); + pic->motion_val[i]= pic->motion_val_base[i]+4; } pic->motion_subsample_log2= 3; s->current_picture= *s->current_picture_ptr; } + for(i=0; i<2; i++){ + if(pic->ref_index[i]) + memset(pic->ref_index[i], 0, size * sizeof(uint8_t)); + } + if(s->avctx->debug&FF_DEBUG_ER){ for(mb_y=0; mb_y<s->mb_height; mb_y++){ for(mb_x=0; mb_x<s->mb_width; mb_x++){ @@ -816,6 +819,17 @@ void ff_er_frame_end(MpegEncContext *s){ } } #endif + + dc_error= ac_error= mv_error=0; + for(i=0; i<s->mb_num; i++){ + const int mb_xy= s->mb_index2xy[i]; + error= s->error_status_table[mb_xy]; + if(error&DC_ERROR) dc_error ++; + if(error&AC_ERROR) ac_error ++; + if(error&MV_ERROR) 
mv_error ++; + } + av_log(s->avctx, AV_LOG_INFO, "concealing %d DC, %d AC, %d MV errors\n", dc_error, ac_error, mv_error); + is_intra_likely= is_intra_more_likely(s); /* set unknown mb-type to most likely */ diff --git a/src/libffmpeg/libavcodec/eval.c b/src/libffmpeg/libavcodec/eval.c index aead600e8..330781581 100644 --- a/src/libffmpeg/libavcodec/eval.c +++ b/src/libffmpeg/libavcodec/eval.c @@ -42,10 +42,7 @@ #define M_PI 3.14159265358979323846 #endif -#define STACK_SIZE 100 - typedef struct Parser{ - double stack[STACK_SIZE]; int stack_index; char *s; double *const_value; @@ -57,25 +54,7 @@ typedef struct Parser{ void *opaque; } Parser; -static void evalExpression(Parser *p); - -static void push(Parser *p, double d){ - if(p->stack_index+1>= STACK_SIZE){ - av_log(NULL, AV_LOG_ERROR, "stack overflow in the parser\n"); - return; - } - p->stack[ p->stack_index++ ]= d; -//printf("push %f\n", d); fflush(stdout); -} - -static double pop(Parser *p){ - if(p->stack_index<=0){ - av_log(NULL, AV_LOG_ERROR, "stack underflow in the parser\n"); - return NAN; - } -//printf("pop\n"); fflush(stdout); - return p->stack[ --p->stack_index ]; -} +static double evalExpression(Parser *p); static int strmatch(const char *s, const char *prefix){ int i; @@ -85,7 +64,7 @@ static int strmatch(const char *s, const char *prefix){ return 1; } -static void evalPrimary(Parser *p){ +static double evalPrimary(Parser *p){ double d, d2=NAN; char *next= p->s; int i; @@ -93,36 +72,32 @@ static void evalPrimary(Parser *p){ /* number */ d= strtod(p->s, &next); if(next != p->s){ - push(p, d); p->s= next; - return; + return d; } /* named constants */ - for(i=0; p->const_name[i]; i++){ + for(i=0; p->const_name && p->const_name[i]; i++){ if(strmatch(p->s, p->const_name[i])){ - push(p, p->const_value[i]); p->s+= strlen(p->const_name[i]); - return; + return p->const_value[i]; } } p->s= strchr(p->s, '('); if(p->s==NULL){ av_log(NULL, AV_LOG_ERROR, "Parser: missing ( in \"%s\"\n", next); - return; + return 
NAN; } p->s++; // "(" - evalExpression(p); - d= pop(p); + d= evalExpression(p); if(p->s[0]== ','){ p->s++; // "," - evalExpression(p); - d2= pop(p); + d2= evalExpression(p); } if(p->s[0] != ')'){ av_log(NULL, AV_LOG_ERROR, "Parser: missing ) in \"%s\"\n", next); - return; + return NAN; } p->s++; // ")" @@ -144,96 +119,67 @@ static void evalPrimary(Parser *p){ else if( strmatch(next, "lt" ) ) d= d > d2 ? 0.0 : 1.0; else if( strmatch(next, "lte" ) ) d= d >= d2 ? 0.0 : 1.0; else if( strmatch(next, "eq" ) ) d= d == d2 ? 1.0 : 0.0; + else if( strmatch(next, "(" ) ) d= d; // else if( strmatch(next, "l1" ) ) d= 1 + d2*(d - 1); // else if( strmatch(next, "sq01" ) ) d= (d >= 0.0 && d <=1.0) ? 1.0 : 0.0; else{ - int error=1; for(i=0; p->func1_name && p->func1_name[i]; i++){ if(strmatch(next, p->func1_name[i])){ - d= p->func1[i](p->opaque, d); - error=0; - break; + return p->func1[i](p->opaque, d); } } for(i=0; p->func2_name && p->func2_name[i]; i++){ if(strmatch(next, p->func2_name[i])){ - d= p->func2[i](p->opaque, d, d2); - error=0; - break; + return p->func2[i](p->opaque, d, d2); } } - if(error){ - av_log(NULL, AV_LOG_ERROR, "Parser: unknown function in \"%s\"\n", next); - return; - } + av_log(NULL, AV_LOG_ERROR, "Parser: unknown function in \"%s\"\n", next); + return NAN; } - - push(p, d); + + return d; } - -static void evalPow(Parser *p){ - int neg= 0; - if(p->s[0]=='+') p->s++; - - if(p->s[0]=='-'){ - neg= 1; - p->s++; - } - - if(p->s[0]=='('){ - p->s++;; - evalExpression(p); - if(p->s[0]!=')') - av_log(NULL, AV_LOG_ERROR, "Parser: missing )\n"); - p->s++; - }else{ - evalPrimary(p); - } - - if(neg) push(p, -pop(p)); +static double evalPow(Parser *p){ + int sign= (*p->s == '+') - (*p->s == '-'); + p->s += sign&1; + return (sign|1) * evalPrimary(p); } -static void evalFactor(Parser *p){ - evalPow(p); +static double evalFactor(Parser *p){ + double ret= evalPow(p); while(p->s[0]=='^'){ - double d; - p->s++; - evalPow(p); - d= pop(p); - push(p, pow(pop(p), d)); + ret= 
pow(ret, evalPow(p)); } + return ret; } -static void evalTerm(Parser *p){ - evalFactor(p); +static double evalTerm(Parser *p){ + double ret= evalFactor(p); while(p->s[0]=='*' || p->s[0]=='/'){ - int inv= p->s[0]=='/'; - double d; - - p->s++; - evalFactor(p); - d= pop(p); - if(inv) d= 1.0/d; - push(p, d * pop(p)); + if(*p->s++ == '*') ret*= evalFactor(p); + else ret/= evalFactor(p); } + return ret; } -static void evalExpression(Parser *p){ - evalTerm(p); - while(p->s[0]=='+' || p->s[0]=='-'){ - int sign= p->s[0]=='-'; - double d; +static double evalExpression(Parser *p){ + double ret= 0; - p->s++; - evalTerm(p); - d= pop(p); - if(sign) d= -d; - push(p, d + pop(p)); - } + if(p->stack_index <= 0) //protect against stack overflows + return NAN; + p->stack_index--; + + do{ + ret += evalTerm(p); + }while(*p->s == '+' || *p->s == '-'); + + p->stack_index++; + + return ret; } double ff_eval(char *s, double *const_value, const char **const_name, @@ -242,7 +188,7 @@ double ff_eval(char *s, double *const_value, const char **const_name, void *opaque){ Parser p; - p.stack_index=0; + p.stack_index=100; p.s= s; p.const_value= const_value; p.const_name = const_name; @@ -252,6 +198,29 @@ double ff_eval(char *s, double *const_value, const char **const_name, p.func2_name = func2_name; p.opaque = opaque; - evalExpression(&p); - return pop(&p); + return evalExpression(&p); +} + +#ifdef TEST +#undef printf +static double const_values[]={ + M_PI, + M_E, + 0 +}; +static const char *const_names[]={ + "PI", + "E", + 0 +}; +main(){ + int i; + printf("%f == 12.7\n", ff_eval("1+(5-2)^(3-1)+1/2+sin(PI)-max(-2.2,-3.1)", const_values, const_names, NULL, NULL, NULL, NULL, NULL)); + + for(i=0; i<1050; i++){ + START_TIMER + ff_eval("1+(5-2)^(3-1)+1/2+sin(PI)-max(-2.2,-3.1)", const_values, const_names, NULL, NULL, NULL, NULL, NULL); + STOP_TIMER("ff_eval") + } } +#endif diff --git a/src/libffmpeg/libavcodec/ffv1.c b/src/libffmpeg/libavcodec/ffv1.c index a85baea4b..6a4c6ed3f 100644 --- 
a/src/libffmpeg/libavcodec/ffv1.c +++ b/src/libffmpeg/libavcodec/ffv1.c @@ -25,9 +25,10 @@ */ #include "common.h" +#include "bitstream.h" #include "avcodec.h" #include "dsputil.h" -#include "cabac.h" +#include "rangecoder.h" #include "golomb.h" #define MAX_PLANES 4 @@ -164,7 +165,7 @@ typedef struct PlaneContext{ typedef struct FFV1Context{ AVCodecContext *avctx; - CABACContext c; + RangeCoder c; GetBitContext gb; PutBitContext pb; int version; @@ -218,57 +219,52 @@ static inline int get_context(FFV1Context *f, int_fast16_t *src, int_fast16_t *l return f->quant_table[0][(L-LT) & 0xFF] + f->quant_table[1][(LT-T) & 0xFF] + f->quant_table[2][(T-RT) & 0xFF]; } -/** - * put - */ -static inline void put_symbol(CABACContext *c, uint8_t *state, int v, int is_signed, int max_exp){ +static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){ int i; if(v){ const int a= ABS(v); const int e= av_log2(a); - - put_cabac(c, state+0, 0); + put_rac(c, state+0, 0); + assert(e<=9); + for(i=0; i<e; i++){ - put_cabac(c, state+1+i, 1); //1..8 + put_rac(c, state+1+i, 1); //1..10 } + put_rac(c, state+1+i, 0); - if(e<max_exp){ - put_cabac(c, state+1+i, 0); //1..8 - - for(i=e-1; i>=0; i--){ - put_cabac(c, state+16+e+i, (a>>i)&1); //17..29 - } - if(is_signed) - put_cabac(c, state+9 + e, v < 0); //9..16 + for(i=e-1; i>=0; i--){ + put_rac(c, state+22+i, (a>>i)&1); //22..31 } + + if(is_signed) + put_rac(c, state+11 + e, v < 0); //11..21 }else{ - put_cabac(c, state+0, 1); + put_rac(c, state+0, 1); } } -static inline int get_symbol(CABACContext *c, uint8_t *state, int is_signed, int max_exp){ - if(get_cabac(c, state+0)) +static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){ + if(get_rac(c, state+0)) return 0; else{ - int i, e; - - for(e=0; e<max_exp; e++){ - int a= 1<<e; - - if(get_cabac(c, state + 1 + e)==0){ // 1..8 - for(i=e-1; i>=0; i--){ - a += get_cabac(c, state+16+e+i)<<i; //17..29 - } + int i, e, a; + e= 0; + while(get_rac(c, state+1 + e)){ 
//1..10 + e++; + } + assert(e<=9); - if(is_signed && get_cabac(c, state+9 + e)) //9..16 - return -a; - else - return a; - } + a= 1; + for(i=e-1; i>=0; i--){ + a += a + get_rac(c, state+22 + i); //22..31 } - return -(1<<e); + + if(is_signed && get_rac(c, state+11 + e)) //11..21 + return -a; + else + return a; } } @@ -324,10 +320,8 @@ static inline void put_vlc_symbol(PutBitContext *pb, VlcState * const state, int code= v ^ ((2*state->drift + state->count)>>31); #endif - code = -2*code-1; - code^= (code>>31); //printf("v:%d/%d bias:%d error:%d drift:%d count:%d k:%d\n", v, code, state->bias, state->error_sum, state->drift, state->count, k); - set_ur_golomb(pb, code, k, 12, bits); + set_sr_golomb(pb, code, k, 12, bits); update_vlc_state(state, v); } @@ -344,13 +338,9 @@ static inline int get_vlc_symbol(GetBitContext *gb, VlcState * const state, int assert(k<=8); - v= get_ur_golomb(gb, k, 12, bits); + v= get_sr_golomb(gb, k, 12, bits); //printf("v:%d bias:%d error:%d drift:%d count:%d k:%d", v, state->bias, state->error_sum, state->drift, state->count, k); - v++; - if(v&1) v= (v>>1); - else v= -(v>>1); - #if 0 // JPEG LS if(k==0 && 2*state->drift <= - state->count) v ^= (-1); #else @@ -364,14 +354,26 @@ static inline int get_vlc_symbol(GetBitContext *gb, VlcState * const state, int return ret; } -static inline void encode_line(FFV1Context *s, int w, int_fast16_t *sample[2], int plane_index, int bits){ +static inline int encode_line(FFV1Context *s, int w, int_fast16_t *sample[2], int plane_index, int bits){ PlaneContext * const p= &s->plane[plane_index]; - CABACContext * const c= &s->c; + RangeCoder * const c= &s->c; int x; int run_index= s->run_index; int run_count=0; int run_mode=0; + if(s->ac){ + if(c->bytestream_end - c->bytestream < w*20){ + av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n"); + return -1; + } + }else{ + if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < w*4){ + av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n"); + return 
-1; + } + } + for(x=0; x<w; x++){ int diff, context; @@ -386,7 +388,7 @@ static inline void encode_line(FFV1Context *s, int w, int_fast16_t *sample[2], i diff= fold(diff, bits); if(s->ac){ - put_symbol(c, p->state[context], diff, 1, bits-1); + put_symbol(c, p->state[context], diff, 1); }else{ if(context == 0) run_mode=1; @@ -426,11 +428,13 @@ static inline void encode_line(FFV1Context *s, int w, int_fast16_t *sample[2], i put_bits(&s->pb, 1, 1); } s->run_index= run_index; + + return 0; } static void encode_plane(FFV1Context *s, uint8_t *src, int w, int h, int stride, int plane_index){ int x,y,i; - const int ring_size=2; + const int ring_size= s->avctx->context_model ? 3 : 2; int_fast16_t sample_buffer[ring_size][w+6], *sample[ring_size]; s->run_index=0; @@ -453,7 +457,7 @@ static void encode_plane(FFV1Context *s, uint8_t *src, int w, int h, int stride, static void encode_rgb_frame(FFV1Context *s, uint32_t *src, int w, int h, int stride){ int x, y, p, i; - const int ring_size=2; + const int ring_size= s->avctx->context_model ? 
3 : 2; int_fast16_t sample_buffer[3][ring_size][w+6], *sample[3][ring_size]; s->run_index=0; @@ -490,32 +494,35 @@ static void encode_rgb_frame(FFV1Context *s, uint32_t *src, int w, int h, int st } } -static void write_quant_table(CABACContext *c, int16_t *quant_table){ +static void write_quant_table(RangeCoder *c, int16_t *quant_table){ int last=0; int i; - uint8_t state[CONTEXT_SIZE]={0}; + uint8_t state[CONTEXT_SIZE]; + memset(state, 128, sizeof(state)); for(i=1; i<128 ; i++){ if(quant_table[i] != quant_table[i-1]){ - put_symbol(c, state, i-last-1, 0, 7); + put_symbol(c, state, i-last-1, 0); last= i; } } - put_symbol(c, state, i-last-1, 0, 7); + put_symbol(c, state, i-last-1, 0); } static void write_header(FFV1Context *f){ - uint8_t state[CONTEXT_SIZE]={0}; + uint8_t state[CONTEXT_SIZE]; int i; - CABACContext * const c= &f->c; + RangeCoder * const c= &f->c; - put_symbol(c, state, f->version, 0, 7); - put_symbol(c, state, f->avctx->coder_type, 0, 7); - put_symbol(c, state, f->colorspace, 0, 7); //YUV cs type - put_cabac(c, state, 1); //chroma planes - put_symbol(c, state, f->chroma_h_shift, 0, 7); - put_symbol(c, state, f->chroma_v_shift, 0, 7); - put_cabac(c, state, 0); //no transparency plane + memset(state, 128, sizeof(state)); + + put_symbol(c, state, f->version, 0); + put_symbol(c, state, f->avctx->coder_type, 0); + put_symbol(c, state, f->colorspace, 0); //YUV cs type + put_rac(c, state, 1); //chroma planes + put_symbol(c, state, f->chroma_h_shift, 0); + put_symbol(c, state, f->chroma_v_shift, 0); + put_rac(c, state, 0); //no transparency plane for(i=0; i<5; i++) write_quant_table(c, f->quant_table[i]); @@ -543,6 +550,12 @@ static int encode_init(AVCodecContext *avctx) FFV1Context *s = avctx->priv_data; int i; + if(avctx->strict_std_compliance >= 0){ + av_log(avctx, AV_LOG_ERROR, "this codec is under development, files encoded with it wont be decodeable with future versions!!!\n" + "use vstrict=-1 / -strict -1 to use it anyway\n"); + return -1; + } + 
common_init(avctx); s->version=0; @@ -609,13 +622,12 @@ static void clear_state(FFV1Context *f){ for(i=0; i<f->plane_count; i++){ PlaneContext *p= &f->plane[i]; - p->interlace_bit_state[0]= 0; - p->interlace_bit_state[1]= 0; + p->interlace_bit_state[0]= 128; + p->interlace_bit_state[1]= 128; for(j=0; j<p->context_count; j++){ if(f->ac){ - memset(p->state[j], 0, sizeof(uint8_t)*CONTEXT_SIZE); - p->state[j][7] = 2*62; + memset(p->state[j], 128, sizeof(uint8_t)*CONTEXT_SIZE); }else{ p->vlc_state[j].drift= 0; p->vlc_state[j].error_sum= 4; //FFMAX((RANGE + 32)/64, 2); @@ -628,39 +640,33 @@ static void clear_state(FFV1Context *f){ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){ FFV1Context *f = avctx->priv_data; - CABACContext * const c= &f->c; + RangeCoder * const c= &f->c; AVFrame *pict = data; const int width= f->width; const int height= f->height; AVFrame * const p= &f->picture; int used_count= 0; + uint8_t keystate=128; + + ff_init_range_encoder(c, buf, buf_size); +// ff_init_cabac_states(c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64); + ff_build_rac_states(c, 0.05*(1LL<<32), 256-8); - if(avctx->strict_std_compliance >= 0){ - av_log(avctx, AV_LOG_ERROR, "this codec is under development, files encoded with it wont be decodeable with future versions!!!\n" - "use vstrict=-1 to use it anyway\n"); - return -1; - } - - ff_init_cabac_encoder(c, buf, buf_size); - ff_init_cabac_states(c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64); - c->lps_state[2] = 1; - c->lps_state[3] = 0; - *p = *pict; p->pict_type= FF_I_TYPE; if(avctx->gop_size==0 || f->picture_number % avctx->gop_size == 0){ - put_cabac_bypass(c, 1); + put_rac(c, &keystate, 1); p->key_frame= 1; write_header(f); clear_state(f); }else{ - put_cabac_bypass(c, 0); + put_rac(c, &keystate, 0); p->key_frame= 0; } if(!f->ac){ - used_count += put_cabac_terminate(c, 1); + used_count += ff_rac_terminate(c); //printf("pos=%d\n", used_count); 
init_put_bits(&f->pb, buf + used_count, buf_size - used_count); } @@ -681,7 +687,7 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, f->picture_number++; if(f->ac){ - return put_cabac_terminate(c, 1); + return ff_rac_terminate(c); }else{ flush_put_bits(&f->pb); //nicer padding FIXME return used_count + (put_bits_count(&f->pb)+7)/8; @@ -709,7 +715,7 @@ static int encode_end(AVCodecContext *avctx) static inline void decode_line(FFV1Context *s, int w, int_fast16_t *sample[2], int plane_index, int bits){ PlaneContext * const p= &s->plane[plane_index]; - CABACContext * const c= &s->c; + RangeCoder * const c= &s->c; int x; int run_count=0; int run_mode=0; @@ -726,9 +732,9 @@ static inline void decode_line(FFV1Context *s, int w, int_fast16_t *sample[2], i sign=0; - if(s->ac) - diff= get_symbol(c, p->state[context], 1, bits-1); - else{ + if(s->ac){ + diff= get_symbol(c, p->state[context], 1); + }else{ if(context == 0 && run_mode==0) run_mode=1; if(run_mode){ @@ -833,13 +839,15 @@ static void decode_rgb_frame(FFV1Context *s, uint32_t *src, int w, int h, int st } } -static int read_quant_table(CABACContext *c, int16_t *quant_table, int scale){ +static int read_quant_table(RangeCoder *c, int16_t *quant_table, int scale){ int v; int i=0; - uint8_t state[CONTEXT_SIZE]={0}; + uint8_t state[CONTEXT_SIZE]; + + memset(state, 128, sizeof(state)); for(v=0; i<128 ; v++){ - int len= get_symbol(c, state, 0, 7) + 1; + int len= get_symbol(c, state, 0) + 1; if(len + i > 128) return -1; @@ -860,17 +868,19 @@ static int read_quant_table(CABACContext *c, int16_t *quant_table, int scale){ } static int read_header(FFV1Context *f){ - uint8_t state[CONTEXT_SIZE]={0}; + uint8_t state[CONTEXT_SIZE]; int i, context_count; - CABACContext * const c= &f->c; + RangeCoder * const c= &f->c; - f->version= get_symbol(c, state, 0, 7); - f->ac= f->avctx->coder_type= get_symbol(c, state, 0, 7); - f->colorspace= get_symbol(c, state, 0, 7); //YUV cs type - get_cabac(c, state); 
//no chroma = false - f->chroma_h_shift= get_symbol(c, state, 0, 7); - f->chroma_v_shift= get_symbol(c, state, 0, 7); - get_cabac(c, state); //transparency plane + memset(state, 128, sizeof(state)); + + f->version= get_symbol(c, state, 0); + f->ac= f->avctx->coder_type= get_symbol(c, state, 0); + f->colorspace= get_symbol(c, state, 0); //YUV cs type + get_rac(c, state); //no chroma = false + f->chroma_h_shift= get_symbol(c, state, 0); + f->chroma_v_shift= get_symbol(c, state, 0); + get_rac(c, state); //transparency plane f->plane_count= 2; if(f->colorspace==0){ @@ -879,7 +889,7 @@ static int read_header(FFV1Context *f){ case 0x10: f->avctx->pix_fmt= PIX_FMT_YUV422P; break; case 0x11: f->avctx->pix_fmt= PIX_FMT_YUV420P; break; case 0x20: f->avctx->pix_fmt= PIX_FMT_YUV411P; break; - case 0x33: f->avctx->pix_fmt= PIX_FMT_YUV410P; break; + case 0x22: f->avctx->pix_fmt= PIX_FMT_YUV410P; break; default: av_log(f->avctx, AV_LOG_ERROR, "format not supported\n"); return -1; @@ -900,7 +910,7 @@ static int read_header(FFV1Context *f){ context_count=1; for(i=0; i<5; i++){ context_count*= read_quant_table(c, f->quant_table[i], context_count); - if(context_count < 0){ + if(context_count < 0 || context_count > 32768){ av_log(f->avctx, AV_LOG_ERROR, "read_quant_table error\n"); return -1; } @@ -933,26 +943,21 @@ static int decode_init(AVCodecContext *avctx) static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size){ FFV1Context *f = avctx->priv_data; - CABACContext * const c= &f->c; + RangeCoder * const c= &f->c; const int width= f->width; const int height= f->height; AVFrame * const p= &f->picture; int bytes_read; + uint8_t keystate= 128; AVFrame *picture = data; - /* no supplementary picture */ - if (buf_size == 0) - return 0; - - ff_init_cabac_decoder(c, buf, buf_size); - ff_init_cabac_states(c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64); - c->lps_state[2] = 1; - c->lps_state[3] = 0; + ff_init_range_decoder(c, buf, 
buf_size); + ff_build_rac_states(c, 0.05*(1LL<<32), 256-8); p->pict_type= FF_I_TYPE; //FIXME I vs. P - if(get_cabac_bypass(c)){ + if(get_rac(c, &keystate)){ p->key_frame= 1; read_header(f); clear_state(f); @@ -970,8 +975,8 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8 av_log(avctx, AV_LOG_ERROR, "keyframe:%d coder:%d\n", p->key_frame, f->ac); if(!f->ac){ - bytes_read = get_cabac_terminate(c); - if(bytes_read ==0) av_log(avctx, AV_LOG_ERROR, "error at end of AC stream\n"); + bytes_read = c->bytestream - c->bytestream_start - 1; + if(bytes_read ==0) av_log(avctx, AV_LOG_ERROR, "error at end of AC stream\n"); //FIXME //printf("pos=%d\n", bytes_read); init_get_bits(&f->gb, buf + bytes_read, buf_size - bytes_read); } else { @@ -1000,7 +1005,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8 *data_size = sizeof(AVFrame); if(f->ac){ - bytes_read= get_cabac_terminate(c); + bytes_read= c->bytestream - c->bytestream_start - 1; if(bytes_read ==0) av_log(f->avctx, AV_LOG_ERROR, "error at end of frame\n"); }else{ bytes_read+= (get_bits_count(&f->gb)+7)/8; diff --git a/src/libffmpeg/libavcodec/flac.c b/src/libffmpeg/libavcodec/flac.c index 464d7999d..894da9384 100644 --- a/src/libffmpeg/libavcodec/flac.c +++ b/src/libffmpeg/libavcodec/flac.c @@ -34,6 +34,7 @@ #include <limits.h> #include "avcodec.h" +#include "bitstream.h" #include "golomb.h" #undef NDEBUG @@ -142,6 +143,29 @@ static int64_t get_utf8(GetBitContext *gb) return val; } +#if 0 +static int skip_utf8(GetBitContext *gb) +{ + int ones=0, bytes; + + while(get_bits1(gb)) + ones++; + + if (ones==0) bytes=0; + else if(ones==1) return -1; + else bytes= ones - 1; + + skip_bits(gb, 7-ones); + while(bytes--){ + const int tmp = get_bits(gb, 8); + + if((tmp>>6) != 2) + return -1; + } + return 0; +} +#endif + static int get_crc8(const uint8_t *buf, int count){ int crc=0; int i; @@ -569,12 +593,12 @@ static int flac_decode_frame(AVCodecContext *avctx, int16_t 
*samples = data; if(s->max_framesize == 0){ - s->max_framesize= 8192; // should hopefully be enough for the first header + s->max_framesize= 65536; // should hopefully be enough for the first header s->bitstream= av_fast_realloc(s->bitstream, &s->allocated_bitstream_size, s->max_framesize); } if(1 && s->max_framesize){//FIXME truncated - buf_size= FFMIN(buf_size, s->max_framesize - s->bitstream_size); + buf_size= FFMAX(FFMIN(buf_size, s->max_framesize - s->bitstream_size), 0); input_buf_size= buf_size; if(s->bitstream_index + s->bitstream_size + buf_size > s->allocated_bitstream_size){ @@ -612,10 +636,20 @@ static int flac_decode_frame(AVCodecContext *avctx, if(metadata_size){ switch(metadata_type) { - case METADATA_TYPE_STREAMINFO: + case METADATA_TYPE_STREAMINFO:{ metadata_streaminfo(s); + + /* Buffer might have been reallocated, reinit bitreader */ + if(buf != &s->bitstream[s->bitstream_index]) + { + int bits_count = get_bits_count(&s->gb); + buf= &s->bitstream[s->bitstream_index]; + init_get_bits(&s->gb, buf, buf_size*8); + skip_bits(&s->gb, bits_count); + } + dump_headers(s); - break; + break;} default: for(i=0; i<metadata_size; i++) skip_bits(&s->gb, 8); diff --git a/src/libffmpeg/libavcodec/flicvideo.c b/src/libffmpeg/libavcodec/flicvideo.c index 99825cebc..92cb8bd0b 100644 --- a/src/libffmpeg/libavcodec/flicvideo.c +++ b/src/libffmpeg/libavcodec/flicvideo.c @@ -176,7 +176,7 @@ static int flic_decode_frame(AVCodecContext *avctx, for (j = 0; j < color_changes; j++) { /* wrap around, for good measure */ - if (palette_ptr >= 256) + if ((unsigned)palette_ptr >= 256) palette_ptr = 0; r = buf[stream_ptr++] << color_shift; diff --git a/src/libffmpeg/libavcodec/g726.c b/src/libffmpeg/libavcodec/g726.c index c016f32cf..bc9374d3e 100644 --- a/src/libffmpeg/libavcodec/g726.c +++ b/src/libffmpeg/libavcodec/g726.c @@ -22,16 +22,18 @@ #include <limits.h> #include "avcodec.h" #include "common.h" +#include "bitstream.h" -/* +/** + * G.726 11bit float. 
* G.726 Standard uses rather odd 11bit floating point arithmentic for * numerous occasions. It's a mistery to me why they did it this way * instead of simply using 32bit integer arithmetic. */ typedef struct Float11 { - int sign; /* 1bit sign */ - int exp; /* 4bit exponent */ - int mant; /* 6bit mantissa */ + int sign; /**< 1bit sign */ + int exp; /**< 4bit exponent */ + int mant; /**< 6bit mantissa */ } Float11; static inline Float11* i2f(int16_t i, Float11* f) @@ -61,35 +63,35 @@ static inline int sgn(int value) } typedef struct G726Tables { - int bits; /* bits per sample */ - int* quant; /* quantization table */ - int* iquant; /* inverse quantization table */ - int* W; /* special table #1 ;-) */ - int* F; /* special table #2 */ + int bits; /**< bits per sample */ + int* quant; /**< quantization table */ + int* iquant; /**< inverse quantization table */ + int* W; /**< special table #1 ;-) */ + int* F; /**< special table #2 */ } G726Tables; typedef struct G726Context { - G726Tables* tbls; /* static tables needed for computation */ + G726Tables* tbls; /**< static tables needed for computation */ - Float11 sr[2]; /* prev. reconstructed samples */ - Float11 dq[6]; /* prev. difference */ - int a[2]; /* second order predictor coeffs */ - int b[6]; /* sixth order predictor coeffs */ - int pk[2]; /* signs of prev. 2 sez + dq */ + Float11 sr[2]; /**< prev. reconstructed samples */ + Float11 dq[6]; /**< prev. difference */ + int a[2]; /**< second order predictor coeffs */ + int b[6]; /**< sixth order predictor coeffs */ + int pk[2]; /**< signs of prev. 
2 sez + dq */ - int ap; /* scale factor control */ - int yu; /* fast scale factor */ - int yl; /* slow scale factor */ - int dms; /* short average magnitude of F[i] */ - int dml; /* long average magnitude of F[i] */ - int td; /* tone detect */ - - int se; /* estimated signal for the next iteration */ - int sez; /* estimated second order prediction */ - int y; /* quantizer scaling factor for the next iteration */ + int ap; /**< scale factor control */ + int yu; /**< fast scale factor */ + int yl; /**< slow scale factor */ + int dms; /**< short average magnitude of F[i] */ + int dml; /**< long average magnitude of F[i] */ + int td; /**< tone detect */ + + int se; /**< estimated signal for the next iteration */ + int sez; /**< estimated second order prediction */ + int y; /**< quantizer scaling factor for the next iteration */ } G726Context; -static int quant_tbl16[] = /* 16kbit/s 2bits per sample */ +static int quant_tbl16[] = /**< 16kbit/s 2bits per sample */ { 260, INT_MAX }; static int iquant_tbl16[] = { 116, 365, 365, 116 }; @@ -98,7 +100,7 @@ static int W_tbl16[] = static int F_tbl16[] = { 0, 7, 7, 0 }; -static int quant_tbl24[] = /* 24kbit/s 3bits per sample */ +static int quant_tbl24[] = /**< 24kbit/s 3bits per sample */ { 7, 217, 330, INT_MAX }; static int iquant_tbl24[] = { INT_MIN, 135, 273, 373, 373, 273, 135, INT_MIN }; @@ -107,7 +109,7 @@ static int W_tbl24[] = static int F_tbl24[] = { 0, 1, 2, 7, 7, 2, 1, 0 }; -static int quant_tbl32[] = /* 32kbit/s 4bits per sample */ +static int quant_tbl32[] = /**< 32kbit/s 4bits per sample */ { -125, 79, 177, 245, 299, 348, 399, INT_MAX }; static int iquant_tbl32[] = { INT_MIN, 4, 135, 213, 273, 323, 373, 425, @@ -118,7 +120,7 @@ static int W_tbl32[] = static int F_tbl32[] = { 0, 0, 0, 1, 1, 1, 3, 7, 7, 3, 1, 1, 1, 0, 0, 0 }; -static int quant_tbl40[] = /* 40kbit/s 5bits per sample */ +static int quant_tbl40[] = /**< 40kbit/s 5bits per sample */ { -122, -16, 67, 138, 197, 249, 297, 338, 377, 412, 444, 474, 501, 527, 
552, INT_MAX }; static int iquant_tbl40[] = @@ -142,7 +144,7 @@ static G726Tables G726Tables_pool[] = { 5, quant_tbl40, iquant_tbl40, W_tbl40, F_tbl40 }}; -/* +/** * Para 4.2.2 page 18: Adaptive quantizer. */ static inline uint8_t quant(G726Context* c, int d) @@ -168,7 +170,7 @@ static inline uint8_t quant(G726Context* c, int d) return i; } -/* +/** * Para 4.2.3 page 22: Inverse adaptive quantizer. */ static inline int16_t inverse_quant(G726Context* c, int i) diff --git a/src/libffmpeg/libavcodec/golomb.h b/src/libffmpeg/libavcodec/golomb.h index cd8bdd38d..1204a52e2 100644 --- a/src/libffmpeg/libavcodec/golomb.h +++ b/src/libffmpeg/libavcodec/golomb.h @@ -1,6 +1,7 @@ /* * exp golomb vlc stuff * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at> + * Copyright (c) 2004 Alex Beregszaszi * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -22,7 +23,7 @@ * @file golomb.h * @brief * exp golomb vlc stuff - * @author Michael Niedermayer <michaelni@gmx.at> + * @author Michael Niedermayer <michaelni@gmx.at> and Alex Beregszaszi */ #define INVALID_VLC 0x80000000 @@ -80,7 +81,10 @@ static inline int svq3_get_ue_golomb(GetBitContext *gb){ return ff_interleaved_ue_golomb_vlc_code[buf]; }else{ - buf|=1; + LAST_SKIP_BITS(re, gb, 8); + UPDATE_CACHE(re, gb); + buf |= 1 | (GET_CACHE(re, gb) >> 8); + if((buf & 0xAAAAAAAA) == 0) return INVALID_VLC; @@ -88,7 +92,7 @@ static inline int svq3_get_ue_golomb(GetBitContext *gb){ buf = (buf << 2) - ((buf << log) >> (log - 1)) + (buf >> 30); } - LAST_SKIP_BITS(re, gb, 63 - 2*log); + LAST_SKIP_BITS(re, gb, 63 - 2*log - 8); CLOSE_READER(re, gb); return ((buf << log) >> log) - 1; @@ -163,7 +167,10 @@ static inline int svq3_get_se_golomb(GetBitContext *gb){ return ff_interleaved_se_golomb_vlc_code[buf]; }else{ - buf |=1; + LAST_SKIP_BITS(re, gb, 8); + UPDATE_CACHE(re, gb); + buf |= 1 | (GET_CACHE(re, gb) >> 8); + if((buf & 0xAAAAAAAA) == 0) return INVALID_VLC; @@ 
-171,7 +178,7 @@ static inline int svq3_get_se_golomb(GetBitContext *gb){ buf = (buf << 2) - ((buf << log) >> (log - 1)) + (buf >> 30); } - LAST_SKIP_BITS(re, gb, 63 - 2*log); + LAST_SKIP_BITS(re, gb, 63 - 2*log - 8); CLOSE_READER(re, gb); return (signed) (((((buf << log) >> log) - 1) ^ -(buf & 0x1)) + 1) >> 1; @@ -257,16 +264,50 @@ static inline int get_ur_golomb_jpegls(GetBitContext *gb, int k, int limit, int } /** - * read unsigned golomb rice code (flac). + * read signed golomb rice code (ffv1). + */ +static inline int get_sr_golomb(GetBitContext *gb, int k, int limit, int esc_len){ + int v= get_ur_golomb(gb, k, limit, esc_len); + + v++; + if (v&1) return v>>1; + else return -(v>>1); + +// return (v>>1) ^ -(v&1); +} + +/** + * read signed golomb rice code (flac). */ static inline int get_sr_golomb_flac(GetBitContext *gb, int k, int limit, int esc_len){ int v= get_ur_golomb_jpegls(gb, k, limit, esc_len); return (v>>1) ^ -(v&1); } +/** + * read unsigned golomb rice code (shorten). + */ +static inline unsigned int get_ur_golomb_shorten(GetBitContext *gb, int k){ + return get_ur_golomb_jpegls(gb, k, INT_MAX, 0); +} + +/** + * read signed golomb rice code (shorten). 
+ */ +static inline int get_sr_golomb_shorten(GetBitContext* gb, int k) +{ + int uvar = get_ur_golomb_jpegls(gb, k + 1, INT_MAX, 0); + if (uvar & 1) + return ~(uvar >> 1); + else + return uvar >> 1; +} + + + #ifdef TRACE -static inline int get_ue(GetBitContext *s, char *file, char *func, int line){ +static inline int get_ue(GetBitContext *s, char *file, const char *func, int line){ int show= show_bits(s, 24); int pos= get_bits_count(s); int i= get_ue_golomb(s); @@ -275,12 +316,12 @@ static inline int get_ue(GetBitContext *s, char *file, char *func, int line){ print_bin(bits, len); - printf("%5d %2d %3d ue @%5d in %s %s:%d\n", bits, len, i, pos, file, func, line); + av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d ue @%5d in %s %s:%d\n", bits, len, i, pos, file, func, line); return i; } -static inline int get_se(GetBitContext *s, char *file, char *func, int line){ +static inline int get_se(GetBitContext *s, char *file, const char *func, int line){ int show= show_bits(s, 24); int pos= get_bits_count(s); int i= get_se_golomb(s); @@ -289,12 +330,12 @@ static inline int get_se(GetBitContext *s, char *file, char *func, int line){ print_bin(bits, len); - printf("%5d %2d %3d se @%5d in %s %s:%d\n", bits, len, i, pos, file, func, line); + av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d se @%5d in %s %s:%d\n", bits, len, i, pos, file, func, line); return i; } -static inline int get_te(GetBitContext *s, int r, char *file, char *func, int line){ +static inline int get_te(GetBitContext *s, int r, char *file, const char *func, int line){ int show= show_bits(s, 24); int pos= get_bits_count(s); int i= get_te0_golomb(s, r); @@ -303,7 +344,7 @@ static inline int get_te(GetBitContext *s, int r, char *file, char *func, int li print_bin(bits, len); - printf("%5d %2d %3d te @%5d in %s %s:%d\n", bits, len, i, pos, file, func, line); + av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d te @%5d in %s %s:%d\n", bits, len, i, pos, file, func, line); return i; } @@ -400,3 +441,27 @@ static inline void 
set_ur_golomb_jpegls(PutBitContext *pb, int i, int k, int lim put_bits(pb, esc_len, i - 1); } } + +/** + * write signed golomb rice code (ffv1). + */ +static inline void set_sr_golomb(PutBitContext *pb, int i, int k, int limit, int esc_len){ + int v; + + v = -2*i-1; + v ^= (v>>31); + + set_ur_golomb(pb, v, k, limit, esc_len); +} + +/** + * write signed golomb rice code (flac). + */ +static inline void set_sr_golomb_flac(PutBitContext *pb, int i, int k, int limit, int esc_len){ + int v; + + v = -2*i-1; + v ^= (v>>31); + + set_ur_golomb_jpegls(pb, v, k, limit, esc_len); +} diff --git a/src/libffmpeg/libavcodec/h261.c b/src/libffmpeg/libavcodec/h261.c new file mode 100644 index 000000000..aceebaa38 --- /dev/null +++ b/src/libffmpeg/libavcodec/h261.c @@ -0,0 +1,1041 @@ +/* + * H261 decoder + * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> + * Copyright (c) 2004 Maarten Daniels + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/** + * @file h261.c + * h261codec. 
+ */ + +#include "common.h" +#include "dsputil.h" +#include "avcodec.h" +#include "mpegvideo.h" +#include "h261data.h" + + +#define H261_MBA_VLC_BITS 9 +#define H261_MTYPE_VLC_BITS 6 +#define H261_MV_VLC_BITS 7 +#define H261_CBP_VLC_BITS 9 +#define TCOEFF_VLC_BITS 9 + +#define MBA_STUFFING 33 +#define MBA_STARTCODE 34 +#define IS_FIL(a) ((a)&MB_TYPE_H261_FIL) + +/** + * H261Context + */ +typedef struct H261Context{ + MpegEncContext s; + + int current_mba; + int previous_mba; + int mba_diff; + int mtype; + int current_mv_x; + int current_mv_y; + int gob_number; + int gob_start_code_skipped; // 1 if gob start code is already read before gob header is read +}H261Context; + +void ff_h261_loop_filter(MpegEncContext *s){ + H261Context * h= (H261Context*)s; + const int linesize = s->linesize; + const int uvlinesize= s->uvlinesize; + uint8_t *dest_y = s->dest[0]; + uint8_t *dest_cb= s->dest[1]; + uint8_t *dest_cr= s->dest[2]; + + if(!(IS_FIL (h->mtype))) + return; + + s->dsp.h261_loop_filter(dest_y , linesize); + s->dsp.h261_loop_filter(dest_y + 8, linesize); + s->dsp.h261_loop_filter(dest_y + 8 * linesize , linesize); + s->dsp.h261_loop_filter(dest_y + 8 * linesize + 8, linesize); + s->dsp.h261_loop_filter(dest_cb, uvlinesize); + s->dsp.h261_loop_filter(dest_cr, uvlinesize); +} + +static int ff_h261_get_picture_format(int width, int height){ + // QCIF + if (width == 176 && height == 144) + return 0; + // CIF + else if (width == 352 && height == 288) + return 1; + // ERROR + else + return -1; +} + +static void h261_encode_block(H261Context * h, DCTELEM * block, + int n); +static int h261_decode_block(H261Context *h, DCTELEM *block, + int n, int coded); + +void ff_h261_encode_picture_header(MpegEncContext * s, int picture_number){ + H261Context * h = (H261Context *) s; + int format, temp_ref; + + align_put_bits(&s->pb); + + /* Update the pointer to last GOB */ + s->ptr_lastgob = pbBufPtr(&s->pb); + + put_bits(&s->pb, 20, 0x10); /* PSC */ + + temp_ref= s->picture_number * 
(int64_t)30000 * s->avctx->frame_rate_base / + (1001 * (int64_t)s->avctx->frame_rate); + put_bits(&s->pb, 5, temp_ref & 0x1f); /* TemporalReference */ + + put_bits(&s->pb, 1, 0); /* split screen off */ + put_bits(&s->pb, 1, 0); /* camera off */ + put_bits(&s->pb, 1, 0); /* freeze picture release off */ + + format = ff_h261_get_picture_format(s->width, s->height); + + put_bits(&s->pb, 1, format); /* 0 == QCIF, 1 == CIF */ + + put_bits(&s->pb, 1, 0); /* still image mode */ + put_bits(&s->pb, 1, 0); /* reserved */ + + put_bits(&s->pb, 1, 0); /* no PEI */ + if(format == 0) + h->gob_number = -1; + else + h->gob_number = 0; + h->current_mba = 0; +} + +/** + * Encodes a group of blocks header. + */ +static void h261_encode_gob_header(MpegEncContext * s, int mb_line){ + H261Context * h = (H261Context *)s; + if(ff_h261_get_picture_format(s->width, s->height) == 0){ + h->gob_number+=2; // QCIF + } + else{ + h->gob_number++; // CIF + } + put_bits(&s->pb, 16, 1); /* GBSC */ + put_bits(&s->pb, 4, h->gob_number); /* GN */ + put_bits(&s->pb, 5, s->qscale); /* GQUANT */ + put_bits(&s->pb, 1, 0); /* no GEI */ + h->current_mba = 0; + h->previous_mba = 0; + h->current_mv_x=0; + h->current_mv_y=0; +} + +void ff_h261_reorder_mb_index(MpegEncContext* s){ + int index= s->mb_x + s->mb_y*s->mb_width; + + if(index % 33 == 0) + h261_encode_gob_header(s,0); + + /* for CIF the GOB's are fragmented in the middle of a scanline + that's why we need to adjust the x and y index of the macroblocks */ + if(ff_h261_get_picture_format(s->width,s->height) == 1){ // CIF + s->mb_x = index % 11 ; index /= 11; + s->mb_y = index % 3 ; index /= 3; + s->mb_x+= 11*(index % 2); index /= 2; + s->mb_y+= 3*index; + + ff_init_block_index(s); + ff_update_block_index(s); + } +} + +static void h261_encode_motion(H261Context * h, int val){ + MpegEncContext * const s = &h->s; + int sign, code; + if(val==0){ + code = 0; + put_bits(&s->pb,h261_mv_tab[code][1],h261_mv_tab[code][0]); + } + else{ + if(val > 15) + val -=32; + 
if(val < -16) + val+=32; + sign = val < 0; + code = sign ? -val : val; + put_bits(&s->pb,h261_mv_tab[code][1],h261_mv_tab[code][0]); + put_bits(&s->pb,1,sign); + } +} + +static inline int get_cbp(MpegEncContext * s, + DCTELEM block[6][64]) +{ + int i, cbp; + cbp= 0; + for (i = 0; i < 6; i++) { + if (s->block_last_index[i] >= 0) + cbp |= 1 << (5 - i); + } + return cbp; +} +void ff_h261_encode_mb(MpegEncContext * s, + DCTELEM block[6][64], + int motion_x, int motion_y) +{ + H261Context * h = (H261Context *)s; + int mvd, mv_diff_x, mv_diff_y, i, cbp; + cbp = 63; // avoid warning + mvd = 0; + + h->current_mba++; + h->mtype = 0; + + if (!s->mb_intra){ + /* compute cbp */ + cbp= get_cbp(s, block); + + /* mvd indicates if this block is motion compensated */ + mvd = motion_x | motion_y; + + if((cbp | mvd | s->dquant ) == 0) { + /* skip macroblock */ + s->skip_count++; + h->current_mv_x=0; + h->current_mv_y=0; + return; + } + } + + /* MB is not skipped, encode MBA */ + put_bits(&s->pb, h261_mba_bits[(h->current_mba-h->previous_mba)-1], h261_mba_code[(h->current_mba-h->previous_mba)-1]); + + /* calculate MTYPE */ + if(!s->mb_intra){ + h->mtype++; + + if(mvd || s->loop_filter) + h->mtype+=3; + if(s->loop_filter) + h->mtype+=3; + if(cbp || s->dquant) + h->mtype++; + assert(h->mtype > 1); + } + + if(s->dquant) + h->mtype++; + + put_bits(&s->pb, h261_mtype_bits[h->mtype], h261_mtype_code[h->mtype]); + + h->mtype = h261_mtype_map[h->mtype]; + + if(IS_QUANT(h->mtype)){ + ff_set_qscale(s,s->qscale+s->dquant); + put_bits(&s->pb, 5, s->qscale); + } + + if(IS_16X16(h->mtype)){ + mv_diff_x = (motion_x >> 1) - h->current_mv_x; + mv_diff_y = (motion_y >> 1) - h->current_mv_y; + h->current_mv_x = (motion_x >> 1); + h->current_mv_y = (motion_y >> 1); + h261_encode_motion(h,mv_diff_x); + h261_encode_motion(h,mv_diff_y); + } + + h->previous_mba = h->current_mba; + + if(HAS_CBP(h->mtype)){ + put_bits(&s->pb,h261_cbp_tab[cbp-1][1],h261_cbp_tab[cbp-1][0]); + } + for(i=0; i<6; i++) { + /* encode 
each block */ + h261_encode_block(h, block[i], i); + } + + if ( ( h->current_mba == 11 ) || ( h->current_mba == 22 ) || ( h->current_mba == 33 ) || ( !IS_16X16 ( h->mtype ) )){ + h->current_mv_x=0; + h->current_mv_y=0; + } +} + +void ff_h261_encode_init(MpegEncContext *s){ + static int done = 0; + + if (!done) { + done = 1; + init_rl(&h261_rl_tcoeff, 1); + } + + s->min_qcoeff= -127; + s->max_qcoeff= 127; + s->y_dc_scale_table= + s->c_dc_scale_table= ff_mpeg1_dc_scale_table; +} + + +/** + * encodes a 8x8 block. + * @param block the 8x8 block + * @param n block index (0-3 are luma, 4-5 are chroma) + */ +static void h261_encode_block(H261Context * h, DCTELEM * block, int n){ + MpegEncContext * const s = &h->s; + int level, run, last, i, j, last_index, last_non_zero, sign, slevel, code; + RLTable *rl; + + rl = &h261_rl_tcoeff; + if (s->mb_intra) { + /* DC coef */ + level = block[0]; + /* 255 cannot be represented, so we clamp */ + if (level > 254) { + level = 254; + block[0] = 254; + } + /* 0 cannot be represented also */ + else if (level < 1) { + level = 1; + block[0] = 1; + } + if (level == 128) + put_bits(&s->pb, 8, 0xff); + else + put_bits(&s->pb, 8, level); + i = 1; + } else if((block[0]==1 || block[0] == -1) && (s->block_last_index[n] > -1)){ + //special case + put_bits(&s->pb,2,block[0]>0 ? 
2 : 3 ); + i = 1; + } else { + i = 0; + } + + /* AC coefs */ + last_index = s->block_last_index[n]; + last_non_zero = i - 1; + for (; i <= last_index; i++) { + j = s->intra_scantable.permutated[i]; + level = block[j]; + if (level) { + run = i - last_non_zero - 1; + last = (i == last_index); + sign = 0; + slevel = level; + if (level < 0) { + sign = 1; + level = -level; + } + code = get_rl_index(rl, 0 /*no last in H.261, EOB is used*/, run, level); + if(run==0 && level < 16) + code+=1; + put_bits(&s->pb, rl->table_vlc[code][1], rl->table_vlc[code][0]); + if (code == rl->n) { + put_bits(&s->pb, 6, run); + assert(slevel != 0); + assert(level <= 127); + put_bits(&s->pb, 8, slevel & 0xff); + } else { + put_bits(&s->pb, 1, sign); + } + last_non_zero = i; + } + } + if(last_index > -1){ + put_bits(&s->pb, rl->table_vlc[0][1], rl->table_vlc[0][0]);// END OF BLOCK + } +} + +/***********************************************/ +/* decoding */ + +static VLC h261_mba_vlc; +static VLC h261_mtype_vlc; +static VLC h261_mv_vlc; +static VLC h261_cbp_vlc; + +void init_vlc_rl(RLTable *rl, int use_static); + +static void h261_decode_init_vlc(H261Context *h){ + static int done = 0; + + if(!done){ + done = 1; + init_vlc(&h261_mba_vlc, H261_MBA_VLC_BITS, 35, + h261_mba_bits, 1, 1, + h261_mba_code, 1, 1, 1); + init_vlc(&h261_mtype_vlc, H261_MTYPE_VLC_BITS, 10, + h261_mtype_bits, 1, 1, + h261_mtype_code, 1, 1, 1); + init_vlc(&h261_mv_vlc, H261_MV_VLC_BITS, 17, + &h261_mv_tab[0][1], 2, 1, + &h261_mv_tab[0][0], 2, 1, 1); + init_vlc(&h261_cbp_vlc, H261_CBP_VLC_BITS, 63, + &h261_cbp_tab[0][1], 2, 1, + &h261_cbp_tab[0][0], 2, 1, 1); + init_rl(&h261_rl_tcoeff, 1); + init_vlc_rl(&h261_rl_tcoeff, 1); + } +} + +static int h261_decode_init(AVCodecContext *avctx){ + H261Context *h= avctx->priv_data; + MpegEncContext * const s = &h->s; + + // set defaults + MPV_decode_defaults(s); + s->avctx = avctx; + + s->width = s->avctx->coded_width; + s->height = s->avctx->coded_height; + s->codec_id = 
s->avctx->codec->id; + + s->out_format = FMT_H261; + s->low_delay= 1; + avctx->pix_fmt= PIX_FMT_YUV420P; + + s->codec_id= avctx->codec->id; + + h261_decode_init_vlc(h); + + h->gob_start_code_skipped = 0; + + return 0; +} + +/** + * decodes the group of blocks header or slice header. + * @return <0 if an error occured + */ +static int h261_decode_gob_header(H261Context *h){ + unsigned int val; + MpegEncContext * const s = &h->s; + + if ( !h->gob_start_code_skipped ){ + /* Check for GOB Start Code */ + val = show_bits(&s->gb, 15); + if(val) + return -1; + + /* We have a GBSC */ + skip_bits(&s->gb, 16); + } + + h->gob_start_code_skipped = 0; + + h->gob_number = get_bits(&s->gb, 4); /* GN */ + s->qscale = get_bits(&s->gb, 5); /* GQUANT */ + + /* Check if gob_number is valid */ + if (s->mb_height==18){ //cif + if ((h->gob_number<=0) || (h->gob_number>12)) + return -1; + } + else{ //qcif + if ((h->gob_number!=1) && (h->gob_number!=3) && (h->gob_number!=5)) + return -1; + } + + /* GEI */ + while (get_bits1(&s->gb) != 0) { + skip_bits(&s->gb, 8); + } + + if(s->qscale==0) + return -1; + + // For the first transmitted macroblock in a GOB, MBA is the absolute address. For + // subsequent macroblocks, MBA is the difference between the absolute addresses of + // the macroblock and the last transmitted macroblock. + h->current_mba = 0; + h->mba_diff = 0; + + return 0; +} + +/** + * decodes the group of blocks / video packet header. + * @return <0 if no resync found + */ +static int ff_h261_resync(H261Context *h){ + MpegEncContext * const s = &h->s; + int left, ret; + + if ( h->gob_start_code_skipped ){ + ret= h261_decode_gob_header(h); + if(ret>=0) + return 0; + } + else{ + if(show_bits(&s->gb, 15)==0){ + ret= h261_decode_gob_header(h); + if(ret>=0) + return 0; + } + //ok, its not where its supposed to be ... 
+ s->gb= s->last_resync_gb; + align_get_bits(&s->gb); + left= s->gb.size_in_bits - get_bits_count(&s->gb); + + for(;left>15+1+4+5; left-=8){ + if(show_bits(&s->gb, 15)==0){ + GetBitContext bak= s->gb; + + ret= h261_decode_gob_header(h); + if(ret>=0) + return 0; + + s->gb= bak; + } + skip_bits(&s->gb, 8); + } + } + + return -1; +} + +/** + * decodes skipped macroblocks + * @return 0 + */ +static int h261_decode_mb_skipped(H261Context *h, int mba1, int mba2 ) +{ + MpegEncContext * const s = &h->s; + int i; + + s->mb_intra = 0; + + for(i=mba1; i<mba2; i++){ + int j, xy; + + s->mb_x= ((h->gob_number-1) % 2) * 11 + i % 11; + s->mb_y= ((h->gob_number-1) / 2) * 3 + i / 11; + xy = s->mb_x + s->mb_y * s->mb_stride; + ff_init_block_index(s); + ff_update_block_index(s); + s->dsp.clear_blocks(s->block[0]); + + for(j=0;j<6;j++) + s->block_last_index[j] = -1; + + s->mv_dir = MV_DIR_FORWARD; + s->mv_type = MV_TYPE_16X16; + s->current_picture.mb_type[xy]= MB_TYPE_SKIP | MB_TYPE_16x16 | MB_TYPE_L0; + s->mv[0][0][0] = 0; + s->mv[0][0][1] = 0; + s->mb_skiped = 1; + h->mtype &= ~MB_TYPE_H261_FIL; + + MPV_decode_mb(s, s->block); + } + + return 0; +} + +static int decode_mv_component(GetBitContext *gb, int v){ + int mv_diff = get_vlc2(gb, h261_mv_vlc.table, H261_MV_VLC_BITS, 2); + + /* check if mv_diff is valid */ + if ( mv_diff < 0 ) + return v; + + mv_diff = mvmap[mv_diff]; + + if(mv_diff && !get_bits1(gb)) + mv_diff= -mv_diff; + + v += mv_diff; + if (v <=-16) v+= 32; + else if(v >= 16) v-= 32; + + return v; +} + +static int h261_decode_mb(H261Context *h){ + MpegEncContext * const s = &h->s; + int i, cbp, xy; + + cbp = 63; + // Read mba + do{ + h->mba_diff = get_vlc2(&s->gb, h261_mba_vlc.table, H261_MBA_VLC_BITS, 2); + + /* Check for slice end */ + /* NOTE: GOB can be empty (no MB data) or exist only of MBA_stuffing */ + if (h->mba_diff == MBA_STARTCODE){ // start code + h->gob_start_code_skipped = 1; + return SLICE_END; + } + } + while( h->mba_diff == MBA_STUFFING ); // stuffing + + 
if ( h->mba_diff < 0 ){ + if ( get_bits_count(&s->gb) + 7 >= s->gb.size_in_bits ) + return SLICE_END; + + av_log(s->avctx, AV_LOG_ERROR, "illegal mba at %d %d\n", s->mb_x, s->mb_y); + return SLICE_ERROR; + } + + h->mba_diff += 1; + h->current_mba += h->mba_diff; + + if ( h->current_mba > MBA_STUFFING ) + return SLICE_ERROR; + + s->mb_x= ((h->gob_number-1) % 2) * 11 + ((h->current_mba-1) % 11); + s->mb_y= ((h->gob_number-1) / 2) * 3 + ((h->current_mba-1) / 11); + xy = s->mb_x + s->mb_y * s->mb_stride; + ff_init_block_index(s); + ff_update_block_index(s); + s->dsp.clear_blocks(s->block[0]); + + // Read mtype + h->mtype = get_vlc2(&s->gb, h261_mtype_vlc.table, H261_MTYPE_VLC_BITS, 2); + h->mtype = h261_mtype_map[h->mtype]; + + // Read mquant + if ( IS_QUANT ( h->mtype ) ){ + ff_set_qscale(s, get_bits(&s->gb, 5)); + } + + s->mb_intra = IS_INTRA4x4(h->mtype); + + // Read mv + if ( IS_16X16 ( h->mtype ) ){ + // Motion vector data is included for all MC macroblocks. MVD is obtained from the macroblock vector by subtracting the + // vector of the preceding macroblock. For this calculation the vector of the preceding macroblock is regarded as zero in the + // following three situations: + // 1) evaluating MVD for macroblocks 1, 12 and 23; + // 2) evaluating MVD for macroblocks in which MBA does not represent a difference of 1; + // 3) MTYPE of the previous macroblock was not MC. 
+ if ( ( h->current_mba == 1 ) || ( h->current_mba == 12 ) || ( h->current_mba == 23 ) || + ( h->mba_diff != 1)) + { + h->current_mv_x = 0; + h->current_mv_y = 0; + } + + h->current_mv_x= decode_mv_component(&s->gb, h->current_mv_x); + h->current_mv_y= decode_mv_component(&s->gb, h->current_mv_y); + }else{ + h->current_mv_x = 0; + h->current_mv_y = 0; + } + + // Read cbp + if ( HAS_CBP( h->mtype ) ){ + cbp = get_vlc2(&s->gb, h261_cbp_vlc.table, H261_CBP_VLC_BITS, 2) + 1; + } + + if(s->mb_intra){ + s->current_picture.mb_type[xy]= MB_TYPE_INTRA; + goto intra; + } + + //set motion vectors + s->mv_dir = MV_DIR_FORWARD; + s->mv_type = MV_TYPE_16X16; + s->current_picture.mb_type[xy]= MB_TYPE_16x16 | MB_TYPE_L0; + s->mv[0][0][0] = h->current_mv_x * 2;//gets divided by 2 in motion compensation + s->mv[0][0][1] = h->current_mv_y * 2; + +intra: + /* decode each block */ + if(s->mb_intra || HAS_CBP(h->mtype)){ + for (i = 0; i < 6; i++) { + if (h261_decode_block(h, s->block[i], i, cbp&32) < 0){ + return SLICE_ERROR; + } + cbp+=cbp; + } + } + + MPV_decode_mb(s, s->block); + + return SLICE_OK; +} + +/** + * decodes a macroblock + * @return <0 if an error occured + */ +static int h261_decode_block(H261Context * h, DCTELEM * block, + int n, int coded) +{ + MpegEncContext * const s = &h->s; + int code, level, i, j, run; + RLTable *rl = &h261_rl_tcoeff; + const uint8_t *scan_table; + + // For the variable length encoding there are two code tables, one being used for + // the first transmitted LEVEL in INTER, INTER+MC and INTER+MC+FIL blocks, the second + // for all other LEVELs except the first one in INTRA blocks which is fixed length + // coded with 8 bits. + // NOTE: the two code tables only differ in one VLC so we handle that manually. 
+ scan_table = s->intra_scantable.permutated; + if (s->mb_intra){ + /* DC coef */ + level = get_bits(&s->gb, 8); + // 0 (00000000b) and -128 (10000000b) are FORBIDDEN + if((level&0x7F) == 0){ + av_log(s->avctx, AV_LOG_ERROR, "illegal dc %d at %d %d\n", level, s->mb_x, s->mb_y); + return -1; + } + // The code 1000 0000 is not used, the reconstruction level of 1024 being coded as 1111 1111. + if (level == 255) + level = 128; + block[0] = level; + i = 1; + }else if(coded){ + // Run Level Code + // EOB Not possible for first level when cbp is available (that's why the table is different) + // 0 1 1s + // * * 0* + int check = show_bits(&s->gb, 2); + i = 0; + if ( check & 0x2 ){ + skip_bits(&s->gb, 2); + block[0] = ( check & 0x1 ) ? -1 : 1; + i = 1; + } + }else{ + i = 0; + } + if(!coded){ + s->block_last_index[n] = i - 1; + return 0; + } + for(;;){ + code = get_vlc2(&s->gb, rl->vlc.table, TCOEFF_VLC_BITS, 2); + if (code < 0){ + av_log(s->avctx, AV_LOG_ERROR, "illegal ac vlc code at %dx%d\n", s->mb_x, s->mb_y); + return -1; + } + if (code == rl->n) { + /* escape */ + // The remaining combinations of (run, level) are encoded with a 20-bit word consisting of 6 bits escape, 6 bits run and 8 bits level. + run = get_bits(&s->gb, 6); + level = get_sbits(&s->gb, 8); + }else if(code == 0){ + break; + }else{ + run = rl->table_run[code]; + level = rl->table_level[code]; + if (get_bits1(&s->gb)) + level = -level; + } + i += run; + if (i >= 64){ + av_log(s->avctx, AV_LOG_ERROR, "run overflow at %dx%d\n", s->mb_x, s->mb_y); + return -1; + } + j = scan_table[i]; + block[j] = level; + i++; + } + s->block_last_index[n] = i-1; + return 0; +} + +/** + * decodes the H261 picture header. 
+ * @return <0 if no startcode found + */ +int h261_decode_picture_header(H261Context *h){ + MpegEncContext * const s = &h->s; + int format, i; + uint32_t startcode= 0; + + for(i= s->gb.size_in_bits - get_bits_count(&s->gb); i>24; i-=1){ + startcode = ((startcode << 1) | get_bits(&s->gb, 1)) & 0x000FFFFF; + + if(startcode == 0x10) + break; + } + + if (startcode != 0x10){ + av_log(s->avctx, AV_LOG_ERROR, "Bad picture start code\n"); + return -1; + } + + /* temporal reference */ + s->picture_number = get_bits(&s->gb, 5); /* picture timestamp */ + + /* PTYPE starts here */ + skip_bits1(&s->gb); /* split screen off */ + skip_bits1(&s->gb); /* camera off */ + skip_bits1(&s->gb); /* freeze picture release off */ + + format = get_bits1(&s->gb); + + //only 2 formats possible + if (format == 0){//QCIF + s->width = 176; + s->height = 144; + s->mb_width = 11; + s->mb_height = 9; + }else{//CIF + s->width = 352; + s->height = 288; + s->mb_width = 22; + s->mb_height = 18; + } + + s->mb_num = s->mb_width * s->mb_height; + + skip_bits1(&s->gb); /* still image mode off */ + skip_bits1(&s->gb); /* Reserved */ + + /* PEI */ + while (get_bits1(&s->gb) != 0){ + skip_bits(&s->gb, 8); + } + + // h261 has no I-FRAMES, but if we pass I_TYPE for the first frame, the codec crashes if it does + // not contain all I-blocks (e.g. 
when a packet is lost) + s->pict_type = P_TYPE; + + h->gob_number = 0; + return 0; +} + +static int h261_decode_gob(H261Context *h){ + MpegEncContext * const s = &h->s; + + ff_set_qscale(s, s->qscale); + + /* decode mb's */ + while(h->current_mba <= MBA_STUFFING) + { + int ret; + /* DCT & quantize */ + ret= h261_decode_mb(h); + if(ret<0){ + if(ret==SLICE_END){ + h261_decode_mb_skipped(h, h->current_mba, 33); + return 0; + } + av_log(s->avctx, AV_LOG_ERROR, "Error at MB: %d\n", s->mb_x + s->mb_y*s->mb_stride); + return -1; + } + + h261_decode_mb_skipped(h, h->current_mba-h->mba_diff, h->current_mba-1); + } + + return -1; +} + +static int h261_find_frame_end(ParseContext *pc, AVCodecContext* avctx, const uint8_t *buf, int buf_size){ + int vop_found, i, j; + uint32_t state; + + vop_found= pc->frame_start_found; + state= pc->state; + + for(i=0; i<buf_size && !vop_found; i++){ + state= (state<<8) | buf[i]; + for(j=0; j<8; j++){ + if(((state>>j)&0xFFFFF) == 0x00010){ + i++; + vop_found=1; + break; + } + } + } + if(vop_found){ + for(; i<buf_size; i++){ + state= (state<<8) | buf[i]; + for(j=0; j<8; j++){ + if(((state>>j)&0xFFFFF) == 0x00010){ + pc->frame_start_found=0; + pc->state= state>>(2*8); + return i-1; + } + } + } + } + + pc->frame_start_found= vop_found; + pc->state= state; + return END_NOT_FOUND; +} + +static int h261_parse(AVCodecParserContext *s, + AVCodecContext *avctx, + uint8_t **poutbuf, int *poutbuf_size, + const uint8_t *buf, int buf_size) +{ + ParseContext *pc = s->priv_data; + int next; + + next= h261_find_frame_end(pc,avctx, buf, buf_size); + if (ff_combine_frame(pc, next, (uint8_t **)&buf, &buf_size) < 0) { + *poutbuf = NULL; + *poutbuf_size = 0; + return buf_size; + } + *poutbuf = (uint8_t *)buf; + *poutbuf_size = buf_size; + return next; +} + +/** + * returns the number of bytes consumed for building the current frame + */ +static int get_consumed_bytes(MpegEncContext *s, int buf_size){ + int pos= get_bits_count(&s->gb)>>3; + if(pos==0) pos=1; 
//avoid infinite loops (i doubt thats needed but ...) + if(pos+10>buf_size) pos=buf_size; // oops ;) + + return pos; +} + +static int h261_decode_frame(AVCodecContext *avctx, + void *data, int *data_size, + uint8_t *buf, int buf_size) +{ + H261Context *h= avctx->priv_data; + MpegEncContext *s = &h->s; + int ret; + AVFrame *pict = data; + +#ifdef DEBUG + printf("*****frame %d size=%d\n", avctx->frame_number, buf_size); + printf("bytes=%x %x %x %x\n", buf[0], buf[1], buf[2], buf[3]); +#endif + s->flags= avctx->flags; + s->flags2= avctx->flags2; + + h->gob_start_code_skipped=0; + +retry: + + init_get_bits(&s->gb, buf, buf_size*8); + + if(!s->context_initialized){ + if (MPV_common_init(s) < 0) //we need the idct permutaton for reading a custom matrix + return -1; + } + + //we need to set current_picture_ptr before reading the header, otherwise we cant store anyting im there + if(s->current_picture_ptr==NULL || s->current_picture_ptr->data[0]){ + int i= ff_find_unused_picture(s, 0); + s->current_picture_ptr= &s->picture[i]; + } + + ret = h261_decode_picture_header(h); + + /* skip if the header was thrashed */ + if (ret < 0){ + av_log(s->avctx, AV_LOG_ERROR, "header damaged\n"); + return -1; + } + + if (s->width != avctx->coded_width || s->height != avctx->coded_height){ + ParseContext pc= s->parse_context; //FIXME move these demuxng hack to avformat + s->parse_context.buffer=0; + MPV_common_end(s); + s->parse_context= pc; + } + if (!s->context_initialized) { + avcodec_set_dimensions(avctx, s->width, s->height); + + goto retry; + } + + // for hurry_up==5 + s->current_picture.pict_type= s->pict_type; + s->current_picture.key_frame= s->pict_type == I_TYPE; + + /* skip everything if we are in a hurry>=5 */ + if(avctx->hurry_up>=5) return get_consumed_bytes(s, buf_size); + + if(MPV_frame_start(s, avctx) < 0) + return -1; + + ff_er_frame_start(s); + + /* decode each macroblock */ + s->mb_x=0; + s->mb_y=0; + + while(h->gob_number < (s->mb_height==18 ? 
12 : 5)){ + if(ff_h261_resync(h)<0) + break; + h261_decode_gob(h); + } + MPV_frame_end(s); + +assert(s->current_picture.pict_type == s->current_picture_ptr->pict_type); +assert(s->current_picture.pict_type == s->pict_type); + *pict= *(AVFrame*)s->current_picture_ptr; + ff_print_debug_info(s, pict); + + /* Return the Picture timestamp as the frame number */ + /* we substract 1 because it is added on utils.c */ + avctx->frame_number = s->picture_number - 1; + + *data_size = sizeof(AVFrame); + + return get_consumed_bytes(s, buf_size); +} + +static int h261_decode_end(AVCodecContext *avctx) +{ + H261Context *h= avctx->priv_data; + MpegEncContext *s = &h->s; + + MPV_common_end(s); + return 0; +} + +#ifdef CONFIG_ENCODERS +AVCodec h261_encoder = { + "h261", + CODEC_TYPE_VIDEO, + CODEC_ID_H261, + sizeof(H261Context), + MPV_encode_init, + MPV_encode_picture, + MPV_encode_end, +}; +#endif + +AVCodec h261_decoder = { + "h261", + CODEC_TYPE_VIDEO, + CODEC_ID_H261, + sizeof(H261Context), + h261_decode_init, + NULL, + h261_decode_end, + h261_decode_frame, + CODEC_CAP_DR1, +}; + +AVCodecParser h261_parser = { + { CODEC_ID_H261 }, + sizeof(ParseContext), + NULL, + h261_parse, + ff_parse_close, +}; diff --git a/src/libffmpeg/libavcodec/h261data.h b/src/libffmpeg/libavcodec/h261data.h new file mode 100755 index 000000000..9ea991b23 --- /dev/null +++ b/src/libffmpeg/libavcodec/h261data.h @@ -0,0 +1,136 @@ +/** + * @file h261data.h + * H.261 tables. 
+ */ +#define MB_TYPE_H261_FIL 0x800000 + +// H.261 VLC table for macroblock addressing +const uint8_t h261_mba_code[35] = { + 1, 3, 2, 3, + 2, 3, 2, 7, + 6, 11, 10, 9, + 8, 7, 6, 23, + 22, 21, 20, 19, + 18, 35, 34, 33, + 32, 31, 30, 29, + 28, 27, 26, 25, + 24, + 15, //(MBA stuffing) + 1 //(start code) +}; + +const uint8_t h261_mba_bits[35] = { + 1, 3, 3, 4, + 4, 5, 5, 7, + 7, 8, 8, 8, + 8, 8, 8, 10, + 10, 10, 10, 10, + 10, 11, 11, 11, + 11, 11, 11, 11, + 11, 11, 11, 11, + 11, + 11, //(MBA stuffing) + 16 //(start code) +}; + +//H.261 VLC table for macroblock type +const uint8_t h261_mtype_code[10] = { + 1, 1, 1, 1, + 1, 1, 1, 1, + 1, 1 +}; + +const uint8_t h261_mtype_bits[10] = { + 4, 7, 1, 5, + 9, 8, 10, 3, + 2, 6 +}; + +static const int h261_mtype_map[10]= { + MB_TYPE_INTRA4x4, + MB_TYPE_INTRA4x4 | MB_TYPE_QUANT, + MB_TYPE_CBP, + MB_TYPE_QUANT | MB_TYPE_CBP, + MB_TYPE_16x16, + MB_TYPE_CBP | MB_TYPE_16x16, + MB_TYPE_QUANT | MB_TYPE_CBP | MB_TYPE_16x16, + MB_TYPE_16x16 | MB_TYPE_H261_FIL, + MB_TYPE_CBP | MB_TYPE_16x16 | MB_TYPE_H261_FIL, + MB_TYPE_QUANT | MB_TYPE_CBP | MB_TYPE_16x16 | MB_TYPE_H261_FIL +}; + +//H.261 VLC table for motion vectors +const uint8_t h261_mv_tab[17][2] = { + {1,1}, {1,2}, {1,3}, {1,4}, {3,6}, {5,7}, {4,7}, {3,7}, + {11,9}, {10,9}, {9,9}, {17,10}, {16,10}, {15,10}, {14,10}, {13,10}, {12,10} +}; + +static const int mvmap[17] = +{ + 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16 +}; + +//H.261 VLC table for coded block pattern +const uint8_t h261_cbp_tab[63][2] = +{ + {11,5}, {9,5}, {13,6}, {13,4}, {23,7}, {19,7}, {31,8}, {12,4}, + {22,7}, {18,7}, {30,8}, {19,5}, {27,8}, {23,8}, {19,8}, {11,4}, + {21,7}, {17,7}, {29,8}, {17,5}, {25,8}, {21,8}, {17,8}, {15,6}, + {15,8}, {13,8}, {3,9}, {15,5}, {11,8}, {7,8}, {7,9}, {10,4}, + {20,7}, {16,7}, {28,8}, {14,6}, {14,8}, {12,8}, {2,9}, {16,5}, + {24,8}, {20,8}, {16,8}, {14,5}, {10,8}, {6,8}, {6,9}, {18,5}, + {26,8}, {22,8}, {18,8}, {13,5}, {9,8}, {5,8}, {5,9}, {12,5}, + 
{8,8}, {4,8}, {4,9}, {7,3}, {10,5}, {8,5}, {12,6} +}; + +//H.261 VLC table for transform coefficients +const uint16_t h261_tcoeff_vlc[65][2] = { +{ 0x2, 2 }, { 0x3, 2 },{ 0x4, 4 },{ 0x5, 5 }, +{ 0x6, 7 },{ 0x26, 8 },{ 0x21, 8 },{ 0xa, 10 }, +{ 0x1d, 12 },{ 0x18, 12 },{ 0x13, 12 },{ 0x10 , 12 }, +{ 0x1a, 13},{ 0x19, 13 }, { 0x18, 13 }, { 0x17, 13 }, +{ 0x3, 3 }, { 0x6, 6 }, { 0x25 , 8 }, { 0xc, 10 }, +{ 0x1b, 12 }, { 0x16, 13 }, { 0x15, 13 }, { 0x5, 4}, +{ 0x4, 7}, { 0xb, 10 }, { 0x14, 12 }, { 0x14, 13 }, +{ 0x7, 5 }, { 0x24, 8 }, { 0x1c, 12 }, { 0x13, 13 }, +{ 0x6, 5 }, { 0xf, 10 }, { 0x12, 12}, { 0x7, 6}, +{ 0x9 , 10 }, { 0x12, 13 }, { 0x5, 6 }, { 0x1e, 12 }, +{ 0x4, 6 }, { 0x15, 12 }, { 0x7, 7 }, { 0x11, 12}, +{ 0x5, 7 }, { 0x11, 13 }, { 0x27, 8 }, { 0x10, 13 }, +{ 0x23, 8 }, { 0x22, 8 }, { 0x20, 8 }, { 0xe , 10 }, +{ 0xd, 10 }, { 0x8, 10 },{ 0x1f, 12 }, { 0x1a, 12 }, +{ 0x19, 12 }, { 0x17, 12 }, { 0x16, 12}, { 0x1f, 13}, +{ 0x1e, 13 }, { 0x1d, 13 }, { 0x1c, 13}, { 0x1b, 13}, +{ 0x1, 6 } //escape +}; + +const int8_t h261_tcoeff_level[64] = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 1, 2, 3, 4, 5, 6, 7, 1, + 2, 3, 4, 5, 1, 2, 3, 4, + 1, 2, 3, 1, 2, 3, 1, 2, + 1, 2, 1, 2, 1, 2, 1, 2, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1 +}; + +const int8_t h261_tcoeff_run[64] = { + 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 1, + 1, 1, 1, 1, 1, 1, 2, 2, + 2, 2, 2, 3, 3, 3, 3, 4, + 4, 4, 5, 5, 5, 6, 6, 7, + 7, 8, 8, 9, 9, 10, 10, 11, + 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26 +}; + +static RLTable h261_rl_tcoeff = { + 64, + 64, + h261_tcoeff_vlc, + h261_tcoeff_run, + h261_tcoeff_level, +}; diff --git a/src/libffmpeg/libavcodec/h263.c b/src/libffmpeg/libavcodec/h263.c index dd14a7bef..81c3648f1 100644 --- a/src/libffmpeg/libavcodec/h263.c +++ b/src/libffmpeg/libavcodec/h263.c @@ -57,20 +57,27 @@ static void h263_encode_block(MpegEncContext * s, DCTELEM * block, int n); static void h263p_encode_umotion(MpegEncContext * s, 
int val); +static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, + int n, int dc, uint8_t *scan_table, + PutBitContext *dc_pb, PutBitContext *ac_pb); #endif static int h263_decode_motion(MpegEncContext * s, int pred, int fcode); static int h263p_decode_umotion(MpegEncContext * s, int pred); static int h263_decode_block(MpegEncContext * s, DCTELEM * block, int n, int coded); +static inline int mpeg4_decode_dc(MpegEncContext * s, int n, int *dir_ptr); static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block, int n, int coded, int intra, int rvlc); +static int mpeg4_get_block_length(MpegEncContext * s, DCTELEM * block, int n, int intra_dc, + uint8_t *scan_table); static int h263_pred_dc(MpegEncContext * s, int n, uint16_t **dc_val_ptr); #ifdef CONFIG_ENCODERS static void mpeg4_encode_visual_object_header(MpegEncContext * s); static void mpeg4_encode_vol_header(MpegEncContext * s, int vo_number, int vol_number); #endif //CONFIG_ENCODERS static void mpeg4_decode_sprite_trajectory(MpegEncContext * s, GetBitContext *gb); +static inline int ff_mpeg4_pred_dc(MpegEncContext * s, int n, int level, int *dir_ptr, int encoding); #ifdef CONFIG_ENCODERS static uint8_t uni_DCtab_lum_len[512]; @@ -86,6 +93,8 @@ static uint32_t uni_mpeg4_intra_rl_bits[64*64*2*2]; static uint8_t uni_mpeg4_intra_rl_len [64*64*2*2]; static uint32_t uni_mpeg4_inter_rl_bits[64*64*2*2]; static uint8_t uni_mpeg4_inter_rl_len [64*64*2*2]; +static uint8_t uni_h263_intra_aic_rl_len [64*64*2*2]; +static uint8_t uni_h263_inter_rl_len [64*64*2*2]; //#define UNI_MPEG4_ENC_INDEX(last,run,level) ((last)*128 + (run)*256 + (level)) //#define UNI_MPEG4_ENC_INDEX(last,run,level) ((last)*128*64 + (run) + (level)*64) #define UNI_MPEG4_ENC_INDEX(last,run,level) ((last)*128*64 + (run)*128 + (level)) @@ -656,87 +665,36 @@ void ff_h263_update_motion_val(MpegEncContext * s){ } } -/** - * predicts the dc. 
- * encoding quantized level -> quantized diff - * decoding quantized diff -> quantized level - * @param n block index (0-3 are luma, 4-5 are chroma) - * @param dir_ptr pointer to an integer where the prediction direction will be stored - */ -static inline int ff_mpeg4_pred_dc(MpegEncContext * s, int n, int level, int *dir_ptr, int encoding) -{ - int a, b, c, wrap, pred, scale, ret; - uint16_t *dc_val; - - /* find prediction */ - if (n < 4) { - scale = s->y_dc_scale; - } else { - scale = s->c_dc_scale; - } - if(IS_3IV1) - scale= 8; - - wrap= s->block_wrap[n]; - dc_val = s->dc_val[0] + s->block_index[n]; +#ifdef CONFIG_ENCODERS - /* B C - * A X - */ - a = dc_val[ - 1]; - b = dc_val[ - 1 - wrap]; - c = dc_val[ - wrap]; +static inline int h263_get_motion_length(MpegEncContext * s, int val, int f_code){ + int l, bit_size, code; - /* outside slice handling (we cant do that by memset as we need the dc for error resilience) */ - if(s->first_slice_line && n!=3){ - if(n!=2) b=c= 1024; - if(n!=1 && s->mb_x == s->resync_mb_x) b=a= 1024; - } - if(s->mb_x == s->resync_mb_x && s->mb_y == s->resync_mb_y+1){ - if(n==0 || n==4 || n==5) - b=1024; - } - - if (abs(a - b) < abs(b - c)) { - pred = c; - *dir_ptr = 1; /* top */ + if (val == 0) { + return mvtab[0][1]; } else { - pred = a; - *dir_ptr = 0; /* left */ + bit_size = f_code - 1; + /* modulo encoding */ + l= INT_BIT - 6 - bit_size; + val = (val<<l)>>l; + val--; + code = (val >> bit_size) + 1; + + return mvtab[code][1] + 1 + bit_size; } - /* we assume pred is positive */ - pred = FASTDIV((pred + (scale >> 1)), scale); +} - if(encoding){ - ret = level - pred; +static inline void ff_h263_encode_motion_vector(MpegEncContext * s, int x, int y, int f_code){ + if(s->flags2 & CODEC_FLAG2_NO_OUTPUT){ + skip_put_bits(&s->pb, + h263_get_motion_length(s, x, f_code) + +h263_get_motion_length(s, y, f_code)); }else{ - level += pred; - ret= level; - if(s->error_resilience>=3){ - if(level<0){ - av_log(s->avctx, AV_LOG_ERROR, "dc<0 at %dx%d\n", 
s->mb_x, s->mb_y); - return -1; - } - if(level*scale > 2048 + scale){ - av_log(s->avctx, AV_LOG_ERROR, "dc overflow at %dx%d\n", s->mb_x, s->mb_y); - return -1; - } - } - } - level *=scale; - if(level&(~2047)){ - if(level<0) - level=0; - else if(!(s->workaround_bugs&FF_BUG_DC_CLIP)) - level=2047; + ff_h263_encode_motion(s, x, f_code); + ff_h263_encode_motion(s, y, f_code); } - dc_val[0]= level; - - return ret; } -#ifdef CONFIG_ENCODERS - static inline int get_p_cbp(MpegEncContext * s, DCTELEM block[6][64], int motion_x, int motion_y){ @@ -836,169 +794,33 @@ static inline int get_b_cbp(MpegEncContext * s, DCTELEM block[6][64], return cbp; } -/** - * encodes the dc value. - * @param n block index (0-3 are luma, 4-5 are chroma) - */ -static inline void mpeg4_encode_dc(PutBitContext * s, int level, int n) -{ -#if 1 -// if(level<-255 || level>255) printf("dc overflow\n"); - level+=256; - if (n < 4) { - /* luminance */ - put_bits(s, uni_DCtab_lum_len[level], uni_DCtab_lum_bits[level]); - } else { - /* chrominance */ - put_bits(s, uni_DCtab_chrom_len[level], uni_DCtab_chrom_bits[level]); - } -#else - int size, v; - /* find number of bits */ - size = 0; - v = abs(level); - while (v) { - v >>= 1; - size++; - } - - if (n < 4) { - /* luminance */ - put_bits(&s->pb, DCtab_lum[size][1], DCtab_lum[size][0]); - } else { - /* chrominance */ - put_bits(&s->pb, DCtab_chrom[size][1], DCtab_chrom[size][0]); - } - - /* encode remaining bits */ - if (size > 0) { - if (level < 0) - level = (-level) ^ ((1 << size) - 1); - put_bits(&s->pb, size, level); - if (size > 8) - put_bits(&s->pb, 1, 1); - } -#endif -} - -/** - * encodes a 8x8 block - * @param n block index (0-3 are luma, 4-5 are chroma) - */ -static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n, int intra_dc, - uint8_t *scan_table, PutBitContext *dc_pb, PutBitContext *ac_pb) -{ - int i, last_non_zero; -#if 0 //variables for the outcommented version - int code, sign, last; -#endif - const RLTable *rl; - 
uint32_t *bits_tab; - uint8_t *len_tab; - const int last_index = s->block_last_index[n]; - - if (s->mb_intra) { //Note gcc (3.2.1 at least) will optimize this away - /* mpeg4 based DC predictor */ - mpeg4_encode_dc(dc_pb, intra_dc, n); - if(last_index<1) return; - i = 1; - rl = &rl_intra; - bits_tab= uni_mpeg4_intra_rl_bits; - len_tab = uni_mpeg4_intra_rl_len; - } else { - if(last_index<0) return; - i = 0; - rl = &rl_inter; - bits_tab= uni_mpeg4_inter_rl_bits; - len_tab = uni_mpeg4_inter_rl_len; - } - - /* AC coefs */ - last_non_zero = i - 1; -#if 1 - for (; i < last_index; i++) { - int level = block[ scan_table[i] ]; - if (level) { - int run = i - last_non_zero - 1; - level+=64; - if((level&(~127)) == 0){ - const int index= UNI_MPEG4_ENC_INDEX(0, run, level); - put_bits(ac_pb, len_tab[index], bits_tab[index]); - }else{ //ESC3 - put_bits(ac_pb, 7+2+1+6+1+12+1, (3<<23)+(3<<21)+(0<<20)+(run<<14)+(1<<13)+(((level-64)&0xfff)<<1)+1); +static inline void mpeg4_encode_blocks(MpegEncContext * s, DCTELEM block[6][64], int intra_dc[6], + uint8_t **scan_table, PutBitContext *dc_pb, PutBitContext *ac_pb){ + int i; + + if(scan_table){ + if(s->flags2 & CODEC_FLAG2_NO_OUTPUT){ + for (i = 0; i < 6; i++) { + skip_put_bits(&s->pb, mpeg4_get_block_length(s, block[i], i, intra_dc[i], scan_table[i])); + } + }else{ + /* encode each block */ + for (i = 0; i < 6; i++) { + mpeg4_encode_block(s, block[i], i, intra_dc[i], scan_table[i], dc_pb, ac_pb); } - last_non_zero = i; - } - } - /*if(i<=last_index)*/{ - int level = block[ scan_table[i] ]; - int run = i - last_non_zero - 1; - level+=64; - if((level&(~127)) == 0){ - const int index= UNI_MPEG4_ENC_INDEX(1, run, level); - put_bits(ac_pb, len_tab[index], bits_tab[index]); - }else{ //ESC3 - put_bits(ac_pb, 7+2+1+6+1+12+1, (3<<23)+(3<<21)+(1<<20)+(run<<14)+(1<<13)+(((level-64)&0xfff)<<1)+1); } - } -#else - for (; i <= last_index; i++) { - const int slevel = block[ scan_table[i] ]; - if (slevel) { - int level; - int run = i - last_non_zero - 1; 
- last = (i == last_index); - sign = 0; - level = slevel; - if (level < 0) { - sign = 1; - level = -level; - } - code = get_rl_index(rl, last, run, level); - put_bits(ac_pb, rl->table_vlc[code][1], rl->table_vlc[code][0]); - if (code == rl->n) { - int level1, run1; - level1 = level - rl->max_level[last][run]; - if (level1 < 1) - goto esc2; - code = get_rl_index(rl, last, run, level1); - if (code == rl->n) { - esc2: - put_bits(ac_pb, 1, 1); - if (level > MAX_LEVEL) - goto esc3; - run1 = run - rl->max_run[last][level] - 1; - if (run1 < 0) - goto esc3; - code = get_rl_index(rl, last, run1, level); - if (code == rl->n) { - esc3: - /* third escape */ - put_bits(ac_pb, 1, 1); - put_bits(ac_pb, 1, last); - put_bits(ac_pb, 6, run); - put_bits(ac_pb, 1, 1); - put_bits(ac_pb, 12, slevel & 0xfff); - put_bits(ac_pb, 1, 1); - } else { - /* second escape */ - put_bits(ac_pb, 1, 0); - put_bits(ac_pb, rl->table_vlc[code][1], rl->table_vlc[code][0]); - put_bits(ac_pb, 1, sign); - } - } else { - /* first escape */ - put_bits(ac_pb, 1, 0); - put_bits(ac_pb, rl->table_vlc[code][1], rl->table_vlc[code][0]); - put_bits(ac_pb, 1, sign); - } - } else { - put_bits(ac_pb, 1, sign); + }else{ + if(s->flags2 & CODEC_FLAG2_NO_OUTPUT){ + for (i = 0; i < 6; i++) { + skip_put_bits(&s->pb, mpeg4_get_block_length(s, block[i], i, 0, s->intra_scantable.permutated)); } - last_non_zero = i; - } + }else{ + /* encode each block */ + for (i = 0; i < 6; i++) { + mpeg4_encode_block(s, block[i], i, 0, s->intra_scantable.permutated, dc_pb, ac_pb); + } + } } -#endif } void mpeg4_encode_mb(MpegEncContext * s, @@ -1089,23 +911,22 @@ void mpeg4_encode_mb(MpegEncContext * s, if(mb_type == 0){ assert(s->mv_dir & MV_DIRECT); - ff_h263_encode_motion(s, motion_x, 1); - ff_h263_encode_motion(s, motion_y, 1); + ff_h263_encode_motion_vector(s, motion_x, motion_y, 1); s->b_count++; s->f_count++; }else{ assert(mb_type > 0 && mb_type < 4); if(s->mv_type != MV_TYPE_FIELD){ if(s->mv_dir & MV_DIR_FORWARD){ - 
ff_h263_encode_motion(s, s->mv[0][0][0] - s->last_mv[0][0][0], s->f_code); - ff_h263_encode_motion(s, s->mv[0][0][1] - s->last_mv[0][0][1], s->f_code); + ff_h263_encode_motion_vector(s, s->mv[0][0][0] - s->last_mv[0][0][0], + s->mv[0][0][1] - s->last_mv[0][0][1], s->f_code); s->last_mv[0][0][0]= s->last_mv[0][1][0]= s->mv[0][0][0]; s->last_mv[0][0][1]= s->last_mv[0][1][1]= s->mv[0][0][1]; s->f_count++; } if(s->mv_dir & MV_DIR_BACKWARD){ - ff_h263_encode_motion(s, s->mv[1][0][0] - s->last_mv[1][0][0], s->b_code); - ff_h263_encode_motion(s, s->mv[1][0][1] - s->last_mv[1][0][1], s->b_code); + ff_h263_encode_motion_vector(s, s->mv[1][0][0] - s->last_mv[1][0][0], + s->mv[1][0][1] - s->last_mv[1][0][1], s->b_code); s->last_mv[1][0][0]= s->last_mv[1][1][0]= s->mv[1][0][0]; s->last_mv[1][0][1]= s->last_mv[1][1][1]= s->mv[1][0][1]; s->b_count++; @@ -1121,8 +942,8 @@ void mpeg4_encode_mb(MpegEncContext * s, } if(s->mv_dir & MV_DIR_FORWARD){ for(i=0; i<2; i++){ - ff_h263_encode_motion(s, s->mv[0][i][0] - s->last_mv[0][i][0] , s->f_code); - ff_h263_encode_motion(s, s->mv[0][i][1] - s->last_mv[0][i][1]/2, s->f_code); + ff_h263_encode_motion_vector(s, s->mv[0][i][0] - s->last_mv[0][i][0] , + s->mv[0][i][1] - s->last_mv[0][i][1]/2, s->f_code); s->last_mv[0][i][0]= s->mv[0][i][0]; s->last_mv[0][i][1]= s->mv[0][i][1]*2; } @@ -1130,8 +951,8 @@ void mpeg4_encode_mb(MpegEncContext * s, } if(s->mv_dir & MV_DIR_BACKWARD){ for(i=0; i<2; i++){ - ff_h263_encode_motion(s, s->mv[1][i][0] - s->last_mv[1][i][0] , s->b_code); - ff_h263_encode_motion(s, s->mv[1][i][1] - s->last_mv[1][i][1]/2, s->b_code); + ff_h263_encode_motion_vector(s, s->mv[1][i][0] - s->last_mv[1][i][0] , + s->mv[1][i][1] - s->last_mv[1][i][1]/2, s->b_code); s->last_mv[1][i][0]= s->mv[1][i][0]; s->last_mv[1][i][1]= s->mv[1][i][1]*2; } @@ -1144,10 +965,7 @@ void mpeg4_encode_mb(MpegEncContext * s, s->mv_bits+= get_bits_diff(s); } - /* encode each block */ - for (i = 0; i < 6; i++) { - mpeg4_encode_block(s, block[i], i, 0, 
s->intra_scantable.permutated, NULL, &s->pb); - } + mpeg4_encode_blocks(s, block, NULL, NULL, NULL, &s->pb); if(interleaved_stats){ s->p_tex_bits+= get_bits_diff(s); @@ -1231,8 +1049,8 @@ void mpeg4_encode_mb(MpegEncContext * s, /* motion vectors: 16x16 mode */ h263_pred_motion(s, 0, 0, &pred_x, &pred_y); - ff_h263_encode_motion(s, motion_x - pred_x, s->f_code); - ff_h263_encode_motion(s, motion_y - pred_y, s->f_code); + ff_h263_encode_motion_vector(s, motion_x - pred_x, + motion_y - pred_y, s->f_code); }else if(s->mv_type==MV_TYPE_FIELD){ if(s->dquant) cbpc+= 8; put_bits(&s->pb, @@ -1259,10 +1077,10 @@ void mpeg4_encode_mb(MpegEncContext * s, put_bits(&s->pb, 1, s->field_select[0][0]); put_bits(&s->pb, 1, s->field_select[0][1]); - ff_h263_encode_motion(s, s->mv[0][0][0] - pred_x, s->f_code); - ff_h263_encode_motion(s, s->mv[0][0][1] - pred_y, s->f_code); - ff_h263_encode_motion(s, s->mv[0][1][0] - pred_x, s->f_code); - ff_h263_encode_motion(s, s->mv[0][1][1] - pred_y, s->f_code); + ff_h263_encode_motion_vector(s, s->mv[0][0][0] - pred_x, + s->mv[0][0][1] - pred_y, s->f_code); + ff_h263_encode_motion_vector(s, s->mv[0][1][0] - pred_x, + s->mv[0][1][1] - pred_y, s->f_code); }else{ assert(s->mv_type==MV_TYPE_8X8); put_bits(&s->pb, @@ -1283,8 +1101,8 @@ void mpeg4_encode_mb(MpegEncContext * s, /* motion vectors: 8x8 mode*/ h263_pred_motion(s, i, 0, &pred_x, &pred_y); - ff_h263_encode_motion(s, s->current_picture.motion_val[0][ s->block_index[i] ][0] - pred_x, s->f_code); - ff_h263_encode_motion(s, s->current_picture.motion_val[0][ s->block_index[i] ][1] - pred_y, s->f_code); + ff_h263_encode_motion_vector(s, s->current_picture.motion_val[0][ s->block_index[i] ][0] - pred_x, + s->current_picture.motion_val[0][ s->block_index[i] ][1] - pred_y, s->f_code); } } @@ -1292,10 +1110,7 @@ void mpeg4_encode_mb(MpegEncContext * s, s->mv_bits+= get_bits_diff(s); } - /* encode each block */ - for (i = 0; i < 6; i++) { - mpeg4_encode_block(s, block[i], i, 0, 
s->intra_scantable.permutated, NULL, tex_pb); - } + mpeg4_encode_blocks(s, block, NULL, NULL, NULL, tex_pb); if(interleaved_stats){ s->p_tex_bits+= get_bits_diff(s); @@ -1357,10 +1172,7 @@ void mpeg4_encode_mb(MpegEncContext * s, s->misc_bits+= get_bits_diff(s); } - /* encode each block */ - for (i = 0; i < 6; i++) { - mpeg4_encode_block(s, block[i], i, dc_diff[i], scan_table[i], dc_pb, tex_pb); - } + mpeg4_encode_blocks(s, block, dc_diff, scan_table, dc_pb, tex_pb); if(interleaved_stats){ s->i_tex_bits+= get_bits_diff(s); @@ -1373,8 +1185,6 @@ void mpeg4_encode_mb(MpegEncContext * s, } } - - void h263_encode_mb(MpegEncContext * s, DCTELEM block[6][64], int motion_x, int motion_y) @@ -1426,8 +1236,8 @@ void h263_encode_mb(MpegEncContext * s, h263_pred_motion(s, 0, 0, &pred_x, &pred_y); if (!s->umvplus) { - ff_h263_encode_motion(s, motion_x - pred_x, 1); - ff_h263_encode_motion(s, motion_y - pred_y, 1); + ff_h263_encode_motion_vector(s, motion_x - pred_x, + motion_y - pred_y, 1); } else { h263p_encode_umotion(s, motion_x - pred_x); @@ -1455,8 +1265,8 @@ void h263_encode_mb(MpegEncContext * s, motion_x= s->current_picture.motion_val[0][ s->block_index[i] ][0]; motion_y= s->current_picture.motion_val[0][ s->block_index[i] ][1]; if (!s->umvplus) { - ff_h263_encode_motion(s, motion_x - pred_x, 1); - ff_h263_encode_motion(s, motion_y - pred_y, 1); + ff_h263_encode_motion_vector(s, motion_x - pred_x, + motion_y - pred_y, 1); } else { h263p_encode_umotion(s, motion_x - pred_x); @@ -1636,7 +1446,7 @@ void ff_h263_loop_filter(MpegEncContext * s){ const int chroma_qp= s->chroma_qscale_table[qp_dt]; s->dsp.h263_h_loop_filter(dest_y -8*linesize , linesize, qp_dt); s->dsp.h263_h_loop_filter(dest_cb-8*uvlinesize, uvlinesize, chroma_qp); - s->dsp.h263_h_loop_filter(dest_cb-8*uvlinesize, uvlinesize, chroma_qp); + s->dsp.h263_h_loop_filter(dest_cr-8*uvlinesize, uvlinesize, chroma_qp); } } } @@ -1871,30 +1681,12 @@ void ff_h263_encode_motion(MpegEncContext * s, int val, int f_code) 
bit_size = f_code - 1; range = 1 << bit_size; /* modulo encoding */ - l = range * 32; -#if 1 - val+= l; - val&= 2*l-1; - val-= l; + l= INT_BIT - 6 - bit_size; + val = (val<<l)>>l; sign = val>>31; val= (val^sign)-sign; sign&=1; -#else - if (val < -l) { - val += 2*l; - } else if (val >= l) { - val -= 2*l; - } - assert(val>=-l && val<l); - - if (val >= 0) { - sign = 0; - } else { - val = -val; - sign = 1; - } -#endif val--; code = (val >> bit_size) + 1; bits = val & (range - 1); @@ -1904,7 +1696,6 @@ void ff_h263_encode_motion(MpegEncContext * s, int val, int f_code) put_bits(&s->pb, bit_size, bits); } } - } /* Encode MV differences on H.263+ with Unrestricted MV mode */ @@ -2135,6 +1926,49 @@ static void init_uni_mpeg4_rl_tab(RLTable *rl, uint32_t *bits_tab, uint8_t *len_ } } +static void init_uni_h263_rl_tab(RLTable *rl, uint32_t *bits_tab, uint8_t *len_tab){ + int slevel, run, last; + + assert(MAX_LEVEL >= 64); + assert(MAX_RUN >= 63); + + for(slevel=-64; slevel<64; slevel++){ + if(slevel==0) continue; + for(run=0; run<64; run++){ + for(last=0; last<=1; last++){ + const int index= UNI_MPEG4_ENC_INDEX(last, run, slevel+64); + int level= slevel < 0 ? -slevel : slevel; + int sign= slevel < 0 ? 
1 : 0; + int bits, len, code; + + len_tab[index]= 100; + + /* ESC0 */ + code= get_rl_index(rl, last, run, level); + bits= rl->table_vlc[code][0]; + len= rl->table_vlc[code][1]; + bits=bits*2+sign; len++; + + if(code!=rl->n && len < len_tab[index]){ + if(bits_tab) bits_tab[index]= bits; + len_tab [index]= len; + } + /* ESC */ + bits= rl->table_vlc[rl->n][0]; + len = rl->table_vlc[rl->n][1]; + bits=bits*2+last; len++; + bits=bits*64+run; len+=6; + bits=bits*256+(level&0xff); len+=8; + + if(len < len_tab[index]){ + if(bits_tab) bits_tab[index]= bits; + len_tab [index]= len; + } + } + } + } +} + void h263_encode_init(MpegEncContext *s) { static int done = 0; @@ -2144,17 +1978,28 @@ void h263_encode_init(MpegEncContext *s) init_uni_dc_tab(); - init_rl(&rl_inter); - init_rl(&rl_intra); - init_rl(&rl_intra_aic); + init_rl(&rl_inter, 1); + init_rl(&rl_intra, 1); + init_rl(&rl_intra_aic, 1); init_uni_mpeg4_rl_tab(&rl_intra, uni_mpeg4_intra_rl_bits, uni_mpeg4_intra_rl_len); init_uni_mpeg4_rl_tab(&rl_inter, uni_mpeg4_inter_rl_bits, uni_mpeg4_inter_rl_len); + init_uni_h263_rl_tab(&rl_intra_aic, NULL, uni_h263_intra_aic_rl_len); + init_uni_h263_rl_tab(&rl_inter , NULL, uni_h263_inter_rl_len); + init_mv_penalty_and_fcode(s); } s->me.mv_penalty= mv_penalty; //FIXME exact table for msmpeg4 & h263p + s->intra_ac_vlc_length =s->inter_ac_vlc_length = uni_h263_inter_rl_len; + s->intra_ac_vlc_last_length=s->inter_ac_vlc_last_length= uni_h263_inter_rl_len + 128*64; + if(s->h263_aic){ + s->intra_ac_vlc_length = uni_h263_intra_aic_rl_len; + s->intra_ac_vlc_last_length= uni_h263_intra_aic_rl_len + 128*64; + } + s->ac_esc_length= 7+1+6+8; + // use fcodes >1 only for mpeg4 & h263 & h263p FIXME switch(s->codec_id){ case CODEC_ID_MPEG4: @@ -2176,7 +2021,8 @@ void h263_encode_init(MpegEncContext *s) s->avctx->extradata= av_malloc(1024); init_put_bits(&s->pb, s->avctx->extradata, 1024); - mpeg4_encode_visual_object_header(s); + if(!(s->workaround_bugs & FF_BUG_MS)) + 
mpeg4_encode_visual_object_header(s); mpeg4_encode_vol_header(s, 0, 0); // ff_mpeg4_stuffing(&s->pb); ? @@ -2371,11 +2217,13 @@ void ff_set_mpeg4_time(MpegEncContext * s, int picture_number){ if(s->pict_type==B_TYPE){ s->pb_time= s->pp_time - (s->last_non_b_time - s->time); + assert(s->pb_time > 0 && s->pb_time < s->pp_time); }else{ s->last_time_base= s->time_base; s->time_base= time_div; s->pp_time= s->time - s->last_non_b_time; s->last_non_b_time= s->time; + assert(picture_number==0 || s->pp_time > 0); } } @@ -2413,13 +2261,26 @@ static void mpeg4_encode_visual_object_header(MpegEncContext * s){ int profile_and_level_indication; int vo_ver_id; - if(s->max_b_frames || s->quarter_sample){ - profile_and_level_indication= 0xF1; // adv simple level 1 + if(s->avctx->profile != FF_PROFILE_UNKNOWN){ + profile_and_level_indication = s->avctx->profile << 4; + }else if(s->max_b_frames || s->quarter_sample){ + profile_and_level_indication= 0xF0; // adv simple + }else{ + profile_and_level_indication= 0x00; // simple + } + + if(s->avctx->level != FF_LEVEL_UNKNOWN){ + profile_and_level_indication |= s->avctx->level; + }else{ + profile_and_level_indication |= 1; //level 1 + } + + if(profile_and_level_indication>>4 == 0xF){ vo_ver_id= 5; }else{ - profile_and_level_indication= 0x01; // simple level 1 vo_ver_id= 1; } + //FIXME levels put_bits(&s->pb, 16, 0); @@ -2460,9 +2321,13 @@ static void mpeg4_encode_vol_header(MpegEncContext * s, int vo_number, int vol_n put_bits(&s->pb, 1, 0); /* random access vol */ put_bits(&s->pb, 8, s->vo_type); /* video obj type indication */ - put_bits(&s->pb, 1, 1); /* is obj layer id= yes */ - put_bits(&s->pb, 4, vo_ver_id); /* is obj layer ver id */ - put_bits(&s->pb, 3, 1); /* is obj layer priority */ + if(s->workaround_bugs & FF_BUG_MS) { + put_bits(&s->pb, 1, 0); /* is obj layer id= no */ + } else { + put_bits(&s->pb, 1, 1); /* is obj layer id= yes */ + put_bits(&s->pb, 4, vo_ver_id); /* is obj layer ver id */ + put_bits(&s->pb, 3, 1); /* is obj 
layer priority */ + } aspect_to_info(s, s->avctx->sample_aspect_ratio); @@ -2472,13 +2337,13 @@ static void mpeg4_encode_vol_header(MpegEncContext * s, int vo_number, int vol_n put_bits(&s->pb, 8, s->avctx->sample_aspect_ratio.den); } - if(s->low_delay){ - put_bits(&s->pb, 1, 1); /* vol control parameters= yes */ - put_bits(&s->pb, 2, 1); /* chroma format YUV 420/YV12 */ + if(s->workaround_bugs & FF_BUG_MS) { // + put_bits(&s->pb, 1, 0); /* vol control parameters= no @@@ */ + } else { + put_bits(&s->pb, 1, 1); /* vol control parameters= yes */ + put_bits(&s->pb, 2, 1); /* chroma format YUV 420/YV12 */ put_bits(&s->pb, 1, s->low_delay); - put_bits(&s->pb, 1, 0); /* vbv parameters= no */ - }else{ - put_bits(&s->pb, 1, 0); /* vol control parameters= no */ + put_bits(&s->pb, 1, 0); /* vbv parameters= no */ } put_bits(&s->pb, 2, RECT_SHAPE); /* vol shape= rectangle */ @@ -2549,7 +2414,8 @@ void mpeg4_encode_picture_header(MpegEncContext * s, int picture_number) if(s->strict_std_compliance < 2 || picture_number==0) //HACK, the reference sw is buggy mpeg4_encode_vol_header(s, 0, 0); } - mpeg4_encode_gop_header(s); + if(!(s->workaround_bugs & FF_BUG_MS)) + mpeg4_encode_gop_header(s); } s->partitioned_frame= s->data_partitioning && s->pict_type!=B_TYPE; @@ -2612,6 +2478,84 @@ void ff_set_qscale(MpegEncContext * s, int qscale) s->c_dc_scale= s->c_dc_scale_table[ s->chroma_qscale ]; } +/** + * predicts the dc. 
+ * encoding quantized level -> quantized diff + * decoding quantized diff -> quantized level + * @param n block index (0-3 are luma, 4-5 are chroma) + * @param dir_ptr pointer to an integer where the prediction direction will be stored + */ +static inline int ff_mpeg4_pred_dc(MpegEncContext * s, int n, int level, int *dir_ptr, int encoding) +{ + int a, b, c, wrap, pred, scale, ret; + uint16_t *dc_val; + + /* find prediction */ + if (n < 4) { + scale = s->y_dc_scale; + } else { + scale = s->c_dc_scale; + } + if(IS_3IV1) + scale= 8; + + wrap= s->block_wrap[n]; + dc_val = s->dc_val[0] + s->block_index[n]; + + /* B C + * A X + */ + a = dc_val[ - 1]; + b = dc_val[ - 1 - wrap]; + c = dc_val[ - wrap]; + + /* outside slice handling (we cant do that by memset as we need the dc for error resilience) */ + if(s->first_slice_line && n!=3){ + if(n!=2) b=c= 1024; + if(n!=1 && s->mb_x == s->resync_mb_x) b=a= 1024; + } + if(s->mb_x == s->resync_mb_x && s->mb_y == s->resync_mb_y+1){ + if(n==0 || n==4 || n==5) + b=1024; + } + + if (abs(a - b) < abs(b - c)) { + pred = c; + *dir_ptr = 1; /* top */ + } else { + pred = a; + *dir_ptr = 0; /* left */ + } + /* we assume pred is positive */ + pred = FASTDIV((pred + (scale >> 1)), scale); + + if(encoding){ + ret = level - pred; + }else{ + level += pred; + ret= level; + if(s->error_resilience>=3){ + if(level<0){ + av_log(s->avctx, AV_LOG_ERROR, "dc<0 at %dx%d\n", s->mb_x, s->mb_y); + return -1; + } + if(level*scale > 2048 + scale){ + av_log(s->avctx, AV_LOG_ERROR, "dc overflow at %dx%d\n", s->mb_x, s->mb_y); + return -1; + } + } + } + level *=scale; + if(level&(~2047)){ + if(level<0) + level=0; + else if(!(s->workaround_bugs&FF_BUG_DC_CLIP)) + level=2047; + } + dc_val[0]= level; + + return ret; +} /** * predicts the ac. @@ -2675,11 +2619,180 @@ void mpeg4_pred_ac(MpegEncContext * s, DCTELEM *block, int n, #ifdef CONFIG_ENCODERS +/** + * encodes the dc value. 
+ * @param n block index (0-3 are luma, 4-5 are chroma) + */ +static inline void mpeg4_encode_dc(PutBitContext * s, int level, int n) +{ +#if 1 +// if(level<-255 || level>255) printf("dc overflow\n"); + level+=256; + if (n < 4) { + /* luminance */ + put_bits(s, uni_DCtab_lum_len[level], uni_DCtab_lum_bits[level]); + } else { + /* chrominance */ + put_bits(s, uni_DCtab_chrom_len[level], uni_DCtab_chrom_bits[level]); + } +#else + int size, v; + /* find number of bits */ + size = 0; + v = abs(level); + while (v) { + v >>= 1; + size++; + } + + if (n < 4) { + /* luminance */ + put_bits(&s->pb, DCtab_lum[size][1], DCtab_lum[size][0]); + } else { + /* chrominance */ + put_bits(&s->pb, DCtab_chrom[size][1], DCtab_chrom[size][0]); + } + + /* encode remaining bits */ + if (size > 0) { + if (level < 0) + level = (-level) ^ ((1 << size) - 1); + put_bits(&s->pb, size, level); + if (size > 8) + put_bits(&s->pb, 1, 1); + } +#endif +} +static inline int mpeg4_get_dc_length(int level, int n){ + if (n < 4) { + return uni_DCtab_lum_len[level + 256]; + } else { + return uni_DCtab_chrom_len[level + 256]; + } +} +/** + * encodes a 8x8 block + * @param n block index (0-3 are luma, 4-5 are chroma) + */ +static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n, int intra_dc, + uint8_t *scan_table, PutBitContext *dc_pb, PutBitContext *ac_pb) +{ + int i, last_non_zero; +#if 0 //variables for the outcommented version + int code, sign, last; +#endif + const RLTable *rl; + uint32_t *bits_tab; + uint8_t *len_tab; + const int last_index = s->block_last_index[n]; + if (s->mb_intra) { //Note gcc (3.2.1 at least) will optimize this away + /* mpeg4 based DC predictor */ + mpeg4_encode_dc(dc_pb, intra_dc, n); + if(last_index<1) return; + i = 1; + rl = &rl_intra; + bits_tab= uni_mpeg4_intra_rl_bits; + len_tab = uni_mpeg4_intra_rl_len; + } else { + if(last_index<0) return; + i = 0; + rl = &rl_inter; + bits_tab= uni_mpeg4_inter_rl_bits; + len_tab = uni_mpeg4_inter_rl_len; + } + + 
/* AC coefs */ + last_non_zero = i - 1; +#if 1 + for (; i < last_index; i++) { + int level = block[ scan_table[i] ]; + if (level) { + int run = i - last_non_zero - 1; + level+=64; + if((level&(~127)) == 0){ + const int index= UNI_MPEG4_ENC_INDEX(0, run, level); + put_bits(ac_pb, len_tab[index], bits_tab[index]); + }else{ //ESC3 + put_bits(ac_pb, 7+2+1+6+1+12+1, (3<<23)+(3<<21)+(0<<20)+(run<<14)+(1<<13)+(((level-64)&0xfff)<<1)+1); + } + last_non_zero = i; + } + } + /*if(i<=last_index)*/{ + int level = block[ scan_table[i] ]; + int run = i - last_non_zero - 1; + level+=64; + if((level&(~127)) == 0){ + const int index= UNI_MPEG4_ENC_INDEX(1, run, level); + put_bits(ac_pb, len_tab[index], bits_tab[index]); + }else{ //ESC3 + put_bits(ac_pb, 7+2+1+6+1+12+1, (3<<23)+(3<<21)+(1<<20)+(run<<14)+(1<<13)+(((level-64)&0xfff)<<1)+1); + } + } +#else + for (; i <= last_index; i++) { + const int slevel = block[ scan_table[i] ]; + if (slevel) { + int level; + int run = i - last_non_zero - 1; + last = (i == last_index); + sign = 0; + level = slevel; + if (level < 0) { + sign = 1; + level = -level; + } + code = get_rl_index(rl, last, run, level); + put_bits(ac_pb, rl->table_vlc[code][1], rl->table_vlc[code][0]); + if (code == rl->n) { + int level1, run1; + level1 = level - rl->max_level[last][run]; + if (level1 < 1) + goto esc2; + code = get_rl_index(rl, last, run, level1); + if (code == rl->n) { + esc2: + put_bits(ac_pb, 1, 1); + if (level > MAX_LEVEL) + goto esc3; + run1 = run - rl->max_run[last][level] - 1; + if (run1 < 0) + goto esc3; + code = get_rl_index(rl, last, run1, level); + if (code == rl->n) { + esc3: + /* third escape */ + put_bits(ac_pb, 1, 1); + put_bits(ac_pb, 1, last); + put_bits(ac_pb, 6, run); + put_bits(ac_pb, 1, 1); + put_bits(ac_pb, 12, slevel & 0xfff); + put_bits(ac_pb, 1, 1); + } else { + /* second escape */ + put_bits(ac_pb, 1, 0); + put_bits(ac_pb, rl->table_vlc[code][1], rl->table_vlc[code][0]); + put_bits(ac_pb, 1, sign); + } + } else { + /* first escape 
*/ + put_bits(ac_pb, 1, 0); + put_bits(ac_pb, rl->table_vlc[code][1], rl->table_vlc[code][0]); + put_bits(ac_pb, 1, sign); + } + } else { + put_bits(ac_pb, 1, sign); + } + last_non_zero = i; + } + } +#endif +} -static inline int mpeg4_get_block_length(MpegEncContext * s, DCTELEM * block, int n, int intra_dc, +static int mpeg4_get_block_length(MpegEncContext * s, DCTELEM * block, int n, int intra_dc, uint8_t *scan_table) { int i, last_non_zero; @@ -2690,7 +2803,7 @@ static inline int mpeg4_get_block_length(MpegEncContext * s, DCTELEM * block, in if (s->mb_intra) { //Note gcc (3.2.1 at least) will optimize this away /* mpeg4 based DC predictor */ - //mpeg4_encode_dc(dc_pb, intra_dc, n); //FIXME + len += mpeg4_get_dc_length(intra_dc, n); if(last_index<1) return len; i = 1; rl = &rl_intra; @@ -2749,13 +2862,17 @@ static VLC mb_type_b_vlc; static VLC h263_mbtype_b_vlc; static VLC cbpc_b_vlc; -void init_vlc_rl(RLTable *rl) +void init_vlc_rl(RLTable *rl, int use_static) { int i, q; - + + /* Return if static table is already initialized */ + if(use_static && rl->rl_vlc[0]) + return; + init_vlc(&rl->vlc, 9, rl->n + 1, &rl->table_vlc[0][1], 4, 2, - &rl->table_vlc[0][0], 4, 2); + &rl->table_vlc[0][0], 4, 2, use_static); for(q=0; q<32; q++){ @@ -2766,8 +2883,10 @@ void init_vlc_rl(RLTable *rl) qmul=1; qadd=0; } - - rl->rl_vlc[q]= av_malloc(rl->vlc.table_size*sizeof(RL_VLC_ELEM)); + if(use_static) + rl->rl_vlc[q]= av_mallocz_static(rl->vlc.table_size*sizeof(RL_VLC_ELEM)); + else + rl->rl_vlc[q]= av_malloc(rl->vlc.table_size*sizeof(RL_VLC_ELEM)); for(i=0; i<rl->vlc.table_size; i++){ int code= rl->vlc.table[i][0]; int len = rl->vlc.table[i][1]; @@ -2808,44 +2927,44 @@ void h263_decode_init_vlc(MpegEncContext *s) init_vlc(&intra_MCBPC_vlc, INTRA_MCBPC_VLC_BITS, 9, intra_MCBPC_bits, 1, 1, - intra_MCBPC_code, 1, 1); + intra_MCBPC_code, 1, 1, 1); init_vlc(&inter_MCBPC_vlc, INTER_MCBPC_VLC_BITS, 28, inter_MCBPC_bits, 1, 1, - inter_MCBPC_code, 1, 1); + inter_MCBPC_code, 1, 1, 1); 
init_vlc(&cbpy_vlc, CBPY_VLC_BITS, 16, &cbpy_tab[0][1], 2, 1, - &cbpy_tab[0][0], 2, 1); + &cbpy_tab[0][0], 2, 1, 1); init_vlc(&mv_vlc, MV_VLC_BITS, 33, &mvtab[0][1], 2, 1, - &mvtab[0][0], 2, 1); - init_rl(&rl_inter); - init_rl(&rl_intra); - init_rl(&rvlc_rl_inter); - init_rl(&rvlc_rl_intra); - init_rl(&rl_intra_aic); - init_vlc_rl(&rl_inter); - init_vlc_rl(&rl_intra); - init_vlc_rl(&rvlc_rl_inter); - init_vlc_rl(&rvlc_rl_intra); - init_vlc_rl(&rl_intra_aic); + &mvtab[0][0], 2, 1, 1); + init_rl(&rl_inter, 1); + init_rl(&rl_intra, 1); + init_rl(&rvlc_rl_inter, 1); + init_rl(&rvlc_rl_intra, 1); + init_rl(&rl_intra_aic, 1); + init_vlc_rl(&rl_inter, 1); + init_vlc_rl(&rl_intra, 1); + init_vlc_rl(&rvlc_rl_inter, 1); + init_vlc_rl(&rvlc_rl_intra, 1); + init_vlc_rl(&rl_intra_aic, 1); init_vlc(&dc_lum, DC_VLC_BITS, 10 /* 13 */, &DCtab_lum[0][1], 2, 1, - &DCtab_lum[0][0], 2, 1); + &DCtab_lum[0][0], 2, 1, 1); init_vlc(&dc_chrom, DC_VLC_BITS, 10 /* 13 */, &DCtab_chrom[0][1], 2, 1, - &DCtab_chrom[0][0], 2, 1); + &DCtab_chrom[0][0], 2, 1, 1); init_vlc(&sprite_trajectory, SPRITE_TRAJ_VLC_BITS, 15, &sprite_trajectory_tab[0][1], 4, 2, - &sprite_trajectory_tab[0][0], 4, 2); + &sprite_trajectory_tab[0][0], 4, 2, 1); init_vlc(&mb_type_b_vlc, MB_TYPE_B_VLC_BITS, 4, &mb_type_b_tab[0][1], 2, 1, - &mb_type_b_tab[0][0], 2, 1); + &mb_type_b_tab[0][0], 2, 1, 1); init_vlc(&h263_mbtype_b_vlc, H263_MBTYPE_B_VLC_BITS, 15, &h263_mbtype_b_tab[0][1], 2, 1, - &h263_mbtype_b_tab[0][0], 2, 1); + &h263_mbtype_b_tab[0][0], 2, 1, 1); init_vlc(&cbpc_b_vlc, CBPC_B_VLC_BITS, 4, &cbpc_b_tab[0][1], 2, 1, - &cbpc_b_tab[0][0], 2, 1); + &cbpc_b_tab[0][0], 2, 1, 1); } } @@ -2955,7 +3074,7 @@ void ff_mpeg4_init_partitions(MpegEncContext *s) uint8_t *start= pbBufPtr(&s->pb); uint8_t *end= s->pb.buf_end; int size= end - start; - int pb_size = (((int)start + size/3)&(~3)) - (int)start; + int pb_size = (((long)start + size/3)&(~3)) - (long)start; int tex_size= (size - 2*pb_size)&(~3); set_put_bits_buffer_size(&s->pb, 
pb_size); @@ -3274,53 +3393,6 @@ static inline int get_amv(MpegEncContext *s, int n){ } /** - * decodes the dc value. - * @param n block index (0-3 are luma, 4-5 are chroma) - * @param dir_ptr the prediction direction will be stored here - * @return the quantized dc - */ -static inline int mpeg4_decode_dc(MpegEncContext * s, int n, int *dir_ptr) -{ - int level, code; - - if (n < 4) - code = get_vlc2(&s->gb, dc_lum.table, DC_VLC_BITS, 1); - else - code = get_vlc2(&s->gb, dc_chrom.table, DC_VLC_BITS, 1); - if (code < 0 || code > 9 /* && s->nbit<9 */){ - av_log(s->avctx, AV_LOG_ERROR, "illegal dc vlc\n"); - return -1; - } - if (code == 0) { - level = 0; - } else { - if(IS_3IV1){ - if(code==1) - level= 2*get_bits1(&s->gb)-1; - else{ - if(get_bits1(&s->gb)) - level = get_bits(&s->gb, code-1) + (1<<(code-1)); - else - level = -get_bits(&s->gb, code-1) - (1<<(code-1)); - } - }else{ - level = get_xbits(&s->gb, code); - } - - if (code > 8){ - if(get_bits1(&s->gb)==0){ /* marker */ - if(s->error_resilience>=2){ - av_log(s->avctx, AV_LOG_ERROR, "dc marker bit missing\n"); - return -1; - } - } - } - } - - return ff_mpeg4_pred_dc(s, n, level, dir_ptr, 0); -} - -/** * decodes first partition. * @return number of MBs decoded or <0 if an error occured */ @@ -3626,263 +3698,6 @@ int ff_mpeg4_decode_partitions(MpegEncContext *s) } /** - * decodes a block. 
- * @return <0 if an error occured - */ -static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block, - int n, int coded, int intra, int rvlc) -{ - int level, i, last, run; - int dc_pred_dir; - RLTable * rl; - RL_VLC_ELEM * rl_vlc; - const uint8_t * scan_table; - int qmul, qadd; - - //Note intra & rvlc should be optimized away if this is inlined - - if(intra) { - if(s->qscale < s->intra_dc_threshold){ - /* DC coef */ - if(s->partitioned_frame){ - level = s->dc_val[0][ s->block_index[n] ]; - if(n<4) level= FASTDIV((level + (s->y_dc_scale>>1)), s->y_dc_scale); - else level= FASTDIV((level + (s->c_dc_scale>>1)), s->c_dc_scale); - dc_pred_dir= (s->pred_dir_table[s->mb_x + s->mb_y*s->mb_stride]<<n)&32; - }else{ - level = mpeg4_decode_dc(s, n, &dc_pred_dir); - if (level < 0) - return -1; - } - block[0] = level; - i = 0; - }else{ - i = -1; - } - if (!coded) - goto not_coded; - - if(rvlc){ - rl = &rvlc_rl_intra; - rl_vlc = rvlc_rl_intra.rl_vlc[0]; - }else{ - rl = &rl_intra; - rl_vlc = rl_intra.rl_vlc[0]; - } - if (s->ac_pred) { - if (dc_pred_dir == 0) - scan_table = s->intra_v_scantable.permutated; /* left */ - else - scan_table = s->intra_h_scantable.permutated; /* top */ - } else { - scan_table = s->intra_scantable.permutated; - } - qmul=1; - qadd=0; - } else { - i = -1; - if (!coded) { - s->block_last_index[n] = i; - return 0; - } - if(rvlc) rl = &rvlc_rl_inter; - else rl = &rl_inter; - - scan_table = s->intra_scantable.permutated; - - if(s->mpeg_quant){ - qmul=1; - qadd=0; - if(rvlc){ - rl_vlc = rvlc_rl_inter.rl_vlc[0]; - }else{ - rl_vlc = rl_inter.rl_vlc[0]; - } - }else{ - qmul = s->qscale << 1; - qadd = (s->qscale - 1) | 1; - if(rvlc){ - rl_vlc = rvlc_rl_inter.rl_vlc[s->qscale]; - }else{ - rl_vlc = rl_inter.rl_vlc[s->qscale]; - } - } - } - { - OPEN_READER(re, &s->gb); - for(;;) { - UPDATE_CACHE(re, &s->gb); - GET_RL_VLC(level, run, re, &s->gb, rl_vlc, TEX_VLC_BITS, 2); - if (level==0) { - /* escape */ - if(rvlc){ - if(SHOW_UBITS(re, &s->gb, 1)==0){ - 
av_log(s->avctx, AV_LOG_ERROR, "1. marker bit missing in rvlc esc\n"); - return -1; - }; SKIP_CACHE(re, &s->gb, 1); - - last= SHOW_UBITS(re, &s->gb, 1); SKIP_CACHE(re, &s->gb, 1); - run= SHOW_UBITS(re, &s->gb, 6); LAST_SKIP_CACHE(re, &s->gb, 6); - SKIP_COUNTER(re, &s->gb, 1+1+6); - UPDATE_CACHE(re, &s->gb); - - if(SHOW_UBITS(re, &s->gb, 1)==0){ - av_log(s->avctx, AV_LOG_ERROR, "2. marker bit missing in rvlc esc\n"); - return -1; - }; SKIP_CACHE(re, &s->gb, 1); - - level= SHOW_UBITS(re, &s->gb, 11); SKIP_CACHE(re, &s->gb, 11); - - if(SHOW_UBITS(re, &s->gb, 5)!=0x10){ - av_log(s->avctx, AV_LOG_ERROR, "reverse esc missing\n"); - return -1; - }; SKIP_CACHE(re, &s->gb, 5); - - level= level * qmul + qadd; - level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1); LAST_SKIP_CACHE(re, &s->gb, 1); - SKIP_COUNTER(re, &s->gb, 1+11+5+1); - - i+= run + 1; - if(last) i+=192; - }else{ - int cache; - cache= GET_CACHE(re, &s->gb); - - if(IS_3IV1) - cache ^= 0xC0000000; - - if (cache&0x80000000) { - if (cache&0x40000000) { - int ulevel; - - /* third escape */ - SKIP_CACHE(re, &s->gb, 2); - last= SHOW_UBITS(re, &s->gb, 1); SKIP_CACHE(re, &s->gb, 1); - run= SHOW_UBITS(re, &s->gb, 6); LAST_SKIP_CACHE(re, &s->gb, 6); - SKIP_COUNTER(re, &s->gb, 2+1+6); - UPDATE_CACHE(re, &s->gb); - - if(IS_3IV1){ - level= SHOW_SBITS(re, &s->gb, 12); LAST_SKIP_BITS(re, &s->gb, 12); - }else{ - if(SHOW_UBITS(re, &s->gb, 1)==0){ - av_log(s->avctx, AV_LOG_ERROR, "1. marker bit missing in 3. esc\n"); - return -1; - }; SKIP_CACHE(re, &s->gb, 1); - - level= SHOW_SBITS(re, &s->gb, 12); SKIP_CACHE(re, &s->gb, 12); - - if(SHOW_UBITS(re, &s->gb, 1)==0){ - av_log(s->avctx, AV_LOG_ERROR, "2. marker bit missing in 3. 
esc\n"); - return -1; - }; LAST_SKIP_CACHE(re, &s->gb, 1); - - SKIP_COUNTER(re, &s->gb, 1+12+1); - } - - if(s->mpeg_quant){ - if(intra) ulevel= level*s->qscale*s->intra_matrix[scan_table[1]]; - else ulevel= level*s->qscale*s->inter_matrix[scan_table[0]]; - }else - ulevel= level*s->qscale*16; - if(ulevel>1030*16 || ulevel<-1030*16){ - av_log(s->avctx, AV_LOG_ERROR, "|level| overflow in 3. esc, qp=%d\n", s->qscale); - return -1; - } - -#if 0 - if(s->error_resilience >= FF_ER_COMPLIANT){ - const int abs_level= ABS(level); - if(abs_level<=MAX_LEVEL && run<=MAX_RUN){ - const int run1= run - rl->max_run[last][abs_level] - 1; - if(abs_level <= rl->max_level[last][run]){ - av_log(s->avctx, AV_LOG_ERROR, "illegal 3. esc, vlc encoding possible\n"); - return -1; - } - if(s->error_resilience > FF_ER_COMPLIANT){ - if(abs_level <= rl->max_level[last][run]*2){ - fprintf(stderr, "illegal 3. esc, esc 1 encoding possible\n"); - return -1; - } - if(run1 >= 0 && abs_level <= rl->max_level[last][run1]){ - fprintf(stderr, "illegal 3. 
esc, esc 2 encoding possible\n"); - return -1; - } - } - } - } -#endif - if (level>0) level= level * qmul + qadd; - else level= level * qmul - qadd; - - i+= run + 1; - if(last) i+=192; - } else { - /* second escape */ -#if MIN_CACHE_BITS < 20 - LAST_SKIP_BITS(re, &s->gb, 2); - UPDATE_CACHE(re, &s->gb); -#else - SKIP_BITS(re, &s->gb, 2); -#endif - GET_RL_VLC(level, run, re, &s->gb, rl_vlc, TEX_VLC_BITS, 2); - i+= run + rl->max_run[run>>7][level/qmul] +1; //FIXME opt indexing - level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1); - LAST_SKIP_BITS(re, &s->gb, 1); - } - } else { - /* first escape */ -#if MIN_CACHE_BITS < 19 - LAST_SKIP_BITS(re, &s->gb, 1); - UPDATE_CACHE(re, &s->gb); -#else - SKIP_BITS(re, &s->gb, 1); -#endif - GET_RL_VLC(level, run, re, &s->gb, rl_vlc, TEX_VLC_BITS, 2); - i+= run; - level = level + rl->max_level[run>>7][(run-1)&63] * qmul;//FIXME opt indexing - level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1); - LAST_SKIP_BITS(re, &s->gb, 1); - } - } - } else { - i+= run; - level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1); - LAST_SKIP_BITS(re, &s->gb, 1); - } - if (i > 62){ - i-= 192; - if(i&(~63)){ - av_log(s->avctx, AV_LOG_ERROR, "ac-tex damaged at %d %d\n", s->mb_x, s->mb_y); - return -1; - } - - block[scan_table[i]] = level; - break; - } - - block[scan_table[i]] = level; - } - CLOSE_READER(re, &s->gb); - } - not_coded: - if (intra) { - if(s->qscale >= s->intra_dc_threshold){ - block[0] = ff_mpeg4_pred_dc(s, n, block[0], &dc_pred_dir, 0); - - if(i == -1) i=0; - } - - mpeg4_pred_ac(s, block, n, dc_pred_dir); - if (s->ac_pred) { - i = 63; /* XXX: not optimal */ - } - } - s->block_last_index[n] = i; - return 0; -} - -/** * decode partition C of one MB. 
* @return <0 if an error occured */ @@ -4681,8 +4496,8 @@ static int h263_decode_motion(MpegEncContext * s, int pred, int f_code) /* modulo decoding */ if (!s->h263_long_vectors) { - l = 1 << (f_code + 4); - val = ((val + l)&(l*2-1)) - l; + l = INT_BIT - 5 - f_code; + val = (val<<l)>>l; } else { /* horrible h263 long vector mode */ if (pred < -31 && val < -63) @@ -4830,7 +4645,7 @@ retry: memset(block, 0, sizeof(DCTELEM)*64); goto retry; } - av_log(s->avctx, AV_LOG_ERROR, "run overflow at %dx%d\n", s->mb_x, s->mb_y); + av_log(s->avctx, AV_LOG_ERROR, "run overflow at %dx%d i:%d\n", s->mb_x, s->mb_y, s->mb_intra); return -1; } j = scan_table[i]; @@ -4848,9 +4663,307 @@ not_coded: return 0; } +/** + * decodes the dc value. + * @param n block index (0-3 are luma, 4-5 are chroma) + * @param dir_ptr the prediction direction will be stored here + * @return the quantized dc + */ +static inline int mpeg4_decode_dc(MpegEncContext * s, int n, int *dir_ptr) +{ + int level, code; + + if (n < 4) + code = get_vlc2(&s->gb, dc_lum.table, DC_VLC_BITS, 1); + else + code = get_vlc2(&s->gb, dc_chrom.table, DC_VLC_BITS, 1); + if (code < 0 || code > 9 /* && s->nbit<9 */){ + av_log(s->avctx, AV_LOG_ERROR, "illegal dc vlc\n"); + return -1; + } + if (code == 0) { + level = 0; + } else { + if(IS_3IV1){ + if(code==1) + level= 2*get_bits1(&s->gb)-1; + else{ + if(get_bits1(&s->gb)) + level = get_bits(&s->gb, code-1) + (1<<(code-1)); + else + level = -get_bits(&s->gb, code-1) - (1<<(code-1)); + } + }else{ + level = get_xbits(&s->gb, code); + } + + if (code > 8){ + if(get_bits1(&s->gb)==0){ /* marker */ + if(s->error_resilience>=2){ + av_log(s->avctx, AV_LOG_ERROR, "dc marker bit missing\n"); + return -1; + } + } + } + } + + return ff_mpeg4_pred_dc(s, n, level, dir_ptr, 0); +} + +/** + * decodes a block. 
+ * @return <0 if an error occured + */ +static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block, + int n, int coded, int intra, int rvlc) +{ + int level, i, last, run; + int dc_pred_dir; + RLTable * rl; + RL_VLC_ELEM * rl_vlc; + const uint8_t * scan_table; + int qmul, qadd; + + //Note intra & rvlc should be optimized away if this is inlined + + if(intra) { + if(s->qscale < s->intra_dc_threshold){ + /* DC coef */ + if(s->partitioned_frame){ + level = s->dc_val[0][ s->block_index[n] ]; + if(n<4) level= FASTDIV((level + (s->y_dc_scale>>1)), s->y_dc_scale); + else level= FASTDIV((level + (s->c_dc_scale>>1)), s->c_dc_scale); + dc_pred_dir= (s->pred_dir_table[s->mb_x + s->mb_y*s->mb_stride]<<n)&32; + }else{ + level = mpeg4_decode_dc(s, n, &dc_pred_dir); + if (level < 0) + return -1; + } + block[0] = level; + i = 0; + }else{ + i = -1; + } + if (!coded) + goto not_coded; + + if(rvlc){ + rl = &rvlc_rl_intra; + rl_vlc = rvlc_rl_intra.rl_vlc[0]; + }else{ + rl = &rl_intra; + rl_vlc = rl_intra.rl_vlc[0]; + } + if (s->ac_pred) { + if (dc_pred_dir == 0) + scan_table = s->intra_v_scantable.permutated; /* left */ + else + scan_table = s->intra_h_scantable.permutated; /* top */ + } else { + scan_table = s->intra_scantable.permutated; + } + qmul=1; + qadd=0; + } else { + i = -1; + if (!coded) { + s->block_last_index[n] = i; + return 0; + } + if(rvlc) rl = &rvlc_rl_inter; + else rl = &rl_inter; + + scan_table = s->intra_scantable.permutated; + + if(s->mpeg_quant){ + qmul=1; + qadd=0; + if(rvlc){ + rl_vlc = rvlc_rl_inter.rl_vlc[0]; + }else{ + rl_vlc = rl_inter.rl_vlc[0]; + } + }else{ + qmul = s->qscale << 1; + qadd = (s->qscale - 1) | 1; + if(rvlc){ + rl_vlc = rvlc_rl_inter.rl_vlc[s->qscale]; + }else{ + rl_vlc = rl_inter.rl_vlc[s->qscale]; + } + } + } + { + OPEN_READER(re, &s->gb); + for(;;) { + UPDATE_CACHE(re, &s->gb); + GET_RL_VLC(level, run, re, &s->gb, rl_vlc, TEX_VLC_BITS, 2, 0); + if (level==0) { + /* escape */ + if(rvlc){ + if(SHOW_UBITS(re, &s->gb, 1)==0){ + 
av_log(s->avctx, AV_LOG_ERROR, "1. marker bit missing in rvlc esc\n"); + return -1; + }; SKIP_CACHE(re, &s->gb, 1); + + last= SHOW_UBITS(re, &s->gb, 1); SKIP_CACHE(re, &s->gb, 1); + run= SHOW_UBITS(re, &s->gb, 6); LAST_SKIP_CACHE(re, &s->gb, 6); + SKIP_COUNTER(re, &s->gb, 1+1+6); + UPDATE_CACHE(re, &s->gb); + + if(SHOW_UBITS(re, &s->gb, 1)==0){ + av_log(s->avctx, AV_LOG_ERROR, "2. marker bit missing in rvlc esc\n"); + return -1; + }; SKIP_CACHE(re, &s->gb, 1); + + level= SHOW_UBITS(re, &s->gb, 11); SKIP_CACHE(re, &s->gb, 11); + + if(SHOW_UBITS(re, &s->gb, 5)!=0x10){ + av_log(s->avctx, AV_LOG_ERROR, "reverse esc missing\n"); + return -1; + }; SKIP_CACHE(re, &s->gb, 5); + + level= level * qmul + qadd; + level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1); LAST_SKIP_CACHE(re, &s->gb, 1); + SKIP_COUNTER(re, &s->gb, 1+11+5+1); + + i+= run + 1; + if(last) i+=192; + }else{ + int cache; + cache= GET_CACHE(re, &s->gb); + + if(IS_3IV1) + cache ^= 0xC0000000; + + if (cache&0x80000000) { + if (cache&0x40000000) { + /* third escape */ + SKIP_CACHE(re, &s->gb, 2); + last= SHOW_UBITS(re, &s->gb, 1); SKIP_CACHE(re, &s->gb, 1); + run= SHOW_UBITS(re, &s->gb, 6); LAST_SKIP_CACHE(re, &s->gb, 6); + SKIP_COUNTER(re, &s->gb, 2+1+6); + UPDATE_CACHE(re, &s->gb); + + if(IS_3IV1){ + level= SHOW_SBITS(re, &s->gb, 12); LAST_SKIP_BITS(re, &s->gb, 12); + }else{ + if(SHOW_UBITS(re, &s->gb, 1)==0){ + av_log(s->avctx, AV_LOG_ERROR, "1. marker bit missing in 3. esc\n"); + return -1; + }; SKIP_CACHE(re, &s->gb, 1); + + level= SHOW_SBITS(re, &s->gb, 12); SKIP_CACHE(re, &s->gb, 12); + + if(SHOW_UBITS(re, &s->gb, 1)==0){ + av_log(s->avctx, AV_LOG_ERROR, "2. marker bit missing in 3. 
esc\n"); + return -1; + }; LAST_SKIP_CACHE(re, &s->gb, 1); + + SKIP_COUNTER(re, &s->gb, 1+12+1); + } + +#if 0 + if(s->error_resilience >= FF_ER_COMPLIANT){ + const int abs_level= ABS(level); + if(abs_level<=MAX_LEVEL && run<=MAX_RUN){ + const int run1= run - rl->max_run[last][abs_level] - 1; + if(abs_level <= rl->max_level[last][run]){ + av_log(s->avctx, AV_LOG_ERROR, "illegal 3. esc, vlc encoding possible\n"); + return -1; + } + if(s->error_resilience > FF_ER_COMPLIANT){ + if(abs_level <= rl->max_level[last][run]*2){ + fprintf(stderr, "illegal 3. esc, esc 1 encoding possible\n"); + return -1; + } + if(run1 >= 0 && abs_level <= rl->max_level[last][run1]){ + fprintf(stderr, "illegal 3. esc, esc 2 encoding possible\n"); + return -1; + } + } + } + } +#endif + if (level>0) level= level * qmul + qadd; + else level= level * qmul - qadd; + + if((unsigned)(level + 2048) > 4095){ + if(s->error_resilience > FF_ER_COMPLIANT){ + if(level > 2560 || level<-2560){ + av_log(s->avctx, AV_LOG_ERROR, "|level| overflow in 3. esc, qp=%d\n", s->qscale); + return -1; + } + } + level= level<0 ? 
-2048 : 2047; + } + i+= run + 1; + if(last) i+=192; + } else { + /* second escape */ +#if MIN_CACHE_BITS < 20 + LAST_SKIP_BITS(re, &s->gb, 2); + UPDATE_CACHE(re, &s->gb); +#else + SKIP_BITS(re, &s->gb, 2); +#endif + GET_RL_VLC(level, run, re, &s->gb, rl_vlc, TEX_VLC_BITS, 2, 1); + i+= run + rl->max_run[run>>7][level/qmul] +1; //FIXME opt indexing + level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1); + LAST_SKIP_BITS(re, &s->gb, 1); + } + } else { + /* first escape */ +#if MIN_CACHE_BITS < 19 + LAST_SKIP_BITS(re, &s->gb, 1); + UPDATE_CACHE(re, &s->gb); +#else + SKIP_BITS(re, &s->gb, 1); +#endif + GET_RL_VLC(level, run, re, &s->gb, rl_vlc, TEX_VLC_BITS, 2, 1); + i+= run; + level = level + rl->max_level[run>>7][(run-1)&63] * qmul;//FIXME opt indexing + level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1); + LAST_SKIP_BITS(re, &s->gb, 1); + } + } + } else { + i+= run; + level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1); + LAST_SKIP_BITS(re, &s->gb, 1); + } + if (i > 62){ + i-= 192; + if(i&(~63)){ + av_log(s->avctx, AV_LOG_ERROR, "ac-tex damaged at %d %d\n", s->mb_x, s->mb_y); + return -1; + } + + block[scan_table[i]] = level; + break; + } + block[scan_table[i]] = level; + } + CLOSE_READER(re, &s->gb); + } + not_coded: + if (intra) { + if(s->qscale >= s->intra_dc_threshold){ + block[0] = ff_mpeg4_pred_dc(s, n, block[0], &dc_pred_dir, 0); + + if(i == -1) i=0; + } + mpeg4_pred_ac(s, block, n, dc_pred_dir); + if (s->ac_pred) { + i = 63; /* XXX: not optimal */ + } + } + s->block_last_index[n] = i; + return 0; +} /* most is hardcoded. should extend to handle all h263 streams */ int h263_decode_picture_header(MpegEncContext *s) @@ -5434,7 +5547,7 @@ static int decode_vol_header(MpegEncContext *s, GetBitContext *gb){ skip_bits1(gb); /* marker */ height = get_bits(gb, 13); skip_bits1(gb); /* marker */ - if(width && height){ /* they should be non zero but who knows ... 
*/ + if(width && height && !(s->width && s->avctx->codec_tag == ff_get_fourcc("MP4S"))){ /* they should be non zero but who knows ... */ s->width = width; s->height = height; // printf("width/height: %d %d\n", width, height); @@ -5738,7 +5851,8 @@ static int decode_vop_header(MpegEncContext *s, GetBitContext *gb){ /* vop coded */ if (get_bits1(gb) != 1){ - av_log(s->avctx, AV_LOG_ERROR, "vop not coded\n"); + if(s->avctx->debug&FF_DEBUG_PICT_INFO) + av_log(s->avctx, AV_LOG_ERROR, "vop not coded\n"); return FRAME_SKIPED; } //printf("time %d %d %d || %Ld %Ld %Ld\n", s->time_increment_bits, s->time_increment_resolution, s->time_base, @@ -5874,13 +5988,20 @@ int ff_mpeg4_decode_picture_header(MpegEncContext * s, GetBitContext *gb) /* search next start code */ align_get_bits(gb); + + if(s->avctx->codec_tag == ff_get_fourcc("WV1F") && show_bits(gb, 24) == 0x575630){ + skip_bits(gb, 24); + if(get_bits(gb, 8) == 0xF0) + return decode_vop_header(s, gb); + } + startcode = 0xff; for(;;) { v = get_bits(gb, 8); startcode = ((startcode << 8) | v) & 0xffffffff; if(get_bits_count(gb) >= gb->size_in_bits){ - if(gb->size_in_bits==8 && s->divx_version){ + if(gb->size_in_bits==8 && (s->divx_version || s->xvid_build)){ av_log(s->avctx, AV_LOG_ERROR, "frame skip %d\n", gb->size_in_bits); return FRAME_SKIPED; //divx bug }else @@ -6059,7 +6180,7 @@ int flv_h263_decode_picture_header(MpegEncContext *s) width = height = 0; break; } - if ((width == 0) || (height == 0)) + if(avcodec_check_dimensions(s->avctx, width, height)) return -1; s->width = width; s->height = height; @@ -6068,7 +6189,7 @@ int flv_h263_decode_picture_header(MpegEncContext *s) s->dropable= s->pict_type > P_TYPE; if (s->dropable) s->pict_type = P_TYPE; - + skip_bits1(&s->gb); /* deblocking flag */ s->chroma_qscale= s->qscale = get_bits(&s->gb, 5); @@ -6085,7 +6206,7 @@ int flv_h263_decode_picture_header(MpegEncContext *s) if(s->avctx->debug & FF_DEBUG_PICT_INFO){ av_log(s->avctx, AV_LOG_DEBUG, "%c esc_type:%d, qp:%d 
num:%d\n", - av_get_pict_type_char(s->pict_type), s->h263_flv-1, s->qscale, s->picture_number); + s->dropable ? 'D' : av_get_pict_type_char(s->pict_type), s->h263_flv-1, s->qscale, s->picture_number); } s->y_dc_scale_table= diff --git a/src/libffmpeg/libavcodec/h263dec.c b/src/libffmpeg/libavcodec/h263dec.c index 1ffefa1b2..da2bd54a2 100644 --- a/src/libffmpeg/libavcodec/h263dec.c +++ b/src/libffmpeg/libavcodec/h263dec.c @@ -37,8 +37,8 @@ int ff_h263_decode_init(AVCodecContext *avctx) s->avctx = avctx; s->out_format = FMT_H263; - s->width = avctx->width; - s->height = avctx->height; + s->width = avctx->coded_width; + s->height = avctx->coded_height; s->workaround_bugs= avctx->workaround_bugs; // set defaults @@ -85,6 +85,11 @@ int ff_h263_decode_init(AVCodecContext *avctx) s->h263_pred = 1; s->msmpeg4_version=5; break; + case CODEC_ID_WMV3: + s->h263_msmpeg4 = 1; + s->h263_pred = 1; + s->msmpeg4_version=6; + break; case CODEC_ID_H263I: break; case CODEC_ID_FLV1: @@ -139,6 +144,7 @@ static int get_consumed_bytes(MpegEncContext *s, int buf_size){ static int decode_slice(MpegEncContext *s){ const int part_mask= s->partitioned_frame ? 
(AC_END|AC_ERROR) : 0x7F; + const int mb_size= 16>>s->avctx->lowres; s->last_resync_gb= s->gb; s->first_slice_line= 1; @@ -214,7 +220,7 @@ static int decode_slice(MpegEncContext *s){ if(++s->mb_x >= s->mb_width){ s->mb_x=0; - ff_draw_horiz_band(s, s->mb_y*16, 16); + ff_draw_horiz_band(s, s->mb_y*mb_size, mb_size); s->mb_y++; } return 0; @@ -234,7 +240,7 @@ static int decode_slice(MpegEncContext *s){ ff_h263_loop_filter(s); } - ff_draw_horiz_band(s, s->mb_y*16, 16); + ff_draw_horiz_band(s, s->mb_y*mb_size, mb_size); s->mb_x= 0; } @@ -254,18 +260,25 @@ static int decode_slice(MpegEncContext *s){ if(bits_left==0){ s->padding_bug_score+=16; - }else if(bits_left>8){ - s->padding_bug_score++; } else if(bits_left != 1){ int v= show_bits(&s->gb, 8); v|= 0x7F >> (7-(bits_count&7)); - if(v==0x7F) + if(v==0x7F && bits_left<=8) s->padding_bug_score--; + else if(v==0x7F && ((get_bits_count(&s->gb)+8)&8) && bits_left<=16) + s->padding_bug_score+= 4; else s->padding_bug_score++; } } + + if(s->workaround_bugs&FF_BUG_AUTODETECT){ + if(s->padding_bug_score > -2 && !s->data_partitioning /*&& (s->divx_version || !s->resync_marker)*/) + s->workaround_bugs |= FF_BUG_NO_PADDING; + else + s->workaround_bugs &= ~FF_BUG_NO_PADDING; + } // handle formats which dont have unique end markers if(s->msmpeg4_version || (s->workaround_bugs&FF_BUG_NO_PADDING)){ //FIXME perhaps solve this more cleanly @@ -515,13 +528,13 @@ retry: if(s->avctx->codec_tag == ff_get_fourcc("DIVX") && s->vo_type==0 && s->vol_control_parameters==0) s->divx_version= 400; //divx 4 } + + if(s->xvid_build && s->divx_version){ + s->divx_version= + s->divx_build= 0; + } if(s->workaround_bugs&FF_BUG_AUTODETECT){ - s->workaround_bugs &= ~FF_BUG_NO_PADDING; - - if(s->padding_bug_score > -2 && !s->data_partitioning && (s->divx_version || !s->resync_marker)) - s->workaround_bugs |= FF_BUG_NO_PADDING; - if(s->avctx->codec_tag == ff_get_fourcc("XVIX")) s->workaround_bugs|= FF_BUG_XVID_ILACE; @@ -623,13 +636,21 @@ retry: fprintf(f, "%d 
%d %f\n", buf_size, s->qscale, buf_size*(double)s->qscale); } #endif - + +#ifdef HAVE_MMX + if(s->codec_id == CODEC_ID_MPEG4 && s->xvid_build && avctx->idct_algo == FF_IDCT_AUTO && (mm_flags & MM_MMX) && !(s->flags&CODEC_FLAG_BITEXACT)){ + avctx->idct_algo= FF_IDCT_LIBMPEG2MMX; + avctx->coded_width= 0; // force reinit + } +#endif + /* After H263 & mpeg4 header decode we have the height, width,*/ /* and other parameters. So then we could init the picture */ /* FIXME: By the way H263 decoder is evolving it should have */ /* an H263EncContext */ - if ( s->width != avctx->width || s->height != avctx->height) { + if ( s->width != avctx->coded_width + || s->height != avctx->coded_height) { /* H.263 could change picture size any time */ ParseContext pc= s->parse_context; //FIXME move these demuxng hack to avformat s->parse_context.buffer=0; @@ -637,8 +658,7 @@ retry: s->parse_context= pc; } if (!s->context_initialized) { - avctx->width = s->width; - avctx->height = s->height; + avcodec_set_dimensions(avctx, s->width, s->height); goto retry; } @@ -651,7 +671,7 @@ retry: s->current_picture.key_frame= s->pict_type == I_TYPE; /* skip b frames if we dont have reference frames */ - if(s->last_picture_ptr==NULL && s->pict_type==B_TYPE) return get_consumed_bytes(s, buf_size); + if(s->last_picture_ptr==NULL && (s->pict_type==B_TYPE || s->dropable)) return get_consumed_bytes(s, buf_size); /* skip b frames if we are in a hurry */ if(avctx->hurry_up && s->pict_type==B_TYPE) return get_consumed_bytes(s, buf_size); /* skip everything if we are in a hurry>=5 */ @@ -709,9 +729,8 @@ retry: if(s->codec_id==CODEC_ID_MPEG4 && s->bitstream_buffer_size==0 && s->divx_packed){ int current_pos= get_bits_count(&s->gb)>>3; int startcode_found=0; - - if( buf_size - current_pos > 5 - && buf_size - current_pos < BITSTREAM_BUFFER_SIZE){ + + if(buf_size - current_pos > 5){ int i; for(i=current_pos; i<buf_size-3; i++){ if(buf[i]==0 && buf[i+1]==0 && buf[i+2]==1 && buf[i+3]==0xB6){ @@ -726,6 +745,10 @@ 
retry: } if(startcode_found){ + s->bitstream_buffer= av_fast_realloc( + s->bitstream_buffer, + &s->allocated_bitstream_buffer_size, + buf_size - current_pos + FF_INPUT_BUFFER_PADDING_SIZE); memcpy(s->bitstream_buffer, buf + current_pos, buf_size - current_pos); s->bitstream_buffer_size= buf_size - current_pos; } @@ -760,12 +783,6 @@ printf("%Ld\n", rdtsc()-time); return get_consumed_bytes(s, buf_size); } -static const AVOption mpeg4_decoptions[] = -{ - AVOPTION_SUB(avoptions_workaround_bug), - AVOPTION_END() -}; - AVCodec mpeg4_decoder = { "mpeg4", CODEC_TYPE_VIDEO, @@ -775,8 +792,7 @@ AVCodec mpeg4_decoder = { NULL, ff_h263_decode_end, ff_h263_decode_frame, - CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED, - .options = mpeg4_decoptions, + CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY, .flush= ff_mpeg_flush, }; @@ -789,7 +805,7 @@ AVCodec h263_decoder = { NULL, ff_h263_decode_end, ff_h263_decode_frame, - CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED, + CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY, .flush= ff_mpeg_flush, }; @@ -803,7 +819,6 @@ AVCodec msmpeg4v1_decoder = { ff_h263_decode_end, ff_h263_decode_frame, CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1, - mpeg4_decoptions, }; AVCodec msmpeg4v2_decoder = { @@ -816,7 +831,6 @@ AVCodec msmpeg4v2_decoder = { ff_h263_decode_end, ff_h263_decode_frame, CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1, - mpeg4_decoptions, }; AVCodec msmpeg4v3_decoder = { @@ -829,7 +843,6 @@ AVCodec msmpeg4v3_decoder = { ff_h263_decode_end, ff_h263_decode_frame, CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1, - .options = mpeg4_decoptions, }; AVCodec wmv1_decoder = { @@ -842,7 +855,6 @@ AVCodec wmv1_decoder = { ff_h263_decode_end, ff_h263_decode_frame, CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1, - mpeg4_decoptions, }; AVCodec h263i_decoder = { @@ -855,7 +867,6 @@ AVCodec h263i_decoder = { ff_h263_decode_end, ff_h263_decode_frame, 
CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1, - mpeg4_decoptions, }; AVCodec flv_decoder = { diff --git a/src/libffmpeg/libavcodec/h264.c b/src/libffmpeg/libavcodec/h264.c index 889900362..39ee4c6a2 100644 --- a/src/libffmpeg/libavcodec/h264.c +++ b/src/libffmpeg/libavcodec/h264.c @@ -87,6 +87,8 @@ typedef struct SPS{ uint32_t time_scale; int fixed_frame_rate_flag; short offset_for_ref_frame[256]; //FIXME dyn aloc? + int bitstream_restriction_flag; + int num_reorder_frames; }SPS; /** @@ -151,10 +153,12 @@ typedef struct H264Context{ uint8_t *rbsp_buffer; int rbsp_buffer_size; - // AVC - int is_avc; // != 0 if data is avc variant of h264 - int got_avcC; // flag to parse avcC data only once - int nal_length_size; // Number of bytes used for nal length (1, 2 or 4) + /** + * Used to parse AVC variant of h264 + */ + int is_avc; ///< this flag is != 0 if codec is avc1 + int got_avcC; ///< flag used to parse avcC data only once + int nal_length_size; ///< Number of bytes used for nal length (1, 2 or 4) int chroma_qp; //QPc @@ -163,6 +167,9 @@ typedef struct H264Context{ //prediction stuff int chroma_pred_mode; int intra16x16_pred_mode; + + int top_mb_xy; + int left_mb_xy[2]; int8_t intra4x4_pred_mode_cache[5*8]; int8_t (*intra4x4_pred_mode)[8]; @@ -173,21 +180,21 @@ typedef struct H264Context{ unsigned int top_samples_available; unsigned int topright_samples_available; unsigned int left_samples_available; - uint8_t (*top_border)[16+2*8]; - uint8_t left_border[17+2*9]; + uint8_t (*top_borders[2])[16+2*8]; + uint8_t left_border[2*(17+2*9)]; /** * non zero coeff count cache. * is 64 if not available. */ - uint8_t non_zero_count_cache[6*8]; + uint8_t non_zero_count_cache[6*8] __align8; uint8_t (*non_zero_count)[16]; /** * Motion vector cache. */ - int16_t mv_cache[2][5*8][2]; - int8_t ref_cache[2][5*8]; + int16_t mv_cache[2][5*8][2] __align8; + int8_t ref_cache[2][5*8] __align8; #define LIST_NOT_USED -1 //FIXME rename? 
#define PART_NOT_AVAILABLE -2 @@ -196,12 +203,15 @@ typedef struct H264Context{ */ int mv_cache_clean[2]; - int block_offset[16+8]; - int chroma_subblock_offset[16]; //FIXME remove + /** + * block_offset[ 0..23] for frame macroblocks + * block_offset[24..47] for field macroblocks + */ + int block_offset[2*(16+8)]; uint16_t *mb2b_xy; //FIXME are these 4 a good idea? uint16_t *mb2b8_xy; - int b_stride; + int b_stride; //FIXME use s->b4_stride int b8_stride; int halfpel_flag; @@ -226,6 +236,7 @@ typedef struct H264Context{ int slice_type_fixed; //interlacing specific flags + int mb_aff_frame; int mb_field_decoding_flag; int sub_mb_type[4]; @@ -253,12 +264,15 @@ typedef struct H264Context{ int max_pic_num; //Weighted pred stuff + int use_weight; + int use_weight_chroma; int luma_log2_weight_denom; int chroma_log2_weight_denom; int luma_weight[2][16]; int luma_offset[2][16]; int chroma_weight[2][16][2]; int chroma_offset[2][16][2]; + int implicit_weight[16][16]; //deblock int deblocking_filter; ///< disable_deblocking_filter_idc with 1<->0 @@ -268,16 +282,20 @@ typedef struct H264Context{ int redundant_pic_count; int direct_spatial_mv_pred; + int dist_scale_factor[16]; + int map_col_to_list0[2][16]; /** * num_ref_idx_l0/1_active_minus1 + 1 */ int ref_count[2];// FIXME split for AFF - Picture *short_ref[16]; - Picture *long_ref[16]; + Picture *short_ref[32]; + Picture *long_ref[32]; Picture default_ref_list[2][32]; Picture ref_list[2][32]; //FIXME size? Picture field_ref_list[2][32]; //FIXME size? + Picture *delayed_pic[16]; //FIXME size? + Picture *delayed_output_pic; /** * memory management control operations buffer. @@ -305,11 +323,15 @@ typedef struct H264Context{ /* 0x100 -> non null luma_dc, 0x80/0x40 -> non null chroma_dc (cb/cr), 0x?0 -> chroma_cbp(0,1,2), 0x0? 
luma_cbp */ uint16_t *cbp_table; + int top_cbp; + int left_cbp; /* chroma_pred_mode for i4x4 or i16x16, else 0 */ uint8_t *chroma_pred_mode_table; int last_qscale_diff; int16_t (*mvd_table[2])[2]; - int16_t mvd_cache[2][5*8][2]; + int16_t mvd_cache[2][5*8][2] __align8; + uint8_t *direct_table; + uint8_t direct_cache[5*8]; }H264Context; @@ -324,7 +346,7 @@ static VLC run7_vlc; static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp); static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc); -static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr); +static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize); static inline uint32_t pack16to32(int a, int b){ #ifdef WORDS_BIGENDIAN @@ -336,8 +358,8 @@ static inline uint32_t pack16to32(int a, int b){ /** * fill a rectangle. - * @param h height of the recatangle, should be a constant - * @param w width of the recatangle, should be a constant + * @param h height of the rectangle, should be a constant + * @param w width of the rectangle, should be a constant * @param size the size of val (1 or 4), should be a constant */ static inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){ //FIXME ensure this IS inlined @@ -347,6 +369,7 @@ static inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t v w *= size; stride *= size; + assert((((int)vp)&(FFMIN(w, STRIDE_ALIGN)-1)) == 0); //FIXME check what gcc generates for 64 bit on x86 and possible write a 32 bit ver of it if(w==2 && h==2){ *(uint16_t*)(p + 0)= @@ -397,38 +420,110 @@ static inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t v assert(0); } -static inline void fill_caches(H264Context *h, int mb_type){ +static inline void fill_caches(H264Context *h, int mb_type, int for_deblock){ MpegEncContext * const s = &h->s; 
const int mb_xy= s->mb_x + s->mb_y*s->mb_stride; int topleft_xy, top_xy, topright_xy, left_xy[2]; int topleft_type, top_type, topright_type, left_type[2]; - int left_block[4]; + int left_block[8]; int i; //wow what a mess, why didnt they simplify the interlacing&intra stuff, i cant imagine that these complex rules are worth it - if(h->sps.mb_aff){ - //FIXME - topleft_xy = 0; /* avoid warning */ - top_xy = 0; /* avoid warning */ - topright_xy = 0; /* avoid warning */ + top_xy = mb_xy - s->mb_stride; + topleft_xy = top_xy - 1; + topright_xy= top_xy + 1; + left_xy[1] = left_xy[0] = mb_xy-1; + left_block[0]= 0; + left_block[1]= 1; + left_block[2]= 2; + left_block[3]= 3; + left_block[4]= 7; + left_block[5]= 10; + left_block[6]= 8; + left_block[7]= 11; + if(h->mb_aff_frame){ + const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride; + const int top_pair_xy = pair_xy - s->mb_stride; + const int topleft_pair_xy = top_pair_xy - 1; + const int topright_pair_xy = top_pair_xy + 1; + const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]); + const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]); + const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]); + const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]); + const int curr_mb_frame_flag = !IS_INTERLACED(mb_type); + const int bottom = (s->mb_y & 1); + tprintf("fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag); + if (bottom + ? !curr_mb_frame_flag // bottom macroblock + : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock + ) { + top_xy -= s->mb_stride; + } + if (bottom + ? 
!curr_mb_frame_flag // bottom macroblock + : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock + ) { + topleft_xy -= s->mb_stride; + } + if (bottom + ? !curr_mb_frame_flag // bottom macroblock + : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock + ) { + topright_xy -= s->mb_stride; + } + if (left_mb_frame_flag != curr_mb_frame_flag) { + left_xy[1] = left_xy[0] = pair_xy - 1; + if (curr_mb_frame_flag) { + if (bottom) { + left_block[0]= 2; + left_block[1]= 2; + left_block[2]= 3; + left_block[3]= 3; + left_block[4]= 8; + left_block[5]= 11; + left_block[6]= 8; + left_block[7]= 11; + } else { + left_block[0]= 0; + left_block[1]= 0; + left_block[2]= 1; + left_block[3]= 1; + left_block[4]= 7; + left_block[5]= 10; + left_block[6]= 7; + left_block[7]= 10; + } + } else { + left_xy[1] += s->mb_stride; + //left_block[0]= 0; + left_block[1]= 2; + left_block[2]= 0; + left_block[3]= 2; + //left_block[4]= 7; + left_block[5]= 10; + left_block[6]= 7; + left_block[7]= 10; + } + } + } + + h->top_mb_xy = top_xy; + h->left_mb_xy[0] = left_xy[0]; + h->left_mb_xy[1] = left_xy[1]; + if(for_deblock){ + topleft_type = h->slice_table[topleft_xy ] < 255 ? s->current_picture.mb_type[topleft_xy] : 0; + top_type = h->slice_table[top_xy ] < 255 ? s->current_picture.mb_type[top_xy] : 0; + topright_type= h->slice_table[topright_xy] < 255 ? s->current_picture.mb_type[topright_xy]: 0; + left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0; + left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0; }else{ - topleft_xy = mb_xy-1 - s->mb_stride; - top_xy = mb_xy - s->mb_stride; - topright_xy= mb_xy+1 - s->mb_stride; - left_xy[0] = mb_xy-1; - left_xy[1] = mb_xy-1; - left_block[0]= 0; - left_block[1]= 1; - left_block[2]= 2; - left_block[3]= 3; - } - - topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? 
s->current_picture.mb_type[topleft_xy] : 0; - top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0; - topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0; - left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0; - left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0; + topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0; + top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0; + topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0; + left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0; + left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0; + } if(IS_INTRA(mb_type)){ h->topleft_samples_available= @@ -462,10 +557,10 @@ static inline void fill_caches(H264Context *h, int mb_type){ h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3]; }else{ int pred; - if(IS_INTRA16x16(top_type) || (IS_INTER(top_type) && !h->pps.constrained_intra_pred)) - pred= 2; - else{ + if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred)) pred= -1; + else{ + pred= 2; } h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode_cache[5+8*0]= @@ -478,10 +573,10 @@ static inline void fill_caches(H264Context *h, int mb_type){ h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]]; }else{ int pred; - if(IS_INTRA16x16(left_type[i]) || (IS_INTER(left_type[i]) && !h->pps.constrained_intra_pred)) - pred= 2; - else{ + if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred)) pred= -1; + else{ + pred= 2; } h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode_cache[3+8*2 + 
2*8*i]= pred; @@ -501,16 +596,17 @@ static inline void fill_caches(H264Context *h, int mb_type){ */ //FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec) if(top_type){ - h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][0]; - h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][1]; - h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][2]; + h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4]; + h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5]; + h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6]; h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3]; - h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][7]; + h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9]; h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8]; - h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][10]; + h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12]; h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11]; + }else{ h->non_zero_count_cache[4+8*0]= h->non_zero_count_cache[5+8*0]= @@ -521,44 +617,61 @@ static inline void fill_caches(H264Context *h, int mb_type){ h->non_zero_count_cache[2+8*0]= h->non_zero_count_cache[1+8*3]= - h->non_zero_count_cache[2+8*3]= 64; + h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 
0 : 64; + } - - if(left_type[0]){ - h->non_zero_count_cache[3+8*1]= h->non_zero_count[left_xy[0]][6]; - h->non_zero_count_cache[3+8*2]= h->non_zero_count[left_xy[0]][5]; - h->non_zero_count_cache[0+8*1]= h->non_zero_count[left_xy[0]][9]; //FIXME left_block - h->non_zero_count_cache[0+8*4]= h->non_zero_count[left_xy[0]][12]; - }else{ - h->non_zero_count_cache[3+8*1]= - h->non_zero_count_cache[3+8*2]= - h->non_zero_count_cache[0+8*1]= - h->non_zero_count_cache[0+8*4]= 64; + + for (i=0; i<2; i++) { + if(left_type[i]){ + h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]]; + h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]]; + h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]]; + h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]]; + }else{ + h->non_zero_count_cache[3+8*1 + 2*8*i]= + h->non_zero_count_cache[3+8*2 + 2*8*i]= + h->non_zero_count_cache[0+8*1 + 8*i]= + h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 
0 : 64; + } } - - if(left_type[1]){ - h->non_zero_count_cache[3+8*3]= h->non_zero_count[left_xy[1]][4]; - h->non_zero_count_cache[3+8*4]= h->non_zero_count[left_xy[1]][3]; - h->non_zero_count_cache[0+8*2]= h->non_zero_count[left_xy[1]][8]; - h->non_zero_count_cache[0+8*5]= h->non_zero_count[left_xy[1]][11]; - }else{ - h->non_zero_count_cache[3+8*3]= - h->non_zero_count_cache[3+8*4]= - h->non_zero_count_cache[0+8*2]= - h->non_zero_count_cache[0+8*5]= 64; + + if( h->pps.cabac ) { + // top_cbp + if(top_type) { + h->top_cbp = h->cbp_table[top_xy]; + } else if(IS_INTRA(mb_type)) { + h->top_cbp = 0x1C0; + } else { + h->top_cbp = 0; + } + // left_cbp + if (left_type[0]) { + h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0; + } else if(IS_INTRA(mb_type)) { + h->left_cbp = 0x1C0; + } else { + h->left_cbp = 0; + } + if (left_type[0]) { + h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1; + } + if (left_type[1]) { + h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3; + } } - + #if 1 - if(IS_INTER(mb_type)){ + //FIXME direct mb can skip much of this + if(IS_INTER(mb_type) || (IS_DIRECT(mb_type) && h->direct_spatial_mv_pred)){ int list; for(list=0; list<2; list++){ - if((!IS_8X8(mb_type)) && !USES_LIST(mb_type, list)){ + if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !for_deblock){ /*if(!h->mv_cache_clean[list]){ memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all? memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t)); h->mv_cache_clean[list]= 1; }*/ - continue; //FIXME direct mode ... + continue; } h->mv_cache_clean[list]= 0; @@ -630,6 +743,9 @@ static inline void fill_caches(H264Context *h, int mb_type){ h->ref_cache[list][scan8[0] - 1 + 3*8]= left_type[0] ? 
LIST_NOT_USED : PART_NOT_AVAILABLE; } + if(for_deblock) + continue; + h->ref_cache[list][scan8[5 ]+1] = h->ref_cache[list][scan8[7 ]+1] = h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewher else) @@ -683,9 +799,35 @@ static inline void fill_caches(H264Context *h, int mb_type){ *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewher else) *(uint32_t*)h->mvd_cache [list][scan8[4 ]]= *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0; + + if(h->slice_type == B_TYPE){ + fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1); + + if(IS_DIRECT(top_type)){ + *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101; + }else if(IS_8X8(top_type)){ + int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride; + h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy]; + h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1]; + }else{ + *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0; + } + + //FIXME interlacing + if(IS_DIRECT(left_type[0])){ + h->direct_cache[scan8[0] - 1 + 0*8]= + h->direct_cache[scan8[0] - 1 + 2*8]= 1; + }else if(IS_8X8(left_type[0])){ + int b8_xy = h->mb2b8_xy[left_xy[0]] + 1; + h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[b8_xy]; + h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[b8_xy + h->b8_stride]; + }else{ + h->direct_cache[scan8[0] - 1 + 0*8]= + h->direct_cache[scan8[0] - 1 + 2*8]= 0; + } + } } } -//FIXME } #endif } @@ -747,8 +889,10 @@ static inline int check_intra_pred_mode(H264Context *h, int mode){ static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1}; static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8}; - if(mode < 0 || mode > 6) + if(mode < 0 || mode > 6) { + av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y); return -1; + } if(!(h->top_samples_available&0x8000)){ mode= top[ mode ]; @@ -788,21 +932,21 @@ static inline void write_back_non_zero_count(H264Context *h){ MpegEncContext * const s = &h->s; const int mb_xy= 
s->mb_x + s->mb_y*s->mb_stride; - h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[4+8*4]; - h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[5+8*4]; - h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[6+8*4]; + h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1]; + h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2]; + h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3]; h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4]; - h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[7+8*3]; - h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[7+8*2]; - h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[7+8*1]; + h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4]; + h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4]; + h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4]; - h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[1+8*2]; + h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2]; h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2]; - h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[2+8*1]; + h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1]; - h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[1+8*5]; + h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5]; h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5]; - h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[2+8*4]; + h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4]; } /** @@ -864,6 +1008,7 @@ static inline void pred_motion(H264Context * const h, int n, int part_width, int diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width); match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref); + tprintf("pred_motion match_count=%d\n", match_count); if(match_count > 1){ //most common *mx= mid_pred(A[0], B[0], C[0]); *my= mid_pred(A[1], B[1], C[1]); @@ -902,7 +1047,7 @@ static inline void pred_16x8_motion(H264Context * const h, int n, int list, int const int top_ref= 
h->ref_cache[list][ scan8[0] - 8 ]; const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ]; - tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list); + tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list); if(top_ref == ref){ *mx= B[0]; @@ -913,7 +1058,7 @@ static inline void pred_16x8_motion(H264Context * const h, int n, int list, int const int left_ref= h->ref_cache[list][ scan8[8] - 1 ]; const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ]; - tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list); + tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list); if(left_ref == ref){ *mx= A[0]; @@ -937,7 +1082,7 @@ static inline void pred_8x16_motion(H264Context * const h, int n, int list, int const int left_ref= h->ref_cache[list][ scan8[0] - 1 ]; const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ]; - tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list); + tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list); if(left_ref == ref){ *mx= A[0]; @@ -950,7 +1095,7 @@ static inline void pred_8x16_motion(H264Context * const h, int n, int list, int diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2); - tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list); + tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list); if(diagonal_ref == ref){ *mx= C[0]; @@ -967,7 +1112,7 @@ static inline void pred_pskip_motion(H264Context * const h, int * const mx, int const int top_ref = h->ref_cache[0][ scan8[0] - 8 ]; const int left_ref= h->ref_cache[0][ scan8[0] - 1 ]; - tprintf("pred_pskip: (%d) (%d) at %2d %2d", 
top_ref, left_ref, h->s.mb_x, h->s.mb_y); + tprintf("pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y); if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0) @@ -982,6 +1127,226 @@ static inline void pred_pskip_motion(H264Context * const h, int * const mx, int return; } +static inline void direct_dist_scale_factor(H264Context * const h){ + const int poc = h->s.current_picture_ptr->poc; + const int poc1 = h->ref_list[1][0].poc; + int i; + for(i=0; i<h->ref_count[0]; i++){ + int poc0 = h->ref_list[0][i].poc; + int td = clip(poc1 - poc0, -128, 127); + if(td == 0 /* FIXME || pic0 is a long-term ref */){ + h->dist_scale_factor[i] = 256; + }else{ + int tb = clip(poc - poc0, -128, 127); + int tx = (16384 + (ABS(td) >> 1)) / td; + h->dist_scale_factor[i] = clip((tb*tx + 32) >> 6, -1024, 1023); + } + } +} +static inline void direct_ref_list_init(H264Context * const h){ + MpegEncContext * const s = &h->s; + Picture * const ref1 = &h->ref_list[1][0]; + Picture * const cur = s->current_picture_ptr; + int list, i, j; + if(cur->pict_type == I_TYPE) + cur->ref_count[0] = 0; + if(cur->pict_type != B_TYPE) + cur->ref_count[1] = 0; + for(list=0; list<2; list++){ + cur->ref_count[list] = h->ref_count[list]; + for(j=0; j<h->ref_count[list]; j++) + cur->ref_poc[list][j] = h->ref_list[list][j].poc; + } + if(cur->pict_type != B_TYPE || h->direct_spatial_mv_pred) + return; + for(list=0; list<2; list++){ + for(i=0; i<ref1->ref_count[list]; i++){ + const int poc = ref1->ref_poc[list][i]; + h->map_col_to_list0[list][i] = PART_NOT_AVAILABLE; + for(j=0; j<h->ref_count[list]; j++) + if(h->ref_list[list][j].poc == poc){ + h->map_col_to_list0[list][i] = j; + break; + } + } + } +} + +static inline void pred_direct_motion(H264Context * const h, int *mb_type){ + MpegEncContext * const s = &h->s; + const int mb_xy = s->mb_x + s->mb_y*s->mb_stride; + const int b8_xy = 2*s->mb_x + 
2*s->mb_y*h->b8_stride; + const int b4_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride; + const int mb_type_col = h->ref_list[1][0].mb_type[mb_xy]; + const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[0][b4_xy]; + const int8_t *l1ref0 = &h->ref_list[1][0].ref_index[0][b8_xy]; + const int8_t *l1ref1 = &h->ref_list[1][0].ref_index[1][b8_xy]; + const int is_b8x8 = IS_8X8(*mb_type); + int sub_mb_type; + int i8, i4; + + if(IS_8X8(mb_type_col) && !h->sps.direct_8x8_inference_flag){ + /* FIXME save sub mb types from previous frames (or derive from MVs) + * so we know exactly what block size to use */ + sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */ + *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1; + }else if(!is_b8x8 && (IS_16X16(mb_type_col) || IS_INTRA(mb_type_col))){ + sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */ + *mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */ + }else{ + sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */ + *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1; + } + if(!is_b8x8) + *mb_type |= MB_TYPE_DIRECT2; + + tprintf("mb_type = %08x, sub_mb_type = %08x, is_b8x8 = %d, mb_type_col = %08x\n", *mb_type, sub_mb_type, is_b8x8, mb_type_col); + + if(h->direct_spatial_mv_pred){ + int ref[2]; + int mv[2][2]; + int list; + + /* ref = min(neighbors) */ + for(list=0; list<2; list++){ + int refa = h->ref_cache[list][scan8[0] - 1]; + int refb = h->ref_cache[list][scan8[0] - 8]; + int refc = h->ref_cache[list][scan8[0] - 8 + 4]; + if(refc == -2) + refc = h->ref_cache[list][scan8[0] - 8 - 1]; + ref[list] = refa; + if(ref[list] < 0 || (refb < ref[list] && refb >= 0)) + ref[list] = refb; + if(ref[list] < 0 || (refc < ref[list] && refc >= 0)) + ref[list] = refc; + if(ref[list] < 0) + ref[list] = -1; + } + + if(ref[0] < 0 && ref[1] < 0){ + ref[0] = ref[1] = 0; + mv[0][0] = mv[0][1] = + mv[1][0] = mv[1][1] = 0; + }else{ + 
for(list=0; list<2; list++){ + if(ref[list] >= 0) + pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]); + else + mv[list][0] = mv[list][1] = 0; + } + } + + if(ref[1] < 0){ + *mb_type &= ~MB_TYPE_P0L1; + sub_mb_type &= ~MB_TYPE_P0L1; + }else if(ref[0] < 0){ + *mb_type &= ~MB_TYPE_P0L0; + sub_mb_type &= ~MB_TYPE_P0L0; + } + + if(IS_16X16(*mb_type)){ + fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref[0], 1); + fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, ref[1], 1); + if(!IS_INTRA(mb_type_col) && l1ref0[0] == 0 && + ABS(l1mv0[0][0]) <= 1 && ABS(l1mv0[0][1]) <= 1){ + if(ref[0] > 0) + fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv[0][0],mv[0][1]), 4); + else + fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, 0, 4); + if(ref[1] > 0) + fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv[1][0],mv[1][1]), 4); + else + fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, 0, 4); + }else{ + fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv[0][0],mv[0][1]), 4); + fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv[1][0],mv[1][1]), 4); + } + }else{ + for(i8=0; i8<4; i8++){ + const int x8 = i8&1; + const int y8 = i8>>1; + + if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8])) + continue; + h->sub_mb_type[i8] = sub_mb_type; + + fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4); + fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4); + fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref[0], 1); + fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, ref[1], 1); + + /* col_zero_flag */ + if(!IS_INTRA(mb_type_col) && l1ref0[x8 + y8*h->b8_stride] == 0){ + for(i4=0; i4<4; i4++){ + const int16_t *mv_col = l1mv0[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride]; + if(ABS(mv_col[0]) <= 1 && ABS(mv_col[1]) <= 1){ + if(ref[0] == 0) + *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0; + if(ref[1] == 0) + 
*(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0; + } + } + } + } + } + }else{ /* direct temporal mv pred */ + if(IS_16X16(*mb_type)){ + fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1); + if(IS_INTRA(mb_type_col)){ + fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1); + fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, 0, 4); + fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, 0, 4); + }else{ + const int ref0 = l1ref0[0] >= 0 ? h->map_col_to_list0[0][l1ref0[0]] + : h->map_col_to_list0[1][l1ref1[0]]; + const int dist_scale_factor = h->dist_scale_factor[ref0]; + const int16_t *mv_col = l1mv0[0]; + int mv_l0[2]; + mv_l0[0] = (dist_scale_factor * mv_col[0] + 128) >> 8; + mv_l0[1] = (dist_scale_factor * mv_col[1] + 128) >> 8; + fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref0, 1); + fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv_l0[0],mv_l0[1]), 4); + fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]), 4); + } + }else{ + for(i8=0; i8<4; i8++){ + const int x8 = i8&1; + const int y8 = i8>>1; + int ref0, dist_scale_factor; + + if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8])) + continue; + h->sub_mb_type[i8] = sub_mb_type; + if(IS_INTRA(mb_type_col)){ + fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1); + fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1); + fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4); + fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4); + continue; + } + + ref0 = l1ref0[x8 + y8*h->b8_stride]; + if(ref0 >= 0) + ref0 = h->map_col_to_list0[0][ref0]; + else + ref0 = h->map_col_to_list0[1][l1ref1[x8 + y8*h->b8_stride]]; + dist_scale_factor = h->dist_scale_factor[ref0]; + + fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1); + fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1); + for(i4=0; i4<4; i4++){ + const int16_t *mv_col = l1mv0[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride]; + int16_t *mv_l0 = 
h->mv_cache[0][scan8[i8*4+i4]]; + mv_l0[0] = (dist_scale_factor * mv_col[0] + 128) >> 8; + mv_l0[1] = (dist_scale_factor * mv_col[1] + 128) >> 8; + *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = + pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]); + } + } + } + } +} + static inline void write_back_motion(H264Context *h, int mb_type){ MpegEncContext * const s = &h->s; const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride; @@ -990,7 +1355,7 @@ static inline void write_back_motion(H264Context *h, int mb_type){ for(list=0; list<2; list++){ int y; - if((!IS_8X8(mb_type)) && !USES_LIST(mb_type, list)){ + if(!USES_LIST(mb_type, list)){ if(1){ //FIXME skip or never read if mb_type doesnt use it for(y=0; y<4; y++){ *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= @@ -1004,10 +1369,10 @@ static inline void write_back_motion(H264Context *h, int mb_type){ } } for(y=0; y<2; y++){ - *(uint16_t*)s->current_picture.motion_val[list][b8_xy + y*h->b8_stride]= (LIST_NOT_USED&0xFF)*0x0101; + *(uint16_t*)&s->current_picture.ref_index[list][b8_xy + y*h->b8_stride]= (LIST_NOT_USED&0xFF)*0x0101; } } - continue; //FIXME direct mode ... + continue; } for(y=0; y<4; y++){ @@ -1025,6 +1390,14 @@ static inline void write_back_motion(H264Context *h, int mb_type){ s->current_picture.ref_index[list][b8_xy + 1 + y*h->b8_stride]= h->ref_cache[list][scan8[0]+2 + 16*y]; } } + + if(h->slice_type == B_TYPE && h->pps.cabac){ + if(IS_8X8(mb_type)){ + h->direct_table[b8_xy+1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0; + h->direct_table[b8_xy+0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0; + h->direct_table[b8_xy+1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0; + } + } } /** @@ -1315,46 +1688,12 @@ static void chroma_dc_dct_c(DCTELEM *block){ /** * gets the chroma qp. 
*/ -static inline int get_chroma_qp(H264Context *h, int qscale){ +static inline int get_chroma_qp(int chroma_qp_index_offset, int qscale){ - return chroma_qp[clip(qscale + h->pps.chroma_qp_index_offset, 0, 51)]; + return chroma_qp[clip(qscale + chroma_qp_index_offset, 0, 51)]; } -/** - * - */ -static void h264_add_idct_c(uint8_t *dst, DCTELEM *block, int stride){ - int i; - uint8_t *cm = cropTbl + MAX_NEG_CROP; - - block[0] += 32; - - for(i=0; i<4; i++){ - const int z0= block[0 + 4*i] + block[2 + 4*i]; - const int z1= block[0 + 4*i] - block[2 + 4*i]; - const int z2= (block[1 + 4*i]>>1) - block[3 + 4*i]; - const int z3= block[1 + 4*i] + (block[3 + 4*i]>>1); - - block[0 + 4*i]= z0 + z3; - block[1 + 4*i]= z1 + z2; - block[2 + 4*i]= z1 - z2; - block[3 + 4*i]= z0 - z3; - } - - for(i=0; i<4; i++){ - const int z0= block[i + 4*0] + block[i + 4*2]; - const int z1= block[i + 4*0] - block[i + 4*2]; - const int z2= (block[i + 4*1]>>1) - block[i + 4*3]; - const int z3= block[i + 4*1] + (block[i + 4*3]>>1); - - dst[i + 0*stride]= cm[ dst[i + 0*stride] + ((z0 + z3) >> 6) ]; - dst[i + 1*stride]= cm[ dst[i + 1*stride] + ((z1 + z2) >> 6) ]; - dst[i + 2*stride]= cm[ dst[i + 2*stride] + ((z1 - z2) >> 6) ]; - dst[i + 3*stride]= cm[ dst[i + 3*stride] + ((z0 - z3) >> 6) ]; - } -} - #if 0 static void h264_diff_dct_c(DCTELEM *block, uint8_t *src1, uint8_t *src2, int stride){ int i; @@ -1998,7 +2337,7 @@ static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, chroma_op(dest_cr, src_cr, s->uvlinesize, chroma_height, mx&7, my&7); } -static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta, +static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, int x_offset, int y_offset, qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put, @@ -2032,9 +2371,95 @@ static inline void mc_part(H264Context *h, int n, int square, int chroma_height, } } +static 
inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta, + uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, + int x_offset, int y_offset, + qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put, + h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op, + h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg, + int list0, int list1){ + MpegEncContext * const s = &h->s; + + dest_y += 2*x_offset + 2*y_offset*s-> linesize; + dest_cb += x_offset + y_offset*s->uvlinesize; + dest_cr += x_offset + y_offset*s->uvlinesize; + x_offset += 8*s->mb_x; + y_offset += 8*s->mb_y; + + if(list0 && list1){ + /* don't optimize for luma-only case, since B-frames usually + * use implicit weights => chroma too. */ + uint8_t *tmp_cb = s->obmc_scratchpad; + uint8_t *tmp_cr = tmp_cb + 8*s->uvlinesize; + uint8_t *tmp_y = tmp_cr + 8*s->uvlinesize; + int refn0 = h->ref_cache[0][ scan8[n] ]; + int refn1 = h->ref_cache[1][ scan8[n] ]; + + mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0, + dest_y, dest_cb, dest_cr, + x_offset, y_offset, qpix_put, chroma_put); + mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1, + tmp_y, tmp_cb, tmp_cr, + x_offset, y_offset, qpix_put, chroma_put); + + if(h->use_weight == 2){ + int weight0 = h->implicit_weight[refn0][refn1]; + int weight1 = 64 - weight0; + luma_weight_avg( dest_y, tmp_y, s-> linesize, 5, weight0, weight1, 0, 0); + chroma_weight_avg(dest_cb, tmp_cb, s->uvlinesize, 5, weight0, weight1, 0, 0); + chroma_weight_avg(dest_cr, tmp_cr, s->uvlinesize, 5, weight0, weight1, 0, 0); + }else{ + luma_weight_avg(dest_y, tmp_y, s->linesize, h->luma_log2_weight_denom, + h->luma_weight[0][refn0], h->luma_weight[1][refn1], + h->luma_offset[0][refn0], h->luma_offset[1][refn1]); + chroma_weight_avg(dest_cb, tmp_cb, s->uvlinesize, h->chroma_log2_weight_denom, + h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0], + 
h->chroma_offset[0][refn0][0], h->chroma_offset[1][refn1][0]); + chroma_weight_avg(dest_cr, tmp_cr, s->uvlinesize, h->chroma_log2_weight_denom, + h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1], + h->chroma_offset[0][refn0][1], h->chroma_offset[1][refn1][1]); + } + }else{ + int list = list1 ? 1 : 0; + int refn = h->ref_cache[list][ scan8[n] ]; + Picture *ref= &h->ref_list[list][refn]; + mc_dir_part(h, ref, n, square, chroma_height, delta, list, + dest_y, dest_cb, dest_cr, x_offset, y_offset, + qpix_put, chroma_put); + + luma_weight_op(dest_y, s->linesize, h->luma_log2_weight_denom, + h->luma_weight[list][refn], h->luma_offset[list][refn]); + if(h->use_weight_chroma){ + chroma_weight_op(dest_cb, s->uvlinesize, h->chroma_log2_weight_denom, + h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]); + chroma_weight_op(dest_cr, s->uvlinesize, h->chroma_log2_weight_denom, + h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]); + } + } +} + +static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta, + uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, + int x_offset, int y_offset, + qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put, + qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg, + h264_weight_func *weight_op, h264_biweight_func *weight_avg, + int list0, int list1){ + if((h->use_weight==2 && list0 && list1 + && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32)) + || h->use_weight==1) + mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr, + x_offset, y_offset, qpix_put, chroma_put, + weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1); + else + mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr, + x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1); +} + static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, qpel_mc_func 
(*qpix_put)[16], h264_chroma_mc_func (*chroma_put), - qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg)){ + qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg), + h264_weight_func *weight_op, h264_biweight_func *weight_avg){ MpegEncContext * const s = &h->s; const int mb_xy= s->mb_x + s->mb_y*s->mb_stride; const int mb_type= s->current_picture.mb_type[mb_xy]; @@ -2044,20 +2469,25 @@ static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t if(IS_16X16(mb_type)){ mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0, qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0], + &weight_op[0], &weight_avg[0], IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1)); }else if(IS_16X8(mb_type)){ mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0, qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0], + &weight_op[1], &weight_avg[1], IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1)); mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4, qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0], + &weight_op[1], &weight_avg[1], IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1)); }else if(IS_8X16(mb_type)){ mc_part(h, 0, 0, 8, 8*s->linesize, dest_y, dest_cb, dest_cr, 0, 0, qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1], + &weight_op[2], &weight_avg[2], IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1)); mc_part(h, 4, 0, 8, 8*s->linesize, dest_y, dest_cb, dest_cr, 4, 0, qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1], + &weight_op[2], &weight_avg[2], IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1)); }else{ int i; @@ -2073,20 +2503,25 @@ static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t if(IS_SUB_8X8(sub_mb_type)){ mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset, qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1], + &weight_op[3], &weight_avg[3], IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); }else if(IS_SUB_8X4(sub_mb_type)){ mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, 
x_offset, y_offset, qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1], + &weight_op[4], &weight_avg[4], IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2, qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1], + &weight_op[4], &weight_avg[4], IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); }else if(IS_SUB_4X8(sub_mb_type)){ mc_part(h, n , 0, 4, 4*s->linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset, qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2], + &weight_op[5], &weight_avg[5], IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); mc_part(h, n+1, 0, 4, 4*s->linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset, qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2], + &weight_op[5], &weight_avg[5], IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); }else{ int j; @@ -2096,6 +2531,7 @@ static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t int sub_y_offset= y_offset + (j&2); mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset, qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2], + &weight_op[6], &weight_avg[6], IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); } } @@ -2112,33 +2548,33 @@ static void decode_init_vlc(H264Context *h){ init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5, &chroma_dc_coeff_token_len [0], 1, 1, - &chroma_dc_coeff_token_bits[0], 1, 1); + &chroma_dc_coeff_token_bits[0], 1, 1, 1); for(i=0; i<4; i++){ init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17, &coeff_token_len [i][0], 1, 1, - &coeff_token_bits[i][0], 1, 1); + &coeff_token_bits[i][0], 1, 1, 1); } for(i=0; i<3; i++){ init_vlc(&chroma_dc_total_zeros_vlc[i], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4, &chroma_dc_total_zeros_len [i][0], 1, 1, - &chroma_dc_total_zeros_bits[i][0], 1, 1); + &chroma_dc_total_zeros_bits[i][0], 1, 1, 1); } for(i=0; i<15; i++){ init_vlc(&total_zeros_vlc[i], 
TOTAL_ZEROS_VLC_BITS, 16, &total_zeros_len [i][0], 1, 1, - &total_zeros_bits[i][0], 1, 1); + &total_zeros_bits[i][0], 1, 1, 1); } for(i=0; i<6; i++){ init_vlc(&run_vlc[i], RUN_VLC_BITS, 7, &run_len [i][0], 1, 1, - &run_bits[i][0], 1, 1); + &run_bits[i][0], 1, 1, 1); } init_vlc(&run7_vlc, RUN7_VLC_BITS, 16, &run_len [6][0], 1, 1, - &run_bits[6][0], 1, 1); + &run_bits[6][0], 1, 1, 1); } } @@ -2184,13 +2620,17 @@ static void free_tables(H264Context *h){ av_freep(&h->cbp_table); av_freep(&h->mvd_table[0]); av_freep(&h->mvd_table[1]); + av_freep(&h->direct_table); av_freep(&h->non_zero_count); av_freep(&h->slice_table_base); - av_freep(&h->top_border); + av_freep(&h->top_borders[1]); + av_freep(&h->top_borders[0]); h->slice_table= NULL; av_freep(&h->mb2b_xy); av_freep(&h->mb2b8_xy); + + av_freep(&h->s.obmc_scratchpad); } /** @@ -2206,13 +2646,15 @@ static int alloc_tables(H264Context *h){ CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t)) CHECKED_ALLOCZ(h->slice_table_base , big_mb_num * sizeof(uint8_t)) - CHECKED_ALLOCZ(h->top_border , s->mb_width * (16+8+8) * sizeof(uint8_t)) + CHECKED_ALLOCZ(h->top_borders[0] , s->mb_width * (16+8+8) * sizeof(uint8_t)) + CHECKED_ALLOCZ(h->top_borders[1] , s->mb_width * (16+8+8) * sizeof(uint8_t)) + CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t)) if( h->pps.cabac ) { CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t)) - CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t)) CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t)); CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t)); + CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t)); } memset(h->slice_table_base, -1, big_mb_num * sizeof(uint8_t)); @@ -2230,7 +2672,9 @@ static int alloc_tables(H264Context *h){ h->mb2b8_xy[mb_xy]= b8_xy; } } - + + s->obmc_scratchpad = NULL; + return 0; fail: free_tables(h); @@ -2269,15 +2713,12 @@ static int decode_init(AVCodecContext *avctx){ 
decode_init_vlc(h); - if(avctx->codec_tag != 0x31637661) // avc1 - h->is_avc = 0; - else { - if((avctx->extradata_size == 0) || (avctx->extradata == NULL)) { - av_log(avctx, AV_LOG_ERROR, "AVC codec requires avcC data\n"); - return -1; - } + if(avctx->extradata_size > 0 && avctx->extradata && + *(char *)avctx->extradata == 1){ h->is_avc = 1; h->got_avcC = 0; + } else { + h->is_avc = 0; } return 0; @@ -2289,19 +2730,25 @@ static void frame_start(H264Context *h){ MPV_frame_start(s, s->avctx); ff_er_frame_start(s); - h->mmco_index=0; assert(s->linesize && s->uvlinesize); for(i=0; i<16; i++){ h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3); - h->chroma_subblock_offset[i]= 2*((scan8[i] - scan8[0])&7) + 2*s->uvlinesize*((scan8[i] - scan8[0])>>3); + h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3); } for(i=0; i<4; i++){ h->block_offset[16+i]= h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3); + h->block_offset[24+16+i]= + h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3); } + /* can't be in alloc_tables because linesize isn't known there. + * FIXME: redo bipred weight to not require extra buffer? 
*/ + if(!s->obmc_scratchpad) + s->obmc_scratchpad = av_malloc(16*s->linesize + 2*8*s->uvlinesize); + // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1; } @@ -2313,23 +2760,25 @@ static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src src_cb -= uvlinesize; src_cr -= uvlinesize; - h->left_border[0]= h->top_border[s->mb_x][15]; + // There is two lines saved, the line above the the top macroblock of a pair, + // and the line above the bottom macroblock + h->left_border[0]= h->top_borders[0][s->mb_x][15]; for(i=1; i<17; i++){ h->left_border[i]= src_y[15+i* linesize]; } - *(uint64_t*)(h->top_border[s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize); - *(uint64_t*)(h->top_border[s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize); + *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize); + *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize); if(!(s->flags&CODEC_FLAG_GRAY)){ - h->left_border[17 ]= h->top_border[s->mb_x][16+7]; - h->left_border[17+9]= h->top_border[s->mb_x][24+7]; + h->left_border[17 ]= h->top_borders[0][s->mb_x][16+7]; + h->left_border[17+9]= h->top_borders[0][s->mb_x][24+7]; for(i=1; i<9; i++){ h->left_border[i+17 ]= src_cb[7+i*uvlinesize]; h->left_border[i+17+9]= src_cr[7+i*uvlinesize]; } - *(uint64_t*)(h->top_border[s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize); - *(uint64_t*)(h->top_border[s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize); + *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize); + *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize); } } @@ -2357,8 +2806,8 @@ b= t; } if(deblock_top){ - XCHG(*(uint64_t*)(h->top_border[s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg); - XCHG(*(uint64_t*)(h->top_border[s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1); + XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg); + 
XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1); } if(!(s->flags&CODEC_FLAG_GRAY)){ @@ -2369,8 +2818,93 @@ b= t; } } if(deblock_top){ - XCHG(*(uint64_t*)(h->top_border[s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1); - XCHG(*(uint64_t*)(h->top_border[s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1); + XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1); + XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1); + } + } +} + +static inline void backup_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){ + MpegEncContext * const s = &h->s; + int i; + + src_y -= 2 * linesize; + src_cb -= 2 * uvlinesize; + src_cr -= 2 * uvlinesize; + + // There is two lines saved, the line above the the top macroblock of a pair, + // and the line above the bottom macroblock + h->left_border[0]= h->top_borders[0][s->mb_x][15]; + h->left_border[1]= h->top_borders[1][s->mb_x][15]; + for(i=2; i<34; i++){ + h->left_border[i]= src_y[15+i* linesize]; + } + + *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 32*linesize); + *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+32*linesize); + *(uint64_t*)(h->top_borders[1][s->mb_x]+0)= *(uint64_t*)(src_y + 33*linesize); + *(uint64_t*)(h->top_borders[1][s->mb_x]+8)= *(uint64_t*)(src_y +8+33*linesize); + + if(!(s->flags&CODEC_FLAG_GRAY)){ + h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7]; + h->left_border[34+ 1]= h->top_borders[1][s->mb_x][16+7]; + h->left_border[34+18 ]= h->top_borders[0][s->mb_x][24+7]; + h->left_border[34+18+1]= h->top_borders[1][s->mb_x][24+7]; + for(i=2; i<18; i++){ + h->left_border[i+34 ]= src_cb[7+i*uvlinesize]; + h->left_border[i+34+18]= src_cr[7+i*uvlinesize]; + } + *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+16*uvlinesize); + *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+16*uvlinesize); + 
*(uint64_t*)(h->top_borders[1][s->mb_x]+16)= *(uint64_t*)(src_cb+17*uvlinesize); + *(uint64_t*)(h->top_borders[1][s->mb_x]+24)= *(uint64_t*)(src_cr+17*uvlinesize); + } +} + +static inline void xchg_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){ + MpegEncContext * const s = &h->s; + int temp8, i; + uint64_t temp64; + int deblock_left = (s->mb_x > 0); + int deblock_top = (s->mb_y > 0); + + tprintf("xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y, src_cb, src_cr, linesize, uvlinesize); + + src_y -= 2 * linesize + 1; + src_cb -= 2 * uvlinesize + 1; + src_cr -= 2 * uvlinesize + 1; + +#define XCHG(a,b,t,xchg)\ +t= a;\ +if(xchg)\ + a= b;\ +b= t; + + if(deblock_left){ + for(i = (!deblock_top)<<1; i<34; i++){ + XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg); + } + } + + if(deblock_top){ + XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg); + XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1); + XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+0), *(uint64_t*)(src_y +1 +linesize), temp64, xchg); + XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+8), *(uint64_t*)(src_y +9 +linesize), temp64, 1); + } + + if(!(s->flags&CODEC_FLAG_GRAY)){ + if(deblock_left){ + for(i = (!deblock_top) << 1; i<18; i++){ + XCHG(h->left_border[i+34 ], src_cb[i*uvlinesize], temp8, xchg); + XCHG(h->left_border[i+34+18], src_cr[i*uvlinesize], temp8, xchg); + } + } + if(deblock_top){ + XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1); + XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1); + XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+16), *(uint64_t*)(src_cb+1 +uvlinesize), temp64, 1); + XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+24), *(uint64_t*)(src_cr+1 +uvlinesize), temp64, 1); } } } @@ -2384,13 +2918,12 @@ static void hl_decode_mb(H264Context *h){ uint8_t *dest_y, 
*dest_cb, *dest_cr; int linesize, uvlinesize /*dct_offset*/; int i; + int *block_offset = &h->block_offset[0]; + const unsigned int bottom = mb_y & 1; if(!s->decode) return; - if(s->mb_skiped){ - } - dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16; dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8; dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8; @@ -2398,10 +2931,11 @@ static void hl_decode_mb(H264Context *h){ if (h->mb_field_decoding_flag) { linesize = s->linesize * 2; uvlinesize = s->uvlinesize * 2; + block_offset = &h->block_offset[24]; if(mb_y&1){ //FIXME move out of this func? dest_y -= s->linesize*15; - dest_cb-= s->linesize*7; - dest_cr-= s->linesize*7; + dest_cb-= s->uvlinesize*7; + dest_cr-= s->uvlinesize*7; } } else { linesize = s->linesize; @@ -2409,112 +2943,195 @@ static void hl_decode_mb(H264Context *h){ // dct_offset = s->linesize * 16; } - if(IS_INTRA(mb_type)){ - if(h->deblocking_filter) - xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1); + if (IS_INTRA_PCM(mb_type)) { + unsigned int x, y; - if(!(s->flags&CODEC_FLAG_GRAY)){ - h->pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize); - h->pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize); + // The pixels are stored in h->mb array in the same order as levels, + // copy them in output in the correct order. 
+ for(i=0; i<16; i++) { + for (y=0; y<4; y++) { + for (x=0; x<4; x++) { + *(dest_y + block_offset[i] + y*linesize + x) = h->mb[i*16+y*4+x]; + } + } + } + for(i=16; i<16+4; i++) { + for (y=0; y<4; y++) { + for (x=0; x<4; x++) { + *(dest_cb + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x]; + } + } + } + for(i=20; i<20+4; i++) { + for (y=0; y<4; y++) { + for (x=0; x<4; x++) { + *(dest_cr + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x]; + } + } } + } else { + if(IS_INTRA(mb_type)){ + if(h->deblocking_filter) { + if (h->mb_aff_frame) { + if (!bottom) + xchg_pair_border(h, dest_y, dest_cb, dest_cr, s->linesize, s->uvlinesize, 1); + } else { + xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1); + } + } - if(IS_INTRA4x4(mb_type)){ - if(!s->encoding){ - for(i=0; i<16; i++){ - uint8_t * const ptr= dest_y + h->block_offset[i]; - uint8_t *topright= ptr + 4 - linesize; - const int topright_avail= (h->topright_samples_available<<i)&0x8000; - const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ]; - int tr; - - if(!topright_avail){ - /* xine: avoid (negative) buffer overflow */ - tr= (!mb_y && linesize > h->block_offset[i]) ? 
- ptr[3]*0x01010101 : - ptr[3 - linesize]*0x01010101; - topright= (uint8_t*) &tr; - }else if(i==5 && h->deblocking_filter){ - tr= *(uint32_t*)h->top_border[mb_x+1]; - topright= (uint8_t*) &tr; - } + if(!(s->flags&CODEC_FLAG_GRAY)){ + h->pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize); + h->pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize); + } - h->pred4x4[ dir ](ptr, topright, linesize); - if(h->non_zero_count_cache[ scan8[i] ]){ - if(s->codec_id == CODEC_ID_H264) - h264_add_idct_c(ptr, h->mb + i*16, linesize); - else - svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0); + if(IS_INTRA4x4(mb_type)){ + if(!s->encoding){ + for(i=0; i<16; i++){ + uint8_t * const ptr= dest_y + block_offset[i]; + uint8_t *topright; + const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ]; + int tr; + + if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){ + const int topright_avail= (h->topright_samples_available<<i)&0x8000; + assert(mb_y || linesize <= block_offset[i]); + if(!topright_avail){ + tr= ptr[3 - linesize]*0x01010101; + topright= (uint8_t*) &tr; + }else if(i==5 && h->deblocking_filter){ + tr= *(uint32_t*)h->top_borders[h->mb_aff_frame ? IS_INTERLACED(mb_type) ? 
bottom : 1 : 0][mb_x+1]; + topright= (uint8_t*) &tr; + }else + topright= ptr + 4 - linesize; + }else + topright= NULL; + + h->pred4x4[ dir ](ptr, topright, linesize); + if(h->non_zero_count_cache[ scan8[i] ]){ + if(s->codec_id == CODEC_ID_H264) + s->dsp.h264_idct_add(ptr, h->mb + i*16, linesize); + else + svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0); + } } } + }else{ + h->pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize); + if(s->codec_id == CODEC_ID_H264) + h264_luma_dc_dequant_idct_c(h->mb, s->qscale); + else + svq3_luma_dc_dequant_idct_c(h->mb, s->qscale); } - }else{ - h->pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize); - if(s->codec_id == CODEC_ID_H264) - h264_luma_dc_dequant_idct_c(h->mb, s->qscale); - else - svq3_luma_dc_dequant_idct_c(h->mb, s->qscale); + if(h->deblocking_filter) { + if (h->mb_aff_frame) { + if (bottom) { + uint8_t *pair_dest_y = s->current_picture.data[0] + ((mb_y-1) * 16* s->linesize ) + mb_x * 16; + uint8_t *pair_dest_cb = s->current_picture.data[1] + ((mb_y-1) * 8 * s->uvlinesize) + mb_x * 8; + uint8_t *pair_dest_cr = s->current_picture.data[2] + ((mb_y-1) * 8 * s->uvlinesize) + mb_x * 8; + s->mb_y--; + xchg_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize, 0); + s->mb_y++; + } + } else { + xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0); + } + } + }else if(s->codec_id == CODEC_ID_H264){ + hl_motion(h, dest_y, dest_cb, dest_cr, + s->dsp.put_h264_qpel_pixels_tab, s->dsp.put_h264_chroma_pixels_tab, + s->dsp.avg_h264_qpel_pixels_tab, s->dsp.avg_h264_chroma_pixels_tab, + s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab); } - if(h->deblocking_filter) - xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0); - }else if(s->codec_id == CODEC_ID_H264){ - hl_motion(h, dest_y, dest_cb, dest_cr, - s->dsp.put_h264_qpel_pixels_tab, s->dsp.put_h264_chroma_pixels_tab, - s->dsp.avg_h264_qpel_pixels_tab, s->dsp.avg_h264_chroma_pixels_tab); - } - 
if(!IS_INTRA4x4(mb_type)){ - if(s->codec_id == CODEC_ID_H264){ - for(i=0; i<16; i++){ - if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below - uint8_t * const ptr= dest_y + h->block_offset[i]; - h264_add_idct_c(ptr, h->mb + i*16, linesize); + if(!IS_INTRA4x4(mb_type)){ + if(s->codec_id == CODEC_ID_H264){ + for(i=0; i<16; i++){ + if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below + uint8_t * const ptr= dest_y + block_offset[i]; + s->dsp.h264_idct_add(ptr, h->mb + i*16, linesize); + } } - } - }else{ - for(i=0; i<16; i++){ - if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below - uint8_t * const ptr= dest_y + h->block_offset[i]; - svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0); + }else{ + for(i=0; i<16; i++){ + if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below + uint8_t * const ptr= dest_y + block_offset[i]; + svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 
1 : 0); + } } } } - } - if(!(s->flags&CODEC_FLAG_GRAY)){ - chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp); - chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp); - if(s->codec_id == CODEC_ID_H264){ - for(i=16; i<16+4; i++){ - if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ - uint8_t * const ptr= dest_cb + h->block_offset[i]; - h264_add_idct_c(ptr, h->mb + i*16, uvlinesize); + if(!(s->flags&CODEC_FLAG_GRAY)){ + chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp); + chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp); + if(s->codec_id == CODEC_ID_H264){ + for(i=16; i<16+4; i++){ + if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ + uint8_t * const ptr= dest_cb + block_offset[i]; + s->dsp.h264_idct_add(ptr, h->mb + i*16, uvlinesize); + } } - } - for(i=20; i<20+4; i++){ - if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ - uint8_t * const ptr= dest_cr + h->block_offset[i]; - h264_add_idct_c(ptr, h->mb + i*16, uvlinesize); + for(i=20; i<20+4; i++){ + if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ + uint8_t * const ptr= dest_cr + block_offset[i]; + s->dsp.h264_idct_add(ptr, h->mb + i*16, uvlinesize); + } } - } - }else{ - for(i=16; i<16+4; i++){ - if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ - uint8_t * const ptr= dest_cb + h->block_offset[i]; - svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2); + }else{ + for(i=16; i<16+4; i++){ + if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ + uint8_t * const ptr= dest_cb + block_offset[i]; + svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2); + } } - } - for(i=20; i<20+4; i++){ - if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ - uint8_t * const ptr= dest_cr + h->block_offset[i]; - svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2); + for(i=20; i<20+4; i++){ + if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ + uint8_t * const ptr= dest_cr + block_offset[i]; 
+ svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2); + } } } } } if(h->deblocking_filter) { - backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize); - filter_mb(h, mb_x, mb_y, dest_y, dest_cb, dest_cr); + if (h->mb_aff_frame) { + const int mb_y = s->mb_y - 1; + uint8_t *pair_dest_y, *pair_dest_cb, *pair_dest_cr; + const int mb_xy= mb_x + mb_y*s->mb_stride; + const int mb_type_top = s->current_picture.mb_type[mb_xy]; + const int mb_type_bottom= s->current_picture.mb_type[mb_xy+s->mb_stride]; + uint8_t tmp = s->current_picture.data[1][384]; + if (!bottom) return; + pair_dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16; + pair_dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8; + pair_dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8; + + backup_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize); + // TODO deblock a pair + // top + s->mb_y--; + tprintf("call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x, mb_y, pair_dest_y, dest_y); + fill_caches(h, mb_type_top, 1); //FIXME dont fill stuff which isnt used by filter_mb + filter_mb(h, mb_x, mb_y, pair_dest_y, pair_dest_cb, pair_dest_cr, linesize, uvlinesize); + if (tmp != s->current_picture.data[1][384]) { + tprintf("modified pixel 8,1 (1)\n"); + } + // bottom + s->mb_y++; + tprintf("call mbaff filter_mb\n"); + fill_caches(h, mb_type_bottom, 1); //FIXME dont fill stuff which isnt used by filter_mb + filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize); + if (tmp != s->current_picture.data[1][384]) { + tprintf("modified pixel 8,1 (2)\n"); + } + } else { + tprintf("call filter_mb\n"); + backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize); + fill_caches(h, mb_type, 1); //FIXME dont fill stuff which isnt used by filter_mb + filter_mb(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize); + } } } @@ 
-2524,15 +3141,17 @@ static void hl_decode_mb(H264Context *h){ static int fill_default_ref_list(H264Context *h){ MpegEncContext * const s = &h->s; int i; - Picture sorted_short_ref[16]; + int smallest_poc_greater_than_current = -1; + Picture sorted_short_ref[32]; if(h->slice_type==B_TYPE){ int out_i; int limit= -1; + /* sort frame according to poc in B slice */ for(out_i=0; out_i<h->short_ref_count; out_i++){ int best_i=-1; - int best_poc=-1; + int best_poc=INT_MAX; for(i=0; i<h->short_ref_count; i++){ const int poc= h->short_ref[i]->poc; @@ -2546,37 +3165,47 @@ static int fill_default_ref_list(H264Context *h){ limit= best_poc; sorted_short_ref[out_i]= *h->short_ref[best_i]; + tprintf("sorted poc: %d->%d poc:%d fn:%d\n", best_i, out_i, sorted_short_ref[out_i].poc, sorted_short_ref[out_i].frame_num); + if (-1 == smallest_poc_greater_than_current) { + if (h->short_ref[best_i]->poc >= s->current_picture_ptr->poc) { + smallest_poc_greater_than_current = out_i; + } + } } } if(s->picture_structure == PICT_FRAME){ if(h->slice_type==B_TYPE){ - const int current_poc= s->current_picture_ptr->poc; int list; + tprintf("current poc: %d, smallest_poc_greater_than_current: %d\n", s->current_picture_ptr->poc, smallest_poc_greater_than_current); + // find the largest poc for(list=0; list<2; list++){ - int index=0; - - for(i=0; i<h->short_ref_count && index < h->ref_count[list]; i++){ - const int i2= list ? h->short_ref_count - i - 1 : i; - const int poc= sorted_short_ref[i2].poc; - - if(sorted_short_ref[i2].reference != 3) continue; //FIXME refernce field shit - - if((list==1 && poc > current_poc) || (list==0 && poc < current_poc)){ - h->default_ref_list[list][index ]= sorted_short_ref[i2]; - h->default_ref_list[list][index++].pic_id= sorted_short_ref[i2].frame_num; + int index = 0; + int j= -99; + int step= list ? 
-1 : 1; + + for(i=0; i<h->short_ref_count && index < h->ref_count[list]; i++, j+=step) { + while(j<0 || j>= h->short_ref_count){ + step = -step; + j= smallest_poc_greater_than_current + (step>>1); } + if(sorted_short_ref[j].reference != 3) continue; + h->default_ref_list[list][index ]= sorted_short_ref[j]; + h->default_ref_list[list][index++].pic_id= sorted_short_ref[j].frame_num; } - for(i=0; i<h->long_ref_count && index < h->ref_count[ list ]; i++){ + for(i = 0; i < 16 && index < h->ref_count[ list ]; i++){ + if(h->long_ref[i] == NULL) continue; if(h->long_ref[i]->reference != 3) continue; h->default_ref_list[ list ][index ]= *h->long_ref[i]; h->default_ref_list[ list ][index++].pic_id= i;; } - if(h->long_ref_count > 1 && h->short_ref_count==0){ + if(list && (smallest_poc_greater_than_current<=0 || smallest_poc_greater_than_current>=h->short_ref_count) && (1 < index)){ + // swap the two first elements of L1 when + // L0 and L1 are identical Picture temp= h->default_ref_list[1][0]; h->default_ref_list[1][0] = h->default_ref_list[1][1]; h->default_ref_list[1][0] = temp; @@ -2587,12 +3216,13 @@ static int fill_default_ref_list(H264Context *h){ } }else{ int index=0; - for(i=0; i<h->short_ref_count && index < h->ref_count[0]; i++){ + for(i=0; i<h->short_ref_count; i++){ if(h->short_ref[i]->reference != 3) continue; //FIXME refernce field shit h->default_ref_list[0][index ]= *h->short_ref[i]; h->default_ref_list[0][index++].pic_id= h->short_ref[i]->frame_num; } - for(i=0; i<h->long_ref_count && index < h->ref_count[0]; i++){ + for(i = 0; i < 16; i++){ + if(h->long_ref[i] == NULL) continue; if(h->long_ref[i]->reference != 3) continue; h->default_ref_list[0][index ]= *h->long_ref[i]; h->default_ref_list[0][index++].pic_id= i;; @@ -2606,13 +3236,28 @@ static int fill_default_ref_list(H264Context *h){ //FIXME second field balh } } +#ifdef TRACE + for (i=0; i<h->ref_count[0]; i++) { + tprintf("List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? 
"LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]); + } + if(h->slice_type==B_TYPE){ + for (i=0; i<h->ref_count[1]; i++) { + tprintf("List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[0][i].data[0]); + } + } +#endif return 0; } +static void print_short_term(H264Context *h); +static void print_long_term(H264Context *h); + static int decode_ref_pic_list_reordering(H264Context *h){ MpegEncContext * const s = &h->s; int list; + print_short_term(h); + print_long_term(h); if(h->slice_type==I_TYPE || h->slice_type==SI_TYPE) return 0; //FIXME move beofre func for(list=0; list<2; list++){ @@ -2626,7 +3271,10 @@ static int decode_ref_pic_list_reordering(H264Context *h){ int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb); int pic_id; int i; + Picture *ref = NULL; + if(reordering_of_pic_nums_idc==3) + break; if(index >= h->ref_count[list]){ av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n"); @@ -2646,32 +3294,23 @@ static int decode_ref_pic_list_reordering(H264Context *h){ else pred+= abs_diff_pic_num; pred &= h->max_pic_num - 1; - for(i= h->ref_count[list]-1; i>=index; i--){ - if(h->ref_list[list][i].pic_id == pred && h->ref_list[list][i].long_ref==0) + for(i= h->short_ref_count-1; i>=0; i--){ + ref = h->short_ref[i]; + if(ref->data[0] != NULL && ref->frame_num == pred && ref->long_ref == 0) // ignore non existing pictures by testing data[0] pointer break; } }else{ pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx - - for(i= h->ref_count[list]-1; i>=index; i--){ - if(h->ref_list[list][i].pic_id == pic_id && h->ref_list[list][i].long_ref==1) - break; - } + ref = h->long_ref[pic_id]; } - if(i < index){ + if (i < 0) { av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n"); memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME - }else if(i > index){ - Picture tmp= h->ref_list[list][i]; - for(; i>index; i--){ - 
h->ref_list[list][i]= h->ref_list[list][i-1]; - } - h->ref_list[list][index]= tmp; + } else { + h->ref_list[list][index]= *ref; } - }else if(reordering_of_pic_nums_idc==3) - break; - else{ + }else{ av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n"); return -1; } @@ -2680,15 +3319,24 @@ static int decode_ref_pic_list_reordering(H264Context *h){ if(h->slice_type!=B_TYPE) break; } + + if(h->slice_type==B_TYPE && !h->direct_spatial_mv_pred) + direct_dist_scale_factor(h); + direct_ref_list_init(h); return 0; } static int pred_weight_table(H264Context *h){ MpegEncContext * const s = &h->s; int list, i; + int luma_def, chroma_def; + h->use_weight= 0; + h->use_weight_chroma= 0; h->luma_log2_weight_denom= get_ue_golomb(&s->gb); h->chroma_log2_weight_denom= get_ue_golomb(&s->gb); + luma_def = 1<<h->luma_log2_weight_denom; + chroma_def = 1<<h->chroma_log2_weight_denom; for(list=0; list<2; list++){ for(i=0; i<h->ref_count[list]; i++){ @@ -2698,6 +3346,12 @@ static int pred_weight_table(H264Context *h){ if(luma_weight_flag){ h->luma_weight[list][i]= get_se_golomb(&s->gb); h->luma_offset[list][i]= get_se_golomb(&s->gb); + if( h->luma_weight[list][i] != luma_def + || h->luma_offset[list][i] != 0) + h->use_weight= 1; + }else{ + h->luma_weight[list][i]= luma_def; + h->luma_offset[list][i]= 0; } chroma_weight_flag= get_bits1(&s->gb); @@ -2706,28 +3360,91 @@ static int pred_weight_table(H264Context *h){ for(j=0; j<2; j++){ h->chroma_weight[list][i][j]= get_se_golomb(&s->gb); h->chroma_offset[list][i][j]= get_se_golomb(&s->gb); + if( h->chroma_weight[list][i][j] != chroma_def + || h->chroma_offset[list][i][j] != 0) + h->use_weight_chroma= 1; + } + }else{ + int j; + for(j=0; j<2; j++){ + h->chroma_weight[list][i][j]= chroma_def; + h->chroma_offset[list][i][j]= 0; } } } if(h->slice_type != B_TYPE) break; } + h->use_weight= h->use_weight || h->use_weight_chroma; return 0; } +static void implicit_weight_table(H264Context *h){ + MpegEncContext * const s = &h->s; + int 
ref0, ref1; + int cur_poc = s->current_picture_ptr->poc; + + if( h->ref_count[0] == 1 && h->ref_count[1] == 1 + && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){ + h->use_weight= 0; + h->use_weight_chroma= 0; + return; + } + + h->use_weight= 2; + h->use_weight_chroma= 2; + h->luma_log2_weight_denom= 5; + h->chroma_log2_weight_denom= 5; + + /* FIXME: MBAFF */ + for(ref0=0; ref0 < h->ref_count[0]; ref0++){ + int poc0 = h->ref_list[0][ref0].poc; + for(ref1=0; ref1 < h->ref_count[1]; ref1++){ + int poc1 = h->ref_list[1][ref1].poc; + int td = clip(poc1 - poc0, -128, 127); + if(td){ + int tb = clip(cur_poc - poc0, -128, 127); + int tx = (16384 + (ABS(td) >> 1)) / td; + int dist_scale_factor = clip((tb*tx + 32) >> 6, -1024, 1023) >> 2; + if(dist_scale_factor < -64 || dist_scale_factor > 128) + h->implicit_weight[ref0][ref1] = 32; + else + h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor; + }else + h->implicit_weight[ref0][ref1] = 32; + } + } +} + +static inline void unreference_pic(H264Context *h, Picture *pic){ + int i; + pic->reference=0; + if(pic == h->delayed_output_pic) + pic->reference=1; + else{ + for(i = 0; h->delayed_pic[i]; i++) + if(pic == h->delayed_pic[i]){ + pic->reference=1; + break; + } + } +} + /** - * instantaneos decoder refresh. + * instantaneous decoder refresh. 
*/ static void idr(H264Context *h){ int i; - for(i=0; i<h->long_ref_count; i++){ - h->long_ref[i]->reference=0; - h->long_ref[i]= NULL; + for(i=0; i<16; i++){ + if (h->long_ref[i] != NULL) { + unreference_pic(h, h->long_ref[i]); + h->long_ref[i]= NULL; + } } h->long_ref_count=0; for(i=0; i<h->short_ref_count; i++){ - h->short_ref[i]->reference=0; + unreference_pic(h, h->short_ref[i]); h->short_ref[i]= NULL; } h->short_ref_count=0; @@ -2765,23 +3482,49 @@ static Picture * remove_short(H264Context *h, int frame_num){ static Picture * remove_long(H264Context *h, int i){ Picture *pic; - if(i >= h->long_ref_count) return NULL; pic= h->long_ref[i]; - if(pic==NULL) return NULL; - h->long_ref[i]= NULL; - memmove(&h->long_ref[i], &h->long_ref[i+1], (h->long_ref_count - i - 1)*sizeof(Picture*)); - h->long_ref_count--; + if(pic) h->long_ref_count--; return pic; } /** + * print short term list + */ +static void print_short_term(H264Context *h) { + uint32_t i; + if(h->s.avctx->debug&FF_DEBUG_MMCO) { + av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n"); + for(i=0; i<h->short_ref_count; i++){ + Picture *pic= h->short_ref[i]; + av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]); + } + } +} + +/** + * print long term list + */ +static void print_long_term(H264Context *h) { + uint32_t i; + if(h->s.avctx->debug&FF_DEBUG_MMCO) { + av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n"); + for(i = 0; i < 16; i++){ + Picture *pic= h->long_ref[i]; + if (pic) { + av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]); + } + } + } +} + +/** * Executes the reference picture marking (memory management control operations). 
*/ static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){ MpegEncContext * const s = &h->s; - int i; + int i, j; int current_is_long=0; Picture *pic; @@ -2796,23 +3539,24 @@ static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){ case MMCO_SHORT2UNUSED: pic= remove_short(h, mmco[i].short_frame_num); if(pic==NULL) return -1; - pic->reference= 0; + unreference_pic(h, pic); break; case MMCO_SHORT2LONG: pic= remove_long(h, mmco[i].long_index); - if(pic) pic->reference=0; + if(pic) unreference_pic(h, pic); h->long_ref[ mmco[i].long_index ]= remove_short(h, mmco[i].short_frame_num); h->long_ref[ mmco[i].long_index ]->long_ref=1; + h->long_ref_count++; break; case MMCO_LONG2UNUSED: pic= remove_long(h, mmco[i].long_index); if(pic==NULL) return -1; - pic->reference= 0; + unreference_pic(h, pic); break; case MMCO_LONG: pic= remove_long(h, mmco[i].long_index); - if(pic) pic->reference=0; + if(pic) unreference_pic(h, pic); h->long_ref[ mmco[i].long_index ]= s->current_picture_ptr; h->long_ref[ mmco[i].long_index ]->long_ref=1; @@ -2822,22 +3566,20 @@ static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){ break; case MMCO_SET_MAX_LONG: assert(mmco[i].long_index <= 16); - while(mmco[i].long_index < h->long_ref_count){ - pic= remove_long(h, mmco[i].long_index); - pic->reference=0; - } - while(mmco[i].long_index > h->long_ref_count){ - h->long_ref[ h->long_ref_count++ ]= NULL; + // just remove the long term which index is greater than new max + for(j = mmco[i].long_index; j<16; j++){ + pic = remove_long(h, j); + if (pic) unreference_pic(h, pic); } break; case MMCO_RESET: while(h->short_ref_count){ pic= remove_short(h, h->short_ref[0]->frame_num); - pic->reference=0; + unreference_pic(h, pic); } - while(h->long_ref_count){ - pic= remove_long(h, h->long_ref_count-1); - pic->reference=0; + for(j = 0; j < 16; j++) { + pic= remove_long(h, j); + if(pic) unreference_pic(h, pic); } break; default: assert(0); @@ 
-2847,7 +3589,7 @@ static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){ if(!current_is_long){ pic= remove_short(h, s->current_picture_ptr->frame_num); if(pic){ - pic->reference=0; + unreference_pic(h, pic); av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n"); } @@ -2859,6 +3601,8 @@ static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){ h->short_ref_count++; } + print_short_term(h); + print_long_term(h); return 0; } @@ -2877,7 +3621,7 @@ static int decode_ref_pic_marking(H264Context *h){ } }else{ if(get_bits1(&s->gb)){ // adaptive_ref_pic_marking_mode_flag - for(i= h->mmco_index; i<MAX_MMCO_COUNT; i++) { + for(i= 0; i<MAX_MMCO_COUNT; i++) { MMCOOpcode opcode= get_ue_golomb(&s->gb);; h->mmco[i].opcode= opcode; @@ -2900,6 +3644,8 @@ static int decode_ref_pic_marking(H264Context *h){ av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode); return -1; } + if(opcode == MMCO_END) + break; } h->mmco_index= i; }else{ @@ -3010,23 +3756,32 @@ static int decode_slice_header(H264Context *h){ int first_mb_in_slice, pps_id; int num_ref_idx_active_override_flag; static const uint8_t slice_type_map[5]= {P_TYPE, B_TYPE, I_TYPE, SP_TYPE, SI_TYPE}; + int slice_type; + int default_ref_list_done = 0; s->current_picture.reference= h->nal_ref_idc != 0; + s->dropable= h->nal_ref_idc == 0; first_mb_in_slice= get_ue_golomb(&s->gb); - h->slice_type= get_ue_golomb(&s->gb); - if(h->slice_type > 9){ + slice_type= get_ue_golomb(&s->gb); + if(slice_type > 9){ av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y); + return -1; } - if(h->slice_type > 4){ - h->slice_type -= 5; + if(slice_type > 4){ + slice_type -= 5; h->slice_type_fixed=1; }else h->slice_type_fixed=0; - h->slice_type= slice_type_map[ h->slice_type ]; - + slice_type= slice_type_map[ slice_type ]; + if (slice_type == I_TYPE + || (h->slice_num != 0 && slice_type == 
h->slice_type) ) { + default_ref_list_done = 1; + } + h->slice_type= slice_type; + s->pict_type= h->slice_type; // to make a few old func happy, its wrong though pps_id= get_ue_golomb(&s->gb); @@ -3047,14 +3802,11 @@ static int decode_slice_header(H264Context *h){ } s->mb_width= h->sps.mb_width; - s->mb_height= h->sps.mb_height; + s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag); - h->b_stride= s->mb_width*4; - h->b8_stride= s->mb_width*2; + h->b_stride= s->mb_width*4 + 1; + h->b8_stride= s->mb_width*2 + 1; - s->mb_x = first_mb_in_slice % s->mb_width; - s->mb_y = first_mb_in_slice / s->mb_width; //FIXME AFFW - s->width = 16*s->mb_width - 2*(h->sps.crop_left + h->sps.crop_right ); if(h->sps.frame_mbs_only_flag) s->height= 16*s->mb_height - 2*(h->sps.crop_top + h->sps.crop_bottom); @@ -3075,6 +3827,8 @@ static int decode_slice_header(H264Context *h){ s->avctx->width = s->width; s->avctx->height = s->height; s->avctx->sample_aspect_ratio= h->sps.sar; + if(!s->avctx->sample_aspect_ratio.den) + s->avctx->sample_aspect_ratio.den = 1; if(h->sps.timing_info_present_flag && h->sps.fixed_frame_rate_flag){ s->avctx->frame_rate = h->sps.time_scale; @@ -3082,22 +3836,29 @@ static int decode_slice_header(H264Context *h){ } } - if(first_mb_in_slice == 0){ + if(h->slice_num == 0){ frame_start(h); } s->current_picture_ptr->frame_num= //FIXME frame_num cleanup h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num); + h->mb_aff_frame = 0; if(h->sps.frame_mbs_only_flag){ s->picture_structure= PICT_FRAME; }else{ - if(get_bits1(&s->gb)) //field_pic_flag + if(get_bits1(&s->gb)) { //field_pic_flag s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag - else + } else { s->picture_structure= PICT_FRAME; + first_mb_in_slice <<= 1; + h->mb_aff_frame = h->sps.mb_aff; + } } + s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width; + s->resync_mb_y = s->mb_y = first_mb_in_slice / s->mb_width; + if(s->picture_structure==PICT_FRAME){ h->curr_pic_num= 
h->frame_num; h->max_pic_num= 1<< h->sps.log2_max_frame_num; @@ -3153,7 +3914,7 @@ static int decode_slice_header(H264Context *h){ } } - if(first_mb_in_slice == 0){ + if(!default_ref_list_done){ fill_default_ref_list(h); } @@ -3162,6 +3923,10 @@ static int decode_slice_header(H264Context *h){ if( (h->pps.weighted_pred && (h->slice_type == P_TYPE || h->slice_type == SP_TYPE )) || (h->pps.weighted_bipred_idc==1 && h->slice_type==B_TYPE ) ) pred_weight_table(h); + else if(h->pps.weighted_bipred_idc==2 && h->slice_type==B_TYPE) + implicit_weight_table(h); + else + h->use_weight = 0; if(s->current_picture.reference) decode_ref_pic_marking(h); @@ -3175,6 +3940,7 @@ static int decode_slice_header(H264Context *h){ av_log(s->avctx, AV_LOG_ERROR, "QP %d out of range\n", s->qscale); return -1; } + h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale); //FIXME qscale / qp ... stuff if(h->slice_type == SP_TYPE){ get_bits1(&s->gb); /* sp_for_switch_flag */ @@ -3202,15 +3968,21 @@ static int decode_slice_header(H264Context *h){ slice_group_change_cycle= get_bits(&s->gb, ?); #endif + h->slice_num++; + if(s->avctx->debug&FF_DEBUG_PICT_INFO){ - av_log(h->s.avctx, AV_LOG_DEBUG, "mb:%d %c pps:%d frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d\n", + av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%d frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s\n", + h->slice_num, + (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"), first_mb_in_slice, av_get_pict_type_char(h->slice_type), pps_id, h->frame_num, s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1], h->ref_count[0], h->ref_count[1], s->qscale, - h->deblocking_filter + h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2, + h->use_weight, + h->use_weight==1 && h->use_weight_chroma ? 
"c" : "" ); } @@ -3231,7 +4003,7 @@ static inline int get_level_prefix(GetBitContext *gb){ log= 32 - av_log2(buf); #ifdef TRACE print_bin(buf>>(32-log), log); - printf("%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__); + av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__); #endif LAST_SKIP_BITS(re, gb, log); @@ -3381,6 +4153,55 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in } /** + * decodes a P_SKIP or B_SKIP macroblock + */ +static void decode_mb_skip(H264Context *h){ + MpegEncContext * const s = &h->s; + const int mb_xy= s->mb_x + s->mb_y*s->mb_stride; + int mb_type; + + memset(h->non_zero_count[mb_xy], 0, 16); + memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui + + if(h->mb_aff_frame && s->mb_skip_run==0 && (s->mb_y&1)==0){ + h->mb_field_decoding_flag= get_bits1(&s->gb); + } + if(h->mb_field_decoding_flag) + mb_type|= MB_TYPE_INTERLACED; + + if( h->slice_type == B_TYPE ) + { + // just for fill_caches. pred_direct_motion will set the real mb_type + mb_type= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP; + + fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ... + pred_direct_motion(h, &mb_type); + if(h->pps.cabac){ + fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4); + fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4); + } + } + else + { + int mx, my; + mb_type= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP; + + fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ... 
+ pred_pskip_motion(h, &mx, &my); + fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1); + fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4); + if(h->pps.cabac) + fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4); + } + + write_back_motion(h, mb_type); + s->current_picture.mb_type[mb_xy]= mb_type|MB_TYPE_SKIP; + s->current_picture.qscale_table[mb_xy]= s->qscale; + h->slice_table[ mb_xy ]= h->slice_num; + h->prev_mb_skiped= 1; +} + +/** * decodes a macroblock * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed */ @@ -3399,40 +4220,15 @@ static int decode_mb_cavlc(H264Context *h){ s->mb_skip_run= get_ue_golomb(&s->gb); if (s->mb_skip_run--) { - int mx, my; - /* skip mb */ -//FIXME b frame - mb_type= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0; - - memset(h->non_zero_count[mb_xy], 0, 16); - memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui - - if(h->sps.mb_aff && s->mb_skip_run==0 && (s->mb_y&1)==0){ - h->mb_field_decoding_flag= get_bits1(&s->gb); - } - - if(h->mb_field_decoding_flag) - mb_type|= MB_TYPE_INTERLACED; - - fill_caches(h, mb_type); //FIXME check what is needed and what not ... - pred_pskip_motion(h, &mx, &my); - fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1); - fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4); - write_back_motion(h, mb_type); - - s->current_picture.mb_type[mb_xy]= mb_type; //FIXME SKIP type - s->current_picture.qscale_table[mb_xy]= s->qscale; - h->slice_table[ mb_xy ]= h->slice_num; - - h->prev_mb_skiped= 1; + decode_mb_skip(h); return 0; } } - if(h->sps.mb_aff /* && !field pic FIXME needed? */){ - if((s->mb_y&1)==0) + if(h->mb_aff_frame){ + if ( ((s->mb_y&1) == 0) || h->prev_mb_skiped) h->mb_field_decoding_flag = get_bits1(&s->gb); }else - h->mb_field_decoding_flag=0; //FIXME som ed note ?! 
+ h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME); h->prev_mb_skiped= 0; @@ -3473,43 +4269,44 @@ decode_intra_mb: h->slice_table[ mb_xy ]= h->slice_num; if(IS_INTRA_PCM(mb_type)){ - const uint8_t *ptr; - int x, y; + unsigned int x, y; // we assume these blocks are very rare so we dont optimize it align_get_bits(&s->gb); - ptr= s->gb.buffer + get_bits_count(&s->gb); - + // The pixels are stored in the same order as levels in h->mb array. for(y=0; y<16; y++){ - const int index= 4*(y&3) + 64*(y>>2); + const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3); for(x=0; x<16; x++){ - h->mb[index + (x&3) + 16*(x>>2)]= *(ptr++); + tprintf("LUMA ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8)); + h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= get_bits(&s->gb, 8); } } for(y=0; y<8; y++){ const int index= 256 + 4*(y&3) + 32*(y>>2); for(x=0; x<8; x++){ - h->mb[index + (x&3) + 16*(x>>2)]= *(ptr++); + tprintf("CHROMA U ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8)); + h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8); } } for(y=0; y<8; y++){ const int index= 256 + 64 + 4*(y&3) + 32*(y>>2); for(x=0; x<8; x++){ - h->mb[index + (x&3) + 16*(x>>2)]= *(ptr++); + tprintf("CHROMA V ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8)); + h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8); } } - skip_bits(&s->gb, 384); //FIXME check /fix the bitstream readers - - //FIXME deblock filter, non_zero_count_cache init ... 
+ // In deblocking, the quantiser is 0 + s->current_picture.qscale_table[mb_xy]= 0; + h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, 0); + // All coeffs are presents memset(h->non_zero_count[mb_xy], 16, 16); - s->current_picture.qscale_table[mb_xy]= s->qscale; return 0; } - fill_caches(h, mb_type); + fill_caches(h, mb_type, 0); //mb_pred if(IS_INTRA(mb_type)){ @@ -3561,6 +4358,9 @@ decode_intra_mb: sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count; h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type; } + if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1]) + || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) + pred_direct_motion(h, &mb_type); }else{ assert(h->slice_type == P_TYPE || h->slice_type == SP_TYPE); //FIXME SP correct ? for(i=0; i<4; i++){ @@ -3575,10 +4375,14 @@ decode_intra_mb: } for(list=0; list<2; list++){ - const int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list]; + int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list]; if(ref_count == 0) continue; + if (h->mb_aff_frame && h->mb_field_decoding_flag) { + ref_count <<= 1; + } for(i=0; i<4; i++){ - if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){ + if(IS_DIRECT(h->sub_mb_type[i])) continue; + if(IS_DIR(h->sub_mb_type[i], 0, list)){ ref[list][i] = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip? }else{ //FIXME @@ -3592,10 +4396,11 @@ decode_intra_mb: if(ref_count == 0) continue; for(i=0; i<4; i++){ + if(IS_DIRECT(h->sub_mb_type[i])) continue; h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]= h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i]; - if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){ + if(IS_DIR(h->sub_mb_type[i], 0, list)){ const int sub_mb_type= h->sub_mb_type[i]; const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 
2 : 1; for(j=0; j<sub_partition_count[i]; j++){ @@ -3631,16 +4436,20 @@ decode_intra_mb: } } } - }else if(!IS_DIRECT(mb_type)){ + }else if(IS_DIRECT(mb_type)){ + pred_direct_motion(h, &mb_type); + s->current_picture.mb_type[mb_xy]= mb_type; + }else{ int list, mx, my, i; //FIXME we should set ref_idx_l? to 0 if we use that later ... if(IS_16X16(mb_type)){ for(list=0; list<2; list++){ - if(h->ref_count[0]>0){ + if(h->ref_count[list]>0){ if(IS_DIR(mb_type, 0, list)){ const int val= get_te0_golomb(&s->gb, h->ref_count[list]); fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1); - } + }else + fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (LIST_NOT_USED&0xFF), 1); } } for(list=0; list<2; list++){ @@ -3651,7 +4460,8 @@ decode_intra_mb: tprintf("final mv:%d %d\n", mx, my); fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4); - } + }else + fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4); } } else if(IS_16X8(mb_type)){ @@ -3661,7 +4471,8 @@ decode_intra_mb: if(IS_DIR(mb_type, i, list)){ const int val= get_te0_golomb(&s->gb, h->ref_count[list]); fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1); - } + }else + fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1); } } } @@ -3674,7 +4485,8 @@ decode_intra_mb: tprintf("final mv:%d %d\n", mx, my); fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4); - } + }else + fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4); } } }else{ @@ -3685,7 +4497,8 @@ decode_intra_mb: if(IS_DIR(mb_type, i, list)){ //FIXME optimize const int val= get_te0_golomb(&s->gb, h->ref_count[list]); fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1); - } + }else + fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1); } } } @@ -3698,7 +4511,8 @@ decode_intra_mb: tprintf("final mv:%d %d\n", mx, my); fill_rectangle(h->mv_cache[list][ scan8[0] + 
2*i ], 2, 4, 8, pack16to32(mx,my), 4); - } + }else + fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4); } } } @@ -3749,7 +4563,7 @@ decode_intra_mb: else s->qscale-= 52; } - h->chroma_qp= chroma_qp= get_chroma_qp(h, s->qscale); + h->chroma_qp= chroma_qp= get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale); if(IS_INTRA16x16(mb_type)){ if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, s->qscale, 16) < 0){ return -1; //FIXME continue if partotioned and other retirn -1 too @@ -3819,41 +4633,71 @@ decode_intra_mb: return 0; } -static int decode_cabac_mb_type( H264Context *h ) { +static int decode_cabac_field_decoding_flag(H264Context *h) { MpegEncContext * const s = &h->s; + const int mb_x = s->mb_x; + const int mb_y = s->mb_y & ~1; + const int mba_xy = mb_x - 1 + mb_y *s->mb_stride; + const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride; - if( h->slice_type == I_TYPE ) { - const int mb_xy= s->mb_x + s->mb_y*s->mb_stride; - int ctx = 0; - int mb_type; + unsigned int ctx = 0; + + if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) { + ctx += 1; + } + if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) { + ctx += 1; + } - if( s->mb_x > 0 && !IS_INTRA4x4( s->current_picture.mb_type[mb_xy-1] ) ) + return get_cabac( &h->cabac, &h->cabac_state[70 + ctx] ); +} + +static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) { + uint8_t *state= &h->cabac_state[ctx_base]; + int mb_type; + + if(intra_slice){ + MpegEncContext * const s = &h->s; + const int mba_xy = h->left_mb_xy[0]; + const int mbb_xy = h->top_mb_xy; + int ctx=0; + if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) ) ctx++; - if( s->mb_y > 0 && !IS_INTRA4x4( s->current_picture.mb_type[mb_xy-s->mb_stride] ) ) + if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) ) 
ctx++; - - if( get_cabac( &h->cabac, &h->cabac_state[3+ctx] ) == 0 ) + if( get_cabac( &h->cabac, &state[ctx] ) == 0 ) + return 0; /* I4x4 */ + state += 2; + }else{ + if( get_cabac( &h->cabac, &state[0] ) == 0 ) return 0; /* I4x4 */ + } - if( get_cabac_terminate( &h->cabac ) ) - return 25; /* PCM */ + if( get_cabac_terminate( &h->cabac ) ) + return 25; /* PCM */ - mb_type = 1; /* I16x16 */ - if( get_cabac( &h->cabac, &h->cabac_state[3+3] ) ) - mb_type += 12; /* cbp_luma != 0 */ + mb_type = 1; /* I16x16 */ + if( get_cabac( &h->cabac, &state[1] ) ) + mb_type += 12; /* cbp_luma != 0 */ - if( get_cabac( &h->cabac, &h->cabac_state[3+4] ) ) { - if( get_cabac( &h->cabac, &h->cabac_state[3+5] ) ) - mb_type += 4 * 2; /* cbp_chroma == 2 */ - else - mb_type += 4 * 1; /* cbp_chroma == 1 */ - } - if( get_cabac( &h->cabac, &h->cabac_state[3+6] ) ) - mb_type += 2; - if( get_cabac( &h->cabac, &h->cabac_state[3+7] ) ) - mb_type += 1; - return mb_type; + if( get_cabac( &h->cabac, &state[2] ) ) { + if( get_cabac( &h->cabac, &state[2+intra_slice] ) ) + mb_type += 4 * 2; /* cbp_chroma == 2 */ + else + mb_type += 4 * 1; /* cbp_chroma == 1 */ + } + if( get_cabac( &h->cabac, &state[3+intra_slice] ) ) + mb_type += 2; + if( get_cabac( &h->cabac, &state[3+2*intra_slice] ) ) + mb_type += 1; + return mb_type; +} + +static int decode_cabac_mb_type( H264Context *h ) { + MpegEncContext * const s = &h->s; + if( h->slice_type == I_TYPE ) { + return decode_cabac_intra_mb_type(h, 3, 1); } else if( h->slice_type == P_TYPE ) { if( get_cabac( &h->cabac, &h->cabac_state[14] ) == 0 ) { /* P-type */ @@ -3869,31 +4713,45 @@ static int decode_cabac_mb_type( H264Context *h ) { return 1; /* P_L0_D16x8; */ } } else { - int mb_type; - /* I-type */ - if( get_cabac( &h->cabac, &h->cabac_state[17] ) == 0 ) - return 5+0; /* I_4x4 */ - if( get_cabac_terminate( &h->cabac ) ) - return 5+25; /*I_PCM */ - mb_type = 5+1; /* I16x16 */ - if( get_cabac( &h->cabac, &h->cabac_state[17+1] ) ) - mb_type += 12; /* cbp_luma != 0 */ 
- - if( get_cabac( &h->cabac, &h->cabac_state[17+2] ) ) { - if( get_cabac( &h->cabac, &h->cabac_state[17+2] ) ) - mb_type += 4 * 2; /* cbp_chroma == 2 */ - else - mb_type += 4 * 1; /* cbp_chroma == 1 */ - } - if( get_cabac( &h->cabac, &h->cabac_state[17+3] ) ) - mb_type += 2; - if( get_cabac( &h->cabac, &h->cabac_state[17+3] ) ) - mb_type += 1; + return decode_cabac_intra_mb_type(h, 17, 0) + 5; + } + } else if( h->slice_type == B_TYPE ) { + const int mba_xy = h->left_mb_xy[0]; + const int mbb_xy = h->top_mb_xy; + int ctx = 0; + int bits; + + if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ) + && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) ) + ctx++; + if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ) + && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) ) + ctx++; + + if( !get_cabac( &h->cabac, &h->cabac_state[27+ctx] ) ) + return 0; /* B_Direct_16x16 */ - return mb_type; + if( !get_cabac( &h->cabac, &h->cabac_state[27+3] ) ) { + return 1 + get_cabac( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */ } + + bits = get_cabac( &h->cabac, &h->cabac_state[27+4] ) << 3; + bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] ) << 2; + bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] ) << 1; + bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] ); + if( bits < 8 ) + return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */ + else if( bits == 13 ) { + return decode_cabac_intra_mb_type(h, 32, 0) + 23; + } else if( bits == 14 ) + return 11; /* B_L1_L0_8x16 */ + else if( bits == 15 ) + return 22; /* B_8x8 */ + + bits= ( bits<<1 ) | get_cabac( &h->cabac, &h->cabac_state[27+5] ); + return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */ } else { - /* TODO do others frames types */ + /* TODO SI/SP frames? 
*/ return -1; } } @@ -3905,9 +4763,9 @@ static int decode_cabac_mb_skip( H264Context *h) { const int mbb_xy = mb_xy - s->mb_stride; int ctx = 0; - if( s->mb_x > 0 && !IS_SKIP( s->current_picture.mb_type[mba_xy] ) ) + if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] )) ctx++; - if( s->mb_y > 0 && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ) ) + if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] )) ctx++; if( h->slice_type == P_TYPE || h->slice_type == SP_TYPE) @@ -3935,18 +4793,16 @@ static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) { } static int decode_cabac_mb_chroma_pre_mode( H264Context *h) { - MpegEncContext * const s = &h->s; - const int mb_xy = s->mb_x + s->mb_y*s->mb_stride; - const int mba_xy = mb_xy - 1; - const int mbb_xy = mb_xy - s->mb_stride; + const int mba_xy = h->left_mb_xy[0]; + const int mbb_xy = h->top_mb_xy; int ctx = 0; /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */ - if( s->mb_x > 0 && h->chroma_pred_mode_table[mba_xy] != 0 ) + if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 ) ctx++; - if( s->mb_y > 0 && h->chroma_pred_mode_table[mbb_xy] != 0 ) + if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 ) ctx++; if( get_cabac( &h->cabac, &h->cabac_state[64+ctx] ) == 0 ) @@ -3975,16 +4831,13 @@ static const uint8_t block_idx_xy[4][4] = { static int decode_cabac_mb_cbp_luma( H264Context *h) { MpegEncContext * const s = &h->s; - const int mb_xy = s->mb_x + s->mb_y*s->mb_stride; int cbp = 0; int i8x8; - h->cbp_table[mb_xy] = 0; /* FIXME aaahahahah beurk */ - for( i8x8 = 0; i8x8 < 4; i8x8++ ) { - int mba_xy = -1; - int mbb_xy = -1; + int cbp_a = -1; + int cbp_b = -1; int x, y; int ctx = 0; @@ -3992,51 +4845,45 @@ static int decode_cabac_mb_cbp_luma( H264Context *h) { y = block_idx_y[4*i8x8]; if( x > 0 ) - mba_xy = mb_xy; - 
else if( s->mb_x > 0 ) - mba_xy = mb_xy - 1; + cbp_a = cbp; + else if( s->mb_x > 0 && (h->slice_table[h->left_mb_xy[0]] == h->slice_num)) { + cbp_a = h->left_cbp; + tprintf("cbp_a = left_cbp = %x\n", cbp_a); + } if( y > 0 ) - mbb_xy = mb_xy; - else if( s->mb_y > 0 ) - mbb_xy = mb_xy - s->mb_stride; + cbp_b = cbp; + else if( s->mb_y > 0 && (h->slice_table[h->top_mb_xy] == h->slice_num)) { + cbp_b = h->top_cbp; + tprintf("cbp_b = top_cbp = %x\n", cbp_b); + } /* No need to test for skip as we put 0 for skip block */ - if( mba_xy >= 0 ) { + /* No need to test for IPCM as we put 1 for IPCM block */ + if( cbp_a >= 0 ) { int i8x8a = block_idx_xy[(x-1)&0x03][y]/4; - if( ((h->cbp_table[mba_xy] >> i8x8a)&0x01) == 0 ) + if( ((cbp_a >> i8x8a)&0x01) == 0 ) ctx++; } - if( mbb_xy >= 0 ) { + if( cbp_b >= 0 ) { int i8x8b = block_idx_xy[x][(y-1)&0x03]/4; - if( ((h->cbp_table[mbb_xy] >> i8x8b)&0x01) == 0 ) + if( ((cbp_b >> i8x8b)&0x01) == 0 ) ctx += 2; } if( get_cabac( &h->cabac, &h->cabac_state[73 + ctx] ) ) { cbp |= 1 << i8x8; - h->cbp_table[mb_xy] = cbp; /* FIXME aaahahahah beurk */ } } return cbp; } static int decode_cabac_mb_cbp_chroma( H264Context *h) { - MpegEncContext * const s = &h->s; - const int mb_xy = s->mb_x + s->mb_y*s->mb_stride; int ctx; int cbp_a, cbp_b; - /* No need to test for skip */ - if( s->mb_x > 0 ) - cbp_a = (h->cbp_table[mb_xy-1]>>4)&0x03; - else - cbp_a = -1; - - if( s->mb_y > 0 ) - cbp_b = (h->cbp_table[mb_xy-s->mb_stride]>>4)&0x03; - else - cbp_b = -1; + cbp_a = (h->left_cbp>>4)&0x03; + cbp_b = (h-> top_cbp>>4)&0x03; ctx = 0; if( cbp_a > 0 ) ctx++; @@ -4047,10 +4894,7 @@ static int decode_cabac_mb_cbp_chroma( H264Context *h) { ctx = 4; if( cbp_a == 2 ) ctx++; if( cbp_b == 2 ) ctx += 2; - if( get_cabac( &h->cabac, &h->cabac_state[77 + ctx] ) ) - return 2; - else - return 1; + return 1 + get_cabac( &h->cabac, &h->cabac_state[77 + ctx] ); } static int decode_cabac_mb_dqp( H264Context *h) { MpegEncContext * const s = &h->s; @@ -4063,7 +4907,7 @@ static int 
decode_cabac_mb_dqp( H264Context *h) { else mbn_xy = s->mb_width - 1 + (s->mb_y-1)*s->mb_stride; - if( mbn_xy >= 0 && h->last_qscale_diff != 0 && ( IS_INTRA16x16(s->current_picture.mb_type[mbn_xy] ) || (h->cbp_table[mbn_xy]&0x3f) ) ) + if( h->last_qscale_diff != 0 && ( IS_INTRA16x16(s->current_picture.mb_type[mbn_xy] ) || (h->cbp_table[mbn_xy]&0x3f) ) ) ctx++; while( get_cabac( &h->cabac, &h->cabac_state[60 + ctx] ) ) { @@ -4079,7 +4923,7 @@ static int decode_cabac_mb_dqp( H264Context *h) { else return -(val + 1)/2; } -static int decode_cabac_mb_sub_type( H264Context *h ) { +static int decode_cabac_p_mb_sub_type( H264Context *h ) { if( get_cabac( &h->cabac, &h->cabac_state[21] ) ) return 0; /* 8x8 */ if( !get_cabac( &h->cabac, &h->cabac_state[22] ) ) @@ -4088,6 +4932,22 @@ static int decode_cabac_mb_sub_type( H264Context *h ) { return 2; /* 4x8 */ return 3; /* 4x4 */ } +static int decode_cabac_b_mb_sub_type( H264Context *h ) { + int type; + if( !get_cabac( &h->cabac, &h->cabac_state[36] ) ) + return 0; /* B_Direct_8x8 */ + if( !get_cabac( &h->cabac, &h->cabac_state[37] ) ) + return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */ + type = 3; + if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) { + if( get_cabac( &h->cabac, &h->cabac_state[39] ) ) + return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */ + type += 4; + } + type += 2*get_cabac( &h->cabac, &h->cabac_state[39] ); + type += get_cabac( &h->cabac, &h->cabac_state[39] ); + return type; +} static int decode_cabac_mb_ref( H264Context *h, int list, int n ) { int refa = h->ref_cache[list][scan8[n] - 1]; @@ -4095,10 +4955,17 @@ static int decode_cabac_mb_ref( H264Context *h, int list, int n ) { int ref = 0; int ctx = 0; - if( refa > 0 ) - ctx++; - if( refb > 0 ) - ctx += 2; + if( h->slice_type == B_TYPE) { + if( refa > 0 && !h->direct_cache[scan8[n] - 1] ) + ctx++; + if( refb > 0 && !h->direct_cache[scan8[n] - 8] ) + ctx += 2; + } else { + if( refa > 0 ) + 
ctx++; + if( refb > 0 ) + ctx += 2; + } while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) { ref++; @@ -4114,8 +4981,7 @@ static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) { int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) + abs( h->mvd_cache[list][scan8[n] - 8][l] ); int ctxbase = (l == 0) ? 40 : 47; - int ctx; - int mvd = 0; + int ctx, mvd; if( amvd < 3 ) ctx = 0; @@ -4124,11 +4990,14 @@ static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) { else ctx = 1; + if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx])) + return 0; + + mvd= 1; + ctx= 3; while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) { mvd++; - if( ctx < 3 ) - ctx = 3; - else if( ctx < 6 ) + if( ctx < 6 ) ctx++; } @@ -4143,132 +5012,52 @@ static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) { mvd += 1 << k; } } - if( mvd != 0 && get_cabac_bypass( &h->cabac ) ) - return -mvd; - return mvd; + if( get_cabac_bypass( &h->cabac ) ) return -mvd; + else return mvd; } - -static int get_cabac_cbf_ctx( H264Context *h, int cat, int idx ) { - MpegEncContext * const s = &h->s; - const int mb_xy = s->mb_x + s->mb_y*s->mb_stride; - int mba_xy = -1; - int mbb_xy = -1; - - int nza = -1; - int nzb = -1; +static int inline get_cabac_cbf_ctx( H264Context *h, int cat, int idx ) { + int nza, nzb; int ctx = 0; if( cat == 0 ) { - if( s->mb_x > 0 ) { - mba_xy = mb_xy - 1; - if( IS_INTRA16x16(s->current_picture.mb_type[mba_xy] ) ) - nza = h->cbp_table[mba_xy]&0x100; - } - if( s->mb_y > 0 ) { - mbb_xy = mb_xy - s->mb_stride; - if( IS_INTRA16x16(s->current_picture.mb_type[mbb_xy] ) ) - nzb = h->cbp_table[mbb_xy]&0x100; - } + nza = h->left_cbp&0x100; + nzb = h-> top_cbp&0x100; } else if( cat == 1 || cat == 2 ) { - int i8x8a, i8x8b; - int x, y; - - x = block_idx_x[idx]; - y = block_idx_y[idx]; - - if( x > 0 ) - mba_xy = mb_xy; - else if( s->mb_x > 0 ) - mba_xy = mb_xy - 1; - - if( y > 0 ) - mbb_xy = mb_xy; - else if( s->mb_y > 0 ) - 
mbb_xy = mb_xy - s->mb_stride; - - /* No need to test for skip */ - if( mba_xy >= 0 ) { - i8x8a = block_idx_xy[(x-1)&0x03][y]/4; - - if( !IS_INTRA_PCM(s->current_picture.mb_type[mba_xy] ) && - ((h->cbp_table[mba_xy]&0x0f)>>i8x8a)) - nza = h->non_zero_count_cache[scan8[idx] - 1]; - } - - if( mbb_xy >= 0 ) { - i8x8b = block_idx_xy[x][(y-1)&0x03]/4; - - if( !IS_INTRA_PCM(s->current_picture.mb_type[mbb_xy] ) && - ((h->cbp_table[mbb_xy]&0x0f)>>i8x8b)) - nzb = h->non_zero_count_cache[scan8[idx] - 8]; - } + nza = h->non_zero_count_cache[scan8[idx] - 1]; + nzb = h->non_zero_count_cache[scan8[idx] - 8]; } else if( cat == 3 ) { - if( s->mb_x > 0 ) { - mba_xy = mb_xy - 1; - - if( !IS_INTRA_PCM(s->current_picture.mb_type[mba_xy] ) && - (h->cbp_table[mba_xy]&0x30) ) - nza = (h->cbp_table[mba_xy]>>(6+idx))&0x01; - } - if( s->mb_y > 0 ) { - mbb_xy = mb_xy - s->mb_stride; - - if( !IS_INTRA_PCM(s->current_picture.mb_type[mbb_xy] ) && - (h->cbp_table[mbb_xy]&0x30) ) - nzb = (h->cbp_table[mbb_xy]>>(6+idx))&0x01; - } - } else if( cat == 4 ) { - int idxc = idx % 4 ; - if( idxc == 1 || idxc == 3 ) - mba_xy = mb_xy; - else if( s->mb_x > 0 ) - mba_xy = mb_xy -1; - - if( idxc == 2 || idxc == 3 ) - mbb_xy = mb_xy; - else if( s->mb_y > 0 ) - mbb_xy = mb_xy - s->mb_stride; - - if( mba_xy >= 0 && - !IS_INTRA_PCM(s->current_picture.mb_type[mba_xy] ) && - (h->cbp_table[mba_xy]&0x30) == 0x20 ) - nza = h->non_zero_count_cache[scan8[16+idx] - 1]; - - if( mbb_xy >= 0 && - !IS_INTRA_PCM(s->current_picture.mb_type[mbb_xy] ) && - (h->cbp_table[mbb_xy]&0x30) == 0x20 ) - nzb = h->non_zero_count_cache[scan8[16+idx] - 8]; - } - - if( ( mba_xy < 0 && IS_INTRA( s->current_picture.mb_type[mb_xy] ) ) || - ( mba_xy >= 0 && IS_INTRA_PCM(s->current_picture.mb_type[mba_xy] ) ) || - nza > 0 ) + nza = (h->left_cbp>>(6+idx))&0x01; + nzb = (h-> top_cbp>>(6+idx))&0x01; + } else { + assert(cat == 4); + nza = h->non_zero_count_cache[scan8[16+idx] - 1]; + nzb = h->non_zero_count_cache[scan8[16+idx] - 8]; + } + + if( nza > 
0 ) ctx++; - if( ( mbb_xy < 0 && IS_INTRA( s->current_picture.mb_type[mb_xy] ) ) || - ( mbb_xy >= 0 && IS_INTRA_PCM(s->current_picture.mb_type[mbb_xy] ) ) || - nzb > 0 ) + if( nzb > 0 ) ctx += 2; return ctx + 4 * cat; } -static int decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, int qp, int max_coeff) { +static int inline decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, int qp, int max_coeff) { const int mb_xy = h->s.mb_x + h->s.mb_y*h->s.mb_stride; const uint16_t *qmul= dequant_coeff[qp]; + static const int significant_coeff_flag_field_offset[2] = { 105, 277 }; + static const int last_significant_coeff_flag_field_offset[2] = { 166, 338 }; static const int significant_coeff_flag_offset[5] = { 0, 15, 29, 44, 47 }; - static const int last_significant_coeff_flag_offset[5] = { 0, 15, 29, 44, 47 }; - static const int coeff_abs_level_m1_offset[5] = { 0, 10, 20, 30, 39 }; + static const int coeff_abs_level_m1_offset[5] = {227+ 0, 227+10, 227+20, 227+30, 227+39 }; - int coeff[16]; + int index[16]; - int last = 0; + int i, last; int coeff_count = 0; - int nz[16] = {0}; - int i; - int abslevel1 = 0; + int abslevel1 = 1; int abslevelgt1 = 0; /* cat: 0-> DC 16x16 n = 0 @@ -4288,96 +5077,103 @@ static int decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n return 0; } - while( last < max_coeff - 1 ) { - int ctx = FFMIN( last, max_coeff - 2 ); - - if( get_cabac( &h->cabac, &h->cabac_state[105+significant_coeff_flag_offset[cat]+ctx] ) == 0 ) { - nz[last++] = 0; - } - else { - nz[last++] = 1; - coeff_count++; - if( get_cabac( &h->cabac, &h->cabac_state[166+last_significant_coeff_flag_offset[cat]+ctx] ) ) { - while( last < max_coeff ) { - nz[last++] = 0; - } + for(last= 0; last < max_coeff - 1; last++) { + if( get_cabac( &h->cabac, &h->cabac_state[significant_coeff_flag_field_offset[h->mb_field_decoding_flag]+significant_coeff_flag_offset[cat]+last] )) { + 
index[coeff_count++] = last; + if( get_cabac( &h->cabac, &h->cabac_state[last_significant_coeff_flag_field_offset[h->mb_field_decoding_flag]+significant_coeff_flag_offset[cat]+last] ) ) { + last= max_coeff; break; } } } if( last == max_coeff -1 ) { - nz[last++] = 1; - coeff_count++; + index[coeff_count++] = last; } + assert(coeff_count > 0); - if( cat == 0 && coeff_count > 0 ) + if( cat == 0 ) h->cbp_table[mb_xy] |= 0x100; else if( cat == 1 || cat == 2 ) h->non_zero_count_cache[scan8[n]] = coeff_count; - else if( cat == 3 && coeff_count > 0 ) + else if( cat == 3 ) h->cbp_table[mb_xy] |= 0x40 << n; - else if( cat == 4 ) + else { + assert( cat == 4 ); h->non_zero_count_cache[scan8[16+n]] = coeff_count; + } for( i = coeff_count - 1; i >= 0; i-- ) { - int coeff_abs_m1; - - int ctx = (abslevelgt1 != 0 ? 0 : FFMIN( 4, abslevel1 + 1 )) + coeff_abs_level_m1_offset[cat]; + int ctx = (abslevelgt1 != 0 ? 0 : FFMIN( 4, abslevel1 )) + coeff_abs_level_m1_offset[cat]; + int j= scantable[index[i]]; - if( get_cabac( &h->cabac, &h->cabac_state[227+ctx] ) == 0 ) { - coeff_abs_m1 = 0; + if( get_cabac( &h->cabac, &h->cabac_state[ctx] ) == 0 ) { + if( cat == 0 || cat == 3 ) { + if( get_cabac_bypass( &h->cabac ) ) block[j] = -1; + else block[j] = 1; + }else{ + if( get_cabac_bypass( &h->cabac ) ) block[j] = -qmul[j]; + else block[j] = qmul[j]; + } + + abslevel1++; } else { - coeff_abs_m1 = 1; + int coeff_abs = 2; ctx = 5 + FFMIN( 4, abslevelgt1 ) + coeff_abs_level_m1_offset[cat]; - while( coeff_abs_m1 < 14 && get_cabac( &h->cabac, &h->cabac_state[227+ctx] ) ) { - coeff_abs_m1++; + while( coeff_abs < 15 && get_cabac( &h->cabac, &h->cabac_state[ctx] ) ) { + coeff_abs++; } - } - if( coeff_abs_m1 >= 14 ) { - int j = 0; - while( get_cabac_bypass( &h->cabac ) ) { - coeff_abs_m1 += 1 << j; - j++; + if( coeff_abs >= 15 ) { + int j = 0; + while( get_cabac_bypass( &h->cabac ) ) { + coeff_abs += 1 << j; + j++; + } + + while( j-- ) { + if( get_cabac_bypass( &h->cabac ) ) + coeff_abs += 1 << j ; + } } 
- while( j-- ) { - if( get_cabac_bypass( &h->cabac ) ) - coeff_abs_m1 += 1 << j ; + if( cat == 0 || cat == 3 ) { + if( get_cabac_bypass( &h->cabac ) ) block[j] = -coeff_abs; + else block[j] = coeff_abs; + }else{ + if( get_cabac_bypass( &h->cabac ) ) block[j] = -coeff_abs * qmul[j]; + else block[j] = coeff_abs * qmul[j]; } - } - if( get_cabac_bypass( &h->cabac ) ) - coeff[i] = -1 *( coeff_abs_m1 + 1 ); - else - coeff[i] = coeff_abs_m1 + 1; - - if( coeff_abs_m1 == 0 ) - abslevel1++; - else + abslevelgt1++; - } - - if( cat == 0 || cat == 3 ) { /* DC */ - int j; - for( i = 0, j = 0; j < coeff_count; i++ ) { - if( nz[i] ) { - block[scantable[i]] = coeff[j]; - - j++; - } } + } + return 0; +} - } else { /* AC */ - int j; - for( i = 0, j = 0; j < coeff_count; i++ ) { - if( nz[i] ) { - block[scantable[i]] = coeff[j] * qmul[scantable[i]]; - - j++; - } +void inline compute_mb_neighboors(H264Context *h) +{ + MpegEncContext * const s = &h->s; + const int mb_xy = s->mb_x + s->mb_y*s->mb_stride; + h->top_mb_xy = mb_xy - s->mb_stride; + h->left_mb_xy[0] = mb_xy - 1; + if(h->mb_aff_frame){ + const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride; + const int top_pair_xy = pair_xy - s->mb_stride; + const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]); + const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]); + const int curr_mb_frame_flag = !h->mb_field_decoding_flag; + const int bottom = (s->mb_y & 1); + if (bottom + ? !curr_mb_frame_flag // bottom macroblock + : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock + ) { + h->top_mb_xy -= s->mb_stride; + } + if (left_mb_frame_flag != curr_mb_frame_flag) { + h->left_mb_xy[0] = pair_xy - 1; } } - return 0; + return; } /** @@ -4391,61 +5187,43 @@ static int decode_mb_cabac(H264Context *h) { s->dsp.clear_blocks(h->mb); //FIXME avoid if allready clear (move after skip handlong?) 
- if( h->slice_type == B_TYPE ) { - av_log( h->s.avctx, AV_LOG_ERROR, "B-frame not supported with CABAC\n" ); - return -1; - } - if( h->sps.mb_aff ) { - av_log( h->s.avctx, AV_LOG_ERROR, "Fields not supported with CABAC\n" ); - return -1; - } - + tprintf("pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y); if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE ) { /* read skip flags */ if( decode_cabac_mb_skip( h ) ) { - int mx, my; - - /* skip mb */ - mb_type= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP; + decode_mb_skip(h); - memset(h->non_zero_count[mb_xy], 0, 16); - memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui -#if 0 - if(h->sps.mb_aff && s->mb_skip_run==0 && (s->mb_y&1)==0){ - h->mb_field_decoding_flag= get_bits1(&s->gb); - } - if(h->mb_field_decoding_flag) - mb_type|= MB_TYPE_INTERLACED; -#endif - - fill_caches(h, mb_type); //FIXME check what is needed and what not ... - pred_pskip_motion(h, &mx, &my); - fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1); - fill_rectangle( h->mvd_cache[0][scan8[0]], 4, 4, 8, pack16to32(0,0), 4); - fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4); - write_back_motion(h, mb_type); - - s->current_picture.mb_type[mb_xy]= mb_type; //FIXME SKIP type - s->current_picture.qscale_table[mb_xy]= s->qscale; - h->slice_table[ mb_xy ]= h->slice_num; h->cbp_table[mb_xy] = 0; h->chroma_pred_mode_table[mb_xy] = 0; h->last_qscale_diff = 0; - h->prev_mb_skiped= 1; - return 0; } } + if(h->mb_aff_frame){ + if ( ((s->mb_y&1) == 0) || h->prev_mb_skiped) + h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h); + }else + h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME); + h->prev_mb_skiped = 0; + compute_mb_neighboors(h); if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) { av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" ); return -1; } - if( h->slice_type == P_TYPE ) { + if( h->slice_type == B_TYPE ) { + if( mb_type < 23 ){ + partition_count= 
b_mb_type_info[mb_type].partition_count; + mb_type= b_mb_type_info[mb_type].type; + }else{ + mb_type -= 23; + goto decode_intra_mb; + } + } else if( h->slice_type == P_TYPE ) { if( mb_type < 5) { partition_count= p_mb_type_info[mb_type].partition_count; mb_type= p_mb_type_info[mb_type].type; @@ -4461,23 +5239,59 @@ decode_intra_mb: h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode; mb_type= i_mb_type_info[mb_type].type; } -#if 0 if(h->mb_field_decoding_flag) mb_type |= MB_TYPE_INTERLACED; -#endif s->current_picture.mb_type[mb_xy]= mb_type; h->slice_table[ mb_xy ]= h->slice_num; if(IS_INTRA_PCM(mb_type)) { - /* TODO */ - h->cbp_table[mb_xy] = 0xf +4*2; + const uint8_t *ptr; + unsigned int x, y; + + // We assume these blocks are very rare so we dont optimize it. + // FIXME The two following lines get the bitstream position in the cabac + // decode, I think it should be done by a function in cabac.h (or cabac.c). + ptr= h->cabac.bytestream; + if (h->cabac.low&0x1) ptr-=CABAC_BITS/8; + + // The pixels are stored in the same order as levels in h->mb array. 
+ for(y=0; y<16; y++){ + const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3); + for(x=0; x<16; x++){ + tprintf("LUMA ICPM LEVEL (%3d)\n", *ptr); + h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= *ptr++; + } + } + for(y=0; y<8; y++){ + const int index= 256 + 4*(y&3) + 32*(y>>2); + for(x=0; x<8; x++){ + tprintf("CHROMA U ICPM LEVEL (%3d)\n", *ptr); + h->mb[index + (x&3) + 16*(x>>2)]= *ptr++; + } + } + for(y=0; y<8; y++){ + const int index= 256 + 64 + 4*(y&3) + 32*(y>>2); + for(x=0; x<8; x++){ + tprintf("CHROMA V ICPM LEVEL (%3d)\n", *ptr); + h->mb[index + (x&3) + 16*(x>>2)]= *ptr++; + } + } + + ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr); + + // All blocks are presents + h->cbp_table[mb_xy] = 0x1ef; h->chroma_pred_mode_table[mb_xy] = 0; - s->current_picture.qscale_table[mb_xy]= s->qscale; - return -1; + // In deblocking, the quantiser is 0 + s->current_picture.qscale_table[mb_xy]= 0; + h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, 0); + // All coeffs are presents + memset(h->non_zero_count[mb_xy], 16, 16); + return 0; } - fill_caches(h, mb_type); + fill_caches(h, mb_type, 0); if( IS_INTRA( mb_type ) ) { if( IS_INTRA4x4( mb_type ) ) { @@ -4502,17 +5316,34 @@ decode_intra_mb: } else if( partition_count == 4 ) { int i, j, sub_partition_count[4], list, ref[2][4]; - /* Only P-frame */ - for( i = 0; i < 4; i++ ) { - h->sub_mb_type[i] = decode_cabac_mb_sub_type( h ); - sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count; - h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type; + if( h->slice_type == B_TYPE ) { + for( i = 0; i < 4; i++ ) { + h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h ); + sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count; + h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type; + } + if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1]) + || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) { + 
pred_direct_motion(h, &mb_type); + if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) { + for( i = 0; i < 4; i++ ) + if( IS_DIRECT(h->sub_mb_type[i]) ) + fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 ); + } + } + } else { + for( i = 0; i < 4; i++ ) { + h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h ); + sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count; + h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type; + } } for( list = 0; list < 2; list++ ) { if( h->ref_count[list] > 0 ) { for( i = 0; i < 4; i++ ) { - if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){ + if(IS_DIRECT(h->sub_mb_type[i])) continue; + if(IS_DIR(h->sub_mb_type[i], 0, list)){ if( h->ref_count[list] > 1 ) ref[list][i] = decode_cabac_mb_ref( h, list, 4*i ); else @@ -4528,6 +5359,10 @@ decode_intra_mb: for(list=0; list<2; list++){ for(i=0; i<4; i++){ + if(IS_DIRECT(h->sub_mb_type[i])){ + fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4); + continue; + } h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]; if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){ @@ -4584,7 +5419,12 @@ decode_intra_mb: } } } - } else if( !IS_DIRECT(mb_type) ) { + } else if( IS_DIRECT(mb_type) ) { + pred_direct_motion(h, &mb_type); + s->current_picture.mb_type[mb_xy]= mb_type; + fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4); + fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4); + } else { int list, mx, my, i, mpx, mpy; if(IS_16X16(mb_type)){ for(list=0; list<2; list++){ @@ -4593,7 +5433,8 @@ decode_intra_mb: const int ref = h->ref_count[list] > 1 ? 
decode_cabac_mb_ref( h, list, 0 ) : 0; fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1); } - } + }else + fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); } for(list=0; list<2; list++){ if(IS_DIR(mb_type, 0, list)){ @@ -4605,7 +5446,8 @@ decode_intra_mb: fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4); fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4); - } + }else + fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4); } } else if(IS_16X8(mb_type)){ @@ -4615,7 +5457,8 @@ decode_intra_mb: if(IS_DIR(mb_type, i, list)){ const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0; fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1); - } + }else + fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1); } } } @@ -4629,6 +5472,9 @@ decode_intra_mb: fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4); fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4); + }else{ + fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4); + fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4); } } } @@ -4640,7 +5486,8 @@ decode_intra_mb: if(IS_DIR(mb_type, i, list)){ //FIXME optimize const int ref= h->ref_count[list] > 1 ? 
decode_cabac_mb_ref( h, list, 4*i ) : 0; fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1); - } + }else + fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1); } } } @@ -4654,6 +5501,9 @@ decode_intra_mb: tprintf("final mv:%d %d\n", mx, my); fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4); fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4); + }else{ + fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4); + fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4); } } } @@ -4690,7 +5540,7 @@ decode_intra_mb: if(s->qscale<0) s->qscale+= 52; else s->qscale-= 52; } - h->chroma_qp = get_chroma_qp(h, s->qscale); + h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale); if( IS_INTRA16x16( mb_type ) ) { int i; @@ -4748,7 +5598,10 @@ decode_intra_mb: nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0; } } else { - memset( &h->non_zero_count_cache[8], 0, 8*5 ); + uint8_t * const nnz= &h->non_zero_count_cache[0]; + fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1); + nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] = + nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0; } s->current_picture.qscale_table[mb_xy]= s->qscale; @@ -4799,6 +5652,7 @@ static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int bS[4] i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc ); pix[-1] = clip_uint8( p0 + i_delta ); /* p0' */ pix[0] = clip_uint8( q0 - i_delta ); /* q0' */ + tprintf("filter_mb_edgev i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, tc, bS[i], pix[-3], p1, p0, q0, q1, pix[2], pix[-2], pix[-1], pix[0], pix[1]); } pix += stride; } @@ -4845,6 +5699,7 @@ static void 
filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int bS[4] pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; } + tprintf("filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]); } pix += stride; } @@ -4879,6 +5734,7 @@ static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int bS[4 pix[-1] = clip_uint8( p0 + i_delta ); /* p0' */ pix[0] = clip_uint8( q0 - i_delta ); /* q0' */ + tprintf("filter_mb_edgecv i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, tc, bS[i], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1); } pix += stride; } @@ -4896,6 +5752,7 @@ static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int bS[4 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */ pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */ + tprintf("filter_mb_edgecv i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1); } pix += stride; } @@ -4903,6 +5760,160 @@ static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int bS[4 } } +static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int bS[8], int qp[2] ) { + int i; + for( i = 0; i < 16; i++, pix += stride) { + int index_a; + int alpha; + int beta; + + int qp_index; + int bS_index = (i >> 1); + if (h->mb_field_decoding_flag) { + bS_index &= ~1; + bS_index |= (i & 1); + } + + if( bS[bS_index] == 0 ) { + continue; + } + + qp_index = h->mb_field_decoding_flag ? 
(i & 1) : (i >> 3); + index_a = clip( qp[qp_index] + h->slice_alpha_c0_offset, 0, 51 ); + alpha = alpha_table[index_a]; + beta = beta_table[clip( qp[qp_index] + h->slice_beta_offset, 0, 51 )]; + + + if( bS[bS_index] < 4 ) { + const int tc0 = tc0_table[index_a][bS[bS_index] - 1]; + /* 4px edge length */ + const int p0 = pix[-1]; + const int p1 = pix[-2]; + const int p2 = pix[-3]; + const int q0 = pix[0]; + const int q1 = pix[1]; + const int q2 = pix[2]; + + if( ABS( p0 - q0 ) < alpha && + ABS( p1 - p0 ) < beta && + ABS( q1 - q0 ) < beta ) { + int tc = tc0; + int i_delta; + + if( ABS( p2 - p0 ) < beta ) { + pix[-2] = p1 + clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 ); + tc++; + } + if( ABS( q2 - q0 ) < beta ) { + pix[1] = q1 + clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 ); + tc++; + } + + i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc ); + pix[-1] = clip_uint8( p0 + i_delta ); /* p0' */ + pix[0] = clip_uint8( q0 - i_delta ); /* q0' */ + tprintf("filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1); + } + }else{ + /* 4px edge length */ + const int p0 = pix[-1]; + const int p1 = pix[-2]; + const int p2 = pix[-3]; + + const int q0 = pix[0]; + const int q1 = pix[1]; + const int q2 = pix[2]; + + if( ABS( p0 - q0 ) < alpha && + ABS( p1 - p0 ) < beta && + ABS( q1 - q0 ) < beta ) { + + if(ABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){ + if( ABS( p2 - p0 ) < beta) + { + const int p3 = pix[-4]; + /* p0', p1', p2' */ + pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3; + pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2; + pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3; + } else { + /* p0' */ + pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; + } + if( ABS( q2 - q0 ) < beta) + { + const int q3 = pix[3]; + /* q0', q1', 
q2' */ + pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3; + pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2; + pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3; + } else { + /* q0' */ + pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; + } + }else{ + /* p0', q0' */ + pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; + pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; + } + tprintf("filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]); + } + } + } +} +static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp[2] ) { + int i; + for( i = 0; i < 8; i++, pix += stride) { + int index_a; + int alpha; + int beta; + + int qp_index; + int bS_index = i; + + if( bS[bS_index] == 0 ) { + continue; + } + + qp_index = h->mb_field_decoding_flag ? (i & 1) : (i >> 3); + index_a = clip( qp[qp_index] + h->slice_alpha_c0_offset, 0, 51 ); + alpha = alpha_table[index_a]; + beta = beta_table[clip( qp[qp_index] + h->slice_beta_offset, 0, 51 )]; + if( bS[bS_index] < 4 ) { + const int tc = tc0_table[index_a][bS[bS_index] - 1] + 1; + /* 2px edge length (because we use same bS than the one for luma) */ + const int p0 = pix[-1]; + const int p1 = pix[-2]; + const int q0 = pix[0]; + const int q1 = pix[1]; + + if( ABS( p0 - q0 ) < alpha && + ABS( p1 - p0 ) < beta && + ABS( q1 - q0 ) < beta ) { + const int i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc ); + + pix[-1] = clip_uint8( p0 + i_delta ); /* p0' */ + pix[0] = clip_uint8( q0 - i_delta ); /* q0' */ + tprintf("filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1); + } + }else{ + const int p0 = pix[-1]; + 
const int p1 = pix[-2]; + const int q0 = pix[0]; + const int q1 = pix[1]; + + if( ABS( p0 - q0 ) < alpha && + ABS( p1 - p0 ) < beta && + ABS( q1 - q0 ) < beta ) { + + pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */ + pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */ + tprintf("filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]); + } + } + } +} + static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) { int i, d; const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 ); @@ -4946,6 +5957,7 @@ static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int bS[4] i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc ); pix[-pix_next] = clip_uint8( p0 + i_delta ); /* p0' */ pix[0] = clip_uint8( q0 - i_delta ); /* q0' */ + tprintf("filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, tc, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]); } pix++; } @@ -4990,6 +6002,7 @@ static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int bS[4] pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2; pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2; } + tprintf("filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]); } pix++; } @@ -5028,6 +6041,7 @@ static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int bS[4 pix[-pix_next] = clip_uint8( p0 + i_delta ); /* p0' */ pix[0] = clip_uint8( q0 - i_delta ); /* q0' */ + tprintf("filter_mb_edgech i:%d d:%d, qp:%d, indexA:%d, alpha:%d, 
beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, tc, bS[i], pix[-3*pix_next], p1, p0, q0, q1, pix[2*pix_next], pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]); } pix++; } @@ -5045,6 +6059,7 @@ static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int bS[4 pix[-pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */ pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */ + tprintf("filter_mb_edgech i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], pix[-3*pix_next], p1, p0, q0, q1, pix[2*pix_next], pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]); } pix++; } @@ -5052,43 +6067,167 @@ static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int bS[4 } } -static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr) { +static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) { MpegEncContext * const s = &h->s; const int mb_xy= mb_x + mb_y*s->mb_stride; - int linesize, uvlinesize; + int first_vertical_edge_done = 0; int dir; - /* FIXME Implement deblocking filter for field MB */ - if( h->sps.mb_aff ) { - return; - } - linesize = s->linesize; - uvlinesize = s->uvlinesize; + if (h->mb_aff_frame + // left mb is in picture + && h->slice_table[mb_xy-1] != 255 + // and current and left pair do not have the same interlaced type + && (IS_INTERLACED(s->current_picture.mb_type[mb_xy]) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1])) + // and left mb is in the same slice if deblocking_filter == 2 + && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) { + /* First vertical edge is different in MBAFF frames + * There are 8 differents bS to compute and 2 differents Qp + */ + int 
bS[8]; + int qp[2]; + int chroma_qp[2]; + + int i; + first_vertical_edge_done = 1; + for( i = 0; i < 8; i++ ) { + int y = i>>1; + int b_idx= 8 + 4 + 8*y; + int bn_idx= b_idx - 1; + + int mbn_xy = h->mb_field_decoding_flag ? h->left_mb_xy[i>>2] : h->left_mb_xy[i&1]; + + if( IS_INTRA( s->current_picture.mb_type[mb_xy] ) || + IS_INTRA( s->current_picture.mb_type[mbn_xy] ) ) { + bS[i] = 4; + } else if( h->non_zero_count_cache[b_idx] != 0 || + h->non_zero_count_cache[bn_idx] != 0 ) { + bS[i] = 2; + } else { + /* FIXME: A given frame may occupy more than one position in + * the reference list. So we should compare the frame numbers, + * not the indices in the ref list. */ + int l; + bS[i] = 0; + for( l = 0; l < 1 + (h->slice_type == B_TYPE); l++ ) { + if( h->ref_cache[l][b_idx] != h->ref_cache[l][bn_idx] || + ABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 || + ABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= 4 ) { + bS[i] = 1; + break; + } + } + } + } + if(bS[0]+bS[1]+bS[2]+bS[3] != 0) { + // Do not use s->qscale as luma quantiser because it has not the same + // value in IPCM macroblocks. 
+ qp[0] = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[h->left_mb_xy[0]] + 1 ) >> 1; + chroma_qp[0] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy] ) + + get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[h->left_mb_xy[0]] ) + 1 ) >> 1; + qp[1] = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[h->left_mb_xy[1]] + 1 ) >> 1; + chroma_qp[1] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy] ) + + get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[h->left_mb_xy[1]] ) + 1 ) >> 1; + /* Filter edge */ + tprintf("filter mb:%d/%d MBAFF, QPy:%d/%d, QPc:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], chroma_qp[0], chroma_qp[1], linesize, uvlinesize); + { int i; for (i = 0; i < 8; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); } + filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp ); + filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, chroma_qp ); + filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, chroma_qp ); + } + } /* dir : 0 -> vertical edge, 1 -> horizontal edge */ for( dir = 0; dir < 2; dir++ ) { - int start = 0; int edge; + const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy; + int start = h->slice_table[mbm_xy] == 255 ? 1 : 0; - /* test picture boundary */ - if( ( dir == 0 && mb_x == 0 ) || ( dir == 1 && mb_y == 0 ) ) { + if (first_vertical_edge_done) { start = 1; + first_vertical_edge_done = 0; } - /* FIXME test slice boundary */ - if( h->deblocking_filter == 2 ) { - } + + if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy]) + start = 1; /* Calculate bS */ for( edge = start; edge < 4; edge++ ) { - /* mbn_xy: neighbour macroblock (how that works for field ?) */ - int mbn_xy = edge > 0 ? mb_xy : ( dir == 0 ? mb_xy -1 : mb_xy - s->mb_stride ); + /* mbn_xy: neighbour macroblock */ + int mbn_xy = edge > 0 ? 
mb_xy : mbm_xy; int bS[4]; int qp; + if (h->mb_aff_frame && (dir == 1) && (edge == 0) && ((mb_y & 1) == 0) + && !IS_INTERLACED(s->current_picture.mb_type[mb_xy]) + && IS_INTERLACED(s->current_picture.mb_type[mbn_xy]) + ) { + // This is a special case in the norm where the filtering must + // be done twice (one each of the field) even if we are in a + // frame macroblock. + // + unsigned int tmp_linesize = 2 * linesize; + unsigned int tmp_uvlinesize = 2 * uvlinesize; + int mbn_xy = mb_xy - 2 * s->mb_stride; + int qp, chroma_qp; + + // first filtering + if( IS_INTRA( s->current_picture.mb_type[mb_xy] ) || + IS_INTRA( s->current_picture.mb_type[mbn_xy] ) ) { + bS[0] = bS[1] = bS[2] = bS[3] = 3; + } else { + // TODO + assert(0); + } + /* Filter edge */ + // Do not use s->qscale as luma quantiser because it has not the same + // value in IPCM macroblocks. + qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1; + tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize); + { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); } + filter_mb_edgeh( h, &img_y[0], tmp_linesize, bS, qp ); + chroma_qp = ( h->chroma_qp + + get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1; + filter_mb_edgech( h, &img_cb[0], tmp_uvlinesize, bS, chroma_qp ); + filter_mb_edgech( h, &img_cr[0], tmp_uvlinesize, bS, chroma_qp ); + + // second filtering + mbn_xy += s->mb_stride; + if( IS_INTRA( s->current_picture.mb_type[mb_xy] ) || + IS_INTRA( s->current_picture.mb_type[mbn_xy] ) ) { + bS[0] = bS[1] = bS[2] = bS[3] = 3; + } else { + // TODO + assert(0); + } + /* Filter edge */ + // Do not use s->qscale as luma quantiser because it has not the same + // value in IPCM macroblocks. 
+ qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1; + tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize); + { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); } + filter_mb_edgeh( h, &img_y[linesize], tmp_linesize, bS, qp ); + chroma_qp = ( h->chroma_qp + + get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1; + filter_mb_edgech( h, &img_cb[uvlinesize], tmp_uvlinesize, bS, chroma_qp ); + filter_mb_edgech( h, &img_cr[uvlinesize], tmp_uvlinesize, bS, chroma_qp ); + continue; + } if( IS_INTRA( s->current_picture.mb_type[mb_xy] ) || IS_INTRA( s->current_picture.mb_type[mbn_xy] ) ) { - bS[0] = bS[1] = bS[2] = bS[3] = ( edge == 0 ? 4 : 3 ); + int value; + if (edge == 0) { + if ( (!IS_INTERLACED(s->current_picture.mb_type[mb_xy]) && !IS_INTERLACED(s->current_picture.mb_type[mbm_xy])) + || ((h->mb_aff_frame || (s->picture_structure != PICT_FRAME)) && (dir == 0)) + ) { + value = 4; + } else { + value = 3; + } + } else { + value = 3; + } + bS[0] = bS[1] = bS[2] = bS[3] = value; } else { int i; for( i = 0; i < 4; i++ ) { @@ -5101,17 +6240,21 @@ static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8 h->non_zero_count_cache[bn_idx] != 0 ) { bS[i] = 2; } - else if( h->slice_type == P_TYPE ) { - if( h->ref_cache[0][b_idx] != h->ref_cache[0][bn_idx] || - ABS( h->mv_cache[0][b_idx][0] - h->mv_cache[0][bn_idx][0] ) >= 4 || - ABS( h->mv_cache[0][b_idx][1] - h->mv_cache[0][bn_idx][1] ) >= 4 ) - bS[i] = 1; - else - bS[i] = 0; - } - else { - /* FIXME Add support for B frame */ - return; + else + { + /* FIXME: A given frame may occupy more than one position in + * the reference list. So we should compare the frame numbers, + * not the indices in the ref list. 
*/ + int l; + bS[i] = 0; + for( l = 0; l < 1 + (h->slice_type == B_TYPE); l++ ) { + if( h->ref_cache[l][b_idx] != h->ref_cache[l][bn_idx] || + ABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 || + ABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= 4 ) { + bS[i] = 1; + break; + } + } } } @@ -5120,12 +6263,17 @@ static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8 } /* Filter edge */ - qp = ( s->qscale + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1; + // Do not use s->qscale as luma quantiser because it has not the same + // value in IPCM macroblocks. + qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1; + //tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]); + tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize); + { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); } if( dir == 0 ) { filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp ); if( (edge&1) == 0 ) { int chroma_qp = ( h->chroma_qp + - get_chroma_qp( h, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1; + get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1; filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS, chroma_qp ); filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS, chroma_qp ); } @@ -5133,7 +6281,7 @@ static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp ); if( (edge&1) == 0 ) { int chroma_qp = ( h->chroma_qp + - get_chroma_qp( h, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1; + get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1; filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS, chroma_qp ); 
filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS, chroma_qp ); } @@ -5175,20 +6323,20 @@ static int decode_slice(H264Context *h){ for(;;){ int ret = decode_mb_cabac(h); - int eos = get_cabac_terminate( &h->cabac ); /* End of Slice flag */ + int eos; if(ret>=0) hl_decode_mb(h); /* XXX: useless as decode_mb_cabac it doesn't support that ... */ - if( ret >= 0 && h->sps.mb_aff ) { //FIXME optimal? or let mb_decode decode 16x32 ? + if( ret >= 0 && h->mb_aff_frame ) { //FIXME optimal? or let mb_decode decode 16x32 ? s->mb_y++; if(ret>=0) ret = decode_mb_cabac(h); - eos = get_cabac_terminate( &h->cabac ); hl_decode_mb(h); s->mb_y--; } + eos = get_cabac_terminate( &h->cabac ); if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 1) { av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y); @@ -5199,22 +6347,17 @@ static int decode_slice(H264Context *h){ if( ++s->mb_x >= s->mb_width ) { s->mb_x = 0; ff_draw_horiz_band(s, 16*s->mb_y, 16); - if( ++s->mb_y >= s->mb_height ) { - tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits); + ++s->mb_y; + if(h->mb_aff_frame) { + ++s->mb_y; } } if( eos || s->mb_y >= s->mb_height ) { + tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits); ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask); return 0; } -#if 0 - /* TODO test over-reading in cabac code */ - else if( read too much in h->cabac ) { - ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask); - return -1; - } -#endif } } else { @@ -5223,7 +6366,7 @@ static int decode_slice(H264Context *h){ if(ret>=0) hl_decode_mb(h); - if(ret>=0 && h->sps.mb_aff){ //FIXME optimal? or let mb_decode decode 16x32 ? + if(ret>=0 && h->mb_aff_frame){ //FIXME optimal? or let mb_decode decode 16x32 ? 
s->mb_y++; ret = decode_mb_cavlc(h); @@ -5241,7 +6384,11 @@ static int decode_slice(H264Context *h){ if(++s->mb_x >= s->mb_width){ s->mb_x=0; ff_draw_horiz_band(s, 16*s->mb_y, 16); - if(++s->mb_y >= s->mb_height){ + ++s->mb_y; + if(h->mb_aff_frame) { + ++s->mb_y; + } + if(s->mb_y >= s->mb_height){ tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits); if(get_bits_count(&s->gb) == s->gb.size_in_bits ) { @@ -5257,6 +6404,7 @@ static int decode_slice(H264Context *h){ } if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){ + tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits); if(get_bits_count(&s->gb) == s->gb.size_in_bits ){ ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask); @@ -5318,9 +6466,27 @@ static int decode_slice(H264Context *h){ return -1; //not reached } +static inline void decode_hrd_parameters(H264Context *h, SPS *sps){ + MpegEncContext * const s = &h->s; + int cpb_count, i; + cpb_count = get_ue_golomb(&s->gb) + 1; + get_bits(&s->gb, 4); /* bit_rate_scale */ + get_bits(&s->gb, 4); /* cpb_size_scale */ + for(i=0; i<cpb_count; i++){ + get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */ + get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */ + get_bits1(&s->gb); /* cbr_flag */ + } + get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */ + get_bits(&s->gb, 5); /* cpb_removal_delay_length_minus1 */ + get_bits(&s->gb, 5); /* dpb_output_delay_length_minus1 */ + get_bits(&s->gb, 5); /* time_offset_length */ +} + static inline int decode_vui_parameters(H264Context *h, SPS *sps){ MpegEncContext * const s = &h->s; int aspect_ratio_info_present_flag, aspect_ratio_idc; + int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag; aspect_ratio_info_present_flag= get_bits1(&s->gb); @@ -5367,29 +6533,27 @@ static inline int decode_vui_parameters(H264Context *h, SPS *sps){ sps->fixed_frame_rate_flag = get_bits1(&s->gb); } -#if 0 -| 
nal_hrd_parameters_present_flag |0 |u(1) | -| if( nal_hrd_parameters_present_flag = = 1) | | | -| hrd_parameters( ) | | | -| vcl_hrd_parameters_present_flag |0 |u(1) | -| if( vcl_hrd_parameters_present_flag = = 1) | | | -| hrd_parameters( ) | | | -| if( ( nal_hrd_parameters_present_flag = = 1 | || | | -| | | | -|( vcl_hrd_parameters_present_flag = = 1 ) ) | | | -| low_delay_hrd_flag |0 |u(1) | -| bitstream_restriction_flag |0 |u(1) | -| if( bitstream_restriction_flag ) { |0 |u(1) | -| motion_vectors_over_pic_boundaries_flag |0 |u(1) | -| max_bytes_per_pic_denom |0 |ue(v) | -| max_bits_per_mb_denom |0 |ue(v) | -| log2_max_mv_length_horizontal |0 |ue(v) | -| log2_max_mv_length_vertical |0 |ue(v) | -| num_reorder_frames |0 |ue(v) | -| max_dec_frame_buffering |0 |ue(v) | -| } | | | -|} | | | -#endif + nal_hrd_parameters_present_flag = get_bits1(&s->gb); + if(nal_hrd_parameters_present_flag) + decode_hrd_parameters(h, sps); + vcl_hrd_parameters_present_flag = get_bits1(&s->gb); + if(vcl_hrd_parameters_present_flag) + decode_hrd_parameters(h, sps); + if(nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag) + get_bits1(&s->gb); /* low_delay_hrd_flag */ + get_bits1(&s->gb); /* pic_struct_present_flag */ + + sps->bitstream_restriction_flag = get_bits1(&s->gb); + if(sps->bitstream_restriction_flag){ + get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */ + get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */ + get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */ + get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */ + get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */ + sps->num_reorder_frames = get_ue_golomb(&s->gb); + get_ue_golomb(&s->gb); /* max_dec_frame_buffering */ + } + return 0; } @@ -5403,14 +6567,15 @@ static inline int decode_seq_parameter_set(H264Context *h){ get_bits1(&s->gb); //constraint_set0_flag get_bits1(&s->gb); //constraint_set1_flag get_bits1(&s->gb); //constraint_set2_flag - get_bits(&s->gb, 5); // reserved + 
get_bits1(&s->gb); //constraint_set3_flag + get_bits(&s->gb, 4); // reserved level_idc= get_bits(&s->gb, 8); sps_id= get_ue_golomb(&s->gb); sps= &h->sps_buffer[ sps_id ]; sps->profile_idc= profile_idc; sps->level_idc= level_idc; - + sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4; sps->poc_type= get_ue_golomb(&s->gb); @@ -5431,9 +6596,16 @@ static inline int decode_seq_parameter_set(H264Context *h){ } sps->ref_frame_count= get_ue_golomb(&s->gb); + if(sps->ref_frame_count > MAX_PICTURE_COUNT-2){ + av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n"); + } sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb); sps->mb_width= get_ue_golomb(&s->gb) + 1; sps->mb_height= get_ue_golomb(&s->gb) + 1; + if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 || + avcodec_check_dimensions(NULL, 16*sps->mb_width, 16*sps->mb_height)) + return -1; + sps->frame_mbs_only_flag= get_bits1(&s->gb); if(!sps->frame_mbs_only_flag) sps->mb_aff= get_bits1(&s->gb); @@ -5561,23 +6733,34 @@ static inline int decode_picture_parameter_set(H264Context *h){ * finds the end of the current frame in the bitstream. 
* @return the position of the first byte of the next frame, or -1 */ -static int find_frame_end(ParseContext *pc, const uint8_t *buf, int buf_size){ +static int find_frame_end(H264Context *h, const uint8_t *buf, int buf_size){ int i; uint32_t state; + ParseContext *pc = &(h->s.parse_context); //printf("first %02X%02X%02X%02X\n", buf[0], buf[1],buf[2],buf[3]); // mb_addr= pc->mb_addr - 1; state= pc->state; - //FIXME this will fail with slices - for(i=0; i<buf_size; i++){ - state= (state<<8) | buf[i]; + for(i=0; i<=buf_size; i++){ if((state&0xFFFFFF1F) == 0x101 || (state&0xFFFFFF1F) == 0x102 || (state&0xFFFFFF1F) == 0x105){ + tprintf("find_frame_end new startcode = %08x, frame_start_found = %d, pos = %d\n", state, pc->frame_start_found, i); if(pc->frame_start_found){ - pc->state=-1; - pc->frame_start_found= 0; - return i-3; + // If there isn't one more byte in the buffer + // the test on first_mb_in_slice cannot be done yet + // do it at next call. + if (i >= buf_size) break; + if (buf[i] & 0x80) { + // first_mb_in_slice is 0, probably the first nal of a new + // slice + tprintf("find_frame_end frame_end_found, state = %08x, pos = %d\n", state, i); + pc->state=-1; + pc->frame_start_found= 0; + return i-4; + } } - pc->frame_start_found= 1; + pc->frame_start_found = 1; } + if (i<buf_size) + state= (state<<8) | buf[i]; } pc->state= state; @@ -5589,10 +6772,11 @@ static int h264_parse(AVCodecParserContext *s, uint8_t **poutbuf, int *poutbuf_size, const uint8_t *buf, int buf_size) { - ParseContext *pc = s->priv_data; + H264Context *h = s->priv_data; + ParseContext *pc = &h->s.parse_context; int next; - next= find_frame_end(pc, buf, buf_size); + next= find_frame_end(h, buf, buf_size); if (ff_combine_frame(pc, next, (uint8_t **)&buf, &buf_size) < 0) { *poutbuf = NULL; @@ -5615,6 +6799,7 @@ static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){ printf("%X ", buf[i]); } #endif + h->slice_num = 0; for(;;){ int consumed; int dst_length; @@ -5626,8 +6811,7 @@ 
static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){ if(buf_index >= buf_size) break; nalsize = 0; for(i = 0; i < h->nal_length_size; i++) - nalsize = (nalsize << 8) | buf[buf_index + i]; - buf_index += h->nal_length_size; + nalsize = (nalsize << 8) | buf[buf_index++]; } else { // start code prefix search for(; buf_index + 3 < buf_size; buf_index++){ @@ -5641,12 +6825,12 @@ static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){ buf_index+=3; } - ptr= decode_nal(h, buf + buf_index, &dst_length, &consumed, buf_size - buf_index); + ptr= decode_nal(h, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index); if(ptr[dst_length - 1] == 0) dst_length--; bit_length= 8*dst_length - decode_rbsp_trailing(ptr + dst_length - 1); if(s->avctx->debug&FF_DEBUG_STARTCODE){ - av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d length %d\n", h->nal_unit_type, buf_index, dst_length); + av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", h->nal_unit_type, buf_index, buf_size, dst_length); } if (h->is_avc && (nalsize != consumed)) @@ -5698,7 +6882,8 @@ static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){ if(s->flags& CODEC_FLAG_LOW_DELAY) s->low_delay=1; - avctx->has_b_frames= !s->low_delay; + if(avctx->has_b_frames < 2) + avctx->has_b_frames= !s->low_delay; break; case NAL_PPS: init_get_bits(&s->gb, ptr, bit_length); @@ -5713,13 +6898,12 @@ static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){ default: av_log(avctx, AV_LOG_ERROR, "Unknown NAL code: %d\n", h->nal_unit_type); } - - //FIXME move after where irt is set - s->current_picture.pict_type= s->pict_type; - s->current_picture.key_frame= s->pict_type == I_TYPE; } if(!s->current_picture_ptr) return buf_index; //no frame + + s->current_picture_ptr->pict_type= s->pict_type; + s->current_picture_ptr->key_frame= s->pict_type == I_TYPE && h->nal_unit_type == NAL_IDR_SLICE; h->prev_frame_num_offset= h->frame_num_offset; 
h->prev_frame_num= h->frame_num; @@ -5729,8 +6913,6 @@ static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){ } if(s->current_picture_ptr->reference) execute_ref_pic_marking(h, h->mmco, h->mmco_index); - else - assert(h->mmco_index==0); ff_er_frame_end(s); @@ -5774,7 +6956,7 @@ static int decode_frame(AVCodecContext *avctx, } if(s->flags&CODEC_FLAG_TRUNCATED){ - int next= find_frame_end(&s->parse_context, buf, buf_size); + int next= find_frame_end(h, buf, buf_size); if( ff_combine_frame(&s->parse_context, next, &buf, &buf_size) < 0 ) return buf_size; @@ -5782,7 +6964,7 @@ static int decode_frame(AVCodecContext *avctx, } if(h->is_avc && !h->got_avcC) { - int i, cnt, poffs; + int i, cnt, nalsize; unsigned char *p = avctx->extradata; if(avctx->extradata_size < 7) { av_log(avctx, AV_LOG_ERROR, "avcC too short\n"); @@ -5795,27 +6977,29 @@ static int decode_frame(AVCodecContext *avctx, /* sps and pps in the avcC always have length coded with 2 bytes, so put a fake nal_length_size = 2 while parsing them */ h->nal_length_size = 2; - poffs = 6; // Decode sps from avcC cnt = *(p+5) & 0x1f; // Number of sps + p += 6; for (i = 0; i < cnt; i++) { - if(decode_nal_units(h, p + poffs, BE_16(p + poffs) + 2) != BE_16(p + poffs) + 2) { + nalsize = BE_16(p) + 2; + if(decode_nal_units(h, p, nalsize) != nalsize) { av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i); return -1; } - poffs += BE_16(p + poffs) + 2; + p += nalsize; } // Decode pps from avcC - cnt = *(p + poffs++); // Number of pps + cnt = *(p++); // Number of pps for (i = 0; i < cnt; i++) { - if(decode_nal_units(h, p + poffs, BE_16(p + poffs) + 2) != BE_16(p + poffs) + 2) { + nalsize = BE_16(p) + 2; + if(decode_nal_units(h, p, nalsize) != nalsize) { av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i); return -1; } - poffs += BE_16(p + poffs) + 2; + p += nalsize; } // Now store right nal length size, that will be use to parse all other nals - h->nal_length_size = 
((*(p+4))&0x03)+1; + h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1; // Do not reparse avcC h->got_avcC = 1; } @@ -5832,21 +7016,83 @@ static int decode_frame(AVCodecContext *avctx, //FIXME do something with unavailable reference frames // if(ret==FRAME_SKIPED) return get_consumed_bytes(s, buf_index, buf_size); -#if 0 - if(s->pict_type==B_TYPE || s->low_delay){ - *pict= *(AVFrame*)&s->current_picture; - } else { - *pict= *(AVFrame*)&s->last_picture; - } -#endif if(!s->current_picture_ptr){ av_log(h->s.avctx, AV_LOG_DEBUG, "error, NO frame\n"); return -1; } - *pict= *(AVFrame*)&s->current_picture; //FIXME - ff_print_debug_info(s, pict); + { + Picture *out = s->current_picture_ptr; +#if 0 //decode order + *data_size = sizeof(AVFrame); +#else + /* Sort B-frames into display order */ + Picture *cur = s->current_picture_ptr; + Picture *prev = h->delayed_output_pic; + int out_idx = 0; + int pics = 0; + int out_of_order; + int cross_idr = 0; + int dropped_frame = 0; + int i; + + if(h->sps.bitstream_restriction_flag + && s->avctx->has_b_frames < h->sps.num_reorder_frames){ + s->avctx->has_b_frames = h->sps.num_reorder_frames; + s->low_delay = 0; + } + + while(h->delayed_pic[pics]) pics++; + h->delayed_pic[pics++] = cur; + if(cur->reference == 0) + cur->reference = 1; + + for(i=0; h->delayed_pic[i]; i++) + if(h->delayed_pic[i]->key_frame) + cross_idr = 1; + + out = h->delayed_pic[0]; + for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++) + if(h->delayed_pic[i]->poc < out->poc){ + out = h->delayed_pic[i]; + out_idx = i; + } + + out_of_order = !cross_idr && prev && out->poc < prev->poc; + if(prev && pics <= s->avctx->has_b_frames) + out = prev; + else if((out_of_order && pics-1 == s->avctx->has_b_frames) + || (s->low_delay && + ((!cross_idr && prev && out->poc > prev->poc + 2) + || cur->pict_type == B_TYPE))) + { + s->low_delay = 0; + s->avctx->has_b_frames++; + out = prev; + } + else if(out_of_order) + out = prev; + + if(out_of_order || pics > 
s->avctx->has_b_frames){ + dropped_frame = (out != h->delayed_pic[out_idx]); + for(i=out_idx; h->delayed_pic[i]; i++) + h->delayed_pic[i] = h->delayed_pic[i+1]; + } + + if(prev == out && !dropped_frame) + *data_size = 0; + else + *data_size = sizeof(AVFrame); + if(prev && prev != out && prev->reference == 1) + prev->reference = 0; + h->delayed_output_pic = out; +#endif + + *pict= *(AVFrame*)out; + } + assert(pict->data[0]); + ff_print_debug_info(s, pict); //printf("out %d\n", (int)pict->data[0]); #if 0 //? @@ -5854,12 +7100,6 @@ static int decode_frame(AVCodecContext *avctx, /* we substract 1 because it is added on utils.c */ avctx->frame_number = s->picture_number - 1; #endif -#if 0 - /* dont output the last pic after seeking */ - if(s->last_picture_ptr || s->low_delay) - //Note this isnt a issue as a IDR pic should flush teh buffers -#endif - *data_size = sizeof(AVFrame); return get_consumed_bytes(s, buf_index, buf_size); } #if 0 @@ -5970,7 +7210,7 @@ int main(){ } // printf("\n"); - h264_add_idct_c(ref, block, 4); + s->dsp.h264_idct_add(ref, block, 4); /* for(j=0; j<16; j++){ printf("%d ", ref[j]); } @@ -6079,12 +7319,12 @@ AVCodec h264_decoder = { NULL, decode_end, decode_frame, - /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED, + /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY, }; AVCodecParser h264_parser = { { CODEC_ID_H264 }, - sizeof(ParseContext), + sizeof(H264Context), NULL, h264_parse, ff_parse_close, diff --git a/src/libffmpeg/libavcodec/h264data.h b/src/libffmpeg/libavcodec/h264data.h index 5480becd4..21d2260e8 100644 --- a/src/libffmpeg/libavcodec/h264data.h +++ b/src/libffmpeg/libavcodec/h264data.h @@ -353,8 +353,8 @@ static const PMbInfo p_mb_type_info[5]={ {MB_TYPE_16x16|MB_TYPE_P0L0 , 1}, {MB_TYPE_16x8 |MB_TYPE_P0L0|MB_TYPE_P1L0, 2}, {MB_TYPE_8x16 |MB_TYPE_P0L0|MB_TYPE_P1L0, 2}, -{MB_TYPE_8x8 , 4}, -{MB_TYPE_8x8 |MB_TYPE_REF0 , 4}, +{MB_TYPE_8x8 |MB_TYPE_P0L0|MB_TYPE_P1L0, 4}, 
+{MB_TYPE_8x8 |MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_REF0, 4}, }; static const PMbInfo p_sub_mb_type_info[4]={ @@ -387,7 +387,7 @@ static const PMbInfo b_mb_type_info[23]={ {MB_TYPE_8x16 |MB_TYPE_P0L0|MB_TYPE_P0L1 |MB_TYPE_P1L1, 2, }, {MB_TYPE_16x8 |MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_P1L0|MB_TYPE_P1L1, 2, }, {MB_TYPE_8x16 |MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_P1L0|MB_TYPE_P1L1, 2, }, -{MB_TYPE_8x8 , 4, }, +{MB_TYPE_8x8 |MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_P1L0|MB_TYPE_P1L1, 4, }, }; static const PMbInfo b_sub_mb_type_info[13]={ diff --git a/src/libffmpeg/libavcodec/h264idct.c b/src/libffmpeg/libavcodec/h264idct.c new file mode 100755 index 000000000..551197d37 --- /dev/null +++ b/src/libffmpeg/libavcodec/h264idct.c @@ -0,0 +1,70 @@ +/* + * H.264 IDCT + * Copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/** + * @file h264-idct.c + * H.264 IDCT. 
+ * @author Michael Niedermayer <michaelni@gmx.at> + */ + +#include "dsputil.h" + +static always_inline void idct_internal(uint8_t *dst, DCTELEM *block, int stride, int block_stride, int shift, int add){ + int i; + uint8_t *cm = cropTbl + MAX_NEG_CROP; + + block[0] += 1<<(shift-1); + + for(i=0; i<4; i++){ + const int z0= block[0 + block_stride*i] + block[2 + block_stride*i]; + const int z1= block[0 + block_stride*i] - block[2 + block_stride*i]; + const int z2= (block[1 + block_stride*i]>>1) - block[3 + block_stride*i]; + const int z3= block[1 + block_stride*i] + (block[3 + block_stride*i]>>1); + + block[0 + block_stride*i]= z0 + z3; + block[1 + block_stride*i]= z1 + z2; + block[2 + block_stride*i]= z1 - z2; + block[3 + block_stride*i]= z0 - z3; + } + + for(i=0; i<4; i++){ + const int z0= block[i + block_stride*0] + block[i + block_stride*2]; + const int z1= block[i + block_stride*0] - block[i + block_stride*2]; + const int z2= (block[i + block_stride*1]>>1) - block[i + block_stride*3]; + const int z3= block[i + block_stride*1] + (block[i + block_stride*3]>>1); + + dst[i + 0*stride]= cm[ add*dst[i + 0*stride] + ((z0 + z3) >> shift) ]; + dst[i + 1*stride]= cm[ add*dst[i + 1*stride] + ((z1 + z2) >> shift) ]; + dst[i + 2*stride]= cm[ add*dst[i + 2*stride] + ((z1 - z2) >> shift) ]; + dst[i + 3*stride]= cm[ add*dst[i + 3*stride] + ((z0 - z3) >> shift) ]; + } +} + +void ff_h264_idct_add_c(uint8_t *dst, DCTELEM *block, int stride){ + idct_internal(dst, block, stride, 4, 6, 1); +} + +void ff_h264_lowres_idct_add_c(uint8_t *dst, int stride, DCTELEM *block){ + idct_internal(dst, block, stride, 8, 3, 1); +} + +void ff_h264_lowres_idct_put_c(uint8_t *dst, int stride, DCTELEM *block){ + idct_internal(dst, block, stride, 8, 3, 0); +} diff --git a/src/libffmpeg/libavcodec/huffyuv.c b/src/libffmpeg/libavcodec/huffyuv.c index ecc6a5fa2..5b496c512 100644 --- a/src/libffmpeg/libavcodec/huffyuv.c +++ b/src/libffmpeg/libavcodec/huffyuv.c @@ -27,11 +27,22 @@ */ #include "common.h" 
+#include "bitstream.h" #include "avcodec.h" #include "dsputil.h" #define VLC_BITS 11 +#ifdef WORDS_BIGENDIAN +#define B 3 +#define G 2 +#define R 1 +#else +#define B 0 +#define G 1 +#define R 2 +#endif + typedef enum Predictor{ LEFT= 0, PLANE, @@ -51,15 +62,17 @@ typedef struct HYuvContext{ int bgr32; //use bgr32 instead of bgr24 int width, height; int flags; + int context; int picture_number; int last_slice_end; - uint8_t __align8 temp[3][2560]; + uint8_t *temp[3]; uint64_t stats[3][256]; uint8_t len[3][256]; uint32_t bits[3][256]; VLC vlc[3]; AVFrame picture; - uint8_t __align8 bitstream_buffer[1024*1024*3]; //FIXME dynamic alloc or some other solution + uint8_t *bitstream_buffer; + int bitstream_buffer_size; DSPContext dsp; }HYuvContext; @@ -157,13 +170,13 @@ static inline void add_left_prediction_bgr32(uint8_t *dst, uint8_t *src, int w, b= *blue; for(i=0; i<w; i++){ - b+= src[4*i+0]; - g+= src[4*i+1]; - r+= src[4*i+2]; + b+= src[4*i+B]; + g+= src[4*i+G]; + r+= src[4*i+R]; - dst[4*i+0]= b; - dst[4*i+1]= g; - dst[4*i+2]= r; + dst[4*i+B]= b; + dst[4*i+G]= g; + dst[4*i+R]= r; } *red= r; @@ -271,7 +284,7 @@ static void generate_len_table(uint8_t *dst, uint64_t *stats, int size){ for(len=0; up[index] != -1; len++) index= up[index]; - if(len > 32) break; + if(len >= 32) break; dst[i]= len; } @@ -296,10 +309,11 @@ for(j=0; j<256; j++){ printf("%6X, %2d, %3d\n", s->bits[i][j], s->len[i][j], j); } #endif - init_vlc(&s->vlc[i], VLC_BITS, 256, s->len[i], 1, 1, s->bits[i], 4, 4); + free_vlc(&s->vlc[i]); + init_vlc(&s->vlc[i], VLC_BITS, 256, s->len[i], 1, 1, s->bits[i], 4, 4, 0); } - return 0; + return (get_bits_count(&gb)+7)/8; } static int read_old_huffman_tables(HYuvContext *s){ @@ -322,8 +336,10 @@ static int read_old_huffman_tables(HYuvContext *s){ memcpy(s->bits[2], s->bits[1], 256*sizeof(uint32_t)); memcpy(s->len[2] , s->len [1], 256*sizeof(uint8_t)); - for(i=0; i<3; i++) - init_vlc(&s->vlc[i], VLC_BITS, 256, s->len[i], 1, 1, s->bits[i], 4, 4); + for(i=0; i<3; i++){ 
+ free_vlc(&s->vlc[i]); + init_vlc(&s->vlc[i], VLC_BITS, 256, s->len[i], 1, 1, s->bits[i], 4, 4, 0); + } return 0; #else @@ -332,22 +348,44 @@ static int read_old_huffman_tables(HYuvContext *s){ #endif } -static int decode_init(AVCodecContext *avctx) -{ +static void alloc_temp(HYuvContext *s){ + int i; + + if(s->bitstream_bpp<24){ + for(i=0; i<3; i++){ + s->temp[i]= av_malloc(s->width + 16); + } + }else{ + s->temp[0]= av_malloc(4*s->width + 16); + } +} + +static int common_init(AVCodecContext *avctx){ HYuvContext *s = avctx->priv_data; - int width, height; s->avctx= avctx; s->flags= avctx->flags; dsputil_init(&s->dsp, avctx); - width= s->width= avctx->width; - height= s->height= avctx->height; + s->width= avctx->width; + s->height= avctx->height; + assert(s->width>0 && s->height>0); + + return 0; +} + +static int decode_init(AVCodecContext *avctx) +{ + HYuvContext *s = avctx->priv_data; + + common_init(avctx); + memset(s->vlc, 0, 3*sizeof(VLC)); + avctx->coded_frame= &s->picture; + s->interlaced= s->height > 288; s->bgr32=1; - assert(width && height); //if(avctx->extradata) // printf("extradata:%X, extradata_size:%d\n", *(uint32_t*)avctx->extradata, avctx->extradata_size); if(avctx->extradata_size){ @@ -359,7 +397,7 @@ s->bgr32=1; s->version=0; if(s->version==2){ - int method; + int method, interlace; method= ((uint8_t*)avctx->extradata)[0]; s->decorrelate= method&64 ? 1 : 0; @@ -367,6 +405,9 @@ s->bgr32=1; s->bitstream_bpp= ((uint8_t*)avctx->extradata)[1]; if(s->bitstream_bpp==0) s->bitstream_bpp= avctx->bits_per_sample&~7; + interlace= (((uint8_t*)avctx->extradata)[2] & 0x30) >> 4; + s->interlaced= (interlace==1) ? 1 : (interlace==2) ? 0 : s->interlaced; + s->context= ((uint8_t*)avctx->extradata)[2] & 0x40 ? 
1 : 0; if(read_huffman_tables(s, ((uint8_t*)avctx->extradata)+4, avctx->extradata_size) < 0) return -1; @@ -394,13 +435,12 @@ s->bgr32=1; break; } s->bitstream_bpp= avctx->bits_per_sample & ~7; + s->context= 0; if(read_old_huffman_tables(s) < 0) return -1; } - s->interlaced= height > 288; - switch(s->bitstream_bpp){ case 12: avctx->pix_fmt = PIX_FMT_YUV420P; @@ -424,14 +464,16 @@ s->bgr32=1; assert(0); } -// printf("pred:%d bpp:%d hbpp:%d il:%d\n", s->predictor, s->bitstream_bpp, avctx->bits_per_sample, s->interlaced); + alloc_temp(s); +// av_log(NULL, AV_LOG_DEBUG, "pred:%d bpp:%d hbpp:%d il:%d\n", s->predictor, s->bitstream_bpp, avctx->bits_per_sample, s->interlaced); + return 0; } -static void store_table(HYuvContext *s, uint8_t *len){ +static int store_table(HYuvContext *s, uint8_t *len, uint8_t *buf){ int i; - int index= s->avctx->extradata_size; + int index= 0; for(i=0; i<256;){ int val= len[i]; @@ -442,43 +484,31 @@ static void store_table(HYuvContext *s, uint8_t *len){ assert(val < 32 && val >0 && repeat<256 && repeat>0); if(repeat>7){ - ((uint8_t*)s->avctx->extradata)[index++]= val; - ((uint8_t*)s->avctx->extradata)[index++]= repeat; + buf[index++]= val; + buf[index++]= repeat; }else{ - ((uint8_t*)s->avctx->extradata)[index++]= val | (repeat<<5); + buf[index++]= val | (repeat<<5); } } - s->avctx->extradata_size= index; + return index; } static int encode_init(AVCodecContext *avctx) { HYuvContext *s = avctx->priv_data; - int i, j, width, height; + int i, j; - s->avctx= avctx; - s->flags= avctx->flags; - - dsputil_init(&s->dsp, avctx); - - width= s->width= avctx->width; - height= s->height= avctx->height; - - assert(width && height); + common_init(avctx); - avctx->extradata= av_mallocz(1024*30); - avctx->stats_out= av_mallocz(1024*30); + avctx->extradata= av_mallocz(1024*30); // 256*3+4 == 772 + avctx->stats_out= av_mallocz(1024*30); // 21*256*3(%llu ) + 3(\n) + 1(0) = 16132 s->version=2; avctx->coded_frame= &s->picture; switch(avctx->pix_fmt){ case 
PIX_FMT_YUV420P: - if(avctx->strict_std_compliance>=0){ - av_log(avctx, AV_LOG_ERROR, "YV12-huffyuv is experimental, there WILL be no compatbility! (use (v)strict=-1)\n"); - return -1; - } s->bitstream_bpp= 12; break; case PIX_FMT_YUV422P: @@ -491,10 +521,36 @@ static int encode_init(AVCodecContext *avctx) avctx->bits_per_sample= s->bitstream_bpp; s->decorrelate= s->bitstream_bpp >= 24; s->predictor= avctx->prediction_method; + s->interlaced= avctx->flags&CODEC_FLAG_INTERLACED_ME ? 1 : 0; + if(avctx->context_model==1){ + s->context= avctx->context_model; + if(s->flags & (CODEC_FLAG_PASS1|CODEC_FLAG_PASS2)){ + av_log(avctx, AV_LOG_ERROR, "context=1 is not compatible with 2 pass huffyuv encoding\n"); + return -1; + } + }else s->context= 0; + + if(avctx->codec->id==CODEC_ID_HUFFYUV){ + if(avctx->pix_fmt==PIX_FMT_YUV420P){ + av_log(avctx, AV_LOG_ERROR, "Error: YV12 is not supported by huffyuv; use vcodec=ffvhuff or format=422p\n"); + return -1; + } + if(avctx->context_model){ + av_log(avctx, AV_LOG_ERROR, "Error: per-frame huffman tables are not supported by huffyuv; use vcodec=ffvhuff\n"); + return -1; + } + if(s->interlaced != ( s->height > 288 )) + av_log(avctx, AV_LOG_INFO, "using huffyuv 2.2.0 or newer interlacing flag\n"); + }else if(avctx->strict_std_compliance>=0){ + av_log(avctx, AV_LOG_ERROR, "This codec is under development; files encoded with it may not be decodeable with future versions!!! Set vstrict=-1 / -strict -1 to use it anyway.\n"); + return -1; + } ((uint8_t*)avctx->extradata)[0]= s->predictor; ((uint8_t*)avctx->extradata)[1]= s->bitstream_bpp; - ((uint8_t*)avctx->extradata)[2]= + ((uint8_t*)avctx->extradata)[2]= s->interlaced ? 
0x10 : 0x20; + if(s->context) + ((uint8_t*)avctx->extradata)[2]|= 0x40; ((uint8_t*)avctx->extradata)[3]= 0; s->avctx->extradata_size= 4; @@ -533,17 +589,28 @@ static int encode_init(AVCodecContext *avctx) return -1; } - store_table(s, s->len[i]); + s->avctx->extradata_size+= + store_table(s, s->len[i], &((uint8_t*)s->avctx->extradata)[s->avctx->extradata_size]); } - for(i=0; i<3; i++) - for(j=0; j<256; j++) - s->stats[i][j]= 0; + if(s->context){ + for(i=0; i<3; i++){ + int pels = s->width*s->height / (i?40:10); + for(j=0; j<256; j++){ + int d= FFMIN(j, 256-j); + s->stats[i][j]= pels/(d+1); + } + } + }else{ + for(i=0; i<3; i++) + for(j=0; j<256; j++) + s->stats[i][j]= 0; + } - s->interlaced= height > 288; - // printf("pred:%d bpp:%d hbpp:%d il:%d\n", s->predictor, s->bitstream_bpp, avctx->bits_per_sample, s->interlaced); + alloc_temp(s); + s->picture_number=0; return 0; @@ -573,9 +640,14 @@ static void decode_gray_bitstream(HYuvContext *s, int count){ } } -static void encode_422_bitstream(HYuvContext *s, int count){ +static int encode_422_bitstream(HYuvContext *s, int count){ int i; + if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < 2*4*count){ + av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n"); + return -1; + } + count/=2; if(s->flags&CODEC_FLAG_PASS1){ for(i=0; i<count; i++){ @@ -584,6 +656,20 @@ static void encode_422_bitstream(HYuvContext *s, int count){ s->stats[0][ s->temp[0][2*i+1] ]++; s->stats[2][ s->temp[2][ i ] ]++; } + } + if(s->avctx->flags2&CODEC_FLAG2_NO_OUTPUT) + return 0; + if(s->context){ + for(i=0; i<count; i++){ + s->stats[0][ s->temp[0][2*i ] ]++; + put_bits(&s->pb, s->len[0][ s->temp[0][2*i ] ], s->bits[0][ s->temp[0][2*i ] ]); + s->stats[1][ s->temp[1][ i ] ]++; + put_bits(&s->pb, s->len[1][ s->temp[1][ i ] ], s->bits[1][ s->temp[1][ i ] ]); + s->stats[0][ s->temp[0][2*i+1] ]++; + put_bits(&s->pb, s->len[0][ s->temp[0][2*i+1] ], s->bits[0][ s->temp[0][2*i+1] ]); + s->stats[2][ s->temp[2][ i ] ]++; + put_bits(&s->pb, 
s->len[2][ s->temp[2][ i ] ], s->bits[2][ s->temp[2][ i ] ]); + } }else{ for(i=0; i<count; i++){ put_bits(&s->pb, s->len[0][ s->temp[0][2*i ] ], s->bits[0][ s->temp[0][2*i ] ]); @@ -592,23 +678,41 @@ static void encode_422_bitstream(HYuvContext *s, int count){ put_bits(&s->pb, s->len[2][ s->temp[2][ i ] ], s->bits[2][ s->temp[2][ i ] ]); } } + return 0; } -static void encode_gray_bitstream(HYuvContext *s, int count){ +static int encode_gray_bitstream(HYuvContext *s, int count){ int i; + if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < 4*count){ + av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n"); + return -1; + } + count/=2; if(s->flags&CODEC_FLAG_PASS1){ for(i=0; i<count; i++){ s->stats[0][ s->temp[0][2*i ] ]++; s->stats[0][ s->temp[0][2*i+1] ]++; } + } + if(s->avctx->flags2&CODEC_FLAG2_NO_OUTPUT) + return 0; + + if(s->context){ + for(i=0; i<count; i++){ + s->stats[0][ s->temp[0][2*i ] ]++; + put_bits(&s->pb, s->len[0][ s->temp[0][2*i ] ], s->bits[0][ s->temp[0][2*i ] ]); + s->stats[0][ s->temp[0][2*i+1] ]++; + put_bits(&s->pb, s->len[0][ s->temp[0][2*i+1] ], s->bits[0][ s->temp[0][2*i+1] ]); + } }else{ for(i=0; i<count; i++){ put_bits(&s->pb, s->len[0][ s->temp[0][2*i ] ], s->bits[0][ s->temp[0][2*i ] ]); put_bits(&s->pb, s->len[0][ s->temp[0][2*i+1] ], s->bits[0][ s->temp[0][2*i+1] ]); } } + return 0; } static void decode_bgr_bitstream(HYuvContext *s, int count){ @@ -617,30 +721,30 @@ static void decode_bgr_bitstream(HYuvContext *s, int count){ if(s->decorrelate){ if(s->bitstream_bpp==24){ for(i=0; i<count; i++){ - s->temp[0][4*i+1]= get_vlc2(&s->gb, s->vlc[1].table, VLC_BITS, 3); - s->temp[0][4*i ]= get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3) + s->temp[0][4*i+1]; - s->temp[0][4*i+2]= get_vlc2(&s->gb, s->vlc[2].table, VLC_BITS, 3) + s->temp[0][4*i+1]; + s->temp[0][4*i+G]= get_vlc2(&s->gb, s->vlc[1].table, VLC_BITS, 3); + s->temp[0][4*i+B]= get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3) + s->temp[0][4*i+G]; + s->temp[0][4*i+R]= 
get_vlc2(&s->gb, s->vlc[2].table, VLC_BITS, 3) + s->temp[0][4*i+G]; } }else{ for(i=0; i<count; i++){ - s->temp[0][4*i+1]= get_vlc2(&s->gb, s->vlc[1].table, VLC_BITS, 3); - s->temp[0][4*i ]= get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3) + s->temp[0][4*i+1]; - s->temp[0][4*i+2]= get_vlc2(&s->gb, s->vlc[2].table, VLC_BITS, 3) + s->temp[0][4*i+1]; + s->temp[0][4*i+G]= get_vlc2(&s->gb, s->vlc[1].table, VLC_BITS, 3); + s->temp[0][4*i+B]= get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3) + s->temp[0][4*i+G]; + s->temp[0][4*i+R]= get_vlc2(&s->gb, s->vlc[2].table, VLC_BITS, 3) + s->temp[0][4*i+G]; get_vlc2(&s->gb, s->vlc[2].table, VLC_BITS, 3); //?! } } }else{ if(s->bitstream_bpp==24){ for(i=0; i<count; i++){ - s->temp[0][4*i ]= get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3); - s->temp[0][4*i+1]= get_vlc2(&s->gb, s->vlc[1].table, VLC_BITS, 3); - s->temp[0][4*i+2]= get_vlc2(&s->gb, s->vlc[2].table, VLC_BITS, 3); + s->temp[0][4*i+B]= get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3); + s->temp[0][4*i+G]= get_vlc2(&s->gb, s->vlc[1].table, VLC_BITS, 3); + s->temp[0][4*i+R]= get_vlc2(&s->gb, s->vlc[2].table, VLC_BITS, 3); } }else{ for(i=0; i<count; i++){ - s->temp[0][4*i ]= get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3); - s->temp[0][4*i+1]= get_vlc2(&s->gb, s->vlc[1].table, VLC_BITS, 3); - s->temp[0][4*i+2]= get_vlc2(&s->gb, s->vlc[2].table, VLC_BITS, 3); + s->temp[0][4*i+B]= get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3); + s->temp[0][4*i+G]= get_vlc2(&s->gb, s->vlc[1].table, VLC_BITS, 3); + s->temp[0][4*i+R]= get_vlc2(&s->gb, s->vlc[2].table, VLC_BITS, 3); get_vlc2(&s->gb, s->vlc[2].table, VLC_BITS, 3); //?! 
} } @@ -681,17 +785,14 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8 const int height= s->height; int fake_ystride, fake_ustride, fake_vstride; AVFrame * const p= &s->picture; + int table_size= 0; AVFrame *picture = data; - /* no supplementary picture */ - if (buf_size == 0) - return 0; + s->bitstream_buffer= av_fast_realloc(s->bitstream_buffer, &s->bitstream_buffer_size, buf_size + FF_INPUT_BUFFER_PADDING_SIZE); s->dsp.bswap_buf((uint32_t*)s->bitstream_buffer, (uint32_t*)buf, buf_size/4); - init_get_bits(&s->gb, s->bitstream_buffer, buf_size*8); - if(p->data[0]) avctx->release_buffer(avctx, p); @@ -700,6 +801,14 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8 av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n"); return -1; } + + if(s->context){ + table_size = read_huffman_tables(s, s->bitstream_buffer, buf_size); + if(table_size < 0) + return -1; + } + + init_get_bits(&s->gb, s->bitstream_buffer+table_size, (buf_size-table_size)*8); fake_ystride= s->interlaced ? p->linesize[0]*2 : p->linesize[0]; fake_ustride= s->interlaced ? 
p->linesize[1]*2 : p->linesize[1]; @@ -858,14 +967,14 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8 const int last_line= (height-1)*p->linesize[0]; if(s->bitstream_bpp==32){ - p->data[0][last_line+3]= get_bits(&s->gb, 8); - leftr= p->data[0][last_line+2]= get_bits(&s->gb, 8); - leftg= p->data[0][last_line+1]= get_bits(&s->gb, 8); - leftb= p->data[0][last_line+0]= get_bits(&s->gb, 8); + skip_bits(&s->gb, 8); + leftr= p->data[0][last_line+R]= get_bits(&s->gb, 8); + leftg= p->data[0][last_line+G]= get_bits(&s->gb, 8); + leftb= p->data[0][last_line+B]= get_bits(&s->gb, 8); }else{ - leftr= p->data[0][last_line+2]= get_bits(&s->gb, 8); - leftg= p->data[0][last_line+1]= get_bits(&s->gb, 8); - leftb= p->data[0][last_line+0]= get_bits(&s->gb, 8); + leftr= p->data[0][last_line+R]= get_bits(&s->gb, 8); + leftg= p->data[0][last_line+G]= get_bits(&s->gb, 8); + leftb= p->data[0][last_line+B]= get_bits(&s->gb, 8); skip_bits(&s->gb, 8); } @@ -881,7 +990,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8 add_left_prediction_bgr32(p->data[0] + p->linesize[0]*y, s->temp[0], width, &leftr, &leftg, &leftb); if(s->predictor == PLANE){ - if((y&s->interlaced)==0){ + if((y&s->interlaced)==0 && y<s->height-1-s->interlaced){ s->dsp.add_bytes(p->data[0] + p->linesize[0]*y, p->data[0] + p->linesize[0]*y + fake_ystride, fake_ystride); } @@ -906,11 +1015,23 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8 return (get_bits_count(&s->gb)+31)/32*4; } +static int common_end(HYuvContext *s){ + int i; + + for(i=0; i<3; i++){ + av_freep(&s->temp[i]); + } + return 0; +} + static int decode_end(AVCodecContext *avctx) { HYuvContext *s = avctx->priv_data; int i; + common_end(s); + av_freep(&s->bitstream_buffer); + for(i=0; i<3; i++){ free_vlc(&s->vlc[i]); } @@ -928,14 +1049,27 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, const int fake_ustride= s->interlaced ? 
pict->linesize[1]*2 : pict->linesize[1]; const int fake_vstride= s->interlaced ? pict->linesize[2]*2 : pict->linesize[2]; AVFrame * const p= &s->picture; - int i, size; + int i, j, size=0; - init_put_bits(&s->pb, buf, buf_size); - *p = *pict; p->pict_type= FF_I_TYPE; p->key_frame= 1; + if(s->context){ + for(i=0; i<3; i++){ + generate_len_table(s->len[i], s->stats[i], 256); + if(generate_bits_table(s->bits[i], s->len[i])<0) + return -1; + size+= store_table(s, s->len[i], &buf[size]); + } + + for(i=0; i<3; i++) + for(j=0; j<256; j++) + s->stats[i][j] >>= 1; + } + + init_put_bits(&s->pb, buf+size, buf_size-size); + if(avctx->pix_fmt == PIX_FMT_YUV422P || avctx->pix_fmt == PIX_FMT_YUV420P){ int lefty, leftu, leftv, y, cy; @@ -963,8 +1097,8 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, } lefty= sub_left_prediction(s, s->temp[0], p->data[0]+fake_ystride, 4, lefty); - leftu= sub_left_prediction(s, s->temp[1], p->data[1]+fake_ystride, 2, leftu); - leftv= sub_left_prediction(s, s->temp[2], p->data[2]+fake_ystride, 2, leftv); + leftu= sub_left_prediction(s, s->temp[1], p->data[1]+fake_ustride, 2, leftu); + leftv= sub_left_prediction(s, s->temp[2], p->data[2]+fake_vstride, 2, leftv); encode_422_bitstream(s, 4); @@ -1026,11 +1160,11 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, if(s->predictor == PLANE && s->interlaced < cy){ s->dsp.diff_bytes(s->temp[1], ydst, ydst - fake_ystride, width); s->dsp.diff_bytes(s->temp[2], udst, udst - fake_ustride, width2); - s->dsp.diff_bytes(s->temp[2] + 1250, vdst, vdst - fake_vstride, width2); + s->dsp.diff_bytes(s->temp[2] + width2, vdst, vdst - fake_vstride, width2); lefty= sub_left_prediction(s, s->temp[0], s->temp[1], width , lefty); leftu= sub_left_prediction(s, s->temp[1], s->temp[2], width2, leftu); - leftv= sub_left_prediction(s, s->temp[2], s->temp[2] + 1250, width2, leftv); + leftv= sub_left_prediction(s, s->temp[2], s->temp[2] + width2, width2, leftv); 
}else{ lefty= sub_left_prediction(s, s->temp[0], ydst, width , lefty); leftu= sub_left_prediction(s, s->temp[1], udst, width2, leftu); @@ -1045,23 +1179,27 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, } emms_c(); - size= (put_bits_count(&s->pb)+31)/32; + size+= (put_bits_count(&s->pb)+31)/8; + size/= 4; if((s->flags&CODEC_FLAG_PASS1) && (s->picture_number&31)==0){ int j; char *p= avctx->stats_out; + char *end= p + 1024*30; for(i=0; i<3; i++){ for(j=0; j<256; j++){ - sprintf(p, "%llu ", s->stats[i][j]); + snprintf(p, end-p, "%llu ", s->stats[i][j]); p+= strlen(p); s->stats[i][j]= 0; } - sprintf(p, "\n"); + snprintf(p, end-p, "\n"); p++; } - }else{ + } + if(!(s->avctx->flags2 & CODEC_FLAG2_NO_OUTPUT)){ flush_put_bits(&s->pb); s->dsp.bswap_buf((uint32_t*)buf, (uint32_t*)buf, size); + avctx->stats_out[0] = '\0'; } s->picture_number++; @@ -1071,7 +1209,9 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, static int encode_end(AVCodecContext *avctx) { -// HYuvContext *s = avctx->priv_data; + HYuvContext *s = avctx->priv_data; + + common_end(s); av_freep(&avctx->extradata); av_freep(&avctx->stats_out); @@ -1079,12 +1219,6 @@ static int encode_end(AVCodecContext *avctx) return 0; } -static const AVOption huffyuv_options[] = -{ - AVOPTION_CODEC_INT("prediction_method", "prediction_method", prediction_method, 0, 2, 0), - AVOPTION_END() -}; - AVCodec huffyuv_decoder = { "huffyuv", CODEC_TYPE_VIDEO, @@ -1098,6 +1232,19 @@ AVCodec huffyuv_decoder = { NULL }; +AVCodec ffvhuff_decoder = { + "ffvhuff", + CODEC_TYPE_VIDEO, + CODEC_ID_FFVHUFF, + sizeof(HYuvContext), + decode_init, + NULL, + decode_end, + decode_frame, + CODEC_CAP_DR1 | CODEC_CAP_DRAW_HORIZ_BAND, + NULL +}; + #ifdef CONFIG_ENCODERS AVCodec huffyuv_encoder = { @@ -1108,7 +1255,16 @@ AVCodec huffyuv_encoder = { encode_init, encode_frame, encode_end, - .options = huffyuv_options, +}; + +AVCodec ffvhuff_encoder = { + "ffvhuff", + CODEC_TYPE_VIDEO, + 
CODEC_ID_FFVHUFF, + sizeof(HYuvContext), + encode_init, + encode_frame, + encode_end, }; #endif //CONFIG_ENCODERS diff --git a/src/libffmpeg/libavcodec/i386/cputest.c b/src/libffmpeg/libavcodec/i386/cputest.c index b50d653c4..593e0550d 100644 --- a/src/libffmpeg/libavcodec/i386/cputest.c +++ b/src/libffmpeg/libavcodec/i386/cputest.c @@ -4,12 +4,20 @@ #include <stdlib.h> #include "../dsputil.h" +#ifdef ARCH_X86_64 +# define REG_b "rbx" +# define REG_S "rsi" +#else +# define REG_b "ebx" +# define REG_S "esi" +#endif + /* ebx saving is necessary for PIC. gcc seems unable to see it alone */ #define cpuid(index,eax,ebx,ecx,edx)\ __asm __volatile\ - ("movl %%ebx, %%esi\n\t"\ + ("mov %%"REG_b", %%"REG_S"\n\t"\ "cpuid\n\t"\ - "xchgl %%ebx, %%esi"\ + "xchg %%"REG_b", %%"REG_S\ : "=a" (eax), "=S" (ebx),\ "=c" (ecx), "=d" (edx)\ : "0" (index)); @@ -17,82 +25,72 @@ /* Function to test if multimedia instructions are supported... */ int mm_support(void) { - int rval; + int rval = 0; int eax, ebx, ecx, edx; + int max_std_level, max_ext_level, std_caps=0, ext_caps=0; + long a, c; __asm__ __volatile__ ( /* See if CPUID instruction is supported ... */ /* ... Get copies of EFLAGS into eax and ecx */ "pushf\n\t" - "popl %0\n\t" - "movl %0, %1\n\t" + "pop %0\n\t" + "mov %0, %1\n\t" /* ... Toggle the ID bit in one copy and store */ /* to the EFLAGS reg */ - "xorl $0x200000, %0\n\t" + "xor $0x200000, %0\n\t" "push %0\n\t" "popf\n\t" /* ... 
Get the (hopefully modified) EFLAGS */ "pushf\n\t" - "popl %0\n\t" - : "=a" (eax), "=c" (ecx) + "pop %0\n\t" + : "=a" (a), "=c" (c) : : "cc" ); - if (eax == ecx) + if (a == c) return 0; /* CPUID not supported */ - - cpuid(0, eax, ebx, ecx, edx); - if (ebx == 0x756e6547 && - edx == 0x49656e69 && - ecx == 0x6c65746e) { - - /* intel */ - inteltest: - cpuid(1, eax, ebx, ecx, edx); - if ((edx & 0x00800000) == 0) - return 0; - rval = MM_MMX; - if (edx & 0x02000000) + cpuid(0, max_std_level, ebx, ecx, edx); + + if(max_std_level >= 1){ + cpuid(1, eax, ebx, ecx, std_caps); + if (std_caps & (1<<23)) + rval |= MM_MMX; + if (std_caps & (1<<25)) rval |= MM_MMXEXT | MM_SSE; - if (edx & 0x04000000) + if (std_caps & (1<<26)) rval |= MM_SSE2; - return rval; - } else if (ebx == 0x68747541 && + } + + cpuid(0x80000000, max_ext_level, ebx, ecx, edx); + + if(max_ext_level >= 0x80000001){ + cpuid(0x80000001, eax, ebx, ecx, ext_caps); + if (ext_caps & (1<<31)) + rval |= MM_3DNOW; + if (ext_caps & (1<<30)) + rval |= MM_3DNOWEXT; + if (ext_caps & (1<<23)) + rval |= MM_MMX; + } + + cpuid(0, eax, ebx, ecx, edx); + if ( ebx == 0x68747541 && edx == 0x69746e65 && ecx == 0x444d4163) { /* AMD */ - cpuid(0x80000000, eax, ebx, ecx, edx); - if ((unsigned)eax < 0x80000001) - goto inteltest; - cpuid(0x80000001, eax, ebx, ecx, edx); - if ((edx & 0x00800000) == 0) - return 0; - rval = MM_MMX; - if (edx & 0x80000000) - rval |= MM_3DNOW; - if (edx & 0x00400000) + if(ext_caps & (1<<22)) rval |= MM_MMXEXT; - return rval; } else if (ebx == 0x746e6543 && edx == 0x48727561 && ecx == 0x736c7561) { /* "CentaurHauls" */ /* VIA C3 */ - cpuid(0x80000000, eax, ebx, ecx, edx); - if ((unsigned)eax < 0x80000001) - goto inteltest; - cpuid(0x80000001, eax, ebx, ecx, edx); - rval = 0; - if( edx & ( 1 << 31) ) - rval |= MM_3DNOW; - if( edx & ( 1 << 23) ) - rval |= MM_MMX; - if( edx & ( 1 << 24) ) + if(ext_caps & (1<<24)) rval |= MM_MMXEXT; - return rval; } else if (ebx == 0x69727943 && edx == 0x736e4978 && ecx == 
0x64616574) { @@ -105,18 +103,21 @@ int mm_support(void) According to the table, the only CPU which supports level 2 is also the only one which supports extended CPUID levels. */ - if (eax != 2) - goto inteltest; - cpuid(0x80000001, eax, ebx, ecx, edx); - if ((eax & 0x00800000) == 0) - return 0; - rval = MM_MMX; - if (eax & 0x01000000) + if (eax < 2) + return rval; + if (ext_caps & (1<<24)) rval |= MM_MMXEXT; - return rval; - } else { - return 0; } +#if 0 + av_log(NULL, AV_LOG_DEBUG, "%s%s%s%s%s%s\n", + (rval&MM_MMX) ? "MMX ":"", + (rval&MM_MMXEXT) ? "MMX2 ":"", + (rval&MM_SSE) ? "SSE ":"", + (rval&MM_SSE2) ? "SSE2 ":"", + (rval&MM_3DNOW) ? "3DNow ":"", + (rval&MM_3DNOWEXT) ? "3DNowExt ":""); +#endif + return rval; } #ifdef __TEST__ @@ -124,7 +125,7 @@ int main ( void ) { int mm_flags; mm_flags = mm_support(); - printf("mm_support = 0x%08u\n",mm_flags); + printf("mm_support = 0x%08X\n",mm_flags); return 0; } #endif diff --git a/src/libffmpeg/libavcodec/i386/dsputil_mmx.c b/src/libffmpeg/libavcodec/i386/dsputil_mmx.c index 11504e225..550122673 100644 --- a/src/libffmpeg/libavcodec/i386/dsputil_mmx.c +++ b/src/libffmpeg/libavcodec/i386/dsputil_mmx.c @@ -22,6 +22,7 @@ #include "../dsputil.h" #include "../simple_idct.h" +#include "../mpegvideo.h" #include "mmx.h" //#undef NDEBUG @@ -38,7 +39,9 @@ static const uint64_t mm_wtwo attribute_used __attribute__ ((aligned(8))) = 0x00 static const uint64_t ff_pw_20 attribute_used __attribute__ ((aligned(8))) = 0x0014001400140014ULL; static const uint64_t ff_pw_3 attribute_used __attribute__ ((aligned(8))) = 0x0003000300030003ULL; +static const uint64_t ff_pw_5 attribute_used __attribute__ ((aligned(8))) = 0x0005000500050005ULL; static const uint64_t ff_pw_16 attribute_used __attribute__ ((aligned(8))) = 0x0010001000100010ULL; +static const uint64_t ff_pw_32 attribute_used __attribute__ ((aligned(8))) = 0x0020002000200020ULL; static const uint64_t ff_pw_15 attribute_used __attribute__ ((aligned(8))) = 0x000F000F000F000FULL; 
static const uint64_t ff_pb_FC attribute_used __attribute__ ((aligned(8))) = 0xFCFCFCFCFCFCFCFCULL; @@ -184,7 +187,7 @@ static const uint64_t ff_pb_FC attribute_used __attribute__ ((aligned(8))) = 0xF static void get_pixels_mmx(DCTELEM *block, const uint8_t *pixels, int line_size) { asm volatile( - "movl $-128, %%eax \n\t" + "mov $-128, %%"REG_a" \n\t" "pxor %%mm7, %%mm7 \n\t" ".balign 16 \n\t" "1: \n\t" @@ -196,16 +199,16 @@ static void get_pixels_mmx(DCTELEM *block, const uint8_t *pixels, int line_size) "punpckhbw %%mm7, %%mm1 \n\t" "punpcklbw %%mm7, %%mm2 \n\t" "punpckhbw %%mm7, %%mm3 \n\t" - "movq %%mm0, (%1, %%eax)\n\t" - "movq %%mm1, 8(%1, %%eax)\n\t" - "movq %%mm2, 16(%1, %%eax)\n\t" - "movq %%mm3, 24(%1, %%eax)\n\t" - "addl %3, %0 \n\t" - "addl $32, %%eax \n\t" + "movq %%mm0, (%1, %%"REG_a")\n\t" + "movq %%mm1, 8(%1, %%"REG_a")\n\t" + "movq %%mm2, 16(%1, %%"REG_a")\n\t" + "movq %%mm3, 24(%1, %%"REG_a")\n\t" + "add %3, %0 \n\t" + "add $32, %%"REG_a" \n\t" "js 1b \n\t" : "+r" (pixels) - : "r" (block+64), "r" (line_size), "r" (line_size*2) - : "%eax" + : "r" (block+64), "r" ((long)line_size), "r" ((long)line_size*2) + : "%"REG_a ); } @@ -213,7 +216,7 @@ static inline void diff_pixels_mmx(DCTELEM *block, const uint8_t *s1, const uint { asm volatile( "pxor %%mm7, %%mm7 \n\t" - "movl $-128, %%eax \n\t" + "mov $-128, %%"REG_a" \n\t" ".balign 16 \n\t" "1: \n\t" "movq (%0), %%mm0 \n\t" @@ -226,15 +229,15 @@ static inline void diff_pixels_mmx(DCTELEM *block, const uint8_t *s1, const uint "punpckhbw %%mm7, %%mm3 \n\t" "psubw %%mm2, %%mm0 \n\t" "psubw %%mm3, %%mm1 \n\t" - "movq %%mm0, (%2, %%eax)\n\t" - "movq %%mm1, 8(%2, %%eax)\n\t" - "addl %3, %0 \n\t" - "addl %3, %1 \n\t" - "addl $16, %%eax \n\t" + "movq %%mm0, (%2, %%"REG_a")\n\t" + "movq %%mm1, 8(%2, %%"REG_a")\n\t" + "add %3, %0 \n\t" + "add %3, %1 \n\t" + "add $16, %%"REG_a" \n\t" "jnz 1b \n\t" : "+r" (s1), "+r" (s2) - : "r" (block+64), "r" (stride) - : "%eax" + : "r" (block+64), "r" ((long)stride) + : "%"REG_a 
); } #endif //CONFIG_ENCODERS @@ -265,7 +268,7 @@ void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size "movq %%mm2, (%0, %1)\n\t" "movq %%mm4, (%0, %1, 2)\n\t" "movq %%mm6, (%0, %2)\n\t" - ::"r" (pix), "r" (line_size), "r" (line_size*3), "m"(*p) + ::"r" (pix), "r" ((long)line_size), "r" ((long)line_size*3), "m"(*p) :"memory"); pix += line_size*4; p += 32; @@ -290,7 +293,7 @@ void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size "movq %%mm2, (%0, %1)\n\t" "movq %%mm4, (%0, %1, 2)\n\t" "movq %%mm6, (%0, %2)\n\t" - ::"r" (pix), "r" (line_size), "r" (line_size*3), "r"(p) + ::"r" (pix), "r" ((long)line_size), "r" ((long)line_size*3), "r"(p) :"memory"); } @@ -353,36 +356,62 @@ void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size } while (--i); } +static void put_pixels4_mmx(uint8_t *block, const uint8_t *pixels, int line_size, int h) +{ + __asm __volatile( + "lea (%3, %3), %%"REG_a" \n\t" + ".balign 8 \n\t" + "1: \n\t" + "movd (%1), %%mm0 \n\t" + "movd (%1, %3), %%mm1 \n\t" + "movd %%mm0, (%2) \n\t" + "movd %%mm1, (%2, %3) \n\t" + "add %%"REG_a", %1 \n\t" + "add %%"REG_a", %2 \n\t" + "movd (%1), %%mm0 \n\t" + "movd (%1, %3), %%mm1 \n\t" + "movd %%mm0, (%2) \n\t" + "movd %%mm1, (%2, %3) \n\t" + "add %%"REG_a", %1 \n\t" + "add %%"REG_a", %2 \n\t" + "subl $4, %0 \n\t" + "jnz 1b \n\t" + : "+g"(h), "+r" (pixels), "+r" (block) + : "r"((long)line_size) + : "%"REG_a, "memory" + ); +} + static void put_pixels8_mmx(uint8_t *block, const uint8_t *pixels, int line_size, int h) { __asm __volatile( - "lea (%3, %3), %%eax \n\t" + "lea (%3, %3), %%"REG_a" \n\t" ".balign 8 \n\t" "1: \n\t" "movq (%1), %%mm0 \n\t" "movq (%1, %3), %%mm1 \n\t" "movq %%mm0, (%2) \n\t" "movq %%mm1, (%2, %3) \n\t" - "addl %%eax, %1 \n\t" - "addl %%eax, %2 \n\t" + "add %%"REG_a", %1 \n\t" + "add %%"REG_a", %2 \n\t" "movq (%1), %%mm0 \n\t" "movq (%1, %3), %%mm1 \n\t" "movq %%mm0, (%2) \n\t" "movq %%mm1, (%2, %3) \n\t" - "addl 
%%eax, %1 \n\t" - "addl %%eax, %2 \n\t" + "add %%"REG_a", %1 \n\t" + "add %%"REG_a", %2 \n\t" "subl $4, %0 \n\t" "jnz 1b \n\t" : "+g"(h), "+r" (pixels), "+r" (block) - : "r"(line_size) - : "%eax", "memory" + : "r"((long)line_size) + : "%"REG_a, "memory" ); } static void put_pixels16_mmx(uint8_t *block, const uint8_t *pixels, int line_size, int h) { __asm __volatile( - "lea (%3, %3), %%eax \n\t" + "lea (%3, %3), %%"REG_a" \n\t" ".balign 8 \n\t" "1: \n\t" "movq (%1), %%mm0 \n\t" @@ -393,8 +422,8 @@ static void put_pixels16_mmx(uint8_t *block, const uint8_t *pixels, int line_siz "movq %%mm4, 8(%2) \n\t" "movq %%mm1, (%2, %3) \n\t" "movq %%mm5, 8(%2, %3) \n\t" - "addl %%eax, %1 \n\t" - "addl %%eax, %2 \n\t" + "add %%"REG_a", %1 \n\t" + "add %%"REG_a", %2 \n\t" "movq (%1), %%mm0 \n\t" "movq 8(%1), %%mm4 \n\t" "movq (%1, %3), %%mm1 \n\t" @@ -403,13 +432,13 @@ static void put_pixels16_mmx(uint8_t *block, const uint8_t *pixels, int line_siz "movq %%mm4, 8(%2) \n\t" "movq %%mm1, (%2, %3) \n\t" "movq %%mm5, 8(%2, %3) \n\t" - "addl %%eax, %1 \n\t" - "addl %%eax, %2 \n\t" + "add %%"REG_a", %1 \n\t" + "add %%"REG_a", %2 \n\t" "subl $4, %0 \n\t" "jnz 1b \n\t" : "+g"(h), "+r" (pixels), "+r" (block) - : "r"(line_size) - : "%eax", "memory" + : "r"((long)line_size) + : "%"REG_a, "memory" ); } @@ -417,16 +446,16 @@ static void clear_blocks_mmx(DCTELEM *blocks) { __asm __volatile( "pxor %%mm7, %%mm7 \n\t" - "movl $-128*6, %%eax \n\t" + "mov $-128*6, %%"REG_a" \n\t" "1: \n\t" - "movq %%mm7, (%0, %%eax) \n\t" - "movq %%mm7, 8(%0, %%eax) \n\t" - "movq %%mm7, 16(%0, %%eax) \n\t" - "movq %%mm7, 24(%0, %%eax) \n\t" - "addl $32, %%eax \n\t" + "movq %%mm7, (%0, %%"REG_a") \n\t" + "movq %%mm7, 8(%0, %%"REG_a") \n\t" + "movq %%mm7, 16(%0, %%"REG_a") \n\t" + "movq %%mm7, 24(%0, %%"REG_a") \n\t" + "add $32, %%"REG_a" \n\t" " js 1b \n\t" - : : "r" (((int)blocks)+128*6) - : "%eax" + : : "r" (((uint8_t *)blocks)+128*6) + : "%"REG_a ); } @@ -434,7 +463,7 @@ static void clear_blocks_mmx(DCTELEM 
*blocks) static int pix_sum16_mmx(uint8_t * pix, int line_size){ const int h=16; int sum; - int index= -line_size*h; + long index= -line_size*h; __asm __volatile( "pxor %%mm7, %%mm7 \n\t" @@ -452,7 +481,7 @@ static int pix_sum16_mmx(uint8_t * pix, int line_size){ "paddw %%mm2, %%mm3 \n\t" "paddw %%mm1, %%mm3 \n\t" "paddw %%mm3, %%mm6 \n\t" - "addl %3, %1 \n\t" + "add %3, %1 \n\t" " js 1b \n\t" "movq %%mm6, %%mm5 \n\t" "psrlq $32, %%mm6 \n\t" @@ -463,7 +492,7 @@ static int pix_sum16_mmx(uint8_t * pix, int line_size){ "movd %%mm6, %0 \n\t" "andl $0xFFFF, %0 \n\t" : "=&r" (sum), "+r" (index) - : "r" (pix - index), "r" (line_size) + : "r" (pix - index), "r" ((long)line_size) ); return sum; @@ -471,7 +500,7 @@ static int pix_sum16_mmx(uint8_t * pix, int line_size){ #endif //CONFIG_ENCODERS static void add_bytes_mmx(uint8_t *dst, uint8_t *src, int w){ - int i=0; + long i=0; asm volatile( "1: \n\t" "movq (%1, %0), %%mm0 \n\t" @@ -482,11 +511,11 @@ static void add_bytes_mmx(uint8_t *dst, uint8_t *src, int w){ "movq 8(%2, %0), %%mm1 \n\t" "paddb %%mm0, %%mm1 \n\t" "movq %%mm1, 8(%2, %0) \n\t" - "addl $16, %0 \n\t" - "cmpl %3, %0 \n\t" + "add $16, %0 \n\t" + "cmp %3, %0 \n\t" " jb 1b \n\t" : "+r" (i) - : "r"(src), "r"(dst), "r"(w-15) + : "r"(src), "r"(dst), "r"((long)w-15) ); for(; i<w; i++) dst[i+0] += src[i+0]; @@ -643,26 +672,22 @@ static void h263_h_loop_filter_mmx(uint8_t *src, int stride, int qscale){ "punpcklwd %%mm0, %%mm1 \n\t" "punpckhwd %%mm4, %%mm3 \n\t" "punpckhwd %%mm0, %%mm6 \n\t" - "movd %%mm5, %0 \n\t" + "movd %%mm5, (%0) \n\t" "punpckhdq %%mm5, %%mm5 \n\t" - "movd %%mm5, %1 \n\t" - "movd %%mm3, %2 \n\t" + "movd %%mm5, (%0,%2) \n\t" + "movd %%mm3, (%0,%2,2) \n\t" "punpckhdq %%mm3, %%mm3 \n\t" - "movd %%mm3, %3 \n\t" - "movd %%mm1, %4 \n\t" + "movd %%mm3, (%0,%3) \n\t" + "movd %%mm1, (%1) \n\t" "punpckhdq %%mm1, %%mm1 \n\t" - "movd %%mm1, %5 \n\t" - "movd %%mm6, %6 \n\t" + "movd %%mm1, (%1,%2) \n\t" + "movd %%mm6, (%1,%2,2) \n\t" "punpckhdq %%mm6, %%mm6 \n\t" 
- "movd %%mm6, %7 \n\t" - : "=m" (*(uint32_t*)(src + 0*stride)), - "=m" (*(uint32_t*)(src + 1*stride)), - "=m" (*(uint32_t*)(src + 2*stride)), - "=m" (*(uint32_t*)(src + 3*stride)), - "=m" (*(uint32_t*)(src + 4*stride)), - "=m" (*(uint32_t*)(src + 5*stride)), - "=m" (*(uint32_t*)(src + 6*stride)), - "=m" (*(uint32_t*)(src + 7*stride)) + "movd %%mm6, (%1,%3) \n\t" + :: "r" (src), + "r" (src + 4*stride), + "r" ((long) stride ), + "r" ((long)(3*stride)) ); } @@ -697,7 +722,7 @@ static int pix_norm1_mmx(uint8_t *pix, int line_size) { "paddd %%mm3,%%mm4\n" "paddd %%mm2,%%mm7\n" - "addl %2, %0\n" + "add %2, %0\n" "paddd %%mm4,%%mm7\n" "dec %%ecx\n" "jnz 1b\n" @@ -706,7 +731,50 @@ static int pix_norm1_mmx(uint8_t *pix, int line_size) { "psrlq $32, %%mm7\n" /* shift hi dword to lo */ "paddd %%mm7,%%mm1\n" "movd %%mm1,%1\n" - : "+r" (pix), "=r"(tmp) : "r" (line_size) : "%ecx" ); + : "+r" (pix), "=r"(tmp) : "r" ((long)line_size) : "%ecx" ); + return tmp; +} + +static int sse8_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) { + int tmp; + asm volatile ( + "movl %4,%%ecx\n" + "pxor %%mm0,%%mm0\n" /* mm0 = 0 */ + "pxor %%mm7,%%mm7\n" /* mm7 holds the sum */ + "1:\n" + "movq (%0),%%mm1\n" /* mm1 = pix1[0-7] */ + "movq (%1),%%mm2\n" /* mm2 = pix2[0-7] */ + + "movq %%mm1,%%mm5\n" + "psubusb %%mm2,%%mm1\n" + "psubusb %%mm5,%%mm2\n" + + "por %%mm1,%%mm2\n" + + "movq %%mm2,%%mm1\n" + + "punpckhbw %%mm0,%%mm2\n" + "punpcklbw %%mm0,%%mm1\n" /* mm1 now spread over (mm1,mm2) */ + + "pmaddwd %%mm2,%%mm2\n" + "pmaddwd %%mm1,%%mm1\n" + + "add %3,%0\n" + "add %3,%1\n" + + "paddd %%mm2,%%mm1\n" + "paddd %%mm1,%%mm7\n" + + "decl %%ecx\n" + "jnz 1b\n" + + "movq %%mm7,%%mm1\n" + "psrlq $32, %%mm7\n" /* shift hi dword to lo */ + "paddd %%mm7,%%mm1\n" + "movd %%mm1,%2\n" + : "+r" (pix1), "+r" (pix2), "=r"(tmp) + : "r" ((long)line_size) , "m" (h) + : "%ecx"); return tmp; } @@ -749,8 +817,8 @@ static int sse16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int 
"pmaddwd %%mm1,%%mm1\n" "pmaddwd %%mm3,%%mm3\n" - "addl %3,%0\n" - "addl %3,%1\n" + "add %3,%0\n" + "add %3,%1\n" "paddd %%mm2,%%mm1\n" "paddd %%mm4,%%mm3\n" @@ -765,11 +833,266 @@ static int sse16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int "paddd %%mm7,%%mm1\n" "movd %%mm1,%2\n" : "+r" (pix1), "+r" (pix2), "=r"(tmp) - : "r" (line_size) , "m" (h) + : "r" ((long)line_size) , "m" (h) : "%ecx"); return tmp; } +static int hf_noise8_mmx(uint8_t * pix1, int line_size, int h) { + int tmp; + asm volatile ( + "movl %3,%%ecx\n" + "pxor %%mm7,%%mm7\n" + "pxor %%mm6,%%mm6\n" + + "movq (%0),%%mm0\n" + "movq %%mm0, %%mm1\n" + "psllq $8, %%mm0\n" + "psrlq $8, %%mm1\n" + "psrlq $8, %%mm0\n" + "movq %%mm0, %%mm2\n" + "movq %%mm1, %%mm3\n" + "punpcklbw %%mm7,%%mm0\n" + "punpcklbw %%mm7,%%mm1\n" + "punpckhbw %%mm7,%%mm2\n" + "punpckhbw %%mm7,%%mm3\n" + "psubw %%mm1, %%mm0\n" + "psubw %%mm3, %%mm2\n" + + "add %2,%0\n" + + "movq (%0),%%mm4\n" + "movq %%mm4, %%mm1\n" + "psllq $8, %%mm4\n" + "psrlq $8, %%mm1\n" + "psrlq $8, %%mm4\n" + "movq %%mm4, %%mm5\n" + "movq %%mm1, %%mm3\n" + "punpcklbw %%mm7,%%mm4\n" + "punpcklbw %%mm7,%%mm1\n" + "punpckhbw %%mm7,%%mm5\n" + "punpckhbw %%mm7,%%mm3\n" + "psubw %%mm1, %%mm4\n" + "psubw %%mm3, %%mm5\n" + "psubw %%mm4, %%mm0\n" + "psubw %%mm5, %%mm2\n" + "pxor %%mm3, %%mm3\n" + "pxor %%mm1, %%mm1\n" + "pcmpgtw %%mm0, %%mm3\n\t" + "pcmpgtw %%mm2, %%mm1\n\t" + "pxor %%mm3, %%mm0\n" + "pxor %%mm1, %%mm2\n" + "psubw %%mm3, %%mm0\n" + "psubw %%mm1, %%mm2\n" + "paddw %%mm0, %%mm2\n" + "paddw %%mm2, %%mm6\n" + + "add %2,%0\n" + "1:\n" + + "movq (%0),%%mm0\n" + "movq %%mm0, %%mm1\n" + "psllq $8, %%mm0\n" + "psrlq $8, %%mm1\n" + "psrlq $8, %%mm0\n" + "movq %%mm0, %%mm2\n" + "movq %%mm1, %%mm3\n" + "punpcklbw %%mm7,%%mm0\n" + "punpcklbw %%mm7,%%mm1\n" + "punpckhbw %%mm7,%%mm2\n" + "punpckhbw %%mm7,%%mm3\n" + "psubw %%mm1, %%mm0\n" + "psubw %%mm3, %%mm2\n" + "psubw %%mm0, %%mm4\n" + "psubw %%mm2, %%mm5\n" + "pxor %%mm3, %%mm3\n" + "pxor 
%%mm1, %%mm1\n" + "pcmpgtw %%mm4, %%mm3\n\t" + "pcmpgtw %%mm5, %%mm1\n\t" + "pxor %%mm3, %%mm4\n" + "pxor %%mm1, %%mm5\n" + "psubw %%mm3, %%mm4\n" + "psubw %%mm1, %%mm5\n" + "paddw %%mm4, %%mm5\n" + "paddw %%mm5, %%mm6\n" + + "add %2,%0\n" + + "movq (%0),%%mm4\n" + "movq %%mm4, %%mm1\n" + "psllq $8, %%mm4\n" + "psrlq $8, %%mm1\n" + "psrlq $8, %%mm4\n" + "movq %%mm4, %%mm5\n" + "movq %%mm1, %%mm3\n" + "punpcklbw %%mm7,%%mm4\n" + "punpcklbw %%mm7,%%mm1\n" + "punpckhbw %%mm7,%%mm5\n" + "punpckhbw %%mm7,%%mm3\n" + "psubw %%mm1, %%mm4\n" + "psubw %%mm3, %%mm5\n" + "psubw %%mm4, %%mm0\n" + "psubw %%mm5, %%mm2\n" + "pxor %%mm3, %%mm3\n" + "pxor %%mm1, %%mm1\n" + "pcmpgtw %%mm0, %%mm3\n\t" + "pcmpgtw %%mm2, %%mm1\n\t" + "pxor %%mm3, %%mm0\n" + "pxor %%mm1, %%mm2\n" + "psubw %%mm3, %%mm0\n" + "psubw %%mm1, %%mm2\n" + "paddw %%mm0, %%mm2\n" + "paddw %%mm2, %%mm6\n" + + "add %2,%0\n" + "subl $2, %%ecx\n" + " jnz 1b\n" + + "movq %%mm6, %%mm0\n" + "punpcklwd %%mm7,%%mm0\n" + "punpckhwd %%mm7,%%mm6\n" + "paddd %%mm0, %%mm6\n" + + "movq %%mm6,%%mm0\n" + "psrlq $32, %%mm6\n" + "paddd %%mm6,%%mm0\n" + "movd %%mm0,%1\n" + : "+r" (pix1), "=r"(tmp) + : "r" ((long)line_size) , "g" (h-2) + : "%ecx"); + return tmp; +} + +static int hf_noise16_mmx(uint8_t * pix1, int line_size, int h) { + int tmp; + uint8_t * pix= pix1; + asm volatile ( + "movl %3,%%ecx\n" + "pxor %%mm7,%%mm7\n" + "pxor %%mm6,%%mm6\n" + + "movq (%0),%%mm0\n" + "movq 1(%0),%%mm1\n" + "movq %%mm0, %%mm2\n" + "movq %%mm1, %%mm3\n" + "punpcklbw %%mm7,%%mm0\n" + "punpcklbw %%mm7,%%mm1\n" + "punpckhbw %%mm7,%%mm2\n" + "punpckhbw %%mm7,%%mm3\n" + "psubw %%mm1, %%mm0\n" + "psubw %%mm3, %%mm2\n" + + "add %2,%0\n" + + "movq (%0),%%mm4\n" + "movq 1(%0),%%mm1\n" + "movq %%mm4, %%mm5\n" + "movq %%mm1, %%mm3\n" + "punpcklbw %%mm7,%%mm4\n" + "punpcklbw %%mm7,%%mm1\n" + "punpckhbw %%mm7,%%mm5\n" + "punpckhbw %%mm7,%%mm3\n" + "psubw %%mm1, %%mm4\n" + "psubw %%mm3, %%mm5\n" + "psubw %%mm4, %%mm0\n" + "psubw %%mm5, %%mm2\n" + "pxor %%mm3, 
%%mm3\n" + "pxor %%mm1, %%mm1\n" + "pcmpgtw %%mm0, %%mm3\n\t" + "pcmpgtw %%mm2, %%mm1\n\t" + "pxor %%mm3, %%mm0\n" + "pxor %%mm1, %%mm2\n" + "psubw %%mm3, %%mm0\n" + "psubw %%mm1, %%mm2\n" + "paddw %%mm0, %%mm2\n" + "paddw %%mm2, %%mm6\n" + + "add %2,%0\n" + "1:\n" + + "movq (%0),%%mm0\n" + "movq 1(%0),%%mm1\n" + "movq %%mm0, %%mm2\n" + "movq %%mm1, %%mm3\n" + "punpcklbw %%mm7,%%mm0\n" + "punpcklbw %%mm7,%%mm1\n" + "punpckhbw %%mm7,%%mm2\n" + "punpckhbw %%mm7,%%mm3\n" + "psubw %%mm1, %%mm0\n" + "psubw %%mm3, %%mm2\n" + "psubw %%mm0, %%mm4\n" + "psubw %%mm2, %%mm5\n" + "pxor %%mm3, %%mm3\n" + "pxor %%mm1, %%mm1\n" + "pcmpgtw %%mm4, %%mm3\n\t" + "pcmpgtw %%mm5, %%mm1\n\t" + "pxor %%mm3, %%mm4\n" + "pxor %%mm1, %%mm5\n" + "psubw %%mm3, %%mm4\n" + "psubw %%mm1, %%mm5\n" + "paddw %%mm4, %%mm5\n" + "paddw %%mm5, %%mm6\n" + + "add %2,%0\n" + + "movq (%0),%%mm4\n" + "movq 1(%0),%%mm1\n" + "movq %%mm4, %%mm5\n" + "movq %%mm1, %%mm3\n" + "punpcklbw %%mm7,%%mm4\n" + "punpcklbw %%mm7,%%mm1\n" + "punpckhbw %%mm7,%%mm5\n" + "punpckhbw %%mm7,%%mm3\n" + "psubw %%mm1, %%mm4\n" + "psubw %%mm3, %%mm5\n" + "psubw %%mm4, %%mm0\n" + "psubw %%mm5, %%mm2\n" + "pxor %%mm3, %%mm3\n" + "pxor %%mm1, %%mm1\n" + "pcmpgtw %%mm0, %%mm3\n\t" + "pcmpgtw %%mm2, %%mm1\n\t" + "pxor %%mm3, %%mm0\n" + "pxor %%mm1, %%mm2\n" + "psubw %%mm3, %%mm0\n" + "psubw %%mm1, %%mm2\n" + "paddw %%mm0, %%mm2\n" + "paddw %%mm2, %%mm6\n" + + "add %2,%0\n" + "subl $2, %%ecx\n" + " jnz 1b\n" + + "movq %%mm6, %%mm0\n" + "punpcklwd %%mm7,%%mm0\n" + "punpckhwd %%mm7,%%mm6\n" + "paddd %%mm0, %%mm6\n" + + "movq %%mm6,%%mm0\n" + "psrlq $32, %%mm6\n" + "paddd %%mm6,%%mm0\n" + "movd %%mm0,%1\n" + : "+r" (pix1), "=r"(tmp) + : "r" ((long)line_size) , "g" (h-2) + : "%ecx"); + return tmp + hf_noise8_mmx(pix+8, line_size, h); +} + +static int nsse16_mmx(MpegEncContext *c, uint8_t * pix1, uint8_t * pix2, int line_size, int h) { + int score1= sse16_mmx(c, pix1, pix2, line_size, h); + int score2= hf_noise16_mmx(pix1, line_size, h) - 
hf_noise16_mmx(pix2, line_size, h); + + if(c) return score1 + ABS(score2)*c->avctx->nsse_weight; + else return score1 + ABS(score2)*8; +} + +static int nsse8_mmx(MpegEncContext *c, uint8_t * pix1, uint8_t * pix2, int line_size, int h) { + int score1= sse8_mmx(c, pix1, pix2, line_size, h); + int score2= hf_noise8_mmx(pix1, line_size, h) - hf_noise8_mmx(pix2, line_size, h); + + if(c) return score1 + ABS(score2)*c->avctx->nsse_weight; + else return score1 + ABS(score2)*8; +} + static int vsad_intra16_mmx(void *v, uint8_t * pix, uint8_t * dummy, int line_size, int h) { int tmp; @@ -779,7 +1102,7 @@ static int vsad_intra16_mmx(void *v, uint8_t * pix, uint8_t * dummy, int line_si #define SUM(in0, in1, out0, out1) \ "movq (%0), %%mm2\n"\ "movq 8(%0), %%mm3\n"\ - "addl %2,%0\n"\ + "add %2,%0\n"\ "movq %%mm2, " #out0 "\n"\ "movq %%mm3, " #out1 "\n"\ "psubusb " #in0 ", %%mm2\n"\ @@ -806,7 +1129,7 @@ static int vsad_intra16_mmx(void *v, uint8_t * pix, uint8_t * dummy, int line_si "pxor %%mm7,%%mm7\n" "movq (%0),%%mm0\n" "movq 8(%0),%%mm1\n" - "addl %2,%0\n" + "add %2,%0\n" "subl $2, %%ecx\n" SUM(%%mm0, %%mm1, %%mm4, %%mm5) "1:\n" @@ -826,7 +1149,7 @@ static int vsad_intra16_mmx(void *v, uint8_t * pix, uint8_t * dummy, int line_si "paddw %%mm6,%%mm0\n" "movd %%mm0,%1\n" : "+r" (pix), "=r"(tmp) - : "r" (line_size) , "m" (h) + : "r" ((long)line_size) , "m" (h) : "%ecx"); return tmp & 0xFFFF; } @@ -841,7 +1164,7 @@ static int vsad_intra16_mmx2(void *v, uint8_t * pix, uint8_t * dummy, int line_s #define SUM(in0, in1, out0, out1) \ "movq (%0), " #out0 "\n"\ "movq 8(%0), " #out1 "\n"\ - "addl %2,%0\n"\ + "add %2,%0\n"\ "psadbw " #out0 ", " #in0 "\n"\ "psadbw " #out1 ", " #in1 "\n"\ "paddw " #in1 ", " #in0 "\n"\ @@ -853,7 +1176,7 @@ static int vsad_intra16_mmx2(void *v, uint8_t * pix, uint8_t * dummy, int line_s "pxor %%mm7,%%mm7\n" "movq (%0),%%mm0\n" "movq 8(%0),%%mm1\n" - "addl %2,%0\n" + "add %2,%0\n" "subl $2, %%ecx\n" SUM(%%mm0, %%mm1, %%mm4, %%mm5) "1:\n" @@ -867,7 +1190,7 @@ 
static int vsad_intra16_mmx2(void *v, uint8_t * pix, uint8_t * dummy, int line_s "movd %%mm6,%1\n" : "+r" (pix), "=r"(tmp) - : "r" (line_size) , "m" (h) + : "r" ((long)line_size) , "m" (h) : "%ecx"); return tmp; } @@ -885,8 +1208,8 @@ static int vsad16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, in "movq (%1)," #out0 "\n"\ "movq 8(%0),%%mm3\n"\ "movq 8(%1)," #out1 "\n"\ - "addl %3,%0\n"\ - "addl %3,%1\n"\ + "add %3,%0\n"\ + "add %3,%1\n"\ "psubb " #out0 ", %%mm2\n"\ "psubb " #out1 ", %%mm3\n"\ "pxor %%mm7, %%mm2\n"\ @@ -921,8 +1244,8 @@ static int vsad16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, in "movq (%1),%%mm2\n" "movq 8(%0),%%mm1\n" "movq 8(%1),%%mm3\n" - "addl %3,%0\n" - "addl %3,%1\n" + "add %3,%0\n" + "add %3,%1\n" "subl $2, %%ecx\n" "psubb %%mm2, %%mm0\n" "psubb %%mm3, %%mm1\n" @@ -946,7 +1269,7 @@ static int vsad16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, in "paddw %%mm6,%%mm0\n" "movd %%mm0,%2\n" : "+r" (pix1), "+r" (pix2), "=r"(tmp) - : "r" (line_size) , "m" (h) + : "r" ((long)line_size) , "m" (h) : "%ecx"); return tmp & 0x7FFF; } @@ -964,8 +1287,8 @@ static int vsad16_mmx2(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, i "movq (%1),%%mm2\n"\ "movq 8(%0)," #out1 "\n"\ "movq 8(%1),%%mm3\n"\ - "addl %3,%0\n"\ - "addl %3,%1\n"\ + "add %3,%0\n"\ + "add %3,%1\n"\ "psubb %%mm2, " #out0 "\n"\ "psubb %%mm3, " #out1 "\n"\ "pxor %%mm7, " #out0 "\n"\ @@ -985,8 +1308,8 @@ static int vsad16_mmx2(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, i "movq (%1),%%mm2\n" "movq 8(%0),%%mm1\n" "movq 8(%1),%%mm3\n" - "addl %3,%0\n" - "addl %3,%1\n" + "add %3,%0\n" + "add %3,%1\n" "subl $2, %%ecx\n" "psubb %%mm2, %%mm0\n" "psubb %%mm3, %%mm1\n" @@ -1004,14 +1327,14 @@ static int vsad16_mmx2(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, i "movd %%mm6,%2\n" : "+r" (pix1), "+r" (pix2), "=r"(tmp) - : "r" (line_size) , "m" (h) + : "r" ((long)line_size) , "m" (h) : "%ecx"); return tmp; } #undef SUM 
static void diff_bytes_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){ - int i=0; + long i=0; asm volatile( "1: \n\t" "movq (%2, %0), %%mm0 \n\t" @@ -1022,20 +1345,20 @@ static void diff_bytes_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){ "movq 8(%1, %0), %%mm1 \n\t" "psubb %%mm0, %%mm1 \n\t" "movq %%mm1, 8(%3, %0) \n\t" - "addl $16, %0 \n\t" - "cmpl %4, %0 \n\t" + "add $16, %0 \n\t" + "cmp %4, %0 \n\t" " jb 1b \n\t" : "+r" (i) - : "r"(src1), "r"(src2), "r"(dst), "r"(w-15) + : "r"(src1), "r"(src2), "r"(dst), "r"((long)w-15) ); for(; i<w; i++) dst[i+0] = src1[i+0]-src2[i+0]; } static void sub_hfyu_median_prediction_mmx2(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top){ - int i=0; + long i=0; uint8_t l, lt; - + asm volatile( "1: \n\t" "movq -1(%1, %0), %%mm0 \n\t" // LT @@ -1052,11 +1375,11 @@ static void sub_hfyu_median_prediction_mmx2(uint8_t *dst, uint8_t *src1, uint8_t "pmaxub %%mm1, %%mm4 \n\t" "psubb %%mm4, %%mm3 \n\t" // dst - pred "movq %%mm3, (%3, %0) \n\t" - "addl $8, %0 \n\t" - "cmpl %4, %0 \n\t" + "add $8, %0 \n\t" + "cmp %4, %0 \n\t" " jb 1b \n\t" : "+r" (i) - : "r"(src1), "r"(src2), "r"(dst), "r"(w) + : "r"(src1), "r"(src2), "r"(dst), "r"((long)w) ); l= *left; @@ -1445,12 +1768,12 @@ static void OPNAME ## mpeg4_qpel16_h_lowpass_mmx2(uint8_t *dst, uint8_t *src, in "packuswb %%mm4, %%mm0 \n\t"\ OP_MMX2(%%mm0, 8(%1), %%mm4, q)\ \ - "addl %3, %0 \n\t"\ - "addl %4, %1 \n\t"\ + "add %3, %0 \n\t"\ + "add %4, %1 \n\t"\ "decl %2 \n\t"\ " jnz 1b \n\t"\ : "+a"(src), "+c"(dst), "+m"(h)\ - : "d"(srcStride), "S"(dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(temp), "m"(ROUNDER)\ + : "d"((long)srcStride), "S"((long)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(temp), "m"(ROUNDER)\ : "memory"\ );\ }\ @@ -1558,12 +1881,12 @@ static void OPNAME ## mpeg4_qpel8_h_lowpass_mmx2(uint8_t *dst, uint8_t *src, int "packuswb %%mm3, %%mm0 \n\t"\ OP_MMX2(%%mm0, (%1), %%mm4, q)\ \ - "addl %3, %0 \n\t"\ - "addl %4, %1 \n\t"\ + "add 
%3, %0 \n\t"\ + "add %4, %1 \n\t"\ "decl %2 \n\t"\ " jnz 1b \n\t"\ : "+a"(src), "+c"(dst), "+m"(h)\ - : "S"(srcStride), "D"(dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(temp), "m"(ROUNDER)\ + : "S"((long)srcStride), "D"((long)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(temp), "m"(ROUNDER)\ : "memory"\ );\ }\ @@ -1622,12 +1945,12 @@ static void OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, "movq %%mm1, 17*8(%1) \n\t"\ "movq %%mm2, 2*17*8(%1) \n\t"\ "movq %%mm3, 3*17*8(%1) \n\t"\ - "addl $8, %1 \n\t"\ - "addl %3, %0 \n\t"\ + "add $8, %1 \n\t"\ + "add %3, %0 \n\t"\ "decl %2 \n\t"\ " jnz 1b \n\t"\ : "+r" (src), "+r" (temp_ptr), "+r"(count)\ - : "r" (srcStride)\ + : "r" ((long)srcStride)\ : "memory"\ );\ \ @@ -1644,43 +1967,43 @@ static void OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, "movq 24(%0), %%mm3 \n\t"\ QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 16(%0), 8(%0), (%0), 32(%0), (%1), OP)\ QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 8(%0), (%0), (%0), 40(%0), (%1, %3), OP)\ - "addl %4, %1 \n\t"\ + "add %4, %1 \n\t"\ QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, (%0), (%0), 8(%0), 48(%0), (%1), OP)\ \ QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, (%0), 8(%0), 16(%0), 56(%0), (%1, %3), OP)\ - "addl %4, %1 \n\t"\ + "add %4, %1 \n\t"\ QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 8(%0), 16(%0), 24(%0), 64(%0), (%1), OP)\ QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 16(%0), 24(%0), 32(%0), 72(%0), (%1, %3), OP)\ - "addl %4, %1 \n\t"\ + "add %4, %1 \n\t"\ QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 24(%0), 32(%0), 40(%0), 80(%0), (%1), OP)\ QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 32(%0), 40(%0), 48(%0), 88(%0), (%1, %3), OP)\ - "addl %4, %1 \n\t"\ + "add %4, %1 \n\t"\ QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 40(%0), 48(%0), 56(%0), 96(%0), (%1), OP)\ QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 48(%0), 56(%0), 64(%0),104(%0), (%1, %3), OP)\ - "addl 
%4, %1 \n\t"\ + "add %4, %1 \n\t"\ QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 56(%0), 64(%0), 72(%0),112(%0), (%1), OP)\ QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 64(%0), 72(%0), 80(%0),120(%0), (%1, %3), OP)\ - "addl %4, %1 \n\t"\ + "add %4, %1 \n\t"\ QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 72(%0), 80(%0), 88(%0),128(%0), (%1), OP)\ \ QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 80(%0), 88(%0), 96(%0),128(%0), (%1, %3), OP)\ - "addl %4, %1 \n\t" \ + "add %4, %1 \n\t" \ QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 88(%0), 96(%0),104(%0),120(%0), (%1), OP)\ QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 96(%0),104(%0),112(%0),112(%0), (%1, %3), OP)\ \ - "addl $136, %0 \n\t"\ - "addl %6, %1 \n\t"\ + "add $136, %0 \n\t"\ + "add %6, %1 \n\t"\ "decl %2 \n\t"\ " jnz 1b \n\t"\ \ : "+r"(temp_ptr), "+r"(dst), "+g"(count)\ - : "r"(dstStride), "r"(2*dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(ROUNDER), "g"(4-14*dstStride)\ + : "r"((long)dstStride), "r"(2*(long)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(ROUNDER), "g"(4-14*(long)dstStride)\ :"memory"\ );\ }\ \ static void OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ - uint64_t temp[9*4];\ + uint64_t temp[9*2];\ uint64_t *temp_ptr= temp;\ int count= 9;\ \ @@ -1694,12 +2017,12 @@ static void OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, "punpckhbw %%mm7, %%mm1 \n\t"\ "movq %%mm0, (%1) \n\t"\ "movq %%mm1, 9*8(%1) \n\t"\ - "addl $8, %1 \n\t"\ - "addl %3, %0 \n\t"\ + "add $8, %1 \n\t"\ + "add %3, %0 \n\t"\ "decl %2 \n\t"\ " jnz 1b \n\t"\ : "+r" (src), "+r" (temp_ptr), "+r"(count)\ - : "r" (srcStride)\ + : "r" ((long)srcStride)\ : "memory"\ );\ \ @@ -1716,25 +2039,25 @@ static void OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, "movq 24(%0), %%mm3 \n\t"\ QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 16(%0), 8(%0), (%0), 32(%0), (%1), OP)\ QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, 
%5, %6, %5, 8(%0), (%0), (%0), 40(%0), (%1, %3), OP)\ - "addl %4, %1 \n\t"\ + "add %4, %1 \n\t"\ QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, (%0), (%0), 8(%0), 48(%0), (%1), OP)\ \ QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, (%0), 8(%0), 16(%0), 56(%0), (%1, %3), OP)\ - "addl %4, %1 \n\t"\ + "add %4, %1 \n\t"\ QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 8(%0), 16(%0), 24(%0), 64(%0), (%1), OP)\ \ QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 16(%0), 24(%0), 32(%0), 64(%0), (%1, %3), OP)\ - "addl %4, %1 \n\t"\ + "add %4, %1 \n\t"\ QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 24(%0), 32(%0), 40(%0), 56(%0), (%1), OP)\ QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 32(%0), 40(%0), 48(%0), 48(%0), (%1, %3), OP)\ \ - "addl $72, %0 \n\t"\ - "addl %6, %1 \n\t"\ + "add $72, %0 \n\t"\ + "add %6, %1 \n\t"\ "decl %2 \n\t"\ " jnz 1b \n\t"\ \ : "+r"(temp_ptr), "+r"(dst), "+g"(count)\ - : "r"(dstStride), "r"(2*dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(ROUNDER), "g"(4-6*dstStride)\ + : "r"((long)dstStride), "r"(2*(long)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(ROUNDER), "g"(4-6*(long)dstStride)\ : "memory"\ );\ }\ @@ -1747,7 +2070,7 @@ static void OPNAME ## qpel8_mc10_ ## MMX(uint8_t *dst, uint8_t *src, int stride) uint64_t temp[8];\ uint8_t * const half= (uint8_t*)temp;\ put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, stride, 8);\ - OPNAME ## pixels8_l2_mmx(dst, src, half, stride, stride, 8);\ + OPNAME ## pixels8_l2_ ## MMX(dst, src, half, stride, stride, 8);\ }\ \ static void OPNAME ## qpel8_mc20_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ @@ -1758,14 +2081,14 @@ static void OPNAME ## qpel8_mc30_ ## MMX(uint8_t *dst, uint8_t *src, int stride) uint64_t temp[8];\ uint8_t * const half= (uint8_t*)temp;\ put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, stride, 8);\ - OPNAME ## pixels8_l2_mmx(dst, src+1, half, stride, stride, 8);\ + OPNAME ## pixels8_l2_ ## MMX(dst, src+1, half, stride, stride, 8);\ }\ \ 
static void OPNAME ## qpel8_mc01_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ uint64_t temp[8];\ uint8_t * const half= (uint8_t*)temp;\ put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, 8, stride);\ - OPNAME ## pixels8_l2_mmx(dst, src, half, stride, stride, 8);\ + OPNAME ## pixels8_l2_ ## MMX(dst, src, half, stride, stride, 8);\ }\ \ static void OPNAME ## qpel8_mc02_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ @@ -1776,43 +2099,43 @@ static void OPNAME ## qpel8_mc03_ ## MMX(uint8_t *dst, uint8_t *src, int stride) uint64_t temp[8];\ uint8_t * const half= (uint8_t*)temp;\ put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, 8, stride);\ - OPNAME ## pixels8_l2_mmx(dst, src+stride, half, stride, stride, 8);\ + OPNAME ## pixels8_l2_ ## MMX(dst, src+stride, half, stride, stride, 8);\ }\ static void OPNAME ## qpel8_mc11_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ uint64_t half[8 + 9];\ uint8_t * const halfH= ((uint8_t*)half) + 64;\ uint8_t * const halfHV= ((uint8_t*)half);\ put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ - put ## RND ## pixels8_l2_mmx(halfH, src, halfH, 8, stride, 9);\ + put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8, stride, 9);\ put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ - OPNAME ## pixels8_l2_mmx(dst, halfH, halfHV, stride, 8, 8);\ + OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, stride, 8, 8);\ }\ static void OPNAME ## qpel8_mc31_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ uint64_t half[8 + 9];\ uint8_t * const halfH= ((uint8_t*)half) + 64;\ uint8_t * const halfHV= ((uint8_t*)half);\ put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ - put ## RND ## pixels8_l2_mmx(halfH, src+1, halfH, 8, stride, 9);\ + put ## RND ## pixels8_l2_ ## MMX(halfH, src+1, halfH, 8, stride, 9);\ put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ - OPNAME ## pixels8_l2_mmx(dst, halfH, halfHV, stride, 8, 8);\ + OPNAME ## pixels8_l2_ ## MMX(dst, halfH, 
halfHV, stride, 8, 8);\ }\ static void OPNAME ## qpel8_mc13_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ uint64_t half[8 + 9];\ uint8_t * const halfH= ((uint8_t*)half) + 64;\ uint8_t * const halfHV= ((uint8_t*)half);\ put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ - put ## RND ## pixels8_l2_mmx(halfH, src, halfH, 8, stride, 9);\ + put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8, stride, 9);\ put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ - OPNAME ## pixels8_l2_mmx(dst, halfH+8, halfHV, stride, 8, 8);\ + OPNAME ## pixels8_l2_ ## MMX(dst, halfH+8, halfHV, stride, 8, 8);\ }\ static void OPNAME ## qpel8_mc33_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ uint64_t half[8 + 9];\ uint8_t * const halfH= ((uint8_t*)half) + 64;\ uint8_t * const halfHV= ((uint8_t*)half);\ put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ - put ## RND ## pixels8_l2_mmx(halfH, src+1, halfH, 8, stride, 9);\ + put ## RND ## pixels8_l2_ ## MMX(halfH, src+1, halfH, 8, stride, 9);\ put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ - OPNAME ## pixels8_l2_mmx(dst, halfH+8, halfHV, stride, 8, 8);\ + OPNAME ## pixels8_l2_ ## MMX(dst, halfH+8, halfHV, stride, 8, 8);\ }\ static void OPNAME ## qpel8_mc21_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ uint64_t half[8 + 9];\ @@ -1820,7 +2143,7 @@ static void OPNAME ## qpel8_mc21_ ## MMX(uint8_t *dst, uint8_t *src, int stride) uint8_t * const halfHV= ((uint8_t*)half);\ put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ - OPNAME ## pixels8_l2_mmx(dst, halfH, halfHV, stride, 8, 8);\ + OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, stride, 8, 8);\ }\ static void OPNAME ## qpel8_mc23_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ uint64_t half[8 + 9];\ @@ -1828,20 +2151,20 @@ static void OPNAME ## qpel8_mc23_ ## MMX(uint8_t *dst, uint8_t *src, int stride) uint8_t * const 
halfHV= ((uint8_t*)half);\ put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\ - OPNAME ## pixels8_l2_mmx(dst, halfH+8, halfHV, stride, 8, 8);\ + OPNAME ## pixels8_l2_ ## MMX(dst, halfH+8, halfHV, stride, 8, 8);\ }\ static void OPNAME ## qpel8_mc12_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ uint64_t half[8 + 9];\ uint8_t * const halfH= ((uint8_t*)half);\ put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ - put ## RND ## pixels8_l2_mmx(halfH, src, halfH, 8, stride, 9);\ + put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8, stride, 9);\ OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, stride, 8);\ }\ static void OPNAME ## qpel8_mc32_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ uint64_t half[8 + 9];\ uint8_t * const halfH= ((uint8_t*)half);\ put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\ - put ## RND ## pixels8_l2_mmx(halfH, src+1, halfH, 8, stride, 9);\ + put ## RND ## pixels8_l2_ ## MMX(halfH, src+1, halfH, 8, stride, 9);\ OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, stride, 8);\ }\ static void OPNAME ## qpel8_mc22_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ @@ -1858,7 +2181,7 @@ static void OPNAME ## qpel16_mc10_ ## MMX(uint8_t *dst, uint8_t *src, int stride uint64_t temp[32];\ uint8_t * const half= (uint8_t*)temp;\ put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, stride, 16);\ - OPNAME ## pixels16_l2_mmx(dst, src, half, stride, stride, 16);\ + OPNAME ## pixels16_l2_ ## MMX(dst, src, half, stride, stride, 16);\ }\ \ static void OPNAME ## qpel16_mc20_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ @@ -1869,14 +2192,14 @@ static void OPNAME ## qpel16_mc30_ ## MMX(uint8_t *dst, uint8_t *src, int stride uint64_t temp[32];\ uint8_t * const half= (uint8_t*)temp;\ put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, stride, 16);\ - OPNAME ## pixels16_l2_mmx(dst, src+1, half, stride, stride, 
16);\ + OPNAME ## pixels16_l2_ ## MMX(dst, src+1, half, stride, stride, 16);\ }\ \ static void OPNAME ## qpel16_mc01_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ uint64_t temp[32];\ uint8_t * const half= (uint8_t*)temp;\ put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, stride);\ - OPNAME ## pixels16_l2_mmx(dst, src, half, stride, stride, 16);\ + OPNAME ## pixels16_l2_ ## MMX(dst, src, half, stride, stride, 16);\ }\ \ static void OPNAME ## qpel16_mc02_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ @@ -1887,43 +2210,43 @@ static void OPNAME ## qpel16_mc03_ ## MMX(uint8_t *dst, uint8_t *src, int stride uint64_t temp[32];\ uint8_t * const half= (uint8_t*)temp;\ put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, stride);\ - OPNAME ## pixels16_l2_mmx(dst, src+stride, half, stride, stride, 16);\ + OPNAME ## pixels16_l2_ ## MMX(dst, src+stride, half, stride, stride, 16);\ }\ static void OPNAME ## qpel16_mc11_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ uint64_t half[16*2 + 17*2];\ uint8_t * const halfH= ((uint8_t*)half) + 256;\ uint8_t * const halfHV= ((uint8_t*)half);\ put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ - put ## RND ## pixels16_l2_mmx(halfH, src, halfH, 16, stride, 17);\ + put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, stride, 17);\ put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ - OPNAME ## pixels16_l2_mmx(dst, halfH, halfHV, stride, 16, 16);\ + OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, stride, 16, 16);\ }\ static void OPNAME ## qpel16_mc31_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ uint64_t half[16*2 + 17*2];\ uint8_t * const halfH= ((uint8_t*)half) + 256;\ uint8_t * const halfHV= ((uint8_t*)half);\ put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ - put ## RND ## pixels16_l2_mmx(halfH, src+1, halfH, 16, stride, 17);\ + put ## RND ## pixels16_l2_ ## MMX(halfH, src+1, halfH, 16, stride, 17);\ put ## RND ## 
mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ - OPNAME ## pixels16_l2_mmx(dst, halfH, halfHV, stride, 16, 16);\ + OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, stride, 16, 16);\ }\ static void OPNAME ## qpel16_mc13_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ uint64_t half[16*2 + 17*2];\ uint8_t * const halfH= ((uint8_t*)half) + 256;\ uint8_t * const halfHV= ((uint8_t*)half);\ put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ - put ## RND ## pixels16_l2_mmx(halfH, src, halfH, 16, stride, 17);\ + put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, stride, 17);\ put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ - OPNAME ## pixels16_l2_mmx(dst, halfH+16, halfHV, stride, 16, 16);\ + OPNAME ## pixels16_l2_ ## MMX(dst, halfH+16, halfHV, stride, 16, 16);\ }\ static void OPNAME ## qpel16_mc33_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ uint64_t half[16*2 + 17*2];\ uint8_t * const halfH= ((uint8_t*)half) + 256;\ uint8_t * const halfHV= ((uint8_t*)half);\ put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ - put ## RND ## pixels16_l2_mmx(halfH, src+1, halfH, 16, stride, 17);\ + put ## RND ## pixels16_l2_ ## MMX(halfH, src+1, halfH, 16, stride, 17);\ put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ - OPNAME ## pixels16_l2_mmx(dst, halfH+16, halfHV, stride, 16, 16);\ + OPNAME ## pixels16_l2_ ## MMX(dst, halfH+16, halfHV, stride, 16, 16);\ }\ static void OPNAME ## qpel16_mc21_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ uint64_t half[16*2 + 17*2];\ @@ -1931,7 +2254,7 @@ static void OPNAME ## qpel16_mc21_ ## MMX(uint8_t *dst, uint8_t *src, int stride uint8_t * const halfHV= ((uint8_t*)half);\ put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ - OPNAME ## pixels16_l2_mmx(dst, halfH, halfHV, stride, 16, 16);\ + OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, 
stride, 16, 16);\ }\ static void OPNAME ## qpel16_mc23_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ uint64_t half[16*2 + 17*2];\ @@ -1939,20 +2262,20 @@ static void OPNAME ## qpel16_mc23_ ## MMX(uint8_t *dst, uint8_t *src, int stride uint8_t * const halfHV= ((uint8_t*)half);\ put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\ - OPNAME ## pixels16_l2_mmx(dst, halfH+16, halfHV, stride, 16, 16);\ + OPNAME ## pixels16_l2_ ## MMX(dst, halfH+16, halfHV, stride, 16, 16);\ }\ static void OPNAME ## qpel16_mc12_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ uint64_t half[17*2];\ uint8_t * const halfH= ((uint8_t*)half);\ put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ - put ## RND ## pixels16_l2_mmx(halfH, src, halfH, 16, stride, 17);\ + put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, stride, 17);\ OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, stride, 16);\ }\ static void OPNAME ## qpel16_mc32_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ uint64_t half[17*2];\ uint8_t * const halfH= ((uint8_t*)half);\ put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\ - put ## RND ## pixels16_l2_mmx(halfH, src+1, halfH, 16, stride, 17);\ + put ## RND ## pixels16_l2_ ## MMX(halfH, src+1, halfH, 16, stride, 17);\ OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, stride, 16);\ }\ static void OPNAME ## qpel16_mc22_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ @@ -1962,6 +2285,499 @@ static void OPNAME ## qpel16_mc22_ ## MMX(uint8_t *dst, uint8_t *src, int stride OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, stride, 16);\ } +#define QPEL_H264V(A,B,C,D,E,F,OP)\ + "movd (%0), "#F" \n\t"\ + "movq "#C", %%mm6 \n\t"\ + "paddw "#D", %%mm6 \n\t"\ + "psllw $2, %%mm6 \n\t"\ + "psubw "#B", %%mm6 \n\t"\ + "psubw "#E", %%mm6 \n\t"\ + "pmullw %4, %%mm6 \n\t"\ + "add %2, %0 \n\t"\ + "punpcklbw %%mm7, "#F" \n\t"\ + "paddw %5, 
"#A" \n\t"\ + "paddw "#F", "#A" \n\t"\ + "paddw "#A", %%mm6 \n\t"\ + "psraw $5, %%mm6 \n\t"\ + "packuswb %%mm6, %%mm6 \n\t"\ + OP(%%mm6, (%1), A, d)\ + "add %3, %1 \n\t" + +#define QPEL_H264HV(A,B,C,D,E,F,OF)\ + "movd (%0), "#F" \n\t"\ + "movq "#C", %%mm6 \n\t"\ + "paddw "#D", %%mm6 \n\t"\ + "psllw $2, %%mm6 \n\t"\ + "psubw "#B", %%mm6 \n\t"\ + "psubw "#E", %%mm6 \n\t"\ + "pmullw %3, %%mm6 \n\t"\ + "add %2, %0 \n\t"\ + "punpcklbw %%mm7, "#F" \n\t"\ + "paddw "#F", "#A" \n\t"\ + "paddw "#A", %%mm6 \n\t"\ + "movq %%mm6, "#OF"(%1) \n\t" + +#define QPEL_H264(OPNAME, OP, MMX)\ +static void OPNAME ## h264_qpel4_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ + int h=4;\ +\ + asm volatile(\ + "pxor %%mm7, %%mm7 \n\t"\ + "movq %5, %%mm4 \n\t"\ + "movq %6, %%mm5 \n\t"\ + "1: \n\t"\ + "movd -1(%0), %%mm1 \n\t"\ + "movd (%0), %%mm2 \n\t"\ + "movd 1(%0), %%mm3 \n\t"\ + "movd 2(%0), %%mm0 \n\t"\ + "punpcklbw %%mm7, %%mm1 \n\t"\ + "punpcklbw %%mm7, %%mm2 \n\t"\ + "punpcklbw %%mm7, %%mm3 \n\t"\ + "punpcklbw %%mm7, %%mm0 \n\t"\ + "paddw %%mm0, %%mm1 \n\t"\ + "paddw %%mm3, %%mm2 \n\t"\ + "movd -2(%0), %%mm0 \n\t"\ + "movd 3(%0), %%mm3 \n\t"\ + "punpcklbw %%mm7, %%mm0 \n\t"\ + "punpcklbw %%mm7, %%mm3 \n\t"\ + "paddw %%mm3, %%mm0 \n\t"\ + "psllw $2, %%mm2 \n\t"\ + "psubw %%mm1, %%mm2 \n\t"\ + "pmullw %%mm4, %%mm2 \n\t"\ + "paddw %%mm5, %%mm0 \n\t"\ + "paddw %%mm2, %%mm0 \n\t"\ + "psraw $5, %%mm0 \n\t"\ + "packuswb %%mm0, %%mm0 \n\t"\ + OP(%%mm0, (%1),%%mm6, d)\ + "add %3, %0 \n\t"\ + "add %4, %1 \n\t"\ + "decl %2 \n\t"\ + " jnz 1b \n\t"\ + : "+a"(src), "+c"(dst), "+m"(h)\ + : "d"((long)srcStride), "S"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\ + : "memory"\ + );\ +}\ +static void OPNAME ## h264_qpel4_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ + src -= 2*srcStride;\ + asm volatile(\ + "pxor %%mm7, %%mm7 \n\t"\ + "movd (%0), %%mm0 \n\t"\ + "add %2, %0 \n\t"\ + "movd (%0), %%mm1 \n\t"\ + "add %2, %0 \n\t"\ + "movd (%0), 
%%mm2 \n\t"\ + "add %2, %0 \n\t"\ + "movd (%0), %%mm3 \n\t"\ + "add %2, %0 \n\t"\ + "movd (%0), %%mm4 \n\t"\ + "add %2, %0 \n\t"\ + "punpcklbw %%mm7, %%mm0 \n\t"\ + "punpcklbw %%mm7, %%mm1 \n\t"\ + "punpcklbw %%mm7, %%mm2 \n\t"\ + "punpcklbw %%mm7, %%mm3 \n\t"\ + "punpcklbw %%mm7, %%mm4 \n\t"\ + QPEL_H264V(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\ + QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\ + QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\ + QPEL_H264V(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\ + \ + : "+a"(src), "+c"(dst)\ + : "S"((long)srcStride), "D"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\ + : "memory"\ + );\ +}\ +static void OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ + int h=4;\ + int w=3;\ + src -= 2*srcStride+2;\ + while(w--){\ + asm volatile(\ + "pxor %%mm7, %%mm7 \n\t"\ + "movd (%0), %%mm0 \n\t"\ + "add %2, %0 \n\t"\ + "movd (%0), %%mm1 \n\t"\ + "add %2, %0 \n\t"\ + "movd (%0), %%mm2 \n\t"\ + "add %2, %0 \n\t"\ + "movd (%0), %%mm3 \n\t"\ + "add %2, %0 \n\t"\ + "movd (%0), %%mm4 \n\t"\ + "add %2, %0 \n\t"\ + "punpcklbw %%mm7, %%mm0 \n\t"\ + "punpcklbw %%mm7, %%mm1 \n\t"\ + "punpcklbw %%mm7, %%mm2 \n\t"\ + "punpcklbw %%mm7, %%mm3 \n\t"\ + "punpcklbw %%mm7, %%mm4 \n\t"\ + QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 0*8*3)\ + QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 1*8*3)\ + QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, 2*8*3)\ + QPEL_H264HV(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, 3*8*3)\ + \ + : "+a"(src)\ + : "c"(tmp), "S"((long)srcStride), "m"(ff_pw_5)\ + : "memory"\ + );\ + tmp += 4;\ + src += 4 - 9*srcStride;\ + }\ + tmp -= 3*4;\ + asm volatile(\ + "movq %4, %%mm6 \n\t"\ + "1: \n\t"\ + "movq (%0), %%mm0 \n\t"\ + "paddw 10(%0), %%mm0 \n\t"\ + "movq 2(%0), %%mm1 \n\t"\ + "paddw 8(%0), %%mm1 \n\t"\ + "movq 4(%0), %%mm2 \n\t"\ + "paddw 6(%0), %%mm2 \n\t"\ + "psubw %%mm1, %%mm0 \n\t"/*a-b (abccba)*/\ + 
"psraw $2, %%mm0 \n\t"/*(a-b)/4 */\ + "psubw %%mm1, %%mm0 \n\t"/*(a-b)/4-b */\ + "paddsw %%mm2, %%mm0 \n\t"\ + "psraw $2, %%mm0 \n\t"/*((a-b)/4-b)/4 */\ + "paddw %%mm6, %%mm2 \n\t"\ + "paddw %%mm2, %%mm0 \n\t"\ + "psraw $6, %%mm0 \n\t"\ + "packuswb %%mm0, %%mm0 \n\t"\ + OP(%%mm0, (%1),%%mm7, d)\ + "add $24, %0 \n\t"\ + "add %3, %1 \n\t"\ + "decl %2 \n\t"\ + " jnz 1b \n\t"\ + : "+a"(tmp), "+c"(dst), "+m"(h)\ + : "S"((long)dstStride), "m"(ff_pw_32)\ + : "memory"\ + );\ +}\ +\ +static void OPNAME ## h264_qpel8_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ + int h=8;\ + asm volatile(\ + "pxor %%mm7, %%mm7 \n\t"\ + "movq %5, %%mm6 \n\t"\ + "1: \n\t"\ + "movq (%0), %%mm0 \n\t"\ + "movq 1(%0), %%mm2 \n\t"\ + "movq %%mm0, %%mm1 \n\t"\ + "movq %%mm2, %%mm3 \n\t"\ + "punpcklbw %%mm7, %%mm0 \n\t"\ + "punpckhbw %%mm7, %%mm1 \n\t"\ + "punpcklbw %%mm7, %%mm2 \n\t"\ + "punpckhbw %%mm7, %%mm3 \n\t"\ + "paddw %%mm2, %%mm0 \n\t"\ + "paddw %%mm3, %%mm1 \n\t"\ + "psllw $2, %%mm0 \n\t"\ + "psllw $2, %%mm1 \n\t"\ + "movq -1(%0), %%mm2 \n\t"\ + "movq 2(%0), %%mm4 \n\t"\ + "movq %%mm2, %%mm3 \n\t"\ + "movq %%mm4, %%mm5 \n\t"\ + "punpcklbw %%mm7, %%mm2 \n\t"\ + "punpckhbw %%mm7, %%mm3 \n\t"\ + "punpcklbw %%mm7, %%mm4 \n\t"\ + "punpckhbw %%mm7, %%mm5 \n\t"\ + "paddw %%mm4, %%mm2 \n\t"\ + "paddw %%mm3, %%mm5 \n\t"\ + "psubw %%mm2, %%mm0 \n\t"\ + "psubw %%mm5, %%mm1 \n\t"\ + "pmullw %%mm6, %%mm0 \n\t"\ + "pmullw %%mm6, %%mm1 \n\t"\ + "movd -2(%0), %%mm2 \n\t"\ + "movd 7(%0), %%mm5 \n\t"\ + "punpcklbw %%mm7, %%mm2 \n\t"\ + "punpcklbw %%mm7, %%mm5 \n\t"\ + "paddw %%mm3, %%mm2 \n\t"\ + "paddw %%mm5, %%mm4 \n\t"\ + "movq %6, %%mm5 \n\t"\ + "paddw %%mm5, %%mm2 \n\t"\ + "paddw %%mm5, %%mm4 \n\t"\ + "paddw %%mm2, %%mm0 \n\t"\ + "paddw %%mm4, %%mm1 \n\t"\ + "psraw $5, %%mm0 \n\t"\ + "psraw $5, %%mm1 \n\t"\ + "packuswb %%mm1, %%mm0 \n\t"\ + OP(%%mm0, (%1),%%mm5, q)\ + "add %3, %0 \n\t"\ + "add %4, %1 \n\t"\ + "decl %2 \n\t"\ + " jnz 1b \n\t"\ + : "+a"(src), "+c"(dst), 
"+m"(h)\ + : "d"((long)srcStride), "S"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\ + : "memory"\ + );\ +}\ +\ +static void OPNAME ## h264_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ + int h= 2;\ + src -= 2*srcStride;\ + \ + while(h--){\ + asm volatile(\ + "pxor %%mm7, %%mm7 \n\t"\ + "movd (%0), %%mm0 \n\t"\ + "add %2, %0 \n\t"\ + "movd (%0), %%mm1 \n\t"\ + "add %2, %0 \n\t"\ + "movd (%0), %%mm2 \n\t"\ + "add %2, %0 \n\t"\ + "movd (%0), %%mm3 \n\t"\ + "add %2, %0 \n\t"\ + "movd (%0), %%mm4 \n\t"\ + "add %2, %0 \n\t"\ + "punpcklbw %%mm7, %%mm0 \n\t"\ + "punpcklbw %%mm7, %%mm1 \n\t"\ + "punpcklbw %%mm7, %%mm2 \n\t"\ + "punpcklbw %%mm7, %%mm3 \n\t"\ + "punpcklbw %%mm7, %%mm4 \n\t"\ + QPEL_H264V(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\ + QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\ + QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\ + QPEL_H264V(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\ + QPEL_H264V(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP)\ + QPEL_H264V(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP)\ + QPEL_H264V(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\ + QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\ + \ + : "+a"(src), "+c"(dst)\ + : "S"((long)srcStride), "D"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\ + : "memory"\ + );\ + src += 4-13*srcStride;\ + dst += 4-8*dstStride;\ + }\ +}\ +static void OPNAME ## h264_qpel8_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ + int h=8;\ + int w=4;\ + src -= 2*srcStride+2;\ + while(w--){\ + asm volatile(\ + "pxor %%mm7, %%mm7 \n\t"\ + "movd (%0), %%mm0 \n\t"\ + "add %2, %0 \n\t"\ + "movd (%0), %%mm1 \n\t"\ + "add %2, %0 \n\t"\ + "movd (%0), %%mm2 \n\t"\ + "add %2, %0 \n\t"\ + "movd (%0), %%mm3 \n\t"\ + "add %2, %0 \n\t"\ + "movd (%0), %%mm4 \n\t"\ + "add %2, %0 \n\t"\ + "punpcklbw %%mm7, %%mm0 \n\t"\ + "punpcklbw %%mm7, %%mm1 \n\t"\ + "punpcklbw %%mm7, %%mm2 \n\t"\ + "punpcklbw 
%%mm7, %%mm3 \n\t"\ + "punpcklbw %%mm7, %%mm4 \n\t"\ + QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 0*8*4)\ + QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 1*8*4)\ + QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, 2*8*4)\ + QPEL_H264HV(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, 3*8*4)\ + QPEL_H264HV(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, 4*8*4)\ + QPEL_H264HV(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, 5*8*4)\ + QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 6*8*4)\ + QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 7*8*4)\ + \ + : "+a"(src)\ + : "c"(tmp), "S"((long)srcStride), "m"(ff_pw_5)\ + : "memory"\ + );\ + tmp += 4;\ + src += 4 - 13*srcStride;\ + }\ + tmp -= 4*4;\ + asm volatile(\ + "movq %4, %%mm6 \n\t"\ + "1: \n\t"\ + "movq (%0), %%mm0 \n\t"\ + "movq 8(%0), %%mm3 \n\t"\ + "movq 2(%0), %%mm1 \n\t"\ + "movq 10(%0), %%mm4 \n\t"\ + "paddw %%mm4, %%mm0 \n\t"\ + "paddw %%mm3, %%mm1 \n\t"\ + "paddw 18(%0), %%mm3 \n\t"\ + "paddw 16(%0), %%mm4 \n\t"\ + "movq 4(%0), %%mm2 \n\t"\ + "movq 12(%0), %%mm5 \n\t"\ + "paddw 6(%0), %%mm2 \n\t"\ + "paddw 14(%0), %%mm5 \n\t"\ + "psubw %%mm1, %%mm0 \n\t"\ + "psubw %%mm4, %%mm3 \n\t"\ + "psraw $2, %%mm0 \n\t"\ + "psraw $2, %%mm3 \n\t"\ + "psubw %%mm1, %%mm0 \n\t"\ + "psubw %%mm4, %%mm3 \n\t"\ + "paddsw %%mm2, %%mm0 \n\t"\ + "paddsw %%mm5, %%mm3 \n\t"\ + "psraw $2, %%mm0 \n\t"\ + "psraw $2, %%mm3 \n\t"\ + "paddw %%mm6, %%mm2 \n\t"\ + "paddw %%mm6, %%mm5 \n\t"\ + "paddw %%mm2, %%mm0 \n\t"\ + "paddw %%mm5, %%mm3 \n\t"\ + "psraw $6, %%mm0 \n\t"\ + "psraw $6, %%mm3 \n\t"\ + "packuswb %%mm3, %%mm0 \n\t"\ + OP(%%mm0, (%1),%%mm7, q)\ + "add $32, %0 \n\t"\ + "add %3, %1 \n\t"\ + "decl %2 \n\t"\ + " jnz 1b \n\t"\ + : "+a"(tmp), "+c"(dst), "+m"(h)\ + : "S"((long)dstStride), "m"(ff_pw_32)\ + : "memory"\ + );\ +}\ +static void OPNAME ## h264_qpel16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ + OPNAME ## h264_qpel8_v_lowpass_ ## MMX(dst , src , dstStride, srcStride);\ + 
OPNAME ## h264_qpel8_v_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\ + src += 8*srcStride;\ + dst += 8*dstStride;\ + OPNAME ## h264_qpel8_v_lowpass_ ## MMX(dst , src , dstStride, srcStride);\ + OPNAME ## h264_qpel8_v_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\ +}\ +\ +static void OPNAME ## h264_qpel16_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ + OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst , src , dstStride, srcStride);\ + OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\ + src += 8*srcStride;\ + dst += 8*dstStride;\ + OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst , src , dstStride, srcStride);\ + OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\ +}\ +\ +static void OPNAME ## h264_qpel16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ + OPNAME ## h264_qpel8_hv_lowpass_ ## MMX(dst , tmp , src , dstStride, tmpStride, srcStride);\ + OPNAME ## h264_qpel8_hv_lowpass_ ## MMX(dst+8, tmp , src+8, dstStride, tmpStride, srcStride);\ + src += 8*srcStride;\ + dst += 8*dstStride;\ + OPNAME ## h264_qpel8_hv_lowpass_ ## MMX(dst , tmp , src , dstStride, tmpStride, srcStride);\ + OPNAME ## h264_qpel8_hv_lowpass_ ## MMX(dst+8, tmp , src+8, dstStride, tmpStride, srcStride);\ +}\ + +#define H264_MC(OPNAME, SIZE, MMX) \ +static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## MMX (uint8_t *dst, uint8_t *src, int stride){\ + OPNAME ## pixels ## SIZE ## _mmx(dst, src, stride, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc10_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ + uint64_t temp[SIZE*SIZE/8];\ + uint8_t * const half= (uint8_t*)temp;\ + put_h264_qpel ## SIZE ## _h_lowpass_ ## MMX(half, src, SIZE, stride);\ + OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, src, half, stride, stride, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc20_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ + OPNAME ## 
h264_qpel ## SIZE ## _h_lowpass_ ## MMX(dst, src, stride, stride);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc30_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ + uint64_t temp[SIZE*SIZE/8];\ + uint8_t * const half= (uint8_t*)temp;\ + put_h264_qpel ## SIZE ## _h_lowpass_ ## MMX(half, src, SIZE, stride);\ + OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, src+1, half, stride, stride, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ + uint64_t temp[SIZE*SIZE/8];\ + uint8_t * const half= (uint8_t*)temp;\ + put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(half, src, SIZE, stride);\ + OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, src, half, stride, stride, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc02_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ + OPNAME ## h264_qpel ## SIZE ## _v_lowpass_ ## MMX(dst, src, stride, stride);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ + uint64_t temp[SIZE*SIZE/8];\ + uint8_t * const half= (uint8_t*)temp;\ + put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(half, src, SIZE, stride);\ + OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, src+stride, half, stride, stride, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ + uint64_t temp[SIZE*SIZE/4];\ + uint8_t * const halfH= (uint8_t*)temp;\ + uint8_t * const halfV= ((uint8_t*)temp) + SIZE*SIZE;\ + put_h264_qpel ## SIZE ## _h_lowpass_ ## MMX(halfH, src, SIZE, stride);\ + put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(halfV, src, SIZE, stride);\ + OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, halfH, halfV, stride, SIZE, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ + uint64_t temp[SIZE*SIZE/4];\ + uint8_t * const halfH= (uint8_t*)temp;\ + uint8_t * const halfV= ((uint8_t*)temp) + SIZE*SIZE;\ + put_h264_qpel ## SIZE ## 
_h_lowpass_ ## MMX(halfH, src, SIZE, stride);\ + put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(halfV, src+1, SIZE, stride);\ + OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, halfH, halfV, stride, SIZE, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ + uint64_t temp[SIZE*SIZE/4];\ + uint8_t * const halfH= (uint8_t*)temp;\ + uint8_t * const halfV= ((uint8_t*)temp) + SIZE*SIZE;\ + put_h264_qpel ## SIZE ## _h_lowpass_ ## MMX(halfH, src + stride, SIZE, stride);\ + put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(halfV, src, SIZE, stride);\ + OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, halfH, halfV, stride, SIZE, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ + uint64_t temp[SIZE*SIZE/4];\ + uint8_t * const halfH= (uint8_t*)temp;\ + uint8_t * const halfV= ((uint8_t*)temp) + SIZE*SIZE;\ + put_h264_qpel ## SIZE ## _h_lowpass_ ## MMX(halfH, src + stride, SIZE, stride);\ + put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(halfV, src+1, SIZE, stride);\ + OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, halfH, halfV, stride, SIZE, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ + uint64_t temp[SIZE*(SIZE+8)/4];\ + int16_t * const tmp= (int16_t*)temp;\ + OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(dst, tmp, src, stride, SIZE, stride);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ + uint64_t temp[SIZE*(SIZE+8)/4 + SIZE*SIZE/4];\ + uint8_t * const halfH= (uint8_t*)temp;\ + uint8_t * const halfHV= ((uint8_t*)temp) + SIZE*SIZE;\ + int16_t * const tmp= ((int16_t*)temp) + SIZE*SIZE;\ + put_h264_qpel ## SIZE ## _h_lowpass_ ## MMX(halfH, src, SIZE, stride);\ + put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, tmp, src, SIZE, SIZE, stride);\ + OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, halfH, halfHV, stride, SIZE, SIZE);\ +}\ +\ 
+static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ + uint64_t temp[SIZE*(SIZE+8)/4 + SIZE*SIZE/4];\ + uint8_t * const halfH= (uint8_t*)temp;\ + uint8_t * const halfHV= ((uint8_t*)temp) + SIZE*SIZE;\ + int16_t * const tmp= ((int16_t*)temp) + SIZE*SIZE;\ + put_h264_qpel ## SIZE ## _h_lowpass_ ## MMX(halfH, src + stride, SIZE, stride);\ + put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, tmp, src, SIZE, SIZE, stride);\ + OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, halfH, halfHV, stride, SIZE, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ + uint64_t temp[SIZE*(SIZE+8)/4 + SIZE*SIZE/4];\ + uint8_t * const halfV= (uint8_t*)temp;\ + uint8_t * const halfHV= ((uint8_t*)temp) + SIZE*SIZE;\ + int16_t * const tmp= ((int16_t*)temp) + SIZE*SIZE;\ + put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(halfV, src, SIZE, stride);\ + put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, tmp, src, SIZE, SIZE, stride);\ + OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, halfV, halfHV, stride, SIZE, SIZE);\ +}\ +\ +static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ + uint64_t temp[SIZE*(SIZE+8)/4 + SIZE*SIZE/4];\ + uint8_t * const halfV= (uint8_t*)temp;\ + uint8_t * const halfHV= ((uint8_t*)temp) + SIZE*SIZE;\ + int16_t * const tmp= ((int16_t*)temp) + SIZE*SIZE;\ + put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(halfV, src+1, SIZE, stride);\ + put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, tmp, src, SIZE, SIZE, stride);\ + OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, halfV, halfHV, stride, SIZE, SIZE);\ +}\ + #define PUT_OP(a,b,temp, size) "mov" #size " " #a ", " #b " \n\t" #define AVG_3DNOW_OP(a,b,temp, size) \ @@ -1983,6 +2799,24 @@ QPEL_OP(put_ , ff_pw_16, _ , PUT_OP, mmx2) QPEL_OP(avg_ , ff_pw_16, _ , AVG_MMX2_OP, mmx2) QPEL_OP(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, mmx2) +QPEL_H264(put_ , PUT_OP, 3dnow) +QPEL_H264(avg_ , 
AVG_3DNOW_OP, 3dnow) +QPEL_H264(put_ , PUT_OP, mmx2) +QPEL_H264(avg_ , AVG_MMX2_OP, mmx2) + +H264_MC(put_, 4, 3dnow) +H264_MC(put_, 8, 3dnow) +H264_MC(put_, 16,3dnow) +H264_MC(avg_, 4, 3dnow) +H264_MC(avg_, 8, 3dnow) +H264_MC(avg_, 16,3dnow) +H264_MC(put_, 4, mmx2) +H264_MC(put_, 8, mmx2) +H264_MC(put_, 16,mmx2) +H264_MC(avg_, 4, mmx2) +H264_MC(avg_, 8, mmx2) +H264_MC(avg_, 16,mmx2) + #if 0 static void just_return() { return; } #endif @@ -1993,7 +2827,7 @@ static void just_return() { return; } c->avg_ ## postfix1 = avg_ ## postfix2; static int try_8x8basis_mmx(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){ - int i=0; + long i=0; assert(ABS(scale) < 256); scale<<= 16 + 1 - BASIS_SHIFT + RECON_SHIFT; @@ -2025,8 +2859,8 @@ static int try_8x8basis_mmx(int16_t rem[64], int16_t weight[64], int16_t basis[6 "paddd %%mm1, %%mm0 \n\t" "psrld $4, %%mm0 \n\t" "paddd %%mm0, %%mm7 \n\t" - "addl $16, %0 \n\t" - "cmpl $128, %0 \n\t" //FIXME optimize & bench + "add $16, %0 \n\t" + "cmp $128, %0 \n\t" //FIXME optimize & bench " jb 1b \n\t" "movq %%mm7, %%mm6 \n\t" "psrlq $32, %%mm7 \n\t" @@ -2041,7 +2875,7 @@ static int try_8x8basis_mmx(int16_t rem[64], int16_t weight[64], int16_t basis[6 } static void add_8x8basis_mmx(int16_t rem[64], int16_t basis[64], int scale){ - int i=0; + long i=0; if(ABS(scale) < 256){ scale<<= 16 + 1 - BASIS_SHIFT + RECON_SHIFT; @@ -2064,8 +2898,8 @@ static void add_8x8basis_mmx(int16_t rem[64], int16_t basis[64], int scale){ "paddw 8(%2, %0), %%mm1 \n\t" "movq %%mm0, (%2, %0) \n\t" "movq %%mm1, 8(%2, %0) \n\t" - "addl $16, %0 \n\t" - "cmpl $128, %0 \n\t" //FIXME optimize & bench + "add $16, %0 \n\t" + "cmp $128, %0 \n\t" //FIXME optimize & bench " jb 1b \n\t" : "+r" (i) @@ -2146,23 +2980,24 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) } } #endif //CONFIG_ENCODERS - - if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_SIMPLEMMX){ - c->idct_put= ff_simple_idct_put_mmx; - c->idct_add= ff_simple_idct_add_mmx; - c->idct = 
ff_simple_idct_mmx; - c->idct_permutation_type= FF_SIMPLE_IDCT_PERM; - }else if(idct_algo==FF_IDCT_LIBMPEG2MMX){ - if(mm_flags & MM_MMXEXT){ - c->idct_put= ff_libmpeg2mmx2_idct_put; - c->idct_add= ff_libmpeg2mmx2_idct_add; - c->idct = ff_mmxext_idct; - }else{ - c->idct_put= ff_libmpeg2mmx_idct_put; - c->idct_add= ff_libmpeg2mmx_idct_add; - c->idct = ff_mmx_idct; + if(avctx->lowres==0){ + if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_SIMPLEMMX){ + c->idct_put= ff_simple_idct_put_mmx; + c->idct_add= ff_simple_idct_add_mmx; + c->idct = ff_simple_idct_mmx; + c->idct_permutation_type= FF_SIMPLE_IDCT_PERM; + }else if(idct_algo==FF_IDCT_LIBMPEG2MMX){ + if(mm_flags & MM_MMXEXT){ + c->idct_put= ff_libmpeg2mmx2_idct_put; + c->idct_add= ff_libmpeg2mmx2_idct_add; + c->idct = ff_mmxext_idct; + }else{ + c->idct_put= ff_libmpeg2mmx_idct_put; + c->idct_add= ff_libmpeg2mmx_idct_add; + c->idct = ff_mmx_idct; + } + c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM; } - c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM; } /* VP3 optimized DSP functions */ @@ -2235,8 +3070,11 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) c->pix_norm1 = pix_norm1_mmx; c->sse[0] = sse16_mmx; + c->sse[1] = sse8_mmx; c->vsad[4]= vsad_intra16_mmx; + c->nsse[0] = nsse16_mmx; + c->nsse[1] = nsse8_mmx; if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ c->vsad[0] = vsad16_mmx; } @@ -2319,6 +3157,33 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) SET_QPEL_FUNC(qpel_pixels_tab[1][15], qpel8_mc33_mmx2) #endif +//FIXME 3dnow too +#define dspfunc(PFX, IDX, NUM) \ + c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_mmx2; \ + c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_mmx2; \ + c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_mmx2; \ + c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_mmx2; \ + c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_mmx2; \ + c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_mmx2; \ + c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## 
_mc21_mmx2; \ + c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_mmx2; \ + c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_mmx2; \ + c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_mmx2; \ + c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_mmx2; \ + c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_mmx2; \ + c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_mmx2; \ + c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_mmx2; \ + c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_mmx2; \ + c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_mmx2 + + dspfunc(put_h264_qpel, 0, 16); + dspfunc(put_h264_qpel, 1, 8); + dspfunc(put_h264_qpel, 2, 4); + dspfunc(avg_h264_qpel, 0, 16); + dspfunc(avg_h264_qpel, 1, 8); + dspfunc(avg_h264_qpel, 2, 4); +#undef dspfunc + #ifdef CONFIG_ENCODERS c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_mmx2; #endif //CONFIG_ENCODERS @@ -2378,6 +3243,31 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) SET_QPEL_FUNC(qpel_pixels_tab[1][13], qpel8_mc13_3dnow) SET_QPEL_FUNC(qpel_pixels_tab[1][14], qpel8_mc23_3dnow) SET_QPEL_FUNC(qpel_pixels_tab[1][15], qpel8_mc33_3dnow) + +#define dspfunc(PFX, IDX, NUM) \ + c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_3dnow; \ + c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_3dnow; \ + c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_3dnow; \ + c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_3dnow; \ + c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_3dnow; \ + c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_3dnow; \ + c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_3dnow; \ + c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_3dnow; \ + c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_3dnow; \ + c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_3dnow; \ + c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_3dnow; \ + c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_3dnow; \ + c->PFX ## _pixels_tab[IDX][12] = PFX 
## NUM ## _mc03_3dnow; \ + c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_3dnow; \ + c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_3dnow; \ + c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_3dnow + + dspfunc(put_h264_qpel, 0, 16); + dspfunc(put_h264_qpel, 1, 8); + dspfunc(put_h264_qpel, 2, 4); + dspfunc(avg_h264_qpel, 0, 16); + dspfunc(avg_h264_qpel, 1, 8); + dspfunc(avg_h264_qpel, 2, 4); } } diff --git a/src/libffmpeg/libavcodec/i386/dsputil_mmx_avg.h b/src/libffmpeg/libavcodec/i386/dsputil_mmx_avg.h index 052aad75c..c70891304 100644 --- a/src/libffmpeg/libavcodec/i386/dsputil_mmx_avg.h +++ b/src/libffmpeg/libavcodec/i386/dsputil_mmx_avg.h @@ -28,7 +28,7 @@ static void DEF(put_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) { __asm __volatile( - "lea (%3, %3), %%eax \n\t" + "lea (%3, %3), %%"REG_a" \n\t" "1: \n\t" "movq (%1), %%mm0 \n\t" "movq (%1, %3), %%mm1 \n\t" @@ -36,59 +36,305 @@ static void DEF(put_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_ PAVGB" 1(%1, %3), %%mm1 \n\t" "movq %%mm0, (%2) \n\t" "movq %%mm1, (%2, %3) \n\t" - "addl %%eax, %1 \n\t" - "addl %%eax, %2 \n\t" + "add %%"REG_a", %1 \n\t" + "add %%"REG_a", %2 \n\t" "movq (%1), %%mm0 \n\t" "movq (%1, %3), %%mm1 \n\t" PAVGB" 1(%1), %%mm0 \n\t" PAVGB" 1(%1, %3), %%mm1 \n\t" - "addl %%eax, %1 \n\t" + "add %%"REG_a", %1 \n\t" "movq %%mm0, (%2) \n\t" "movq %%mm1, (%2, %3) \n\t" - "addl %%eax, %2 \n\t" + "add %%"REG_a", %2 \n\t" "subl $4, %0 \n\t" "jnz 1b \n\t" :"+g"(h), "+S"(pixels), "+D"(block) - :"r" (line_size) - :"%eax", "memory"); + :"r" ((long)line_size) + :"%"REG_a, "memory"); } -static __attribute__((unused)) void DEF(put_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) +static void DEF(put_pixels4_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) { __asm __volatile( + "testl $1, %0 \n\t" + " jz 1f \n\t" + "movd (%1), %%mm0 \n\t" + "movd (%2), %%mm1 \n\t" + 
"add %4, %1 \n\t" + "add $4, %2 \n\t" + PAVGB" %%mm1, %%mm0 \n\t" + "movd %%mm0, (%3) \n\t" + "add %5, %3 \n\t" + "decl %0 \n\t" + "1: \n\t" + "movd (%1), %%mm0 \n\t" + "add %4, %1 \n\t" + "movd (%1), %%mm1 \n\t" + "movd (%2), %%mm2 \n\t" + "movd 4(%2), %%mm3 \n\t" + "add %4, %1 \n\t" + PAVGB" %%mm2, %%mm0 \n\t" + PAVGB" %%mm3, %%mm1 \n\t" + "movd %%mm0, (%3) \n\t" + "add %5, %3 \n\t" + "movd %%mm1, (%3) \n\t" + "add %5, %3 \n\t" + "movd (%1), %%mm0 \n\t" + "add %4, %1 \n\t" + "movd (%1), %%mm1 \n\t" + "movd 8(%2), %%mm2 \n\t" + "movd 12(%2), %%mm3 \n\t" + "add %4, %1 \n\t" + PAVGB" %%mm2, %%mm0 \n\t" + PAVGB" %%mm3, %%mm1 \n\t" + "movd %%mm0, (%3) \n\t" + "add %5, %3 \n\t" + "movd %%mm1, (%3) \n\t" + "add %5, %3 \n\t" + "add $16, %2 \n\t" + "subl $4, %0 \n\t" + "jnz 1b \n\t" +#ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cant be used + :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst) +#else + :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst) +#endif + :"S"((long)src1Stride), "D"((long)dstStride) + :"memory"); +} + + +static void DEF(put_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) +{ + __asm __volatile( + "testl $1, %0 \n\t" + " jz 1f \n\t" + "movq (%1), %%mm0 \n\t" + "movq (%2), %%mm1 \n\t" + "add %4, %1 \n\t" + "add $8, %2 \n\t" + PAVGB" %%mm1, %%mm0 \n\t" + "movq %%mm0, (%3) \n\t" + "add %5, %3 \n\t" + "decl %0 \n\t" + "1: \n\t" + "movq (%1), %%mm0 \n\t" + "add %4, %1 \n\t" + "movq (%1), %%mm1 \n\t" + "add %4, %1 \n\t" + PAVGB" (%2), %%mm0 \n\t" + PAVGB" 8(%2), %%mm1 \n\t" + "movq %%mm0, (%3) \n\t" + "add %5, %3 \n\t" + "movq %%mm1, (%3) \n\t" + "add %5, %3 \n\t" + "movq (%1), %%mm0 \n\t" + "add %4, %1 \n\t" + "movq (%1), %%mm1 \n\t" + "add %4, %1 \n\t" + PAVGB" 16(%2), %%mm0 \n\t" + PAVGB" 24(%2), %%mm1 \n\t" + "movq %%mm0, (%3) \n\t" + "add %5, %3 \n\t" + "movq %%mm1, (%3) \n\t" + "add %5, %3 \n\t" + "add $32, %2 \n\t" + "subl $4, %0 \n\t" + "jnz 1b \n\t" +#ifdef PIC //Note "+bm" and "+mb" 
are buggy too (with gcc 3.2.2 at least) and cant be used + :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst) +#else + :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst) +#endif + :"S"((long)src1Stride), "D"((long)dstStride) + :"memory"); +//the following should be used, though better not with gcc ... +/* :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst) + :"r"(src1Stride), "r"(dstStride) + :"memory");*/ +} + +static void DEF(put_no_rnd_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) +{ + __asm __volatile( + "pcmpeqb %%mm6, %%mm6 \n\t" + "testl $1, %0 \n\t" + " jz 1f \n\t" + "movq (%1), %%mm0 \n\t" + "movq (%2), %%mm1 \n\t" + "add %4, %1 \n\t" + "add $8, %2 \n\t" + "pxor %%mm6, %%mm0 \n\t" + "pxor %%mm6, %%mm1 \n\t" + PAVGB" %%mm1, %%mm0 \n\t" + "pxor %%mm6, %%mm0 \n\t" + "movq %%mm0, (%3) \n\t" + "add %5, %3 \n\t" + "decl %0 \n\t" + "1: \n\t" + "movq (%1), %%mm0 \n\t" + "add %4, %1 \n\t" + "movq (%1), %%mm1 \n\t" + "add %4, %1 \n\t" + "movq (%2), %%mm2 \n\t" + "movq 8(%2), %%mm3 \n\t" + "pxor %%mm6, %%mm0 \n\t" + "pxor %%mm6, %%mm1 \n\t" + "pxor %%mm6, %%mm2 \n\t" + "pxor %%mm6, %%mm3 \n\t" + PAVGB" %%mm2, %%mm0 \n\t" + PAVGB" %%mm3, %%mm1 \n\t" + "pxor %%mm6, %%mm0 \n\t" + "pxor %%mm6, %%mm1 \n\t" + "movq %%mm0, (%3) \n\t" + "add %5, %3 \n\t" + "movq %%mm1, (%3) \n\t" + "add %5, %3 \n\t" + "movq (%1), %%mm0 \n\t" + "add %4, %1 \n\t" + "movq (%1), %%mm1 \n\t" + "add %4, %1 \n\t" + "movq 16(%2), %%mm2 \n\t" + "movq 24(%2), %%mm3 \n\t" + "pxor %%mm6, %%mm0 \n\t" + "pxor %%mm6, %%mm1 \n\t" + "pxor %%mm6, %%mm2 \n\t" + "pxor %%mm6, %%mm3 \n\t" + PAVGB" %%mm2, %%mm0 \n\t" + PAVGB" %%mm3, %%mm1 \n\t" + "pxor %%mm6, %%mm0 \n\t" + "pxor %%mm6, %%mm1 \n\t" + "movq %%mm0, (%3) \n\t" + "add %5, %3 \n\t" + "movq %%mm1, (%3) \n\t" + "add %5, %3 \n\t" + "add $32, %2 \n\t" + "subl $4, %0 \n\t" + "jnz 1b \n\t" +#ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cant be used + :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst) +#else + 
:"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst) +#endif + :"S"((long)src1Stride), "D"((long)dstStride) + :"memory"); +//the following should be used, though better not with gcc ... +/* :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst) + :"r"(src1Stride), "r"(dstStride) + :"memory");*/ +} + +static void DEF(avg_pixels4_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) +{ + __asm __volatile( + "testl $1, %0 \n\t" + " jz 1f \n\t" + "movd (%1), %%mm0 \n\t" + "movd (%2), %%mm1 \n\t" + "add %4, %1 \n\t" + "add $4, %2 \n\t" + PAVGB" %%mm1, %%mm0 \n\t" + PAVGB" (%3), %%mm0 \n\t" + "movd %%mm0, (%3) \n\t" + "add %5, %3 \n\t" + "decl %0 \n\t" + "1: \n\t" + "movd (%1), %%mm0 \n\t" + "add %4, %1 \n\t" + "movd (%1), %%mm1 \n\t" + "add %4, %1 \n\t" + PAVGB" (%2), %%mm0 \n\t" + PAVGB" 4(%2), %%mm1 \n\t" + PAVGB" (%3), %%mm0 \n\t" + "movd %%mm0, (%3) \n\t" + "add %5, %3 \n\t" + PAVGB" (%3), %%mm1 \n\t" + "movd %%mm1, (%3) \n\t" + "add %5, %3 \n\t" + "movd (%1), %%mm0 \n\t" + "add %4, %1 \n\t" + "movd (%1), %%mm1 \n\t" + "add %4, %1 \n\t" + PAVGB" 8(%2), %%mm0 \n\t" + PAVGB" 12(%2), %%mm1 \n\t" + PAVGB" (%3), %%mm0 \n\t" + "movd %%mm0, (%3) \n\t" + "add %5, %3 \n\t" + PAVGB" (%3), %%mm1 \n\t" + "movd %%mm1, (%3) \n\t" + "add %5, %3 \n\t" + "add $16, %2 \n\t" + "subl $4, %0 \n\t" + "jnz 1b \n\t" +#ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cant be used + :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst) +#else + :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst) +#endif + :"S"((long)src1Stride), "D"((long)dstStride) + :"memory"); +} + + +static void DEF(avg_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) +{ + __asm __volatile( + "testl $1, %0 \n\t" + " jz 1f \n\t" + "movq (%1), %%mm0 \n\t" + "movq (%2), %%mm1 \n\t" + "add %4, %1 \n\t" + "add $8, %2 \n\t" + PAVGB" %%mm1, %%mm0 \n\t" + PAVGB" (%3), %%mm0 \n\t" + "movq %%mm0, (%3) \n\t" + "add %5, %3 \n\t" + "decl %0 \n\t" "1: \n\t" "movq (%1), 
%%mm0 \n\t" - "addl %4, %1 \n\t" + "add %4, %1 \n\t" "movq (%1), %%mm1 \n\t" - "addl %4, %1 \n\t" + "add %4, %1 \n\t" PAVGB" (%2), %%mm0 \n\t" PAVGB" 8(%2), %%mm1 \n\t" + PAVGB" (%3), %%mm0 \n\t" "movq %%mm0, (%3) \n\t" - "addl %5, %3 \n\t" + "add %5, %3 \n\t" + PAVGB" (%3), %%mm1 \n\t" "movq %%mm1, (%3) \n\t" - "addl %5, %3 \n\t" + "add %5, %3 \n\t" "movq (%1), %%mm0 \n\t" - "addl %4, %1 \n\t" + "add %4, %1 \n\t" "movq (%1), %%mm1 \n\t" - "addl %4, %1 \n\t" + "add %4, %1 \n\t" PAVGB" 16(%2), %%mm0 \n\t" PAVGB" 24(%2), %%mm1 \n\t" + PAVGB" (%3), %%mm0 \n\t" "movq %%mm0, (%3) \n\t" - "addl %5, %3 \n\t" + "add %5, %3 \n\t" + PAVGB" (%3), %%mm1 \n\t" "movq %%mm1, (%3) \n\t" - "addl %5, %3 \n\t" - "addl $32, %2 \n\t" + "add %5, %3 \n\t" + "add $32, %2 \n\t" "subl $4, %0 \n\t" "jnz 1b \n\t" - :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst) +#ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cant be used + :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst) +#else + :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst) +#endif + :"S"((long)src1Stride), "D"((long)dstStride) + :"memory"); +//the following should be used, though better not with gcc ... 
+/* :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst) :"r"(src1Stride), "r"(dstStride) - :"memory"); + :"memory");*/ } static void DEF(put_pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) { __asm __volatile( - "lea (%3, %3), %%eax \n\t" + "lea (%3, %3), %%"REG_a" \n\t" "1: \n\t" "movq (%1), %%mm0 \n\t" "movq (%1, %3), %%mm1 \n\t" @@ -102,8 +348,8 @@ static void DEF(put_pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line "movq %%mm1, (%2, %3) \n\t" "movq %%mm2, 8(%2) \n\t" "movq %%mm3, 8(%2, %3) \n\t" - "addl %%eax, %1 \n\t" - "addl %%eax, %2 \n\t" + "add %%"REG_a", %1 \n\t" + "add %%"REG_a", %2 \n\t" "movq (%1), %%mm0 \n\t" "movq (%1, %3), %%mm1 \n\t" "movq 8(%1), %%mm2 \n\t" @@ -112,45 +358,192 @@ static void DEF(put_pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line PAVGB" 1(%1, %3), %%mm1 \n\t" PAVGB" 9(%1), %%mm2 \n\t" PAVGB" 9(%1, %3), %%mm3 \n\t" - "addl %%eax, %1 \n\t" + "add %%"REG_a", %1 \n\t" "movq %%mm0, (%2) \n\t" "movq %%mm1, (%2, %3) \n\t" "movq %%mm2, 8(%2) \n\t" "movq %%mm3, 8(%2, %3) \n\t" - "addl %%eax, %2 \n\t" + "add %%"REG_a", %2 \n\t" "subl $4, %0 \n\t" "jnz 1b \n\t" :"+g"(h), "+S"(pixels), "+D"(block) - :"r" (line_size) - :"%eax", "memory"); + :"r" ((long)line_size) + :"%"REG_a, "memory"); +} + +static void DEF(put_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) +{ + __asm __volatile( + "testl $1, %0 \n\t" + " jz 1f \n\t" + "movq (%1), %%mm0 \n\t" + "movq 8(%1), %%mm1 \n\t" + PAVGB" (%2), %%mm0 \n\t" + PAVGB" 8(%2), %%mm1 \n\t" + "add %4, %1 \n\t" + "add $16, %2 \n\t" + "movq %%mm0, (%3) \n\t" + "movq %%mm1, 8(%3) \n\t" + "add %5, %3 \n\t" + "decl %0 \n\t" + "1: \n\t" + "movq (%1), %%mm0 \n\t" + "movq 8(%1), %%mm1 \n\t" + "add %4, %1 \n\t" + PAVGB" (%2), %%mm0 \n\t" + PAVGB" 8(%2), %%mm1 \n\t" + "movq %%mm0, (%3) \n\t" + "movq %%mm1, 8(%3) \n\t" + "add %5, %3 \n\t" + "movq (%1), %%mm0 \n\t" + "movq 8(%1), %%mm1 \n\t" + "add %4, %1 \n\t" + PAVGB" 16(%2), %%mm0 
\n\t" + PAVGB" 24(%2), %%mm1 \n\t" + "movq %%mm0, (%3) \n\t" + "movq %%mm1, 8(%3) \n\t" + "add %5, %3 \n\t" + "add $32, %2 \n\t" + "subl $2, %0 \n\t" + "jnz 1b \n\t" +#ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cant be used + :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst) +#else + :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst) +#endif + :"S"((long)src1Stride), "D"((long)dstStride) + :"memory"); +//the following should be used, though better not with gcc ... +/* :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst) + :"r"(src1Stride), "r"(dstStride) + :"memory");*/ } -static __attribute__((unused)) void DEF(put_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) +static void DEF(avg_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) { __asm __volatile( + "testl $1, %0 \n\t" + " jz 1f \n\t" + "movq (%1), %%mm0 \n\t" + "movq 8(%1), %%mm1 \n\t" + PAVGB" (%2), %%mm0 \n\t" + PAVGB" 8(%2), %%mm1 \n\t" + "add %4, %1 \n\t" + "add $16, %2 \n\t" + PAVGB" (%3), %%mm0 \n\t" + PAVGB" 8(%3), %%mm1 \n\t" + "movq %%mm0, (%3) \n\t" + "movq %%mm1, 8(%3) \n\t" + "add %5, %3 \n\t" + "decl %0 \n\t" "1: \n\t" "movq (%1), %%mm0 \n\t" "movq 8(%1), %%mm1 \n\t" - "addl %4, %1 \n\t" + "add %4, %1 \n\t" PAVGB" (%2), %%mm0 \n\t" PAVGB" 8(%2), %%mm1 \n\t" + PAVGB" (%3), %%mm0 \n\t" + PAVGB" 8(%3), %%mm1 \n\t" "movq %%mm0, (%3) \n\t" "movq %%mm1, 8(%3) \n\t" - "addl %5, %3 \n\t" + "add %5, %3 \n\t" "movq (%1), %%mm0 \n\t" "movq 8(%1), %%mm1 \n\t" - "addl %4, %1 \n\t" + "add %4, %1 \n\t" PAVGB" 16(%2), %%mm0 \n\t" PAVGB" 24(%2), %%mm1 \n\t" + PAVGB" (%3), %%mm0 \n\t" + PAVGB" 8(%3), %%mm1 \n\t" + "movq %%mm0, (%3) \n\t" + "movq %%mm1, 8(%3) \n\t" + "add %5, %3 \n\t" + "add $32, %2 \n\t" + "subl $2, %0 \n\t" + "jnz 1b \n\t" +#ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cant be used + :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst) +#else + :"+b"(h), "+a"(src1), 
"+c"(src2), "+d"(dst) +#endif + :"S"((long)src1Stride), "D"((long)dstStride) + :"memory"); +//the following should be used, though better not with gcc ... +/* :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst) + :"r"(src1Stride), "r"(dstStride) + :"memory");*/ +} + +static void DEF(put_no_rnd_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) +{ + __asm __volatile( + "pcmpeqb %%mm6, %%mm6\n\t" + "testl $1, %0 \n\t" + " jz 1f \n\t" + "movq (%1), %%mm0 \n\t" + "movq 8(%1), %%mm1 \n\t" + "movq (%2), %%mm2 \n\t" + "movq 8(%2), %%mm3 \n\t" + "pxor %%mm6, %%mm0 \n\t" + "pxor %%mm6, %%mm1 \n\t" + "pxor %%mm6, %%mm2 \n\t" + "pxor %%mm6, %%mm3 \n\t" + PAVGB" %%mm2, %%mm0 \n\t" + PAVGB" %%mm3, %%mm1 \n\t" + "pxor %%mm6, %%mm0 \n\t" + "pxor %%mm6, %%mm1 \n\t" + "add %4, %1 \n\t" + "add $16, %2 \n\t" + "movq %%mm0, (%3) \n\t" + "movq %%mm1, 8(%3) \n\t" + "add %5, %3 \n\t" + "decl %0 \n\t" + "1: \n\t" + "movq (%1), %%mm0 \n\t" + "movq 8(%1), %%mm1 \n\t" + "add %4, %1 \n\t" + "movq (%2), %%mm2 \n\t" + "movq 8(%2), %%mm3 \n\t" + "pxor %%mm6, %%mm0 \n\t" + "pxor %%mm6, %%mm1 \n\t" + "pxor %%mm6, %%mm2 \n\t" + "pxor %%mm6, %%mm3 \n\t" + PAVGB" %%mm2, %%mm0 \n\t" + PAVGB" %%mm3, %%mm1 \n\t" + "pxor %%mm6, %%mm0 \n\t" + "pxor %%mm6, %%mm1 \n\t" + "movq %%mm0, (%3) \n\t" + "movq %%mm1, 8(%3) \n\t" + "add %5, %3 \n\t" + "movq (%1), %%mm0 \n\t" + "movq 8(%1), %%mm1 \n\t" + "add %4, %1 \n\t" + "movq 16(%2), %%mm2 \n\t" + "movq 24(%2), %%mm3 \n\t" + "pxor %%mm6, %%mm0 \n\t" + "pxor %%mm6, %%mm1 \n\t" + "pxor %%mm6, %%mm2 \n\t" + "pxor %%mm6, %%mm3 \n\t" + PAVGB" %%mm2, %%mm0 \n\t" + PAVGB" %%mm3, %%mm1 \n\t" + "pxor %%mm6, %%mm0 \n\t" + "pxor %%mm6, %%mm1 \n\t" "movq %%mm0, (%3) \n\t" "movq %%mm1, 8(%3) \n\t" - "addl %5, %3 \n\t" - "addl $32, %2 \n\t" + "add %5, %3 \n\t" + "add $32, %2 \n\t" "subl $2, %0 \n\t" "jnz 1b \n\t" - :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst) +#ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cant 
be used + :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst) +#else + :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst) +#endif + :"S"((long)src1Stride), "D"((long)dstStride) + :"memory"); +//the following should be used, though better not with gcc ... +/* :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst) :"r"(src1Stride), "r"(dstStride) - :"memory"); + :"memory");*/ } /* GL: this function does incorrect rounding if overflow */ @@ -158,13 +551,13 @@ static void DEF(put_no_rnd_pixels8_x2)(uint8_t *block, const uint8_t *pixels, in { MOVQ_BONE(mm6); __asm __volatile( - "lea (%3, %3), %%eax \n\t" + "lea (%3, %3), %%"REG_a" \n\t" "1: \n\t" "movq (%1), %%mm0 \n\t" "movq (%1, %3), %%mm2 \n\t" "movq 1(%1), %%mm1 \n\t" "movq 1(%1, %3), %%mm3 \n\t" - "addl %%eax, %1 \n\t" + "add %%"REG_a", %1 \n\t" "psubusb %%mm6, %%mm0 \n\t" "psubusb %%mm6, %%mm2 \n\t" PAVGB" %%mm1, %%mm0 \n\t" @@ -175,50 +568,50 @@ static void DEF(put_no_rnd_pixels8_x2)(uint8_t *block, const uint8_t *pixels, in "movq 1(%1), %%mm1 \n\t" "movq (%1, %3), %%mm2 \n\t" "movq 1(%1, %3), %%mm3 \n\t" - "addl %%eax, %2 \n\t" - "addl %%eax, %1 \n\t" + "add %%"REG_a", %2 \n\t" + "add %%"REG_a", %1 \n\t" "psubusb %%mm6, %%mm0 \n\t" "psubusb %%mm6, %%mm2 \n\t" PAVGB" %%mm1, %%mm0 \n\t" PAVGB" %%mm3, %%mm2 \n\t" "movq %%mm0, (%2) \n\t" "movq %%mm2, (%2, %3) \n\t" - "addl %%eax, %2 \n\t" + "add %%"REG_a", %2 \n\t" "subl $4, %0 \n\t" "jnz 1b \n\t" :"+g"(h), "+S"(pixels), "+D"(block) - :"r" (line_size) - :"%eax", "memory"); + :"r" ((long)line_size) + :"%"REG_a, "memory"); } static void DEF(put_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) { __asm __volatile( - "lea (%3, %3), %%eax \n\t" + "lea (%3, %3), %%"REG_a" \n\t" "movq (%1), %%mm0 \n\t" - "subl %3, %2 \n\t" + "sub %3, %2 \n\t" "1: \n\t" "movq (%1, %3), %%mm1 \n\t" - "movq (%1, %%eax), %%mm2 \n\t" - "addl %%eax, %1 \n\t" + "movq (%1, %%"REG_a"), %%mm2 \n\t" + "add %%"REG_a", %1 \n\t" PAVGB" %%mm1, %%mm0 \n\t" PAVGB" %%mm2, %%mm1 \n\t" "movq %%mm0, (%2, %3) 
\n\t" - "movq %%mm1, (%2, %%eax) \n\t" + "movq %%mm1, (%2, %%"REG_a") \n\t" "movq (%1, %3), %%mm1 \n\t" - "movq (%1, %%eax), %%mm0 \n\t" - "addl %%eax, %2 \n\t" - "addl %%eax, %1 \n\t" + "movq (%1, %%"REG_a"), %%mm0 \n\t" + "add %%"REG_a", %2 \n\t" + "add %%"REG_a", %1 \n\t" PAVGB" %%mm1, %%mm2 \n\t" PAVGB" %%mm0, %%mm1 \n\t" "movq %%mm2, (%2, %3) \n\t" - "movq %%mm1, (%2, %%eax) \n\t" - "addl %%eax, %2 \n\t" + "movq %%mm1, (%2, %%"REG_a") \n\t" + "add %%"REG_a", %2 \n\t" "subl $4, %0 \n\t" "jnz 1b \n\t" :"+g"(h), "+S"(pixels), "+D" (block) - :"r" (line_size) - :"%eax", "memory"); + :"r" ((long)line_size) + :"%"REG_a, "memory"); } /* GL: this function does incorrect rounding if overflow */ @@ -226,39 +619,39 @@ static void DEF(put_no_rnd_pixels8_y2)(uint8_t *block, const uint8_t *pixels, in { MOVQ_BONE(mm6); __asm __volatile( - "lea (%3, %3), %%eax \n\t" + "lea (%3, %3), %%"REG_a" \n\t" "movq (%1), %%mm0 \n\t" - "subl %3, %2 \n\t" + "sub %3, %2 \n\t" "1: \n\t" "movq (%1, %3), %%mm1 \n\t" - "movq (%1, %%eax), %%mm2 \n\t" - "addl %%eax, %1 \n\t" + "movq (%1, %%"REG_a"), %%mm2 \n\t" + "add %%"REG_a", %1 \n\t" "psubusb %%mm6, %%mm1 \n\t" PAVGB" %%mm1, %%mm0 \n\t" PAVGB" %%mm2, %%mm1 \n\t" "movq %%mm0, (%2, %3) \n\t" - "movq %%mm1, (%2, %%eax) \n\t" + "movq %%mm1, (%2, %%"REG_a") \n\t" "movq (%1, %3), %%mm1 \n\t" - "movq (%1, %%eax), %%mm0 \n\t" - "addl %%eax, %2 \n\t" - "addl %%eax, %1 \n\t" + "movq (%1, %%"REG_a"), %%mm0 \n\t" + "add %%"REG_a", %2 \n\t" + "add %%"REG_a", %1 \n\t" "psubusb %%mm6, %%mm1 \n\t" PAVGB" %%mm1, %%mm2 \n\t" PAVGB" %%mm0, %%mm1 \n\t" "movq %%mm2, (%2, %3) \n\t" - "movq %%mm1, (%2, %%eax) \n\t" - "addl %%eax, %2 \n\t" + "movq %%mm1, (%2, %%"REG_a") \n\t" + "add %%"REG_a", %2 \n\t" "subl $4, %0 \n\t" "jnz 1b \n\t" :"+g"(h), "+S"(pixels), "+D" (block) - :"r" (line_size) - :"%eax", "memory"); + :"r" ((long)line_size) + :"%"REG_a, "memory"); } static void DEF(avg_pixels8)(uint8_t *block, const uint8_t *pixels, int line_size, int h) { __asm 
__volatile( - "lea (%3, %3), %%eax \n\t" + "lea (%3, %3), %%"REG_a" \n\t" "1: \n\t" "movq (%2), %%mm0 \n\t" "movq (%2, %3), %%mm1 \n\t" @@ -266,27 +659,27 @@ static void DEF(avg_pixels8)(uint8_t *block, const uint8_t *pixels, int line_siz PAVGB" (%1, %3), %%mm1 \n\t" "movq %%mm0, (%2) \n\t" "movq %%mm1, (%2, %3) \n\t" - "addl %%eax, %1 \n\t" - "addl %%eax, %2 \n\t" + "add %%"REG_a", %1 \n\t" + "add %%"REG_a", %2 \n\t" "movq (%2), %%mm0 \n\t" "movq (%2, %3), %%mm1 \n\t" PAVGB" (%1), %%mm0 \n\t" PAVGB" (%1, %3), %%mm1 \n\t" - "addl %%eax, %1 \n\t" + "add %%"REG_a", %1 \n\t" "movq %%mm0, (%2) \n\t" "movq %%mm1, (%2, %3) \n\t" - "addl %%eax, %2 \n\t" + "add %%"REG_a", %2 \n\t" "subl $4, %0 \n\t" "jnz 1b \n\t" :"+g"(h), "+S"(pixels), "+D"(block) - :"r" (line_size) - :"%eax", "memory"); + :"r" ((long)line_size) + :"%"REG_a, "memory"); } static void DEF(avg_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) { __asm __volatile( - "lea (%3, %3), %%eax \n\t" + "lea (%3, %3), %%"REG_a" \n\t" "1: \n\t" "movq (%1), %%mm0 \n\t" "movq (%1, %3), %%mm2 \n\t" @@ -294,63 +687,63 @@ static void DEF(avg_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_ PAVGB" 1(%1, %3), %%mm2 \n\t" PAVGB" (%2), %%mm0 \n\t" PAVGB" (%2, %3), %%mm2 \n\t" - "addl %%eax, %1 \n\t" + "add %%"REG_a", %1 \n\t" "movq %%mm0, (%2) \n\t" "movq %%mm2, (%2, %3) \n\t" "movq (%1), %%mm0 \n\t" "movq (%1, %3), %%mm2 \n\t" PAVGB" 1(%1), %%mm0 \n\t" PAVGB" 1(%1, %3), %%mm2 \n\t" - "addl %%eax, %2 \n\t" - "addl %%eax, %1 \n\t" + "add %%"REG_a", %2 \n\t" + "add %%"REG_a", %1 \n\t" PAVGB" (%2), %%mm0 \n\t" PAVGB" (%2, %3), %%mm2 \n\t" "movq %%mm0, (%2) \n\t" "movq %%mm2, (%2, %3) \n\t" - "addl %%eax, %2 \n\t" + "add %%"REG_a", %2 \n\t" "subl $4, %0 \n\t" "jnz 1b \n\t" :"+g"(h), "+S"(pixels), "+D"(block) - :"r" (line_size) - :"%eax", "memory"); + :"r" ((long)line_size) + :"%"REG_a, "memory"); } static void DEF(avg_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) { __asm 
__volatile( - "lea (%3, %3), %%eax \n\t" + "lea (%3, %3), %%"REG_a" \n\t" "movq (%1), %%mm0 \n\t" - "subl %3, %2 \n\t" + "sub %3, %2 \n\t" "1: \n\t" "movq (%1, %3), %%mm1 \n\t" - "movq (%1, %%eax), %%mm2 \n\t" - "addl %%eax, %1 \n\t" + "movq (%1, %%"REG_a"), %%mm2 \n\t" + "add %%"REG_a", %1 \n\t" PAVGB" %%mm1, %%mm0 \n\t" PAVGB" %%mm2, %%mm1 \n\t" "movq (%2, %3), %%mm3 \n\t" - "movq (%2, %%eax), %%mm4 \n\t" + "movq (%2, %%"REG_a"), %%mm4 \n\t" PAVGB" %%mm3, %%mm0 \n\t" PAVGB" %%mm4, %%mm1 \n\t" "movq %%mm0, (%2, %3) \n\t" - "movq %%mm1, (%2, %%eax) \n\t" + "movq %%mm1, (%2, %%"REG_a") \n\t" "movq (%1, %3), %%mm1 \n\t" - "movq (%1, %%eax), %%mm0 \n\t" + "movq (%1, %%"REG_a"), %%mm0 \n\t" PAVGB" %%mm1, %%mm2 \n\t" PAVGB" %%mm0, %%mm1 \n\t" - "addl %%eax, %2 \n\t" - "addl %%eax, %1 \n\t" + "add %%"REG_a", %2 \n\t" + "add %%"REG_a", %1 \n\t" "movq (%2, %3), %%mm3 \n\t" - "movq (%2, %%eax), %%mm4 \n\t" + "movq (%2, %%"REG_a"), %%mm4 \n\t" PAVGB" %%mm3, %%mm2 \n\t" PAVGB" %%mm4, %%mm1 \n\t" "movq %%mm2, (%2, %3) \n\t" - "movq %%mm1, (%2, %%eax) \n\t" - "addl %%eax, %2 \n\t" + "movq %%mm1, (%2, %%"REG_a") \n\t" + "add %%"REG_a", %2 \n\t" "subl $4, %0 \n\t" "jnz 1b \n\t" :"+g"(h), "+S"(pixels), "+D"(block) - :"r" (line_size) - :"%eax", "memory"); + :"r" ((long)line_size) + :"%"REG_a, "memory"); } // Note this is not correctly rounded, but this function is only used for b frames so it doesnt matter @@ -358,17 +751,17 @@ static void DEF(avg_pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line { MOVQ_BONE(mm6); __asm __volatile( - "lea (%3, %3), %%eax \n\t" + "lea (%3, %3), %%"REG_a" \n\t" "movq (%1), %%mm0 \n\t" PAVGB" 1(%1), %%mm0 \n\t" ".balign 8 \n\t" "1: \n\t" - "movq (%1, %%eax), %%mm2 \n\t" + "movq (%1, %%"REG_a"), %%mm2 \n\t" "movq (%1, %3), %%mm1 \n\t" "psubusb %%mm6, %%mm2 \n\t" PAVGB" 1(%1, %3), %%mm1 \n\t" - PAVGB" 1(%1, %%eax), %%mm2 \n\t" - "addl %%eax, %1 \n\t" + PAVGB" 1(%1, %%"REG_a"), %%mm2 \n\t" + "add %%"REG_a", %1 \n\t" PAVGB" %%mm1, %%mm0 \n\t" 
PAVGB" %%mm2, %%mm1 \n\t" PAVGB" (%2), %%mm0 \n\t" @@ -376,23 +769,23 @@ static void DEF(avg_pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line "movq %%mm0, (%2) \n\t" "movq %%mm1, (%2, %3) \n\t" "movq (%1, %3), %%mm1 \n\t" - "movq (%1, %%eax), %%mm0 \n\t" + "movq (%1, %%"REG_a"), %%mm0 \n\t" PAVGB" 1(%1, %3), %%mm1 \n\t" - PAVGB" 1(%1, %%eax), %%mm0 \n\t" - "addl %%eax, %2 \n\t" - "addl %%eax, %1 \n\t" + PAVGB" 1(%1, %%"REG_a"), %%mm0 \n\t" + "add %%"REG_a", %2 \n\t" + "add %%"REG_a", %1 \n\t" PAVGB" %%mm1, %%mm2 \n\t" PAVGB" %%mm0, %%mm1 \n\t" PAVGB" (%2), %%mm2 \n\t" PAVGB" (%2, %3), %%mm1 \n\t" "movq %%mm2, (%2) \n\t" "movq %%mm1, (%2, %3) \n\t" - "addl %%eax, %2 \n\t" + "add %%"REG_a", %2 \n\t" "subl $4, %0 \n\t" "jnz 1b \n\t" :"+g"(h), "+S"(pixels), "+D"(block) - :"r" (line_size) - :"%eax", "memory"); + :"r" ((long)line_size) + :"%"REG_a, "memory"); } //FIXME the following could be optimized too ... diff --git a/src/libffmpeg/libavcodec/i386/dsputil_mmx_rnd.h b/src/libffmpeg/libavcodec/i386/dsputil_mmx_rnd.h index 1b79aa56a..20ea1b59e 100644 --- a/src/libffmpeg/libavcodec/i386/dsputil_mmx_rnd.h +++ b/src/libffmpeg/libavcodec/i386/dsputil_mmx_rnd.h @@ -27,7 +27,7 @@ static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line { MOVQ_BFE(mm6); __asm __volatile( - "lea (%3, %3), %%eax \n\t" + "lea (%3, %3), %%"REG_a" \n\t" ".balign 8 \n\t" "1: \n\t" "movq (%1), %%mm0 \n\t" @@ -37,8 +37,8 @@ static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) "movq %%mm4, (%2) \n\t" "movq %%mm5, (%2, %3) \n\t" - "addl %%eax, %1 \n\t" - "addl %%eax, %2 \n\t" + "add %%"REG_a", %1 \n\t" + "add %%"REG_a", %2 \n\t" "movq (%1), %%mm0 \n\t" "movq 1(%1), %%mm1 \n\t" "movq (%1, %3), %%mm2 \n\t" @@ -46,13 +46,13 @@ static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) "movq %%mm4, (%2) \n\t" "movq %%mm5, (%2, 
%3) \n\t" - "addl %%eax, %1 \n\t" - "addl %%eax, %2 \n\t" + "add %%"REG_a", %1 \n\t" + "add %%"REG_a", %2 \n\t" "subl $4, %0 \n\t" "jnz 1b \n\t" :"+g"(h), "+S"(pixels), "+D"(block) - :"r"(line_size) - :"eax", "memory"); + :"r"((long)line_size) + :REG_a, "memory"); } static void DEF(put, pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) @@ -63,37 +63,37 @@ static void DEF(put, pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int " jz 1f \n\t" "movq (%1), %%mm0 \n\t" "movq (%2), %%mm1 \n\t" - "addl %4, %1 \n\t" - "addl $8, %2 \n\t" + "add %4, %1 \n\t" + "add $8, %2 \n\t" PAVGB(%%mm0, %%mm1, %%mm4, %%mm6) "movq %%mm4, (%3) \n\t" - "addl %5, %3 \n\t" + "add %5, %3 \n\t" "decl %0 \n\t" ".balign 8 \n\t" "1: \n\t" "movq (%1), %%mm0 \n\t" "movq (%2), %%mm1 \n\t" - "addl %4, %1 \n\t" + "add %4, %1 \n\t" "movq (%1), %%mm2 \n\t" "movq 8(%2), %%mm3 \n\t" - "addl %4, %1 \n\t" + "add %4, %1 \n\t" PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) "movq %%mm4, (%3) \n\t" - "addl %5, %3 \n\t" + "add %5, %3 \n\t" "movq %%mm5, (%3) \n\t" - "addl %5, %3 \n\t" + "add %5, %3 \n\t" "movq (%1), %%mm0 \n\t" "movq 16(%2), %%mm1 \n\t" - "addl %4, %1 \n\t" + "add %4, %1 \n\t" "movq (%1), %%mm2 \n\t" "movq 24(%2), %%mm3 \n\t" - "addl %4, %1 \n\t" - "addl $32, %2 \n\t" + "add %4, %1 \n\t" + "add $32, %2 \n\t" PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) "movq %%mm4, (%3) \n\t" - "addl %5, %3 \n\t" + "add %5, %3 \n\t" "movq %%mm5, (%3) \n\t" - "addl %5, %3 \n\t" + "add %5, %3 \n\t" "subl $4, %0 \n\t" "jnz 1b \n\t" #ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cant be used @@ -101,7 +101,7 @@ static void DEF(put, pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int #else :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst) #endif - :"S"(src1Stride), "D"(dstStride) + :"S"((long)src1Stride), "D"((long)dstStride) :"memory"); } @@ -109,7 +109,7 @@ static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t 
*pixels, int lin { MOVQ_BFE(mm6); __asm __volatile( - "lea (%3, %3), %%eax \n\t" + "lea (%3, %3), %%"REG_a" \n\t" ".balign 8 \n\t" "1: \n\t" "movq (%1), %%mm0 \n\t" @@ -126,8 +126,8 @@ static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, int lin PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) "movq %%mm4, 8(%2) \n\t" "movq %%mm5, 8(%2, %3) \n\t" - "addl %%eax, %1 \n\t" - "addl %%eax, %2 \n\t" + "add %%"REG_a", %1 \n\t" + "add %%"REG_a", %2 \n\t" "movq (%1), %%mm0 \n\t" "movq 1(%1), %%mm1 \n\t" "movq (%1, %3), %%mm2 \n\t" @@ -142,13 +142,13 @@ static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, int lin PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) "movq %%mm4, 8(%2) \n\t" "movq %%mm5, 8(%2, %3) \n\t" - "addl %%eax, %1 \n\t" - "addl %%eax, %2 \n\t" + "add %%"REG_a", %1 \n\t" + "add %%"REG_a", %2 \n\t" "subl $4, %0 \n\t" "jnz 1b \n\t" :"+g"(h), "+S"(pixels), "+D"(block) - :"r"(line_size) - :"eax", "memory"); + :"r"((long)line_size) + :REG_a, "memory"); } static void DEF(put, pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) @@ -161,12 +161,12 @@ static void DEF(put, pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, in "movq (%2), %%mm1 \n\t" "movq 8(%1), %%mm2 \n\t" "movq 8(%2), %%mm3 \n\t" - "addl %4, %1 \n\t" - "addl $16, %2 \n\t" + "add %4, %1 \n\t" + "add $16, %2 \n\t" PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) "movq %%mm4, (%3) \n\t" "movq %%mm5, 8(%3) \n\t" - "addl %5, %3 \n\t" + "add %5, %3 \n\t" "decl %0 \n\t" ".balign 8 \n\t" "1: \n\t" @@ -174,21 +174,21 @@ static void DEF(put, pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, in "movq (%2), %%mm1 \n\t" "movq 8(%1), %%mm2 \n\t" "movq 8(%2), %%mm3 \n\t" - "addl %4, %1 \n\t" + "add %4, %1 \n\t" PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) "movq %%mm4, (%3) \n\t" "movq %%mm5, 8(%3) \n\t" - "addl %5, %3 \n\t" + "add %5, %3 \n\t" "movq (%1), %%mm0 \n\t" "movq 16(%2), %%mm1 \n\t" "movq 8(%1), %%mm2 
\n\t" "movq 24(%2), %%mm3 \n\t" - "addl %4, %1 \n\t" + "add %4, %1 \n\t" PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) "movq %%mm4, (%3) \n\t" "movq %%mm5, 8(%3) \n\t" - "addl %5, %3 \n\t" - "addl $32, %2 \n\t" + "add %5, %3 \n\t" + "add $32, %2 \n\t" "subl $2, %0 \n\t" "jnz 1b \n\t" #ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cant be used @@ -196,7 +196,7 @@ static void DEF(put, pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, in #else :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst) #endif - :"S"(src1Stride), "D"(dstStride) + :"S"((long)src1Stride), "D"((long)dstStride) :"memory"); } @@ -204,29 +204,29 @@ static void DEF(put, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line { MOVQ_BFE(mm6); __asm __volatile( - "lea (%3, %3), %%eax \n\t" + "lea (%3, %3), %%"REG_a" \n\t" "movq (%1), %%mm0 \n\t" ".balign 8 \n\t" "1: \n\t" "movq (%1, %3), %%mm1 \n\t" - "movq (%1, %%eax),%%mm2 \n\t" + "movq (%1, %%"REG_a"),%%mm2 \n\t" PAVGBP(%%mm1, %%mm0, %%mm4, %%mm2, %%mm1, %%mm5) "movq %%mm4, (%2) \n\t" "movq %%mm5, (%2, %3) \n\t" - "addl %%eax, %1 \n\t" - "addl %%eax, %2 \n\t" + "add %%"REG_a", %1 \n\t" + "add %%"REG_a", %2 \n\t" "movq (%1, %3), %%mm1 \n\t" - "movq (%1, %%eax),%%mm0 \n\t" + "movq (%1, %%"REG_a"),%%mm0 \n\t" PAVGBP(%%mm1, %%mm2, %%mm4, %%mm0, %%mm1, %%mm5) "movq %%mm4, (%2) \n\t" "movq %%mm5, (%2, %3) \n\t" - "addl %%eax, %1 \n\t" - "addl %%eax, %2 \n\t" + "add %%"REG_a", %1 \n\t" + "add %%"REG_a", %2 \n\t" "subl $4, %0 \n\t" "jnz 1b \n\t" :"+g"(h), "+S"(pixels), "+D"(block) - :"r"(line_size) - :"eax", "memory"); + :"r"((long)line_size) + :REG_a, "memory"); } static void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) @@ -244,12 +244,12 @@ static void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int lin "punpckhbw %%mm7, %%mm5 \n\t" "paddusw %%mm0, %%mm4 \n\t" "paddusw %%mm1, %%mm5 \n\t" - "xorl %%eax, %%eax \n\t" - "addl %3, %1 \n\t" + "xor %%"REG_a", %%"REG_a" 
\n\t" + "add %3, %1 \n\t" ".balign 8 \n\t" "1: \n\t" - "movq (%1, %%eax), %%mm0 \n\t" - "movq 1(%1, %%eax), %%mm2 \n\t" + "movq (%1, %%"REG_a"), %%mm0 \n\t" + "movq 1(%1, %%"REG_a"), %%mm2 \n\t" "movq %%mm0, %%mm1 \n\t" "movq %%mm2, %%mm3 \n\t" "punpcklbw %%mm7, %%mm0 \n\t" @@ -265,11 +265,11 @@ static void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int lin "psrlw $2, %%mm4 \n\t" "psrlw $2, %%mm5 \n\t" "packuswb %%mm5, %%mm4 \n\t" - "movq %%mm4, (%2, %%eax) \n\t" - "addl %3, %%eax \n\t" + "movq %%mm4, (%2, %%"REG_a") \n\t" + "add %3, %%"REG_a" \n\t" - "movq (%1, %%eax), %%mm2 \n\t" // 0 <-> 2 1 <-> 3 - "movq 1(%1, %%eax), %%mm4 \n\t" + "movq (%1, %%"REG_a"), %%mm2 \n\t" // 0 <-> 2 1 <-> 3 + "movq 1(%1, %%"REG_a"), %%mm4 \n\t" "movq %%mm2, %%mm3 \n\t" "movq %%mm4, %%mm5 \n\t" "punpcklbw %%mm7, %%mm2 \n\t" @@ -285,17 +285,36 @@ static void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int lin "psrlw $2, %%mm0 \n\t" "psrlw $2, %%mm1 \n\t" "packuswb %%mm1, %%mm0 \n\t" - "movq %%mm0, (%2, %%eax) \n\t" - "addl %3, %%eax \n\t" + "movq %%mm0, (%2, %%"REG_a") \n\t" + "add %3, %%"REG_a" \n\t" "subl $2, %0 \n\t" "jnz 1b \n\t" :"+g"(h), "+S"(pixels) - :"D"(block), "r"(line_size) - :"eax", "memory"); + :"D"(block), "r"((long)line_size) + :REG_a, "memory"); } // avg_pixels +static void DEF(avg, pixels4)(uint8_t *block, const uint8_t *pixels, int line_size, int h) +{ + MOVQ_BFE(mm6); + JUMPALIGN(); + do { + __asm __volatile( + "movd %0, %%mm0 \n\t" + "movd %1, %%mm1 \n\t" + PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) + "movd %%mm2, %0 \n\t" + :"+m"(*block) + :"m"(*pixels) + :"memory"); + pixels += line_size; + block += line_size; + } + while (--h); +} + // in case more speed is needed - unroling would certainly help static void DEF(avg, pixels8)(uint8_t *block, const uint8_t *pixels, int line_size, int h) { @@ -437,12 +456,12 @@ static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line { MOVQ_BFE(mm6); __asm __volatile( - "lea (%3, 
%3), %%eax \n\t" + "lea (%3, %3), %%"REG_a" \n\t" "movq (%1), %%mm0 \n\t" ".balign 8 \n\t" "1: \n\t" "movq (%1, %3), %%mm1 \n\t" - "movq (%1, %%eax), %%mm2 \n\t" + "movq (%1, %%"REG_a"), %%mm2 \n\t" PAVGBP(%%mm1, %%mm0, %%mm4, %%mm2, %%mm1, %%mm5) "movq (%2), %%mm3 \n\t" PAVGB(%%mm3, %%mm4, %%mm0, %%mm6) @@ -450,11 +469,11 @@ static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line PAVGB(%%mm3, %%mm5, %%mm1, %%mm6) "movq %%mm0, (%2) \n\t" "movq %%mm1, (%2, %3) \n\t" - "addl %%eax, %1 \n\t" - "addl %%eax, %2 \n\t" + "add %%"REG_a", %1 \n\t" + "add %%"REG_a", %2 \n\t" "movq (%1, %3), %%mm1 \n\t" - "movq (%1, %%eax), %%mm0 \n\t" + "movq (%1, %%"REG_a"), %%mm0 \n\t" PAVGBP(%%mm1, %%mm2, %%mm4, %%mm0, %%mm1, %%mm5) "movq (%2), %%mm3 \n\t" PAVGB(%%mm3, %%mm4, %%mm2, %%mm6) @@ -462,14 +481,14 @@ static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line PAVGB(%%mm3, %%mm5, %%mm1, %%mm6) "movq %%mm2, (%2) \n\t" "movq %%mm1, (%2, %3) \n\t" - "addl %%eax, %1 \n\t" - "addl %%eax, %2 \n\t" + "add %%"REG_a", %1 \n\t" + "add %%"REG_a", %2 \n\t" "subl $4, %0 \n\t" "jnz 1b \n\t" :"+g"(h), "+S"(pixels), "+D"(block) - :"r"(line_size) - :"eax", "memory"); + :"r"((long)line_size) + :REG_a, "memory"); } // this routine is 'slightly' suboptimal but mostly unused @@ -488,12 +507,12 @@ static void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int lin "punpckhbw %%mm7, %%mm5 \n\t" "paddusw %%mm0, %%mm4 \n\t" "paddusw %%mm1, %%mm5 \n\t" - "xorl %%eax, %%eax \n\t" - "addl %3, %1 \n\t" + "xor %%"REG_a", %%"REG_a" \n\t" + "add %3, %1 \n\t" ".balign 8 \n\t" "1: \n\t" - "movq (%1, %%eax), %%mm0 \n\t" - "movq 1(%1, %%eax), %%mm2 \n\t" + "movq (%1, %%"REG_a"), %%mm0 \n\t" + "movq 1(%1, %%"REG_a"), %%mm2 \n\t" "movq %%mm0, %%mm1 \n\t" "movq %%mm2, %%mm3 \n\t" "punpcklbw %%mm7, %%mm0 \n\t" @@ -508,16 +527,16 @@ static void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int lin "paddusw %%mm1, %%mm5 \n\t" "psrlw $2, %%mm4 
\n\t" "psrlw $2, %%mm5 \n\t" - "movq (%2, %%eax), %%mm3 \n\t" + "movq (%2, %%"REG_a"), %%mm3 \n\t" "packuswb %%mm5, %%mm4 \n\t" "pcmpeqd %%mm2, %%mm2 \n\t" "paddb %%mm2, %%mm2 \n\t" PAVGB(%%mm3, %%mm4, %%mm5, %%mm2) - "movq %%mm5, (%2, %%eax) \n\t" - "addl %3, %%eax \n\t" + "movq %%mm5, (%2, %%"REG_a") \n\t" + "add %3, %%"REG_a" \n\t" - "movq (%1, %%eax), %%mm2 \n\t" // 0 <-> 2 1 <-> 3 - "movq 1(%1, %%eax), %%mm4 \n\t" + "movq (%1, %%"REG_a"), %%mm2 \n\t" // 0 <-> 2 1 <-> 3 + "movq 1(%1, %%"REG_a"), %%mm4 \n\t" "movq %%mm2, %%mm3 \n\t" "movq %%mm4, %%mm5 \n\t" "punpcklbw %%mm7, %%mm2 \n\t" @@ -532,19 +551,19 @@ static void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int lin "paddusw %%mm5, %%mm1 \n\t" "psrlw $2, %%mm0 \n\t" "psrlw $2, %%mm1 \n\t" - "movq (%2, %%eax), %%mm3 \n\t" + "movq (%2, %%"REG_a"), %%mm3 \n\t" "packuswb %%mm1, %%mm0 \n\t" "pcmpeqd %%mm2, %%mm2 \n\t" "paddb %%mm2, %%mm2 \n\t" PAVGB(%%mm3, %%mm0, %%mm1, %%mm2) - "movq %%mm1, (%2, %%eax) \n\t" - "addl %3, %%eax \n\t" + "movq %%mm1, (%2, %%"REG_a") \n\t" + "add %3, %%"REG_a" \n\t" "subl $2, %0 \n\t" "jnz 1b \n\t" :"+g"(h), "+S"(pixels) - :"D"(block), "r"(line_size) - :"eax", "memory"); + :"D"(block), "r"((long)line_size) + :REG_a, "memory"); } //FIXME optimize diff --git a/src/libffmpeg/libavcodec/i386/fdct_mmx.c b/src/libffmpeg/libavcodec/i386/fdct_mmx.c index 7f348329a..aacbe5743 100644 --- a/src/libffmpeg/libavcodec/i386/fdct_mmx.c +++ b/src/libffmpeg/libavcodec/i386/fdct_mmx.c @@ -47,13 +47,13 @@ static const int16_t ocos_4_16[4] ATTR_ALIGN(8) = { 23170, 23170, 23170, 23170, //cos * (2<<15) + 0.5 }; -static const long long fdct_one_corr ATTR_ALIGN(8) = 0x0001000100010001LL; +static const int64_t fdct_one_corr ATTR_ALIGN(8) = 0x0001000100010001LL; -static const long fdct_r_row[2] ATTR_ALIGN(8) = {RND_FRW_ROW, RND_FRW_ROW }; +static const int32_t fdct_r_row[2] ATTR_ALIGN(8) = {RND_FRW_ROW, RND_FRW_ROW }; struct { - const long fdct_r_row_sse2[4] ATTR_ALIGN(16); + const int32_t 
fdct_r_row_sse2[4] ATTR_ALIGN(16); } fdct_r_row_sse2 ATTR_ALIGN(16)= {{ RND_FRW_ROW, RND_FRW_ROW, RND_FRW_ROW, RND_FRW_ROW diff --git a/src/libffmpeg/libavcodec/i386/mmx.h b/src/libffmpeg/libavcodec/i386/mmx.h index ad684bc5a..f0ef1b79e 100644 --- a/src/libffmpeg/libavcodec/i386/mmx.h +++ b/src/libffmpeg/libavcodec/i386/mmx.h @@ -5,6 +5,12 @@ #ifndef AVCODEC_I386MMX_H #define AVCODEC_I386MMX_H +#ifdef ARCH_X86_64 +# define REG_a "rax" +#else +# define REG_a "eax" +#endif + /* * The type of an value that fits in an MMX register (note that long * long constant values MUST be suffixed by LL and unsigned long long diff --git a/src/libffmpeg/libavcodec/i386/motion_est_mmx.c b/src/libffmpeg/libavcodec/i386/motion_est_mmx.c index 39246d905..1b90f8e40 100644 --- a/src/libffmpeg/libavcodec/i386/motion_est_mmx.c +++ b/src/libffmpeg/libavcodec/i386/motion_est_mmx.c @@ -20,6 +20,7 @@ * mostly by Michael Niedermayer <michaelni@gmx.at> */ #include "../dsputil.h" +#include "mmx.h" static const __attribute__ ((aligned(8))) uint64_t round_tab[3]={ 0x0000000000000000ULL, @@ -31,19 +32,19 @@ static attribute_used __attribute__ ((aligned(8))) uint64_t bone= 0x010101010101 static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h) { - int len= -(stride*h); + long len= -(stride*h); asm volatile( ".balign 16 \n\t" "1: \n\t" - "movq (%1, %%eax), %%mm0 \n\t" - "movq (%2, %%eax), %%mm2 \n\t" - "movq (%2, %%eax), %%mm4 \n\t" - "addl %3, %%eax \n\t" + "movq (%1, %%"REG_a"), %%mm0 \n\t" + "movq (%2, %%"REG_a"), %%mm2 \n\t" + "movq (%2, %%"REG_a"), %%mm4 \n\t" + "add %3, %%"REG_a" \n\t" "psubusb %%mm0, %%mm2 \n\t" "psubusb %%mm4, %%mm0 \n\t" - "movq (%1, %%eax), %%mm1 \n\t" - "movq (%2, %%eax), %%mm3 \n\t" - "movq (%2, %%eax), %%mm5 \n\t" + "movq (%1, %%"REG_a"), %%mm1 \n\t" + "movq (%2, %%"REG_a"), %%mm3 \n\t" + "movq (%2, %%"REG_a"), %%mm5 \n\t" "psubusb %%mm1, %%mm3 \n\t" "psubusb %%mm5, %%mm1 \n\t" "por %%mm2, %%mm0 \n\t" @@ -58,116 +59,116 @@ static inline void 
sad8_1_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h) "paddw %%mm3, %%mm2 \n\t" "paddw %%mm2, %%mm0 \n\t" "paddw %%mm0, %%mm6 \n\t" - "addl %3, %%eax \n\t" + "add %3, %%"REG_a" \n\t" " js 1b \n\t" : "+a" (len) - : "r" (blk1 - len), "r" (blk2 - len), "r" (stride) + : "r" (blk1 - len), "r" (blk2 - len), "r" ((long)stride) ); } static inline void sad8_1_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h) { - int len= -(stride*h); + long len= -(stride*h); asm volatile( ".balign 16 \n\t" "1: \n\t" - "movq (%1, %%eax), %%mm0 \n\t" - "movq (%2, %%eax), %%mm2 \n\t" + "movq (%1, %%"REG_a"), %%mm0 \n\t" + "movq (%2, %%"REG_a"), %%mm2 \n\t" "psadbw %%mm2, %%mm0 \n\t" - "addl %3, %%eax \n\t" - "movq (%1, %%eax), %%mm1 \n\t" - "movq (%2, %%eax), %%mm3 \n\t" + "add %3, %%"REG_a" \n\t" + "movq (%1, %%"REG_a"), %%mm1 \n\t" + "movq (%2, %%"REG_a"), %%mm3 \n\t" "psadbw %%mm1, %%mm3 \n\t" "paddw %%mm3, %%mm0 \n\t" "paddw %%mm0, %%mm6 \n\t" - "addl %3, %%eax \n\t" + "add %3, %%"REG_a" \n\t" " js 1b \n\t" : "+a" (len) - : "r" (blk1 - len), "r" (blk2 - len), "r" (stride) + : "r" (blk1 - len), "r" (blk2 - len), "r" ((long)stride) ); } static inline void sad8_2_mmx2(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, int stride, int h) { - int len= -(stride*h); + long len= -(stride*h); asm volatile( ".balign 16 \n\t" "1: \n\t" - "movq (%1, %%eax), %%mm0 \n\t" - "movq (%2, %%eax), %%mm2 \n\t" + "movq (%1, %%"REG_a"), %%mm0 \n\t" + "movq (%2, %%"REG_a"), %%mm2 \n\t" "pavgb %%mm2, %%mm0 \n\t" - "movq (%3, %%eax), %%mm2 \n\t" + "movq (%3, %%"REG_a"), %%mm2 \n\t" "psadbw %%mm2, %%mm0 \n\t" - "addl %4, %%eax \n\t" - "movq (%1, %%eax), %%mm1 \n\t" - "movq (%2, %%eax), %%mm3 \n\t" + "add %4, %%"REG_a" \n\t" + "movq (%1, %%"REG_a"), %%mm1 \n\t" + "movq (%2, %%"REG_a"), %%mm3 \n\t" "pavgb %%mm1, %%mm3 \n\t" - "movq (%3, %%eax), %%mm1 \n\t" + "movq (%3, %%"REG_a"), %%mm1 \n\t" "psadbw %%mm1, %%mm3 \n\t" "paddw %%mm3, %%mm0 \n\t" "paddw %%mm0, %%mm6 \n\t" - "addl %4, %%eax \n\t" + "add %4, 
%%"REG_a" \n\t" " js 1b \n\t" : "+a" (len) - : "r" (blk1a - len), "r" (blk1b -len), "r" (blk2 - len), "r" (stride) + : "r" (blk1a - len), "r" (blk1b -len), "r" (blk2 - len), "r" ((long)stride) ); } static inline void sad8_4_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h) { //FIXME reuse src - int len= -(stride*h); + long len= -(stride*h); asm volatile( ".balign 16 \n\t" "movq "MANGLE(bone)", %%mm5 \n\t" "1: \n\t" - "movq (%1, %%eax), %%mm0 \n\t" - "movq (%2, %%eax), %%mm2 \n\t" - "movq 1(%1, %%eax), %%mm1 \n\t" - "movq 1(%2, %%eax), %%mm3 \n\t" + "movq (%1, %%"REG_a"), %%mm0 \n\t" + "movq (%2, %%"REG_a"), %%mm2 \n\t" + "movq 1(%1, %%"REG_a"), %%mm1 \n\t" + "movq 1(%2, %%"REG_a"), %%mm3 \n\t" "pavgb %%mm2, %%mm0 \n\t" "pavgb %%mm1, %%mm3 \n\t" "psubusb %%mm5, %%mm3 \n\t" "pavgb %%mm3, %%mm0 \n\t" - "movq (%3, %%eax), %%mm2 \n\t" + "movq (%3, %%"REG_a"), %%mm2 \n\t" "psadbw %%mm2, %%mm0 \n\t" - "addl %4, %%eax \n\t" - "movq (%1, %%eax), %%mm1 \n\t" - "movq (%2, %%eax), %%mm3 \n\t" - "movq 1(%1, %%eax), %%mm2 \n\t" - "movq 1(%2, %%eax), %%mm4 \n\t" + "add %4, %%"REG_a" \n\t" + "movq (%1, %%"REG_a"), %%mm1 \n\t" + "movq (%2, %%"REG_a"), %%mm3 \n\t" + "movq 1(%1, %%"REG_a"), %%mm2 \n\t" + "movq 1(%2, %%"REG_a"), %%mm4 \n\t" "pavgb %%mm3, %%mm1 \n\t" "pavgb %%mm4, %%mm2 \n\t" "psubusb %%mm5, %%mm2 \n\t" "pavgb %%mm1, %%mm2 \n\t" - "movq (%3, %%eax), %%mm1 \n\t" + "movq (%3, %%"REG_a"), %%mm1 \n\t" "psadbw %%mm1, %%mm2 \n\t" "paddw %%mm2, %%mm0 \n\t" "paddw %%mm0, %%mm6 \n\t" - "addl %4, %%eax \n\t" + "add %4, %%"REG_a" \n\t" " js 1b \n\t" : "+a" (len) - : "r" (blk1 - len), "r" (blk1 - len + stride), "r" (blk2 - len), "r" (stride) + : "r" (blk1 - len), "r" (blk1 - len + stride), "r" (blk2 - len), "r" ((long)stride) ); } static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, int stride, int h) { - int len= -(stride*h); + long len= -(stride*h); asm volatile( ".balign 16 \n\t" "1: \n\t" - "movq (%1, %%eax), %%mm0 \n\t" - "movq (%2, %%eax), %%mm1 
\n\t" - "movq (%1, %%eax), %%mm2 \n\t" - "movq (%2, %%eax), %%mm3 \n\t" + "movq (%1, %%"REG_a"), %%mm0 \n\t" + "movq (%2, %%"REG_a"), %%mm1 \n\t" + "movq (%1, %%"REG_a"), %%mm2 \n\t" + "movq (%2, %%"REG_a"), %%mm3 \n\t" "punpcklbw %%mm7, %%mm0 \n\t" "punpcklbw %%mm7, %%mm1 \n\t" "punpckhbw %%mm7, %%mm2 \n\t" "punpckhbw %%mm7, %%mm3 \n\t" "paddw %%mm0, %%mm1 \n\t" "paddw %%mm2, %%mm3 \n\t" - "movq (%3, %%eax), %%mm4 \n\t" - "movq (%3, %%eax), %%mm2 \n\t" + "movq (%3, %%"REG_a"), %%mm4 \n\t" + "movq (%3, %%"REG_a"), %%mm2 \n\t" "paddw %%mm5, %%mm1 \n\t" "paddw %%mm5, %%mm3 \n\t" "psrlw $1, %%mm1 \n\t" @@ -181,21 +182,21 @@ static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, int "punpckhbw %%mm7, %%mm1 \n\t" "paddw %%mm1, %%mm0 \n\t" "paddw %%mm0, %%mm6 \n\t" - "addl %4, %%eax \n\t" + "add %4, %%"REG_a" \n\t" " js 1b \n\t" : "+a" (len) - : "r" (blk1a - len), "r" (blk1b -len), "r" (blk2 - len), "r" (stride) + : "r" (blk1a - len), "r" (blk1b -len), "r" (blk2 - len), "r" ((long)stride) ); } static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h) { - int len= -(stride*h); + long len= -(stride*h); asm volatile( ".balign 16 \n\t" "1: \n\t" - "movq (%1, %%eax), %%mm0 \n\t" - "movq (%2, %%eax), %%mm1 \n\t" + "movq (%1, %%"REG_a"), %%mm0 \n\t" + "movq (%2, %%"REG_a"), %%mm1 \n\t" "movq %%mm0, %%mm4 \n\t" "movq %%mm1, %%mm2 \n\t" "punpcklbw %%mm7, %%mm0 \n\t" @@ -204,8 +205,8 @@ static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h) "punpckhbw %%mm7, %%mm2 \n\t" "paddw %%mm1, %%mm0 \n\t" "paddw %%mm2, %%mm4 \n\t" - "movq 1(%1, %%eax), %%mm2 \n\t" - "movq 1(%2, %%eax), %%mm3 \n\t" + "movq 1(%1, %%"REG_a"), %%mm2 \n\t" + "movq 1(%2, %%"REG_a"), %%mm3 \n\t" "movq %%mm2, %%mm1 \n\t" "punpcklbw %%mm7, %%mm2 \n\t" "punpckhbw %%mm7, %%mm1 \n\t" @@ -216,8 +217,8 @@ static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h) "punpckhbw %%mm7, %%mm4 \n\t" "paddw %%mm3, %%mm2 \n\t" "paddw %%mm4, 
%%mm1 \n\t" - "movq (%3, %%eax), %%mm3 \n\t" - "movq (%3, %%eax), %%mm4 \n\t" + "movq (%3, %%"REG_a"), %%mm3 \n\t" + "movq (%3, %%"REG_a"), %%mm4 \n\t" "paddw %%mm5, %%mm2 \n\t" "paddw %%mm5, %%mm1 \n\t" "psrlw $2, %%mm2 \n\t" @@ -231,10 +232,10 @@ static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h) "punpckhbw %%mm7, %%mm2 \n\t" "paddw %%mm2, %%mm0 \n\t" "paddw %%mm0, %%mm6 \n\t" - "addl %4, %%eax \n\t" + "add %4, %%"REG_a" \n\t" " js 1b \n\t" : "+a" (len) - : "r" (blk1 - len), "r" (blk1 -len + stride), "r" (blk2 - len), "r" (stride) + : "r" (blk1 - len), "r" (blk1 -len + stride), "r" (blk2 - len), "r" ((long)stride) ); } diff --git a/src/libffmpeg/libavcodec/i386/mpegvideo_mmx.c b/src/libffmpeg/libavcodec/i386/mpegvideo_mmx.c index f19de73d6..70c81f675 100644 --- a/src/libffmpeg/libavcodec/i386/mpegvideo_mmx.c +++ b/src/libffmpeg/libavcodec/i386/mpegvideo_mmx.c @@ -23,6 +23,7 @@ #include "../dsputil.h" #include "../mpegvideo.h" #include "../avcodec.h" +#include "mmx.h" extern uint8_t zigzag_direct_noperm[64]; extern uint16_t inv_zigzag_direct16[64]; @@ -34,7 +35,7 @@ static const unsigned long long int mm_wone __attribute__ ((aligned(8))) = 0x000 static void dct_unquantize_h263_intra_mmx(MpegEncContext *s, DCTELEM *block, int n, int qscale) { - int level, qmul, qadd, nCoeffs; + long level, qmul, qadd, nCoeffs; qmul = qscale << 1; @@ -97,7 +98,7 @@ asm volatile( "movq %%mm0, (%0, %3) \n\t" "movq %%mm1, 8(%0, %3) \n\t" - "addl $16, %3 \n\t" + "add $16, %3 \n\t" "jng 1b \n\t" ::"r" (block+nCoeffs), "g"(qmul), "g" (qadd), "r" (2*(-nCoeffs)) : "memory" @@ -109,7 +110,7 @@ asm volatile( static void dct_unquantize_h263_inter_mmx(MpegEncContext *s, DCTELEM *block, int n, int qscale) { - int qmul, qadd, nCoeffs; + long qmul, qadd, nCoeffs; qmul = qscale << 1; qadd = (qscale - 1) | 1; @@ -160,7 +161,7 @@ asm volatile( "movq %%mm0, (%0, %3) \n\t" "movq %%mm1, 8(%0, %3) \n\t" - "addl $16, %3 \n\t" + "add $16, %3 \n\t" "jng 1b \n\t" ::"r" 
(block+nCoeffs), "g"(qmul), "g" (qadd), "r" (2*(-nCoeffs)) : "memory" @@ -200,7 +201,7 @@ asm volatile( static void dct_unquantize_mpeg1_intra_mmx(MpegEncContext *s, DCTELEM *block, int n, int qscale) { - int nCoeffs; + long nCoeffs; const uint16_t *quant_matrix; int block0; @@ -220,13 +221,13 @@ asm volatile( "movd %2, %%mm6 \n\t" "packssdw %%mm6, %%mm6 \n\t" "packssdw %%mm6, %%mm6 \n\t" - "movl %3, %%eax \n\t" + "mov %3, %%"REG_a" \n\t" ".balign 16\n\t" "1: \n\t" - "movq (%0, %%eax), %%mm0 \n\t" - "movq 8(%0, %%eax), %%mm1 \n\t" - "movq (%1, %%eax), %%mm4 \n\t" - "movq 8(%1, %%eax), %%mm5 \n\t" + "movq (%0, %%"REG_a"), %%mm0 \n\t" + "movq 8(%0, %%"REG_a"), %%mm1 \n\t" + "movq (%1, %%"REG_a"), %%mm4 \n\t" + "movq 8(%1, %%"REG_a"), %%mm5 \n\t" "pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i] "pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i] "pxor %%mm2, %%mm2 \n\t" @@ -241,8 +242,8 @@ asm volatile( "pmullw %%mm5, %%mm1 \n\t" // abs(block[i])*q "pxor %%mm4, %%mm4 \n\t" "pxor %%mm5, %%mm5 \n\t" // FIXME slow - "pcmpeqw (%0, %%eax), %%mm4 \n\t" // block[i] == 0 ? -1 : 0 - "pcmpeqw 8(%0, %%eax), %%mm5 \n\t" // block[i] == 0 ? -1 : 0 + "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0 + "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? 
-1 : 0 "psraw $3, %%mm0 \n\t" "psraw $3, %%mm1 \n\t" "psubw %%mm7, %%mm0 \n\t" @@ -255,13 +256,13 @@ asm volatile( "psubw %%mm3, %%mm1 \n\t" "pandn %%mm0, %%mm4 \n\t" "pandn %%mm1, %%mm5 \n\t" - "movq %%mm4, (%0, %%eax) \n\t" - "movq %%mm5, 8(%0, %%eax) \n\t" + "movq %%mm4, (%0, %%"REG_a") \n\t" + "movq %%mm5, 8(%0, %%"REG_a") \n\t" - "addl $16, %%eax \n\t" + "add $16, %%"REG_a" \n\t" "js 1b \n\t" ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs) - : "%eax", "memory" + : "%"REG_a, "memory" ); block[0]= block0; } @@ -269,7 +270,7 @@ asm volatile( static void dct_unquantize_mpeg1_inter_mmx(MpegEncContext *s, DCTELEM *block, int n, int qscale) { - int nCoeffs; + long nCoeffs; const uint16_t *quant_matrix; assert(s->block_last_index[n]>=0); @@ -283,13 +284,13 @@ asm volatile( "movd %2, %%mm6 \n\t" "packssdw %%mm6, %%mm6 \n\t" "packssdw %%mm6, %%mm6 \n\t" - "movl %3, %%eax \n\t" + "mov %3, %%"REG_a" \n\t" ".balign 16\n\t" "1: \n\t" - "movq (%0, %%eax), %%mm0 \n\t" - "movq 8(%0, %%eax), %%mm1 \n\t" - "movq (%1, %%eax), %%mm4 \n\t" - "movq 8(%1, %%eax), %%mm5 \n\t" + "movq (%0, %%"REG_a"), %%mm0 \n\t" + "movq 8(%0, %%"REG_a"), %%mm1 \n\t" + "movq (%1, %%"REG_a"), %%mm4 \n\t" + "movq 8(%1, %%"REG_a"), %%mm5 \n\t" "pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i] "pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i] "pxor %%mm2, %%mm2 \n\t" @@ -308,8 +309,8 @@ asm volatile( "pmullw %%mm5, %%mm1 \n\t" // (abs(block[i])*2 + 1)*q "pxor %%mm4, %%mm4 \n\t" "pxor %%mm5, %%mm5 \n\t" // FIXME slow - "pcmpeqw (%0, %%eax), %%mm4 \n\t" // block[i] == 0 ? -1 : 0 - "pcmpeqw 8(%0, %%eax), %%mm5 \n\t" // block[i] == 0 ? -1 : 0 + "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0 + "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? 
-1 : 0 "psraw $4, %%mm0 \n\t" "psraw $4, %%mm1 \n\t" "psubw %%mm7, %%mm0 \n\t" @@ -322,20 +323,20 @@ asm volatile( "psubw %%mm3, %%mm1 \n\t" "pandn %%mm0, %%mm4 \n\t" "pandn %%mm1, %%mm5 \n\t" - "movq %%mm4, (%0, %%eax) \n\t" - "movq %%mm5, 8(%0, %%eax) \n\t" + "movq %%mm4, (%0, %%"REG_a") \n\t" + "movq %%mm5, 8(%0, %%"REG_a") \n\t" - "addl $16, %%eax \n\t" + "add $16, %%"REG_a" \n\t" "js 1b \n\t" ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs) - : "%eax", "memory" + : "%"REG_a, "memory" ); } static void dct_unquantize_mpeg2_intra_mmx(MpegEncContext *s, DCTELEM *block, int n, int qscale) { - int nCoeffs; + long nCoeffs; const uint16_t *quant_matrix; int block0; @@ -355,13 +356,13 @@ asm volatile( "movd %2, %%mm6 \n\t" "packssdw %%mm6, %%mm6 \n\t" "packssdw %%mm6, %%mm6 \n\t" - "movl %3, %%eax \n\t" + "mov %3, %%"REG_a" \n\t" ".balign 16\n\t" "1: \n\t" - "movq (%0, %%eax), %%mm0 \n\t" - "movq 8(%0, %%eax), %%mm1 \n\t" - "movq (%1, %%eax), %%mm4 \n\t" - "movq 8(%1, %%eax), %%mm5 \n\t" + "movq (%0, %%"REG_a"), %%mm0 \n\t" + "movq 8(%0, %%"REG_a"), %%mm1 \n\t" + "movq (%1, %%"REG_a"), %%mm4 \n\t" + "movq 8(%1, %%"REG_a"), %%mm5 \n\t" "pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i] "pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i] "pxor %%mm2, %%mm2 \n\t" @@ -376,8 +377,8 @@ asm volatile( "pmullw %%mm5, %%mm1 \n\t" // abs(block[i])*q "pxor %%mm4, %%mm4 \n\t" "pxor %%mm5, %%mm5 \n\t" // FIXME slow - "pcmpeqw (%0, %%eax), %%mm4 \n\t" // block[i] == 0 ? -1 : 0 - "pcmpeqw 8(%0, %%eax), %%mm5 \n\t" // block[i] == 0 ? -1 : 0 + "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0 + "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? 
-1 : 0 "psraw $3, %%mm0 \n\t" "psraw $3, %%mm1 \n\t" "pxor %%mm2, %%mm0 \n\t" @@ -386,13 +387,13 @@ asm volatile( "psubw %%mm3, %%mm1 \n\t" "pandn %%mm0, %%mm4 \n\t" "pandn %%mm1, %%mm5 \n\t" - "movq %%mm4, (%0, %%eax) \n\t" - "movq %%mm5, 8(%0, %%eax) \n\t" + "movq %%mm4, (%0, %%"REG_a") \n\t" + "movq %%mm5, 8(%0, %%"REG_a") \n\t" - "addl $16, %%eax \n\t" + "add $16, %%"REG_a" \n\t" "jng 1b \n\t" ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs) - : "%eax", "memory" + : "%"REG_a, "memory" ); block[0]= block0; //Note, we dont do mismatch control for intra as errors cannot accumulate @@ -401,7 +402,7 @@ asm volatile( static void dct_unquantize_mpeg2_inter_mmx(MpegEncContext *s, DCTELEM *block, int n, int qscale) { - int nCoeffs; + long nCoeffs; const uint16_t *quant_matrix; assert(s->block_last_index[n]>=0); @@ -416,13 +417,13 @@ asm volatile( "movd %2, %%mm6 \n\t" "packssdw %%mm6, %%mm6 \n\t" "packssdw %%mm6, %%mm6 \n\t" - "movl %3, %%eax \n\t" + "mov %3, %%"REG_a" \n\t" ".balign 16\n\t" "1: \n\t" - "movq (%0, %%eax), %%mm0 \n\t" - "movq 8(%0, %%eax), %%mm1 \n\t" - "movq (%1, %%eax), %%mm4 \n\t" - "movq 8(%1, %%eax), %%mm5 \n\t" + "movq (%0, %%"REG_a"), %%mm0 \n\t" + "movq 8(%0, %%"REG_a"), %%mm1 \n\t" + "movq (%1, %%"REG_a"), %%mm4 \n\t" + "movq 8(%1, %%"REG_a"), %%mm5 \n\t" "pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i] "pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i] "pxor %%mm2, %%mm2 \n\t" @@ -441,8 +442,8 @@ asm volatile( "paddw %%mm5, %%mm1 \n\t" // (abs(block[i])*2 + 1)*q "pxor %%mm4, %%mm4 \n\t" "pxor %%mm5, %%mm5 \n\t" // FIXME slow - "pcmpeqw (%0, %%eax), %%mm4 \n\t" // block[i] == 0 ? -1 : 0 - "pcmpeqw 8(%0, %%eax), %%mm5 \n\t" // block[i] == 0 ? -1 : 0 + "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0 + "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? 
-1 : 0 "psrlw $4, %%mm0 \n\t" "psrlw $4, %%mm1 \n\t" "pxor %%mm2, %%mm0 \n\t" @@ -453,10 +454,10 @@ asm volatile( "pandn %%mm1, %%mm5 \n\t" "pxor %%mm4, %%mm7 \n\t" "pxor %%mm5, %%mm7 \n\t" - "movq %%mm4, (%0, %%eax) \n\t" - "movq %%mm5, 8(%0, %%eax) \n\t" + "movq %%mm4, (%0, %%"REG_a") \n\t" + "movq %%mm5, 8(%0, %%"REG_a") \n\t" - "addl $16, %%eax \n\t" + "add $16, %%"REG_a" \n\t" "jng 1b \n\t" "movd 124(%0, %3), %%mm0 \n\t" "movq %%mm7, %%mm6 \n\t" @@ -471,7 +472,7 @@ asm volatile( "movd %%mm0, 124(%0, %3) \n\t" ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "r" (-2*nCoeffs) - : "%eax", "memory" + : "%"REG_a, "memory" ); } @@ -499,11 +500,11 @@ static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w) "punpckhwd %%mm1, %%mm1 \n\t" "punpckhdq %%mm1, %%mm1 \n\t" "movq %%mm1, (%0, %2) \n\t" - "addl %1, %0 \n\t" - "cmpl %3, %0 \n\t" + "add %1, %0 \n\t" + "cmp %3, %0 \n\t" " jb 1b \n\t" : "+r" (ptr) - : "r" (wrap), "r" (width), "r" (ptr + wrap*height) + : "r" ((long)wrap), "r" ((long)width), "r" (ptr + wrap*height) ); } else @@ -522,11 +523,11 @@ static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w) "punpckhdq %%mm1, %%mm1 \n\t" "movq %%mm1, (%0, %2) \n\t" "movq %%mm1, 8(%0, %2) \n\t" - "addl %1, %0 \n\t" - "cmpl %3, %0 \n\t" + "add %1, %0 \n\t" + "cmp %3, %0 \n\t" " jb 1b \n\t" : "+r" (ptr) - : "r" (wrap), "r" (width), "r" (ptr + wrap*height) + : "r" ((long)wrap), "r" ((long)width), "r" (ptr + wrap*height) ); } @@ -540,11 +541,11 @@ static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w) "movq %%mm0, (%0, %2) \n\t" "movq %%mm0, (%0, %2, 2) \n\t" "movq %%mm0, (%0, %3) \n\t" - "addl $8, %0 \n\t" - "cmpl %4, %0 \n\t" + "add $8, %0 \n\t" + "cmp %4, %0 \n\t" " jb 1b \n\t" : "+r" (ptr) - : "r" ((int)buf - (int)ptr - w), "r" (-wrap), "r" (-wrap*3), "r" (ptr+width+2*w) + : "r" ((long)buf - (long)ptr - w), "r" ((long)-wrap), "r" ((long)-wrap*3), "r" (ptr+width+2*w) ); ptr= last_line + 
(i + 1) * wrap - w; asm volatile( @@ -554,11 +555,11 @@ static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w) "movq %%mm0, (%0, %2) \n\t" "movq %%mm0, (%0, %2, 2) \n\t" "movq %%mm0, (%0, %3) \n\t" - "addl $8, %0 \n\t" - "cmpl %4, %0 \n\t" + "add $8, %0 \n\t" + "cmp %4, %0 \n\t" " jb 1b \n\t" : "+r" (ptr) - : "r" ((int)last_line - (int)ptr - w), "r" (wrap), "r" (wrap*3), "r" (ptr+width+2*w) + : "r" ((long)last_line - (long)ptr - w), "r" ((long)wrap), "r" ((long)wrap*3), "r" (ptr+width+2*w) ); } } @@ -607,10 +608,10 @@ static void denoise_dct_mmx(MpegEncContext *s, DCTELEM *block){ "movq %%mm2, 8(%1) \n\t" "movq %%mm5, 16(%1) \n\t" "movq %%mm3, 24(%1) \n\t" - "addl $16, %0 \n\t" - "addl $32, %1 \n\t" - "addl $16, %2 \n\t" - "cmpl %3, %0 \n\t" + "add $16, %0 \n\t" + "add $32, %1 \n\t" + "add $16, %2 \n\t" + "cmp %3, %0 \n\t" " jb 1b \n\t" : "+r" (block), "+r" (sum), "+r" (offset) : "r"(block+64) @@ -661,10 +662,10 @@ static void denoise_dct_sse2(MpegEncContext *s, DCTELEM *block){ "movdqa %%xmm6, 16(%1) \n\t" "movdqa %%xmm5, 32(%1) \n\t" "movdqa %%xmm0, 48(%1) \n\t" - "addl $32, %0 \n\t" - "addl $64, %1 \n\t" - "addl $32, %2 \n\t" - "cmpl %3, %0 \n\t" + "add $32, %0 \n\t" + "add $64, %1 \n\t" + "add $32, %2 \n\t" + "cmp %3, %0 \n\t" " jb 1b \n\t" : "+r" (block), "+r" (sum), "+r" (offset) : "r"(block+64) diff --git a/src/libffmpeg/libavcodec/i386/mpegvideo_mmx_template.c b/src/libffmpeg/libavcodec/i386/mpegvideo_mmx_template.c index d4ed61ecb..c9354dc1b 100644 --- a/src/libffmpeg/libavcodec/i386/mpegvideo_mmx_template.c +++ b/src/libffmpeg/libavcodec/i386/mpegvideo_mmx_template.c @@ -36,7 +36,8 @@ static int RENAME(dct_quantize)(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow) { - int level=0, last_non_zero_p1, q; //=0 is cuz gcc says uninitalized ... + long last_non_zero_p1; + int level=0, q; //=0 is cuz gcc says uninitalized ... 
const uint16_t *qmat, *bias; __align8 int16_t temp_block[64]; @@ -58,7 +59,7 @@ static int RENAME(dct_quantize)(MpegEncContext *s, if (!s->h263_aic) { #if 1 asm volatile ( - "imul %%ecx \n\t" + "mul %%ecx \n\t" : "=d" (level), "=a"(dummy) : "a" ((block[0]>>2) + q), "c" (inverse[q<<1]) ); @@ -87,21 +88,21 @@ static int RENAME(dct_quantize)(MpegEncContext *s, qmat = s->q_inter_matrix16[qscale][0]; } - if(s->out_format == FMT_H263 && s->mpeg_quant==0){ + if((s->out_format == FMT_H263 || s->out_format == FMT_H261) && s->mpeg_quant==0){ asm volatile( - "movd %%eax, %%mm3 \n\t" // last_non_zero_p1 + "movd %%"REG_a", %%mm3 \n\t" // last_non_zero_p1 SPREADW(%%mm3) "pxor %%mm7, %%mm7 \n\t" // 0 "pxor %%mm4, %%mm4 \n\t" // 0 "movq (%2), %%mm5 \n\t" // qmat[0] "pxor %%mm6, %%mm6 \n\t" "psubw (%3), %%mm6 \n\t" // -bias[0] - "movl $-128, %%eax \n\t" + "mov $-128, %%"REG_a" \n\t" ".balign 16 \n\t" "1: \n\t" "pxor %%mm1, %%mm1 \n\t" // 0 - "movq (%1, %%eax), %%mm0 \n\t" // block[i] + "movq (%1, %%"REG_a"), %%mm0 \n\t" // block[i] "pcmpgtw %%mm0, %%mm1 \n\t" // block[i] <= 0 ? 0xFF : 0x00 "pxor %%mm1, %%mm0 \n\t" "psubw %%mm1, %%mm0 \n\t" // ABS(block[i]) @@ -110,13 +111,13 @@ static int RENAME(dct_quantize)(MpegEncContext *s, "por %%mm0, %%mm4 \n\t" "pxor %%mm1, %%mm0 \n\t" "psubw %%mm1, %%mm0 \n\t" // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i]) - "movq %%mm0, (%5, %%eax) \n\t" + "movq %%mm0, (%5, %%"REG_a") \n\t" "pcmpeqw %%mm7, %%mm0 \n\t" // out==0 ? 
0xFF : 0x00 - "movq (%4, %%eax), %%mm1 \n\t" - "movq %%mm7, (%1, %%eax) \n\t" // 0 + "movq (%4, %%"REG_a"), %%mm1 \n\t" + "movq %%mm7, (%1, %%"REG_a") \n\t" // 0 "pandn %%mm1, %%mm0 \n\t" PMAXW(%%mm0, %%mm3) - "addl $8, %%eax \n\t" + "add $8, %%"REG_a" \n\t" " js 1b \n\t" "movq %%mm3, %%mm0 \n\t" "psrlq $32, %%mm3 \n\t" @@ -124,8 +125,8 @@ static int RENAME(dct_quantize)(MpegEncContext *s, "movq %%mm3, %%mm0 \n\t" "psrlq $16, %%mm3 \n\t" PMAXW(%%mm0, %%mm3) - "movd %%mm3, %%eax \n\t" - "movzbl %%al, %%eax \n\t" // last_non_zero_p1 + "movd %%mm3, %%"REG_a" \n\t" + "movzb %%al, %%"REG_a" \n\t" // last_non_zero_p1 : "+a" (last_non_zero_p1) : "r" (block+64), "r" (qmat), "r" (bias), "r" (inv_zigzag_direct16+64), "r" (temp_block+64) @@ -142,32 +143,32 @@ static int RENAME(dct_quantize)(MpegEncContext *s, ); }else{ // FMT_H263 asm volatile( - "movd %%eax, %%mm3 \n\t" // last_non_zero_p1 + "movd %%"REG_a", %%mm3 \n\t" // last_non_zero_p1 SPREADW(%%mm3) "pxor %%mm7, %%mm7 \n\t" // 0 "pxor %%mm4, %%mm4 \n\t" // 0 - "movl $-128, %%eax \n\t" + "mov $-128, %%"REG_a" \n\t" ".balign 16 \n\t" "1: \n\t" "pxor %%mm1, %%mm1 \n\t" // 0 - "movq (%1, %%eax), %%mm0 \n\t" // block[i] + "movq (%1, %%"REG_a"), %%mm0 \n\t" // block[i] "pcmpgtw %%mm0, %%mm1 \n\t" // block[i] <= 0 ? 0xFF : 0x00 "pxor %%mm1, %%mm0 \n\t" "psubw %%mm1, %%mm0 \n\t" // ABS(block[i]) - "movq (%3, %%eax), %%mm6 \n\t" // bias[0] + "movq (%3, %%"REG_a"), %%mm6 \n\t" // bias[0] "paddusw %%mm6, %%mm0 \n\t" // ABS(block[i]) + bias[0] - "movq (%2, %%eax), %%mm5 \n\t" // qmat[i] + "movq (%2, %%"REG_a"), %%mm5 \n\t" // qmat[i] "pmulhw %%mm5, %%mm0 \n\t" // (ABS(block[i])*qmat[0] + bias[0]*qmat[0])>>16 "por %%mm0, %%mm4 \n\t" "pxor %%mm1, %%mm0 \n\t" "psubw %%mm1, %%mm0 \n\t" // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i]) - "movq %%mm0, (%5, %%eax) \n\t" + "movq %%mm0, (%5, %%"REG_a") \n\t" "pcmpeqw %%mm7, %%mm0 \n\t" // out==0 ? 
0xFF : 0x00 - "movq (%4, %%eax), %%mm1 \n\t" - "movq %%mm7, (%1, %%eax) \n\t" // 0 + "movq (%4, %%"REG_a"), %%mm1 \n\t" + "movq %%mm7, (%1, %%"REG_a") \n\t" // 0 "pandn %%mm1, %%mm0 \n\t" PMAXW(%%mm0, %%mm3) - "addl $8, %%eax \n\t" + "add $8, %%"REG_a" \n\t" " js 1b \n\t" "movq %%mm3, %%mm0 \n\t" "psrlq $32, %%mm3 \n\t" @@ -175,8 +176,8 @@ static int RENAME(dct_quantize)(MpegEncContext *s, "movq %%mm3, %%mm0 \n\t" "psrlq $16, %%mm3 \n\t" PMAXW(%%mm0, %%mm3) - "movd %%mm3, %%eax \n\t" - "movzbl %%al, %%eax \n\t" // last_non_zero_p1 + "movd %%mm3, %%"REG_a" \n\t" + "movzb %%al, %%"REG_a" \n\t" // last_non_zero_p1 : "+a" (last_non_zero_p1) : "r" (block+64), "r" (qmat+64), "r" (bias+64), "r" (inv_zigzag_direct16+64), "r" (temp_block+64) diff --git a/src/libffmpeg/libavcodec/idcinvideo.c b/src/libffmpeg/libavcodec/idcinvideo.c index e53246bd1..f5df5a49d 100644 --- a/src/libffmpeg/libavcodec/idcinvideo.c +++ b/src/libffmpeg/libavcodec/idcinvideo.c @@ -192,7 +192,7 @@ static void idcin_decode_vlcs(IdcinContext *s) while(node_num >= HUF_TOKENS) { if(!bit_pos) { - if(dat_pos > s->size) { + if(dat_pos >= s->size) { av_log(s->avctx, AV_LOG_ERROR, "Huffman decode error.\n"); return; } diff --git a/src/libffmpeg/libavcodec/imgconvert.c b/src/libffmpeg/libavcodec/imgconvert.c index b351d2219..1ba723a95 100644 --- a/src/libffmpeg/libavcodec/imgconvert.c +++ b/src/libffmpeg/libavcodec/imgconvert.c @@ -97,6 +97,14 @@ static PixFmtInfo pix_fmt_info[PIX_FMT_NB] = { .depth = 8, .x_chroma_shift = 1, .y_chroma_shift = 0, }, + [PIX_FMT_UYVY422] = { + .name = "uyvy422", + .nb_channels = 1, + .color_type = FF_COLOR_YUV, + .pixel_type = FF_PIXEL_PACKED, + .depth = 8, + .x_chroma_shift = 1, .y_chroma_shift = 0, + }, [PIX_FMT_YUV410P] = { .name = "yuv410p", .nb_channels = 3, @@ -213,6 +221,20 @@ static PixFmtInfo pix_fmt_info[PIX_FMT_NB] = { .pixel_type = FF_PIXEL_PALETTE, .depth = 8, }, + [PIX_FMT_XVMC_MPEG2_MC] = { + .name = "xvmcmc", + }, + [PIX_FMT_XVMC_MPEG2_IDCT] = { + .name = 
"xvmcidct", + }, + [PIX_FMT_UYVY411] = { + .name = "uyvy411", + .nb_channels = 1, + .color_type = FF_COLOR_YUV, + .pixel_type = FF_PIXEL_PACKED, + .depth = 8, + .x_chroma_shift = 2, .y_chroma_shift = 0, + }, }; void avcodec_get_chroma_sub_sample(int pix_fmt, int *h_shift, int *v_shift) @@ -246,6 +268,9 @@ int avpicture_fill(AVPicture *picture, uint8_t *ptr, int size, w2, h2, size2; PixFmtInfo *pinfo; + if(avcodec_check_dimensions(NULL, width, height)) + goto fail; + pinfo = &pix_fmt_info[pix_fmt]; size = width * height; switch(pix_fmt) { @@ -288,6 +313,18 @@ int avpicture_fill(AVPicture *picture, uint8_t *ptr, picture->data[2] = NULL; picture->linesize[0] = width * 2; return size * 2; + case PIX_FMT_UYVY422: + picture->data[0] = ptr; + picture->data[1] = NULL; + picture->data[2] = NULL; + picture->linesize[0] = width * 2; + return size * 2; + case PIX_FMT_UYVY411: + picture->data[0] = ptr; + picture->data[1] = NULL; + picture->data[2] = NULL; + picture->linesize[0] = width + width/2; + return size + size/2; case PIX_FMT_GRAY8: picture->data[0] = ptr; picture->data[1] = NULL; @@ -310,6 +347,7 @@ int avpicture_fill(AVPicture *picture, uint8_t *ptr, picture->linesize[1] = 4; return size2 + 256 * 4; default: +fail: picture->data[0] = NULL; picture->data[1] = NULL; picture->data[2] = NULL; @@ -326,13 +364,17 @@ int avpicture_layout(const AVPicture* src, int pix_fmt, int width, int height, const unsigned char* s; int size = avpicture_get_size(pix_fmt, width, height); - if (size > dest_size) + if (size > dest_size || size < 0) return -1; if (pf->pixel_type == FF_PIXEL_PACKED || pf->pixel_type == FF_PIXEL_PALETTE) { - if (pix_fmt == PIX_FMT_YUV422 || pix_fmt == PIX_FMT_RGB565 || - pix_fmt == PIX_FMT_RGB555) - w = width * 2; + if (pix_fmt == PIX_FMT_YUV422 || + pix_fmt == PIX_FMT_UYVY422 || + pix_fmt == PIX_FMT_RGB565 || + pix_fmt == PIX_FMT_RGB555) + w = width * 2; + else if (pix_fmt == PIX_FMT_UYVY411) + w = width + width/2; else if (pix_fmt == PIX_FMT_PAL8) w = width; 
else @@ -342,7 +384,7 @@ int avpicture_layout(const AVPicture* src, int pix_fmt, int width, int height, h = height; } else { data_planes = pf->nb_channels; - w = width; + w = (width*pf->depth + 7)/8; h = height; } @@ -439,10 +481,14 @@ static int avg_bits_per_pixel(int pix_fmt) case FF_PIXEL_PACKED: switch(pix_fmt) { case PIX_FMT_YUV422: + case PIX_FMT_UYVY422: case PIX_FMT_RGB565: case PIX_FMT_RGB555: bits = 16; break; + case PIX_FMT_UYVY411: + bits = 12; + break; default: bits = pf->depth * pf->nb_channels; break; @@ -551,10 +597,14 @@ void img_copy(AVPicture *dst, const AVPicture *src, case FF_PIXEL_PACKED: switch(pix_fmt) { case PIX_FMT_YUV422: + case PIX_FMT_UYVY422: case PIX_FMT_RGB565: case PIX_FMT_RGB555: bits = 16; break; + case PIX_FMT_UYVY411: + bits = 12; + break; default: bits = pf->depth * pf->nb_channels; break; @@ -649,6 +699,98 @@ static void yuv422_to_yuv420p(AVPicture *dst, const AVPicture *src, } } +static void uyvy422_to_yuv420p(AVPicture *dst, const AVPicture *src, + int width, int height) +{ + const uint8_t *p, *p1; + uint8_t *lum, *cr, *cb, *lum1, *cr1, *cb1; + int w; + + p1 = src->data[0]; + + lum1 = dst->data[0]; + cb1 = dst->data[1]; + cr1 = dst->data[2]; + + for(;height >= 1; height -= 2) { + p = p1; + lum = lum1; + cb = cb1; + cr = cr1; + for(w = width; w >= 2; w -= 2) { + lum[0] = p[1]; + cb[0] = p[0]; + lum[1] = p[3]; + cr[0] = p[2]; + p += 4; + lum += 2; + cb++; + cr++; + } + if (w) { + lum[0] = p[1]; + cb[0] = p[0]; + cr[0] = p[2]; + cb++; + cr++; + } + p1 += src->linesize[0]; + lum1 += dst->linesize[0]; + if (height>1) { + p = p1; + lum = lum1; + for(w = width; w >= 2; w -= 2) { + lum[0] = p[1]; + lum[1] = p[3]; + p += 4; + lum += 2; + } + if (w) { + lum[0] = p[1]; + } + p1 += src->linesize[0]; + lum1 += dst->linesize[0]; + } + cb1 += dst->linesize[1]; + cr1 += dst->linesize[2]; + } +} + + +static void uyvy422_to_yuv422p(AVPicture *dst, const AVPicture *src, + int width, int height) +{ + const uint8_t *p, *p1; + uint8_t *lum, *cr, 
*cb, *lum1, *cr1, *cb1; + int w; + + p1 = src->data[0]; + lum1 = dst->data[0]; + cb1 = dst->data[1]; + cr1 = dst->data[2]; + for(;height > 0; height--) { + p = p1; + lum = lum1; + cb = cb1; + cr = cr1; + for(w = width; w >= 2; w -= 2) { + lum[0] = p[1]; + cb[0] = p[0]; + lum[1] = p[3]; + cr[0] = p[2]; + p += 4; + lum += 2; + cb++; + cr++; + } + p1 += src->linesize[0]; + lum1 += dst->linesize[0]; + cb1 += dst->linesize[1]; + cr1 += dst->linesize[2]; + } +} + + static void yuv422_to_yuv422p(AVPicture *dst, const AVPicture *src, int width, int height) { @@ -715,6 +857,141 @@ static void yuv422p_to_yuv422(AVPicture *dst, const AVPicture *src, } } +static void yuv422p_to_uyvy422(AVPicture *dst, const AVPicture *src, + int width, int height) +{ + uint8_t *p, *p1; + const uint8_t *lum, *cr, *cb, *lum1, *cr1, *cb1; + int w; + + p1 = dst->data[0]; + lum1 = src->data[0]; + cb1 = src->data[1]; + cr1 = src->data[2]; + for(;height > 0; height--) { + p = p1; + lum = lum1; + cb = cb1; + cr = cr1; + for(w = width; w >= 2; w -= 2) { + p[1] = lum[0]; + p[0] = cb[0]; + p[3] = lum[1]; + p[2] = cr[0]; + p += 4; + lum += 2; + cb++; + cr++; + } + p1 += dst->linesize[0]; + lum1 += src->linesize[0]; + cb1 += src->linesize[1]; + cr1 += src->linesize[2]; + } +} + +static void uyvy411_to_yuv411p(AVPicture *dst, const AVPicture *src, + int width, int height) +{ + const uint8_t *p, *p1; + uint8_t *lum, *cr, *cb, *lum1, *cr1, *cb1; + int w; + + p1 = src->data[0]; + lum1 = dst->data[0]; + cb1 = dst->data[1]; + cr1 = dst->data[2]; + for(;height > 0; height--) { + p = p1; + lum = lum1; + cb = cb1; + cr = cr1; + for(w = width; w >= 4; w -= 4) { + cb[0] = p[0]; + lum[0] = p[1]; + lum[1] = p[2]; + cr[0] = p[3]; + lum[2] = p[4]; + lum[3] = p[5]; + p += 6; + lum += 4; + cb++; + cr++; + } + p1 += src->linesize[0]; + lum1 += dst->linesize[0]; + cb1 += dst->linesize[1]; + cr1 += dst->linesize[2]; + } +} + + +static void yuv420p_to_yuv422(AVPicture *dst, const AVPicture *src, + int width, int height) +{ + 
int w, h; + uint8_t *line1, *line2, *linesrc = dst->data[0]; + uint8_t *lum1, *lum2, *lumsrc = src->data[0]; + uint8_t *cb1, *cb2 = src->data[1]; + uint8_t *cr1, *cr2 = src->data[2]; + + for(h = height / 2; h--;) { + line1 = linesrc; + line2 = linesrc + dst->linesize[0]; + + lum1 = lumsrc; + lum2 = lumsrc + src->linesize[0]; + + cb1 = cb2; + cr1 = cr2; + + for(w = width / 2; w--;) { + *line1++ = *lum1++; *line2++ = *lum2++; + *line1++ = *line2++ = *cb1++; + *line1++ = *lum1++; *line2++ = *lum2++; + *line1++ = *line2++ = *cr1++; + } + + linesrc += dst->linesize[0] * 2; + lumsrc += src->linesize[0] * 2; + cb2 += src->linesize[1]; + cr2 += src->linesize[2]; + } +} + +static void yuv420p_to_uyvy422(AVPicture *dst, const AVPicture *src, + int width, int height) +{ + int w, h; + uint8_t *line1, *line2, *linesrc = dst->data[0]; + uint8_t *lum1, *lum2, *lumsrc = src->data[0]; + uint8_t *cb1, *cb2 = src->data[1]; + uint8_t *cr1, *cr2 = src->data[2]; + + for(h = height / 2; h--;) { + line1 = linesrc; + line2 = linesrc + dst->linesize[0]; + + lum1 = lumsrc; + lum2 = lumsrc + src->linesize[0]; + + cb1 = cb2; + cr1 = cr2; + + for(w = width / 2; w--;) { + *line1++ = *line2++ = *cb1++; + *line1++ = *lum1++; *line2++ = *lum2++; + *line1++ = *line2++ = *cr1++; + *line1++ = *lum1++; *line2++ = *lum2++; + } + + linesrc += dst->linesize[0] * 2; + lumsrc += src->linesize[0] * 2; + cb2 += src->linesize[1]; + cr2 += src->linesize[2]; + } +} + #define SCALEBITS 10 #define ONE_HALF (1 << (SCALEBITS - 1)) #define FIX(x) ((int) ((x) * (1<<SCALEBITS) + 0.5)) @@ -1424,6 +1701,9 @@ typedef struct ConvertEntry { */ static ConvertEntry convert_table[PIX_FMT_NB][PIX_FMT_NB] = { [PIX_FMT_YUV420P] = { + [PIX_FMT_YUV422] = { + .convert = yuv420p_to_yuv422, + }, [PIX_FMT_RGB555] = { .convert = yuv420p_to_rgb555 }, @@ -1439,11 +1719,17 @@ static ConvertEntry convert_table[PIX_FMT_NB][PIX_FMT_NB] = { [PIX_FMT_RGBA32] = { .convert = yuv420p_to_rgba32 }, + [PIX_FMT_UYVY422] = { + .convert = 
yuv420p_to_uyvy422, + }, }, [PIX_FMT_YUV422P] = { [PIX_FMT_YUV422] = { .convert = yuv422p_to_yuv422, }, + [PIX_FMT_UYVY422] = { + .convert = yuv422p_to_uyvy422, + }, }, [PIX_FMT_YUV444P] = { [PIX_FMT_RGB24] = { @@ -1480,7 +1766,14 @@ static ConvertEntry convert_table[PIX_FMT_NB][PIX_FMT_NB] = { .convert = yuv422_to_yuv422p, }, }, - + [PIX_FMT_UYVY422] = { + [PIX_FMT_YUV420P] = { + .convert = uyvy422_to_yuv420p, + }, + [PIX_FMT_YUV422P] = { + .convert = uyvy422_to_yuv422p, + }, + }, [PIX_FMT_RGB24] = { [PIX_FMT_YUV420P] = { .convert = rgb24_to_yuv420p @@ -1616,6 +1909,12 @@ static ConvertEntry convert_table[PIX_FMT_NB][PIX_FMT_NB] = { .convert = pal8_to_rgba32 }, }, + [PIX_FMT_UYVY411] = { + [PIX_FMT_YUV411P] = { + .convert = uyvy411_to_yuv411p, + }, + }, + }; int avpicture_alloc(AVPicture *picture, @@ -1625,6 +1924,8 @@ int avpicture_alloc(AVPicture *picture, void *ptr; size = avpicture_get_size(pix_fmt, width, height); + if(size<0) + goto fail; ptr = av_malloc(size); if (!ptr) goto fail; @@ -1683,7 +1984,7 @@ int img_convert(AVPicture *dst, int dst_pix_fmt, ce = &convert_table[src_pix_fmt][dst_pix_fmt]; if (ce->convert) { - /* specific convertion routine */ + /* specific conversion routine */ ce->convert(dst, src, dst_width, dst_height); return 0; } @@ -1838,6 +2139,14 @@ int img_convert(AVPicture *dst, int dst_pix_fmt, dst_pix_fmt == PIX_FMT_YUV422) { /* specific case: convert to YUV422P first */ int_pix_fmt = PIX_FMT_YUV422P; + } else if (src_pix_fmt == PIX_FMT_UYVY422 || + dst_pix_fmt == PIX_FMT_UYVY422) { + /* specific case: convert to YUV422P first */ + int_pix_fmt = PIX_FMT_YUV422P; + } else if (src_pix_fmt == PIX_FMT_UYVY411 || + dst_pix_fmt == PIX_FMT_UYVY411) { + /* specific case: convert to YUV411P first */ + int_pix_fmt = PIX_FMT_YUV411P; } else if ((src_pix->color_type == FF_COLOR_GRAY && src_pix_fmt != PIX_FMT_GRAY8) || (dst_pix->color_type == FF_COLOR_GRAY && diff --git a/src/libffmpeg/libavcodec/imgresample.c b/src/libffmpeg/libavcodec/imgresample.c 
index 14fdb1059..2c7e1120a 100644 --- a/src/libffmpeg/libavcodec/imgresample.c +++ b/src/libffmpeg/libavcodec/imgresample.c @@ -55,6 +55,8 @@ struct ImgReSampleContext { uint8_t *line_buf; }; +void av_build_filter(int16_t *filter, double factor, int tap_count, int phase_count, int scale, int type); + static inline int get_phase(int pos) { return ((pos) >> (POS_FRAC_BITS - PHASE_BITS)) & ((1 << PHASE_BITS) - 1); @@ -540,39 +542,6 @@ static void component_resample(ImgReSampleContext *s, } } -/* XXX: the following filter is quite naive, but it seems to suffice - for 4 taps */ -static void build_filter(int16_t *filter, float factor) -{ - int ph, i, v; - float x, y, tab[NB_TAPS], norm, mult; - - /* if upsampling, only need to interpolate, no filter */ - if (factor > 1.0) - factor = 1.0; - - for(ph=0;ph<NB_PHASES;ph++) { - norm = 0; - for(i=0;i<NB_TAPS;i++) { - - x = M_PI * ((float)(i - FCENTER) - (float)ph / NB_PHASES) * factor; - if (x == 0) - y = 1.0; - else - y = sin(x) / x; - tab[i] = y; - norm += y; - } - - /* normalize so that an uniform color remains the same */ - mult = (float)(1 << FILTER_BITS) / norm; - for(i=0;i<NB_TAPS;i++) { - v = (int)(tab[i] * mult); - filter[ph * NB_TAPS + i] = v; - } - } -} - ImgReSampleContext *img_resample_init(int owidth, int oheight, int iwidth, int iheight) { @@ -592,6 +561,8 @@ ImgReSampleContext *img_resample_full_init(int owidth, int oheight, s = av_mallocz(sizeof(ImgReSampleContext)); if (!s) return NULL; + if((unsigned)owidth >= UINT_MAX / (LINE_BUF_HEIGHT + NB_TAPS)) + return NULL; s->line_buf = av_mallocz(owidth * (LINE_BUF_HEIGHT + NB_TAPS)); if (!s->line_buf) goto fail; @@ -617,10 +588,10 @@ ImgReSampleContext *img_resample_full_init(int owidth, int oheight, s->h_incr = ((iwidth - leftBand - rightBand) * POS_FRAC) / s->pad_owidth; s->v_incr = ((iheight - topBand - bottomBand) * POS_FRAC) / s->pad_oheight; - build_filter(&s->h_filters[0][0], (float) s->pad_owidth / - (float) (iwidth - leftBand - rightBand)); - 
build_filter(&s->v_filters[0][0], (float) s->pad_oheight / - (float) (iheight - topBand - bottomBand)); + av_build_filter(&s->h_filters[0][0], (float) s->pad_owidth / + (float) (iwidth - leftBand - rightBand), NB_TAPS, NB_PHASES, 1<<FILTER_BITS, 0); + av_build_filter(&s->v_filters[0][0], (float) s->pad_oheight / + (float) (iheight - topBand - bottomBand), NB_TAPS, NB_PHASES, 1<<FILTER_BITS, 0); return s; fail: @@ -657,21 +628,7 @@ void img_resample_close(ImgReSampleContext *s) } #ifdef TEST - -void *av_mallocz(int size) -{ - void *ptr; - ptr = malloc(size); - memset(ptr, 0, size); - return ptr; -} - -void av_free(void *ptr) -{ - /* XXX: this test should not be needed on most libcs */ - if (ptr) - free(ptr); -} +#include <stdio.h> /* input */ #define XSIZE 256 @@ -698,11 +655,11 @@ static void dump_filter(int16_t *filter) int i, ph; for(ph=0;ph<NB_PHASES;ph++) { - printf("%2d: ", ph); + av_log(NULL, AV_LOG_INFO, "%2d: ", ph); for(i=0;i<NB_TAPS;i++) { - printf(" %5.2f", filter[ph * NB_TAPS + i] / 256.0); + av_log(NULL, AV_LOG_INFO, " %5.2f", filter[ph * NB_TAPS + i] / 256.0); } - printf("\n"); + av_log(NULL, AV_LOG_INFO, "\n"); } } @@ -766,20 +723,20 @@ int main(int argc, char **argv) fact = factors[i]; xsize = (int)(XSIZE * fact); ysize = (int)((YSIZE - 100) * fact); - s = img_resample_full_init(xsize, ysize, XSIZE, YSIZE, 50 ,50, 0, 0); - printf("Factor=%0.2f\n", fact); + s = img_resample_full_init(xsize, ysize, XSIZE, YSIZE, 50 ,50, 0, 0, 0, 0, 0, 0); + av_log(NULL, AV_LOG_INFO, "Factor=%0.2f\n", fact); dump_filter(&s->h_filters[0][0]); component_resample(s, img1, xsize, xsize, ysize, img + 50 * XSIZE, XSIZE, XSIZE, YSIZE - 100); img_resample_close(s); - sprintf(buf, "/tmp/out%d.pgm", i); + snprintf(buf, sizeof(buf), "/tmp/out%d.pgm", i); save_pgm(buf, img1, xsize, ysize); } /* mmx test */ #ifdef HAVE_MMX - printf("MMX test\n"); + av_log(NULL, AV_LOG_INFO, "MMX test\n"); fact = 0.72; xsize = (int)(XSIZE * fact); ysize = (int)(YSIZE * fact); @@ -793,10 +750,10 @@ 
int main(int argc, char **argv) component_resample(s, img2, xsize, xsize, ysize, img, XSIZE, XSIZE, YSIZE); if (memcmp(img1, img2, xsize * ysize) != 0) { - fprintf(stderr, "mmx error\n"); + av_log(NULL, AV_LOG_ERROR, "mmx error\n"); exit(1); } - printf("MMX OK\n"); + av_log(NULL, AV_LOG_INFO, "MMX OK\n"); #endif return 0; } diff --git a/src/libffmpeg/libavcodec/indeo3.c b/src/libffmpeg/libavcodec/indeo3.c index 14ff02858..351af2191 100644 --- a/src/libffmpeg/libavcodec/indeo3.c +++ b/src/libffmpeg/libavcodec/indeo3.c @@ -95,13 +95,16 @@ static void iv_Decode_Chunk(Indeo3DecodeContext *s, unsigned char *cur, long fflags2, unsigned char *hdr, unsigned char *buf2, int min_width_160); +#ifndef min #define min(a,b) ((a) < (b) ? (a) : (b)) +#endif /* ---------------------------------------------------------------------- */ static void iv_alloc_frames(Indeo3DecodeContext *s) { int luma_width, luma_height, luma_pixels, chroma_width, chroma_height, - chroma_pixels, bufsize, i; + chroma_pixels, i; + unsigned int bufsize; luma_width = (s->width + 3) & (~3); luma_height = (s->height + 3) & (~3); @@ -195,6 +198,10 @@ static unsigned long iv_decode_frame(Indeo3DecodeContext *s, hdr_height = le2me_16(*(uint16_t *)buf_pos); buf_pos += 2; hdr_width = le2me_16(*(uint16_t *)buf_pos); + + if(avcodec_check_dimensions(NULL, hdr_width, hdr_height)) + return -1; + buf_pos += 2; chroma_height = ((hdr_height >> 2) + 3) & 0x7ffc; chroma_width = ((hdr_width >> 2) + 3) & 0x7ffc; @@ -223,6 +230,9 @@ static unsigned long iv_decode_frame(Indeo3DecodeContext *s, hdr_height, buf_pos + offs * 2, fflags2, hdr_pos, buf_pos, min(hdr_width, 160)); + if (!(s->avctx->flags & CODEC_FLAG_GRAY)) + { + buf_pos = buf + 16 + offs2; offs = le2me_32(*(uint32_t *)buf_pos); buf_pos += 4; @@ -239,6 +249,8 @@ static unsigned long iv_decode_frame(Indeo3DecodeContext *s, chroma_height, buf_pos + offs * 2, fflags2, hdr_pos, buf_pos, min(chroma_width, 40)); + } + return 8; } @@ -304,7 +316,7 @@ static void 
iv_Decode_Chunk(Indeo3DecodeContext *s, unsigned char bit_buf; unsigned long bit_pos, lv, lv1, lv2; long *width_tbl, width_tbl_arr[10]; - char *ref_vectors; + signed char *ref_vectors; unsigned char *cur_frm_pos, *ref_frm_pos, *cp, *cp2; uint32_t *cur_lp, *ref_lp; const uint32_t *correction_lp[2], *correctionloworder_lp[2], *correctionhighorder_lp[2]; @@ -312,6 +324,7 @@ static void iv_Decode_Chunk(Indeo3DecodeContext *s, ustr_t strip_tbl[20], *strip; int i, j, k, lp1, lp2, flag1, cmd, blks_width, blks_height, region_160_width, rle_v1, rle_v2, rle_v3; + unsigned short res; bit_buf = 0; ref_vectors = NULL; @@ -446,14 +459,15 @@ static void iv_Decode_Chunk(Indeo3DecodeContext *s, switch(correction_type_sp[0][k]) { case 0: - *cur_lp = ((*ref_lp >> 1) + correction_lp[lp2 & 0x01][k]) << 1; + *cur_lp = le2me_32(((le2me_32(*ref_lp) >> 1) + correction_lp[lp2 & 0x01][k]) << 1); lp2++; break; case 1: - ((unsigned short *)cur_lp)[0] = ((((unsigned short *)(ref_lp))[0] >> 1) - + correction_lp[lp2 & 0x01][*buf1++]) << 1; - ((unsigned short *)cur_lp)[1] = ((((unsigned short *)(ref_lp))[1] >> 1) - + correction_lp[lp2 & 0x01][k]) << 1; + res = ((le2me_16(((unsigned short *)(ref_lp))[0]) >> 1) + correction_lp[lp2 & 0x01][*buf1]) << 1; + ((unsigned short *)cur_lp)[0] = le2me_16(res); + res = ((le2me_16(((unsigned short *)(ref_lp))[1]) >> 1) + correction_lp[lp2 & 0x01][k]) << 1; + ((unsigned short *)cur_lp)[1] = le2me_16(res); + buf1++; lp2++; break; case 2: @@ -548,23 +562,25 @@ static void iv_Decode_Chunk(Indeo3DecodeContext *s, switch(correction_type_sp[lp2 & 0x01][k]) { case 0: - cur_lp[width_tbl[1]] = ((*ref_lp >> 1) + correction_lp[lp2 & 0x01][k]) << 1; + cur_lp[width_tbl[1]] = le2me_32(((le2me_32(*ref_lp) >> 1) + correction_lp[lp2 & 0x01][k]) << 1); if(lp2 > 0 || flag1 == 0 || strip->ypos != 0) cur_lp[0] = ((cur_lp[-width_tbl[1]] >> 1) + (cur_lp[width_tbl[1]] >> 1)) & 0xFEFEFEFE; else - cur_lp[0] = ((*ref_lp >> 1) + correction_lp[lp2 & 0x01][k]) << 1; + cur_lp[0] = 
le2me_32(((le2me_32(*ref_lp) >> 1) + correction_lp[lp2 & 0x01][k]) << 1); lp2++; break; case 1: - ((unsigned short *)cur_lp)[width_tbl[2]] = - ((((unsigned short *)ref_lp)[0] >> 1) + correction_lp[lp2 & 0x01][*buf1++]) << 1; - ((unsigned short *)cur_lp)[width_tbl[2]+1] = - ((((unsigned short *)ref_lp)[1] >> 1) + correction_lp[lp2 & 0x01][k]) << 1; + res = ((le2me_16(((unsigned short *)ref_lp)[0]) >> 1) + correction_lp[lp2 & 0x01][*buf1]) << 1; + ((unsigned short *)cur_lp)[width_tbl[2]] = le2me_16(res); + res = ((le2me_16(((unsigned short *)ref_lp)[1]) >> 1) + correction_lp[lp2 & 0x01][k]) << 1; + ((unsigned short *)cur_lp)[width_tbl[2]+1] = le2me_16(res); + if(lp2 > 0 || flag1 == 0 || strip->ypos != 0) cur_lp[0] = ((cur_lp[-width_tbl[1]] >> 1) + (cur_lp[width_tbl[1]] >> 1)) & 0xFEFEFEFE; else cur_lp[0] = cur_lp[width_tbl[1]]; + buf1++; lp2++; break; @@ -660,16 +676,23 @@ static void iv_Decode_Chunk(Indeo3DecodeContext *s, lv1 = ref_lp[0]; lv2 = ref_lp[1]; if(lp2 == 0 && flag1 != 0) { +#ifdef WORDS_BIGENDIAN + lv1 = lv1 & 0xFF00FF00; + lv1 = (lv1 >> 8) | lv1; + lv2 = lv2 & 0xFF00FF00; + lv2 = (lv2 >> 8) | lv2; +#else lv1 = lv1 & 0x00FF00FF; lv1 = (lv1 << 8) | lv1; lv2 = lv2 & 0x00FF00FF; lv2 = (lv2 << 8) | lv2; +#endif } switch(correction_type_sp[lp2 & 0x01][k]) { case 0: - cur_lp[width_tbl[1]] = ((lv1 >> 1) + correctionloworder_lp[lp2 & 0x01][k]) << 1; - cur_lp[width_tbl[1]+1] = ((lv2 >> 1) + correctionhighorder_lp[lp2 & 0x01][k]) << 1; + cur_lp[width_tbl[1]] = le2me_32(((le2me_32(lv1) >> 1) + correctionloworder_lp[lp2 & 0x01][k]) << 1); + cur_lp[width_tbl[1]+1] = le2me_32(((le2me_32(lv2) >> 1) + correctionhighorder_lp[lp2 & 0x01][k]) << 1); if(lp2 > 0 || strip->ypos != 0 || flag1 == 0) { cur_lp[0] = ((cur_lp[-width_tbl[1]] >> 1) + (cur_lp[width_tbl[1]] >> 1)) & 0xFEFEFEFE; cur_lp[1] = ((cur_lp[-width_tbl[1]+1] >> 1) + (cur_lp[width_tbl[1]+1] >> 1)) & 0xFEFEFEFE; @@ -681,8 +704,8 @@ static void iv_Decode_Chunk(Indeo3DecodeContext *s, break; case 1: - 
cur_lp[width_tbl[1]] = ((lv1 >> 1) + correctionloworder_lp[lp2 & 0x01][*buf1++]) << 1; - cur_lp[width_tbl[1]+1] = ((lv2 >> 1) + correctionloworder_lp[lp2 & 0x01][k]) << 1; + cur_lp[width_tbl[1]] = le2me_32(((le2me_32(lv1) >> 1) + correctionloworder_lp[lp2 & 0x01][*buf1]) << 1); + cur_lp[width_tbl[1]+1] = le2me_32(((le2me_32(lv2) >> 1) + correctionloworder_lp[lp2 & 0x01][k]) << 1); if(lp2 > 0 || strip->ypos != 0 || flag1 == 0) { cur_lp[0] = ((cur_lp[-width_tbl[1]] >> 1) + (cur_lp[width_tbl[1]] >> 1)) & 0xFEFEFEFE; cur_lp[1] = ((cur_lp[-width_tbl[1]+1] >> 1) + (cur_lp[width_tbl[1]+1] >> 1)) & 0xFEFEFEFE; @@ -690,6 +713,7 @@ static void iv_Decode_Chunk(Indeo3DecodeContext *s, cur_lp[0] = cur_lp[width_tbl[1]]; cur_lp[1] = cur_lp[width_tbl[1]+1]; } + buf1++; lp2++; break; @@ -824,20 +848,20 @@ static void iv_Decode_Chunk(Indeo3DecodeContext *s, case 0: lv1 = correctionloworder_lp[lp2 & 0x01][k]; lv2 = correctionhighorder_lp[lp2 & 0x01][k]; - cur_lp[0] = ((ref_lp[0] >> 1) + lv1) << 1; - cur_lp[1] = ((ref_lp[1] >> 1) + lv2) << 1; - cur_lp[width_tbl[1]] = ((ref_lp[width_tbl[1]] >> 1) + lv1) << 1; - cur_lp[width_tbl[1]+1] = ((ref_lp[width_tbl[1]+1] >> 1) + lv2) << 1; + cur_lp[0] = le2me_32(((le2me_32(ref_lp[0]) >> 1) + lv1) << 1); + cur_lp[1] = le2me_32(((le2me_32(ref_lp[1]) >> 1) + lv2) << 1); + cur_lp[width_tbl[1]] = le2me_32(((le2me_32(ref_lp[width_tbl[1]]) >> 1) + lv1) << 1); + cur_lp[width_tbl[1]+1] = le2me_32(((le2me_32(ref_lp[width_tbl[1]+1]) >> 1) + lv2) << 1); lp2++; break; case 1: lv1 = correctionloworder_lp[lp2 & 0x01][*buf1++]; lv2 = correctionloworder_lp[lp2 & 0x01][k]; - cur_lp[0] = ((ref_lp[0] >> 1) + lv1) << 1; - cur_lp[1] = ((ref_lp[1] >> 1) + lv2) << 1; - cur_lp[width_tbl[1]] = ((ref_lp[width_tbl[1]] >> 1) + lv1) << 1; - cur_lp[width_tbl[1]+1] = ((ref_lp[width_tbl[1]+1] >> 1) + lv2) << 1; + cur_lp[0] = le2me_32(((le2me_32(ref_lp[0]) >> 1) + lv1) << 1); + cur_lp[1] = le2me_32(((le2me_32(ref_lp[1]) >> 1) + lv2) << 1); + cur_lp[width_tbl[1]] = 
le2me_32(((le2me_32(ref_lp[width_tbl[1]]) >> 1) + lv1) << 1); + cur_lp[width_tbl[1]+1] = le2me_32(((le2me_32(ref_lp[width_tbl[1]+1]) >> 1) + lv2) << 1); lp2++; break; @@ -925,18 +949,22 @@ static void iv_Decode_Chunk(Indeo3DecodeContext *s, switch(correction_type_sp[lp2 & 0x01][k]) { case 0: - cur_lp[0] = ((*ref_lp >> 1) + correction_lp[lp2 & 0x01][k]) << 1; - cur_lp[width_tbl[1]] = ((ref_lp[width_tbl[1]] >> 1) + correction_lp[lp2 & 0x01][k]) << 1; + cur_lp[0] = le2me_32(((le2me_32(*ref_lp) >> 1) + correction_lp[lp2 & 0x01][k]) << 1); + cur_lp[width_tbl[1]] = le2me_32(((le2me_32(ref_lp[width_tbl[1]]) >> 1) + correction_lp[lp2 & 0x01][k]) << 1); lp2++; break; case 1: lv1 = (unsigned short)(correction_lp[lp2 & 0x01][*buf1++]); lv2 = (unsigned short)(correction_lp[lp2 & 0x01][k]); - ((unsigned short *)cur_lp)[0] = ((((unsigned short *)ref_lp)[0] >> 1) + lv1) << 1; - ((unsigned short *)cur_lp)[1] = ((((unsigned short *)ref_lp)[1] >> 1) + lv2) << 1; - ((unsigned short *)cur_lp)[width_tbl[2]] = ((((unsigned short *)ref_lp)[width_tbl[2]] >> 1) + lv1) << 1; - ((unsigned short *)cur_lp)[width_tbl[2]+1] = ((((unsigned short *)ref_lp)[width_tbl[2]+1] >> 1) + lv2) << 1; + res = (unsigned short)(((le2me_16(((unsigned short *)ref_lp)[0]) >> 1) + lv1) << 1); + ((unsigned short *)cur_lp)[0] = le2me_16(res); + res = (unsigned short)(((le2me_16(((unsigned short *)ref_lp)[1]) >> 1) + lv2) << 1); + ((unsigned short *)cur_lp)[1] = le2me_16(res); + res = (unsigned short)(((le2me_16(((unsigned short *)ref_lp)[width_tbl[2]]) >> 1) + lv1) << 1); + ((unsigned short *)cur_lp)[width_tbl[2]] = le2me_16(res); + res = (unsigned short)(((le2me_16(((unsigned short *)ref_lp)[width_tbl[2]+1]) >> 1) + lv2) << 1); + ((unsigned short *)cur_lp)[width_tbl[2]+1] = le2me_16(res); lp2++; break; @@ -1056,11 +1084,6 @@ static int indeo3_decode_frame(AVCodecContext *avctx, unsigned char *src, *dest; int y; - /* no supplementary picture */ - if (buf_size == 0) { - return 0; - } - iv_decode_frame(s, buf, 
buf_size); if(s->frame.data[0]) @@ -1080,6 +1103,8 @@ static int indeo3_decode_frame(AVCodecContext *avctx, dest += s->frame.linesize[0]; } + if (!(s->avctx->flags & CODEC_FLAG_GRAY)) + { src = s->cur_frame->Ubuf; dest = s->frame.data[1]; for (y = 0; y < s->height / 4; y++) { @@ -1095,6 +1120,7 @@ static int indeo3_decode_frame(AVCodecContext *avctx, src += s->cur_frame->uv_w; dest += s->frame.linesize[2]; } + } *data_size=sizeof(AVFrame); *(AVFrame*)data= s->frame; diff --git a/src/libffmpeg/libavcodec/integer.c b/src/libffmpeg/libavcodec/integer.c index 025560f9e..38a826f86 100644 --- a/src/libffmpeg/libavcodec/integer.c +++ b/src/libffmpeg/libavcodec/integer.c @@ -47,6 +47,10 @@ AVInteger av_sub_i(AVInteger a, AVInteger b){ return a; } +/** + * returns the rounded down value of the logarithm of base 2 of the given AVInteger. + * this is simply the index of the most significant bit which is 1. Or 0 of all bits are 0 + */ int av_log2_i(AVInteger a){ int i; @@ -78,6 +82,9 @@ AVInteger av_mul_i(AVInteger a, AVInteger b){ return out; } +/** + * returns 0 if a==b, 1 if a>b and -1 if a<b. + */ int av_cmp_i(AVInteger a, AVInteger b){ int i; int v= (int16_t)a.v[AV_INTEGER_SIZE-1] - (int16_t)b.v[AV_INTEGER_SIZE-1]; @@ -90,6 +97,10 @@ int av_cmp_i(AVInteger a, AVInteger b){ return 0; } +/** + * bitwise shift. + * @param s the number of bits by which the value should be shifted right, may be negative for shifting left + */ AVInteger av_shr_i(AVInteger a, int s){ AVInteger out; int i; @@ -104,6 +115,10 @@ AVInteger av_shr_i(AVInteger a, int s){ return out; } +/** + * returns a % b. + * @param quot a/b will be stored here + */ AVInteger av_mod_i(AVInteger *quot, AVInteger a, AVInteger b){ int i= av_log2_i(a) - av_log2_i(b); AVInteger quot_temp; @@ -128,12 +143,18 @@ AVInteger av_mod_i(AVInteger *quot, AVInteger a, AVInteger b){ return a; } +/** + * returns a/b. 
+ */ AVInteger av_div_i(AVInteger a, AVInteger b){ AVInteger quot; av_mod_i(&quot, a, b); return quot; } +/** + * converts the given int64_t to an AVInteger. + */ AVInteger av_int2i(int64_t a){ AVInteger out; int i; @@ -145,6 +166,11 @@ AVInteger av_int2i(int64_t a){ return out; } +/** + * converts the given AVInteger to an int64_t. + * if the AVInteger is too large to fit into an int64_t, + * then only the least significant 64bit will be used + */ int64_t av_i2int(AVInteger a){ int i; int64_t out=(int8_t)a.v[AV_INTEGER_SIZE-1]; diff --git a/src/libffmpeg/libavcodec/interplayvideo.c b/src/libffmpeg/libavcodec/interplayvideo.c index 06816ba3e..f4add08c0 100644 --- a/src/libffmpeg/libavcodec/interplayvideo.c +++ b/src/libffmpeg/libavcodec/interplayvideo.c @@ -47,7 +47,7 @@ /* debugging support */ #define DEBUG_INTERPLAY 0 #if DEBUG_INTERPLAY -#define debug_interplay printf +#define debug_interplay(x,...) av_log(NULL, AV_LOG_DEBUG, x, __VA_ARGS__) #else static inline void debug_interplay(const char *format, ...) { } #endif diff --git a/src/libffmpeg/libavcodec/jrevdct.c b/src/libffmpeg/libavcodec/jrevdct.c index 3bd78c192..c08d1241f 100644 --- a/src/libffmpeg/libavcodec/jrevdct.c +++ b/src/libffmpeg/libavcodec/jrevdct.c @@ -235,9 +235,7 @@ void j_rev_dct(DCTBLOCK data) /* The rotator is sqrt(2)*c(-6). 
*/ { if (d6) { - if (d4) { if (d2) { - if (d0) { /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */ z1 = MULTIPLY(d2 + d6, FIX_0_541196100); tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); @@ -250,21 +248,7 @@ void j_rev_dct(DCTBLOCK data) tmp13 = tmp0 - tmp3; tmp11 = tmp1 + tmp2; tmp12 = tmp1 - tmp2; - } else { - /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */ - z1 = MULTIPLY(d2 + d6, FIX_0_541196100); - tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); - tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); - - tmp0 = d4 << CONST_BITS; - - tmp10 = tmp0 + tmp3; - tmp13 = tmp0 - tmp3; - tmp11 = tmp2 - tmp0; - tmp12 = -(tmp0 + tmp2); - } } else { - if (d0) { /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */ tmp2 = MULTIPLY(-d6, FIX_1_306562965); tmp3 = MULTIPLY(d6, FIX_0_541196100); @@ -276,72 +260,9 @@ void j_rev_dct(DCTBLOCK data) tmp13 = tmp0 - tmp3; tmp11 = tmp1 + tmp2; tmp12 = tmp1 - tmp2; - } else { - /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */ - tmp2 = MULTIPLY(-d6, FIX_1_306562965); - tmp3 = MULTIPLY(d6, FIX_0_541196100); - - tmp0 = d4 << CONST_BITS; - - tmp10 = tmp0 + tmp3; - tmp13 = tmp0 - tmp3; - tmp11 = tmp2 - tmp0; - tmp12 = -(tmp0 + tmp2); - } } - } else { - if (d2) { - if (d0) { - /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */ - z1 = MULTIPLY(d2 + d6, FIX_0_541196100); - tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); - tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); - - tmp0 = d0 << CONST_BITS; - - tmp10 = tmp0 + tmp3; - tmp13 = tmp0 - tmp3; - tmp11 = tmp0 + tmp2; - tmp12 = tmp0 - tmp2; - } else { - /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */ - z1 = MULTIPLY(d2 + d6, FIX_0_541196100); - tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); - tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); - - tmp10 = tmp3; - tmp13 = -tmp3; - tmp11 = tmp2; - tmp12 = -tmp2; - } - } else { - if (d0) { - /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */ - tmp2 = MULTIPLY(-d6, FIX_1_306562965); - tmp3 = MULTIPLY(d6, FIX_0_541196100); - - tmp0 = d0 << CONST_BITS; - - tmp10 = tmp0 + tmp3; - tmp13 = tmp0 - tmp3; - tmp11 = tmp0 + tmp2; - tmp12 = tmp0 - tmp2; - } 
else { - /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */ - tmp2 = MULTIPLY(-d6, FIX_1_306562965); - tmp3 = MULTIPLY(d6, FIX_0_541196100); - - tmp10 = tmp3; - tmp13 = -tmp3; - tmp11 = tmp2; - tmp12 = -tmp2; - } - } - } } else { - if (d4) { if (d2) { - if (d0) { /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */ tmp2 = MULTIPLY(d2, FIX_0_541196100); tmp3 = MULTIPLY(d2, FIX_1_306562965); @@ -353,62 +274,11 @@ void j_rev_dct(DCTBLOCK data) tmp13 = tmp0 - tmp3; tmp11 = tmp1 + tmp2; tmp12 = tmp1 - tmp2; - } else { - /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */ - tmp2 = MULTIPLY(d2, FIX_0_541196100); - tmp3 = MULTIPLY(d2, FIX_1_306562965); - - tmp0 = d4 << CONST_BITS; - - tmp10 = tmp0 + tmp3; - tmp13 = tmp0 - tmp3; - tmp11 = tmp2 - tmp0; - tmp12 = -(tmp0 + tmp2); - } } else { - if (d0) { /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */ tmp10 = tmp13 = (d0 + d4) << CONST_BITS; tmp11 = tmp12 = (d0 - d4) << CONST_BITS; - } else { - /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */ - tmp10 = tmp13 = d4 << CONST_BITS; - tmp11 = tmp12 = -tmp10; - } - } - } else { - if (d2) { - if (d0) { - /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */ - tmp2 = MULTIPLY(d2, FIX_0_541196100); - tmp3 = MULTIPLY(d2, FIX_1_306562965); - - tmp0 = d0 << CONST_BITS; - - tmp10 = tmp0 + tmp3; - tmp13 = tmp0 - tmp3; - tmp11 = tmp0 + tmp2; - tmp12 = tmp0 - tmp2; - } else { - /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */ - tmp2 = MULTIPLY(d2, FIX_0_541196100); - tmp3 = MULTIPLY(d2, FIX_1_306562965); - - tmp10 = tmp3; - tmp13 = -tmp3; - tmp11 = tmp2; - tmp12 = -tmp2; - } - } else { - if (d0) { - /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */ - tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS; - } else { - /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */ - tmp10 = tmp13 = tmp11 = tmp12 = 0; - } } - } } /* Odd part per figure 8; the matrix is unitary and hence its @@ -711,9 +581,7 @@ void j_rev_dct(DCTBLOCK data) /* Even part: reverse the even part of the forward DCT. */ /* The rotator is sqrt(2)*c(-6). 
*/ if (d6) { - if (d4) { if (d2) { - if (d0) { /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */ z1 = MULTIPLY(d2 + d6, FIX_0_541196100); tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); @@ -726,21 +594,7 @@ void j_rev_dct(DCTBLOCK data) tmp13 = tmp0 - tmp3; tmp11 = tmp1 + tmp2; tmp12 = tmp1 - tmp2; - } else { - /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */ - z1 = MULTIPLY(d2 + d6, FIX_0_541196100); - tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); - tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); - - tmp0 = d4 << CONST_BITS; - - tmp10 = tmp0 + tmp3; - tmp13 = tmp0 - tmp3; - tmp11 = tmp2 - tmp0; - tmp12 = -(tmp0 + tmp2); - } } else { - if (d0) { /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */ tmp2 = MULTIPLY(-d6, FIX_1_306562965); tmp3 = MULTIPLY(d6, FIX_0_541196100); @@ -752,72 +606,9 @@ void j_rev_dct(DCTBLOCK data) tmp13 = tmp0 - tmp3; tmp11 = tmp1 + tmp2; tmp12 = tmp1 - tmp2; - } else { - /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */ - tmp2 = MULTIPLY(-d6, FIX_1_306562965); - tmp3 = MULTIPLY(d6, FIX_0_541196100); - - tmp0 = d4 << CONST_BITS; - - tmp10 = tmp0 + tmp3; - tmp13 = tmp0 - tmp3; - tmp11 = tmp2 - tmp0; - tmp12 = -(tmp0 + tmp2); - } - } - } else { - if (d2) { - if (d0) { - /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */ - z1 = MULTIPLY(d2 + d6, FIX_0_541196100); - tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); - tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); - - tmp0 = d0 << CONST_BITS; - - tmp10 = tmp0 + tmp3; - tmp13 = tmp0 - tmp3; - tmp11 = tmp0 + tmp2; - tmp12 = tmp0 - tmp2; - } else { - /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */ - z1 = MULTIPLY(d2 + d6, FIX_0_541196100); - tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); - tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); - - tmp10 = tmp3; - tmp13 = -tmp3; - tmp11 = tmp2; - tmp12 = -tmp2; - } - } else { - if (d0) { - /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */ - tmp2 = MULTIPLY(-d6, FIX_1_306562965); - tmp3 = MULTIPLY(d6, FIX_0_541196100); - - tmp0 = d0 << CONST_BITS; - - tmp10 = tmp0 + tmp3; - tmp13 = tmp0 - tmp3; - tmp11 = tmp0 + tmp2; - tmp12 = tmp0 - tmp2; - } 
else { - /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */ - tmp2 = MULTIPLY(-d6, FIX_1_306562965); - tmp3 = MULTIPLY(d6, FIX_0_541196100); - - tmp10 = tmp3; - tmp13 = -tmp3; - tmp11 = tmp2; - tmp12 = -tmp2; - } } - } } else { - if (d4) { if (d2) { - if (d0) { /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */ tmp2 = MULTIPLY(d2, FIX_0_541196100); tmp3 = MULTIPLY(d2, FIX_1_306562965); @@ -829,62 +620,11 @@ void j_rev_dct(DCTBLOCK data) tmp13 = tmp0 - tmp3; tmp11 = tmp1 + tmp2; tmp12 = tmp1 - tmp2; - } else { - /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */ - tmp2 = MULTIPLY(d2, FIX_0_541196100); - tmp3 = MULTIPLY(d2, FIX_1_306562965); - - tmp0 = d4 << CONST_BITS; - - tmp10 = tmp0 + tmp3; - tmp13 = tmp0 - tmp3; - tmp11 = tmp2 - tmp0; - tmp12 = -(tmp0 + tmp2); - } } else { - if (d0) { /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */ tmp10 = tmp13 = (d0 + d4) << CONST_BITS; tmp11 = tmp12 = (d0 - d4) << CONST_BITS; - } else { - /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */ - tmp10 = tmp13 = d4 << CONST_BITS; - tmp11 = tmp12 = -tmp10; - } - } - } else { - if (d2) { - if (d0) { - /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */ - tmp2 = MULTIPLY(d2, FIX_0_541196100); - tmp3 = MULTIPLY(d2, FIX_1_306562965); - - tmp0 = d0 << CONST_BITS; - - tmp10 = tmp0 + tmp3; - tmp13 = tmp0 - tmp3; - tmp11 = tmp0 + tmp2; - tmp12 = tmp0 - tmp2; - } else { - /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */ - tmp2 = MULTIPLY(d2, FIX_0_541196100); - tmp3 = MULTIPLY(d2, FIX_1_306562965); - - tmp10 = tmp3; - tmp13 = -tmp3; - tmp11 = tmp2; - tmp12 = -tmp2; - } - } else { - if (d0) { - /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */ - tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS; - } else { - /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */ - tmp10 = tmp13 = tmp11 = tmp12 = 0; - } } - } } /* Odd part per figure 8; the matrix is unitary and hence its @@ -1172,5 +912,215 @@ void j_rev_dct(DCTBLOCK data) } } +#undef DCTSIZE +#define DCTSIZE 4 +#define DCTSTRIDE 8 + +void j_rev_dct4(DCTBLOCK data) +{ + int32_t tmp0, tmp1, tmp2, tmp3; + int32_t tmp10, tmp11, 
tmp12, tmp13; + int32_t z1; + int32_t d0, d2, d4, d6; + register DCTELEM *dataptr; + int rowctr; + + /* Pass 1: process rows. */ + /* Note results are scaled up by sqrt(8) compared to a true IDCT; */ + /* furthermore, we scale the results by 2**PASS1_BITS. */ + + data[0] += 4; + + dataptr = data; + + for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) { + /* Due to quantization, we will usually find that many of the input + * coefficients are zero, especially the AC terms. We can exploit this + * by short-circuiting the IDCT calculation for any row in which all + * the AC terms are zero. In that case each output is equal to the + * DC coefficient (with scale factor as needed). + * With typical images and quantization tables, half or more of the + * row DCT calculations can be simplified this way. + */ + + register int *idataptr = (int*)dataptr; + + d0 = dataptr[0]; + d2 = dataptr[1]; + d4 = dataptr[2]; + d6 = dataptr[3]; + + if ((d2 | d4 | d6) == 0) { + /* AC terms all zero */ + if (d0) { + /* Compute a 32 bit value to assign. */ + DCTELEM dcval = (DCTELEM) (d0 << PASS1_BITS); + register int v = (dcval & 0xffff) | ((dcval << 16) & 0xffff0000); + + idataptr[0] = v; + idataptr[1] = v; + } + + dataptr += DCTSTRIDE; /* advance pointer to next row */ + continue; + } + + /* Even part: reverse the even part of the forward DCT. */ + /* The rotator is sqrt(2)*c(-6). 
*/ + if (d6) { + if (d2) { + /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */ + z1 = MULTIPLY(d2 + d6, FIX_0_541196100); + tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); + tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); + + tmp0 = (d0 + d4) << CONST_BITS; + tmp1 = (d0 - d4) << CONST_BITS; + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp1 - tmp2; + } else { + /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */ + tmp2 = MULTIPLY(-d6, FIX_1_306562965); + tmp3 = MULTIPLY(d6, FIX_0_541196100); + + tmp0 = (d0 + d4) << CONST_BITS; + tmp1 = (d0 - d4) << CONST_BITS; + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp1 - tmp2; + } + } else { + if (d2) { + /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */ + tmp2 = MULTIPLY(d2, FIX_0_541196100); + tmp3 = MULTIPLY(d2, FIX_1_306562965); + + tmp0 = (d0 + d4) << CONST_BITS; + tmp1 = (d0 - d4) << CONST_BITS; + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp1 - tmp2; + } else { + /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */ + tmp10 = tmp13 = (d0 + d4) << CONST_BITS; + tmp11 = tmp12 = (d0 - d4) << CONST_BITS; + } + } + + /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ + + dataptr[0] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS); + dataptr[1] = (DCTELEM) DESCALE(tmp11, CONST_BITS-PASS1_BITS); + dataptr[2] = (DCTELEM) DESCALE(tmp12, CONST_BITS-PASS1_BITS); + dataptr[3] = (DCTELEM) DESCALE(tmp13, CONST_BITS-PASS1_BITS); + + dataptr += DCTSTRIDE; /* advance pointer to next row */ + } + + /* Pass 2: process columns. */ + /* Note that we must descale the results by a factor of 8 == 2**3, */ + /* and also undo the PASS1_BITS scaling. */ + + dataptr = data; + for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) { + /* Columns of zeroes can be exploited in the same way as we did with rows. + * However, the row calculation has created many nonzero AC terms, so the + * simplification applies less often (typically 5% to 10% of the time). 
+ * On machines with very fast multiplication, it's possible that the + * test takes more time than it's worth. In that case this section + * may be commented out. + */ + + d0 = dataptr[DCTSTRIDE*0]; + d2 = dataptr[DCTSTRIDE*1]; + d4 = dataptr[DCTSTRIDE*2]; + d6 = dataptr[DCTSTRIDE*3]; + + /* Even part: reverse the even part of the forward DCT. */ + /* The rotator is sqrt(2)*c(-6). */ + if (d6) { + if (d2) { + /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */ + z1 = MULTIPLY(d2 + d6, FIX_0_541196100); + tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065); + tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); + + tmp0 = (d0 + d4) << CONST_BITS; + tmp1 = (d0 - d4) << CONST_BITS; + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp1 - tmp2; + } else { + /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */ + tmp2 = MULTIPLY(-d6, FIX_1_306562965); + tmp3 = MULTIPLY(d6, FIX_0_541196100); + + tmp0 = (d0 + d4) << CONST_BITS; + tmp1 = (d0 - d4) << CONST_BITS; + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp1 - tmp2; + } + } else { + if (d2) { + /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */ + tmp2 = MULTIPLY(d2, FIX_0_541196100); + tmp3 = MULTIPLY(d2, FIX_1_306562965); + + tmp0 = (d0 + d4) << CONST_BITS; + tmp1 = (d0 - d4) << CONST_BITS; + + tmp10 = tmp0 + tmp3; + tmp13 = tmp0 - tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp1 - tmp2; + } else { + /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */ + tmp10 = tmp13 = (d0 + d4) << CONST_BITS; + tmp11 = tmp12 = (d0 - d4) << CONST_BITS; + } + } + + /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ + + dataptr[DCTSTRIDE*0] = tmp10 >> (CONST_BITS+PASS1_BITS+3); + dataptr[DCTSTRIDE*1] = tmp11 >> (CONST_BITS+PASS1_BITS+3); + dataptr[DCTSTRIDE*2] = tmp12 >> (CONST_BITS+PASS1_BITS+3); + dataptr[DCTSTRIDE*3] = tmp13 >> (CONST_BITS+PASS1_BITS+3); + + dataptr++; /* advance pointer to next column */ + } +} + +void j_rev_dct2(DCTBLOCK data){ + int d00, d01, d10, d11; + + data[0] += 4; + d00 = data[0+0*DCTSTRIDE] + 
data[1+0*DCTSTRIDE]; + d01 = data[0+0*DCTSTRIDE] - data[1+0*DCTSTRIDE]; + d10 = data[0+1*DCTSTRIDE] + data[1+1*DCTSTRIDE]; + d11 = data[0+1*DCTSTRIDE] - data[1+1*DCTSTRIDE]; + + data[0+0*DCTSTRIDE]= (d00 + d10)>>3; + data[1+0*DCTSTRIDE]= (d01 + d11)>>3; + data[0+1*DCTSTRIDE]= (d00 - d10)>>3; + data[1+1*DCTSTRIDE]= (d01 - d11)>>3; +} + +void j_rev_dct1(DCTBLOCK data){ + data[0] = (data[0] + 4)>>3; +} + #undef FIX #undef CONST_BITS diff --git a/src/libffmpeg/libavcodec/lcl.c b/src/libffmpeg/libavcodec/lcl.c index 9a8591a89..a15a10769 100644 --- a/src/libffmpeg/libavcodec/lcl.c +++ b/src/libffmpeg/libavcodec/lcl.c @@ -41,6 +41,7 @@ #include <stdlib.h> #include "common.h" +#include "bitstream.h" #include "avcodec.h" #ifdef CONFIG_ZLIB @@ -144,14 +145,15 @@ static inline unsigned char get_r (unsigned char yq, signed char rq) -static int mszh_decomp(unsigned char * srcptr, int srclen, unsigned char * destptr) +static unsigned int mszh_decomp(unsigned char * srcptr, int srclen, unsigned char * destptr, unsigned int destsize) { unsigned char *destptr_bak = destptr; + unsigned char *destptr_end = destptr + destsize; unsigned char mask = 0; unsigned char maskbit = 0; unsigned int ofs, cnt; - while (srclen > 0) { + while ((srclen > 0) && (destptr < destptr_end)) { if (maskbit == 0) { mask = *(srcptr++); maskbit = 8; @@ -159,6 +161,8 @@ static int mszh_decomp(unsigned char * srcptr, int srclen, unsigned char * destp continue; } if ((mask & (1 << (--maskbit))) == 0) { + if (destptr + 4 > destptr_end) + break; *(int*)destptr = *(int*)srcptr; srclen -= 4; destptr += 4; @@ -171,6 +175,9 @@ static int mszh_decomp(unsigned char * srcptr, int srclen, unsigned char * destp ofs &= 0x7ff; srclen -= 2; cnt *= 4; + if (destptr + cnt > destptr_end) { + cnt = destptr_end - destptr; + } for (; cnt > 0; cnt--) { *(destptr) = *(destptr - ofs); destptr++; @@ -193,7 +200,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8 { LclContext * const c = (LclContext 
*)avctx->priv_data; unsigned char *encoded = (unsigned char *)buf; - int pixel_ptr; + unsigned int pixel_ptr; int row, col; unsigned char *outptr; unsigned int width = avctx->width; // Real image width @@ -205,11 +212,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8 #ifdef CONFIG_ZLIB int zret; // Zlib return code #endif - int len = buf_size; - - /* no supplementary picture */ - if (buf_size == 0) - return 0; + unsigned int len = buf_size; if(c->pic.data[0]) avctx->release_buffer(avctx, &c->pic); @@ -231,24 +234,29 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8 if (c->flags & FLAG_MULTITHREAD) { mthread_inlen = *((unsigned int*)encoded); mthread_outlen = *((unsigned int*)(encoded+4)); - mszh_dlen = mszh_decomp(encoded + 8, mthread_inlen, c->decomp_buf); + if (mthread_outlen > c->decomp_size) // this should not happen + mthread_outlen = c->decomp_size; + mszh_dlen = mszh_decomp(encoded + 8, mthread_inlen, c->decomp_buf, c->decomp_size); if (mthread_outlen != mszh_dlen) { av_log(avctx, AV_LOG_ERROR, "Mthread1 decoded size differs (%d != %d)\n", mthread_outlen, mszh_dlen); + return -1; } mszh_dlen = mszh_decomp(encoded + 8 + mthread_inlen, len - mthread_inlen, - c->decomp_buf + mthread_outlen); - if ((c->decomp_size - mthread_outlen) != mszh_dlen) { + c->decomp_buf + mthread_outlen, c->decomp_size - mthread_outlen); + if (mthread_outlen != mszh_dlen) { av_log(avctx, AV_LOG_ERROR, "Mthread2 decoded size differs (%d != %d)\n", - c->decomp_size - mthread_outlen, mszh_dlen); + mthread_outlen, mszh_dlen); + return -1; } encoded = c->decomp_buf; len = c->decomp_size; } else { - mszh_dlen = mszh_decomp(encoded, len, c->decomp_buf); + mszh_dlen = mszh_decomp(encoded, len, c->decomp_buf, c->decomp_size); if (c->decomp_size != mszh_dlen) { av_log(avctx, AV_LOG_ERROR, "Decoded size differs (%d != %d)\n", c->decomp_size, mszh_dlen); + return -1; } encoded = c->decomp_buf; len = mszh_dlen; @@ -277,10 +285,12 
@@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8 if (c->flags & FLAG_MULTITHREAD) { mthread_inlen = *((unsigned int*)encoded); mthread_outlen = *((unsigned int*)(encoded+4)); + if (mthread_outlen > c->decomp_size) + mthread_outlen = c->decomp_size; c->zstream.next_in = encoded + 8; c->zstream.avail_in = mthread_inlen; c->zstream.next_out = c->decomp_buf; - c->zstream.avail_out = mthread_outlen; + c->zstream.avail_out = c->decomp_size; zret = inflate(&(c->zstream), Z_FINISH); if ((zret != Z_OK) && (zret != Z_STREAM_END)) { av_log(avctx, AV_LOG_ERROR, "Mthread1 inflate error: %d\n", zret); @@ -289,6 +299,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8 if (mthread_outlen != (unsigned int)(c->zstream.total_out)) { av_log(avctx, AV_LOG_ERROR, "Mthread1 decoded size differs (%u != %lu)\n", mthread_outlen, c->zstream.total_out); + return -1; } zret = inflateReset(&(c->zstream)); if (zret != Z_OK) { @@ -298,15 +309,16 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8 c->zstream.next_in = encoded + 8 + mthread_inlen; c->zstream.avail_in = len - mthread_inlen; c->zstream.next_out = c->decomp_buf + mthread_outlen; - c->zstream.avail_out = mthread_outlen; + c->zstream.avail_out = c->decomp_size - mthread_outlen; zret = inflate(&(c->zstream), Z_FINISH); if ((zret != Z_OK) && (zret != Z_STREAM_END)) { av_log(avctx, AV_LOG_ERROR, "Mthread2 inflate error: %d\n", zret); return -1; } - if ((c->decomp_size - mthread_outlen) != (unsigned int)(c->zstream.total_out)) { + if (mthread_outlen != (unsigned int)(c->zstream.total_out)) { av_log(avctx, AV_LOG_ERROR, "Mthread2 decoded size differs (%d != %lu)\n", - c->decomp_size - mthread_outlen, c->zstream.total_out); + mthread_outlen, c->zstream.total_out); + return -1; } } else { c->zstream.next_in = encoded; @@ -321,6 +333,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8 if (c->decomp_size != 
(unsigned int)(c->zstream.total_out)) { av_log(avctx, AV_LOG_ERROR, "Decoded size differs (%d != %lu)\n", c->decomp_size, c->zstream.total_out); + return -1; } } encoded = c->decomp_buf; @@ -567,13 +580,20 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, av_log(avctx, AV_LOG_ERROR, "Deflate reset error: %d\n", zret); return -1; } - c->zstream.next_in = p->data[0]; - c->zstream.avail_in = c->decomp_size; c->zstream.next_out = c->comp_buf; c->zstream.avail_out = c->max_comp_size; + for(i = avctx->height - 1; i >= 0; i--) { + c->zstream.next_in = p->data[0]+p->linesize[0]*i; + c->zstream.avail_in = avctx->width*3; + zret = deflate(&(c->zstream), Z_NO_FLUSH); + if (zret != Z_OK) { + av_log(avctx, AV_LOG_ERROR, "Deflate error: %d\n", zret); + return -1; + } + } zret = deflate(&(c->zstream), Z_FINISH); - if ((zret != Z_OK) && (zret != Z_STREAM_END)) { + if (zret != Z_STREAM_END) { av_log(avctx, AV_LOG_ERROR, "Deflate error: %d\n", zret); return -1; } @@ -596,7 +616,9 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, static int decode_init(AVCodecContext *avctx) { LclContext * const c = (LclContext *)avctx->priv_data; - int basesize = avctx->width * avctx->height; + unsigned int basesize = avctx->width * avctx->height; + unsigned int max_basesize = ((avctx->width + 3) & ~3) * ((avctx->height + 3) & ~3); + unsigned int max_decomp_size; int zret; // Zlib return code c->avctx = avctx; @@ -614,6 +636,10 @@ static int decode_init(AVCodecContext *avctx) return 1; } + if (avcodec_check_dimensions(avctx, avctx->width, avctx->height) < 0) { + return 1; + } + /* Check codec type */ if (((avctx->codec_id == CODEC_ID_MSZH) && (*((char *)avctx->extradata + 7) != CODEC_MSZH)) || ((avctx->codec_id == CODEC_ID_ZLIB) && (*((char *)avctx->extradata + 7) != CODEC_ZLIB))) { @@ -624,26 +650,32 @@ static int decode_init(AVCodecContext *avctx) switch (c->imgtype = *((char *)avctx->extradata + 4)) { case IMGTYPE_YUV111: 
c->decomp_size = basesize * 3; + max_decomp_size = max_basesize * 3; av_log(avctx, AV_LOG_INFO, "Image type is YUV 1:1:1.\n"); break; case IMGTYPE_YUV422: c->decomp_size = basesize * 2; + max_decomp_size = max_basesize * 2; av_log(avctx, AV_LOG_INFO, "Image type is YUV 4:2:2.\n"); break; case IMGTYPE_RGB24: c->decomp_size = basesize * 3; + max_decomp_size = max_basesize * 3; av_log(avctx, AV_LOG_INFO, "Image type is RGB 24.\n"); break; case IMGTYPE_YUV411: c->decomp_size = basesize / 2 * 3; + max_decomp_size = max_basesize / 2 * 3; av_log(avctx, AV_LOG_INFO, "Image type is YUV 4:1:1.\n"); break; case IMGTYPE_YUV211: c->decomp_size = basesize * 2; + max_decomp_size = max_basesize * 2; av_log(avctx, AV_LOG_INFO, "Image type is YUV 2:1:1.\n"); break; case IMGTYPE_YUV420: c->decomp_size = basesize / 2 * 3; + max_decomp_size = max_basesize / 2 * 3; av_log(avctx, AV_LOG_INFO, "Image type is YUV 4:2:0.\n"); break; default: @@ -698,9 +730,8 @@ static int decode_init(AVCodecContext *avctx) } /* Allocate decompression buffer */ - /* 4*8 max overflow space for mszh decomp algorithm */ if (c->decomp_size) { - if ((c->decomp_buf = av_malloc(c->decomp_size+4*8)) == NULL) { + if ((c->decomp_buf = av_malloc(max_decomp_size)) == NULL) { av_log(avctx, AV_LOG_ERROR, "Can't allocate decompression buffer.\n"); return 1; } @@ -785,7 +816,7 @@ static int encode_init(AVCodecContext *avctx) ((uint8_t*)avctx->extradata)[4]= c->imgtype; ((uint8_t*)avctx->extradata)[5]= c->compression; ((uint8_t*)avctx->extradata)[6]= c->flags; - ((uint8_t*)avctx->extradata)[7]= 0; + ((uint8_t*)avctx->extradata)[7]= CODEC_ZLIB; c->avctx->extradata_size= 8; c->zstream.zalloc = Z_NULL; @@ -843,7 +874,7 @@ static int encode_end(AVCodecContext *avctx) LclContext *c = avctx->priv_data; av_freep(&avctx->extradata); - av_freep(c->comp_buf); + av_freep(&c->comp_buf); #ifdef CONFIG_ZLIB deflateEnd(&(c->zstream)); #endif @@ -886,7 +917,6 @@ AVCodec zlib_encoder = { encode_init, encode_frame, encode_end, -// .options = 
lcl_options, }; #endif //CONFIG_ENCODERS diff --git a/src/libffmpeg/libavcodec/libpostproc/mangle.h b/src/libffmpeg/libavcodec/libpostproc/mangle.h index f3894cc33..aa09cd6bf 100644 --- a/src/libffmpeg/libavcodec/libpostproc/mangle.h +++ b/src/libffmpeg/libavcodec/libpostproc/mangle.h @@ -8,12 +8,21 @@ #define __MANGLE_H /* Feel free to add more to the list, eg. a.out IMO */ +/* Use rip-relative addressing if compiling PIC code on x86-64. */ #if defined(__CYGWIN__) || defined(__MINGW32__) || defined(__OS2__) || \ (defined(__OpenBSD__) && !defined(__ELF__)) +#if defined(ARCH_X86_64) && defined(PIC) +#define MANGLE(a) "_" #a"(%%rip)" +#else #define MANGLE(a) "_" #a +#endif +#else +#if defined(ARCH_X86_64) && defined(PIC) +#define MANGLE(a) #a"(%%rip)" #else #define MANGLE(a) #a #endif +#endif #endif /* !__MANGLE_H */ diff --git a/src/libffmpeg/libavcodec/libpostproc/postprocess.c b/src/libffmpeg/libavcodec/libpostproc/postprocess.c index a03ff133d..e7ca0191d 100644 --- a/src/libffmpeg/libavcodec/libpostproc/postprocess.c +++ b/src/libffmpeg/libavcodec/libpostproc/postprocess.c @@ -29,10 +29,11 @@ isVertDC Ec Ec Ec isVertMinMaxOk Ec Ec Ec doVertLowPass E e e Ec doVertDefFilter Ec Ec e e Ec -isHorizDC Ec Ec -isHorizMinMaxOk a E -doHorizLowPass E e e -doHorizDefFilter Ec Ec e e +isHorizDC Ec Ec Ec +isHorizMinMaxOk a E Ec +doHorizLowPass E e e Ec +doHorizDefFilter Ec Ec e e Ec +do_a_deblock Ec E Ec E deRing E e e* Ecp Vertical RKAlgo1 E a a Horizontal RKAlgo1 a a @@ -42,7 +43,7 @@ LinIpolDeinterlace e E E* CubicIpolDeinterlace a e e* LinBlendDeinterlace e E E* MedianDeinterlace# E Ec Ec -TempDeNoiser# E e e +TempDeNoiser# E e e Ec * i dont have a 3dnow CPU -> its untested, but noone said it doesnt work so it seems to work # more or less selfinvented filters so the exactness isnt too meaningfull @@ -91,6 +92,10 @@ try to unroll inner for(x=0 ... loop to avoid these damn if(x ... 
checks #include "mangle.h" //FIXME should be supressed +#ifdef HAVE_ALTIVEC_H +#include <altivec.h> +#endif + #ifndef HAVE_MEMALIGN #define memalign(a,b) malloc(b) #endif @@ -108,12 +113,15 @@ try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks #if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 0) # define attribute_used __attribute__((used)) +# define always_inline __attribute__((always_inline)) inline #else # define attribute_used +# define always_inline inline #endif -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) static uint64_t __attribute__((aligned(8))) attribute_used w05= 0x0005000500050005LL; +static uint64_t __attribute__((aligned(8))) attribute_used w04= 0x0004000400040004LL; static uint64_t __attribute__((aligned(8))) attribute_used w20= 0x0020002000200020LL; static uint64_t __attribute__((aligned(8))) attribute_used b00= 0x0000000000000000LL; static uint64_t __attribute__((aligned(8))) attribute_used b01= 0x0101010101010101LL; @@ -122,7 +130,6 @@ static uint64_t __attribute__((aligned(8))) attribute_used b08= 0x080808080808 static uint64_t __attribute__((aligned(8))) attribute_used b80= 0x8080808080808080LL; #endif - static uint8_t clip_table[3*256]; static uint8_t * const clip_tab= clip_table + 256; @@ -139,6 +146,8 @@ static struct PPFilter filters[]= {"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/ {"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER}, {"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER}, + {"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK}, + {"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK}, {"dr", "dering", 1, 5, 6, DERING}, {"al", "autolevels", 0, 1, 2, LEVEL_FIX}, {"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER}, @@ -154,15 +163,16 @@ static struct PPFilter filters[]= static char *replaceTable[]= { - "default", "hdeblock:a,vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400", - "de", "hdeblock:a,vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400", - "fast", 
"x1hdeblock:a,x1vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400", - "fa", "x1hdeblock:a,x1vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400", + "default", "hdeblock:a,vdeblock:a,dering:a", + "de", "hdeblock:a,vdeblock:a,dering:a", + "fast", "x1hdeblock:a,x1vdeblock:a,dering:a", + "fa", "x1hdeblock:a,x1vdeblock:a,dering:a", + "ac", "ha:a:128:7,va:a,dering:a", NULL //End Marker }; -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) static inline void prefetchnta(void *p) { asm volatile( "prefetchnta (%0)\n\t" @@ -372,32 +382,32 @@ static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c) */ static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c) { - int y; for(y=0; y<BLOCK_SIZE; y++) { const int first= ABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0]; const int last= ABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7]; - int sums[9]; - sums[0] = first + dst[0]; - sums[1] = dst[0] + dst[1]; - sums[2] = dst[1] + dst[2]; - sums[3] = dst[2] + dst[3]; - sums[4] = dst[3] + dst[4]; - sums[5] = dst[4] + dst[5]; - sums[6] = dst[5] + dst[6]; - sums[7] = dst[6] + dst[7]; - sums[8] = dst[7] + last; - - dst[0]= ((sums[0]<<2) + ((first + sums[2])<<1) + sums[4] + 8)>>4; - dst[1]= ((dst[1]<<2) + ((first + sums[0] + sums[3])<<1) + sums[5] + 8)>>4; - dst[2]= ((dst[2]<<2) + ((first + sums[1] + sums[4])<<1) + sums[6] + 8)>>4; - dst[3]= ((dst[3]<<2) + ((sums[2] + sums[5])<<1) + sums[0] + sums[7] + 8)>>4; - dst[4]= ((dst[4]<<2) + ((sums[3] + sums[6])<<1) + sums[1] + sums[8] + 8)>>4; - dst[5]= ((dst[5]<<2) + ((last + sums[7] + sums[4])<<1) + sums[2] + 8)>>4; - dst[6]= (((last + dst[6])<<2) + ((dst[7] + sums[5])<<1) + sums[3] + 8)>>4; - dst[7]= ((sums[8]<<2) + ((last + sums[6])<<1) + sums[4] + 8)>>4; + int sums[10]; + sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4; + sums[1] = sums[0] - first + dst[3]; + sums[2] = sums[1] - first + dst[4]; + sums[3] = sums[2] - first + dst[5]; + sums[4] = sums[3] - first + dst[6]; + sums[5] = 
sums[4] - dst[0] + dst[7]; + sums[6] = sums[5] - dst[1] + last; + sums[7] = sums[6] - dst[2] + last; + sums[8] = sums[7] - dst[3] + last; + sums[9] = sums[8] - dst[4] + last; + + dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4; + dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4; + dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4; + dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4; + dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4; + dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4; + dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4; + dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4; dst+= stride; } @@ -469,6 +479,111 @@ static inline void horizX1Filter(uint8_t *src, int stride, int QP) } } +/** + * accurate deblock filter + */ +static always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){ + int y; + const int QP= c->QP; + const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1; + const int dcThreshold= dcOffset*2 + 1; +//START_TIMER + src+= step*4; // src points to begin of the 8x8 Block + for(y=0; y<8; y++){ + int numEq= 0; + + if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++; + if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++; + if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++; + if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++; + if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++; + if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++; + if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++; + if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++; + if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++; + if(numEq > c->ppMode.flatnessThreshold){ + int min, max, x; + + if(src[0] > src[step]){ + max= src[0]; + min= src[step]; + }else{ + max= src[step]; + min= src[0]; + } + for(x=2; x<8; x+=2){ + if(src[x*step] > src[(x+1)*step]){ + if(src[x 
*step] > max) max= src[ x *step]; + if(src[(x+1)*step] < min) min= src[(x+1)*step]; + }else{ + if(src[(x+1)*step] > max) max= src[(x+1)*step]; + if(src[ x *step] < min) min= src[ x *step]; + } + } + if(max-min < 2*QP){ + const int first= ABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0]; + const int last= ABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step]; + + int sums[10]; + sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4; + sums[1] = sums[0] - first + src[3*step]; + sums[2] = sums[1] - first + src[4*step]; + sums[3] = sums[2] - first + src[5*step]; + sums[4] = sums[3] - first + src[6*step]; + sums[5] = sums[4] - src[0*step] + src[7*step]; + sums[6] = sums[5] - src[1*step] + last; + sums[7] = sums[6] - src[2*step] + last; + sums[8] = sums[7] - src[3*step] + last; + sums[9] = sums[8] - src[4*step] + last; + + src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4; + src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4; + src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4; + src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4; + src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4; + src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4; + src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4; + src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4; + } + }else{ + const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]); + + if(ABS(middleEnergy) < 8*QP) + { + const int q=(src[3*step] - src[4*step])/2; + const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]); + const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]); + + int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) ); + d= MAX(d, 0); + + d= (5*d + 32) >> 6; + d*= SIGN(-middleEnergy); + + if(q>0) + { + d= d<0 ? 0 : d; + d= d>q ? q : d; + } + else + { + d= d>0 ? 0 : d; + d= d<q ? 
q : d; + } + + src[3*step]-= d; + src[4*step]+= d; + } + } + + src += stride; + } +/*if(step==16){ + STOP_TIMER("step16") +}else{ + STOP_TIMER("stepX") +}*/ +} //Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one //Plain C versions @@ -479,15 +594,10 @@ static inline void horizX1Filter(uint8_t *src, int stride, int QP) #ifdef ARCH_POWERPC #ifdef HAVE_ALTIVEC #define COMPILE_ALTIVEC -#ifndef CONFIG_DARWIN -#warning "################################################################################" -#warning "WARNING: No gcc available as of today (2004-05-25) seems to be able to compile properly some of the code under non-Darwin PPC OSes. Some functions result in wrong results, while others simply won't compile (gcc explodes after allocating 1GiB+)." -#warning "################################################################################" -#endif //CONFIG_DARWIN #endif //HAVE_ALTIVEC #endif //ARCH_POWERPC -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) #if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT) #define COMPILE_MMX @@ -506,13 +616,11 @@ static inline void horizX1Filter(uint8_t *src, int stride, int QP) #undef HAVE_MMX2 #undef HAVE_3DNOW #undef HAVE_ALTIVEC -#undef ARCH_X86 #ifdef COMPILE_C #undef HAVE_MMX #undef HAVE_MMX2 #undef HAVE_3DNOW -#undef ARCH_X86 #define RENAME(a) a ## _C #include "postprocess_template.c" #endif @@ -533,7 +641,6 @@ static inline void horizX1Filter(uint8_t *src, int stride, int QP) #define HAVE_MMX #undef HAVE_MMX2 #undef HAVE_3DNOW -#define ARCH_X86 #define RENAME(a) a ## _MMX #include "postprocess_template.c" #endif @@ -544,7 +651,6 @@ static inline void horizX1Filter(uint8_t *src, int stride, int QP) #define HAVE_MMX #define HAVE_MMX2 #undef HAVE_3DNOW -#define ARCH_X86 #define RENAME(a) a ## _MMX2 #include "postprocess_template.c" #endif @@ -555,7 +661,6 @@ static inline void horizX1Filter(uint8_t *src, int stride, int QP) #define HAVE_MMX 
#undef HAVE_MMX2 #define HAVE_3DNOW -#define ARCH_X86 #define RENAME(a) a ## _3DNow #include "postprocess_template.c" #endif @@ -573,7 +678,7 @@ static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int // difference wouldnt be messureable here but its much better because // someone might exchange the cpu whithout restarting mplayer ;) #ifdef RUNTIME_CPUDETECT -#ifdef ARCH_X86 +#if defined(ARCH_X86) || defined(ARCH_X86_64) // ordered per speed fasterst first if(c->cpuCaps & PP_CPU_CAPS_MMX2) postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); @@ -586,7 +691,7 @@ static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int #else #ifdef ARCH_POWERPC #ifdef HAVE_ALTIVEC - else if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC) + if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC) postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); else #endif @@ -614,24 +719,21 @@ static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int /* -pp Command line Help */ char *pp_help= -"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n" -"long form example:\n" -"vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n" -"short form example:\n" -"vb:a/hb:a/lb de,-vb\n" -"more examples:\n" -"tn:64:128:256\n" +"Available postprocessing filters:\n" "Filters Options\n" "short long name short long option Description\n" "* * a autoq CPU power dependent enabler\n" " c chrom chrominance filtering enabled\n" " y nochrom chrominance filtering disabled\n" +" n noluma luma filtering disabled\n" "hb hdeblock (2 threshold) horizontal deblocking filter\n" " 1. difference factor: default=32, higher -> more deblocking\n" " 2. 
flatness threshold: default=39, lower -> more deblocking\n" " the h & v deblocking filters share these\n" " so you can't set different thresholds for h / v\n" "vb vdeblock (2 threshold) vertical deblocking filter\n" +"ha hadeblock (2 threshold) horizontal deblocking filter\n" +"va vadeblock (2 threshold) vertical deblocking filter\n" "h1 x1hdeblock experimental h deblock filter 1\n" "v1 x1vdeblock experimental v deblock filter 1\n" "dr dering deringing filter\n" @@ -642,11 +744,20 @@ char *pp_help= "ci cubicipoldeint cubic interpolating deinterlacer\n" "md mediandeint median deinterlacer\n" "fd ffmpegdeint ffmpeg deinterlacer\n" -"de default hb:a,vb:a,dr:a,al\n" -"fa fast h1:a,v1:a,dr:a,al\n" +"l5 lowpass5 FIR lowpass deinterlacer\n" +"de default hb:a,vb:a,dr:a\n" +"fa fast h1:a,v1:a,dr:a\n" "tn tmpnoise (3 threshold) temporal noise reducer\n" " 1. <= 2. <= 3. larger -> stronger filtering\n" "fq forceQuant <quantizer> force quantizer\n" +"Usage:\n" +"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n" +"long form example:\n" +"vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n" +"short form example:\n" +"vb:a/hb:a/lb de,-vb\n" +"more examples:\n" +"tn:64:128:256\n" ; pp_mode_t *pp_get_mode_by_name_and_quality(char *name, int quality) @@ -680,6 +791,7 @@ pp_mode_t *pp_get_mode_by_name_and_quality(char *name, int quality) char *filterName; int q= 1000000; //PP_QUALITY_MAX; int chrom=-1; + int luma=-1; char *option; char *options[OPTIONS_ARRAY_SIZE]; int i; @@ -707,6 +819,7 @@ pp_mode_t *pp_get_mode_by_name_and_quality(char *name, int quality) if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality; else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0; else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1; + else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0; else { options[numOfUnknownOptions] = option; @@ -753,7 +866,7 @@ pp_mode_t *pp_get_mode_by_name_and_quality(char *name, 
int quality) filterNameOk=1; if(!enable) break; // user wants to disable it - if(q >= filters[i].minLumQuality) + if(q >= filters[i].minLumQuality && luma) ppMode->lumMode|= filters[i].mask; if(chrom==1 || (chrom==-1 && filters[i].chromDefault)) if(q >= filters[i].minChromQuality) @@ -793,7 +906,8 @@ pp_mode_t *pp_get_mode_by_name_and_quality(char *name, int quality) } } } - else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK) + else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK + || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK) { int o; @@ -940,18 +1054,20 @@ void pp_postprocess(uint8_t * src[3], int srcStride[3], int mbHeight= (height+15)>>4; PPMode *mode = (PPMode*)vm; PPContext *c = (PPContext*)vc; - int minStride= MAX(srcStride[0], dstStride[0]); + int minStride= MAX(ABS(srcStride[0]), ABS(dstStride[0])); + int absQPStride = ABS(QPStride); - if(c->stride < minStride || c->qpStride < QPStride) + // c->stride and c->QPStride are always positive + if(c->stride < minStride || c->qpStride < absQPStride) reallocBuffers(c, width, height, MAX(minStride, c->stride), - MAX(c->qpStride, QPStride)); + MAX(c->qpStride, absQPStride)); if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)) { int i; QP_store= c->forcedQPTable; - QPStride= 0; + absQPStride = QPStride = 0; if(mode->lumMode & FORCE_QUANT) for(i=0; i<mbWidth; i++) QP_store[i]= mode->forcedQuant; else @@ -961,7 +1077,7 @@ void pp_postprocess(uint8_t * src[3], int srcStride[3], if(pict_type & PP_PICT_TYPE_QP2){ int i; - const int count= mbHeight * QPStride; + const int count= mbHeight * absQPStride; for(i=0; i<(count>>2); i++){ ((uint32_t*)c->stdQPTable)[i] = (((uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F; } @@ -969,6 +1085,7 @@ void pp_postprocess(uint8_t * src[3], int srcStride[3], c->stdQPTable[i] = QP_store[i]>>1; } QP_store= c->stdQPTable; + QPStride= absQPStride; } if(0){ @@ -984,13 +1101,22 @@ for(y=0; y<mbHeight; y++){ if((pict_type&7)!=3) { - int i; - const 
int count= mbHeight * QPStride; - for(i=0; i<(count>>2); i++){ - ((uint32_t*)c->nonBQPTable)[i] = ((uint32_t*)QP_store)[i] & 0x3F3F3F3F; - } - for(i<<=2; i<count; i++){ - c->nonBQPTable[i] = QP_store[i] & 0x3F; + if (QPStride >= 0) { + int i; + const int count= mbHeight * QPStride; + for(i=0; i<(count>>2); i++){ + ((uint32_t*)c->nonBQPTable)[i] = ((uint32_t*)QP_store)[i] & 0x3F3F3F3F; + } + for(i<<=2; i<count; i++){ + c->nonBQPTable[i] = QP_store[i] & 0x3F; + } + } else { + int i,j; + for(i=0; i<mbHeight; i++) { + for(j=0; j<absQPStride; j++) { + c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F; + } + } } } @@ -1014,8 +1140,8 @@ for(y=0; y<mbHeight; y++){ } else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]) { - memcpy(dst[1], src[1], srcStride[1]*height); - memcpy(dst[2], src[2], srcStride[2]*height); + linecpy(dst[1], src[1], height, srcStride[1]); + linecpy(dst[2], src[2], height, srcStride[2]); } else { diff --git a/src/libffmpeg/libavcodec/libpostproc/postprocess_altivec_template.c b/src/libffmpeg/libavcodec/libpostproc/postprocess_altivec_template.c index 0c84873cc..1c59b9465 100644 --- a/src/libffmpeg/libavcodec/libpostproc/postprocess_altivec_template.c +++ b/src/libffmpeg/libavcodec/libpostproc/postprocess_altivec_template.c @@ -25,6 +25,39 @@ #define AVV(x...) 
{x} #endif +#define ALTIVEC_TRANSPOSE_8x8_SHORT(src_a,src_b,src_c,src_d,src_e,src_f,src_g,src_h) \ + do { \ + __typeof__(src_a) tempA1, tempB1, tempC1, tempD1; \ + __typeof__(src_a) tempE1, tempF1, tempG1, tempH1; \ + __typeof__(src_a) tempA2, tempB2, tempC2, tempD2; \ + __typeof__(src_a) tempE2, tempF2, tempG2, tempH2; \ + tempA1 = vec_mergeh (src_a, src_e); \ + tempB1 = vec_mergel (src_a, src_e); \ + tempC1 = vec_mergeh (src_b, src_f); \ + tempD1 = vec_mergel (src_b, src_f); \ + tempE1 = vec_mergeh (src_c, src_g); \ + tempF1 = vec_mergel (src_c, src_g); \ + tempG1 = vec_mergeh (src_d, src_h); \ + tempH1 = vec_mergel (src_d, src_h); \ + tempA2 = vec_mergeh (tempA1, tempE1); \ + tempB2 = vec_mergel (tempA1, tempE1); \ + tempC2 = vec_mergeh (tempB1, tempF1); \ + tempD2 = vec_mergel (tempB1, tempF1); \ + tempE2 = vec_mergeh (tempC1, tempG1); \ + tempF2 = vec_mergel (tempC1, tempG1); \ + tempG2 = vec_mergeh (tempD1, tempH1); \ + tempH2 = vec_mergel (tempD1, tempH1); \ + src_a = vec_mergeh (tempA2, tempE2); \ + src_b = vec_mergel (tempA2, tempE2); \ + src_c = vec_mergeh (tempB2, tempF2); \ + src_d = vec_mergel (tempB2, tempF2); \ + src_e = vec_mergeh (tempC2, tempG2); \ + src_f = vec_mergel (tempC2, tempG2); \ + src_g = vec_mergeh (tempD2, tempH2); \ + src_h = vec_mergel (tempD2, tempH2); \ + } while (0) + + static inline int vertClassify_altivec(uint8_t src[], int stride, PPContext *c) { /* this code makes no assumption on src or stride. @@ -40,7 +73,9 @@ static inline int vertClassify_altivec(uint8_t src[], int stride, PPContext *c) vector signed short v2QP; vector unsigned short v4QP; vector unsigned short v_dcThreshold; - int two_vectors = ((((unsigned long)src2 % 16) > 8) || (stride % 16)) ? 1 : 0; + const int properStride = (stride % 16); + const int srcAlign = ((unsigned long)src2 % 16); + const int two_vectors = ((srcAlign > 8) || properStride) ? 
1 : 0; const vector signed int zero = vec_splat_s32(0); const vector signed short mask = vec_splat_s16(1); vector signed int v_numEq = vec_splat_s32(0); @@ -57,6 +92,8 @@ static inline int vertClassify_altivec(uint8_t src[], int stride, PPContext *c) src2 += stride * 4; + vector signed short v_srcAss0, v_srcAss1, v_srcAss2, v_srcAss3, v_srcAss4, v_srcAss5, v_srcAss6, v_srcAss7; + #define LOAD_LINE(i) \ register int j##i = i * stride; \ vector unsigned char perm##i = vec_lvsl(j##i, src2); \ @@ -66,19 +103,41 @@ static inline int vertClassify_altivec(uint8_t src[], int stride, PPContext *c) v_srcA2##i = vec_ld(j##i + 16, src2); \ const vector unsigned char v_srcA##i = \ vec_perm(v_srcA1##i, v_srcA2##i, perm##i); \ - vector signed short v_srcAss##i = \ + v_srcAss##i = \ (vector signed short)vec_mergeh((vector signed char)zero, \ (vector signed char)v_srcA##i) - LOAD_LINE(0); - LOAD_LINE(1); - LOAD_LINE(2); - LOAD_LINE(3); - LOAD_LINE(4); - LOAD_LINE(5); - LOAD_LINE(6); - LOAD_LINE(7); +#define LOAD_LINE_ALIGNED(i) \ + register int j##i = i * stride; \ + const vector unsigned char v_srcA##i = vec_ld(j##i, src2); \ + v_srcAss##i = \ + (vector signed short)vec_mergeh((vector signed char)zero, \ + (vector signed char)v_srcA##i) + + // special casing the aligned case is worthwhile, as all call from + // the (transposed) horizontable deblocks will be aligned, i naddition + // to the naturraly aligned vertical deblocks. 
+ if (properStride && srcAlign) { + LOAD_LINE_ALIGNED(0); + LOAD_LINE_ALIGNED(1); + LOAD_LINE_ALIGNED(2); + LOAD_LINE_ALIGNED(3); + LOAD_LINE_ALIGNED(4); + LOAD_LINE_ALIGNED(5); + LOAD_LINE_ALIGNED(6); + LOAD_LINE_ALIGNED(7); + } else { + LOAD_LINE(0); + LOAD_LINE(1); + LOAD_LINE(2); + LOAD_LINE(3); + LOAD_LINE(4); + LOAD_LINE(5); + LOAD_LINE(6); + LOAD_LINE(7); + } #undef LOAD_LINE +#undef LOAD_LINE_ALIGNED #define ITER(i, j) \ const vector signed short v_diff##i = \ @@ -133,7 +192,6 @@ static inline int vertClassify_altivec(uint8_t src[], int stride, PPContext *c) else return 2; } - static inline void doVertLowPass_altivec(uint8_t *src, int stride, PPContext *c) { /* this code makes no assumption on src or stride. @@ -145,112 +203,130 @@ static inline void doVertLowPass_altivec(uint8_t *src, int stride, PPContext *c) */ uint8_t *src2 = src; const vector signed int zero = vec_splat_s32(0); + const int properStride = (stride % 16); + const int srcAlign = ((unsigned long)src2 % 16); short __attribute__ ((aligned(16))) qp[8]; qp[0] = c->QP; vector signed short vqp = vec_ld(0, qp); vqp = vec_splat(vqp, 0); + src2 += stride*3; + + vector signed short vb0, vb1, vb2, vb3, vb4, vb5, vb6, vb7, vb8, vb9; + vector unsigned char vbA0, vbA1, vbA2, vbA3, vbA4, vbA5, vbA6, vbA7, vbA8, vbA9; + vector unsigned char vbB0, vbB1, vbB2, vbB3, vbB4, vbB5, vbB6, vbB7, vbB8, vbB9; + vector unsigned char vbT0, vbT1, vbT2, vbT3, vbT4, vbT5, vbT6, vbT7, vbT8, vbT9; + #define LOAD_LINE(i) \ const vector unsigned char perml##i = \ vec_lvsl(i * stride, src2); \ - const vector unsigned char vbA##i = \ - vec_ld(i * stride, src2); \ - const vector unsigned char vbB##i = \ - vec_ld(i * stride + 16, src2); \ - const vector unsigned char vbT##i = \ - vec_perm(vbA##i, vbB##i, perml##i); \ - const vector signed short vb##i = \ + vbA##i = vec_ld(i * stride, src2); \ + vbB##i = vec_ld(i * stride + 16, src2); \ + vbT##i = vec_perm(vbA##i, vbB##i, perml##i); \ + vb##i = \ (vector signed 
short)vec_mergeh((vector unsigned char)zero, \ (vector unsigned char)vbT##i) - - src2 += stride*3; - LOAD_LINE(0); - LOAD_LINE(1); - LOAD_LINE(2); - LOAD_LINE(3); - LOAD_LINE(4); - LOAD_LINE(5); - LOAD_LINE(6); - LOAD_LINE(7); - LOAD_LINE(8); - LOAD_LINE(9); +#define LOAD_LINE_ALIGNED(i) \ + register int j##i = i * stride; \ + vbT##i = vec_ld(j##i, src2); \ + vb##i = \ + (vector signed short)vec_mergeh((vector signed char)zero, \ + (vector signed char)vbT##i) + + // special casing the aligned case is worthwhile, as all call from + // the (transposed) horizontable deblocks will be aligned, in addition + // to the naturraly aligned vertical deblocks. + if (properStride && srcAlign) { + LOAD_LINE_ALIGNED(0); + LOAD_LINE_ALIGNED(1); + LOAD_LINE_ALIGNED(2); + LOAD_LINE_ALIGNED(3); + LOAD_LINE_ALIGNED(4); + LOAD_LINE_ALIGNED(5); + LOAD_LINE_ALIGNED(6); + LOAD_LINE_ALIGNED(7); + LOAD_LINE_ALIGNED(8); + LOAD_LINE_ALIGNED(9); + } else { + LOAD_LINE(0); + LOAD_LINE(1); + LOAD_LINE(2); + LOAD_LINE(3); + LOAD_LINE(4); + LOAD_LINE(5); + LOAD_LINE(6); + LOAD_LINE(7); + LOAD_LINE(8); + LOAD_LINE(9); + } #undef LOAD_LINE +#undef LOAD_LINE_ALIGNED const vector unsigned short v_1 = vec_splat_u16(1); const vector unsigned short v_2 = vec_splat_u16(2); const vector unsigned short v_4 = vec_splat_u16(4); - const vector signed short v_8 = vec_splat_s16(8); - - const vector signed short v_first = vec_sel(vb1, vb0, - vec_cmplt(vec_abs(vec_sub(vb0, vb1)), - vqp)); - const vector signed short v_last = vec_sel(vb8, vb9, - vec_cmplt(vec_abs(vec_sub(vb8, vb9)), - vqp)); - - const vector signed short v_sums0 = vec_add(v_first, vb1); - const vector signed short v_sums1 = vec_add(vb1, vb2); - const vector signed short v_sums2 = vec_add(vb2, vb3); - const vector signed short v_sums3 = vec_add(vb3, vb4); - const vector signed short v_sums4 = vec_add(vb4, vb5); - const vector signed short v_sums5 = vec_add(vb5, vb6); - const vector signed short v_sums6 = vec_add(vb6, vb7); - const vector signed 
short v_sums7 = vec_add(vb7, vb8); - const vector signed short v_sums8 = vec_add(vb8, v_last); - - const vector signed short vr1 = vec_sra(vec_add(vec_add(vec_sl(v_sums0, v_2), - vec_sl(vec_add(v_first, v_sums2), v_1)), - vec_add(v_sums4, v_8)), - v_4); - const vector signed short vr2 = vec_sra(vec_add(vec_add(vec_sl(vb2, v_2), - v_sums5), - vec_add(v_8, - vec_sl(vec_add(v_first, - vec_add(v_sums0, v_sums3)), - v_1))), - v_4); - const vector signed short vr3 = vec_sra(vec_add(vec_add(vec_sl(vb3, v_2), - v_sums6), - vec_add(v_8, - vec_sl(vec_add(v_first, - vec_add(v_sums1, v_sums4)), - v_1))), - v_4); - const vector signed short vr4 = vec_sra(vec_add(vec_add(vec_sl(vb4, v_2), - v_sums7), - vec_add(v_8, - vec_add(v_sums0, - vec_sl(vec_add(v_sums2, v_sums5), - v_1)))), - v_4); - const vector signed short vr5 = vec_sra(vec_add(vec_add(vec_sl(vb5, v_2), - v_sums8), - vec_add(v_8, - vec_add(v_sums1, - vec_sl(vec_add(v_sums3, v_sums6), - v_1)))), - v_4); - const vector signed short vr6 = vec_sra(vec_add(vec_add(vec_sl(vb6, v_2), - v_sums2), - vec_add(v_8, - vec_sl(vec_add(v_last, - vec_add(v_sums7, v_sums4)), - v_1))), - v_4); - const vector signed short vr7 = vec_sra(vec_add(vec_add(vec_sl(vec_add(v_last, vb7), v_2), - vec_sl(vec_add(vb8, v_sums5), v_1)), - vec_add(v_8, v_sums3)), - v_4); - const vector signed short vr8 = vec_sra(vec_add(vec_add(vec_sl(v_sums8, v_2), - vec_sl(vec_add(v_last, v_sums6), v_1)), - vec_add(v_sums4, v_8)), - v_4); - - const vector unsigned char neg1 = (vector unsigned char)AVV(-1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1); - const vector unsigned char permHH = (vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, - 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F); + + const vector signed short v_diff01 = vec_sub(vb0, vb1); + const vector unsigned short v_cmp01 = + (const vector unsigned short) vec_cmplt(vec_abs(v_diff01), vqp); + const vector signed short v_first = vec_sel(vb1, vb0, v_cmp01); + const 
vector signed short v_diff89 = vec_sub(vb8, vb9); + const vector unsigned short v_cmp89 = + (const vector unsigned short) vec_cmplt(vec_abs(v_diff89), vqp); + const vector signed short v_last = vec_sel(vb8, vb9, v_cmp89); + + const vector signed short temp01 = vec_mladd(v_first, (vector signed short)v_4, vb1); + const vector signed short temp02 = vec_add(vb2, vb3); + const vector signed short temp03 = vec_add(temp01, (vector signed short)v_4); + const vector signed short v_sumsB0 = vec_add(temp02, temp03); + + const vector signed short temp11 = vec_sub(v_sumsB0, v_first); + const vector signed short v_sumsB1 = vec_add(temp11, vb4); + + const vector signed short temp21 = vec_sub(v_sumsB1, v_first); + const vector signed short v_sumsB2 = vec_add(temp21, vb5); + + const vector signed short temp31 = vec_sub(v_sumsB2, v_first); + const vector signed short v_sumsB3 = vec_add(temp31, vb6); + + const vector signed short temp41 = vec_sub(v_sumsB3, v_first); + const vector signed short v_sumsB4 = vec_add(temp41, vb7); + + const vector signed short temp51 = vec_sub(v_sumsB4, vb1); + const vector signed short v_sumsB5 = vec_add(temp51, vb8); + + const vector signed short temp61 = vec_sub(v_sumsB5, vb2); + const vector signed short v_sumsB6 = vec_add(temp61, v_last); + + const vector signed short temp71 = vec_sub(v_sumsB6, vb3); + const vector signed short v_sumsB7 = vec_add(temp71, v_last); + + const vector signed short temp81 = vec_sub(v_sumsB7, vb4); + const vector signed short v_sumsB8 = vec_add(temp81, v_last); + + const vector signed short temp91 = vec_sub(v_sumsB8, vb5); + const vector signed short v_sumsB9 = vec_add(temp91, v_last); + +#define COMPUTE_VR(i, j, k) \ + const vector signed short temps1##i = \ + vec_add(v_sumsB##i, v_sumsB##k); \ + const vector signed short temps2##i = \ + vec_mladd(vb##j, (vector signed short)v_2, temps1##i); \ + const vector signed short vr##j = vec_sra(temps2##i, v_4) + + COMPUTE_VR(0, 1, 2); + COMPUTE_VR(1, 2, 3); + COMPUTE_VR(2, 3, 4); 
+ COMPUTE_VR(3, 4, 5); + COMPUTE_VR(4, 5, 6); + COMPUTE_VR(5, 6, 7); + COMPUTE_VR(6, 7, 8); + COMPUTE_VR(7, 8, 9); + + const vector signed char neg1 = vec_splat_s8(-1); + const vector unsigned char permHH = (const vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F); #define PACK_AND_STORE(i) \ const vector unsigned char perms##i = \ @@ -260,7 +336,7 @@ static inline void doVertLowPass_altivec(uint8_t *src, int stride, PPContext *c) const vector unsigned char vg##i = \ vec_perm(vf##i, vbT##i, permHH); \ const vector unsigned char mask##i = \ - vec_perm((vector unsigned char)zero, neg1, perms##i); \ + vec_perm((vector unsigned char)zero, (vector unsigned char)neg1, perms##i); \ const vector unsigned char vg2##i = \ vec_perm(vg##i, vg##i, perms##i); \ const vector unsigned char svA##i = \ @@ -270,16 +346,37 @@ static inline void doVertLowPass_altivec(uint8_t *src, int stride, PPContext *c) vec_st(svA##i, i * stride, src2); \ vec_st(svB##i, i * stride + 16, src2) - PACK_AND_STORE(1); - PACK_AND_STORE(2); - PACK_AND_STORE(3); - PACK_AND_STORE(4); - PACK_AND_STORE(5); - PACK_AND_STORE(6); - PACK_AND_STORE(7); - PACK_AND_STORE(8); +#define PACK_AND_STORE_ALIGNED(i) \ + const vector unsigned char vf##i = \ + vec_packsu(vr##i, (vector signed short)zero); \ + const vector unsigned char vg##i = \ + vec_perm(vf##i, vbT##i, permHH); \ + vec_st(vg##i, i * stride, src2) + // special casing the aligned case is worthwhile, as all call from + // the (transposed) horizontable deblocks will be aligned, in addition + // to the naturraly aligned vertical deblocks. 
+ if (properStride && srcAlign) { + PACK_AND_STORE_ALIGNED(1); + PACK_AND_STORE_ALIGNED(2); + PACK_AND_STORE_ALIGNED(3); + PACK_AND_STORE_ALIGNED(4); + PACK_AND_STORE_ALIGNED(5); + PACK_AND_STORE_ALIGNED(6); + PACK_AND_STORE_ALIGNED(7); + PACK_AND_STORE_ALIGNED(8); + } else { + PACK_AND_STORE(1); + PACK_AND_STORE(2); + PACK_AND_STORE(3); + PACK_AND_STORE(4); + PACK_AND_STORE(5); + PACK_AND_STORE(6); + PACK_AND_STORE(7); + PACK_AND_STORE(8); + } #undef PACK_AND_STORE +#undef PACK_AND_STORE_ALIGNED } @@ -383,12 +480,10 @@ static inline void doVertDefFilter_altivec(uint8_t src[], int stride, PPContext /* finally, stores */ const vector unsigned char st4 = vec_packsu(vb4minusd, (vector signed short)zero); const vector unsigned char st5 = vec_packsu(vb5plusd, (vector signed short)zero); - - const vector unsigned char neg1 = (vector unsigned char)AVV(-1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1); - - const vector unsigned char permHH = (vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, - 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F); + + const vector signed char neg1 = vec_splat_s8(-1); + const vector unsigned char permHH = (const vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F); #define STORE(i) \ const vector unsigned char perms##i = \ @@ -396,7 +491,7 @@ static inline void doVertDefFilter_altivec(uint8_t src[], int stride, PPContext const vector unsigned char vg##i = \ vec_perm(st##i, vbT##i, permHH); \ const vector unsigned char mask##i = \ - vec_perm((vector unsigned char)zero, neg1, perms##i); \ + vec_perm((vector unsigned char)zero, (vector unsigned char)neg1, perms##i); \ const vector unsigned char vg2##i = \ vec_perm(vg##i, vg##i, perms##i); \ const vector unsigned char svA##i = \ @@ -680,7 +775,7 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) { ITER(6, 7, 8); ITER(7, 8, 9); - const vector signed char neg1 = 
vec_splat_s8( -1 ); + const vector signed char neg1 = vec_splat_s8(-1); #define STORE_LINE(i) \ const vector unsigned char permST##i = \ @@ -708,6 +803,394 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) { #undef F2 } -#define horizClassify_altivec(a...) horizClassify_C(a) #define doHorizLowPass_altivec(a...) doHorizLowPass_C(a) #define doHorizDefFilter_altivec(a...) doHorizDefFilter_C(a) +#define do_a_deblock_altivec(a...) do_a_deblock_C(a) + +static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride, + uint8_t *tempBlured, uint32_t *tempBluredPast, int *maxNoise) +{ + const vector signed int zero = vec_splat_s32(0); + const vector signed short vsint16_1 = vec_splat_s16(1); + vector signed int v_dp = zero; + vector signed int v_sysdp = zero; + int d, sysd, i; + + tempBluredPast[127]= maxNoise[0]; + tempBluredPast[128]= maxNoise[1]; + tempBluredPast[129]= maxNoise[2]; + +#define LOAD_LINE(src, i) \ + register int j##src##i = i * stride; \ + vector unsigned char perm##src##i = vec_lvsl(j##src##i, src); \ + const vector unsigned char v_##src##A1##i = vec_ld(j##src##i, src); \ + const vector unsigned char v_##src##A2##i = vec_ld(j##src##i + 16, src); \ + const vector unsigned char v_##src##A##i = \ + vec_perm(v_##src##A1##i, v_##src##A2##i, perm##src##i); \ + vector signed short v_##src##Ass##i = \ + (vector signed short)vec_mergeh((vector signed char)zero, \ + (vector signed char)v_##src##A##i) + + LOAD_LINE(src, 0); + LOAD_LINE(src, 1); + LOAD_LINE(src, 2); + LOAD_LINE(src, 3); + LOAD_LINE(src, 4); + LOAD_LINE(src, 5); + LOAD_LINE(src, 6); + LOAD_LINE(src, 7); + + LOAD_LINE(tempBlured, 0); + LOAD_LINE(tempBlured, 1); + LOAD_LINE(tempBlured, 2); + LOAD_LINE(tempBlured, 3); + LOAD_LINE(tempBlured, 4); + LOAD_LINE(tempBlured, 5); + LOAD_LINE(tempBlured, 6); + LOAD_LINE(tempBlured, 7); +#undef LOAD_LINE + +#define ACCUMULATE_DIFFS(i) \ + vector signed short v_d##i = vec_sub(v_tempBluredAss##i, \ + v_srcAss##i); \ + v_dp = 
vec_msums(v_d##i, v_d##i, v_dp); \ + v_sysdp = vec_msums(v_d##i, vsint16_1, v_sysdp) + + ACCUMULATE_DIFFS(0); + ACCUMULATE_DIFFS(1); + ACCUMULATE_DIFFS(2); + ACCUMULATE_DIFFS(3); + ACCUMULATE_DIFFS(4); + ACCUMULATE_DIFFS(5); + ACCUMULATE_DIFFS(6); + ACCUMULATE_DIFFS(7); +#undef ACCUMULATE_DIFFS + + v_dp = vec_sums(v_dp, zero); + v_sysdp = vec_sums(v_sysdp, zero); + + v_dp = vec_splat(v_dp, 3); + v_sysdp = vec_splat(v_sysdp, 3); + + vec_ste(v_dp, 0, &d); + vec_ste(v_sysdp, 0, &sysd); + + i = d; + d = (4*d + +(*(tempBluredPast-256)) + +(*(tempBluredPast-1))+ (*(tempBluredPast+1)) + +(*(tempBluredPast+256)) + +4)>>3; + + *tempBluredPast=i; + + if (d > maxNoise[1]) { + if (d < maxNoise[2]) { +#define OP(i) v_tempBluredAss##i = vec_avg(v_tempBluredAss##i, v_srcAss##i); + + OP(0); + OP(1); + OP(2); + OP(3); + OP(4); + OP(5); + OP(6); + OP(7); +#undef OP + } else { +#define OP(i) v_tempBluredAss##i = v_srcAss##i; + + OP(0); + OP(1); + OP(2); + OP(3); + OP(4); + OP(5); + OP(6); + OP(7); +#undef OP + } + } else { + if (d < maxNoise[0]) { + const vector signed short vsint16_7 = vec_splat_s16(7); + const vector signed short vsint16_4 = vec_splat_s16(4); + const vector unsigned short vuint16_3 = vec_splat_u16(3); + +#define OP(i) \ + const vector signed short v_temp##i = \ + vec_mladd(v_tempBluredAss##i, \ + vsint16_7, v_srcAss##i); \ + const vector signed short v_temp2##i = \ + vec_add(v_temp##i, vsint16_4); \ + v_tempBluredAss##i = vec_sr(v_temp2##i, vuint16_3) + + OP(0); + OP(1); + OP(2); + OP(3); + OP(4); + OP(5); + OP(6); + OP(7); +#undef OP + } else { + const vector signed short vsint16_3 = vec_splat_s16(3); + const vector signed short vsint16_2 = vec_splat_s16(2); + +#define OP(i) \ + const vector signed short v_temp##i = \ + vec_mladd(v_tempBluredAss##i, \ + vsint16_3, v_srcAss##i); \ + const vector signed short v_temp2##i = \ + vec_add(v_temp##i, vsint16_2); \ + v_tempBluredAss##i = vec_sr(v_temp2##i, (vector unsigned short)vsint16_2) + + OP(0); + OP(1); + OP(2); + 
OP(3); + OP(4); + OP(5); + OP(6); + OP(7); +#undef OP + } + } + + const vector signed char neg1 = vec_splat_s8(-1); + const vector unsigned char permHH = (const vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F); + +#define PACK_AND_STORE(src, i) \ + const vector unsigned char perms##src##i = \ + vec_lvsr(i * stride, src); \ + const vector unsigned char vf##src##i = \ + vec_packsu(v_tempBluredAss##i, (vector signed short)zero); \ + const vector unsigned char vg##src##i = \ + vec_perm(vf##src##i, v_##src##A##i, permHH); \ + const vector unsigned char mask##src##i = \ + vec_perm((vector unsigned char)zero, (vector unsigned char)neg1, perms##src##i); \ + const vector unsigned char vg2##src##i = \ + vec_perm(vg##src##i, vg##src##i, perms##src##i); \ + const vector unsigned char svA##src##i = \ + vec_sel(v_##src##A1##i, vg2##src##i, mask##src##i); \ + const vector unsigned char svB##src##i = \ + vec_sel(vg2##src##i, v_##src##A2##i, mask##src##i); \ + vec_st(svA##src##i, i * stride, src); \ + vec_st(svB##src##i, i * stride + 16, src) + + PACK_AND_STORE(src, 0); + PACK_AND_STORE(src, 1); + PACK_AND_STORE(src, 2); + PACK_AND_STORE(src, 3); + PACK_AND_STORE(src, 4); + PACK_AND_STORE(src, 5); + PACK_AND_STORE(src, 6); + PACK_AND_STORE(src, 7); + PACK_AND_STORE(tempBlured, 0); + PACK_AND_STORE(tempBlured, 1); + PACK_AND_STORE(tempBlured, 2); + PACK_AND_STORE(tempBlured, 3); + PACK_AND_STORE(tempBlured, 4); + PACK_AND_STORE(tempBlured, 5); + PACK_AND_STORE(tempBlured, 6); + PACK_AND_STORE(tempBlured, 7); +#undef PACK_AND_STORE +} + +static inline void transpose_16x8_char_toPackedAlign_altivec(unsigned char* dst, unsigned char* src, int stride) { + const vector unsigned char zero = vec_splat_u8(0); + +#define LOAD_DOUBLE_LINE(i, j) \ + vector unsigned char perm1##i = vec_lvsl(i * stride, src); \ + vector unsigned char perm2##i = vec_lvsl(j * stride, src); \ + vector unsigned char srcA##i = vec_ld(i * 
stride, src); \ + vector unsigned char srcB##i = vec_ld(i * stride + 16, src); \ + vector unsigned char srcC##i = vec_ld(j * stride, src); \ + vector unsigned char srcD##i = vec_ld(j * stride+ 16, src); \ + vector unsigned char src##i = vec_perm(srcA##i, srcB##i, perm1##i); \ + vector unsigned char src##j = vec_perm(srcC##i, srcD##i, perm2##i) + + LOAD_DOUBLE_LINE(0, 1); + LOAD_DOUBLE_LINE(2, 3); + LOAD_DOUBLE_LINE(4, 5); + LOAD_DOUBLE_LINE(6, 7); +#undef LOAD_DOUBLE_LINE + + vector unsigned char tempA = vec_mergeh(src0, zero); + vector unsigned char tempB = vec_mergel(src0, zero); + vector unsigned char tempC = vec_mergeh(src1, zero); + vector unsigned char tempD = vec_mergel(src1, zero); + vector unsigned char tempE = vec_mergeh(src2, zero); + vector unsigned char tempF = vec_mergel(src2, zero); + vector unsigned char tempG = vec_mergeh(src3, zero); + vector unsigned char tempH = vec_mergel(src3, zero); + vector unsigned char tempI = vec_mergeh(src4, zero); + vector unsigned char tempJ = vec_mergel(src4, zero); + vector unsigned char tempK = vec_mergeh(src5, zero); + vector unsigned char tempL = vec_mergel(src5, zero); + vector unsigned char tempM = vec_mergeh(src6, zero); + vector unsigned char tempN = vec_mergel(src6, zero); + vector unsigned char tempO = vec_mergeh(src7, zero); + vector unsigned char tempP = vec_mergel(src7, zero); + + vector unsigned char temp0 = vec_mergeh(tempA, tempI); + vector unsigned char temp1 = vec_mergel(tempA, tempI); + vector unsigned char temp2 = vec_mergeh(tempB, tempJ); + vector unsigned char temp3 = vec_mergel(tempB, tempJ); + vector unsigned char temp4 = vec_mergeh(tempC, tempK); + vector unsigned char temp5 = vec_mergel(tempC, tempK); + vector unsigned char temp6 = vec_mergeh(tempD, tempL); + vector unsigned char temp7 = vec_mergel(tempD, tempL); + vector unsigned char temp8 = vec_mergeh(tempE, tempM); + vector unsigned char temp9 = vec_mergel(tempE, tempM); + vector unsigned char temp10 = vec_mergeh(tempF, tempN); + vector 
unsigned char temp11 = vec_mergel(tempF, tempN); + vector unsigned char temp12 = vec_mergeh(tempG, tempO); + vector unsigned char temp13 = vec_mergel(tempG, tempO); + vector unsigned char temp14 = vec_mergeh(tempH, tempP); + vector unsigned char temp15 = vec_mergel(tempH, tempP); + + tempA = vec_mergeh(temp0, temp8); + tempB = vec_mergel(temp0, temp8); + tempC = vec_mergeh(temp1, temp9); + tempD = vec_mergel(temp1, temp9); + tempE = vec_mergeh(temp2, temp10); + tempF = vec_mergel(temp2, temp10); + tempG = vec_mergeh(temp3, temp11); + tempH = vec_mergel(temp3, temp11); + tempI = vec_mergeh(temp4, temp12); + tempJ = vec_mergel(temp4, temp12); + tempK = vec_mergeh(temp5, temp13); + tempL = vec_mergel(temp5, temp13); + tempM = vec_mergeh(temp6, temp14); + tempN = vec_mergel(temp6, temp14); + tempO = vec_mergeh(temp7, temp15); + tempP = vec_mergel(temp7, temp15); + + temp0 = vec_mergeh(tempA, tempI); + temp1 = vec_mergel(tempA, tempI); + temp2 = vec_mergeh(tempB, tempJ); + temp3 = vec_mergel(tempB, tempJ); + temp4 = vec_mergeh(tempC, tempK); + temp5 = vec_mergel(tempC, tempK); + temp6 = vec_mergeh(tempD, tempL); + temp7 = vec_mergel(tempD, tempL); + temp8 = vec_mergeh(tempE, tempM); + temp9 = vec_mergel(tempE, tempM); + temp10 = vec_mergeh(tempF, tempN); + temp11 = vec_mergel(tempF, tempN); + temp12 = vec_mergeh(tempG, tempO); + temp13 = vec_mergel(tempG, tempO); + temp14 = vec_mergeh(tempH, tempP); + temp15 = vec_mergel(tempH, tempP); + + vec_st(temp0, 0, dst); + vec_st(temp1, 16, dst); + vec_st(temp2, 32, dst); + vec_st(temp3, 48, dst); + vec_st(temp4, 64, dst); + vec_st(temp5, 80, dst); + vec_st(temp6, 96, dst); + vec_st(temp7, 112, dst); + vec_st(temp8, 128, dst); + vec_st(temp9, 144, dst); + vec_st(temp10, 160, dst); + vec_st(temp11, 176, dst); + vec_st(temp12, 192, dst); + vec_st(temp13, 208, dst); + vec_st(temp14, 224, dst); + vec_st(temp15, 240, dst); +} + +static inline void transpose_8x16_char_fromPackedAlign_altivec(unsigned char* dst, unsigned char* src, int 
stride) { + const vector unsigned char zero = vec_splat_u8(0); + const vector unsigned char magic_perm = (const vector unsigned char) + AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F); + +#define LOAD_DOUBLE_LINE(i, j) \ + vector unsigned char src##i = vec_ld(i * 16, src); \ + vector unsigned char src##j = vec_ld(j * 16, src) + + LOAD_DOUBLE_LINE(0, 1); + LOAD_DOUBLE_LINE(2, 3); + LOAD_DOUBLE_LINE(4, 5); + LOAD_DOUBLE_LINE(6, 7); + LOAD_DOUBLE_LINE(8, 9); + LOAD_DOUBLE_LINE(10, 11); + LOAD_DOUBLE_LINE(12, 13); + LOAD_DOUBLE_LINE(14, 15); +#undef LOAD_DOUBLE_LINE + + vector unsigned char tempA = vec_mergeh(src0, src8); + vector unsigned char tempB; + vector unsigned char tempC = vec_mergeh(src1, src9); + vector unsigned char tempD; + vector unsigned char tempE = vec_mergeh(src2, src10); + vector unsigned char tempG = vec_mergeh(src3, src11); + vector unsigned char tempI = vec_mergeh(src4, src12); + vector unsigned char tempJ; + vector unsigned char tempK = vec_mergeh(src5, src13); + vector unsigned char tempL; + vector unsigned char tempM = vec_mergeh(src6, src14); + vector unsigned char tempO = vec_mergeh(src7, src15); + + vector unsigned char temp0 = vec_mergeh(tempA, tempI); + vector unsigned char temp1 = vec_mergel(tempA, tempI); + vector unsigned char temp2; + vector unsigned char temp3; + vector unsigned char temp4 = vec_mergeh(tempC, tempK); + vector unsigned char temp5 = vec_mergel(tempC, tempK); + vector unsigned char temp6; + vector unsigned char temp7; + vector unsigned char temp8 = vec_mergeh(tempE, tempM); + vector unsigned char temp9 = vec_mergel(tempE, tempM); + vector unsigned char temp12 = vec_mergeh(tempG, tempO); + vector unsigned char temp13 = vec_mergel(tempG, tempO); + + tempA = vec_mergeh(temp0, temp8); + tempB = vec_mergel(temp0, temp8); + tempC = vec_mergeh(temp1, temp9); + tempD = vec_mergel(temp1, temp9); + tempI = vec_mergeh(temp4, temp12); + tempJ = vec_mergel(temp4, temp12); + tempK 
= vec_mergeh(temp5, temp13); + tempL = vec_mergel(temp5, temp13); + + temp0 = vec_mergeh(tempA, tempI); + temp1 = vec_mergel(tempA, tempI); + temp2 = vec_mergeh(tempB, tempJ); + temp3 = vec_mergel(tempB, tempJ); + temp4 = vec_mergeh(tempC, tempK); + temp5 = vec_mergel(tempC, tempK); + temp6 = vec_mergeh(tempD, tempL); + temp7 = vec_mergel(tempD, tempL); + + + const vector signed char neg1 = vec_splat_s8(-1); +#define STORE_DOUBLE_LINE(i, j) \ + vector unsigned char dstA##i = vec_ld(i * stride, dst); \ + vector unsigned char dstB##i = vec_ld(i * stride + 16, dst); \ + vector unsigned char dstA##j = vec_ld(j * stride, dst); \ + vector unsigned char dstB##j = vec_ld(j * stride+ 16, dst); \ + vector unsigned char align##i = vec_lvsr(i * stride, dst); \ + vector unsigned char align##j = vec_lvsr(j * stride, dst); \ + vector unsigned char mask##i = vec_perm(zero, (vector unsigned char)neg1, align##i); \ + vector unsigned char mask##j = vec_perm(zero, (vector unsigned char)neg1, align##j); \ + vector unsigned char dstR##i = vec_perm(temp##i, temp##i, align##i); \ + vector unsigned char dstR##j = vec_perm(temp##j, temp##j, align##j); \ + vector unsigned char dstAF##i = vec_sel(dstA##i, dstR##i, mask##i); \ + vector unsigned char dstBF##i = vec_sel(dstR##i, dstB##i, mask##i); \ + vector unsigned char dstAF##j = vec_sel(dstA##j, dstR##j, mask##j); \ + vector unsigned char dstBF##j = vec_sel(dstR##j, dstB##j, mask##j); \ + vec_st(dstAF##i, i * stride, dst); \ + vec_st(dstBF##i, i * stride + 16, dst); \ + vec_st(dstAF##j, j * stride, dst); \ + vec_st(dstBF##j, j * stride + 16, dst) + + STORE_DOUBLE_LINE(0,1); + STORE_DOUBLE_LINE(2,3); + STORE_DOUBLE_LINE(4,5); + STORE_DOUBLE_LINE(6,7); +} diff --git a/src/libffmpeg/libavcodec/libpostproc/postprocess_internal.h b/src/libffmpeg/libavcodec/libpostproc/postprocess_internal.h index db50fa3b5..01d4679ad 100644 --- a/src/libffmpeg/libavcodec/libpostproc/postprocess_internal.h +++ 
b/src/libffmpeg/libavcodec/libpostproc/postprocess_internal.h @@ -37,9 +37,11 @@ // Experimental vertical filters #define V_X1_FILTER 0x0200 // 512 +#define V_A_DEBLOCK 0x0400 // Experimental horizontal filters #define H_X1_FILTER 0x2000 // 8192 +#define H_A_DEBLOCK 0x4000 /// select between full y range (255-0) or standart one (234-16) #define FULL_Y_RANGE 0x8000 // 32768 @@ -158,3 +160,11 @@ typedef struct PPContext{ } PPContext; +static inline void linecpy(void *dest, void *src, int lines, int stride) +{ + if (stride > 0) { + memcpy(dest, src, lines*stride); + } else { + memcpy(dest+(lines-1)*stride, src+(lines-1)*stride, -lines*stride); + } +} diff --git a/src/libffmpeg/libavcodec/libpostproc/postprocess_template.c b/src/libffmpeg/libavcodec/libpostproc/postprocess_template.c index 4e81bd556..d1307caca 100644 --- a/src/libffmpeg/libavcodec/libpostproc/postprocess_template.c +++ b/src/libffmpeg/libavcodec/libpostproc/postprocess_template.c @@ -22,15 +22,37 @@ */ +#ifdef ARCH_X86_64 +# define REGa rax +# define REGc rcx +# define REGd rdx +# define REG_a "rax" +# define REG_c "rcx" +# define REG_d "rdx" +# define REG_SP "rsp" +# define ALIGN_MASK "$0xFFFFFFFFFFFFFFF8" +#else +# define REGa eax +# define REGc ecx +# define REGd edx +# define REG_a "eax" +# define REG_c "ecx" +# define REG_d "edx" +# define REG_SP "esp" +# define ALIGN_MASK "$0xFFFFFFF8" +#endif + + #undef PAVGB #undef PMINUB #undef PMAXUB #ifdef HAVE_MMX2 -#define PAVGB(a,b) "pavgb " #a ", " #b " \n\t" +#define REAL_PAVGB(a,b) "pavgb " #a ", " #b " \n\t" #elif defined (HAVE_3DNOW) -#define PAVGB(a,b) "pavgusb " #a ", " #b " \n\t" +#define REAL_PAVGB(a,b) "pavgusb " #a ", " #b " \n\t" #endif +#define PAVGB(a,b) REAL_PAVGB(a,b) #ifdef HAVE_MMX2 #define PMINUB(a,b,t) "pminub " #a ", " #b " \n\t" @@ -64,12 +86,12 @@ asm volatile( ); asm volatile( - "leal (%2, %3), %%eax \n\t" + "lea (%2, %3), %%"REG_a" \n\t" // 0 1 2 3 4 5 6 7 8 9 // %1 eax eax+%2 eax+2%2 %1+4%2 ecx ecx+%2 ecx+2%2 %1+8%2 ecx+4%2 "movq 
(%2), %%mm0 \n\t" - "movq (%%eax), %%mm1 \n\t" + "movq (%%"REG_a"), %%mm1 \n\t" "movq %%mm0, %%mm3 \n\t" "movq %%mm0, %%mm4 \n\t" PMAXUB(%%mm1, %%mm4) @@ -78,7 +100,7 @@ asm volatile( "paddb %%mm7, %%mm0 \n\t" "pcmpgtb %%mm6, %%mm0 \n\t" - "movq (%%eax,%3), %%mm2 \n\t" + "movq (%%"REG_a",%3), %%mm2 \n\t" PMAXUB(%%mm2, %%mm4) PMINUB(%%mm2, %%mm3, %%mm5) "psubb %%mm2, %%mm1 \n\t" @@ -86,7 +108,7 @@ asm volatile( "pcmpgtb %%mm6, %%mm1 \n\t" "paddb %%mm1, %%mm0 \n\t" - "movq (%%eax, %3, 2), %%mm1 \n\t" + "movq (%%"REG_a", %3, 2), %%mm1 \n\t" PMAXUB(%%mm1, %%mm4) PMINUB(%%mm1, %%mm3, %%mm5) "psubb %%mm1, %%mm2 \n\t" @@ -94,7 +116,7 @@ asm volatile( "pcmpgtb %%mm6, %%mm2 \n\t" "paddb %%mm2, %%mm0 \n\t" - "leal (%%eax, %3, 4), %%eax \n\t" + "lea (%%"REG_a", %3, 4), %%"REG_a" \n\t" "movq (%2, %3, 4), %%mm2 \n\t" PMAXUB(%%mm2, %%mm4) @@ -104,7 +126,7 @@ asm volatile( "pcmpgtb %%mm6, %%mm1 \n\t" "paddb %%mm1, %%mm0 \n\t" - "movq (%%eax), %%mm1 \n\t" + "movq (%%"REG_a"), %%mm1 \n\t" PMAXUB(%%mm1, %%mm4) PMINUB(%%mm1, %%mm3, %%mm5) "psubb %%mm1, %%mm2 \n\t" @@ -112,7 +134,7 @@ asm volatile( "pcmpgtb %%mm6, %%mm2 \n\t" "paddb %%mm2, %%mm0 \n\t" - "movq (%%eax, %3), %%mm2 \n\t" + "movq (%%"REG_a", %3), %%mm2 \n\t" PMAXUB(%%mm2, %%mm4) PMINUB(%%mm2, %%mm3, %%mm5) "psubb %%mm2, %%mm1 \n\t" @@ -120,7 +142,7 @@ asm volatile( "pcmpgtb %%mm6, %%mm1 \n\t" "paddb %%mm1, %%mm0 \n\t" - "movq (%%eax, %3, 2), %%mm1 \n\t" + "movq (%%"REG_a", %3, 2), %%mm1 \n\t" PMAXUB(%%mm1, %%mm4) PMINUB(%%mm1, %%mm3, %%mm5) "psubb %%mm1, %%mm2 \n\t" @@ -152,8 +174,8 @@ asm volatile( "movd %%mm4, %1 \n\t" : "=r" (numEq), "=r" (dcOk) - : "r" (src), "r" (stride), "m" (c->pQPb) - : "%eax" + : "r" (src), "r" ((long)stride), "m" (c->pQPb) + : "%"REG_a ); numEq= (-numEq) &0xFF; @@ -194,10 +216,10 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c) "por %%mm2, %%mm6 \n\t"// First Line to Filter "movq (%0, %1, 8), %%mm5 \n\t" - "leal (%0, %1, 4), %%eax \n\t" - "leal (%0, %1, 8), 
%%ecx \n\t" - "subl %1, %%ecx \n\t" - "addl %1, %0 \n\t" // %0 points to line 1 not 0 + "lea (%0, %1, 4), %%"REG_a" \n\t" + "lea (%0, %1, 8), %%"REG_c" \n\t" + "sub %1, %%"REG_c" \n\t" + "add %1, %0 \n\t" // %0 points to line 1 not 0 "movq (%0, %1, 8), %%mm7 \n\t" "movq %%mm5, %%mm1 \n\t" "movq %%mm7, %%mm2 \n\t" @@ -225,7 +247,7 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c) "movq (%0, %1, 4), %%mm2 \n\t" // 1 "movq %%mm2, %%mm5 \n\t" // 1 - PAVGB((%%eax), %%mm2) // 11 /2 + PAVGB((%%REGa), %%mm2) // 11 /2 PAVGB((%0, %1, 2), %%mm2) // 211 /4 "movq %%mm2, %%mm3 \n\t" // 211 /4 "movq (%0), %%mm4 \n\t" // 1 @@ -237,15 +259,15 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c) PAVGB(%%mm6, %%mm0) //1 1 /2 "movq %%mm4, %%mm3 \n\t" // 1 PAVGB((%0,%1,2), %%mm3) // 1 1 /2 - PAVGB((%%eax,%1,2), %%mm5) // 11 /2 - PAVGB((%%eax), %%mm5) // 211 /4 + PAVGB((%%REGa,%1,2), %%mm5) // 11 /2 + PAVGB((%%REGa), %%mm5) // 211 /4 PAVGB(%%mm5, %%mm3) // 2 2211 /8 PAVGB(%%mm0, %%mm3) //4242211 /16 "movq %%mm3, (%0,%1) \n\t" // X // mm1=2 mm2=3(211) mm4=1 mm5=4(211) mm6=0 mm7=9 PAVGB(%%mm4, %%mm6) //11 /2 - "movq (%%ecx), %%mm0 \n\t" // 1 - PAVGB((%%eax, %1, 2), %%mm0) // 11/2 + "movq (%%"REG_c"), %%mm0 \n\t" // 1 + PAVGB((%%REGa, %1, 2), %%mm0) // 11/2 "movq %%mm0, %%mm3 \n\t" // 11/2 PAVGB(%%mm1, %%mm0) // 2 11/4 PAVGB(%%mm6, %%mm0) //222 11/8 @@ -253,17 +275,17 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c) "movq (%0, %1, 2), %%mm2 \n\t" // 1 "movq %%mm0, (%0, %1, 2) \n\t" // X // mm1=2 mm2=3 mm3=6(11) mm4=1 mm5=4(211) mm6=0(11) mm7=9 - "movq (%%eax, %1, 4), %%mm0 \n\t" // 1 - PAVGB((%%ecx), %%mm0) // 11 /2 + "movq (%%"REG_a", %1, 4), %%mm0 \n\t" // 1 + PAVGB((%%REGc), %%mm0) // 11 /2 PAVGB(%%mm0, %%mm6) //11 11 /4 PAVGB(%%mm1, %%mm4) // 11 /2 PAVGB(%%mm2, %%mm1) // 11 /2 PAVGB(%%mm1, %%mm6) //1122 11 /8 PAVGB(%%mm5, %%mm6) //112242211 /16 - "movq (%%eax), %%mm5 \n\t" // 1 - "movq 
%%mm6, (%%eax) \n\t" // X + "movq (%%"REG_a"), %%mm5 \n\t" // 1 + "movq %%mm6, (%%"REG_a") \n\t" // X // mm0=7(11) mm1=2(11) mm2=3 mm3=6(11) mm4=1(11) mm5=4 mm7=9 - "movq (%%eax, %1, 4), %%mm6 \n\t" // 1 + "movq (%%"REG_a", %1, 4), %%mm6 \n\t" // 1 PAVGB(%%mm7, %%mm6) // 11 /2 PAVGB(%%mm4, %%mm6) // 11 11 /4 PAVGB(%%mm3, %%mm6) // 11 2211 /8 @@ -276,29 +298,29 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c) PAVGB(%%mm7, %%mm1) // 11 2 /4 PAVGB(%%mm4, %%mm5) // 11 /2 PAVGB(%%mm5, %%mm0) // 11 11 /4 - "movq (%%eax, %1, 2), %%mm6 \n\t" // 1 + "movq (%%"REG_a", %1, 2), %%mm6 \n\t" // 1 PAVGB(%%mm6, %%mm1) // 11 4 2 /8 PAVGB(%%mm0, %%mm1) // 11224222 /16 - "movq %%mm1, (%%eax, %1, 2) \n\t" // X + "movq %%mm1, (%%"REG_a", %1, 2) \n\t" // X // mm2=3(112) mm3=6(11) mm4=5 mm5=4(11) mm6=6 mm7=9 - PAVGB((%%ecx), %%mm2) // 112 4 /8 - "movq (%%eax, %1, 4), %%mm0 \n\t" // 1 + PAVGB((%%REGc), %%mm2) // 112 4 /8 + "movq (%%"REG_a", %1, 4), %%mm0 \n\t" // 1 PAVGB(%%mm0, %%mm6) // 1 1 /2 PAVGB(%%mm7, %%mm6) // 1 12 /4 PAVGB(%%mm2, %%mm6) // 1122424 /4 - "movq %%mm6, (%%ecx) \n\t" // X + "movq %%mm6, (%%"REG_c") \n\t" // X // mm0=8 mm3=6(11) mm4=5 mm5=4(11) mm7=9 PAVGB(%%mm7, %%mm5) // 11 2 /4 PAVGB(%%mm7, %%mm5) // 11 6 /8 PAVGB(%%mm3, %%mm0) // 112 /4 PAVGB(%%mm0, %%mm5) // 112246 /16 - "movq %%mm5, (%%eax, %1, 4) \n\t" // X - "subl %1, %0 \n\t" + "movq %%mm5, (%%"REG_a", %1, 4) \n\t" // X + "sub %1, %0 \n\t" : - : "r" (src), "r" (stride), "m" (c->pQPb) - : "%eax", "%ecx" + : "r" (src), "r" ((long)stride), "m" (c->pQPb) + : "%"REG_a, "%"REG_c ); #else const int l1= stride; @@ -317,25 +339,26 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c) const int first= ABS(src[0] - src[l1]) < c->QP ? src[0] : src[l1]; const int last= ABS(src[l8] - src[l9]) < c->QP ? 
src[l9] : src[l8]; - int sums[9]; - sums[0] = first + src[l1]; - sums[1] = src[l1] + src[l2]; - sums[2] = src[l2] + src[l3]; - sums[3] = src[l3] + src[l4]; - sums[4] = src[l4] + src[l5]; - sums[5] = src[l5] + src[l6]; - sums[6] = src[l6] + src[l7]; - sums[7] = src[l7] + src[l8]; - sums[8] = src[l8] + last; - - src[l1]= ((sums[0]<<2) + ((first + sums[2])<<1) + sums[4] + 8)>>4; - src[l2]= ((src[l2]<<2) + ((first + sums[0] + sums[3])<<1) + sums[5] + 8)>>4; - src[l3]= ((src[l3]<<2) + ((first + sums[1] + sums[4])<<1) + sums[6] + 8)>>4; - src[l4]= ((src[l4]<<2) + ((sums[2] + sums[5])<<1) + sums[0] + sums[7] + 8)>>4; - src[l5]= ((src[l5]<<2) + ((sums[3] + sums[6])<<1) + sums[1] + sums[8] + 8)>>4; - src[l6]= ((src[l6]<<2) + ((last + sums[7] + sums[4])<<1) + sums[2] + 8)>>4; - src[l7]= (((last + src[l7])<<2) + ((src[l8] + sums[5])<<1) + sums[3] + 8)>>4; - src[l8]= ((sums[8]<<2) + ((last + sums[6])<<1) + sums[4] + 8)>>4; + int sums[10]; + sums[0] = 4*first + src[l1] + src[l2] + src[l3] + 4; + sums[1] = sums[0] - first + src[l4]; + sums[2] = sums[1] - first + src[l5]; + sums[3] = sums[2] - first + src[l6]; + sums[4] = sums[3] - first + src[l7]; + sums[5] = sums[4] - src[l1] + src[l8]; + sums[6] = sums[5] - src[l2] + last; + sums[7] = sums[6] - src[l3] + last; + sums[8] = sums[7] - src[l4] + last; + sums[9] = sums[8] - src[l5] + last; + + src[l1]= (sums[0] + sums[2] + 2*src[l1])>>4; + src[l2]= (sums[1] + sums[3] + 2*src[l2])>>4; + src[l3]= (sums[2] + sums[4] + 2*src[l3])>>4; + src[l4]= (sums[3] + sums[5] + 2*src[l4])>>4; + src[l5]= (sums[4] + sums[6] + 2*src[l5])>>4; + src[l6]= (sums[5] + sums[7] + 2*src[l6])>>4; + src[l7]= (sums[6] + sums[8] + 2*src[l7])>>4; + src[l8]= (sums[7] + sums[9] + 2*src[l8])>>4; src++; } @@ -363,8 +386,8 @@ static inline void RENAME(vertRK1Filter)(uint8_t *src, int stride, int QP) asm volatile( "pxor %%mm7, %%mm7 \n\t" // 0 "movq "MANGLE(b80)", %%mm6 \n\t" // MIN_SIGNED_BYTE - "leal (%0, %1), %%eax \n\t" - "leal (%%eax, %1, 4), %%ecx \n\t" + "leal 
(%0, %1), %%"REG_a" \n\t" + "leal (%%"REG_a", %1, 4), %%"REG_c" \n\t" // 0 1 2 3 4 5 6 7 8 9 // %0 eax eax+%1 eax+2%1 %0+4%1 ecx ecx+%1 ecx+2%1 %0+8%1 ecx+4%1 "movq "MANGLE(pQPb)", %%mm0 \n\t" // QP,..., QP @@ -374,7 +397,7 @@ static inline void RENAME(vertRK1Filter)(uint8_t *src, int stride, int QP) "pand "MANGLE(b3F)", %%mm0 \n\t" // QP/4,..., QP/4 "paddusb %%mm1, %%mm0 \n\t" // QP*1.25 ... "movq (%0, %1, 4), %%mm2 \n\t" // line 4 - "movq (%%ecx), %%mm3 \n\t" // line 5 + "movq (%%"REG_c"), %%mm3 \n\t" // line 5 "movq %%mm2, %%mm4 \n\t" // line 4 "pcmpeqb %%mm5, %%mm5 \n\t" // -1 "pxor %%mm2, %%mm5 \n\t" // -line 4 - 1 @@ -392,32 +415,32 @@ static inline void RENAME(vertRK1Filter)(uint8_t *src, int stride, int QP) // "psubb %%mm6, %%mm2 \n\t" "movq %%mm2, (%0,%1, 4) \n\t" - "movq (%%ecx), %%mm2 \n\t" + "movq (%%"REG_c"), %%mm2 \n\t" // "paddb %%mm6, %%mm2 \n\t" // line 5 + 0x80 "psubb %%mm5, %%mm2 \n\t" // "psubb %%mm6, %%mm2 \n\t" - "movq %%mm2, (%%ecx) \n\t" + "movq %%mm2, (%%"REG_c") \n\t" "paddb %%mm6, %%mm5 \n\t" "psrlw $2, %%mm5 \n\t" "pand "MANGLE(b3F)", %%mm5 \n\t" "psubb "MANGLE(b20)", %%mm5 \n\t" // (l5-l4)/8 - "movq (%%eax, %1, 2), %%mm2 \n\t" + "movq (%%"REG_a", %1, 2), %%mm2 \n\t" "paddb %%mm6, %%mm2 \n\t" // line 3 + 0x80 "paddsb %%mm5, %%mm2 \n\t" "psubb %%mm6, %%mm2 \n\t" - "movq %%mm2, (%%eax, %1, 2) \n\t" + "movq %%mm2, (%%"REG_a", %1, 2) \n\t" - "movq (%%ecx, %1), %%mm2 \n\t" + "movq (%%"REG_c", %1), %%mm2 \n\t" "paddb %%mm6, %%mm2 \n\t" // line 6 + 0x80 "psubsb %%mm5, %%mm2 \n\t" "psubb %%mm6, %%mm2 \n\t" - "movq %%mm2, (%%ecx, %1) \n\t" + "movq %%mm2, (%%"REG_c", %1) \n\t" : - : "r" (src), "r" (stride) - : "%eax", "%ecx" + : "r" (src), "r" ((long)stride) + : "%"REG_a, "%"REG_c ); #else const int l1= stride; @@ -463,18 +486,18 @@ static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co) asm volatile( "pxor %%mm7, %%mm7 \n\t" // 0 - "leal (%0, %1), %%eax \n\t" - "leal (%%eax, %1, 4), %%ecx \n\t" + "lea (%0, %1), %%"REG_a" 
\n\t" + "lea (%%"REG_a", %1, 4), %%"REG_c" \n\t" // 0 1 2 3 4 5 6 7 8 9 // %0 eax eax+%1 eax+2%1 %0+4%1 ecx ecx+%1 ecx+2%1 %0+8%1 ecx+4%1 - "movq (%%eax, %1, 2), %%mm0 \n\t" // line 3 + "movq (%%"REG_a", %1, 2), %%mm0 \n\t" // line 3 "movq (%0, %1, 4), %%mm1 \n\t" // line 4 "movq %%mm1, %%mm2 \n\t" // line 4 "psubusb %%mm0, %%mm1 \n\t" "psubusb %%mm2, %%mm0 \n\t" "por %%mm1, %%mm0 \n\t" // |l2 - l3| - "movq (%%ecx), %%mm3 \n\t" // line 5 - "movq (%%ecx, %1), %%mm4 \n\t" // line 6 + "movq (%%"REG_c"), %%mm3 \n\t" // line 5 + "movq (%%"REG_c", %1), %%mm4 \n\t" // line 6 "movq %%mm3, %%mm5 \n\t" // line 5 "psubusb %%mm4, %%mm3 \n\t" "psubusb %%mm5, %%mm4 \n\t" @@ -506,43 +529,43 @@ static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co) "pxor %%mm2, %%mm0 \n\t" "movq %%mm0, (%0, %1, 4) \n\t" // line 4 - "movq (%%ecx), %%mm0 \n\t" // line 5 + "movq (%%"REG_c"), %%mm0 \n\t" // line 5 "pxor %%mm2, %%mm0 \n\t" //(l4 - l5) <= 0 ? -l5-1 : l5 "paddusb %%mm3, %%mm0 \n\t" "pxor %%mm2, %%mm0 \n\t" - "movq %%mm0, (%%ecx) \n\t" // line 5 + "movq %%mm0, (%%"REG_c") \n\t" // line 5 PAVGB(%%mm7, %%mm1) // d/4 - "movq (%%eax, %1, 2), %%mm0 \n\t" // line 3 + "movq (%%"REG_a", %1, 2), %%mm0 \n\t" // line 3 "pxor %%mm2, %%mm0 \n\t" //(l4 - l5) <= 0 ? -l4-1 : l4 "psubusb %%mm1, %%mm0 \n\t" "pxor %%mm2, %%mm0 \n\t" - "movq %%mm0, (%%eax, %1, 2) \n\t" // line 3 + "movq %%mm0, (%%"REG_a", %1, 2) \n\t" // line 3 - "movq (%%ecx, %1), %%mm0 \n\t" // line 6 + "movq (%%"REG_c", %1), %%mm0 \n\t" // line 6 "pxor %%mm2, %%mm0 \n\t" //(l4 - l5) <= 0 ? -l5-1 : l5 "paddusb %%mm1, %%mm0 \n\t" "pxor %%mm2, %%mm0 \n\t" - "movq %%mm0, (%%ecx, %1) \n\t" // line 6 + "movq %%mm0, (%%"REG_c", %1) \n\t" // line 6 PAVGB(%%mm7, %%mm1) // d/8 - "movq (%%eax, %1), %%mm0 \n\t" // line 2 + "movq (%%"REG_a", %1), %%mm0 \n\t" // line 2 "pxor %%mm2, %%mm0 \n\t" //(l4 - l5) <= 0 ? 
-l2-1 : l2 "psubusb %%mm1, %%mm0 \n\t" "pxor %%mm2, %%mm0 \n\t" - "movq %%mm0, (%%eax, %1) \n\t" // line 2 + "movq %%mm0, (%%"REG_a", %1) \n\t" // line 2 - "movq (%%ecx, %1, 2), %%mm0 \n\t" // line 7 + "movq (%%"REG_c", %1, 2), %%mm0 \n\t" // line 7 "pxor %%mm2, %%mm0 \n\t" //(l4 - l5) <= 0 ? -l7-1 : l7 "paddusb %%mm1, %%mm0 \n\t" "pxor %%mm2, %%mm0 \n\t" - "movq %%mm0, (%%ecx, %1, 2) \n\t" // line 7 + "movq %%mm0, (%%"REG_c", %1, 2) \n\t" // line 7 : - : "r" (src), "r" (stride), "m" (co->pQPb) - : "%eax", "%ecx" + : "r" (src), "r" ((long)stride), "m" (co->pQPb) + : "%"REG_a, "%"REG_c ); #else @@ -607,8 +630,8 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext #if 0 //sligtly more accurate and slightly slower "pxor %%mm7, %%mm7 \n\t" // 0 - "leal (%0, %1), %%eax \n\t" - "leal (%%eax, %1, 4), %%ecx \n\t" + "lea (%0, %1), %%"REG_a" \n\t" + "lea (%%"REG_a", %1, 4), %%"REG_c" \n\t" // 0 1 2 3 4 5 6 7 // %0 %0+%1 %0+2%1 eax+2%1 %0+4%1 eax+4%1 ecx+%1 ecx+2%1 // %0 eax eax+%1 eax+2%1 %0+4%1 ecx ecx+%1 ecx+2%1 @@ -621,8 +644,8 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext PAVGB(%%mm1, %%mm0) // ~(l2 + 2l0)/4 PAVGB(%%mm2, %%mm0) // ~(5l2 + 2l0)/8 - "movq (%%eax), %%mm1 \n\t" // l1 - "movq (%%eax, %1, 2), %%mm3 \n\t" // l3 + "movq (%%"REG_a"), %%mm1 \n\t" // l1 + "movq (%%"REG_a", %1, 2), %%mm3 \n\t" // l3 "movq %%mm1, %%mm4 \n\t" // l1 PAVGB(%%mm7, %%mm1) // ~l1/2 PAVGB(%%mm3, %%mm1) // ~(l1 + 2l3)/4 @@ -640,7 +663,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext PAVGB(%%mm2, %%mm0) // ~(l4 + 2l2)/4 PAVGB(%%mm4, %%mm0) // ~(5l4 + 2l2)/8 - "movq (%%ecx), %%mm2 \n\t" // l5 + "movq (%%"REG_c"), %%mm2 \n\t" // l5 "movq %%mm3, %%mm5 \n\t" // l3 PAVGB(%%mm7, %%mm3) // ~l3/2 PAVGB(%%mm2, %%mm3) // ~(l3 + 2l5)/4 @@ -653,13 +676,13 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext "pcmpeqb %%mm7, %%mm0 \n\t" // SIGN(2l2 - 5l3 + 5l4 - 2l5) // mm0= 
SIGN(menergy), mm1= |lenergy|, mm2= l5, mm3= |menergy|, mm4=l4, mm5= l3, mm7=0 - "movq (%%ecx, %1), %%mm6 \n\t" // l6 + "movq (%%"REG_c", %1), %%mm6 \n\t" // l6 "movq %%mm6, %%mm5 \n\t" // l6 PAVGB(%%mm7, %%mm6) // ~l6/2 PAVGB(%%mm4, %%mm6) // ~(l6 + 2l4)/4 PAVGB(%%mm5, %%mm6) // ~(5l6 + 2l4)/8 - "movq (%%ecx, %1, 2), %%mm5 \n\t" // l7 + "movq (%%"REG_c", %1, 2), %%mm5 \n\t" // l7 "movq %%mm2, %%mm4 \n\t" // l5 PAVGB(%%mm7, %%mm2) // ~l5/2 PAVGB(%%mm5, %%mm2) // ~(l5 + 2l7)/4 @@ -686,7 +709,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext "paddusb %%mm1, %%mm3 \n\t" // "paddusb "MANGLE(b01)", %%mm3 \n\t" - "movq (%%eax, %1, 2), %%mm6 \n\t" //l3 + "movq (%%"REG_a", %1, 2), %%mm6 \n\t" //l3 "movq (%0, %1, 4), %%mm5 \n\t" //l4 "movq (%0, %1, 4), %%mm4 \n\t" //l4 "psubusb %%mm6, %%mm5 \n\t" @@ -700,7 +723,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext "psubusb "MANGLE(b01)", %%mm3 \n\t" PAVGB(%%mm7, %%mm3) - "movq (%%eax, %1, 2), %%mm0 \n\t" + "movq (%%"REG_a", %1, 2), %%mm0 \n\t" "movq (%0, %1, 4), %%mm2 \n\t" "pxor %%mm6, %%mm0 \n\t" "pxor %%mm6, %%mm2 \n\t" @@ -708,36 +731,36 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext "paddb %%mm3, %%mm2 \n\t" "pxor %%mm6, %%mm0 \n\t" "pxor %%mm6, %%mm2 \n\t" - "movq %%mm0, (%%eax, %1, 2) \n\t" + "movq %%mm0, (%%"REG_a", %1, 2) \n\t" "movq %%mm2, (%0, %1, 4) \n\t" #endif - "leal (%0, %1), %%eax \n\t" + "lea (%0, %1), %%"REG_a" \n\t" "pcmpeqb %%mm6, %%mm6 \n\t" // -1 // 0 1 2 3 4 5 6 7 // %0 %0+%1 %0+2%1 eax+2%1 %0+4%1 eax+4%1 ecx+%1 ecx+2%1 // %0 eax eax+%1 eax+2%1 %0+4%1 ecx ecx+%1 ecx+2%1 - "movq (%%eax, %1, 2), %%mm1 \n\t" // l3 + "movq (%%"REG_a", %1, 2), %%mm1 \n\t" // l3 "movq (%0, %1, 4), %%mm0 \n\t" // l4 "pxor %%mm6, %%mm1 \n\t" // -l3-1 PAVGB(%%mm1, %%mm0) // -q+128 = (l4-l3+256)/2 // mm1=-l3-1, mm0=128-q - "movq (%%eax, %1, 4), %%mm2 \n\t" // l5 - "movq (%%eax, %1), %%mm3 \n\t" // l2 + "movq (%%"REG_a", %1, 
4), %%mm2 \n\t" // l5 + "movq (%%"REG_a", %1), %%mm3 \n\t" // l2 "pxor %%mm6, %%mm2 \n\t" // -l5-1 "movq %%mm2, %%mm5 \n\t" // -l5-1 "movq "MANGLE(b80)", %%mm4 \n\t" // 128 - "leal (%%eax, %1, 4), %%ecx \n\t" + "lea (%%"REG_a", %1, 4), %%"REG_c" \n\t" PAVGB(%%mm3, %%mm2) // (l2-l5+256)/2 PAVGB(%%mm0, %%mm4) // ~(l4-l3)/4 + 128 PAVGB(%%mm2, %%mm4) // ~(l2-l5)/4 +(l4-l3)/8 + 128 PAVGB(%%mm0, %%mm4) // ~(l2-l5)/8 +5(l4-l3)/16 + 128 // mm1=-l3-1, mm0=128-q, mm3=l2, mm4=menergy/16 + 128, mm5= -l5-1 - "movq (%%eax), %%mm2 \n\t" // l1 + "movq (%%"REG_a"), %%mm2 \n\t" // l1 "pxor %%mm6, %%mm2 \n\t" // -l1-1 PAVGB(%%mm3, %%mm2) // (l2-l1+256)/2 PAVGB((%0), %%mm1) // (l0-l3+256)/2 @@ -747,8 +770,8 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext PAVGB(%%mm2, %%mm3) // ~(l0-l3)/8 +5(l2-l1)/16 + 128 // mm0=128-q, mm3=lenergy/16 + 128, mm4= menergy/16 + 128, mm5= -l5-1 - PAVGB((%%ecx, %1), %%mm5) // (l6-l5+256)/2 - "movq (%%ecx, %1, 2), %%mm1 \n\t" // l7 + PAVGB((%%REGc, %1), %%mm5) // (l6-l5+256)/2 + "movq (%%"REG_c", %1, 2), %%mm1 \n\t" // l7 "pxor %%mm6, %%mm1 \n\t" // -l7-1 PAVGB((%0, %1, 4), %%mm1) // (l4-l7+256)/2 "movq "MANGLE(b80)", %%mm2 \n\t" // 128 @@ -797,7 +820,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext "pxor %%mm1, %%mm7 \n\t" // SIGN(d*q) "pand %%mm7, %%mm4 \n\t" - "movq (%%eax, %1, 2), %%mm0 \n\t" + "movq (%%"REG_a", %1, 2), %%mm0 \n\t" "movq (%0, %1, 4), %%mm2 \n\t" "pxor %%mm1, %%mm0 \n\t" "pxor %%mm1, %%mm2 \n\t" @@ -805,12 +828,12 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext "psubb %%mm4, %%mm2 \n\t" "pxor %%mm1, %%mm0 \n\t" "pxor %%mm1, %%mm2 \n\t" - "movq %%mm0, (%%eax, %1, 2) \n\t" + "movq %%mm0, (%%"REG_a", %1, 2) \n\t" "movq %%mm2, (%0, %1, 4) \n\t" : - : "r" (src), "r" (stride), "m" (c->pQPb) - : "%eax", "%ecx" + : "r" (src), "r" ((long)stride), "m" (c->pQPb) + : "%"REG_a, "%"REG_c ); /* @@ -881,8 +904,8 @@ src-=8; src+= stride*4; asm 
volatile( "pxor %%mm7, %%mm7 \n\t" - "leal -40(%%esp), %%ecx \n\t" // make space for 4 8-byte vars - "andl $0xFFFFFFF8, %%ecx \n\t" // align + "lea -40(%%"REG_SP"), %%"REG_c" \n\t" // make space for 4 8-byte vars + "and "ALIGN_MASK", %%"REG_c" \n\t" // align // 0 1 2 3 4 5 6 7 // %0 %0+%1 %0+2%1 eax+2%1 %0+4%1 eax+4%1 edx+%1 edx+2%1 // %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 @@ -893,12 +916,12 @@ src-=8; "punpckhbw %%mm7, %%mm1 \n\t" // high part of line 0 "movq (%0, %1), %%mm2 \n\t" - "leal (%0, %1, 2), %%eax \n\t" + "lea (%0, %1, 2), %%"REG_a" \n\t" "movq %%mm2, %%mm3 \n\t" "punpcklbw %%mm7, %%mm2 \n\t" // low part of line 1 "punpckhbw %%mm7, %%mm3 \n\t" // high part of line 1 - "movq (%%eax), %%mm4 \n\t" + "movq (%%"REG_a"), %%mm4 \n\t" "movq %%mm4, %%mm5 \n\t" "punpcklbw %%mm7, %%mm4 \n\t" // low part of line 2 "punpckhbw %%mm7, %%mm5 \n\t" // high part of line 2 @@ -915,7 +938,7 @@ src-=8; "psubw %%mm2, %%mm0 \n\t" // 2L0 - 5L1 + 5L2 "psubw %%mm3, %%mm1 \n\t" // 2H0 - 5H1 + 5H2 - "movq (%%eax, %1), %%mm2 \n\t" + "movq (%%"REG_a", %1), %%mm2 \n\t" "movq %%mm2, %%mm3 \n\t" "punpcklbw %%mm7, %%mm2 \n\t" // L3 "punpckhbw %%mm7, %%mm3 \n\t" // H3 @@ -924,24 +947,24 @@ src-=8; "psubw %%mm3, %%mm1 \n\t" // 2H0 - 5H1 + 5H2 - H3 "psubw %%mm2, %%mm0 \n\t" // 2L0 - 5L1 + 5L2 - 2L3 "psubw %%mm3, %%mm1 \n\t" // 2H0 - 5H1 + 5H2 - 2H3 - "movq %%mm0, (%%ecx) \n\t" // 2L0 - 5L1 + 5L2 - 2L3 - "movq %%mm1, 8(%%ecx) \n\t" // 2H0 - 5H1 + 5H2 - 2H3 + "movq %%mm0, (%%"REG_c") \n\t" // 2L0 - 5L1 + 5L2 - 2L3 + "movq %%mm1, 8(%%"REG_c") \n\t" // 2H0 - 5H1 + 5H2 - 2H3 - "movq (%%eax, %1, 2), %%mm0 \n\t" + "movq (%%"REG_a", %1, 2), %%mm0 \n\t" "movq %%mm0, %%mm1 \n\t" "punpcklbw %%mm7, %%mm0 \n\t" // L4 "punpckhbw %%mm7, %%mm1 \n\t" // H4 "psubw %%mm0, %%mm2 \n\t" // L3 - L4 "psubw %%mm1, %%mm3 \n\t" // H3 - H4 - "movq %%mm2, 16(%%ecx) \n\t" // L3 - L4 - "movq %%mm3, 24(%%ecx) \n\t" // H3 - H4 + "movq %%mm2, 16(%%"REG_c") \n\t" // L3 - L4 + "movq %%mm3, 24(%%"REG_c") \n\t" // H3 
- H4 "paddw %%mm4, %%mm4 \n\t" // 2L2 "paddw %%mm5, %%mm5 \n\t" // 2H2 "psubw %%mm2, %%mm4 \n\t" // 2L2 - L3 + L4 "psubw %%mm3, %%mm5 \n\t" // 2H2 - H3 + H4 - "leal (%%eax, %1), %0 \n\t" + "lea (%%"REG_a", %1), %0 \n\t" "psllw $2, %%mm2 \n\t" // 4L3 - 4L4 "psllw $2, %%mm3 \n\t" // 4H3 - 4H4 "psubw %%mm2, %%mm4 \n\t" // 2L2 - 5L3 + 5L4 @@ -956,10 +979,10 @@ src-=8; "psubw %%mm2, %%mm4 \n\t" // 2L2 - 5L3 + 5L4 - 2L5 "psubw %%mm3, %%mm5 \n\t" // 2H2 - 5H3 + 5H4 - 2H5 - "movq (%%eax, %1, 4), %%mm6 \n\t" + "movq (%%"REG_a", %1, 4), %%mm6 \n\t" "punpcklbw %%mm7, %%mm6 \n\t" // L6 "psubw %%mm6, %%mm2 \n\t" // L5 - L6 - "movq (%%eax, %1, 4), %%mm6 \n\t" + "movq (%%"REG_a", %1, 4), %%mm6 \n\t" "punpckhbw %%mm7, %%mm6 \n\t" // H6 "psubw %%mm6, %%mm3 \n\t" // H5 - H6 @@ -983,8 +1006,8 @@ src-=8; "psubw %%mm2, %%mm0 \n\t" // 2L4 - 5L5 + 5L6 - 2L7 "psubw %%mm3, %%mm1 \n\t" // 2H4 - 5H5 + 5H6 - 2H7 - "movq (%%ecx), %%mm2 \n\t" // 2L0 - 5L1 + 5L2 - 2L3 - "movq 8(%%ecx), %%mm3 \n\t" // 2H0 - 5H1 + 5H2 - 2H3 + "movq (%%"REG_c"), %%mm2 \n\t" // 2L0 - 5L1 + 5L2 - 2L3 + "movq 8(%%"REG_c"), %%mm3 \n\t" // 2H0 - 5H1 + 5H2 - 2H3 #ifdef HAVE_MMX2 "movq %%mm7, %%mm6 \n\t" // 0 @@ -1030,6 +1053,9 @@ src-=8; "psubw %%mm6, %%mm1 \n\t" #endif + "movd %2, %%mm2 \n\t" // QP + "punpcklbw %%mm7, %%mm2 \n\t" + "movq %%mm7, %%mm6 \n\t" // 0 "pcmpgtw %%mm4, %%mm6 \n\t" // sign(2L2 - 5L3 + 5L4 - 2L5) "pxor %%mm6, %%mm4 \n\t" @@ -1038,7 +1064,6 @@ src-=8; "pxor %%mm7, %%mm5 \n\t" "psubw %%mm7, %%mm5 \n\t" // |2H2 - 5H3 + 5H4 - 2H5| // 100 opcodes - "movd %2, %%mm2 \n\t" // QP "psllw $3, %%mm2 \n\t" // 8QP "movq %%mm2, %%mm3 \n\t" // 8QP "pcmpgtw %%mm4, %%mm2 \n\t" @@ -1060,8 +1085,8 @@ src-=8; "psrlw $6, %%mm4 \n\t" "psrlw $6, %%mm5 \n\t" - "movq 16(%%ecx), %%mm0 \n\t" // L3 - L4 - "movq 24(%%ecx), %%mm1 \n\t" // H3 - H4 + "movq 16(%%"REG_c"), %%mm0 \n\t" // L3 - L4 + "movq 24(%%"REG_c"), %%mm1 \n\t" // H3 - H4 "pxor %%mm2, %%mm2 \n\t" "pxor %%mm3, %%mm3 \n\t" @@ -1104,8 +1129,8 @@ src-=8; "movq %%mm0, 
(%0, %1) \n\t" : "+r" (src) - : "r" (stride), "m" (c->pQPb) - : "%eax", "%ecx" + : "r" ((long)stride), "m" (c->pQPb) + : "%"REG_a, "%"REG_c ); #else const int l1= stride; @@ -1168,20 +1193,20 @@ static inline void RENAME(dering)(uint8_t src[], int stride, PPContext *c) "packuswb %%mm0, %%mm0 \n\t" "movq %%mm0, %3 \n\t" - "leal (%0, %1), %%eax \n\t" - "leal (%%eax, %1, 4), %%edx \n\t" + "lea (%0, %1), %%"REG_a" \n\t" + "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t" // 0 1 2 3 4 5 6 7 8 9 // %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 %0+8%1 edx+4%1 #undef FIND_MIN_MAX #ifdef HAVE_MMX2 -#define FIND_MIN_MAX(addr)\ +#define REAL_FIND_MIN_MAX(addr)\ "movq " #addr ", %%mm0 \n\t"\ "pminub %%mm0, %%mm7 \n\t"\ "pmaxub %%mm0, %%mm6 \n\t" #else -#define FIND_MIN_MAX(addr)\ +#define REAL_FIND_MIN_MAX(addr)\ "movq " #addr ", %%mm0 \n\t"\ "movq %%mm7, %%mm1 \n\t"\ "psubusb %%mm0, %%mm6 \n\t"\ @@ -1189,14 +1214,15 @@ static inline void RENAME(dering)(uint8_t src[], int stride, PPContext *c) "psubusb %%mm0, %%mm1 \n\t"\ "psubb %%mm1, %%mm7 \n\t" #endif +#define FIND_MIN_MAX(addr) REAL_FIND_MIN_MAX(addr) -FIND_MIN_MAX((%%eax)) -FIND_MIN_MAX((%%eax, %1)) -FIND_MIN_MAX((%%eax, %1, 2)) +FIND_MIN_MAX((%%REGa)) +FIND_MIN_MAX((%%REGa, %1)) +FIND_MIN_MAX((%%REGa, %1, 2)) FIND_MIN_MAX((%0, %1, 4)) -FIND_MIN_MAX((%%edx)) -FIND_MIN_MAX((%%edx, %1)) -FIND_MIN_MAX((%%edx, %1, 2)) +FIND_MIN_MAX((%%REGd)) +FIND_MIN_MAX((%%REGd, %1)) +FIND_MIN_MAX((%%REGd, %1, 2)) FIND_MIN_MAX((%0, %1, 8)) "movq %%mm7, %%mm4 \n\t" @@ -1249,13 +1275,13 @@ FIND_MIN_MAX((%0, %1, 8)) "movd %%mm6, %%ecx \n\t" "cmpb "MANGLE(deringThreshold)", %%cl \n\t" " jb 1f \n\t" - "leal -24(%%esp), %%ecx \n\t" - "andl $0xFFFFFFF8, %%ecx \n\t" + "lea -24(%%"REG_SP"), %%"REG_c" \n\t" + "and "ALIGN_MASK", %%"REG_c" \n\t" PAVGB(%%mm0, %%mm7) // a=(max + min)/2 "punpcklbw %%mm7, %%mm7 \n\t" "punpcklbw %%mm7, %%mm7 \n\t" "punpcklbw %%mm7, %%mm7 \n\t" - "movq %%mm7, (%%ecx) \n\t" + "movq %%mm7, (%%"REG_c") \n\t" "movq (%0), %%mm0 \n\t" // 
L10 "movq %%mm0, %%mm1 \n\t" // L10 @@ -1280,13 +1306,13 @@ FIND_MIN_MAX((%0, %1, 8)) "paddb %%mm2, %%mm0 \n\t" "paddb %%mm3, %%mm0 \n\t" - "movq (%%eax), %%mm2 \n\t" // L11 + "movq (%%"REG_a"), %%mm2 \n\t" // L11 "movq %%mm2, %%mm3 \n\t" // L11 "movq %%mm2, %%mm4 \n\t" // L11 "psllq $8, %%mm3 \n\t" "psrlq $8, %%mm4 \n\t" - "movd -4(%%eax), %%mm5 \n\t" - "movd 8(%%eax), %%mm6 \n\t" + "movd -4(%%"REG_a"), %%mm5 \n\t" + "movd 8(%%"REG_a"), %%mm6 \n\t" "psrlq $24, %%mm5 \n\t" "psllq $56, %%mm6 \n\t" "por %%mm5, %%mm3 \n\t" // L01 @@ -1303,7 +1329,7 @@ FIND_MIN_MAX((%0, %1, 8)) "paddb %%mm4, %%mm2 \n\t" "paddb %%mm5, %%mm2 \n\t" // 0, 2, 3, 1 -#define DERING_CORE(dst,src,ppsx,psx,sx,pplx,plx,lx,t0,t1) \ +#define REAL_DERING_CORE(dst,src,ppsx,psx,sx,pplx,plx,lx,t0,t1) \ "movq " #src ", " #sx " \n\t" /* src[0] */\ "movq " #sx ", " #lx " \n\t" /* src[0] */\ "movq " #sx ", " #t0 " \n\t" /* src[0] */\ @@ -1319,8 +1345,8 @@ FIND_MIN_MAX((%0, %1, 8)) PAVGB(t0, lx) /* (src[-1] + src[+1])/2 */\ PAVGB(sx, lx) /* (src[-1] + 2src[0] + src[+1])/4 */\ PAVGB(lx, pplx) \ - "movq " #lx ", 8(%%ecx) \n\t"\ - "movq (%%ecx), " #lx " \n\t"\ + "movq " #lx ", 8(%%"REG_c") \n\t"\ + "movq (%%"REG_c"), " #lx " \n\t"\ "psubusb " #lx ", " #t1 " \n\t"\ "psubusb " #lx ", " #t0 " \n\t"\ "psubusb " #lx ", " #sx " \n\t"\ @@ -1347,8 +1373,10 @@ FIND_MIN_MAX((%0, %1, 8)) "pandn " #dst ", " #ppsx " \n\t"\ "por " #pplx ", " #ppsx " \n\t"\ "movq " #ppsx ", " #dst " \n\t"\ - "movq 8(%%ecx), " #lx " \n\t" + "movq 8(%%"REG_c"), " #lx " \n\t" +#define DERING_CORE(dst,src,ppsx,psx,sx,pplx,plx,lx,t0,t1) \ + REAL_DERING_CORE(dst,src,ppsx,psx,sx,pplx,plx,lx,t0,t1) /* 0000000 1111111 @@ -1365,18 +1393,18 @@ FIND_MIN_MAX((%0, %1, 8)) */ //DERING_CORE(dst,src ,ppsx ,psx ,sx ,pplx ,plx ,lx ,t0 ,t1) -DERING_CORE((%%eax),(%%eax, %1) ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7) -DERING_CORE((%%eax, %1),(%%eax, %1, 2) ,%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7) -DERING_CORE((%%eax, %1, 2),(%0, %1, 4) 
,%%mm4,%%mm0,%%mm2,%%mm5,%%mm1,%%mm3,%%mm6,%%mm7) -DERING_CORE((%0, %1, 4),(%%edx) ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7) -DERING_CORE((%%edx),(%%edx, %1) ,%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7) -DERING_CORE((%%edx, %1), (%%edx, %1, 2),%%mm4,%%mm0,%%mm2,%%mm5,%%mm1,%%mm3,%%mm6,%%mm7) -DERING_CORE((%%edx, %1, 2),(%0, %1, 8) ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7) -DERING_CORE((%0, %1, 8),(%%edx, %1, 4) ,%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7) +DERING_CORE((%%REGa),(%%REGa, %1) ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7) +DERING_CORE((%%REGa, %1),(%%REGa, %1, 2) ,%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7) +DERING_CORE((%%REGa, %1, 2),(%0, %1, 4) ,%%mm4,%%mm0,%%mm2,%%mm5,%%mm1,%%mm3,%%mm6,%%mm7) +DERING_CORE((%0, %1, 4),(%%REGd) ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7) +DERING_CORE((%%REGd),(%%REGd, %1) ,%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7) +DERING_CORE((%%REGd, %1), (%%REGd, %1, 2),%%mm4,%%mm0,%%mm2,%%mm5,%%mm1,%%mm3,%%mm6,%%mm7) +DERING_CORE((%%REGd, %1, 2),(%0, %1, 8) ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7) +DERING_CORE((%0, %1, 8),(%%REGd, %1, 4) ,%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7) "1: \n\t" - : : "r" (src), "r" (stride), "m" (c->pQPb), "m"(c->pQPb2) - : "%eax", "%edx", "%ecx" + : : "r" (src), "r" ((long)stride), "m" (c->pQPb), "m"(c->pQPb2) + : "%"REG_a, "%"REG_d, "%"REG_c ); #else int y; @@ -1523,27 +1551,27 @@ static inline void RENAME(deInterlaceInterpolateLinear)(uint8_t src[], int strid #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) src+= 4*stride; asm volatile( - "leal (%0, %1), %%eax \n\t" - "leal (%%eax, %1, 4), %%ecx \n\t" + "lea (%0, %1), %%"REG_a" \n\t" + "lea (%%"REG_a", %1, 4), %%"REG_c" \n\t" // 0 1 2 3 4 5 6 7 8 9 // %0 eax eax+%1 eax+2%1 %0+4%1 ecx ecx+%1 ecx+2%1 %0+8%1 ecx+4%1 "movq (%0), %%mm0 \n\t" - "movq (%%eax, %1), %%mm1 \n\t" + "movq (%%"REG_a", %1), %%mm1 \n\t" PAVGB(%%mm1, %%mm0) - "movq %%mm0, (%%eax) \n\t" + "movq %%mm0, (%%"REG_a") \n\t" 
"movq (%0, %1, 4), %%mm0 \n\t" PAVGB(%%mm0, %%mm1) - "movq %%mm1, (%%eax, %1, 2) \n\t" - "movq (%%ecx, %1), %%mm1 \n\t" + "movq %%mm1, (%%"REG_a", %1, 2) \n\t" + "movq (%%"REG_c", %1), %%mm1 \n\t" PAVGB(%%mm1, %%mm0) - "movq %%mm0, (%%ecx) \n\t" + "movq %%mm0, (%%"REG_c") \n\t" "movq (%0, %1, 8), %%mm0 \n\t" PAVGB(%%mm0, %%mm1) - "movq %%mm1, (%%ecx, %1, 2) \n\t" + "movq %%mm1, (%%"REG_c", %1, 2) \n\t" - : : "r" (src), "r" (stride) - : "%eax", "%ecx" + : : "r" (src), "r" ((long)stride) + : "%"REG_a, "%"REG_c ); #else int a, b, x; @@ -1576,15 +1604,15 @@ static inline void RENAME(deInterlaceInterpolateCubic)(uint8_t src[], int stride #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) src+= stride*3; asm volatile( - "leal (%0, %1), %%eax \n\t" - "leal (%%eax, %1, 4), %%edx \n\t" - "leal (%%edx, %1, 4), %%ecx \n\t" - "addl %1, %%ecx \n\t" + "lea (%0, %1), %%"REG_a" \n\t" + "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t" + "lea (%%"REG_d", %1, 4), %%"REG_c" \n\t" + "add %1, %%"REG_c" \n\t" "pxor %%mm7, %%mm7 \n\t" // 0 1 2 3 4 5 6 7 8 9 10 // %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 %0+8%1 edx+4%1 ecx -#define DEINT_CUBIC(a,b,c,d,e)\ +#define REAL_DEINT_CUBIC(a,b,c,d,e)\ "movq " #a ", %%mm0 \n\t"\ "movq " #b ", %%mm1 \n\t"\ "movq " #d ", %%mm2 \n\t"\ @@ -1605,14 +1633,15 @@ static inline void RENAME(deInterlaceInterpolateCubic)(uint8_t src[], int stride "psubw %%mm2, %%mm3 \n\t" /* H(9b + 9d - a - e)/16 */\ "packuswb %%mm3, %%mm1 \n\t"\ "movq %%mm1, " #c " \n\t" +#define DEINT_CUBIC(a,b,c,d,e) REAL_DEINT_CUBIC(a,b,c,d,e) -DEINT_CUBIC((%0), (%%eax, %1), (%%eax, %1, 2), (%0, %1, 4), (%%edx, %1)) -DEINT_CUBIC((%%eax, %1), (%0, %1, 4), (%%edx), (%%edx, %1), (%0, %1, 8)) -DEINT_CUBIC((%0, %1, 4), (%%edx, %1), (%%edx, %1, 2), (%0, %1, 8), (%%ecx)) -DEINT_CUBIC((%%edx, %1), (%0, %1, 8), (%%edx, %1, 4), (%%ecx), (%%ecx, %1, 2)) +DEINT_CUBIC((%0), (%%REGa, %1), (%%REGa, %1, 2), (%0, %1, 4), (%%REGd, %1)) +DEINT_CUBIC((%%REGa, %1), (%0, %1, 4), (%%REGd), (%%REGd, %1), (%0, %1, 8)) 
+DEINT_CUBIC((%0, %1, 4), (%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8), (%%REGc)) +DEINT_CUBIC((%%REGd, %1), (%0, %1, 8), (%%REGd, %1, 4), (%%REGc), (%%REGc, %1, 2)) - : : "r" (src), "r" (stride) - : "%eax", "%edx", "ecx" + : : "r" (src), "r" ((long)stride) + : "%"REG_a, "%"REG_d, "%"REG_c ); #else int x; @@ -1640,14 +1669,14 @@ static inline void RENAME(deInterlaceFF)(uint8_t src[], int stride, uint8_t *tmp #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) src+= stride*4; asm volatile( - "leal (%0, %1), %%eax \n\t" - "leal (%%eax, %1, 4), %%edx \n\t" + "lea (%0, %1), %%"REG_a" \n\t" + "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t" "pxor %%mm7, %%mm7 \n\t" "movq (%2), %%mm0 \n\t" // 0 1 2 3 4 5 6 7 8 9 10 // %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 %0+8%1 edx+4%1 ecx -#define DEINT_FF(a,b,c,d)\ +#define REAL_DEINT_FF(a,b,c,d)\ "movq " #a ", %%mm1 \n\t"\ "movq " #b ", %%mm2 \n\t"\ "movq " #c ", %%mm3 \n\t"\ @@ -1675,14 +1704,16 @@ static inline void RENAME(deInterlaceFF)(uint8_t src[], int stride, uint8_t *tmp "packuswb %%mm4, %%mm1 \n\t"\ "movq %%mm1, " #b " \n\t"\ -DEINT_FF((%0) , (%%eax) , (%%eax, %1), (%%eax, %1, 2)) -DEINT_FF((%%eax, %1), (%%eax, %1, 2), (%0, %1, 4), (%%edx) ) -DEINT_FF((%0, %1, 4), (%%edx) , (%%edx, %1), (%%edx, %1, 2)) -DEINT_FF((%%edx, %1), (%%edx, %1, 2), (%0, %1, 8), (%%edx, %1, 4)) +#define DEINT_FF(a,b,c,d) REAL_DEINT_FF(a,b,c,d) + +DEINT_FF((%0) , (%%REGa) , (%%REGa, %1), (%%REGa, %1, 2)) +DEINT_FF((%%REGa, %1), (%%REGa, %1, 2), (%0, %1, 4), (%%REGd) ) +DEINT_FF((%0, %1, 4), (%%REGd) , (%%REGd, %1), (%%REGd, %1, 2)) +DEINT_FF((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8), (%%REGd, %1, 4)) "movq %%mm0, (%2) \n\t" - : : "r" (src), "r" (stride), "r"(tmp) - : "%eax", "%edx" + : : "r" (src), "r" ((long)stride), "r"(tmp) + : "%"REG_a, "%"REG_d ); #else int x; @@ -1718,15 +1749,15 @@ static inline void RENAME(deInterlaceL5)(uint8_t src[], int stride, uint8_t *tmp #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) src+= stride*4; asm volatile( - "leal 
(%0, %1), %%eax \n\t" - "leal (%%eax, %1, 4), %%edx \n\t" + "lea (%0, %1), %%"REG_a" \n\t" + "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t" "pxor %%mm7, %%mm7 \n\t" "movq (%2), %%mm0 \n\t" "movq (%3), %%mm1 \n\t" // 0 1 2 3 4 5 6 7 8 9 10 // %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 %0+8%1 edx+4%1 ecx -#define DEINT_L5(t1,t2,a,b,c)\ +#define REAL_DEINT_L5(t1,t2,a,b,c)\ "movq " #a ", %%mm2 \n\t"\ "movq " #b ", %%mm3 \n\t"\ "movq " #c ", %%mm4 \n\t"\ @@ -1759,19 +1790,21 @@ static inline void RENAME(deInterlaceL5)(uint8_t src[], int stride, uint8_t *tmp "packuswb %%mm5, %%mm2 \n\t"\ "movq %%mm2, " #a " \n\t"\ -DEINT_L5(%%mm0, %%mm1, (%0) , (%%eax) , (%%eax, %1) ) -DEINT_L5(%%mm1, %%mm0, (%%eax) , (%%eax, %1) , (%%eax, %1, 2)) -DEINT_L5(%%mm0, %%mm1, (%%eax, %1) , (%%eax, %1, 2), (%0, %1, 4) ) -DEINT_L5(%%mm1, %%mm0, (%%eax, %1, 2), (%0, %1, 4) , (%%edx) ) -DEINT_L5(%%mm0, %%mm1, (%0, %1, 4) , (%%edx) , (%%edx, %1) ) -DEINT_L5(%%mm1, %%mm0, (%%edx) , (%%edx, %1) , (%%edx, %1, 2)) -DEINT_L5(%%mm0, %%mm1, (%%edx, %1) , (%%edx, %1, 2), (%0, %1, 8) ) -DEINT_L5(%%mm1, %%mm0, (%%edx, %1, 2), (%0, %1, 8) , (%%edx, %1, 4)) +#define DEINT_L5(t1,t2,a,b,c) REAL_DEINT_L5(t1,t2,a,b,c) + +DEINT_L5(%%mm0, %%mm1, (%0) , (%%REGa) , (%%REGa, %1) ) +DEINT_L5(%%mm1, %%mm0, (%%REGa) , (%%REGa, %1) , (%%REGa, %1, 2)) +DEINT_L5(%%mm0, %%mm1, (%%REGa, %1) , (%%REGa, %1, 2), (%0, %1, 4) ) +DEINT_L5(%%mm1, %%mm0, (%%REGa, %1, 2), (%0, %1, 4) , (%%REGd) ) +DEINT_L5(%%mm0, %%mm1, (%0, %1, 4) , (%%REGd) , (%%REGd, %1) ) +DEINT_L5(%%mm1, %%mm0, (%%REGd) , (%%REGd, %1) , (%%REGd, %1, 2)) +DEINT_L5(%%mm0, %%mm1, (%%REGd, %1) , (%%REGd, %1, 2), (%0, %1, 8) ) +DEINT_L5(%%mm1, %%mm0, (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4)) "movq %%mm0, (%2) \n\t" "movq %%mm1, (%3) \n\t" - : : "r" (src), "r" (stride), "r"(tmp), "r"(tmp2) - : "%eax", "%edx" + : : "r" (src), "r" ((long)stride), "r"(tmp), "r"(tmp2) + : "%"REG_a, "%"REG_d ); #else int x; @@ -1818,49 +1851,49 @@ static inline void 
RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uin #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) src+= 4*stride; asm volatile( - "leal (%0, %1), %%eax \n\t" - "leal (%%eax, %1, 4), %%edx \n\t" + "lea (%0, %1), %%"REG_a" \n\t" + "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t" // 0 1 2 3 4 5 6 7 8 9 // %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 %0+8%1 edx+4%1 "movq (%2), %%mm0 \n\t" // L0 - "movq (%%eax), %%mm1 \n\t" // L2 + "movq (%%"REG_a"), %%mm1 \n\t" // L2 PAVGB(%%mm1, %%mm0) // L0+L2 "movq (%0), %%mm2 \n\t" // L1 PAVGB(%%mm2, %%mm0) "movq %%mm0, (%0) \n\t" - "movq (%%eax, %1), %%mm0 \n\t" // L3 + "movq (%%"REG_a", %1), %%mm0 \n\t" // L3 PAVGB(%%mm0, %%mm2) // L1+L3 PAVGB(%%mm1, %%mm2) // 2L2 + L1 + L3 - "movq %%mm2, (%%eax) \n\t" - "movq (%%eax, %1, 2), %%mm2 \n\t" // L4 + "movq %%mm2, (%%"REG_a") \n\t" + "movq (%%"REG_a", %1, 2), %%mm2 \n\t" // L4 PAVGB(%%mm2, %%mm1) // L2+L4 PAVGB(%%mm0, %%mm1) // 2L3 + L2 + L4 - "movq %%mm1, (%%eax, %1) \n\t" + "movq %%mm1, (%%"REG_a", %1) \n\t" "movq (%0, %1, 4), %%mm1 \n\t" // L5 PAVGB(%%mm1, %%mm0) // L3+L5 PAVGB(%%mm2, %%mm0) // 2L4 + L3 + L5 - "movq %%mm0, (%%eax, %1, 2) \n\t" - "movq (%%edx), %%mm0 \n\t" // L6 + "movq %%mm0, (%%"REG_a", %1, 2) \n\t" + "movq (%%"REG_d"), %%mm0 \n\t" // L6 PAVGB(%%mm0, %%mm2) // L4+L6 PAVGB(%%mm1, %%mm2) // 2L5 + L4 + L6 "movq %%mm2, (%0, %1, 4) \n\t" - "movq (%%edx, %1), %%mm2 \n\t" // L7 + "movq (%%"REG_d", %1), %%mm2 \n\t" // L7 PAVGB(%%mm2, %%mm1) // L5+L7 PAVGB(%%mm0, %%mm1) // 2L6 + L5 + L7 - "movq %%mm1, (%%edx) \n\t" - "movq (%%edx, %1, 2), %%mm1 \n\t" // L8 + "movq %%mm1, (%%"REG_d") \n\t" + "movq (%%"REG_d", %1, 2), %%mm1 \n\t" // L8 PAVGB(%%mm1, %%mm0) // L6+L8 PAVGB(%%mm2, %%mm0) // 2L7 + L6 + L8 - "movq %%mm0, (%%edx, %1) \n\t" + "movq %%mm0, (%%"REG_d", %1) \n\t" "movq (%0, %1, 8), %%mm0 \n\t" // L9 PAVGB(%%mm0, %%mm2) // L7+L9 PAVGB(%%mm1, %%mm2) // 2L8 + L7 + L9 - "movq %%mm2, (%%edx, %1, 2) \n\t" + "movq %%mm2, (%%"REG_d", %1, 2) \n\t" "movq %%mm1, (%2) \n\t" 
- : : "r" (src), "r" (stride), "r" (tmp) - : "%eax", "%edx" + : : "r" (src), "r" ((long)stride), "r" (tmp) + : "%"REG_a, "%"REG_d ); #else int a, b, c, x; @@ -1920,62 +1953,62 @@ static inline void RENAME(deInterlaceMedian)(uint8_t src[], int stride) src+= 4*stride; #ifdef HAVE_MMX2 asm volatile( - "leal (%0, %1), %%eax \n\t" - "leal (%%eax, %1, 4), %%edx \n\t" + "lea (%0, %1), %%"REG_a" \n\t" + "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t" // 0 1 2 3 4 5 6 7 8 9 // %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 %0+8%1 edx+4%1 "movq (%0), %%mm0 \n\t" // - "movq (%%eax, %1), %%mm2 \n\t" // - "movq (%%eax), %%mm1 \n\t" // + "movq (%%"REG_a", %1), %%mm2 \n\t" // + "movq (%%"REG_a"), %%mm1 \n\t" // "movq %%mm0, %%mm3 \n\t" "pmaxub %%mm1, %%mm0 \n\t" // "pminub %%mm3, %%mm1 \n\t" // "pmaxub %%mm2, %%mm1 \n\t" // "pminub %%mm1, %%mm0 \n\t" - "movq %%mm0, (%%eax) \n\t" + "movq %%mm0, (%%"REG_a") \n\t" "movq (%0, %1, 4), %%mm0 \n\t" // - "movq (%%eax, %1, 2), %%mm1 \n\t" // + "movq (%%"REG_a", %1, 2), %%mm1 \n\t" // "movq %%mm2, %%mm3 \n\t" "pmaxub %%mm1, %%mm2 \n\t" // "pminub %%mm3, %%mm1 \n\t" // "pmaxub %%mm0, %%mm1 \n\t" // "pminub %%mm1, %%mm2 \n\t" - "movq %%mm2, (%%eax, %1, 2) \n\t" + "movq %%mm2, (%%"REG_a", %1, 2) \n\t" - "movq (%%edx), %%mm2 \n\t" // - "movq (%%edx, %1), %%mm1 \n\t" // + "movq (%%"REG_d"), %%mm2 \n\t" // + "movq (%%"REG_d", %1), %%mm1 \n\t" // "movq %%mm2, %%mm3 \n\t" "pmaxub %%mm0, %%mm2 \n\t" // "pminub %%mm3, %%mm0 \n\t" // "pmaxub %%mm1, %%mm0 \n\t" // "pminub %%mm0, %%mm2 \n\t" - "movq %%mm2, (%%edx) \n\t" + "movq %%mm2, (%%"REG_d") \n\t" - "movq (%%edx, %1, 2), %%mm2 \n\t" // + "movq (%%"REG_d", %1, 2), %%mm2 \n\t" // "movq (%0, %1, 8), %%mm0 \n\t" // "movq %%mm2, %%mm3 \n\t" "pmaxub %%mm0, %%mm2 \n\t" // "pminub %%mm3, %%mm0 \n\t" // "pmaxub %%mm1, %%mm0 \n\t" // "pminub %%mm0, %%mm2 \n\t" - "movq %%mm2, (%%edx, %1, 2) \n\t" + "movq %%mm2, (%%"REG_d", %1, 2) \n\t" - : : "r" (src), "r" (stride) - : "%eax", "%edx" + : : "r" (src), "r" 
((long)stride) + : "%"REG_a, "%"REG_d ); #else // MMX without MMX2 asm volatile( - "leal (%0, %1), %%eax \n\t" - "leal (%%eax, %1, 4), %%edx \n\t" + "lea (%0, %1), %%"REG_a" \n\t" + "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t" // 0 1 2 3 4 5 6 7 8 9 // %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 %0+8%1 edx+4%1 "pxor %%mm7, %%mm7 \n\t" -#define MEDIAN(a,b,c)\ +#define REAL_MEDIAN(a,b,c)\ "movq " #a ", %%mm0 \n\t"\ "movq " #b ", %%mm2 \n\t"\ "movq " #c ", %%mm1 \n\t"\ @@ -1998,14 +2031,15 @@ static inline void RENAME(deInterlaceMedian)(uint8_t src[], int stride) "pand %%mm2, %%mm0 \n\t"\ "pand %%mm1, %%mm0 \n\t"\ "movq %%mm0, " #b " \n\t" +#define MEDIAN(a,b,c) REAL_MEDIAN(a,b,c) -MEDIAN((%0), (%%eax), (%%eax, %1)) -MEDIAN((%%eax, %1), (%%eax, %1, 2), (%0, %1, 4)) -MEDIAN((%0, %1, 4), (%%edx), (%%edx, %1)) -MEDIAN((%%edx, %1), (%%edx, %1, 2), (%0, %1, 8)) +MEDIAN((%0), (%%REGa), (%%REGa, %1)) +MEDIAN((%%REGa, %1), (%%REGa, %1, 2), (%0, %1, 4)) +MEDIAN((%0, %1, 4), (%%REGd), (%%REGd, %1)) +MEDIAN((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8)) - : : "r" (src), "r" (stride) - : "%eax", "%edx" + : : "r" (src), "r" ((long)stride) + : "%"REG_a, "%"REG_d ); #endif // MMX #else @@ -2039,17 +2073,17 @@ MEDIAN((%%edx, %1), (%%edx, %1, 2), (%0, %1, 8)) static inline void RENAME(transpose1)(uint8_t *dst1, uint8_t *dst2, uint8_t *src, int srcStride) { asm( - "leal (%0, %1), %%eax \n\t" + "lea (%0, %1), %%"REG_a" \n\t" // 0 1 2 3 4 5 6 7 8 9 // %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 %0+8%1 edx+4%1 "movq (%0), %%mm0 \n\t" // 12345678 - "movq (%%eax), %%mm1 \n\t" // abcdefgh + "movq (%%"REG_a"), %%mm1 \n\t" // abcdefgh "movq %%mm0, %%mm2 \n\t" // 12345678 "punpcklbw %%mm1, %%mm0 \n\t" // 1a2b3c4d "punpckhbw %%mm1, %%mm2 \n\t" // 5e6f7g8h - "movq (%%eax, %1), %%mm1 \n\t" - "movq (%%eax, %1, 2), %%mm3 \n\t" + "movq (%%"REG_a", %1), %%mm1 \n\t" + "movq (%%"REG_a", %1, 2), %%mm3 \n\t" "movq %%mm1, %%mm4 \n\t" "punpcklbw %%mm3, %%mm1 \n\t" "punpckhbw %%mm3, %%mm4 \n\t" @@ -2076,16 
+2110,16 @@ static inline void RENAME(transpose1)(uint8_t *dst1, uint8_t *dst2, uint8_t *src "psrlq $32, %%mm1 \n\t" "movd %%mm1, 112(%3) \n\t" - "leal (%%eax, %1, 4), %%eax \n\t" + "lea (%%"REG_a", %1, 4), %%"REG_a" \n\t" "movq (%0, %1, 4), %%mm0 \n\t" // 12345678 - "movq (%%eax), %%mm1 \n\t" // abcdefgh + "movq (%%"REG_a"), %%mm1 \n\t" // abcdefgh "movq %%mm0, %%mm2 \n\t" // 12345678 "punpcklbw %%mm1, %%mm0 \n\t" // 1a2b3c4d "punpckhbw %%mm1, %%mm2 \n\t" // 5e6f7g8h - "movq (%%eax, %1), %%mm1 \n\t" - "movq (%%eax, %1, 2), %%mm3 \n\t" + "movq (%%"REG_a", %1), %%mm1 \n\t" + "movq (%%"REG_a", %1, 2), %%mm3 \n\t" "movq %%mm1, %%mm4 \n\t" "punpcklbw %%mm3, %%mm1 \n\t" "punpckhbw %%mm3, %%mm4 \n\t" @@ -2113,8 +2147,8 @@ static inline void RENAME(transpose1)(uint8_t *dst1, uint8_t *dst2, uint8_t *src "movd %%mm1, 116(%3) \n\t" - :: "r" (src), "r" (srcStride), "r" (dst1), "r" (dst2) - : "%eax" + :: "r" (src), "r" ((long)srcStride), "r" (dst1), "r" (dst2) + : "%"REG_a ); } @@ -2124,8 +2158,8 @@ static inline void RENAME(transpose1)(uint8_t *dst1, uint8_t *dst2, uint8_t *src static inline void RENAME(transpose2)(uint8_t *dst, int dstStride, uint8_t *src) { asm( - "leal (%0, %1), %%eax \n\t" - "leal (%%eax, %1, 4), %%edx \n\t" + "lea (%0, %1), %%"REG_a" \n\t" + "lea (%%"REG_a",%1,4), %%"REG_d"\n\t" // 0 1 2 3 4 5 6 7 8 9 // %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 %0+8%1 edx+4%1 "movq (%2), %%mm0 \n\t" // 12345678 @@ -2149,16 +2183,16 @@ static inline void RENAME(transpose2)(uint8_t *dst, int dstStride, uint8_t *src) "movd %%mm0, (%0) \n\t" "psrlq $32, %%mm0 \n\t" - "movd %%mm0, (%%eax) \n\t" - "movd %%mm3, (%%eax, %1) \n\t" + "movd %%mm0, (%%"REG_a") \n\t" + "movd %%mm3, (%%"REG_a", %1) \n\t" "psrlq $32, %%mm3 \n\t" - "movd %%mm3, (%%eax, %1, 2) \n\t" + "movd %%mm3, (%%"REG_a", %1, 2) \n\t" "movd %%mm2, (%0, %1, 4) \n\t" "psrlq $32, %%mm2 \n\t" - "movd %%mm2, (%%edx) \n\t" - "movd %%mm1, (%%edx, %1) \n\t" + "movd %%mm2, (%%"REG_d") \n\t" + "movd %%mm1, (%%"REG_d", 
%1) \n\t" "psrlq $32, %%mm1 \n\t" - "movd %%mm1, (%%edx, %1, 2) \n\t" + "movd %%mm1, (%%"REG_d", %1, 2) \n\t" "movq 64(%2), %%mm0 \n\t" // 12345678 @@ -2182,24 +2216,25 @@ static inline void RENAME(transpose2)(uint8_t *dst, int dstStride, uint8_t *src) "movd %%mm0, 4(%0) \n\t" "psrlq $32, %%mm0 \n\t" - "movd %%mm0, 4(%%eax) \n\t" - "movd %%mm3, 4(%%eax, %1) \n\t" + "movd %%mm0, 4(%%"REG_a") \n\t" + "movd %%mm3, 4(%%"REG_a", %1) \n\t" "psrlq $32, %%mm3 \n\t" - "movd %%mm3, 4(%%eax, %1, 2) \n\t" + "movd %%mm3, 4(%%"REG_a", %1, 2) \n\t" "movd %%mm2, 4(%0, %1, 4) \n\t" "psrlq $32, %%mm2 \n\t" - "movd %%mm2, 4(%%edx) \n\t" - "movd %%mm1, 4(%%edx, %1) \n\t" + "movd %%mm2, 4(%%"REG_d") \n\t" + "movd %%mm1, 4(%%"REG_d", %1) \n\t" "psrlq $32, %%mm1 \n\t" - "movd %%mm1, 4(%%edx, %1, 2) \n\t" + "movd %%mm1, 4(%%"REG_d", %1, 2) \n\t" - :: "r" (dst), "r" (dstStride), "r" (src) - : "%eax", "%edx" + :: "r" (dst), "r" ((long)dstStride), "r" (src) + : "%"REG_a, "%"REG_d ); } #endif -//static int test=0; +//static long test=0; +#ifndef HAVE_ALTIVEC static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride, uint8_t *tempBlured, uint32_t *tempBluredPast, int *maxNoise) { @@ -2212,9 +2247,9 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride, //#define L1_DIFF //u should change the thresholds too if u try that one #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) asm volatile( - "leal (%2, %2, 2), %%eax \n\t" // 3*stride - "leal (%2, %2, 4), %%edx \n\t" // 5*stride - "leal (%%edx, %2, 2), %%ecx \n\t" // 7*stride + "lea (%2, %2, 2), %%"REG_a" \n\t" // 3*stride + "lea (%2, %2, 4), %%"REG_d" \n\t" // 5*stride + "lea (%%"REG_d", %2, 2), %%"REG_c" \n\t" // 7*stride // 0 1 2 3 4 5 6 7 8 9 // %x %x+%2 %x+2%2 %x+eax %x+4%2 %x+edx %x+2eax %x+ecx %x+8%2 //FIXME reorder? 
@@ -2225,29 +2260,30 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride, "psadbw (%1, %2), %%mm1 \n\t" // |L1-R1| "movq (%0, %2, 2), %%mm2 \n\t" // L2 "psadbw (%1, %2, 2), %%mm2 \n\t" // |L2-R2| - "movq (%0, %%eax), %%mm3 \n\t" // L3 - "psadbw (%1, %%eax), %%mm3 \n\t" // |L3-R3| + "movq (%0, %%"REG_a"), %%mm3 \n\t" // L3 + "psadbw (%1, %%"REG_a"), %%mm3 \n\t" // |L3-R3| "movq (%0, %2, 4), %%mm4 \n\t" // L4 "paddw %%mm1, %%mm0 \n\t" "psadbw (%1, %2, 4), %%mm4 \n\t" // |L4-R4| - "movq (%0, %%edx), %%mm5 \n\t" // L5 + "movq (%0, %%"REG_d"), %%mm5 \n\t" // L5 "paddw %%mm2, %%mm0 \n\t" - "psadbw (%1, %%edx), %%mm5 \n\t" // |L5-R5| - "movq (%0, %%eax, 2), %%mm6 \n\t" // L6 + "psadbw (%1, %%"REG_d"), %%mm5 \n\t" // |L5-R5| + "movq (%0, %%"REG_a", 2), %%mm6 \n\t" // L6 "paddw %%mm3, %%mm0 \n\t" - "psadbw (%1, %%eax, 2), %%mm6 \n\t" // |L6-R6| - "movq (%0, %%ecx), %%mm7 \n\t" // L7 + "psadbw (%1, %%"REG_a", 2), %%mm6 \n\t" // |L6-R6| + "movq (%0, %%"REG_c"), %%mm7 \n\t" // L7 "paddw %%mm4, %%mm0 \n\t" - "psadbw (%1, %%ecx), %%mm7 \n\t" // |L7-R7| + "psadbw (%1, %%"REG_c"), %%mm7 \n\t" // |L7-R7| "paddw %%mm5, %%mm6 \n\t" "paddw %%mm7, %%mm6 \n\t" "paddw %%mm6, %%mm0 \n\t" -#elif defined (FAST_L2_DIFF) +#else +#if defined (FAST_L2_DIFF) "pcmpeqb %%mm7, %%mm7 \n\t" "movq "MANGLE(b80)", %%mm6 \n\t" "pxor %%mm0, %%mm0 \n\t" -#define L2_DIFF_CORE(a, b)\ +#define REAL_L2_DIFF_CORE(a, b)\ "movq " #a ", %%mm5 \n\t"\ "movq " #b ", %%mm2 \n\t"\ "pxor %%mm7, %%mm2 \n\t"\ @@ -2261,19 +2297,10 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride, "psrld $14, %%mm5 \n\t"\ "paddd %%mm5, %%mm0 \n\t" -L2_DIFF_CORE((%0), (%1)) -L2_DIFF_CORE((%0, %2), (%1, %2)) -L2_DIFF_CORE((%0, %2, 2), (%1, %2, 2)) -L2_DIFF_CORE((%0, %%eax), (%1, %%eax)) -L2_DIFF_CORE((%0, %2, 4), (%1, %2, 4)) -L2_DIFF_CORE((%0, %%edx), (%1, %%edx)) -L2_DIFF_CORE((%0, %%eax,2), (%1, %%eax,2)) -L2_DIFF_CORE((%0, %%ecx), (%1, %%ecx)) - #else "pxor %%mm7, %%mm7 \n\t" "pxor %%mm0, %%mm0 
\n\t" -#define L2_DIFF_CORE(a, b)\ +#define REAL_L2_DIFF_CORE(a, b)\ "movq " #a ", %%mm5 \n\t"\ "movq " #b ", %%mm2 \n\t"\ "movq %%mm5, %%mm1 \n\t"\ @@ -2289,14 +2316,18 @@ L2_DIFF_CORE((%0, %%ecx), (%1, %%ecx)) "paddd %%mm1, %%mm5 \n\t"\ "paddd %%mm5, %%mm0 \n\t" +#endif + +#define L2_DIFF_CORE(a, b) REAL_L2_DIFF_CORE(a, b) + L2_DIFF_CORE((%0), (%1)) L2_DIFF_CORE((%0, %2), (%1, %2)) L2_DIFF_CORE((%0, %2, 2), (%1, %2, 2)) -L2_DIFF_CORE((%0, %%eax), (%1, %%eax)) +L2_DIFF_CORE((%0, %%REGa), (%1, %%REGa)) L2_DIFF_CORE((%0, %2, 4), (%1, %2, 4)) -L2_DIFF_CORE((%0, %%edx), (%1, %%edx)) -L2_DIFF_CORE((%0, %%eax,2), (%1, %%eax,2)) -L2_DIFF_CORE((%0, %%ecx), (%1, %%ecx)) +L2_DIFF_CORE((%0, %%REGd), (%1, %%REGd)) +L2_DIFF_CORE((%0, %%REGa,2), (%1, %%REGa,2)) +L2_DIFF_CORE((%0, %%REGc), (%1, %%REGc)) #endif @@ -2305,94 +2336,94 @@ L2_DIFF_CORE((%0, %%ecx), (%1, %%ecx)) "paddd %%mm0, %%mm4 \n\t" "movd %%mm4, %%ecx \n\t" "shll $2, %%ecx \n\t" - "movl %3, %%edx \n\t" - "addl -4(%%edx), %%ecx \n\t" - "addl 4(%%edx), %%ecx \n\t" - "addl -1024(%%edx), %%ecx \n\t" + "mov %3, %%"REG_d" \n\t" + "addl -4(%%"REG_d"), %%ecx \n\t" + "addl 4(%%"REG_d"), %%ecx \n\t" + "addl -1024(%%"REG_d"), %%ecx \n\t" "addl $4, %%ecx \n\t" - "addl 1024(%%edx), %%ecx \n\t" + "addl 1024(%%"REG_d"), %%ecx \n\t" "shrl $3, %%ecx \n\t" - "movl %%ecx, (%%edx) \n\t" + "movl %%ecx, (%%"REG_d") \n\t" -// "movl %3, %%ecx \n\t" -// "movl %%ecx, test \n\t" +// "mov %3, %%"REG_c" \n\t" +// "mov %%"REG_c", test \n\t" // "jmp 4f \n\t" - "cmpl 512(%%edx), %%ecx \n\t" + "cmpl 512(%%"REG_d"), %%ecx \n\t" " jb 2f \n\t" - "cmpl 516(%%edx), %%ecx \n\t" + "cmpl 516(%%"REG_d"), %%ecx \n\t" " jb 1f \n\t" - "leal (%%eax, %2, 2), %%edx \n\t" // 5*stride - "leal (%%edx, %2, 2), %%ecx \n\t" // 7*stride + "lea (%%"REG_a", %2, 2), %%"REG_d" \n\t" // 5*stride + "lea (%%"REG_d", %2, 2), %%"REG_c" \n\t" // 7*stride "movq (%0), %%mm0 \n\t" // L0 "movq (%0, %2), %%mm1 \n\t" // L1 "movq (%0, %2, 2), %%mm2 \n\t" // L2 - "movq (%0, %%eax), 
%%mm3 \n\t" // L3 + "movq (%0, %%"REG_a"), %%mm3 \n\t" // L3 "movq (%0, %2, 4), %%mm4 \n\t" // L4 - "movq (%0, %%edx), %%mm5 \n\t" // L5 - "movq (%0, %%eax, 2), %%mm6 \n\t" // L6 - "movq (%0, %%ecx), %%mm7 \n\t" // L7 + "movq (%0, %%"REG_d"), %%mm5 \n\t" // L5 + "movq (%0, %%"REG_a", 2), %%mm6 \n\t" // L6 + "movq (%0, %%"REG_c"), %%mm7 \n\t" // L7 "movq %%mm0, (%1) \n\t" // L0 "movq %%mm1, (%1, %2) \n\t" // L1 "movq %%mm2, (%1, %2, 2) \n\t" // L2 - "movq %%mm3, (%1, %%eax) \n\t" // L3 + "movq %%mm3, (%1, %%"REG_a") \n\t" // L3 "movq %%mm4, (%1, %2, 4) \n\t" // L4 - "movq %%mm5, (%1, %%edx) \n\t" // L5 - "movq %%mm6, (%1, %%eax, 2) \n\t" // L6 - "movq %%mm7, (%1, %%ecx) \n\t" // L7 + "movq %%mm5, (%1, %%"REG_d") \n\t" // L5 + "movq %%mm6, (%1, %%"REG_a", 2) \n\t" // L6 + "movq %%mm7, (%1, %%"REG_c") \n\t" // L7 "jmp 4f \n\t" "1: \n\t" - "leal (%%eax, %2, 2), %%edx \n\t" // 5*stride - "leal (%%edx, %2, 2), %%ecx \n\t" // 7*stride + "lea (%%"REG_a", %2, 2), %%"REG_d" \n\t" // 5*stride + "lea (%%"REG_d", %2, 2), %%"REG_c" \n\t" // 7*stride "movq (%0), %%mm0 \n\t" // L0 PAVGB((%1), %%mm0) // L0 "movq (%0, %2), %%mm1 \n\t" // L1 PAVGB((%1, %2), %%mm1) // L1 "movq (%0, %2, 2), %%mm2 \n\t" // L2 PAVGB((%1, %2, 2), %%mm2) // L2 - "movq (%0, %%eax), %%mm3 \n\t" // L3 - PAVGB((%1, %%eax), %%mm3) // L3 + "movq (%0, %%"REG_a"), %%mm3 \n\t" // L3 + PAVGB((%1, %%REGa), %%mm3) // L3 "movq (%0, %2, 4), %%mm4 \n\t" // L4 PAVGB((%1, %2, 4), %%mm4) // L4 - "movq (%0, %%edx), %%mm5 \n\t" // L5 - PAVGB((%1, %%edx), %%mm5) // L5 - "movq (%0, %%eax, 2), %%mm6 \n\t" // L6 - PAVGB((%1, %%eax, 2), %%mm6) // L6 - "movq (%0, %%ecx), %%mm7 \n\t" // L7 - PAVGB((%1, %%ecx), %%mm7) // L7 + "movq (%0, %%"REG_d"), %%mm5 \n\t" // L5 + PAVGB((%1, %%REGd), %%mm5) // L5 + "movq (%0, %%"REG_a", 2), %%mm6 \n\t" // L6 + PAVGB((%1, %%REGa, 2), %%mm6) // L6 + "movq (%0, %%"REG_c"), %%mm7 \n\t" // L7 + PAVGB((%1, %%REGc), %%mm7) // L7 "movq %%mm0, (%1) \n\t" // R0 "movq %%mm1, (%1, %2) \n\t" // R1 "movq 
%%mm2, (%1, %2, 2) \n\t" // R2 - "movq %%mm3, (%1, %%eax) \n\t" // R3 + "movq %%mm3, (%1, %%"REG_a") \n\t" // R3 "movq %%mm4, (%1, %2, 4) \n\t" // R4 - "movq %%mm5, (%1, %%edx) \n\t" // R5 - "movq %%mm6, (%1, %%eax, 2) \n\t" // R6 - "movq %%mm7, (%1, %%ecx) \n\t" // R7 + "movq %%mm5, (%1, %%"REG_d") \n\t" // R5 + "movq %%mm6, (%1, %%"REG_a", 2) \n\t" // R6 + "movq %%mm7, (%1, %%"REG_c") \n\t" // R7 "movq %%mm0, (%0) \n\t" // L0 "movq %%mm1, (%0, %2) \n\t" // L1 "movq %%mm2, (%0, %2, 2) \n\t" // L2 - "movq %%mm3, (%0, %%eax) \n\t" // L3 + "movq %%mm3, (%0, %%"REG_a") \n\t" // L3 "movq %%mm4, (%0, %2, 4) \n\t" // L4 - "movq %%mm5, (%0, %%edx) \n\t" // L5 - "movq %%mm6, (%0, %%eax, 2) \n\t" // L6 - "movq %%mm7, (%0, %%ecx) \n\t" // L7 + "movq %%mm5, (%0, %%"REG_d") \n\t" // L5 + "movq %%mm6, (%0, %%"REG_a", 2) \n\t" // L6 + "movq %%mm7, (%0, %%"REG_c") \n\t" // L7 "jmp 4f \n\t" "2: \n\t" - "cmpl 508(%%edx), %%ecx \n\t" + "cmpl 508(%%"REG_d"), %%ecx \n\t" " jb 3f \n\t" - "leal (%%eax, %2, 2), %%edx \n\t" // 5*stride - "leal (%%edx, %2, 2), %%ecx \n\t" // 7*stride + "lea (%%"REG_a", %2, 2), %%"REG_d" \n\t" // 5*stride + "lea (%%"REG_d", %2, 2), %%"REG_c" \n\t" // 7*stride "movq (%0), %%mm0 \n\t" // L0 "movq (%0, %2), %%mm1 \n\t" // L1 "movq (%0, %2, 2), %%mm2 \n\t" // L2 - "movq (%0, %%eax), %%mm3 \n\t" // L3 + "movq (%0, %%"REG_a"), %%mm3 \n\t" // L3 "movq (%1), %%mm4 \n\t" // R0 "movq (%1, %2), %%mm5 \n\t" // R1 "movq (%1, %2, 2), %%mm6 \n\t" // R2 - "movq (%1, %%eax), %%mm7 \n\t" // R3 + "movq (%1, %%"REG_a"), %%mm7 \n\t" // R3 PAVGB(%%mm4, %%mm0) PAVGB(%%mm5, %%mm1) PAVGB(%%mm6, %%mm2) @@ -2404,20 +2435,20 @@ L2_DIFF_CORE((%0, %%ecx), (%1, %%ecx)) "movq %%mm0, (%1) \n\t" // R0 "movq %%mm1, (%1, %2) \n\t" // R1 "movq %%mm2, (%1, %2, 2) \n\t" // R2 - "movq %%mm3, (%1, %%eax) \n\t" // R3 + "movq %%mm3, (%1, %%"REG_a") \n\t" // R3 "movq %%mm0, (%0) \n\t" // L0 "movq %%mm1, (%0, %2) \n\t" // L1 "movq %%mm2, (%0, %2, 2) \n\t" // L2 - "movq %%mm3, (%0, %%eax) \n\t" // L3 + 
"movq %%mm3, (%0, %%"REG_a") \n\t" // L3 "movq (%0, %2, 4), %%mm0 \n\t" // L4 - "movq (%0, %%edx), %%mm1 \n\t" // L5 - "movq (%0, %%eax, 2), %%mm2 \n\t" // L6 - "movq (%0, %%ecx), %%mm3 \n\t" // L7 + "movq (%0, %%"REG_d"), %%mm1 \n\t" // L5 + "movq (%0, %%"REG_a", 2), %%mm2 \n\t" // L6 + "movq (%0, %%"REG_c"), %%mm3 \n\t" // L7 "movq (%1, %2, 4), %%mm4 \n\t" // R4 - "movq (%1, %%edx), %%mm5 \n\t" // R5 - "movq (%1, %%eax, 2), %%mm6 \n\t" // R6 - "movq (%1, %%ecx), %%mm7 \n\t" // R7 + "movq (%1, %%"REG_d"), %%mm5 \n\t" // R5 + "movq (%1, %%"REG_a", 2), %%mm6 \n\t" // R6 + "movq (%1, %%"REG_c"), %%mm7 \n\t" // R7 PAVGB(%%mm4, %%mm0) PAVGB(%%mm5, %%mm1) PAVGB(%%mm6, %%mm2) @@ -2427,26 +2458,26 @@ L2_DIFF_CORE((%0, %%ecx), (%1, %%ecx)) PAVGB(%%mm6, %%mm2) PAVGB(%%mm7, %%mm3) "movq %%mm0, (%1, %2, 4) \n\t" // R4 - "movq %%mm1, (%1, %%edx) \n\t" // R5 - "movq %%mm2, (%1, %%eax, 2) \n\t" // R6 - "movq %%mm3, (%1, %%ecx) \n\t" // R7 + "movq %%mm1, (%1, %%"REG_d") \n\t" // R5 + "movq %%mm2, (%1, %%"REG_a", 2) \n\t" // R6 + "movq %%mm3, (%1, %%"REG_c") \n\t" // R7 "movq %%mm0, (%0, %2, 4) \n\t" // L4 - "movq %%mm1, (%0, %%edx) \n\t" // L5 - "movq %%mm2, (%0, %%eax, 2) \n\t" // L6 - "movq %%mm3, (%0, %%ecx) \n\t" // L7 + "movq %%mm1, (%0, %%"REG_d") \n\t" // L5 + "movq %%mm2, (%0, %%"REG_a", 2) \n\t" // L6 + "movq %%mm3, (%0, %%"REG_c") \n\t" // L7 "jmp 4f \n\t" "3: \n\t" - "leal (%%eax, %2, 2), %%edx \n\t" // 5*stride - "leal (%%edx, %2, 2), %%ecx \n\t" // 7*stride + "lea (%%"REG_a", %2, 2), %%"REG_d" \n\t" // 5*stride + "lea (%%"REG_d", %2, 2), %%"REG_c" \n\t" // 7*stride "movq (%0), %%mm0 \n\t" // L0 "movq (%0, %2), %%mm1 \n\t" // L1 "movq (%0, %2, 2), %%mm2 \n\t" // L2 - "movq (%0, %%eax), %%mm3 \n\t" // L3 + "movq (%0, %%"REG_a"), %%mm3 \n\t" // L3 "movq (%1), %%mm4 \n\t" // R0 "movq (%1, %2), %%mm5 \n\t" // R1 "movq (%1, %2, 2), %%mm6 \n\t" // R2 - "movq (%1, %%eax), %%mm7 \n\t" // R3 + "movq (%1, %%"REG_a"), %%mm7 \n\t" // R3 PAVGB(%%mm4, %%mm0) PAVGB(%%mm5, %%mm1) 
PAVGB(%%mm6, %%mm2) @@ -2462,20 +2493,20 @@ L2_DIFF_CORE((%0, %%ecx), (%1, %%ecx)) "movq %%mm0, (%1) \n\t" // R0 "movq %%mm1, (%1, %2) \n\t" // R1 "movq %%mm2, (%1, %2, 2) \n\t" // R2 - "movq %%mm3, (%1, %%eax) \n\t" // R3 + "movq %%mm3, (%1, %%"REG_a") \n\t" // R3 "movq %%mm0, (%0) \n\t" // L0 "movq %%mm1, (%0, %2) \n\t" // L1 "movq %%mm2, (%0, %2, 2) \n\t" // L2 - "movq %%mm3, (%0, %%eax) \n\t" // L3 + "movq %%mm3, (%0, %%"REG_a") \n\t" // L3 "movq (%0, %2, 4), %%mm0 \n\t" // L4 - "movq (%0, %%edx), %%mm1 \n\t" // L5 - "movq (%0, %%eax, 2), %%mm2 \n\t" // L6 - "movq (%0, %%ecx), %%mm3 \n\t" // L7 + "movq (%0, %%"REG_d"), %%mm1 \n\t" // L5 + "movq (%0, %%"REG_a", 2), %%mm2 \n\t" // L6 + "movq (%0, %%"REG_c"), %%mm3 \n\t" // L7 "movq (%1, %2, 4), %%mm4 \n\t" // R4 - "movq (%1, %%edx), %%mm5 \n\t" // R5 - "movq (%1, %%eax, 2), %%mm6 \n\t" // R6 - "movq (%1, %%ecx), %%mm7 \n\t" // R7 + "movq (%1, %%"REG_d"), %%mm5 \n\t" // R5 + "movq (%1, %%"REG_a", 2), %%mm6 \n\t" // R6 + "movq (%1, %%"REG_c"), %%mm7 \n\t" // R7 PAVGB(%%mm4, %%mm0) PAVGB(%%mm5, %%mm1) PAVGB(%%mm6, %%mm2) @@ -2489,25 +2520,25 @@ L2_DIFF_CORE((%0, %%ecx), (%1, %%ecx)) PAVGB(%%mm6, %%mm2) PAVGB(%%mm7, %%mm3) "movq %%mm0, (%1, %2, 4) \n\t" // R4 - "movq %%mm1, (%1, %%edx) \n\t" // R5 - "movq %%mm2, (%1, %%eax, 2) \n\t" // R6 - "movq %%mm3, (%1, %%ecx) \n\t" // R7 + "movq %%mm1, (%1, %%"REG_d") \n\t" // R5 + "movq %%mm2, (%1, %%"REG_a", 2) \n\t" // R6 + "movq %%mm3, (%1, %%"REG_c") \n\t" // R7 "movq %%mm0, (%0, %2, 4) \n\t" // L4 - "movq %%mm1, (%0, %%edx) \n\t" // L5 - "movq %%mm2, (%0, %%eax, 2) \n\t" // L6 - "movq %%mm3, (%0, %%ecx) \n\t" // L7 + "movq %%mm1, (%0, %%"REG_d") \n\t" // L5 + "movq %%mm2, (%0, %%"REG_a", 2) \n\t" // L6 + "movq %%mm3, (%0, %%"REG_c") \n\t" // L7 "4: \n\t" - :: "r" (src), "r" (tempBlured), "r"(stride), "m" (tempBluredPast) - : "%eax", "%edx", "%ecx", "memory" + :: "r" (src), "r" (tempBlured), "r"((long)stride), "m" (tempBluredPast) + : "%"REG_a, "%"REG_d, "%"REG_c, "memory" 
); //printf("%d\n", test); #else { int y; int d=0; - int sysd=0; +// int sysd=0; int i; for(y=0; y<8; y++) @@ -2522,7 +2553,7 @@ L2_DIFF_CORE((%0, %%ecx), (%1, %%ecx)) // if(y==0 || y==7) d1+= d1>>1; // d+= ABS(d1); d+= d1*d1; - sysd+= d1; +// sysd+= d1; } } i=d; @@ -2608,6 +2639,543 @@ Switch between } #endif } +#endif //HAVE_ALTIVEC + +#ifdef HAVE_MMX +/** + * accurate deblock filter + */ +static always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int stride, PPContext *c){ + int64_t dc_mask, eq_mask; + int64_t sums[10*8*2]; + src+= step*3; // src points to begin of the 8x8 Block +//START_TIMER +asm volatile( + "movq %0, %%mm7 \n\t" + "movq %1, %%mm6 \n\t" + : : "m" (c->mmxDcOffset[c->nonBQP]), "m" (c->mmxDcThreshold[c->nonBQP]) + ); + +asm volatile( + "lea (%2, %3), %%"REG_a" \n\t" +// 0 1 2 3 4 5 6 7 8 9 +// %1 eax eax+%2 eax+2%2 %1+4%2 ecx ecx+%2 ecx+2%2 %1+8%2 ecx+4%2 + + "movq (%2), %%mm0 \n\t" + "movq (%%"REG_a"), %%mm1 \n\t" + "movq %%mm1, %%mm3 \n\t" + "movq %%mm1, %%mm4 \n\t" + "psubb %%mm1, %%mm0 \n\t" // mm0 = differnece + "paddb %%mm7, %%mm0 \n\t" + "pcmpgtb %%mm6, %%mm0 \n\t" + + "movq (%%"REG_a",%3), %%mm2 \n\t" + PMAXUB(%%mm2, %%mm4) + PMINUB(%%mm2, %%mm3, %%mm5) + "psubb %%mm2, %%mm1 \n\t" + "paddb %%mm7, %%mm1 \n\t" + "pcmpgtb %%mm6, %%mm1 \n\t" + "paddb %%mm1, %%mm0 \n\t" + + "movq (%%"REG_a", %3, 2), %%mm1 \n\t" + PMAXUB(%%mm1, %%mm4) + PMINUB(%%mm1, %%mm3, %%mm5) + "psubb %%mm1, %%mm2 \n\t" + "paddb %%mm7, %%mm2 \n\t" + "pcmpgtb %%mm6, %%mm2 \n\t" + "paddb %%mm2, %%mm0 \n\t" + + "lea (%%"REG_a", %3, 4), %%"REG_a" \n\t" + + "movq (%2, %3, 4), %%mm2 \n\t" + PMAXUB(%%mm2, %%mm4) + PMINUB(%%mm2, %%mm3, %%mm5) + "psubb %%mm2, %%mm1 \n\t" + "paddb %%mm7, %%mm1 \n\t" + "pcmpgtb %%mm6, %%mm1 \n\t" + "paddb %%mm1, %%mm0 \n\t" + + "movq (%%"REG_a"), %%mm1 \n\t" + PMAXUB(%%mm1, %%mm4) + PMINUB(%%mm1, %%mm3, %%mm5) + "psubb %%mm1, %%mm2 \n\t" + "paddb %%mm7, %%mm2 \n\t" + "pcmpgtb %%mm6, %%mm2 \n\t" + "paddb %%mm2, %%mm0 \n\t" + + "movq 
(%%"REG_a", %3), %%mm2 \n\t" + PMAXUB(%%mm2, %%mm4) + PMINUB(%%mm2, %%mm3, %%mm5) + "psubb %%mm2, %%mm1 \n\t" + "paddb %%mm7, %%mm1 \n\t" + "pcmpgtb %%mm6, %%mm1 \n\t" + "paddb %%mm1, %%mm0 \n\t" + + "movq (%%"REG_a", %3, 2), %%mm1 \n\t" + PMAXUB(%%mm1, %%mm4) + PMINUB(%%mm1, %%mm3, %%mm5) + "psubb %%mm1, %%mm2 \n\t" + "paddb %%mm7, %%mm2 \n\t" + "pcmpgtb %%mm6, %%mm2 \n\t" + "paddb %%mm2, %%mm0 \n\t" + + "movq (%2, %3, 8), %%mm2 \n\t" + PMAXUB(%%mm2, %%mm4) + PMINUB(%%mm2, %%mm3, %%mm5) + "psubb %%mm2, %%mm1 \n\t" + "paddb %%mm7, %%mm1 \n\t" + "pcmpgtb %%mm6, %%mm1 \n\t" + "paddb %%mm1, %%mm0 \n\t" + + "movq (%%"REG_a", %3, 4), %%mm1 \n\t" + "psubb %%mm1, %%mm2 \n\t" + "paddb %%mm7, %%mm2 \n\t" + "pcmpgtb %%mm6, %%mm2 \n\t" + "paddb %%mm2, %%mm0 \n\t" + "psubusb %%mm3, %%mm4 \n\t" + + "pxor %%mm6, %%mm6 \n\t" + "movq %4, %%mm7 \n\t" // QP,..., QP + "paddusb %%mm7, %%mm7 \n\t" // 2QP ... 2QP + "psubusb %%mm4, %%mm7 \n\t" // Diff >=2QP -> 0 + "pcmpeqb %%mm6, %%mm7 \n\t" // Diff < 2QP -> 0 + "pcmpeqb %%mm6, %%mm7 \n\t" // Diff < 2QP -> 0 + "movq %%mm7, %1 \n\t" + + "movq %5, %%mm7 \n\t" + "punpcklbw %%mm7, %%mm7 \n\t" + "punpcklbw %%mm7, %%mm7 \n\t" + "punpcklbw %%mm7, %%mm7 \n\t" + "psubb %%mm0, %%mm6 \n\t" + "pcmpgtb %%mm7, %%mm6 \n\t" + "movq %%mm6, %0 \n\t" + + : "=m" (eq_mask), "=m" (dc_mask) + : "r" (src), "r" ((long)step), "m" (c->pQPb), "m"(c->ppMode.flatnessThreshold) + : "%"REG_a + ); + + if(dc_mask & eq_mask){ + long offset= -8*step; + int64_t *temp_sums= sums; + + asm volatile( + "movq %2, %%mm0 \n\t" // QP,..., QP + "pxor %%mm4, %%mm4 \n\t" + + "movq (%0), %%mm6 \n\t" + "movq (%0, %1), %%mm5 \n\t" + "movq %%mm5, %%mm1 \n\t" + "movq %%mm6, %%mm2 \n\t" + "psubusb %%mm6, %%mm5 \n\t" + "psubusb %%mm1, %%mm2 \n\t" + "por %%mm5, %%mm2 \n\t" // ABS Diff of lines + "psubusb %%mm2, %%mm0 \n\t" // diff >= QP -> 0 + "pcmpeqb %%mm4, %%mm0 \n\t" // diff >= QP -> FF + + "pxor %%mm6, %%mm1 \n\t" + "pand %%mm0, %%mm1 \n\t" + "pxor %%mm1, %%mm6 \n\t" + // 0:QP 6:First + 
+ "movq (%0, %1, 8), %%mm5 \n\t" + "add %1, %0 \n\t" // %0 points to line 1 not 0 + "movq (%0, %1, 8), %%mm7 \n\t" + "movq %%mm5, %%mm1 \n\t" + "movq %%mm7, %%mm2 \n\t" + "psubusb %%mm7, %%mm5 \n\t" + "psubusb %%mm1, %%mm2 \n\t" + "por %%mm5, %%mm2 \n\t" // ABS Diff of lines + "movq %2, %%mm0 \n\t" // QP,..., QP + "psubusb %%mm2, %%mm0 \n\t" // diff >= QP -> 0 + "pcmpeqb %%mm4, %%mm0 \n\t" // diff >= QP -> FF + + "pxor %%mm7, %%mm1 \n\t" + "pand %%mm0, %%mm1 \n\t" + "pxor %%mm1, %%mm7 \n\t" + + "movq %%mm6, %%mm5 \n\t" + "punpckhbw %%mm4, %%mm6 \n\t" + "punpcklbw %%mm4, %%mm5 \n\t" + // 4:0 5/6:First 7:Last + + "movq %%mm5, %%mm0 \n\t" + "movq %%mm6, %%mm1 \n\t" + "psllw $2, %%mm0 \n\t" + "psllw $2, %%mm1 \n\t" + "paddw "MANGLE(w04)", %%mm0 \n\t" + "paddw "MANGLE(w04)", %%mm1 \n\t" + +#define NEXT\ + "movq (%0), %%mm2 \n\t"\ + "movq (%0), %%mm3 \n\t"\ + "add %1, %0 \n\t"\ + "punpcklbw %%mm4, %%mm2 \n\t"\ + "punpckhbw %%mm4, %%mm3 \n\t"\ + "paddw %%mm2, %%mm0 \n\t"\ + "paddw %%mm3, %%mm1 \n\t" + +#define PREV\ + "movq (%0), %%mm2 \n\t"\ + "movq (%0), %%mm3 \n\t"\ + "add %1, %0 \n\t"\ + "punpcklbw %%mm4, %%mm2 \n\t"\ + "punpckhbw %%mm4, %%mm3 \n\t"\ + "psubw %%mm2, %%mm0 \n\t"\ + "psubw %%mm3, %%mm1 \n\t" + + + NEXT //0 + NEXT //1 + NEXT //2 + "movq %%mm0, (%3) \n\t" + "movq %%mm1, 8(%3) \n\t" + + NEXT //3 + "psubw %%mm5, %%mm0 \n\t" + "psubw %%mm6, %%mm1 \n\t" + "movq %%mm0, 16(%3) \n\t" + "movq %%mm1, 24(%3) \n\t" + + NEXT //4 + "psubw %%mm5, %%mm0 \n\t" + "psubw %%mm6, %%mm1 \n\t" + "movq %%mm0, 32(%3) \n\t" + "movq %%mm1, 40(%3) \n\t" + + NEXT //5 + "psubw %%mm5, %%mm0 \n\t" + "psubw %%mm6, %%mm1 \n\t" + "movq %%mm0, 48(%3) \n\t" + "movq %%mm1, 56(%3) \n\t" + + NEXT //6 + "psubw %%mm5, %%mm0 \n\t" + "psubw %%mm6, %%mm1 \n\t" + "movq %%mm0, 64(%3) \n\t" + "movq %%mm1, 72(%3) \n\t" + + "movq %%mm7, %%mm6 \n\t" + "punpckhbw %%mm4, %%mm7 \n\t" + "punpcklbw %%mm4, %%mm6 \n\t" + + NEXT //7 + "mov %4, %0 \n\t" + "add %1, %0 \n\t" + PREV //0 + "movq %%mm0, 80(%3) \n\t" + 
"movq %%mm1, 88(%3) \n\t" + + PREV //1 + "paddw %%mm6, %%mm0 \n\t" + "paddw %%mm7, %%mm1 \n\t" + "movq %%mm0, 96(%3) \n\t" + "movq %%mm1, 104(%3) \n\t" + + PREV //2 + "paddw %%mm6, %%mm0 \n\t" + "paddw %%mm7, %%mm1 \n\t" + "movq %%mm0, 112(%3) \n\t" + "movq %%mm1, 120(%3) \n\t" + + PREV //3 + "paddw %%mm6, %%mm0 \n\t" + "paddw %%mm7, %%mm1 \n\t" + "movq %%mm0, 128(%3) \n\t" + "movq %%mm1, 136(%3) \n\t" + + PREV //4 + "paddw %%mm6, %%mm0 \n\t" + "paddw %%mm7, %%mm1 \n\t" + "movq %%mm0, 144(%3) \n\t" + "movq %%mm1, 152(%3) \n\t" + + "mov %4, %0 \n\t" //FIXME + + : "+&r"(src) + : "r" ((long)step), "m" (c->pQPb), "r"(sums), "g"(src) + ); + + src+= step; // src points to begin of the 8x8 Block + + asm volatile( + "movq %4, %%mm6 \n\t" + "pcmpeqb %%mm5, %%mm5 \n\t" + "pxor %%mm6, %%mm5 \n\t" + "pxor %%mm7, %%mm7 \n\t" + + "1: \n\t" + "movq (%1), %%mm0 \n\t" + "movq 8(%1), %%mm1 \n\t" + "paddw 32(%1), %%mm0 \n\t" + "paddw 40(%1), %%mm1 \n\t" + "movq (%0, %3), %%mm2 \n\t" + "movq %%mm2, %%mm3 \n\t" + "movq %%mm2, %%mm4 \n\t" + "punpcklbw %%mm7, %%mm2 \n\t" + "punpckhbw %%mm7, %%mm3 \n\t" + "paddw %%mm2, %%mm0 \n\t" + "paddw %%mm3, %%mm1 \n\t" + "paddw %%mm2, %%mm0 \n\t" + "paddw %%mm3, %%mm1 \n\t" + "psrlw $4, %%mm0 \n\t" + "psrlw $4, %%mm1 \n\t" + "packuswb %%mm1, %%mm0 \n\t" + "pand %%mm6, %%mm0 \n\t" + "pand %%mm5, %%mm4 \n\t" + "por %%mm4, %%mm0 \n\t" + "movq %%mm0, (%0, %3) \n\t" + "add $16, %1 \n\t" + "add %2, %0 \n\t" + " js 1b \n\t" + + : "+r"(offset), "+r"(temp_sums) + : "r" ((long)step), "r"(src - offset), "m"(dc_mask & eq_mask) + ); + }else + src+= step; // src points to begin of the 8x8 Block + + if(eq_mask != -1LL){ + uint8_t *temp_src= src; + asm volatile( + "pxor %%mm7, %%mm7 \n\t" + "lea -40(%%"REG_SP"), %%"REG_c" \n\t" // make space for 4 8-byte vars + "and "ALIGN_MASK", %%"REG_c" \n\t" // align +// 0 1 2 3 4 5 6 7 8 9 +// %0 eax eax+%1 eax+2%1 %0+4%1 ecx ecx+%1 ecx+2%1 %1+8%1 ecx+4%1 + + "movq (%0), %%mm0 \n\t" + "movq %%mm0, %%mm1 \n\t" + "punpcklbw 
%%mm7, %%mm0 \n\t" // low part of line 0 + "punpckhbw %%mm7, %%mm1 \n\t" // high part of line 0 + + "movq (%0, %1), %%mm2 \n\t" + "lea (%0, %1, 2), %%"REG_a" \n\t" + "movq %%mm2, %%mm3 \n\t" + "punpcklbw %%mm7, %%mm2 \n\t" // low part of line 1 + "punpckhbw %%mm7, %%mm3 \n\t" // high part of line 1 + + "movq (%%"REG_a"), %%mm4 \n\t" + "movq %%mm4, %%mm5 \n\t" + "punpcklbw %%mm7, %%mm4 \n\t" // low part of line 2 + "punpckhbw %%mm7, %%mm5 \n\t" // high part of line 2 + + "paddw %%mm0, %%mm0 \n\t" // 2L0 + "paddw %%mm1, %%mm1 \n\t" // 2H0 + "psubw %%mm4, %%mm2 \n\t" // L1 - L2 + "psubw %%mm5, %%mm3 \n\t" // H1 - H2 + "psubw %%mm2, %%mm0 \n\t" // 2L0 - L1 + L2 + "psubw %%mm3, %%mm1 \n\t" // 2H0 - H1 + H2 + + "psllw $2, %%mm2 \n\t" // 4L1 - 4L2 + "psllw $2, %%mm3 \n\t" // 4H1 - 4H2 + "psubw %%mm2, %%mm0 \n\t" // 2L0 - 5L1 + 5L2 + "psubw %%mm3, %%mm1 \n\t" // 2H0 - 5H1 + 5H2 + + "movq (%%"REG_a", %1), %%mm2 \n\t" + "movq %%mm2, %%mm3 \n\t" + "punpcklbw %%mm7, %%mm2 \n\t" // L3 + "punpckhbw %%mm7, %%mm3 \n\t" // H3 + + "psubw %%mm2, %%mm0 \n\t" // 2L0 - 5L1 + 5L2 - L3 + "psubw %%mm3, %%mm1 \n\t" // 2H0 - 5H1 + 5H2 - H3 + "psubw %%mm2, %%mm0 \n\t" // 2L0 - 5L1 + 5L2 - 2L3 + "psubw %%mm3, %%mm1 \n\t" // 2H0 - 5H1 + 5H2 - 2H3 + "movq %%mm0, (%%"REG_c") \n\t" // 2L0 - 5L1 + 5L2 - 2L3 + "movq %%mm1, 8(%%"REG_c") \n\t" // 2H0 - 5H1 + 5H2 - 2H3 + + "movq (%%"REG_a", %1, 2), %%mm0 \n\t" + "movq %%mm0, %%mm1 \n\t" + "punpcklbw %%mm7, %%mm0 \n\t" // L4 + "punpckhbw %%mm7, %%mm1 \n\t" // H4 + + "psubw %%mm0, %%mm2 \n\t" // L3 - L4 + "psubw %%mm1, %%mm3 \n\t" // H3 - H4 + "movq %%mm2, 16(%%"REG_c") \n\t" // L3 - L4 + "movq %%mm3, 24(%%"REG_c") \n\t" // H3 - H4 + "paddw %%mm4, %%mm4 \n\t" // 2L2 + "paddw %%mm5, %%mm5 \n\t" // 2H2 + "psubw %%mm2, %%mm4 \n\t" // 2L2 - L3 + L4 + "psubw %%mm3, %%mm5 \n\t" // 2H2 - H3 + H4 + + "lea (%%"REG_a", %1), %0 \n\t" + "psllw $2, %%mm2 \n\t" // 4L3 - 4L4 + "psllw $2, %%mm3 \n\t" // 4H3 - 4H4 + "psubw %%mm2, %%mm4 \n\t" // 2L2 - 5L3 + 5L4 + "psubw 
%%mm3, %%mm5 \n\t" // 2H2 - 5H3 + 5H4 +//50 opcodes so far + "movq (%0, %1, 2), %%mm2 \n\t" + "movq %%mm2, %%mm3 \n\t" + "punpcklbw %%mm7, %%mm2 \n\t" // L5 + "punpckhbw %%mm7, %%mm3 \n\t" // H5 + "psubw %%mm2, %%mm4 \n\t" // 2L2 - 5L3 + 5L4 - L5 + "psubw %%mm3, %%mm5 \n\t" // 2H2 - 5H3 + 5H4 - H5 + "psubw %%mm2, %%mm4 \n\t" // 2L2 - 5L3 + 5L4 - 2L5 + "psubw %%mm3, %%mm5 \n\t" // 2H2 - 5H3 + 5H4 - 2H5 + + "movq (%%"REG_a", %1, 4), %%mm6 \n\t" + "punpcklbw %%mm7, %%mm6 \n\t" // L6 + "psubw %%mm6, %%mm2 \n\t" // L5 - L6 + "movq (%%"REG_a", %1, 4), %%mm6 \n\t" + "punpckhbw %%mm7, %%mm6 \n\t" // H6 + "psubw %%mm6, %%mm3 \n\t" // H5 - H6 + + "paddw %%mm0, %%mm0 \n\t" // 2L4 + "paddw %%mm1, %%mm1 \n\t" // 2H4 + "psubw %%mm2, %%mm0 \n\t" // 2L4 - L5 + L6 + "psubw %%mm3, %%mm1 \n\t" // 2H4 - H5 + H6 + + "psllw $2, %%mm2 \n\t" // 4L5 - 4L6 + "psllw $2, %%mm3 \n\t" // 4H5 - 4H6 + "psubw %%mm2, %%mm0 \n\t" // 2L4 - 5L5 + 5L6 + "psubw %%mm3, %%mm1 \n\t" // 2H4 - 5H5 + 5H6 + + "movq (%0, %1, 4), %%mm2 \n\t" + "movq %%mm2, %%mm3 \n\t" + "punpcklbw %%mm7, %%mm2 \n\t" // L7 + "punpckhbw %%mm7, %%mm3 \n\t" // H7 + + "paddw %%mm2, %%mm2 \n\t" // 2L7 + "paddw %%mm3, %%mm3 \n\t" // 2H7 + "psubw %%mm2, %%mm0 \n\t" // 2L4 - 5L5 + 5L6 - 2L7 + "psubw %%mm3, %%mm1 \n\t" // 2H4 - 5H5 + 5H6 - 2H7 + + "movq (%%"REG_c"), %%mm2 \n\t" // 2L0 - 5L1 + 5L2 - 2L3 + "movq 8(%%"REG_c"), %%mm3 \n\t" // 2H0 - 5H1 + 5H2 - 2H3 + +#ifdef HAVE_MMX2 + "movq %%mm7, %%mm6 \n\t" // 0 + "psubw %%mm0, %%mm6 \n\t" + "pmaxsw %%mm6, %%mm0 \n\t" // |2L4 - 5L5 + 5L6 - 2L7| + "movq %%mm7, %%mm6 \n\t" // 0 + "psubw %%mm1, %%mm6 \n\t" + "pmaxsw %%mm6, %%mm1 \n\t" // |2H4 - 5H5 + 5H6 - 2H7| + "movq %%mm7, %%mm6 \n\t" // 0 + "psubw %%mm2, %%mm6 \n\t" + "pmaxsw %%mm6, %%mm2 \n\t" // |2L0 - 5L1 + 5L2 - 2L3| + "movq %%mm7, %%mm6 \n\t" // 0 + "psubw %%mm3, %%mm6 \n\t" + "pmaxsw %%mm6, %%mm3 \n\t" // |2H0 - 5H1 + 5H2 - 2H3| +#else + "movq %%mm7, %%mm6 \n\t" // 0 + "pcmpgtw %%mm0, %%mm6 \n\t" + "pxor %%mm6, %%mm0 \n\t" + "psubw 
%%mm6, %%mm0 \n\t" // |2L4 - 5L5 + 5L6 - 2L7| + "movq %%mm7, %%mm6 \n\t" // 0 + "pcmpgtw %%mm1, %%mm6 \n\t" + "pxor %%mm6, %%mm1 \n\t" + "psubw %%mm6, %%mm1 \n\t" // |2H4 - 5H5 + 5H6 - 2H7| + "movq %%mm7, %%mm6 \n\t" // 0 + "pcmpgtw %%mm2, %%mm6 \n\t" + "pxor %%mm6, %%mm2 \n\t" + "psubw %%mm6, %%mm2 \n\t" // |2L0 - 5L1 + 5L2 - 2L3| + "movq %%mm7, %%mm6 \n\t" // 0 + "pcmpgtw %%mm3, %%mm6 \n\t" + "pxor %%mm6, %%mm3 \n\t" + "psubw %%mm6, %%mm3 \n\t" // |2H0 - 5H1 + 5H2 - 2H3| +#endif + +#ifdef HAVE_MMX2 + "pminsw %%mm2, %%mm0 \n\t" + "pminsw %%mm3, %%mm1 \n\t" +#else + "movq %%mm0, %%mm6 \n\t" + "psubusw %%mm2, %%mm6 \n\t" + "psubw %%mm6, %%mm0 \n\t" + "movq %%mm1, %%mm6 \n\t" + "psubusw %%mm3, %%mm6 \n\t" + "psubw %%mm6, %%mm1 \n\t" +#endif + + "movd %2, %%mm2 \n\t" // QP + "punpcklbw %%mm7, %%mm2 \n\t" + + "movq %%mm7, %%mm6 \n\t" // 0 + "pcmpgtw %%mm4, %%mm6 \n\t" // sign(2L2 - 5L3 + 5L4 - 2L5) + "pxor %%mm6, %%mm4 \n\t" + "psubw %%mm6, %%mm4 \n\t" // |2L2 - 5L3 + 5L4 - 2L5| + "pcmpgtw %%mm5, %%mm7 \n\t" // sign(2H2 - 5H3 + 5H4 - 2H5) + "pxor %%mm7, %%mm5 \n\t" + "psubw %%mm7, %%mm5 \n\t" // |2H2 - 5H3 + 5H4 - 2H5| +// 100 opcodes + "psllw $3, %%mm2 \n\t" // 8QP + "movq %%mm2, %%mm3 \n\t" // 8QP + "pcmpgtw %%mm4, %%mm2 \n\t" + "pcmpgtw %%mm5, %%mm3 \n\t" + "pand %%mm2, %%mm4 \n\t" + "pand %%mm3, %%mm5 \n\t" + + + "psubusw %%mm0, %%mm4 \n\t" // hd + "psubusw %%mm1, %%mm5 \n\t" // ld + + + "movq "MANGLE(w05)", %%mm2 \n\t" // 5 + "pmullw %%mm2, %%mm4 \n\t" + "pmullw %%mm2, %%mm5 \n\t" + "movq "MANGLE(w20)", %%mm2 \n\t" // 32 + "paddw %%mm2, %%mm4 \n\t" + "paddw %%mm2, %%mm5 \n\t" + "psrlw $6, %%mm4 \n\t" + "psrlw $6, %%mm5 \n\t" + + "movq 16(%%"REG_c"), %%mm0 \n\t" // L3 - L4 + "movq 24(%%"REG_c"), %%mm1 \n\t" // H3 - H4 + + "pxor %%mm2, %%mm2 \n\t" + "pxor %%mm3, %%mm3 \n\t" + + "pcmpgtw %%mm0, %%mm2 \n\t" // sign (L3-L4) + "pcmpgtw %%mm1, %%mm3 \n\t" // sign (H3-H4) + "pxor %%mm2, %%mm0 \n\t" + "pxor %%mm3, %%mm1 \n\t" + "psubw %%mm2, %%mm0 \n\t" // |L3-L4| + "psubw 
%%mm3, %%mm1 \n\t" // |H3-H4| + "psrlw $1, %%mm0 \n\t" // |L3 - L4|/2 + "psrlw $1, %%mm1 \n\t" // |H3 - H4|/2 + + "pxor %%mm6, %%mm2 \n\t" + "pxor %%mm7, %%mm3 \n\t" + "pand %%mm2, %%mm4 \n\t" + "pand %%mm3, %%mm5 \n\t" + +#ifdef HAVE_MMX2 + "pminsw %%mm0, %%mm4 \n\t" + "pminsw %%mm1, %%mm5 \n\t" +#else + "movq %%mm4, %%mm2 \n\t" + "psubusw %%mm0, %%mm2 \n\t" + "psubw %%mm2, %%mm4 \n\t" + "movq %%mm5, %%mm2 \n\t" + "psubusw %%mm1, %%mm2 \n\t" + "psubw %%mm2, %%mm5 \n\t" +#endif + "pxor %%mm6, %%mm4 \n\t" + "pxor %%mm7, %%mm5 \n\t" + "psubw %%mm6, %%mm4 \n\t" + "psubw %%mm7, %%mm5 \n\t" + "packsswb %%mm5, %%mm4 \n\t" + "movq %3, %%mm1 \n\t" + "pandn %%mm4, %%mm1 \n\t" + "movq (%0), %%mm0 \n\t" + "paddb %%mm1, %%mm0 \n\t" + "movq %%mm0, (%0) \n\t" + "movq (%0, %1), %%mm0 \n\t" + "psubb %%mm1, %%mm0 \n\t" + "movq %%mm0, (%0, %1) \n\t" + + : "+r" (temp_src) + : "r" ((long)step), "m" (c->pQPb), "m"(eq_mask) + : "%"REG_a, "%"REG_c + ); + } +/*if(step==16){ + STOP_TIMER("step16") +}else{ + STOP_TIMER("stepX") +}*/ +} +#endif //HAVE_MMX static void RENAME(postProcess)(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height, QP_STORE_T QPs[], int QPStride, int isColor, PPContext *c); @@ -2628,13 +3196,13 @@ static inline void RENAME(blockCopy)(uint8_t dst[], int dstStride, uint8_t src[] { #ifdef HAVE_MMX asm volatile( - "movq (%%eax), %%mm2 \n\t" // packedYOffset - "movq 8(%%eax), %%mm3 \n\t" // packedYScale - "leal (%2,%4), %%eax \n\t" - "leal (%3,%5), %%edx \n\t" + "movq (%%"REG_a"), %%mm2 \n\t" // packedYOffset + "movq 8(%%"REG_a"), %%mm3 \n\t" // packedYScale + "lea (%2,%4), %%"REG_a" \n\t" + "lea (%3,%5), %%"REG_d" \n\t" "pxor %%mm4, %%mm4 \n\t" #ifdef HAVE_MMX2 -#define SCALED_CPY(src1, src2, dst1, dst2) \ +#define REAL_SCALED_CPY(src1, src2, dst1, dst2) \ "movq " #src1 ", %%mm0 \n\t"\ "movq " #src1 ", %%mm5 \n\t"\ "movq " #src2 ", %%mm1 \n\t"\ @@ -2657,7 +3225,7 @@ static inline void RENAME(blockCopy)(uint8_t dst[], int dstStride, uint8_t 
src[] "movq %%mm1, " #dst2 " \n\t"\ #else //HAVE_MMX2 -#define SCALED_CPY(src1, src2, dst1, dst2) \ +#define REAL_SCALED_CPY(src1, src2, dst1, dst2) \ "movq " #src1 ", %%mm0 \n\t"\ "movq " #src1 ", %%mm5 \n\t"\ "punpcklbw %%mm4, %%mm0 \n\t"\ @@ -2684,22 +3252,24 @@ static inline void RENAME(blockCopy)(uint8_t dst[], int dstStride, uint8_t src[] "movq %%mm1, " #dst2 " \n\t"\ #endif //!HAVE_MMX2 +#define SCALED_CPY(src1, src2, dst1, dst2)\ + REAL_SCALED_CPY(src1, src2, dst1, dst2) SCALED_CPY((%2) , (%2, %4) , (%3) , (%3, %5)) -SCALED_CPY((%2, %4, 2), (%%eax, %4, 2), (%3, %5, 2), (%%edx, %5, 2)) -SCALED_CPY((%2, %4, 4), (%%eax, %4, 4), (%3, %5, 4), (%%edx, %5, 4)) - "leal (%%eax,%4,4), %%eax \n\t" - "leal (%%edx,%5,4), %%edx \n\t" -SCALED_CPY((%%eax, %4), (%%eax, %4, 2), (%%edx, %5), (%%edx, %5, 2)) +SCALED_CPY((%2, %4, 2), (%%REGa, %4, 2), (%3, %5, 2), (%%REGd, %5, 2)) +SCALED_CPY((%2, %4, 4), (%%REGa, %4, 4), (%3, %5, 4), (%%REGd, %5, 4)) + "lea (%%"REG_a",%4,4), %%"REG_a" \n\t" + "lea (%%"REG_d",%5,4), %%"REG_d" \n\t" +SCALED_CPY((%%REGa, %4), (%%REGa, %4, 2), (%%REGd, %5), (%%REGd, %5, 2)) : "=&a" (packedOffsetAndScale) : "0" (packedOffsetAndScale), "r"(src), "r"(dst), - "r" (srcStride), - "r" (dstStride) - : "%edx" + "r" ((long)srcStride), + "r" ((long)dstStride) + : "%"REG_d ); #else for(i=0; i<8; i++) @@ -2711,27 +3281,30 @@ SCALED_CPY((%%eax, %4), (%%eax, %4, 2), (%%edx, %5), (%%edx, %5, 2)) { #ifdef HAVE_MMX asm volatile( - "leal (%0,%2), %%eax \n\t" - "leal (%1,%3), %%edx \n\t" + "lea (%0,%2), %%"REG_a" \n\t" + "lea (%1,%3), %%"REG_d" \n\t" -#define SIMPLE_CPY(src1, src2, dst1, dst2) \ +#define REAL_SIMPLE_CPY(src1, src2, dst1, dst2) \ "movq " #src1 ", %%mm0 \n\t"\ "movq " #src2 ", %%mm1 \n\t"\ "movq %%mm0, " #dst1 " \n\t"\ "movq %%mm1, " #dst2 " \n\t"\ +#define SIMPLE_CPY(src1, src2, dst1, dst2)\ + REAL_SIMPLE_CPY(src1, src2, dst1, dst2) + SIMPLE_CPY((%0) , (%0, %2) , (%1) , (%1, %3)) -SIMPLE_CPY((%0, %2, 2), (%%eax, %2, 2), (%1, %3, 2), (%%edx, %3, 2)) 
-SIMPLE_CPY((%0, %2, 4), (%%eax, %2, 4), (%1, %3, 4), (%%edx, %3, 4)) - "leal (%%eax,%2,4), %%eax \n\t" - "leal (%%edx,%3,4), %%edx \n\t" -SIMPLE_CPY((%%eax, %2), (%%eax, %2, 2), (%%edx, %3), (%%edx, %3, 2)) +SIMPLE_CPY((%0, %2, 2), (%%REGa, %2, 2), (%1, %3, 2), (%%REGd, %3, 2)) +SIMPLE_CPY((%0, %2, 4), (%%REGa, %2, 4), (%1, %3, 4), (%%REGd, %3, 4)) + "lea (%%"REG_a",%2,4), %%"REG_a" \n\t" + "lea (%%"REG_d",%3,4), %%"REG_d" \n\t" +SIMPLE_CPY((%%REGa, %2), (%%REGa, %2, 2), (%%REGd, %3), (%%REGd, %3, 2)) : : "r" (src), "r" (dst), - "r" (srcStride), - "r" (dstStride) - : "%eax", "%edx" + "r" ((long)srcStride), + "r" ((long)dstStride) + : "%"REG_a, "%"REG_d ); #else for(i=0; i<8; i++) @@ -2749,12 +3322,12 @@ static inline void RENAME(duplicate)(uint8_t src[], int stride) #ifdef HAVE_MMX asm volatile( "movq (%0), %%mm0 \n\t" - "addl %1, %0 \n\t" + "add %1, %0 \n\t" "movq %%mm0, (%0) \n\t" "movq %%mm0, (%0, %1) \n\t" "movq %%mm0, (%0, %1, 2) \n\t" : "+r" (src) - : "r" (-stride) + : "r" ((long)-stride) ); #else int i; @@ -2793,8 +3366,8 @@ static void RENAME(postProcess)(uint8_t src[], int srcStride, uint8_t dst[], int //FIXME remove uint64_t * const yHistogram= c.yHistogram; - uint8_t * const tempSrc= c.tempSrc; - uint8_t * const tempDst= c.tempDst; + uint8_t * const tempSrc= srcStride > 0 ? c.tempSrc : c.tempSrc - 23*srcStride; + uint8_t * const tempDst= dstStride > 0 ? c.tempDst : c.tempDst - 23*dstStride; //const int mbWidth= isColor ? 
(width+7)>>3 : (width+15)>>4; #ifdef HAVE_MMX @@ -2814,7 +3387,8 @@ static void RENAME(postProcess)(uint8_t src[], int srcStride, uint8_t dst[], int || (mode & LOWPASS5_DEINT_FILTER)) copyAhead=14; else if( (mode & V_DEBLOCK) || (mode & LINEAR_IPOL_DEINT_FILTER) - || (mode & MEDIAN_DEINT_FILTER)) copyAhead=13; + || (mode & MEDIAN_DEINT_FILTER) + || (mode & V_A_DEBLOCK)) copyAhead=13; else if(mode & V_X1_FILTER) copyAhead=11; // else if(mode & V_RK1_FILTER) copyAhead=10; else if(mode & DERING) copyAhead=9; @@ -2905,22 +3479,22 @@ static void RENAME(postProcess)(uint8_t src[], int srcStride, uint8_t dst[], int */ asm( - "movl %4, %%eax \n\t" - "shrl $2, %%eax \n\t" - "andl $6, %%eax \n\t" - "addl %5, %%eax \n\t" - "movl %%eax, %%edx \n\t" - "imul %1, %%eax \n\t" - "imul %3, %%edx \n\t" - "prefetchnta 32(%%eax, %0) \n\t" - "prefetcht0 32(%%edx, %2) \n\t" - "addl %1, %%eax \n\t" - "addl %3, %%edx \n\t" - "prefetchnta 32(%%eax, %0) \n\t" - "prefetcht0 32(%%edx, %2) \n\t" - :: "r" (srcBlock), "r" (srcStride), "r" (dstBlock), "r" (dstStride), - "m" (x), "m" (copyAhead) - : "%eax", "%edx" + "mov %4, %%"REG_a" \n\t" + "shr $2, %%"REG_a" \n\t" + "and $6, %%"REG_a" \n\t" + "add %5, %%"REG_a" \n\t" + "mov %%"REG_a", %%"REG_d" \n\t" + "imul %1, %%"REG_a" \n\t" + "imul %3, %%"REG_d" \n\t" + "prefetchnta 32(%%"REG_a", %0) \n\t" + "prefetcht0 32(%%"REG_d", %2) \n\t" + "add %1, %%"REG_a" \n\t" + "add %3, %%"REG_d" \n\t" + "prefetchnta 32(%%"REG_a", %0) \n\t" + "prefetcht0 32(%%"REG_d", %2) \n\t" + :: "r" (srcBlock), "r" ((long)srcStride), "r" (dstBlock), "r" ((long)dstStride), + "m" ((long)x), "m" ((long)copyAhead) + : "%"REG_a, "%"REG_d ); #elif defined(HAVE_3DNOW) @@ -2955,8 +3529,8 @@ static void RENAME(postProcess)(uint8_t src[], int srcStride, uint8_t dst[], int dstBlock+=8; srcBlock+=8; } - if(width==dstStride) - memcpy(dst, tempDst + 9*dstStride, copyAhead*dstStride); + if(width==ABS(dstStride)) + linecpy(dst, tempDst + 9*dstStride, copyAhead, dstStride); else { int i; @@ 
-2978,7 +3552,7 @@ static void RENAME(postProcess)(uint8_t src[], int srcStride, uint8_t dst[], int uint8_t *tempBlock2= c.tempBlocks + 8; #endif int8_t *QPptr= &QPs[(y>>qpVShift)*QPStride]; - int8_t *nonBQPptr= &c.nonBQPTable[(y>>qpVShift)*QPStride]; + int8_t *nonBQPptr= &c.nonBQPTable[(y>>qpVShift)*ABS(QPStride)]; int QP=0; /* can we mess with a 8x16 block from srcBlock/dstBlock downwards and 1 line upwards if not than use a temporary buffer */ @@ -2987,19 +3561,19 @@ static void RENAME(postProcess)(uint8_t src[], int srcStride, uint8_t dst[], int int i; /* copy from line (copyAhead) to (copyAhead+7) of src, these will be copied with blockcopy to dst later */ - memcpy(tempSrc + srcStride*copyAhead, srcBlock + srcStride*copyAhead, - srcStride*MAX(height-y-copyAhead, 0) ); + linecpy(tempSrc + srcStride*copyAhead, srcBlock + srcStride*copyAhead, + MAX(height-y-copyAhead, 0), srcStride); /* duplicate last line of src to fill the void upto line (copyAhead+7) */ for(i=MAX(height-y, 8); i<copyAhead+8; i++) - memcpy(tempSrc + srcStride*i, src + srcStride*(height-1), srcStride); + memcpy(tempSrc + srcStride*i, src + srcStride*(height-1), ABS(srcStride)); /* copy up to (copyAhead+1) lines of dst (line -1 to (copyAhead-1))*/ - memcpy(tempDst, dstBlock - dstStride, dstStride*MIN(height-y+1, copyAhead+1) ); + linecpy(tempDst, dstBlock - dstStride, MIN(height-y+1, copyAhead+1), dstStride); /* duplicate last line of dst to fill the void upto line (copyAhead) */ for(i=height-y+1; i<=copyAhead; i++) - memcpy(tempDst + dstStride*i, dst + dstStride*(height-1), dstStride); + memcpy(tempDst + dstStride*i, dst + dstStride*(height-1), ABS(dstStride)); dstBlock= tempDst + dstStride; srcBlock= tempSrc; @@ -3051,22 +3625,22 @@ static void RENAME(postProcess)(uint8_t src[], int srcStride, uint8_t dst[], int */ asm( - "movl %4, %%eax \n\t" - "shrl $2, %%eax \n\t" - "andl $6, %%eax \n\t" - "addl %5, %%eax \n\t" - "movl %%eax, %%edx \n\t" - "imul %1, %%eax \n\t" - "imul %3, %%edx \n\t" - 
"prefetchnta 32(%%eax, %0) \n\t" - "prefetcht0 32(%%edx, %2) \n\t" - "addl %1, %%eax \n\t" - "addl %3, %%edx \n\t" - "prefetchnta 32(%%eax, %0) \n\t" - "prefetcht0 32(%%edx, %2) \n\t" - :: "r" (srcBlock), "r" (srcStride), "r" (dstBlock), "r" (dstStride), - "m" (x), "m" (copyAhead) - : "%eax", "%edx" + "mov %4, %%"REG_a" \n\t" + "shr $2, %%"REG_a" \n\t" + "and $6, %%"REG_a" \n\t" + "add %5, %%"REG_a" \n\t" + "mov %%"REG_a", %%"REG_d" \n\t" + "imul %1, %%"REG_a" \n\t" + "imul %3, %%"REG_d" \n\t" + "prefetchnta 32(%%"REG_a", %0) \n\t" + "prefetcht0 32(%%"REG_d", %2) \n\t" + "add %1, %%"REG_a" \n\t" + "add %3, %%"REG_d" \n\t" + "prefetchnta 32(%%"REG_a", %0) \n\t" + "prefetcht0 32(%%"REG_d", %2) \n\t" + :: "r" (srcBlock), "r" ((long)srcStride), "r" (dstBlock), "r" ((long)dstStride), + "m" ((long)x), "m" ((long)copyAhead) + : "%"REG_a, "%"REG_d ); #elif defined(HAVE_3DNOW) @@ -3110,6 +3684,8 @@ static void RENAME(postProcess)(uint8_t src[], int srcStride, uint8_t dst[], int RENAME(doVertLowPass)(dstBlock, stride, &c); else if(t==2) RENAME(doVertDefFilter)(dstBlock, stride, &c); + }else if(mode & V_A_DEBLOCK){ + RENAME(do_a_deblock)(dstBlock, stride, 1, &c); } } @@ -3131,6 +3707,8 @@ static void RENAME(postProcess)(uint8_t src[], int srcStride, uint8_t dst[], int RENAME(doVertLowPass)(tempBlock1, 16, &c); else if(t==2) RENAME(doVertDefFilter)(tempBlock1, 16, &c); + }else if(mode & H_A_DEBLOCK){ + RENAME(do_a_deblock)(tempBlock1, 16, 1, &c); } RENAME(transpose2)(dstBlock-4, dstStride, tempBlock1 + 4*16); @@ -3140,12 +3718,29 @@ static void RENAME(postProcess)(uint8_t src[], int srcStride, uint8_t dst[], int horizX1Filter(dstBlock-4, stride, QP); else if(mode & H_DEBLOCK) { +#ifdef HAVE_ALTIVEC + unsigned char __attribute__ ((aligned(16))) tempBlock[272]; + transpose_16x8_char_toPackedAlign_altivec(tempBlock, dstBlock - (4 + 1), stride); + + const int t=vertClassify_altivec(tempBlock-48, 16, &c); + if(t==1) { + doVertLowPass_altivec(tempBlock-48, 16, &c); + 
transpose_8x16_char_fromPackedAlign_altivec(dstBlock - (4 + 1), tempBlock, stride); + } + else if(t==2) { + doVertDefFilter_altivec(tempBlock-48, 16, &c); + transpose_8x16_char_fromPackedAlign_altivec(dstBlock - (4 + 1), tempBlock, stride); + } +#else const int t= RENAME(horizClassify)(dstBlock-4, stride, &c); if(t==1) RENAME(doHorizLowPass)(dstBlock-4, stride, &c); else if(t==2) RENAME(doHorizDefFilter)(dstBlock-4, stride, &c); +#endif + }else if(mode & H_A_DEBLOCK){ + RENAME(do_a_deblock)(dstBlock-8, 1, stride, &c); } #endif if(mode & DERING) @@ -3190,8 +3785,8 @@ static void RENAME(postProcess)(uint8_t src[], int srcStride, uint8_t dst[], int if(y+15 >= height) { uint8_t *dstBlock= &(dst[y*dstStride]); - if(width==dstStride) - memcpy(dstBlock, tempDst + dstStride, dstStride*(height-y)); + if(width==ABS(dstStride)) + linecpy(dstBlock, tempDst + dstStride, height-y, dstStride); else { int i; diff --git a/src/libffmpeg/libavcodec/loco.c b/src/libffmpeg/libavcodec/loco.c new file mode 100644 index 000000000..6f90c1ef1 --- /dev/null +++ b/src/libffmpeg/libavcodec/loco.c @@ -0,0 +1,285 @@ +/* + * LOCO codec + * Copyright (c) 2005 Konstantin Shishkov + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/** + * @file loco.c + * LOCO codec. 
+ */ + +#include "avcodec.h" +#include "common.h" +#include "bitstream.h" +#include "golomb.h" + +enum LOCO_MODE {LOCO_UNKN=0, LOCO_CYUY2=-1, LOCO_CRGB=-2, LOCO_CRGBA=-3, LOCO_CYV12=-4, + LOCO_YUY2=1, LOCO_UYVY=2, LOCO_RGB=3, LOCO_RGBA=4, LOCO_YV12=5}; + +typedef struct LOCOContext{ + AVCodecContext *avctx; + AVFrame pic; + int lossy; + int mode; +} LOCOContext; + +typedef struct RICEContext{ + GetBitContext gb; + int save, run, run2; /* internal rice decoder state */ + int sum, count; /* sum and count for getting rice parameter */ + int lossy; +}RICEContext; + +static int loco_get_rice_param(RICEContext *r) +{ + int cnt = 0; + int val = r->count; + + while(r->sum > val && cnt < 9) { + val <<= 1; + cnt++; + } + + return cnt; +} + +static inline void loco_update_rice_param(RICEContext *r, int val) +{ + r->sum += val; + r->count++; + + if(r->count == 16) { + r->sum >>= 1; + r->count >>= 1; + } +} + +static inline int loco_get_rice(RICEContext *r) +{ + int v; + if (r->run > 0) { /* we have zero run */ + r->run--; + loco_update_rice_param(r, 0); + return 0; + } + v = get_ur_golomb_jpegls(&r->gb, loco_get_rice_param(r), INT_MAX, 0); + loco_update_rice_param(r, (v+1)>>1); + if (!v) { + if (r->save >= 0) { + r->run = get_ur_golomb_jpegls(&r->gb, 2, INT_MAX, 0); + if(r->run > 1) + r->save += r->run + 1; + else + r->save -= 3; + } + else + r->run2++; + } else { + v = ((v>>1) + r->lossy) ^ -(v&1); + if (r->run2 > 0) { + if (r->run2 > 2) + r->save += r->run2; + else + r->save -= 3; + r->run2 = 0; + } + } + + return v; +} + +/* LOCO main predictor - LOCO-I/JPEG-LS predictor */ +static inline int loco_predict(uint8_t* data, int stride, int step) +{ + int a, b, c; + + a = data[-stride]; + b = data[-step]; + c = data[-stride - step]; + + return mid_pred(a, a + b - c, b); +} + +static int loco_decode_plane(LOCOContext *l, uint8_t *data, int width, int height, + int stride, uint8_t *buf, int buf_size, int step) +{ + RICEContext rc; + int val; + int i, j; + + init_get_bits(&rc.gb, 
buf, buf_size*8); + rc.save = 0; + rc.run = 0; + rc.run2 = 0; + rc.lossy = l->lossy; + + rc.sum = 8; + rc.count = 1; + + /* restore top left pixel */ + val = loco_get_rice(&rc); + data[0] = 128 + val; + /* restore top line */ + for (i = 1; i < width; i++) { + val = loco_get_rice(&rc); + data[i * step] = data[i * step - step] + val; + } + data += stride; + for (j = 1; j < height; j++) { + /* restore left column */ + val = loco_get_rice(&rc); + data[0] = data[-stride] + val; + /* restore all other pixels */ + for (i = 1; i < width; i++) { + val = loco_get_rice(&rc); + data[i * step] = loco_predict(&data[i * step], stride, step) + val; + } + data += stride; + } + + return ((get_bits_count(&rc.gb) + 7) >> 3); +} + +static int decode_frame(AVCodecContext *avctx, + void *data, int *data_size, + uint8_t *buf, int buf_size) +{ + LOCOContext * const l = avctx->priv_data; + AVFrame * const p= (AVFrame*)&l->pic; + int decoded; + + if(p->data[0]) + avctx->release_buffer(avctx, p); + + p->reference = 0; + if(avctx->get_buffer(avctx, p) < 0){ + av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n"); + return -1; + } + p->key_frame = 1; + + switch(l->mode) { + case LOCO_CYUY2: case LOCO_YUY2: case LOCO_UYVY: + decoded = loco_decode_plane(l, p->data[0], avctx->width, avctx->height, + p->linesize[0], buf, buf_size, 1); + buf += decoded; buf_size -= decoded; + decoded = loco_decode_plane(l, p->data[1], avctx->width / 2, avctx->height, + p->linesize[1], buf, buf_size, 1); + buf += decoded; buf_size -= decoded; + decoded = loco_decode_plane(l, p->data[2], avctx->width / 2, avctx->height, + p->linesize[2], buf, buf_size, 1); + break; + case LOCO_CYV12: case LOCO_YV12: + decoded = loco_decode_plane(l, p->data[0], avctx->width, avctx->height, + p->linesize[0], buf, buf_size, 1); + buf += decoded; buf_size -= decoded; + decoded = loco_decode_plane(l, p->data[2], avctx->width / 2, avctx->height / 2, + p->linesize[2], buf, buf_size, 1); + buf += decoded; buf_size -= decoded; + decoded = 
loco_decode_plane(l, p->data[1], avctx->width / 2, avctx->height / 2, + p->linesize[1], buf, buf_size, 1); + break; + case LOCO_CRGB: case LOCO_RGB: + decoded = loco_decode_plane(l, p->data[0] + p->linesize[0]*(avctx->height-1), avctx->width, avctx->height, + -p->linesize[0], buf, buf_size, 3); + buf += decoded; buf_size -= decoded; + decoded = loco_decode_plane(l, p->data[0] + p->linesize[0]*(avctx->height-1) + 1, avctx->width, avctx->height, + -p->linesize[0], buf, buf_size, 3); + buf += decoded; buf_size -= decoded; + decoded = loco_decode_plane(l, p->data[0] + p->linesize[0]*(avctx->height-1) + 2, avctx->width, avctx->height, + -p->linesize[0], buf, buf_size, 3); + break; + case LOCO_RGBA: + decoded = loco_decode_plane(l, p->data[0], avctx->width, avctx->height, + p->linesize[0], buf, buf_size, 4); + buf += decoded; buf_size -= decoded; + decoded = loco_decode_plane(l, p->data[0] + 1, avctx->width, avctx->height, + p->linesize[0], buf, buf_size, 4); + buf += decoded; buf_size -= decoded; + decoded = loco_decode_plane(l, p->data[0] + 2, avctx->width, avctx->height, + p->linesize[0], buf, buf_size, 4); + buf += decoded; buf_size -= decoded; + decoded = loco_decode_plane(l, p->data[0] + 3, avctx->width, avctx->height, + p->linesize[0], buf, buf_size, 4); + break; + } + + *data_size = sizeof(AVFrame); + *(AVFrame*)data = l->pic; + + return buf_size; +} + +static int decode_init(AVCodecContext *avctx){ + LOCOContext * const l = avctx->priv_data; + int version; + + l->avctx = avctx; + if (avctx->extradata_size < 12) { + av_log(avctx, AV_LOG_ERROR, "Extradata size must be >= 12 instead of %i\n", + avctx->extradata_size); + return -1; + } + version = LE_32(avctx->extradata); + switch(version) { + case 1: + l->lossy = 0; + break; + case 2: + l->lossy = LE_32(avctx->extradata + 8); + break; + default: + l->lossy = LE_32(avctx->extradata + 8); + av_log(avctx, AV_LOG_INFO, "This is LOCO codec version %i, please upload file for study\n", version); + } + + l->mode = 
LE_32(avctx->extradata + 4); + switch(l->mode) { + case LOCO_CYUY2: case LOCO_YUY2: case LOCO_UYVY: + avctx->pix_fmt = PIX_FMT_YUV422P; + break; + case LOCO_CRGB: case LOCO_RGB: + avctx->pix_fmt = PIX_FMT_BGR24; + break; + case LOCO_CYV12: case LOCO_YV12: + avctx->pix_fmt = PIX_FMT_YUV420P; + break; + case LOCO_CRGBA: case LOCO_RGBA: + avctx->pix_fmt = PIX_FMT_RGBA32; + break; + default: + av_log(avctx, AV_LOG_INFO, "Unknown colorspace, index = %i\n", l->mode); + return -1; + } + if(avctx->debug & FF_DEBUG_PICT_INFO) + av_log(avctx, AV_LOG_INFO, "lossy:%i, version:%i, mode: %i\n", l->lossy, version, l->mode); + + return 0; +} + +AVCodec loco_decoder = { + "loco", + CODEC_TYPE_VIDEO, + CODEC_ID_LOCO, + sizeof(LOCOContext), + decode_init, + NULL, + NULL, + decode_frame, + CODEC_CAP_DR1, +}; diff --git a/src/libffmpeg/libavcodec/mace.c b/src/libffmpeg/libavcodec/mace.c index 8a4a20568..80cd28393 100644 --- a/src/libffmpeg/libavcodec/mace.c +++ b/src/libffmpeg/libavcodec/mace.c @@ -242,7 +242,8 @@ typedef struct MACEContext { static void chomp3(MACEContext *ctx, uint8_t val, const uint16_t tab1[], - const uint16_t tab2[][8]) + const uint16_t tab2[][8], + uint32_t numChannels) { short current; @@ -252,7 +253,8 @@ static void chomp3(MACEContext *ctx, else current+=ctx->lev; ctx->lev=current-(current >> 3); // *ctx->outPtr++=current >> 8; - *ctx->outPtr++=current; + *ctx->outPtr=current; + ctx->outPtr+=numChannels; if ( ( ctx->index += tab1[val]-(ctx->index>>5) ) < 0 ) ctx->index = 0; } /* \\\ */ @@ -281,13 +283,13 @@ static void Exp1to3(MACEContext *ctx, while (cnt>0) { pkt=inBuffer[0]; - chomp3(ctx, pkt & 7, MACEtab1, MACEtab2); - chomp3(ctx,(pkt >> 3) & 3, MACEtab3, MACEtab4); - chomp3(ctx, pkt >> 5 , MACEtab1, MACEtab2); + chomp3(ctx, pkt & 7, MACEtab1, MACEtab2, numChannels); + chomp3(ctx,(pkt >> 3) & 3, MACEtab3, MACEtab4, numChannels); + chomp3(ctx, pkt >> 5 , MACEtab1, MACEtab2, numChannels); pkt=inBuffer[1]; - chomp3(ctx, pkt & 7, MACEtab1, MACEtab2); - 
chomp3(ctx,(pkt >> 3) & 3, MACEtab3, MACEtab4); - chomp3(ctx, pkt >> 5 , MACEtab1, MACEtab2); + chomp3(ctx, pkt & 7, MACEtab1, MACEtab2, numChannels); + chomp3(ctx,(pkt >> 3) & 3, MACEtab3, MACEtab4, numChannels); + chomp3(ctx, pkt >> 5 , MACEtab1, MACEtab2, numChannels); inBuffer+=numChannels*2; --cnt; @@ -306,7 +308,8 @@ static void Exp1to3(MACEContext *ctx, static void chomp6(MACEContext *ctx, uint8_t val, const uint16_t tab1[], - const uint16_t tab2[][8]) + const uint16_t tab2[][8], + uint32_t numChannels) { short current; @@ -329,9 +332,10 @@ static void chomp6(MACEContext *ctx, // *ctx->outPtr++=(ctx->previous+ctx->prev2-((ctx->prev2-current) >> 2)) >> 8; // *ctx->outPtr++=(ctx->previous+current+((ctx->prev2-current) >> 2)) >> 8; - *ctx->outPtr++=(ctx->previous+ctx->prev2-((ctx->prev2-current) >> 2)); - *ctx->outPtr++=(ctx->previous+current+((ctx->prev2-current) >> 2)); - + *ctx->outPtr=(ctx->previous+ctx->prev2-((ctx->prev2-current) >> 2)); + ctx->outPtr+=numChannels; + *ctx->outPtr=(ctx->previous+current+((ctx->prev2-current) >> 2)); + ctx->outPtr+=numChannels; ctx->prev2=ctx->previous; ctx->previous=current; @@ -366,9 +370,9 @@ static void Exp1to6(MACEContext *ctx, while (cnt>0) { pkt=*inBuffer; - chomp6(ctx, pkt >> 5 , MACEtab1, MACEtab2); - chomp6(ctx,(pkt >> 3) & 3, MACEtab3, MACEtab4); - chomp6(ctx, pkt & 7, MACEtab1, MACEtab2); + chomp6(ctx, pkt >> 5 , MACEtab1, MACEtab2, numChannels); + chomp6(ctx,(pkt >> 3) & 3, MACEtab3, MACEtab4, numChannels); + chomp6(ctx, pkt & 7, MACEtab1, MACEtab2, numChannels); inBuffer+=numChannels; --cnt; diff --git a/src/libffmpeg/libavcodec/mdec.c b/src/libffmpeg/libavcodec/mdec.c index ef4e6ec0a..d6e5d044a 100644 --- a/src/libffmpeg/libavcodec/mdec.c +++ b/src/libffmpeg/libavcodec/mdec.c @@ -81,7 +81,7 @@ static inline int mdec_decode_block_intra(MDECContext *a, DCTELEM *block, int n) /* now quantify & encode AC coefs */ for(;;) { UPDATE_CACHE(re, &a->gb); - GET_RL_VLC(level, run, re, &a->gb, rl->rl_vlc[0], TEX_VLC_BITS, 
2); + GET_RL_VLC(level, run, re, &a->gb, rl->rl_vlc[0], TEX_VLC_BITS, 2, 0); if(level == 127){ break; @@ -163,11 +163,6 @@ static int decode_frame(AVCodecContext *avctx, AVFrame * const p= (AVFrame*)&a->picture; int i; - /* special case for last picture */ - if (buf_size == 0) { - return 0; - } - if(p->data[0]) avctx->release_buffer(avctx, p); @@ -222,8 +217,8 @@ static void mdec_common_init(AVCodecContext *avctx){ dsputil_init(&a->dsp, avctx); - a->mb_width = (avctx->width + 15) / 16; - a->mb_height = (avctx->height + 15) / 16; + a->mb_width = (avctx->coded_width + 15) / 16; + a->mb_height = (avctx->coded_height + 15) / 16; avctx->coded_frame= (AVFrame*)&a->picture; a->avctx= avctx; diff --git a/src/libffmpeg/libavcodec/mem.c b/src/libffmpeg/libavcodec/mem.c index c5ca166d3..462d674e4 100644 --- a/src/libffmpeg/libavcodec/mem.c +++ b/src/libffmpeg/libavcodec/mem.c @@ -45,8 +45,20 @@ void *av_malloc(unsigned int size) { void *ptr; +#ifdef MEMALIGN_HACK + int diff; +#endif + + /* lets disallow possible ambiguous cases */ + if(size > INT_MAX) + return NULL; -#if defined (HAVE_MEMALIGN) +#ifdef MEMALIGN_HACK + ptr = malloc(size+16+1); + diff= ((-(int)ptr - 1)&15) + 1; + ptr += diff; + ((char*)ptr)[-1]= diff; +#elif defined (HAVE_MEMALIGN) ptr = memalign(16,size); /* Why 64? 
Indeed, we should align it: @@ -87,7 +99,22 @@ void *av_malloc(unsigned int size) */ void *av_realloc(void *ptr, unsigned int size) { +#ifdef MEMALIGN_HACK + int diff; +#endif + + /* lets disallow possible ambiguous cases */ + if(size > INT_MAX) + return NULL; + +#ifdef MEMALIGN_HACK + //FIXME this isnt aligned correctly though it probably isnt needed + if(!ptr) return av_malloc(size); + diff= ((char*)ptr)[-1]; + return realloc(ptr - diff, size + diff) + diff; +#else return realloc(ptr, size); +#endif } /* NOTE: ptr = NULL is explicetly allowed */ @@ -95,6 +122,10 @@ void av_free(void *ptr) { /* XXX: this test should not be needed on most libcs */ if (ptr) +#ifdef MEMALIGN_HACK + free(ptr - ((char*)ptr)[-1]); +#else free(ptr); +#endif } diff --git a/src/libffmpeg/libavcodec/mjpeg.c b/src/libffmpeg/libavcodec/mjpeg.c index 4e2305aef..4c2b4793b 100644 --- a/src/libffmpeg/libavcodec/mjpeg.c +++ b/src/libffmpeg/libavcodec/mjpeg.c @@ -1,6 +1,8 @@ /* * MJPEG encoder and decoder * Copyright (c) 2000, 2001 Fabrice Bellard. 
+ * Copyright (c) 2003 Alex Beregszaszi + * Copyright (c) 2003-2004 Michael Niedermayer * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -398,6 +400,19 @@ static void jpeg_put_comments(MpegEncContext *s) ptr[0] = size >> 8; ptr[1] = size; } + + if( s->avctx->pix_fmt == PIX_FMT_YUV420P + ||s->avctx->pix_fmt == PIX_FMT_YUV422P + ||s->avctx->pix_fmt == PIX_FMT_YUV444P){ + put_marker(p, COM); + flush_put_bits(p); + ptr = pbBufPtr(p); + put_bits(p, 16, 0); /* patched later */ + put_string(p, "CS=ITU601", 1); + size = strlen("CS=ITU601")+3; + ptr[0] = size >> 8; + ptr[1] = size; + } } void mjpeg_picture_header(MpegEncContext *s) @@ -657,11 +672,11 @@ static int encode_picture_lossless(AVCodecContext *avctx, unsigned char *buf, in mjpeg_picture_header(s); s->header_bits= put_bits_count(&s->pb); - + if(avctx->pix_fmt == PIX_FMT_RGBA32){ int x, y, i; const int linesize= p->linesize[0]; - uint16_t buffer[2048][4]; + uint16_t (*buffer)[4]= (void *) s->rd_scratchpad; int left[3], top[3], topleft[3]; for(i=0; i<3; i++){ @@ -672,6 +687,11 @@ static int encode_picture_lossless(AVCodecContext *avctx, unsigned char *buf, in const int modified_predictor= y ? 
predictor : 1; uint8_t *ptr = p->data[0] + (linesize * y); + if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < width*3*4){ + av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n"); + return -1; + } + for(i=0; i<3; i++){ top[i]= left[i]= topleft[i]= buffer[0][i]; } @@ -705,6 +725,10 @@ static int encode_picture_lossless(AVCodecContext *avctx, unsigned char *buf, in const int mb_height = (height + s->mjpeg_vsample[0] - 1) / s->mjpeg_vsample[0]; for(mb_y = 0; mb_y < mb_height; mb_y++) { + if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < mb_width * 4 * 3 * s->mjpeg_hsample[0] * s->mjpeg_vsample[0]){ + av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n"); + return -1; + } for(mb_x = 0; mb_x < mb_width; mb_x++) { if(mb_x==0 || mb_y==0){ for(i=0;i<3;i++) { @@ -825,7 +849,7 @@ typedef struct MJpegDecodeContext { int last_dc[MAX_COMPONENTS]; /* last DEQUANTIZED dc (XXX: am I right to do that ?) */ AVFrame picture; /* picture structure */ int linesize[MAX_COMPONENTS]; ///< linesize << interlaced - uint8_t *qscale_table; + int8_t *qscale_table; DCTELEM block[64] __align8; ScanTable scantable; void (*idct_put)(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/); @@ -834,13 +858,16 @@ typedef struct MJpegDecodeContext { int restart_count; int buggy_avid; + int cs_itu601; int interlace_polarity; + + int mjpb_skiptosod; } MJpegDecodeContext; static int mjpeg_decode_dht(MJpegDecodeContext *s); static int build_vlc(VLC *vlc, const uint8_t *bits_table, const uint8_t *val_table, - int nb_codes) + int nb_codes, int use_static) { uint8_t huff_size[256]; uint16_t huff_code[256]; @@ -848,7 +875,7 @@ static int build_vlc(VLC *vlc, const uint8_t *bits_table, const uint8_t *val_tab memset(huff_size, 0, sizeof(huff_size)); build_huffman_codes(huff_size, huff_code, bits_table, val_table); - return init_vlc(vlc, 9, nb_codes, huff_size, 1, 1, huff_code, 2, 2); + return init_vlc(vlc, 9, nb_codes, huff_size, 1, 1, huff_code, 2, 2, use_static); } 
static int mjpeg_decode_init(AVCodecContext *avctx) @@ -876,12 +903,12 @@ static int mjpeg_decode_init(AVCodecContext *avctx) return -1; s->start_code = -1; s->first_picture = 1; - s->org_height = avctx->height; + s->org_height = avctx->coded_height; - build_vlc(&s->vlcs[0][0], bits_dc_luminance, val_dc_luminance, 12); - build_vlc(&s->vlcs[0][1], bits_dc_chrominance, val_dc_chrominance, 12); - build_vlc(&s->vlcs[1][0], bits_ac_luminance, val_ac_luminance, 251); - build_vlc(&s->vlcs[1][1], bits_ac_chrominance, val_ac_chrominance, 251); + build_vlc(&s->vlcs[0][0], bits_dc_luminance, val_dc_luminance, 12, 0); + build_vlc(&s->vlcs[0][1], bits_dc_chrominance, val_dc_chrominance, 12, 0); + build_vlc(&s->vlcs[1][0], bits_ac_luminance, val_ac_luminance, 251, 0); + build_vlc(&s->vlcs[1][1], bits_ac_chrominance, val_ac_chrominance, 251, 0); if (avctx->flags & CODEC_FLAG_EXTERN_HUFF) { @@ -894,6 +921,69 @@ static int mjpeg_decode_init(AVCodecContext *avctx) return 0; } + +/** + * finds the end of the current frame in the bitstream. 
+ * @return the position of the first byte of the next frame, or -1 + */ +static int find_frame_end(ParseContext *pc, const uint8_t *buf, int buf_size){ + int vop_found, i; + uint16_t state; + + vop_found= pc->frame_start_found; + state= pc->state; + + i=0; + if(!vop_found){ + for(i=0; i<buf_size; i++){ + state= (state<<8) | buf[i]; + if(state == 0xFFD8){ + i++; + vop_found=1; + break; + } + } + } + + if(vop_found){ + /* EOF considered as end of frame */ + if (buf_size == 0) + return 0; + for(; i<buf_size; i++){ + state= (state<<8) | buf[i]; + if(state == 0xFFD8){ + pc->frame_start_found=0; + pc->state=0; + return i-1; + } + } + } + pc->frame_start_found= vop_found; + pc->state= state; + return END_NOT_FOUND; +} + +static int jpeg_parse(AVCodecParserContext *s, + AVCodecContext *avctx, + uint8_t **poutbuf, int *poutbuf_size, + const uint8_t *buf, int buf_size) +{ + ParseContext *pc = s->priv_data; + int next; + + next= find_frame_end(pc, buf, buf_size); + + if (ff_combine_frame(pc, next, (uint8_t **)&buf, &buf_size) < 0) { + *poutbuf = NULL; + *poutbuf_size = 0; + return buf_size; + } + + *poutbuf = (uint8_t *)buf; + *poutbuf_size = buf_size; + return next; +} + /* quantize tables */ static int mjpeg_decode_dqt(MJpegDecodeContext *s) { @@ -969,7 +1059,7 @@ static int mjpeg_decode_dht(MJpegDecodeContext *s) free_vlc(&s->vlcs[class][index]); dprintf("class=%d index=%d nb_codes=%d\n", class, index, code_max + 1); - if(build_vlc(&s->vlcs[class][index], bits_table, val_table, code_max + 1) < 0){ + if(build_vlc(&s->vlcs[class][index], bits_table, val_table, code_max + 1, 0) < 0){ return -1; } } @@ -993,7 +1083,10 @@ static int mjpeg_decode_sof(MJpegDecodeContext *s) } height = get_bits(&s->gb, 16); width = get_bits(&s->gb, 16); + dprintf("sof0: picture: %dx%d\n", width, height); + if(avcodec_check_dimensions(s->avctx, width, height)) + return -1; nb_components = get_bits(&s->gb, 8); if (nb_components <= 0 || @@ -1028,8 +1121,7 @@ static int 
mjpeg_decode_sof(MJpegDecodeContext *s) s->width = width; s->height = height; - s->avctx->width = s->width; - s->avctx->height = s->height; + avcodec_set_dimensions(s->avctx, width, height); /* test interlaced mode */ if (s->first_picture && @@ -1055,16 +1147,16 @@ static int mjpeg_decode_sof(MJpegDecodeContext *s) if(s->rgb){ s->avctx->pix_fmt = PIX_FMT_RGBA32; }else if(s->nb_components==3) - s->avctx->pix_fmt = PIX_FMT_YUV444P; + s->avctx->pix_fmt = s->cs_itu601 ? PIX_FMT_YUV444P : PIX_FMT_YUVJ444P; else s->avctx->pix_fmt = PIX_FMT_GRAY8; break; case 0x21: - s->avctx->pix_fmt = PIX_FMT_YUV422P; + s->avctx->pix_fmt = s->cs_itu601 ? PIX_FMT_YUV422P : PIX_FMT_YUVJ422P; break; default: case 0x22: - s->avctx->pix_fmt = PIX_FMT_YUV420P; + s->avctx->pix_fmt = s->cs_itu601 ? PIX_FMT_YUV420P : PIX_FMT_YUVJ420P; break; } @@ -1162,11 +1254,14 @@ static int decode_block(MJpegDecodeContext *s, DCTELEM *block, static int ljpeg_decode_rgb_scan(MJpegDecodeContext *s, int predictor, int point_transform){ int i, mb_x, mb_y; - uint16_t buffer[2048][4]; + uint16_t buffer[32768][4]; int left[3], top[3], topleft[3]; const int linesize= s->linesize[0]; const int mask= (1<<s->bits)-1; + if((unsigned)s->mb_width > 32768) //dynamic alloc + return -1; + for(i=0; i<3; i++){ buffer[0][i]= 1 << (s->bits + point_transform - 1); } @@ -1336,8 +1431,8 @@ static int mjpeg_decode_scan(MJpegDecodeContext *s){ } // dprintf("mb: %d %d processed\n", mb_y, mb_x); ptr = s->picture.data[c] + - (s->linesize[c] * (v * mb_y + y) * 8) + - (h * mb_x + x) * 8; + (((s->linesize[c] * (v * mb_y + y) * 8) + + (h * mb_x + x) * 8) >> s->avctx->lowres); if (s->interlaced && s->bottom_field) ptr += s->linesize[c] >> 1; //av_log(NULL, AV_LOG_DEBUG, "%d %d %d %d %d %d %d %d \n", mb_x, mb_y, x, y, c, s->bottom_field, (v * mb_y + y) * 8, (h * mb_x + x) * 8); @@ -1453,6 +1548,10 @@ static int mjpeg_decode_sos(MJpegDecodeContext *s) if(s->avctx->debug & FF_DEBUG_PICT_INFO) av_log(s->avctx, AV_LOG_DEBUG, "%s %s p:%d >>:%d\n", 
s->lossless ? "lossless" : "sequencial DCT", s->rgb ? "RGB" : "", predictor, point_transform); + /* mjpeg-b can have padding bytes between sos and image data, skip them */ + for (i = s->mjpb_skiptosod; i > 0; i--) + skip_bits(&s->gb, 8); + if(s->lossless){ if(s->rgb){ if(ljpeg_decode_rgb_scan(s, predictor, point_transform) < 0) @@ -1652,6 +1751,9 @@ static int mjpeg_decode_com(MJpegDecodeContext *s) // if (s->first_picture) // printf("mjpeg: workarounding buggy AVID\n"); } + else if(!strcmp(cbuf, "CS=ITU601")){ + s->cs_itu601= 1; + } av_free(cbuf); } @@ -1724,10 +1826,6 @@ static int mjpeg_decode_frame(AVCodecContext *avctx, int start_code; AVFrame *picture = data; - /* no supplementary picture */ - if (buf_size == 0) - return 0; - buf_ptr = buf; buf_end = buf + buf_size; while (buf_ptr < buf_end) { @@ -1762,9 +1860,9 @@ static int mjpeg_decode_frame(AVCodecContext *avctx, *(dst++) = x; if (x == 0xff) { - while(*src == 0xff) src++; + while(src<buf_end && x == 0xff) + x = *(src++); - x = *(src++); if (x >= 0xd0 && x <= 0xd7) *(dst++) = x; else if (x) @@ -1898,11 +1996,7 @@ static int mjpegb_decode_frame(AVCodecContext *avctx, AVFrame *picture = data; GetBitContext hgb; /* for the header */ uint32_t dqt_offs, dht_offs, sof_offs, sos_offs, second_field_offs; - uint32_t field_size; - - /* no supplementary picture */ - if (buf_size == 0) - return 0; + uint32_t field_size, sod_offs; buf_ptr = buf; buf_end = buf + buf_size; @@ -1910,26 +2004,27 @@ static int mjpegb_decode_frame(AVCodecContext *avctx, read_header: /* reset on every SOI */ s->restart_interval = 0; + s->mjpb_skiptosod = 0; init_get_bits(&hgb, buf_ptr, /*buf_size*/(buf_end - buf_ptr)*8); skip_bits(&hgb, 32); /* reserved zeros */ - if (get_bits(&hgb, 32) != be2me_32(ff_get_fourcc("mjpg"))) + if (get_bits_long(&hgb, 32) != be2me_32(ff_get_fourcc("mjpg"))) { dprintf("not mjpeg-b (bad fourcc)\n"); return 0; } - field_size = get_bits(&hgb, 32); /* field size */ + field_size = get_bits_long(&hgb, 32); /* field size 
*/ dprintf("field size: 0x%x\n", field_size); skip_bits(&hgb, 32); /* padded field size */ - second_field_offs = get_bits(&hgb, 32); + second_field_offs = get_bits_long(&hgb, 32); dprintf("second field offs: 0x%x\n", second_field_offs); if (second_field_offs) s->interlaced = 1; - dqt_offs = get_bits(&hgb, 32); + dqt_offs = get_bits_long(&hgb, 32); dprintf("dqt offs: 0x%x\n", dqt_offs); if (dqt_offs) { @@ -1938,7 +2033,7 @@ read_header: mjpeg_decode_dqt(s); } - dht_offs = get_bits(&hgb, 32); + dht_offs = get_bits_long(&hgb, 32); dprintf("dht offs: 0x%x\n", dht_offs); if (dht_offs) { @@ -1947,7 +2042,7 @@ read_header: mjpeg_decode_dht(s); } - sof_offs = get_bits(&hgb, 32); + sof_offs = get_bits_long(&hgb, 32); dprintf("sof offs: 0x%x\n", sof_offs); if (sof_offs) { @@ -1957,18 +2052,19 @@ read_header: return -1; } - sos_offs = get_bits(&hgb, 32); + sos_offs = get_bits_long(&hgb, 32); dprintf("sos offs: 0x%x\n", sos_offs); + sod_offs = get_bits_long(&hgb, 32); + dprintf("sod offs: 0x%x\n", sod_offs); if (sos_offs) { // init_get_bits(&s->gb, buf+sos_offs, (buf_end - (buf+sos_offs))*8); init_get_bits(&s->gb, buf+sos_offs, field_size*8); + s->mjpb_skiptosod = (sod_offs - sos_offs - show_bits(&s->gb, 16)); s->start_code = SOS; mjpeg_decode_sos(s); } - skip_bits(&hgb, 32); /* start of data offset */ - if (s->interlaced) { s->bottom_field ^= 1; /* if not bottom field, do not output image yet */ @@ -2011,10 +2107,6 @@ static int sp5x_decode_frame(AVCodecContext *avctx, uint8_t *buf_ptr, *buf_end, *recoded; int i = 0, j = 0; - /* no supplementary picture */ - if (buf_size == 0) - return 0; - if (!avctx->width || !avctx->height) return -1; @@ -2039,10 +2131,10 @@ static int sp5x_decode_frame(AVCodecContext *avctx, j += sizeof(sp5x_data_dht); memcpy(recoded+j, &sp5x_data_sof[0], sizeof(sp5x_data_sof)); - recoded[j+5] = (avctx->height >> 8) & 0xFF; - recoded[j+6] = avctx->height & 0xFF; - recoded[j+7] = (avctx->width >> 8) & 0xFF; - recoded[j+8] = avctx->width & 0xFF; + 
recoded[j+5] = (avctx->coded_height >> 8) & 0xFF; + recoded[j+6] = avctx->coded_height & 0xFF; + recoded[j+7] = (avctx->coded_width >> 8) & 0xFF; + recoded[j+8] = avctx->coded_width & 0xFF; j += sizeof(sp5x_data_sof); memcpy(recoded+j, &sp5x_data_sos[0], sizeof(sp5x_data_sos)); @@ -2066,8 +2158,8 @@ static int sp5x_decode_frame(AVCodecContext *avctx, #else /* SOF */ s->bits = 8; - s->width = avctx->width; - s->height = avctx->height; + s->width = avctx->coded_width; + s->height = avctx->coded_height; s->nb_components = 3; s->component_id[0] = 0; s->h_count[0] = 2; @@ -2085,7 +2177,7 @@ static int sp5x_decode_frame(AVCodecContext *avctx, s->v_max = 2; s->qscale_table = av_mallocz((s->width+15)/16); - avctx->pix_fmt = PIX_FMT_YUV420P; + avctx->pix_fmt = s->cs_itu601 ? PIX_FMT_YUV420P : PIX_FMT_YUVJ420; s->interlaced = 0; s->picture.reference = 0; @@ -2223,3 +2315,12 @@ AVCodec ljpeg_encoder = { //FIXME avoid MPV_* lossless jpeg shouldnt need them MPV_encode_end, }; #endif + +AVCodecParser mjpeg_parser = { + { CODEC_ID_MJPEG }, + sizeof(ParseContext), + NULL, + jpeg_parse, + ff_parse_close, +}; + diff --git a/src/libffmpeg/libavcodec/motion_est.c b/src/libffmpeg/libavcodec/motion_est.c index 242bb13e7..9aaad6daa 100644 --- a/src/libffmpeg/libavcodec/motion_est.c +++ b/src/libffmpeg/libavcodec/motion_est.c @@ -178,7 +178,7 @@ static always_inline int cmp(MpegEncContext *s, const int x, const int y, const }else d= 256*256*256*32; }else{ - int uvdxy = 0; + int uvdxy; if(dxy){ if(qpel){ c->qpel_put[size][dxy](c->temp, ref[0] + x + y*stride, stride); //FIXME prototype (add h) @@ -222,23 +222,11 @@ static always_inline int cmp(MpegEncContext *s, const int x, const int y, const #include "motion_est_template.c" -static inline int get_penalty_factor(MpegEncContext *s, int type){ - switch(type&0xFF){ - default: - case FF_CMP_SAD: - return s->qscale*2; - case FF_CMP_DCT: - return s->qscale*3; - case FF_CMP_SATD: - return s->qscale*6; - case FF_CMP_SSE: - return 
s->qscale*s->qscale*2; - case FF_CMP_BIT: - return 1; - case FF_CMP_RD: - case FF_CMP_PSNR: - return (s->qscale*s->qscale*185 + 64)>>7; - } +static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){ + return 0; +} + +static void zero_hpel(uint8_t *a, const uint8_t *b, int stride, int h){ } void ff_init_me(MpegEncContext *s){ @@ -269,10 +257,11 @@ void ff_init_me(MpegEncContext *s){ c->sub_motion_search= sad_hpel_motion_search; // 2050 vs. 2450 cycles else c->sub_motion_search= hpel_motion_search; - c->hpel_avg= s->dsp.avg_pixels_tab; - if(s->no_rounding) c->hpel_put= s->dsp.put_no_rnd_pixels_tab; - else c->hpel_put= s->dsp.put_pixels_tab; } + c->hpel_avg= s->dsp.avg_pixels_tab; + if(s->no_rounding) c->hpel_put= s->dsp.put_no_rnd_pixels_tab; + else c->hpel_put= s->dsp.put_pixels_tab; + if(s->linesize){ c->stride = s->linesize; c->uvstride= s->uvlinesize; @@ -281,6 +270,22 @@ void ff_init_me(MpegEncContext *s){ c->uvstride= 8*s->mb_width + 16; } + // 8x8 fullpel search would need a 4x4 chroma compare, which we dont have yet, and even if we had the motion estimation code doesnt expect it + if(s->codec_id != CODEC_ID_SNOW){ + if((c->avctx->me_cmp&FF_CMP_CHROMA) && !s->dsp.me_cmp[2]){ + s->dsp.me_cmp[2]= zero_cmp; + } + if((c->avctx->me_sub_cmp&FF_CMP_CHROMA) && !s->dsp.me_sub_cmp[2]){ + s->dsp.me_sub_cmp[2]= zero_cmp; + } + c->hpel_put[2][0]= c->hpel_put[2][1]= + c->hpel_put[2][2]= c->hpel_put[2][3]= zero_hpel; + } + + if(s->codec_id == CODEC_ID_H261){ + c->sub_motion_search= no_sub_motion_search; + } + c->temp= c->scratchpad; } @@ -315,6 +320,7 @@ static inline void no_motion_search(MpegEncContext * s, *my_ptr = 16 * s->mb_y; } +#if 0 /* the use of these functions is inside #if 0 */ static int full_motion_search(MpegEncContext * s, int *mx_ptr, int *my_ptr, int range, int xmin, int ymin, int xmax, int ymax, uint8_t *ref_picture) @@ -535,7 +541,7 @@ static int phods_motion_search(MpegEncContext * s, *my_ptr = my; return dminy; } - +#endif /* 0 */ #define 
Z_THRESHOLD 256 @@ -693,6 +699,12 @@ static inline void get_limits(MpegEncContext *s, int x, int y) c->ymin = - y - 16; c->xmax = - x + s->mb_width *16; c->ymax = - y + s->mb_height*16; + } else if (s->out_format == FMT_H261){ + // Search range of H261 is different from other codec standards + c->xmin = (x > 15) ? - 15 : 0; + c->ymin = (y > 15) ? - 15 : 0; + c->xmax = (x < s->mb_width * 16 - 16) ? 15 : 0; + c->ymax = (y < s->mb_height * 16 - 16) ? 15 : 0; } else { c->xmin = - x; c->ymin = - y; @@ -722,7 +734,6 @@ static inline int h263_mv4_search(MpegEncContext *s, int mx, int my, int shift) int dmin_sum=0, mx4_sum=0, my4_sum=0; int same=1; const int stride= c->stride; - const int uvstride= c->uvstride; uint8_t *mv_penalty= c->current_mv_penalty; init_mv4_ref(c); @@ -873,7 +884,6 @@ static int interlaced_search(MpegEncContext *s, int ref_index, uint8_t * const mv_penalty= c->current_mv_penalty; int same=1; const int stride= 2*s->linesize; - const int uvstride= 2*s->uvlinesize; int dmin_sum= 0; const int mot_stride= s->mb_stride; const int xy= s->mb_x + s->mb_y*mot_stride; @@ -983,6 +993,16 @@ static int interlaced_search(MpegEncContext *s, int ref_index, } } +static void clip_input_mv(MpegEncContext * s, int16_t *mv, int interlaced){ + int ymax= s->me.ymax>>interlaced; + int ymin= s->me.ymin>>interlaced; + + if(mv[0] < s->me.xmin) mv[0] = s->me.xmin; + if(mv[0] > s->me.xmax) mv[0] = s->me.xmax; + if(mv[1] < ymin) mv[1] = ymin; + if(mv[1] > ymax) mv[1] = ymax; +} + static inline int check_input_motion(MpegEncContext * s, int mb_x, int mb_y, int p_type){ MotionEstContext * const c= &s->me; Picture *p= s->current_picture_ptr; @@ -997,9 +1017,18 @@ static inline int check_input_motion(MpegEncContext * s, int mb_x, int mb_y, int me_cmp_func cmpf= s->dsp.sse[0]; me_cmp_func chroma_cmpf= s->dsp.sse[1]; - assert(p_type==0 || !USES_LIST(mb_type, 1)); + if(p_type && USES_LIST(mb_type, 1)){ + av_log(c->avctx, AV_LOG_ERROR, "backward motion vector in P frame\n"); + return 
INT_MAX/2; + } assert(IS_INTRA(mb_type) || USES_LIST(mb_type,0) || USES_LIST(mb_type,1)); + for(i=0; i<4; i++){ + int xy= s->block_index[i]; + clip_input_mv(s, p->motion_val[0][xy], !!IS_INTERLACED(mb_type)); + clip_input_mv(s, p->motion_val[1][xy], !!IS_INTERLACED(mb_type)); + } + if(IS_INTERLACED(mb_type)){ int xy2= xy + s->b8_stride; s->mb_type[mb_xy]=CANDIDATE_MB_TYPE_INTRA; @@ -1008,7 +1037,7 @@ static inline int check_input_motion(MpegEncContext * s, int mb_x, int mb_y, int if(!(s->flags & CODEC_FLAG_INTERLACED_ME)){ av_log(c->avctx, AV_LOG_ERROR, "Interlaced macroblock selected but interlaced motion estimation disabled\n"); - return -1; + return INT_MAX/2; } if(USES_LIST(mb_type, 0)){ @@ -1069,7 +1098,7 @@ static inline int check_input_motion(MpegEncContext * s, int mb_x, int mb_y, int }else if(IS_8X8(mb_type)){ if(!(s->flags & CODEC_FLAG_4MV)){ av_log(c->avctx, AV_LOG_ERROR, "4MV macroblock selected but 4MV encoding disabled\n"); - return -1; + return INT_MAX/2; } cmpf= s->dsp.sse[1]; chroma_cmpf= s->dsp.sse[1]; @@ -1127,9 +1156,9 @@ void ff_estimate_p_frame_motion(MpegEncContext * s, assert(s->linesize == c->stride); assert(s->uvlinesize == c->uvstride); - c->penalty_factor = get_penalty_factor(s, c->avctx->me_cmp); - c->sub_penalty_factor= get_penalty_factor(s, c->avctx->me_sub_cmp); - c->mb_penalty_factor = get_penalty_factor(s, c->avctx->mb_cmp); + c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp); + c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp); + c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp); c->current_mv_penalty= c->mv_penalty[s->f_code] + MAX_MV; get_limits(s, 16*mb_x, 16*mb_y); @@ -1222,7 +1251,7 @@ void ff_estimate_p_frame_motion(MpegEncContext * s, } } - dmin = epzs_motion_search(s, &mx, &my, P, 0, 0, s->p_mv_table, (1<<16)>>shift); + dmin = ff_epzs_motion_search(s, &mx, &my, P, 0, 0, s->p_mv_table, (1<<16)>>shift, 0, 16); break; } @@ 
-1298,7 +1327,7 @@ void ff_estimate_p_frame_motion(MpegEncContext * s, dmin= c->sub_motion_search(s, &mx, &my, dmin, 0, 0, 0, 16); if(c->avctx->me_sub_cmp != c->avctx->mb_cmp && !c->skip) - dmin= get_mb_score(s, mx, my, 0, 0); + dmin= ff_get_mb_score(s, mx, my, 0, 0, 0, 16, 1); if((s->flags&CODEC_FLAG_4MV) && !c->skip && varc>50 && vard>10){ @@ -1390,7 +1419,7 @@ int ff_pre_estimate_p_frame_motion(MpegEncContext * s, assert(s->quarter_sample==0 || s->quarter_sample==1); - c->pre_penalty_factor = get_penalty_factor(s, c->avctx->me_pre_cmp); + c->pre_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_pre_cmp); c->current_mv_penalty= c->mv_penalty[s->f_code] + MAX_MV; get_limits(s, 16*mb_x, 16*mb_y); @@ -1423,7 +1452,7 @@ int ff_pre_estimate_p_frame_motion(MpegEncContext * s, c->pred_y = P_MEDIAN[1]; } - dmin = epzs_motion_search(s, &mx, &my, P, 0, 0, s->p_mv_table, (1<<16)>>shift); + dmin = ff_epzs_motion_search(s, &mx, &my, P, 0, 0, s->p_mv_table, (1<<16)>>shift, 0, 16); s->p_mv_table[xy][0] = mx<<shift; s->p_mv_table[xy][1] = my<<shift; @@ -1443,9 +1472,9 @@ static int ff_estimate_motion_b(MpegEncContext * s, uint8_t * const mv_penalty= c->mv_penalty[f_code] + MAX_MV; int mv_scale; - c->penalty_factor = get_penalty_factor(s, c->avctx->me_cmp); - c->sub_penalty_factor= get_penalty_factor(s, c->avctx->me_sub_cmp); - c->mb_penalty_factor = get_penalty_factor(s, c->avctx->mb_cmp); + c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp); + c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp); + c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp); c->current_mv_penalty= mv_penalty; get_limits(s, 16*mb_x, 16*mb_y); @@ -1506,7 +1535,7 @@ static int ff_estimate_motion_b(MpegEncContext * s, mv_scale= ((s->pb_time - s->pp_time)<<16) / (s->pp_time<<shift); } - dmin = epzs_motion_search(s, &mx, &my, P, 0, ref_index, s->p_mv_table, mv_scale); + dmin = 
ff_epzs_motion_search(s, &mx, &my, P, 0, ref_index, s->p_mv_table, mv_scale, 0, 16); break; } @@ -1514,7 +1543,7 @@ static int ff_estimate_motion_b(MpegEncContext * s, dmin= c->sub_motion_search(s, &mx, &my, dmin, 0, ref_index, 0, 16); if(c->avctx->me_sub_cmp != c->avctx->mb_cmp && !c->skip) - dmin= get_mb_score(s, mx, my, 0, ref_index); + dmin= ff_get_mb_score(s, mx, my, 0, ref_index, 0, 16, 1); //printf("%d %d %d %d//", s->mb_x, s->mb_y, mx, my); // s->mb_type[mb_y*s->mb_width + mb_x]= mb_type; @@ -1697,14 +1726,14 @@ static inline int direct_search(MpegEncContext * s, int mb_x, int mb_y) P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]); } - dmin = epzs_motion_search(s, &mx, &my, P, 0, 0, mv_table, 1<<(16-shift)); + dmin = ff_epzs_motion_search(s, &mx, &my, P, 0, 0, mv_table, 1<<(16-shift), 0, 16); if(c->sub_flags&FLAG_QPEL) dmin = qpel_motion_search(s, &mx, &my, dmin, 0, 0, 0, 16); else dmin = hpel_motion_search(s, &mx, &my, dmin, 0, 0, 0, 16); if(c->avctx->me_sub_cmp != c->avctx->mb_cmp && !c->skip) - dmin= get_mb_score(s, mx, my, 0, 0); + dmin= ff_get_mb_score(s, mx, my, 0, 0, 0, 16, 1); get_limits(s, 16*mb_x, 16*mb_y); //restore c->?min/max, maybe not needed @@ -1726,6 +1755,7 @@ void ff_estimate_b_frame_motion(MpegEncContext * s, const int xy = mb_y*s->mb_stride + mb_x; init_ref(c, s->new_picture.data, s->last_picture.data, s->next_picture.data, 16*mb_x, 16*mb_y, 2); + get_limits(s, 16*mb_x, 16*mb_y); c->skip=0; if(c->avctx->me_threshold){ @@ -1864,7 +1894,7 @@ int ff_get_best_fcode(MpegEncContext * s, int16_t (*mv_table)[2], int type) { if(s->me_method>=ME_EPZS){ int score[8]; - int i, y; + int i, y, range= s->avctx->me_range; uint8_t * fcode_tab= s->fcode_tab; int best_fcode=-1; int best_score=-10000000; @@ -1876,10 +1906,18 @@ int ff_get_best_fcode(MpegEncContext * s, int16_t (*mv_table)[2], int type) int xy= y*s->mb_stride; for(x=0; x<s->mb_width; x++){ if(s->mb_type[xy] & type){ - int fcode= FFMAX(fcode_tab[mv_table[xy][0] + MAX_MV], - 
fcode_tab[mv_table[xy][1] + MAX_MV]); + int mx= mv_table[xy][0]; + int my= mv_table[xy][1]; + int fcode= FFMAX(fcode_tab[mx + MAX_MV], + fcode_tab[my + MAX_MV]); int j; + if(range){ + if(mx >= range || mx < -range || + my >= range || my < -range) + continue; + } + for(j=0; j<fcode && j<8; j++){ if(s->pict_type==B_TYPE || s->current_picture.mc_mb_var[xy] < s->current_picture.mb_var[xy]) score[j]-= 170; diff --git a/src/libffmpeg/libavcodec/motion_est_template.c b/src/libffmpeg/libavcodec/motion_est_template.c index 8cfb24955..d8101ec33 100644 --- a/src/libffmpeg/libavcodec/motion_est_template.c +++ b/src/libffmpeg/libavcodec/motion_est_template.c @@ -25,11 +25,11 @@ //lets hope gcc will remove the unused vars ...(gcc 3.2.2 seems to do it ...) #define LOAD_COMMON\ - uint32_t * const score_map= c->score_map;\ - const int xmin= c->xmin;\ - const int ymin= c->ymin;\ - const int xmax= c->xmax;\ - const int ymax= c->ymax;\ + uint32_t attribute_unused * const score_map= c->score_map;\ + const int attribute_unused xmin= c->xmin;\ + const int attribute_unused ymin= c->ymin;\ + const int attribute_unused xmax= c->xmax;\ + const int attribute_unused ymax= c->ymax;\ uint8_t *mv_penalty= c->current_mv_penalty;\ const int pred_x= c->pred_x;\ const int pred_y= c->pred_y;\ @@ -221,13 +221,21 @@ static int hpel_motion_search(MpegEncContext * s, } #endif -static int inline get_mb_score(MpegEncContext * s, int mx, int my, int src_index, - int ref_index) +static int no_sub_motion_search(MpegEncContext * s, + int *mx_ptr, int *my_ptr, int dmin, + int src_index, int ref_index, + int size, int h) +{ + (*mx_ptr)<<=1; + (*my_ptr)<<=1; + return dmin; +} + +int inline ff_get_mb_score(MpegEncContext * s, int mx, int my, int src_index, + int ref_index, int size, int h, int add_rate) { // const int check_luma= s->dsp.me_sub_cmp != s->dsp.mb_cmp; MotionEstContext * const c= &s->me; - const int size= 0; - const int h= 16; const int penalty_factor= c->mb_penalty_factor; const int flags= 
c->mb_flags; const int qpel= flags & FLAG_QPEL; @@ -242,12 +250,12 @@ static int inline get_mb_score(MpegEncContext * s, int mx, int my, int src_index cmp_sub= s->dsp.mb_cmp[size]; chroma_cmp_sub= s->dsp.mb_cmp[size+1]; - assert(!c->skip); - assert(c->avctx->me_sub_cmp != c->avctx->mb_cmp); +// assert(!c->skip); +// assert(c->avctx->me_sub_cmp != c->avctx->mb_cmp); d= cmp(s, mx>>(qpel+1), my>>(qpel+1), mx&mask, my&mask, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags); //FIXME check cbp before adding penalty for (0,0) vector - if(mx || my || size>0) + if(add_rate && (mx || my || size>0)) d += (mv_penalty[mx - pred_x] + mv_penalty[my - pred_y])*penalty_factor; return d; @@ -323,15 +331,16 @@ static int qpel_motion_search(MpegEncContext * s, for(ny= -3; ny <= 3; ny++){ for(nx= -3; nx <= 3; nx++){ - const int t2= nx*nx*(tr + tl - 2*t) + 4*nx*(tr-tl) + 32*t; - const int c2= nx*nx*( r + l - 2*c) + 4*nx*( r- l) + 32*c; - const int b2= nx*nx*(br + bl - 2*b) + 4*nx*(br-bl) + 32*b; - int score= ny*ny*(b2 + t2 - 2*c2) + 4*ny*(b2 - t2) + 32*c2; + //FIXME this could overflow (unlikely though) + const int64_t t2= nx*nx*(tr + tl - 2*t) + 4*nx*(tr-tl) + 32*t; + const int64_t c2= nx*nx*( r + l - 2*c) + 4*nx*( r- l) + 32*c; + const int64_t b2= nx*nx*(br + bl - 2*b) + 4*nx*(br-bl) + 32*b; + int score= (ny*ny*(b2 + t2 - 2*c2) + 4*ny*(b2 - t2) + 32*c2 + 512)>>10; int i; if((nx&3)==0 && (ny&3)==0) continue; - score += 1024*(mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor; + score += (mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor; // if(nx&1) score-=1024*c->penalty_factor; // if(ny&1) score-=1024*c->penalty_factor; @@ -350,6 +359,7 @@ static int qpel_motion_search(MpegEncContext * s, } }else{ int tl; + //FIXME this could overflow (unlikely though) const int cx = 4*(r - l); const int cx2= r + l - 2*c; const int cy = 4*(b - t); @@ -372,6 +382,7 @@ static int qpel_motion_search(MpegEncContext * s, for(ny= -3; 
ny <= 3; ny++){ for(nx= -3; nx <= 3; nx++){ + //FIXME this could overflow (unlikely though) int score= ny*nx*cxy + nx*nx*cx2 + ny*ny*cy2 + nx*cx + ny*cy + 32*c; //FIXME factor int i; @@ -487,6 +498,10 @@ static int qpel_motion_search(MpegEncContext * s, {\ const int key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\ const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\ + assert((x) >= xmin);\ + assert((x) <= xmax);\ + assert((y) >= ymin);\ + assert((y) <= ymax);\ /*printf("check_mv %d %d\n", x, y);*/\ if(map[index]!=key){\ d= cmp(s, x, y, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\ @@ -670,7 +685,7 @@ if(256*256*256*64 % (stats[0]+1)==0){ }\ } -#define MAX_SAB_SIZE 16 +#define MAX_SAB_SIZE ME_MAP_SIZE static int sab_diamond_search(MpegEncContext * s, int *best, int dmin, int src_index, int ref_index, int const penalty_factor, int size, int h, int flags) @@ -844,15 +859,13 @@ static always_inline int diamond_search(MpegEncContext * s, int *best, int dmin, static always_inline int epzs_motion_search_internal(MpegEncContext * s, int *mx_ptr, int *my_ptr, int P[10][2], int src_index, int ref_index, int16_t (*last_mv)[2], - int ref_mv_scale, int flags) + int ref_mv_scale, int flags, int size, int h) { MotionEstContext * const c= &s->me; int best[2]={0, 0}; int d, dmin; int map_generation; - const int penalty_factor= c->penalty_factor; - const int size=0; - const int h=16; + int penalty_factor; const int ref_mv_stride= s->mb_stride; //pass as arg FIXME const int ref_mv_xy= s->mb_x + s->mb_y*ref_mv_stride; //add to last_mv beforepassing FIXME me_cmp_func cmpf, chroma_cmpf; @@ -860,11 +873,19 @@ static always_inline int epzs_motion_search_internal(MpegEncContext * s, int *mx LOAD_COMMON LOAD_COMMON2 - cmpf= s->dsp.me_cmp[size]; - chroma_cmpf= s->dsp.me_cmp[size+1]; + if(c->pre_pass){ + penalty_factor= c->pre_penalty_factor; + cmpf= s->dsp.me_pre_cmp[size]; + chroma_cmpf= s->dsp.me_pre_cmp[size+1]; + }else{ + penalty_factor= c->penalty_factor; 
+ cmpf= s->dsp.me_cmp[size]; + chroma_cmpf= s->dsp.me_cmp[size+1]; + } map_generation= update_map_generation(c); + assert(cmpf); dmin= cmp(s, 0, 0, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags); map[0]= map_generation; score_map[0]= dmin; @@ -875,7 +896,7 @@ static always_inline int epzs_motion_search_internal(MpegEncContext * s, int *mx CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16, (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16) }else{ - if(dmin<256 && ( P_LEFT[0] |P_LEFT[1] + if(dmin<h*h && ( P_LEFT[0] |P_LEFT[1] |P_TOP[0] |P_TOP[1] |P_TOPRIGHT[0]|P_TOPRIGHT[1])==0){ *mx_ptr= 0; @@ -884,7 +905,7 @@ static always_inline int epzs_motion_search_internal(MpegEncContext * s, int *mx return dmin; } CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift) - if(dmin>256*2){ + if(dmin>h*h*2){ CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16, (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16) CHECK_MV(P_LEFT[0] >>shift, P_LEFT[1] >>shift) @@ -892,7 +913,7 @@ static always_inline int epzs_motion_search_internal(MpegEncContext * s, int *mx CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift) } } - if(dmin>256*4){ + if(dmin>h*h*4){ if(c->pre_pass){ CHECK_CLIPED_MV((last_mv[ref_mv_xy-1][0]*ref_mv_scale + (1<<15))>>16, (last_mv[ref_mv_xy-1][1]*ref_mv_scale + (1<<15))>>16) @@ -941,19 +962,18 @@ static always_inline int epzs_motion_search_internal(MpegEncContext * s, int *mx } //this function is dedicated to the braindamaged gcc -static inline int epzs_motion_search(MpegEncContext * s, int *mx_ptr, int *my_ptr, +inline int ff_epzs_motion_search(MpegEncContext * s, int *mx_ptr, int *my_ptr, int P[10][2], int src_index, int ref_index, int16_t (*last_mv)[2], - int ref_mv_scale) + int ref_mv_scale, int size, int h) { MotionEstContext * const c= &s->me; //FIXME convert other functions in the same way if faster - switch(c->flags){ - case 0: - return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, 
ref_mv_scale, 0); + if(c->flags==0 && h==16 && size==0){ + return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, 0, 0, 16); // case FLAG_QPEL: // return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, FLAG_QPEL); - default: - return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, c->flags); + }else{ + return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, c->flags, size, h); } } diff --git a/src/libffmpeg/libavcodec/mpeg12.c b/src/libffmpeg/libavcodec/mpeg12.c index 872c39c0c..40ef61503 100644 --- a/src/libffmpeg/libavcodec/mpeg12.c +++ b/src/libffmpeg/libavcodec/mpeg12.c @@ -65,6 +65,20 @@ static void mpeg1_encode_block(MpegEncContext *s, int component); static void mpeg1_encode_motion(MpegEncContext *s, int val, int f_or_b_code); // RAL: f_code parameter added #endif //CONFIG_ENCODERS +static inline int mpeg1_decode_block_inter(MpegEncContext *s, + DCTELEM *block, + int n); +static inline int mpeg1_decode_block_intra(MpegEncContext *s, + DCTELEM *block, + int n); +static inline int mpeg1_fast_decode_block_inter(MpegEncContext *s, DCTELEM *block, int n); +static inline int mpeg2_decode_block_non_intra(MpegEncContext *s, + DCTELEM *block, + int n); +static inline int mpeg2_decode_block_intra(MpegEncContext *s, + DCTELEM *block, + int n); +static inline int mpeg2_fast_decode_block_non_intra(MpegEncContext *s, DCTELEM *block, int n); static int mpeg_decode_motion(MpegEncContext *s, int fcode, int pred); static void exchange_uv(MpegEncContext *s); @@ -97,16 +111,19 @@ static uint8_t mpeg1_index_run[2][64]; static int8_t mpeg1_max_level[2][64]; #endif //CONFIG_ENCODERS -static void init_2d_vlc_rl(RLTable *rl) +static void init_2d_vlc_rl(RLTable *rl, int use_static) { int i; init_vlc(&rl->vlc, TEX_VLC_BITS, rl->n + 2, &rl->table_vlc[0][1], 4, 2, - &rl->table_vlc[0][0], 4, 2); + 
&rl->table_vlc[0][0], 4, 2, use_static); + + if(use_static) + rl->rl_vlc[0]= av_mallocz_static(rl->vlc.table_size*sizeof(RL_VLC_ELEM)); + else + rl->rl_vlc[0]= av_malloc(rl->vlc.table_size*sizeof(RL_VLC_ELEM)); - - rl->rl_vlc[0]= av_malloc(rl->vlc.table_size*sizeof(RL_VLC_ELEM)); for(i=0; i<rl->vlc.table_size; i++){ int code= rl->vlc.table[i][0]; int len = rl->vlc.table[i][1]; @@ -296,6 +313,7 @@ static void mpeg1_encode_sequence_header(MpegEncContext *s) s->mb_width * s->mb_height <= 396 && s->mb_width * s->mb_height * framerate.num <= framerate.den*396*25 && framerate.num <= framerate.den*30 && + s->avctx->me_range && s->avctx->me_range < 128 && vbv_buffer_size <= 20 && v <= 1856000/400 && s->codec_id == CODEC_ID_MPEG1VIDEO; @@ -309,8 +327,19 @@ static void mpeg1_encode_sequence_header(MpegEncContext *s) put_header(s, EXT_START_CODE); put_bits(&s->pb, 4, 1); //seq ext put_bits(&s->pb, 1, 0); //esc - put_bits(&s->pb, 3, 4); //profile - put_bits(&s->pb, 4, 8); //level + + if(s->avctx->profile == FF_PROFILE_UNKNOWN){ + put_bits(&s->pb, 3, 4); //profile + }else{ + put_bits(&s->pb, 3, s->avctx->profile); //profile + } + + if(s->avctx->level == FF_LEVEL_UNKNOWN){ + put_bits(&s->pb, 4, 8); //level + }else{ + put_bits(&s->pb, 4, s->avctx->level); //level + } + put_bits(&s->pb, 1, s->progressive_sequence); put_bits(&s->pb, 2, 1); //chroma format 4:2:0 put_bits(&s->pb, 2, 0); //horizontal size ext @@ -691,7 +720,7 @@ void mpeg1_encode_mb(MpegEncContext *s, // RAL: Parameter added: f_or_b_code static void mpeg1_encode_motion(MpegEncContext *s, int val, int f_or_b_code) { - int code, bit_size, l, m, bits, range, sign; + int code, bit_size, l, bits, range, sign; if (val == 0) { /* zero vector */ @@ -703,13 +732,8 @@ static void mpeg1_encode_motion(MpegEncContext *s, int val, int f_or_b_code) bit_size = f_or_b_code - 1; range = 1 << bit_size; /* modulo encoding */ - l = 16 * range; - m = 2 * l; - if (val < -l) { - val += m; - } else if (val >= l) { - val -= m; - } + l= INT_BIT 
- 5 - bit_size; + val= (val<<l)>>l; if (val >= 0) { val--; @@ -749,7 +773,7 @@ void ff_mpeg1_encode_init(MpegEncContext *s) int i; done=1; - init_rl(&rl_mpeg1); + init_rl(&rl_mpeg1, 1); for(i=0; i<64; i++) { @@ -977,31 +1001,31 @@ static void init_vlcs(void) init_vlc(&dc_lum_vlc, DC_VLC_BITS, 12, vlc_dc_lum_bits, 1, 1, - vlc_dc_lum_code, 2, 2); + vlc_dc_lum_code, 2, 2, 1); init_vlc(&dc_chroma_vlc, DC_VLC_BITS, 12, vlc_dc_chroma_bits, 1, 1, - vlc_dc_chroma_code, 2, 2); + vlc_dc_chroma_code, 2, 2, 1); init_vlc(&mv_vlc, MV_VLC_BITS, 17, &mbMotionVectorTable[0][1], 2, 1, - &mbMotionVectorTable[0][0], 2, 1); + &mbMotionVectorTable[0][0], 2, 1, 1); init_vlc(&mbincr_vlc, MBINCR_VLC_BITS, 36, &mbAddrIncrTable[0][1], 2, 1, - &mbAddrIncrTable[0][0], 2, 1); + &mbAddrIncrTable[0][0], 2, 1, 1); init_vlc(&mb_pat_vlc, MB_PAT_VLC_BITS, 64, &mbPatTable[0][1], 2, 1, - &mbPatTable[0][0], 2, 1); + &mbPatTable[0][0], 2, 1, 1); init_vlc(&mb_ptype_vlc, MB_PTYPE_VLC_BITS, 7, &table_mb_ptype[0][1], 2, 1, - &table_mb_ptype[0][0], 2, 1); + &table_mb_ptype[0][0], 2, 1, 1); init_vlc(&mb_btype_vlc, MB_BTYPE_VLC_BITS, 11, &table_mb_btype[0][1], 2, 1, - &table_mb_btype[0][0], 2, 1); - init_rl(&rl_mpeg1); - init_rl(&rl_mpeg2); + &table_mb_btype[0][0], 2, 1, 1); + init_rl(&rl_mpeg1, 1); + init_rl(&rl_mpeg2, 1); - init_2d_vlc_rl(&rl_mpeg1); - init_2d_vlc_rl(&rl_mpeg2); + init_2d_vlc_rl(&rl_mpeg1, 1); + init_2d_vlc_rl(&rl_mpeg2, 1); } } @@ -1023,337 +1047,6 @@ static inline int get_qscale(MpegEncContext *s) } } -static inline int decode_dc(GetBitContext *gb, int component) -{ - int code, diff; - - if (component == 0) { - code = get_vlc2(gb, dc_lum_vlc.table, DC_VLC_BITS, 2); - } else { - code = get_vlc2(gb, dc_chroma_vlc.table, DC_VLC_BITS, 2); - } - if (code < 0){ - av_log(NULL, AV_LOG_ERROR, "invalid dc code at\n"); - return 0xffff; - } - if (code == 0) { - diff = 0; - } else { - diff = get_xbits(gb, code); - } - return diff; -} - -static inline int mpeg1_decode_block_intra(MpegEncContext *s, - 
DCTELEM *block, - int n) -{ - int level, dc, diff, i, j, run; - int component; - RLTable *rl = &rl_mpeg1; - uint8_t * const scantable= s->intra_scantable.permutated; - const uint16_t *quant_matrix= s->intra_matrix; - const int qscale= s->qscale; - - /* DC coef */ - component = (n <= 3 ? 0 : n - 4 + 1); - diff = decode_dc(&s->gb, component); - if (diff >= 0xffff) - return -1; - dc = s->last_dc[component]; - dc += diff; - s->last_dc[component] = dc; - block[0] = dc<<3; - dprintf("dc=%d diff=%d\n", dc, diff); - i = 0; - { - OPEN_READER(re, &s->gb); - /* now quantify & encode AC coefs */ - for(;;) { - UPDATE_CACHE(re, &s->gb); - GET_RL_VLC(level, run, re, &s->gb, rl->rl_vlc[0], TEX_VLC_BITS, 2); - - if(level == 127){ - break; - } else if(level != 0) { - i += run; - j = scantable[i]; - level= (level*qscale*quant_matrix[j])>>4; - level= (level-1)|1; - level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1); - LAST_SKIP_BITS(re, &s->gb, 1); - } else { - /* escape */ - run = SHOW_UBITS(re, &s->gb, 6)+1; LAST_SKIP_BITS(re, &s->gb, 6); - UPDATE_CACHE(re, &s->gb); - level = SHOW_SBITS(re, &s->gb, 8); SKIP_BITS(re, &s->gb, 8); - if (level == -128) { - level = SHOW_UBITS(re, &s->gb, 8) - 256; LAST_SKIP_BITS(re, &s->gb, 8); - } else if (level == 0) { - level = SHOW_UBITS(re, &s->gb, 8) ; LAST_SKIP_BITS(re, &s->gb, 8); - } - i += run; - j = scantable[i]; - if(level<0){ - level= -level; - level= (level*qscale*quant_matrix[j])>>4; - level= (level-1)|1; - level= -level; - }else{ - level= (level*qscale*quant_matrix[j])>>4; - level= (level-1)|1; - } - } - if (i > 63){ - av_log(s->avctx, AV_LOG_ERROR, "ac-tex damaged at %d %d\n", s->mb_x, s->mb_y); - return -1; - } - - block[j] = level; - } - CLOSE_READER(re, &s->gb); - } - s->block_last_index[n] = i; - return 0; -} - -static inline int mpeg1_decode_block_inter(MpegEncContext *s, - DCTELEM *block, - int n) -{ - int level, i, j, run; - RLTable *rl = &rl_mpeg1; - uint8_t * const scantable= s->intra_scantable.permutated; - 
const uint16_t *quant_matrix= s->inter_matrix; - const int qscale= s->qscale; - - { - int v; - OPEN_READER(re, &s->gb); - i = -1; - /* special case for the first coef. no need to add a second vlc table */ - UPDATE_CACHE(re, &s->gb); - v= SHOW_UBITS(re, &s->gb, 2); - if (v & 2) { - LAST_SKIP_BITS(re, &s->gb, 2); - level= (3*qscale*quant_matrix[0])>>5; - level= (level-1)|1; - if(v&1) - level= -level; - block[0] = level; - i++; - } - - /* now quantify & encode AC coefs */ - for(;;) { - UPDATE_CACHE(re, &s->gb); - GET_RL_VLC(level, run, re, &s->gb, rl->rl_vlc[0], TEX_VLC_BITS, 2); - - if(level == 127){ - break; - } else if(level != 0) { - i += run; - j = scantable[i]; - level= ((level*2+1)*qscale*quant_matrix[j])>>5; - level= (level-1)|1; - level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1); - LAST_SKIP_BITS(re, &s->gb, 1); - } else { - /* escape */ - run = SHOW_UBITS(re, &s->gb, 6)+1; LAST_SKIP_BITS(re, &s->gb, 6); - UPDATE_CACHE(re, &s->gb); - level = SHOW_SBITS(re, &s->gb, 8); SKIP_BITS(re, &s->gb, 8); - if (level == -128) { - level = SHOW_UBITS(re, &s->gb, 8) - 256; LAST_SKIP_BITS(re, &s->gb, 8); - } else if (level == 0) { - level = SHOW_UBITS(re, &s->gb, 8) ; LAST_SKIP_BITS(re, &s->gb, 8); - } - i += run; - j = scantable[i]; - if(level<0){ - level= -level; - level= ((level*2+1)*qscale*quant_matrix[j])>>5; - level= (level-1)|1; - level= -level; - }else{ - level= ((level*2+1)*qscale*quant_matrix[j])>>5; - level= (level-1)|1; - } - } - if (i > 63){ - av_log(s->avctx, AV_LOG_ERROR, "ac-tex damaged at %d %d\n", s->mb_x, s->mb_y); - return -1; - } - - block[j] = level; - } - CLOSE_READER(re, &s->gb); - } - s->block_last_index[n] = i; - return 0; -} - -/* Also does unquantization here, since I will never support mpeg2 - encoding */ -static inline int mpeg2_decode_block_non_intra(MpegEncContext *s, - DCTELEM *block, - int n) -{ - int level, i, j, run; - RLTable *rl = &rl_mpeg1; - uint8_t * const scantable= s->intra_scantable.permutated; - const 
uint16_t *quant_matrix; - const int qscale= s->qscale; - int mismatch; - - mismatch = 1; - - { - int v; - OPEN_READER(re, &s->gb); - i = -1; - if (n < 4) - quant_matrix = s->inter_matrix; - else - quant_matrix = s->chroma_inter_matrix; - - /* special case for the first coef. no need to add a second vlc table */ - UPDATE_CACHE(re, &s->gb); - v= SHOW_UBITS(re, &s->gb, 2); - if (v & 2) { - LAST_SKIP_BITS(re, &s->gb, 2); - level= (3*qscale*quant_matrix[0])>>5; - if(v&1) - level= -level; - block[0] = level; - mismatch ^= level; - i++; - } - - /* now quantify & encode AC coefs */ - for(;;) { - UPDATE_CACHE(re, &s->gb); - GET_RL_VLC(level, run, re, &s->gb, rl->rl_vlc[0], TEX_VLC_BITS, 2); - - if(level == 127){ - break; - } else if(level != 0) { - i += run; - j = scantable[i]; - level= ((level*2+1)*qscale*quant_matrix[j])>>5; - level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1); - LAST_SKIP_BITS(re, &s->gb, 1); - } else { - /* escape */ - run = SHOW_UBITS(re, &s->gb, 6)+1; LAST_SKIP_BITS(re, &s->gb, 6); - UPDATE_CACHE(re, &s->gb); - level = SHOW_SBITS(re, &s->gb, 12); SKIP_BITS(re, &s->gb, 12); - - i += run; - j = scantable[i]; - if(level<0){ - level= ((-level*2+1)*qscale*quant_matrix[j])>>5; - level= -level; - }else{ - level= ((level*2+1)*qscale*quant_matrix[j])>>5; - } - } - if (i > 63){ - av_log(s->avctx, AV_LOG_ERROR, "ac-tex damaged at %d %d\n", s->mb_x, s->mb_y); - return -1; - } - - mismatch ^= level; - block[j] = level; - } - CLOSE_READER(re, &s->gb); - } - block[63] ^= (mismatch & 1); - - s->block_last_index[n] = i; - return 0; -} - -static inline int mpeg2_decode_block_intra(MpegEncContext *s, - DCTELEM *block, - int n) -{ - int level, dc, diff, i, j, run; - int component; - RLTable *rl; - uint8_t * const scantable= s->intra_scantable.permutated; - const uint16_t *quant_matrix; - const int qscale= s->qscale; - int mismatch; - - /* DC coef */ - if (n < 4){ - quant_matrix = s->intra_matrix; - component = 0; - }else{ - quant_matrix = 
s->chroma_intra_matrix; - component = (n&1) + 1; - } - diff = decode_dc(&s->gb, component); - if (diff >= 0xffff) - return -1; - dc = s->last_dc[component]; - dc += diff; - s->last_dc[component] = dc; - block[0] = dc << (3 - s->intra_dc_precision); - dprintf("dc=%d\n", block[0]); - mismatch = block[0] ^ 1; - i = 0; - if (s->intra_vlc_format) - rl = &rl_mpeg2; - else - rl = &rl_mpeg1; - - { - OPEN_READER(re, &s->gb); - /* now quantify & encode AC coefs */ - for(;;) { - UPDATE_CACHE(re, &s->gb); - GET_RL_VLC(level, run, re, &s->gb, rl->rl_vlc[0], TEX_VLC_BITS, 2); - - if(level == 127){ - break; - } else if(level != 0) { - i += run; - j = scantable[i]; - level= (level*qscale*quant_matrix[j])>>4; - level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1); - LAST_SKIP_BITS(re, &s->gb, 1); - } else { - /* escape */ - run = SHOW_UBITS(re, &s->gb, 6)+1; LAST_SKIP_BITS(re, &s->gb, 6); - UPDATE_CACHE(re, &s->gb); - level = SHOW_SBITS(re, &s->gb, 12); SKIP_BITS(re, &s->gb, 12); - i += run; - j = scantable[i]; - if(level<0){ - level= (-level*qscale*quant_matrix[j])>>4; - level= -level; - }else{ - level= (level*qscale*quant_matrix[j])>>4; - } - } - if (i > 63){ - av_log(s->avctx, AV_LOG_ERROR, "ac-tex damaged at %d %d\n", s->mb_x, s->mb_y); - return -1; - } - - mismatch^= level; - block[j] = level; - } - CLOSE_READER(re, &s->gb); - } - block[63]^= mismatch&1; - - s->block_last_index[n] = i; - return 0; -} - /* motion type (for mpeg2) */ #define MT_FIELD 1 #define MT_FRAME 2 @@ -1364,7 +1057,8 @@ static int mpeg_decode_mb(MpegEncContext *s, DCTELEM block[12][64]) { int i, j, k, cbp, val, mb_type, motion_type; - + const int mb_block_count = 4 + (1<< s->chroma_format); + dprintf("decode_mb: x=%d y=%d\n", s->mb_x, s->mb_y); assert(s->mb_skiped==0); @@ -1393,6 +1087,15 @@ static int mpeg_decode_mb(MpegEncContext *s, s->mb_skiped = 1; s->current_picture.mb_type[ s->mb_x + s->mb_y*s->mb_stride ]= MB_TYPE_SKIP | MB_TYPE_L0 | MB_TYPE_16x16; } else { + int mb_type; + + 
if(s->mb_x) + mb_type= s->current_picture.mb_type[ s->mb_x + s->mb_y*s->mb_stride - 1]; + else + mb_type= s->current_picture.mb_type[ s->mb_width + (s->mb_y-1)*s->mb_stride - 1]; // FIXME not sure if this is allowed in mpeg at all, + if(IS_INTRA(mb_type)) + return -1; + /* if B type, reuse previous vectors and directions */ s->mv[0][0][0] = s->last_mv[0][0][0]; s->mv[0][0][1] = s->last_mv[0][0][1]; @@ -1400,7 +1103,7 @@ static int mpeg_decode_mb(MpegEncContext *s, s->mv[1][0][1] = s->last_mv[1][0][1]; s->current_picture.mb_type[ s->mb_x + s->mb_y*s->mb_stride ]= - s->current_picture.mb_type[ s->mb_x + s->mb_y*s->mb_stride - 1] | MB_TYPE_SKIP; + mb_type | MB_TYPE_SKIP; // assert(s->current_picture.mb_type[ s->mb_x + s->mb_y*s->mb_stride - 1]&(MB_TYPE_16x16|MB_TYPE_16x8)); if((s->mv[0][0][0]|s->mv[0][0][1]|s->mv[1][0][0]|s->mv[1][0][1])==0) @@ -1477,7 +1180,7 @@ static int mpeg_decode_mb(MpegEncContext *s, #endif if (s->codec_id == CODEC_ID_MPEG2VIDEO) { - for(i=0;i<4+(1<<s->chroma_format);i++) { + for(i=0;i<mb_block_count;i++) { if (mpeg2_decode_block_intra(s, s->pblocks[i], i) < 0) return -1; } @@ -1661,11 +1364,9 @@ static int mpeg_decode_mb(MpegEncContext *s, av_log(s->avctx, AV_LOG_ERROR, "invalid cbp at %d %d\n", s->mb_x, s->mb_y); return -1; } - if(s->chroma_format == 2){//CHROMA422 - cbp|= ( get_bits(&s->gb,2) ) << 6; - }else - if(s->chroma_format > 2){//CHROMA444 - cbp|= ( get_bits(&s->gb,6) ) << 6; + if(mb_block_count > 6){ + cbp<<= mb_block_count-6; + cbp |= get_bits(&s->gb, mb_block_count-6); } #ifdef HAVE_XVMC @@ -1679,44 +1380,48 @@ static int mpeg_decode_mb(MpegEncContext *s, #endif if (s->codec_id == CODEC_ID_MPEG2VIDEO) { - for(i=0;i<6;i++) { - if (cbp & (1<<(5-i)) ) { - if (mpeg2_decode_block_non_intra(s, s->pblocks[i], i) < 0) - return -1; - } else { - s->block_last_index[i] = -1; - } - } - if (s->chroma_format >= 2) { - if (s->chroma_format == 2) {//CHROMA_422) - for(i=6;i<8;i++) { - if (cbp & (1<<(6+7-i)) ) { - if (mpeg2_decode_block_non_intra(s, 
s->pblocks[i], i) < 0) - return -1; - } else { - s->block_last_index[i] = -1; - } + if(s->flags2 & CODEC_FLAG2_FAST){ + for(i=0;i<6;i++) { + if(cbp & 32) { + mpeg2_fast_decode_block_non_intra(s, s->pblocks[i], i); + } else { + s->block_last_index[i] = -1; } - }else{ /*CHROMA_444*/ - for(i=6;i<12;i++) { - if (cbp & (1<<(6+11-i)) ) { - if (mpeg2_decode_block_non_intra(s, s->pblocks[i], i) < 0) - return -1; - } else { - s->block_last_index[i] = -1; - } + cbp+=cbp; + } + }else{ + cbp<<= 12-mb_block_count; + + for(i=0;i<mb_block_count;i++) { + if ( cbp & (1<<11) ) { + if (mpeg2_decode_block_non_intra(s, s->pblocks[i], i) < 0) + return -1; + } else { + s->block_last_index[i] = -1; } + cbp+=cbp; } } } else { - for(i=0;i<6;i++) { - if (cbp & 32) { - if (mpeg1_decode_block_inter(s, s->pblocks[i], i) < 0) - return -1; - } else { - s->block_last_index[i] = -1; + if(s->flags2 & CODEC_FLAG2_FAST){ + for(i=0;i<6;i++) { + if (cbp & 32) { + mpeg1_fast_decode_block_inter(s, s->pblocks[i], i); + } else { + s->block_last_index[i] = -1; + } + cbp+=cbp; + } + }else{ + for(i=0;i<6;i++) { + if (cbp & 32) { + if (mpeg1_decode_block_inter(s, s->pblocks[i], i) < 0) + return -1; + } else { + s->block_last_index[i] = -1; + } + cbp+=cbp; } - cbp+=cbp; } } }else{ @@ -1756,11 +1461,471 @@ static int mpeg_decode_motion(MpegEncContext *s, int fcode, int pred) val += pred; /* modulo decoding */ - l = 1 << (shift+4); - val = ((val + l)&(l*2-1)) - l; + l= INT_BIT - 5 - shift; + val = (val<<l)>>l; return val; } +static inline int decode_dc(GetBitContext *gb, int component) +{ + int code, diff; + + if (component == 0) { + code = get_vlc2(gb, dc_lum_vlc.table, DC_VLC_BITS, 2); + } else { + code = get_vlc2(gb, dc_chroma_vlc.table, DC_VLC_BITS, 2); + } + if (code < 0){ + av_log(NULL, AV_LOG_ERROR, "invalid dc code at\n"); + return 0xffff; + } + if (code == 0) { + diff = 0; + } else { + diff = get_xbits(gb, code); + } + return diff; +} + +static inline int mpeg1_decode_block_intra(MpegEncContext *s, + 
DCTELEM *block, + int n) +{ + int level, dc, diff, i, j, run; + int component; + RLTable *rl = &rl_mpeg1; + uint8_t * const scantable= s->intra_scantable.permutated; + const uint16_t *quant_matrix= s->intra_matrix; + const int qscale= s->qscale; + + /* DC coef */ + component = (n <= 3 ? 0 : n - 4 + 1); + diff = decode_dc(&s->gb, component); + if (diff >= 0xffff) + return -1; + dc = s->last_dc[component]; + dc += diff; + s->last_dc[component] = dc; + block[0] = dc<<3; + dprintf("dc=%d diff=%d\n", dc, diff); + i = 0; + { + OPEN_READER(re, &s->gb); + /* now quantify & encode AC coefs */ + for(;;) { + UPDATE_CACHE(re, &s->gb); + GET_RL_VLC(level, run, re, &s->gb, rl->rl_vlc[0], TEX_VLC_BITS, 2, 0); + + if(level == 127){ + break; + } else if(level != 0) { + i += run; + j = scantable[i]; + level= (level*qscale*quant_matrix[j])>>4; + level= (level-1)|1; + level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1); + LAST_SKIP_BITS(re, &s->gb, 1); + } else { + /* escape */ + run = SHOW_UBITS(re, &s->gb, 6)+1; LAST_SKIP_BITS(re, &s->gb, 6); + UPDATE_CACHE(re, &s->gb); + level = SHOW_SBITS(re, &s->gb, 8); SKIP_BITS(re, &s->gb, 8); + if (level == -128) { + level = SHOW_UBITS(re, &s->gb, 8) - 256; LAST_SKIP_BITS(re, &s->gb, 8); + } else if (level == 0) { + level = SHOW_UBITS(re, &s->gb, 8) ; LAST_SKIP_BITS(re, &s->gb, 8); + } + i += run; + j = scantable[i]; + if(level<0){ + level= -level; + level= (level*qscale*quant_matrix[j])>>4; + level= (level-1)|1; + level= -level; + }else{ + level= (level*qscale*quant_matrix[j])>>4; + level= (level-1)|1; + } + } + if (i > 63){ + av_log(s->avctx, AV_LOG_ERROR, "ac-tex damaged at %d %d\n", s->mb_x, s->mb_y); + return -1; + } + + block[j] = level; + } + CLOSE_READER(re, &s->gb); + } + s->block_last_index[n] = i; + return 0; +} + +static inline int mpeg1_decode_block_inter(MpegEncContext *s, + DCTELEM *block, + int n) +{ + int level, i, j, run; + RLTable *rl = &rl_mpeg1; + uint8_t * const scantable= 
s->intra_scantable.permutated; + const uint16_t *quant_matrix= s->inter_matrix; + const int qscale= s->qscale; + + { + int v; + OPEN_READER(re, &s->gb); + i = -1; + /* special case for the first coef. no need to add a second vlc table */ + UPDATE_CACHE(re, &s->gb); + v= SHOW_UBITS(re, &s->gb, 2); + if (v & 2) { + LAST_SKIP_BITS(re, &s->gb, 2); + level= (3*qscale*quant_matrix[0])>>5; + level= (level-1)|1; + if(v&1) + level= -level; + block[0] = level; + i++; + } + + /* now quantify & encode AC coefs */ + for(;;) { + UPDATE_CACHE(re, &s->gb); + GET_RL_VLC(level, run, re, &s->gb, rl->rl_vlc[0], TEX_VLC_BITS, 2, 0); + + if(level == 127){ + break; + } else if(level != 0) { + i += run; + j = scantable[i]; + level= ((level*2+1)*qscale*quant_matrix[j])>>5; + level= (level-1)|1; + level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1); + LAST_SKIP_BITS(re, &s->gb, 1); + } else { + /* escape */ + run = SHOW_UBITS(re, &s->gb, 6)+1; LAST_SKIP_BITS(re, &s->gb, 6); + UPDATE_CACHE(re, &s->gb); + level = SHOW_SBITS(re, &s->gb, 8); SKIP_BITS(re, &s->gb, 8); + if (level == -128) { + level = SHOW_UBITS(re, &s->gb, 8) - 256; LAST_SKIP_BITS(re, &s->gb, 8); + } else if (level == 0) { + level = SHOW_UBITS(re, &s->gb, 8) ; LAST_SKIP_BITS(re, &s->gb, 8); + } + i += run; + j = scantable[i]; + if(level<0){ + level= -level; + level= ((level*2+1)*qscale*quant_matrix[j])>>5; + level= (level-1)|1; + level= -level; + }else{ + level= ((level*2+1)*qscale*quant_matrix[j])>>5; + level= (level-1)|1; + } + } + if (i > 63){ + av_log(s->avctx, AV_LOG_ERROR, "ac-tex damaged at %d %d\n", s->mb_x, s->mb_y); + return -1; + } + + block[j] = level; + } + CLOSE_READER(re, &s->gb); + } + s->block_last_index[n] = i; + return 0; +} + +static inline int mpeg1_fast_decode_block_inter(MpegEncContext *s, DCTELEM *block, int n) +{ + int level, i, j, run; + RLTable *rl = &rl_mpeg1; + uint8_t * const scantable= s->intra_scantable.permutated; + const int qscale= s->qscale; + + { + int v; + OPEN_READER(re, 
&s->gb); + i = -1; + /* special case for the first coef. no need to add a second vlc table */ + UPDATE_CACHE(re, &s->gb); + v= SHOW_UBITS(re, &s->gb, 2); + if (v & 2) { + LAST_SKIP_BITS(re, &s->gb, 2); + level= (3*qscale)>>1; + level= (level-1)|1; + if(v&1) + level= -level; + block[0] = level; + i++; + } + + /* now quantify & encode AC coefs */ + for(;;) { + UPDATE_CACHE(re, &s->gb); + GET_RL_VLC(level, run, re, &s->gb, rl->rl_vlc[0], TEX_VLC_BITS, 2, 0); + + if(level == 127){ + break; + } else if(level != 0) { + i += run; + j = scantable[i]; + level= ((level*2+1)*qscale)>>1; + level= (level-1)|1; + level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1); + LAST_SKIP_BITS(re, &s->gb, 1); + } else { + /* escape */ + run = SHOW_UBITS(re, &s->gb, 6)+1; LAST_SKIP_BITS(re, &s->gb, 6); + UPDATE_CACHE(re, &s->gb); + level = SHOW_SBITS(re, &s->gb, 8); SKIP_BITS(re, &s->gb, 8); + if (level == -128) { + level = SHOW_UBITS(re, &s->gb, 8) - 256; LAST_SKIP_BITS(re, &s->gb, 8); + } else if (level == 0) { + level = SHOW_UBITS(re, &s->gb, 8) ; LAST_SKIP_BITS(re, &s->gb, 8); + } + i += run; + j = scantable[i]; + if(level<0){ + level= -level; + level= ((level*2+1)*qscale)>>1; + level= (level-1)|1; + level= -level; + }else{ + level= ((level*2+1)*qscale)>>1; + level= (level-1)|1; + } + } + + block[j] = level; + } + CLOSE_READER(re, &s->gb); + } + s->block_last_index[n] = i; + return 0; +} + + +static inline int mpeg2_decode_block_non_intra(MpegEncContext *s, + DCTELEM *block, + int n) +{ + int level, i, j, run; + RLTable *rl = &rl_mpeg1; + uint8_t * const scantable= s->intra_scantable.permutated; + const uint16_t *quant_matrix; + const int qscale= s->qscale; + int mismatch; + + mismatch = 1; + + { + int v; + OPEN_READER(re, &s->gb); + i = -1; + if (n < 4) + quant_matrix = s->inter_matrix; + else + quant_matrix = s->chroma_inter_matrix; + + /* special case for the first coef. 
no need to add a second vlc table */ + UPDATE_CACHE(re, &s->gb); + v= SHOW_UBITS(re, &s->gb, 2); + if (v & 2) { + LAST_SKIP_BITS(re, &s->gb, 2); + level= (3*qscale*quant_matrix[0])>>5; + if(v&1) + level= -level; + block[0] = level; + mismatch ^= level; + i++; + } + + /* now quantify & encode AC coefs */ + for(;;) { + UPDATE_CACHE(re, &s->gb); + GET_RL_VLC(level, run, re, &s->gb, rl->rl_vlc[0], TEX_VLC_BITS, 2, 0); + + if(level == 127){ + break; + } else if(level != 0) { + i += run; + j = scantable[i]; + level= ((level*2+1)*qscale*quant_matrix[j])>>5; + level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1); + LAST_SKIP_BITS(re, &s->gb, 1); + } else { + /* escape */ + run = SHOW_UBITS(re, &s->gb, 6)+1; LAST_SKIP_BITS(re, &s->gb, 6); + UPDATE_CACHE(re, &s->gb); + level = SHOW_SBITS(re, &s->gb, 12); SKIP_BITS(re, &s->gb, 12); + + i += run; + j = scantable[i]; + if(level<0){ + level= ((-level*2+1)*qscale*quant_matrix[j])>>5; + level= -level; + }else{ + level= ((level*2+1)*qscale*quant_matrix[j])>>5; + } + } + if (i > 63){ + av_log(s->avctx, AV_LOG_ERROR, "ac-tex damaged at %d %d\n", s->mb_x, s->mb_y); + return -1; + } + + mismatch ^= level; + block[j] = level; + } + CLOSE_READER(re, &s->gb); + } + block[63] ^= (mismatch & 1); + + s->block_last_index[n] = i; + return 0; +} + +static inline int mpeg2_fast_decode_block_non_intra(MpegEncContext *s, + DCTELEM *block, + int n) +{ + int level, i, j, run; + RLTable *rl = &rl_mpeg1; + uint8_t * const scantable= s->intra_scantable.permutated; + const int qscale= s->qscale; + int v; + OPEN_READER(re, &s->gb); + i = -1; + + /* special case for the first coef. 
no need to add a second vlc table */ + UPDATE_CACHE(re, &s->gb); + v= SHOW_UBITS(re, &s->gb, 2); + if (v & 2) { + LAST_SKIP_BITS(re, &s->gb, 2); + level= (3*qscale)>>1; + if(v&1) + level= -level; + block[0] = level; + i++; + } + + /* now quantify & encode AC coefs */ + for(;;) { + UPDATE_CACHE(re, &s->gb); + GET_RL_VLC(level, run, re, &s->gb, rl->rl_vlc[0], TEX_VLC_BITS, 2, 0); + + if(level == 127){ + break; + } else if(level != 0) { + i += run; + j = scantable[i]; + level= ((level*2+1)*qscale)>>1; + level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1); + LAST_SKIP_BITS(re, &s->gb, 1); + } else { + /* escape */ + run = SHOW_UBITS(re, &s->gb, 6)+1; LAST_SKIP_BITS(re, &s->gb, 6); + UPDATE_CACHE(re, &s->gb); + level = SHOW_SBITS(re, &s->gb, 12); SKIP_BITS(re, &s->gb, 12); + + i += run; + j = scantable[i]; + if(level<0){ + level= ((-level*2+1)*qscale)>>1; + level= -level; + }else{ + level= ((level*2+1)*qscale)>>1; + } + } + + block[j] = level; + } + CLOSE_READER(re, &s->gb); + s->block_last_index[n] = i; + return 0; +} + + +static inline int mpeg2_decode_block_intra(MpegEncContext *s, + DCTELEM *block, + int n) +{ + int level, dc, diff, i, j, run; + int component; + RLTable *rl; + uint8_t * const scantable= s->intra_scantable.permutated; + const uint16_t *quant_matrix; + const int qscale= s->qscale; + int mismatch; + + /* DC coef */ + if (n < 4){ + quant_matrix = s->intra_matrix; + component = 0; + }else{ + quant_matrix = s->chroma_intra_matrix; + component = (n&1) + 1; + } + diff = decode_dc(&s->gb, component); + if (diff >= 0xffff) + return -1; + dc = s->last_dc[component]; + dc += diff; + s->last_dc[component] = dc; + block[0] = dc << (3 - s->intra_dc_precision); + dprintf("dc=%d\n", block[0]); + mismatch = block[0] ^ 1; + i = 0; + if (s->intra_vlc_format) + rl = &rl_mpeg2; + else + rl = &rl_mpeg1; + + { + OPEN_READER(re, &s->gb); + /* now quantify & encode AC coefs */ + for(;;) { + UPDATE_CACHE(re, &s->gb); + GET_RL_VLC(level, run, re, &s->gb, 
rl->rl_vlc[0], TEX_VLC_BITS, 2, 0); + + if(level == 127){ + break; + } else if(level != 0) { + i += run; + j = scantable[i]; + level= (level*qscale*quant_matrix[j])>>4; + level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1); + LAST_SKIP_BITS(re, &s->gb, 1); + } else { + /* escape */ + run = SHOW_UBITS(re, &s->gb, 6)+1; LAST_SKIP_BITS(re, &s->gb, 6); + UPDATE_CACHE(re, &s->gb); + level = SHOW_SBITS(re, &s->gb, 12); SKIP_BITS(re, &s->gb, 12); + i += run; + j = scantable[i]; + if(level<0){ + level= (-level*qscale*quant_matrix[j])>>4; + level= -level; + }else{ + level= (level*qscale*quant_matrix[j])>>4; + } + } + if (i > 63){ + av_log(s->avctx, AV_LOG_ERROR, "ac-tex damaged at %d %d\n", s->mb_x, s->mb_y); + return -1; + } + + mismatch^= level; + block[j] = level; + } + CLOSE_READER(re, &s->gb); + } + block[63]^= mismatch&1; + + s->block_last_index[n] = i; + return 0; +} + typedef struct Mpeg1Context { MpegEncContext mpeg_enc_ctx; int mpeg_enc_ctx_allocated; /* true if decoding context allocated */ @@ -1769,6 +1934,7 @@ typedef struct Mpeg1Context { int slice_count; int swap_uv;//indicate VCR2 int save_aspect_info; + AVRational frame_rate_ext; ///< MPEG-2 specific framerate modificator } Mpeg1Context; @@ -1801,8 +1967,8 @@ static int mpeg_decode_init(AVCodecContext *avctx) static void quant_matrix_rebuild(uint16_t *matrix, const uint8_t *old_perm, const uint8_t *new_perm){ -uint16_t temp_matrix[64]; -int i; + uint16_t temp_matrix[64]; + int i; memcpy(temp_matrix,matrix,64*sizeof(uint16_t)); @@ -1814,16 +1980,15 @@ int i; //Call this function when we know all parameters //it may be called in different places for mpeg1 and mpeg2 static int mpeg_decode_postinit(AVCodecContext *avctx){ -Mpeg1Context *s1 = avctx->priv_data; -MpegEncContext *s = &s1->mpeg_enc_ctx; -uint8_t old_permutation[64]; - + Mpeg1Context *s1 = avctx->priv_data; + MpegEncContext *s = &s1->mpeg_enc_ctx; + uint8_t old_permutation[64]; if ( (s1->mpeg_enc_ctx_allocated == 0)|| - 
avctx->width != s->width || - avctx->height != s->height|| -// s1->save_aspect_info != avctx->aspect_ratio_info|| + avctx->coded_width != s->width || + avctx->coded_height != s->height|| + s1->save_aspect_info != s->aspect_ratio_info|| 0) { @@ -1834,8 +1999,7 @@ uint8_t old_permutation[64]; if( (s->width == 0 )||(s->height == 0)) return -2; - avctx->width = s->width; - avctx->height = s->height; + avcodec_set_dimensions(avctx, s->width, s->height); avctx->bit_rate = s->bit_rate; s1->save_aspect_info = s->aspect_ratio_info; @@ -1856,8 +2020,8 @@ uint8_t old_permutation[64]; av_reduce( &s->avctx->frame_rate, &s->avctx->frame_rate_base, - frame_rate_tab[s->frame_rate_index].num * (s->frame_rate_ext_n+1), - frame_rate_tab[s->frame_rate_index].den * (s->frame_rate_ext_d+1), + frame_rate_tab[s->frame_rate_index].num * s1->frame_rate_ext.num, + frame_rate_tab[s->frame_rate_index].den * s1->frame_rate_ext.den, 1<<30); //mpeg2 aspect if(s->aspect_ratio_info > 1){ @@ -1956,6 +2120,8 @@ static int mpeg1_decode_picture(AVCodecContext *avctx, ref = get_bits(&s->gb, 10); /* temporal ref */ s->pict_type = get_bits(&s->gb, 3); + if(s->pict_type == 0 || s->pict_type > 3) + return -1; vbv_delay= get_bits(&s->gb, 16); if (s->pict_type == P_TYPE || s->pict_type == B_TYPE) { @@ -1977,8 +2143,8 @@ static int mpeg1_decode_picture(AVCodecContext *avctx, s->current_picture.pict_type= s->pict_type; s->current_picture.key_frame= s->pict_type == I_TYPE; -// if(avctx->debug & FF_DEBUG_PICT_INFO) -// av_log(avctx, AV_LOG_DEBUG, "vbv_delay %d, ref %d\n", vbv_delay, ref); + if(avctx->debug & FF_DEBUG_PICT_INFO) + av_log(avctx, AV_LOG_DEBUG, "vbv_delay %d, ref %d type:%d\n", vbv_delay, ref, s->pict_type); s->y_dc_scale = 8; s->c_dc_scale = 8; @@ -1986,15 +2152,15 @@ static int mpeg1_decode_picture(AVCodecContext *avctx, return 0; } -static void mpeg_decode_sequence_extension(MpegEncContext *s) +static void mpeg_decode_sequence_extension(Mpeg1Context *s1) { + MpegEncContext *s= &s1->mpeg_enc_ctx; 
int horiz_size_ext, vert_size_ext; int bit_rate_ext; - int level, profile; skip_bits(&s->gb, 1); /* profil and level esc*/ - profile= get_bits(&s->gb, 3); - level= get_bits(&s->gb, 4); + s->avctx->profile= get_bits(&s->gb, 3); + s->avctx->level= get_bits(&s->gb, 4); s->progressive_sequence = get_bits1(&s->gb); /* progressive_sequence */ s->chroma_format = get_bits(&s->gb, 2); /* chroma_format 1=420, 2=422, 3=444 */ horiz_size_ext = get_bits(&s->gb, 2); @@ -2002,15 +2168,15 @@ static void mpeg_decode_sequence_extension(MpegEncContext *s) s->width |= (horiz_size_ext << 12); s->height |= (vert_size_ext << 12); bit_rate_ext = get_bits(&s->gb, 12); /* XXX: handle it */ - s->bit_rate += (bit_rate_ext << 12) * 400; + s->bit_rate += (bit_rate_ext << 18) * 400; skip_bits1(&s->gb); /* marker */ s->avctx->rc_buffer_size += get_bits(&s->gb, 8)*1024*16<<10; s->low_delay = get_bits1(&s->gb); if(s->flags & CODEC_FLAG_LOW_DELAY) s->low_delay=1; - s->frame_rate_ext_n = get_bits(&s->gb, 2); - s->frame_rate_ext_d = get_bits(&s->gb, 5); + s1->frame_rate_ext.num = get_bits(&s->gb, 2)+1; + s1->frame_rate_ext.den = get_bits(&s->gb, 5)+1; dprintf("sequence extension\n"); s->codec_id= s->avctx->codec_id= CODEC_ID_MPEG2VIDEO; @@ -2018,7 +2184,7 @@ static void mpeg_decode_sequence_extension(MpegEncContext *s) if(s->avctx->debug & FF_DEBUG_PICT_INFO) av_log(s->avctx, AV_LOG_DEBUG, "profile: %d, level: %d vbv buffer: %d, bitrate:%d\n", - profile, level, s->avctx->rc_buffer_size, s->bit_rate); + s->avctx->profile, s->avctx->level, s->avctx->rc_buffer_size, s->bit_rate); } @@ -2176,7 +2342,7 @@ static void mpeg_decode_extension(AVCodecContext *avctx, ext_type = get_bits(&s->gb, 4); switch(ext_type) { case 0x1: - mpeg_decode_sequence_extension(s); + mpeg_decode_sequence_extension(s1); break; case 0x2: mpeg_decode_sequence_display_extension(s1); @@ -2194,9 +2360,7 @@ static void mpeg_decode_extension(AVCodecContext *avctx, } static void exchange_uv(MpegEncContext *s){ -short * tmp; - - tmp = 
s->pblocks[4]; + short * tmp = s->pblocks[4]; s->pblocks[4] = s->pblocks[5]; s->pblocks[5] = tmp; } @@ -2266,6 +2430,7 @@ static int mpeg_decode_slice(Mpeg1Context *s1, int mb_y, AVCodecContext *avctx= s->avctx; int ret; const int field_pic= s->picture_structure != PICT_FRAME; + const int lowres= s->avctx->lowres; s->resync_mb_x= s->resync_mb_y= -1; @@ -2335,7 +2500,9 @@ static int mpeg_decode_slice(Mpeg1Context *s1, int mb_y, #endif s->dsp.clear_blocks(s->block[0]); - + if(!s->chroma_y_shift){ + s->dsp.clear_blocks(s->block[6]); + } ret = mpeg_decode_mb(s, s->block); s->chroma_qscale= s->qscale; @@ -2373,15 +2540,16 @@ static int mpeg_decode_slice(Mpeg1Context *s1, int mb_y, } } - s->dest[0] += 16; - s->dest[1] += 8; - s->dest[2] += 8; + s->dest[0] += 16 >> lowres; + s->dest[1] += 16 >> (s->chroma_x_shift + lowres); + s->dest[2] += 16 >> (s->chroma_x_shift + lowres); MPV_decode_mb(s, s->block); if (++s->mb_x >= s->mb_width) { + const int mb_size= 16>>s->avctx->lowres; - ff_draw_horiz_band(s, 16*s->mb_y, 16); + ff_draw_horiz_band(s, mb_size*s->mb_y, mb_size); s->mb_x = 0; s->mb_y++; @@ -2557,7 +2725,7 @@ static int mpeg1_decode_sequence(AVCodecContext *avctx, #ifdef DEBUG dprintf("intra matrix present\n"); for(i=0;i<64;i++) - dprintf(" %d", s->intra_matrix[s->dsp.idct_permutation[i]); + dprintf(" %d", s->intra_matrix[s->dsp.idct_permutation[i]]); printf("\n"); #endif } else { @@ -2582,7 +2750,7 @@ static int mpeg1_decode_sequence(AVCodecContext *avctx, #ifdef DEBUG dprintf("non intra matrix present\n"); for(i=0;i<64;i++) - dprintf(" %d", s->inter_matrix[s->dsp.idct_permutation[i]); + dprintf(" %d", s->inter_matrix[s->dsp.idct_permutation[i]]); printf("\n"); #endif } else { @@ -2629,8 +2797,8 @@ static int vcr2_init_sequence(AVCodecContext *avctx) if (s1->mpeg_enc_ctx_allocated) { MPV_common_end(s); } - s->width = avctx->width; - s->height = avctx->height; + s->width = avctx->coded_width; + s->height = avctx->coded_height; avctx->has_b_frames= 0; //true? 
s->low_delay= 1; @@ -2786,12 +2954,14 @@ static int mpeg_decode_frame(AVCodecContext *avctx, MpegEncContext *s2 = &s->mpeg_enc_ctx; dprintf("fill_buffer\n"); - /* special case for last picture */ - if (buf_size == 0 && s2->low_delay==0 && s2->next_picture_ptr) { - *picture= *(AVFrame*)s2->next_picture_ptr; - s2->next_picture_ptr= NULL; + if (buf_size == 0) { + /* special case for last picture */ + if (s2->low_delay==0 && s2->next_picture_ptr) { + *picture= *(AVFrame*)s2->next_picture_ptr; + s2->next_picture_ptr= NULL; - *data_size = sizeof(AVFrame); + *data_size = sizeof(AVFrame); + } return 0; } @@ -2886,6 +3056,11 @@ static int mpeg_decode_frame(AVCodecContext *avctx, if(avctx->hurry_up>=5) break; if (!s->mpeg_enc_ctx_allocated) break; + + if(s2->codec_id == CODEC_ID_MPEG2VIDEO){ + if(mb_y < avctx->skip_top || mb_y >= s2->mb_height - avctx->skip_bottom) + break; + } if(s2->first_slice){ s2->first_slice=0; @@ -2943,7 +3118,7 @@ AVCodec mpeg1video_decoder = { NULL, mpeg_decode_end, mpeg_decode_frame, - CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED, + CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY, .flush= ff_mpeg_flush, }; @@ -2956,7 +3131,7 @@ AVCodec mpeg2video_decoder = { NULL, mpeg_decode_end, mpeg_decode_frame, - CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED, + CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY, .flush= ff_mpeg_flush, }; @@ -2970,7 +3145,7 @@ AVCodec mpegvideo_decoder = { NULL, mpeg_decode_end, mpeg_decode_frame, - CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED, + CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY, .flush= ff_mpeg_flush, }; @@ -2985,10 +3160,10 @@ AVCodec mpeg1video_encoder = { MPV_encode_picture, MPV_encode_end, .supported_framerates= frame_rate_tab+1, + .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1}, + .capabilities= CODEC_CAP_DELAY, }; -#ifdef CONFIG_RISKY - AVCodec 
mpeg2video_encoder = { "mpeg2video", CODEC_TYPE_VIDEO, @@ -2998,14 +3173,17 @@ AVCodec mpeg2video_encoder = { MPV_encode_picture, MPV_encode_end, .supported_framerates= frame_rate_tab+1, + .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1}, + .capabilities= CODEC_CAP_DELAY, }; #endif -#endif #ifdef HAVE_XVMC static int mpeg_mc_decode_init(AVCodecContext *avctx){ Mpeg1Context *s; + if( avctx->thread_count > 1) + return -1; if( !(avctx->slice_flags & SLICE_FLAG_CODED_ORDER) ) return -1; if( !(avctx->slice_flags & SLICE_FLAG_ALLOW_FIELD) ){ @@ -3029,7 +3207,7 @@ AVCodec mpeg_xvmc_decoder = { NULL, mpeg_decode_end, mpeg_decode_frame, - CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED| CODEC_CAP_HWACCEL, + CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED| CODEC_CAP_HWACCEL | CODEC_CAP_DELAY, .flush= ff_mpeg_flush, }; diff --git a/src/libffmpeg/libavcodec/mpegaudio.h b/src/libffmpeg/libavcodec/mpegaudio.h index e50e8bd6f..072c41bda 100644 --- a/src/libffmpeg/libavcodec/mpegaudio.h +++ b/src/libffmpeg/libavcodec/mpegaudio.h @@ -18,6 +18,10 @@ #define MPA_DUAL 2 #define MPA_MONO 3 +/* header + layer + bitrate + freq + lsf/mpeg25 */ +#define SAME_HEADER_MASK \ + (0xffe00000 | (3 << 17) | (0xf << 12) | (3 << 10) | (3 << 19)) + int l2_select_table(int bitrate, int nb_channels, int freq, int lsf); int mpa_decode_header(AVCodecContext *avctx, uint32_t head); @@ -29,3 +33,20 @@ extern const int sblimit_table[5]; extern const int quant_steps[17]; extern const int quant_bits[17]; extern const int32_t mpa_enwindow[257]; + +/* fast header check for resync */ +static inline int ff_mpa_check_header(uint32_t header){ + /* header */ + if ((header & 0xffe00000) != 0xffe00000) + return -1; + /* layer check */ + if ((header & (3<<17)) == 0) + return -1; + /* bit rate */ + if ((header & (0xf<<12)) == 0xf<<12) + return -1; + /* frequency */ + if ((header & (3<<10)) == 3<<10) + return -1; + return 0; +} diff --git a/src/libffmpeg/libavcodec/mpegaudiodec.c 
b/src/libffmpeg/libavcodec/mpegaudiodec.c index f9cb389aa..196d77d2a 100644 --- a/src/libffmpeg/libavcodec/mpegaudiodec.c +++ b/src/libffmpeg/libavcodec/mpegaudiodec.c @@ -24,6 +24,7 @@ //#define DEBUG #include "avcodec.h" +#include "bitstream.h" #include "mpegaudio.h" #include "dsputil.h" @@ -47,6 +48,18 @@ #define WFRAC_BITS 14 /* fractional bits for window */ #endif +#if defined(USE_HIGHPRECISION) && defined(CONFIG_AUDIO_NONSHORT) +typedef int32_t OUT_INT; +#define OUT_MAX INT32_MAX +#define OUT_MIN INT32_MIN +#define OUT_SHIFT (WFRAC_BITS + FRAC_BITS - 31) +#else +typedef int16_t OUT_INT; +#define OUT_MAX INT16_MAX +#define OUT_MIN INT16_MIN +#define OUT_SHIFT (WFRAC_BITS + FRAC_BITS - 15) +#endif + #define FRAC_ONE (1 << FRAC_BITS) #define MULL(a,b) (((int64_t)(a) * (int64_t)(b)) >> FRAC_BITS) @@ -56,6 +69,12 @@ #define FIXR(a) ((int)((a) * FRAC_ONE + 0.5)) #define FRAC_RND(a) (((a) + (FRAC_ONE/2)) >> FRAC_BITS) +#define FIXHR(a) ((int)((a) * (1LL<<32) + 0.5)) +//#define MULH(a,b) (((int64_t)(a) * (int64_t)(b))>>32) //gcc 3.4 creates an incredibly bloated mess out of this +static always_inline int MULH(int a, int b){ + return ((int64_t)(a) * (int64_t)(b))>>32; +} + #if FRAC_BITS <= 15 typedef int16_t MPA_INT; #else @@ -97,8 +116,19 @@ typedef struct MPADecodeContext { int frame_count; #endif void (*compute_antialias)(struct MPADecodeContext *s, struct GranuleDef *g); + int adu_mode; ///< 0 for standard mp3, 1 for adu formatted mp3 + unsigned int dither_state; } MPADecodeContext; +/** + * Context for MP3On4 decoder + */ +typedef struct MP3On4DecodeContext { + int frames; ///< number of mp3 frames per block (number of mp3 decoder instances) + int chan_cfg; ///< channel config number + MPADecodeContext *mp3decctx[5]; ///< MPADecodeContext for every decoder instance +} MP3On4DecodeContext; + /* layer 3 "granule" */ typedef struct GranuleDef { uint8_t scfsi; @@ -141,13 +171,9 @@ static VLC huff_quad_vlc[2]; /* computed from band_size_long */ static uint16_t 
band_index_long[9][23]; /* XXX: free when all decoders are closed */ -#define TABLE_4_3_SIZE (8191 + 16) +#define TABLE_4_3_SIZE (8191 + 16)*4 static int8_t *table_4_3_exp; -#if FRAC_BITS <= 15 -static uint16_t *table_4_3_value; -#else static uint32_t *table_4_3_value; -#endif /* intensity stereo coef table */ static int32_t is_table[2][16]; static int32_t is_table_lsf[2][2][16]; @@ -170,14 +196,7 @@ static int32_t scale_factor_mult2[3][3] = { SCALE_GEN(4.0 / 9.0), /* 9 steps */ }; -/* 2^(n/4) */ -static uint32_t scale_factor_mult3[4] = { - FIXR(1.0), - FIXR(1.18920711500272106671), - FIXR(1.41421356237309504880), - FIXR(1.68179283050742908605), -}; - +void ff_mpa_synth_init(MPA_INT *window); static MPA_INT window[512] __attribute__((aligned(16))); /* layer 1 unscaling */ @@ -214,30 +233,18 @@ static inline int l2_unscale_group(int steps, int mant, int scale_factor) /* compute value^(4/3) * 2^(exponent/4). It normalized to FRAC_BITS */ static inline int l3_unscale(int value, int exponent) { -#if FRAC_BITS <= 15 unsigned int m; -#else - uint64_t m; -#endif int e; - e = table_4_3_exp[value]; - e += (exponent >> 2); - e = FRAC_BITS - e; -#if FRAC_BITS <= 15 + e = table_4_3_exp [4*value + (exponent&3)]; + m = table_4_3_value[4*value + (exponent&3)]; + e -= (exponent >> 2); + assert(e>=1); if (e > 31) - e = 31; -#endif - m = table_4_3_value[value]; -#if FRAC_BITS <= 15 - m = (m * scale_factor_mult3[exponent & 3]); + return 0; m = (m + (1 << (e-1))) >> e; + return m; -#else - m = MUL64(m, scale_factor_mult3[exponent & 3]); - m = (m + (uint64_t_C(1) << (e-1))) >> e; - return m; -#endif } /* all integer n^(4/3) computation code */ @@ -250,11 +257,13 @@ static inline int l3_unscale(int value, int exponent) static int dev_4_3_coefs[DEV_ORDER]; +#if 0 /* unused */ static int pow_mult3[3] = { POW_FIX(1.0), POW_FIX(1.25992104989487316476), POW_FIX(1.58740105196819947474), }; +#endif static void int_pow_init(void) { @@ -267,6 +276,7 @@ static void int_pow_init(void) } } +#if 0 
/* unused, remove? */ /* return the mantissa and the binary exponent */ static int int_pow(int i, int *exp_ptr) { @@ -311,6 +321,7 @@ static int int_pow(int i, int *exp_ptr) *exp_ptr = eq; return a; } +#endif static int decode_init(AVCodecContext * avctx) { @@ -318,7 +329,13 @@ static int decode_init(AVCodecContext * avctx) static int init=0; int i, j, k; - if(avctx->antialias_algo == FF_AA_INT) +#if defined(USE_HIGHPRECISION) && defined(CONFIG_AUDIO_NONSHORT) + avctx->sample_fmt= SAMPLE_FMT_S32; +#else + avctx->sample_fmt= SAMPLE_FMT_S16; +#endif + + if(avctx->antialias_algo != FF_AA_FLOAT) s->compute_antialias= compute_antialias_integer; else s->compute_antialias= compute_antialias_float; @@ -348,20 +365,7 @@ static int decode_init(AVCodecContext * avctx) scale_factor_mult[i][2]); } - /* window */ - /* max = 18760, max sum over all 16 coefs : 44736 */ - for(i=0;i<257;i++) { - int v; - v = mpa_enwindow[i]; -#if WFRAC_BITS < 16 - v = (v + (1 << (16 - WFRAC_BITS - 1))) >> (16 - WFRAC_BITS); -#endif - window[i] = v; - if ((i & 63) != 0) - v = -v; - if (i != 0) - window[512 - i] = v; - } + ff_mpa_synth_init(window); /* huffman decode tables */ huff_code_table[0] = NULL; @@ -375,7 +379,7 @@ static int decode_init(AVCodecContext * avctx) n = xsize * xsize; /* XXX: fail test */ init_vlc(&huff_vlc[i], 8, n, - h->bits, 1, 1, h->codes, 2, 2); + h->bits, 1, 1, h->codes, 2, 2, 1); code_table = av_mallocz(n); j = 0; @@ -387,7 +391,7 @@ static int decode_init(AVCodecContext * avctx) } for(i=0;i<2;i++) { init_vlc(&huff_quad_vlc[i], i == 0 ? 
7 : 4, 16, - mpa_quad_bits[i], 1, 1, mpa_quad_codes[i], 1, 1); + mpa_quad_bits[i], 1, 1, mpa_quad_codes[i], 1, 1, 1); } for(i=0;i<9;i++) { @@ -409,32 +413,17 @@ static int decode_init(AVCodecContext * avctx) int_pow_init(); for(i=1;i<TABLE_4_3_SIZE;i++) { + double f, fm; int e, m; - m = int_pow(i, &e); -#if 0 - /* test code */ - { - double f, fm; - int e1, m1; - f = pow((double)i, 4.0 / 3.0); - fm = frexp(f, &e1); - m1 = FIXR(2 * fm); -#if FRAC_BITS <= 15 - if ((unsigned short)m1 != m1) { - m1 = m1 >> 1; - e1++; - } -#endif - e1--; - if (m != m1 || e != e1) { - printf("%4d: m=%x m1=%x e=%d e1=%d\n", - i, m, m1, e, e1); - } - } -#endif + f = pow((double)(i/4), 4.0 / 3.0) * pow(2, (i&3)*0.25); + fm = frexp(f, &e); + m = (uint32_t)(fm*(1LL<<31) + 0.5); + e+= FRAC_BITS - 31 + 5; + /* normalized to FRAC_BITS */ table_4_3_value[i] = m; - table_4_3_exp[i] = e; +// av_log(NULL, AV_LOG_DEBUG, "%d %d %f\n", i, m, pow((double)i, 4.0 / 3.0)); + table_4_3_exp[i] = -e; } for(i=0;i<7;i++) { @@ -473,38 +462,47 @@ static int decode_init(AVCodecContext * avctx) ci = ci_table[i]; cs = 1.0 / sqrt(1.0 + ci * ci); ca = cs * ci; - csa_table[i][0] = FIX(cs); - csa_table[i][1] = FIX(ca); - csa_table[i][2] = FIX(ca) + FIX(cs); - csa_table[i][3] = FIX(ca) - FIX(cs); + csa_table[i][0] = FIXHR(cs/4); + csa_table[i][1] = FIXHR(ca/4); + csa_table[i][2] = FIXHR(ca/4) + FIXHR(cs/4); + csa_table[i][3] = FIXHR(ca/4) - FIXHR(cs/4); csa_table_float[i][0] = cs; csa_table_float[i][1] = ca; csa_table_float[i][2] = ca + cs; csa_table_float[i][3] = ca - cs; // printf("%d %d %d %d\n", FIX(cs), FIX(cs-1), FIX(ca), FIX(cs)-FIX(ca)); +// av_log(NULL, AV_LOG_DEBUG,"%f %f %f %f\n", cs, ca, ca+cs, ca-cs); } /* compute mdct windows */ for(i=0;i<36;i++) { - int v; - v = FIXR(sin(M_PI * (i + 0.5) / 36.0)); - mdct_win[0][i] = v; - mdct_win[1][i] = v; - mdct_win[3][i] = v; - } - for(i=0;i<6;i++) { - mdct_win[1][18 + i] = FIXR(1.0); - mdct_win[1][24 + i] = FIXR(sin(M_PI * ((i + 6) + 0.5) / 12.0)); - mdct_win[1][30 + i] 
= FIXR(0.0); - - mdct_win[3][i] = FIXR(0.0); - mdct_win[3][6 + i] = FIXR(sin(M_PI * (i + 0.5) / 12.0)); - mdct_win[3][12 + i] = FIXR(1.0); + for(j=0; j<4; j++){ + double d; + + if(j==2 && i%3 != 1) + continue; + + d= sin(M_PI * (i + 0.5) / 36.0); + if(j==1){ + if (i>=30) d= 0; + else if(i>=24) d= sin(M_PI * (i - 18 + 0.5) / 12.0); + else if(i>=18) d= 1; + }else if(j==3){ + if (i< 6) d= 0; + else if(i< 12) d= sin(M_PI * (i - 6 + 0.5) / 12.0); + else if(i< 18) d= 1; + } + //merge last stage of imdct into the window coefficients + d*= 0.5 / cos(M_PI*(2*i + 19)/72); + + if(j==2) + mdct_win[j][i/3] = FIXHR((d / (1<<5))); + else + mdct_win[j][i ] = FIXHR((d / (1<<5))); +// av_log(NULL, AV_LOG_DEBUG, "%2d %d %f\n", i,j,d / (1<<5)); + } } - for(i=0;i<12;i++) - mdct_win[2][i] = FIXR(sin(M_PI * (i + 0.5) / 12.0)); - /* NOTE: we do frequency inversion adter the MDCT by changing the sign of the right window coefs */ for(j=0;j<4;j++) { @@ -531,6 +529,8 @@ static int decode_init(AVCodecContext * avctx) #ifdef DEBUG s->frame_count = 0; #endif + if (avctx->codec_id == CODEC_ID_MP3ADU) + s->adu_mode = 1; return 0; } @@ -753,18 +753,17 @@ static void dct32(int32_t *out, int32_t *tab) out[31] = tab[31]; } -#define OUT_SHIFT (WFRAC_BITS + FRAC_BITS - 15) - #if FRAC_BITS <= 15 -static inline int round_sample(int sum) +static inline int round_sample(int *sum) { int sum1; - sum1 = (sum + (1 << (OUT_SHIFT - 1))) >> OUT_SHIFT; - if (sum1 < -32768) - sum1 = -32768; - else if (sum1 > 32767) - sum1 = 32767; + sum1 = (*sum) >> OUT_SHIFT; + *sum &= (1<<OUT_SHIFT)-1; + if (sum1 < OUT_MIN) + sum1 = OUT_MIN; + else if (sum1 > OUT_MAX) + sum1 = OUT_MAX; return sum1; } @@ -790,14 +789,15 @@ static inline int round_sample(int sum) #else -static inline int round_sample(int64_t sum) +static inline int round_sample(int64_t *sum) { int sum1; - sum1 = (int)((sum + (int64_t_C(1) << (OUT_SHIFT - 1))) >> OUT_SHIFT); - if (sum1 < -32768) - sum1 = -32768; - else if (sum1 > 32767) - sum1 = 32767; + sum1 = 
(int)((*sum) >> OUT_SHIFT); + *sum &= (1<<OUT_SHIFT)-1; + if (sum1 < OUT_MIN) + sum1 = OUT_MIN; + else if (sum1 > OUT_MAX) + sum1 = OUT_MAX; return sum1; } @@ -846,29 +846,48 @@ static inline int round_sample(int64_t sum) sum2 op2 MULS((w2)[7 * 64], tmp);\ } +void ff_mpa_synth_init(MPA_INT *window) +{ + int i; + + /* max = 18760, max sum over all 16 coefs : 44736 */ + for(i=0;i<257;i++) { + int v; + v = mpa_enwindow[i]; +#if WFRAC_BITS < 16 + v = (v + (1 << (16 - WFRAC_BITS - 1))) >> (16 - WFRAC_BITS); +#endif + window[i] = v; + if ((i & 63) != 0) + v = -v; + if (i != 0) + window[512 - i] = v; + } +} /* 32 sub band synthesis filter. Input: 32 sub band samples, Output: 32 samples. */ /* XXX: optimize by avoiding ring buffer usage */ -static void synth_filter(MPADecodeContext *s1, - int ch, int16_t *samples, int incr, +void ff_mpa_synth_filter(MPA_INT *synth_buf_ptr, int *synth_buf_offset, + MPA_INT *window, int *dither_state, + OUT_INT *samples, int incr, int32_t sb_samples[SBLIMIT]) { int32_t tmp[32]; register MPA_INT *synth_buf; - const register MPA_INT *w, *w2, *p; + register const MPA_INT *w, *w2, *p; int j, offset, v; - int16_t *samples2; + OUT_INT *samples2; #if FRAC_BITS <= 15 int sum, sum2; #else int64_t sum, sum2; #endif - + dct32(tmp, sb_samples); - offset = s1->synth_buf_offset[ch]; - synth_buf = s1->synth_buf[ch] + offset; + offset = *synth_buf_offset; + synth_buf = synth_buf_ptr + offset; for(j=0;j<32;j++) { v = tmp[j]; @@ -889,148 +908,116 @@ static void synth_filter(MPADecodeContext *s1, w = window; w2 = window + 31; - sum = 0; + sum = *dither_state; p = synth_buf + 16; SUM8(sum, +=, w, p); p = synth_buf + 48; SUM8(sum, -=, w + 32, p); - *samples = round_sample(sum); + *samples = round_sample(&sum); samples += incr; w++; /* we calculate two samples at the same time to avoid one memory access per two sample */ for(j=1;j<16;j++) { - sum = 0; sum2 = 0; p = synth_buf + 16 + j; SUM8P2(sum, +=, sum2, -=, w, w2, p); p = synth_buf + 48 - j; SUM8P2(sum, -=, 
sum2, -=, w + 32, w2 + 32, p); - *samples = round_sample(sum); + *samples = round_sample(&sum); samples += incr; - *samples2 = round_sample(sum2); + sum += sum2; + *samples2 = round_sample(&sum); samples2 -= incr; w++; w2--; } p = synth_buf + 32; - sum = 0; SUM8(sum, -=, w + 32, p); - *samples = round_sample(sum); + *samples = round_sample(&sum); + *dither_state= sum; offset = (offset - 32) & 511; - s1->synth_buf_offset[ch] = offset; + *synth_buf_offset = offset; } -/* cos(pi*i/24) */ -#define C1 FIXR(0.99144486137381041114) -#define C3 FIXR(0.92387953251128675612) -#define C5 FIXR(0.79335334029123516458) -#define C7 FIXR(0.60876142900872063941) -#define C9 FIXR(0.38268343236508977173) -#define C11 FIXR(0.13052619222005159154) - -/* 12 points IMDCT. We compute it "by hand" by factorizing obvious - cases. */ -static void imdct12(int *out, int *in) -{ - int tmp; - int64_t in1_3, in1_9, in4_3, in4_9; - - in1_3 = MUL64(in[1], C3); - in1_9 = MUL64(in[1], C9); - in4_3 = MUL64(in[4], C3); - in4_9 = MUL64(in[4], C9); - - tmp = FRAC_RND(MUL64(in[0], C7) - in1_3 - MUL64(in[2], C11) + - MUL64(in[3], C1) - in4_9 - MUL64(in[5], C5)); - out[0] = tmp; - out[5] = -tmp; - tmp = FRAC_RND(MUL64(in[0] - in[3], C9) - in1_3 + - MUL64(in[2] + in[5], C3) - in4_9); - out[1] = tmp; - out[4] = -tmp; - tmp = FRAC_RND(MUL64(in[0], C11) - in1_9 + MUL64(in[2], C7) - - MUL64(in[3], C5) + in4_3 - MUL64(in[5], C1)); - out[2] = tmp; - out[3] = -tmp; - tmp = FRAC_RND(MUL64(-in[0], C5) + in1_9 + MUL64(in[2], C1) + - MUL64(in[3], C11) - in4_3 - MUL64(in[5], C7)); - out[6] = tmp; - out[11] = tmp; - tmp = FRAC_RND(MUL64(-in[0] + in[3], C3) - in1_9 + - MUL64(in[2] + in[5], C9) + in4_3); - out[7] = tmp; - out[10] = tmp; - tmp = FRAC_RND(-MUL64(in[0], C1) - in1_3 - MUL64(in[2], C5) - - MUL64(in[3], C7) - in4_9 - MUL64(in[5], C11)); - out[8] = tmp; - out[9] = tmp; -} - -#undef C1 -#undef C3 -#undef C5 -#undef C7 -#undef C9 -#undef C11 - -/* cos(pi*i/18) */ -#define C1 FIXR(0.98480775301220805936) -#define C2 
FIXR(0.93969262078590838405) -#define C3 FIXR(0.86602540378443864676) -#define C4 FIXR(0.76604444311897803520) -#define C5 FIXR(0.64278760968653932632) -#define C6 FIXR(0.5) -#define C7 FIXR(0.34202014332566873304) -#define C8 FIXR(0.17364817766693034885) +#define C3 FIXHR(0.86602540378443864676/2) /* 0.5 / cos(pi*(2*i+1)/36) */ static const int icos36[9] = { FIXR(0.50190991877167369479), - FIXR(0.51763809020504152469), + FIXR(0.51763809020504152469), //0 FIXR(0.55168895948124587824), FIXR(0.61038729438072803416), - FIXR(0.70710678118654752439), + FIXR(0.70710678118654752439), //1 FIXR(0.87172339781054900991), FIXR(1.18310079157624925896), - FIXR(1.93185165257813657349), + FIXR(1.93185165257813657349), //2 FIXR(5.73685662283492756461), }; -static const int icos72[18] = { - /* 0.5 / cos(pi*(2*i+19)/72) */ - FIXR(0.74009361646113053152), - FIXR(0.82133981585229078570), - FIXR(0.93057949835178895673), - FIXR(1.08284028510010010928), - FIXR(1.30656296487637652785), - FIXR(1.66275476171152078719), - FIXR(2.31011315767264929558), - FIXR(3.83064878777019433457), - FIXR(11.46279281302667383546), - - /* 0.5 / cos(pi*(2*(i + 18) +19)/72) */ - FIXR(-0.67817085245462840086), - FIXR(-0.63023620700513223342), - FIXR(-0.59284452371708034528), - FIXR(-0.56369097343317117734), - FIXR(-0.54119610014619698439), - FIXR(-0.52426456257040533932), - FIXR(-0.51213975715725461845), - FIXR(-0.50431448029007636036), - FIXR(-0.50047634258165998492), -}; +/* 12 points IMDCT. We compute it "by hand" by factorizing obvious + cases. 
*/ +static void imdct12(int *out, int *in) +{ + int in0, in1, in2, in3, in4, in5, t1, t2; + + in0= in[0*3]; + in1= in[1*3] + in[0*3]; + in2= in[2*3] + in[1*3]; + in3= in[3*3] + in[2*3]; + in4= in[4*3] + in[3*3]; + in5= in[5*3] + in[4*3]; + in5 += in3; + in3 += in1; + + in2= MULH(2*in2, C3); + in3= MULH(2*in3, C3); + + t1 = in0 - in4; + t2 = MULL(in1 - in5, icos36[4]); + + out[ 7]= + out[10]= t1 + t2; + out[ 1]= + out[ 4]= t1 - t2; + + in0 += in4>>1; + in4 = in0 + in2; + in1 += in5>>1; + in5 = MULL(in1 + in3, icos36[1]); + out[ 8]= + out[ 9]= in4 + in5; + out[ 2]= + out[ 3]= in4 - in5; + + in0 -= in2; + in1 = MULL(in1 - in3, icos36[7]); + out[ 0]= + out[ 5]= in0 - in1; + out[ 6]= + out[11]= in0 + in1; +} + +/* cos(pi*i/18) */ +#define C1 FIXHR(0.98480775301220805936/2) +#define C2 FIXHR(0.93969262078590838405/2) +#define C3 FIXHR(0.86602540378443864676/2) +#define C4 FIXHR(0.76604444311897803520/2) +#define C5 FIXHR(0.64278760968653932632/2) +#define C6 FIXHR(0.5/2) +#define C7 FIXHR(0.34202014332566873304/2) +#define C8 FIXHR(0.17364817766693034885/2) + /* using Lee like decomposition followed by hand coded 9 points DCT */ -static void imdct36(int *out, int *in) +static void imdct36(int *out, int *buf, int *in, int *win) { int i, j, t0, t1, t2, t3, s0, s1, s2, s3; int tmp[18], *tmp1, *in1; - int64_t in3_3, in6_6; for(i=17;i>=1;i--) in[i] += in[i-1]; @@ -1040,30 +1027,61 @@ static void imdct36(int *out, int *in) for(j=0;j<2;j++) { tmp1 = tmp + j; in1 = in + j; +#if 0 +//more accurate but slower + int64_t t0, t1, t2, t3; + t2 = in1[2*4] + in1[2*8] - in1[2*2]; + + t3 = (in1[2*0] + (int64_t)(in1[2*6]>>1))<<32; + t1 = in1[2*0] - in1[2*6]; + tmp1[ 6] = t1 - (t2>>1); + tmp1[16] = t1 + t2; + + t0 = MUL64(2*(in1[2*2] + in1[2*4]), C2); + t1 = MUL64( in1[2*4] - in1[2*8] , -2*C8); + t2 = MUL64(2*(in1[2*2] + in1[2*8]), -C4); + + tmp1[10] = (t3 - t0 - t2) >> 32; + tmp1[ 2] = (t3 + t0 + t1) >> 32; + tmp1[14] = (t3 + t2 - t1) >> 32; + + tmp1[ 4] = MULH(2*(in1[2*5] + in1[2*7] - 
in1[2*1]), -C3); + t2 = MUL64(2*(in1[2*1] + in1[2*5]), C1); + t3 = MUL64( in1[2*5] - in1[2*7] , -2*C7); + t0 = MUL64(2*in1[2*3], C3); + + t1 = MUL64(2*(in1[2*1] + in1[2*7]), -C5); - in3_3 = MUL64(in1[2*3], C3); - in6_6 = MUL64(in1[2*6], C6); - - tmp1[0] = FRAC_RND(MUL64(in1[2*1], C1) + in3_3 + - MUL64(in1[2*5], C5) + MUL64(in1[2*7], C7)); - tmp1[2] = in1[2*0] + FRAC_RND(MUL64(in1[2*2], C2) + - MUL64(in1[2*4], C4) + in6_6 + - MUL64(in1[2*8], C8)); - tmp1[4] = FRAC_RND(MUL64(in1[2*1] - in1[2*5] - in1[2*7], C3)); - tmp1[6] = FRAC_RND(MUL64(in1[2*2] - in1[2*4] - in1[2*8], C6)) - - in1[2*6] + in1[2*0]; - tmp1[8] = FRAC_RND(MUL64(in1[2*1], C5) - in3_3 - - MUL64(in1[2*5], C7) + MUL64(in1[2*7], C1)); - tmp1[10] = in1[2*0] + FRAC_RND(MUL64(-in1[2*2], C8) - - MUL64(in1[2*4], C2) + in6_6 + - MUL64(in1[2*8], C4)); - tmp1[12] = FRAC_RND(MUL64(in1[2*1], C7) - in3_3 + - MUL64(in1[2*5], C1) - - MUL64(in1[2*7], C5)); - tmp1[14] = in1[2*0] + FRAC_RND(MUL64(-in1[2*2], C4) + - MUL64(in1[2*4], C8) + in6_6 - - MUL64(in1[2*8], C2)); - tmp1[16] = in1[2*0] - in1[2*2] + in1[2*4] - in1[2*6] + in1[2*8]; + tmp1[ 0] = (t2 + t3 + t0) >> 32; + tmp1[12] = (t2 + t1 - t0) >> 32; + tmp1[ 8] = (t3 - t1 - t0) >> 32; +#else + t2 = in1[2*4] + in1[2*8] - in1[2*2]; + + t3 = in1[2*0] + (in1[2*6]>>1); + t1 = in1[2*0] - in1[2*6]; + tmp1[ 6] = t1 - (t2>>1); + tmp1[16] = t1 + t2; + + t0 = MULH(2*(in1[2*2] + in1[2*4]), C2); + t1 = MULH( in1[2*4] - in1[2*8] , -2*C8); + t2 = MULH(2*(in1[2*2] + in1[2*8]), -C4); + + tmp1[10] = t3 - t0 - t2; + tmp1[ 2] = t3 + t0 + t1; + tmp1[14] = t3 + t2 - t1; + + tmp1[ 4] = MULH(2*(in1[2*5] + in1[2*7] - in1[2*1]), -C3); + t2 = MULH(2*(in1[2*1] + in1[2*5]), C1); + t3 = MULH( in1[2*5] - in1[2*7] , -2*C7); + t0 = MULH(2*in1[2*3], C3); + + t1 = MULH(2*(in1[2*1] + in1[2*7]), -C5); + + tmp1[ 0] = t2 + t3 + t0; + tmp1[12] = t2 + t1 - t0; + tmp1[ 8] = t3 - t1 - t0; +#endif } i = 0; @@ -1078,54 +1096,32 @@ static void imdct36(int *out, int *in) s1 = MULL(t3 + t2, icos36[j]); s3 = MULL(t3 - 
t2, icos36[8 - j]); - t0 = MULL(s0 + s1, icos72[9 + 8 - j]); - t1 = MULL(s0 - s1, icos72[8 - j]); - out[18 + 9 + j] = t0; - out[18 + 8 - j] = t0; - out[9 + j] = -t1; - out[8 - j] = t1; + t0 = s0 + s1; + t1 = s0 - s1; + out[(9 + j)*SBLIMIT] = MULH(t1, win[9 + j]) + buf[9 + j]; + out[(8 - j)*SBLIMIT] = MULH(t1, win[8 - j]) + buf[8 - j]; + buf[9 + j] = MULH(t0, win[18 + 9 + j]); + buf[8 - j] = MULH(t0, win[18 + 8 - j]); - t0 = MULL(s2 + s3, icos72[9+j]); - t1 = MULL(s2 - s3, icos72[j]); - out[18 + 9 + (8 - j)] = t0; - out[18 + j] = t0; - out[9 + (8 - j)] = -t1; - out[j] = t1; + t0 = s2 + s3; + t1 = s2 - s3; + out[(9 + 8 - j)*SBLIMIT] = MULH(t1, win[9 + 8 - j]) + buf[9 + 8 - j]; + out[( j)*SBLIMIT] = MULH(t1, win[ j]) + buf[ j]; + buf[9 + 8 - j] = MULH(t0, win[18 + 9 + 8 - j]); + buf[ + j] = MULH(t0, win[18 + j]); i += 4; } s0 = tmp[16]; s1 = MULL(tmp[17], icos36[4]); - t0 = MULL(s0 + s1, icos72[9 + 4]); - t1 = MULL(s0 - s1, icos72[4]); - out[18 + 9 + 4] = t0; - out[18 + 8 - 4] = t0; - out[9 + 4] = -t1; - out[8 - 4] = t1; -} - -/* fast header check for resync */ -static int check_header(uint32_t header) -{ - /* header */ - if ((header & 0xffe00000) != 0xffe00000) - return -1; - /* layer check */ - if (((header >> 17) & 3) == 0) - return -1; - /* bit rate */ - if (((header >> 12) & 0xf) == 0xf) - return -1; - /* frequency */ - if (((header >> 10) & 3) == 3) - return -1; - return 0; + t0 = s0 + s1; + t1 = s0 - s1; + out[(9 + 4)*SBLIMIT] = MULH(t1, win[9 + 4]) + buf[9 + 4]; + out[(8 - 4)*SBLIMIT] = MULH(t1, win[8 - 4]) + buf[8 - 4]; + buf[9 + 4] = MULH(t0, win[18 + 9 + 4]); + buf[8 - 4] = MULH(t0, win[18 + 8 - 4]); } -/* header + layer + bitrate + freq + lsf/mpeg25 */ -#define SAME_HEADER_MASK \ - (0xffe00000 | (3 << 17) | (0xf << 12) | (3 << 10) | (3 << 19)) - /* header decoding. MUST check the header before because no consistency check is done there. 
Return 1 if free format found and that the frame size must be computed externally */ @@ -1233,7 +1229,7 @@ int mpa_decode_header(AVCodecContext *avctx, uint32_t head) MPADecodeContext s1, *s = &s1; memset( s, 0, sizeof(MPADecodeContext) ); - if (check_header(head) != 0) + if (ff_mpa_check_header(head) != 0) return -1; if (decode_header(s, head) != 0) { @@ -1920,8 +1916,8 @@ static void compute_stereo(MPADecodeContext *s, static void compute_antialias_integer(MPADecodeContext *s, GranuleDef *g) { - int32_t *ptr, *p0, *p1, *csa; - int n, i, j; + int32_t *ptr, *csa; + int n, i; /* we antialias only "long" bands */ if (g->block_type == 2) { @@ -1935,35 +1931,24 @@ static void compute_antialias_integer(MPADecodeContext *s, ptr = g->sb_hybrid + 18; for(i = n;i > 0;i--) { - p0 = ptr - 1; - p1 = ptr; - csa = &csa_table[0][0]; - for(j=0;j<4;j++) { - int tmp0 = *p0; - int tmp1 = *p1; -#if 0 - *p0 = FRAC_RND(MUL64(tmp0, csa[0]) - MUL64(tmp1, csa[1])); - *p1 = FRAC_RND(MUL64(tmp0, csa[1]) + MUL64(tmp1, csa[0])); -#else - int64_t tmp2= MUL64(tmp0 + tmp1, csa[0]); - *p0 = FRAC_RND(tmp2 - MUL64(tmp1, csa[2])); - *p1 = FRAC_RND(tmp2 + MUL64(tmp0, csa[3])); -#endif - p0--; p1++; - csa += 4; - tmp0 = *p0; - tmp1 = *p1; -#if 0 - *p0 = FRAC_RND(MUL64(tmp0, csa[0]) - MUL64(tmp1, csa[1])); - *p1 = FRAC_RND(MUL64(tmp0, csa[1]) + MUL64(tmp1, csa[0])); -#else - tmp2= MUL64(tmp0 + tmp1, csa[0]); - *p0 = FRAC_RND(tmp2 - MUL64(tmp1, csa[2])); - *p1 = FRAC_RND(tmp2 + MUL64(tmp0, csa[3])); -#endif - p0--; p1++; - csa += 4; - } + int tmp0, tmp1, tmp2; + csa = &csa_table[0][0]; +#define INT_AA(j) \ + tmp0 = ptr[-1-j];\ + tmp1 = ptr[ j];\ + tmp2= MULH(tmp0 + tmp1, csa[0+4*j]);\ + ptr[-1-j] = 4*(tmp2 - MULH(tmp1, csa[2+4*j]));\ + ptr[ j] = 4*(tmp2 + MULH(tmp0, csa[3+4*j])); + + INT_AA(0) + INT_AA(1) + INT_AA(2) + INT_AA(3) + INT_AA(4) + INT_AA(5) + INT_AA(6) + INT_AA(7) + ptr += 18; } } @@ -1971,8 +1956,8 @@ static void compute_antialias_integer(MPADecodeContext *s, static void 
compute_antialias_float(MPADecodeContext *s, GranuleDef *g) { - int32_t *ptr, *p0, *p1; - int n, i, j; + int32_t *ptr; + int n, i; /* we antialias only "long" bands */ if (g->block_type == 2) { @@ -1986,35 +1971,23 @@ static void compute_antialias_float(MPADecodeContext *s, ptr = g->sb_hybrid + 18; for(i = n;i > 0;i--) { + float tmp0, tmp1; float *csa = &csa_table_float[0][0]; - p0 = ptr - 1; - p1 = ptr; - for(j=0;j<4;j++) { - float tmp0 = *p0; - float tmp1 = *p1; -#if 1 - *p0 = lrintf(tmp0 * csa[0] - tmp1 * csa[1]); - *p1 = lrintf(tmp0 * csa[1] + tmp1 * csa[0]); -#else - float tmp2= (tmp0 + tmp1) * csa[0]; - *p0 = lrintf(tmp2 - tmp1 * csa[2]); - *p1 = lrintf(tmp2 + tmp0 * csa[3]); -#endif - p0--; p1++; - csa += 4; - tmp0 = *p0; - tmp1 = *p1; -#if 1 - *p0 = lrintf(tmp0 * csa[0] - tmp1 * csa[1]); - *p1 = lrintf(tmp0 * csa[1] + tmp1 * csa[0]); -#else - tmp2= (tmp0 + tmp1) * csa[0]; - *p0 = lrintf(tmp2 - tmp1 * csa[2]); - *p1 = lrintf(tmp2 + tmp0 * csa[3]); -#endif - p0--; p1++; - csa += 4; - } +#define FLOAT_AA(j)\ + tmp0= ptr[-1-j];\ + tmp1= ptr[ j];\ + ptr[-1-j] = lrintf(tmp0 * csa[0+4*j] - tmp1 * csa[1+4*j]);\ + ptr[ j] = lrintf(tmp0 * csa[1+4*j] + tmp1 * csa[0+4*j]); + + FLOAT_AA(0) + FLOAT_AA(1) + FLOAT_AA(2) + FLOAT_AA(3) + FLOAT_AA(4) + FLOAT_AA(5) + FLOAT_AA(6) + FLOAT_AA(7) + ptr += 18; } } @@ -2024,11 +1997,9 @@ static void compute_imdct(MPADecodeContext *s, int32_t *sb_samples, int32_t *mdct_buf) { - int32_t *ptr, *win, *win1, *buf, *buf2, *out_ptr, *ptr1; - int32_t in[6]; - int32_t out[36]; + int32_t *ptr, *win, *win1, *buf, *out_ptr, *ptr1; int32_t out2[12]; - int i, j, k, mdct_long_end, v, sblimit; + int i, j, mdct_long_end, v, sblimit; /* find last non zero block */ ptr = g->sb_hybrid + 576; @@ -2054,7 +2025,6 @@ static void compute_imdct(MPADecodeContext *s, buf = mdct_buf; ptr = g->sb_hybrid; for(j=0;j<mdct_long_end;j++) { - imdct36(out, ptr); /* apply window & overlap with previous buffer */ out_ptr = sb_samples + j; /* select window */ @@ -2064,45 
+2034,38 @@ static void compute_imdct(MPADecodeContext *s, win1 = mdct_win[g->block_type]; /* select frequency inversion */ win = win1 + ((4 * 36) & -(j & 1)); - for(i=0;i<18;i++) { - *out_ptr = MULL(out[i], win[i]) + buf[i]; - buf[i] = MULL(out[i + 18], win[i + 18]); - out_ptr += SBLIMIT; - } + imdct36(out_ptr, buf, ptr, win); + out_ptr += 18*SBLIMIT; ptr += 18; buf += 18; } for(j=mdct_long_end;j<sblimit;j++) { - for(i=0;i<6;i++) { - out[i] = 0; - out[6 + i] = 0; - out[30+i] = 0; - } /* select frequency inversion */ win = mdct_win[2] + ((4 * 36) & -(j & 1)); - buf2 = out + 6; - for(k=0;k<3;k++) { - /* reorder input for short mdct */ - ptr1 = ptr + k; - for(i=0;i<6;i++) { - in[i] = *ptr1; - ptr1 += 3; - } - imdct12(out2, in); - /* apply 12 point window and do small overlap */ - for(i=0;i<6;i++) { - buf2[i] = MULL(out2[i], win[i]) + buf2[i]; - buf2[i + 6] = MULL(out2[i + 6], win[i + 6]); - } - buf2 += 6; - } - /* overlap */ out_ptr = sb_samples + j; - for(i=0;i<18;i++) { - *out_ptr = out[i] + buf[i]; - buf[i] = out[i + 18]; + + for(i=0; i<6; i++){ + *out_ptr = buf[i]; out_ptr += SBLIMIT; } + imdct12(out2, ptr + 0); + for(i=0;i<6;i++) { + *out_ptr = MULH(out2[i], win[i]) + buf[i + 6*1]; + buf[i + 6*2] = MULH(out2[i + 6], win[i + 6]); + out_ptr += SBLIMIT; + } + imdct12(out2, ptr + 1); + for(i=0;i<6;i++) { + *out_ptr = MULH(out2[i], win[i]) + buf[i + 6*2]; + buf[i + 6*0] = MULH(out2[i + 6], win[i + 6]); + out_ptr += SBLIMIT; + } + imdct12(out2, ptr + 2); + for(i=0;i<6;i++) { + buf[i + 6*0] = MULH(out2[i], win[i]) + buf[i + 6*0]; + buf[i + 6*1] = MULH(out2[i + 6], win[i + 6]); + buf[i + 6*2] = 0; + } ptr += 18; buf += 18; } @@ -2129,7 +2092,7 @@ void sample_dump(int fnum, int32_t *tab, int n) f = files[fnum]; if (!f) { - sprintf(buf, "/tmp/out%d.%s.pcm", + snprintf(buf, sizeof(buf), "/tmp/out%d.%s.pcm", fnum, #ifdef USE_HIGHPRECISION "hp" @@ -2145,11 +2108,11 @@ void sample_dump(int fnum, int32_t *tab, int n) if (fnum == 0) { static int pos = 0; - printf("pos=%d\n", 
pos); + av_log(NULL, AV_LOG_DEBUG, "pos=%d\n", pos); for(i=0;i<n;i++) { - printf(" %0.4f", (double)tab[i] / FRAC_ONE); + av_log(NULL, AV_LOG_DEBUG, " %0.4f", (double)tab[i] / FRAC_ONE); if ((i % 18) == 17) - printf("\n"); + av_log(NULL, AV_LOG_DEBUG, "\n"); } pos += n; } @@ -2297,9 +2260,11 @@ static int mp_decode_layer3(MPADecodeContext *s) } } + if (!s->adu_mode) { /* now we get bits from the main_data_begin offset */ dprintf("seekback: %d\n", main_data_begin); seek_to_maindata(s, main_data_begin); + } for(gr=0;gr<nb_granules;gr++) { for(ch=0;ch<s->nb_channels;ch++) { @@ -2459,10 +2424,10 @@ static int mp_decode_layer3(MPADecodeContext *s) } static int mp_decode_frame(MPADecodeContext *s, - short *samples) + OUT_INT *samples) { int i, nb_frames, ch; - short *samples_ptr; + OUT_INT *samples_ptr; init_get_bits(&s->gb, s->inbuf + HEADER_SIZE, (s->inbuf_ptr - s->inbuf - HEADER_SIZE)*8); @@ -2499,7 +2464,9 @@ static int mp_decode_frame(MPADecodeContext *s, for(ch=0;ch<s->nb_channels;ch++) { samples_ptr = samples + ch; for(i=0;i<nb_frames;i++) { - synth_filter(s, ch, samples_ptr, s->nb_channels, + ff_mpa_synth_filter(s->synth_buf[ch], &(s->synth_buf_offset[ch]), + window, &s->dither_state, + samples_ptr, s->nb_channels, s->sb_samples[ch][i]); samples_ptr += 32 * s->nb_channels; } @@ -2507,7 +2474,7 @@ static int mp_decode_frame(MPADecodeContext *s, #ifdef DEBUG s->frame_count++; #endif - return nb_frames * 32 * sizeof(short) * s->nb_channels; + return nb_frames * 32 * sizeof(OUT_INT) * s->nb_channels; } static int decode_frame(AVCodecContext * avctx, @@ -2518,7 +2485,7 @@ static int decode_frame(AVCodecContext * avctx, uint32_t header; uint8_t *buf_ptr; int len, out_size; - short *out_samples = data; + OUT_INT *out_samples = data; buf_ptr = buf; while (buf_size > 0) { @@ -2551,7 +2518,7 @@ static int decode_frame(AVCodecContext * avctx, header = (s->inbuf[0] << 24) | (s->inbuf[1] << 16) | (s->inbuf[2] << 8) | s->inbuf[3]; - if (check_header(header) < 0) { + if 
(ff_mpa_check_header(header) < 0) { /* no sync found : move by one byte (inefficient, but simple!) */ memmove(s->inbuf, s->inbuf + 1, s->inbuf_ptr - s->inbuf - 1); s->inbuf_ptr--; @@ -2668,6 +2635,226 @@ static int decode_frame(AVCodecContext * avctx, return buf_ptr - buf; } + +static int decode_frame_adu(AVCodecContext * avctx, + void *data, int *data_size, + uint8_t * buf, int buf_size) +{ + MPADecodeContext *s = avctx->priv_data; + uint32_t header; + int len, out_size; + OUT_INT *out_samples = data; + + len = buf_size; + + // Discard too short frames + if (buf_size < HEADER_SIZE) { + *data_size = 0; + return buf_size; + } + + + if (len > MPA_MAX_CODED_FRAME_SIZE) + len = MPA_MAX_CODED_FRAME_SIZE; + + memcpy(s->inbuf, buf, len); + s->inbuf_ptr = s->inbuf + len; + + // Get header and restore sync word + header = (s->inbuf[0] << 24) | (s->inbuf[1] << 16) | + (s->inbuf[2] << 8) | s->inbuf[3] | 0xffe00000; + + if (ff_mpa_check_header(header) < 0) { // Bad header, discard frame + *data_size = 0; + return buf_size; + } + + decode_header(s, header); + /* update codec info */ + avctx->sample_rate = s->sample_rate; + avctx->channels = s->nb_channels; + avctx->bit_rate = s->bit_rate; + avctx->sub_id = s->layer; + + avctx->frame_size=s->frame_size = len; + + if (avctx->parse_only) { + /* simply return the frame data */ + *(uint8_t **)data = s->inbuf; + out_size = s->inbuf_ptr - s->inbuf; + } else { + out_size = mp_decode_frame(s, out_samples); + } + + *data_size = out_size; + return buf_size; +} + + +/* Next 3 arrays are indexed by channel config number (passed via codecdata) */ +static int mp3Frames[16] = {0,1,1,2,3,3,4,5,2}; /* number of mp3 decoder instances */ +static int mp3Channels[16] = {0,1,2,3,4,5,6,8,4}; /* total output channels */ +/* offsets into output buffer, assume output order is FL FR BL BR C LFE */ +static int chan_offset[9][5] = { + {0}, + {0}, // C + {0}, // FLR + {2,0}, // C FLR + {2,0,3}, // C FLR BS + {4,0,2}, // C FLR BLRS + {4,0,2,5}, // C FLR BLRS 
LFE + {4,0,2,6,5}, // C FLR BLRS BLR LFE + {0,2} // FLR BLRS +}; + + +static int decode_init_mp3on4(AVCodecContext * avctx) +{ + MP3On4DecodeContext *s = avctx->priv_data; + int i; + + if ((avctx->extradata_size < 2) || (avctx->extradata == NULL)) { + av_log(avctx, AV_LOG_ERROR, "Codec extradata missing or too short.\n"); + return -1; + } + + s->chan_cfg = (((unsigned char *)avctx->extradata)[1] >> 3) & 0x0f; + s->frames = mp3Frames[s->chan_cfg]; + if(!s->frames) { + av_log(avctx, AV_LOG_ERROR, "Invalid channel config number.\n"); + return -1; + } + avctx->channels = mp3Channels[s->chan_cfg]; + + /* Init the first mp3 decoder in standard way, so that all tables get builded + * We replace avctx->priv_data with the context of the first decoder so that + * decode_init() does not have to be changed. + * Other decoders will be inited here copying data from the first context + */ + // Allocate zeroed memory for the first decoder context + s->mp3decctx[0] = av_mallocz(sizeof(MPADecodeContext)); + // Put decoder context in place to make init_decode() happy + avctx->priv_data = s->mp3decctx[0]; + decode_init(avctx); + // Restore mp3on4 context pointer + avctx->priv_data = s; + s->mp3decctx[0]->adu_mode = 1; // Set adu mode + + /* Create a separate codec/context for each frame (first is already ok). 
+ * Each frame is 1 or 2 channels - up to 5 frames allowed + */ + for (i = 1; i < s->frames; i++) { + s->mp3decctx[i] = av_mallocz(sizeof(MPADecodeContext)); + s->mp3decctx[i]->compute_antialias = s->mp3decctx[0]->compute_antialias; + s->mp3decctx[i]->inbuf = &s->mp3decctx[i]->inbuf1[0][BACKSTEP_SIZE]; + s->mp3decctx[i]->inbuf_ptr = s->mp3decctx[i]->inbuf; + s->mp3decctx[i]->adu_mode = 1; + } + + return 0; +} + + +static int decode_close_mp3on4(AVCodecContext * avctx) +{ + MP3On4DecodeContext *s = avctx->priv_data; + int i; + + for (i = 0; i < s->frames; i++) + if (s->mp3decctx[i]) + av_free(s->mp3decctx[i]); + + return 0; +} + + +static int decode_frame_mp3on4(AVCodecContext * avctx, + void *data, int *data_size, + uint8_t * buf, int buf_size) +{ + MP3On4DecodeContext *s = avctx->priv_data; + MPADecodeContext *m; + int len, out_size = 0; + uint32_t header; + OUT_INT *out_samples = data; + OUT_INT decoded_buf[MPA_FRAME_SIZE * MPA_MAX_CHANNELS]; + OUT_INT *outptr, *bp; + int fsize; + unsigned char *start2 = buf, *start; + int fr, i, j, n; + int off = avctx->channels; + int *coff = chan_offset[s->chan_cfg]; + + len = buf_size; + + // Discard too short frames + if (buf_size < HEADER_SIZE) { + *data_size = 0; + return buf_size; + } + + // If only one decoder interleave is not needed + outptr = s->frames == 1 ? 
out_samples : decoded_buf; + + for (fr = 0; fr < s->frames; fr++) { + start = start2; + fsize = (start[0] << 4) | (start[1] >> 4); + start2 += fsize; + if (fsize > len) + fsize = len; + len -= fsize; + if (fsize > MPA_MAX_CODED_FRAME_SIZE) + fsize = MPA_MAX_CODED_FRAME_SIZE; + m = s->mp3decctx[fr]; + assert (m != NULL); + /* copy original to new */ + m->inbuf_ptr = m->inbuf + fsize; + memcpy(m->inbuf, start, fsize); + + // Get header + header = (m->inbuf[0] << 24) | (m->inbuf[1] << 16) | + (m->inbuf[2] << 8) | m->inbuf[3] | 0xfff00000; + + if (ff_mpa_check_header(header) < 0) { // Bad header, discard block + *data_size = 0; + return buf_size; + } + + decode_header(m, header); + mp_decode_frame(m, decoded_buf); + + n = MPA_FRAME_SIZE * m->nb_channels; + out_size += n * sizeof(OUT_INT); + if(s->frames > 1) { + /* interleave output data */ + bp = out_samples + coff[fr]; + if(m->nb_channels == 1) { + for(j = 0; j < n; j++) { + *bp = decoded_buf[j]; + bp += off; + } + } else { + for(j = 0; j < n; j++) { + bp[0] = decoded_buf[j++]; + bp[1] = decoded_buf[j]; + bp += off; + } + } + } + } + + /* update codec info */ + avctx->sample_rate = s->mp3decctx[0]->sample_rate; + avctx->frame_size= buf_size; + avctx->bit_rate = 0; + for (i = 0; i < s->frames; i++) + avctx->bit_rate += s->mp3decctx[i]->bit_rate; + + *data_size = out_size; + return buf_size; +} + + AVCodec mp2_decoder = { "mp2", @@ -2693,3 +2880,29 @@ AVCodec mp3_decoder = decode_frame, CODEC_CAP_PARSE_ONLY, }; + +AVCodec mp3adu_decoder = +{ + "mp3adu", + CODEC_TYPE_AUDIO, + CODEC_ID_MP3ADU, + sizeof(MPADecodeContext), + decode_init, + NULL, + NULL, + decode_frame_adu, + CODEC_CAP_PARSE_ONLY, +}; + +AVCodec mp3on4_decoder = +{ + "mp3on4", + CODEC_TYPE_AUDIO, + CODEC_ID_MP3ON4, + sizeof(MP3On4DecodeContext), + decode_init_mp3on4, + NULL, + decode_close_mp3on4, + decode_frame_mp3on4, + 0 +}; diff --git a/src/libffmpeg/libavcodec/mpegvideo.c b/src/libffmpeg/libavcodec/mpegvideo.c index 460fdbb32..6eecd0259 100644 --- 
a/src/libffmpeg/libavcodec/mpegvideo.c +++ b/src/libffmpeg/libavcodec/mpegvideo.c @@ -117,9 +117,10 @@ static uint8_t default_fcode_tab[MAX_MV*2+1]; enum PixelFormat ff_yuv420p_list[2]= {PIX_FMT_YUV420P, -1}; static void convert_matrix(DSPContext *dsp, int (*qmat)[64], uint16_t (*qmat16)[2][64], - const uint16_t *quant_matrix, int bias, int qmin, int qmax) + const uint16_t *quant_matrix, int bias, int qmin, int qmax, int intra) { int qscale; + int shift=0; for(qscale=qmin; qscale<=qmax; qscale++){ int i; @@ -169,6 +170,23 @@ static void convert_matrix(DSPContext *dsp, int (*qmat)[64], uint16_t (*qmat16)[ qmat16[qscale][1][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][0][i]); } } + + for(i=intra; i<64; i++){ + int64_t max= 8191; + if (dsp->fdct == fdct_ifast +#ifndef FAAN_POSTSCALE + || dsp->fdct == ff_faandct +#endif + ) { + max= (8191LL*aanscales[i]) >> 14; + } + while(((max * qmat[qscale][i]) >> shift) > INT_MAX){ + shift++; + } + } + } + if(shift){ + av_log(NULL, AV_LOG_INFO, "Warning, QMAT_SHIFT is larger then %d, overflows possible\n", QMAT_SHIFT - shift); } } @@ -231,7 +249,7 @@ int DCT_common_init(MpegEncContext *s) #ifdef CONFIG_ENCODERS s->dct_quantize= dct_quantize_c; s->denoise_dct= denoise_dct_c; -#endif +#endif //CONFIG_ENCODERS #ifdef HAVE_MMX MPV_common_init_mmx(s); @@ -375,15 +393,15 @@ static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){ pic->mb_type= pic->mb_type_base + s->mb_stride+1; if(s->out_format == FMT_H264){ for(i=0; i<2; i++){ - CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b4_array_size+2) * sizeof(int16_t)) - pic->motion_val[i]= pic->motion_val_base[i]+2; + CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b4_array_size+4) * sizeof(int16_t)) + pic->motion_val[i]= pic->motion_val_base[i]+4; CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t)) } pic->motion_subsample_log2= 2; }else if(s->out_format == FMT_H263 || s->encoding || (s->avctx->debug&FF_DEBUG_MV) || (s->avctx->debug_mv)){ for(i=0; i<2; 
i++){ - CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b8_array_size+2) * sizeof(int16_t)) - pic->motion_val[i]= pic->motion_val_base[i]+2; + CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b8_array_size+4) * sizeof(int16_t)) + pic->motion_val[i]= pic->motion_val_base[i]+4; CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t)) } pic->motion_subsample_log2= 3; @@ -447,7 +465,7 @@ static int init_duplicate_context(MpegEncContext *s, MpegEncContext *base){ s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*17; //FIXME should be linesize instead of s->width*2 but that isnt known before get_buffer() - CHECKED_ALLOCZ(s->me.scratchpad, (s->width+64)*2*16*2*sizeof(uint8_t)) + CHECKED_ALLOCZ(s->me.scratchpad, (s->width+64)*4*16*2*sizeof(uint8_t)) s->rd_scratchpad= s->me.scratchpad; s->b_scratchpad= s->me.scratchpad; s->obmc_scratchpad= s->me.scratchpad + 16; @@ -584,7 +602,6 @@ static void MPV_encode_defaults(MpegEncContext *s){ done=1; default_mv_penalty= av_mallocz( sizeof(uint8_t)*(MAX_FCODE+1)*(2*MAX_MV+1) ); - memset(default_mv_penalty, 0, sizeof(uint8_t)*(MAX_FCODE+1)*(2*MAX_MV+1)); memset(default_fcode_tab , 0, sizeof(uint8_t)*(2*MAX_MV+1)); for(i=-16; i<16; i++){ @@ -609,6 +626,9 @@ int MPV_common_init(MpegEncContext *s) return -1; } + if((s->width || s->height) && avcodec_check_dimensions(s->avctx, s->width, s->height)) + return -1; + dsputil_init(&s->dsp, s->avctx); DCT_common_init(s); @@ -623,6 +643,10 @@ int MPV_common_init(MpegEncContext *s) mb_array_size= s->mb_height * s->mb_stride; mv_table_size= (s->mb_height+2) * s->mb_stride + 1; + /* set chroma shifts */ + avcodec_get_chroma_sub_sample(s->avctx->pix_fmt,&(s->chroma_x_shift), + &(s->chroma_y_shift) ); + /* set default edge pos, will be overriden in decode_header if needed */ s->h_edge_pos= s->mb_width*16; s->v_edge_pos= s->mb_height*16; @@ -728,9 +752,6 @@ int MPV_common_init(MpegEncContext *s) CHECKED_ALLOCZ(s->coded_block_base, y_size); s->coded_block= s->coded_block_base + 
s->b8_stride + 1; - /* divx501 bitstream reorder buffer */ - CHECKED_ALLOCZ(s->bitstream_buffer, BITSTREAM_BUFFER_SIZE); - /* cbp, ac_pred, pred_dir */ CHECKED_ALLOCZ(s->cbp_table , mb_array_size * sizeof(uint8_t)) CHECKED_ALLOCZ(s->pred_dir_table, mb_array_size * sizeof(uint8_t)) @@ -835,6 +856,8 @@ void MPV_common_end(MpegEncContext *s) av_freep(&s->mbskip_table); av_freep(&s->prev_pict_types); av_freep(&s->bitstream_buffer); + s->allocated_bitstream_buffer_size=0; + av_freep(&s->avctx->stats_out); av_freep(&s->ac_stats); av_freep(&s->error_status_table); @@ -858,9 +881,12 @@ void MPV_common_end(MpegEncContext *s) s->last_picture_ptr= s->next_picture_ptr= s->current_picture_ptr= NULL; + s->linesize= s->uvlinesize= 0; for(i=0; i<3; i++) av_freep(&s->visualization_buffer[i]); + + avcodec_default_free_buffers(s->avctx); } #ifdef CONFIG_ENCODERS @@ -874,7 +900,22 @@ int MPV_encode_init(AVCodecContext *avctx) MPV_encode_defaults(s); - avctx->pix_fmt = PIX_FMT_YUV420P; // FIXME + if(avctx->pix_fmt != PIX_FMT_YUVJ420P && avctx->pix_fmt != PIX_FMT_YUV420P){ + av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n"); + return -1; + } + + if(avctx->codec_id == CODEC_ID_MJPEG || avctx->codec_id == CODEC_ID_LJPEG){ + if(avctx->strict_std_compliance>=0 && avctx->pix_fmt != PIX_FMT_YUVJ420P){ + av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n"); + return -1; + } + }else{ + if(avctx->strict_std_compliance>=0 && avctx->pix_fmt != PIX_FMT_YUV420P){ + av_log(avctx, AV_LOG_ERROR, "colorspace not supported\n"); + return -1; + } + } s->bit_rate = avctx->bit_rate; s->width = avctx->width; @@ -897,6 +938,7 @@ int MPV_encode_init(AVCodecContext *avctx) s->mpeg_quant= avctx->mpeg_quant; s->rtp_mode= !!avctx->rtp_payload_size; s->intra_dc_precision= avctx->intra_dc_precision; + s->user_specified_pts = AV_NOPTS_VALUE; if (s->gop_size <= 1) { s->intra_only = 1; @@ -915,6 +957,7 @@ int MPV_encode_init(AVCodecContext *avctx) || s->avctx->temporal_cplx_masking || 
s->avctx->spatial_cplx_masking || s->avctx->p_masking + || s->avctx->border_masking || (s->flags&CODEC_FLAG_QP_RD)) && !s->fixed_qscale; @@ -931,6 +974,16 @@ int MPV_encode_init(AVCodecContext *avctx) av_log(avctx, AV_LOG_INFO, "Warning min_rate > 0 but min_rate != max_rate isnt recommanded!\n"); } + if(avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate){ + av_log(avctx, AV_LOG_INFO, "bitrate below min bitrate\n"); + return -1; + } + + if(avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate){ + av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n"); + return -1; + } + if( s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate && (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO) && 90000LL * (avctx->rc_buffer_size-1) > s->avctx->rc_max_rate*0xFFFFLL){ @@ -1005,6 +1058,21 @@ int MPV_encode_init(AVCodecContext *avctx) if(s->avctx->thread_count > 1) s->rtp_mode= 1; + if(!avctx->frame_rate || !avctx->frame_rate_base){ + av_log(avctx, AV_LOG_ERROR, "framerate not set\n"); + return -1; + } + + i= (INT_MAX/2+128)>>8; + if(avctx->me_threshold >= i){ + av_log(avctx, AV_LOG_ERROR, "me_threshold too large, max is %d\n", i - 1); + return -1; + } + if(avctx->mb_threshold >= i){ + av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n", i - 1); + return -1; + } + i= ff_gcd(avctx->frame_rate, avctx->frame_rate_base); if(i > 1){ av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n"); @@ -1065,7 +1133,11 @@ int MPV_encode_init(AVCodecContext *avctx) avctx->delay=0; s->low_delay=1; break; -#ifdef CONFIG_RISKY + case CODEC_ID_H261: + s->out_format = FMT_H261; + avctx->delay=0; + s->low_delay=1; + break; case CODEC_ID_H263: if (h263_get_picture_format(s->width, s->height) == 7) { av_log(avctx, AV_LOG_INFO, "Input picture size isn't suitable for h263 codec! 
try h263+\n"); @@ -1107,6 +1179,16 @@ int MPV_encode_init(AVCodecContext *avctx) avctx->delay=0; s->low_delay=1; break; + case CODEC_ID_RV20: + s->out_format = FMT_H263; + avctx->delay=0; + s->low_delay=1; + s->modified_quant=1; + s->h263_aic=1; + s->h263_plus=1; + s->loop_filter=1; + s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus; + break; case CODEC_ID_MPEG4: s->out_format = FMT_H263; s->h263_pred = 1; @@ -1162,11 +1244,12 @@ int MPV_encode_init(AVCodecContext *avctx) avctx->delay=0; s->low_delay=1; break; -#endif #endif /* #if 0 */ default: return -1; } + + avctx->has_b_frames= !s->low_delay; s->encoding = 1; @@ -1181,39 +1264,31 @@ int MPV_encode_init(AVCodecContext *avctx) s->quant_precision=5; ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp); + ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp); /* xine: do not need this for decode or MPEG-1 encoding modes */ #if 0 - ff_init_me(s); -#endif /* #if 0 */ - -#ifdef CONFIG_ENCODERS -/* xine: do not need this for decode or MPEG-1 encoding modes */ -#if 0 -#ifdef CONFIG_RISKY + if (s->out_format == FMT_H261) + ff_h261_encode_init(s); if (s->out_format == FMT_H263) h263_encode_init(s); if(s->msmpeg4_version) ff_msmpeg4_encode_init(s); -#endif #endif /* #if 0 */ -/* xine: we do want this for MPEG-1 encoding */ +/* xine: we DO want this for MPEG-1 encoding */ if (s->out_format == FMT_MPEG1) ff_mpeg1_encode_init(s); -#endif /* init q matrix */ for(i=0;i<64;i++) { int j= s->dsp.idct_permutation[i]; -#ifdef CONFIG_RISKY if(s->codec_id==CODEC_ID_MPEG4 && s->mpeg_quant){ s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i]; s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i]; - }else if(s->out_format == FMT_H263){ + }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){ s->intra_matrix[j] = s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i]; }else -#endif { /* mpeg1/2 */ s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i]; s->inter_matrix[j] = 
ff_mpeg1_default_non_intra_matrix[i]; @@ -1228,9 +1303,9 @@ int MPV_encode_init(AVCodecContext *avctx) /* for mjpeg, we do include qscale in the matrix */ if (s->out_format != FMT_MJPEG) { convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16, - s->intra_matrix, s->intra_quant_bias, 1, 31); + s->intra_matrix, s->intra_quant_bias, avctx->qmin, 31, 1); convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16, - s->inter_matrix, s->inter_quant_bias, 1, 31); + s->inter_matrix, s->inter_quant_bias, avctx->qmin, 31, 0); } if(ff_rate_control_init(s) < 0) @@ -1263,12 +1338,16 @@ int MPV_encode_end(AVCodecContext *avctx) #endif //CONFIG_ENCODERS -void init_rl(RLTable *rl) +void init_rl(RLTable *rl, int use_static) { int8_t max_level[MAX_RUN+1], max_run[MAX_LEVEL+1]; uint8_t index_run[MAX_RUN+1]; int last, run, level, start, end, i; + /* If table is static, we can quit if rl->max_level[0] is not NULL */ + if(use_static && rl->max_level[0]) + return; + /* compute max_level[], max_run[] and index_run[] */ for(last=0;last<2;last++) { if (last == 0) { @@ -1292,11 +1371,20 @@ void init_rl(RLTable *rl) if (run > max_run[level]) max_run[level] = run; } - rl->max_level[last] = av_malloc(MAX_RUN + 1); + if(use_static) + rl->max_level[last] = av_mallocz_static(MAX_RUN + 1); + else + rl->max_level[last] = av_malloc(MAX_RUN + 1); memcpy(rl->max_level[last], max_level, MAX_RUN + 1); - rl->max_run[last] = av_malloc(MAX_LEVEL + 1); + if(use_static) + rl->max_run[last] = av_mallocz_static(MAX_LEVEL + 1); + else + rl->max_run[last] = av_malloc(MAX_LEVEL + 1); memcpy(rl->max_run[last], max_run, MAX_LEVEL + 1); - rl->index_run[last] = av_malloc(MAX_RUN + 1); + if(use_static) + rl->index_run[last] = av_mallocz_static(MAX_RUN + 1); + else + rl->index_run[last] = av_malloc(MAX_RUN + 1); memcpy(rl->index_run[last], index_run, MAX_RUN + 1); } } @@ -1409,7 +1497,8 @@ alloc: pic= (AVFrame*)&s->picture[i]; } - pic->reference= s->pict_type != B_TYPE && !s->dropable ? 
3 : 0; + pic->reference= (s->pict_type != B_TYPE || s->codec_id == CODEC_ID_H264) + && !s->dropable ? 3 : 0; pic->coded_picture_number= s->coded_picture_number++; @@ -1472,7 +1561,7 @@ alloc: if(s->mpeg_quant || s->codec_id == CODEC_ID_MPEG2VIDEO){ s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra; s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter; - }else if(s->out_format == FMT_H263){ + }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){ s->dct_unquantize_intra = s->dct_unquantize_h263_intra; s->dct_unquantize_inter = s->dct_unquantize_h263_inter; }else{ @@ -1504,7 +1593,7 @@ void MPV_frame_end(MpegEncContext *s) XVMC_field_end(s); }else #endif - if(s->unrestricted_mv && s->pict_type != B_TYPE && !s->intra_only && !(s->flags&CODEC_FLAG_EMU_EDGE)) { + if(s->unrestricted_mv && s->current_picture.reference && !s->intra_only && !(s->flags&CODEC_FLAG_EMU_EDGE)) { draw_edges(s->current_picture.data[0], s->linesize , s->h_edge_pos , s->v_edge_pos , EDGE_WIDTH ); draw_edges(s->current_picture.data[1], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2); draw_edges(s->current_picture.data[2], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2); @@ -1540,6 +1629,7 @@ void MPV_frame_end(MpegEncContext *s) memset(&s->next_picture, 0, sizeof(Picture)); memset(&s->current_picture, 0, sizeof(Picture)); #endif + s->avctx->coded_frame= (AVFrame*)s->current_picture_ptr; } /** @@ -1713,11 +1803,15 @@ void ff_print_debug_info(MpegEncContext *s, AVFrame *pict){ uint8_t *ptr; int i; int h_chroma_shift, v_chroma_shift; + const int width = s->avctx->width; + const int height= s->avctx->height; + const int mv_sample_log2= 4 - pict->motion_subsample_log2; + const int mv_stride= (s->mb_width << mv_sample_log2) + 1; s->low_delay=0; //needed to see the vectors without trashing the buffers avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift); for(i=0; i<3; i++){ - memcpy(s->visualization_buffer[i], 
pict->data[i], (i==0) ? pict->linesize[i]*s->height:pict->linesize[i]*s->height >> v_chroma_shift); + memcpy(s->visualization_buffer[i], pict->data[i], (i==0) ? pict->linesize[i]*height:pict->linesize[i]*height >> v_chroma_shift); pict->data[i]= s->visualization_buffer[i]; } pict->type= FF_BUFFER_TYPE_COPY; @@ -1748,38 +1842,51 @@ void ff_print_debug_info(MpegEncContext *s, AVFrame *pict){ if(!USES_LIST(pict->mb_type[mb_index], direction)) continue; - //FIXME for h264 if(IS_8X8(pict->mb_type[mb_index])){ int i; for(i=0; i<4; i++){ int sx= mb_x*16 + 4 + 8*(i&1); int sy= mb_y*16 + 4 + 8*(i>>1); - int xy= mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*s->b8_stride; + int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1); int mx= (pict->motion_val[direction][xy][0]>>shift) + sx; int my= (pict->motion_val[direction][xy][1]>>shift) + sy; - draw_arrow(ptr, sx, sy, mx, my, s->width, s->height, s->linesize, 100); + draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100); } }else if(IS_16X8(pict->mb_type[mb_index])){ int i; for(i=0; i<2; i++){ int sx=mb_x*16 + 8; int sy=mb_y*16 + 4 + 8*i; - int xy= mb_x*2 + (mb_y*2 + i)*s->b8_stride; + int xy= (mb_x*2 + (mb_y*2 + i)*mv_stride) << (mv_sample_log2-1); + int mx=(pict->motion_val[direction][xy][0]>>shift); + int my=(pict->motion_val[direction][xy][1]>>shift); + + if(IS_INTERLACED(pict->mb_type[mb_index])) + my*=2; + + draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100); + } + }else if(IS_8X16(pict->mb_type[mb_index])){ + int i; + for(i=0; i<2; i++){ + int sx=mb_x*16 + 4 + 8*i; + int sy=mb_y*16 + 8; + int xy= (mb_x*2 + i + mb_y*2*mv_stride) << (mv_sample_log2-1); int mx=(pict->motion_val[direction][xy][0]>>shift); int my=(pict->motion_val[direction][xy][1]>>shift); if(IS_INTERLACED(pict->mb_type[mb_index])) my*=2; - draw_arrow(ptr, sx, sy, mx+sx, my+sy, s->width, s->height, s->linesize, 100); + draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100); } }else{ int sx= mb_x*16 + 
8; int sy= mb_y*16 + 8; - int xy= mb_x*2 + mb_y*2*s->b8_stride; + int xy= (mb_x + mb_y*mv_stride) << mv_sample_log2; int mx= (pict->motion_val[direction][xy][0]>>shift) + sx; int my= (pict->motion_val[direction][xy][1]>>shift) + sy; - draw_arrow(ptr, sx, sy, mx, my, s->width, s->height, s->linesize, 100); + draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100); } } } @@ -1842,6 +1949,21 @@ v= (int)(128 + r*sin(theta*3.141592/180)); for(y=0; y<16; y++) pict->data[0][16*mb_x + 8 + (16*mb_y + y)*pict->linesize[0]]^= 0x80; } + if(IS_8X8(mb_type) && mv_sample_log2 >= 2){ + int dm= 1 << (mv_sample_log2-2); + for(i=0; i<4; i++){ + int sx= mb_x*16 + 8*(i&1); + int sy= mb_y*16 + 8*(i>>1); + int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1); + //FIXME bidir + int32_t *mv = (int32_t*)&pict->motion_val[0][xy]; + if(mv[0] != mv[dm] || mv[dm*mv_stride] != mv[dm*(mv_stride+1)]) + for(y=0; y<8; y++) + pict->data[0][sx + 4 + (sy + y)*pict->linesize[0]]^= 0x80; + if(mv[0] != mv[dm*mv_stride] || mv[dm] != mv[dm*(mv_stride+1)]) + *(uint64_t*)(pict->data[0] + sx + (sy + 4)*pict->linesize[0])^= 0x8080808080808080ULL; + } + } if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264){ // hmm @@ -1891,10 +2013,37 @@ static int get_intra_count(MpegEncContext *s, uint8_t *src, uint8_t *ref, int st static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){ AVFrame *pic=NULL; + int64_t pts; int i; const int encoding_delay= s->max_b_frames; int direct=1; + if(pic_arg){ + pts= pic_arg->pts; + pic_arg->display_picture_number= s->input_picture_number++; + + if(pts != AV_NOPTS_VALUE){ + if(s->user_specified_pts != AV_NOPTS_VALUE){ + int64_t time= av_rescale(pts, s->avctx->frame_rate, s->avctx->frame_rate_base*(int64_t)AV_TIME_BASE); + int64_t last= av_rescale(s->user_specified_pts, s->avctx->frame_rate, s->avctx->frame_rate_base*(int64_t)AV_TIME_BASE); + + if(time <= last){ + av_log(s->avctx, AV_LOG_ERROR, "Error, Invalid timestamp=%Ld, last=%Ld\n", 
pts, s->user_specified_pts); + return -1; + } + } + s->user_specified_pts= pts; + }else{ + if(s->user_specified_pts != AV_NOPTS_VALUE){ + s->user_specified_pts= + pts= s->user_specified_pts + AV_TIME_BASE*(int64_t)s->avctx->frame_rate_base / s->avctx->frame_rate; + av_log(s->avctx, AV_LOG_INFO, "Warning: AVFrame.pts=? trying to guess (%Ld)\n", pts); + }else{ + pts= av_rescale(pic_arg->display_picture_number*(int64_t)s->avctx->frame_rate_base, AV_TIME_BASE, s->avctx->frame_rate); + } + } + } + if(pic_arg){ if(encoding_delay && !(s->flags&CODEC_FLAG_INPUT_PRESERVED)) direct=0; if(pic_arg->linesize[0] != s->linesize) direct=0; @@ -1954,18 +2103,7 @@ static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){ } } copy_picture_attributes(s, pic, pic_arg); - - pic->display_picture_number= s->input_picture_number++; - if(pic->pts != AV_NOPTS_VALUE){ - s->user_specified_pts= pic->pts; - }else{ - if(s->user_specified_pts){ - pic->pts= s->user_specified_pts + AV_TIME_BASE*(int64_t)s->avctx->frame_rate_base / s->avctx->frame_rate; - av_log(s->avctx, AV_LOG_INFO, "Warning: AVFrame.pts=? trying to guess (%Ld)\n", pic->pts); - }else{ - pic->pts= av_rescale(pic->display_picture_number*(int64_t)s->avctx->frame_rate_base, AV_TIME_BASE, s->avctx->frame_rate); - } - } + pic->pts= pts; //we set this here to avoid modifiying pic_arg } /* shift buffer entries */ @@ -1977,6 +2115,38 @@ static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){ return 0; } +static int skip_check(MpegEncContext *s, Picture *p, Picture *ref){ + int x, y, plane; + int score=0; + int64_t score64=0; + + for(plane=0; plane<3; plane++){ + const int stride= p->linesize[plane]; + const int bw= plane ? 
1 : 2; + for(y=0; y<s->mb_height*bw; y++){ + for(x=0; x<s->mb_width*bw; x++){ + int v= s->dsp.frame_skip_cmp[1](s, p->data[plane] + 8*(x + y*stride), ref->data[plane] + 8*(x + y*stride), stride, 8); + + switch(s->avctx->frame_skip_exp){ + case 0: score= FFMAX(score, v); break; + case 1: score+= ABS(v);break; + case 2: score+= v*v;break; + case 3: score64+= ABS(v*v*(int64_t)v);break; + case 4: score64+= v*v*(int64_t)(v*v);break; + } + } + } + } + + if(score) score64= score; + + if(score64 < s->avctx->frame_skip_threshold) + return 1; + if(score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda)>>8)) + return 1; + return 0; +} + static void select_input_picture(MpegEncContext *s){ int i; @@ -1992,28 +2162,43 @@ static void select_input_picture(MpegEncContext *s){ s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++; }else{ int b_frames; + + if(s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor){ + if(skip_check(s, s->input_picture[0], s->next_picture_ptr)){ +//av_log(NULL, AV_LOG_DEBUG, "skip %p %Ld\n", s->input_picture[0]->data[0], s->input_picture[0]->pts); + + if(s->input_picture[0]->type == FF_BUFFER_TYPE_SHARED){ + for(i=0; i<4; i++) + s->input_picture[0]->data[i]= NULL; + s->input_picture[0]->type= 0; + }else{ + assert( s->input_picture[0]->type==FF_BUFFER_TYPE_USER + || s->input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL); + s->avctx->release_buffer(s->avctx, (AVFrame*)s->input_picture[0]); + } + + goto no_output_pic; + } + } + if(s->flags&CODEC_FLAG_PASS2){ for(i=0; i<s->max_b_frames+1; i++){ int pict_num= s->input_picture[0]->display_picture_number + i; - int pict_type= s->rc_context.entry[pict_num].new_pict_type; - s->input_picture[i]->pict_type= pict_type; - - if(i + 1 >= s->rc_context.num_entries) break; + + if(pict_num >= s->rc_context.num_entries) + break; + if(!s->input_picture[i]){ + s->rc_context.entry[pict_num-1].new_pict_type = P_TYPE; + break; + } + + s->input_picture[i]->pict_type= + 
s->rc_context.entry[pict_num].new_pict_type; } } - if(s->input_picture[0]->pict_type){ - /* user selected pict_type */ - for(b_frames=0; b_frames<s->max_b_frames+1; b_frames++){ - if(s->input_picture[b_frames]->pict_type!=B_TYPE) break; - } - - if(b_frames > s->max_b_frames){ - av_log(s->avctx, AV_LOG_ERROR, "warning, too many bframes in a row\n"); - b_frames = s->max_b_frames; - } - }else if(s->avctx->b_frame_strategy==0){ + if(s->avctx->b_frame_strategy==0){ b_frames= s->max_b_frames; while(b_frames && !s->input_picture[b_frames]) b_frames--; }else if(s->avctx->b_frame_strategy==1){ @@ -2043,10 +2228,24 @@ static void select_input_picture(MpegEncContext *s){ //static int b_count=0; //b_count+= b_frames; //av_log(s->avctx, AV_LOG_DEBUG, "b_frames: %d\n", b_count); + + for(i= b_frames - 1; i>=0; i--){ + int type= s->input_picture[i]->pict_type; + if(type && type != B_TYPE) + b_frames= i; + } + if(s->input_picture[b_frames]->pict_type == B_TYPE && b_frames == s->max_b_frames){ + av_log(s->avctx, AV_LOG_ERROR, "warning, too many bframes in a row\n"); + } + if(s->picture_in_gop_number + b_frames >= s->gop_size){ + if((s->flags2 & CODEC_FLAG2_STRICT_GOP) && s->gop_size > s->picture_in_gop_number){ + b_frames= s->gop_size - s->picture_in_gop_number - 1; + }else{ if(s->flags & CODEC_FLAG_CLOSED_GOP) b_frames=0; s->input_picture[b_frames]->pict_type= I_TYPE; + } } if( (s->flags & CODEC_FLAG_CLOSED_GOP) @@ -2065,7 +2264,7 @@ static void select_input_picture(MpegEncContext *s){ } } } - +no_output_pic: if(s->reordered_input_picture[0]){ s->reordered_input_picture[0]->reference= s->reordered_input_picture[0]->pict_type!=B_TYPE ? 
3 : 0; @@ -2116,7 +2315,7 @@ int MPV_encode_picture(AVCodecContext *avctx, AVFrame *pic_arg = data; int i, stuffing_count; - if(avctx->pix_fmt != PIX_FMT_YUV420P){ + if(avctx->pix_fmt != PIX_FMT_YUV420P && avctx->pix_fmt != PIX_FMT_YUVJ420P){ av_log(avctx, AV_LOG_ERROR, "this codec supports only YUV420P\n"); return -1; } @@ -2133,7 +2332,8 @@ int MPV_encode_picture(AVCodecContext *avctx, s->picture_in_gop_number++; - load_input_picture(s, pic_arg); + if(load_input_picture(s, pic_arg) < 0) + return -1; select_input_picture(s); @@ -2171,11 +2371,18 @@ int MPV_encode_picture(AVCodecContext *avctx, avctx->error[i] += s->current_picture_ptr->error[i]; } + if(s->flags&CODEC_FLAG_PASS1) + assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits + avctx->i_tex_bits + avctx->p_tex_bits == put_bits_count(&s->pb)); flush_put_bits(&s->pb); s->frame_bits = put_bits_count(&s->pb); stuffing_count= ff_vbv_update(s, s->frame_bits); if(stuffing_count){ + if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < stuffing_count + 50){ + av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n"); + return -1; + } + switch(s->codec_id){ case CODEC_ID_MPEG1VIDEO: case CODEC_ID_MPEG2VIDEO: @@ -2480,8 +2687,50 @@ static inline int hpel_motion(MpegEncContext *s, return emu; } +static inline int hpel_motion_lowres(MpegEncContext *s, + uint8_t *dest, uint8_t *src, + int field_based, int field_select, + int src_x, int src_y, + int width, int height, int stride, + int h_edge_pos, int v_edge_pos, + int w, int h, h264_chroma_mc_func *pix_op, + int motion_x, int motion_y) +{ + const int lowres= s->avctx->lowres; + const int s_mask= (2<<lowres)-1; + int emu=0; + int sx, sy; + + if(s->quarter_sample){ + motion_x/=2; + motion_y/=2; + } + + sx= motion_x & s_mask; + sy= motion_y & s_mask; + src_x += motion_x >> (lowres+1); + src_y += motion_y >> (lowres+1); + + src += src_y * stride + src_x; + + if( (unsigned)src_x > h_edge_pos - (!!sx) - w + || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) 
- h){ + ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based, + src_x, src_y<<field_based, h_edge_pos, v_edge_pos); + src= s->edge_emu_buffer; + emu=1; + } + + sx <<= 2 - lowres; + sy <<= 2 - lowres; + if(field_select) + src += s->linesize; + pix_op[lowres](dest, src, stride, h, sx, sy); + return emu; +} + /* apply one mpeg motion vector to the three components */ -static inline void mpeg_motion(MpegEncContext *s, +static always_inline void mpeg_motion(MpegEncContext *s, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, int field_based, int bottom_field, int field_select, uint8_t **ref_picture, op_pixels_func (*pix_op)[4], @@ -2504,7 +2753,7 @@ if(s->quarter_sample) dxy = ((motion_y & 1) << 1) | (motion_x & 1); src_x = s->mb_x* 16 + (motion_x >> 1); - src_y = s->mb_y*(16>>field_based) + (motion_y >> 1); + src_y =(s->mb_y<<(4-field_based)) + (motion_y >> 1); if (s->out_format == FMT_H263) { if((s->workaround_bugs & FF_BUG_HPEL_CHROMA) && field_based){ @@ -2512,18 +2761,39 @@ if(s->quarter_sample) my = motion_y >>1; uvdxy = ((my & 1) << 1) | (mx & 1); uvsrc_x = s->mb_x* 8 + (mx >> 1); - uvsrc_y = s->mb_y*(8>>field_based) + (my >> 1); + uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1); }else{ uvdxy = dxy | (motion_y & 2) | ((motion_x & 2) >> 1); uvsrc_x = src_x>>1; uvsrc_y = src_y>>1; } + }else if(s->out_format == FMT_H261){//even chroma mv's are full pel in H261 + mx = motion_x / 4; + my = motion_y / 4; + uvdxy = 0; + uvsrc_x = s->mb_x*8 + mx; + uvsrc_y = s->mb_y*8 + my; } else { - mx = motion_x / 2; - my = motion_y / 2; - uvdxy = ((my & 1) << 1) | (mx & 1); - uvsrc_x = s->mb_x* 8 + (mx >> 1); - uvsrc_y = s->mb_y*(8>>field_based) + (my >> 1); + if(s->chroma_y_shift){ + mx = motion_x / 2; + my = motion_y / 2; + uvdxy = ((my & 1) << 1) | (mx & 1); + uvsrc_x = s->mb_x* 8 + (mx >> 1); + uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1); + } else { + if(s->chroma_x_shift){ + //Chroma422 + mx = motion_x / 2; + uvdxy = ((motion_y & 1) << 1) | 
(mx & 1); + uvsrc_x = s->mb_x* 8 + (mx >> 1); + uvsrc_y = src_y; + } else { + //Chroma444 + uvdxy = dxy; + uvsrc_x = src_x; + uvsrc_y = src_y; + } + } } ptr_y = ref_picture[0] + src_y * linesize + src_x; @@ -2532,6 +2802,11 @@ if(s->quarter_sample) if( (unsigned)src_x > s->h_edge_pos - (motion_x&1) - 16 || (unsigned)src_y > v_edge_pos - (motion_y&1) - h){ + if(s->codec_id == CODEC_ID_MPEG2VIDEO || + s->codec_id == CODEC_ID_MPEG1VIDEO){ + av_log(s->avctx,AV_LOG_DEBUG,"MPEG motion vector out of boundary\n"); + return ; + } ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based, src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos); ptr_y = s->edge_emu_buffer; @@ -2561,10 +2836,111 @@ if(s->quarter_sample) pix_op[0][dxy](dest_y, ptr_y, linesize, h); if(!(s->flags&CODEC_FLAG_GRAY)){ - pix_op[1][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> 1); - pix_op[1][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> 1); + pix_op[s->chroma_x_shift][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift); + pix_op[s->chroma_x_shift][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift); + } + if(s->out_format == FMT_H261){ + ff_h261_loop_filter(s); + } +} + +/* apply one mpeg motion vector to the three components */ +static always_inline void mpeg_motion_lowres(MpegEncContext *s, + uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, + int field_based, int bottom_field, int field_select, + uint8_t **ref_picture, h264_chroma_mc_func *pix_op, + int motion_x, int motion_y, int h) +{ + uint8_t *ptr_y, *ptr_cb, *ptr_cr; + int mx, my, src_x, src_y, uvsrc_x, uvsrc_y, uvlinesize, linesize, sx, sy, uvsx, uvsy; + const int lowres= s->avctx->lowres; + const int block_s= 8>>lowres; + const int s_mask= (2<<lowres)-1; + const int h_edge_pos = s->h_edge_pos >> lowres; + const int v_edge_pos = s->v_edge_pos >> lowres; + linesize = s->current_picture.linesize[0] << field_based; + uvlinesize = s->current_picture.linesize[1] << field_based; + + if(s->quarter_sample){ //FIXME 
obviously not perfect but qpel wont work in lowres anyway + motion_x/=2; + motion_y/=2; + } + + if(field_based){ + motion_y += (bottom_field - field_select)*((1<<lowres)-1); + } + + sx= motion_x & s_mask; + sy= motion_y & s_mask; + src_x = s->mb_x*2*block_s + (motion_x >> (lowres+1)); + src_y =(s->mb_y*2*block_s>>field_based) + (motion_y >> (lowres+1)); + + if (s->out_format == FMT_H263) { + uvsx = ((motion_x>>1) & s_mask) | (sx&1); + uvsy = ((motion_y>>1) & s_mask) | (sy&1); + uvsrc_x = src_x>>1; + uvsrc_y = src_y>>1; + }else if(s->out_format == FMT_H261){//even chroma mv's are full pel in H261 + mx = motion_x / 4; + my = motion_y / 4; + uvsx = (2*mx) & s_mask; + uvsy = (2*my) & s_mask; + uvsrc_x = s->mb_x*block_s + (mx >> lowres); + uvsrc_y = s->mb_y*block_s + (my >> lowres); + } else { + mx = motion_x / 2; + my = motion_y / 2; + uvsx = mx & s_mask; + uvsy = my & s_mask; + uvsrc_x = s->mb_x*block_s + (mx >> (lowres+1)); + uvsrc_y =(s->mb_y*block_s>>field_based) + (my >> (lowres+1)); + } + + ptr_y = ref_picture[0] + src_y * linesize + src_x; + ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x; + ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x; + + if( (unsigned)src_x > h_edge_pos - (!!sx) - 2*block_s + || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){ + ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based, + src_x, src_y<<field_based, h_edge_pos, v_edge_pos); + ptr_y = s->edge_emu_buffer; + if(!(s->flags&CODEC_FLAG_GRAY)){ + uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize; + ff_emulated_edge_mc(uvbuf , ptr_cb, s->uvlinesize, 9, 9+field_based, + uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1); + ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based, + uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1); + ptr_cb= uvbuf; + ptr_cr= uvbuf+16; + } + } + + if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data + dest_y += 
s->linesize; + dest_cb+= s->uvlinesize; + dest_cr+= s->uvlinesize; + } + + if(field_select){ + ptr_y += s->linesize; + ptr_cb+= s->uvlinesize; + ptr_cr+= s->uvlinesize; } + + sx <<= 2 - lowres; + sy <<= 2 - lowres; + pix_op[lowres-1](dest_y, ptr_y, linesize, h, sx, sy); + + if(!(s->flags&CODEC_FLAG_GRAY)){ + uvsx <<= 2 - lowres; + uvsy <<= 2 - lowres; + pix_op[lowres](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy); + pix_op[lowres](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy); + } + //FIXME h261 lowres loop filter } + //FIXME move to dsputil, avg variant, 16x16 version static inline void put_obmc(uint8_t *dst, uint8_t *src[5], int stride){ int x; @@ -2792,6 +3168,56 @@ static inline void chroma_4mv_motion(MpegEncContext *s, pix_op[dxy](dest_cr, ptr, s->uvlinesize, 8); } +static inline void chroma_4mv_motion_lowres(MpegEncContext *s, + uint8_t *dest_cb, uint8_t *dest_cr, + uint8_t **ref_picture, + h264_chroma_mc_func *pix_op, + int mx, int my){ + const int lowres= s->avctx->lowres; + const int block_s= 8>>lowres; + const int s_mask= (2<<lowres)-1; + const int h_edge_pos = s->h_edge_pos >> (lowres+1); + const int v_edge_pos = s->v_edge_pos >> (lowres+1); + int emu=0, src_x, src_y, offset, sx, sy; + uint8_t *ptr; + + if(s->quarter_sample){ + mx/=2; + my/=2; + } + + /* In case of 8X8, we construct a single chroma motion vector + with a special rounding */ + mx= ff_h263_round_chroma(mx); + my= ff_h263_round_chroma(my); + + sx= mx & s_mask; + sy= my & s_mask; + src_x = s->mb_x*block_s + (mx >> (lowres+1)); + src_y = s->mb_y*block_s + (my >> (lowres+1)); + + offset = src_y * s->uvlinesize + src_x; + ptr = ref_picture[1] + offset; + if(s->flags&CODEC_FLAG_EMU_EDGE){ + if( (unsigned)src_x > h_edge_pos - (!!sx) - block_s + || (unsigned)src_y > v_edge_pos - (!!sy) - block_s){ + ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos); + ptr= s->edge_emu_buffer; + emu=1; + } + } + sx <<= 2 - 
lowres; + sy <<= 2 - lowres; + pix_op[lowres](dest_cb, ptr, s->uvlinesize, block_s, sx, sy); + + ptr = ref_picture[2] + offset; + if(emu){ + ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos); + ptr= s->edge_emu_buffer; + } + pix_op[lowres](dest_cr, ptr, s->uvlinesize, block_s, sx, sy); +} + /** * motion compesation of a single macroblock * @param s context @@ -2879,7 +3305,6 @@ static inline void MPV_motion(MpegEncContext *s, switch(s->mv_type) { case MV_TYPE_16X16: -#ifdef CONFIG_RISKY if(s->mcsel){ if(s->real_sprite_warping_points==1){ gmc1_motion(s, dest_y, dest_cb, dest_cr, @@ -2898,7 +3323,6 @@ static inline void MPV_motion(MpegEncContext *s, ref_picture, pix_op, s->mv[dir][0][0], s->mv[dir][0][1], 16); }else -#endif { mpeg_motion(s, dest_y, dest_cb, dest_cr, 0, 0, 0, @@ -3006,8 +3430,8 @@ static inline void MPV_motion(MpegEncContext *s, s->mv[dir][i][0], s->mv[dir][i][1] + 16*i, 8); dest_y += 16*s->linesize; - dest_cb+= 8*s->uvlinesize; - dest_cr+= 8*s->uvlinesize; + dest_cb+= (16>>s->chroma_y_shift)*s->uvlinesize; + dest_cr+= (16>>s->chroma_y_shift)*s->uvlinesize; } break; case MV_TYPE_DMV: @@ -3043,6 +3467,131 @@ static inline void MPV_motion(MpegEncContext *s, } } +/** + * motion compesation of a single macroblock + * @param s context + * @param dest_y luma destination pointer + * @param dest_cb chroma cb/u destination pointer + * @param dest_cr chroma cr/v destination pointer + * @param dir direction (0->forward, 1->backward) + * @param ref_picture array[3] of pointers to the 3 planes of the reference picture + * @param pic_op halfpel motion compensation function (average or put normally) + * the motion vectors are taken from s->mv and the MV type from s->mv_type + */ +static inline void MPV_motion_lowres(MpegEncContext *s, + uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, + int dir, uint8_t **ref_picture, + h264_chroma_mc_func *pix_op) +{ + int mx, my; + int mb_x, mb_y, i; + const int lowres= 
s->avctx->lowres; + const int block_s= 8>>lowres; + + mb_x = s->mb_x; + mb_y = s->mb_y; + + switch(s->mv_type) { + case MV_TYPE_16X16: + mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr, + 0, 0, 0, + ref_picture, pix_op, + s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s); + break; + case MV_TYPE_8X8: + mx = 0; + my = 0; + for(i=0;i<4;i++) { + hpel_motion_lowres(s, dest_y + ((i & 1) + (i >> 1) * s->linesize)*block_s, + ref_picture[0], 0, 0, + (2*mb_x + (i & 1))*block_s, (2*mb_y + (i >>1))*block_s, + s->width, s->height, s->linesize, + s->h_edge_pos >> lowres, s->v_edge_pos >> lowres, + block_s, block_s, pix_op, + s->mv[dir][i][0], s->mv[dir][i][1]); + + mx += s->mv[dir][i][0]; + my += s->mv[dir][i][1]; + } + + if(!(s->flags&CODEC_FLAG_GRAY)) + chroma_4mv_motion_lowres(s, dest_cb, dest_cr, ref_picture, pix_op, mx, my); + break; + case MV_TYPE_FIELD: + if (s->picture_structure == PICT_FRAME) { + /* top field */ + mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr, + 1, 0, s->field_select[dir][0], + ref_picture, pix_op, + s->mv[dir][0][0], s->mv[dir][0][1], block_s); + /* bottom field */ + mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr, + 1, 1, s->field_select[dir][1], + ref_picture, pix_op, + s->mv[dir][1][0], s->mv[dir][1][1], block_s); + } else { + if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){ + ref_picture= s->current_picture_ptr->data; + } + + mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr, + 0, 0, s->field_select[dir][0], + ref_picture, pix_op, + s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s); + } + break; + case MV_TYPE_16X8: + for(i=0; i<2; i++){ + uint8_t ** ref2picture; + + if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){ + ref2picture= ref_picture; + }else{ + ref2picture= s->current_picture_ptr->data; + } + + mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr, + 0, 0, s->field_select[dir][i], + ref2picture, pix_op, + s->mv[dir][i][0], s->mv[dir][i][1] + 
2*block_s*i, block_s); + + dest_y += 2*block_s*s->linesize; + dest_cb+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize; + dest_cr+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize; + } + break; + case MV_TYPE_DMV: + if(s->picture_structure == PICT_FRAME){ + for(i=0; i<2; i++){ + int j; + for(j=0; j<2; j++){ + mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr, + 1, j, j^i, + ref_picture, pix_op, + s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], block_s); + } + pix_op = s->dsp.avg_h264_chroma_pixels_tab; + } + }else{ + for(i=0; i<2; i++){ + mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr, + 0, 0, s->picture_structure != i+1, + ref_picture, pix_op, + s->mv[dir][2*i][0],s->mv[dir][2*i][1],2*block_s); + + // after put we make avg of the same block + pix_op = s->dsp.avg_h264_chroma_pixels_tab; + + //opposite parity is always in the same frame if this is second field + if(!s->first_field){ + ref_picture = s->current_picture_ptr->data; + } + } + } + break; + default: assert(0); + } +} /* put block[] to dest[] */ static inline void put_dct(MpegEncContext *s, @@ -3114,7 +3663,7 @@ void ff_clean_intra_table_entries(MpegEncContext *s) s->mv : motion vector s->interlaced_dct : true if interlaced dct used (mpeg2) */ -void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64]) +static always_inline void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64], int lowres_flag) { int mb_x, mb_y; const int mb_xy = s->mb_y * s->mb_stride + s->mb_x; @@ -3160,7 +3709,8 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64]) qpel_mc_func (*op_qpix)[16]; const int linesize= s->current_picture.linesize[0]; //not s->linesize as this woulnd be wrong for field pics const int uvlinesize= s->current_picture.linesize[1]; - const int readable= s->pict_type != B_TYPE || s->encoding || s->avctx->draw_horiz_band; + const int readable= s->pict_type != B_TYPE || s->encoding || s->avctx->draw_horiz_band || lowres_flag; + const int block_size= lowres_flag ? 
8>>s->avctx->lowres : 8; /* avoid copy if macroblock skipped in last frame too */ /* skip only during decoding as we might trash the buffers during encoding a bit */ @@ -3188,14 +3738,10 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64]) *mbskip_ptr = 0; /* not skipped */ } } - - if (s->interlaced_dct) { - dct_linesize = linesize * 2; - dct_offset = linesize; - } else { - dct_linesize = linesize; - dct_offset = linesize * 8; - } + + dct_linesize = linesize << s->interlaced_dct; + dct_offset =(s->interlaced_dct)? linesize : linesize*block_size; + if(readable){ dest_y= s->dest[0]; dest_cb= s->dest[1]; @@ -3203,27 +3749,39 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64]) }else{ dest_y = s->b_scratchpad; dest_cb= s->b_scratchpad+16*linesize; - dest_cr= s->b_scratchpad+16*linesize+8; + dest_cr= s->b_scratchpad+32*linesize; } + if (!s->mb_intra) { /* motion handling */ /* decoding or more than one mb_type (MC was allready done otherwise) */ if(!s->encoding){ - if ((!s->no_rounding) || s->pict_type==B_TYPE){ - op_pix = s->dsp.put_pixels_tab; - op_qpix= s->dsp.put_qpel_pixels_tab; - }else{ - op_pix = s->dsp.put_no_rnd_pixels_tab; - op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab; - } + if(lowres_flag){ + h264_chroma_mc_func *op_pix = s->dsp.put_h264_chroma_pixels_tab; - if (s->mv_dir & MV_DIR_FORWARD) { - MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix); - op_pix = s->dsp.avg_pixels_tab; - op_qpix= s->dsp.avg_qpel_pixels_tab; - } - if (s->mv_dir & MV_DIR_BACKWARD) { - MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix); + if (s->mv_dir & MV_DIR_FORWARD) { + MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix); + op_pix = s->dsp.avg_h264_chroma_pixels_tab; + } + if (s->mv_dir & MV_DIR_BACKWARD) { + MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix); + } + }else{ + if ((!s->no_rounding) || s->pict_type==B_TYPE){ + op_pix = 
s->dsp.put_pixels_tab; + op_qpix= s->dsp.put_qpel_pixels_tab; + }else{ + op_pix = s->dsp.put_no_rnd_pixels_tab; + op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab; + } + if (s->mv_dir & MV_DIR_FORWARD) { + MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix); + op_pix = s->dsp.avg_pixels_tab; + op_qpix= s->dsp.avg_qpel_pixels_tab; + } + if (s->mv_dir & MV_DIR_BACKWARD) { + MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix); + } } } @@ -3233,63 +3791,100 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64]) /* add dct residue */ if(s->encoding || !( s->h263_msmpeg4 || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO || (s->codec_id==CODEC_ID_MPEG4 && !s->mpeg_quant))){ - add_dequant_dct(s, block[0], 0, dest_y, dct_linesize, s->qscale); - add_dequant_dct(s, block[1], 1, dest_y + 8, dct_linesize, s->qscale); - add_dequant_dct(s, block[2], 2, dest_y + dct_offset, dct_linesize, s->qscale); - add_dequant_dct(s, block[3], 3, dest_y + dct_offset + 8, dct_linesize, s->qscale); + add_dequant_dct(s, block[0], 0, dest_y , dct_linesize, s->qscale); + add_dequant_dct(s, block[1], 1, dest_y + block_size, dct_linesize, s->qscale); + add_dequant_dct(s, block[2], 2, dest_y + dct_offset , dct_linesize, s->qscale); + add_dequant_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale); if(!(s->flags&CODEC_FLAG_GRAY)){ add_dequant_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale); add_dequant_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale); } } else if(s->codec_id != CODEC_ID_WMV2){ - add_dct(s, block[0], 0, dest_y, dct_linesize); - add_dct(s, block[1], 1, dest_y + 8, dct_linesize); - add_dct(s, block[2], 2, dest_y + dct_offset, dct_linesize); - add_dct(s, block[3], 3, dest_y + dct_offset + 8, dct_linesize); + add_dct(s, block[0], 0, dest_y , dct_linesize); + add_dct(s, block[1], 1, dest_y + block_size, dct_linesize); + add_dct(s, block[2], 2, dest_y + dct_offset 
, dct_linesize); + add_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize); if(!(s->flags&CODEC_FLAG_GRAY)){ - add_dct(s, block[4], 4, dest_cb, uvlinesize); - add_dct(s, block[5], 5, dest_cr, uvlinesize); - } - } -#ifdef CONFIG_RISKY + if(s->chroma_y_shift){//Chroma420 + add_dct(s, block[4], 4, dest_cb, uvlinesize); + add_dct(s, block[5], 5, dest_cr, uvlinesize); + }else{ + //chroma422 + dct_linesize = uvlinesize << s->interlaced_dct; + dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8; + + add_dct(s, block[4], 4, dest_cb, dct_linesize); + add_dct(s, block[5], 5, dest_cr, dct_linesize); + add_dct(s, block[6], 6, dest_cb+dct_offset, dct_linesize); + add_dct(s, block[7], 7, dest_cr+dct_offset, dct_linesize); + if(!s->chroma_x_shift){//Chroma444 + add_dct(s, block[8], 8, dest_cb+8, dct_linesize); + add_dct(s, block[9], 9, dest_cr+8, dct_linesize); + add_dct(s, block[10], 10, dest_cb+8+dct_offset, dct_linesize); + add_dct(s, block[11], 11, dest_cr+8+dct_offset, dct_linesize); + } + } + }//fi gray + } else{ ff_wmv2_add_mb(s, block, dest_y, dest_cb, dest_cr); } -#endif } else { /* dct only in intra block */ if(s->encoding || !(s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO)){ - put_dct(s, block[0], 0, dest_y, dct_linesize, s->qscale); - put_dct(s, block[1], 1, dest_y + 8, dct_linesize, s->qscale); - put_dct(s, block[2], 2, dest_y + dct_offset, dct_linesize, s->qscale); - put_dct(s, block[3], 3, dest_y + dct_offset + 8, dct_linesize, s->qscale); + put_dct(s, block[0], 0, dest_y , dct_linesize, s->qscale); + put_dct(s, block[1], 1, dest_y + block_size, dct_linesize, s->qscale); + put_dct(s, block[2], 2, dest_y + dct_offset , dct_linesize, s->qscale); + put_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale); if(!(s->flags&CODEC_FLAG_GRAY)){ put_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale); put_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale); } }else{ - s->dsp.idct_put(dest_y 
, dct_linesize, block[0]); - s->dsp.idct_put(dest_y + 8, dct_linesize, block[1]); - s->dsp.idct_put(dest_y + dct_offset , dct_linesize, block[2]); - s->dsp.idct_put(dest_y + dct_offset + 8, dct_linesize, block[3]); + s->dsp.idct_put(dest_y , dct_linesize, block[0]); + s->dsp.idct_put(dest_y + block_size, dct_linesize, block[1]); + s->dsp.idct_put(dest_y + dct_offset , dct_linesize, block[2]); + s->dsp.idct_put(dest_y + dct_offset + block_size, dct_linesize, block[3]); if(!(s->flags&CODEC_FLAG_GRAY)){ - s->dsp.idct_put(dest_cb, uvlinesize, block[4]); - s->dsp.idct_put(dest_cr, uvlinesize, block[5]); - } + if(s->chroma_y_shift){ + s->dsp.idct_put(dest_cb, uvlinesize, block[4]); + s->dsp.idct_put(dest_cr, uvlinesize, block[5]); + }else{ + + dct_linesize = uvlinesize << s->interlaced_dct; + dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8; + + s->dsp.idct_put(dest_cb, dct_linesize, block[4]); + s->dsp.idct_put(dest_cr, dct_linesize, block[5]); + s->dsp.idct_put(dest_cb + dct_offset, dct_linesize, block[6]); + s->dsp.idct_put(dest_cr + dct_offset, dct_linesize, block[7]); + if(!s->chroma_x_shift){//Chroma444 + s->dsp.idct_put(dest_cb + 8, dct_linesize, block[8]); + s->dsp.idct_put(dest_cr + 8, dct_linesize, block[9]); + s->dsp.idct_put(dest_cb + 8 + dct_offset, dct_linesize, block[10]); + s->dsp.idct_put(dest_cr + 8 + dct_offset, dct_linesize, block[11]); + } + } + }//gray } } if(!readable){ s->dsp.put_pixels_tab[0][0](s->dest[0], dest_y , linesize,16); - s->dsp.put_pixels_tab[1][0](s->dest[1], dest_cb, uvlinesize, 8); - s->dsp.put_pixels_tab[1][0](s->dest[2], dest_cr, uvlinesize, 8); + s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[1], dest_cb, uvlinesize,16 >> s->chroma_y_shift); + s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[2], dest_cr, uvlinesize,16 >> s->chroma_y_shift); } } } +void MPV_decode_mb(MpegEncContext *s, DCTELEM block[12][64]){ + if(s->avctx->lowres) MPV_decode_mb_internal(s, block, 1); + else MPV_decode_mb_internal(s, block, 0); 
+} + #ifdef CONFIG_ENCODERS static inline void dct_single_coeff_elimination(MpegEncContext *s, int n, int threshold) @@ -3389,7 +3984,7 @@ void ff_draw_horiz_band(MpegEncContext *s, int y, int h){ if(s->first_field && !(s->avctx->slice_flags&SLICE_FLAG_ALLOW_FIELD)) return; } - h= FFMIN(h, s->height - y); + h= FFMIN(h, s->avctx->height - y); if(s->pict_type==B_TYPE || s->low_delay || (s->avctx->slice_flags&SLICE_FLAG_CODED_ORDER)) src= (AVFrame*)s->current_picture_ptr; @@ -3406,7 +4001,7 @@ void ff_draw_horiz_band(MpegEncContext *s, int y, int h){ }else{ offset[0]= y * s->linesize;; offset[1]= - offset[2]= (y>>1) * s->uvlinesize;; + offset[2]= (y >> s->chroma_y_shift) * s->uvlinesize; offset[3]= 0; } @@ -3420,6 +4015,7 @@ void ff_draw_horiz_band(MpegEncContext *s, int y, int h){ void ff_init_block_index(MpegEncContext *s){ //FIXME maybe rename const int linesize= s->current_picture.linesize[0]; //not s->linesize as this woulnd be wrong for field pics const int uvlinesize= s->current_picture.linesize[1]; + const int mb_size= 4 - s->avctx->lowres; s->block_index[0]= s->b8_stride*(s->mb_y*2 ) - 2 + s->mb_x*2; s->block_index[1]= s->b8_stride*(s->mb_y*2 ) - 1 + s->mb_x*2; @@ -3427,16 +4023,18 @@ void ff_init_block_index(MpegEncContext *s){ //FIXME maybe rename s->block_index[3]= s->b8_stride*(s->mb_y*2 + 1) - 1 + s->mb_x*2; s->block_index[4]= s->mb_stride*(s->mb_y + 1) + s->b8_stride*s->mb_height*2 + s->mb_x - 1; s->block_index[5]= s->mb_stride*(s->mb_y + s->mb_height + 2) + s->b8_stride*s->mb_height*2 + s->mb_x - 1; - - if(s->pict_type==B_TYPE && s->avctx->draw_horiz_band && s->picture_structure==PICT_FRAME){ - s->dest[0] = s->current_picture.data[0] + s->mb_x * 16 - 16; - s->dest[1] = s->current_picture.data[1] + s->mb_x * 8 - 8; - s->dest[2] = s->current_picture.data[2] + s->mb_x * 8 - 8; - }else{ - s->dest[0] = s->current_picture.data[0] + (s->mb_y * 16* linesize ) + s->mb_x * 16 - 16; - s->dest[1] = s->current_picture.data[1] + (s->mb_y * 8 * uvlinesize) + s->mb_x 
* 8 - 8; - s->dest[2] = s->current_picture.data[2] + (s->mb_y * 8 * uvlinesize) + s->mb_x * 8 - 8; - } + //block_index is not used by mpeg2, so it is not affected by chroma_format + + s->dest[0] = s->current_picture.data[0] + ((s->mb_x - 1) << mb_size); + s->dest[1] = s->current_picture.data[1] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift)); + s->dest[2] = s->current_picture.data[2] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift)); + + if(!(s->pict_type==B_TYPE && s->avctx->draw_horiz_band && s->picture_structure==PICT_FRAME)) + { + s->dest[0] += s->mb_y * linesize << mb_size; + s->dest[1] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift); + s->dest[2] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift); + } } #ifdef CONFIG_ENCODERS @@ -3506,7 +4104,8 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y) } } ff_set_qscale(s, last_qp + s->dquant); - } + }else if(s->flags&CODEC_FLAG_QP_RD) + ff_set_qscale(s, s->qscale + s->dquant); wrap_y = s->linesize; wrap_c = s->uvlinesize; @@ -3515,12 +4114,13 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y) ptr_cr = s->new_picture.data[2] + (mb_y * 8 * wrap_c) + mb_x * 8; if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){ - ff_emulated_edge_mc(s->edge_emu_buffer , ptr_y , wrap_y,16,16,mb_x*16,mb_y*16, s->width , s->height); - ptr_y= s->edge_emu_buffer; - ff_emulated_edge_mc(s->edge_emu_buffer+18*wrap_y , ptr_cb, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1); - ptr_cb= s->edge_emu_buffer+18*wrap_y; - ff_emulated_edge_mc(s->edge_emu_buffer+18*wrap_y+9, ptr_cr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1); - ptr_cr= s->edge_emu_buffer+18*wrap_y+9; + uint8_t *ebuf= s->edge_emu_buffer + 32; + ff_emulated_edge_mc(ebuf , ptr_y , wrap_y,16,16,mb_x*16,mb_y*16, s->width , s->height); + ptr_y= ebuf; + ff_emulated_edge_mc(ebuf+18*wrap_y , ptr_cb, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1); + ptr_cb= ebuf+18*wrap_y; + 
ff_emulated_edge_mc(ebuf+18*wrap_y+8, ptr_cr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1); + ptr_cr= ebuf+18*wrap_y+8; } if (s->mb_intra) { @@ -3701,7 +4301,6 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y) mpeg1_encode_mb(s, s->block, motion_x, motion_y); break; /* xine: do not need this for decode or MPEG-1 encoding modes */ #if 0 -#ifdef CONFIG_RISKY case CODEC_ID_MPEG4: mpeg4_encode_mb(s, s->block, motion_x, motion_y); break; case CODEC_ID_MSMPEG4V2: @@ -3710,12 +4309,14 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y) msmpeg4_encode_mb(s, s->block, motion_x, motion_y); break; case CODEC_ID_WMV2: ff_wmv2_encode_mb(s, s->block, motion_x, motion_y); break; + case CODEC_ID_H261: + ff_h261_encode_mb(s, s->block, motion_x, motion_y); break; case CODEC_ID_H263: case CODEC_ID_H263P: case CODEC_ID_FLV1: case CODEC_ID_RV10: + case CODEC_ID_RV20: h263_encode_mb(s, s->block, motion_x, motion_y); break; -#endif case CODEC_ID_MJPEG: mjpeg_encode_mb(s, s->block); break; #endif /* #if 0 */ @@ -3740,6 +4341,8 @@ void ff_mpeg_flush(AVCodecContext *avctx){ } s->current_picture_ptr = s->last_picture_ptr = s->next_picture_ptr = NULL; + s->mb_x= s->mb_y= 0; + s->parse_context.state= -1; s->parse_context.frame_start_found= 0; s->parse_context.overread= 0; @@ -3917,9 +4520,15 @@ static int sse_mb(MpegEncContext *s){ if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16; if(w==16 && h==16) + if(s->avctx->mb_cmp == FF_CMP_NSSE){ + return s->dsp.nsse[0](s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16) + +s->dsp.nsse[1](s, s->new_picture.data[1] + s->mb_x*8 + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8) + +s->dsp.nsse[1](s, s->new_picture.data[2] + s->mb_x*8 + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8); + }else{ return s->dsp.sse[0](NULL, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16) +s->dsp.sse[1](NULL, 
s->new_picture.data[1] + s->mb_x*8 + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8) +s->dsp.sse[1](NULL, s->new_picture.data[2] + s->mb_x*8 + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8); + } else return sse(s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], w, h, s->linesize) +sse(s, s->new_picture.data[1] + s->mb_x*8 + s->mb_y*s->uvlinesize*8,s->dest[1], w>>1, h>>1, s->uvlinesize) @@ -3971,7 +4580,7 @@ static int estimate_motion_thread(AVCodecContext *c, void *arg){ } return 0; } -#endif +#endif /* #if 0 */ static int mb_var_thread(AVCodecContext *c, void *arg){ MpegEncContext *s= arg; @@ -4011,6 +4620,9 @@ static void write_slice_end(MpegEncContext *s){ align_put_bits(&s->pb); flush_put_bits(&s->pb); + + if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame) + s->misc_bits+= get_bits_diff(s); } static int encode_thread(AVCodecContext *c, void *arg){ @@ -4018,16 +4630,16 @@ static int encode_thread(AVCodecContext *c, void *arg){ int mb_x, mb_y, pdif = 0; int i, j; MpegEncContext best_s, backup_s; - uint8_t bit_buf[2][3000]; - uint8_t bit_buf2[2][3000]; - uint8_t bit_buf_tex[2][3000]; + uint8_t bit_buf[2][MAX_MB_BYTES]; + uint8_t bit_buf2[2][MAX_MB_BYTES]; + uint8_t bit_buf_tex[2][MAX_MB_BYTES]; PutBitContext pb[2], pb2[2], tex_pb[2]; //printf("%d->%d\n", s->resync_mb_y, s->end_mb_y); for(i=0; i<2; i++){ - init_put_bits(&pb [i], bit_buf [i], 3000); - init_put_bits(&pb2 [i], bit_buf2 [i], 3000); - init_put_bits(&tex_pb[i], bit_buf_tex[i], 3000); + init_put_bits(&pb [i], bit_buf [i], MAX_MB_BYTES); + init_put_bits(&pb2 [i], bit_buf2 [i], MAX_MB_BYTES); + init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES); } s->last_bits= put_bits_count(&s->pb); @@ -4052,7 +4664,6 @@ static int encode_thread(AVCodecContext *c, void *arg){ s->last_mv_dir = 0; -#ifdef CONFIG_RISKY switch(s->codec_id){ case CODEC_ID_H263: case CODEC_ID_H263P: @@ -4067,7 +4678,6 @@ static int encode_thread(AVCodecContext *c, void *arg){ break; #endif /* 
#if 0 */ } -#endif s->resync_mb_x=0; s->resync_mb_y=0; @@ -4082,17 +4692,35 @@ static int encode_thread(AVCodecContext *c, void *arg){ ff_init_block_index(s); for(mb_x=0; mb_x < s->mb_width; mb_x++) { - const int xy= mb_y*s->mb_stride + mb_x; + int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this int mb_type= s->mb_type[xy]; // int d; int dmin= INT_MAX; int dir; + if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < MAX_MB_BYTES){ + av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n"); + return -1; + } + if(s->data_partitioning){ + if( s->pb2 .buf_end - s->pb2 .buf - (put_bits_count(&s-> pb2)>>3) < MAX_MB_BYTES + || s->tex_pb.buf_end - s->tex_pb.buf - (put_bits_count(&s->tex_pb )>>3) < MAX_MB_BYTES){ + av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n"); + return -1; + } + } + s->mb_x = mb_x; + s->mb_y = mb_y; // moved into loop, can get changed by H.261 ff_update_block_index(s); + if(s->codec_id == CODEC_ID_H261){ + ff_h261_reorder_mb_index(s); + xy= s->mb_y*s->mb_stride + s->mb_x; + mb_type= s->mb_type[xy]; + } + /* write gob / video packet header */ -#ifdef CONFIG_RISKY if(s->rtp_mode){ int current_packet_size, is_gob_start; @@ -4131,7 +4759,7 @@ static int encode_thread(AVCodecContext *c, void *arg){ current_packet_size= pbBufPtr(&s->pb) - s->ptr_lastgob; if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){ - int r= put_bits_count(&s->pb)/8 + s->picture_number + s->codec_id + s->mb_x + s->mb_y; + int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y; int d= 100 / s->avctx->error_rate; if(r % d == 0){ current_packet_size=0; @@ -4141,9 +4769,11 @@ static int encode_thread(AVCodecContext *c, void *arg){ assert(pbBufPtr(&s->pb) == s->ptr_lastgob); } } - - if (s->avctx->rtp_callback) - s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, 0); + + if (s->avctx->rtp_callback){ + int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x; + 
s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb); + } switch(s->codec_id){ /* xine: do not need this for decode or MPEG-1 encoding modes */ @@ -4179,7 +4809,6 @@ static int encode_thread(AVCodecContext *c, void *arg){ s->resync_mb_y=mb_y; } } -#endif if( (s->resync_mb_x == s->mb_x) && s->resync_mb_y+1 == s->mb_y){ @@ -4189,7 +4818,7 @@ static int encode_thread(AVCodecContext *c, void *arg){ s->mb_skiped=0; s->dquant=0; //only for QP_RD - if(mb_type & (mb_type-1) || (s->flags & CODEC_FLAG_QP_RD)){ // more than 1 MB type possible + if(mb_type & (mb_type-1) || (s->flags & CODEC_FLAG_QP_RD)){ // more than 1 MB type possible or CODEC_FLAG_QP_RD int next_block=0; int pb_bits_count, pb2_bits_count, tex_pb_bits_count; @@ -4280,9 +4909,7 @@ static int encode_thread(AVCodecContext *c, void *arg){ s->mb_intra= 0; /* xine: do not need this for decode or MPEG-1 encoding modes */ #if 0 -#ifdef CONFIG_RISKY ff_mpeg4_set_direct_mv(s, mx, my); -#endif #endif /* #if 0 */ encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb, &dmin, &next_block, mx, my); @@ -4366,7 +4993,7 @@ static int encode_thread(AVCodecContext *c, void *arg){ if(qp < s->avctx->qmin || qp > s->avctx->qmax) break; backup_s.dquant= dquant; - if(s->mb_intra){ + if(s->mb_intra && s->dc_val[0]){ for(i=0; i<6; i++){ dc[i]= s->dc_val[0][ s->block_index[i] ]; memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(DCTELEM)*16); @@ -4376,7 +5003,7 @@ static int encode_thread(AVCodecContext *c, void *arg){ encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb, &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]); if(best_s.qscale != qp){ - if(s->mb_intra){ + if(s->mb_intra && s->dc_val[0]){ for(i=0; i<6; i++){ s->dc_val[0][ s->block_index[i] ]= dc[i]; memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(DCTELEM)*16); @@ -4414,10 +5041,8 @@ static int encode_thread(AVCodecContext *c, void *arg){ } s->last_bits= 
put_bits_count(&s->pb); -#ifdef CONFIG_RISKY if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE) ff_h263_update_motion_val(s); -#endif if(next_block==0){ //FIXME 16 vs linesize16 s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad , s->linesize ,16); @@ -4473,9 +5098,7 @@ static int encode_thread(AVCodecContext *c, void *arg){ motion_y=s->b_direct_mv_table[xy][1]; /* xine: do not need this for decode or MPEG-1 encoding modes */ #if 0 -#ifdef CONFIG_RISKY - ff_mpeg4_set_direct_mv(s, motion_x, motion_y); -#endif + ff_mpeg4_set_direct_mv(s, mx, my); #endif /* #if 0 */ break; case CANDIDATE_MB_TYPE_BIDIR: @@ -4546,10 +5169,8 @@ static int encode_thread(AVCodecContext *c, void *arg){ // RAL: Update last macrobloc type s->last_mv_dir = s->mv_dir; -#ifdef CONFIG_RISKY if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE) ff_h263_update_motion_val(s); -#endif MPV_decode_mb(s, s->block); } @@ -4577,29 +5198,30 @@ static int encode_thread(AVCodecContext *c, void *arg){ s, s->new_picture .data[2] + s->mb_x*8 + s->mb_y*s->uvlinesize*8, s->dest[2], w>>1, h>>1, s->uvlinesize); } - if(s->loop_filter) - ff_h263_loop_filter(s); + if(s->loop_filter){ + if(s->out_format == FMT_H263) + ff_h263_loop_filter(s); + } //printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_stride, put_bits_count(&s->pb)); } } /* xine: do not need this for decode or MPEG-1 encoding modes */ #if 0 -#ifdef CONFIG_RISKY //not beautifull here but we must write it before flushing so it has to be here if (s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == I_TYPE) msmpeg4_encode_ext_header(s); -#endif #endif /* #if 0 */ write_slice_end(s); /* Send the last GOB if RTP */ if (s->avctx->rtp_callback) { + int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x; pdif = pbBufPtr(&s->pb) - s->ptr_lastgob; /* Call the RTP callback to send the last GOB */ emms_c(); - s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, 0); + s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, pdif, number_mb); } 
return 0; @@ -4618,7 +5240,6 @@ static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src) MERGE(dct_count[0]); //note, the other dct vars are not part of the context MERGE(dct_count[1]); MERGE(mv_bits); - MERGE(header_bits); MERGE(i_tex_bits); MERGE(p_tex_bits); MERGE(i_count); @@ -4655,17 +5276,15 @@ static void encode_picture(MpegEncContext *s, int picture_number) /* xine: do not need this for decode or MPEG-1 encoding modes */ #if 0 -#ifdef CONFIG_RISKY /* we need to initialize some time vars before we can encode b-frames */ // RAL: Condition added for MPEG1VIDEO if (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->h263_msmpeg4)) ff_set_mpeg4_time(s, s->picture_number); //FIXME rename and use has_b_frames or similar -#endif #endif /* #if 0 */ s->me.scene_change_score=0; - s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME ratedistoration +// s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... 
stuff for ME ratedistoration if(s->pict_type==I_TYPE){ if(s->msmpeg4_version >= 3) s->no_rounding=1; @@ -4686,6 +5305,8 @@ static void encode_picture(MpegEncContext *s, int picture_number) /* Estimate motion for every MB */ if(s->pict_type != I_TYPE){ + s->lambda = (s->lambda * s->avctx->me_penalty_compensation + 128)>>8; + s->lambda2= (s->lambda2* s->avctx->me_penalty_compensation + 128)>>8; if(s->pict_type != B_TYPE && s->avctx->me_threshold==0){ if((s->avctx->pre_me && s->last_non_b_pict_type==I_TYPE) || s->avctx->pre_me==2){ s->avctx->execute(s->avctx, pre_estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count); @@ -4782,7 +5403,6 @@ static void encode_picture(MpegEncContext *s, int picture_number) if(s->adaptive_quant){ /* xine: do not need this for decode or MPEG-1 encoding modes */ #if 0 -#ifdef CONFIG_RISKY switch(s->codec_id){ case CODEC_ID_MPEG4: ff_clean_mpeg4_qscales(s); @@ -4793,7 +5413,6 @@ static void encode_picture(MpegEncContext *s, int picture_number) ff_clean_h263_qscales(s); break; } -#endif #endif /* #if 0 */ s->lambda= s->lambda_table[0]; @@ -4815,12 +5434,14 @@ static void encode_picture(MpegEncContext *s, int picture_number) s->intra_matrix[j] = CLAMP_TO_8BIT((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3); } convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16, - s->intra_matrix, s->intra_quant_bias, 8, 8); + s->intra_matrix, s->intra_quant_bias, 8, 8, 1); s->qscale= 8; } //FIXME var duplication + s->current_picture_ptr->key_frame= s->current_picture.key_frame= s->pict_type == I_TYPE; //FIXME pic_ptr + s->current_picture_ptr->pict_type= s->current_picture.pict_type= s->pict_type; if(s->current_picture.key_frame) @@ -4833,7 +5454,9 @@ static void encode_picture(MpegEncContext *s, int picture_number) case FMT_MJPEG: mjpeg_picture_header(s); break; -#ifdef CONFIG_RISKY + case FMT_H261: + ff_h261_encode_picture_header(s, picture_number); + break; case FMT_H263: if (s->codec_id == CODEC_ID_WMV2) 
ff_wmv2_encode_picture_header(s, picture_number); @@ -4843,12 +5466,13 @@ static void encode_picture(MpegEncContext *s, int picture_number) mpeg4_encode_picture_header(s, picture_number); else if (s->codec_id == CODEC_ID_RV10) rv10_encode_picture_header(s, picture_number); + else if (s->codec_id == CODEC_ID_RV20) + rv20_encode_picture_header(s, picture_number); else if (s->codec_id == CODEC_ID_FLV1) ff_flv_encode_picture_header(s, picture_number); else h263_encode_picture_header(s, picture_number); break; -#endif #endif /* #if 0 */ case FMT_MPEG1: mpeg1_encode_picture_header(s, picture_number); @@ -5216,7 +5840,7 @@ static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise? DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale){ int16_t rem[64]; - DCTELEM d1[64]; + DCTELEM d1[64] __align16; const int *qmat; const uint8_t *scantable= s->intra_scantable.scantable; const uint8_t *perm_scantable= s->intra_scantable.permutated; @@ -5855,82 +6479,7 @@ static void dct_unquantize_h263_inter_c(MpegEncContext *s, } } -static const AVOption mpeg4_options[] = -{ - AVOPTION_CODEC_INT("bitrate", "desired video bitrate", bit_rate, 4, 240000000, 800000), - AVOPTION_CODEC_INT("ratetol", "number of bits the bitstream is allowed to diverge from the reference" - "the reference can be CBR (for CBR pass1) or VBR (for pass2)", - bit_rate_tolerance, 4, 240000000, 8000), - AVOPTION_CODEC_INT("qmin", "minimum quantizer", qmin, 1, 31, 2), - AVOPTION_CODEC_INT("qmax", "maximum quantizer", qmax, 1, 31, 31), - AVOPTION_CODEC_STRING("rc_eq", "rate control equation", - rc_eq, "tex^qComp,option1,options2", 0), - AVOPTION_CODEC_INT("rc_minrate", "rate control minimum bitrate", - rc_min_rate, 4, 24000000, 0), - AVOPTION_CODEC_INT("rc_maxrate", "rate control maximum bitrate", - rc_max_rate, 4, 24000000, 0), - AVOPTION_CODEC_DOUBLE("rc_buf_aggresivity", "rate control buffer aggresivity", - rc_buffer_aggressivity, 4, 24000000, 0), - AVOPTION_CODEC_DOUBLE("rc_initial_cplx", 
"initial complexity for pass1 ratecontrol", - rc_initial_cplx, 0., 9999999., 0), - AVOPTION_CODEC_DOUBLE("i_quant_factor", "qscale factor between p and i frames", - i_quant_factor, 0., 0., 0), - AVOPTION_CODEC_DOUBLE("i_quant_offset", "qscale offset between p and i frames", - i_quant_factor, -999999., 999999., 0), - AVOPTION_CODEC_INT("dct_algo", "dct alghorithm", - dct_algo, 0, 5, 0), // fixme - "Auto,FastInt,Int,MMX,MLib,Altivec" - AVOPTION_CODEC_DOUBLE("lumi_masking", "luminance masking", - lumi_masking, 0., 999999., 0), - AVOPTION_CODEC_DOUBLE("temporal_cplx_masking", "temporary complexity masking", - temporal_cplx_masking, 0., 999999., 0), - AVOPTION_CODEC_DOUBLE("spatial_cplx_masking", "spatial complexity masking", - spatial_cplx_masking, 0., 999999., 0), - AVOPTION_CODEC_DOUBLE("p_masking", "p block masking", - p_masking, 0., 999999., 0), - AVOPTION_CODEC_DOUBLE("dark_masking", "darkness masking", - dark_masking, 0., 999999., 0), - AVOPTION_CODEC_INT("idct_algo", "idct alghorithm", - idct_algo, 0, 8, 0), // fixme - "Auto,Int,Simple,SimpleMMX,LibMPEG2MMX,PS2,MLib,ARM,Altivec" - - AVOPTION_CODEC_INT("mb_qmin", "minimum MB quantizer", - mb_qmin, 0, 8, 0), - AVOPTION_CODEC_INT("mb_qmax", "maximum MB quantizer", - mb_qmin, 0, 8, 0), - - AVOPTION_CODEC_INT("me_cmp", "ME compare function", - me_cmp, 0, 24000000, 0), - AVOPTION_CODEC_INT("me_sub_cmp", "subpixel ME compare function", - me_sub_cmp, 0, 24000000, 0), - - - AVOPTION_CODEC_INT("dia_size", "ME diamond size & shape", - dia_size, 0, 24000000, 0), - AVOPTION_CODEC_INT("last_predictor_count", "amount of previous MV predictors", - last_predictor_count, 0, 24000000, 0), - - AVOPTION_CODEC_INT("pre_me", "pre pass for ME", - pre_me, 0, 24000000, 0), - AVOPTION_CODEC_INT("me_pre_cmp", "ME pre pass compare function", - me_pre_cmp, 0, 24000000, 0), - - AVOPTION_CODEC_INT("me_range", "maximum ME search range", - me_range, 0, 24000000, 0), - AVOPTION_CODEC_INT("pre_dia_size", "ME pre pass diamod size & shape", - 
pre_dia_size, 0, 24000000, 0), - AVOPTION_CODEC_INT("me_subpel_quality", "subpel ME quality", - me_subpel_quality, 0, 24000000, 0), - AVOPTION_CODEC_INT("me_range", "maximum ME search range", - me_range, 0, 24000000, 0), - AVOPTION_CODEC_FLAG("psnr", "calculate PSNR of compressed frames", - flags, CODEC_FLAG_PSNR, 0), - AVOPTION_CODEC_RCOVERRIDE("rc_override", "ratecontrol override (=startframe,endframe,qscale,quality_factor)", - rc_override), - AVOPTION_SUB(avoptions_common), - AVOPTION_END() -}; - #ifdef CONFIG_ENCODERS -#ifdef CONFIG_RISKY AVCodec h263_encoder = { "h263", CODEC_TYPE_VIDEO, @@ -5939,6 +6488,7 @@ AVCodec h263_encoder = { MPV_encode_init, MPV_encode_picture, MPV_encode_end, + .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1}, }; AVCodec h263p_encoder = { @@ -5949,6 +6499,7 @@ AVCodec h263p_encoder = { MPV_encode_init, MPV_encode_picture, MPV_encode_end, + .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1}, }; AVCodec flv_encoder = { @@ -5959,6 +6510,7 @@ AVCodec flv_encoder = { MPV_encode_init, MPV_encode_picture, MPV_encode_end, + .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1}, }; AVCodec rv10_encoder = { @@ -5969,6 +6521,18 @@ AVCodec rv10_encoder = { MPV_encode_init, MPV_encode_picture, MPV_encode_end, + .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1}, +}; + +AVCodec rv20_encoder = { + "rv20", + CODEC_TYPE_VIDEO, + CODEC_ID_RV20, + sizeof(MpegEncContext), + MPV_encode_init, + MPV_encode_picture, + MPV_encode_end, + .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1}, }; AVCodec mpeg4_encoder = { @@ -5979,7 +6543,8 @@ AVCodec mpeg4_encoder = { MPV_encode_init, MPV_encode_picture, MPV_encode_end, - .options = mpeg4_options, + .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1}, + .capabilities= CODEC_CAP_DELAY, }; AVCodec msmpeg4v1_encoder = { @@ -5990,7 +6555,7 @@ AVCodec msmpeg4v1_encoder = { MPV_encode_init, MPV_encode_picture, MPV_encode_end, - .options = mpeg4_options, + .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1}, 
}; AVCodec msmpeg4v2_encoder = { @@ -6001,7 +6566,7 @@ AVCodec msmpeg4v2_encoder = { MPV_encode_init, MPV_encode_picture, MPV_encode_end, - .options = mpeg4_options, + .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1}, }; AVCodec msmpeg4v3_encoder = { @@ -6012,7 +6577,7 @@ AVCodec msmpeg4v3_encoder = { MPV_encode_init, MPV_encode_picture, MPV_encode_end, - .options = mpeg4_options, + .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1}, }; AVCodec wmv1_encoder = { @@ -6023,11 +6588,9 @@ AVCodec wmv1_encoder = { MPV_encode_init, MPV_encode_picture, MPV_encode_end, - .options = mpeg4_options, + .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1}, }; -#endif - AVCodec mjpeg_encoder = { "mjpeg", CODEC_TYPE_VIDEO, @@ -6036,6 +6599,7 @@ AVCodec mjpeg_encoder = { MPV_encode_init, MPV_encode_picture, MPV_encode_end, + .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUVJ420P, -1}, }; #endif //CONFIG_ENCODERS diff --git a/src/libffmpeg/libavcodec/mpegvideo.h b/src/libffmpeg/libavcodec/mpegvideo.h index 715fb6d92..a1c459e97 100644 --- a/src/libffmpeg/libavcodec/mpegvideo.h +++ b/src/libffmpeg/libavcodec/mpegvideo.h @@ -27,11 +27,13 @@ #define AVCODEC_MPEGVIDEO_H #include "dsputil.h" +#include "bitstream.h" #define FRAME_SKIPED 100 ///< return value for header parsers if frame is not coded enum OutputFormat { FMT_MPEG1, + FMT_H261, FMT_H263, FMT_MJPEG, FMT_H264, @@ -49,7 +51,7 @@ enum OutputFormat { #define MAX_THREADS 8 -#define MAX_PICTURE_COUNT 15 +#define MAX_PICTURE_COUNT 32 #define ME_MAP_SIZE 64 #define ME_MAP_SHIFT 3 @@ -66,6 +68,8 @@ enum OutputFormat { #define SI_TYPE FF_SI_TYPE ///< Switching Intra #define SP_TYPE FF_SP_TYPE ///< Switching Predicted +#define MAX_MB_BYTES (30*16*16*3/8 + 120) + typedef struct Predictor{ double coeff; double count; @@ -169,6 +173,8 @@ typedef struct Picture{ int frame_num; ///< h264 frame_num int pic_id; ///< h264 pic_num or long_term_pic_idx int long_ref; ///< 1->long term reference 0->short term reference + int ref_poc[2][16]; ///< h264 
POCs of the frames used as reference + int ref_count[2]; ///< number of entries in ref_poc int mb_var_sum; ///< sum of MB variance for current frame int mc_mb_var_sum; ///< motion compensated MB variance for current frame @@ -262,7 +268,7 @@ typedef struct MpegEncContext { int h263_msmpeg4; ///< generate MSMPEG4 compatible stream (deprecated, use msmpeg4_version instead) int h263_flv; ///< use flv h263 header - int codec_id; /* see CODEC_ID_xxx */ + enum CodecID codec_id; /* see CODEC_ID_xxx */ int fixed_qscale; ///< fixed qscale if non zero int encoding; ///< true if we are encoding (vs decoding) int flags; ///< AVCodecContext.flags (HQ, MV4, ...) @@ -368,8 +374,6 @@ typedef struct MpegEncContext { int last_non_b_pict_type; ///< used for mpeg4 gmc b-frames & ratecontrol int dropable; int frame_rate_index; - int frame_rate_ext_n; ///< MPEG-2 specific framerate modificators (numerator) - int frame_rate_ext_d; ///< MPEG-2 specific framerate modificators (denominator) /* motion compensation */ int unrestricted_mv; ///< mv can point outside of the coded picture @@ -599,9 +603,9 @@ typedef struct MpegEncContext { int divx_version; int divx_build; int divx_packed; -#define BITSTREAM_BUFFER_SIZE 1024*256 uint8_t *bitstream_buffer; //Divx 5.01 puts several frames in a single one, this is used to reorder them int bitstream_buffer_size; + int allocated_bitstream_buffer_size; int xvid_build; @@ -669,6 +673,8 @@ typedef struct MpegEncContext { #define CHROMA_420 1 #define CHROMA_422 2 #define CHROMA_444 3 + int chroma_x_shift;//depend on pix_format, that depend on chroma_format + int chroma_y_shift; int progressive_frame; int full_pel[2]; @@ -703,6 +709,10 @@ typedef struct MpegEncContext { DCTELEM *block/*align 16*/, int n, int qscale); void (*dct_unquantize_h263_inter)(struct MpegEncContext *s, DCTELEM *block/*align 16*/, int n, int qscale); + void (*dct_unquantize_h261_intra)(struct MpegEncContext *s, + DCTELEM *block/*align 16*/, int n, int qscale); + void 
(*dct_unquantize_h261_inter)(struct MpegEncContext *s, + DCTELEM *block/*align 16*/, int n, int qscale); void (*dct_unquantize_intra)(struct MpegEncContext *s, // unquantizer to use (mpeg4 can use both) DCTELEM *block/*align 16*/, int n, int qscale); void (*dct_unquantize_inter)(struct MpegEncContext *s, // unquantizer to use (mpeg4 can use both) @@ -717,7 +727,7 @@ int DCT_common_init(MpegEncContext *s); void MPV_decode_defaults(MpegEncContext *s); int MPV_common_init(MpegEncContext *s); void MPV_common_end(MpegEncContext *s); -void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64]); +void MPV_decode_mb(MpegEncContext *s, DCTELEM block[12][64]); int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx); void MPV_frame_end(MpegEncContext *s); int MPV_encode_init(AVCodecContext *avctx); @@ -768,15 +778,17 @@ extern enum PixelFormat ff_yuv420p_list[2]; void ff_init_block_index(MpegEncContext *s); static inline void ff_update_block_index(MpegEncContext *s){ + const int block_size= 8>>s->avctx->lowres; + s->block_index[0]+=2; s->block_index[1]+=2; s->block_index[2]+=2; s->block_index[3]+=2; s->block_index[4]++; s->block_index[5]++; - s->dest[0]+= 16; - s->dest[1]+= 8; - s->dest[2]+= 8; + s->dest[0]+= 2*block_size; + s->dest[1]+= block_size; + s->dest[2]+= block_size; } static inline int get_bits_diff(MpegEncContext *s){ @@ -799,7 +811,11 @@ void ff_fix_long_mvs(MpegEncContext * s, uint8_t *field_select_table, int field_ int16_t (*mv_table)[2], int f_code, int type, int truncate); void ff_init_me(MpegEncContext *s); int ff_pre_estimate_p_frame_motion(MpegEncContext * s, int mb_x, int mb_y); - +inline int ff_epzs_motion_search(MpegEncContext * s, int *mx_ptr, int *my_ptr, + int P[10][2], int src_index, int ref_index, int16_t (*last_mv)[2], + int ref_mv_scale, int size, int h); +int inline ff_get_mb_score(MpegEncContext * s, int mx, int my, int src_index, + int ref_index, int size, int h, int add_rate); /* mpeg12.c */ extern const int16_t 
ff_mpeg1_default_intra_matrix[64]; @@ -830,8 +846,8 @@ typedef struct RLTable { RL_VLC_ELEM *rl_vlc[32]; ///< decoding only } RLTable; -void init_rl(RLTable *rl); -void init_vlc_rl(RLTable *rl); +void init_rl(RLTable *rl, int use_static); +void init_vlc_rl(RLTable *rl, int use_static); static inline int get_rl_index(const RLTable *rl, int last, int run, int level) { @@ -852,6 +868,15 @@ extern const int16_t ff_mpeg4_default_non_intra_matrix[64]; extern const uint8_t ff_h263_chroma_qscale_table[32]; extern const uint8_t ff_h263_loop_filter_strength[32]; +/* h261.c */ +void ff_h261_loop_filter(MpegEncContext *s); +void ff_h261_reorder_mb_index(MpegEncContext* s); +void ff_h261_encode_mb(MpegEncContext *s, + DCTELEM block[6][64], + int motion_x, int motion_y); +void ff_h261_encode_picture_header(MpegEncContext * s, int picture_number); +void ff_h261_encode_init(MpegEncContext *s); + /* h263.c, h263dec.c */ int ff_h263_decode_init(AVCodecContext *avctx); @@ -912,6 +937,7 @@ int ff_mpeg4_find_frame_end(ParseContext *pc, const uint8_t *buf, int buf_size); /* rv10.c */ void rv10_encode_picture_header(MpegEncContext *s, int picture_number); int rv_decode_dc(MpegEncContext *s, int n); +void rv20_encode_picture_header(MpegEncContext *s, int picture_number); /* msmpeg4.c */ diff --git a/src/libffmpeg/libavcodec/msmpeg4.c b/src/libffmpeg/libavcodec/msmpeg4.c index 701a8da92..a3140abb8 100644 --- a/src/libffmpeg/libavcodec/msmpeg4.c +++ b/src/libffmpeg/libavcodec/msmpeg4.c @@ -59,6 +59,9 @@ static uint32_t v2_dc_lum_table[512][2]; static uint32_t v2_dc_chroma_table[512][2]; +static inline void msmpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n); +static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block, + int n, int coded, const uint8_t *scantable); static int msmpeg4_decode_dc(MpegEncContext * s, int n, int *dir_ptr); static int msmpeg4_decode_motion(MpegEncContext * s, int *mx_ptr, int *my_ptr); @@ -67,12 +70,13 @@ static void 
init_h263_dc_for_msmpeg4(void); static inline void msmpeg4_memsetw(short *tab, int val, int n); #ifdef CONFIG_ENCODERS static int get_size_of_code(MpegEncContext * s, RLTable *rl, int last, int run, int level, int intra); -static void msmpeg4_encode_dc(MpegEncContext * s, int level, int n, int *dir_ptr); #endif //CONFIG_ENCODERS static int msmpeg4v12_decode_mb(MpegEncContext *s, DCTELEM block[6][64]); static int msmpeg4v34_decode_mb(MpegEncContext *s, DCTELEM block[6][64]); static int wmv2_decode_mb(MpegEncContext *s, DCTELEM block[6][64]); +/* vc9 externs */ +extern uint8_t wmv3_dc_scale_table[32]; #ifdef DEBUG int intra_count = 0; @@ -173,6 +177,14 @@ static void common_init(MpegEncContext * s) s->y_dc_scale_table= wmv1_y_dc_scale_table; s->c_dc_scale_table= wmv1_c_dc_scale_table; break; + case 6: +/* xine: comment this out as WMV3 support is incomplete */ +#if 0 + s->y_dc_scale_table= wmv3_dc_scale_table; + s->c_dc_scale_table= wmv3_dc_scale_table; +#endif /* #if 0 */ + break; + } @@ -237,7 +249,7 @@ void ff_msmpeg4_encode_init(MpegEncContext *s) init_mv_table(&mv_tables[0]); init_mv_table(&mv_tables[1]); for(i=0;i<NB_RL_TABLES;i++) - init_rl(&rl_table[i]); + init_rl(&rl_table[i], 1); for(i=0; i<NB_RL_TABLES; i++){ int level; @@ -522,129 +534,6 @@ static inline void handle_slices(MpegEncContext *s){ } } -/* Encoding of a block. Very similar to MPEG4 except for a different - escape coding (same as H263) and more vlc tables. 
- */ -static inline void msmpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n) -{ - int level, run, last, i, j, last_index; - int last_non_zero, sign, slevel; - int code, run_diff, dc_pred_dir; - const RLTable *rl; - const uint8_t *scantable; - - if (s->mb_intra) { - set_stat(ST_DC); - msmpeg4_encode_dc(s, block[0], n, &dc_pred_dir); - i = 1; - if (n < 4) { - rl = &rl_table[s->rl_table_index]; - } else { - rl = &rl_table[3 + s->rl_chroma_table_index]; - } - run_diff = 0; - scantable= s->intra_scantable.permutated; - set_stat(ST_INTRA_AC); - } else { - i = 0; - rl = &rl_table[3 + s->rl_table_index]; - if(s->msmpeg4_version<=2) - run_diff = 0; - else - run_diff = 1; - scantable= s->inter_scantable.permutated; - set_stat(ST_INTER_AC); - } - - /* recalculate block_last_index for M$ wmv1 */ - if(s->msmpeg4_version>=4 && s->block_last_index[n]>0){ - for(last_index=63; last_index>=0; last_index--){ - if(block[scantable[last_index]]) break; - } - s->block_last_index[n]= last_index; - }else - last_index = s->block_last_index[n]; - /* AC coefs */ - last_non_zero = i - 1; - for (; i <= last_index; i++) { - j = scantable[i]; - level = block[j]; - if (level) { - run = i - last_non_zero - 1; - last = (i == last_index); - sign = 0; - slevel = level; - if (level < 0) { - sign = 1; - level = -level; - } - - if(level<=MAX_LEVEL && run<=MAX_RUN){ - s->ac_stats[s->mb_intra][n>3][level][run][last]++; - } -#if 0 -else - s->ac_stats[s->mb_intra][n>3][40][63][0]++; //esc3 like -#endif - code = get_rl_index(rl, last, run, level); - put_bits(&s->pb, rl->table_vlc[code][1], rl->table_vlc[code][0]); - if (code == rl->n) { - int level1, run1; - - level1 = level - rl->max_level[last][run]; - if (level1 < 1) - goto esc2; - code = get_rl_index(rl, last, run, level1); - if (code == rl->n) { - esc2: - put_bits(&s->pb, 1, 0); - if (level > MAX_LEVEL) - goto esc3; - run1 = run - rl->max_run[last][level] - run_diff; - if (run1 < 0) - goto esc3; - code = get_rl_index(rl, last, run1, level); 
- if (code == rl->n) { - esc3: - /* third escape */ - put_bits(&s->pb, 1, 0); - put_bits(&s->pb, 1, last); - if(s->msmpeg4_version>=4){ - if(s->esc3_level_length==0){ - s->esc3_level_length=8; - s->esc3_run_length= 6; - if(s->qscale<8) - put_bits(&s->pb, 6, 3); - else - put_bits(&s->pb, 8, 3); - } - put_bits(&s->pb, s->esc3_run_length, run); - put_bits(&s->pb, 1, sign); - put_bits(&s->pb, s->esc3_level_length, level); - }else{ - put_bits(&s->pb, 6, run); - put_bits(&s->pb, 8, slevel & 0xff); - } - } else { - /* second escape */ - put_bits(&s->pb, 1, 1); - put_bits(&s->pb, rl->table_vlc[code][1], rl->table_vlc[code][0]); - put_bits(&s->pb, 1, sign); - } - } else { - /* first escape */ - put_bits(&s->pb, 1, 1); - put_bits(&s->pb, rl->table_vlc[code][1], rl->table_vlc[code][0]); - put_bits(&s->pb, 1, sign); - } - } else { - put_bits(&s->pb, 1, sign); - } - last_non_zero = i; - } - } -} - void msmpeg4_encode_mb(MpegEncContext * s, DCTELEM block[6][64], int motion_x, int motion_y) @@ -750,7 +639,7 @@ void msmpeg4_encode_mb(MpegEncContext * s, if (s->pict_type == I_TYPE) { set_stat(ST_INTRA_MB); put_bits(&s->pb, - table_mb_intra[coded_cbp][1], table_mb_intra[coded_cbp][0]); + ff_msmp4_mb_i_table[coded_cbp][1], ff_msmp4_mb_i_table[coded_cbp][0]); } else { if (s->use_skip_mb_code) put_bits(&s->pb, 1, 0); /* mb coded */ @@ -837,7 +726,7 @@ static inline int msmpeg4_pred_dc(MpegEncContext * s, int n, necessitate to modify mpegvideo.c. The problem comes from the fact they decided to store the quantized DC (which would lead to problems if Q could vary !) 
*/ -#if defined ARCH_X86 && !defined PIC +#if (defined(ARCH_X86) || defined(ARCH_X86_64)) && !defined PIC asm volatile( "movl %3, %%eax \n\t" "shrl $1, %%eax \n\t" @@ -1003,15 +892,15 @@ static void msmpeg4_encode_dc(MpegEncContext * s, int level, int n, int *dir_ptr if (s->dc_table_index == 0) { if (n < 4) { - put_bits(&s->pb, table0_dc_lum[code][1], table0_dc_lum[code][0]); + put_bits(&s->pb, ff_table0_dc_lum[code][1], ff_table0_dc_lum[code][0]); } else { - put_bits(&s->pb, table0_dc_chroma[code][1], table0_dc_chroma[code][0]); + put_bits(&s->pb, ff_table0_dc_chroma[code][1], ff_table0_dc_chroma[code][0]); } } else { if (n < 4) { - put_bits(&s->pb, table1_dc_lum[code][1], table1_dc_lum[code][0]); + put_bits(&s->pb, ff_table1_dc_lum[code][1], ff_table1_dc_lum[code][0]); } else { - put_bits(&s->pb, table1_dc_chroma[code][1], table1_dc_chroma[code][0]); + put_bits(&s->pb, ff_table1_dc_chroma[code][1], ff_table1_dc_chroma[code][0]); } } @@ -1024,14 +913,136 @@ static void msmpeg4_encode_dc(MpegEncContext * s, int level, int n, int *dir_ptr } } +/* Encoding of a block. Very similar to MPEG4 except for a different + escape coding (same as H263) and more vlc tables. 
+ */ +static inline void msmpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n) +{ + int level, run, last, i, j, last_index; + int last_non_zero, sign, slevel; + int code, run_diff, dc_pred_dir; + const RLTable *rl; + const uint8_t *scantable; + + if (s->mb_intra) { + set_stat(ST_DC); + msmpeg4_encode_dc(s, block[0], n, &dc_pred_dir); + i = 1; + if (n < 4) { + rl = &rl_table[s->rl_table_index]; + } else { + rl = &rl_table[3 + s->rl_chroma_table_index]; + } + run_diff = 0; + scantable= s->intra_scantable.permutated; + set_stat(ST_INTRA_AC); + } else { + i = 0; + rl = &rl_table[3 + s->rl_table_index]; + if(s->msmpeg4_version<=2) + run_diff = 0; + else + run_diff = 1; + scantable= s->inter_scantable.permutated; + set_stat(ST_INTER_AC); + } + + /* recalculate block_last_index for M$ wmv1 */ + if(s->msmpeg4_version>=4 && s->block_last_index[n]>0){ + for(last_index=63; last_index>=0; last_index--){ + if(block[scantable[last_index]]) break; + } + s->block_last_index[n]= last_index; + }else + last_index = s->block_last_index[n]; + /* AC coefs */ + last_non_zero = i - 1; + for (; i <= last_index; i++) { + j = scantable[i]; + level = block[j]; + if (level) { + run = i - last_non_zero - 1; + last = (i == last_index); + sign = 0; + slevel = level; + if (level < 0) { + sign = 1; + level = -level; + } + + if(level<=MAX_LEVEL && run<=MAX_RUN){ + s->ac_stats[s->mb_intra][n>3][level][run][last]++; + } +#if 0 +else + s->ac_stats[s->mb_intra][n>3][40][63][0]++; //esc3 like +#endif + code = get_rl_index(rl, last, run, level); + put_bits(&s->pb, rl->table_vlc[code][1], rl->table_vlc[code][0]); + if (code == rl->n) { + int level1, run1; + + level1 = level - rl->max_level[last][run]; + if (level1 < 1) + goto esc2; + code = get_rl_index(rl, last, run, level1); + if (code == rl->n) { + esc2: + put_bits(&s->pb, 1, 0); + if (level > MAX_LEVEL) + goto esc3; + run1 = run - rl->max_run[last][level] - run_diff; + if (run1 < 0) + goto esc3; + code = get_rl_index(rl, last, run1, level); 
+ if (code == rl->n) { + esc3: + /* third escape */ + put_bits(&s->pb, 1, 0); + put_bits(&s->pb, 1, last); + if(s->msmpeg4_version>=4){ + if(s->esc3_level_length==0){ + s->esc3_level_length=8; + s->esc3_run_length= 6; + if(s->qscale<8) + put_bits(&s->pb, 6, 3); + else + put_bits(&s->pb, 8, 3); + } + put_bits(&s->pb, s->esc3_run_length, run); + put_bits(&s->pb, 1, sign); + put_bits(&s->pb, s->esc3_level_length, level); + }else{ + put_bits(&s->pb, 6, run); + put_bits(&s->pb, 8, slevel & 0xff); + } + } else { + /* second escape */ + put_bits(&s->pb, 1, 1); + put_bits(&s->pb, rl->table_vlc[code][1], rl->table_vlc[code][0]); + put_bits(&s->pb, 1, sign); + } + } else { + /* first escape */ + put_bits(&s->pb, 1, 1); + put_bits(&s->pb, rl->table_vlc[code][1], rl->table_vlc[code][0]); + put_bits(&s->pb, 1, sign); + } + } else { + put_bits(&s->pb, 1, sign); + } + last_non_zero = i; + } + } +} /****************************************/ /* decoding stuff */ static VLC mb_non_intra_vlc[4]; -static VLC mb_intra_vlc; -static VLC dc_lum_vlc[2]; -static VLC dc_chroma_vlc[2]; +VLC ff_msmp4_mb_i_vlc; +VLC ff_msmp4_dc_luma_vlc[2]; +VLC ff_msmp4_dc_chroma_vlc[2]; static VLC v2_dc_lum_vlc; static VLC v2_dc_chroma_vlc; static VLC cbpy_vlc; @@ -1110,69 +1121,69 @@ int ff_msmpeg4_decode_init(MpegEncContext *s) done = 1; for(i=0;i<NB_RL_TABLES;i++) { - init_rl(&rl_table[i]); - init_vlc_rl(&rl_table[i]); + init_rl(&rl_table[i], 1); + init_vlc_rl(&rl_table[i], 1); } for(i=0;i<2;i++) { mv = &mv_tables[i]; init_vlc(&mv->vlc, MV_VLC_BITS, mv->n + 1, mv->table_mv_bits, 1, 1, - mv->table_mv_code, 2, 2); + mv->table_mv_code, 2, 2, 1); } - init_vlc(&dc_lum_vlc[0], DC_VLC_BITS, 120, - &table0_dc_lum[0][1], 8, 4, - &table0_dc_lum[0][0], 8, 4); - init_vlc(&dc_chroma_vlc[0], DC_VLC_BITS, 120, - &table0_dc_chroma[0][1], 8, 4, - &table0_dc_chroma[0][0], 8, 4); - init_vlc(&dc_lum_vlc[1], DC_VLC_BITS, 120, - &table1_dc_lum[0][1], 8, 4, - &table1_dc_lum[0][0], 8, 4); - init_vlc(&dc_chroma_vlc[1], 
DC_VLC_BITS, 120, - &table1_dc_chroma[0][1], 8, 4, - &table1_dc_chroma[0][0], 8, 4); + init_vlc(&ff_msmp4_dc_luma_vlc[0], DC_VLC_BITS, 120, + &ff_table0_dc_lum[0][1], 8, 4, + &ff_table0_dc_lum[0][0], 8, 4, 1); + init_vlc(&ff_msmp4_dc_chroma_vlc[0], DC_VLC_BITS, 120, + &ff_table0_dc_chroma[0][1], 8, 4, + &ff_table0_dc_chroma[0][0], 8, 4, 1); + init_vlc(&ff_msmp4_dc_luma_vlc[1], DC_VLC_BITS, 120, + &ff_table1_dc_lum[0][1], 8, 4, + &ff_table1_dc_lum[0][0], 8, 4, 1); + init_vlc(&ff_msmp4_dc_chroma_vlc[1], DC_VLC_BITS, 120, + &ff_table1_dc_chroma[0][1], 8, 4, + &ff_table1_dc_chroma[0][0], 8, 4, 1); init_vlc(&v2_dc_lum_vlc, DC_VLC_BITS, 512, &v2_dc_lum_table[0][1], 8, 4, - &v2_dc_lum_table[0][0], 8, 4); + &v2_dc_lum_table[0][0], 8, 4, 1); init_vlc(&v2_dc_chroma_vlc, DC_VLC_BITS, 512, &v2_dc_chroma_table[0][1], 8, 4, - &v2_dc_chroma_table[0][0], 8, 4); + &v2_dc_chroma_table[0][0], 8, 4, 1); init_vlc(&cbpy_vlc, CBPY_VLC_BITS, 16, &cbpy_tab[0][1], 2, 1, - &cbpy_tab[0][0], 2, 1); + &cbpy_tab[0][0], 2, 1, 1); init_vlc(&v2_intra_cbpc_vlc, V2_INTRA_CBPC_VLC_BITS, 4, &v2_intra_cbpc[0][1], 2, 1, - &v2_intra_cbpc[0][0], 2, 1); + &v2_intra_cbpc[0][0], 2, 1, 1); init_vlc(&v2_mb_type_vlc, V2_MB_TYPE_VLC_BITS, 8, &v2_mb_type[0][1], 2, 1, - &v2_mb_type[0][0], 2, 1); + &v2_mb_type[0][0], 2, 1, 1); init_vlc(&v2_mv_vlc, V2_MV_VLC_BITS, 33, &mvtab[0][1], 2, 1, - &mvtab[0][0], 2, 1); + &mvtab[0][0], 2, 1, 1); for(i=0; i<4; i++){ init_vlc(&mb_non_intra_vlc[i], MB_NON_INTRA_VLC_BITS, 128, &wmv2_inter_table[i][0][1], 8, 4, - &wmv2_inter_table[i][0][0], 8, 4); //FIXME name? + &wmv2_inter_table[i][0][0], 8, 4, 1); //FIXME name? 
} - init_vlc(&mb_intra_vlc, MB_INTRA_VLC_BITS, 64, - &table_mb_intra[0][1], 4, 2, - &table_mb_intra[0][0], 4, 2); + init_vlc(&ff_msmp4_mb_i_vlc, MB_INTRA_VLC_BITS, 64, + &ff_msmp4_mb_i_table[0][1], 4, 2, + &ff_msmp4_mb_i_table[0][0], 4, 2, 1); init_vlc(&v1_intra_cbpc_vlc, V1_INTRA_CBPC_VLC_BITS, 8, intra_MCBPC_bits, 1, 1, - intra_MCBPC_code, 1, 1); + intra_MCBPC_code, 1, 1, 1); init_vlc(&v1_inter_cbpc_vlc, V1_INTER_CBPC_VLC_BITS, 25, inter_MCBPC_bits, 1, 1, - inter_MCBPC_code, 1, 1); + inter_MCBPC_code, 1, 1, 1); init_vlc(&inter_intra_vlc, INTER_INTRA_VLC_BITS, 4, &table_inter_intra[0][1], 2, 1, - &table_inter_intra[0][0], 2, 1); + &table_inter_intra[0][0], 2, 1, 1); } switch(s->msmpeg4_version){ @@ -1186,6 +1197,8 @@ int ff_msmpeg4_decode_init(MpegEncContext *s) break; case 5: s->decode_mb= wmv2_decode_mb; + case 6: + //FIXME + TODO VC9 decode mb break; } @@ -1194,16 +1207,6 @@ int ff_msmpeg4_decode_init(MpegEncContext *s) return 0; } -static int decode012(GetBitContext *gb) -{ - int n; - n = get_bits1(gb); - if (n == 0) - return 0; - else - return get_bits1(gb) + 1; -} - int msmpeg4_decode_picture_header(MpegEncContext * s) { int code; @@ -1477,6 +1480,183 @@ static int msmpeg4v2_decode_motion(MpegEncContext * s, int pred, int f_code) return val; } +static int msmpeg4v12_decode_mb(MpegEncContext *s, DCTELEM block[6][64]) +{ + int cbp, code, i; + + if (s->pict_type == P_TYPE) { + if (s->use_skip_mb_code) { + if (get_bits1(&s->gb)) { + /* skip mb */ + s->mb_intra = 0; + for(i=0;i<6;i++) + s->block_last_index[i] = -1; + s->mv_dir = MV_DIR_FORWARD; + s->mv_type = MV_TYPE_16X16; + s->mv[0][0][0] = 0; + s->mv[0][0][1] = 0; + s->mb_skiped = 1; + return 0; + } + } + + if(s->msmpeg4_version==2) + code = get_vlc2(&s->gb, v2_mb_type_vlc.table, V2_MB_TYPE_VLC_BITS, 1); + else + code = get_vlc2(&s->gb, v1_inter_cbpc_vlc.table, V1_INTER_CBPC_VLC_BITS, 3); + if(code<0 || code>7){ + av_log(s->avctx, AV_LOG_ERROR, "cbpc %d invalid at %d %d\n", code, s->mb_x, s->mb_y); + return 
-1; + } + + s->mb_intra = code >>2; + + cbp = code & 0x3; + } else { + s->mb_intra = 1; + if(s->msmpeg4_version==2) + cbp= get_vlc2(&s->gb, v2_intra_cbpc_vlc.table, V2_INTRA_CBPC_VLC_BITS, 1); + else + cbp= get_vlc2(&s->gb, v1_intra_cbpc_vlc.table, V1_INTRA_CBPC_VLC_BITS, 1); + if(cbp<0 || cbp>3){ + av_log(s->avctx, AV_LOG_ERROR, "cbpc %d invalid at %d %d\n", cbp, s->mb_x, s->mb_y); + return -1; + } + } + + if (!s->mb_intra) { + int mx, my, cbpy; + + cbpy= get_vlc2(&s->gb, cbpy_vlc.table, CBPY_VLC_BITS, 1); + if(cbpy<0){ + av_log(s->avctx, AV_LOG_ERROR, "cbpy %d invalid at %d %d\n", cbp, s->mb_x, s->mb_y); + return -1; + } + + cbp|= cbpy<<2; + if(s->msmpeg4_version==1 || (cbp&3) != 3) cbp^= 0x3C; + + h263_pred_motion(s, 0, 0, &mx, &my); + mx= msmpeg4v2_decode_motion(s, mx, 1); + my= msmpeg4v2_decode_motion(s, my, 1); + + s->mv_dir = MV_DIR_FORWARD; + s->mv_type = MV_TYPE_16X16; + s->mv[0][0][0] = mx; + s->mv[0][0][1] = my; + } else { + if(s->msmpeg4_version==2){ + s->ac_pred = get_bits1(&s->gb); + cbp|= get_vlc2(&s->gb, cbpy_vlc.table, CBPY_VLC_BITS, 1)<<2; //FIXME check errors + } else{ + s->ac_pred = 0; + cbp|= get_vlc2(&s->gb, cbpy_vlc.table, CBPY_VLC_BITS, 1)<<2; //FIXME check errors + if(s->pict_type==P_TYPE) cbp^=0x3C; + } + } + + for (i = 0; i < 6; i++) { + if (msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, NULL) < 0) + { + av_log(s->avctx, AV_LOG_ERROR, "\nerror while decoding block: %d x %d (%d)\n", s->mb_x, s->mb_y, i); + return -1; + } + } + return 0; +} + +static int msmpeg4v34_decode_mb(MpegEncContext *s, DCTELEM block[6][64]) +{ + int cbp, code, i; + uint8_t *coded_val; + uint32_t * const mb_type_ptr= &s->current_picture.mb_type[ s->mb_x + s->mb_y*s->mb_stride ]; + + if (s->pict_type == P_TYPE) { + set_stat(ST_INTER_MB); + if (s->use_skip_mb_code) { + if (get_bits1(&s->gb)) { + /* skip mb */ + s->mb_intra = 0; + for(i=0;i<6;i++) + s->block_last_index[i] = -1; + s->mv_dir = MV_DIR_FORWARD; + s->mv_type = MV_TYPE_16X16; + s->mv[0][0][0] = 0; 
+ s->mv[0][0][1] = 0; + s->mb_skiped = 1; + *mb_type_ptr = MB_TYPE_SKIP | MB_TYPE_L0 | MB_TYPE_16x16; + + return 0; + } + } + + code = get_vlc2(&s->gb, mb_non_intra_vlc[DEFAULT_INTER_INDEX].table, MB_NON_INTRA_VLC_BITS, 3); + if (code < 0) + return -1; + //s->mb_intra = (code & 0x40) ? 0 : 1; + s->mb_intra = (~code & 0x40) >> 6; + + cbp = code & 0x3f; + } else { + set_stat(ST_INTRA_MB); + s->mb_intra = 1; + code = get_vlc2(&s->gb, ff_msmp4_mb_i_vlc.table, MB_INTRA_VLC_BITS, 2); + if (code < 0) + return -1; + /* predict coded block pattern */ + cbp = 0; + for(i=0;i<6;i++) { + int val = ((code >> (5 - i)) & 1); + if (i < 4) { + int pred = coded_block_pred(s, i, &coded_val); + val = val ^ pred; + *coded_val = val; + } + cbp |= val << (5 - i); + } + } + + if (!s->mb_intra) { + int mx, my; +//printf("P at %d %d\n", s->mb_x, s->mb_y); + if(s->per_mb_rl_table && cbp){ + s->rl_table_index = decode012(&s->gb); + s->rl_chroma_table_index = s->rl_table_index; + } + set_stat(ST_MV); + h263_pred_motion(s, 0, 0, &mx, &my); + if (msmpeg4_decode_motion(s, &mx, &my) < 0) + return -1; + s->mv_dir = MV_DIR_FORWARD; + s->mv_type = MV_TYPE_16X16; + s->mv[0][0][0] = mx; + s->mv[0][0][1] = my; + *mb_type_ptr = MB_TYPE_L0 | MB_TYPE_16x16; + } else { +//printf("I at %d %d %d %06X\n", s->mb_x, s->mb_y, ((cbp&3)? 1 : 0) +((cbp&0x3C)? 
2 : 0), show_bits(&s->gb, 24)); + set_stat(ST_INTRA_MB); + s->ac_pred = get_bits1(&s->gb); + *mb_type_ptr = MB_TYPE_INTRA; + if(s->inter_intra_pred){ + s->h263_aic_dir= get_vlc2(&s->gb, inter_intra_vlc.table, INTER_INTRA_VLC_BITS, 1); +// printf("%d%d %d %d/", s->ac_pred, s->h263_aic_dir, s->mb_x, s->mb_y); + } + if(s->per_mb_rl_table && cbp){ + s->rl_table_index = decode012(&s->gb); + s->rl_chroma_table_index = s->rl_table_index; + } + } + + for (i = 0; i < 6; i++) { + if (msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, NULL) < 0) + { + av_log(s->avctx, AV_LOG_ERROR, "\nerror while decoding block: %d x %d (%d)\n", s->mb_x, s->mb_y, i); + return -1; + } + } + + return 0; +} //#define ERROR_DETAILS static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block, int n, int coded, const uint8_t *scan_table) @@ -1554,7 +1734,7 @@ static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block, OPEN_READER(re, &s->gb); for(;;) { UPDATE_CACHE(re, &s->gb); - GET_RL_VLC(level, run, re, &s->gb, rl_vlc, TEX_VLC_BITS, 2); + GET_RL_VLC(level, run, re, &s->gb, rl_vlc, TEX_VLC_BITS, 2, 0); if (level==0) { int cache; cache= GET_CACHE(re, &s->gb); @@ -1652,7 +1832,7 @@ static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block, #else SKIP_BITS(re, &s->gb, 2); #endif - GET_RL_VLC(level, run, re, &s->gb, rl_vlc, TEX_VLC_BITS, 2); + GET_RL_VLC(level, run, re, &s->gb, rl_vlc, TEX_VLC_BITS, 2, 1); i+= run + rl->max_run[run>>7][level/qmul] + run_diff; //FIXME opt indexing level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1); LAST_SKIP_BITS(re, &s->gb, 1); @@ -1671,7 +1851,7 @@ static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block, #else SKIP_BITS(re, &s->gb, 1); #endif - GET_RL_VLC(level, run, re, &s->gb, rl_vlc, TEX_VLC_BITS, 2); + GET_RL_VLC(level, run, re, &s->gb, rl_vlc, TEX_VLC_BITS, 2, 1); i+= run; level = level + rl->max_level[run>>7][(run-1)&63] * qmul;//FIXME opt indexing level = (level ^ 
SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1); @@ -1728,184 +1908,6 @@ static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block, return 0; } -static int msmpeg4v12_decode_mb(MpegEncContext *s, DCTELEM block[6][64]) -{ - int cbp, code, i; - - if (s->pict_type == P_TYPE) { - if (s->use_skip_mb_code) { - if (get_bits1(&s->gb)) { - /* skip mb */ - s->mb_intra = 0; - for(i=0;i<6;i++) - s->block_last_index[i] = -1; - s->mv_dir = MV_DIR_FORWARD; - s->mv_type = MV_TYPE_16X16; - s->mv[0][0][0] = 0; - s->mv[0][0][1] = 0; - s->mb_skiped = 1; - return 0; - } - } - - if(s->msmpeg4_version==2) - code = get_vlc2(&s->gb, v2_mb_type_vlc.table, V2_MB_TYPE_VLC_BITS, 1); - else - code = get_vlc2(&s->gb, v1_inter_cbpc_vlc.table, V1_INTER_CBPC_VLC_BITS, 3); - if(code<0 || code>7){ - av_log(s->avctx, AV_LOG_ERROR, "cbpc %d invalid at %d %d\n", code, s->mb_x, s->mb_y); - return -1; - } - - s->mb_intra = code >>2; - - cbp = code & 0x3; - } else { - s->mb_intra = 1; - if(s->msmpeg4_version==2) - cbp= get_vlc2(&s->gb, v2_intra_cbpc_vlc.table, V2_INTRA_CBPC_VLC_BITS, 1); - else - cbp= get_vlc2(&s->gb, v1_intra_cbpc_vlc.table, V1_INTRA_CBPC_VLC_BITS, 1); - if(cbp<0 || cbp>3){ - av_log(s->avctx, AV_LOG_ERROR, "cbpc %d invalid at %d %d\n", cbp, s->mb_x, s->mb_y); - return -1; - } - } - - if (!s->mb_intra) { - int mx, my, cbpy; - - cbpy= get_vlc2(&s->gb, cbpy_vlc.table, CBPY_VLC_BITS, 1); - if(cbpy<0){ - av_log(s->avctx, AV_LOG_ERROR, "cbpy %d invalid at %d %d\n", cbp, s->mb_x, s->mb_y); - return -1; - } - - cbp|= cbpy<<2; - if(s->msmpeg4_version==1 || (cbp&3) != 3) cbp^= 0x3C; - - h263_pred_motion(s, 0, 0, &mx, &my); - mx= msmpeg4v2_decode_motion(s, mx, 1); - my= msmpeg4v2_decode_motion(s, my, 1); - - s->mv_dir = MV_DIR_FORWARD; - s->mv_type = MV_TYPE_16X16; - s->mv[0][0][0] = mx; - s->mv[0][0][1] = my; - } else { - if(s->msmpeg4_version==2){ - s->ac_pred = get_bits1(&s->gb); - cbp|= get_vlc2(&s->gb, cbpy_vlc.table, CBPY_VLC_BITS, 1)<<2; //FIXME check errors - } else{ 
- s->ac_pred = 0; - cbp|= get_vlc2(&s->gb, cbpy_vlc.table, CBPY_VLC_BITS, 1)<<2; //FIXME check errors - if(s->pict_type==P_TYPE) cbp^=0x3C; - } - } - - for (i = 0; i < 6; i++) { - if (msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, NULL) < 0) - { - av_log(s->avctx, AV_LOG_ERROR, "\nerror while decoding block: %d x %d (%d)\n", s->mb_x, s->mb_y, i); - return -1; - } - } - return 0; -} - -static int msmpeg4v34_decode_mb(MpegEncContext *s, DCTELEM block[6][64]) -{ - int cbp, code, i; - uint8_t *coded_val; - uint32_t * const mb_type_ptr= &s->current_picture.mb_type[ s->mb_x + s->mb_y*s->mb_stride ]; - - if (s->pict_type == P_TYPE) { - set_stat(ST_INTER_MB); - if (s->use_skip_mb_code) { - if (get_bits1(&s->gb)) { - /* skip mb */ - s->mb_intra = 0; - for(i=0;i<6;i++) - s->block_last_index[i] = -1; - s->mv_dir = MV_DIR_FORWARD; - s->mv_type = MV_TYPE_16X16; - s->mv[0][0][0] = 0; - s->mv[0][0][1] = 0; - s->mb_skiped = 1; - *mb_type_ptr = MB_TYPE_SKIP | MB_TYPE_L0 | MB_TYPE_16x16; - - return 0; - } - } - - code = get_vlc2(&s->gb, mb_non_intra_vlc[DEFAULT_INTER_INDEX].table, MB_NON_INTRA_VLC_BITS, 3); - if (code < 0) - return -1; - //s->mb_intra = (code & 0x40) ? 
0 : 1; - s->mb_intra = (~code & 0x40) >> 6; - - cbp = code & 0x3f; - } else { - set_stat(ST_INTRA_MB); - s->mb_intra = 1; - code = get_vlc2(&s->gb, mb_intra_vlc.table, MB_INTRA_VLC_BITS, 2); - if (code < 0) - return -1; - /* predict coded block pattern */ - cbp = 0; - for(i=0;i<6;i++) { - int val = ((code >> (5 - i)) & 1); - if (i < 4) { - int pred = coded_block_pred(s, i, &coded_val); - val = val ^ pred; - *coded_val = val; - } - cbp |= val << (5 - i); - } - } - - if (!s->mb_intra) { - int mx, my; -//printf("P at %d %d\n", s->mb_x, s->mb_y); - if(s->per_mb_rl_table && cbp){ - s->rl_table_index = decode012(&s->gb); - s->rl_chroma_table_index = s->rl_table_index; - } - set_stat(ST_MV); - h263_pred_motion(s, 0, 0, &mx, &my); - if (msmpeg4_decode_motion(s, &mx, &my) < 0) - return -1; - s->mv_dir = MV_DIR_FORWARD; - s->mv_type = MV_TYPE_16X16; - s->mv[0][0][0] = mx; - s->mv[0][0][1] = my; - *mb_type_ptr = MB_TYPE_L0 | MB_TYPE_16x16; - } else { -//printf("I at %d %d %d %06X\n", s->mb_x, s->mb_y, ((cbp&3)? 1 : 0) +((cbp&0x3C)? 
2 : 0), show_bits(&s->gb, 24)); - set_stat(ST_INTRA_MB); - s->ac_pred = get_bits1(&s->gb); - *mb_type_ptr = MB_TYPE_INTRA; - if(s->inter_intra_pred){ - s->h263_aic_dir= get_vlc2(&s->gb, inter_intra_vlc.table, INTER_INTRA_VLC_BITS, 1); -// printf("%d%d %d %d/", s->ac_pred, s->h263_aic_dir, s->mb_x, s->mb_y); - } - if(s->per_mb_rl_table && cbp){ - s->rl_table_index = decode012(&s->gb); - s->rl_chroma_table_index = s->rl_table_index; - } - } - - for (i = 0; i < 6; i++) { - if (msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, NULL) < 0) - { - av_log(s->avctx, AV_LOG_ERROR, "\nerror while decoding block: %d x %d (%d)\n", s->mb_x, s->mb_y, i); - return -1; - } - } - - return 0; -} - static int msmpeg4_decode_dc(MpegEncContext * s, int n, int *dir_ptr) { int level, pred; @@ -1921,9 +1923,9 @@ static int msmpeg4_decode_dc(MpegEncContext * s, int n, int *dir_ptr) level-=256; }else{ //FIXME optimize use unified tables & index if (n < 4) { - level = get_vlc2(&s->gb, dc_lum_vlc[s->dc_table_index].table, DC_VLC_BITS, 3); + level = get_vlc2(&s->gb, ff_msmp4_dc_luma_vlc[s->dc_table_index].table, DC_VLC_BITS, 3); } else { - level = get_vlc2(&s->gb, dc_chroma_vlc[s->dc_table_index].table, DC_VLC_BITS, 3); + level = get_vlc2(&s->gb, ff_msmp4_dc_chroma_vlc[s->dc_table_index].table, DC_VLC_BITS, 3); } if (level < 0){ av_log(s->avctx, AV_LOG_ERROR, "illegal dc vlc\n"); diff --git a/src/libffmpeg/libavcodec/msmpeg4data.h b/src/libffmpeg/libavcodec/msmpeg4data.h index 69568cbf6..bc4b454ac 100644 --- a/src/libffmpeg/libavcodec/msmpeg4data.h +++ b/src/libffmpeg/libavcodec/msmpeg4data.h @@ -4,7 +4,7 @@ */ /* intra picture macro block coded block pattern */ -static const uint16_t table_mb_intra[64][2] = { +const uint16_t ff_msmp4_mb_i_table[64][2] = { { 0x1, 1 },{ 0x17, 6 },{ 0x9, 5 },{ 0x5, 5 }, { 0x6, 5 },{ 0x47, 9 },{ 0x20, 7 },{ 0x10, 7 }, { 0x2, 5 },{ 0x7c, 9 },{ 0x3a, 7 },{ 0x1d, 7 }, @@ -61,7 +61,7 @@ static const uint32_t table_mb_non_intra[128][2] = { /* dc table 0 */ 
-static const uint32_t table0_dc_lum[120][2] = { +const uint32_t ff_table0_dc_lum[120][2] = { { 0x1, 1 },{ 0x1, 2 },{ 0x1, 4 },{ 0x1, 5 }, { 0x5, 5 },{ 0x7, 5 },{ 0x8, 6 },{ 0xc, 6 }, { 0x0, 7 },{ 0x2, 7 },{ 0x12, 7 },{ 0x1a, 7 }, @@ -94,7 +94,7 @@ static const uint32_t table0_dc_lum[120][2] = { { 0x6078c, 24 },{ 0x6078d, 24 },{ 0x6078e, 24 },{ 0x6078f, 24 }, }; -static const uint32_t table0_dc_chroma[120][2] = { +const uint32_t ff_table0_dc_chroma[120][2] = { { 0x0, 2 },{ 0x1, 2 },{ 0x5, 3 },{ 0x9, 4 }, { 0xd, 4 },{ 0x11, 5 },{ 0x1d, 5 },{ 0x1f, 5 }, { 0x21, 6 },{ 0x31, 6 },{ 0x38, 6 },{ 0x33, 6 }, @@ -129,7 +129,7 @@ static const uint32_t table0_dc_chroma[120][2] = { /* dc table 1 */ -static const uint32_t table1_dc_lum[120][2] = { +const uint32_t ff_table1_dc_lum[120][2] = { { 0x2, 2 },{ 0x3, 2 },{ 0x3, 3 },{ 0x2, 4 }, { 0x5, 4 },{ 0x1, 5 },{ 0x3, 5 },{ 0x8, 5 }, { 0x0, 6 },{ 0x5, 6 },{ 0xd, 6 },{ 0xf, 6 }, @@ -162,7 +162,7 @@ static const uint32_t table1_dc_lum[120][2] = { { 0x1e6964, 26 },{ 0x1e6965, 26 },{ 0x1e6966, 26 },{ 0x1e6967, 26 }, }; -static const uint32_t table1_dc_chroma[120][2] = { +const uint32_t ff_table1_dc_chroma[120][2] = { { 0x0, 2 },{ 0x1, 2 },{ 0x4, 3 },{ 0x7, 3 }, { 0xb, 4 },{ 0xd, 4 },{ 0x15, 5 },{ 0x28, 6 }, { 0x30, 6 },{ 0x32, 6 },{ 0x52, 7 },{ 0x62, 7 }, diff --git a/src/libffmpeg/libavcodec/msrle.c b/src/libffmpeg/libavcodec/msrle.c index b318faa77..d95e3f79b 100644 --- a/src/libffmpeg/libavcodec/msrle.c +++ b/src/libffmpeg/libavcodec/msrle.c @@ -254,10 +254,6 @@ static int msrle_decode_frame(AVCodecContext *avctx, { MsrleContext *s = (MsrleContext *)avctx->priv_data; - /* no supplementary picture */ - if (buf_size == 0) - return 0; - s->buf = buf; s->size = buf_size; diff --git a/src/libffmpeg/libavcodec/msvideo1.c b/src/libffmpeg/libavcodec/msvideo1.c index b88bdab5d..518df0e52 100644 --- a/src/libffmpeg/libavcodec/msvideo1.c +++ b/src/libffmpeg/libavcodec/msvideo1.c @@ -302,10 +302,6 @@ static int 
msvideo1_decode_frame(AVCodecContext *avctx, { Msvideo1Context *s = (Msvideo1Context *)avctx->priv_data; - /* no supplementary picture */ - if (buf_size == 0) - return 0; - s->buf = buf; s->size = buf_size; diff --git a/src/libffmpeg/libavcodec/parser.c b/src/libffmpeg/libavcodec/parser.c index ed386611a..4725d56c6 100644 --- a/src/libffmpeg/libavcodec/parser.c +++ b/src/libffmpeg/libavcodec/parser.c @@ -34,11 +34,16 @@ AVCodecParserContext *av_parser_init(int codec_id) AVCodecParserContext *s; AVCodecParser *parser; int ret; + + if(codec_id == CODEC_ID_NONE) + return NULL; for(parser = av_first_parser; parser != NULL; parser = parser->next) { if (parser->codec_ids[0] == codec_id || parser->codec_ids[1] == codec_id || - parser->codec_ids[2] == codec_id) + parser->codec_ids[2] == codec_id || + parser->codec_ids[3] == codec_id || + parser->codec_ids[4] == codec_id) goto found; } return NULL; @@ -92,12 +97,14 @@ int av_parser_parse(AVCodecParserContext *s, s->fetch_timestamp=0; s->last_pts = pts; s->last_dts = dts; + s->cur_frame_pts[k] = + s->cur_frame_dts[k] = AV_NOPTS_VALUE; } } /* WARNING: the returned index can be negative */ index = s->parser->parser_parse(s, avctx, poutbuf, poutbuf_size, buf, buf_size); -//av_log(NULL, AV_LOG_DEBUG, "parser: in:%lld, %lld, out:%lld, %lld, in:%d out:%d %d\n", pts, dts, s->last_pts, s->last_dts, buf_size, *poutbuf_size, avctx->codec_id); +//av_log(NULL, AV_LOG_DEBUG, "parser: in:%lld, %lld, out:%lld, %lld, in:%d out:%d id:%d\n", pts, dts, s->last_pts, s->last_dts, buf_size, *poutbuf_size, avctx->codec_id); /* update the file pointer */ if (*poutbuf_size) { /* fill the data for the current frame */ @@ -183,7 +190,12 @@ int ff_combine_frame(ParseContext *pc, int next, uint8_t **buf, int *buf_size) for(; pc->overread>0; pc->overread--){ pc->buffer[pc->index++]= pc->buffer[pc->overread_index++]; } - + + /* flush remaining if EOF */ + if(!*buf_size && next == END_NOT_FOUND){ + next= 0; + } + pc->last_index= pc->index; /* copy into 
buffer end return */ @@ -279,8 +291,8 @@ static void mpegvideo_extract_headers(AVCodecParserContext *s, int32_t start_code; int frame_rate_index, ext_type, bytes_left; int frame_rate_ext_n, frame_rate_ext_d; - int top_field_first, repeat_first_field, progressive_frame; - int horiz_size_ext, vert_size_ext; + int picture_structure, top_field_first, repeat_first_field, progressive_frame; + int horiz_size_ext, vert_size_ext, bit_rate_ext; s->repeat_pict = 0; buf_end = buf + buf_size; @@ -294,12 +306,14 @@ static void mpegvideo_extract_headers(AVCodecParserContext *s, } break; case SEQ_START_CODE: - if (bytes_left >= 4) { - pc->width = avctx->width = (buf[0] << 4) | (buf[1] >> 4); - pc->height = avctx->height = ((buf[1] & 0x0f) << 8) | buf[2]; + if (bytes_left >= 7) { + pc->width = (buf[0] << 4) | (buf[1] >> 4); + pc->height = ((buf[1] & 0x0f) << 8) | buf[2]; + avcodec_set_dimensions(avctx, pc->width, pc->height); frame_rate_index = buf[3] & 0xf; pc->frame_rate = avctx->frame_rate = frame_rate_tab[frame_rate_index]; avctx->frame_rate_base = MPEG1_FRAME_RATE_BASE; + avctx->bit_rate = ((buf[4]<<10) | (buf[5]<<2) | (buf[6]>>6))*400; avctx->codec_id = CODEC_ID_MPEG1VIDEO; avctx->sub_id = 1; } @@ -312,12 +326,16 @@ static void mpegvideo_extract_headers(AVCodecParserContext *s, if (bytes_left >= 6) { horiz_size_ext = ((buf[1] & 1) << 1) | (buf[2] >> 7); vert_size_ext = (buf[2] >> 5) & 3; + bit_rate_ext = ((buf[2] & 0x1F)<<7) | (buf[3]>>1); frame_rate_ext_n = (buf[5] >> 5) & 3; frame_rate_ext_d = (buf[5] & 0x1f); pc->progressive_sequence = buf[1] & (1 << 3); + avctx->has_b_frames= !(buf[5] >> 7); - avctx->width = pc->width | (horiz_size_ext << 12); - avctx->height = pc->height | (vert_size_ext << 12); + pc->width |=(horiz_size_ext << 12); + pc->height |=( vert_size_ext << 12); + avctx->bit_rate += (bit_rate_ext << 18) * 400; + avcodec_set_dimensions(avctx, pc->width, pc->height); avctx->frame_rate = pc->frame_rate * (frame_rate_ext_n + 1); avctx->frame_rate_base = 
MPEG1_FRAME_RATE_BASE * (frame_rate_ext_d + 1); avctx->codec_id = CODEC_ID_MPEG2VIDEO; @@ -326,6 +344,7 @@ static void mpegvideo_extract_headers(AVCodecParserContext *s, break; case 0x8: /* picture coding extension */ if (bytes_left >= 5) { + picture_structure = buf[2]&3; top_field_first = buf[3] & (1 << 7); repeat_first_field = buf[3] & (1 << 1); progressive_frame = buf[4] & (1 << 7); @@ -341,6 +360,11 @@ static void mpegvideo_extract_headers(AVCodecParserContext *s, s->repeat_pict = 1; } } + + /* the packet only represents half a frame + XXX,FIXME maybe find a different solution */ + if(picture_structure != 3) + s->repeat_pict = -1; } break; } @@ -429,8 +453,7 @@ static int av_mpeg4_decode_header(AVCodecParserContext *s1, init_get_bits(gb, buf, 8 * buf_size); ret = ff_mpeg4_decode_picture_header(s, gb); if (s->width) { - avctx->width = s->width; - avctx->height = s->height; + avcodec_set_dimensions(avctx, s->width, s->height); } pc->first_picture = 0; return ret; @@ -477,13 +500,16 @@ typedef struct MpegAudioParseContext { int frame_size; int free_format_frame_size; int free_format_next_header; + uint32_t header; + int header_count; } MpegAudioParseContext; #define MPA_HEADER_SIZE 4 /* header + layer + bitrate + freq + lsf/mpeg25 */ +#undef SAME_HEADER_MASK /* mpegaudio.h defines different version */ #define SAME_HEADER_MASK \ - (0xffe00000 | (3 << 17) | (0xf << 12) | (3 << 10) | (3 << 19)) + (0xffe00000 | (3 << 17) | (3 << 10) | (3 << 19)) static int mpegaudio_parse_init(AVCodecParserContext *s1) { @@ -498,7 +524,7 @@ static int mpegaudio_parse(AVCodecParserContext *s1, const uint8_t *buf, int buf_size) { MpegAudioParseContext *s = s1->priv_data; - int len, ret; + int len, ret, sr; uint32_t header; const uint8_t *buf_ptr; @@ -532,11 +558,13 @@ static int mpegaudio_parse(AVCodecParserContext *s1, } if ((s->inbuf_ptr - s->inbuf) >= MPA_HEADER_SIZE) { got_header: + sr= avctx->sample_rate; header = (s->inbuf[0] << 24) | (s->inbuf[1] << 16) | (s->inbuf[2] << 8) | 
s->inbuf[3]; ret = mpa_decode_header(avctx, header); if (ret < 0) { + s->header_count= -2; /* no sync found : move by one byte (inefficient, but simple!) */ memmove(s->inbuf, s->inbuf + 1, s->inbuf_ptr - s->inbuf - 1); s->inbuf_ptr--; @@ -545,7 +573,12 @@ static int mpegaudio_parse(AVCodecParserContext *s1, to get a new bitrate */ s->free_format_frame_size = 0; } else { + if((header&SAME_HEADER_MASK) != (s->header&SAME_HEADER_MASK) && s->header) + s->header_count= -3; + s->header= header; + s->header_count++; s->frame_size = ret; + #if 0 /* free format: prepare to compute frame size */ if (decode_header(s, header) == 1) { @@ -553,6 +586,8 @@ static int mpegaudio_parse(AVCodecParserContext *s1, } #endif } + if(s->header_count <= 0) + avctx->sample_rate= sr; //FIXME ugly } } else #if 0 @@ -625,8 +660,10 @@ static int mpegaudio_parse(AVCodecParserContext *s1, // next_data: if (s->frame_size > 0 && (s->inbuf_ptr - s->inbuf) >= s->frame_size) { - *poutbuf = s->inbuf; - *poutbuf_size = s->inbuf_ptr - s->inbuf; + if(s->header_count > 0){ + *poutbuf = s->inbuf; + *poutbuf_size = s->inbuf_ptr - s->inbuf; + } s->inbuf_ptr = s->inbuf; s->frame_size = 0; break; diff --git a/src/libffmpeg/libavcodec/pcm.c b/src/libffmpeg/libavcodec/pcm.c index 4c999b430..8e57d11a1 100644 --- a/src/libffmpeg/libavcodec/pcm.c +++ b/src/libffmpeg/libavcodec/pcm.c @@ -127,6 +127,23 @@ static int pcm_encode_init(AVCodecContext *avctx) break; } + switch(avctx->codec->id) { + case CODEC_ID_PCM_S16LE: + case CODEC_ID_PCM_S16BE: + case CODEC_ID_PCM_U16LE: + case CODEC_ID_PCM_U16BE: + avctx->block_align = 2 * avctx->channels; + break; + case CODEC_ID_PCM_S8: + case CODEC_ID_PCM_U8: + case CODEC_ID_PCM_MULAW: + case CODEC_ID_PCM_ALAW: + avctx->block_align = avctx->channels; + break; + default: + break; + } + avctx->coded_frame= avcodec_alloc_frame(); avctx->coded_frame->key_frame= 1; @@ -282,6 +299,9 @@ static int pcm_decode_frame(AVCodecContext *avctx, samples = data; src = buf; + if(buf_size > 
AVCODEC_MAX_AUDIO_FRAME_SIZE/2) + buf_size = AVCODEC_MAX_AUDIO_FRAME_SIZE/2; + switch(avctx->codec->id) { case CODEC_ID_PCM_S16LE: n = buf_size >> 1; diff --git a/src/libffmpeg/libavcodec/qdrw.c b/src/libffmpeg/libavcodec/qdrw.c new file mode 100644 index 000000000..a12d45067 --- /dev/null +++ b/src/libffmpeg/libavcodec/qdrw.c @@ -0,0 +1,158 @@ +/* + * QuickDraw (qdrw) codec + * Copyright (c) 2004 Konstantin Shishkov + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/** + * @file qdrw.c + * Apple QuickDraw codec. 
+ */ + +#include "avcodec.h" +#include "mpegvideo.h" + +typedef struct QdrawContext{ + AVCodecContext *avctx; + AVFrame pic; + uint8_t palette[256*3]; +} QdrawContext; + +static int decode_frame(AVCodecContext *avctx, + void *data, int *data_size, + uint8_t *buf, int buf_size) +{ + QdrawContext * const a = avctx->priv_data; + AVFrame * const p= (AVFrame*)&a->pic; + uint8_t* outdata; + int colors; + int i; + + if(p->data[0]) + avctx->release_buffer(avctx, p); + + p->reference= 0; + if(avctx->get_buffer(avctx, p) < 0){ + av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n"); + return -1; + } + p->pict_type= I_TYPE; + p->key_frame= 1; + + outdata = a->pic.data[0]; + + buf += 0x68; /* jump to palette */ + colors = BE_32(buf); + buf += 4; + + if(colors < 0 || colors > 256) { + av_log(avctx, AV_LOG_ERROR, "Error color count - %i(0x%X)\n", colors, colors); + return -1; + } + + for (i = 0; i <= colors; i++) { + unsigned int idx; + idx = BE_16(buf); /* color index */ + buf += 2; + + if (idx > 255) { + av_log(avctx, AV_LOG_ERROR, "Palette index out of range: %u\n", idx); + buf += 6; + continue; + } + a->palette[idx * 3 + 0] = *buf++; + buf++; + a->palette[idx * 3 + 1] = *buf++; + buf++; + a->palette[idx * 3 + 2] = *buf++; + buf++; + } + + buf += 18; /* skip unneeded data */ + for (i = 0; i < avctx->height; i++) { + int size, left, code, pix; + uint8_t *next; + uint8_t *out; + int tsize = 0; + + /* decode line */ + out = outdata; + size = BE_16(buf); /* size of packed line */ + buf += 2; + left = size; + next = buf + size; + while (left > 0) { + code = *buf++; + if (code & 0x80 ) { /* run */ + int i; + pix = *buf++; + if ((out + (257 - code) * 3) > (outdata + a->pic.linesize[0])) + break; + for (i = 0; i < 257 - code; i++) { + *out++ = a->palette[pix * 3 + 0]; + *out++ = a->palette[pix * 3 + 1]; + *out++ = a->palette[pix * 3 + 2]; + } + tsize += 257 - code; + left -= 2; + } else { /* copy */ + int i, pix; + if ((out + code * 3) > (outdata + a->pic.linesize[0])) + break; + for 
(i = 0; i <= code; i++) { + pix = *buf++; + *out++ = a->palette[pix * 3 + 0]; + *out++ = a->palette[pix * 3 + 1]; + *out++ = a->palette[pix * 3 + 2]; + } + left -= 2 + code; + tsize += code + 1; + } + } + buf = next; + outdata += a->pic.linesize[0]; + } + + *data_size = sizeof(AVFrame); + *(AVFrame*)data = a->pic; + + return buf_size; +} + +static int decode_init(AVCodecContext *avctx){ +// QdrawContext * const a = avctx->priv_data; + + if (avcodec_check_dimensions(avctx, avctx->height, avctx->width) < 0) { + return 1; + } + + avctx->pix_fmt= PIX_FMT_RGB24; + + return 0; +} + +AVCodec qdraw_decoder = { + "qdraw", + CODEC_TYPE_VIDEO, + CODEC_ID_QDRAW, + sizeof(QdrawContext), + decode_init, + NULL, + NULL, + decode_frame, + CODEC_CAP_DR1, +}; diff --git a/src/libffmpeg/libavcodec/qpeg.c b/src/libffmpeg/libavcodec/qpeg.c new file mode 100644 index 000000000..a2d7e4acc --- /dev/null +++ b/src/libffmpeg/libavcodec/qpeg.c @@ -0,0 +1,302 @@ +/* + * QPEG codec + * Copyright (c) 2004 Konstantin Shishkov + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/** + * @file qpeg.c + * QPEG codec. 
+ */ + +#include "avcodec.h" +#include "mpegvideo.h" + +typedef struct QpegContext{ + AVCodecContext *avctx; + AVFrame pic; + uint8_t *refdata; +} QpegContext; + +static void qpeg_decode_intra(uint8_t *src, uint8_t *dst, int size, + int stride, int width, int height) +{ + int i; + int code; + int c0, c1; + int run, copy; + int filled = 0; + + height--; + dst = dst + height * stride; + + while(size > 0) { + code = *src++; + size--; + run = copy = 0; + if(code == 0xFC) /* end-of-picture code */ + break; + if(code >= 0xF8) { /* very long run */ + c0 = *src++; + c1 = *src++; + size -= 2; + run = ((code & 0x7) << 16) + (c0 << 8) + c1 + 2; + } else if (code >= 0xF0) { /* long run */ + c0 = *src++; + size--; + run = ((code & 0xF) << 8) + c0 + 2; + } else if (code >= 0xE0) { /* short run */ + run = (code & 0x1F) + 2; + } else if (code >= 0xC0) { /* very long copy */ + c0 = *src++; + c1 = *src++; + size -= 2; + copy = ((code & 0x3F) << 16) + (c0 << 8) + c1 + 1; + } else if (code >= 0x80) { /* long copy */ + c0 = *src++; + size--; + copy = ((code & 0x7F) << 8) + c0 + 1; + } else { /* short copy */ + copy = code + 1; + } + + /* perform actual run or copy */ + if(run) { + int p; + + p = *src++; + size--; + for(i = 0; i < run; i++) { + dst[filled++] = p; + if (filled >= width) { + filled = 0; + dst -= stride; + } + } + } else { + for(i = 0; i < copy; i++) { + dst[filled++] = *src++; + if (filled >= width) { + filled = 0; + dst -= stride; + } + } + size -= copy; + } + } +} + +static int qpeg_table_h[16] = + { 0x00, 0x20, 0x20, 0x20, 0x18, 0x10, 0x10, 0x20, 0x10, 0x08, 0x18, 0x08, 0x08, 0x18, 0x10, 0x04}; +static int qpeg_table_w[16] = + { 0x00, 0x20, 0x18, 0x08, 0x18, 0x10, 0x20, 0x10, 0x08, 0x10, 0x20, 0x20, 0x08, 0x10, 0x18, 0x04}; + +/* Decodes delta frames */ +static void qpeg_decode_inter(uint8_t *src, uint8_t *dst, int size, + int stride, int width, int height, + int delta, uint8_t *ctable, uint8_t *refdata) +{ + int i, j; + int code; + int filled = 0; + uint8_t *blkdata; 
+ + /* copy prev frame */ + for(i = 0; i < height; i++) + memcpy(refdata + (i * width), dst + (i * stride), width); + + blkdata = src - 0x86; + height--; + dst = dst + height * stride; + + while(size > 0) { + code = *src++; + size--; + + if(delta) { + /* motion compensation */ + while((code & 0xF0) == 0xF0) { + if(delta == 1) { + int me_idx; + int me_w, me_h, me_x, me_y; + uint8_t *me_plane; + int corr, val; + + /* get block size by index */ + me_idx = code & 0xF; + me_w = qpeg_table_w[me_idx]; + me_h = qpeg_table_h[me_idx]; + + /* extract motion vector */ + corr = *src++; + size--; + + val = corr >> 4; + if(val > 7) + val -= 16; + me_x = val; + + val = corr & 0xF; + if(val > 7) + val -= 16; + me_y = val; + + /* do motion compensation */ + me_plane = refdata + (filled + me_x) + (height - me_y) * width; + for(j = 0; j < me_h; j++) { + for(i = 0; i < me_w; i++) + dst[filled + i - (j * stride)] = me_plane[i - (j * width)]; + } + } + code = *src++; + size--; + } + } + + if(code == 0xE0) /* end-of-picture code */ + break; + if(code > 0xE0) { /* run code: 0xE1..0xFF */ + int p; + + code &= 0x1F; + p = *src++; + size--; + for(i = 0; i <= code; i++) { + dst[filled++] = p; + if(filled >= width) { + filled = 0; + dst -= stride; + height--; + } + } + } else if(code >= 0xC0) { /* copy code: 0xC0..0xDF */ + code &= 0x1F; + + for(i = 0; i <= code; i++) { + dst[filled++] = *src++; + if(filled >= width) { + filled = 0; + dst -= stride; + height--; + } + } + size -= code + 1; + } else if(code >= 0x80) { /* skip code: 0x80..0xBF */ + int skip; + + code &= 0x3F; + /* codes 0x80 and 0x81 are actually escape codes, + skip value minus constant is in the next byte */ + if(!code) + skip = (*src++) + 64; + else if(code == 1) + skip = (*src++) + 320; + else + skip = code; + filled += skip; + while( filled >= width) { + filled -= width; + dst -= stride; + height--; + } + } else { + /* zero code treated as one-pixel skip */ + if(code) + dst[filled++] = ctable[code & 0x7F]; + else + filled++; 
+ if(filled >= width) { + filled = 0; + dst -= stride; + height--; + } + } + } +} + +static int decode_frame(AVCodecContext *avctx, + void *data, int *data_size, + uint8_t *buf, int buf_size) +{ + QpegContext * const a = avctx->priv_data; + AVFrame * const p= (AVFrame*)&a->pic; + uint8_t* outdata; + int delta; + + if(p->data[0]) + avctx->release_buffer(avctx, p); + + p->reference= 0; + if(avctx->get_buffer(avctx, p) < 0){ + av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n"); + return -1; + } + outdata = a->pic.data[0]; + if(buf[0x85] == 0x10) { + qpeg_decode_intra(buf+0x86, outdata, buf_size - 0x86, a->pic.linesize[0], avctx->width, avctx->height); + } else { + delta = buf[0x85]; + qpeg_decode_inter(buf+0x86, outdata, buf_size - 0x86, a->pic.linesize[0], avctx->width, avctx->height, delta, buf + 4, a->refdata); + } + + /* make the palette available on the way out */ + memcpy(a->pic.data[1], a->avctx->palctrl->palette, AVPALETTE_SIZE); + if (a->avctx->palctrl->palette_changed) { + a->pic.palette_has_changed = 1; + a->avctx->palctrl->palette_changed = 0; + } + + *data_size = sizeof(AVFrame); + *(AVFrame*)data = a->pic; + + return buf_size; +} + +static int decode_init(AVCodecContext *avctx){ + QpegContext * const a = avctx->priv_data; + + a->avctx = avctx; + avctx->pix_fmt= PIX_FMT_PAL8; + avctx->has_b_frames = 0; + a->pic.data[0] = NULL; + a->refdata = av_malloc(avctx->width * avctx->height); + + return 0; +} + +static int decode_end(AVCodecContext *avctx){ + QpegContext * const a = avctx->priv_data; + AVFrame * const p= (AVFrame*)&a->pic; + + if(p->data[0]) + avctx->release_buffer(avctx, p); + + av_free(a->refdata); + return 0; +} + +AVCodec qpeg_decoder = { + "qpeg", + CODEC_TYPE_VIDEO, + CODEC_ID_QPEG, + sizeof(QpegContext), + decode_init, + NULL, + decode_end, + decode_frame, + CODEC_CAP_DR1, +}; diff --git a/src/libffmpeg/libavcodec/qtrle.c b/src/libffmpeg/libavcodec/qtrle.c index 55fa98663..41e4120db 100644 --- a/src/libffmpeg/libavcodec/qtrle.c +++ 
b/src/libffmpeg/libavcodec/qtrle.c @@ -74,6 +74,92 @@ static void qtrle_decode_2bpp(QtrleContext *s) static void qtrle_decode_4bpp(QtrleContext *s) { + int stream_ptr; + int header; + int start_line; + int lines_to_change; + int rle_code; + int row_ptr, pixel_ptr; + int row_inc = s->frame.linesize[0]; + unsigned char pi1, pi2, pi3, pi4, pi5, pi6, pi7, pi8; /* 8 palette indices */ + unsigned char *rgb = s->frame.data[0]; + int pixel_limit = s->frame.linesize[0] * s->avctx->height; + + /* check if this frame is even supposed to change */ + if (s->size < 8) + return; + + /* start after the chunk size */ + stream_ptr = 4; + + /* fetch the header */ + CHECK_STREAM_PTR(2); + header = BE_16(&s->buf[stream_ptr]); + stream_ptr += 2; + + /* if a header is present, fetch additional decoding parameters */ + if (header & 0x0008) { + CHECK_STREAM_PTR(8); + start_line = BE_16(&s->buf[stream_ptr]); + stream_ptr += 4; + lines_to_change = BE_16(&s->buf[stream_ptr]); + stream_ptr += 4; + } else { + start_line = 0; + lines_to_change = s->avctx->height; + } + + row_ptr = row_inc * start_line; + while (lines_to_change--) { + CHECK_STREAM_PTR(2); + pixel_ptr = row_ptr + (8 * (s->buf[stream_ptr++] - 1)); + + while ((rle_code = (signed char)s->buf[stream_ptr++]) != -1) { + if (rle_code == 0) { + /* there's another skip code in the stream */ + CHECK_STREAM_PTR(1); + pixel_ptr += (8 * (s->buf[stream_ptr++] - 1)); + } else if (rle_code < 0) { + /* decode the run length code */ + rle_code = -rle_code; + /* get the next 4 bytes from the stream, treat them as palette + * indices, and output them rle_code times */ + CHECK_STREAM_PTR(4); + pi1 = ((s->buf[stream_ptr]) >> 4) & 0x0f; + pi2 = (s->buf[stream_ptr++]) & 0x0f; + pi3 = ((s->buf[stream_ptr]) >> 4) & 0x0f; + pi4 = (s->buf[stream_ptr++]) & 0x0f; + pi5 = ((s->buf[stream_ptr]) >> 4) & 0x0f; + pi6 = (s->buf[stream_ptr++]) & 0x0f; + pi7 = ((s->buf[stream_ptr]) >> 4) & 0x0f; + pi8 = (s->buf[stream_ptr++]) & 0x0f; + + CHECK_PIXEL_PTR(rle_code * 8); 
+ + while (rle_code--) { + rgb[pixel_ptr++] = pi1; + rgb[pixel_ptr++] = pi2; + rgb[pixel_ptr++] = pi3; + rgb[pixel_ptr++] = pi4; + rgb[pixel_ptr++] = pi5; + rgb[pixel_ptr++] = pi6; + rgb[pixel_ptr++] = pi7; + rgb[pixel_ptr++] = pi8; + } + } else { + /* copy the same pixel directly to output 4 times */ + rle_code *= 4; + CHECK_STREAM_PTR(rle_code); + CHECK_PIXEL_PTR(rle_code*2); + + while (rle_code--) { + rgb[pixel_ptr++] = ((s->buf[stream_ptr]) >> 4) & 0x0f; + rgb[pixel_ptr++] = (s->buf[stream_ptr++]) & 0x0f; + } + } + } + row_ptr += row_inc; + } } static void qtrle_decode_8bpp(QtrleContext *s) @@ -444,10 +530,6 @@ static int qtrle_decode_frame(AVCodecContext *avctx, { QtrleContext *s = (QtrleContext *)avctx->priv_data; - /* no supplementary picture */ - if (buf_size == 0) - return 0; - s->buf = buf; s->size = buf_size; @@ -473,6 +555,12 @@ static int qtrle_decode_frame(AVCodecContext *avctx, case 4: case 36: qtrle_decode_4bpp(s); + /* make the palette available on the way out */ + memcpy(s->frame.data[1], s->avctx->palctrl->palette, AVPALETTE_SIZE); + if (s->avctx->palctrl->palette_changed) { + s->frame.palette_has_changed = 1; + s->avctx->palctrl->palette_changed = 0; + } break; case 8: diff --git a/src/libffmpeg/libavcodec/ra144.c b/src/libffmpeg/libavcodec/ra144.c index 65829b6a3..79cce2cef 100644 --- a/src/libffmpeg/libavcodec/ra144.c +++ b/src/libffmpeg/libavcodec/ra144.c @@ -130,7 +130,7 @@ static void do_voice(int *a1, int *a2) /* do quarter-block output */ -static void do_output_subblock(Real144_internal *glob, int x) +static void do_output_subblock(Real144_internal *glob, unsigned int x) { int a,b,c,d,e,f,g; diff --git a/src/libffmpeg/libavcodec/ra288.c b/src/libffmpeg/libavcodec/ra288.c index 09ecc7aac..4cff3106e 100644 --- a/src/libffmpeg/libavcodec/ra288.c +++ b/src/libffmpeg/libavcodec/ra288.c @@ -47,7 +47,7 @@ static void colmult(float *tgt, float *m1, const float *m2, int n); /* initial decode */ -static void unpack(unsigned short *tgt, unsigned 
char *src, int len) +static void unpack(unsigned short *tgt, unsigned char *src, unsigned int len) { int x,y,z; int n,temp; diff --git a/src/libffmpeg/libavcodec/rangecoder.c b/src/libffmpeg/libavcodec/rangecoder.c new file mode 100644 index 000000000..ba3022c45 --- /dev/null +++ b/src/libffmpeg/libavcodec/rangecoder.c @@ -0,0 +1,178 @@ +/* + * Range coder + * Copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/** + * @file rangecoder.c + * Range coder. + * based upon + * "Range encoding: an algorithm for removing redundancy from a digitised + * message. + * G. N. N. Martin Presented in March 1979 to the Video & + * Data Recording Conference, + * IBM UK Scientific Center held in Southampton July 24-27 1979." 
+ * + */ + +#include <string.h> + +#include "avcodec.h" +#include "common.h" +#include "rangecoder.h" + + +void ff_init_range_encoder(RangeCoder *c, uint8_t *buf, int buf_size){ + c->bytestream_start= + c->bytestream= buf; + c->bytestream_end= buf + buf_size; + + c->low= 0; + c->range= 0xFF00; + c->outstanding_count= 0; + c->outstanding_byte= -1; +} + +void ff_init_range_decoder(RangeCoder *c, const uint8_t *buf, int buf_size){ + ff_init_range_encoder(c, buf, buf_size); + + c->low =(*c->bytestream++)<<8; + c->low+= *c->bytestream++; +} + +void ff_build_rac_states(RangeCoder *c, int factor, int max_p){ + const int64_t one= 1LL<<32; + int64_t p; + int last_p8, p8, i; + + memset(c->zero_state, 0, sizeof(c->zero_state)); + memset(c-> one_state, 0, sizeof(c-> one_state)); + +#if 0 + for(i=1; i<256; i++){ + if(c->one_state[i]) + continue; + + p= (i*one + 128) >> 8; + last_p8= i; + for(;;){ + p+= ((one-p)*factor + one/2) >> 32; + p8= (256*p + one/2) >> 32; //FIXME try without the one + if(p8 <= last_p8) p8= last_p8+1; + if(p8 > max_p) p8= max_p; + if(p8 < last_p8) + break; + c->one_state[last_p8]= p8; + if(p8 == last_p8) + break; + last_p8= p8; + } + } +#endif +#if 1 + last_p8= 0; + p= one/2; + for(i=0; i<128; i++){ + p8= (256*p + one/2) >> 32; //FIXME try without the one + if(p8 <= last_p8) p8= last_p8+1; + if(last_p8 && last_p8<256 && p8<=max_p) + c->one_state[last_p8]= p8; + + p+= ((one-p)*factor + one/2) >> 32; + last_p8= p8; + } +#endif + for(i=256-max_p; i<=max_p; i++){ + if(c->one_state[i]) + continue; + + p= (i*one + 128) >> 8; + p+= ((one-p)*factor + one/2) >> 32; + p8= (256*p + one/2) >> 32; //FIXME try without the one + if(p8 <= i) p8= i+1; + if(p8 > max_p) p8= max_p; + c->one_state[ i]= p8; + } + + for(i=0; i<256; i++) + c->zero_state[i]= 256-c->one_state[256-i]; +#if 0 + for(i=0; i<256; i++) + av_log(NULL, AV_LOG_DEBUG, "%3d %3d\n", i, c->one_state[i]); +#endif +} + +/** + * + * @return the number of bytes written + */ +int ff_rac_terminate(RangeCoder *c){ + 
c->range=0xFF; + c->low +=0xFF; + renorm_encoder(c); + c->range=0xFF; + renorm_encoder(c); + + assert(c->low == 0); + assert(c->range >= 0x100); + + return c->bytestream - c->bytestream_start; +} + +#if 0 //selftest +#define SIZE 10240 +int main(){ + RangeCoder c; + uint8_t b[9*SIZE]; + uint8_t r[9*SIZE]; + int i; + uint8_t state[10]= {0}; + + ff_init_range_encoder(&c, b, SIZE); + ff_build_rac_states(&c, 0.05*(1LL<<32), 128+64+32+16); + + memset(state, 128, sizeof(state)); + + for(i=0; i<SIZE; i++){ + r[i]= random()%7; + } + + + for(i=0; i<SIZE; i++){ +START_TIMER + put_rac(&c, state, r[i]&1); +STOP_TIMER("put_rac") + } + + ff_put_rac_terminate(&c); + + ff_init_range_decoder(&c, b, SIZE); + + memset(state, 128, sizeof(state)); + + for(i=0; i<SIZE; i++){ +START_TIMER + if( (r[i]&1) != get_rac(&c, state) ) + av_log(NULL, AV_LOG_DEBUG, "rac failure at %d\n", i); +STOP_TIMER("get_rac") + } + + return 0; +} + +#endif diff --git a/src/libffmpeg/libavcodec/rangecoder.h b/src/libffmpeg/libavcodec/rangecoder.h new file mode 100644 index 000000000..6fd7b43bf --- /dev/null +++ b/src/libffmpeg/libavcodec/rangecoder.h @@ -0,0 +1,125 @@ +/* + * Range coder + * Copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/** + * @file rangecoder.h + * Range coder. + */ + +typedef struct RangeCoder{ + int low; + int range; + int outstanding_count; + int outstanding_byte; + uint8_t zero_state[256]; + uint8_t one_state[256]; + uint8_t *bytestream_start; + uint8_t *bytestream; + uint8_t *bytestream_end; +}RangeCoder; + +void ff_init_range_encoder(RangeCoder *c, uint8_t *buf, int buf_size); +void ff_init_range_decoder(RangeCoder *c, const uint8_t *buf, int buf_size); +int ff_rac_terminate(RangeCoder *c); +void ff_build_rac_states(RangeCoder *c, int factor, int max_p); + +static inline void renorm_encoder(RangeCoder *c){ + //FIXME optimize + while(c->range < 0x100){ + if(c->outstanding_byte < 0){ + c->outstanding_byte= c->low>>8; + }else if(c->low <= 0xFF00){ + *c->bytestream++ = c->outstanding_byte; + for(;c->outstanding_count; c->outstanding_count--) + *c->bytestream++ = 0xFF; + c->outstanding_byte= c->low>>8; + }else if(c->low >= 0x10000){ + *c->bytestream++ = c->outstanding_byte + 1; + for(;c->outstanding_count; c->outstanding_count--) + *c->bytestream++ = 0x00; + c->outstanding_byte= (c->low>>8) & 0xFF; + }else{ + c->outstanding_count++; + } + + c->low = (c->low & 0xFF)<<8; + c->range <<= 8; + } +} + +static inline void put_rac(RangeCoder *c, uint8_t * const state, int bit){ + int range1= (c->range * (*state)) >> 8; + + assert(*state); + assert(range1 < c->range); + assert(range1 > 0); + if(!bit){ + c->range -= range1; + *state= c->zero_state[*state]; + }else{ + c->low += c->range - range1; + c->range = range1; + *state= c->one_state[*state]; + } + + renorm_encoder(c); +} + +static inline void refill(RangeCoder *c){ + if(c->range < 0x100){ + c->range <<= 8; + c->low <<= 8; + if(c->bytestream < c->bytestream_end) + c->low+= c->bytestream[0]; + 
c->bytestream++; + } +} + +static inline int get_rac(RangeCoder *c, uint8_t * const state){ + int range1= (c->range * (*state)) >> 8; + int attribute_unused one_mask; + + c->range -= range1; +#if 1 + if(c->low < c->range){ + *state= c->zero_state[*state]; + refill(c); + return 0; + }else{ + c->low -= c->range; + *state= c->one_state[*state]; + c->range = range1; + refill(c); + return 1; + } +#else + one_mask= (c->range - c->low-1)>>31; + + c->low -= c->range & one_mask; + c->range += (range1 - c->range) & one_mask; + + *state= c->zero_state[(*state) + (256&one_mask)]; + + refill(c); + + return one_mask&1; +#endif +} + diff --git a/src/libffmpeg/libavcodec/ratecontrol.c b/src/libffmpeg/libavcodec/ratecontrol.c index 473645def..19641d453 100644 --- a/src/libffmpeg/libavcodec/ratecontrol.c +++ b/src/libffmpeg/libavcodec/ratecontrol.c @@ -38,7 +38,7 @@ static int init_pass2(MpegEncContext *s); static double get_qscale(MpegEncContext *s, RateControlEntry *rce, double rate_factor, int frame_num); void ff_write_pass1_stats(MpegEncContext *s){ - sprintf(s->avctx->stats_out, "in:%d out:%d type:%d q:%d itex:%d ptex:%d mv:%d misc:%d fcode:%d bcode:%d mc-var:%d var:%d icount:%d;\n", + snprintf(s->avctx->stats_out, 256, "in:%d out:%d type:%d q:%d itex:%d ptex:%d mv:%d misc:%d fcode:%d bcode:%d mc-var:%d var:%d icount:%d;\n", s->current_picture_ptr->display_picture_number, s->current_picture_ptr->coded_picture_number, s->pict_type, s->current_picture.quality, s->i_tex_bits, s->p_tex_bits, s->mv_bits, s->misc_bits, s->f_code, s->b_code, s->current_picture.mc_mb_var_sum, s->current_picture.mb_var_sum, s->i_count); @@ -74,6 +74,8 @@ int ff_rate_control_init(MpegEncContext *s) p= strchr(p+1, ';'); } i+= s->max_b_frames; + if(i<=0 || i>=INT_MAX / sizeof(RateControlEntry)) + return -1; rcc->entry = (RateControlEntry*)av_mallocz(i*sizeof(RateControlEntry)); rcc->num_entries= i; @@ -499,13 +501,16 @@ static void adaptive_quantization(MpegEncContext *s, double q){ const float 
temp_cplx_masking= s->avctx->temporal_cplx_masking; const float spatial_cplx_masking = s->avctx->spatial_cplx_masking; const float p_masking = s->avctx->p_masking; + const float border_masking = s->avctx->border_masking; float bits_sum= 0.0; float cplx_sum= 0.0; float cplx_tab[s->mb_num]; float bits_tab[s->mb_num]; - const int qmin= s->avctx->lmin; - const int qmax= s->avctx->lmax; + const int qmin= s->avctx->mb_lmin; + const int qmax= s->avctx->mb_lmax; Picture * const pic= &s->current_picture; + const int mb_width = s->mb_width; + const int mb_height = s->mb_height; for(i=0; i<s->mb_num; i++){ const int mb_xy= s->mb_index2xy[i]; @@ -513,6 +518,10 @@ static void adaptive_quantization(MpegEncContext *s, double q){ float spat_cplx= sqrt(pic->mb_var[mb_xy]); const int lumi= pic->mb_mean[mb_xy]; float bits, cplx, factor; + int mb_x = mb_xy % s->mb_stride; + int mb_y = mb_xy / s->mb_stride; + int mb_distance; + float mb_factor = 0.0; #if 0 if(spat_cplx < q/3) spat_cplx= q/3; //FIXME finetune if(temp_cplx < q/3) temp_cplx= q/3; //FIXME finetune @@ -533,6 +542,23 @@ static void adaptive_quantization(MpegEncContext *s, double q){ factor*= (1.0 - (lumi-128)*(lumi-128)*lumi_masking); else factor*= (1.0 - (lumi-128)*(lumi-128)*dark_masking); + + if(mb_x < mb_width/5){ + mb_distance = mb_width/5 - mb_x; + mb_factor = (float)mb_distance / (float)(mb_width/5); + }else if(mb_x > 4*mb_width/5){ + mb_distance = mb_x - 4*mb_width/5; + mb_factor = (float)mb_distance / (float)(mb_width/5); + } + if(mb_y < mb_height/5){ + mb_distance = mb_height/5 - mb_y; + mb_factor = FFMAX(mb_factor, (float)mb_distance / (float)(mb_height/5)); + }else if(mb_y > 4*mb_height/5){ + mb_distance = mb_y - 4*mb_height/5; + mb_factor = FFMAX(mb_factor, (float)mb_distance / (float)(mb_height/5)); + } + + factor*= 1.0 - border_masking*mb_factor; if(factor<0.00001) factor= 0.00001; diff --git a/src/libffmpeg/libavcodec/rational.c b/src/libffmpeg/libavcodec/rational.c index ad085653a..7ccad9e38 100644 --- 
a/src/libffmpeg/libavcodec/rational.c +++ b/src/libffmpeg/libavcodec/rational.c @@ -31,21 +31,33 @@ #include "avcodec.h" #include "rational.h" +/** + * returns b*c. + */ AVRational av_mul_q(AVRational b, AVRational c){ av_reduce(&b.num, &b.den, b.num * (int64_t)c.num, b.den * (int64_t)c.den, INT_MAX); return b; } +/** + * returns b/c. + */ AVRational av_div_q(AVRational b, AVRational c){ av_reduce(&b.num, &b.den, b.num * (int64_t)c.den, b.den * (int64_t)c.num, INT_MAX); return b; } +/** + * returns b+c. + */ AVRational av_add_q(AVRational b, AVRational c){ av_reduce(&b.num, &b.den, b.num * (int64_t)c.den + c.num * (int64_t)b.den, b.den * (int64_t)c.den, INT_MAX); return b; } +/** + * returns b-c. + */ AVRational av_sub_q(AVRational b, AVRational c){ av_reduce(&b.num, &b.den, b.num * (int64_t)c.den - c.num * (int64_t)b.den, b.den * (int64_t)c.den, INT_MAX); return b; diff --git a/src/libffmpeg/libavcodec/rational.h b/src/libffmpeg/libavcodec/rational.h index d5fc77f1a..fcda759c4 100644 --- a/src/libffmpeg/libavcodec/rational.h +++ b/src/libffmpeg/libavcodec/rational.h @@ -27,19 +27,27 @@ #ifndef RATIONAL_H #define RATIONAL_H +/** + * Rational number num/den. + */ typedef struct AVRational{ - int num; - int den; + int num; ///< numerator + int den; ///< denominator } AVRational; +/** + * returns 0 if a==b, 1 if a>b and -1 if a<b. + */ static inline int av_cmp_q(AVRational a, AVRational b){ const int64_t tmp= a.num * (int64_t)b.den - b.num * (int64_t)a.den; - if (tmp < 0) return -1; - else if(tmp == 0) return 0; - else return 1; + if(tmp) return (tmp>>63)|1; + else return 0; } +/** + * converts the given AVRational to a double. 
+ */ static inline double av_q2d(AVRational a){ return a.num / (double) a.den; } diff --git a/src/libffmpeg/libavcodec/raw.c b/src/libffmpeg/libavcodec/raw.c index 8c554c41c..957a809d8 100644 --- a/src/libffmpeg/libavcodec/raw.c +++ b/src/libffmpeg/libavcodec/raw.c @@ -47,7 +47,7 @@ const PixelFormatTag pixelFormatTags[] = { { PIX_FMT_YUV422, MKTAG('Y', '4', '2', '2') }, /* Packed formats */ - { PIX_FMT_YUV422, MKTAG('U', 'Y', 'V', 'Y') }, + { PIX_FMT_UYVY422, MKTAG('U', 'Y', 'V', 'Y') }, { PIX_FMT_GRAY8, MKTAG('G', 'R', 'E', 'Y') }, { -1, 0 }, @@ -64,7 +64,7 @@ static int findPixelFormat(unsigned int fourcc) return PIX_FMT_YUV420P; } -static unsigned int findFourCC(int fmt) +unsigned int avcodec_pix_fmt_to_codec_tag(enum PixelFormat fmt) { const PixelFormatTag * tags = pixelFormatTags; while (tags->pix_fmt >= 0) { @@ -83,6 +83,14 @@ static int raw_init_decoder(AVCodecContext *avctx) if (avctx->codec_tag) avctx->pix_fmt = findPixelFormat(avctx->codec_tag); + else if (avctx->bits_per_sample){ + switch(avctx->bits_per_sample){ + case 15: avctx->pix_fmt= PIX_FMT_RGB555; break; + case 16: avctx->pix_fmt= PIX_FMT_RGB565; break; + case 24: avctx->pix_fmt= PIX_FMT_BGR24 ; break; + case 32: avctx->pix_fmt= PIX_FMT_RGBA32; break; + } + } context->length = avpicture_get_size(avctx->pix_fmt, avctx->width, avctx->height); context->buffer = av_malloc(context->length); @@ -98,6 +106,13 @@ static int raw_init_decoder(AVCodecContext *avctx) return 0; } +static void flip(AVCodecContext *avctx, AVPicture * picture){ + if(!avctx->codec_tag && avctx->bits_per_sample && picture->linesize[1]==0){ + picture->data[0] += picture->linesize[0] * (avctx->height-1); + picture->linesize[0] *= -1; + } +} + static int raw_decode(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size) @@ -110,6 +125,7 @@ static int raw_decode(AVCodecContext *avctx, /* Early out without copy if packet size == frame size */ if (buf_size == context->length && context->p == context->buffer) { 
avpicture_fill(picture, buf, avctx->pix_fmt, avctx->width, avctx->height); + flip(avctx, picture); *data_size = sizeof(AVPicture); return buf_size; } @@ -124,6 +140,7 @@ static int raw_decode(AVCodecContext *avctx, memcpy(context->p, buf, bytesNeeded); context->p = context->buffer; avpicture_fill(picture, context->buffer, avctx->pix_fmt, avctx->width, avctx->height); + flip(avctx, picture); *data_size = sizeof(AVPicture); return bytesNeeded; } @@ -143,7 +160,8 @@ static int raw_init_encoder(AVCodecContext *avctx) avctx->coded_frame = (AVFrame *)avctx->priv_data; avctx->coded_frame->pict_type = FF_I_TYPE; avctx->coded_frame->key_frame = 1; - avctx->codec_tag = findFourCC(avctx->pix_fmt); + if(!avctx->codec_tag) + avctx->codec_tag = avcodec_pix_fmt_to_codec_tag(avctx->pix_fmt); return 0; } diff --git a/src/libffmpeg/libavcodec/rpza.c b/src/libffmpeg/libavcodec/rpza.c index 2be26346a..317c240b9 100644 --- a/src/libffmpeg/libavcodec/rpza.c +++ b/src/libffmpeg/libavcodec/rpza.c @@ -248,10 +248,6 @@ static int rpza_decode_frame(AVCodecContext *avctx, { RpzaContext *s = (RpzaContext *)avctx->priv_data; - /* no supplementary picture */ - if (buf_size == 0) - return 0; - s->buf = buf; s->size = buf_size; diff --git a/src/libffmpeg/libavcodec/rv10.c b/src/libffmpeg/libavcodec/rv10.c index 58c5db7f4..884be9c7c 100644 --- a/src/libffmpeg/libavcodec/rv10.c +++ b/src/libffmpeg/libavcodec/rv10.c @@ -258,6 +258,36 @@ void rv10_encode_picture_header(MpegEncContext *s, int picture_number) put_bits(&s->pb, 3, 0); /* ignored */ } +void rv20_encode_picture_header(MpegEncContext *s, int picture_number){ + put_bits(&s->pb, 2, s->pict_type); //I 0 vs. 1 ? 
+ put_bits(&s->pb, 1, 0); /* unknown bit */ + put_bits(&s->pb, 5, s->qscale); + + put_bits(&s->pb, 8, picture_number&0xFF); //FIXME wrong, but correct is not known + s->mb_x= s->mb_y= 0; + ff_h263_encode_mba(s); + + put_bits(&s->pb, 1, s->no_rounding); + + assert(s->f_code == 1); + assert(s->unrestricted_mv == 1); +// assert(s->h263_aic== (s->pict_type == I_TYPE)); + assert(s->alt_inter_vlc == 0); + assert(s->umvplus == 0); + assert(s->modified_quant==1); + assert(s->loop_filter==1); + + s->h263_aic= s->pict_type == I_TYPE; + if(s->h263_aic){ + s->y_dc_scale_table= + s->c_dc_scale_table= ff_aic_dc_scale_table; + }else{ + s->y_dc_scale_table= + s->c_dc_scale_table= ff_mpeg1_dc_scale_table; + } +} + +#if 0 /* unused, remove? */ static int get_num(GetBitContext *gb) { int n, n1; @@ -270,15 +300,15 @@ static int get_num(GetBitContext *gb) return (n << 16) | n1; } } +#endif #endif //CONFIG_ENCODERS /* read RV 1.0 compatible frame header */ static int rv10_decode_picture_header(MpegEncContext *s) { - int mb_count, pb_frame, marker, full_frame, unk; + int mb_count, pb_frame, marker, unk, mb_xy; - full_frame= s->avctx->slice_count==1; //printf("ff:%d\n", full_frame); marker = get_bits(&s->gb, 1); @@ -321,7 +351,9 @@ static int rv10_decode_picture_header(MpegEncContext *s) } /* if multiple packets per frame are sent, the position at which to display the macro blocks is coded here */ - if ((!full_frame) || show_bits(&s->gb, 12)==0) { + + mb_xy= s->mb_x + s->mb_y*s->mb_width; + if(show_bits(&s->gb, 12)==0 || (mb_xy && mb_xy < s->mb_num)){ s->mb_x = get_bits(&s->gb, 6); /* mb_x */ s->mb_y = get_bits(&s->gb, 6); /* mb_y */ mb_count = get_bits(&s->gb, 12); @@ -342,6 +374,22 @@ static int rv20_decode_picture_header(MpegEncContext *s) { int seq, mb_pos, i; +#if 0 + GetBitContext gb= s->gb; + for(i=0; i<64; i++){ + av_log(s->avctx, AV_LOG_DEBUG, "%d", get_bits1(&gb)); + if(i%4==3) av_log(s->avctx, AV_LOG_DEBUG, " "); + } + av_log(s->avctx, AV_LOG_DEBUG, "\n"); +#endif +#if 0 + 
for(i=0; i<s->avctx->extradata_size; i++){ + av_log(s->avctx, AV_LOG_DEBUG, "%2X ", ((uint8_t*)s->avctx->extradata)[i]); + if(i%4==3) av_log(s->avctx, AV_LOG_DEBUG, " "); + } + av_log(s->avctx, AV_LOG_DEBUG, "\n"); +#endif + if(s->avctx->sub_id == 0x30202002 || s->avctx->sub_id == 0x30203002){ if (get_bits(&s->gb, 3)){ av_log(s->avctx, AV_LOG_ERROR, "unknown triplet set\n"); @@ -383,15 +431,23 @@ static int rv20_decode_picture_header(MpegEncContext *s) } if(s->avctx->has_b_frames){ + int f=9; + int v= s->avctx->extradata_size >= 4 ? ((uint8_t*)s->avctx->extradata)[1] : 0; + if (get_bits(&s->gb, 1)){ -// av_log(s->avctx, AV_LOG_ERROR, "unknown bit3 set\n"); + av_log(s->avctx, AV_LOG_ERROR, "unknown bit3 set\n"); // return -1; } - seq= get_bits(&s->gb, 15); - if (s->avctx->sub_id == 0x20201002 && get_bits(&s->gb, 1)){ - av_log(s->avctx, AV_LOG_ERROR, "unknown bit4 set\n"); -// return -1; + seq= get_bits(&s->gb, 14)<<1; + + if(v>1 || (s->avctx->sub_id < 0x20201002 && v>0)){ + f= get_bits(&s->gb, av_log2(v-1)+1); } + + if(s->avctx->debug & FF_DEBUG_PICT_INFO){ + av_log(s->avctx, AV_LOG_DEBUG, "F %d/%d\n", f, v); + } + mb_pos= get_bits(&s->gb, av_log2(s->mb_num-1)+1); s->mb_x= mb_pos % s->mb_width; s->mb_y= mb_pos / s->mb_width; @@ -482,6 +538,7 @@ static int rv10_decode_init(AVCodecContext *avctx) case 0x20001000: case 0x20100001: case 0x20101001: + case 0x20103001: s->low_delay=1; break; case 0x20200002: @@ -494,7 +551,11 @@ static int rv10_decode_init(AVCodecContext *avctx) default: av_log(s->avctx, AV_LOG_ERROR, "unknown header %X\n", avctx->sub_id); } -//printf("ver:%X\n", avctx->sub_id); + + if(avctx->debug & FF_DEBUG_PICT_INFO){ + av_log(avctx, AV_LOG_DEBUG, "ver:%X ver0:%X\n", avctx->sub_id, avctx->extradata_size >= 4 ? 
((uint32_t*)avctx->extradata)[0] : -1); + } + if (MPV_common_init(s) < 0) return -1; @@ -504,10 +565,10 @@ static int rv10_decode_init(AVCodecContext *avctx) if (!done) { init_vlc(&rv_dc_lum, DC_VLC_BITS, 256, rv_lum_bits, 1, 1, - rv_lum_code, 2, 2); + rv_lum_code, 2, 2, 1); init_vlc(&rv_dc_chrom, DC_VLC_BITS, 256, rv_chrom_bits, 1, 1, - rv_chrom_code, 2, 2); + rv_chrom_code, 2, 2, 1); done = 1; } @@ -676,8 +737,8 @@ static int rv10_decode_frame(AVCodecContext *avctx, *pict= *(AVFrame*)&s->last_picture; ff_print_debug_info(s, pict); } - - *data_size = sizeof(AVFrame); + if(s->last_picture_ptr || s->low_delay) + *data_size = sizeof(AVFrame); } return buf_size; @@ -704,6 +765,7 @@ AVCodec rv20_decoder = { NULL, rv10_decode_end, rv10_decode_frame, - CODEC_CAP_DR1 + CODEC_CAP_DR1 | CODEC_CAP_DELAY, + .flush= ff_mpeg_flush, }; diff --git a/src/libffmpeg/libavcodec/shorten.c b/src/libffmpeg/libavcodec/shorten.c new file mode 100644 index 000000000..b523a9250 --- /dev/null +++ b/src/libffmpeg/libavcodec/shorten.c @@ -0,0 +1,521 @@ +/* + * Shorten decoder + * Copyright (c) 2005 Jeff Muizelaar + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+/**
+ * @file shorten.c
+ * Shorten decoder
+ * @author Jeff Muizelaar
+ *
+ */
+
+#define DEBUG
+#include <limits.h>
+#include "avcodec.h"
+#include "bitstream.h"
+#include "golomb.h"
+
+#define MAX_CHANNELS 8
+#define MAX_BLOCKSIZE 65535
+
+#define OUT_BUFFER_SIZE 16384
+
+#define ULONGSIZE 2
+
+#define WAVE_FORMAT_PCM 0x0001
+
+#define DEFAULT_BLOCK_SIZE 256
+
+#define TYPESIZE 4
+#define CHANSIZE 0
+#define LPCQSIZE 2
+#define ENERGYSIZE 3
+#define BITSHIFTSIZE 2
+
+#define TYPE_S16HL 3
+#define TYPE_S16LH 5
+
+#define NWRAP 3
+#define NSKIPSIZE 1
+
+#define LPCQUANT 5
+#define V2LPCQOFFSET (1 << LPCQUANT)
+
+#define FNSIZE 2
+#define FN_DIFF0 0
+#define FN_DIFF1 1
+#define FN_DIFF2 2
+#define FN_DIFF3 3
+#define FN_QUIT 4
+#define FN_BLOCKSIZE 5
+#define FN_BITSHIFT 6
+#define FN_QLPC 7
+#define FN_ZERO 8
+#define FN_VERBATIM 9
+
+#define VERBATIM_CKSIZE_SIZE 5
+#define VERBATIM_BYTE_SIZE 8
+#define CANONICAL_HEADER_SIZE 44
+
+/**
+ * Decoder state kept between calls to shorten_decode_frame().
+ * The code relies on this structure starting out zero-filled
+ * (NULL buffers, blocksize == 0 meaning "no stream header parsed yet").
+ */
+typedef struct ShortenContext {
+    AVCodecContext *avctx;
+    GetBitContext gb;
+
+    int min_framesize, max_framesize;
+    int channels;
+
+    int32_t *decoded[MAX_CHANNELS]; ///< per channel samples; points nwrap entries past the allocation start
+    int32_t *offset[MAX_CHANNELS];  ///< per channel history of block means, FFMAX(1, nmean) entries
+    uint8_t *bitstream;             ///< internal buffer the input packets are appended to
+    int bitstream_size;             ///< number of buffered bytes not consumed yet
+    int bitstream_index;            ///< position of the first unconsumed byte in bitstream
+    int allocated_bitstream_size;
+    int header_size;
+    uint8_t header[OUT_BUFFER_SIZE]; ///< verbatim WAVE header taken from the stream
+    int version;
+    int cur_chan;                   ///< channel the next audio command applies to
+    int bitshift;
+    int nmean;
+    int internal_ftype;
+    int nwrap;                      ///< number of history samples kept in front of every block
+    int blocksize;
+    int bitindex;                   ///< bit position inside the first unconsumed byte
+    int32_t lpcqoffset;
+} ShortenContext;
+
+/**
+ * Codec init callback: only remembers the codec context.
+ */
+static int shorten_decode_init(AVCodecContext * avctx)
+{
+    ShortenContext *s = avctx->priv_data;
+    s->avctx = avctx;
+
+    return 0;
+}
+
+/**
+ * Allocates the per channel offset and sample buffers for the current
+ * channels, nmean, blocksize and nwrap values.
+ * The first nwrap entries of every sample buffer are cleared and
+ * s->decoded[chan] is advanced past them, so indices -nwrap..-1 address the
+ * history of the previous block.
+ * @return 0 on success, -1 if a size is out of range or an allocation fails;
+ *         buffers obtained so far stay owned by the context.
+ */
+static int allocate_buffers(ShortenContext *s)
+{
+    int i, chan;
+    const int max_elems = INT_MAX / (int)sizeof(int32_t);
+
+    /* both counts are read from the bitstream, refuse anything whose size
+       in bytes could not be represented */
+    if (s->nmean >= max_elems || s->nwrap >= max_elems - s->blocksize)
+        return -1;
+
+    for (chan=0; chan<s->channels; chan++) {
+        int32_t *tmp;
+
+        tmp = av_realloc(s->offset[chan], sizeof(int32_t)*FFMAX(1, s->nmean));
+        if (!tmp)
+            return -1;
+        s->offset[chan] = tmp;
+
+        tmp = av_realloc(s->decoded[chan], sizeof(int32_t)*(s->blocksize + s->nwrap));
+        if (!tmp)
+            return -1;
+        s->decoded[chan] = tmp;
+        for (i=0; i<s->nwrap; i++)
+            s->decoded[chan][i] = 0;
+        s->decoded[chan] += s->nwrap;
+    }
+    return 0;
+}
+
+/**
+ * Releases everything allocate_buffers() obtained.
+ * Safe to call when nothing or only part of the buffers were allocated.
+ */
+static void free_buffers(ShortenContext *s)
+{
+    int chan;
+
+    for (chan = 0; chan < MAX_CHANNELS; chan++) {
+        if (s->decoded[chan]) {
+            /* undo the nwrap advance before handing the pointer back */
+            s->decoded[chan] -= s->nwrap;
+            av_freep(&s->decoded[chan]);
+        }
+        av_freep(&s->offset[chan]);
+    }
+}
+
+
+/**
+ * Reads an unsigned value; for stream versions other than 0 the rice
+ * parameter k is itself read from the bitstream and replaces the argument.
+ */
+static inline unsigned int get_uint(ShortenContext *s, int k)
+{
+    if (s->version != 0)
+        k = get_ur_golomb_shorten(&s->gb, ULONGSIZE);
+    return get_ur_golomb_shorten(&s->gb, k);
+}
+
+
+/**
+ * Shifts the blocksize samples of one block left by s->bitshift.
+ * @param buffer first sample of the block (an s->decoded[] pointer, which
+ *               already lies past the nwrap history entries)
+ */
+static void fix_bitshift(ShortenContext *s, int32_t *buffer)
+{
+    int i;
+
+    if (s->bitshift != 0)
+        for (i = 0; i < s->blocksize; i++)
+            buffer[i] <<= s->bitshift;
+}
+
+
+/**
+ * Fills the mean history of every channel with the initial mean of the
+ * sample type.
+ * @return 0 on success, -1 if the sample type is not supported
+ */
+static int init_offset(ShortenContext *s)
+{
+    int32_t mean = 0;
+    int chan, i;
+    int nblock = FFMAX(1, s->nmean);
+    /* initialise offset */
+    switch (s->internal_ftype)
+    {
+        case TYPE_S16HL:
+        case TYPE_S16LH:
+            mean = 0;
+            break;
+        default:
+            /* the type comes from the file, so this must not bring the
+               whole process down */
+            av_log(s->avctx, AV_LOG_ERROR, "unknown audio type\n");
+            return -1;
+    }
+
+    for (chan = 0; chan < s->channels; chan++)
+        for (i = 0; i < nblock; i++)
+            s->offset[chan][i] = mean;
+    return 0;
+}
+
+/** Reads 32 bits and byte swaps them. */
+static int inline get_le32(GetBitContext *gb)
+{
+    return bswap_32(get_bits_long(gb, 32));
+}
+
+/** Reads 16 bits and byte swaps them. */
+static short inline get_le16(GetBitContext *gb)
+{
+    return bswap_16(get_bits_long(gb, 16));
+}
+
+/**
+ * Parses the RIFF/WAVE header embedded in the stream and fills channels,
+ * sample_rate, bit_rate, block_align and bits_per_sample of avctx.
+ * @param header      header bytes
+ * @param header_size number of valid bytes in header; the caller guarantees
+ *                    at least CANONICAL_HEADER_SIZE
+ * @return 0 on success, -1 if the header is malformed or not 16 bit PCM
+ */
+static int decode_wave_header(AVCodecContext *avctx, uint8_t *header, int header_size)
+{
+    GetBitContext hb;
+    int len;
+    short wave_format;
+
+    init_get_bits(&hb, header, header_size*8);
+    if (get_le32(&hb) != MKTAG('R','I','F','F')) {
+        av_log(avctx, AV_LOG_ERROR, "missing RIFF tag\n");
+        return -1;
+    }
+
+    get_le32(&hb); /* RIFF chunk size, not needed */
+
+    if (get_le32(&hb) != MKTAG('W','A','V','E')) {
+        av_log(avctx, AV_LOG_ERROR, "missing WAVE tag\n");
+        return -1;
+    }
+
+    while (get_le32(&hb) != MKTAG('f','m','t',' ')) {
+        len = get_le32(&hb);
+        /* after skipping the chunk there must be room left for the tag and
+           the size field of the following one */
+        if (len < 0 || len > header_size - get_bits_count(&hb)/8 - 8) {
+            av_log(avctx, AV_LOG_ERROR, "no fmt chunk found\n");
+            return -1;
+        }
+        skip_bits(&hb, 8*len);
+    }
+    len = get_le32(&hb);
+
+    /* the 16 bytes read below have to be inside the header */
+    if (len < 16 || header_size - get_bits_count(&hb)/8 < 16) {
+        av_log(avctx, AV_LOG_ERROR, "fmt chunk was too short\n");
+        return -1;
+    }
+
+    wave_format = get_le16(&hb);
+
+    switch (wave_format) {
+        case WAVE_FORMAT_PCM:
+            break;
+        default:
+            av_log(avctx, AV_LOG_ERROR, "unsupported wave format\n");
+            return -1;
+    }
+
+    avctx->channels = get_le16(&hb);
+    avctx->sample_rate = get_le32(&hb);
+    avctx->bit_rate = get_le32(&hb) * 8;
+    avctx->block_align = get_le16(&hb);
+    avctx->bits_per_sample = get_le16(&hb);
+
+    if (avctx->bits_per_sample != 16) {
+        av_log(avctx, AV_LOG_ERROR, "unsupported number of bits per sample\n");
+        return -1;
+    }
+
+    len -= 16;
+    if (len > 0)
+        av_log(avctx, AV_LOG_INFO, "%d header bytes unparsed\n", len);
+
+    return 0;
+}
+
+/**
+ * Writes one block of all channels as interleaved 16 bit samples.
+ * Values outside the int16_t range are clipped to it.
+ * @return pointer just past the last sample written
+ */
+static int16_t * interleave_buffer(int16_t *samples, int nchan, int blocksize, int32_t **buffer) {
+    int i, chan;
+    for (i=0; i<blocksize; i++)
+        for (chan=0; chan < nchan; chan++)
+            *samples++ = FFMAX(-32768, FFMIN(buffer[chan][i], 32767));
+    return samples;
+}
+
+/**
+ * Decodes one LPC predicted block of a channel.
+ * Reads pred_order coefficients, then blocksize residuals; every sample is
+ * predicted from the pred_order samples in front of it.
+ * @param pred_order must not exceed s->nwrap, as that many samples in front
+ *                   of the block are read
+ */
+static void decode_subframe_lpc(ShortenContext *s, int channel, int residual_size, int pred_order)
+{
+    int sum, i, j;
+    int coeffs[pred_order];
+
+    for (i=0; i<pred_order; i++)
+        coeffs[i] = get_sr_golomb_shorten(&s->gb, LPCQUANT);
+
+    for (i=0; i < s->blocksize; i++) {
+        sum = s->lpcqoffset;
+        for (j=0; j<pred_order; j++)
+            sum += coeffs[j] * s->decoded[channel][i-j-1];
+        s->decoded[channel][i] = get_sr_golomb_shorten(&s->gb, residual_size) + (sum >> LPCQUANT);
+    }
+}
+
+
+/**
+ * Decode callback.
+ * Input is appended to an internal buffer; nothing is decoded until
+ * max_framesize bytes are buffered. The first call that decodes parses the
+ * stream header, every later one executes a single Shorten command. Samples
+ * are output once a block of the last channel has been decoded.
+ * @param data      receives interleaved int16_t samples
+ * @param data_size receives the number of bytes written to data
+ * @return number of input bytes consumed, -1 on error
+ */
+static int shorten_decode_frame(AVCodecContext *avctx,
+        void *data, int *data_size,
+        uint8_t *buf, int buf_size)
+{
+    ShortenContext *s = avctx->priv_data;
+    int i, input_buf_size = 0;
+    int16_t *samples = data;
+
+    /* no output on any of the early returns below */
+    *data_size = 0;
+
+    if(s->max_framesize == 0){
+        s->max_framesize= 1024; // should hopefully be enough for the first header
+        s->bitstream= av_fast_realloc(s->bitstream, &s->allocated_bitstream_size, s->max_framesize);
+        if (!s->bitstream) {
+            s->max_framesize = 0;
+            s->allocated_bitstream_size = 0;
+            return -1;
+        }
+    }
+
+    if(1 && s->max_framesize){//FIXME truncated
+        buf_size= FFMIN(buf_size, s->max_framesize - s->bitstream_size);
+        input_buf_size= buf_size;
+
+        if(s->bitstream_index + s->bitstream_size + buf_size > s->allocated_bitstream_size){
+            //                printf("memmove\n");
+            memmove(s->bitstream, &s->bitstream[s->bitstream_index], s->bitstream_size);
+            s->bitstream_index=0;
+        }
+        memcpy(&s->bitstream[s->bitstream_index + s->bitstream_size], buf, buf_size);
+        buf= &s->bitstream[s->bitstream_index];
+        buf_size += s->bitstream_size;
+        s->bitstream_size= buf_size;
+
+        if(buf_size < s->max_framesize){
+            //dprintf("wanna more data ... %d\n", buf_size);
+            return input_buf_size;
+        }
+    }
+    init_get_bits(&s->gb, buf, buf_size*8);
+    get_bits(&s->gb, s->bitindex);
+    if (!s->blocksize)
+    {
+        int maxnlpc = 0;
+        /* shorten signature */
+        if (get_bits_long(&s->gb, 32) != bswap_32(ff_get_fourcc("ajkg"))) {
+            av_log(s->avctx, AV_LOG_ERROR, "missing shorten magic 'ajkg'\n");
+            goto header_error;
+        }
+
+        s->lpcqoffset = 0;
+        s->blocksize = DEFAULT_BLOCK_SIZE;
+        s->channels = 1;
+        s->nmean = -1;
+        s->version = get_bits(&s->gb, 8);
+        s->internal_ftype = get_uint(s, TYPESIZE);
+
+        s->channels = get_uint(s, CHANSIZE);
+        if (s->channels <= 0 || s->channels > MAX_CHANNELS) {
+            av_log(s->avctx, AV_LOG_ERROR, "unsupported number of channels: %d\n", s->channels);
+            goto header_error;
+        }
+
+        /* get blocksize if version > 0 */
+        if (s->version > 0) {
+            int skip_bytes;
+            s->blocksize = get_uint(s, av_log2(DEFAULT_BLOCK_SIZE));
+            maxnlpc = get_uint(s, LPCQSIZE);
+            s->nmean = get_uint(s, 0);
+
+            skip_bytes = get_uint(s, NSKIPSIZE);
+            /* more bytes than are buffered can not be skipped */
+            if (skip_bytes < 0 || skip_bytes > buf_size) {
+                av_log(s->avctx, AV_LOG_ERROR, "invalid skip count: %d\n", skip_bytes);
+                goto header_error;
+            }
+            for (i=0; i<skip_bytes; i++) {
+                skip_bits(&s->gb, 8);
+            }
+        }
+        /* 0 would mean "no header seen" and is used as a divisor later */
+        if (s->blocksize <= 0 || s->blocksize > MAX_BLOCKSIZE) {
+            av_log(s->avctx, AV_LOG_ERROR, "invalid block size: %d\n", s->blocksize);
+            goto header_error;
+        }
+        s->nwrap = FFMAX(NWRAP, maxnlpc);
+
+        if (allocate_buffers(s) < 0) {
+            av_log(s->avctx, AV_LOG_ERROR, "could not allocate sample buffers\n");
+            goto header_error;
+        }
+
+        if (init_offset(s) < 0)
+            goto header_error;
+
+        if (s->version > 1)
+            s->lpcqoffset = V2LPCQOFFSET;
+
+        if (get_ur_golomb_shorten(&s->gb, FNSIZE) != FN_VERBATIM) {
+            av_log(s->avctx, AV_LOG_ERROR, "missing verbatim section at begining of stream\n");
+            goto header_error;
+        }
+
+        s->header_size = get_ur_golomb_shorten(&s->gb, VERBATIM_CKSIZE_SIZE);
+        if (s->header_size >= OUT_BUFFER_SIZE || s->header_size < CANONICAL_HEADER_SIZE) {
+            av_log(s->avctx, AV_LOG_ERROR, "header is wrong size: %d\n", s->header_size);
+            goto header_error;
+        }
+
+        for (i=0; i<s->header_size; i++)
+            s->header[i] = (char)get_ur_golomb_shorten(&s->gb, VERBATIM_BYTE_SIZE);
+
+        if (decode_wave_header(avctx, s->header, s->header_size) < 0)
+            goto header_error;
+
+        s->cur_chan = 0;
+        s->bitshift = 0;
+    }
+    else
+    {
+        int cmd;
+        int len;
+        cmd = get_ur_golomb_shorten(&s->gb, FNSIZE);
+        switch (cmd) {
+            case FN_ZERO:
+            case FN_DIFF0:
+            case FN_DIFF1:
+            case FN_DIFF2:
+            case FN_DIFF3:
+            case FN_QLPC:
+                {
+                    int residual_size = 0;
+                    int channel = s->cur_chan;
+                    int32_t coffset;
+                    if (cmd != FN_ZERO) {
+                        residual_size = get_ur_golomb_shorten(&s->gb, ENERGYSIZE);
+                        /* this is a hack as version 0 differed in definition of get_sr_golomb_shorten */
+                        if (s->version == 0)
+                            residual_size--;
+                    }
+
+                    if (s->nmean == 0)
+                        coffset = s->offset[channel][0];
+                    else {
+                        int32_t sum = (s->version < 2) ? 0 : s->nmean / 2;
+                        for (i=0; i<s->nmean; i++)
+                            sum += s->offset[channel][i];
+                        coffset = sum / s->nmean;
+                        if (s->version >= 2)
+                            coffset >>= FFMIN(1, s->bitshift);
+                    }
+                    switch (cmd) {
+                        case FN_ZERO:
+                            for (i=0; i<s->blocksize; i++)
+                                s->decoded[channel][i] = 0;
+                            break;
+                        case FN_DIFF0:
+                            for (i=0; i<s->blocksize; i++)
+                                s->decoded[channel][i] = get_sr_golomb_shorten(&s->gb, residual_size) + coffset;
+                            break;
+                        case FN_DIFF1:
+                            for (i=0; i<s->blocksize; i++)
+                                s->decoded[channel][i] = get_sr_golomb_shorten(&s->gb, residual_size) + s->decoded[channel][i - 1];
+                            break;
+                        case FN_DIFF2:
+                            for (i=0; i<s->blocksize; i++)
+                                s->decoded[channel][i] = get_sr_golomb_shorten(&s->gb, residual_size) + 2*s->decoded[channel][i-1]
+                                                                                                      -   s->decoded[channel][i-2];
+                            break;
+                        case FN_DIFF3:
+                            for (i=0; i<s->blocksize; i++)
+                                s->decoded[channel][i] = get_sr_golomb_shorten(&s->gb, residual_size) + 3*s->decoded[channel][i-1]
+                                                                                                      - 3*s->decoded[channel][i-2]
+                                                                                                      +   s->decoded[channel][i-3];
+                            break;
+                        case FN_QLPC:
+                            {
+                                int pred_order = get_ur_golomb_shorten(&s->gb, LPCQSIZE);
+                                /* only nwrap samples exist in front of the block */
+                                if (pred_order < 0 || pred_order > s->nwrap) {
+                                    av_log(avctx, AV_LOG_ERROR, "invalid prediction order %d\n", pred_order);
+                                    return -1;
+                                }
+                                for (i=0; i<pred_order; i++)
+                                    s->decoded[channel][i - pred_order] -= coffset;
+                                decode_subframe_lpc(s, channel, residual_size, pred_order);
+                                if (coffset != 0)
+                                    for (i=0; i < s->blocksize; i++)
+                                        s->decoded[channel][i] += coffset;
+                            }
+                    }
+                    if (s->nmean > 0) {
+                        int32_t sum = (s->version < 2) ? 0 : s->blocksize / 2;
+                        for (i=0; i<s->blocksize; i++)
+                            sum += s->decoded[channel][i];
+
+                        for (i=1; i<s->nmean; i++)
+                            s->offset[channel][i-1] = s->offset[channel][i];
+
+                        if (s->version < 2)
+                            s->offset[channel][s->nmean - 1] = sum / s->blocksize;
+                        else
+                            s->offset[channel][s->nmean - 1] = (sum / s->blocksize) << s->bitshift;
+                    }
+                    /* keep the last nwrap samples as history for the next block */
+                    for (i=-s->nwrap; i<0; i++)
+                        s->decoded[channel][i] = s->decoded[channel][i + s->blocksize];
+
+                    fix_bitshift(s, s->decoded[channel]);
+
+                    s->cur_chan++;
+                    if (s->cur_chan == s->channels) {
+                        samples = interleave_buffer(samples, s->channels, s->blocksize, s->decoded);
+                        s->cur_chan = 0;
+                        goto frame_done;
+                    }
+                    break;
+                }
+                break;
+            case FN_VERBATIM:
+                len = get_ur_golomb_shorten(&s->gb, VERBATIM_CKSIZE_SIZE);
+                /* every verbatim byte takes more than 8 bits, so a valid
+                   count can not exceed the buffered bytes */
+                if (len < 0 || len > buf_size) {
+                    av_log(avctx, AV_LOG_ERROR, "invalid verbatim length %d\n", len);
+                    return -1;
+                }
+                while (len--) {
+                    get_ur_golomb_shorten(&s->gb, VERBATIM_BYTE_SIZE);
+                }
+                break;
+            case FN_BITSHIFT:
+                {
+                    unsigned int bitshift = get_ur_golomb_shorten(&s->gb, BITSHIFTSIZE);
+                    /* samples are 32 bit wide */
+                    if (bitshift > 31) {
+                        av_log(avctx, AV_LOG_ERROR, "invalid bitshift %u\n", bitshift);
+                        return -1;
+                    }
+                    s->bitshift = bitshift;
+                }
+                break;
+            case FN_BLOCKSIZE:
+                {
+                    int blocksize = get_uint(s, av_log2(s->blocksize));
+                    /* the sample buffers are never reallocated, so the
+                       block may only shrink */
+                    if (blocksize <= 0 || blocksize > s->blocksize) {
+                        av_log(avctx, AV_LOG_ERROR, "unsupported block size change to %d\n", blocksize);
+                        return -1;
+                    }
+                    s->blocksize = blocksize;
+                }
+                break;
+            case FN_QUIT:
+                return buf_size;
+                break;
+            default:
+                av_log(avctx, AV_LOG_ERROR, "unknown shorten function %d\n", cmd);
+                return -1;
+                break;
+        }
+    }
+frame_done:
+    *data_size = (int8_t *)samples - (int8_t *)data;
+
+    //    s->last_blocksize = s->blocksize;
+    s->bitindex = get_bits_count(&s->gb) - 8*((get_bits_count(&s->gb))/8);
+    i= (get_bits_count(&s->gb))/8;
+    if (i > buf_size) {
+        av_log(s->avctx, AV_LOG_ERROR, "overread: %d\n", i - buf_size);
+        s->bitstream_size=0;
+        s->bitstream_index=0;
+        return -1;
+    }
+    if (s->bitstream_size) {
+        s->bitstream_index += i;
+        s->bitstream_size  -= i;
+        return input_buf_size;
+    } else
+        return i;
+
+header_error:
+    /* put the context back into the "no header seen" state, so neither the
+       next call nor shorten_decode_close() works on half set up buffers,
+       and drop the buffered input like the overread case does */
+    free_buffers(s);
+    s->blocksize = 0;
+    s->channels = 0;
+    s->bitstream_size = 0;
+    s->bitstream_index = 0;
+    return -1;
+}
+
+/**
+ * Codec close callback: frees the sample, offset and input buffers.
+ */
+static int shorten_decode_close(AVCodecContext *avctx)
+{
+    ShortenContext *s = avctx->priv_data;
+
+    free_buffers(s);
+    av_freep(&s->bitstream);
+    return 0;
+}
+
+/**
+ * Flush callback: discards all buffered input.
+ */
+static void shorten_flush(AVCodecContext *avctx){
+    ShortenContext *s = avctx->priv_data;
+
+    s->bitstream_size=
+        s->bitstream_index= 0;
+}
+
+AVCodec shorten_decoder = {
+    "shorten",
+    CODEC_TYPE_AUDIO,
+    CODEC_ID_SHORTEN,
+    sizeof(ShortenContext),
+    shorten_decode_init,
+    NULL,
+    shorten_decode_close,
+    shorten_decode_frame,
+    .flush= shorten_flush,
+};
diff --git a/src/libffmpeg/libavcodec/smc.c b/src/libffmpeg/libavcodec/smc.c
index e937b03c8..dbb5adef1 100644
--- a/src/libffmpeg/libavcodec/smc.c
+++ b/src/libffmpeg/libavcodec/smc.c
@@ -125,7 +125,7 @@ static void smc_decode_stream(SmcContext *s)
             chunk_size, s->size);
 
     chunk_size = s->size;
-    total_blocks = (s->avctx->width * s->avctx->height) / (4 * 4);
+    total_blocks = ((s->avctx->width + 3) / 4) * ((s->avctx->height + 3) / 4);
 
     /* traverse through the blocks */
     while (total_blocks) {
@@ -448,10 +448,6 @@ static int smc_decode_frame(AVCodecContext *avctx,
 {
     SmcContext *s = (SmcContext *)avctx->priv_data;
 
-    /* no supplementary picture */
-    if (buf_size == 0)
-        return 0;
-
     s->buf = buf;
     s->size = buf_size;
 
diff --git a/src/libffmpeg/libavcodec/snow.c b/src/libffmpeg/libavcodec/snow.c
new file mode 100644
index 000000000..9cfddfa95
--- /dev/null
+++ b/src/libffmpeg/libavcodec/snow.c
@@ -0,0 +1,3996 @@
+/*
+ * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "avcodec.h" +#include "common.h" +#include "dsputil.h" + +#include "rangecoder.h" +#define MID_STATE 128 + +#include "mpegvideo.h" + +#undef NDEBUG +#include <assert.h> + +#define MAX_DECOMPOSITIONS 8 +#define MAX_PLANES 4 +#define DWTELEM int +#define QSHIFT 5 +#define QROOT (1<<QSHIFT) +#define LOSSLESS_QLOG -128 +#define FRAC_BITS 8 + +static const int8_t quant3[256]={ + 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, +-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, +-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, +-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, +-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, +-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, +-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, +-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0, +}; +static const int8_t quant3b[256]={ + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, +-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 
+-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, +-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, +-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, +-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, +-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, +-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, +}; +static const int8_t quant3bA[256]={ + 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, + 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, + 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, + 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, + 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, + 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, + 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, + 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, + 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, + 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, + 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, + 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, + 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, + 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, + 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, + 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, +}; +static const int8_t quant5[256]={ + 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, +-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, +-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, +-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, +-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, +-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, +-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, +-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, +-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1, +}; +static const 
int8_t quant7[256]={ + 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, +-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3, +-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3, +-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3, +-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3, +-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3, +-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2, +-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, +-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1, +}; +static const int8_t quant9[256]={ + 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, +-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, +-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, +-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, +-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, +-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, +-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, +-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3, +-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1, +}; +static const int8_t quant11[256]={ + 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, +-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, +-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, +-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, +-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, +-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, +-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4, +-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, +-4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1, +}; +static const int8_t quant13[256]={ + 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, +-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6, +-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6, +-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6, +-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6, +-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5, +-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, +-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5, +-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1, +}; + +#define LOG2_OBMC_MAX 6 +#define OBMC_MAX (1<<(LOG2_OBMC_MAX)) +#if 0 //64*cubic +static const uint8_t obmc32[1024]={ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, + 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, + 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0, + 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0, + 0, 1, 1, 2, 3, 5, 6, 
8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0, + 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0, + 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0, + 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0, + 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0, + 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0, + 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0, + 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0, + 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0, + 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0, + 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0, + 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0, + 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0, + 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0, + 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0, + 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0, + 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0, + 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0, + 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0, + 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0, + 0, 1, 1, 3, 4, 6, 
8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0, + 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0, + 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0, + 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0, + 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +//error:0.000022 +}; +static const uint8_t obmc16[256]={ + 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, + 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0, + 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0, + 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0, + 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0, + 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0, + 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1, + 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1, + 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1, + 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1, + 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0, + 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0, + 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0, + 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0, + 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0, + 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, +//error:0.000033 +}; +#elif 1 // 64*linear +static const uint8_t obmc32[1024]={ + 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, + 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0, + 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0, + 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0, + 
1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1, + 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1, + 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1, + 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1, + 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1, + 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1, + 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1, + 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1, + 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2, + 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2, + 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2, + 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2, + 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2, + 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2, + 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2, + 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2, + 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1, + 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1, + 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1, + 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1, + 1, 3, 5, 7, 
8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1, + 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1, + 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1, + 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1, + 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0, + 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0, + 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0, + 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, + //error:0.000020 +}; +static const uint8_t obmc16[256]={ + 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0, + 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1, + 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1, + 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2, + 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2, + 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3, + 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3, + 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4, + 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4, + 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3, + 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3, + 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2, + 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2, + 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1, + 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1, + 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0, +//error:0.000015 +}; +#else //64*cos +static const uint8_t obmc32[1024]={ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, + 
0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0, + 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0, + 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0, + 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0, + 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0, + 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0, + 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0, + 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0, + 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0, + 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0, + 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0, + 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0, + 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0, + 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0, + 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0, + 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0, + 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0, + 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0, + 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0, + 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0, + 0, 1, 2, 4, 6, 
9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0, + 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0, + 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0, + 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0, + 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0, + 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0, + 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +//error:0.000022 +}; +static const uint8_t obmc16[256]={ + 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, + 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0, + 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0, + 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0, + 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0, + 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1, + 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1, + 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0, + 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0, + 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1, + 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1, + 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0, + 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0, + 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0, + 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0, + 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, +//error:0.000022 +}; +#endif + +//linear *64 +static const uint8_t obmc8[64]={ + 1, 3, 5, 7, 7, 5, 3, 1, + 3, 9,15,21,21,15, 9, 3, + 5,15,25,35,35,25,15, 5, + 7,21,35,49,49,35,21, 7, + 7,21,35,49,49,35,21, 7, + 5,15,25,35,35,25,15, 5, + 3, 9,15,21,21,15, 9, 3, + 1, 3, 5, 7, 
7, 5, 3, 1, +//error:0.000000 +}; + +//linear *64 +static const uint8_t obmc4[16]={ + 4,12,12, 4, +12,36,36,12, +12,36,36,12, + 4,12,12, 4, +//error:0.000000 +}; + +static const uint8_t *obmc_tab[4]={ + obmc32, obmc16, obmc8, obmc4 +}; + +typedef struct BlockNode{ + int16_t mx; + int16_t my; + uint8_t color[3]; + uint8_t type; +//#define TYPE_SPLIT 1 +#define BLOCK_INTRA 1 +//#define TYPE_NOCOLOR 4 + uint8_t level; //FIXME merge into type? +}BlockNode; + +#define LOG2_MB_SIZE 4 +#define MB_SIZE (1<<LOG2_MB_SIZE) + +typedef struct x_and_coeff{ + int16_t x; + uint16_t coeff; +} x_and_coeff; + +typedef struct SubBand{ + int level; + int stride; + int width; + int height; + int qlog; ///< log(qscale)/log[2^(1/6)] + DWTELEM *buf; + int buf_x_offset; + int buf_y_offset; + int stride_line; ///< Stride measured in lines, not pixels. + x_and_coeff * x_coeff; + struct SubBand *parent; + uint8_t state[/*7*2*/ 7 + 512][32]; +}SubBand; + +typedef struct Plane{ + int width; + int height; + SubBand band[MAX_DECOMPOSITIONS][4]; +}Plane; + +/** Used to minimize the amount of memory used in order to optimize cache performance. **/ +typedef struct { + DWTELEM * * line; ///< For use by idwt and predict_slices. + DWTELEM * * data_stack; ///< Used for internal purposes. + int data_stack_top; + int line_count; + int line_width; + int data_count; + DWTELEM * base_buffer; ///< Buffer that this structure is caching. 
+} slice_buffer; + +typedef struct SnowContext{ +// MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX) + + AVCodecContext *avctx; + RangeCoder c; + DSPContext dsp; + AVFrame input_picture; + AVFrame current_picture; + AVFrame last_picture; + AVFrame mconly_picture; +// uint8_t q_context[16]; + uint8_t header_state[32]; + uint8_t block_state[128 + 32*128]; + int keyframe; + int always_reset; + int version; + int spatial_decomposition_type; + int temporal_decomposition_type; + int spatial_decomposition_count; + int temporal_decomposition_count; + DWTELEM *spatial_dwt_buffer; + int colorspace_type; + int chroma_h_shift; + int chroma_v_shift; + int spatial_scalability; + int qlog; + int lambda; + int lambda2; + int mv_scale; + int qbias; +#define QBIAS_SHIFT 3 + int b_width; + int b_height; + int block_max_depth; + Plane plane[MAX_PLANES]; + BlockNode *block; + slice_buffer sb; + + MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX) +}SnowContext; + +typedef struct { + DWTELEM *b0; + DWTELEM *b1; + DWTELEM *b2; + DWTELEM *b3; + int y; +} dwt_compose_t; + +#define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? 
(slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num))) +//#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num))) + +static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, DWTELEM * base_buffer) +{ + int i; + + buf->base_buffer = base_buffer; + buf->line_count = line_count; + buf->line_width = line_width; + buf->data_count = max_allocated_lines; + buf->line = (DWTELEM * *) av_mallocz (sizeof(DWTELEM *) * line_count); + buf->data_stack = (DWTELEM * *) av_malloc (sizeof(DWTELEM *) * max_allocated_lines); + + for (i = 0; i < max_allocated_lines; i++) + { + buf->data_stack[i] = (DWTELEM *) av_malloc (sizeof(DWTELEM) * line_width); + } + + buf->data_stack_top = max_allocated_lines - 1; +} + +static DWTELEM * slice_buffer_load_line(slice_buffer * buf, int line) +{ + int i; + int offset; + DWTELEM * buffer; + +// av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line); + + assert(buf->data_stack_top >= 0); +// assert(!buf->line[line]); + if (buf->line[line]) + return buf->line[line]; + + offset = buf->line_width * line; + buffer = buf->data_stack[buf->data_stack_top]; + buf->data_stack_top--; + buf->line[line] = buffer; + +// av_log(NULL, AV_LOG_DEBUG, "slice_buffer_load_line: line: %d remaining: %d\n", line, buf->data_stack_top + 1); + + return buffer; +} + +static void slice_buffer_release(slice_buffer * buf, int line) +{ + int i; + int offset; + DWTELEM * buffer; + + assert(line >= 0 && line < buf->line_count); + assert(buf->line[line]); + + offset = buf->line_width * line; + buffer = buf->line[line]; + buf->data_stack_top++; + buf->data_stack[buf->data_stack_top] = buffer; + buf->line[line] = NULL; + +// av_log(NULL, AV_LOG_DEBUG, "slice_buffer_release: line: %d remaining: %d\n", line, buf->data_stack_top + 1); +} + +static void slice_buffer_flush(slice_buffer * buf) +{ + int i; + for (i = 0; i < buf->line_count; i++) + { + if (buf->line[i]) + { +// 
av_log(NULL, AV_LOG_DEBUG, "slice_buffer_flush: line: %d \n", i); + slice_buffer_release(buf, i); + } + } +} + +static void slice_buffer_destroy(slice_buffer * buf) +{ + int i; + slice_buffer_flush(buf); + + for (i = buf->data_count - 1; i >= 0; i--) + { + assert(buf->data_stack[i]); + av_free(buf->data_stack[i]); + } + assert(buf->data_stack); + av_free(buf->data_stack); + assert(buf->line); + av_free(buf->line); +} + +#ifdef __sgi +// Avoid a name clash on SGI IRIX +#undef qexp +#endif +#define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0 +static uint8_t qexp[QROOT]; + +static inline int mirror(int v, int m){ + if (v<0) return -v; + else if(v>m) return 2*m-v; + else return v; +} + +static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){ + int i; + + if(v){ + const int a= ABS(v); + const int e= av_log2(a); +#if 1 + const int el= FFMIN(e, 10); + put_rac(c, state+0, 0); + + for(i=0; i<el; i++){ + put_rac(c, state+1+i, 1); //1..10 + } + for(; i<e; i++){ + put_rac(c, state+1+9, 1); //1..10 + } + put_rac(c, state+1+FFMIN(i,9), 0); + + for(i=e-1; i>=el; i--){ + put_rac(c, state+22+9, (a>>i)&1); //22..31 + } + for(; i>=0; i--){ + put_rac(c, state+22+i, (a>>i)&1); //22..31 + } + + if(is_signed) + put_rac(c, state+11 + el, v < 0); //11..21 +#else + + put_rac(c, state+0, 0); + if(e<=9){ + for(i=0; i<e; i++){ + put_rac(c, state+1+i, 1); //1..10 + } + put_rac(c, state+1+i, 0); + + for(i=e-1; i>=0; i--){ + put_rac(c, state+22+i, (a>>i)&1); //22..31 + } + + if(is_signed) + put_rac(c, state+11 + e, v < 0); //11..21 + }else{ + for(i=0; i<e; i++){ + put_rac(c, state+1+FFMIN(i,9), 1); //1..10 + } + put_rac(c, state+1+FFMIN(i,9), 0); + + for(i=e-1; i>=0; i--){ + put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31 + } + + if(is_signed) + put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21 + } +#endif + }else{ + put_rac(c, state+0, 1); + } +} + +static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){ + if(get_rac(c, 
state+0)) + return 0; + else{ + int i, e, a; + e= 0; + while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10 + e++; + } + + a= 1; + for(i=e-1; i>=0; i--){ + a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31 + } + + if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21 + return -a; + else + return a; + } +} + +static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){ + int i; + int r= log2>=0 ? 1<<log2 : 1; + + assert(v>=0); + assert(log2>=-4); + + while(v >= r){ + put_rac(c, state+4+log2, 1); + v -= r; + log2++; + if(log2>0) r+=r; + } + put_rac(c, state+4+log2, 0); + + for(i=log2-1; i>=0; i--){ + put_rac(c, state+31-i, (v>>i)&1); + } +} + +static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){ + int i; + int r= log2>=0 ? 1<<log2 : 1; + int v=0; + + assert(log2>=-4); + + while(get_rac(c, state+4+log2)){ + v+= r; + log2++; + if(log2>0) r+=r; + } + + for(i=log2-1; i>=0; i--){ + v+= get_rac(c, state+31-i)<<i; + } + + return v; +} + +static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){ + const int mirror_left= !highpass; + const int mirror_right= (width&1) ^ highpass; + const int w= (width>>1) - 1 + (highpass & width); + int i; + +#define LIFT(src, ref, inv) ((src) + ((inv) ? 
- (ref) : + (ref))) + if(mirror_left){ + dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse); + dst += dst_step; + src += src_step; + } + + for(i=0; i<w; i++){ + dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse); + } + + if(mirror_right){ + dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse); + } +} + +static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){ + const int mirror_left= !highpass; + const int mirror_right= (width&1) ^ highpass; + const int w= (width>>1) - 1 + (highpass & width); + int i; + + if(mirror_left){ + int r= 3*2*ref[0]; + r += r>>4; + r += r>>8; + dst[0] = LIFT(src[0], ((r+add)>>shift), inverse); + dst += dst_step; + src += src_step; + } + + for(i=0; i<w; i++){ + int r= 3*(ref[i*ref_step] + ref[(i+1)*ref_step]); + r += r>>4; + r += r>>8; + dst[i*dst_step] = LIFT(src[i*src_step], ((r+add)>>shift), inverse); + } + + if(mirror_right){ + int r= 3*2*ref[w*ref_step]; + r += r>>4; + r += r>>8; + dst[w*dst_step] = LIFT(src[w*src_step], ((r+add)>>shift), inverse); + } +} + +static always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){ + const int mirror_left= !highpass; + const int mirror_right= (width&1) ^ highpass; + const int w= (width>>1) - 1 + (highpass & width); + int i; + + assert(shift == 4); +#define LIFTS(src, ref, inv) ((inv) ? 
(src) - (((ref) - 4*(src))>>shift): (16*4*(src) + 4*(ref) + 8 + (5<<27))/(5*16) - (1<<23)) + if(mirror_left){ + dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse); + dst += dst_step; + src += src_step; + } + + for(i=0; i<w; i++){ + dst[i*dst_step] = LIFTS(src[i*src_step], mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add, inverse); + } + + if(mirror_right){ + dst[w*dst_step] = LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse); + } +} + + +static void inplace_lift(DWTELEM *dst, int width, int *coeffs, int n, int shift, int start, int inverse){ + int x, i; + + for(x=start; x<width; x+=2){ + int64_t sum=0; + + for(i=0; i<n; i++){ + int x2= x + 2*i - n + 1; + if (x2< 0) x2= -x2; + else if(x2>=width) x2= 2*width-x2-2; + sum += coeffs[i]*(int64_t)dst[x2]; + } + if(inverse) dst[x] -= (sum + (1<<shift)/2)>>shift; + else dst[x] += (sum + (1<<shift)/2)>>shift; + } +} + +static void inplace_liftV(DWTELEM *dst, int width, int height, int stride, int *coeffs, int n, int shift, int start, int inverse){ + int x, y, i; + for(y=start; y<height; y+=2){ + for(x=0; x<width; x++){ + int64_t sum=0; + + for(i=0; i<n; i++){ + int y2= y + 2*i - n + 1; + if (y2< 0) y2= -y2; + else if(y2>=height) y2= 2*height-y2-2; + sum += coeffs[i]*(int64_t)dst[x + y2*stride]; + } + if(inverse) dst[x + y*stride] -= (sum + (1<<shift)/2)>>shift; + else dst[x + y*stride] += (sum + (1<<shift)/2)>>shift; + } + } +} + +#define SCALEX 1 +#define LX0 0 +#define LX1 1 + +#if 0 // more accurate 9/7 +#define N1 2 +#define SHIFT1 14 +#define COEFFS1 (int[]){-25987,-25987} +#define N2 2 +#define SHIFT2 19 +#define COEFFS2 (int[]){-27777,-27777} +#define N3 2 +#define SHIFT3 15 +#define COEFFS3 (int[]){28931,28931} +#define N4 2 +#define SHIFT4 15 +#define COEFFS4 (int[]){14533,14533} +#elif 1 // 13/7 CRF +#define N1 4 +#define SHIFT1 4 +#define COEFFS1 (int[]){1,-9,-9,1} +#define N2 4 +#define SHIFT2 4 +#define COEFFS2 (int[]){-1,5,5,-1} +#define N3 0 +#define SHIFT3 1 +#define COEFFS3 NULL +#define N4 0 +#define 
SHIFT4 1 +#define COEFFS4 NULL +#elif 1 // 3/5 +#define LX0 1 +#define LX1 0 +#define SCALEX 0.5 +#define N1 2 +#define SHIFT1 1 +#define COEFFS1 (int[]){1,1} +#define N2 2 +#define SHIFT2 2 +#define COEFFS2 (int[]){-1,-1} +#define N3 0 +#define SHIFT3 0 +#define COEFFS3 NULL +#define N4 0 +#define SHIFT4 0 +#define COEFFS4 NULL +#elif 1 // 11/5 +#define N1 0 +#define SHIFT1 1 +#define COEFFS1 NULL +#define N2 2 +#define SHIFT2 2 +#define COEFFS2 (int[]){-1,-1} +#define N3 2 +#define SHIFT3 0 +#define COEFFS3 (int[]){-1,-1} +#define N4 4 +#define SHIFT4 7 +#define COEFFS4 (int[]){-5,29,29,-5} +#define SCALEX 4 +#elif 1 // 9/7 CDF +#define N1 2 +#define SHIFT1 7 +#define COEFFS1 (int[]){-203,-203} +#define N2 2 +#define SHIFT2 12 +#define COEFFS2 (int[]){-217,-217} +#define N3 2 +#define SHIFT3 7 +#define COEFFS3 (int[]){113,113} +#define N4 2 +#define SHIFT4 9 +#define COEFFS4 (int[]){227,227} +#define SCALEX 1 +#elif 1 // 7/5 CDF +#define N1 0 +#define SHIFT1 1 +#define COEFFS1 NULL +#define N2 2 +#define SHIFT2 2 +#define COEFFS2 (int[]){-1,-1} +#define N3 2 +#define SHIFT3 0 +#define COEFFS3 (int[]){-1,-1} +#define N4 2 +#define SHIFT4 4 +#define COEFFS4 (int[]){3,3} +#elif 1 // 9/7 MN +#define N1 4 +#define SHIFT1 4 +#define COEFFS1 (int[]){1,-9,-9,1} +#define N2 2 +#define SHIFT2 2 +#define COEFFS2 (int[]){1,1} +#define N3 0 +#define SHIFT3 1 +#define COEFFS3 NULL +#define N4 0 +#define SHIFT4 1 +#define COEFFS4 NULL +#else // 13/7 CRF +#define N1 4 +#define SHIFT1 4 +#define COEFFS1 (int[]){1,-9,-9,1} +#define N2 4 +#define SHIFT2 4 +#define COEFFS2 (int[]){-1,5,5,-1} +#define N3 0 +#define SHIFT3 1 +#define COEFFS3 NULL +#define N4 0 +#define SHIFT4 1 +#define COEFFS4 NULL +#endif +static void horizontal_decomposeX(DWTELEM *b, int width){ + DWTELEM temp[width]; + const int width2= width>>1; + const int w2= (width+1)>>1; + int A1,A2,A3,A4, x; + + inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 0); + inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 0); + 
inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 0); + inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 0); + + for(x=0; x<width2; x++){ + temp[x ]= b[2*x ]; + temp[x+w2]= b[2*x + 1]; + } + if(width&1) + temp[x ]= b[2*x ]; + memcpy(b, temp, width*sizeof(int)); +} + +static void horizontal_composeX(DWTELEM *b, int width){ + DWTELEM temp[width]; + const int width2= width>>1; + int A1,A2,A3,A4, x; + const int w2= (width+1)>>1; + + memcpy(temp, b, width*sizeof(int)); + for(x=0; x<width2; x++){ + b[2*x ]= temp[x ]; + b[2*x + 1]= temp[x+w2]; + } + if(width&1) + b[2*x ]= temp[x ]; + + inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 1); + inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 1); + inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 1); + inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 1); +} + +static void spatial_decomposeX(DWTELEM *buffer, int width, int height, int stride){ + int x, y; + + for(y=0; y<height; y++){ + for(x=0; x<width; x++){ + buffer[y*stride + x] *= SCALEX; + } + } + + for(y=0; y<height; y++){ + horizontal_decomposeX(buffer + y*stride, width); + } + + inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 0); + inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 0); + inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 0); + inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0); +} + +static void spatial_composeX(DWTELEM *buffer, int width, int height, int stride){ + int x, y; + + inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 1); + inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 1); + inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 1); + inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 1); + + for(y=0; y<height; y++){ + horizontal_composeX(buffer + y*stride, width); + } + + for(y=0; y<height; y++){ + for(x=0; x<width; x++){ + buffer[y*stride + x] /= SCALEX; + } + } +} + +static 
void horizontal_decompose53i(DWTELEM *b, int width){ + DWTELEM temp[width]; + const int width2= width>>1; + int A1,A2,A3,A4, x; + const int w2= (width+1)>>1; + + for(x=0; x<width2; x++){ + temp[x ]= b[2*x ]; + temp[x+w2]= b[2*x + 1]; + } + if(width&1) + temp[x ]= b[2*x ]; +#if 0 + A2= temp[1 ]; + A4= temp[0 ]; + A1= temp[0+width2]; + A1 -= (A2 + A4)>>1; + A4 += (A1 + 1)>>1; + b[0+width2] = A1; + b[0 ] = A4; + for(x=1; x+1<width2; x+=2){ + A3= temp[x+width2]; + A4= temp[x+1 ]; + A3 -= (A2 + A4)>>1; + A2 += (A1 + A3 + 2)>>2; + b[x+width2] = A3; + b[x ] = A2; + + A1= temp[x+1+width2]; + A2= temp[x+2 ]; + A1 -= (A2 + A4)>>1; + A4 += (A1 + A3 + 2)>>2; + b[x+1+width2] = A1; + b[x+1 ] = A4; + } + A3= temp[width-1]; + A3 -= A2; + A2 += (A1 + A3 + 2)>>2; + b[width -1] = A3; + b[width2-1] = A2; +#else + lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0); + lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0); +#endif +} + +static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ + int i; + + for(i=0; i<width; i++){ + b1[i] -= (b0[i] + b2[i])>>1; + } +} + +static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ + int i; + + for(i=0; i<width; i++){ + b1[i] += (b0[i] + b2[i] + 2)>>2; + } +} + +static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){ + int y; + DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride; + DWTELEM *b1= buffer + mirror(-2 , height-1)*stride; + + for(y=-2; y<height; y+=2){ + DWTELEM *b2= buffer + mirror(y+1, height-1)*stride; + DWTELEM *b3= buffer + mirror(y+2, height-1)*stride; + +{START_TIMER + if(b1 <= b3) horizontal_decompose53i(b2, width); + if(y+2 < height) horizontal_decompose53i(b3, width); +STOP_TIMER("horizontal_decompose53i")} + +{START_TIMER + if(b1 <= b3) vertical_decompose53iH0(b1, b2, b3, width); + if(b0 <= b2) vertical_decompose53iL0(b0, b1, b2, width); +STOP_TIMER("vertical_decompose53i*")} + + b0=b2; + b1=b3; + } +} + +#define liftS lift 
+#define lift5 lift +#if 1 +#define W_AM 3 +#define W_AO 0 +#define W_AS 1 + +#undef liftS +#define W_BM 1 +#define W_BO 8 +#define W_BS 4 + +#define W_CM 1 +#define W_CO 0 +#define W_CS 0 + +#define W_DM 3 +#define W_DO 4 +#define W_DS 3 +#elif 0 +#define W_AM 55 +#define W_AO 16 +#define W_AS 5 + +#define W_BM 3 +#define W_BO 32 +#define W_BS 6 + +#define W_CM 127 +#define W_CO 64 +#define W_CS 7 + +#define W_DM 7 +#define W_DO 8 +#define W_DS 4 +#elif 0 +#define W_AM 97 +#define W_AO 32 +#define W_AS 6 + +#define W_BM 63 +#define W_BO 512 +#define W_BS 10 + +#define W_CM 13 +#define W_CO 8 +#define W_CS 4 + +#define W_DM 15 +#define W_DO 16 +#define W_DS 5 + +#else + +#define W_AM 203 +#define W_AO 64 +#define W_AS 7 + +#define W_BM 217 +#define W_BO 2048 +#define W_BS 12 + +#define W_CM 113 +#define W_CO 64 +#define W_CS 7 + +#define W_DM 227 +#define W_DO 128 +#define W_DS 9 +#endif +static void horizontal_decompose97i(DWTELEM *b, int width){ + DWTELEM temp[width]; + const int w2= (width+1)>>1; + + lift (temp+w2, b +1, b , 1, 2, 2, width, -W_AM, W_AO, W_AS, 1, 0); + liftS(temp , b , temp+w2, 1, 2, 1, width, -W_BM, W_BO, W_BS, 0, 0); + lift5(b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0); + lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0); +} + + +static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ + int i; + + for(i=0; i<width; i++){ + b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS; + } +} + +static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ + int i; + + for(i=0; i<width; i++){ +#ifdef lift5 + b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS; +#else + int r= 3*(b0[i] + b2[i]); + r+= r>>4; + r+= r>>8; + b1[i] += (r+W_CO)>>W_CS; +#endif + } +} + +static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ + int i; + + for(i=0; i<width; i++){ +#ifdef liftS + b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS; +#else + b1[i] = (16*4*b1[i] - 4*(b0[i] + 
b2[i]) + 8*5 + (5<<27)) / (5*16) - (1<<23); +#endif + } +} + +static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ + int i; + + for(i=0; i<width; i++){ + b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS; + } +} + +static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){ + int y; + DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride; + DWTELEM *b1= buffer + mirror(-4 , height-1)*stride; + DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride; + DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride; + + for(y=-4; y<height; y+=2){ + DWTELEM *b4= buffer + mirror(y+3, height-1)*stride; + DWTELEM *b5= buffer + mirror(y+4, height-1)*stride; + +{START_TIMER + if(b3 <= b5) horizontal_decompose97i(b4, width); + if(y+4 < height) horizontal_decompose97i(b5, width); +if(width>400){ +STOP_TIMER("horizontal_decompose97i") +}} + +{START_TIMER + if(b3 <= b5) vertical_decompose97iH0(b3, b4, b5, width); + if(b2 <= b4) vertical_decompose97iL0(b2, b3, b4, width); + if(b1 <= b3) vertical_decompose97iH1(b1, b2, b3, width); + if(b0 <= b2) vertical_decompose97iL1(b0, b1, b2, width); + +if(width>400){ +STOP_TIMER("vertical_decompose97i") +}} + + b0=b2; + b1=b3; + b2=b4; + b3=b5; + } +} + +void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){ + int level; + + for(level=0; level<decomposition_count; level++){ + switch(type){ + case 0: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break; + case 1: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break; + case 2: spatial_decomposeX (buffer, width>>level, height>>level, stride<<level); break; + } + } +} + +static void horizontal_compose53i(DWTELEM *b, int width){ + DWTELEM temp[width]; + const int width2= width>>1; + const int w2= (width+1)>>1; + int A1,A2,A3,A4, x; + +#if 0 + A2= temp[1 ]; + A4= temp[0 ]; + A1= temp[0+width2]; + A1 -= (A2 + A4)>>1; + A4 += (A1 + 1)>>1; + b[0+width2] = 
A1; + b[0 ] = A4; + for(x=1; x+1<width2; x+=2){ + A3= temp[x+width2]; + A4= temp[x+1 ]; + A3 -= (A2 + A4)>>1; + A2 += (A1 + A3 + 2)>>2; + b[x+width2] = A3; + b[x ] = A2; + + A1= temp[x+1+width2]; + A2= temp[x+2 ]; + A1 -= (A2 + A4)>>1; + A4 += (A1 + A3 + 2)>>2; + b[x+1+width2] = A1; + b[x+1 ] = A4; + } + A3= temp[width-1]; + A3 -= A2; + A2 += (A1 + A3 + 2)>>2; + b[width -1] = A3; + b[width2-1] = A2; +#else + lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1); + lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1); +#endif + for(x=0; x<width2; x++){ + b[2*x ]= temp[x ]; + b[2*x + 1]= temp[x+w2]; + } + if(width&1) + b[2*x ]= temp[x ]; +} + +static void vertical_compose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ + int i; + + for(i=0; i<width; i++){ + b1[i] += (b0[i] + b2[i])>>1; + } +} + +static void vertical_compose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ + int i; + + for(i=0; i<width; i++){ + b1[i] -= (b0[i] + b2[i] + 2)>>2; + } +} + +static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){ + cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line); + cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line); + cs->y = -1; +} + +static void spatial_compose53i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){ + cs->b0 = buffer + mirror(-1-1, height-1)*stride; + cs->b1 = buffer + mirror(-1 , height-1)*stride; + cs->y = -1; +} + +static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){ + int y= cs->y; + int mirror0 = mirror(y-1, height-1); + int mirror1 = mirror(y , height-1); + int mirror2 = mirror(y+1, height-1); + int mirror3 = mirror(y+2, height-1); + + DWTELEM *b0= cs->b0; + DWTELEM *b1= cs->b1; + DWTELEM *b2= slice_buffer_get_line(sb, mirror2 * stride_line); + DWTELEM *b3= slice_buffer_get_line(sb, mirror3 * stride_line); + +{START_TIMER + if(mirror1 <= mirror3) 
vertical_compose53iL0(b1, b2, b3, width); + if(mirror0 <= mirror2) vertical_compose53iH0(b0, b1, b2, width); +STOP_TIMER("vertical_compose53i*")} + +{START_TIMER + if(y-1 >= 0) horizontal_compose53i(b0, width); + if(mirror0 <= mirror2) horizontal_compose53i(b1, width); +STOP_TIMER("horizontal_compose53i")} + + cs->b0 = b2; + cs->b1 = b3; + cs->y += 2; +} + +static void spatial_compose53i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){ + int y= cs->y; + DWTELEM *b0= cs->b0; + DWTELEM *b1= cs->b1; + DWTELEM *b2= buffer + mirror(y+1, height-1)*stride; + DWTELEM *b3= buffer + mirror(y+2, height-1)*stride; + +{START_TIMER + if(b1 <= b3) vertical_compose53iL0(b1, b2, b3, width); + if(b0 <= b2) vertical_compose53iH0(b0, b1, b2, width); +STOP_TIMER("vertical_compose53i*")} + +{START_TIMER + if(y-1 >= 0) horizontal_compose53i(b0, width); + if(b0 <= b2) horizontal_compose53i(b1, width); +STOP_TIMER("horizontal_compose53i")} + + cs->b0 = b2; + cs->b1 = b3; + cs->y += 2; +} + +static void spatial_compose53i(DWTELEM *buffer, int width, int height, int stride){ + dwt_compose_t cs; + spatial_compose53i_init(&cs, buffer, height, stride); + while(cs.y <= height) + spatial_compose53i_dy(&cs, buffer, width, height, stride); +} + + +static void horizontal_compose97i(DWTELEM *b, int width){ + DWTELEM temp[width]; + const int w2= (width+1)>>1; + + lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1); + lift5(temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1); + liftS(b , temp , temp+w2, 2, 1, 1, width, -W_BM, W_BO, W_BS, 0, 1); + lift (b+1 , temp+w2, b , 2, 1, 2, width, -W_AM, W_AO, W_AS, 1, 1); +} + +static void vertical_compose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ + int i; + + for(i=0; i<width; i++){ + b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS; + } +} + +static void vertical_compose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ + int i; + + for(i=0; i<width; i++){ +#ifdef lift5 + b1[i] -= (W_CM*(b0[i] + 
b2[i])+W_CO)>>W_CS; +#else + int r= 3*(b0[i] + b2[i]); + r+= r>>4; + r+= r>>8; + b1[i] -= (r+W_CO)>>W_CS; +#endif + } +} + +static void vertical_compose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ + int i; + + for(i=0; i<width; i++){ +#ifdef liftS + b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS; +#else + b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS; +#endif + } +} + +static void vertical_compose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){ + int i; + + for(i=0; i<width; i++){ + b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS; + } +} + +static void vertical_compose97i(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width){ + int i; + + for(i=0; i<width; i++){ + int r; + b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS; +#ifdef lift5 + b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS; +#else + r= 3*(b2[i] + b4[i]); + r+= r>>4; + r+= r>>8; + b3[i] -= (r+W_CO)>>W_CS; +#endif +#ifdef liftS + b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS; +#else + b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS; +#endif + b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS; + } +} + +static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){ + cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line); + cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line); + cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line); + cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line); + cs->y = -3; +} + +static void spatial_compose97i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){ + cs->b0 = buffer + mirror(-3-1, height-1)*stride; + cs->b1 = buffer + mirror(-3 , height-1)*stride; + cs->b2 = buffer + mirror(-3+1, height-1)*stride; + cs->b3 = buffer + mirror(-3+2, height-1)*stride; + cs->y = -3; +} + +static void spatial_compose97i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){ + int y = 
cs->y; + + int mirror0 = mirror(y - 1, height - 1); + int mirror1 = mirror(y + 0, height - 1); + int mirror2 = mirror(y + 1, height - 1); + int mirror3 = mirror(y + 2, height - 1); + int mirror4 = mirror(y + 3, height - 1); + int mirror5 = mirror(y + 4, height - 1); + DWTELEM *b0= cs->b0; + DWTELEM *b1= cs->b1; + DWTELEM *b2= cs->b2; + DWTELEM *b3= cs->b3; + DWTELEM *b4= slice_buffer_get_line(sb, mirror4 * stride_line); + DWTELEM *b5= slice_buffer_get_line(sb, mirror5 * stride_line); + +{START_TIMER + if(y>0 && y+4<height){ + vertical_compose97i(b0, b1, b2, b3, b4, b5, width); + }else{ + if(mirror3 <= mirror5) vertical_compose97iL1(b3, b4, b5, width); + if(mirror2 <= mirror4) vertical_compose97iH1(b2, b3, b4, width); + if(mirror1 <= mirror3) vertical_compose97iL0(b1, b2, b3, width); + if(mirror0 <= mirror2) vertical_compose97iH0(b0, b1, b2, width); + } +if(width>400){ +STOP_TIMER("vertical_compose97i")}} + +{START_TIMER + if(y-1>= 0) horizontal_compose97i(b0, width); + if(mirror0 <= mirror2) horizontal_compose97i(b1, width); +if(width>400 && mirror0 <= mirror2){ +STOP_TIMER("horizontal_compose97i")}} + + cs->b0=b2; + cs->b1=b3; + cs->b2=b4; + cs->b3=b5; + cs->y += 2; +} + +static void spatial_compose97i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){ + int y = cs->y; + DWTELEM *b0= cs->b0; + DWTELEM *b1= cs->b1; + DWTELEM *b2= cs->b2; + DWTELEM *b3= cs->b3; + DWTELEM *b4= buffer + mirror(y+3, height-1)*stride; + DWTELEM *b5= buffer + mirror(y+4, height-1)*stride; + + if(stride == width && y+4 < height && 0){ + int x; + for(x=0; x<width/2; x++) + b5[x] += 64*2; + for(; x<width; x++) + b5[x] += 169*2; + } + +{START_TIMER + if(b3 <= b5) vertical_compose97iL1(b3, b4, b5, width); + if(b2 <= b4) vertical_compose97iH1(b2, b3, b4, width); + if(b1 <= b3) vertical_compose97iL0(b1, b2, b3, width); + if(b0 <= b2) vertical_compose97iH0(b0, b1, b2, width); +if(width>400){ +STOP_TIMER("vertical_compose97i")}} + +{START_TIMER + if(y-1>= 0) 
horizontal_compose97i(b0, width); + if(b0 <= b2) horizontal_compose97i(b1, width); +if(width>400 && b0 <= b2){ +STOP_TIMER("horizontal_compose97i")}} + + cs->b0=b2; + cs->b1=b3; + cs->b2=b4; + cs->b3=b5; + cs->y += 2; +} + +static void spatial_compose97i(DWTELEM *buffer, int width, int height, int stride){ + dwt_compose_t cs; + spatial_compose97i_init(&cs, buffer, height, stride); + while(cs.y <= height) + spatial_compose97i_dy(&cs, buffer, width, height, stride); +} + +void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){ + int level; + for(level=decomposition_count-1; level>=0; level--){ + switch(type){ + case 0: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break; + case 1: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break; + /* not slicified yet */ + case 2: /*spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;*/ + av_log(NULL, AV_LOG_ERROR, "spatial_composeX neither buffered nor slicified yet.\n"); break; + } + } +} + +void ff_spatial_idwt_init(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){ + int level; + for(level=decomposition_count-1; level>=0; level--){ + switch(type){ + case 0: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break; + case 1: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break; + /* not slicified yet */ + case 2: spatial_composeX(buffer, width>>level, height>>level, stride<<level); break; + } + } +} + +void ff_spatial_idwt_slice(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){ + const int support = type==1 ? 
3 : 5; + int level; + if(type==2) return; + + for(level=decomposition_count-1; level>=0; level--){ + while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){ + switch(type){ + case 0: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level); + break; + case 1: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level); + break; + case 2: break; + } + } + } +} + +void ff_spatial_idwt_buffered_slice(dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){ + const int support = type==1 ? 3 : 5; + int level; + if(type==2) return; + + for(level=decomposition_count-1; level>=0; level--){ + while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){ + switch(type){ + case 0: spatial_compose97i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level); + break; + case 1: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level); + break; + case 2: break; + } + } + } +} + +void ff_spatial_idwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){ + if(type==2){ + int level; + for(level=decomposition_count-1; level>=0; level--) + spatial_composeX (buffer, width>>level, height>>level, stride<<level); + }else{ + dwt_compose_t cs[MAX_DECOMPOSITIONS]; + int y; + ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count); + for(y=0; y<height; y+=4) + ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y); + } +} + +static int encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){ + const int w= b->width; + const int h= b->height; + int x, y; + + if(1){ + int run=0; + int runs[w*h]; + int run_index=0; + int max_index; + + for(y=0; y<h; y++){ + for(x=0; x<w; x++){ + int v, p=0; + int /*ll=0, */l=0, lt=0, t=0, rt=0; + v= src[x + y*stride]; + + 
if(y){ + t= src[x + (y-1)*stride]; + if(x){ + lt= src[x - 1 + (y-1)*stride]; + } + if(x + 1 < w){ + rt= src[x + 1 + (y-1)*stride]; + } + } + if(x){ + l= src[x - 1 + y*stride]; + /*if(x > 1){ + if(orientation==1) ll= src[y + (x-2)*stride]; + else ll= src[x - 2 + y*stride]; + }*/ + } + if(parent){ + int px= x>>1; + int py= y>>1; + if(px<b->parent->width && py<b->parent->height) + p= parent[px + py*2*stride]; + } + if(!(/*ll|*/l|lt|t|rt|p)){ + if(v){ + runs[run_index++]= run; + run=0; + }else{ + run++; + } + } + } + } + max_index= run_index; + runs[run_index++]= run; + run_index=0; + run= runs[run_index++]; + + put_symbol2(&s->c, b->state[30], max_index, 0); + if(run_index <= max_index) + put_symbol2(&s->c, b->state[1], run, 3); + + for(y=0; y<h; y++){ + if(s->c.bytestream_end - s->c.bytestream < w*40){ + av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n"); + return -1; + } + for(x=0; x<w; x++){ + int v, p=0; + int /*ll=0, */l=0, lt=0, t=0, rt=0; + v= src[x + y*stride]; + + if(y){ + t= src[x + (y-1)*stride]; + if(x){ + lt= src[x - 1 + (y-1)*stride]; + } + if(x + 1 < w){ + rt= src[x + 1 + (y-1)*stride]; + } + } + if(x){ + l= src[x - 1 + y*stride]; + /*if(x > 1){ + if(orientation==1) ll= src[y + (x-2)*stride]; + else ll= src[x - 2 + y*stride]; + }*/ + } + if(parent){ + int px= x>>1; + int py= y>>1; + if(px<b->parent->width && py<b->parent->height) + p= parent[px + py*2*stride]; + } + if(/*ll|*/l|lt|t|rt|p){ + int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p)); + + put_rac(&s->c, &b->state[0][context], !!v); + }else{ + if(!run){ + run= runs[run_index++]; + + if(run_index <= max_index) + put_symbol2(&s->c, b->state[1], run, 3); + assert(v); + }else{ + run--; + assert(!v); + } + } + if(v){ + int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p)); + int l2= 2*ABS(l) + (l<0); + int t2= 2*ABS(t) + (t<0); + + put_symbol2(&s->c, b->state[context + 2], ABS(v)-1, context-4); + put_rac(&s->c, &b->state[0][16 + 
1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0); + } + } + } + } + return 0; +} + +static int encode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){ +// encode_subband_qtree(s, b, src, parent, stride, orientation); +// encode_subband_z0run(s, b, src, parent, stride, orientation); + return encode_subband_c0run(s, b, src, parent, stride, orientation); +// encode_subband_dzr(s, b, src, parent, stride, orientation); +} + +static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){ + const int w= b->width; + const int h= b->height; + int x,y; + + if(1){ + int run, runs; + x_and_coeff *xc= b->x_coeff; + x_and_coeff *prev_xc= NULL; + x_and_coeff *prev2_xc= xc; + x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL; + x_and_coeff *prev_parent_xc= parent_xc; + + runs= get_symbol2(&s->c, b->state[30], 0); + if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3); + else run= INT_MAX; + + for(y=0; y<h; y++){ + int v=0; + int lt=0, t=0, rt=0; + + if(y && prev_xc->x == 0){ + rt= prev_xc->coeff; + } + for(x=0; x<w; x++){ + int p=0; + const int l= v; + + lt= t; t= rt; + + if(y){ + if(prev_xc->x <= x) + prev_xc++; + if(prev_xc->x == x + 1) + rt= prev_xc->coeff; + else + rt=0; + } + if(parent_xc){ + if(x>>1 > parent_xc->x){ + parent_xc++; + } + if(x>>1 == parent_xc->x){ + p= parent_xc->coeff; + } + } + if(/*ll|*/l|lt|t|rt|p){ + int context= av_log2(/*ABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1)); + + v=get_rac(&s->c, &b->state[0][context]); + if(v){ + v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1); + v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]); + + xc->x=x; + (xc++)->coeff= v; + } + }else{ + if(!run){ + if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3); + else run= INT_MAX; + v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1); + v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]); + + xc->x=x; + (xc++)->coeff= v; + 
}else{ + int max_run; + run--; + v=0; + + if(y) max_run= FFMIN(run, prev_xc->x - x - 2); + else max_run= FFMIN(run, w-x-1); + if(parent_xc) + max_run= FFMIN(max_run, 2*parent_xc->x - x - 1); + x+= max_run; + run-= max_run; + } + } + } + (xc++)->x= w+1; //end marker + prev_xc= prev2_xc; + prev2_xc= xc; + + if(parent_xc){ + if(y&1){ + while(parent_xc->x != parent->width+1) + parent_xc++; + parent_xc++; + prev_parent_xc= parent_xc; + }else{ + parent_xc= prev_parent_xc; + } + } + } + + (xc++)->x= w+1; //end marker + } +} + +static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){ + const int w= b->width; + int x,y; + const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16); + int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); + int qadd= (s->qbias*qmul)>>QBIAS_SHIFT; + int new_index = 0; + + START_TIMER + + if(b->buf == s->spatial_dwt_buffer || s->qlog == LOSSLESS_QLOG){ + qadd= 0; + qmul= 1<<QEXPSHIFT; + } + + /* If we are on the second or later slice, restore our index. */ + if (start_y != 0) + new_index = save_state[0]; + + + for(y=start_y; y<h; y++){ + int x = 0; + int v; + DWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset; + memset(line, 0, b->width*sizeof(DWTELEM)); + v = b->x_coeff[new_index].coeff; + x = b->x_coeff[new_index++].x; + while(x < w) + { + register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT; + register int u= -(v&1); + line[x] = (t^u) - u; + + v = b->x_coeff[new_index].coeff; + x = b->x_coeff[new_index++].x; + } + } + if(w > 200 && start_y != 0/*level+1 == s->spatial_decomposition_count*/){ + STOP_TIMER("decode_subband") + } + + /* Save our variables for the next slice. 
*/ + save_state[0] = new_index; + + return; +} + +static void reset_contexts(SnowContext *s){ + int plane_index, level, orientation; + + for(plane_index=0; plane_index<3; plane_index++){ + for(level=0; level<s->spatial_decomposition_count; level++){ + for(orientation=level ? 1:0; orientation<4; orientation++){ + memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state)); + } + } + } + memset(s->header_state, MID_STATE, sizeof(s->header_state)); + memset(s->block_state, MID_STATE, sizeof(s->block_state)); +} + +static int alloc_blocks(SnowContext *s){ + int w= -((-s->avctx->width )>>LOG2_MB_SIZE); + int h= -((-s->avctx->height)>>LOG2_MB_SIZE); + + s->b_width = w; + s->b_height= h; + + s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2)); + return 0; +} + +static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){ + uint8_t *bytestream= d->bytestream; + uint8_t *bytestream_start= d->bytestream_start; + *d= *s; + d->bytestream= bytestream; + d->bytestream_start= bytestream_start; +} + +//near copy & paste from dsputil, FIXME +static int pix_sum(uint8_t * pix, int line_size, int w) +{ + int s, i, j; + + s = 0; + for (i = 0; i < w; i++) { + for (j = 0; j < w; j++) { + s += pix[0]; + pix ++; + } + pix += line_size - w; + } + return s; +} + +//near copy & paste from dsputil, FIXME +static int pix_norm1(uint8_t * pix, int line_size, int w) +{ + int s, i, j; + uint32_t *sq = squareTbl + 256; + + s = 0; + for (i = 0; i < w; i++) { + for (j = 0; j < w; j ++) { + s += sq[pix[0]]; + pix ++; + } + pix += line_size - w; + } + return s; +} + +static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int type){ + const int w= s->b_width << s->block_max_depth; + const int rem_depth= s->block_max_depth - level; + const int index= (x + y*w) << rem_depth; + const int block_w= 1<<rem_depth; + BlockNode block; + int i,j; + + 
block.color[0]= l; + block.color[1]= cb; + block.color[2]= cr; + block.mx= mx; + block.my= my; + block.type= type; + block.level= level; + + for(j=0; j<block_w; j++){ + for(i=0; i<block_w; i++){ + s->block[index + i + j*w]= block; + } + } +} + +static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){ + const int offset[3]= { + y*c-> stride + x, + ((y*c->uvstride + x)>>1), + ((y*c->uvstride + x)>>1), + }; + int i; + for(i=0; i<3; i++){ + c->src[0][i]= src [i]; + c->ref[0][i]= ref [i] + offset[i]; + } + assert(!ref_index); +} + +//FIXME copy&paste +#define P_LEFT P[1] +#define P_TOP P[2] +#define P_TOPRIGHT P[3] +#define P_MEDIAN P[4] +#define P_MV1 P[9] +#define FLAG_QPEL 1 //must be 1 + +static int encode_q_branch(SnowContext *s, int level, int x, int y){ + uint8_t p_buffer[1024]; + uint8_t i_buffer[1024]; + uint8_t p_state[sizeof(s->block_state)]; + uint8_t i_state[sizeof(s->block_state)]; + RangeCoder pc, ic; + uint8_t *pbbak= s->c.bytestream; + uint8_t *pbbak_start= s->c.bytestream_start; + int score, score2, iscore, i_len, p_len, block_s, sum; + const int w= s->b_width << s->block_max_depth; + const int h= s->b_height << s->block_max_depth; + const int rem_depth= s->block_max_depth - level; + const int index= (x + y*w) << rem_depth; + const int block_w= 1<<(LOG2_MB_SIZE - level); + static BlockNode null_block= { //FIXME add border maybe + .color= {128,128,128}, + .mx= 0, + .my= 0, + .type= 0, + .level= 0, + }; + int trx= (x+1)<<rem_depth; + int try= (y+1)<<rem_depth; + BlockNode *left = x ? &s->block[index-1] : &null_block; + BlockNode *top = y ? &s->block[index-w] : &null_block; + BlockNode *right = trx<w ? &s->block[index+1] : &null_block; + BlockNode *bottom= try<h ? &s->block[index+w] : &null_block; + BlockNode *tl = y && x ? &s->block[index-w-1] : left; + BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? 
&s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt + int pl = left->color[0]; + int pcb= left->color[1]; + int pcr= left->color[2]; + int pmx= mid_pred(left->mx, top->mx, tr->mx); + int pmy= mid_pred(left->my, top->my, tr->my); + int mx=0, my=0; + int l,cr,cb, i; + const int stride= s->current_picture.linesize[0]; + const int uvstride= s->current_picture.linesize[1]; + const int instride= s->input_picture.linesize[0]; + const int uvinstride= s->input_picture.linesize[1]; + uint8_t *new_l = s->input_picture.data[0] + (x + y* instride)*block_w; + uint8_t *new_cb= s->input_picture.data[1] + (x + y*uvinstride)*block_w/2; + uint8_t *new_cr= s->input_picture.data[2] + (x + y*uvinstride)*block_w/2; + uint8_t current_mb[3][stride*block_w]; + uint8_t *current_data[3]= {&current_mb[0][0], &current_mb[1][0], &current_mb[2][0]}; + int P[10][2]; + int16_t last_mv[3][2]; + int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused + const int shift= 1+qpel; + MotionEstContext *c= &s->m.me; + int mx_context= av_log2(2*ABS(left->mx - top->mx)); + int my_context= av_log2(2*ABS(left->my - top->my)); + int s_context= 2*left->level + 2*top->level + tl->level + tr->level; + + assert(sizeof(s->block_state) >= 256); + if(s->keyframe){ + set_blocks(s, level, x, y, pl, pcb, pcr, pmx, pmy, BLOCK_INTRA); + return 0; + } + + //FIXME optimize + for(i=0; i<block_w; i++) + memcpy(&current_mb[0][0] + stride*i, new_l + instride*i, block_w); + for(i=0; i<block_w>>1; i++) + memcpy(&current_mb[1][0] + uvstride*i, new_cb + uvinstride*i, block_w>>1); + for(i=0; i<block_w>>1; i++) + memcpy(&current_mb[2][0] + uvstride*i, new_cr + uvinstride*i, block_w>>1); + +// clip predictors / edge ? 
+ + P_LEFT[0]= left->mx; + P_LEFT[1]= left->my; + P_TOP [0]= top->mx; + P_TOP [1]= top->my; + P_TOPRIGHT[0]= tr->mx; + P_TOPRIGHT[1]= tr->my; + + last_mv[0][0]= s->block[index].mx; + last_mv[0][1]= s->block[index].my; + last_mv[1][0]= right->mx; + last_mv[1][1]= right->my; + last_mv[2][0]= bottom->mx; + last_mv[2][1]= bottom->my; + + s->m.mb_stride=2; + s->m.mb_x= + s->m.mb_y= 0; + s->m.me.skip= 0; + + init_ref(c, current_data, s->last_picture.data, NULL, block_w*x, block_w*y, 0); + + assert(s->m.me. stride == stride); + assert(s->m.me.uvstride == uvstride); + + c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp); + c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp); + c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp); + c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV; + + c->xmin = - x*block_w - 16+2; + c->ymin = - y*block_w - 16+2; + c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2; + c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2; + + if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift); + if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift); + if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift); + if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift); + if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift); + if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip + if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift); + + P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]); + P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]); + + if (!y) { + c->pred_x= P_LEFT[0]; + c->pred_y= P_LEFT[1]; + } else { + c->pred_x = P_MEDIAN[0]; + c->pred_y = P_MEDIAN[1]; + } + + score= ff_epzs_motion_search(&s->m, &mx, &my, P, 0, /*ref_index*/ 0, last_mv, + (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w); + + 
assert(mx >= c->xmin); + assert(mx <= c->xmax); + assert(my >= c->ymin); + assert(my <= c->ymax); + + score= s->m.me.sub_motion_search(&s->m, &mx, &my, score, 0, 0, level-LOG2_MB_SIZE+4, block_w); + score= ff_get_mb_score(&s->m, mx, my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0); + //FIXME if mb_cmp != SSE then intra cant be compared currently and mb_penalty vs. lambda2 + + // subpel search + pc= s->c; + pc.bytestream_start= + pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo + memcpy(p_state, s->block_state, sizeof(s->block_state)); + + if(level!=s->block_max_depth) + put_rac(&pc, &p_state[4 + s_context], 1); + put_rac(&pc, &p_state[1 + left->type + top->type], 0); + put_symbol(&pc, &p_state[128 + 32*mx_context], mx - pmx, 1); + put_symbol(&pc, &p_state[128 + 32*my_context], my - pmy, 1); + p_len= pc.bytestream - pc.bytestream_start; + score += (s->lambda2*(p_len*8 + + (pc.outstanding_count - s->c.outstanding_count)*8 + + (-av_log2(pc.range) + av_log2(s->c.range)) + ))>>FF_LAMBDA_SHIFT; + + block_s= block_w*block_w; + sum = pix_sum(&current_mb[0][0], stride, block_w); + l= (sum + block_s/2)/block_s; + iscore = pix_norm1(&current_mb[0][0], stride, block_w) - 2*l*sum + l*l*block_s; + + block_s= block_w*block_w>>2; + sum = pix_sum(&current_mb[1][0], uvstride, block_w>>1); + cb= (sum + block_s/2)/block_s; +// iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s; + sum = pix_sum(&current_mb[2][0], uvstride, block_w>>1); + cr= (sum + block_s/2)/block_s; +// iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s; + + ic= s->c; + ic.bytestream_start= + ic.bytestream= i_buffer; //FIXME end/start? 
and at the other stoo + memcpy(i_state, s->block_state, sizeof(s->block_state)); + if(level!=s->block_max_depth) + put_rac(&ic, &i_state[4 + s_context], 1); + put_rac(&ic, &i_state[1 + left->type + top->type], 1); + put_symbol(&ic, &i_state[32], l-pl , 1); + put_symbol(&ic, &i_state[64], cb-pcb, 1); + put_symbol(&ic, &i_state[96], cr-pcr, 1); + i_len= ic.bytestream - ic.bytestream_start; + iscore += (s->lambda2*(i_len*8 + + (ic.outstanding_count - s->c.outstanding_count)*8 + + (-av_log2(ic.range) + av_log2(s->c.range)) + ))>>FF_LAMBDA_SHIFT; + +// assert(score==256*256*256*64-1); + assert(iscore < 255*255*256 + s->lambda2*10); + assert(iscore >= 0); + assert(l>=0 && l<=255); + assert(pl>=0 && pl<=255); + + if(level==0){ + int varc= iscore >> 8; + int vard= score >> 8; + if (vard <= 64 || vard < varc) + c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc); + else + c->scene_change_score+= s->m.qscale; + } + + if(level!=s->block_max_depth){ + put_rac(&s->c, &s->block_state[4 + s_context], 0); + score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0); + score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0); + score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1); + score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1); + score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead + + if(score2 < score && score2 < iscore) + return score2; + } + + if(iscore < score){ + memcpy(pbbak, i_buffer, i_len); + s->c= ic; + s->c.bytestream_start= pbbak_start; + s->c.bytestream= pbbak + i_len; + set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, BLOCK_INTRA); + memcpy(s->block_state, i_state, sizeof(s->block_state)); + return iscore; + }else{ + memcpy(pbbak, p_buffer, p_len); + s->c= pc; + s->c.bytestream_start= pbbak_start; + s->c.bytestream= pbbak + p_len; + set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, 0); + memcpy(s->block_state, p_state, sizeof(s->block_state)); + return score; + } +} + +static void decode_q_branch(SnowContext *s, int level, int x, int y){ + const int w= s->b_width 
<< s->block_max_depth; + const int rem_depth= s->block_max_depth - level; + const int index= (x + y*w) << rem_depth; + static BlockNode null_block= { //FIXME add border maybe + .color= {128,128,128}, + .mx= 0, + .my= 0, + .type= 0, + .level= 0, + }; + int trx= (x+1)<<rem_depth; + BlockNode *left = x ? &s->block[index-1] : &null_block; + BlockNode *top = y ? &s->block[index-w] : &null_block; + BlockNode *tl = y && x ? &s->block[index-w-1] : left; + BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt + int s_context= 2*left->level + 2*top->level + tl->level + tr->level; + + if(s->keyframe){ + set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, BLOCK_INTRA); + return; + } + + if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){ + int type; + int l = left->color[0]; + int cb= left->color[1]; + int cr= left->color[2]; + int mx= mid_pred(left->mx, top->mx, tr->mx); + int my= mid_pred(left->my, top->my, tr->my); + int mx_context= av_log2(2*ABS(left->mx - top->mx)) + 0*av_log2(2*ABS(tr->mx - top->mx)); + int my_context= av_log2(2*ABS(left->my - top->my)) + 0*av_log2(2*ABS(tr->my - top->my)); + + type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? 
BLOCK_INTRA : 0; + + if(type){ + l += get_symbol(&s->c, &s->block_state[32], 1); + cb+= get_symbol(&s->c, &s->block_state[64], 1); + cr+= get_symbol(&s->c, &s->block_state[96], 1); + }else{ + mx+= get_symbol(&s->c, &s->block_state[128 + 32*mx_context], 1); + my+= get_symbol(&s->c, &s->block_state[128 + 32*my_context], 1); + } + set_blocks(s, level, x, y, l, cb, cr, mx, my, type); + }else{ + decode_q_branch(s, level+1, 2*x+0, 2*y+0); + decode_q_branch(s, level+1, 2*x+1, 2*y+0); + decode_q_branch(s, level+1, 2*x+0, 2*y+1); + decode_q_branch(s, level+1, 2*x+1, 2*y+1); + } +} + +static void encode_blocks(SnowContext *s){ + int x, y; + int w= s->b_width; + int h= s->b_height; + + for(y=0; y<h; y++){ + if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit + av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n"); + return; + } + for(x=0; x<w; x++){ + encode_q_branch(s, 0, x, y); + } + } +} + +static void decode_blocks(SnowContext *s){ + int x, y; + int w= s->b_width; + int h= s->b_height; + + for(y=0; y<h; y++){ + for(x=0; x<w; x++){ + decode_q_branch(s, 0, x, y); + } + } +} + +static void mc_block(uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){ + int x, y; +START_TIMER + for(y=0; y < b_h+5; y++){ + for(x=0; x < b_w; x++){ + int a0= src[x ]; + int a1= src[x + 1]; + int a2= src[x + 2]; + int a3= src[x + 3]; + int a4= src[x + 4]; + int a5= src[x + 5]; +// int am= 9*(a1+a2) - (a0+a3); + int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5); +// int am= 18*(a2+a3) - 2*(a1+a4); +// int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3; +// int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3; + +// if(b_w==16) am= 8*(a1+a2); + + if(dx<8) am = (32*a2*( 8-dx) + am* dx + 128)>>8; + else am = ( am*(16-dx) + 32*a3*(dx-8) + 128)>>8; + + /* FIXME Try increasing tmp buffer to 16 bits and not clipping here. Should give marginally better results. 
- Robert*/ + if(am&(~255)) am= ~(am>>31); + + tmp[x] = am; + +/* if (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) + aL* dx + 32)>>6; + else if(dx< 8) tmp[x + y*stride]= ( aL*( 8-dx) + am*(dx- 4) + 32)>>6; + else if(dx<12) tmp[x + y*stride]= ( am*(12-dx) + aR*(dx- 8) + 32)>>6; + else tmp[x + y*stride]= ( aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/ + } + tmp += stride; + src += stride; + } + tmp -= (b_h+5)*stride; + + for(y=0; y < b_h; y++){ + for(x=0; x < b_w; x++){ + int a0= tmp[x + 0*stride]; + int a1= tmp[x + 1*stride]; + int a2= tmp[x + 2*stride]; + int a3= tmp[x + 3*stride]; + int a4= tmp[x + 4*stride]; + int a5= tmp[x + 5*stride]; + int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5); +// int am= 18*(a2+a3) - 2*(a1+a4); +/* int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3; + int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/ + +// if(b_w==16) am= 8*(a1+a2); + + if(dy<8) am = (32*a2*( 8-dy) + am* dy + 128)>>8; + else am = ( am*(16-dy) + 32*a3*(dy-8) + 128)>>8; + + if(am&(~255)) am= ~(am>>31); + + dst[x] = am; +/* if (dy< 4) tmp[x + y*stride]= (16*a1*( 4-dy) + aL* dy + 32)>>6; + else if(dy< 8) tmp[x + y*stride]= ( aL*( 8-dy) + am*(dy- 4) + 32)>>6; + else if(dy<12) tmp[x + y*stride]= ( am*(12-dy) + aR*(dy- 8) + 32)>>6; + else tmp[x + y*stride]= ( aR*(16-dy) + 16*a2*(dy-12) + 32)>>6;*/ + } + dst += stride; + tmp += stride; + } +STOP_TIMER("mc_block") +} + +#define mca(dx,dy,b_w)\ +static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, uint8_t *src, int stride, int h){\ + uint8_t tmp[stride*(b_w+5)];\ + assert(h==b_w);\ + mc_block(dst, src-2-2*stride, tmp, stride, b_w, b_w, dx, dy);\ +} + +mca( 0, 0,16) +mca( 8, 0,16) +mca( 0, 8,16) +mca( 8, 8,16) +mca( 0, 0,8) +mca( 8, 0,8) +mca( 0, 8,8) +mca( 8, 8,8) + +static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){ + if(block->type){ + int x, y; + const int color= block->color[plane_index]; + for(y=0; y < b_h; y++){ + for(x=0; 
x < b_w; x++){ + dst[x + y*stride]= color; + } + } + }else{ + const int scale= plane_index ? s->mv_scale : 2*s->mv_scale; + int mx= block->mx*scale; + int my= block->my*scale; + const int dx= mx&15; + const int dy= my&15; + sx += (mx>>4) - 2; + sy += (my>>4) - 2; + src += sx + sy*stride; + if( (unsigned)sx >= w - b_w - 4 + || (unsigned)sy >= h - b_h - 4){ + ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+5, b_h+5, sx, sy, w, h); + src= tmp + MB_SIZE; + } + if((dx&3) || (dy&3) || b_w!=b_h || (b_w!=4 && b_w!=8 && b_w!=16)) + mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy); + else + s->dsp.put_h264_qpel_pixels_tab[2-(b_w>>3)][dy+(dx>>2)](dst,src + 2 + 2*stride,stride); + } +} + +static always_inline int same_block(BlockNode *a, BlockNode *b){ + return !((a->mx - b->mx) | (a->my - b->my) | a->type | b->type); +} + +//FIXME name clenup (b_w, block_w, b_width stuff) +static always_inline void add_yblock_buffered(SnowContext *s, slice_buffer * sb, DWTELEM *old_dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){ + DWTELEM * dst = NULL; + const int b_width = s->b_width << s->block_max_depth; + const int b_height= s->b_height << s->block_max_depth; + const int b_stride= b_width; + BlockNode *lt= &s->block[b_x + b_y*b_stride]; + BlockNode *rt= lt+1; + BlockNode *lb= lt+b_stride; + BlockNode *rb= lb+1; + uint8_t *block[4]; + uint8_t tmp[src_stride*(b_h+5)]; //FIXME align + int x,y; + + if(b_x<0){ + lt= rt; + lb= rb; + }else if(b_x + 1 >= b_width){ + rt= lt; + rb= lb; + } + if(b_y<0){ + lt= lb; + rt= rb; + }else if(b_y + 1 >= b_height){ + lb= lt; + rb= rt; + } + + if(src_x<0){ //FIXME merge with prev & always round internal width upto *16 + obmc -= src_x; + b_w += src_x; + src_x=0; + }else if(src_x + b_w > w){ + b_w = w - src_x; + } + if(src_y<0){ + obmc -= src_y*obmc_stride; + b_h += src_y; + src_y=0; + }else if(src_y + b_h> h){ 
+ b_h = h - src_y; + } + + if(b_w<=0 || b_h<=0) return; + +assert(src_stride > 7*MB_SIZE); +// old_dst += src_x + src_y*dst_stride; + dst8+= src_x + src_y*src_stride; +// src += src_x + src_y*src_stride; + + block[0]= tmp+3*MB_SIZE; + pred_block(s, block[0], src, tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h); + + if(same_block(lt, rt)){ + block[1]= block[0]; + }else{ + block[1]= tmp + 4*MB_SIZE; + pred_block(s, block[1], src, tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h); + } + + if(same_block(lt, lb)){ + block[2]= block[0]; + }else if(same_block(rt, lb)){ + block[2]= block[1]; + }else{ + block[2]= tmp+5*MB_SIZE; + pred_block(s, block[2], src, tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h); + } + + if(same_block(lt, rb) ){ + block[3]= block[0]; + }else if(same_block(rt, rb)){ + block[3]= block[1]; + }else if(same_block(lb, rb)){ + block[3]= block[2]; + }else{ + block[3]= tmp+6*MB_SIZE; + pred_block(s, block[3], src, tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h); + } +#if 0 + for(y=0; y<b_h; y++){ + for(x=0; x<b_w; x++){ + int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX); + if(add) dst[x + y*dst_stride] += v; + else dst[x + y*dst_stride] -= v; + } + } + for(y=0; y<b_h; y++){ + uint8_t *obmc2= obmc + (obmc_stride>>1); + for(x=0; x<b_w; x++){ + int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX); + if(add) dst[x + y*dst_stride] += v; + else dst[x + y*dst_stride] -= v; + } + } + for(y=0; y<b_h; y++){ + uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1); + for(x=0; x<b_w; x++){ + int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX); + if(add) dst[x + y*dst_stride] += v; + else dst[x + y*dst_stride] -= v; + } + } + for(y=0; y<b_h; y++){ + uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1); + uint8_t *obmc4= obmc3+ (obmc_stride>>1); + for(x=0; x<b_w; x++){ + int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * 
(256/OBMC_MAX); + if(add) dst[x + y*dst_stride] += v; + else dst[x + y*dst_stride] -= v; + } + } +#else +{ + + START_TIMER + + int block_index = 0; + for(y=0; y<b_h; y++){ + //FIXME ugly missue of obmc_stride + uint8_t *obmc1= obmc + y*obmc_stride; + uint8_t *obmc2= obmc1+ (obmc_stride>>1); + uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1); + uint8_t *obmc4= obmc3+ (obmc_stride>>1); + dst = slice_buffer_get_line(sb, src_y + y); + for(x=0; x<b_w; x++){ + int v= obmc1[x] * block[3][x + y*src_stride] + +obmc2[x] * block[2][x + y*src_stride] + +obmc3[x] * block[1][x + y*src_stride] + +obmc4[x] * block[0][x + y*src_stride]; + + v <<= 8 - LOG2_OBMC_MAX; + if(FRAC_BITS != 8){ + v += 1<<(7 - FRAC_BITS); + v >>= 8 - FRAC_BITS; + } + if(add){ +// v += old_dst[x + y*dst_stride]; + v += dst[x + src_x]; + v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS; + if(v&(~255)) v= ~(v>>31); + dst8[x + y*src_stride] = v; + }else{ +// old_dst[x + y*dst_stride] -= v; + dst[x + src_x] -= v; + } + } + } + STOP_TIMER("Inner add y block") +} +#endif +} + +//FIXME name clenup (b_w, block_w, b_width stuff) +static always_inline void add_yblock(SnowContext *s, DWTELEM *dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){ + const int b_width = s->b_width << s->block_max_depth; + const int b_height= s->b_height << s->block_max_depth; + const int b_stride= b_width; + BlockNode *lt= &s->block[b_x + b_y*b_stride]; + BlockNode *rt= lt+1; + BlockNode *lb= lt+b_stride; + BlockNode *rb= lb+1; + uint8_t *block[4]; + uint8_t tmp[src_stride*(b_h+5)]; //FIXME align + int x,y; + + if(b_x<0){ + lt= rt; + lb= rb; + }else if(b_x + 1 >= b_width){ + rt= lt; + rb= lb; + } + if(b_y<0){ + lt= lb; + rt= rb; + }else if(b_y + 1 >= b_height){ + lb= lt; + rb= rt; + } + + if(src_x<0){ //FIXME merge with prev & always round internal width upto *16 + obmc -= src_x; + b_w += src_x; + 
src_x=0; + }else if(src_x + b_w > w){ + b_w = w - src_x; + } + if(src_y<0){ + obmc -= src_y*obmc_stride; + b_h += src_y; + src_y=0; + }else if(src_y + b_h> h){ + b_h = h - src_y; + } + + if(b_w<=0 || b_h<=0) return; + +assert(src_stride > 7*MB_SIZE); + dst += src_x + src_y*dst_stride; + dst8+= src_x + src_y*src_stride; +// src += src_x + src_y*src_stride; + + block[0]= tmp+3*MB_SIZE; + pred_block(s, block[0], src, tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h); + + if(same_block(lt, rt)){ + block[1]= block[0]; + }else{ + block[1]= tmp + 4*MB_SIZE; + pred_block(s, block[1], src, tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h); + } + + if(same_block(lt, lb)){ + block[2]= block[0]; + }else if(same_block(rt, lb)){ + block[2]= block[1]; + }else{ + block[2]= tmp+5*MB_SIZE; + pred_block(s, block[2], src, tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h); + } + + if(same_block(lt, rb) ){ + block[3]= block[0]; + }else if(same_block(rt, rb)){ + block[3]= block[1]; + }else if(same_block(lb, rb)){ + block[3]= block[2]; + }else{ + block[3]= tmp+6*MB_SIZE; + pred_block(s, block[3], src, tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h); + } +#if 0 + for(y=0; y<b_h; y++){ + for(x=0; x<b_w; x++){ + int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX); + if(add) dst[x + y*dst_stride] += v; + else dst[x + y*dst_stride] -= v; + } + } + for(y=0; y<b_h; y++){ + uint8_t *obmc2= obmc + (obmc_stride>>1); + for(x=0; x<b_w; x++){ + int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX); + if(add) dst[x + y*dst_stride] += v; + else dst[x + y*dst_stride] -= v; + } + } + for(y=0; y<b_h; y++){ + uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1); + for(x=0; x<b_w; x++){ + int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX); + if(add) dst[x + y*dst_stride] += v; + else dst[x + y*dst_stride] -= v; + } + } + for(y=0; y<b_h; y++){ + uint8_t *obmc3= obmc + 
obmc_stride*(obmc_stride>>1); + uint8_t *obmc4= obmc3+ (obmc_stride>>1); + for(x=0; x<b_w; x++){ + int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX); + if(add) dst[x + y*dst_stride] += v; + else dst[x + y*dst_stride] -= v; + } + } +#else + for(y=0; y<b_h; y++){ + //FIXME ugly missue of obmc_stride + uint8_t *obmc1= obmc + y*obmc_stride; + uint8_t *obmc2= obmc1+ (obmc_stride>>1); + uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1); + uint8_t *obmc4= obmc3+ (obmc_stride>>1); + for(x=0; x<b_w; x++){ + int v= obmc1[x] * block[3][x + y*src_stride] + +obmc2[x] * block[2][x + y*src_stride] + +obmc3[x] * block[1][x + y*src_stride] + +obmc4[x] * block[0][x + y*src_stride]; + + v <<= 8 - LOG2_OBMC_MAX; + if(FRAC_BITS != 8){ + v += 1<<(7 - FRAC_BITS); + v >>= 8 - FRAC_BITS; + } + if(add){ + v += dst[x + y*dst_stride]; + v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS; + if(v&(~255)) v= ~(v>>31); + dst8[x + y*src_stride] = v; + }else{ + dst[x + y*dst_stride] -= v; + } + } + } +#endif +} + +static always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, DWTELEM * old_buffer, int plane_index, int add, int mb_y){ + Plane *p= &s->plane[plane_index]; + const int mb_w= s->b_width << s->block_max_depth; + const int mb_h= s->b_height << s->block_max_depth; + int x, y, mb_x; + int block_size = MB_SIZE >> s->block_max_depth; + int block_w = plane_index ? block_size/2 : block_size; + const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; + int obmc_stride= plane_index ? 
block_size : 2*block_size; + int ref_stride= s->current_picture.linesize[plane_index]; + uint8_t *ref = s->last_picture.data[plane_index]; + uint8_t *dst8= s->current_picture.data[plane_index]; + int w= p->width; + int h= p->height; + START_TIMER + + if(s->keyframe || (s->avctx->debug&512)){ + if(mb_y==mb_h) + return; + + if(add){ + for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++) + { +// DWTELEM * line = slice_buffer_get_line(sb, y); + DWTELEM * line = sb->line[y]; + for(x=0; x<w; x++) + { +// int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1)); + int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1)); + v >>= FRAC_BITS; + if(v&(~255)) v= ~(v>>31); + dst8[x + y*ref_stride]= v; + } + } + }else{ + for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++) + { +// DWTELEM * line = slice_buffer_get_line(sb, y); + DWTELEM * line = sb->line[y]; + for(x=0; x<w; x++) + { + line[x] -= 128 << FRAC_BITS; +// buf[x + y*w]-= 128<<FRAC_BITS; + } + } + } + + return; + } + + for(mb_x=0; mb_x<=mb_w; mb_x++){ + START_TIMER + + add_yblock_buffered(s, sb, old_buffer, dst8, ref, obmc, + block_w*mb_x - block_w/2, + block_w*mb_y - block_w/2, + block_w, block_w, + w, h, + w, ref_stride, obmc_stride, + mb_x - 1, mb_y - 1, + add, plane_index); + + STOP_TIMER("add_yblock") + } + + STOP_TIMER("predict_slice") +} + +static always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_index, int add, int mb_y){ + Plane *p= &s->plane[plane_index]; + const int mb_w= s->b_width << s->block_max_depth; + const int mb_h= s->b_height << s->block_max_depth; + int x, y, mb_x; + int block_size = MB_SIZE >> s->block_max_depth; + int block_w = plane_index ? block_size/2 : block_size; + const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; + int obmc_stride= plane_index ? 
block_size : 2*block_size; + int ref_stride= s->current_picture.linesize[plane_index]; + uint8_t *ref = s->last_picture.data[plane_index]; + uint8_t *dst8= s->current_picture.data[plane_index]; + int w= p->width; + int h= p->height; + START_TIMER + + if(s->keyframe || (s->avctx->debug&512)){ + if(mb_y==mb_h) + return; + + if(add){ + for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){ + for(x=0; x<w; x++){ + int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1)); + v >>= FRAC_BITS; + if(v&(~255)) v= ~(v>>31); + dst8[x + y*ref_stride]= v; + } + } + }else{ + for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){ + for(x=0; x<w; x++){ + buf[x + y*w]-= 128<<FRAC_BITS; + } + } + } + + return; + } + + for(mb_x=0; mb_x<=mb_w; mb_x++){ + START_TIMER + + add_yblock(s, buf, dst8, ref, obmc, + block_w*mb_x - block_w/2, + block_w*mb_y - block_w/2, + block_w, block_w, + w, h, + w, ref_stride, obmc_stride, + mb_x - 1, mb_y - 1, + add, plane_index); + + STOP_TIMER("add_yblock") + } + + STOP_TIMER("predict_slice") +} + +static always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){ + const int mb_h= s->b_height << s->block_max_depth; + int mb_y; + for(mb_y=0; mb_y<=mb_h; mb_y++) + predict_slice(s, buf, plane_index, add, mb_y); +} + +static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int bias){ + const int level= b->level; + const int w= b->width; + const int h= b->height; + const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16); + const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); + int x,y, thres1, thres2; + START_TIMER + + if(s->qlog == LOSSLESS_QLOG) return; + + bias= bias ? 
0 : (3*qmul)>>3; + thres1= ((qmul - bias)>>QEXPSHIFT) - 1; + thres2= 2*thres1; + + if(!bias){ + for(y=0; y<h; y++){ + for(x=0; x<w; x++){ + int i= src[x + y*stride]; + + if((unsigned)(i+thres1) > thres2){ + if(i>=0){ + i<<= QEXPSHIFT; + i/= qmul; //FIXME optimize + src[x + y*stride]= i; + }else{ + i= -i; + i<<= QEXPSHIFT; + i/= qmul; //FIXME optimize + src[x + y*stride]= -i; + } + }else + src[x + y*stride]= 0; + } + } + }else{ + for(y=0; y<h; y++){ + for(x=0; x<w; x++){ + int i= src[x + y*stride]; + + if((unsigned)(i+thres1) > thres2){ + if(i>=0){ + i<<= QEXPSHIFT; + i= (i + bias) / qmul; //FIXME optimize + src[x + y*stride]= i; + }else{ + i= -i; + i<<= QEXPSHIFT; + i= (i + bias) / qmul; //FIXME optimize + src[x + y*stride]= -i; + } + }else + src[x + y*stride]= 0; + } + } + } + if(level+1 == s->spatial_decomposition_count){ +// STOP_TIMER("quantize") + } +} + +static void dequantize_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride){ + const int w= b->width; + const int h= b->height; + const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16); + const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); + const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT; + int x,y; + START_TIMER + + if(s->qlog == LOSSLESS_QLOG) return; + + for(y=0; y<h; y++){ +// DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride)); + DWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset; + for(x=0; x<w; x++){ + int i= line[x]; + if(i<0){ + line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias + }else if(i>0){ + line[x]= (( i*qmul + qadd)>>(QEXPSHIFT)); + } + } + } + if(w > 200 /*level+1 == s->spatial_decomposition_count*/){ + STOP_TIMER("dquant") + } +} + +static void dequantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride){ + const int w= b->width; + const int h= b->height; + const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16); + const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); + 
const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT; + int x,y; + START_TIMER + + if(s->qlog == LOSSLESS_QLOG) return; + + for(y=0; y<h; y++){ + for(x=0; x<w; x++){ + int i= src[x + y*stride]; + if(i<0){ + src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias + }else if(i>0){ + src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT)); + } + } + } + if(w > 200 /*level+1 == s->spatial_decomposition_count*/){ + STOP_TIMER("dquant") + } +} + +static void decorrelate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){ + const int w= b->width; + const int h= b->height; + int x,y; + + for(y=h-1; y>=0; y--){ + for(x=w-1; x>=0; x--){ + int i= x + y*stride; + + if(x){ + if(use_median){ + if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]); + else src[i] -= src[i - 1]; + }else{ + if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]); + else src[i] -= src[i - 1]; + } + }else{ + if(y) src[i] -= src[i - stride]; + } + } + } +} + +static void correlate_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){ + const int w= b->width; + const int h= b->height; + int x,y; + +// START_TIMER + + DWTELEM * line; + DWTELEM * prev; + + for(y=0; y<h; y++){ + prev = line; +// line = slice_buffer_get_line_from_address(sb, src + (y * stride)); + line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset; + for(x=0; x<w; x++){ + if(x){ + if(use_median){ + if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]); + else line[x] += line[x - 1]; + }else{ + if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]); + else line[x] += line[x - 1]; + } + }else{ + if(y) line[x] += prev[x]; + } + } + } + +// STOP_TIMER("correlate") +} + +static void correlate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){ + const 
int w= b->width; + const int h= b->height; + int x,y; + + for(y=0; y<h; y++){ + for(x=0; x<w; x++){ + int i= x + y*stride; + + if(x){ + if(use_median){ + if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]); + else src[i] += src[i - 1]; + }else{ + if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]); + else src[i] += src[i - 1]; + } + }else{ + if(y) src[i] += src[i - stride]; + } + } + } +} + +static void encode_header(SnowContext *s){ + int plane_index, level, orientation; + uint8_t kstate[32]; + + memset(kstate, MID_STATE, sizeof(kstate)); + + put_rac(&s->c, kstate, s->keyframe); + if(s->keyframe || s->always_reset) + reset_contexts(s); + if(s->keyframe){ + put_symbol(&s->c, s->header_state, s->version, 0); + put_rac(&s->c, s->header_state, s->always_reset); + put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0); + put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0); + put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0); + put_symbol(&s->c, s->header_state, s->colorspace_type, 0); + put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0); + put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0); + put_rac(&s->c, s->header_state, s->spatial_scalability); +// put_rac(&s->c, s->header_state, s->rate_scalability); + + for(plane_index=0; plane_index<2; plane_index++){ + for(level=0; level<s->spatial_decomposition_count; level++){ + for(orientation=level ? 
1:0; orientation<4; orientation++){ + if(orientation==2) continue; + put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1); + } + } + } + } + put_symbol(&s->c, s->header_state, s->spatial_decomposition_type, 0); + put_symbol(&s->c, s->header_state, s->qlog, 1); + put_symbol(&s->c, s->header_state, s->mv_scale, 0); + put_symbol(&s->c, s->header_state, s->qbias, 1); + put_symbol(&s->c, s->header_state, s->block_max_depth, 0); +} + +static int decode_header(SnowContext *s){ + int plane_index, level, orientation; + uint8_t kstate[32]; + + memset(kstate, MID_STATE, sizeof(kstate)); + + s->keyframe= get_rac(&s->c, kstate); + if(s->keyframe || s->always_reset) + reset_contexts(s); + if(s->keyframe){ + s->version= get_symbol(&s->c, s->header_state, 0); + if(s->version>0){ + av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version); + return -1; + } + s->always_reset= get_rac(&s->c, s->header_state); + s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0); + s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0); + s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0); + s->colorspace_type= get_symbol(&s->c, s->header_state, 0); + s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0); + s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0); + s->spatial_scalability= get_rac(&s->c, s->header_state); +// s->rate_scalability= get_rac(&s->c, s->header_state); + + for(plane_index=0; plane_index<3; plane_index++){ + for(level=0; level<s->spatial_decomposition_count; level++){ + for(orientation=level ? 
1:0; orientation<4; orientation++){ + int q; + if (plane_index==2) q= s->plane[1].band[level][orientation].qlog; + else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog; + else q= get_symbol(&s->c, s->header_state, 1); + s->plane[plane_index].band[level][orientation].qlog= q; + } + } + } + } + + s->spatial_decomposition_type= get_symbol(&s->c, s->header_state, 0); + if(s->spatial_decomposition_type > 2){ + av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type); + return -1; + } + + s->qlog= get_symbol(&s->c, s->header_state, 1); + s->mv_scale= get_symbol(&s->c, s->header_state, 0); + s->qbias= get_symbol(&s->c, s->header_state, 1); + s->block_max_depth= get_symbol(&s->c, s->header_state, 0); + + return 0; +} + +static void init_qexp(){ + int i; + double v=128; + + for(i=0; i<QROOT; i++){ + qexp[i]= lrintf(v); + v *= pow(2, 1.0 / QROOT); + } +} + +static int common_init(AVCodecContext *avctx){ + SnowContext *s = avctx->priv_data; + int width, height; + int level, orientation, plane_index, dec; + + s->avctx= avctx; + + dsputil_init(&s->dsp, avctx); + +#define mcf(dx,dy)\ + s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\ + s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\ + s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\ + s->dsp.put_qpel_pixels_tab [1][dy+dx/4]=\ + s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\ + s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4]; + + mcf( 0, 0) + mcf( 4, 0) + mcf( 8, 0) + mcf(12, 0) + mcf( 0, 4) + mcf( 4, 4) + mcf( 8, 4) + mcf(12, 4) + mcf( 0, 8) + mcf( 4, 8) + mcf( 8, 8) + mcf(12, 8) + mcf( 0,12) + mcf( 4,12) + mcf( 8,12) + mcf(12,12) + +#define mcfh(dx,dy)\ + s->dsp.put_pixels_tab [0][dy/4+dx/8]=\ + s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\ + mc_block_hpel ## dx ## dy ## 16;\ + s->dsp.put_pixels_tab [1][dy/4+dx/8]=\ + s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\ + mc_block_hpel ## dx ## dy ## 8; + + mcfh(0, 0) + mcfh(8, 0) + mcfh(0, 8) + mcfh(8, 8) + + if(!qexp[0]) + 
init_qexp(); + + dec= s->spatial_decomposition_count= 5; + s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type + + s->chroma_h_shift= 1; //FIXME XXX + s->chroma_v_shift= 1; + +// dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift); + + width= s->avctx->width; + height= s->avctx->height; + + s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); + + s->mv_scale= (s->avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4; + s->block_max_depth= (s->avctx->flags & CODEC_FLAG_4MV) ? 1 : 0; + + for(plane_index=0; plane_index<3; plane_index++){ + int w= s->avctx->width; + int h= s->avctx->height; + + if(plane_index){ + w>>= s->chroma_h_shift; + h>>= s->chroma_v_shift; + } + s->plane[plane_index].width = w; + s->plane[plane_index].height= h; +//av_log(NULL, AV_LOG_DEBUG, "%d %d\n", w, h); + for(level=s->spatial_decomposition_count-1; level>=0; level--){ + for(orientation=level ? 1 : 0; orientation<4; orientation++){ + SubBand *b= &s->plane[plane_index].band[level][orientation]; + + b->buf= s->spatial_dwt_buffer; + b->level= level; + b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level); + b->width = (w + !(orientation&1))>>1; + b->height= (h + !(orientation>1))>>1; + + b->stride_line = 1 << (s->spatial_decomposition_count - level); + b->buf_x_offset = 0; + b->buf_y_offset = 0; + + if(orientation&1){ + b->buf += (w+1)>>1; + b->buf_x_offset = (w+1)>>1; + } + if(orientation>1){ + b->buf += b->stride>>1; + b->buf_y_offset = b->stride_line >> 1; + } + + if(level) + b->parent= &s->plane[plane_index].band[level-1][orientation]; + b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff)); + } + w= (w+1)>>1; + h= (h+1)>>1; + } + } + + reset_contexts(s); +/* + width= s->width= avctx->width; + height= s->height= avctx->height; + + assert(width && height); +*/ + s->avctx->get_buffer(s->avctx, &s->mconly_picture); + + return 0; +} + + +static void calculate_vissual_weight(SnowContext *s, Plane *p){ 
+ int width = p->width; + int height= p->height; + int level, orientation, x, y; + + for(level=0; level<s->spatial_decomposition_count; level++){ + for(orientation=level ? 1 : 0; orientation<4; orientation++){ + SubBand *b= &p->band[level][orientation]; + DWTELEM *buf= b->buf; + int64_t error=0; + + memset(s->spatial_dwt_buffer, 0, sizeof(int)*width*height); + buf[b->width/2 + b->height/2*b->stride]= 256*256; + ff_spatial_idwt(s->spatial_dwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count); + for(y=0; y<height; y++){ + for(x=0; x<width; x++){ + int64_t d= s->spatial_dwt_buffer[x + y*width]; + error += d*d; + } + } + + b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5); +// av_log(NULL, AV_LOG_DEBUG, "%d %d %d\n", level, orientation, b->qlog/*, sqrt(error)*/); + } + } +} + +static int encode_init(AVCodecContext *avctx) +{ + SnowContext *s = avctx->priv_data; + int plane_index; + + if(avctx->strict_std_compliance >= 0){ + av_log(avctx, AV_LOG_ERROR, "this codec is under development, files encoded with it wont be decodeable with future versions!!!\n" + "use vstrict=-1 / -strict -1 to use it anyway\n"); + return -1; + } + + common_init(avctx); + alloc_blocks(s); + + s->version=0; + + s->m.avctx = avctx; + s->m.flags = avctx->flags; + s->m.bit_rate= avctx->bit_rate; + + s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t)); + s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t)); + s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t)); + h263_encode_init(&s->m); //mv_penalty + + if(avctx->flags&CODEC_FLAG_PASS1){ + if(!avctx->stats_out) + avctx->stats_out = av_mallocz(256); + } + if(avctx->flags&CODEC_FLAG_PASS2){ + if(ff_rate_control_init(&s->m) < 0) + return -1; + } + + for(plane_index=0; plane_index<3; plane_index++){ + calculate_vissual_weight(s, &s->plane[plane_index]); + } + + + avctx->coded_frame= &s->current_picture; + switch(avctx->pix_fmt){ +// case PIX_FMT_YUV444P: 
+// case PIX_FMT_YUV422P: + case PIX_FMT_YUV420P: + case PIX_FMT_GRAY8: +// case PIX_FMT_YUV411P: +// case PIX_FMT_YUV410P: + s->colorspace_type= 0; + break; +/* case PIX_FMT_RGBA32: + s->colorspace= 1; + break;*/ + default: + av_log(avctx, AV_LOG_ERROR, "format not supported\n"); + return -1; + } +// avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift); + s->chroma_h_shift= 1; + s->chroma_v_shift= 1; + return 0; +} + +static int frame_start(SnowContext *s){ + AVFrame tmp; + int w= s->avctx->width; //FIXME round up to x16 ? + int h= s->avctx->height; + + if(s->current_picture.data[0]){ + draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w , h , EDGE_WIDTH ); + draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2); + draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2); + } + + tmp= s->last_picture; + s->last_picture= s->current_picture; + s->current_picture= tmp; + + s->current_picture.reference= 1; + if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){ + av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n"); + return -1; + } + + return 0; +} + +static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){ + SnowContext *s = avctx->priv_data; + RangeCoder * const c= &s->c; + AVFrame *pict = data; + const int width= s->avctx->width; + const int height= s->avctx->height; + int level, orientation, plane_index; + + ff_init_range_encoder(c, buf, buf_size); + ff_build_rac_states(c, 0.05*(1LL<<32), 256-8); + + s->input_picture = *pict; + + if(avctx->flags&CODEC_FLAG_PASS2){ + s->m.pict_type = + pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type; + s->keyframe= pict->pict_type==FF_I_TYPE; + s->m.picture_number= avctx->frame_number; + pict->quality= ff_rate_estimate_qscale(&s->m); + }else{ + s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0; + 
pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE; + } + + if(pict->quality){ + s->qlog= rint(QROOT*log(pict->quality / (float)FF_QP2LAMBDA)/log(2)); + //<64 >60 + s->qlog += 61*QROOT/8; + }else{ + s->qlog= LOSSLESS_QLOG; + } + + frame_start(s); + s->current_picture.key_frame= s->keyframe; + + s->m.current_picture_ptr= &s->m.current_picture; + if(pict->pict_type == P_TYPE){ + int block_width = (width +15)>>4; + int block_height= (height+15)>>4; + int stride= s->current_picture.linesize[0]; + + assert(s->current_picture.data[0]); + assert(s->last_picture.data[0]); + + s->m.avctx= s->avctx; + s->m.current_picture.data[0]= s->current_picture.data[0]; + s->m. last_picture.data[0]= s-> last_picture.data[0]; + s->m. new_picture.data[0]= s-> input_picture.data[0]; + s->m. last_picture_ptr= &s->m. last_picture; + s->m.linesize= + s->m. last_picture.linesize[0]= + s->m. new_picture.linesize[0]= + s->m.current_picture.linesize[0]= stride; + s->m.uvlinesize= s->current_picture.linesize[1]; + s->m.width = width; + s->m.height= height; + s->m.mb_width = block_width; + s->m.mb_height= block_height; + s->m.mb_stride= s->m.mb_width+1; + s->m.b8_stride= 2*s->m.mb_width+1; + s->m.f_code=1; + s->m.pict_type= pict->pict_type; + s->m.me_method= s->avctx->me_method; + s->m.me.scene_change_score=0; + s->m.flags= s->avctx->flags; + s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0; + s->m.out_format= FMT_H263; + s->m.unrestricted_mv= 1; + + s->lambda = s->m.lambda= pict->quality * 3/2; //FIXME bug somewhere else + s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7); + s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT; + + s->m.dsp= s->dsp; //move + ff_init_me(&s->m); + } + +redo_frame: + + s->qbias= pict->pict_type == P_TYPE ? 
2 : 0; + + encode_header(s); + s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start); + encode_blocks(s); + s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits; + + for(plane_index=0; plane_index<3; plane_index++){ + Plane *p= &s->plane[plane_index]; + int w= p->width; + int h= p->height; + int x, y; +// int bits= put_bits_count(&s->c.pb); + + //FIXME optimize + if(pict->data[plane_index]) //FIXME gray hack + for(y=0; y<h; y++){ + for(x=0; x<w; x++){ + s->spatial_dwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS; + } + } + predict_plane(s, s->spatial_dwt_buffer, plane_index, 0); + + if( plane_index==0 + && pict->pict_type == P_TYPE + && s->m.me.scene_change_score > s->avctx->scenechange_threshold){ + ff_init_range_encoder(c, buf, buf_size); + ff_build_rac_states(c, 0.05*(1LL<<32), 256-8); + pict->pict_type= FF_I_TYPE; + s->keyframe=1; + reset_contexts(s); + goto redo_frame; + } + + if(s->qlog == LOSSLESS_QLOG){ + for(y=0; y<h; y++){ + for(x=0; x<w; x++){ + s->spatial_dwt_buffer[y*w + x]= (s->spatial_dwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS; + } + } + } + + ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count); + + for(level=0; level<s->spatial_decomposition_count; level++){ + for(orientation=level ? 1 : 0; orientation<4; orientation++){ + SubBand *b= &p->band[level][orientation]; + + quantize(s, b, b->buf, b->stride, s->qbias); + if(orientation==0) + decorrelate(s, b, b->buf, b->stride, pict->pict_type == P_TYPE, 0); + encode_subband(s, b, b->buf, b->parent ? 
b->parent->buf : NULL, b->stride, orientation); + assert(b->parent==NULL || b->parent->stride == b->stride*2); + if(orientation==0) + correlate(s, b, b->buf, b->stride, 1, 0); + } + } +// av_log(NULL, AV_LOG_DEBUG, "plane:%d bits:%d\n", plane_index, put_bits_count(&s->c.pb) - bits); + + for(level=0; level<s->spatial_decomposition_count; level++){ + for(orientation=level ? 1 : 0; orientation<4; orientation++){ + SubBand *b= &p->band[level][orientation]; + + dequantize(s, b, b->buf, b->stride); + } + } + + ff_spatial_idwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count); + if(s->qlog == LOSSLESS_QLOG){ + for(y=0; y<h; y++){ + for(x=0; x<w; x++){ + s->spatial_dwt_buffer[y*w + x]<<=FRAC_BITS; + } + } + } +{START_TIMER + predict_plane(s, s->spatial_dwt_buffer, plane_index, 1); +STOP_TIMER("pred-conv")} + if(s->avctx->flags&CODEC_FLAG_PSNR){ + int64_t error= 0; + + if(pict->data[plane_index]) //FIXME gray hack + for(y=0; y<h; y++){ + for(x=0; x<w; x++){ + int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x]; + error += d*d; + } + } + s->avctx->error[plane_index] += error; + s->current_picture.error[plane_index] = error; + } + } + + if(s->last_picture.data[0]) + avctx->release_buffer(avctx, &s->last_picture); + + s->current_picture.coded_picture_number = avctx->frame_number; + s->current_picture.pict_type = pict->pict_type; + s->current_picture.quality = pict->quality; + if(avctx->flags&CODEC_FLAG_PASS1){ + s->m.p_tex_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits - s->m.mv_bits; + s->m.current_picture.display_picture_number = + s->m.current_picture.coded_picture_number = avctx->frame_number; + s->m.pict_type = pict->pict_type; + s->m.current_picture.quality = pict->quality; + ff_write_pass1_stats(&s->m); + } + if(avctx->flags&CODEC_FLAG_PASS2){ + s->m.total_bits += 8*(s->c.bytestream - 
s->c.bytestream_start); + } + + emms_c(); + + return ff_rac_terminate(c); +} + +static void common_end(SnowContext *s){ + int plane_index, level, orientation; + + av_freep(&s->spatial_dwt_buffer); + + av_freep(&s->m.me.scratchpad); + av_freep(&s->m.me.map); + av_freep(&s->m.me.score_map); + + av_freep(&s->block); + + for(plane_index=0; plane_index<3; plane_index++){ + for(level=s->spatial_decomposition_count-1; level>=0; level--){ + for(orientation=level ? 1 : 0; orientation<4; orientation++){ + SubBand *b= &s->plane[plane_index].band[level][orientation]; + + av_freep(&b->x_coeff); + } + } + } +} + +static int encode_end(AVCodecContext *avctx) +{ + SnowContext *s = avctx->priv_data; + + common_end(s); + av_free(avctx->stats_out); + + return 0; +} + +static int decode_init(AVCodecContext *avctx) +{ + SnowContext *s = avctx->priv_data; + int block_size; + + common_init(avctx); + + block_size = MB_SIZE >> s->block_max_depth; + /* FIXME block_size * 2 is determined empirically. block_size * 1.5 is definitely needed, but I (Robert) cannot figure out why more than that is needed. Perhaps there is a bug, or perhaps I overlooked some demands that are placed on the buffer. */ + /* FIXME The formula is WRONG. For height > 480, the buffer will overflow. */ + /* FIXME For now, I will use a full frame of lines. Fortunately, this should not materially effect cache performance because lines are allocated using a stack, so if in fact only 50 out of 496 lines are needed at a time, the other 446 will sit allocated but never accessed. 
*/ +// slice_buffer_init(s->plane[0].sb, s->plane[0].height, (block_size * 2) + (s->spatial_decomposition_count * s->spatial_decomposition_count), s->plane[0].width, s->spatial_dwt_buffer); + slice_buffer_init(&s->sb, s->plane[0].height, s->plane[0].height, s->plane[0].width, s->spatial_dwt_buffer); + + return 0; +} + +static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size){ + SnowContext *s = avctx->priv_data; + RangeCoder * const c= &s->c; + int bytes_read; + AVFrame *picture = data; + int level, orientation, plane_index; + + ff_init_range_decoder(c, buf, buf_size); + ff_build_rac_states(c, 0.05*(1LL<<32), 256-8); + + s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P + decode_header(s); + if(!s->block) alloc_blocks(s); + + frame_start(s); + //keyframe flag dupliaction mess FIXME + if(avctx->debug&FF_DEBUG_PICT_INFO) + av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog); + + decode_blocks(s); + + for(plane_index=0; plane_index<3; plane_index++){ + Plane *p= &s->plane[plane_index]; + int w= p->width; + int h= p->height; + int x, y; + int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */ + SubBand * correlate_band; + +if(s->avctx->debug&2048){ + memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h); + predict_plane(s, s->spatial_dwt_buffer, plane_index, 1); + + for(y=0; y<h; y++){ + for(x=0; x<w; x++){ + int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]; + s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v; + } + } +} + +{ START_TIMER + for(level=0; level<s->spatial_decomposition_count; level++){ + for(orientation=level ? 1 : 0; orientation<4; orientation++){ + SubBand *b= &p->band[level][orientation]; + unpack_coeffs(s, b, b->parent, orientation); + } + } + STOP_TIMER("unpack coeffs"); +} + + /* Handle level 0, orientation 0 specially. 
It is particularly resistant to slicing but fortunately quite small, so process it in one pass. */ + correlate_band = &p->band[0][0]; + decode_subband_slice_buffered(s, correlate_band, &s->sb, 0, correlate_band->height, decode_state[0][0]); + correlate_buffered(s, &s->sb, correlate_band, correlate_band->buf, correlate_band->stride, 1, 0); + dequantize_buffered(s, &s->sb, correlate_band, correlate_band->buf, correlate_band->stride); + +{START_TIMER + const int mb_h= s->b_height << s->block_max_depth; + const int block_size = MB_SIZE >> s->block_max_depth; + const int block_w = plane_index ? block_size/2 : block_size; + int mb_y; + dwt_compose_t cs[MAX_DECOMPOSITIONS]; + int yd=0, yq=0; + int y; + int end_y; + + ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count); + for(mb_y=0; mb_y<=mb_h; mb_y++){ + + const int slice_starty = block_w*mb_y; + const int slice_h = block_w*(mb_y+1); + + { + START_TIMER + for(level=0; level<s->spatial_decomposition_count; level++){ + for(orientation=level ? 1 : 1; orientation<4; orientation++){ + SubBand *b= &p->band[level][orientation]; + int start_y; + int end_y; + int our_mb_start = mb_y; + int our_mb_end = (mb_y + 1); + start_y = FFMIN(b->height, (mb_y ? 
((block_w * our_mb_start - 4) >> (s->spatial_decomposition_count - level)) + 5 : 0)); + end_y = FFMIN(b->height, (((block_w * our_mb_end - 4) >> (s->spatial_decomposition_count - level)) + 5)); + + if (start_y != end_y) + decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]); + } + } + STOP_TIMER("decode_subband_slice"); + } + +{ START_TIMER + for(; yd<slice_h; yd+=4){ + ff_spatial_idwt_buffered_slice(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd); + } + STOP_TIMER("idwt slice");} + + + if(s->qlog == LOSSLESS_QLOG){ + for(; yq<slice_h && yq<h; yq++){ + DWTELEM * line = slice_buffer_get_line(&s->sb, yq); + for(x=0; x<w; x++){ + line[x] <<= FRAC_BITS; + } + } + } + + predict_slice_buffered(s, &s->sb, s->spatial_dwt_buffer, plane_index, 1, mb_y); + + /* Nasty hack based empirically on how predict_slice_buffered() hits the buffer. */ + /* FIXME If possible, make predict_slice fit into the slice. As of now, it works on some previous lines (up to slice_height / 2) if the condition on the next line is false. 
*/ + if (s->keyframe || (s->avctx->debug&512)){ + y = FFMIN(p->height, slice_starty); + end_y = FFMIN(p->height, slice_h); + } + else{ + y = FFMAX(0, FFMIN(p->height, slice_starty - (block_w >> 1))); + end_y = FFMAX(0, FFMIN(p->height, slice_h - (block_w >> 1))); + } + while(y < end_y) + slice_buffer_release(&s->sb, y++); + } + + slice_buffer_flush(&s->sb); + +STOP_TIMER("idwt + predict_slices")} + } + + emms_c(); + + if(s->last_picture.data[0]) + avctx->release_buffer(avctx, &s->last_picture); + +if(!(s->avctx->debug&2048)) + *picture= s->current_picture; +else + *picture= s->mconly_picture; + + *data_size = sizeof(AVFrame); + + bytes_read= c->bytestream - c->bytestream_start; + if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME + + return bytes_read; +} + +static int decode_end(AVCodecContext *avctx) +{ + SnowContext *s = avctx->priv_data; + + slice_buffer_destroy(&s->sb); + + common_end(s); + + return 0; +} + +AVCodec snow_decoder = { + "snow", + CODEC_TYPE_VIDEO, + CODEC_ID_SNOW, + sizeof(SnowContext), + decode_init, + NULL, + decode_end, + decode_frame, + 0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/, + NULL +}; + +#ifdef CONFIG_ENCODERS +AVCodec snow_encoder = { + "snow", + CODEC_TYPE_VIDEO, + CODEC_ID_SNOW, + sizeof(SnowContext), + encode_init, + encode_frame, + encode_end, +}; +#endif + + +#if 0 +#undef malloc +#undef free +#undef printf + +int main(){ + int width=256; + int height=256; + int buffer[2][width*height]; + SnowContext s; + int i; + s.spatial_decomposition_count=6; + s.spatial_decomposition_type=1; + + printf("testing 5/3 DWT\n"); + for(i=0; i<width*height; i++) + buffer[0][i]= buffer[1][i]= random()%54321 - 12345; + + ff_spatial_dwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count); + ff_spatial_idwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count); + + for(i=0; i<width*height; i++) + if(buffer[0][i]!= 
buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]); + + printf("testing 9/7 DWT\n"); + s.spatial_decomposition_type=0; + for(i=0; i<width*height; i++) + buffer[0][i]= buffer[1][i]= random()%54321 - 12345; + + ff_spatial_dwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count); + ff_spatial_idwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count); + + for(i=0; i<width*height; i++) + if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]); + + printf("testing AC coder\n"); + memset(s.header_state, 0, sizeof(s.header_state)); + ff_init_range_encoder(&s.c, buffer[0], 256*256); + ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64); + + for(i=-256; i<256; i++){ +START_TIMER + put_symbol(&s.c, s.header_state, i*i*i/3*ABS(i), 1); +STOP_TIMER("put_symbol") + } + ff_rac_terminate(&s.c); + + memset(s.header_state, 0, sizeof(s.header_state)); + ff_init_range_decoder(&s.c, buffer[0], 256*256); + ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64); + + for(i=-256; i<256; i++){ + int j; +START_TIMER + j= get_symbol(&s.c, s.header_state, 1); +STOP_TIMER("get_symbol") + if(j!=i*i*i/3*ABS(i)) printf("fsck: %d != %d\n", i, j); + } +{ +int level, orientation, x, y; +int64_t errors[8][4]; +int64_t g=0; + + memset(errors, 0, sizeof(errors)); + s.spatial_decomposition_count=3; + s.spatial_decomposition_type=0; + for(level=0; level<s.spatial_decomposition_count; level++){ + for(orientation=level ? 
1 : 0; orientation<4; orientation++){ + int w= width >> (s.spatial_decomposition_count-level); + int h= height >> (s.spatial_decomposition_count-level); + int stride= width << (s.spatial_decomposition_count-level); + DWTELEM *buf= buffer[0]; + int64_t error=0; + + if(orientation&1) buf+=w; + if(orientation>1) buf+=stride>>1; + + memset(buffer[0], 0, sizeof(int)*width*height); + buf[w/2 + h/2*stride]= 256*256; + ff_spatial_idwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count); + for(y=0; y<height; y++){ + for(x=0; x<width; x++){ + int64_t d= buffer[0][x + y*width]; + error += d*d; + if(ABS(width/2-x)<9 && ABS(height/2-y)<9 && level==2) printf("%8lld ", d); + } + if(ABS(height/2-y)<9 && level==2) printf("\n"); + } + error= (int)(sqrt(error)+0.5); + errors[level][orientation]= error; + if(g) g=ff_gcd(g, error); + else g= error; + } + } + printf("static int const visual_weight[][4]={\n"); + for(level=0; level<s.spatial_decomposition_count; level++){ + printf(" {"); + for(orientation=0; orientation<4; orientation++){ + printf("%8lld,", errors[level][orientation]/g); + } + printf("},\n"); + } + printf("};\n"); + { + int level=2; + int orientation=3; + int w= width >> (s.spatial_decomposition_count-level); + int h= height >> (s.spatial_decomposition_count-level); + int stride= width << (s.spatial_decomposition_count-level); + DWTELEM *buf= buffer[0]; + int64_t error=0; + + buf+=w; + buf+=stride>>1; + + memset(buffer[0], 0, sizeof(int)*width*height); +#if 1 + for(y=0; y<height; y++){ + for(x=0; x<width; x++){ + int tab[4]={0,2,3,1}; + buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)]; + } + } + ff_spatial_dwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count); +#else + for(y=0; y<h; y++){ + for(x=0; x<w; x++){ + buf[x + y*stride ]=169; + buf[x + y*stride-w]=64; + } + } + ff_spatial_idwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count); 
+#endif + for(y=0; y<height; y++){ + for(x=0; x<width; x++){ + int64_t d= buffer[0][x + y*width]; + error += d*d; + if(ABS(width/2-x)<9 && ABS(height/2-y)<9) printf("%8lld ", d); + } + if(ABS(height/2-y)<9) printf("\n"); + } + } + +} + return 0; +} +#endif + diff --git a/src/libffmpeg/libavcodec/sparc/dsputil_vis.c b/src/libffmpeg/libavcodec/sparc/dsputil_vis.c index e5feff27f..53f38b2aa 100644 --- a/src/libffmpeg/libavcodec/sparc/dsputil_vis.c +++ b/src/libffmpeg/libavcodec/sparc/dsputil_vis.c @@ -26,7 +26,7 @@ #include "config.h" -#if defined(ARCH_SPARC) && defined(ENABLE_VIS) +#ifdef ARCH_SPARC #include <inttypes.h> #include <signal.h> @@ -3986,21 +3986,6 @@ static void MC_avg_no_round_xy_8_vis (uint8_t * dest, const uint8_t * _ref, /* End of no rounding code */ -void get_pixels_vis(uint8_t *restrict dest, const uint8_t *_ref, int stride) -{ - int i; - uint8_t *ref = (uint8_t*)_ref; - ref = vis_alignaddr(ref); - - for (i = 0; i < 8; i++) - { - vis_ld64(ref[0], TMP0); - vis_st64(TMP0, dest[0]); - dest += 8; - ref += stride; - } -} - static sigjmp_buf jmpbuf; static volatile sig_atomic_t canjump = 0; @@ -4010,7 +3995,7 @@ static void sigill_handler (int sig) signal (sig, SIG_DFL); raise (sig); } - + canjump = 0; siglongjmp (jmpbuf, 1); } @@ -4032,23 +4017,23 @@ static int vis_level () /* pdist %f0, %f0, %f0 */ __asm__ __volatile__(".word\t0x81b007c0"); - + canjump = 0; accel |= ACCEL_SPARC_VIS; - + if (sigsetjmp (jmpbuf, 1)) { signal (SIGILL, SIG_DFL); return accel; } - + canjump = 1; - + /* edge8n %g0, %g0, %g0 */ __asm__ __volatile__(".word\t0x81b00020"); - + canjump = 0; accel |= ACCEL_SPARC_VIS2; - + signal (SIGILL, SIG_DFL); return accel; @@ -4061,7 +4046,6 @@ void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx) int accel = vis_level (); if (accel & ACCEL_SPARC_VIS) { - c->get_pixels = get_pixels_vis; c->put_pixels_tab[0][0] = MC_put_o_16_vis; c->put_pixels_tab[0][1] = MC_put_x_16_vis; c->put_pixels_tab[0][2] = MC_put_y_16_vis; @@ -4104,4 +4088,4 @@ 
void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx) } } -#endif /* defined(ARCH_SPARC) && defined(ENABLE_VIS) */ +#endif /* !(ARCH_SPARC) */ diff --git a/src/libffmpeg/libavcodec/svq1.c b/src/libffmpeg/libavcodec/svq1.c index 25bc44fd1..068dd51d3 100644 --- a/src/libffmpeg/libavcodec/svq1.c +++ b/src/libffmpeg/libavcodec/svq1.c @@ -600,6 +600,7 @@ static uint16_t svq1_packet_checksum (uint8_t *data, int length, int value) { return value; } +#if 0 /* unused, remove? */ static uint16_t svq1_component_checksum (uint16_t *pixels, int pitch, int width, int height, int value) { int x, y; @@ -614,6 +615,7 @@ static uint16_t svq1_component_checksum (uint16_t *pixels, int pitch, return value; } +#endif static void svq1_parse_string (GetBitContext *bitbuf, uint8_t *out) { uint8_t seed; @@ -713,10 +715,6 @@ static int svq1_decode_frame(AVCodecContext *avctx, int result, i, x, y, width, height; AVFrame *pict = data; - if(buf==NULL && buf_size==0){ - return 0; - } - /* initialize bit buffer */ init_get_bits(&s->gb,buf,buf_size*8); @@ -844,28 +842,28 @@ static int svq1_decode_init(AVCodecContext *avctx) init_vlc(&svq1_block_type, 2, 4, &svq1_block_type_vlc[0][1], 2, 1, - &svq1_block_type_vlc[0][0], 2, 1); + &svq1_block_type_vlc[0][0], 2, 1, 1); init_vlc(&svq1_motion_component, 7, 33, &mvtab[0][1], 2, 1, - &mvtab[0][0], 2, 1); + &mvtab[0][0], 2, 1, 1); for (i = 0; i < 6; i++) { init_vlc(&svq1_intra_multistage[i], 3, 8, &svq1_intra_multistage_vlc[i][0][1], 2, 1, - &svq1_intra_multistage_vlc[i][0][0], 2, 1); + &svq1_intra_multistage_vlc[i][0][0], 2, 1, 1); init_vlc(&svq1_inter_multistage[i], 3, 8, &svq1_inter_multistage_vlc[i][0][1], 2, 1, - &svq1_inter_multistage_vlc[i][0][0], 2, 1); + &svq1_inter_multistage_vlc[i][0][0], 2, 1, 1); } init_vlc(&svq1_intra_mean, 8, 256, &svq1_intra_mean_vlc[0][1], 4, 2, - &svq1_intra_mean_vlc[0][0], 4, 2); + &svq1_intra_mean_vlc[0][0], 4, 2, 1); init_vlc(&svq1_inter_mean, 9, 512, &svq1_inter_mean_vlc[0][1], 4, 2, - 
&svq1_inter_mean_vlc[0][0], 4, 2); + &svq1_inter_mean_vlc[0][0], 4, 2, 1); return 0; } @@ -880,6 +878,8 @@ static int svq1_decode_end(AVCodecContext *avctx) static void svq1_write_header(SVQ1Context *s, int frame_type) { + int i; + /* frame code */ put_bits(&s->pb, 22, 0x20); @@ -898,12 +898,22 @@ static void svq1_write_header(SVQ1Context *s, int frame_type) /* output 5 unknown bits (2 + 2 + 1) */ put_bits(&s->pb, 5, 0); - /* forget about matching up resolutions, just use the free-form - * resolution code (7) for now */ - put_bits(&s->pb, 3, 7); - put_bits(&s->pb, 12, s->frame_width); - put_bits(&s->pb, 12, s->frame_height); - + for (i = 0; i < 7; i++) + { + if ((svq1_frame_size_table[i].width == s->frame_width) && + (svq1_frame_size_table[i].height == s->frame_height)) + { + put_bits(&s->pb, 3, i); + break; + } + } + + if (i == 7) + { + put_bits(&s->pb, 3, 7); + put_bits(&s->pb, 12, s->frame_width); + put_bits(&s->pb, 12, s->frame_height); + } } /* no checksum or extra data (next 2 bits get 0) */ @@ -1069,7 +1079,7 @@ static int encode_block(SVQ1Context *s, uint8_t *src, uint8_t *ref, uint8_t *dec #ifdef CONFIG_ENCODERS -static void svq1_encode_plane(SVQ1Context *s, int plane, unsigned char *src_plane, unsigned char *ref_plane, unsigned char *decoded_plane, +static int svq1_encode_plane(SVQ1Context *s, int plane, unsigned char *src_plane, unsigned char *ref_plane, unsigned char *decoded_plane, int width, int height, int src_stride, int stride) { int x, y; @@ -1108,10 +1118,10 @@ static void svq1_encode_plane(SVQ1Context *s, int plane, unsigned char *src_plan s->m.me_method= s->avctx->me_method; if(!s->motion_val8[plane]){ - s->motion_val8 [plane]= av_mallocz(s->m.b8_stride*block_height*2*2*sizeof(int16_t)); - s->motion_val16[plane]= av_mallocz(s->m.mb_stride*block_height*2*sizeof(int16_t)); + s->motion_val8 [plane]= av_mallocz((s->m.b8_stride*block_height*2 + 2)*2*sizeof(int16_t)); + s->motion_val16[plane]= av_mallocz((s->m.mb_stride*(block_height + 2) + 
1)*2*sizeof(int16_t)); } - + s->m.mb_type= s->mb_type; //dummies, to avoid segfaults @@ -1120,8 +1130,8 @@ static void svq1_encode_plane(SVQ1Context *s, int plane, unsigned char *src_plan s->m.current_picture.mc_mb_var= (uint16_t*)s->dummy; s->m.current_picture.mb_type= s->dummy; - s->m.current_picture.motion_val[0]= s->motion_val8[plane]; - s->m.p_mv_table= s->motion_val16[plane]; + s->m.current_picture.motion_val[0]= s->motion_val8[plane] + 2; + s->m.p_mv_table= s->motion_val16[plane] + s->m.mb_stride + 1; s->m.dsp= s->dsp; //move ff_init_me(&s->m); @@ -1176,6 +1186,11 @@ static void svq1_encode_plane(SVQ1Context *s, int plane, unsigned char *src_plan uint8_t *ref= ref_plane + offset; int score[4]={0,0,0,0}, best; uint8_t temp[16*stride]; + + if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < 3000){ //FIXME check size + av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n"); + return -1; + } s->m.mb_x= x; ff_init_block_index(&s->m); @@ -1268,6 +1283,7 @@ static void svq1_encode_plane(SVQ1Context *s, int plane, unsigned char *src_plan } s->m.first_slice_line=0; } + return 0; } static int svq1_encode_init(AVCodecContext *avctx) @@ -1287,6 +1303,7 @@ static int svq1_encode_init(AVCodecContext *avctx) s->c_block_height = (s->frame_height / 4 + 15) / 16; s->avctx= avctx; + s->m.avctx= avctx; s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t)); s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t)); s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t)); @@ -1294,11 +1311,6 @@ static int svq1_encode_init(AVCodecContext *avctx) s->dummy = av_mallocz((s->y_block_width+1)*s->y_block_height*sizeof(int32_t)); h263_encode_init(&s->m); //mv_penalty -av_log(s->avctx, AV_LOG_INFO, " Hey: %d x %d, %d x %d, %d x %d\n", - s->frame_width, s->frame_height, - s->y_block_width, s->y_block_height, - s->c_block_width, s->c_block_height); - return 0; } @@ -1333,10 +1345,11 @@ static int svq1_encode_frame(AVCodecContext *avctx, unsigned char *buf, 
svq1_write_header(s, p->pict_type); for(i=0; i<3; i++){ - svq1_encode_plane(s, i, + if(svq1_encode_plane(s, i, s->picture.data[i], s->last_picture.data[i], s->current_picture.data[i], s->frame_width / (i?4:1), s->frame_height / (i?4:1), - s->picture.linesize[i], s->current_picture.linesize[i]); + s->picture.linesize[i], s->current_picture.linesize[i]) < 0) + return -1; } // align_put_bits(&s->pb); diff --git a/src/libffmpeg/libavcodec/svq3.c b/src/libffmpeg/libavcodec/svq3.c index e064626fc..547679bf1 100644 --- a/src/libffmpeg/libavcodec/svq3.c +++ b/src/libffmpeg/libavcodec/svq3.c @@ -370,6 +370,7 @@ static inline int svq3_mc_dir (H264Context *h, int size, int mode, int dir, int dx = svq3_get_se_golomb (&s->gb); if (dx == INVALID_VLC || dy == INVALID_VLC) { + av_log(h->s.avctx, AV_LOG_ERROR, "invalid MV vlc\n"); return -1; } } @@ -453,8 +454,11 @@ static int svq3_decode_mb (H264Context *h, unsigned int mb_type) { mb_type = MB_TYPE_SKIP; } else { - svq3_mc_dir (h, s->next_picture.mb_type[mb_xy], PREDICT_MODE, 0, 0); - svq3_mc_dir (h, s->next_picture.mb_type[mb_xy], PREDICT_MODE, 1, 1); + mb_type= FFMIN(s->next_picture.mb_type[mb_xy], 6); + if(svq3_mc_dir (h, mb_type, PREDICT_MODE, 0, 0) < 0) + return -1; + if(svq3_mc_dir (h, mb_type, PREDICT_MODE, 1, 1) < 0) + return -1; mb_type = MB_TYPE_16x16; } @@ -512,17 +516,20 @@ static int svq3_decode_mb (H264Context *h, unsigned int mb_type) { /* decode motion vector(s) and form prediction(s) */ if (s->pict_type == P_TYPE) { - svq3_mc_dir (h, (mb_type - 1), mode, 0, 0); + if(svq3_mc_dir (h, (mb_type - 1), mode, 0, 0) < 0) + return -1; } else { /* B_TYPE */ if (mb_type != 2) { - svq3_mc_dir (h, 0, mode, 0, 0); + if(svq3_mc_dir (h, 0, mode, 0, 0) < 0) + return -1; } else { for (i=0; i < 4; i++) { memset (s->current_picture.motion_val[0][b_xy + i*h->b_stride], 0, 4*2*sizeof(int16_t)); } } if (mb_type != 1) { - svq3_mc_dir (h, 0, mode, 1, (mb_type == 3)); + if(svq3_mc_dir (h, 0, mode, 1, (mb_type == 3)) < 0) + return -1; } 
else { for (i=0; i < 4; i++) { memset (s->current_picture.motion_val[1][b_xy + i*h->b_stride], 0, 4*2*sizeof(int16_t)); @@ -558,8 +565,10 @@ static int svq3_decode_mb (H264Context *h, unsigned int mb_type) { for (i=0; i < 16; i+=2) { vlc = svq3_get_ue_golomb (&s->gb); - if (vlc >= 25) + if (vlc >= 25){ + av_log(h->s.avctx, AV_LOG_ERROR, "luma prediction:%d\n", vlc); return -1; + } left = &h->intra4x4_pred_mode_cache[scan8[i] - 1]; top = &h->intra4x4_pred_mode_cache[scan8[i] - 8]; @@ -567,8 +576,10 @@ static int svq3_decode_mb (H264Context *h, unsigned int mb_type) { left[1] = svq3_pred_1[top[0] + 1][left[0] + 1][svq3_pred_0[vlc][0]]; left[2] = svq3_pred_1[top[1] + 1][left[1] + 1][svq3_pred_0[vlc][1]]; - if (left[1] == -1 || left[2] == -1) + if (left[1] == -1 || left[2] == -1){ + av_log(h->s.avctx, AV_LOG_ERROR, "weird prediction\n"); return -1; + } } } else { /* mb_type == 33, DC_128_PRED block type */ for (i=0; i < 4; i++) { @@ -597,8 +608,10 @@ static int svq3_decode_mb (H264Context *h, unsigned int mb_type) { dir = i_mb_type_info[mb_type - 8].pred_mode; dir = (dir >> 1) ^ 3*(dir & 1) ^ 1; - if ((h->intra16x16_pred_mode = check_intra_pred_mode (h, dir)) == -1) + if ((h->intra16x16_pred_mode = check_intra_pred_mode (h, dir)) == -1){ + av_log(h->s.avctx, AV_LOG_ERROR, "check_intra_pred_mode = -1\n"); return -1; + } cbp = i_mb_type_info[mb_type - 8].cbp; mb_type = MB_TYPE_INTRA16x16; @@ -623,20 +636,26 @@ static int svq3_decode_mb (H264Context *h, unsigned int mb_type) { } if (!IS_INTRA16x16(mb_type) && (!IS_SKIP(mb_type) || s->pict_type == B_TYPE)) { - if ((vlc = svq3_get_ue_golomb (&s->gb)) >= 48) + if ((vlc = svq3_get_ue_golomb (&s->gb)) >= 48){ + av_log(h->s.avctx, AV_LOG_ERROR, "cbp_vlc=%d\n", vlc); return -1; + } cbp = IS_INTRA(mb_type) ? 
golomb_to_intra4x4_cbp[vlc] : golomb_to_inter_cbp[vlc]; } if (IS_INTRA16x16(mb_type) || (s->pict_type != I_TYPE && s->adaptive_quant && cbp)) { s->qscale += svq3_get_se_golomb (&s->gb); - if (s->qscale > 31) + if (s->qscale > 31){ + av_log(h->s.avctx, AV_LOG_ERROR, "qscale:%d\n", s->qscale); return -1; + } } if (IS_INTRA16x16(mb_type)) { - if (svq3_decode_block (&s->gb, h->mb, 0, 0)) + if (svq3_decode_block (&s->gb, h->mb, 0, 0)){ + av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding intra luma dc\n"); return -1; + } } if (cbp) { @@ -649,24 +668,30 @@ static int svq3_decode_mb (H264Context *h, unsigned int mb_type) { k = index ? ((j&1) + 2*(i&1) + 2*(j&2) + 4*(i&2)) : (4*i + j); h->non_zero_count_cache[ scan8[k] ] = 1; - if (svq3_decode_block (&s->gb, &h->mb[16*k], index, type)) + if (svq3_decode_block (&s->gb, &h->mb[16*k], index, type)){ + av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding block\n"); return -1; + } } } } if ((cbp & 0x30)) { for (i=0; i < 2; ++i) { - if (svq3_decode_block (&s->gb, &h->mb[16*(16 + 4*i)], 0, 3)) + if (svq3_decode_block (&s->gb, &h->mb[16*(16 + 4*i)], 0, 3)){ + av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding chroma dc block\n"); return -1; + } } if ((cbp & 0x20)) { for (i=0; i < 8; i++) { h->non_zero_count_cache[ scan8[16+i] ] = 1; - if (svq3_decode_block (&s->gb, &h->mb[16*(16 + i)], 1, 1)) + if (svq3_decode_block (&s->gb, &h->mb[16*(16 + i)], 1, 1)){ + av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding chroma ac block\n"); return -1; + } } } } @@ -695,7 +720,7 @@ static int svq3_decode_slice_header (H264Context *h) { } else { int length = (header >> 5) & 3; - h->next_slice_index = s->gb.index + 8*show_bits (&s->gb, 8*length) + 8*length; + h->next_slice_index = get_bits_count(&s->gb) + 8*show_bits (&s->gb, 8*length) + 8*length; if (h->next_slice_index > s->gb.size_in_bits){ av_log(h->s.avctx, AV_LOG_ERROR, "slice after bitstream end\n"); @@ -703,10 +728,10 @@ static int svq3_decode_slice_header (H264Context *h) { } 
s->gb.size_in_bits = h->next_slice_index - 8*(length - 1); - s->gb.index += 8; + skip_bits(&s->gb, 8); if (length > 0) { - memcpy ((uint8_t *) &s->gb.buffer[s->gb.index >> 3], + memcpy ((uint8_t *) &s->gb.buffer[get_bits_count(&s->gb) >> 3], &s->gb.buffer[s->gb.size_in_bits >> 3], (length - 1)); } } @@ -914,10 +939,10 @@ static int svq3_decode_frame (AVCodecContext *avctx, for (s->mb_y=0; s->mb_y < s->mb_height; s->mb_y++) { for (s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) { - if ( (s->gb.index + 7) >= s->gb.size_in_bits && - ((s->gb.index & 7) == 0 || show_bits (&s->gb, (-s->gb.index & 7)) == 0)) { + if ( (get_bits_count(&s->gb) + 7) >= s->gb.size_in_bits && + ((get_bits_count(&s->gb) & 7) == 0 || show_bits (&s->gb, (-get_bits_count(&s->gb) & 7)) == 0)) { - s->gb.index = h->next_slice_index; + skip_bits(&s->gb, h->next_slice_index - get_bits_count(&s->gb)); s->gb.size_in_bits = 8*buf_size; if (svq3_decode_slice_header (h)) @@ -979,5 +1004,5 @@ AVCodec svq3_decoder = { NULL, decode_end, svq3_decode_frame, - CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1, + CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1 | CODEC_CAP_DELAY, }; diff --git a/src/libffmpeg/libavcodec/truemotion1.c b/src/libffmpeg/libavcodec/truemotion1.c index 5f32227fe..0c3bb09cf 100644 --- a/src/libffmpeg/libavcodec/truemotion1.c +++ b/src/libffmpeg/libavcodec/truemotion1.c @@ -24,7 +24,7 @@ * Mike Melanson (melanson@pcisys.net) * * The TrueMotion v1 decoder presently only decodes 16-bit TM1 data and - * outputs RGB555 data. 24-bit TM1 data is not supported yet. + * outputs RGB555 (or RGB565) data. 24-bit TM1 data is not supported yet. 
*/ #include <stdio.h> @@ -43,12 +43,12 @@ typedef struct TrueMotion1Context { AVFrame frame; AVFrame prev_frame; - unsigned char *buf; + uint8_t *buf; int size; - unsigned char *mb_change_bits; + uint8_t *mb_change_bits; int mb_change_bits_row_size; - unsigned char *index_stream; + uint8_t *index_stream; int index_stream_size; int flags; @@ -56,6 +56,8 @@ typedef struct TrueMotion1Context { uint32_t y_predictor_table[1024]; uint32_t c_predictor_table[1024]; + uint32_t fat_y_predictor_table[1024]; + uint32_t fat_c_predictor_table[1024]; int compression; int block_type; @@ -109,12 +111,12 @@ struct frame_header { typedef struct comp_types { int algorithm; - int block_width; - int block_height; + int block_width; // vres + int block_height; // hres int block_type; } comp_types; -/* { valid for metatype }, algorithm, num of deltas, horiz res, vert res */ +/* { valid for metatype }, algorithm, num of deltas, vert res, horiz res */ static comp_types compression_types[17] = { { ALGO_NOP, 0, 0, 0 }, @@ -163,9 +165,9 @@ static void select_delta_tables(TrueMotion1Context *s, int delta_table_index) } #ifdef WORDS_BIGENDIAN -static int make_ydt_entry(int p2, int p1, int16_t *ydt) +static int make_ydt15_entry(int p2, int p1, int16_t *ydt) #else -static int make_ydt_entry(int p1, int p2, int16_t *ydt) +static int make_ydt15_entry(int p1, int p2, int16_t *ydt) #endif { int lo, hi; @@ -178,9 +180,9 @@ static int make_ydt_entry(int p1, int p2, int16_t *ydt) } #ifdef WORDS_BIGENDIAN -static int make_cdt_entry(int p2, int p1, int16_t *cdt) +static int make_cdt15_entry(int p2, int p1, int16_t *cdt) #else -static int make_cdt_entry(int p1, int p2, int16_t *cdt) +static int make_cdt15_entry(int p1, int p2, int16_t *cdt) #endif { int r, b, lo; @@ -191,7 +193,62 @@ static int make_cdt_entry(int p1, int p2, int16_t *cdt) return ((lo + (lo << 16)) << 1); } -static void gen_vector_table(TrueMotion1Context *s, uint8_t *sel_vector_table) +#ifdef WORDS_BIGENDIAN +static int make_ydt16_entry(int 
p2, int p1, int16_t *ydt) +#else +static int make_ydt16_entry(int p1, int p2, int16_t *ydt) +#endif +{ + int lo, hi; + + lo = ydt[p1]; + lo += (lo << 6) + (lo << 11); + hi = ydt[p2]; + hi += (hi << 6) + (hi << 11); + return ((lo + (hi << 16)) << 1); +} + +#ifdef WORDS_BIGENDIAN +static int make_cdt16_entry(int p2, int p1, int16_t *cdt) +#else +static int make_cdt16_entry(int p1, int p2, int16_t *cdt) +#endif +{ + int r, b, lo; + + b = cdt[p2]; + r = cdt[p1] << 11; + lo = b + r; + return ((lo + (lo << 16)) << 1); +} + +#ifdef WORDS_BIGENDIAN +static int make_ydt24_entry(int p2, int p1, int16_t *ydt) +#else +static int make_ydt24_entry(int p1, int p2, int16_t *ydt) +#endif +{ + int lo, hi; + + lo = ydt[p1]; + hi = ydt[p2]; + return ((lo + (hi << 8)) << 1); +} + +#ifdef WORDS_BIGENDIAN +static int make_cdt24_entry(int p2, int p1, int16_t *cdt) +#else +static int make_cdt24_entry(int p1, int p2, int16_t *cdt) +#endif +{ + int r, b; + + b = cdt[p2]; + r = cdt[p1]<<16; + return ((b+r) << 1); +} + +static void gen_vector_table15(TrueMotion1Context *s, uint8_t *sel_vector_table) { int len, i, j; unsigned char delta_pair; @@ -203,15 +260,63 @@ static void gen_vector_table(TrueMotion1Context *s, uint8_t *sel_vector_table) { delta_pair = *sel_vector_table++; s->y_predictor_table[i+j] = 0xfffffffe & - make_ydt_entry(delta_pair >> 4, delta_pair & 0xf, s->ydt); + make_ydt15_entry(delta_pair >> 4, delta_pair & 0xf, s->ydt); s->c_predictor_table[i+j] = 0xfffffffe & - make_cdt_entry(delta_pair >> 4, delta_pair & 0xf, s->cdt); + make_cdt15_entry(delta_pair >> 4, delta_pair & 0xf, s->cdt); } s->y_predictor_table[i+(j-1)] |= 1; s->c_predictor_table[i+(j-1)] |= 1; } } +static void gen_vector_table16(TrueMotion1Context *s, uint8_t *sel_vector_table) +{ + int len, i, j; + unsigned char delta_pair; + + for (i = 0; i < 1024; i += 4) + { + len = *sel_vector_table++ / 2; + for (j = 0; j < len; j++) + { + delta_pair = *sel_vector_table++; + s->y_predictor_table[i+j] = 0xfffffffe & + 
make_ydt16_entry(delta_pair >> 4, delta_pair & 0xf, s->ydt); + s->c_predictor_table[i+j] = 0xfffffffe & + make_cdt16_entry(delta_pair >> 4, delta_pair & 0xf, s->cdt); + } + s->y_predictor_table[i+(j-1)] |= 1; + s->c_predictor_table[i+(j-1)] |= 1; + } +} + +static void gen_vector_table24(TrueMotion1Context *s, uint8_t *sel_vector_table) +{ + int len, i, j; + unsigned char delta_pair; + + for (i = 0; i < 1024; i += 4) + { + len = *sel_vector_table++ / 2; + for (j = 0; j < len; j++) + { + delta_pair = *sel_vector_table++; + s->y_predictor_table[i+j] = 0xfffffffe & + make_ydt24_entry(delta_pair >> 4, delta_pair & 0xf, s->ydt); + s->c_predictor_table[i+j] = 0xfffffffe & + make_cdt24_entry(delta_pair >> 4, delta_pair & 0xf, s->cdt); + s->fat_y_predictor_table[i+j] = 0xfffffffe & + make_ydt24_entry(delta_pair >> 4, delta_pair & 0xf, s->fat_ydt); + s->fat_c_predictor_table[i+j] = 0xfffffffe & + make_cdt24_entry(delta_pair >> 4, delta_pair & 0xf, s->fat_cdt); + } + s->y_predictor_table[i+(j-1)] |= 1; + s->c_predictor_table[i+(j-1)] |= 1; + s->fat_y_predictor_table[i+(j-1)] |= 1; + s->fat_c_predictor_table[i+(j-1)] |= 1; + } +} + /* Returns the number of bytes consumed from the bytestream. 
Returns -1 if * there was an error while decoding the header */ static int truemotion1_decode_header(TrueMotion1Context *s) @@ -229,14 +334,15 @@ static int truemotion1_decode_header(TrueMotion1Context *s) header.header_size = ((s->buf[0] >> 5) | (s->buf[0] << 3)) & 0x7f; if (s->buf[0] < 0x10) { - av_log(s->avctx, AV_LOG_ERROR, "invalid header size\n"); + av_log(s->avctx, AV_LOG_ERROR, "invalid header size (%d)\n", s->buf[0]); return -1; } /* unscramble the header bytes with a XOR operation */ memset(header_buffer, 0, 128); for (i = 1; i < header.header_size; i++) - header_buffer[i - 1] = s->buf[i] ^ s->buf[i + 1]; + header_buffer[i - 1] = s->buf[i] ^ s->buf[i + 1]; + header.compression = header_buffer[0]; header.deltaset = header_buffer[1]; header.vectable = header_buffer[2]; @@ -253,7 +359,7 @@ static int truemotion1_decode_header(TrueMotion1Context *s) { if (header.header_type > 3) { - av_log(s->avctx, AV_LOG_ERROR, "truemotion1: invalid header type\n"); + av_log(s->avctx, AV_LOG_ERROR, "invalid header type (%d)\n", header.header_type); return -1; } else if ((header.header_type == 2) || (header.header_type == 3)) { s->flags = header.flags; @@ -265,6 +371,7 @@ static int truemotion1_decode_header(TrueMotion1Context *s) s->flags = FLAG_KEYFRAME; if (s->flags & FLAG_SPRITE) { + av_log(s->avctx, AV_LOG_INFO, "SPRITE frame found, please report the sample to the developers\n"); s->w = header.width; s->h = header.height; s->x = header.xoffset; @@ -274,7 +381,10 @@ static int truemotion1_decode_header(TrueMotion1Context *s) s->h = header.ysize; if (header.header_type < 2) { if ((s->w < 213) && (s->h >= 176)) + { s->flags |= FLAG_INTERPOLATED; + av_log(s->avctx, AV_LOG_INFO, "INTERPOLATION selected, please report the sample to the developers\n"); + } } } @@ -297,15 +407,22 @@ static int truemotion1_decode_header(TrueMotion1Context *s) return -1; } } + + // FIXME: where to place this ?!?! 
+ if (compression_types[header.compression].algorithm == ALGO_RGB24H) + s->avctx->pix_fmt = PIX_FMT_BGR24; + else + s->avctx->pix_fmt = PIX_FMT_RGB555; // RGB565 is supported aswell if ((header.deltaset != s->last_deltaset) || (header.vectable != s->last_vectable)) { if (compression_types[header.compression].algorithm == ALGO_RGB24H) - { - av_log(s->avctx, AV_LOG_ERROR, "24bit compression not yet supported\n"); - } + gen_vector_table24(s, sel_vector_table); else - gen_vector_table(s, sel_vector_table); + if (s->avctx->pix_fmt == PIX_FMT_RGB555) + gen_vector_table15(s, sel_vector_table); + else + gen_vector_table16(s, sel_vector_table); } /* set up pointers to the other key data chunks */ @@ -327,6 +444,15 @@ static int truemotion1_decode_header(TrueMotion1Context *s) s->block_height = compression_types[header.compression].block_height; s->block_type = compression_types[header.compression].block_type; + if (s->avctx->debug & FF_DEBUG_PICT_INFO) + av_log(s->avctx, AV_LOG_INFO, "tables: %d / %d c:%d %dx%d t:%d %s%s%s%s\n", + s->last_deltaset, s->last_vectable, s->compression, s->block_width, + s->block_height, s->block_type, + s->flags & FLAG_KEYFRAME ? " KEY" : "", + s->flags & FLAG_INTERFRAME ? " INTER" : "", + s->flags & FLAG_SPRITE ? " SPRITE" : "", + s->flags & FLAG_INTERPOLATED ? " INTERPOL" : ""); + return header.header_size; } @@ -336,7 +462,12 @@ static int truemotion1_decode_init(AVCodecContext *avctx) s->avctx = avctx; - avctx->pix_fmt = PIX_FMT_RGB555; + // FIXME: it may change ? 
+// if (avctx->bits_per_sample == 24) +// avctx->pix_fmt = PIX_FMT_RGB24; +// else +// avctx->pix_fmt = PIX_FMT_RGB555; + avctx->has_b_frames = 0; s->frame.data[0] = s->prev_frame.data[0] = NULL; @@ -348,6 +479,32 @@ static int truemotion1_decode_init(AVCodecContext *avctx) return 0; } +/* +Block decoding order: + +dxi: Y-Y +dxic: Y-C-Y +dxic2: Y-C-Y-C + +hres,vres,i,i%vres (0 < i < 4) +2x2 0: 0 dxic2 +2x2 1: 1 dxi +2x2 2: 0 dxic2 +2x2 3: 1 dxi +2x4 0: 0 dxic2 +2x4 1: 1 dxi +2x4 2: 2 dxi +2x4 3: 3 dxi +4x2 0: 0 dxic +4x2 1: 1 dxi +4x2 2: 0 dxic +4x2 3: 1 dxi +4x4 0: 0 dxic +4x4 1: 1 dxi +4x4 2: 2 dxi +4x4 3: 3 dxi +*/ + #define GET_NEXT_INDEX() \ {\ if (index_stream_index >= s->index_stream_size) { \ @@ -374,6 +531,25 @@ static int truemotion1_decode_init(AVCodecContext *avctx) } else \ index++; +#define APPLY_C_PREDICTOR_24() \ + predictor_pair = s->c_predictor_table[index]; \ + c_horiz_pred += (predictor_pair >> 1); \ + if (predictor_pair & 1) { \ + GET_NEXT_INDEX() \ + if (!index) { \ + GET_NEXT_INDEX() \ + predictor_pair = s->fat_c_predictor_table[index]; \ + c_horiz_pred += (predictor_pair >> 1); \ + if (predictor_pair & 1) \ + GET_NEXT_INDEX() \ + else \ + index++; \ + } \ + } else \ + index++; +// c_last+coff = clast+c_horiz_pred; + + #define APPLY_Y_PREDICTOR() \ predictor_pair = s->y_predictor_table[index]; \ horiz_pred += (predictor_pair >> 1); \ @@ -391,6 +567,23 @@ static int truemotion1_decode_init(AVCodecContext *avctx) } else \ index++; +#define APPLY_Y_PREDICTOR_24() \ + predictor_pair = s->y_predictor_table[index]; \ + horiz_pred += (predictor_pair >> 1); \ + if (predictor_pair & 1) { \ + GET_NEXT_INDEX() \ + if (!index) { \ + GET_NEXT_INDEX() \ + predictor_pair = s->fat_y_predictor_table[index]; \ + horiz_pred += (predictor_pair >> 1); \ + if (predictor_pair & 1) \ + GET_NEXT_INDEX() \ + else \ + index++; \ + } \ + } else \ + index++; + #define OUTPUT_PIXEL_PAIR() \ *current_pixel_pair = *vert_pred + horiz_pred; \ *vert_pred++ = 
*current_pixel_pair++; \ @@ -528,6 +721,149 @@ static void truemotion1_decode_16bit(TrueMotion1Context *s) } } +static void truemotion1_decode_24bit(TrueMotion1Context *s) +{ + int y; + int pixels_left; /* remaining pixels on this line */ + unsigned int predictor_pair; + unsigned int horiz_pred; + unsigned int c_horiz_pred; + unsigned int *vert_pred; + unsigned int *current_pixel_pair; + unsigned int *prev_pixel_pair; + unsigned char *current_line = s->frame.data[0]; + unsigned char *prev_line = s->prev_frame.data[0]; + int keyframe = s->flags & FLAG_KEYFRAME; + + /* these variables are for managing the stream of macroblock change bits */ + unsigned char *mb_change_bits = s->mb_change_bits; + unsigned char mb_change_byte; + unsigned char mb_change_byte_mask; + int mb_change_index; + + /* these variables are for managing the main index stream */ + int index_stream_index = 0; /* yes, the index into the index stream */ + int index; + + /* clean out the line buffer */ + memset(s->vert_pred, 0, s->avctx->width * sizeof(unsigned short)); + + GET_NEXT_INDEX(); + + for (y = 0; y < s->avctx->height; y++) { + + /* re-init variables for the next line iteration */ + horiz_pred = c_horiz_pred = 0; + current_pixel_pair = (unsigned int *)current_line; + prev_pixel_pair = (unsigned int *)prev_line; + vert_pred = s->vert_pred; + mb_change_index = 0; + mb_change_byte = mb_change_bits[mb_change_index++]; + mb_change_byte_mask = 0x01; + pixels_left = s->avctx->width; + + while (pixels_left > 0) { + + if (keyframe || ((mb_change_byte & mb_change_byte_mask) == 0)) { + + switch (y & 3) { + case 0: + /* if macroblock width is 2, apply C-Y-C-Y; else + * apply C-Y-Y */ + if (s->block_width == 2) { + APPLY_C_PREDICTOR_24(); + APPLY_Y_PREDICTOR_24(); + OUTPUT_PIXEL_PAIR(); +// OUTPUT_PIXEL_PAIR_24_C(); + APPLY_C_PREDICTOR_24(); + APPLY_Y_PREDICTOR_24(); + OUTPUT_PIXEL_PAIR(); +// OUTPUT_PIXEL_PAIR_24_C(); + } else { + APPLY_C_PREDICTOR_24(); + APPLY_Y_PREDICTOR_24(); + OUTPUT_PIXEL_PAIR(); 
+// OUTPUT_PIXEL_PAIR_24_C(); + APPLY_Y_PREDICTOR_24(); + OUTPUT_PIXEL_PAIR(); +// OUTPUT_PIXEL_PAIR_24_C(); + } + break; + + case 1: + case 3: + /* always apply 2 Y predictors on these iterations */ + APPLY_Y_PREDICTOR_24(); + OUTPUT_PIXEL_PAIR(); + APPLY_Y_PREDICTOR_24(); + OUTPUT_PIXEL_PAIR(); + break; + + case 2: + /* this iteration might be C-Y-C-Y, Y-Y, or C-Y-Y + * depending on the macroblock type */ + if (s->block_type == BLOCK_2x2) { + APPLY_C_PREDICTOR_24(); + APPLY_Y_PREDICTOR_24(); + OUTPUT_PIXEL_PAIR(); +// OUTPUT_PIXEL_PAIR_24_C(); + APPLY_C_PREDICTOR_24(); + APPLY_Y_PREDICTOR_24(); + OUTPUT_PIXEL_PAIR(); +// OUTPUT_PIXEL_PAIR_24_C(); + } else if (s->block_type == BLOCK_4x2) { + APPLY_C_PREDICTOR_24(); + APPLY_Y_PREDICTOR_24(); + OUTPUT_PIXEL_PAIR(); +// OUTPUT_PIXEL_PAIR_24_C(); + APPLY_Y_PREDICTOR_24(); + OUTPUT_PIXEL_PAIR(); +// OUTPUT_PIXEL_PAIR_24_C(); + } else { + APPLY_Y_PREDICTOR_24(); + OUTPUT_PIXEL_PAIR(); + APPLY_Y_PREDICTOR_24(); + OUTPUT_PIXEL_PAIR(); + } + break; + } + + } else { + + /* skip (copy) four pixels, but reassign the horizontal + * predictor */ + *current_pixel_pair = *prev_pixel_pair++; + *vert_pred++ = *current_pixel_pair++; + *current_pixel_pair = *prev_pixel_pair++; + horiz_pred = *current_pixel_pair - *vert_pred; +// c_horiz_pred = *current_pixel_pair - *vert_pred; + *vert_pred++ = *current_pixel_pair++; + + } + + if (!keyframe) { + mb_change_byte_mask <<= 1; + + /* next byte */ + if (!mb_change_byte_mask) { + mb_change_byte = mb_change_bits[mb_change_index++]; + mb_change_byte_mask = 0x01; + } + } + + pixels_left -= 4; + } + + /* next change row */ + if (((y + 1) & 3) == 0) + mb_change_bits += s->mb_change_bits_row_size; + + current_line += s->frame.linesize[0]; + prev_line += s->prev_frame.linesize[0]; + } +} + + static int truemotion1_decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size) @@ -537,26 +873,22 @@ static int truemotion1_decode_frame(AVCodecContext *avctx, s->buf = buf; 
s->size = buf_size; + if (truemotion1_decode_header(s) == -1) + return -1; + s->frame.reference = 1; if (avctx->get_buffer(avctx, &s->frame) < 0) { - av_log(s->avctx, AV_LOG_ERROR, "truemotion1: get_buffer() failed\n"); + av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n"); return -1; } - /* no supplementary picture */ - if (buf_size == 0) - return 0; - - if (truemotion1_decode_header(s) == -1) - return -1; - /* check for a do-nothing frame and copy the previous frame */ if (compression_types[s->compression].algorithm == ALGO_NOP) { memcpy(s->frame.data[0], s->prev_frame.data[0], s->frame.linesize[0] * s->avctx->height); } else if (compression_types[s->compression].algorithm == ALGO_RGB24H) { - av_log(s->avctx, AV_LOG_ERROR, "24bit compression not yet supported\n"); + truemotion1_decode_24bit(s); } else { truemotion1_decode_16bit(s); } diff --git a/src/libffmpeg/libavcodec/tscc.c b/src/libffmpeg/libavcodec/tscc.c new file mode 100644 index 000000000..109404404 --- /dev/null +++ b/src/libffmpeg/libavcodec/tscc.c @@ -0,0 +1,330 @@ +/* + * TechSmith Camtasia decoder + * Copyright (c) 2004 Konstantin Shishkov + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/** + * @file tscc.c + * TechSmith Camtasia decoder + * + * Fourcc: TSCC + * + * Codec is very simple: + * it codes picture (picture difference, really) + * with algorithm almost identical to Windows RLE8, + * only without padding and with greater pixel sizes, + * then this coded picture is packed with ZLib + * + * Supports: BGR8,BGR555,BGR24 - only BGR8 and BGR555 tested + * + */ + +#include <stdio.h> +#include <stdlib.h> + +#include "common.h" +#include "avcodec.h" + +#ifdef CONFIG_ZLIB +#include <zlib.h> +#endif + + +/* + * Decoder context + */ +typedef struct TsccContext { + + AVCodecContext *avctx; + AVFrame pic; + + // Bits per pixel + int bpp; + // Decompressed data size + unsigned int decomp_size; + // Decompression buffer + unsigned char* decomp_buf; + int height; +#ifdef CONFIG_ZLIB + z_stream zstream; +#endif +} CamtasiaContext; + +/* + * + * Decode RLE - almost identical to Windows BMP RLE8 + * and enhanced to bigger color depths + * + */ + +static int decode_rle(CamtasiaContext *c, unsigned int srcsize) +{ + unsigned char *src = c->decomp_buf; + unsigned char *output, *output_end; + int p1, p2, line=c->height, pos=0, i; + + output = c->pic.data[0] + (c->height - 1) * c->pic.linesize[0]; + output_end = c->pic.data[0] + (c->height) * c->pic.linesize[0]; + while(src < c->decomp_buf + srcsize) { + p1 = *src++; + if(p1 == 0) { //Escape code + p2 = *src++; + if(p2 == 0) { //End-of-line + output = c->pic.data[0] + (--line) * c->pic.linesize[0]; + if (line < 0) + return -1; + pos = 0; + continue; + } else if(p2 == 1) { //End-of-picture + return 0; + } else if(p2 == 2) { //Skip + p1 = *src++; + p2 = *src++; + line -= p2; + if (line < 0) + return -1; + pos += p1; + output = c->pic.data[0] + line * c->pic.linesize[0] + pos * (c->bpp / 
8); + continue; + } + // Copy data + if (output + p2 * (c->bpp / 8) > output_end) { + src += p2 * (c->bpp / 8); + continue; + } + for(i = 0; i < p2 * (c->bpp / 8); i++) { + *output++ = *src++; + } + // RLE8 copy is actually padded - and runs are not! + if(c->bpp == 8 && (p2 & 1)) { + src++; + } + pos += p2; + } else { //Run of pixels + int pix[4]; //original pixel + switch(c->bpp){ + case 8: pix[0] = *src++; + break; + case 16: pix[0] = *src++; + pix[1] = *src++; + break; + case 24: pix[0] = *src++; + pix[1] = *src++; + pix[2] = *src++; + break; + case 32: pix[0] = *src++; + pix[1] = *src++; + pix[2] = *src++; + pix[3] = *src++; + break; + } + if (output + p1 * (c->bpp / 8) > output_end) + continue; + for(i = 0; i < p1; i++) { + switch(c->bpp){ + case 8: *output++ = pix[0]; + break; + case 16: *output++ = pix[0]; + *output++ = pix[1]; + break; + case 24: *output++ = pix[0]; + *output++ = pix[1]; + *output++ = pix[2]; + break; + case 32: *output++ = pix[0]; + *output++ = pix[1]; + *output++ = pix[2]; + *output++ = pix[3]; + break; + } + } + pos += p1; + } + } + + av_log(c->avctx, AV_LOG_ERROR, "Camtasia warning: no End-of-picture code\n"); + return 1; +} + +/* + * + * Decode a frame + * + */ +static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size) +{ + CamtasiaContext * const c = (CamtasiaContext *)avctx->priv_data; + unsigned char *encoded = (unsigned char *)buf; + unsigned char *outptr; +#ifdef CONFIG_ZLIB + int zret; // Zlib return code +#endif + int len = buf_size; + + if(c->pic.data[0]) + avctx->release_buffer(avctx, &c->pic); + + c->pic.reference = 1; + c->pic.buffer_hints = FF_BUFFER_HINTS_VALID; + if(avctx->get_buffer(avctx, &c->pic) < 0){ + av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n"); + return -1; + } + + outptr = c->pic.data[0]; // Output image pointer + +#ifdef CONFIG_ZLIB + zret = inflateReset(&(c->zstream)); + if (zret != Z_OK) { + av_log(avctx, AV_LOG_ERROR, "Inflate reset error: %d\n", zret); + 
return -1; + } + c->zstream.next_in = encoded; + c->zstream.avail_in = len; + c->zstream.next_out = c->decomp_buf; + c->zstream.avail_out = c->decomp_size; + zret = inflate(&(c->zstream), Z_FINISH); + // Z_DATA_ERROR means empty picture + if ((zret != Z_OK) && (zret != Z_STREAM_END) && (zret != Z_DATA_ERROR)) { + av_log(avctx, AV_LOG_ERROR, "Inflate error: %d\n", zret); + return -1; + } + + + if(zret != Z_DATA_ERROR) + decode_rle(c, c->zstream.avail_out); + + /* make the palette available on the way out */ + if (c->avctx->pix_fmt == PIX_FMT_PAL8) { + memcpy(c->pic.data[1], c->avctx->palctrl->palette, AVPALETTE_SIZE); + if (c->avctx->palctrl->palette_changed) { + c->pic.palette_has_changed = 1; + c->avctx->palctrl->palette_changed = 0; + } + } + +#else + av_log(avctx, AV_LOG_ERROR, "BUG! Zlib support not compiled in frame decoder.\n"); + return -1; +#endif + + *data_size = sizeof(AVFrame); + *(AVFrame*)data = c->pic; + + /* always report that the buffer was completely consumed */ + return buf_size; +} + + + +/* + * + * Init tscc decoder + * + */ +static int decode_init(AVCodecContext *avctx) +{ + CamtasiaContext * const c = (CamtasiaContext *)avctx->priv_data; + int zret; // Zlib return code + + c->avctx = avctx; + avctx->has_b_frames = 0; + + c->pic.data[0] = NULL; + c->height = avctx->height; + + if (avcodec_check_dimensions(avctx, avctx->height, avctx->width) < 0) { + return 1; + } + +#ifdef CONFIG_ZLIB + // Needed if zlib unused or init aborted before inflateInit + memset(&(c->zstream), 0, sizeof(z_stream)); +#else + av_log(avctx, AV_LOG_ERROR, "Zlib support not compiled.\n"); + return 1; +#endif + switch(avctx->bits_per_sample){ + case 8: avctx->pix_fmt = PIX_FMT_PAL8; break; + case 16: avctx->pix_fmt = PIX_FMT_RGB555; break; + case 24: + avctx->pix_fmt = PIX_FMT_BGR24; + break; + case 32: avctx->pix_fmt = PIX_FMT_RGBA32; break; + default: av_log(avctx, AV_LOG_ERROR, "Camtasia error: unknown depth %i bpp\n", avctx->bits_per_sample); + return -1; + } + c->bpp = 
avctx->bits_per_sample; + c->decomp_size = (avctx->width * c->bpp + (avctx->width + 254) / 255 + 2) * avctx->height + 2;//RLE in the 'best' case + + /* Allocate decompression buffer */ + if (c->decomp_size) { + if ((c->decomp_buf = av_malloc(c->decomp_size)) == NULL) { + av_log(avctx, AV_LOG_ERROR, "Can't allocate decompression buffer.\n"); + return 1; + } + } + +#ifdef CONFIG_ZLIB + c->zstream.zalloc = Z_NULL; + c->zstream.zfree = Z_NULL; + c->zstream.opaque = Z_NULL; + zret = inflateInit(&(c->zstream)); + if (zret != Z_OK) { + av_log(avctx, AV_LOG_ERROR, "Inflate init error: %d\n", zret); + return 1; + } +#endif + + return 0; +} + + + +/* + * + * Uninit tscc decoder + * + */ +static int decode_end(AVCodecContext *avctx) +{ + CamtasiaContext * const c = (CamtasiaContext *)avctx->priv_data; + + av_freep(&c->decomp_buf); + + if (c->pic.data[0]) + avctx->release_buffer(avctx, &c->pic); +#ifdef CONFIG_ZLIB + inflateEnd(&(c->zstream)); +#endif + + return 0; +} + +AVCodec tscc_decoder = { + "camtasia", + CODEC_TYPE_VIDEO, + CODEC_ID_TSCC, + sizeof(CamtasiaContext), + decode_init, + NULL, + decode_end, + decode_frame, + CODEC_CAP_DR1, +}; + diff --git a/src/libffmpeg/libavcodec/ulti.c b/src/libffmpeg/libavcodec/ulti.c new file mode 100755 index 000000000..d4a0c847a --- /dev/null +++ b/src/libffmpeg/libavcodec/ulti.c @@ -0,0 +1,428 @@ +/* + * + * Copyright (C) 2004 Konstantin Shishkov + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * IBM Ultimotion Video Decoder + * + */ + +/** + * @file ulti.c + * IBM Ultimotion Video Decoder. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "common.h" +#include "avcodec.h" + +#include "ulti_cb.h" + +typedef struct UltimotionDecodeContext { + AVCodecContext *avctx; + int width, height, blocks; + AVFrame frame; + uint8_t *ulti_codebook; +} UltimotionDecodeContext; + +static int ulti_decode_init(AVCodecContext *avctx) +{ + UltimotionDecodeContext *s = avctx->priv_data; + + s->avctx = avctx; + s->width = avctx->width; + s->height = avctx->height; + s->blocks = (s->width / 8) * (s->height / 8); + avctx->pix_fmt = PIX_FMT_YUV410P; + avctx->has_b_frames = 0; + avctx->coded_frame = (AVFrame*) &s->frame; + s->ulti_codebook = ulti_codebook; + + return 0; +} + +static int block_coords[8] = // 4x4 block coords in 8x8 superblock + { 0, 0, 0, 4, 4, 4, 4, 0}; + +static int angle_by_index[4] = { 0, 2, 6, 12}; + +/* Lookup tables for luma and chroma - used by ulti_convert_yuv() */ +static uint8_t ulti_lumas[64] = + { 0x10, 0x13, 0x17, 0x1A, 0x1E, 0x21, 0x25, 0x28, + 0x2C, 0x2F, 0x33, 0x36, 0x3A, 0x3D, 0x41, 0x44, + 0x48, 0x4B, 0x4F, 0x52, 0x56, 0x59, 0x5C, 0x60, + 0x63, 0x67, 0x6A, 0x6E, 0x71, 0x75, 0x78, 0x7C, + 0x7F, 0x83, 0x86, 0x8A, 0x8D, 0x91, 0x94, 0x98, + 0x9B, 0x9F, 0xA2, 0xA5, 0xA9, 0xAC, 0xB0, 0xB3, + 0xB7, 0xBA, 0xBE, 0xC1, 0xC5, 0xC8, 0xCC, 0xCF, + 0xD3, 0xD6, 0xDA, 0xDD, 0xE1, 0xE4, 0xE8, 0xEB}; + +static uint8_t ulti_chromas[16] = + { 0x60, 0x67, 0x6D, 0x73, 0x7A, 0x80, 0x86, 0x8D, + 0x93, 0x99, 0xA0, 0xA6, 0xAC, 0xB3, 0xB9, 0xC0}; + +/* convert Ultimotion YUV block (sixteen 6-bit Y samples and + two 4-bit chroma samples) into standard YUV and put it into frame */ +static void 
ulti_convert_yuv(AVFrame *frame, int x, int y, + uint8_t *luma,int chroma) +{ + uint8_t *y_plane, *cr_plane, *cb_plane; + int i; + + y_plane = frame->data[0] + x + y * frame->linesize[0]; + cr_plane = frame->data[1] + (x / 4) + (y / 4) * frame->linesize[1]; + cb_plane = frame->data[2] + (x / 4) + (y / 4) * frame->linesize[2]; + + cr_plane[0] = ulti_chromas[chroma >> 4]; + + cb_plane[0] = ulti_chromas[chroma & 0xF]; + + + for(i = 0; i < 16; i++){ + y_plane[i & 3] = ulti_lumas[luma[i]]; + if((i & 3) == 3) { //next row + y_plane += frame->linesize[0]; + } + } +} + +/* generate block like in MS Video1 */ +static void ulti_pattern(AVFrame *frame, int x, int y, + int f0, int f1, int Y0, int Y1, int chroma) +{ + uint8_t Luma[16]; + int mask, i; + for(mask = 0x80, i = 0; mask; mask >>= 1, i++) { + if(f0 & mask) + Luma[i] = Y1; + else + Luma[i] = Y0; + } + + for(mask = 0x80, i = 8; mask; mask >>= 1, i++) { + if(f1 & mask) + Luma[i] = Y1; + else + Luma[i] = Y0; + } + + ulti_convert_yuv(frame, x, y, Luma, chroma); +} + +/* fill block with some gradient */ +static void ulti_grad(AVFrame *frame, int x, int y, uint8_t *Y, int chroma, int angle) +{ + uint8_t Luma[16]; + if(angle & 8) { //reverse order + int t; + angle &= 0x7; + t = Y[0]; + Y[0] = Y[3]; + Y[3] = t; + t = Y[1]; + Y[1] = Y[2]; + Y[2] = t; + } + switch(angle){ + case 0: + Luma[0] = Y[0]; Luma[1] = Y[1]; Luma[2] = Y[2]; Luma[3] = Y[3]; + Luma[4] = Y[0]; Luma[5] = Y[1]; Luma[6] = Y[2]; Luma[7] = Y[3]; + Luma[8] = Y[0]; Luma[9] = Y[1]; Luma[10] = Y[2]; Luma[11] = Y[3]; + Luma[12] = Y[0]; Luma[13] = Y[1]; Luma[14] = Y[2]; Luma[15] = Y[3]; + break; + case 1: + Luma[0] = Y[1]; Luma[1] = Y[2]; Luma[2] = Y[3]; Luma[3] = Y[3]; + Luma[4] = Y[0]; Luma[5] = Y[1]; Luma[6] = Y[2]; Luma[7] = Y[3]; + Luma[8] = Y[0]; Luma[9] = Y[1]; Luma[10] = Y[2]; Luma[11] = Y[3]; + Luma[12] = Y[0]; Luma[13] = Y[0]; Luma[14] = Y[1]; Luma[15] = Y[2]; + break; + case 2: + Luma[0] = Y[1]; Luma[1] = Y[2]; Luma[2] = Y[3]; Luma[3] = Y[3]; + Luma[4] = 
Y[1]; Luma[5] = Y[2]; Luma[6] = Y[2]; Luma[7] = Y[3]; + Luma[8] = Y[0]; Luma[9] = Y[1]; Luma[10] = Y[1]; Luma[11] = Y[2]; + Luma[12] = Y[0]; Luma[13] = Y[0]; Luma[14] = Y[1]; Luma[15] = Y[2]; + break; + case 3: + Luma[0] = Y[2]; Luma[1] = Y[3]; Luma[2] = Y[3]; Luma[3] = Y[3]; + Luma[4] = Y[1]; Luma[5] = Y[2]; Luma[6] = Y[2]; Luma[7] = Y[3]; + Luma[8] = Y[0]; Luma[9] = Y[1]; Luma[10] = Y[1]; Luma[11] = Y[2]; + Luma[12] = Y[0]; Luma[13] = Y[0]; Luma[14] = Y[0]; Luma[15] = Y[1]; + break; + case 4: + Luma[0] = Y[3]; Luma[1] = Y[3]; Luma[2] = Y[3]; Luma[3] = Y[3]; + Luma[4] = Y[2]; Luma[5] = Y[2]; Luma[6] = Y[2]; Luma[7] = Y[2]; + Luma[8] = Y[1]; Luma[9] = Y[1]; Luma[10] = Y[1]; Luma[11] = Y[1]; + Luma[12] = Y[0]; Luma[13] = Y[0]; Luma[14] = Y[0]; Luma[15] = Y[0]; + break; + case 5: + Luma[0] = Y[3]; Luma[1] = Y[3]; Luma[2] = Y[3]; Luma[3] = Y[2]; + Luma[4] = Y[3]; Luma[5] = Y[2]; Luma[6] = Y[2]; Luma[7] = Y[1]; + Luma[8] = Y[2]; Luma[9] = Y[1]; Luma[10] = Y[1]; Luma[11] = Y[0]; + Luma[12] = Y[1]; Luma[13] = Y[0]; Luma[14] = Y[0]; Luma[15] = Y[0]; + break; + case 6: + Luma[0] = Y[3]; Luma[1] = Y[3]; Luma[2] = Y[2]; Luma[3] = Y[2]; + Luma[4] = Y[3]; Luma[5] = Y[2]; Luma[6] = Y[1]; Luma[7] = Y[1]; + Luma[8] = Y[2]; Luma[9] = Y[2]; Luma[10] = Y[1]; Luma[11] = Y[0]; + Luma[12] = Y[1]; Luma[13] = Y[1]; Luma[14] = Y[0]; Luma[15] = Y[0]; + break; + case 7: + Luma[0] = Y[3]; Luma[1] = Y[3]; Luma[2] = Y[2]; Luma[3] = Y[1]; + Luma[4] = Y[3]; Luma[5] = Y[2]; Luma[6] = Y[1]; Luma[7] = Y[0]; + Luma[8] = Y[3]; Luma[9] = Y[2]; Luma[10] = Y[1]; Luma[11] = Y[0]; + Luma[12] = Y[2]; Luma[13] = Y[1]; Luma[14] = Y[0]; Luma[15] = Y[0]; + break; + default: + Luma[0] = Y[0]; Luma[1] = Y[0]; Luma[2] = Y[1]; Luma[3] = Y[1]; + Luma[4] = Y[0]; Luma[5] = Y[0]; Luma[6] = Y[1]; Luma[7] = Y[1]; + Luma[8] = Y[2]; Luma[9] = Y[2]; Luma[10] = Y[3]; Luma[11] = Y[3]; + Luma[12] = Y[2]; Luma[13] = Y[2]; Luma[14] = Y[3]; Luma[15] = Y[3]; + break; + } + + ulti_convert_yuv(frame, x, y, Luma, chroma); +} + 
+static int ulti_decode_frame(AVCodecContext *avctx, + void *data, int *data_size, + uint8_t *buf, int buf_size) +{ + UltimotionDecodeContext *s=avctx->priv_data; + int modifier = 0; + int uniq = 0; + int mode = 0; + int blocks = 0; + int done = 0; + int x = 0, y = 0; + int i; + int skip; + int tmp; + + if(s->frame.data[0]) + avctx->release_buffer(avctx, &s->frame); + + s->frame.reference = 1; + s->frame.buffer_hints = FF_BUFFER_HINTS_VALID | FF_BUFFER_HINTS_PRESERVE | FF_BUFFER_HINTS_REUSABLE; + if(avctx->get_buffer(avctx, &s->frame) < 0) { + av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n"); + return -1; + } + + while(!done) { + int idx; + if(blocks >= s->blocks || y >= s->height) + break;//all blocks decoded + + idx = *buf++; + if((idx & 0xF8) == 0x70) { + switch(idx) { + case 0x70: //change modifier + modifier = *buf++; + if(modifier>1) + av_log(avctx, AV_LOG_INFO, "warning: modifier must be 0 or 1, got %i\n", modifier); + break; + case 0x71: // set uniq flag + uniq = 1; + break; + case 0x72: //toggle mode + mode = !mode; + break; + case 0x73: //end-of-frame + done = 1; + break; + case 0x74: //skip some blocks + skip = *buf++; + if ((blocks + skip) >= s->blocks) + break; + blocks += skip; + x += skip * 8; + while(x >= s->width) { + x -= s->width; + y += 8; + } + break; + default: + av_log(avctx, AV_LOG_INFO, "warning: unknown escape 0x%02X\n", idx); + } + } else { //handle one block + int code; + int cf; + int angle = 0; + uint8_t Y[4]; // luma samples of block + int tx = 0, ty = 0; //coords of subblock + int chroma = 0; + if (mode || uniq) { + uniq = 0; + cf = 1; + chroma = 0; + } else { + cf = 0; + if (idx) + chroma = *buf++; + } + for (i = 0; i < 4; i++) { // for every subblock + code = (idx >> (6 - i*2)) & 3; //extract 2 bits + if(!code) //skip subblock + continue; + if(cf) + chroma = *buf++; + tx = x + block_coords[i * 2]; + ty = y + block_coords[(i * 2) + 1]; + switch(code) { + case 1: + tmp = *buf++; + + angle = angle_by_index[(tmp >> 6) & 0x3]; + + 
Y[0] = tmp & 0x3F; + Y[1] = Y[0]; + + if (angle) { + Y[2] = Y[0]+1; + if (Y[2] > 0x3F) + Y[2] = 0x3F; + Y[3] = Y[2]; + } else { + Y[2] = Y[0]; + Y[3] = Y[0]; + } + break; + + case 2: + if (modifier) { // unpack four luma samples + tmp = (*buf++) << 16; + tmp += (*buf++) << 8; + tmp += *buf++; + + Y[0] = (tmp >> 18) & 0x3F; + Y[1] = (tmp >> 12) & 0x3F; + Y[2] = (tmp >> 6) & 0x3F; + Y[3] = tmp & 0x3F; + angle = 16; + } else { // retrieve luma samples from codebook + tmp = (*buf++) << 8; + tmp += (*buf++); + + angle = (tmp >> 12) & 0xF; + tmp &= 0xFFF; + tmp <<= 2; + Y[0] = s->ulti_codebook[tmp]; + Y[1] = s->ulti_codebook[tmp + 1]; + Y[2] = s->ulti_codebook[tmp + 2]; + Y[3] = s->ulti_codebook[tmp + 3]; + } + break; + + case 3: + if (modifier) { // all 16 luma samples + uint8_t Luma[16]; + + tmp = (*buf++) << 16; + tmp += (*buf++) << 8; + tmp += *buf++; + Luma[0] = (tmp >> 18) & 0x3F; + Luma[1] = (tmp >> 12) & 0x3F; + Luma[2] = (tmp >> 6) & 0x3F; + Luma[3] = tmp & 0x3F; + + tmp = (*buf++) << 16; + tmp += (*buf++) << 8; + tmp += *buf++; + Luma[4] = (tmp >> 18) & 0x3F; + Luma[5] = (tmp >> 12) & 0x3F; + Luma[6] = (tmp >> 6) & 0x3F; + Luma[7] = tmp & 0x3F; + + tmp = (*buf++) << 16; + tmp += (*buf++) << 8; + tmp += *buf++; + Luma[8] = (tmp >> 18) & 0x3F; + Luma[9] = (tmp >> 12) & 0x3F; + Luma[10] = (tmp >> 6) & 0x3F; + Luma[11] = tmp & 0x3F; + + tmp = (*buf++) << 16; + tmp += (*buf++) << 8; + tmp += *buf++; + Luma[12] = (tmp >> 18) & 0x3F; + Luma[13] = (tmp >> 12) & 0x3F; + Luma[14] = (tmp >> 6) & 0x3F; + Luma[15] = tmp & 0x3F; + + ulti_convert_yuv(&s->frame, tx, ty, Luma, chroma); + } else { + tmp = *buf++; + if(tmp & 0x80) { + angle = (tmp >> 4) & 0x7; + tmp = (tmp << 8) + *buf++; + Y[0] = (tmp >> 6) & 0x3F; + Y[1] = tmp & 0x3F; + Y[2] = (*buf++) & 0x3F; + Y[3] = (*buf++) & 0x3F; + ulti_grad(&s->frame, tx, ty, Y, chroma, angle); //draw block + } else { // some patterns + int f0, f1; + f0 = *buf++; + f1 = tmp; + Y[0] = (*buf++) & 0x3F; + Y[1] = (*buf++) & 0x3F; + 
ulti_pattern(&s->frame, tx, ty, f1, f0, Y[0], Y[1], chroma); + } + } + break; + } + if(code != 3) + ulti_grad(&s->frame, tx, ty, Y, chroma, angle); // draw block + } + blocks++; + x += 8; + if(x >= s->width) { + x = 0; + y += 8; + } + } + } + + *data_size=sizeof(AVFrame); + *(AVFrame*)data= s->frame; + + return buf_size; +} + +static int ulti_decode_end(AVCodecContext *avctx) +{ +/* UltimotionDecodeContext *s = avctx->priv_data;*/ + + return 0; +} + +AVCodec ulti_decoder = { + "ultimotion", + CODEC_TYPE_VIDEO, + CODEC_ID_ULTI, + sizeof(UltimotionDecodeContext), + ulti_decode_init, + NULL, + ulti_decode_end, + ulti_decode_frame, + CODEC_CAP_DR1, + NULL +}; + diff --git a/src/libffmpeg/libavcodec/ulti_cb.h b/src/libffmpeg/libavcodec/ulti_cb.h new file mode 100755 index 000000000..d059439dc --- /dev/null +++ b/src/libffmpeg/libavcodec/ulti_cb.h @@ -0,0 +1,4098 @@ +static unsigned char ulti_codebook[16384]={ + 0x00, 0x01, 0x01, 0x02, + 0x00, 0x01, 0x02, 0x03, + 0x00, 0x02, 0x03, 0x04, + 0x00, 0x01, 0x03, 0x04, + 0x00, 0x01, 0x02, 0x04, + 0x00, 0x02, 0x03, 0x05, + 0x00, 0x02, 0x04, 0x05, + 0x00, 0x01, 0x04, 0x05, + 0x00, 0x01, 0x03, 0x05, + 0x00, 0x02, 0x04, 0x06, + 0x00, 0x03, 0x05, 0x06, + 0x00, 0x01, 0x05, 0x06, + 0x00, 0x01, 0x03, 0x06, + 0x00, 0x06, 0x06, 0x06, + 0x00, 0x00, 0x06, 0x06, + 0x00, 0x00, 0x00, 0x06, + 0x00, 0x03, 0x04, 0x07, + 0x00, 0x03, 0x06, 0x07, + 0x00, 0x01, 0x06, 0x07, + 0x00, 0x01, 0x04, 0x07, + 0x00, 0x03, 0x05, 0x08, + 0x00, 0x04, 0x06, 0x08, + 0x00, 0x02, 0x06, 0x08, + 0x00, 0x02, 0x04, 0x08, + 0x00, 0x08, 0x08, 0x08, + 0x00, 0x00, 0x08, 0x08, + 0x00, 0x00, 0x00, 0x08, + 0x00, 0x04, 0x07, 0x0B, + 0x00, 0x05, 0x09, 0x0B, + 0x00, 0x02, 0x09, 0x0B, + 0x00, 0x02, 0x06, 0x0B, + 0x00, 0x0B, 0x0B, 0x0B, + 0x00, 0x00, 0x0B, 0x0B, + 0x00, 0x00, 0x00, 0x0B, + 0x00, 0x05, 0x09, 0x0E, + 0x00, 0x07, 0x0B, 0x0E, + 0x00, 0x03, 0x0B, 0x0E, + 0x00, 0x03, 0x07, 0x0E, + 0x00, 0x0E, 0x0E, 0x0E, + 0x00, 0x00, 0x0E, 0x0E, + 0x00, 0x00, 0x00, 0x0E, + 0x00, 0x06, 
0x0B, 0x11, + 0x00, 0x08, 0x0D, 0x11, + 0x00, 0x04, 0x0D, 0x11, + 0x00, 0x04, 0x09, 0x11, + 0x00, 0x11, 0x11, 0x11, + 0x00, 0x00, 0x11, 0x11, + 0x00, 0x00, 0x00, 0x11, + 0x00, 0x07, 0x0D, 0x14, + 0x00, 0x0A, 0x0F, 0x14, + 0x00, 0x05, 0x0F, 0x14, + 0x00, 0x05, 0x0A, 0x14, + 0x00, 0x14, 0x14, 0x14, + 0x00, 0x00, 0x14, 0x14, + 0x00, 0x00, 0x00, 0x14, + 0x00, 0x0B, 0x12, 0x17, + 0x00, 0x05, 0x12, 0x17, + 0x00, 0x05, 0x0C, 0x17, + 0x00, 0x17, 0x17, 0x17, + 0x00, 0x00, 0x17, 0x17, + 0x00, 0x00, 0x00, 0x17, + 0x00, 0x0D, 0x14, 0x1A, + 0x00, 0x06, 0x14, 0x1A, + 0x00, 0x06, 0x0D, 0x1A, + 0x00, 0x1A, 0x1A, 0x1A, + 0x00, 0x00, 0x1A, 0x1A, + 0x00, 0x00, 0x00, 0x1A, + 0x00, 0x0E, 0x16, 0x1D, + 0x00, 0x07, 0x16, 0x1D, + 0x00, 0x07, 0x0F, 0x1D, + 0x00, 0x1D, 0x1D, 0x1D, + 0x00, 0x00, 0x1D, 0x1D, + 0x00, 0x00, 0x00, 0x1D, + 0x00, 0x10, 0x18, 0x20, + 0x00, 0x08, 0x18, 0x20, + 0x00, 0x08, 0x10, 0x20, + 0x00, 0x20, 0x20, 0x20, + 0x00, 0x00, 0x20, 0x20, + 0x00, 0x00, 0x00, 0x20, + 0x00, 0x23, 0x23, 0x23, + 0x00, 0x00, 0x23, 0x23, + 0x00, 0x00, 0x00, 0x23, + 0x00, 0x12, 0x1B, 0x24, + 0x00, 0x09, 0x1B, 0x24, + 0x00, 0x09, 0x12, 0x24, + 0x00, 0x28, 0x28, 0x28, + 0x00, 0x00, 0x28, 0x28, + 0x00, 0x00, 0x00, 0x28, + 0x00, 0x2E, 0x2E, 0x2E, + 0x00, 0x00, 0x2E, 0x2E, + 0x00, 0x00, 0x00, 0x2E, + 0x01, 0x02, 0x02, 0x03, + 0x01, 0x02, 0x03, 0x04, + 0x01, 0x03, 0x04, 0x05, + 0x01, 0x02, 0x04, 0x05, + 0x01, 0x02, 0x03, 0x05, + 0x01, 0x03, 0x04, 0x06, + 0x01, 0x03, 0x05, 0x06, + 0x01, 0x02, 0x05, 0x06, + 0x01, 0x02, 0x04, 0x06, + 0x01, 0x03, 0x05, 0x07, + 0x01, 0x04, 0x06, 0x07, + 0x01, 0x02, 0x06, 0x07, + 0x01, 0x02, 0x04, 0x07, + 0x01, 0x07, 0x07, 0x07, + 0x01, 0x01, 0x07, 0x07, + 0x01, 0x01, 0x01, 0x07, + 0x01, 0x04, 0x05, 0x08, + 0x01, 0x04, 0x07, 0x08, + 0x01, 0x02, 0x07, 0x08, + 0x01, 0x02, 0x05, 0x08, + 0x01, 0x04, 0x06, 0x09, + 0x01, 0x05, 0x07, 0x09, + 0x01, 0x03, 0x07, 0x09, + 0x01, 0x03, 0x05, 0x09, + 0x01, 0x09, 0x09, 0x09, + 0x01, 0x01, 0x09, 0x09, + 0x01, 0x01, 0x01, 0x09, + 0x01, 
0x05, 0x08, 0x0C, + 0x01, 0x06, 0x0A, 0x0C, + 0x01, 0x03, 0x0A, 0x0C, + 0x01, 0x03, 0x07, 0x0C, + 0x01, 0x0C, 0x0C, 0x0C, + 0x01, 0x01, 0x0C, 0x0C, + 0x01, 0x01, 0x01, 0x0C, + 0x01, 0x06, 0x0A, 0x0F, + 0x01, 0x08, 0x0C, 0x0F, + 0x01, 0x04, 0x0C, 0x0F, + 0x01, 0x04, 0x08, 0x0F, + 0x01, 0x0F, 0x0F, 0x0F, + 0x01, 0x01, 0x0F, 0x0F, + 0x01, 0x01, 0x01, 0x0F, + 0x01, 0x07, 0x0C, 0x12, + 0x01, 0x09, 0x0E, 0x12, + 0x01, 0x05, 0x0E, 0x12, + 0x01, 0x05, 0x0A, 0x12, + 0x01, 0x12, 0x12, 0x12, + 0x01, 0x01, 0x12, 0x12, + 0x01, 0x01, 0x01, 0x12, + 0x01, 0x08, 0x0E, 0x15, + 0x01, 0x0B, 0x10, 0x15, + 0x01, 0x06, 0x10, 0x15, + 0x01, 0x06, 0x0B, 0x15, + 0x01, 0x15, 0x15, 0x15, + 0x01, 0x01, 0x15, 0x15, + 0x01, 0x01, 0x01, 0x15, + 0x01, 0x0C, 0x13, 0x18, + 0x01, 0x06, 0x13, 0x18, + 0x01, 0x06, 0x0D, 0x18, + 0x01, 0x18, 0x18, 0x18, + 0x01, 0x01, 0x18, 0x18, + 0x01, 0x01, 0x01, 0x18, + 0x01, 0x0E, 0x15, 0x1B, + 0x01, 0x07, 0x15, 0x1B, + 0x01, 0x07, 0x0E, 0x1B, + 0x01, 0x1B, 0x1B, 0x1B, + 0x01, 0x01, 0x1B, 0x1B, + 0x01, 0x01, 0x01, 0x1B, + 0x01, 0x0F, 0x17, 0x1E, + 0x01, 0x08, 0x17, 0x1E, + 0x01, 0x08, 0x10, 0x1E, + 0x01, 0x1E, 0x1E, 0x1E, + 0x01, 0x01, 0x1E, 0x1E, + 0x01, 0x01, 0x01, 0x1E, + 0x01, 0x11, 0x19, 0x21, + 0x01, 0x09, 0x19, 0x21, + 0x01, 0x09, 0x11, 0x21, + 0x01, 0x21, 0x21, 0x21, + 0x01, 0x01, 0x21, 0x21, + 0x01, 0x01, 0x01, 0x21, + 0x01, 0x24, 0x24, 0x24, + 0x01, 0x01, 0x24, 0x24, + 0x01, 0x01, 0x01, 0x24, + 0x01, 0x13, 0x1C, 0x25, + 0x01, 0x0A, 0x1C, 0x25, + 0x01, 0x0A, 0x13, 0x25, + 0x01, 0x29, 0x29, 0x29, + 0x01, 0x01, 0x29, 0x29, + 0x01, 0x01, 0x01, 0x29, + 0x01, 0x2F, 0x2F, 0x2F, + 0x01, 0x01, 0x2F, 0x2F, + 0x01, 0x01, 0x01, 0x2F, + 0x02, 0x03, 0x03, 0x04, + 0x02, 0x03, 0x04, 0x05, + 0x02, 0x04, 0x05, 0x06, + 0x02, 0x03, 0x05, 0x06, + 0x02, 0x03, 0x04, 0x06, + 0x02, 0x04, 0x05, 0x07, + 0x02, 0x04, 0x06, 0x07, + 0x02, 0x03, 0x06, 0x07, + 0x02, 0x03, 0x05, 0x07, + 0x02, 0x04, 0x06, 0x08, + 0x02, 0x05, 0x07, 0x08, + 0x02, 0x03, 0x07, 0x08, + 0x02, 0x03, 0x05, 0x08, + 
0x02, 0x08, 0x08, 0x08, + 0x02, 0x02, 0x08, 0x08, + 0x02, 0x02, 0x02, 0x08, + 0x02, 0x05, 0x06, 0x09, + 0x02, 0x05, 0x08, 0x09, + 0x02, 0x03, 0x08, 0x09, + 0x02, 0x03, 0x06, 0x09, + 0x02, 0x05, 0x07, 0x0A, + 0x02, 0x06, 0x08, 0x0A, + 0x02, 0x04, 0x08, 0x0A, + 0x02, 0x04, 0x06, 0x0A, + 0x02, 0x0A, 0x0A, 0x0A, + 0x02, 0x02, 0x0A, 0x0A, + 0x02, 0x02, 0x02, 0x0A, + 0x02, 0x06, 0x09, 0x0D, + 0x02, 0x07, 0x0B, 0x0D, + 0x02, 0x04, 0x0B, 0x0D, + 0x02, 0x04, 0x08, 0x0D, + 0x02, 0x0D, 0x0D, 0x0D, + 0x02, 0x02, 0x0D, 0x0D, + 0x02, 0x02, 0x02, 0x0D, + 0x02, 0x07, 0x0B, 0x10, + 0x02, 0x09, 0x0D, 0x10, + 0x02, 0x05, 0x0D, 0x10, + 0x02, 0x05, 0x09, 0x10, + 0x02, 0x10, 0x10, 0x10, + 0x02, 0x02, 0x10, 0x10, + 0x02, 0x02, 0x02, 0x10, + 0x02, 0x08, 0x0D, 0x13, + 0x02, 0x0A, 0x0F, 0x13, + 0x02, 0x06, 0x0F, 0x13, + 0x02, 0x06, 0x0B, 0x13, + 0x02, 0x13, 0x13, 0x13, + 0x02, 0x02, 0x13, 0x13, + 0x02, 0x02, 0x02, 0x13, + 0x02, 0x09, 0x0F, 0x16, + 0x02, 0x0C, 0x11, 0x16, + 0x02, 0x07, 0x11, 0x16, + 0x02, 0x07, 0x0C, 0x16, + 0x02, 0x16, 0x16, 0x16, + 0x02, 0x02, 0x16, 0x16, + 0x02, 0x02, 0x02, 0x16, + 0x02, 0x0D, 0x14, 0x19, + 0x02, 0x07, 0x14, 0x19, + 0x02, 0x07, 0x0E, 0x19, + 0x02, 0x19, 0x19, 0x19, + 0x02, 0x02, 0x19, 0x19, + 0x02, 0x02, 0x02, 0x19, + 0x02, 0x0F, 0x16, 0x1C, + 0x02, 0x08, 0x16, 0x1C, + 0x02, 0x08, 0x0F, 0x1C, + 0x02, 0x1C, 0x1C, 0x1C, + 0x02, 0x02, 0x1C, 0x1C, + 0x02, 0x02, 0x02, 0x1C, + 0x02, 0x10, 0x18, 0x1F, + 0x02, 0x09, 0x18, 0x1F, + 0x02, 0x09, 0x11, 0x1F, + 0x02, 0x1F, 0x1F, 0x1F, + 0x02, 0x02, 0x1F, 0x1F, + 0x02, 0x02, 0x02, 0x1F, + 0x02, 0x12, 0x1A, 0x22, + 0x02, 0x0A, 0x1A, 0x22, + 0x02, 0x0A, 0x12, 0x22, + 0x02, 0x22, 0x22, 0x22, + 0x02, 0x02, 0x22, 0x22, + 0x02, 0x02, 0x02, 0x22, + 0x02, 0x25, 0x25, 0x25, + 0x02, 0x02, 0x25, 0x25, + 0x02, 0x02, 0x02, 0x25, + 0x02, 0x14, 0x1D, 0x26, + 0x02, 0x0B, 0x1D, 0x26, + 0x02, 0x0B, 0x14, 0x26, + 0x02, 0x2A, 0x2A, 0x2A, + 0x02, 0x02, 0x2A, 0x2A, + 0x02, 0x02, 0x02, 0x2A, + 0x02, 0x30, 0x30, 0x30, + 0x02, 0x02, 0x30, 0x30, 
+ 0x02, 0x02, 0x02, 0x30, + 0x03, 0x04, 0x04, 0x05, + 0x03, 0x04, 0x05, 0x06, + 0x03, 0x05, 0x06, 0x07, + 0x03, 0x04, 0x06, 0x07, + 0x03, 0x04, 0x05, 0x07, + 0x03, 0x05, 0x06, 0x08, + 0x03, 0x05, 0x07, 0x08, + 0x03, 0x04, 0x07, 0x08, + 0x03, 0x04, 0x06, 0x08, + 0x03, 0x05, 0x07, 0x09, + 0x03, 0x06, 0x08, 0x09, + 0x03, 0x04, 0x08, 0x09, + 0x03, 0x04, 0x06, 0x09, + 0x03, 0x09, 0x09, 0x09, + 0x03, 0x03, 0x09, 0x09, + 0x03, 0x03, 0x03, 0x09, + 0x03, 0x06, 0x07, 0x0A, + 0x03, 0x06, 0x09, 0x0A, + 0x03, 0x04, 0x09, 0x0A, + 0x03, 0x04, 0x07, 0x0A, + 0x03, 0x06, 0x08, 0x0B, + 0x03, 0x07, 0x09, 0x0B, + 0x03, 0x05, 0x09, 0x0B, + 0x03, 0x05, 0x07, 0x0B, + 0x03, 0x0B, 0x0B, 0x0B, + 0x03, 0x03, 0x0B, 0x0B, + 0x03, 0x03, 0x03, 0x0B, + 0x03, 0x07, 0x0A, 0x0E, + 0x03, 0x08, 0x0C, 0x0E, + 0x03, 0x05, 0x0C, 0x0E, + 0x03, 0x05, 0x09, 0x0E, + 0x03, 0x0E, 0x0E, 0x0E, + 0x03, 0x03, 0x0E, 0x0E, + 0x03, 0x03, 0x03, 0x0E, + 0x03, 0x08, 0x0C, 0x11, + 0x03, 0x0A, 0x0E, 0x11, + 0x03, 0x06, 0x0E, 0x11, + 0x03, 0x06, 0x0A, 0x11, + 0x03, 0x11, 0x11, 0x11, + 0x03, 0x03, 0x11, 0x11, + 0x03, 0x03, 0x03, 0x11, + 0x03, 0x09, 0x0E, 0x14, + 0x03, 0x0B, 0x10, 0x14, + 0x03, 0x07, 0x10, 0x14, + 0x03, 0x07, 0x0C, 0x14, + 0x03, 0x14, 0x14, 0x14, + 0x03, 0x03, 0x14, 0x14, + 0x03, 0x03, 0x03, 0x14, + 0x03, 0x0A, 0x10, 0x17, + 0x03, 0x0D, 0x12, 0x17, + 0x03, 0x08, 0x12, 0x17, + 0x03, 0x08, 0x0D, 0x17, + 0x03, 0x17, 0x17, 0x17, + 0x03, 0x03, 0x17, 0x17, + 0x03, 0x03, 0x03, 0x17, + 0x03, 0x0E, 0x15, 0x1A, + 0x03, 0x08, 0x15, 0x1A, + 0x03, 0x08, 0x0F, 0x1A, + 0x03, 0x1A, 0x1A, 0x1A, + 0x03, 0x03, 0x1A, 0x1A, + 0x03, 0x03, 0x03, 0x1A, + 0x03, 0x10, 0x17, 0x1D, + 0x03, 0x09, 0x17, 0x1D, + 0x03, 0x09, 0x10, 0x1D, + 0x03, 0x1D, 0x1D, 0x1D, + 0x03, 0x03, 0x1D, 0x1D, + 0x03, 0x03, 0x03, 0x1D, + 0x03, 0x11, 0x19, 0x20, + 0x03, 0x0A, 0x19, 0x20, + 0x03, 0x0A, 0x12, 0x20, + 0x03, 0x20, 0x20, 0x20, + 0x03, 0x03, 0x20, 0x20, + 0x03, 0x03, 0x03, 0x20, + 0x03, 0x13, 0x1B, 0x23, + 0x03, 0x0B, 0x1B, 0x23, + 0x03, 0x0B, 0x13, 
0x23, + 0x03, 0x23, 0x23, 0x23, + 0x03, 0x03, 0x23, 0x23, + 0x03, 0x03, 0x03, 0x23, + 0x03, 0x26, 0x26, 0x26, + 0x03, 0x03, 0x26, 0x26, + 0x03, 0x03, 0x03, 0x26, + 0x03, 0x15, 0x1E, 0x27, + 0x03, 0x0C, 0x1E, 0x27, + 0x03, 0x0C, 0x15, 0x27, + 0x03, 0x2B, 0x2B, 0x2B, + 0x03, 0x03, 0x2B, 0x2B, + 0x03, 0x03, 0x03, 0x2B, + 0x03, 0x31, 0x31, 0x31, + 0x03, 0x03, 0x31, 0x31, + 0x03, 0x03, 0x03, 0x31, + 0x04, 0x05, 0x05, 0x06, + 0x04, 0x05, 0x06, 0x07, + 0x04, 0x06, 0x07, 0x08, + 0x04, 0x05, 0x07, 0x08, + 0x04, 0x05, 0x06, 0x08, + 0x04, 0x06, 0x07, 0x09, + 0x04, 0x06, 0x08, 0x09, + 0x04, 0x05, 0x08, 0x09, + 0x04, 0x05, 0x07, 0x09, + 0x04, 0x06, 0x08, 0x0A, + 0x04, 0x07, 0x09, 0x0A, + 0x04, 0x05, 0x09, 0x0A, + 0x04, 0x05, 0x07, 0x0A, + 0x04, 0x0A, 0x0A, 0x0A, + 0x04, 0x04, 0x0A, 0x0A, + 0x04, 0x04, 0x04, 0x0A, + 0x04, 0x07, 0x08, 0x0B, + 0x04, 0x07, 0x0A, 0x0B, + 0x04, 0x05, 0x0A, 0x0B, + 0x04, 0x05, 0x08, 0x0B, + 0x04, 0x07, 0x09, 0x0C, + 0x04, 0x08, 0x0A, 0x0C, + 0x04, 0x06, 0x0A, 0x0C, + 0x04, 0x06, 0x08, 0x0C, + 0x04, 0x0C, 0x0C, 0x0C, + 0x04, 0x04, 0x0C, 0x0C, + 0x04, 0x04, 0x04, 0x0C, + 0x04, 0x08, 0x0B, 0x0F, + 0x04, 0x09, 0x0D, 0x0F, + 0x04, 0x06, 0x0D, 0x0F, + 0x04, 0x06, 0x0A, 0x0F, + 0x04, 0x0F, 0x0F, 0x0F, + 0x04, 0x04, 0x0F, 0x0F, + 0x04, 0x04, 0x04, 0x0F, + 0x04, 0x09, 0x0D, 0x12, + 0x04, 0x0B, 0x0F, 0x12, + 0x04, 0x07, 0x0F, 0x12, + 0x04, 0x07, 0x0B, 0x12, + 0x04, 0x12, 0x12, 0x12, + 0x04, 0x04, 0x12, 0x12, + 0x04, 0x04, 0x04, 0x12, + 0x04, 0x0A, 0x0F, 0x15, + 0x04, 0x0C, 0x11, 0x15, + 0x04, 0x08, 0x11, 0x15, + 0x04, 0x08, 0x0D, 0x15, + 0x04, 0x15, 0x15, 0x15, + 0x04, 0x04, 0x15, 0x15, + 0x04, 0x04, 0x04, 0x15, + 0x04, 0x0B, 0x11, 0x18, + 0x04, 0x0E, 0x13, 0x18, + 0x04, 0x09, 0x13, 0x18, + 0x04, 0x09, 0x0E, 0x18, + 0x04, 0x18, 0x18, 0x18, + 0x04, 0x04, 0x18, 0x18, + 0x04, 0x04, 0x04, 0x18, + 0x04, 0x0F, 0x16, 0x1B, + 0x04, 0x09, 0x16, 0x1B, + 0x04, 0x09, 0x10, 0x1B, + 0x04, 0x1B, 0x1B, 0x1B, + 0x04, 0x04, 0x1B, 0x1B, + 0x04, 0x04, 0x04, 0x1B, + 0x04, 0x11, 
0x18, 0x1E, + 0x04, 0x0A, 0x18, 0x1E, + 0x04, 0x0A, 0x11, 0x1E, + 0x04, 0x1E, 0x1E, 0x1E, + 0x04, 0x04, 0x1E, 0x1E, + 0x04, 0x04, 0x04, 0x1E, + 0x04, 0x12, 0x1A, 0x21, + 0x04, 0x0B, 0x1A, 0x21, + 0x04, 0x0B, 0x13, 0x21, + 0x04, 0x21, 0x21, 0x21, + 0x04, 0x04, 0x21, 0x21, + 0x04, 0x04, 0x04, 0x21, + 0x04, 0x14, 0x1C, 0x24, + 0x04, 0x0C, 0x1C, 0x24, + 0x04, 0x0C, 0x14, 0x24, + 0x04, 0x24, 0x24, 0x24, + 0x04, 0x04, 0x24, 0x24, + 0x04, 0x04, 0x04, 0x24, + 0x04, 0x27, 0x27, 0x27, + 0x04, 0x04, 0x27, 0x27, + 0x04, 0x04, 0x04, 0x27, + 0x04, 0x16, 0x1F, 0x28, + 0x04, 0x0D, 0x1F, 0x28, + 0x04, 0x0D, 0x16, 0x28, + 0x04, 0x2C, 0x2C, 0x2C, + 0x04, 0x04, 0x2C, 0x2C, + 0x04, 0x04, 0x04, 0x2C, + 0x04, 0x32, 0x32, 0x32, + 0x04, 0x04, 0x32, 0x32, + 0x04, 0x04, 0x04, 0x32, + 0x05, 0x06, 0x06, 0x07, + 0x05, 0x06, 0x07, 0x08, + 0x05, 0x07, 0x08, 0x09, + 0x05, 0x06, 0x08, 0x09, + 0x05, 0x06, 0x07, 0x09, + 0x05, 0x07, 0x08, 0x0A, + 0x05, 0x07, 0x09, 0x0A, + 0x05, 0x06, 0x09, 0x0A, + 0x05, 0x06, 0x08, 0x0A, + 0x05, 0x07, 0x09, 0x0B, + 0x05, 0x08, 0x0A, 0x0B, + 0x05, 0x06, 0x0A, 0x0B, + 0x05, 0x06, 0x08, 0x0B, + 0x05, 0x0B, 0x0B, 0x0B, + 0x05, 0x05, 0x0B, 0x0B, + 0x05, 0x05, 0x05, 0x0B, + 0x05, 0x08, 0x09, 0x0C, + 0x05, 0x08, 0x0B, 0x0C, + 0x05, 0x06, 0x0B, 0x0C, + 0x05, 0x06, 0x09, 0x0C, + 0x05, 0x08, 0x0A, 0x0D, + 0x05, 0x09, 0x0B, 0x0D, + 0x05, 0x07, 0x0B, 0x0D, + 0x05, 0x07, 0x09, 0x0D, + 0x05, 0x0D, 0x0D, 0x0D, + 0x05, 0x05, 0x0D, 0x0D, + 0x05, 0x05, 0x05, 0x0D, + 0x05, 0x09, 0x0C, 0x10, + 0x05, 0x0A, 0x0E, 0x10, + 0x05, 0x07, 0x0E, 0x10, + 0x05, 0x07, 0x0B, 0x10, + 0x05, 0x10, 0x10, 0x10, + 0x05, 0x05, 0x10, 0x10, + 0x05, 0x05, 0x05, 0x10, + 0x05, 0x0A, 0x0E, 0x13, + 0x05, 0x0C, 0x10, 0x13, + 0x05, 0x08, 0x10, 0x13, + 0x05, 0x08, 0x0C, 0x13, + 0x05, 0x13, 0x13, 0x13, + 0x05, 0x05, 0x13, 0x13, + 0x05, 0x05, 0x05, 0x13, + 0x05, 0x0B, 0x10, 0x16, + 0x05, 0x0D, 0x12, 0x16, + 0x05, 0x09, 0x12, 0x16, + 0x05, 0x09, 0x0E, 0x16, + 0x05, 0x16, 0x16, 0x16, + 0x05, 0x05, 0x16, 0x16, + 0x05, 
0x05, 0x05, 0x16, + 0x05, 0x0C, 0x12, 0x19, + 0x05, 0x0F, 0x14, 0x19, + 0x05, 0x0A, 0x14, 0x19, + 0x05, 0x0A, 0x0F, 0x19, + 0x05, 0x19, 0x19, 0x19, + 0x05, 0x05, 0x19, 0x19, + 0x05, 0x05, 0x05, 0x19, + 0x05, 0x10, 0x17, 0x1C, + 0x05, 0x0A, 0x17, 0x1C, + 0x05, 0x0A, 0x11, 0x1C, + 0x05, 0x1C, 0x1C, 0x1C, + 0x05, 0x05, 0x1C, 0x1C, + 0x05, 0x05, 0x05, 0x1C, + 0x05, 0x12, 0x19, 0x1F, + 0x05, 0x0B, 0x19, 0x1F, + 0x05, 0x0B, 0x12, 0x1F, + 0x05, 0x1F, 0x1F, 0x1F, + 0x05, 0x05, 0x1F, 0x1F, + 0x05, 0x05, 0x05, 0x1F, + 0x05, 0x13, 0x1B, 0x22, + 0x05, 0x0C, 0x1B, 0x22, + 0x05, 0x0C, 0x14, 0x22, + 0x05, 0x22, 0x22, 0x22, + 0x05, 0x05, 0x22, 0x22, + 0x05, 0x05, 0x05, 0x22, + 0x05, 0x15, 0x1D, 0x25, + 0x05, 0x0D, 0x1D, 0x25, + 0x05, 0x0D, 0x15, 0x25, + 0x05, 0x25, 0x25, 0x25, + 0x05, 0x05, 0x25, 0x25, + 0x05, 0x05, 0x05, 0x25, + 0x05, 0x28, 0x28, 0x28, + 0x05, 0x05, 0x28, 0x28, + 0x05, 0x05, 0x05, 0x28, + 0x05, 0x17, 0x20, 0x29, + 0x05, 0x0E, 0x20, 0x29, + 0x05, 0x0E, 0x17, 0x29, + 0x05, 0x2D, 0x2D, 0x2D, + 0x05, 0x05, 0x2D, 0x2D, + 0x05, 0x05, 0x05, 0x2D, + 0x05, 0x33, 0x33, 0x33, + 0x05, 0x05, 0x33, 0x33, + 0x05, 0x05, 0x05, 0x33, + 0x06, 0x07, 0x07, 0x08, + 0x06, 0x07, 0x08, 0x09, + 0x06, 0x08, 0x09, 0x0A, + 0x06, 0x07, 0x09, 0x0A, + 0x06, 0x07, 0x08, 0x0A, + 0x06, 0x08, 0x09, 0x0B, + 0x06, 0x08, 0x0A, 0x0B, + 0x06, 0x07, 0x0A, 0x0B, + 0x06, 0x07, 0x09, 0x0B, + 0x06, 0x08, 0x0A, 0x0C, + 0x06, 0x09, 0x0B, 0x0C, + 0x06, 0x07, 0x0B, 0x0C, + 0x06, 0x07, 0x09, 0x0C, + 0x06, 0x0C, 0x0C, 0x0C, + 0x06, 0x06, 0x0C, 0x0C, + 0x06, 0x06, 0x06, 0x0C, + 0x06, 0x09, 0x0A, 0x0D, + 0x06, 0x09, 0x0C, 0x0D, + 0x06, 0x07, 0x0C, 0x0D, + 0x06, 0x07, 0x0A, 0x0D, + 0x06, 0x09, 0x0B, 0x0E, + 0x06, 0x0A, 0x0C, 0x0E, + 0x06, 0x08, 0x0C, 0x0E, + 0x06, 0x08, 0x0A, 0x0E, + 0x06, 0x0E, 0x0E, 0x0E, + 0x06, 0x06, 0x0E, 0x0E, + 0x06, 0x06, 0x06, 0x0E, + 0x06, 0x0A, 0x0D, 0x11, + 0x06, 0x0B, 0x0F, 0x11, + 0x06, 0x08, 0x0F, 0x11, + 0x06, 0x08, 0x0C, 0x11, + 0x06, 0x11, 0x11, 0x11, + 0x06, 0x06, 0x11, 0x11, + 
0x06, 0x06, 0x06, 0x11, + 0x06, 0x0B, 0x0F, 0x14, + 0x06, 0x0D, 0x11, 0x14, + 0x06, 0x09, 0x11, 0x14, + 0x06, 0x09, 0x0D, 0x14, + 0x06, 0x14, 0x14, 0x14, + 0x06, 0x06, 0x14, 0x14, + 0x06, 0x06, 0x06, 0x14, + 0x06, 0x0C, 0x11, 0x17, + 0x06, 0x0E, 0x13, 0x17, + 0x06, 0x0A, 0x13, 0x17, + 0x06, 0x0A, 0x0F, 0x17, + 0x06, 0x17, 0x17, 0x17, + 0x06, 0x06, 0x17, 0x17, + 0x06, 0x06, 0x06, 0x17, + 0x06, 0x0D, 0x13, 0x1A, + 0x06, 0x10, 0x15, 0x1A, + 0x06, 0x0B, 0x15, 0x1A, + 0x06, 0x0B, 0x10, 0x1A, + 0x06, 0x1A, 0x1A, 0x1A, + 0x06, 0x06, 0x1A, 0x1A, + 0x06, 0x06, 0x06, 0x1A, + 0x06, 0x11, 0x18, 0x1D, + 0x06, 0x0B, 0x18, 0x1D, + 0x06, 0x0B, 0x12, 0x1D, + 0x06, 0x1D, 0x1D, 0x1D, + 0x06, 0x06, 0x1D, 0x1D, + 0x06, 0x06, 0x06, 0x1D, + 0x06, 0x13, 0x1A, 0x20, + 0x06, 0x0C, 0x1A, 0x20, + 0x06, 0x0C, 0x13, 0x20, + 0x06, 0x20, 0x20, 0x20, + 0x06, 0x06, 0x20, 0x20, + 0x06, 0x06, 0x06, 0x20, + 0x06, 0x14, 0x1C, 0x23, + 0x06, 0x0D, 0x1C, 0x23, + 0x06, 0x0D, 0x15, 0x23, + 0x06, 0x23, 0x23, 0x23, + 0x06, 0x06, 0x23, 0x23, + 0x06, 0x06, 0x06, 0x23, + 0x06, 0x16, 0x1E, 0x26, + 0x06, 0x0E, 0x1E, 0x26, + 0x06, 0x0E, 0x16, 0x26, + 0x06, 0x26, 0x26, 0x26, + 0x06, 0x06, 0x26, 0x26, + 0x06, 0x06, 0x06, 0x26, + 0x06, 0x29, 0x29, 0x29, + 0x06, 0x06, 0x29, 0x29, + 0x06, 0x06, 0x06, 0x29, + 0x06, 0x18, 0x21, 0x2A, + 0x06, 0x0F, 0x21, 0x2A, + 0x06, 0x0F, 0x18, 0x2A, + 0x06, 0x2E, 0x2E, 0x2E, + 0x06, 0x06, 0x2E, 0x2E, + 0x06, 0x06, 0x06, 0x2E, + 0x06, 0x34, 0x34, 0x34, + 0x06, 0x06, 0x34, 0x34, + 0x06, 0x06, 0x06, 0x34, + 0x07, 0x08, 0x08, 0x09, + 0x07, 0x08, 0x09, 0x0A, + 0x07, 0x09, 0x0A, 0x0B, + 0x07, 0x08, 0x0A, 0x0B, + 0x07, 0x08, 0x09, 0x0B, + 0x07, 0x09, 0x0A, 0x0C, + 0x07, 0x09, 0x0B, 0x0C, + 0x07, 0x08, 0x0B, 0x0C, + 0x07, 0x08, 0x0A, 0x0C, + 0x07, 0x09, 0x0B, 0x0D, + 0x07, 0x0A, 0x0C, 0x0D, + 0x07, 0x08, 0x0C, 0x0D, + 0x07, 0x08, 0x0A, 0x0D, + 0x07, 0x0D, 0x0D, 0x0D, + 0x07, 0x07, 0x0D, 0x0D, + 0x07, 0x07, 0x07, 0x0D, + 0x07, 0x0A, 0x0B, 0x0E, + 0x07, 0x0A, 0x0D, 0x0E, + 0x07, 0x08, 0x0D, 0x0E, 
+ 0x07, 0x08, 0x0B, 0x0E, + 0x07, 0x0A, 0x0C, 0x0F, + 0x07, 0x0B, 0x0D, 0x0F, + 0x07, 0x09, 0x0D, 0x0F, + 0x07, 0x09, 0x0B, 0x0F, + 0x07, 0x0F, 0x0F, 0x0F, + 0x07, 0x07, 0x0F, 0x0F, + 0x07, 0x07, 0x07, 0x0F, + 0x07, 0x0B, 0x0E, 0x12, + 0x07, 0x0C, 0x10, 0x12, + 0x07, 0x09, 0x10, 0x12, + 0x07, 0x09, 0x0D, 0x12, + 0x07, 0x12, 0x12, 0x12, + 0x07, 0x07, 0x12, 0x12, + 0x07, 0x07, 0x07, 0x12, + 0x07, 0x0C, 0x10, 0x15, + 0x07, 0x0E, 0x12, 0x15, + 0x07, 0x0A, 0x12, 0x15, + 0x07, 0x0A, 0x0E, 0x15, + 0x07, 0x15, 0x15, 0x15, + 0x07, 0x07, 0x15, 0x15, + 0x07, 0x07, 0x07, 0x15, + 0x07, 0x0D, 0x12, 0x18, + 0x07, 0x0F, 0x14, 0x18, + 0x07, 0x0B, 0x14, 0x18, + 0x07, 0x0B, 0x10, 0x18, + 0x07, 0x18, 0x18, 0x18, + 0x07, 0x07, 0x18, 0x18, + 0x07, 0x07, 0x07, 0x18, + 0x07, 0x0E, 0x14, 0x1B, + 0x07, 0x11, 0x16, 0x1B, + 0x07, 0x0C, 0x16, 0x1B, + 0x07, 0x0C, 0x11, 0x1B, + 0x07, 0x1B, 0x1B, 0x1B, + 0x07, 0x07, 0x1B, 0x1B, + 0x07, 0x07, 0x07, 0x1B, + 0x07, 0x12, 0x19, 0x1E, + 0x07, 0x0C, 0x19, 0x1E, + 0x07, 0x0C, 0x13, 0x1E, + 0x07, 0x1E, 0x1E, 0x1E, + 0x07, 0x07, 0x1E, 0x1E, + 0x07, 0x07, 0x07, 0x1E, + 0x07, 0x14, 0x1B, 0x21, + 0x07, 0x0D, 0x1B, 0x21, + 0x07, 0x0D, 0x14, 0x21, + 0x07, 0x21, 0x21, 0x21, + 0x07, 0x07, 0x21, 0x21, + 0x07, 0x07, 0x07, 0x21, + 0x07, 0x15, 0x1D, 0x24, + 0x07, 0x0E, 0x1D, 0x24, + 0x07, 0x0E, 0x16, 0x24, + 0x07, 0x24, 0x24, 0x24, + 0x07, 0x07, 0x24, 0x24, + 0x07, 0x07, 0x07, 0x24, + 0x07, 0x17, 0x1F, 0x27, + 0x07, 0x0F, 0x1F, 0x27, + 0x07, 0x0F, 0x17, 0x27, + 0x07, 0x27, 0x27, 0x27, + 0x07, 0x07, 0x27, 0x27, + 0x07, 0x07, 0x07, 0x27, + 0x07, 0x2A, 0x2A, 0x2A, + 0x07, 0x07, 0x2A, 0x2A, + 0x07, 0x07, 0x07, 0x2A, + 0x07, 0x19, 0x22, 0x2B, + 0x07, 0x10, 0x22, 0x2B, + 0x07, 0x10, 0x19, 0x2B, + 0x07, 0x2F, 0x2F, 0x2F, + 0x07, 0x07, 0x2F, 0x2F, + 0x07, 0x07, 0x07, 0x2F, + 0x07, 0x35, 0x35, 0x35, + 0x07, 0x07, 0x35, 0x35, + 0x07, 0x07, 0x07, 0x35, + 0x08, 0x09, 0x09, 0x0A, + 0x08, 0x09, 0x0A, 0x0B, + 0x08, 0x0A, 0x0B, 0x0C, + 0x08, 0x09, 0x0B, 0x0C, + 0x08, 0x09, 0x0A, 
0x0C, + 0x08, 0x0A, 0x0B, 0x0D, + 0x08, 0x0A, 0x0C, 0x0D, + 0x08, 0x09, 0x0C, 0x0D, + 0x08, 0x09, 0x0B, 0x0D, + 0x08, 0x0A, 0x0C, 0x0E, + 0x08, 0x0B, 0x0D, 0x0E, + 0x08, 0x09, 0x0D, 0x0E, + 0x08, 0x09, 0x0B, 0x0E, + 0x08, 0x0E, 0x0E, 0x0E, + 0x08, 0x08, 0x0E, 0x0E, + 0x08, 0x08, 0x08, 0x0E, + 0x08, 0x0B, 0x0C, 0x0F, + 0x08, 0x0B, 0x0E, 0x0F, + 0x08, 0x09, 0x0E, 0x0F, + 0x08, 0x09, 0x0C, 0x0F, + 0x08, 0x0B, 0x0D, 0x10, + 0x08, 0x0C, 0x0E, 0x10, + 0x08, 0x0A, 0x0E, 0x10, + 0x08, 0x0A, 0x0C, 0x10, + 0x08, 0x10, 0x10, 0x10, + 0x08, 0x08, 0x10, 0x10, + 0x08, 0x08, 0x08, 0x10, + 0x08, 0x0C, 0x0F, 0x13, + 0x08, 0x0D, 0x11, 0x13, + 0x08, 0x0A, 0x11, 0x13, + 0x08, 0x0A, 0x0E, 0x13, + 0x08, 0x13, 0x13, 0x13, + 0x08, 0x08, 0x13, 0x13, + 0x08, 0x08, 0x08, 0x13, + 0x08, 0x0D, 0x11, 0x16, + 0x08, 0x0F, 0x13, 0x16, + 0x08, 0x0B, 0x13, 0x16, + 0x08, 0x0B, 0x0F, 0x16, + 0x08, 0x16, 0x16, 0x16, + 0x08, 0x08, 0x16, 0x16, + 0x08, 0x08, 0x08, 0x16, + 0x08, 0x0E, 0x13, 0x19, + 0x08, 0x10, 0x15, 0x19, + 0x08, 0x0C, 0x15, 0x19, + 0x08, 0x0C, 0x11, 0x19, + 0x08, 0x19, 0x19, 0x19, + 0x08, 0x08, 0x19, 0x19, + 0x08, 0x08, 0x08, 0x19, + 0x08, 0x0F, 0x15, 0x1C, + 0x08, 0x12, 0x17, 0x1C, + 0x08, 0x0D, 0x17, 0x1C, + 0x08, 0x0D, 0x12, 0x1C, + 0x08, 0x1C, 0x1C, 0x1C, + 0x08, 0x08, 0x1C, 0x1C, + 0x08, 0x08, 0x08, 0x1C, + 0x08, 0x13, 0x1A, 0x1F, + 0x08, 0x0D, 0x1A, 0x1F, + 0x08, 0x0D, 0x14, 0x1F, + 0x08, 0x1F, 0x1F, 0x1F, + 0x08, 0x08, 0x1F, 0x1F, + 0x08, 0x08, 0x08, 0x1F, + 0x08, 0x15, 0x1C, 0x22, + 0x08, 0x0E, 0x1C, 0x22, + 0x08, 0x0E, 0x15, 0x22, + 0x08, 0x22, 0x22, 0x22, + 0x08, 0x08, 0x22, 0x22, + 0x08, 0x08, 0x08, 0x22, + 0x08, 0x16, 0x1E, 0x25, + 0x08, 0x0F, 0x1E, 0x25, + 0x08, 0x0F, 0x17, 0x25, + 0x08, 0x25, 0x25, 0x25, + 0x08, 0x08, 0x25, 0x25, + 0x08, 0x08, 0x08, 0x25, + 0x08, 0x18, 0x20, 0x28, + 0x08, 0x10, 0x20, 0x28, + 0x08, 0x10, 0x18, 0x28, + 0x08, 0x28, 0x28, 0x28, + 0x08, 0x08, 0x28, 0x28, + 0x08, 0x08, 0x08, 0x28, + 0x08, 0x2B, 0x2B, 0x2B, + 0x08, 0x08, 0x2B, 0x2B, + 0x08, 0x08, 
0x08, 0x2B, + 0x08, 0x1A, 0x23, 0x2C, + 0x08, 0x11, 0x23, 0x2C, + 0x08, 0x11, 0x1A, 0x2C, + 0x08, 0x30, 0x30, 0x30, + 0x08, 0x08, 0x30, 0x30, + 0x08, 0x08, 0x08, 0x30, + 0x08, 0x36, 0x36, 0x36, + 0x08, 0x08, 0x36, 0x36, + 0x08, 0x08, 0x08, 0x36, + 0x09, 0x0A, 0x0A, 0x0B, + 0x09, 0x0A, 0x0B, 0x0C, + 0x09, 0x0B, 0x0C, 0x0D, + 0x09, 0x0A, 0x0C, 0x0D, + 0x09, 0x0A, 0x0B, 0x0D, + 0x09, 0x0B, 0x0C, 0x0E, + 0x09, 0x0B, 0x0D, 0x0E, + 0x09, 0x0A, 0x0D, 0x0E, + 0x09, 0x0A, 0x0C, 0x0E, + 0x09, 0x0B, 0x0D, 0x0F, + 0x09, 0x0C, 0x0E, 0x0F, + 0x09, 0x0A, 0x0E, 0x0F, + 0x09, 0x0A, 0x0C, 0x0F, + 0x09, 0x0F, 0x0F, 0x0F, + 0x09, 0x09, 0x0F, 0x0F, + 0x09, 0x09, 0x09, 0x0F, + 0x09, 0x0C, 0x0D, 0x10, + 0x09, 0x0C, 0x0F, 0x10, + 0x09, 0x0A, 0x0F, 0x10, + 0x09, 0x0A, 0x0D, 0x10, + 0x09, 0x0C, 0x0E, 0x11, + 0x09, 0x0D, 0x0F, 0x11, + 0x09, 0x0B, 0x0F, 0x11, + 0x09, 0x0B, 0x0D, 0x11, + 0x09, 0x11, 0x11, 0x11, + 0x09, 0x09, 0x11, 0x11, + 0x09, 0x09, 0x09, 0x11, + 0x09, 0x0D, 0x10, 0x14, + 0x09, 0x0E, 0x12, 0x14, + 0x09, 0x0B, 0x12, 0x14, + 0x09, 0x0B, 0x0F, 0x14, + 0x09, 0x14, 0x14, 0x14, + 0x09, 0x09, 0x14, 0x14, + 0x09, 0x09, 0x09, 0x14, + 0x09, 0x0E, 0x12, 0x17, + 0x09, 0x10, 0x14, 0x17, + 0x09, 0x0C, 0x14, 0x17, + 0x09, 0x0C, 0x10, 0x17, + 0x09, 0x17, 0x17, 0x17, + 0x09, 0x09, 0x17, 0x17, + 0x09, 0x09, 0x09, 0x17, + 0x09, 0x0F, 0x14, 0x1A, + 0x09, 0x11, 0x16, 0x1A, + 0x09, 0x0D, 0x16, 0x1A, + 0x09, 0x0D, 0x12, 0x1A, + 0x09, 0x1A, 0x1A, 0x1A, + 0x09, 0x09, 0x1A, 0x1A, + 0x09, 0x09, 0x09, 0x1A, + 0x09, 0x10, 0x16, 0x1D, + 0x09, 0x13, 0x18, 0x1D, + 0x09, 0x0E, 0x18, 0x1D, + 0x09, 0x0E, 0x13, 0x1D, + 0x09, 0x1D, 0x1D, 0x1D, + 0x09, 0x09, 0x1D, 0x1D, + 0x09, 0x09, 0x09, 0x1D, + 0x09, 0x14, 0x1B, 0x20, + 0x09, 0x0E, 0x1B, 0x20, + 0x09, 0x0E, 0x15, 0x20, + 0x09, 0x20, 0x20, 0x20, + 0x09, 0x09, 0x20, 0x20, + 0x09, 0x09, 0x09, 0x20, + 0x09, 0x16, 0x1D, 0x23, + 0x09, 0x0F, 0x1D, 0x23, + 0x09, 0x0F, 0x16, 0x23, + 0x09, 0x23, 0x23, 0x23, + 0x09, 0x09, 0x23, 0x23, + 0x09, 0x09, 0x09, 0x23, + 0x09, 
0x17, 0x1F, 0x26, + 0x09, 0x10, 0x1F, 0x26, + 0x09, 0x10, 0x18, 0x26, + 0x09, 0x26, 0x26, 0x26, + 0x09, 0x09, 0x26, 0x26, + 0x09, 0x09, 0x09, 0x26, + 0x09, 0x19, 0x21, 0x29, + 0x09, 0x11, 0x21, 0x29, + 0x09, 0x11, 0x19, 0x29, + 0x09, 0x29, 0x29, 0x29, + 0x09, 0x09, 0x29, 0x29, + 0x09, 0x09, 0x09, 0x29, + 0x09, 0x2C, 0x2C, 0x2C, + 0x09, 0x09, 0x2C, 0x2C, + 0x09, 0x09, 0x09, 0x2C, + 0x09, 0x1B, 0x24, 0x2D, + 0x09, 0x12, 0x24, 0x2D, + 0x09, 0x12, 0x1B, 0x2D, + 0x09, 0x31, 0x31, 0x31, + 0x09, 0x09, 0x31, 0x31, + 0x09, 0x09, 0x09, 0x31, + 0x09, 0x37, 0x37, 0x37, + 0x09, 0x09, 0x37, 0x37, + 0x09, 0x09, 0x09, 0x37, + 0x0A, 0x0B, 0x0B, 0x0C, + 0x0A, 0x0B, 0x0C, 0x0D, + 0x0A, 0x0C, 0x0D, 0x0E, + 0x0A, 0x0B, 0x0D, 0x0E, + 0x0A, 0x0B, 0x0C, 0x0E, + 0x0A, 0x0C, 0x0D, 0x0F, + 0x0A, 0x0C, 0x0E, 0x0F, + 0x0A, 0x0B, 0x0E, 0x0F, + 0x0A, 0x0B, 0x0D, 0x0F, + 0x0A, 0x0C, 0x0E, 0x10, + 0x0A, 0x0D, 0x0F, 0x10, + 0x0A, 0x0B, 0x0F, 0x10, + 0x0A, 0x0B, 0x0D, 0x10, + 0x0A, 0x10, 0x10, 0x10, + 0x0A, 0x0A, 0x10, 0x10, + 0x0A, 0x0A, 0x0A, 0x10, + 0x0A, 0x0D, 0x0E, 0x11, + 0x0A, 0x0D, 0x10, 0x11, + 0x0A, 0x0B, 0x10, 0x11, + 0x0A, 0x0B, 0x0E, 0x11, + 0x0A, 0x0D, 0x0F, 0x12, + 0x0A, 0x0E, 0x10, 0x12, + 0x0A, 0x0C, 0x10, 0x12, + 0x0A, 0x0C, 0x0E, 0x12, + 0x0A, 0x12, 0x12, 0x12, + 0x0A, 0x0A, 0x12, 0x12, + 0x0A, 0x0A, 0x0A, 0x12, + 0x0A, 0x0E, 0x11, 0x15, + 0x0A, 0x0F, 0x13, 0x15, + 0x0A, 0x0C, 0x13, 0x15, + 0x0A, 0x0C, 0x10, 0x15, + 0x0A, 0x15, 0x15, 0x15, + 0x0A, 0x0A, 0x15, 0x15, + 0x0A, 0x0A, 0x0A, 0x15, + 0x0A, 0x0F, 0x13, 0x18, + 0x0A, 0x11, 0x15, 0x18, + 0x0A, 0x0D, 0x15, 0x18, + 0x0A, 0x0D, 0x11, 0x18, + 0x0A, 0x18, 0x18, 0x18, + 0x0A, 0x0A, 0x18, 0x18, + 0x0A, 0x0A, 0x0A, 0x18, + 0x0A, 0x10, 0x15, 0x1B, + 0x0A, 0x12, 0x17, 0x1B, + 0x0A, 0x0E, 0x17, 0x1B, + 0x0A, 0x0E, 0x13, 0x1B, + 0x0A, 0x1B, 0x1B, 0x1B, + 0x0A, 0x0A, 0x1B, 0x1B, + 0x0A, 0x0A, 0x0A, 0x1B, + 0x0A, 0x11, 0x17, 0x1E, + 0x0A, 0x14, 0x19, 0x1E, + 0x0A, 0x0F, 0x19, 0x1E, + 0x0A, 0x0F, 0x14, 0x1E, + 0x0A, 0x1E, 0x1E, 0x1E, + 
0x0A, 0x0A, 0x1E, 0x1E, + 0x0A, 0x0A, 0x0A, 0x1E, + 0x0A, 0x15, 0x1C, 0x21, + 0x0A, 0x0F, 0x1C, 0x21, + 0x0A, 0x0F, 0x16, 0x21, + 0x0A, 0x21, 0x21, 0x21, + 0x0A, 0x0A, 0x21, 0x21, + 0x0A, 0x0A, 0x0A, 0x21, + 0x0A, 0x17, 0x1E, 0x24, + 0x0A, 0x10, 0x1E, 0x24, + 0x0A, 0x10, 0x17, 0x24, + 0x0A, 0x24, 0x24, 0x24, + 0x0A, 0x0A, 0x24, 0x24, + 0x0A, 0x0A, 0x0A, 0x24, + 0x0A, 0x18, 0x20, 0x27, + 0x0A, 0x11, 0x20, 0x27, + 0x0A, 0x11, 0x19, 0x27, + 0x0A, 0x27, 0x27, 0x27, + 0x0A, 0x0A, 0x27, 0x27, + 0x0A, 0x0A, 0x0A, 0x27, + 0x0A, 0x1A, 0x22, 0x2A, + 0x0A, 0x12, 0x22, 0x2A, + 0x0A, 0x12, 0x1A, 0x2A, + 0x0A, 0x2A, 0x2A, 0x2A, + 0x0A, 0x0A, 0x2A, 0x2A, + 0x0A, 0x0A, 0x0A, 0x2A, + 0x0A, 0x2D, 0x2D, 0x2D, + 0x0A, 0x0A, 0x2D, 0x2D, + 0x0A, 0x0A, 0x0A, 0x2D, + 0x0A, 0x1C, 0x25, 0x2E, + 0x0A, 0x13, 0x25, 0x2E, + 0x0A, 0x13, 0x1C, 0x2E, + 0x0A, 0x32, 0x32, 0x32, + 0x0A, 0x0A, 0x32, 0x32, + 0x0A, 0x0A, 0x0A, 0x32, + 0x0A, 0x38, 0x38, 0x38, + 0x0A, 0x0A, 0x38, 0x38, + 0x0A, 0x0A, 0x0A, 0x38, + 0x0B, 0x0C, 0x0C, 0x0D, + 0x0B, 0x0C, 0x0D, 0x0E, + 0x0B, 0x0D, 0x0E, 0x0F, + 0x0B, 0x0C, 0x0E, 0x0F, + 0x0B, 0x0C, 0x0D, 0x0F, + 0x0B, 0x0D, 0x0E, 0x10, + 0x0B, 0x0D, 0x0F, 0x10, + 0x0B, 0x0C, 0x0F, 0x10, + 0x0B, 0x0C, 0x0E, 0x10, + 0x0B, 0x0D, 0x0F, 0x11, + 0x0B, 0x0E, 0x10, 0x11, + 0x0B, 0x0C, 0x10, 0x11, + 0x0B, 0x0C, 0x0E, 0x11, + 0x0B, 0x11, 0x11, 0x11, + 0x0B, 0x0B, 0x11, 0x11, + 0x0B, 0x0B, 0x0B, 0x11, + 0x0B, 0x0E, 0x0F, 0x12, + 0x0B, 0x0E, 0x11, 0x12, + 0x0B, 0x0C, 0x11, 0x12, + 0x0B, 0x0C, 0x0F, 0x12, + 0x0B, 0x0E, 0x10, 0x13, + 0x0B, 0x0F, 0x11, 0x13, + 0x0B, 0x0D, 0x11, 0x13, + 0x0B, 0x0D, 0x0F, 0x13, + 0x0B, 0x13, 0x13, 0x13, + 0x0B, 0x0B, 0x13, 0x13, + 0x0B, 0x0B, 0x0B, 0x13, + 0x0B, 0x0F, 0x12, 0x16, + 0x0B, 0x10, 0x14, 0x16, + 0x0B, 0x0D, 0x14, 0x16, + 0x0B, 0x0D, 0x11, 0x16, + 0x0B, 0x16, 0x16, 0x16, + 0x0B, 0x0B, 0x16, 0x16, + 0x0B, 0x0B, 0x0B, 0x16, + 0x0B, 0x10, 0x14, 0x19, + 0x0B, 0x12, 0x16, 0x19, + 0x0B, 0x0E, 0x16, 0x19, + 0x0B, 0x0E, 0x12, 0x19, + 0x0B, 0x19, 0x19, 0x19, 
+ 0x0B, 0x0B, 0x19, 0x19, + 0x0B, 0x0B, 0x0B, 0x19, + 0x0B, 0x11, 0x16, 0x1C, + 0x0B, 0x13, 0x18, 0x1C, + 0x0B, 0x0F, 0x18, 0x1C, + 0x0B, 0x0F, 0x14, 0x1C, + 0x0B, 0x1C, 0x1C, 0x1C, + 0x0B, 0x0B, 0x1C, 0x1C, + 0x0B, 0x0B, 0x0B, 0x1C, + 0x0B, 0x12, 0x18, 0x1F, + 0x0B, 0x15, 0x1A, 0x1F, + 0x0B, 0x10, 0x1A, 0x1F, + 0x0B, 0x10, 0x15, 0x1F, + 0x0B, 0x1F, 0x1F, 0x1F, + 0x0B, 0x0B, 0x1F, 0x1F, + 0x0B, 0x0B, 0x0B, 0x1F, + 0x0B, 0x16, 0x1D, 0x22, + 0x0B, 0x10, 0x1D, 0x22, + 0x0B, 0x10, 0x17, 0x22, + 0x0B, 0x22, 0x22, 0x22, + 0x0B, 0x0B, 0x22, 0x22, + 0x0B, 0x0B, 0x0B, 0x22, + 0x0B, 0x18, 0x1F, 0x25, + 0x0B, 0x11, 0x1F, 0x25, + 0x0B, 0x11, 0x18, 0x25, + 0x0B, 0x25, 0x25, 0x25, + 0x0B, 0x0B, 0x25, 0x25, + 0x0B, 0x0B, 0x0B, 0x25, + 0x0B, 0x19, 0x21, 0x28, + 0x0B, 0x12, 0x21, 0x28, + 0x0B, 0x12, 0x1A, 0x28, + 0x0B, 0x28, 0x28, 0x28, + 0x0B, 0x0B, 0x28, 0x28, + 0x0B, 0x0B, 0x0B, 0x28, + 0x0B, 0x1B, 0x23, 0x2B, + 0x0B, 0x13, 0x23, 0x2B, + 0x0B, 0x13, 0x1B, 0x2B, + 0x0B, 0x2B, 0x2B, 0x2B, + 0x0B, 0x0B, 0x2B, 0x2B, + 0x0B, 0x0B, 0x0B, 0x2B, + 0x0B, 0x2E, 0x2E, 0x2E, + 0x0B, 0x0B, 0x2E, 0x2E, + 0x0B, 0x0B, 0x0B, 0x2E, + 0x0B, 0x1D, 0x26, 0x2F, + 0x0B, 0x14, 0x26, 0x2F, + 0x0B, 0x14, 0x1D, 0x2F, + 0x0B, 0x33, 0x33, 0x33, + 0x0B, 0x0B, 0x33, 0x33, + 0x0B, 0x0B, 0x0B, 0x33, + 0x0B, 0x39, 0x39, 0x39, + 0x0B, 0x0B, 0x39, 0x39, + 0x0B, 0x0B, 0x0B, 0x39, + 0x0C, 0x0D, 0x0D, 0x0E, + 0x0C, 0x0D, 0x0E, 0x0F, + 0x0C, 0x0E, 0x0F, 0x10, + 0x0C, 0x0D, 0x0F, 0x10, + 0x0C, 0x0D, 0x0E, 0x10, + 0x0C, 0x0E, 0x0F, 0x11, + 0x0C, 0x0E, 0x10, 0x11, + 0x0C, 0x0D, 0x10, 0x11, + 0x0C, 0x0D, 0x0F, 0x11, + 0x0C, 0x0E, 0x10, 0x12, + 0x0C, 0x0F, 0x11, 0x12, + 0x0C, 0x0D, 0x11, 0x12, + 0x0C, 0x0D, 0x0F, 0x12, + 0x0C, 0x12, 0x12, 0x12, + 0x0C, 0x0C, 0x12, 0x12, + 0x0C, 0x0C, 0x0C, 0x12, + 0x0C, 0x0F, 0x10, 0x13, + 0x0C, 0x0F, 0x12, 0x13, + 0x0C, 0x0D, 0x12, 0x13, + 0x0C, 0x0D, 0x10, 0x13, + 0x0C, 0x0F, 0x11, 0x14, + 0x0C, 0x10, 0x12, 0x14, + 0x0C, 0x0E, 0x12, 0x14, + 0x0C, 0x0E, 0x10, 0x14, + 0x0C, 0x14, 0x14, 
0x14, + 0x0C, 0x0C, 0x14, 0x14, + 0x0C, 0x0C, 0x0C, 0x14, + 0x0C, 0x10, 0x13, 0x17, + 0x0C, 0x11, 0x15, 0x17, + 0x0C, 0x0E, 0x15, 0x17, + 0x0C, 0x0E, 0x12, 0x17, + 0x0C, 0x17, 0x17, 0x17, + 0x0C, 0x0C, 0x17, 0x17, + 0x0C, 0x0C, 0x0C, 0x17, + 0x0C, 0x11, 0x15, 0x1A, + 0x0C, 0x13, 0x17, 0x1A, + 0x0C, 0x0F, 0x17, 0x1A, + 0x0C, 0x0F, 0x13, 0x1A, + 0x0C, 0x1A, 0x1A, 0x1A, + 0x0C, 0x0C, 0x1A, 0x1A, + 0x0C, 0x0C, 0x0C, 0x1A, + 0x0C, 0x12, 0x17, 0x1D, + 0x0C, 0x14, 0x19, 0x1D, + 0x0C, 0x10, 0x19, 0x1D, + 0x0C, 0x10, 0x15, 0x1D, + 0x0C, 0x1D, 0x1D, 0x1D, + 0x0C, 0x0C, 0x1D, 0x1D, + 0x0C, 0x0C, 0x0C, 0x1D, + 0x0C, 0x13, 0x19, 0x20, + 0x0C, 0x16, 0x1B, 0x20, + 0x0C, 0x11, 0x1B, 0x20, + 0x0C, 0x11, 0x16, 0x20, + 0x0C, 0x20, 0x20, 0x20, + 0x0C, 0x0C, 0x20, 0x20, + 0x0C, 0x0C, 0x0C, 0x20, + 0x0C, 0x17, 0x1E, 0x23, + 0x0C, 0x11, 0x1E, 0x23, + 0x0C, 0x11, 0x18, 0x23, + 0x0C, 0x23, 0x23, 0x23, + 0x0C, 0x0C, 0x23, 0x23, + 0x0C, 0x0C, 0x0C, 0x23, + 0x0C, 0x19, 0x20, 0x26, + 0x0C, 0x12, 0x20, 0x26, + 0x0C, 0x12, 0x19, 0x26, + 0x0C, 0x26, 0x26, 0x26, + 0x0C, 0x0C, 0x26, 0x26, + 0x0C, 0x0C, 0x0C, 0x26, + 0x0C, 0x1A, 0x22, 0x29, + 0x0C, 0x13, 0x22, 0x29, + 0x0C, 0x13, 0x1B, 0x29, + 0x0C, 0x29, 0x29, 0x29, + 0x0C, 0x0C, 0x29, 0x29, + 0x0C, 0x0C, 0x0C, 0x29, + 0x0C, 0x1C, 0x24, 0x2C, + 0x0C, 0x14, 0x24, 0x2C, + 0x0C, 0x14, 0x1C, 0x2C, + 0x0C, 0x2C, 0x2C, 0x2C, + 0x0C, 0x0C, 0x2C, 0x2C, + 0x0C, 0x0C, 0x0C, 0x2C, + 0x0C, 0x2F, 0x2F, 0x2F, + 0x0C, 0x0C, 0x2F, 0x2F, + 0x0C, 0x0C, 0x0C, 0x2F, + 0x0C, 0x1E, 0x27, 0x30, + 0x0C, 0x15, 0x27, 0x30, + 0x0C, 0x15, 0x1E, 0x30, + 0x0C, 0x34, 0x34, 0x34, + 0x0C, 0x0C, 0x34, 0x34, + 0x0C, 0x0C, 0x0C, 0x34, + 0x0C, 0x3A, 0x3A, 0x3A, + 0x0C, 0x0C, 0x3A, 0x3A, + 0x0C, 0x0C, 0x0C, 0x3A, + 0x0D, 0x0E, 0x0E, 0x0F, + 0x0D, 0x0E, 0x0F, 0x10, + 0x0D, 0x0F, 0x10, 0x11, + 0x0D, 0x0E, 0x10, 0x11, + 0x0D, 0x0E, 0x0F, 0x11, + 0x0D, 0x0F, 0x10, 0x12, + 0x0D, 0x0F, 0x11, 0x12, + 0x0D, 0x0E, 0x11, 0x12, + 0x0D, 0x0E, 0x10, 0x12, + 0x0D, 0x0F, 0x11, 0x13, + 0x0D, 0x10, 
0x12, 0x13, + 0x0D, 0x0E, 0x12, 0x13, + 0x0D, 0x0E, 0x10, 0x13, + 0x0D, 0x13, 0x13, 0x13, + 0x0D, 0x0D, 0x13, 0x13, + 0x0D, 0x0D, 0x0D, 0x13, + 0x0D, 0x10, 0x11, 0x14, + 0x0D, 0x10, 0x13, 0x14, + 0x0D, 0x0E, 0x13, 0x14, + 0x0D, 0x0E, 0x11, 0x14, + 0x0D, 0x10, 0x12, 0x15, + 0x0D, 0x11, 0x13, 0x15, + 0x0D, 0x0F, 0x13, 0x15, + 0x0D, 0x0F, 0x11, 0x15, + 0x0D, 0x15, 0x15, 0x15, + 0x0D, 0x0D, 0x15, 0x15, + 0x0D, 0x0D, 0x0D, 0x15, + 0x0D, 0x11, 0x14, 0x18, + 0x0D, 0x12, 0x16, 0x18, + 0x0D, 0x0F, 0x16, 0x18, + 0x0D, 0x0F, 0x13, 0x18, + 0x0D, 0x18, 0x18, 0x18, + 0x0D, 0x0D, 0x18, 0x18, + 0x0D, 0x0D, 0x0D, 0x18, + 0x0D, 0x12, 0x16, 0x1B, + 0x0D, 0x14, 0x18, 0x1B, + 0x0D, 0x10, 0x18, 0x1B, + 0x0D, 0x10, 0x14, 0x1B, + 0x0D, 0x1B, 0x1B, 0x1B, + 0x0D, 0x0D, 0x1B, 0x1B, + 0x0D, 0x0D, 0x0D, 0x1B, + 0x0D, 0x13, 0x18, 0x1E, + 0x0D, 0x15, 0x1A, 0x1E, + 0x0D, 0x11, 0x1A, 0x1E, + 0x0D, 0x11, 0x16, 0x1E, + 0x0D, 0x1E, 0x1E, 0x1E, + 0x0D, 0x0D, 0x1E, 0x1E, + 0x0D, 0x0D, 0x0D, 0x1E, + 0x0D, 0x14, 0x1A, 0x21, + 0x0D, 0x17, 0x1C, 0x21, + 0x0D, 0x12, 0x1C, 0x21, + 0x0D, 0x12, 0x17, 0x21, + 0x0D, 0x21, 0x21, 0x21, + 0x0D, 0x0D, 0x21, 0x21, + 0x0D, 0x0D, 0x0D, 0x21, + 0x0D, 0x18, 0x1F, 0x24, + 0x0D, 0x12, 0x1F, 0x24, + 0x0D, 0x12, 0x19, 0x24, + 0x0D, 0x24, 0x24, 0x24, + 0x0D, 0x0D, 0x24, 0x24, + 0x0D, 0x0D, 0x0D, 0x24, + 0x0D, 0x1A, 0x21, 0x27, + 0x0D, 0x13, 0x21, 0x27, + 0x0D, 0x13, 0x1A, 0x27, + 0x0D, 0x27, 0x27, 0x27, + 0x0D, 0x0D, 0x27, 0x27, + 0x0D, 0x0D, 0x0D, 0x27, + 0x0D, 0x1B, 0x23, 0x2A, + 0x0D, 0x14, 0x23, 0x2A, + 0x0D, 0x14, 0x1C, 0x2A, + 0x0D, 0x2A, 0x2A, 0x2A, + 0x0D, 0x0D, 0x2A, 0x2A, + 0x0D, 0x0D, 0x0D, 0x2A, + 0x0D, 0x1D, 0x25, 0x2D, + 0x0D, 0x15, 0x25, 0x2D, + 0x0D, 0x15, 0x1D, 0x2D, + 0x0D, 0x2D, 0x2D, 0x2D, + 0x0D, 0x0D, 0x2D, 0x2D, + 0x0D, 0x0D, 0x0D, 0x2D, + 0x0D, 0x30, 0x30, 0x30, + 0x0D, 0x0D, 0x30, 0x30, + 0x0D, 0x0D, 0x0D, 0x30, + 0x0D, 0x1F, 0x28, 0x31, + 0x0D, 0x16, 0x28, 0x31, + 0x0D, 0x16, 0x1F, 0x31, + 0x0D, 0x35, 0x35, 0x35, + 0x0D, 0x0D, 0x35, 0x35, + 0x0D, 
0x0D, 0x0D, 0x35, + 0x0D, 0x3B, 0x3B, 0x3B, + 0x0D, 0x0D, 0x3B, 0x3B, + 0x0D, 0x0D, 0x0D, 0x3B, + 0x0E, 0x0F, 0x0F, 0x10, + 0x0E, 0x0F, 0x10, 0x11, + 0x0E, 0x10, 0x11, 0x12, + 0x0E, 0x0F, 0x11, 0x12, + 0x0E, 0x0F, 0x10, 0x12, + 0x0E, 0x10, 0x11, 0x13, + 0x0E, 0x10, 0x12, 0x13, + 0x0E, 0x0F, 0x12, 0x13, + 0x0E, 0x0F, 0x11, 0x13, + 0x0E, 0x10, 0x12, 0x14, + 0x0E, 0x11, 0x13, 0x14, + 0x0E, 0x0F, 0x13, 0x14, + 0x0E, 0x0F, 0x11, 0x14, + 0x0E, 0x14, 0x14, 0x14, + 0x0E, 0x0E, 0x14, 0x14, + 0x0E, 0x0E, 0x0E, 0x14, + 0x0E, 0x11, 0x12, 0x15, + 0x0E, 0x11, 0x14, 0x15, + 0x0E, 0x0F, 0x14, 0x15, + 0x0E, 0x0F, 0x12, 0x15, + 0x0E, 0x11, 0x13, 0x16, + 0x0E, 0x12, 0x14, 0x16, + 0x0E, 0x10, 0x14, 0x16, + 0x0E, 0x10, 0x12, 0x16, + 0x0E, 0x16, 0x16, 0x16, + 0x0E, 0x0E, 0x16, 0x16, + 0x0E, 0x0E, 0x0E, 0x16, + 0x0E, 0x12, 0x15, 0x19, + 0x0E, 0x13, 0x17, 0x19, + 0x0E, 0x10, 0x17, 0x19, + 0x0E, 0x10, 0x14, 0x19, + 0x0E, 0x19, 0x19, 0x19, + 0x0E, 0x0E, 0x19, 0x19, + 0x0E, 0x0E, 0x0E, 0x19, + 0x0E, 0x13, 0x17, 0x1C, + 0x0E, 0x15, 0x19, 0x1C, + 0x0E, 0x11, 0x19, 0x1C, + 0x0E, 0x11, 0x15, 0x1C, + 0x0E, 0x1C, 0x1C, 0x1C, + 0x0E, 0x0E, 0x1C, 0x1C, + 0x0E, 0x0E, 0x0E, 0x1C, + 0x0E, 0x14, 0x19, 0x1F, + 0x0E, 0x16, 0x1B, 0x1F, + 0x0E, 0x12, 0x1B, 0x1F, + 0x0E, 0x12, 0x17, 0x1F, + 0x0E, 0x1F, 0x1F, 0x1F, + 0x0E, 0x0E, 0x1F, 0x1F, + 0x0E, 0x0E, 0x0E, 0x1F, + 0x0E, 0x15, 0x1B, 0x22, + 0x0E, 0x18, 0x1D, 0x22, + 0x0E, 0x13, 0x1D, 0x22, + 0x0E, 0x13, 0x18, 0x22, + 0x0E, 0x22, 0x22, 0x22, + 0x0E, 0x0E, 0x22, 0x22, + 0x0E, 0x0E, 0x0E, 0x22, + 0x0E, 0x19, 0x20, 0x25, + 0x0E, 0x13, 0x20, 0x25, + 0x0E, 0x13, 0x1A, 0x25, + 0x0E, 0x25, 0x25, 0x25, + 0x0E, 0x0E, 0x25, 0x25, + 0x0E, 0x0E, 0x0E, 0x25, + 0x0E, 0x1B, 0x22, 0x28, + 0x0E, 0x14, 0x22, 0x28, + 0x0E, 0x14, 0x1B, 0x28, + 0x0E, 0x28, 0x28, 0x28, + 0x0E, 0x0E, 0x28, 0x28, + 0x0E, 0x0E, 0x0E, 0x28, + 0x0E, 0x1C, 0x24, 0x2B, + 0x0E, 0x15, 0x24, 0x2B, + 0x0E, 0x15, 0x1D, 0x2B, + 0x0E, 0x2B, 0x2B, 0x2B, + 0x0E, 0x0E, 0x2B, 0x2B, + 0x0E, 0x0E, 0x0E, 0x2B, + 
0x0E, 0x1E, 0x26, 0x2E, + 0x0E, 0x16, 0x26, 0x2E, + 0x0E, 0x16, 0x1E, 0x2E, + 0x0E, 0x2E, 0x2E, 0x2E, + 0x0E, 0x0E, 0x2E, 0x2E, + 0x0E, 0x0E, 0x0E, 0x2E, + 0x0E, 0x31, 0x31, 0x31, + 0x0E, 0x0E, 0x31, 0x31, + 0x0E, 0x0E, 0x0E, 0x31, + 0x0E, 0x20, 0x29, 0x32, + 0x0E, 0x17, 0x29, 0x32, + 0x0E, 0x17, 0x20, 0x32, + 0x0E, 0x36, 0x36, 0x36, + 0x0E, 0x0E, 0x36, 0x36, + 0x0E, 0x0E, 0x0E, 0x36, + 0x0E, 0x3C, 0x3C, 0x3C, + 0x0E, 0x0E, 0x3C, 0x3C, + 0x0E, 0x0E, 0x0E, 0x3C, + 0x0F, 0x10, 0x10, 0x11, + 0x0F, 0x10, 0x11, 0x12, + 0x0F, 0x11, 0x12, 0x13, + 0x0F, 0x10, 0x12, 0x13, + 0x0F, 0x10, 0x11, 0x13, + 0x0F, 0x11, 0x12, 0x14, + 0x0F, 0x11, 0x13, 0x14, + 0x0F, 0x10, 0x13, 0x14, + 0x0F, 0x10, 0x12, 0x14, + 0x0F, 0x11, 0x13, 0x15, + 0x0F, 0x12, 0x14, 0x15, + 0x0F, 0x10, 0x14, 0x15, + 0x0F, 0x10, 0x12, 0x15, + 0x0F, 0x15, 0x15, 0x15, + 0x0F, 0x0F, 0x15, 0x15, + 0x0F, 0x0F, 0x0F, 0x15, + 0x0F, 0x12, 0x13, 0x16, + 0x0F, 0x12, 0x15, 0x16, + 0x0F, 0x10, 0x15, 0x16, + 0x0F, 0x10, 0x13, 0x16, + 0x0F, 0x12, 0x14, 0x17, + 0x0F, 0x13, 0x15, 0x17, + 0x0F, 0x11, 0x15, 0x17, + 0x0F, 0x11, 0x13, 0x17, + 0x0F, 0x17, 0x17, 0x17, + 0x0F, 0x0F, 0x17, 0x17, + 0x0F, 0x0F, 0x0F, 0x17, + 0x0F, 0x13, 0x16, 0x1A, + 0x0F, 0x14, 0x18, 0x1A, + 0x0F, 0x11, 0x18, 0x1A, + 0x0F, 0x11, 0x15, 0x1A, + 0x0F, 0x1A, 0x1A, 0x1A, + 0x0F, 0x0F, 0x1A, 0x1A, + 0x0F, 0x0F, 0x0F, 0x1A, + 0x0F, 0x14, 0x18, 0x1D, + 0x0F, 0x16, 0x1A, 0x1D, + 0x0F, 0x12, 0x1A, 0x1D, + 0x0F, 0x12, 0x16, 0x1D, + 0x0F, 0x1D, 0x1D, 0x1D, + 0x0F, 0x0F, 0x1D, 0x1D, + 0x0F, 0x0F, 0x0F, 0x1D, + 0x0F, 0x15, 0x1A, 0x20, + 0x0F, 0x17, 0x1C, 0x20, + 0x0F, 0x13, 0x1C, 0x20, + 0x0F, 0x13, 0x18, 0x20, + 0x0F, 0x20, 0x20, 0x20, + 0x0F, 0x0F, 0x20, 0x20, + 0x0F, 0x0F, 0x0F, 0x20, + 0x0F, 0x16, 0x1C, 0x23, + 0x0F, 0x19, 0x1E, 0x23, + 0x0F, 0x14, 0x1E, 0x23, + 0x0F, 0x14, 0x19, 0x23, + 0x0F, 0x23, 0x23, 0x23, + 0x0F, 0x0F, 0x23, 0x23, + 0x0F, 0x0F, 0x0F, 0x23, + 0x0F, 0x1A, 0x21, 0x26, + 0x0F, 0x14, 0x21, 0x26, + 0x0F, 0x14, 0x1B, 0x26, + 0x0F, 0x26, 0x26, 0x26, 
+ 0x0F, 0x0F, 0x26, 0x26, + 0x0F, 0x0F, 0x0F, 0x26, + 0x0F, 0x1C, 0x23, 0x29, + 0x0F, 0x15, 0x23, 0x29, + 0x0F, 0x15, 0x1C, 0x29, + 0x0F, 0x29, 0x29, 0x29, + 0x0F, 0x0F, 0x29, 0x29, + 0x0F, 0x0F, 0x0F, 0x29, + 0x0F, 0x1D, 0x25, 0x2C, + 0x0F, 0x16, 0x25, 0x2C, + 0x0F, 0x16, 0x1E, 0x2C, + 0x0F, 0x2C, 0x2C, 0x2C, + 0x0F, 0x0F, 0x2C, 0x2C, + 0x0F, 0x0F, 0x0F, 0x2C, + 0x0F, 0x1F, 0x27, 0x2F, + 0x0F, 0x17, 0x27, 0x2F, + 0x0F, 0x17, 0x1F, 0x2F, + 0x0F, 0x2F, 0x2F, 0x2F, + 0x0F, 0x0F, 0x2F, 0x2F, + 0x0F, 0x0F, 0x0F, 0x2F, + 0x0F, 0x32, 0x32, 0x32, + 0x0F, 0x0F, 0x32, 0x32, + 0x0F, 0x0F, 0x0F, 0x32, + 0x0F, 0x21, 0x2A, 0x33, + 0x0F, 0x18, 0x2A, 0x33, + 0x0F, 0x18, 0x21, 0x33, + 0x0F, 0x37, 0x37, 0x37, + 0x0F, 0x0F, 0x37, 0x37, + 0x0F, 0x0F, 0x0F, 0x37, + 0x0F, 0x3D, 0x3D, 0x3D, + 0x0F, 0x0F, 0x3D, 0x3D, + 0x0F, 0x0F, 0x0F, 0x3D, + 0x10, 0x11, 0x11, 0x12, + 0x10, 0x11, 0x12, 0x13, + 0x10, 0x12, 0x13, 0x14, + 0x10, 0x11, 0x13, 0x14, + 0x10, 0x11, 0x12, 0x14, + 0x10, 0x12, 0x13, 0x15, + 0x10, 0x12, 0x14, 0x15, + 0x10, 0x11, 0x14, 0x15, + 0x10, 0x11, 0x13, 0x15, + 0x10, 0x12, 0x14, 0x16, + 0x10, 0x13, 0x15, 0x16, + 0x10, 0x11, 0x15, 0x16, + 0x10, 0x11, 0x13, 0x16, + 0x10, 0x16, 0x16, 0x16, + 0x10, 0x10, 0x16, 0x16, + 0x10, 0x10, 0x10, 0x16, + 0x10, 0x13, 0x14, 0x17, + 0x10, 0x13, 0x16, 0x17, + 0x10, 0x11, 0x16, 0x17, + 0x10, 0x11, 0x14, 0x17, + 0x10, 0x13, 0x15, 0x18, + 0x10, 0x14, 0x16, 0x18, + 0x10, 0x12, 0x16, 0x18, + 0x10, 0x12, 0x14, 0x18, + 0x10, 0x18, 0x18, 0x18, + 0x10, 0x10, 0x18, 0x18, + 0x10, 0x10, 0x10, 0x18, + 0x10, 0x14, 0x17, 0x1B, + 0x10, 0x15, 0x19, 0x1B, + 0x10, 0x12, 0x19, 0x1B, + 0x10, 0x12, 0x16, 0x1B, + 0x10, 0x1B, 0x1B, 0x1B, + 0x10, 0x10, 0x1B, 0x1B, + 0x10, 0x10, 0x10, 0x1B, + 0x10, 0x15, 0x19, 0x1E, + 0x10, 0x17, 0x1B, 0x1E, + 0x10, 0x13, 0x1B, 0x1E, + 0x10, 0x13, 0x17, 0x1E, + 0x10, 0x1E, 0x1E, 0x1E, + 0x10, 0x10, 0x1E, 0x1E, + 0x10, 0x10, 0x10, 0x1E, + 0x10, 0x16, 0x1B, 0x21, + 0x10, 0x18, 0x1D, 0x21, + 0x10, 0x14, 0x1D, 0x21, + 0x10, 0x14, 0x19, 
0x21, + 0x10, 0x21, 0x21, 0x21, + 0x10, 0x10, 0x21, 0x21, + 0x10, 0x10, 0x10, 0x21, + 0x10, 0x17, 0x1D, 0x24, + 0x10, 0x1A, 0x1F, 0x24, + 0x10, 0x15, 0x1F, 0x24, + 0x10, 0x15, 0x1A, 0x24, + 0x10, 0x24, 0x24, 0x24, + 0x10, 0x10, 0x24, 0x24, + 0x10, 0x10, 0x10, 0x24, + 0x10, 0x1B, 0x22, 0x27, + 0x10, 0x15, 0x22, 0x27, + 0x10, 0x15, 0x1C, 0x27, + 0x10, 0x27, 0x27, 0x27, + 0x10, 0x10, 0x27, 0x27, + 0x10, 0x10, 0x10, 0x27, + 0x10, 0x1D, 0x24, 0x2A, + 0x10, 0x16, 0x24, 0x2A, + 0x10, 0x16, 0x1D, 0x2A, + 0x10, 0x2A, 0x2A, 0x2A, + 0x10, 0x10, 0x2A, 0x2A, + 0x10, 0x10, 0x10, 0x2A, + 0x10, 0x1E, 0x26, 0x2D, + 0x10, 0x17, 0x26, 0x2D, + 0x10, 0x17, 0x1F, 0x2D, + 0x10, 0x2D, 0x2D, 0x2D, + 0x10, 0x10, 0x2D, 0x2D, + 0x10, 0x10, 0x10, 0x2D, + 0x10, 0x20, 0x28, 0x30, + 0x10, 0x18, 0x28, 0x30, + 0x10, 0x18, 0x20, 0x30, + 0x10, 0x30, 0x30, 0x30, + 0x10, 0x10, 0x30, 0x30, + 0x10, 0x10, 0x10, 0x30, + 0x10, 0x33, 0x33, 0x33, + 0x10, 0x10, 0x33, 0x33, + 0x10, 0x10, 0x10, 0x33, + 0x10, 0x22, 0x2B, 0x34, + 0x10, 0x19, 0x2B, 0x34, + 0x10, 0x19, 0x22, 0x34, + 0x10, 0x38, 0x38, 0x38, + 0x10, 0x10, 0x38, 0x38, + 0x10, 0x10, 0x10, 0x38, + 0x10, 0x3E, 0x3E, 0x3E, + 0x10, 0x10, 0x3E, 0x3E, + 0x10, 0x10, 0x10, 0x3E, + 0x11, 0x12, 0x12, 0x13, + 0x11, 0x12, 0x13, 0x14, + 0x11, 0x13, 0x14, 0x15, + 0x11, 0x12, 0x14, 0x15, + 0x11, 0x12, 0x13, 0x15, + 0x11, 0x13, 0x14, 0x16, + 0x11, 0x13, 0x15, 0x16, + 0x11, 0x12, 0x15, 0x16, + 0x11, 0x12, 0x14, 0x16, + 0x11, 0x13, 0x15, 0x17, + 0x11, 0x14, 0x16, 0x17, + 0x11, 0x12, 0x16, 0x17, + 0x11, 0x12, 0x14, 0x17, + 0x11, 0x17, 0x17, 0x17, + 0x11, 0x11, 0x17, 0x17, + 0x11, 0x11, 0x11, 0x17, + 0x11, 0x14, 0x15, 0x18, + 0x11, 0x14, 0x17, 0x18, + 0x11, 0x12, 0x17, 0x18, + 0x11, 0x12, 0x15, 0x18, + 0x11, 0x14, 0x16, 0x19, + 0x11, 0x15, 0x17, 0x19, + 0x11, 0x13, 0x17, 0x19, + 0x11, 0x13, 0x15, 0x19, + 0x11, 0x19, 0x19, 0x19, + 0x11, 0x11, 0x19, 0x19, + 0x11, 0x11, 0x11, 0x19, + 0x11, 0x15, 0x18, 0x1C, + 0x11, 0x16, 0x1A, 0x1C, + 0x11, 0x13, 0x1A, 0x1C, + 0x11, 0x13, 
0x17, 0x1C, + 0x11, 0x1C, 0x1C, 0x1C, + 0x11, 0x11, 0x1C, 0x1C, + 0x11, 0x11, 0x11, 0x1C, + 0x11, 0x16, 0x1A, 0x1F, + 0x11, 0x18, 0x1C, 0x1F, + 0x11, 0x14, 0x1C, 0x1F, + 0x11, 0x14, 0x18, 0x1F, + 0x11, 0x1F, 0x1F, 0x1F, + 0x11, 0x11, 0x1F, 0x1F, + 0x11, 0x11, 0x11, 0x1F, + 0x11, 0x17, 0x1C, 0x22, + 0x11, 0x19, 0x1E, 0x22, + 0x11, 0x15, 0x1E, 0x22, + 0x11, 0x15, 0x1A, 0x22, + 0x11, 0x22, 0x22, 0x22, + 0x11, 0x11, 0x22, 0x22, + 0x11, 0x11, 0x11, 0x22, + 0x11, 0x18, 0x1E, 0x25, + 0x11, 0x1B, 0x20, 0x25, + 0x11, 0x16, 0x20, 0x25, + 0x11, 0x16, 0x1B, 0x25, + 0x11, 0x25, 0x25, 0x25, + 0x11, 0x11, 0x25, 0x25, + 0x11, 0x11, 0x11, 0x25, + 0x11, 0x1C, 0x23, 0x28, + 0x11, 0x16, 0x23, 0x28, + 0x11, 0x16, 0x1D, 0x28, + 0x11, 0x28, 0x28, 0x28, + 0x11, 0x11, 0x28, 0x28, + 0x11, 0x11, 0x11, 0x28, + 0x11, 0x1E, 0x25, 0x2B, + 0x11, 0x17, 0x25, 0x2B, + 0x11, 0x17, 0x1E, 0x2B, + 0x11, 0x2B, 0x2B, 0x2B, + 0x11, 0x11, 0x2B, 0x2B, + 0x11, 0x11, 0x11, 0x2B, + 0x11, 0x1F, 0x27, 0x2E, + 0x11, 0x18, 0x27, 0x2E, + 0x11, 0x18, 0x20, 0x2E, + 0x11, 0x2E, 0x2E, 0x2E, + 0x11, 0x11, 0x2E, 0x2E, + 0x11, 0x11, 0x11, 0x2E, + 0x11, 0x21, 0x29, 0x31, + 0x11, 0x19, 0x29, 0x31, + 0x11, 0x19, 0x21, 0x31, + 0x11, 0x31, 0x31, 0x31, + 0x11, 0x11, 0x31, 0x31, + 0x11, 0x11, 0x11, 0x31, + 0x11, 0x34, 0x34, 0x34, + 0x11, 0x11, 0x34, 0x34, + 0x11, 0x11, 0x11, 0x34, + 0x11, 0x23, 0x2C, 0x35, + 0x11, 0x1A, 0x2C, 0x35, + 0x11, 0x1A, 0x23, 0x35, + 0x11, 0x39, 0x39, 0x39, + 0x11, 0x11, 0x39, 0x39, + 0x11, 0x11, 0x11, 0x39, + 0x11, 0x3F, 0x3F, 0x3F, + 0x11, 0x11, 0x3F, 0x3F, + 0x11, 0x11, 0x11, 0x3F, + 0x12, 0x13, 0x13, 0x14, + 0x12, 0x13, 0x14, 0x15, + 0x12, 0x14, 0x15, 0x16, + 0x12, 0x13, 0x15, 0x16, + 0x12, 0x13, 0x14, 0x16, + 0x12, 0x14, 0x15, 0x17, + 0x12, 0x14, 0x16, 0x17, + 0x12, 0x13, 0x16, 0x17, + 0x12, 0x13, 0x15, 0x17, + 0x12, 0x14, 0x16, 0x18, + 0x12, 0x15, 0x17, 0x18, + 0x12, 0x13, 0x17, 0x18, + 0x12, 0x13, 0x15, 0x18, + 0x12, 0x18, 0x18, 0x18, + 0x12, 0x12, 0x18, 0x18, + 0x12, 0x12, 0x12, 0x18, + 0x12, 
0x15, 0x16, 0x19, + 0x12, 0x15, 0x18, 0x19, + 0x12, 0x13, 0x18, 0x19, + 0x12, 0x13, 0x16, 0x19, + 0x12, 0x15, 0x17, 0x1A, + 0x12, 0x16, 0x18, 0x1A, + 0x12, 0x14, 0x18, 0x1A, + 0x12, 0x14, 0x16, 0x1A, + 0x12, 0x1A, 0x1A, 0x1A, + 0x12, 0x12, 0x1A, 0x1A, + 0x12, 0x12, 0x12, 0x1A, + 0x12, 0x16, 0x19, 0x1D, + 0x12, 0x17, 0x1B, 0x1D, + 0x12, 0x14, 0x1B, 0x1D, + 0x12, 0x14, 0x18, 0x1D, + 0x12, 0x1D, 0x1D, 0x1D, + 0x12, 0x12, 0x1D, 0x1D, + 0x12, 0x12, 0x12, 0x1D, + 0x12, 0x17, 0x1B, 0x20, + 0x12, 0x19, 0x1D, 0x20, + 0x12, 0x15, 0x1D, 0x20, + 0x12, 0x15, 0x19, 0x20, + 0x12, 0x20, 0x20, 0x20, + 0x12, 0x12, 0x20, 0x20, + 0x12, 0x12, 0x12, 0x20, + 0x12, 0x18, 0x1D, 0x23, + 0x12, 0x1A, 0x1F, 0x23, + 0x12, 0x16, 0x1F, 0x23, + 0x12, 0x16, 0x1B, 0x23, + 0x12, 0x23, 0x23, 0x23, + 0x12, 0x12, 0x23, 0x23, + 0x12, 0x12, 0x12, 0x23, + 0x12, 0x19, 0x1F, 0x26, + 0x12, 0x1C, 0x21, 0x26, + 0x12, 0x17, 0x21, 0x26, + 0x12, 0x17, 0x1C, 0x26, + 0x12, 0x26, 0x26, 0x26, + 0x12, 0x12, 0x26, 0x26, + 0x12, 0x12, 0x12, 0x26, + 0x12, 0x1D, 0x24, 0x29, + 0x12, 0x17, 0x24, 0x29, + 0x12, 0x17, 0x1E, 0x29, + 0x12, 0x29, 0x29, 0x29, + 0x12, 0x12, 0x29, 0x29, + 0x12, 0x12, 0x12, 0x29, + 0x12, 0x1F, 0x26, 0x2C, + 0x12, 0x18, 0x26, 0x2C, + 0x12, 0x18, 0x1F, 0x2C, + 0x12, 0x2C, 0x2C, 0x2C, + 0x12, 0x12, 0x2C, 0x2C, + 0x12, 0x12, 0x12, 0x2C, + 0x12, 0x20, 0x28, 0x2F, + 0x12, 0x19, 0x28, 0x2F, + 0x12, 0x19, 0x21, 0x2F, + 0x12, 0x2F, 0x2F, 0x2F, + 0x12, 0x12, 0x2F, 0x2F, + 0x12, 0x12, 0x12, 0x2F, + 0x12, 0x22, 0x2A, 0x32, + 0x12, 0x1A, 0x2A, 0x32, + 0x12, 0x1A, 0x22, 0x32, + 0x12, 0x32, 0x32, 0x32, + 0x12, 0x12, 0x32, 0x32, + 0x12, 0x12, 0x12, 0x32, + 0x12, 0x35, 0x35, 0x35, + 0x12, 0x12, 0x35, 0x35, + 0x12, 0x12, 0x12, 0x35, + 0x12, 0x24, 0x2D, 0x36, + 0x12, 0x1B, 0x2D, 0x36, + 0x12, 0x1B, 0x24, 0x36, + 0x12, 0x3A, 0x3A, 0x3A, + 0x12, 0x12, 0x3A, 0x3A, + 0x12, 0x12, 0x12, 0x3A, + 0x13, 0x14, 0x14, 0x15, + 0x13, 0x14, 0x15, 0x16, + 0x13, 0x15, 0x16, 0x17, + 0x13, 0x14, 0x16, 0x17, + 0x13, 0x14, 0x15, 0x17, + 
0x13, 0x15, 0x16, 0x18, + 0x13, 0x15, 0x17, 0x18, + 0x13, 0x14, 0x17, 0x18, + 0x13, 0x14, 0x16, 0x18, + 0x13, 0x15, 0x17, 0x19, + 0x13, 0x16, 0x18, 0x19, + 0x13, 0x14, 0x18, 0x19, + 0x13, 0x14, 0x16, 0x19, + 0x13, 0x19, 0x19, 0x19, + 0x13, 0x13, 0x19, 0x19, + 0x13, 0x13, 0x13, 0x19, + 0x13, 0x16, 0x17, 0x1A, + 0x13, 0x16, 0x19, 0x1A, + 0x13, 0x14, 0x19, 0x1A, + 0x13, 0x14, 0x17, 0x1A, + 0x13, 0x16, 0x18, 0x1B, + 0x13, 0x17, 0x19, 0x1B, + 0x13, 0x15, 0x19, 0x1B, + 0x13, 0x15, 0x17, 0x1B, + 0x13, 0x1B, 0x1B, 0x1B, + 0x13, 0x13, 0x1B, 0x1B, + 0x13, 0x13, 0x13, 0x1B, + 0x13, 0x17, 0x1A, 0x1E, + 0x13, 0x18, 0x1C, 0x1E, + 0x13, 0x15, 0x1C, 0x1E, + 0x13, 0x15, 0x19, 0x1E, + 0x13, 0x1E, 0x1E, 0x1E, + 0x13, 0x13, 0x1E, 0x1E, + 0x13, 0x13, 0x13, 0x1E, + 0x13, 0x18, 0x1C, 0x21, + 0x13, 0x1A, 0x1E, 0x21, + 0x13, 0x16, 0x1E, 0x21, + 0x13, 0x16, 0x1A, 0x21, + 0x13, 0x21, 0x21, 0x21, + 0x13, 0x13, 0x21, 0x21, + 0x13, 0x13, 0x13, 0x21, + 0x13, 0x19, 0x1E, 0x24, + 0x13, 0x1B, 0x20, 0x24, + 0x13, 0x17, 0x20, 0x24, + 0x13, 0x17, 0x1C, 0x24, + 0x13, 0x24, 0x24, 0x24, + 0x13, 0x13, 0x24, 0x24, + 0x13, 0x13, 0x13, 0x24, + 0x13, 0x1A, 0x20, 0x27, + 0x13, 0x1D, 0x22, 0x27, + 0x13, 0x18, 0x22, 0x27, + 0x13, 0x18, 0x1D, 0x27, + 0x13, 0x27, 0x27, 0x27, + 0x13, 0x13, 0x27, 0x27, + 0x13, 0x13, 0x13, 0x27, + 0x13, 0x1E, 0x25, 0x2A, + 0x13, 0x18, 0x25, 0x2A, + 0x13, 0x18, 0x1F, 0x2A, + 0x13, 0x2A, 0x2A, 0x2A, + 0x13, 0x13, 0x2A, 0x2A, + 0x13, 0x13, 0x13, 0x2A, + 0x13, 0x20, 0x27, 0x2D, + 0x13, 0x19, 0x27, 0x2D, + 0x13, 0x19, 0x20, 0x2D, + 0x13, 0x2D, 0x2D, 0x2D, + 0x13, 0x13, 0x2D, 0x2D, + 0x13, 0x13, 0x13, 0x2D, + 0x13, 0x21, 0x29, 0x30, + 0x13, 0x1A, 0x29, 0x30, + 0x13, 0x1A, 0x22, 0x30, + 0x13, 0x30, 0x30, 0x30, + 0x13, 0x13, 0x30, 0x30, + 0x13, 0x13, 0x13, 0x30, + 0x13, 0x23, 0x2B, 0x33, + 0x13, 0x1B, 0x2B, 0x33, + 0x13, 0x1B, 0x23, 0x33, + 0x13, 0x33, 0x33, 0x33, + 0x13, 0x13, 0x33, 0x33, + 0x13, 0x13, 0x13, 0x33, + 0x13, 0x36, 0x36, 0x36, + 0x13, 0x13, 0x36, 0x36, + 0x13, 0x13, 0x13, 0x36, 
+ 0x13, 0x25, 0x2E, 0x37, + 0x13, 0x1C, 0x2E, 0x37, + 0x13, 0x1C, 0x25, 0x37, + 0x13, 0x3B, 0x3B, 0x3B, + 0x13, 0x13, 0x3B, 0x3B, + 0x13, 0x13, 0x13, 0x3B, + 0x14, 0x15, 0x15, 0x16, + 0x14, 0x15, 0x16, 0x17, + 0x14, 0x16, 0x17, 0x18, + 0x14, 0x15, 0x17, 0x18, + 0x14, 0x15, 0x16, 0x18, + 0x14, 0x16, 0x17, 0x19, + 0x14, 0x16, 0x18, 0x19, + 0x14, 0x15, 0x18, 0x19, + 0x14, 0x15, 0x17, 0x19, + 0x14, 0x16, 0x18, 0x1A, + 0x14, 0x17, 0x19, 0x1A, + 0x14, 0x15, 0x19, 0x1A, + 0x14, 0x15, 0x17, 0x1A, + 0x14, 0x1A, 0x1A, 0x1A, + 0x14, 0x14, 0x1A, 0x1A, + 0x14, 0x14, 0x14, 0x1A, + 0x14, 0x17, 0x18, 0x1B, + 0x14, 0x17, 0x1A, 0x1B, + 0x14, 0x15, 0x1A, 0x1B, + 0x14, 0x15, 0x18, 0x1B, + 0x14, 0x17, 0x19, 0x1C, + 0x14, 0x18, 0x1A, 0x1C, + 0x14, 0x16, 0x1A, 0x1C, + 0x14, 0x16, 0x18, 0x1C, + 0x14, 0x1C, 0x1C, 0x1C, + 0x14, 0x14, 0x1C, 0x1C, + 0x14, 0x14, 0x14, 0x1C, + 0x14, 0x18, 0x1B, 0x1F, + 0x14, 0x19, 0x1D, 0x1F, + 0x14, 0x16, 0x1D, 0x1F, + 0x14, 0x16, 0x1A, 0x1F, + 0x14, 0x1F, 0x1F, 0x1F, + 0x14, 0x14, 0x1F, 0x1F, + 0x14, 0x14, 0x14, 0x1F, + 0x14, 0x19, 0x1D, 0x22, + 0x14, 0x1B, 0x1F, 0x22, + 0x14, 0x17, 0x1F, 0x22, + 0x14, 0x17, 0x1B, 0x22, + 0x14, 0x22, 0x22, 0x22, + 0x14, 0x14, 0x22, 0x22, + 0x14, 0x14, 0x14, 0x22, + 0x14, 0x1A, 0x1F, 0x25, + 0x14, 0x1C, 0x21, 0x25, + 0x14, 0x18, 0x21, 0x25, + 0x14, 0x18, 0x1D, 0x25, + 0x14, 0x25, 0x25, 0x25, + 0x14, 0x14, 0x25, 0x25, + 0x14, 0x14, 0x14, 0x25, + 0x14, 0x1B, 0x21, 0x28, + 0x14, 0x1E, 0x23, 0x28, + 0x14, 0x19, 0x23, 0x28, + 0x14, 0x19, 0x1E, 0x28, + 0x14, 0x28, 0x28, 0x28, + 0x14, 0x14, 0x28, 0x28, + 0x14, 0x14, 0x14, 0x28, + 0x14, 0x1F, 0x26, 0x2B, + 0x14, 0x19, 0x26, 0x2B, + 0x14, 0x19, 0x20, 0x2B, + 0x14, 0x2B, 0x2B, 0x2B, + 0x14, 0x14, 0x2B, 0x2B, + 0x14, 0x14, 0x14, 0x2B, + 0x14, 0x21, 0x28, 0x2E, + 0x14, 0x1A, 0x28, 0x2E, + 0x14, 0x1A, 0x21, 0x2E, + 0x14, 0x2E, 0x2E, 0x2E, + 0x14, 0x14, 0x2E, 0x2E, + 0x14, 0x14, 0x14, 0x2E, + 0x14, 0x22, 0x2A, 0x31, + 0x14, 0x1B, 0x2A, 0x31, + 0x14, 0x1B, 0x23, 0x31, + 0x14, 0x31, 0x31, 
0x31, + 0x14, 0x14, 0x31, 0x31, + 0x14, 0x14, 0x14, 0x31, + 0x14, 0x24, 0x2C, 0x34, + 0x14, 0x1C, 0x2C, 0x34, + 0x14, 0x1C, 0x24, 0x34, + 0x14, 0x34, 0x34, 0x34, + 0x14, 0x14, 0x34, 0x34, + 0x14, 0x14, 0x14, 0x34, + 0x14, 0x37, 0x37, 0x37, + 0x14, 0x14, 0x37, 0x37, + 0x14, 0x14, 0x14, 0x37, + 0x14, 0x26, 0x2F, 0x38, + 0x14, 0x1D, 0x2F, 0x38, + 0x14, 0x1D, 0x26, 0x38, + 0x14, 0x3C, 0x3C, 0x3C, + 0x14, 0x14, 0x3C, 0x3C, + 0x14, 0x14, 0x14, 0x3C, + 0x15, 0x16, 0x16, 0x17, + 0x15, 0x16, 0x17, 0x18, + 0x15, 0x17, 0x18, 0x19, + 0x15, 0x16, 0x18, 0x19, + 0x15, 0x16, 0x17, 0x19, + 0x15, 0x17, 0x18, 0x1A, + 0x15, 0x17, 0x19, 0x1A, + 0x15, 0x16, 0x19, 0x1A, + 0x15, 0x16, 0x18, 0x1A, + 0x15, 0x17, 0x19, 0x1B, + 0x15, 0x18, 0x1A, 0x1B, + 0x15, 0x16, 0x1A, 0x1B, + 0x15, 0x16, 0x18, 0x1B, + 0x15, 0x1B, 0x1B, 0x1B, + 0x15, 0x15, 0x1B, 0x1B, + 0x15, 0x15, 0x15, 0x1B, + 0x15, 0x18, 0x19, 0x1C, + 0x15, 0x18, 0x1B, 0x1C, + 0x15, 0x16, 0x1B, 0x1C, + 0x15, 0x16, 0x19, 0x1C, + 0x15, 0x18, 0x1A, 0x1D, + 0x15, 0x19, 0x1B, 0x1D, + 0x15, 0x17, 0x1B, 0x1D, + 0x15, 0x17, 0x19, 0x1D, + 0x15, 0x1D, 0x1D, 0x1D, + 0x15, 0x15, 0x1D, 0x1D, + 0x15, 0x15, 0x15, 0x1D, + 0x15, 0x19, 0x1C, 0x20, + 0x15, 0x1A, 0x1E, 0x20, + 0x15, 0x17, 0x1E, 0x20, + 0x15, 0x17, 0x1B, 0x20, + 0x15, 0x20, 0x20, 0x20, + 0x15, 0x15, 0x20, 0x20, + 0x15, 0x15, 0x15, 0x20, + 0x15, 0x1A, 0x1E, 0x23, + 0x15, 0x1C, 0x20, 0x23, + 0x15, 0x18, 0x20, 0x23, + 0x15, 0x18, 0x1C, 0x23, + 0x15, 0x23, 0x23, 0x23, + 0x15, 0x15, 0x23, 0x23, + 0x15, 0x15, 0x15, 0x23, + 0x15, 0x1B, 0x20, 0x26, + 0x15, 0x1D, 0x22, 0x26, + 0x15, 0x19, 0x22, 0x26, + 0x15, 0x19, 0x1E, 0x26, + 0x15, 0x26, 0x26, 0x26, + 0x15, 0x15, 0x26, 0x26, + 0x15, 0x15, 0x15, 0x26, + 0x15, 0x1C, 0x22, 0x29, + 0x15, 0x1F, 0x24, 0x29, + 0x15, 0x1A, 0x24, 0x29, + 0x15, 0x1A, 0x1F, 0x29, + 0x15, 0x29, 0x29, 0x29, + 0x15, 0x15, 0x29, 0x29, + 0x15, 0x15, 0x15, 0x29, + 0x15, 0x20, 0x27, 0x2C, + 0x15, 0x1A, 0x27, 0x2C, + 0x15, 0x1A, 0x21, 0x2C, + 0x15, 0x2C, 0x2C, 0x2C, + 0x15, 0x15, 
0x2C, 0x2C, + 0x15, 0x15, 0x15, 0x2C, + 0x15, 0x22, 0x29, 0x2F, + 0x15, 0x1B, 0x29, 0x2F, + 0x15, 0x1B, 0x22, 0x2F, + 0x15, 0x2F, 0x2F, 0x2F, + 0x15, 0x15, 0x2F, 0x2F, + 0x15, 0x15, 0x15, 0x2F, + 0x15, 0x23, 0x2B, 0x32, + 0x15, 0x1C, 0x2B, 0x32, + 0x15, 0x1C, 0x24, 0x32, + 0x15, 0x32, 0x32, 0x32, + 0x15, 0x15, 0x32, 0x32, + 0x15, 0x15, 0x15, 0x32, + 0x15, 0x25, 0x2D, 0x35, + 0x15, 0x1D, 0x2D, 0x35, + 0x15, 0x1D, 0x25, 0x35, + 0x15, 0x35, 0x35, 0x35, + 0x15, 0x15, 0x35, 0x35, + 0x15, 0x15, 0x15, 0x35, + 0x15, 0x38, 0x38, 0x38, + 0x15, 0x15, 0x38, 0x38, + 0x15, 0x15, 0x15, 0x38, + 0x15, 0x27, 0x30, 0x39, + 0x15, 0x1E, 0x30, 0x39, + 0x15, 0x1E, 0x27, 0x39, + 0x15, 0x3D, 0x3D, 0x3D, + 0x15, 0x15, 0x3D, 0x3D, + 0x15, 0x15, 0x15, 0x3D, + 0x16, 0x17, 0x17, 0x18, + 0x16, 0x17, 0x18, 0x19, + 0x16, 0x18, 0x19, 0x1A, + 0x16, 0x17, 0x19, 0x1A, + 0x16, 0x17, 0x18, 0x1A, + 0x16, 0x18, 0x19, 0x1B, + 0x16, 0x18, 0x1A, 0x1B, + 0x16, 0x17, 0x1A, 0x1B, + 0x16, 0x17, 0x19, 0x1B, + 0x16, 0x18, 0x1A, 0x1C, + 0x16, 0x19, 0x1B, 0x1C, + 0x16, 0x17, 0x1B, 0x1C, + 0x16, 0x17, 0x19, 0x1C, + 0x16, 0x1C, 0x1C, 0x1C, + 0x16, 0x16, 0x1C, 0x1C, + 0x16, 0x16, 0x16, 0x1C, + 0x16, 0x19, 0x1A, 0x1D, + 0x16, 0x19, 0x1C, 0x1D, + 0x16, 0x17, 0x1C, 0x1D, + 0x16, 0x17, 0x1A, 0x1D, + 0x16, 0x19, 0x1B, 0x1E, + 0x16, 0x1A, 0x1C, 0x1E, + 0x16, 0x18, 0x1C, 0x1E, + 0x16, 0x18, 0x1A, 0x1E, + 0x16, 0x1E, 0x1E, 0x1E, + 0x16, 0x16, 0x1E, 0x1E, + 0x16, 0x16, 0x16, 0x1E, + 0x16, 0x1A, 0x1D, 0x21, + 0x16, 0x1B, 0x1F, 0x21, + 0x16, 0x18, 0x1F, 0x21, + 0x16, 0x18, 0x1C, 0x21, + 0x16, 0x21, 0x21, 0x21, + 0x16, 0x16, 0x21, 0x21, + 0x16, 0x16, 0x16, 0x21, + 0x16, 0x1B, 0x1F, 0x24, + 0x16, 0x1D, 0x21, 0x24, + 0x16, 0x19, 0x21, 0x24, + 0x16, 0x19, 0x1D, 0x24, + 0x16, 0x24, 0x24, 0x24, + 0x16, 0x16, 0x24, 0x24, + 0x16, 0x16, 0x16, 0x24, + 0x16, 0x1C, 0x21, 0x27, + 0x16, 0x1E, 0x23, 0x27, + 0x16, 0x1A, 0x23, 0x27, + 0x16, 0x1A, 0x1F, 0x27, + 0x16, 0x27, 0x27, 0x27, + 0x16, 0x16, 0x27, 0x27, + 0x16, 0x16, 0x16, 0x27, + 0x16, 
0x1D, 0x23, 0x2A, + 0x16, 0x20, 0x25, 0x2A, + 0x16, 0x1B, 0x25, 0x2A, + 0x16, 0x1B, 0x20, 0x2A, + 0x16, 0x2A, 0x2A, 0x2A, + 0x16, 0x16, 0x2A, 0x2A, + 0x16, 0x16, 0x16, 0x2A, + 0x16, 0x21, 0x28, 0x2D, + 0x16, 0x1B, 0x28, 0x2D, + 0x16, 0x1B, 0x22, 0x2D, + 0x16, 0x2D, 0x2D, 0x2D, + 0x16, 0x16, 0x2D, 0x2D, + 0x16, 0x16, 0x16, 0x2D, + 0x16, 0x23, 0x2A, 0x30, + 0x16, 0x1C, 0x2A, 0x30, + 0x16, 0x1C, 0x23, 0x30, + 0x16, 0x30, 0x30, 0x30, + 0x16, 0x16, 0x30, 0x30, + 0x16, 0x16, 0x16, 0x30, + 0x16, 0x24, 0x2C, 0x33, + 0x16, 0x1D, 0x2C, 0x33, + 0x16, 0x1D, 0x25, 0x33, + 0x16, 0x33, 0x33, 0x33, + 0x16, 0x16, 0x33, 0x33, + 0x16, 0x16, 0x16, 0x33, + 0x16, 0x26, 0x2E, 0x36, + 0x16, 0x1E, 0x2E, 0x36, + 0x16, 0x1E, 0x26, 0x36, + 0x16, 0x36, 0x36, 0x36, + 0x16, 0x16, 0x36, 0x36, + 0x16, 0x16, 0x16, 0x36, + 0x16, 0x39, 0x39, 0x39, + 0x16, 0x16, 0x39, 0x39, + 0x16, 0x16, 0x16, 0x39, + 0x16, 0x28, 0x31, 0x3A, + 0x16, 0x1F, 0x31, 0x3A, + 0x16, 0x1F, 0x28, 0x3A, + 0x16, 0x3E, 0x3E, 0x3E, + 0x16, 0x16, 0x3E, 0x3E, + 0x16, 0x16, 0x16, 0x3E, + 0x17, 0x18, 0x18, 0x19, + 0x17, 0x18, 0x19, 0x1A, + 0x17, 0x19, 0x1A, 0x1B, + 0x17, 0x18, 0x1A, 0x1B, + 0x17, 0x18, 0x19, 0x1B, + 0x17, 0x19, 0x1A, 0x1C, + 0x17, 0x19, 0x1B, 0x1C, + 0x17, 0x18, 0x1B, 0x1C, + 0x17, 0x18, 0x1A, 0x1C, + 0x17, 0x19, 0x1B, 0x1D, + 0x17, 0x1A, 0x1C, 0x1D, + 0x17, 0x18, 0x1C, 0x1D, + 0x17, 0x18, 0x1A, 0x1D, + 0x17, 0x1D, 0x1D, 0x1D, + 0x17, 0x17, 0x1D, 0x1D, + 0x17, 0x17, 0x17, 0x1D, + 0x17, 0x1A, 0x1B, 0x1E, + 0x17, 0x1A, 0x1D, 0x1E, + 0x17, 0x18, 0x1D, 0x1E, + 0x17, 0x18, 0x1B, 0x1E, + 0x17, 0x1A, 0x1C, 0x1F, + 0x17, 0x1B, 0x1D, 0x1F, + 0x17, 0x19, 0x1D, 0x1F, + 0x17, 0x19, 0x1B, 0x1F, + 0x17, 0x1F, 0x1F, 0x1F, + 0x17, 0x17, 0x1F, 0x1F, + 0x17, 0x17, 0x17, 0x1F, + 0x17, 0x1B, 0x1E, 0x22, + 0x17, 0x1C, 0x20, 0x22, + 0x17, 0x19, 0x20, 0x22, + 0x17, 0x19, 0x1D, 0x22, + 0x17, 0x22, 0x22, 0x22, + 0x17, 0x17, 0x22, 0x22, + 0x17, 0x17, 0x17, 0x22, + 0x17, 0x1C, 0x20, 0x25, + 0x17, 0x1E, 0x22, 0x25, + 0x17, 0x1A, 0x22, 0x25, + 
0x17, 0x1A, 0x1E, 0x25, + 0x17, 0x25, 0x25, 0x25, + 0x17, 0x17, 0x25, 0x25, + 0x17, 0x17, 0x17, 0x25, + 0x17, 0x1D, 0x22, 0x28, + 0x17, 0x1F, 0x24, 0x28, + 0x17, 0x1B, 0x24, 0x28, + 0x17, 0x1B, 0x20, 0x28, + 0x17, 0x28, 0x28, 0x28, + 0x17, 0x17, 0x28, 0x28, + 0x17, 0x17, 0x17, 0x28, + 0x17, 0x1E, 0x24, 0x2B, + 0x17, 0x21, 0x26, 0x2B, + 0x17, 0x1C, 0x26, 0x2B, + 0x17, 0x1C, 0x21, 0x2B, + 0x17, 0x2B, 0x2B, 0x2B, + 0x17, 0x17, 0x2B, 0x2B, + 0x17, 0x17, 0x17, 0x2B, + 0x17, 0x22, 0x29, 0x2E, + 0x17, 0x1C, 0x29, 0x2E, + 0x17, 0x1C, 0x23, 0x2E, + 0x17, 0x2E, 0x2E, 0x2E, + 0x17, 0x17, 0x2E, 0x2E, + 0x17, 0x17, 0x17, 0x2E, + 0x17, 0x24, 0x2B, 0x31, + 0x17, 0x1D, 0x2B, 0x31, + 0x17, 0x1D, 0x24, 0x31, + 0x17, 0x31, 0x31, 0x31, + 0x17, 0x17, 0x31, 0x31, + 0x17, 0x17, 0x17, 0x31, + 0x17, 0x25, 0x2D, 0x34, + 0x17, 0x1E, 0x2D, 0x34, + 0x17, 0x1E, 0x26, 0x34, + 0x17, 0x34, 0x34, 0x34, + 0x17, 0x17, 0x34, 0x34, + 0x17, 0x17, 0x17, 0x34, + 0x17, 0x27, 0x2F, 0x37, + 0x17, 0x1F, 0x2F, 0x37, + 0x17, 0x1F, 0x27, 0x37, + 0x17, 0x37, 0x37, 0x37, + 0x17, 0x17, 0x37, 0x37, + 0x17, 0x17, 0x17, 0x37, + 0x17, 0x3A, 0x3A, 0x3A, + 0x17, 0x17, 0x3A, 0x3A, + 0x17, 0x17, 0x17, 0x3A, + 0x17, 0x29, 0x32, 0x3B, + 0x17, 0x20, 0x32, 0x3B, + 0x17, 0x20, 0x29, 0x3B, + 0x17, 0x3F, 0x3F, 0x3F, + 0x17, 0x17, 0x3F, 0x3F, + 0x17, 0x17, 0x17, 0x3F, + 0x18, 0x19, 0x19, 0x1A, + 0x18, 0x19, 0x1A, 0x1B, + 0x18, 0x1A, 0x1B, 0x1C, + 0x18, 0x19, 0x1B, 0x1C, + 0x18, 0x19, 0x1A, 0x1C, + 0x18, 0x1A, 0x1B, 0x1D, + 0x18, 0x1A, 0x1C, 0x1D, + 0x18, 0x19, 0x1C, 0x1D, + 0x18, 0x19, 0x1B, 0x1D, + 0x18, 0x1A, 0x1C, 0x1E, + 0x18, 0x1B, 0x1D, 0x1E, + 0x18, 0x19, 0x1D, 0x1E, + 0x18, 0x19, 0x1B, 0x1E, + 0x18, 0x1E, 0x1E, 0x1E, + 0x18, 0x18, 0x1E, 0x1E, + 0x18, 0x18, 0x18, 0x1E, + 0x18, 0x1B, 0x1C, 0x1F, + 0x18, 0x1B, 0x1E, 0x1F, + 0x18, 0x19, 0x1E, 0x1F, + 0x18, 0x19, 0x1C, 0x1F, + 0x18, 0x1B, 0x1D, 0x20, + 0x18, 0x1C, 0x1E, 0x20, + 0x18, 0x1A, 0x1E, 0x20, + 0x18, 0x1A, 0x1C, 0x20, + 0x18, 0x20, 0x20, 0x20, + 0x18, 0x18, 0x20, 0x20, 
+ 0x18, 0x18, 0x18, 0x20, + 0x18, 0x1C, 0x1F, 0x23, + 0x18, 0x1D, 0x21, 0x23, + 0x18, 0x1A, 0x21, 0x23, + 0x18, 0x1A, 0x1E, 0x23, + 0x18, 0x23, 0x23, 0x23, + 0x18, 0x18, 0x23, 0x23, + 0x18, 0x18, 0x18, 0x23, + 0x18, 0x1D, 0x21, 0x26, + 0x18, 0x1F, 0x23, 0x26, + 0x18, 0x1B, 0x23, 0x26, + 0x18, 0x1B, 0x1F, 0x26, + 0x18, 0x26, 0x26, 0x26, + 0x18, 0x18, 0x26, 0x26, + 0x18, 0x18, 0x18, 0x26, + 0x18, 0x1E, 0x23, 0x29, + 0x18, 0x20, 0x25, 0x29, + 0x18, 0x1C, 0x25, 0x29, + 0x18, 0x1C, 0x21, 0x29, + 0x18, 0x29, 0x29, 0x29, + 0x18, 0x18, 0x29, 0x29, + 0x18, 0x18, 0x18, 0x29, + 0x18, 0x1F, 0x25, 0x2C, + 0x18, 0x22, 0x27, 0x2C, + 0x18, 0x1D, 0x27, 0x2C, + 0x18, 0x1D, 0x22, 0x2C, + 0x18, 0x2C, 0x2C, 0x2C, + 0x18, 0x18, 0x2C, 0x2C, + 0x18, 0x18, 0x18, 0x2C, + 0x18, 0x23, 0x2A, 0x2F, + 0x18, 0x1D, 0x2A, 0x2F, + 0x18, 0x1D, 0x24, 0x2F, + 0x18, 0x2F, 0x2F, 0x2F, + 0x18, 0x18, 0x2F, 0x2F, + 0x18, 0x18, 0x18, 0x2F, + 0x18, 0x25, 0x2C, 0x32, + 0x18, 0x1E, 0x2C, 0x32, + 0x18, 0x1E, 0x25, 0x32, + 0x18, 0x32, 0x32, 0x32, + 0x18, 0x18, 0x32, 0x32, + 0x18, 0x18, 0x18, 0x32, + 0x18, 0x26, 0x2E, 0x35, + 0x18, 0x1F, 0x2E, 0x35, + 0x18, 0x1F, 0x27, 0x35, + 0x18, 0x35, 0x35, 0x35, + 0x18, 0x18, 0x35, 0x35, + 0x18, 0x18, 0x18, 0x35, + 0x18, 0x28, 0x30, 0x38, + 0x18, 0x20, 0x30, 0x38, + 0x18, 0x20, 0x28, 0x38, + 0x18, 0x38, 0x38, 0x38, + 0x18, 0x18, 0x38, 0x38, + 0x18, 0x18, 0x18, 0x38, + 0x18, 0x3B, 0x3B, 0x3B, + 0x18, 0x18, 0x3B, 0x3B, + 0x18, 0x18, 0x18, 0x3B, + 0x18, 0x2A, 0x33, 0x3C, + 0x18, 0x21, 0x33, 0x3C, + 0x18, 0x21, 0x2A, 0x3C, + 0x19, 0x1A, 0x1A, 0x1B, + 0x19, 0x1A, 0x1B, 0x1C, + 0x19, 0x1B, 0x1C, 0x1D, + 0x19, 0x1A, 0x1C, 0x1D, + 0x19, 0x1A, 0x1B, 0x1D, + 0x19, 0x1B, 0x1C, 0x1E, + 0x19, 0x1B, 0x1D, 0x1E, + 0x19, 0x1A, 0x1D, 0x1E, + 0x19, 0x1A, 0x1C, 0x1E, + 0x19, 0x1B, 0x1D, 0x1F, + 0x19, 0x1C, 0x1E, 0x1F, + 0x19, 0x1A, 0x1E, 0x1F, + 0x19, 0x1A, 0x1C, 0x1F, + 0x19, 0x1F, 0x1F, 0x1F, + 0x19, 0x19, 0x1F, 0x1F, + 0x19, 0x19, 0x19, 0x1F, + 0x19, 0x1C, 0x1D, 0x20, + 0x19, 0x1C, 0x1F, 
0x20, + 0x19, 0x1A, 0x1F, 0x20, + 0x19, 0x1A, 0x1D, 0x20, + 0x19, 0x1C, 0x1E, 0x21, + 0x19, 0x1D, 0x1F, 0x21, + 0x19, 0x1B, 0x1F, 0x21, + 0x19, 0x1B, 0x1D, 0x21, + 0x19, 0x21, 0x21, 0x21, + 0x19, 0x19, 0x21, 0x21, + 0x19, 0x19, 0x19, 0x21, + 0x19, 0x1D, 0x20, 0x24, + 0x19, 0x1E, 0x22, 0x24, + 0x19, 0x1B, 0x22, 0x24, + 0x19, 0x1B, 0x1F, 0x24, + 0x19, 0x24, 0x24, 0x24, + 0x19, 0x19, 0x24, 0x24, + 0x19, 0x19, 0x19, 0x24, + 0x19, 0x1E, 0x22, 0x27, + 0x19, 0x20, 0x24, 0x27, + 0x19, 0x1C, 0x24, 0x27, + 0x19, 0x1C, 0x20, 0x27, + 0x19, 0x27, 0x27, 0x27, + 0x19, 0x19, 0x27, 0x27, + 0x19, 0x19, 0x19, 0x27, + 0x19, 0x1F, 0x24, 0x2A, + 0x19, 0x21, 0x26, 0x2A, + 0x19, 0x1D, 0x26, 0x2A, + 0x19, 0x1D, 0x22, 0x2A, + 0x19, 0x2A, 0x2A, 0x2A, + 0x19, 0x19, 0x2A, 0x2A, + 0x19, 0x19, 0x19, 0x2A, + 0x19, 0x20, 0x26, 0x2D, + 0x19, 0x23, 0x28, 0x2D, + 0x19, 0x1E, 0x28, 0x2D, + 0x19, 0x1E, 0x23, 0x2D, + 0x19, 0x2D, 0x2D, 0x2D, + 0x19, 0x19, 0x2D, 0x2D, + 0x19, 0x19, 0x19, 0x2D, + 0x19, 0x24, 0x2B, 0x30, + 0x19, 0x1E, 0x2B, 0x30, + 0x19, 0x1E, 0x25, 0x30, + 0x19, 0x30, 0x30, 0x30, + 0x19, 0x19, 0x30, 0x30, + 0x19, 0x19, 0x19, 0x30, + 0x19, 0x26, 0x2D, 0x33, + 0x19, 0x1F, 0x2D, 0x33, + 0x19, 0x1F, 0x26, 0x33, + 0x19, 0x33, 0x33, 0x33, + 0x19, 0x19, 0x33, 0x33, + 0x19, 0x19, 0x19, 0x33, + 0x19, 0x27, 0x2F, 0x36, + 0x19, 0x20, 0x2F, 0x36, + 0x19, 0x20, 0x28, 0x36, + 0x19, 0x36, 0x36, 0x36, + 0x19, 0x19, 0x36, 0x36, + 0x19, 0x19, 0x19, 0x36, + 0x19, 0x29, 0x31, 0x39, + 0x19, 0x21, 0x31, 0x39, + 0x19, 0x21, 0x29, 0x39, + 0x19, 0x39, 0x39, 0x39, + 0x19, 0x19, 0x39, 0x39, + 0x19, 0x19, 0x19, 0x39, + 0x19, 0x3C, 0x3C, 0x3C, + 0x19, 0x19, 0x3C, 0x3C, + 0x19, 0x19, 0x19, 0x3C, + 0x19, 0x2B, 0x34, 0x3D, + 0x19, 0x22, 0x34, 0x3D, + 0x19, 0x22, 0x2B, 0x3D, + 0x1A, 0x1B, 0x1B, 0x1C, + 0x1A, 0x1B, 0x1C, 0x1D, + 0x1A, 0x1C, 0x1D, 0x1E, + 0x1A, 0x1B, 0x1D, 0x1E, + 0x1A, 0x1B, 0x1C, 0x1E, + 0x1A, 0x1C, 0x1D, 0x1F, + 0x1A, 0x1C, 0x1E, 0x1F, + 0x1A, 0x1B, 0x1E, 0x1F, + 0x1A, 0x1B, 0x1D, 0x1F, + 0x1A, 0x1C, 
0x1E, 0x20, + 0x1A, 0x1D, 0x1F, 0x20, + 0x1A, 0x1B, 0x1F, 0x20, + 0x1A, 0x1B, 0x1D, 0x20, + 0x1A, 0x20, 0x20, 0x20, + 0x1A, 0x1A, 0x20, 0x20, + 0x1A, 0x1A, 0x1A, 0x20, + 0x1A, 0x1D, 0x1E, 0x21, + 0x1A, 0x1D, 0x20, 0x21, + 0x1A, 0x1B, 0x20, 0x21, + 0x1A, 0x1B, 0x1E, 0x21, + 0x1A, 0x1D, 0x1F, 0x22, + 0x1A, 0x1E, 0x20, 0x22, + 0x1A, 0x1C, 0x20, 0x22, + 0x1A, 0x1C, 0x1E, 0x22, + 0x1A, 0x22, 0x22, 0x22, + 0x1A, 0x1A, 0x22, 0x22, + 0x1A, 0x1A, 0x1A, 0x22, + 0x1A, 0x1E, 0x21, 0x25, + 0x1A, 0x1F, 0x23, 0x25, + 0x1A, 0x1C, 0x23, 0x25, + 0x1A, 0x1C, 0x20, 0x25, + 0x1A, 0x25, 0x25, 0x25, + 0x1A, 0x1A, 0x25, 0x25, + 0x1A, 0x1A, 0x1A, 0x25, + 0x1A, 0x1F, 0x23, 0x28, + 0x1A, 0x21, 0x25, 0x28, + 0x1A, 0x1D, 0x25, 0x28, + 0x1A, 0x1D, 0x21, 0x28, + 0x1A, 0x28, 0x28, 0x28, + 0x1A, 0x1A, 0x28, 0x28, + 0x1A, 0x1A, 0x1A, 0x28, + 0x1A, 0x20, 0x25, 0x2B, + 0x1A, 0x22, 0x27, 0x2B, + 0x1A, 0x1E, 0x27, 0x2B, + 0x1A, 0x1E, 0x23, 0x2B, + 0x1A, 0x2B, 0x2B, 0x2B, + 0x1A, 0x1A, 0x2B, 0x2B, + 0x1A, 0x1A, 0x1A, 0x2B, + 0x1A, 0x21, 0x27, 0x2E, + 0x1A, 0x24, 0x29, 0x2E, + 0x1A, 0x1F, 0x29, 0x2E, + 0x1A, 0x1F, 0x24, 0x2E, + 0x1A, 0x2E, 0x2E, 0x2E, + 0x1A, 0x1A, 0x2E, 0x2E, + 0x1A, 0x1A, 0x1A, 0x2E, + 0x1A, 0x25, 0x2C, 0x31, + 0x1A, 0x1F, 0x2C, 0x31, + 0x1A, 0x1F, 0x26, 0x31, + 0x1A, 0x31, 0x31, 0x31, + 0x1A, 0x1A, 0x31, 0x31, + 0x1A, 0x1A, 0x1A, 0x31, + 0x1A, 0x27, 0x2E, 0x34, + 0x1A, 0x20, 0x2E, 0x34, + 0x1A, 0x20, 0x27, 0x34, + 0x1A, 0x34, 0x34, 0x34, + 0x1A, 0x1A, 0x34, 0x34, + 0x1A, 0x1A, 0x1A, 0x34, + 0x1A, 0x28, 0x30, 0x37, + 0x1A, 0x21, 0x30, 0x37, + 0x1A, 0x21, 0x29, 0x37, + 0x1A, 0x37, 0x37, 0x37, + 0x1A, 0x1A, 0x37, 0x37, + 0x1A, 0x1A, 0x1A, 0x37, + 0x1A, 0x2A, 0x32, 0x3A, + 0x1A, 0x22, 0x32, 0x3A, + 0x1A, 0x22, 0x2A, 0x3A, + 0x1A, 0x3A, 0x3A, 0x3A, + 0x1A, 0x1A, 0x3A, 0x3A, + 0x1A, 0x1A, 0x1A, 0x3A, + 0x1A, 0x3D, 0x3D, 0x3D, + 0x1A, 0x1A, 0x3D, 0x3D, + 0x1A, 0x1A, 0x1A, 0x3D, + 0x1A, 0x2C, 0x35, 0x3E, + 0x1A, 0x23, 0x35, 0x3E, + 0x1A, 0x23, 0x2C, 0x3E, + 0x1B, 0x1C, 0x1C, 0x1D, + 0x1B, 
0x1C, 0x1D, 0x1E, + 0x1B, 0x1D, 0x1E, 0x1F, + 0x1B, 0x1C, 0x1E, 0x1F, + 0x1B, 0x1C, 0x1D, 0x1F, + 0x1B, 0x1D, 0x1E, 0x20, + 0x1B, 0x1D, 0x1F, 0x20, + 0x1B, 0x1C, 0x1F, 0x20, + 0x1B, 0x1C, 0x1E, 0x20, + 0x1B, 0x1D, 0x1F, 0x21, + 0x1B, 0x1E, 0x20, 0x21, + 0x1B, 0x1C, 0x20, 0x21, + 0x1B, 0x1C, 0x1E, 0x21, + 0x1B, 0x21, 0x21, 0x21, + 0x1B, 0x1B, 0x21, 0x21, + 0x1B, 0x1B, 0x1B, 0x21, + 0x1B, 0x1E, 0x1F, 0x22, + 0x1B, 0x1E, 0x21, 0x22, + 0x1B, 0x1C, 0x21, 0x22, + 0x1B, 0x1C, 0x1F, 0x22, + 0x1B, 0x1E, 0x20, 0x23, + 0x1B, 0x1F, 0x21, 0x23, + 0x1B, 0x1D, 0x21, 0x23, + 0x1B, 0x1D, 0x1F, 0x23, + 0x1B, 0x23, 0x23, 0x23, + 0x1B, 0x1B, 0x23, 0x23, + 0x1B, 0x1B, 0x1B, 0x23, + 0x1B, 0x1F, 0x22, 0x26, + 0x1B, 0x20, 0x24, 0x26, + 0x1B, 0x1D, 0x24, 0x26, + 0x1B, 0x1D, 0x21, 0x26, + 0x1B, 0x26, 0x26, 0x26, + 0x1B, 0x1B, 0x26, 0x26, + 0x1B, 0x1B, 0x1B, 0x26, + 0x1B, 0x20, 0x24, 0x29, + 0x1B, 0x22, 0x26, 0x29, + 0x1B, 0x1E, 0x26, 0x29, + 0x1B, 0x1E, 0x22, 0x29, + 0x1B, 0x29, 0x29, 0x29, + 0x1B, 0x1B, 0x29, 0x29, + 0x1B, 0x1B, 0x1B, 0x29, + 0x1B, 0x21, 0x26, 0x2C, + 0x1B, 0x23, 0x28, 0x2C, + 0x1B, 0x1F, 0x28, 0x2C, + 0x1B, 0x1F, 0x24, 0x2C, + 0x1B, 0x2C, 0x2C, 0x2C, + 0x1B, 0x1B, 0x2C, 0x2C, + 0x1B, 0x1B, 0x1B, 0x2C, + 0x1B, 0x22, 0x28, 0x2F, + 0x1B, 0x25, 0x2A, 0x2F, + 0x1B, 0x20, 0x2A, 0x2F, + 0x1B, 0x20, 0x25, 0x2F, + 0x1B, 0x2F, 0x2F, 0x2F, + 0x1B, 0x1B, 0x2F, 0x2F, + 0x1B, 0x1B, 0x1B, 0x2F, + 0x1B, 0x26, 0x2D, 0x32, + 0x1B, 0x20, 0x2D, 0x32, + 0x1B, 0x20, 0x27, 0x32, + 0x1B, 0x32, 0x32, 0x32, + 0x1B, 0x1B, 0x32, 0x32, + 0x1B, 0x1B, 0x1B, 0x32, + 0x1B, 0x28, 0x2F, 0x35, + 0x1B, 0x21, 0x2F, 0x35, + 0x1B, 0x21, 0x28, 0x35, + 0x1B, 0x35, 0x35, 0x35, + 0x1B, 0x1B, 0x35, 0x35, + 0x1B, 0x1B, 0x1B, 0x35, + 0x1B, 0x29, 0x31, 0x38, + 0x1B, 0x22, 0x31, 0x38, + 0x1B, 0x22, 0x2A, 0x38, + 0x1B, 0x38, 0x38, 0x38, + 0x1B, 0x1B, 0x38, 0x38, + 0x1B, 0x1B, 0x1B, 0x38, + 0x1B, 0x2B, 0x33, 0x3B, + 0x1B, 0x23, 0x33, 0x3B, + 0x1B, 0x23, 0x2B, 0x3B, + 0x1B, 0x3B, 0x3B, 0x3B, + 0x1B, 0x1B, 0x3B, 0x3B, + 
0x1B, 0x1B, 0x1B, 0x3B, + 0x1B, 0x3E, 0x3E, 0x3E, + 0x1B, 0x1B, 0x3E, 0x3E, + 0x1B, 0x1B, 0x1B, 0x3E, + 0x1B, 0x2D, 0x36, 0x3F, + 0x1B, 0x24, 0x36, 0x3F, + 0x1B, 0x24, 0x2D, 0x3F, + 0x1C, 0x1D, 0x1D, 0x1E, + 0x1C, 0x1D, 0x1E, 0x1F, + 0x1C, 0x1E, 0x1F, 0x20, + 0x1C, 0x1D, 0x1F, 0x20, + 0x1C, 0x1D, 0x1E, 0x20, + 0x1C, 0x1E, 0x1F, 0x21, + 0x1C, 0x1E, 0x20, 0x21, + 0x1C, 0x1D, 0x20, 0x21, + 0x1C, 0x1D, 0x1F, 0x21, + 0x1C, 0x1E, 0x20, 0x22, + 0x1C, 0x1F, 0x21, 0x22, + 0x1C, 0x1D, 0x21, 0x22, + 0x1C, 0x1D, 0x1F, 0x22, + 0x1C, 0x22, 0x22, 0x22, + 0x1C, 0x1C, 0x22, 0x22, + 0x1C, 0x1C, 0x1C, 0x22, + 0x1C, 0x1F, 0x20, 0x23, + 0x1C, 0x1F, 0x22, 0x23, + 0x1C, 0x1D, 0x22, 0x23, + 0x1C, 0x1D, 0x20, 0x23, + 0x1C, 0x1F, 0x21, 0x24, + 0x1C, 0x20, 0x22, 0x24, + 0x1C, 0x1E, 0x22, 0x24, + 0x1C, 0x1E, 0x20, 0x24, + 0x1C, 0x24, 0x24, 0x24, + 0x1C, 0x1C, 0x24, 0x24, + 0x1C, 0x1C, 0x1C, 0x24, + 0x1C, 0x20, 0x23, 0x27, + 0x1C, 0x21, 0x25, 0x27, + 0x1C, 0x1E, 0x25, 0x27, + 0x1C, 0x1E, 0x22, 0x27, + 0x1C, 0x27, 0x27, 0x27, + 0x1C, 0x1C, 0x27, 0x27, + 0x1C, 0x1C, 0x1C, 0x27, + 0x1C, 0x21, 0x25, 0x2A, + 0x1C, 0x23, 0x27, 0x2A, + 0x1C, 0x1F, 0x27, 0x2A, + 0x1C, 0x1F, 0x23, 0x2A, + 0x1C, 0x2A, 0x2A, 0x2A, + 0x1C, 0x1C, 0x2A, 0x2A, + 0x1C, 0x1C, 0x1C, 0x2A, + 0x1C, 0x22, 0x27, 0x2D, + 0x1C, 0x24, 0x29, 0x2D, + 0x1C, 0x20, 0x29, 0x2D, + 0x1C, 0x20, 0x25, 0x2D, + 0x1C, 0x2D, 0x2D, 0x2D, + 0x1C, 0x1C, 0x2D, 0x2D, + 0x1C, 0x1C, 0x1C, 0x2D, + 0x1C, 0x23, 0x29, 0x30, + 0x1C, 0x26, 0x2B, 0x30, + 0x1C, 0x21, 0x2B, 0x30, + 0x1C, 0x21, 0x26, 0x30, + 0x1C, 0x30, 0x30, 0x30, + 0x1C, 0x1C, 0x30, 0x30, + 0x1C, 0x1C, 0x1C, 0x30, + 0x1C, 0x27, 0x2E, 0x33, + 0x1C, 0x21, 0x2E, 0x33, + 0x1C, 0x21, 0x28, 0x33, + 0x1C, 0x33, 0x33, 0x33, + 0x1C, 0x1C, 0x33, 0x33, + 0x1C, 0x1C, 0x1C, 0x33, + 0x1C, 0x29, 0x30, 0x36, + 0x1C, 0x22, 0x30, 0x36, + 0x1C, 0x22, 0x29, 0x36, + 0x1C, 0x36, 0x36, 0x36, + 0x1C, 0x1C, 0x36, 0x36, + 0x1C, 0x1C, 0x1C, 0x36, + 0x1C, 0x2A, 0x32, 0x39, + 0x1C, 0x23, 0x32, 0x39, + 0x1C, 0x23, 0x2B, 0x39, 
+ 0x1C, 0x39, 0x39, 0x39, + 0x1C, 0x1C, 0x39, 0x39, + 0x1C, 0x1C, 0x1C, 0x39, + 0x1C, 0x2C, 0x34, 0x3C, + 0x1C, 0x24, 0x34, 0x3C, + 0x1C, 0x24, 0x2C, 0x3C, + 0x1C, 0x3C, 0x3C, 0x3C, + 0x1C, 0x1C, 0x3C, 0x3C, + 0x1C, 0x1C, 0x1C, 0x3C, + 0x1C, 0x3F, 0x3F, 0x3F, + 0x1C, 0x1C, 0x3F, 0x3F, + 0x1C, 0x1C, 0x1C, 0x3F, + 0x1D, 0x1E, 0x1E, 0x1F, + 0x1D, 0x1E, 0x1F, 0x20, + 0x1D, 0x1F, 0x20, 0x21, + 0x1D, 0x1E, 0x20, 0x21, + 0x1D, 0x1E, 0x1F, 0x21, + 0x1D, 0x1F, 0x20, 0x22, + 0x1D, 0x1F, 0x21, 0x22, + 0x1D, 0x1E, 0x21, 0x22, + 0x1D, 0x1E, 0x20, 0x22, + 0x1D, 0x1F, 0x21, 0x23, + 0x1D, 0x20, 0x22, 0x23, + 0x1D, 0x1E, 0x22, 0x23, + 0x1D, 0x1E, 0x20, 0x23, + 0x1D, 0x23, 0x23, 0x23, + 0x1D, 0x1D, 0x23, 0x23, + 0x1D, 0x1D, 0x1D, 0x23, + 0x1D, 0x20, 0x21, 0x24, + 0x1D, 0x20, 0x23, 0x24, + 0x1D, 0x1E, 0x23, 0x24, + 0x1D, 0x1E, 0x21, 0x24, + 0x1D, 0x20, 0x22, 0x25, + 0x1D, 0x21, 0x23, 0x25, + 0x1D, 0x1F, 0x23, 0x25, + 0x1D, 0x1F, 0x21, 0x25, + 0x1D, 0x25, 0x25, 0x25, + 0x1D, 0x1D, 0x25, 0x25, + 0x1D, 0x1D, 0x1D, 0x25, + 0x1D, 0x21, 0x24, 0x28, + 0x1D, 0x22, 0x26, 0x28, + 0x1D, 0x1F, 0x26, 0x28, + 0x1D, 0x1F, 0x23, 0x28, + 0x1D, 0x28, 0x28, 0x28, + 0x1D, 0x1D, 0x28, 0x28, + 0x1D, 0x1D, 0x1D, 0x28, + 0x1D, 0x22, 0x26, 0x2B, + 0x1D, 0x24, 0x28, 0x2B, + 0x1D, 0x20, 0x28, 0x2B, + 0x1D, 0x20, 0x24, 0x2B, + 0x1D, 0x2B, 0x2B, 0x2B, + 0x1D, 0x1D, 0x2B, 0x2B, + 0x1D, 0x1D, 0x1D, 0x2B, + 0x1D, 0x23, 0x28, 0x2E, + 0x1D, 0x25, 0x2A, 0x2E, + 0x1D, 0x21, 0x2A, 0x2E, + 0x1D, 0x21, 0x26, 0x2E, + 0x1D, 0x2E, 0x2E, 0x2E, + 0x1D, 0x1D, 0x2E, 0x2E, + 0x1D, 0x1D, 0x1D, 0x2E, + 0x1D, 0x24, 0x2A, 0x31, + 0x1D, 0x27, 0x2C, 0x31, + 0x1D, 0x22, 0x2C, 0x31, + 0x1D, 0x22, 0x27, 0x31, + 0x1D, 0x31, 0x31, 0x31, + 0x1D, 0x1D, 0x31, 0x31, + 0x1D, 0x1D, 0x1D, 0x31, + 0x1D, 0x28, 0x2F, 0x34, + 0x1D, 0x22, 0x2F, 0x34, + 0x1D, 0x22, 0x29, 0x34, + 0x1D, 0x34, 0x34, 0x34, + 0x1D, 0x1D, 0x34, 0x34, + 0x1D, 0x1D, 0x1D, 0x34, + 0x1D, 0x2A, 0x31, 0x37, + 0x1D, 0x23, 0x31, 0x37, + 0x1D, 0x23, 0x2A, 0x37, + 0x1D, 0x37, 0x37, 
0x37, + 0x1D, 0x1D, 0x37, 0x37, + 0x1D, 0x1D, 0x1D, 0x37, + 0x1D, 0x2B, 0x33, 0x3A, + 0x1D, 0x24, 0x33, 0x3A, + 0x1D, 0x24, 0x2C, 0x3A, + 0x1D, 0x3A, 0x3A, 0x3A, + 0x1D, 0x1D, 0x3A, 0x3A, + 0x1D, 0x1D, 0x1D, 0x3A, + 0x1D, 0x2D, 0x35, 0x3D, + 0x1D, 0x25, 0x35, 0x3D, + 0x1D, 0x25, 0x2D, 0x3D, + 0x1D, 0x3D, 0x3D, 0x3D, + 0x1D, 0x1D, 0x3D, 0x3D, + 0x1D, 0x1D, 0x1D, 0x3D, + 0x1E, 0x1F, 0x1F, 0x20, + 0x1E, 0x1F, 0x20, 0x21, + 0x1E, 0x20, 0x21, 0x22, + 0x1E, 0x1F, 0x21, 0x22, + 0x1E, 0x1F, 0x20, 0x22, + 0x1E, 0x20, 0x21, 0x23, + 0x1E, 0x20, 0x22, 0x23, + 0x1E, 0x1F, 0x22, 0x23, + 0x1E, 0x1F, 0x21, 0x23, + 0x1E, 0x20, 0x22, 0x24, + 0x1E, 0x21, 0x23, 0x24, + 0x1E, 0x1F, 0x23, 0x24, + 0x1E, 0x1F, 0x21, 0x24, + 0x1E, 0x24, 0x24, 0x24, + 0x1E, 0x1E, 0x24, 0x24, + 0x1E, 0x1E, 0x1E, 0x24, + 0x1E, 0x21, 0x22, 0x25, + 0x1E, 0x21, 0x24, 0x25, + 0x1E, 0x1F, 0x24, 0x25, + 0x1E, 0x1F, 0x22, 0x25, + 0x1E, 0x21, 0x23, 0x26, + 0x1E, 0x22, 0x24, 0x26, + 0x1E, 0x20, 0x24, 0x26, + 0x1E, 0x20, 0x22, 0x26, + 0x1E, 0x26, 0x26, 0x26, + 0x1E, 0x1E, 0x26, 0x26, + 0x1E, 0x1E, 0x1E, 0x26, + 0x1E, 0x22, 0x25, 0x29, + 0x1E, 0x23, 0x27, 0x29, + 0x1E, 0x20, 0x27, 0x29, + 0x1E, 0x20, 0x24, 0x29, + 0x1E, 0x29, 0x29, 0x29, + 0x1E, 0x1E, 0x29, 0x29, + 0x1E, 0x1E, 0x1E, 0x29, + 0x1E, 0x23, 0x27, 0x2C, + 0x1E, 0x25, 0x29, 0x2C, + 0x1E, 0x21, 0x29, 0x2C, + 0x1E, 0x21, 0x25, 0x2C, + 0x1E, 0x2C, 0x2C, 0x2C, + 0x1E, 0x1E, 0x2C, 0x2C, + 0x1E, 0x1E, 0x1E, 0x2C, + 0x1E, 0x24, 0x29, 0x2F, + 0x1E, 0x26, 0x2B, 0x2F, + 0x1E, 0x22, 0x2B, 0x2F, + 0x1E, 0x22, 0x27, 0x2F, + 0x1E, 0x2F, 0x2F, 0x2F, + 0x1E, 0x1E, 0x2F, 0x2F, + 0x1E, 0x1E, 0x1E, 0x2F, + 0x1E, 0x25, 0x2B, 0x32, + 0x1E, 0x28, 0x2D, 0x32, + 0x1E, 0x23, 0x2D, 0x32, + 0x1E, 0x23, 0x28, 0x32, + 0x1E, 0x32, 0x32, 0x32, + 0x1E, 0x1E, 0x32, 0x32, + 0x1E, 0x1E, 0x1E, 0x32, + 0x1E, 0x29, 0x30, 0x35, + 0x1E, 0x23, 0x30, 0x35, + 0x1E, 0x23, 0x2A, 0x35, + 0x1E, 0x35, 0x35, 0x35, + 0x1E, 0x1E, 0x35, 0x35, + 0x1E, 0x1E, 0x1E, 0x35, + 0x1E, 0x2B, 0x32, 0x38, + 0x1E, 0x24, 
0x32, 0x38, + 0x1E, 0x24, 0x2B, 0x38, + 0x1E, 0x38, 0x38, 0x38, + 0x1E, 0x1E, 0x38, 0x38, + 0x1E, 0x1E, 0x1E, 0x38, + 0x1E, 0x2C, 0x34, 0x3B, + 0x1E, 0x25, 0x34, 0x3B, + 0x1E, 0x25, 0x2D, 0x3B, + 0x1E, 0x3B, 0x3B, 0x3B, + 0x1E, 0x1E, 0x3B, 0x3B, + 0x1E, 0x1E, 0x1E, 0x3B, + 0x1E, 0x2E, 0x36, 0x3E, + 0x1E, 0x26, 0x36, 0x3E, + 0x1E, 0x26, 0x2E, 0x3E, + 0x1E, 0x3E, 0x3E, 0x3E, + 0x1E, 0x1E, 0x3E, 0x3E, + 0x1E, 0x1E, 0x1E, 0x3E, + 0x1F, 0x20, 0x20, 0x21, + 0x1F, 0x20, 0x21, 0x22, + 0x1F, 0x21, 0x22, 0x23, + 0x1F, 0x20, 0x22, 0x23, + 0x1F, 0x20, 0x21, 0x23, + 0x1F, 0x21, 0x22, 0x24, + 0x1F, 0x21, 0x23, 0x24, + 0x1F, 0x20, 0x23, 0x24, + 0x1F, 0x20, 0x22, 0x24, + 0x1F, 0x21, 0x23, 0x25, + 0x1F, 0x22, 0x24, 0x25, + 0x1F, 0x20, 0x24, 0x25, + 0x1F, 0x20, 0x22, 0x25, + 0x1F, 0x25, 0x25, 0x25, + 0x1F, 0x1F, 0x25, 0x25, + 0x1F, 0x1F, 0x1F, 0x25, + 0x1F, 0x22, 0x23, 0x26, + 0x1F, 0x22, 0x25, 0x26, + 0x1F, 0x20, 0x25, 0x26, + 0x1F, 0x20, 0x23, 0x26, + 0x1F, 0x22, 0x24, 0x27, + 0x1F, 0x23, 0x25, 0x27, + 0x1F, 0x21, 0x25, 0x27, + 0x1F, 0x21, 0x23, 0x27, + 0x1F, 0x27, 0x27, 0x27, + 0x1F, 0x1F, 0x27, 0x27, + 0x1F, 0x1F, 0x1F, 0x27, + 0x1F, 0x23, 0x26, 0x2A, + 0x1F, 0x24, 0x28, 0x2A, + 0x1F, 0x21, 0x28, 0x2A, + 0x1F, 0x21, 0x25, 0x2A, + 0x1F, 0x2A, 0x2A, 0x2A, + 0x1F, 0x1F, 0x2A, 0x2A, + 0x1F, 0x1F, 0x1F, 0x2A, + 0x1F, 0x24, 0x28, 0x2D, + 0x1F, 0x26, 0x2A, 0x2D, + 0x1F, 0x22, 0x2A, 0x2D, + 0x1F, 0x22, 0x26, 0x2D, + 0x1F, 0x2D, 0x2D, 0x2D, + 0x1F, 0x1F, 0x2D, 0x2D, + 0x1F, 0x1F, 0x1F, 0x2D, + 0x1F, 0x25, 0x2A, 0x30, + 0x1F, 0x27, 0x2C, 0x30, + 0x1F, 0x23, 0x2C, 0x30, + 0x1F, 0x23, 0x28, 0x30, + 0x1F, 0x30, 0x30, 0x30, + 0x1F, 0x1F, 0x30, 0x30, + 0x1F, 0x1F, 0x1F, 0x30, + 0x1F, 0x26, 0x2C, 0x33, + 0x1F, 0x29, 0x2E, 0x33, + 0x1F, 0x24, 0x2E, 0x33, + 0x1F, 0x24, 0x29, 0x33, + 0x1F, 0x33, 0x33, 0x33, + 0x1F, 0x1F, 0x33, 0x33, + 0x1F, 0x1F, 0x1F, 0x33, + 0x1F, 0x2A, 0x31, 0x36, + 0x1F, 0x24, 0x31, 0x36, + 0x1F, 0x24, 0x2B, 0x36, + 0x1F, 0x36, 0x36, 0x36, + 0x1F, 0x1F, 0x36, 0x36, + 0x1F, 
0x1F, 0x1F, 0x36, + 0x1F, 0x2C, 0x33, 0x39, + 0x1F, 0x25, 0x33, 0x39, + 0x1F, 0x25, 0x2C, 0x39, + 0x1F, 0x39, 0x39, 0x39, + 0x1F, 0x1F, 0x39, 0x39, + 0x1F, 0x1F, 0x1F, 0x39, + 0x1F, 0x2D, 0x35, 0x3C, + 0x1F, 0x26, 0x35, 0x3C, + 0x1F, 0x26, 0x2E, 0x3C, + 0x1F, 0x3C, 0x3C, 0x3C, + 0x1F, 0x1F, 0x3C, 0x3C, + 0x1F, 0x1F, 0x1F, 0x3C, + 0x1F, 0x2F, 0x37, 0x3F, + 0x1F, 0x27, 0x37, 0x3F, + 0x1F, 0x27, 0x2F, 0x3F, + 0x1F, 0x3F, 0x3F, 0x3F, + 0x1F, 0x1F, 0x3F, 0x3F, + 0x1F, 0x1F, 0x1F, 0x3F, + 0x20, 0x21, 0x21, 0x22, + 0x20, 0x21, 0x22, 0x23, + 0x20, 0x22, 0x23, 0x24, + 0x20, 0x21, 0x23, 0x24, + 0x20, 0x21, 0x22, 0x24, + 0x20, 0x22, 0x23, 0x25, + 0x20, 0x22, 0x24, 0x25, + 0x20, 0x21, 0x24, 0x25, + 0x20, 0x21, 0x23, 0x25, + 0x20, 0x22, 0x24, 0x26, + 0x20, 0x23, 0x25, 0x26, + 0x20, 0x21, 0x25, 0x26, + 0x20, 0x21, 0x23, 0x26, + 0x20, 0x26, 0x26, 0x26, + 0x20, 0x20, 0x26, 0x26, + 0x20, 0x20, 0x20, 0x26, + 0x20, 0x23, 0x24, 0x27, + 0x20, 0x23, 0x26, 0x27, + 0x20, 0x21, 0x26, 0x27, + 0x20, 0x21, 0x24, 0x27, + 0x20, 0x23, 0x25, 0x28, + 0x20, 0x24, 0x26, 0x28, + 0x20, 0x22, 0x26, 0x28, + 0x20, 0x22, 0x24, 0x28, + 0x20, 0x28, 0x28, 0x28, + 0x20, 0x20, 0x28, 0x28, + 0x20, 0x20, 0x20, 0x28, + 0x20, 0x24, 0x27, 0x2B, + 0x20, 0x25, 0x29, 0x2B, + 0x20, 0x22, 0x29, 0x2B, + 0x20, 0x22, 0x26, 0x2B, + 0x20, 0x2B, 0x2B, 0x2B, + 0x20, 0x20, 0x2B, 0x2B, + 0x20, 0x20, 0x20, 0x2B, + 0x20, 0x25, 0x29, 0x2E, + 0x20, 0x27, 0x2B, 0x2E, + 0x20, 0x23, 0x2B, 0x2E, + 0x20, 0x23, 0x27, 0x2E, + 0x20, 0x2E, 0x2E, 0x2E, + 0x20, 0x20, 0x2E, 0x2E, + 0x20, 0x20, 0x20, 0x2E, + 0x20, 0x26, 0x2B, 0x31, + 0x20, 0x28, 0x2D, 0x31, + 0x20, 0x24, 0x2D, 0x31, + 0x20, 0x24, 0x29, 0x31, + 0x20, 0x31, 0x31, 0x31, + 0x20, 0x20, 0x31, 0x31, + 0x20, 0x20, 0x20, 0x31, + 0x20, 0x27, 0x2D, 0x34, + 0x20, 0x2A, 0x2F, 0x34, + 0x20, 0x25, 0x2F, 0x34, + 0x20, 0x25, 0x2A, 0x34, + 0x20, 0x34, 0x34, 0x34, + 0x20, 0x20, 0x34, 0x34, + 0x20, 0x20, 0x20, 0x34, + 0x20, 0x2B, 0x32, 0x37, + 0x20, 0x25, 0x32, 0x37, + 0x20, 0x25, 0x2C, 0x37, + 
0x20, 0x37, 0x37, 0x37, + 0x20, 0x20, 0x37, 0x37, + 0x20, 0x20, 0x20, 0x37, + 0x20, 0x2D, 0x34, 0x3A, + 0x20, 0x26, 0x34, 0x3A, + 0x20, 0x26, 0x2D, 0x3A, + 0x20, 0x3A, 0x3A, 0x3A, + 0x20, 0x20, 0x3A, 0x3A, + 0x20, 0x20, 0x20, 0x3A, + 0x20, 0x2E, 0x36, 0x3D, + 0x20, 0x27, 0x36, 0x3D, + 0x20, 0x27, 0x2F, 0x3D, + 0x20, 0x3D, 0x3D, 0x3D, + 0x20, 0x20, 0x3D, 0x3D, + 0x20, 0x20, 0x20, 0x3D, + 0x21, 0x22, 0x22, 0x23, + 0x21, 0x22, 0x23, 0x24, + 0x21, 0x23, 0x24, 0x25, + 0x21, 0x22, 0x24, 0x25, + 0x21, 0x22, 0x23, 0x25, + 0x21, 0x23, 0x24, 0x26, + 0x21, 0x23, 0x25, 0x26, + 0x21, 0x22, 0x25, 0x26, + 0x21, 0x22, 0x24, 0x26, + 0x21, 0x23, 0x25, 0x27, + 0x21, 0x24, 0x26, 0x27, + 0x21, 0x22, 0x26, 0x27, + 0x21, 0x22, 0x24, 0x27, + 0x21, 0x27, 0x27, 0x27, + 0x21, 0x21, 0x27, 0x27, + 0x21, 0x21, 0x21, 0x27, + 0x21, 0x24, 0x25, 0x28, + 0x21, 0x24, 0x27, 0x28, + 0x21, 0x22, 0x27, 0x28, + 0x21, 0x22, 0x25, 0x28, + 0x21, 0x24, 0x26, 0x29, + 0x21, 0x25, 0x27, 0x29, + 0x21, 0x23, 0x27, 0x29, + 0x21, 0x23, 0x25, 0x29, + 0x21, 0x29, 0x29, 0x29, + 0x21, 0x21, 0x29, 0x29, + 0x21, 0x21, 0x21, 0x29, + 0x21, 0x25, 0x28, 0x2C, + 0x21, 0x26, 0x2A, 0x2C, + 0x21, 0x23, 0x2A, 0x2C, + 0x21, 0x23, 0x27, 0x2C, + 0x21, 0x2C, 0x2C, 0x2C, + 0x21, 0x21, 0x2C, 0x2C, + 0x21, 0x21, 0x21, 0x2C, + 0x21, 0x26, 0x2A, 0x2F, + 0x21, 0x28, 0x2C, 0x2F, + 0x21, 0x24, 0x2C, 0x2F, + 0x21, 0x24, 0x28, 0x2F, + 0x21, 0x2F, 0x2F, 0x2F, + 0x21, 0x21, 0x2F, 0x2F, + 0x21, 0x21, 0x21, 0x2F, + 0x21, 0x27, 0x2C, 0x32, + 0x21, 0x29, 0x2E, 0x32, + 0x21, 0x25, 0x2E, 0x32, + 0x21, 0x25, 0x2A, 0x32, + 0x21, 0x32, 0x32, 0x32, + 0x21, 0x21, 0x32, 0x32, + 0x21, 0x21, 0x21, 0x32, + 0x21, 0x28, 0x2E, 0x35, + 0x21, 0x2B, 0x30, 0x35, + 0x21, 0x26, 0x30, 0x35, + 0x21, 0x26, 0x2B, 0x35, + 0x21, 0x35, 0x35, 0x35, + 0x21, 0x21, 0x35, 0x35, + 0x21, 0x21, 0x21, 0x35, + 0x21, 0x2C, 0x33, 0x38, + 0x21, 0x26, 0x33, 0x38, + 0x21, 0x26, 0x2D, 0x38, + 0x21, 0x38, 0x38, 0x38, + 0x21, 0x21, 0x38, 0x38, + 0x21, 0x21, 0x21, 0x38, + 0x21, 0x2E, 0x35, 0x3B, 
+ 0x21, 0x27, 0x35, 0x3B, + 0x21, 0x27, 0x2E, 0x3B, + 0x21, 0x3B, 0x3B, 0x3B, + 0x21, 0x21, 0x3B, 0x3B, + 0x21, 0x21, 0x21, 0x3B, + 0x21, 0x2F, 0x37, 0x3E, + 0x21, 0x28, 0x37, 0x3E, + 0x21, 0x28, 0x30, 0x3E, + 0x21, 0x3E, 0x3E, 0x3E, + 0x21, 0x21, 0x3E, 0x3E, + 0x21, 0x21, 0x21, 0x3E, + 0x22, 0x23, 0x23, 0x24, + 0x22, 0x23, 0x24, 0x25, + 0x22, 0x24, 0x25, 0x26, + 0x22, 0x23, 0x25, 0x26, + 0x22, 0x23, 0x24, 0x26, + 0x22, 0x24, 0x25, 0x27, + 0x22, 0x24, 0x26, 0x27, + 0x22, 0x23, 0x26, 0x27, + 0x22, 0x23, 0x25, 0x27, + 0x22, 0x24, 0x26, 0x28, + 0x22, 0x25, 0x27, 0x28, + 0x22, 0x23, 0x27, 0x28, + 0x22, 0x23, 0x25, 0x28, + 0x22, 0x28, 0x28, 0x28, + 0x22, 0x22, 0x28, 0x28, + 0x22, 0x22, 0x22, 0x28, + 0x22, 0x25, 0x26, 0x29, + 0x22, 0x25, 0x28, 0x29, + 0x22, 0x23, 0x28, 0x29, + 0x22, 0x23, 0x26, 0x29, + 0x22, 0x25, 0x27, 0x2A, + 0x22, 0x26, 0x28, 0x2A, + 0x22, 0x24, 0x28, 0x2A, + 0x22, 0x24, 0x26, 0x2A, + 0x22, 0x2A, 0x2A, 0x2A, + 0x22, 0x22, 0x2A, 0x2A, + 0x22, 0x22, 0x22, 0x2A, + 0x22, 0x26, 0x29, 0x2D, + 0x22, 0x27, 0x2B, 0x2D, + 0x22, 0x24, 0x2B, 0x2D, + 0x22, 0x24, 0x28, 0x2D, + 0x22, 0x2D, 0x2D, 0x2D, + 0x22, 0x22, 0x2D, 0x2D, + 0x22, 0x22, 0x22, 0x2D, + 0x22, 0x27, 0x2B, 0x30, + 0x22, 0x29, 0x2D, 0x30, + 0x22, 0x25, 0x2D, 0x30, + 0x22, 0x25, 0x29, 0x30, + 0x22, 0x30, 0x30, 0x30, + 0x22, 0x22, 0x30, 0x30, + 0x22, 0x22, 0x22, 0x30, + 0x22, 0x28, 0x2D, 0x33, + 0x22, 0x2A, 0x2F, 0x33, + 0x22, 0x26, 0x2F, 0x33, + 0x22, 0x26, 0x2B, 0x33, + 0x22, 0x33, 0x33, 0x33, + 0x22, 0x22, 0x33, 0x33, + 0x22, 0x22, 0x22, 0x33, + 0x22, 0x29, 0x2F, 0x36, + 0x22, 0x2C, 0x31, 0x36, + 0x22, 0x27, 0x31, 0x36, + 0x22, 0x27, 0x2C, 0x36, + 0x22, 0x36, 0x36, 0x36, + 0x22, 0x22, 0x36, 0x36, + 0x22, 0x22, 0x22, 0x36, + 0x22, 0x2D, 0x34, 0x39, + 0x22, 0x27, 0x34, 0x39, + 0x22, 0x27, 0x2E, 0x39, + 0x22, 0x39, 0x39, 0x39, + 0x22, 0x22, 0x39, 0x39, + 0x22, 0x22, 0x22, 0x39, + 0x22, 0x2F, 0x36, 0x3C, + 0x22, 0x28, 0x36, 0x3C, + 0x22, 0x28, 0x2F, 0x3C, + 0x22, 0x3C, 0x3C, 0x3C, + 0x22, 0x22, 0x3C, 
0x3C, + 0x22, 0x22, 0x22, 0x3C, + 0x22, 0x30, 0x38, 0x3F, + 0x22, 0x29, 0x38, 0x3F, + 0x22, 0x29, 0x31, 0x3F, + 0x22, 0x3F, 0x3F, 0x3F, + 0x22, 0x22, 0x3F, 0x3F, + 0x22, 0x22, 0x22, 0x3F, + 0x23, 0x24, 0x24, 0x25, + 0x23, 0x24, 0x25, 0x26, + 0x23, 0x25, 0x26, 0x27, + 0x23, 0x24, 0x26, 0x27, + 0x23, 0x24, 0x25, 0x27, + 0x23, 0x25, 0x26, 0x28, + 0x23, 0x25, 0x27, 0x28, + 0x23, 0x24, 0x27, 0x28, + 0x23, 0x24, 0x26, 0x28, + 0x23, 0x25, 0x27, 0x29, + 0x23, 0x26, 0x28, 0x29, + 0x23, 0x24, 0x28, 0x29, + 0x23, 0x24, 0x26, 0x29, + 0x23, 0x29, 0x29, 0x29, + 0x23, 0x23, 0x29, 0x29, + 0x23, 0x23, 0x23, 0x29, + 0x23, 0x26, 0x27, 0x2A, + 0x23, 0x26, 0x29, 0x2A, + 0x23, 0x24, 0x29, 0x2A, + 0x23, 0x24, 0x27, 0x2A, + 0x23, 0x26, 0x28, 0x2B, + 0x23, 0x27, 0x29, 0x2B, + 0x23, 0x25, 0x29, 0x2B, + 0x23, 0x25, 0x27, 0x2B, + 0x23, 0x2B, 0x2B, 0x2B, + 0x23, 0x23, 0x2B, 0x2B, + 0x23, 0x23, 0x23, 0x2B, + 0x23, 0x27, 0x2A, 0x2E, + 0x23, 0x28, 0x2C, 0x2E, + 0x23, 0x25, 0x2C, 0x2E, + 0x23, 0x25, 0x29, 0x2E, + 0x23, 0x2E, 0x2E, 0x2E, + 0x23, 0x23, 0x2E, 0x2E, + 0x23, 0x23, 0x23, 0x2E, + 0x23, 0x28, 0x2C, 0x31, + 0x23, 0x2A, 0x2E, 0x31, + 0x23, 0x26, 0x2E, 0x31, + 0x23, 0x26, 0x2A, 0x31, + 0x23, 0x31, 0x31, 0x31, + 0x23, 0x23, 0x31, 0x31, + 0x23, 0x23, 0x23, 0x31, + 0x23, 0x29, 0x2E, 0x34, + 0x23, 0x2B, 0x30, 0x34, + 0x23, 0x27, 0x30, 0x34, + 0x23, 0x27, 0x2C, 0x34, + 0x23, 0x34, 0x34, 0x34, + 0x23, 0x23, 0x34, 0x34, + 0x23, 0x23, 0x23, 0x34, + 0x23, 0x2A, 0x30, 0x37, + 0x23, 0x2D, 0x32, 0x37, + 0x23, 0x28, 0x32, 0x37, + 0x23, 0x28, 0x2D, 0x37, + 0x23, 0x37, 0x37, 0x37, + 0x23, 0x23, 0x37, 0x37, + 0x23, 0x23, 0x23, 0x37, + 0x23, 0x2E, 0x35, 0x3A, + 0x23, 0x28, 0x35, 0x3A, + 0x23, 0x28, 0x2F, 0x3A, + 0x23, 0x3A, 0x3A, 0x3A, + 0x23, 0x23, 0x3A, 0x3A, + 0x23, 0x23, 0x23, 0x3A, + 0x23, 0x30, 0x37, 0x3D, + 0x23, 0x29, 0x37, 0x3D, + 0x23, 0x29, 0x30, 0x3D, + 0x23, 0x3D, 0x3D, 0x3D, + 0x23, 0x23, 0x3D, 0x3D, + 0x23, 0x23, 0x23, 0x3D, + 0x24, 0x25, 0x25, 0x26, + 0x24, 0x25, 0x26, 0x27, + 0x24, 0x26, 
0x27, 0x28, + 0x24, 0x25, 0x27, 0x28, + 0x24, 0x25, 0x26, 0x28, + 0x24, 0x26, 0x27, 0x29, + 0x24, 0x26, 0x28, 0x29, + 0x24, 0x25, 0x28, 0x29, + 0x24, 0x25, 0x27, 0x29, + 0x24, 0x26, 0x28, 0x2A, + 0x24, 0x27, 0x29, 0x2A, + 0x24, 0x25, 0x29, 0x2A, + 0x24, 0x25, 0x27, 0x2A, + 0x24, 0x2A, 0x2A, 0x2A, + 0x24, 0x24, 0x2A, 0x2A, + 0x24, 0x24, 0x24, 0x2A, + 0x24, 0x27, 0x28, 0x2B, + 0x24, 0x27, 0x2A, 0x2B, + 0x24, 0x25, 0x2A, 0x2B, + 0x24, 0x25, 0x28, 0x2B, + 0x24, 0x27, 0x29, 0x2C, + 0x24, 0x28, 0x2A, 0x2C, + 0x24, 0x26, 0x2A, 0x2C, + 0x24, 0x26, 0x28, 0x2C, + 0x24, 0x2C, 0x2C, 0x2C, + 0x24, 0x24, 0x2C, 0x2C, + 0x24, 0x24, 0x24, 0x2C, + 0x24, 0x28, 0x2B, 0x2F, + 0x24, 0x29, 0x2D, 0x2F, + 0x24, 0x26, 0x2D, 0x2F, + 0x24, 0x26, 0x2A, 0x2F, + 0x24, 0x2F, 0x2F, 0x2F, + 0x24, 0x24, 0x2F, 0x2F, + 0x24, 0x24, 0x24, 0x2F, + 0x24, 0x29, 0x2D, 0x32, + 0x24, 0x2B, 0x2F, 0x32, + 0x24, 0x27, 0x2F, 0x32, + 0x24, 0x27, 0x2B, 0x32, + 0x24, 0x32, 0x32, 0x32, + 0x24, 0x24, 0x32, 0x32, + 0x24, 0x24, 0x24, 0x32, + 0x24, 0x2A, 0x2F, 0x35, + 0x24, 0x2C, 0x31, 0x35, + 0x24, 0x28, 0x31, 0x35, + 0x24, 0x28, 0x2D, 0x35, + 0x24, 0x35, 0x35, 0x35, + 0x24, 0x24, 0x35, 0x35, + 0x24, 0x24, 0x24, 0x35, + 0x24, 0x2B, 0x31, 0x38, + 0x24, 0x2E, 0x33, 0x38, + 0x24, 0x29, 0x33, 0x38, + 0x24, 0x29, 0x2E, 0x38, + 0x24, 0x38, 0x38, 0x38, + 0x24, 0x24, 0x38, 0x38, + 0x24, 0x24, 0x24, 0x38, + 0x24, 0x2F, 0x36, 0x3B, + 0x24, 0x29, 0x36, 0x3B, + 0x24, 0x29, 0x30, 0x3B, + 0x24, 0x3B, 0x3B, 0x3B, + 0x24, 0x24, 0x3B, 0x3B, + 0x24, 0x24, 0x24, 0x3B, + 0x24, 0x31, 0x38, 0x3E, + 0x24, 0x2A, 0x38, 0x3E, + 0x24, 0x2A, 0x31, 0x3E, + 0x24, 0x3E, 0x3E, 0x3E, + 0x24, 0x24, 0x3E, 0x3E, + 0x24, 0x24, 0x24, 0x3E, + 0x25, 0x26, 0x26, 0x27, + 0x25, 0x26, 0x27, 0x28, + 0x25, 0x27, 0x28, 0x29, + 0x25, 0x26, 0x28, 0x29, + 0x25, 0x26, 0x27, 0x29, + 0x25, 0x27, 0x28, 0x2A, + 0x25, 0x27, 0x29, 0x2A, + 0x25, 0x26, 0x29, 0x2A, + 0x25, 0x26, 0x28, 0x2A, + 0x25, 0x27, 0x29, 0x2B, + 0x25, 0x28, 0x2A, 0x2B, + 0x25, 0x26, 0x2A, 0x2B, + 0x25, 
0x26, 0x28, 0x2B, + 0x25, 0x2B, 0x2B, 0x2B, + 0x25, 0x25, 0x2B, 0x2B, + 0x25, 0x25, 0x25, 0x2B, + 0x25, 0x28, 0x29, 0x2C, + 0x25, 0x28, 0x2B, 0x2C, + 0x25, 0x26, 0x2B, 0x2C, + 0x25, 0x26, 0x29, 0x2C, + 0x25, 0x28, 0x2A, 0x2D, + 0x25, 0x29, 0x2B, 0x2D, + 0x25, 0x27, 0x2B, 0x2D, + 0x25, 0x27, 0x29, 0x2D, + 0x25, 0x2D, 0x2D, 0x2D, + 0x25, 0x25, 0x2D, 0x2D, + 0x25, 0x25, 0x25, 0x2D, + 0x25, 0x29, 0x2C, 0x30, + 0x25, 0x2A, 0x2E, 0x30, + 0x25, 0x27, 0x2E, 0x30, + 0x25, 0x27, 0x2B, 0x30, + 0x25, 0x30, 0x30, 0x30, + 0x25, 0x25, 0x30, 0x30, + 0x25, 0x25, 0x25, 0x30, + 0x25, 0x2A, 0x2E, 0x33, + 0x25, 0x2C, 0x30, 0x33, + 0x25, 0x28, 0x30, 0x33, + 0x25, 0x28, 0x2C, 0x33, + 0x25, 0x33, 0x33, 0x33, + 0x25, 0x25, 0x33, 0x33, + 0x25, 0x25, 0x25, 0x33, + 0x25, 0x2B, 0x30, 0x36, + 0x25, 0x2D, 0x32, 0x36, + 0x25, 0x29, 0x32, 0x36, + 0x25, 0x29, 0x2E, 0x36, + 0x25, 0x36, 0x36, 0x36, + 0x25, 0x25, 0x36, 0x36, + 0x25, 0x25, 0x25, 0x36, + 0x25, 0x2C, 0x32, 0x39, + 0x25, 0x2F, 0x34, 0x39, + 0x25, 0x2A, 0x34, 0x39, + 0x25, 0x2A, 0x2F, 0x39, + 0x25, 0x39, 0x39, 0x39, + 0x25, 0x25, 0x39, 0x39, + 0x25, 0x25, 0x25, 0x39, + 0x25, 0x30, 0x37, 0x3C, + 0x25, 0x2A, 0x37, 0x3C, + 0x25, 0x2A, 0x31, 0x3C, + 0x25, 0x3C, 0x3C, 0x3C, + 0x25, 0x25, 0x3C, 0x3C, + 0x25, 0x25, 0x25, 0x3C, + 0x25, 0x32, 0x39, 0x3F, + 0x25, 0x2B, 0x39, 0x3F, + 0x25, 0x2B, 0x32, 0x3F, + 0x25, 0x3F, 0x3F, 0x3F, + 0x25, 0x25, 0x3F, 0x3F, + 0x25, 0x25, 0x25, 0x3F, + 0x26, 0x27, 0x27, 0x28, + 0x26, 0x27, 0x28, 0x29, + 0x26, 0x28, 0x29, 0x2A, + 0x26, 0x27, 0x29, 0x2A, + 0x26, 0x27, 0x28, 0x2A, + 0x26, 0x28, 0x29, 0x2B, + 0x26, 0x28, 0x2A, 0x2B, + 0x26, 0x27, 0x2A, 0x2B, + 0x26, 0x27, 0x29, 0x2B, + 0x26, 0x28, 0x2A, 0x2C, + 0x26, 0x29, 0x2B, 0x2C, + 0x26, 0x27, 0x2B, 0x2C, + 0x26, 0x27, 0x29, 0x2C, + 0x26, 0x2C, 0x2C, 0x2C, + 0x26, 0x26, 0x2C, 0x2C, + 0x26, 0x26, 0x26, 0x2C, + 0x26, 0x29, 0x2A, 0x2D, + 0x26, 0x29, 0x2C, 0x2D, + 0x26, 0x27, 0x2C, 0x2D, + 0x26, 0x27, 0x2A, 0x2D, + 0x26, 0x29, 0x2B, 0x2E, + 0x26, 0x2A, 0x2C, 0x2E, + 
0x26, 0x28, 0x2C, 0x2E, + 0x26, 0x28, 0x2A, 0x2E, + 0x26, 0x2E, 0x2E, 0x2E, + 0x26, 0x26, 0x2E, 0x2E, + 0x26, 0x26, 0x26, 0x2E, + 0x26, 0x2A, 0x2D, 0x31, + 0x26, 0x2B, 0x2F, 0x31, + 0x26, 0x28, 0x2F, 0x31, + 0x26, 0x28, 0x2C, 0x31, + 0x26, 0x31, 0x31, 0x31, + 0x26, 0x26, 0x31, 0x31, + 0x26, 0x26, 0x26, 0x31, + 0x26, 0x2B, 0x2F, 0x34, + 0x26, 0x2D, 0x31, 0x34, + 0x26, 0x29, 0x31, 0x34, + 0x26, 0x29, 0x2D, 0x34, + 0x26, 0x34, 0x34, 0x34, + 0x26, 0x26, 0x34, 0x34, + 0x26, 0x26, 0x26, 0x34, + 0x26, 0x2C, 0x31, 0x37, + 0x26, 0x2E, 0x33, 0x37, + 0x26, 0x2A, 0x33, 0x37, + 0x26, 0x2A, 0x2F, 0x37, + 0x26, 0x37, 0x37, 0x37, + 0x26, 0x26, 0x37, 0x37, + 0x26, 0x26, 0x26, 0x37, + 0x26, 0x2D, 0x33, 0x3A, + 0x26, 0x30, 0x35, 0x3A, + 0x26, 0x2B, 0x35, 0x3A, + 0x26, 0x2B, 0x30, 0x3A, + 0x26, 0x3A, 0x3A, 0x3A, + 0x26, 0x26, 0x3A, 0x3A, + 0x26, 0x26, 0x26, 0x3A, + 0x26, 0x31, 0x38, 0x3D, + 0x26, 0x2B, 0x38, 0x3D, + 0x26, 0x2B, 0x32, 0x3D, + 0x26, 0x3D, 0x3D, 0x3D, + 0x26, 0x26, 0x3D, 0x3D, + 0x26, 0x26, 0x26, 0x3D, + 0x27, 0x28, 0x28, 0x29, + 0x27, 0x28, 0x29, 0x2A, + 0x27, 0x29, 0x2A, 0x2B, + 0x27, 0x28, 0x2A, 0x2B, + 0x27, 0x28, 0x29, 0x2B, + 0x27, 0x29, 0x2A, 0x2C, + 0x27, 0x29, 0x2B, 0x2C, + 0x27, 0x28, 0x2B, 0x2C, + 0x27, 0x28, 0x2A, 0x2C, + 0x27, 0x29, 0x2B, 0x2D, + 0x27, 0x2A, 0x2C, 0x2D, + 0x27, 0x28, 0x2C, 0x2D, + 0x27, 0x28, 0x2A, 0x2D, + 0x27, 0x2D, 0x2D, 0x2D, + 0x27, 0x27, 0x2D, 0x2D, + 0x27, 0x27, 0x27, 0x2D, + 0x27, 0x2A, 0x2B, 0x2E, + 0x27, 0x2A, 0x2D, 0x2E, + 0x27, 0x28, 0x2D, 0x2E, + 0x27, 0x28, 0x2B, 0x2E, + 0x27, 0x2A, 0x2C, 0x2F, + 0x27, 0x2B, 0x2D, 0x2F, + 0x27, 0x29, 0x2D, 0x2F, + 0x27, 0x29, 0x2B, 0x2F, + 0x27, 0x2F, 0x2F, 0x2F, + 0x27, 0x27, 0x2F, 0x2F, + 0x27, 0x27, 0x27, 0x2F, + 0x27, 0x2B, 0x2E, 0x32, + 0x27, 0x2C, 0x30, 0x32, + 0x27, 0x29, 0x30, 0x32, + 0x27, 0x29, 0x2D, 0x32, + 0x27, 0x32, 0x32, 0x32, + 0x27, 0x27, 0x32, 0x32, + 0x27, 0x27, 0x27, 0x32, + 0x27, 0x2C, 0x30, 0x35, + 0x27, 0x2E, 0x32, 0x35, + 0x27, 0x2A, 0x32, 0x35, + 0x27, 0x2A, 0x2E, 0x35, 
+ 0x27, 0x35, 0x35, 0x35, + 0x27, 0x27, 0x35, 0x35, + 0x27, 0x27, 0x27, 0x35, + 0x27, 0x2D, 0x32, 0x38, + 0x27, 0x2F, 0x34, 0x38, + 0x27, 0x2B, 0x34, 0x38, + 0x27, 0x2B, 0x30, 0x38, + 0x27, 0x38, 0x38, 0x38, + 0x27, 0x27, 0x38, 0x38, + 0x27, 0x27, 0x27, 0x38, + 0x27, 0x2E, 0x34, 0x3B, + 0x27, 0x31, 0x36, 0x3B, + 0x27, 0x2C, 0x36, 0x3B, + 0x27, 0x2C, 0x31, 0x3B, + 0x27, 0x3B, 0x3B, 0x3B, + 0x27, 0x27, 0x3B, 0x3B, + 0x27, 0x27, 0x27, 0x3B, + 0x27, 0x32, 0x39, 0x3E, + 0x27, 0x2C, 0x39, 0x3E, + 0x27, 0x2C, 0x33, 0x3E, + 0x27, 0x3E, 0x3E, 0x3E, + 0x27, 0x27, 0x3E, 0x3E, + 0x27, 0x27, 0x27, 0x3E, + 0x28, 0x29, 0x29, 0x2A, + 0x28, 0x29, 0x2A, 0x2B, + 0x28, 0x2A, 0x2B, 0x2C, + 0x28, 0x29, 0x2B, 0x2C, + 0x28, 0x29, 0x2A, 0x2C, + 0x28, 0x2A, 0x2B, 0x2D, + 0x28, 0x2A, 0x2C, 0x2D, + 0x28, 0x29, 0x2C, 0x2D, + 0x28, 0x29, 0x2B, 0x2D, + 0x28, 0x2A, 0x2C, 0x2E, + 0x28, 0x2B, 0x2D, 0x2E, + 0x28, 0x29, 0x2D, 0x2E, + 0x28, 0x29, 0x2B, 0x2E, + 0x28, 0x2E, 0x2E, 0x2E, + 0x28, 0x28, 0x2E, 0x2E, + 0x28, 0x28, 0x28, 0x2E, + 0x28, 0x2B, 0x2C, 0x2F, + 0x28, 0x2B, 0x2E, 0x2F, + 0x28, 0x29, 0x2E, 0x2F, + 0x28, 0x29, 0x2C, 0x2F, + 0x28, 0x2B, 0x2D, 0x30, + 0x28, 0x2C, 0x2E, 0x30, + 0x28, 0x2A, 0x2E, 0x30, + 0x28, 0x2A, 0x2C, 0x30, + 0x28, 0x30, 0x30, 0x30, + 0x28, 0x28, 0x30, 0x30, + 0x28, 0x28, 0x28, 0x30, + 0x28, 0x2C, 0x2F, 0x33, + 0x28, 0x2D, 0x31, 0x33, + 0x28, 0x2A, 0x31, 0x33, + 0x28, 0x2A, 0x2E, 0x33, + 0x28, 0x33, 0x33, 0x33, + 0x28, 0x28, 0x33, 0x33, + 0x28, 0x28, 0x28, 0x33, + 0x28, 0x2D, 0x31, 0x36, + 0x28, 0x2F, 0x33, 0x36, + 0x28, 0x2B, 0x33, 0x36, + 0x28, 0x2B, 0x2F, 0x36, + 0x28, 0x36, 0x36, 0x36, + 0x28, 0x28, 0x36, 0x36, + 0x28, 0x28, 0x28, 0x36, + 0x28, 0x2E, 0x33, 0x39, + 0x28, 0x30, 0x35, 0x39, + 0x28, 0x2C, 0x35, 0x39, + 0x28, 0x2C, 0x31, 0x39, + 0x28, 0x39, 0x39, 0x39, + 0x28, 0x28, 0x39, 0x39, + 0x28, 0x28, 0x28, 0x39, + 0x28, 0x2F, 0x35, 0x3C, + 0x28, 0x32, 0x37, 0x3C, + 0x28, 0x2D, 0x37, 0x3C, + 0x28, 0x2D, 0x32, 0x3C, + 0x28, 0x3C, 0x3C, 0x3C, + 0x28, 0x28, 0x3C, 
0x3C, + 0x28, 0x28, 0x28, 0x3C, + 0x28, 0x33, 0x3A, 0x3F, + 0x28, 0x2D, 0x3A, 0x3F, + 0x28, 0x2D, 0x34, 0x3F, + 0x28, 0x3F, 0x3F, 0x3F, + 0x28, 0x28, 0x3F, 0x3F, + 0x28, 0x28, 0x28, 0x3F, + 0x29, 0x2A, 0x2A, 0x2B, + 0x29, 0x2A, 0x2B, 0x2C, + 0x29, 0x2B, 0x2C, 0x2D, + 0x29, 0x2A, 0x2C, 0x2D, + 0x29, 0x2A, 0x2B, 0x2D, + 0x29, 0x2B, 0x2C, 0x2E, + 0x29, 0x2B, 0x2D, 0x2E, + 0x29, 0x2A, 0x2D, 0x2E, + 0x29, 0x2A, 0x2C, 0x2E, + 0x29, 0x2B, 0x2D, 0x2F, + 0x29, 0x2C, 0x2E, 0x2F, + 0x29, 0x2A, 0x2E, 0x2F, + 0x29, 0x2A, 0x2C, 0x2F, + 0x29, 0x2F, 0x2F, 0x2F, + 0x29, 0x29, 0x2F, 0x2F, + 0x29, 0x29, 0x29, 0x2F, + 0x29, 0x2C, 0x2D, 0x30, + 0x29, 0x2C, 0x2F, 0x30, + 0x29, 0x2A, 0x2F, 0x30, + 0x29, 0x2A, 0x2D, 0x30, + 0x29, 0x2C, 0x2E, 0x31, + 0x29, 0x2D, 0x2F, 0x31, + 0x29, 0x2B, 0x2F, 0x31, + 0x29, 0x2B, 0x2D, 0x31, + 0x29, 0x31, 0x31, 0x31, + 0x29, 0x29, 0x31, 0x31, + 0x29, 0x29, 0x29, 0x31, + 0x29, 0x2D, 0x30, 0x34, + 0x29, 0x2E, 0x32, 0x34, + 0x29, 0x2B, 0x32, 0x34, + 0x29, 0x2B, 0x2F, 0x34, + 0x29, 0x34, 0x34, 0x34, + 0x29, 0x29, 0x34, 0x34, + 0x29, 0x29, 0x29, 0x34, + 0x29, 0x2E, 0x32, 0x37, + 0x29, 0x30, 0x34, 0x37, + 0x29, 0x2C, 0x34, 0x37, + 0x29, 0x2C, 0x30, 0x37, + 0x29, 0x37, 0x37, 0x37, + 0x29, 0x29, 0x37, 0x37, + 0x29, 0x29, 0x29, 0x37, + 0x29, 0x2F, 0x34, 0x3A, + 0x29, 0x31, 0x36, 0x3A, + 0x29, 0x2D, 0x36, 0x3A, + 0x29, 0x2D, 0x32, 0x3A, + 0x29, 0x3A, 0x3A, 0x3A, + 0x29, 0x29, 0x3A, 0x3A, + 0x29, 0x29, 0x29, 0x3A, + 0x29, 0x30, 0x36, 0x3D, + 0x29, 0x33, 0x38, 0x3D, + 0x29, 0x2E, 0x38, 0x3D, + 0x29, 0x2E, 0x33, 0x3D, + 0x29, 0x3D, 0x3D, 0x3D, + 0x29, 0x29, 0x3D, 0x3D, + 0x29, 0x29, 0x29, 0x3D, + 0x2A, 0x2B, 0x2B, 0x2C, + 0x2A, 0x2B, 0x2C, 0x2D, + 0x2A, 0x2C, 0x2D, 0x2E, + 0x2A, 0x2B, 0x2D, 0x2E, + 0x2A, 0x2B, 0x2C, 0x2E, + 0x2A, 0x2C, 0x2D, 0x2F, + 0x2A, 0x2C, 0x2E, 0x2F, + 0x2A, 0x2B, 0x2E, 0x2F, + 0x2A, 0x2B, 0x2D, 0x2F, + 0x2A, 0x2C, 0x2E, 0x30, + 0x2A, 0x2D, 0x2F, 0x30, + 0x2A, 0x2B, 0x2F, 0x30, + 0x2A, 0x2B, 0x2D, 0x30, + 0x2A, 0x30, 0x30, 0x30, + 0x2A, 0x2A, 
0x30, 0x30, + 0x2A, 0x2A, 0x2A, 0x30, + 0x2A, 0x2D, 0x2E, 0x31, + 0x2A, 0x2D, 0x30, 0x31, + 0x2A, 0x2B, 0x30, 0x31, + 0x2A, 0x2B, 0x2E, 0x31, + 0x2A, 0x2D, 0x2F, 0x32, + 0x2A, 0x2E, 0x30, 0x32, + 0x2A, 0x2C, 0x30, 0x32, + 0x2A, 0x2C, 0x2E, 0x32, + 0x2A, 0x32, 0x32, 0x32, + 0x2A, 0x2A, 0x32, 0x32, + 0x2A, 0x2A, 0x2A, 0x32, + 0x2A, 0x2E, 0x31, 0x35, + 0x2A, 0x2F, 0x33, 0x35, + 0x2A, 0x2C, 0x33, 0x35, + 0x2A, 0x2C, 0x30, 0x35, + 0x2A, 0x35, 0x35, 0x35, + 0x2A, 0x2A, 0x35, 0x35, + 0x2A, 0x2A, 0x2A, 0x35, + 0x2A, 0x2F, 0x33, 0x38, + 0x2A, 0x31, 0x35, 0x38, + 0x2A, 0x2D, 0x35, 0x38, + 0x2A, 0x2D, 0x31, 0x38, + 0x2A, 0x38, 0x38, 0x38, + 0x2A, 0x2A, 0x38, 0x38, + 0x2A, 0x2A, 0x2A, 0x38, + 0x2A, 0x30, 0x35, 0x3B, + 0x2A, 0x32, 0x37, 0x3B, + 0x2A, 0x2E, 0x37, 0x3B, + 0x2A, 0x2E, 0x33, 0x3B, + 0x2A, 0x3B, 0x3B, 0x3B, + 0x2A, 0x2A, 0x3B, 0x3B, + 0x2A, 0x2A, 0x2A, 0x3B, + 0x2A, 0x31, 0x37, 0x3E, + 0x2A, 0x34, 0x39, 0x3E, + 0x2A, 0x2F, 0x39, 0x3E, + 0x2A, 0x2F, 0x34, 0x3E, + 0x2A, 0x3E, 0x3E, 0x3E, + 0x2A, 0x2A, 0x3E, 0x3E, + 0x2A, 0x2A, 0x2A, 0x3E, + 0x2B, 0x2C, 0x2C, 0x2D, + 0x2B, 0x2C, 0x2D, 0x2E, + 0x2B, 0x2D, 0x2E, 0x2F, + 0x2B, 0x2C, 0x2E, 0x2F, + 0x2B, 0x2C, 0x2D, 0x2F, + 0x2B, 0x2D, 0x2E, 0x30, + 0x2B, 0x2D, 0x2F, 0x30, + 0x2B, 0x2C, 0x2F, 0x30, + 0x2B, 0x2C, 0x2E, 0x30, + 0x2B, 0x2D, 0x2F, 0x31, + 0x2B, 0x2E, 0x30, 0x31, + 0x2B, 0x2C, 0x30, 0x31, + 0x2B, 0x2C, 0x2E, 0x31, + 0x2B, 0x31, 0x31, 0x31, + 0x2B, 0x2B, 0x31, 0x31, + 0x2B, 0x2B, 0x2B, 0x31, + 0x2B, 0x2E, 0x2F, 0x32, + 0x2B, 0x2E, 0x31, 0x32, + 0x2B, 0x2C, 0x31, 0x32, + 0x2B, 0x2C, 0x2F, 0x32, + 0x2B, 0x2E, 0x30, 0x33, + 0x2B, 0x2F, 0x31, 0x33, + 0x2B, 0x2D, 0x31, 0x33, + 0x2B, 0x2D, 0x2F, 0x33, + 0x2B, 0x33, 0x33, 0x33, + 0x2B, 0x2B, 0x33, 0x33, + 0x2B, 0x2B, 0x2B, 0x33, + 0x2B, 0x2F, 0x32, 0x36, + 0x2B, 0x30, 0x34, 0x36, + 0x2B, 0x2D, 0x34, 0x36, + 0x2B, 0x2D, 0x31, 0x36, + 0x2B, 0x36, 0x36, 0x36, + 0x2B, 0x2B, 0x36, 0x36, + 0x2B, 0x2B, 0x2B, 0x36, + 0x2B, 0x30, 0x34, 0x39, + 0x2B, 0x32, 0x36, 0x39, + 0x2B, 
0x2E, 0x36, 0x39, + 0x2B, 0x2E, 0x32, 0x39, + 0x2B, 0x39, 0x39, 0x39, + 0x2B, 0x2B, 0x39, 0x39, + 0x2B, 0x2B, 0x2B, 0x39, + 0x2B, 0x31, 0x36, 0x3C, + 0x2B, 0x33, 0x38, 0x3C, + 0x2B, 0x2F, 0x38, 0x3C, + 0x2B, 0x2F, 0x34, 0x3C, + 0x2B, 0x3C, 0x3C, 0x3C, + 0x2B, 0x2B, 0x3C, 0x3C, + 0x2B, 0x2B, 0x2B, 0x3C, + 0x2B, 0x32, 0x38, 0x3F, + 0x2B, 0x35, 0x3A, 0x3F, + 0x2B, 0x30, 0x3A, 0x3F, + 0x2B, 0x30, 0x35, 0x3F, + 0x2B, 0x3F, 0x3F, 0x3F, + 0x2B, 0x2B, 0x3F, 0x3F, + 0x2B, 0x2B, 0x2B, 0x3F, + 0x2C, 0x2D, 0x2D, 0x2E, + 0x2C, 0x2D, 0x2E, 0x2F, + 0x2C, 0x2E, 0x2F, 0x30, + 0x2C, 0x2D, 0x2F, 0x30, + 0x2C, 0x2D, 0x2E, 0x30, + 0x2C, 0x2E, 0x2F, 0x31, + 0x2C, 0x2E, 0x30, 0x31, + 0x2C, 0x2D, 0x30, 0x31, + 0x2C, 0x2D, 0x2F, 0x31, + 0x2C, 0x2E, 0x30, 0x32, + 0x2C, 0x2F, 0x31, 0x32, + 0x2C, 0x2D, 0x31, 0x32, + 0x2C, 0x2D, 0x2F, 0x32, + 0x2C, 0x32, 0x32, 0x32, + 0x2C, 0x2C, 0x32, 0x32, + 0x2C, 0x2C, 0x2C, 0x32, + 0x2C, 0x2F, 0x30, 0x33, + 0x2C, 0x2F, 0x32, 0x33, + 0x2C, 0x2D, 0x32, 0x33, + 0x2C, 0x2D, 0x30, 0x33, + 0x2C, 0x2F, 0x31, 0x34, + 0x2C, 0x30, 0x32, 0x34, + 0x2C, 0x2E, 0x32, 0x34, + 0x2C, 0x2E, 0x30, 0x34, + 0x2C, 0x34, 0x34, 0x34, + 0x2C, 0x2C, 0x34, 0x34, + 0x2C, 0x2C, 0x2C, 0x34, + 0x2C, 0x30, 0x33, 0x37, + 0x2C, 0x31, 0x35, 0x37, + 0x2C, 0x2E, 0x35, 0x37, + 0x2C, 0x2E, 0x32, 0x37, + 0x2C, 0x37, 0x37, 0x37, + 0x2C, 0x2C, 0x37, 0x37, + 0x2C, 0x2C, 0x2C, 0x37, + 0x2C, 0x31, 0x35, 0x3A, + 0x2C, 0x33, 0x37, 0x3A, + 0x2C, 0x2F, 0x37, 0x3A, + 0x2C, 0x2F, 0x33, 0x3A, + 0x2C, 0x3A, 0x3A, 0x3A, + 0x2C, 0x2C, 0x3A, 0x3A, + 0x2C, 0x2C, 0x2C, 0x3A, + 0x2C, 0x32, 0x37, 0x3D, + 0x2C, 0x34, 0x39, 0x3D, + 0x2C, 0x30, 0x39, 0x3D, + 0x2C, 0x30, 0x35, 0x3D, + 0x2C, 0x3D, 0x3D, 0x3D, + 0x2C, 0x2C, 0x3D, 0x3D, + 0x2C, 0x2C, 0x2C, 0x3D, + 0x2D, 0x2E, 0x2E, 0x2F, + 0x2D, 0x2E, 0x2F, 0x30, + 0x2D, 0x2F, 0x30, 0x31, + 0x2D, 0x2E, 0x30, 0x31, + 0x2D, 0x2E, 0x2F, 0x31, + 0x2D, 0x2F, 0x30, 0x32, + 0x2D, 0x2F, 0x31, 0x32, + 0x2D, 0x2E, 0x31, 0x32, + 0x2D, 0x2E, 0x30, 0x32, + 0x2D, 0x2F, 0x31, 0x33, + 
0x2D, 0x30, 0x32, 0x33, + 0x2D, 0x2E, 0x32, 0x33, + 0x2D, 0x2E, 0x30, 0x33, + 0x2D, 0x33, 0x33, 0x33, + 0x2D, 0x2D, 0x33, 0x33, + 0x2D, 0x2D, 0x2D, 0x33, + 0x2D, 0x30, 0x31, 0x34, + 0x2D, 0x30, 0x33, 0x34, + 0x2D, 0x2E, 0x33, 0x34, + 0x2D, 0x2E, 0x31, 0x34, + 0x2D, 0x30, 0x32, 0x35, + 0x2D, 0x31, 0x33, 0x35, + 0x2D, 0x2F, 0x33, 0x35, + 0x2D, 0x2F, 0x31, 0x35, + 0x2D, 0x35, 0x35, 0x35, + 0x2D, 0x2D, 0x35, 0x35, + 0x2D, 0x2D, 0x2D, 0x35, + 0x2D, 0x31, 0x34, 0x38, + 0x2D, 0x32, 0x36, 0x38, + 0x2D, 0x2F, 0x36, 0x38, + 0x2D, 0x2F, 0x33, 0x38, + 0x2D, 0x38, 0x38, 0x38, + 0x2D, 0x2D, 0x38, 0x38, + 0x2D, 0x2D, 0x2D, 0x38, + 0x2D, 0x32, 0x36, 0x3B, + 0x2D, 0x34, 0x38, 0x3B, + 0x2D, 0x30, 0x38, 0x3B, + 0x2D, 0x30, 0x34, 0x3B, + 0x2D, 0x3B, 0x3B, 0x3B, + 0x2D, 0x2D, 0x3B, 0x3B, + 0x2D, 0x2D, 0x2D, 0x3B, + 0x2D, 0x33, 0x38, 0x3E, + 0x2D, 0x35, 0x3A, 0x3E, + 0x2D, 0x31, 0x3A, 0x3E, + 0x2D, 0x31, 0x36, 0x3E, + 0x2D, 0x3E, 0x3E, 0x3E, + 0x2D, 0x2D, 0x3E, 0x3E, + 0x2D, 0x2D, 0x2D, 0x3E, + 0x2E, 0x2F, 0x2F, 0x30, + 0x2E, 0x2F, 0x30, 0x31, + 0x2E, 0x30, 0x31, 0x32, + 0x2E, 0x2F, 0x31, 0x32, + 0x2E, 0x2F, 0x30, 0x32, + 0x2E, 0x30, 0x31, 0x33, + 0x2E, 0x30, 0x32, 0x33, + 0x2E, 0x2F, 0x32, 0x33, + 0x2E, 0x2F, 0x31, 0x33, + 0x2E, 0x30, 0x32, 0x34, + 0x2E, 0x31, 0x33, 0x34, + 0x2E, 0x2F, 0x33, 0x34, + 0x2E, 0x2F, 0x31, 0x34, + 0x2E, 0x34, 0x34, 0x34, + 0x2E, 0x2E, 0x34, 0x34, + 0x2E, 0x2E, 0x2E, 0x34, + 0x2E, 0x31, 0x32, 0x35, + 0x2E, 0x31, 0x34, 0x35, + 0x2E, 0x2F, 0x34, 0x35, + 0x2E, 0x2F, 0x32, 0x35, + 0x2E, 0x31, 0x33, 0x36, + 0x2E, 0x32, 0x34, 0x36, + 0x2E, 0x30, 0x34, 0x36, + 0x2E, 0x30, 0x32, 0x36, + 0x2E, 0x36, 0x36, 0x36, + 0x2E, 0x2E, 0x36, 0x36, + 0x2E, 0x2E, 0x2E, 0x36, + 0x2E, 0x32, 0x35, 0x39, + 0x2E, 0x33, 0x37, 0x39, + 0x2E, 0x30, 0x37, 0x39, + 0x2E, 0x30, 0x34, 0x39, + 0x2E, 0x39, 0x39, 0x39, + 0x2E, 0x2E, 0x39, 0x39, + 0x2E, 0x2E, 0x2E, 0x39, + 0x2E, 0x33, 0x37, 0x3C, + 0x2E, 0x35, 0x39, 0x3C, + 0x2E, 0x31, 0x39, 0x3C, + 0x2E, 0x31, 0x35, 0x3C, + 0x2E, 0x3C, 0x3C, 0x3C, 
+ 0x2E, 0x2E, 0x3C, 0x3C, + 0x2E, 0x2E, 0x2E, 0x3C, + 0x2E, 0x34, 0x39, 0x3F, + 0x2E, 0x36, 0x3B, 0x3F, + 0x2E, 0x32, 0x3B, 0x3F, + 0x2E, 0x32, 0x37, 0x3F, + 0x2E, 0x3F, 0x3F, 0x3F, + 0x2E, 0x2E, 0x3F, 0x3F, + 0x2E, 0x2E, 0x2E, 0x3F, + 0x2F, 0x30, 0x30, 0x31, + 0x2F, 0x30, 0x31, 0x32, + 0x2F, 0x31, 0x32, 0x33, + 0x2F, 0x30, 0x32, 0x33, + 0x2F, 0x30, 0x31, 0x33, + 0x2F, 0x31, 0x32, 0x34, + 0x2F, 0x31, 0x33, 0x34, + 0x2F, 0x30, 0x33, 0x34, + 0x2F, 0x30, 0x32, 0x34, + 0x2F, 0x31, 0x33, 0x35, + 0x2F, 0x32, 0x34, 0x35, + 0x2F, 0x30, 0x34, 0x35, + 0x2F, 0x30, 0x32, 0x35, + 0x2F, 0x35, 0x35, 0x35, + 0x2F, 0x2F, 0x35, 0x35, + 0x2F, 0x2F, 0x2F, 0x35, + 0x2F, 0x32, 0x33, 0x36, + 0x2F, 0x32, 0x35, 0x36, + 0x2F, 0x30, 0x35, 0x36, + 0x2F, 0x30, 0x33, 0x36, + 0x2F, 0x32, 0x34, 0x37, + 0x2F, 0x33, 0x35, 0x37, + 0x2F, 0x31, 0x35, 0x37, + 0x2F, 0x31, 0x33, 0x37, + 0x2F, 0x37, 0x37, 0x37, + 0x2F, 0x2F, 0x37, 0x37, + 0x2F, 0x2F, 0x2F, 0x37, + 0x2F, 0x33, 0x36, 0x3A, + 0x2F, 0x34, 0x38, 0x3A, + 0x2F, 0x31, 0x38, 0x3A, + 0x2F, 0x31, 0x35, 0x3A, + 0x2F, 0x3A, 0x3A, 0x3A, + 0x2F, 0x2F, 0x3A, 0x3A, + 0x2F, 0x2F, 0x2F, 0x3A, + 0x2F, 0x34, 0x38, 0x3D, + 0x2F, 0x36, 0x3A, 0x3D, + 0x2F, 0x32, 0x3A, 0x3D, + 0x2F, 0x32, 0x36, 0x3D, + 0x2F, 0x3D, 0x3D, 0x3D, + 0x2F, 0x2F, 0x3D, 0x3D, + 0x2F, 0x2F, 0x2F, 0x3D, + 0x30, 0x31, 0x31, 0x32, + 0x30, 0x31, 0x32, 0x33, + 0x30, 0x32, 0x33, 0x34, + 0x30, 0x31, 0x33, 0x34, + 0x30, 0x31, 0x32, 0x34, + 0x30, 0x32, 0x33, 0x35, + 0x30, 0x32, 0x34, 0x35, + 0x30, 0x31, 0x34, 0x35, + 0x30, 0x31, 0x33, 0x35, + 0x30, 0x32, 0x34, 0x36, + 0x30, 0x33, 0x35, 0x36, + 0x30, 0x31, 0x35, 0x36, + 0x30, 0x31, 0x33, 0x36, + 0x30, 0x36, 0x36, 0x36, + 0x30, 0x30, 0x36, 0x36, + 0x30, 0x30, 0x30, 0x36, + 0x30, 0x33, 0x34, 0x37, + 0x30, 0x33, 0x36, 0x37, + 0x30, 0x31, 0x36, 0x37, + 0x30, 0x31, 0x34, 0x37, + 0x30, 0x33, 0x35, 0x38, + 0x30, 0x34, 0x36, 0x38, + 0x30, 0x32, 0x36, 0x38, + 0x30, 0x32, 0x34, 0x38, + 0x30, 0x38, 0x38, 0x38, + 0x30, 0x30, 0x38, 0x38, + 0x30, 0x30, 0x30, 
0x38, + 0x30, 0x34, 0x37, 0x3B, + 0x30, 0x35, 0x39, 0x3B, + 0x30, 0x32, 0x39, 0x3B, + 0x30, 0x32, 0x36, 0x3B, + 0x30, 0x3B, 0x3B, 0x3B, + 0x30, 0x30, 0x3B, 0x3B, + 0x30, 0x30, 0x30, 0x3B, + 0x30, 0x35, 0x39, 0x3E, + 0x30, 0x37, 0x3B, 0x3E, + 0x30, 0x33, 0x3B, 0x3E, + 0x30, 0x33, 0x37, 0x3E, + 0x30, 0x3E, 0x3E, 0x3E, + 0x30, 0x30, 0x3E, 0x3E, + 0x30, 0x30, 0x30, 0x3E, + 0x31, 0x32, 0x32, 0x33, + 0x31, 0x32, 0x33, 0x34, + 0x31, 0x33, 0x34, 0x35, + 0x31, 0x32, 0x34, 0x35, + 0x31, 0x32, 0x33, 0x35, + 0x31, 0x33, 0x34, 0x36, + 0x31, 0x33, 0x35, 0x36, + 0x31, 0x32, 0x35, 0x36, + 0x31, 0x32, 0x34, 0x36, + 0x31, 0x33, 0x35, 0x37, + 0x31, 0x34, 0x36, 0x37, + 0x31, 0x32, 0x36, 0x37, + 0x31, 0x32, 0x34, 0x37, + 0x31, 0x37, 0x37, 0x37, + 0x31, 0x31, 0x37, 0x37, + 0x31, 0x31, 0x31, 0x37, + 0x31, 0x34, 0x35, 0x38, + 0x31, 0x34, 0x37, 0x38, + 0x31, 0x32, 0x37, 0x38, + 0x31, 0x32, 0x35, 0x38, + 0x31, 0x34, 0x36, 0x39, + 0x31, 0x35, 0x37, 0x39, + 0x31, 0x33, 0x37, 0x39, + 0x31, 0x33, 0x35, 0x39, + 0x31, 0x39, 0x39, 0x39, + 0x31, 0x31, 0x39, 0x39, + 0x31, 0x31, 0x31, 0x39, + 0x31, 0x35, 0x38, 0x3C, + 0x31, 0x36, 0x3A, 0x3C, + 0x31, 0x33, 0x3A, 0x3C, + 0x31, 0x33, 0x37, 0x3C, + 0x31, 0x3C, 0x3C, 0x3C, + 0x31, 0x31, 0x3C, 0x3C, + 0x31, 0x31, 0x31, 0x3C, + 0x31, 0x36, 0x3A, 0x3F, + 0x31, 0x38, 0x3C, 0x3F, + 0x31, 0x34, 0x3C, 0x3F, + 0x31, 0x34, 0x38, 0x3F, + 0x31, 0x3F, 0x3F, 0x3F, + 0x31, 0x31, 0x3F, 0x3F, + 0x31, 0x31, 0x31, 0x3F, + 0x32, 0x33, 0x33, 0x34, + 0x32, 0x33, 0x34, 0x35, + 0x32, 0x34, 0x35, 0x36, + 0x32, 0x33, 0x35, 0x36, + 0x32, 0x33, 0x34, 0x36, + 0x32, 0x34, 0x35, 0x37, + 0x32, 0x34, 0x36, 0x37, + 0x32, 0x33, 0x36, 0x37, + 0x32, 0x33, 0x35, 0x37, + 0x32, 0x34, 0x36, 0x38, + 0x32, 0x35, 0x37, 0x38, + 0x32, 0x33, 0x37, 0x38, + 0x32, 0x33, 0x35, 0x38, + 0x32, 0x38, 0x38, 0x38, + 0x32, 0x32, 0x38, 0x38, + 0x32, 0x32, 0x32, 0x38, + 0x32, 0x35, 0x36, 0x39, + 0x32, 0x35, 0x38, 0x39, + 0x32, 0x33, 0x38, 0x39, + 0x32, 0x33, 0x36, 0x39, + 0x32, 0x35, 0x37, 0x3A, + 0x32, 0x36, 
0x38, 0x3A, + 0x32, 0x34, 0x38, 0x3A, + 0x32, 0x34, 0x36, 0x3A, + 0x32, 0x3A, 0x3A, 0x3A, + 0x32, 0x32, 0x3A, 0x3A, + 0x32, 0x32, 0x32, 0x3A, + 0x32, 0x36, 0x39, 0x3D, + 0x32, 0x37, 0x3B, 0x3D, + 0x32, 0x34, 0x3B, 0x3D, + 0x32, 0x34, 0x38, 0x3D, + 0x32, 0x3D, 0x3D, 0x3D, + 0x32, 0x32, 0x3D, 0x3D, + 0x32, 0x32, 0x32, 0x3D, + 0x33, 0x34, 0x34, 0x35, + 0x33, 0x34, 0x35, 0x36, + 0x33, 0x35, 0x36, 0x37, + 0x33, 0x34, 0x36, 0x37, + 0x33, 0x34, 0x35, 0x37, + 0x33, 0x35, 0x36, 0x38, + 0x33, 0x35, 0x37, 0x38, + 0x33, 0x34, 0x37, 0x38, + 0x33, 0x34, 0x36, 0x38, + 0x33, 0x35, 0x37, 0x39, + 0x33, 0x36, 0x38, 0x39, + 0x33, 0x34, 0x38, 0x39, + 0x33, 0x34, 0x36, 0x39, + 0x33, 0x39, 0x39, 0x39, + 0x33, 0x33, 0x39, 0x39, + 0x33, 0x33, 0x33, 0x39, + 0x33, 0x36, 0x37, 0x3A, + 0x33, 0x36, 0x39, 0x3A, + 0x33, 0x34, 0x39, 0x3A, + 0x33, 0x34, 0x37, 0x3A, + 0x33, 0x36, 0x38, 0x3B, + 0x33, 0x37, 0x39, 0x3B, + 0x33, 0x35, 0x39, 0x3B, + 0x33, 0x35, 0x37, 0x3B, + 0x33, 0x3B, 0x3B, 0x3B, + 0x33, 0x33, 0x3B, 0x3B, + 0x33, 0x33, 0x33, 0x3B, + 0x33, 0x37, 0x3A, 0x3E, + 0x33, 0x38, 0x3C, 0x3E, + 0x33, 0x35, 0x3C, 0x3E, + 0x33, 0x35, 0x39, 0x3E, + 0x33, 0x3E, 0x3E, 0x3E, + 0x33, 0x33, 0x3E, 0x3E, + 0x33, 0x33, 0x33, 0x3E, + 0x34, 0x35, 0x35, 0x36, + 0x34, 0x35, 0x36, 0x37, + 0x34, 0x36, 0x37, 0x38, + 0x34, 0x35, 0x37, 0x38, + 0x34, 0x35, 0x36, 0x38, + 0x34, 0x36, 0x37, 0x39, + 0x34, 0x36, 0x38, 0x39, + 0x34, 0x35, 0x38, 0x39, + 0x34, 0x35, 0x37, 0x39, + 0x34, 0x36, 0x38, 0x3A, + 0x34, 0x37, 0x39, 0x3A, + 0x34, 0x35, 0x39, 0x3A, + 0x34, 0x35, 0x37, 0x3A, + 0x34, 0x3A, 0x3A, 0x3A, + 0x34, 0x34, 0x3A, 0x3A, + 0x34, 0x34, 0x34, 0x3A, + 0x34, 0x37, 0x38, 0x3B, + 0x34, 0x37, 0x3A, 0x3B, + 0x34, 0x35, 0x3A, 0x3B, + 0x34, 0x35, 0x38, 0x3B, + 0x34, 0x37, 0x39, 0x3C, + 0x34, 0x38, 0x3A, 0x3C, + 0x34, 0x36, 0x3A, 0x3C, + 0x34, 0x36, 0x38, 0x3C, + 0x34, 0x3C, 0x3C, 0x3C, + 0x34, 0x34, 0x3C, 0x3C, + 0x34, 0x34, 0x34, 0x3C, + 0x34, 0x38, 0x3B, 0x3F, + 0x34, 0x39, 0x3D, 0x3F, + 0x34, 0x36, 0x3D, 0x3F, + 0x34, 
0x36, 0x3A, 0x3F, + 0x34, 0x3F, 0x3F, 0x3F, + 0x34, 0x34, 0x3F, 0x3F, + 0x34, 0x34, 0x34, 0x3F, + 0x35, 0x36, 0x36, 0x37, + 0x35, 0x36, 0x37, 0x38, + 0x35, 0x37, 0x38, 0x39, + 0x35, 0x36, 0x38, 0x39, + 0x35, 0x36, 0x37, 0x39, + 0x35, 0x37, 0x38, 0x3A, + 0x35, 0x37, 0x39, 0x3A, + 0x35, 0x36, 0x39, 0x3A, + 0x35, 0x36, 0x38, 0x3A, + 0x35, 0x37, 0x39, 0x3B, + 0x35, 0x38, 0x3A, 0x3B, + 0x35, 0x36, 0x3A, 0x3B, + 0x35, 0x36, 0x38, 0x3B, + 0x35, 0x3B, 0x3B, 0x3B, + 0x35, 0x35, 0x3B, 0x3B, + 0x35, 0x35, 0x35, 0x3B, + 0x35, 0x38, 0x39, 0x3C, + 0x35, 0x38, 0x3B, 0x3C, + 0x35, 0x36, 0x3B, 0x3C, + 0x35, 0x36, 0x39, 0x3C, + 0x35, 0x38, 0x3A, 0x3D, + 0x35, 0x39, 0x3B, 0x3D, + 0x35, 0x37, 0x3B, 0x3D, + 0x35, 0x37, 0x39, 0x3D, + 0x35, 0x3D, 0x3D, 0x3D, + 0x35, 0x35, 0x3D, 0x3D, + 0x35, 0x35, 0x35, 0x3D, + 0x36, 0x37, 0x37, 0x38, + 0x36, 0x37, 0x38, 0x39, + 0x36, 0x38, 0x39, 0x3A, + 0x36, 0x37, 0x39, 0x3A, + 0x36, 0x37, 0x38, 0x3A, + 0x36, 0x38, 0x39, 0x3B, + 0x36, 0x38, 0x3A, 0x3B, + 0x36, 0x37, 0x3A, 0x3B, + 0x36, 0x37, 0x39, 0x3B, + 0x36, 0x38, 0x3A, 0x3C, + 0x36, 0x39, 0x3B, 0x3C, + 0x36, 0x37, 0x3B, 0x3C, + 0x36, 0x37, 0x39, 0x3C, + 0x36, 0x3C, 0x3C, 0x3C, + 0x36, 0x36, 0x3C, 0x3C, + 0x36, 0x36, 0x36, 0x3C, + 0x36, 0x39, 0x3A, 0x3D, + 0x36, 0x39, 0x3C, 0x3D, + 0x36, 0x37, 0x3C, 0x3D, + 0x36, 0x37, 0x3A, 0x3D, + 0x36, 0x39, 0x3B, 0x3E, + 0x36, 0x3A, 0x3C, 0x3E, + 0x36, 0x38, 0x3C, 0x3E, + 0x36, 0x38, 0x3A, 0x3E, + 0x36, 0x3E, 0x3E, 0x3E, + 0x36, 0x36, 0x3E, 0x3E, + 0x36, 0x36, 0x36, 0x3E, + 0x37, 0x38, 0x38, 0x39, + 0x37, 0x38, 0x39, 0x3A, + 0x37, 0x39, 0x3A, 0x3B, + 0x37, 0x38, 0x3A, 0x3B, + 0x37, 0x38, 0x39, 0x3B, + 0x37, 0x39, 0x3A, 0x3C, + 0x37, 0x39, 0x3B, 0x3C, + 0x37, 0x38, 0x3B, 0x3C, + 0x37, 0x38, 0x3A, 0x3C, + 0x37, 0x39, 0x3B, 0x3D, + 0x37, 0x3A, 0x3C, 0x3D, + 0x37, 0x38, 0x3C, 0x3D, + 0x37, 0x38, 0x3A, 0x3D, + 0x37, 0x3D, 0x3D, 0x3D, + 0x37, 0x37, 0x3D, 0x3D, + 0x37, 0x37, 0x37, 0x3D, + 0x37, 0x3A, 0x3B, 0x3E, + 0x37, 0x3A, 0x3D, 0x3E, + 0x37, 0x38, 0x3D, 0x3E, + 
0x37, 0x38, 0x3B, 0x3E, + 0x37, 0x3A, 0x3C, 0x3F, + 0x37, 0x3B, 0x3D, 0x3F, + 0x37, 0x39, 0x3D, 0x3F, + 0x37, 0x39, 0x3B, 0x3F, + 0x37, 0x3F, 0x3F, 0x3F, + 0x37, 0x37, 0x3F, 0x3F, + 0x37, 0x37, 0x37, 0x3F, + 0x38, 0x39, 0x39, 0x3A, + 0x38, 0x39, 0x3A, 0x3B, + 0x38, 0x3A, 0x3B, 0x3C, + 0x38, 0x39, 0x3B, 0x3C, + 0x38, 0x39, 0x3A, 0x3C, + 0x38, 0x3A, 0x3B, 0x3D, + 0x38, 0x3A, 0x3C, 0x3D, + 0x38, 0x39, 0x3C, 0x3D, + 0x38, 0x39, 0x3B, 0x3D, + 0x38, 0x3A, 0x3C, 0x3E, + 0x38, 0x3B, 0x3D, 0x3E, + 0x38, 0x39, 0x3D, 0x3E, + 0x38, 0x39, 0x3B, 0x3E, + 0x38, 0x3E, 0x3E, 0x3E, + 0x38, 0x38, 0x3E, 0x3E, + 0x38, 0x38, 0x38, 0x3E, + 0x38, 0x3B, 0x3C, 0x3F, + 0x38, 0x3B, 0x3E, 0x3F, + 0x38, 0x39, 0x3E, 0x3F, + 0x38, 0x39, 0x3C, 0x3F, + 0x39, 0x3A, 0x3A, 0x3B, + 0x39, 0x3A, 0x3B, 0x3C, + 0x39, 0x3B, 0x3C, 0x3D, + 0x39, 0x3A, 0x3C, 0x3D, + 0x39, 0x3A, 0x3B, 0x3D, + 0x39, 0x3B, 0x3C, 0x3E, + 0x39, 0x3B, 0x3D, 0x3E, + 0x39, 0x3A, 0x3D, 0x3E, + 0x39, 0x3A, 0x3C, 0x3E, + 0x39, 0x3B, 0x3D, 0x3F, + 0x39, 0x3C, 0x3E, 0x3F, + 0x39, 0x3A, 0x3E, 0x3F, + 0x39, 0x3A, 0x3C, 0x3F, + 0x39, 0x3F, 0x3F, 0x3F, + 0x39, 0x39, 0x3F, 0x3F, + 0x39, 0x39, 0x39, 0x3F, + 0x3A, 0x3B, 0x3B, 0x3C, + 0x3A, 0x3B, 0x3C, 0x3D, + 0x3A, 0x3C, 0x3D, 0x3E, + 0x3A, 0x3B, 0x3D, 0x3E, + 0x3A, 0x3B, 0x3C, 0x3E, + 0x3A, 0x3C, 0x3D, 0x3F, + 0x3A, 0x3C, 0x3E, 0x3F, + 0x3A, 0x3B, 0x3E, 0x3F, + 0x3A, 0x3B, 0x3D, 0x3F, + 0x3B, 0x3C, 0x3C, 0x3D, + 0x3B, 0x3C, 0x3D, 0x3E, + 0x3B, 0x3D, 0x3E, 0x3F, + 0x3B, 0x3C, 0x3E, 0x3F, + 0x3B, 0x3C, 0x3D, 0x3F, + 0x3C, 0x3D, 0x3D, 0x3E, + 0x3C, 0x3D, 0x3E, 0x3F, + 0x3D, 0x3E, 0x3E, 0x3F +}; diff --git a/src/libffmpeg/libavcodec/utils.c b/src/libffmpeg/libavcodec/utils.c index a45d57de0..afff2f98a 100644 --- a/src/libffmpeg/libavcodec/utils.c +++ b/src/libffmpeg/libavcodec/utils.c @@ -31,7 +31,25 @@ #include <stdarg.h> #include <limits.h> -static void avcodec_default_free_buffers(AVCodecContext *s); +const uint8_t ff_sqrt_tab[128]={ + 0, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 
4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,11,11,11,11,11,11,11 +}; + +const uint8_t ff_log2_tab[256]={ + 0,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, + 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7 +}; + +void avcodec_default_free_buffers(AVCodecContext *s); void *av_mallocz(unsigned int size) { @@ -64,7 +82,7 @@ void *av_fast_realloc(void *ptr, unsigned int *size, unsigned int min_size) if(min_size < *size) return ptr; - *size= 17*min_size/16 + 32; + *size= FFMAX(17*min_size/16 + 32, min_size); return av_realloc(ptr, *size); } @@ -83,6 +101,8 @@ void *av_mallocz_static(unsigned int size) if(ptr){ array_static =av_fast_realloc(array_static, &allocated_static, sizeof(void*)*(last_static+1)); + if(!array_static) + return NULL; array_static[last_static++] = ptr; } @@ -90,6 +110,26 @@ void *av_mallocz_static(unsigned int size) } /** + * same as above, but does realloc + */ + +void *av_realloc_static(void *ptr, unsigned int size) +{ + int i; + if(!ptr) + return av_mallocz_static(size); + /* Look for the old ptr */ + for(i = 0; i < last_static; i++) { + if(array_static[i] == ptr) { + array_static[i] = av_realloc(array_static[i], size); + return array_static[i]; + } + } + return NULL; + +} + +/** * free all static arrays and reset pointers to 0. 
*/ void av_free_static(void) @@ -112,7 +152,7 @@ void av_freep(void *arg) } /* encoder management */ -AVCodec *first_avcodec; +AVCodec *first_avcodec = NULL; void register_avcodec(AVCodec *format) { @@ -123,6 +163,13 @@ void register_avcodec(AVCodec *format) format->next = NULL; } +void avcodec_set_dimensions(AVCodecContext *s, int width, int height){ + s->coded_width = width; + s->coded_height= height; + s->width = -((-width )>>s->lowres); + s->height= -((-height)>>s->lowres); +} + typedef struct InternalBuffer{ int last_pic_num; uint8_t *base[4]; @@ -141,6 +188,7 @@ void avcodec_align_dimensions(AVCodecContext *s, int *width, int *height){ switch(s->pix_fmt){ case PIX_FMT_YUV420P: case PIX_FMT_YUV422: + case PIX_FMT_UYVY422: case PIX_FMT_YUV422P: case PIX_FMT_YUV444P: case PIX_FMT_GRAY8: @@ -151,6 +199,7 @@ void avcodec_align_dimensions(AVCodecContext *s, int *width, int *height){ h_align= 16; break; case PIX_FMT_YUV411P: + case PIX_FMT_UYVY411: w_align=32; h_align=8; break; @@ -159,6 +208,22 @@ void avcodec_align_dimensions(AVCodecContext *s, int *width, int *height){ w_align=64; h_align=64; } + case PIX_FMT_RGB555: + if(s->codec_id == CODEC_ID_RPZA){ + w_align=4; + h_align=4; + } + case PIX_FMT_PAL8: + if(s->codec_id == CODEC_ID_SMC){ + w_align=4; + h_align=4; + } + break; + case PIX_FMT_BGR24: + if((s->codec_id == CODEC_ID_MSZH) || (s->codec_id == CODEC_ID_ZLIB)){ + w_align=4; + h_align=4; + } break; default: w_align= 1; @@ -170,16 +235,27 @@ void avcodec_align_dimensions(AVCodecContext *s, int *width, int *height){ *height= ALIGN(*height, h_align); } +int avcodec_check_dimensions(void *av_log_ctx, unsigned int w, unsigned int h){ + if((int)w>0 && (int)h>0 && (w+128)*(uint64_t)(h+128) < INT_MAX/4) + return 0; + + av_log(av_log_ctx, AV_LOG_ERROR, "picture size invalid (%ux%u)\n", w, h); + return -1; +} + int avcodec_default_get_buffer(AVCodecContext *s, AVFrame *pic){ int i; int w= s->width; int h= s->height; InternalBuffer *buf; int *picture_number; - + 
assert(pic->data[0]==NULL); assert(INTERNAL_BUFFER_SIZE > s->internal_buffer_count); + if(avcodec_check_dimensions(s,w,h)) + return -1; + if(s->internal_buffer==NULL){ s->internal_buffer= av_mallocz(INTERNAL_BUFFER_SIZE*sizeof(InternalBuffer)); } @@ -200,7 +276,7 @@ int avcodec_default_get_buffer(AVCodecContext *s, AVFrame *pic){ buf->last_pic_num= *picture_number; }else{ int h_chroma_shift, v_chroma_shift; - int s_align, pixel_size; + int pixel_size; avcodec_get_chroma_sub_sample(s->pix_fmt, &h_chroma_shift, &v_chroma_shift); @@ -208,6 +284,7 @@ int avcodec_default_get_buffer(AVCodecContext *s, AVFrame *pic){ case PIX_FMT_RGB555: case PIX_FMT_RGB565: case PIX_FMT_YUV422: + case PIX_FMT_UYVY422: pixel_size=2; break; case PIX_FMT_RGB24: @@ -222,11 +299,6 @@ int avcodec_default_get_buffer(AVCodecContext *s, AVFrame *pic){ } avcodec_align_dimensions(s, &w, &h); -#if defined(ARCH_POWERPC) || defined(HAVE_MMI) //FIXME some cleaner check - s_align= 16; -#else - s_align= 8; -#endif if(!(s->flags&CODEC_FLAG_EMU_EDGE)){ w+= EDGE_WIDTH*2; @@ -240,16 +312,16 @@ int avcodec_default_get_buffer(AVCodecContext *s, AVFrame *pic){ const int v_shift= i==0 ? 
0 : v_chroma_shift; //FIXME next ensures that linesize= 2^x uvlinesize, thats needed because some MC code assumes it - buf->linesize[i]= ALIGN(pixel_size*w>>h_shift, s_align<<(h_chroma_shift-h_shift)); + buf->linesize[i]= ALIGN(pixel_size*w>>h_shift, STRIDE_ALIGN<<(h_chroma_shift-h_shift)); - buf->base[i]= av_mallocz((buf->linesize[i]*h>>v_shift)+16); //FIXME 16 + buf->base[i]= av_malloc((buf->linesize[i]*h>>v_shift)+16); //FIXME 16 if(buf->base[i]==NULL) return -1; memset(buf->base[i], 128, buf->linesize[i]*h>>v_shift); if(s->flags&CODEC_FLAG_EMU_EDGE) buf->data[i] = buf->base[i]; else - buf->data[i] = buf->base[i] + ALIGN((buf->linesize[i]*EDGE_WIDTH>>v_shift) + (EDGE_WIDTH>>h_shift), s_align); + buf->data[i] = buf->base[i] + ALIGN((buf->linesize[i]*EDGE_WIDTH>>v_shift) + (EDGE_WIDTH>>h_shift), STRIDE_ALIGN); } pic->age= 256*256*256*64; } @@ -358,8 +430,8 @@ void avcodec_get_context_defaults(AVCodecContext *s){ s->bit_rate_tolerance= s->bit_rate*10; s->qmin= 2; s->qmax= 31; - s->mb_qmin= 2; - s->mb_qmax= 31; + s->mb_lmin= FF_QP2LAMBDA * 2; + s->mb_lmax= FF_QP2LAMBDA * 31; s->rc_eq= "tex^qComp"; s->qcompress= 0.5; s->max_qdiff= 3; @@ -384,6 +456,9 @@ void avcodec_get_context_defaults(AVCodecContext *s){ s->lmax= FF_QP2LAMBDA * s->qmax; s->sample_aspect_ratio= (AVRational){0,1}; s->ildct_cmp= FF_CMP_VSAD; + s->profile= FF_PROFILE_UNKNOWN; + s->level= FF_LEVEL_UNKNOWN; + s->me_penalty_compensation= 256; s->intra_quant_bias= FF_DEFAULT_QUANT_BIAS; s->inter_quant_bias= FF_DEFAULT_QUANT_BIAS; @@ -409,6 +484,7 @@ void avcodec_get_frame_defaults(AVFrame *pic){ memset(pic, 0, sizeof(AVFrame)); pic->pts= AV_NOPTS_VALUE; + pic->key_frame= 1; } /** @@ -442,6 +518,17 @@ int avcodec_open(AVCodecContext *avctx, AVCodec *codec) } else { avctx->priv_data = NULL; } + + if(avctx->coded_width && avctx->coded_height) + avcodec_set_dimensions(avctx, avctx->coded_width, avctx->coded_height); + else if(avctx->width && avctx->height) + avcodec_set_dimensions(avctx, avctx->width, 
avctx->height); + + if((avctx->coded_width||avctx->coded_height) && avcodec_check_dimensions(avctx,avctx->coded_width,avctx->coded_height)){ + av_freep(&avctx->priv_data); + return -1; + } + ret = avctx->codec->init(avctx); if (ret < 0) { av_freep(&avctx->priv_data); @@ -453,24 +540,35 @@ int avcodec_open(AVCodecContext *avctx, AVCodec *codec) int avcodec_encode_audio(AVCodecContext *avctx, uint8_t *buf, int buf_size, const short *samples) { - int ret; - - ret = avctx->codec->encode(avctx, buf, buf_size, (void *)samples); - avctx->frame_number++; - return ret; + if(buf_size < FF_MIN_BUFFER_SIZE && 0){ + av_log(avctx, AV_LOG_ERROR, "buffer smaller then minimum size\n"); + return -1; + } + if((avctx->codec->capabilities & CODEC_CAP_DELAY) || samples){ + int ret = avctx->codec->encode(avctx, buf, buf_size, (void *)samples); + avctx->frame_number++; + return ret; + }else + return 0; } int avcodec_encode_video(AVCodecContext *avctx, uint8_t *buf, int buf_size, const AVFrame *pict) { - int ret; - - ret = avctx->codec->encode(avctx, buf, buf_size, (void *)pict); + if(buf_size < FF_MIN_BUFFER_SIZE){ + av_log(avctx, AV_LOG_ERROR, "buffer smaller then minimum size\n"); + return -1; + } + if(avcodec_check_dimensions(avctx,avctx->width,avctx->height)) + return -1; + if((avctx->codec->capabilities & CODEC_CAP_DELAY) || pict){ + int ret = avctx->codec->encode(avctx, buf, buf_size, (void *)pict); + avctx->frame_number++; + emms_c(); //needed to avoid a emms_c() call before every return; - emms_c(); //needed to avoid a emms_c() call before every return; - - avctx->frame_number++; - return ret; + return ret; + }else + return 0; } /** @@ -489,13 +587,19 @@ int avcodec_decode_video(AVCodecContext *avctx, AVFrame *picture, int ret; *got_picture_ptr= 0; - ret = avctx->codec->decode(avctx, picture, got_picture_ptr, - buf, buf_size); + if((avctx->coded_width||avctx->coded_height) && avcodec_check_dimensions(avctx,avctx->coded_width,avctx->coded_height)) + return -1; + 
if((avctx->codec->capabilities & CODEC_CAP_DELAY) || buf_size){ + ret = avctx->codec->decode(avctx, picture, got_picture_ptr, + buf, buf_size); - emms_c(); //needed to avoid a emms_c() call before every return; + emms_c(); //needed to avoid a emms_c() call before every return; - if (*got_picture_ptr) - avctx->frame_number++; + if (*got_picture_ptr) + avctx->frame_number++; + }else + ret= 0; + return ret; } @@ -574,18 +678,6 @@ AVCodec *avcodec_find_decoder_by_name(const char *name) return NULL; } -static AVCodec *avcodec_find(enum CodecID id) -{ - AVCodec *p; - p = first_avcodec; - while (p) { - if (p->id == id) - return p; - p = p->next; - } - return NULL; -} - void avcodec_string(char *buf, int buf_size, AVCodecContext *enc, int encode) { const char *codec_name; @@ -664,7 +756,7 @@ void avcodec_string(char *buf, int buf_size, AVCodecContext *enc, int encode) strcpy(channels_str, "5:1"); break; default: - sprintf(channels_str, "%d channels", enc->channels); + snprintf(channels_str, sizeof(channels_str), "%d channels", enc->channels); break; } if (enc->sample_rate) { @@ -698,7 +790,8 @@ void avcodec_string(char *buf, int buf_size, AVCodecContext *enc, int encode) bitrate = enc->bit_rate; break; default: - av_abort(); + snprintf(buf, buf_size, "Invalid Codec type %d", enc->codec_type); + return; } if (encode) { if (enc->flags & CODEC_FLAG_PASS1) @@ -745,7 +838,7 @@ void avcodec_flush_buffers(AVCodecContext *avctx) avctx->codec->flush(avctx); } -static void avcodec_default_free_buffers(AVCodecContext *s){ +void avcodec_default_free_buffers(AVCodecContext *s){ int i, j; if(s->internal_buffer==NULL) return; @@ -775,70 +868,70 @@ char av_get_pict_type_char(int pict_type){ } int av_reduce(int *dst_nom, int *dst_den, int64_t nom, int64_t den, int64_t max){ - int exact=1, sign=0; - int64_t gcd; - - assert(den != 0); - - if(den < 0) - return av_reduce(dst_nom, dst_den, -nom, -den, max); - - sign= nom < 0; - nom= ABS(nom); - - gcd = ff_gcd(nom, den); - nom /= gcd; - den /= 
gcd; - - if(nom > max || den > max){ - AVRational a0={0,1}, a1={1,0}; - exact=0; - - for(;;){ - int64_t x= nom / den; - int64_t a2n= x*a1.num + a0.num; - int64_t a2d= x*a1.den + a0.den; - - if(a2n > max || a2d > max) break; - - nom %= den; - - a0= a1; - a1= (AVRational){a2n, a2d}; - if(nom==0) break; - x= nom; nom=den; den=x; - } - nom= a1.num; - den= a1.den; + AVRational a0={0,1}, a1={1,0}; + int sign= (nom<0) ^ (den<0); + int64_t gcd= ff_gcd(ABS(nom), ABS(den)); + + nom = ABS(nom)/gcd; + den = ABS(den)/gcd; + if(nom<=max && den<=max){ + a1= (AVRational){nom, den}; + den=0; } - assert(ff_gcd(nom, den) == 1); + while(den){ + int64_t x = nom / den; + int64_t next_den= nom - den*x; + int64_t a2n= x*a1.num + a0.num; + int64_t a2d= x*a1.den + a0.den; + + if(a2n > max || a2d > max) break; + + a0= a1; + a1= (AVRational){a2n, a2d}; + nom= den; + den= next_den; + } + assert(ff_gcd(a1.num, a1.den) == 1); - *dst_nom = sign ? -nom : nom; - *dst_den = den; + *dst_nom = sign ? -a1.num : a1.num; + *dst_den = a1.den; - return exact; + return den==0; } -int64_t av_rescale(int64_t a, int64_t b, int64_t c){ - AVInteger ai, ci; +int64_t av_rescale_rnd(int64_t a, int64_t b, int64_t c, enum AVRounding rnd){ + AVInteger ai; + int64_t r=0; assert(c > 0); assert(b >=0); + assert(rnd >=0 && rnd<=5 && rnd!=4); - if(a<0) return -av_rescale(-a, b, c); + if(a<0 && a != INT64_MIN) return -av_rescale_rnd(-a, b, c, rnd ^ ((rnd>>1)&1)); + if(rnd==AV_ROUND_NEAR_INF) r= c/2; + else if(rnd&1) r= c-1; + if(b<=INT_MAX && c<=INT_MAX){ if(a<=INT_MAX) - return (a * b + c/2)/c; + return (a * b + r)/c; else - return a/c*b + (a%c*b + c/2)/c; + return a/c*b + (a%c*b + r)/c; } ai= av_mul_i(av_int2i(a), av_int2i(b)); - ci= av_int2i(c); - ai= av_add_i(ai, av_shr_i(ci,1)); + ai= av_add_i(ai, av_int2i(r)); - return av_i2int(av_div_i(ai, ci)); + return av_i2int(av_div_i(ai, av_int2i(c))); +} + +int64_t av_rescale(int64_t a, int64_t b, int64_t c){ + return av_rescale_rnd(a, b, c, AV_ROUND_NEAR_INF); +} + +int64_t 
ff_gcd(int64_t a, int64_t b){ + if(b) return ff_gcd(b, a%b); + else return a; } /* av_log API */ @@ -892,7 +985,7 @@ void av_log_set_callback(void (*callback)(void*, int, const char*, va_list)) av_log_callback = callback; } -#if !defined(HAVE_PTHREADS) && !defined(HAVE_W32THREADS) +#if !defined(HAVE_THREADS) int avcodec_thread_init(AVCodecContext *s, int thread_count){ return -1; } diff --git a/src/libffmpeg/libavcodec/vcr1.c b/src/libffmpeg/libavcodec/vcr1.c index 9a706af31..442ad9136 100644 --- a/src/libffmpeg/libavcodec/vcr1.c +++ b/src/libffmpeg/libavcodec/vcr1.c @@ -45,11 +45,6 @@ static int decode_frame(AVCodecContext *avctx, uint8_t *bytestream= buf; int i, x, y; - /* special case for last picture */ - if (buf_size == 0) { - return 0; - } - if(p->data[0]) avctx->release_buffer(avctx, p); @@ -156,12 +151,14 @@ static int decode_init(AVCodecContext *avctx){ return 0; } +#if 0 static int encode_init(AVCodecContext *avctx){ common_init(avctx); return 0; } +#endif AVCodec vcr1_decoder = { "vcr1", diff --git a/src/libffmpeg/libavcodec/vmdav.c b/src/libffmpeg/libavcodec/vmdav.c index c11f80af6..4305f81fd 100644 --- a/src/libffmpeg/libavcodec/vmdav.c +++ b/src/libffmpeg/libavcodec/vmdav.c @@ -494,13 +494,10 @@ memset(data, 0x00, s->block_align * 2); bytes_decoded = s->block_align * 2; } } else { - if (s->bits == 16) { - } else { - /* copy the data but convert it to signed */ - for (i = 0; i < s->block_align; i++) - data[i * 2 + 1] = buf[i] + 0x80; - bytes_decoded = s->block_align * 2; - } + /* copy the data but convert it to signed */ + for (i = 0; i < s->block_align; i++) + data[i * 2 + 1] = buf[i] + 0x80; + bytes_decoded = s->block_align * 2; } } diff --git a/src/libffmpeg/libavcodec/vp3.c b/src/libffmpeg/libavcodec/vp3.c index 5b3f1b926..659d6913b 100644 --- a/src/libffmpeg/libavcodec/vp3.c +++ b/src/libffmpeg/libavcodec/vp3.c @@ -2093,6 +2093,9 @@ static void render_fragments(Vp3DecodeContext *s, upper_motion_limit = 7 * s->current_frame.linesize[2]; 
lower_motion_limit = height * s->current_frame.linesize[2] + width - 8; } + + if(ABS(stride) > 2048) + return; //various tables are fixed size /* for each fragment row... */ for (y = 0; y < height; y += 8) { @@ -2216,7 +2219,143 @@ av_log(s->avctx, AV_LOG_ERROR, " help! got beefy vector! (%X, %X)\n", motion_x, } emms_c(); +} + +#define SATURATE_U8(x) ((x) < 0) ? 0 : ((x) > 255) ? 255 : x + +static void horizontal_filter(unsigned char *first_pixel, int stride, + int *bounding_values) +{ + int i; + int filter_value; + + for (i = 0; i < 8; i++, first_pixel += stride) { + filter_value = + (first_pixel[-2] * 1) - + (first_pixel[-1] * 3) + + (first_pixel[ 0] * 3) - + (first_pixel[ 1] * 1); + filter_value = bounding_values[(filter_value + 4) >> 3]; + first_pixel[-1] = SATURATE_U8(first_pixel[-1] + filter_value); + first_pixel[ 0] = SATURATE_U8(first_pixel[ 0] - filter_value); + } +} + +static void vertical_filter(unsigned char *first_pixel, int stride, + int *bounding_values) +{ + int i; + int filter_value; + + for (i = 0; i < 8; i++, first_pixel++) { + filter_value = + (first_pixel[-(2 * stride)] * 1) - + (first_pixel[-(1 * stride)] * 3) + + (first_pixel[ (0 )] * 3) - + (first_pixel[ (1 * stride)] * 1); + filter_value = bounding_values[(filter_value + 4) >> 3]; + first_pixel[-(1 * stride)] = SATURATE_U8(first_pixel[-(1 * stride)] + filter_value); + first_pixel[0] = SATURATE_U8(first_pixel[0] - filter_value); + } +} + +static void apply_loop_filter(Vp3DecodeContext *s) +{ + int x, y, plane; + int width, height; + int fragment; + int stride; + unsigned char *plane_data; + int bounding_values[256]; + int filter_limit; + + /* find the right loop limit value */ + for (x = 63; x >= 0; x--) { + if (vp31_ac_scale_factor[x] >= s->quality_index) + break; + } + filter_limit = vp31_filter_limit_values[x]; + + /* set up the bounding values */ + memset(bounding_values, 0, 256 * sizeof(int)); + for (x = 0; x < filter_limit; x++) { + bounding_values[-x - filter_limit] = -filter_limit + 
x; + bounding_values[-x] = -x; + bounding_values[x] = x; + bounding_values[x + filter_limit] = filter_limit - x; + } + + for (plane = 0; plane < 3; plane++) { + + if (plane == 0) { + /* Y plane parameters */ + fragment = 0; + width = s->fragment_width; + height = s->fragment_height; + stride = s->current_frame.linesize[0]; + plane_data = s->current_frame.data[0]; + } else if (plane == 1) { + /* U plane parameters */ + fragment = s->u_fragment_start; + width = s->fragment_width / 2; + height = s->fragment_height / 2; + stride = s->current_frame.linesize[1]; + plane_data = s->current_frame.data[1]; + } else { + /* V plane parameters */ + fragment = s->v_fragment_start; + width = s->fragment_width / 2; + height = s->fragment_height / 2; + stride = s->current_frame.linesize[2]; + plane_data = s->current_frame.data[2]; + } + + for (y = 0; y < height; y++) { + + for (x = 0; x < width; x++) { + + /* do not perform left edge filter for left columns frags */ + if ((x > 0) && + (s->all_fragments[fragment].coding_method != MODE_COPY)) { + horizontal_filter( + plane_data + s->all_fragments[fragment].first_pixel, + stride, bounding_values); + } + + /* do not perform top edge filter for top row fragments */ + if ((y > 0) && + (s->all_fragments[fragment].coding_method != MODE_COPY)) { + vertical_filter( + plane_data + s->all_fragments[fragment].first_pixel, + stride, bounding_values); + } + + /* do not perform right edge filter for right column + * fragments or if right fragment neighbor is also coded + * in this frame (it will be filtered in next iteration) */ + if ((x < width - 1) && + (s->all_fragments[fragment].coding_method != MODE_COPY) && + (s->all_fragments[fragment + 1].coding_method == MODE_COPY)) { + horizontal_filter( + plane_data + s->all_fragments[fragment + 1].first_pixel, + stride, bounding_values); + } + /* do not perform bottom edge filter for bottom row + * fragments or if bottom fragment neighbor is also coded + * in this frame (it will be filtered in the next 
row) */ + if ((y < height - 1) && + (s->all_fragments[fragment].coding_method != MODE_COPY) && + (s->all_fragments[fragment + width].coding_method == MODE_COPY)) { + vertical_filter( + plane_data + s->all_fragments[fragment + width].first_pixel, + stride, bounding_values); + } + + fragment++; + } + } + } } /* @@ -2420,27 +2559,27 @@ static int vp3_decode_init(AVCodecContext *avctx) /* DC histograms */ init_vlc(&s->dc_vlc[i], 5, 32, &dc_bias[i][0][1], 4, 2, - &dc_bias[i][0][0], 4, 2); + &dc_bias[i][0][0], 4, 2, 0); /* group 1 AC histograms */ init_vlc(&s->ac_vlc_1[i], 5, 32, &ac_bias_0[i][0][1], 4, 2, - &ac_bias_0[i][0][0], 4, 2); + &ac_bias_0[i][0][0], 4, 2, 0); /* group 2 AC histograms */ init_vlc(&s->ac_vlc_2[i], 5, 32, &ac_bias_1[i][0][1], 4, 2, - &ac_bias_1[i][0][0], 4, 2); + &ac_bias_1[i][0][0], 4, 2, 0); /* group 3 AC histograms */ init_vlc(&s->ac_vlc_3[i], 5, 32, &ac_bias_2[i][0][1], 4, 2, - &ac_bias_2[i][0][0], 4, 2); + &ac_bias_2[i][0][0], 4, 2, 0); /* group 4 AC histograms */ init_vlc(&s->ac_vlc_4[i], 5, 32, &ac_bias_3[i][0][1], 4, 2, - &ac_bias_3[i][0][0], 4, 2); + &ac_bias_3[i][0][0], 4, 2, 0); } /* build quantization zigzag table */ @@ -2598,6 +2737,7 @@ if (!s->keyframe) { reverse_dc_prediction(s, 0, s->fragment_width, s->fragment_height); render_fragments(s, 0, s->width, s->height, 0); +// apply_loop_filter(s); if ((avctx->flags & CODEC_FLAG_GRAY) == 0) { reverse_dc_prediction(s, s->u_fragment_start, @@ -2681,6 +2821,11 @@ static int theora_decode_header(AVCodecContext *avctx, GetBitContext gb) s->width = get_bits(&gb, 16) << 4; s->height = get_bits(&gb, 16) << 4; + if(avcodec_check_dimensions(avctx, s->width, s->height)){ + s->width= s->height= 0; + return -1; + } + skip_bits(&gb, 24); /* frame width */ skip_bits(&gb, 24); /* frame height */ @@ -2719,16 +2864,16 @@ static int theora_decode_comments(AVCodecContext *avctx, GetBitContext gb) { int nb_comments, i, tmp; - tmp = get_bits(&gb, 32); + tmp = get_bits_long(&gb, 32); tmp = be2me_32(tmp); 
while(tmp--) skip_bits(&gb, 8); - nb_comments = get_bits(&gb, 32); + nb_comments = get_bits_long(&gb, 32); nb_comments = be2me_32(nb_comments); for (i = 0; i < nb_comments; i++) { - tmp = get_bits(&gb, 32); + tmp = get_bits_long(&gb, 32); tmp = be2me_32(tmp); while(tmp--) skip_bits(&gb, 8); @@ -2774,13 +2919,20 @@ static int theora_decode_init(AVCodecContext *avctx) Vp3DecodeContext *s = avctx->priv_data; GetBitContext gb; int ptype; + uint8_t *p= avctx->extradata; + int op_bytes, i; s->theora = 1; if (!avctx->extradata_size) return -1; - init_get_bits(&gb, avctx->extradata, avctx->extradata_size); + for(i=0;i<3;i++) { + op_bytes = *(p++)<<8; + op_bytes += *(p++); + + init_get_bits(&gb, p, op_bytes); + p += op_bytes; ptype = get_bits(&gb, 8); debug_vp3("Theora headerpacket type: %x\n", ptype); @@ -2803,6 +2955,7 @@ static int theora_decode_init(AVCodecContext *avctx) theora_decode_tables(avctx, gb); break; } + } return 0; } diff --git a/src/libffmpeg/libavcodec/vp3data.h b/src/libffmpeg/libavcodec/vp3data.h index 1dd511fa0..85a233716 100644 --- a/src/libffmpeg/libavcodec/vp3data.h +++ b/src/libffmpeg/libavcodec/vp3data.h @@ -61,6 +61,17 @@ static const uint32_t vp31_ac_scale_factor[64] = 21, 19, 18, 17, 15, 13, 12, 10 }; +static const uint32_t vp31_filter_limit_values[64] = +{ 30, 25, 20, 20, 15, 15, 14, 14, + 13, 13, 12, 12, 11, 11, 10, 10, + 9, 9, 8, 8, 7, 7, 7, 7, + 6, 6, 6, 6, 5, 5, 5, 5, + 4, 4, 4, 4, 3, 3, 3, 3, + 2, 2, 2, 2, 2, 2, 2, 2, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 +}; + /* table used to convert natural order <-> zigzag order */ static const int dezigzag_index[64] = { 0, 1, 8, 16, 9, 2, 3, 10, diff --git a/src/libffmpeg/libavcodec/vqavideo.c b/src/libffmpeg/libavcodec/vqavideo.c index ea86fb108..fb0871e18 100644 --- a/src/libffmpeg/libavcodec/vqavideo.c +++ b/src/libffmpeg/libavcodec/vqavideo.c @@ -151,6 +151,10 @@ static int vqa_decode_init(AVCodecContext *avctx) s->vqa_version = vqa_header[0]; s->width = LE_16(&vqa_header[6]); 
s->height = LE_16(&vqa_header[8]); + if(avcodec_check_dimensions(avctx, s->width, s->height)){ + s->width= s->height= 0; + return -1; + } s->vector_width = vqa_header[10]; s->vector_height = vqa_header[11]; s->partial_count = s->partial_countdown = vqa_header[13]; diff --git a/src/libffmpeg/libavcodec/wmadec.c b/src/libffmpeg/libavcodec/wmadec.c index cf2db1494..e0788375f 100644 --- a/src/libffmpeg/libavcodec/wmadec.c +++ b/src/libffmpeg/libavcodec/wmadec.c @@ -32,6 +32,7 @@ */ #include "avcodec.h" +#include "bitstream.h" #include "dsputil.h" /* size of blocks */ @@ -182,7 +183,7 @@ static void init_coef_vlc(VLC *vlc, const uint16_t *p; int i, l, j, level; - init_vlc(vlc, 9, n, table_bits, 1, 1, table_codes, 4, 4); + init_vlc(vlc, 9, n, table_bits, 1, 1, table_codes, 4, 4, 0); run_table = av_malloc(n * sizeof(uint16_t)); level_table = av_malloc(n * sizeof(uint16_t)); @@ -208,7 +209,8 @@ static int wma_decode_init(AVCodecContext * avctx) int i, flags1, flags2; float *window; uint8_t *extradata; - float bps1, high_freq, bps; + float bps1, high_freq; + volatile float bps; int sample_rate1; int coef_vlc_table; @@ -492,13 +494,13 @@ static int wma_decode_init(AVCodecContext * avctx) #endif init_vlc(&s->hgain_vlc, 9, sizeof(hgain_huffbits), hgain_huffbits, 1, 1, - hgain_huffcodes, 2, 2); + hgain_huffcodes, 2, 2, 0); } if (s->use_exp_vlc) { init_vlc(&s->exp_vlc, 9, sizeof(scale_huffbits), scale_huffbits, 1, 1, - scale_huffcodes, 4, 4); + scale_huffcodes, 4, 4, 0); } else { wma_lsp_to_curve_init(s, s->frame_len); } @@ -702,7 +704,12 @@ static int wma_decode_block(WMADecodeContext *s) int n, v, a, ch, code, bsize; int coef_nb_bits, total_gain, parse_exponents; float window[BLOCK_MAX_SIZE * 2]; +// XXX: FIXME!! 
there's a bug somewhere which makes this mandatory under altivec +#ifdef HAVE_ALTIVEC + volatile int nb_coefs[MAX_CHANNELS] __attribute__((aligned(16))); +#else int nb_coefs[MAX_CHANNELS]; +#endif float mdct_norm; #ifdef TRACE diff --git a/src/libffmpeg/libavcodec/wmv2.c b/src/libffmpeg/libavcodec/wmv2.c index 13a112d1f..b6376d80b 100644 --- a/src/libffmpeg/libavcodec/wmv2.c +++ b/src/libffmpeg/libavcodec/wmv2.c @@ -101,6 +101,7 @@ static int wmv2_encode_init(AVCodecContext *avctx){ return 0; } +#if 0 /* unused, remove? */ static int wmv2_encode_end(AVCodecContext *avctx){ if(MPV_encode_end(avctx) < 0) @@ -111,6 +112,7 @@ static int wmv2_encode_end(AVCodecContext *avctx){ return 0; } +#endif int ff_wmv2_encode_picture_header(MpegEncContext * s, int picture_number) { @@ -244,7 +246,7 @@ void ff_wmv2_encode_mb(MpegEncContext * s, if (s->pict_type == I_TYPE) { set_stat(ST_INTRA_MB); put_bits(&s->pb, - table_mb_intra[coded_cbp][1], table_mb_intra[coded_cbp][0]); + ff_msmp4_mb_i_table[coded_cbp][1], ff_msmp4_mb_i_table[coded_cbp][0]); } else { put_bits(&s->pb, wmv2_inter_table[w->cbp_table_index][cbp][1], @@ -734,7 +736,7 @@ static int wmv2_decode_mb(MpegEncContext *s, DCTELEM block[6][64]) cbp = code & 0x3f; } else { s->mb_intra = 1; - code = get_vlc2(&s->gb, mb_intra_vlc.table, MB_INTRA_VLC_BITS, 2); + code = get_vlc2(&s->gb, ff_msmp4_mb_i_vlc.table, MB_INTRA_VLC_BITS, 2); if (code < 0){ av_log(s->avctx, AV_LOG_ERROR, "II-cbp illegal at %d %d\n", s->mb_x, s->mb_y); return -1; diff --git a/src/libffmpeg/libavcodec/wnv1.c b/src/libffmpeg/libavcodec/wnv1.c new file mode 100644 index 000000000..292c7e042 --- /dev/null +++ b/src/libffmpeg/libavcodec/wnv1.c @@ -0,0 +1,144 @@ +/* + * Winnov WNV1 codec + * Copyright (c) 2005 Konstantin Shishkov + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at 
your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/** + * @file wnv1.c + * Winnov WNV1 codec. + */ + +#include "avcodec.h" +#include "common.h" +#include "bitstream.h" + + +typedef struct WNV1Context{ + AVCodecContext *avctx; + AVFrame pic; + + int shift; + GetBitContext gb; +} WNV1Context; + +static uint16_t code_tab[16][2]={ +{0x1FD,9}, {0xFD,8}, {0x7D,7}, {0x3D,6}, {0x1D,5}, {0x0D,4}, {0x005,3}, +{0x000,1}, +{0x004,3}, {0x0C,4}, {0x1C,5}, {0x3C,6}, {0x7C,7}, {0xFC,8}, {0x1FC,9}, {0xFF,8} +}; + +#define CODE_VLC_BITS 9 +static VLC code_vlc; + +/* returns modified base_value */ +static inline int wnv1_get_code(WNV1Context *w, int base_value) +{ + int v = get_vlc2(&w->gb, code_vlc.table, CODE_VLC_BITS, 1); + + if(v==15) + return ff_reverse[ get_bits(&w->gb, 8 - w->shift) ]; + else + return base_value + ((v - 7)<<w->shift); +} + +static int decode_frame(AVCodecContext *avctx, + void *data, int *data_size, + uint8_t *buf, int buf_size) +{ + WNV1Context * const l = avctx->priv_data; + AVFrame * const p= (AVFrame*)&l->pic; + unsigned char *Y,*U,*V; + int i, j; + int prev_y = 0, prev_u = 0, prev_v = 0; + + if(p->data[0]) + avctx->release_buffer(avctx, p); + + p->reference = 0; + if(avctx->get_buffer(avctx, p) < 0){ + av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n"); + return -1; + } + p->key_frame = 1; + + for(i=8; i<buf_size; i++) + buf[i]= ff_reverse[ buf[i] ]; //FIXME ensure that the buffer is modifyable or use a temp one + init_get_bits(&l->gb, buf+8, (buf_size-8)*8); + + if (buf[2] >> 4 == 6) + 
l->shift = 2; + else { + l->shift = 8 - (buf[2] >> 4); + if (l->shift > 4) { + av_log(avctx, AV_LOG_ERROR, "Unknown WNV1 frame header value %i, please upload file for study\n", buf[2] >> 4); + l->shift = 4; + } + if (l->shift < 1) { + av_log(avctx, AV_LOG_ERROR, "Unknown WNV1 frame header value %i, please upload file for study\n", buf[2] >> 4); + l->shift = 1; + } + } + + Y = p->data[0]; + U = p->data[1]; + V = p->data[2]; + for (j = 0; j < avctx->height; j++) { + for (i = 0; i < avctx->width / 2; i++) { + Y[i * 2] = wnv1_get_code(l, prev_y); + prev_u = U[i] = wnv1_get_code(l, prev_u); + prev_y = Y[(i * 2) + 1] = wnv1_get_code(l, Y[i * 2]); + prev_v = V[i] = wnv1_get_code(l, prev_v); + } + Y += p->linesize[0]; + U += p->linesize[1]; + V += p->linesize[2]; + } + + + *data_size = sizeof(AVFrame); + *(AVFrame*)data = l->pic; + + return buf_size; +} + +static int decode_init(AVCodecContext *avctx){ + WNV1Context * const l = avctx->priv_data; + + l->avctx = avctx; + avctx->pix_fmt = PIX_FMT_YUV422P; + + if(!code_vlc.table){ + init_vlc(&code_vlc, CODE_VLC_BITS, 16, + &code_tab[0][1], 4, 2, + &code_tab[0][0], 4, 2, 1); + } + + return 0; +} + +AVCodec wnv1_decoder = { + "wnv1", + CODEC_TYPE_VIDEO, + CODEC_ID_WNV1, + sizeof(WNV1Context), + decode_init, + NULL, + NULL, + decode_frame, + CODEC_CAP_DR1, +}; diff --git a/src/libffmpeg/libavcodec/ws-snd1.c b/src/libffmpeg/libavcodec/ws-snd1.c new file mode 100644 index 000000000..5ac4c61bd --- /dev/null +++ b/src/libffmpeg/libavcodec/ws-snd1.c @@ -0,0 +1,145 @@ +/* + * Westwood SNDx codecs + * Copyright (c) 2005 Konstantin Shishkov + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include "avcodec.h" + +/** + * @file ws-snd.c + * Westwood SNDx codecs. + * + * Reference documents about VQA format and its audio codecs + * can be found here: + * http://www.multimedia.cx + */ + +typedef struct { +} WSSNDContext; + +static const char ws_adpcm_2bit[] = { -2, -1, 0, 1}; +static const char ws_adpcm_4bit[] = { + -9, -8, -6, -5, -4, -3, -2, -1, + 0, 1, 2, 3, 4, 5, 6, 8 }; + +#define CLIP8(a) if(a>127)a=127;if(a<-128)a=-128; + +static int ws_snd_decode_init(AVCodecContext * avctx) +{ +// WSSNDContext *c = avctx->priv_data; + + return 0; +} + +static int ws_snd_decode_frame(AVCodecContext *avctx, + void *data, int *data_size, + uint8_t *buf, int buf_size) +{ +// WSSNDContext *c = avctx->priv_data; + + int in_size, out_size; + int sample = 0; + int i; + short *samples = data; + + if (!buf_size) + return 0; + + out_size = LE_16(&buf[0]); + *data_size = out_size * 2; + in_size = LE_16(&buf[2]); + buf += 4; + + if (in_size == out_size) { + for (i = 0; i < out_size; i++) + *samples++ = (*buf++ - 0x80) << 8; + return buf_size; + } + + while (out_size > 0) { + int code; + uint8_t count; + code = (*buf) >> 6; + count = (*buf) & 0x3F; + buf++; + switch(code) { + case 0: /* ADPCM 2-bit */ + for (count++; count > 0; count--) { + code = *buf++; + sample += ws_adpcm_2bit[code & 0x3]; + CLIP8(sample); + *samples++ = sample << 8; + sample += ws_adpcm_2bit[(code >> 2) & 0x3]; + CLIP8(sample); + *samples++ = sample << 8; + sample += ws_adpcm_2bit[(code >> 4) & 0x3]; + CLIP8(sample); + 
*samples++ = sample << 8; + sample += ws_adpcm_2bit[(code >> 6) & 0x3]; + CLIP8(sample); + *samples++ = sample << 8; + out_size -= 4; + } + break; + case 1: /* ADPCM 4-bit */ + for (count++; count > 0; count--) { + code = *buf++; + sample += ws_adpcm_4bit[code & 0xF]; + CLIP8(sample); + *samples++ = sample << 8; + sample += ws_adpcm_4bit[code >> 4]; + CLIP8(sample); + *samples++ = sample << 8; + out_size -= 2; + } + break; + case 2: /* no compression */ + if (count & 0x20) { /* big delta */ + char t; + t = count; + t <<= 3; + sample += t >> 3; + *samples++ = sample << 8; + out_size--; + } else { /* copy */ + for (count++; count > 0; count--) { + *samples++ = (*buf++ - 0x80) << 8; + out_size--; + } + sample = buf[-1] - 0x80; + } + break; + default: /* run */ + for(count++; count > 0; count--) { + *samples++ = sample << 8; + out_size--; + } + } + } + + return buf_size; +} + +AVCodec ws_snd1_decoder = { + "ws_snd1", + CODEC_TYPE_AUDIO, + CODEC_ID_WESTWOOD_SND1, + sizeof(WSSNDContext), + ws_snd_decode_init, + NULL, + NULL, + ws_snd_decode_frame, +}; diff --git a/src/libffmpeg/libavcodec/xan.c b/src/libffmpeg/libavcodec/xan.c index f98a06bca..377a71ef2 100644 --- a/src/libffmpeg/libavcodec/xan.c +++ b/src/libffmpeg/libavcodec/xan.c @@ -132,6 +132,9 @@ static int xan_decode_init(AVCodecContext *avctx) v_b_table[i] = V_B * i; } + if(avcodec_check_dimensions(avctx, avctx->width, avctx->height)) + return -1; + s->buffer1 = av_malloc(avctx->width * avctx->height); s->buffer2 = av_malloc(avctx->width * avctx->height); if (!s->buffer1 || !s->buffer2) diff --git a/src/libffmpeg/libavcodec/xl.c b/src/libffmpeg/libavcodec/xl.c new file mode 100644 index 000000000..2ba48eb27 --- /dev/null +++ b/src/libffmpeg/libavcodec/xl.c @@ -0,0 +1,138 @@ +/* + * Miro VideoXL codec + * Copyright (c) 2004 Konstantin Shishkov + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free 
Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/** + * @file xl.c + * Miro VideoXL codec. + */ + +#include "avcodec.h" +#include "mpegvideo.h" + +typedef struct VideoXLContext{ + AVCodecContext *avctx; + AVFrame pic; +} VideoXLContext; + +const int xl_table[32] = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 12, 15, 20, 25, 34, 46, + 64, 82, 94, 103, 108, 113, 116, 119, + 120, 121, 122, 123, 124, 125, 126, 127}; + +static int decode_frame(AVCodecContext *avctx, + void *data, int *data_size, + uint8_t *buf, int buf_size) +{ + VideoXLContext * const a = avctx->priv_data; + AVFrame * const p= (AVFrame*)&a->pic; + uint8_t *Y, *U, *V; + int i, j; + int stride; + uint32_t val; + int y0, y1, y2, y3, c0, c1; + + if(p->data[0]) + avctx->release_buffer(avctx, p); + + p->reference = 0; + if(avctx->get_buffer(avctx, p) < 0){ + av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n"); + return -1; + } + p->pict_type= I_TYPE; + p->key_frame= 1; + + Y = a->pic.data[0]; + U = a->pic.data[1]; + V = a->pic.data[2]; + + stride = avctx->width - 4; + for (i = 0; i < avctx->height; i++) { + /* lines are stored in reversed order */ + buf += stride; + + for (j = 0; j < avctx->width; j += 4) { + /* value is stored in LE dword with word swapped */ + val = LE_32(buf); + buf -= 4; + val = ((val >> 16) & 0xFFFF) | ((val & 0xFFFF) << 16); + + if(!j) + y0 = (val & 0x1F) << 2; + else + y0 = y3 + xl_table[val & 0x1F]; + val >>= 5; + y1 = y0 + xl_table[val & 0x1F]; + val >>= 5; + 
y2 = y1 + xl_table[val & 0x1F]; + val >>= 6; /* align to word */ + y3 = y2 + xl_table[val & 0x1F]; + val >>= 5; + if(!j) + c0 = (val & 0x1F) << 2; + else + c0 += xl_table[val & 0x1F]; + val >>= 5; + if(!j) + c1 = (val & 0x1F) << 2; + else + c1 += xl_table[val & 0x1F]; + + Y[j + 0] = y0 << 1; + Y[j + 1] = y1 << 1; + Y[j + 2] = y2 << 1; + Y[j + 3] = y3 << 1; + + U[j >> 2] = c0 << 1; + V[j >> 2] = c1 << 1; + } + + buf += avctx->width + 4; + Y += a->pic.linesize[0]; + U += a->pic.linesize[1]; + V += a->pic.linesize[2]; + } + + *data_size = sizeof(AVFrame); + *(AVFrame*)data = a->pic; + + return buf_size; +} + +static int decode_init(AVCodecContext *avctx){ +// VideoXLContext * const a = avctx->priv_data; + + avctx->pix_fmt= PIX_FMT_YUV411P; + + return 0; +} + +AVCodec xl_decoder = { + "xl", + CODEC_TYPE_VIDEO, + CODEC_ID_VIXL, + sizeof(VideoXLContext), + decode_init, + NULL, + NULL, + decode_frame, + CODEC_CAP_DR1, +}; |