diff options
75 files changed, 6816 insertions, 3345 deletions
diff --git a/src/libffmpeg/libavcodec/4xm.c b/src/libffmpeg/libavcodec/4xm.c index 3ca2338d2..a986f151e 100644 --- a/src/libffmpeg/libavcodec/4xm.c +++ b/src/libffmpeg/libavcodec/4xm.c @@ -606,7 +606,7 @@ static int decode_frame(AVCodecContext *avctx, int i, frame_4cc, frame_size; frame_4cc= get32(buf); - if(buf_size != get32(buf+4)+8){ + if(buf_size != get32(buf+4)+8 || buf_size < 20){ av_log(f->avctx, AV_LOG_ERROR, "size mismatch %d %d\n", buf_size, get32(buf+4)); } @@ -634,6 +634,10 @@ static int decode_frame(AVCodecContext *avctx, cfrm= &f->cfrm[i]; cfrm->data= av_fast_realloc(cfrm->data, &cfrm->allocated_size, cfrm->size + data_size + FF_INPUT_BUFFER_PADDING_SIZE); + if(!cfrm->data){ //explicit check needed as memcpy below might not catch a NULL + av_log(f->avctx, AV_LOG_ERROR, "realloc falure"); + return -1; + } memcpy(cfrm->data + cfrm->size, buf+20, data_size); cfrm->size += data_size; diff --git a/src/libffmpeg/libavcodec/Makefile.am b/src/libffmpeg/libavcodec/Makefile.am index 687f37f3e..d94646f59 100644 --- a/src/libffmpeg/libavcodec/Makefile.am +++ b/src/libffmpeg/libavcodec/Makefile.am @@ -9,7 +9,7 @@ EXTRA_DIST = motion_est_template.c \ # we need to compile everything in debug mode, including the encoders, # otherwise we get unresolved symbols, because some unsatisfied function calls # are not optimized away with debug optimization -AM_CFLAGS = `test "$(CFLAGS)" = "$(DEBUG_CFLAGS)" && echo -DCONFIG_ENCODERS` -fno-strict-aliasing +AM_CFLAGS = `test "$(CFLAGS)" = "$(DEBUG_CFLAGS)" && echo -DCONFIG_ENCODERS` -fno-strict-aliasing -DCONFIG_VC1_DECODER AM_CPPFLAGS = $(ZLIB_CPPFLAGS) $(LIBFFMPEG_CPPFLAGS) \ -I$(top_srcdir)/src/libffmpeg/libavutil ASFLAGS = @@ -23,9 +23,13 @@ libavcodec_la_SOURCES = \ adpcm.c \ alac.c \ asv1.c \ + avs.c \ bitstream.c \ cabac.c \ + cavs.c \ cinepak.c \ + cook.c \ + cscd.c \ cyuv.c \ dpcm.c \ dsputil.c \ @@ -34,6 +38,7 @@ libavcodec_la_SOURCES = \ eval.c \ faandct.c \ flac.c \ + flashsv.c \ flicvideo.c \ fraps.c \ fft.c \ @@ -53,12 +58,15 @@ libavcodec_la_SOURCES = \ jfdctfst.c \ jfdctint.c \ jrevdct.c \ + kmvc.c \ lcl.c \ loco.c \ + lzo.c \ mdct.c \ mace.c \ mem.c \ mjpeg.c \ + mmvideo.c \ motion_est.c \ mpeg12.c \ mpegaudiodec.c \ @@ -66,6 +74,7 @@ libavcodec_la_SOURCES = \ msmpeg4.c \ msrle.c \ msvideo1.c \ + nuv.c \ parser.c \ pcm.c \ qdm2.c \ @@ -79,17 +88,23 @@ libavcodec_la_SOURCES = \ resample2.c \ roqvideo.c \ rpza.c \ + rtjpeg.c \ rv10.c \ shorten.c \ simple_idct.c \ + smacker.c \ smc.c \ snow.c \ svq1.c \ tscc.c \ truemotion1.c \ truemotion2.c \ + truespeech.c \ + tta.c \ ulti.c \ utils.c \ + vc1.c \ + vc1dsp.c \ vcr1.c \ vmdav.c \ vorbis.c \ @@ -99,7 +114,8 @@ libavcodec_la_SOURCES = \ wmadec.c \ wnv1.c \ xan.c \ - xl.c + xl.c \ + zmbv.c libavcodec_la_LDFLAGS = \ $(top_builddir)/src/libffmpeg/libavcodec/armv4l/libavcodec_armv4l.la \ @@ -114,6 +130,8 @@ noinst_HEADERS = \ avcodec.h \ bitstream.h \ cabac.h \ + cavsdata.h \ + cookdata.h \ dsputil.h \ dvdata.h \ faandct.h \ @@ -142,7 +160,10 @@ noinst_HEADERS = \ svq1_cb.h \ svq1_vlc.h \ truemotion1data.h \ + truespeech_data.h \ ulti_cb.h \ vorbis.h \ + vc1acdata.h \ + vc1data.h \ vp3data.h \ wmadata.h diff --git a/src/libffmpeg/libavcodec/adpcm.c b/src/libffmpeg/libavcodec/adpcm.c index ed3106aa0..796cd267c 100644 --- a/src/libffmpeg/libavcodec/adpcm.c +++ b/src/libffmpeg/libavcodec/adpcm.c @@ -203,49 +203,11 @@ static int adpcm_encode_close(AVCodecContext *avctx) static inline unsigned char adpcm_ima_compress_sample(ADPCMChannelStatus *c, short sample) { - int step_index; - unsigned char nibble; - - int sign = 0; /* sign bit of the nibble (MSB) */ - int delta, predicted_delta; - - delta = sample - c->prev_sample; - - if (delta < 0) { - sign = 1; - delta = -delta; - } - - step_index = c->step_index; - - /* nibble = 4 * delta / step_table[step_index]; */ - nibble = (delta << 2) / step_table[step_index]; - - if (nibble > 7) - nibble = 7; - - step_index += index_table[nibble]; - if (step_index < 0) - step_index = 0; - if (step_index > 88) - step_index = 88; - - /* what the decoder will find */ - predicted_delta = ((step_table[step_index] * nibble) / 4) + (step_table[step_index] / 8); - - if (sign) - c->prev_sample -= predicted_delta; - else - c->prev_sample += predicted_delta; - + int delta = sample - c->prev_sample; + int nibble = FFMIN(7, abs(delta)*4/step_table[c->step_index]) + (delta<0)*8; + c->prev_sample = c->prev_sample + ((step_table[c->step_index] * yamaha_difflookup[nibble]) / 8); CLAMP_TO_SHORT(c->prev_sample); - - - nibble += sign << 3; /* sign * 8 */ - - /* save back */ - c->step_index = step_index; - + c->step_index = clip(c->step_index + index_table[nibble], 0, 88); return nibble; } @@ -276,27 +238,194 @@ static inline unsigned char adpcm_ms_compress_sample(ADPCMChannelStatus *c, shor static inline unsigned char adpcm_yamaha_compress_sample(ADPCMChannelStatus *c, short sample) { - int i1 = 0, j1; + int nibble, delta; if(!c->step) { c->predictor = 0; c->step = 127; } - j1 = sample - c->predictor; - j1 = (j1 * 8) / c->step; - i1 = abs(j1) / 2; - if (i1 > 7) - i1 = 7; - if (j1 < 0) - i1 += 8; + delta = sample - c->predictor; + + nibble = FFMIN(7, abs(delta)*4/c->step) + (delta<0)*8; - c->predictor = c->predictor + ((c->step * yamaha_difflookup[i1]) / 8); + c->predictor = c->predictor + ((c->step * yamaha_difflookup[nibble]) / 8); CLAMP_TO_SHORT(c->predictor); - c->step = (c->step * yamaha_indexscale[i1]) >> 8; + c->step = (c->step * yamaha_indexscale[nibble]) >> 8; c->step = clip(c->step, 127, 24567); - return i1; + return nibble; +} + +typedef struct TrellisPath { + int nibble; + int prev; +} TrellisPath; + +typedef struct TrellisNode { + uint32_t ssd; + int path; + int sample1; + int sample2; + int step; +} TrellisNode; + +static void adpcm_compress_trellis(AVCodecContext *avctx, const short *samples, + uint8_t *dst, ADPCMChannelStatus *c, int n) +{ +#define FREEZE_INTERVAL 128 + //FIXME 6% faster if frontier is a compile-time constant + const int frontier = 1 << avctx->trellis; + const int stride = avctx->channels; + const int version = avctx->codec->id; + const int max_paths = frontier*FREEZE_INTERVAL; + TrellisPath paths[max_paths], *p; + TrellisNode node_buf[2][frontier]; + TrellisNode *nodep_buf[2][frontier]; + TrellisNode **nodes = nodep_buf[0]; // nodes[] is always sorted by .ssd + TrellisNode **nodes_next = nodep_buf[1]; + int pathn = 0, froze = -1, i, j, k; + + assert(!(max_paths&(max_paths-1))); + + memset(nodep_buf, 0, sizeof(nodep_buf)); + nodes[0] = &node_buf[1][0]; + nodes[0]->ssd = 0; + nodes[0]->path = 0; + nodes[0]->step = c->step_index; + nodes[0]->sample1 = c->sample1; + nodes[0]->sample2 = c->sample2; + if(version == CODEC_ID_ADPCM_IMA_WAV) + nodes[0]->sample1 = c->prev_sample; + if(version == CODEC_ID_ADPCM_MS) + nodes[0]->step = c->idelta; + if(version == CODEC_ID_ADPCM_YAMAHA) { + if(c->step == 0) { + nodes[0]->step = 127; + nodes[0]->sample1 = 0; + } else { + nodes[0]->step = c->step; + nodes[0]->sample1 = c->predictor; + } + } + + for(i=0; i<n; i++) { + TrellisNode *t = node_buf[i&1]; + TrellisNode **u; + int sample = samples[i*stride]; + memset(nodes_next, 0, frontier*sizeof(TrellisNode*)); + for(j=0; j<frontier && nodes[j]; j++) { + // higher j have higher ssd already, so they're unlikely to use a suboptimal next sample too + const int range = (j < frontier/2) ? 1 : 0; + const int step = nodes[j]->step; + int nidx; + if(version == CODEC_ID_ADPCM_MS) { + const int predictor = ((nodes[j]->sample1 * c->coeff1) + (nodes[j]->sample2 * c->coeff2)) / 256; + const int div = (sample - predictor) / step; + const int nmin = clip(div-range, -8, 6); + const int nmax = clip(div+range, -7, 7); + for(nidx=nmin; nidx<=nmax; nidx++) { + const int nibble = nidx & 0xf; + int dec_sample = predictor + nidx * step; +#define STORE_NODE(NAME, STEP_INDEX)\ + int d;\ + uint32_t ssd;\ + CLAMP_TO_SHORT(dec_sample);\ + d = sample - dec_sample;\ + ssd = nodes[j]->ssd + d*d;\ + if(nodes_next[frontier-1] && ssd >= nodes_next[frontier-1]->ssd)\ + continue;\ + /* Collapse any two states with the same previous sample value. \ + * One could also distinguish states by step and by 2nd to last + * sample, but the effects of that are negligible. */\ + for(k=0; k<frontier && nodes_next[k]; k++) {\ + if(dec_sample == nodes_next[k]->sample1) {\ + assert(ssd >= nodes_next[k]->ssd);\ + goto next_##NAME;\ + }\ + }\ + for(k=0; k<frontier; k++) {\ + if(!nodes_next[k] || ssd < nodes_next[k]->ssd) {\ + TrellisNode *u = nodes_next[frontier-1];\ + if(!u) {\ + assert(pathn < max_paths);\ + u = t++;\ + u->path = pathn++;\ + }\ + u->ssd = ssd;\ + u->step = STEP_INDEX;\ + u->sample2 = nodes[j]->sample1;\ + u->sample1 = dec_sample;\ + paths[u->path].nibble = nibble;\ + paths[u->path].prev = nodes[j]->path;\ + memmove(&nodes_next[k+1], &nodes_next[k], (frontier-k-1)*sizeof(TrellisNode*));\ + nodes_next[k] = u;\ + break;\ + }\ + }\ + next_##NAME:; + STORE_NODE(ms, FFMAX(16, (AdaptationTable[nibble] * step) >> 8)); + } + } else if(version == CODEC_ID_ADPCM_IMA_WAV) { +#define LOOP_NODES(NAME, STEP_TABLE, STEP_INDEX)\ + const int predictor = nodes[j]->sample1;\ + const int div = (sample - predictor) * 4 / STEP_TABLE;\ + int nmin = clip(div-range, -7, 6);\ + int nmax = clip(div+range, -6, 7);\ + if(nmin<=0) nmin--; /* distinguish -0 from +0 */\ + if(nmax<0) nmax--;\ + for(nidx=nmin; nidx<=nmax; nidx++) {\ + const int nibble = nidx<0 ? 7-nidx : nidx;\ + int dec_sample = predictor + (STEP_TABLE * yamaha_difflookup[nibble]) / 8;\ + STORE_NODE(NAME, STEP_INDEX);\ + } + LOOP_NODES(ima, step_table[step], clip(step + index_table[nibble], 0, 88)); + } else { //CODEC_ID_ADPCM_YAMAHA + LOOP_NODES(yamaha, step, clip((step * yamaha_indexscale[nibble]) >> 8, 127, 24567)); +#undef LOOP_NODES +#undef STORE_NODE + } + } + + u = nodes; + nodes = nodes_next; + nodes_next = u; + + // prevent overflow + if(nodes[0]->ssd > (1<<28)) { + for(j=1; j<frontier && nodes[j]; j++) + nodes[j]->ssd -= nodes[0]->ssd; + nodes[0]->ssd = 0; + } + + // merge old paths to save memory + if(i == froze + FREEZE_INTERVAL) { + p = &paths[nodes[0]->path]; + for(k=i; k>froze; k--) { + dst[k] = p->nibble; + p = &paths[p->prev]; + } + froze = i; + pathn = 0; + // other nodes might use paths that don't coincide with the frozen one. + // checking which nodes do so is too slow, so just kill them all. + // this also slightly improves quality, but I don't know why. + memset(nodes+1, 0, (frontier-1)*sizeof(TrellisNode*)); + } + } + + p = &paths[nodes[0]->path]; + for(i=n-1; i>froze; i--) { + dst[i] = p->nibble; + p = &paths[p->prev]; + } + + c->predictor = nodes[0]->sample1; + c->sample1 = nodes[0]->sample1; + c->sample2 = nodes[0]->sample2; + c->step_index = nodes[0]->step; + c->step = nodes[0]->step; + c->idelta = nodes[0]->step; } static int adpcm_encode_frame(AVCodecContext *avctx, @@ -335,6 +464,24 @@ static int adpcm_encode_frame(AVCodecContext *avctx, } /* stereo: 4 bytes (8 samples) for left, 4 bytes for right, 4 bytes left, ... */ + if(avctx->trellis > 0) { + uint8_t buf[2][n*8]; + adpcm_compress_trellis(avctx, samples, buf[0], &c->status[0], n*8); + if(avctx->channels == 2) + adpcm_compress_trellis(avctx, samples+1, buf[1], &c->status[1], n*8); + for(i=0; i<n; i++) { + *dst++ = buf[0][8*i+0] | (buf[0][8*i+1] << 4); + *dst++ = buf[0][8*i+2] | (buf[0][8*i+3] << 4); + *dst++ = buf[0][8*i+4] | (buf[0][8*i+5] << 4); + *dst++ = buf[0][8*i+6] | (buf[0][8*i+7] << 4); + if (avctx->channels == 2) { + *dst++ = buf[1][8*i+0] | (buf[1][8*i+1] << 4); + *dst++ = buf[1][8*i+2] | (buf[1][8*i+3] << 4); + *dst++ = buf[1][8*i+4] | (buf[1][8*i+5] << 4); + *dst++ = buf[1][8*i+6] | (buf[1][8*i+7] << 4); + } + } + } else for (; n>0; n--) { *dst = adpcm_ima_compress_sample(&c->status[0], samples[0]) & 0x0F; *dst |= (adpcm_ima_compress_sample(&c->status[0], samples[avctx->channels]) << 4) & 0xF0; @@ -394,6 +541,21 @@ static int adpcm_encode_frame(AVCodecContext *avctx, *dst++ = c->status[i].sample2 >> 8; } + if(avctx->trellis > 0) { + int n = avctx->block_align - 7*avctx->channels; + uint8_t buf[2][n]; + if(avctx->channels == 1) { + n *= 2; + adpcm_compress_trellis(avctx, samples, buf[0], &c->status[0], n); + for(i=0; i<n; i+=2) + *dst++ = (buf[0][i] << 4) | buf[0][i+1]; + } else { + adpcm_compress_trellis(avctx, samples, buf[0], &c->status[0], n); + adpcm_compress_trellis(avctx, samples+1, buf[1], &c->status[1], n); + for(i=0; i<n; i++) + *dst++ = (buf[0][i] << 4) | buf[1][i]; + } + } else for(i=7*avctx->channels; i<avctx->block_align; i++) { int nibble; nibble = adpcm_ms_compress_sample(&c->status[ 0], *samples++)<<4; @@ -403,6 +565,20 @@ static int adpcm_encode_frame(AVCodecContext *avctx, break; case CODEC_ID_ADPCM_YAMAHA: n = avctx->frame_size / 2; + if(avctx->trellis > 0) { + uint8_t buf[2][n*2]; + n *= 2; + if(avctx->channels == 1) { + adpcm_compress_trellis(avctx, samples, buf[0], &c->status[0], n); + for(i=0; i<n; i+=2) + *dst++ = buf[0][i] | (buf[0][i+1] << 4); + } else { + adpcm_compress_trellis(avctx, samples, buf[0], &c->status[0], n); + adpcm_compress_trellis(avctx, samples+1, buf[1], &c->status[1], n); + for(i=0; i<n; i++) + *dst++ = buf[0][i] | (buf[1][i] << 4); + } + } else for (; n>0; n--) { for(i = 0; i < avctx->channels; i++) { int nibble; @@ -514,6 +690,34 @@ static inline short adpcm_ct_expand_nibble(ADPCMChannelStatus *c, char nibble) return (short)predictor; } +static inline short adpcm_sbpro_expand_nibble(ADPCMChannelStatus *c, char nibble, int size, int shift) +{ + int sign, delta, diff; + + sign = nibble & (1<<(size-1)); + delta = nibble & ((1<<(size-1))-1); + diff = delta << (7 + c->step + shift); + + if (sign) + c->predictor -= diff; + else + c->predictor += diff; + + /* clamp result */ + if (c->predictor > 16256) + c->predictor = 16256; + else if (c->predictor < -16384) + c->predictor = -16384; + + /* calculate new step */ + if (delta >= (2*size - 3) && c->step < 3) + c->step++; + else if (delta == 0 && c->step > 0) + c->step--; + + return (short) c->predictor; +} + static inline short adpcm_yamaha_expand_nibble(ADPCMChannelStatus *c, unsigned char nibble) { if(!c->step) { @@ -644,7 +848,7 @@ static int adpcm_decode_frame(AVCodecContext *avctx, samples = data; src = buf; - st = avctx->channels == 2; + st = avctx->channels == 2 ? 1 : 0; switch(avctx->codec->id) { case CODEC_ID_ADPCM_IMA_QT: @@ -666,8 +870,10 @@ static int adpcm_decode_frame(AVCodecContext *avctx, cs->step_index = (*src++) & 0x7F; - if (cs->step_index > 88) av_log(avctx, AV_LOG_ERROR, "ERROR: step_index = %i\n", cs->step_index); - if (cs->step_index > 88) cs->step_index = 88; + if (cs->step_index > 88){ + av_log(avctx, AV_LOG_ERROR, "ERROR: step_index = %i\n", cs->step_index); + cs->step_index = 88; + } cs->step = step_table[cs->step_index]; @@ -693,35 +899,32 @@ static int adpcm_decode_frame(AVCodecContext *avctx, if (avctx->block_align != 0 && buf_size > avctx->block_align) buf_size = avctx->block_align; +// samples_per_block= (block_align-4*chanels)*8 / (bits_per_sample * chanels) + 1; + for(i=0; i<avctx->channels; i++){ cs = &(c->status[i]); - cs->predictor = *src++; - cs->predictor |= (*src++) << 8; - if(cs->predictor & 0x8000) - cs->predictor -= 0x10000; - CLAMP_TO_SHORT(cs->predictor); + cs->predictor = (int16_t)(src[0] + (src[1]<<8)); + src+=2; // XXX: is this correct ??: *samples++ = cs->predictor; cs->step_index = *src++; - if (cs->step_index < 0) cs->step_index = 0; - if (cs->step_index > 88) cs->step_index = 88; - if (*src++) av_log(avctx, AV_LOG_ERROR, "unused byte should be null !!\n"); /* unused */ + if (cs->step_index > 88){ + av_log(avctx, AV_LOG_ERROR, "ERROR: step_index = %i\n", cs->step_index); + cs->step_index = 88; + } + if (*src++) av_log(avctx, AV_LOG_ERROR, "unused byte should be null but is %d!!\n", src[-1]); /* unused */ } - for(m=4; src < (buf + buf_size);) { - *samples++ = adpcm_ima_expand_nibble(&c->status[0], src[0] & 0x0F, 3); - if (st) - *samples++ = adpcm_ima_expand_nibble(&c->status[1], src[4] & 0x0F, 3); - *samples++ = adpcm_ima_expand_nibble(&c->status[0], (src[0] >> 4) & 0x0F, 3); - if (st) { - *samples++ = adpcm_ima_expand_nibble(&c->status[1], (src[4] >> 4) & 0x0F, 3); - if (!--m) { - m=4; - src+=4; - } + while(src < buf + buf_size){ + for(m=0; m<4; m++){ + for(i=0; i<=st; i++) + *samples++ = adpcm_ima_expand_nibble(&c->status[i], src[4*i] & 0x0F, 3); + for(i=0; i<=st; i++) + *samples++ = adpcm_ima_expand_nibble(&c->status[i], src[4*i] >> 4 , 3); + src++; } - src++; + src += 4*st; } break; case CODEC_ID_ADPCM_4XM: @@ -973,6 +1176,48 @@ static int adpcm_decode_frame(AVCodecContext *avctx, src++; } break; + case CODEC_ID_ADPCM_SBPRO_4: + case CODEC_ID_ADPCM_SBPRO_3: + case CODEC_ID_ADPCM_SBPRO_2: + if (!c->status[0].step_index) { + /* the first byte is a raw sample */ + *samples++ = 128 * (*src++ - 0x80); + if (st) + *samples++ = 128 * (*src++ - 0x80); + c->status[0].step_index = 1; + } + if (avctx->codec->id == CODEC_ID_ADPCM_SBPRO_4) { + while (src < buf + buf_size) { + *samples++ = adpcm_sbpro_expand_nibble(&c->status[0], + (src[0] >> 4) & 0x0F, 4, 0); + *samples++ = adpcm_sbpro_expand_nibble(&c->status[st], + src[0] & 0x0F, 4, 0); + src++; + } + } else if (avctx->codec->id == CODEC_ID_ADPCM_SBPRO_3) { + while (src < buf + buf_size) { + *samples++ = adpcm_sbpro_expand_nibble(&c->status[0], + (src[0] >> 5) & 0x07, 3, 0); + *samples++ = adpcm_sbpro_expand_nibble(&c->status[0], + (src[0] >> 2) & 0x07, 3, 0); + *samples++ = adpcm_sbpro_expand_nibble(&c->status[0], + src[0] & 0x03, 2, 0); + src++; + } + } else { + while (src < buf + buf_size) { + *samples++ = adpcm_sbpro_expand_nibble(&c->status[0], + (src[0] >> 6) & 0x03, 2, 2); + *samples++ = adpcm_sbpro_expand_nibble(&c->status[st], + (src[0] >> 4) & 0x03, 2, 2); + *samples++ = adpcm_sbpro_expand_nibble(&c->status[0], + (src[0] >> 2) & 0x03, 2, 2); + *samples++ = adpcm_sbpro_expand_nibble(&c->status[st], + src[0] & 0x03, 2, 2); + src++; + } + } + break; case CODEC_ID_ADPCM_SWF: { GetBitContext gb; @@ -1112,10 +1357,12 @@ ADPCM_CODEC(CODEC_ID_ADPCM_IMA_SMJPEG, adpcm_ima_smjpeg); ADPCM_CODEC(CODEC_ID_ADPCM_MS, adpcm_ms); ADPCM_CODEC(CODEC_ID_ADPCM_4XM, adpcm_4xm); ADPCM_CODEC(CODEC_ID_ADPCM_XA, adpcm_xa); -ADPCM_CODEC(CODEC_ID_ADPCM_ADX, adpcm_adx); ADPCM_CODEC(CODEC_ID_ADPCM_EA, adpcm_ea); ADPCM_CODEC(CODEC_ID_ADPCM_CT, adpcm_ct); ADPCM_CODEC(CODEC_ID_ADPCM_SWF, adpcm_swf); ADPCM_CODEC(CODEC_ID_ADPCM_YAMAHA, adpcm_yamaha); +ADPCM_CODEC(CODEC_ID_ADPCM_SBPRO_4, adpcm_sbpro_4); +ADPCM_CODEC(CODEC_ID_ADPCM_SBPRO_3, adpcm_sbpro_3); +ADPCM_CODEC(CODEC_ID_ADPCM_SBPRO_2, adpcm_sbpro_2); #undef ADPCM_CODEC diff --git a/src/libffmpeg/libavcodec/adx.c b/src/libffmpeg/libavcodec/adx.c index c841e4eb8..c8c785590 100644 --- a/src/libffmpeg/libavcodec/adx.c +++ b/src/libffmpeg/libavcodec/adx.c @@ -267,7 +267,7 @@ static uint32_t read_long(const unsigned char *p) return (p[0]<<24)|(p[1]<<16)|(p[2]<<8)|p[3]; } -int is_adx(const unsigned char *buf,size_t bufsize) +static int is_adx(const unsigned char *buf,size_t bufsize) { int offset; @@ -385,8 +385,8 @@ static int adx_decode_frame(AVCodecContext *avctx, } #ifdef CONFIG_ENCODERS -AVCodec adx_adpcm_encoder = { - "adx_adpcm", +AVCodec adpcm_adx_encoder = { + "adpcm_adx", CODEC_TYPE_AUDIO, CODEC_ID_ADPCM_ADX, sizeof(ADXContext), @@ -397,8 +397,8 @@ AVCodec adx_adpcm_encoder = { }; #endif //CONFIG_ENCODERS -AVCodec adx_adpcm_decoder = { - "adx_adpcm", +AVCodec adpcm_adx_decoder = { + "adpcm_adx", CODEC_TYPE_AUDIO, CODEC_ID_ADPCM_ADX, sizeof(ADXContext), diff --git a/src/libffmpeg/libavcodec/alac.c b/src/libffmpeg/libavcodec/alac.c index 21457ab23..8bd75e5d9 100644 --- a/src/libffmpeg/libavcodec/alac.c +++ b/src/libffmpeg/libavcodec/alac.c @@ -32,6 +32,22 @@ * bytes 0-3 atom size (0x24), big-endian * bytes 4-7 atom type ('alac', not the 'alac' tag from start of stsd) * bytes 8-35 data bytes needed by decoder + * + * Extradata: + * 32bit size + * 32bit tag (=alac) + * 32bit zero? + * 32bit max sample per frame + * 8bit ?? (zero?) + * 8bit sample size + * 8bit history mult + * 8bit initial history + * 8bit kmodifier + * 8bit channels? + * 16bit ?? + * 32bit max coded frame size + * 32bit bitrate? + * 32bit samplerate */ @@ -84,7 +100,7 @@ static void allocate_buffers(ALACContext *alac) alac->outputsamples_buffer_b = av_malloc(alac->setinfo_max_samples_per_frame * 4); } -static void alac_set_info(ALACContext *alac) +static int alac_set_info(ALACContext *alac) { unsigned char *ptr = alac->avctx->extradata; @@ -92,6 +108,10 @@ static void alac_set_info(ALACContext *alac) ptr += 4; /* alac */ ptr += 4; /* 0 ? */ + if(BE_32(ptr) >= UINT_MAX/4){ + av_log(alac->avctx, AV_LOG_ERROR, "setinfo_max_samples_per_frame too large\n"); + return -1; + } alac->setinfo_max_samples_per_frame = BE_32(ptr); /* buffer size / 2 ? */ ptr += 4; alac->setinfo_7a = *ptr++; @@ -99,17 +119,19 @@ static void alac_set_info(ALACContext *alac) alac->setinfo_rice_historymult = *ptr++; alac->setinfo_rice_initialhistory = *ptr++; alac->setinfo_rice_kmodifier = *ptr++; - alac->setinfo_7f = *ptr++; + alac->setinfo_7f = *ptr++; // channels? alac->setinfo_80 = BE_16(ptr); ptr += 2; - alac->setinfo_82 = BE_32(ptr); + alac->setinfo_82 = BE_32(ptr); // max coded frame size ptr += 4; - alac->setinfo_86 = BE_32(ptr); + alac->setinfo_86 = BE_32(ptr); // bitrate ? ptr += 4; - alac->setinfo_8a_rate = BE_32(ptr); + alac->setinfo_8a_rate = BE_32(ptr); // samplerate ptr += 4; allocate_buffers(alac); + + return 0; } /* hideously inefficient. could use a bitmask search, @@ -385,7 +407,7 @@ static void predictor_decompress_fir_adapt(int32_t *error_buffer, } } -void deinterlace_16(int32_t *buffer_a, int32_t *buffer_b, +static void deinterlace_16(int32_t *buffer_a, int32_t *buffer_b, int16_t *buffer_out, int numchannels, int numsamples, uint8_t interlacing_shift, @@ -444,7 +466,7 @@ static int alac_decode_frame(AVCodecContext *avctx, /* initialize from the extradata */ if (!alac->context_initialized) { if (alac->avctx->extradata_size != ALAC_EXTRADATA_SIZE) { - av_log(NULL, AV_LOG_ERROR, "alac: expected %d extradata bytes\n", + av_log(avctx, AV_LOG_ERROR, "alac: expected %d extradata bytes\n", ALAC_EXTRADATA_SIZE); return input_buffer_size; } @@ -500,7 +522,7 @@ static int alac_decode_frame(AVCodecContext *avctx, int prediction_quantitization; int i; - /* skip 16 bits, not sure what they are. seem to be used in + /* FIXME: skip 16 bits, not sure what they are. seem to be used in * two channel case */ get_bits(&alac->gb, 8); get_bits(&alac->gb, 8); @@ -520,7 +542,7 @@ static int alac_decode_frame(AVCodecContext *avctx, /* these bytes seem to have something to do with * > 2 channel files. */ - av_log(NULL, AV_LOG_ERROR, "FIXME: unimplemented, unhandling of wasted_bytes\n"); + av_log(avctx, AV_LOG_ERROR, "FIXME: unimplemented, unhandling of wasted_bytes\n"); } bastardized_rice_decompress(alac, @@ -542,7 +564,7 @@ static int alac_decode_frame(AVCodecContext *avctx, predictor_coef_num, prediction_quantitization); } else { - av_log(NULL, AV_LOG_ERROR, "FIXME: unhandled prediction type: %i\n", prediction_type); + av_log(avctx, AV_LOG_ERROR, "FIXME: unhandled prediction type: %i\n", prediction_type); /* i think the only other prediction type (or perhaps this is just a * boolean?) runs adaptive fir twice.. like: * predictor_decompress_fir_adapt(predictor_error, tempout, ...) @@ -586,7 +608,6 @@ static int alac_decode_frame(AVCodecContext *avctx, int i; for (i = 0; i < outputsamples; i++) { int16_t sample = alac->outputsamples_buffer_a[i]; - sample = be2me_16(sample); ((int16_t*)outbuffer)[i * alac->numchannels] = sample; } break; @@ -594,7 +615,7 @@ static int alac_decode_frame(AVCodecContext *avctx, case 20: case 24: case 32: - av_log(NULL, AV_LOG_ERROR, "FIXME: unimplemented sample size %i\n", alac->setinfo_sample_size); + av_log(avctx, AV_LOG_ERROR, "FIXME: unimplemented sample size %i\n", alac->setinfo_sample_size); break; default: break; @@ -679,7 +700,7 @@ static int alac_decode_frame(AVCodecContext *avctx, /*********************/ if (wasted_bytes) { /* see mono case */ - av_log(NULL, AV_LOG_ERROR, "FIXME: unimplemented, unhandling of wasted_bytes\n"); + av_log(avctx, AV_LOG_ERROR, "FIXME: unimplemented, unhandling of wasted_bytes\n"); } /* channel 1 */ @@ -703,7 +724,7 @@ static int alac_decode_frame(AVCodecContext *avctx, prediction_quantitization_a); } else { /* see mono case */ - av_log(NULL, AV_LOG_ERROR, "FIXME: unhandled prediction type: %i\n", prediction_type_a); + av_log(avctx, AV_LOG_ERROR, "FIXME: unhandled prediction type: %i\n", prediction_type_a); } /* channel 2 */ @@ -726,7 +747,7 @@ static int alac_decode_frame(AVCodecContext *avctx, predictor_coef_num_b, prediction_quantitization_b); } else { - av_log(NULL, AV_LOG_ERROR, "FIXME: unhandled prediction type: %i\n", prediction_type_b); + av_log(avctx, AV_LOG_ERROR, "FIXME: unhandled prediction type: %i\n", prediction_type_b); } } else { /* not compressed, easy case */ @@ -782,7 +803,7 @@ static int alac_decode_frame(AVCodecContext *avctx, case 20: case 24: case 32: - av_log(NULL, AV_LOG_ERROR, "FIXME: unimplemented sample size %i\n", alac->setinfo_sample_size); + av_log(avctx, AV_LOG_ERROR, "FIXME: unimplemented sample size %i\n", alac->setinfo_sample_size); break; default: break; diff --git a/src/libffmpeg/libavcodec/alpha/dsputil_alpha_asm.S b/src/libffmpeg/libavcodec/alpha/dsputil_alpha_asm.S index d555b874c..29ba9dc02 100644 --- a/src/libffmpeg/libavcodec/alpha/dsputil_alpha_asm.S +++ b/src/libffmpeg/libavcodec/alpha/dsputil_alpha_asm.S @@ -2,19 +2,19 @@ * Alpha optimized DSP utils * Copyright (c) 2002 Falk Hueffner <falk@debian.org> * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. * - * This program is distributed in the hope that it will be useful, + * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ /* diff --git a/src/libffmpeg/libavcodec/alpha/motion_est_mvi_asm.S b/src/libffmpeg/libavcodec/alpha/motion_est_mvi_asm.S index 276d310ef..e043f4371 100644 --- a/src/libffmpeg/libavcodec/alpha/motion_est_mvi_asm.S +++ b/src/libffmpeg/libavcodec/alpha/motion_est_mvi_asm.S @@ -2,19 +2,19 @@ * Alpha optimized DSP utils * Copyright (c) 2002 Falk Hueffner <falk@debian.org> * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. * - * This program is distributed in the hope that it will be useful, + * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #include "regdef.h" diff --git a/src/libffmpeg/libavcodec/asv1.c b/src/libffmpeg/libavcodec/asv1.c index 1cb15d812..3cfb76e65 100644 --- a/src/libffmpeg/libavcodec/asv1.c +++ b/src/libffmpeg/libavcodec/asv1.c @@ -289,6 +289,7 @@ static inline void asv2_encode_block(ASV1Context *a, DCTELEM block[64]){ if( (block[index + 1] = (block[index + 1]*a->q_intra_matrix[index + 1] + (1<<15))>>16) ) ccp |= 2; if( (block[index + 9] = (block[index + 9]*a->q_intra_matrix[index + 9] + (1<<15))>>16) ) ccp |= 1; + assert(i || ccp<8); if(i) put_bits(&a->pb, ac_ccp_tab[ccp][1], ac_ccp_tab[ccp][0]); else put_bits(&a->pb, dc_ccp_tab[ccp][1], dc_ccp_tab[ccp][0]); diff --git a/src/libffmpeg/libavcodec/avcodec.h b/src/libffmpeg/libavcodec/avcodec.h index 430504dc3..9be5dcf6e 100644 --- a/src/libffmpeg/libavcodec/avcodec.h +++ b/src/libffmpeg/libavcodec/avcodec.h @@ -14,15 +14,11 @@ extern "C" { #include "avutil.h" #include <sys/types.h> /* size_t */ -//FIXME the following 2 really dont belong in here -#define FFMPEG_VERSION_INT 0x000409 -#define FFMPEG_VERSION "CVS" - #define AV_STRINGIFY(s) AV_TOSTRING(s) #define AV_TOSTRING(s) #s -#define LIBAVCODEC_VERSION_INT ((51<<16)+(1<<8)+0) -#define LIBAVCODEC_VERSION 51.1.0 +#define LIBAVCODEC_VERSION_INT ((51<<16)+(11<<8)+0) +#define LIBAVCODEC_VERSION 51.11.0 #define LIBAVCODEC_BUILD LIBAVCODEC_VERSION_INT #define LIBAVCODEC_IDENT "Lavc" AV_STRINGIFY(LIBAVCODEC_VERSION) @@ -112,7 +108,7 @@ enum CodecID { CODEC_ID_FFVHUFF, CODEC_ID_RV30, CODEC_ID_RV40, - CODEC_ID_VC9, + CODEC_ID_VC1, CODEC_ID_WMV3, CODEC_ID_LOCO, CODEC_ID_WNV1, @@ -122,6 +118,14 @@ enum CodecID { CODEC_ID_TRUEMOTION2, CODEC_ID_BMP, CODEC_ID_CSCD, + CODEC_ID_MMVIDEO, + CODEC_ID_ZMBV, + CODEC_ID_AVS, + CODEC_ID_SMACKVIDEO, + CODEC_ID_NUV, + CODEC_ID_KMVC, + CODEC_ID_FLASHSV, + CODEC_ID_CAVS, /* various pcm "codecs" */ CODEC_ID_PCM_S16LE= 0x10000, @@ -158,6 +162,9 @@ enum CodecID { CODEC_ID_ADPCM_CT, CODEC_ID_ADPCM_SWF, CODEC_ID_ADPCM_YAMAHA, + CODEC_ID_ADPCM_SBPRO_4, + CODEC_ID_ADPCM_SBPRO_3, + CODEC_ID_ADPCM_SBPRO_2, /* AMR */ CODEC_ID_AMR_NB= 0x12000, @@ -198,8 +205,8 @@ enum CodecID { CODEC_ID_QDM2, CODEC_ID_COOK, CODEC_ID_TRUESPEECH, - - CODEC_ID_OGGTHEORA= 0x16000, + CODEC_ID_TTA, + CODEC_ID_SMACKAUDIO, /* subtitle codecs */ CODEC_ID_DVD_SUBTITLE= 0x17000, @@ -220,61 +227,19 @@ enum CodecType { CODEC_TYPE_SUBTITLE, }; -/** - * Pixel format. Notes: - * - * PIX_FMT_RGBA32 is handled in an endian-specific manner. A RGBA - * color is put together as: - * (A << 24) | (R << 16) | (G << 8) | B - * This is stored as BGRA on little endian CPU architectures and ARGB on - * big endian CPUs. - * - * When the pixel format is palettized RGB (PIX_FMT_PAL8), the palettized - * image data is stored in AVFrame.data[0]. The palette is transported in - * AVFrame.data[1] and, is 1024 bytes long (256 4-byte entries) and is - * formatted the same as in PIX_FMT_RGBA32 described above (i.e., it is - * also endian-specific). Note also that the individual RGB palette - * components stored in AVFrame.data[1] should be in the range 0..255. - * This is important as many custom PAL8 video codecs that were designed - * to run on the IBM VGA graphics adapter use 6-bit palette components. - */ -enum PixelFormat { - PIX_FMT_NONE= -1, - PIX_FMT_YUV420P, ///< Planar YUV 4:2:0 (1 Cr & Cb sample per 2x2 Y samples) - PIX_FMT_YUV422, ///< Packed pixel, Y0 Cb Y1 Cr - PIX_FMT_RGB24, ///< Packed pixel, 3 bytes per pixel, RGBRGB... - PIX_FMT_BGR24, ///< Packed pixel, 3 bytes per pixel, BGRBGR... - PIX_FMT_YUV422P, ///< Planar YUV 4:2:2 (1 Cr & Cb sample per 2x1 Y samples) - PIX_FMT_YUV444P, ///< Planar YUV 4:4:4 (1 Cr & Cb sample per 1x1 Y samples) - PIX_FMT_RGBA32, ///< Packed pixel, 4 bytes per pixel, BGRABGRA..., stored in cpu endianness - PIX_FMT_YUV410P, ///< Planar YUV 4:1:0 (1 Cr & Cb sample per 4x4 Y samples) - PIX_FMT_YUV411P, ///< Planar YUV 4:1:1 (1 Cr & Cb sample per 4x1 Y samples) - PIX_FMT_RGB565, ///< always stored in cpu endianness - PIX_FMT_RGB555, ///< always stored in cpu endianness, most significant bit to 1 - PIX_FMT_GRAY8, - PIX_FMT_MONOWHITE, ///< 0 is white - PIX_FMT_MONOBLACK, ///< 0 is black - PIX_FMT_PAL8, ///< 8 bit with RGBA palette - PIX_FMT_YUVJ420P, ///< Planar YUV 4:2:0 full scale (jpeg) - PIX_FMT_YUVJ422P, ///< Planar YUV 4:2:2 full scale (jpeg) - PIX_FMT_YUVJ444P, ///< Planar YUV 4:4:4 full scale (jpeg) - PIX_FMT_XVMC_MPEG2_MC,///< XVideo Motion Acceleration via common packet passing(xvmc_render.h) - PIX_FMT_XVMC_MPEG2_IDCT, - PIX_FMT_UYVY422, ///< Packed pixel, Cb Y0 Cr Y1 - PIX_FMT_UYVY411, ///< Packed pixel, Cb Y0 Y1 Cr Y2 Y3 - PIX_FMT_NB, -}; - /* currently unused, may be used if 24/32 bits samples ever supported */ +/* all in native endian */ enum SampleFormat { - SAMPLE_FMT_S16 = 0, ///< signed 16 bits + SAMPLE_FMT_NONE = -1, + SAMPLE_FMT_U8, ///< unsigned 8 bits + SAMPLE_FMT_S16, ///< signed 16 bits + SAMPLE_FMT_S24, ///< signed 24 bits SAMPLE_FMT_S32, ///< signed 32 bits SAMPLE_FMT_FLT, ///< float - SAMPLE_FMT_DBL, ///< double }; /* in bytes */ -#define AVCODEC_MAX_AUDIO_FRAME_SIZE 131072 +#define AVCODEC_MAX_AUDIO_FRAME_SIZE 192000 // 1 second of 48khz 32bit audio /** * Required number of additionally allocated bytes at the end of the input bitstream for decoding. @@ -321,10 +286,7 @@ typedef struct RcOverride{ float quality_factor; } RcOverride; -/* only for ME compatiblity with old apps */ -extern int motion_estimation_method; - -#define FF_MAX_B_FRAMES 8 +#define FF_MAX_B_FRAMES 16 /* encoding support these flags can be passed in AVCodecContext.flags before initing @@ -367,7 +329,7 @@ extern int motion_estimation_method; #define CODEC_FLAG_H263P_SLICE_STRUCT 0x10000000 #define CODEC_FLAG_INTERLACED_ME 0x20000000 ///< interlaced motion estimation #define CODEC_FLAG_SVCD_SCAN_OFFSET 0x40000000 ///< will reserve space for SVCD scan offset user data -#define CODEC_FLAG_CLOSED_GOP 0x80000000 +#define CODEC_FLAG_CLOSED_GOP ((int)0x80000000) #define CODEC_FLAG2_FAST 0x00000001 ///< allow non spec compliant speedup tricks #define CODEC_FLAG2_STRICT_GOP 0x00000002 ///< strictly enforce GOP size #define CODEC_FLAG2_NO_OUTPUT 0x00000004 ///< skip bitstream encoding @@ -379,6 +341,8 @@ extern int motion_estimation_method; #define CODEC_FLAG2_FASTPSKIP 0x00000100 ///< H.264 fast pskip #define CODEC_FLAG2_AUD 0x00000200 ///< H.264 access unit delimiters #define CODEC_FLAG2_BRDO 0x00000400 ///< b-frame rate-distortion optimization +#define CODEC_FLAG2_INTRA_VLC 0x00000800 ///< use MPEG-2 intra VLC table +#define CODEC_FLAG2_MEMC_ONLY 0x00001000 ///< only do ME/MC (I frames -> ref, P frame -> ME+MC) /* Unsupported options : * Syntax Arithmetic coding (SAC) @@ -404,6 +368,11 @@ extern int motion_estimation_method; * if this is not set, the codec is guranteed to never be feeded with NULL data */ #define CODEC_CAP_DELAY 0x0020 +/** + * Codec can be fed a final frame with a smaller size. + * This can be used to prevent truncation of the last audio samples. + */ +#define CODEC_CAP_SMALL_LAST_FRAME 0x0040 //the following defines may change, don't expect compatibility if you use them #define MB_TYPE_INTRA4x4 0x0001 @@ -697,19 +666,6 @@ typedef struct AVFrame { #define DEFAULT_FRAME_RATE_BASE 1001000 /** - * Used by av_log - */ -typedef struct AVCLASS AVClass; -struct AVCLASS { - const char* class_name; - const char* (*item_name)(void*); /* actually passing a pointer to an AVCodecContext - or AVFormatContext, which begin with an AVClass. - Needed because av_log is in libavcodec and has no visibility - of AVIn/OutputFormat */ - struct AVOption *option; -}; - -/** * main external api structure. */ typedef struct AVCodecContext { @@ -1243,6 +1199,7 @@ typedef struct AVCodecContext { #define FF_IDCT_VP3 12 #define FF_IDCT_IPP 13 #define FF_IDCT_XVIDMMX 14 +#define FF_IDCT_CAVS 15 /** * slice count. @@ -2005,6 +1962,73 @@ typedef struct AVCodecContext { * - decoding: unused */ int scenechange_factor; + + /** + * + * note: value depends upon the compare functin used for fullpel ME + * - encoding: set by user. + * - decoding: unused + */ + int mv0_threshold; + + /** + * adjusts sensitivity of b_frame_strategy 1 + * - encoding: set by user. + * - decoding: unused + */ + int b_sensitivity; + + /** + * - encoding: set by user. + * - decoding: unused + */ + int compression_level; +#define FF_COMPRESSION_DEFAULT -1 + + /** + * sets whether to use LPC mode - used by FLAC encoder + * - encoding: set by user. + * - decoding: unused. + */ + int use_lpc; + + /** + * LPC coefficient precision - used by FLAC encoder + * - encoding: set by user. + * - decoding: unused. + */ + int lpc_coeff_precision; + + /** + * - encoding: set by user. + * - decoding: unused. + */ + int min_prediction_order; + + /** + * - encoding: set by user. + * - decoding: unused. + */ + int max_prediction_order; + + /** + * search method for selecting prediction order + * - encoding: set by user. + * - decoding: unused. + */ + int prediction_order_method; + + /** + * - encoding: set by user. + * - decoding: unused. + */ + int min_partition_order; + + /** + * - encoding: set by user. + * - decoding: unused. + */ + int max_partition_order; } AVCodecContext; /** @@ -2083,8 +2107,8 @@ extern AVCodec ac3_encoder; extern AVCodec mp2_encoder; extern AVCodec mp3lame_encoder; extern AVCodec oggvorbis_encoder; -extern AVCodec oggtheora_encoder; extern AVCodec faac_encoder; +extern AVCodec flac_encoder; extern AVCodec xvid_encoder; extern AVCodec mpeg1video_encoder; extern AVCodec mpeg2video_encoder; @@ -2133,7 +2157,7 @@ extern AVCodec msmpeg4v2_decoder; extern AVCodec msmpeg4v3_decoder; extern AVCodec wmv1_decoder; extern AVCodec wmv2_decoder; -extern AVCodec vc9_decoder; +extern AVCodec vc1_decoder; extern AVCodec wmv3_decoder; extern AVCodec mpeg1video_decoder; extern AVCodec mpeg2video_decoder; @@ -2161,12 +2185,12 @@ extern AVCodec mp3on4_decoder; extern AVCodec qdm2_decoder; extern AVCodec cook_decoder; extern AVCodec truespeech_decoder; +extern AVCodec tta_decoder; extern AVCodec mace3_decoder; extern AVCodec mace6_decoder; extern AVCodec huffyuv_decoder; extern AVCodec ffvhuff_decoder; extern AVCodec oggvorbis_decoder; -extern AVCodec oggtheora_decoder; extern AVCodec cyuv_decoder; extern AVCodec h264_decoder; extern AVCodec indeo3_decoder; @@ -2215,6 +2239,7 @@ extern AVCodec qtrle_decoder; extern AVCodec flac_decoder; extern AVCodec tscc_decoder; extern AVCodec cscd_decoder; +extern AVCodec nuv_decoder; extern AVCodec ulti_decoder; extern AVCodec qdraw_decoder; extern AVCodec xl_decoder; @@ -2231,6 +2256,14 @@ extern AVCodec fraps_decoder; extern AVCodec libgsm_encoder; extern AVCodec libgsm_decoder; extern AVCodec bmp_decoder; +extern AVCodec mmvideo_decoder; +extern AVCodec zmbv_decoder; +extern AVCodec avs_decoder; +extern AVCodec smacker_decoder; +extern AVCodec smackaud_decoder; +extern AVCodec kmvc_decoder; +extern AVCodec flashsv_decoder; +extern AVCodec cavs_decoder; /* pcm codecs */ #define PCM_CODEC(id, name) \ @@ -2272,6 +2305,9 @@ PCM_CODEC(CODEC_ID_ADPCM_G726, adpcm_g726); PCM_CODEC(CODEC_ID_ADPCM_CT, adpcm_ct); PCM_CODEC(CODEC_ID_ADPCM_SWF, adpcm_swf); PCM_CODEC(CODEC_ID_ADPCM_YAMAHA, adpcm_yamaha); +PCM_CODEC(CODEC_ID_ADPCM_SBPRO_4, adpcm_sbpro_4); +PCM_CODEC(CODEC_ID_ADPCM_SBPRO_3, adpcm_sbpro_3); +PCM_CODEC(CODEC_ID_ADPCM_SBPRO_2, adpcm_sbpro_2); #undef PCM_CODEC @@ -2454,6 +2490,10 @@ void avcodec_default_free_buffers(AVCodecContext *s); */ char av_get_pict_type_char(int pict_type); +/** + * returns codec bits per sample + */ +int av_get_bits_per_sample(enum CodecID codec_id); /* frame parsing */ typedef struct AVCodecParserContext { @@ -2514,6 +2554,7 @@ void av_parser_close(AVCodecParserContext *s); extern AVCodecParser mpegvideo_parser; extern AVCodecParser mpeg4video_parser; +extern AVCodecParser cavsvideo_parser; extern AVCodecParser h261_parser; extern AVCodecParser h263_parser; extern AVCodecParser h264_parser; @@ -2523,12 +2564,44 @@ extern AVCodecParser mpegaudio_parser; extern AVCodecParser ac3_parser; extern AVCodecParser dvdsub_parser; extern AVCodecParser dvbsub_parser; +extern AVCodecParser aac_parser; + + +typedef struct AVBitStreamFilterContext { + void *priv_data; + struct AVBitStreamFilter *filter; + AVCodecParserContext *parser; + struct AVBitStreamFilterContext *next; +} AVBitStreamFilterContext; + + +typedef struct AVBitStreamFilter { + const char *name; + int priv_data_size; + int (*filter)(AVBitStreamFilterContext *bsfc, + AVCodecContext *avctx, const char *args, + uint8_t **poutbuf, int *poutbuf_size, + const uint8_t *buf, int buf_size, int keyframe); + struct AVBitStreamFilter *next; +} AVBitStreamFilter; + +extern AVBitStreamFilter *av_first_bitstream_filter; + +void av_register_bitstream_filter(AVBitStreamFilter *bsf); +AVBitStreamFilterContext *av_bitstream_filter_init(const char *name); +int av_bitstream_filter_filter(AVBitStreamFilterContext *bsfc, + AVCodecContext *avctx, const char *args, + uint8_t **poutbuf, int *poutbuf_size, + const uint8_t *buf, int buf_size, int keyframe); +void av_bitstream_filter_close(AVBitStreamFilterContext *bsf); + +extern AVBitStreamFilter dump_extradata_bsf; +extern AVBitStreamFilter remove_extradata_bsf; +extern AVBitStreamFilter noise_bsf; + /* memory */ -void *av_malloc(unsigned int size); void *av_mallocz(unsigned int size); -void *av_realloc(void *ptr, unsigned int size); -void av_free(void *ptr); char *av_strdup(const char *s); void av_freep(void *ptr); void *av_fast_realloc(void *ptr, unsigned int *size, unsigned int min_size); @@ -2538,31 +2611,14 @@ void av_free_static(void); void *av_mallocz_static(unsigned int size); void *av_realloc_static(void *ptr, unsigned int size); -/* add by bero : in adx.c */ -int is_adx(const unsigned char *buf,size_t bufsize); - void img_copy(AVPicture *dst, const AVPicture *src, int pix_fmt, int width, int height); -/* av_log API */ - -#include <stdarg.h> - -#define AV_LOG_QUIET -1 -#define AV_LOG_ERROR 0 -#define AV_LOG_INFO 1 -#define AV_LOG_DEBUG 2 - -#ifdef __GNUC__ -extern void av_log(void*, int level, const char *fmt, ...) __attribute__ ((__format__ (__printf__, 3, 4))); -#else -extern void av_log(void*, int level, const char *fmt, ...); -#endif +int img_crop(AVPicture *dst, const AVPicture *src, + int pix_fmt, int top_band, int left_band); -extern void av_vlog(void*, int level, const char *fmt, va_list); -extern int av_log_get_level(void); -extern void av_log_set_level(int); -extern void av_log_set_callback(void (*)(void*, int, const char*, va_list)); +int img_pad(AVPicture *dst, const AVPicture *src, int height, int width, int pix_fmt, + int padtop, int padbottom, int padleft, int padright, int *color); /* endian macros */ #if !defined(BE_16) || !defined(BE_32) || !defined(LE_16) || !defined(LE_32) diff --git a/src/libffmpeg/libavcodec/bitstream.h b/src/libffmpeg/libavcodec/bitstream.h index 4a3d55d19..10db64d33 100644 --- a/src/libffmpeg/libavcodec/bitstream.h +++ b/src/libffmpeg/libavcodec/bitstream.h @@ -135,31 +135,40 @@ typedef struct RL_VLC_ELEM { uint8_t run; } RL_VLC_ELEM; -#if defined(ARCH_SPARC) || defined(ARCH_ARMV4L) +#if defined(ARCH_SPARC) || defined(ARCH_ARMV4L) || defined(ARCH_MIPS) #define UNALIGNED_STORES_ARE_BAD #endif /* used to avoid missaligned exceptions on some archs (alpha, ...) */ #if defined(ARCH_X86) || defined(ARCH_X86_64) +# define unaligned16(a) (*(const uint16_t*)(a)) # define unaligned32(a) (*(const uint32_t*)(a)) +# define unaligned64(a) (*(const uint64_t*)(a)) #else # ifdef __GNUC__ -static inline uint32_t unaligned32(const void *v) { - struct Unaligned { - uint32_t i; - } __attribute__((packed)); - - return ((const struct Unaligned *) v)->i; +# define unaligned(x) \ +static inline uint##x##_t unaligned##x(const void *v) { \ + struct Unaligned { \ + uint##x##_t i; \ + } __attribute__((packed)); \ + \ + return ((const struct Unaligned *) v)->i; \ } # elif defined(__DECC) -static inline uint32_t unaligned32(const void *v) { - return *(const __unaligned uint32_t *) v; +# define unaligned(x) \ +static inline uint##x##_t unaligned##x##(const void *v) { \ + return *(const __unaligned uint##x##_t *) v; \ } # else -static inline uint32_t unaligned32(const void *v) { - return *(const uint32_t *) v; +# define unaligned(x) \ +static inline uint##x##_t unaligned##x##(const void *v) { \ + return *(const uint##x##_t *) v; \ } # endif +unaligned(16) +unaligned(32) +unaligned(64) +#undef unaligned #endif //!ARCH_X86 #ifndef ALT_BITSTREAM_WRITER @@ -168,9 +177,6 @@ static inline void put_bits(PutBitContext *s, int n, unsigned int value) unsigned int bit_buf; int bit_left; -#ifdef STATS - st_out_bit_counts[st_current_index] += n; -#endif // printf("put_bits=%d %x\n", n, value); assert(n == 32 || value < (1U << n)); @@ -574,21 +580,15 @@ static inline int get_bits_count(GetBitContext *s){ * @author BERO */ static inline int get_xbits(GetBitContext *s, int n){ - register int tmp; + register int sign; register int32_t cache; OPEN_READER(re, s) UPDATE_CACHE(re, s) cache = GET_CACHE(re,s); - if ((int32_t)cache<0) { //MSB=1 - tmp = NEG_USR32(cache,n); - } else { - // tmp = (-1<<n) | NEG_USR32(cache,n) + 1; mpeg12.c algo - // tmp = - (NEG_USR32(cache,n) ^ ((1 << n) - 1)); h263.c algo - tmp = - NEG_USR32(~cache,n); - } + sign=(~cache)>>31; LAST_SKIP_BITS(re, s, n) CLOSE_READER(re, s) - return tmp; + return (NEG_USR32(sign ^ cache, n) ^ sign) - sign; } static inline int get_sbits(GetBitContext *s, int n){ diff --git a/src/libffmpeg/libavcodec/cabac.h b/src/libffmpeg/libavcodec/cabac.h index 2e4ec7083..e79774157 100644 --- a/src/libffmpeg/libavcodec/cabac.h +++ b/src/libffmpeg/libavcodec/cabac.h @@ -24,7 +24,7 @@ */ -#undef NDEBUG +//#undef NDEBUG #include <assert.h> #define CABAC_BITS 8 diff --git a/src/libffmpeg/libavcodec/dpcm.c b/src/libffmpeg/libavcodec/dpcm.c index c920cb403..df9da9489 100644 --- a/src/libffmpeg/libavcodec/dpcm.c +++ b/src/libffmpeg/libavcodec/dpcm.c @@ -41,7 +41,7 @@ typedef struct DPCMContext { int channels; short roq_square_array[256]; long sample[2];//for SOL_DPCM - int *sol_table;//for SOL_DPCM + const int *sol_table;//for SOL_DPCM } DPCMContext; #define SATURATE_S16(x) if (x < -32768) x = -32768; \ @@ -84,15 +84,15 @@ static int interplay_delta_table[] = { }; -static int sol_table_old[16] = +static const int sol_table_old[16] = { 0x0, 0x1, 0x2 , 0x3, 0x6, 0xA, 0xF, 0x15, -0x15, -0xF, -0xA, -0x6, -0x3, -0x2, -0x1, 0x0}; -static int sol_table_new[16] = +static const int sol_table_new[16] = { 0x0, 0x1, 0x2, 0x3, 0x6, 0xA, 0xF, 0x15, 0x0, -0x1, -0x2, -0x3, -0x6, -0xA, -0xF, -0x15}; -static int sol_table_16[128] = { +static const int sol_table_16[128] = { 0x000, 0x008, 0x010, 0x020, 0x030, 0x040, 0x050, 0x060, 0x070, 0x080, 0x090, 0x0A0, 0x0B0, 0x0C0, 0x0D0, 0x0E0, 0x0F0, 0x100, 0x110, 0x120, 0x130, 0x140, 0x150, 0x160, 0x170, 0x180, 0x190, 0x1A0, 0x1B0, 0x1C0, diff --git a/src/libffmpeg/libavcodec/dsputil.c b/src/libffmpeg/libavcodec/dsputil.c index 3931c3978..9b79b8659 100644 --- a/src/libffmpeg/libavcodec/dsputil.c +++ b/src/libffmpeg/libavcodec/dsputil.c @@ -30,6 +30,7 @@ #include "mpegvideo.h" #include "simple_idct.h" #include "faandct.h" +#include "snow.h" /* snow.c */ void ff_spatial_dwt(int *buffer, int width, int height, int stride, int type, int decomposition_count); @@ -291,35 +292,34 @@ static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) } +#ifdef CONFIG_SNOW_ENCODER //dwt is in snow.c static inline int w_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int w, int h, int type){ -#ifdef CONFIG_SNOW_ENCODER //idwt is in snow.c int s, i, j; const int dec_count= w==8 ? 3 : 4; - int tmp[16*16]; -#if 0 + int tmp[32*32]; int level, ori; static const int scale[2][2][4][4]={ { { - //8x8 dec=3 + // 9/7 8x8 dec=3 {268, 239, 239, 213}, { 0, 224, 224, 152}, { 0, 135, 135, 110}, },{ - //16x16 dec=4 + // 9/7 16x16 or 32x32 dec=4 {344, 310, 310, 280}, { 0, 320, 320, 228}, { 0, 175, 175, 136}, { 0, 129, 129, 102}, } },{ - {//FIXME 5/3 - //8x8 dec=3 + { + // 5/3 8x8 dec=3 {275, 245, 245, 218}, { 0, 230, 230, 156}, { 0, 138, 138, 113}, },{ - //16x16 dec=4 + // 5/3 16x16 or 32x32 dec=4 {352, 317, 317, 286}, { 0, 328, 328, 233}, { 0, 180, 180, 140}, @@ -327,29 +327,28 @@ static inline int w_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, in } } }; -#endif for (i = 0; i < h; i++) { for (j = 0; j < w; j+=4) { - tmp[16*i+j+0] = (pix1[j+0] - pix2[j+0])<<4; - tmp[16*i+j+1] = (pix1[j+1] - pix2[j+1])<<4; - tmp[16*i+j+2] = (pix1[j+2] - pix2[j+2])<<4; - tmp[16*i+j+3] = (pix1[j+3] - pix2[j+3])<<4; + tmp[32*i+j+0] = (pix1[j+0] - pix2[j+0])<<4; + tmp[32*i+j+1] = (pix1[j+1] - pix2[j+1])<<4; + tmp[32*i+j+2] = (pix1[j+2] - pix2[j+2])<<4; + tmp[32*i+j+3] = (pix1[j+3] - pix2[j+3])<<4; } pix1 += line_size; pix2 += line_size; } - ff_spatial_dwt(tmp, w, h, 16, type, dec_count); + ff_spatial_dwt(tmp, w, h, 32, type, dec_count); s=0; -#if 0 + assert(w==h); for(level=0; level<dec_count; level++){ for(ori= level ? 1 : 0; ori<4; ori++){ - int sx= (ori&1) ? 1<<level: 0; - int stride= 16<<(dec_count-level); + int size= w>>(dec_count-level); + int sx= (ori&1) ? size : 0; + int stride= 32<<(dec_count-level); int sy= (ori&2) ? stride>>1 : 0; - int size= 1<<level; for(i=0; i<size; i++){ for(j=0; j<size; j++){ @@ -359,21 +358,8 @@ static inline int w_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, in } } } -#endif - for (i = 0; i < h; i++) { - for (j = 0; j < w; j+=4) { - s+= ABS(tmp[16*i+j+0]); - s+= ABS(tmp[16*i+j+1]); - s+= ABS(tmp[16*i+j+2]); - s+= ABS(tmp[16*i+j+3]); - } - } assert(s>=0); - - return s>>2; -#else - return 0; -#endif + return s>>9; } static int w53_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){ @@ -392,6 +378,15 @@ static int w97_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int return w_c(v, pix1, pix2, line_size, 16, h, 0); } +int w53_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){ + return w_c(v, pix1, pix2, line_size, 32, h, 1); +} + +int w97_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){ + return w_c(v, pix1, pix2, line_size, 32, h, 0); +} +#endif + static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size) { int i; @@ -1145,7 +1140,7 @@ static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y } } -static void gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy, +void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy, int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height) { int y, vx, vy; @@ -2575,6 +2570,33 @@ static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int } } +#ifdef CONFIG_CAVS_DECODER +/* AVS specific */ +void ff_cavsdsp_init(DSPContext* c, AVCodecContext *avctx); + +void ff_put_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride) { + put_pixels8_c(dst, src, stride, 8); +} +void ff_avg_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride) { + avg_pixels8_c(dst, src, stride, 8); +} +void ff_put_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride) { + put_pixels16_c(dst, src, stride, 16); +} +void ff_avg_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride) { + avg_pixels16_c(dst, src, stride, 16); +} +#endif /* CONFIG_CAVS_DECODER */ + +#if defined(CONFIG_VC1_DECODER) || defined(CONFIG_WMV3_DECODER) +/* VC-1 specific */ +void ff_vc1dsp_init(DSPContext* c, AVCodecContext *avctx); + +void ff_put_vc1_mspel_mc00_c(uint8_t *dst, uint8_t *src, int stride, int rnd) { + put_pixels8_c(dst, src, stride, 8); +} +#endif /* CONFIG_VC1_DECODER||CONFIG_WMV3_DECODER */ + static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){ uint8_t *cm = cropTbl + MAX_NEG_CROP; int i; @@ -3217,12 +3239,14 @@ void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){ case FF_CMP_NSSE: cmp[i]= c->nsse[i]; break; +#ifdef CONFIG_SNOW_ENCODER case FF_CMP_W53: cmp[i]= c->w53[i]; break; case FF_CMP_W97: cmp[i]= c->w97[i]; break; +#endif default: av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n"); } @@ -3774,6 +3798,8 @@ static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block) dest[0] = cm[dest[0] + ((block[0] + 4)>>3)]; } +static void just_return() { return; } + /* init static data */ void dsputil_static_init(void) { @@ -3853,6 +3879,8 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx) c->h264_idct_add= ff_h264_idct_add_c; c->h264_idct8_add= ff_h264_idct8_add_c; + c->h264_idct_dc_add= ff_h264_idct_dc_add_c; + c->h264_idct8_dc_add= ff_h264_idct8_dc_add_c; c->get_pixels = get_pixels_c; c->diff_pixels = diff_pixels_c; @@ -3862,7 +3890,7 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx) c->add_pixels8 = add_pixels8_c; c->add_pixels4 = add_pixels4_c; c->gmc1 = gmc1_c; - c->gmc = gmc_c; + c->gmc = ff_gmc_c; c->clear_blocks = clear_blocks_c; c->pix_sum = pix_sum_c; c->pix_norm1 = pix_norm1_c; @@ -3988,6 +4016,13 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx) c->biweight_h264_pixels_tab[8]= biweight_h264_pixels2x4_c; c->biweight_h264_pixels_tab[9]= biweight_h264_pixels2x2_c; +#ifdef CONFIG_CAVS_DECODER + ff_cavsdsp_init(c,avctx); +#endif +#if defined(CONFIG_VC1_DECODER) || defined(CONFIG_WMV3_DECODER) + ff_vc1dsp_init(c,avctx); +#endif + c->put_mspel_pixels_tab[0]= put_mspel8_mc00_c; c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c; c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c; @@ -4022,10 +4057,12 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx) c->vsse[4]= vsse_intra16_c; c->nsse[0]= nsse16_c; c->nsse[1]= nsse8_c; +#ifdef CONFIG_SNOW_ENCODER c->w53[0]= w53_16_c; c->w53[1]= w53_8_c; c->w97[0]= w97_16_c; c->w97[1]= w97_8_c; +#endif c->add_bytes= add_bytes_c; c->diff_bytes= diff_bytes_c; @@ -4047,6 +4084,19 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx) c->try_8x8basis= try_8x8basis_c; c->add_8x8basis= add_8x8basis_c; +#ifdef CONFIG_SNOW_ENCODER + c->vertical_compose97i = ff_snow_vertical_compose97i; + c->horizontal_compose97i = ff_snow_horizontal_compose97i; + c->inner_add_yblock = ff_snow_inner_add_yblock; +#endif + + c->shrink[0]= ff_img_copy_plane; + c->shrink[1]= ff_shrink22; + c->shrink[2]= ff_shrink44; + c->shrink[3]= ff_shrink88; + + c->prefetch= just_return; + #ifdef HAVE_MMX dsputil_init_mmx(c, avctx); #endif diff --git a/src/libffmpeg/libavcodec/dsputil.h b/src/libffmpeg/libavcodec/dsputil.h index dc3bc01e8..df7830564 100644 --- a/src/libffmpeg/libavcodec/dsputil.h +++ b/src/libffmpeg/libavcodec/dsputil.h @@ -38,6 +38,7 @@ //#define DEBUG /* dct code */ typedef short DCTELEM; +typedef int DWTELEM; void fdct_ifast (DCTELEM *data); void fdct_ifast248 (DCTELEM *data); @@ -55,6 +56,8 @@ void ff_fdct_sse2(DCTELEM *block); void ff_h264_idct8_add_c(uint8_t *dst, DCTELEM *block, int stride); void ff_h264_idct_add_c(uint8_t *dst, DCTELEM *block, int stride); +void ff_h264_idct8_dc_add_c(uint8_t *dst, DCTELEM *block, int stride); +void ff_h264_idct_dc_add_c(uint8_t *dst, DCTELEM *block, int stride); void ff_h264_lowres_idct_add_c(uint8_t *dst, int stride, DCTELEM *block); void ff_h264_lowres_idct_put_c(uint8_t *dst, int stride, DCTELEM *block); @@ -76,6 +79,15 @@ void ff_vp3_idct_c(DCTELEM *block/* align 16*/); void ff_vp3_idct_put_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/); void ff_vp3_idct_add_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/); +/* 1/2^n downscaling functions from imgconvert.c */ +void ff_img_copy_plane(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height); +void ff_shrink22(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height); +void ff_shrink44(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height); +void ff_shrink88(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height); + +void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy, + int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height); + /* minimum alignment rules ;) if u notice errors in the align stuff, need more alignment for some asm code for some cpu or need to use a function with less aligned data then send a mail to the ffmpeg-dev list, ... @@ -134,6 +146,9 @@ static void a(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ typedef int (*me_cmp_func)(void /*MpegEncContext*/ *s, uint8_t *blk1/*align width (8 or 16)*/, uint8_t *blk2/*align 1*/, int line_size, int h)/* __attribute__ ((const))*/; +// for snow slices +typedef struct slice_buffer_s slice_buffer; + /** * DSPContext. */ @@ -262,6 +277,15 @@ typedef struct DSPContext { h264_weight_func weight_h264_pixels_tab[10]; h264_biweight_func biweight_h264_pixels_tab[10]; + /* AVS specific */ + qpel_mc_func put_cavs_qpel_pixels_tab[2][16]; + qpel_mc_func avg_cavs_qpel_pixels_tab[2][16]; + void (*cavs_filter_lv)(uint8_t *pix, int stride, int alpha, int beta, int tc, int bs1, int bs2); + void (*cavs_filter_lh)(uint8_t *pix, int stride, int alpha, int beta, int tc, int bs1, int bs2); + void (*cavs_filter_cv)(uint8_t *pix, int stride, int alpha, int beta, int tc, int bs1, int bs2); + void (*cavs_filter_ch)(uint8_t *pix, int stride, int alpha, int beta, int tc, int bs1, int bs2); + void (*cavs_idct8_add)(uint8_t *dst, DCTELEM *block, int stride); + me_cmp_func pix_abs[2][4]; /* huffyuv specific */ @@ -333,6 +357,29 @@ typedef struct DSPContext { void (*h264_idct_add)(uint8_t *dst, DCTELEM *block, int stride); void (*h264_idct8_add)(uint8_t *dst, DCTELEM *block, int stride); + void (*h264_idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride); + void (*h264_idct8_dc_add)(uint8_t *dst, DCTELEM *block, int stride); + + /* snow wavelet */ + void (*vertical_compose97i)(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width); + void (*horizontal_compose97i)(DWTELEM *b, int width); + void (*inner_add_yblock)(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8); + + void (*prefetch)(void *mem, int stride, int h); + + void (*shrink[4])(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height); + + /* vc1 functions */ + void (*vc1_inv_trans_8x8)(DCTELEM *b); + void (*vc1_inv_trans_8x4)(DCTELEM *b, int n); + void (*vc1_inv_trans_4x8)(DCTELEM *b, int n); + void (*vc1_inv_trans_4x4)(DCTELEM *b, int n); + void (*vc1_v_overlap)(uint8_t* src, int stride, int rnd); + void (*vc1_h_overlap)(uint8_t* src, int stride, int rnd); + /* put 8x8 block with bicubic interpolation and quarterpel precision + * last argument is actually round value instead of height + */ + op_pixels_func put_vc1_mspel_pixels_tab[16]; } DSPContext; void dsputil_static_init(void); @@ -409,6 +456,7 @@ int mm_support(void); #define MM_SSE 0x0008 /* SSE functions */ #define MM_SSE2 0x0010 /* PIV SSE2 functions */ #define MM_3DNOWEXT 0x0020 /* AMD 3DNowExt */ +#define MM_SSE3 0x0040 /* Prescott SSE3 functions */ extern int mm_flags; @@ -531,6 +579,7 @@ struct unaligned_16 { uint16_t l; } __attribute__((packed)); #define LD32(a) (*((uint32_t*)(a))) #define LD64(a) (*((uint64_t*)(a))) +#define ST16(a, b) *((uint16_t*)(a)) = (b) #define ST32(a, b) *((uint32_t*)(a)) = (b) #endif /* !__GNUC__ */ @@ -563,6 +612,8 @@ int ff_fft_init(FFTContext *s, int nbits, int inverse); void ff_fft_permute(FFTContext *s, FFTComplex *z); void ff_fft_calc_c(FFTContext *s, FFTComplex *z); void ff_fft_calc_sse(FFTContext *s, FFTComplex *z); +void ff_fft_calc_3dn(FFTContext *s, FFTComplex *z); +void ff_fft_calc_3dn2(FFTContext *s, FFTComplex *z); void ff_fft_calc_altivec(FFTContext *s, FFTComplex *z); static inline void ff_fft_calc(FFTContext *s, FFTComplex *z) diff --git a/src/libffmpeg/libavcodec/dv.c b/src/libffmpeg/libavcodec/dv.c index 08611a900..c39d70c54 100644 --- a/src/libffmpeg/libavcodec/dv.c +++ b/src/libffmpeg/libavcodec/dv.c @@ -6,6 +6,9 @@ * DV encoder * Copyright (c) 2003 Roman Shaposhnik. * + * 50 Mbps (DVCPRO50) support + * Copyright (c) 2006 Daniel Maas <dmaas@maasdigital.com> + * * Many thanks to Dan Dennedy <dan@dennedy.org> for providing wealth * of DV technical info. * @@ -51,8 +54,12 @@ typedef struct DVVideoContext { void (*idct_put[2])(uint8_t *dest, int line_size, DCTELEM *block); } DVVideoContext; -/* MultiThreading - applies to entire DV codec, not just the avcontext */ -uint8_t** dv_anchor; +/* MultiThreading - dv_anchor applies to entire DV codec, not just the avcontext */ +/* one element is needed for each video segment in a DV frame */ +/* at most there are 2 DIF channels * 12 DIF sequences * 27 video segments (PAL 50Mbps) */ +#define DV_ANCHOR_SIZE (2*12*27) + +static void* dv_anchor[DV_ANCHOR_SIZE]; #define TEX_VLC_BITS 9 @@ -118,11 +125,7 @@ static int dvvideo_init(AVCodecContext *avctx) return -ENOMEM; /* dv_anchor lets each thread know its Id */ - dv_anchor = av_malloc(12*27*sizeof(void*)); - if (!dv_anchor) { - return -ENOMEM; - } - for (i=0; i<12*27; i++) + for (i=0; i<DV_ANCHOR_SIZE; i++) dv_anchor[i] = (void*)(size_t)i; /* it's faster to include sign bit in a generic VLC parsing scheme */ @@ -238,9 +241,6 @@ static int dvvideo_init(AVCodecContext *avctx) /* XXX: do it only for constant case */ dv_build_unquantize_tables(s, dsp.idct_permutation); - /* FIXME: I really don't think this should be here */ - if (dv_codec_profile(avctx)) - avctx->pix_fmt = dv_codec_profile(avctx)->pix_fmt; avctx->coded_frame = &s->picture; s->avctx= avctx; @@ -253,6 +253,7 @@ static int dvvideo_init(AVCodecContext *avctx) typedef struct BlockInfo { const uint8_t *shift_table; const uint8_t *scan_table; + const int *iweight_table; uint8_t pos; /* position in block */ uint8_t dct_mode; uint8_t partial_bit_count; @@ -295,6 +296,7 @@ static void dv_decode_ac(GetBitContext *gb, BlockInfo *mb, DCTELEM *block) int last_index = get_bits_size(gb); const uint8_t *scan_table = mb->scan_table; const uint8_t *shift_table = mb->shift_table; + const int *iweight_table = mb->iweight_table; int pos = mb->pos; int partial_bit_count = mb->partial_bit_count; int level, pos1, run, vlc_len, index; @@ -342,9 +344,13 @@ static void dv_decode_ac(GetBitContext *gb, BlockInfo *mb, DCTELEM *block) if (pos >= 64) break; - assert(level); pos1 = scan_table[pos]; - block[pos1] = level << shift_table[pos1]; + level <<= shift_table[pos1]; + + /* unweigh, round, and shift down */ + level = (level*iweight_table[pos] + (1 << (dv_iweight_bits-1))) >> dv_iweight_bits; + + block[pos1] = level; UPDATE_CACHE(re, gb); } @@ -410,6 +416,7 @@ static inline void dv_decode_video_segment(DVVideoContext *s, dct_mode = get_bits1(&gb); mb->dct_mode = dct_mode; mb->scan_table = s->dv_zigzag[dct_mode]; + mb->iweight_table = dct_mode ? dv_iweight_248 : dv_iweight_88; class1 = get_bits(&gb, 2); mb->shift_table = s->dv_idct_shift[class1 == 3][dct_mode] [quant + dv_quant_offset[class1]]; @@ -488,45 +495,63 @@ static inline void dv_decode_video_segment(DVVideoContext *s, v = *mb_pos_ptr++; mb_x = v & 0xff; mb_y = v >> 8; - y_ptr = s->picture.data[0] + ((mb_y * s->picture.linesize[0] + mb_x)<<log2_blocksize); - if (s->sys->pix_fmt == PIX_FMT_YUV411P) + if (s->sys->pix_fmt == PIX_FMT_YUV422P) { + y_ptr = s->picture.data[0] + ((mb_y * s->picture.linesize[0] + (mb_x>>1))<<log2_blocksize); c_offset = ((mb_y * s->picture.linesize[1] + (mb_x >> 2))<<log2_blocksize); - else - c_offset = (((mb_y >> 1) * s->picture.linesize[1] + (mb_x >> 1))<<log2_blocksize); + } else { /* 4:1:1 or 4:2:0 */ + y_ptr = s->picture.data[0] + ((mb_y * s->picture.linesize[0] + mb_x)<<log2_blocksize); + if (s->sys->pix_fmt == PIX_FMT_YUV411P) + c_offset = ((mb_y * s->picture.linesize[1] + (mb_x >> 2))<<log2_blocksize); + else /* 4:2:0 */ + c_offset = (((mb_y >> 1) * s->picture.linesize[1] + (mb_x >> 1))<<log2_blocksize); + } for(j = 0;j < 6; j++) { idct_put = s->idct_put[mb->dct_mode && log2_blocksize==3]; - if (j < 4) { - if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x < (704 / 8)) { - /* NOTE: at end of line, the macroblock is handled as 420 */ - idct_put(y_ptr + (j<<log2_blocksize), s->picture.linesize[0], block); - } else { - idct_put(y_ptr + (((j & 1) + (j >> 1) * s->picture.linesize[0])<<log2_blocksize), + if (s->sys->pix_fmt == PIX_FMT_YUV422P) { /* 4:2:2 */ + if (j == 0 || j == 2) { + /* Y0 Y1 */ + idct_put(y_ptr + ((j >> 1)<<log2_blocksize), s->picture.linesize[0], block); + } else if(j > 3) { + /* Cr Cb */ + idct_put(s->picture.data[6 - j] + c_offset, + s->picture.linesize[6 - j], block); } - } else { - if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x >= (704 / 8)) { - uint64_t aligned_pixels[64/8]; - uint8_t *pixels= (uint8_t*)aligned_pixels; - uint8_t *c_ptr, *c_ptr1, *ptr, *ptr1; - int x, y, linesize; - /* NOTE: at end of line, the macroblock is handled as 420 */ - idct_put(pixels, 8, block); - linesize = s->picture.linesize[6 - j]; - c_ptr = s->picture.data[6 - j] + c_offset; - ptr = pixels; - for(y = 0;y < (1<<log2_blocksize); y++) { - ptr1= ptr + (1<<(log2_blocksize-1)); - c_ptr1 = c_ptr + (linesize<<log2_blocksize); - for(x=0; x < (1<<(log2_blocksize-1)); x++){ - c_ptr[x]= ptr[x]; c_ptr1[x]= ptr1[x]; - } - c_ptr += linesize; - ptr += 8; + /* note: j=1 and j=3 are "dummy" blocks in 4:2:2 */ + } else { /* 4:1:1 or 4:2:0 */ + if (j < 4) { + if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x < (704 / 8)) { + /* NOTE: at end of line, the macroblock is handled as 420 */ + idct_put(y_ptr + (j<<log2_blocksize), s->picture.linesize[0], block); + } else { + idct_put(y_ptr + (((j & 1) + (j >> 1) * s->picture.linesize[0])<<log2_blocksize), + s->picture.linesize[0], block); } } else { - /* don't ask me why they inverted Cb and Cr ! */ - idct_put(s->picture.data[6 - j] + c_offset, - s->picture.linesize[6 - j], block); + if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x >= (704 / 8)) { + uint64_t aligned_pixels[64/8]; + uint8_t *pixels= (uint8_t*)aligned_pixels; + uint8_t *c_ptr, *c_ptr1, *ptr, *ptr1; + int x, y, linesize; + /* NOTE: at end of line, the macroblock is handled as 420 */ + idct_put(pixels, 8, block); + linesize = s->picture.linesize[6 - j]; + c_ptr = s->picture.data[6 - j] + c_offset; + ptr = pixels; + for(y = 0;y < (1<<log2_blocksize); y++) { + ptr1= ptr + (1<<(log2_blocksize-1)); + c_ptr1 = c_ptr + (linesize<<log2_blocksize); + for(x=0; x < (1<<(log2_blocksize-1)); x++){ + c_ptr[x]= ptr[x]; c_ptr1[x]= ptr1[x]; + } + c_ptr += linesize; + ptr += 8; + } + } else { + /* don't ask me why they inverted Cb and Cr ! */ + idct_put(s->picture.data[6 - j] + c_offset, + s->picture.linesize[6 - j], block); + } } } block += 64; @@ -648,11 +673,25 @@ static always_inline PutBitContext* dv_encode_ac(EncBlockInfo* bi, PutBitContext } static always_inline void dv_set_class_number(DCTELEM* blk, EncBlockInfo* bi, - const uint8_t* zigzag_scan, int bias) + const uint8_t* zigzag_scan, const int *weight, int bias) { int i, area; + /* We offer two different methods for class number assignment: the + method suggested in SMPTE 314M Table 22, and an improved + method. The SMPTE method is very conservative; it assigns class + 3 (i.e. severe quantization) to any block where the largest AC + component is greater than 36. ffmpeg's DV encoder tracks AC bit + consumption precisely, so there is no need to bias most blocks + towards strongly lossy compression. Instead, we assign class 2 + to most blocks, and use class 3 only when strictly necessary + (for blocks whose largest AC component exceeds 255). */ + +#if 0 /* SMPTE spec method */ static const int classes[] = {12, 24, 36, 0xffff}; - int max=12; +#else /* improved ffmpeg method */ + static const int classes[] = {-1, -1, 255, 0xffff}; +#endif + int max=classes[0]; int prev=0; bi->mb[0] = blk[0]; @@ -665,7 +704,11 @@ static always_inline void dv_set_class_number(DCTELEM* blk, EncBlockInfo* bi, if (level+15 > 30U) { bi->sign[i] = (level>>31)&1; - bi->mb[i] = level= ABS(level)>>4; + /* weigh it and and shift down into range, adding for rounding */ + /* the extra division by a factor of 2^4 reverses the 8x expansion of the DCT + AND the 2x doubling of the weights */ + level = (ABS(level) * weight[i] + (1<<(dv_weight_bits+3))) >> (dv_weight_bits+4); + bi->mb[i] = level; if(level>max) max= level; bi->bit_size[area] += dv_rl2vlc_size(i - prev - 1, level); bi->next[prev]= i; @@ -728,9 +771,10 @@ static always_inline int dv_guess_dct_mode(DCTELEM *blk) { static inline void dv_guess_qnos(EncBlockInfo* blks, int* qnos) { int size[5]; - int i, j, k, a, prev; + int i, j, k, a, prev, a2; EncBlockInfo* b; + size[0] = size[1] = size[2] = size[3] = size[4] = 1<<24; do { b = blks; for (i=0; i<5; i++) { @@ -745,12 +789,23 @@ static inline void dv_guess_qnos(EncBlockInfo* blks, int* qnos) b->bit_size[a] = 1; // 4 areas 4 bits for EOB :) b->area_q[a]++; prev= b->prev[a]; + assert(b->next[prev] >= mb_area_start[a+1] || b->mb[prev]); for (k= b->next[prev] ; k<mb_area_start[a+1]; k= b->next[k]) { b->mb[k] >>= 1; if (b->mb[k]) { b->bit_size[a] += dv_rl2vlc_size(k - prev - 1, b->mb[k]); prev= k; } else { + if(b->next[k] >= mb_area_start[a+1] && b->next[k]<64){ + for(a2=a+1; b->next[k] >= mb_area_start[a2+1]; a2++) + b->prev[a2] = prev; + assert(a2<4); + assert(b->mb[b->next[k]]); + b->bit_size[a2] += dv_rl2vlc_size(b->next[k] - prev - 1, b->mb[b->next[k]]) + -dv_rl2vlc_size(b->next[k] - k - 1, b->mb[b->next[k]]); + assert(b->prev[a2]==k && (a2+1 >= 4 || b->prev[a2+1]!=k)); + b->prev[a2] = prev; + } b->next[prev] = b->next[k]; } } @@ -759,16 +814,29 @@ static inline void dv_guess_qnos(EncBlockInfo* blks, int* qnos) size[i] += b->bit_size[a]; } } + if(vs_total_ac_bits >= size[0] + size[1] + size[2] + size[3] + size[4]) + return; } - } while ((vs_total_ac_bits < size[0] + size[1] + size[2] + size[3] + size[4]) && - (qnos[0]|qnos[1]|qnos[2]|qnos[3]|qnos[4])); + } while (qnos[0]|qnos[1]|qnos[2]|qnos[3]|qnos[4]); + + + for(a=2; a==2 || vs_total_ac_bits < size[0]; a+=a){ + b = blks; + size[0] = 5*6*4; //EOB + for (j=0; j<6*5; j++, b++) { + prev= b->prev[0]; + for (k= b->next[prev]; k<64; k= b->next[k]) { + if(b->mb[k] < a && b->mb[k] > -a){ + b->next[prev] = b->next[k]; + }else{ + size[0] += dv_rl2vlc_size(k - prev - 1, b->mb[k]); + prev= k; + } + } + } + } } -/* - * This is a very rough initial implementaion. The performance is - * horrible and the weighting is missing. But it's missing from the - * decoding step also -- so at least we're on the same page with decoder ;-) - */ static inline void dv_encode_video_segment(DVVideoContext *s, uint8_t *dif, const uint16_t *mb_pos_ptr) @@ -795,28 +863,52 @@ static inline void dv_encode_video_segment(DVVideoContext *s, v = *mb_pos_ptr++; mb_x = v & 0xff; mb_y = v >> 8; - y_ptr = s->picture.data[0] + (mb_y * s->picture.linesize[0] * 8) + (mb_x * 8); - c_offset = (s->sys->pix_fmt == PIX_FMT_YUV411P) ? - ((mb_y * s->picture.linesize[1] * 8) + ((mb_x >> 2) * 8)) : - (((mb_y >> 1) * s->picture.linesize[1] * 8) + ((mb_x >> 1) * 8)); + if (s->sys->pix_fmt == PIX_FMT_YUV422P) { + y_ptr = s->picture.data[0] + (mb_y * s->picture.linesize[0] * 8) + (mb_x * 4); + } else { /* 4:1:1 */ + y_ptr = s->picture.data[0] + (mb_y * s->picture.linesize[0] * 8) + (mb_x * 8); + } + if (s->sys->pix_fmt == PIX_FMT_YUV420P) { + c_offset = (((mb_y >> 1) * s->picture.linesize[1] * 8) + ((mb_x >> 1) * 8)); + } else { /* 4:2:2 or 4:1:1 */ + c_offset = ((mb_y * s->picture.linesize[1] * 8) + ((mb_x >> 2) * 8)); + } do_edge_wrap = 0; qnos[mb_index] = 15; /* No quantization */ ptr = dif + mb_index*80 + 4; for(j = 0;j < 6; j++) { - if (j < 4) { /* Four Y blocks */ - /* NOTE: at end of line, the macroblock is handled as 420 */ - if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x < (704 / 8)) { - data = y_ptr + (j * 8); + int dummy = 0; + if (s->sys->pix_fmt == PIX_FMT_YUV422P) { /* 4:2:2 */ + if (j == 0 || j == 2) { + /* Y0 Y1 */ + data = y_ptr + ((j>>1) * 8); + linesize = s->picture.linesize[0]; + } else if (j > 3) { + /* Cr Cb */ + data = s->picture.data[6 - j] + c_offset; + linesize = s->picture.linesize[6 - j]; } else { - data = y_ptr + ((j & 1) * 8) + ((j >> 1) * 8 * s->picture.linesize[0]); + /* j=1 and j=3 are "dummy" blocks, used for AC data only */ + data = 0; + linesize = 0; + dummy = 1; + } + } else { /* 4:1:1 or 4:2:0 */ + if (j < 4) { /* Four Y blocks */ + /* NOTE: at end of line, the macroblock is handled as 420 */ + if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x < (704 / 8)) { + data = y_ptr + (j * 8); + } else { + data = y_ptr + ((j & 1) * 8) + ((j >> 1) * 8 * s->picture.linesize[0]); + } + linesize = s->picture.linesize[0]; + } else { /* Cr and Cb blocks */ + /* don't ask Fabrice why they inverted Cb and Cr ! */ + data = s->picture.data[6 - j] + c_offset; + linesize = s->picture.linesize[6 - j]; + if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x >= (704 / 8)) + do_edge_wrap = 1; } - linesize = s->picture.linesize[0]; - } else { /* Cr and Cb blocks */ - /* don't ask Fabrice why they inverted Cb and Cr ! */ - data = s->picture.data[6 - j] + c_offset; - linesize = s->picture.linesize[6 - j]; - if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x >= (704 / 8)) - do_edge_wrap = 1; } /* Everything is set up -- now just copy data -> DCT block */ @@ -831,7 +923,8 @@ static inline void dv_encode_video_segment(DVVideoContext *s, b += 8; } } else { /* Simple copy: 8x8 -> 8x8 */ - s->get_pixels(block, data, linesize); + if (!dummy) + s->get_pixels(block, data, linesize); } if(s->avctx->flags & CODEC_FLAG_INTERLACED_DCT) @@ -843,10 +936,18 @@ static inline void dv_encode_video_segment(DVVideoContext *s, enc_blk->partial_bit_buffer = 0; enc_blk->cur_ac = 0; - s->fdct[enc_blk->dct_mode](block); + if (dummy) { + /* We rely on the fact that encoding all zeros leads to an immediate EOB, + which is precisely what the spec calls for in the "dummy" blocks. */ + memset(block, 0, sizeof(block)); + } else { + s->fdct[enc_blk->dct_mode](block); + } dv_set_class_number(block, enc_blk, - enc_blk->dct_mode ? ff_zigzag248_direct : ff_zigzag_direct, j/4); + enc_blk->dct_mode ? ff_zigzag248_direct : ff_zigzag_direct, + enc_blk->dct_mode ? dv_weight_248 : dv_weight_88, + j/4); init_put_bits(pb, ptr, block_sizes[j]/8); put_bits(pb, 9, (uint16_t)(((enc_blk->mb[0] >> 3) - 1024 + 2) >> 2)); @@ -886,6 +987,8 @@ static inline void dv_encode_video_segment(DVVideoContext *s, for (j=0; j<5*6; j++) { if (enc_blks[j].partial_bit_count) pb=dv_encode_ac(&enc_blks[j], pb, &pbs[6*5]); + if (enc_blks[j].partial_bit_count) + av_log(NULL, AV_LOG_ERROR, "ac bitstream overflow\n"); } for (j=0; j<5*6; j++) @@ -896,7 +999,17 @@ static int dv_decode_mt(AVCodecContext *avctx, void* sl) { DVVideoContext *s = avctx->priv_data; int slice = (size_t)sl; - dv_decode_video_segment(s, &s->buf[((slice/27)*6+(slice/3)+slice*5+7)*80], + + /* which DIF channel is this? */ + int chan = slice / (s->sys->difseg_size * 27); + + /* slice within the DIF channel */ + int chan_slice = slice % (s->sys->difseg_size * 27); + + /* byte offset of this channel's data */ + int chan_offset = chan * s->sys->difseg_size * 150 * 80; + + dv_decode_video_segment(s, &s->buf[((chan_slice/27)*6+(chan_slice/3)+chan_slice*5+7)*80 + chan_offset], &s->sys->video_place[slice*5]); return 0; } @@ -905,13 +1018,23 @@ static int dv_encode_mt(AVCodecContext *avctx, void* sl) { DVVideoContext *s = avctx->priv_data; int slice = (size_t)sl; - dv_encode_video_segment(s, &s->buf[((slice/27)*6+(slice/3)+slice*5+7)*80], + + /* which DIF channel is this? */ + int chan = slice / (s->sys->difseg_size * 27); + + /* slice within the DIF channel */ + int chan_slice = slice % (s->sys->difseg_size * 27); + + /* byte offset of this channel's data */ + int chan_offset = chan * s->sys->difseg_size * 150 * 80; + + dv_encode_video_segment(s, &s->buf[((chan_slice/27)*6+(chan_slice/3)+chan_slice*5+7)*80 + chan_offset], &s->sys->video_place[slice*5]); return 0; } /* NOTE: exactly one frame must be given (120000 bytes for NTSC, - 144000 bytes for PAL) */ + 144000 bytes for PAL - or twice those for 50Mbps) */ static int dvvideo_decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size) @@ -939,7 +1062,7 @@ static int dvvideo_decode_frame(AVCodecContext *avctx, s->buf = buf; avctx->execute(avctx, dv_decode_mt, (void**)&dv_anchor[0], NULL, - s->sys->difseg_size * 27); + s->sys->n_difchan * s->sys->difseg_size * 27); emms_c(); @@ -968,9 +1091,23 @@ static int dvvideo_encode_frame(AVCodecContext *c, uint8_t *buf, int buf_size, s->buf = buf; c->execute(c, dv_encode_mt, (void**)&dv_anchor[0], NULL, - s->sys->difseg_size * 27); + s->sys->n_difchan * s->sys->difseg_size * 27); emms_c(); + + /* Fill in just enough of the header for dv_frame_profile() to + return the correct result, so that the frame can be decoded + correctly. The rest of the metadata is filled in by the dvvideo + avformat. (this should probably change so that encode_frame() + fills in ALL of the metadata - e.g. for Quicktime-wrapped DV + streams) */ + + /* NTSC/PAL format */ + buf[3] = s->sys->dsf ? 0x80 : 0x00; + + /* 25Mbps or 50Mbps */ + buf[80*5 + 48 + 3] = (s->sys->pix_fmt == PIX_FMT_YUV422P) ? 0x4 : 0x0; + return s->sys->frame_size; } diff --git a/src/libffmpeg/libavcodec/dvdata.h b/src/libffmpeg/libavcodec/dvdata.h index f817ead2a..a3d42d66c 100644 --- a/src/libffmpeg/libavcodec/dvdata.h +++ b/src/libffmpeg/libavcodec/dvdata.h @@ -31,7 +31,8 @@ typedef struct DVprofile { int dsf; /* value of the dsf in the DV header */ int frame_size; /* total size of one frame in bytes */ - int difseg_size; /* number of DIF segments */ + int difseg_size; /* number of DIF segments per DIF channel */ + int n_difchan; /* number of DIF channels per frame */ int frame_rate; int frame_rate_base; int ltc_divisor; /* FPS from the LTS standpoint */ @@ -1256,6 +1257,1251 @@ static const uint16_t dv_place_411[1350] = { 0x0834, 0x2320, 0x2f44, 0x3810, 0x1658, }; +/* 4:2:2 macroblock placement tables created by dvtables.py */ + +/* 2 channels per frame, 10 DIF sequences per channel, + 27 video segments per DIF sequence, 5 macroblocks per video segment */ +static const uint16_t dv_place_422_525[2*10*27*5] = { + 0x0c48, 0x2424, 0x306c, 0x0000, 0x1890, + 0x0d48, 0x2524, 0x316c, 0x0100, 0x1990, + 0x0e48, 0x2624, 0x326c, 0x0200, 0x1a90, + 0x0e4c, 0x2628, 0x3270, 0x0204, 0x1a94, + 0x0d4c, 0x2528, 0x3170, 0x0104, 0x1994, + 0x0c4c, 0x2428, 0x3070, 0x0004, 0x1894, + 0x0c50, 0x242c, 0x3074, 0x0008, 0x1898, + 0x0d50, 0x252c, 0x3174, 0x0108, 0x1998, + 0x0e50, 0x262c, 0x3274, 0x0208, 0x1a98, + 0x0e54, 0x2630, 0x3278, 0x020c, 0x1a9c, + 0x0d54, 0x2530, 0x3178, 0x010c, 0x199c, + 0x0c54, 0x2430, 0x3078, 0x000c, 0x189c, + 0x0c58, 0x2434, 0x307c, 0x0010, 0x18a0, + 0x0d58, 0x2534, 0x317c, 0x0110, 0x19a0, + 0x0e58, 0x2634, 0x327c, 0x0210, 0x1aa0, + 0x0e5c, 0x2638, 0x3280, 0x0214, 0x1aa4, + 0x0d5c, 0x2538, 0x3180, 0x0114, 0x19a4, + 0x0c5c, 0x2438, 0x3080, 0x0014, 0x18a4, + 0x0c60, 0x243c, 0x3084, 0x0018, 0x18a8, + 0x0d60, 0x253c, 0x3184, 0x0118, 0x19a8, + 0x0e60, 0x263c, 0x3284, 0x0218, 0x1aa8, + 0x0e64, 0x2640, 0x3288, 0x021c, 0x1aac, + 0x0d64, 0x2540, 0x3188, 0x011c, 0x19ac, + 0x0c64, 0x2440, 0x3088, 0x001c, 0x18ac, + 0x0c68, 0x2444, 0x308c, 0x0020, 0x18b0, + 0x0d68, 0x2544, 0x318c, 0x0120, 0x19b0, + 0x0e68, 0x2644, 0x328c, 0x0220, 0x1ab0, + 0x1248, 0x2a24, 0x366c, 0x0600, 0x1e90, + 0x1348, 0x2b24, 0x376c, 0x0700, 0x1f90, + 0x1448, 0x2c24, 0x386c, 0x0800, 0x2090, + 0x144c, 0x2c28, 0x3870, 0x0804, 0x2094, + 0x134c, 0x2b28, 0x3770, 0x0704, 0x1f94, + 0x124c, 0x2a28, 0x3670, 0x0604, 0x1e94, + 0x1250, 0x2a2c, 0x3674, 0x0608, 0x1e98, + 0x1350, 0x2b2c, 0x3774, 0x0708, 0x1f98, + 0x1450, 0x2c2c, 0x3874, 0x0808, 0x2098, + 0x1454, 0x2c30, 0x3878, 0x080c, 0x209c, + 0x1354, 0x2b30, 0x3778, 0x070c, 0x1f9c, + 0x1254, 0x2a30, 0x3678, 0x060c, 0x1e9c, + 0x1258, 0x2a34, 0x367c, 0x0610, 0x1ea0, + 0x1358, 0x2b34, 0x377c, 0x0710, 0x1fa0, + 0x1458, 0x2c34, 0x387c, 0x0810, 0x20a0, + 0x145c, 0x2c38, 0x3880, 0x0814, 0x20a4, + 0x135c, 0x2b38, 0x3780, 0x0714, 0x1fa4, + 0x125c, 0x2a38, 0x3680, 0x0614, 0x1ea4, + 0x1260, 0x2a3c, 0x3684, 0x0618, 0x1ea8, + 0x1360, 0x2b3c, 0x3784, 0x0718, 0x1fa8, + 0x1460, 0x2c3c, 0x3884, 0x0818, 0x20a8, + 0x1464, 0x2c40, 0x3888, 0x081c, 0x20ac, + 0x1364, 0x2b40, 0x3788, 0x071c, 0x1fac, + 0x1264, 0x2a40, 0x3688, 0x061c, 0x1eac, + 0x1268, 0x2a44, 0x368c, 0x0620, 0x1eb0, + 0x1368, 0x2b44, 0x378c, 0x0720, 0x1fb0, + 0x1468, 0x2c44, 0x388c, 0x0820, 0x20b0, + 0x1848, 0x3024, 0x006c, 0x0c00, 0x2490, + 0x1948, 0x3124, 0x016c, 0x0d00, 0x2590, + 0x1a48, 0x3224, 0x026c, 0x0e00, 0x2690, + 0x1a4c, 0x3228, 0x0270, 0x0e04, 0x2694, + 0x194c, 0x3128, 0x0170, 0x0d04, 0x2594, + 0x184c, 0x3028, 0x0070, 0x0c04, 0x2494, + 0x1850, 0x302c, 0x0074, 0x0c08, 0x2498, + 0x1950, 0x312c, 0x0174, 0x0d08, 0x2598, + 0x1a50, 0x322c, 0x0274, 0x0e08, 0x2698, + 0x1a54, 0x3230, 0x0278, 0x0e0c, 0x269c, + 0x1954, 0x3130, 0x0178, 0x0d0c, 0x259c, + 0x1854, 0x3030, 0x0078, 0x0c0c, 0x249c, + 0x1858, 0x3034, 0x007c, 0x0c10, 0x24a0, + 0x1958, 0x3134, 0x017c, 0x0d10, 0x25a0, + 0x1a58, 0x3234, 0x027c, 0x0e10, 0x26a0, + 0x1a5c, 0x3238, 0x0280, 0x0e14, 0x26a4, + 0x195c, 0x3138, 0x0180, 0x0d14, 0x25a4, + 0x185c, 0x3038, 0x0080, 0x0c14, 0x24a4, + 0x1860, 0x303c, 0x0084, 0x0c18, 0x24a8, + 0x1960, 0x313c, 0x0184, 0x0d18, 0x25a8, + 0x1a60, 0x323c, 0x0284, 0x0e18, 0x26a8, + 0x1a64, 0x3240, 0x0288, 0x0e1c, 0x26ac, + 0x1964, 0x3140, 0x0188, 0x0d1c, 0x25ac, + 0x1864, 0x3040, 0x0088, 0x0c1c, 0x24ac, + 0x1868, 0x3044, 0x008c, 0x0c20, 0x24b0, + 0x1968, 0x3144, 0x018c, 0x0d20, 0x25b0, + 0x1a68, 0x3244, 0x028c, 0x0e20, 0x26b0, + 0x1e48, 0x3624, 0x066c, 0x1200, 0x2a90, + 0x1f48, 0x3724, 0x076c, 0x1300, 0x2b90, + 0x2048, 0x3824, 0x086c, 0x1400, 0x2c90, + 0x204c, 0x3828, 0x0870, 0x1404, 0x2c94, + 0x1f4c, 0x3728, 0x0770, 0x1304, 0x2b94, + 0x1e4c, 0x3628, 0x0670, 0x1204, 0x2a94, + 0x1e50, 0x362c, 0x0674, 0x1208, 0x2a98, + 0x1f50, 0x372c, 0x0774, 0x1308, 0x2b98, + 0x2050, 0x382c, 0x0874, 0x1408, 0x2c98, + 0x2054, 0x3830, 0x0878, 0x140c, 0x2c9c, + 0x1f54, 0x3730, 0x0778, 0x130c, 0x2b9c, + 0x1e54, 0x3630, 0x0678, 0x120c, 0x2a9c, + 0x1e58, 0x3634, 0x067c, 0x1210, 0x2aa0, + 0x1f58, 0x3734, 0x077c, 0x1310, 0x2ba0, + 0x2058, 0x3834, 0x087c, 0x1410, 0x2ca0, + 0x205c, 0x3838, 0x0880, 0x1414, 0x2ca4, + 0x1f5c, 0x3738, 0x0780, 0x1314, 0x2ba4, + 0x1e5c, 0x3638, 0x0680, 0x1214, 0x2aa4, + 0x1e60, 0x363c, 0x0684, 0x1218, 0x2aa8, + 0x1f60, 0x373c, 0x0784, 0x1318, 0x2ba8, + 0x2060, 0x383c, 0x0884, 0x1418, 0x2ca8, + 0x2064, 0x3840, 0x0888, 0x141c, 0x2cac, + 0x1f64, 0x3740, 0x0788, 0x131c, 0x2bac, + 0x1e64, 0x3640, 0x0688, 0x121c, 0x2aac, + 0x1e68, 0x3644, 0x068c, 0x1220, 0x2ab0, + 0x1f68, 0x3744, 0x078c, 0x1320, 0x2bb0, + 0x2068, 0x3844, 0x088c, 0x1420, 0x2cb0, + 0x2448, 0x0024, 0x0c6c, 0x1800, 0x3090, + 0x2548, 0x0124, 0x0d6c, 0x1900, 0x3190, + 0x2648, 0x0224, 0x0e6c, 0x1a00, 0x3290, + 0x264c, 0x0228, 0x0e70, 0x1a04, 0x3294, + 0x254c, 0x0128, 0x0d70, 0x1904, 0x3194, + 0x244c, 0x0028, 0x0c70, 0x1804, 0x3094, + 0x2450, 0x002c, 0x0c74, 0x1808, 0x3098, + 0x2550, 0x012c, 0x0d74, 0x1908, 0x3198, + 0x2650, 0x022c, 0x0e74, 0x1a08, 0x3298, + 0x2654, 0x0230, 0x0e78, 0x1a0c, 0x329c, + 0x2554, 0x0130, 0x0d78, 0x190c, 0x319c, + 0x2454, 0x0030, 0x0c78, 0x180c, 0x309c, + 0x2458, 0x0034, 0x0c7c, 0x1810, 0x30a0, + 0x2558, 0x0134, 0x0d7c, 0x1910, 0x31a0, + 0x2658, 0x0234, 0x0e7c, 0x1a10, 0x32a0, + 0x265c, 0x0238, 0x0e80, 0x1a14, 0x32a4, + 0x255c, 0x0138, 0x0d80, 0x1914, 0x31a4, + 0x245c, 0x0038, 0x0c80, 0x1814, 0x30a4, + 0x2460, 0x003c, 0x0c84, 0x1818, 0x30a8, + 0x2560, 0x013c, 0x0d84, 0x1918, 0x31a8, + 0x2660, 0x023c, 0x0e84, 0x1a18, 0x32a8, + 0x2664, 0x0240, 0x0e88, 0x1a1c, 0x32ac, + 0x2564, 0x0140, 0x0d88, 0x191c, 0x31ac, + 0x2464, 0x0040, 0x0c88, 0x181c, 0x30ac, + 0x2468, 0x0044, 0x0c8c, 0x1820, 0x30b0, + 0x2568, 0x0144, 0x0d8c, 0x1920, 0x31b0, + 0x2668, 0x0244, 0x0e8c, 0x1a20, 0x32b0, + 0x2a48, 0x0624, 0x126c, 0x1e00, 0x3690, + 0x2b48, 0x0724, 0x136c, 0x1f00, 0x3790, + 0x2c48, 0x0824, 0x146c, 0x2000, 0x3890, + 0x2c4c, 0x0828, 0x1470, 0x2004, 0x3894, + 0x2b4c, 0x0728, 0x1370, 0x1f04, 0x3794, + 0x2a4c, 0x0628, 0x1270, 0x1e04, 0x3694, + 0x2a50, 0x062c, 0x1274, 0x1e08, 0x3698, + 0x2b50, 0x072c, 0x1374, 0x1f08, 0x3798, + 0x2c50, 0x082c, 0x1474, 0x2008, 0x3898, + 0x2c54, 0x0830, 0x1478, 0x200c, 0x389c, + 0x2b54, 0x0730, 0x1378, 0x1f0c, 0x379c, + 0x2a54, 0x0630, 0x1278, 0x1e0c, 0x369c, + 0x2a58, 0x0634, 0x127c, 0x1e10, 0x36a0, + 0x2b58, 0x0734, 0x137c, 0x1f10, 0x37a0, + 0x2c58, 0x0834, 0x147c, 0x2010, 0x38a0, + 0x2c5c, 0x0838, 0x1480, 0x2014, 0x38a4, + 0x2b5c, 0x0738, 0x1380, 0x1f14, 0x37a4, + 0x2a5c, 0x0638, 0x1280, 0x1e14, 0x36a4, + 0x2a60, 0x063c, 0x1284, 0x1e18, 0x36a8, + 0x2b60, 0x073c, 0x1384, 0x1f18, 0x37a8, + 0x2c60, 0x083c, 0x1484, 0x2018, 0x38a8, + 0x2c64, 0x0840, 0x1488, 0x201c, 0x38ac, + 0x2b64, 0x0740, 0x1388, 0x1f1c, 0x37ac, + 0x2a64, 0x0640, 0x1288, 0x1e1c, 0x36ac, + 0x2a68, 0x0644, 0x128c, 0x1e20, 0x36b0, + 0x2b68, 0x0744, 0x138c, 0x1f20, 0x37b0, + 0x2c68, 0x0844, 0x148c, 0x2020, 0x38b0, + 0x3048, 0x0c24, 0x186c, 0x2400, 0x0090, + 0x3148, 0x0d24, 0x196c, 0x2500, 0x0190, + 0x3248, 0x0e24, 0x1a6c, 0x2600, 0x0290, + 0x324c, 0x0e28, 0x1a70, 0x2604, 0x0294, + 0x314c, 0x0d28, 0x1970, 0x2504, 0x0194, + 0x304c, 0x0c28, 0x1870, 0x2404, 0x0094, + 0x3050, 0x0c2c, 0x1874, 0x2408, 0x0098, + 0x3150, 0x0d2c, 0x1974, 0x2508, 0x0198, + 0x3250, 0x0e2c, 0x1a74, 0x2608, 0x0298, + 0x3254, 0x0e30, 0x1a78, 0x260c, 0x029c, + 0x3154, 0x0d30, 0x1978, 0x250c, 0x019c, + 0x3054, 0x0c30, 0x1878, 0x240c, 0x009c, + 0x3058, 0x0c34, 0x187c, 0x2410, 0x00a0, + 0x3158, 0x0d34, 0x197c, 0x2510, 0x01a0, + 0x3258, 0x0e34, 0x1a7c, 0x2610, 0x02a0, + 0x325c, 0x0e38, 0x1a80, 0x2614, 0x02a4, + 0x315c, 0x0d38, 0x1980, 0x2514, 0x01a4, + 0x305c, 0x0c38, 0x1880, 0x2414, 0x00a4, + 0x3060, 0x0c3c, 0x1884, 0x2418, 0x00a8, + 0x3160, 0x0d3c, 0x1984, 0x2518, 0x01a8, + 0x3260, 0x0e3c, 0x1a84, 0x2618, 0x02a8, + 0x3264, 0x0e40, 0x1a88, 0x261c, 0x02ac, + 0x3164, 0x0d40, 0x1988, 0x251c, 0x01ac, + 0x3064, 0x0c40, 0x1888, 0x241c, 0x00ac, + 0x3068, 0x0c44, 0x188c, 0x2420, 0x00b0, + 0x3168, 0x0d44, 0x198c, 0x2520, 0x01b0, + 0x3268, 0x0e44, 0x1a8c, 0x2620, 0x02b0, + 0x3648, 0x1224, 0x1e6c, 0x2a00, 0x0690, + 0x3748, 0x1324, 0x1f6c, 0x2b00, 0x0790, + 0x3848, 0x1424, 0x206c, 0x2c00, 0x0890, + 0x384c, 0x1428, 0x2070, 0x2c04, 0x0894, + 0x374c, 0x1328, 0x1f70, 0x2b04, 0x0794, + 0x364c, 0x1228, 0x1e70, 0x2a04, 0x0694, + 0x3650, 0x122c, 0x1e74, 0x2a08, 0x0698, + 0x3750, 0x132c, 0x1f74, 0x2b08, 0x0798, + 0x3850, 0x142c, 0x2074, 0x2c08, 0x0898, + 0x3854, 0x1430, 0x2078, 0x2c0c, 0x089c, + 0x3754, 0x1330, 0x1f78, 0x2b0c, 0x079c, + 0x3654, 0x1230, 0x1e78, 0x2a0c, 0x069c, + 0x3658, 0x1234, 0x1e7c, 0x2a10, 0x06a0, + 0x3758, 0x1334, 0x1f7c, 0x2b10, 0x07a0, + 0x3858, 0x1434, 0x207c, 0x2c10, 0x08a0, + 0x385c, 0x1438, 0x2080, 0x2c14, 0x08a4, + 0x375c, 0x1338, 0x1f80, 0x2b14, 0x07a4, + 0x365c, 0x1238, 0x1e80, 0x2a14, 0x06a4, + 0x3660, 0x123c, 0x1e84, 0x2a18, 0x06a8, + 0x3760, 0x133c, 0x1f84, 0x2b18, 0x07a8, + 0x3860, 0x143c, 0x2084, 0x2c18, 0x08a8, + 0x3864, 0x1440, 0x2088, 0x2c1c, 0x08ac, + 0x3764, 0x1340, 0x1f88, 0x2b1c, 0x07ac, + 0x3664, 0x1240, 0x1e88, 0x2a1c, 0x06ac, + 0x3668, 0x1244, 0x1e8c, 0x2a20, 0x06b0, + 0x3768, 0x1344, 0x1f8c, 0x2b20, 0x07b0, + 0x3868, 0x1444, 0x208c, 0x2c20, 0x08b0, + 0x0048, 0x1824, 0x246c, 0x3000, 0x0c90, + 0x0148, 0x1924, 0x256c, 0x3100, 0x0d90, + 0x0248, 0x1a24, 0x266c, 0x3200, 0x0e90, + 0x024c, 0x1a28, 0x2670, 0x3204, 0x0e94, + 0x014c, 0x1928, 0x2570, 0x3104, 0x0d94, + 0x004c, 0x1828, 0x2470, 0x3004, 0x0c94, + 0x0050, 0x182c, 0x2474, 0x3008, 0x0c98, + 0x0150, 0x192c, 0x2574, 0x3108, 0x0d98, + 0x0250, 0x1a2c, 0x2674, 0x3208, 0x0e98, + 0x0254, 0x1a30, 0x2678, 0x320c, 0x0e9c, + 0x0154, 0x1930, 0x2578, 0x310c, 0x0d9c, + 0x0054, 0x1830, 0x2478, 0x300c, 0x0c9c, + 0x0058, 0x1834, 0x247c, 0x3010, 0x0ca0, + 0x0158, 0x1934, 0x257c, 0x3110, 0x0da0, + 0x0258, 0x1a34, 0x267c, 0x3210, 0x0ea0, + 0x025c, 0x1a38, 0x2680, 0x3214, 0x0ea4, + 0x015c, 0x1938, 0x2580, 0x3114, 0x0da4, + 0x005c, 0x1838, 0x2480, 0x3014, 0x0ca4, + 0x0060, 0x183c, 0x2484, 0x3018, 0x0ca8, + 0x0160, 0x193c, 0x2584, 0x3118, 0x0da8, + 0x0260, 0x1a3c, 0x2684, 0x3218, 0x0ea8, + 0x0264, 0x1a40, 0x2688, 0x321c, 0x0eac, + 0x0164, 0x1940, 0x2588, 0x311c, 0x0dac, + 0x0064, 0x1840, 0x2488, 0x301c, 0x0cac, + 0x0068, 0x1844, 0x248c, 0x3020, 0x0cb0, + 0x0168, 0x1944, 0x258c, 0x3120, 0x0db0, + 0x0268, 0x1a44, 0x268c, 0x3220, 0x0eb0, + 0x0648, 0x1e24, 0x2a6c, 0x3600, 0x1290, + 0x0748, 0x1f24, 0x2b6c, 0x3700, 0x1390, + 0x0848, 0x2024, 0x2c6c, 0x3800, 0x1490, + 0x084c, 0x2028, 0x2c70, 0x3804, 0x1494, + 0x074c, 0x1f28, 0x2b70, 0x3704, 0x1394, + 0x064c, 0x1e28, 0x2a70, 0x3604, 0x1294, + 0x0650, 0x1e2c, 0x2a74, 0x3608, 0x1298, + 0x0750, 0x1f2c, 0x2b74, 0x3708, 0x1398, + 0x0850, 0x202c, 0x2c74, 0x3808, 0x1498, + 0x0854, 0x2030, 0x2c78, 0x380c, 0x149c, + 0x0754, 0x1f30, 0x2b78, 0x370c, 0x139c, + 0x0654, 0x1e30, 0x2a78, 0x360c, 0x129c, + 0x0658, 0x1e34, 0x2a7c, 0x3610, 0x12a0, + 0x0758, 0x1f34, 0x2b7c, 0x3710, 0x13a0, + 0x0858, 0x2034, 0x2c7c, 0x3810, 0x14a0, + 0x085c, 0x2038, 0x2c80, 0x3814, 0x14a4, + 0x075c, 0x1f38, 0x2b80, 0x3714, 0x13a4, + 0x065c, 0x1e38, 0x2a80, 0x3614, 0x12a4, + 0x0660, 0x1e3c, 0x2a84, 0x3618, 0x12a8, + 0x0760, 0x1f3c, 0x2b84, 0x3718, 0x13a8, + 0x0860, 0x203c, 0x2c84, 0x3818, 0x14a8, + 0x0864, 0x2040, 0x2c88, 0x381c, 0x14ac, + 0x0764, 0x1f40, 0x2b88, 0x371c, 0x13ac, + 0x0664, 0x1e40, 0x2a88, 0x361c, 0x12ac, + 0x0668, 0x1e44, 0x2a8c, 0x3620, 0x12b0, + 0x0768, 0x1f44, 0x2b8c, 0x3720, 0x13b0, + 0x0868, 0x2044, 0x2c8c, 0x3820, 0x14b0, + 0x0f48, 0x2724, 0x336c, 0x0300, 0x1b90, + 0x1048, 0x2824, 0x346c, 0x0400, 0x1c90, + 0x1148, 0x2924, 0x356c, 0x0500, 0x1d90, + 0x114c, 0x2928, 0x3570, 0x0504, 0x1d94, + 0x104c, 0x2828, 0x3470, 0x0404, 0x1c94, + 0x0f4c, 0x2728, 0x3370, 0x0304, 0x1b94, + 0x0f50, 0x272c, 0x3374, 0x0308, 0x1b98, + 0x1050, 0x282c, 0x3474, 0x0408, 0x1c98, + 0x1150, 0x292c, 0x3574, 0x0508, 0x1d98, + 0x1154, 0x2930, 0x3578, 0x050c, 0x1d9c, + 0x1054, 0x2830, 0x3478, 0x040c, 0x1c9c, + 0x0f54, 0x2730, 0x3378, 0x030c, 0x1b9c, + 0x0f58, 0x2734, 0x337c, 0x0310, 0x1ba0, + 0x1058, 0x2834, 0x347c, 0x0410, 0x1ca0, + 0x1158, 0x2934, 0x357c, 0x0510, 0x1da0, + 0x115c, 0x2938, 0x3580, 0x0514, 0x1da4, + 0x105c, 0x2838, 0x3480, 0x0414, 0x1ca4, + 0x0f5c, 0x2738, 0x3380, 0x0314, 0x1ba4, + 0x0f60, 0x273c, 0x3384, 0x0318, 0x1ba8, + 0x1060, 0x283c, 0x3484, 0x0418, 0x1ca8, + 0x1160, 0x293c, 0x3584, 0x0518, 0x1da8, + 0x1164, 0x2940, 0x3588, 0x051c, 0x1dac, + 0x1064, 0x2840, 0x3488, 0x041c, 0x1cac, + 0x0f64, 0x2740, 0x3388, 0x031c, 0x1bac, + 0x0f68, 0x2744, 0x338c, 0x0320, 0x1bb0, + 0x1068, 0x2844, 0x348c, 0x0420, 0x1cb0, + 0x1168, 0x2944, 0x358c, 0x0520, 0x1db0, + 0x1548, 0x2d24, 0x396c, 0x0900, 0x2190, + 0x1648, 0x2e24, 0x3a6c, 0x0a00, 0x2290, + 0x1748, 0x2f24, 0x3b6c, 0x0b00, 0x2390, + 0x174c, 0x2f28, 0x3b70, 0x0b04, 0x2394, + 0x164c, 0x2e28, 0x3a70, 0x0a04, 0x2294, + 0x154c, 0x2d28, 0x3970, 0x0904, 0x2194, + 0x1550, 0x2d2c, 0x3974, 0x0908, 0x2198, + 0x1650, 0x2e2c, 0x3a74, 0x0a08, 0x2298, + 0x1750, 0x2f2c, 0x3b74, 0x0b08, 0x2398, + 0x1754, 0x2f30, 0x3b78, 0x0b0c, 0x239c, + 0x1654, 0x2e30, 0x3a78, 0x0a0c, 0x229c, + 0x1554, 0x2d30, 0x3978, 0x090c, 0x219c, + 0x1558, 0x2d34, 0x397c, 0x0910, 0x21a0, + 0x1658, 0x2e34, 0x3a7c, 0x0a10, 0x22a0, + 0x1758, 0x2f34, 0x3b7c, 0x0b10, 0x23a0, + 0x175c, 0x2f38, 0x3b80, 0x0b14, 0x23a4, + 0x165c, 0x2e38, 0x3a80, 0x0a14, 0x22a4, + 0x155c, 0x2d38, 0x3980, 0x0914, 0x21a4, + 0x1560, 0x2d3c, 0x3984, 0x0918, 0x21a8, + 0x1660, 0x2e3c, 0x3a84, 0x0a18, 0x22a8, + 0x1760, 0x2f3c, 0x3b84, 0x0b18, 0x23a8, + 0x1764, 0x2f40, 0x3b88, 0x0b1c, 0x23ac, + 0x1664, 0x2e40, 0x3a88, 0x0a1c, 0x22ac, + 0x1564, 0x2d40, 0x3988, 0x091c, 0x21ac, + 0x1568, 0x2d44, 0x398c, 0x0920, 0x21b0, + 0x1668, 0x2e44, 0x3a8c, 0x0a20, 0x22b0, + 0x1768, 0x2f44, 0x3b8c, 0x0b20, 0x23b0, + 0x1b48, 0x3324, 0x036c, 0x0f00, 0x2790, + 0x1c48, 0x3424, 0x046c, 0x1000, 0x2890, + 0x1d48, 0x3524, 0x056c, 0x1100, 0x2990, + 0x1d4c, 0x3528, 0x0570, 0x1104, 0x2994, + 0x1c4c, 0x3428, 0x0470, 0x1004, 0x2894, + 0x1b4c, 0x3328, 0x0370, 0x0f04, 0x2794, + 0x1b50, 0x332c, 0x0374, 0x0f08, 0x2798, + 0x1c50, 0x342c, 0x0474, 0x1008, 0x2898, + 0x1d50, 0x352c, 0x0574, 0x1108, 0x2998, + 0x1d54, 0x3530, 0x0578, 0x110c, 0x299c, + 0x1c54, 0x3430, 0x0478, 0x100c, 0x289c, + 0x1b54, 0x3330, 0x0378, 0x0f0c, 0x279c, + 0x1b58, 0x3334, 0x037c, 0x0f10, 0x27a0, + 0x1c58, 0x3434, 0x047c, 0x1010, 0x28a0, + 0x1d58, 0x3534, 0x057c, 0x1110, 0x29a0, + 0x1d5c, 0x3538, 0x0580, 0x1114, 0x29a4, + 0x1c5c, 0x3438, 0x0480, 0x1014, 0x28a4, + 0x1b5c, 0x3338, 0x0380, 0x0f14, 0x27a4, + 0x1b60, 0x333c, 0x0384, 0x0f18, 0x27a8, + 0x1c60, 0x343c, 0x0484, 0x1018, 0x28a8, + 0x1d60, 0x353c, 0x0584, 0x1118, 0x29a8, + 0x1d64, 0x3540, 0x0588, 0x111c, 0x29ac, + 0x1c64, 0x3440, 0x0488, 0x101c, 0x28ac, + 0x1b64, 0x3340, 0x0388, 0x0f1c, 0x27ac, + 0x1b68, 0x3344, 0x038c, 0x0f20, 0x27b0, + 0x1c68, 0x3444, 0x048c, 0x1020, 0x28b0, + 0x1d68, 0x3544, 0x058c, 0x1120, 0x29b0, + 0x2148, 0x3924, 0x096c, 0x1500, 0x2d90, + 0x2248, 0x3a24, 0x0a6c, 0x1600, 0x2e90, + 0x2348, 0x3b24, 0x0b6c, 0x1700, 0x2f90, + 0x234c, 0x3b28, 0x0b70, 0x1704, 0x2f94, + 0x224c, 0x3a28, 0x0a70, 0x1604, 0x2e94, + 0x214c, 0x3928, 0x0970, 0x1504, 0x2d94, + 0x2150, 0x392c, 0x0974, 0x1508, 0x2d98, + 0x2250, 0x3a2c, 0x0a74, 0x1608, 0x2e98, + 0x2350, 0x3b2c, 0x0b74, 0x1708, 0x2f98, + 0x2354, 0x3b30, 0x0b78, 0x170c, 0x2f9c, + 0x2254, 0x3a30, 0x0a78, 0x160c, 0x2e9c, + 0x2154, 0x3930, 0x0978, 0x150c, 0x2d9c, + 0x2158, 0x3934, 0x097c, 0x1510, 0x2da0, + 0x2258, 0x3a34, 0x0a7c, 0x1610, 0x2ea0, + 0x2358, 0x3b34, 0x0b7c, 0x1710, 0x2fa0, + 0x235c, 0x3b38, 0x0b80, 0x1714, 0x2fa4, + 0x225c, 0x3a38, 0x0a80, 0x1614, 0x2ea4, + 0x215c, 0x3938, 0x0980, 0x1514, 0x2da4, + 0x2160, 0x393c, 0x0984, 0x1518, 0x2da8, + 0x2260, 0x3a3c, 0x0a84, 0x1618, 0x2ea8, + 0x2360, 0x3b3c, 0x0b84, 0x1718, 0x2fa8, + 0x2364, 0x3b40, 0x0b88, 0x171c, 0x2fac, + 0x2264, 0x3a40, 0x0a88, 0x161c, 0x2eac, + 0x2164, 0x3940, 0x0988, 0x151c, 0x2dac, + 0x2168, 0x3944, 0x098c, 0x1520, 0x2db0, + 0x2268, 0x3a44, 0x0a8c, 0x1620, 0x2eb0, + 0x2368, 0x3b44, 0x0b8c, 0x1720, 0x2fb0, + 0x2748, 0x0324, 0x0f6c, 0x1b00, 0x3390, + 0x2848, 0x0424, 0x106c, 0x1c00, 0x3490, + 0x2948, 0x0524, 0x116c, 0x1d00, 0x3590, + 0x294c, 0x0528, 0x1170, 0x1d04, 0x3594, + 0x284c, 0x0428, 0x1070, 0x1c04, 0x3494, + 0x274c, 0x0328, 0x0f70, 0x1b04, 0x3394, + 0x2750, 0x032c, 0x0f74, 0x1b08, 0x3398, + 0x2850, 0x042c, 0x1074, 0x1c08, 0x3498, + 0x2950, 0x052c, 0x1174, 0x1d08, 0x3598, + 0x2954, 0x0530, 0x1178, 0x1d0c, 0x359c, + 0x2854, 0x0430, 0x1078, 0x1c0c, 0x349c, + 0x2754, 0x0330, 0x0f78, 0x1b0c, 0x339c, + 0x2758, 0x0334, 0x0f7c, 0x1b10, 0x33a0, + 0x2858, 0x0434, 0x107c, 0x1c10, 0x34a0, + 0x2958, 0x0534, 0x117c, 0x1d10, 0x35a0, + 0x295c, 0x0538, 0x1180, 0x1d14, 0x35a4, + 0x285c, 0x0438, 0x1080, 0x1c14, 0x34a4, + 0x275c, 0x0338, 0x0f80, 0x1b14, 0x33a4, + 0x2760, 0x033c, 0x0f84, 0x1b18, 0x33a8, + 0x2860, 0x043c, 0x1084, 0x1c18, 0x34a8, + 0x2960, 0x053c, 0x1184, 0x1d18, 0x35a8, + 0x2964, 0x0540, 0x1188, 0x1d1c, 0x35ac, + 0x2864, 0x0440, 0x1088, 0x1c1c, 0x34ac, + 0x2764, 0x0340, 0x0f88, 0x1b1c, 0x33ac, + 0x2768, 0x0344, 0x0f8c, 0x1b20, 0x33b0, + 0x2868, 0x0444, 0x108c, 0x1c20, 0x34b0, + 0x2968, 0x0544, 0x118c, 0x1d20, 0x35b0, + 0x2d48, 0x0924, 0x156c, 0x2100, 0x3990, + 0x2e48, 0x0a24, 0x166c, 0x2200, 0x3a90, + 0x2f48, 0x0b24, 0x176c, 0x2300, 0x3b90, + 0x2f4c, 0x0b28, 0x1770, 0x2304, 0x3b94, + 0x2e4c, 0x0a28, 0x1670, 0x2204, 0x3a94, + 0x2d4c, 0x0928, 0x1570, 0x2104, 0x3994, + 0x2d50, 0x092c, 0x1574, 0x2108, 0x3998, + 0x2e50, 0x0a2c, 0x1674, 0x2208, 0x3a98, + 0x2f50, 0x0b2c, 0x1774, 0x2308, 0x3b98, + 0x2f54, 0x0b30, 0x1778, 0x230c, 0x3b9c, + 0x2e54, 0x0a30, 0x1678, 0x220c, 0x3a9c, + 0x2d54, 0x0930, 0x1578, 0x210c, 0x399c, + 0x2d58, 0x0934, 0x157c, 0x2110, 0x39a0, + 0x2e58, 0x0a34, 0x167c, 0x2210, 0x3aa0, + 0x2f58, 0x0b34, 0x177c, 0x2310, 0x3ba0, + 0x2f5c, 0x0b38, 0x1780, 0x2314, 0x3ba4, + 0x2e5c, 0x0a38, 0x1680, 0x2214, 0x3aa4, + 0x2d5c, 0x0938, 0x1580, 0x2114, 0x39a4, + 0x2d60, 0x093c, 0x1584, 0x2118, 0x39a8, + 0x2e60, 0x0a3c, 0x1684, 0x2218, 0x3aa8, + 0x2f60, 0x0b3c, 0x1784, 0x2318, 0x3ba8, + 0x2f64, 0x0b40, 0x1788, 0x231c, 0x3bac, + 0x2e64, 0x0a40, 0x1688, 0x221c, 0x3aac, + 0x2d64, 0x0940, 0x1588, 0x211c, 0x39ac, + 0x2d68, 0x0944, 0x158c, 0x2120, 0x39b0, + 0x2e68, 0x0a44, 0x168c, 0x2220, 0x3ab0, + 0x2f68, 0x0b44, 0x178c, 0x2320, 0x3bb0, + 0x3348, 0x0f24, 0x1b6c, 0x2700, 0x0390, + 0x3448, 0x1024, 0x1c6c, 0x2800, 0x0490, + 0x3548, 0x1124, 0x1d6c, 0x2900, 0x0590, + 0x354c, 0x1128, 0x1d70, 0x2904, 0x0594, + 0x344c, 0x1028, 0x1c70, 0x2804, 0x0494, + 0x334c, 0x0f28, 0x1b70, 0x2704, 0x0394, + 0x3350, 0x0f2c, 0x1b74, 0x2708, 0x0398, + 0x3450, 0x102c, 0x1c74, 0x2808, 0x0498, + 0x3550, 0x112c, 0x1d74, 0x2908, 0x0598, + 0x3554, 0x1130, 0x1d78, 0x290c, 0x059c, + 0x3454, 0x1030, 0x1c78, 0x280c, 0x049c, + 0x3354, 0x0f30, 0x1b78, 0x270c, 0x039c, + 0x3358, 0x0f34, 0x1b7c, 0x2710, 0x03a0, + 0x3458, 0x1034, 0x1c7c, 0x2810, 0x04a0, + 0x3558, 0x1134, 0x1d7c, 0x2910, 0x05a0, + 0x355c, 0x1138, 0x1d80, 0x2914, 0x05a4, + 0x345c, 0x1038, 0x1c80, 0x2814, 0x04a4, + 0x335c, 0x0f38, 0x1b80, 0x2714, 0x03a4, + 0x3360, 0x0f3c, 0x1b84, 0x2718, 0x03a8, + 0x3460, 0x103c, 0x1c84, 0x2818, 0x04a8, + 0x3560, 0x113c, 0x1d84, 0x2918, 0x05a8, + 0x3564, 0x1140, 0x1d88, 0x291c, 0x05ac, + 0x3464, 0x1040, 0x1c88, 0x281c, 0x04ac, + 0x3364, 0x0f40, 0x1b88, 0x271c, 0x03ac, + 0x3368, 0x0f44, 0x1b8c, 0x2720, 0x03b0, + 0x3468, 0x1044, 0x1c8c, 0x2820, 0x04b0, + 0x3568, 0x1144, 0x1d8c, 0x2920, 0x05b0, + 0x3948, 0x1524, 0x216c, 0x2d00, 0x0990, + 0x3a48, 0x1624, 0x226c, 0x2e00, 0x0a90, + 0x3b48, 0x1724, 0x236c, 0x2f00, 0x0b90, + 0x3b4c, 0x1728, 0x2370, 0x2f04, 0x0b94, + 0x3a4c, 0x1628, 0x2270, 0x2e04, 0x0a94, + 0x394c, 0x1528, 0x2170, 0x2d04, 0x0994, + 0x3950, 0x152c, 0x2174, 0x2d08, 0x0998, + 0x3a50, 0x162c, 0x2274, 0x2e08, 0x0a98, + 0x3b50, 0x172c, 0x2374, 0x2f08, 0x0b98, + 0x3b54, 0x1730, 0x2378, 0x2f0c, 0x0b9c, + 0x3a54, 0x1630, 0x2278, 0x2e0c, 0x0a9c, + 0x3954, 0x1530, 0x2178, 0x2d0c, 0x099c, + 0x3958, 0x1534, 0x217c, 0x2d10, 0x09a0, + 0x3a58, 0x1634, 0x227c, 0x2e10, 0x0aa0, + 0x3b58, 0x1734, 0x237c, 0x2f10, 0x0ba0, + 0x3b5c, 0x1738, 0x2380, 0x2f14, 0x0ba4, + 0x3a5c, 0x1638, 0x2280, 0x2e14, 0x0aa4, + 0x395c, 0x1538, 0x2180, 0x2d14, 0x09a4, + 0x3960, 0x153c, 0x2184, 0x2d18, 0x09a8, + 0x3a60, 0x163c, 0x2284, 0x2e18, 0x0aa8, + 0x3b60, 0x173c, 0x2384, 0x2f18, 0x0ba8, + 0x3b64, 0x1740, 0x2388, 0x2f1c, 0x0bac, + 0x3a64, 0x1640, 0x2288, 0x2e1c, 0x0aac, + 0x3964, 0x1540, 0x2188, 0x2d1c, 0x09ac, + 0x3968, 0x1544, 0x218c, 0x2d20, 0x09b0, + 0x3a68, 0x1644, 0x228c, 0x2e20, 0x0ab0, + 0x3b68, 0x1744, 0x238c, 0x2f20, 0x0bb0, + 0x0348, 0x1b24, 0x276c, 0x3300, 0x0f90, + 0x0448, 0x1c24, 0x286c, 0x3400, 0x1090, + 0x0548, 0x1d24, 0x296c, 0x3500, 0x1190, + 0x054c, 0x1d28, 0x2970, 0x3504, 0x1194, + 0x044c, 0x1c28, 0x2870, 0x3404, 0x1094, + 0x034c, 0x1b28, 0x2770, 0x3304, 0x0f94, + 0x0350, 0x1b2c, 0x2774, 0x3308, 0x0f98, + 0x0450, 0x1c2c, 0x2874, 0x3408, 0x1098, + 0x0550, 0x1d2c, 0x2974, 0x3508, 0x1198, + 0x0554, 0x1d30, 0x2978, 0x350c, 0x119c, + 0x0454, 0x1c30, 0x2878, 0x340c, 0x109c, + 0x0354, 0x1b30, 0x2778, 0x330c, 0x0f9c, + 0x0358, 0x1b34, 0x277c, 0x3310, 0x0fa0, + 0x0458, 0x1c34, 0x287c, 0x3410, 0x10a0, + 0x0558, 0x1d34, 0x297c, 0x3510, 0x11a0, + 0x055c, 0x1d38, 0x2980, 0x3514, 0x11a4, + 0x045c, 0x1c38, 0x2880, 0x3414, 0x10a4, + 0x035c, 0x1b38, 0x2780, 0x3314, 0x0fa4, + 0x0360, 0x1b3c, 0x2784, 0x3318, 0x0fa8, + 0x0460, 0x1c3c, 0x2884, 0x3418, 0x10a8, + 0x0560, 0x1d3c, 0x2984, 0x3518, 0x11a8, + 0x0564, 0x1d40, 0x2988, 0x351c, 0x11ac, + 0x0464, 0x1c40, 0x2888, 0x341c, 0x10ac, + 0x0364, 0x1b40, 0x2788, 0x331c, 0x0fac, + 0x0368, 0x1b44, 0x278c, 0x3320, 0x0fb0, + 0x0468, 0x1c44, 0x288c, 0x3420, 0x10b0, + 0x0568, 0x1d44, 0x298c, 0x3520, 0x11b0, + 0x0948, 0x2124, 0x2d6c, 0x3900, 0x1590, + 0x0a48, 0x2224, 0x2e6c, 0x3a00, 0x1690, + 0x0b48, 0x2324, 0x2f6c, 0x3b00, 0x1790, + 0x0b4c, 0x2328, 0x2f70, 0x3b04, 0x1794, + 0x0a4c, 0x2228, 0x2e70, 0x3a04, 0x1694, + 0x094c, 0x2128, 0x2d70, 0x3904, 0x1594, + 0x0950, 0x212c, 0x2d74, 0x3908, 0x1598, + 0x0a50, 0x222c, 0x2e74, 0x3a08, 0x1698, + 0x0b50, 0x232c, 0x2f74, 0x3b08, 0x1798, + 0x0b54, 0x2330, 0x2f78, 0x3b0c, 0x179c, + 0x0a54, 0x2230, 0x2e78, 0x3a0c, 0x169c, + 0x0954, 0x2130, 0x2d78, 0x390c, 0x159c, + 0x0958, 0x2134, 0x2d7c, 0x3910, 0x15a0, + 0x0a58, 0x2234, 0x2e7c, 0x3a10, 0x16a0, + 0x0b58, 0x2334, 0x2f7c, 0x3b10, 0x17a0, + 0x0b5c, 0x2338, 0x2f80, 0x3b14, 0x17a4, + 0x0a5c, 0x2238, 0x2e80, 0x3a14, 0x16a4, + 0x095c, 0x2138, 0x2d80, 0x3914, 0x15a4, + 0x0960, 0x213c, 0x2d84, 0x3918, 0x15a8, + 0x0a60, 0x223c, 0x2e84, 0x3a18, 0x16a8, + 0x0b60, 0x233c, 0x2f84, 0x3b18, 0x17a8, + 0x0b64, 0x2340, 0x2f88, 0x3b1c, 0x17ac, + 0x0a64, 0x2240, 0x2e88, 0x3a1c, 0x16ac, + 0x0964, 0x2140, 0x2d88, 0x391c, 0x15ac, + 0x0968, 0x2144, 0x2d8c, 0x3920, 0x15b0, + 0x0a68, 0x2244, 0x2e8c, 0x3a20, 0x16b0, + 0x0b68, 0x2344, 0x2f8c, 0x3b20, 0x17b0, +}; + +/* 2 channels per frame, 12 DIF sequences per channel, + 27 video segments per DIF sequence, 5 macroblocks per video segment */ +static const uint16_t dv_place_422_625[2*12*27*5] = { + 0x0c48, 0x2424, 0x306c, 0x0000, 0x1890, + 0x0d48, 0x2524, 0x316c, 0x0100, 0x1990, + 0x0e48, 0x2624, 0x326c, 0x0200, 0x1a90, + 0x0e4c, 0x2628, 0x3270, 0x0204, 0x1a94, + 0x0d4c, 0x2528, 0x3170, 0x0104, 0x1994, + 0x0c4c, 0x2428, 0x3070, 0x0004, 0x1894, + 0x0c50, 0x242c, 0x3074, 0x0008, 0x1898, + 0x0d50, 0x252c, 0x3174, 0x0108, 0x1998, + 0x0e50, 0x262c, 0x3274, 0x0208, 0x1a98, + 0x0e54, 0x2630, 0x3278, 0x020c, 0x1a9c, + 0x0d54, 0x2530, 0x3178, 0x010c, 0x199c, + 0x0c54, 0x2430, 0x3078, 0x000c, 0x189c, + 0x0c58, 0x2434, 0x307c, 0x0010, 0x18a0, + 0x0d58, 0x2534, 0x317c, 0x0110, 0x19a0, + 0x0e58, 0x2634, 0x327c, 0x0210, 0x1aa0, + 0x0e5c, 0x2638, 0x3280, 0x0214, 0x1aa4, + 0x0d5c, 0x2538, 0x3180, 0x0114, 0x19a4, + 0x0c5c, 0x2438, 0x3080, 0x0014, 0x18a4, + 0x0c60, 0x243c, 0x3084, 0x0018, 0x18a8, + 0x0d60, 0x253c, 0x3184, 0x0118, 0x19a8, + 0x0e60, 0x263c, 0x3284, 0x0218, 0x1aa8, + 0x0e64, 0x2640, 0x3288, 0x021c, 0x1aac, + 0x0d64, 0x2540, 0x3188, 0x011c, 0x19ac, + 0x0c64, 0x2440, 0x3088, 0x001c, 0x18ac, + 0x0c68, 0x2444, 0x308c, 0x0020, 0x18b0, + 0x0d68, 0x2544, 0x318c, 0x0120, 0x19b0, + 0x0e68, 0x2644, 0x328c, 0x0220, 0x1ab0, + 0x1248, 0x2a24, 0x366c, 0x0600, 0x1e90, + 0x1348, 0x2b24, 0x376c, 0x0700, 0x1f90, + 0x1448, 0x2c24, 0x386c, 0x0800, 0x2090, + 0x144c, 0x2c28, 0x3870, 0x0804, 0x2094, + 0x134c, 0x2b28, 0x3770, 0x0704, 0x1f94, + 0x124c, 0x2a28, 0x3670, 0x0604, 0x1e94, + 0x1250, 0x2a2c, 0x3674, 0x0608, 0x1e98, + 0x1350, 0x2b2c, 0x3774, 0x0708, 0x1f98, + 0x1450, 0x2c2c, 0x3874, 0x0808, 0x2098, + 0x1454, 0x2c30, 0x3878, 0x080c, 0x209c, + 0x1354, 0x2b30, 0x3778, 0x070c, 0x1f9c, + 0x1254, 0x2a30, 0x3678, 0x060c, 0x1e9c, + 0x1258, 0x2a34, 0x367c, 0x0610, 0x1ea0, + 0x1358, 0x2b34, 0x377c, 0x0710, 0x1fa0, + 0x1458, 0x2c34, 0x387c, 0x0810, 0x20a0, + 0x145c, 0x2c38, 0x3880, 0x0814, 0x20a4, + 0x135c, 0x2b38, 0x3780, 0x0714, 0x1fa4, + 0x125c, 0x2a38, 0x3680, 0x0614, 0x1ea4, + 0x1260, 0x2a3c, 0x3684, 0x0618, 0x1ea8, + 0x1360, 0x2b3c, 0x3784, 0x0718, 0x1fa8, + 0x1460, 0x2c3c, 0x3884, 0x0818, 0x20a8, + 0x1464, 0x2c40, 0x3888, 0x081c, 0x20ac, + 0x1364, 0x2b40, 0x3788, 0x071c, 0x1fac, + 0x1264, 0x2a40, 0x3688, 0x061c, 0x1eac, + 0x1268, 0x2a44, 0x368c, 0x0620, 0x1eb0, + 0x1368, 0x2b44, 0x378c, 0x0720, 0x1fb0, + 0x1468, 0x2c44, 0x388c, 0x0820, 0x20b0, + 0x1848, 0x3024, 0x3c6c, 0x0c00, 0x2490, + 0x1948, 0x3124, 0x3d6c, 0x0d00, 0x2590, + 0x1a48, 0x3224, 0x3e6c, 0x0e00, 0x2690, + 0x1a4c, 0x3228, 0x3e70, 0x0e04, 0x2694, + 0x194c, 0x3128, 0x3d70, 0x0d04, 0x2594, + 0x184c, 0x3028, 0x3c70, 0x0c04, 0x2494, + 0x1850, 0x302c, 0x3c74, 0x0c08, 0x2498, + 0x1950, 0x312c, 0x3d74, 0x0d08, 0x2598, + 0x1a50, 0x322c, 0x3e74, 0x0e08, 0x2698, + 0x1a54, 0x3230, 0x3e78, 0x0e0c, 0x269c, + 0x1954, 0x3130, 0x3d78, 0x0d0c, 0x259c, + 0x1854, 0x3030, 0x3c78, 0x0c0c, 0x249c, + 0x1858, 0x3034, 0x3c7c, 0x0c10, 0x24a0, + 0x1958, 0x3134, 0x3d7c, 0x0d10, 0x25a0, + 0x1a58, 0x3234, 0x3e7c, 0x0e10, 0x26a0, + 0x1a5c, 0x3238, 0x3e80, 0x0e14, 0x26a4, + 0x195c, 0x3138, 0x3d80, 0x0d14, 0x25a4, + 0x185c, 0x3038, 0x3c80, 0x0c14, 0x24a4, + 0x1860, 0x303c, 0x3c84, 0x0c18, 0x24a8, + 0x1960, 0x313c, 0x3d84, 0x0d18, 0x25a8, + 0x1a60, 0x323c, 0x3e84, 0x0e18, 0x26a8, + 0x1a64, 0x3240, 0x3e88, 0x0e1c, 0x26ac, + 0x1964, 0x3140, 0x3d88, 0x0d1c, 0x25ac, + 0x1864, 0x3040, 0x3c88, 0x0c1c, 0x24ac, + 0x1868, 0x3044, 0x3c8c, 0x0c20, 0x24b0, + 0x1968, 0x3144, 0x3d8c, 0x0d20, 0x25b0, + 0x1a68, 0x3244, 0x3e8c, 0x0e20, 0x26b0, + 0x1e48, 0x3624, 0x426c, 0x1200, 0x2a90, + 0x1f48, 0x3724, 0x436c, 0x1300, 0x2b90, + 0x2048, 0x3824, 0x446c, 0x1400, 0x2c90, + 0x204c, 0x3828, 0x4470, 0x1404, 0x2c94, + 0x1f4c, 0x3728, 0x4370, 0x1304, 0x2b94, + 0x1e4c, 0x3628, 0x4270, 0x1204, 0x2a94, + 0x1e50, 0x362c, 0x4274, 0x1208, 0x2a98, + 0x1f50, 0x372c, 0x4374, 0x1308, 0x2b98, + 0x2050, 0x382c, 0x4474, 0x1408, 0x2c98, + 0x2054, 0x3830, 0x4478, 0x140c, 0x2c9c, + 0x1f54, 0x3730, 0x4378, 0x130c, 0x2b9c, + 0x1e54, 0x3630, 0x4278, 0x120c, 0x2a9c, + 0x1e58, 0x3634, 0x427c, 0x1210, 0x2aa0, + 0x1f58, 0x3734, 0x437c, 0x1310, 0x2ba0, + 0x2058, 0x3834, 0x447c, 0x1410, 0x2ca0, + 0x205c, 0x3838, 0x4480, 0x1414, 0x2ca4, + 0x1f5c, 0x3738, 0x4380, 0x1314, 0x2ba4, + 0x1e5c, 0x3638, 0x4280, 0x1214, 0x2aa4, + 0x1e60, 0x363c, 0x4284, 0x1218, 0x2aa8, + 0x1f60, 0x373c, 0x4384, 0x1318, 0x2ba8, + 0x2060, 0x383c, 0x4484, 0x1418, 0x2ca8, + 0x2064, 0x3840, 0x4488, 0x141c, 0x2cac, + 0x1f64, 0x3740, 0x4388, 0x131c, 0x2bac, + 0x1e64, 0x3640, 0x4288, 0x121c, 0x2aac, + 0x1e68, 0x3644, 0x428c, 0x1220, 0x2ab0, + 0x1f68, 0x3744, 0x438c, 0x1320, 0x2bb0, + 0x2068, 0x3844, 0x448c, 0x1420, 0x2cb0, + 0x2448, 0x3c24, 0x006c, 0x1800, 0x3090, + 0x2548, 0x3d24, 0x016c, 0x1900, 0x3190, + 0x2648, 0x3e24, 0x026c, 0x1a00, 0x3290, + 0x264c, 0x3e28, 0x0270, 0x1a04, 0x3294, + 0x254c, 0x3d28, 0x0170, 0x1904, 0x3194, + 0x244c, 0x3c28, 0x0070, 0x1804, 0x3094, + 0x2450, 0x3c2c, 0x0074, 0x1808, 0x3098, + 0x2550, 0x3d2c, 0x0174, 0x1908, 0x3198, + 0x2650, 0x3e2c, 0x0274, 0x1a08, 0x3298, + 0x2654, 0x3e30, 0x0278, 0x1a0c, 0x329c, + 0x2554, 0x3d30, 0x0178, 0x190c, 0x319c, + 0x2454, 0x3c30, 0x0078, 0x180c, 0x309c, + 0x2458, 0x3c34, 0x007c, 0x1810, 0x30a0, + 0x2558, 0x3d34, 0x017c, 0x1910, 0x31a0, + 0x2658, 0x3e34, 0x027c, 0x1a10, 0x32a0, + 0x265c, 0x3e38, 0x0280, 0x1a14, 0x32a4, + 0x255c, 0x3d38, 0x0180, 0x1914, 0x31a4, + 0x245c, 0x3c38, 0x0080, 0x1814, 0x30a4, + 0x2460, 0x3c3c, 0x0084, 0x1818, 0x30a8, + 0x2560, 0x3d3c, 0x0184, 0x1918, 0x31a8, + 0x2660, 0x3e3c, 0x0284, 0x1a18, 0x32a8, + 0x2664, 0x3e40, 0x0288, 0x1a1c, 0x32ac, + 0x2564, 0x3d40, 0x0188, 0x191c, 0x31ac, + 0x2464, 0x3c40, 0x0088, 0x181c, 0x30ac, + 0x2468, 0x3c44, 0x008c, 0x1820, 0x30b0, + 0x2568, 0x3d44, 0x018c, 0x1920, 0x31b0, + 0x2668, 0x3e44, 0x028c, 0x1a20, 0x32b0, + 0x2a48, 0x4224, 0x066c, 0x1e00, 0x3690, + 0x2b48, 0x4324, 0x076c, 0x1f00, 0x3790, + 0x2c48, 0x4424, 0x086c, 0x2000, 0x3890, + 0x2c4c, 0x4428, 0x0870, 0x2004, 0x3894, + 0x2b4c, 0x4328, 0x0770, 0x1f04, 0x3794, + 0x2a4c, 0x4228, 0x0670, 0x1e04, 0x3694, + 0x2a50, 0x422c, 0x0674, 0x1e08, 0x3698, + 0x2b50, 0x432c, 0x0774, 0x1f08, 0x3798, + 0x2c50, 0x442c, 0x0874, 0x2008, 0x3898, + 0x2c54, 0x4430, 0x0878, 0x200c, 0x389c, + 0x2b54, 0x4330, 0x0778, 0x1f0c, 0x379c, + 0x2a54, 0x4230, 0x0678, 0x1e0c, 0x369c, + 0x2a58, 0x4234, 0x067c, 0x1e10, 0x36a0, + 0x2b58, 0x4334, 0x077c, 0x1f10, 0x37a0, + 0x2c58, 0x4434, 0x087c, 0x2010, 0x38a0, + 0x2c5c, 0x4438, 0x0880, 0x2014, 0x38a4, + 0x2b5c, 0x4338, 0x0780, 0x1f14, 0x37a4, + 0x2a5c, 0x4238, 0x0680, 0x1e14, 0x36a4, + 0x2a60, 0x423c, 0x0684, 0x1e18, 0x36a8, + 0x2b60, 0x433c, 0x0784, 0x1f18, 0x37a8, + 0x2c60, 0x443c, 0x0884, 0x2018, 0x38a8, + 0x2c64, 0x4440, 0x0888, 0x201c, 0x38ac, + 0x2b64, 0x4340, 0x0788, 0x1f1c, 0x37ac, + 0x2a64, 0x4240, 0x0688, 0x1e1c, 0x36ac, + 0x2a68, 0x4244, 0x068c, 0x1e20, 0x36b0, + 0x2b68, 0x4344, 0x078c, 0x1f20, 0x37b0, + 0x2c68, 0x4444, 0x088c, 0x2020, 0x38b0, + 0x3048, 0x0024, 0x0c6c, 0x2400, 0x3c90, + 0x3148, 0x0124, 0x0d6c, 0x2500, 0x3d90, + 0x3248, 0x0224, 0x0e6c, 0x2600, 0x3e90, + 0x324c, 0x0228, 0x0e70, 0x2604, 0x3e94, + 0x314c, 0x0128, 0x0d70, 0x2504, 0x3d94, + 0x304c, 0x0028, 0x0c70, 0x2404, 0x3c94, + 0x3050, 0x002c, 0x0c74, 0x2408, 0x3c98, + 0x3150, 0x012c, 0x0d74, 0x2508, 0x3d98, + 0x3250, 0x022c, 0x0e74, 0x2608, 0x3e98, + 0x3254, 0x0230, 0x0e78, 0x260c, 0x3e9c, + 0x3154, 0x0130, 0x0d78, 0x250c, 0x3d9c, + 0x3054, 0x0030, 0x0c78, 0x240c, 0x3c9c, + 0x3058, 0x0034, 0x0c7c, 0x2410, 0x3ca0, + 0x3158, 0x0134, 0x0d7c, 0x2510, 0x3da0, + 0x3258, 0x0234, 0x0e7c, 0x2610, 0x3ea0, + 0x325c, 0x0238, 0x0e80, 0x2614, 0x3ea4, + 0x315c, 0x0138, 0x0d80, 0x2514, 0x3da4, + 0x305c, 0x0038, 0x0c80, 0x2414, 0x3ca4, + 0x3060, 0x003c, 0x0c84, 0x2418, 0x3ca8, + 0x3160, 0x013c, 0x0d84, 0x2518, 0x3da8, + 0x3260, 0x023c, 0x0e84, 0x2618, 0x3ea8, + 0x3264, 0x0240, 0x0e88, 0x261c, 0x3eac, + 0x3164, 0x0140, 0x0d88, 0x251c, 0x3dac, + 0x3064, 0x0040, 0x0c88, 0x241c, 0x3cac, + 0x3068, 0x0044, 0x0c8c, 0x2420, 0x3cb0, + 0x3168, 0x0144, 0x0d8c, 0x2520, 0x3db0, + 0x3268, 0x0244, 0x0e8c, 0x2620, 0x3eb0, + 0x3648, 0x0624, 0x126c, 0x2a00, 0x4290, + 0x3748, 0x0724, 0x136c, 0x2b00, 0x4390, + 0x3848, 0x0824, 0x146c, 0x2c00, 0x4490, + 0x384c, 0x0828, 0x1470, 0x2c04, 0x4494, + 0x374c, 0x0728, 0x1370, 0x2b04, 0x4394, + 0x364c, 0x0628, 0x1270, 0x2a04, 0x4294, + 0x3650, 0x062c, 0x1274, 0x2a08, 0x4298, + 0x3750, 0x072c, 0x1374, 0x2b08, 0x4398, + 0x3850, 0x082c, 0x1474, 0x2c08, 0x4498, + 0x3854, 0x0830, 0x1478, 0x2c0c, 0x449c, + 0x3754, 0x0730, 0x1378, 0x2b0c, 0x439c, + 0x3654, 0x0630, 0x1278, 0x2a0c, 0x429c, + 0x3658, 0x0634, 0x127c, 0x2a10, 0x42a0, + 0x3758, 0x0734, 0x137c, 0x2b10, 0x43a0, + 0x3858, 0x0834, 0x147c, 0x2c10, 0x44a0, + 0x385c, 0x0838, 0x1480, 0x2c14, 0x44a4, + 0x375c, 0x0738, 0x1380, 0x2b14, 0x43a4, + 0x365c, 0x0638, 0x1280, 0x2a14, 0x42a4, + 0x3660, 0x063c, 0x1284, 0x2a18, 0x42a8, + 0x3760, 0x073c, 0x1384, 0x2b18, 0x43a8, + 0x3860, 0x083c, 0x1484, 0x2c18, 0x44a8, + 0x3864, 0x0840, 0x1488, 0x2c1c, 0x44ac, + 0x3764, 0x0740, 0x1388, 0x2b1c, 0x43ac, + 0x3664, 0x0640, 0x1288, 0x2a1c, 0x42ac, + 0x3668, 0x0644, 0x128c, 0x2a20, 0x42b0, + 0x3768, 0x0744, 0x138c, 0x2b20, 0x43b0, + 0x3868, 0x0844, 0x148c, 0x2c20, 0x44b0, + 0x3c48, 0x0c24, 0x186c, 0x3000, 0x0090, + 0x3d48, 0x0d24, 0x196c, 0x3100, 0x0190, + 0x3e48, 0x0e24, 0x1a6c, 0x3200, 0x0290, + 0x3e4c, 0x0e28, 0x1a70, 0x3204, 0x0294, + 0x3d4c, 0x0d28, 0x1970, 0x3104, 0x0194, + 0x3c4c, 0x0c28, 0x1870, 0x3004, 0x0094, + 0x3c50, 0x0c2c, 0x1874, 0x3008, 0x0098, + 0x3d50, 0x0d2c, 0x1974, 0x3108, 0x0198, + 0x3e50, 0x0e2c, 0x1a74, 0x3208, 0x0298, + 0x3e54, 0x0e30, 0x1a78, 0x320c, 0x029c, + 0x3d54, 0x0d30, 0x1978, 0x310c, 0x019c, + 0x3c54, 0x0c30, 0x1878, 0x300c, 0x009c, + 0x3c58, 0x0c34, 0x187c, 0x3010, 0x00a0, + 0x3d58, 0x0d34, 0x197c, 0x3110, 0x01a0, + 0x3e58, 0x0e34, 0x1a7c, 0x3210, 0x02a0, + 0x3e5c, 0x0e38, 0x1a80, 0x3214, 0x02a4, + 0x3d5c, 0x0d38, 0x1980, 0x3114, 0x01a4, + 0x3c5c, 0x0c38, 0x1880, 0x3014, 0x00a4, + 0x3c60, 0x0c3c, 0x1884, 0x3018, 0x00a8, + 0x3d60, 0x0d3c, 0x1984, 0x3118, 0x01a8, + 0x3e60, 0x0e3c, 0x1a84, 0x3218, 0x02a8, + 0x3e64, 0x0e40, 0x1a88, 0x321c, 0x02ac, + 0x3d64, 0x0d40, 0x1988, 0x311c, 0x01ac, + 0x3c64, 0x0c40, 0x1888, 0x301c, 0x00ac, + 0x3c68, 0x0c44, 0x188c, 0x3020, 0x00b0, + 0x3d68, 0x0d44, 0x198c, 0x3120, 0x01b0, + 0x3e68, 0x0e44, 0x1a8c, 0x3220, 0x02b0, + 0x4248, 0x1224, 0x1e6c, 0x3600, 0x0690, + 0x4348, 0x1324, 0x1f6c, 0x3700, 0x0790, + 0x4448, 0x1424, 0x206c, 0x3800, 0x0890, + 0x444c, 0x1428, 0x2070, 0x3804, 0x0894, + 0x434c, 0x1328, 0x1f70, 0x3704, 0x0794, + 0x424c, 0x1228, 0x1e70, 0x3604, 0x0694, + 0x4250, 0x122c, 0x1e74, 0x3608, 0x0698, + 0x4350, 0x132c, 0x1f74, 0x3708, 0x0798, + 0x4450, 0x142c, 0x2074, 0x3808, 0x0898, + 0x4454, 0x1430, 0x2078, 0x380c, 0x089c, + 0x4354, 0x1330, 0x1f78, 0x370c, 0x079c, + 0x4254, 0x1230, 0x1e78, 0x360c, 0x069c, + 0x4258, 0x1234, 0x1e7c, 0x3610, 0x06a0, + 0x4358, 0x1334, 0x1f7c, 0x3710, 0x07a0, + 0x4458, 0x1434, 0x207c, 0x3810, 0x08a0, + 0x445c, 0x1438, 0x2080, 0x3814, 0x08a4, + 0x435c, 0x1338, 0x1f80, 0x3714, 0x07a4, + 0x425c, 0x1238, 0x1e80, 0x3614, 0x06a4, + 0x4260, 0x123c, 0x1e84, 0x3618, 0x06a8, + 0x4360, 0x133c, 0x1f84, 0x3718, 0x07a8, + 0x4460, 0x143c, 0x2084, 0x3818, 0x08a8, + 0x4464, 0x1440, 0x2088, 0x381c, 0x08ac, + 0x4364, 0x1340, 0x1f88, 0x371c, 0x07ac, + 0x4264, 0x1240, 0x1e88, 0x361c, 0x06ac, + 0x4268, 0x1244, 0x1e8c, 0x3620, 0x06b0, + 0x4368, 0x1344, 0x1f8c, 0x3720, 0x07b0, + 0x4468, 0x1444, 0x208c, 0x3820, 0x08b0, + 0x0048, 0x1824, 0x246c, 0x3c00, 0x0c90, + 0x0148, 0x1924, 0x256c, 0x3d00, 0x0d90, + 0x0248, 0x1a24, 0x266c, 0x3e00, 0x0e90, + 0x024c, 0x1a28, 0x2670, 0x3e04, 0x0e94, + 0x014c, 0x1928, 0x2570, 0x3d04, 0x0d94, + 0x004c, 0x1828, 0x2470, 0x3c04, 0x0c94, + 0x0050, 0x182c, 0x2474, 0x3c08, 0x0c98, + 0x0150, 0x192c, 0x2574, 0x3d08, 0x0d98, + 0x0250, 0x1a2c, 0x2674, 0x3e08, 0x0e98, + 0x0254, 0x1a30, 0x2678, 0x3e0c, 0x0e9c, + 0x0154, 0x1930, 0x2578, 0x3d0c, 0x0d9c, + 0x0054, 0x1830, 0x2478, 0x3c0c, 0x0c9c, + 0x0058, 0x1834, 0x247c, 0x3c10, 0x0ca0, + 0x0158, 0x1934, 0x257c, 0x3d10, 0x0da0, + 0x0258, 0x1a34, 0x267c, 0x3e10, 0x0ea0, + 0x025c, 0x1a38, 0x2680, 0x3e14, 0x0ea4, + 0x015c, 0x1938, 0x2580, 0x3d14, 0x0da4, + 0x005c, 0x1838, 0x2480, 0x3c14, 0x0ca4, + 0x0060, 0x183c, 0x2484, 0x3c18, 0x0ca8, + 0x0160, 0x193c, 0x2584, 0x3d18, 0x0da8, + 0x0260, 0x1a3c, 0x2684, 0x3e18, 0x0ea8, + 0x0264, 0x1a40, 0x2688, 0x3e1c, 0x0eac, + 0x0164, 0x1940, 0x2588, 0x3d1c, 0x0dac, + 0x0064, 0x1840, 0x2488, 0x3c1c, 0x0cac, + 0x0068, 0x1844, 0x248c, 0x3c20, 0x0cb0, + 0x0168, 0x1944, 0x258c, 0x3d20, 0x0db0, + 0x0268, 0x1a44, 0x268c, 0x3e20, 0x0eb0, + 0x0648, 0x1e24, 0x2a6c, 0x4200, 0x1290, + 0x0748, 0x1f24, 0x2b6c, 0x4300, 0x1390, + 0x0848, 0x2024, 0x2c6c, 0x4400, 0x1490, + 0x084c, 0x2028, 0x2c70, 0x4404, 0x1494, + 0x074c, 0x1f28, 0x2b70, 0x4304, 0x1394, + 0x064c, 0x1e28, 0x2a70, 0x4204, 0x1294, + 0x0650, 0x1e2c, 0x2a74, 0x4208, 0x1298, + 0x0750, 0x1f2c, 0x2b74, 0x4308, 0x1398, + 0x0850, 0x202c, 0x2c74, 0x4408, 0x1498, + 0x0854, 0x2030, 0x2c78, 0x440c, 0x149c, + 0x0754, 0x1f30, 0x2b78, 0x430c, 0x139c, + 0x0654, 0x1e30, 0x2a78, 0x420c, 0x129c, + 0x0658, 0x1e34, 0x2a7c, 0x4210, 0x12a0, + 0x0758, 0x1f34, 0x2b7c, 0x4310, 0x13a0, + 0x0858, 0x2034, 0x2c7c, 0x4410, 0x14a0, + 0x085c, 0x2038, 0x2c80, 0x4414, 0x14a4, + 0x075c, 0x1f38, 0x2b80, 0x4314, 0x13a4, + 0x065c, 0x1e38, 0x2a80, 0x4214, 0x12a4, + 0x0660, 0x1e3c, 0x2a84, 0x4218, 0x12a8, + 0x0760, 0x1f3c, 0x2b84, 0x4318, 0x13a8, + 0x0860, 0x203c, 0x2c84, 0x4418, 0x14a8, + 0x0864, 0x2040, 0x2c88, 0x441c, 0x14ac, + 0x0764, 0x1f40, 0x2b88, 0x431c, 0x13ac, + 0x0664, 0x1e40, 0x2a88, 0x421c, 0x12ac, + 0x0668, 0x1e44, 0x2a8c, 0x4220, 0x12b0, + 0x0768, 0x1f44, 0x2b8c, 0x4320, 0x13b0, + 0x0868, 0x2044, 0x2c8c, 0x4420, 0x14b0, + 0x0f48, 0x2724, 0x336c, 0x0300, 0x1b90, + 0x1048, 0x2824, 0x346c, 0x0400, 0x1c90, + 0x1148, 0x2924, 0x356c, 0x0500, 0x1d90, + 0x114c, 0x2928, 0x3570, 0x0504, 0x1d94, + 0x104c, 0x2828, 0x3470, 0x0404, 0x1c94, + 0x0f4c, 0x2728, 0x3370, 0x0304, 0x1b94, + 0x0f50, 0x272c, 0x3374, 0x0308, 0x1b98, + 0x1050, 0x282c, 0x3474, 0x0408, 0x1c98, + 0x1150, 0x292c, 0x3574, 0x0508, 0x1d98, + 0x1154, 0x2930, 0x3578, 0x050c, 0x1d9c, + 0x1054, 0x2830, 0x3478, 0x040c, 0x1c9c, + 0x0f54, 0x2730, 0x3378, 0x030c, 0x1b9c, + 0x0f58, 0x2734, 0x337c, 0x0310, 0x1ba0, + 0x1058, 0x2834, 0x347c, 0x0410, 0x1ca0, + 0x1158, 0x2934, 0x357c, 0x0510, 0x1da0, + 0x115c, 0x2938, 0x3580, 0x0514, 0x1da4, + 0x105c, 0x2838, 0x3480, 0x0414, 0x1ca4, + 0x0f5c, 0x2738, 0x3380, 0x0314, 0x1ba4, + 0x0f60, 0x273c, 0x3384, 0x0318, 0x1ba8, + 0x1060, 0x283c, 0x3484, 0x0418, 0x1ca8, + 0x1160, 0x293c, 0x3584, 0x0518, 0x1da8, + 0x1164, 0x2940, 0x3588, 0x051c, 0x1dac, + 0x1064, 0x2840, 0x3488, 0x041c, 0x1cac, + 0x0f64, 0x2740, 0x3388, 0x031c, 0x1bac, + 0x0f68, 0x2744, 0x338c, 0x0320, 0x1bb0, + 0x1068, 0x2844, 0x348c, 0x0420, 0x1cb0, + 0x1168, 0x2944, 0x358c, 0x0520, 0x1db0, + 0x1548, 0x2d24, 0x396c, 0x0900, 0x2190, + 0x1648, 0x2e24, 0x3a6c, 0x0a00, 0x2290, + 0x1748, 0x2f24, 0x3b6c, 0x0b00, 0x2390, + 0x174c, 0x2f28, 0x3b70, 0x0b04, 0x2394, + 0x164c, 0x2e28, 0x3a70, 0x0a04, 0x2294, + 0x154c, 0x2d28, 0x3970, 0x0904, 0x2194, + 0x1550, 0x2d2c, 0x3974, 0x0908, 0x2198, + 0x1650, 0x2e2c, 0x3a74, 0x0a08, 0x2298, + 0x1750, 0x2f2c, 0x3b74, 0x0b08, 0x2398, + 0x1754, 0x2f30, 0x3b78, 0x0b0c, 0x239c, + 0x1654, 0x2e30, 0x3a78, 0x0a0c, 0x229c, + 0x1554, 0x2d30, 0x3978, 0x090c, 0x219c, + 0x1558, 0x2d34, 0x397c, 0x0910, 0x21a0, + 0x1658, 0x2e34, 0x3a7c, 0x0a10, 0x22a0, + 0x1758, 0x2f34, 0x3b7c, 0x0b10, 0x23a0, + 0x175c, 0x2f38, 0x3b80, 0x0b14, 0x23a4, + 0x165c, 0x2e38, 0x3a80, 0x0a14, 0x22a4, + 0x155c, 0x2d38, 0x3980, 0x0914, 0x21a4, + 0x1560, 0x2d3c, 0x3984, 0x0918, 0x21a8, + 0x1660, 0x2e3c, 0x3a84, 0x0a18, 0x22a8, + 0x1760, 0x2f3c, 0x3b84, 0x0b18, 0x23a8, + 0x1764, 0x2f40, 0x3b88, 0x0b1c, 0x23ac, + 0x1664, 0x2e40, 0x3a88, 0x0a1c, 0x22ac, + 0x1564, 0x2d40, 0x3988, 0x091c, 0x21ac, + 0x1568, 0x2d44, 0x398c, 0x0920, 0x21b0, + 0x1668, 0x2e44, 0x3a8c, 0x0a20, 0x22b0, + 0x1768, 0x2f44, 0x3b8c, 0x0b20, 0x23b0, + 0x1b48, 0x3324, 0x3f6c, 0x0f00, 0x2790, + 0x1c48, 0x3424, 0x406c, 0x1000, 0x2890, + 0x1d48, 0x3524, 0x416c, 0x1100, 0x2990, + 0x1d4c, 0x3528, 0x4170, 0x1104, 0x2994, + 0x1c4c, 0x3428, 0x4070, 0x1004, 0x2894, + 0x1b4c, 0x3328, 0x3f70, 0x0f04, 0x2794, + 0x1b50, 0x332c, 0x3f74, 0x0f08, 0x2798, + 0x1c50, 0x342c, 0x4074, 0x1008, 0x2898, + 0x1d50, 0x352c, 0x4174, 0x1108, 0x2998, + 0x1d54, 0x3530, 0x4178, 0x110c, 0x299c, + 0x1c54, 0x3430, 0x4078, 0x100c, 0x289c, + 0x1b54, 0x3330, 0x3f78, 0x0f0c, 0x279c, + 0x1b58, 0x3334, 0x3f7c, 0x0f10, 0x27a0, + 0x1c58, 0x3434, 0x407c, 0x1010, 0x28a0, + 0x1d58, 0x3534, 0x417c, 0x1110, 0x29a0, + 0x1d5c, 0x3538, 0x4180, 0x1114, 0x29a4, + 0x1c5c, 0x3438, 0x4080, 0x1014, 0x28a4, + 0x1b5c, 0x3338, 0x3f80, 0x0f14, 0x27a4, + 0x1b60, 0x333c, 0x3f84, 0x0f18, 0x27a8, + 0x1c60, 0x343c, 0x4084, 0x1018, 0x28a8, + 0x1d60, 0x353c, 0x4184, 0x1118, 0x29a8, + 0x1d64, 0x3540, 0x4188, 0x111c, 0x29ac, + 0x1c64, 0x3440, 0x4088, 0x101c, 0x28ac, + 0x1b64, 0x3340, 0x3f88, 0x0f1c, 0x27ac, + 0x1b68, 0x3344, 0x3f8c, 0x0f20, 0x27b0, + 0x1c68, 0x3444, 0x408c, 0x1020, 0x28b0, + 0x1d68, 0x3544, 0x418c, 0x1120, 0x29b0, + 0x2148, 0x3924, 0x456c, 0x1500, 0x2d90, + 0x2248, 0x3a24, 0x466c, 0x1600, 0x2e90, + 0x2348, 0x3b24, 0x476c, 0x1700, 0x2f90, + 0x234c, 0x3b28, 0x4770, 0x1704, 0x2f94, + 0x224c, 0x3a28, 0x4670, 0x1604, 0x2e94, + 0x214c, 0x3928, 0x4570, 0x1504, 0x2d94, + 0x2150, 0x392c, 0x4574, 0x1508, 0x2d98, + 0x2250, 0x3a2c, 0x4674, 0x1608, 0x2e98, + 0x2350, 0x3b2c, 0x4774, 0x1708, 0x2f98, + 0x2354, 0x3b30, 0x4778, 0x170c, 0x2f9c, + 0x2254, 0x3a30, 0x4678, 0x160c, 0x2e9c, + 0x2154, 0x3930, 0x4578, 0x150c, 0x2d9c, + 0x2158, 0x3934, 0x457c, 0x1510, 0x2da0, + 0x2258, 0x3a34, 0x467c, 0x1610, 0x2ea0, + 0x2358, 0x3b34, 0x477c, 0x1710, 0x2fa0, + 0x235c, 0x3b38, 0x4780, 0x1714, 0x2fa4, + 0x225c, 0x3a38, 0x4680, 0x1614, 0x2ea4, + 0x215c, 0x3938, 0x4580, 0x1514, 0x2da4, + 0x2160, 0x393c, 0x4584, 0x1518, 0x2da8, + 0x2260, 0x3a3c, 0x4684, 0x1618, 0x2ea8, + 0x2360, 0x3b3c, 0x4784, 0x1718, 0x2fa8, + 0x2364, 0x3b40, 0x4788, 0x171c, 0x2fac, + 0x2264, 0x3a40, 0x4688, 0x161c, 0x2eac, + 0x2164, 0x3940, 0x4588, 0x151c, 0x2dac, + 0x2168, 0x3944, 0x458c, 0x1520, 0x2db0, + 0x2268, 0x3a44, 0x468c, 0x1620, 0x2eb0, + 0x2368, 0x3b44, 0x478c, 0x1720, 0x2fb0, + 0x2748, 0x3f24, 0x036c, 0x1b00, 0x3390, + 0x2848, 0x4024, 0x046c, 0x1c00, 0x3490, + 0x2948, 0x4124, 0x056c, 0x1d00, 0x3590, + 0x294c, 0x4128, 0x0570, 0x1d04, 0x3594, + 0x284c, 0x4028, 0x0470, 0x1c04, 0x3494, + 0x274c, 0x3f28, 0x0370, 0x1b04, 0x3394, + 0x2750, 0x3f2c, 0x0374, 0x1b08, 0x3398, + 0x2850, 0x402c, 0x0474, 0x1c08, 0x3498, + 0x2950, 0x412c, 0x0574, 0x1d08, 0x3598, + 0x2954, 0x4130, 0x0578, 0x1d0c, 0x359c, + 0x2854, 0x4030, 0x0478, 0x1c0c, 0x349c, + 0x2754, 0x3f30, 0x0378, 0x1b0c, 0x339c, + 0x2758, 0x3f34, 0x037c, 0x1b10, 0x33a0, + 0x2858, 0x4034, 0x047c, 0x1c10, 0x34a0, + 0x2958, 0x4134, 0x057c, 0x1d10, 0x35a0, + 0x295c, 0x4138, 0x0580, 0x1d14, 0x35a4, + 0x285c, 0x4038, 0x0480, 0x1c14, 0x34a4, + 0x275c, 0x3f38, 0x0380, 0x1b14, 0x33a4, + 0x2760, 0x3f3c, 0x0384, 0x1b18, 0x33a8, + 0x2860, 0x403c, 0x0484, 0x1c18, 0x34a8, + 0x2960, 0x413c, 0x0584, 0x1d18, 0x35a8, + 0x2964, 0x4140, 0x0588, 0x1d1c, 0x35ac, + 0x2864, 0x4040, 0x0488, 0x1c1c, 0x34ac, + 0x2764, 0x3f40, 0x0388, 0x1b1c, 0x33ac, + 0x2768, 0x3f44, 0x038c, 0x1b20, 0x33b0, + 0x2868, 0x4044, 0x048c, 0x1c20, 0x34b0, + 0x2968, 0x4144, 0x058c, 0x1d20, 0x35b0, + 0x2d48, 0x4524, 0x096c, 0x2100, 0x3990, + 0x2e48, 0x4624, 0x0a6c, 0x2200, 0x3a90, + 0x2f48, 0x4724, 0x0b6c, 0x2300, 0x3b90, + 0x2f4c, 0x4728, 0x0b70, 0x2304, 0x3b94, + 0x2e4c, 0x4628, 0x0a70, 0x2204, 0x3a94, + 0x2d4c, 0x4528, 0x0970, 0x2104, 0x3994, + 0x2d50, 0x452c, 0x0974, 0x2108, 0x3998, + 0x2e50, 0x462c, 0x0a74, 0x2208, 0x3a98, + 0x2f50, 0x472c, 0x0b74, 0x2308, 0x3b98, + 0x2f54, 0x4730, 0x0b78, 0x230c, 0x3b9c, + 0x2e54, 0x4630, 0x0a78, 0x220c, 0x3a9c, + 0x2d54, 0x4530, 0x0978, 0x210c, 0x399c, + 0x2d58, 0x4534, 0x097c, 0x2110, 0x39a0, + 0x2e58, 0x4634, 0x0a7c, 0x2210, 0x3aa0, + 0x2f58, 0x4734, 0x0b7c, 0x2310, 0x3ba0, + 0x2f5c, 0x4738, 0x0b80, 0x2314, 0x3ba4, + 0x2e5c, 0x4638, 0x0a80, 0x2214, 0x3aa4, + 0x2d5c, 0x4538, 0x0980, 0x2114, 0x39a4, + 0x2d60, 0x453c, 0x0984, 0x2118, 0x39a8, + 0x2e60, 0x463c, 0x0a84, 0x2218, 0x3aa8, + 0x2f60, 0x473c, 0x0b84, 0x2318, 0x3ba8, + 0x2f64, 0x4740, 0x0b88, 0x231c, 0x3bac, + 0x2e64, 0x4640, 0x0a88, 0x221c, 0x3aac, + 0x2d64, 0x4540, 0x0988, 0x211c, 0x39ac, + 0x2d68, 0x4544, 0x098c, 0x2120, 0x39b0, + 0x2e68, 0x4644, 0x0a8c, 0x2220, 0x3ab0, + 0x2f68, 0x4744, 0x0b8c, 0x2320, 0x3bb0, + 0x3348, 0x0324, 0x0f6c, 0x2700, 0x3f90, + 0x3448, 0x0424, 0x106c, 0x2800, 0x4090, + 0x3548, 0x0524, 0x116c, 0x2900, 0x4190, + 0x354c, 0x0528, 0x1170, 0x2904, 0x4194, + 0x344c, 0x0428, 0x1070, 0x2804, 0x4094, + 0x334c, 0x0328, 0x0f70, 0x2704, 0x3f94, + 0x3350, 0x032c, 0x0f74, 0x2708, 0x3f98, + 0x3450, 0x042c, 0x1074, 0x2808, 0x4098, + 0x3550, 0x052c, 0x1174, 0x2908, 0x4198, + 0x3554, 0x0530, 0x1178, 0x290c, 0x419c, + 0x3454, 0x0430, 0x1078, 0x280c, 0x409c, + 0x3354, 0x0330, 0x0f78, 0x270c, 0x3f9c, + 0x3358, 0x0334, 0x0f7c, 0x2710, 0x3fa0, + 0x3458, 0x0434, 0x107c, 0x2810, 0x40a0, + 0x3558, 0x0534, 0x117c, 0x2910, 0x41a0, + 0x355c, 0x0538, 0x1180, 0x2914, 0x41a4, + 0x345c, 0x0438, 0x1080, 0x2814, 0x40a4, + 0x335c, 0x0338, 0x0f80, 0x2714, 0x3fa4, + 0x3360, 0x033c, 0x0f84, 0x2718, 0x3fa8, + 0x3460, 0x043c, 0x1084, 0x2818, 0x40a8, + 0x3560, 0x053c, 0x1184, 0x2918, 0x41a8, + 0x3564, 0x0540, 0x1188, 0x291c, 0x41ac, + 0x3464, 0x0440, 0x1088, 0x281c, 0x40ac, + 0x3364, 0x0340, 0x0f88, 0x271c, 0x3fac, + 0x3368, 0x0344, 0x0f8c, 0x2720, 0x3fb0, + 0x3468, 0x0444, 0x108c, 0x2820, 0x40b0, + 0x3568, 0x0544, 0x118c, 0x2920, 0x41b0, + 0x3948, 0x0924, 0x156c, 0x2d00, 0x4590, + 0x3a48, 0x0a24, 0x166c, 0x2e00, 0x4690, + 0x3b48, 0x0b24, 0x176c, 0x2f00, 0x4790, + 0x3b4c, 0x0b28, 0x1770, 0x2f04, 0x4794, + 0x3a4c, 0x0a28, 0x1670, 0x2e04, 0x4694, + 0x394c, 0x0928, 0x1570, 0x2d04, 0x4594, + 0x3950, 0x092c, 0x1574, 0x2d08, 0x4598, + 0x3a50, 0x0a2c, 0x1674, 0x2e08, 0x4698, + 0x3b50, 0x0b2c, 0x1774, 0x2f08, 0x4798, + 0x3b54, 0x0b30, 0x1778, 0x2f0c, 0x479c, + 0x3a54, 0x0a30, 0x1678, 0x2e0c, 0x469c, + 0x3954, 0x0930, 0x1578, 0x2d0c, 0x459c, + 0x3958, 0x0934, 0x157c, 0x2d10, 0x45a0, + 0x3a58, 0x0a34, 0x167c, 0x2e10, 0x46a0, + 0x3b58, 0x0b34, 0x177c, 0x2f10, 0x47a0, + 0x3b5c, 0x0b38, 0x1780, 0x2f14, 0x47a4, + 0x3a5c, 0x0a38, 0x1680, 0x2e14, 0x46a4, + 0x395c, 0x0938, 0x1580, 0x2d14, 0x45a4, + 0x3960, 0x093c, 0x1584, 0x2d18, 0x45a8, + 0x3a60, 0x0a3c, 0x1684, 0x2e18, 0x46a8, + 0x3b60, 0x0b3c, 0x1784, 0x2f18, 0x47a8, + 0x3b64, 0x0b40, 0x1788, 0x2f1c, 0x47ac, + 0x3a64, 0x0a40, 0x1688, 0x2e1c, 0x46ac, + 0x3964, 0x0940, 0x1588, 0x2d1c, 0x45ac, + 0x3968, 0x0944, 0x158c, 0x2d20, 0x45b0, + 0x3a68, 0x0a44, 0x168c, 0x2e20, 0x46b0, + 0x3b68, 0x0b44, 0x178c, 0x2f20, 0x47b0, + 0x3f48, 0x0f24, 0x1b6c, 0x3300, 0x0390, + 0x4048, 0x1024, 0x1c6c, 0x3400, 0x0490, + 0x4148, 0x1124, 0x1d6c, 0x3500, 0x0590, + 0x414c, 0x1128, 0x1d70, 0x3504, 0x0594, + 0x404c, 0x1028, 0x1c70, 0x3404, 0x0494, + 0x3f4c, 0x0f28, 0x1b70, 0x3304, 0x0394, + 0x3f50, 0x0f2c, 0x1b74, 0x3308, 0x0398, + 0x4050, 0x102c, 0x1c74, 0x3408, 0x0498, + 0x4150, 0x112c, 0x1d74, 0x3508, 0x0598, + 0x4154, 0x1130, 0x1d78, 0x350c, 0x059c, + 0x4054, 0x1030, 0x1c78, 0x340c, 0x049c, + 0x3f54, 0x0f30, 0x1b78, 0x330c, 0x039c, + 0x3f58, 0x0f34, 0x1b7c, 0x3310, 0x03a0, + 0x4058, 0x1034, 0x1c7c, 0x3410, 0x04a0, + 0x4158, 0x1134, 0x1d7c, 0x3510, 0x05a0, + 0x415c, 0x1138, 0x1d80, 0x3514, 0x05a4, + 0x405c, 0x1038, 0x1c80, 0x3414, 0x04a4, + 0x3f5c, 0x0f38, 0x1b80, 0x3314, 0x03a4, + 0x3f60, 0x0f3c, 0x1b84, 0x3318, 0x03a8, + 0x4060, 0x103c, 0x1c84, 0x3418, 0x04a8, + 0x4160, 0x113c, 0x1d84, 0x3518, 0x05a8, + 0x4164, 0x1140, 0x1d88, 0x351c, 0x05ac, + 0x4064, 0x1040, 0x1c88, 0x341c, 0x04ac, + 0x3f64, 0x0f40, 0x1b88, 0x331c, 0x03ac, + 0x3f68, 0x0f44, 0x1b8c, 0x3320, 0x03b0, + 0x4068, 0x1044, 0x1c8c, 0x3420, 0x04b0, + 0x4168, 0x1144, 0x1d8c, 0x3520, 0x05b0, + 0x4548, 0x1524, 0x216c, 0x3900, 0x0990, + 0x4648, 0x1624, 0x226c, 0x3a00, 0x0a90, + 0x4748, 0x1724, 0x236c, 0x3b00, 0x0b90, + 0x474c, 0x1728, 0x2370, 0x3b04, 0x0b94, + 0x464c, 0x1628, 0x2270, 0x3a04, 0x0a94, + 0x454c, 0x1528, 0x2170, 0x3904, 0x0994, + 0x4550, 0x152c, 0x2174, 0x3908, 0x0998, + 0x4650, 0x162c, 0x2274, 0x3a08, 0x0a98, + 0x4750, 0x172c, 0x2374, 0x3b08, 0x0b98, + 0x4754, 0x1730, 0x2378, 0x3b0c, 0x0b9c, + 0x4654, 0x1630, 0x2278, 0x3a0c, 0x0a9c, + 0x4554, 0x1530, 0x2178, 0x390c, 0x099c, + 0x4558, 0x1534, 0x217c, 0x3910, 0x09a0, + 0x4658, 0x1634, 0x227c, 0x3a10, 0x0aa0, + 0x4758, 0x1734, 0x237c, 0x3b10, 0x0ba0, + 0x475c, 0x1738, 0x2380, 0x3b14, 0x0ba4, + 0x465c, 0x1638, 0x2280, 0x3a14, 0x0aa4, + 0x455c, 0x1538, 0x2180, 0x3914, 0x09a4, + 0x4560, 0x153c, 0x2184, 0x3918, 0x09a8, + 0x4660, 0x163c, 0x2284, 0x3a18, 0x0aa8, + 0x4760, 0x173c, 0x2384, 0x3b18, 0x0ba8, + 0x4764, 0x1740, 0x2388, 0x3b1c, 0x0bac, + 0x4664, 0x1640, 0x2288, 0x3a1c, 0x0aac, + 0x4564, 0x1540, 0x2188, 0x391c, 0x09ac, + 0x4568, 0x1544, 0x218c, 0x3920, 0x09b0, + 0x4668, 0x1644, 0x228c, 0x3a20, 0x0ab0, + 0x4768, 0x1744, 0x238c, 0x3b20, 0x0bb0, + 0x0348, 0x1b24, 0x276c, 0x3f00, 0x0f90, + 0x0448, 0x1c24, 0x286c, 0x4000, 0x1090, + 0x0548, 0x1d24, 0x296c, 0x4100, 0x1190, + 0x054c, 0x1d28, 0x2970, 0x4104, 0x1194, + 0x044c, 0x1c28, 0x2870, 0x4004, 0x1094, + 0x034c, 0x1b28, 0x2770, 0x3f04, 0x0f94, + 0x0350, 0x1b2c, 0x2774, 0x3f08, 0x0f98, + 0x0450, 0x1c2c, 0x2874, 0x4008, 0x1098, + 0x0550, 0x1d2c, 0x2974, 0x4108, 0x1198, + 0x0554, 0x1d30, 0x2978, 0x410c, 0x119c, + 0x0454, 0x1c30, 0x2878, 0x400c, 0x109c, + 0x0354, 0x1b30, 0x2778, 0x3f0c, 0x0f9c, + 0x0358, 0x1b34, 0x277c, 0x3f10, 0x0fa0, + 0x0458, 0x1c34, 0x287c, 0x4010, 0x10a0, + 0x0558, 0x1d34, 0x297c, 0x4110, 0x11a0, + 0x055c, 0x1d38, 0x2980, 0x4114, 0x11a4, + 0x045c, 0x1c38, 0x2880, 0x4014, 0x10a4, + 0x035c, 0x1b38, 0x2780, 0x3f14, 0x0fa4, + 0x0360, 0x1b3c, 0x2784, 0x3f18, 0x0fa8, + 0x0460, 0x1c3c, 0x2884, 0x4018, 0x10a8, + 0x0560, 0x1d3c, 0x2984, 0x4118, 0x11a8, + 0x0564, 0x1d40, 0x2988, 0x411c, 0x11ac, + 0x0464, 0x1c40, 0x2888, 0x401c, 0x10ac, + 0x0364, 0x1b40, 0x2788, 0x3f1c, 0x0fac, + 0x0368, 0x1b44, 0x278c, 0x3f20, 0x0fb0, + 0x0468, 0x1c44, 0x288c, 0x4020, 0x10b0, + 0x0568, 0x1d44, 0x298c, 0x4120, 0x11b0, + 0x0948, 0x2124, 0x2d6c, 0x4500, 0x1590, + 0x0a48, 0x2224, 0x2e6c, 0x4600, 0x1690, + 0x0b48, 0x2324, 0x2f6c, 0x4700, 0x1790, + 0x0b4c, 0x2328, 0x2f70, 0x4704, 0x1794, + 0x0a4c, 0x2228, 0x2e70, 0x4604, 0x1694, + 0x094c, 0x2128, 0x2d70, 0x4504, 0x1594, + 0x0950, 0x212c, 0x2d74, 0x4508, 0x1598, + 0x0a50, 0x222c, 0x2e74, 0x4608, 0x1698, + 0x0b50, 0x232c, 0x2f74, 0x4708, 0x1798, + 0x0b54, 0x2330, 0x2f78, 0x470c, 0x179c, + 0x0a54, 0x2230, 0x2e78, 0x460c, 0x169c, + 0x0954, 0x2130, 0x2d78, 0x450c, 0x159c, + 0x0958, 0x2134, 0x2d7c, 0x4510, 0x15a0, + 0x0a58, 0x2234, 0x2e7c, 0x4610, 0x16a0, + 0x0b58, 0x2334, 0x2f7c, 0x4710, 0x17a0, + 0x0b5c, 0x2338, 0x2f80, 0x4714, 0x17a4, + 0x0a5c, 0x2238, 0x2e80, 0x4614, 0x16a4, + 0x095c, 0x2138, 0x2d80, 0x4514, 0x15a4, + 0x0960, 0x213c, 0x2d84, 0x4518, 0x15a8, + 0x0a60, 0x223c, 0x2e84, 0x4618, 0x16a8, + 0x0b60, 0x233c, 0x2f84, 0x4718, 0x17a8, + 0x0b64, 0x2340, 0x2f88, 0x471c, 0x17ac, + 0x0a64, 0x2240, 0x2e88, 0x461c, 0x16ac, + 0x0964, 0x2140, 0x2d88, 0x451c, 0x15ac, + 0x0968, 0x2144, 0x2d8c, 0x4520, 0x15b0, + 0x0a68, 0x2244, 0x2e8c, 0x4620, 0x16b0, + 0x0b68, 0x2344, 0x2f8c, 0x4720, 0x17b0, +}; + +/* DV25/50 DCT coefficient weights and inverse weights */ +/* created by dvtables.py */ +static const int dv_weight_bits = 18; +static const int dv_weight_88[64] = { + 131072, 257107, 257107, 242189, 252167, 242189, 235923, 237536, + 237536, 235923, 229376, 231390, 223754, 231390, 229376, 222935, + 224969, 217965, 217965, 224969, 222935, 200636, 218652, 211916, + 212325, 211916, 218652, 200636, 188995, 196781, 205965, 206433, + 206433, 205965, 196781, 188995, 185364, 185364, 200636, 200704, + 200636, 185364, 185364, 174609, 180568, 195068, 195068, 180568, + 174609, 170091, 175557, 189591, 175557, 170091, 165371, 170627, + 170627, 165371, 160727, 153560, 160727, 144651, 144651, 136258, +}; +static const int dv_weight_248[64] = { + 131072, 242189, 257107, 237536, 229376, 200636, 242189, 223754, + 224969, 196781, 262144, 242189, 229376, 200636, 257107, 237536, + 211916, 185364, 235923, 217965, 229376, 211916, 206433, 180568, + 242189, 223754, 224969, 196781, 211916, 185364, 235923, 217965, + 200704, 175557, 222935, 205965, 200636, 185364, 195068, 170627, + 229376, 211916, 206433, 180568, 200704, 175557, 222935, 205965, + 175557, 153560, 188995, 174609, 165371, 144651, 200636, 185364, + 195068, 170627, 175557, 153560, 188995, 174609, 165371, 144651, +}; +static const int dv_iweight_bits = 14; +static const int dv_iweight_88[64] = { + 32768, 16710, 16710, 17735, 17015, 17735, 18197, 18079, + 18079, 18197, 18725, 18559, 19196, 18559, 18725, 19284, + 19108, 19692, 19692, 19108, 19284, 21400, 19645, 20262, + 20214, 20262, 19645, 21400, 22733, 21845, 20867, 20815, + 20815, 20867, 21845, 22733, 23173, 23173, 21400, 21400, + 21400, 23173, 23173, 24600, 23764, 22017, 22017, 23764, + 24600, 25267, 24457, 22672, 24457, 25267, 25971, 25191, + 25191, 25971, 26715, 27962, 26715, 29642, 29642, 31536, +}; +static const int dv_iweight_248[64] = { + 32768, 17735, 16710, 18079, 18725, 21400, 17735, 19196, + 19108, 21845, 16384, 17735, 18725, 21400, 16710, 18079, + 20262, 23173, 18197, 19692, 18725, 20262, 20815, 23764, + 17735, 19196, 19108, 21845, 20262, 23173, 18197, 19692, + 21400, 24457, 19284, 20867, 21400, 23173, 22017, 25191, + 18725, 20262, 20815, 23764, 21400, 24457, 19284, 20867, + 24457, 27962, 22733, 24600, 25971, 29642, 21400, 23173, + 22017, 25191, 24457, 27962, 22733, 24600, 25971, 29642, +}; + static const uint16_t dv_audio_shuffle525[10][9] = { { 0, 30, 60, 20, 50, 80, 10, 40, 70 }, /* 1st channel */ { 6, 36, 66, 26, 56, 86, 16, 46, 76 }, @@ -1294,6 +2540,7 @@ static const DVprofile dv_profiles[] = { { .dsf = 0, .frame_size = 120000, /* IEC 61834, SMPTE-314M - 525/60 (NTSC) */ .difseg_size = 10, + .n_difchan = 1, .frame_rate = 30000, .ltc_divisor = 30, .frame_rate_base = 1001, @@ -1304,12 +2551,13 @@ static const DVprofile dv_profiles[] = { .pix_fmt = PIX_FMT_YUV411P, .audio_stride = 90, .audio_min_samples = { 1580, 1452, 1053 }, /* for 48, 44.1 and 32Khz */ - .audio_samples_dist = { 1602, 1601, 1602, 1601, 1602 }, + .audio_samples_dist = { 1600, 1602, 1602, 1602, 1602 }, /* per SMPTE-314M */ .audio_shuffle = dv_audio_shuffle525, }, { .dsf = 1, .frame_size = 144000, /* IEC 61834 - 625/50 (PAL) */ .difseg_size = 12, + .n_difchan = 1, .frame_rate = 25, .frame_rate_base = 1, .ltc_divisor = 25, @@ -1326,6 +2574,7 @@ static const DVprofile dv_profiles[] = { { .dsf = 1, .frame_size = 144000, /* SMPTE-314M - 625/50 (PAL) */ .difseg_size = 12, + .n_difchan = 1, .frame_rate = 25, .frame_rate_base = 1, .ltc_divisor = 25, @@ -1338,29 +2587,79 @@ static const DVprofile dv_profiles[] = { .audio_min_samples = { 1896, 1742, 1264 }, /* for 48, 44.1 and 32Khz */ .audio_samples_dist = { 1920, 1920, 1920, 1920, 1920 }, .audio_shuffle = dv_audio_shuffle625, - } + }, + { .dsf = 0, + .frame_size = 240000, /* SMPTE-314M - 525/60 (NTSC) 50 Mbps */ + .difseg_size = 10, /* also known as "DVCPRO50" */ + .n_difchan = 2, + .frame_rate = 30000, + .ltc_divisor = 30, + .frame_rate_base = 1001, + .height = 480, + .width = 720, + .sar = {{10, 11}, {40, 33}}, + .video_place = dv_place_422_525, + .pix_fmt = PIX_FMT_YUV422P, + .audio_stride = 90, + .audio_min_samples = { 1580, 1452, 1053 }, /* for 48, 44.1 and 32Khz */ + .audio_samples_dist = { 1600, 1602, 1602, 1602, 1602 }, /* per SMPTE-314M */ + .audio_shuffle = dv_audio_shuffle525, + }, + { .dsf = 1, + .frame_size = 288000, /* SMPTE-314M - 625/50 (PAL) 50 Mbps */ + .difseg_size = 12, /* also known as "DVCPRO50" */ + .n_difchan = 2, + .frame_rate = 25, + .frame_rate_base = 1, + .ltc_divisor = 25, + .height = 576, + .width = 720, + .sar = {{59, 54}, {118, 81}}, + .video_place = dv_place_422_625, + .pix_fmt = PIX_FMT_YUV422P, + .audio_stride = 108, + .audio_min_samples = { 1896, 1742, 1264 }, /* for 48, 44.1 and 32Khz */ + .audio_samples_dist = { 1920, 1920, 1920, 1920, 1920 }, + .audio_shuffle = dv_audio_shuffle625, + } }; +/* minimum number of bytes to read from a DV stream in order to determine the profile */ +#define DV_PROFILE_BYTES (6*80) /* 6 DIF blocks */ + +/* largest possible DV frame, in bytes (PAL 50Mbps) */ +#define DV_MAX_FRAME_SIZE 288000 + static inline const DVprofile* dv_frame_profile(uint8_t* frame) { if ((frame[3] & 0x80) == 0) { /* DSF flag */ - return &dv_profiles[0]; - } - else if ((frame[5] & 0x07) == 0) { /* APT flag */ - return &dv_profiles[1]; + /* it's an NTSC format */ + if ((frame[80*5 + 48 + 3] & 0x4)) { /* 4:2:2 sampling */ + return &dv_profiles[3]; /* NTSC 50Mbps */ + } else { /* 4:1:1 sampling */ + return &dv_profiles[0]; /* NTSC 25Mbps */ + } + } else { + /* it's a PAL format */ + if ((frame[80*5 + 48 + 3] & 0x4)) { /* 4:2:2 sampling */ + return &dv_profiles[4]; /* PAL 50Mbps */ + } else if ((frame[5] & 0x07) == 0) { /* APT flag */ + return &dv_profiles[1]; /* PAL 25Mbps 4:2:0 */ + } else + return &dv_profiles[2]; /* PAL 25Mbps 4:1:1 */ } - else - return &dv_profiles[2]; } static inline const DVprofile* dv_codec_profile(AVCodecContext* codec) { - if (codec->width != 720) { + int i; + + if (codec->width != 720) return NULL; - } - else if (codec->height == 480) { - return &dv_profiles[0]; - } - else - return &dv_profiles[1]; + + for (i=0; i<sizeof(dv_profiles)/sizeof(DVprofile); i++) + if (codec->height == dv_profiles[i].height && codec->pix_fmt == dv_profiles[i].pix_fmt) + return &dv_profiles[i]; + + return NULL; } diff --git a/src/libffmpeg/libavcodec/fft.c b/src/libffmpeg/libavcodec/fft.c index 81b6843e9..1c63f6889 100644 --- a/src/libffmpeg/libavcodec/fft.c +++ b/src/libffmpeg/libavcodec/fft.c @@ -57,12 +57,16 @@ int ff_fft_init(FFTContext *s, int nbits, int inverse) s->exptab1 = NULL; /* compute constant table for HAVE_SSE version */ -#if (defined(HAVE_MMX) && defined(HAVE_BUILTIN_VECTOR)) || defined(HAVE_ALTIVEC) +#if (defined(HAVE_MMX) && (defined(HAVE_BUILTIN_VECTOR) || defined(HAVE_MM3DNOW))) || defined(HAVE_ALTIVEC) { int has_vectors = 0; #if defined(HAVE_MMX) - has_vectors = mm_support() & MM_SSE; +#ifdef HAVE_MM3DNOW + has_vectors = mm_support() & (MM_3DNOW | MM_3DNOWEXT | MM_SSE | MM_SSE2); +#else + has_vectors = mm_support() & (MM_SSE | MM_SSE2); +#endif #endif #if defined(HAVE_ALTIVEC) && !defined(ALTIVEC_USE_REFERENCE_C_CODE) has_vectors = mm_support() & MM_ALTIVEC; @@ -94,8 +98,24 @@ int ff_fft_init(FFTContext *s, int nbits, int inverse) } while (nblocks != 0); av_freep(&s->exptab); #if defined(HAVE_MMX) - s->fft_calc = ff_fft_calc_sse; -#else +#ifdef HAVE_MM3DNOW + if (has_vectors & MM_3DNOWEXT) + /* 3DNowEx for Athlon(XP) */ + s->fft_calc = ff_fft_calc_3dn2; + else if (has_vectors & MM_3DNOW) + /* 3DNow! for K6-2/3 */ + s->fft_calc = ff_fft_calc_3dn; +#endif +#ifdef HAVE_BUILTIN_VECTOR + if (has_vectors & MM_SSE2) + /* SSE for P4/K8 */ + s->fft_calc = ff_fft_calc_sse; + else if ((has_vectors & MM_SSE) && + s->fft_calc == ff_fft_calc_c) + /* SSE for P3 */ + s->fft_calc = ff_fft_calc_sse; +#endif +#else /* HAVE_MMX */ s->fft_calc = ff_fft_calc_altivec; #endif } diff --git a/src/libffmpeg/libavcodec/ffv1.c b/src/libffmpeg/libavcodec/ffv1.c index 10ba21b4c..c987d84f6 100644 --- a/src/libffmpeg/libavcodec/ffv1.c +++ b/src/libffmpeg/libavcodec/ffv1.c @@ -550,12 +550,6 @@ static int encode_init(AVCodecContext *avctx) FFV1Context *s = avctx->priv_data; int i; - if(avctx->strict_std_compliance >FF_COMPLIANCE_EXPERIMENTAL){ - av_log(avctx, AV_LOG_ERROR, "this codec is under development, files encoded with it may not be decodeable with future versions!!!\n" - "use vstrict=-2 / -strict -2 to use it anyway\n"); - return -1; - } - common_init(avctx); s->version=0; @@ -694,7 +688,8 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, } } -static void common_end(FFV1Context *s){ +static int common_end(AVCodecContext *avctx){ + FFV1Context *s = avctx->priv_data; int i; for(i=0; i<s->plane_count; i++){ @@ -702,13 +697,6 @@ static void common_end(FFV1Context *s){ av_freep(&p->state); } -} - -static int encode_end(AVCodecContext *avctx) -{ - FFV1Context *s = avctx->priv_data; - - common_end(s); return 0; } @@ -959,11 +947,14 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8 p->pict_type= FF_I_TYPE; //FIXME I vs. P if(get_rac(c, &keystate)){ p->key_frame= 1; - read_header(f); + if(read_header(f) < 0) + return -1; clear_state(f); }else{ p->key_frame= 0; } + if(!f->plane[0].state && !f->plane[0].vlc_state) + return -1; p->reference= 0; if(avctx->get_buffer(avctx, p) < 0){ @@ -1021,7 +1012,7 @@ AVCodec ffv1_decoder = { sizeof(FFV1Context), decode_init, NULL, - NULL, + common_end, decode_frame, CODEC_CAP_DR1 /*| CODEC_CAP_DRAW_HORIZ_BAND*/, NULL @@ -1035,6 +1026,6 @@ AVCodec ffv1_encoder = { sizeof(FFV1Context), encode_init, encode_frame, - encode_end, + common_end, }; #endif diff --git a/src/libffmpeg/libavcodec/flac.c b/src/libffmpeg/libavcodec/flac.c index 97ac53745..659112c77 100644 --- a/src/libffmpeg/libavcodec/flac.c +++ b/src/libffmpeg/libavcodec/flac.c @@ -36,6 +36,7 @@ #include "avcodec.h" #include "bitstream.h" #include "golomb.h" +#include "crc.h" #undef NDEBUG #include <assert.h> @@ -84,99 +85,12 @@ static int blocksize_table[] = { 256<<0, 256<<1, 256<<2, 256<<3, 256<<4, 256<<5, 256<<6, 256<<7 }; -static const uint8_t table_crc8[256] = { - 0x00, 0x07, 0x0e, 0x09, 0x1c, 0x1b, 0x12, 0x15, - 0x38, 0x3f, 0x36, 0x31, 0x24, 0x23, 0x2a, 0x2d, - 0x70, 0x77, 0x7e, 0x79, 0x6c, 0x6b, 0x62, 0x65, - 0x48, 0x4f, 0x46, 0x41, 0x54, 0x53, 0x5a, 0x5d, - 0xe0, 0xe7, 0xee, 0xe9, 0xfc, 0xfb, 0xf2, 0xf5, - 0xd8, 0xdf, 0xd6, 0xd1, 0xc4, 0xc3, 0xca, 0xcd, - 0x90, 0x97, 0x9e, 0x99, 0x8c, 0x8b, 0x82, 0x85, - 0xa8, 0xaf, 0xa6, 0xa1, 0xb4, 0xb3, 0xba, 0xbd, - 0xc7, 0xc0, 0xc9, 0xce, 0xdb, 0xdc, 0xd5, 0xd2, - 0xff, 0xf8, 0xf1, 0xf6, 0xe3, 0xe4, 0xed, 0xea, - 0xb7, 0xb0, 0xb9, 0xbe, 0xab, 0xac, 0xa5, 0xa2, - 0x8f, 0x88, 0x81, 0x86, 0x93, 0x94, 0x9d, 0x9a, - 0x27, 0x20, 0x29, 0x2e, 0x3b, 0x3c, 0x35, 0x32, - 0x1f, 0x18, 0x11, 0x16, 0x03, 0x04, 0x0d, 0x0a, - 0x57, 0x50, 0x59, 0x5e, 0x4b, 0x4c, 0x45, 0x42, - 0x6f, 0x68, 0x61, 0x66, 0x73, 0x74, 0x7d, 0x7a, - 0x89, 0x8e, 0x87, 0x80, 0x95, 0x92, 0x9b, 0x9c, - 0xb1, 0xb6, 0xbf, 0xb8, 0xad, 0xaa, 0xa3, 0xa4, - 0xf9, 0xfe, 0xf7, 0xf0, 0xe5, 0xe2, 0xeb, 0xec, - 0xc1, 0xc6, 0xcf, 0xc8, 0xdd, 0xda, 0xd3, 0xd4, - 0x69, 0x6e, 0x67, 0x60, 0x75, 0x72, 0x7b, 0x7c, - 0x51, 0x56, 0x5f, 0x58, 0x4d, 0x4a, 0x43, 0x44, - 0x19, 0x1e, 0x17, 0x10, 0x05, 0x02, 0x0b, 0x0c, - 0x21, 0x26, 0x2f, 0x28, 0x3d, 0x3a, 0x33, 0x34, - 0x4e, 0x49, 0x40, 0x47, 0x52, 0x55, 0x5c, 0x5b, - 0x76, 0x71, 0x78, 0x7f, 0x6a, 0x6d, 0x64, 0x63, - 0x3e, 0x39, 0x30, 0x37, 0x22, 0x25, 0x2c, 0x2b, - 0x06, 0x01, 0x08, 0x0f, 0x1a, 0x1d, 0x14, 0x13, - 0xae, 0xa9, 0xa0, 0xa7, 0xb2, 0xb5, 0xbc, 0xbb, - 0x96, 0x91, 0x98, 0x9f, 0x8a, 0x8d, 0x84, 0x83, - 0xde, 0xd9, 0xd0, 0xd7, 0xc2, 0xc5, 0xcc, 0xcb, - 0xe6, 0xe1, 0xe8, 0xef, 0xfa, 0xfd, 0xf4, 0xf3 -}; - -static int64_t get_utf8(GetBitContext *gb) -{ - uint64_t val; - int ones=0, bytes; - - while(get_bits1(gb)) - ones++; - - if (ones==0) bytes=0; - else if(ones==1) return -1; - else bytes= ones - 1; - - val= get_bits(gb, 7-ones); - while(bytes--){ - const int tmp = get_bits(gb, 8); - - if((tmp>>6) != 2) - return -1; - val<<=6; - val|= tmp&0x3F; - } +static int64_t get_utf8(GetBitContext *gb){ + int64_t val; + GET_UTF8(val, get_bits(gb, 8), return -1;) return val; } -#if 0 -static int skip_utf8(GetBitContext *gb) -{ - int ones=0, bytes; - - while(get_bits1(gb)) - ones++; - - if (ones==0) bytes=0; - else if(ones==1) return -1; - else bytes= ones - 1; - - skip_bits(gb, 7-ones); - while(bytes--){ - const int tmp = get_bits(gb, 8); - - if((tmp>>6) != 2) - return -1; - } - return 0; -} -#endif - -static int get_crc8(const uint8_t *buf, int count){ - int crc=0; - int i; - - for(i=0; i<count; i++){ - crc = table_crc8[crc ^ buf[i]]; - } - - return crc; -} - static void metadata_streaminfo(FLACContext *s); static void dump_headers(FLACContext *s); @@ -341,7 +255,7 @@ static int decode_subframe_fixed(FLACContext *s, int channel, int pred_order) static int decode_subframe_lpc(FLACContext *s, int channel, int pred_order) { - int sum, i, j; + int i, j; int coeff_prec, qlevel; int coeffs[pred_order]; @@ -379,12 +293,24 @@ static int decode_subframe_lpc(FLACContext *s, int channel, int pred_order) if (decode_residuals(s, channel, pred_order) < 0) return -1; - for (i = pred_order; i < s->blocksize; i++) - { - sum = 0; - for (j = 0; j < pred_order; j++) - sum += coeffs[j] * s->decoded[channel][i-j-1]; - s->decoded[channel][i] += sum >> qlevel; + if (s->bps > 16) { + int64_t sum; + for (i = pred_order; i < s->blocksize; i++) + { + sum = 0; + for (j = 0; j < pred_order; j++) + sum += (int64_t)coeffs[j] * s->decoded[channel][i-j-1]; + s->decoded[channel][i] += sum >> qlevel; + } + } else { + int sum; + for (i = pred_order; i < s->blocksize; i++) + { + sum = 0; + for (j = 0; j < pred_order; j++) + sum += coeffs[j] * s->decoded[channel][i-j-1]; + s->decoded[channel][i] += sum >> qlevel; + } } return 0; @@ -554,7 +480,7 @@ static int decode_frame(FLACContext *s) } skip_bits(&s->gb, 8); - crc8= get_crc8(s->gb.buffer, get_bits_count(&s->gb)/8); + crc8= av_crc(av_crc07, 0, s->gb.buffer, get_bits_count(&s->gb)/8); if(crc8){ av_log(s->avctx, AV_LOG_ERROR, "header crc mismatch crc=%2X\n", crc8); return -1; @@ -583,6 +509,17 @@ static int decode_frame(FLACContext *s) return 0; } +static inline int16_t shift_to_16_bits(int32_t data, int bps) +{ + if (bps == 24) { + return (data >> 8); + } else if (bps == 20) { + return (data >> 4); + } else { + return data; + } +} + static int flac_decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size) @@ -719,53 +656,32 @@ static int flac_decode_frame(AVCodecContext *avctx, } } #else +#define DECORRELATE(left, right)\ + assert(s->channels == 2);\ + for (i = 0; i < s->blocksize; i++)\ + {\ + int a= s->decoded[0][i];\ + int b= s->decoded[1][i];\ + *(samples++) = (left ) >> (16 - s->bps);\ + *(samples++) = (right) >> (16 - s->bps);\ + }\ + break; + switch(s->decorrelation) { case INDEPENDENT: for (j = 0; j < s->blocksize; j++) { for (i = 0; i < s->channels; i++) - *(samples++) = s->decoded[i][j]; + *(samples++) = shift_to_16_bits(s->decoded[i][j], s->bps); } break; case LEFT_SIDE: - assert(s->channels == 2); - for (i = 0; i < s->blocksize; i++) - { - *(samples++) = s->decoded[0][i]; - *(samples++) = s->decoded[0][i] - s->decoded[1][i]; - } - break; + DECORRELATE(a,a-b) case RIGHT_SIDE: - assert(s->channels == 2); - for (i = 0; i < s->blocksize; i++) - { - *(samples++) = s->decoded[0][i] + s->decoded[1][i]; - *(samples++) = s->decoded[1][i]; - } - break; + DECORRELATE(a+b,b) case MID_SIDE: - assert(s->channels == 2); - for (i = 0; i < s->blocksize; i++) - { - int mid, side; - mid = s->decoded[0][i]; - side = s->decoded[1][i]; - -#if 1 //needs to be checked but IMHO it should be binary identical - mid -= side>>1; - *(samples++) = mid + side; - *(samples++) = mid; -#else - - mid <<= 1; - if (side & 1) - mid++; - *(samples++) = (mid + side) >> 1; - *(samples++) = (mid - side) >> 1; -#endif - } - break; + DECORRELATE( (a-=b>>1) + b, a) } #endif diff --git a/src/libffmpeg/libavcodec/golomb.h b/src/libffmpeg/libavcodec/golomb.h index ef74f15c6..a8221ec29 100644 --- a/src/libffmpeg/libavcodec/golomb.h +++ b/src/libffmpeg/libavcodec/golomb.h @@ -435,6 +435,10 @@ static inline void set_ur_golomb_jpegls(PutBitContext *pb, int i, int k, int lim e= (i>>k) + 1; if(e<limit){ + while(e > 31) { + put_bits(pb, 31, 0); + e -= 31; + } put_bits(pb, e, 1); if(k) put_bits(pb, k, i&((1<<k)-1)); diff --git a/src/libffmpeg/libavcodec/h261.c b/src/libffmpeg/libavcodec/h261.c index c6218c8b9..e56978e61 100644 --- a/src/libffmpeg/libavcodec/h261.c +++ b/src/libffmpeg/libavcodec/h261.c @@ -264,6 +264,7 @@ void ff_h261_encode_mb(MpegEncContext * s, h->previous_mba = h->current_mba; if(HAS_CBP(h->mtype)){ + assert(cbp>0); put_bits(&s->pb,h261_cbp_tab[cbp-1][1],h261_cbp_tab[cbp-1][0]); } for(i=0; i<6; i++) { @@ -846,6 +847,7 @@ static int h261_decode_gob(H261Context *h){ return -1; } +#ifdef CONFIG_H261_PARSER static int h261_find_frame_end(ParseContext *pc, AVCodecContext* avctx, const uint8_t *buf, int buf_size){ int vop_found, i, j; uint32_t state; @@ -899,6 +901,7 @@ static int h261_parse(AVCodecParserContext *s, *poutbuf_size = buf_size; return next; } +#endif /** * returns the number of bytes consumed for building the current frame @@ -921,8 +924,8 @@ static int h261_decode_frame(AVCodecContext *avctx, AVFrame *pict = data; #ifdef DEBUG - printf("*****frame %d size=%d\n", avctx->frame_number, buf_size); - printf("bytes=%x %x %x %x\n", buf[0], buf[1], buf[2], buf[3]); + av_log(avctx, AV_LOG_DEBUG, "*****frame %d size=%d\n", avctx->frame_number, buf_size); + av_log(avctx, AV_LOG_DEBUG, "bytes=%x %x %x %x\n", buf[0], buf[1], buf[2], buf[3]); #endif s->flags= avctx->flags; s->flags2= avctx->flags2; @@ -1038,6 +1041,7 @@ AVCodec h261_decoder = { CODEC_CAP_DR1, }; +#ifdef CONFIG_H261_PARSER AVCodecParser h261_parser = { { CODEC_ID_H261 }, sizeof(ParseContext), @@ -1045,3 +1049,4 @@ AVCodecParser h261_parser = { h261_parse, ff_parse_close, }; +#endif diff --git a/src/libffmpeg/libavcodec/h263.c b/src/libffmpeg/libavcodec/h263.c index f7369c18d..f88114f70 100644 --- a/src/libffmpeg/libavcodec/h263.c +++ b/src/libffmpeg/libavcodec/h263.c @@ -552,6 +552,49 @@ void ff_clean_mpeg4_qscales(MpegEncContext *s){ } #endif //CONFIG_ENCODERS + +#define tab_size ((signed)(sizeof(s->direct_scale_mv[0])/sizeof(int16_t))) +#define tab_bias (tab_size/2) + +static void ff_mpeg4_init_direct_mv(MpegEncContext *s){ + int i; + for(i=0; i<tab_size; i++){ + s->direct_scale_mv[0][i] = (i-tab_bias)*s->pb_time/s->pp_time; + s->direct_scale_mv[1][i] = (i-tab_bias)*(s->pb_time-s->pp_time)/s->pp_time; + } +} + +static inline void ff_mpeg4_set_one_direct_mv(MpegEncContext *s, int mx, int my, int i){ + int xy= s->block_index[i]; + uint16_t time_pp= s->pp_time; + uint16_t time_pb= s->pb_time; + int p_mx, p_my; + + p_mx= s->next_picture.motion_val[0][xy][0]; + if((unsigned)(p_mx + tab_bias) < tab_size){ + s->mv[0][i][0] = s->direct_scale_mv[0][p_mx + tab_bias] + mx; + s->mv[1][i][0] = mx ? s->mv[0][i][0] - p_mx + : s->direct_scale_mv[1][p_mx + tab_bias]; + }else{ + s->mv[0][i][0] = p_mx*time_pb/time_pp + mx; + s->mv[1][i][0] = mx ? s->mv[0][i][0] - p_mx + : p_mx*(time_pb - time_pp)/time_pp; + } + p_my= s->next_picture.motion_val[0][xy][1]; + if((unsigned)(p_my + tab_bias) < tab_size){ + s->mv[0][i][1] = s->direct_scale_mv[0][p_my + tab_bias] + my; + s->mv[1][i][1] = my ? s->mv[0][i][1] - p_my + : s->direct_scale_mv[1][p_my + tab_bias]; + }else{ + s->mv[0][i][1] = p_my*time_pb/time_pp + my; + s->mv[1][i][1] = my ? s->mv[0][i][1] - p_my + : p_my*(time_pb - time_pp)/time_pp; + } +} + +#undef tab_size +#undef tab_bias + /** * * @return the mb_type @@ -559,29 +602,25 @@ void ff_clean_mpeg4_qscales(MpegEncContext *s){ int ff_mpeg4_set_direct_mv(MpegEncContext *s, int mx, int my){ const int mb_index= s->mb_x + s->mb_y*s->mb_stride; const int colocated_mb_type= s->next_picture.mb_type[mb_index]; - int xy= s->block_index[0]; uint16_t time_pp= s->pp_time; uint16_t time_pb= s->pb_time; int i; //FIXME avoid divides + // try special case with shifts for 1 and 3 B-frames? if(IS_8X8(colocated_mb_type)){ s->mv_type = MV_TYPE_8X8; for(i=0; i<4; i++){ - xy= s->block_index[i]; - s->mv[0][i][0] = s->next_picture.motion_val[0][xy][0]*time_pb/time_pp + mx; - s->mv[0][i][1] = s->next_picture.motion_val[0][xy][1]*time_pb/time_pp + my; - s->mv[1][i][0] = mx ? s->mv[0][i][0] - s->next_picture.motion_val[0][xy][0] - : s->next_picture.motion_val[0][xy][0]*(time_pb - time_pp)/time_pp; - s->mv[1][i][1] = my ? s->mv[0][i][1] - s->next_picture.motion_val[0][xy][1] - : s->next_picture.motion_val[0][xy][1]*(time_pb - time_pp)/time_pp; + ff_mpeg4_set_one_direct_mv(s, mx, my, i); } return MB_TYPE_DIRECT2 | MB_TYPE_8x8 | MB_TYPE_L0L1; } else if(IS_INTERLACED(colocated_mb_type)){ s->mv_type = MV_TYPE_FIELD; for(i=0; i<2; i++){ int field_select= s->next_picture.ref_index[0][s->block_index[2*i]]; + s->field_select[0][i]= field_select; + s->field_select[1][i]= i; if(s->top_field_first){ time_pp= s->pp_field_time - field_select + i; time_pb= s->pb_field_time - field_select + i; @@ -598,12 +637,11 @@ int ff_mpeg4_set_direct_mv(MpegEncContext *s, int mx, int my){ } return MB_TYPE_DIRECT2 | MB_TYPE_16x8 | MB_TYPE_L0L1 | MB_TYPE_INTERLACED; }else{ - s->mv[0][0][0] = s->mv[0][1][0] = s->mv[0][2][0] = s->mv[0][3][0] = s->next_picture.motion_val[0][xy][0]*time_pb/time_pp + mx; - s->mv[0][0][1] = s->mv[0][1][1] = s->mv[0][2][1] = s->mv[0][3][1] = s->next_picture.motion_val[0][xy][1]*time_pb/time_pp + my; - s->mv[1][0][0] = s->mv[1][1][0] = s->mv[1][2][0] = s->mv[1][3][0] = mx ? s->mv[0][0][0] - s->next_picture.motion_val[0][xy][0] - : s->next_picture.motion_val[0][xy][0]*(time_pb - time_pp)/time_pp; - s->mv[1][0][1] = s->mv[1][1][1] = s->mv[1][2][1] = s->mv[1][3][1] = my ? s->mv[0][0][1] - s->next_picture.motion_val[0][xy][1] - : s->next_picture.motion_val[0][xy][1]*(time_pb - time_pp)/time_pp; + ff_mpeg4_set_one_direct_mv(s, mx, my, 0); + s->mv[0][1][0] = s->mv[0][2][0] = s->mv[0][3][0] = s->mv[0][0][0]; + s->mv[0][1][1] = s->mv[0][2][1] = s->mv[0][3][1] = s->mv[0][0][1]; + s->mv[1][1][0] = s->mv[1][2][0] = s->mv[1][3][0] = s->mv[1][0][0]; + s->mv[1][1][1] = s->mv[1][2][1] = s->mv[1][3][1] = s->mv[1][0][1]; if((s->avctx->workaround_bugs & FF_BUG_DIRECT_BLOCKSIZE) || !s->quarter_sample) s->mv_type= MV_TYPE_16X16; else @@ -1480,7 +1518,7 @@ void ff_h263_loop_filter(MpegEncContext * s){ static int h263_pred_dc(MpegEncContext * s, int n, uint16_t **dc_val_ptr) { int x, y, wrap, a, c, pred_dc, scale; - int16_t *dc_val, *ac_val; + int16_t *dc_val; /* find prediction */ if (n < 4) { @@ -1488,14 +1526,12 @@ static int h263_pred_dc(MpegEncContext * s, int n, uint16_t **dc_val_ptr) y = 2 * s->mb_y + ((n & 2) >> 1); wrap = s->b8_stride; dc_val = s->dc_val[0]; - ac_val = s->ac_val[0][0]; scale = s->y_dc_scale; } else { x = s->mb_x; y = s->mb_y; wrap = s->mb_stride; dc_val = s->dc_val[n - 4 + 1]; - ac_val = s->ac_val[n - 4 + 1][0]; scale = s->c_dc_scale; } /* B C @@ -2219,6 +2255,7 @@ void ff_set_mpeg4_time(MpegEncContext * s, int picture_number){ if(s->pict_type==B_TYPE){ s->pb_time= s->pp_time - (s->last_non_b_time - s->time); assert(s->pb_time > 0 && s->pb_time < s->pp_time); + ff_mpeg4_init_direct_mv(s); }else{ s->last_time_base= s->time_base; s->time_base= time_div; @@ -3711,6 +3748,8 @@ static int mpeg4_decode_partitioned_mb(MpegEncContext *s, DCTELEM block[6][64]) mb_type= s->current_picture.mb_type[xy]; cbp = s->cbp_table[xy]; + s->use_intra_dc_vlc= s->qscale < s->intra_dc_threshold; + if(s->current_picture.qscale_table[xy] != s->qscale){ ff_set_qscale(s, s->current_picture.qscale_table[xy] ); } @@ -4447,6 +4486,9 @@ intra: return -1; } cbp = (cbpc & 3) | (cbpy << 2); + + s->use_intra_dc_vlc= s->qscale < s->intra_dc_threshold; + if (dquant) { ff_set_qscale(s, s->qscale + quant_tab[get_bits(&s->gb, 2)]); } @@ -4474,6 +4516,12 @@ end: /* per-MB end of slice check */ if(s->codec_id==CODEC_ID_MPEG4){ +#if 0 //http://standards.iso.org/ittf/PubliclyAvailableStandards/ISO_IEC_14496-4_2004_Conformance_Testing/video_conformance/version_1/simple/ERROR.ZIP/mit025.m4v needs this but its unclear if the mpeg4 standard allows this at all (MN) + if(s->pict_type != B_TYPE){ + while(show_bits(&s->gb, 9 + (s->pict_type == P_TYPE)) == 1) + skip_bits(&s->gb, 9 + (s->pict_type == P_TYPE)); + } +#endif if(mpeg4_is_resync(s)){ const int delta= s->mb_x + 1 == s->mb_width ? 2 : 1; if(s->pict_type==B_TYPE && s->next_picture.mbskip_table[xy + delta]) @@ -4742,7 +4790,7 @@ static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block, //Note intra & rvlc should be optimized away if this is inlined if(intra) { - if(s->qscale < s->intra_dc_threshold){ + if(s->use_intra_dc_vlc){ /* DC coef */ if(s->partitioned_frame){ level = s->dc_val[0][ s->block_index[n] ]; @@ -4758,6 +4806,7 @@ static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block, i = 0; }else{ i = -1; + ff_mpeg4_pred_dc(s, n, 0, &dc_pred_dir, 0); } if (!coded) goto not_coded; @@ -4965,10 +5014,10 @@ static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block, } not_coded: if (intra) { - if(s->qscale >= s->intra_dc_threshold){ + if(!s->use_intra_dc_vlc){ block[0] = ff_mpeg4_pred_dc(s, n, block[0], &dc_pred_dir, 0); - if(i == -1) i=0; + i -= i>>31; //if(i == -1) i=0; } mpeg4_pred_ac(s, block, n, dc_pred_dir); @@ -5575,6 +5624,7 @@ static int decode_vol_header(MpegEncContext *s, GetBitContext *gb){ s->progressive_sequence= s->progressive_frame= get_bits1(gb)^1; + s->interlaced_dct=0; if(!get_bits1(gb) && (s->avctx->debug & FF_DEBUG_PICT_INFO)) av_log(s->avctx, AV_LOG_INFO, "MPEG4 OBMC not supported (very likely buggy encoder)\n"); /* OBMC Disable */ if (vo_ver_id == 1) { @@ -5849,6 +5899,7 @@ static int decode_vop_header(MpegEncContext *s, GetBitContext *gb){ // printf("messed up order, maybe after seeking? skipping current b frame\n"); return FRAME_SKIPPED; } + ff_mpeg4_init_direct_mv(s); if(s->t_frame==0) s->t_frame= s->pb_time; if(s->t_frame==0) s->t_frame=1; // 1/0 protection diff --git a/src/libffmpeg/libavcodec/h263data.h b/src/libffmpeg/libavcodec/h263data.h index 2968531a5..01bcaedb4 100644 --- a/src/libffmpeg/libavcodec/h263data.h +++ b/src/libffmpeg/libavcodec/h263data.h @@ -274,8 +274,8 @@ const uint16_t ff_mba_max[6]={ 47, 98, 395,1583,6335,9215 }; -const uint8_t ff_mba_length[6]={ - 6, 7, 9, 11, 13, 14 +const uint8_t ff_mba_length[7]={ + 6, 7, 9, 11, 13, 14, 14 }; const uint8_t ff_h263_loop_filter_strength[32]={ diff --git a/src/libffmpeg/libavcodec/h263dec.c b/src/libffmpeg/libavcodec/h263dec.c index 87c9e4991..73b050325 100644 --- a/src/libffmpeg/libavcodec/h263dec.c +++ b/src/libffmpeg/libavcodec/h263dec.c @@ -85,6 +85,7 @@ int ff_h263_decode_init(AVCodecContext *avctx) s->h263_pred = 1; s->msmpeg4_version=5; break; + case CODEC_ID_VC1: case CODEC_ID_WMV3: s->h263_msmpeg4 = 1; s->h263_pred = 1; @@ -390,6 +391,7 @@ static int h263_find_frame_end(ParseContext *pc, const uint8_t *buf, int buf_siz return END_NOT_FOUND; } +#ifdef CONFIG_H263_PARSER static int h263_parse(AVCodecParserContext *s, AVCodecContext *avctx, uint8_t **poutbuf, int *poutbuf_size, @@ -410,6 +412,7 @@ static int h263_parse(AVCodecParserContext *s, *poutbuf_size = buf_size; return next; } +#endif int ff_h263_decode_frame(AVCodecContext *avctx, void *data, int *data_size, @@ -514,7 +517,8 @@ retry: if(s->xvid_build==0 && s->divx_version==0 && s->lavc_build==0){ if(s->avctx->stream_codec_tag == ff_get_fourcc("XVID") || - s->avctx->codec_tag == ff_get_fourcc("XVID") || s->avctx->codec_tag == ff_get_fourcc("XVIX")) + s->avctx->codec_tag == ff_get_fourcc("XVID") || s->avctx->codec_tag == ff_get_fourcc("XVIX") || + s->avctx->codec_tag == ff_get_fourcc("RMP4")) s->xvid_build= -1; #if 0 if(s->avctx->codec_tag == ff_get_fourcc("DIVX") && s->vo_type==0 && s->vol_control_parameters==1 @@ -693,7 +697,7 @@ retry: return -1; #ifdef DEBUG - printf("qscale=%d\n", s->qscale); + av_log(avctx, AV_LOG_DEBUG, "qscale=%d\n", s->qscale); #endif ff_er_frame_start(s); @@ -765,24 +769,23 @@ retry: assert(s->current_picture.pict_type == s->current_picture_ptr->pict_type); assert(s->current_picture.pict_type == s->pict_type); - if(s->pict_type==B_TYPE || s->low_delay){ - *pict= *(AVFrame*)&s->current_picture; + if (s->pict_type == B_TYPE || s->low_delay) { + *pict= *(AVFrame*)s->current_picture_ptr; + } else if (s->last_picture_ptr != NULL) { + *pict= *(AVFrame*)s->last_picture_ptr; + } + + if(s->last_picture_ptr || s->low_delay){ + *data_size = sizeof(AVFrame); ff_print_debug_info(s, pict); - } else { - *pict= *(AVFrame*)&s->last_picture; - if(pict) - ff_print_debug_info(s, pict); } /* Return the Picture timestamp as the frame number */ /* we substract 1 because it is added on utils.c */ avctx->frame_number = s->picture_number - 1; - /* don't output the last pic after seeking */ - if(s->last_picture_ptr || s->low_delay) - *data_size = sizeof(AVFrame); #ifdef PRINT_FRAME_TIME -printf("%Ld\n", rdtsc()-time); +av_log(avctx, AV_LOG_DEBUG, "%Ld\n", rdtsc()-time); #endif return get_consumed_bytes(s, buf_size); @@ -886,6 +889,7 @@ AVCodec flv_decoder = { CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1 }; +#ifdef CONFIG_H263_PARSER AVCodecParser h263_parser = { { CODEC_ID_H263 }, sizeof(ParseContext), @@ -893,3 +897,4 @@ AVCodecParser h263_parser = { h263_parse, ff_parse_close, }; +#endif diff --git a/src/libffmpeg/libavcodec/h264.c b/src/libffmpeg/libavcodec/h264.c index e80a3992c..1a7fb76b4 100644 --- a/src/libffmpeg/libavcodec/h264.c +++ b/src/libffmpeg/libavcodec/h264.c @@ -33,7 +33,7 @@ #include "cabac.h" -#undef NDEBUG +//#undef NDEBUG #include <assert.h> #define interlaced_dct interlaced_dct_is_a_bad_name @@ -54,6 +54,22 @@ #define MAX_MMCO_COUNT 66 +/* Compiling in interlaced support reduces the speed + * of progressive decoding by about 2%. */ +#define ALLOW_INTERLACE + +#ifdef ALLOW_INTERLACE +#define MB_MBAFF h->mb_mbaff +#define MB_FIELD h->mb_field_decoding_flag +#define FRAME_MBAFF h->mb_aff_frame +#else +#define MB_MBAFF 0 +#define MB_FIELD 0 +#define FRAME_MBAFF 0 +#undef IS_INTERLACED +#define IS_INTERLACED(mb_type) 0 +#endif + /** * Sequence parameter set */ @@ -173,7 +189,8 @@ typedef struct H264Context{ int chroma_qp; //QPc - int prev_mb_skipped; //FIXME remove (IMHO not used) + int prev_mb_skipped; + int next_mb_skipped; //prediction stuff int chroma_pred_mode; @@ -231,6 +248,12 @@ typedef struct H264Context{ int b_stride; //FIXME use s->b4_stride int b8_stride; + int mb_linesize; ///< may be equal to s->linesize or s->linesize*2, for mbaff + int mb_uvlinesize; + + int emu_edge_width; + int emu_edge_height; + int halfpel_flag; int thirdpel_flag; @@ -254,13 +277,14 @@ typedef struct H264Context{ int slice_num; uint8_t *slice_table_base; - uint8_t *slice_table; ///< slice_table_base + mb_stride + 1 + uint8_t *slice_table; ///< slice_table_base + 2*mb_stride + 1 int slice_type; int slice_type_fixed; //interlacing specific flags int mb_aff_frame; int mb_field_decoding_flag; + int mb_mbaff; ///< mb_aff_frame && mb_field_decoding_flag int sub_mb_type[4]; @@ -291,11 +315,11 @@ typedef struct H264Context{ int use_weight_chroma; int luma_log2_weight_denom; int chroma_log2_weight_denom; - int luma_weight[2][16]; - int luma_offset[2][16]; - int chroma_weight[2][16][2]; - int chroma_offset[2][16][2]; - int implicit_weight[16][16]; + int luma_weight[2][48]; + int luma_offset[2][48]; + int chroma_weight[2][48][2]; + int chroma_offset[2][48][2]; + int implicit_weight[48][48]; //deblock int deblocking_filter; ///< disable_deblocking_filter_idc with 1<->0 @@ -306,17 +330,18 @@ typedef struct H264Context{ int direct_spatial_mv_pred; int dist_scale_factor[16]; + int dist_scale_factor_field[32]; int map_col_to_list0[2][16]; + int map_col_to_list0_field[2][32]; /** * num_ref_idx_l0/1_active_minus1 + 1 */ - int ref_count[2];// FIXME split for AFF + int ref_count[2]; ///< counts frames or fields, depending on current mb mode Picture *short_ref[32]; Picture *long_ref[32]; Picture default_ref_list[2][32]; - Picture ref_list[2][32]; //FIXME size? - Picture field_ref_list[2][32]; //FIXME size? + Picture ref_list[2][48]; ///< 0..15: frame refs, 16..47: mbaff field refs Picture *delayed_pic[16]; //FIXME size? Picture *delayed_output_pic; @@ -357,9 +382,17 @@ typedef struct H264Context{ uint8_t direct_cache[5*8]; uint8_t zigzag_scan[16]; + uint8_t zigzag_scan8x8[64]; + uint8_t zigzag_scan8x8_cavlc[64]; uint8_t field_scan[16]; + uint8_t field_scan8x8[64]; + uint8_t field_scan8x8_cavlc[64]; const uint8_t *zigzag_scan_q0; + const uint8_t *zigzag_scan8x8_q0; + const uint8_t *zigzag_scan8x8_cavlc_q0; const uint8_t *field_scan_q0; + const uint8_t *field_scan8x8_q0; + const uint8_t *field_scan8x8_cavlc_q0; int x264_build; }H264Context; @@ -394,60 +427,83 @@ static always_inline uint32_t pack16to32(int a, int b){ static always_inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){ uint8_t *p= (uint8_t*)vp; assert(size==1 || size==4); + assert(w<=4); w *= size; stride *= size; assert((((long)vp)&(FFMIN(w, STRIDE_ALIGN)-1)) == 0); assert((stride&(w-1))==0); -//FIXME check what gcc generates for 64 bit on x86 and possibly write a 32 bit ver of it - if(w==2 && h==2){ - *(uint16_t*)(p + 0)= - *(uint16_t*)(p + stride)= size==4 ? val : val*0x0101; - }else if(w==2 && h==4){ - *(uint16_t*)(p + 0*stride)= - *(uint16_t*)(p + 1*stride)= + if(w==2){ + const uint16_t v= size==4 ? val : val*0x0101; + *(uint16_t*)(p + 0*stride)= v; + if(h==1) return; + *(uint16_t*)(p + 1*stride)= v; + if(h==2) return; *(uint16_t*)(p + 2*stride)= - *(uint16_t*)(p + 3*stride)= size==4 ? val : val*0x0101; - }else if(w==4 && h==1){ - *(uint32_t*)(p + 0*stride)= size==4 ? val : val*0x01010101; - }else if(w==4 && h==2){ - *(uint32_t*)(p + 0*stride)= - *(uint32_t*)(p + 1*stride)= size==4 ? val : val*0x01010101; - }else if(w==4 && h==4){ - *(uint32_t*)(p + 0*stride)= - *(uint32_t*)(p + 1*stride)= + *(uint16_t*)(p + 3*stride)= v; + }else if(w==4){ + const uint32_t v= size==4 ? val : val*0x01010101; + *(uint32_t*)(p + 0*stride)= v; + if(h==1) return; + *(uint32_t*)(p + 1*stride)= v; + if(h==2) return; *(uint32_t*)(p + 2*stride)= - *(uint32_t*)(p + 3*stride)= size==4 ? val : val*0x01010101; - }else if(w==8 && h==1){ - *(uint32_t*)(p + 0)= - *(uint32_t*)(p + 4)= size==4 ? val : val*0x01010101; - }else if(w==8 && h==2){ - *(uint32_t*)(p + 0 + 0*stride)= - *(uint32_t*)(p + 4 + 0*stride)= - *(uint32_t*)(p + 0 + 1*stride)= - *(uint32_t*)(p + 4 + 1*stride)= size==4 ? val : val*0x01010101; - }else if(w==8 && h==4){ - *(uint64_t*)(p + 0*stride)= - *(uint64_t*)(p + 1*stride)= + *(uint32_t*)(p + 3*stride)= v; + }else if(w==8){ + //gcc can't optimize 64bit math on x86_32 +#if defined(ARCH_X86_64) || (defined(MP_WORDSIZE) && MP_WORDSIZE >= 64) + const uint64_t v= val*0x0100000001ULL; + *(uint64_t*)(p + 0*stride)= v; + if(h==1) return; + *(uint64_t*)(p + 1*stride)= v; + if(h==2) return; *(uint64_t*)(p + 2*stride)= - *(uint64_t*)(p + 3*stride)= size==4 ? val*0x0100000001ULL : val*0x0101010101010101ULL; - }else if(w==16 && h==2){ - *(uint64_t*)(p + 0+0*stride)= - *(uint64_t*)(p + 8+0*stride)= - *(uint64_t*)(p + 0+1*stride)= - *(uint64_t*)(p + 8+1*stride)= size==4 ? val*0x0100000001ULL : val*0x0101010101010101ULL; - }else if(w==16 && h==4){ + *(uint64_t*)(p + 3*stride)= v; + }else if(w==16){ + const uint64_t v= val*0x0100000001ULL; *(uint64_t*)(p + 0+0*stride)= *(uint64_t*)(p + 8+0*stride)= *(uint64_t*)(p + 0+1*stride)= - *(uint64_t*)(p + 8+1*stride)= + *(uint64_t*)(p + 8+1*stride)= v; + if(h==2) return; *(uint64_t*)(p + 0+2*stride)= *(uint64_t*)(p + 8+2*stride)= *(uint64_t*)(p + 0+3*stride)= - *(uint64_t*)(p + 8+3*stride)= size==4 ? val*0x0100000001ULL : val*0x0101010101010101ULL; + *(uint64_t*)(p + 8+3*stride)= v; +#else + *(uint32_t*)(p + 0+0*stride)= + *(uint32_t*)(p + 4+0*stride)= val; + if(h==1) return; + *(uint32_t*)(p + 0+1*stride)= + *(uint32_t*)(p + 4+1*stride)= val; + if(h==2) return; + *(uint32_t*)(p + 0+2*stride)= + *(uint32_t*)(p + 4+2*stride)= + *(uint32_t*)(p + 0+3*stride)= + *(uint32_t*)(p + 4+3*stride)= val; + }else if(w==16){ + *(uint32_t*)(p + 0+0*stride)= + *(uint32_t*)(p + 4+0*stride)= + *(uint32_t*)(p + 8+0*stride)= + *(uint32_t*)(p +12+0*stride)= + *(uint32_t*)(p + 0+1*stride)= + *(uint32_t*)(p + 4+1*stride)= + *(uint32_t*)(p + 8+1*stride)= + *(uint32_t*)(p +12+1*stride)= val; + if(h==2) return; + *(uint32_t*)(p + 0+2*stride)= + *(uint32_t*)(p + 4+2*stride)= + *(uint32_t*)(p + 8+2*stride)= + *(uint32_t*)(p +12+2*stride)= + *(uint32_t*)(p + 0+3*stride)= + *(uint32_t*)(p + 4+3*stride)= + *(uint32_t*)(p + 8+3*stride)= + *(uint32_t*)(p +12+3*stride)= val; +#endif }else assert(0); + assert(h==4); } static void fill_caches(H264Context *h, int mb_type, int for_deblock){ @@ -458,10 +514,8 @@ static void fill_caches(H264Context *h, int mb_type, int for_deblock){ int left_block[8]; int i; - //FIXME deblocking can skip fill_caches much of the time with multiple slices too. - // the actual condition is whether we're on the edge of a slice, - // and even then the intra and nnz parts are unnecessary. - if(for_deblock && h->slice_num == 1) + //FIXME deblocking could skip the intra and nnz parts. + if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[mb_xy-s->mb_stride]) && !FRAME_MBAFF) return; //wow what a mess, why didn't they simplify the interlacing&intra stuff, i can't imagine that these complex rules are worth it @@ -478,7 +532,7 @@ static void fill_caches(H264Context *h, int mb_type, int for_deblock){ left_block[5]= 10; left_block[6]= 8; left_block[7]= 11; - if(h->mb_aff_frame){ + if(FRAME_MBAFF){ const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride; const int top_pair_xy = pair_xy - s->mb_stride; const int topleft_pair_xy = top_pair_xy - 1; @@ -548,11 +602,39 @@ static void fill_caches(H264Context *h, int mb_type, int for_deblock){ h->left_mb_xy[0] = left_xy[0]; h->left_mb_xy[1] = left_xy[1]; if(for_deblock){ - topleft_type = h->slice_table[topleft_xy ] < 255 ? s->current_picture.mb_type[topleft_xy] : 0; + topleft_type = 0; + topright_type = 0; top_type = h->slice_table[top_xy ] < 255 ? s->current_picture.mb_type[top_xy] : 0; - topright_type= h->slice_table[topright_xy] < 255 ? s->current_picture.mb_type[topright_xy]: 0; left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0; left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0; + + if(FRAME_MBAFF && !IS_INTRA(mb_type)){ + int list; + int v = *(uint16_t*)&h->non_zero_count[mb_xy][14]; + for(i=0; i<16; i++) + h->non_zero_count_cache[scan8[i]] = (v>>i)&1; + for(list=0; list<1+(h->slice_type==B_TYPE); list++){ + if(USES_LIST(mb_type,list)){ + uint32_t *src = (uint32_t*)s->current_picture.motion_val[list][h->mb2b_xy[mb_xy]]; + uint32_t *dst = (uint32_t*)h->mv_cache[list][scan8[0]]; + uint8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]]; + for(i=0; i<4; i++, dst+=8, src+=h->b_stride){ + dst[0] = src[0]; + dst[1] = src[1]; + dst[2] = src[2]; + dst[3] = src[3]; + } + *(uint32_t*)&h->ref_cache[list][scan8[ 0]] = + *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = pack16to32(ref[0],ref[1])*0x0101; + ref += h->b8_stride; + *(uint32_t*)&h->ref_cache[list][scan8[ 8]] = + *(uint32_t*)&h->ref_cache[list][scan8[10]] = pack16to32(ref[0],ref[1])*0x0101; + }else{ + fill_rectangle(&h-> mv_cache[list][scan8[ 0]], 4, 4, 8, 0, 4); + fill_rectangle(&h->ref_cache[list][scan8[ 0]], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); + } + } + } }else{ topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0; top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0; @@ -697,7 +779,6 @@ static void fill_caches(H264Context *h, int mb_type, int for_deblock){ } #if 1 - //FIXME direct mb can skip much of this if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){ int list; for(list=0; list<1+(h->slice_type==B_TYPE); list++){ @@ -711,7 +792,7 @@ static void fill_caches(H264Context *h, int mb_type, int for_deblock){ } h->mv_cache_clean[list]= 0; - if(IS_INTER(top_type)){ + if(USES_LIST(top_type, list)){ const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride; const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride; *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0]; @@ -731,13 +812,13 @@ static void fill_caches(H264Context *h, int mb_type, int for_deblock){ } //FIXME unify cleanup or sth - if(IS_INTER(left_type[0])){ + if(USES_LIST(left_type[0], list)){ const int b_xy= h->mb2b_xy[left_xy[0]] + 3; const int b8_xy= h->mb2b8_xy[left_xy[0]] + 1; *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0]]; *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1]]; - h->ref_cache[list][scan8[0] - 1 + 0*8]= - h->ref_cache[list][scan8[0] - 1 + 1*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0]>>1)]; + h->ref_cache[list][scan8[0] - 1 + 0*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0]>>1)]; + h->ref_cache[list][scan8[0] - 1 + 1*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1]>>1)]; }else{ *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 1*8]= 0; @@ -745,13 +826,13 @@ static void fill_caches(H264Context *h, int mb_type, int for_deblock){ h->ref_cache[list][scan8[0] - 1 + 1*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE; } - if(IS_INTER(left_type[1])){ + if(USES_LIST(left_type[1], list)){ const int b_xy= h->mb2b_xy[left_xy[1]] + 3; const int b8_xy= h->mb2b8_xy[left_xy[1]] + 1; *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[2]]; *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[3]]; - h->ref_cache[list][scan8[0] - 1 + 2*8]= - h->ref_cache[list][scan8[0] - 1 + 3*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[2]>>1)]; + h->ref_cache[list][scan8[0] - 1 + 2*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[2]>>1)]; + h->ref_cache[list][scan8[0] - 1 + 3*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[3]>>1)]; }else{ *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 3*8]= 0; @@ -760,10 +841,10 @@ static void fill_caches(H264Context *h, int mb_type, int for_deblock){ assert((!left_type[0]) == (!left_type[1])); } - if(for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred)) + if((for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred)) && !FRAME_MBAFF) continue; - if(IS_INTER(topleft_type)){ + if(USES_LIST(topleft_type, list)){ const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride; const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + h->b8_stride; *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy]; @@ -773,7 +854,7 @@ static void fill_caches(H264Context *h, int mb_type, int for_deblock){ h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE; } - if(IS_INTER(topright_type)){ + if(USES_LIST(topright_type, list)){ const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride; const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride; *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy]; @@ -783,6 +864,8 @@ static void fill_caches(H264Context *h, int mb_type, int for_deblock){ h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE; } + if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF) + continue; h->ref_cache[list][scan8[5 ]+1] = h->ref_cache[list][scan8[7 ]+1] = @@ -797,14 +880,7 @@ static void fill_caches(H264Context *h, int mb_type, int for_deblock){ if( h->pps.cabac ) { /* XXX beurk, Load mvd */ - if(IS_INTER(topleft_type)){ - const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride; - *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy]; - }else{ - *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 - 1*8]= 0; - } - - if(IS_INTER(top_type)){ + if(USES_LIST(top_type, list)){ const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride; *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0]; *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1]; @@ -816,7 +892,7 @@ static void fill_caches(H264Context *h, int mb_type, int for_deblock){ *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0; } - if(IS_INTER(left_type[0])){ + if(USES_LIST(left_type[0], list)){ const int b_xy= h->mb2b_xy[left_xy[0]] + 3; *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]]; *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]]; @@ -824,7 +900,7 @@ static void fill_caches(H264Context *h, int mb_type, int for_deblock){ *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0; } - if(IS_INTER(left_type[1])){ + if(USES_LIST(left_type[1], list)){ const int b_xy= h->mb2b_xy[left_xy[1]] + 3; *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]]; *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]]; @@ -851,18 +927,52 @@ static void fill_caches(H264Context *h, int mb_type, int for_deblock){ *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0; } - //FIXME interlacing - if(IS_DIRECT(left_type[0])){ - h->direct_cache[scan8[0] - 1 + 0*8]= + if(IS_DIRECT(left_type[0])) + h->direct_cache[scan8[0] - 1 + 0*8]= 1; + else if(IS_8X8(left_type[0])) + h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)]; + else + h->direct_cache[scan8[0] - 1 + 0*8]= 0; + + if(IS_DIRECT(left_type[1])) h->direct_cache[scan8[0] - 1 + 2*8]= 1; - }else if(IS_8X8(left_type[0])){ - int b8_xy = h->mb2b8_xy[left_xy[0]] + 1; - h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[b8_xy]; - h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[b8_xy + h->b8_stride]; - }else{ - h->direct_cache[scan8[0] - 1 + 0*8]= + else if(IS_8X8(left_type[1])) + h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)]; + else h->direct_cache[scan8[0] - 1 + 2*8]= 0; + } + } + + if(FRAME_MBAFF){ +#define MAP_MVS\ + MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\ + MAP_F2F(scan8[0] + 0 - 1*8, top_type)\ + MAP_F2F(scan8[0] + 1 - 1*8, top_type)\ + MAP_F2F(scan8[0] + 2 - 1*8, top_type)\ + MAP_F2F(scan8[0] + 3 - 1*8, top_type)\ + MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\ + MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\ + MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\ + MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\ + MAP_F2F(scan8[0] - 1 + 3*8, left_type[1]) + if(MB_FIELD){ +#define MAP_F2F(idx, mb_type)\ + if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\ + h->ref_cache[list][idx] <<= 1;\ + h->mv_cache[list][idx][1] /= 2;\ + h->mvd_cache[list][idx][1] /= 2;\ + } + MAP_MVS +#undef MAP_F2F + }else{ +#define MAP_F2F(idx, mb_type)\ + if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\ + h->ref_cache[list][idx] >>= 1;\ + h->mv_cache[list][idx][1] <<= 1;\ + h->mvd_cache[list][idx][1] <<= 1;\ } + MAP_MVS +#undef MAP_F2F } } } @@ -987,6 +1097,14 @@ static inline void write_back_non_zero_count(H264Context *h){ h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5]; h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5]; h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4]; + + if(FRAME_MBAFF){ + // store all luma nnzs, for deblocking + int v = 0, i; + for(i=0; i<16; i++) + v += (!!h->non_zero_count_cache[scan8[i]]) << i; + *(uint16_t*)&h->non_zero_count[mb_xy][14] = v; + } } /** @@ -1009,6 +1127,49 @@ static inline int pred_non_zero_count(H264Context *h, int n){ static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){ const int topright_ref= h->ref_cache[list][ i - 8 + part_width ]; + /* there is no consistent mapping of mvs to neighboring locations that will + * make mbaff happy, so we can't move all this logic to fill_caches */ + if(FRAME_MBAFF){ + MpegEncContext *s = &h->s; + const int *mb_types = s->current_picture_ptr->mb_type; + const int16_t *mv; + *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0; + *C = h->mv_cache[list][scan8[0]-2]; + + if(!MB_FIELD + && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){ + int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3); + if(IS_INTERLACED(mb_types[topright_xy])){ +#define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\ + const int x4 = X4, y4 = Y4;\ + const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\ + if(!USES_LIST(mb_type,list) && !IS_8X8(mb_type))\ + return LIST_NOT_USED;\ + mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\ + h->mv_cache[list][scan8[0]-2][0] = mv[0];\ + h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\ + return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP; + + SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1); + } + } + if(topright_ref == PART_NOT_AVAILABLE + && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4 + && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){ + if(!MB_FIELD + && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){ + SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1); + } + if(MB_FIELD + && !IS_INTERLACED(mb_types[h->left_mb_xy[0]]) + && i >= scan8[0]+8){ + // leftshift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's ok. + SET_DIAG_MV(>>1, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2); + } + } +#undef SET_DIAG_MV + } + if(topright_ref != PART_NOT_AVAILABLE){ *C= h->mv_cache[list][ i - 8 + part_width ]; return topright_ref; @@ -1182,6 +1343,12 @@ static inline void direct_dist_scale_factor(H264Context * const h){ h->dist_scale_factor[i] = clip((tb*tx + 32) >> 6, -1024, 1023); } } + if(FRAME_MBAFF){ + for(i=0; i<h->ref_count[0]; i++){ + h->dist_scale_factor_field[2*i] = + h->dist_scale_factor_field[2*i+1] = h->dist_scale_factor[i]; + } + } } static inline void direct_ref_list_init(H264Context * const h){ MpegEncContext * const s = &h->s; @@ -1202,7 +1369,7 @@ static inline void direct_ref_list_init(H264Context * const h){ for(list=0; list<2; list++){ for(i=0; i<ref1->ref_count[list]; i++){ const int poc = ref1->ref_poc[list][i]; - h->map_col_to_list0[list][i] = PART_NOT_AVAILABLE; + h->map_col_to_list0[list][i] = 0; /* bogus; fills in for missing frames */ for(j=0; j<h->ref_count[list]; j++) if(h->ref_list[list][j].poc == poc){ h->map_col_to_list0[list][i] = j; @@ -1210,6 +1377,15 @@ static inline void direct_ref_list_init(H264Context * const h){ } } } + if(FRAME_MBAFF){ + for(list=0; list<2; list++){ + for(i=0; i<ref1->ref_count[list]; i++){ + j = h->map_col_to_list0[list][i]; + h->map_col_to_list0_field[list][2*i] = 2*j; + h->map_col_to_list0_field[list][2*i+1] = 2*j+1; + } + } + } } static inline void pred_direct_motion(H264Context * const h, int *mb_type){ @@ -1226,12 +1402,13 @@ static inline void pred_direct_motion(H264Context * const h, int *mb_type){ int sub_mb_type; int i8, i4; +#define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM) if(IS_8X8(mb_type_col) && !h->sps.direct_8x8_inference_flag){ /* FIXME save sub mb types from previous frames (or derive from MVs) * so we know exactly what block size to use */ sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */ *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1; - }else if(!is_b8x8 && (IS_16X16(mb_type_col) || IS_INTRA(mb_type_col))){ + }else if(!is_b8x8 && (mb_type_col & MB_TYPE_16x16_OR_INTRA)){ sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */ *mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */ }else{ @@ -1240,6 +1417,8 @@ static inline void pred_direct_motion(H264Context * const h, int *mb_type){ } if(!is_b8x8) *mb_type |= MB_TYPE_DIRECT2; + if(MB_FIELD) + *mb_type |= MB_TYPE_INTERLACED; tprintf("mb_type = %08x, sub_mb_type = %08x, is_b8x8 = %d, mb_type_col = %08x\n", *mb_type, sub_mb_type, is_b8x8, mb_type_col); @@ -1248,6 +1427,8 @@ static inline void pred_direct_motion(H264Context * const h, int *mb_type){ int mv[2][2]; int list; + /* FIXME interlacing + spatial direct uses wrong colocated block positions */ + /* ref = min(neighbors) */ for(list=0; list<2; list++){ int refa = h->ref_cache[list][scan8[0] - 1]; @@ -1345,6 +1526,107 @@ static inline void pred_direct_motion(H264Context * const h, int *mb_type){ } } }else{ /* direct temporal mv pred */ + const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]}; + const int *dist_scale_factor = h->dist_scale_factor; + + if(FRAME_MBAFF){ + if(IS_INTERLACED(*mb_type)){ + map_col_to_list0[0] = h->map_col_to_list0_field[0]; + map_col_to_list0[1] = h->map_col_to_list0_field[1]; + dist_scale_factor = h->dist_scale_factor_field; + } + if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){ + /* FIXME assumes direct_8x8_inference == 1 */ + const int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride; + int mb_types_col[2]; + int y_shift; + + *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1 + | (is_b8x8 ? 0 : MB_TYPE_DIRECT2) + | (*mb_type & MB_TYPE_INTERLACED); + sub_mb_type = MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_16x16; + + if(IS_INTERLACED(*mb_type)){ + /* frame to field scaling */ + mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy]; + mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride]; + if(s->mb_y&1){ + l1ref0 -= 2*h->b8_stride; + l1ref1 -= 2*h->b8_stride; + l1mv0 -= 4*h->b_stride; + l1mv1 -= 4*h->b_stride; + } + y_shift = 0; + + if( (mb_types_col[0] & MB_TYPE_16x16_OR_INTRA) + && (mb_types_col[1] & MB_TYPE_16x16_OR_INTRA) + && !is_b8x8) + *mb_type |= MB_TYPE_16x8; + else + *mb_type |= MB_TYPE_8x8; + }else{ + /* field to frame scaling */ + /* col_mb_y = (mb_y&~1) + (topAbsDiffPOC < bottomAbsDiffPOC ? 0 : 1) + * but in MBAFF, top and bottom POC are equal */ + int dy = (s->mb_y&1) ? 1 : 2; + mb_types_col[0] = + mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride]; + l1ref0 += dy*h->b8_stride; + l1ref1 += dy*h->b8_stride; + l1mv0 += 2*dy*h->b_stride; + l1mv1 += 2*dy*h->b_stride; + y_shift = 2; + + if((mb_types_col[0] & (MB_TYPE_16x16_OR_INTRA|MB_TYPE_16x8)) + && !is_b8x8) + *mb_type |= MB_TYPE_16x16; + else + *mb_type |= MB_TYPE_8x8; + } + + for(i8=0; i8<4; i8++){ + const int x8 = i8&1; + const int y8 = i8>>1; + int ref0, scale; + const int16_t (*l1mv)[2]= l1mv0; + + if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8])) + continue; + h->sub_mb_type[i8] = sub_mb_type; + + fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1); + if(IS_INTRA(mb_types_col[y8])){ + fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1); + fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4); + fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4); + continue; + } + + ref0 = l1ref0[x8 + (y8*2>>y_shift)*h->b8_stride]; + if(ref0 >= 0) + ref0 = map_col_to_list0[0][ref0*2>>y_shift]; + else{ + ref0 = map_col_to_list0[1][l1ref1[x8 + (y8*2>>y_shift)*h->b8_stride]*2>>y_shift]; + l1mv= l1mv1; + } + scale = dist_scale_factor[ref0]; + fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1); + + { + const int16_t *mv_col = l1mv[x8*3 + (y8*6>>y_shift)*h->b_stride]; + int my_col = (mv_col[1]<<y_shift)/2; + int mx = (scale * mv_col[0] + 128) >> 8; + int my = (scale * my_col + 128) >> 8; + fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4); + fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4); + } + } + return; + } + } + + /* one-to-one mv scaling */ + if(IS_16X16(*mb_type)){ fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1); if(IS_INTRA(mb_type_col)){ @@ -1352,13 +1634,13 @@ static inline void pred_direct_motion(H264Context * const h, int *mb_type){ fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, 0, 4); fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, 0, 4); }else{ - const int ref0 = l1ref0[0] >= 0 ? h->map_col_to_list0[0][l1ref0[0]] - : h->map_col_to_list0[1][l1ref1[0]]; - const int dist_scale_factor = h->dist_scale_factor[ref0]; + const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0]] + : map_col_to_list0[1][l1ref1[0]]; + const int scale = dist_scale_factor[ref0]; const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0]; int mv_l0[2]; - mv_l0[0] = (dist_scale_factor * mv_col[0] + 128) >> 8; - mv_l0[1] = (dist_scale_factor * mv_col[1] + 128) >> 8; + mv_l0[0] = (scale * mv_col[0] + 128) >> 8; + mv_l0[1] = (scale * mv_col[1] + 128) >> 8; fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref0, 1); fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv_l0[0],mv_l0[1]), 4); fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]), 4); @@ -1367,15 +1649,15 @@ static inline void pred_direct_motion(H264Context * const h, int *mb_type){ for(i8=0; i8<4; i8++){ const int x8 = i8&1; const int y8 = i8>>1; - int ref0, dist_scale_factor; + int ref0, scale; const int16_t (*l1mv)[2]= l1mv0; if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8])) continue; h->sub_mb_type[i8] = sub_mb_type; + fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1); if(IS_INTRA(mb_type_col)){ fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1); - fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1); fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4); fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4); continue; @@ -1383,27 +1665,26 @@ static inline void pred_direct_motion(H264Context * const h, int *mb_type){ ref0 = l1ref0[x8 + y8*h->b8_stride]; if(ref0 >= 0) - ref0 = h->map_col_to_list0[0][ref0]; + ref0 = map_col_to_list0[0][ref0]; else{ - ref0 = h->map_col_to_list0[1][l1ref1[x8 + y8*h->b8_stride]]; + ref0 = map_col_to_list0[1][l1ref1[x8 + y8*h->b8_stride]]; l1mv= l1mv1; } - dist_scale_factor = h->dist_scale_factor[ref0]; + scale = dist_scale_factor[ref0]; fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1); - fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1); if(IS_SUB_8X8(sub_mb_type)){ const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride]; - int mx = (dist_scale_factor * mv_col[0] + 128) >> 8; - int my = (dist_scale_factor * mv_col[1] + 128) >> 8; + int mx = (scale * mv_col[0] + 128) >> 8; + int my = (scale * mv_col[1] + 128) >> 8; fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4); fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4); }else for(i4=0; i4<4; i4++){ const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride]; int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]]; - mv_l0[0] = (dist_scale_factor * mv_col[0] + 128) >> 8; - mv_l0[1] = (dist_scale_factor * mv_col[1] + 128) >> 8; + mv_l0[0] = (scale * mv_col[0] + 128) >> 8; + mv_l0[1] = (scale * mv_col[1] + 128) >> 8; *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]); } @@ -1418,28 +1699,13 @@ static inline void write_back_motion(H264Context *h, int mb_type){ const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride; int list; + if(!USES_LIST(mb_type, 0)) + fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1); + for(list=0; list<2; list++){ int y; - if(!USES_LIST(mb_type, list)){ - if(1){ //FIXME skip or never read if mb_type doesn't use it - for(y=0; y<4; y++){ - *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= - *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= 0; - } - if( h->pps.cabac ) { - /* FIXME needed ? */ - for(y=0; y<4; y++){ - *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= - *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= 0; - } - } - for(y=0; y<2; y++){ - s->current_picture.ref_index[list][b8_xy + 0 + y*h->b8_stride]= - s->current_picture.ref_index[list][b8_xy + 1 + y*h->b8_stride]= LIST_NOT_USED; - } - } + if(!USES_LIST(mb_type, list)) continue; - } for(y=0; y<4; y++){ *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y]; @@ -1451,17 +1717,22 @@ static inline void write_back_motion(H264Context *h, int mb_type){ *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y]; } } - for(y=0; y<2; y++){ - s->current_picture.ref_index[list][b8_xy + 0 + y*h->b8_stride]= h->ref_cache[list][scan8[0]+0 + 16*y]; - s->current_picture.ref_index[list][b8_xy + 1 + y*h->b8_stride]= h->ref_cache[list][scan8[0]+2 + 16*y]; + + { + uint8_t *ref_index = &s->current_picture.ref_index[list][b8_xy]; + ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]]; + ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]]; + ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]]; + ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]]; } } if(h->slice_type == B_TYPE && h->pps.cabac){ if(IS_8X8(mb_type)){ - h->direct_table[b8_xy+1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0; - h->direct_table[b8_xy+0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0; - h->direct_table[b8_xy+1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0; + uint8_t *direct_table = &h->direct_table[b8_xy]; + direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0; + direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0; + direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0; } } } @@ -2594,20 +2865,20 @@ static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){ MpegEncContext * const s = &h->s; const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8; - const int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8; + int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8; const int luma_xy= (mx&3) + ((my&3)<<2); - uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*s->linesize; - uint8_t * src_cb= pic->data[1] + (mx>>3) + (my>>3)*s->uvlinesize; - uint8_t * src_cr= pic->data[2] + (mx>>3) + (my>>3)*s->uvlinesize; - int extra_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16; //FIXME increase edge?, IMHO not worth it - int extra_height= extra_width; + uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize; + uint8_t * src_cb, * src_cr; + int extra_width= h->emu_edge_width; + int extra_height= h->emu_edge_height; int emu=0; const int full_mx= mx>>2; const int full_my= my>>2; const int pic_width = 16*s->mb_width; - const int pic_height = 16*s->mb_height; + const int pic_height = 16*s->mb_height >> MB_MBAFF; - assert(pic->data[0]); + if(!pic->data[0]) + return; if(mx&7) extra_width -= 3; if(my&7) extra_height -= 3; @@ -2616,29 +2887,37 @@ static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, || full_my < 0-extra_height || full_mx + 16/*FIXME*/ > pic_width + extra_width || full_my + 16/*FIXME*/ > pic_height + extra_height){ - ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*s->linesize, s->linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height); - src_y= s->edge_emu_buffer + 2 + 2*s->linesize; + ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height); + src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize; emu=1; } - qpix_op[luma_xy](dest_y, src_y, s->linesize); //FIXME try variable height perhaps? + qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps? if(!square){ - qpix_op[luma_xy](dest_y + delta, src_y + delta, s->linesize); + qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize); } if(s->flags&CODEC_FLAG_GRAY) return; + if(MB_MBAFF){ + // chroma offset when predicting from a field of opposite parity + my += 2 * ((s->mb_y & 1) - (h->ref_cache[list][scan8[n]] & 1)); + emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1); + } + src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize; + src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize; + if(emu){ - ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, s->uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1); + ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1); src_cb= s->edge_emu_buffer; } - chroma_op(dest_cb, src_cb, s->uvlinesize, chroma_height, mx&7, my&7); + chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7); if(emu){ - ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, s->uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1); + ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1); src_cr= s->edge_emu_buffer; } - chroma_op(dest_cr, src_cr, s->uvlinesize, chroma_height, mx&7, my&7); + chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7); } static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta, @@ -2651,11 +2930,11 @@ static inline void mc_part_std(H264Context *h, int n, int square, int chroma_hei qpel_mc_func *qpix_op= qpix_put; h264_chroma_mc_func chroma_op= chroma_put; - dest_y += 2*x_offset + 2*y_offset*s-> linesize; - dest_cb += x_offset + y_offset*s->uvlinesize; - dest_cr += x_offset + y_offset*s->uvlinesize; + dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize; + dest_cb += x_offset + y_offset*h->mb_uvlinesize; + dest_cr += x_offset + y_offset*h->mb_uvlinesize; x_offset += 8*s->mb_x; - y_offset += 8*s->mb_y; + y_offset += 8*(s->mb_y >> MB_MBAFF); if(list0){ Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ]; @@ -2684,18 +2963,18 @@ static inline void mc_part_weighted(H264Context *h, int n, int square, int chrom int list0, int list1){ MpegEncContext * const s = &h->s; - dest_y += 2*x_offset + 2*y_offset*s-> linesize; - dest_cb += x_offset + y_offset*s->uvlinesize; - dest_cr += x_offset + y_offset*s->uvlinesize; + dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize; + dest_cb += x_offset + y_offset*h->mb_uvlinesize; + dest_cr += x_offset + y_offset*h->mb_uvlinesize; x_offset += 8*s->mb_x; - y_offset += 8*s->mb_y; + y_offset += 8*(s->mb_y >> MB_MBAFF); if(list0 && list1){ /* don't optimize for luma-only case, since B-frames usually * use implicit weights => chroma too. */ uint8_t *tmp_cb = s->obmc_scratchpad; - uint8_t *tmp_cr = tmp_cb + 8*s->uvlinesize; - uint8_t *tmp_y = tmp_cr + 8*s->uvlinesize; + uint8_t *tmp_cr = s->obmc_scratchpad + 8; + uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize; int refn0 = h->ref_cache[0][ scan8[n] ]; int refn1 = h->ref_cache[1][ scan8[n] ]; @@ -2709,17 +2988,17 @@ static inline void mc_part_weighted(H264Context *h, int n, int square, int chrom if(h->use_weight == 2){ int weight0 = h->implicit_weight[refn0][refn1]; int weight1 = 64 - weight0; - luma_weight_avg( dest_y, tmp_y, s-> linesize, 5, weight0, weight1, 0); - chroma_weight_avg(dest_cb, tmp_cb, s->uvlinesize, 5, weight0, weight1, 0); - chroma_weight_avg(dest_cr, tmp_cr, s->uvlinesize, 5, weight0, weight1, 0); + luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0); + chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0); + chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0); }else{ - luma_weight_avg(dest_y, tmp_y, s->linesize, h->luma_log2_weight_denom, + luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom, h->luma_weight[0][refn0], h->luma_weight[1][refn1], h->luma_offset[0][refn0] + h->luma_offset[1][refn1]); - chroma_weight_avg(dest_cb, tmp_cb, s->uvlinesize, h->chroma_log2_weight_denom, + chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom, h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0], h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]); - chroma_weight_avg(dest_cr, tmp_cr, s->uvlinesize, h->chroma_log2_weight_denom, + chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom, h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1], h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]); } @@ -2731,12 +3010,12 @@ static inline void mc_part_weighted(H264Context *h, int n, int square, int chrom dest_y, dest_cb, dest_cr, x_offset, y_offset, qpix_put, chroma_put); - luma_weight_op(dest_y, s->linesize, h->luma_log2_weight_denom, + luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom, h->luma_weight[list][refn], h->luma_offset[list][refn]); if(h->use_weight_chroma){ - chroma_weight_op(dest_cb, s->uvlinesize, h->chroma_log2_weight_denom, + chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom, h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]); - chroma_weight_op(dest_cr, s->uvlinesize, h->chroma_log2_weight_denom, + chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom, h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]); } } @@ -2760,6 +3039,22 @@ static inline void mc_part(H264Context *h, int n, int square, int chroma_height, x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1); } +static inline void prefetch_motion(H264Context *h, int list){ + /* fetch pixels for estimated mv 4 macroblocks ahead + * optimized for 64byte cache lines */ + MpegEncContext * const s = &h->s; + const int refn = h->ref_cache[list][scan8[0]]; + if(refn >= 0){ + const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8; + const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y; + uint8_t **src= h->ref_list[list][refn].data; + int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64; + s->dsp.prefetch(src[0]+off, s->linesize, 4); + off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64; + s->dsp.prefetch(src[1]+off, src[2]-src[1], 2); + } +} + static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put), qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg), @@ -2770,6 +3065,8 @@ static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t assert(IS_INTER(mb_type)); + prefetch_motion(h, 0); + if(IS_16X16(mb_type)){ mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0, qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0], @@ -2785,11 +3082,11 @@ static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t &weight_op[1], &weight_avg[1], IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1)); }else if(IS_8X16(mb_type)){ - mc_part(h, 0, 0, 8, 8*s->linesize, dest_y, dest_cb, dest_cr, 0, 0, + mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0, qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1], &weight_op[2], &weight_avg[2], IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1)); - mc_part(h, 4, 0, 8, 8*s->linesize, dest_y, dest_cb, dest_cr, 4, 0, + mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0, qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1], &weight_op[2], &weight_avg[2], IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1)); @@ -2819,11 +3116,11 @@ static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t &weight_op[4], &weight_avg[4], IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); }else if(IS_SUB_4X8(sub_mb_type)){ - mc_part(h, n , 0, 4, 4*s->linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset, + mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset, qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2], &weight_op[5], &weight_avg[5], IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); - mc_part(h, n+1, 0, 4, 4*s->linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset, + mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset, qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2], &weight_op[5], &weight_avg[5], IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); @@ -2841,6 +3138,8 @@ static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t } } } + + prefetch_motion(h, 1); } static void decode_init_vlc(H264Context *h){ @@ -2952,6 +3251,7 @@ static void free_tables(H264Context *h){ static void init_dequant8_coeff_table(H264Context *h){ int i,q,x; + const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly h->dequant8_coeff[0] = h->dequant8_buffer[0]; h->dequant8_coeff[1] = h->dequant8_buffer[1]; @@ -2965,8 +3265,9 @@ static void init_dequant8_coeff_table(H264Context *h){ int shift = div6[q]; int idx = rem6[q]; for(x=0; x<64; x++) - h->dequant8_coeff[i][q][x] = ((uint32_t)dequant8_coeff_init[idx][ - dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] * h->pps.scaling_matrix8[i][x]) << shift; + h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] = + ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] * + h->pps.scaling_matrix8[i][x]) << shift; } } } @@ -3025,7 +3326,7 @@ static int alloc_tables(H264Context *h){ CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t)) CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t)) - CHECKED_ALLOCZ(h->slice_table_base , big_mb_num * sizeof(uint8_t)) + CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(uint8_t)) CHECKED_ALLOCZ(h->top_borders[0] , s->mb_width * (16+8+8) * sizeof(uint8_t)) CHECKED_ALLOCZ(h->top_borders[1] , s->mb_width * (16+8+8) * sizeof(uint8_t)) CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t)) @@ -3037,8 +3338,8 @@ static int alloc_tables(H264Context *h){ CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t)); } - memset(h->slice_table_base, -1, big_mb_num * sizeof(uint8_t)); - h->slice_table= h->slice_table_base + s->mb_stride + 1; + memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(uint8_t)); + h->slice_table= h->slice_table_base + s->mb_stride*2 + 1; CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t)); CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t)); @@ -3135,7 +3436,11 @@ static int frame_start(H264Context *h){ /* can't be in alloc_tables because linesize isn't known there. * FIXME: redo bipred weight to not require extra buffer? */ if(!s->obmc_scratchpad) - s->obmc_scratchpad = av_malloc(16*s->linesize + 2*8*s->uvlinesize); + s->obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize); + + /* some macroblocks will be accessed before they're available */ + if(FRAME_MBAFF) + memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(uint8_t)); // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1; return 0; @@ -3258,7 +3563,7 @@ static inline void xchg_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src int temp8, i; uint64_t temp64; int deblock_left = (s->mb_x > 0); - int deblock_top = (s->mb_y > 0); + int deblock_top = (s->mb_y > 1); tprintf("xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y, src_cb, src_cr, linesize, uvlinesize); @@ -3283,6 +3588,10 @@ b= t; XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1); XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+0), *(uint64_t*)(src_y +1 +linesize), temp64, xchg); XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+8), *(uint64_t*)(src_y +9 +linesize), temp64, 1); + if(s->mb_x+1 < s->mb_width){ + XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1); + XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x+1]), *(uint64_t*)(src_y +17 +linesize), temp64, 1); + } } if(!(s->flags&CODEC_FLAG_GRAY)){ @@ -3314,6 +3623,7 @@ static void hl_decode_mb(H264Context *h){ const unsigned int bottom = mb_y & 1; const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass); void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride); + void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride); if(!s->decode) return; @@ -3322,24 +3632,58 @@ static void hl_decode_mb(H264Context *h){ dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8; dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8; - if (h->mb_field_decoding_flag) { - linesize = s->linesize * 2; - uvlinesize = s->uvlinesize * 2; + if (MB_FIELD) { + linesize = h->mb_linesize = s->linesize * 2; + uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2; block_offset = &h->block_offset[24]; if(mb_y&1){ //FIXME move out of this func? dest_y -= s->linesize*15; dest_cb-= s->uvlinesize*7; dest_cr-= s->uvlinesize*7; } + if(FRAME_MBAFF) { + int list; + for(list=0; list<2; list++){ + if(!USES_LIST(mb_type, list)) + continue; + if(IS_16X16(mb_type)){ + int8_t *ref = &h->ref_cache[list][scan8[0]]; + fill_rectangle(ref, 4, 4, 8, 16+*ref^(s->mb_y&1), 1); + }else{ + for(i=0; i<16; i+=4){ + //FIXME can refs be smaller than 8x8 when !direct_8x8_inference ? + int ref = h->ref_cache[list][scan8[i]]; + if(ref >= 0) + fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, 16+ref^(s->mb_y&1), 1); + } + } + } + } } else { - linesize = s->linesize; - uvlinesize = s->uvlinesize; + linesize = h->mb_linesize = s->linesize; + uvlinesize = h->mb_uvlinesize = s->uvlinesize; // dct_offset = s->linesize * 16; } - idct_add = transform_bypass - ? IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4 - : IS_8x8DCT(mb_type) ? s->dsp.h264_idct8_add : s->dsp.h264_idct_add; + if(transform_bypass){ + idct_dc_add = + idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4; + }else if(IS_8x8DCT(mb_type)){ + idct_dc_add = s->dsp.h264_idct8_dc_add; + idct_add = s->dsp.h264_idct8_add; + }else{ + idct_dc_add = s->dsp.h264_idct_dc_add; + idct_add = s->dsp.h264_idct_add; + } + + if(FRAME_MBAFF && h->deblocking_filter && IS_INTRA(mb_type) + && (!bottom || !IS_INTRA(s->current_picture.mb_type[mb_xy-s->mb_stride]))){ + int mbt_y = mb_y&~1; + uint8_t *top_y = s->current_picture.data[0] + (mbt_y * 16* s->linesize ) + mb_x * 16; + uint8_t *top_cb = s->current_picture.data[1] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8; + uint8_t *top_cr = s->current_picture.data[2] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8; + xchg_pair_border(h, top_y, top_cb, top_cr, s->linesize, s->uvlinesize, 1); + } if (IS_INTRA_PCM(mb_type)) { unsigned int x, y; @@ -3369,14 +3713,8 @@ static void hl_decode_mb(H264Context *h){ } } else { if(IS_INTRA(mb_type)){ - if(h->deblocking_filter) { - if (h->mb_aff_frame) { - if (!bottom) - xchg_pair_border(h, dest_y, dest_cb, dest_cr, s->linesize, s->uvlinesize, 1); - } else { - xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1); - } - } + if(h->deblocking_filter && !FRAME_MBAFF) + xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1); if(!(s->flags&CODEC_FLAG_GRAY)){ h->pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize); @@ -3389,17 +3727,22 @@ static void hl_decode_mb(H264Context *h){ for(i=0; i<16; i+=4){ uint8_t * const ptr= dest_y + block_offset[i]; const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ]; + const int nnz = h->non_zero_count_cache[ scan8[i] ]; h->pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000, (h->topright_samples_available<<(i+1))&0x8000, linesize); - if(h->non_zero_count_cache[ scan8[i] ]) - idct_add(ptr, h->mb + i*16, linesize); + if(nnz){ + if(nnz == 1 && h->mb[i*16]) + idct_dc_add(ptr, h->mb + i*16, linesize); + else + idct_add(ptr, h->mb + i*16, linesize); + } } }else for(i=0; i<16; i++){ uint8_t * const ptr= dest_y + block_offset[i]; uint8_t *topright; const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ]; - int tr; + int nnz, tr; if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){ const int topright_avail= (h->topright_samples_available<<i)&0x8000; @@ -3413,10 +3756,14 @@ static void hl_decode_mb(H264Context *h){ topright= NULL; h->pred4x4[ dir ](ptr, topright, linesize); - if(h->non_zero_count_cache[ scan8[i] ]){ - if(s->codec_id == CODEC_ID_H264) - idct_add(ptr, h->mb + i*16, linesize); - else + nnz = h->non_zero_count_cache[ scan8[i] ]; + if(nnz){ + if(s->codec_id == CODEC_ID_H264){ + if(nnz == 1 && h->mb[i*16]) + idct_dc_add(ptr, h->mb + i*16, linesize); + else + idct_add(ptr, h->mb + i*16, linesize); + }else svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0); } } @@ -3429,20 +3776,8 @@ static void hl_decode_mb(H264Context *h){ }else svq3_luma_dc_dequant_idct_c(h->mb, s->qscale); } - if(h->deblocking_filter) { - if (h->mb_aff_frame) { - if (bottom) { - uint8_t *pair_dest_y = s->current_picture.data[0] + ((mb_y-1) * 16* s->linesize ) + mb_x * 16; - uint8_t *pair_dest_cb = s->current_picture.data[1] + ((mb_y-1) * 8 * s->uvlinesize) + mb_x * 8; - uint8_t *pair_dest_cr = s->current_picture.data[2] + ((mb_y-1) * 8 * s->uvlinesize) + mb_x * 8; - s->mb_y--; - xchg_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize, 0); - s->mb_y++; - } - } else { - xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0); - } - } + if(h->deblocking_filter && !FRAME_MBAFF) + xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0); }else if(s->codec_id == CODEC_ID_H264){ hl_motion(h, dest_y, dest_cb, dest_cr, s->dsp.put_h264_qpel_pixels_tab, s->dsp.put_h264_chroma_pixels_tab, @@ -3453,11 +3788,23 @@ static void hl_decode_mb(H264Context *h){ if(!IS_INTRA4x4(mb_type)){ if(s->codec_id == CODEC_ID_H264){ - const int di = IS_8x8DCT(mb_type) ? 4 : 1; - for(i=0; i<16; i+=di){ - if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below - uint8_t * const ptr= dest_y + block_offset[i]; - idct_add(ptr, h->mb + i*16, linesize); + if(IS_INTRA16x16(mb_type)){ + for(i=0; i<16; i++){ + if(h->non_zero_count_cache[ scan8[i] ]) + idct_add(dest_y + block_offset[i], h->mb + i*16, linesize); + else if(h->mb[i*16]) + idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize); + } + }else{ + const int di = IS_8x8DCT(mb_type) ? 4 : 1; + for(i=0; i<16; i+=di){ + int nnz = h->non_zero_count_cache[ scan8[i] ]; + if(nnz){ + if(nnz==1 && h->mb[i*16]) + idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize); + else + idct_add(dest_y + block_offset[i], h->mb + i*16, linesize); + } } } }else{ @@ -3471,34 +3818,26 @@ static void hl_decode_mb(H264Context *h){ } if(!(s->flags&CODEC_FLAG_GRAY)){ - idct_add = transform_bypass ? s->dsp.add_pixels4 : s->dsp.h264_idct_add; - if(!transform_bypass){ + uint8_t *dest[2] = {dest_cb, dest_cr}; + if(transform_bypass){ + idct_add = idct_dc_add = s->dsp.add_pixels4; + }else{ + idct_add = s->dsp.h264_idct_add; + idct_dc_add = s->dsp.h264_idct_dc_add; chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp, h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp][0]); chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp][0]); } if(s->codec_id == CODEC_ID_H264){ - for(i=16; i<16+4; i++){ - if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ - uint8_t * const ptr= dest_cb + block_offset[i]; - idct_add(ptr, h->mb + i*16, uvlinesize); - } - } - for(i=20; i<20+4; i++){ - if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ - uint8_t * const ptr= dest_cr + block_offset[i]; - idct_add(ptr, h->mb + i*16, uvlinesize); - } + for(i=16; i<16+8; i++){ + if(h->non_zero_count_cache[ scan8[i] ]) + idct_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize); + else if(h->mb[i*16]) + idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize); } }else{ - for(i=16; i<16+4; i++){ + for(i=16; i<16+8; i++){ if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ - uint8_t * const ptr= dest_cb + block_offset[i]; - svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2); - } - } - for(i=20; i<20+4; i++){ - if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ - uint8_t * const ptr= dest_cr + block_offset[i]; + uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i]; svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2); } } @@ -3506,36 +3845,36 @@ static void hl_decode_mb(H264Context *h){ } } if(h->deblocking_filter) { - if (h->mb_aff_frame) { + if (FRAME_MBAFF) { + //FIXME try deblocking one mb at a time? + // the reduction in load/storing mvs and such might outweigh the extra backup/xchg_border const int mb_y = s->mb_y - 1; uint8_t *pair_dest_y, *pair_dest_cb, *pair_dest_cr; const int mb_xy= mb_x + mb_y*s->mb_stride; const int mb_type_top = s->current_picture.mb_type[mb_xy]; const int mb_type_bottom= s->current_picture.mb_type[mb_xy+s->mb_stride]; - uint8_t tmp = s->current_picture.data[1][384]; if (!bottom) return; pair_dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16; pair_dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8; pair_dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8; + if(IS_INTRA(mb_type_top | mb_type_bottom)) + xchg_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize, 0); + backup_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize); - // TODO deblock a pair + // deblock a pair // top s->mb_y--; tprintf("call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x, mb_y, pair_dest_y, dest_y); fill_caches(h, mb_type_top, 1); //FIXME don't fill stuff which isn't used by filter_mb + h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy]); filter_mb(h, mb_x, mb_y, pair_dest_y, pair_dest_cb, pair_dest_cr, linesize, uvlinesize); - if (tmp != s->current_picture.data[1][384]) { - tprintf("modified pixel 8,1 (1)\n"); - } // bottom s->mb_y++; tprintf("call mbaff filter_mb\n"); fill_caches(h, mb_type_bottom, 1); //FIXME don't fill stuff which isn't used by filter_mb + h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy+s->mb_stride]); filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize); - if (tmp != s->current_picture.data[1][384]) { - tprintf("modified pixel 8,1 (2)\n"); - } } else { tprintf("call filter_mb\n"); backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize); @@ -3759,6 +4098,35 @@ static int decode_ref_pic_list_reordering(H264Context *h){ return 0; } +static void fill_mbaff_ref_list(H264Context *h){ + int list, i, j; + for(list=0; list<2; list++){ + for(i=0; i<h->ref_count[list]; i++){ + Picture *frame = &h->ref_list[list][i]; + Picture *field = &h->ref_list[list][16+2*i]; + field[0] = *frame; + for(j=0; j<3; j++) + field[0].linesize[j] <<= 1; + field[1] = field[0]; + for(j=0; j<3; j++) + field[1].data[j] += frame->linesize[j]; + + h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i]; + h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i]; + for(j=0; j<2; j++){ + h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j]; + h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j]; + } + } + } + for(j=0; j<h->ref_count[1]; j++){ + for(i=0; i<h->ref_count[0]; i++) + h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i]; + memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight)); + memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight)); + } +} + static int pred_weight_table(H264Context *h){ MpegEncContext * const s = &h->s; int list, i; @@ -3828,7 +4196,6 @@ static void implicit_weight_table(H264Context *h){ h->luma_log2_weight_denom= 5; h->chroma_log2_weight_denom= 5; - /* FIXME: MBAFF */ for(ref0=0; ref0 < h->ref_count[0]; ref0++){ int poc0 = h->ref_list[0][ref0].poc; for(ref1=0; ref1 < h->ref_count[1]; ref1++){ @@ -3887,8 +4254,13 @@ static void idr(H264Context *h){ static void flush_dpb(AVCodecContext *avctx){ H264Context *h= avctx->priv_data; int i; - for(i=0; i<16; i++) + for(i=0; i<16; i++) { + if(h->delayed_pic[i]) + h->delayed_pic[i]->reference= 0; h->delayed_pic[i]= NULL; + } + if(h->delayed_output_pic) + h->delayed_output_pic->reference= 0; h->delayed_output_pic= NULL; idr(h); if(h->s.current_picture_ptr) @@ -4263,8 +4635,8 @@ static int decode_slice_header(H264Context *h){ s->mb_width= h->sps.mb_width; s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag); - h->b_stride= s->mb_width*4 + 1; - h->b8_stride= s->mb_width*2 + 1; + h->b_stride= s->mb_width*4; + h->b8_stride= s->mb_width*2; s->width = 16*s->mb_width - 2*(h->sps.crop_left + h->sps.crop_right ); if(h->sps.frame_mbs_only_flag) @@ -4290,14 +4662,39 @@ static int decode_slice_header(H264Context *h){ #define T(x) (x>>2) | ((x<<2) & 0xF) h->zigzag_scan[i] = T(zigzag_scan[i]); h-> field_scan[i] = T( field_scan[i]); +#undef T + } + } + if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){ + memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t)); + memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t)); + memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t)); + memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t)); + }else{ + int i; + for(i=0; i<64; i++){ +#define T(x) (x>>3) | ((x&7)<<3) + h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]); + h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]); + h->field_scan8x8[i] = T(field_scan8x8[i]); + h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]); +#undef T } } if(h->sps.transform_bypass){ //FIXME same ugly - h->zigzag_scan_q0 = zigzag_scan; - h->field_scan_q0 = field_scan; + h->zigzag_scan_q0 = zigzag_scan; + h->zigzag_scan8x8_q0 = zigzag_scan8x8; + h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc; + h->field_scan_q0 = field_scan; + h->field_scan8x8_q0 = field_scan8x8; + h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc; }else{ - h->zigzag_scan_q0 = h->zigzag_scan; - h->field_scan_q0 = h->field_scan; + h->zigzag_scan_q0 = h->zigzag_scan; + h->zigzag_scan8x8_q0 = h->zigzag_scan8x8; + h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc; + h->field_scan_q0 = h->field_scan; + h->field_scan8x8_q0 = h->field_scan8x8; + h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc; } alloc_tables(h); @@ -4325,21 +4722,22 @@ static int decode_slice_header(H264Context *h){ s->current_picture_ptr->frame_num= //FIXME frame_num cleanup h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num); + h->mb_mbaff = 0; h->mb_aff_frame = 0; if(h->sps.frame_mbs_only_flag){ s->picture_structure= PICT_FRAME; }else{ if(get_bits1(&s->gb)) { //field_pic_flag s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag + av_log(h->s.avctx, AV_LOG_ERROR, "PAFF interlacing is not implemented\n"); } else { s->picture_structure= PICT_FRAME; - first_mb_in_slice <<= h->sps.mb_aff; h->mb_aff_frame = h->sps.mb_aff; } } s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width; - s->resync_mb_y = s->mb_y = first_mb_in_slice / s->mb_width; + s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << h->mb_aff_frame; if(s->mb_y >= s->mb_height){ return -1; } @@ -4384,6 +4782,8 @@ static int decode_slice_header(H264Context *h){ if(h->slice_type == P_TYPE || h->slice_type == SP_TYPE || h->slice_type == B_TYPE){ if(h->slice_type == B_TYPE){ h->direct_spatial_mv_pred= get_bits1(&s->gb); + if(h->sps.mb_aff && h->direct_spatial_mv_pred) + av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + spatial direct mode is not implemented\n"); } num_ref_idx_active_override_flag= get_bits1(&s->gb); @@ -4417,6 +4817,9 @@ static int decode_slice_header(H264Context *h){ if(s->current_picture.reference) decode_ref_pic_marking(h); + if(FRAME_MBAFF) + fill_mbaff_ref_list(h); + if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE && h->pps.cabac ) h->cabac_init_idc = get_ue_golomb(&s->gb); @@ -4461,6 +4864,9 @@ static int decode_slice_header(H264Context *h){ h->slice_num++; + h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16; + h->emu_edge_height= FRAME_MBAFF ? 0 : h->emu_edge_width; + if(s->avctx->debug&FF_DEBUG_PICT_INFO){ av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%d frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s\n", h->slice_num, @@ -4662,6 +5068,17 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in return 0; } +static void predict_field_decoding_flag(H264Context *h){ + MpegEncContext * const s = &h->s; + const int mb_xy= s->mb_x + s->mb_y*s->mb_stride; + int mb_type = (h->slice_table[mb_xy-1] == h->slice_num) + ? s->current_picture.mb_type[mb_xy-1] + : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num) + ? s->current_picture.mb_type[mb_xy-s->mb_stride] + : 0; + h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0; +} + /** * decodes a P_SKIP or B_SKIP macroblock */ @@ -4673,10 +5090,7 @@ static void decode_mb_skip(H264Context *h){ memset(h->non_zero_count[mb_xy], 0, 16); memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui - if(h->mb_aff_frame && s->mb_skip_run==0 && (s->mb_y&1)==0){ - h->mb_field_decoding_flag= get_bits1(&s->gb); - } - if(h->mb_field_decoding_flag) + if(MB_FIELD) mb_type|= MB_TYPE_INTERLACED; if( h->slice_type == B_TYPE ) @@ -4731,13 +5145,19 @@ static int decode_mb_cavlc(H264Context *h){ s->mb_skip_run= get_ue_golomb(&s->gb); if (s->mb_skip_run--) { + if(FRAME_MBAFF && (s->mb_y&1) == 0){ + if(s->mb_skip_run==0) + h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb); + else + predict_field_decoding_flag(h); + } decode_mb_skip(h); return 0; } } - if(h->mb_aff_frame){ - if ( ((s->mb_y&1) == 0) || h->prev_mb_skipped) - h->mb_field_decoding_flag = get_bits1(&s->gb); + if(FRAME_MBAFF){ + if( (s->mb_y&1) == 0 ) + h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb); }else h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME); @@ -4773,7 +5193,7 @@ decode_intra_mb: mb_type= i_mb_type_info[mb_type].type; } - if(h->mb_field_decoding_flag) + if(MB_FIELD) mb_type |= MB_TYPE_INTERLACED; h->slice_table[ mb_xy ]= h->slice_num; @@ -4817,6 +5237,11 @@ decode_intra_mb: return 0; } + if(MB_MBAFF){ + h->ref_count[0] <<= 1; + h->ref_count[1] <<= 1; + } + fill_caches(h, mb_type, 0); //mb_pred @@ -4832,18 +5257,11 @@ decode_intra_mb: // fill_intra4x4_pred_table(h); for(i=0; i<16; i+=di){ - const int mode_coded= !get_bits1(&s->gb); - const int predicted_mode= pred_intra_mode(h, i); - int mode; + int mode= pred_intra_mode(h, i); - if(mode_coded){ + if(!get_bits1(&s->gb)){ const int rem_mode= get_bits(&s->gb, 3); - if(rem_mode<predicted_mode) - mode= rem_mode; - else - mode= rem_mode + 1; - }else{ - mode= predicted_mode; + mode = rem_mode + (rem_mode >= mode); } if(di==4) @@ -4901,9 +5319,6 @@ decode_intra_mb: for(list=0; list<2; list++){ int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list]; if(ref_count == 0) continue; - if (h->mb_aff_frame && h->mb_field_decoding_flag) { - ref_count <<= 1; - } for(i=0; i<4; i++){ if(IS_DIRECT(h->sub_mb_type[i])) continue; if(IS_DIR(h->sub_mb_type[i], 0, list)){ @@ -5074,14 +5489,16 @@ decode_intra_mb: int i8x8, i4x4, chroma_idx; int chroma_qp, dquant; GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr; - const uint8_t *scan, *dc_scan; + const uint8_t *scan, *scan8x8, *dc_scan; // fill_non_zero_count_cache(h); if(IS_INTERLACED(mb_type)){ + scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0; scan= s->qscale ? h->field_scan : h->field_scan_q0; dc_scan= luma_dc_field_scan; }else{ + scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0; scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0; dc_scan= luma_dc_zigzag_scan; } @@ -5126,12 +5543,12 @@ decode_intra_mb: DCTELEM *buf = &h->mb[64*i8x8]; uint8_t *nnz; for(i4x4=0; i4x4<4; i4x4++){ - if( decode_residual(h, gb, buf, i4x4+4*i8x8, zigzag_scan8x8_cavlc+16*i4x4, + if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 ) return -1; } nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ]; - nnz[0] |= nnz[1] | nnz[8] | nnz[9]; + nnz[0] += nnz[1] + nnz[8] + nnz[9]; }else{ for(i4x4=0; i4x4<4; i4x4++){ const int index= i4x4 + 4*i8x8; @@ -5178,6 +5595,11 @@ decode_intra_mb: s->current_picture.qscale_table[mb_xy]= s->qscale; write_back_non_zero_count(h); + if(MB_MBAFF){ + h->ref_count[0] >>= 1; + h->ref_count[1] >>= 1; + } + return 0; } @@ -5225,19 +5647,11 @@ static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_sl return 25; /* PCM */ mb_type = 1; /* I16x16 */ - if( get_cabac( &h->cabac, &state[1] ) ) - mb_type += 12; /* cbp_luma != 0 */ - - if( get_cabac( &h->cabac, &state[2] ) ) { - if( get_cabac( &h->cabac, &state[2+intra_slice] ) ) - mb_type += 4 * 2; /* cbp_chroma == 2 */ - else - mb_type += 4 * 1; /* cbp_chroma == 1 */ - } - if( get_cabac( &h->cabac, &state[3+intra_slice] ) ) - mb_type += 2; - if( get_cabac( &h->cabac, &state[3+2*intra_slice] ) ) - mb_type += 1; + mb_type += 12 * get_cabac( &h->cabac, &state[1] ); /* cbp_luma != 0 */ + if( get_cabac( &h->cabac, &state[2] ) ) /* cbp_chroma */ + mb_type += 4 + 4 * get_cabac( &h->cabac, &state[2+intra_slice] ); + mb_type += 2 * get_cabac( &h->cabac, &state[3+intra_slice] ); + mb_type += 1 * get_cabac( &h->cabac, &state[3+2*intra_slice] ); return mb_type; } @@ -5250,15 +5664,11 @@ static int decode_cabac_mb_type( H264Context *h ) { if( get_cabac( &h->cabac, &h->cabac_state[14] ) == 0 ) { /* P-type */ if( get_cabac( &h->cabac, &h->cabac_state[15] ) == 0 ) { - if( get_cabac( &h->cabac, &h->cabac_state[16] ) == 0 ) - return 0; /* P_L0_D16x16; */ - else - return 3; /* P_8x8; */ + /* P_L0_D16x16, P_8x8 */ + return 3 * get_cabac( &h->cabac, &h->cabac_state[16] ); } else { - if( get_cabac( &h->cabac, &h->cabac_state[17] ) == 0 ) - return 2; /* P_L0_D8x16; */ - else - return 1; /* P_L0_D16x8; */ + /* P_L0_D8x16, P_L0_D16x8 */ + return 2 - get_cabac( &h->cabac, &h->cabac_state[17] ); } } else { return decode_cabac_intra_mb_type(h, 17, 0) + 5; @@ -5269,11 +5679,9 @@ static int decode_cabac_mb_type( H264Context *h ) { int ctx = 0; int bits; - if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ) - && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) ) + if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) ) ctx++; - if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ) - && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) ) + if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) ) ctx++; if( !get_cabac( &h->cabac, &h->cabac_state[27+ctx] ) ) @@ -5304,22 +5712,40 @@ static int decode_cabac_mb_type( H264Context *h ) { } } -static int decode_cabac_mb_skip( H264Context *h) { +static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) { MpegEncContext * const s = &h->s; - const int mb_xy = s->mb_x + s->mb_y*s->mb_stride; - const int mba_xy = mb_xy - 1; - const int mbb_xy = mb_xy - s->mb_stride; + int mba_xy, mbb_xy; int ctx = 0; + if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches? + int mb_xy = mb_x + (mb_y&~1)*s->mb_stride; + mba_xy = mb_xy - 1; + if( (mb_y&1) + && h->slice_table[mba_xy] == h->slice_num + && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) + mba_xy += s->mb_stride; + if( MB_FIELD ){ + mbb_xy = mb_xy - s->mb_stride; + if( !(mb_y&1) + && h->slice_table[mbb_xy] == h->slice_num + && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) + mbb_xy -= s->mb_stride; + }else + mbb_xy = mb_x + (mb_y-1)*s->mb_stride; + }else{ + int mb_xy = mb_x + mb_y*s->mb_stride; + mba_xy = mb_xy - 1; + mbb_xy = mb_xy - s->mb_stride; + } + if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] )) ctx++; if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] )) ctx++; - if( h->slice_type == P_TYPE || h->slice_type == SP_TYPE) - return get_cabac( &h->cabac, &h->cabac_state[11+ctx] ); - else /* B-frame */ - return get_cabac( &h->cabac, &h->cabac_state[24+ctx] ); + if( h->slice_type == B_TYPE ) + ctx += 13; + return get_cabac( &h->cabac, &h->cabac_state[11+ctx] ); } static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) { @@ -5376,14 +5802,17 @@ static const uint8_t block_idx_xy[4][4] = { }; static int decode_cabac_mb_cbp_luma( H264Context *h) { - MpegEncContext * const s = &h->s; - int cbp = 0; + int cbp_b = -1; int i8x8; + if( h->slice_table[h->top_mb_xy] == h->slice_num ) { + cbp_b = h->top_cbp; + tprintf("cbp_b = top_cbp = %x\n", cbp_b); + } + for( i8x8 = 0; i8x8 < 4; i8x8++ ) { int cbp_a = -1; - int cbp_b = -1; int x, y; int ctx = 0; @@ -5392,17 +5821,13 @@ static int decode_cabac_mb_cbp_luma( H264Context *h) { if( x > 0 ) cbp_a = cbp; - else if( s->mb_x > 0 && (h->slice_table[h->left_mb_xy[0]] == h->slice_num)) { + else if( h->slice_table[h->left_mb_xy[0]] == h->slice_num ) { cbp_a = h->left_cbp; tprintf("cbp_a = left_cbp = %x\n", cbp_a); } if( y > 0 ) cbp_b = cbp; - else if( s->mb_y > 0 && (h->slice_table[h->top_mb_xy] == h->slice_num)) { - cbp_b = h->top_cbp; - tprintf("cbp_b = top_cbp = %x\n", cbp_b); - } /* No need to test for skip as we put 0 for skip block */ /* No need to test for IPCM as we put 1 for IPCM block */ @@ -5453,7 +5878,7 @@ static int decode_cabac_mb_dqp( H264Context *h) { else mbn_xy = s->mb_width - 1 + (s->mb_y-1)*s->mb_stride; - if( h->last_qscale_diff != 0 && ( IS_INTRA16x16(s->current_picture.mb_type[mbn_xy] ) || (h->cbp_table[mbn_xy]&0x3f) ) ) + if( h->last_qscale_diff != 0 ) ctx++; while( get_cabac( &h->cabac, &h->cabac_state[60 + ctx] ) ) { @@ -5462,7 +5887,7 @@ static int decode_cabac_mb_dqp( H264Context *h) { else ctx = 3; val++; - if(val > 52) //prevent infinite loop + if(val > 102) //prevent infinite loop return INT_MIN; } @@ -5598,16 +6023,26 @@ static int inline get_cabac_cbf_ctx( H264Context *h, int cat, int idx ) { static int decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff) { const int mb_xy = h->s.mb_x + h->s.mb_y*h->s.mb_stride; - static const int significant_coeff_flag_field_offset[2] = { 105, 277 }; - static const int last_significant_coeff_flag_field_offset[2] = { 166, 338 }; - static const int significant_coeff_flag_offset[6] = { 0, 15, 29, 44, 47, 297 }; - static const int last_significant_coeff_flag_offset[6] = { 0, 15, 29, 44, 47, 251 }; - static const int coeff_abs_level_m1_offset[6] = { 227+0, 227+10, 227+20, 227+30, 227+39, 426 }; - static const int significant_coeff_flag_offset_8x8[63] = { - 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5, + static const int significant_coeff_flag_offset[2][6] = { + { 105+0, 105+15, 105+29, 105+44, 105+47, 402 }, + { 277+0, 277+15, 277+29, 277+44, 277+47, 436 } + }; + static const int last_coeff_flag_offset[2][6] = { + { 166+0, 166+15, 166+29, 166+44, 166+47, 417 }, + { 338+0, 338+15, 338+29, 338+44, 338+47, 451 } + }; + static const int coeff_abs_level_m1_offset[6] = { + 227+0, 227+10, 227+20, 227+30, 227+39, 426 + }; + static const int significant_coeff_flag_offset_8x8[2][63] = { + { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5, 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7, 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11, - 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 + 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 }, + { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5, + 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11, + 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9, + 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 } }; static const int last_coeff_flag_offset_8x8[63] = { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -5649,11 +6084,9 @@ static int decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n } significant_coeff_ctx_base = h->cabac_state - + significant_coeff_flag_offset[cat] - + significant_coeff_flag_field_offset[h->mb_field_decoding_flag]; + + significant_coeff_flag_offset[MB_FIELD][cat]; last_coeff_ctx_base = h->cabac_state - + last_significant_coeff_flag_offset[cat] - + last_significant_coeff_flag_field_offset[h->mb_field_decoding_flag]; + + last_coeff_flag_offset[MB_FIELD][cat]; abs_level_m1_ctx_base = h->cabac_state + coeff_abs_level_m1_offset[cat]; @@ -5670,8 +6103,8 @@ static int decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n } \ } \ } - DECODE_SIGNIFICANCE( 63, significant_coeff_flag_offset_8x8[last], - last_coeff_flag_offset_8x8[last] ); + const int *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD]; + DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] ); } else { DECODE_SIGNIFICANCE( max_coeff - 1, last, last ); } @@ -5690,7 +6123,7 @@ static int decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n h->non_zero_count_cache[scan8[16+n]] = coeff_count; else { assert( cat == 5 ); - fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, 1, 1); + fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1); } for( i = coeff_count - 1; i >= 0; i-- ) { @@ -5747,12 +6180,12 @@ static void inline compute_mb_neighbors(H264Context *h) const int mb_xy = s->mb_x + s->mb_y*s->mb_stride; h->top_mb_xy = mb_xy - s->mb_stride; h->left_mb_xy[0] = mb_xy - 1; - if(h->mb_aff_frame){ + if(FRAME_MBAFF){ const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride; const int top_pair_xy = pair_xy - s->mb_stride; const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]); const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]); - const int curr_mb_frame_flag = !h->mb_field_decoding_flag; + const int curr_mb_frame_flag = !MB_FIELD; const int bottom = (s->mb_y & 1); if (bottom ? !curr_mb_frame_flag // bottom macroblock @@ -5781,8 +6214,25 @@ static int decode_mb_cabac(H264Context *h) { tprintf("pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y); if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE ) { + int skip; + /* a skipped mb needs the aff flag from the following mb */ + if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 ) + predict_field_decoding_flag(h); + if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped ) + skip = h->next_mb_skipped; + else + skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y ); /* read skip flags */ - if( decode_cabac_mb_skip( h ) ) { + if( skip ) { + if( FRAME_MBAFF && (s->mb_y&1)==0 ){ + s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP; + h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 ); + if(h->next_mb_skipped) + predict_field_decoding_flag(h); + else + h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h); + } + decode_mb_skip(h); h->cbp_table[mb_xy] = 0; @@ -5793,8 +6243,9 @@ static int decode_mb_cabac(H264Context *h) { } } - if(h->mb_aff_frame){ - if ( ((s->mb_y&1) == 0) || h->prev_mb_skipped) + if(FRAME_MBAFF){ + if( (s->mb_y&1) == 0 ) + h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h); }else h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME); @@ -5831,7 +6282,7 @@ decode_intra_mb: h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode; mb_type= i_mb_type_info[mb_type].type; } - if(h->mb_field_decoding_flag) + if(MB_FIELD) mb_type |= MB_TYPE_INTERLACED; h->slice_table[ mb_xy ]= h->slice_num; @@ -5883,6 +6334,11 @@ decode_intra_mb: return 0; } + if(MB_MBAFF){ + h->ref_count[0] <<= 1; + h->ref_count[1] <<= 1; + } + fill_caches(h, mb_type, 0); if( IS_INTRA( mb_type ) ) { @@ -5923,8 +6379,8 @@ decode_intra_mb: sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count; h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type; } - if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1]) - || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) { + if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] | + h->sub_mb_type[2] | h->sub_mb_type[3]) ) { pred_direct_motion(h, &mb_type); if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) { for( i = 0; i < 4; i++ ) @@ -6133,13 +6589,15 @@ decode_intra_mb: s->current_picture.mb_type[mb_xy]= mb_type; if( cbp || IS_INTRA16x16( mb_type ) ) { - const uint8_t *scan, *dc_scan; + const uint8_t *scan, *scan8x8, *dc_scan; int dqp; if(IS_INTERLACED(mb_type)){ + scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0; scan= s->qscale ? h->field_scan : h->field_scan_q0; dc_scan= luma_dc_field_scan; }else{ + scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0; scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0; dc_scan= luma_dc_zigzag_scan; } @@ -6176,7 +6634,7 @@ decode_intra_mb: if( cbp & (1<<i8x8) ) { if( IS_8x8DCT(mb_type) ) { if( decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8, - zigzag_scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64) < 0 ) + scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64) < 0 ) return -1; } else for( i4x4 = 0; i4x4 < 4; i4x4++ ) { @@ -6221,11 +6679,17 @@ decode_intra_mb: fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1); nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] = nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0; + h->last_qscale_diff = 0; } s->current_picture.qscale_table[mb_xy]= s->qscale; write_back_non_zero_count(h); + if(MB_MBAFF){ + h->ref_count[0] >>= 1; + h->ref_count[1] >>= 1; + } + return 0; } @@ -6316,7 +6780,7 @@ static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int int qp_index; int bS_index = (i >> 1); - if (h->mb_field_decoding_flag) { + if (!MB_FIELD) { bS_index &= ~1; bS_index |= (i & 1); } @@ -6325,15 +6789,13 @@ static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int continue; } - qp_index = h->mb_field_decoding_flag ? (i & 1) : (i >> 3); + qp_index = MB_FIELD ? (i >> 3) : (i & 1); index_a = clip( qp[qp_index] + h->slice_alpha_c0_offset, 0, 51 ); alpha = alpha_table[index_a]; beta = beta_table[clip( qp[qp_index] + h->slice_beta_offset, 0, 51 )]; - if( bS[bS_index] < 4 ) { const int tc0 = tc0_table[index_a][bS[bS_index] - 1]; - /* 4px edge length */ const int p0 = pix[-1]; const int p1 = pix[-2]; const int p2 = pix[-3]; @@ -6362,7 +6824,6 @@ static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int tprintf("filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1); } }else{ - /* 4px edge length */ const int p0 = pix[-1]; const int p1 = pix[-2]; const int p2 = pix[-3]; @@ -6408,7 +6869,7 @@ static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int } } } -static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp[2] ) { +static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int bS[8], int qp[2] ) { int i; for( i = 0; i < 8; i++, pix += stride) { int index_a; @@ -6422,13 +6883,13 @@ static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, in continue; } - qp_index = h->mb_field_decoding_flag ? (i & 1) : (i >> 3); + qp_index = MB_FIELD ? (i >> 2) : (i & 1); index_a = clip( qp[qp_index] + h->slice_alpha_c0_offset, 0, 51 ); alpha = alpha_table[index_a]; beta = beta_table[clip( qp[qp_index] + h->slice_beta_offset, 0, 51 )]; + if( bS[bS_index] < 4 ) { const int tc = tc0_table[index_a][bS[bS_index] - 1] + 1; - /* 2px edge length (because we use same bS than the one for luma) */ const int p0 = pix[-1]; const int p1 = pix[-2]; const int q0 = pix[0]; @@ -6540,85 +7001,91 @@ static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int bS[4 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) { MpegEncContext * const s = &h->s; const int mb_xy= mb_x + mb_y*s->mb_stride; + const int mb_type = s->current_picture.mb_type[mb_xy]; + const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4; int first_vertical_edge_done = 0; int dir; /* FIXME: A given frame may occupy more than one position in * the reference list. So ref2frm should be populated with * frame numbers, not indices. */ - static const int ref2frm[18] = {-1,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; + static const int ref2frm[34] = {-1,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15, + 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31}; + + //for sufficiently low qp, filtering wouldn't do anything + //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp + if(!FRAME_MBAFF){ + int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX(0, h->pps.chroma_qp_index_offset); + int qp = s->current_picture.qscale_table[mb_xy]; + if(qp <= qp_thresh + && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh) + && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){ + return; + } + } - if (h->mb_aff_frame + if (FRAME_MBAFF // left mb is in picture && h->slice_table[mb_xy-1] != 255 // and current and left pair do not have the same interlaced type - && (IS_INTERLACED(s->current_picture.mb_type[mb_xy]) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1])) + && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1])) // and left mb is in the same slice if deblocking_filter == 2 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) { /* First vertical edge is different in MBAFF frames * There are 8 different bS to compute and 2 different Qp */ + const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride; + const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride }; int bS[8]; int qp[2]; int chroma_qp[2]; - + int mb_qp, mbn0_qp, mbn1_qp; int i; first_vertical_edge_done = 1; - for( i = 0; i < 8; i++ ) { - int y = i>>1; - int b_idx= 8 + 4 + 8*y; - int bn_idx= b_idx - 1; - - int mbn_xy = h->mb_field_decoding_flag ? h->left_mb_xy[i>>2] : h->left_mb_xy[i&1]; - - if( IS_INTRA( s->current_picture.mb_type[mb_xy] ) || - IS_INTRA( s->current_picture.mb_type[mbn_xy] ) ) { - bS[i] = 4; - } else if( h->non_zero_count_cache[b_idx] != 0 || - /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */ - h->non_zero_count_cache[bn_idx] != 0 ) { - bS[i] = 2; - } else { - int l; - bS[i] = 0; - for( l = 0; l < 1 + (h->slice_type == B_TYPE); l++ ) { - if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] || - ABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 || - ABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= 4 ) { - bS[i] = 1; - break; - } - } + + if( IS_INTRA(mb_type) ) + bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4; + else { + for( i = 0; i < 8; i++ ) { + int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1]; + + if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) ) + bS[i] = 4; + else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 || + /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */ + h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2] ) + bS[i] = 2; + else + bS[i] = 1; } } - if(bS[0]+bS[1]+bS[2]+bS[3] != 0) { - // Do not use s->qscale as luma quantizer because it has not the same - // value in IPCM macroblocks. - qp[0] = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[h->left_mb_xy[0]] + 1 ) >> 1; - chroma_qp[0] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy] ) + - get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[h->left_mb_xy[0]] ) + 1 ) >> 1; - qp[1] = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[h->left_mb_xy[1]] + 1 ) >> 1; - chroma_qp[1] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy] ) + - get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[h->left_mb_xy[1]] ) + 1 ) >> 1; - /* Filter edge */ - tprintf("filter mb:%d/%d MBAFF, QPy:%d/%d, QPc:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], chroma_qp[0], chroma_qp[1], linesize, uvlinesize); - { int i; for (i = 0; i < 8; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); } - filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp ); - filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, chroma_qp ); - filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, chroma_qp ); - } + mb_qp = s->current_picture.qscale_table[mb_xy]; + mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]]; + mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]]; + qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1; + chroma_qp[0] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, mb_qp ) + + get_chroma_qp( h->pps.chroma_qp_index_offset, mbn0_qp ) + 1 ) >> 1; + qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1; + chroma_qp[1] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, mb_qp ) + + get_chroma_qp( h->pps.chroma_qp_index_offset, mbn1_qp ) + 1 ) >> 1; + + /* Filter edge */ + tprintf("filter mb:%d/%d MBAFF, QPy:%d/%d, QPc:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], chroma_qp[0], chroma_qp[1], linesize, uvlinesize); + { int i; for (i = 0; i < 8; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); } + filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp ); + filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, chroma_qp ); + filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, chroma_qp ); } /* dir : 0 -> vertical edge, 1 -> horizontal edge */ for( dir = 0; dir < 2; dir++ ) { int edge; const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy; - const int mb_type = s->current_picture.mb_type[mb_xy]; const int mbm_type = s->current_picture.mb_type[mbm_xy]; int start = h->slice_table[mbm_xy] == 255 ? 1 : 0; - const int edges = ((mb_type & mbm_type) & (MB_TYPE_16x16|MB_TYPE_SKIP)) - == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4; + const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP)) + == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4; // how often to recheck mv-based bS when iterating between edges const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 : (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0; @@ -6633,78 +7100,68 @@ static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy]) start = 1; - /* Calculate bS */ - for( edge = start; edge < edges; edge++ ) { - /* mbn_xy: neighbor macroblock */ - const int mbn_xy = edge > 0 ? mb_xy : mbm_xy; - const int mbn_type = s->current_picture.mb_type[mbn_xy]; + if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0 + && !IS_INTERLACED(mb_type) + && IS_INTERLACED(mbm_type) + ) { + // This is a special case in the norm where the filtering must + // be done twice (one each of the field) even if we are in a + // frame macroblock. + // + static const int nnz_idx[4] = {4,5,6,3}; + unsigned int tmp_linesize = 2 * linesize; + unsigned int tmp_uvlinesize = 2 * uvlinesize; + int mbn_xy = mb_xy - 2 * s->mb_stride; + int qp, chroma_qp; + int i, j; int bS[4]; - int qp; - - if( (edge&1) && IS_8x8DCT(mb_type) ) - continue; - if (h->mb_aff_frame && (dir == 1) && (edge == 0) && ((mb_y & 1) == 0) - && !IS_INTERLACED(mb_type) - && IS_INTERLACED(mbn_type) - ) { - // This is a special case in the norm where the filtering must - // be done twice (one each of the field) even if we are in a - // frame macroblock. - // - unsigned int tmp_linesize = 2 * linesize; - unsigned int tmp_uvlinesize = 2 * uvlinesize; - int mbn_xy = mb_xy - 2 * s->mb_stride; - int qp, chroma_qp; - - // first filtering + for(j=0; j<2; j++, mbn_xy += s->mb_stride){ if( IS_INTRA(mb_type) || IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) { bS[0] = bS[1] = bS[2] = bS[3] = 3; } else { - // TODO - av_log(h->s.avctx, AV_LOG_ERROR, "both non intra (TODO)\n"); + const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy]; + for( i = 0; i < 4; i++ ) { + if( h->non_zero_count_cache[scan8[0]+i] != 0 || + mbn_nnz[nnz_idx[i]] != 0 ) + bS[i] = 2; + else + bS[i] = 1; + } } - /* Filter edge */ // Do not use s->qscale as luma quantizer because it has not the same // value in IPCM macroblocks. qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1; tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize); { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); } - filter_mb_edgeh( h, &img_y[0], tmp_linesize, bS, qp ); + filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp ); chroma_qp = ( h->chroma_qp + get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1; - filter_mb_edgech( h, &img_cb[0], tmp_uvlinesize, bS, chroma_qp ); - filter_mb_edgech( h, &img_cr[0], tmp_uvlinesize, bS, chroma_qp ); + filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp ); + filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp ); + } - // second filtering - mbn_xy += s->mb_stride; - if( IS_INTRA(mb_type) || - IS_INTRA(mbn_type) ) { - bS[0] = bS[1] = bS[2] = bS[3] = 3; - } else { - // TODO - av_log(h->s.avctx, AV_LOG_ERROR, "both non intra (TODO)\n"); - } - /* Filter edge */ - // Do not use s->qscale as luma quantizer because it has not the same - // value in IPCM macroblocks. - qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1; - tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize); - { int i; for (i = 0; i < 4; i++) tprintf(" bS[%d]:%d", i, bS[i]); tprintf("\n"); } - filter_mb_edgeh( h, &img_y[linesize], tmp_linesize, bS, qp ); - chroma_qp = ( h->chroma_qp + - get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1; - filter_mb_edgech( h, &img_cb[uvlinesize], tmp_uvlinesize, bS, chroma_qp ); - filter_mb_edgech( h, &img_cr[uvlinesize], tmp_uvlinesize, bS, chroma_qp ); + start = 1; + } + + /* Calculate bS */ + for( edge = start; edge < edges; edge++ ) { + /* mbn_xy: neighbor macroblock */ + const int mbn_xy = edge > 0 ? mb_xy : mbm_xy; + const int mbn_type = s->current_picture.mb_type[mbn_xy]; + int bS[4]; + int qp; + + if( (edge&1) && IS_8x8DCT(mb_type) ) continue; - } + if( IS_INTRA(mb_type) || IS_INTRA(mbn_type) ) { int value; if (edge == 0) { if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type)) - || ((h->mb_aff_frame || (s->picture_structure != PICT_FRAME)) && (dir == 0)) + || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0)) ) { value = 4; } else { @@ -6722,6 +7179,10 @@ static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8 bS[0] = bS[1] = bS[2] = bS[3] = 0; mv_done = 1; } + else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) { + bS[0] = bS[1] = bS[2] = bS[3] = 1; + mv_done = 1; + } else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) { int b_idx= 8 + 4 + edge * (dir ? 8:1); int bn_idx= b_idx - (dir ? 8:1); @@ -6729,7 +7190,7 @@ static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8 for( l = 0; !v && l < 1 + (h->slice_type == B_TYPE); l++ ) { v |= ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] || ABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 || - ABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= 4; + ABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit; } bS[0] = bS[1] = bS[2] = bS[3] = v; mv_done = 1; @@ -6753,7 +7214,7 @@ static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8 for( l = 0; l < 1 + (h->slice_type == B_TYPE); l++ ) { if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] || ABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 || - ABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= 4 ) { + ABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) { bS[i] = 1; break; } @@ -6830,8 +7291,7 @@ static int decode_slice(H264Context *h){ if(ret>=0) hl_decode_mb(h); - /* XXX: useless as decode_mb_cabac it doesn't support that ... */ - if( ret >= 0 && h->mb_aff_frame ) { //FIXME optimal? or let mb_decode decode 16x32 ? + if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ? s->mb_y++; if(ret>=0) ret = decode_mb_cabac(h); @@ -6842,7 +7302,7 @@ static int decode_slice(H264Context *h){ eos = get_cabac_terminate( &h->cabac ); if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 1) { - av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y); + av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%d)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream); ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask); return -1; } @@ -6851,7 +7311,7 @@ static int decode_slice(H264Context *h){ s->mb_x = 0; ff_draw_horiz_band(s, 16*s->mb_y, 16); ++s->mb_y; - if(h->mb_aff_frame) { + if(FRAME_MBAFF) { ++s->mb_y; } } @@ -6869,7 +7329,7 @@ static int decode_slice(H264Context *h){ if(ret>=0) hl_decode_mb(h); - if(ret>=0 && h->mb_aff_frame){ //FIXME optimal? or let mb_decode decode 16x32 ? + if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ? s->mb_y++; ret = decode_mb_cavlc(h); @@ -6888,7 +7348,7 @@ static int decode_slice(H264Context *h){ s->mb_x=0; ff_draw_horiz_band(s, 16*s->mb_y, 16); ++s->mb_y; - if(h->mb_aff_frame) { + if(FRAME_MBAFF) { ++s->mb_y; } if(s->mb_y >= s->mb_height){ @@ -6929,7 +7389,7 @@ static int decode_slice(H264Context *h){ hl_decode_mb(h); if(ret<0){ - fprintf(stderr, "error while decoding MB %d %d\n", s->mb_x, s->mb_y); + av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y); ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask); return -1; @@ -7013,7 +7473,7 @@ static int decode_sei(H264Context *h){ switch(type){ case 5: - if(decode_unregistered_user_data(h, size) < 0); + if(decode_unregistered_user_data(h, size) < 0) return -1; break; default: @@ -7056,7 +7516,7 @@ static inline int decode_vui_parameters(H264Context *h, SPS *sps){ if( aspect_ratio_idc == EXTENDED_SAR ) { sps->sar.num= get_bits(&s->gb, 16); sps->sar.den= get_bits(&s->gb, 16); - }else if(aspect_ratio_idc < 16){ + }else if(aspect_ratio_idc < 14){ sps->sar= pixel_aspect[aspect_ratio_idc]; }else{ av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n"); @@ -7232,6 +7692,13 @@ static inline int decode_seq_parameter_set(H264Context *h){ sps->direct_8x8_inference_flag= get_bits1(&s->gb); +#ifndef ALLOW_INTERLACE + if(sps->mb_aff) + av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it compilation time\n"); +#endif + if(!sps->direct_8x8_inference_flag && sps->mb_aff) + av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + !direct_8x8_inference is not implemented\n"); + sps->crop= get_bits1(&s->gb); if(sps->crop){ sps->crop_left = get_ue_golomb(&s->gb); @@ -7330,6 +7797,8 @@ static inline int decode_picture_parameter_set(H264Context *h, int bit_length){ pps->constrained_intra_pred= get_bits1(&s->gb); pps->redundant_pic_cnt_present = get_bits1(&s->gb); + pps->transform_8x8_mode= 0; + h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit memset(pps->scaling_matrix4, 16, 6*16*sizeof(uint8_t)); memset(pps->scaling_matrix8, 16, 2*64*sizeof(uint8_t)); @@ -7402,6 +7871,7 @@ static int find_frame_end(H264Context *h, const uint8_t *buf, int buf_size){ return END_NOT_FOUND; } +#ifdef CONFIG_H264_PARSER static int h264_parse(AVCodecParserContext *s, AVCodecContext *avctx, uint8_t **poutbuf, int *poutbuf_size, @@ -7447,7 +7917,7 @@ static int h264_split(AVCodecContext *avctx, } return 0; } - +#endif /* CONFIG_H264_PARSER */ static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){ MpegEncContext * const s = &h->s; @@ -7473,6 +7943,15 @@ static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){ nalsize = 0; for(i = 0; i < h->nal_length_size; i++) nalsize = (nalsize << 8) | buf[buf_index++]; + if(nalsize <= 1){ + if(nalsize == 1){ + buf_index++; + continue; + }else{ + av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize); + break; + } + } } else { // start code prefix search for(; buf_index + 3 < buf_size; buf_index++){ @@ -7487,7 +7966,8 @@ static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){ } ptr= decode_nal(h, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index); - if(ptr[dst_length - 1] == 0) dst_length--; + while(ptr[dst_length - 1] == 0 && dst_length > 1) + dst_length--; bit_length= 8*dst_length - decode_rbsp_trailing(ptr + dst_length - 1); if(s->avctx->debug&FF_DEBUG_STARTCODE){ @@ -7516,6 +7996,7 @@ static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){ av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n"); break; } + s->current_picture_ptr->key_frame= (h->nal_unit_type == NAL_IDR_SLICE); if(h->redundant_pic_count==0 && s->hurry_up < 5 && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc) && (avctx->skip_frame < AVDISCARD_BIDIR || h->slice_type!=B_TYPE) @@ -7585,7 +8066,6 @@ static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){ s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264; s->current_picture_ptr->pict_type= s->pict_type; - s->current_picture_ptr->key_frame= s->pict_type == I_TYPE && h->nal_unit_type == NAL_IDR_SLICE; h->prev_frame_num_offset= h->frame_num_offset; h->prev_frame_num= h->frame_num; @@ -7711,12 +8191,7 @@ static int decode_frame(AVCodecContext *avctx, /* Sort B-frames into display order */ Picture *cur = s->current_picture_ptr; Picture *prev = h->delayed_output_pic; - int out_idx = 0; - int pics = 0; - int out_of_order; - int cross_idr = 0; - int dropped_frame = 0; - int i; + int i, pics, cross_idr, out_of_order, out_idx; if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames < h->sps.num_reorder_frames){ @@ -7724,16 +8199,19 @@ static int decode_frame(AVCodecContext *avctx, s->low_delay = 0; } + pics = 0; while(h->delayed_pic[pics]) pics++; h->delayed_pic[pics++] = cur; if(cur->reference == 0) cur->reference = 1; + cross_idr = 0; for(i=0; h->delayed_pic[i]; i++) if(h->delayed_pic[i]->key_frame || h->delayed_pic[i]->poc==0) cross_idr = 1; out = h->delayed_pic[0]; + out_idx = 0; for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++) if(h->delayed_pic[i]->poc < out->poc){ out = h->delayed_pic[i]; @@ -7741,7 +8219,9 @@ static int decode_frame(AVCodecContext *avctx, } out_of_order = !cross_idr && prev && out->poc < prev->poc; - if(prev && pics <= s->avctx->has_b_frames) + if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames) + { } + else if(prev && pics <= s->avctx->has_b_frames) out = prev; else if((out_of_order && pics-1 == s->avctx->has_b_frames && pics < 15) || (s->low_delay && @@ -7756,12 +8236,11 @@ static int decode_frame(AVCodecContext *avctx, out = prev; if(out_of_order || pics > s->avctx->has_b_frames){ - dropped_frame = (out != h->delayed_pic[out_idx]); for(i=out_idx; h->delayed_pic[i]; i++) h->delayed_pic[i] = h->delayed_pic[i+1]; } - if(prev == out && !dropped_frame) + if(prev == out) *data_size = 0; else *data_size = sizeof(AVFrame); @@ -8009,6 +8488,7 @@ AVCodec h264_decoder = { .flush= flush_dpb, }; +#ifdef CONFIG_H264_PARSER AVCodecParser h264_parser = { { CODEC_ID_H264 }, sizeof(H264Context), @@ -8017,5 +8497,6 @@ AVCodecParser h264_parser = { ff_parse_close, h264_split, }; +#endif #include "svq3.c" diff --git a/src/libffmpeg/libavcodec/h264data.h b/src/libffmpeg/libavcodec/h264data.h index 3132102df..1dd9dafe5 100644 --- a/src/libffmpeg/libavcodec/h264data.h +++ b/src/libffmpeg/libavcodec/h264data.h @@ -345,6 +345,44 @@ static const uint8_t zigzag_scan8x8_cavlc[64]={ 5+5*8, 6+5*8, 6+6*8, 7+7*8, }; +static const uint8_t field_scan8x8[64]={ + 0+0*8, 0+1*8, 0+2*8, 1+0*8, + 1+1*8, 0+3*8, 0+4*8, 1+2*8, + 2+0*8, 1+3*8, 0+5*8, 0+6*8, + 0+7*8, 1+4*8, 2+1*8, 3+0*8, + 2+2*8, 1+5*8, 1+6*8, 1+7*8, + 2+3*8, 3+1*8, 4+0*8, 3+2*8, + 2+4*8, 2+5*8, 2+6*8, 2+7*8, + 3+3*8, 4+1*8, 5+0*8, 4+2*8, + 3+4*8, 3+5*8, 3+6*8, 3+7*8, + 4+3*8, 5+1*8, 6+0*8, 5+2*8, + 4+4*8, 4+5*8, 4+6*8, 4+7*8, + 5+3*8, 6+1*8, 6+2*8, 5+4*8, + 5+5*8, 5+6*8, 5+7*8, 6+3*8, + 7+0*8, 7+1*8, 6+4*8, 6+5*8, + 6+6*8, 6+7*8, 7+2*8, 7+3*8, + 7+4*8, 7+5*8, 7+6*8, 7+7*8, +}; + +static const uint8_t field_scan8x8_cavlc[64]={ + 0+0*8, 1+1*8, 2+0*8, 0+7*8, + 2+2*8, 2+3*8, 2+4*8, 3+3*8, + 3+4*8, 4+3*8, 4+4*8, 5+3*8, + 5+5*8, 7+0*8, 6+6*8, 7+4*8, + 0+1*8, 0+3*8, 1+3*8, 1+4*8, + 1+5*8, 3+1*8, 2+5*8, 4+1*8, + 3+5*8, 5+1*8, 4+5*8, 6+1*8, + 5+6*8, 7+1*8, 6+7*8, 7+5*8, + 0+2*8, 0+4*8, 0+5*8, 2+1*8, + 1+6*8, 4+0*8, 2+6*8, 5+0*8, + 3+6*8, 6+0*8, 4+6*8, 6+2*8, + 5+7*8, 6+4*8, 7+2*8, 7+6*8, + 1+0*8, 1+2*8, 0+6*8, 3+0*8, + 1+7*8, 3+2*8, 2+7*8, 4+2*8, + 3+7*8, 5+2*8, 4+7*8, 5+4*8, + 6+3*8, 6+5*8, 7+3*8, 7+7*8, +}; + #define MB_TYPE_REF0 MB_TYPE_ACPRED //dirty but it fits in 16bit #define MB_TYPE_8x8DCT 0x01000000 #define IS_REF0(a) ((a)&MB_TYPE_REF0) diff --git a/src/libffmpeg/libavcodec/h264idct.c b/src/libffmpeg/libavcodec/h264idct.c index a4ddf1d51..3e44385d5 100755 --- a/src/libffmpeg/libavcodec/h264idct.c +++ b/src/libffmpeg/libavcodec/h264idct.c @@ -139,3 +139,28 @@ void ff_h264_idct8_add_c(uint8_t *dst, DCTELEM *block, int stride){ dst[i + 7*stride] = cm[ dst[i + 7*stride] + ((b0 - b7) >> 6) ]; } } + +// assumes all AC coefs are 0 +void ff_h264_idct_dc_add_c(uint8_t *dst, DCTELEM *block, int stride){ + int i, j; + uint8_t *cm = cropTbl + MAX_NEG_CROP; + int dc = (block[0] + 32) >> 6; + for( j = 0; j < 4; j++ ) + { + for( i = 0; i < 4; i++ ) + dst[i] = cm[ dst[i] + dc ]; + dst += stride; + } +} + +void ff_h264_idct8_dc_add_c(uint8_t *dst, DCTELEM *block, int stride){ + int i, j; + uint8_t *cm = cropTbl + MAX_NEG_CROP; + int dc = (block[0] + 32) >> 6; + for( j = 0; j < 8; j++ ) + { + for( i = 0; i < 8; i++ ) + dst[i] = cm[ dst[i] + dc ]; + dst += stride; + } +} diff --git a/src/libffmpeg/libavcodec/huffyuv.c b/src/libffmpeg/libavcodec/huffyuv.c index dc9e123ff..d65943fcc 100644 --- a/src/libffmpeg/libavcodec/huffyuv.c +++ b/src/libffmpeg/libavcodec/huffyuv.c @@ -343,7 +343,7 @@ static int read_old_huffman_tables(HYuvContext *s){ return 0; #else - fprintf(stderr, "v1 huffyuv is not supported \n"); + av_log(s->avctx, AV_LOG_DEBUG, "v1 huffyuv is not supported \n"); return -1; #endif } @@ -541,9 +541,6 @@ static int encode_init(AVCodecContext *avctx) } if(s->interlaced != ( s->height > 288 )) av_log(avctx, AV_LOG_INFO, "using huffyuv 2.2.0 or newer interlacing flag\n"); - }else if(avctx->strict_std_compliance>FF_COMPLIANCE_EXPERIMENTAL){ - av_log(avctx, AV_LOG_ERROR, "This codec is under development; files encoded with it may not be decodable with future versions!!! Set vstrict=-2 / -strict -2 to use it anyway.\n"); - return -1; } ((uint8_t*)avctx->extradata)[0]= s->predictor; @@ -808,6 +805,9 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8 return -1; } + if((unsigned)(buf_size-table_size) >= INT_MAX/8) + return -1; + init_get_bits(&s->gb, s->bitstream_buffer+table_size, (buf_size-table_size)*8); fake_ystride= s->interlaced ? p->linesize[0]*2 : p->linesize[0]; @@ -1012,7 +1012,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8 *picture= *p; *data_size = sizeof(AVFrame); - return (get_bits_count(&s->gb)+31)/32*4; + return (get_bits_count(&s->gb)+31)/32*4 + table_size; } static int common_end(HYuvContext *s){ diff --git a/src/libffmpeg/libavcodec/i386/cputest.c b/src/libffmpeg/libavcodec/i386/cputest.c index 64656c65a..a66bdbe98 100644 --- a/src/libffmpeg/libavcodec/i386/cputest.c +++ b/src/libffmpeg/libavcodec/i386/cputest.c @@ -64,6 +64,8 @@ int mm_support(void) rval |= MM_MMXEXT | MM_SSE; if (std_caps & (1<<26)) rval |= MM_SSE2; + if (ecx & 1) + rval |= MM_SSE3; } cpuid(0x80000000, max_ext_level, ebx, ecx, edx); @@ -76,38 +78,10 @@ int mm_support(void) rval |= MM_3DNOWEXT; if (ext_caps & (1<<23)) rval |= MM_MMX; - } - - cpuid(0, eax, ebx, ecx, edx); - if ( ebx == 0x68747541 && - edx == 0x69746e65 && - ecx == 0x444d4163) { - /* AMD */ - if(ext_caps & (1<<22)) - rval |= MM_MMXEXT; - } else if (ebx == 0x746e6543 && - edx == 0x48727561 && - ecx == 0x736c7561) { /* "CentaurHauls" */ - /* VIA C3 */ - if(ext_caps & (1<<24)) - rval |= MM_MMXEXT; - } else if (ebx == 0x69727943 && - edx == 0x736e4978 && - ecx == 0x64616574) { - /* Cyrix Section */ - /* See if extended CPUID level 80000001 is supported */ - /* The value of CPUID/80000001 for the 6x86MX is undefined - according to the Cyrix CPU Detection Guide (Preliminary - Rev. 1.01 table 1), so we'll check the value of eax for - CPUID/0 to see if standard CPUID level 2 is supported. - According to the table, the only CPU which supports level - 2 is also the only one which supports extended CPUID levels. - */ - if (eax < 2) - return rval; - if (ext_caps & (1<<24)) + if (ext_caps & (1<<22)) rval |= MM_MMXEXT; } + #if 0 av_log(NULL, AV_LOG_DEBUG, "%s%s%s%s%s%s\n", (rval&MM_MMX) ? "MMX ":"", diff --git a/src/libffmpeg/libavcodec/i386/dsputil_h264_template_mmx.c b/src/libffmpeg/libavcodec/i386/dsputil_h264_template_mmx.c index d52938ccf..b49c880a7 100644 --- a/src/libffmpeg/libavcodec/i386/dsputil_h264_template_mmx.c +++ b/src/libffmpeg/libavcodec/i386/dsputil_h264_template_mmx.c @@ -37,112 +37,56 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1* assert(x<8 && y<8 && x>=0 && y>=0); - if(y==0) + if(y==0 || x==0) { - /* horizontal filter only */ - asm volatile("movd %0, %%mm5\n\t" - "punpcklwd %%mm5, %%mm5\n\t" - "punpckldq %%mm5, %%mm5\n\t" /* mm5 = B = x */ - "movq %1, %%mm4\n\t" - "pxor %%mm7, %%mm7\n\t" - "psubw %%mm5, %%mm4\n\t" /* mm4 = A = 8-x */ - : : "rm" (x), "m" (ff_pw_8)); + /* 1 dimensional filter only */ + const int dxy = x ? 1 : stride; + + asm volatile( + "movd %0, %%mm5\n\t" + "movq %1, %%mm4\n\t" + "punpcklwd %%mm5, %%mm5\n\t" + "punpckldq %%mm5, %%mm5\n\t" /* mm5 = B = x */ + "movq %%mm4, %%mm6\n\t" + "pxor %%mm7, %%mm7\n\t" + "psubw %%mm5, %%mm4\n\t" /* mm4 = A = 8-x */ + "psrlw $1, %%mm6\n\t" /* mm6 = 4 */ + :: "rm"(x+y), "m"(ff_pw_8)); for(i=0; i<h; i++) { asm volatile( /* mm0 = src[0..7], mm1 = src[1..8] */ "movq %0, %%mm0\n\t" - "movq %1, %%mm1\n\t" - : : "m" (src[0]), "m" (src[1])); + "movq %1, %%mm2\n\t" + :: "m"(src[0]), "m"(src[dxy])); asm volatile( - /* [mm2,mm3] = A * src[0..7] */ - "movq %%mm0, %%mm2\n\t" - "punpcklbw %%mm7, %%mm2\n\t" - "pmullw %%mm4, %%mm2\n\t" - "movq %%mm0, %%mm3\n\t" - "punpckhbw %%mm7, %%mm3\n\t" - "pmullw %%mm4, %%mm3\n\t" - - /* [mm2,mm3] += B * src[1..8] */ - "movq %%mm1, %%mm0\n\t" + /* [mm0,mm1] = A * src[0..7] */ + /* [mm2,mm3] = B * src[1..8] */ + "movq %%mm0, %%mm1\n\t" + "movq %%mm2, %%mm3\n\t" "punpcklbw %%mm7, %%mm0\n\t" - "pmullw %%mm5, %%mm0\n\t" "punpckhbw %%mm7, %%mm1\n\t" - "pmullw %%mm5, %%mm1\n\t" - "paddw %%mm0, %%mm2\n\t" - "paddw %%mm1, %%mm3\n\t" - - /* dst[0..7] = pack(([mm2,mm3] + 32) >> 6) */ - "paddw %1, %%mm2\n\t" - "paddw %1, %%mm3\n\t" - "psrlw $3, %%mm2\n\t" - "psrlw $3, %%mm3\n\t" - "packuswb %%mm3, %%mm2\n\t" - H264_CHROMA_OP(%0, %%mm2) - "movq %%mm2, %0\n\t" - : "=m" (dst[0]) : "m" (ff_pw_4)); - - src += stride; - dst += stride; - } - return; - } - - if(x==0) - { - /* vertical filter only */ - asm volatile("movd %0, %%mm6\n\t" - "punpcklwd %%mm6, %%mm6\n\t" - "punpckldq %%mm6, %%mm6\n\t" /* mm6 = C = y */ - "movq %1, %%mm4\n\t" - "pxor %%mm7, %%mm7\n\t" - "psubw %%mm6, %%mm4\n\t" /* mm4 = A = 8-y */ - : : "rm" (y), "m" (ff_pw_8)); - - asm volatile( - /* mm0 = src[0..7] */ - "movq %0, %%mm0\n\t" - : : "m" (src[0])); - - for(i=0; i<h; i++) { - asm volatile( - /* [mm2,mm3] = A * src[0..7] */ - "movq %mm0, %mm2\n\t" - "punpcklbw %mm7, %mm2\n\t" - "pmullw %mm4, %mm2\n\t" - "movq %mm0, %mm3\n\t" - "punpckhbw %mm7, %mm3\n\t" - "pmullw %mm4, %mm3\n\t"); + "punpcklbw %%mm7, %%mm2\n\t" + "punpckhbw %%mm7, %%mm3\n\t" + "pmullw %%mm4, %%mm0\n\t" + "pmullw %%mm4, %%mm1\n\t" + "pmullw %%mm5, %%mm2\n\t" + "pmullw %%mm5, %%mm3\n\t" + + /* dst[0..7] = (A * src[0..7] + B * src[1..8] + 4) >> 3 */ + "paddw %%mm6, %%mm0\n\t" + "paddw %%mm6, %%mm1\n\t" + "paddw %%mm2, %%mm0\n\t" + "paddw %%mm3, %%mm1\n\t" + "psrlw $3, %%mm0\n\t" + "psrlw $3, %%mm1\n\t" + "packuswb %%mm1, %%mm0\n\t" + H264_CHROMA_OP(%0, %%mm0) + "movq %%mm0, %0\n\t" + : "=m" (dst[0])); src += stride; - asm volatile( - /* mm0 = src[0..7] */ - "movq %0, %%mm0\n\t" - : : "m" (src[0])); - - asm volatile( - /* [mm2,mm3] += C * src[0..7] */ - "movq %mm0, %mm1\n\t" - "punpcklbw %mm7, %mm1\n\t" - "pmullw %mm6, %mm1\n\t" - "paddw %mm1, %mm2\n\t" - "movq %mm0, %mm5\n\t" - "punpckhbw %mm7, %mm5\n\t" - "pmullw %mm6, %mm5\n\t" - "paddw %mm5, %mm3\n\t"); - - asm volatile( - /* dst[0..7] = pack(([mm2,mm3] + 32) >> 6) */ - "paddw %1, %%mm2\n\t" - "paddw %1, %%mm3\n\t" - "psrlw $3, %%mm2\n\t" - "psrlw $3, %%mm3\n\t" - "packuswb %%mm3, %%mm2\n\t" - H264_CHROMA_OP(%0, %%mm2) - "movq %%mm2, %0\n\t" - : "=m" (dst[0]) : "m" (ff_pw_4)); - dst += stride; } return; @@ -177,57 +121,53 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1* : : "m" (src[0]), "m" (src[1])); for(i=0; i<h; i++) { + src += stride; + asm volatile( - /* [mm2,mm3] = A * src[0..7] */ + /* mm2 = A * src[0..3] + B * src[1..4] */ + /* mm3 = A * src[4..7] + B * src[5..8] */ "movq %%mm0, %%mm2\n\t" + "movq %%mm1, %%mm3\n\t" + "punpckhbw %%mm7, %%mm0\n\t" + "punpcklbw %%mm7, %%mm1\n\t" "punpcklbw %%mm7, %%mm2\n\t" - "pmullw %0, %%mm2\n\t" - "movq %%mm0, %%mm3\n\t" "punpckhbw %%mm7, %%mm3\n\t" - "pmullw %0, %%mm3\n\t" + "pmullw %0, %%mm0\n\t" + "pmullw %0, %%mm2\n\t" + "pmullw %%mm5, %%mm1\n\t" + "pmullw %%mm5, %%mm3\n\t" + "paddw %%mm1, %%mm2\n\t" + "paddw %%mm0, %%mm3\n\t" + : : "m" (AA)); - /* [mm2,mm3] += B * src[1..8] */ - "movq %%mm1, %%mm0\n\t" + asm volatile( + /* [mm2,mm3] += C * src[0..7] */ + "movq %0, %%mm0\n\t" + "movq %%mm0, %%mm1\n\t" "punpcklbw %%mm7, %%mm0\n\t" - "pmullw %%mm5, %%mm0\n\t" "punpckhbw %%mm7, %%mm1\n\t" - "pmullw %%mm5, %%mm1\n\t" + "pmullw %%mm6, %%mm0\n\t" + "pmullw %%mm6, %%mm1\n\t" "paddw %%mm0, %%mm2\n\t" "paddw %%mm1, %%mm3\n\t" - : : "m" (AA)); + : : "m" (src[0])); - src += stride; asm volatile( - /* mm0 = src[0..7], mm1 = src[1..8] */ - "movq %0, %%mm0\n\t" + /* [mm2,mm3] += D * src[1..8] */ "movq %1, %%mm1\n\t" - : : "m" (src[0]), "m" (src[1])); - - asm volatile( - /* [mm2,mm3] += C * src[0..7] */ - "movq %mm0, %mm4\n\t" - "punpcklbw %mm7, %mm4\n\t" - "pmullw %mm6, %mm4\n\t" - "paddw %mm4, %mm2\n\t" - "movq %mm0, %mm4\n\t" - "punpckhbw %mm7, %mm4\n\t" - "pmullw %mm6, %mm4\n\t" - "paddw %mm4, %mm3\n\t"); - - asm volatile( - /* [mm2,mm3] += D * src[1..8] */ - "movq %%mm1, %%mm4\n\t" - "punpcklbw %%mm7, %%mm4\n\t" - "pmullw %0, %%mm4\n\t" - "paddw %%mm4, %%mm2\n\t" + "movq %%mm1, %%mm0\n\t" "movq %%mm1, %%mm4\n\t" + "punpcklbw %%mm7, %%mm0\n\t" "punpckhbw %%mm7, %%mm4\n\t" - "pmullw %0, %%mm4\n\t" + "pmullw %2, %%mm0\n\t" + "pmullw %2, %%mm4\n\t" + "paddw %%mm0, %%mm2\n\t" "paddw %%mm4, %%mm3\n\t" - : : "m" (DD)); + "movq %0, %%mm0\n\t" + : : "m" (src[0]), "m" (src[1]), "m" (DD)); asm volatile( - /* dst[0..7] = pack(([mm2,mm3] + 32) >> 6) */ + /* dst[0..7] = ([mm2,mm3] + 32) >> 6 */ "paddw %1, %%mm2\n\t" "paddw %1, %%mm3\n\t" "psrlw $6, %%mm2\n\t" @@ -240,7 +180,7 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1* } } -static void H264_CHROMA_MC4_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y) +static void H264_CHROMA_MC4_TMPL(uint8_t *dst/*align 4*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y) { DECLARE_ALIGNED_8(uint64_t, AA); DECLARE_ALIGNED_8(uint64_t, DD); @@ -319,3 +259,66 @@ static void H264_CHROMA_MC4_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1* dst += stride; } } + +#ifdef H264_CHROMA_MC2_TMPL +static void H264_CHROMA_MC2_TMPL(uint8_t *dst/*align 2*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y) +{ + int CD=((1<<16)-1)*x*y + 8*y; + int AB=((8<<16)-8)*x + 64 - CD; + int i; + + asm volatile( + /* mm5 = {A,B,A,B} */ + /* mm6 = {C,D,C,D} */ + "movd %0, %%mm5\n\t" + "movd %1, %%mm6\n\t" + "punpckldq %%mm5, %%mm5\n\t" + "punpckldq %%mm6, %%mm6\n\t" + "pxor %%mm7, %%mm7\n\t" + :: "r"(AB), "r"(CD)); + + asm volatile( + /* mm0 = src[0,1,1,2] */ + "movd %0, %%mm0\n\t" + "punpcklbw %%mm7, %%mm0\n\t" + "pshufw $0x94, %%mm0, %%mm0\n\t" + :: "m"(src[0])); + + for(i=0; i<h; i++) { + asm volatile( + /* mm1 = A * src[0,1] + B * src[1,2] */ + "movq %%mm0, %%mm1\n\t" + "pmaddwd %%mm5, %%mm1\n\t" + ::); + + src += stride; + asm volatile( + /* mm0 = src[0,1,1,2] */ + "movd %0, %%mm0\n\t" + "punpcklbw %%mm7, %%mm0\n\t" + "pshufw $0x94, %%mm0, %%mm0\n\t" + :: "m"(src[0])); + + asm volatile( + /* mm1 += C * src[0,1] + D * src[1,2] */ + "movq %%mm0, %%mm2\n\t" + "pmaddwd %%mm6, %%mm2\n\t" + "paddw %%mm2, %%mm1\n\t" + ::); + + asm volatile( + /* dst[0,1] = pack((mm1 + 32) >> 6) */ + "paddw %1, %%mm1\n\t" + "psrlw $6, %%mm1\n\t" + "packssdw %%mm7, %%mm1\n\t" + "packuswb %%mm7, %%mm1\n\t" + /* writes garbage to the right of dst. + * ok because partitions are processed from left to right. */ + H264_CHROMA_OP4(%0, %%mm1, %%mm3) + "movd %%mm1, %0\n\t" + : "=m" (dst[0]) : "m" (ff_pw_32)); + dst += stride; + } +} +#endif + diff --git a/src/libffmpeg/libavcodec/i386/dsputil_mmx.c b/src/libffmpeg/libavcodec/i386/dsputil_mmx.c index 7d69859a6..ec6b2ad1a 100644 --- a/src/libffmpeg/libavcodec/i386/dsputil_mmx.c +++ b/src/libffmpeg/libavcodec/i386/dsputil_mmx.c @@ -23,6 +23,7 @@ #include "../dsputil.h" #include "../simple_idct.h" #include "../mpegvideo.h" +#include "x86_cpu.h" #include "mmx.h" //#undef NDEBUG @@ -186,6 +187,11 @@ static const uint64_t ff_pb_FC attribute_used __attribute__ ((aligned(8))) = 0xF #undef DEF #undef PAVGB +#define SBUTTERFLY(a,b,t,n)\ + "movq " #a ", " #t " \n\t" /* abcd */\ + "punpckl" #n " " #b ", " #a " \n\t" /* aebf */\ + "punpckh" #n " " #b ", " #t " \n\t" /* cgdh */\ + /***********************************/ /* standard MMX */ @@ -1522,11 +1528,6 @@ static void sub_hfyu_median_prediction_mmx2(uint8_t *dst, uint8_t *src1, uint8_t "pmaxsw " #z ", " #a " \n\t"\ "paddusw " #a ", " #sum " \n\t" -#define SBUTTERFLY(a,b,t,n)\ - "movq " #a ", " #t " \n\t" /* abcd */\ - "punpckl" #n " " #b ", " #a " \n\t" /* aebf */\ - "punpckh" #n " " #b ", " #t " \n\t" /* cgdh */\ - #define TRANSPOSE4(a,b,c,d,t)\ SBUTTERFLY(a,b,t,wd) /* a=aebf t=cgdh */\ SBUTTERFLY(c,d,b,wd) /* c=imjn b=kolp */\ @@ -2403,6 +2404,124 @@ static void just_return() { return; } c->put_no_rnd_ ## postfix1 = put_no_rnd_ ## postfix2;\ c->avg_ ## postfix1 = avg_ ## postfix2; +static void gmc_mmx(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy, + int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height){ + const int w = 8; + const int ix = ox>>(16+shift); + const int iy = oy>>(16+shift); + const int oxs = ox>>4; + const int oys = oy>>4; + const int dxxs = dxx>>4; + const int dxys = dxy>>4; + const int dyxs = dyx>>4; + const int dyys = dyy>>4; + const uint16_t r4[4] = {r,r,r,r}; + const uint16_t dxy4[4] = {dxys,dxys,dxys,dxys}; + const uint16_t dyy4[4] = {dyys,dyys,dyys,dyys}; + const uint64_t shift2 = 2*shift; + uint8_t edge_buf[(h+1)*stride]; + int x, y; + + const int dxw = (dxx-(1<<(16+shift)))*(w-1); + const int dyh = (dyy-(1<<(16+shift)))*(h-1); + const int dxh = dxy*(h-1); + const int dyw = dyx*(w-1); + if( // non-constant fullpel offset (3% of blocks) + (ox^(ox+dxw) | ox^(ox+dxh) | ox^(ox+dxw+dxh) | + oy^(oy+dyw) | oy^(oy+dyh) | oy^(oy+dyw+dyh)) >> (16+shift) + // uses more than 16 bits of subpel mv (only at huge resolution) + || (dxx|dxy|dyx|dyy)&15 ) + { + //FIXME could still use mmx for some of the rows + ff_gmc_c(dst, src, stride, h, ox, oy, dxx, dxy, dyx, dyy, shift, r, width, height); + return; + } + + src += ix + iy*stride; + if( (unsigned)ix >= width-w || + (unsigned)iy >= height-h ) + { + ff_emulated_edge_mc(edge_buf, src, stride, w+1, h+1, ix, iy, width, height); + src = edge_buf; + } + + asm volatile( + "movd %0, %%mm6 \n\t" + "pxor %%mm7, %%mm7 \n\t" + "punpcklwd %%mm6, %%mm6 \n\t" + "punpcklwd %%mm6, %%mm6 \n\t" + :: "r"(1<<shift) + ); + + for(x=0; x<w; x+=4){ + uint16_t dx4[4] = { oxs - dxys + dxxs*(x+0), + oxs - dxys + dxxs*(x+1), + oxs - dxys + dxxs*(x+2), + oxs - dxys + dxxs*(x+3) }; + uint16_t dy4[4] = { oys - dyys + dyxs*(x+0), + oys - dyys + dyxs*(x+1), + oys - dyys + dyxs*(x+2), + oys - dyys + dyxs*(x+3) }; + + for(y=0; y<h; y++){ + asm volatile( + "movq %0, %%mm4 \n\t" + "movq %1, %%mm5 \n\t" + "paddw %2, %%mm4 \n\t" + "paddw %3, %%mm5 \n\t" + "movq %%mm4, %0 \n\t" + "movq %%mm5, %1 \n\t" + "psrlw $12, %%mm4 \n\t" + "psrlw $12, %%mm5 \n\t" + : "+m"(*dx4), "+m"(*dy4) + : "m"(*dxy4), "m"(*dyy4) + ); + + asm volatile( + "movq %%mm6, %%mm2 \n\t" + "movq %%mm6, %%mm1 \n\t" + "psubw %%mm4, %%mm2 \n\t" + "psubw %%mm5, %%mm1 \n\t" + "movq %%mm2, %%mm0 \n\t" + "movq %%mm4, %%mm3 \n\t" + "pmullw %%mm1, %%mm0 \n\t" // (s-dx)*(s-dy) + "pmullw %%mm5, %%mm3 \n\t" // dx*dy + "pmullw %%mm5, %%mm2 \n\t" // (s-dx)*dy + "pmullw %%mm4, %%mm1 \n\t" // dx*(s-dy) + + "movd %4, %%mm5 \n\t" + "movd %3, %%mm4 \n\t" + "punpcklbw %%mm7, %%mm5 \n\t" + "punpcklbw %%mm7, %%mm4 \n\t" + "pmullw %%mm5, %%mm3 \n\t" // src[1,1] * dx*dy + "pmullw %%mm4, %%mm2 \n\t" // src[0,1] * (s-dx)*dy + + "movd %2, %%mm5 \n\t" + "movd %1, %%mm4 \n\t" + "punpcklbw %%mm7, %%mm5 \n\t" + "punpcklbw %%mm7, %%mm4 \n\t" + "pmullw %%mm5, %%mm1 \n\t" // src[1,0] * dx*(s-dy) + "pmullw %%mm4, %%mm0 \n\t" // src[0,0] * (s-dx)*(s-dy) + "paddw %5, %%mm1 \n\t" + "paddw %%mm3, %%mm2 \n\t" + "paddw %%mm1, %%mm0 \n\t" + "paddw %%mm2, %%mm0 \n\t" + + "psrlw %6, %%mm0 \n\t" + "packuswb %%mm0, %%mm0 \n\t" + "movd %%mm0, %0 \n\t" + + : "=m"(dst[x+y*stride]) + : "m"(src[0]), "m"(src[1]), + "m"(src[stride]), "m"(src[stride+1]), + "m"(*r4), "m"(shift2) + ); + src += stride; + } + src += 4-h*stride; + } +} + static int try_8x8basis_mmx(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){ long i=0; @@ -2489,8 +2608,36 @@ static void add_8x8basis_mmx(int16_t rem[64], int16_t basis[64], int scale){ } } +#define PREFETCH(name, op) \ +void name(void *mem, int stride, int h){\ + const uint8_t *p= mem;\ + do{\ + asm volatile(#op" %0" :: "m"(*p));\ + p+= stride;\ + }while(--h);\ +} +PREFETCH(prefetch_mmx2, prefetcht0) +PREFETCH(prefetch_3dnow, prefetch) +#undef PREFETCH + #include "h264dsp_mmx.c" +/* AVS specific */ +void ff_cavsdsp_init_mmx2(DSPContext* c, AVCodecContext *avctx); + +void ff_put_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) { + put_pixels8_mmx(dst, src, stride, 8); +} +void ff_avg_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) { + avg_pixels8_mmx(dst, src, stride, 8); +} +void ff_put_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) { + put_pixels16_mmx(dst, src, stride, 16); +} +void ff_avg_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) { + avg_pixels16_mmx(dst, src, stride, 16); +} + /* external functions, from idct_mmx.c */ void ff_mmx_idct(DCTELEM *block); void ff_mmxext_idct(DCTELEM *block); @@ -2564,6 +2711,17 @@ static void ff_idct_xvid_mmx2_add(uint8_t *dest, int line_size, DCTELEM *block) } #endif +#ifdef CONFIG_SNOW_ENCODER +extern void ff_snow_horizontal_compose97i_sse2(DWTELEM *b, int width); +extern void ff_snow_horizontal_compose97i_mmx(DWTELEM *b, int width); +extern void ff_snow_vertical_compose97i_sse2(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width); +extern void ff_snow_vertical_compose97i_mmx(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width); +extern void ff_snow_inner_add_yblock_sse2(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, + int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8); +extern void ff_snow_inner_add_yblock_mmx(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, + int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8); +#endif + void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) { mm_flags = mm_support(); @@ -2622,6 +2780,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) c->idct = ff_mmx_idct; } c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM; +#if 0 }else if(idct_algo==FF_IDCT_VP3){ if(mm_flags & MM_SSE2){ c->idct_put= ff_vp3_idct_put_sse2; @@ -2635,6 +2794,9 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) c->idct = ff_vp3_idct_mmx; c->idct_permutation_type= FF_PARTTRANS_IDCT_PERM; } +#endif + }else if(idct_algo==FF_IDCT_CAVS){ + c->idct_permutation_type= FF_TRANSPOSE_IDCT_PERM; #ifdef CONFIG_GPL }else if(idct_algo==FF_IDCT_XVIDMMX){ if(mm_flags & MM_MMXEXT){ @@ -2702,6 +2864,8 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y2_mmx; c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy2_mmx; + c->gmc= gmc_mmx; + c->add_bytes= add_bytes_mmx; #ifdef CONFIG_ENCODERS c->diff_bytes= diff_bytes_mmx; @@ -2732,7 +2896,14 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_mmx; c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_mmx; + c->h264_idct_dc_add= + c->h264_idct_add= ff_h264_idct_add_mmx; + c->h264_idct8_dc_add= + c->h264_idct8_add= ff_h264_idct8_add_mmx; + if (mm_flags & MM_MMXEXT) { + c->prefetch = prefetch_mmx2; + c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2; c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2; @@ -2753,7 +2924,8 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) c->vsad[4]= vsad_intra16_mmx2; #endif //CONFIG_ENCODERS - c->h264_idct_add= ff_h264_idct_add_mmx2; + c->h264_idct_dc_add= ff_h264_idct_dc_add_mmx2; + c->h264_idct8_dc_add= ff_h264_idct8_dc_add_mmx2; if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2; @@ -2831,6 +3003,8 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_mmx2; c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_mmx2; + c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_mmx2; + c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_mmx2; c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_mmx2; c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_mmx2; c->h264_v_loop_filter_chroma= h264_v_loop_filter_chroma_mmx2; @@ -2856,10 +3030,16 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) c->biweight_h264_pixels_tab[6]= ff_h264_biweight_4x4_mmx2; c->biweight_h264_pixels_tab[7]= ff_h264_biweight_4x2_mmx2; +#ifdef CONFIG_CAVS_DECODER + ff_cavsdsp_init_mmx2(c, avctx); +#endif + #ifdef CONFIG_ENCODERS c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_mmx2; #endif //CONFIG_ENCODERS } else if (mm_flags & MM_3DNOW) { + c->prefetch = prefetch_3dnow; + c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow; c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow; @@ -2944,6 +3124,19 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_3dnow; c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_3dnow; } + +#ifdef CONFIG_SNOW_ENCODER + if(mm_flags & MM_SSE2){ + c->horizontal_compose97i = ff_snow_horizontal_compose97i_sse2; + c->vertical_compose97i = ff_snow_vertical_compose97i_sse2; + c->inner_add_yblock = ff_snow_inner_add_yblock_sse2; + } + else{ + c->horizontal_compose97i = ff_snow_horizontal_compose97i_mmx; + c->vertical_compose97i = ff_snow_vertical_compose97i_mmx; + c->inner_add_yblock = ff_snow_inner_add_yblock_mmx; + } +#endif } #ifdef CONFIG_ENCODERS diff --git a/src/libffmpeg/libavcodec/i386/fft_sse.c b/src/libffmpeg/libavcodec/i386/fft_sse.c index 54851fb94..631848265 100644 --- a/src/libffmpeg/libavcodec/i386/fft_sse.c +++ b/src/libffmpeg/libavcodec/i386/fft_sse.c @@ -23,14 +23,14 @@ #include <xmmintrin.h> -static const float p1p1p1m1[4] __attribute__((aligned(16))) = - { 1.0, 1.0, 1.0, -1.0 }; +static const int p1p1p1m1[4] __attribute__((aligned(16))) = + { 0, 0, 0, 1 << 31 }; -static const float p1p1m1p1[4] __attribute__((aligned(16))) = - { 1.0, 1.0, -1.0, 1.0 }; +static const int p1p1m1p1[4] __attribute__((aligned(16))) = + { 0, 0, 1 << 31, 0 }; -static const float p1p1m1m1[4] __attribute__((aligned(16))) = - { 1.0, 1.0, -1.0, -1.0 }; +static const int p1p1m1m1[4] __attribute__((aligned(16))) = + { 0, 0, 1 << 31, 1 << 31 }; #if 0 static void print_v4sf(const char *str, __m128 a) @@ -58,7 +58,6 @@ void ff_fft_calc_sse(FFTContext *s, FFTComplex *z) r = (__m128 *)&z[0]; c1 = *(__m128 *)p1p1m1m1; - c2 = *(__m128 *)p1p1p1m1; if (s->inverse) c2 = *(__m128 *)p1p1m1p1; else @@ -68,19 +67,20 @@ void ff_fft_calc_sse(FFTContext *s, FFTComplex *z) do { a = r[0]; b = _mm_shuffle_ps(a, a, _MM_SHUFFLE(1, 0, 3, 2)); - a = _mm_mul_ps(a, c1); + a = _mm_xor_ps(a, c1); /* do the pass 0 butterfly */ a = _mm_add_ps(a, b); a1 = r[1]; b = _mm_shuffle_ps(a1, a1, _MM_SHUFFLE(1, 0, 3, 2)); - a1 = _mm_mul_ps(a1, c1); + a1 = _mm_xor_ps(a1, c1); /* do the pass 0 butterfly */ b = _mm_add_ps(a1, b); /* multiply third by -i */ + /* by toggling the sign bit */ b = _mm_shuffle_ps(b, b, _MM_SHUFFLE(2, 3, 1, 0)); - b = _mm_mul_ps(b, c2); + b = _mm_xor_ps(b, c2); /* do the pass 1 butterfly */ r[0] = _mm_add_ps(a, b); diff --git a/src/libffmpeg/libavcodec/i386/h264dsp_mmx.c b/src/libffmpeg/libavcodec/i386/h264dsp_mmx.c index 8ab58f389..ac4ad6401 100644 --- a/src/libffmpeg/libavcodec/i386/h264dsp_mmx.c +++ b/src/libffmpeg/libavcodec/i386/h264dsp_mmx.c @@ -46,11 +46,6 @@ SUMSUBD2_AB( s13, d13, t )\ SUMSUB_BADC( d13, s02, s13, d02 ) -#define SBUTTERFLY(a,b,t,n)\ - "movq " #a ", " #t " \n\t" /* abcd */\ - "punpckl" #n " " #b ", " #a " \n\t" /* aebf */\ - "punpckh" #n " " #b ", " #t " \n\t" /* cgdh */\ - #define TRANSPOSE4(a,b,c,d,t)\ SBUTTERFLY(a,b,t,wd) /* a=aebf t=cgdh */\ SBUTTERFLY(c,d,b,wd) /* c=imjn b=kolp */\ @@ -65,7 +60,7 @@ "packuswb "#z", "#p" \n\t"\ "movd "#p", (%0) \n\t" -void ff_h264_idct_add_mmx2(uint8_t *dst, int16_t *block, int stride) +static void ff_h264_idct_add_mmx(uint8_t *dst, int16_t *block, int stride) { /* Load dct coeffs */ asm volatile( @@ -104,6 +99,208 @@ void ff_h264_idct_add_mmx2(uint8_t *dst, int16_t *block, int stride) ); } +static inline void h264_idct8_1d(int16_t *block) +{ + asm volatile( + "movq 112(%0), %%mm7 \n\t" + "movq 80(%0), %%mm5 \n\t" + "movq 48(%0), %%mm3 \n\t" + "movq 16(%0), %%mm1 \n\t" + + "movq %%mm7, %%mm4 \n\t" + "movq %%mm3, %%mm6 \n\t" + "movq %%mm5, %%mm0 \n\t" + "movq %%mm7, %%mm2 \n\t" + "psraw $1, %%mm4 \n\t" + "psraw $1, %%mm6 \n\t" + "psubw %%mm7, %%mm0 \n\t" + "psubw %%mm6, %%mm2 \n\t" + "psubw %%mm4, %%mm0 \n\t" + "psubw %%mm3, %%mm2 \n\t" + "psubw %%mm3, %%mm0 \n\t" + "paddw %%mm1, %%mm2 \n\t" + + "movq %%mm5, %%mm4 \n\t" + "movq %%mm1, %%mm6 \n\t" + "psraw $1, %%mm4 \n\t" + "psraw $1, %%mm6 \n\t" + "paddw %%mm5, %%mm4 \n\t" + "paddw %%mm1, %%mm6 \n\t" + "paddw %%mm7, %%mm4 \n\t" + "paddw %%mm5, %%mm6 \n\t" + "psubw %%mm1, %%mm4 \n\t" + "paddw %%mm3, %%mm6 \n\t" + + "movq %%mm0, %%mm1 \n\t" + "movq %%mm4, %%mm3 \n\t" + "movq %%mm2, %%mm5 \n\t" + "movq %%mm6, %%mm7 \n\t" + "psraw $2, %%mm6 \n\t" + "psraw $2, %%mm3 \n\t" + "psraw $2, %%mm5 \n\t" + "psraw $2, %%mm0 \n\t" + "paddw %%mm6, %%mm1 \n\t" + "paddw %%mm2, %%mm3 \n\t" + "psubw %%mm4, %%mm5 \n\t" + "psubw %%mm0, %%mm7 \n\t" + + "movq 32(%0), %%mm2 \n\t" + "movq 96(%0), %%mm6 \n\t" + "movq %%mm2, %%mm4 \n\t" + "movq %%mm6, %%mm0 \n\t" + "psraw $1, %%mm4 \n\t" + "psraw $1, %%mm6 \n\t" + "psubw %%mm0, %%mm4 \n\t" + "paddw %%mm2, %%mm6 \n\t" + + "movq (%0), %%mm2 \n\t" + "movq 64(%0), %%mm0 \n\t" + SUMSUB_BA( %%mm0, %%mm2 ) + SUMSUB_BA( %%mm6, %%mm0 ) + SUMSUB_BA( %%mm4, %%mm2 ) + SUMSUB_BA( %%mm7, %%mm6 ) + SUMSUB_BA( %%mm5, %%mm4 ) + SUMSUB_BA( %%mm3, %%mm2 ) + SUMSUB_BA( %%mm1, %%mm0 ) + :: "r"(block) + ); +} + +static void ff_h264_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride) +{ + int i; + int16_t __attribute__ ((aligned(8))) b2[64]; + + block[0] += 32; + + for(i=0; i<2; i++){ + uint64_t tmp; + + h264_idct8_1d(block+4*i); + + asm volatile( + "movq %%mm7, %0 \n\t" + TRANSPOSE4( %%mm0, %%mm2, %%mm4, %%mm6, %%mm7 ) + "movq %%mm0, 8(%1) \n\t" + "movq %%mm6, 24(%1) \n\t" + "movq %%mm7, 40(%1) \n\t" + "movq %%mm4, 56(%1) \n\t" + "movq %0, %%mm7 \n\t" + TRANSPOSE4( %%mm7, %%mm5, %%mm3, %%mm1, %%mm0 ) + "movq %%mm7, (%1) \n\t" + "movq %%mm1, 16(%1) \n\t" + "movq %%mm0, 32(%1) \n\t" + "movq %%mm3, 48(%1) \n\t" + : "=m"(tmp) + : "r"(b2+32*i) + : "memory" + ); + } + + for(i=0; i<2; i++){ + h264_idct8_1d(b2+4*i); + + asm volatile( + "psraw $6, %%mm7 \n\t" + "psraw $6, %%mm6 \n\t" + "psraw $6, %%mm5 \n\t" + "psraw $6, %%mm4 \n\t" + "psraw $6, %%mm3 \n\t" + "psraw $6, %%mm2 \n\t" + "psraw $6, %%mm1 \n\t" + "psraw $6, %%mm0 \n\t" + + "movq %%mm7, (%0) \n\t" + "movq %%mm5, 16(%0) \n\t" + "movq %%mm3, 32(%0) \n\t" + "movq %%mm1, 48(%0) \n\t" + "movq %%mm0, 64(%0) \n\t" + "movq %%mm2, 80(%0) \n\t" + "movq %%mm4, 96(%0) \n\t" + "movq %%mm6, 112(%0) \n\t" + :: "r"(b2+4*i) + : "memory" + ); + } + + add_pixels_clamped_mmx(b2, dst, stride); +} + +static void ff_h264_idct_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride) +{ + int dc = (block[0] + 32) >> 6; + asm volatile( + "movd %0, %%mm0 \n\t" + "pshufw $0, %%mm0, %%mm0 \n\t" + "pxor %%mm1, %%mm1 \n\t" + "psubw %%mm0, %%mm1 \n\t" + "packuswb %%mm0, %%mm0 \n\t" + "packuswb %%mm1, %%mm1 \n\t" + ::"r"(dc) + ); + asm volatile( + "movd %0, %%mm2 \n\t" + "movd %1, %%mm3 \n\t" + "movd %2, %%mm4 \n\t" + "movd %3, %%mm5 \n\t" + "paddusb %%mm0, %%mm2 \n\t" + "paddusb %%mm0, %%mm3 \n\t" + "paddusb %%mm0, %%mm4 \n\t" + "paddusb %%mm0, %%mm5 \n\t" + "psubusb %%mm1, %%mm2 \n\t" + "psubusb %%mm1, %%mm3 \n\t" + "psubusb %%mm1, %%mm4 \n\t" + "psubusb %%mm1, %%mm5 \n\t" + "movd %%mm2, %0 \n\t" + "movd %%mm3, %1 \n\t" + "movd %%mm4, %2 \n\t" + "movd %%mm5, %3 \n\t" + :"+m"(*(uint32_t*)(dst+0*stride)), + "+m"(*(uint32_t*)(dst+1*stride)), + "+m"(*(uint32_t*)(dst+2*stride)), + "+m"(*(uint32_t*)(dst+3*stride)) + ); +} + +static void ff_h264_idct8_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride) +{ + int dc = (block[0] + 32) >> 6; + int y; + asm volatile( + "movd %0, %%mm0 \n\t" + "pshufw $0, %%mm0, %%mm0 \n\t" + "pxor %%mm1, %%mm1 \n\t" + "psubw %%mm0, %%mm1 \n\t" + "packuswb %%mm0, %%mm0 \n\t" + "packuswb %%mm1, %%mm1 \n\t" + ::"r"(dc) + ); + for(y=2; y--; dst += 4*stride){ + asm volatile( + "movq %0, %%mm2 \n\t" + "movq %1, %%mm3 \n\t" + "movq %2, %%mm4 \n\t" + "movq %3, %%mm5 \n\t" + "paddusb %%mm0, %%mm2 \n\t" + "paddusb %%mm0, %%mm3 \n\t" + "paddusb %%mm0, %%mm4 \n\t" + "paddusb %%mm0, %%mm5 \n\t" + "psubusb %%mm1, %%mm2 \n\t" + "psubusb %%mm1, %%mm3 \n\t" + "psubusb %%mm1, %%mm4 \n\t" + "psubusb %%mm1, %%mm5 \n\t" + "movq %%mm2, %0 \n\t" + "movq %%mm3, %1 \n\t" + "movq %%mm4, %2 \n\t" + "movq %%mm5, %3 \n\t" + :"+m"(*(uint64_t*)(dst+0*stride)), + "+m"(*(uint64_t*)(dst+1*stride)), + "+m"(*(uint64_t*)(dst+2*stride)), + "+m"(*(uint64_t*)(dst+3*stride)) + ); + } +} + /***********************************/ /* deblocking */ @@ -441,6 +638,50 @@ static void OPNAME ## h264_qpel4_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, i : "memory"\ );\ }\ +static void OPNAME ## h264_qpel4_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\ + int h=4;\ + asm volatile(\ + "pxor %%mm7, %%mm7 \n\t"\ + "movq %0, %%mm4 \n\t"\ + "movq %1, %%mm5 \n\t"\ + :: "m"(ff_pw_5), "m"(ff_pw_16)\ + );\ + do{\ + asm volatile(\ + "movd -1(%0), %%mm1 \n\t"\ + "movd (%0), %%mm2 \n\t"\ + "movd 1(%0), %%mm3 \n\t"\ + "movd 2(%0), %%mm0 \n\t"\ + "punpcklbw %%mm7, %%mm1 \n\t"\ + "punpcklbw %%mm7, %%mm2 \n\t"\ + "punpcklbw %%mm7, %%mm3 \n\t"\ + "punpcklbw %%mm7, %%mm0 \n\t"\ + "paddw %%mm0, %%mm1 \n\t"\ + "paddw %%mm3, %%mm2 \n\t"\ + "movd -2(%0), %%mm0 \n\t"\ + "movd 3(%0), %%mm3 \n\t"\ + "punpcklbw %%mm7, %%mm0 \n\t"\ + "punpcklbw %%mm7, %%mm3 \n\t"\ + "paddw %%mm3, %%mm0 \n\t"\ + "psllw $2, %%mm2 \n\t"\ + "psubw %%mm1, %%mm2 \n\t"\ + "pmullw %%mm4, %%mm2 \n\t"\ + "paddw %%mm5, %%mm0 \n\t"\ + "paddw %%mm2, %%mm0 \n\t"\ + "movd (%2), %%mm3 \n\t"\ + "psraw $5, %%mm0 \n\t"\ + "packuswb %%mm0, %%mm0 \n\t"\ + PAVGB" %%mm3, %%mm0 \n\t"\ + OP(%%mm0, (%1),%%mm6, d)\ + "add %4, %0 \n\t"\ + "add %4, %1 \n\t"\ + "add %3, %2 \n\t"\ + : "+a"(src), "+c"(dst), "+d"(src2)\ + : "D"((long)src2Stride), "S"((long)dstStride)\ + : "memory"\ + );\ + }while(--h);\ +}\ static void OPNAME ## h264_qpel4_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ src -= 2*srcStride;\ asm volatile(\ @@ -591,11 +832,74 @@ static void OPNAME ## h264_qpel8_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, i );\ }\ \ -static void OPNAME ## h264_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ - int h= 2;\ +static void OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\ + int h=8;\ + asm volatile(\ + "pxor %%mm7, %%mm7 \n\t"\ + "movq %0, %%mm6 \n\t"\ + :: "m"(ff_pw_5)\ + );\ + do{\ + asm volatile(\ + "movq (%0), %%mm0 \n\t"\ + "movq 1(%0), %%mm2 \n\t"\ + "movq %%mm0, %%mm1 \n\t"\ + "movq %%mm2, %%mm3 \n\t"\ + "punpcklbw %%mm7, %%mm0 \n\t"\ + "punpckhbw %%mm7, %%mm1 \n\t"\ + "punpcklbw %%mm7, %%mm2 \n\t"\ + "punpckhbw %%mm7, %%mm3 \n\t"\ + "paddw %%mm2, %%mm0 \n\t"\ + "paddw %%mm3, %%mm1 \n\t"\ + "psllw $2, %%mm0 \n\t"\ + "psllw $2, %%mm1 \n\t"\ + "movq -1(%0), %%mm2 \n\t"\ + "movq 2(%0), %%mm4 \n\t"\ + "movq %%mm2, %%mm3 \n\t"\ + "movq %%mm4, %%mm5 \n\t"\ + "punpcklbw %%mm7, %%mm2 \n\t"\ + "punpckhbw %%mm7, %%mm3 \n\t"\ + "punpcklbw %%mm7, %%mm4 \n\t"\ + "punpckhbw %%mm7, %%mm5 \n\t"\ + "paddw %%mm4, %%mm2 \n\t"\ + "paddw %%mm3, %%mm5 \n\t"\ + "psubw %%mm2, %%mm0 \n\t"\ + "psubw %%mm5, %%mm1 \n\t"\ + "pmullw %%mm6, %%mm0 \n\t"\ + "pmullw %%mm6, %%mm1 \n\t"\ + "movd -2(%0), %%mm2 \n\t"\ + "movd 7(%0), %%mm5 \n\t"\ + "punpcklbw %%mm7, %%mm2 \n\t"\ + "punpcklbw %%mm7, %%mm5 \n\t"\ + "paddw %%mm3, %%mm2 \n\t"\ + "paddw %%mm5, %%mm4 \n\t"\ + "movq %5, %%mm5 \n\t"\ + "paddw %%mm5, %%mm2 \n\t"\ + "paddw %%mm5, %%mm4 \n\t"\ + "paddw %%mm2, %%mm0 \n\t"\ + "paddw %%mm4, %%mm1 \n\t"\ + "psraw $5, %%mm0 \n\t"\ + "psraw $5, %%mm1 \n\t"\ + "movq (%2), %%mm4 \n\t"\ + "packuswb %%mm1, %%mm0 \n\t"\ + PAVGB" %%mm4, %%mm0 \n\t"\ + OP(%%mm0, (%1),%%mm5, q)\ + "add %4, %0 \n\t"\ + "add %4, %1 \n\t"\ + "add %3, %2 \n\t"\ + : "+a"(src), "+c"(dst), "+d"(src2)\ + : "D"((long)src2Stride), "S"((long)dstStride),\ + "m"(ff_pw_16)\ + : "memory"\ + );\ + }while(--h);\ +}\ +\ +static inline void OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\ + int w= 2;\ src -= 2*srcStride;\ \ - while(h--){\ + while(w--){\ asm volatile(\ "pxor %%mm7, %%mm7 \n\t"\ "movd (%0), %%mm0 \n\t"\ @@ -626,13 +930,29 @@ static void OPNAME ## h264_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, i : "S"((long)srcStride), "D"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\ : "memory"\ );\ - src += 4-13*srcStride;\ - dst += 4-8*dstStride;\ + if(h==16){\ + asm volatile(\ + QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\ + QPEL_H264V(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\ + QPEL_H264V(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP)\ + QPEL_H264V(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP)\ + QPEL_H264V(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\ + QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\ + QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\ + QPEL_H264V(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\ + \ + : "+a"(src), "+c"(dst)\ + : "S"((long)srcStride), "D"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\ + : "memory"\ + );\ + }\ + src += 4-(h+5)*srcStride;\ + dst += 4-h*dstStride;\ }\ }\ -static void OPNAME ## h264_qpel8_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ - int h=8;\ - int w=4;\ +static inline void OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride, int size){\ + int h = size;\ + int w = (size+8)>>2;\ src -= 2*srcStride+2;\ while(w--){\ asm volatile(\ @@ -652,23 +972,40 @@ static void OPNAME ## h264_qpel8_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, "punpcklbw %%mm7, %%mm2 \n\t"\ "punpcklbw %%mm7, %%mm3 \n\t"\ "punpcklbw %%mm7, %%mm4 \n\t"\ - QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 0*8*4)\ - QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 1*8*4)\ - QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, 2*8*4)\ - QPEL_H264HV(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, 3*8*4)\ - QPEL_H264HV(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, 4*8*4)\ - QPEL_H264HV(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, 5*8*4)\ - QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 6*8*4)\ - QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 7*8*4)\ - \ + QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 0*48)\ + QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 1*48)\ + QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, 2*48)\ + QPEL_H264HV(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, 3*48)\ + QPEL_H264HV(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, 4*48)\ + QPEL_H264HV(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, 5*48)\ + QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 6*48)\ + QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 7*48)\ : "+a"(src)\ : "c"(tmp), "S"((long)srcStride), "m"(ff_pw_5)\ : "memory"\ );\ + if(size==16){\ + asm volatile(\ + QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, 8*48)\ + QPEL_H264HV(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, 9*48)\ + QPEL_H264HV(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, 10*48)\ + QPEL_H264HV(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, 11*48)\ + QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 12*48)\ + QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 13*48)\ + QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, 14*48)\ + QPEL_H264HV(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, 15*48)\ + : "+a"(src)\ + : "c"(tmp), "S"((long)srcStride), "m"(ff_pw_5)\ + : "memory"\ + );\ + }\ tmp += 4;\ - src += 4 - 13*srcStride;\ + src += 4 - (size+5)*srcStride;\ }\ - tmp -= 4*4;\ + tmp -= size+8;\ + w = size>>4;\ + do{\ + h = size;\ asm volatile(\ "movq %4, %%mm6 \n\t"\ "1: \n\t"\ @@ -702,7 +1039,7 @@ static void OPNAME ## h264_qpel8_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, "psraw $6, %%mm3 \n\t"\ "packuswb %%mm3, %%mm0 \n\t"\ OP(%%mm0, (%1),%%mm7, q)\ - "add $32, %0 \n\t"\ + "add $48, %0 \n\t"\ "add %3, %1 \n\t"\ "decl %2 \n\t"\ " jnz 1b \n\t"\ @@ -710,14 +1047,17 @@ static void OPNAME ## h264_qpel8_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, : "S"((long)dstStride), "m"(ff_pw_32)\ : "memory"\ );\ + tmp += 8 - size*24;\ + dst += 8 - size*dstStride;\ + }while(w--);\ +}\ +\ +static void OPNAME ## h264_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ + OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst , src , dstStride, srcStride, 8);\ }\ static void OPNAME ## h264_qpel16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ - OPNAME ## h264_qpel8_v_lowpass_ ## MMX(dst , src , dstStride, srcStride);\ - OPNAME ## h264_qpel8_v_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\ - src += 8*srcStride;\ - dst += 8*dstStride;\ - OPNAME ## h264_qpel8_v_lowpass_ ## MMX(dst , src , dstStride, srcStride);\ - OPNAME ## h264_qpel8_v_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\ + OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst , src , dstStride, srcStride, 16);\ + OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\ }\ \ static void OPNAME ## h264_qpel16_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ @@ -729,14 +1069,88 @@ static void OPNAME ## h264_qpel16_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\ }\ \ -static void OPNAME ## h264_qpel16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ - OPNAME ## h264_qpel8_hv_lowpass_ ## MMX(dst , tmp , src , dstStride, tmpStride, srcStride);\ - OPNAME ## h264_qpel8_hv_lowpass_ ## MMX(dst+8, tmp , src+8, dstStride, tmpStride, srcStride);\ - src += 8*srcStride;\ +static void OPNAME ## h264_qpel16_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\ + OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst , src , src2 , dstStride, src2Stride);\ + OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride);\ + src += 8*dstStride;\ dst += 8*dstStride;\ - OPNAME ## h264_qpel8_hv_lowpass_ ## MMX(dst , tmp , src , dstStride, tmpStride, srcStride);\ - OPNAME ## h264_qpel8_hv_lowpass_ ## MMX(dst+8, tmp , src+8, dstStride, tmpStride, srcStride);\ + src2 += 8*src2Stride;\ + OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst , src , src2 , dstStride, src2Stride);\ + OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride);\ +}\ +\ +static void OPNAME ## h264_qpel8_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ + OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst , tmp , src , dstStride, tmpStride, srcStride, 8);\ +}\ +\ +static void OPNAME ## h264_qpel16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\ + OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst , tmp , src , dstStride, tmpStride, srcStride, 16);\ +}\ +\ +static void OPNAME ## pixels4_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, uint8_t *src8, int dstStride, int src8Stride, int h)\ +{\ + asm volatile(\ + "movq %5, %%mm6 \n\t"\ + "movq (%1), %%mm0 \n\t"\ + "movq 24(%1), %%mm1 \n\t"\ + "paddw %%mm6, %%mm0 \n\t"\ + "paddw %%mm6, %%mm1 \n\t"\ + "psraw $5, %%mm0 \n\t"\ + "psraw $5, %%mm1 \n\t"\ + "packuswb %%mm0, %%mm0 \n\t"\ + "packuswb %%mm1, %%mm1 \n\t"\ + PAVGB" (%0), %%mm0 \n\t"\ + PAVGB" (%0,%3), %%mm1 \n\t"\ + OP(%%mm0, (%2), %%mm4, d)\ + OP(%%mm1, (%2,%4), %%mm5, d)\ + "lea (%0,%3,2), %0 \n\t"\ + "lea (%2,%4,2), %2 \n\t"\ + "movq 48(%1), %%mm0 \n\t"\ + "movq 72(%1), %%mm1 \n\t"\ + "paddw %%mm6, %%mm0 \n\t"\ + "paddw %%mm6, %%mm1 \n\t"\ + "psraw $5, %%mm0 \n\t"\ + "psraw $5, %%mm1 \n\t"\ + "packuswb %%mm0, %%mm0 \n\t"\ + "packuswb %%mm1, %%mm1 \n\t"\ + PAVGB" (%0), %%mm0 \n\t"\ + PAVGB" (%0,%3), %%mm1 \n\t"\ + OP(%%mm0, (%2), %%mm4, d)\ + OP(%%mm1, (%2,%4), %%mm5, d)\ + :"+a"(src8), "+c"(src16), "+d"(dst)\ + :"S"((long)src8Stride), "D"((long)dstStride), "m"(ff_pw_16)\ + :"memory");\ +}\ +static void OPNAME ## pixels8_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, uint8_t *src8, int dstStride, int src8Stride, int h)\ +{\ + asm volatile(\ + "movq %0, %%mm6 \n\t"\ + ::"m"(ff_pw_16)\ + );\ + while(h--){\ + asm volatile(\ + "movq (%1), %%mm0 \n\t"\ + "movq 8(%1), %%mm1 \n\t"\ + "paddw %%mm6, %%mm0 \n\t"\ + "paddw %%mm6, %%mm1 \n\t"\ + "psraw $5, %%mm0 \n\t"\ + "psraw $5, %%mm1 \n\t"\ + "packuswb %%mm1, %%mm0 \n\t"\ + PAVGB" (%0), %%mm0 \n\t"\ + OP(%%mm0, (%2), %%mm5, q)\ + ::"a"(src8), "c"(src16), "d"(dst)\ + :"memory");\ + src8 += src8Stride;\ + src16 += 24;\ + dst += dstStride;\ + }\ }\ +static void OPNAME ## pixels16_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, uint8_t *src8, int dstStride, int src8Stride, int h)\ +{\ + OPNAME ## pixels8_l2_shift5_ ## MMX(dst , src16 , src8 , dstStride, src8Stride, h);\ + OPNAME ## pixels8_l2_shift5_ ## MMX(dst+8, src16+8, src8+8, dstStride, src8Stride, h);\ +}\ + #define H264_MC(OPNAME, SIZE, MMX) \ static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## MMX (uint8_t *dst, uint8_t *src, int stride){\ @@ -744,10 +1158,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## MMX (uint8_t *dst, uint8_t }\ \ static void OPNAME ## h264_qpel ## SIZE ## _mc10_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ - uint64_t temp[SIZE*SIZE/8];\ - uint8_t * const half= (uint8_t*)temp;\ - put_h264_qpel ## SIZE ## _h_lowpass_ ## MMX(half, src, SIZE, stride);\ - OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, src, half, stride, stride, SIZE);\ + OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, src, stride, stride);\ }\ \ static void OPNAME ## h264_qpel ## SIZE ## _mc20_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ @@ -755,10 +1166,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc20_ ## MMX(uint8_t *dst, uint8_t * }\ \ static void OPNAME ## h264_qpel ## SIZE ## _mc30_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ - uint64_t temp[SIZE*SIZE/8];\ - uint8_t * const half= (uint8_t*)temp;\ - put_h264_qpel ## SIZE ## _h_lowpass_ ## MMX(half, src, SIZE, stride);\ - OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, src+1, half, stride, stride, SIZE);\ + OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, src+1, stride, stride);\ }\ \ static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ @@ -780,89 +1188,72 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, uint8_t * }\ \ static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ - uint64_t temp[SIZE*SIZE/4];\ - uint8_t * const halfH= (uint8_t*)temp;\ - uint8_t * const halfV= ((uint8_t*)temp) + SIZE*SIZE;\ - put_h264_qpel ## SIZE ## _h_lowpass_ ## MMX(halfH, src, SIZE, stride);\ + uint64_t temp[SIZE*SIZE/8];\ + uint8_t * const halfV= (uint8_t*)temp;\ put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(halfV, src, SIZE, stride);\ - OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, halfH, halfV, stride, SIZE, SIZE);\ + OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, halfV, stride, SIZE);\ }\ \ static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ - uint64_t temp[SIZE*SIZE/4];\ - uint8_t * const halfH= (uint8_t*)temp;\ - uint8_t * const halfV= ((uint8_t*)temp) + SIZE*SIZE;\ - put_h264_qpel ## SIZE ## _h_lowpass_ ## MMX(halfH, src, SIZE, stride);\ + uint64_t temp[SIZE*SIZE/8];\ + uint8_t * const halfV= (uint8_t*)temp;\ put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(halfV, src+1, SIZE, stride);\ - OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, halfH, halfV, stride, SIZE, SIZE);\ + OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, halfV, stride, SIZE);\ }\ \ static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ - uint64_t temp[SIZE*SIZE/4];\ - uint8_t * const halfH= (uint8_t*)temp;\ - uint8_t * const halfV= ((uint8_t*)temp) + SIZE*SIZE;\ - put_h264_qpel ## SIZE ## _h_lowpass_ ## MMX(halfH, src + stride, SIZE, stride);\ + uint64_t temp[SIZE*SIZE/8];\ + uint8_t * const halfV= (uint8_t*)temp;\ put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(halfV, src, SIZE, stride);\ - OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, halfH, halfV, stride, SIZE, SIZE);\ + OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, halfV, stride, SIZE);\ }\ \ static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ - uint64_t temp[SIZE*SIZE/4];\ - uint8_t * const halfH= (uint8_t*)temp;\ - uint8_t * const halfV= ((uint8_t*)temp) + SIZE*SIZE;\ - put_h264_qpel ## SIZE ## _h_lowpass_ ## MMX(halfH, src + stride, SIZE, stride);\ + uint64_t temp[SIZE*SIZE/8];\ + uint8_t * const halfV= (uint8_t*)temp;\ put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(halfV, src+1, SIZE, stride);\ - OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, halfH, halfV, stride, SIZE, SIZE);\ + OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, halfV, stride, SIZE);\ }\ \ static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ - uint64_t temp[SIZE*(SIZE+8)/4];\ + uint64_t temp[SIZE*(SIZE<8?12:24)/4];\ int16_t * const tmp= (int16_t*)temp;\ OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(dst, tmp, src, stride, SIZE, stride);\ }\ \ static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ - uint64_t temp[SIZE*(SIZE+8)/4 + SIZE*SIZE/4];\ - uint8_t * const halfH= (uint8_t*)temp;\ - uint8_t * const halfHV= ((uint8_t*)temp) + SIZE*SIZE;\ - int16_t * const tmp= ((int16_t*)temp) + SIZE*SIZE;\ - put_h264_qpel ## SIZE ## _h_lowpass_ ## MMX(halfH, src, SIZE, stride);\ + uint64_t temp[SIZE*(SIZE<8?12:24)/4 + SIZE*SIZE/8];\ + uint8_t * const halfHV= (uint8_t*)temp;\ + int16_t * const tmp= ((int16_t*)temp) + SIZE*SIZE/2;\ put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, tmp, src, SIZE, SIZE, stride);\ - OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, halfH, halfHV, stride, SIZE, SIZE);\ + OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, halfHV, stride, SIZE);\ }\ \ static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ - uint64_t temp[SIZE*(SIZE+8)/4 + SIZE*SIZE/4];\ - uint8_t * const halfH= (uint8_t*)temp;\ - uint8_t * const halfHV= ((uint8_t*)temp) + SIZE*SIZE;\ - int16_t * const tmp= ((int16_t*)temp) + SIZE*SIZE;\ - put_h264_qpel ## SIZE ## _h_lowpass_ ## MMX(halfH, src + stride, SIZE, stride);\ + uint64_t temp[SIZE*(SIZE<8?12:24)/4 + SIZE*SIZE/8];\ + uint8_t * const halfHV= (uint8_t*)temp;\ + int16_t * const tmp= ((int16_t*)temp) + SIZE*SIZE/2;\ put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, tmp, src, SIZE, SIZE, stride);\ - OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, halfH, halfHV, stride, SIZE, SIZE);\ + OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, halfHV, stride, SIZE);\ }\ \ static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ - uint64_t temp[SIZE*(SIZE+8)/4 + SIZE*SIZE/4];\ - uint8_t * const halfV= (uint8_t*)temp;\ - uint8_t * const halfHV= ((uint8_t*)temp) + SIZE*SIZE;\ - int16_t * const tmp= ((int16_t*)temp) + SIZE*SIZE;\ - put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(halfV, src, SIZE, stride);\ - put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, tmp, src, SIZE, SIZE, stride);\ - OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, halfV, halfHV, stride, SIZE, SIZE);\ + uint64_t temp[SIZE*(SIZE<8?12:24)/4 + SIZE*SIZE/8];\ + int16_t * const halfV= ((int16_t*)temp) + SIZE*SIZE/2;\ + uint8_t * const halfHV= ((uint8_t*)temp);\ + put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\ + OPNAME ## pixels ## SIZE ## _l2_shift5_ ## MMX(dst, halfV+2, halfHV, stride, SIZE, SIZE);\ }\ \ static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ - uint64_t temp[SIZE*(SIZE+8)/4 + SIZE*SIZE/4];\ - uint8_t * const halfV= (uint8_t*)temp;\ - uint8_t * const halfHV= ((uint8_t*)temp) + SIZE*SIZE;\ - int16_t * const tmp= ((int16_t*)temp) + SIZE*SIZE;\ - put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(halfV, src+1, SIZE, stride);\ - put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, tmp, src, SIZE, SIZE, stride);\ - OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, halfV, halfHV, stride, SIZE, SIZE);\ + uint64_t temp[SIZE*(SIZE<8?12:24)/4 + SIZE*SIZE/8];\ + int16_t * const halfV= ((int16_t*)temp) + SIZE*SIZE/2;\ + uint8_t * const halfHV= ((uint8_t*)temp);\ + put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\ + OPNAME ## pixels ## SIZE ## _l2_shift5_ ## MMX(dst, halfV+3, halfHV, stride, SIZE, SIZE);\ }\ -#define PUT_OP(a,b,temp, size) "mov" #size " " #a ", " #b " \n\t" #define AVG_3DNOW_OP(a,b,temp, size) \ "mov" #size " " #b ", " #temp " \n\t"\ "pavgusb " #temp ", " #a " \n\t"\ @@ -872,10 +1263,14 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## MMX(uint8_t *dst, uint8_t * "pavgb " #temp ", " #a " \n\t"\ "mov" #size " " #a ", " #b " \n\t" +#define PAVGB "pavgusb" QPEL_H264(put_, PUT_OP, 3dnow) QPEL_H264(avg_, AVG_3DNOW_OP, 3dnow) +#undef PAVGB +#define PAVGB "pavgb" QPEL_H264(put_, PUT_OP, mmx2) QPEL_H264(avg_, AVG_MMX2_OP, mmx2) +#undef PAVGB H264_MC(put_, 4, 3dnow) H264_MC(put_, 8, 3dnow) @@ -895,12 +1290,14 @@ H264_MC(avg_, 16,mmx2) #define H264_CHROMA_OP4(S,D,T) #define H264_CHROMA_MC8_TMPL put_h264_chroma_mc8_mmx #define H264_CHROMA_MC4_TMPL put_h264_chroma_mc4_mmx +#define H264_CHROMA_MC2_TMPL put_h264_chroma_mc2_mmx2 #define H264_CHROMA_MC8_MV0 put_pixels8_mmx #include "dsputil_h264_template_mmx.c" #undef H264_CHROMA_OP #undef H264_CHROMA_OP4 #undef H264_CHROMA_MC8_TMPL #undef H264_CHROMA_MC4_TMPL +#undef H264_CHROMA_MC2_TMPL #undef H264_CHROMA_MC8_MV0 #define H264_CHROMA_OP(S,D) "pavgb " #S ", " #D " \n\t" @@ -908,12 +1305,14 @@ H264_MC(avg_, 16,mmx2) "pavgb " #T ", " #D " \n\t" #define H264_CHROMA_MC8_TMPL avg_h264_chroma_mc8_mmx2 #define H264_CHROMA_MC4_TMPL avg_h264_chroma_mc4_mmx2 +#define H264_CHROMA_MC2_TMPL avg_h264_chroma_mc2_mmx2 #define H264_CHROMA_MC8_MV0 avg_pixels8_mmx2 #include "dsputil_h264_template_mmx.c" #undef H264_CHROMA_OP #undef H264_CHROMA_OP4 #undef H264_CHROMA_MC8_TMPL #undef H264_CHROMA_MC4_TMPL +#undef H264_CHROMA_MC2_TMPL #undef H264_CHROMA_MC8_MV0 #define H264_CHROMA_OP(S,D) "pavgusb " #S ", " #D " \n\t" diff --git a/src/libffmpeg/libavcodec/i386/idct_mmx_xvid.c b/src/libffmpeg/libavcodec/i386/idct_mmx_xvid.c index 7bc6f5f78..a55d4ea07 100644 --- a/src/libffmpeg/libavcodec/i386/idct_mmx_xvid.c +++ b/src/libffmpeg/libavcodec/i386/idct_mmx_xvid.c @@ -20,7 +20,7 @@ // * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA // // * -// * $Id: idct_mmx_xvid.c,v 1.2 2006/02/05 14:11:36 miguelfreitas Exp $ +// * $Id: idct_mmx_xvid.c,v 1.3 2006/08/02 07:02:41 tmmm Exp $ // * // ***************************************************************************/ diff --git a/src/libffmpeg/libavcodec/i386/mmx.h b/src/libffmpeg/libavcodec/i386/mmx.h index df4620e0a..eab051341 100644 --- a/src/libffmpeg/libavcodec/i386/mmx.h +++ b/src/libffmpeg/libavcodec/i386/mmx.h @@ -5,22 +5,6 @@ #ifndef AVCODEC_I386MMX_H #define AVCODEC_I386MMX_H -#ifdef ARCH_X86_64 -# define REG_a "rax" -# define REG_b "rbx" -# define REG_c "rcx" -# define REG_d "rdx" -# define REG_D "rdi" -# define REG_S "rsi" -#else -# define REG_a "eax" -# define REG_b "ebx" -# define REG_c "ecx" -# define REG_d "edx" -# define REG_D "edi" -# define REG_S "esi" -#endif - /* * The type of an value that fits in an MMX register (note that long * long constant values MUST be suffixed by LL and unsigned long long diff --git a/src/libffmpeg/libavcodec/i386/motion_est_mmx.c b/src/libffmpeg/libavcodec/i386/motion_est_mmx.c index c14b79384..edcabcf38 100644 --- a/src/libffmpeg/libavcodec/i386/motion_est_mmx.c +++ b/src/libffmpeg/libavcodec/i386/motion_est_mmx.c @@ -20,7 +20,7 @@ * mostly by Michael Niedermayer <michaelni@gmx.at> */ #include "../dsputil.h" -#include "mmx.h" +#include "x86_cpu.h" static const __attribute__ ((aligned(8))) uint64_t round_tab[3]={ 0x0000000000000000ULL, diff --git a/src/libffmpeg/libavcodec/i386/mpegvideo_mmx.c b/src/libffmpeg/libavcodec/i386/mpegvideo_mmx.c index f83df3a19..c00a602bd 100644 --- a/src/libffmpeg/libavcodec/i386/mpegvideo_mmx.c +++ b/src/libffmpeg/libavcodec/i386/mpegvideo_mmx.c @@ -23,7 +23,7 @@ #include "../dsputil.h" #include "../mpegvideo.h" #include "../avcodec.h" -#include "mmx.h" +#include "x86_cpu.h" extern uint8_t zigzag_direct_noperm[64]; extern uint16_t inv_zigzag_direct16[64]; @@ -699,7 +699,8 @@ void MPV_common_init_mmx(MpegEncContext *s) s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_mmx; s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_mmx; s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_mmx; - s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_mmx; + if(!(s->flags & CODEC_FLAG_BITEXACT)) + s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_mmx; s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_mmx; draw_edges = draw_edges_mmx; diff --git a/src/libffmpeg/libavcodec/i386/mpegvideo_mmx_template.c b/src/libffmpeg/libavcodec/i386/mpegvideo_mmx_template.c index 2c50df232..de2ef08e5 100644 --- a/src/libffmpeg/libavcodec/i386/mpegvideo_mmx_template.c +++ b/src/libffmpeg/libavcodec/i386/mpegvideo_mmx_template.c @@ -19,6 +19,7 @@ */ #undef SPREADW #undef PMAXW +#undef PMAX #ifdef HAVE_MMX2 #define SPREADW(a) "pshufw $0, " #a ", " #a " \n\t" #define PMAXW(a,b) "pmaxsw " #a ", " #b " \n\t" diff --git a/src/libffmpeg/libavcodec/imgconvert.c b/src/libffmpeg/libavcodec/imgconvert.c index 850f9b04f..cc1a825fc 100644 --- a/src/libffmpeg/libavcodec/imgconvert.c +++ b/src/libffmpeg/libavcodec/imgconvert.c @@ -63,7 +63,7 @@ typedef struct PixFmtInfo { } PixFmtInfo; /* this table gives more information about formats */ -static PixFmtInfo pix_fmt_info[PIX_FMT_NB] = { +static const PixFmtInfo pix_fmt_info[PIX_FMT_NB] = { /* YUV formats */ [PIX_FMT_YUV420P] = { .name = "yuv420p", @@ -266,7 +266,7 @@ int avpicture_fill(AVPicture *picture, uint8_t *ptr, int pix_fmt, int width, int height) { int size, w2, h2, size2; - PixFmtInfo *pinfo; + const PixFmtInfo *pinfo; if(avcodec_check_dimensions(NULL, width, height)) goto fail; @@ -359,7 +359,7 @@ fail: int avpicture_layout(const AVPicture* src, int pix_fmt, int width, int height, unsigned char *dest, int dest_size) { - PixFmtInfo* pf = &pix_fmt_info[pix_fmt]; + const PixFmtInfo* pf = &pix_fmt_info[pix_fmt]; int i, j, w, h, data_planes; const unsigned char* s; int size = avpicture_get_size(pix_fmt, width, height); @@ -572,7 +572,7 @@ int avcodec_find_best_pix_fmt(int pix_fmt_mask, int src_pix_fmt, return dst_pix_fmt; } -static void img_copy_plane(uint8_t *dst, int dst_wrap, +void ff_img_copy_plane(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height) { @@ -592,7 +592,7 @@ void img_copy(AVPicture *dst, const AVPicture *src, int pix_fmt, int width, int height) { int bwidth, bits, i; - PixFmtInfo *pf = &pix_fmt_info[pix_fmt]; + const PixFmtInfo *pf = &pix_fmt_info[pix_fmt]; pf = &pix_fmt_info[pix_fmt]; switch(pf->pixel_type) { @@ -612,7 +612,7 @@ void img_copy(AVPicture *dst, const AVPicture *src, break; } bwidth = (width * bits + 7) >> 3; - img_copy_plane(dst->data[0], dst->linesize[0], + ff_img_copy_plane(dst->data[0], dst->linesize[0], src->data[0], src->linesize[0], bwidth, height); break; @@ -626,17 +626,17 @@ void img_copy(AVPicture *dst, const AVPicture *src, h >>= pf->y_chroma_shift; } bwidth = (w * pf->depth + 7) >> 3; - img_copy_plane(dst->data[i], dst->linesize[i], + ff_img_copy_plane(dst->data[i], dst->linesize[i], src->data[i], src->linesize[i], bwidth, h); } break; case FF_PIXEL_PALETTE: - img_copy_plane(dst->data[0], dst->linesize[0], + ff_img_copy_plane(dst->data[0], dst->linesize[0], src->data[0], src->linesize[0], width, height); /* copy the palette */ - img_copy_plane(dst->data[1], dst->linesize[1], + ff_img_copy_plane(dst->data[1], dst->linesize[1], src->data[1], src->linesize[1], 4, 256); break; @@ -1210,7 +1210,7 @@ static void shrink12(uint8_t *dst, int dst_wrap, } /* 2x2 -> 1x1 */ -static void shrink22(uint8_t *dst, int dst_wrap, +void ff_shrink22(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height) { @@ -1243,7 +1243,7 @@ static void shrink22(uint8_t *dst, int dst_wrap, } /* 4x4 -> 1x1 */ -static void shrink44(uint8_t *dst, int dst_wrap, +void ff_shrink44(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height) { @@ -1273,6 +1273,28 @@ static void shrink44(uint8_t *dst, int dst_wrap, } } +/* 8x8 -> 1x1 */ +void ff_shrink88(uint8_t *dst, int dst_wrap, + const uint8_t *src, int src_wrap, + int width, int height) +{ + int w, i; + + for(;height > 0; height--) { + for(w = width;w > 0; w--) { + int tmp=0; + for(i=0; i<8; i++){ + tmp += src[0] + src[1] + src[2] + src[3] + src[4] + src[5] + src[6] + src[7]; + src += src_wrap; + } + *(dst++) = (tmp + 32)>>6; + src += 8 - 8*src_wrap; + } + src += 8*src_wrap - 8*width; + dst += dst_wrap - width; + } +} + static void grow21_line(uint8_t *dst, const uint8_t *src, int width) { @@ -1701,7 +1723,7 @@ typedef struct ConvertEntry { The other conversion functions are just optimisations for common cases. */ -static ConvertEntry convert_table[PIX_FMT_NB][PIX_FMT_NB] = { +static const ConvertEntry convert_table[PIX_FMT_NB][PIX_FMT_NB] = { [PIX_FMT_YUV420P] = { [PIX_FMT_YUV422] = { .convert = yuv420p_to_yuv422, @@ -1922,7 +1944,7 @@ static ConvertEntry convert_table[PIX_FMT_NB][PIX_FMT_NB] = { int avpicture_alloc(AVPicture *picture, int pix_fmt, int width, int height) { - unsigned int size; + int size; void *ptr; size = avpicture_get_size(pix_fmt, width, height); @@ -1944,13 +1966,88 @@ void avpicture_free(AVPicture *picture) } /* return true if yuv planar */ -static inline int is_yuv_planar(PixFmtInfo *ps) +static inline int is_yuv_planar(const PixFmtInfo *ps) { return (ps->color_type == FF_COLOR_YUV || ps->color_type == FF_COLOR_YUV_JPEG) && ps->pixel_type == FF_PIXEL_PLANAR; } +/** + * Crop image top and left side + */ +int img_crop(AVPicture *dst, const AVPicture *src, + int pix_fmt, int top_band, int left_band) +{ + int y_shift; + int x_shift; + + if (pix_fmt < 0 || pix_fmt >= PIX_FMT_NB || !is_yuv_planar(&pix_fmt_info[pix_fmt])) + return -1; + + y_shift = pix_fmt_info[pix_fmt].y_chroma_shift; + x_shift = pix_fmt_info[pix_fmt].x_chroma_shift; + + dst->data[0] = src->data[0] + (top_band * src->linesize[0]) + left_band; + dst->data[1] = src->data[1] + ((top_band >> y_shift) * src->linesize[1]) + (left_band >> x_shift); + dst->data[2] = src->data[2] + ((top_band >> y_shift) * src->linesize[2]) + (left_band >> x_shift); + + dst->linesize[0] = src->linesize[0]; + dst->linesize[1] = src->linesize[1]; + dst->linesize[2] = src->linesize[2]; + return 0; +} + +/** + * Pad image + */ +int img_pad(AVPicture *dst, const AVPicture *src, int height, int width, int pix_fmt, + int padtop, int padbottom, int padleft, int padright, int *color) +{ + uint8_t *optr, *iptr; + int y_shift; + int x_shift; + int yheight; + int i, y; + + if (pix_fmt < 0 || pix_fmt >= PIX_FMT_NB || !is_yuv_planar(&pix_fmt_info[pix_fmt])) + return -1; + + for (i = 0; i < 3; i++) { + x_shift = i ? pix_fmt_info[pix_fmt].x_chroma_shift : 0; + y_shift = i ? pix_fmt_info[pix_fmt].y_chroma_shift : 0; + + if (padtop || padleft) { + memset(dst->data[i], color[i], dst->linesize[i] * (padtop >> y_shift) + (padleft >> x_shift)); + } + + if (padleft || padright || src) { + if (src) { /* first line */ + iptr = src->data[i]; + optr = dst->data[i] + dst->linesize[i] * (padtop >> y_shift) + (padleft >> x_shift); + memcpy(optr, iptr, src->linesize[i]); + iptr += src->linesize[i]; + } + optr = dst->data[i] + dst->linesize[i] * (padtop >> y_shift) + (dst->linesize[i] - (padright >> x_shift)); + yheight = (height - 1 - (padtop + padbottom)) >> y_shift; + for (y = 0; y < yheight; y++) { + memset(optr, color[i], (padleft + padright) >> x_shift); + if (src) { + memcpy(optr + ((padleft + padright) >> x_shift), iptr, src->linesize[i]); + iptr += src->linesize[i]; + } + optr += dst->linesize[i]; + } + } + + if (padbottom || padright) { + optr = dst->data[i] + dst->linesize[i] * ((height - padbottom) >> y_shift) - (padright >> x_shift); + memset(optr, color[i], dst->linesize[i] * (padbottom >> y_shift) + (padright >> x_shift)); + } + } + return 0; +} + /* XXX: always use linesize. Return -1 if not supported */ int img_convert(AVPicture *dst, int dst_pix_fmt, const AVPicture *src, int src_pix_fmt, @@ -1958,8 +2055,8 @@ int img_convert(AVPicture *dst, int dst_pix_fmt, { static int inited; int i, ret, dst_width, dst_height, int_pix_fmt; - PixFmtInfo *src_pix, *dst_pix; - ConvertEntry *ce; + const PixFmtInfo *src_pix, *dst_pix; + const ConvertEntry *ce; AVPicture tmp1, *tmp = &tmp1; if (src_pix_fmt < 0 || src_pix_fmt >= PIX_FMT_NB || @@ -1998,7 +2095,7 @@ int img_convert(AVPicture *dst, int dst_pix_fmt, uint8_t *d; if (dst_pix->color_type == FF_COLOR_YUV_JPEG) { - img_copy_plane(dst->data[0], dst->linesize[0], + ff_img_copy_plane(dst->data[0], dst->linesize[0], src->data[0], src->linesize[0], dst_width, dst_height); } else { @@ -2026,7 +2123,7 @@ int img_convert(AVPicture *dst, int dst_pix_fmt, if (is_yuv_planar(src_pix) && dst_pix_fmt == PIX_FMT_GRAY8) { if (src_pix->color_type == FF_COLOR_YUV_JPEG) { - img_copy_plane(dst->data[0], dst->linesize[0], + ff_img_copy_plane(dst->data[0], dst->linesize[0], src->data[0], src->linesize[0], dst_width, dst_height); } else { @@ -2064,7 +2161,7 @@ int img_convert(AVPicture *dst, int dst_pix_fmt, YUV444 format */ switch(xy_shift) { case 0x00: - resize_func = img_copy_plane; + resize_func = ff_img_copy_plane; break; case 0x10: resize_func = shrink21; @@ -2076,10 +2173,10 @@ int img_convert(AVPicture *dst, int dst_pix_fmt, resize_func = shrink12; break; case 0x11: - resize_func = shrink22; + resize_func = ff_shrink22; break; case 0x22: - resize_func = shrink44; + resize_func = ff_shrink44; break; case 0xf0: resize_func = grow21; @@ -2101,7 +2198,7 @@ int img_convert(AVPicture *dst, int dst_pix_fmt, goto no_chroma_filter; } - img_copy_plane(dst->data[0], dst->linesize[0], + ff_img_copy_plane(dst->data[0], dst->linesize[0], src->data[0], src->linesize[0], dst_width, dst_height); @@ -2226,7 +2323,7 @@ static int get_alpha_info_pal8(const AVPicture *src, int width, int height) int img_get_alpha_info(const AVPicture *src, int pix_fmt, int width, int height) { - PixFmtInfo *pf = &pix_fmt_info[pix_fmt]; + const PixFmtInfo *pf = &pix_fmt_info[pix_fmt]; int ret; pf = &pix_fmt_info[pix_fmt]; diff --git a/src/libffmpeg/libavcodec/imgresample.c b/src/libffmpeg/libavcodec/imgresample.c index 906fde3f2..8ffcd7960 100644 --- a/src/libffmpeg/libavcodec/imgresample.c +++ b/src/libffmpeg/libavcodec/imgresample.c @@ -23,6 +23,7 @@ */ #include "avcodec.h" +#include "swscale.h" #include "dsputil.h" #ifdef USE_FASTMEMCPY @@ -630,6 +631,143 @@ void img_resample_close(ImgReSampleContext *s) av_free(s); } +struct SwsContext *sws_getContext(int srcW, int srcH, int srcFormat, + int dstW, int dstH, int dstFormat, + int flags, SwsFilter *srcFilter, + SwsFilter *dstFilter, double *param) +{ + struct SwsContext *ctx; + + ctx = av_malloc(sizeof(struct SwsContext)); + if (ctx == NULL) { + av_log(NULL, AV_LOG_ERROR, "Cannot allocate a resampling context!\n"); + + return NULL; + } + + if ((srcH != dstH) || (srcW != dstW)) { + if ((srcFormat != PIX_FMT_YUV420P) || (dstFormat != PIX_FMT_YUV420P)) { + av_log(NULL, AV_LOG_INFO, "PIX_FMT_YUV420P will be used as an intermediate format for rescaling\n"); + } + ctx->resampling_ctx = img_resample_init(dstW, dstH, srcW, srcH); + } else { + ctx->resampling_ctx = av_malloc(sizeof(ImgReSampleContext)); + ctx->resampling_ctx->iheight = srcH; + ctx->resampling_ctx->iwidth = srcW; + ctx->resampling_ctx->oheight = dstH; + ctx->resampling_ctx->owidth = dstW; + } + ctx->src_pix_fmt = srcFormat; + ctx->dst_pix_fmt = dstFormat; + + return ctx; +} + +void sws_freeContext(struct SwsContext *ctx) +{ + if ((ctx->resampling_ctx->iwidth != ctx->resampling_ctx->owidth) || + (ctx->resampling_ctx->iheight != ctx->resampling_ctx->oheight)) { + img_resample_close(ctx->resampling_ctx); + } else { + av_free(ctx->resampling_ctx); + } + av_free(ctx); +} + +int sws_scale(struct SwsContext *ctx, uint8_t* src[], int srcStride[], + int srcSliceY, int srcSliceH, uint8_t* dst[], int dstStride[]) +{ + AVPicture src_pict, dst_pict; + int i, res = 0; + AVPicture picture_format_temp; + AVPicture picture_resample_temp, *formatted_picture, *resampled_picture; + uint8_t *buf1 = NULL, *buf2 = NULL; + enum PixelFormat current_pix_fmt; + + for (i = 0; i < 3; i++) { + src_pict.data[i] = src[i]; + src_pict.linesize[i] = srcStride[i]; + dst_pict.data[i] = dst[i]; + dst_pict.linesize[i] = dstStride[i]; + } + if ((ctx->resampling_ctx->iwidth != ctx->resampling_ctx->owidth) || + (ctx->resampling_ctx->iheight != ctx->resampling_ctx->oheight)) { + /* We have to rescale the picture, but only YUV420P rescaling is supported... */ + + if (ctx->src_pix_fmt != PIX_FMT_YUV420P) { + int size; + + /* create temporary picture for rescaling input*/ + size = avpicture_get_size(PIX_FMT_YUV420P, ctx->resampling_ctx->iwidth, ctx->resampling_ctx->iheight); + buf1 = av_malloc(size); + if (!buf1) { + res = -1; + goto the_end; + } + formatted_picture = &picture_format_temp; + avpicture_fill((AVPicture*)formatted_picture, buf1, + PIX_FMT_YUV420P, ctx->resampling_ctx->iwidth, ctx->resampling_ctx->iheight); + + if (img_convert((AVPicture*)formatted_picture, PIX_FMT_YUV420P, + &src_pict, ctx->src_pix_fmt, + ctx->resampling_ctx->iwidth, ctx->resampling_ctx->iheight) < 0) { + + av_log(NULL, AV_LOG_ERROR, "pixel format conversion not handled\n"); + res = -1; + goto the_end; + } + } else { + formatted_picture = &src_pict; + } + + if (ctx->dst_pix_fmt != PIX_FMT_YUV420P) { + int size; + + /* create temporary picture for rescaling output*/ + size = avpicture_get_size(PIX_FMT_YUV420P, ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight); + buf2 = av_malloc(size); + if (!buf2) { + res = -1; + goto the_end; + } + resampled_picture = &picture_resample_temp; + avpicture_fill((AVPicture*)resampled_picture, buf2, + PIX_FMT_YUV420P, ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight); + + } else { + resampled_picture = &dst_pict; + } + + /* ...and finally rescale!!! */ + img_resample(ctx->resampling_ctx, resampled_picture, formatted_picture); + current_pix_fmt = PIX_FMT_YUV420P; + } else { + resampled_picture = &src_pict; + current_pix_fmt = ctx->src_pix_fmt; + } + + if (current_pix_fmt != ctx->dst_pix_fmt) { + if (img_convert(&dst_pict, ctx->dst_pix_fmt, + resampled_picture, current_pix_fmt, + ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight) < 0) { + + av_log(NULL, AV_LOG_ERROR, "pixel format conversion not handled\n"); + + res = -1; + goto the_end; + } + } else if (resampled_picture != &dst_pict) { + img_copy(&dst_pict, resampled_picture, current_pix_fmt, + ctx->resampling_ctx->owidth, ctx->resampling_ctx->oheight); + } + +the_end: + av_free(buf1); + av_free(buf2); + return res; +} + + #ifdef TEST #include <stdio.h> diff --git a/src/libffmpeg/libavcodec/interplayvideo.c b/src/libffmpeg/libavcodec/interplayvideo.c index 588485a0a..73165e795 100644 --- a/src/libffmpeg/libavcodec/interplayvideo.c +++ b/src/libffmpeg/libavcodec/interplayvideo.c @@ -49,11 +49,7 @@ #if DEBUG_INTERPLAY #define debug_interplay(x,...) av_log(NULL, AV_LOG_DEBUG, x, __VA_ARGS__) #else -static inline void -#ifdef __GNUC__ -__attribute__ ((__format__ (__printf__, 1, 2))) -#endif -debug_interplay(const char *format, ...) { } +static inline void debug_interplay(const char *format, ...) { } #endif typedef struct IpvideoContext { diff --git a/src/libffmpeg/libavcodec/mjpeg.c b/src/libffmpeg/libavcodec/mjpeg.c index 951a622ee..dffd98946 100644 --- a/src/libffmpeg/libavcodec/mjpeg.c +++ b/src/libffmpeg/libavcodec/mjpeg.c @@ -894,14 +894,24 @@ typedef struct MJpegDecodeContext { static int mjpeg_decode_dht(MJpegDecodeContext *s); static int build_vlc(VLC *vlc, const uint8_t *bits_table, const uint8_t *val_table, - int nb_codes, int use_static) + int nb_codes, int use_static, int is_ac) { - uint8_t huff_size[256]; - uint16_t huff_code[256]; + uint8_t huff_size[256+16]; + uint16_t huff_code[256+16]; + + assert(nb_codes <= 256); memset(huff_size, 0, sizeof(huff_size)); build_huffman_codes(huff_size, huff_code, bits_table, val_table); + if(is_ac){ + memmove(huff_size+16, huff_size, sizeof(uint8_t)*nb_codes); + memmove(huff_code+16, huff_code, sizeof(uint16_t)*nb_codes); + memset(huff_size, 0, sizeof(uint8_t)*16); + memset(huff_code, 0, sizeof(uint16_t)*16); + nb_codes += 16; + } + return init_vlc(vlc, 9, nb_codes, huff_size, 1, 1, huff_code, 2, 2, use_static); } @@ -930,10 +940,10 @@ static int mjpeg_decode_init(AVCodecContext *avctx) s->first_picture = 1; s->org_height = avctx->coded_height; - build_vlc(&s->vlcs[0][0], bits_dc_luminance, val_dc_luminance, 12, 0); - build_vlc(&s->vlcs[0][1], bits_dc_chrominance, val_dc_chrominance, 12, 0); - build_vlc(&s->vlcs[1][0], bits_ac_luminance, val_ac_luminance, 251, 0); - build_vlc(&s->vlcs[1][1], bits_ac_chrominance, val_ac_chrominance, 251, 0); + build_vlc(&s->vlcs[0][0], bits_dc_luminance, val_dc_luminance, 12, 0, 0); + build_vlc(&s->vlcs[0][1], bits_dc_chrominance, val_dc_chrominance, 12, 0, 0); + build_vlc(&s->vlcs[1][0], bits_ac_luminance, val_ac_luminance, 251, 0, 1); + build_vlc(&s->vlcs[1][1], bits_ac_chrominance, val_ac_chrominance, 251, 0, 1); if (avctx->flags & CODEC_FLAG_EXTERN_HUFF) { @@ -1084,7 +1094,7 @@ static int mjpeg_decode_dht(MJpegDecodeContext *s) free_vlc(&s->vlcs[class][index]); dprintf("class=%d index=%d nb_codes=%d\n", class, index, code_max + 1); - if(build_vlc(&s->vlcs[class][index], bits_table, val_table, code_max + 1, 0) < 0){ + if(build_vlc(&s->vlcs[class][index], bits_table, val_table, code_max + 1, 0, class > 0) < 0){ return -1; } } @@ -1246,11 +1256,9 @@ static inline int mjpeg_decode_dc(MJpegDecodeContext *s, int dc_index) /* decode block and dequantize */ static int decode_block(MJpegDecodeContext *s, DCTELEM *block, - int component, int dc_index, int ac_index, int quant_index) + int component, int dc_index, int ac_index, int16_t *quant_matrix) { int code, i, j, level, val; - VLC *ac_vlc; - int16_t *quant_matrix; /* DC coef */ val = mjpeg_decode_dc(s, dc_index); @@ -1258,39 +1266,48 @@ static int decode_block(MJpegDecodeContext *s, DCTELEM *block, dprintf("error dc\n"); return -1; } - quant_matrix = s->quant_matrixes[quant_index]; val = val * quant_matrix[0] + s->last_dc[component]; s->last_dc[component] = val; block[0] = val; /* AC coefs */ - ac_vlc = &s->vlcs[1][ac_index]; - i = 1; + i = 0; + {OPEN_READER(re, &s->gb) for(;;) { - code = get_vlc2(&s->gb, s->vlcs[1][ac_index].table, 9, 2); + UPDATE_CACHE(re, &s->gb); + GET_VLC(code, re, &s->gb, s->vlcs[1][ac_index].table, 9, 2) - if (code < 0) { - dprintf("error ac\n"); - return -1; - } /* EOB */ - if (code == 0) + if (code == 0x10) break; - if (code == 0xf0) { - i += 16; - } else { - level = get_xbits(&s->gb, code & 0xf); - i += code >> 4; - if (i >= 64) { + i += ((unsigned)code) >> 4; + if(code != 0x100){ + code &= 0xf; + if(code > MIN_CACHE_BITS - 16){ + UPDATE_CACHE(re, &s->gb) + } + { + int cache=GET_CACHE(re,&s->gb); + int sign=(~cache)>>31; + level = (NEG_USR32(sign ^ cache,code) ^ sign) - sign; + } + + LAST_SKIP_BITS(re, &s->gb, code) + + if (i >= 63) { + if(i == 63){ + j = s->scantable.permutated[63]; + block[j] = level * quant_matrix[j]; + break; + } dprintf("error count: %d\n", i); return -1; } j = s->scantable.permutated[i]; block[j] = level * quant_matrix[j]; - i++; - if (i >= 64) - break; } } + CLOSE_READER(re, &s->gb)} + return 0; } @@ -1467,7 +1484,7 @@ static int mjpeg_decode_scan(MJpegDecodeContext *s){ memset(s->block, 0, sizeof(s->block)); if (decode_block(s, s->block, i, s->dc_index[i], s->ac_index[i], - s->quant_index[c]) < 0) { + s->quant_matrixes[ s->quant_index[c] ]) < 0) { dprintf("error y=%d x=%d\n", mb_y, mb_x); return -1; } diff --git a/src/libffmpeg/libavcodec/motion_est.c b/src/libffmpeg/libavcodec/motion_est.c index 991be55d0..c587369f3 100644 --- a/src/libffmpeg/libavcodec/motion_est.c +++ b/src/libffmpeg/libavcodec/motion_est.c @@ -368,7 +368,7 @@ static int full_motion_search(MpegEncContext * s, #if 0 if (*mx_ptr < -(2 * range) || *mx_ptr >= (2 * range) || *my_ptr < -(2 * range) || *my_ptr >= (2 * range)) { - fprintf(stderr, "error %d %d\n", *mx_ptr, *my_ptr); + av_log(NULL, AV_LOG_ERROR, "error %d %d\n", *mx_ptr, *my_ptr); } #endif return dmin; @@ -904,6 +904,8 @@ static int interlaced_search(MpegEncContext *s, int ref_index, int16_t (*mv_table)[2]= mv_tables[block][field_select]; if(user_field_select){ + assert(field_select==0 || field_select==1); + assert(field_select_tables[block][xy]==0 || field_select_tables[block][xy]==1); if(field_select_tables[block][xy] != field_select) continue; } diff --git a/src/libffmpeg/libavcodec/motion_est_template.c b/src/libffmpeg/libavcodec/motion_est_template.c index 23ead283c..16d34bb88 100644 --- a/src/libffmpeg/libavcodec/motion_est_template.c +++ b/src/libffmpeg/libavcodec/motion_est_template.c @@ -896,7 +896,8 @@ static always_inline int epzs_motion_search_internal(MpegEncContext * s, int *mx CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16, (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16) }else{ - if(dmin<h*h && ( P_LEFT[0] |P_LEFT[1] + if(dmin<((h*h*s->avctx->mv0_threshold)>>8) + && ( P_LEFT[0] |P_LEFT[1] |P_TOP[0] |P_TOP[1] |P_TOPRIGHT[0]|P_TOPRIGHT[1])==0){ *mx_ptr= 0; diff --git a/src/libffmpeg/libavcodec/mpeg12.c b/src/libffmpeg/libavcodec/mpeg12.c index ddecae85d..c268cf707 100644 --- a/src/libffmpeg/libavcodec/mpeg12.c +++ b/src/libffmpeg/libavcodec/mpeg12.c @@ -101,8 +101,8 @@ const enum PixelFormat pixfmt_xvmc_mpg2_420[] = { static uint8_t (*mv_penalty)[MAX_MV*2+1]= NULL; static uint8_t fcode_tab[MAX_MV*2+1]; -static uint32_t uni_mpeg1_ac_vlc_bits[64*64*2]; static uint8_t uni_mpeg1_ac_vlc_len [64*64*2]; +static uint8_t uni_mpeg2_ac_vlc_len [64*64*2]; /* simple include everything table for dc, first byte is bits number next 3 are code*/ static uint32_t mpeg1_lum_dc_uni[512]; @@ -155,7 +155,7 @@ static void init_2d_vlc_rl(RLTable *rl, int use_static) } #ifdef CONFIG_ENCODERS -static void init_uni_ac_vlc(RLTable *rl, uint32_t *uni_ac_vlc_bits, uint8_t *uni_ac_vlc_len){ +static void init_uni_ac_vlc(RLTable *rl, uint8_t *uni_ac_vlc_len){ int i; for(i=0; i<128; i++){ @@ -174,11 +174,11 @@ static void init_uni_ac_vlc(RLTable *rl, uint32_t *uni_ac_vlc_bits, uint8_t *uni if (code < 111 /* rl->n */) { /* store the vlc & sign at once */ - len= mpeg1_vlc[code][1]+1; - bits= (mpeg1_vlc[code][0]<<1) + sign; + len= rl->table_vlc[code][1]+1; + bits= (rl->table_vlc[code][0]<<1) + sign; } else { - len= mpeg1_vlc[111/*rl->n*/][1]+6; - bits= mpeg1_vlc[111/*rl->n*/][0]<<6; + len= rl->table_vlc[111/*rl->n*/][1]+6; + bits= rl->table_vlc[111/*rl->n*/][0]<<6; bits|= run; if (alevel < 128) { @@ -195,7 +195,6 @@ static void init_uni_ac_vlc(RLTable *rl, uint32_t *uni_ac_vlc_bits, uint8_t *uni } } - uni_ac_vlc_bits[UNI_AC_ENC_INDEX(run, i)]= bits; uni_ac_vlc_len [UNI_AC_ENC_INDEX(run, i)]= len; } } @@ -208,7 +207,7 @@ static int find_frame_rate_index(MpegEncContext *s){ int64_t d; for(i=1;i<14;i++) { - int64_t n0= 1001LL/frame_rate_tab[i].den*frame_rate_tab[i].num*s->avctx->time_base.num; + int64_t n0= 1001LL/ff_frame_rate_tab[i].den*ff_frame_rate_tab[i].num*s->avctx->time_base.num; int64_t n1= 1001LL*s->avctx->time_base.den; if(s->avctx->strict_std_compliance > FF_COMPLIANCE_INOFFICIAL && i>=9) break; @@ -240,6 +239,12 @@ static int encode_init(AVCodecContext *avctx) } } + if(avctx->profile == FF_PROFILE_UNKNOWN) + avctx->profile = s->chroma_format == CHROMA_420 ? 4 : 0; + + if(avctx->level == FF_LEVEL_UNKNOWN) + avctx->level = s->chroma_format == CHROMA_420 ? 8 : 5; + return 0; } @@ -264,7 +269,7 @@ static void mpeg1_encode_sequence_header(MpegEncContext *s) if(aspect_ratio==0.0) aspect_ratio= 1.0; //pixel aspect 1:1 (VGA) if (s->current_picture.key_frame) { - AVRational framerate= frame_rate_tab[s->frame_rate_index]; + AVRational framerate= ff_frame_rate_tab[s->frame_rate_index]; /* mpeg1 header repeated every gop */ put_header(s, SEQ_START_CODE); @@ -327,22 +332,14 @@ static void mpeg1_encode_sequence_header(MpegEncContext *s) if(s->codec_id == CODEC_ID_MPEG2VIDEO){ put_header(s, EXT_START_CODE); put_bits(&s->pb, 4, 1); //seq ext - put_bits(&s->pb, 1, 0); //esc - if(s->avctx->profile == FF_PROFILE_UNKNOWN){ - put_bits(&s->pb, 3, 4); //profile - }else{ - put_bits(&s->pb, 3, s->avctx->profile); //profile - } + put_bits(&s->pb, 1, s->chroma_format == CHROMA_422); //escx - if(s->avctx->level == FF_LEVEL_UNKNOWN){ - put_bits(&s->pb, 4, 8); //level - }else{ - put_bits(&s->pb, 4, s->avctx->level); //level - } + put_bits(&s->pb, 3, s->avctx->profile); //profile + put_bits(&s->pb, 4, s->avctx->level); //level put_bits(&s->pb, 1, s->progressive_sequence); - put_bits(&s->pb, 2, 1); //chroma format 4:2:0 + put_bits(&s->pb, 2, s->chroma_format); put_bits(&s->pb, 2, 0); //horizontal size ext put_bits(&s->pb, 2, 0); //vertical size ext put_bits(&s->pb, 12, v>>18); //bitrate ext @@ -476,7 +473,7 @@ void mpeg1_encode_picture_header(MpegEncContext *s, int picture_number) put_bits(&s->pb, 1, s->alternate_scan); put_bits(&s->pb, 1, s->repeat_first_field); s->progressive_frame = s->progressive_sequence; - put_bits(&s->pb, 1, s->chroma_420_type=s->progressive_frame); + put_bits(&s->pb, 1, s->chroma_format == CHROMA_420 ? s->progressive_frame : 0); /* chroma_420_type */ put_bits(&s->pb, 1, s->progressive_frame); put_bits(&s->pb, 1, 0); //composite_display_flag } @@ -504,9 +501,10 @@ static inline void put_mb_modes(MpegEncContext *s, int n, int bits, } } -void mpeg1_encode_mb(MpegEncContext *s, - DCTELEM block[6][64], - int motion_x, int motion_y) +static always_inline void mpeg1_encode_mb_internal(MpegEncContext *s, + DCTELEM block[6][64], + int motion_x, int motion_y, + int mb_block_count) { int i, cbp; const int mb_x = s->mb_x; @@ -515,9 +513,9 @@ void mpeg1_encode_mb(MpegEncContext *s, /* compute cbp */ cbp = 0; - for(i=0;i<6;i++) { + for(i=0;i<mb_block_count;i++) { if (s->block_last_index[i] >= 0) - cbp |= 1 << (5 - i); + cbp |= 1 << (mb_block_count - 1 - i); } if (cbp == 0 && !first_mb && s->mv_type == MV_TYPE_16X16 && @@ -623,8 +621,14 @@ void mpeg1_encode_mb(MpegEncContext *s, } s->mv_bits+= get_bits_diff(s); } - if(cbp) - put_bits(&s->pb, mbPatTable[cbp][1], mbPatTable[cbp][0]); + if(cbp) { + if (s->chroma_y_shift) { + put_bits(&s->pb, mbPatTable[cbp][1], mbPatTable[cbp][0]); + } else { + put_bits(&s->pb, mbPatTable[cbp>>2][1], mbPatTable[cbp>>2][0]); + put_bits(&s->pb, 2, cbp & 3); + } + } s->f_count++; } else{ static const int mb_type_len[4]={0,3,4,2}; //bak,for,bi @@ -702,11 +706,17 @@ void mpeg1_encode_mb(MpegEncContext *s, } } s->mv_bits += get_bits_diff(s); - if(cbp) - put_bits(&s->pb, mbPatTable[cbp][1], mbPatTable[cbp][0]); + if(cbp) { + if (s->chroma_y_shift) { + put_bits(&s->pb, mbPatTable[cbp][1], mbPatTable[cbp][0]); + } else { + put_bits(&s->pb, mbPatTable[cbp>>2][1], mbPatTable[cbp>>2][0]); + put_bits(&s->pb, 2, cbp & 3); + } + } } - for(i=0;i<6;i++) { - if (cbp & (1 << (5 - i))) { + for(i=0;i<mb_block_count;i++) { + if (cbp & (1 << (mb_block_count - 1 - i))) { mpeg1_encode_block(s, block[i], i); } } @@ -718,6 +728,12 @@ void mpeg1_encode_mb(MpegEncContext *s, } } +void mpeg1_encode_mb(MpegEncContext *s, DCTELEM block[6][64], int motion_x, int motion_y) +{ + if (s->chroma_format == CHROMA_420) mpeg1_encode_mb_internal(s, block, motion_x, motion_y, 6); + else mpeg1_encode_mb_internal(s, block, motion_x, motion_y, 8); +} + // RAL: Parameter added: f_or_b_code static void mpeg1_encode_motion(MpegEncContext *s, int val, int f_or_b_code) { @@ -775,6 +791,8 @@ void ff_mpeg1_encode_init(MpegEncContext *s) done=1; init_rl(&rl_mpeg1, 1); + if(s->intra_vlc_format) + init_rl(&rl_mpeg2, 1); for(i=0; i<64; i++) { @@ -782,7 +800,9 @@ void ff_mpeg1_encode_init(MpegEncContext *s) mpeg1_index_run[0][i]= rl_mpeg1.index_run[0][i]; } - init_uni_ac_vlc(&rl_mpeg1, uni_mpeg1_ac_vlc_bits, uni_mpeg1_ac_vlc_len); + init_uni_ac_vlc(&rl_mpeg1, uni_mpeg1_ac_vlc_len); + if(s->intra_vlc_format) + init_uni_ac_vlc(&rl_mpeg2, uni_mpeg2_ac_vlc_len); /* build unified dc encoding tables */ for(i=-255; i<256; i++) @@ -849,9 +869,14 @@ void ff_mpeg1_encode_init(MpegEncContext *s) s->min_qcoeff=-2047; s->max_qcoeff= 2047; } - s->intra_ac_vlc_length= + if (s->intra_vlc_format) { + s->intra_ac_vlc_length= + s->intra_ac_vlc_last_length= uni_mpeg2_ac_vlc_len; + } else { + s->intra_ac_vlc_length= + s->intra_ac_vlc_last_length= uni_mpeg1_ac_vlc_len; + } s->inter_ac_vlc_length= - s->intra_ac_vlc_last_length= s->inter_ac_vlc_last_length= uni_mpeg1_ac_vlc_len; } @@ -898,24 +923,20 @@ static void mpeg1_encode_block(MpegEncContext *s, { int alevel, level, last_non_zero, dc, diff, i, j, run, last_index, sign; int code, component; -// RLTable *rl = &rl_mpeg1; + const uint16_t (*table_vlc)[2] = rl_mpeg1.table_vlc; last_index = s->block_last_index[n]; /* DC coef */ if (s->mb_intra) { - component = (n <= 3 ? 0 : n - 4 + 1); + component = (n <= 3 ? 0 : (n&1) + 1); dc = block[0]; /* overflow is impossible */ diff = dc - s->last_dc[component]; encode_dc(s, diff, component); s->last_dc[component] = dc; i = 1; -/* if (s->intra_vlc_format) - rl = &rl_mpeg2; - else - rl = &rl_mpeg1; -*/ + table_vlc = rl_mpeg2.table_vlc; } else { /* encode the first coefficient : needs to be done here because it is handled slightly differently */ @@ -950,14 +971,13 @@ static void mpeg1_encode_block(MpegEncContext *s, MASK_ABS(sign, alevel) sign&=1; -// code = get_rl_index(rl, 0, run, alevel); if (alevel <= mpeg1_max_level[0][run]){ code= mpeg1_index_run[0][run] + alevel - 1; /* store the vlc & sign at once */ - put_bits(&s->pb, mpeg1_vlc[code][1]+1, (mpeg1_vlc[code][0]<<1) + sign); + put_bits(&s->pb, table_vlc[code][1]+1, (table_vlc[code][0]<<1) + sign); } else { /* escape seems to be pretty rare <5% so i dont optimize it */ - put_bits(&s->pb, mpeg1_vlc[111/*rl->n*/][1], mpeg1_vlc[111/*rl->n*/][0]); + put_bits(&s->pb, table_vlc[111][1], table_vlc[111][0]); /* escape: only clip in this case */ put_bits(&s->pb, 6, run); if(s->codec_id == CODEC_ID_MPEG1VIDEO){ @@ -978,7 +998,7 @@ static void mpeg1_encode_block(MpegEncContext *s, } } /* end of block */ - put_bits(&s->pb, 2, 0x2); + put_bits(&s->pb, table_vlc[112][1], table_vlc[112][0]); } #endif //CONFIG_ENCODERS @@ -2108,8 +2128,8 @@ static int mpeg_decode_postinit(AVCodecContext *avctx){ if(avctx->sub_id==1){//s->codec_id==avctx->codec_id==CODEC_ID //mpeg1 fps - avctx->time_base.den = frame_rate_tab[s->frame_rate_index].num; - avctx->time_base.num= frame_rate_tab[s->frame_rate_index].den; + avctx->time_base.den= ff_frame_rate_tab[s->frame_rate_index].num; + avctx->time_base.num= ff_frame_rate_tab[s->frame_rate_index].den; //mpeg1 aspect avctx->sample_aspect_ratio= av_d2q( 1.0/mpeg1_aspect[s->aspect_ratio_info], 255); @@ -2119,8 +2139,8 @@ static int mpeg_decode_postinit(AVCodecContext *avctx){ av_reduce( &s->avctx->time_base.den, &s->avctx->time_base.num, - frame_rate_tab[s->frame_rate_index].num * s1->frame_rate_ext.num, - frame_rate_tab[s->frame_rate_index].den * s1->frame_rate_ext.den, + ff_frame_rate_tab[s->frame_rate_index].num * s1->frame_rate_ext.num, + ff_frame_rate_tab[s->frame_rate_index].den * s1->frame_rate_ext.den, 1<<30); //mpeg2 aspect if(s->aspect_ratio_info > 1){ @@ -2625,10 +2645,13 @@ static int mpeg_decode_slice(Mpeg1Context *s1, int mb_y, if(s->mb_y<<field_pic >= s->mb_height){ int left= s->gb.size_in_bits - get_bits_count(&s->gb); + int is_d10= s->chroma_format==2 && s->pict_type==I_TYPE && avctx->profile==0 && avctx->level==5 + && s->intra_dc_precision == 2 && s->q_scale_type == 1 && s->alternate_scan == 0 + && s->progressive_frame == 0 /* vbv_delay == 0xBBB || 0xE10*/; - if(left < 0 || (left && show_bits(&s->gb, FFMIN(left, 23))) + if(left < 0 || (left && show_bits(&s->gb, FFMIN(left, 23)) && !is_d10) || (avctx->error_resilience >= FF_ER_AGGRESSIVE && left>8)){ - av_log(avctx, AV_LOG_ERROR, "end mismatch left=%d\n", left); + av_log(avctx, AV_LOG_ERROR, "end mismatch left=%d %0X\n", left, show_bits(&s->gb, FFMIN(left, 23))); return -1; }else goto eos; @@ -2793,12 +2816,10 @@ static int mpeg1_decode_sequence(AVCodecContext *avctx, s->chroma_intra_matrix[j] = v; } #ifdef DEBUG -/* dprintf("intra matrix present\n"); for(i=0;i<64;i++) dprintf(" %d", s->intra_matrix[s->dsp.idct_permutation[i]]); - printf("\n"); -*/ + dprintf("\n"); #endif } else { for(i=0;i<64;i++) { @@ -2820,12 +2841,10 @@ static int mpeg1_decode_sequence(AVCodecContext *avctx, s->chroma_inter_matrix[j] = v; } #ifdef DEBUG -/* dprintf("non intra matrix present\n"); for(i=0;i<64;i++) dprintf(" %d", s->inter_matrix[s->dsp.idct_permutation[i]]); - printf("\n"); -*/ + dprintf("\n"); #endif } else { for(i=0;i<64;i++) { @@ -3125,7 +3144,7 @@ static int mpeg_decode_frame(AVCodecContext *avctx, /* skip b frames if we dont have reference frames */ if(s2->pict_type==B_TYPE) break; /* skip P frames if we dont have reference frame no valid header */ - if(s2->pict_type==P_TYPE && !s2->first_slice) break; +// if(s2->pict_type==P_TYPE && s2->first_field && !s2->first_slice) break; } /* skip b frames if we are in a hurry */ if(avctx->hurry_up && s2->pict_type==B_TYPE) break; @@ -3240,7 +3259,7 @@ AVCodec mpeg1video_encoder = { encode_init, MPV_encode_picture, MPV_encode_end, - .supported_framerates= frame_rate_tab+1, + .supported_framerates= ff_frame_rate_tab+1, .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1}, .capabilities= CODEC_CAP_DELAY, }; @@ -3253,8 +3272,8 @@ AVCodec mpeg2video_encoder = { encode_init, MPV_encode_picture, MPV_encode_end, - .supported_framerates= frame_rate_tab+1, - .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, -1}, + .supported_framerates= ff_frame_rate_tab+1, + .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_YUV422P, -1}, .capabilities= CODEC_CAP_DELAY, }; #endif diff --git a/src/libffmpeg/libavcodec/mpeg12data.h b/src/libffmpeg/libavcodec/mpeg12data.h index a6b49aa78..e9a10ff3a 100644 --- a/src/libffmpeg/libavcodec/mpeg12data.h +++ b/src/libffmpeg/libavcodec/mpeg12data.h @@ -332,7 +332,7 @@ static const uint8_t mbMotionVectorTable[17][2] = { { 0xc, 10 }, }; -static const AVRational frame_rate_tab[] = { +const AVRational ff_frame_rate_tab[] = { { 0, 0}, {24000, 1001}, { 24, 1}, diff --git a/src/libffmpeg/libavcodec/mpegaudiodec.c b/src/libffmpeg/libavcodec/mpegaudiodec.c index ff1f1113e..0d82e3e98 100644 --- a/src/libffmpeg/libavcodec/mpegaudiodec.c +++ b/src/libffmpeg/libavcodec/mpegaudiodec.c @@ -489,10 +489,10 @@ static int decode_init(AVCodecContext * avctx) #if defined(DEBUG) for(j=0;j<8;j++) { - printf("win%d=\n", j); + av_log(avctx, AV_LOG_DEBUG, "win%d=\n", j); for(i=0;i<36;i++) - printf("%f, ", (double)mdct_win[j][i] / FRAC_ONE); - printf("\n"); + av_log(avctx, AV_LOG_DEBUG, "%f, ", (double)mdct_win[j][i] / FRAC_ONE); + av_log(avctx, AV_LOG_DEBUG, "\n"); } #endif init = 1; @@ -1179,20 +1179,20 @@ static int decode_header(MPADecodeContext *s, uint32_t header) } #if defined(DEBUG) - printf("layer%d, %d Hz, %d kbits/s, ", + dprintf("layer%d, %d Hz, %d kbits/s, ", s->layer, s->sample_rate, s->bit_rate); if (s->nb_channels == 2) { if (s->layer == 3) { if (s->mode_ext & MODE_EXT_MS_STEREO) - printf("ms-"); + dprintf("ms-"); if (s->mode_ext & MODE_EXT_I_STEREO) - printf("i-"); + dprintf("i-"); } - printf("stereo"); + dprintf("stereo"); } else { - printf("mono"); + dprintf("mono"); } - printf("\n"); + dprintf("\n"); #endif return 0; } @@ -1370,8 +1370,8 @@ static int mp_decode_layer2(MPADecodeContext *s) { for(ch=0;ch<s->nb_channels;ch++) { for(i=0;i<sblimit;i++) - printf(" %d", bit_alloc[ch][i]); - printf("\n"); + dprintf(" %d", bit_alloc[ch][i]); + dprintf("\n"); } } #endif @@ -1421,12 +1421,12 @@ static int mp_decode_layer2(MPADecodeContext *s) for(i=0;i<sblimit;i++) { if (bit_alloc[ch][i]) { sf = scale_factors[ch][i]; - printf(" %d %d %d", sf[0], sf[1], sf[2]); + dprintf(" %d %d %d", sf[0], sf[1], sf[2]); } else { - printf(" -"); + dprintf(" -"); } } - printf("\n"); + dprintf("\n"); } #endif @@ -1650,7 +1650,7 @@ static int huffman_decode(MPADecodeContext *s, GranuleDef *g, if (get_bits_count(&s->gb) >= end_pos) break; if (code_table) { - code = get_vlc2(&s->gb, vlc->table, 8, 2); + code = get_vlc2(&s->gb, vlc->table, 8, 3); if (code < 0) return -1; y = code_table[code]; @@ -2285,11 +2285,11 @@ static int mp_decode_layer3(MPADecodeContext *s) } #if defined(DEBUG) { - printf("scfsi=%x gr=%d ch=%d scale_factors:\n", + dprintf("scfsi=%x gr=%d ch=%d scale_factors:\n", g->scfsi, gr, ch); for(i=0;i<j;i++) - printf(" %d", g->scale_factors[i]); - printf("\n"); + dprintf(" %d", g->scale_factors[i]); + dprintf("\n"); } #endif } else { @@ -2342,11 +2342,11 @@ static int mp_decode_layer3(MPADecodeContext *s) g->scale_factors[j] = 0; #if defined(DEBUG) { - printf("gr=%d ch=%d scale_factors:\n", + dprintf("gr=%d ch=%d scale_factors:\n", gr, ch); for(i=0;i<40;i++) - printf(" %d", g->scale_factors[i]); - printf("\n"); + dprintf(" %d", g->scale_factors[i]); + dprintf("\n"); } #endif } @@ -2428,10 +2428,10 @@ static int mp_decode_frame(MPADecodeContext *s, for(i=0;i<nb_frames;i++) { for(ch=0;ch<s->nb_channels;ch++) { int j; - printf("%d-%d:", i, ch); + dprintf("%d-%d:", i, ch); for(j=0;j<SBLIMIT;j++) - printf(" %0.6f", (double)s->sb_samples[ch][i][j] / FRAC_ONE); - printf("\n"); + dprintf(" %0.6f", (double)s->sb_samples[ch][i][j] / FRAC_ONE); + dprintf("\n"); } } #endif diff --git a/src/libffmpeg/libavcodec/mpegvideo.c b/src/libffmpeg/libavcodec/mpegvideo.c index 77d6691af..f1c1b34bb 100644 --- a/src/libffmpeg/libavcodec/mpegvideo.c +++ b/src/libffmpeg/libavcodec/mpegvideo.c @@ -55,6 +55,8 @@ static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s, DCTELEM *block, int n, int qscale); static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s, DCTELEM *block, int n, int qscale); +static void dct_unquantize_mpeg2_intra_bitexact(MpegEncContext *s, + DCTELEM *block, int n, int qscale); static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s, DCTELEM *block, int n, int qscale); static void dct_unquantize_h263_intra_c(MpegEncContext *s, @@ -239,6 +241,7 @@ void ff_write_quant_matrix(PutBitContext *pb, int16_t *matrix){ const uint8_t *ff_find_start_code(const uint8_t * restrict p, const uint8_t *end, uint32_t * restrict state){ int i; + assert(p<=end); if(p>=end) return end; @@ -273,6 +276,8 @@ int DCT_common_init(MpegEncContext *s) s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_c; s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_c; s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_c; + if(s->flags & CODEC_FLAG_BITEXACT) + s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_bitexact; s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_c; #ifdef CONFIG_ENCODERS @@ -490,8 +495,8 @@ static int init_duplicate_context(MpegEncContext *s, MpegEncContext *base){ int i; // edge emu needs blocksize + filter length - 1 (=17x17 for halfpel / 21x21 for h264) - CHECKED_ALLOCZ(s->allocated_edge_emu_buffer, (s->width+64)*2*17*2); //(width + edge + align)*interlaced*MBsize*tolerance - s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*17; + CHECKED_ALLOCZ(s->allocated_edge_emu_buffer, (s->width+64)*2*21*2); //(width + edge + align)*interlaced*MBsize*tolerance + s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*21; //FIXME should be linesize instead of s->width*2 but that isnt known before get_buffer() CHECKED_ALLOCZ(s->me.scratchpad, (s->width+64)*4*16*2*sizeof(uint8_t)) @@ -930,27 +935,43 @@ int MPV_encode_init(AVCodecContext *avctx) MPV_encode_defaults(s); - if(avctx->pix_fmt != PIX_FMT_YUVJ420P && avctx->pix_fmt != PIX_FMT_YUV420P){ - av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n"); - return -1; - } - - if(avctx->codec_id == CODEC_ID_MJPEG || avctx->codec_id == CODEC_ID_LJPEG){ - if(avctx->strict_std_compliance>FF_COMPLIANCE_INOFFICIAL && avctx->pix_fmt != PIX_FMT_YUVJ420P){ + switch (avctx->codec_id) { + case CODEC_ID_MPEG2VIDEO: + if(avctx->pix_fmt != PIX_FMT_YUV420P && avctx->pix_fmt != PIX_FMT_YUV422P){ + av_log(avctx, AV_LOG_ERROR, "only YUV420 and YUV422 are supported\n"); + return -1; + } + break; + case CODEC_ID_LJPEG: + case CODEC_ID_MJPEG: + if(avctx->pix_fmt != PIX_FMT_YUVJ420P && (avctx->pix_fmt != PIX_FMT_YUV420P || avctx->strict_std_compliance>FF_COMPLIANCE_INOFFICIAL)){ av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n"); return -1; } - }else{ - if(avctx->strict_std_compliance>FF_COMPLIANCE_INOFFICIAL && avctx->pix_fmt != PIX_FMT_YUV420P){ - av_log(avctx, AV_LOG_ERROR, "colorspace not supported\n"); + break; + default: + if(avctx->pix_fmt != PIX_FMT_YUV420P){ + av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n"); return -1; } } + switch (avctx->pix_fmt) { + case PIX_FMT_YUVJ422P: + case PIX_FMT_YUV422P: + s->chroma_format = CHROMA_422; + break; + case PIX_FMT_YUVJ420P: + case PIX_FMT_YUV420P: + default: + s->chroma_format = CHROMA_420; + break; + } + s->bit_rate = avctx->bit_rate; s->width = avctx->width; s->height = avctx->height; - if(avctx->gop_size > 600){ + if(avctx->gop_size > 600 && avctx->strict_std_compliance>FF_COMPLIANCE_EXPERIMENTAL){ av_log(avctx, AV_LOG_ERROR, "Warning keyframe interval too large! reducing it ...\n"); avctx->gop_size=600; } @@ -994,6 +1015,7 @@ int MPV_encode_init(AVCodecContext *avctx) s->obmc= !!(s->flags & CODEC_FLAG_OBMC); s->loop_filter= !!(s->flags & CODEC_FLAG_LOOP_FILTER); s->alternate_scan= !!(s->flags & CODEC_FLAG_ALT_SCAN); + s->intra_vlc_format= !!(s->flags2 & CODEC_FLAG2_INTRA_VLC); if(avctx->rc_max_rate && !avctx->rc_buffer_size){ av_log(avctx, AV_LOG_ERROR, "a vbv buffer size is needed, for encoding with a maximum bitrate\n"); @@ -1078,6 +1100,11 @@ int MPV_encode_init(AVCodecContext *avctx) return -1; } + if((s->flags2 & CODEC_FLAG2_INTRA_VLC) && s->codec_id != CODEC_ID_MPEG2VIDEO){ + av_log(avctx, AV_LOG_ERROR, "intra vlc table not supported by codec\n"); + return -1; + } + if(s->avctx->thread_count > 1 && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO && (s->codec_id != CODEC_ID_H263P || !(s->flags & CODEC_FLAG_H263P_SLICE_STRUCT))){ @@ -1104,8 +1131,8 @@ int MPV_encode_init(AVCodecContext *avctx) } if(avctx->b_frame_strategy && (avctx->flags&CODEC_FLAG_PASS2)){ - av_log(avctx, AV_LOG_ERROR, "b_frame_strategy must be 0 on the second pass\n"); - return -1; + av_log(avctx, AV_LOG_INFO, "notice: b_frame_strategy only affects the first pass\n"); + avctx->b_frame_strategy = 0; } i= ff_gcd(avctx->time_base.den, avctx->time_base.num); @@ -1359,10 +1386,6 @@ int MPV_encode_end(AVCodecContext *avctx) { MpegEncContext *s = avctx->priv_data; -#ifdef STATS - print_stats(); -#endif - ff_rate_control_uninit(s); /* xine: do not need this for decode or MPEG-1 encoding modes */ @@ -1848,7 +1871,7 @@ void ff_print_debug_info(MpegEncContext *s, AVFrame *pict){ const int width = s->avctx->width; const int height= s->avctx->height; const int mv_sample_log2= 4 - pict->motion_subsample_log2; - const int mv_stride= (s->mb_width << mv_sample_log2) + 1; + const int mv_stride= (s->mb_width << mv_sample_log2) + (s->codec_id == CODEC_ID_H264 ? 0 : 1); s->low_delay=0; //needed to see the vectors without trashing the buffers avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift); @@ -2197,10 +2220,11 @@ static int estimate_best_b_count(MpegEncContext *s){ int i, j, out_size, p_lambda, b_lambda, lambda2; int outbuf_size= s->width * s->height; //FIXME uint8_t *outbuf= av_malloc(outbuf_size); - ImgReSampleContext *resample; int64_t best_rd= INT64_MAX; int best_b_count= -1; + assert(scale>=0 && scale <=3); + // emms_c(); p_lambda= s->last_lambda_for[P_TYPE]; //s->next_picture_ptr->quality; b_lambda= s->last_lambda_for[B_TYPE]; //p_lambda *ABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset; @@ -2222,8 +2246,6 @@ static int estimate_best_b_count(MpegEncContext *s){ if (avcodec_open(c, codec) < 0) return -1; - resample= img_resample_init(c->width, c->height, s->width, s->height); //FIXME use sws - for(i=0; i<s->max_b_frames+2; i++){ int ysize= c->width*c->height; int csize= (c->width/2)*(c->height/2); @@ -2246,8 +2268,11 @@ static int estimate_best_b_count(MpegEncContext *s){ input[i].linesize[1]= input[i].linesize[2]= c->width/2; - if(!i || s->input_picture[i-1]) - img_resample(resample, &input[i], &pre_input); + if(!i || s->input_picture[i-1]){ + s->dsp.shrink[scale](input[i].data[0], input[i].linesize[0], pre_input.data[0], pre_input.linesize[0], c->width, c->height); + s->dsp.shrink[scale](input[i].data[1], input[i].linesize[1], pre_input.data[1], pre_input.linesize[1], c->width>>1, c->height>>1); + s->dsp.shrink[scale](input[i].data[2], input[i].linesize[2], pre_input.data[2], pre_input.linesize[2], c->width>>1, c->height>>1); + } } for(j=0; j<s->max_b_frames+1; j++){ @@ -2289,7 +2314,6 @@ static int estimate_best_b_count(MpegEncContext *s){ av_freep(&outbuf); avcodec_close(c); av_freep(&c); - img_resample_close(resample); for(i=0; i<s->max_b_frames+2; i++){ av_freep(&input[i].data[0]); @@ -2365,7 +2389,7 @@ static void select_input_picture(MpegEncContext *s){ } } for(i=0; i<s->max_b_frames+1; i++){ - if(s->input_picture[i]==NULL || s->input_picture[i]->b_frame_score - 1 > s->mb_num/40) break; + if(s->input_picture[i]==NULL || s->input_picture[i]->b_frame_score - 1 > s->mb_num/s->avctx->b_sensitivity) break; } b_frames= FFMAX(0, i-1); @@ -2472,11 +2496,6 @@ int MPV_encode_picture(AVCodecContext *avctx, AVFrame *pic_arg = data; int i, stuffing_count; - if(avctx->pix_fmt != PIX_FMT_YUV420P && avctx->pix_fmt != PIX_FMT_YUVJ420P){ - av_log(avctx, AV_LOG_ERROR, "this codec supports only YUV420P\n"); - return -1; - } - for(i=0; i<avctx->thread_count; i++){ int start_y= s->thread_context[i]->start_mb_y; int end_y= s->thread_context[i]-> end_mb_y; @@ -3378,6 +3397,18 @@ static inline void chroma_4mv_motion_lowres(MpegEncContext *s, pix_op[lowres](dest_cr, ptr, s->uvlinesize, block_s, sx, sy); } +static inline void prefetch_motion(MpegEncContext *s, uint8_t **pix, int dir){ + /* fetch pixels for estimated mv 4 macroblocks ahead + * optimized for 64byte cache lines */ + const int shift = s->quarter_sample ? 2 : 1; + const int mx= (s->mv[dir][0][0]>>shift) + 16*s->mb_x + 8; + const int my= (s->mv[dir][0][1]>>shift) + 16*s->mb_y; + int off= mx + (my + (s->mb_x&3)*4)*s->linesize + 64; + s->dsp.prefetch(pix[0]+off, s->linesize, 4); + off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64; + s->dsp.prefetch(pix[1]+off, pix[2]-pix[1], 2); +} + /** * motion compensation of a single macroblock * @param s context @@ -3402,6 +3433,8 @@ static inline void MPV_motion(MpegEncContext *s, mb_x = s->mb_x; mb_y = s->mb_y; + prefetch_motion(s, ref_picture, dir); + if(s->obmc && s->pict_type != B_TYPE){ int16_t mv_cache[4][4][2]; const int xy= s->mb_x + s->mb_y*s->mb_stride; @@ -3963,8 +3996,17 @@ static always_inline void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM bloc add_dequant_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale); if(!(s->flags&CODEC_FLAG_GRAY)){ - add_dequant_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale); - add_dequant_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale); + if (s->chroma_y_shift){ + add_dequant_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale); + add_dequant_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale); + }else{ + dct_linesize >>= 1; + dct_offset >>=1; + add_dequant_dct(s, block[4], 4, dest_cb, dct_linesize, s->chroma_qscale); + add_dequant_dct(s, block[5], 5, dest_cr, dct_linesize, s->chroma_qscale); + add_dequant_dct(s, block[6], 6, dest_cb + dct_offset, dct_linesize, s->chroma_qscale); + add_dequant_dct(s, block[7], 7, dest_cr + dct_offset, dct_linesize, s->chroma_qscale); + } } } else if(s->codec_id != CODEC_ID_WMV2){ add_dct(s, block[0], 0, dest_y , dct_linesize); @@ -4006,8 +4048,17 @@ static always_inline void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM bloc put_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale); if(!(s->flags&CODEC_FLAG_GRAY)){ - put_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale); - put_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale); + if(s->chroma_y_shift){ + put_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale); + put_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale); + }else{ + dct_offset >>=1; + dct_linesize >>=1; + put_dct(s, block[4], 4, dest_cb, dct_linesize, s->chroma_qscale); + put_dct(s, block[5], 5, dest_cr, dct_linesize, s->chroma_qscale); + put_dct(s, block[6], 6, dest_cb + dct_offset, dct_linesize, s->chroma_qscale); + put_dct(s, block[7], 7, dest_cr + dct_offset, dct_linesize, s->chroma_qscale); + } } }else{ s->dsp.idct_put(dest_y , dct_linesize, block[0]); @@ -4229,19 +4280,19 @@ static void get_vissual_weight(int16_t *weight, uint8_t *ptr, int stride){ } } -static void encode_mb(MpegEncContext *s, int motion_x, int motion_y) +static always_inline void encode_mb_internal(MpegEncContext *s, int motion_x, int motion_y, int mb_block_height, int mb_block_count) { - int16_t weight[6][64]; - DCTELEM orig[6][64]; + int16_t weight[8][64]; + DCTELEM orig[8][64]; const int mb_x= s->mb_x; const int mb_y= s->mb_y; int i; - int skip_dct[6]; + int skip_dct[8]; int dct_offset = s->linesize*8; //default for progressive frames uint8_t *ptr_y, *ptr_cb, *ptr_cr; int wrap_y, wrap_c; - for(i=0; i<6; i++) skip_dct[i]=0; + for(i=0; i<mb_block_count; i++) skip_dct[i]=0; if(s->adaptive_quant){ const int last_qp= s->qscale; @@ -4277,16 +4328,16 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y) wrap_y = s->linesize; wrap_c = s->uvlinesize; ptr_y = s->new_picture.data[0] + (mb_y * 16 * wrap_y) + mb_x * 16; - ptr_cb = s->new_picture.data[1] + (mb_y * 8 * wrap_c) + mb_x * 8; - ptr_cr = s->new_picture.data[2] + (mb_y * 8 * wrap_c) + mb_x * 8; + ptr_cb = s->new_picture.data[1] + (mb_y * mb_block_height * wrap_c) + mb_x * 8; + ptr_cr = s->new_picture.data[2] + (mb_y * mb_block_height * wrap_c) + mb_x * 8; if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){ uint8_t *ebuf= s->edge_emu_buffer + 32; ff_emulated_edge_mc(ebuf , ptr_y , wrap_y,16,16,mb_x*16,mb_y*16, s->width , s->height); ptr_y= ebuf; - ff_emulated_edge_mc(ebuf+18*wrap_y , ptr_cb, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1); + ff_emulated_edge_mc(ebuf+18*wrap_y , ptr_cb, wrap_c, 8, mb_block_height, mb_x*8, mb_y*8, s->width>>1, s->height>>1); ptr_cb= ebuf+18*wrap_y; - ff_emulated_edge_mc(ebuf+18*wrap_y+8, ptr_cr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1); + ff_emulated_edge_mc(ebuf+18*wrap_y+8, ptr_cr, wrap_c, 8, mb_block_height, mb_x*8, mb_y*8, s->width>>1, s->height>>1); ptr_cr= ebuf+18*wrap_y+8; } @@ -4306,6 +4357,8 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y) dct_offset= wrap_y; wrap_y<<=1; + if (s->chroma_format == CHROMA_422) + wrap_c<<=1; } } } @@ -4321,6 +4374,10 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y) }else{ s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c); s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c); + if(!s->chroma_y_shift){ /* 422 */ + s->dsp.get_pixels(s->block[6], ptr_cb + (dct_offset>>1), wrap_c); + s->dsp.get_pixels(s->block[7], ptr_cr + (dct_offset>>1), wrap_c); + } } }else{ op_pixels_func (*op_pix)[4]; @@ -4366,6 +4423,8 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y) dct_offset= wrap_y; wrap_y<<=1; + if (s->chroma_format == CHROMA_422) + wrap_c<<=1; } } } @@ -4381,6 +4440,10 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y) }else{ s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c); s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c); + if(!s->chroma_y_shift){ /* 422 */ + s->dsp.diff_pixels(s->block[6], ptr_cb + (dct_offset>>1), dest_cb + (dct_offset>>1), wrap_c); + s->dsp.diff_pixels(s->block[7], ptr_cr + (dct_offset>>1), dest_cr + (dct_offset>>1), wrap_c); + } } /* pre quantization */ if(s->current_picture.mc_mb_var[s->mb_stride*mb_y+ mb_x]<2*s->qscale*s->qscale){ @@ -4391,6 +4454,10 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y) if(s->dsp.sad[1](NULL, ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y, 8) < 20*s->qscale) skip_dct[3]= 1; if(s->dsp.sad[1](NULL, ptr_cb , dest_cb , wrap_c, 8) < 20*s->qscale) skip_dct[4]= 1; if(s->dsp.sad[1](NULL, ptr_cr , dest_cr , wrap_c, 8) < 20*s->qscale) skip_dct[5]= 1; + if(!s->chroma_y_shift){ /* 422 */ + if(s->dsp.sad[1](NULL, ptr_cb +(dct_offset>>1), dest_cb +(dct_offset>>1), wrap_c, 8) < 20*s->qscale) skip_dct[6]= 1; + if(s->dsp.sad[1](NULL, ptr_cr +(dct_offset>>1), dest_cr +(dct_offset>>1), wrap_c, 8) < 20*s->qscale) skip_dct[7]= 1; + } } } @@ -4401,13 +4468,17 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y) if(!skip_dct[3]) get_vissual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y); if(!skip_dct[4]) get_vissual_weight(weight[4], ptr_cb , wrap_c); if(!skip_dct[5]) get_vissual_weight(weight[5], ptr_cr , wrap_c); - memcpy(orig[0], s->block[0], sizeof(DCTELEM)*64*6); + if(!s->chroma_y_shift){ /* 422 */ + if(!skip_dct[6]) get_vissual_weight(weight[6], ptr_cb + (dct_offset>>1), wrap_c); + if(!skip_dct[7]) get_vissual_weight(weight[7], ptr_cr + (dct_offset>>1), wrap_c); + } + memcpy(orig[0], s->block[0], sizeof(DCTELEM)*64*mb_block_count); } /* DCT & quantize */ assert(s->out_format!=FMT_MJPEG || s->qscale==8); { - for(i=0;i<6;i++) { + for(i=0;i<mb_block_count;i++) { if(!skip_dct[i]){ int overflow; s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow); @@ -4419,7 +4490,7 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y) s->block_last_index[i]= -1; } if(s->avctx->quantizer_noise_shaping){ - for(i=0;i<6;i++) { + for(i=0;i<mb_block_count;i++) { if(!skip_dct[i]){ s->block_last_index[i] = dct_quantize_refine(s, s->block[i], weight[i], orig[i], i, s->qscale); } @@ -4430,11 +4501,11 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y) for(i=0; i<4; i++) dct_single_coeff_elimination(s, i, s->luma_elim_threshold); if(s->chroma_elim_threshold && !s->mb_intra) - for(i=4; i<6; i++) + for(i=4; i<mb_block_count; i++) dct_single_coeff_elimination(s, i, s->chroma_elim_threshold); if(s->flags & CODEC_FLAG_CBP_RD){ - for(i=0;i<6;i++) { + for(i=0;i<mb_block_count;i++) { if(s->block_last_index[i] == -1) s->coded_score[i]= INT_MAX/256; } @@ -4450,7 +4521,7 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y) //non c quantize code returns incorrect block_last_index FIXME if(s->alternate_scan && s->dct_quantize != dct_quantize_c){ - for(i=0; i<6; i++){ + for(i=0; i<mb_block_count; i++){ int j; if(s->block_last_index[i]>0){ for(j=63; j>0; j--){ @@ -4494,6 +4565,12 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y) } } +static always_inline void encode_mb(MpegEncContext *s, int motion_x, int motion_y) +{ + if (s->chroma_format == CHROMA_420) encode_mb_internal(s, motion_x, motion_y, 8, 6); + else encode_mb_internal(s, motion_x, motion_y, 16, 8); +} + #endif //CONFIG_ENCODERS void ff_mpeg_flush(AVCodecContext *avctx){ @@ -4603,7 +4680,7 @@ static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext * d->tex_pb= s->tex_pb; } d->block= s->block; - for(i=0; i<6; i++) + for(i=0; i<8; i++) d->block_last_index[i]= s->block_last_index[i]; d->interlaced_dct= s->interlaced_dct; d->qscale= s->qscale; @@ -5621,7 +5698,7 @@ static void encode_picture(MpegEncContext *s, int picture_number) for(i=1;i<64;i++){ int j= s->dsp.idct_permutation[i]; - s->intra_matrix[j] = clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3) & 0xFF; + s->intra_matrix[j] = clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3); } convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16, s->intra_matrix, s->intra_quant_bias, 8, 8, 1); @@ -6581,6 +6658,39 @@ static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s, } } +static void dct_unquantize_mpeg2_intra_bitexact(MpegEncContext *s, + DCTELEM *block, int n, int qscale) +{ + int i, level, nCoeffs; + const uint16_t *quant_matrix; + int sum=-1; + + if(s->alternate_scan) nCoeffs= 63; + else nCoeffs= s->block_last_index[n]; + + if (n < 4) + block[0] = block[0] * s->y_dc_scale; + else + block[0] = block[0] * s->c_dc_scale; + quant_matrix = s->intra_matrix; + for(i=1;i<=nCoeffs;i++) { + int j= s->intra_scantable.permutated[i]; + level = block[j]; + if (level) { + if (level < 0) { + level = -level; + level = (int)(level * qscale * quant_matrix[j]) >> 3; + level = -level; + } else { + level = (int)(level * qscale * quant_matrix[j]) >> 3; + } + block[j] = level; + sum+=level; + } + } + block[63]^=sum&1; +} + static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s, DCTELEM *block, int n, int qscale) { diff --git a/src/libffmpeg/libavcodec/mpegvideo.h b/src/libffmpeg/libavcodec/mpegvideo.h index 888b0b608..023e65700 100644 --- a/src/libffmpeg/libavcodec/mpegvideo.h +++ b/src/libffmpeg/libavcodec/mpegvideo.h @@ -123,6 +123,7 @@ typedef struct RateControlContext{ void *non_lavc_opaque; ///< context for non lavc rc code (for example xvid) float dry_run_qscale; ///< for xvid rc + int last_picture_number; ///< for xvid rc }RateControlContext; /** @@ -429,6 +430,7 @@ typedef struct MpegEncContext { int field_select[2][2]; int last_mv[2][2][2]; ///< last MV, used for MV prediction in MPEG1 & B-frame MPEG4 uint8_t *fcode_tab; ///< smallest fcode needed for each MV + int16_t direct_scale_mv[2][64]; ///< precomputed to avoid divisions in ff_mpeg4_set_direct_mv MotionEstContext me; @@ -484,7 +486,7 @@ typedef struct MpegEncContext { uint8_t *chroma_dc_vlc_length; #define UNI_AC_ENC_INDEX(run,level) ((run)*128 + (level)) - int coded_score[6]; + int coded_score[8]; /** precomputed matrix (combine qscale and DCT renorm) */ int (*q_intra_matrix)[64]; @@ -600,6 +602,7 @@ typedef struct MpegEncContext { int vo_type; int vol_control_parameters; ///< does the stream contain the low_delay flag, used to workaround buggy encoders int intra_dc_threshold; ///< QP above whch the ac VLC should be used for intra dc + int use_intra_dc_vlc; PutBitContext tex_pb; ///< used for data partitioned VOPs PutBitContext pb2; ///< used for data partitioned VOPs int mpeg_quant; @@ -696,7 +699,7 @@ typedef struct MpegEncContext { short * pblocks[12]; DCTELEM (*block)[64]; ///< points to one of the following blocks - DCTELEM (*blocks)[6][64]; // for HQ mode we need to keep the best block + DCTELEM (*blocks)[8][64]; // for HQ mode we need to keep the best block int (*decode_mb)(struct MpegEncContext *s, DCTELEM block[6][64]); // used by some codecs to avoid a switch() #define SLICE_OK 0 #define SLICE_ERROR -1 diff --git a/src/libffmpeg/libavcodec/msmpeg4.c b/src/libffmpeg/libavcodec/msmpeg4.c index 5bb7158e6..6d83f5c6c 100644 --- a/src/libffmpeg/libavcodec/msmpeg4.c +++ b/src/libffmpeg/libavcodec/msmpeg4.c @@ -75,7 +75,7 @@ static int msmpeg4v12_decode_mb(MpegEncContext *s, DCTELEM block[6][64]); static int msmpeg4v34_decode_mb(MpegEncContext *s, DCTELEM block[6][64]); static int wmv2_decode_mb(MpegEncContext *s, DCTELEM block[6][64]); -/* vc9 externs */ +/* vc1 externs */ extern uint8_t wmv3_dc_scale_table[32]; #ifdef DEBUG @@ -89,70 +89,6 @@ int frame_count = 0; static uint8_t rl_length[NB_RL_TABLES][MAX_LEVEL+1][MAX_RUN+1][2]; #endif //CONFIG_ENCODERS -#ifdef STATS - -const char *st_names[ST_NB] = { - "unknown", - "dc", - "intra_ac", - "inter_ac", - "intra_mb", - "inter_mb", - "mv", -}; - -int st_current_index = 0; -unsigned int st_bit_counts[ST_NB]; -unsigned int st_out_bit_counts[ST_NB]; - -#define set_stat(var) st_current_index = var; - -void print_stats(void) -{ - unsigned int total; - int i; - - printf("Input:\n"); - total = 0; - for(i=0;i<ST_NB;i++) - total += st_bit_counts[i]; - if (total == 0) - total = 1; - for(i=0;i<ST_NB;i++) { - printf("%-10s : %10.1f %5.1f%%\n", - st_names[i], - (double)st_bit_counts[i] / 8.0, - (double)st_bit_counts[i] * 100.0 / total); - } - printf("%-10s : %10.1f %5.1f%%\n", - "total", - (double)total / 8.0, - 100.0); - - printf("Output:\n"); - total = 0; - for(i=0;i<ST_NB;i++) - total += st_out_bit_counts[i]; - if (total == 0) - total = 1; - for(i=0;i<ST_NB;i++) { - printf("%-10s : %10.1f %5.1f%%\n", - st_names[i], - (double)st_out_bit_counts[i] / 8.0, - (double)st_out_bit_counts[i] * 100.0 / total); - } - printf("%-10s : %10.1f %5.1f%%\n", - "total", - (double)total / 8.0, - 100.0); -} - -#else - -#define set_stat(var) - -#endif - static void common_init(MpegEncContext * s) { static int inited=0; @@ -177,7 +113,7 @@ static void common_init(MpegEncContext * s) s->y_dc_scale_table= wmv1_y_dc_scale_table; s->c_dc_scale_table= wmv1_c_dc_scale_table; break; -#if defined(CONFIG_WMV3_DECODER)||defined(CONFIG_VC9_DECODER) +#if defined(CONFIG_WMV3_DECODER)||defined(CONFIG_VC1_DECODER) case 6: s->y_dc_scale_table= wmv3_dc_scale_table; s->c_dc_scale_table= wmv3_dc_scale_table; @@ -434,7 +370,7 @@ void msmpeg4_encode_picture_header(MpegEncContext * s, int picture_number) #ifdef DEBUG intra_count = 0; - printf("*****frame %d:\n", frame_count++); + av_log(s->avctx, AV_LOG_DEBUG, "*****frame %d:\n", frame_count++); #endif } @@ -504,12 +440,11 @@ static void msmpeg4_encode_motion(MpegEncContext * s, #if 0 if ((unsigned)mx >= 64 || (unsigned)my >= 64) - fprintf(stderr, "error mx=%d my=%d\n", mx, my); + av_log(s->avctx, AV_LOG_ERROR, "error mx=%d my=%d\n", mx, my); #endif mv = &mv_tables[s->mv_table_index]; code = mv->table_mv_index[(mx << 6) | my]; - set_stat(ST_MV); put_bits(&s->pb, mv->table_mv_bits[code], mv->table_mv_code[code]); @@ -545,7 +480,6 @@ void msmpeg4_encode_mb(MpegEncContext * s, if (!s->mb_intra) { /* compute cbp */ - set_stat(ST_INTER_MB); cbp = 0; for (i = 0; i < 6; i++) { if (s->block_last_index[i] >= 0) @@ -636,7 +570,6 @@ void msmpeg4_encode_mb(MpegEncContext * s, cbpy_tab[cbp>>2][0]); }else{ if (s->pict_type == I_TYPE) { - set_stat(ST_INTRA_MB); put_bits(&s->pb, ff_msmp4_mb_i_table[coded_cbp][1], ff_msmp4_mb_i_table[coded_cbp][0]); } else { @@ -646,7 +579,6 @@ void msmpeg4_encode_mb(MpegEncContext * s, table_mb_non_intra[cbp][1], table_mb_non_intra[cbp][0]); } - set_stat(ST_INTRA_MB); put_bits(&s->pb, 1, 0); /* no AC prediction yet */ if(s->inter_intra_pred){ s->h263_aic_dir=0; @@ -924,7 +856,6 @@ static inline void msmpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int const uint8_t *scantable; if (s->mb_intra) { - set_stat(ST_DC); msmpeg4_encode_dc(s, block[0], n, &dc_pred_dir); i = 1; if (n < 4) { @@ -934,7 +865,6 @@ static inline void msmpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int } run_diff = 0; scantable= s->intra_scantable.permutated; - set_stat(ST_INTRA_AC); } else { i = 0; rl = &rl_table[3 + s->rl_table_index]; @@ -943,7 +873,6 @@ static inline void msmpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int else run_diff = 1; scantable= s->inter_scantable.permutated; - set_stat(ST_INTER_AC); } /* recalculate block_last_index for M$ wmv1 */ @@ -1197,7 +1126,7 @@ int ff_msmpeg4_decode_init(MpegEncContext *s) case 5: s->decode_mb= wmv2_decode_mb; case 6: - //FIXME + TODO VC9 decode mb + //FIXME + TODO VC1 decode mb break; } @@ -1214,9 +1143,9 @@ int msmpeg4_decode_picture_header(MpegEncContext * s) { int i; for(i=0; i<s->gb.size_in_bits; i++) - printf("%d", get_bits1(&s->gb)); + av_log(s->avctx, AV_LOG_DEBUG, "%d", get_bits1(&s->gb)); // get_bits1(&s->gb); -printf("END\n"); +av_log(s->avctx, AV_LOG_DEBUG, "END\n"); return -1; } #endif @@ -1370,7 +1299,7 @@ return -1; s->esc3_run_length= 0; #ifdef DEBUG - printf("*****frame %d:\n", frame_count++); + av_log(s->avctx, AV_LOG_DEBUG, "*****frame %d:\n", frame_count++); #endif return 0; } @@ -1572,7 +1501,6 @@ static int msmpeg4v34_decode_mb(MpegEncContext *s, DCTELEM block[6][64]) uint32_t * const mb_type_ptr= &s->current_picture.mb_type[ s->mb_x + s->mb_y*s->mb_stride ]; if (s->pict_type == P_TYPE) { - set_stat(ST_INTER_MB); if (s->use_skip_mb_code) { if (get_bits1(&s->gb)) { /* skip mb */ @@ -1598,7 +1526,6 @@ static int msmpeg4v34_decode_mb(MpegEncContext *s, DCTELEM block[6][64]) cbp = code & 0x3f; } else { - set_stat(ST_INTRA_MB); s->mb_intra = 1; code = get_vlc2(&s->gb, ff_msmp4_mb_i_vlc.table, MB_INTRA_VLC_BITS, 2); if (code < 0) @@ -1623,7 +1550,6 @@ static int msmpeg4v34_decode_mb(MpegEncContext *s, DCTELEM block[6][64]) s->rl_table_index = decode012(&s->gb); s->rl_chroma_table_index = s->rl_table_index; } - set_stat(ST_MV); h263_pred_motion(s, 0, 0, &mx, &my); if (msmpeg4_decode_motion(s, &mx, &my) < 0) return -1; @@ -1634,7 +1560,6 @@ static int msmpeg4v34_decode_mb(MpegEncContext *s, DCTELEM block[6][64]) *mb_type_ptr = MB_TYPE_L0 | MB_TYPE_16x16; } else { //printf("I at %d %d %d %06X\n", s->mb_x, s->mb_y, ((cbp&3)? 1 : 0) +((cbp&0x3C)? 2 : 0), show_bits(&s->gb, 24)); - set_stat(ST_INTRA_MB); s->ac_pred = get_bits1(&s->gb); *mb_type_ptr = MB_TYPE_INTRA; if(s->inter_intra_pred){ @@ -1673,7 +1598,6 @@ static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block, qadd=0; /* DC coef */ - set_stat(ST_DC); level = msmpeg4_decode_dc(s, n, &dc_pred_dir); if (level < 0){ @@ -1709,7 +1633,6 @@ static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block, } else { scan_table = s->intra_scantable.permutated; } - set_stat(ST_INTRA_AC); rl_vlc= rl->rl_vlc[0]; } else { qmul = s->qscale << 1; @@ -1728,7 +1651,6 @@ static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block, } if(!scan_table) scan_table = s->inter_scantable.permutated; - set_stat(ST_INTER_AC); rl_vlc= rl->rl_vlc[s->qscale]; } { @@ -1794,15 +1716,15 @@ static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block, const int run1= run - rl->max_run[last][abs_level] - run_diff; if(abs_level<=MAX_LEVEL && run<=MAX_RUN){ if(abs_level <= rl->max_level[last][run]){ - fprintf(stderr, "illegal 3. esc, vlc encoding possible\n"); + av_log(s->avctx, AV_LOG_ERROR, "illegal 3. esc, vlc encoding possible\n"); return DECODING_AC_LOST; } if(abs_level <= rl->max_level[last][run]*2){ - fprintf(stderr, "illegal 3. esc, esc 1 encoding possible\n"); + av_log(s->avctx, AV_LOG_ERROR, "illegal 3. esc, esc 1 encoding possible\n"); return DECODING_AC_LOST; } if(run1>=0 && abs_level <= rl->max_level[last][run1]){ - fprintf(stderr, "illegal 3. esc, esc 2 encoding possible\n"); + av_log(s->avctx, AV_LOG_ERROR, "illegal 3. esc, esc 2 encoding possible\n"); return DECODING_AC_LOST; } } @@ -1813,7 +1735,7 @@ static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block, else level= level * qmul - qadd; #if 0 // waste of time too :( if(level>2048 || level<-2048){ - fprintf(stderr, "|level| overflow in 3. esc\n"); + av_log(s->avctx, AV_LOG_ERROR, "|level| overflow in 3. esc\n"); return DECODING_AC_LOST; } #endif diff --git a/src/libffmpeg/libavcodec/parser.c b/src/libffmpeg/libavcodec/parser.c index 412cd8359..59087cdb8 100644 --- a/src/libffmpeg/libavcodec/parser.c +++ b/src/libffmpeg/libavcodec/parser.c @@ -145,6 +145,7 @@ int av_parser_parse(AVCodecParserContext *s, /** * * @return 0 if the output buffer is a subset of the input, 1 if it is allocated and must be freed + * @deprecated use AVBitstreamFilter */ int av_parser_change(AVCodecParserContext *s, AVCodecContext *avctx, @@ -297,6 +298,7 @@ static const int frame_rate_tab[16] = { 25025, }; +#ifdef CONFIG_MPEGVIDEO_PARSER //FIXME move into mpeg12.c static void mpegvideo_extract_headers(AVCodecParserContext *s, AVCodecContext *avctx, @@ -449,6 +451,7 @@ static int mpegvideo_split(AVCodecContext *avctx, } return 0; } +#endif /* CONFIG_MPEGVIDEO_PARSER */ void ff_parse_close(AVCodecParserContext *s) { @@ -467,6 +470,7 @@ static void parse1_close(AVCodecParserContext *s) /*************************/ +#ifdef CONFIG_MPEG4VIDEO_PARSER /* used by parser */ /* XXX: make it use less memory */ static int av_mpeg4_decode_header(AVCodecParserContext *s1, @@ -532,6 +536,33 @@ static int mpeg4video_parse(AVCodecParserContext *s, *poutbuf_size = buf_size; return next; } +#endif + +#ifdef CONFIG_CAVSVIDEO_PARSER +static int cavsvideo_parse(AVCodecParserContext *s, + AVCodecContext *avctx, + uint8_t **poutbuf, int *poutbuf_size, + const uint8_t *buf, int buf_size) +{ + ParseContext *pc = s->priv_data; + int next; + + if(s->flags & PARSER_FLAG_COMPLETE_FRAMES){ + next= buf_size; + }else{ + next= ff_cavs_find_frame_end(pc, buf, buf_size); + + if (ff_combine_frame(pc, next, (uint8_t **)&buf, &buf_size) < 0) { + *poutbuf = NULL; + *poutbuf_size = 0; + return buf_size; + } + } + *poutbuf = (uint8_t *)buf; + *poutbuf_size = buf_size; + return next; +} +#endif /* CONFIG_CAVSVIDEO_PARSER */ static int mpeg4video_split(AVCodecContext *avctx, const uint8_t *buf, int buf_size) @@ -549,6 +580,7 @@ static int mpeg4video_split(AVCodecContext *avctx, /*************************/ +#ifdef CONFIG_MPEGAUDIO_PARSER typedef struct MpegAudioParseContext { uint8_t inbuf[MPA_MAX_CODED_FRAME_SIZE]; /* input buffer */ uint8_t *inbuf_ptr; @@ -726,15 +758,23 @@ static int mpegaudio_parse(AVCodecParserContext *s1, } return buf_ptr - buf; } +#endif /* CONFIG_MPEGAUDIO_PARSER */ +#if defined(CONFIG_AC3_PARSER) || defined(CONFIG_AAC_PARSER) +/* also used for ADTS AAC */ typedef struct AC3ParseContext { - uint8_t inbuf[4096]; /* input buffer */ uint8_t *inbuf_ptr; int frame_size; + int header_size; + int (*sync)(const uint8_t *buf, int *channels, int *sample_rate, + int *bit_rate, int *samples); + uint8_t inbuf[8192]; /* input buffer */ } AC3ParseContext; #define AC3_HEADER_SIZE 7 +#define AAC_HEADER_SIZE 7 +#ifdef CONFIG_AC3_PARSER static const int ac3_sample_rates[4] = { 48000, 44100, 32000, 0 }; @@ -789,9 +829,22 @@ static const int ac3_bitrates[64] = { static const int ac3_channels[8] = { 2, 1, 2, 3, 3, 4, 4, 5 }; +#endif /* CONFIG_AC3_PARSER */ + +#ifdef CONFIG_AAC_PARSER +static const int aac_sample_rates[16] = { + 96000, 88200, 64000, 48000, 44100, 32000, + 24000, 22050, 16000, 12000, 11025, 8000, 7350 +}; +static const int aac_channels[8] = { + 0, 1, 2, 3, 4, 5, 6, 8 +}; +#endif + +#ifdef CONFIG_AC3_PARSER static int ac3_sync(const uint8_t *buf, int *channels, int *sample_rate, - int *bit_rate) + int *bit_rate, int *samples) { unsigned int fscod, frmsizecod, acmod, bsid, lfeon; GetBitContext bits; @@ -801,7 +854,7 @@ static int ac3_sync(const uint8_t *buf, int *channels, int *sample_rate, if(get_bits(&bits, 16) != 0x0b77) return 0; - get_bits(&bits, 16); /* crc */ + skip_bits(&bits, 16); /* crc */ fscod = get_bits(&bits, 2); frmsizecod = get_bits(&bits, 6); @@ -811,30 +864,90 @@ static int ac3_sync(const uint8_t *buf, int *channels, int *sample_rate, bsid = get_bits(&bits, 5); if(bsid > 8) return 0; - get_bits(&bits, 3); /* bsmod */ + skip_bits(&bits, 3); /* bsmod */ acmod = get_bits(&bits, 3); if(acmod & 1 && acmod != 1) - get_bits(&bits, 2); /* cmixlev */ + skip_bits(&bits, 2); /* cmixlev */ if(acmod & 4) - get_bits(&bits, 2); /* surmixlev */ + skip_bits(&bits, 2); /* surmixlev */ if(acmod & 2) - get_bits(&bits, 2); /* dsurmod */ - lfeon = get_bits(&bits, 1); + skip_bits(&bits, 2); /* dsurmod */ + lfeon = get_bits1(&bits); *sample_rate = ac3_sample_rates[fscod]; *bit_rate = ac3_bitrates[frmsizecod] * 1000; *channels = ac3_channels[acmod] + lfeon; + *samples = 6 * 256; return ac3_frame_sizes[frmsizecod][fscod] * 2; } +#endif /* CONFIG_AC3_PARSER */ + +#ifdef CONFIG_AAC_PARSER +static int aac_sync(const uint8_t *buf, int *channels, int *sample_rate, + int *bit_rate, int *samples) +{ + GetBitContext bits; + int size, rdb, ch, sr; + + init_get_bits(&bits, buf, AAC_HEADER_SIZE * 8); + + if(get_bits(&bits, 12) != 0xfff) + return 0; + + skip_bits1(&bits); /* id */ + skip_bits(&bits, 2); /* layer */ + skip_bits1(&bits); /* protection_absent */ + skip_bits(&bits, 2); /* profile_objecttype */ + sr = get_bits(&bits, 4); /* sample_frequency_index */ + if(!aac_sample_rates[sr]) + return 0; + skip_bits1(&bits); /* private_bit */ + ch = get_bits(&bits, 3); /* channel_configuration */ + if(!aac_channels[ch]) + return 0; + skip_bits1(&bits); /* original/copy */ + skip_bits1(&bits); /* home */ + + /* adts_variable_header */ + skip_bits1(&bits); /* copyright_identification_bit */ + skip_bits1(&bits); /* copyright_identification_start */ + size = get_bits(&bits, 13); /* aac_frame_length */ + skip_bits(&bits, 11); /* adts_buffer_fullness */ + rdb = get_bits(&bits, 2); /* number_of_raw_data_blocks_in_frame */ + + *channels = aac_channels[ch]; + *sample_rate = aac_sample_rates[sr]; + *samples = (rdb + 1) * 1024; + *bit_rate = size * 8 * *sample_rate / *samples; + + return size; +} +#endif /* CONFIG_AAC_PARSER */ +#ifdef CONFIG_AC3_PARSER static int ac3_parse_init(AVCodecParserContext *s1) { AC3ParseContext *s = s1->priv_data; s->inbuf_ptr = s->inbuf; + s->header_size = AC3_HEADER_SIZE; + s->sync = ac3_sync; return 0; } +#endif +#ifdef CONFIG_AAC_PARSER +static int aac_parse_init(AVCodecParserContext *s1) +{ + AC3ParseContext *s = s1->priv_data; + s->inbuf_ptr = s->inbuf; + s->header_size = AAC_HEADER_SIZE; + s->sync = aac_sync; + return 0; +} +#endif + +/* also used for ADTS AAC */ static int ac3_parse(AVCodecParserContext *s1, AVCodecContext *avctx, uint8_t **poutbuf, int *poutbuf_size, @@ -842,7 +955,7 @@ static int ac3_parse(AVCodecParserContext *s1, { AC3ParseContext *s = s1->priv_data; const uint8_t *buf_ptr; - int len, sample_rate, bit_rate, channels; + int len, sample_rate, bit_rate, channels, samples; *poutbuf = NULL; *poutbuf_size = 0; @@ -851,29 +964,35 @@ static int ac3_parse(AVCodecParserContext *s1, while (buf_size > 0) { len = s->inbuf_ptr - s->inbuf; if (s->frame_size == 0) { - /* no header seen : find one. We need at least 7 bytes to parse it */ - len = FFMIN(AC3_HEADER_SIZE - len, buf_size); + /* no header seen : find one. We need at least s->header_size + bytes to parse it */ + len = FFMIN(s->header_size - len, buf_size); memcpy(s->inbuf_ptr, buf_ptr, len); buf_ptr += len; s->inbuf_ptr += len; buf_size -= len; - if ((s->inbuf_ptr - s->inbuf) == AC3_HEADER_SIZE) { - len = ac3_sync(s->inbuf, &channels, &sample_rate, &bit_rate); + if ((s->inbuf_ptr - s->inbuf) == s->header_size) { + len = s->sync(s->inbuf, &channels, &sample_rate, &bit_rate, + &samples); if (len == 0) { /* no sync found : move by one byte (inefficient, but simple!) */ - memmove(s->inbuf, s->inbuf + 1, AC3_HEADER_SIZE - 1); + memmove(s->inbuf, s->inbuf + 1, s->header_size - 1); s->inbuf_ptr--; } else { s->frame_size = len; /* update codec info */ avctx->sample_rate = sample_rate; /* set channels,except if the user explicitly requests 1 or 2 channels, XXX/FIXME this is a bit ugly */ - if(avctx->channels!=1 && avctx->channels!=2){ + if(avctx->codec_id == CODEC_ID_AC3){ + if(avctx->channels!=1 && avctx->channels!=2){ + avctx->channels = channels; + } + } else { avctx->channels = channels; } avctx->bit_rate = bit_rate; - avctx->frame_size = 6 * 256; + avctx->frame_size = samples; } } } else { @@ -895,7 +1014,9 @@ static int ac3_parse(AVCodecParserContext *s1, } return buf_ptr - buf; } +#endif /* CONFIG_AC3_PARSER || CONFIG_AAC_PARSER */ +#ifdef CONFIG_MPEGVIDEO_PARSER AVCodecParser mpegvideo_parser = { { CODEC_ID_MPEG1VIDEO, CODEC_ID_MPEG2VIDEO }, sizeof(ParseContext1), @@ -904,7 +1025,8 @@ AVCodecParser mpegvideo_parser = { parse1_close, mpegvideo_split, }; - +#endif +#ifdef CONFIG_MPEG4VIDEO_PARSER AVCodecParser mpeg4video_parser = { { CODEC_ID_MPEG4 }, sizeof(ParseContext1), @@ -913,7 +1035,18 @@ AVCodecParser mpeg4video_parser = { parse1_close, mpeg4video_split, }; - +#endif +#ifdef CONFIG_CAVSVIDEO_PARSER +AVCodecParser cavsvideo_parser = { + { CODEC_ID_CAVS }, + sizeof(ParseContext1), + NULL, + cavsvideo_parse, + parse1_close, + mpeg4video_split, +}; +#endif +#ifdef CONFIG_MPEGAUDIO_PARSER AVCodecParser mpegaudio_parser = { { CODEC_ID_MP2, CODEC_ID_MP3 }, sizeof(MpegAudioParseContext), @@ -921,7 +1054,8 @@ AVCodecParser mpegaudio_parser = { mpegaudio_parse, NULL, }; - +#endif +#ifdef CONFIG_AC3_PARSER AVCodecParser ac3_parser = { { CODEC_ID_AC3 }, sizeof(AC3ParseContext), @@ -929,3 +1063,13 @@ AVCodecParser ac3_parser = { ac3_parse, NULL, }; +#endif +#ifdef CONFIG_AAC_PARSER +AVCodecParser aac_parser = { + { CODEC_ID_AAC }, + sizeof(AC3ParseContext), + aac_parse_init, + ac3_parse, + NULL, +}; +#endif diff --git a/src/libffmpeg/libavcodec/ppc/dsputil_altivec.c b/src/libffmpeg/libavcodec/ppc/dsputil_altivec.c index 31464fb7a..81a32c9e3 100644 --- a/src/libffmpeg/libavcodec/ppc/dsputil_altivec.c +++ b/src/libffmpeg/libavcodec/ppc/dsputil_altivec.c @@ -1308,13 +1308,12 @@ POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1); #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ } -#ifdef CONFIG_DARWIN int hadamard8_diff8x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){ POWERPC_PERF_DECLARE(altivec_hadamard8_diff8x8_num, 1); int sum; -POWERPC_PERF_START_COUNT(altivec_hadamard8_diff8x8_num, 1); register const_vector unsigned char vzero = (const_vector unsigned char)vec_splat_u8(0); register vector signed short temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7; +POWERPC_PERF_START_COUNT(altivec_hadamard8_diff8x8_num, 1); { register const_vector signed short vprod1 = (const_vector signed short)AVV( 1,-1, 1,-1, 1,-1, 1,-1); register const_vector signed short vprod2 = (const_vector signed short)AVV( 1, 1,-1,-1, 1, 1,-1,-1); @@ -1339,6 +1338,8 @@ POWERPC_PERF_START_COUNT(altivec_hadamard8_diff8x8_num, 1); { \ register vector unsigned char src1, src2, srcO; \ register vector unsigned char dst1, dst2, dstO; \ + register vector signed short srcV, dstV; \ + register vector signed short but0, but1, but2, op1, op2, op3; \ src1 = vec_ld(stride * i, src); \ if ((((stride * i) + (unsigned long)src) & 0x0000000F) > 8) \ src2 = vec_ld((stride * i) + 16, src); \ @@ -1349,17 +1350,19 @@ POWERPC_PERF_START_COUNT(altivec_hadamard8_diff8x8_num, 1); dstO = vec_perm(dst1, dst2, vec_lvsl(stride * i, dst)); \ /* promote the unsigned chars to signed shorts */ \ /* we're in the 8x8 function, we only care for the first 8 */ \ - register vector signed short srcV = \ - (vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)srcO); \ - register vector signed short dstV = \ - (vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)dstO); \ + srcV = \ + (vector signed short)vec_mergeh((vector signed char)vzero, \ + (vector signed char)srcO); \ + dstV = \ + (vector signed short)vec_mergeh((vector signed char)vzero, \ + (vector signed char)dstO); \ /* substractions inside the first butterfly */ \ - register vector signed short but0 = vec_sub(srcV, dstV); \ - register vector signed short op1 = vec_perm(but0, but0, perm1); \ - register vector signed short but1 = vec_mladd(but0, vprod1, op1); \ - register vector signed short op2 = vec_perm(but1, but1, perm2); \ - register vector signed short but2 = vec_mladd(but1, vprod2, op2); \ - register vector signed short op3 = vec_perm(but2, but2, perm3); \ + but0 = vec_sub(srcV, dstV); \ + op1 = vec_perm(but0, but0, perm1); \ + but1 = vec_mladd(but0, vprod1, op1); \ + op2 = vec_perm(but1, but1, perm2); \ + but2 = vec_mladd(but1, vprod2, op2); \ + op3 = vec_perm(but2, but2, perm3); \ res = vec_mladd(but2, vprod3, op3); \ } ONEITERBUTTERFLY(0, temp0); @@ -1442,39 +1445,39 @@ POWERPC_PERF_STOP_COUNT(altivec_hadamard8_diff8x8_num, 1); static int hadamard8_diff16x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h) { int sum; register vector signed short - temp0 asm ("v0"), - temp1 asm ("v1"), - temp2 asm ("v2"), - temp3 asm ("v3"), - temp4 asm ("v4"), - temp5 asm ("v5"), - temp6 asm ("v6"), - temp7 asm ("v7"); + temp0 REG_v(v0), + temp1 REG_v(v1), + temp2 REG_v(v2), + temp3 REG_v(v3), + temp4 REG_v(v4), + temp5 REG_v(v5), + temp6 REG_v(v6), + temp7 REG_v(v7); register vector signed short - temp0S asm ("v8"), - temp1S asm ("v9"), - temp2S asm ("v10"), - temp3S asm ("v11"), - temp4S asm ("v12"), - temp5S asm ("v13"), - temp6S asm ("v14"), - temp7S asm ("v15"); - register const_vector unsigned char vzero asm ("v31")= (const_vector unsigned char)vec_splat_u8(0); + temp0S REG_v(v8), + temp1S REG_v(v9), + temp2S REG_v(v10), + temp3S REG_v(v11), + temp4S REG_v(v12), + temp5S REG_v(v13), + temp6S REG_v(v14), + temp7S REG_v(v15); + register const_vector unsigned char vzero REG_v(v31)= (const_vector unsigned char)vec_splat_u8(0); { - register const_vector signed short vprod1 asm ("v16")= (const_vector signed short)AVV( 1,-1, 1,-1, 1,-1, 1,-1); - register const_vector signed short vprod2 asm ("v17")= (const_vector signed short)AVV( 1, 1,-1,-1, 1, 1,-1,-1); - register const_vector signed short vprod3 asm ("v18")= (const_vector signed short)AVV( 1, 1, 1, 1,-1,-1,-1,-1); - register const_vector unsigned char perm1 asm ("v19")= (const_vector unsigned char) + register const_vector signed short vprod1 REG_v(v16)= (const_vector signed short)AVV( 1,-1, 1,-1, 1,-1, 1,-1); + register const_vector signed short vprod2 REG_v(v17)= (const_vector signed short)AVV( 1, 1,-1,-1, 1, 1,-1,-1); + register const_vector signed short vprod3 REG_v(v18)= (const_vector signed short)AVV( 1, 1, 1, 1,-1,-1,-1,-1); + register const_vector unsigned char perm1 REG_v(v19)= (const_vector unsigned char) AVV(0x02, 0x03, 0x00, 0x01, 0x06, 0x07, 0x04, 0x05, 0x0A, 0x0B, 0x08, 0x09, 0x0E, 0x0F, 0x0C, 0x0D); - register const_vector unsigned char perm2 asm ("v20")= (const_vector unsigned char) + register const_vector unsigned char perm2 REG_v(v20)= (const_vector unsigned char) AVV(0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03, 0x0C, 0x0D, 0x0E, 0x0F, 0x08, 0x09, 0x0A, 0x0B); - register const_vector unsigned char perm3 asm ("v21")= (const_vector unsigned char) + register const_vector unsigned char perm3 REG_v(v21)= (const_vector unsigned char) AVV(0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x00, 0x01, 0x02, 0x03, @@ -1482,37 +1485,63 @@ static int hadamard8_diff16x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, #define ONEITERBUTTERFLY(i, res1, res2) \ { \ - register vector unsigned char src1 asm ("v22"), src2 asm ("v23"); \ - register vector unsigned char dst1 asm ("v24"), dst2 asm ("v25"); \ + register vector unsigned char src1 REG_v(v22), \ + src2 REG_v(v23), \ + dst1 REG_v(v24), \ + dst2 REG_v(v25), \ + srcO REG_v(v22), \ + dstO REG_v(v23); \ + \ + register vector signed short srcV REG_v(v24), \ + dstV REG_v(v25), \ + srcW REG_v(v26), \ + dstW REG_v(v27), \ + but0 REG_v(v28), \ + but0S REG_v(v29), \ + op1 REG_v(v30), \ + but1 REG_v(v22), \ + op1S REG_v(v23), \ + but1S REG_v(v24), \ + op2 REG_v(v25), \ + but2 REG_v(v26), \ + op2S REG_v(v27), \ + but2S REG_v(v28), \ + op3 REG_v(v29), \ + op3S REG_v(v30); \ + \ src1 = vec_ld(stride * i, src); \ src2 = vec_ld((stride * i) + 16, src); \ - register vector unsigned char srcO asm ("v22") = vec_perm(src1, src2, vec_lvsl(stride * i, src)); \ + srcO = vec_perm(src1, src2, vec_lvsl(stride * i, src)); \ dst1 = vec_ld(stride * i, dst); \ dst2 = vec_ld((stride * i) + 16, dst); \ - register vector unsigned char dstO asm ("v23") = vec_perm(dst1, dst2, vec_lvsl(stride * i, dst)); \ + dstO = vec_perm(dst1, dst2, vec_lvsl(stride * i, dst)); \ /* promote the unsigned chars to signed shorts */ \ - register vector signed short srcV asm ("v24") = \ - (vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)srcO); \ - register vector signed short dstV asm ("v25") = \ - (vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)dstO); \ - register vector signed short srcW asm ("v26") = \ - (vector signed short)vec_mergel((vector signed char)vzero, (vector signed char)srcO); \ - register vector signed short dstW asm ("v27") = \ - (vector signed short)vec_mergel((vector signed char)vzero, (vector signed char)dstO); \ + srcV = \ + (vector signed short)vec_mergeh((vector signed char)vzero, \ + (vector signed char)srcO); \ + dstV = \ + (vector signed short)vec_mergeh((vector signed char)vzero, \ + (vector signed char)dstO); \ + srcW = \ + (vector signed short)vec_mergel((vector signed char)vzero, \ + (vector signed char)srcO); \ + dstW = \ + (vector signed short)vec_mergel((vector signed char)vzero, \ + (vector signed char)dstO); \ /* substractions inside the first butterfly */ \ - register vector signed short but0 asm ("v28") = vec_sub(srcV, dstV); \ - register vector signed short but0S asm ("v29") = vec_sub(srcW, dstW); \ - register vector signed short op1 asm ("v30") = vec_perm(but0, but0, perm1); \ - register vector signed short but1 asm ("v22") = vec_mladd(but0, vprod1, op1); \ - register vector signed short op1S asm ("v23") = vec_perm(but0S, but0S, perm1); \ - register vector signed short but1S asm ("v24") = vec_mladd(but0S, vprod1, op1S); \ - register vector signed short op2 asm ("v25") = vec_perm(but1, but1, perm2); \ - register vector signed short but2 asm ("v26") = vec_mladd(but1, vprod2, op2); \ - register vector signed short op2S asm ("v27") = vec_perm(but1S, but1S, perm2); \ - register vector signed short but2S asm ("v28") = vec_mladd(but1S, vprod2, op2S); \ - register vector signed short op3 asm ("v29") = vec_perm(but2, but2, perm3); \ + but0 = vec_sub(srcV, dstV); \ + but0S = vec_sub(srcW, dstW); \ + op1 = vec_perm(but0, but0, perm1); \ + but1 = vec_mladd(but0, vprod1, op1); \ + op1S = vec_perm(but0S, but0S, perm1); \ + but1S = vec_mladd(but0S, vprod1, op1S); \ + op2 = vec_perm(but1, but1, perm2); \ + but2 = vec_mladd(but1, vprod2, op2); \ + op2S = vec_perm(but1S, but1S, perm2); \ + but2S = vec_mladd(but1S, vprod2, op2S); \ + op3 = vec_perm(but2, but2, perm3); \ res1 = vec_mladd(but2, vprod3, op3); \ - register vector signed short op3S asm ("v30") = vec_perm(but2S, but2S, perm3); \ + op3S = vec_perm(but2S, but2S, perm3); \ res2 = vec_mladd(but2S, vprod3, op3S); \ } ONEITERBUTTERFLY(0, temp0, temp0S); @@ -1527,6 +1556,12 @@ static int hadamard8_diff16x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, #undef ONEITERBUTTERFLY { register vector signed int vsum; + register vector signed short line0S, line1S, line2S, line3S, line4S, + line5S, line6S, line7S, line0BS,line2BS, + line1BS,line3BS,line4BS,line6BS,line5BS, + line7BS,line0CS,line4CS,line1CS,line5CS, + line2CS,line6CS,line3CS,line7CS; + register vector signed short line0 = vec_add(temp0, temp1); register vector signed short line1 = vec_sub(temp0, temp1); register vector signed short line2 = vec_add(temp2, temp3); @@ -1563,32 +1598,32 @@ static int hadamard8_diff16x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, vsum = vec_sum4s(vec_abs(line6C), vsum); vsum = vec_sum4s(vec_abs(line7C), vsum); - register vector signed short line0S = vec_add(temp0S, temp1S); - register vector signed short line1S = vec_sub(temp0S, temp1S); - register vector signed short line2S = vec_add(temp2S, temp3S); - register vector signed short line3S = vec_sub(temp2S, temp3S); - register vector signed short line4S = vec_add(temp4S, temp5S); - register vector signed short line5S = vec_sub(temp4S, temp5S); - register vector signed short line6S = vec_add(temp6S, temp7S); - register vector signed short line7S = vec_sub(temp6S, temp7S); - - register vector signed short line0BS = vec_add(line0S, line2S); - register vector signed short line2BS = vec_sub(line0S, line2S); - register vector signed short line1BS = vec_add(line1S, line3S); - register vector signed short line3BS = vec_sub(line1S, line3S); - register vector signed short line4BS = vec_add(line4S, line6S); - register vector signed short line6BS = vec_sub(line4S, line6S); - register vector signed short line5BS = vec_add(line5S, line7S); - register vector signed short line7BS = vec_sub(line5S, line7S); - - register vector signed short line0CS = vec_add(line0BS, line4BS); - register vector signed short line4CS = vec_sub(line0BS, line4BS); - register vector signed short line1CS = vec_add(line1BS, line5BS); - register vector signed short line5CS = vec_sub(line1BS, line5BS); - register vector signed short line2CS = vec_add(line2BS, line6BS); - register vector signed short line6CS = vec_sub(line2BS, line6BS); - register vector signed short line3CS = vec_add(line3BS, line7BS); - register vector signed short line7CS = vec_sub(line3BS, line7BS); + line0S = vec_add(temp0S, temp1S); + line1S = vec_sub(temp0S, temp1S); + line2S = vec_add(temp2S, temp3S); + line3S = vec_sub(temp2S, temp3S); + line4S = vec_add(temp4S, temp5S); + line5S = vec_sub(temp4S, temp5S); + line6S = vec_add(temp6S, temp7S); + line7S = vec_sub(temp6S, temp7S); + + line0BS = vec_add(line0S, line2S); + line2BS = vec_sub(line0S, line2S); + line1BS = vec_add(line1S, line3S); + line3BS = vec_sub(line1S, line3S); + line4BS = vec_add(line4S, line6S); + line6BS = vec_sub(line4S, line6S); + line5BS = vec_add(line5S, line7S); + line7BS = vec_sub(line5S, line7S); + + line0CS = vec_add(line0BS, line4BS); + line4CS = vec_sub(line0BS, line4BS); + line1CS = vec_add(line1BS, line5BS); + line5CS = vec_sub(line1BS, line5BS); + line2CS = vec_add(line2BS, line6BS); + line6CS = vec_sub(line2BS, line6BS); + line3CS = vec_add(line3BS, line7BS); + line7CS = vec_sub(line3BS, line7BS); vsum = vec_sum4s(vec_abs(line0CS), vsum); vsum = vec_sum4s(vec_abs(line1CS), vsum); @@ -1618,7 +1653,6 @@ POWERPC_PERF_START_COUNT(altivec_hadamard8_diff16_num, 1); POWERPC_PERF_STOP_COUNT(altivec_hadamard8_diff16_num, 1); return score; } -#endif //CONFIG_DARWIN int has_altivec(void) { diff --git a/src/libffmpeg/libavcodec/ppc/dsputil_h264_altivec.c b/src/libffmpeg/libavcodec/ppc/dsputil_h264_altivec.c index b9fef005e..14391e60c 100755 --- a/src/libffmpeg/libavcodec/ppc/dsputil_h264_altivec.c +++ b/src/libffmpeg/libavcodec/ppc/dsputil_h264_altivec.c @@ -188,44 +188,97 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## CODETYPE(uint8_t *dst, uint OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\ }\ +static inline void put_pixels16_l2_altivec( uint8_t * dst, const uint8_t * src1, + const uint8_t * src2, int dst_stride, + int src_stride1, int h) +{ + int i; + vector unsigned char a, b, d, tmp1, tmp2, mask, mask_, edges, align; -/* from dsputil.c */ -static inline void put_pixels8_l2(uint8_t * dst, const uint8_t * src1, const uint8_t * src2, int dst_stride, int src_stride1, int src_stride2, int h) { - int i; - for (i = 0; i < h; i++) { - uint32_t a, b; - a = (((const struct unaligned_32 *) (&src1[i * src_stride1]))->l); - b = (((const struct unaligned_32 *) (&src2[i * src_stride2]))->l); - *((uint32_t *) & dst[i * dst_stride]) = rnd_avg32(a, b); - a = (((const struct unaligned_32 *) (&src1[i * src_stride1 + 4]))->l); - b = (((const struct unaligned_32 *) (&src2[i * src_stride2 + 4]))->l); - *((uint32_t *) & dst[i * dst_stride + 4]) = rnd_avg32(a, b); - } -} static inline void avg_pixels8_l2(uint8_t * dst, const uint8_t * src1, const uint8_t * src2, int dst_stride, int src_stride1, int src_stride2, int h) { - int i; - for (i = 0; i < h; i++) { - uint32_t a, b; - a = (((const struct unaligned_32 *) (&src1[i * src_stride1]))->l); - b = (((const struct unaligned_32 *) (&src2[i * src_stride2]))->l); - *((uint32_t *) & dst[i * dst_stride]) = rnd_avg32(*((uint32_t *) & dst[i * dst_stride]), rnd_avg32(a, b)); - a = (((const struct unaligned_32 *) (&src1[i * src_stride1 + 4]))->l); - b = (((const struct unaligned_32 *) (&src2[i * src_stride2 + 4]))->l); - *((uint32_t *) & dst[i * dst_stride + 4]) = rnd_avg32(*((uint32_t *) & dst[i * dst_stride + 4]), rnd_avg32(a, b)); - } -} static inline void put_pixels16_l2(uint8_t * dst, const uint8_t * src1, const uint8_t * src2, int dst_stride, int src_stride1, int src_stride2, int h) { - put_pixels8_l2(dst, src1, src2, dst_stride, src_stride1, src_stride2, h); - put_pixels8_l2(dst + 8, src1 + 8, src2 + 8, dst_stride, src_stride1, src_stride2, h); -} static inline void avg_pixels16_l2(uint8_t * dst, const uint8_t * src1, const uint8_t * src2, int dst_stride, int src_stride1, int src_stride2, int h) { - avg_pixels8_l2(dst, src1, src2, dst_stride, src_stride1, src_stride2, h); - avg_pixels8_l2(dst + 8, src1 + 8, src2 + 8, dst_stride, src_stride1, src_stride2, h); + mask_ = vec_lvsl(0, src2); + + for (i = 0; i < h; i++) { + + tmp1 = vec_ld(i * src_stride1, src1); + mask = vec_lvsl(i * src_stride1, src1); + tmp2 = vec_ld(i * src_stride1 + 15, src1); + + a = vec_perm(tmp1, tmp2, mask); + + tmp1 = vec_ld(i * 16, src2); + tmp2 = vec_ld(i * 16 + 15, src2); + + b = vec_perm(tmp1, tmp2, mask_); + + tmp1 = vec_ld(0, dst); + mask = vec_lvsl(0, dst); + tmp2 = vec_ld(15, dst); + + d = vec_avg(a, b); + + edges = vec_perm(tmp2, tmp1, mask); + + align = vec_lvsr(0, dst); + + tmp1 = vec_perm(edges, d, align); + tmp2 = vec_perm(d, edges, align); + + vec_st(tmp2, 15, dst); + vec_st(tmp1, 0 , dst); + + dst += dst_stride; + } +} + +static inline void avg_pixels16_l2_altivec( uint8_t * dst, const uint8_t * src1, + const uint8_t * src2, int dst_stride, + int src_stride1, int h) +{ + int i; + vector unsigned char a, b, d, tmp1, tmp2, mask, mask_, edges, align; + + mask_ = vec_lvsl(0, src2); + + for (i = 0; i < h; i++) { + + tmp1 = vec_ld(i * src_stride1, src1); + mask = vec_lvsl(i * src_stride1, src1); + tmp2 = vec_ld(i * src_stride1 + 15, src1); + + a = vec_perm(tmp1, tmp2, mask); + + tmp1 = vec_ld(i * 16, src2); + tmp2 = vec_ld(i * 16 + 15, src2); + + b = vec_perm(tmp1, tmp2, mask_); + + tmp1 = vec_ld(0, dst); + mask = vec_lvsl(0, dst); + tmp2 = vec_ld(15, dst); + + d = vec_avg(vec_perm(tmp1, tmp2, mask), vec_avg(a, b)); + + edges = vec_perm(tmp2, tmp1, mask); + + align = vec_lvsr(0, dst); + + tmp1 = vec_perm(edges, d, align); + tmp2 = vec_perm(d, edges, align); + + vec_st(tmp2, 15, dst); + vec_st(tmp1, 0 , dst); + + dst += dst_stride; + } } -/* UNIMPLEMENTED YET !! */ +/* Implemented but could be faster #define put_pixels16_l2_altivec(d,s1,s2,ds,s1s,h) put_pixels16_l2(d,s1,s2,ds,s1s,16,h) #define avg_pixels16_l2_altivec(d,s1,s2,ds,s1s,h) avg_pixels16_l2(d,s1,s2,ds,s1s,16,h) + */ -H264_MC(put_, 16, altivec) - H264_MC(avg_, 16, altivec) + H264_MC(put_, 16, altivec) + H264_MC(avg_, 16, altivec) void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx) { diff --git a/src/libffmpeg/libavcodec/ppc/dsputil_h264_template_altivec.c b/src/libffmpeg/libavcodec/ppc/dsputil_h264_template_altivec.c index 7f46ccf14..37f4de58f 100755 --- a/src/libffmpeg/libavcodec/ppc/dsputil_h264_template_altivec.c +++ b/src/libffmpeg/libavcodec/ppc/dsputil_h264_template_altivec.c @@ -19,70 +19,76 @@ /* this code assume that stride % 16 == 0 */ void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, int h, int x, int y) { POWERPC_PERF_DECLARE(PREFIX_h264_chroma_mc8_num, 1); - POWERPC_PERF_START_COUNT(PREFIX_h264_chroma_mc8_num, 1); - signed int ABCD[4] __attribute__((aligned(16))); + signed int ABCD[4] __attribute__((aligned(16))) = + {((8 - x) * (8 - y)), + ((x) * (8 - y)), + ((8 - x) * (y)), + ((x) * (y))}; register int i; - ABCD[0] = ((8 - x) * (8 - y)); - ABCD[1] = ((x) * (8 - y)); - ABCD[2] = ((8 - x) * (y)); - ABCD[3] = ((x) * (y)); + vector unsigned char fperm; const vector signed int vABCD = vec_ld(0, ABCD); const vector signed short vA = vec_splat((vector signed short)vABCD, 1); const vector signed short vB = vec_splat((vector signed short)vABCD, 3); const vector signed short vC = vec_splat((vector signed short)vABCD, 5); const vector signed short vD = vec_splat((vector signed short)vABCD, 7); const vector signed int vzero = vec_splat_s32(0); - const vector signed short v32ss = (const vector signed short)AVV(32); + const vector signed short v32ss = vec_sl(vec_splat_s16(1),vec_splat_u16(5)); const vector unsigned short v6us = vec_splat_u16(6); + register int loadSecond = (((unsigned long)src) % 16) <= 7 ? 0 : 1; + register int reallyBadAlign = (((unsigned long)src) % 16) == 15 ? 1 : 0; - vector unsigned char fperm; + vector unsigned char vsrcAuc, vsrcBuc, vsrcperm0, vsrcperm1; + vector unsigned char vsrc0uc, vsrc1uc; + vector signed short vsrc0ssH, vsrc1ssH; + vector unsigned char vsrcCuc, vsrc2uc, vsrc3uc; + vector signed short vsrc2ssH, vsrc3ssH, psum; + vector unsigned char vdst, ppsum, vfdst, fsum; + + POWERPC_PERF_START_COUNT(PREFIX_h264_chroma_mc8_num, 1); if (((unsigned long)dst) % 16 == 0) { - fperm = (vector unsigned char)AVV(0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, - 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F); + fperm = (vector unsigned char)AVV(0x10, 0x11, 0x12, 0x13, + 0x14, 0x15, 0x16, 0x17, + 0x08, 0x09, 0x0A, 0x0B, + 0x0C, 0x0D, 0x0E, 0x0F); } else { - fperm = (vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, - 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F); + fperm = (vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03, + 0x04, 0x05, 0x06, 0x07, + 0x18, 0x19, 0x1A, 0x1B, + 0x1C, 0x1D, 0x1E, 0x1F); } - register int loadSecond = (((unsigned long)src) % 16) <= 7 ? 0 : 1; - register int reallyBadAlign = (((unsigned long)src) % 16) == 15 ? 1 : 0; - - vector unsigned char vsrcAuc; - vector unsigned char vsrcBuc; - vector unsigned char vsrcperm0; - vector unsigned char vsrcperm1; vsrcAuc = vec_ld(0, src); + if (loadSecond) vsrcBuc = vec_ld(16, src); vsrcperm0 = vec_lvsl(0, src); vsrcperm1 = vec_lvsl(1, src); - vector unsigned char vsrc0uc; - vector unsigned char vsrc1uc; vsrc0uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm0); if (reallyBadAlign) vsrc1uc = vsrcBuc; else vsrc1uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm1); - vector signed short vsrc0ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero, (vector unsigned char)vsrc0uc); - vector signed short vsrc1ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero, (vector unsigned char)vsrc1uc); + vsrc0ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero, + (vector unsigned char)vsrc0uc); + vsrc1ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero, + (vector unsigned char)vsrc1uc); if (!loadSecond) {// -> !reallyBadAlign for (i = 0 ; i < h ; i++) { - vector unsigned char vsrcCuc; + + vsrcCuc = vec_ld(stride + 0, src); - vector unsigned char vsrc2uc; - vector unsigned char vsrc3uc; vsrc2uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0); vsrc3uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1); - vector signed short vsrc2ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero, (vector unsigned char)vsrc2uc); - vector signed short vsrc3ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero, (vector unsigned char)vsrc3uc); - - vector signed short psum; + vsrc2ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero, + (vector unsigned char)vsrc2uc); + vsrc3ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero, + (vector unsigned char)vsrc3uc); psum = vec_mladd(vA, vsrc0ssH, vec_splat_s16(0)); psum = vec_mladd(vB, vsrc1ssH, psum); @@ -91,11 +97,9 @@ void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, in psum = vec_add(v32ss, psum); psum = vec_sra(psum, v6us); - vector unsigned char vdst = vec_ld(0, dst); - vector unsigned char ppsum = (vector unsigned char)vec_packsu(psum, psum); - - vector unsigned char vfdst = vec_perm(vdst, ppsum, fperm); - vector unsigned char fsum; + vdst = vec_ld(0, dst); + ppsum = (vector unsigned char)vec_packsu(psum, psum); + vfdst = vec_perm(vdst, ppsum, fperm); OP_U8_ALTIVEC(fsum, vfdst, vdst); @@ -108,24 +112,21 @@ void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, in src += stride; } } else { - for (i = 0 ; i < h ; i++) { - vector unsigned char vsrcCuc; vector unsigned char vsrcDuc; + for (i = 0 ; i < h ; i++) { vsrcCuc = vec_ld(stride + 0, src); vsrcDuc = vec_ld(stride + 16, src); - vector unsigned char vsrc2uc; - vector unsigned char vsrc3uc; vsrc2uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0); if (reallyBadAlign) vsrc3uc = vsrcDuc; else vsrc3uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1); - vector signed short vsrc2ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero, (vector unsigned char)vsrc2uc); - vector signed short vsrc3ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero, (vector unsigned char)vsrc3uc); - - vector signed short psum; + vsrc2ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero, + (vector unsigned char)vsrc2uc); + vsrc3ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero, + (vector unsigned char)vsrc3uc); psum = vec_mladd(vA, vsrc0ssH, vec_splat_s16(0)); psum = vec_mladd(vB, vsrc1ssH, psum); @@ -134,11 +135,9 @@ void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, in psum = vec_add(v32ss, psum); psum = vec_sr(psum, v6us); - vector unsigned char vdst = vec_ld(0, dst); - vector unsigned char ppsum = (vector unsigned char)vec_pack(psum, psum); - - vector unsigned char vfdst = vec_perm(vdst, ppsum, fperm); - vector unsigned char fsum; + vdst = vec_ld(0, dst); + ppsum = (vector unsigned char)vec_pack(psum, psum); + vfdst = vec_perm(vdst, ppsum, fperm); OP_U8_ALTIVEC(fsum, vfdst, vdst); @@ -157,7 +156,6 @@ void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, in /* this code assume stride % 16 == 0 */ static void PREFIX_h264_qpel16_h_lowpass_altivec(uint8_t * dst, uint8_t * src, int dstStride, int srcStride) { POWERPC_PERF_DECLARE(PREFIX_h264_qpel16_h_lowpass_num, 1); - POWERPC_PERF_START_COUNT(PREFIX_h264_qpel16_h_lowpass_num, 1); register int i; const vector signed int vzero = vec_splat_s32(0); @@ -167,18 +165,35 @@ static void PREFIX_h264_qpel16_h_lowpass_altivec(uint8_t * dst, uint8_t * src, i const vector unsigned char permP1 = vec_lvsl(+1, src); const vector unsigned char permP2 = vec_lvsl(+2, src); const vector unsigned char permP3 = vec_lvsl(+3, src); - const vector signed short v20ss = (const vector signed short)AVV(20); - const vector unsigned short v5us = vec_splat_u16(5); const vector signed short v5ss = vec_splat_s16(5); - const vector signed short v16ss = (const vector signed short)AVV(16); + const vector unsigned short v5us = vec_splat_u16(5); + const vector signed short v20ss = vec_sl(vec_splat_s16(5),vec_splat_u16(2)); + const vector signed short v16ss = vec_sl(vec_splat_s16(1),vec_splat_u16(4)); const vector unsigned char dstperm = vec_lvsr(0, dst); - const vector unsigned char neg1 = (const vector unsigned char)vec_splat_s8(-1); - const vector unsigned char dstmask = vec_perm((const vector unsigned char)vzero, neg1, dstperm); + const vector unsigned char neg1 = + (const vector unsigned char) vec_splat_s8(-1); + + const vector unsigned char dstmask = + vec_perm((const vector unsigned char)vzero, + neg1, dstperm); + + vector unsigned char srcM2, srcM1, srcP0, srcP1, srcP2, srcP3; register int align = ((((unsigned long)src) - 2) % 16); + vector signed short srcP0A, srcP0B, srcP1A, srcP1B, + srcP2A, srcP2B, srcP3A, srcP3B, + srcM1A, srcM1B, srcM2A, srcM2B, + sum1A, sum1B, sum2A, sum2B, sum3A, sum3B, + pp1A, pp1B, pp2A, pp2B, pp3A, pp3B, + psumA, psumB, sumA, sumB; + + vector unsigned char sum, dst1, dst2, vdst, fsum, + rsum, fdst1, fdst2; + + POWERPC_PERF_START_COUNT(PREFIX_h264_qpel16_h_lowpass_num, 1); + for (i = 0 ; i < 16 ; i ++) { - vector unsigned char srcM2, srcM1, srcP0, srcP1, srcP2, srcP3; vector unsigned char srcR1 = vec_ld(-2, src); vector unsigned char srcR2 = vec_ld(14, src); @@ -237,55 +252,66 @@ static void PREFIX_h264_qpel16_h_lowpass_altivec(uint8_t * dst, uint8_t * src, i } break; } - const vector signed short srcP0A = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcP0); - const vector signed short srcP0B = (vector signed short)vec_mergel((vector unsigned char)vzero, srcP0); - const vector signed short srcP1A = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcP1); - const vector signed short srcP1B = (vector signed short)vec_mergel((vector unsigned char)vzero, srcP1); - - const vector signed short srcP2A = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcP2); - const vector signed short srcP2B = (vector signed short)vec_mergel((vector unsigned char)vzero, srcP2); - const vector signed short srcP3A = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcP3); - const vector signed short srcP3B = (vector signed short)vec_mergel((vector unsigned char)vzero, srcP3); - - const vector signed short srcM1A = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcM1); - const vector signed short srcM1B = (vector signed short)vec_mergel((vector unsigned char)vzero, srcM1); - const vector signed short srcM2A = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcM2); - const vector signed short srcM2B = (vector signed short)vec_mergel((vector unsigned char)vzero, srcM2); - - const vector signed short sum1A = vec_adds(srcP0A, srcP1A); - const vector signed short sum1B = vec_adds(srcP0B, srcP1B); - const vector signed short sum2A = vec_adds(srcM1A, srcP2A); - const vector signed short sum2B = vec_adds(srcM1B, srcP2B); - const vector signed short sum3A = vec_adds(srcM2A, srcP3A); - const vector signed short sum3B = vec_adds(srcM2B, srcP3B); - - const vector signed short pp1A = vec_mladd(sum1A, v20ss, v16ss); - const vector signed short pp1B = vec_mladd(sum1B, v20ss, v16ss); - - const vector signed short pp2A = vec_mladd(sum2A, v5ss, (vector signed short)vzero); - const vector signed short pp2B = vec_mladd(sum2B, v5ss, (vector signed short)vzero); - - const vector signed short pp3A = vec_add(sum3A, pp1A); - const vector signed short pp3B = vec_add(sum3B, pp1B); - - const vector signed short psumA = vec_sub(pp3A, pp2A); - const vector signed short psumB = vec_sub(pp3B, pp2B); + srcP0A = (vector signed short) + vec_mergeh((vector unsigned char)vzero, srcP0); + srcP0B = (vector signed short) + vec_mergel((vector unsigned char)vzero, srcP0); + srcP1A = (vector signed short) + vec_mergeh((vector unsigned char)vzero, srcP1); + srcP1B = (vector signed short) + vec_mergel((vector unsigned char)vzero, srcP1); + + srcP2A = (vector signed short) + vec_mergeh((vector unsigned char)vzero, srcP2); + srcP2B = (vector signed short) + vec_mergel((vector unsigned char)vzero, srcP2); + srcP3A = (vector signed short) + vec_mergeh((vector unsigned char)vzero, srcP3); + srcP3B = (vector signed short) + vec_mergel((vector unsigned char)vzero, srcP3); + + srcM1A = (vector signed short) + vec_mergeh((vector unsigned char)vzero, srcM1); + srcM1B = (vector signed short) + vec_mergel((vector unsigned char)vzero, srcM1); + srcM2A = (vector signed short) + vec_mergeh((vector unsigned char)vzero, srcM2); + srcM2B = (vector signed short) + vec_mergel((vector unsigned char)vzero, srcM2); + + sum1A = vec_adds(srcP0A, srcP1A); + sum1B = vec_adds(srcP0B, srcP1B); + sum2A = vec_adds(srcM1A, srcP2A); + sum2B = vec_adds(srcM1B, srcP2B); + sum3A = vec_adds(srcM2A, srcP3A); + sum3B = vec_adds(srcM2B, srcP3B); + + pp1A = vec_mladd(sum1A, v20ss, v16ss); + pp1B = vec_mladd(sum1B, v20ss, v16ss); + + pp2A = vec_mladd(sum2A, v5ss, (vector signed short)vzero); + pp2B = vec_mladd(sum2B, v5ss, (vector signed short)vzero); + + pp3A = vec_add(sum3A, pp1A); + pp3B = vec_add(sum3B, pp1B); + + psumA = vec_sub(pp3A, pp2A); + psumB = vec_sub(pp3B, pp2B); + + sumA = vec_sra(psumA, v5us); + sumB = vec_sra(psumB, v5us); + + sum = vec_packsu(sumA, sumB); + + dst1 = vec_ld(0, dst); + dst2 = vec_ld(16, dst); + vdst = vec_perm(dst1, dst2, vec_lvsl(0, dst)); - const vector signed short sumA = vec_sra(psumA, v5us); - const vector signed short sumB = vec_sra(psumB, v5us); - - const vector unsigned char sum = vec_packsu(sumA, sumB); - - const vector unsigned char dst1 = vec_ld(0, dst); - const vector unsigned char dst2 = vec_ld(16, dst); - const vector unsigned char vdst = vec_perm(dst1, dst2, vec_lvsl(0, dst)); - - vector unsigned char fsum; OP_U8_ALTIVEC(fsum, sum, vdst); - const vector unsigned char rsum = vec_perm(fsum, fsum, dstperm); - const vector unsigned char fdst1 = vec_sel(dst1, rsum, dstmask); - const vector unsigned char fdst2 = vec_sel(rsum, dst2, dstmask); + rsum = vec_perm(fsum, fsum, dstperm); + fdst1 = vec_sel(dst1, rsum, dstmask); + fdst2 = vec_sel(rsum, dst2, dstmask); vec_st(fdst1, 0, dst); vec_st(fdst2, 16, dst); @@ -299,16 +325,15 @@ POWERPC_PERF_STOP_COUNT(PREFIX_h264_qpel16_h_lowpass_num, 1); /* this code assume stride % 16 == 0 */ static void PREFIX_h264_qpel16_v_lowpass_altivec(uint8_t * dst, uint8_t * src, int dstStride, int srcStride) { POWERPC_PERF_DECLARE(PREFIX_h264_qpel16_v_lowpass_num, 1); - POWERPC_PERF_START_COUNT(PREFIX_h264_qpel16_v_lowpass_num, 1); register int i; const vector signed int vzero = vec_splat_s32(0); const vector unsigned char perm = vec_lvsl(0, src); - const vector signed short v20ss = (const vector signed short)AVV(20); + const vector signed short v20ss = vec_sl(vec_splat_s16(5),vec_splat_u16(2)); const vector unsigned short v5us = vec_splat_u16(5); const vector signed short v5ss = vec_splat_s16(5); - const vector signed short v16ss = (const vector signed short)AVV(16); + const vector signed short v16ss = vec_sl(vec_splat_s16(1),vec_splat_u16(4)); const vector unsigned char dstperm = vec_lvsr(0, dst); const vector unsigned char neg1 = (const vector unsigned char)vec_splat_s8(-1); const vector unsigned char dstmask = vec_perm((const vector unsigned char)vzero, neg1, dstperm); @@ -318,49 +343,71 @@ static void PREFIX_h264_qpel16_v_lowpass_altivec(uint8_t * dst, uint8_t * src, i const vector unsigned char srcM2a = vec_ld(0, srcbis); const vector unsigned char srcM2b = vec_ld(16, srcbis); const vector unsigned char srcM2 = vec_perm(srcM2a, srcM2b, perm); - srcbis += srcStride; - const vector unsigned char srcM1a = vec_ld(0, srcbis); +// srcbis += srcStride; + const vector unsigned char srcM1a = vec_ld(0, srcbis += srcStride); const vector unsigned char srcM1b = vec_ld(16, srcbis); const vector unsigned char srcM1 = vec_perm(srcM1a, srcM1b, perm); - srcbis += srcStride; - const vector unsigned char srcP0a = vec_ld(0, srcbis); +// srcbis += srcStride; + const vector unsigned char srcP0a = vec_ld(0, srcbis += srcStride); const vector unsigned char srcP0b = vec_ld(16, srcbis); const vector unsigned char srcP0 = vec_perm(srcP0a, srcP0b, perm); - srcbis += srcStride; - const vector unsigned char srcP1a = vec_ld(0, srcbis); +// srcbis += srcStride; + const vector unsigned char srcP1a = vec_ld(0, srcbis += srcStride); const vector unsigned char srcP1b = vec_ld(16, srcbis); const vector unsigned char srcP1 = vec_perm(srcP1a, srcP1b, perm); - srcbis += srcStride; - const vector unsigned char srcP2a = vec_ld(0, srcbis); +// srcbis += srcStride; + const vector unsigned char srcP2a = vec_ld(0, srcbis += srcStride); const vector unsigned char srcP2b = vec_ld(16, srcbis); const vector unsigned char srcP2 = vec_perm(srcP2a, srcP2b, perm); - srcbis += srcStride; - - vector signed short srcM2ssA = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcM2); - vector signed short srcM2ssB = (vector signed short)vec_mergel((vector unsigned char)vzero, srcM2); - vector signed short srcM1ssA = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcM1); - vector signed short srcM1ssB = (vector signed short)vec_mergel((vector unsigned char)vzero, srcM1); - vector signed short srcP0ssA = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcP0); - vector signed short srcP0ssB = (vector signed short)vec_mergel((vector unsigned char)vzero, srcP0); - vector signed short srcP1ssA = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcP1); - vector signed short srcP1ssB = (vector signed short)vec_mergel((vector unsigned char)vzero, srcP1); - vector signed short srcP2ssA = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcP2); - vector signed short srcP2ssB = (vector signed short)vec_mergel((vector unsigned char)vzero, srcP2); +// srcbis += srcStride; + + vector signed short srcM2ssA = (vector signed short) + vec_mergeh((vector unsigned char)vzero, srcM2); + vector signed short srcM2ssB = (vector signed short) + vec_mergel((vector unsigned char)vzero, srcM2); + vector signed short srcM1ssA = (vector signed short) + vec_mergeh((vector unsigned char)vzero, srcM1); + vector signed short srcM1ssB = (vector signed short) + vec_mergel((vector unsigned char)vzero, srcM1); + vector signed short srcP0ssA = (vector signed short) + vec_mergeh((vector unsigned char)vzero, srcP0); + vector signed short srcP0ssB = (vector signed short) + vec_mergel((vector unsigned char)vzero, srcP0); + vector signed short srcP1ssA = (vector signed short) + vec_mergeh((vector unsigned char)vzero, srcP1); + vector signed short srcP1ssB = (vector signed short) + vec_mergel((vector unsigned char)vzero, srcP1); + vector signed short srcP2ssA = (vector signed short) + vec_mergeh((vector unsigned char)vzero, srcP2); + vector signed short srcP2ssB = (vector signed short) + vec_mergel((vector unsigned char)vzero, srcP2); + + vector signed short pp1A, pp1B, pp2A, pp2B, pp3A, pp3B, + psumA, psumB, sumA, sumB, + srcP3ssA, srcP3ssB, + sum1A, sum1B, sum2A, sum2B, sum3A, sum3B; + + vector unsigned char sum, dst1, dst2, vdst, fsum, rsum, fdst1, fdst2, + srcP3a, srcP3b, srcP3; + + POWERPC_PERF_START_COUNT(PREFIX_h264_qpel16_v_lowpass_num, 1); for (i = 0 ; i < 16 ; i++) { - const vector unsigned char srcP3a = vec_ld(0, srcbis); - const vector unsigned char srcP3b = vec_ld(16, srcbis); - const vector unsigned char srcP3 = vec_perm(srcP3a, srcP3b, perm); - const vector signed short srcP3ssA = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcP3); - const vector signed short srcP3ssB = (vector signed short)vec_mergel((vector unsigned char)vzero, srcP3); - srcbis += srcStride; - - const vector signed short sum1A = vec_adds(srcP0ssA, srcP1ssA); - const vector signed short sum1B = vec_adds(srcP0ssB, srcP1ssB); - const vector signed short sum2A = vec_adds(srcM1ssA, srcP2ssA); - const vector signed short sum2B = vec_adds(srcM1ssB, srcP2ssB); - const vector signed short sum3A = vec_adds(srcM2ssA, srcP3ssA); - const vector signed short sum3B = vec_adds(srcM2ssB, srcP3ssB); + srcP3a = vec_ld(0, srcbis += srcStride); + srcP3b = vec_ld(16, srcbis); + srcP3 = vec_perm(srcP3a, srcP3b, perm); + srcP3ssA = (vector signed short) + vec_mergeh((vector unsigned char)vzero, srcP3); + srcP3ssB = (vector signed short) + vec_mergel((vector unsigned char)vzero, srcP3); +// srcbis += srcStride; + + sum1A = vec_adds(srcP0ssA, srcP1ssA); + sum1B = vec_adds(srcP0ssB, srcP1ssB); + sum2A = vec_adds(srcM1ssA, srcP2ssA); + sum2B = vec_adds(srcM1ssB, srcP2ssB); + sum3A = vec_adds(srcM2ssA, srcP3ssA); + sum3B = vec_adds(srcM2ssB, srcP3ssB); srcM2ssA = srcM1ssA; srcM2ssB = srcM1ssB; @@ -373,33 +420,32 @@ static void PREFIX_h264_qpel16_v_lowpass_altivec(uint8_t * dst, uint8_t * src, i srcP2ssA = srcP3ssA; srcP2ssB = srcP3ssB; - const vector signed short pp1A = vec_mladd(sum1A, v20ss, v16ss); - const vector signed short pp1B = vec_mladd(sum1B, v20ss, v16ss); + pp1A = vec_mladd(sum1A, v20ss, v16ss); + pp1B = vec_mladd(sum1B, v20ss, v16ss); - const vector signed short pp2A = vec_mladd(sum2A, v5ss, (vector signed short)vzero); - const vector signed short pp2B = vec_mladd(sum2B, v5ss, (vector signed short)vzero); + pp2A = vec_mladd(sum2A, v5ss, (vector signed short)vzero); + pp2B = vec_mladd(sum2B, v5ss, (vector signed short)vzero); - const vector signed short pp3A = vec_add(sum3A, pp1A); - const vector signed short pp3B = vec_add(sum3B, pp1B); + pp3A = vec_add(sum3A, pp1A); + pp3B = vec_add(sum3B, pp1B); - const vector signed short psumA = vec_sub(pp3A, pp2A); - const vector signed short psumB = vec_sub(pp3B, pp2B); + psumA = vec_sub(pp3A, pp2A); + psumB = vec_sub(pp3B, pp2B); - const vector signed short sumA = vec_sra(psumA, v5us); - const vector signed short sumB = vec_sra(psumB, v5us); + sumA = vec_sra(psumA, v5us); + sumB = vec_sra(psumB, v5us); - const vector unsigned char sum = vec_packsu(sumA, sumB); + sum = vec_packsu(sumA, sumB); - const vector unsigned char dst1 = vec_ld(0, dst); - const vector unsigned char dst2 = vec_ld(16, dst); - const vector unsigned char vdst = vec_perm(dst1, dst2, vec_lvsl(0, dst)); + dst1 = vec_ld(0, dst); + dst2 = vec_ld(16, dst); + vdst = vec_perm(dst1, dst2, vec_lvsl(0, dst)); - vector unsigned char fsum; OP_U8_ALTIVEC(fsum, sum, vdst); - const vector unsigned char rsum = vec_perm(fsum, fsum, dstperm); - const vector unsigned char fdst1 = vec_sel(dst1, rsum, dstmask); - const vector unsigned char fdst2 = vec_sel(rsum, dst2, dstmask); + rsum = vec_perm(fsum, fsum, dstperm); + fdst1 = vec_sel(dst1, rsum, dstmask); + fdst2 = vec_sel(rsum, dst2, dstmask); vec_st(fdst1, 0, dst); vec_st(fdst2, 16, dst); @@ -412,7 +458,6 @@ static void PREFIX_h264_qpel16_v_lowpass_altivec(uint8_t * dst, uint8_t * src, i /* this code assume stride % 16 == 0 *and* tmp is properly aligned */ static void PREFIX_h264_qpel16_hv_lowpass_altivec(uint8_t * dst, int16_t * tmp, uint8_t * src, int dstStride, int tmpStride, int srcStride) { POWERPC_PERF_DECLARE(PREFIX_h264_qpel16_hv_lowpass_num, 1); - POWERPC_PERF_START_COUNT(PREFIX_h264_qpel16_hv_lowpass_num, 1); register int i; const vector signed int vzero = vec_splat_s32(0); const vector unsigned char permM2 = vec_lvsl(-2, src); @@ -421,17 +466,47 @@ static void PREFIX_h264_qpel16_hv_lowpass_altivec(uint8_t * dst, int16_t * tmp, const vector unsigned char permP1 = vec_lvsl(+1, src); const vector unsigned char permP2 = vec_lvsl(+2, src); const vector unsigned char permP3 = vec_lvsl(+3, src); - const vector signed short v20ss = (const vector signed short)AVV(20); + const vector signed short v20ss = vec_sl(vec_splat_s16(5),vec_splat_u16(2)); const vector unsigned int v10ui = vec_splat_u32(10); const vector signed short v5ss = vec_splat_s16(5); const vector signed short v1ss = vec_splat_s16(1); - const vector signed int v512si = (const vector signed int)AVV(512); - const vector unsigned int v16ui = (const vector unsigned int)AVV(16); + const vector signed int v512si = vec_sl(vec_splat_s32(1),vec_splat_u32(9)); + const vector unsigned int v16ui = vec_sl(vec_splat_u32(1),vec_splat_u32(4)); register int align = ((((unsigned long)src) - 2) % 16); - src -= (2 * srcStride); + const vector unsigned char neg1 = (const vector unsigned char) + vec_splat_s8(-1); + + vector signed short srcP0A, srcP0B, srcP1A, srcP1B, + srcP2A, srcP2B, srcP3A, srcP3B, + srcM1A, srcM1B, srcM2A, srcM2B, + sum1A, sum1B, sum2A, sum2B, sum3A, sum3B, + pp1A, pp1B, pp2A, pp2B, psumA, psumB; + + const vector unsigned char dstperm = vec_lvsr(0, dst); + + const vector unsigned char dstmask = vec_perm((const vector unsigned char)vzero, neg1, dstperm); + + const vector unsigned char mperm = (const vector unsigned char) + AVV(0x00, 0x08, 0x01, 0x09, 0x02, 0x0A, 0x03, 0x0B, + 0x04, 0x0C, 0x05, 0x0D, 0x06, 0x0E, 0x07, 0x0F); + int16_t *tmpbis = tmp; + + vector signed short tmpM1ssA, tmpM1ssB, tmpM2ssA, tmpM2ssB, + tmpP0ssA, tmpP0ssB, tmpP1ssA, tmpP1ssB, + tmpP2ssA, tmpP2ssB; + + vector signed int pp1Ae, pp1Ao, pp1Be, pp1Bo, pp2Ae, pp2Ao, pp2Be, pp2Bo, + pp3Ae, pp3Ao, pp3Be, pp3Bo, pp1cAe, pp1cAo, pp1cBe, pp1cBo, + pp32Ae, pp32Ao, pp32Be, pp32Bo, sumAe, sumAo, sumBe, sumBo, + ssumAe, ssumAo, ssumBe, ssumBo; + vector unsigned char fsum, sumv, sum, dst1, dst2, vdst, + rsum, fdst1, fdst2; + vector signed short ssume, ssumo; + POWERPC_PERF_START_COUNT(PREFIX_h264_qpel16_hv_lowpass_num, 1); + src -= (2 * srcStride); for (i = 0 ; i < 21 ; i ++) { vector unsigned char srcM2, srcM1, srcP0, srcP1, srcP2, srcP3; vector unsigned char srcR1 = vec_ld(-2, src); @@ -492,36 +567,48 @@ static void PREFIX_h264_qpel16_hv_lowpass_altivec(uint8_t * dst, int16_t * tmp, } break; } - const vector signed short srcP0A = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcP0); - const vector signed short srcP0B = (vector signed short)vec_mergel((vector unsigned char)vzero, srcP0); - const vector signed short srcP1A = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcP1); - const vector signed short srcP1B = (vector signed short)vec_mergel((vector unsigned char)vzero, srcP1); - - const vector signed short srcP2A = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcP2); - const vector signed short srcP2B = (vector signed short)vec_mergel((vector unsigned char)vzero, srcP2); - const vector signed short srcP3A = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcP3); - const vector signed short srcP3B = (vector signed short)vec_mergel((vector unsigned char)vzero, srcP3); - - const vector signed short srcM1A = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcM1); - const vector signed short srcM1B = (vector signed short)vec_mergel((vector unsigned char)vzero, srcM1); - const vector signed short srcM2A = (vector signed short)vec_mergeh((vector unsigned char)vzero, srcM2); - const vector signed short srcM2B = (vector signed short)vec_mergel((vector unsigned char)vzero, srcM2); - - const vector signed short sum1A = vec_adds(srcP0A, srcP1A); - const vector signed short sum1B = vec_adds(srcP0B, srcP1B); - const vector signed short sum2A = vec_adds(srcM1A, srcP2A); - const vector signed short sum2B = vec_adds(srcM1B, srcP2B); - const vector signed short sum3A = vec_adds(srcM2A, srcP3A); - const vector signed short sum3B = vec_adds(srcM2B, srcP3B); - - const vector signed short pp1A = vec_mladd(sum1A, v20ss, sum3A); - const vector signed short pp1B = vec_mladd(sum1B, v20ss, sum3B); - - const vector signed short pp2A = vec_mladd(sum2A, v5ss, (vector signed short)vzero); - const vector signed short pp2B = vec_mladd(sum2B, v5ss, (vector signed short)vzero); - - const vector signed short psumA = vec_sub(pp1A, pp2A); - const vector signed short psumB = vec_sub(pp1B, pp2B); + srcP0A = (vector signed short) + vec_mergeh((vector unsigned char)vzero, srcP0); + srcP0B = (vector signed short) + vec_mergel((vector unsigned char)vzero, srcP0); + srcP1A = (vector signed short) + vec_mergeh((vector unsigned char)vzero, srcP1); + srcP1B = (vector signed short) + vec_mergel((vector unsigned char)vzero, srcP1); + + srcP2A = (vector signed short) + vec_mergeh((vector unsigned char)vzero, srcP2); + srcP2B = (vector signed short) + vec_mergel((vector unsigned char)vzero, srcP2); + srcP3A = (vector signed short) + vec_mergeh((vector unsigned char)vzero, srcP3); + srcP3B = (vector signed short) + vec_mergel((vector unsigned char)vzero, srcP3); + + srcM1A = (vector signed short) + vec_mergeh((vector unsigned char)vzero, srcM1); + srcM1B = (vector signed short) + vec_mergel((vector unsigned char)vzero, srcM1); + srcM2A = (vector signed short) + vec_mergeh((vector unsigned char)vzero, srcM2); + srcM2B = (vector signed short) + vec_mergel((vector unsigned char)vzero, srcM2); + + sum1A = vec_adds(srcP0A, srcP1A); + sum1B = vec_adds(srcP0B, srcP1B); + sum2A = vec_adds(srcM1A, srcP2A); + sum2B = vec_adds(srcM1B, srcP2B); + sum3A = vec_adds(srcM2A, srcP3A); + sum3B = vec_adds(srcM2B, srcP3B); + + pp1A = vec_mladd(sum1A, v20ss, sum3A); + pp1B = vec_mladd(sum1B, v20ss, sum3B); + + pp2A = vec_mladd(sum2A, v5ss, (vector signed short)vzero); + pp2B = vec_mladd(sum2B, v5ss, (vector signed short)vzero); + + psumA = vec_sub(pp1A, pp2A); + psumB = vec_sub(pp1B, pp2B); vec_st(psumA, 0, tmp); vec_st(psumB, 16, tmp); @@ -530,35 +617,25 @@ static void PREFIX_h264_qpel16_hv_lowpass_altivec(uint8_t * dst, int16_t * tmp, tmp += tmpStride; /* int16_t*, and stride is 16, so it's OK here */ } - const vector unsigned char dstperm = vec_lvsr(0, dst); - const vector unsigned char neg1 = (const vector unsigned char)vec_splat_s8(-1); - const vector unsigned char dstmask = vec_perm((const vector unsigned char)vzero, neg1, dstperm); - const vector unsigned char mperm = (const vector unsigned char) - AVV(0x00, 0x08, 0x01, 0x09, 0x02, 0x0A, 0x03, 0x0B, - 0x04, 0x0C, 0x05, 0x0D, 0x06, 0x0E, 0x07, 0x0F); - - int16_t *tmpbis = tmp - (tmpStride * 21); - - vector signed short tmpM2ssA = vec_ld(0, tmpbis); - vector signed short tmpM2ssB = vec_ld(16, tmpbis); + tmpM2ssA = vec_ld(0, tmpbis); + tmpM2ssB = vec_ld(16, tmpbis); tmpbis += tmpStride; - vector signed short tmpM1ssA = vec_ld(0, tmpbis); - vector signed short tmpM1ssB = vec_ld(16, tmpbis); + tmpM1ssA = vec_ld(0, tmpbis); + tmpM1ssB = vec_ld(16, tmpbis); tmpbis += tmpStride; - vector signed short tmpP0ssA = vec_ld(0, tmpbis); - vector signed short tmpP0ssB = vec_ld(16, tmpbis); + tmpP0ssA = vec_ld(0, tmpbis); + tmpP0ssB = vec_ld(16, tmpbis); tmpbis += tmpStride; - vector signed short tmpP1ssA = vec_ld(0, tmpbis); - vector signed short tmpP1ssB = vec_ld(16, tmpbis); + tmpP1ssA = vec_ld(0, tmpbis); + tmpP1ssB = vec_ld(16, tmpbis); tmpbis += tmpStride; - vector signed short tmpP2ssA = vec_ld(0, tmpbis); - vector signed short tmpP2ssB = vec_ld(16, tmpbis); + tmpP2ssA = vec_ld(0, tmpbis); + tmpP2ssB = vec_ld(16, tmpbis); tmpbis += tmpStride; for (i = 0 ; i < 16 ; i++) { const vector signed short tmpP3ssA = vec_ld(0, tmpbis); const vector signed short tmpP3ssB = vec_ld(16, tmpbis); - tmpbis += tmpStride; const vector signed short sum1A = vec_adds(tmpP0ssA, tmpP1ssA); const vector signed short sum1B = vec_adds(tmpP0ssB, tmpP1ssB); @@ -567,6 +644,8 @@ static void PREFIX_h264_qpel16_hv_lowpass_altivec(uint8_t * dst, int16_t * tmp, const vector signed short sum3A = vec_adds(tmpM2ssA, tmpP3ssA); const vector signed short sum3B = vec_adds(tmpM2ssB, tmpP3ssB); + tmpbis += tmpStride; + tmpM2ssA = tmpM1ssA; tmpM2ssB = tmpM1ssB; tmpM1ssA = tmpP0ssA; @@ -578,57 +657,56 @@ static void PREFIX_h264_qpel16_hv_lowpass_altivec(uint8_t * dst, int16_t * tmp, tmpP2ssA = tmpP3ssA; tmpP2ssB = tmpP3ssB; - const vector signed int pp1Ae = vec_mule(sum1A, v20ss); - const vector signed int pp1Ao = vec_mulo(sum1A, v20ss); - const vector signed int pp1Be = vec_mule(sum1B, v20ss); - const vector signed int pp1Bo = vec_mulo(sum1B, v20ss); + pp1Ae = vec_mule(sum1A, v20ss); + pp1Ao = vec_mulo(sum1A, v20ss); + pp1Be = vec_mule(sum1B, v20ss); + pp1Bo = vec_mulo(sum1B, v20ss); - const vector signed int pp2Ae = vec_mule(sum2A, v5ss); - const vector signed int pp2Ao = vec_mulo(sum2A, v5ss); - const vector signed int pp2Be = vec_mule(sum2B, v5ss); - const vector signed int pp2Bo = vec_mulo(sum2B, v5ss); + pp2Ae = vec_mule(sum2A, v5ss); + pp2Ao = vec_mulo(sum2A, v5ss); + pp2Be = vec_mule(sum2B, v5ss); + pp2Bo = vec_mulo(sum2B, v5ss); - const vector signed int pp3Ae = vec_sra((vector signed int)sum3A, v16ui); - const vector signed int pp3Ao = vec_mulo(sum3A, v1ss); - const vector signed int pp3Be = vec_sra((vector signed int)sum3B, v16ui); - const vector signed int pp3Bo = vec_mulo(sum3B, v1ss); + pp3Ae = vec_sra((vector signed int)sum3A, v16ui); + pp3Ao = vec_mulo(sum3A, v1ss); + pp3Be = vec_sra((vector signed int)sum3B, v16ui); + pp3Bo = vec_mulo(sum3B, v1ss); - const vector signed int pp1cAe = vec_add(pp1Ae, v512si); - const vector signed int pp1cAo = vec_add(pp1Ao, v512si); - const vector signed int pp1cBe = vec_add(pp1Be, v512si); - const vector signed int pp1cBo = vec_add(pp1Bo, v512si); + pp1cAe = vec_add(pp1Ae, v512si); + pp1cAo = vec_add(pp1Ao, v512si); + pp1cBe = vec_add(pp1Be, v512si); + pp1cBo = vec_add(pp1Bo, v512si); - const vector signed int pp32Ae = vec_sub(pp3Ae, pp2Ae); - const vector signed int pp32Ao = vec_sub(pp3Ao, pp2Ao); - const vector signed int pp32Be = vec_sub(pp3Be, pp2Be); - const vector signed int pp32Bo = vec_sub(pp3Bo, pp2Bo); + pp32Ae = vec_sub(pp3Ae, pp2Ae); + pp32Ao = vec_sub(pp3Ao, pp2Ao); + pp32Be = vec_sub(pp3Be, pp2Be); + pp32Bo = vec_sub(pp3Bo, pp2Bo); - const vector signed int sumAe = vec_add(pp1cAe, pp32Ae); - const vector signed int sumAo = vec_add(pp1cAo, pp32Ao); - const vector signed int sumBe = vec_add(pp1cBe, pp32Be); - const vector signed int sumBo = vec_add(pp1cBo, pp32Bo); + sumAe = vec_add(pp1cAe, pp32Ae); + sumAo = vec_add(pp1cAo, pp32Ao); + sumBe = vec_add(pp1cBe, pp32Be); + sumBo = vec_add(pp1cBo, pp32Bo); - const vector signed int ssumAe = vec_sra(sumAe, v10ui); - const vector signed int ssumAo = vec_sra(sumAo, v10ui); - const vector signed int ssumBe = vec_sra(sumBe, v10ui); - const vector signed int ssumBo = vec_sra(sumBo, v10ui); + ssumAe = vec_sra(sumAe, v10ui); + ssumAo = vec_sra(sumAo, v10ui); + ssumBe = vec_sra(sumBe, v10ui); + ssumBo = vec_sra(sumBo, v10ui); - const vector signed short ssume = vec_packs(ssumAe, ssumBe); - const vector signed short ssumo = vec_packs(ssumAo, ssumBo); + ssume = vec_packs(ssumAe, ssumBe); + ssumo = vec_packs(ssumAo, ssumBo); - const vector unsigned char sumv = vec_packsu(ssume, ssumo); - const vector unsigned char sum = vec_perm(sumv, sumv, mperm); + sumv = vec_packsu(ssume, ssumo); + sum = vec_perm(sumv, sumv, mperm); - const vector unsigned char dst1 = vec_ld(0, dst); - const vector unsigned char dst2 = vec_ld(16, dst); - const vector unsigned char vdst = vec_perm(dst1, dst2, vec_lvsl(0, dst)); + dst1 = vec_ld(0, dst); + dst2 = vec_ld(16, dst); + vdst = vec_perm(dst1, dst2, vec_lvsl(0, dst)); - vector unsigned char fsum; OP_U8_ALTIVEC(fsum, sum, vdst); - const vector unsigned char rsum = vec_perm(fsum, fsum, dstperm); - const vector unsigned char fdst1 = vec_sel(dst1, rsum, dstmask); - const vector unsigned char fdst2 = vec_sel(rsum, dst2, dstmask); + rsum = vec_perm(fsum, fsum, dstperm); + fdst1 = vec_sel(dst1, rsum, dstmask); + fdst2 = vec_sel(rsum, dst2, dstmask); vec_st(fdst1, 0, dst); vec_st(fdst2, 16, dst); diff --git a/src/libffmpeg/libavcodec/ppc/dsputil_ppc.c b/src/libffmpeg/libavcodec/ppc/dsputil_ppc.c index d5f55b80f..b63c8dd84 100644 --- a/src/libffmpeg/libavcodec/ppc/dsputil_ppc.c +++ b/src/libffmpeg/libavcodec/ppc/dsputil_ppc.c @@ -30,6 +30,17 @@ extern void fdct_altivec(int16_t *block); extern void idct_put_altivec(uint8_t *dest, int line_size, int16_t *block); extern void idct_add_altivec(uint8_t *dest, int line_size, int16_t *block); +extern void ff_snow_horizontal_compose97i_altivec(DWTELEM *b, int width); +extern void ff_snow_vertical_compose97i_altivec(DWTELEM *b0, DWTELEM *b1, + DWTELEM *b2, DWTELEM *b3, + DWTELEM *b4, DWTELEM *b5, + int width); +extern void ff_snow_inner_add_yblock_altivec(uint8_t *obmc, const int obmc_stride, + uint8_t * * block, int b_w, int b_h, + int src_x, int src_y, int src_stride, + slice_buffer * sb, int add, + uint8_t * dst8); + int mm_flags = 0; int mm_support(void) @@ -292,10 +303,13 @@ void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx) c->gmc1 = gmc1_altivec; -#ifdef CONFIG_DARWIN // ATM gcc-3.3 and gcc-3.4 fail to compile these in linux... c->hadamard8_diff[0] = hadamard8_diff16_altivec; c->hadamard8_diff[1] = hadamard8_diff8x8_altivec; -#endif + + + c->horizontal_compose97i = ff_snow_horizontal_compose97i_altivec; + c->vertical_compose97i = ff_snow_vertical_compose97i_altivec; + c->inner_add_yblock = ff_snow_inner_add_yblock_altivec; #ifdef CONFIG_ENCODERS if (avctx->dct_algo == FF_DCT_AUTO || diff --git a/src/libffmpeg/libavcodec/ppc/gcc_fixes.h b/src/libffmpeg/libavcodec/ppc/gcc_fixes.h index 288fdf834..943905bc5 100644 --- a/src/libffmpeg/libavcodec/ppc/gcc_fixes.h +++ b/src/libffmpeg/libavcodec/ppc/gcc_fixes.h @@ -17,8 +17,17 @@ # else # define AVV # endif +#define REG_v(a) asm ( #a ) #else + #define AVV(x...) {x} + +#if (__GNUC__ < 4) +# define REG_v(a) +#else +# define REG_v(a) asm ( #a ) +#endif + #if (__GNUC__ * 100 + __GNUC_MINOR__ < 303) /* This code was provided to me by Bartosch Pixa diff --git a/src/libffmpeg/libavcodec/qdm2.c b/src/libffmpeg/libavcodec/qdm2.c index 98bec5cca..81d548386 100644 --- a/src/libffmpeg/libavcodec/qdm2.c +++ b/src/libffmpeg/libavcodec/qdm2.c @@ -538,7 +538,7 @@ static void fix_coding_method_array (int sb, int channels, sb_int8_array coding_ run = 1; case_val = 8; } else { - switch (switchtable[coding_method[ch][sb][j]]) { + switch (switchtable[coding_method[ch][sb][j]-8]) { case 0: run = 10; case_val = 10; break; case 1: run = 1; case_val = 16; break; case 2: run = 5; case_val = 24; break; @@ -1470,13 +1470,13 @@ static void qdm2_decode_fft_packets (QDM2Context *q) if (duration >= 0 && duration < 4) qdm2_fft_decode_tones(q, duration, &gb, unknown_flag); } else if (type == 31) { - for (i=0; i < 4; i++) - qdm2_fft_decode_tones(q, i, &gb, unknown_flag); + for (j=0; j < 4; j++) + qdm2_fft_decode_tones(q, j, &gb, unknown_flag); } else if (type == 46) { - for (i=0; i < 6; i++) - q->fft_level_exp[i] = get_bits(&gb, 6); - for (i=0; i < 4; i++) - qdm2_fft_decode_tones(q, i, &gb, unknown_flag); + for (j=0; j < 6; j++) + q->fft_level_exp[j] = get_bits(&gb, 6); + for (j=0; j < 4; j++) + qdm2_fft_decode_tones(q, j, &gb, unknown_flag); } } // Loop on B packets @@ -2008,8 +2008,10 @@ static int qdm2_decode_frame(AVCodecContext *avctx, { QDM2Context *s = avctx->priv_data; - if((buf == NULL) || (buf_size < s->checksum_size)) + if(!buf) return 0; + if(buf_size < s->checksum_size) + return -1; *data_size = s->channels * s->frame_size * sizeof(int16_t); diff --git a/src/libffmpeg/libavcodec/qdm2data.h b/src/libffmpeg/libavcodec/qdm2data.h index f41a2078b..dafd4f490 100644 --- a/src/libffmpeg/libavcodec/qdm2data.h +++ b/src/libffmpeg/libavcodec/qdm2data.h @@ -32,18 +32,18 @@ /** VLC TABLES **/ /* values in this table range from -1..23; adjust retrieved value by -1 */ -static uint16_t vlc_tab_level_huffcodes[24] = { +static const uint16_t vlc_tab_level_huffcodes[24] = { 0x037c, 0x0004, 0x003c, 0x004c, 0x003a, 0x002c, 0x001c, 0x001a, 0x0024, 0x0014, 0x0001, 0x0002, 0x0000, 0x0003, 0x0007, 0x0005, 0x0006, 0x0008, 0x0009, 0x000a, 0x000c, 0x00fc, 0x007c, 0x017c }; -static uint8_t vlc_tab_level_huffbits[24] = { +static const uint8_t vlc_tab_level_huffbits[24] = { 10, 6, 7, 7, 6, 6, 6, 6, 6, 5, 4, 4, 4, 3, 3, 3, 3, 4, 4, 5, 7, 8, 9, 10 }; /* values in this table range from -1..36; adjust retrieved value by -1 */ -static uint16_t vlc_tab_diff_huffcodes[37] = { +static const uint16_t vlc_tab_diff_huffcodes[37] = { 0x1c57, 0x0004, 0x0000, 0x0001, 0x0003, 0x0002, 0x000f, 0x000e, 0x0007, 0x0016, 0x0037, 0x0027, 0x0026, 0x0066, 0x0006, 0x0097, 0x0046, 0x01c6, 0x0017, 0x0786, 0x0086, 0x0257, 0x00d7, 0x0357, @@ -51,111 +51,111 @@ static uint16_t vlc_tab_diff_huffcodes[37] = { 0x0b86, 0x0000, 0x1457, 0x0000, 0x0457 }; -static uint8_t vlc_tab_diff_huffbits[37] = { +static const uint8_t vlc_tab_diff_huffbits[37] = { 13, 3, 3, 2, 3, 3, 4, 4, 6, 5, 6, 6, 7, 7, 8, 8, 8, 9, 8, 11, 9, 10, 8, 10, 9, 12, 10, 0, 10, 13, 11, 0, 12, 0, 13, 0, 13 }; /* values in this table range from -1..5; adjust retrieved value by -1 */ -static uint8_t vlc_tab_run_huffcodes[6] = { +static const uint8_t vlc_tab_run_huffcodes[6] = { 0x1f, 0x00, 0x01, 0x03, 0x07, 0x0f }; -static uint8_t vlc_tab_run_huffbits[6] = { +static const uint8_t vlc_tab_run_huffbits[6] = { 5, 1, 2, 3, 4, 5 }; /* values in this table range from -1..19; adjust retrieved value by -1 */ -static uint16_t vlc_tab_tone_level_idx_hi1_huffcodes[20] = { +static const uint16_t vlc_tab_tone_level_idx_hi1_huffcodes[20] = { 0x5714, 0x000c, 0x0002, 0x0001, 0x0000, 0x0004, 0x0034, 0x0054, 0x0094, 0x0014, 0x0114, 0x0214, 0x0314, 0x0614, 0x0e14, 0x0f14, 0x2714, 0x0714, 0x1714, 0x3714 }; -static uint8_t vlc_tab_tone_level_idx_hi1_huffbits[20] = { +static const uint8_t vlc_tab_tone_level_idx_hi1_huffbits[20] = { 15, 4, 2, 1, 3, 5, 6, 7, 8, 10, 10, 11, 11, 12, 12, 12, 14, 14, 15, 14 }; /* values in this table range from -1..23; adjust retrieved value by -1 */ -static uint16_t vlc_tab_tone_level_idx_mid_huffcodes[24] = { +static const uint16_t vlc_tab_tone_level_idx_mid_huffcodes[24] = { 0x0fea, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x03ea, 0x00ea, 0x002a, 0x001a, 0x0006, 0x0001, 0x0000, 0x0002, 0x000a, 0x006a, 0x01ea, 0x07ea }; -static uint8_t vlc_tab_tone_level_idx_mid_huffbits[24] = { +static const uint8_t vlc_tab_tone_level_idx_mid_huffbits[24] = { 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 9, 7, 5, 3, 1, 2, 4, 6, 8, 10, 12 }; /* values in this table range from -1..23; adjust retrieved value by -1 */ -static uint16_t vlc_tab_tone_level_idx_hi2_huffcodes[24] = { +static const uint16_t vlc_tab_tone_level_idx_hi2_huffcodes[24] = { 0x0664, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0064, 0x00e4, 0x00a4, 0x0068, 0x0004, 0x0008, 0x0014, 0x0018, 0x0000, 0x0001, 0x0002, 0x0003, 0x000c, 0x0028, 0x0024, 0x0164, 0x0000, 0x0264 }; -static uint8_t vlc_tab_tone_level_idx_hi2_huffbits[24] = { +static const uint8_t vlc_tab_tone_level_idx_hi2_huffbits[24] = { 11, 0, 0, 0, 0, 0, 10, 8, 8, 7, 6, 6, 5, 5, 4, 2, 2, 2, 4, 7, 8, 9, 0, 11 }; /* values in this table range from -1..8; adjust retrieved value by -1 */ -static uint8_t vlc_tab_type30_huffcodes[9] = { +static const uint8_t vlc_tab_type30_huffcodes[9] = { 0x3c, 0x06, 0x00, 0x01, 0x03, 0x02, 0x04, 0x0c, 0x1c }; -static uint8_t vlc_tab_type30_huffbits[9] = { +static const uint8_t vlc_tab_type30_huffbits[9] = { 6, 3, 3, 2, 2, 3, 4, 5, 6 }; /* values in this table range from -1..9; adjust retrieved value by -1 */ -static uint8_t vlc_tab_type34_huffcodes[10] = { +static const uint8_t vlc_tab_type34_huffcodes[10] = { 0x18, 0x00, 0x01, 0x04, 0x05, 0x07, 0x03, 0x02, 0x06, 0x08 }; -static uint8_t vlc_tab_type34_huffbits[10] = { +static const uint8_t vlc_tab_type34_huffbits[10] = { 5, 4, 3, 3, 3, 3, 3, 3, 3, 5 }; /* values in this table range from -1..22; adjust retrieved value by -1 */ -static uint16_t vlc_tab_fft_tone_offset_0_huffcodes[23] = { +static const uint16_t vlc_tab_fft_tone_offset_0_huffcodes[23] = { 0x038e, 0x0001, 0x0000, 0x0022, 0x000a, 0x0006, 0x0012, 0x0002, 0x001e, 0x003e, 0x0056, 0x0016, 0x000e, 0x0032, 0x0072, 0x0042, 0x008e, 0x004e, 0x00f2, 0x002e, 0x0036, 0x00c2, 0x018e }; -static uint8_t vlc_tab_fft_tone_offset_0_huffbits[23] = { +static const uint8_t vlc_tab_fft_tone_offset_0_huffbits[23] = { 10, 1, 2, 6, 4, 5, 6, 7, 6, 6, 7, 7, 8, 7, 8, 8, 9, 7, 8, 6, 6, 8, 10 }; /* values in this table range from -1..27; adjust retrieved value by -1 */ -static uint16_t vlc_tab_fft_tone_offset_1_huffcodes[28] = { +static const uint16_t vlc_tab_fft_tone_offset_1_huffcodes[28] = { 0x07a4, 0x0001, 0x0020, 0x0012, 0x001c, 0x0008, 0x0006, 0x0010, 0x0000, 0x0014, 0x0004, 0x0032, 0x0070, 0x000c, 0x0002, 0x003a, 0x001a, 0x002c, 0x002a, 0x0022, 0x0024, 0x000a, 0x0064, 0x0030, 0x0062, 0x00a4, 0x01a4, 0x03a4 }; -static uint8_t vlc_tab_fft_tone_offset_1_huffbits[28] = { +static const uint8_t vlc_tab_fft_tone_offset_1_huffbits[28] = { 11, 1, 6, 6, 5, 4, 3, 6, 6, 5, 6, 6, 7, 6, 6, 6, 6, 6, 6, 7, 8, 6, 7, 7, 7, 9, 10, 11 }; /* values in this table range from -1..31; adjust retrieved value by -1 */ -static uint16_t vlc_tab_fft_tone_offset_2_huffcodes[32] = { +static const uint16_t vlc_tab_fft_tone_offset_2_huffcodes[32] = { 0x1760, 0x0001, 0x0000, 0x0082, 0x000c, 0x0006, 0x0003, 0x0007, 0x0008, 0x0004, 0x0010, 0x0012, 0x0022, 0x001a, 0x0000, 0x0020, 0x000a, 0x0040, 0x004a, 0x006a, 0x002a, 0x0042, 0x0002, 0x0060, 0x00aa, 0x00e0, 0x00c2, 0x01c2, 0x0160, 0x0360, 0x0760, 0x0f60 }; -static uint8_t vlc_tab_fft_tone_offset_2_huffbits[32] = { +static const uint8_t vlc_tab_fft_tone_offset_2_huffbits[32] = { 13, 2, 0, 8, 4, 3, 3, 3, 4, 4, 5, 5, 6, 5, 7, 7, 7, 7, 7, 7, 8, 8, 8, 9, 8, 8, 9, 9, 10, 11, 13, 12 }; /* values in this table range from -1..34; adjust retrieved value by -1 */ -static uint16_t vlc_tab_fft_tone_offset_3_huffcodes[35] = { +static const uint16_t vlc_tab_fft_tone_offset_3_huffcodes[35] = { 0x33ea, 0x0005, 0x0000, 0x000c, 0x0000, 0x0006, 0x0003, 0x0008, 0x0002, 0x0001, 0x0004, 0x0007, 0x001a, 0x000f, 0x001c, 0x002c, 0x000a, 0x001d, 0x002d, 0x002a, 0x000d, 0x004c, 0x008c, 0x006a, @@ -163,14 +163,14 @@ static uint16_t vlc_tab_fft_tone_offset_3_huffcodes[35] = { 0x0bea, 0x03ea, 0x13ea }; -static uint8_t vlc_tab_fft_tone_offset_3_huffbits[35] = { +static const uint8_t vlc_tab_fft_tone_offset_3_huffbits[35] = { 14, 4, 0, 10, 4, 3, 3, 4, 4, 3, 4, 4, 5, 4, 5, 6, 6, 5, 6, 7, 7, 7, 8, 8, 8, 8, 9, 10, 10, 10, 10, 11, 12, 13, 14 }; /* values in this table range from -1..37; adjust retrieved value by -1 */ -static uint16_t vlc_tab_fft_tone_offset_4_huffcodes[38] = { +static const uint16_t vlc_tab_fft_tone_offset_4_huffcodes[38] = { 0x5282, 0x0016, 0x0000, 0x0136, 0x0004, 0x0000, 0x0007, 0x000a, 0x000e, 0x0003, 0x0001, 0x000d, 0x0006, 0x0009, 0x0012, 0x0005, 0x0025, 0x0022, 0x0015, 0x0002, 0x0076, 0x0035, 0x0042, 0x00c2, @@ -178,7 +178,7 @@ static uint16_t vlc_tab_fft_tone_offset_4_huffcodes[38] = { 0x0a82, 0x0082, 0x0282, 0x1282, 0x3282, 0x2282 }; -static uint8_t vlc_tab_fft_tone_offset_4_huffbits[38] = { +static const uint8_t vlc_tab_fft_tone_offset_4_huffbits[38] = { 15, 6, 0, 9, 3, 3, 3, 4, 4, 3, 4, 4, 5, 4, 5, 6, 6, 6, 6, 8, 7, 6, 8, 9, 9, 8, 9, 10, 11, 10, 11, 12, 12, 12, 14, 15, 14, 14 @@ -187,44 +187,44 @@ static uint8_t vlc_tab_fft_tone_offset_4_huffbits[38] = { /** FFT TABLES **/ /* values in this table range from -1..27; adjust retrieved value by -1 */ -static uint16_t fft_level_exp_alt_huffcodes[28] = { +static const uint16_t fft_level_exp_alt_huffcodes[28] = { 0x1ec6, 0x0006, 0x00c2, 0x0142, 0x0242, 0x0246, 0x00c6, 0x0046, 0x0042, 0x0146, 0x00a2, 0x0062, 0x0026, 0x0016, 0x000e, 0x0005, 0x0004, 0x0003, 0x0000, 0x0001, 0x000a, 0x0012, 0x0002, 0x0022, 0x01c6, 0x02c6, 0x06c6, 0x0ec6 }; -static uint8_t fft_level_exp_alt_huffbits[28] = { +static const uint8_t fft_level_exp_alt_huffbits[28] = { 13, 7, 8, 9, 10, 10, 10, 10, 10, 9, 8, 7, 6, 5, 4, 3, 3, 2, 3, 3, 4, 5, 7, 8, 9, 11, 12, 13 }; /* values in this table range from -1..19; adjust retrieved value by -1 */ -static uint16_t fft_level_exp_huffcodes[20] = { +static const uint16_t fft_level_exp_huffcodes[20] = { 0x0f24, 0x0001, 0x0002, 0x0000, 0x0006, 0x0005, 0x0007, 0x000c, 0x000b, 0x0014, 0x0013, 0x0004, 0x0003, 0x0023, 0x0064, 0x00a4, 0x0024, 0x0124, 0x0324, 0x0724 }; -static uint8_t fft_level_exp_huffbits[20] = { +static const uint8_t fft_level_exp_huffbits[20] = { 12, 3, 3, 3, 3, 3, 3, 4, 4, 5, 5, 6, 6, 6, 7, 8, 9, 10, 11, 12 }; /* values in this table range from -1..6; adjust retrieved value by -1 */ -static uint8_t fft_stereo_exp_huffcodes[7] = { +static const uint8_t fft_stereo_exp_huffcodes[7] = { 0x3e, 0x01, 0x00, 0x02, 0x06, 0x0e, 0x1e }; -static uint8_t fft_stereo_exp_huffbits[7] = { +static const uint8_t fft_stereo_exp_huffbits[7] = { 6, 1, 2, 3, 4, 5, 6 }; /* values in this table range from -1..8; adjust retrieved value by -1 */ -static uint8_t fft_stereo_phase_huffcodes[9] = { +static const uint8_t fft_stereo_phase_huffcodes[9] = { 0x35, 0x02, 0x00, 0x01, 0x0d, 0x15, 0x05, 0x09, 0x03 }; -static uint8_t fft_stereo_phase_huffbits[9] = { +static const uint8_t fft_stereo_phase_huffbits[9] = { 6, 2, 2, 4, 4, 6, 5, 4, 2 }; diff --git a/src/libffmpeg/libavcodec/rangecoder.c b/src/libffmpeg/libavcodec/rangecoder.c index 8607b8f6d..4266cf1b3 100644 --- a/src/libffmpeg/libavcodec/rangecoder.c +++ b/src/libffmpeg/libavcodec/rangecoder.c @@ -110,7 +110,7 @@ void ff_build_rac_states(RangeCoder *c, int factor, int max_p){ c->one_state[ i]= p8; } - for(i=0; i<256; i++) + for(i=1; i<255; i++) c->zero_state[i]= 256-c->one_state[256-i]; #if 0 for(i=0; i<256; i++) diff --git a/src/libffmpeg/libavcodec/ratecontrol.c b/src/libffmpeg/libavcodec/ratecontrol.c index 29dc1f495..f4f433add 100644 --- a/src/libffmpeg/libavcodec/ratecontrol.c +++ b/src/libffmpeg/libavcodec/ratecontrol.c @@ -117,13 +117,18 @@ int ff_rate_control_init(MpegEncContext *s) p= next; } -#ifdef CONFIG_XVID + + if(init_pass2(s) < 0) return -1; + //FIXME maybe move to end - if((s->flags&CODEC_FLAG_PASS2) && s->avctx->rc_strategy == FF_RC_STRATEGY_XVID) + if((s->flags&CODEC_FLAG_PASS2) && s->avctx->rc_strategy == FF_RC_STRATEGY_XVID) { +#ifdef CONFIG_XVID return ff_xvid_rate_control_init(s); +#else + av_log(s->avctx, AV_LOG_ERROR, "XviD ratecontrol requires libavcodec compiled with XviD support\n"); + return -1; #endif - - if(init_pass2(s) < 0) return -1; + } } if(!(s->flags&CODEC_FLAG_PASS2)){ @@ -906,7 +911,7 @@ static int init_pass2(MpegEncContext *s) av_free(qscale); av_free(blured_qscale); - if(abs(expected_bits/all_available_bits - 1.0) > 0.01 ){ + if(fabs(expected_bits/all_available_bits - 1.0) > 0.01 ){ av_log(s->avctx, AV_LOG_ERROR, "Error: 2pass curve failed to converge\n"); return -1; } diff --git a/src/libffmpeg/libavcodec/raw.c b/src/libffmpeg/libavcodec/raw.c index 28c3cad54..e777397fe 100644 --- a/src/libffmpeg/libavcodec/raw.c +++ b/src/libffmpeg/libavcodec/raw.c @@ -26,7 +26,6 @@ typedef struct RawVideoContext { unsigned char * buffer; /* block of memory for holding one frame */ - unsigned char * p; /* current position in buffer */ int length; /* number of bytes in buffer */ AVFrame pic; ///< AVCodecContext.coded_frame } RawVideoContext; @@ -51,6 +50,9 @@ const PixelFormatTag pixelFormatTags[] = { { PIX_FMT_UYVY422, MKTAG('U', 'Y', 'V', 'Y') }, { PIX_FMT_GRAY8, MKTAG('G', 'R', 'E', 'Y') }, + /* quicktime */ + { PIX_FMT_UYVY422, MKTAG('2', 'v', 'u', 'y') }, + { -1, 0 }, }; @@ -86,6 +88,7 @@ static int raw_init_decoder(AVCodecContext *avctx) avctx->pix_fmt = findPixelFormat(avctx->codec_tag); else if (avctx->bits_per_sample){ switch(avctx->bits_per_sample){ + case 8: avctx->pix_fmt= PIX_FMT_PAL8 ; break; case 15: avctx->pix_fmt= PIX_FMT_RGB555; break; case 16: avctx->pix_fmt= PIX_FMT_RGB565; break; case 24: avctx->pix_fmt= PIX_FMT_BGR24 ; break; @@ -95,7 +98,6 @@ static int raw_init_decoder(AVCodecContext *avctx) context->length = avpicture_get_size(avctx->pix_fmt, avctx->width, avctx->height); context->buffer = av_malloc(context->length); - context->p = context->buffer; context->pic.pict_type = FF_I_TYPE; context->pic.key_frame = 1; @@ -108,7 +110,7 @@ static int raw_init_decoder(AVCodecContext *avctx) } static void flip(AVCodecContext *avctx, AVPicture * picture){ - if(!avctx->codec_tag && avctx->bits_per_sample && picture->linesize[1]==0){ + if(!avctx->codec_tag && avctx->bits_per_sample && picture->linesize[2]==0){ picture->data[0] += picture->linesize[0] * (avctx->height-1); picture->linesize[0] *= -1; } @@ -119,7 +121,6 @@ static int raw_decode(AVCodecContext *avctx, uint8_t *buf, int buf_size) { RawVideoContext *context = avctx->priv_data; - int bytesNeeded; AVFrame * frame = (AVFrame *) data; AVPicture * picture = (AVPicture *) data; @@ -127,27 +128,21 @@ static int raw_decode(AVCodecContext *avctx, frame->interlaced_frame = avctx->coded_frame->interlaced_frame; frame->top_field_first = avctx->coded_frame->top_field_first; - /* Early out without copy if packet size == frame size */ - if (buf_size == context->length && context->p == context->buffer) { - avpicture_fill(picture, buf, avctx->pix_fmt, avctx->width, avctx->height); - flip(avctx, picture); - *data_size = sizeof(AVPicture); - return buf_size; - } + if(buf_size < context->length - (avctx->pix_fmt==PIX_FMT_PAL8 ? 256*4 : 0)) + return -1; - bytesNeeded = context->length - (context->p - context->buffer); - if (buf_size < bytesNeeded) { - memcpy(context->p, buf, buf_size); - context->p += buf_size; - return buf_size; + avpicture_fill(picture, buf, avctx->pix_fmt, avctx->width, avctx->height); + if(avctx->pix_fmt==PIX_FMT_PAL8 && buf_size < context->length){ + frame->data[1]= context->buffer; + } + if (avctx->palctrl && avctx->palctrl->palette_changed) { + memcpy(frame->data[1], avctx->palctrl->palette, AVPALETTE_SIZE); + avctx->palctrl->palette_changed = 0; } - memcpy(context->p, buf, bytesNeeded); - context->p = context->buffer; - avpicture_fill(picture, context->buffer, avctx->pix_fmt, avctx->width, avctx->height); flip(avctx, picture); *data_size = sizeof(AVPicture); - return bytesNeeded; + return buf_size; } static int raw_close_decoder(AVCodecContext *avctx) diff --git a/src/libffmpeg/libavcodec/resample2.c b/src/libffmpeg/libavcodec/resample2.c index 735f612d1..11da57651 100644 --- a/src/libffmpeg/libavcodec/resample2.c +++ b/src/libffmpeg/libavcodec/resample2.c @@ -62,7 +62,7 @@ typedef struct AVResampleContext{ /** * 0th order modified bessel function of the first kind. */ -double bessel(double x){ +static double bessel(double x){ double v=1; double t=1; int i; diff --git a/src/libffmpeg/libavcodec/rv10.c b/src/libffmpeg/libavcodec/rv10.c index 5dd942dc5..daec2b85b 100644 --- a/src/libffmpeg/libavcodec/rv10.c +++ b/src/libffmpeg/libavcodec/rv10.c @@ -321,7 +321,7 @@ static int rv10_decode_picture_header(MpegEncContext *s) pb_frame = get_bits(&s->gb, 1); #ifdef DEBUG - printf("pict_type=%d pb_frame=%d\n", s->pict_type, pb_frame); + av_log(s->avctx, AV_LOG_DEBUG, "pict_type=%d pb_frame=%d\n", s->pict_type, pb_frame); #endif if (pb_frame){ @@ -342,7 +342,7 @@ static int rv10_decode_picture_header(MpegEncContext *s) s->last_dc[1] = get_bits(&s->gb, 8); s->last_dc[2] = get_bits(&s->gb, 8); #ifdef DEBUG - printf("DC:%d %d %d\n", + av_log(s->avctx, AV_LOG_DEBUG, "DC:%d %d %d\n", s->last_dc[0], s->last_dc[1], s->last_dc[2]); @@ -631,7 +631,7 @@ static int rv10_decode_packet(AVCodecContext *avctx, } #ifdef DEBUG - printf("qscale=%d\n", s->qscale); + av_log(avctx, AV_LOG_DEBUG, "qscale=%d\n", s->qscale); #endif /* default quantization values */ @@ -639,9 +639,9 @@ static int rv10_decode_packet(AVCodecContext *avctx, if(s->mb_y==0) s->first_slice_line=1; }else{ s->first_slice_line=1; - s->resync_mb_x= s->mb_x; - s->resync_mb_y= s->mb_y; } + s->resync_mb_x= s->mb_x; + s->resync_mb_y= s->mb_y; if(s->h263_aic){ s->y_dc_scale_table= s->c_dc_scale_table= ff_aic_dc_scale_table; @@ -672,7 +672,7 @@ static int rv10_decode_packet(AVCodecContext *avctx, int ret; ff_update_block_index(s); #ifdef DEBUG - printf("**mb x=%d y=%d\n", s->mb_x, s->mb_y); + av_log(avctx, AV_LOG_DEBUG, "**mb x=%d y=%d\n", s->mb_x, s->mb_y); #endif s->mv_dir = MV_DIR_FORWARD; @@ -713,7 +713,7 @@ static int rv10_decode_frame(AVCodecContext *avctx, AVFrame *pict = data; #ifdef DEBUG - printf("*****frame %d size=%d\n", avctx->frame_number, buf_size); + av_log(avctx, AV_LOG_DEBUG, "*****frame %d size=%d\n", avctx->frame_number, buf_size); #endif /* no supplementary picture */ @@ -737,19 +737,20 @@ static int rv10_decode_frame(AVCodecContext *avctx, rv10_decode_packet(avctx, buf, buf_size); } - if(s->mb_y>=s->mb_height){ + if(s->current_picture_ptr != NULL && s->mb_y>=s->mb_height){ ff_er_frame_end(s); MPV_frame_end(s); - if(s->pict_type==B_TYPE || s->low_delay){ - *pict= *(AVFrame*)&s->current_picture; - ff_print_debug_info(s, pict); - } else { - *pict= *(AVFrame*)&s->last_picture; - ff_print_debug_info(s, pict); + if (s->pict_type == B_TYPE || s->low_delay) { + *pict= *(AVFrame*)s->current_picture_ptr; + } else if (s->last_picture_ptr != NULL) { + *pict= *(AVFrame*)s->last_picture_ptr; } - if(s->last_picture_ptr || s->low_delay) + + if(s->last_picture_ptr || s->low_delay){ *data_size = sizeof(AVFrame); + ff_print_debug_info(s, pict); + } s->current_picture_ptr= NULL; //so we can detect if frame_end wasnt called (find some nicer solution...) } diff --git a/src/libffmpeg/libavcodec/shorten.c b/src/libffmpeg/libavcodec/shorten.c index 4d80d40a5..af1c3fe6e 100644 --- a/src/libffmpeg/libavcodec/shorten.c +++ b/src/libffmpeg/libavcodec/shorten.c @@ -106,18 +106,27 @@ static int shorten_decode_init(AVCodecContext * avctx) return 0; } -static void allocate_buffers(ShortenContext *s) +static int allocate_buffers(ShortenContext *s) { int i, chan; for (chan=0; chan<s->channels; chan++) { + if(FFMAX(1, s->nmean) >= UINT_MAX/sizeof(int32_t)){ + av_log(s->avctx, AV_LOG_ERROR, "nmean too large\n"); + return -1; + } + if(s->blocksize + s->nwrap >= UINT_MAX/sizeof(int32_t) || s->blocksize + s->nwrap <= (unsigned)s->nwrap){ + av_log(s->avctx, AV_LOG_ERROR, "s->blocksize + s->nwrap too large\n"); + return -1; + } + s->offset[chan] = av_realloc(s->offset[chan], sizeof(int32_t)*FFMAX(1, s->nmean)); s->decoded[chan] = av_realloc(s->decoded[chan], sizeof(int32_t)*(s->blocksize + s->nwrap)); for (i=0; i<s->nwrap; i++) s->decoded[chan][i] = 0; s->decoded[chan] += s->nwrap; - } + return 0; } diff --git a/src/libffmpeg/libavcodec/snow.c b/src/libffmpeg/libavcodec/snow.c index ad69c3241..05ad44726 100644 --- a/src/libffmpeg/libavcodec/snow.c +++ b/src/libffmpeg/libavcodec/snow.c @@ -19,23 +19,15 @@ #include "avcodec.h" #include "common.h" #include "dsputil.h" +#include "snow.h" #include "rangecoder.h" -#define MID_STATE 128 #include "mpegvideo.h" #undef NDEBUG #include <assert.h> -#define MAX_DECOMPOSITIONS 8 -#define MAX_PLANES 4 -#define DWTELEM int -#define QSHIFT 5 -#define QROOT (1<<QSHIFT) -#define LOSSLESS_QLOG -128 -#define FRAC_BITS 8 - static const int8_t quant3[256]={ 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -181,8 +173,6 @@ static const int8_t quant13[256]={ -4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1, }; -#define LOG2_OBMC_MAX 6 -#define OBMC_MAX (1<<(LOG2_OBMC_MAX)) #if 0 //64*cubic static const uint8_t obmc32[1024]={ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -240,57 +230,57 @@ static const uint8_t obmc16[256]={ }; #elif 1 // 64*linear static const uint8_t obmc32[1024]={ - 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, - 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0, - 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0, - 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0, - 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1, - 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1, - 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1, - 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1, - 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1, - 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1, - 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1, - 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1, - 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2, - 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2, - 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2, - 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2, - 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2, - 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2, - 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2, - 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2, - 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1, - 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1, - 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1, - 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1, - 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1, - 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1, - 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1, - 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1, - 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0, - 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0, - 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0, - 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, + 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, + 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0, + 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0, + 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0, + 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4, + 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4, + 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4, + 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4, + 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4, + 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4, + 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4, + 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4, + 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8, + 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8, + 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8, + 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8, + 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8, + 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8, + 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8, + 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8, + 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4, + 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4, + 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4, + 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4, + 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4, + 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4, + 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4, + 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4, + 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0, + 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0, + 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0, + 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, //error:0.000020 }; static const uint8_t obmc16[256]={ - 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0, - 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1, - 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1, - 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2, - 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2, - 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3, - 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3, - 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4, - 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4, - 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3, - 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3, - 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2, - 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2, - 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1, - 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1, - 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0, + 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0, + 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4, + 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4, + 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8, + 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8, + 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12, + 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12, + 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16, + 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16, + 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12, + 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12, + 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8, + 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8, + 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4, + 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4, + 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0, //error:0.000015 }; #else //64*cos @@ -352,23 +342,23 @@ static const uint8_t obmc16[256]={ //linear *64 static const uint8_t obmc8[64]={ - 1, 3, 5, 7, 7, 5, 3, 1, - 3, 9,15,21,21,15, 9, 3, - 5,15,25,35,35,25,15, 5, - 7,21,35,49,49,35,21, 7, - 7,21,35,49,49,35,21, 7, - 5,15,25,35,35,25,15, 5, - 3, 9,15,21,21,15, 9, 3, - 1, 3, 5, 7, 7, 5, 3, 1, + 4, 12, 20, 28, 28, 20, 12, 4, + 12, 36, 60, 84, 84, 60, 36, 12, + 20, 60,100,140,140,100, 60, 20, + 28, 84,140,196,196,140, 84, 28, + 28, 84,140,196,196,140, 84, 28, + 20, 60,100,140,140,100, 60, 20, + 12, 36, 60, 84, 84, 60, 36, 12, + 4, 12, 20, 28, 28, 20, 12, 4, //error:0.000000 }; //linear *64 static const uint8_t obmc4[16]={ - 4,12,12, 4, -12,36,36,12, -12,36,36,12, - 4,12,12, 4, + 16, 48, 48, 16, + 48,144,144, 48, + 48,144,144, 48, + 16, 48, 48, 16, //error:0.000000 }; @@ -376,9 +366,12 @@ static const uint8_t *obmc_tab[4]={ obmc32, obmc16, obmc8, obmc4 }; +static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES]; + typedef struct BlockNode{ int16_t mx; int16_t my; + uint8_t ref; uint8_t color[3]; uint8_t type; //#define TYPE_SPLIT 1 @@ -392,6 +385,7 @@ static const BlockNode null_block= { //FIXME add border maybe .color= {128,128,128}, .mx= 0, .my= 0, + .ref= 0, .type= 0, .level= 0, }; @@ -425,17 +419,6 @@ typedef struct Plane{ SubBand band[MAX_DECOMPOSITIONS][4]; }Plane; -/** Used to minimize the amount of memory used in order to optimize cache performance. **/ -typedef struct { - DWTELEM * * line; ///< For use by idwt and predict_slices. - DWTELEM * * data_stack; ///< Used for internal purposes. - int data_stack_top; - int line_count; - int line_width; - int data_count; - DWTELEM * base_buffer; ///< Buffer that this structure is caching. -} slice_buffer; - typedef struct SnowContext{ // MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX) @@ -445,7 +428,7 @@ typedef struct SnowContext{ AVFrame new_picture; AVFrame input_picture; ///< new_picture with the internal linesizes AVFrame current_picture; - AVFrame last_picture; + AVFrame last_picture[MAX_REF_FRAMES]; AVFrame mconly_picture; // uint8_t q_context[16]; uint8_t header_state[32]; @@ -457,6 +440,10 @@ typedef struct SnowContext{ int temporal_decomposition_type; int spatial_decomposition_count; int temporal_decomposition_count; + int max_ref_frames; + int ref_frames; + int16_t (*ref_mvs[MAX_REF_FRAMES])[2]; + uint32_t *ref_scores[MAX_REF_FRAMES]; DWTELEM *spatial_dwt_buffer; int colorspace_type; int chroma_h_shift; @@ -465,6 +452,7 @@ typedef struct SnowContext{ int qlog; int lambda; int lambda2; + int pass1_rc; int mv_scale; int qbias; #define QBIAS_SHIFT 3 @@ -573,12 +561,12 @@ static void slice_buffer_destroy(slice_buffer * buf) for (i = buf->data_count - 1; i >= 0; i--) { assert(buf->data_stack[i]); - av_free(buf->data_stack[i]); + av_freep(&buf->data_stack[i]); } assert(buf->data_stack); - av_free(buf->data_stack); + av_freep(&buf->data_stack); assert(buf->line); - av_free(buf->line); + av_freep(&buf->line); } #ifdef __sgi @@ -741,6 +729,7 @@ static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst } } +#ifndef lift5 static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){ const int mirror_left= !highpass; const int mirror_right= (width&1) ^ highpass; @@ -770,7 +759,9 @@ static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int ds dst[w*dst_step] = LIFT(src[w*src_step], ((r+add)>>shift), inverse); } } +#endif +#ifndef liftS static always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){ const int mirror_left= !highpass; const int mirror_right= (width&1) ^ highpass; @@ -793,6 +784,7 @@ static always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int ds dst[w*dst_step] = LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse); } } +#endif static void inplace_lift(DWTELEM *dst, int width, int *coeffs, int n, int shift, int start, int inverse){ @@ -1111,76 +1103,6 @@ STOP_TIMER("vertical_decompose53i*")} } } -#define liftS lift -#define lift5 lift -#if 1 -#define W_AM 3 -#define W_AO 0 -#define W_AS 1 - -#undef liftS -#define W_BM 1 -#define W_BO 8 -#define W_BS 4 - -#define W_CM 1 -#define W_CO 0 -#define W_CS 0 - -#define W_DM 3 -#define W_DO 4 -#define W_DS 3 -#elif 0 -#define W_AM 55 -#define W_AO 16 -#define W_AS 5 - -#define W_BM 3 -#define W_BO 32 -#define W_BS 6 - -#define W_CM 127 -#define W_CO 64 -#define W_CS 7 - -#define W_DM 7 -#define W_DO 8 -#define W_DS 4 -#elif 0 -#define W_AM 97 -#define W_AO 32 -#define W_AS 6 - -#define W_BM 63 -#define W_BO 512 -#define W_BS 10 - -#define W_CM 13 -#define W_CO 8 -#define W_CS 4 - -#define W_DM 15 -#define W_DO 16 -#define W_DS 5 - -#else - -#define W_AM 203 -#define W_AO 64 -#define W_AS 7 - -#define W_BM 217 -#define W_BO 2048 -#define W_BS 12 - -#define W_CM 113 -#define W_CO 64 -#define W_CS 7 - -#define W_DM 227 -#define W_DO 128 -#define W_DS 9 -#endif static void horizontal_decompose97i(DWTELEM *b, int width){ DWTELEM temp[width]; const int w2= (width+1)>>1; @@ -1275,9 +1197,9 @@ void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type for(level=0; level<decomposition_count; level++){ switch(type){ - case 0: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break; - case 1: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break; - case 2: spatial_decomposeX (buffer, width>>level, height>>level, stride<<level); break; + case DWT_97: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break; + case DWT_53: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break; + case DWT_X: spatial_decomposeX (buffer, width>>level, height>>level, stride<<level); break; } } } @@ -1410,7 +1332,7 @@ static void spatial_compose53i(DWTELEM *buffer, int width, int height, int strid } -static void horizontal_compose97i(DWTELEM *b, int width){ +void ff_snow_horizontal_compose97i(DWTELEM *b, int width){ DWTELEM temp[width]; const int w2= (width+1)>>1; @@ -1463,7 +1385,7 @@ static void vertical_compose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int wid } } -static void vertical_compose97i(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width){ +void ff_snow_vertical_compose97i(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width){ int i; for(i=0; i<width; i++){ @@ -1504,7 +1426,7 @@ static void spatial_compose97i_init(dwt_compose_t *cs, DWTELEM *buffer, int heig cs->y = -3; } -static void spatial_compose97i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){ +static void spatial_compose97i_dy_buffered(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){ int y = cs->y; DWTELEM *b0= cs->b0; @@ -1516,7 +1438,7 @@ static void spatial_compose97i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, {START_TIMER if(y>0 && y+4<height){ - vertical_compose97i(b0, b1, b2, b3, b4, b5, width); + dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width); }else{ if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width); if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width); @@ -1527,8 +1449,8 @@ if(width>400){ STOP_TIMER("vertical_compose97i")}} {START_TIMER - if(y-1<(unsigned)height) horizontal_compose97i(b0, width); - if(y+0<(unsigned)height) horizontal_compose97i(b1, width); + if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width); + if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width); if(width>400 && y+0<(unsigned)height){ STOP_TIMER("horizontal_compose97i")}} @@ -1557,8 +1479,8 @@ if(width>400){ STOP_TIMER("vertical_compose97i")}} {START_TIMER - if(y-1<(unsigned)height) horizontal_compose97i(b0, width); - if(y+0<(unsigned)height) horizontal_compose97i(b1, width); + if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width); + if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width); if(width>400 && b0 <= b2){ STOP_TIMER("horizontal_compose97i")}} @@ -1580,10 +1502,10 @@ static void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int level; for(level=decomposition_count-1; level>=0; level--){ switch(type){ - case 0: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break; - case 1: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break; + case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break; + case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break; /* not slicified yet */ - case 2: /*spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;*/ + case DWT_X: /*spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;*/ av_log(NULL, AV_LOG_ERROR, "spatial_composeX neither buffered nor slicified yet.\n"); break; } } @@ -1593,10 +1515,10 @@ static void ff_spatial_idwt_init(dwt_compose_t *cs, DWTELEM *buffer, int width, int level; for(level=decomposition_count-1; level>=0; level--){ switch(type){ - case 0: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break; - case 1: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break; + case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break; + case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break; /* not slicified yet */ - case 2: spatial_composeX(buffer, width>>level, height>>level, stride<<level); break; + case DWT_X: spatial_composeX(buffer, width>>level, height>>level, stride<<level); break; } } } @@ -1609,17 +1531,17 @@ static void ff_spatial_idwt_slice(dwt_compose_t *cs, DWTELEM *buffer, int width, for(level=decomposition_count-1; level>=0; level--){ while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){ switch(type){ - case 0: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level); + case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level); break; - case 1: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level); + case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level); break; - case 2: break; + case DWT_X: break; } } } } -static void ff_spatial_idwt_buffered_slice(dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){ +static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){ const int support = type==1 ? 3 : 5; int level; if(type==2) return; @@ -1627,11 +1549,11 @@ static void ff_spatial_idwt_buffered_slice(dwt_compose_t *cs, slice_buffer * sli for(level=decomposition_count-1; level>=0; level--){ while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){ switch(type){ - case 0: spatial_compose97i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level); + case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level); break; - case 1: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level); + case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level); break; - case 2: break; + case DWT_X: break; } } } @@ -1991,7 +1913,7 @@ static int pix_norm1(uint8_t * pix, int line_size, int w) return s; } -static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int type){ +static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){ const int w= s->b_width << s->block_max_depth; const int rem_depth= s->block_max_depth - level; const int index= (x + y*w) << rem_depth; @@ -2004,6 +1926,7 @@ static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, in block.color[2]= cr; block.mx= mx; block.my= my; + block.ref= ref; block.type= type; block.level= level; @@ -2028,6 +1951,22 @@ static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3 assert(!ref_index); } +static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref, + BlockNode *left, BlockNode *top, BlockNode *tr){ + if(s->ref_frames == 1){ + *mx = mid_pred(left->mx, top->mx, tr->mx); + *my = mid_pred(left->my, top->my, tr->my); + }else{ + const int *scale = scale_mv_ref[ref]; + *mx = mid_pred(left->mx * scale[left->ref] + 128 >>8, + top ->mx * scale[top ->ref] + 128 >>8, + tr ->mx * scale[tr ->ref] + 128 >>8); + *my = mid_pred(left->my * scale[left->ref] + 128 >>8, + top ->my * scale[top ->ref] + 128 >>8, + tr ->my * scale[tr ->ref] + 128 >>8); + } +} + //FIXME copy&paste #define P_LEFT P[1] #define P_TOP P[2] @@ -2062,8 +2001,7 @@ static int encode_q_branch(SnowContext *s, int level, int x, int y){ int pl = left->color[0]; int pcb= left->color[1]; int pcr= left->color[2]; - int pmx= mid_pred(left->mx, top->mx, tr->mx); - int pmy= mid_pred(left->my, top->my, tr->my); + int pmx, pmy; int mx=0, my=0; int l,cr,cb; const int stride= s->current_picture.linesize[0]; @@ -2076,13 +2014,15 @@ static int encode_q_branch(SnowContext *s, int level, int x, int y){ int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused const int shift= 1+qpel; MotionEstContext *c= &s->m.me; + int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref); int mx_context= av_log2(2*ABS(left->mx - top->mx)); int my_context= av_log2(2*ABS(left->my - top->my)); int s_context= 2*left->level + 2*top->level + tl->level + tr->level; + int ref, best_ref, ref_score, ref_mx, ref_my; assert(sizeof(s->block_state) >= 256); if(s->keyframe){ - set_blocks(s, level, x, y, pl, pcb, pcr, pmx, pmy, BLOCK_INTRA); + set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA); return 0; } @@ -2107,8 +2047,6 @@ static int encode_q_branch(SnowContext *s, int level, int x, int y){ s->m.mb_y= 0; s->m.me.skip= 0; - init_ref(c, current_data, s->last_picture.data, NULL, block_w*x, block_w*y, 0); - assert(s->m.me. stride == stride); assert(s->m.me.uvstride == uvstride); @@ -2141,16 +2079,34 @@ static int encode_q_branch(SnowContext *s, int level, int x, int y){ c->pred_y = P_MEDIAN[1]; } - score= ff_epzs_motion_search(&s->m, &mx, &my, P, 0, /*ref_index*/ 0, last_mv, - (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w); + score= INT_MAX; + best_ref= 0; + for(ref=0; ref<s->ref_frames; ref++){ + init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0); + + ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv, + (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w); - assert(mx >= c->xmin); - assert(mx <= c->xmax); - assert(my >= c->ymin); - assert(my <= c->ymax); + assert(ref_mx >= c->xmin); + assert(ref_mx <= c->xmax); + assert(ref_my >= c->ymin); + assert(ref_my <= c->ymax); - score= s->m.me.sub_motion_search(&s->m, &mx, &my, score, 0, 0, level-LOG2_MB_SIZE+4, block_w); - score= ff_get_mb_score(&s->m, mx, my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0); + ref_score= s->m.me.sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w); + ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0); + ref_score+= 2*av_log2(2*ref)*c->penalty_factor; + if(s->ref_mvs[ref]){ + s->ref_mvs[ref][index][0]= ref_mx; + s->ref_mvs[ref][index][1]= ref_my; + s->ref_scores[ref][index]= ref_score; + } + if(score > ref_score){ + score= ref_score; + best_ref= ref; + mx= ref_mx; + my= ref_my; + } + } //FIXME if mb_cmp != SSE then intra cant be compared currently and mb_penalty vs. lambda2 // subpel search @@ -2162,8 +2118,11 @@ static int encode_q_branch(SnowContext *s, int level, int x, int y){ if(level!=s->block_max_depth) put_rac(&pc, &p_state[4 + s_context], 1); put_rac(&pc, &p_state[1 + left->type + top->type], 0); - put_symbol(&pc, &p_state[128 + 32*mx_context], mx - pmx, 1); - put_symbol(&pc, &p_state[128 + 32*my_context], my - pmy, 1); + if(s->ref_frames > 1) + put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0); + pred_mv(s, &pmx, &pmy, best_ref, left, top, tr); + put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1); + put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1); p_len= pc.bytestream - pc.bytestream_start; score += (s->lambda2*(p_len*8 + (pc.outstanding_count - s->c.outstanding_count)*8 @@ -2227,11 +2186,12 @@ static int encode_q_branch(SnowContext *s, int level, int x, int y){ } if(iscore < score){ + pred_mv(s, &pmx, &pmy, 0, left, top, tr); memcpy(pbbak, i_buffer, i_len); s->c= ic; s->c.bytestream_start= pbbak_start; s->c.bytestream= pbbak + i_len; - set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, BLOCK_INTRA); + set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA); memcpy(s->block_state, i_state, sizeof(s->block_state)); return iscore; }else{ @@ -2239,7 +2199,7 @@ static int encode_q_branch(SnowContext *s, int level, int x, int y){ s->c= pc; s->c.bytestream_start= pbbak_start; s->c.bytestream= pbbak + p_len; - set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, 0); + set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0); memcpy(s->block_state, p_state, sizeof(s->block_state)); return score; } @@ -2250,7 +2210,7 @@ static always_inline int same_block(BlockNode *a, BlockNode *b){ if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){ return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2])); }else{ - return !((a->mx - b->mx) | (a->my - b->my) | ((a->type ^ b->type)&BLOCK_INTRA)); + return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA)); } } @@ -2267,14 +2227,14 @@ static void encode_q_branch2(SnowContext *s, int level, int x, int y){ int pl = left->color[0]; int pcb= left->color[1]; int pcr= left->color[2]; - int pmx= mid_pred(left->mx, top->mx, tr->mx); - int pmy= mid_pred(left->my, top->my, tr->my); - int mx_context= av_log2(2*ABS(left->mx - top->mx)); - int my_context= av_log2(2*ABS(left->my - top->my)); + int pmx, pmy; + int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref); + int mx_context= av_log2(2*ABS(left->mx - top->mx)) + 16*!!b->ref; + int my_context= av_log2(2*ABS(left->my - top->my)) + 16*!!b->ref; int s_context= 2*left->level + 2*top->level + tl->level + tr->level; if(s->keyframe){ - set_blocks(s, level, x, y, pl, pcb, pcr, pmx, pmy, BLOCK_INTRA); + set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA); return; } @@ -2291,16 +2251,20 @@ static void encode_q_branch2(SnowContext *s, int level, int x, int y){ } } if(b->type & BLOCK_INTRA){ + pred_mv(s, &pmx, &pmy, 0, left, top, tr); put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1); put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1); put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1); put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1); - set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, BLOCK_INTRA); + set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA); }else{ + pred_mv(s, &pmx, &pmy, b->ref, left, top, tr); put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0); + if(s->ref_frames > 1) + put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0); put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1); put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1); - set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, 0); + set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0); } } @@ -2316,7 +2280,7 @@ static void decode_q_branch(SnowContext *s, int level, int x, int y){ int s_context= 2*left->level + 2*top->level + tl->level + tr->level; if(s->keyframe){ - set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, BLOCK_INTRA); + set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA); return; } @@ -2327,20 +2291,26 @@ static void decode_q_branch(SnowContext *s, int level, int x, int y){ int cr= left->color[2]; int mx= mid_pred(left->mx, top->mx, tr->mx); int my= mid_pred(left->my, top->my, tr->my); + int ref = 0; + int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref); int mx_context= av_log2(2*ABS(left->mx - top->mx)) + 0*av_log2(2*ABS(tr->mx - top->mx)); int my_context= av_log2(2*ABS(left->my - top->my)) + 0*av_log2(2*ABS(tr->my - top->my)); type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0; if(type){ + pred_mv(s, &mx, &my, 0, left, top, tr); l += get_symbol(&s->c, &s->block_state[32], 1); cb+= get_symbol(&s->c, &s->block_state[64], 1); cr+= get_symbol(&s->c, &s->block_state[96], 1); }else{ - mx+= get_symbol(&s->c, &s->block_state[128 + 32*mx_context], 1); - my+= get_symbol(&s->c, &s->block_state[128 + 32*my_context], 1); + if(s->ref_frames > 1) + ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0); + pred_mv(s, &mx, &my, ref, left, top, tr); + mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1); + my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1); } - set_blocks(s, level, x, y, l, cb, cr, mx, my, type); + set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type); }else{ decode_q_branch(s, level+1, 2*x+0, 2*y+0); decode_q_branch(s, level+1, 2*x+1, 2*y+0); @@ -2470,7 +2440,7 @@ mca( 8, 0,8) mca( 0, 8,8) mca( 8, 8,8) -static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){ +static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){ if(block->type & BLOCK_INTRA){ int x, y; const int color = block->color[plane_index]; @@ -2510,6 +2480,7 @@ static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *src, uint8_t *tmp, } } }else{ + uint8_t *src= s->last_picture[block->ref].data[plane_index]; const int scale= plane_index ? s->mv_scale : 2*s->mv_scale; int mx= block->mx*scale; int my= block->my*scale; @@ -2524,11 +2495,11 @@ static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *src, uint8_t *tmp, ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+5, b_h+5, sx, sy, w, h); src= tmp + MB_SIZE; } - assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h); - assert(!(b_w&(b_w-1))); +// assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h); +// assert(!(b_w&(b_w-1))); assert(b_w>1 && b_h>1); assert(tab_index>=0 && tab_index<4 || b_w==32); - if((dx&3) || (dy&3)) + if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1))) mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy); else if(b_w==32){ int y; @@ -2549,8 +2520,42 @@ static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *src, uint8_t *tmp, } } +void ff_snow_inner_add_yblock(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, + int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){ + int y, x; + DWTELEM * dst; + for(y=0; y<b_h; y++){ + //FIXME ugly missue of obmc_stride + uint8_t *obmc1= obmc + y*obmc_stride; + uint8_t *obmc2= obmc1+ (obmc_stride>>1); + uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1); + uint8_t *obmc4= obmc3+ (obmc_stride>>1); + dst = slice_buffer_get_line(sb, src_y + y); + for(x=0; x<b_w; x++){ + int v= obmc1[x] * block[3][x + y*src_stride] + +obmc2[x] * block[2][x + y*src_stride] + +obmc3[x] * block[1][x + y*src_stride] + +obmc4[x] * block[0][x + y*src_stride]; + + v <<= 8 - LOG2_OBMC_MAX; + if(FRAC_BITS != 8){ + v += 1<<(7 - FRAC_BITS); + v >>= 8 - FRAC_BITS; + } + if(add){ + v += dst[x + src_x]; + v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS; + if(v&(~255)) v= ~(v>>31); + dst8[x + y*src_stride] = v; + }else{ + dst[x + src_x] -= v; + } + } + } +} + //FIXME name clenup (b_w, block_w, b_width stuff) -static always_inline void add_yblock_buffered(SnowContext *s, slice_buffer * sb, DWTELEM *old_dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){ +static always_inline void add_yblock_buffered(SnowContext *s, slice_buffer * sb, DWTELEM *old_dst, uint8_t *dst8, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){ DWTELEM * dst = NULL; const int b_width = s->b_width << s->block_max_depth; const int b_height= s->b_height << s->block_max_depth; @@ -2605,14 +2610,14 @@ assert(src_stride > 2*MB_SIZE + 5); ptmp= tmp + 3*tmp_step; block[0]= ptmp; ptmp+=tmp_step; - pred_block(s, block[0], src, tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h); + pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h); if(same_block(lt, rt)){ block[1]= block[0]; }else{ block[1]= ptmp; ptmp+=tmp_step; - pred_block(s, block[1], src, tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h); + pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h); } if(same_block(lt, lb)){ @@ -2622,7 +2627,7 @@ assert(src_stride > 2*MB_SIZE + 5); }else{ block[2]= ptmp; ptmp+=tmp_step; - pred_block(s, block[2], src, tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h); + pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h); } if(same_block(lt, rb) ){ @@ -2633,7 +2638,7 @@ assert(src_stride > 2*MB_SIZE + 5); block[3]= block[2]; }else{ block[3]= ptmp; - pred_block(s, block[3], src, tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h); + pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h); } #if 0 for(y=0; y<b_h; y++){ @@ -2673,43 +2678,14 @@ assert(src_stride > 2*MB_SIZE + 5); START_TIMER - for(y=0; y<b_h; y++){ - //FIXME ugly missue of obmc_stride - uint8_t *obmc1= obmc + y*obmc_stride; - uint8_t *obmc2= obmc1+ (obmc_stride>>1); - uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1); - uint8_t *obmc4= obmc3+ (obmc_stride>>1); - dst = slice_buffer_get_line(sb, src_y + y); - for(x=0; x<b_w; x++){ - int v= obmc1[x] * block[3][x + y*src_stride] - +obmc2[x] * block[2][x + y*src_stride] - +obmc3[x] * block[1][x + y*src_stride] - +obmc4[x] * block[0][x + y*src_stride]; - - v <<= 8 - LOG2_OBMC_MAX; - if(FRAC_BITS != 8){ - v += 1<<(7 - FRAC_BITS); - v >>= 8 - FRAC_BITS; - } - if(add){ -// v += old_dst[x + y*dst_stride]; - v += dst[x + src_x]; - v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS; - if(v&(~255)) v= ~(v>>31); - dst8[x + y*src_stride] = v; - }else{ -// old_dst[x + y*dst_stride] -= v; - dst[x + src_x] -= v; - } - } - } + s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8); STOP_TIMER("Inner add y block") } #endif } //FIXME name clenup (b_w, block_w, b_width stuff) -static always_inline void add_yblock(SnowContext *s, DWTELEM *dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){ +static always_inline void add_yblock(SnowContext *s, DWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){ const int b_width = s->b_width << s->block_max_depth; const int b_height= s->b_height << s->block_max_depth; const int b_stride= b_width; @@ -2768,14 +2744,14 @@ assert(src_stride > 2*MB_SIZE + 5); ptmp= tmp + 3*tmp_step; block[0]= ptmp; ptmp+=tmp_step; - pred_block(s, block[0], src, tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h); + pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h); if(same_block(lt, rt)){ block[1]= block[0]; }else{ block[1]= ptmp; ptmp+=tmp_step; - pred_block(s, block[1], src, tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h); + pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h); } if(same_block(lt, lb)){ @@ -2785,7 +2761,7 @@ assert(src_stride > 2*MB_SIZE + 5); }else{ block[2]= ptmp; ptmp+=tmp_step; - pred_block(s, block[2], src, tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h); + pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h); } if(same_block(lt, rb) ){ @@ -2796,7 +2772,7 @@ assert(src_stride > 2*MB_SIZE + 5); block[3]= block[2]; }else{ block[3]= ptmp; - pred_block(s, block[3], src, tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h); + pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h); } #if 0 for(y=0; y<b_h; y++){ @@ -2872,7 +2848,6 @@ static always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; int obmc_stride= plane_index ? block_size : 2*block_size; int ref_stride= s->current_picture.linesize[plane_index]; - uint8_t *ref = s->last_picture.data[plane_index]; uint8_t *dst8= s->current_picture.data[plane_index]; int w= p->width; int h= p->height; @@ -2915,7 +2890,7 @@ static always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * for(mb_x=0; mb_x<=mb_w; mb_x++){ START_TIMER - add_yblock_buffered(s, sb, old_buffer, dst8, ref, obmc, + add_yblock_buffered(s, sb, old_buffer, dst8, obmc, block_w*mb_x - block_w/2, block_w*mb_y - block_w/2, block_w, block_w, @@ -2940,7 +2915,6 @@ static always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_ const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; const int obmc_stride= plane_index ? block_size : 2*block_size; int ref_stride= s->current_picture.linesize[plane_index]; - uint8_t *ref = s->last_picture.data[plane_index]; uint8_t *dst8= s->current_picture.data[plane_index]; int w= p->width; int h= p->height; @@ -2973,7 +2947,7 @@ static always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_ for(mb_x=0; mb_x<=mb_w; mb_x++){ START_TIMER - add_yblock(s, buf, dst8, ref, obmc, + add_yblock(s, buf, dst8, obmc, block_w*mb_x - block_w/2, block_w*mb_y - block_w/2, block_w, block_w, @@ -3003,7 +2977,6 @@ static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){ const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; const int obmc_stride= plane_index ? block_size : 2*block_size; const int ref_stride= s->current_picture.linesize[plane_index]; - uint8_t *ref= s-> last_picture.data[plane_index]; uint8_t *src= s-> input_picture.data[plane_index]; DWTELEM *dst= (DWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; const int b_stride = s->b_width << s->block_max_depth; @@ -3025,7 +2998,7 @@ static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){ int x= block_w*mb_x2 + block_w/2; int y= block_w*mb_y2 + block_w/2; - add_yblock(s, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, ref, obmc, + add_yblock(s, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc, x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index); for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){ @@ -3048,7 +3021,7 @@ static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){ } *b= backup; - return clip(((ab<<6) + aa/2)/aa, 0, 255); //FIXME we shouldnt need cliping + return clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we shouldnt need cliping } static inline int get_block_bits(SnowContext *s, int x, int y, int w){ @@ -3066,8 +3039,6 @@ static inline int get_block_bits(SnowContext *s, int x, int y, int w){ if(x<0 || x>=b_stride || y>=b_height) return 0; - dmx= b->mx - mid_pred(left->mx, top->mx, tr->mx); - dmy= b->my - mid_pred(left->my, top->my, tr->my); /* 1 0 0 01X 1-2 1 @@ -3081,9 +3052,14 @@ static inline int get_block_bits(SnowContext *s, int x, int y, int w){ return 3+2*( av_log2(2*ABS(left->color[0] - b->color[0])) + av_log2(2*ABS(left->color[1] - b->color[1])) + av_log2(2*ABS(left->color[2] - b->color[2]))); - }else - return 2*(1 + av_log2(2*ABS(dmx)) - + av_log2(2*ABS(dmy))); //FIXME kill the 2* can be merged in lambda + }else{ + pred_mv(s, &dmx, &dmy, b->ref, left, top, tr); + dmx-= b->mx; + dmy-= b->my; + return 2*(1 + av_log2(2*ABS(dmx)) //FIXME kill the 2* can be merged in lambda + + av_log2(2*ABS(dmy)) + + av_log2(2*b->ref)); + } } static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){ @@ -3093,7 +3069,6 @@ static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, con const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; const int obmc_stride= plane_index ? block_size : 2*block_size; const int ref_stride= s->current_picture.linesize[plane_index]; - uint8_t *ref= s-> last_picture.data[plane_index]; uint8_t *dst= s->current_picture.data[plane_index]; uint8_t *src= s-> input_picture.data[plane_index]; DWTELEM *pred= (DWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; @@ -3108,13 +3083,13 @@ static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, con const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp); int sx= block_w*mb_x - block_w/2; int sy= block_w*mb_y - block_w/2; - const int x0= FFMAX(0,-sx); - const int y0= FFMAX(0,-sy); - const int x1= FFMIN(block_w*2, w-sx); - const int y1= FFMIN(block_w*2, h-sy); + int x0= FFMAX(0,-sx); + int y0= FFMAX(0,-sy); + int x1= FFMIN(block_w*2, w-sx); + int y1= FFMIN(block_w*2, h-sy); int i,x,y; - pred_block(s, cur, ref, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h); + pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h); for(y=y0; y<y1; y++){ const uint8_t *obmc1= obmc_edged + y*obmc_stride; @@ -3129,12 +3104,39 @@ static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, con } } - //FIXME sad/ssd can be broken up, but wavelet cmp should be one 32x32 block + /* copy the regions where obmc[] = (uint8_t)256 */ + if(LOG2_OBMC_MAX == 8 + && (mb_x == 0 || mb_x == b_stride-1) + && (mb_y == 0 || mb_y == b_height-1)){ + if(mb_x == 0) + x1 = block_w; + else + x0 = block_w; + if(mb_y == 0) + y1 = block_w; + else + y0 = block_w; + for(y=y0; y<y1; y++) + memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0); + } + if(block_w==16){ - distortion = 0; - for(i=0; i<4; i++){ - int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride; - distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16); + /* FIXME rearrange dsputil to fit 32x32 cmp functions */ + /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */ + /* FIXME cmps overlap but don't cover the wavelet's whole support, + * so improving the score of one block is not strictly guaranteed to + * improve the score of the whole frame, so iterative motion est + * doesn't always converge. */ + if(s->avctx->me_cmp == FF_CMP_W97) + distortion = w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32); + else if(s->avctx->me_cmp == FF_CMP_W53) + distortion = w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32); + else{ + distortion = 0; + for(i=0; i<4; i++){ + int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride; + distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16); + } } }else{ assert(block_w==8); @@ -3163,7 +3165,6 @@ static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){ const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; const int obmc_stride= plane_index ? block_size : 2*block_size; const int ref_stride= s->current_picture.linesize[plane_index]; - uint8_t *ref= s-> last_picture.data[plane_index]; uint8_t *dst= s->current_picture.data[plane_index]; uint8_t *src= s-> input_picture.data[plane_index]; const static DWTELEM zero_dst[4096]; //FIXME @@ -3181,7 +3182,7 @@ static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){ int x= block_w*mb_x2 + block_w/2; int y= block_w*mb_y2 + block_w/2; - add_yblock(s, zero_dst, dst, ref, obmc, + add_yblock(s, zero_dst, dst, obmc, x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index); //FIXME find a cleaner/simpler way to skip the outside stuff @@ -3237,7 +3238,7 @@ static always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3 block->type |= BLOCK_INTRA; }else{ index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1); - value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6); + value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12); if(s->me_cache[index] == value) return 0; s->me_cache[index]= value; @@ -3260,12 +3261,12 @@ static always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3 } /* special case for int[2] args we discard afterward, fixes compilation prob with gcc 2.95 */ -static always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int intra, const uint8_t *obmc_edged, int *best_rd){ +static always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){ int p[2] = {p0, p1}; - return check_block(s, mb_x, mb_y, p, intra, obmc_edged, best_rd); + return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd); } -static always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int *best_rd){ +static always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){ const int b_stride= s->b_width << s->block_max_depth; BlockNode *block= &s->block[mb_x + mb_y * b_stride]; BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]}; @@ -3276,13 +3277,14 @@ static always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, assert(((mb_x|mb_y)&1) == 0); index= (p0 + 31*p1) & (ME_CACHE_SIZE-1); - value= s->me_cache_generation + (p0>>10) + (p1<<6); + value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12); if(s->me_cache[index] == value) return 0; s->me_cache[index]= value; block->mx= p0; block->my= p1; + block->ref= ref; block->type &= ~BLOCK_INTRA; block[1]= block[b_stride]= block[b_stride+1]= *block; @@ -3308,24 +3310,35 @@ static void iterative_me(SnowContext *s){ const int b_stride= b_width; int color[3]; - for(pass=0; pass<50; pass++){ + { + RangeCoder r = s->c; + uint8_t state[sizeof(s->block_state)]; + memcpy(state, s->block_state, sizeof(s->block_state)); + for(mb_y= 0; mb_y<s->b_height; mb_y++) + for(mb_x= 0; mb_x<s->b_width; mb_x++) + encode_q_branch(s, 0, mb_x, mb_y); + s->c = r; + memcpy(s->block_state, state, sizeof(s->block_state)); + } + + for(pass=0; pass<25; pass++){ int change= 0; for(mb_y= 0; mb_y<b_height; mb_y++){ for(mb_x= 0; mb_x<b_width; mb_x++){ - int dia_change, i, j; - int best_rd= INT_MAX; - BlockNode backup; + int dia_change, i, j, ref; + int best_rd= INT_MAX, ref_rd; + BlockNode backup, ref_b; const int index= mb_x + mb_y * b_stride; BlockNode *block= &s->block[index]; - BlockNode *tb = mb_y ? &s->block[index-b_stride ] : &null_block; - BlockNode *lb = mb_x ? &s->block[index -1] : &null_block; - BlockNode *rb = mb_x+1<b_width ? &s->block[index +1] : &null_block; - BlockNode *bb = mb_y+1<b_height ? &s->block[index+b_stride ] : &null_block; - BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : &null_block; - BlockNode *trb= mb_x+1<b_width && mb_y ? &s->block[index-b_stride+1] : &null_block; - BlockNode *blb= mb_x && mb_y+1<b_height ? &s->block[index+b_stride-1] : &null_block; - BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : &null_block; + BlockNode *tb = mb_y ? &s->block[index-b_stride ] : NULL; + BlockNode *lb = mb_x ? &s->block[index -1] : NULL; + BlockNode *rb = mb_x+1<b_width ? &s->block[index +1] : NULL; + BlockNode *bb = mb_y+1<b_height ? &s->block[index+b_stride ] : NULL; + BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : NULL; + BlockNode *trb= mb_x+1<b_width && mb_y ? &s->block[index-b_stride+1] : NULL; + BlockNode *blb= mb_x && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL; + BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL; const int b_w= (MB_SIZE >> s->block_max_depth); uint8_t obmc_edged[b_w*2][b_w*2]; @@ -3399,48 +3412,72 @@ static void iterative_me(SnowContext *s){ int color0[3]= {block->color[0], block->color[1], block->color[2]}; check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd); }else - check_block_inter(s, mb_x, mb_y, block->mx, block->my, 0, *obmc_edged, &best_rd); - - check_block_inter(s, mb_x, mb_y, 0, 0, 0, *obmc_edged, &best_rd); - check_block_inter(s, mb_x, mb_y, tb->mx, tb->my, 0, *obmc_edged, &best_rd); - check_block_inter(s, mb_x, mb_y, lb->mx, lb->my, 0, *obmc_edged, &best_rd); - check_block_inter(s, mb_x, mb_y, rb->mx, rb->my, 0, *obmc_edged, &best_rd); - check_block_inter(s, mb_x, mb_y, bb->mx, bb->my, 0, *obmc_edged, &best_rd); - - /* fullpel ME */ - //FIXME avoid subpel interpol / round to nearest integer - do{ - dia_change=0; - for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){ - for(j=0; j<i; j++){ - dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), 0, *obmc_edged, &best_rd); - dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), 0, *obmc_edged, &best_rd); - dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), 0, *obmc_edged, &best_rd); - dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), 0, *obmc_edged, &best_rd); + check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd); + + ref_b= *block; + ref_rd= best_rd; + for(ref=0; ref < s->ref_frames; ref++){ + int16_t (*mvr)[2]= &s->ref_mvs[ref][index]; + if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold + continue; + block->ref= ref; + best_rd= INT_MAX; + + check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd); + check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd); + if(tb) + check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd); + if(lb) + check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd); + if(rb) + check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd); + if(bb) + check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd); + + /* fullpel ME */ + //FIXME avoid subpel interpol / round to nearest integer + do{ + dia_change=0; + for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){ + for(j=0; j<i; j++){ + dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd); + dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd); + dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd); + dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd); + } } + }while(dia_change); + /* subpel ME */ + do{ + static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},}; + dia_change=0; + for(i=0; i<8; i++) + dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd); + }while(dia_change); + //FIXME or try the standard 2 pass qpel or similar + + mvr[0][0]= block->mx; + mvr[0][1]= block->my; + if(ref_rd > best_rd){ + ref_rd= best_rd; + ref_b= *block; } - }while(dia_change); - /* subpel ME */ - do{ - static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},}; - dia_change=0; - for(i=0; i<8; i++) - dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], 0, *obmc_edged, &best_rd); - }while(dia_change); - //FIXME or try the standard 2 pass qpel or similar + } + best_rd= ref_rd; + *block= ref_b; #if 1 check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd); //FIXME RD style color selection #endif if(!same_block(block, &backup)){ - if(tb != &null_block) tb ->type &= ~BLOCK_OPT; - if(lb != &null_block) lb ->type &= ~BLOCK_OPT; - if(rb != &null_block) rb ->type &= ~BLOCK_OPT; - if(bb != &null_block) bb ->type &= ~BLOCK_OPT; - if(tlb!= &null_block) tlb->type &= ~BLOCK_OPT; - if(trb!= &null_block) trb->type &= ~BLOCK_OPT; - if(blb!= &null_block) blb->type &= ~BLOCK_OPT; - if(brb!= &null_block) brb->type &= ~BLOCK_OPT; + if(tb ) tb ->type &= ~BLOCK_OPT; + if(lb ) lb ->type &= ~BLOCK_OPT; + if(rb ) rb ->type &= ~BLOCK_OPT; + if(bb ) bb ->type &= ~BLOCK_OPT; + if(tlb) tlb->type &= ~BLOCK_OPT; + if(trb) trb->type &= ~BLOCK_OPT; + if(blb) blb->type &= ~BLOCK_OPT; + if(brb) brb->type &= ~BLOCK_OPT; change ++; } } @@ -3454,7 +3491,7 @@ static void iterative_me(SnowContext *s){ int change= 0; for(mb_y= 0; mb_y<b_height; mb_y+=2){ for(mb_x= 0; mb_x<b_width; mb_x+=2){ - int dia_change, i, j; + int i; int best_rd, init_rd; const int index= mb_x + mb_y * b_stride; BlockNode *b[4]; @@ -3474,13 +3511,14 @@ static void iterative_me(SnowContext *s){ init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0); + //FIXME more multiref search? check_4block_inter(s, mb_x, mb_y, (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2, - (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, &best_rd); + (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd); for(i=0; i<4; i++) if(!(b[i]->type&BLOCK_INTRA)) - check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, &best_rd); + check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd); if(init_rd != best_rd) change++; @@ -3706,6 +3744,7 @@ static void encode_header(SnowContext *s){ put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0); put_rac(&s->c, s->header_state, s->spatial_scalability); // put_rac(&s->c, s->header_state, s->rate_scalability); + put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0); for(plane_index=0; plane_index<2; plane_index++){ for(level=0; level<s->spatial_decomposition_count; level++){ @@ -3747,6 +3786,7 @@ static int decode_header(SnowContext *s){ s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0); s->spatial_scalability= get_rac(&s->c, s->header_state); // s->rate_scalability= get_rac(&s->c, s->header_state); + s->max_ref_frames= get_symbol(&s->c, s->header_state, 0)+1; for(plane_index=0; plane_index<3; plane_index++){ for(level=0; level<s->spatial_decomposition_count; level++){ @@ -3771,7 +3811,7 @@ static int decode_header(SnowContext *s){ s->mv_scale= get_symbol(&s->c, s->header_state, 0); s->qbias= get_symbol(&s->c, s->header_state, 1); s->block_max_depth= get_symbol(&s->c, s->header_state, 0); - if(s->block_max_depth > 1){ + if(s->block_max_depth > 1 || s->block_max_depth < 0){ av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth); s->block_max_depth= 0; return -1; @@ -3794,6 +3834,7 @@ static int common_init(AVCodecContext *avctx){ SnowContext *s = avctx->priv_data; int width, height; int level, orientation, plane_index, dec; + int i, j; s->avctx= avctx; @@ -3899,6 +3940,10 @@ static int common_init(AVCodecContext *avctx){ } } + for(i=0; i<MAX_REF_FRAMES; i++) + for(j=0; j<MAX_REF_FRAMES; j++) + scale_mv_ref[i][j] = 256*(i+1)/(j+1); + reset_contexts(s); /* width= s->width= avctx->width; @@ -3911,6 +3956,56 @@ static int common_init(AVCodecContext *avctx){ return 0; } +static int qscale2qlog(int qscale){ + return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2)) + + 61*QROOT/8; //<64 >60 +} + +static void ratecontrol_1pass(SnowContext *s, AVFrame *pict) +{ + /* estimate the frame's complexity as a sum of weighted dwt coefs. + * FIXME we know exact mv bits at this point, + * but ratecontrol isn't set up to include them. */ + uint32_t coef_sum= 0; + int level, orientation; + + for(level=0; level<s->spatial_decomposition_count; level++){ + for(orientation=level ? 1 : 0; orientation<4; orientation++){ + SubBand *b= &s->plane[0].band[level][orientation]; + DWTELEM *buf= b->buf; + const int w= b->width; + const int h= b->height; + const int stride= b->stride; + const int qlog= clip(2*QROOT + b->qlog, 0, QROOT*16); + const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT); + const int qdiv= (1<<16)/qmul; + int x, y; + if(orientation==0) + decorrelate(s, b, buf, stride, 1, 0); + for(y=0; y<h; y++) + for(x=0; x<w; x++) + coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16; + if(orientation==0) + correlate(s, b, buf, stride, 1, 0); + } + } + + /* ugly, ratecontrol just takes a sqrt again */ + coef_sum = (uint64_t)coef_sum * coef_sum >> 16; + assert(coef_sum < INT_MAX); + + if(pict->pict_type == I_TYPE){ + s->m.current_picture.mb_var_sum= coef_sum; + s->m.current_picture.mc_mb_var_sum= 0; + }else{ + s->m.current_picture.mc_mb_var_sum= coef_sum; + s->m.current_picture.mb_var_sum= 0; + } + + pict->quality= ff_rate_estimate_qscale(&s->m, 1); + s->lambda= pict->quality * 3/2; + s->qlog= qscale2qlog(pict->quality); +} static void calculate_vissual_weight(SnowContext *s, Plane *p){ int width = p->width; @@ -3951,6 +4046,13 @@ static int encode_init(AVCodecContext *avctx) return -1; } + if(avctx->prediction_method == DWT_97 + && (avctx->flags & CODEC_FLAG_QSCALE) + && avctx->global_quality == 0){ + av_log(avctx, AV_LOG_ERROR, "the 9/7 wavelet is incompatible with lossless mode\n"); + return -1; + } + common_init(avctx); alloc_blocks(s); @@ -3966,14 +4068,17 @@ static int encode_init(AVCodecContext *avctx) s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t)); h263_encode_init(&s->m); //mv_penalty + s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1); + if(avctx->flags&CODEC_FLAG_PASS1){ if(!avctx->stats_out) avctx->stats_out = av_mallocz(256); } - if(avctx->flags&CODEC_FLAG_PASS2){ + if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){ if(ff_rate_control_init(&s->m) < 0) return -1; } + s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2)); for(plane_index=0; plane_index<3; plane_index++){ calculate_vissual_weight(s, &s->plane[plane_index]); @@ -4006,6 +4111,15 @@ static int encode_init(AVCodecContext *avctx) s->avctx->get_buffer(s->avctx, &s->input_picture); + if(s->avctx->me_method == ME_ITER){ + int i; + int size= s->b_width * s->b_height << 2*s->block_max_depth; + for(i=0; i<s->max_ref_frames; i++){ + s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2])); + s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t)); + } + } + return 0; } #endif @@ -4021,16 +4135,29 @@ static int frame_start(SnowContext *s){ draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2); } - tmp= s->last_picture; - s->last_picture= s->current_picture; + tmp= s->last_picture[s->max_ref_frames-1]; + memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame)); + s->last_picture[0]= s->current_picture; s->current_picture= tmp; + if(s->keyframe){ + s->ref_frames= 0; + }else{ + int i; + for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++) + if(i && s->last_picture[i-1].key_frame) + break; + s->ref_frames= i; + } + s->current_picture.reference= 1; if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){ av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n"); return -1; } + s->current_picture.key_frame= s->keyframe; + return 0; } @@ -4055,27 +4182,31 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, } s->new_picture = *pict; + s->m.picture_number= avctx->frame_number; if(avctx->flags&CODEC_FLAG_PASS2){ s->m.pict_type = pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type; s->keyframe= pict->pict_type==FF_I_TYPE; - s->m.picture_number= avctx->frame_number; - pict->quality= ff_rate_estimate_qscale(&s->m, 0); + if(!(avctx->flags&CODEC_FLAG_QSCALE)) + pict->quality= ff_rate_estimate_qscale(&s->m, 0); }else{ s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0; + s->m.pict_type= pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE; } + if(s->pass1_rc && avctx->frame_number == 0) + pict->quality= 2*FF_QP2LAMBDA; if(pict->quality){ - s->qlog= rint(QROOT*log(pict->quality / (float)FF_QP2LAMBDA)/log(2)); - //<64 >60 - s->qlog += 61*QROOT/8; - }else{ - s->qlog= LOSSLESS_QLOG; + s->qlog= qscale2qlog(pict->quality); + s->lambda = pict->quality * 3/2; } + if(s->qlog < 0 || (!pict->quality && (avctx->flags & CODEC_FLAG_QSCALE))){ + s->qlog= LOSSLESS_QLOG; + s->lambda = 0; + }//else keep previous frame's qlog until after motion est frame_start(s); - s->current_picture.key_frame= s->keyframe; s->m.current_picture_ptr= &s->m.current_picture; if(pict->pict_type == P_TYPE){ @@ -4084,11 +4215,11 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, int stride= s->current_picture.linesize[0]; assert(s->current_picture.data[0]); - assert(s->last_picture.data[0]); + assert(s->last_picture[0].data[0]); s->m.avctx= s->avctx; s->m.current_picture.data[0]= s->current_picture.data[0]; - s->m. last_picture.data[0]= s-> last_picture.data[0]; + s->m. last_picture.data[0]= s->last_picture[0].data[0]; s->m. new_picture.data[0]= s-> input_picture.data[0]; s->m. last_picture_ptr= &s->m. last_picture; s->m.linesize= @@ -4111,7 +4242,7 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, s->m.out_format= FMT_H263; s->m.unrestricted_mv= 1; - s->lambda = s->m.lambda= pict->quality * 3/2; //FIXME bug somewhere else + s->m.lambda = s->lambda; s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7); s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT; @@ -4122,6 +4253,7 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, redo_frame: + s->m.pict_type = pict->pict_type; s->qbias= pict->pict_type == P_TYPE ? 2 : 0; encode_header(s); @@ -4136,6 +4268,7 @@ redo_frame: int x, y; // int bits= put_bits_count(&s->c.pb); + if(!(avctx->flags2 & CODEC_FLAG2_MEMC_ONLY)){ //FIXME optimize if(pict->data[plane_index]) //FIXME gray hack for(y=0; y<h; y++){ @@ -4147,11 +4280,13 @@ redo_frame: if( plane_index==0 && pict->pict_type == P_TYPE + && !(avctx->flags&CODEC_FLAG_PASS2) && s->m.me.scene_change_score > s->avctx->scenechange_threshold){ ff_init_range_encoder(c, buf, buf_size); ff_build_rac_states(c, 0.05*(1LL<<32), 256-8); pict->pict_type= FF_I_TYPE; s->keyframe=1; + s->current_picture.key_frame=1; reset_contexts(s); goto redo_frame; } @@ -4166,6 +4301,9 @@ redo_frame: ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count); + if(s->pass1_rc && plane_index==0) + ratecontrol_1pass(s, pict); + for(level=0; level<s->spatial_decomposition_count; level++){ for(orientation=level ? 1 : 0; orientation<4; orientation++){ SubBand *b= &p->band[level][orientation]; @@ -4200,6 +4338,20 @@ redo_frame: {START_TIMER predict_plane(s, s->spatial_dwt_buffer, plane_index, 1); STOP_TIMER("pred-conv")} + }else{ + //ME/MC only + if(pict->pict_type == I_TYPE){ + for(y=0; y<h; y++){ + for(x=0; x<w; x++){ + s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]= + pict->data[plane_index][y*pict->linesize[plane_index] + x]; + } + } + }else{ + memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h); + predict_plane(s, s->spatial_dwt_buffer, plane_index, 1); + } + } if(s->avctx->flags&CODEC_FLAG_PSNR){ int64_t error= 0; @@ -4215,23 +4367,23 @@ STOP_TIMER("pred-conv")} } } - if(s->last_picture.data[0]) - avctx->release_buffer(avctx, &s->last_picture); + if(s->last_picture[s->max_ref_frames-1].data[0]) + avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]); s->current_picture.coded_picture_number = avctx->frame_number; s->current_picture.pict_type = pict->pict_type; s->current_picture.quality = pict->quality; - if(avctx->flags&CODEC_FLAG_PASS1){ - s->m.p_tex_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits - s->m.mv_bits; - s->m.current_picture.display_picture_number = - s->m.current_picture.coded_picture_number = avctx->frame_number; - s->m.pict_type = pict->pict_type; - s->m.current_picture.quality = pict->quality; + s->m.frame_bits = 8*(s->c.bytestream - s->c.bytestream_start); + s->m.p_tex_bits = s->m.frame_bits - s->m.misc_bits - s->m.mv_bits; + s->m.current_picture.display_picture_number = + s->m.current_picture.coded_picture_number = avctx->frame_number; + s->m.current_picture.quality = pict->quality; + s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start); + if(s->pass1_rc) + ff_rate_estimate_qscale(&s->m, 0); + if(avctx->flags&CODEC_FLAG_PASS1) ff_write_pass1_stats(&s->m); - } - if(avctx->flags&CODEC_FLAG_PASS2){ - s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start); - } + s->m.last_pict_type = s->m.pict_type; emms_c(); @@ -4240,7 +4392,7 @@ STOP_TIMER("pred-conv")} #endif static void common_end(SnowContext *s){ - int plane_index, level, orientation; + int plane_index, level, orientation, i; av_freep(&s->spatial_dwt_buffer); @@ -4251,6 +4403,13 @@ static void common_end(SnowContext *s){ av_freep(&s->block); + for(i=0; i<MAX_REF_FRAMES; i++){ + av_freep(&s->ref_mvs[i]); + av_freep(&s->ref_scores[i]); + if(s->last_picture[i].data[0]) + s->avctx->release_buffer(s->avctx, &s->last_picture[i]); + } + for(plane_index=0; plane_index<3; plane_index++){ for(level=s->spatial_decomposition_count-1; level>=0; level--){ for(orientation=level ? 1 : 0; orientation<4; orientation++){ @@ -4397,7 +4556,7 @@ if(s->avctx->debug&2048){ { START_TIMER for(; yd<slice_h; yd+=4){ - ff_spatial_idwt_buffered_slice(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd); + ff_spatial_idwt_buffered_slice(&s->dsp, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd); } STOP_TIMER("idwt slice");} @@ -4426,8 +4585,8 @@ STOP_TIMER("idwt + predict_slices")} emms_c(); - if(s->last_picture.data[0]) - avctx->release_buffer(avctx, &s->last_picture); + if(s->last_picture[s->max_ref_frames-1].data[0]) + avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]); if(!(s->avctx->debug&2048)) *picture= s->current_picture; diff --git a/src/libffmpeg/libavcodec/svq1_cb.h b/src/libffmpeg/libavcodec/svq1_cb.h index 5c98c8047..ef097457e 100644 --- a/src/libffmpeg/libavcodec/svq1_cb.h +++ b/src/libffmpeg/libavcodec/svq1_cb.h @@ -764,9 +764,10 @@ static const int8_t svq1_inter_codebook_8x8[6144] = { }; /* list of codebooks for inter-coded vectors */ -static const int8_t* const svq1_inter_codebooks[4] = { +static const int8_t* const svq1_inter_codebooks[6] = { svq1_inter_codebook_4x2, svq1_inter_codebook_4x4, - svq1_inter_codebook_8x4, svq1_inter_codebook_8x8 + svq1_inter_codebook_8x4, svq1_inter_codebook_8x8, + NULL, NULL, }; static const int8_t svq1_inter_codebook_sum[4][16*6] = { @@ -1538,9 +1539,10 @@ static const int8_t svq1_intra_codebook_8x8[6144] = { }; /* list of codebooks for intra-coded vectors */ -static const int8_t* const svq1_intra_codebooks[4] = { +static const int8_t* const svq1_intra_codebooks[6] = { svq1_intra_codebook_4x2, svq1_intra_codebook_4x4, - svq1_intra_codebook_8x4, svq1_intra_codebook_8x8 + svq1_intra_codebook_8x4, svq1_intra_codebook_8x8, + NULL, NULL, }; static const int8_t svq1_intra_codebook_sum[4][16*6] = { diff --git a/src/libffmpeg/libavcodec/truemotion1.c b/src/libffmpeg/libavcodec/truemotion1.c index 728dbcdb7..d2c9efbf8 100644 --- a/src/libffmpeg/libavcodec/truemotion1.c +++ b/src/libffmpeg/libavcodec/truemotion1.c @@ -322,7 +322,7 @@ static void gen_vector_table24(TrueMotion1Context *s, const uint8_t *sel_vector_ static int truemotion1_decode_header(TrueMotion1Context *s) { int i; - struct frame_header header = {0}; + struct frame_header header; uint8_t header_buffer[128]; /* logical maximum size of the header */ const uint8_t *sel_vector_table; diff --git a/src/libffmpeg/libavcodec/tscc.c b/src/libffmpeg/libavcodec/tscc.c index 8bc53bf89..19edf3b2e 100644 --- a/src/libffmpeg/libavcodec/tscc.c +++ b/src/libffmpeg/libavcodec/tscc.c @@ -77,6 +77,8 @@ static int decode_rle(CamtasiaContext *c, unsigned int srcsize) unsigned char *src = c->decomp_buf; unsigned char *output, *output_end; int p1, p2, line=c->height, pos=0, i; + uint16_t pix16; + uint32_t pix32; output = c->pic.data[0] + (c->height - 1) * c->pic.linesize[0]; output_end = c->pic.data[0] + (c->height) * c->pic.linesize[0]; @@ -107,12 +109,28 @@ static int decode_rle(CamtasiaContext *c, unsigned int srcsize) src += p2 * (c->bpp / 8); continue; } - for(i = 0; i < p2 * (c->bpp / 8); i++) { - *output++ = *src++; - } - // RLE8 copy is actually padded - and runs are not! - if(c->bpp == 8 && (p2 & 1)) { - src++; + if ((c->bpp == 8) || (c->bpp == 24)) { + for(i = 0; i < p2 * (c->bpp / 8); i++) { + *output++ = *src++; + } + // RLE8 copy is actually padded - and runs are not! + if(c->bpp == 8 && (p2 & 1)) { + src++; + } + } else if (c->bpp == 16) { + for(i = 0; i < p2; i++) { + pix16 = LE_16(src); + src += 2; + *(uint16_t*)output = pix16; + output += 2; + } + } else if (c->bpp == 32) { + for(i = 0; i < p2; i++) { + pix32 = LE_32(src); + src += 4; + *(uint32_t*)output = pix32; + output += 4; + } } pos += p2; } else { //Run of pixels @@ -120,17 +138,17 @@ static int decode_rle(CamtasiaContext *c, unsigned int srcsize) switch(c->bpp){ case 8: pix[0] = *src++; break; - case 16: pix[0] = *src++; - pix[1] = *src++; + case 16: pix16 = LE_16(src); + src += 2; + *(uint16_t*)pix = pix16; break; case 24: pix[0] = *src++; pix[1] = *src++; pix[2] = *src++; break; - case 32: pix[0] = *src++; - pix[1] = *src++; - pix[2] = *src++; - pix[3] = *src++; + case 32: pix32 = LE_32(src); + src += 4; + *(uint32_t*)pix = pix32; break; } if (output + p1 * (c->bpp / 8) > output_end) @@ -139,17 +157,15 @@ static int decode_rle(CamtasiaContext *c, unsigned int srcsize) switch(c->bpp){ case 8: *output++ = pix[0]; break; - case 16: *output++ = pix[0]; - *output++ = pix[1]; + case 16: *(uint16_t*)output = pix16; + output += 2; break; case 24: *output++ = pix[0]; *output++ = pix[1]; *output++ = pix[2]; break; - case 32: *output++ = pix[0]; - *output++ = pix[1]; - *output++ = pix[2]; - *output++ = pix[3]; + case 32: *(uint32_t*)output = pix32; + output += 4; break; } } diff --git a/src/libffmpeg/libavcodec/utils.c b/src/libffmpeg/libavcodec/utils.c index 525fc9a98..0f8a4f412 100644 --- a/src/libffmpeg/libavcodec/utils.c +++ b/src/libffmpeg/libavcodec/utils.c @@ -1,7 +1,6 @@ /* * utils for libavcodec * Copyright (c) 2001 Fabrice Bellard. - * Copyright (c) 2003 Michel Bardiaux for the av_log API * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> * * This library is free software; you can redistribute it and/or @@ -29,9 +28,13 @@ #include "mpegvideo.h" #include "integer.h" #include "opt.h" +#include "crc.h" #include <stdarg.h> #include <limits.h> #include <float.h> +#ifdef __MINGW32__ +#include <fcntl.h> +#endif const uint8_t ff_reverse[256]={ 0x00,0x80,0x40,0xC0,0x20,0xA0,0x60,0xE0,0x10,0x90,0x50,0xD0,0x30,0xB0,0x70,0xF0, @@ -446,7 +449,7 @@ static const char* context_to_name(void* ptr) { #define E AV_OPT_FLAG_ENCODING_PARAM #define D AV_OPT_FLAG_DECODING_PARAM -static AVOption options[]={ +static const AVOption options[]={ {"bit_rate", NULL, OFFSET(bit_rate), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|A|E}, {"bit_rate_tolerance", NULL, OFFSET(bit_rate_tolerance), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E}, {"flags", NULL, OFFSET(flags), FF_OPT_TYPE_FLAGS, DEFAULT, INT_MIN, INT_MAX, V|A|E|D, "flags"}, @@ -495,7 +498,7 @@ static AVOption options[]={ {"sample_rate", NULL, OFFSET(sample_rate), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX}, {"channels", NULL, OFFSET(channels), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX}, {"cutoff", "set cutoff bandwidth", OFFSET(cutoff), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, A|E}, -{"frame_size", NULL, OFFSET(frame_size), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX}, +{"frame_size", NULL, OFFSET(frame_size), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, A|E}, {"frame_number", NULL, OFFSET(frame_number), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX}, {"real_pict_num", NULL, OFFSET(real_pict_num), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX}, {"delay", NULL, OFFSET(delay), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX}, @@ -650,8 +653,10 @@ static AVOption options[]={ {"vsad", NULL, 0, FF_OPT_TYPE_CONST, FF_CMP_VSAD, INT_MIN, INT_MAX, V|E, "cmp_func"}, {"vsse", NULL, 0, FF_OPT_TYPE_CONST, FF_CMP_VSSE, INT_MIN, INT_MAX, V|E, "cmp_func"}, {"nsse", NULL, 0, FF_OPT_TYPE_CONST, FF_CMP_NSSE, INT_MIN, INT_MAX, V|E, "cmp_func"}, +#ifdef CONFIG_SNOW_ENCODER {"w53", NULL, 0, FF_OPT_TYPE_CONST, FF_CMP_W53, INT_MIN, INT_MAX, V|E, "cmp_func"}, {"w97", NULL, 0, FF_OPT_TYPE_CONST, FF_CMP_W97, INT_MIN, INT_MAX, V|E, "cmp_func"}, +#endif {"dctmax", NULL, 0, FF_OPT_TYPE_CONST, FF_CMP_DCTMAX, INT_MIN, INT_MAX, V|E, "cmp_func"}, {"chroma", NULL, 0, FF_OPT_TYPE_CONST, FF_CMP_CHROMA, INT_MIN, INT_MAX, V|E, "cmp_func"}, {"pre_dia_size", NULL, OFFSET(pre_dia_size), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E}, @@ -716,7 +721,7 @@ static AVOption options[]={ {"refs", NULL, OFFSET(refs), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E}, {"chromaoffset", NULL, OFFSET(chromaoffset), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E}, {"bframebias", NULL, OFFSET(bframebias), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E}, -{"trellis", NULL, OFFSET(trellis), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E}, +{"trellis", NULL, OFFSET(trellis), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|A|E}, {"directpred", NULL, OFFSET(directpred), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E}, {"bpyramid", NULL, 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_BPYRAMID, INT_MIN, INT_MAX, V|E, "flags2"}, {"wpred", NULL, 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_WPRED, INT_MIN, INT_MAX, V|E, "flags2"}, @@ -735,6 +740,17 @@ static AVOption options[]={ {"partp8x8", NULL, 0, FF_OPT_TYPE_CONST, X264_PART_P8X8, INT_MIN, INT_MAX, V|E, "partitions"}, {"partb8x8", NULL, 0, FF_OPT_TYPE_CONST, X264_PART_B8X8, INT_MIN, INT_MAX, V|E, "partitions"}, {"sc_factor", NULL, OFFSET(scenechange_factor), FF_OPT_TYPE_INT, 6, 0, INT_MAX, V|E}, +{"mv0_threshold", NULL, OFFSET(mv0_threshold), FF_OPT_TYPE_INT, 256, 0, INT_MAX, V|E}, +{"ivlc", "intra vlc table", 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_INTRA_VLC, INT_MIN, INT_MAX, V|E, "flags2"}, +{"b_sensitivity", NULL, OFFSET(b_sensitivity), FF_OPT_TYPE_INT, 40, 1, INT_MAX, V|E}, +{"compression_level", NULL, OFFSET(compression_level), FF_OPT_TYPE_INT, FF_COMPRESSION_DEFAULT, INT_MIN, INT_MAX, V|A|E}, +{"use_lpc", NULL, OFFSET(use_lpc), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, A|E}, +{"lpc_coeff_precision", NULL, OFFSET(lpc_coeff_precision), FF_OPT_TYPE_INT, DEFAULT, 0, INT_MAX, A|E}, +{"min_prediction_order", NULL, OFFSET(min_prediction_order), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, A|E}, +{"max_prediction_order", NULL, OFFSET(max_prediction_order), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, A|E}, +{"prediction_order_method", NULL, OFFSET(prediction_order_method), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, A|E}, +{"min_partition_order", NULL, OFFSET(min_partition_order), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, A|E}, +{"max_partition_order", NULL, OFFSET(max_partition_order), FF_OPT_TYPE_INT, -1, INT_MIN, INT_MAX, A|E}, {NULL}, }; @@ -788,6 +804,16 @@ void avcodec_get_context_defaults(AVCodecContext *s){ s->pix_fmt= PIX_FMT_NONE; s->frame_skip_cmp= FF_CMP_DCTMAX; s->nsse_weight= 8; + s->sample_fmt= SAMPLE_FMT_S16; // FIXME: set to NONE + s->mv0_threshold= 256; + s->b_sensitivity= 40; + s->compression_level = FF_COMPRESSION_DEFAULT; + s->use_lpc = -1; + s->min_prediction_order = -1; + s->max_prediction_order = -1; + s->prediction_order_method = -1; + s->min_partition_order = -1; + s->max_partition_order = -1; s->intra_quant_bias= FF_DEFAULT_QUANT_BIAS; s->inter_quant_bias= FF_DEFAULT_QUANT_BIAS; @@ -843,9 +869,6 @@ int avcodec_open(AVCodecContext *avctx, AVCodec *codec) if(avctx->codec) goto end; - avctx->codec = codec; - avctx->codec_id = codec->id; - avctx->frame_number = 0; if (codec->priv_data_size > 0) { avctx->priv_data = av_mallocz(codec->priv_data_size); if (!avctx->priv_data) @@ -864,9 +887,13 @@ int avcodec_open(AVCodecContext *avctx, AVCodec *codec) goto end; } + avctx->codec = codec; + avctx->codec_id = codec->id; + avctx->frame_number = 0; ret = avctx->codec->init(avctx); if (ret < 0) { av_freep(&avctx->priv_data); + avctx->codec= NULL; goto end; } ret=0; @@ -1216,6 +1243,15 @@ unsigned avcodec_build( void ) return LIBAVCODEC_BUILD; } +static void init_crcs(void){ + av_crc04C11DB7= av_mallocz_static(sizeof(AVCRC) * 257); + av_crc8005 = av_mallocz_static(sizeof(AVCRC) * 257); + av_crc07 = av_mallocz_static(sizeof(AVCRC) * 257); + av_crc_init(av_crc04C11DB7, 0, 32, 0x04c11db7, sizeof(AVCRC)*257); + av_crc_init(av_crc8005 , 0, 16, 0x8005 , sizeof(AVCRC)*257); + av_crc_init(av_crc07 , 0, 8, 0x07 , sizeof(AVCRC)*257); +} + /* must be called before any other functions */ void avcodec_init(void) { @@ -1226,6 +1262,7 @@ void avcodec_init(void) inited = 1; dsputil_static_init(); + init_crcs(); } /** @@ -1266,55 +1303,39 @@ char av_get_pict_type_char(int pict_type){ } } -/* av_log API */ - -static int av_log_level = AV_LOG_INFO; - -static void av_log_default_callback(void* ptr, int level, const char* fmt, va_list vl) -{ - static int print_prefix=1; - AVClass* avc= ptr ? *(AVClass**)ptr : NULL; - if(level>av_log_level) - return; -/* #undef fprintf */ - if(print_prefix && avc) { - fprintf(stderr, "[%s @ %p]", avc->item_name(ptr), avc); +int av_get_bits_per_sample(enum CodecID codec_id){ + switch(codec_id){ + case CODEC_ID_ADPCM_SBPRO_2: + return 2; + case CODEC_ID_ADPCM_SBPRO_3: + return 3; + case CODEC_ID_ADPCM_SBPRO_4: + case CODEC_ID_ADPCM_CT: + return 4; + case CODEC_ID_PCM_ALAW: + case CODEC_ID_PCM_MULAW: + case CODEC_ID_PCM_S8: + case CODEC_ID_PCM_U8: + return 8; + case CODEC_ID_PCM_S16BE: + case CODEC_ID_PCM_S16LE: + case CODEC_ID_PCM_U16BE: + case CODEC_ID_PCM_U16LE: + return 16; + case CODEC_ID_PCM_S24DAUD: + case CODEC_ID_PCM_S24BE: + case CODEC_ID_PCM_S24LE: + case CODEC_ID_PCM_U24BE: + case CODEC_ID_PCM_U24LE: + return 24; + case CODEC_ID_PCM_S32BE: + case CODEC_ID_PCM_S32LE: + case CODEC_ID_PCM_U32BE: + case CODEC_ID_PCM_U32LE: + return 32; + default: + return 0; } -/* #define fprintf please_use_av_log */ - - print_prefix= strstr(fmt, "\n") != NULL; - - vfprintf(stderr, fmt, vl); -} - -static void (*av_log_callback)(void*, int, const char*, va_list) = av_log_default_callback; - -void av_log(void* avcl, int level, const char *fmt, ...) -{ - va_list vl; - va_start(vl, fmt); - av_vlog(avcl, level, fmt, vl); - va_end(vl); -} - -void av_vlog(void* avcl, int level, const char *fmt, va_list vl) -{ - av_log_callback(avcl, level, fmt, vl); -} - -int av_log_get_level(void) -{ - return av_log_level; -} - -void av_log_set_level(int level) -{ - av_log_level = level; -} - -void av_log_set_callback(void (*callback)(void*, int, const char*, va_list)) -{ - av_log_callback = callback; } #if !defined(HAVE_THREADS) @@ -1336,3 +1357,39 @@ unsigned int av_xiphlacing(unsigned char *s, unsigned int v) n++; return n; } + +/* Wrapper to work around the lack of mkstemp() on mingw/cygin. + * Also, tries to create file in /tmp first, if possible. + * *prefix can be a character constant; *filename will be allocated internally. + * Returns file descriptor of opened file (or -1 on error) + * and opened file name in **filename. */ +int av_tempfile(char *prefix, char **filename) { + int fd=-1; +#ifdef __MINGW32__ + *filename = tempnam(".", prefix); +#else + size_t len = strlen(prefix) + 12; /* room for "/tmp/" and "XXXXXX\0" */ + *filename = av_malloc(len); +#endif + /* -----common section-----*/ + if (*filename == NULL) { + av_log(NULL, AV_LOG_ERROR, "ff_tempfile: Cannot allocate file name\n"); + return -1; + } +#ifdef __MINGW32__ + fd = open(*filename, _O_RDWR | _O_BINARY | _O_CREAT, 0444); +#else + snprintf(*filename, len, "/tmp/%sXXXXXX", prefix); + fd = mkstemp(*filename); + if (fd < 0) { + snprintf(*filename, len, "./%sXXXXXX", prefix); + fd = mkstemp(*filename); + } +#endif + /* -----common section-----*/ + if (fd < 0) { + av_log(NULL, AV_LOG_ERROR, "ff_tempfile: Cannot open temporary file %s\n", *filename); + return -1; + } + return fd; /* success */ +} diff --git a/src/libffmpeg/libavcodec/vmdav.c b/src/libffmpeg/libavcodec/vmdav.c index 34685b676..b850a09f9 100644 --- a/src/libffmpeg/libavcodec/vmdav.c +++ b/src/libffmpeg/libavcodec/vmdav.c @@ -414,16 +414,28 @@ typedef struct VmdAudioContext { int channels; int bits; int block_align; - unsigned char steps8[16]; - unsigned short steps16[16]; - unsigned short steps128[256]; - short predictors[2]; + int predictors[2]; } VmdAudioContext; +static uint16_t vmdaudio_table[128] = { + 0x000, 0x008, 0x010, 0x020, 0x030, 0x040, 0x050, 0x060, 0x070, 0x080, + 0x090, 0x0A0, 0x0B0, 0x0C0, 0x0D0, 0x0E0, 0x0F0, 0x100, 0x110, 0x120, + 0x130, 0x140, 0x150, 0x160, 0x170, 0x180, 0x190, 0x1A0, 0x1B0, 0x1C0, + 0x1D0, 0x1E0, 0x1F0, 0x200, 0x208, 0x210, 0x218, 0x220, 0x228, 0x230, + 0x238, 0x240, 0x248, 0x250, 0x258, 0x260, 0x268, 0x270, 0x278, 0x280, + 0x288, 0x290, 0x298, 0x2A0, 0x2A8, 0x2B0, 0x2B8, 0x2C0, 0x2C8, 0x2D0, + 0x2D8, 0x2E0, 0x2E8, 0x2F0, 0x2F8, 0x300, 0x308, 0x310, 0x318, 0x320, + 0x328, 0x330, 0x338, 0x340, 0x348, 0x350, 0x358, 0x360, 0x368, 0x370, + 0x378, 0x380, 0x388, 0x390, 0x398, 0x3A0, 0x3A8, 0x3B0, 0x3B8, 0x3C0, + 0x3C8, 0x3D0, 0x3D8, 0x3E0, 0x3E8, 0x3F0, 0x3F8, 0x400, 0x440, 0x480, + 0x4C0, 0x500, 0x540, 0x580, 0x5C0, 0x600, 0x640, 0x680, 0x6C0, 0x700, + 0x740, 0x780, 0x7C0, 0x800, 0x900, 0xA00, 0xB00, 0xC00, 0xD00, 0xE00, + 0xF00, 0x1000, 0x1400, 0x1800, 0x1C00, 0x2000, 0x3000, 0x4000 +}; + static int vmdaudio_decode_init(AVCodecContext *avctx) { VmdAudioContext *s = (VmdAudioContext *)avctx->priv_data; - int i; s->avctx = avctx; s->channels = avctx->channels; @@ -433,53 +445,25 @@ static int vmdaudio_decode_init(AVCodecContext *avctx) av_log(s->avctx, AV_LOG_DEBUG, "%d channels, %d bits/sample, block align = %d, sample rate = %d\n", s->channels, s->bits, s->block_align, avctx->sample_rate); - /* set up the steps8 and steps16 tables */ - for (i = 0; i < 8; i++) { - if (i < 4) - s->steps8[i] = i; - else - s->steps8[i] = s->steps8[i - 1] + i - 1; - - if (i == 0) - s->steps16[i] = 0; - else if (i == 1) - s->steps16[i] = 4; - else if (i == 2) - s->steps16[i] = 16; - else - s->steps16[i] = 1 << (i + 4); - } - - /* set up the step128 table */ - s->steps128[0] = 0; - s->steps128[1] = 8; - for (i = 0x02; i <= 0x20; i++) - s->steps128[i] = (i - 1) << 4; - for (i = 0x21; i <= 0x60; i++) - s->steps128[i] = (i + 0x1F) << 3; - for (i = 0x61; i <= 0x70; i++) - s->steps128[i] = (i - 0x51) << 6; - for (i = 0x71; i <= 0x78; i++) - s->steps128[i] = (i - 0x69) << 8; - for (i = 0x79; i <= 0x7D; i++) - s->steps128[i] = (i - 0x75) << 10; - s->steps128[0x7E] = 0x3000; - s->steps128[0x7F] = 0x4000; - - /* set up the negative half of each table */ - for (i = 0; i < 8; i++) { - s->steps8[i + 8] = -s->steps8[i]; - s->steps16[i + 8] = -s->steps16[i]; - } - for (i = 0; i < 128; i++) - s->steps128[i + 128] = -s->steps128[i]; - return 0; } static void vmdaudio_decode_audio(VmdAudioContext *s, unsigned char *data, - uint8_t *buf, int ratio) { + uint8_t *buf, int stereo) +{ + int i; + int chan = 0; + int16_t *out = (int16_t*)data; + for(i = 0; i < s->block_align; i++) { + if(buf[i] & 0x80) + s->predictors[chan] -= vmdaudio_table[buf[i] & 0x7F]; + else + s->predictors[chan] += vmdaudio_table[buf[i]]; + s->predictors[chan] = clip(s->predictors[chan], -32768, 32767); + out[i] = s->predictors[chan]; + chan ^= stereo; + } } static int vmdaudio_loadsound(VmdAudioContext *s, unsigned char *data, @@ -488,44 +472,39 @@ static int vmdaudio_loadsound(VmdAudioContext *s, unsigned char *data, int bytes_decoded = 0; int i; - if (silence) - av_log(s->avctx, AV_LOG_INFO, "silent block!\n"); +// if (silence) +// av_log(s->avctx, AV_LOG_INFO, "silent block!\n"); if (s->channels == 2) { /* stereo handling */ - if ((s->block_align & 0x01) == 0) { - if (silence) - memset(data, 0, s->block_align * 2); - else - vmdaudio_decode_audio(s, data, buf, 1); + if (silence) { + memset(data, 0, s->block_align * 2); } else { - if (silence) - memset(data, 0, s->block_align * 2); - else + if (s->bits == 16) vmdaudio_decode_audio(s, data, buf, 1); + else + /* copy the data but convert it to signed */ + for (i = 0; i < s->block_align; i++) + data[i * 2 + 1] = buf[i] + 0x80; } } else { + bytes_decoded = s->block_align * 2; /* mono handling */ if (silence) { + memset(data, 0, s->block_align * 2); + } else { if (s->bits == 16) { - memset(data, 0, s->block_align * 2); - bytes_decoded = s->block_align * 2; + vmdaudio_decode_audio(s, data, buf, 0); } else { -// memset(data, 0x00, s->block_align); -// bytes_decoded = s->block_align; -memset(data, 0x00, s->block_align * 2); -bytes_decoded = s->block_align * 2; + /* copy the data but convert it to signed */ + for (i = 0; i < s->block_align; i++) + data[i * 2 + 1] = buf[i] + 0x80; } - } else { - /* copy the data but convert it to signed */ - for (i = 0; i < s->block_align; i++) - data[i * 2 + 1] = buf[i] + 0x80; - bytes_decoded = s->block_align * 2; } } - return bytes_decoded; + return s->block_align * 2; } static int vmdaudio_decode_frame(AVCodecContext *avctx, diff --git a/src/libffmpeg/libavcodec/vorbis.c b/src/libffmpeg/libavcodec/vorbis.c index 9cc09bed1..de3688c91 100644 --- a/src/libffmpeg/libavcodec/vorbis.c +++ b/src/libffmpeg/libavcodec/vorbis.c @@ -20,6 +20,8 @@ */ #undef V_DEBUG +//#define V_DEBUG +//#define AV_DEBUG(...) av_log(NULL, AV_LOG_INFO, __VA_ARGS__) #include <math.h> @@ -473,7 +475,7 @@ static int vorbis_parse_setup_hdr_floors(vorbis_context *vc) { } for(k=0;k<(1<<floor_setup->data.t1.class_subclasses[j]);++k) { - floor_setup->data.t1.subclass_books[j][k]=get_bits(gb, 8)-1; + floor_setup->data.t1.subclass_books[j][k]=(int16_t)get_bits(gb, 8)-1; AV_DEBUG(" book %d. : %d \n", k, floor_setup->data.t1.subclass_books[j][k]); } @@ -872,10 +874,17 @@ static int vorbis_parse_id_hdr(vorbis_context *vc){ bl1=get_bits(gb, 4); vc->blocksize_0=(1<<bl0); vc->blocksize_1=(1<<bl1); - if (bl0>13 || bl0<6 || bl1>13 || bl1<6) { + if (bl0>13 || bl0<6 || bl1>13 || bl1<6 || bl1<bl0) { av_log(vc->avccontext, AV_LOG_ERROR, " Vorbis id header packet corrupt (illegal blocksize). \n"); return 3; } + // output format int16 + if (vc->blocksize_1/2 * vc->audio_channels * 2 > + AVCODEC_MAX_AUDIO_FRAME_SIZE) { + av_log(vc->avccontext, AV_LOG_ERROR, "Vorbis channel count makes " + "output packets too large.\n"); + return 4; + } vc->swin=vwin[bl0-6]; vc->lwin=vwin[bl1-6]; @@ -1345,12 +1354,14 @@ static int vorbis_residue_decode(vorbis_context *vc, vorbis_residue *vr, uint_fa AV_DEBUG("Classword: %d \n", temp); - assert(vr->classifications > 1 && vr->classifications<256 && temp<=65536); //needed for inverse[] + assert(vr->classifications > 1 && temp<=65536); //needed for inverse[] for(i=0;i<c_p_c;++i) { uint_fast32_t temp2; temp2=(((uint_fast64_t)temp) * inverse[vr->classifications])>>32; - classifs[j_times_ptns_to_read+partition_count+c_p_c-1-i]=temp-temp2*vr->classifications; + if (partition_count+c_p_c-1-i < ptns_to_read) { + classifs[j_times_ptns_to_read+partition_count+c_p_c-1-i]=temp-temp2*vr->classifications; + } temp=temp2; } } diff --git a/src/libffmpeg/libavcodec/vp3.c b/src/libffmpeg/libavcodec/vp3.c index a7a9e8bac..b5cfbb02c 100644 --- a/src/libffmpeg/libavcodec/vp3.c +++ b/src/libffmpeg/libavcodec/vp3.c @@ -229,6 +229,8 @@ typedef struct Vp3DecodeContext { DSPContext dsp; int flipped_image; + int qis[3]; + int nqis; int quality_index; int last_quality_index; @@ -254,17 +256,17 @@ typedef struct Vp3DecodeContext { Vp3Fragment *all_fragments; Coeff *coeffs; Coeff *next_coeff; - int u_fragment_start; - int v_fragment_start; + int fragment_start[3]; ScanTable scantable; /* tables */ uint16_t coded_dc_scale_factor[64]; uint32_t coded_ac_scale_factor[64]; - uint16_t coded_intra_y_dequant[64]; - uint16_t coded_intra_c_dequant[64]; - uint16_t coded_inter_dequant[64]; + uint8_t base_matrix[384][64]; + uint8_t qr_count[2][3]; + uint8_t qr_size [2][3][64]; + uint16_t qr_base[2][3][64]; /* this is a list of indices into the all_fragments array indicating * which of the fragments are coded */ @@ -285,9 +287,7 @@ typedef struct Vp3DecodeContext { /* these arrays need to be on 16-byte boundaries since SSE2 operations * index into them */ - DECLARE_ALIGNED_16(int16_t, intra_y_dequant[64]); - DECLARE_ALIGNED_16(int16_t, intra_c_dequant[64]); - DECLARE_ALIGNED_16(int16_t, inter_dequant[64]); + DECLARE_ALIGNED_16(int16_t, qmat[2][4][64]); //<qmat[is_inter][plane] /* This table contains superblock_count * 16 entries. Each set of 16 * numbers corresponds to the fragment indices 0..15 of the superblock. @@ -328,8 +328,7 @@ typedef struct Vp3DecodeContext { int bounding_values_array[256]; } Vp3DecodeContext; -static int theora_decode_comments(AVCodecContext *avctx, GetBitContext gb); -static int theora_decode_tables(AVCodecContext *avctx, GetBitContext gb); +static int theora_decode_tables(AVCodecContext *avctx, GetBitContext *gb); /************************************************************************ * VP3 specific functions @@ -345,8 +344,6 @@ static int theora_decode_tables(AVCodecContext *avctx, GetBitContext gb); static int init_block_mapping(Vp3DecodeContext *s) { int i, j; - signed int hilbert_walk_y[16]; - signed int hilbert_walk_c[16]; signed int hilbert_walk_mb[4]; int current_fragment = 0; @@ -385,41 +382,6 @@ static int init_block_mapping(Vp3DecodeContext *s) debug_vp3(" vp3: initialize block mapping tables\n"); - /* figure out hilbert pattern per these frame dimensions */ - hilbert_walk_y[0] = 1; - hilbert_walk_y[1] = 1; - hilbert_walk_y[2] = s->fragment_width; - hilbert_walk_y[3] = -1; - hilbert_walk_y[4] = s->fragment_width; - hilbert_walk_y[5] = s->fragment_width; - hilbert_walk_y[6] = 1; - hilbert_walk_y[7] = -s->fragment_width; - hilbert_walk_y[8] = 1; - hilbert_walk_y[9] = s->fragment_width; - hilbert_walk_y[10] = 1; - hilbert_walk_y[11] = -s->fragment_width; - hilbert_walk_y[12] = -s->fragment_width; - hilbert_walk_y[13] = -1; - hilbert_walk_y[14] = -s->fragment_width; - hilbert_walk_y[15] = 1; - - hilbert_walk_c[0] = 1; - hilbert_walk_c[1] = 1; - hilbert_walk_c[2] = s->fragment_width / 2; - hilbert_walk_c[3] = -1; - hilbert_walk_c[4] = s->fragment_width / 2; - hilbert_walk_c[5] = s->fragment_width / 2; - hilbert_walk_c[6] = 1; - hilbert_walk_c[7] = -s->fragment_width / 2; - hilbert_walk_c[8] = 1; - hilbert_walk_c[9] = s->fragment_width / 2; - hilbert_walk_c[10] = 1; - hilbert_walk_c[11] = -s->fragment_width / 2; - hilbert_walk_c[12] = -s->fragment_width / 2; - hilbert_walk_c[13] = -1; - hilbert_walk_c[14] = -s->fragment_width / 2; - hilbert_walk_c[15] = 1; - hilbert_walk_mb[0] = 1; hilbert_walk_mb[1] = s->macroblock_width; hilbert_walk_mb[2] = 1; @@ -440,7 +402,6 @@ static int init_block_mapping(Vp3DecodeContext *s) current_height = 0; superblock_row_inc = 3 * s->fragment_width - (s->y_superblock_width * 4 - s->fragment_width); - hilbert = hilbert_walk_y; /* the first operation for this variable is to advance by 1 */ current_fragment = -1; @@ -454,10 +415,9 @@ static int init_block_mapping(Vp3DecodeContext *s) current_height = 0; superblock_row_inc = 3 * (s->fragment_width / 2) - (s->c_superblock_width * 4 - s->fragment_width / 2); - hilbert = hilbert_walk_c; /* the first operation for this variable is to advance by 1 */ - current_fragment = s->u_fragment_start - 1; + current_fragment = s->fragment_start[1] - 1; } else if (i == s->v_superblock_start) { @@ -468,10 +428,9 @@ static int init_block_mapping(Vp3DecodeContext *s) current_height = 0; superblock_row_inc = 3 * (s->fragment_width / 2) - (s->c_superblock_width * 4 - s->fragment_width / 2); - hilbert = hilbert_walk_c; /* the first operation for this variable is to advance by 1 */ - current_fragment = s->v_fragment_start - 1; + current_fragment = s->fragment_start[2] - 1; } @@ -486,7 +445,7 @@ static int init_block_mapping(Vp3DecodeContext *s) /* iterate through all 16 fragments in a superblock */ for (j = 0; j < 16; j++) { - current_fragment += hilbert[j]; + current_fragment += travel_width[j] + right_edge * travel_height[j]; current_width += travel_width[j]; current_height += travel_height[j]; @@ -593,13 +552,13 @@ static int init_block_mapping(Vp3DecodeContext *s) s->macroblock_fragments[mapping_index++] = -1; /* C planes */ - c_fragment = s->u_fragment_start + + c_fragment = s->fragment_start[1] + (i * s->fragment_width / 4) + (j / 2); s->all_fragments[c_fragment].macroblock = s->macroblock_count; s->macroblock_fragments[mapping_index++] = c_fragment; debug_init("%d ", c_fragment); - c_fragment = s->v_fragment_start + + c_fragment = s->fragment_start[2] + (i * s->fragment_width / 4) + (j / 2); s->all_fragments[c_fragment].macroblock = s->macroblock_count; s->macroblock_fragments[mapping_index++] = c_fragment; @@ -646,94 +605,38 @@ static void init_frame(Vp3DecodeContext *s, GetBitContext *gb) */ static void init_dequantizer(Vp3DecodeContext *s) { - int ac_scale_factor = s->coded_ac_scale_factor[s->quality_index]; int dc_scale_factor = s->coded_dc_scale_factor[s->quality_index]; - int i, j; + int i, j, plane, inter, qri, bmi, bmj, qistart; debug_vp3(" vp3: initializing dequantization tables\n"); - /* - * Scale dequantizers: - * - * quantizer * sf - * -------------- - * 100 - * - * where sf = dc_scale_factor for DC quantizer - * or ac_scale_factor for AC quantizer - * - * Then, saturate the result to a lower limit of MIN_DEQUANT_VAL. - */ -#define SCALER 4 - - /* scale DC quantizers */ - s->intra_y_dequant[0] = s->coded_intra_y_dequant[0] * dc_scale_factor / 100; - if (s->intra_y_dequant[0] < MIN_DEQUANT_VAL * 2) - s->intra_y_dequant[0] = MIN_DEQUANT_VAL * 2; - s->intra_y_dequant[0] *= SCALER; - - s->intra_c_dequant[0] = s->coded_intra_c_dequant[0] * dc_scale_factor / 100; - if (s->intra_c_dequant[0] < MIN_DEQUANT_VAL * 2) - s->intra_c_dequant[0] = MIN_DEQUANT_VAL * 2; - s->intra_c_dequant[0] *= SCALER; - - s->inter_dequant[0] = s->coded_inter_dequant[0] * dc_scale_factor / 100; - if (s->inter_dequant[0] < MIN_DEQUANT_VAL * 4) - s->inter_dequant[0] = MIN_DEQUANT_VAL * 4; - s->inter_dequant[0] *= SCALER; - - /* scale AC quantizers, zigzag at the same time in preparation for - * the dequantization phase */ - for (i = 1; i < 64; i++) { - int k= s->scantable.scantable[i]; - j = s->scantable.permutated[i]; - - s->intra_y_dequant[j] = s->coded_intra_y_dequant[k] * ac_scale_factor / 100; - if (s->intra_y_dequant[j] < MIN_DEQUANT_VAL) - s->intra_y_dequant[j] = MIN_DEQUANT_VAL; - s->intra_y_dequant[j] *= SCALER; - - s->intra_c_dequant[j] = s->coded_intra_c_dequant[k] * ac_scale_factor / 100; - if (s->intra_c_dequant[j] < MIN_DEQUANT_VAL) - s->intra_c_dequant[j] = MIN_DEQUANT_VAL; - s->intra_c_dequant[j] *= SCALER; - - s->inter_dequant[j] = s->coded_inter_dequant[k] * ac_scale_factor / 100; - if (s->inter_dequant[j] < MIN_DEQUANT_VAL * 2) - s->inter_dequant[j] = MIN_DEQUANT_VAL * 2; - s->inter_dequant[j] *= SCALER; + for(inter=0; inter<2; inter++){ + for(plane=0; plane<3; plane++){ + int sum=0; + for(qri=0; qri<s->qr_count[inter][plane]; qri++){ + sum+= s->qr_size[inter][plane][qri]; + if(s->quality_index <= sum) + break; + } + qistart= sum - s->qr_size[inter][plane][qri]; + bmi= s->qr_base[inter][plane][qri ]; + bmj= s->qr_base[inter][plane][qri+1]; + for(i=0; i<64; i++){ + int coeff= ( 2*(sum -s->quality_index)*s->base_matrix[bmi][i] + - 2*(qistart-s->quality_index)*s->base_matrix[bmj][i] + + s->qr_size[inter][plane][qri]) + / (2*s->qr_size[inter][plane][qri]); + + int qmin= 8<<(inter + !i); + int qscale= i ? ac_scale_factor : dc_scale_factor; + + s->qmat[inter][plane][i]= clip((qscale * coeff)/100 * 4, qmin, 4096); + } + } } - memset(s->qscale_table, (FFMAX(s->intra_y_dequant[1], s->intra_c_dequant[1])+8)/16, 512); //FIXME finetune - - /* print debug information as requested */ - debug_dequantizers("intra Y dequantizers:\n"); - for (i = 0; i < 8; i++) { - for (j = i * 8; j < i * 8 + 8; j++) { - debug_dequantizers(" %4d,", s->intra_y_dequant[j]); - } - debug_dequantizers("\n"); - } - debug_dequantizers("\n"); - - debug_dequantizers("intra C dequantizers:\n"); - for (i = 0; i < 8; i++) { - for (j = i * 8; j < i * 8 + 8; j++) { - debug_dequantizers(" %4d,", s->intra_c_dequant[j]); - } - debug_dequantizers("\n"); - } - debug_dequantizers("\n"); - - debug_dequantizers("interframe dequantizers:\n"); - for (i = 0; i < 8; i++) { - for (j = i * 8; j < i * 8 + 8; j++) { - debug_dequantizers(" %4d,", s->inter_dequant[j]); - } - debug_dequantizers("\n"); - } - debug_dequantizers("\n"); + memset(s->qscale_table, (FFMAX(s->qmat[0][0][1], s->qmat[0][1][1])+8)/16, 512); //FIXME finetune } /* @@ -903,7 +806,7 @@ static int unpack_superblocks(Vp3DecodeContext *s, GetBitContext *gb) s->all_fragments[current_fragment].next_coeff= s->coeffs + current_fragment; s->coded_fragment_list[s->coded_fragment_list_index] = current_fragment; - if ((current_fragment >= s->u_fragment_start) && + if ((current_fragment >= s->fragment_start[1]) && (s->last_coded_y_fragment == -1) && (!first_c_fragment_seen)) { s->first_coded_c_fragment = s->coded_fragment_list_index; @@ -931,7 +834,7 @@ static int unpack_superblocks(Vp3DecodeContext *s, GetBitContext *gb) s->all_fragments[current_fragment].next_coeff= s->coeffs + current_fragment; s->coded_fragment_list[s->coded_fragment_list_index] = current_fragment; - if ((current_fragment >= s->u_fragment_start) && + if ((current_fragment >= s->fragment_start[1]) && (s->last_coded_y_fragment == -1) && (!first_c_fragment_seen)) { s->first_coded_c_fragment = s->coded_fragment_list_index; @@ -1146,17 +1049,10 @@ static int unpack_vectors(Vp3DecodeContext *s, GetBitContext *gb) motion_y[4] += motion_y[k]; } - if (motion_x[4] >= 0) - motion_x[4] = (motion_x[4] + 2) / 4; - else - motion_x[4] = (motion_x[4] - 2) / 4; - motion_x[5] = motion_x[4]; - - if (motion_y[4] >= 0) - motion_y[4] = (motion_y[4] + 2) / 4; - else - motion_y[4] = (motion_y[4] - 2) / 4; - motion_y[5] = motion_y[4]; + motion_x[5]= + motion_x[4]= RSHIFT(motion_x[4], 2); + motion_y[5]= + motion_y[4]= RSHIFT(motion_y[4], 2); /* vector maintenance; vector[3] is treated as the * last vector in this case */ @@ -1416,7 +1312,6 @@ static int unpack_dct_coeffs(Vp3DecodeContext *s, GetBitContext *gb) (compatible_frame[s->all_fragments[x].coding_method] == current_frame_type) #define FRAME_CODED(x) (s->all_fragments[x].coding_method != MODE_COPY) #define DC_COEFF(u) (s->coeffs[u].index ? 0 : s->coeffs[u].coeff) //FIXME do somethin to simplify this -static inline int iabs (int x) { return ((x < 0) ? -x : x); } static void reverse_dc_prediction(Vp3DecodeContext *s, int first_fragment, @@ -1432,24 +1327,8 @@ static void reverse_dc_prediction(Vp3DecodeContext *s, int x, y; int i = first_fragment; - /* - * Fragment prediction groups: - * - * 32222222226 - * 10000000004 - * 10000000004 - * 10000000004 - * 10000000004 - * - * Note: Groups 5 and 7 do not exist as it would mean that the - * fragment's x coordinate is both 0 and (width - 1) at the same time. - */ - int predictor_group; short predicted_dc; - /* validity flags for the left, up-left, up, and up-right fragments */ - int fl, ful, fu, fur; - /* DC values for the left, up-left, up, and up-right fragments */ int vl, vul, vu, vur; @@ -1462,26 +1341,24 @@ static void reverse_dc_prediction(Vp3DecodeContext *s, * 1: up multiplier * 2: up-right multiplier * 3: left multiplier - * 4: mask - * 5: right bit shift divisor (e.g., 7 means >>=7, a.k.a. div by 128) */ - int predictor_transform[16][6] = { - { 0, 0, 0, 0, 0, 0 }, - { 0, 0, 0, 1, 0, 0 }, // PL - { 0, 0, 1, 0, 0, 0 }, // PUR - { 0, 0, 53, 75, 127, 7 }, // PUR|PL - { 0, 1, 0, 0, 0, 0 }, // PU - { 0, 1, 0, 1, 1, 1 }, // PU|PL - { 0, 1, 0, 0, 0, 0 }, // PU|PUR - { 0, 0, 53, 75, 127, 7 }, // PU|PUR|PL - { 1, 0, 0, 0, 0, 0 }, // PUL - { 0, 0, 0, 1, 0, 0 }, // PUL|PL - { 1, 0, 1, 0, 1, 1 }, // PUL|PUR - { 0, 0, 53, 75, 127, 7 }, // PUL|PUR|PL - { 0, 1, 0, 0, 0, 0 }, // PUL|PU - {-26, 29, 0, 29, 31, 5 }, // PUL|PU|PL - { 3, 10, 3, 0, 15, 4 }, // PUL|PU|PUR - {-26, 29, 0, 29, 31, 5 } // PUL|PU|PUR|PL + int predictor_transform[16][4] = { + { 0, 0, 0, 0}, + { 0, 0, 0,128}, // PL + { 0, 0,128, 0}, // PUR + { 0, 0, 53, 75}, // PUR|PL + { 0,128, 0, 0}, // PU + { 0, 64, 0, 64}, // PU|PL + { 0,128, 0, 0}, // PU|PUR + { 0, 0, 53, 75}, // PU|PUR|PL + {128, 0, 0, 0}, // PUL + { 0, 0, 0,128}, // PUL|PL + { 64, 0, 64, 0}, // PUL|PUR + { 0, 0, 53, 75}, // PUL|PUR|PL + { 0,128, 0, 0}, // PUL|PU + {-104,116, 0,116}, // PUL|PU|PL + { 24, 80, 24, 0}, // PUL|PU|PUR + {-104,116, 0,116} // PUL|PU|PUR|PL }; /* This table shows which types of blocks can use other blocks for @@ -1523,113 +1400,33 @@ static void reverse_dc_prediction(Vp3DecodeContext *s, current_frame_type = compatible_frame[s->all_fragments[i].coding_method]; - predictor_group = (x == 0) + ((y == 0) << 1) + - ((x + 1 == fragment_width) << 2); - debug_dc_pred(" frag %d: group %d, orig DC = %d, ", - i, predictor_group, DC_COEFF(i)); - - switch (predictor_group) { - - case 0: - /* main body of fragments; consider all 4 possible - * fragments for prediction */ - - /* calculate the indices of the predicting fragments */ - ul = i - fragment_width - 1; - u = i - fragment_width; - ur = i - fragment_width + 1; - l = i - 1; - - /* fetch the DC values for the predicting fragments */ - vul = DC_COEFF(ul); - vu = DC_COEFF(u); - vur = DC_COEFF(ur); - vl = DC_COEFF(l); - - /* figure out which fragments are valid */ - ful = FRAME_CODED(ul) && COMPATIBLE_FRAME(ul); - fu = FRAME_CODED(u) && COMPATIBLE_FRAME(u); - fur = FRAME_CODED(ur) && COMPATIBLE_FRAME(ur); - fl = FRAME_CODED(l) && COMPATIBLE_FRAME(l); - - /* decide which predictor transform to use */ - transform = (fl*PL) | (fu*PU) | (ful*PUL) | (fur*PUR); - - break; - - case 1: - /* left column of fragments, not including top corner; - * only consider up and up-right fragments */ + debug_dc_pred(" frag %d: orig DC = %d, ", + i, DC_COEFF(i)); - /* calculate the indices of the predicting fragments */ - u = i - fragment_width; - ur = i - fragment_width + 1; - - /* fetch the DC values for the predicting fragments */ - vu = DC_COEFF(u); - vur = DC_COEFF(ur); - - /* figure out which fragments are valid */ - fur = FRAME_CODED(ur) && COMPATIBLE_FRAME(ur); - fu = FRAME_CODED(u) && COMPATIBLE_FRAME(u); - - /* decide which predictor transform to use */ - transform = (fu*PU) | (fur*PUR); - - break; - - case 2: - case 6: - /* top row of fragments, not including top-left frag; - * only consider the left fragment for prediction */ - - /* calculate the indices of the predicting fragments */ - l = i - 1; - - /* fetch the DC values for the predicting fragments */ + transform= 0; + if(x){ + l= i-1; vl = DC_COEFF(l); - - /* figure out which fragments are valid */ - fl = FRAME_CODED(l) && COMPATIBLE_FRAME(l); - - /* decide which predictor transform to use */ - transform = (fl*PL); - - break; - - case 3: - /* top-left fragment */ - - /* nothing to predict from in this case */ - transform = 0; - - break; - - case 4: - /* right column of fragments, not including top corner; - * consider up-left, up, and left fragments for - * prediction */ - - /* calculate the indices of the predicting fragments */ - ul = i - fragment_width - 1; - u = i - fragment_width; - l = i - 1; - - /* fetch the DC values for the predicting fragments */ - vul = DC_COEFF(ul); + if(FRAME_CODED(l) && COMPATIBLE_FRAME(l)) + transform |= PL; + } + if(y){ + u= i-fragment_width; vu = DC_COEFF(u); - vl = DC_COEFF(l); - - /* figure out which fragments are valid */ - ful = FRAME_CODED(ul) && COMPATIBLE_FRAME(ul); - fu = FRAME_CODED(u) && COMPATIBLE_FRAME(u); - fl = FRAME_CODED(l) && COMPATIBLE_FRAME(l); - - /* decide which predictor transform to use */ - transform = (fl*PL) | (fu*PU) | (ful*PUL); - - break; - + if(FRAME_CODED(u) && COMPATIBLE_FRAME(u)) + transform |= PU; + if(x){ + ul= i-fragment_width-1; + vul = DC_COEFF(ul); + if(FRAME_CODED(ul) && COMPATIBLE_FRAME(ul)) + transform |= PUL; + } + if(x + 1 < fragment_width){ + ur= i-fragment_width+1; + vur = DC_COEFF(ur); + if(FRAME_CODED(ur) && COMPATIBLE_FRAME(ur)) + transform |= PUR; + } } debug_dc_pred("transform = %d, ", transform); @@ -1651,22 +1448,16 @@ static void reverse_dc_prediction(Vp3DecodeContext *s, (predictor_transform[transform][2] * vur) + (predictor_transform[transform][3] * vl); - /* if there is a shift value in the transform, add - * the sign bit before the shift */ - if (predictor_transform[transform][5] != 0) { - predicted_dc += ((predicted_dc >> 15) & - predictor_transform[transform][4]); - predicted_dc >>= predictor_transform[transform][5]; - } + predicted_dc /= 128; /* check for outranging on the [ul u l] and * [ul u ur l] predictors */ if ((transform == 13) || (transform == 15)) { - if (iabs(predicted_dc - vu) > 128) + if (ABS(predicted_dc - vu) > 128) predicted_dc = vu; - else if (iabs(predicted_dc - vl) > 128) + else if (ABS(predicted_dc - vl) > 128) predicted_dc = vl; - else if (iabs(predicted_dc - vul) > 128) + else if (ABS(predicted_dc - vul) > 128) predicted_dc = vul; } @@ -1707,73 +1498,32 @@ static void vertical_filter(unsigned char *first_pixel, int stride, */ static void render_slice(Vp3DecodeContext *s, int slice) { - int x, y; + int x; int m, n; - int i; /* indicates current fragment */ int16_t *dequantizer; DECLARE_ALIGNED_16(DCTELEM, block[64]); - unsigned char *output_plane; - unsigned char *last_plane; - unsigned char *golden_plane; - int stride; int motion_x = 0xdeadbeef, motion_y = 0xdeadbeef; - int upper_motion_limit, lower_motion_limit; int motion_halfpel_index; uint8_t *motion_source; int plane; - int plane_width; - int plane_height; - int slice_height; int current_macroblock_entry = slice * s->macroblock_width * 6; - int fragment_width; if (slice >= s->macroblock_height) return; for (plane = 0; plane < 3; plane++) { + uint8_t *output_plane = s->current_frame.data [plane]; + uint8_t * last_plane = s-> last_frame.data [plane]; + uint8_t *golden_plane = s-> golden_frame.data [plane]; + int stride = s->current_frame.linesize[plane]; + int plane_width = s->width >> !!plane; + int plane_height = s->height >> !!plane; + int y = slice * FRAGMENT_PIXELS << !plane ; + int slice_height = y + (FRAGMENT_PIXELS << !plane); + int i = s->macroblock_fragments[current_macroblock_entry + plane + 3*!!plane]; + + if (!s->flipped_image) stride = -stride; - /* set up plane-specific parameters */ - if (plane == 0) { - output_plane = s->current_frame.data[0]; - last_plane = s->last_frame.data[0]; - golden_plane = s->golden_frame.data[0]; - stride = s->current_frame.linesize[0]; - if (!s->flipped_image) stride = -stride; - upper_motion_limit = 7 * s->current_frame.linesize[0]; - lower_motion_limit = s->height * s->current_frame.linesize[0] + s->width - 8; - y = slice * FRAGMENT_PIXELS * 2; - plane_width = s->width; - plane_height = s->height; - slice_height = y + FRAGMENT_PIXELS * 2; - i = s->macroblock_fragments[current_macroblock_entry + 0]; - } else if (plane == 1) { - output_plane = s->current_frame.data[1]; - last_plane = s->last_frame.data[1]; - golden_plane = s->golden_frame.data[1]; - stride = s->current_frame.linesize[1]; - if (!s->flipped_image) stride = -stride; - upper_motion_limit = 7 * s->current_frame.linesize[1]; - lower_motion_limit = (s->height / 2) * s->current_frame.linesize[1] + (s->width / 2) - 8; - y = slice * FRAGMENT_PIXELS; - plane_width = s->width / 2; - plane_height = s->height / 2; - slice_height = y + FRAGMENT_PIXELS; - i = s->macroblock_fragments[current_macroblock_entry + 4]; - } else { - output_plane = s->current_frame.data[2]; - last_plane = s->last_frame.data[2]; - golden_plane = s->golden_frame.data[2]; - stride = s->current_frame.linesize[2]; - if (!s->flipped_image) stride = -stride; - upper_motion_limit = 7 * s->current_frame.linesize[2]; - lower_motion_limit = (s->height / 2) * s->current_frame.linesize[2] + (s->width / 2) - 8; - y = slice * FRAGMENT_PIXELS; - plane_width = s->width / 2; - plane_height = s->height / 2; - slice_height = y + FRAGMENT_PIXELS; - i = s->macroblock_fragments[current_macroblock_entry + 5]; - } - fragment_width = plane_width / FRAGMENT_PIXELS; if(ABS(stride) > 2048) return; //various tables are fixed size @@ -1855,12 +1605,9 @@ static void render_slice(Vp3DecodeContext *s, int slice) motion_source + stride + 1 + d, stride, 8); } - dequantizer = s->inter_dequant; + dequantizer = s->qmat[1][plane]; }else{ - if (plane == 0) - dequantizer = s->intra_y_dequant; - else - dequantizer = s->intra_c_dequant; + dequantizer = s->qmat[0][plane]; } /* dequantize the DCT coefficients */ @@ -1935,7 +1682,7 @@ static void render_slice(Vp3DecodeContext *s, int slice) (s->all_fragments[i - 1].coding_method != MODE_COPY)) )) { horizontal_filter( output_plane + s->all_fragments[i].first_pixel + 7*stride, - -stride, bounding_values); + -stride, s->bounding_values_array + 127); } /* perform the top edge filter if: @@ -1951,7 +1698,7 @@ static void render_slice(Vp3DecodeContext *s, int slice) (s->all_fragments[i - fragment_width].coding_method != MODE_COPY)) )) { vertical_filter( output_plane + s->all_fragments[i].first_pixel - stride, - -stride, bounding_values); + -stride, s->bounding_values_array + 127); } #endif } @@ -1975,7 +1722,7 @@ static void horizontal_filter(unsigned char *first_pixel, int stride, unsigned char *end; int filter_value; - for (end= first_pixel + 8*stride; first_pixel < end; first_pixel += stride) { + for (end= first_pixel + 8*stride; first_pixel != end; first_pixel += stride) { filter_value = (first_pixel[-2] - first_pixel[ 1]) +3*(first_pixel[ 0] - first_pixel[-1]); @@ -2004,11 +1751,8 @@ static void vertical_filter(unsigned char *first_pixel, int stride, static void apply_loop_filter(Vp3DecodeContext *s) { - int x, y, plane; - int width, height; - int fragment; - int stride; - unsigned char *plane_data; + int plane; + int x, y; int *bounding_values= s->bounding_values_array+127; #if 0 @@ -2033,29 +1777,12 @@ static void apply_loop_filter(Vp3DecodeContext *s) #endif for (plane = 0; plane < 3; plane++) { - - if (plane == 0) { - /* Y plane parameters */ - fragment = 0; - width = s->fragment_width; - height = s->fragment_height; - stride = s->current_frame.linesize[0]; - plane_data = s->current_frame.data[0]; - } else if (plane == 1) { - /* U plane parameters */ - fragment = s->u_fragment_start; - width = s->fragment_width / 2; - height = s->fragment_height / 2; - stride = s->current_frame.linesize[1]; - plane_data = s->current_frame.data[1]; - } else { - /* V plane parameters */ - fragment = s->v_fragment_start; - width = s->fragment_width / 2; - height = s->fragment_height / 2; - stride = s->current_frame.linesize[2]; - plane_data = s->current_frame.data[2]; - } + int width = s->fragment_width >> !!plane; + int height = s->fragment_height >> !!plane; + int fragment = s->fragment_start [plane]; + int stride = s->current_frame.linesize[plane]; + uint8_t *plane_data = s->current_frame.data [plane]; + if (!s->flipped_image) stride = -stride; for (y = 0; y < height; y++) { @@ -2065,7 +1792,7 @@ START_TIMER if ((x > 0) && (s->all_fragments[fragment].coding_method != MODE_COPY)) { horizontal_filter( - plane_data + s->all_fragments[fragment].first_pixel - 7*stride, + plane_data + s->all_fragments[fragment].first_pixel, stride, bounding_values); } @@ -2073,7 +1800,7 @@ START_TIMER if ((y > 0) && (s->all_fragments[fragment].coding_method != MODE_COPY)) { vertical_filter( - plane_data + s->all_fragments[fragment].first_pixel + stride, + plane_data + s->all_fragments[fragment].first_pixel, stride, bounding_values); } @@ -2084,7 +1811,7 @@ START_TIMER (s->all_fragments[fragment].coding_method != MODE_COPY) && (s->all_fragments[fragment + 1].coding_method == MODE_COPY)) { horizontal_filter( - plane_data + s->all_fragments[fragment + 1].first_pixel - 7*stride, + plane_data + s->all_fragments[fragment + 1].first_pixel, stride, bounding_values); } @@ -2095,7 +1822,7 @@ START_TIMER (s->all_fragments[fragment].coding_method != MODE_COPY) && (s->all_fragments[fragment + width].coding_method == MODE_COPY)) { vertical_filter( - plane_data + s->all_fragments[fragment + width].first_pixel + stride, + plane_data + s->all_fragments[fragment + width].first_pixel, stride, bounding_values); } @@ -2131,7 +1858,7 @@ static void vp3_calculate_pixel_addresses(Vp3DecodeContext *s) } /* U plane */ - i = s->u_fragment_start; + i = s->fragment_start[1]; for (y = s->fragment_height / 2; y > 0; y--) { for (x = 0; x < s->fragment_width / 2; x++) { s->all_fragments[i++].first_pixel = @@ -2144,7 +1871,7 @@ static void vp3_calculate_pixel_addresses(Vp3DecodeContext *s) } /* V plane */ - i = s->v_fragment_start; + i = s->fragment_start[2]; for (y = s->fragment_height / 2; y > 0; y--) { for (x = 0; x < s->fragment_width / 2; x++) { s->all_fragments[i++].first_pixel = @@ -2178,7 +1905,7 @@ static void theora_calculate_pixel_addresses(Vp3DecodeContext *s) } /* U plane */ - i = s->u_fragment_start; + i = s->fragment_start[1]; for (y = 1; y <= s->fragment_height / 2; y++) { for (x = 0; x < s->fragment_width / 2; x++) { s->all_fragments[i++].first_pixel = @@ -2191,7 +1918,7 @@ static void theora_calculate_pixel_addresses(Vp3DecodeContext *s) } /* V plane */ - i = s->v_fragment_start; + i = s->fragment_start[2]; for (y = 1; y <= s->fragment_height / 2; y++) { for (x = 0; x < s->fragment_width / 2; x++) { s->all_fragments[i++].first_pixel = @@ -2210,7 +1937,7 @@ static void theora_calculate_pixel_addresses(Vp3DecodeContext *s) static int vp3_decode_init(AVCodecContext *avctx) { Vp3DecodeContext *s = avctx->priv_data; - int i; + int i, inter, plane; int c_width; int c_height; int y_superblock_count; @@ -2261,8 +1988,8 @@ static int vp3_decode_init(AVCodecContext *avctx) /* fragment count covers all 8x8 blocks for all 3 planes */ s->fragment_count = s->fragment_width * s->fragment_height * 3 / 2; - s->u_fragment_start = s->fragment_width * s->fragment_height; - s->v_fragment_start = s->fragment_width * s->fragment_height * 5 / 4; + s->fragment_start[1] = s->fragment_width * s->fragment_height; + s->fragment_start[2] = s->fragment_width * s->fragment_height * 5 / 4; debug_init(" Y plane: %d x %d\n", s->width, s->height); debug_init(" C plane: %d x %d\n", c_width, c_height); @@ -2278,8 +2005,8 @@ static int vp3_decode_init(AVCodecContext *avctx) s->fragment_count, s->fragment_width, s->fragment_height, - s->u_fragment_start, - s->v_fragment_start); + s->fragment_start[1], + s->fragment_start[2]); s->all_fragments = av_malloc(s->fragment_count * sizeof(Vp3Fragment)); s->coeffs = av_malloc(s->fragment_count * sizeof(Coeff) * 65); @@ -2293,14 +2020,23 @@ static int vp3_decode_init(AVCodecContext *avctx) for (i = 0; i < 64; i++) s->coded_ac_scale_factor[i] = vp31_ac_scale_factor[i]; for (i = 0; i < 64; i++) - s->coded_intra_y_dequant[i] = vp31_intra_y_dequant[i]; + s->base_matrix[0][i] = vp31_intra_y_dequant[i]; for (i = 0; i < 64; i++) - s->coded_intra_c_dequant[i] = vp31_intra_c_dequant[i]; + s->base_matrix[1][i] = vp31_intra_c_dequant[i]; for (i = 0; i < 64; i++) - s->coded_inter_dequant[i] = vp31_inter_dequant[i]; + s->base_matrix[2][i] = vp31_inter_dequant[i]; for (i = 0; i < 64; i++) s->filter_limit_values[i] = vp31_filter_limit_values[i]; + for(inter=0; inter<2; inter++){ + for(plane=0; plane<3; plane++){ + s->qr_count[inter][plane]= 1; + s->qr_size [inter][plane][0]= 63; + s->qr_base [inter][plane][0]= + s->qr_base [inter][plane][1]= 2*inter + (!!plane)*!inter; + } + } + /* init VLC tables */ for (i = 0; i < 16; i++) { @@ -2418,10 +2154,10 @@ static int vp3_decode_frame(AVCodecContext *avctx, switch(ptype) { case 1: - theora_decode_comments(avctx, gb); + theora_decode_comments(avctx, &gb); break; case 2: - theora_decode_tables(avctx, gb); + theora_decode_tables(avctx, &gb); init_dequantizer(s); break; default: @@ -2435,9 +2171,13 @@ static int vp3_decode_frame(AVCodecContext *avctx, if (!s->theora) skip_bits(&gb, 1); s->last_quality_index = s->quality_index; - s->quality_index = get_bits(&gb, 6); - if (s->theora >= 0x030200) - skip_bits1(&gb); + + s->nqis=0; + do{ + s->qis[s->nqis++]= get_bits(&gb, 6); + } while(s->theora >= 0x030200 && s->nqis<3 && get_bits1(&gb)); + + s->quality_index= s->qis[0]; if (s->avctx->debug & FF_DEBUG_PICT_INFO) av_log(s->avctx, AV_LOG_INFO, " VP3 %sframe #%d: Q index = %d\n", @@ -2486,7 +2226,7 @@ static int vp3_decode_frame(AVCodecContext *avctx, } /* golden frame is also the current frame */ - memcpy(&s->current_frame, &s->golden_frame, sizeof(AVFrame)); + s->current_frame= s->golden_frame; /* time to figure out pixel addresses? */ if (!s->pixel_addresses_inited) @@ -2495,10 +2235,15 @@ static int vp3_decode_frame(AVCodecContext *avctx, vp3_calculate_pixel_addresses(s); else theora_calculate_pixel_addresses(s); + s->pixel_addresses_inited = 1; } } else { /* allocate a new current frame */ s->current_frame.reference = 3; + if (!s->pixel_addresses_inited) { + av_log(s->avctx, AV_LOG_ERROR, "vp3: first frame not a keyframe\n"); + return -1; + } if(avctx->get_buffer(avctx, &s->current_frame) < 0) { av_log(s->avctx, AV_LOG_ERROR, "vp3: get_buffer() failed\n"); return -1; @@ -2553,9 +2298,9 @@ if (!s->keyframe) { reverse_dc_prediction(s, 0, s->fragment_width, s->fragment_height); if ((avctx->flags & CODEC_FLAG_GRAY) == 0) { - reverse_dc_prediction(s, s->u_fragment_start, + reverse_dc_prediction(s, s->fragment_start[1], s->fragment_width / 2, s->fragment_height / 2); - reverse_dc_prediction(s, s->v_fragment_start, + reverse_dc_prediction(s, s->fragment_start[2], s->fragment_width / 2, s->fragment_height / 2); } STOP_TIMER("reverse_dc_prediction")} @@ -2582,7 +2327,7 @@ if (!s->keyframe) { avctx->release_buffer(avctx, &s->last_frame); /* shuffle frames (last = current) */ - memcpy(&s->last_frame, &s->current_frame, sizeof(AVFrame)); + s->last_frame= s->current_frame; s->current_frame.data[0]= NULL; /* ensure that we catch any access to this released frame */ return buf_size; @@ -2646,19 +2391,12 @@ static int read_huffman_tree(AVCodecContext *avctx, GetBitContext *gb) return 0; } -static int theora_decode_header(AVCodecContext *avctx, GetBitContext gb) +static int theora_decode_header(AVCodecContext *avctx, GetBitContext *gb) { Vp3DecodeContext *s = avctx->priv_data; - int major, minor, micro; - major = get_bits(&gb, 8); /* version major */ - minor = get_bits(&gb, 8); /* version minor */ - micro = get_bits(&gb, 8); /* version micro */ - av_log(avctx, AV_LOG_INFO, "Theora bitstream version %d.%d.%d\n", - major, minor, micro); - - /* FIXME: endianess? */ - s->theora = (major << 16) | (minor << 8) | micro; + s->theora = get_bits_long(gb, 24); + av_log(avctx, AV_LOG_INFO, "Theora bitstream version %X\n", s->theora); /* 3.2.0 aka alpha3 has the same frame orientation as original vp3 */ /* but previous versions have the image flipped relative to vp3 */ @@ -2668,8 +2406,8 @@ static int theora_decode_header(AVCodecContext *avctx, GetBitContext gb) av_log(avctx, AV_LOG_DEBUG, "Old (<alpha3) Theora bitstream, flipped image\n"); } - s->width = get_bits(&gb, 16) << 4; - s->height = get_bits(&gb, 16) << 4; + s->width = get_bits(gb, 16) << 4; + s->height = get_bits(gb, 16) << 4; if(avcodec_check_dimensions(avctx, s->width, s->height)){ av_log(avctx, AV_LOG_ERROR, "Invalid dimensions (%dx%d)\n", s->width, s->height); @@ -2679,47 +2417,49 @@ static int theora_decode_header(AVCodecContext *avctx, GetBitContext gb) if (s->theora >= 0x030400) { - skip_bits(&gb, 32); /* total number of superblocks in a frame */ + skip_bits(gb, 32); /* total number of superblocks in a frame */ // fixme, the next field is 36bits long - skip_bits(&gb, 32); /* total number of blocks in a frame */ - skip_bits(&gb, 4); /* total number of blocks in a frame */ - skip_bits(&gb, 32); /* total number of macroblocks in a frame */ + skip_bits(gb, 32); /* total number of blocks in a frame */ + skip_bits(gb, 4); /* total number of blocks in a frame */ + skip_bits(gb, 32); /* total number of macroblocks in a frame */ - skip_bits(&gb, 24); /* frame width */ - skip_bits(&gb, 24); /* frame height */ + skip_bits(gb, 24); /* frame width */ + skip_bits(gb, 24); /* frame height */ } else { - skip_bits(&gb, 24); /* frame width */ - skip_bits(&gb, 24); /* frame height */ + skip_bits(gb, 24); /* frame width */ + skip_bits(gb, 24); /* frame height */ } - skip_bits(&gb, 8); /* offset x */ - skip_bits(&gb, 8); /* offset y */ + if (s->theora >= 0x030200) { + skip_bits(gb, 8); /* offset x */ + skip_bits(gb, 8); /* offset y */ + } - skip_bits(&gb, 32); /* fps numerator */ - skip_bits(&gb, 32); /* fps denumerator */ - skip_bits(&gb, 24); /* aspect numerator */ - skip_bits(&gb, 24); /* aspect denumerator */ + skip_bits(gb, 32); /* fps numerator */ + skip_bits(gb, 32); /* fps denumerator */ + skip_bits(gb, 24); /* aspect numerator */ + skip_bits(gb, 24); /* aspect denumerator */ if (s->theora < 0x030200) - skip_bits(&gb, 5); /* keyframe frequency force */ - skip_bits(&gb, 8); /* colorspace */ + skip_bits(gb, 5); /* keyframe frequency force */ + skip_bits(gb, 8); /* colorspace */ if (s->theora >= 0x030400) - skip_bits(&gb, 2); /* pixel format: 420,res,422,444 */ - skip_bits(&gb, 24); /* bitrate */ + skip_bits(gb, 2); /* pixel format: 420,res,422,444 */ + skip_bits(gb, 24); /* bitrate */ - skip_bits(&gb, 6); /* quality hint */ + skip_bits(gb, 6); /* quality hint */ if (s->theora >= 0x030200) { - skip_bits(&gb, 5); /* keyframe frequency force */ + skip_bits(gb, 5); /* keyframe frequency force */ if (s->theora < 0x030400) - skip_bits(&gb, 5); /* spare bits */ + skip_bits(gb, 5); /* spare bits */ } -// align_get_bits(&gb); +// align_get_bits(gb); avctx->width = s->width; avctx->height = s->height; @@ -2727,132 +2467,89 @@ static int theora_decode_header(AVCodecContext *avctx, GetBitContext gb) return 0; } -static inline int theora_get_32bit(GetBitContext gb) -{ - int ret = get_bits(&gb, 8); - ret += get_bits(&gb, 8) << 8; - ret += get_bits(&gb, 8) << 16; - ret += get_bits(&gb, 8) << 24; - - return ret; -} - -static int theora_decode_comments(AVCodecContext *avctx, GetBitContext gb) -{ - Vp3DecodeContext *s = avctx->priv_data; - int len; - - if (s->theora <= 0x030200) - { - int i, comments; - - // vendor string - len = get_bits_long(&gb, 32); - len = le2me_32(len); - while(len--) - skip_bits(&gb, 8); - - // user comments - comments = get_bits_long(&gb, 32); - comments = le2me_32(comments); - for (i = 0; i < comments; i++) - { - len = get_bits_long(&gb, 32); - len = be2me_32(len); - while(len--) - skip_bits(&gb, 8); - } - } - else - { - do { - len = get_bits_long(&gb, 32); - len = le2me_32(len); - if (len <= 0) - break; - while (len--) - skip_bits(&gb, 8); - } while (1); - } - return 0; -} - -static int theora_decode_tables(AVCodecContext *avctx, GetBitContext gb) +static int theora_decode_tables(AVCodecContext *avctx, GetBitContext *gb) { Vp3DecodeContext *s = avctx->priv_data; - int i, n, matrices; + int i, n, matrices, inter, plane; if (s->theora >= 0x030200) { - n = get_bits(&gb, 3); + n = get_bits(gb, 3); /* loop filter limit values table */ for (i = 0; i < 64; i++) - s->filter_limit_values[i] = get_bits(&gb, n); + s->filter_limit_values[i] = get_bits(gb, n); } if (s->theora >= 0x030200) - n = get_bits(&gb, 4) + 1; + n = get_bits(gb, 4) + 1; else n = 16; /* quality threshold table */ for (i = 0; i < 64; i++) - s->coded_ac_scale_factor[i] = get_bits(&gb, n); + s->coded_ac_scale_factor[i] = get_bits(gb, n); if (s->theora >= 0x030200) - n = get_bits(&gb, 4) + 1; + n = get_bits(gb, 4) + 1; else n = 16; /* dc scale factor table */ for (i = 0; i < 64; i++) - s->coded_dc_scale_factor[i] = get_bits(&gb, n); + s->coded_dc_scale_factor[i] = get_bits(gb, n); if (s->theora >= 0x030200) - matrices = get_bits(&gb, 9) + 1; + matrices = get_bits(gb, 9) + 1; else matrices = 3; - if (matrices != 3) { - av_log(avctx,AV_LOG_ERROR, "unsupported matrices: %d\n", matrices); -// return -1; - } - /* y coeffs */ - for (i = 0; i < 64; i++) - s->coded_intra_y_dequant[i] = get_bits(&gb, 8); - - /* uv coeffs */ - for (i = 0; i < 64; i++) - s->coded_intra_c_dequant[i] = get_bits(&gb, 8); - /* inter coeffs */ - for (i = 0; i < 64; i++) - s->coded_inter_dequant[i] = get_bits(&gb, 8); + if(matrices > 384){ + av_log(avctx, AV_LOG_ERROR, "invalid number of base matrixes\n"); + return -1; + } - /* skip unknown matrices */ - n = matrices - 3; - while(n--) + for(n=0; n<matrices; n++){ for (i = 0; i < 64; i++) - skip_bits(&gb, 8); + s->base_matrix[n][i]= get_bits(gb, 8); + } - for (i = 0; i <= 1; i++) { - for (n = 0; n <= 2; n++) { - int newqr; - if (i > 0 || n > 0) - newqr = get_bits(&gb, 1); - else - newqr = 1; + for (inter = 0; inter <= 1; inter++) { + for (plane = 0; plane <= 2; plane++) { + int newqr= 1; + if (inter || plane > 0) + newqr = get_bits(gb, 1); if (!newqr) { - if (i > 0) - get_bits(&gb, 1); - } - else { + int qtj, plj; + if(inter && get_bits(gb, 1)){ + qtj = 0; + plj = plane; + }else{ + qtj= (3*inter + plane - 1) / 3; + plj= (plane + 2) % 3; + } + s->qr_count[inter][plane]= s->qr_count[qtj][plj]; + memcpy(s->qr_size[inter][plane], s->qr_size[qtj][plj], sizeof(s->qr_size[0][0])); + memcpy(s->qr_base[inter][plane], s->qr_base[qtj][plj], sizeof(s->qr_base[0][0])); + } else { + int qri= 0; int qi = 0; - skip_bits(&gb, av_log2(matrices-1)+1); - while (qi < 63) { - qi += get_bits(&gb, av_log2(63-qi)+1) + 1; - skip_bits(&gb, av_log2(matrices-1)+1); + + for(;;){ + i= get_bits(gb, av_log2(matrices-1)+1); + if(i>= matrices){ + av_log(avctx, AV_LOG_ERROR, "invalid base matrix index\n"); + return -1; + } + s->qr_base[inter][plane][qri]= i; + if(qi >= 63) + break; + i = get_bits(gb, av_log2(63-qi)+1) + 1; + s->qr_size[inter][plane][qri++]= i; + qi += i; } + if (qi > 63) { av_log(avctx, AV_LOG_ERROR, "invalid qi %d > 63\n", qi); return -1; } + s->qr_count[inter][plane]= qri; } } } @@ -2861,11 +2558,11 @@ static int theora_decode_tables(AVCodecContext *avctx, GetBitContext gb) for (s->hti = 0; s->hti < 80; s->hti++) { s->entries = 0; s->huff_code_size = 1; - if (!get_bits(&gb, 1)) { + if (!get_bits(gb, 1)) { s->hbits = 0; - read_huffman_tree(avctx, &gb); + read_huffman_tree(avctx, gb); s->hbits = 1; - read_huffman_tree(avctx, &gb); + read_huffman_tree(avctx, gb); } } @@ -2903,7 +2600,7 @@ static int theora_decode_init(AVCodecContext *avctx) if (!(ptype & 0x80)) { av_log(avctx, AV_LOG_ERROR, "Invalid extradata!\n"); - return -1; +// return -1; } // FIXME: check for this aswell @@ -2912,19 +2609,23 @@ static int theora_decode_init(AVCodecContext *avctx) switch(ptype) { case 0x80: - theora_decode_header(avctx, gb); + theora_decode_header(avctx, &gb); break; case 0x81: // FIXME: is this needed? it breaks sometimes // theora_decode_comments(avctx, gb); break; case 0x82: - theora_decode_tables(avctx, gb); + theora_decode_tables(avctx, &gb); break; default: av_log(avctx, AV_LOG_ERROR, "Unknown Theora config packet: %d\n", ptype&~0x80); break; } + if(8*op_bytes != get_bits_count(&gb)) + av_log(avctx, AV_LOG_ERROR, "%d bits left in packet %X\n", 8*op_bytes - get_bits_count(&gb), ptype); + if (s->theora < 0x030200) + break; } vp3_decode_init(avctx); diff --git a/src/libffmpeg/libavcodec/vp3dsp.c b/src/libffmpeg/libavcodec/vp3dsp.c index 0cbe8d551..f5a1fb6ff 100644 --- a/src/libffmpeg/libavcodec/vp3dsp.c +++ b/src/libffmpeg/libavcodec/vp3dsp.c @@ -35,14 +35,15 @@ #define xC6S2 25080 #define xC7S1 12785 +#define M(a,b) (((a) * (b))>>16) + static always_inline void idct(uint8_t *dst, int stride, int16_t *input, int type) { int16_t *ip = input; uint8_t *cm = cropTbl + MAX_NEG_CROP; - int A_, B_, C_, D_, _Ad, _Bd, _Cd, _Dd, E_, F_, G_, H_; - int _Ed, _Gd, _Add, _Bdd, _Fd, _Hd; - int t1, t2; + int A, B, C, D, Ad, Bd, Cd, Dd, E, F, G, H; + int Ed, Gd, Add, Bdd, Fd, Hd; int i; @@ -50,86 +51,44 @@ static always_inline void idct(uint8_t *dst, int stride, int16_t *input, int typ for (i = 0; i < 8; i++) { /* Check for non-zero values */ if ( ip[0] | ip[1] | ip[2] | ip[3] | ip[4] | ip[5] | ip[6] | ip[7] ) { - t1 = (int32_t)(xC1S7 * ip[1]); - t2 = (int32_t)(xC7S1 * ip[7]); - t1 >>= 16; - t2 >>= 16; - A_ = t1 + t2; - - t1 = (int32_t)(xC7S1 * ip[1]); - t2 = (int32_t)(xC1S7 * ip[7]); - t1 >>= 16; - t2 >>= 16; - B_ = t1 - t2; - - t1 = (int32_t)(xC3S5 * ip[3]); - t2 = (int32_t)(xC5S3 * ip[5]); - t1 >>= 16; - t2 >>= 16; - C_ = t1 + t2; - - t1 = (int32_t)(xC3S5 * ip[5]); - t2 = (int32_t)(xC5S3 * ip[3]); - t1 >>= 16; - t2 >>= 16; - D_ = t1 - t2; - - - t1 = (int32_t)(xC4S4 * (A_ - C_)); - t1 >>= 16; - _Ad = t1; + A = M(xC1S7, ip[1]) + M(xC7S1, ip[7]); + B = M(xC7S1, ip[1]) - M(xC1S7, ip[7]); + C = M(xC3S5, ip[3]) + M(xC5S3, ip[5]); + D = M(xC3S5, ip[5]) - M(xC5S3, ip[3]); - t1 = (int32_t)(xC4S4 * (B_ - D_)); - t1 >>= 16; - _Bd = t1; + Ad = M(xC4S4, (A - C)); + Bd = M(xC4S4, (B - D)); + Cd = A + C; + Dd = B + D; - _Cd = A_ + C_; - _Dd = B_ + D_; + E = M(xC4S4, (ip[0] + ip[4])); + F = M(xC4S4, (ip[0] - ip[4])); - t1 = (int32_t)(xC4S4 * (ip[0] + ip[4])); - t1 >>= 16; - E_ = t1; + G = M(xC2S6, ip[2]) + M(xC6S2, ip[6]); + H = M(xC6S2, ip[2]) - M(xC2S6, ip[6]); - t1 = (int32_t)(xC4S4 * (ip[0] - ip[4])); - t1 >>= 16; - F_ = t1; + Ed = E - G; + Gd = E + G; - t1 = (int32_t)(xC2S6 * ip[2]); - t2 = (int32_t)(xC6S2 * ip[6]); - t1 >>= 16; - t2 >>= 16; - G_ = t1 + t2; + Add = F + Ad; + Bdd = Bd - H; - t1 = (int32_t)(xC6S2 * ip[2]); - t2 = (int32_t)(xC2S6 * ip[6]); - t1 >>= 16; - t2 >>= 16; - H_ = t1 - t2; - - - _Ed = E_ - G_; - _Gd = E_ + G_; - - _Add = F_ + _Ad; - _Bdd = _Bd - H_; - - _Fd = F_ - _Ad; - _Hd = _Bd + H_; + Fd = F - Ad; + Hd = Bd + H; /* Final sequence of operations over-write original inputs. */ - ip[0] = _Gd + _Cd ; - ip[7] = _Gd - _Cd ; - - ip[1] = _Add + _Hd; - ip[2] = _Add - _Hd; + ip[0] = Gd + Cd ; + ip[7] = Gd - Cd ; - ip[3] = _Ed + _Dd ; - ip[4] = _Ed - _Dd ; + ip[1] = Add + Hd; + ip[2] = Add - Hd; - ip[5] = _Fd + _Bdd; - ip[6] = _Fd - _Bdd; + ip[3] = Ed + Dd ; + ip[4] = Ed - Dd ; + ip[5] = Fd + Bdd; + ip[6] = Fd - Bdd; } ip += 8; /* next row */ @@ -142,121 +101,74 @@ static always_inline void idct(uint8_t *dst, int stride, int16_t *input, int typ if ( ip[1 * 8] | ip[2 * 8] | ip[3 * 8] | ip[4 * 8] | ip[5 * 8] | ip[6 * 8] | ip[7 * 8] ) { - t1 = (int32_t)(xC1S7 * ip[1*8]); - t2 = (int32_t)(xC7S1 * ip[7*8]); - t1 >>= 16; - t2 >>= 16; - A_ = t1 + t2; - - t1 = (int32_t)(xC7S1 * ip[1*8]); - t2 = (int32_t)(xC1S7 * ip[7*8]); - t1 >>= 16; - t2 >>= 16; - B_ = t1 - t2; + A = M(xC1S7, ip[1*8]) + M(xC7S1, ip[7*8]); + B = M(xC7S1, ip[1*8]) - M(xC1S7, ip[7*8]); + C = M(xC3S5, ip[3*8]) + M(xC5S3, ip[5*8]); + D = M(xC3S5, ip[5*8]) - M(xC5S3, ip[3*8]); - t1 = (int32_t)(xC3S5 * ip[3*8]); - t2 = (int32_t)(xC5S3 * ip[5*8]); - t1 >>= 16; - t2 >>= 16; - C_ = t1 + t2; + Ad = M(xC4S4, (A - C)); + Bd = M(xC4S4, (B - D)); - t1 = (int32_t)(xC3S5 * ip[5*8]); - t2 = (int32_t)(xC5S3 * ip[3*8]); - t1 >>= 16; - t2 >>= 16; - D_ = t1 - t2; + Cd = A + C; + Dd = B + D; + E = M(xC4S4, (ip[0*8] + ip[4*8])) + 8; + F = M(xC4S4, (ip[0*8] - ip[4*8])) + 8; - t1 = (int32_t)(xC4S4 * (A_ - C_)); - t1 >>= 16; - _Ad = t1; - - t1 = (int32_t)(xC4S4 * (B_ - D_)); - t1 >>= 16; - _Bd = t1; - - - _Cd = A_ + C_; - _Dd = B_ + D_; - - t1 = (int32_t)(xC4S4 * (ip[0*8] + ip[4*8])); - t1 >>= 16; - E_ = t1; - - t1 = (int32_t)(xC4S4 * (ip[0*8] - ip[4*8])); - t1 >>= 16; - F_ = t1; - - t1 = (int32_t)(xC2S6 * ip[2*8]); - t2 = (int32_t)(xC6S2 * ip[6*8]); - t1 >>= 16; - t2 >>= 16; - G_ = t1 + t2; - - t1 = (int32_t)(xC6S2 * ip[2*8]); - t2 = (int32_t)(xC2S6 * ip[6*8]); - t1 >>= 16; - t2 >>= 16; - H_ = t1 - t2; - + if(type==1){ //HACK + E += 16*128; + F += 16*128; + } - _Ed = E_ - G_; - _Gd = E_ + G_; + G = M(xC2S6, ip[2*8]) + M(xC6S2, ip[6*8]); + H = M(xC6S2, ip[2*8]) - M(xC2S6, ip[6*8]); - _Add = F_ + _Ad; - _Bdd = _Bd - H_; + Ed = E - G; + Gd = E + G; - _Fd = F_ - _Ad; - _Hd = _Bd + H_; + Add = F + Ad; + Bdd = Bd - H; - if(type==1){ //HACK - _Gd += 16*128; - _Add+= 16*128; - _Ed += 16*128; - _Fd += 16*128; - } - _Gd += IdctAdjustBeforeShift; - _Add += IdctAdjustBeforeShift; - _Ed += IdctAdjustBeforeShift; - _Fd += IdctAdjustBeforeShift; + Fd = F - Ad; + Hd = Bd + H; /* Final sequence of operations over-write original inputs. */ if(type==0){ - ip[0*8] = (_Gd + _Cd ) >> 4; - ip[7*8] = (_Gd - _Cd ) >> 4; + ip[0*8] = (Gd + Cd ) >> 4; + ip[7*8] = (Gd - Cd ) >> 4; - ip[1*8] = (_Add + _Hd ) >> 4; - ip[2*8] = (_Add - _Hd ) >> 4; + ip[1*8] = (Add + Hd ) >> 4; + ip[2*8] = (Add - Hd ) >> 4; - ip[3*8] = (_Ed + _Dd ) >> 4; - ip[4*8] = (_Ed - _Dd ) >> 4; + ip[3*8] = (Ed + Dd ) >> 4; + ip[4*8] = (Ed - Dd ) >> 4; - ip[5*8] = (_Fd + _Bdd ) >> 4; - ip[6*8] = (_Fd - _Bdd ) >> 4; + ip[5*8] = (Fd + Bdd ) >> 4; + ip[6*8] = (Fd - Bdd ) >> 4; }else if(type==1){ - dst[0*stride] = cm[(_Gd + _Cd ) >> 4]; - dst[7*stride] = cm[(_Gd - _Cd ) >> 4]; + dst[0*stride] = cm[(Gd + Cd ) >> 4]; + dst[7*stride] = cm[(Gd - Cd ) >> 4]; - dst[1*stride] = cm[(_Add + _Hd ) >> 4]; - dst[2*stride] = cm[(_Add - _Hd ) >> 4]; + dst[1*stride] = cm[(Add + Hd ) >> 4]; + dst[2*stride] = cm[(Add - Hd ) >> 4]; - dst[3*stride] = cm[(_Ed + _Dd ) >> 4]; - dst[4*stride] = cm[(_Ed - _Dd ) >> 4]; + dst[3*stride] = cm[(Ed + Dd ) >> 4]; + dst[4*stride] = cm[(Ed - Dd ) >> 4]; - dst[5*stride] = cm[(_Fd + _Bdd ) >> 4]; - dst[6*stride] = cm[(_Fd - _Bdd ) >> 4]; + dst[5*stride] = cm[(Fd + Bdd ) >> 4]; + dst[6*stride] = cm[(Fd - Bdd ) >> 4]; }else{ - dst[0*stride] = cm[dst[0*stride] + ((_Gd + _Cd ) >> 4)]; - dst[7*stride] = cm[dst[7*stride] + ((_Gd - _Cd ) >> 4)]; + dst[0*stride] = cm[dst[0*stride] + ((Gd + Cd ) >> 4)]; + dst[7*stride] = cm[dst[7*stride] + ((Gd - Cd ) >> 4)]; - dst[1*stride] = cm[dst[1*stride] + ((_Add + _Hd ) >> 4)]; - dst[2*stride] = cm[dst[2*stride] + ((_Add - _Hd ) >> 4)]; + dst[1*stride] = cm[dst[1*stride] + ((Add + Hd ) >> 4)]; + dst[2*stride] = cm[dst[2*stride] + ((Add - Hd ) >> 4)]; - dst[3*stride] = cm[dst[3*stride] + ((_Ed + _Dd ) >> 4)]; - dst[4*stride] = cm[dst[4*stride] + ((_Ed - _Dd ) >> 4)]; + dst[3*stride] = cm[dst[3*stride] + ((Ed + Dd ) >> 4)]; + dst[4*stride] = cm[dst[4*stride] + ((Ed - Dd ) >> 4)]; - dst[5*stride] = cm[dst[5*stride] + ((_Fd + _Bdd ) >> 4)]; - dst[6*stride] = cm[dst[6*stride] + ((_Fd - _Bdd ) >> 4)]; + dst[5*stride] = cm[dst[5*stride] + ((Fd + Bdd ) >> 4)]; + dst[6*stride] = cm[dst[6*stride] + ((Fd - Bdd ) >> 4)]; } } else { diff --git a/src/libffmpeg/libavcodec/wmadec.c b/src/libffmpeg/libavcodec/wmadec.c index c557a2a7a..227c9695b 100644 --- a/src/libffmpeg/libavcodec/wmadec.c +++ b/src/libffmpeg/libavcodec/wmadec.c @@ -57,6 +57,13 @@ #define LSP_POW_BITS 7 #define VLCBITS 9 +#define VLCMAX ((22+VLCBITS-1)/VLCBITS) + +#define EXPVLCBITS 8 +#define EXPMAX ((19+EXPVLCBITS-1)/EXPVLCBITS) + +#define HGAINVLCBITS 9 +#define HGAINMAX ((13+HGAINVLCBITS-1)/HGAINVLCBITS) typedef struct WMADecodeContext { GetBitContext gb; @@ -185,7 +192,7 @@ static void init_coef_vlc(VLC *vlc, const uint16_t *p; int i, l, j, level; - init_vlc(vlc, 9, n, table_bits, 1, 1, table_codes, 4, 4, 0); + init_vlc(vlc, VLCBITS, n, table_bits, 1, 1, table_codes, 4, 4, 0); run_table = av_malloc(n * sizeof(uint16_t)); level_table = av_malloc(n * sizeof(uint16_t)); @@ -494,13 +501,13 @@ static int wma_decode_init(AVCodecContext * avctx) } } #endif - init_vlc(&s->hgain_vlc, 9, sizeof(hgain_huffbits), + init_vlc(&s->hgain_vlc, HGAINVLCBITS, sizeof(hgain_huffbits), hgain_huffbits, 1, 1, hgain_huffcodes, 2, 2, 0); } if (s->use_exp_vlc) { - init_vlc(&s->exp_vlc, 9, sizeof(scale_huffbits), + init_vlc(&s->exp_vlc, EXPVLCBITS, sizeof(scale_huffbits), scale_huffbits, 1, 1, scale_huffcodes, 4, 4, 0); } else { @@ -681,7 +688,7 @@ static int decode_exp_vlc(WMADecodeContext *s, int ch) } last_exp = 36; while (q < q_end) { - code = get_vlc2(&s->gb, s->exp_vlc.table, VLCBITS, 2); + code = get_vlc2(&s->gb, s->exp_vlc.table, EXPVLCBITS, EXPMAX); if (code < 0) return -1; /* NOTE: this offset is the same as MPEG4 AAC ! */ @@ -822,7 +829,7 @@ static int wma_decode_block(WMADecodeContext *s) if (val == (int)0x80000000) { val = get_bits(&s->gb, 7) - 19; } else { - code = get_vlc2(&s->gb, s->hgain_vlc.table, VLCBITS, 2); + code = get_vlc2(&s->gb, s->hgain_vlc.table, HGAINVLCBITS, HGAINMAX); if (code < 0) return -1; val += code - 18; @@ -879,7 +886,7 @@ static int wma_decode_block(WMADecodeContext *s) eptr = ptr + nb_coefs[ch]; memset(ptr, 0, s->block_len * sizeof(int16_t)); for(;;) { - code = get_vlc2(&s->gb, coef_vlc->table, VLCBITS, 3); + code = get_vlc2(&s->gb, coef_vlc->table, VLCBITS, VLCMAX); if (code < 0) return -1; if (code == 1) { @@ -901,7 +908,10 @@ static int wma_decode_block(WMADecodeContext *s) level = -level; ptr += run; if (ptr >= eptr) - return -1; + { + av_log(NULL, AV_LOG_ERROR, "overflow in spectral RLE, ignoring\n"); + break; + } *ptr++ = level; /* NOTE: EOB can be omitted */ if (ptr >= eptr) @@ -1229,7 +1239,7 @@ static int wma_decode_superframe(AVCodecContext *avctx, goto fail; q = s->last_superframe + s->last_superframe_len; len = bit_offset; - while (len > 0) { + while (len > 7) { *q++ = (get_bits)(&s->gb, 8); len -= 8; } diff --git a/src/libffmpeg/libavcodec/wmv2.c b/src/libffmpeg/libavcodec/wmv2.c index dd88b7d28..3f405af4f 100644 --- a/src/libffmpeg/libavcodec/wmv2.c +++ b/src/libffmpeg/libavcodec/wmv2.c @@ -207,7 +207,6 @@ void ff_wmv2_encode_mb(MpegEncContext * s, if (!s->mb_intra) { /* compute cbp */ - set_stat(ST_INTER_MB); cbp = 0; for (i = 0; i < 6; i++) { if (s->block_last_index[i] >= 0) @@ -244,7 +243,6 @@ void ff_wmv2_encode_mb(MpegEncContext * s, #endif if (s->pict_type == I_TYPE) { - set_stat(ST_INTRA_MB); put_bits(&s->pb, ff_msmp4_mb_i_table[coded_cbp][1], ff_msmp4_mb_i_table[coded_cbp][0]); } else { @@ -252,7 +250,6 @@ void ff_wmv2_encode_mb(MpegEncContext * s, wmv2_inter_table[w->cbp_table_index][cbp][1], wmv2_inter_table[w->cbp_table_index][cbp][0]); } - set_stat(ST_INTRA_MB); put_bits(&s->pb, 1, 0); /* no AC prediction yet */ if(s->inter_intra_pred){ s->h263_aic_dir=0; |