diff options
Diffstat (limited to 'src/libffmpeg/libavcodec/dv.c')
-rw-r--r-- | src/libffmpeg/libavcodec/dv.c | 295 |
1 files changed, 216 insertions, 79 deletions
diff --git a/src/libffmpeg/libavcodec/dv.c b/src/libffmpeg/libavcodec/dv.c index 08611a900..c39d70c54 100644 --- a/src/libffmpeg/libavcodec/dv.c +++ b/src/libffmpeg/libavcodec/dv.c @@ -6,6 +6,9 @@ * DV encoder * Copyright (c) 2003 Roman Shaposhnik. * + * 50 Mbps (DVCPRO50) support + * Copyright (c) 2006 Daniel Maas <dmaas@maasdigital.com> + * * Many thanks to Dan Dennedy <dan@dennedy.org> for providing wealth * of DV technical info. * @@ -51,8 +54,12 @@ typedef struct DVVideoContext { void (*idct_put[2])(uint8_t *dest, int line_size, DCTELEM *block); } DVVideoContext; -/* MultiThreading - applies to entire DV codec, not just the avcontext */ -uint8_t** dv_anchor; +/* MultiThreading - dv_anchor applies to entire DV codec, not just the avcontext */ +/* one element is needed for each video segment in a DV frame */ +/* at most there are 2 DIF channels * 12 DIF sequences * 27 video segments (PAL 50Mbps) */ +#define DV_ANCHOR_SIZE (2*12*27) + +static void* dv_anchor[DV_ANCHOR_SIZE]; #define TEX_VLC_BITS 9 @@ -118,11 +125,7 @@ static int dvvideo_init(AVCodecContext *avctx) return -ENOMEM; /* dv_anchor lets each thread know its Id */ - dv_anchor = av_malloc(12*27*sizeof(void*)); - if (!dv_anchor) { - return -ENOMEM; - } - for (i=0; i<12*27; i++) + for (i=0; i<DV_ANCHOR_SIZE; i++) dv_anchor[i] = (void*)(size_t)i; /* it's faster to include sign bit in a generic VLC parsing scheme */ @@ -238,9 +241,6 @@ static int dvvideo_init(AVCodecContext *avctx) /* XXX: do it only for constant case */ dv_build_unquantize_tables(s, dsp.idct_permutation); - /* FIXME: I really don't think this should be here */ - if (dv_codec_profile(avctx)) - avctx->pix_fmt = dv_codec_profile(avctx)->pix_fmt; avctx->coded_frame = &s->picture; s->avctx= avctx; @@ -253,6 +253,7 @@ static int dvvideo_init(AVCodecContext *avctx) typedef struct BlockInfo { const uint8_t *shift_table; const uint8_t *scan_table; + const int *iweight_table; uint8_t pos; /* position in block */ uint8_t dct_mode; uint8_t partial_bit_count; @@ -295,6 +296,7 @@ static void dv_decode_ac(GetBitContext *gb, BlockInfo *mb, DCTELEM *block) int last_index = get_bits_size(gb); const uint8_t *scan_table = mb->scan_table; const uint8_t *shift_table = mb->shift_table; + const int *iweight_table = mb->iweight_table; int pos = mb->pos; int partial_bit_count = mb->partial_bit_count; int level, pos1, run, vlc_len, index; @@ -342,9 +344,13 @@ static void dv_decode_ac(GetBitContext *gb, BlockInfo *mb, DCTELEM *block) if (pos >= 64) break; - assert(level); pos1 = scan_table[pos]; - block[pos1] = level << shift_table[pos1]; + level <<= shift_table[pos1]; + + /* unweigh, round, and shift down */ + level = (level*iweight_table[pos] + (1 << (dv_iweight_bits-1))) >> dv_iweight_bits; + + block[pos1] = level; UPDATE_CACHE(re, gb); } @@ -410,6 +416,7 @@ static inline void dv_decode_video_segment(DVVideoContext *s, dct_mode = get_bits1(&gb); mb->dct_mode = dct_mode; mb->scan_table = s->dv_zigzag[dct_mode]; + mb->iweight_table = dct_mode ? dv_iweight_248 : dv_iweight_88; class1 = get_bits(&gb, 2); mb->shift_table = s->dv_idct_shift[class1 == 3][dct_mode] [quant + dv_quant_offset[class1]]; @@ -488,45 +495,63 @@ static inline void dv_decode_video_segment(DVVideoContext *s, v = *mb_pos_ptr++; mb_x = v & 0xff; mb_y = v >> 8; - y_ptr = s->picture.data[0] + ((mb_y * s->picture.linesize[0] + mb_x)<<log2_blocksize); - if (s->sys->pix_fmt == PIX_FMT_YUV411P) + if (s->sys->pix_fmt == PIX_FMT_YUV422P) { + y_ptr = s->picture.data[0] + ((mb_y * s->picture.linesize[0] + (mb_x>>1))<<log2_blocksize); c_offset = ((mb_y * s->picture.linesize[1] + (mb_x >> 2))<<log2_blocksize); - else - c_offset = (((mb_y >> 1) * s->picture.linesize[1] + (mb_x >> 1))<<log2_blocksize); + } else { /* 4:1:1 or 4:2:0 */ + y_ptr = s->picture.data[0] + ((mb_y * s->picture.linesize[0] + mb_x)<<log2_blocksize); + if (s->sys->pix_fmt == PIX_FMT_YUV411P) + c_offset = ((mb_y * s->picture.linesize[1] + (mb_x >> 2))<<log2_blocksize); + else /* 4:2:0 */ + c_offset = (((mb_y >> 1) * s->picture.linesize[1] + (mb_x >> 1))<<log2_blocksize); + } for(j = 0;j < 6; j++) { idct_put = s->idct_put[mb->dct_mode && log2_blocksize==3]; - if (j < 4) { - if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x < (704 / 8)) { - /* NOTE: at end of line, the macroblock is handled as 420 */ - idct_put(y_ptr + (j<<log2_blocksize), s->picture.linesize[0], block); - } else { - idct_put(y_ptr + (((j & 1) + (j >> 1) * s->picture.linesize[0])<<log2_blocksize), + if (s->sys->pix_fmt == PIX_FMT_YUV422P) { /* 4:2:2 */ + if (j == 0 || j == 2) { + /* Y0 Y1 */ + idct_put(y_ptr + ((j >> 1)<<log2_blocksize), s->picture.linesize[0], block); + } else if(j > 3) { + /* Cr Cb */ + idct_put(s->picture.data[6 - j] + c_offset, + s->picture.linesize[6 - j], block); } - } else { - if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x >= (704 / 8)) { - uint64_t aligned_pixels[64/8]; - uint8_t *pixels= (uint8_t*)aligned_pixels; - uint8_t *c_ptr, *c_ptr1, *ptr, *ptr1; - int x, y, linesize; - /* NOTE: at end of line, the macroblock is handled as 420 */ - idct_put(pixels, 8, block); - linesize = s->picture.linesize[6 - j]; - c_ptr = s->picture.data[6 - j] + c_offset; - ptr = pixels; - for(y = 0;y < (1<<log2_blocksize); y++) { - ptr1= ptr + (1<<(log2_blocksize-1)); - c_ptr1 = c_ptr + (linesize<<log2_blocksize); - for(x=0; x < (1<<(log2_blocksize-1)); x++){ - c_ptr[x]= ptr[x]; c_ptr1[x]= ptr1[x]; - } - c_ptr += linesize; - ptr += 8; + /* note: j=1 and j=3 are "dummy" blocks in 4:2:2 */ + } else { /* 4:1:1 or 4:2:0 */ + if (j < 4) { + if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x < (704 / 8)) { + /* NOTE: at end of line, the macroblock is handled as 420 */ + idct_put(y_ptr + (j<<log2_blocksize), s->picture.linesize[0], block); + } else { + idct_put(y_ptr + (((j & 1) + (j >> 1) * s->picture.linesize[0])<<log2_blocksize), + s->picture.linesize[0], block); } } else { - /* don't ask me why they inverted Cb and Cr ! */ - idct_put(s->picture.data[6 - j] + c_offset, - s->picture.linesize[6 - j], block); + if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x >= (704 / 8)) { + uint64_t aligned_pixels[64/8]; + uint8_t *pixels= (uint8_t*)aligned_pixels; + uint8_t *c_ptr, *c_ptr1, *ptr, *ptr1; + int x, y, linesize; + /* NOTE: at end of line, the macroblock is handled as 420 */ + idct_put(pixels, 8, block); + linesize = s->picture.linesize[6 - j]; + c_ptr = s->picture.data[6 - j] + c_offset; + ptr = pixels; + for(y = 0;y < (1<<log2_blocksize); y++) { + ptr1= ptr + (1<<(log2_blocksize-1)); + c_ptr1 = c_ptr + (linesize<<log2_blocksize); + for(x=0; x < (1<<(log2_blocksize-1)); x++){ + c_ptr[x]= ptr[x]; c_ptr1[x]= ptr1[x]; + } + c_ptr += linesize; + ptr += 8; + } + } else { + /* don't ask me why they inverted Cb and Cr ! */ + idct_put(s->picture.data[6 - j] + c_offset, + s->picture.linesize[6 - j], block); + } } } block += 64; @@ -648,11 +673,25 @@ static always_inline PutBitContext* dv_encode_ac(EncBlockInfo* bi, PutBitContext } static always_inline void dv_set_class_number(DCTELEM* blk, EncBlockInfo* bi, - const uint8_t* zigzag_scan, int bias) + const uint8_t* zigzag_scan, const int *weight, int bias) { int i, area; + /* We offer two different methods for class number assignment: the + method suggested in SMPTE 314M Table 22, and an improved + method. The SMPTE method is very conservative; it assigns class + 3 (i.e. severe quantization) to any block where the largest AC + component is greater than 36. ffmpeg's DV encoder tracks AC bit + consumption precisely, so there is no need to bias most blocks + towards strongly lossy compression. Instead, we assign class 2 + to most blocks, and use class 3 only when strictly necessary + (for blocks whose largest AC component exceeds 255). */ + +#if 0 /* SMPTE spec method */ static const int classes[] = {12, 24, 36, 0xffff}; - int max=12; +#else /* improved ffmpeg method */ + static const int classes[] = {-1, -1, 255, 0xffff}; +#endif + int max=classes[0]; int prev=0; bi->mb[0] = blk[0]; @@ -665,7 +704,11 @@ static always_inline void dv_set_class_number(DCTELEM* blk, EncBlockInfo* bi, if (level+15 > 30U) { bi->sign[i] = (level>>31)&1; - bi->mb[i] = level= ABS(level)>>4; + /* weigh it and and shift down into range, adding for rounding */ + /* the extra division by a factor of 2^4 reverses the 8x expansion of the DCT + AND the 2x doubling of the weights */ + level = (ABS(level) * weight[i] + (1<<(dv_weight_bits+3))) >> (dv_weight_bits+4); + bi->mb[i] = level; if(level>max) max= level; bi->bit_size[area] += dv_rl2vlc_size(i - prev - 1, level); bi->next[prev]= i; @@ -728,9 +771,10 @@ static always_inline int dv_guess_dct_mode(DCTELEM *blk) { static inline void dv_guess_qnos(EncBlockInfo* blks, int* qnos) { int size[5]; - int i, j, k, a, prev; + int i, j, k, a, prev, a2; EncBlockInfo* b; + size[0] = size[1] = size[2] = size[3] = size[4] = 1<<24; do { b = blks; for (i=0; i<5; i++) { @@ -745,12 +789,23 @@ static inline void dv_guess_qnos(EncBlockInfo* blks, int* qnos) b->bit_size[a] = 1; // 4 areas 4 bits for EOB :) b->area_q[a]++; prev= b->prev[a]; + assert(b->next[prev] >= mb_area_start[a+1] || b->mb[prev]); for (k= b->next[prev] ; k<mb_area_start[a+1]; k= b->next[k]) { b->mb[k] >>= 1; if (b->mb[k]) { b->bit_size[a] += dv_rl2vlc_size(k - prev - 1, b->mb[k]); prev= k; } else { + if(b->next[k] >= mb_area_start[a+1] && b->next[k]<64){ + for(a2=a+1; b->next[k] >= mb_area_start[a2+1]; a2++) + b->prev[a2] = prev; + assert(a2<4); + assert(b->mb[b->next[k]]); + b->bit_size[a2] += dv_rl2vlc_size(b->next[k] - prev - 1, b->mb[b->next[k]]) + -dv_rl2vlc_size(b->next[k] - k - 1, b->mb[b->next[k]]); + assert(b->prev[a2]==k && (a2+1 >= 4 || b->prev[a2+1]!=k)); + b->prev[a2] = prev; + } b->next[prev] = b->next[k]; } } @@ -759,16 +814,29 @@ static inline void dv_guess_qnos(EncBlockInfo* blks, int* qnos) size[i] += b->bit_size[a]; } } + if(vs_total_ac_bits >= size[0] + size[1] + size[2] + size[3] + size[4]) + return; } - } while ((vs_total_ac_bits < size[0] + size[1] + size[2] + size[3] + size[4]) && - (qnos[0]|qnos[1]|qnos[2]|qnos[3]|qnos[4])); + } while (qnos[0]|qnos[1]|qnos[2]|qnos[3]|qnos[4]); + + + for(a=2; a==2 || vs_total_ac_bits < size[0]; a+=a){ + b = blks; + size[0] = 5*6*4; //EOB + for (j=0; j<6*5; j++, b++) { + prev= b->prev[0]; + for (k= b->next[prev]; k<64; k= b->next[k]) { + if(b->mb[k] < a && b->mb[k] > -a){ + b->next[prev] = b->next[k]; + }else{ + size[0] += dv_rl2vlc_size(k - prev - 1, b->mb[k]); + prev= k; + } + } + } + } } -/* - * This is a very rough initial implementaion. The performance is - * horrible and the weighting is missing. But it's missing from the - * decoding step also -- so at least we're on the same page with decoder ;-) - */ static inline void dv_encode_video_segment(DVVideoContext *s, uint8_t *dif, const uint16_t *mb_pos_ptr) @@ -795,28 +863,52 @@ static inline void dv_encode_video_segment(DVVideoContext *s, v = *mb_pos_ptr++; mb_x = v & 0xff; mb_y = v >> 8; - y_ptr = s->picture.data[0] + (mb_y * s->picture.linesize[0] * 8) + (mb_x * 8); - c_offset = (s->sys->pix_fmt == PIX_FMT_YUV411P) ? - ((mb_y * s->picture.linesize[1] * 8) + ((mb_x >> 2) * 8)) : - (((mb_y >> 1) * s->picture.linesize[1] * 8) + ((mb_x >> 1) * 8)); + if (s->sys->pix_fmt == PIX_FMT_YUV422P) { + y_ptr = s->picture.data[0] + (mb_y * s->picture.linesize[0] * 8) + (mb_x * 4); + } else { /* 4:1:1 */ + y_ptr = s->picture.data[0] + (mb_y * s->picture.linesize[0] * 8) + (mb_x * 8); + } + if (s->sys->pix_fmt == PIX_FMT_YUV420P) { + c_offset = (((mb_y >> 1) * s->picture.linesize[1] * 8) + ((mb_x >> 1) * 8)); + } else { /* 4:2:2 or 4:1:1 */ + c_offset = ((mb_y * s->picture.linesize[1] * 8) + ((mb_x >> 2) * 8)); + } do_edge_wrap = 0; qnos[mb_index] = 15; /* No quantization */ ptr = dif + mb_index*80 + 4; for(j = 0;j < 6; j++) { - if (j < 4) { /* Four Y blocks */ - /* NOTE: at end of line, the macroblock is handled as 420 */ - if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x < (704 / 8)) { - data = y_ptr + (j * 8); + int dummy = 0; + if (s->sys->pix_fmt == PIX_FMT_YUV422P) { /* 4:2:2 */ + if (j == 0 || j == 2) { + /* Y0 Y1 */ + data = y_ptr + ((j>>1) * 8); + linesize = s->picture.linesize[0]; + } else if (j > 3) { + /* Cr Cb */ + data = s->picture.data[6 - j] + c_offset; + linesize = s->picture.linesize[6 - j]; } else { - data = y_ptr + ((j & 1) * 8) + ((j >> 1) * 8 * s->picture.linesize[0]); + /* j=1 and j=3 are "dummy" blocks, used for AC data only */ + data = 0; + linesize = 0; + dummy = 1; + } + } else { /* 4:1:1 or 4:2:0 */ + if (j < 4) { /* Four Y blocks */ + /* NOTE: at end of line, the macroblock is handled as 420 */ + if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x < (704 / 8)) { + data = y_ptr + (j * 8); + } else { + data = y_ptr + ((j & 1) * 8) + ((j >> 1) * 8 * s->picture.linesize[0]); + } + linesize = s->picture.linesize[0]; + } else { /* Cr and Cb blocks */ + /* don't ask Fabrice why they inverted Cb and Cr ! */ + data = s->picture.data[6 - j] + c_offset; + linesize = s->picture.linesize[6 - j]; + if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x >= (704 / 8)) + do_edge_wrap = 1; } - linesize = s->picture.linesize[0]; - } else { /* Cr and Cb blocks */ - /* don't ask Fabrice why they inverted Cb and Cr ! */ - data = s->picture.data[6 - j] + c_offset; - linesize = s->picture.linesize[6 - j]; - if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x >= (704 / 8)) - do_edge_wrap = 1; } /* Everything is set up -- now just copy data -> DCT block */ @@ -831,7 +923,8 @@ static inline void dv_encode_video_segment(DVVideoContext *s, b += 8; } } else { /* Simple copy: 8x8 -> 8x8 */ - s->get_pixels(block, data, linesize); + if (!dummy) + s->get_pixels(block, data, linesize); } if(s->avctx->flags & CODEC_FLAG_INTERLACED_DCT) @@ -843,10 +936,18 @@ static inline void dv_encode_video_segment(DVVideoContext *s, enc_blk->partial_bit_buffer = 0; enc_blk->cur_ac = 0; - s->fdct[enc_blk->dct_mode](block); + if (dummy) { + /* We rely on the fact that encoding all zeros leads to an immediate EOB, + which is precisely what the spec calls for in the "dummy" blocks. */ + memset(block, 0, sizeof(block)); + } else { + s->fdct[enc_blk->dct_mode](block); + } dv_set_class_number(block, enc_blk, - enc_blk->dct_mode ? ff_zigzag248_direct : ff_zigzag_direct, j/4); + enc_blk->dct_mode ? ff_zigzag248_direct : ff_zigzag_direct, + enc_blk->dct_mode ? dv_weight_248 : dv_weight_88, + j/4); init_put_bits(pb, ptr, block_sizes[j]/8); put_bits(pb, 9, (uint16_t)(((enc_blk->mb[0] >> 3) - 1024 + 2) >> 2)); @@ -886,6 +987,8 @@ static inline void dv_encode_video_segment(DVVideoContext *s, for (j=0; j<5*6; j++) { if (enc_blks[j].partial_bit_count) pb=dv_encode_ac(&enc_blks[j], pb, &pbs[6*5]); + if (enc_blks[j].partial_bit_count) + av_log(NULL, AV_LOG_ERROR, "ac bitstream overflow\n"); } for (j=0; j<5*6; j++) @@ -896,7 +999,17 @@ static int dv_decode_mt(AVCodecContext *avctx, void* sl) { DVVideoContext *s = avctx->priv_data; int slice = (size_t)sl; - dv_decode_video_segment(s, &s->buf[((slice/27)*6+(slice/3)+slice*5+7)*80], + + /* which DIF channel is this? */ + int chan = slice / (s->sys->difseg_size * 27); + + /* slice within the DIF channel */ + int chan_slice = slice % (s->sys->difseg_size * 27); + + /* byte offset of this channel's data */ + int chan_offset = chan * s->sys->difseg_size * 150 * 80; + + dv_decode_video_segment(s, &s->buf[((chan_slice/27)*6+(chan_slice/3)+chan_slice*5+7)*80 + chan_offset], &s->sys->video_place[slice*5]); return 0; } @@ -905,13 +1018,23 @@ static int dv_encode_mt(AVCodecContext *avctx, void* sl) { DVVideoContext *s = avctx->priv_data; int slice = (size_t)sl; - dv_encode_video_segment(s, &s->buf[((slice/27)*6+(slice/3)+slice*5+7)*80], + + /* which DIF channel is this? */ + int chan = slice / (s->sys->difseg_size * 27); + + /* slice within the DIF channel */ + int chan_slice = slice % (s->sys->difseg_size * 27); + + /* byte offset of this channel's data */ + int chan_offset = chan * s->sys->difseg_size * 150 * 80; + + dv_encode_video_segment(s, &s->buf[((chan_slice/27)*6+(chan_slice/3)+chan_slice*5+7)*80 + chan_offset], &s->sys->video_place[slice*5]); return 0; } /* NOTE: exactly one frame must be given (120000 bytes for NTSC, - 144000 bytes for PAL) */ + 144000 bytes for PAL - or twice those for 50Mbps) */ static int dvvideo_decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size) @@ -939,7 +1062,7 @@ static int dvvideo_decode_frame(AVCodecContext *avctx, s->buf = buf; avctx->execute(avctx, dv_decode_mt, (void**)&dv_anchor[0], NULL, - s->sys->difseg_size * 27); + s->sys->n_difchan * s->sys->difseg_size * 27); emms_c(); @@ -968,9 +1091,23 @@ static int dvvideo_encode_frame(AVCodecContext *c, uint8_t *buf, int buf_size, s->buf = buf; c->execute(c, dv_encode_mt, (void**)&dv_anchor[0], NULL, - s->sys->difseg_size * 27); + s->sys->n_difchan * s->sys->difseg_size * 27); emms_c(); + + /* Fill in just enough of the header for dv_frame_profile() to + return the correct result, so that the frame can be decoded + correctly. The rest of the metadata is filled in by the dvvideo + avformat. (this should probably change so that encode_frame() + fills in ALL of the metadata - e.g. for Quicktime-wrapped DV + streams) */ + + /* NTSC/PAL format */ + buf[3] = s->sys->dsf ? 0x80 : 0x00; + + /* 25Mbps or 50Mbps */ + buf[80*5 + 48 + 3] = (s->sys->pix_fmt == PIX_FMT_YUV422P) ? 0x4 : 0x0; + return s->sys->frame_size; } |