Diffstat (limited to 'src/libffmpeg/libavcodec/h264.c')
-rw-r--r-- | src/libffmpeg/libavcodec/h264.c | 471
1 files changed, 312 insertions, 159 deletions
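
One recurring change in the hunks below is the ABS() -> FFABS() rename inside the temporal-direct scaling code (direct_dist_scale_factor and implicit_weight_table). As a reference for what that computation does, here is a minimal, self-contained sketch of the fixed-point scale factor; the function name, its parameters and the clip3 helper are illustrative, not part of the patch.

#include <stdlib.h>

static int clip3(int x, int lo, int hi)      /* plain range clamp */
{
    return x < lo ? lo : (x > hi ? hi : x);
}

/* Temporal-direct distance scale factor, following the formula in the hunks
 * below: td and tb are picture-order-count differences clipped to [-128,127];
 * tx is a rounded fixed-point reciprocal of td, so the result approximates
 * 256*tb/td, clamped to [-1024,1023]. */
static int dist_scale_factor(int poc_cur, int poc0, int poc1)
{
    int td = clip3(poc1 - poc0, -128, 127);
    int tb, tx;

    if (td == 0)                              /* equal distances: identity scale */
        return 256;
    tb = clip3(poc_cur - poc0, -128, 127);
    tx = (16384 + (abs(td) >> 1)) / td;       /* rounded 16384/td */
    return clip3((tb * tx + 32) >> 6, -1024, 1023);
}
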
diff --git a/src/libffmpeg/libavcodec/h264.c b/src/libffmpeg/libavcodec/h264.c index 1a7fb76b4..ad23ae120 100644 --- a/src/libffmpeg/libavcodec/h264.c +++ b/src/libffmpeg/libavcodec/h264.c @@ -2,18 +2,20 @@ * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at> * - * This library is free software; you can redistribute it and/or + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. + * version 2.1 of the License, or (at your option) any later version. * - * This library is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA * */ @@ -371,6 +373,7 @@ typedef struct H264Context{ /* 0x100 -> non null luma_dc, 0x80/0x40 -> non null chroma_dc (cb/cr), 0x?0 -> chroma_cbp(0,1,2), 0x0? luma_cbp */ uint16_t *cbp_table; + int cbp; int top_cbp; int left_cbp; /* chroma_pred_mode for i4x4 or i16x16, else 0 */ @@ -409,6 +412,7 @@ static VLC run7_vlc; static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp); static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc); static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize); +static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize); static always_inline uint32_t pack16to32(int a, int b){ #ifdef WORDS_BIGENDIAN @@ -617,7 +621,7 @@ static void fill_caches(H264Context *h, int mb_type, int for_deblock){ if(USES_LIST(mb_type,list)){ uint32_t *src = (uint32_t*)s->current_picture.motion_val[list][h->mb2b_xy[mb_xy]]; uint32_t *dst = (uint32_t*)h->mv_cache[list][scan8[0]]; - uint8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]]; + int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]]; for(i=0; i<4; i++, dst+=8, src+=h->b_stride){ dst[0] = src[0]; dst[1] = src[1]; @@ -1131,7 +1135,7 @@ static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, in * make mbaff happy, so we can't move all this logic to fill_caches */ if(FRAME_MBAFF){ MpegEncContext *s = &h->s; - const int *mb_types = s->current_picture_ptr->mb_type; + const uint32_t *mb_types = s->current_picture_ptr->mb_type; const int16_t *mv; *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0; *C = h->mv_cache[list][scan8[0]-2]; @@ -1339,7 +1343,7 @@ static inline void direct_dist_scale_factor(H264Context * const h){ h->dist_scale_factor[i] = 256; }else{ int tb = clip(poc - poc0, -128, 127); - int tx = (16384 + (ABS(td) >> 1)) / td; + int tx = (16384 + (FFABS(td) >> 1)) / td; h->dist_scale_factor[i] = clip((tb*tx + 32) >> 6, -1024, 1023); } } @@ -1470,8 +1474,8 @@ static inline void pred_direct_motion(H264Context * const h, int 
*mb_type){ fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1); fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1); if(!IS_INTRA(mb_type_col) - && ( (l1ref0[0] == 0 && ABS(l1mv0[0][0]) <= 1 && ABS(l1mv0[0][1]) <= 1) - || (l1ref0[0] < 0 && l1ref1[0] == 0 && ABS(l1mv1[0][0]) <= 1 && ABS(l1mv1[0][1]) <= 1 + && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1) + || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1 && (h->x264_build>33 || !h->x264_build)))){ if(ref[0] > 0) fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv[0][0],mv[0][1]), 4); @@ -1506,7 +1510,7 @@ static inline void pred_direct_motion(H264Context * const h, int *mb_type){ const int16_t (*l1mv)[2]= l1ref0[x8 + y8*h->b8_stride] == 0 ? l1mv0 : l1mv1; if(IS_SUB_8X8(sub_mb_type)){ const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride]; - if(ABS(mv_col[0]) <= 1 && ABS(mv_col[1]) <= 1){ + if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){ if(ref[0] == 0) fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4); if(ref[1] == 0) @@ -1515,7 +1519,7 @@ static inline void pred_direct_motion(H264Context * const h, int *mb_type){ }else for(i4=0; i4<4; i4++){ const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride]; - if(ABS(mv_col[0]) <= 1 && ABS(mv_col[1]) <= 1){ + if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){ if(ref[0] == 0) *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0; if(ref[1] == 0) @@ -1712,6 +1716,9 @@ static inline void write_back_motion(H264Context *h, int mb_type){ *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y]; } if( h->pps.cabac ) { + if(IS_SKIP(mb_type)) + fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4); + else for(y=0; y<4; y++){ *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y]; *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y]; @@ -1719,7 +1726,7 @@ static inline void write_back_motion(H264Context *h, int mb_type){ } { - uint8_t *ref_index = &s->current_picture.ref_index[list][b8_xy]; + int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy]; ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]]; ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]]; ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]]; @@ -2444,7 +2451,7 @@ static void pred16x16_128_dc_c(uint8_t *src, int stride){ static inline void pred16x16_plane_compat_c(uint8_t *src, int stride, const int svq3){ int i, j, k; int a; - uint8_t *cm = cropTbl + MAX_NEG_CROP; + uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; const uint8_t * const src0 = src+7-stride; const uint8_t *src1 = src+8*stride-1; const uint8_t *src2 = src1-2*stride; // == src+6*stride-1; @@ -2587,7 +2594,7 @@ static void pred8x8_dc_c(uint8_t *src, int stride){ static void pred8x8_plane_c(uint8_t *src, int stride){ int j, k; int a; - uint8_t *cm = cropTbl + MAX_NEG_CROP; + uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; const uint8_t * const src0 = src+3-stride; const uint8_t *src1 = src+4*stride-1; const uint8_t *src2 = src1-2*stride; // == src+2*stride-1; @@ -3142,7 +3149,7 @@ static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t prefetch_motion(h, 1); } -static void decode_init_vlc(H264Context *h){ +static void decode_init_vlc(){ static int done = 0; if (!done) { @@ -3399,7 +3406,7 @@ static int 
decode_init(AVCodecContext *avctx){ s->low_delay= 1; avctx->pix_fmt= PIX_FMT_YUV420P; - decode_init_vlc(h); + decode_init_vlc(); if(avctx->extradata_size > 0 && avctx->extradata && *(char *)avctx->extradata == 1){ @@ -3632,6 +3639,9 @@ static void hl_decode_mb(H264Context *h){ dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8; dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8; + s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4); + s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2); + if (MB_FIELD) { linesize = h->mb_linesize = s->linesize * 2; uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2; @@ -3780,8 +3790,8 @@ static void hl_decode_mb(H264Context *h){ xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0); }else if(s->codec_id == CODEC_ID_H264){ hl_motion(h, dest_y, dest_cb, dest_cr, - s->dsp.put_h264_qpel_pixels_tab, s->dsp.put_h264_chroma_pixels_tab, - s->dsp.avg_h264_qpel_pixels_tab, s->dsp.avg_h264_chroma_pixels_tab, + s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab, + s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab, s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab); } @@ -3879,7 +3889,7 @@ static void hl_decode_mb(H264Context *h){ tprintf("call filter_mb\n"); backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize); fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb - filter_mb(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize); + filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize); } } } @@ -4203,7 +4213,7 @@ static void implicit_weight_table(H264Context *h){ int td = clip(poc1 - poc0, -128, 127); if(td){ int tb = clip(cur_poc - poc0, -128, 127); - int tx = (16384 + (ABS(td) >> 1)) / td; + int tx = (16384 + (FFABS(td) >> 1)) / td; int dist_scale_factor = clip((tb*tx + 32) >> 6, -1024, 1023) >> 2; if(dist_scale_factor < -64 || dist_scale_factor > 128) h->implicit_weight[ref0][ref1] = 32; @@ -4883,6 +4893,14 @@ static int decode_slice_header(H264Context *h){ ); } + if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !s->current_picture.reference){ + s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab; + s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab; + }else{ + s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab; + s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab; + } + return 0; } @@ -5100,10 +5118,7 @@ static void decode_mb_skip(H264Context *h){ fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ... 
pred_direct_motion(h, &mb_type); - if(h->pps.cabac){ - fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4); - fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4); - } + mb_type|= MB_TYPE_SKIP; } else { @@ -5114,12 +5129,10 @@ static void decode_mb_skip(H264Context *h){ pred_pskip_motion(h, &mx, &my); fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1); fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4); - if(h->pps.cabac) - fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4); } write_back_motion(h, mb_type); - s->current_picture.mb_type[mb_xy]= mb_type|MB_TYPE_SKIP; + s->current_picture.mb_type[mb_xy]= mb_type; s->current_picture.qscale_table[mb_xy]= s->qscale; h->slice_table[ mb_xy ]= h->slice_num; h->prev_mb_skipped= 1; @@ -5184,7 +5197,7 @@ static int decode_mb_cavlc(H264Context *h){ assert(h->slice_type == I_TYPE); decode_intra_mb: if(mb_type > 25){ - av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice to large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y); + av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y); return -1; } partition_count=0; @@ -5478,6 +5491,7 @@ decode_intra_mb: else cbp= golomb_to_inter_cbp[cbp]; } + h->cbp = cbp; if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){ if(get_bits1(&s->gb)) @@ -5619,7 +5633,7 @@ static int decode_cabac_field_decoding_flag(H264Context *h) { ctx += 1; } - return get_cabac( &h->cabac, &h->cabac_state[70 + ctx] ); + return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] ); } static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) { @@ -5635,11 +5649,11 @@ static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_sl ctx++; if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) ) ctx++; - if( get_cabac( &h->cabac, &state[ctx] ) == 0 ) + if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 ) return 0; /* I4x4 */ state += 2; }else{ - if( get_cabac( &h->cabac, &state[0] ) == 0 ) + if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 ) return 0; /* I4x4 */ } @@ -5647,11 +5661,11 @@ static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_sl return 25; /* PCM */ mb_type = 1; /* I16x16 */ - mb_type += 12 * get_cabac( &h->cabac, &state[1] ); /* cbp_luma != 0 */ - if( get_cabac( &h->cabac, &state[2] ) ) /* cbp_chroma */ - mb_type += 4 + 4 * get_cabac( &h->cabac, &state[2+intra_slice] ); - mb_type += 2 * get_cabac( &h->cabac, &state[3+intra_slice] ); - mb_type += 1 * get_cabac( &h->cabac, &state[3+2*intra_slice] ); + mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */ + if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */ + mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] ); + mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] ); + mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] ); return mb_type; } @@ -5661,14 +5675,14 @@ static int decode_cabac_mb_type( H264Context *h ) { if( h->slice_type == I_TYPE ) { return decode_cabac_intra_mb_type(h, 3, 1); } else if( h->slice_type == P_TYPE ) { - if( get_cabac( &h->cabac, &h->cabac_state[14] ) == 0 ) { + if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) { /* P-type */ - if( get_cabac( &h->cabac, &h->cabac_state[15] ) == 0 ) { + if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) { /* 
P_L0_D16x16, P_8x8 */ - return 3 * get_cabac( &h->cabac, &h->cabac_state[16] ); + return 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] ); } else { /* P_L0_D8x16, P_L0_D16x8 */ - return 2 - get_cabac( &h->cabac, &h->cabac_state[17] ); + return 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] ); } } else { return decode_cabac_intra_mb_type(h, 17, 0) + 5; @@ -5684,17 +5698,17 @@ static int decode_cabac_mb_type( H264Context *h ) { if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) ) ctx++; - if( !get_cabac( &h->cabac, &h->cabac_state[27+ctx] ) ) + if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) ) return 0; /* B_Direct_16x16 */ - if( !get_cabac( &h->cabac, &h->cabac_state[27+3] ) ) { - return 1 + get_cabac( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */ + if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) { + return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */ } - bits = get_cabac( &h->cabac, &h->cabac_state[27+4] ) << 3; - bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] ) << 2; - bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] ) << 1; - bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] ); + bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3; + bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2; + bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1; + bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); if( bits < 8 ) return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */ else if( bits == 13 ) { @@ -5704,7 +5718,7 @@ static int decode_cabac_mb_type( H264Context *h ) { else if( bits == 15 ) return 22; /* B_8x8 */ - bits= ( bits<<1 ) | get_cabac( &h->cabac, &h->cabac_state[27+5] ); + bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */ } else { /* TODO SI/SP frames? 
*/ @@ -5745,7 +5759,7 @@ static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) { if( h->slice_type == B_TYPE ) ctx += 13; - return get_cabac( &h->cabac, &h->cabac_state[11+ctx] ); + return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] ); } static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) { @@ -5777,12 +5791,12 @@ static int decode_cabac_mb_chroma_pre_mode( H264Context *h) { if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 ) ctx++; - if( get_cabac( &h->cabac, &h->cabac_state[64+ctx] ) == 0 ) + if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 ) return 0; - if( get_cabac( &h->cabac, &h->cabac_state[64+3] ) == 0 ) + if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 ) return 1; - if( get_cabac( &h->cabac, &h->cabac_state[64+3] ) == 0 ) + if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 ) return 2; else return 3; @@ -5859,13 +5873,13 @@ static int decode_cabac_mb_cbp_chroma( H264Context *h) { ctx = 0; if( cbp_a > 0 ) ctx++; if( cbp_b > 0 ) ctx += 2; - if( get_cabac( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 ) + if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 ) return 0; ctx = 4; if( cbp_a == 2 ) ctx++; if( cbp_b == 2 ) ctx += 2; - return 1 + get_cabac( &h->cabac, &h->cabac_state[77 + ctx] ); + return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ); } static int decode_cabac_mb_dqp( H264Context *h) { MpegEncContext * const s = &h->s; @@ -5881,7 +5895,7 @@ static int decode_cabac_mb_dqp( H264Context *h) { if( h->last_qscale_diff != 0 ) ctx++; - while( get_cabac( &h->cabac, &h->cabac_state[60 + ctx] ) ) { + while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) { if( ctx < 2 ) ctx = 2; else @@ -5923,7 +5937,7 @@ static int decode_cabac_b_mb_sub_type( H264Context *h ) { } static inline int decode_cabac_mb_transform_size( H264Context *h ) { - return get_cabac( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] ); + return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] ); } static int decode_cabac_mb_ref( H264Context *h, int list, int n ) { @@ -5989,8 +6003,7 @@ static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) { mvd += 1 << k; } } - if( get_cabac_bypass( &h->cabac ) ) return -mvd; - else return mvd; + return get_cabac_bypass_sign( &h->cabac, -mvd ); } static int inline get_cabac_cbf_ctx( H264Context *h, int cat, int idx ) { @@ -6021,6 +6034,13 @@ static int inline get_cabac_cbf_ctx( H264Context *h, int cat, int idx ) { return ctx + 4 * cat; } +static const __attribute((used)) uint8_t last_coeff_flag_offset_8x8[63] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8 +}; + static int decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff) { const int mb_xy = h->s.mb_x + h->s.mb_y*h->s.mb_stride; static const int significant_coeff_flag_offset[2][6] = { @@ -6034,7 +6054,7 @@ static int decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n static const int coeff_abs_level_m1_offset[6] = { 227+0, 227+10, 227+20, 227+30, 227+39, 426 }; - static const int significant_coeff_flag_offset_8x8[2][63] = { + static const uint8_t significant_coeff_flag_offset_8x8[2][63] = { { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5, 4, 4, 
4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7, 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11, @@ -6044,16 +6064,10 @@ static int decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9, 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 } }; - static const int last_coeff_flag_offset_8x8[63] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, - 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8 - }; int index[64]; - int i, last; + int last; int coeff_count = 0; int abslevel1 = 1; @@ -6063,6 +6077,20 @@ static int decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n uint8_t *last_coeff_ctx_base; uint8_t *abs_level_m1_ctx_base; +#ifndef ARCH_X86 +#define CABAC_ON_STACK +#endif +#ifdef CABAC_ON_STACK +#define CC &cc + CABACContext cc; + cc.range = h->cabac.range; + cc.low = h->cabac.low; + cc.bytestream= h->cabac.bytestream; +#else +#define CC &h->cabac +#endif + + /* cat: 0-> DC 16x16 n = 0 * 1-> AC 16x16 n = luma4x4idx * 2-> Luma4x4 n = luma4x4idx @@ -6073,12 +6101,16 @@ static int decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n /* read coded block flag */ if( cat != 5 ) { - if( get_cabac( &h->cabac, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n ) ] ) == 0 ) { + if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n ) ] ) == 0 ) { if( cat == 1 || cat == 2 ) h->non_zero_count_cache[scan8[n]] = 0; else if( cat == 4 ) h->non_zero_count_cache[scan8[16+n]] = 0; - +#ifdef CABAC_ON_STACK + h->cabac.range = cc.range ; + h->cabac.low = cc.low ; + h->cabac.bytestream= cc.bytestream; +#endif return 0; } } @@ -6094,22 +6126,28 @@ static int decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \ for(last= 0; last < coefs; last++) { \ uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \ - if( get_cabac( &h->cabac, sig_ctx )) { \ + if( get_cabac( CC, sig_ctx )) { \ uint8_t *last_ctx = last_coeff_ctx_base + last_off; \ index[coeff_count++] = last; \ - if( get_cabac( &h->cabac, last_ctx ) ) { \ + if( get_cabac( CC, last_ctx ) ) { \ last= max_coeff; \ break; \ } \ } \ + }\ + if( last == max_coeff -1 ) {\ + index[coeff_count++] = last;\ } - const int *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD]; + const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD]; +#if defined(ARCH_X86) && !(defined(PIC) && defined(__GNUC__)) + coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off); + } else { + coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index); +#else DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] ); } else { DECODE_SIGNIFICANCE( max_coeff - 1, last, last ); - } - if( last == max_coeff -1 ) { - index[coeff_count++] = last; +#endif } assert(coeff_count > 0); @@ -6126,51 +6164,54 @@ static int decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1); } - for( i = coeff_count - 1; i >= 0; i-- ) { + for( coeff_count--; coeff_count >= 0; coeff_count-- ) { uint8_t *ctx = (abslevelgt1 != 0 ? 
0 : FFMIN( 4, abslevel1 )) + abs_level_m1_ctx_base; - int j= scantable[index[i]]; + int j= scantable[index[coeff_count]]; - if( get_cabac( &h->cabac, ctx ) == 0 ) { + if( get_cabac( CC, ctx ) == 0 ) { if( !qmul ) { - if( get_cabac_bypass( &h->cabac ) ) block[j] = -1; - else block[j] = 1; + block[j] = get_cabac_bypass_sign( CC, -1); }else{ - if( get_cabac_bypass( &h->cabac ) ) block[j] = (-qmul[j] + 32) >> 6; - else block[j] = ( qmul[j] + 32) >> 6; + block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;; } abslevel1++; } else { int coeff_abs = 2; ctx = 5 + FFMIN( 4, abslevelgt1 ) + abs_level_m1_ctx_base; - while( coeff_abs < 15 && get_cabac( &h->cabac, ctx ) ) { + while( coeff_abs < 15 && get_cabac( CC, ctx ) ) { coeff_abs++; } if( coeff_abs >= 15 ) { int j = 0; - while( get_cabac_bypass( &h->cabac ) ) { - coeff_abs += 1 << j; + while( get_cabac_bypass( CC ) ) { j++; } + coeff_abs=1; while( j-- ) { - if( get_cabac_bypass( &h->cabac ) ) - coeff_abs += 1 << j ; + coeff_abs += coeff_abs + get_cabac_bypass( CC ); } + coeff_abs+= 14; } if( !qmul ) { - if( get_cabac_bypass( &h->cabac ) ) block[j] = -coeff_abs; + if( get_cabac_bypass( CC ) ) block[j] = -coeff_abs; else block[j] = coeff_abs; }else{ - if( get_cabac_bypass( &h->cabac ) ) block[j] = (-coeff_abs * qmul[j] + 32) >> 6; + if( get_cabac_bypass( CC ) ) block[j] = (-coeff_abs * qmul[j] + 32) >> 6; else block[j] = ( coeff_abs * qmul[j] + 32) >> 6; } abslevelgt1++; } } +#ifdef CABAC_ON_STACK + h->cabac.range = cc.range ; + h->cabac.low = cc.low ; + h->cabac.bytestream= cc.bytestream; +#endif return 0; } @@ -6580,7 +6621,7 @@ decode_intra_mb: cbp |= decode_cabac_mb_cbp_chroma( h ) << 4; } - h->cbp_table[mb_xy] = cbp; + h->cbp_table[mb_xy] = h->cbp = cbp; if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) { if( decode_cabac_mb_transform_size( h ) ) @@ -6640,8 +6681,10 @@ decode_intra_mb: for( i4x4 = 0; i4x4 < 4; i4x4++ ) { const int index = 4*i8x8 + i4x4; //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index ); +//START_TIMER if( decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) < 0 ) return -1; +//STOP_TIMER("decode_residual") } } else { uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ]; @@ -6694,16 +6737,16 @@ decode_intra_mb: } -static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) { +static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) { int i, d; - const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 ); - const int alpha = alpha_table[index_a]; - const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )]; + const int index_a = qp + h->slice_alpha_c0_offset; + const int alpha = (alpha_table+52)[index_a]; + const int beta = (beta_table+52)[qp + h->slice_beta_offset]; if( bS[0] < 4 ) { int8_t tc[4]; for(i=0; i<4; i++) - tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] : -1; + tc[i] = bS[i] ? 
(tc0_table+52)[index_a][bS[i] - 1] : -1; h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc); } else { /* 16px edge length, because bS=4 is triggered by being at @@ -6717,12 +6760,12 @@ static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int bS[4] const int q1 = pix[1]; const int q2 = pix[2]; - if( ABS( p0 - q0 ) < alpha && - ABS( p1 - p0 ) < beta && - ABS( q1 - q0 ) < beta ) { + if( FFABS( p0 - q0 ) < alpha && + FFABS( p1 - p0 ) < beta && + FFABS( q1 - q0 ) < beta ) { - if(ABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){ - if( ABS( p2 - p0 ) < beta) + if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){ + if( FFABS( p2 - p0 ) < beta) { const int p3 = pix[-4]; /* p0', p1', p2' */ @@ -6733,7 +6776,7 @@ static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int bS[4] /* p0' */ pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; } - if( ABS( q2 - q0 ) < beta) + if( FFABS( q2 - q0 ) < beta) { const int q3 = pix[3]; /* q0', q1', q2' */ @@ -6755,23 +6798,23 @@ static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int bS[4] } } } -static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) { +static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) { int i; - const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 ); - const int alpha = alpha_table[index_a]; - const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )]; + const int index_a = qp + h->slice_alpha_c0_offset; + const int alpha = (alpha_table+52)[index_a]; + const int beta = (beta_table+52)[qp + h->slice_beta_offset]; if( bS[0] < 4 ) { int8_t tc[4]; for(i=0; i<4; i++) - tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] + 1 : 0; + tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0; h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc); } else { h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta); } } -static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int bS[8], int qp[2] ) { +static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) { int i; for( i = 0; i < 16; i++, pix += stride) { int index_a; @@ -6790,12 +6833,12 @@ static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int } qp_index = MB_FIELD ? 
(i >> 3) : (i & 1); - index_a = clip( qp[qp_index] + h->slice_alpha_c0_offset, 0, 51 ); - alpha = alpha_table[index_a]; - beta = beta_table[clip( qp[qp_index] + h->slice_beta_offset, 0, 51 )]; + index_a = qp[qp_index] + h->slice_alpha_c0_offset; + alpha = (alpha_table+52)[index_a]; + beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset]; if( bS[bS_index] < 4 ) { - const int tc0 = tc0_table[index_a][bS[bS_index] - 1]; + const int tc0 = (tc0_table+52)[index_a][bS[bS_index] - 1]; const int p0 = pix[-1]; const int p1 = pix[-2]; const int p2 = pix[-3]; @@ -6803,17 +6846,17 @@ static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int const int q1 = pix[1]; const int q2 = pix[2]; - if( ABS( p0 - q0 ) < alpha && - ABS( p1 - p0 ) < beta && - ABS( q1 - q0 ) < beta ) { + if( FFABS( p0 - q0 ) < alpha && + FFABS( p1 - p0 ) < beta && + FFABS( q1 - q0 ) < beta ) { int tc = tc0; int i_delta; - if( ABS( p2 - p0 ) < beta ) { + if( FFABS( p2 - p0 ) < beta ) { pix[-2] = p1 + clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 ); tc++; } - if( ABS( q2 - q0 ) < beta ) { + if( FFABS( q2 - q0 ) < beta ) { pix[1] = q1 + clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 ); tc++; } @@ -6832,12 +6875,12 @@ static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int const int q1 = pix[1]; const int q2 = pix[2]; - if( ABS( p0 - q0 ) < alpha && - ABS( p1 - p0 ) < beta && - ABS( q1 - q0 ) < beta ) { + if( FFABS( p0 - q0 ) < alpha && + FFABS( p1 - p0 ) < beta && + FFABS( q1 - q0 ) < beta ) { - if(ABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){ - if( ABS( p2 - p0 ) < beta) + if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){ + if( FFABS( p2 - p0 ) < beta) { const int p3 = pix[-4]; /* p0', p1', p2' */ @@ -6848,7 +6891,7 @@ static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int /* p0' */ pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; } - if( ABS( q2 - q0 ) < beta) + if( FFABS( q2 - q0 ) < beta) { const int q3 = pix[3]; /* q0', q1', q2' */ @@ -6869,7 +6912,7 @@ static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int } } } -static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int bS[8], int qp[2] ) { +static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) { int i; for( i = 0; i < 8; i++, pix += stride) { int index_a; @@ -6884,20 +6927,20 @@ static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, in } qp_index = MB_FIELD ? 
(i >> 2) : (i & 1); - index_a = clip( qp[qp_index] + h->slice_alpha_c0_offset, 0, 51 ); - alpha = alpha_table[index_a]; - beta = beta_table[clip( qp[qp_index] + h->slice_beta_offset, 0, 51 )]; + index_a = qp[qp_index] + h->slice_alpha_c0_offset; + alpha = (alpha_table+52)[index_a]; + beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset]; if( bS[bS_index] < 4 ) { - const int tc = tc0_table[index_a][bS[bS_index] - 1] + 1; + const int tc = (tc0_table+52)[index_a][bS[bS_index] - 1] + 1; const int p0 = pix[-1]; const int p1 = pix[-2]; const int q0 = pix[0]; const int q1 = pix[1]; - if( ABS( p0 - q0 ) < alpha && - ABS( p1 - p0 ) < beta && - ABS( q1 - q0 ) < beta ) { + if( FFABS( p0 - q0 ) < alpha && + FFABS( p1 - p0 ) < beta && + FFABS( q1 - q0 ) < beta ) { const int i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc ); pix[-1] = clip_uint8( p0 + i_delta ); /* p0' */ @@ -6910,9 +6953,9 @@ static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, in const int q0 = pix[0]; const int q1 = pix[1]; - if( ABS( p0 - q0 ) < alpha && - ABS( p1 - p0 ) < beta && - ABS( q1 - q0 ) < beta ) { + if( FFABS( p0 - q0 ) < alpha && + FFABS( p1 - p0 ) < beta && + FFABS( q1 - q0 ) < beta ) { pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */ pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */ @@ -6922,17 +6965,17 @@ static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, in } } -static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) { +static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) { int i, d; - const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 ); - const int alpha = alpha_table[index_a]; - const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )]; + const int index_a = qp + h->slice_alpha_c0_offset; + const int alpha = (alpha_table+52)[index_a]; + const int beta = (beta_table+52)[qp + h->slice_beta_offset]; const int pix_next = stride; if( bS[0] < 4 ) { int8_t tc[4]; for(i=0; i<4; i++) - tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] : -1; + tc[i] = bS[i] ? 
(tc0_table+52)[index_a][bS[i] - 1] : -1; h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc); } else { /* 16px edge length, see filter_mb_edgev */ @@ -6944,15 +6987,15 @@ static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int bS[4] const int q1 = pix[1*pix_next]; const int q2 = pix[2*pix_next]; - if( ABS( p0 - q0 ) < alpha && - ABS( p1 - p0 ) < beta && - ABS( q1 - q0 ) < beta ) { + if( FFABS( p0 - q0 ) < alpha && + FFABS( p1 - p0 ) < beta && + FFABS( q1 - q0 ) < beta ) { const int p3 = pix[-4*pix_next]; const int q3 = pix[ 3*pix_next]; - if(ABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){ - if( ABS( p2 - p0 ) < beta) { + if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){ + if( FFABS( p2 - p0 ) < beta) { /* p0', p1', p2' */ pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3; pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2; @@ -6961,7 +7004,7 @@ static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int bS[4] /* p0' */ pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2; } - if( ABS( q2 - q0 ) < beta) { + if( FFABS( q2 - q0 ) < beta) { /* q0', q1', q2' */ pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3; pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2; @@ -6982,22 +7025,130 @@ static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int bS[4] } } -static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) { +static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) { int i; - const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 ); - const int alpha = alpha_table[index_a]; - const int beta = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )]; + const int index_a = qp + h->slice_alpha_c0_offset; + const int alpha = (alpha_table+52)[index_a]; + const int beta = (beta_table+52)[qp + h->slice_beta_offset]; if( bS[0] < 4 ) { int8_t tc[4]; for(i=0; i<4; i++) - tc[i] = bS[i] ? tc0_table[index_a][bS[i] - 1] + 1 : 0; + tc[i] = bS[i] ? 
(tc0_table+52)[index_a][bS[i] - 1] + 1 : 0; h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc); } else { h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta); } } +static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) { + MpegEncContext * const s = &h->s; + int mb_xy, mb_type; + int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh; + + if(mb_x==0 || mb_y==0 || !s->dsp.h264_loop_filter_strength) { + filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize); + return; + } + assert(!FRAME_MBAFF); + + mb_xy = mb_x + mb_y*s->mb_stride; + mb_type = s->current_picture.mb_type[mb_xy]; + qp = s->current_picture.qscale_table[mb_xy]; + qp0 = s->current_picture.qscale_table[mb_xy-1]; + qp1 = s->current_picture.qscale_table[h->top_mb_xy]; + qpc = get_chroma_qp( h->pps.chroma_qp_index_offset, qp ); + qpc0 = get_chroma_qp( h->pps.chroma_qp_index_offset, qp0 ); + qpc1 = get_chroma_qp( h->pps.chroma_qp_index_offset, qp1 ); + qp0 = (qp + qp0 + 1) >> 1; + qp1 = (qp + qp1 + 1) >> 1; + qpc0 = (qpc + qpc0 + 1) >> 1; + qpc1 = (qpc + qpc1 + 1) >> 1; + qp_thresh = 15 - h->slice_alpha_c0_offset; + if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh && + qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh) + return; + + if( IS_INTRA(mb_type) ) { + int16_t bS4[4] = {4,4,4,4}; + int16_t bS3[4] = {3,3,3,3}; + if( IS_8x8DCT(mb_type) ) { + filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 ); + filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp ); + filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bS4, qp1 ); + filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp ); + } else { + filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 ); + filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp ); + filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp ); + filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp ); + filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bS4, qp1 ); + filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp ); + filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp ); + filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp ); + } + filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 ); + filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc ); + filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 ); + filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc ); + filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bS4, qpc1 ); + filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc ); + filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bS4, qpc1 ); + filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc ); + return; + } else { + DECLARE_ALIGNED_8(int16_t, bS[2][4][4]); + uint64_t (*bSv)[4] = (uint64_t(*)[4])bS; + int edges; + if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) { + edges = 4; + bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL; + } else { + int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 : + (mb_type & MB_TYPE_16x8) ? 1 : 0; + int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) + && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16)) + ? 3 : 0; + int step = IS_8x8DCT(mb_type) ? 2 : 1; + edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 
1 : 4; + s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache, + (h->slice_type == B_TYPE), edges, step, mask_edge0, mask_edge1 ); + } + if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) ) + bSv[0][0] = 0x0004000400040004ULL; + if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) ) + bSv[1][0] = 0x0004000400040004ULL; + +#define FILTER(hv,dir,edge)\ + if(bSv[dir][edge]) {\ + filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\ + if(!(edge&1)) {\ + filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\ + filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\ + }\ + } + if( edges == 1 ) { + FILTER(v,0,0); + FILTER(h,1,0); + } else if( IS_8x8DCT(mb_type) ) { + FILTER(v,0,0); + FILTER(v,0,2); + FILTER(h,1,0); + FILTER(h,1,2); + } else { + FILTER(v,0,0); + FILTER(v,0,1); + FILTER(v,0,2); + FILTER(v,0,3); + FILTER(h,1,0); + FILTER(h,1,1); + FILTER(h,1,2); + FILTER(h,1,3); + } +#undef FILTER + } +} + static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) { MpegEncContext * const s = &h->s; const int mb_xy= mb_x + mb_y*s->mb_stride; @@ -7035,7 +7186,7 @@ static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8 */ const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride; const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride }; - int bS[8]; + int16_t bS[8]; int qp[2]; int chroma_qp[2]; int mb_qp, mbn0_qp, mbn1_qp; @@ -7114,7 +7265,7 @@ static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8 int mbn_xy = mb_xy - 2 * s->mb_stride; int qp, chroma_qp; int i, j; - int bS[4]; + int16_t bS[4]; for(j=0; j<2; j++, mbn_xy += s->mb_stride){ if( IS_INTRA(mb_type) || @@ -7150,7 +7301,7 @@ static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8 /* mbn_xy: neighbor macroblock */ const int mbn_xy = edge > 0 ? 
mb_xy : mbm_xy; const int mbn_type = s->current_picture.mb_type[mbn_xy]; - int bS[4]; + int16_t bS[4]; int qp; if( (edge&1) && IS_8x8DCT(mb_type) ) @@ -7189,8 +7340,8 @@ static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8 int v = 0; for( l = 0; !v && l < 1 + (h->slice_type == B_TYPE); l++ ) { v |= ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] || - ABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 || - ABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit; + FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 || + FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit; } bS[0] = bS[1] = bS[2] = bS[3] = v; mv_done = 1; @@ -7213,8 +7364,8 @@ static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8 bS[i] = 0; for( l = 0; l < 1 + (h->slice_type == B_TYPE); l++ ) { if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] || - ABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 || - ABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) { + FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 || + FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) { bS[i] = 1; break; } @@ -7267,7 +7418,7 @@ static int decode_slice(H264Context *h){ align_get_bits( &s->gb ); /* init cabac */ - ff_init_cabac_states( &h->cabac, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64 ); + ff_init_cabac_states( &h->cabac); ff_init_cabac_decoder( &h->cabac, s->gb.buffer + get_bits_count(&s->gb)/8, ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8); @@ -7286,8 +7437,10 @@ static int decode_slice(H264Context *h){ } for(;;){ +//START_TIMER int ret = decode_mb_cabac(h); int eos; +//STOP_TIMER("decode_mb_cabac") if(ret>=0) hl_decode_mb(h); @@ -7301,7 +7454,7 @@ static int decode_slice(H264Context *h){ } eos = get_cabac_terminate( &h->cabac ); - if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 1) { + if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) { av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%d)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream); ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask); return -1; @@ -7694,7 +7847,7 @@ static inline int decode_seq_parameter_set(H264Context *h){ #ifndef ALLOW_INTERLACE if(sps->mb_aff) - av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it compilation time\n"); + av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n"); #endif if(!sps->direct_8x8_inference_flag && sps->mb_aff) av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + !direct_8x8_inference is not implemented\n"); @@ -8381,7 +8534,7 @@ int main(){ printf("\n");*/ for(j=0; j<16; j++){ - int diff= ABS(src[j] - ref[j]); + int diff= FFABS(src[j] - ref[j]); error+= diff*diff; max_error= FFMAX(max_error, diff); |
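
A note on the deblocking hunks above: they drop the per-edge clip(qp + offset, 0, 51) and instead index the threshold tables as (alpha_table+52)[...], (beta_table+52)[...] and (tc0_table+52)[...]. That only works if the tables carry 52 padding entries on each side (zeros below the valid qp range, saturated values above it). The sketch below shows that padded-lookup pattern with placeholder names, values and an init helper; it is not the patch's actual table definitions, which are presumably declared statically with the padding built in.

#include <string.h>

#define QP_MAX 51

/* the 52 per-qp threshold values from the spec would go here (omitted) */
static const unsigned char alpha_base[QP_MAX + 1] = { 0 };

/* padded copy: 52 zeros, the 52 real entries, then 52 saturated entries,
 * so (alpha_padded + 52)[i] is valid for any i in [-52, 103] */
static unsigned char alpha_padded[3 * (QP_MAX + 1)];

static void init_padded_alpha(void)
{
    int i;
    memset(alpha_padded, 0, QP_MAX + 1);                        /* idx < 0  -> 0        */
    memcpy(alpha_padded + QP_MAX + 1, alpha_base, QP_MAX + 1);  /* 0..51    -> real     */
    for (i = 0; i <= QP_MAX; i++)                               /* idx > 51 -> saturate */
        alpha_padded[2 * (QP_MAX + 1) + i] = alpha_base[QP_MAX];
}

static inline int lookup_alpha(int qp_plus_offset)
{
    /* replaces alpha_table[clip(qp_plus_offset, 0, QP_MAX)] */
    return (alpha_padded + QP_MAX + 1)[qp_plus_offset];
}

Trading one clip() per filtered edge for roughly a hundred extra table bytes is a cheap exchange inside the loop-filter inner loop, which is presumably the motivation for this change.
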