path: root/contrib/ffmpeg/libavcodec/h264.c
Diffstat (limited to 'contrib/ffmpeg/libavcodec/h264.c')
-rw-r--r--  contrib/ffmpeg/libavcodec/h264.c  3174
1 file changed, 1275 insertions(+), 1899 deletions(-)
diff --git a/contrib/ffmpeg/libavcodec/h264.c b/contrib/ffmpeg/libavcodec/h264.c
index 4d72dc2ff..cd6facb9b 100644
--- a/contrib/ffmpeg/libavcodec/h264.c
+++ b/contrib/ffmpeg/libavcodec/h264.c
@@ -17,7 +17,6 @@
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- *
*/
/**
@@ -26,367 +25,25 @@
* @author Michael Niedermayer <michaelni@gmx.at>
*/
-#include "common.h"
#include "dsputil.h"
#include "avcodec.h"
#include "mpegvideo.h"
+#include "h264.h"
#include "h264data.h"
+#include "h264_parser.h"
#include "golomb.h"
+#include "rectangle.h"
#include "cabac.h"
//#undef NDEBUG
#include <assert.h>
-#define interlaced_dct interlaced_dct_is_a_bad_name
-#define mb_intra mb_intra_isnt_initalized_see_mb_type
-
-#define LUMA_DC_BLOCK_INDEX 25
-#define CHROMA_DC_BLOCK_INDEX 26
-
-#define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
-#define COEFF_TOKEN_VLC_BITS 8
-#define TOTAL_ZEROS_VLC_BITS 9
-#define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
-#define RUN_VLC_BITS 3
-#define RUN7_VLC_BITS 6
-
-#define MAX_SPS_COUNT 32
-#define MAX_PPS_COUNT 256
-
-#define MAX_MMCO_COUNT 66
-
-/* Compiling in interlaced support reduces the speed
- * of progressive decoding by about 2%. */
-#define ALLOW_INTERLACE
-
-#ifdef ALLOW_INTERLACE
-#define MB_MBAFF h->mb_mbaff
-#define MB_FIELD h->mb_field_decoding_flag
-#define FRAME_MBAFF h->mb_aff_frame
-#else
-#define MB_MBAFF 0
-#define MB_FIELD 0
-#define FRAME_MBAFF 0
-#undef IS_INTERLACED
-#define IS_INTERLACED(mb_type) 0
-#endif
-
-/**
- * Sequence parameter set
- */
-typedef struct SPS{
-
- int profile_idc;
- int level_idc;
- int transform_bypass; ///< qpprime_y_zero_transform_bypass_flag
- int log2_max_frame_num; ///< log2_max_frame_num_minus4 + 4
- int poc_type; ///< pic_order_cnt_type
- int log2_max_poc_lsb; ///< log2_max_pic_order_cnt_lsb_minus4
- int delta_pic_order_always_zero_flag;
- int offset_for_non_ref_pic;
- int offset_for_top_to_bottom_field;
- int poc_cycle_length; ///< num_ref_frames_in_pic_order_cnt_cycle
- int ref_frame_count; ///< num_ref_frames
- int gaps_in_frame_num_allowed_flag;
- int mb_width; ///< frame_width_in_mbs_minus1 + 1
- int mb_height; ///< frame_height_in_mbs_minus1 + 1
- int frame_mbs_only_flag;
- int mb_aff; ///<mb_adaptive_frame_field_flag
- int direct_8x8_inference_flag;
- int crop; ///< frame_cropping_flag
- int crop_left; ///< frame_cropping_rect_left_offset
- int crop_right; ///< frame_cropping_rect_right_offset
- int crop_top; ///< frame_cropping_rect_top_offset
- int crop_bottom; ///< frame_cropping_rect_bottom_offset
- int vui_parameters_present_flag;
- AVRational sar;
- int timing_info_present_flag;
- uint32_t num_units_in_tick;
- uint32_t time_scale;
- int fixed_frame_rate_flag;
- short offset_for_ref_frame[256]; //FIXME dyn aloc?
- int bitstream_restriction_flag;
- int num_reorder_frames;
- int scaling_matrix_present;
- uint8_t scaling_matrix4[6][16];
- uint8_t scaling_matrix8[2][64];
-}SPS;
-
-/**
- * Picture parameter set
- */
-typedef struct PPS{
- unsigned int sps_id;
- int cabac; ///< entropy_coding_mode_flag
- int pic_order_present; ///< pic_order_present_flag
- int slice_group_count; ///< num_slice_groups_minus1 + 1
- int mb_slice_group_map_type;
- unsigned int ref_count[2]; ///< num_ref_idx_l0/1_active_minus1 + 1
- int weighted_pred; ///< weighted_pred_flag
- int weighted_bipred_idc;
- int init_qp; ///< pic_init_qp_minus26 + 26
- int init_qs; ///< pic_init_qs_minus26 + 26
- int chroma_qp_index_offset;
- int deblocking_filter_parameters_present; ///< deblocking_filter_parameters_present_flag
- int constrained_intra_pred; ///< constrained_intra_pred_flag
- int redundant_pic_cnt_present; ///< redundant_pic_cnt_present_flag
- int transform_8x8_mode; ///< transform_8x8_mode_flag
- uint8_t scaling_matrix4[6][16];
- uint8_t scaling_matrix8[2][64];
-}PPS;
-
-/**
- * Memory management control operation opcode.
- */
-typedef enum MMCOOpcode{
- MMCO_END=0,
- MMCO_SHORT2UNUSED,
- MMCO_LONG2UNUSED,
- MMCO_SHORT2LONG,
- MMCO_SET_MAX_LONG,
- MMCO_RESET,
- MMCO_LONG,
-} MMCOOpcode;
-
-/**
- * Memory management control operation.
- */
-typedef struct MMCO{
- MMCOOpcode opcode;
- int short_frame_num;
- int long_index;
-} MMCO;
-
/**
- * H264Context
+ * Value of Picture.reference when Picture is not a reference picture, but
+ * is held for delayed output.
*/
-typedef struct H264Context{
- MpegEncContext s;
- int nal_ref_idc;
- int nal_unit_type;
- uint8_t *rbsp_buffer;
- unsigned int rbsp_buffer_size;
-
- /**
- * Used to parse AVC variant of h264
- */
- int is_avc; ///< this flag is != 0 if codec is avc1
- int got_avcC; ///< flag used to parse avcC data only once
- int nal_length_size; ///< Number of bytes used for nal length (1, 2 or 4)
-
- int chroma_qp; //QPc
-
- int prev_mb_skipped;
- int next_mb_skipped;
-
- //prediction stuff
- int chroma_pred_mode;
- int intra16x16_pred_mode;
-
- int top_mb_xy;
- int left_mb_xy[2];
-
- int8_t intra4x4_pred_mode_cache[5*8];
- int8_t (*intra4x4_pred_mode)[8];
- void (*pred4x4 [9+3])(uint8_t *src, uint8_t *topright, int stride);//FIXME move to dsp?
- void (*pred8x8l [9+3])(uint8_t *src, int topleft, int topright, int stride);
- void (*pred8x8 [4+3])(uint8_t *src, int stride);
- void (*pred16x16[4+3])(uint8_t *src, int stride);
- unsigned int topleft_samples_available;
- unsigned int top_samples_available;
- unsigned int topright_samples_available;
- unsigned int left_samples_available;
- uint8_t (*top_borders[2])[16+2*8];
- uint8_t left_border[2*(17+2*9)];
-
- /**
- * non zero coeff count cache.
- * is 64 if not available.
- */
- DECLARE_ALIGNED_8(uint8_t, non_zero_count_cache[6*8]);
- uint8_t (*non_zero_count)[16];
-
- /**
- * Motion vector cache.
- */
- DECLARE_ALIGNED_8(int16_t, mv_cache[2][5*8][2]);
- DECLARE_ALIGNED_8(int8_t, ref_cache[2][5*8]);
-#define LIST_NOT_USED -1 //FIXME rename?
-#define PART_NOT_AVAILABLE -2
-
- /**
- * is 1 if the specific list MV&references are set to 0,0,-2.
- */
- int mv_cache_clean[2];
-
- /**
- * number of neighbors (top and/or left) that used 8x8 dct
- */
- int neighbor_transform_size;
-
- /**
- * block_offset[ 0..23] for frame macroblocks
- * block_offset[24..47] for field macroblocks
- */
- int block_offset[2*(16+8)];
-
- uint32_t *mb2b_xy; //FIXME are these 4 a good idea?
- uint32_t *mb2b8_xy;
- int b_stride; //FIXME use s->b4_stride
- int b8_stride;
-
- int mb_linesize; ///< may be equal to s->linesize or s->linesize*2, for mbaff
- int mb_uvlinesize;
-
- int emu_edge_width;
- int emu_edge_height;
-
- int halfpel_flag;
- int thirdpel_flag;
-
- int unknown_svq3_flag;
- int next_slice_index;
-
- SPS sps_buffer[MAX_SPS_COUNT];
- SPS sps; ///< current sps
-
- PPS pps_buffer[MAX_PPS_COUNT];
- /**
- * current pps
- */
- PPS pps; //FIXME move to Picture perhaps? (->no) do we need that?
-
- uint32_t dequant4_buffer[6][52][16];
- uint32_t dequant8_buffer[2][52][64];
- uint32_t (*dequant4_coeff[6])[16];
- uint32_t (*dequant8_coeff[2])[64];
- int dequant_coeff_pps; ///< reinit tables when pps changes
-
- int slice_num;
- uint8_t *slice_table_base;
- uint8_t *slice_table; ///< slice_table_base + 2*mb_stride + 1
- int slice_type;
- int slice_type_fixed;
-
- //interlacing specific flags
- int mb_aff_frame;
- int mb_field_decoding_flag;
- int mb_mbaff; ///< mb_aff_frame && mb_field_decoding_flag
-
- unsigned int sub_mb_type[4];
-
- //POC stuff
- int poc_lsb;
- int poc_msb;
- int delta_poc_bottom;
- int delta_poc[2];
- int frame_num;
- int prev_poc_msb; ///< poc_msb of the last reference pic for POC type 0
- int prev_poc_lsb; ///< poc_lsb of the last reference pic for POC type 0
- int frame_num_offset; ///< for POC type 2
- int prev_frame_num_offset; ///< for POC type 2
- int prev_frame_num; ///< frame_num of the last pic for POC type 1/2
-
- /**
- * frame_num for frames or 2*frame_num for field pics.
- */
- int curr_pic_num;
-
- /**
- * max_frame_num or 2*max_frame_num for field pics.
- */
- int max_pic_num;
-
- //Weighted pred stuff
- int use_weight;
- int use_weight_chroma;
- int luma_log2_weight_denom;
- int chroma_log2_weight_denom;
- int luma_weight[2][48];
- int luma_offset[2][48];
- int chroma_weight[2][48][2];
- int chroma_offset[2][48][2];
- int implicit_weight[48][48];
-
- //deblock
- int deblocking_filter; ///< disable_deblocking_filter_idc with 1<->0
- int slice_alpha_c0_offset;
- int slice_beta_offset;
-
- int redundant_pic_count;
-
- int direct_spatial_mv_pred;
- int dist_scale_factor[16];
- int dist_scale_factor_field[32];
- int map_col_to_list0[2][16];
- int map_col_to_list0_field[2][32];
-
- /**
- * num_ref_idx_l0/1_active_minus1 + 1
- */
- unsigned int ref_count[2]; ///< counts frames or fields, depending on current mb mode
- unsigned int list_count;
- Picture *short_ref[32];
- Picture *long_ref[32];
- Picture default_ref_list[2][32];
- Picture ref_list[2][48]; ///< 0..15: frame refs, 16..47: mbaff field refs
- Picture *delayed_pic[18]; //FIXME size?
- Picture *delayed_output_pic;
-
- /**
- * memory management control operations buffer.
- */
- MMCO mmco[MAX_MMCO_COUNT];
- int mmco_index;
-
- int long_ref_count; ///< number of actual long term references
- int short_ref_count; ///< number of actual short term references
-
- //data partitioning
- GetBitContext intra_gb;
- GetBitContext inter_gb;
- GetBitContext *intra_gb_ptr;
- GetBitContext *inter_gb_ptr;
-
- DECLARE_ALIGNED_8(DCTELEM, mb[16*24]);
- DCTELEM mb_padding[256]; ///< as mb is addressed by scantable[i] and scantable is uint8_t we can either check that i is not to large or ensure that there is some unused stuff after mb
-
- /**
- * Cabac
- */
- CABACContext cabac;
- uint8_t cabac_state[460];
- int cabac_init_idc;
-
- /* 0x100 -> non null luma_dc, 0x80/0x40 -> non null chroma_dc (cb/cr), 0x?0 -> chroma_cbp(0,1,2), 0x0? luma_cbp */
- uint16_t *cbp_table;
- int cbp;
- int top_cbp;
- int left_cbp;
- /* chroma_pred_mode for i4x4 or i16x16, else 0 */
- uint8_t *chroma_pred_mode_table;
- int last_qscale_diff;
- int16_t (*mvd_table[2])[2];
- DECLARE_ALIGNED_8(int16_t, mvd_cache[2][5*8][2]);
- uint8_t *direct_table;
- uint8_t direct_cache[5*8];
-
- uint8_t zigzag_scan[16];
- uint8_t zigzag_scan8x8[64];
- uint8_t zigzag_scan8x8_cavlc[64];
- uint8_t field_scan[16];
- uint8_t field_scan8x8[64];
- uint8_t field_scan8x8_cavlc[64];
- const uint8_t *zigzag_scan_q0;
- const uint8_t *zigzag_scan8x8_q0;
- const uint8_t *zigzag_scan8x8_cavlc_q0;
- const uint8_t *field_scan_q0;
- const uint8_t *field_scan8x8_q0;
- const uint8_t *field_scan8x8_cavlc_q0;
-
- int x264_build;
-}H264Context;
+#define DELAYED_PIC_REF 4
static VLC coeff_token_vlc[4];
static VLC chroma_dc_coeff_token_vlc;
@@ -419,109 +76,23 @@ const uint8_t ff_div6[52]={
};
-/**
- * fill a rectangle.
- * @param h height of the rectangle, should be a constant
- * @param w width of the rectangle, should be a constant
- * @param size the size of val (1 or 4), should be a constant
- */
-static av_always_inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){
- uint8_t *p= (uint8_t*)vp;
- assert(size==1 || size==4);
- assert(w<=4);
-
- w *= size;
- stride *= size;
-
- assert((((long)vp)&(FFMIN(w, STRIDE_ALIGN)-1)) == 0);
- assert((stride&(w-1))==0);
- if(w==2){
- const uint16_t v= size==4 ? val : val*0x0101;
- *(uint16_t*)(p + 0*stride)= v;
- if(h==1) return;
- *(uint16_t*)(p + 1*stride)= v;
- if(h==2) return;
- *(uint16_t*)(p + 2*stride)=
- *(uint16_t*)(p + 3*stride)= v;
- }else if(w==4){
- const uint32_t v= size==4 ? val : val*0x01010101;
- *(uint32_t*)(p + 0*stride)= v;
- if(h==1) return;
- *(uint32_t*)(p + 1*stride)= v;
- if(h==2) return;
- *(uint32_t*)(p + 2*stride)=
- *(uint32_t*)(p + 3*stride)= v;
- }else if(w==8){
- //gcc can't optimize 64bit math on x86_32
-#if defined(ARCH_X86_64) || (defined(MP_WORDSIZE) && MP_WORDSIZE >= 64)
- const uint64_t v= val*0x0100000001ULL;
- *(uint64_t*)(p + 0*stride)= v;
- if(h==1) return;
- *(uint64_t*)(p + 1*stride)= v;
- if(h==2) return;
- *(uint64_t*)(p + 2*stride)=
- *(uint64_t*)(p + 3*stride)= v;
- }else if(w==16){
- const uint64_t v= val*0x0100000001ULL;
- *(uint64_t*)(p + 0+0*stride)=
- *(uint64_t*)(p + 8+0*stride)=
- *(uint64_t*)(p + 0+1*stride)=
- *(uint64_t*)(p + 8+1*stride)= v;
- if(h==2) return;
- *(uint64_t*)(p + 0+2*stride)=
- *(uint64_t*)(p + 8+2*stride)=
- *(uint64_t*)(p + 0+3*stride)=
- *(uint64_t*)(p + 8+3*stride)= v;
-#else
- *(uint32_t*)(p + 0+0*stride)=
- *(uint32_t*)(p + 4+0*stride)= val;
- if(h==1) return;
- *(uint32_t*)(p + 0+1*stride)=
- *(uint32_t*)(p + 4+1*stride)= val;
- if(h==2) return;
- *(uint32_t*)(p + 0+2*stride)=
- *(uint32_t*)(p + 4+2*stride)=
- *(uint32_t*)(p + 0+3*stride)=
- *(uint32_t*)(p + 4+3*stride)= val;
- }else if(w==16){
- *(uint32_t*)(p + 0+0*stride)=
- *(uint32_t*)(p + 4+0*stride)=
- *(uint32_t*)(p + 8+0*stride)=
- *(uint32_t*)(p +12+0*stride)=
- *(uint32_t*)(p + 0+1*stride)=
- *(uint32_t*)(p + 4+1*stride)=
- *(uint32_t*)(p + 8+1*stride)=
- *(uint32_t*)(p +12+1*stride)= val;
- if(h==2) return;
- *(uint32_t*)(p + 0+2*stride)=
- *(uint32_t*)(p + 4+2*stride)=
- *(uint32_t*)(p + 8+2*stride)=
- *(uint32_t*)(p +12+2*stride)=
- *(uint32_t*)(p + 0+3*stride)=
- *(uint32_t*)(p + 4+3*stride)=
- *(uint32_t*)(p + 8+3*stride)=
- *(uint32_t*)(p +12+3*stride)= val;
-#endif
- }else
- assert(0);
- assert(h==4);
-}
-
static void fill_caches(H264Context *h, int mb_type, int for_deblock){
MpegEncContext * const s = &h->s;
const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
int topleft_xy, top_xy, topright_xy, left_xy[2];
int topleft_type, top_type, topright_type, left_type[2];
int left_block[8];
+ int topleft_partition= -1;
int i;
+ top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
+
//FIXME deblocking could skip the intra and nnz parts.
- if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[mb_xy-s->mb_stride]) && !FRAME_MBAFF)
+ if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
return;
//wow what a mess, why didn't they simplify the interlacing&intra stuff, i can't imagine that these complex rules are worth it
- top_xy = mb_xy - s->mb_stride;
topleft_xy = top_xy - 1;
topright_xy= top_xy + 1;
left_xy[1] = left_xy[0] = mb_xy-1;
@@ -556,6 +127,10 @@ static void fill_caches(H264Context *h, int mb_type, int for_deblock){
: (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
) {
topleft_xy -= s->mb_stride;
+ } else if(bottom && curr_mb_frame_flag && !left_mb_frame_flag) {
+ topleft_xy += s->mb_stride;
+ // take topleft mv from the middle of the mb, as opposed to all other modes which use the bottom-right partition
+ topleft_partition = 0;
}
if (bottom
? !curr_mb_frame_flag // bottom macroblock
@@ -833,8 +408,8 @@ static void fill_caches(H264Context *h, int mb_type, int for_deblock){
continue;
if(USES_LIST(topleft_type, list)){
- const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
- const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + h->b8_stride;
+ const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
+ const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
*(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
}else{
@@ -1131,7 +706,7 @@ static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, in
#define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
const int x4 = X4, y4 = Y4;\
const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
- if(!USES_LIST(mb_type,list) && !IS_8X8(mb_type))\
+ if(!USES_LIST(mb_type,list))\
return LIST_NOT_USED;\
mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
h->mv_cache[list][scan8[0]-2][0] = mv[0];\
@@ -1152,7 +727,7 @@ static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, in
&& !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
&& i >= scan8[0]+8){
// leftshift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's ok.
- SET_DIAG_MV(>>1, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
+ SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
}
}
#undef SET_DIAG_MV
@@ -1447,14 +1022,76 @@ static inline void pred_direct_motion(H264Context * const h, int *mb_type){
}
if(ref[1] < 0){
- *mb_type &= ~MB_TYPE_P0L1;
- sub_mb_type &= ~MB_TYPE_P0L1;
+ if(!is_b8x8)
+ *mb_type &= ~MB_TYPE_L1;
+ sub_mb_type &= ~MB_TYPE_L1;
}else if(ref[0] < 0){
- *mb_type &= ~MB_TYPE_P0L0;
- sub_mb_type &= ~MB_TYPE_P0L0;
+ if(!is_b8x8)
+ *mb_type &= ~MB_TYPE_L0;
+ sub_mb_type &= ~MB_TYPE_L0;
}
- if(IS_16X16(*mb_type)){
+ if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
+ int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
+ int mb_types_col[2];
+ int b8_stride = h->b8_stride;
+ int b4_stride = h->b_stride;
+
+ *mb_type = (*mb_type & ~MB_TYPE_16x16) | MB_TYPE_8x8;
+
+ if(IS_INTERLACED(*mb_type)){
+ mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
+ mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
+ if(s->mb_y&1){
+ l1ref0 -= 2*b8_stride;
+ l1ref1 -= 2*b8_stride;
+ l1mv0 -= 4*b4_stride;
+ l1mv1 -= 4*b4_stride;
+ }
+ b8_stride *= 3;
+ b4_stride *= 6;
+ }else{
+ int cur_poc = s->current_picture_ptr->poc;
+ int *col_poc = h->ref_list[1]->field_poc;
+ int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
+ int dy = 2*col_parity - (s->mb_y&1);
+ mb_types_col[0] =
+ mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy + col_parity*s->mb_stride];
+ l1ref0 += dy*b8_stride;
+ l1ref1 += dy*b8_stride;
+ l1mv0 += 2*dy*b4_stride;
+ l1mv1 += 2*dy*b4_stride;
+ b8_stride = 0;
+ }
+
+ for(i8=0; i8<4; i8++){
+ int x8 = i8&1;
+ int y8 = i8>>1;
+ int xy8 = x8+y8*b8_stride;
+ int xy4 = 3*x8+y8*b4_stride;
+ int a=0, b=0;
+
+ if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
+ continue;
+ h->sub_mb_type[i8] = sub_mb_type;
+
+ fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
+ fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
+ if(!IS_INTRA(mb_types_col[y8])
+ && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
+ || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
+ if(ref[0] > 0)
+ a= pack16to32(mv[0][0],mv[0][1]);
+ if(ref[1] > 0)
+ b= pack16to32(mv[1][0],mv[1][1]);
+ }else{
+ a= pack16to32(mv[0][0],mv[0][1]);
+ b= pack16to32(mv[1][0],mv[1][1]);
+ }
+ fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
+ fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
+ }
+ }else if(IS_16X16(*mb_type)){
int a=0, b=0;
fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
@@ -1738,9 +1375,10 @@ static inline void write_back_motion(H264Context *h, int mb_type){
* @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
* @returns decoded bytes, might be src+1 if no escapes
*/
-static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *consumed, int length){
+static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
int i, si, di;
uint8_t *dst;
+ int bufidx;
// src[0]&0x80; //forbidden bit
h->nal_ref_idc= src[0]>>5;
@@ -1769,8 +1407,9 @@ static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *c
return src;
}
- h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length);
- dst= h->rbsp_buffer;
+ bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
+ h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length);
+ dst= h->rbsp_buffer[bufidx];
if (dst == NULL){
return NULL;
@@ -1795,7 +1434,7 @@ static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *c
*dst_length= di;
*consumed= si + 1;//+1 for the header
-//FIXME store exact number of bits in the getbitcontext (its needed for decoding)
+//FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
return dst;
}
@@ -1803,7 +1442,7 @@ static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *c
* identifies the exact end of the bitstream
* @return the length of the trailing, or 0 if damaged
*/
-static int decode_rbsp_trailing(H264Context *h, uint8_t *src){
+static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
int v= *src;
int r;
@@ -1946,12 +1585,11 @@ static void chroma_dc_dct_c(DCTELEM *block){
/**
* gets the chroma qp.
*/
-static inline int get_chroma_qp(int chroma_qp_index_offset, int qscale){
-
- return chroma_qp[av_clip(qscale + chroma_qp_index_offset, 0, 51)];
+static inline int get_chroma_qp(H264Context *h, int t, int qscale){
+ return h->pps.chroma_qp_table[t][qscale & 0xff];
}
-//FIXME need to check that this doesnt overflow signed 32 bit for low qp, i am not sure, it's very close
+//FIXME need to check that this does not overflow signed 32 bit for low qp, i am not sure, it's very close
//FIXME check that gcc inlines this (and optimizes intra & separate_dc stuff away)
static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int separate_dc){
int i;
@@ -2030,722 +1668,6 @@ static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int
return last_non_zero;
}
-static void pred4x4_vertical_c(uint8_t *src, uint8_t *topright, int stride){
- const uint32_t a= ((uint32_t*)(src-stride))[0];
- ((uint32_t*)(src+0*stride))[0]= a;
- ((uint32_t*)(src+1*stride))[0]= a;
- ((uint32_t*)(src+2*stride))[0]= a;
- ((uint32_t*)(src+3*stride))[0]= a;
-}
-
-static void pred4x4_horizontal_c(uint8_t *src, uint8_t *topright, int stride){
- ((uint32_t*)(src+0*stride))[0]= src[-1+0*stride]*0x01010101;
- ((uint32_t*)(src+1*stride))[0]= src[-1+1*stride]*0x01010101;
- ((uint32_t*)(src+2*stride))[0]= src[-1+2*stride]*0x01010101;
- ((uint32_t*)(src+3*stride))[0]= src[-1+3*stride]*0x01010101;
-}
-
-static void pred4x4_dc_c(uint8_t *src, uint8_t *topright, int stride){
- const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride]
- + src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 4) >>3;
-
- ((uint32_t*)(src+0*stride))[0]=
- ((uint32_t*)(src+1*stride))[0]=
- ((uint32_t*)(src+2*stride))[0]=
- ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101;
-}
-
-static void pred4x4_left_dc_c(uint8_t *src, uint8_t *topright, int stride){
- const int dc= ( src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 2) >>2;
-
- ((uint32_t*)(src+0*stride))[0]=
- ((uint32_t*)(src+1*stride))[0]=
- ((uint32_t*)(src+2*stride))[0]=
- ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101;
-}
-
-static void pred4x4_top_dc_c(uint8_t *src, uint8_t *topright, int stride){
- const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] + 2) >>2;
-
- ((uint32_t*)(src+0*stride))[0]=
- ((uint32_t*)(src+1*stride))[0]=
- ((uint32_t*)(src+2*stride))[0]=
- ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101;
-}
-
-static void pred4x4_128_dc_c(uint8_t *src, uint8_t *topright, int stride){
- ((uint32_t*)(src+0*stride))[0]=
- ((uint32_t*)(src+1*stride))[0]=
- ((uint32_t*)(src+2*stride))[0]=
- ((uint32_t*)(src+3*stride))[0]= 128U*0x01010101U;
-}
-
-
-#define LOAD_TOP_RIGHT_EDGE\
- const int t4= topright[0];\
- const int t5= topright[1];\
- const int t6= topright[2];\
- const int t7= topright[3];\
-
-#define LOAD_LEFT_EDGE\
- const int l0= src[-1+0*stride];\
- const int l1= src[-1+1*stride];\
- const int l2= src[-1+2*stride];\
- const int l3= src[-1+3*stride];\
-
-#define LOAD_TOP_EDGE\
- const int t0= src[ 0-1*stride];\
- const int t1= src[ 1-1*stride];\
- const int t2= src[ 2-1*stride];\
- const int t3= src[ 3-1*stride];\
-
-static void pred4x4_down_right_c(uint8_t *src, uint8_t *topright, int stride){
- const int lt= src[-1-1*stride];
- LOAD_TOP_EDGE
- LOAD_LEFT_EDGE
-
- src[0+3*stride]=(l3 + 2*l2 + l1 + 2)>>2;
- src[0+2*stride]=
- src[1+3*stride]=(l2 + 2*l1 + l0 + 2)>>2;
- src[0+1*stride]=
- src[1+2*stride]=
- src[2+3*stride]=(l1 + 2*l0 + lt + 2)>>2;
- src[0+0*stride]=
- src[1+1*stride]=
- src[2+2*stride]=
- src[3+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
- src[1+0*stride]=
- src[2+1*stride]=
- src[3+2*stride]=(lt + 2*t0 + t1 + 2)>>2;
- src[2+0*stride]=
- src[3+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
- src[3+0*stride]=(t1 + 2*t2 + t3 + 2)>>2;
-}
-
-static void pred4x4_down_left_c(uint8_t *src, uint8_t *topright, int stride){
- LOAD_TOP_EDGE
- LOAD_TOP_RIGHT_EDGE
-// LOAD_LEFT_EDGE
-
- src[0+0*stride]=(t0 + t2 + 2*t1 + 2)>>2;
- src[1+0*stride]=
- src[0+1*stride]=(t1 + t3 + 2*t2 + 2)>>2;
- src[2+0*stride]=
- src[1+1*stride]=
- src[0+2*stride]=(t2 + t4 + 2*t3 + 2)>>2;
- src[3+0*stride]=
- src[2+1*stride]=
- src[1+2*stride]=
- src[0+3*stride]=(t3 + t5 + 2*t4 + 2)>>2;
- src[3+1*stride]=
- src[2+2*stride]=
- src[1+3*stride]=(t4 + t6 + 2*t5 + 2)>>2;
- src[3+2*stride]=
- src[2+3*stride]=(t5 + t7 + 2*t6 + 2)>>2;
- src[3+3*stride]=(t6 + 3*t7 + 2)>>2;
-}
-
-static void pred4x4_vertical_right_c(uint8_t *src, uint8_t *topright, int stride){
- const int lt= src[-1-1*stride];
- LOAD_TOP_EDGE
- LOAD_LEFT_EDGE
- const __attribute__((unused)) int unu= l3;
-
- src[0+0*stride]=
- src[1+2*stride]=(lt + t0 + 1)>>1;
- src[1+0*stride]=
- src[2+2*stride]=(t0 + t1 + 1)>>1;
- src[2+0*stride]=
- src[3+2*stride]=(t1 + t2 + 1)>>1;
- src[3+0*stride]=(t2 + t3 + 1)>>1;
- src[0+1*stride]=
- src[1+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
- src[1+1*stride]=
- src[2+3*stride]=(lt + 2*t0 + t1 + 2)>>2;
- src[2+1*stride]=
- src[3+3*stride]=(t0 + 2*t1 + t2 + 2)>>2;
- src[3+1*stride]=(t1 + 2*t2 + t3 + 2)>>2;
- src[0+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
- src[0+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
-}
-
-static void pred4x4_vertical_left_c(uint8_t *src, uint8_t *topright, int stride){
- LOAD_TOP_EDGE
- LOAD_TOP_RIGHT_EDGE
- const __attribute__((unused)) int unu= t7;
-
- src[0+0*stride]=(t0 + t1 + 1)>>1;
- src[1+0*stride]=
- src[0+2*stride]=(t1 + t2 + 1)>>1;
- src[2+0*stride]=
- src[1+2*stride]=(t2 + t3 + 1)>>1;
- src[3+0*stride]=
- src[2+2*stride]=(t3 + t4+ 1)>>1;
- src[3+2*stride]=(t4 + t5+ 1)>>1;
- src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
- src[1+1*stride]=
- src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2;
- src[2+1*stride]=
- src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2;
- src[3+1*stride]=
- src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2;
- src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;
-}
-
-static void pred4x4_horizontal_up_c(uint8_t *src, uint8_t *topright, int stride){
- LOAD_LEFT_EDGE
-
- src[0+0*stride]=(l0 + l1 + 1)>>1;
- src[1+0*stride]=(l0 + 2*l1 + l2 + 2)>>2;
- src[2+0*stride]=
- src[0+1*stride]=(l1 + l2 + 1)>>1;
- src[3+0*stride]=
- src[1+1*stride]=(l1 + 2*l2 + l3 + 2)>>2;
- src[2+1*stride]=
- src[0+2*stride]=(l2 + l3 + 1)>>1;
- src[3+1*stride]=
- src[1+2*stride]=(l2 + 2*l3 + l3 + 2)>>2;
- src[3+2*stride]=
- src[1+3*stride]=
- src[0+3*stride]=
- src[2+2*stride]=
- src[2+3*stride]=
- src[3+3*stride]=l3;
-}
-
-static void pred4x4_horizontal_down_c(uint8_t *src, uint8_t *topright, int stride){
- const int lt= src[-1-1*stride];
- LOAD_TOP_EDGE
- LOAD_LEFT_EDGE
- const __attribute__((unused)) int unu= t3;
-
- src[0+0*stride]=
- src[2+1*stride]=(lt + l0 + 1)>>1;
- src[1+0*stride]=
- src[3+1*stride]=(l0 + 2*lt + t0 + 2)>>2;
- src[2+0*stride]=(lt + 2*t0 + t1 + 2)>>2;
- src[3+0*stride]=(t0 + 2*t1 + t2 + 2)>>2;
- src[0+1*stride]=
- src[2+2*stride]=(l0 + l1 + 1)>>1;
- src[1+1*stride]=
- src[3+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
- src[0+2*stride]=
- src[2+3*stride]=(l1 + l2+ 1)>>1;
- src[1+2*stride]=
- src[3+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
- src[0+3*stride]=(l2 + l3 + 1)>>1;
- src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2;
-}
-
-void ff_pred16x16_vertical_c(uint8_t *src, int stride){
- int i;
- const uint32_t a= ((uint32_t*)(src-stride))[0];
- const uint32_t b= ((uint32_t*)(src-stride))[1];
- const uint32_t c= ((uint32_t*)(src-stride))[2];
- const uint32_t d= ((uint32_t*)(src-stride))[3];
-
- for(i=0; i<16; i++){
- ((uint32_t*)(src+i*stride))[0]= a;
- ((uint32_t*)(src+i*stride))[1]= b;
- ((uint32_t*)(src+i*stride))[2]= c;
- ((uint32_t*)(src+i*stride))[3]= d;
- }
-}
-
-void ff_pred16x16_horizontal_c(uint8_t *src, int stride){
- int i;
-
- for(i=0; i<16; i++){
- ((uint32_t*)(src+i*stride))[0]=
- ((uint32_t*)(src+i*stride))[1]=
- ((uint32_t*)(src+i*stride))[2]=
- ((uint32_t*)(src+i*stride))[3]= src[-1+i*stride]*0x01010101;
- }
-}
-
-void ff_pred16x16_dc_c(uint8_t *src, int stride){
- int i, dc=0;
-
- for(i=0;i<16; i++){
- dc+= src[-1+i*stride];
- }
-
- for(i=0;i<16; i++){
- dc+= src[i-stride];
- }
-
- dc= 0x01010101*((dc + 16)>>5);
-
- for(i=0; i<16; i++){
- ((uint32_t*)(src+i*stride))[0]=
- ((uint32_t*)(src+i*stride))[1]=
- ((uint32_t*)(src+i*stride))[2]=
- ((uint32_t*)(src+i*stride))[3]= dc;
- }
-}
-
-static void pred16x16_left_dc_c(uint8_t *src, int stride){
- int i, dc=0;
-
- for(i=0;i<16; i++){
- dc+= src[-1+i*stride];
- }
-
- dc= 0x01010101*((dc + 8)>>4);
-
- for(i=0; i<16; i++){
- ((uint32_t*)(src+i*stride))[0]=
- ((uint32_t*)(src+i*stride))[1]=
- ((uint32_t*)(src+i*stride))[2]=
- ((uint32_t*)(src+i*stride))[3]= dc;
- }
-}
-
-static void pred16x16_top_dc_c(uint8_t *src, int stride){
- int i, dc=0;
-
- for(i=0;i<16; i++){
- dc+= src[i-stride];
- }
- dc= 0x01010101*((dc + 8)>>4);
-
- for(i=0; i<16; i++){
- ((uint32_t*)(src+i*stride))[0]=
- ((uint32_t*)(src+i*stride))[1]=
- ((uint32_t*)(src+i*stride))[2]=
- ((uint32_t*)(src+i*stride))[3]= dc;
- }
-}
-
-void ff_pred16x16_128_dc_c(uint8_t *src, int stride){
- int i;
-
- for(i=0; i<16; i++){
- ((uint32_t*)(src+i*stride))[0]=
- ((uint32_t*)(src+i*stride))[1]=
- ((uint32_t*)(src+i*stride))[2]=
- ((uint32_t*)(src+i*stride))[3]= 0x01010101U*128U;
- }
-}
-
-static inline void pred16x16_plane_compat_c(uint8_t *src, int stride, const int svq3){
- int i, j, k;
- int a;
- uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
- const uint8_t * const src0 = src+7-stride;
- const uint8_t *src1 = src+8*stride-1;
- const uint8_t *src2 = src1-2*stride; // == src+6*stride-1;
- int H = src0[1] - src0[-1];
- int V = src1[0] - src2[ 0];
- for(k=2; k<=8; ++k) {
- src1 += stride; src2 -= stride;
- H += k*(src0[k] - src0[-k]);
- V += k*(src1[0] - src2[ 0]);
- }
- if(svq3){
- H = ( 5*(H/4) ) / 16;
- V = ( 5*(V/4) ) / 16;
-
- /* required for 100% accuracy */
- i = H; H = V; V = i;
- }else{
- H = ( 5*H+32 ) >> 6;
- V = ( 5*V+32 ) >> 6;
- }
-
- a = 16*(src1[0] + src2[16] + 1) - 7*(V+H);
- for(j=16; j>0; --j) {
- int b = a;
- a += V;
- for(i=-16; i<0; i+=4) {
- src[16+i] = cm[ (b ) >> 5 ];
- src[17+i] = cm[ (b+ H) >> 5 ];
- src[18+i] = cm[ (b+2*H) >> 5 ];
- src[19+i] = cm[ (b+3*H) >> 5 ];
- b += 4*H;
- }
- src += stride;
- }
-}
-
-void ff_pred16x16_plane_c(uint8_t *src, int stride){
- pred16x16_plane_compat_c(src, stride, 0);
-}
-
-void ff_pred8x8_vertical_c(uint8_t *src, int stride){
- int i;
- const uint32_t a= ((uint32_t*)(src-stride))[0];
- const uint32_t b= ((uint32_t*)(src-stride))[1];
-
- for(i=0; i<8; i++){
- ((uint32_t*)(src+i*stride))[0]= a;
- ((uint32_t*)(src+i*stride))[1]= b;
- }
-}
-
-void ff_pred8x8_horizontal_c(uint8_t *src, int stride){
- int i;
-
- for(i=0; i<8; i++){
- ((uint32_t*)(src+i*stride))[0]=
- ((uint32_t*)(src+i*stride))[1]= src[-1+i*stride]*0x01010101;
- }
-}
-
-void ff_pred8x8_128_dc_c(uint8_t *src, int stride){
- int i;
-
- for(i=0; i<8; i++){
- ((uint32_t*)(src+i*stride))[0]=
- ((uint32_t*)(src+i*stride))[1]= 0x01010101U*128U;
- }
-}
-
-static void pred8x8_left_dc_c(uint8_t *src, int stride){
- int i;
- int dc0, dc2;
-
- dc0=dc2=0;
- for(i=0;i<4; i++){
- dc0+= src[-1+i*stride];
- dc2+= src[-1+(i+4)*stride];
- }
- dc0= 0x01010101*((dc0 + 2)>>2);
- dc2= 0x01010101*((dc2 + 2)>>2);
-
- for(i=0; i<4; i++){
- ((uint32_t*)(src+i*stride))[0]=
- ((uint32_t*)(src+i*stride))[1]= dc0;
- }
- for(i=4; i<8; i++){
- ((uint32_t*)(src+i*stride))[0]=
- ((uint32_t*)(src+i*stride))[1]= dc2;
- }
-}
-
-static void pred8x8_top_dc_c(uint8_t *src, int stride){
- int i;
- int dc0, dc1;
-
- dc0=dc1=0;
- for(i=0;i<4; i++){
- dc0+= src[i-stride];
- dc1+= src[4+i-stride];
- }
- dc0= 0x01010101*((dc0 + 2)>>2);
- dc1= 0x01010101*((dc1 + 2)>>2);
-
- for(i=0; i<4; i++){
- ((uint32_t*)(src+i*stride))[0]= dc0;
- ((uint32_t*)(src+i*stride))[1]= dc1;
- }
- for(i=4; i<8; i++){
- ((uint32_t*)(src+i*stride))[0]= dc0;
- ((uint32_t*)(src+i*stride))[1]= dc1;
- }
-}
-
-
-void ff_pred8x8_dc_c(uint8_t *src, int stride){
- int i;
- int dc0, dc1, dc2, dc3;
-
- dc0=dc1=dc2=0;
- for(i=0;i<4; i++){
- dc0+= src[-1+i*stride] + src[i-stride];
- dc1+= src[4+i-stride];
- dc2+= src[-1+(i+4)*stride];
- }
- dc3= 0x01010101*((dc1 + dc2 + 4)>>3);
- dc0= 0x01010101*((dc0 + 4)>>3);
- dc1= 0x01010101*((dc1 + 2)>>2);
- dc2= 0x01010101*((dc2 + 2)>>2);
-
- for(i=0; i<4; i++){
- ((uint32_t*)(src+i*stride))[0]= dc0;
- ((uint32_t*)(src+i*stride))[1]= dc1;
- }
- for(i=4; i<8; i++){
- ((uint32_t*)(src+i*stride))[0]= dc2;
- ((uint32_t*)(src+i*stride))[1]= dc3;
- }
-}
-
-void ff_pred8x8_plane_c(uint8_t *src, int stride){
- int j, k;
- int a;
- uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
- const uint8_t * const src0 = src+3-stride;
- const uint8_t *src1 = src+4*stride-1;
- const uint8_t *src2 = src1-2*stride; // == src+2*stride-1;
- int H = src0[1] - src0[-1];
- int V = src1[0] - src2[ 0];
- for(k=2; k<=4; ++k) {
- src1 += stride; src2 -= stride;
- H += k*(src0[k] - src0[-k]);
- V += k*(src1[0] - src2[ 0]);
- }
- H = ( 17*H+16 ) >> 5;
- V = ( 17*V+16 ) >> 5;
-
- a = 16*(src1[0] + src2[8]+1) - 3*(V+H);
- for(j=8; j>0; --j) {
- int b = a;
- a += V;
- src[0] = cm[ (b ) >> 5 ];
- src[1] = cm[ (b+ H) >> 5 ];
- src[2] = cm[ (b+2*H) >> 5 ];
- src[3] = cm[ (b+3*H) >> 5 ];
- src[4] = cm[ (b+4*H) >> 5 ];
- src[5] = cm[ (b+5*H) >> 5 ];
- src[6] = cm[ (b+6*H) >> 5 ];
- src[7] = cm[ (b+7*H) >> 5 ];
- src += stride;
- }
-}
-
-#define SRC(x,y) src[(x)+(y)*stride]
-#define PL(y) \
- const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2;
-#define PREDICT_8x8_LOAD_LEFT \
- const int l0 = ((has_topleft ? SRC(-1,-1) : SRC(-1,0)) \
- + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2; \
- PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) \
- const int l7 attribute_unused = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2
-
-#define PT(x) \
- const int t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
-#define PREDICT_8x8_LOAD_TOP \
- const int t0 = ((has_topleft ? SRC(-1,-1) : SRC(0,-1)) \
- + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2; \
- PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) \
- const int t7 attribute_unused = ((has_topright ? SRC(8,-1) : SRC(7,-1)) \
- + 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2
-
-#define PTR(x) \
- t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
-#define PREDICT_8x8_LOAD_TOPRIGHT \
- int t8, t9, t10, t11, t12, t13, t14, t15; \
- if(has_topright) { \
- PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14) \
- t15 = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2; \
- } else t8=t9=t10=t11=t12=t13=t14=t15= SRC(7,-1);
-
-#define PREDICT_8x8_LOAD_TOPLEFT \
- const int lt = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2
-
-#define PREDICT_8x8_DC(v) \
- int y; \
- for( y = 0; y < 8; y++ ) { \
- ((uint32_t*)src)[0] = \
- ((uint32_t*)src)[1] = v; \
- src += stride; \
- }
-
-static void pred8x8l_128_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
-{
- PREDICT_8x8_DC(0x80808080);
-}
-static void pred8x8l_left_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
-{
- PREDICT_8x8_LOAD_LEFT;
- const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3) * 0x01010101;
- PREDICT_8x8_DC(dc);
-}
-static void pred8x8l_top_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
-{
- PREDICT_8x8_LOAD_TOP;
- const uint32_t dc = ((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3) * 0x01010101;
- PREDICT_8x8_DC(dc);
-}
-static void pred8x8l_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
-{
- PREDICT_8x8_LOAD_LEFT;
- PREDICT_8x8_LOAD_TOP;
- const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7
- +t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4) * 0x01010101;
- PREDICT_8x8_DC(dc);
-}
-static void pred8x8l_horizontal_c(uint8_t *src, int has_topleft, int has_topright, int stride)
-{
- PREDICT_8x8_LOAD_LEFT;
-#define ROW(y) ((uint32_t*)(src+y*stride))[0] =\
- ((uint32_t*)(src+y*stride))[1] = 0x01010101 * l##y
- ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);
-#undef ROW
-}
-static void pred8x8l_vertical_c(uint8_t *src, int has_topleft, int has_topright, int stride)
-{
- int y;
- PREDICT_8x8_LOAD_TOP;
- src[0] = t0;
- src[1] = t1;
- src[2] = t2;
- src[3] = t3;
- src[4] = t4;
- src[5] = t5;
- src[6] = t6;
- src[7] = t7;
- for( y = 1; y < 8; y++ )
- *(uint64_t*)(src+y*stride) = *(uint64_t*)src;
-}
-static void pred8x8l_down_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
-{
- PREDICT_8x8_LOAD_TOP;
- PREDICT_8x8_LOAD_TOPRIGHT;
- SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2;
- SRC(0,1)=SRC(1,0)= (t1 + 2*t2 + t3 + 2) >> 2;
- SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2 + 2*t3 + t4 + 2) >> 2;
- SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= (t3 + 2*t4 + t5 + 2) >> 2;
- SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= (t4 + 2*t5 + t6 + 2) >> 2;
- SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= (t5 + 2*t6 + t7 + 2) >> 2;
- SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= (t6 + 2*t7 + t8 + 2) >> 2;
- SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= (t7 + 2*t8 + t9 + 2) >> 2;
- SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= (t8 + 2*t9 + t10 + 2) >> 2;
- SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= (t9 + 2*t10 + t11 + 2) >> 2;
- SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= (t10 + 2*t11 + t12 + 2) >> 2;
- SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= (t11 + 2*t12 + t13 + 2) >> 2;
- SRC(5,7)=SRC(6,6)=SRC(7,5)= (t12 + 2*t13 + t14 + 2) >> 2;
- SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2;
- SRC(7,7)= (t14 + 3*t15 + 2) >> 2;
-}
-static void pred8x8l_down_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
-{
- PREDICT_8x8_LOAD_TOP;
- PREDICT_8x8_LOAD_LEFT;
- PREDICT_8x8_LOAD_TOPLEFT;
- SRC(0,7)= (l7 + 2*l6 + l5 + 2) >> 2;
- SRC(0,6)=SRC(1,7)= (l6 + 2*l5 + l4 + 2) >> 2;
- SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5 + 2*l4 + l3 + 2) >> 2;
- SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= (l4 + 2*l3 + l2 + 2) >> 2;
- SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= (l3 + 2*l2 + l1 + 2) >> 2;
- SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= (l2 + 2*l1 + l0 + 2) >> 2;
- SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= (l1 + 2*l0 + lt + 2) >> 2;
- SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= (l0 + 2*lt + t0 + 2) >> 2;
- SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= (lt + 2*t0 + t1 + 2) >> 2;
- SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= (t0 + 2*t1 + t2 + 2) >> 2;
- SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= (t1 + 2*t2 + t3 + 2) >> 2;
- SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= (t2 + 2*t3 + t4 + 2) >> 2;
- SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3 + 2*t4 + t5 + 2) >> 2;
- SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2;
- SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2;
-
-}
-static void pred8x8l_vertical_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
-{
- PREDICT_8x8_LOAD_TOP;
- PREDICT_8x8_LOAD_LEFT;
- PREDICT_8x8_LOAD_TOPLEFT;
- SRC(0,6)= (l5 + 2*l4 + l3 + 2) >> 2;
- SRC(0,7)= (l6 + 2*l5 + l4 + 2) >> 2;
- SRC(0,4)=SRC(1,6)= (l3 + 2*l2 + l1 + 2) >> 2;
- SRC(0,5)=SRC(1,7)= (l4 + 2*l3 + l2 + 2) >> 2;
- SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1 + 2*l0 + lt + 2) >> 2;
- SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2 + 2*l1 + l0 + 2) >> 2;
- SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= (l0 + 2*lt + t0 + 2) >> 2;
- SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= (lt + t0 + 1) >> 1;
- SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= (lt + 2*t0 + t1 + 2) >> 2;
- SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= (t0 + t1 + 1) >> 1;
- SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= (t0 + 2*t1 + t2 + 2) >> 2;
- SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= (t1 + t2 + 1) >> 1;
- SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= (t1 + 2*t2 + t3 + 2) >> 2;
- SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= (t2 + t3 + 1) >> 1;
- SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= (t2 + 2*t3 + t4 + 2) >> 2;
- SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= (t3 + t4 + 1) >> 1;
- SRC(5,1)=SRC(6,3)=SRC(7,5)= (t3 + 2*t4 + t5 + 2) >> 2;
- SRC(5,0)=SRC(6,2)=SRC(7,4)= (t4 + t5 + 1) >> 1;
- SRC(6,1)=SRC(7,3)= (t4 + 2*t5 + t6 + 2) >> 2;
- SRC(6,0)=SRC(7,2)= (t5 + t6 + 1) >> 1;
- SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2;
- SRC(7,0)= (t6 + t7 + 1) >> 1;
-}
-static void pred8x8l_horizontal_down_c(uint8_t *src, int has_topleft, int has_topright, int stride)
-{
- PREDICT_8x8_LOAD_TOP;
- PREDICT_8x8_LOAD_LEFT;
- PREDICT_8x8_LOAD_TOPLEFT;
- SRC(0,7)= (l6 + l7 + 1) >> 1;
- SRC(1,7)= (l5 + 2*l6 + l7 + 2) >> 2;
- SRC(0,6)=SRC(2,7)= (l5 + l6 + 1) >> 1;
- SRC(1,6)=SRC(3,7)= (l4 + 2*l5 + l6 + 2) >> 2;
- SRC(0,5)=SRC(2,6)=SRC(4,7)= (l4 + l5 + 1) >> 1;
- SRC(1,5)=SRC(3,6)=SRC(5,7)= (l3 + 2*l4 + l5 + 2) >> 2;
- SRC(0,4)=SRC(2,5)=SRC(4,6)=SRC(6,7)= (l3 + l4 + 1) >> 1;
- SRC(1,4)=SRC(3,5)=SRC(5,6)=SRC(7,7)= (l2 + 2*l3 + l4 + 2) >> 2;
- SRC(0,3)=SRC(2,4)=SRC(4,5)=SRC(6,6)= (l2 + l3 + 1) >> 1;
- SRC(1,3)=SRC(3,4)=SRC(5,5)=SRC(7,6)= (l1 + 2*l2 + l3 + 2) >> 2;
- SRC(0,2)=SRC(2,3)=SRC(4,4)=SRC(6,5)= (l1 + l2 + 1) >> 1;
- SRC(1,2)=SRC(3,3)=SRC(5,4)=SRC(7,5)= (l0 + 2*l1 + l2 + 2) >> 2;
- SRC(0,1)=SRC(2,2)=SRC(4,3)=SRC(6,4)= (l0 + l1 + 1) >> 1;
- SRC(1,1)=SRC(3,2)=SRC(5,3)=SRC(7,4)= (lt + 2*l0 + l1 + 2) >> 2;
- SRC(0,0)=SRC(2,1)=SRC(4,2)=SRC(6,3)= (lt + l0 + 1) >> 1;
- SRC(1,0)=SRC(3,1)=SRC(5,2)=SRC(7,3)= (l0 + 2*lt + t0 + 2) >> 2;
- SRC(2,0)=SRC(4,1)=SRC(6,2)= (t1 + 2*t0 + lt + 2) >> 2;
- SRC(3,0)=SRC(5,1)=SRC(7,2)= (t2 + 2*t1 + t0 + 2) >> 2;
- SRC(4,0)=SRC(6,1)= (t3 + 2*t2 + t1 + 2) >> 2;
- SRC(5,0)=SRC(7,1)= (t4 + 2*t3 + t2 + 2) >> 2;
- SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2;
- SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2;
-}
-static void pred8x8l_vertical_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
-{
- PREDICT_8x8_LOAD_TOP;
- PREDICT_8x8_LOAD_TOPRIGHT;
- SRC(0,0)= (t0 + t1 + 1) >> 1;
- SRC(0,1)= (t0 + 2*t1 + t2 + 2) >> 2;
- SRC(0,2)=SRC(1,0)= (t1 + t2 + 1) >> 1;
- SRC(0,3)=SRC(1,1)= (t1 + 2*t2 + t3 + 2) >> 2;
- SRC(0,4)=SRC(1,2)=SRC(2,0)= (t2 + t3 + 1) >> 1;
- SRC(0,5)=SRC(1,3)=SRC(2,1)= (t2 + 2*t3 + t4 + 2) >> 2;
- SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= (t3 + t4 + 1) >> 1;
- SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= (t3 + 2*t4 + t5 + 2) >> 2;
- SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= (t4 + t5 + 1) >> 1;
- SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= (t4 + 2*t5 + t6 + 2) >> 2;
- SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= (t5 + t6 + 1) >> 1;
- SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= (t5 + 2*t6 + t7 + 2) >> 2;
- SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= (t6 + t7 + 1) >> 1;
- SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= (t6 + 2*t7 + t8 + 2) >> 2;
- SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= (t7 + t8 + 1) >> 1;
- SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= (t7 + 2*t8 + t9 + 2) >> 2;
- SRC(5,6)=SRC(6,4)=SRC(7,2)= (t8 + t9 + 1) >> 1;
- SRC(5,7)=SRC(6,5)=SRC(7,3)= (t8 + 2*t9 + t10 + 2) >> 2;
- SRC(6,6)=SRC(7,4)= (t9 + t10 + 1) >> 1;
- SRC(6,7)=SRC(7,5)= (t9 + 2*t10 + t11 + 2) >> 2;
- SRC(7,6)= (t10 + t11 + 1) >> 1;
- SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2;
-}
-static void pred8x8l_horizontal_up_c(uint8_t *src, int has_topleft, int has_topright, int stride)
-{
- PREDICT_8x8_LOAD_LEFT;
- SRC(0,0)= (l0 + l1 + 1) >> 1;
- SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2;
- SRC(0,1)=SRC(2,0)= (l1 + l2 + 1) >> 1;
- SRC(1,1)=SRC(3,0)= (l1 + 2*l2 + l3 + 2) >> 2;
- SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2 + l3 + 1) >> 1;
- SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2 + 2*l3 + l4 + 2) >> 2;
- SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3 + l4 + 1) >> 1;
- SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3 + 2*l4 + l5 + 2) >> 2;
- SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4 + l5 + 1) >> 1;
- SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4 + 2*l5 + l6 + 2) >> 2;
- SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5 + l6 + 1) >> 1;
- SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5 + 2*l6 + l7 + 2) >> 2;
- SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6 + l7 + 1) >> 1;
- SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6 + 3*l7 + 2) >> 2;
- SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)=
- SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)=
- SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)=
- SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7;
-}
-#undef PREDICT_8x8_LOAD_LEFT
-#undef PREDICT_8x8_LOAD_TOP
-#undef PREDICT_8x8_LOAD_TOPLEFT
-#undef PREDICT_8x8_LOAD_TOPRIGHT
-#undef PREDICT_8x8_DC
-#undef PTR
-#undef PT
-#undef PL
-#undef SRC
-
static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
int src_x_offset, int src_y_offset,
@@ -2762,7 +1684,7 @@ static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square,
const int full_mx= mx>>2;
const int full_my= my>>2;
const int pic_width = 16*s->mb_width;
- const int pic_height = 16*s->mb_height >> MB_MBAFF;
+ const int pic_height = 16*s->mb_height >> MB_FIELD;
if(!pic->data[0]) //FIXME this is unacceptable, some senseable error concealment must be done for missing reference frames
return;
@@ -2784,11 +1706,11 @@ static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square,
qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
}
- if(s->flags&CODEC_FLAG_GRAY) return;
+ if(ENABLE_GRAY && s->flags&CODEC_FLAG_GRAY) return;
- if(MB_MBAFF){
+ if(MB_FIELD){
// chroma offset when predicting from a field of opposite parity
- my += 2 * ((s->mb_y & 1) - (h->ref_cache[list][scan8[n]] & 1));
+ my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
}
src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
@@ -2821,7 +1743,7 @@ static inline void mc_part_std(H264Context *h, int n, int square, int chroma_hei
dest_cb += x_offset + y_offset*h->mb_uvlinesize;
dest_cr += x_offset + y_offset*h->mb_uvlinesize;
x_offset += 8*s->mb_x;
- y_offset += 8*(s->mb_y >> MB_MBAFF);
+ y_offset += 8*(s->mb_y >> MB_FIELD);
if(list0){
Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
@@ -2854,7 +1776,7 @@ static inline void mc_part_weighted(H264Context *h, int n, int square, int chrom
dest_cb += x_offset + y_offset*h->mb_uvlinesize;
dest_cr += x_offset + y_offset*h->mb_uvlinesize;
x_offset += 8*s->mb_x;
- y_offset += 8*(s->mb_y >> MB_MBAFF);
+ y_offset += 8*(s->mb_y >> MB_FIELD);
if(list0 && list1){
/* don't optimize for luma-only case, since B-frames usually
@@ -3029,7 +1951,7 @@ static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t
prefetch_motion(h, 1);
}
-static void decode_init_vlc(){
+static void decode_init_vlc(void){
static int done = 0;
if (!done) {
@@ -3068,56 +1990,9 @@ static void decode_init_vlc(){
}
}
-/**
- * Sets the intra prediction function pointers.
- */
-static void init_pred_ptrs(H264Context *h){
-// MpegEncContext * const s = &h->s;
-
- h->pred4x4[VERT_PRED ]= pred4x4_vertical_c;
- h->pred4x4[HOR_PRED ]= pred4x4_horizontal_c;
- h->pred4x4[DC_PRED ]= pred4x4_dc_c;
- h->pred4x4[DIAG_DOWN_LEFT_PRED ]= pred4x4_down_left_c;
- h->pred4x4[DIAG_DOWN_RIGHT_PRED]= pred4x4_down_right_c;
- h->pred4x4[VERT_RIGHT_PRED ]= pred4x4_vertical_right_c;
- h->pred4x4[HOR_DOWN_PRED ]= pred4x4_horizontal_down_c;
- h->pred4x4[VERT_LEFT_PRED ]= pred4x4_vertical_left_c;
- h->pred4x4[HOR_UP_PRED ]= pred4x4_horizontal_up_c;
- h->pred4x4[LEFT_DC_PRED ]= pred4x4_left_dc_c;
- h->pred4x4[TOP_DC_PRED ]= pred4x4_top_dc_c;
- h->pred4x4[DC_128_PRED ]= pred4x4_128_dc_c;
-
- h->pred8x8l[VERT_PRED ]= pred8x8l_vertical_c;
- h->pred8x8l[HOR_PRED ]= pred8x8l_horizontal_c;
- h->pred8x8l[DC_PRED ]= pred8x8l_dc_c;
- h->pred8x8l[DIAG_DOWN_LEFT_PRED ]= pred8x8l_down_left_c;
- h->pred8x8l[DIAG_DOWN_RIGHT_PRED]= pred8x8l_down_right_c;
- h->pred8x8l[VERT_RIGHT_PRED ]= pred8x8l_vertical_right_c;
- h->pred8x8l[HOR_DOWN_PRED ]= pred8x8l_horizontal_down_c;
- h->pred8x8l[VERT_LEFT_PRED ]= pred8x8l_vertical_left_c;
- h->pred8x8l[HOR_UP_PRED ]= pred8x8l_horizontal_up_c;
- h->pred8x8l[LEFT_DC_PRED ]= pred8x8l_left_dc_c;
- h->pred8x8l[TOP_DC_PRED ]= pred8x8l_top_dc_c;
- h->pred8x8l[DC_128_PRED ]= pred8x8l_128_dc_c;
-
- h->pred8x8[DC_PRED8x8 ]= ff_pred8x8_dc_c;
- h->pred8x8[VERT_PRED8x8 ]= ff_pred8x8_vertical_c;
- h->pred8x8[HOR_PRED8x8 ]= ff_pred8x8_horizontal_c;
- h->pred8x8[PLANE_PRED8x8 ]= ff_pred8x8_plane_c;
- h->pred8x8[LEFT_DC_PRED8x8]= pred8x8_left_dc_c;
- h->pred8x8[TOP_DC_PRED8x8 ]= pred8x8_top_dc_c;
- h->pred8x8[DC_128_PRED8x8 ]= ff_pred8x8_128_dc_c;
-
- h->pred16x16[DC_PRED8x8 ]= ff_pred16x16_dc_c;
- h->pred16x16[VERT_PRED8x8 ]= ff_pred16x16_vertical_c;
- h->pred16x16[HOR_PRED8x8 ]= ff_pred16x16_horizontal_c;
- h->pred16x16[PLANE_PRED8x8 ]= ff_pred16x16_plane_c;
- h->pred16x16[LEFT_DC_PRED8x8]= pred16x16_left_dc_c;
- h->pred16x16[TOP_DC_PRED8x8 ]= pred16x16_top_dc_c;
- h->pred16x16[DC_128_PRED8x8 ]= ff_pred16x16_128_dc_c;
-}
-
static void free_tables(H264Context *h){
+ int i;
+ H264Context *hx;
av_freep(&h->intra4x4_pred_mode);
av_freep(&h->chroma_pred_mode_table);
av_freep(&h->cbp_table);
@@ -3126,14 +2001,24 @@ static void free_tables(H264Context *h){
av_freep(&h->direct_table);
av_freep(&h->non_zero_count);
av_freep(&h->slice_table_base);
- av_freep(&h->top_borders[1]);
- av_freep(&h->top_borders[0]);
h->slice_table= NULL;
av_freep(&h->mb2b_xy);
av_freep(&h->mb2b8_xy);
- av_freep(&h->s.obmc_scratchpad);
+ for(i = 0; i < MAX_SPS_COUNT; i++)
+ av_freep(h->sps_buffers + i);
+
+ for(i = 0; i < MAX_PPS_COUNT; i++)
+ av_freep(h->pps_buffers + i);
+
+ for(i = 0; i < h->s.avctx->thread_count; i++) {
+ hx = h->thread_context[i];
+ if(!hx) continue;
+ av_freep(&hx->top_borders[1]);
+ av_freep(&hx->top_borders[0]);
+ av_freep(&hx->s.obmc_scratchpad);
+ }
}
static void init_dequant8_coeff_table(H264Context *h){
@@ -3214,16 +2099,12 @@ static int alloc_tables(H264Context *h){
CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(uint8_t))
- CHECKED_ALLOCZ(h->top_borders[0] , s->mb_width * (16+8+8) * sizeof(uint8_t))
- CHECKED_ALLOCZ(h->top_borders[1] , s->mb_width * (16+8+8) * sizeof(uint8_t))
CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
- if( h->pps.cabac ) {
- CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
- CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
- CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
- CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
- }
+ CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
+ CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
+ CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
+ CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(uint8_t));
h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
@@ -3252,6 +2133,38 @@ fail:
return -1;
}
+/**
+ * Mimic alloc_tables(), but for every context thread.
+ */
+static void clone_tables(H264Context *dst, H264Context *src){
+ dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
+ dst->non_zero_count = src->non_zero_count;
+ dst->slice_table = src->slice_table;
+ dst->cbp_table = src->cbp_table;
+ dst->mb2b_xy = src->mb2b_xy;
+ dst->mb2b8_xy = src->mb2b8_xy;
+ dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
+ dst->mvd_table[0] = src->mvd_table[0];
+ dst->mvd_table[1] = src->mvd_table[1];
+ dst->direct_table = src->direct_table;
+
+ dst->s.obmc_scratchpad = NULL;
+ ff_h264_pred_init(&dst->hpc, src->s.codec_id);
+}
+
+/**
+ * Init context
+ * Allocate buffers which are not shared amongst multiple threads.
+ */
+static int context_init(H264Context *h){
+ CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
+ CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
+
+ return 0;
+fail:
+ return -1; // free_tables will clean up for us
+}
+
static void common_init(H264Context *h){
MpegEncContext * const s = &h->s;
@@ -3259,7 +2172,7 @@ static void common_init(H264Context *h){
s->height = s->avctx->height;
s->codec_id= s->avctx->codec->id;
- init_pred_ptrs(h);
+ ff_h264_pred_init(&h->hpc, s->codec_id);
h->dequant_coeff_pps= -1;
s->unrestricted_mv=1;
@@ -3283,6 +2196,7 @@ static int decode_init(AVCodecContext *avctx){
// set defaults
// s->decode_mb= ff_h263_decode_mb;
+ s->quarter_sample = 1;
s->low_delay= 1;
avctx->pix_fmt= PIX_FMT_YUV420P;
@@ -3296,6 +2210,7 @@ static int decode_init(AVCodecContext *avctx){
h->is_avc = 0;
}
+ h->thread_context[0] = h;
return 0;
}
@@ -3306,6 +2221,13 @@ static int frame_start(H264Context *h){
if(MPV_frame_start(s, s->avctx) < 0)
return -1;
ff_er_frame_start(s);
+ /*
+ * MPV_frame_start uses pict_type to derive key_frame.
+ * This is incorrect for H.264; IDR markings must be used.
+ * Zero here; IDR markings per slice in frame or fields are OR'd in later.
+ * See decode_nal_units().
+ */
+ s->current_picture_ptr->key_frame= 0;
assert(s->linesize && s->uvlinesize);
@@ -3322,18 +2244,19 @@ static int frame_start(H264Context *h){
/* can't be in alloc_tables because linesize isn't known there.
* FIXME: redo bipred weight to not require extra buffer? */
- if(!s->obmc_scratchpad)
- s->obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
+ for(i = 0; i < s->avctx->thread_count; i++)
+ if(!h->thread_context[i]->s.obmc_scratchpad)
+ h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
/* some macroblocks will be accessed before they're available */
- if(FRAME_MBAFF)
+ if(FRAME_MBAFF || s->avctx->thread_count > 1)
memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(uint8_t));
// s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
return 0;
}
-static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
+static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
MpegEncContext * const s = &h->s;
int i;
@@ -3351,7 +2274,7 @@ static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src
*(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
*(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
- if(!(s->flags&CODEC_FLAG_GRAY)){
+ if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
h->left_border[17 ]= h->top_borders[0][s->mb_x][16+7];
h->left_border[17+9]= h->top_borders[0][s->mb_x][24+7];
for(i=1; i<9; i++){
@@ -3363,12 +2286,22 @@ static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src
}
}
-static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
+static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
MpegEncContext * const s = &h->s;
int temp8, i;
uint64_t temp64;
- int deblock_left = (s->mb_x > 0);
- int deblock_top = (s->mb_y > 0);
+ int deblock_left;
+ int deblock_top;
+ int mb_xy;
+
+ if(h->deblocking_filter == 2) {
+ mb_xy = s->mb_x + s->mb_y*s->mb_stride;
+ deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
+ deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
+ } else {
+ deblock_left = (s->mb_x > 0);
+ deblock_top = (s->mb_y > 0);
+ }
src_y -= linesize + 1;
src_cb -= uvlinesize + 1;
@@ -3394,7 +2327,7 @@ b= t;
}
}
- if(!(s->flags&CODEC_FLAG_GRAY)){
+ if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
if(deblock_left){
for(i = !deblock_top; i<9; i++){
XCHG(h->left_border[i+17 ], src_cb[i*uvlinesize], temp8, xchg);
@@ -3429,7 +2362,7 @@ static inline void backup_pair_border(H264Context *h, uint8_t *src_y, uint8_t *s
*(uint64_t*)(h->top_borders[1][s->mb_x]+0)= *(uint64_t*)(src_y + 33*linesize);
*(uint64_t*)(h->top_borders[1][s->mb_x]+8)= *(uint64_t*)(src_y +8+33*linesize);
- if(!(s->flags&CODEC_FLAG_GRAY)){
+ if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7];
h->left_border[34+ 1]= h->top_borders[1][s->mb_x][16+7];
h->left_border[34+18 ]= h->top_borders[0][s->mb_x][24+7];
@@ -3481,7 +2414,7 @@ b= t;
}
}
- if(!(s->flags&CODEC_FLAG_GRAY)){
+ if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
if(deblock_left){
for(i = (!deblock_top) << 1; i<18; i++){
XCHG(h->left_border[i+34 ], src_cb[i*uvlinesize], temp8, xchg);
@@ -3497,7 +2430,7 @@ b= t;
}
}
-static void av_always_inline hl_decode_mb_internal(H264Context *h, int simple){
+static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
MpegEncContext * const s = &h->s;
const int mb_x= s->mb_x;
const int mb_y= s->mb_y;
@@ -3535,13 +2468,13 @@ static void av_always_inline hl_decode_mb_internal(H264Context *h, int simple){
continue;
if(IS_16X16(mb_type)){
int8_t *ref = &h->ref_cache[list][scan8[0]];
- fill_rectangle(ref, 4, 4, 8, 16+*ref^(s->mb_y&1), 1);
+ fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
}else{
for(i=0; i<16; i+=4){
//FIXME can refs be smaller than 8x8 when !direct_8x8_inference ?
int ref = h->ref_cache[list][scan8[i]];
if(ref >= 0)
- fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, 16+ref^(s->mb_y&1), 1);
+ fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
}
}
}
@@ -3601,11 +2534,11 @@ static void av_always_inline hl_decode_mb_internal(H264Context *h, int simple){
} else {
if(IS_INTRA(mb_type)){
if(h->deblocking_filter && (simple || !FRAME_MBAFF))
- xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1);
+ xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
- if(simple || !(s->flags&CODEC_FLAG_GRAY)){
- h->pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
- h->pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
+ if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
+ h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
+ h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
}
if(IS_INTRA4x4(mb_type)){
@@ -3615,7 +2548,7 @@ static void av_always_inline hl_decode_mb_internal(H264Context *h, int simple){
uint8_t * const ptr= dest_y + block_offset[i];
const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
const int nnz = h->non_zero_count_cache[ scan8[i] ];
- h->pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
+ h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
(h->topright_samples_available<<i)&0x4000, linesize);
if(nnz){
if(nnz == 1 && h->mb[i*16])
@@ -3642,7 +2575,7 @@ static void av_always_inline hl_decode_mb_internal(H264Context *h, int simple){
}else
topright= NULL;
- h->pred4x4[ dir ](ptr, topright, linesize);
+ h->hpc.pred4x4[ dir ](ptr, topright, linesize);
nnz = h->non_zero_count_cache[ scan8[i] ];
if(nnz){
if(is_h264){
@@ -3656,15 +2589,15 @@ static void av_always_inline hl_decode_mb_internal(H264Context *h, int simple){
}
}
}else{
- h->pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
+ h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
if(is_h264){
if(!transform_bypass)
- h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[IS_INTRA(mb_type) ? 0:3][s->qscale][0]);
+ h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
}else
svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
}
if(h->deblocking_filter && (simple || !FRAME_MBAFF))
- xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0);
+ xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
}else if(is_h264){
hl_motion(h, dest_y, dest_cb, dest_cr,
s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
@@ -3704,15 +2637,15 @@ static void av_always_inline hl_decode_mb_internal(H264Context *h, int simple){
}
}
- if(simple || !(s->flags&CODEC_FLAG_GRAY)){
+ if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
uint8_t *dest[2] = {dest_cb, dest_cr};
if(transform_bypass){
idct_add = idct_dc_add = s->dsp.add_pixels4;
}else{
idct_add = s->dsp.h264_idct_add;
idct_dc_add = s->dsp.h264_idct_dc_add;
- chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp, h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp][0]);
- chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp][0]);
+ chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
+ chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
}
if(is_h264){
for(i=16; i<16+8; i++){
@@ -3754,17 +2687,19 @@ static void av_always_inline hl_decode_mb_internal(H264Context *h, int simple){
s->mb_y--;
tprintf(h->s.avctx, "call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x, mb_y, pair_dest_y, dest_y);
fill_caches(h, mb_type_top, 1); //FIXME don't fill stuff which isn't used by filter_mb
- h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy]);
+ h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
+ h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
filter_mb(h, mb_x, mb_y, pair_dest_y, pair_dest_cb, pair_dest_cr, linesize, uvlinesize);
// bottom
s->mb_y++;
tprintf(h->s.avctx, "call mbaff filter_mb\n");
fill_caches(h, mb_type_bottom, 1); //FIXME don't fill stuff which isn't used by filter_mb
- h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
+ h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
+ h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
} else {
tprintf(h->s.avctx, "call filter_mb\n");
- backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
+ backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
}
@@ -3791,7 +2726,7 @@ static void hl_decode_mb(H264Context *h){
const int mb_y= s->mb_y;
const int mb_xy= mb_x + mb_y*s->mb_stride;
const int mb_type= s->current_picture.mb_type[mb_xy];
- int is_complex = FRAME_MBAFF || MB_FIELD || IS_INTRA_PCM(mb_type) || s->codec_id != CODEC_ID_H264 || (s->flags&CODEC_FLAG_GRAY) || s->encoding;
+ int is_complex = FRAME_MBAFF || MB_FIELD || IS_INTRA_PCM(mb_type) || s->codec_id != CODEC_ID_H264 || (ENABLE_GRAY && (s->flags&CODEC_FLAG_GRAY)) || s->encoding;
if(!s->decode)
return;
@@ -3801,6 +2736,105 @@ static void hl_decode_mb(H264Context *h){
else hl_decode_mb_simple(h);
}
+static void pic_as_field(Picture *pic, const int parity){
+ int i;
+ for (i = 0; i < 4; ++i) {
+ if (parity == PICT_BOTTOM_FIELD)
+ pic->data[i] += pic->linesize[i];
+ pic->reference = parity;
+ pic->linesize[i] *= 2;
+ }
+}
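
For orientation, a minimal standalone sketch (not part of the patch) of the pointer arithmetic pic_as_field() relies on: viewing one field of an interleaved frame is just an offset of one line plus a doubled stride. MiniPic, the PICT_* values and the 4x4 buffer below are illustrative assumptions, not the real Picture layout.

#include <stdio.h>

#define PICT_TOP_FIELD     1   /* assumed values mirroring the PICT_* flags */
#define PICT_BOTTOM_FIELD  2

typedef struct MiniPic {       /* hypothetical stand-in for Picture */
    unsigned char *data;
    int linesize;
    int reference;
} MiniPic;

/* View an interleaved frame as a single field: the bottom field starts
 * one line lower, and both fields are read with a doubled stride. */
static void mini_pic_as_field(MiniPic *pic, int parity)
{
    if (parity == PICT_BOTTOM_FIELD)
        pic->data += pic->linesize;
    pic->linesize *= 2;
    pic->reference = parity;
}

int main(void)
{
    unsigned char frame[4 * 4];
    for (int i = 0; i < 16; i++)
        frame[i] = (unsigned char)i;       /* rows 0..3 of a tiny 4x4 "frame" */

    MiniPic top = { frame, 4, 0 }, bot = { frame, 4, 0 };
    mini_pic_as_field(&top, PICT_TOP_FIELD);
    mini_pic_as_field(&bot, PICT_BOTTOM_FIELD);

    /* top field now sees rows 0 and 2, bottom field sees rows 1 and 3 */
    printf("top field rows start at %d and %d\n", top.data[0], top.data[top.linesize]);
    printf("bottom field rows start at %d and %d\n", bot.data[0], bot.data[bot.linesize]);
    return 0;
}

Compiled on its own, this prints that the top-field view starts at rows 0 and 2 of the frame and the bottom-field view at rows 1 and 3.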
+
+static int split_field_copy(Picture *dest, Picture *src,
+ int parity, int id_add){
+ int match = !!(src->reference & parity);
+
+ if (match) {
+ *dest = *src;
+ pic_as_field(dest, parity);
+ dest->pic_id *= 2;
+ dest->pic_id += id_add;
+ }
+
+ return match;
+}
+
+/**
+ * Split one reference list into field parts, interleaving by parity
+ * as per H.264 spec section 8.2.4.2.5. Output fields have their data pointers
+ * set to look at the actual start of data for that field.
+ *
+ * @param dest output list
+ * @param dest_len maximum number of fields to put in dest
+ * @param src the source reference list containing fields and/or field pairs
+ * (aka short_ref/long_ref, or
+ * refFrameListXShortTerm/refFrameListLongTerm in spec-speak)
+ * @param src_len number of Pictures in source (pairs and unmatched fields)
+ * @param parity the parity of the picture being decoded/needing
+ * these ref pics (PICT_{TOP,BOTTOM}_FIELD)
+ * @return number of fields placed in dest
+ */
+static int split_field_half_ref_list(Picture *dest, int dest_len,
+ Picture *src, int src_len, int parity){
+ int same_parity = 1;
+ int same_i = 0;
+ int opp_i = 0;
+ int out_i;
+ int field_output;
+
+ for (out_i = 0; out_i < dest_len; out_i += field_output) {
+ if (same_parity && same_i < src_len) {
+ field_output = split_field_copy(dest + out_i, src + same_i,
+ parity, 1);
+ same_parity = !field_output;
+ same_i++;
+
+ } else if (opp_i < src_len) {
+ field_output = split_field_copy(dest + out_i, src + opp_i,
+ PICT_FRAME - parity, 0);
+ same_parity = field_output;
+ opp_i++;
+
+ } else {
+ break;
+ }
+ }
+
+ return out_i;
+}
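
To make the interleaving rule easier to follow, here is a hedged re-implementation of the same selection loop on plain ints, away from the Picture machinery: 1 means the top field of a reference exists, 2 the bottom field, 3 both. The array contents, the id encoding (2*i for opposite parity, 2*i+1 for same parity, mirroring split_field_copy()) and the function names are assumptions for illustration only.

#include <stdio.h>

#define TOP 1
#define BOT 2

/* src[i] is a bitmask of which fields of reference i exist (TOP, BOT or both).
 * Emit fields alternating between "same parity as the current field" and
 * "opposite parity", skipping references that miss the wanted field. */
static int split_half_list(int *dst, int dst_len, const int *src, int src_len, int parity)
{
    int same_parity = 1, same_i = 0, opp_i = 0, out = 0;

    while (out < dst_len) {
        int emitted = 0;
        if (same_parity && same_i < src_len) {
            if (src[same_i] & parity) {
                dst[out++] = same_i * 2 + 1;   /* same-parity pic gets an odd id */
                emitted = 1;
            }
            same_parity = !emitted;
            same_i++;
        } else if (opp_i < src_len) {
            if (src[opp_i] & ((TOP | BOT) ^ parity)) {
                dst[out++] = opp_i * 2;        /* opposite parity gets an even id */
                emitted = 1;
            }
            same_parity = emitted;
            opp_i++;
        } else {
            break;
        }
    }
    return out;
}

int main(void)
{
    /* three field pairs plus one unmatched top field */
    int src[] = { TOP | BOT, TOP, TOP | BOT, TOP | BOT };
    int dst[8], n, i;

    n = split_half_list(dst, 8, src, 4, TOP);  /* decoding a top field */
    for (i = 0; i < n; i++)
        printf("field %d: source %d, %s parity\n", i, dst[i] / 2,
               dst[i] & 1 ? "same" : "opposite");
    return 0;
}

The output shows same- and opposite-parity fields alternating, with sources that lack the wanted field skipped, which is the behaviour the loop above implements for real reference pictures.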
+
+/**
+ * Split the reference frame list into a reference field list.
+ * This implements H.264 spec 8.2.4.2.5 for a combined input list.
+ * The input list contains both reference field pairs and
+ * unmatched reference fields; it is ordered as spec describes
+ * RefPicListX for frames in 8.2.4.2.1 and 8.2.4.2.3, except that
+ * unmatched field pairs are also present. Conceptually this is equivalent
+ * to concatenation of refFrameListXShortTerm with refFrameListLongTerm.
+ *
+ * @param dest output reference list where ordered fields are to be placed
+ * @param dest_len max number of fields to place at dest
+ * @param src source reference list, as described above
+ * @param src_len number of pictures (pairs and unmatched fields) in src
+ * @param parity parity of field being currently decoded
+ * (one of PICT_{TOP,BOTTOM}_FIELD)
+ * @param long_i index into src array that holds first long reference picture,
+ * or src_len if no long refs present.
+ */
+static int split_field_ref_list(Picture *dest, int dest_len,
+ Picture *src, int src_len,
+ int parity, int long_i){
+
+ int i = split_field_half_ref_list(dest, dest_len, src, long_i, parity);
+ dest += i;
+ dest_len -= i;
+
+ i += split_field_half_ref_list(dest, dest_len, src + long_i,
+ src_len - long_i, parity);
+ return i;
+}
+
/**
* fills the default_ref_list.
*/
@@ -3808,9 +2842,25 @@ static int fill_default_ref_list(H264Context *h){
MpegEncContext * const s = &h->s;
int i;
int smallest_poc_greater_than_current = -1;
+ int structure_sel;
Picture sorted_short_ref[32];
+ Picture field_entry_list[2][32];
+ Picture *frame_list[2];
+
+ if (FIELD_PICTURE) {
+ structure_sel = PICT_FRAME;
+ frame_list[0] = field_entry_list[0];
+ frame_list[1] = field_entry_list[1];
+ } else {
+ structure_sel = 0;
+ frame_list[0] = h->default_ref_list[0];
+ frame_list[1] = h->default_ref_list[1];
+ }
if(h->slice_type==B_TYPE){
+ int list;
+ int len[2];
+ int short_len[2];
int out_i;
int limit= INT_MIN;
@@ -3838,71 +2888,92 @@ static int fill_default_ref_list(H264Context *h){
}
}
}
- }
- if(s->picture_structure == PICT_FRAME){
- if(h->slice_type==B_TYPE){
- int list;
- tprintf(h->s.avctx, "current poc: %d, smallest_poc_greater_than_current: %d\n", s->current_picture_ptr->poc, smallest_poc_greater_than_current);
+ tprintf(h->s.avctx, "current poc: %d, smallest_poc_greater_than_current: %d\n", s->current_picture_ptr->poc, smallest_poc_greater_than_current);
- // find the largest poc
- for(list=0; list<2; list++){
- int index = 0;
- int j= -99;
- int step= list ? -1 : 1;
-
- for(i=0; i<h->short_ref_count && index < h->ref_count[list]; i++, j+=step) {
- while(j<0 || j>= h->short_ref_count){
- if(j != -99 && step == (list ? -1 : 1))
- return -1;
- step = -step;
- j= smallest_poc_greater_than_current + (step>>1);
- }
- if(sorted_short_ref[j].reference != 3) continue;
- h->default_ref_list[list][index ]= sorted_short_ref[j];
- h->default_ref_list[list][index++].pic_id= sorted_short_ref[j].frame_num;
+ // find the largest poc
+ for(list=0; list<2; list++){
+ int index = 0;
+ int j= -99;
+ int step= list ? -1 : 1;
+
+ for(i=0; i<h->short_ref_count && index < h->ref_count[list]; i++, j+=step) {
+ int sel;
+ while(j<0 || j>= h->short_ref_count){
+ if(j != -99 && step == (list ? -1 : 1))
+ return -1;
+ step = -step;
+ j= smallest_poc_greater_than_current + (step>>1);
}
+ sel = sorted_short_ref[j].reference | structure_sel;
+ if(sel != PICT_FRAME) continue;
+ frame_list[list][index ]= sorted_short_ref[j];
+ frame_list[list][index++].pic_id= sorted_short_ref[j].frame_num;
+ }
+ short_len[list] = index;
- for(i = 0; i < 16 && index < h->ref_count[ list ]; i++){
- if(h->long_ref[i] == NULL) continue;
- if(h->long_ref[i]->reference != 3) continue;
+ for(i = 0; i < 16 && index < h->ref_count[ list ]; i++){
+ int sel;
+ if(h->long_ref[i] == NULL) continue;
+ sel = h->long_ref[i]->reference | structure_sel;
+ if(sel != PICT_FRAME) continue;
- h->default_ref_list[ list ][index ]= *h->long_ref[i];
- h->default_ref_list[ list ][index++].pic_id= i;;
- }
+ frame_list[ list ][index ]= *h->long_ref[i];
+ frame_list[ list ][index++].pic_id= i;
+ }
+ len[list] = index;
+ }
- if(list && (smallest_poc_greater_than_current<=0 || smallest_poc_greater_than_current>=h->short_ref_count) && (1 < index)){
- // swap the two first elements of L1 when
- // L0 and L1 are identical
- Picture temp= h->default_ref_list[1][0];
- h->default_ref_list[1][0] = h->default_ref_list[1][1];
- h->default_ref_list[1][1] = temp;
- }
+ for(list=0; list<2; list++){
+ if (FIELD_PICTURE)
+ len[list] = split_field_ref_list(h->default_ref_list[list],
+ h->ref_count[list],
+ frame_list[list],
+ len[list],
+ s->picture_structure,
+ short_len[list]);
+
+ // swap the two first elements of L1 when L0 and L1 are identical
+ if(list && len[0] > 1 && len[0] == len[1])
+ for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0]; i++)
+ if(i == len[0]){
+ FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
+ break;
+ }
- if(index < h->ref_count[ list ])
- memset(&h->default_ref_list[list][index], 0, sizeof(Picture)*(h->ref_count[ list ] - index));
- }
- }else{
- int index=0;
- for(i=0; i<h->short_ref_count; i++){
- if(h->short_ref[i]->reference != 3) continue; //FIXME refernce field shit
- h->default_ref_list[0][index ]= *h->short_ref[i];
- h->default_ref_list[0][index++].pic_id= h->short_ref[i]->frame_num;
- }
- for(i = 0; i < 16; i++){
- if(h->long_ref[i] == NULL) continue;
- if(h->long_ref[i]->reference != 3) continue;
- h->default_ref_list[0][index ]= *h->long_ref[i];
- h->default_ref_list[0][index++].pic_id= i;;
- }
- if(index < h->ref_count[0])
- memset(&h->default_ref_list[0][index], 0, sizeof(Picture)*(h->ref_count[0] - index));
+ if(len[list] < h->ref_count[ list ])
+ memset(&h->default_ref_list[list][len[list]], 0, sizeof(Picture)*(h->ref_count[ list ] - len[list]));
}
- }else{ //FIELD
- if(h->slice_type==B_TYPE){
- }else{
- //FIXME second field balh
+
+
+ }else{
+ int index=0;
+ int short_len;
+ for(i=0; i<h->short_ref_count; i++){
+ int sel;
+ sel = h->short_ref[i]->reference | structure_sel;
+ if(sel != PICT_FRAME) continue;
+ frame_list[0][index ]= *h->short_ref[i];
+ frame_list[0][index++].pic_id= h->short_ref[i]->frame_num;
+ }
+ short_len = index;
+ for(i = 0; i < 16; i++){
+ int sel;
+ if(h->long_ref[i] == NULL) continue;
+ sel = h->long_ref[i]->reference | structure_sel;
+ if(sel != PICT_FRAME) continue;
+ frame_list[0][index ]= *h->long_ref[i];
+ frame_list[0][index++].pic_id= i;
}
+
+ if (FIELD_PICTURE)
+ index = split_field_ref_list(h->default_ref_list[0],
+ h->ref_count[0], frame_list[0],
+ index, s->picture_structure,
+ short_len);
+
+ if(index < h->ref_count[0])
+ memset(&h->default_ref_list[0][index], 0, sizeof(Picture)*(h->ref_count[0] - index));
}
#ifdef TRACE
for (i=0; i<h->ref_count[0]; i++) {
@@ -3910,7 +2981,7 @@ static int fill_default_ref_list(H264Context *h){
}
if(h->slice_type==B_TYPE){
for (i=0; i<h->ref_count[1]; i++) {
- tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[0][i].data[0]);
+ tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
}
}
#endif
@@ -3920,9 +2991,33 @@ static int fill_default_ref_list(H264Context *h){
static void print_short_term(H264Context *h);
static void print_long_term(H264Context *h);
+/**
+ * Extract structure information about the picture described by pic_num in
+ * the current decoding context (frame or field). Note that pic_num is
+ * picture number without wrapping (so, 0<=pic_num<max_pic_num).
+ * @param pic_num picture number for which to extract structure information
+ * @param structure one of PICT_XXX describing structure of picture
+ * with pic_num
+ * @return frame number (short term) or long term index of picture
+ * described by pic_num
+ */
+static int pic_num_extract(H264Context *h, int pic_num, int *structure){
+ MpegEncContext * const s = &h->s;
+
+ *structure = s->picture_structure;
+ if(FIELD_PICTURE){
+ if (!(pic_num & 1))
+ /* opposite field */
+ *structure ^= PICT_FRAME;
+ pic_num >>= 1;
+ }
+
+ return pic_num;
+}
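
A small hedged example of the picture-number convention pic_num_extract() decodes: when a field is being decoded, same-parity references carry odd pic_nums (2*frame_num + 1) and opposite-parity ones even pic_nums (2*frame_num), so the low bit selects the structure and a shift recovers the frame number. The values in main() are made up for illustration.

#include <stdio.h>

#define PICT_TOP_FIELD    1
#define PICT_BOTTOM_FIELD 2
#define PICT_FRAME        (PICT_TOP_FIELD | PICT_BOTTOM_FIELD)

/* Field decoding: recover (frame_num, structure) from a field pic_num.
 * current is the structure of the field currently being decoded. */
static int extract(int pic_num, int current, int *structure)
{
    *structure = current;
    if (!(pic_num & 1))
        *structure ^= PICT_FRAME;   /* even pic_num -> opposite field */
    return pic_num >> 1;
}

int main(void)
{
    int structure, i;
    /* while decoding a top field, look at a few pic_nums */
    for (i = 10; i <= 13; i++) {
        int fn = extract(i, PICT_TOP_FIELD, &structure);
        printf("pic_num %2d -> frame_num %d, %s field\n", i, fn,
               structure == PICT_TOP_FIELD ? "top" : "bottom");
    }
    return 0;
}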
+
static int decode_ref_pic_list_reordering(H264Context *h){
MpegEncContext * const s = &h->s;
- int list, index;
+ int list, index, pic_structure;
print_short_term(h);
print_long_term(h);
@@ -3951,8 +3046,9 @@ static int decode_ref_pic_list_reordering(H264Context *h){
if(reordering_of_pic_nums_idc<3){
if(reordering_of_pic_nums_idc<2){
const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
+ int frame_num;
- if(abs_diff_pic_num >= h->max_pic_num){
+ if(abs_diff_pic_num > h->max_pic_num){
av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
return -1;
}
@@ -3961,25 +3057,34 @@ static int decode_ref_pic_list_reordering(H264Context *h){
else pred+= abs_diff_pic_num;
pred &= h->max_pic_num - 1;
+ frame_num = pic_num_extract(h, pred, &pic_structure);
+
for(i= h->short_ref_count-1; i>=0; i--){
ref = h->short_ref[i];
- assert(ref->reference == 3);
+ assert(ref->reference);
assert(!ref->long_ref);
- if(ref->data[0] != NULL && ref->frame_num == pred && ref->long_ref == 0) // ignore non existing pictures by testing data[0] pointer
+ if(ref->data[0] != NULL &&
+ ref->frame_num == frame_num &&
+ (ref->reference & pic_structure) &&
+ ref->long_ref == 0) // ignore non existing pictures by testing data[0] pointer
break;
}
if(i>=0)
- ref->pic_id= ref->frame_num;
+ ref->pic_id= pred;
}else{
+ int long_idx;
pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
- if(pic_id>31){
+
+ long_idx= pic_num_extract(h, pic_id, &pic_structure);
+
+ if(long_idx>31){
av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
return -1;
}
- ref = h->long_ref[pic_id];
- if(ref){
+ ref = h->long_ref[long_idx];
+ assert(!(ref && !ref->reference));
+ if(ref && (ref->reference & pic_structure)){
ref->pic_id= pic_id;
- assert(ref->reference == 3);
assert(ref->long_ref);
i=0;
}else{
@@ -3999,6 +3104,9 @@ static int decode_ref_pic_list_reordering(H264Context *h){
h->ref_list[list][i]= h->ref_list[list][i-1];
}
h->ref_list[list][index]= *ref;
+ if (FIELD_PICTURE){
+ pic_as_field(&h->ref_list[list][index], pic_structure);
+ }
}
}else{
av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
@@ -4029,9 +3137,11 @@ static void fill_mbaff_ref_list(H264Context *h){
field[0] = *frame;
for(j=0; j<3; j++)
field[0].linesize[j] <<= 1;
+ field[0].reference = PICT_TOP_FIELD;
field[1] = field[0];
for(j=0; j<3; j++)
field[1].data[j] += frame->linesize[j];
+ field[1].reference = PICT_BOTTOM_FIELD;
h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
@@ -4137,17 +3247,32 @@ static void implicit_weight_table(H264Context *h){
}
}
-static inline void unreference_pic(H264Context *h, Picture *pic){
+/**
+ * Mark a picture as no longer needed for reference. The refmask
+ * argument allows unreferencing of individual fields or the whole frame.
+ * If the picture becomes entirely unreferenced, but is being held for
+ * display purposes, it is marked as such.
+ * @param refmask mask of fields to unreference; the mask is bitwise
+ * anded with the reference marking of pic
+ * @return non-zero if pic becomes entirely unreferenced (except possibly
+ * for display purposes), zero if one of the fields remains in
+ * reference
+ */
+static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
int i;
- pic->reference=0;
- if(pic == h->delayed_output_pic)
- pic->reference=1;
- else{
- for(i = 0; h->delayed_pic[i]; i++)
- if(pic == h->delayed_pic[i]){
- pic->reference=1;
- break;
- }
+ if (pic->reference &= refmask) {
+ return 0;
+ } else {
+ if(pic == h->delayed_output_pic)
+ pic->reference=DELAYED_PIC_REF;
+ else{
+ for(i = 0; h->delayed_pic[i]; i++)
+ if(pic == h->delayed_pic[i]){
+ pic->reference=DELAYED_PIC_REF;
+ break;
+ }
+ }
+ return 1;
}
}
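
The reference field of a Picture is now a two-bit top/bottom mask, and unreference_pic() simply ANDs it with the fields that should stay referenced (the callers in this patch pass structure ^ PICT_FRAME to drop one field, and 0 to drop both). A hedged sketch of just that masking step, with the delayed-picture handling left out; the function and variable names are assumptions.

#include <stdio.h>

#define PICT_TOP_FIELD    1
#define PICT_BOTTOM_FIELD 2
#define PICT_FRAME        (PICT_TOP_FIELD | PICT_BOTTOM_FIELD)

/* Keep only the field references selected by refmask.
 * Returns 1 when no field of the picture is referenced any more. */
static int drop_reference(int *reference, int refmask)
{
    *reference &= refmask;
    return *reference == 0;
}

int main(void)
{
    int ref = PICT_FRAME;                       /* both fields referenced */

    /* unreference the top field: keep only the bottom one */
    printf("fully unreferenced: %d (ref now %d)\n",
           drop_reference(&ref, PICT_FRAME ^ PICT_TOP_FIELD), ref);

    /* now unreference everything */
    printf("fully unreferenced: %d (ref now %d)\n",
           drop_reference(&ref, 0), ref);
    return 0;
}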
@@ -4159,14 +3284,14 @@ static void idr(H264Context *h){
for(i=0; i<16; i++){
if (h->long_ref[i] != NULL) {
- unreference_pic(h, h->long_ref[i]);
+ unreference_pic(h, h->long_ref[i], 0);
h->long_ref[i]= NULL;
}
}
h->long_ref_count=0;
for(i=0; i<h->short_ref_count; i++){
- unreference_pic(h, h->short_ref[i]);
+ unreference_pic(h, h->short_ref[i], 0);
h->short_ref[i]= NULL;
}
h->short_ref_count=0;
@@ -4187,27 +3312,28 @@ static void flush_dpb(AVCodecContext *avctx){
idr(h);
if(h->s.current_picture_ptr)
h->s.current_picture_ptr->reference= 0;
+ h->s.first_field= 0;
+ ff_mpeg_flush(avctx);
}
/**
- *
- * @return the removed picture or NULL if an error occurs
+ * Find a Picture in the short term reference list by frame number.
+ * @param frame_num frame number to search for
+ * @param idx the index into h->short_ref where returned picture is found;
+ * undefined if no picture found.
+ * @return pointer to the found picture, or NULL if no pic with the provided
+ * frame number is found
*/
-static Picture * remove_short(H264Context *h, int frame_num){
+static Picture * find_short(H264Context *h, int frame_num, int *idx){
MpegEncContext * const s = &h->s;
int i;
- if(s->avctx->debug&FF_DEBUG_MMCO)
- av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
-
for(i=0; i<h->short_ref_count; i++){
Picture *pic= h->short_ref[i];
if(s->avctx->debug&FF_DEBUG_MMCO)
av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
- if(pic->frame_num == frame_num){
- h->short_ref[i]= NULL;
- memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i - 1)*sizeof(Picture*));
- h->short_ref_count--;
+ if(pic->frame_num == frame_num) {
+ *idx = i;
return pic;
}
}
@@ -4215,6 +3341,49 @@ static Picture * remove_short(H264Context *h, int frame_num){
}
/**
+ * Remove a picture from the short term reference list by its index in
+ * that list. This does no checking on the provided index; it is assumed
+ * to be valid. Other list entries are shifted down.
+ * @param i index into h->short_ref of picture to remove.
+ */
+static void remove_short_at_index(H264Context *h, int i){
+ assert(i > 0 && i < h->short_ref_count);
+ h->short_ref[i]= NULL;
+ if (--h->short_ref_count)
+ memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
+}
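
A hedged, generic version of the removal pattern remove_short_at_index() uses: shift the tail of a pointer array down over the vacated slot with memmove(). The names and the clearing of the old last slot are illustrative choices, not a claim about the real list handling.

#include <stdio.h>
#include <string.h>

/* Remove entry i from an array of count pointers, keeping order.
 * Returns the new element count. */
static int remove_at_index(void **arr, int count, int i)
{
    arr[i] = NULL;
    if (--count > i)
        memmove(&arr[i], &arr[i + 1], (count - i) * sizeof(*arr));
    arr[count] = NULL;          /* old last slot is now unused */
    return count;
}

int main(void)
{
    int a = 1, b = 2, c = 3;
    void *refs[3] = { &a, &b, &c };
    int n = remove_at_index(refs, 3, 1);   /* drop the middle entry */

    for (int i = 0; i < n; i++)
        printf("refs[%d] = %d\n", i, *(int *)refs[i]);
    return 0;
}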
+
+/**
+ *
+ * @return the removed picture or NULL if an error occurs
+ */
+static Picture * remove_short(H264Context *h, int frame_num){
+ MpegEncContext * const s = &h->s;
+ Picture *pic;
+ int i;
+
+ if(s->avctx->debug&FF_DEBUG_MMCO)
+ av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
+
+ pic = find_short(h, frame_num, &i);
+ if (pic)
+ remove_short_at_index(h, i);
+
+ return pic;
+}
+
+/**
+ * Remove a picture from the long term reference list by its index in
+ * that list. This does no checking on the provided index; it is assumed
+ * to be valid. The removed entry is set to NULL. Other entries are unaffected.
+ * @param i index into h->long_ref of picture to remove.
+ */
+static void remove_long_at_index(H264Context *h, int i){
+ h->long_ref[i]= NULL;
+ h->long_ref_count--;
+}
+
+/**
*
* @return the removed picture or NULL if an error occurs
*/
@@ -4222,8 +3391,8 @@ static Picture * remove_long(H264Context *h, int i){
Picture *pic;
pic= h->long_ref[i];
- h->long_ref[i]= NULL;
- if(pic) h->long_ref_count--;
+ if (pic)
+ remove_long_at_index(h, i);
return pic;
}
@@ -4264,77 +3433,143 @@ static void print_long_term(H264Context *h) {
static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
MpegEncContext * const s = &h->s;
int i, j;
- int current_is_long=0;
+ int current_ref_assigned=0;
Picture *pic;
if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
for(i=0; i<mmco_count; i++){
+ int structure, frame_num, unref_pic;
if(s->avctx->debug&FF_DEBUG_MMCO)
- av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_frame_num, h->mmco[i].long_index);
+ av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
switch(mmco[i].opcode){
case MMCO_SHORT2UNUSED:
- pic= remove_short(h, mmco[i].short_frame_num);
- if(pic)
- unreference_pic(h, pic);
- else if(s->avctx->debug&FF_DEBUG_MMCO)
- av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_short() failure\n");
+ if(s->avctx->debug&FF_DEBUG_MMCO)
+ av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
+ frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
+ pic = find_short(h, frame_num, &j);
+ if (pic) {
+ if (unreference_pic(h, pic, structure ^ PICT_FRAME))
+ remove_short_at_index(h, j);
+ } else if(s->avctx->debug&FF_DEBUG_MMCO)
+ av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short failure\n");
break;
case MMCO_SHORT2LONG:
- pic= remove_long(h, mmco[i].long_index);
- if(pic) unreference_pic(h, pic);
+ if (FIELD_PICTURE && mmco[i].long_arg < h->long_ref_count &&
+ h->long_ref[mmco[i].long_arg]->frame_num ==
+ mmco[i].short_pic_num / 2) {
+ /* do nothing, we've already moved this field pair. */
+ } else {
+ int frame_num = mmco[i].short_pic_num >> FIELD_PICTURE;
- h->long_ref[ mmco[i].long_index ]= remove_short(h, mmco[i].short_frame_num);
- if (h->long_ref[ mmco[i].long_index ]){
- h->long_ref[ mmco[i].long_index ]->long_ref=1;
- h->long_ref_count++;
+ pic= remove_long(h, mmco[i].long_arg);
+ if(pic) unreference_pic(h, pic, 0);
+
+ h->long_ref[ mmco[i].long_arg ]= remove_short(h, frame_num);
+ if (h->long_ref[ mmco[i].long_arg ]){
+ h->long_ref[ mmco[i].long_arg ]->long_ref=1;
+ h->long_ref_count++;
+ }
}
break;
case MMCO_LONG2UNUSED:
- pic= remove_long(h, mmco[i].long_index);
- if(pic)
- unreference_pic(h, pic);
- else if(s->avctx->debug&FF_DEBUG_MMCO)
- av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_long() failure\n");
+ j = pic_num_extract(h, mmco[i].long_arg, &structure);
+ pic = h->long_ref[j];
+ if (pic) {
+ if (unreference_pic(h, pic, structure ^ PICT_FRAME))
+ remove_long_at_index(h, j);
+ } else if(s->avctx->debug&FF_DEBUG_MMCO)
+ av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
break;
case MMCO_LONG:
- pic= remove_long(h, mmco[i].long_index);
- if(pic) unreference_pic(h, pic);
+ unref_pic = 1;
+ if (FIELD_PICTURE && !s->first_field) {
+ if (h->long_ref[mmco[i].long_arg] == s->current_picture_ptr) {
+ /* Just mark second field as referenced */
+ unref_pic = 0;
+ } else if (s->current_picture_ptr->reference) {
+ /* First field in pair is in short term list or
+ * at a different long term index.
+ * This is not allowed; see 7.4.3, notes 2 and 3.
+ * Report the problem and keep the pair where it is,
+ * and mark this field valid.
+ */
+ av_log(h->s.avctx, AV_LOG_ERROR,
+ "illegal long term reference assignment for second "
+ "field in complementary field pair (first field is "
+ "short term or has non-matching long index)\n");
+ unref_pic = 0;
+ }
+ }
- h->long_ref[ mmco[i].long_index ]= s->current_picture_ptr;
- h->long_ref[ mmco[i].long_index ]->long_ref=1;
- h->long_ref_count++;
+ if (unref_pic) {
+ pic= remove_long(h, mmco[i].long_arg);
+ if(pic) unreference_pic(h, pic, 0);
- current_is_long=1;
+ h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
+ h->long_ref[ mmco[i].long_arg ]->long_ref=1;
+ h->long_ref_count++;
+ }
+
+ s->current_picture_ptr->reference |= s->picture_structure;
+ current_ref_assigned=1;
break;
case MMCO_SET_MAX_LONG:
- assert(mmco[i].long_index <= 16);
+ assert(mmco[i].long_arg <= 16);
// just remove the long term refs whose index is greater than the new max
- for(j = mmco[i].long_index; j<16; j++){
+ for(j = mmco[i].long_arg; j<16; j++){
pic = remove_long(h, j);
- if (pic) unreference_pic(h, pic);
+ if (pic) unreference_pic(h, pic, 0);
}
break;
case MMCO_RESET:
while(h->short_ref_count){
pic= remove_short(h, h->short_ref[0]->frame_num);
- if(pic) unreference_pic(h, pic);
+ if(pic) unreference_pic(h, pic, 0);
}
for(j = 0; j < 16; j++) {
pic= remove_long(h, j);
- if(pic) unreference_pic(h, pic);
+ if(pic) unreference_pic(h, pic, 0);
}
break;
default: assert(0);
}
}
- if(!current_is_long){
+ if (!current_ref_assigned && FIELD_PICTURE &&
+ !s->first_field && s->current_picture_ptr->reference) {
+
+ /* Second field of complementary field pair; the first field of
+ * which is already referenced. If short referenced, it
+ * should be first entry in short_ref. If not, it must exist
+ * in long_ref; trying to put it on the short list here is an
+ * error in the encoded bit stream (ref: 7.4.3, NOTE 2 and 3).
+ */
+ if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
+ /* Just mark the second field valid */
+ s->current_picture_ptr->reference = PICT_FRAME;
+ } else if (s->current_picture_ptr->long_ref) {
+ av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
+ "assignment for second field "
+ "in complementary field pair "
+ "(first field is long term)\n");
+ } else {
+ /*
+ * First field in reference, but not in any sensible place on our
+ * reference lists. This shouldn't happen unless reference
+ * handling somewhere else is wrong.
+ */
+ assert(0);
+ }
+ current_ref_assigned = 1;
+ }
+
+ if(!current_ref_assigned){
pic= remove_short(h, s->current_picture_ptr->frame_num);
if(pic){
- unreference_pic(h, pic);
+ unreference_pic(h, pic, 0);
av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
}
@@ -4344,6 +3579,32 @@ static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
h->short_ref[0]= s->current_picture_ptr;
h->short_ref[0]->long_ref=0;
h->short_ref_count++;
+ s->current_picture_ptr->reference |= s->picture_structure;
+ }
+
+ if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
+
+            /* We have too many reference frames, probably due to a corrupted
+ * stream. Need to discard one frame. Prevents overrun of the
+ * short_ref and long_ref buffers.
+ */
+ av_log(h->s.avctx, AV_LOG_ERROR,
+ "number of reference frames exceeds max (probably "
+ "corrupt input), discarding one\n");
+
+ if (h->long_ref_count) {
+ for (i = 0; i < 16; ++i)
+ if (h->long_ref[i])
+ break;
+
+ assert(i < 16);
+ pic = h->long_ref[i];
+ remove_long_at_index(h, i);
+ } else {
+ pic = h->short_ref[h->short_ref_count - 1];
+ remove_short_at_index(h, h->short_ref_count - 1);
+ }
+ unreference_pic(h, pic, 0);
}
print_short_term(h);
@@ -4351,39 +3612,39 @@ static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
return 0;
}
-static int decode_ref_pic_marking(H264Context *h){
+static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
MpegEncContext * const s = &h->s;
int i;
if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
- s->broken_link= get_bits1(&s->gb) -1;
- h->mmco[0].long_index= get_bits1(&s->gb) - 1; // current_long_term_idx
- if(h->mmco[0].long_index == -1)
+ s->broken_link= get_bits1(gb) -1;
+ h->mmco[0].long_arg= get_bits1(gb) - 1; // current_long_term_idx
+ if(h->mmco[0].long_arg == -1)
h->mmco_index= 0;
else{
h->mmco[0].opcode= MMCO_LONG;
h->mmco_index= 1;
}
}else{
- if(get_bits1(&s->gb)){ // adaptive_ref_pic_marking_mode_flag
+ if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
for(i= 0; i<MAX_MMCO_COUNT; i++) {
- MMCOOpcode opcode= get_ue_golomb(&s->gb);;
+ MMCOOpcode opcode= get_ue_golomb(gb);
h->mmco[i].opcode= opcode;
if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
- h->mmco[i].short_frame_num= (h->frame_num - get_ue_golomb(&s->gb) - 1) & ((1<<h->sps.log2_max_frame_num)-1); //FIXME fields
-/* if(h->mmco[i].short_frame_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_frame_num ] == NULL){
+ h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
+/* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
return -1;
}*/
}
if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
- unsigned int long_index= get_ue_golomb(&s->gb);
- if(/*h->mmco[i].long_index >= h->long_ref_count || h->long_ref[ h->mmco[i].long_index ] == NULL*/ long_index >= 16){
+ unsigned int long_arg= get_ue_golomb(gb);
+ if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
return -1;
}
- h->mmco[i].long_index= long_index;
+ h->mmco[i].long_arg= long_arg;
}
if(opcode > (unsigned)MMCO_LONG){
@@ -4397,10 +3658,17 @@ static int decode_ref_pic_marking(H264Context *h){
}else{
assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
- if(h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count){ //FIXME fields
+ if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
+ !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
h->mmco[0].opcode= MMCO_SHORT2UNUSED;
- h->mmco[0].short_frame_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
+ h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
h->mmco_index= 1;
+ if (FIELD_PICTURE) {
+ h->mmco[0].short_pic_num *= 2;
+ h->mmco[1].opcode= MMCO_SHORT2UNUSED;
+ h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
+ h->mmco_index= 2;
+ }
}else
h->mmco_index= 0;
}
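
When no explicit commands are present, the code above now emits the sliding-window removal itself and, in field mode, expands it into two field unreferences. A hedged sketch of only that pic_num bookkeeping; MiniMMCO, the opcode strings and the numbers are assumptions standing in for the real MMCO structures.

#include <stdio.h>

typedef struct MiniMMCO {
    const char *opcode;   /* stand-in for MMCO_SHORT2UNUSED */
    int short_pic_num;
} MiniMMCO;

/* Build the implicit "remove oldest short-term reference" command(s).
 * In field mode the frame number is doubled and both field pic_nums
 * (2*frame_num and 2*frame_num + 1) are scheduled for removal. */
static int sliding_window(MiniMMCO *mmco, int oldest_frame_num, int field_picture)
{
    int n = 0;
    mmco[n].opcode = "short2unused";
    mmco[n].short_pic_num = field_picture ? oldest_frame_num * 2 : oldest_frame_num;
    n++;
    if (field_picture) {
        mmco[n].opcode = "short2unused";
        mmco[n].short_pic_num = mmco[0].short_pic_num + 1;
        n++;
    }
    return n;
}

int main(void)
{
    MiniMMCO mmco[2];
    int i, n = sliding_window(mmco, 7, 1);   /* field decoding, oldest frame_num 7 */
    for (i = 0; i < n; i++)
        printf("mmco %d: %s pic_num %d\n", i, mmco[i].opcode, mmco[i].short_pic_num);
    return 0;
}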
@@ -4488,37 +3756,135 @@ static int init_poc(H264Context *h){
field_poc[1]= poc;
}
- if(s->picture_structure != PICT_BOTTOM_FIELD)
+ if(s->picture_structure != PICT_BOTTOM_FIELD) {
s->current_picture_ptr->field_poc[0]= field_poc[0];
- if(s->picture_structure != PICT_TOP_FIELD)
+ s->current_picture_ptr->poc = field_poc[0];
+ }
+ if(s->picture_structure != PICT_TOP_FIELD) {
s->current_picture_ptr->field_poc[1]= field_poc[1];
- if(s->picture_structure == PICT_FRAME) // FIXME field pix?
- s->current_picture_ptr->poc= FFMIN(field_poc[0], field_poc[1]);
+ s->current_picture_ptr->poc = field_poc[1];
+ }
+ if(!FIELD_PICTURE || !s->first_field) {
+ Picture *cur = s->current_picture_ptr;
+ cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
+ }
return 0;
}
+
+/**
+ * initialize scan tables
+ */
+static void init_scan_tables(H264Context *h){
+ MpegEncContext * const s = &h->s;
+ int i;
+ if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
+ memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
+ memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
+ }else{
+ for(i=0; i<16; i++){
+#define T(x) (x>>2) | ((x<<2) & 0xF)
+ h->zigzag_scan[i] = T(zigzag_scan[i]);
+ h-> field_scan[i] = T( field_scan[i]);
+#undef T
+ }
+ }
+ if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
+ memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
+ memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
+ memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
+ memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
+ }else{
+ for(i=0; i<64; i++){
+#define T(x) (x>>3) | ((x&7)<<3)
+ h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
+ h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
+ h->field_scan8x8[i] = T(field_scan8x8[i]);
+ h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
+#undef T
+ }
+ }
+ if(h->sps.transform_bypass){ //FIXME same ugly
+ h->zigzag_scan_q0 = zigzag_scan;
+ h->zigzag_scan8x8_q0 = zigzag_scan8x8;
+ h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
+ h->field_scan_q0 = field_scan;
+ h->field_scan8x8_q0 = field_scan8x8;
+ h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
+ }else{
+ h->zigzag_scan_q0 = h->zigzag_scan;
+ h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
+ h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
+ h->field_scan_q0 = h->field_scan;
+ h->field_scan8x8_q0 = h->field_scan8x8;
+ h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
+ }
+}
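
The T() macros above transpose the scan orders for IDCT routines that expect a transposed coefficient layout; in the 4x4 case, with x = 4*row + col, (x>>2)|((x<<2)&0xF) evaluates to 4*col + row. A hedged standalone check of that identity; the zigzag_scan table is the standard 4x4 zigzag order, copied here only for the demonstration.

#include <stdio.h>

/* standard 4x4 zigzag scan, as raster indices (index = 4*row + col) */
static const unsigned char zigzag_scan[16] = {
    0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
};

#define T(x) (((x) >> 2) | (((x) << 2) & 0xF))   /* swap row and column */

int main(void)
{
    int i;
    for (i = 0; i < 16; i++) {
        int x = zigzag_scan[i], t = T(x);
        printf("scan %2d: (%d,%d) -> transposed (%d,%d)\n",
               i, x >> 2, x & 3, t >> 2, t & 3);
    }
    return 0;
}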
+
+/**
+ * Replicates H264 "master" context to thread contexts.
+ */
+static void clone_slice(H264Context *dst, H264Context *src)
+{
+ memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
+ dst->s.current_picture_ptr = src->s.current_picture_ptr;
+ dst->s.current_picture = src->s.current_picture;
+ dst->s.linesize = src->s.linesize;
+ dst->s.uvlinesize = src->s.uvlinesize;
+ dst->s.first_field = src->s.first_field;
+
+ dst->prev_poc_msb = src->prev_poc_msb;
+ dst->prev_poc_lsb = src->prev_poc_lsb;
+ dst->prev_frame_num_offset = src->prev_frame_num_offset;
+ dst->prev_frame_num = src->prev_frame_num;
+ dst->short_ref_count = src->short_ref_count;
+
+ memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
+ memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
+ memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
+ memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
+
+ memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
+ memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
+}
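
clone_slice() copies only the per-frame state that every slice worker must agree on, while bitstream position and per-macroblock caches stay private to each thread context. A hedged sketch of that split using a toy context; ToyCtx and its fields are assumptions, not the real H264Context layout.

#include <stdio.h>
#include <string.h>

/* Toy slice-decoder context: shared per-frame state plus private state. */
typedef struct ToyCtx {
    /* shared per frame, filled in by the master context */
    int frame_num;
    int ref_count;
    int dequant[16];
    /* private per slice/thread, never cloned */
    int bit_position;
    int cache[8];
} ToyCtx;

/* Mirror of the clone idea: copy only what every slice must agree on. */
static void clone_frame_state(ToyCtx *dst, const ToyCtx *src)
{
    dst->frame_num = src->frame_num;
    dst->ref_count = src->ref_count;
    memcpy(dst->dequant, src->dequant, sizeof(dst->dequant));
}

int main(void)
{
    ToyCtx master = { .frame_num = 42, .ref_count = 2 };
    ToyCtx worker = { 0 };

    master.dequant[0] = 16;
    worker.bit_position = 1234;        /* private state stays untouched */

    clone_frame_state(&worker, &master);
    printf("worker: frame %d, refs %d, dequant[0] %d, bit_position %d\n",
           worker.frame_num, worker.ref_count, worker.dequant[0], worker.bit_position);
    return 0;
}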
+
/**
* decodes a slice header.
* this will also call MPV_common_init() and frame_start() as needed
+ *
+ * @param h h264 context
+ * @param h0 h264 master context (differs from 'h' when doing slice-based parallel decoding)
+ *
+ * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
*/
-static int decode_slice_header(H264Context *h){
+static int decode_slice_header(H264Context *h, H264Context *h0){
MpegEncContext * const s = &h->s;
+ MpegEncContext * const s0 = &h0->s;
unsigned int first_mb_in_slice;
unsigned int pps_id;
int num_ref_idx_active_override_flag;
static const uint8_t slice_type_map[5]= {P_TYPE, B_TYPE, I_TYPE, SP_TYPE, SI_TYPE};
- unsigned int slice_type, tmp;
+ unsigned int slice_type, tmp, i;
int default_ref_list_done = 0;
+ int last_pic_structure;
- s->current_picture.reference= h->nal_ref_idc != 0;
s->dropable= h->nal_ref_idc == 0;
+ if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
+ s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
+ s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
+ }else{
+ s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
+ s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
+ }
+
first_mb_in_slice= get_ue_golomb(&s->gb);
if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
- h->slice_num = 0;
- s->current_picture_ptr= NULL;
+ h0->current_slice = 0;
+ if (!s0->first_field)
+ s->current_picture_ptr= NULL;
}
slice_type= get_ue_golomb(&s->gb);
@@ -4534,31 +3900,36 @@ static int decode_slice_header(H264Context *h){
slice_type= slice_type_map[ slice_type ];
if (slice_type == I_TYPE
- || (h->slice_num != 0 && slice_type == h->slice_type) ) {
+ || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
default_ref_list_done = 1;
}
h->slice_type= slice_type;
s->pict_type= h->slice_type; // to make a few old func happy, it's wrong though
+ if (s->pict_type == B_TYPE && s0->last_picture_ptr == NULL) {
+ av_log(h->s.avctx, AV_LOG_ERROR,
+ "B picture before any references, skipping\n");
+ return -1;
+ }
pps_id= get_ue_golomb(&s->gb);
if(pps_id>=MAX_PPS_COUNT){
av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
return -1;
}
- h->pps= h->pps_buffer[pps_id];
- if(h->pps.slice_group_count == 0){
+ if(!h0->pps_buffers[pps_id]) {
av_log(h->s.avctx, AV_LOG_ERROR, "non existing PPS referenced\n");
return -1;
}
+ h->pps= *h0->pps_buffers[pps_id];
- h->sps= h->sps_buffer[ h->pps.sps_id ];
- if(h->sps.log2_max_frame_num == 0){
+ if(!h0->sps_buffers[h->pps.sps_id]) {
av_log(h->s.avctx, AV_LOG_ERROR, "non existing SPS referenced\n");
return -1;
}
+ h->sps = *h0->sps_buffers[h->pps.sps_id];
- if(h->dequant_coeff_pps != pps_id){
+ if(h == h0 && h->dequant_coeff_pps != pps_id){
h->dequant_coeff_pps = pps_id;
init_dequant_tables(h);
}
@@ -4577,58 +3948,35 @@ static int decode_slice_header(H264Context *h){
if (s->context_initialized
&& ( s->width != s->avctx->width || s->height != s->avctx->height)) {
+ if(h != h0)
+ return -1; // width / height changed during parallelized decoding
free_tables(h);
MPV_common_end(s);
}
if (!s->context_initialized) {
+ if(h != h0)
+            return -1;  // we can't (re-)initialize context during parallel decoding
if (MPV_common_init(s) < 0)
return -1;
+ s->first_field = 0;
- if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
- memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
- memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
- }else{
- int i;
- for(i=0; i<16; i++){
-#define T(x) (x>>2) | ((x<<2) & 0xF)
- h->zigzag_scan[i] = T(zigzag_scan[i]);
- h-> field_scan[i] = T( field_scan[i]);
-#undef T
- }
- }
- if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
- memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
- memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
- memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
- memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
- }else{
- int i;
- for(i=0; i<64; i++){
-#define T(x) (x>>3) | ((x&7)<<3)
- h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
- h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
- h->field_scan8x8[i] = T(field_scan8x8[i]);
- h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
-#undef T
- }
- }
- if(h->sps.transform_bypass){ //FIXME same ugly
- h->zigzag_scan_q0 = zigzag_scan;
- h->zigzag_scan8x8_q0 = zigzag_scan8x8;
- h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
- h->field_scan_q0 = field_scan;
- h->field_scan8x8_q0 = field_scan8x8;
- h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
- }else{
- h->zigzag_scan_q0 = h->zigzag_scan;
- h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
- h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
- h->field_scan_q0 = h->field_scan;
- h->field_scan8x8_q0 = h->field_scan8x8;
- h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
+ init_scan_tables(h);
+ alloc_tables(h);
+
+ for(i = 1; i < s->avctx->thread_count; i++) {
+ H264Context *c;
+ c = h->thread_context[i] = av_malloc(sizeof(H264Context));
+ memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
+ memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
+ c->sps = h->sps;
+ c->pps = h->pps;
+ init_scan_tables(c);
+ clone_tables(c, h);
}
- alloc_tables(h);
+ for(i = 0; i < s->avctx->thread_count; i++)
+ if(context_init(h->thread_context[i]) < 0)
+ return -1;
s->avctx->width = s->width;
s->avctx->height = s->height;
@@ -4645,42 +3993,90 @@ static int decode_slice_header(H264Context *h){
}
}
- if(h->slice_num == 0){
- if(frame_start(h) < 0)
- return -1;
- }
-
- s->current_picture_ptr->frame_num= //FIXME frame_num cleanup
h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
h->mb_mbaff = 0;
h->mb_aff_frame = 0;
+ last_pic_structure = s0->picture_structure;
if(h->sps.frame_mbs_only_flag){
s->picture_structure= PICT_FRAME;
}else{
if(get_bits1(&s->gb)) { //field_pic_flag
s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
- av_log(h->s.avctx, AV_LOG_ERROR, "PAFF interlacing is not implemented\n");
} else {
s->picture_structure= PICT_FRAME;
h->mb_aff_frame = h->sps.mb_aff;
}
}
+
+ if(h0->current_slice == 0){
+ /* See if we have a decoded first field looking for a pair... */
+ if (s0->first_field) {
+ assert(s0->current_picture_ptr);
+ assert(s0->current_picture_ptr->data[0]);
+ assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
+
+ /* figure out if we have a complementary field pair */
+ if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
+ /*
+ * Previous field is unmatched. Don't display it, but let it
+ * remain for reference if marked as such.
+ */
+ s0->current_picture_ptr = NULL;
+ s0->first_field = FIELD_PICTURE;
+
+ } else {
+ if (h->nal_ref_idc &&
+ s0->current_picture_ptr->reference &&
+ s0->current_picture_ptr->frame_num != h->frame_num) {
+ /*
+ * This and previous field were reference, but had
+ * different frame_nums. Consider this field first in
+ * pair. Throw away previous field except for reference
+ * purposes.
+ */
+ s0->first_field = 1;
+ s0->current_picture_ptr = NULL;
+
+ } else {
+ /* Second field in complementary pair */
+ s0->first_field = 0;
+ }
+ }
+
+ } else {
+ /* Frame or first field in a potentially complementary pair */
+ assert(!s0->current_picture_ptr);
+ s0->first_field = FIELD_PICTURE;
+ }
+
+ if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
+ s0->first_field = 0;
+ return -1;
+ }
+ }
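
The first_field bookkeeping above decides whether an incoming field completes the waiting field or starts a new pair. A hedged decision-table sketch of that logic in isolation; the parameters fold together the nal_ref_idc/reference conditions and omit the DELAYED_PIC_REF details, so this is an approximation of the control flow, not a drop-in replacement.

#include <stdio.h>

/* Returns 1 if the incoming field must be treated as a (new) first field,
 * 0 if it completes the waiting field as a complementary pair.
 *   have_first        - a decoded first field is waiting for a partner
 *   same_struct       - incoming structure equals the waiting field's
 *   frame_num_differs - both are reference fields but frame_num changed */
static int starts_new_field(int have_first, int same_struct, int frame_num_differs)
{
    if (!have_first)
        return 1;   /* nothing waiting: this is a first field */
    if (same_struct)
        return 1;   /* same parity twice: previous field stays unmatched */
    if (frame_num_differs)
        return 1;   /* reference fields with different frame_num: restart */
    return 0;       /* complementary second field */
}

int main(void)
{
    printf("no pending field    -> %d\n", starts_new_field(0, 0, 0));
    printf("two tops in a row   -> %d\n", starts_new_field(1, 1, 0));
    printf("frame_num mismatch  -> %d\n", starts_new_field(1, 0, 1));
    printf("proper second field -> %d\n", starts_new_field(1, 0, 0));
    return 0;
}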
+ if(h != h0)
+ clone_slice(h, h0);
+
+ s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
+
assert(s->mb_num == s->mb_width * s->mb_height);
- if(first_mb_in_slice << h->mb_aff_frame >= s->mb_num ||
+ if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
first_mb_in_slice >= s->mb_num){
av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
return -1;
}
s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
- s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << h->mb_aff_frame;
+ s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
+ if (s->picture_structure == PICT_BOTTOM_FIELD)
+ s->resync_mb_y = s->mb_y = s->mb_y + 1;
assert(s->mb_y < s->mb_height);
if(s->picture_structure==PICT_FRAME){
h->curr_pic_num= h->frame_num;
h->max_pic_num= 1<< h->sps.log2_max_frame_num;
}else{
- h->curr_pic_num= 2*h->frame_num;
+ h->curr_pic_num= 2*h->frame_num + 1;
h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
}
@@ -4716,8 +4112,6 @@ static int decode_slice_header(H264Context *h){
if(h->slice_type == P_TYPE || h->slice_type == SP_TYPE || h->slice_type == B_TYPE){
if(h->slice_type == B_TYPE){
h->direct_spatial_mv_pred= get_bits1(&s->gb);
- if(h->sps.mb_aff && h->direct_spatial_mv_pred)
- av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + spatial direct mode is not implemented\n");
}
num_ref_idx_active_override_flag= get_bits1(&s->gb);
@@ -4754,8 +4148,8 @@ static int decode_slice_header(H264Context *h){
else
h->use_weight = 0;
- if(s->current_picture.reference)
- decode_ref_pic_marking(h);
+ if(h->nal_ref_idc)
+ decode_ref_pic_marking(h0, &s->gb);
if(FRAME_MBAFF)
fill_mbaff_ref_list(h);
@@ -4776,7 +4170,8 @@ static int decode_slice_header(H264Context *h){
return -1;
}
s->qscale= tmp;
- h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
+ h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
+ h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
//FIXME qscale / qp ... stuff
if(h->slice_type == SP_TYPE){
get_bits1(&s->gb); /* sp_for_switch_flag */
@@ -4803,21 +4198,39 @@ static int decode_slice_header(H264Context *h){
h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
}
}
+
if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type != I_TYPE)
||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type == B_TYPE)
||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
h->deblocking_filter= 0;
+ if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
+ if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
+ /* Cheat slightly for speed:
+ Do not bother to deblock across slices. */
+ h->deblocking_filter = 2;
+ } else {
+ h0->max_contexts = 1;
+ if(!h0->single_decode_warning) {
+ av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
+ h0->single_decode_warning = 1;
+ }
+ if(h != h0)
+ return 1; // deblocking switched inside frame
+ }
+ }
+
#if 0 //FMO
if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
slice_group_change_cycle= get_bits(&s->gb, ?);
#endif
- h->slice_num++;
+ h0->last_slice_type = slice_type;
+ h->slice_num = ++h0->current_slice;
h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
- h->emu_edge_height= FRAME_MBAFF ? 0 : h->emu_edge_width;
+ h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
if(s->avctx->debug&FF_DEBUG_PICT_INFO){
av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s\n",
@@ -4835,14 +4248,6 @@ static int decode_slice_header(H264Context *h){
);
}
- if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !s->current_picture.reference){
- s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
- s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
- }else{
- s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
- s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
- }
-
return 0;
}
@@ -5161,7 +4566,7 @@ decode_intra_mb:
if(IS_INTRA_PCM(mb_type)){
unsigned int x, y;
- // we assume these blocks are very rare so we dont optimize it
+ // We assume these blocks are very rare so we do not optimize it.
align_get_bits(&s->gb);
// The pixels are stored in the same order as levels in h->mb array.
@@ -5189,7 +4594,8 @@ decode_intra_mb:
// In deblocking, the quantizer is 0
s->current_picture.qscale_table[mb_xy]= 0;
- h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, 0);
+ h->chroma_qp[0] = get_chroma_qp(h, 0, 0);
+ h->chroma_qp[1] = get_chroma_qp(h, 1, 0);
// All coeffs are present
memset(h->non_zero_count[mb_xy], 16, 16);
@@ -5299,8 +4705,6 @@ decode_intra_mb:
dct8x8_allowed = get_dct8x8_allowed(h);
for(list=0; list<h->list_count; list++){
- const int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
-
for(i=0; i<4; i++){
if(IS_DIRECT(h->sub_mb_type[i])) {
h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
@@ -5465,7 +4869,7 @@ decode_intra_mb:
if(cbp || IS_INTRA16x16(mb_type)){
int i8x8, i4x4, chroma_idx;
- int chroma_qp, dquant;
+ int dquant;
GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
const uint8_t *scan, *scan8x8, *dc_scan;
@@ -5494,7 +4898,8 @@ decode_intra_mb:
else s->qscale-= 52;
}
- h->chroma_qp= chroma_qp= get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
+ h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
+ h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
if(IS_INTRA16x16(mb_type)){
if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
return -1; //FIXME continue if partitioned and other return -1 too
@@ -5552,9 +4957,10 @@ decode_intra_mb:
if(cbp&0x20){
for(chroma_idx=0; chroma_idx<2; chroma_idx++){
+ const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
for(i4x4=0; i4x4<4; i4x4++){
const int index= 16 + 4*chroma_idx + i4x4;
- if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][chroma_qp], 15) < 0){
+ if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
return -1;
}
}
@@ -5713,7 +5119,7 @@ static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
}else{
int mb_xy = mb_x + mb_y*s->mb_stride;
mba_xy = mb_xy - 1;
- mbb_xy = mb_xy - s->mb_stride;
+ mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
}
if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
@@ -5766,65 +5172,20 @@ static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
return 3;
}
-static const uint8_t block_idx_x[16] = {
- 0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3
-};
-static const uint8_t block_idx_y[16] = {
- 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3
-};
-static const uint8_t block_idx_xy[4][4] = {
- { 0, 2, 8, 10},
- { 1, 3, 9, 11},
- { 4, 6, 12, 14},
- { 5, 7, 13, 15}
-};
-
static int decode_cabac_mb_cbp_luma( H264Context *h) {
- int cbp = 0;
- int cbp_b = -1;
- int i8x8;
-
- if( h->slice_table[h->top_mb_xy] == h->slice_num ) {
- cbp_b = h->top_cbp;
- tprintf(h->s.avctx, "cbp_b = top_cbp = %x\n", cbp_b);
- }
-
- for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
- int cbp_a = -1;
- int x, y;
- int ctx = 0;
-
- x = block_idx_x[4*i8x8];
- y = block_idx_y[4*i8x8];
-
- if( x > 0 )
- cbp_a = cbp;
- else if( h->slice_table[h->left_mb_xy[0]] == h->slice_num ) {
- cbp_a = h->left_cbp;
- tprintf(h->s.avctx, "cbp_a = left_cbp = %x\n", cbp_a);
- }
-
- if( y > 0 )
- cbp_b = cbp;
-
- /* No need to test for skip as we put 0 for skip block */
- /* No need to test for IPCM as we put 1 for IPCM block */
- if( cbp_a >= 0 ) {
- int i8x8a = block_idx_xy[(x-1)&0x03][y]/4;
- if( ((cbp_a >> i8x8a)&0x01) == 0 )
- ctx++;
- }
-
- if( cbp_b >= 0 ) {
- int i8x8b = block_idx_xy[x][(y-1)&0x03]/4;
- if( ((cbp_b >> i8x8b)&0x01) == 0 )
- ctx += 2;
- }
-
- if( get_cabac( &h->cabac, &h->cabac_state[73 + ctx] ) ) {
- cbp |= 1 << i8x8;
- }
- }
+ int cbp_b, cbp_a, ctx, cbp = 0;
+
+ cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
+ cbp_b = h->slice_table[h->top_mb_xy] == h->slice_num ? h->top_cbp : -1;
+
+ ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
+ cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
+ ctx = !(cbp & 0x01) + 2 * !(cbp_b & 0x08);
+ cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
+ ctx = !(cbp_a & 0x08) + 2 * !(cbp & 0x01);
+ cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
+ ctx = !(cbp & 0x04) + 2 * !(cbp & 0x02);
+ cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
return cbp;
}
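
The rewritten decode_cabac_mb_cbp_luma() unrolls the four 8x8 luma blocks and derives each CABAC context straight from the neighbouring CBP bits: the left macroblock contributes its right column (bits 0x02 and 0x08), the top one its bottom row (0x04 and 0x08), and an unavailable neighbour passed as -1 behaves as if fully coded. A hedged sketch reproducing only the context derivation, with the CABAC decode replaced by caller-supplied bits.

#include <stdio.h>

/* Context derivation for the four coded_block_pattern luma bits.
 * cbp_a: CBP of the left macroblock (-1 if unavailable),
 * cbp_b: CBP of the top macroblock (-1 if unavailable),
 * bits:  the four binary decisions, standing in for get_cabac(). */
static int cbp_luma(int cbp_a, int cbp_b, const int bits[4], int ctx_out[4])
{
    int cbp = 0, ctx;

    ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);   /* top-left 8x8 */
    ctx_out[0] = ctx; cbp |= bits[0] << 0;
    ctx = !(cbp   & 0x01) + 2 * !(cbp_b & 0x08);   /* top-right 8x8 */
    ctx_out[1] = ctx; cbp |= bits[1] << 1;
    ctx = !(cbp_a & 0x08) + 2 * !(cbp   & 0x01);   /* bottom-left 8x8 */
    ctx_out[2] = ctx; cbp |= bits[2] << 2;
    ctx = !(cbp   & 0x04) + 2 * !(cbp   & 0x02);   /* bottom-right 8x8 */
    ctx_out[3] = ctx; cbp |= bits[3] << 3;
    return cbp;
}

int main(void)
{
    const int bits[4] = { 1, 0, 1, 1 };   /* pretend CABAC decoded these */
    int ctx[4], i;
    int cbp = cbp_luma(0x0a /* left has its right column coded */,
                       -1   /* no top neighbour */, bits, ctx);

    for (i = 0; i < 4; i++)
        printf("8x8 block %d: ctx %d\n", i, ctx[i]);
    printf("luma cbp = 0x%x\n", cbp);
    return 0;
}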
static int decode_cabac_mb_cbp_chroma( H264Context *h) {
@@ -5846,16 +5207,9 @@ static int decode_cabac_mb_cbp_chroma( H264Context *h) {
return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
}
static int decode_cabac_mb_dqp( H264Context *h) {
- MpegEncContext * const s = &h->s;
- int mbn_xy;
int ctx = 0;
int val = 0;
- if( s->mb_x > 0 )
- mbn_xy = s->mb_x + s->mb_y*s->mb_stride - 1;
- else
- mbn_xy = s->mb_width - 1 + (s->mb_y-1)*s->mb_stride;
-
if( h->last_qscale_diff != 0 )
ctx++;
@@ -5978,7 +5332,7 @@ static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
return get_cabac_bypass_sign( &h->cabac, -mvd );
}
-static int inline get_cabac_cbf_ctx( H264Context *h, int cat, int idx ) {
+static inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx ) {
int nza, nzb;
int ctx = 0;
@@ -6006,14 +5360,14 @@ static int inline get_cabac_cbf_ctx( H264Context *h, int cat, int idx ) {
return ctx + 4 * cat;
}
-static const __attribute((used)) uint8_t last_coeff_flag_offset_8x8[63] = {
+DECLARE_ASM_CONST(1, const uint8_t, last_coeff_flag_offset_8x8[63]) = {
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
};
-static int decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff) {
+static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff) {
const int mb_xy = h->s.mb_x + h->s.mb_y*h->s.mb_stride;
static const int significant_coeff_flag_offset[2][6] = {
{ 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
@@ -6039,7 +5393,7 @@ static int decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n
int index[64];
- int last;
+ int av_unused last;
int coeff_count = 0;
int abslevel1 = 1;
@@ -6083,7 +5437,7 @@ static int decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n
h->cabac.low = cc.low ;
h->cabac.bytestream= cc.bytestream;
#endif
- return 0;
+ return;
}
}
@@ -6111,7 +5465,7 @@ static int decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n
index[coeff_count++] = last;\
}
const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
-#if defined(ARCH_X86) && defined(CONFIG_7REGS) && defined(CONFIG_EBX_AVAILABLE) && !( defined(ARCH_X86_64) && defined(PIC) )
+#if defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
} else {
coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
@@ -6144,7 +5498,7 @@ static int decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n
if( !qmul ) {
block[j] = get_cabac_bypass_sign( CC, -1);
}else{
- block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;;
+ block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
}
abslevel1++;
@@ -6184,10 +5538,10 @@ static int decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n
h->cabac.low = cc.low ;
h->cabac.bytestream= cc.bytestream;
#endif
- return 0;
+
}
-static void inline compute_mb_neighbors(H264Context *h)
+static inline void compute_mb_neighbors(H264Context *h)
{
MpegEncContext * const s = &h->s;
const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
@@ -6209,6 +5563,8 @@ static void inline compute_mb_neighbors(H264Context *h)
if (left_mb_frame_flag != curr_mb_frame_flag) {
h->left_mb_xy[0] = pair_xy - 1;
}
+ } else if (FIELD_PICTURE) {
+ h->top_mb_xy -= s->mb_stride;
}
return;
}
@@ -6304,7 +5660,7 @@ decode_intra_mb:
const uint8_t *ptr;
unsigned int x, y;
- // We assume these blocks are very rare so we dont optimize it.
+ // We assume these blocks are very rare so we do not optimize it.
// FIXME The two following lines get the bitstream position in the cabac
// decode, I think it should be done by a function in cabac.h (or cabac.c).
ptr= h->cabac.bytestream;
@@ -6343,7 +5699,8 @@ decode_intra_mb:
h->chroma_pred_mode_table[mb_xy] = 0;
// In deblocking, the quantizer is 0
s->current_picture.qscale_table[mb_xy]= 0;
- h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, 0);
+ h->chroma_qp[0] = get_chroma_qp(h, 0, 0);
+ h->chroma_qp[1] = get_chroma_qp(h, 1, 0);
// All coeffs are present
memset(h->non_zero_count[mb_xy], 16, 16);
s->current_picture.mb_type[mb_xy]= mb_type;
@@ -6399,6 +5756,10 @@ decode_intra_mb:
if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
pred_direct_motion(h, &mb_type);
+ h->ref_cache[0][scan8[4]] =
+ h->ref_cache[1][scan8[4]] =
+ h->ref_cache[0][scan8[12]] =
+ h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
for( i = 0; i < 4; i++ )
if( IS_DIRECT(h->sub_mb_type[i]) )
@@ -6434,11 +5795,11 @@ decode_intra_mb:
for(list=0; list<h->list_count; list++){
for(i=0; i<4; i++){
+ h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
if(IS_DIRECT(h->sub_mb_type[i])){
fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
continue;
}
- h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
const int sub_mb_type= h->sub_mb_type[i];
@@ -6597,6 +5958,7 @@ decode_intra_mb:
if( cbp || IS_INTRA16x16( mb_type ) ) {
const uint8_t *scan, *scan8x8, *dc_scan;
+ const uint32_t *qmul;
int dqp;
if(IS_INTERLACED(mb_type)){
@@ -6619,18 +5981,19 @@ decode_intra_mb:
if(s->qscale<0) s->qscale+= 52;
else s->qscale-= 52;
}
- h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
+ h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
+ h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
if( IS_INTRA16x16( mb_type ) ) {
int i;
//av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
- if( decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16) < 0)
- return -1;
+ decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
+
if( cbp&15 ) {
+ qmul = h->dequant4_coeff[0][s->qscale];
for( i = 0; i < 16; i++ ) {
//av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
- if( decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 )
- return -1;
+ decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
}
} else {
fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
@@ -6640,17 +6003,17 @@ decode_intra_mb:
for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
if( cbp & (1<<i8x8) ) {
if( IS_8x8DCT(mb_type) ) {
- if( decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
- scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64) < 0 )
- return -1;
- } else
- for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
- const int index = 4*i8x8 + i4x4;
- //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
+ decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
+ scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
+ } else {
+ qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
+ for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
+ const int index = 4*i8x8 + i4x4;
+ //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
//START_TIMER
- if( decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) < 0 )
- return -1;
+ decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
//STOP_TIMER("decode_residual")
+ }
}
} else {
uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
@@ -6663,19 +6026,18 @@ decode_intra_mb:
int c;
for( c = 0; c < 2; c++ ) {
//av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
- if( decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4) < 0)
- return -1;
+ decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
}
}
if( cbp&0x20 ) {
int c, i;
for( c = 0; c < 2; c++ ) {
+ qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
for( i = 0; i < 4; i++ ) {
const int index = 16 + 4 * c + i;
//av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
- if( decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp], 15) < 0)
- return -1;
+ decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, qmul, 15);
}
}
} else {
@@ -7009,23 +6371,27 @@ static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t
static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
MpegEncContext * const s = &h->s;
+ int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
int mb_xy, mb_type;
int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
- if(mb_x==0 || mb_y==0 || !s->dsp.h264_loop_filter_strength) {
+ mb_xy = mb_x + mb_y*s->mb_stride;
+
+ if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
+ (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
+ h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
return;
}
assert(!FRAME_MBAFF);
- mb_xy = mb_x + mb_y*s->mb_stride;
mb_type = s->current_picture.mb_type[mb_xy];
qp = s->current_picture.qscale_table[mb_xy];
qp0 = s->current_picture.qscale_table[mb_xy-1];
qp1 = s->current_picture.qscale_table[h->top_mb_xy];
- qpc = get_chroma_qp( h->pps.chroma_qp_index_offset, qp );
- qpc0 = get_chroma_qp( h->pps.chroma_qp_index_offset, qp0 );
- qpc1 = get_chroma_qp( h->pps.chroma_qp_index_offset, qp1 );
+ qpc = get_chroma_qp( h, 0, qp );
+ qpc0 = get_chroma_qp( h, 0, qp0 );
+ qpc1 = get_chroma_qp( h, 0, qp1 );
qp0 = (qp + qp0 + 1) >> 1;
qp1 = (qp + qp1 + 1) >> 1;
qpc0 = (qpc + qpc0 + 1) >> 1;
@@ -7038,17 +6404,18 @@ static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y,
if( IS_INTRA(mb_type) ) {
int16_t bS4[4] = {4,4,4,4};
int16_t bS3[4] = {3,3,3,3};
+ int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
if( IS_8x8DCT(mb_type) ) {
filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
- filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bS4, qp1 );
+ filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
} else {
filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
- filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bS4, qp1 );
+ filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
@@ -7057,9 +6424,9 @@ static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y,
filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
- filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bS4, qpc1 );
+ filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
- filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bS4, qpc1 );
+ filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
return;
} else {
@@ -7083,7 +6450,7 @@ static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y,
if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
bSv[0][0] = 0x0004000400040004ULL;
if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
- bSv[1][0] = 0x0004000400040004ULL;
+ bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
#define FILTER(hv,dir,edge)\
if(bSv[dir][edge]) {\
@@ -7131,7 +6498,7 @@ static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8
//for sufficiently low qp, filtering wouldn't do anything
//this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
if(!FRAME_MBAFF){
- int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX(0, h->pps.chroma_qp_index_offset);
+ int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX(0, FFMAX(h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]));
int qp = s->current_picture.qscale_table[mb_xy];
if(qp <= qp_thresh
&& (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
@@ -7154,7 +6521,8 @@ static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8
const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
int16_t bS[8];
int qp[2];
- int chroma_qp[2];
+ int bqp[2];
+ int rqp[2];
int mb_qp, mbn0_qp, mbn1_qp;
int i;
first_vertical_edge_done = 1;
@@ -7180,18 +6548,22 @@ static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8
mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
- chroma_qp[0] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, mb_qp ) +
- get_chroma_qp( h->pps.chroma_qp_index_offset, mbn0_qp ) + 1 ) >> 1;
+ bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
+ get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
+ rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
+ get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
- chroma_qp[1] = ( get_chroma_qp( h->pps.chroma_qp_index_offset, mb_qp ) +
- get_chroma_qp( h->pps.chroma_qp_index_offset, mbn1_qp ) + 1 ) >> 1;
+ bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
+ get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
+ rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
+ get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
/* Filter edge */
- tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPc:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], chroma_qp[0], chroma_qp[1], linesize, uvlinesize);
+ tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
{ int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
- filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, chroma_qp );
- filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, chroma_qp );
+ filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
+ filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
}
/* dir : 0 -> vertical edge, 1 -> horizontal edge */
for( dir = 0; dir < 2; dir++ )
@@ -7229,7 +6601,7 @@ static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8
unsigned int tmp_linesize = 2 * linesize;
unsigned int tmp_uvlinesize = 2 * uvlinesize;
int mbn_xy = mb_xy - 2 * s->mb_stride;
- int qp, chroma_qp;
+ int qp;
int i, j;
int16_t bS[4];
@@ -7253,10 +6625,10 @@ static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8
tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
{ int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
- chroma_qp = ( h->chroma_qp +
- get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
- filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp );
- filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS, chroma_qp );
+ filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
+ ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
+ filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
+ ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
}
start = 1;
@@ -7353,25 +6725,25 @@ static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8
if( dir == 0 ) {
filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
if( (edge&1) == 0 ) {
- int chroma_qp = ( h->chroma_qp +
- get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
- filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS, chroma_qp );
- filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS, chroma_qp );
+ filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
+ ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
+ filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
+ ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
}
} else {
filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
if( (edge&1) == 0 ) {
- int chroma_qp = ( h->chroma_qp +
- get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
- filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS, chroma_qp );
- filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS, chroma_qp );
+ filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
+ ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
+ filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
+ ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
}
}
}
}
}
-static int decode_slice(H264Context *h){
+static int decode_slice(struct AVCodecContext *avctx, H264Context *h){
MpegEncContext * const s = &h->s;
const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
@@ -7421,7 +6793,7 @@ static int decode_slice(H264Context *h){
eos = get_cabac_terminate( &h->cabac );
if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
- av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%d)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
+ av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
return -1;
}
@@ -7430,7 +6802,7 @@ static int decode_slice(H264Context *h){
s->mb_x = 0;
ff_draw_horiz_band(s, 16*s->mb_y, 16);
++s->mb_y;
- if(FRAME_MBAFF) {
+ if(FIELD_OR_MBAFF_PICTURE) {
++s->mb_y;
}
}
@@ -7467,7 +6839,7 @@ static int decode_slice(H264Context *h){
s->mb_x=0;
ff_draw_horiz_band(s, 16*s->mb_y, 16);
++s->mb_y;
- if(FRAME_MBAFF) {
+ if(FIELD_OR_MBAFF_PICTURE) {
++s->mb_y;
}
if(s->mb_y >= s->mb_height){
@@ -7636,7 +7008,7 @@ static inline int decode_vui_parameters(H264Context *h, SPS *sps){
if( aspect_ratio_idc == EXTENDED_SAR ) {
sps->sar.num= get_bits(&s->gb, 16);
sps->sar.den= get_bits(&s->gb, 16);
- }else if(aspect_ratio_idc < 14){
+ }else if(aspect_ratio_idc < sizeof(pixel_aspect)/sizeof(*pixel_aspect)){
sps->sar= pixel_aspect[aspect_ratio_idc];
}else{
av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
@@ -7753,6 +7125,26 @@ static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_s
}
}
+/**
+ * Return the SPS or PPS structure with the given id from the array 'vec',
+ * allocating it on first use. Returns NULL if the id is out of range or
+ * if allocation fails.
+ */
+static void *
+alloc_parameter_set(H264Context *h, void **vec, const unsigned int id, const unsigned int max,
+ const size_t size, const char *name)
+{
+ if(id>=max) {
+ av_log(h->s.avctx, AV_LOG_ERROR, "%s_id (%d) out of range\n", name, id);
+ return NULL;
+ }
+
+ if(!vec[id]) {
+ vec[id] = av_mallocz(size);
+ if(vec[id] == NULL)
+ av_log(h->s.avctx, AV_LOG_ERROR, "cannot allocate memory for %s\n", name);
+ }
+ return vec[id];
+}
+
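For orientation, here is a minimal standalone sketch (illustrative only, not part of the patch) of the allocate-on-first-use-by-id pattern that alloc_parameter_set() implements; all names in it are hypothetical and the h->sps_buffers / h->pps_buffers arrays it is used with are assumed to be declared in h264.h, which this hunk does not show.

#include <stdio.h>
#include <stdlib.h>

#define MAX_IDS 32

static void *alloc_by_id(void **vec, unsigned id, unsigned max, size_t size)
{
    if (id >= max)
        return NULL;               /* reject out-of-range ids, as the decoder does */
    if (!vec[id])
        vec[id] = calloc(1, size); /* first reference: allocate zeroed storage */
    return vec[id];                /* later references reuse the same entry */
}

int main(void)
{
    void *tab[MAX_IDS] = { NULL };
    void *a = alloc_by_id(tab, 3, MAX_IDS, 64);
    void *b = alloc_by_id(tab, 3, MAX_IDS, 64);
    printf("same object: %d, bad id rejected: %d\n",
           a == b, alloc_by_id(tab, 99, MAX_IDS, 64) == NULL);
    for (unsigned i = 0; i < MAX_IDS; i++)
        free(tab[i]);
    return 0;
}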
static inline int decode_seq_parameter_set(H264Context *h){
MpegEncContext * const s = &h->s;
int profile_idc, level_idc;
@@ -7769,13 +7161,10 @@ static inline int decode_seq_parameter_set(H264Context *h){
level_idc= get_bits(&s->gb, 8);
sps_id= get_ue_golomb(&s->gb);
- if (sps_id >= MAX_SPS_COUNT){
- // ok it has gone out of hand, someone is sending us bad stuff.
- av_log(h->s.avctx, AV_LOG_ERROR, "illegal sps_id (%d)\n", sps_id);
+ sps = alloc_parameter_set(h, (void **)h->sps_buffers, sps_id, MAX_SPS_COUNT, sizeof(SPS), "sps");
+ if(sps == NULL)
return -1;
- }
- sps= &h->sps_buffer[ sps_id ];
sps->profile_idc= profile_idc;
sps->level_idc= level_idc;
@@ -7814,8 +7203,9 @@ static inline int decode_seq_parameter_set(H264Context *h){
}
tmp= get_ue_golomb(&s->gb);
- if(tmp > MAX_PICTURE_COUNT-2){
+ if(tmp > MAX_PICTURE_COUNT-2 || tmp >= 32){
av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
+ return -1;
}
sps->ref_frame_count= tmp;
sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
@@ -7880,19 +7270,25 @@ static inline int decode_seq_parameter_set(H264Context *h){
return 0;
}
+static void
+build_qp_table(PPS *pps, int t, int index)
+{
+ int i;
+ for(i = 0; i < 255; i++)
+ pps->chroma_qp_table[t][i & 0xff] = chroma_qp[av_clip(i + index, 0, 51)];
+}
+
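A self-contained sketch of what these per-PPS tables contain, assuming the standard H.264 luma-to-chroma QP mapping (the decoder keeps an equivalent chroma_qp[] table in h264data.h). The offsets used in main() are made up for illustration, and the sketch only covers the valid QP range 0..51, while the tables built above are 256 bytes.

#include <stdio.h>
#include <stdint.h>

/* Standard H.264 QPy -> QPc mapping. */
static const uint8_t chroma_qp_map[52] = {
     0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,
    16,17,18,19,20,21,22,23,24,25,26,27,28,29,29,30,
    31,32,32,33,34,34,35,35,36,36,37,37,37,38,38,38,
    39,39,39,39
};

static int clip(int v, int lo, int hi) { return v < lo ? lo : v > hi ? hi : v; }

/* Mirror of build_qp_table(): one table per chroma plane, indexed by luma QP. */
static void build_table(uint8_t table[52], int index_offset)
{
    for (int qp = 0; qp < 52; qp++)
        table[qp] = chroma_qp_map[clip(qp + index_offset, 0, 51)];
}

int main(void)
{
    uint8_t cb[52], cr[52];
    build_table(cb,  2);  /* hypothetical chroma_qp_index_offset[0] */
    build_table(cr, -1);  /* hypothetical second_chroma_qp_index_offset */
    printf("QPy=30 -> QPcb=%d QPcr=%d\n", cb[30], cr[30]);
    return 0;
}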
static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
MpegEncContext * const s = &h->s;
unsigned int tmp, pps_id= get_ue_golomb(&s->gb);
PPS *pps;
- if(pps_id>=MAX_PPS_COUNT){
- av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
+ pps = alloc_parameter_set(h, (void **)h->pps_buffers, pps_id, MAX_PPS_COUNT, sizeof(PPS), "pps");
+ if(pps == NULL)
return -1;
- }
- pps = &h->pps_buffer[pps_id];
tmp= get_ue_golomb(&s->gb);
- if(tmp>=MAX_SPS_COUNT){
+ if(tmp>=MAX_SPS_COUNT || h->sps_buffers[tmp] == NULL){
av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
return -1;
}
@@ -7950,7 +7346,7 @@ static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
pps->weighted_bipred_idc= get_bits(&s->gb, 2);
pps->init_qp= get_se_golomb(&s->gb) + 26;
pps->init_qs= get_se_golomb(&s->gb) + 26;
- pps->chroma_qp_index_offset= get_se_golomb(&s->gb);
+ pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
pps->constrained_intra_pred= get_bits1(&s->gb);
pps->redundant_pic_cnt_present = get_bits1(&s->gb);
@@ -7962,18 +7358,27 @@ static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
if(get_bits_count(&s->gb) < bit_length){
pps->transform_8x8_mode= get_bits1(&s->gb);
- decode_scaling_matrices(h, &h->sps_buffer[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
- get_se_golomb(&s->gb); //second_chroma_qp_index_offset
+ decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
+ pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
+ } else {
+ pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
}
+ build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
+ if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1]) {
+ build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
+ h->pps.chroma_qp_diff= 1;
+ } else
+ memcpy(pps->chroma_qp_table[1], pps->chroma_qp_table[0], 256);
+
if(s->avctx->debug&FF_DEBUG_PICT_INFO){
- av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d %s %s %s %s\n",
+ av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
pps_id, pps->sps_id,
pps->cabac ? "CABAC" : "CAVLC",
pps->slice_group_count,
pps->ref_count[0], pps->ref_count[1],
pps->weighted_pred ? "weighted" : "",
- pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset,
+ pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
pps->deblocking_filter_parameters_present ? "LPAR" : "",
pps->constrained_intra_pred ? "CONSTR" : "",
pps->redundant_pic_cnt_present ? "REDU" : "",
@@ -7985,119 +7390,49 @@ static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
}
/**
- * finds the end of the current frame in the bitstream.
- * @return the position of the first byte of the next frame, or -1
+ * Call decode_slice() for each context.
+ *
+ * @param h h264 master context
+ * @param context_count number of contexts to execute
*/
-static int find_frame_end(H264Context *h, const uint8_t *buf, int buf_size){
+static void execute_decode_slices(H264Context *h, int context_count){
+ MpegEncContext * const s = &h->s;
+ AVCodecContext * const avctx= s->avctx;
+ H264Context *hx;
int i;
- uint32_t state;
- ParseContext *pc = &(h->s.parse_context);
-//printf("first %02X%02X%02X%02X\n", buf[0], buf[1],buf[2],buf[3]);
-// mb_addr= pc->mb_addr - 1;
- state= pc->state;
- if(state>13)
- state= 7;
-
- for(i=0; i<buf_size; i++){
- if(state==7){
- for(; i<buf_size; i++){
- if(!buf[i]){
- state=2;
- break;
- }
- }
- }else if(state<=2){
- if(buf[i]==1) state^= 5; //2->7, 1->4, 0->5
- else if(buf[i]) state = 7;
- else state>>=1; //2->1, 1->0, 0->0
- }else if(state<=5){
- int v= buf[i] & 0x1F;
- if(v==7 || v==8 || v==9){
- if(pc->frame_start_found){
- i++;
-found:
- pc->state=7;
- pc->frame_start_found= 0;
- return i-(state&5);
- }
- }else if(v==1 || v==2 || v==5){
- if(pc->frame_start_found){
- state+=8;
- continue;
- }else
- pc->frame_start_found = 1;
- }
- state= 7;
- }else{
- if(buf[i] & 0x80)
- goto found;
- state= 7;
- }
- }
- pc->state= state;
- return END_NOT_FOUND;
-}
-
-#ifdef CONFIG_H264_PARSER
-static int h264_parse(AVCodecParserContext *s,
- AVCodecContext *avctx,
- uint8_t **poutbuf, int *poutbuf_size,
- const uint8_t *buf, int buf_size)
-{
- H264Context *h = s->priv_data;
- ParseContext *pc = &h->s.parse_context;
- int next;
-
- if(s->flags & PARSER_FLAG_COMPLETE_FRAMES){
- next= buf_size;
- }else{
- next= find_frame_end(h, buf, buf_size);
-
- if (ff_combine_frame(pc, next, (uint8_t **)&buf, &buf_size) < 0) {
- *poutbuf = NULL;
- *poutbuf_size = 0;
- return buf_size;
- }
- if(next<0){
- find_frame_end(h, &pc->buffer[pc->last_index + next], -next); //update state
+ if(context_count == 1) {
+ decode_slice(avctx, h);
+ } else {
+ for(i = 1; i < context_count; i++) {
+ hx = h->thread_context[i];
+ hx->s.error_resilience = avctx->error_resilience;
+ hx->s.error_count = 0;
}
- }
-
- *poutbuf = (uint8_t *)buf;
- *poutbuf_size = buf_size;
- return next;
-}
-static int h264_split(AVCodecContext *avctx,
- const uint8_t *buf, int buf_size)
-{
- int i;
- uint32_t state = -1;
- int has_sps= 0;
+ avctx->execute(avctx, (void *)decode_slice,
+ (void **)h->thread_context, NULL, context_count);
- for(i=0; i<=buf_size; i++){
- if((state&0xFFFFFF1F) == 0x107)
- has_sps=1;
-/* if((state&0xFFFFFF1F) == 0x101 || (state&0xFFFFFF1F) == 0x102 || (state&0xFFFFFF1F) == 0x105){
- }*/
- if((state&0xFFFFFF00) == 0x100 && (state&0xFFFFFF1F) != 0x107 && (state&0xFFFFFF1F) != 0x108 && (state&0xFFFFFF1F) != 0x109){
- if(has_sps){
- while(i>4 && buf[i-5]==0) i--;
- return i-4;
- }
- }
- if (i<buf_size)
- state= (state<<8) | buf[i];
+ /* pull back stuff from slices to master context */
+ hx = h->thread_context[context_count - 1];
+ s->mb_x = hx->s.mb_x;
+ s->mb_y = hx->s.mb_y;
+ s->dropable = hx->s.dropable;
+ s->picture_structure = hx->s.picture_structure;
+ for(i = 1; i < context_count; i++)
+ h->s.error_count += h->thread_context[i]->s.error_count;
}
- return 0;
}
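Why decode_slice() now takes an AVCodecContext* first argument: avctx->execute(), as called above, hands each entry of the thread_context array to its callback as func(avctx, thread_context[i]), so decode_slice() needs that shape to be passed to it directly. A minimal serial stand-in for that fan-out and for the error_count fold-back, with all names hypothetical:

#include <stdio.h>

typedef struct Ctx { int id; int error_count; } Ctx;

/* Stand-in for decode_slice(): the first argument plays the role of the
 * AVCodecContext*, the second is the per-thread context. */
static int work(void *parent, void *arg)
{
    Ctx *c = arg;
    (void)parent;
    c->error_count = c->id;   /* pretend some errors were counted */
    return 0;
}

/* Serial stand-in for avctx->execute(): call func once per context. */
static void execute(void *parent, int (*func)(void *, void *), void **args, int count)
{
    for (int i = 0; i < count; i++)
        func(parent, args[i]);
}

int main(void)
{
    Ctx c[3] = { {0, 0}, {1, 0}, {2, 0} };
    void *args[3] = { &c[0], &c[1], &c[2] };
    execute(NULL, work, args, 3);

    /* As in execute_decode_slices(): fold per-thread error counts together. */
    int total = 0;
    for (int i = 0; i < 3; i++)
        total += c[i].error_count;
    printf("total error_count: %d\n", total);
    return 0;
}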
-#endif /* CONFIG_H264_PARSER */
-static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){
+
+static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
MpegEncContext * const s = &h->s;
AVCodecContext * const avctx= s->avctx;
int buf_index=0;
+ H264Context *hx; ///< thread context
+ int context_count = 0;
+
+ h->max_contexts = avctx->thread_count;
#if 0
int i;
for(i=0; i<50; i++){
@@ -8105,54 +7440,58 @@ static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){
}
#endif
if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
- h->slice_num = 0;
- s->current_picture_ptr= NULL;
+ h->current_slice = 0;
+ if (!s->first_field)
+ s->current_picture_ptr= NULL;
}
for(;;){
int consumed;
int dst_length;
int bit_length;
- uint8_t *ptr;
+ const uint8_t *ptr;
int i, nalsize = 0;
-
- if(h->is_avc) {
- if(buf_index >= buf_size) break;
- nalsize = 0;
- for(i = 0; i < h->nal_length_size; i++)
- nalsize = (nalsize << 8) | buf[buf_index++];
- if(nalsize <= 1 || nalsize > buf_size){
- if(nalsize == 1){
- buf_index++;
- continue;
- }else{
- av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
- break;
+ int err;
+
+ if(h->is_avc) {
+ if(buf_index >= buf_size) break;
+ nalsize = 0;
+ for(i = 0; i < h->nal_length_size; i++)
+ nalsize = (nalsize << 8) | buf[buf_index++];
+ if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
+ if(nalsize == 1){
+ buf_index++;
+ continue;
+ }else{
+ av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
+ break;
+ }
+ }
+ } else {
+ // start code prefix search
+ for(; buf_index + 3 < buf_size; buf_index++){
+ // This should always succeed in the first iteration.
+ if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
+ break;
}
- }
- } else {
- // start code prefix search
- for(; buf_index + 3 < buf_size; buf_index++){
- // this should allways succeed in the first iteration
- if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
- break;
- }
- if(buf_index+3 >= buf_size) break;
+ if(buf_index+3 >= buf_size) break;
- buf_index+=3;
- }
+ buf_index+=3;
+ }
+
+ hx = h->thread_context[context_count];
- ptr= decode_nal(h, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
- if (ptr==NULL || dst_length <= 0){
+ ptr= decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
+ if (ptr==NULL || dst_length < 0){
return -1;
}
- while(ptr[dst_length - 1] == 0 && dst_length > 1)
+ while(ptr[dst_length - 1] == 0 && dst_length > 0)
dst_length--;
- bit_length= 8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1);
+ bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1));
if(s->avctx->debug&FF_DEBUG_STARTCODE){
- av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", h->nal_unit_type, buf_index, buf_size, dst_length);
+ av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
}
if (h->is_avc && (nalsize != consumed))
@@ -8160,57 +7499,60 @@ static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){
buf_index += consumed;
- if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME dont discard SEI id
+ if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
continue;
- switch(h->nal_unit_type){
+ again:
+ err = 0;
+ switch(hx->nal_unit_type){
case NAL_IDR_SLICE:
+ if (h->nal_unit_type != NAL_IDR_SLICE) {
+                av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of IDR and non-IDR slices\n");
+ return -1;
+ }
            idr(h); //FIXME ensure we don't lose some frames if there is reordering
case NAL_SLICE:
- init_get_bits(&s->gb, ptr, bit_length);
- h->intra_gb_ptr=
- h->inter_gb_ptr= &s->gb;
- s->data_partitioning = 0;
-
- if(decode_slice_header(h) < 0){
- av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
- break;
- }
- s->current_picture_ptr->key_frame= (h->nal_unit_type == NAL_IDR_SLICE);
- if(h->redundant_pic_count==0 && s->hurry_up < 5
- && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc)
- && (avctx->skip_frame < AVDISCARD_BIDIR || h->slice_type!=B_TYPE)
- && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE)
+ init_get_bits(&hx->s.gb, ptr, bit_length);
+ hx->intra_gb_ptr=
+ hx->inter_gb_ptr= &hx->s.gb;
+ hx->s.data_partitioning = 0;
+
+ if((err = decode_slice_header(hx, h)))
+ break;
+
+ s->current_picture_ptr->key_frame|= (hx->nal_unit_type == NAL_IDR_SLICE);
+ if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
+ && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
+ && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type!=B_TYPE)
+ && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type==I_TYPE)
&& avctx->skip_frame < AVDISCARD_ALL)
- decode_slice(h);
+ context_count++;
break;
case NAL_DPA:
- init_get_bits(&s->gb, ptr, bit_length);
- h->intra_gb_ptr=
- h->inter_gb_ptr= NULL;
- s->data_partitioning = 1;
+ init_get_bits(&hx->s.gb, ptr, bit_length);
+ hx->intra_gb_ptr=
+ hx->inter_gb_ptr= NULL;
+ hx->s.data_partitioning = 1;
- if(decode_slice_header(h) < 0){
- av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
- }
+ err = decode_slice_header(hx, h);
break;
case NAL_DPB:
- init_get_bits(&h->intra_gb, ptr, bit_length);
- h->intra_gb_ptr= &h->intra_gb;
+ init_get_bits(&hx->intra_gb, ptr, bit_length);
+ hx->intra_gb_ptr= &hx->intra_gb;
break;
case NAL_DPC:
- init_get_bits(&h->inter_gb, ptr, bit_length);
- h->inter_gb_ptr= &h->inter_gb;
+ init_get_bits(&hx->inter_gb, ptr, bit_length);
+ hx->inter_gb_ptr= &hx->inter_gb;
- if(h->redundant_pic_count==0 && h->intra_gb_ptr && s->data_partitioning
+ if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
&& s->context_initialized
&& s->hurry_up < 5
- && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc)
- && (avctx->skip_frame < AVDISCARD_BIDIR || h->slice_type!=B_TYPE)
- && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE)
+ && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
+ && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type!=B_TYPE)
+ && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type==I_TYPE)
&& avctx->skip_frame < AVDISCARD_ALL)
- decode_slice(h);
+ context_count++;
break;
case NAL_SEI:
init_get_bits(&s->gb, ptr, bit_length);
@@ -8240,10 +7582,29 @@ static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){
case NAL_AUXILIARY_SLICE:
break;
default:
- av_log(avctx, AV_LOG_ERROR, "Unknown NAL code: %d\n", h->nal_unit_type);
+ av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
+ }
+
+ if(context_count == h->max_contexts) {
+ execute_decode_slices(h, context_count);
+ context_count = 0;
}
- }
+ if (err < 0)
+ av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
+ else if(err == 1) {
+ /* Slice could not be decoded in parallel mode, copy down
+ * NAL unit stuff to context 0 and restart. Note that
+             * rbsp_buffer is not transferred, but since we no longer
+ * run in parallel mode this should not be an issue. */
+ h->nal_unit_type = hx->nal_unit_type;
+ h->nal_ref_idc = hx->nal_ref_idc;
+ hx = h;
+ goto again;
+ }
+ }
+ if(context_count)
+ execute_decode_slices(h, context_count);
return buf_index;
}
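The loop above supports two bitstream framings: with h->is_avc set, each NAL unit is preceded by a big-endian length field of nal_length_size bytes; otherwise units are separated by 00 00 01 start codes. A standalone splitter for the start-code case, illustrative only (it ignores the emulation-prevention bytes, which decode_nal() strips):

#include <stdio.h>
#include <stdint.h>

/* Return the offset of the next 00 00 01 start code at or after 'pos',
 * or 'size' if none is found. */
static size_t next_start_code(const uint8_t *buf, size_t size, size_t pos)
{
    for (; pos + 3 <= size; pos++)
        if (buf[pos] == 0 && buf[pos + 1] == 0 && buf[pos + 2] == 1)
            return pos;
    return size;
}

int main(void)
{
    /* Hypothetical stream: two NAL units (types 7 and 5) behind start codes. */
    static const uint8_t buf[] = { 0,0,1, 0x67, 0x42, 0,0,1, 0x65, 0x88 };
    size_t size = sizeof(buf);
    size_t pos  = next_start_code(buf, size, 0);
    while (pos < size) {
        size_t start = pos + 3;                 /* first byte of the NAL unit */
        size_t end   = next_start_code(buf, size, start);
        printf("NAL type %d, %zu payload bytes\n", buf[start] & 0x1F, end - start);
        pos = end;
    }
    return 0;
}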
@@ -8257,7 +7618,7 @@ static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
return pos;
}else{
- if(pos==0) pos=1; //avoid infinite loops (i doubt thats needed but ...)
+        if(pos==0) pos=1; //avoid infinite loops (I doubt that is needed, but ...)
if(pos+10>buf_size) pos=buf_size; // oops ;)
return pos;
@@ -8266,7 +7627,7 @@ static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
static int decode_frame(AVCodecContext *avctx,
void *data, int *data_size,
- uint8_t *buf, int buf_size)
+ const uint8_t *buf, int buf_size)
{
H264Context *h = avctx->priv_data;
MpegEncContext *s = &h->s;
@@ -8302,9 +7663,9 @@ static int decode_frame(AVCodecContext *avctx,
}
if(s->flags&CODEC_FLAG_TRUNCATED){
- int next= find_frame_end(h, buf, buf_size);
+ int next= ff_h264_find_frame_end(h, buf, buf_size);
- if( ff_combine_frame(&s->parse_context, next, &buf, &buf_size) < 0 )
+ if( ff_combine_frame(&s->parse_context, next, (const uint8_t **)&buf, &buf_size) < 0 )
return buf_size;
//printf("next:%d buf_size:%d last_index:%d\n", next, buf_size, s->parse_context.last_index);
}
@@ -8360,6 +7721,7 @@ static int decode_frame(AVCodecContext *avctx,
return -1;
if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
+ if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
av_log(avctx, AV_LOG_ERROR, "no frame!\n");
return -1;
}
@@ -8377,87 +7739,109 @@ static int decode_frame(AVCodecContext *avctx,
h->prev_frame_num_offset= h->frame_num_offset;
h->prev_frame_num= h->frame_num;
- if(s->current_picture_ptr->reference){
+ if(!s->dropable) {
h->prev_poc_msb= h->poc_msb;
h->prev_poc_lsb= h->poc_lsb;
- }
- if(s->current_picture_ptr->reference)
execute_ref_pic_marking(h, h->mmco, h->mmco_index);
+ }
- ff_er_frame_end(s);
+ /*
+     * FIXME: The error handling code does not seem to support interlaced
+     * content when slices span multiple rows. The ff_er_add_slice calls
+     * don't work right for bottom fields: they cause massive erroneous
+     * error concealing. Error marking covers both fields (top and bottom),
+     * which causes a mismatched s->error_count and a bad error table.
+     * Further, the error count goes to INT_MAX when called for the bottom
+     * field, because mb_y is past the end by one (the caller's fault) and
+     * resync_mb_y != 0 causes problems for the first MB line, too.
+ */
+ if (!FIELD_PICTURE)
+ ff_er_frame_end(s);
MPV_frame_end(s);
- //FIXME do something with unavailable reference frames
+ if (s->first_field) {
+ /* Wait for second field. */
+ *data_size = 0;
-#if 0 //decode order
- *data_size = sizeof(AVFrame);
-#else
- /* Sort B-frames into display order */
+ } else {
+ cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
+ /* Derive top_field_first from field pocs. */
+ cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
- if(h->sps.bitstream_restriction_flag
- && s->avctx->has_b_frames < h->sps.num_reorder_frames){
- s->avctx->has_b_frames = h->sps.num_reorder_frames;
- s->low_delay = 0;
- }
+ //FIXME do something with unavailable reference frames
- pics = 0;
- while(h->delayed_pic[pics]) pics++;
+#if 0 //decode order
+ *data_size = sizeof(AVFrame);
+#else
+ /* Sort B-frames into display order */
- assert(pics+1 < sizeof(h->delayed_pic) / sizeof(h->delayed_pic[0]));
+ if(h->sps.bitstream_restriction_flag
+ && s->avctx->has_b_frames < h->sps.num_reorder_frames){
+ s->avctx->has_b_frames = h->sps.num_reorder_frames;
+ s->low_delay = 0;
+ }
- h->delayed_pic[pics++] = cur;
- if(cur->reference == 0)
- cur->reference = 1;
+ pics = 0;
+ while(h->delayed_pic[pics]) pics++;
- cross_idr = 0;
- for(i=0; h->delayed_pic[i]; i++)
- if(h->delayed_pic[i]->key_frame || h->delayed_pic[i]->poc==0)
- cross_idr = 1;
+ assert(pics+1 < sizeof(h->delayed_pic) / sizeof(h->delayed_pic[0]));
- out = h->delayed_pic[0];
- out_idx = 0;
- for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
- if(h->delayed_pic[i]->poc < out->poc){
- out = h->delayed_pic[i];
- out_idx = i;
- }
+ h->delayed_pic[pics++] = cur;
+ if(cur->reference == 0)
+ cur->reference = DELAYED_PIC_REF;
- out_of_order = !cross_idr && prev && out->poc < prev->poc;
- if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
- { }
- else if(prev && pics <= s->avctx->has_b_frames)
- out = prev;
- else if((out_of_order && pics-1 == s->avctx->has_b_frames && pics < 15)
- || (s->low_delay &&
- ((!cross_idr && prev && out->poc > prev->poc + 2)
- || cur->pict_type == B_TYPE)))
- {
- s->low_delay = 0;
- s->avctx->has_b_frames++;
- out = prev;
- }
- else if(out_of_order)
- out = prev;
+ cross_idr = 0;
+ for(i=0; h->delayed_pic[i]; i++)
+ if(h->delayed_pic[i]->key_frame || h->delayed_pic[i]->poc==0)
+ cross_idr = 1;
- if(out_of_order || pics > s->avctx->has_b_frames){
- for(i=out_idx; h->delayed_pic[i]; i++)
- h->delayed_pic[i] = h->delayed_pic[i+1];
- }
+ out = h->delayed_pic[0];
+ out_idx = 0;
+ for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
+ if(h->delayed_pic[i]->poc < out->poc){
+ out = h->delayed_pic[i];
+ out_idx = i;
+ }
- if(prev == out)
- *data_size = 0;
- else
- *data_size = sizeof(AVFrame);
- if(prev && prev != out && prev->reference == 1)
- prev->reference = 0;
- h->delayed_output_pic = out;
+ out_of_order = !cross_idr && prev && out->poc < prev->poc;
+ if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
+ { }
+ else if(prev && pics <= s->avctx->has_b_frames)
+ out = prev;
+ else if((out_of_order && pics-1 == s->avctx->has_b_frames && pics < 15)
+ || (s->low_delay &&
+ ((!cross_idr && prev && out->poc > prev->poc + 2)
+ || cur->pict_type == B_TYPE)))
+ {
+ s->low_delay = 0;
+ s->avctx->has_b_frames++;
+ out = prev;
+ }
+ else if(out_of_order)
+ out = prev;
+
+ if(out_of_order || pics > s->avctx->has_b_frames){
+ for(i=out_idx; h->delayed_pic[i]; i++)
+ h->delayed_pic[i] = h->delayed_pic[i+1];
+ }
+
+ if(prev == out)
+ *data_size = 0;
+ else
+ *data_size = sizeof(AVFrame);
+ if(prev && prev != out && prev->reference == DELAYED_PIC_REF)
+ prev->reference = 0;
+ h->delayed_output_pic = out;
#endif
- if(out)
- *pict= *(AVFrame*)out;
- else
- av_log(avctx, AV_LOG_DEBUG, "no picture\n");
+ if(out)
+ *pict= *(AVFrame*)out;
+ else
+ av_log(avctx, AV_LOG_DEBUG, "no picture\n");
+ }
}
assert(pict->data[0] || !*data_size);
@@ -8466,7 +7850,7 @@ static int decode_frame(AVCodecContext *avctx,
#if 0 //?
/* Return the Picture timestamp as the frame number */
- /* we substract 1 because it is added on utils.c */
+ /* we subtract 1 because it is added on utils.c */
avctx->frame_number = s->picture_number - 1;
#endif
return get_consumed_bytes(s, buf_index, buf_size);
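The display-order reordering above, reduced to its core as a standalone sketch (illustrative only; the real code additionally handles IDR boundaries, low_delay, growth of has_b_frames, and reference flags): decoded frames enter a small delay queue and the frame with the smallest POC is emitted once enough lookahead has accumulated.

#include <stdio.h>

#define MAX_DELAY 16

typedef struct Frame { int poc; } Frame;

static Frame *queue[MAX_DELAY];
static int nqueued;

/* Push a newly decoded frame; return the frame to display now,
 * or NULL while output is still being delayed. */
static Frame *reorder(Frame *cur, int has_b_frames)
{
    int i, best = 0;
    queue[nqueued++] = cur;
    if (nqueued <= has_b_frames)
        return NULL;                        /* not enough lookahead yet */
    for (i = 1; i < nqueued; i++)           /* pick the lowest POC */
        if (queue[i]->poc < queue[best]->poc)
            best = i;
    Frame *out = queue[best];
    for (i = best; i < nqueued - 1; i++)    /* compact the queue */
        queue[i] = queue[i + 1];
    nqueued--;
    return out;
}

int main(void)
{
    /* Decode order I P B B with POCs 0 6 2 4, two reorder frames. */
    Frame f[4] = { {0}, {6}, {2}, {4} };
    for (int i = 0; i < 4; i++) {
        Frame *out = reorder(&f[i], 2);
        if (out)
            printf("decoded poc %d -> display poc %d\n", f[i].poc, out->poc);
        else
            printf("decoded poc %d -> output delayed\n", f[i].poc);
    }
    return 0;
}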
@@ -8491,10 +7875,12 @@ static inline void fill_mb_avail(H264Context *h){
}
#endif
-#if 0 //selftest
+#ifdef TEST
+#undef printf
+#undef random
#define COUNT 8000
#define SIZE (COUNT*40)
-int main(){
+int main(void){
int i;
uint8_t temp[SIZE];
PutBitContext pb;
@@ -8523,7 +7909,7 @@ int main(){
START_TIMER
j= get_ue_golomb(&gb);
if(j != i){
- printf("missmatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
+ printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
// return -1;
}
STOP_TIMER("get_ue_golomb");
@@ -8548,12 +7934,13 @@ int main(){
START_TIMER
j= get_se_golomb(&gb);
if(j != i - COUNT/2){
- printf("missmatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
+ printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
// return -1;
}
STOP_TIMER("get_se_golomb");
}
+#if 0
printf("testing 4x4 (I)DCT\n");
DCTELEM block[16];
@@ -8593,14 +7980,12 @@ int main(){
}
}
printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
-#if 0
printf("testing quantizer\n");
for(qp=0; qp<52; qp++){
for(i=0; i<16; i++)
src1_block[i]= src2_block[i]= random()%255;
}
-#endif
printf("Testing NAL layer\n");
uint8_t bitstream[COUNT];
@@ -8652,17 +8037,18 @@ int main(){
}
if(memcmp(bitstream, out, COUNT)){
- printf("missmatch\n");
+ printf("mismatch\n");
return -1;
}
}
+#endif
printf("Testing RBSP\n");
return 0;
}
-#endif
+#endif /* TEST */
static int decode_end(AVCodecContext *avctx)
@@ -8670,7 +8056,8 @@ static int decode_end(AVCodecContext *avctx)
H264Context *h = avctx->priv_data;
MpegEncContext *s = &h->s;
- av_freep(&h->rbsp_buffer);
+ av_freep(&h->rbsp_buffer[0]);
+ av_freep(&h->rbsp_buffer[1]);
free_tables(h); //FIXME cleanup init stuff perhaps
MPV_common_end(s);
@@ -8693,15 +8080,4 @@ AVCodec h264_decoder = {
.flush= flush_dpb,
};
-#ifdef CONFIG_H264_PARSER
-AVCodecParser h264_parser = {
- { CODEC_ID_H264 },
- sizeof(H264Context),
- NULL,
- h264_parse,
- ff_parse_close,
- h264_split,
-};
-#endif
-
#include "svq3.c"