@@ -97,18 +97,22 @@ static HEVCFrame *alloc_frame(HEVCContext *s)
if (!frame->rpl_buf)
goto fail;
- frame->tab_mvf_buf = av_buffer_pool_get(s->tab_mvf_pool);
- if (!frame->tab_mvf_buf)
- goto fail;
- frame->tab_mvf = (MvField *)frame->tab_mvf_buf->data;
+ if (s->tab_mvf_pool) {
+ frame->tab_mvf_buf = av_buffer_pool_get(s->tab_mvf_pool);
+ if (!frame->tab_mvf_buf)
+ goto fail;
+ frame->tab_mvf = (MvField *)frame->tab_mvf_buf->data;
+ }
- frame->rpl_tab_buf = av_buffer_pool_get(s->rpl_tab_pool);
- if (!frame->rpl_tab_buf)
- goto fail;
- frame->rpl_tab = (RefPicListTab **)frame->rpl_tab_buf->data;
- frame->ctb_count = s->ps.sps->ctb_width * s->ps.sps->ctb_height;
- for (j = 0; j < frame->ctb_count; j++)
- frame->rpl_tab[j] = (RefPicListTab *)frame->rpl_buf->data;
+ if (s->rpl_tab_pool) {
+ frame->rpl_tab_buf = av_buffer_pool_get(s->rpl_tab_pool);
+ if (!frame->rpl_tab_buf)
+ goto fail;
+ frame->rpl_tab = (RefPicListTab **)frame->rpl_tab_buf->data;
+ frame->ctb_count = s->ps.sps->ctb_width * s->ps.sps->ctb_height;
+ for (j = 0; j < frame->ctb_count; j++)
+ frame->rpl_tab[j] = (RefPicListTab *)frame->rpl_buf->data;
+ }
frame->frame->top_field_first = s->sei.picture_timing.picture_struct == AV_PICTURE_STRUCTURE_TOP_FIELD;
frame->frame->interlaced_frame = (s->sei.picture_timing.picture_struct == AV_PICTURE_STRUCTURE_TOP_FIELD) || (s->sei.picture_timing.picture_struct == AV_PICTURE_STRUCTURE_BOTTOM_FIELD);
@@ -283,14 +287,17 @@ static int init_slice_rpl(HEVCContext *s)
int ctb_count = frame->ctb_count;
int ctb_addr_ts = s->ps.pps->ctb_addr_rs_to_ts[s->sh.slice_segment_addr];
int i;
+ RefPicListTab * const tab = (RefPicListTab *)frame->rpl_buf->data + s->slice_idx;
if (s->slice_idx >= frame->rpl_buf->size / sizeof(RefPicListTab))
return AVERROR_INVALIDDATA;
- for (i = ctb_addr_ts; i < ctb_count; i++)
- frame->rpl_tab[i] = (RefPicListTab *)frame->rpl_buf->data + s->slice_idx;
+ if (frame->rpl_tab) {
+ for (i = ctb_addr_ts; i < ctb_count; i++)
+ frame->rpl_tab[i] = tab;
+ }
- frame->refPicList = (RefPicList *)frame->rpl_tab[ctb_addr_ts];
+ frame->refPicList = tab->refPicList;
return 0;
}
@@ -504,6 +504,16 @@ static int set_sps(HEVCContext *s, const HEVCSPS *sps,
if (!sps)
return 0;
+ // If hwaccel then we don't need all the s/w decode helper arrays
+ if (s->avctx->hwaccel) {
+ export_stream_params(s, sps);
+
+ s->avctx->pix_fmt = pix_fmt;
+ s->ps.sps = sps;
+ s->ps.vps = (HEVCVPS*) s->ps.vps_list[s->ps.sps->vps_id]->data;
+ return 0;
+ }
+
ret = pic_arrays_init(s, sps);
if (ret < 0)
goto fail;
@@ -3008,11 +3018,13 @@ static int hevc_frame_start(HEVCContext *s)
((s->ps.sps->height >> s->ps.sps->log2_min_cb_size) + 1);
int ret;
- memset(s->horizontal_bs, 0, s->bs_width * s->bs_height);
- memset(s->vertical_bs, 0, s->bs_width * s->bs_height);
- memset(s->cbf_luma, 0, s->ps.sps->min_tb_width * s->ps.sps->min_tb_height);
- memset(s->is_pcm, 0, (s->ps.sps->min_pu_width + 1) * (s->ps.sps->min_pu_height + 1));
- memset(s->tab_slice_address, -1, pic_size_in_ctb * sizeof(*s->tab_slice_address));
+ if (s->horizontal_bs) {
+ memset(s->horizontal_bs, 0, s->bs_width * s->bs_height);
+ memset(s->vertical_bs, 0, s->bs_width * s->bs_height);
+ memset(s->cbf_luma, 0, s->ps.sps->min_tb_width * s->ps.sps->min_tb_height);
+ memset(s->is_pcm, 0, (s->ps.sps->min_pu_width + 1) * (s->ps.sps->min_pu_height + 1));
+ memset(s->tab_slice_address, -1, pic_size_in_ctb * sizeof(*s->tab_slice_address));
+ }
s->is_decoded = 0;
s->first_nal_type = s->nal_unit_type;
@@ -3555,15 +3567,19 @@ static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
dst->needs_fg = 1;
}
- dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
- if (!dst->tab_mvf_buf)
- goto fail;
- dst->tab_mvf = src->tab_mvf;
+ if (src->tab_mvf_buf) {
+ dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
+ if (!dst->tab_mvf_buf)
+ goto fail;
+ dst->tab_mvf = src->tab_mvf;
+ }
- dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
- if (!dst->rpl_tab_buf)
- goto fail;
- dst->rpl_tab = src->rpl_tab;
+ if (src->rpl_tab_buf) {
+ dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
+ if (!dst->rpl_tab_buf)
+ goto fail;
+ dst->rpl_tab = src->rpl_tab;
+ }
dst->rpl_buf = av_buffer_ref(src->rpl_buf);
if (!dst->rpl_buf)
Hwaccel doesn't use any of the block strength, pcm, slice address, etc. arrays, which can be >100k each for 4k video. This patch avoids their initial allocation and the zeroing at the start of every frame. On a Pi4 the memsets can use 10% CPU on 4k 60Hz decode; this fixes that. Signed-off-by: John Cox <jc@kynesim.co.uk> --- libavcodec/hevc_refs.c | 35 +++++++++++++++++++++-------------- libavcodec/hevcdec.c | 42 +++++++++++++++++++++++++++++------------- 2 files changed, 50 insertions(+), 27 deletions(-) -- 2.34.1