diff mbox series

[FFmpeg-devel] hevc: If hwaccel avoid creation/use of s/w only arrays

Message ID cil19htkrmsdp69rlil4f5a3cvkob5eqoa@4ax.com
State New
Headers show
Series [FFmpeg-devel] hevc: If hwaccel avoid creation/use of s/w only arrays | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished
andriy/make_armv7_RPi4 success Make finished
andriy/make_fate_armv7_RPi4 success Make fate finished

Commit Message

John Cox May 27, 2022, 1:51 p.m. UTC
Hwaccel doesn't use any of the block strength, pcm, slice address, etc.
arrays, which can be >100k each for 4k video. This patch avoids their
initial allocation and their zeroing at the start of every frame. On a
Pi4 the memsets can use 10% CPU on 4k 60Hz decode; this fixes that.

Signed-off-by: John Cox <jc@kynesim.co.uk>
---
 libavcodec/hevc_refs.c | 35 +++++++++++++++++++++--------------
 libavcodec/hevcdec.c   | 42 +++++++++++++++++++++++++++++-------------
 2 files changed, 50 insertions(+), 27 deletions(-)

--
2.34.1

Comments

Anton Khirnov June 1, 2022, 10:29 a.m. UTC | #1
Quoting John Cox (2022-05-27 15:51:17)
> Hwaccel doesn't use any of the block strength, pcm, slice address, etc.
> arrays which can be >100k each for 4k video. Patch to avoid initial
> allocation and zeroing at the start of every frame. On a Pi4 the memsets
> can use 10% CPU on 4k 60Hz decode, this fixes that.
> 
> Signed-off-by: John Cox <jc@kynesim.co.uk>
> ---
>  libavcodec/hevc_refs.c | 35 +++++++++++++++++++++--------------
>  libavcodec/hevcdec.c   | 42 +++++++++++++++++++++++++++++-------------
>  2 files changed, 50 insertions(+), 27 deletions(-)
> 
> diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c
> index f782ea6394..48b059ce45 100644
> --- a/libavcodec/hevcdec.c
> +++ b/libavcodec/hevcdec.c
> @@ -504,6 +504,16 @@ static int set_sps(HEVCContext *s, const HEVCSPS *sps,
>      if (!sps)
>          return 0;
> 
> +    // If hwaccel then we don't need all the s/w decode helper arrays
> +    if (s->avctx->hwaccel) {
> +        export_stream_params(s, sps);
> +
> +        s->avctx->pix_fmt = pix_fmt;
> +        s->ps.sps = sps;
> +        s->ps.vps = (HEVCVPS*) s->ps.vps_list[s->ps.sps->vps_id]->data;

This duplication is ugly. This code should be commutative, so you can
either move it all above pic_arrays_init() and add an early return
after, or move it all to the end and jump to it with goto.

Otherwise looks reasonable.
diff mbox series

Patch

diff --git a/libavcodec/hevc_refs.c b/libavcodec/hevc_refs.c
index fe18ca2b1d..ab3103f66c 100644
--- a/libavcodec/hevc_refs.c
+++ b/libavcodec/hevc_refs.c
@@ -97,18 +97,22 @@  static HEVCFrame *alloc_frame(HEVCContext *s)
         if (!frame->rpl_buf)
             goto fail;

-        frame->tab_mvf_buf = av_buffer_pool_get(s->tab_mvf_pool);
-        if (!frame->tab_mvf_buf)
-            goto fail;
-        frame->tab_mvf = (MvField *)frame->tab_mvf_buf->data;
+        if (s->tab_mvf_pool) {
+            frame->tab_mvf_buf = av_buffer_pool_get(s->tab_mvf_pool);
+            if (!frame->tab_mvf_buf)
+                goto fail;
+            frame->tab_mvf = (MvField *)frame->tab_mvf_buf->data;
+        }

-        frame->rpl_tab_buf = av_buffer_pool_get(s->rpl_tab_pool);
-        if (!frame->rpl_tab_buf)
-            goto fail;
-        frame->rpl_tab   = (RefPicListTab **)frame->rpl_tab_buf->data;
-        frame->ctb_count = s->ps.sps->ctb_width * s->ps.sps->ctb_height;
-        for (j = 0; j < frame->ctb_count; j++)
-            frame->rpl_tab[j] = (RefPicListTab *)frame->rpl_buf->data;
+        if (s->rpl_tab_pool) {
+            frame->rpl_tab_buf = av_buffer_pool_get(s->rpl_tab_pool);
+            if (!frame->rpl_tab_buf)
+                goto fail;
+            frame->rpl_tab   = (RefPicListTab **)frame->rpl_tab_buf->data;
+            frame->ctb_count = s->ps.sps->ctb_width * s->ps.sps->ctb_height;
+            for (j = 0; j < frame->ctb_count; j++)
+                frame->rpl_tab[j] = (RefPicListTab *)frame->rpl_buf->data;
+        }

         frame->frame->top_field_first  = s->sei.picture_timing.picture_struct == AV_PICTURE_STRUCTURE_TOP_FIELD;
         frame->frame->interlaced_frame = (s->sei.picture_timing.picture_struct == AV_PICTURE_STRUCTURE_TOP_FIELD) || (s->sei.picture_timing.picture_struct == AV_PICTURE_STRUCTURE_BOTTOM_FIELD);
@@ -283,14 +287,17 @@  static int init_slice_rpl(HEVCContext *s)
     int ctb_count    = frame->ctb_count;
     int ctb_addr_ts  = s->ps.pps->ctb_addr_rs_to_ts[s->sh.slice_segment_addr];
     int i;
+    RefPicListTab * const tab = (RefPicListTab *)frame->rpl_buf->data + s->slice_idx;

     if (s->slice_idx >= frame->rpl_buf->size / sizeof(RefPicListTab))
         return AVERROR_INVALIDDATA;

-    for (i = ctb_addr_ts; i < ctb_count; i++)
-        frame->rpl_tab[i] = (RefPicListTab *)frame->rpl_buf->data + s->slice_idx;
+    if (frame->rpl_tab) {
+        for (i = ctb_addr_ts; i < ctb_count; i++)
+            frame->rpl_tab[i] = tab;
+    }

-    frame->refPicList = (RefPicList *)frame->rpl_tab[ctb_addr_ts];
+    frame->refPicList = tab->refPicList;

     return 0;
 }
diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c
index f782ea6394..48b059ce45 100644
--- a/libavcodec/hevcdec.c
+++ b/libavcodec/hevcdec.c
@@ -504,6 +504,16 @@  static int set_sps(HEVCContext *s, const HEVCSPS *sps,
     if (!sps)
         return 0;

+    // If hwaccel then we don't need all the s/w decode helper arrays
+    if (s->avctx->hwaccel) {
+        export_stream_params(s, sps);
+
+        s->avctx->pix_fmt = pix_fmt;
+        s->ps.sps = sps;
+        s->ps.vps = (HEVCVPS*) s->ps.vps_list[s->ps.sps->vps_id]->data;
+        return 0;
+    }
+
     ret = pic_arrays_init(s, sps);
     if (ret < 0)
         goto fail;
@@ -3008,11 +3018,13 @@  static int hevc_frame_start(HEVCContext *s)
                            ((s->ps.sps->height >> s->ps.sps->log2_min_cb_size) + 1);
     int ret;

-    memset(s->horizontal_bs, 0, s->bs_width * s->bs_height);
-    memset(s->vertical_bs,   0, s->bs_width * s->bs_height);
-    memset(s->cbf_luma,      0, s->ps.sps->min_tb_width * s->ps.sps->min_tb_height);
-    memset(s->is_pcm,        0, (s->ps.sps->min_pu_width + 1) * (s->ps.sps->min_pu_height + 1));
-    memset(s->tab_slice_address, -1, pic_size_in_ctb * sizeof(*s->tab_slice_address));
+    if (s->horizontal_bs) {
+        memset(s->horizontal_bs, 0, s->bs_width * s->bs_height);
+        memset(s->vertical_bs,   0, s->bs_width * s->bs_height);
+        memset(s->cbf_luma,      0, s->ps.sps->min_tb_width * s->ps.sps->min_tb_height);
+        memset(s->is_pcm,        0, (s->ps.sps->min_pu_width + 1) * (s->ps.sps->min_pu_height + 1));
+        memset(s->tab_slice_address, -1, pic_size_in_ctb * sizeof(*s->tab_slice_address));
+    }

     s->is_decoded        = 0;
     s->first_nal_type    = s->nal_unit_type;
@@ -3555,15 +3567,19 @@  static int hevc_ref_frame(HEVCContext *s, HEVCFrame *dst, HEVCFrame *src)
         dst->needs_fg = 1;
     }

-    dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
-    if (!dst->tab_mvf_buf)
-        goto fail;
-    dst->tab_mvf = src->tab_mvf;
+    if (src->tab_mvf_buf) {
+        dst->tab_mvf_buf = av_buffer_ref(src->tab_mvf_buf);
+        if (!dst->tab_mvf_buf)
+            goto fail;
+        dst->tab_mvf = src->tab_mvf;
+    }

-    dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
-    if (!dst->rpl_tab_buf)
-        goto fail;
-    dst->rpl_tab = src->rpl_tab;
+    if (src->rpl_tab_buf) {
+        dst->rpl_tab_buf = av_buffer_ref(src->rpl_tab_buf);
+        if (!dst->rpl_tab_buf)
+            goto fail;
+        dst->rpl_tab = src->rpl_tab;
+    }

     dst->rpl_buf = av_buffer_ref(src->rpl_buf);
     if (!dst->rpl_buf)