diff mbox series

[FFmpeg-devel,v2,03/10] lavc/hevcdec: allocate local_ctx as array of structs rather than pointers

Message ID 20240528135437.24854-1-anton@khirnov.net
State New
Headers show
Series None | expand

Commit Message

Anton Khirnov May 28, 2024, 1:54 p.m. UTC
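Allocate local_ctx as a single contiguous array of HEVCLocalContext structs instead of an array of pointers to individually allocated structs. This is more efficient (one allocation, one less pointer indirection per access) and easier to manage (a single av_freep() releases every context). The struct is padded so that adjacent per-thread contexts do not suffer from false sharing.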
---
 libavcodec/hevcdec.c | 57 +++++++++++++++++++++++++-------------------
 libavcodec/hevcdec.h |  6 ++++-
 2 files changed, 37 insertions(+), 26 deletions(-)
diff mbox series

Patch

diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c
index e84f45e3f8..88a481c043 100644
--- a/libavcodec/hevcdec.c
+++ b/libavcodec/hevcdec.c
@@ -2598,7 +2598,7 @@  static int hls_slice_data(HEVCContext *s)
 static int hls_decode_entry_wpp(AVCodecContext *avctxt, void *hevc_lclist,
                                 int job, int self_id)
 {
-    HEVCLocalContext *lc = ((HEVCLocalContext**)hevc_lclist)[self_id];
+    HEVCLocalContext *lc = &((HEVCLocalContext*)hevc_lclist)[self_id];
     const HEVCContext *const s = lc->parent;
     int ctb_size    = 1 << s->ps.sps->log2_ctb_size;
     int more_data   = 1;
@@ -2682,7 +2682,7 @@  static int hls_slice_data_wpp(HEVCContext *s, const H2645NAL *nal)
 {
     const uint8_t *data = nal->data;
     int length          = nal->size;
-    HEVCLocalContext *lc = s->HEVClc;
+    HEVCLocalContext *lc;
     int *ret;
     int64_t offset;
     int64_t startheader, cmpt = 0;
@@ -2696,19 +2696,31 @@  static int hls_slice_data_wpp(HEVCContext *s, const H2645NAL *nal)
         return AVERROR_INVALIDDATA;
     }
 
-    for (i = 1; i < s->threads_number; i++) {
-        if (i < s->nb_local_ctx)
-            continue;
-        s->local_ctx[i] = av_mallocz(sizeof(HEVCLocalContext));
-        if (!s->local_ctx[i])
-            return AVERROR(ENOMEM);
-        s->nb_local_ctx++;
+    if (s->threads_number > s->nb_local_ctx) {
+        HEVCLocalContext *tmp = av_malloc_array(s->threads_number, sizeof(*s->local_ctx));
 
-        s->local_ctx[i]->logctx = s->avctx;
-        s->local_ctx[i]->parent = s;
-        s->local_ctx[i]->common_cabac_state = &s->cabac;
+        if (!tmp)
+            return AVERROR(ENOMEM);
+
+        memcpy(tmp, s->local_ctx, sizeof(*s->local_ctx) * s->nb_local_ctx);
+        av_free(s->local_ctx);
+        s->local_ctx = tmp;
+        s->HEVClc    = &s->local_ctx[0];
+
+        for (unsigned i = s->nb_local_ctx; i < s->threads_number; i++) {
+            tmp = &s->local_ctx[i];
+
+            memset(tmp, 0, sizeof(*tmp));
+
+            tmp->logctx             = s->avctx;
+            tmp->parent             = s;
+            tmp->common_cabac_state = &s->cabac;
+        }
+
+        s->nb_local_ctx = s->threads_number;
     }
 
+    lc     = &s->local_ctx[0];
     offset = (lc->gb.index >> 3);
 
     for (j = 0, cmpt = 0, startheader = offset + s->sh.entry_point_offset[0]; j < nal->skipped_bytes; j++) {
@@ -2744,8 +2756,8 @@  static int hls_slice_data_wpp(HEVCContext *s, const H2645NAL *nal)
     s->data = data;
 
     for (i = 1; i < s->threads_number; i++) {
-        s->local_ctx[i]->first_qp_group = 1;
-        s->local_ctx[i]->qp_y = s->HEVClc->qp_y;
+        s->local_ctx[i].first_qp_group = 1;
+        s->local_ctx[i].qp_y = s->HEVClc->qp_y;
     }
 
     atomic_store(&s->wpp_err, 0);
@@ -3474,12 +3486,6 @@  static av_cold int hevc_decode_free(AVCodecContext *avctx)
     av_freep(&s->sh.offset);
     av_freep(&s->sh.size);
 
-    if (s->local_ctx) {
-        for (i = 1; i < s->nb_local_ctx; i++) {
-            av_freep(&s->local_ctx[i]);
-        }
-    }
-    av_freep(&s->HEVClc);
     av_freep(&s->local_ctx);
 
     ff_h2645_packet_uninit(&s->pkt);
@@ -3496,15 +3502,16 @@  static av_cold int hevc_init_context(AVCodecContext *avctx)
 
     s->avctx = avctx;
 
-    s->HEVClc = av_mallocz(sizeof(HEVCLocalContext));
-    s->local_ctx = av_mallocz(sizeof(HEVCLocalContext*) * s->threads_number);
-    if (!s->HEVClc || !s->local_ctx)
+    s->local_ctx = av_mallocz(sizeof(*s->local_ctx));
+    if (!s->local_ctx)
         return AVERROR(ENOMEM);
+    s->nb_local_ctx = 1;
+
+    s->HEVClc = &s->local_ctx[0];
+
     s->HEVClc->parent = s;
     s->HEVClc->logctx = avctx;
     s->HEVClc->common_cabac_state = &s->cabac;
-    s->local_ctx[0] = s->HEVClc;
-    s->nb_local_ctx = 1;
 
     s->output_frame = av_frame_alloc();
     if (!s->output_frame)
diff --git a/libavcodec/hevcdec.h b/libavcodec/hevcdec.h
index ca68fb54a7..5aa3d40450 100644
--- a/libavcodec/hevcdec.h
+++ b/libavcodec/hevcdec.h
@@ -439,13 +439,17 @@  typedef struct HEVCLocalContext {
     /* properties of the boundary of the current CTB for the purposes
      * of the deblocking filter */
     int boundary_flags;
+
+    // an array of these structs is used for per-thread state - pad its size
+    // to avoid false sharing
+    char padding[128];
 } HEVCLocalContext;
 
 typedef struct HEVCContext {
     const AVClass *c;  // needed by private avoptions
     AVCodecContext *avctx;
 
-    HEVCLocalContext    **local_ctx;
+    HEVCLocalContext     *local_ctx;
     unsigned           nb_local_ctx;
 
     HEVCLocalContext    *HEVClc;