diff mbox series

[FFmpeg-devel,05/11] lavc/jpeg2000*: Use av_realloc_array_reuse() and av_reallocz_array_reuse() to eliminate lots of allocations

Message ID 9e78ef32ebe31cebd72f0ef31a42892ccb9017ab.camel@haerdin.se
State New
Headers show
Series [FFmpeg-devel,01/11] lavc/jpeg2000dec: Finer granularity threading | expand

Checks

Context Check Description
andriy/configure_x86 warning Failed to apply patch

Commit Message

Tomas Härdin Sept. 28, 2022, 10:06 a.m. UTC

diff mbox series

Patch

From 5a5986c29d62933f3f2cd2259becb763f3719eaa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomas=20H=C3=A4rdin?= <git@haerdin.se>
Date: Mon, 13 Jun 2022 17:04:10 +0200
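Subject: [PATCH 05/11] lavc/jpeg2000*: Use av_realloc_array_reuse() and
 av_reallocz_array_reuse() to eliminate lots of allocations

Replace the av_calloc()/av_mallocz()/av_malloc_array() calls in the
JPEG 2000 code with av_fast_malloc(), av_realloc_array_reuse() and
av_reallocz_array_reuse(), so that existing buffers can be reused instead
of being allocated anew every time. The capacity of each buffer is tracked
in a new *_allocated or *_size field next to the pointer.

Because the buffers now persist, tiles and components are freed in
jpeg2000_decode_close() instead of jpeg2000_dec_cleanup(), and
ff_jpeg2000_cleanup() iterates over the allocated counts rather than the
counts taken from the current coding style.
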
---
 libavcodec/jpeg2000.c    | 79 +++++++++++++++++++++-------------------
 libavcodec/jpeg2000.h    | 10 +++++
 libavcodec/jpeg2000dec.c | 48 ++++++++++--------------
 libavcodec/jpeg2000dwt.c |  9 ++++-
 libavcodec/jpeg2000dwt.h |  2 +
 5 files changed, 80 insertions(+), 68 deletions(-)

diff --git a/libavcodec/jpeg2000.c b/libavcodec/jpeg2000.c
index 945b787565..afff9809e4 100644
--- a/libavcodec/jpeg2000.c
+++ b/libavcodec/jpeg2000.c
@@ -52,17 +52,23 @@  static int32_t tag_tree_size(int w, int h)
 }
 
 /* allocate the memory for tag tree */
-static Jpeg2000TgtNode *ff_jpeg2000_tag_tree_init(int w, int h)
+static int ff_jpeg2000_tag_tree_init(Jpeg2000TgtNode **old, unsigned int *size, int w, int h)
 {
     int pw = w, ph = h;
-    Jpeg2000TgtNode *res, *t, *t2;
+    Jpeg2000TgtNode *t, *t2;
     int32_t tt_size;
+    size_t prod;
 
     tt_size = tag_tree_size(w, h);
 
-    t = res = av_calloc(tt_size, sizeof(*t));
-    if (!res)
-        return NULL;
+    if (av_size_mult(tt_size, sizeof(*t), &prod))
+        return AVERROR(ENOMEM);
+
+    av_fast_malloc(old, size, prod);
+    if (!*old)
+        return AVERROR(ENOMEM);
+    t = *old;
+    memset(*old, 0, prod);
 
     while (w > 1 || h > 1) {
         int i, j;
@@ -80,7 +86,7 @@  static Jpeg2000TgtNode *ff_jpeg2000_tag_tree_init(int w, int h)
         t = t2;
     }
     t[0].parent = NULL;
-    return res;
+    return 0;
 }
 
 void ff_tag_tree_zero(Jpeg2000TgtNode *t, int w, int h, int val)
@@ -278,7 +284,7 @@  static int init_prec(AVCodecContext *avctx,
                      int log2_band_prec_height)
 {
     Jpeg2000Prec *prec = band->prec + precno;
-    int nb_codeblocks, cblkno;
+    int nb_codeblocks, cblkno, ret;
 
     prec->decoded_layers = 0;
 
@@ -316,25 +322,22 @@  static int init_prec(AVCodecContext *avctx,
 
 
     /* Tag trees initialization */
-    prec->cblkincl =
-        ff_jpeg2000_tag_tree_init(prec->nb_codeblocks_width,
-                                  prec->nb_codeblocks_height);
-    if (!prec->cblkincl)
-        return AVERROR(ENOMEM);
-
-    prec->zerobits =
-        ff_jpeg2000_tag_tree_init(prec->nb_codeblocks_width,
-                                  prec->nb_codeblocks_height);
-    if (!prec->zerobits)
-        return AVERROR(ENOMEM);
+    if ((ret = ff_jpeg2000_tag_tree_init(&prec->cblkincl,
+                                         &prec->cblkincl_size,
+                                         prec->nb_codeblocks_width,
+                                         prec->nb_codeblocks_height)) < 0 ||
+        (ret = ff_jpeg2000_tag_tree_init(&prec->zerobits,
+                                         &prec->zerobits_size,
+                                         prec->nb_codeblocks_width,
+                                         prec->nb_codeblocks_height)) < 0)
+        return ret;
 
     if (prec->nb_codeblocks_width * (uint64_t)prec->nb_codeblocks_height > INT_MAX) {
         prec->cblk = NULL;
         return AVERROR(ENOMEM);
     }
     nb_codeblocks = prec->nb_codeblocks_width * prec->nb_codeblocks_height;
-    prec->cblk = av_calloc(nb_codeblocks, sizeof(*prec->cblk));
-    if (!prec->cblk)
+    if (av_reallocz_array_reuse(&prec->cblk, &prec->cblk_allocated, nb_codeblocks, INT_MAX, sizeof(*prec->cblk)))
         return AVERROR(ENOMEM);
     for (cblkno = 0; cblkno < nb_codeblocks; cblkno++) {
         Jpeg2000Cblk *cblk = prec->cblk + cblkno;
@@ -376,6 +379,7 @@  static int init_prec(AVCodecContext *avctx,
         cblk->length    = 0;
         cblk->npasses   = 0;
         if (av_codec_is_encoder(avctx->codec)) {
+            av_freep(&cblk->layers);
             cblk->layers = av_calloc(codsty->nlayers, sizeof(*cblk->layers));
             if (!cblk->layers)
                 return AVERROR(ENOMEM);
@@ -448,8 +452,7 @@  static int init_band(AVCodecContext *avctx,
         return AVERROR(ENOMEM);
     }
     nb_precincts = reslevel->num_precincts_x * reslevel->num_precincts_y;
-    band->prec = av_calloc(nb_precincts, sizeof(*band->prec));
-    if (!band->prec)
+    if (av_reallocz_array_reuse(&band->prec, &band->prec_allocated, nb_precincts, INT_MAX, sizeof(*band->prec)))
         return AVERROR(ENOMEM);
 
     for (precno = 0; precno < nb_precincts; precno++) {
@@ -471,6 +474,7 @@  int ff_jpeg2000_init_component(Jpeg2000Component *comp,
 {
     int reslevelno, bandno, gbandno = 0, ret, i, j;
     uint32_t csize;
+    size_t prod;
 
     if (codsty->nreslevels2decode <= 0) {
         av_log(avctx, AV_LOG_ERROR, "nreslevels2decode %d invalid or uninitialized\n", codsty->nreslevels2decode);
@@ -496,19 +500,22 @@  int ff_jpeg2000_init_component(Jpeg2000Component *comp,
 
     if (codsty->transform == FF_DWT97) {
         csize += AV_INPUT_BUFFER_PADDING_SIZE / sizeof(*comp->f_data);
-        comp->i_data = NULL;
-        comp->f_data = av_calloc(csize, sizeof(*comp->f_data));
+        if (av_size_mult(csize, sizeof(*comp->f_data), &prod))
+            return AVERROR(ENOMEM);
+        av_fast_malloc(&comp->f_data, &comp->f_data_size, prod);
         if (!comp->f_data)
             return AVERROR(ENOMEM);
+        memset(comp->f_data, 0, prod);
     } else {
         csize += AV_INPUT_BUFFER_PADDING_SIZE / sizeof(*comp->i_data);
-        comp->f_data = NULL;
-        comp->i_data = av_calloc(csize, sizeof(*comp->i_data));
+        if (av_size_mult(csize, sizeof(*comp->i_data), &prod))
+            return AVERROR(ENOMEM);
+        av_fast_malloc(&comp->i_data, &comp->i_data_size, prod);
         if (!comp->i_data)
             return AVERROR(ENOMEM);
+        memset(comp->i_data, 0, prod);
     }
-    comp->reslevel = av_calloc(codsty->nreslevels, sizeof(*comp->reslevel));
-    if (!comp->reslevel)
+    if (av_reallocz_array_reuse(&comp->reslevel, &comp->reslevel_allocated, codsty->nreslevels, INT_MAX, sizeof(*comp->reslevel)))
         return AVERROR(ENOMEM);
     /* LOOP on resolution levels */
     for (reslevelno = 0; reslevelno < codsty->nreslevels; reslevelno++) {
@@ -555,8 +562,7 @@  int ff_jpeg2000_init_component(Jpeg2000Component *comp,
                                         reslevel->log2_prec_height) -
                 (reslevel->coord[1][0] >> reslevel->log2_prec_height);
 
-        reslevel->band = av_calloc(reslevel->nbands, sizeof(*reslevel->band));
-        if (!reslevel->band)
+        if (av_reallocz_array_reuse(&reslevel->band, &reslevel->band_allocated, reslevel->nbands, INT_MAX, sizeof(*reslevel->band)))
             return AVERROR(ENOMEM);
 
         if (reslevel->num_precincts_x * (uint64_t)reslevel->num_precincts_y * reslevel->nbands > avctx->max_pixels / sizeof(*reslevel->band->prec))
@@ -597,9 +603,9 @@  void ff_jpeg2000_reinit(Jpeg2000Component *comp, Jpeg2000CodingStyle *codsty)
 
 void ff_jpeg2000_cleanup(Jpeg2000Component *comp, Jpeg2000CodingStyle *codsty)
 {
-    int reslevelno, bandno, precno;
+    size_t reslevelno, bandno, precno;
     for (reslevelno = 0;
-         comp->reslevel && reslevelno < codsty->nreslevels;
+         comp->reslevel && reslevelno < comp->reslevel_allocated;
          reslevelno++) {
         Jpeg2000ResLevel *reslevel;
 
@@ -607,23 +613,20 @@  void ff_jpeg2000_cleanup(Jpeg2000Component *comp, Jpeg2000CodingStyle *codsty)
             continue;
 
         reslevel = comp->reslevel + reslevelno;
-        for (bandno = 0; bandno < reslevel->nbands; bandno++) {
+        for (bandno = 0; bandno < reslevel->band_allocated; bandno++) {
             Jpeg2000Band *band;
 
             if (!reslevel->band)
                 continue;
 
             band = reslevel->band + bandno;
-            for (precno = 0; precno < reslevel->num_precincts_x * reslevel->num_precincts_y; precno++) {
+            for (precno = 0; precno < band->prec_allocated; precno++) {
                 if (band->prec) {
                     Jpeg2000Prec *prec = band->prec + precno;
-                    int nb_code_blocks = prec->nb_codeblocks_height * prec->nb_codeblocks_width;
-
                     av_freep(&prec->zerobits);
                     av_freep(&prec->cblkincl);
                     if (prec->cblk) {
-                        int cblkno;
-                        for (cblkno = 0; cblkno < nb_code_blocks; cblkno ++) {
+                        for (size_t cblkno = 0; cblkno < prec->cblk_allocated; cblkno ++) {
                             Jpeg2000Cblk *cblk = &prec->cblk[cblkno];
                             av_freep(&cblk->data);
                             av_freep(&cblk->passes);
diff --git a/libavcodec/jpeg2000.h b/libavcodec/jpeg2000.h
index 389813a9b9..6594d8e5cb 100644
--- a/libavcodec/jpeg2000.h
+++ b/libavcodec/jpeg2000.h
@@ -179,6 +179,7 @@  typedef struct Jpeg2000Cblk {
     uint8_t incl;
     uint16_t length;
     uint16_t *lengthinc;
+    size_t lengthinc_allocated;
     uint8_t nb_lengthinc;
     uint8_t lblock;
     uint8_t *data;
@@ -186,6 +187,7 @@  typedef struct Jpeg2000Cblk {
     int nb_terminations;
     int nb_terminationsinc;
     int *data_start;
+    size_t data_start_allocated;
     Jpeg2000Pass *passes;
     Jpeg2000Layer *layers;
     int coord[2][2]; // border coordinates {{x0, x1}, {y0, y1}}
@@ -195,8 +197,11 @@  typedef struct Jpeg2000Prec {
     int nb_codeblocks_width;
     int nb_codeblocks_height;
     Jpeg2000TgtNode *zerobits;
+    unsigned int zerobits_size;
     Jpeg2000TgtNode *cblkincl;
+    unsigned int cblkincl_size;
     Jpeg2000Cblk *cblk;
+    size_t cblk_allocated;
     int decoded_layers;
     int coord[2][2]; // border coordinates {{x0, x1}, {y0, y1}}
 } Jpeg2000Prec; // precinct
@@ -207,6 +212,7 @@  typedef struct Jpeg2000Band {
     int i_stepsize; // quantization stepsize
     float f_stepsize; // quantization stepsize
     Jpeg2000Prec *prec;
+    size_t prec_allocated;
 } Jpeg2000Band; // subband
 
 typedef struct Jpeg2000ResLevel {
@@ -215,13 +221,17 @@  typedef struct Jpeg2000ResLevel {
     int num_precincts_x, num_precincts_y; // number of precincts in x/y direction
     uint8_t log2_prec_width, log2_prec_height; // exponent of precinct size
     Jpeg2000Band *band;
+    size_t band_allocated;
 } Jpeg2000ResLevel; // resolution level
 
 typedef struct Jpeg2000Component {
     Jpeg2000ResLevel *reslevel;
+    size_t reslevel_allocated;
     DWTContext dwt;
     float *f_data;
+    unsigned int f_data_size;
     int *i_data;
+    unsigned int i_data_size;
     int coord[2][2];   // border coordinates {{x0, x1}, {y0, y1}} -- can be reduced with lowres option
     int coord_o[2][2]; // border coordinates {{x0, x1}, {y0, y1}} -- original values from jpeg2000 headers
     uint8_t roi_shift; // ROI scaling value for the component
diff --git a/libavcodec/jpeg2000dec.c b/libavcodec/jpeg2000dec.c
index 00aa73e261..49a815a9b0 100644
--- a/libavcodec/jpeg2000dec.c
+++ b/libavcodec/jpeg2000dec.c
@@ -79,6 +79,7 @@  typedef struct Jpeg2000TilePart {
  * one per component, so tile_part elements have a size of 3 */
 typedef struct Jpeg2000Tile {
     Jpeg2000Component   *comp;
+    size_t              comp_allocated;
     uint8_t             properties[4];
     Jpeg2000CodingStyle codsty[4];
     Jpeg2000QuantStyle  qntsty[4];
@@ -141,6 +142,7 @@  typedef struct Jpeg2000DecoderContext {
     int             curtileno;
 
     Jpeg2000Tile    *tile;
+    size_t          tile_allocated;
     Jpeg2000DSPContext dsp;
 
     /*options parameters*/
@@ -380,8 +382,7 @@  static int get_siz(Jpeg2000DecoderContext *s)
         return AVERROR(EINVAL);
     }
 
-    s->tile = av_calloc(s->numXtiles * s->numYtiles, sizeof(*s->tile));
-    if (!s->tile) {
+    if (av_reallocz_array_reuse(&s->tile, &s->tile_allocated, s->numXtiles * s->numYtiles, INT_MAX, sizeof(*s->tile))) {
         s->numXtiles = s->numYtiles = 0;
         return AVERROR(ENOMEM);
     }
@@ -389,8 +390,7 @@  static int get_siz(Jpeg2000DecoderContext *s)
     for (i = 0; i < s->numXtiles * s->numYtiles; i++) {
         Jpeg2000Tile *tile = s->tile + i;
 
-        tile->comp = av_mallocz(s->ncomponents * sizeof(*tile->comp));
-        if (!tile->comp)
+        if (av_reallocz_array_reuse(&tile->comp, &tile->comp_allocated, s->ncomponents, INT_MAX, sizeof(*tile->comp)))
             return AVERROR(ENOMEM);
     }
 
@@ -1160,7 +1160,6 @@  static int jpeg2000_decode_packet(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile,
         for (cblkno = 0; cblkno < nb_code_blocks; cblkno++) {
             Jpeg2000Cblk *cblk = prec->cblk + cblkno;
             int incl, newpasses, llen;
-            void *tmp;
 
             if (cblk->npasses)
                 incl = get_bits(s, 1);
@@ -1200,14 +1199,10 @@  static int jpeg2000_decode_packet(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile,
 
             cblk->nb_lengthinc = 0;
             cblk->nb_terminationsinc = 0;
-            av_free(cblk->lengthinc);
-            cblk->lengthinc = av_calloc(newpasses, sizeof(*cblk->lengthinc));
-            if (!cblk->lengthinc)
+            if (av_realloc_array_reuse(&cblk->lengthinc, &cblk->lengthinc_allocated, newpasses, INT_MAX, sizeof(*cblk->lengthinc)) ||
+                av_realloc_array_reuse(&cblk->data_start, &cblk->data_start_allocated, cblk->nb_terminations + newpasses + 1, INT_MAX, sizeof(*cblk->data_start)))
                 return AVERROR(ENOMEM);
-            tmp = av_realloc_array(cblk->data_start, cblk->nb_terminations + newpasses + 1, sizeof(*cblk->data_start));
-            if (!tmp)
-                return AVERROR(ENOMEM);
-            cblk->data_start = tmp;
+
             do {
                 int newpasses1 = 0;
 
@@ -1296,7 +1291,6 @@  static int jpeg2000_decode_packet(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile,
                     cblk->data_start[cblk->nb_terminations] = cblk->length;
                 }
             }
-            av_freep(&cblk->lengthinc);
         }
     }
     // Save state of stream
@@ -2172,24 +2166,9 @@  static int jpeg2000_mct_write_frame(AVCodecContext *avctx, void *td,
 
 static void jpeg2000_dec_cleanup(Jpeg2000DecoderContext *s)
 {
-    int tileno, compno;
-    for (tileno = 0; tileno < s->numXtiles * s->numYtiles; tileno++) {
-        if (s->tile[tileno].comp) {
-            for (compno = 0; compno < s->ncomponents; compno++) {
-                Jpeg2000Component *comp     = s->tile[tileno].comp   + compno;
-                Jpeg2000CodingStyle *codsty = s->tile[tileno].codsty + compno;
-
-                ff_jpeg2000_cleanup(comp, codsty);
-            }
-            av_freep(&s->tile[tileno].comp);
-            av_freep(&s->tile[tileno].packed_headers);
-            s->tile[tileno].packed_headers_size = 0;
-        }
-    }
     av_freep(&s->packed_headers);
     s->packed_headers_size = 0;
     memset(&s->packed_headers_stream, 0, sizeof(s->packed_headers_stream));
-    av_freep(&s->tile);
     memset(s->codsty, 0, sizeof(s->codsty));
     memset(s->qntsty, 0, sizeof(s->qntsty));
     memset(s->properties, 0, sizeof(s->properties));
@@ -2726,6 +2705,19 @@  static av_cold int jpeg2000_decode_close(AVCodecContext *avctx)
 {
     Jpeg2000DecoderContext *s = avctx->priv_data;
 
+    for (size_t tileno = 0; tileno < s->tile_allocated; tileno++) {
+        if (s->tile[tileno].comp) {
+            for (size_t compno = 0; compno < s->tile[tileno].comp_allocated; compno++) {
+                Jpeg2000Component *comp     = s->tile[tileno].comp   + compno;
+                Jpeg2000CodingStyle *codsty = s->tile[tileno].codsty + compno;
+
+                ff_jpeg2000_cleanup(comp, codsty);
+            }
+            av_freep(&s->tile[tileno].comp);
+            av_freep(&s->tile[tileno].packed_headers);
+        }
+    }
+    av_freep(&s->tile);
     av_freep(&s->idwt);
     av_freep(&s->cb);
 
diff --git a/libavcodec/jpeg2000dwt.c b/libavcodec/jpeg2000dwt.c
index 921461b6d7..f3ddefe48f 100644
--- a/libavcodec/jpeg2000dwt.c
+++ b/libavcodec/jpeg2000dwt.c
@@ -531,6 +531,7 @@  int ff_jpeg2000_dwt_init(DWTContext *s, int border[2][2],
 {
     int i, j, lev = decomp_levels,
         b[2][2];
+    size_t prod;
 
     s->ndeclevels = decomp_levels;
     s->type       = type;
@@ -556,11 +557,15 @@  int ff_jpeg2000_dwt_init(DWTContext *s, int border[2][2],
         }
 
     if (type == FF_DWT97) {
-        s->f_linebuf = av_malloc_array(s->linesize, s->max_slices*sizeof(*s->f_linebuf));
+        if (av_size_mult(s->linesize, s->max_slices*sizeof(*s->f_linebuf), &prod))
+            return AVERROR(ENOMEM);
+        av_fast_malloc(&s->f_linebuf, &s->f_linebuf_size, prod);
         if (!s->f_linebuf)
             return AVERROR(ENOMEM);
     } else {
-        s->i_linebuf = av_malloc_array(s->linesize, s->max_slices*sizeof(*s->i_linebuf));
+        if (av_size_mult(s->linesize, s->max_slices*sizeof(*s->i_linebuf), &prod))
+            return AVERROR(ENOMEM);
+        av_fast_malloc(&s->i_linebuf, &s->i_linebuf_size, prod);
         if (!s->i_linebuf)
             return AVERROR(ENOMEM);
     }
diff --git a/libavcodec/jpeg2000dwt.h b/libavcodec/jpeg2000dwt.h
index d5e94c9916..fb6fc8f121 100644
--- a/libavcodec/jpeg2000dwt.h
+++ b/libavcodec/jpeg2000dwt.h
@@ -48,7 +48,9 @@  typedef struct DWTContext {
     uint8_t ndeclevels;                  ///< number of decomposition levels
     uint8_t type;                        ///< 0 for 9/7; 1 for 5/3
     int32_t *i_linebuf;                  ///< int buffer used by transform
+    unsigned int i_linebuf_size;
     float   *f_linebuf;                  ///< float buffer used by transform
+    unsigned int f_linebuf_size;
     int max_slices;
     int linesize;
 } DWTContext;
-- 
2.30.2