From 72a5f47503338a4fff816440ad64bc62cc23a738 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomas=20H=C3=A4rdin?= <git@haerdin.se>
Date: Mon, 13 Jun 2022 17:04:10 +0200
Subject: [PATCH 07/13] lavc/jpeg2000*: Use ff_fast_recalloc() to eliminate
lots of allocations
---
libavcodec/jpeg2000.c | 72 +++++++++++++++++++++-------------------
libavcodec/jpeg2000.h | 9 +++++
libavcodec/jpeg2000dec.c | 28 ++++++++--------
libavcodec/jpeg2000dwt.c | 9 +++--
libavcodec/jpeg2000dwt.h | 2 ++
5 files changed, 70 insertions(+), 50 deletions(-)
@@ -52,17 +52,23 @@ static int32_t tag_tree_size(int w, int h)
}
/* allocate the memory for tag tree */
-static Jpeg2000TgtNode *ff_jpeg2000_tag_tree_init(int w, int h)
+static int ff_jpeg2000_tag_tree_init(Jpeg2000TgtNode **old, unsigned int *size, int w, int h)
{
int pw = w, ph = h;
- Jpeg2000TgtNode *res, *t, *t2;
+ Jpeg2000TgtNode *t, *t2;
int32_t tt_size;
+ size_t prod;
tt_size = tag_tree_size(w, h);
- t = res = av_calloc(tt_size, sizeof(*t));
- if (!res)
- return NULL;
+ if (av_size_mult(tt_size, sizeof(*t), &prod))
+ return AVERROR(ENOMEM);
+
+ av_fast_malloc(old, size, prod);
+ if (!*old)
+ return AVERROR(ENOMEM);
+ t = *old;
+ memset(*old, 0, prod);
while (w > 1 || h > 1) {
int i, j;
@@ -80,7 +86,7 @@ static Jpeg2000TgtNode *ff_jpeg2000_tag_tree_init(int w, int h)
t = t2;
}
t[0].parent = NULL;
- return res;
+ return 0;
}
void ff_tag_tree_zero(Jpeg2000TgtNode *t, int w, int h, int val)
@@ -316,16 +322,14 @@ static int init_prec(AVCodecContext *avctx,
/* Tag trees initialization */
- prec->cblkincl =
- ff_jpeg2000_tag_tree_init(prec->nb_codeblocks_width,
- prec->nb_codeblocks_height);
- if (!prec->cblkincl)
- return AVERROR(ENOMEM);
-
- prec->zerobits =
- ff_jpeg2000_tag_tree_init(prec->nb_codeblocks_width,
- prec->nb_codeblocks_height);
- if (!prec->zerobits)
+ if (ff_jpeg2000_tag_tree_init(&prec->cblkincl,
+ &prec->cblkincl_size,
+ prec->nb_codeblocks_width,
+ prec->nb_codeblocks_height) ||
+ ff_jpeg2000_tag_tree_init(&prec->zerobits,
+ &prec->zerobits_size,
+ prec->nb_codeblocks_width,
+ prec->nb_codeblocks_height))
return AVERROR(ENOMEM);
if (prec->nb_codeblocks_width * (uint64_t)prec->nb_codeblocks_height > INT_MAX) {
@@ -333,8 +337,7 @@ static int init_prec(AVCodecContext *avctx,
return AVERROR(ENOMEM);
}
nb_codeblocks = prec->nb_codeblocks_width * prec->nb_codeblocks_height;
- prec->cblk = av_calloc(nb_codeblocks, sizeof(*prec->cblk));
- if (!prec->cblk)
+ if (ff_fast_recalloc(&prec->cblk, &prec->cblk_size, nb_codeblocks, sizeof(*prec->cblk)))
return AVERROR(ENOMEM);
for (cblkno = 0; cblkno < nb_codeblocks; cblkno++) {
Jpeg2000Cblk *cblk = prec->cblk + cblkno;
@@ -376,6 +379,7 @@ static int init_prec(AVCodecContext *avctx,
cblk->length = 0;
cblk->npasses = 0;
if (av_codec_is_encoder(avctx->codec)) {
+ av_freep(&cblk->layers);
cblk->layers = av_calloc(codsty->nlayers, sizeof(*cblk->layers));
if (!cblk->layers)
return AVERROR(ENOMEM);
@@ -448,8 +452,7 @@ static int init_band(AVCodecContext *avctx,
return AVERROR(ENOMEM);
}
nb_precincts = reslevel->num_precincts_x * reslevel->num_precincts_y;
- band->prec = av_calloc(nb_precincts, sizeof(*band->prec));
- if (!band->prec)
+ if (ff_fast_recalloc(&band->prec, &band->prec_size, nb_precincts, sizeof(*band->prec)))
return AVERROR(ENOMEM);
for (precno = 0; precno < nb_precincts; precno++) {
@@ -471,6 +474,7 @@ int ff_jpeg2000_init_component(Jpeg2000Component *comp,
{
int reslevelno, bandno, gbandno = 0, ret, i, j;
uint32_t csize;
+ size_t prod;
if (codsty->nreslevels2decode <= 0) {
av_log(avctx, AV_LOG_ERROR, "nreslevels2decode %d invalid or uninitialized\n", codsty->nreslevels2decode);
@@ -496,19 +500,22 @@ int ff_jpeg2000_init_component(Jpeg2000Component *comp,
if (codsty->transform == FF_DWT97) {
csize += AV_INPUT_BUFFER_PADDING_SIZE / sizeof(*comp->f_data);
- comp->i_data = NULL;
- comp->f_data = av_calloc(csize, sizeof(*comp->f_data));
+ if (av_size_mult(csize, sizeof(*comp->f_data), &prod))
+ return AVERROR(ENOMEM);
+ av_fast_malloc(&comp->f_data, &comp->f_data_size, prod);
if (!comp->f_data)
return AVERROR(ENOMEM);
+ memset(comp->f_data, 0, prod);
} else {
csize += AV_INPUT_BUFFER_PADDING_SIZE / sizeof(*comp->i_data);
- comp->f_data = NULL;
- comp->i_data = av_calloc(csize, sizeof(*comp->i_data));
+ if (av_size_mult(csize, sizeof(*comp->i_data), &prod))
+ return AVERROR(ENOMEM);
+ av_fast_malloc(&comp->i_data, &comp->i_data_size, prod);
if (!comp->i_data)
return AVERROR(ENOMEM);
+ memset(comp->i_data, 0, prod);
}
- comp->reslevel = av_calloc(codsty->nreslevels, sizeof(*comp->reslevel));
- if (!comp->reslevel)
+ if (ff_fast_recalloc(&comp->reslevel, &comp->reslevel_size, codsty->nreslevels, sizeof(*comp->reslevel)))
return AVERROR(ENOMEM);
/* LOOP on resolution levels */
for (reslevelno = 0; reslevelno < codsty->nreslevels; reslevelno++) {
@@ -555,8 +562,7 @@ int ff_jpeg2000_init_component(Jpeg2000Component *comp,
reslevel->log2_prec_height) -
(reslevel->coord[1][0] >> reslevel->log2_prec_height);
- reslevel->band = av_calloc(reslevel->nbands, sizeof(*reslevel->band));
- if (!reslevel->band)
+ if (ff_fast_recalloc(&reslevel->band, &reslevel->band_size, reslevel->nbands, sizeof(*reslevel->band)))
return AVERROR(ENOMEM);
if (reslevel->num_precincts_x * (uint64_t)reslevel->num_precincts_y * reslevel->nbands > avctx->max_pixels / sizeof(*reslevel->band->prec))
@@ -599,7 +605,7 @@ void ff_jpeg2000_cleanup(Jpeg2000Component *comp, Jpeg2000CodingStyle *codsty)
{
int reslevelno, bandno, precno;
for (reslevelno = 0;
- comp->reslevel && reslevelno < codsty->nreslevels;
+ comp->reslevel && reslevelno < comp->reslevel_size/sizeof(*comp->reslevel);
reslevelno++) {
Jpeg2000ResLevel *reslevel;
@@ -607,23 +613,21 @@ void ff_jpeg2000_cleanup(Jpeg2000Component *comp, Jpeg2000CodingStyle *codsty)
continue;
reslevel = comp->reslevel + reslevelno;
- for (bandno = 0; bandno < reslevel->nbands; bandno++) {
+ for (bandno = 0; bandno < reslevel->band_size/sizeof(*reslevel->band); bandno++) {
Jpeg2000Band *band;
if (!reslevel->band)
continue;
band = reslevel->band + bandno;
- for (precno = 0; precno < reslevel->num_precincts_x * reslevel->num_precincts_y; precno++) {
+ for (precno = 0; precno < band->prec_size/sizeof(*band->prec); precno++) {
if (band->prec) {
Jpeg2000Prec *prec = band->prec + precno;
- int nb_code_blocks = prec->nb_codeblocks_height * prec->nb_codeblocks_width;
-
av_freep(&prec->zerobits);
av_freep(&prec->cblkincl);
if (prec->cblk) {
int cblkno;
- for (cblkno = 0; cblkno < nb_code_blocks; cblkno ++) {
+ for (cblkno = 0; cblkno < prec->cblk_size/sizeof(*prec->cblk); cblkno ++) {
Jpeg2000Cblk *cblk = &prec->cblk[cblkno];
av_freep(&cblk->data);
av_freep(&cblk->passes);
@@ -177,6 +177,7 @@ typedef struct Jpeg2000Cblk {
uint8_t incl;
uint16_t length;
uint16_t *lengthinc;
+ unsigned int lengthinc_size;
uint8_t nb_lengthinc;
uint8_t lblock;
uint8_t *data;
@@ -193,8 +194,11 @@ typedef struct Jpeg2000Prec {
int nb_codeblocks_width;
int nb_codeblocks_height;
Jpeg2000TgtNode *zerobits;
+ unsigned int zerobits_size;
Jpeg2000TgtNode *cblkincl;
+ unsigned int cblkincl_size;
Jpeg2000Cblk *cblk;
+ unsigned int cblk_size;
int decoded_layers;
int coord[2][2]; // border coordinates {{x0, x1}, {y0, y1}}
} Jpeg2000Prec; // precinct
@@ -205,6 +209,7 @@ typedef struct Jpeg2000Band {
int i_stepsize; // quantization stepsize
float f_stepsize; // quantization stepsize
Jpeg2000Prec *prec;
+ unsigned int prec_size;
} Jpeg2000Band; // subband
typedef struct Jpeg2000ResLevel {
@@ -213,13 +218,17 @@ typedef struct Jpeg2000ResLevel {
int num_precincts_x, num_precincts_y; // number of precincts in x/y direction
uint8_t log2_prec_width, log2_prec_height; // exponent of precinct size
Jpeg2000Band *band;
+ unsigned int band_size;
} Jpeg2000ResLevel; // resolution level
typedef struct Jpeg2000Component {
Jpeg2000ResLevel *reslevel;
+ unsigned int reslevel_size;
DWTContext dwt;
float *f_data;
+ unsigned int f_data_size;
int *i_data;
+ unsigned int i_data_size;
int coord[2][2]; // border coordinates {{x0, x1}, {y0, y1}} -- can be reduced with lowres option
int coord_o[2][2]; // border coordinates {{x0, x1}, {y0, y1}} -- original values from jpeg2000 headers
uint8_t roi_shift; // ROI scaling value for the component
@@ -79,6 +79,7 @@ typedef struct Jpeg2000TilePart {
* one per component, so tile_part elements have a size of 3 */
typedef struct Jpeg2000Tile {
Jpeg2000Component *comp;
+ unsigned int comp_size;
uint8_t properties[4];
Jpeg2000CodingStyle codsty[4];
Jpeg2000QuantStyle qntsty[4];
@@ -141,6 +142,7 @@ typedef struct Jpeg2000DecoderContext {
int curtileno;
Jpeg2000Tile *tile;
+ unsigned int tile_size;
Jpeg2000DSPContext dsp;
/*options parameters*/
@@ -380,8 +382,7 @@ static int get_siz(Jpeg2000DecoderContext *s)
return AVERROR(EINVAL);
}
- s->tile = av_calloc(s->numXtiles * s->numYtiles, sizeof(*s->tile));
- if (!s->tile) {
+ if (ff_fast_recalloc(&s->tile, &s->tile_size, s->numXtiles * s->numYtiles, sizeof(*s->tile))) {
s->numXtiles = s->numYtiles = 0;
return AVERROR(ENOMEM);
}
@@ -389,8 +390,7 @@ static int get_siz(Jpeg2000DecoderContext *s)
for (i = 0; i < s->numXtiles * s->numYtiles; i++) {
Jpeg2000Tile *tile = s->tile + i;
- tile->comp = av_mallocz(s->ncomponents * sizeof(*tile->comp));
- if (!tile->comp)
+ if (ff_fast_recalloc(&tile->comp, &tile->comp_size, s->ncomponents, sizeof(*tile->comp)))
return AVERROR(ENOMEM);
}
@@ -1196,9 +1196,7 @@ static int jpeg2000_decode_packet(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile,
cblk->nb_lengthinc = 0;
cblk->nb_terminationsinc = 0;
- av_free(cblk->lengthinc);
- cblk->lengthinc = av_calloc(newpasses, sizeof(*cblk->lengthinc));
- if (!cblk->lengthinc)
+ if (ff_fast_recalloc(&cblk->lengthinc, &cblk->lengthinc_size, newpasses, sizeof(*cblk->lengthinc)))
return AVERROR(ENOMEM);
tmp = av_realloc_array(cblk->data_start, cblk->nb_terminations + newpasses + 1, sizeof(*cblk->data_start));
if (!tmp)
@@ -1292,7 +1290,6 @@ static int jpeg2000_decode_packet(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile,
cblk->data_start[cblk->nb_terminations] = cblk->length;
}
}
- av_freep(&cblk->lengthinc);
}
}
// Save state of stream
@@ -2166,12 +2163,13 @@ static int jpeg2000_mct_write_frame(AVCodecContext *avctx, void *td,
return 0;
}
-static void jpeg2000_dec_cleanup(Jpeg2000DecoderContext *s)
+static void jpeg2000_dec_cleanup(Jpeg2000DecoderContext *s, int close)
{
int tileno, compno;
- for (tileno = 0; tileno < s->numXtiles * s->numYtiles; tileno++) {
+ if (close) {
+ for (tileno = 0; tileno < s->tile_size/sizeof(*s->tile); tileno++) {
if (s->tile[tileno].comp) {
- for (compno = 0; compno < s->ncomponents; compno++) {
+ for (compno = 0; compno < s->tile[tileno].comp_size/sizeof(*s->tile[tileno].comp); compno++) {
Jpeg2000Component *comp = s->tile[tileno].comp + compno;
Jpeg2000CodingStyle *codsty = s->tile[tileno].codsty + compno;
@@ -2182,10 +2180,11 @@ static void jpeg2000_dec_cleanup(Jpeg2000DecoderContext *s)
s->tile[tileno].packed_headers_size = 0;
}
}
+ av_freep(&s->tile);
+ }
av_freep(&s->packed_headers);
s->packed_headers_size = 0;
memset(&s->packed_headers_stream, 0, sizeof(s->packed_headers_stream));
- av_freep(&s->tile);
memset(s->codsty, 0, sizeof(s->codsty));
memset(s->qntsty, 0, sizeof(s->qntsty));
memset(s->properties, 0, sizeof(s->properties));
@@ -2689,7 +2688,7 @@ static int jpeg2000_decode_frame(AVCodecContext *avctx, AVFrame *picture,
avctx->execute2(avctx, jpeg2000_mct_write_frame, picture, NULL, s->numXtiles * s->numYtiles);
- jpeg2000_dec_cleanup(s);
+ jpeg2000_dec_cleanup(s, 0);
*got_frame = 1;
@@ -2702,7 +2701,7 @@ static int jpeg2000_decode_frame(AVCodecContext *avctx, AVFrame *picture,
return bytestream2_tell(&s->g);
end:
- jpeg2000_dec_cleanup(s);
+ jpeg2000_dec_cleanup(s, 0);
return ret;
}
@@ -2712,6 +2711,7 @@ static av_cold int jpeg2000_decode_close(AVCodecContext *avctx)
av_freep(&s->idwt);
av_freep(&s->cb);
+ jpeg2000_dec_cleanup(s, 1);
return 0;
}
@@ -531,6 +531,7 @@ int ff_jpeg2000_dwt_init(DWTContext *s, int border[2][2],
{
int i, j, lev = decomp_levels,
b[2][2];
+ size_t prod;
s->ndeclevels = decomp_levels;
s->type = type;
@@ -556,11 +557,15 @@ int ff_jpeg2000_dwt_init(DWTContext *s, int border[2][2],
}
if (type == FF_DWT97) {
- s->f_linebuf = av_malloc_array(s->linesize, s->max_slices*sizeof(*s->f_linebuf));
+ if (av_size_mult(s->linesize, s->max_slices*sizeof(*s->f_linebuf), &prod))
+ return AVERROR(ENOMEM);
+ av_fast_malloc(&s->f_linebuf, &s->f_linebuf_size, prod);
if (!s->f_linebuf)
return AVERROR(ENOMEM);
} else {
- s->i_linebuf = av_malloc_array(s->linesize, s->max_slices*sizeof(*s->i_linebuf));
+ if (av_size_mult(s->linesize, s->max_slices*sizeof(*s->i_linebuf), &prod))
+ return AVERROR(ENOMEM);
+ av_fast_malloc(&s->i_linebuf, &s->i_linebuf_size, prod);
if (!s->i_linebuf)
return AVERROR(ENOMEM);
}
@@ -48,7 +48,9 @@ typedef struct DWTContext {
uint8_t ndeclevels; ///< number of decomposition levels
uint8_t type; ///< 0 for 9/7; 1 for 5/3
int32_t *i_linebuf; ///< int buffer used by transform
+ unsigned int i_linebuf_size;
float *f_linebuf; ///< float buffer used by transform
+ unsigned int f_linebuf_size;
int max_slices;
int linesize;
} DWTContext;
--
2.30.2