diff mbox series

[FFmpeg-devel,11/13] lavc/jpeg2000: Minimize calls to av_codec_is_encoder()

Message ID 3aeedc34b26bed5f82cadfc39fe7cce968bdc698.camel@acc.umu.se
State New
Headers show
Series [FFmpeg-devel,01/13] lavc/jpeg2000dec: Finer granularity threading | expand

Checks

Context Check Description
andriy/configure_x86 warning Failed to apply patch

Commit Message

Tomas Härdin June 14, 2022, 2:44 p.m. UTC

Comments

Andreas Rheinhardt June 14, 2022, 3:04 p.m. UTC | #1
Tomas Härdin:
> 
> 

Why call it at all? Why not just add a new parameter to
ff_jpeg2000_init_component that is always set to 1 when called from the
encoder and 0 when called from the decoder?
(And is this really a bottleneck?)

- Andreas
Tomas Härdin June 15, 2022, 10:20 a.m. UTC | #2
tis 2022-06-14 klockan 17:04 +0200 skrev Andreas Rheinhardt:
> Tomas Härdin:
> > 
> > 
> 
> Why call it at all? Why not just add a new parameter to
> ff_jpeg2000_init_component that is always set to 1 when called from
> the
> encoder and 0 when called from the decoder?

Oh yeah, that's even simpler.

> (And is this really a bottleneck?)

Callgrind certainly thinks so. av_codec_is_encoder() is called hundreds
of thousands of times per frame. Remember that this is in the serial part
of the code, so any savings there get amplified -threads-fold fps-wise.
init_tile() accounts for a mere 0.8 seconds out of 59.8, partly thanks to
this. Here's a rough breakdown (in seconds) for the curious, with -threads 64:

 0.8 everything up to and including init_tiles()
23.0 jpeg2000_read_bitstream_packets()
 0.1 jpeg2000_setup_cbs()
24.1 jpeg2000_decode_cb()
 9.8 jpeg2000_idwt()
 2.0 jpeg2000_mct_write_frame()
 0.0 jpeg2000_dec_cleanup()

jpeg2000_read_bitstream_packets() is obviously the main thing to focus
on for anyone wanting to bump the speed up even more, but it's nasty.
Maybe it could be tile-threaded, but that would take some doing.

/Tomas
diff mbox series

Patch

From 5b492d4e92a11946fd7425497205b1842fa1912c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomas=20H=C3=A4rdin?= <git@haerdin.se>
Date: Tue, 14 Jun 2022 10:57:45 +0200
Subject: [PATCH 11/13] lavc/jpeg2000: Minimize calls to av_codec_is_encoder()

---
 libavcodec/jpeg2000.c | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/libavcodec/jpeg2000.c b/libavcodec/jpeg2000.c
index 8ee50b77c5..2e3c33303b 100644
--- a/libavcodec/jpeg2000.c
+++ b/libavcodec/jpeg2000.c
@@ -247,7 +247,7 @@  static void init_band_stepsize(AVCodecContext *avctx,
                                Jpeg2000CodingStyle *codsty,
                                Jpeg2000QuantStyle *qntsty,
                                int bandno, int gbandno, int reslevelno,
-                               int cbps)
+                               int cbps, int is_enc)
 {
     /* TODO: Implementation of quantization step not finished,
      * see ISO/IEC 15444-1:2002 E.1 and A.6.4. */
@@ -305,7 +305,7 @@  static void init_band_stepsize(AVCodecContext *avctx,
 
     /* FIXME: In OpenJPEG code stepsize = stepsize * 0.5. Why?
      * If not set output of entropic decoder is not correct. */
-    if (!av_codec_is_encoder(avctx->codec))
+    if (!is_enc)
         band->f_stepsize *= 0.5;
 }
 
@@ -316,7 +316,8 @@  static int init_prec(AVCodecContext *avctx,
                      Jpeg2000CodingStyle *codsty,
                      int precno, int bandno, int reslevelno,
                      int log2_band_prec_width,
-                     int log2_band_prec_height)
+                     int log2_band_prec_height,
+                     int is_enc)
 {
     Jpeg2000Prec *prec = band->prec + precno;
     int nb_codeblocks, cblkno;
@@ -413,7 +414,7 @@  static int init_prec(AVCodecContext *avctx,
         cblk->lblock    = 3;
         cblk->length    = 0;
         cblk->npasses   = 0;
-        if (av_codec_is_encoder(avctx->codec)) {
+        if (is_enc) {
             av_freep(&cblk->layers);
             cblk->layers = av_calloc(codsty->nlayers, sizeof(*cblk->layers));
             if (!cblk->layers)
@@ -430,7 +431,7 @@  static int init_band(AVCodecContext *avctx,
                      Jpeg2000CodingStyle *codsty,
                      Jpeg2000QuantStyle *qntsty,
                      int bandno, int gbandno, int reslevelno,
-                     int cbps, int dx, int dy)
+                     int cbps, int dx, int dy, int is_enc)
 {
     Jpeg2000Band *band = reslevel->band + bandno;
     uint8_t log2_band_prec_width, log2_band_prec_height;
@@ -439,7 +440,7 @@  static int init_band(AVCodecContext *avctx,
     int nb_precincts;
     int i, j, ret;
 
-    init_band_stepsize(avctx, band, codsty, qntsty, bandno, gbandno, reslevelno, cbps);
+    init_band_stepsize(avctx, band, codsty, qntsty, bandno, gbandno, reslevelno, cbps, is_enc);
 
     /* computation of tbx_0, tbx_1, tby_0, tby_1
      * see ISO/IEC 15444-1:2002 B.5 eq. B-15 and tbl B.1
@@ -493,7 +494,8 @@  static int init_band(AVCodecContext *avctx,
     for (precno = 0; precno < nb_precincts; precno++) {
         ret = init_prec(avctx, band, reslevel, comp, codsty,
                         precno, bandno, reslevelno,
-                        log2_band_prec_width, log2_band_prec_height);
+                        log2_band_prec_width, log2_band_prec_height,
+                        is_enc);
         if (ret < 0)
             return ret;
     }
@@ -510,6 +512,7 @@  int ff_jpeg2000_init_component(Jpeg2000Component *comp,
     int reslevelno, bandno, gbandno = 0, ret, i, j;
     uint32_t csize;
     size_t prod;
+    int is_enc = av_codec_is_encoder(avctx->codec);
 
     if (codsty->nreslevels2decode <= 0) {
         av_log(avctx, AV_LOG_ERROR, "nreslevels2decode %d invalid or uninitialized\n", codsty->nreslevels2decode);
@@ -607,7 +610,7 @@  int ff_jpeg2000_init_component(Jpeg2000Component *comp,
             ret = init_band(avctx, reslevel,
                             comp, codsty, qntsty,
                             bandno, gbandno, reslevelno,
-                            cbps, dx, dy);
+                            cbps, dx, dy, is_enc);
             if (ret < 0)
                 return ret;
         }
-- 
2.30.2