diff mbox series

[FFmpeg-devel,03/11] lavc/jpeg2000dwt: Implement sliced transforms

Message ID bf14453b049acb93cab0f4fad73902311c30e645.camel@haerdin.se
State New
Headers show
Series [FFmpeg-devel,01/11] lavc/jpeg2000dec: Finer granularity threading | expand

Checks

Context Check Description
andriy/configure_x86 warning Failed to apply patch

Commit Message

Tomas Härdin Sept. 28, 2022, 10:05 a.m. UTC

diff mbox series

Patch

From 159d744f09f39e3350ac39ac5d05feaca22103af Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomas=20H=C3=A4rdin?= <git@haerdin.se>
Date: Fri, 10 Jun 2022 17:18:14 +0200
Subject: [PATCH 03/11] lavc/jpeg2000dwt: Implement sliced transforms

lavc/tests/jpeg2000dwt tests this.
---
 libavcodec/j2kenc.c            |   3 +-
 libavcodec/jpeg2000.c          |   5 +-
 libavcodec/jpeg2000.h          |   2 +-
 libavcodec/jpeg2000dec.c       | 109 +++++++++++++++++++++++----
 libavcodec/jpeg2000dwt.c       | 132 +++++++++++++++++++--------------
 libavcodec/jpeg2000dwt.h       |   6 +-
 libavcodec/tests/jpeg2000dwt.c |  15 ++--
 7 files changed, 189 insertions(+), 83 deletions(-)

diff --git a/libavcodec/j2kenc.c b/libavcodec/j2kenc.c
index e883d5deb7..cd325e94e0 100644
--- a/libavcodec/j2kenc.c
+++ b/libavcodec/j2kenc.c
@@ -494,7 +494,8 @@  static int init_tiles(Jpeg2000EncoderContext *s)
                                                 s->cbps[compno],
                                                 compno?1<<s->chroma_shift[0]:1,
                                                 compno?1<<s->chroma_shift[1]:1,
-                                                s->avctx
+                                                s->avctx,
+                                                1
                                                )) < 0)
                     return ret;
             }
diff --git a/libavcodec/jpeg2000.c b/libavcodec/jpeg2000.c
index 0aa984bc53..945b787565 100644
--- a/libavcodec/jpeg2000.c
+++ b/libavcodec/jpeg2000.c
@@ -467,7 +467,7 @@  int ff_jpeg2000_init_component(Jpeg2000Component *comp,
                                Jpeg2000CodingStyle *codsty,
                                Jpeg2000QuantStyle *qntsty,
                                int cbps, int dx, int dy,
-                               AVCodecContext *avctx)
+                               AVCodecContext *avctx, int max_slices)
 {
     int reslevelno, bandno, gbandno = 0, ret, i, j;
     uint32_t csize;
@@ -479,7 +479,8 @@  int ff_jpeg2000_init_component(Jpeg2000Component *comp,
 
     if (ret = ff_jpeg2000_dwt_init(&comp->dwt, comp->coord,
                                    codsty->nreslevels2decode - 1,
-                                   codsty->transform))
+                                   codsty->transform,
+                                   max_slices))
         return ret;
 
     if (av_image_check_size(comp->coord[0][1] - comp->coord[0][0],
diff --git a/libavcodec/jpeg2000.h b/libavcodec/jpeg2000.h
index e5ecb4cbf9..389813a9b9 100644
--- a/libavcodec/jpeg2000.h
+++ b/libavcodec/jpeg2000.h
@@ -280,7 +280,7 @@  int ff_jpeg2000_init_component(Jpeg2000Component *comp,
                                Jpeg2000CodingStyle *codsty,
                                Jpeg2000QuantStyle *qntsty,
                                int cbps, int dx, int dy,
-                               AVCodecContext *ctx);
+                               AVCodecContext *ctx, int max_slices);
 
 void ff_jpeg2000_reinit(Jpeg2000Component *comp, Jpeg2000CodingStyle *codsty);
 
diff --git a/libavcodec/jpeg2000dec.c b/libavcodec/jpeg2000dec.c
index 92bd76d90f..273346538f 100644
--- a/libavcodec/jpeg2000dec.c
+++ b/libavcodec/jpeg2000dec.c
@@ -150,6 +150,10 @@  typedef struct Jpeg2000DecoderContext {
     size_t idwt_allocated;
     Jpeg2000CodeblockThread *cb;
     size_t cb_allocated;
+
+    // used for idwt slicing
+    int reslevel, dir, slices;
+    int have_dwt97_int; // 1 if any coding style is FF_DWT97_INT
 } Jpeg2000DecoderContext;
 
 /* get_bits functions for JPEG2000 packet bitstream
@@ -545,9 +549,10 @@  static int get_cox(Jpeg2000DecoderContext *s, Jpeg2000CodingStyle *c)
     }
     c->transform = bytestream2_get_byteu(&s->g); // DWT transformation type
     /* set integer 9/7 DWT in case of BITEXACT flag */
-    if ((s->avctx->flags & AV_CODEC_FLAG_BITEXACT) && (c->transform == FF_DWT97))
+    if ((s->avctx->flags & AV_CODEC_FLAG_BITEXACT) && (c->transform == FF_DWT97)) {
         c->transform = FF_DWT97_INT;
-    else if (c->transform == FF_DWT53) {
+        s->have_dwt97_int = 1;
+    } else if (c->transform == FF_DWT53) {
         s->avctx->properties |= FF_CODEC_PROPERTY_LOSSLESS;
     }
 
@@ -1056,7 +1061,7 @@  static int init_tile(Jpeg2000DecoderContext *s, int tileno)
             return AVERROR_INVALIDDATA;
         if (ret = ff_jpeg2000_init_component(comp, codsty, qntsty,
                                              s->cbps[compno], s->cdx[compno],
-                                             s->cdy[compno], s->avctx))
+                                             s->cdy[compno], s->avctx, s->slices))
             return ret;
     }
     return 0;
@@ -1997,19 +2002,74 @@  static int jpeg2000_decode_cb(AVCodecContext *avctx, void *td,
     return 0;
 }
 
+/* execute2() job: scale one slice of a FF_DWT97_INT component by 1 << I_PRESHIFT; returns 0. */
+static int jpeg2000_dwt97_int_preshift(AVCodecContext *avctx, void *td,
+                                       int jobnr, int threadnr)
+{
+    const Jpeg2000DecoderContext *s = avctx->priv_data;
+    Jpeg2000IdwtThread *idwt        = s->idwt + jobnr / s->slices;
+    Jpeg2000Tile *tile              = s->tile + jobnr / s->slices / s->ncomponents;
+    int compno                      = (jobnr / s->slices) % s->ncomponents;
+    int slice                       = jobnr % s->slices;
+    Jpeg2000Component *comp         = tile->comp + compno;
+    Jpeg2000CodingStyle *codsty     = tile->codsty + compno;
+    int a = comp->dwt.linelen[comp->dwt.ndeclevels - 1][0] *
+            comp->dwt.linelen[comp->dwt.ndeclevels - 1][1]; // NOTE(review): indexes linelen[-1] if ndeclevels == 0 - confirm callers exclude that
+    int as = (a + s->slices - 1)/s->slices; // samples per slice, rounded up
+
+    for (int i = idwt->cb_start; i < idwt->cb_end; i++) {
+        if (!s->cb[i].coded)
+            continue;
+        if (codsty->transform == FF_DWT97_INT) // stop at a: as*slices can exceed it
+            for (int j = as*slice; j < a && j - as < as*slice; j++)
+                comp->i_data[j] *= 1LL << I_PRESHIFT;
+        break;
+    }
+
+    return 0;
+}
+
 static int jpeg2000_idwt(AVCodecContext *avctx, void *td,
                          int jobnr, int threadnr)
 {
     const Jpeg2000DecoderContext *s = avctx->priv_data;
-    Jpeg2000IdwtThread *idwt        = s->idwt + jobnr;
-    Jpeg2000Tile *tile              = s->tile + jobnr / s->ncomponents;
-    int compno                      = jobnr % s->ncomponents;
+    Jpeg2000IdwtThread *idwt        = s->idwt + jobnr / s->slices; // s->slices consecutive jobs share one idwt entry
+    Jpeg2000Tile *tile              = s->tile + jobnr / s->slices / s->ncomponents;
+    int compno                      = (jobnr / s->slices) % s->ncomponents;
+    int slice                       = jobnr % s->slices; // this job's slice index, 0 .. s->slices - 1
     Jpeg2000Component *comp         = tile->comp + compno;
     Jpeg2000CodingStyle *codsty     = tile->codsty + compno;
 
     for (int i = idwt->cb_start; i < idwt->cb_end; i++) {
         if (s->cb[i].coded) {
-            ff_dwt_decode(&comp->dwt, codsty->transform == FF_DWT97 ? (void*)comp->f_data : (void*)comp->i_data);
+            ff_dwt_decode_thread(&comp->dwt, codsty->transform == FF_DWT97 ? (void*)comp->f_data : (void*)comp->i_data, s->reslevel, s->dir, slice, s->slices);
+            break; // one pass per job: stop after the first coded codeblock
+        }
+    }
+
+    return 0; // the result of ff_dwt_decode_thread() is not propagated
+}
+
+static int jpeg2000_dwt97_int_postshift(AVCodecContext *avctx, void *td,
+                                        int jobnr, int threadnr)
+{
+    const Jpeg2000DecoderContext *s = avctx->priv_data;
+    Jpeg2000IdwtThread *idwt        = s->idwt + jobnr / s->slices;
+    Jpeg2000Tile *tile              = s->tile + jobnr / s->slices / s->ncomponents;
+    int compno                      = (jobnr / s->slices) % s->ncomponents;
+    int slice                       = jobnr % s->slices;
+    Jpeg2000Component *comp         = tile->comp + compno;
+    Jpeg2000CodingStyle *codsty     = tile->codsty + compno;
+    int a = comp->dwt.linelen[comp->dwt.ndeclevels - 1][0] *
+            comp->dwt.linelen[comp->dwt.ndeclevels - 1][1];
+    int as = (a + s->slices - 1)/s->slices;
+
+    for (int i = idwt->cb_start; i < idwt->cb_end; i++) {
+        if (s->cb[i].coded) {
+            if (codsty->transform == FF_DWT97_INT) {
+                for (int i = as*slice; i - as < as*slice; i++)
+                    comp->i_data[i] = (comp->i_data[i] + ((1LL<<I_PRESHIFT)>>1)) >> I_PRESHIFT;
+            }
             break;
         }
     }
@@ -2480,7 +2540,7 @@  static av_cold int jpeg2000_decode_init(AVCodecContext *avctx)
     return 0;
 }
 
-static int jpeg2000_setup_cbs(Jpeg2000DecoderContext *s, int *cbs_out)
+static int jpeg2000_setup_cbs(Jpeg2000DecoderContext *s, int *cbs_out, int *maxreslevels_out)
 {
     if (s->numXtiles * s->numYtiles > INT_MAX/s->ncomponents)
         return AVERROR(ENOMEM);
@@ -2491,7 +2551,7 @@  static int jpeg2000_setup_cbs(Jpeg2000DecoderContext *s, int *cbs_out)
         return AVERROR(ENOMEM);
 
     for (int pass = 0; pass < 2; pass++) {
-        int cbs = 0;
+        int cbs = 0, maxreslevels = 0;
         for (int tileno = 0; tileno < s->numXtiles * s->numYtiles; tileno++) {
             for (int compno = 0; compno < s->ncomponents; compno++) {
                 Jpeg2000Tile *tile          = s->tile + tileno;
@@ -2500,6 +2560,7 @@  static int jpeg2000_setup_cbs(Jpeg2000DecoderContext *s, int *cbs_out)
                 Jpeg2000IdwtThread *idwt    = s->idwt + compno + tileno * s->ncomponents;
 
                 idwt->cb_start = cbs;
+                maxreslevels = FFMAX(maxreslevels, codsty->nreslevels2decode);
 
                 for (int reslevelno = 0; reslevelno < codsty->nreslevels2decode; reslevelno++) {
                     Jpeg2000ResLevel *rlevel = comp->reslevel + reslevelno;
@@ -2544,6 +2605,7 @@  static int jpeg2000_setup_cbs(Jpeg2000DecoderContext *s, int *cbs_out)
         }
 
         *cbs_out = cbs;
+        *maxreslevels_out = maxreslevels;
     }
     return 0;
 }
@@ -2552,7 +2614,7 @@  static int jpeg2000_decode_frame(AVCodecContext *avctx, AVFrame *picture,
                                  int *got_frame, AVPacket *avpkt)
 {
     Jpeg2000DecoderContext *s = avctx->priv_data;
-    int ret, cbs;
+    int ret, cbs, maxreslevels;
 
     s->avctx     = avctx;
     bytestream2_init(&s->g, avpkt->data, avpkt->size);
@@ -2604,6 +2666,7 @@  static int jpeg2000_decode_frame(AVCodecContext *avctx, AVFrame *picture,
         goto end;
     picture->pict_type = AV_PICTURE_TYPE_I;
     picture->key_frame = 1;
+    s->slices = avctx->active_thread_type == FF_THREAD_SLICE ? avctx->thread_count : 1;
 
     if (ret = jpeg2000_read_bitstream_packets(s))
         goto end;
@@ -2619,12 +2682,30 @@  static int jpeg2000_decode_frame(AVCodecContext *avctx, AVFrame *picture,
         }
     }
 
-    if ((ret = jpeg2000_setup_cbs(s, &cbs)) < 0 ||
-        (ret = avctx->execute2(avctx, jpeg2000_decode_cb, NULL, NULL, cbs)) < 0 ||
-        (ret = avctx->execute2(avctx, jpeg2000_idwt, NULL, NULL, s->numXtiles * s->numYtiles * s->ncomponents)) < 0 ||
-        (ret = avctx->execute2(avctx, jpeg2000_mct_write_frame, picture, NULL, s->numXtiles * s->numYtiles)) < 0)
+    if ((ret = jpeg2000_setup_cbs(s, &cbs, &maxreslevels)) < 0 ||
+        (ret = avctx->execute2(avctx, jpeg2000_decode_cb, NULL, NULL, cbs)) < 0)
+        goto end;
+
+    if (s->have_dwt97_int &&
+        (ret = avctx->execute2(avctx, jpeg2000_dwt97_int_preshift, NULL, NULL,
+            s->numXtiles * s->numYtiles * s->ncomponents * s->slices)) < 0)
         goto end;
 
+    for (s->reslevel = 0; s->reslevel < maxreslevels; s->reslevel++) {
+        for (s->dir = 0; s->dir < 2; s->dir++) {
+            if ((ret = avctx->execute2(avctx, jpeg2000_idwt, NULL, NULL,
+                    s->numXtiles * s->numYtiles * s->ncomponents * s->slices)) < 0)
+                goto end;
+        }
+    }
+
+    if (s->have_dwt97_int &&
+        (ret = avctx->execute2(avctx, jpeg2000_dwt97_int_postshift, NULL, NULL,
+            s->numXtiles * s->numYtiles * s->ncomponents * s->slices)) < 0)
+        goto end;
+
+    if ((ret = avctx->execute2(avctx, jpeg2000_mct_write_frame, picture, NULL, s->numXtiles * s->numYtiles)) < 0)
+        goto end;
 
     jpeg2000_dec_cleanup(s);
 
diff --git a/libavcodec/jpeg2000dwt.c b/libavcodec/jpeg2000dwt.c
index f2da7307c4..921461b6d7 100644
--- a/libavcodec/jpeg2000dwt.c
+++ b/libavcodec/jpeg2000dwt.c
@@ -45,7 +45,6 @@ 
 #define I_LFTG_DELTA   29066ll
 #define I_LFTG_K       80621ll
 #define I_LFTG_X       53274ll
-#define I_PRESHIFT 8
 
 static inline void extend53(int *p, int i0, int i1)
 {
@@ -322,24 +321,24 @@  static void sr_1d53(unsigned *p, int i0, int i1)
         p[2 * i + 1] += (int)(p[2 * i] + p[2 * i + 2]) >> 1;
 }
 
-static void dwt_decode53(DWTContext *s, int *t)
+static void dwt_decode53(DWTContext *s, int *t, int lev, int dir, int slice, int slices)
 {
-    int lev;
     int w     = s->linelen[s->ndeclevels - 1][0];
-    int32_t *line = s->i_linebuf;
-    line += 3;
+    int32_t *line = s->i_linebuf + slice * s->linesize + 3;
 
-    for (lev = 0; lev < s->ndeclevels; lev++) {
         int lh = s->linelen[lev][0],
             lv = s->linelen[lev][1],
             mh = s->mod[lev][0],
             mv = s->mod[lev][1],
+            sh = (lh + slices - 1)/slices,
+            sv = (lv + slices - 1)/slices,
             lp;
         int *l;
 
+    if (dir == 0) {
         // HOR_SD
         l = line + mh;
-        for (lp = 0; lp < lv; lp++) {
+        for (lp = slice*sv; lp < lv && lp - sv < slice*sv; lp++) {
             int i, j = 0;
             // copy with interleaving
             for (i = mh; i < lh; i += 2, j++)
@@ -352,10 +351,10 @@  static void dwt_decode53(DWTContext *s, int *t)
             for (i = 0; i < lh; i++)
                 t[w * lp + i] = l[i];
         }
-
+    } else {
         // VER_SD
         l = line + mv;
-        for (lp = 0; lp < lh; lp++) {
+        for (lp = slice*sh; lp < lh && lp - sh < slice*sh; lp++) {
             int i, j = 0;
             // copy with interleaving
             for (i = mv; i < lv; i += 2, j++)
@@ -398,25 +397,26 @@  static void sr_1d97_float(float *p, int i0, int i1)
         p[2 * i + 1] += F_LFTG_ALPHA * (p[2 * i]     + p[2 * i + 2]);
 }
 
-static void dwt_decode97_float(DWTContext *s, float *t)
+static void dwt_decode97_float(DWTContext *s, float *t, int lev, int dir, int slice, int slices)
 {
-    int lev;
     int w       = s->linelen[s->ndeclevels - 1][0];
-    float *line = s->f_linebuf;
-    float *data = t;
     /* position at index O of line range [0-5,w+5] cf. extend function */
-    line += 5;
+    float *line = s->f_linebuf + slice * s->linesize + 5;
+    float *data = t;
 
-    for (lev = 0; lev < s->ndeclevels; lev++) {
         int lh = s->linelen[lev][0],
             lv = s->linelen[lev][1],
             mh = s->mod[lev][0],
             mv = s->mod[lev][1],
+            sh = (lh + slices - 1)/slices,
+            sv = (lv + slices - 1)/slices,
             lp;
         float *l;
+
+    if (dir == 0) {
         // HOR_SD
         l = line + mh;
-        for (lp = 0; lp < lv; lp++) {
+        for (lp = slice*sv; lp < lv && lp - sv < slice*sv; lp++) {
             int i, j = 0;
             // copy with interleaving
             for (i = mh; i < lh; i += 2, j++)
@@ -429,10 +429,10 @@  static void dwt_decode97_float(DWTContext *s, float *t)
             for (i = 0; i < lh; i++)
                 data[w * lp + i] = l[i];
         }
-
+    } else {
         // VER_SD
         l = line + mv;
-        for (lp = 0; lp < lh; lp++) {
+        for (lp = slice*sh; lp < lh && lp - sh < slice*sh; lp++) {
             int i, j = 0;
             // copy with interleaving
             for (i = mv; i < lv; i += 2, j++)
@@ -475,30 +475,26 @@  static void sr_1d97_int(int32_t *p, int i0, int i1)
         p[2 * i + 1] += (I_LFTG_ALPHA * (p[2 * i]     + (int64_t)p[2 * i + 2]) + (1 << 15)) >> 16;
 }
 
-static void dwt_decode97_int(DWTContext *s, int32_t *t)
+static void dwt_decode97_int(DWTContext *s, int32_t *t, int lev, int dir, int slice, int slices)
 {
-    int lev;
     int w       = s->linelen[s->ndeclevels - 1][0];
-    int h       = s->linelen[s->ndeclevels - 1][1];
-    int i;
-    int32_t *line = s->i_linebuf;
-    int32_t *data = t;
     /* position at index O of line range [0-5,w+5] cf. extend function */
-    line += 5;
-
-    for (i = 0; i < w * h; i++)
-        data[i] *= 1LL << I_PRESHIFT;
+    int32_t *line = s->i_linebuf + slice * s->linesize + 5;
+    int32_t *data = t;
 
-    for (lev = 0; lev < s->ndeclevels; lev++) {
         int lh = s->linelen[lev][0],
             lv = s->linelen[lev][1],
             mh = s->mod[lev][0],
             mv = s->mod[lev][1],
+            sh = (lh + slices - 1)/slices,
+            sv = (lv + slices - 1)/slices,
             lp;
         int32_t *l;
+
+    if (dir == 0) {
         // HOR_SD
         l = line + mh;
-        for (lp = 0; lp < lv; lp++) {
+        for (lp = slice*sv; lp < lv && lp - sv < slice*sv; lp++) {
             int i, j = 0;
             // rescale with interleaving
             for (i = mh; i < lh; i += 2, j++)
@@ -511,10 +507,10 @@  static void dwt_decode97_int(DWTContext *s, int32_t *t)
             for (i = 0; i < lh; i++)
                 data[w * lp + i] = l[i];
         }
-
+    } else {
         // VER_SD
         l = line + mv;
-        for (lp = 0; lp < lh; lp++) {
+        for (lp = slice*sh; lp < lh && lp - sh < slice*sh; lp++) {
             int i, j = 0;
             // rescale with interleaving
             for (i = mv; i < lv; i += 2, j++)
@@ -528,26 +524,29 @@  static void dwt_decode97_int(DWTContext *s, int32_t *t)
                 data[w * i + lp] = l[i];
         }
     }
-
-    for (i = 0; i < w * h; i++)
-        data[i] = (data[i] + ((1LL<<I_PRESHIFT)>>1)) >> I_PRESHIFT;
 }
 
 int ff_jpeg2000_dwt_init(DWTContext *s, int border[2][2],
-                         int decomp_levels, int type)
+                         int decomp_levels, int type, int max_slices)
 {
-    int i, j, lev = decomp_levels, maxlen,
+    int i, j, lev = decomp_levels,
         b[2][2];
 
     s->ndeclevels = decomp_levels;
     s->type       = type;
+    s->max_slices = max_slices;
+
+    if (s->max_slices > INT_MAX/FFMAX(sizeof(*s->f_linebuf),sizeof(*s->i_linebuf)))
+        return AVERROR(ENOMEM);
 
     for (i = 0; i < 2; i++)
         for (j = 0; j < 2; j++)
             b[i][j] = border[i][j];
 
-    maxlen = FFMAX(b[0][1] - b[0][0],
-                   b[1][1] - b[1][0]);
+    s->linesize   = FFMAX(b[0][1] - b[0][0],
+                          b[1][1] - b[1][0]) +
+                    (type == FF_DWT53 ? 6 : 12);
+
     while (--lev >= 0)
         for (i = 0; i < 2; i++) {
             s->linelen[lev][i] = b[i][1] - b[i][0];
@@ -555,24 +554,15 @@  int ff_jpeg2000_dwt_init(DWTContext *s, int border[2][2],
             for (j = 0; j < 2; j++)
                 b[i][j] = (b[i][j] + 1) >> 1;
         }
-    switch (type) {
-    case FF_DWT97:
-        s->f_linebuf = av_malloc_array((maxlen + 12), sizeof(*s->f_linebuf));
+
+    if (type == FF_DWT97) {
+        s->f_linebuf = av_malloc_array(s->linesize, s->max_slices*sizeof(*s->f_linebuf));
         if (!s->f_linebuf)
             return AVERROR(ENOMEM);
-        break;
-     case FF_DWT97_INT:
-        s->i_linebuf = av_malloc_array((maxlen + 12), sizeof(*s->i_linebuf));
-        if (!s->i_linebuf)
-            return AVERROR(ENOMEM);
-        break;
-    case FF_DWT53:
-        s->i_linebuf = av_malloc_array((maxlen +  6), sizeof(*s->i_linebuf));
+    } else {
+        s->i_linebuf = av_malloc_array(s->linesize, s->max_slices*sizeof(*s->i_linebuf));
         if (!s->i_linebuf)
             return AVERROR(ENOMEM);
-        break;
-    default:
-        return -1;
     }
     return 0;
 }
@@ -597,18 +587,46 @@  int ff_dwt_encode(DWTContext *s, void *t)
 
 int ff_dwt_decode(DWTContext *s, void *t)
 {
-    if (s->ndeclevels == 0)
+    int w = s->linelen[s->ndeclevels - 1][0];
+    int h = s->linelen[s->ndeclevels - 1][1];
+    int32_t *data = t;
+
+    if (s->type == FF_DWT97_INT)
+        for (int i = 0; i < w * h; i++)
+            data[i] *= 1LL << I_PRESHIFT;
+
+    for (int lev = 0; lev < s->ndeclevels; lev++)
+        for (int dir = 0; dir < 2; dir++)
+            for (int slice = 0; slice < s->max_slices; slice++) {
+                int ret = ff_dwt_decode_thread(s, t, lev, dir, slice, s->max_slices);
+                if (ret)
+                    return ret;
+            }
+
+    if (s->type == FF_DWT97_INT)
+        for (int i = 0; i < w * h; i++)
+            data[i] = (data[i] + ((1LL<<I_PRESHIFT)>>1)) >> I_PRESHIFT;
+
+    return 0;
+}
+
+int ff_dwt_decode_thread(DWTContext *s, void *t, int lev, int dir, int slice, int slices)
+{
+    slices = FFMIN(s->max_slices, slices);
+
+    // lev can be >= s->ndeclevels in files with mixed reslevels in tiles/components
+    if (s->ndeclevels == 0 || lev >= s->ndeclevels || slice >= slices)
         return 0;
 
     switch (s->type) {
     case FF_DWT97:
-        dwt_decode97_float(s, t);
+        dwt_decode97_float(s, t, lev, dir, slice, slices);
         break;
     case FF_DWT97_INT:
-        dwt_decode97_int(s, t);
+        dwt_decode97_int(s, t, lev, dir, slice, slices);
         break;
     case FF_DWT53:
-        dwt_decode53(s, t);
+        dwt_decode53(s, t, lev, dir, slice, slices);
         break;
     default:
         return -1;
diff --git a/libavcodec/jpeg2000dwt.h b/libavcodec/jpeg2000dwt.h
index 718d183ac1..d5e94c9916 100644
--- a/libavcodec/jpeg2000dwt.h
+++ b/libavcodec/jpeg2000dwt.h
@@ -32,6 +32,7 @@ 
 #define FF_DWT_MAX_DECLVLS 32 ///< max number of decomposition levels
 #define F_LFTG_K      1.230174104914001f
 #define F_LFTG_X      0.812893066115961f
+#define I_PRESHIFT 8
 
 enum DWTType {
     FF_DWT97,
@@ -48,6 +49,8 @@  typedef struct DWTContext {
     uint8_t type;                        ///< 0 for 9/7; 1 for 5/3
     int32_t *i_linebuf;                  ///< int buffer used by transform
     float   *f_linebuf;                  ///< float buffer used by transform
+    int max_slices;
+    int linesize;
 } DWTContext;
 
 /**
@@ -58,10 +61,11 @@  typedef struct DWTContext {
  * @param type              0 for DWT 9/7; 1 for DWT 5/3
  */
 int ff_jpeg2000_dwt_init(DWTContext *s, int border[2][2],
-                         int decomp_levels, int type);
+                         int decomp_levels, int type, int max_slices);
 
 int ff_dwt_encode(DWTContext *s, void *t);
 int ff_dwt_decode(DWTContext *s, void *t);
+int ff_dwt_decode_thread(DWTContext *s, void *t, int lev, int dir, int slice, int slices);
 
 void ff_dwt_destroy(DWTContext *s);
 
diff --git a/libavcodec/tests/jpeg2000dwt.c b/libavcodec/tests/jpeg2000dwt.c
index 0e5a6ed947..d4d9e6d224 100644
--- a/libavcodec/tests/jpeg2000dwt.c
+++ b/libavcodec/tests/jpeg2000dwt.c
@@ -31,12 +31,12 @@ 
 
 #define MAX_W 256
 
-static int test_dwt(int *array, int *ref, int border[2][2], int decomp_levels, int type, int max_diff) {
+static int test_dwt(int *array, int *ref, int border[2][2], int decomp_levels, int type, int max_diff, int slices) {
     int ret, j;
     DWTContext s1={{{0}}}, *s= &s1;
     int64_t err2 = 0;
 
-    ret = ff_jpeg2000_dwt_init(s,  border, decomp_levels, type);
+    ret = ff_jpeg2000_dwt_init(s,  border, decomp_levels, type, slices);
     if (ret < 0) {
         fprintf(stderr, "ff_jpeg2000_dwt_init failed\n");
         return 1;
@@ -70,12 +70,12 @@  static int test_dwt(int *array, int *ref, int border[2][2], int decomp_levels, i
     return 0;
 }
 
-static int test_dwtf(float *array, float *ref, int border[2][2], int decomp_levels, float max_diff) {
+static int test_dwtf(float *array, float *ref, int border[2][2], int decomp_levels, float max_diff, int slices) {
     int ret, j;
     DWTContext s1={{{0}}}, *s= &s1;
     double err2 = 0;
 
-    ret = ff_jpeg2000_dwt_init(s,  border, decomp_levels, FF_DWT97);
+    ret = ff_jpeg2000_dwt_init(s,  border, decomp_levels, FF_DWT97, slices);
     if (ret < 0) {
         fprintf(stderr, "ff_jpeg2000_dwt_init failed\n");
         return 1;
@@ -125,19 +125,20 @@  int main(void) {
         arrayf[i] = reff[i] = array[i] = ref[i] =  av_lfg_get(&prng) % 2048;
 
     for (i = 0; i < 100; i++) {
+        int slices = 1 + (i % 10);
         for (j=0; j<4; j++)
             border[j>>1][j&1] = av_lfg_get(&prng) % MAX_W;
         if (border[0][0] >= border[0][1] || border[1][0] >= border[1][1])
             continue;
         decomp_levels = av_lfg_get(&prng) % FF_DWT_MAX_DECLVLS;
 
-        ret = test_dwt(array, ref, border, decomp_levels, FF_DWT53, 0);
+        ret = test_dwt(array, ref, border, decomp_levels, FF_DWT53, 0, slices);
         if (ret)
             return ret;
-        ret = test_dwt(array, ref, border, decomp_levels, FF_DWT97_INT, FFMIN(7+5*decomp_levels, 15+3*decomp_levels));
+        ret = test_dwt(array, ref, border, decomp_levels, FF_DWT97_INT, FFMIN(7+5*decomp_levels, 15+3*decomp_levels), slices);
         if (ret)
             return ret;
-        ret = test_dwtf(arrayf, reff, border, decomp_levels, 0.05);
+        ret = test_dwtf(arrayf, reff, border, decomp_levels, 0.05, slices);
         if (ret)
             return ret;
     }
-- 
2.30.2