diff mbox series

[FFmpeg-devel,03/13] lavc/jpeg2000dwt: Implement sliced transforms

Message ID 12504be2356f3e0109db088f69a105c571460a94.camel@acc.umu.se
State New
Headers show
Series [FFmpeg-devel,01/13] lavc/jpeg2000dec: Finer granularity threading | expand

Checks

Context Check Description
andriy/configure_x86 warning Failed to apply patch

Commit Message

Tomas Härdin June 14, 2022, 2:40 p.m. UTC

diff mbox series

Patch

From 6ab67531c946ca320e49bc93f4f086835ffd2c1c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomas=20H=C3=A4rdin?= <git@haerdin.se>
Date: Fri, 10 Jun 2022 17:18:14 +0200
Subject: [PATCH 03/13] lavc/jpeg2000dwt: Implement sliced transforms

The sliced transforms are exercised by libavcodec/tests/jpeg2000dwt, which now runs each test iteration with between 1 and 10 slices.
---
 libavcodec/j2kenc.c            |   3 +-
 libavcodec/jpeg2000.c          |   5 +-
 libavcodec/jpeg2000.h          |   2 +-
 libavcodec/jpeg2000dec.c       |   2 +-
 libavcodec/jpeg2000dwt.c       | 131 +++++++++++++++++++--------------
 libavcodec/jpeg2000dwt.h       |   5 +-
 libavcodec/tests/jpeg2000dwt.c |  15 ++--
 7 files changed, 94 insertions(+), 69 deletions(-)

diff --git a/libavcodec/j2kenc.c b/libavcodec/j2kenc.c
index 0b761d0b00..4de596ffa9 100644
--- a/libavcodec/j2kenc.c
+++ b/libavcodec/j2kenc.c
@@ -496,7 +496,8 @@  static int init_tiles(Jpeg2000EncoderContext *s)
                                                 s->cbps[compno],
                                                 compno?1<<s->chroma_shift[0]:1,
                                                 compno?1<<s->chroma_shift[1]:1,
-                                                s->avctx
+                                                s->avctx,
+                                                1
                                                )) < 0)
                     return ret;
             }
diff --git a/libavcodec/jpeg2000.c b/libavcodec/jpeg2000.c
index 0aa984bc53..945b787565 100644
--- a/libavcodec/jpeg2000.c
+++ b/libavcodec/jpeg2000.c
@@ -467,7 +467,7 @@  int ff_jpeg2000_init_component(Jpeg2000Component *comp,
                                Jpeg2000CodingStyle *codsty,
                                Jpeg2000QuantStyle *qntsty,
                                int cbps, int dx, int dy,
-                               AVCodecContext *avctx)
+                               AVCodecContext *avctx, int max_slices)
 {
     int reslevelno, bandno, gbandno = 0, ret, i, j;
     uint32_t csize;
@@ -479,7 +479,8 @@  int ff_jpeg2000_init_component(Jpeg2000Component *comp,
 
     if (ret = ff_jpeg2000_dwt_init(&comp->dwt, comp->coord,
                                    codsty->nreslevels2decode - 1,
-                                   codsty->transform))
+                                   codsty->transform,
+                                   max_slices))
         return ret;
 
     if (av_image_check_size(comp->coord[0][1] - comp->coord[0][0],
diff --git a/libavcodec/jpeg2000.h b/libavcodec/jpeg2000.h
index d06313425e..cbb8e0d951 100644
--- a/libavcodec/jpeg2000.h
+++ b/libavcodec/jpeg2000.h
@@ -278,7 +278,7 @@  int ff_jpeg2000_init_component(Jpeg2000Component *comp,
                                Jpeg2000CodingStyle *codsty,
                                Jpeg2000QuantStyle *qntsty,
                                int cbps, int dx, int dy,
-                               AVCodecContext *ctx);
+                               AVCodecContext *ctx, int max_slices);
 
 void ff_jpeg2000_reinit(Jpeg2000Component *comp, Jpeg2000CodingStyle *codsty);
 
diff --git a/libavcodec/jpeg2000dec.c b/libavcodec/jpeg2000dec.c
index 9d3d406870..8999974a56 100644
--- a/libavcodec/jpeg2000dec.c
+++ b/libavcodec/jpeg2000dec.c
@@ -1052,7 +1052,7 @@  static int init_tile(Jpeg2000DecoderContext *s, int tileno)
             return AVERROR_INVALIDDATA;
         if (ret = ff_jpeg2000_init_component(comp, codsty, qntsty,
                                              s->cbps[compno], s->cdx[compno],
-                                             s->cdy[compno], s->avctx))
+                                             s->cdy[compno], s->avctx, 1))
             return ret;
     }
     return 0;
diff --git a/libavcodec/jpeg2000dwt.c b/libavcodec/jpeg2000dwt.c
index f2da7307c4..42a92b6c64 100644
--- a/libavcodec/jpeg2000dwt.c
+++ b/libavcodec/jpeg2000dwt.c
@@ -322,24 +322,24 @@  static void sr_1d53(unsigned *p, int i0, int i1)
         p[2 * i + 1] += (int)(p[2 * i] + p[2 * i + 2]) >> 1;
 }
 
-static void dwt_decode53(DWTContext *s, int *t)
+static void dwt_decode53(DWTContext *s, int *t, int lev, int dir, int slice, int slices)
 {
-    int lev;
     int w     = s->linelen[s->ndeclevels - 1][0];
-    int32_t *line = s->i_linebuf;
-    line += 3;
+    int32_t *line = s->i_linebuf + slice * s->linesize + 3;
 
-    for (lev = 0; lev < s->ndeclevels; lev++) {
         int lh = s->linelen[lev][0],
             lv = s->linelen[lev][1],
             mh = s->mod[lev][0],
             mv = s->mod[lev][1],
+            sh = (lh + slices - 1)/slices,
+            sv = (lv + slices - 1)/slices,
             lp;
         int *l;
 
+    if (dir == 0) {
         // HOR_SD
         l = line + mh;
-        for (lp = 0; lp < lv; lp++) {
+        for (lp = slice*sv; lp < lv && lp - sv < slice*sv; lp++) {
             int i, j = 0;
             // copy with interleaving
             for (i = mh; i < lh; i += 2, j++)
@@ -352,10 +352,10 @@  static void dwt_decode53(DWTContext *s, int *t)
             for (i = 0; i < lh; i++)
                 t[w * lp + i] = l[i];
         }
-
+    } else {
         // VER_SD
         l = line + mv;
-        for (lp = 0; lp < lh; lp++) {
+        for (lp = slice*sh; lp < lh && lp - sh < slice*sh; lp++) {
             int i, j = 0;
             // copy with interleaving
             for (i = mv; i < lv; i += 2, j++)
@@ -398,25 +398,26 @@  static void sr_1d97_float(float *p, int i0, int i1)
         p[2 * i + 1] += F_LFTG_ALPHA * (p[2 * i]     + p[2 * i + 2]);
 }
 
-static void dwt_decode97_float(DWTContext *s, float *t)
+static void dwt_decode97_float(DWTContext *s, float *t, int lev, int dir, int slice, int slices)
 {
-    int lev;
     int w       = s->linelen[s->ndeclevels - 1][0];
-    float *line = s->f_linebuf;
-    float *data = t;
     /* position at index O of line range [0-5,w+5] cf. extend function */
-    line += 5;
+    float *line = s->f_linebuf + slice * s->linesize + 5;
+    float *data = t;
 
-    for (lev = 0; lev < s->ndeclevels; lev++) {
         int lh = s->linelen[lev][0],
             lv = s->linelen[lev][1],
             mh = s->mod[lev][0],
             mv = s->mod[lev][1],
+            sh = (lh + slices - 1)/slices,
+            sv = (lv + slices - 1)/slices,
             lp;
         float *l;
+
+    if (dir == 0) {
         // HOR_SD
         l = line + mh;
-        for (lp = 0; lp < lv; lp++) {
+        for (lp = slice*sv; lp < lv && lp - sv < slice*sv; lp++) {
             int i, j = 0;
             // copy with interleaving
             for (i = mh; i < lh; i += 2, j++)
@@ -429,10 +430,10 @@  static void dwt_decode97_float(DWTContext *s, float *t)
             for (i = 0; i < lh; i++)
                 data[w * lp + i] = l[i];
         }
-
+    } else {
         // VER_SD
         l = line + mv;
-        for (lp = 0; lp < lh; lp++) {
+        for (lp = slice*sh; lp < lh && lp - sh < slice*sh; lp++) {
             int i, j = 0;
             // copy with interleaving
             for (i = mv; i < lv; i += 2, j++)
@@ -475,30 +476,26 @@  static void sr_1d97_int(int32_t *p, int i0, int i1)
         p[2 * i + 1] += (I_LFTG_ALPHA * (p[2 * i]     + (int64_t)p[2 * i + 2]) + (1 << 15)) >> 16;
 }
 
-static void dwt_decode97_int(DWTContext *s, int32_t *t)
+static void dwt_decode97_int(DWTContext *s, int32_t *t, int lev, int dir, int slice, int slices)
 {
-    int lev;
     int w       = s->linelen[s->ndeclevels - 1][0];
-    int h       = s->linelen[s->ndeclevels - 1][1];
-    int i;
-    int32_t *line = s->i_linebuf;
-    int32_t *data = t;
     /* position at index O of line range [0-5,w+5] cf. extend function */
-    line += 5;
-
-    for (i = 0; i < w * h; i++)
-        data[i] *= 1LL << I_PRESHIFT;
+    int32_t *line = s->i_linebuf + slice * s->linesize + 5;
+    int32_t *data = t;
 
-    for (lev = 0; lev < s->ndeclevels; lev++) {
         int lh = s->linelen[lev][0],
             lv = s->linelen[lev][1],
             mh = s->mod[lev][0],
             mv = s->mod[lev][1],
+            sh = (lh + slices - 1)/slices,
+            sv = (lv + slices - 1)/slices,
             lp;
         int32_t *l;
+
+    if (dir == 0) {
         // HOR_SD
         l = line + mh;
-        for (lp = 0; lp < lv; lp++) {
+        for (lp = slice*sv; lp < lv && lp - sv < slice*sv; lp++) {
             int i, j = 0;
             // rescale with interleaving
             for (i = mh; i < lh; i += 2, j++)
@@ -511,10 +508,10 @@  static void dwt_decode97_int(DWTContext *s, int32_t *t)
             for (i = 0; i < lh; i++)
                 data[w * lp + i] = l[i];
         }
-
+    } else {
         // VER_SD
         l = line + mv;
-        for (lp = 0; lp < lh; lp++) {
+        for (lp = slice*sh; lp < lh && lp - sh < slice*sh; lp++) {
             int i, j = 0;
             // rescale with interleaving
             for (i = mv; i < lv; i += 2, j++)
@@ -528,26 +525,29 @@  static void dwt_decode97_int(DWTContext *s, int32_t *t)
                 data[w * i + lp] = l[i];
         }
     }
-
-    for (i = 0; i < w * h; i++)
-        data[i] = (data[i] + ((1LL<<I_PRESHIFT)>>1)) >> I_PRESHIFT;
 }
 
 int ff_jpeg2000_dwt_init(DWTContext *s, int border[2][2],
-                         int decomp_levels, int type)
+                         int decomp_levels, int type, int max_slices)
 {
-    int i, j, lev = decomp_levels, maxlen,
+    int i, j, lev = decomp_levels,
         b[2][2];
 
     s->ndeclevels = decomp_levels;
     s->type       = type;
+    s->max_slices = max_slices;
+
+    if (s->max_slices > INT_MAX/FFMAX(sizeof(*s->f_linebuf),sizeof(*s->i_linebuf)))
+        return AVERROR(ENOMEM);
 
     for (i = 0; i < 2; i++)
         for (j = 0; j < 2; j++)
             b[i][j] = border[i][j];
 
-    maxlen = FFMAX(b[0][1] - b[0][0],
-                   b[1][1] - b[1][0]);
+    s->linesize   = FFMAX(b[0][1] - b[0][0],
+                          b[1][1] - b[1][0]) +
+                    (type == FF_DWT53 ? 6 : 12);
+
     while (--lev >= 0)
         for (i = 0; i < 2; i++) {
             s->linelen[lev][i] = b[i][1] - b[i][0];
@@ -555,24 +555,15 @@  int ff_jpeg2000_dwt_init(DWTContext *s, int border[2][2],
             for (j = 0; j < 2; j++)
                 b[i][j] = (b[i][j] + 1) >> 1;
         }
-    switch (type) {
-    case FF_DWT97:
-        s->f_linebuf = av_malloc_array((maxlen + 12), sizeof(*s->f_linebuf));
+
+    if (type == FF_DWT97) {
+        s->f_linebuf = av_malloc_array(s->linesize, s->max_slices*sizeof(*s->f_linebuf));
         if (!s->f_linebuf)
             return AVERROR(ENOMEM);
-        break;
-     case FF_DWT97_INT:
-        s->i_linebuf = av_malloc_array((maxlen + 12), sizeof(*s->i_linebuf));
-        if (!s->i_linebuf)
-            return AVERROR(ENOMEM);
-        break;
-    case FF_DWT53:
-        s->i_linebuf = av_malloc_array((maxlen +  6), sizeof(*s->i_linebuf));
+    } else {
+        s->i_linebuf = av_malloc_array(s->linesize, s->max_slices*sizeof(*s->i_linebuf));
         if (!s->i_linebuf)
             return AVERROR(ENOMEM);
-        break;
-    default:
-        return -1;
     }
     return 0;
 }
@@ -597,18 +588,46 @@  int ff_dwt_encode(DWTContext *s, void *t)
 
 int ff_dwt_decode(DWTContext *s, void *t)
 {
-    if (s->ndeclevels == 0)
+    int w = s->linelen[s->ndeclevels - 1][0];
+    int h = s->linelen[s->ndeclevels - 1][1];
+    int32_t *data = t;
+
+    if (s->type == FF_DWT97_INT)
+        for (int i = 0; i < w * h; i++)
+            data[i] *= 1LL << I_PRESHIFT;
+
+    for (int lev = 0; lev < s->ndeclevels; lev++)
+        for (int dir = 0; dir < 2; dir++)
+            for (int slice = 0; slice < s->max_slices; slice++) {
+                int ret = ff_dwt_decode_thread(s, t, lev, dir, slice, s->max_slices);
+                if (ret)
+                    return ret;
+            }
+
+    if (s->type == FF_DWT97_INT)
+        for (int i = 0; i < w * h; i++)
+            data[i] = (data[i] + ((1LL<<I_PRESHIFT)>>1)) >> I_PRESHIFT;
+
+    return 0;
+}
+
+int ff_dwt_decode_thread(DWTContext *s, void *t, int lev, int dir, int slice, int slices)
+{
+    slices = FFMIN(s->max_slices, slices);
+
+    // lev can be >= s->ndeclevels in files with mixed reslevels in tiles/components
+    if (s->ndeclevels == 0 || lev >= s->ndeclevels || slice >= slices)
         return 0;
 
     switch (s->type) {
     case FF_DWT97:
-        dwt_decode97_float(s, t);
+        dwt_decode97_float(s, t, lev, dir, slice, slices);
         break;
     case FF_DWT97_INT:
-        dwt_decode97_int(s, t);
+        dwt_decode97_int(s, t, lev, dir, slice, slices);
         break;
     case FF_DWT53:
-        dwt_decode53(s, t);
+        dwt_decode53(s, t, lev, dir, slice, slices);
         break;
     default:
         return -1;
diff --git a/libavcodec/jpeg2000dwt.h b/libavcodec/jpeg2000dwt.h
index 718d183ac1..0589c8355c 100644
--- a/libavcodec/jpeg2000dwt.h
+++ b/libavcodec/jpeg2000dwt.h
@@ -48,6 +48,8 @@  typedef struct DWTContext {
     uint8_t type;                        ///< 0 for 9/7; 1 for 5/3
     int32_t *i_linebuf;                  ///< int buffer used by transform
     float   *f_linebuf;                  ///< float buffer used by transform
+    int max_slices;
+    int linesize;
 } DWTContext;
 
 /**
@@ -58,10 +60,11 @@  typedef struct DWTContext {
  * @param type              0 for DWT 9/7; 1 for DWT 5/3
  */
 int ff_jpeg2000_dwt_init(DWTContext *s, int border[2][2],
-                         int decomp_levels, int type);
+                         int decomp_levels, int type, int max_slices);
 
 int ff_dwt_encode(DWTContext *s, void *t);
 int ff_dwt_decode(DWTContext *s, void *t);
+int ff_dwt_decode_thread(DWTContext *s, void *t, int lev, int dir, int slice, int slices);
 
 void ff_dwt_destroy(DWTContext *s);
 
diff --git a/libavcodec/tests/jpeg2000dwt.c b/libavcodec/tests/jpeg2000dwt.c
index 0e5a6ed947..d4d9e6d224 100644
--- a/libavcodec/tests/jpeg2000dwt.c
+++ b/libavcodec/tests/jpeg2000dwt.c
@@ -31,12 +31,12 @@ 
 
 #define MAX_W 256
 
-static int test_dwt(int *array, int *ref, int border[2][2], int decomp_levels, int type, int max_diff) {
+static int test_dwt(int *array, int *ref, int border[2][2], int decomp_levels, int type, int max_diff, int slices) {
     int ret, j;
     DWTContext s1={{{0}}}, *s= &s1;
     int64_t err2 = 0;
 
-    ret = ff_jpeg2000_dwt_init(s,  border, decomp_levels, type);
+    ret = ff_jpeg2000_dwt_init(s,  border, decomp_levels, type, slices);
     if (ret < 0) {
         fprintf(stderr, "ff_jpeg2000_dwt_init failed\n");
         return 1;
@@ -70,12 +70,12 @@  static int test_dwt(int *array, int *ref, int border[2][2], int decomp_levels, i
     return 0;
 }
 
-static int test_dwtf(float *array, float *ref, int border[2][2], int decomp_levels, float max_diff) {
+static int test_dwtf(float *array, float *ref, int border[2][2], int decomp_levels, float max_diff, int slices) {
     int ret, j;
     DWTContext s1={{{0}}}, *s= &s1;
     double err2 = 0;
 
-    ret = ff_jpeg2000_dwt_init(s,  border, decomp_levels, FF_DWT97);
+    ret = ff_jpeg2000_dwt_init(s,  border, decomp_levels, FF_DWT97, slices);
     if (ret < 0) {
         fprintf(stderr, "ff_jpeg2000_dwt_init failed\n");
         return 1;
@@ -125,19 +125,20 @@  int main(void) {
         arrayf[i] = reff[i] = array[i] = ref[i] =  av_lfg_get(&prng) % 2048;
 
     for (i = 0; i < 100; i++) {
+        int slices = 1 + (i % 10);
         for (j=0; j<4; j++)
             border[j>>1][j&1] = av_lfg_get(&prng) % MAX_W;
         if (border[0][0] >= border[0][1] || border[1][0] >= border[1][1])
             continue;
         decomp_levels = av_lfg_get(&prng) % FF_DWT_MAX_DECLVLS;
 
-        ret = test_dwt(array, ref, border, decomp_levels, FF_DWT53, 0);
+        ret = test_dwt(array, ref, border, decomp_levels, FF_DWT53, 0, slices);
         if (ret)
             return ret;
-        ret = test_dwt(array, ref, border, decomp_levels, FF_DWT97_INT, FFMIN(7+5*decomp_levels, 15+3*decomp_levels));
+        ret = test_dwt(array, ref, border, decomp_levels, FF_DWT97_INT, FFMIN(7+5*decomp_levels, 15+3*decomp_levels), slices);
         if (ret)
             return ret;
-        ret = test_dwtf(arrayf, reff, border, decomp_levels, 0.05);
+        ret = test_dwtf(arrayf, reff, border, decomp_levels, 0.05, slices);
         if (ret)
             return ret;
     }
-- 
2.30.2