diff mbox series

[FFmpeg-devel,v7] avcodec/jpeg2000: Fix FF_DWT97_INT to pass the conformance testing defined in ISO/IEC 15444-4

Message ID 20241103044837.1667397-2-owatanab@es.takushoku-u.ac.jp
State New
Headers show
Series [FFmpeg-devel,v7] avcodec/jpeg2000: Fix FF_DWT97_INT to pass the conformance testing defined in ISO/IEC 15444-4 | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished

Commit Message

Osamu Watanabe Nov. 3, 2024, 4:48 a.m. UTC
Fix for the integer version of the inverse 9-7 DWT processing
(FF_DWT97_INT, https://trac.ffmpeg.org/ticket/10123), which is activated with
`-flags +bitexact`.

I went through the code path for the integer DWT 9-7 transform and improved
its precision so that the output matches the conformance codestreams.

As a result, the encoded codestream is larger for a given Q value.
For example, `-flags +bitexact -i lena.pnm -q: 20 -format j2k -y tmp.j2c`
produces a 13K file on HEAD and a 19K file with this patch.

This commit also updates the source and reference files for affected FATE tests.

Signed-off-by: Osamu Watanabe <owatanab@es.takushoku-u.ac.jp>
---
 libavcodec/jpeg2000.c                    |  11 +-
 libavcodec/jpeg2000dec.c                 | 150 +++++++++++++----------
 libavcodec/jpeg2000dwt.c                 |  47 +++----
 libavcodec/jpeg2000dwt.h                 |   1 +
 libavcodec/jpeg2000htdec.c               |   9 +-
 libavcodec/tests/jpeg2000dwt.c           |   5 +
 tests/ref/fate/j2k-dwt                   |  40 +++---
 tests/ref/fate/jpeg2000-dcinema          |   4 +-
 tests/ref/fate/jpeg2000dec-p0_04         |   2 +-
 tests/ref/fate/jpeg2000dec-p0_05         |   2 +-
 tests/ref/fate/jpeg2000dec-p0_09         |   2 +-
 tests/ref/vsynth/vsynth1-jpeg2000-97     |   8 +-
 tests/ref/vsynth/vsynth2-jpeg2000-97     |   8 +-
 tests/ref/vsynth/vsynth3-jpeg2000-97     |   8 +-
 tests/ref/vsynth/vsynth_lena-jpeg2000-97 |   8 +-
 15 files changed, 163 insertions(+), 142 deletions(-)
diff mbox series

Patch

diff --git a/libavcodec/jpeg2000.c b/libavcodec/jpeg2000.c
index d6ffb02319..7911500901 100644
--- a/libavcodec/jpeg2000.c
+++ b/libavcodec/jpeg2000.c
@@ -260,9 +260,7 @@  static void init_band_stepsize(AVCodecContext *avctx,
                 band->f_stepsize *= F_LFTG_X * F_LFTG_X * 4;
                 break;
         }
-        if (codsty->transform == FF_DWT97) {
-            band->f_stepsize *= pow(F_LFTG_K, 2*(codsty->nreslevels2decode - reslevelno) + lband - 2);
-        }
+        band->f_stepsize *= pow(F_LFTG_K, 2*(codsty->nreslevels2decode - reslevelno) + lband - 2);
     }
 
     if (band->f_stepsize > (INT_MAX >> 15)) {
@@ -270,12 +268,7 @@  static void init_band_stepsize(AVCodecContext *avctx,
         av_log(avctx, AV_LOG_ERROR, "stepsize out of range\n");
     }
 
-    band->i_stepsize = band->f_stepsize * (1 << 15);
-
-    /* FIXME: In OpenJPEG code stepsize = stepsize * 0.5. Why?
-     * If not set output of entropic decoder is not correct. */
-    if (!av_codec_is_encoder(avctx->codec))
-        band->f_stepsize *= 0.5;
+    band->i_stepsize = (int)floorf(band->f_stepsize * (1 << 15));
 }
 
 static int init_prec(AVCodecContext *avctx,
diff --git a/libavcodec/jpeg2000dec.c b/libavcodec/jpeg2000dec.c
index 5b05ff2455..c9d8b025b1 100644
--- a/libavcodec/jpeg2000dec.c
+++ b/libavcodec/jpeg2000dec.c
@@ -1885,14 +1885,15 @@  static void decode_sigpass(Jpeg2000T1Context *t1, int width, int height,
                 && !(t1->flags[(y+1) * t1->stride + x+1] & (JPEG2000_T1_SIG | JPEG2000_T1_VIS))) {
                     if (ff_mqc_decode(&t1->mqc, t1->mqc.cx_states + ff_jpeg2000_getsigctxno(t1->flags[(y+1) * t1->stride + x+1] & flags_mask, bandno))) {
                         int xorbit, ctxno = ff_jpeg2000_getsgnctxno(t1->flags[(y+1) * t1->stride + x+1] & flags_mask, &xorbit);
-                        if (t1->mqc.raw)
-                             t1->data[(y) * t1->stride + x] = ff_mqc_decode(&t1->mqc, t1->mqc.cx_states + ctxno) ? -mask : mask;
-                        else
-                             t1->data[(y) * t1->stride + x] = (ff_mqc_decode(&t1->mqc, t1->mqc.cx_states + ctxno) ^ xorbit) ?
-                                               -mask : mask;
-
+                        if (t1->mqc.raw) {
+                            t1->data[(y) * t1->stride + x] |= ff_mqc_decode(&t1->mqc, t1->mqc.cx_states + ctxno) << 31;
+                            t1->data[(y) * t1->stride + x] |= mask;
+                        } else {
+                            t1->data[(y) * t1->stride + x] |= (ff_mqc_decode(&t1->mqc, t1->mqc.cx_states + ctxno) ^ xorbit) << 31;
+                            t1->data[(y) * t1->stride + x] |= mask;
+                        }
                         ff_jpeg2000_set_significance(t1, x, y,
-                                                     t1->data[(y) * t1->stride + x] < 0);
+                                                     t1->data[(y) * t1->stride + x] & INT32_MIN);
                     }
                     t1->flags[(y + 1) * t1->stride + x + 1] |= JPEG2000_T1_VIS;
                 }
@@ -1902,11 +1903,10 @@  static void decode_sigpass(Jpeg2000T1Context *t1, int width, int height,
 static void decode_refpass(Jpeg2000T1Context *t1, int width, int height,
                            int bpno, int vert_causal_ctx_csty_symbol)
 {
-    int phalf, nhalf;
+    int phalf;
     int y0, x, y;
 
     phalf = 1 << (bpno - 1);
-    nhalf = -phalf;
 
     for (y0 = 0; y0 < height; y0 += 4)
         for (x = 0; x < width; x++)
@@ -1915,10 +1915,13 @@  static void decode_refpass(Jpeg2000T1Context *t1, int width, int height,
                     int flags_mask = (vert_causal_ctx_csty_symbol && y == y0 + 3) ?
                         ~(JPEG2000_T1_SIG_S | JPEG2000_T1_SIG_SW | JPEG2000_T1_SIG_SE | JPEG2000_T1_SGN_S) : -1;
                     int ctxno = ff_jpeg2000_getrefctxno(t1->flags[(y + 1) * t1->stride + x + 1] & flags_mask);
-                    int r     = ff_mqc_decode(&t1->mqc,
-                                              t1->mqc.cx_states + ctxno)
-                                ? phalf : nhalf;
-                    t1->data[(y) * t1->stride + x]          += t1->data[(y) * t1->stride + x] < 0 ? -r : r;
+                    t1->data[(y) * t1->stride + x] |= phalf;
+                    if (ff_mqc_decode(&t1->mqc, t1->mqc.cx_states + ctxno))
+                        t1->data[(y) * t1->stride + x] |= phalf << 1;
+                    else {
+                        t1->data[(y) * t1->stride + x] &= ~(phalf << 1);
+
+                    }
                     t1->flags[(y + 1) * t1->stride + x + 1] |= JPEG2000_T1_REF;
                 }
 }
@@ -1966,11 +1969,9 @@  static void decode_clnpass(const Jpeg2000DecoderContext *s, Jpeg2000T1Context *t
                     int xorbit;
                     int ctxno = ff_jpeg2000_getsgnctxno(t1->flags[(y + 1) * t1->stride + x + 1] & flags_mask,
                                                         &xorbit);
-                    t1->data[(y) * t1->stride + x] = (ff_mqc_decode(&t1->mqc,
-                                                    t1->mqc.cx_states + ctxno) ^
-                                      xorbit)
-                                     ? -mask : mask;
-                    ff_jpeg2000_set_significance(t1, x, y, t1->data[(y) * t1->stride + x] < 0);
+                    t1->data[(y) * t1->stride + x] |= (ff_mqc_decode(&t1->mqc, t1->mqc.cx_states + ctxno) ^ xorbit) << 31;
+                    t1->data[(y) * t1->stride + x] |= mask;
+                    ff_jpeg2000_set_significance(t1, x, y, t1->data[(y) * t1->stride + x] & INT32_MIN);
                 }
                 dec = 0;
                 t1->flags[(y + 1) * t1->stride + x + 1] &= ~JPEG2000_T1_VIS;
@@ -1991,9 +1992,9 @@  static void decode_clnpass(const Jpeg2000DecoderContext *s, Jpeg2000T1Context *t
 
 static int decode_cblk(const Jpeg2000DecoderContext *s, Jpeg2000CodingStyle *codsty,
                        Jpeg2000T1Context *t1, Jpeg2000Cblk *cblk,
-                       int width, int height, int bandpos, uint8_t roi_shift)
+                       int width, int height, int bandpos, uint8_t roi_shift, const int M_b)
 {
-    int passno = cblk->npasses, pass_t = 2, bpno = cblk->nonzerobits - 1;
+    int passno = cblk->npasses, pass_t = 2, bpno = cblk->nonzerobits - 1 + 31 - M_b - 1 - roi_shift;
     int pass_cnt = 0;
     int vert_causal_ctx_csty_symbol = codsty->cblk_style & JPEG2000_CBLK_VSC;
     int term_cnt = 0;
@@ -2068,22 +2069,25 @@  static int decode_cblk(const Jpeg2000DecoderContext *s, Jpeg2000CodingStyle *cod
         av_log(s->avctx, AV_LOG_WARNING, "Synthetic End of Stream Marker Read.\n");
     }
 
+    /* Reconstruct the sample values */
+    for (int y = 0; y < height; y++) {
+        for (int x = 0; x < width; x++) {
+            int32_t sign, n, val;
+            const uint32_t mask  = UINT32_MAX >> (M_b + 1); // bit mask for ROI detection
+
+            n = x + (y * t1->stride);
+            val = t1->data[n];
+            sign = val & INT32_MIN;
+            val &= INT32_MAX;
+            /* ROI shift, if necessary */
+            if (roi_shift && (((uint32_t)val & ~mask) == 0))
+                val <<= roi_shift;
+            t1->data[n] = val | sign; /* NOTE: Binary point for reconstruction value is located in 31 - M_b */
+        }
+    }
     return 1;
 }
 
-static inline int roi_shift_param(Jpeg2000Component *comp,
-                                   int quan_parameter)
-{
-    uint8_t roi_shift;
-    int val;
-    roi_shift = comp->roi_shift;
-    val = (quan_parameter < 0)?-quan_parameter:quan_parameter;
-
-    if (val > (1 << roi_shift))
-        return (quan_parameter < 0)?-(val >> roi_shift):(val >> roi_shift);
-    return quan_parameter;
-}
-
 /* TODO: Verify dequantization for lossless case
  * comp->data can be float or int
  * band->stepsize can be float or int
@@ -2093,50 +2097,86 @@  static inline int roi_shift_param(Jpeg2000Component *comp,
 /* Float dequantization of a codeblock.*/
 static void dequantization_float(int x, int y, Jpeg2000Cblk *cblk,
                                  Jpeg2000Component *comp,
-                                 Jpeg2000T1Context *t1, Jpeg2000Band *band)
+                                 Jpeg2000T1Context *t1, Jpeg2000Band *band, const int M_b)
 {
     int i, j;
     int w = cblk->coord[0][1] - cblk->coord[0][0];
+    const int downshift = 31 - M_b;
+    float fscale = band->f_stepsize;
+    fscale /= (float)(1 << downshift);
     for (j = 0; j < (cblk->coord[1][1] - cblk->coord[1][0]); ++j) {
         float *datap = &comp->f_data[(comp->coord[0][1] - comp->coord[0][0]) * (y + j) + x];
         int *src = t1->data + j*t1->stride;
-        for (i = 0; i < w; ++i)
-            datap[i] = src[i] * band->f_stepsize;
+        for (i = 0; i < w; ++i) {
+            int val = src[i];
+            if (val < 0) // Convert sign-magnitude to two's complement
+                val = -(val & INT32_MAX);
+            datap[i] = (float)val * fscale;
+        }
     }
 }
 
 /* Integer dequantization of a codeblock.*/
 static void dequantization_int(int x, int y, Jpeg2000Cblk *cblk,
                                Jpeg2000Component *comp,
-                               Jpeg2000T1Context *t1, Jpeg2000Band *band)
+                               Jpeg2000T1Context *t1, Jpeg2000Band *band, const int M_b)
 {
     int i, j;
+    const int downshift = 31 - M_b;
     int w = cblk->coord[0][1] - cblk->coord[0][0];
     for (j = 0; j < (cblk->coord[1][1] - cblk->coord[1][0]); ++j) {
         int32_t *datap = &comp->i_data[(comp->coord[0][1] - comp->coord[0][0]) * (y + j) + x];
         int *src = t1->data + j*t1->stride;
         if (band->i_stepsize == 32768) {
-            for (i = 0; i < w; ++i)
-                datap[i] = src[i] / 2;
+            for (i = 0; i < w; ++i) {
+                int val = src[i];
+                if (val < 0)  // Convert sign-magnitude to two's complement
+                    val = -((val & INT32_MAX) >> downshift);
+                else
+                    val >>= downshift;
+                datap[i] = val;
+            }
         } else {
             // This should be VERY uncommon
-            for (i = 0; i < w; ++i)
-                datap[i] = (src[i] * (int64_t)band->i_stepsize) / 65536;
+            for (i = 0; i < w; ++i) {
+                int val = src[i];
+                if (val < 0)  // Convert sign-magnitude to two's complement
+                    val = -((val & INT32_MAX) >> downshift);
+                else
+                    val >>= downshift;
+                datap[i] = (val * (int64_t)band->i_stepsize) / 65536;
+            }
         }
     }
 }
 
 static void dequantization_int_97(int x, int y, Jpeg2000Cblk *cblk,
                                Jpeg2000Component *comp,
-                               Jpeg2000T1Context *t1, Jpeg2000Band *band)
+                               Jpeg2000T1Context *t1, Jpeg2000Band *band, const int M_b)
 {
     int i, j;
     int w = cblk->coord[0][1] - cblk->coord[0][0];
+    float fscale = band->f_stepsize;
+    const int downshift = 31 - M_b;
+    const int PRESCALE = 6; // At least 6 is required to pass the conformance tests in ISO/IEC 15444-4
+    int scale;
+
+    fscale /= (float)(1 << downshift);
+    fscale *= (float)(1 << PRESCALE);
+    fscale *= (float)(1 << (16 + I_PRESHIFT));
+    scale = (int)(fscale + 0.5);
+    band->i_stepsize = scale;
     for (j = 0; j < (cblk->coord[1][1] - cblk->coord[1][0]); ++j) {
         int32_t *datap = &comp->i_data[(comp->coord[0][1] - comp->coord[0][0]) * (y + j) + x];
         int *src = t1->data + j*t1->stride;
-        for (i = 0; i < w; ++i)
-            datap[i] = (src[i] * (int64_t)band->i_stepsize + (1<<15)) >> 16;
+        for (i = 0; i < w; ++i) {
+            int val = src[i];
+            if (val < 0) // Convert sign-magnitude to two's complement
+                val = -(val & INT32_MAX);
+            // Shifting down to prevent overflow in dequantization
+            val = (val + (1 << (PRESCALE - 1))) >> PRESCALE;
+            datap[i] = RSHIFT(val * (int64_t)band->i_stepsize, 16);
+        }
     }
 }
 
@@ -2168,18 +2208,6 @@  static inline void mct_decode(const Jpeg2000DecoderContext *s, Jpeg2000Tile *til
     s->dsp.mct_decode[tile->codsty[0].transform](src[0], src[1], src[2], csize);
 }
 
-static inline void roi_scale_cblk(Jpeg2000Cblk *cblk,
-                                  Jpeg2000Component *comp,
-                                  Jpeg2000T1Context *t1)
-{
-    int i, j;
-    int w = cblk->coord[0][1] - cblk->coord[0][0];
-    for (j = 0; j < (cblk->coord[1][1] - cblk->coord[1][0]); ++j) {
-        int *src = t1->data + j*t1->stride;
-        for (i = 0; i < w; ++i)
-            src[i] = roi_shift_param(comp, src[i]);
-    }
-}
 
 static inline int tile_codeblocks(const Jpeg2000DecoderContext *s, Jpeg2000Tile *tile)
 {
@@ -2242,7 +2270,7 @@  static inline int tile_codeblocks(const Jpeg2000DecoderContext *s, Jpeg2000Tile
                             ret = decode_cblk(s, codsty, &t1, cblk,
                                               cblk->coord[0][1] - cblk->coord[0][0],
                                               cblk->coord[1][1] - cblk->coord[1][0],
-                                              bandpos, comp->roi_shift);
+                                              bandpos, comp->roi_shift, M_b);
 
                         if (ret)
                             coded = 1;
@@ -2251,14 +2279,12 @@  static inline int tile_codeblocks(const Jpeg2000DecoderContext *s, Jpeg2000Tile
                         x = cblk->coord[0][0] - band->coord[0][0];
                         y = cblk->coord[1][0] - band->coord[1][0];
 
-                        if (comp->roi_shift)
-                            roi_scale_cblk(cblk, comp, &t1);
                         if (codsty->transform == FF_DWT97)
-                            dequantization_float(x, y, cblk, comp, &t1, band);
+                            dequantization_float(x, y, cblk, comp, &t1, band, M_b);
                         else if (codsty->transform == FF_DWT97_INT)
-                            dequantization_int_97(x, y, cblk, comp, &t1, band);
+                            dequantization_int_97(x, y, cblk, comp, &t1, band, M_b);
                         else
-                            dequantization_int(x, y, cblk, comp, &t1, band);
+                            dequantization_int(x, y, cblk, comp, &t1, band, M_b);
                    } /* end cblk */
                 } /*end prec */
             } /* end band */
diff --git a/libavcodec/jpeg2000dwt.c b/libavcodec/jpeg2000dwt.c
index 34e33553f7..9ee8122658 100644
--- a/libavcodec/jpeg2000dwt.c
+++ b/libavcodec/jpeg2000dwt.c
@@ -39,13 +39,12 @@ 
 
 /* Lifting parameters in integer format.
  * Computed as param = (float param) * (1 << 16) */
-#define I_LFTG_ALPHA  103949ll
-#define I_LFTG_BETA     3472ll
-#define I_LFTG_GAMMA   57862ll
-#define I_LFTG_DELTA   29066ll
-#define I_LFTG_K       80621ll
-#define I_LFTG_X       53274ll
-#define I_PRESHIFT 8
+#define I_LFTG_ALPHA_PRIME   38413ll // = 103949 - 65536, (= alpha - 1.0)
+#define I_LFTG_BETA           3472ll
+#define I_LFTG_GAMMA         57862ll
+#define I_LFTG_DELTA         29066ll
+#define I_LFTG_K             80621ll
+#define I_LFTG_X             53274ll
 
 static inline void extend53(int *p, int i0, int i1)
 {
@@ -234,8 +233,11 @@  static void sd_1d97_int(int *p, int i0, int i1)
     extend97_int(p, i0, i1);
     i0++; i1++;
 
-    for (i = (i0>>1) - 2; i < (i1>>1) + 1; i++)
-        p[2 * i + 1] -= (I_LFTG_ALPHA * (p[2 * i]     + p[2 * i + 2]) + (1 << 15)) >> 16;
+    for (i = (i0>>1) - 2; i < (i1>>1) + 1; i++) {
+        const int64_t sum = p[2 * i] + p[2 * i + 2];
+        p[2 * i + 1] -= sum;
+        p[2 * i + 1] -= (I_LFTG_ALPHA_PRIME * sum + (1 << 15)) >> 16;
+    }
     for (i = (i0>>1) - 1; i < (i1>>1) + 1; i++)
         p[2 * i]     -= (I_LFTG_BETA  * (p[2 * i - 1] + p[2 * i + 1]) + (1 << 15)) >> 16;
     for (i = (i0>>1) - 1; i < (i1>>1); i++)
@@ -276,7 +278,7 @@  static void dwt_encode97_int(DWTContext *s, int *t)
 
             // copy back and deinterleave
             for (i =   mv; i < lv; i+=2, j++)
-                t[w*j + lp] = ((l[i] * I_LFTG_X) + (1 << 15)) >> 16;
+                t[w*j + lp] = l[i];
             for (i = 1-mv; i < lv; i+=2, j++)
                 t[w*j + lp] = l[i];
         }
@@ -293,7 +295,7 @@  static void dwt_encode97_int(DWTContext *s, int *t)
 
             // copy back and deinterleave
             for (i =   mh; i < lh; i+=2, j++)
-                t[w*lp + j] = ((l[i] * I_LFTG_X) + (1 << 15)) >> 16;
+                t[w*lp + j] = l[i];
             for (i = 1-mh; i < lh; i+=2, j++)
                 t[w*lp + j] = l[i];
         }
@@ -301,7 +303,7 @@  static void dwt_encode97_int(DWTContext *s, int *t)
     }
 
     for (i = 0; i < w * h; i++)
-        t[i] = (t[i] + ((1<<I_PRESHIFT)>>1)) >> I_PRESHIFT;
+        t[i] = (t[i] + ((1<<(I_PRESHIFT))>>1)) >> (I_PRESHIFT);
 }
 
 static void sr_1d53(unsigned *p, int i0, int i1)
@@ -471,8 +473,11 @@  static void sr_1d97_int(int32_t *p, int i0, int i1)
     for (i = (i0 >> 1); i < (i1 >> 1) + 1; i++)
         p[2 * i]     += (I_LFTG_BETA  * (p[2 * i - 1] + (int64_t)p[2 * i + 1]) + (1 << 15)) >> 16;
     /* step 6 */
-    for (i = (i0 >> 1); i < (i1 >> 1); i++)
-        p[2 * i + 1] += (I_LFTG_ALPHA * (p[2 * i]     + (int64_t)p[2 * i + 2]) + (1 << 15)) >> 16;
+    for (i = (i0 >> 1); i < (i1 >> 1); i++) {
+        const int64_t sum = p[2 * i] + (int64_t) p[2 * i + 2];
+        p[2 * i + 1] += sum;
+        p[2 * i + 1] += (I_LFTG_ALPHA_PRIME * sum + (1 << 15)) >> 16;
+    }
 }
 
 static void dwt_decode97_int(DWTContext *s, int32_t *t)
@@ -486,9 +491,6 @@  static void dwt_decode97_int(DWTContext *s, int32_t *t)
     /* position at index O of line range [0-5,w+5] cf. extend function */
     line += 5;
 
-    for (i = 0; i < w * h; i++)
-        data[i] *= 1LL << I_PRESHIFT;
-
     for (lev = 0; lev < s->ndeclevels; lev++) {
         int lh = s->linelen[lev][0],
             lv = s->linelen[lev][1],
@@ -500,9 +502,9 @@  static void dwt_decode97_int(DWTContext *s, int32_t *t)
         l = line + mh;
         for (lp = 0; lp < lv; lp++) {
             int i, j = 0;
-            // rescale with interleaving
+            // interleaving
             for (i = mh; i < lh; i += 2, j++)
-                l[i] = ((data[w * lp + j] * I_LFTG_K) + (1 << 15)) >> 16;
+                l[i] = data[w * lp + j];
             for (i = 1 - mh; i < lh; i += 2, j++)
                 l[i] = data[w * lp + j];
 
@@ -516,9 +518,9 @@  static void dwt_decode97_int(DWTContext *s, int32_t *t)
         l = line + mv;
         for (lp = 0; lp < lh; lp++) {
             int i, j = 0;
-            // rescale with interleaving
+            // interleaving
             for (i = mv; i < lv; i += 2, j++)
-                l[i] = ((data[w * j + lp] * I_LFTG_K) + (1 << 15)) >> 16;
+                l[i] = data[w * j + lp];
             for (i = 1 - mv; i < lv; i += 2, j++)
                 l[i] = data[w * j + lp];
 
@@ -530,7 +532,8 @@  static void dwt_decode97_int(DWTContext *s, int32_t *t)
     }
 
     for (i = 0; i < w * h; i++)
-        data[i] = (data[i] + ((1LL<<I_PRESHIFT)>>1)) >> I_PRESHIFT;
+        // We shift down by `I_PRESHIFT` because the input coefficients `datap[]` were shifted up by `I_PRESHIFT` to secure the precision
+        data[i] = (int32_t)(data[i] + ((1LL<<(I_PRESHIFT))>>1)) >> (I_PRESHIFT);
 }
 
 int ff_jpeg2000_dwt_init(DWTContext *s, int border[2][2],
diff --git a/libavcodec/jpeg2000dwt.h b/libavcodec/jpeg2000dwt.h
index 718d183ac1..62f0548ac8 100644
--- a/libavcodec/jpeg2000dwt.h
+++ b/libavcodec/jpeg2000dwt.h
@@ -32,6 +32,7 @@ 
 #define FF_DWT_MAX_DECLVLS 32 ///< max number of decomposition levels
 #define F_LFTG_K      1.230174104914001f
 #define F_LFTG_X      0.812893066115961f
+#define I_PRESHIFT 8
 
 enum DWTType {
     FF_DWT97,
diff --git a/libavcodec/jpeg2000htdec.c b/libavcodec/jpeg2000htdec.c
index c47c8d61fe..186a6873ac 100644
--- a/libavcodec/jpeg2000htdec.c
+++ b/libavcodec/jpeg2000htdec.c
@@ -1314,8 +1314,6 @@  ff_jpeg2000_decode_htj2k(const Jpeg2000DecoderContext *s, Jpeg2000CodingStyle *c
         jpeg2000_decode_magref_segment(width, height, quad_buf_width, Dref, Lref,
                                        pLSB - 1, sample_buf, block_states);
 
-    pLSB = 31 - M_b;
-
     /* Reconstruct the sample values */
     for (int y = 0; y < height; y++) {
         for (int x = 0; x < width; x++) {
@@ -1328,12 +1326,7 @@  ff_jpeg2000_decode_htj2k(const Jpeg2000DecoderContext *s, Jpeg2000CodingStyle *c
             /* ROI shift, if necessary */
             if (roi_shift && (((uint32_t)val & ~mask) == 0))
                 val <<= roi_shift;
-            /* Convert sign-magnitude to two's complement. */
-            if (sign)
-                val = -val;
-            /* Shift down to 1 bit upper from decimal point for reconstruction value (= 0.5) */
-            val >>= (pLSB - 1);
-            t1->data[n] = val;
+            t1->data[n] = val | sign; /* NOTE: Binary point for reconstruction value is located in 31 - M_b */
         }
     }
 free:
diff --git a/libavcodec/tests/jpeg2000dwt.c b/libavcodec/tests/jpeg2000dwt.c
index 520ecc05a3..9b26440ca8 100644
--- a/libavcodec/tests/jpeg2000dwt.c
+++ b/libavcodec/tests/jpeg2000dwt.c
@@ -46,6 +46,11 @@  static int test_dwt(int *array, int *ref, int border[2][2], int decomp_levels, i
         fprintf(stderr, "ff_dwt_encode failed\n");
         return 1;
     }
+    if (type == FF_DWT97_INT) {
+        // pre-scaling to simulate dequantization which places the binary point at 1 bit above from LSB
+        for (j = 0; j< s->linelen[decomp_levels-1][0] * s->linelen[decomp_levels-1][1]; j++)
+            array[j] <<= I_PRESHIFT;
+    }
     ret = ff_dwt_decode(s, array);
     if (ret < 0) {
         fprintf(stderr, "ff_dwt_encode failed\n");
diff --git a/tests/ref/fate/j2k-dwt b/tests/ref/fate/j2k-dwt
index 42415f00f9..9fbaaefd02 100644
--- a/tests/ref/fate/j2k-dwt
+++ b/tests/ref/fate/j2k-dwt
@@ -1,60 +1,60 @@ 
 5/3i, decomp:15 border 151 170 140 183 milli-err2:        0
-9/7i, decomp:15 border 151 170 140 183 milli-err2:      544
+9/7i, decomp:15 border 151 170 140 183 milli-err2:      112
 9/7f, decomp:15 border 151 170 140 183 err2:               0.000
 5/3i, decomp:21 border 173 201  81 189 milli-err2:        0
-9/7i, decomp:21 border 173 201  81 189 milli-err2:      592
+9/7i, decomp:21 border 173 201  81 189 milli-err2:      109
 9/7f, decomp:21 border 173 201  81 189 err2:               0.000
 5/3i, decomp:22 border 213 227  76 245 milli-err2:        0
-9/7i, decomp:22 border 213 227  76 245 milli-err2:      533
+9/7i, decomp:22 border 213 227  76 245 milli-err2:      115
 9/7f, decomp:22 border 213 227  76 245 err2:               0.000
 5/3i, decomp:13 border 134 157 184 203 milli-err2:        0
-9/7i, decomp:13 border 134 157 184 203 milli-err2:      535
+9/7i, decomp:13 border 134 157 184 203 milli-err2:      109
 9/7f, decomp:13 border 134 157 184 203 err2:               0.000
 5/3i, decomp: 1 border 204 237   6 106 milli-err2:        0
-9/7i, decomp: 1 border 204 237   6 106 milli-err2:      219
+9/7i, decomp: 1 border 204 237   6 106 milli-err2:       95
 9/7f, decomp: 1 border 204 237   6 106 err2:               0.000
 5/3i, decomp:28 border  76 211  13 210 milli-err2:        0
-9/7i, decomp:28 border  76 211  13 210 milli-err2:      791
+9/7i, decomp:28 border  76 211  13 210 milli-err2:      117
 9/7f, decomp:28 border  76 211  13 210 err2:               0.000
 5/3i, decomp:21 border  76  99  43 123 milli-err2:        0
-9/7i, decomp:21 border  76  99  43 123 milli-err2:      686
+9/7i, decomp:21 border  76  99  43 123 milli-err2:      104
 9/7f, decomp:21 border  76  99  43 123 err2:               0.000
 5/3i, decomp:15 border 192 243 174 204 milli-err2:        0
-9/7i, decomp:15 border 192 243 174 204 milli-err2:      476
+9/7i, decomp:15 border 192 243 174 204 milli-err2:      122
 9/7f, decomp:15 border 192 243 174 204 err2:               0.000
 5/3i, decomp:21 border  17  68  93 204 milli-err2:        0
-9/7i, decomp:21 border  17  68  93 204 milli-err2:      633
+9/7i, decomp:21 border  17  68  93 204 milli-err2:      124
 9/7f, decomp:21 border  17  68  93 204 err2:               0.000
 5/3i, decomp:11 border 142 168  82 174 milli-err2:        0
-9/7i, decomp:11 border 142 168  82 174 milli-err2:      696
+9/7i, decomp:11 border 142 168  82 174 milli-err2:      115
 9/7f, decomp:11 border 142 168  82 174 err2:               0.000
 5/3i, decomp:23 border 142 209 171 235 milli-err2:        0
-9/7i, decomp:23 border 142 209 171 235 milli-err2:      626
+9/7i, decomp:23 border 142 209 171 235 milli-err2:      120
 9/7f, decomp:23 border 142 209 171 235 err2:               0.000
 5/3i, decomp:30 border  37 185  79 245 milli-err2:        0
-9/7i, decomp:30 border  37 185  79 245 milli-err2:      953
+9/7i, decomp:30 border  37 185  79 245 milli-err2:      116
 9/7f, decomp:30 border  37 185  79 245 err2:               0.000
 5/3i, decomp: 5 border 129 236  30 243 milli-err2:        0
-9/7i, decomp: 5 border 129 236  30 243 milli-err2:      620
+9/7i, decomp: 5 border 129 236  30 243 milli-err2:      117
 9/7f, decomp: 5 border 129 236  30 243 err2:               0.000
 5/3i, decomp:10 border   5 160 146 247 milli-err2:        0
-9/7i, decomp:10 border   5 160 146 247 milli-err2:      797
+9/7i, decomp:10 border   5 160 146 247 milli-err2:      117
 9/7f, decomp:10 border   5 160 146 247 err2:               0.000
 5/3i, decomp: 5 border 104 162   6  47 milli-err2:        0
-9/7i, decomp: 5 border 104 162   6  47 milli-err2:      603
+9/7i, decomp: 5 border 104 162   6  47 milli-err2:      120
 9/7f, decomp: 5 border 104 162   6  47 err2:               0.000
 5/3i, decomp:24 border  78 250 102 218 milli-err2:        0
-9/7i, decomp:24 border  78 250 102 218 milli-err2:      836
+9/7i, decomp:24 border  78 250 102 218 milli-err2:      113
 9/7f, decomp:24 border  78 250 102 218 err2:               0.000
 5/3i, decomp:28 border  86  98  56  79 milli-err2:        0
-9/7i, decomp:28 border  86  98  56  79 milli-err2:      597
+9/7i, decomp:28 border  86  98  56  79 milli-err2:      115
 9/7f, decomp:28 border  86  98  56  79 err2:               0.000
 5/3i, decomp: 6 border  95 238 197 214 milli-err2:        0
-9/7i, decomp: 6 border  95 238 197 214 milli-err2:      478
+9/7i, decomp: 6 border  95 238 197 214 milli-err2:      114
 9/7f, decomp: 6 border  95 238 197 214 err2:               0.000
 5/3i, decomp:17 border  77 169  93 165 milli-err2:        0
-9/7i, decomp:17 border  77 169  93 165 milli-err2:      616
+9/7i, decomp:17 border  77 169  93 165 milli-err2:      124
 9/7f, decomp:17 border  77 169  93 165 err2:               0.000
 5/3i, decomp:22 border 178 187   7 119 milli-err2:        0
-9/7i, decomp:22 border 178 187   7 119 milli-err2:      392
+9/7i, decomp:22 border 178 187   7 119 milli-err2:       96
 9/7f, decomp:22 border 178 187   7 119 err2:               0.000
diff --git a/tests/ref/fate/jpeg2000-dcinema b/tests/ref/fate/jpeg2000-dcinema
index cdf8cd4fc6..223d753580 100644
--- a/tests/ref/fate/jpeg2000-dcinema
+++ b/tests/ref/fate/jpeg2000-dcinema
@@ -3,5 +3,5 @@ 
 #codec_id 0: rawvideo
 #dimensions 0: 1920x1080
 #sar 0: 1/1
-0,          0,          0,        1, 12441600, 0xfcf6a127
-0,          1,          1,        1, 12441600, 0x577b6a64
+0,          0,          0,        1, 12441600, 0x0cf44be1
+0,          1,          1,        1, 12441600, 0xefe54482
diff --git a/tests/ref/fate/jpeg2000dec-p0_04 b/tests/ref/fate/jpeg2000dec-p0_04
index 5de7880c44..4c3ec322e4 100644
--- a/tests/ref/fate/jpeg2000dec-p0_04
+++ b/tests/ref/fate/jpeg2000dec-p0_04
@@ -3,4 +3,4 @@ 
 #codec_id 0: rawvideo
 #dimensions 0: 640x480
 #sar 0: 0/1
-0,          0,          0,        1,   921600, 0x097d9665
+0,          0,          0,        1,   921600, 0x38311bba
diff --git a/tests/ref/fate/jpeg2000dec-p0_05 b/tests/ref/fate/jpeg2000dec-p0_05
index bb215043a1..410a08a62c 100644
--- a/tests/ref/fate/jpeg2000dec-p0_05
+++ b/tests/ref/fate/jpeg2000dec-p0_05
@@ -3,4 +3,4 @@ 
 #codec_id 0: rawvideo
 #dimensions 0: 1024x1024
 #sar 0: 0/1
-0,          0,          0,        1,  2621440, 0x081f5048
+0,          0,          0,        1,  2621440, 0x9608ad8b
diff --git a/tests/ref/fate/jpeg2000dec-p0_09 b/tests/ref/fate/jpeg2000dec-p0_09
index 1755e7cc7d..ff78bf9dc7 100644
--- a/tests/ref/fate/jpeg2000dec-p0_09
+++ b/tests/ref/fate/jpeg2000dec-p0_09
@@ -3,4 +3,4 @@ 
 #codec_id 0: rawvideo
 #dimensions 0: 17x37
 #sar 0: 0/1
-0,          0,          0,        1,      629, 0x5c9c389d
+0,          0,          0,        1,      629, 0xf35d38d6
diff --git a/tests/ref/vsynth/vsynth1-jpeg2000-97 b/tests/ref/vsynth/vsynth1-jpeg2000-97
index c979ab5c36..820249c6c0 100644
--- a/tests/ref/vsynth/vsynth1-jpeg2000-97
+++ b/tests/ref/vsynth/vsynth1-jpeg2000-97
@@ -1,4 +1,4 @@ 
-5e6d32b7205d31245b0d1f015d08b515 *tests/data/fate/vsynth1-jpeg2000-97.avi
-3643886 tests/data/fate/vsynth1-jpeg2000-97.avi
-a2262f1da2f49bc196b780a6b47ec4e8 *tests/data/fate/vsynth1-jpeg2000-97.out.rawvideo
-stddev:    4.23 PSNR: 35.59 MAXDIFF:   53 bytes:  7603200/  7603200
+803c2e8a4d054c5d603eed4c77abe492 *tests/data/fate/vsynth1-jpeg2000-97.avi
+4466514 tests/data/fate/vsynth1-jpeg2000-97.avi
+c9cf5a4580f10b00056c8d8731d21395 *tests/data/fate/vsynth1-jpeg2000-97.out.rawvideo
+stddev:    3.82 PSNR: 36.49 MAXDIFF:   49 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth2-jpeg2000-97 b/tests/ref/vsynth/vsynth2-jpeg2000-97
index 591f8b6bb3..e40dca5b9a 100644
--- a/tests/ref/vsynth/vsynth2-jpeg2000-97
+++ b/tests/ref/vsynth/vsynth2-jpeg2000-97
@@ -1,4 +1,4 @@ 
-aa5573136c54b1855d8d00efe2a149bd *tests/data/fate/vsynth2-jpeg2000-97.avi
-2464134 tests/data/fate/vsynth2-jpeg2000-97.avi
-1f63c8b065e847e4c63d57ce23442ea8 *tests/data/fate/vsynth2-jpeg2000-97.out.rawvideo
-stddev:    3.21 PSNR: 37.99 MAXDIFF:   26 bytes:  7603200/  7603200
+c189c8b89c7aee3ab4f4a5aafdf7568f *tests/data/fate/vsynth2-jpeg2000-97.avi
+3225460 tests/data/fate/vsynth2-jpeg2000-97.avi
+4c0fbd7af969085d19dfabeb9634cddb *tests/data/fate/vsynth2-jpeg2000-97.out.rawvideo
+stddev:    2.55 PSNR: 39.98 MAXDIFF:   22 bytes:  7603200/  7603200
diff --git a/tests/ref/vsynth/vsynth3-jpeg2000-97 b/tests/ref/vsynth/vsynth3-jpeg2000-97
index 5d9d083791..92376e64cf 100644
--- a/tests/ref/vsynth/vsynth3-jpeg2000-97
+++ b/tests/ref/vsynth/vsynth3-jpeg2000-97
@@ -1,4 +1,4 @@ 
-522e12684aca4262a9d613cb2db7006c *tests/data/fate/vsynth3-jpeg2000-97.avi
-85526 tests/data/fate/vsynth3-jpeg2000-97.avi
-8def36ad1413ab3a5c2af2e1af4603f9 *tests/data/fate/vsynth3-jpeg2000-97.out.rawvideo
-stddev:    4.51 PSNR: 35.04 MAXDIFF:   47 bytes:    86700/    86700
+943cbdefa18b4a83175943f4e81e037c *tests/data/fate/vsynth3-jpeg2000-97.avi
+95642 tests/data/fate/vsynth3-jpeg2000-97.avi
+c4d58f0da2e8be602f54f032b58a581b *tests/data/fate/vsynth3-jpeg2000-97.out.rawvideo
+stddev:    4.11 PSNR: 35.84 MAXDIFF:   46 bytes:    86700/    86700
diff --git a/tests/ref/vsynth/vsynth_lena-jpeg2000-97 b/tests/ref/vsynth/vsynth_lena-jpeg2000-97
index 0539300185..dd2819b8cb 100644
--- a/tests/ref/vsynth/vsynth_lena-jpeg2000-97
+++ b/tests/ref/vsynth/vsynth_lena-jpeg2000-97
@@ -1,4 +1,4 @@ 
-80fe872c8afaad914da6ef037957d93b *tests/data/fate/vsynth_lena-jpeg2000-97.avi
-1937216 tests/data/fate/vsynth_lena-jpeg2000-97.avi
-1b97333a8dc115a5ba609b0070d89d4d *tests/data/fate/vsynth_lena-jpeg2000-97.out.rawvideo
-stddev:    2.82 PSNR: 39.10 MAXDIFF:   24 bytes:  7603200/  7603200
+9e2f5705be9d08494530724b625e17a4 *tests/data/fate/vsynth_lena-jpeg2000-97.avi
+2599714 tests/data/fate/vsynth_lena-jpeg2000-97.avi
+ab207505ec9c8a16bb45621404199e5c *tests/data/fate/vsynth_lena-jpeg2000-97.out.rawvideo
+stddev:    2.23 PSNR: 41.16 MAXDIFF:   20 bytes:  7603200/  7603200