diff mbox series

[FFmpeg-devel,2/2] avcodec/jpeg2000dec: Add support for High Throughput HTJ2K decoding.

Message ID 20221215190033.19251-1-etemesicaleb@gmail.com
State New
Headers show
Series [FFmpeg-devel,1/2] avcodec/jpeg2000dec: Move decoder structs to header files | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Caleb Etemesi Dec. 15, 2022, 7 p.m. UTC
From: caleb <etemesicaleb@gmail.com>

This is a revised patch with suggested changes from earlier and satisfies the Google Summer of
Code 2022 FFmpeg project to add a HTJ2K decoder to FFmpeg.

---
 libavcodec/Makefile        |    2 +-
 libavcodec/jpeg2000.h      |    3 +
 libavcodec/jpeg2000dec.c   |   69 +-
 libavcodec/jpeg2000dec.h   |    2 +
 libavcodec/jpeg2000htdec.c | 1441 ++++++++++++++++++++++++++++++++++++
 libavcodec/jpeg2000htdec.h |   28 +
 6 files changed, 1529 insertions(+), 16 deletions(-)
 create mode 100644 libavcodec/jpeg2000htdec.c
 create mode 100644 libavcodec/jpeg2000htdec.h

Comments

Andreas Rheinhardt Dec. 16, 2022, 12:29 a.m. UTC | #1
etemesicaleb@gmail.com:
> From: caleb <etemesicaleb@gmail.com>
> 
> This is a revised patch with suggested changes from earlier and satisfies the Google Summer of
> Code 2022 FFmpeg project to add a HTJ2K decoder to FFmpeg.
> 
> ---
>  libavcodec/Makefile        |    2 +-
>  libavcodec/jpeg2000.h      |    3 +
>  libavcodec/jpeg2000dec.c   |   69 +-
>  libavcodec/jpeg2000dec.h   |    2 +
>  libavcodec/jpeg2000htdec.c | 1441 ++++++++++++++++++++++++++++++++++++
>  libavcodec/jpeg2000htdec.h |   28 +
>  6 files changed, 1529 insertions(+), 16 deletions(-)
>  create mode 100644 libavcodec/jpeg2000htdec.c
>  create mode 100644 libavcodec/jpeg2000htdec.h
> 
> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
> index 98841ed07c..1ba9e09528 100644
> --- a/libavcodec/Makefile
> +++ b/libavcodec/Makefile
> @@ -464,7 +464,7 @@ OBJS-$(CONFIG_JACOSUB_DECODER)         += jacosubdec.o ass.o
>  OBJS-$(CONFIG_JPEG2000_ENCODER)        += j2kenc.o mqcenc.o mqc.o jpeg2000.o \
>                                            jpeg2000dwt.o
>  OBJS-$(CONFIG_JPEG2000_DECODER)        += jpeg2000dec.o jpeg2000.o jpeg2000dsp.o \
> -                                          jpeg2000dwt.o mqcdec.o mqc.o
> +                                          jpeg2000dwt.o mqcdec.o mqc.o jpeg2000htdec.o
>  OBJS-$(CONFIG_JPEGLS_DECODER)          += jpeglsdec.o jpegls.o
>  OBJS-$(CONFIG_JPEGLS_ENCODER)          += jpeglsenc.o jpegls.o
>  OBJS-$(CONFIG_JV_DECODER)              += jvdec.o
> diff --git a/libavcodec/jpeg2000.h b/libavcodec/jpeg2000.h
> index e5ecb4cbf9..b2a8e13244 100644
> --- a/libavcodec/jpeg2000.h
> +++ b/libavcodec/jpeg2000.h
> @@ -189,6 +189,9 @@ typedef struct Jpeg2000Cblk {
>      Jpeg2000Pass *passes;
>      Jpeg2000Layer *layers;
>      int coord[2][2]; // border coordinates {{x0, x1}, {y0, y1}}
> +    /*HTJ2K settings */
> +    int zbp;
> +    int pass_lengths[2];
>  } Jpeg2000Cblk; // code block
>  
>  typedef struct Jpeg2000Prec {
> diff --git a/libavcodec/jpeg2000dec.c b/libavcodec/jpeg2000dec.c
> index 7fc09cb558..5c92baa88e 100644
> --- a/libavcodec/jpeg2000dec.c
> +++ b/libavcodec/jpeg2000dec.c
> @@ -43,6 +43,7 @@
>  #include "jpeg2000dsp.h"
>  #include "profiles.h"
>  #include "jpeg2000dec.h"
> +#include "jpeg2000htdec.h"
>  
>  /* get_bits functions for JPEG2000 packet bitstream
>   * It is a get_bit function with a bit-stuffing routine. If the value of the
> @@ -428,12 +429,13 @@ static int get_cox(Jpeg2000DecoderContext *s, Jpeg2000CodingStyle *c)
>      c->cblk_style = bytestream2_get_byteu(&s->g);
>      if (c->cblk_style != 0) { // cblk style
>          if (c->cblk_style & JPEG2000_CTSY_HTJ2K_M || c->cblk_style & JPEG2000_CTSY_HTJ2K_F) {
> -            av_log(s->avctx, AV_LOG_ERROR, "Support for High throughput JPEG 2000 is not yet available\n");
> -            return AVERROR_PATCHWELCOME;
> +            av_log(s->avctx,AV_LOG_TRACE,"High Throughput jpeg 2000 codestream.\n");
> +            s->is_htj2k = 1;
> +        } else {
> +            av_log(s->avctx, AV_LOG_WARNING, "extra cblk styles %X\n", c->cblk_style);
> +            if (c->cblk_style & JPEG2000_CBLK_BYPASS)
> +                av_log(s->avctx, AV_LOG_WARNING, "Selective arithmetic coding bypass\n");
>          }
> -        av_log(s->avctx, AV_LOG_WARNING, "extra cblk styles %X\n", c->cblk_style);
> -        if (c->cblk_style & JPEG2000_CBLK_BYPASS)
> -            av_log(s->avctx, AV_LOG_WARNING, "Selective arithmetic coding bypass\n");
>      }
>      c->transform = bytestream2_get_byteu(&s->g); // DWT transformation type
>      /* set integer 9/7 DWT in case of BITEXACT flag */
> @@ -1058,13 +1060,15 @@ static int jpeg2000_decode_packet(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile,
>                  return incl;
>  
>              if (!cblk->npasses) {
> -                int v = expn[bandno] + numgbits - 1 -
> -                        tag_tree_decode(s, prec->zerobits + cblkno, 100);
> +                int zbp = tag_tree_decode(s,prec->zerobits + cblkno, 100);
> +                int v = expn[bandno] + numgbits - 1 - zbp;
> +
>                  if (v < 0 || v > 30) {
>                      av_log(s->avctx, AV_LOG_ERROR,
>                             "nonzerobits %d invalid or unsupported\n", v);
>                      return AVERROR_INVALIDDATA;
>                  }
> +                cblk->zbp = zbp;
>                  cblk->nonzerobits = v;
>              }
>              if ((newpasses = getnpasses(s)) < 0)
> @@ -1105,8 +1109,29 @@ static int jpeg2000_decode_packet(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile,
>                      }
>                  }
>  
> -                if ((ret = get_bits(s, av_log2(newpasses1) + cblk->lblock)) < 0)
> -                    return ret;
> +                if (newpasses > 1 && s->is_htj2k) {
> +                    // Retrieve pass lengths for each pass
> +                    int href_passes =  (cblk->npasses + newpasses - 1) % 3;
> +                    int segment_passes = newpasses - href_passes;
> +                    int pass_bound = 2;
> +                    int eb = 0;
> +                    int extra_bit = newpasses > 2 ? 1 : 0;
> +                    while (pass_bound <=segment_passes) {
> +                        eb++;
> +                        pass_bound +=pass_bound;
> +                    }
> +                    if ((ret = get_bits(s, llen + eb + 3)) < 0)
> +                        return ret;
> +                    cblk->pass_lengths[0] = ret;
> +                    if ((ret = get_bits(s, llen + 3 + extra_bit)) < 0)
> +                        return ret;
> +                    cblk->pass_lengths[1] = ret;
> +                    ret = cblk->pass_lengths[0] + cblk->pass_lengths[1];
> +                } else {
> +                    if ((ret = get_bits(s, av_log2(newpasses1) + cblk->lblock)) < 0)
> +                        return ret;
> +                    cblk->pass_lengths[0]=ret;
> +                }
>                  if (ret > cblk->data_allocated) {
>                      size_t new_size = FFMAX(2*cblk->data_allocated, ret);
>                      void *new = av_realloc(cblk->data, new_size);
> @@ -1857,7 +1882,10 @@ static inline void tile_codeblocks(const Jpeg2000DecoderContext *s, Jpeg2000Tile
>      for (compno = 0; compno < s->ncomponents; compno++) {
>          Jpeg2000Component *comp     = tile->comp + compno;
>          Jpeg2000CodingStyle *codsty = tile->codsty + compno;
> +        Jpeg2000QuantStyle *quantsty= tile->qntsty + compno;
> +
>          int coded = 0;
> +        int subbandno = 0;
>  
>          t1.stride = (1<<codsty->log2_cblk_width) + 2;
>  
> @@ -1865,7 +1893,7 @@ static inline void tile_codeblocks(const Jpeg2000DecoderContext *s, Jpeg2000Tile
>          for (reslevelno = 0; reslevelno < codsty->nreslevels2decode; reslevelno++) {
>              Jpeg2000ResLevel *rlevel = comp->reslevel + reslevelno;
>              /* Loop on bands */
> -            for (bandno = 0; bandno < rlevel->nbands; bandno++) {
> +            for (bandno = 0; bandno < rlevel->nbands; bandno++, subbandno++) {
>                  int nb_precincts, precno;
>                  Jpeg2000Band *band = rlevel->band + bandno;
>                  int cblkno = 0, bandpos;
> @@ -1885,12 +1913,23 @@ static inline void tile_codeblocks(const Jpeg2000DecoderContext *s, Jpeg2000Tile
>                      for (cblkno = 0;
>                           cblkno < prec->nb_codeblocks_width * prec->nb_codeblocks_height;
>                           cblkno++) {
> -                        int x, y;
> +                        int x, y, ret;
> +                        // Annex E (Equation E-2) ISO/IEC 15444-1:2019
> +                        int magp = quantsty->expn[subbandno] + quantsty->nguardbits - 1;
> +
>                          Jpeg2000Cblk *cblk = prec->cblk + cblkno;
> -                        int ret = decode_cblk(s, codsty, &t1, cblk,
> -                                    cblk->coord[0][1] - cblk->coord[0][0],
> -                                    cblk->coord[1][1] - cblk->coord[1][0],
> -                                    bandpos, comp->roi_shift);
> +
> +                        if (s->is_htj2k)
> +                            ret = ff_jpeg2000_decode_htj2k(s, codsty, &t1, cblk,
> +                                                           cblk->coord[0][1] - cblk->coord[0][0],
> +                                                           cblk->coord[1][1] - cblk->coord[1][0],
> +                                                           magp, comp->roi_shift);
> +                        else
> +                            ret = decode_cblk(s, codsty, &t1, cblk,
> +                                              cblk->coord[0][1] - cblk->coord[0][0],
> +                                              cblk->coord[1][1] - cblk->coord[1][0],
> +                                              bandpos, comp->roi_shift);
> +
>                          if (ret)
>                              coded = 1;
>                          else
> diff --git a/libavcodec/jpeg2000dec.h b/libavcodec/jpeg2000dec.h
> index b6410c1432..9728529026 100644
> --- a/libavcodec/jpeg2000dec.h
> +++ b/libavcodec/jpeg2000dec.h
> @@ -121,6 +121,8 @@ typedef struct Jpeg2000DecoderContext {
>  
>      /*options parameters*/
>      int             reduction_factor;
> +    /*HTJ2K params*/
> +    uint8_t         is_htj2k;
>  } Jpeg2000DecoderContext;
>  
>  #endif //AVCODEC_JPEG2000DEC_H
> diff --git a/libavcodec/jpeg2000htdec.c b/libavcodec/jpeg2000htdec.c
> new file mode 100644
> index 0000000000..8ea901855f
> --- /dev/null
> +++ b/libavcodec/jpeg2000htdec.c
> @@ -0,0 +1,1441 @@
> +/*
> + * Copyright (c) 2022 Caleb Etemesi <etemesicaleb@gmail.com>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +#include "jpeg2000htdec.h"
> +#include "jpeg2000.h"
> +#include "jpeg2000dec.h"
> +#include <stdint.h>
> +#include <libavutil/attributes.h>
> +#include <libavutil/common.h>
> +#include <libavutil/avassert.h>

Use "" instead of <>.

> +
> +#define J2K_Q1 0
> +
> +#define J2K_Q2 1
> +
> +#define HT_SHIFT_SIGMA 0
> +#define HT_SHIFT_SCAN 4
> +#define HT_SHIFT_REF 3
> +#define HT_SHIFT_REF_IND 2
> +/**
> + * @brief Table 2 in clause 7.3.3
> + */
> +const static uint8_t MEL_E[13] = {0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 5};
> +// Decode tables, found at the end of this
> +static const uint16_t dec_CxtVLC_table1[1024];
> +static const uint16_t dec_CxtVLC_table0[1024];
> +
> +typedef struct StateVars {
> +    int32_t pos;
> +    uint32_t bits;
> +    uint32_t tmp;
> +    uint32_t last;
> +    uint8_t bits_left;
> +    uint64_t bit_buf;
> +} StateVars;
> +

I really don't like that you are adding a new bitreader here. Can you
explain why this is supposed to be necessary?

> +typedef struct MelDecoderState {
> +    uint8_t k;
> +    uint8_t run;
> +    uint8_t one;
> +} MelDecoderState;
> +
> +/**
> + *  Given a precomputed c, checks whether n % d == 0
> + *
> + *  precomputed c for d is created by the precompute_c function
> + */
> +static av_always_inline uint32_t is_divisible(uint32_t n, uint64_t c)
> +{
> +    return n * c <= c - 1;
> +}
> +/**
> + * Precompute a number c given d that when combined with a dividend n
> + * returns whether n % d == 0
> + *
> + * Combined with is_divisible function
> + */
> +static av_always_inline uint64_t precompute_c(uint32_t divisor)
> +{
> +
> +    return 1 + (0xfffffffffffffffful / divisor);
> +}
> +/* Initializers */
> +
> +static void jpeg2000_init_zero(StateVars *s)
> +{
> +    s->bits_left = 0;
> +    s->bit_buf   = 0;
> +    s->tmp       = 0;
> +    s->bits      = 0;
> +    s->pos       = 0;
> +    s->last      = 0;
> +}
> +
> +static void jpeg2000_init_mel(StateVars *s, uint32_t Pcup)
> +{
> +    jpeg2000_init_zero(s);
> +    s->pos = Pcup;
> +}
> +
> +static void jpeg2000_init_mag_ref(StateVars *s, uint32_t Lref)
> +{
> +    s->pos       = Lref - 2;
> +    s->bits      = 0;
> +    s->last      = 0xFF;
> +    s->tmp       = 0;
> +    s->bits_left = 0;
> +    s->bit_buf   = 0;
> +}
> +
> +static void jpeg2000_init_mel_decoder(MelDecoderState *mel_state)
> +{
> +    mel_state->k   = 0;
> +    mel_state->run = 0;
> +    mel_state->one = 0;
> +}
> +
> +/**
> + * @brief Refill the buffer backwards in little Endian while skipping
> + * over stuffing bits
> + *
> + * Stuffing bits are those that appear in the position of any byte whose
> + * LSBs are all 1's if the last consumed byte was larger than 0x8F
> + */
> +static int jpeg2000_bitbuf_refill_backwards(StateVars *buffer,
> +                                            const uint8_t *array)
> +{
> +    uint64_t tmp = 0;
> +    int32_t position = buffer->pos;
> +    uint32_t new_bits = 32;
> +    uint32_t mask;
> +
> +    if (buffer->bits_left > 32)
> +        return 0; // enough data, no need to pull in more bits
> +
> +    /*
> +     * We are reading bits from end to start, and we need to handle them in LE.
> +     * therefore, if we read bytes ABCD, we need to convert
> +     * them to DCBA, but the bitstream is constructed in such a way that it is
> +     * BE when reading from back to front,so we need to swap bytes
> +     * but this doesn't work when position is less than 3,
> +     * we end up reading bits from the MEL byte-stream which
> +     * is a recipe for sleepless nights.
> +     *
> +     * So the trick is to branchlessly read and mask
> +     * depending on whatever was the initial position,
> +     * the mask is simply either 32 bits, 24 bits ,8 bits or 0 bits.
> +     * depending on how many bytes are available, with this, we don't
> +     * read past the end of the buffer,we mask
> +     * bits we already read ensuring we don't read twice.
> +     * and we can do it branchlessly without checking for positions.

Generally, it is not ok to read array[-1] even if the result is that is
masked away later. It is ok if array actually points into the middle of
a buffer, so that array[-1] and the other bytes accessed are actually
part of the buffer. Your remark about MEL bytestream indicates that this
might be the case here, but I'm not sure.

> +     *
> +     * We need to watch out for negative shift values which are UB in C hence
> +     * the MAX declarative.
> +     */
> +
> +    position -= 4;
> +    mask = (UINT64_C(1) << (FFMIN(4, buffer->pos + 1)) * 8) - 1;

In case buffer->pos + 1 < 4, position + 1 < 0 and therefore the next
lines will read array[-1]. And given that you are using FFMIN, there is
an implicit branch anyway. I suggest you use AV_RB32 to read only from a
position that is known to be in the buffer and then shift unwanted bytes
away.

> +    tmp  = array[position + 1] * (1ull << 24);

(uint32_t)array[position + 1] << 24 works as well and avoids the 64bit
intermediate and the multiplication hack.

> +    tmp |= array[position + 2] << 16;
> +    tmp |= array[position + 3] << 8;
> +    tmp |= array[position + 4];
> +    tmp &= mask;
> +
> +    // Branchlessly unstuff  bits

Branchlessly? I see four branches below.

> +
> +    // load temporary byte, which preceeds the position we
> +    // currently at, to ensure that we can also un-stuff if the
> +    // stuffed bit is the bottom most bits
> +    tmp <<= 8;
> +    tmp |= array[buffer->pos + 1];
> +
> +    if ((tmp & 0x7FFF000000) > 0x7F8F000000) {
> +        tmp &= 0x7FFFFFFFFF;
> +        new_bits--;
> +    }
> +    if ((tmp & 0x007FFF0000) > 0x007F8F0000) {
> +        tmp = (tmp & 0x007FFFFFFF) + ((tmp & 0xFF00000000) >> 1);
> +        new_bits--;
> +    }
> +    if ((tmp & 0x00007FFF00) > 0x00007F8F00) {
> +        tmp = (tmp & 0x00007FFFFF) + ((tmp & 0xFFFF000000) >> 1);
> +        new_bits--;
> +    }
> +    if ((tmp & 0x0000007FFF) > 0x0000007F8F) {
> +        tmp = (tmp & 0x0000007FFF) + ((tmp & 0xFFFFFF0000) >> 1);
> +        new_bits--;
> +    }
> +    /* remove temporary byte loaded. */
> +    tmp >>= 8;
> +
> +    /* Add bits to the MSB of the bit buffer */
> +    buffer->bit_buf |= tmp << buffer->bits_left;
> +    buffer->bits_left += new_bits;
> +    buffer->pos = FFMAX(0, position);
> +    return 0;
> +}
> +
> +/**
> + * @brief Refill the bit-buffer reading new bits going forward
> + * in the stream while skipping over stuffed bits.
> + */
> +static void jpeg2000_bitbuf_refill_forward(StateVars *buffer,
> +                                           const uint8_t *array,
> +                                           uint32_t length)
> +{
> +    while (buffer->bits_left < 32) {
> +        buffer->tmp = 0xFF;
> +        buffer->bits = (buffer->last == 0xFF) ? 7 : 8;
> +        if (buffer->pos <= length) {

Are you sure you want to have <= instead of < here? This could read
length + 1 bytes of array.

> +            buffer->tmp = array[buffer->pos];
> +            buffer->pos += 1;
> +            buffer->last = buffer->tmp;
> +        }
> +        buffer->bit_buf |= ((uint64_t) buffer->tmp) << buffer->bits_left;
> +        buffer->bits_left += buffer->bits;
> +    }
> +}
> +
> +/**
> + * @brief Drops bits from lower bits in the bit buffer
> + *
> + * @param buf: Struct containing bit buffers
> + * @param nbits: Number of bits to remove.
> + */
> +static av_always_inline void jpeg2000_bitbuf_drop_bits_lsb(StateVars *buf,
> +                                                           uint8_t nbits)
> +{
> +    if (buf->bits_left < nbits) {
> +        av_log(NULL, AV_LOG_ERROR, "Invalid bit read of %d, bits in buffer are %d\n", nbits, buf->bits_left);
> +        av_assert0(0);
> +    }
> +    buf->bit_buf >>= nbits;
> +    buf->bits_left -= nbits;
> +}
> +
> +/**
> + * @brief  Get bits from the bit buffer reading them
> + * from the least significant bits moving to the most significant bits.
> + *
> + * In case there are fewer bits, refill from `buf` moving backwards.
> + */
> +static av_always_inline uint64_t jpeg2000_bitbuf_get_bits_lsb(
> +        StateVars *bit_stream, uint8_t nbits, const uint8_t *buf)
> +{
> +    uint64_t bits;
> +    uint64_t mask = (1ull << nbits) - 1;
> +    if (bit_stream->bits_left < nbits)
> +        jpeg2000_bitbuf_refill_backwards(bit_stream, buf);
> +    bits = bit_stream->bit_buf & mask;
> +    jpeg2000_bitbuf_drop_bits_lsb(bit_stream, nbits);
> +    return bits;
> +}
> +
> +/**
> + * @brief Get bits from the bit buffer reading them
> + * from the least significant bits moving to the most significant bits
> + *
> + * In case there are fewer bits, refill from `buf` moving forward
> + */
> +static av_always_inline uint64_t jpeg2000_bitbuf_get_bits_lsb_forward(
> +        StateVars *bit_stream, uint8_t nbits, const uint8_t *buf, uint32_t length)
> +{
> +    uint64_t bits;
> +    uint64_t mask = (1ull << nbits) - 1;
> +
> +    if (bit_stream->bits_left <= nbits)
> +        jpeg2000_bitbuf_refill_forward(bit_stream, buf, length);
> +    bits = bit_stream->bit_buf & mask;
> +    jpeg2000_bitbuf_drop_bits_lsb(bit_stream, nbits);
> +    return bits;
> +}
> +
> +/**
> + * @brief Look ahead bit buffer without discarding bits
> + */
> +static av_always_inline uint64_t jpeg2000_bitbuf_peek_bits_lsb(StateVars *stream, uint8_t nbits)
> +{
> +    uint64_t mask = (1ull << nbits) - 1;
> +    return stream->bit_buf & mask;
> +}
> +
> +/**
> + * Variable Length Decoding Routines
> + */
> +static void jpeg2000_init_vlc(StateVars *s, uint32_t Lcup, uint32_t Pcup, const uint8_t *Dcup)
> +{
> +    s->bits_left = 0;
> +    s->bit_buf   = 0;
> +    s->pos       = Lcup - 2 - Pcup;
> +    s->last      = Dcup[Lcup - 2];
> +    s->tmp       = (s->last) >> 4;
> +    s->bits      = ((s->tmp & 7) < 7) ? 4 : 3;
> +
> +    jpeg2000_bitbuf_refill_backwards(s, Dcup + Pcup);
> +    jpeg2000_bitbuf_drop_bits_lsb(s, 4);
> +}
> +
> +/**
> + * Decode prefix codes for VLC segment.
> + */
> +static int av_always_inline jpeg2000_decode_ctx_vlc(const Jpeg2000DecoderContext *s,
> +                                                    StateVars *vlc_stream,
> +                                                    const uint16_t *table,
> +                                                    const uint8_t *Dcup,
> +                                                    uint8_t *sig_pat,
> +                                                    uint8_t *res_off,
> +                                                    uint8_t *emb_pat_k,
> +                                                    uint8_t *emb_pat_1,
> +                                                    uint8_t pos,
> +                                                    uint32_t Pcup,
> +                                                    uint16_t context)
> +{
> +    /* Described in clause 7.3.5 */
> +    uint32_t value;
> +    uint8_t len;
> +    uint64_t index;
> +    uint64_t code_word;
> +
> +    jpeg2000_bitbuf_refill_backwards(vlc_stream, Dcup + Pcup);
> +
> +    code_word = vlc_stream->bit_buf & 0x7f;
> +    index = code_word + (context << 7);
> +
> +    /* decode table has 1024 entries so ensure array access is in bounds */
> +    av_assert0(index < 1024);
> +
> +    value = table[index];
> +
> +    len = (value & 0x000F) >> 1;
> +
> +    res_off[pos] = (uint8_t) (value & 1);
> +    sig_pat[pos] = (uint8_t) ((value & 0x00F0) >> 4);
> +    emb_pat_k[pos] = (uint8_t) ((value & 0x0F00) >> 8);
> +    emb_pat_1[pos] = (uint8_t) ((value & 0xF000) >> 12);
> +
> +    jpeg2000_bitbuf_drop_bits_lsb(vlc_stream, len);
> +    return 0;
> +}
> +
> +/**
> + * @brief Decode variable length u-vlc prefix
> + */
> +static av_always_inline uint8_t
> +vlc_decode_u_prefix(StateVars *vlc_stream, const uint8_t *refill_array)
> +{
> +    /* clause 7.3.6
> +     * procedure : decodeUPrefix.
> +     */
> +    static const uint8_t return_value[8] = {5, 1, 2, 1, 3, 1, 2, 1};
> +    static const uint8_t drop_bits[8] = {3, 1, 2, 1, 3, 1, 2, 1};
> +
> +    uint8_t bits;
> +
> +    if (vlc_stream->bits_left < 3)
> +        jpeg2000_bitbuf_refill_backwards(vlc_stream, refill_array);
> +
> +    bits = jpeg2000_bitbuf_peek_bits_lsb(vlc_stream, 3);
> +
> +    jpeg2000_bitbuf_drop_bits_lsb(vlc_stream, drop_bits[bits]);
> +    return return_value[bits];
> +}
> +
> +/**
> + * @brief Decode variable length u-vlc suffix
> + */
> +static av_always_inline uint8_t vlc_decode_u_suffix(
> +        StateVars *vlc_stream, uint8_t suffix, const uint8_t *refill_array)
> +{
> +    /* clause 7.3.6
> +     * procedure: decodeUSuffix
> +     */
> +    static const int mask[] = {1, 31};
> +    static const int drop_bits[] = {1, 5};
> +
> +    uint8_t bits;
> +    int cond = suffix != 3;
> +    if (suffix < 3)
> +        return 0;
> +
> +    if (vlc_stream->bits_left < 5)
> +        jpeg2000_bitbuf_refill_backwards(vlc_stream, refill_array);
> +
> +    bits = jpeg2000_bitbuf_peek_bits_lsb(vlc_stream, 5);
> +
> +    jpeg2000_bitbuf_drop_bits_lsb(vlc_stream, drop_bits[cond]);
> +    return bits & mask[cond];
> +}
> +
> +/**
> + * @brief Decode variable length u-vlc suffix
> + */
> +static av_always_inline uint8_t vlc_decode_u_extension(
> +        StateVars *vlc_stream, uint8_t suffix, const uint8_t *refill_array)
> +{
> +    /* clause 7.3.6
> +     * procedure decodeUExtension.
> +     */
> +    return jpeg2000_bitbuf_get_bits_lsb(vlc_stream, 4 * (suffix >= 28), refill_array);
> +}
> +
> +/**
> + * Magnitude and Sign decode procedures
> + */
> +static int32_t av_always_inline jpeg2000_decode_mag_sgn(StateVars *mag_sgn_stream, int32_t m_n,
> +                                                        int32_t i_n, const uint8_t *buf, uint32_t length)
> +{
> +    /* clause 7.3.8
> +     * procedure: decodeMagSgnValue
> +     */
> +    int32_t val = 0;
> +    if (m_n > 0) {
> +        val = jpeg2000_bitbuf_get_bits_lsb_forward(mag_sgn_stream,m_n,buf,length);
> +        val += (i_n << m_n);
> +    }
> +    return val;
> +}
> +
> +static av_always_inline void
> +recover_mag_sgn(StateVars *mag_sgn, uint8_t pos, uint16_t q, int32_t m_n[2], int32_t known_1[2],
> +                const uint8_t emb_pat_1[2],
> +                int32_t v[2][4], int32_t m[2][4], uint8_t *E, uint32_t *mu_n, const uint8_t *Dcup, uint32_t Pcup,
> +                uint32_t pLSB)
> +{
> +    for (int i = 0; i < 4; i++) {
> +        int32_t n = 4 * q + i;
> +        m_n[pos] = m[pos][i];
> +        known_1[pos] = (emb_pat_1[pos] >> i) & 1;
> +        v[pos][i] = jpeg2000_decode_mag_sgn(mag_sgn, m_n[pos], known_1[pos], Dcup, Pcup);
> +
> +        if (m_n[pos] != 0) {
> +            E[n] = 32 - ff_clz(v[pos][i] | 1);
> +            mu_n[n] = (v[pos][i] >> 1) + 1;
> +            mu_n[n] <<= pLSB;
> +            mu_n[n] |= ((uint32_t) (v[pos][i] & 1)) << 31; // sign bit.
> +        }
> +    }
> +}
> +
> +static int jpeg2000_import_bit(StateVars *stream, const uint8_t *array, uint32_t length)
> +{
> +    int cond = stream->pos <= length;
> +    int pos = FFMIN(stream->pos, length);
> +    if (stream->bits == 0) {
> +        stream->bits = (stream->tmp == 0xFF) ? 7 : 8;
> +        stream->pos += cond;
> +        stream->tmp = cond ? array[pos] : 0xFF;
> +    }
> +    stream->bits -= 1;
> +    return (stream->tmp >> stream->bits) & 1;
> +}
> +
> +static int jpeg2000_peek_bit(StateVars *stream, const uint8_t *array, uint32_t length)
> +{
> +    if (stream->bits == 0) {
> +        int cond = stream->pos <= length;
> +        int pos = FFMIN(stream->pos, length);
> +        stream->bits = (stream->tmp == 0xFF) ? 7 : 8;
> +        stream->pos += cond;
> +        stream->tmp = cond ? array[pos] : 0xFF;
> +    }
> +    return (stream->tmp >> stream->bits) & 1;
> +}
> +
> +static int jpeg2000_decode_mel_sym(MelDecoderState *mel_state,
> +                                   StateVars *mel_stream,
> +                                   const uint8_t *Dcup,
> +                                   uint32_t Lcup)
> +{
> +
> +    if (mel_state->run == 0 && mel_state->one == 0) {
> +        uint8_t eval;
> +        uint8_t bit;
> +
> +        eval = MEL_E[mel_state->k];
> +        bit = jpeg2000_import_bit(mel_stream, Dcup, Lcup);
> +        if (bit == 1) {
> +            mel_state->run = 1 << eval;
> +            mel_state->k = FFMIN(12, mel_state->k + 1);
> +        } else {
> +            mel_state->run = 0;
> +            while (eval > 0) {
> +                bit = jpeg2000_import_bit(mel_stream, Dcup, Lcup);
> +                mel_state->run = (2 * (mel_state->run)) + bit;
> +                eval -= 1;
> +            }
> +            mel_state->k = FFMAX(0, mel_state->k - 1);
> +            mel_state->one = 1;
> +        }
> +    }
> +    if (mel_state->run > 0) {
> +        mel_state->run -= 1;
> +        return 0;
> +    } else {
> +        mel_state->one = 0;
> +        return 1;
> +    }
> +}
> +
> +/**
> + * @brief Magref decoding procedures
> + */
> +static av_always_inline int jpeg2000_import_magref_bit(StateVars *stream, const uint8_t *array, uint32_t length)
> +{
> +    return jpeg2000_bitbuf_get_bits_lsb(stream, 1, array);
> +}
> +
> +/**
> + * @brief Signal EMB decode
> + */
> +static int
> +jpeg2000_decode_sig_emb(const Jpeg2000DecoderContext *s, MelDecoderState *mel_state, StateVars *mel_stream,
> +                        StateVars *vlc_stream, const uint16_t *vlc_table, const uint8_t *Dcup, uint8_t *sig_pat,
> +                        uint8_t *res_off, uint8_t *emb_pat_k, uint8_t *emb_pat_1, uint8_t pos, uint16_t context,
> +                        uint32_t Lcup, uint32_t Pcup)
> +{
> +    if (context == 0) {
> +        uint8_t sym;
> +        sym = jpeg2000_decode_mel_sym(mel_state, mel_stream, Dcup, Lcup);
> +        if (sym == 0) {
> +            sig_pat[pos] = 0;
> +            res_off[pos] = 0;
> +            emb_pat_k[pos] = 0;
> +            emb_pat_1[pos] = 0;
> +            return 0;
> +        }
> +    }
> +    return jpeg2000_decode_ctx_vlc(s, vlc_stream, vlc_table, Dcup, sig_pat,
> +                                   res_off, emb_pat_k, emb_pat_1, pos, Pcup,
> +                                   context);
> +}
> +
> +static av_always_inline int jpeg2000_get_state(int x1, int x2, int width, int shift_by, const uint8_t *block_states)
> +{
> +    return (block_states[(x1 + 1) * (width + 2) + (x2 + 1)] >> shift_by) & 1;
> +}
> +
> +static av_always_inline void jpeg2000_modify_state(int x1, int x2, int width, int value, uint8_t *block_states)
> +{
> +    block_states[(x1 + 1) * (width + 2) + (x2 + 1)] |= value;
> +}
> +
> +static int av_noinline

Why is this av_noinline? It is normally good, to inline functions only
called once.

> +jpeg2000_decode_ht_cleanup(const Jpeg2000DecoderContext *s, Jpeg2000Cblk *cblk, Jpeg2000T1Context *t1,
> +                           MelDecoderState *mel_state,
> +                           StateVars *mel_stream, StateVars *vlc_stream, StateVars *mag_sgn_stream, const uint8_t *Dcup,
> +                           uint32_t Lcup,
> +                           uint32_t Pcup, uint8_t pLSB, int width, int height, int32_t *sample_buf,
> +                           uint8_t *block_states)
> +{
> +    uint16_t q = 0; /* Represents current quad position. */
> +    uint16_t q1, q2;
> +    uint16_t context1, context2;
> +    uint16_t context = 0;
> +
> +    uint8_t sig_pat[2]   = {0}; /* significance pattern*/
> +    uint8_t res_off[2]   = {0}; /* residual offset*/
> +    uint8_t emb_pat_k[2] = {0}; /* Exponent Max Bound pattern K */
> +    uint8_t emb_pat_1[2] = {0}; /* Exponent Max Bound pattern 1.*/
> +    uint8_t gamma[2]     = {0};
> +
> +    uint8_t E_n[2]  = {0};
> +    uint8_t E_ne[2] = {0};
> +    uint8_t E_nw[2] = {0};
> +    uint8_t E_nf[2] = {0};
> +
> +    uint8_t max_e[2] = {0};
> +    uint8_t u_pfx[2] = {0};
> +    uint8_t u_sfx[2] = {0};
> +    uint8_t u_ext[2] = {0};
> +
> +    int32_t u[2]       = {0};
> +    int32_t U[2]       = {0}; /* Exponent bound (7.3.7) */
> +    int32_t m_n[2]     = {0};
> +    int32_t known_1[2] = {0};
> +
> +    int32_t m[2][4] = {0};
> +    int32_t v[2][4] = {0};
> +
> +    uint8_t kappa[2] = {1, 1};
> +    int ret = 0;
> +
> +    int sp;
> +
> +    uint64_t c;
> +
> +    uint8_t *sigma;
> +    uint32_t *mu;
> +
> +    const uint8_t *vlc_buf = Dcup + Pcup;
> +    /* convert to raster-scan */
> +    const uint16_t is_border_x = width % 2;
> +    const uint16_t is_border_y = height % 2;
> +
> +    int j1, j2;
> +    int x1 = 0, x2 = 0, x3 = 0;
> +
> +    const uint16_t quad_width = ff_jpeg2000_ceildivpow2(width, 1);
> +    const uint16_t quad_height = ff_jpeg2000_ceildivpow2(height, 1);
> +
> +    size_t buf_size = 4 * quad_width * quad_height;
> +
> +    uint8_t *sigma_n = av_calloc(buf_size, sizeof(uint8_t));
> +    uint8_t *E = av_calloc(buf_size, sizeof(uint8_t));
> +    uint32_t *mu_n = av_calloc(buf_size, sizeof(uint32_t));
> +

Why does a function called jpeg2000_decode_ht_cleanup perform
allocations? And why all the other stuff below?

> +    if (!sigma_n || !E || !mu_n) {
> +        ret = AVERROR(ENOMEM);
> +        goto free;
> +    }
> +
> +    sigma = sigma_n;
> +    mu = mu_n;
> +
> +    while (q < quad_width - 1) {
> +        q1 = q;
> +        q2 = q1 + 1;
> +
> +        if ((ret = jpeg2000_decode_sig_emb(s, mel_state, mel_stream, vlc_stream,
> +                                           dec_CxtVLC_table0, Dcup, sig_pat, res_off,
> +                                           emb_pat_k, emb_pat_1, J2K_Q1, context, Lcup,
> +                                           Pcup)) < 0)
> +            goto free;
> +        for (int i = 0; i < 4; i++)
> +            sigma_n[4 * q1 + i] = (sig_pat[J2K_Q1] >> i) & 1;
> +
> +        // calculate context
> +        context  = sigma_n[4 * q1];           // f
> +        context |= sigma_n[4 * q1 + 1];      // sf
> +        context += sigma_n[4 * q1 + 2] << 1; // w << 1
> +        context += sigma_n[4 * q1 + 3] << 2;
> +
> +        if ((ret = jpeg2000_decode_sig_emb(s, mel_state, mel_stream, vlc_stream,
> +                                           dec_CxtVLC_table0, Dcup, sig_pat, res_off,
> +                                           emb_pat_k, emb_pat_1, J2K_Q2, context, Lcup,
> +                                           Pcup)) < 0)
> +            goto free;
> +
> +        for (int i = 0; i < 4; i++)
> +            sigma_n[4 * q2 + i] = (sig_pat[J2K_Q2] >> i) & 1;
> +
> +        // calculate context for the next quad
> +        context  = sigma_n[4 * q2];           // f
> +        context |= sigma_n[4 * q2 + 1];      // sf
> +        context += sigma_n[4 * q2 + 2] << 1; // w << 1
> +        context += sigma_n[4 * q2 + 3] << 2; // sw << 2
> +
> +        u[0] = 0;
> +        u[1] = 0;
> +
> +        jpeg2000_bitbuf_refill_backwards(vlc_stream, vlc_buf);
> +
> +        if (res_off[J2K_Q1] == 1 && res_off[J2K_Q2] == 1) {
> +
> +            if (jpeg2000_decode_mel_sym(mel_state, mel_stream, Dcup, Lcup) == 1) {
> +
> +                u_pfx[J2K_Q1] = vlc_decode_u_prefix(vlc_stream, vlc_buf);
> +                u_pfx[J2K_Q2] = vlc_decode_u_prefix(vlc_stream, vlc_buf);
> +
> +                u_sfx[J2K_Q1] = vlc_decode_u_suffix(vlc_stream, u_pfx[J2K_Q1], vlc_buf);
> +                u_sfx[J2K_Q2] = vlc_decode_u_suffix(vlc_stream, u_pfx[J2K_Q2], vlc_buf);
> +
> +                u_ext[J2K_Q1] = vlc_decode_u_extension(vlc_stream, u_sfx[J2K_Q1], vlc_buf);
> +                u_ext[J2K_Q2] = vlc_decode_u_extension(vlc_stream, u_sfx[J2K_Q2], vlc_buf);
> +
> +                u[J2K_Q1] = 2 + u_pfx[J2K_Q1] + u_sfx[J2K_Q1] + (u_ext[J2K_Q1] * 4);
> +                u[J2K_Q2] = 2 + u_pfx[J2K_Q2] + u_sfx[J2K_Q2] + (u_ext[J2K_Q2] * 4);
> +
> +            } else {
> +                u_pfx[J2K_Q1] = vlc_decode_u_prefix(vlc_stream, vlc_buf);
> +
> +                if (u_pfx[J2K_Q1] > 2) {
> +                    u[J2K_Q2] = jpeg2000_bitbuf_get_bits_lsb(vlc_stream, 1, vlc_buf) + 1;
> +                    u_sfx[J2K_Q1] = vlc_decode_u_suffix(vlc_stream, u_pfx[J2K_Q1], vlc_buf);
> +                    u_ext[J2K_Q1] = vlc_decode_u_extension(vlc_stream, u_sfx[J2K_Q1], vlc_buf);
> +                } else {
> +                    u_pfx[J2K_Q2] = vlc_decode_u_prefix(vlc_stream, vlc_buf);
> +                    u_sfx[J2K_Q1] = vlc_decode_u_suffix(vlc_stream, u_pfx[J2K_Q1], vlc_buf);
> +                    u_sfx[J2K_Q2] = vlc_decode_u_suffix(vlc_stream, u_pfx[J2K_Q2], vlc_buf);
> +                    u_ext[J2K_Q1] = vlc_decode_u_extension(vlc_stream, u_sfx[J2K_Q1], vlc_buf);
> +                    u_ext[J2K_Q2] = vlc_decode_u_extension(vlc_stream, u_sfx[J2K_Q2], vlc_buf);
> +                    u[J2K_Q2] = u_pfx[J2K_Q2] + u_sfx[J2K_Q2] + (u_ext[J2K_Q2] * 4);
> +                }
> +                // clause 7.3.6 (3)
> +                u[J2K_Q1] = u_pfx[J2K_Q1] + u_sfx[J2K_Q1] + (u_ext[J2K_Q1] * 4);
> +            }
> +
> +        } else if (res_off[J2K_Q1] == 1 || res_off[J2K_Q2] == 1) {
> +            uint8_t pos = res_off[J2K_Q1] == 1 ? 0 : 1;
> +            u_pfx[pos] = vlc_decode_u_prefix(vlc_stream, vlc_buf);
> +            u_sfx[pos] = vlc_decode_u_suffix(vlc_stream, u_pfx[pos], vlc_buf);
> +            u_ext[pos] = vlc_decode_u_extension(vlc_stream, u_sfx[pos], vlc_buf);
> +            u[pos] = u_pfx[pos] + u_sfx[pos] + (u_ext[pos] * 4);
> +        }
> +        U[J2K_Q1] = kappa[J2K_Q1] + u[J2K_Q1];
> +        U[J2K_Q2] = kappa[J2K_Q2] + u[J2K_Q2];
> +
> +        for (int i = 0; i < 4; i++) {
> +            m[J2K_Q1][i] = sigma_n[4 * q1 + i] * U[J2K_Q1] - ((emb_pat_k[J2K_Q1] >> i) & 1);
> +            m[J2K_Q2][i] = sigma_n[4 * q2 + i] * U[J2K_Q2] - ((emb_pat_k[J2K_Q2] >> i) & 1);
> +        }
> +
> +        recover_mag_sgn(mag_sgn_stream, J2K_Q1, q1, m_n, known_1, emb_pat_1, v, m,
> +                        E, mu_n, Dcup, Pcup, pLSB);
> +
> +        recover_mag_sgn(mag_sgn_stream, J2K_Q2, q2, m_n, known_1, emb_pat_1, v, m,
> +                        E, mu_n, Dcup, Pcup, pLSB);
> +
> +        // prepare context for the next quad
> +
> +        // move to the next quad pair
> +        q += 2;
> +    }
> +    if (quad_width % 2 == 1) {
> +        q1 = q;
> +
> +        if ((ret = jpeg2000_decode_sig_emb(s, mel_state, mel_stream, vlc_stream,
> +                                           dec_CxtVLC_table0, Dcup, sig_pat, res_off,
> +                                           emb_pat_k, emb_pat_1, J2K_Q1, context, Lcup,
> +                                           Pcup)) < 0)
> +            goto free;
> +
> +        for (int i = 0; i < 4; i++)
> +            sigma_n[4 * q1 + i] = (sig_pat[J2K_Q1] >> i) & 1;
> +
> +        u[J2K_Q1] = 0;
> +
> +        if (res_off[J2K_Q1] == 1) {
> +            u_pfx[J2K_Q1] = vlc_decode_u_prefix(vlc_stream, vlc_buf);
> +            u_sfx[J2K_Q1] = vlc_decode_u_suffix(vlc_stream, u_pfx[J2K_Q1], vlc_buf);
> +            u_ext[J2K_Q1] = vlc_decode_u_extension(vlc_stream, u_sfx[J2K_Q1], vlc_buf);
> +            u[J2K_Q1] = u_pfx[J2K_Q1] + u_sfx[J2K_Q1] + (u_ext[J2K_Q1] * 4);
> +        }
> +
> +        U[J2K_Q1] = kappa[J2K_Q1] + u[J2K_Q1];
> +
> +        for (int i = 0; i < 4; i++)
> +            m[J2K_Q1][i] = sigma_n[4 * q1 + i] * U[J2K_Q1] - ((emb_pat_k[J2K_Q1] >> i) & 1);
> +
> +        recover_mag_sgn(mag_sgn_stream, J2K_Q1, q1, m_n, known_1, emb_pat_1, v, m,
> +                        E, mu_n, Dcup, Pcup, pLSB);
> +
> +        q++; /* move to next quad pair */
> +    }
> +    /* initial line pair end. */
> +    /*
> +     * As an optimization, we can replace modulo operations with
> +     * checking if a number is divisible , since that's the only thing we need.
> +     * this is paired with is_divisible.
> +     * Credits to Daniel Lemire blog post:
> +     * https://lemire.me/blog/2019/02/08/faster-remainders-when-the-divisor-is-a-constant-beating-compilers-and-libdivide/
> +     * It's UB on zero, but we can't have a quad being zero, the spec doesn't allow,
> +     * so we error out early in case that's the case.
> +     */
> +    c = precompute_c(quad_width);
> +
> +    for (int row = 1; row < quad_height; row++) {
> +        while ((q - (row * quad_width)) < quad_width - 1 && q < (quad_height * quad_width)) {
> +            q1 = q;
> +            q2 = q + 1;
> +            context1  = sigma_n[4 * (q1 - quad_width) + 1];
> +            context1 += sigma_n[4 * (q1 - quad_width) + 3] << 2; // ne
> +
> +            if (!is_divisible(q1, c)) {
> +                context1 |= sigma_n[4 * (q1 - quad_width) - 1];               // nw
> +                context1 += (sigma_n[4 * q1 - 1] | sigma_n[4 * q1 - 2]) << 1; // sw| q
> +            }
> +            if (!is_divisible(q1 + 1, c))
> +                context1 |= sigma_n[4 * (q1 - quad_width) + 5] << 2;
> +
> +            if ((ret = jpeg2000_decode_sig_emb(s, mel_state, mel_stream, vlc_stream,
> +                                               dec_CxtVLC_table1, Dcup, sig_pat, res_off,
> +                                               emb_pat_k, emb_pat_1, J2K_Q1, context1, Lcup,
> +                                               Pcup))
> +                < 0)
> +                goto free;
> +
> +            for (int i = 0; i < 4; i++)
> +                sigma_n[4 * q1 + i] = (sig_pat[J2K_Q1] >> i) & 1;
> +
> +            context2  = sigma_n[4 * (q2 - quad_width) + 1];
> +            context2 += sigma_n[4 * (q2 - quad_width) + 3] << 2;
> +
> +            if (!is_divisible(q2, c)) {
> +                context2 |= sigma_n[4 * (q2 - quad_width) - 1];
> +                context2 += (sigma_n[4 * q2 - 1] | sigma_n[4 * q2 - 2]) << 1;
> +            }
> +            if (!is_divisible(q2 + 1, c))
> +                context2 |= sigma_n[4 * (q2 - quad_width) + 5] << 2;
> +
> +            if ((ret = jpeg2000_decode_sig_emb(s, mel_state, mel_stream, vlc_stream,
> +                                               dec_CxtVLC_table1, Dcup, sig_pat, res_off,
> +                                               emb_pat_k, emb_pat_1, J2K_Q2, context2, Lcup,
> +                                               Pcup))
> +                < 0)
> +                goto free;
> +
> +            for (int i = 0; i < 4; i++)
> +                sigma_n[4 * q2 + i] = (sig_pat[J2K_Q2] >> i) & 1;
> +
> +            u[J2K_Q1] = 0;
> +            u[J2K_Q2] = 0;
> +
> +            jpeg2000_bitbuf_refill_backwards(vlc_stream, vlc_buf);
> +
> +            if (res_off[J2K_Q1] == 1 && res_off[J2K_Q2] == 1) {
> +                u_pfx[J2K_Q1] = vlc_decode_u_prefix(vlc_stream, vlc_buf);
> +                u_pfx[J2K_Q2] = vlc_decode_u_prefix(vlc_stream, vlc_buf);
> +
> +                u_sfx[J2K_Q1] = vlc_decode_u_suffix(vlc_stream, u_pfx[J2K_Q1], vlc_buf);
> +                u_sfx[J2K_Q2] = vlc_decode_u_suffix(vlc_stream, u_pfx[J2K_Q2], vlc_buf);
> +
> +                u_ext[J2K_Q1] = vlc_decode_u_extension(vlc_stream, u_sfx[J2K_Q1], vlc_buf);
> +                u_ext[J2K_Q2] = vlc_decode_u_extension(vlc_stream, u_sfx[J2K_Q2], vlc_buf);
> +
> +                u[J2K_Q1] = u_pfx[J2K_Q1] + u_sfx[J2K_Q1] + (u_ext[J2K_Q1] << 2);
> +                u[J2K_Q2] = u_pfx[J2K_Q2] + u_sfx[J2K_Q2] + (u_ext[J2K_Q2] << 2);
> +
> +            } else if (res_off[J2K_Q1] == 1 || res_off[J2K_Q2] == 1) {
> +                uint8_t pos = res_off[J2K_Q1] == 1 ? 0 : 1;
> +
> +                u_pfx[pos] = vlc_decode_u_prefix(vlc_stream, vlc_buf);
> +                u_sfx[pos] = vlc_decode_u_suffix(vlc_stream, u_pfx[pos], vlc_buf);
> +                u_ext[pos] = vlc_decode_u_extension(vlc_stream, u_sfx[pos], vlc_buf);
> +
> +                u[pos] = u_pfx[pos] + u_sfx[pos] + (u_ext[pos] << 2);
> +            }
> +            sp = sig_pat[J2K_Q1];
> +
> +            gamma[J2K_Q1] = 1;
> +
> +            if (sp == 0 || sp == 1 || sp == 2 || sp == 4 || sp == 8)
> +                gamma[J2K_Q1] = 0;
> +
> +            sp = sig_pat[J2K_Q2];
> +
> +            gamma[J2K_Q2] = 1;
> +
> +            if (sp == 0 || sp == 1 || sp == 2 || sp == 4 || sp == 8)
> +                gamma[J2K_Q2] = 0;
> +
> +            E_n[J2K_Q1] = E[4 * (q1 - quad_width) + 1];
> +            E_n[J2K_Q2] = E[4 * (q2 - quad_width) + 1];
> +
> +            E_ne[J2K_Q1] = E[4 * (q1 - quad_width) + 3];
> +            E_ne[J2K_Q2] = E[4 * (q2 - quad_width) + 3];
> +
> +            E_nw[J2K_Q1] = (!is_divisible(q1, c)) * E[FFMAX((4 * (q1 - quad_width) - 1), 0)];
> +            E_nw[J2K_Q2] = (!is_divisible(q2, c)) * E[FFMAX((4 * (q2 - quad_width) - 1), 0)];
> +
> +            E_nf[J2K_Q1] = (!is_divisible(q1 + 1, c)) * E[4 * (q1 - quad_width) + 5];
> +            E_nf[J2K_Q2] = (!is_divisible(q2 + 1, c)) * E[4 * (q2 - quad_width) + 5];
> +
> +            max_e[J2K_Q1] = FFMAX(E_nw[J2K_Q1], FFMAX3(E_n[J2K_Q1], E_ne[J2K_Q1], E_nf[J2K_Q1]));
> +            max_e[J2K_Q2] = FFMAX(E_nw[J2K_Q2], FFMAX3(E_n[J2K_Q2], E_ne[J2K_Q2], E_nf[J2K_Q2]));
> +
> +            kappa[J2K_Q1] = FFMAX(1, gamma[J2K_Q1] * (max_e[J2K_Q1] - 1));
> +            kappa[J2K_Q2] = FFMAX(1, gamma[J2K_Q2] * (max_e[J2K_Q2] - 1));
> +
> +            U[J2K_Q1] = kappa[J2K_Q1] + u[J2K_Q1];
> +            U[J2K_Q2] = kappa[J2K_Q2] + u[J2K_Q2];
> +
> +            for (int i = 0; i < 4; i++) {
> +                m[J2K_Q1][i] = sigma_n[4 * q1 + i] * U[J2K_Q1] - ((emb_pat_k[J2K_Q1] >> i) & 1);
> +                m[J2K_Q2][i] = sigma_n[4 * q2 + i] * U[J2K_Q2] - ((emb_pat_k[J2K_Q2] >> i) & 1);
> +            }
> +            recover_mag_sgn(mag_sgn_stream, J2K_Q1, q1, m_n, known_1, emb_pat_1, v, m,
> +                            E, mu_n, Dcup, Pcup, pLSB);
> +
> +            recover_mag_sgn(mag_sgn_stream, J2K_Q2, q2, m_n, known_1, emb_pat_1, v, m,
> +                            E, mu_n, Dcup, Pcup, pLSB);
> +
> +            /* move to the next quad pair */
> +            q += 2;
> +        }
> +        if (quad_width % 2 == 1) {
> +            q1 = q;
> +            /* calculate context for current quad */
> +            context1  = sigma_n[4 * (q1 - quad_width) + 1];
> +            context1 += (sigma_n[4 * (q1 - quad_width) + 3] << 2);
> +
> +            if (!is_divisible(q1, c)) {
> +                context1 |= sigma_n[4 * (q1 - quad_width) - 1];
> +                context1 += (sigma_n[4 * q1 - 1] | sigma_n[4 * q1 - 2]) << 1;
> +            }
> +            if (!is_divisible(q1 + 1, c))
> +                context1 |= sigma_n[4 * (q1 - quad_width) + 5] << 2;
> +
> +            if ((ret = jpeg2000_decode_sig_emb(s, mel_state, mel_stream, vlc_stream,
> +                                               dec_CxtVLC_table1, Dcup, sig_pat, res_off,
> +                                               emb_pat_k, emb_pat_1, J2K_Q1, context1, Lcup,
> +                                               Pcup))
> +                < 0)
> +                goto free;
> +
> +            for (int i = 0; i < 4; i++)
> +                sigma_n[4 * q1 + i] = (sig_pat[J2K_Q1] >> i) & 1;
> +
> +            u[J2K_Q1] = 0;
> +
> +            /* Recover mag_sgn value */
> +            if (res_off[J2K_Q1] == 1) {
> +                u_pfx[J2K_Q1] = vlc_decode_u_prefix(vlc_stream, vlc_buf);
> +                u_sfx[J2K_Q1] = vlc_decode_u_suffix(vlc_stream, u_pfx[J2K_Q1], vlc_buf);
> +                u_ext[J2K_Q1] = vlc_decode_u_extension(vlc_stream, u_sfx[J2K_Q1], vlc_buf);
> +
> +                u[J2K_Q1] = u_pfx[J2K_Q1] + u_sfx[J2K_Q1] + (u_ext[J2K_Q1] << 2);
> +            }
> +
> +            sp = sig_pat[J2K_Q1];
> +
> +            gamma[J2K_Q1] = 1;
> +
> +            if (sp == 0 || sp == 1 || sp == 2 || sp == 4 || sp == 8)
> +                gamma[J2K_Q1] = 0;
> +
> +            E_n[J2K_Q1] = E[4 * (q1 - quad_width) + 1];
> +
> +            E_ne[J2K_Q1] = E[4 * (q1 - quad_width) + 3];
> +
> +            E_nw[J2K_Q1] = (!is_divisible(q1, c)) * E[FFMAX((4 * (q1 - quad_width) - 1), 0)];
> +
> +            E_nf[J2K_Q1] = (!is_divisible(q1 + 1, c)) * E[4 * (q1 - quad_width) + 5];
> +
> +            max_e[J2K_Q1] = FFMAX(E_nw[J2K_Q1], FFMAX3(E_n[J2K_Q1], E_ne[J2K_Q1], E_nf[J2K_Q1]));
> +
> +            kappa[J2K_Q1] = FFMAX(1, gamma[J2K_Q1] * (max_e[J2K_Q1] - 1));
> +
> +            U[J2K_Q1] = kappa[J2K_Q1] + u[J2K_Q1];
> +
> +            for (int i = 0; i < 4; i++)
> +                m[J2K_Q1][i] = sigma_n[4 * q1 + i] * U[J2K_Q1] - ((emb_pat_k[J2K_Q1] >> i) & 1);
> +
> +            recover_mag_sgn(mag_sgn_stream, J2K_Q1, q1, m_n, known_1, emb_pat_1, v, m,
> +                            E, mu_n, Dcup, Pcup, pLSB);
> +            q += 1;
> +        }
> +    }
> +    // convert to raster-scan
> +    for (int y = 0; y < quad_height; y++) {
> +        for (int x = 0; x < quad_width; x++) {
> +            j1 = 2 * y;
> +            j2 = 2 * x;
> +
> +            sample_buf[j2 + (j1 * width)] = (int32_t)*mu;
> +            jpeg2000_modify_state(j1, j2, width, *sigma, block_states);
> +            sigma += 1;
> +            mu += 1;
> +
> +            x1 = y != quad_height - 1 || is_border_y == 0;
> +            sample_buf[j2 + ((j1 + 1) * width)] = ((int32_t)*mu) * x1;
> +            jpeg2000_modify_state(j1 + 1, j2, width, (*sigma) * x1, block_states);
> +            sigma += 1;
> +            mu += 1;
> +
> +            x2 = x != quad_width - 1 || is_border_x == 0;
> +            sample_buf[(j2 + 1) + (j1 * width)] = ((int32_t)*mu) * x2;
> +            jpeg2000_modify_state(j1, j2 + 1, width, (*sigma) * x2, block_states);
> +            sigma += 1;
> +            mu += 1;
> +
> +            x3 = x1 | x2;
> +            sample_buf[(j2 + 1) + (j1 + 1) * width] = ((int32_t)*mu) * x3;
> +            jpeg2000_modify_state(j1 + 1, j2 + 1, width, (*sigma) * x3, block_states);
> +            sigma += 1;
> +            mu += 1;
> +        }
> +    }
> +    ret = 1;
> +free:
> +    av_freep(&sigma_n);
> +    av_freep(&E);
> +    av_freep(&mu_n);
> +    return ret;
> +}
> +
> +static void
> +jpeg2000_calc_mbr(uint8_t *mbr, const uint16_t i, const uint16_t j, const uint32_t mbr_info, uint8_t causal_cond,
> +                  uint8_t *block_states, int width)
> +{
> +
> +    int local_mbr = 0;
> +    local_mbr |= jpeg2000_get_state(i - 1, j - 1, width, HT_SHIFT_SIGMA, block_states);
> +    local_mbr |= jpeg2000_get_state(i - 1, j + 0, width, HT_SHIFT_SIGMA, block_states);
> +    local_mbr |= jpeg2000_get_state(i - 1, j + 1, width, HT_SHIFT_SIGMA, block_states);
> +
> +    local_mbr |= jpeg2000_get_state(i + 0, j - 1, width, HT_SHIFT_SIGMA, block_states);
> +    local_mbr |= jpeg2000_get_state(i + 0, j + 1, width, HT_SHIFT_SIGMA, block_states);
> +
> +    local_mbr |= jpeg2000_get_state(i + 1, j - 1, width, HT_SHIFT_SIGMA, block_states) * causal_cond;
> +    local_mbr |= jpeg2000_get_state(i + 1, j + 0, width, HT_SHIFT_SIGMA, block_states) * causal_cond;
> +    local_mbr |= jpeg2000_get_state(i + 1, j + 1, width, HT_SHIFT_SIGMA, block_states) * causal_cond;
> +
> +    local_mbr |= jpeg2000_get_state(i - 1, j - 1, width, HT_SHIFT_REF, block_states) *
> +                 jpeg2000_get_state(i - 1, j - 1, width, HT_SHIFT_SCAN, block_states);
> +    local_mbr |= jpeg2000_get_state(i - 1, j + 0, width, HT_SHIFT_REF, block_states) *
> +                 jpeg2000_get_state(i - 1, j - 1, width, HT_SHIFT_SCAN, block_states);
> +    local_mbr |= jpeg2000_get_state(i - 1, j + 1, width, HT_SHIFT_REF, block_states) *
> +                 jpeg2000_get_state(i - 1, j + 1, width, HT_SHIFT_SCAN, block_states);
> +
> +    local_mbr |= jpeg2000_get_state(i + 0, j - 1, width, HT_SHIFT_REF, block_states) *
> +                 jpeg2000_get_state(i + 0, j - 1, width, HT_SHIFT_SCAN, block_states);
> +    local_mbr |= jpeg2000_get_state(i + 0, j + 1, width, HT_SHIFT_REF, block_states) *
> +                 jpeg2000_get_state(i + 0, j + 1, width, HT_SHIFT_SCAN, block_states);
> +
> +    local_mbr |= jpeg2000_get_state(i + 1, j - 1, width, HT_SHIFT_REF, block_states) *
> +                 jpeg2000_get_state(i + 1, j - 1, width, HT_SHIFT_SCAN, block_states) * causal_cond;
> +    local_mbr |= jpeg2000_get_state(i + 1, j + 0, width, HT_SHIFT_REF, block_states) *
> +                 jpeg2000_get_state(i + 1, j + 0, width, HT_SHIFT_SCAN, block_states) * causal_cond;
> +    local_mbr |= jpeg2000_get_state(i + 1, j + 1, width, HT_SHIFT_REF, block_states) *
> +                 jpeg2000_get_state(i + 1, j + 1, width, HT_SHIFT_SCAN, block_states) * causal_cond;
> +
> +    *mbr |= local_mbr;
> +}
> +
> +static void
> +jpeg2000_process_stripes_block(StateVars *sig_prop, int i_s, int j_s, int width, int height, int stride, int pLSB,
> +                               int32_t *sample_buf, uint8_t *block_states, uint8_t *magref_segment,
> +                               uint32_t magref_length)
> +{
> +    int32_t *sp;
> +    uint8_t causal_cond = 0;
> +    uint8_t bit;
> +    uint8_t mbr;
> +    uint32_t mbr_info;
> +    int modify_state = 0;
> +    int cond;
> +
> +    for (int j = j_s; j < j_s + width; j++) {
> +        mbr_info = 0;

Could use a smaller scope.

> +        for (int i = i_s; i < i_s + height; i++) {
> +            sp = &sample_buf[j + (i * (stride - 2))];
> +            mbr = 0;
> +            causal_cond = i != (i_s + height - 1);
> +            if (jpeg2000_get_state(i, j, stride - 2, HT_SHIFT_SIGMA, block_states) == 0)
> +                jpeg2000_calc_mbr(&mbr, i, j, mbr_info & 0x1EF, causal_cond, block_states, stride - 2);
> +            mbr_info >>= 3;
> +            cond = mbr != 0;
> +            bit = jpeg2000_peek_bit(sig_prop, magref_segment, magref_length);
> +            *sp |= (bit * cond) << pLSB;
> +            sig_prop->bits -= cond;
> +            modify_state = (((1 << HT_SHIFT_REF_IND) | (1 << HT_SHIFT_REF)) * cond) | 1 << HT_SHIFT_SCAN;
> +            jpeg2000_modify_state(i, j, stride - 2, modify_state, block_states);
> +        }
> +    }
> +}
> +
> +static void av_noinline
> +jpeg2000_decode_sigprop(Jpeg2000Cblk *cblk, uint16_t width, uint16_t height, uint8_t *magref_segment,
> +                        uint32_t magref_length, uint8_t pLSB, int32_t *sample_buf, uint8_t *block_states)
> +{
> +    /* Described in clause 7.4
> +     * procedure: decodeSigPropMag
> +     */
> +    StateVars sp_dec;
> +    const uint16_t num_v_stripe = height / 4;
> +    const uint16_t num_h_stripe = width / 4;
> +    int b_width = 4;
> +    int b_height = 4;
> +    int stride = width + 2;
> +    int last_width;
> +    uint16_t i = 0, j = 0;
> +
> +    jpeg2000_init_zero(&sp_dec);
> +
> +    for (int n1 = 0; n1 < num_v_stripe; n1++) {
> +        j = 0;
> +        for (int n2 = 0; n2 < num_h_stripe; n2++) {
> +            jpeg2000_process_stripes_block(&sp_dec, i, j, b_width, b_height, stride,
> +                                           pLSB, sample_buf, block_states, magref_segment,
> +                                           magref_length);
> +            j += 4;
> +        }
> +        last_width = width % 4;
> +        if (last_width)
> +            jpeg2000_process_stripes_block(&sp_dec, i, j, last_width, b_height, stride,
> +                                           pLSB, sample_buf, block_states, magref_segment,
> +                                           magref_length);
> +        i += 4;
> +    }
> +    /* decode remaining height stripes */
> +    b_height = height % 4;
> +    j = 0;
> +    for (int n2 = 0; n2 < num_h_stripe; n2++) {
> +        jpeg2000_process_stripes_block(&sp_dec, i, j, b_width, b_height, stride,
> +                                       pLSB, sample_buf, block_states, magref_segment,
> +                                       magref_length);
> +        j += 4;
> +    }
> +    last_width = width % 4;
> +    if (last_width)
> +        jpeg2000_process_stripes_block(&sp_dec, i, j, last_width, b_height, stride,
> +                                       pLSB, sample_buf, block_states, magref_segment,
> +                                       magref_length);
> +}
> +
> +static int av_noinline
> +jpeg2000_decode_magref(Jpeg2000Cblk *cblk, uint16_t width, uint16_t block_height, uint8_t *magref_segment,
> +                       uint32_t magref_length, uint8_t pLSB, int32_t *sample_buf, uint8_t *block_states)
> +{
> +    /*
> +     * Described in clause 7.5
> +     * procedure: decodeSigPropMag
> +     */
> +
> +    StateVars mag_ref = {0};
> +    const uint16_t num_v_stripe = block_height / 4;
> +    uint16_t height = 4;
> +    uint16_t i_start = 0;
> +    int32_t *sp;
> +
> +    jpeg2000_init_mag_ref(&mag_ref, magref_length);
> +
> +    for (int n1 = 0; n1 < num_v_stripe; n1++) {
> +        for (int j = 0; j < width; j++) {
> +            for (int i = i_start; i < i_start + height; i++) {
> +                /* we move column wise, going from one quad to another
> +                * see figure 7.
> +                */
> +                sp = &sample_buf[j + i * width];
> +                if (jpeg2000_get_state(i, j, width, HT_SHIFT_SIGMA, block_states) != 0) {
> +                    jpeg2000_modify_state(i, j, width, 1 << HT_SHIFT_REF_IND, block_states);
> +                    *sp |= jpeg2000_import_magref_bit(&mag_ref, magref_segment, magref_length) << pLSB;
> +                }
> +            }
> +        }
> +        i_start += 4;
> +    }
> +    height = block_height % 4;
> +    for (int j = 0; j < width; j++) {
> +        for (int i = i_start; i < i_start + height; i++) {
> +            sp = &sample_buf[j + i * width];
> +            if (jpeg2000_get_state(i, j, width, HT_SHIFT_SIGMA, block_states) != 0) {
> +                jpeg2000_modify_state(i, j, width, 1 << HT_SHIFT_REF_IND, block_states);
> +                *sp |= jpeg2000_import_magref_bit(&mag_ref, magref_segment, magref_length) << pLSB;
> +            }
> +        }
> +    }
> +    return 1;
> +}
> +
> +
> +int
> +ff_jpeg2000_decode_htj2k(const Jpeg2000DecoderContext *s, Jpeg2000CodingStyle *codsty, Jpeg2000T1Context *t1, Jpeg2000Cblk *cblk,
> +                         int width, int height, int magp, uint8_t roi_shift)
> +{
> +    uint8_t p0 = 0; /* Number of placeholder passes. */
> +    uint32_t Lcup;  /* Length of HT cleanup segment. */
> +    uint32_t Lref;  /* Length of Refinement segment. */
> +    uint32_t Scup;  /* HT cleanup segment suffix length. */
> +    uint32_t Pcup;  /* HT cleanup segment prefix length. */
> +
> +    uint8_t S_blk;  /* Number of skipped magnitude bitplanes; */
> +    uint8_t pLSB;
> +
> +    uint8_t *Dcup; /* Byte of an HT cleanup segment. */
> +    uint8_t *Dref; /* Byte of an HT refinement segment. */
> +
> +    int z_blk;     /* Number of ht coding pass */
> +
> +    uint8_t empty_passes;
> +
> +    StateVars mag_sgn;  /* Magnitude and Sign */
> +    StateVars mel;      /* Adaptive run-length coding */
> +    StateVars vlc;      /* Variable Length coding */
> +    StateVars sig_prop; /* Significance propagation */
> +
> +    MelDecoderState mel_state;
> +
> +    int ret;
> +
> +    /* Temporary buffers */
> +    int32_t *sample_buf;
> +    uint8_t *block_states;
> +
> +    /* Post-processing */
> +    int32_t n, val;
> +
> +    int32_t M_b = magp;
> +    av_assert0(width <= 1024U && height <= 1024U);
> +    av_assert0(width * height <= 4096);
> +    av_assert0(width * height > 0);
> +
> +    memset(t1->data, 0, t1->stride * height * sizeof(*t1->data));
> +    memset(t1->flags, 0, t1->stride * (height + 2) * sizeof(*t1->flags));
> +
> +    if (cblk->npasses == 0)
> +        return 0;
> +
> +    if (cblk->npasses > 3)
> +        p0 = 0;
> +    else if (cblk->length == 0)
> +        p0 = 1;
> +
> +    empty_passes = p0 * 3;
> +    z_blk = cblk->npasses - empty_passes;
> +
> +    if (z_blk <= 0)
> +        /* no passes within this set, continue */
> +        return 0;
> +
> +    Lcup = cblk->pass_lengths[0];
> +    Lref = cblk->pass_lengths[1];
> +
> +    if (Lcup < 2) {
> +        av_log(s->avctx, AV_LOG_ERROR,
> +               "Cleanup pass length must be at least 2 bytes in length\n");
> +        return AVERROR_INVALIDDATA;
> +    }
> +    Dcup = cblk->data;
> +    /* Dref comes after the refinement segment. */
> +    Dref  = cblk->data + Lcup;
> +    S_blk = p0 + cblk->zbp;
> +    pLSB  = 30 - S_blk;
> +
> +    Scup = (Dcup[Lcup - 1] << 4) + (Dcup[Lcup - 2] & 0x0F);
> +
> +    if (Scup < 2 || Scup > Lcup || Scup > 4079) {
> +        av_log(s->avctx, AV_LOG_ERROR, "Cleanup pass suffix length is invalid %d\n",
> +               Scup);
> +        ret = AVERROR_INVALIDDATA;
> +        goto free;
> +    }
> +    Pcup = Lcup - Scup;
> +
> +    /* modDcup shall be done before the creation of vlc instance. */
> +    Dcup[Lcup - 1] = 0xFF;
> +    Dcup[Lcup - 2] |= 0x0F;
> +    /* Magnitude and refinement */
> +    jpeg2000_init_zero(&mag_sgn);
> +    jpeg2000_bitbuf_refill_forward(&mag_sgn, Dcup, Pcup);
> +    /* Significance propagation */
> +    jpeg2000_init_zero(&sig_prop);
> +    /* Adaptive run length */
> +    jpeg2000_init_mel(&mel, Pcup);
> +    /* Variable Length coding. */
> +    jpeg2000_init_vlc(&vlc, Lcup, Pcup, Dcup);
> +
> +    jpeg2000_init_mel_decoder(&mel_state);
> +
> +    sample_buf = av_calloc((width + 4) * (height + 4), sizeof(int32_t));
> +    block_states = av_calloc((width + 4) * (height + 4), sizeof(uint8_t));
> +
> +    if (!sample_buf || !block_states) {
> +        ret = AVERROR(ENOMEM);
> +        goto free;
> +    }
> +    if ((ret = jpeg2000_decode_ht_cleanup(s, cblk, t1, &mel_state, &mel, &vlc,
> +                                          &mag_sgn, Dcup, Lcup, Pcup, pLSB, width,
> +                                          height, sample_buf, block_states))
> +        < 0)
> +        goto free;
> +
> +    if (cblk->npasses > 1)
> +        jpeg2000_decode_sigprop(cblk, width, height, Dref, Lref,
> +                                pLSB - 1, sample_buf, block_states);
> +
> +    if (cblk->npasses > 2)
> +        if ((ret = jpeg2000_decode_magref(cblk, width, height, Dref, Lref,
> +                                          pLSB - 1, sample_buf, block_states))
> +            < 0)
> +            goto free;
> +
> +    pLSB = 31 - M_b;
> +    /* Reconstruct the values. */
> +    for (int y = 0; y < height; y++) {
> +        for (int x = 0; x < width; x++) {
> +            n = x + (y * t1->stride);
> +            val = sample_buf[x + (y * width)];
> +            /* Convert sign-magnitude to twos complement. */
> +            val = val >> 31 ? 0x80000000 - val : val;
> +            val >>= (pLSB - 1);
> +            t1->data[n] = val;
> +        }
> +    }
> +free:
> +    av_freep(&sample_buf);
> +    av_freep(&block_states);
> +    return ret;
> +}
> +
> +/**
> + * @brief  CtxVLC tables, borrowed from openhtj2k (https://github.com/osamu620/OpenHTJ2K) (credits to Osamu Watanabe)
> + */
> +static const uint16_t dec_CxtVLC_table1[1024] = {

This is inconsistent with our naming conventions.

> +        0x0016, 0x006A, 0x0046, 0x00DD, 0x0086, 0x888B, 0x0026, 0x444D, 0x0016, 0x00AA, 0x0046, 0x88AD, 0x0086,
> +        0x003A, 0x0026, 0x00DE, 0x0016, 0x00CA, 0x0046, 0x009D, 0x0086, 0x005A, 0x0026, 0x222D, 0x0016, 0x009A,
> +        0x0046, 0x007D, 0x0086, 0x01FD, 0x0026, 0x007E, 0x0016, 0x006A, 0x0046, 0x88CD, 0x0086, 0x888B, 0x0026,
> +        0x111D, 0x0016, 0x00AA, 0x0046, 0x005D, 0x0086, 0x003A, 0x0026, 0x00EE, 0x0016, 0x00CA, 0x0046, 0x00BD,
> +        0x0086, 0x005A, 0x0026, 0x11FF, 0x0016, 0x009A, 0x0046, 0x003D, 0x0086, 0x04ED, 0x0026, 0x2AAF, 0x0016,
> +        0x006A, 0x0046, 0x00DD, 0x0086, 0x888B, 0x0026, 0x444D, 0x0016, 0x00AA, 0x0046, 0x88AD, 0x0086, 0x003A,
> +        0x0026, 0x44EF, 0x0016, 0x00CA, 0x0046, 0x009D, 0x0086, 0x005A, 0x0026, 0x222D, 0x0016, 0x009A, 0x0046,
> +        0x007D, 0x0086, 0x01FD, 0x0026, 0x00BE, 0x0016, 0x006A, 0x0046, 0x88CD, 0x0086, 0x888B, 0x0026, 0x111D,
> +        0x0016, 0x00AA, 0x0046, 0x005D, 0x0086, 0x003A, 0x0026, 0x4CCF, 0x0016, 0x00CA, 0x0046, 0x00BD, 0x0086,
> +        0x005A, 0x0026, 0x00FE, 0x0016, 0x009A, 0x0046, 0x003D, 0x0086, 0x04ED, 0x0026, 0x006F, 0x0002, 0x0088,
> +        0x0002, 0x005C, 0x0002, 0x0018, 0x0002, 0x00DE, 0x0002, 0x0028, 0x0002, 0x009C, 0x0002, 0x004A, 0x0002,
> +        0x007E, 0x0002, 0x0088, 0x0002, 0x00CC, 0x0002, 0x0018, 0x0002, 0x888F, 0x0002, 0x0028, 0x0002, 0x00FE,
> +        0x0002, 0x003A, 0x0002, 0x222F, 0x0002, 0x0088, 0x0002, 0x04FD, 0x0002, 0x0018, 0x0002, 0x00BE, 0x0002,
> +        0x0028, 0x0002, 0x00BF, 0x0002, 0x004A, 0x0002, 0x006E, 0x0002, 0x0088, 0x0002, 0x00AC, 0x0002, 0x0018,
> +        0x0002, 0x444F, 0x0002, 0x0028, 0x0002, 0x00EE, 0x0002, 0x003A, 0x0002, 0x113F, 0x0002, 0x0088, 0x0002,
> +        0x005C, 0x0002, 0x0018, 0x0002, 0x00CF, 0x0002, 0x0028, 0x0002, 0x009C, 0x0002, 0x004A, 0x0002, 0x006F,
> +        0x0002, 0x0088, 0x0002, 0x00CC, 0x0002, 0x0018, 0x0002, 0x009F, 0x0002, 0x0028, 0x0002, 0x00EF, 0x0002,
> +        0x003A, 0x0002, 0x233F, 0x0002, 0x0088, 0x0002, 0x04FD, 0x0002, 0x0018, 0x0002, 0x00AF, 0x0002, 0x0028,
> +        0x0002, 0x44FF, 0x0002, 0x004A, 0x0002, 0x005F, 0x0002, 0x0088, 0x0002, 0x00AC, 0x0002, 0x0018, 0x0002,
> +        0x007F, 0x0002, 0x0028, 0x0002, 0x00DF, 0x0002, 0x003A, 0x0002, 0x111F, 0x0002, 0x0028, 0x0002, 0x005C,
> +        0x0002, 0x008A, 0x0002, 0x00BF, 0x0002, 0x0018, 0x0002, 0x00FE, 0x0002, 0x00CC, 0x0002, 0x007E, 0x0002,
> +        0x0028, 0x0002, 0x8FFF, 0x0002, 0x004A, 0x0002, 0x007F, 0x0002, 0x0018, 0x0002, 0x00DF, 0x0002, 0x00AC,
> +        0x0002, 0x133F, 0x0002, 0x0028, 0x0002, 0x222D, 0x0002, 0x008A, 0x0002, 0x00BE, 0x0002, 0x0018, 0x0002,
> +        0x44EF, 0x0002, 0x2AAD, 0x0002, 0x006E, 0x0002, 0x0028, 0x0002, 0x15FF, 0x0002, 0x004A, 0x0002, 0x009E,
> +        0x0002, 0x0018, 0x0002, 0x00CF, 0x0002, 0x003C, 0x0002, 0x223F, 0x0002, 0x0028, 0x0002, 0x005C, 0x0002,
> +        0x008A, 0x0002, 0x2BBF, 0x0002, 0x0018, 0x0002, 0x04EF, 0x0002, 0x00CC, 0x0002, 0x006F, 0x0002, 0x0028,
> +        0x0002, 0x27FF, 0x0002, 0x004A, 0x0002, 0x009F, 0x0002, 0x0018, 0x0002, 0x00DE, 0x0002, 0x00AC, 0x0002,
> +        0x444F, 0x0002, 0x0028, 0x0002, 0x222D, 0x0002, 0x008A, 0x0002, 0x8AAF, 0x0002, 0x0018, 0x0002, 0x00EE,
> +        0x0002, 0x2AAD, 0x0002, 0x005F, 0x0002, 0x0028, 0x0002, 0x44FF, 0x0002, 0x004A, 0x0002, 0x888F, 0x0002,
> +        0x0018, 0x0002, 0xAAAF, 0x0002, 0x003C, 0x0002, 0x111F, 0x0004, 0x8FFD, 0x0028, 0x005C, 0x0004, 0x00BC,
> +        0x008A, 0x66FF, 0x0004, 0x00CD, 0x0018, 0x111D, 0x0004, 0x009C, 0x003A, 0x8AAF, 0x0004, 0x00FC, 0x0028,
> +        0x133D, 0x0004, 0x00AC, 0x004A, 0x3BBF, 0x0004, 0x2BBD, 0x0018, 0x5FFF, 0x0004, 0x006C, 0x157D, 0x455F,
> +        0x0004, 0x2FFD, 0x0028, 0x222D, 0x0004, 0x22AD, 0x008A, 0x44EF, 0x0004, 0x00CC, 0x0018, 0x4FFF, 0x0004,
> +        0x007C, 0x003A, 0x447F, 0x0004, 0x04DD, 0x0028, 0x233D, 0x0004, 0x009D, 0x004A, 0x00DE, 0x0004, 0x88BD,
> +        0x0018, 0xAFFF, 0x0004, 0x115D, 0x1FFD, 0x444F, 0x0004, 0x8FFD, 0x0028, 0x005C, 0x0004, 0x00BC, 0x008A,
> +        0x8CEF, 0x0004, 0x00CD, 0x0018, 0x111D, 0x0004, 0x009C, 0x003A, 0x888F, 0x0004, 0x00FC, 0x0028, 0x133D,
> +        0x0004, 0x00AC, 0x004A, 0x44DF, 0x0004, 0x2BBD, 0x0018, 0x8AFF, 0x0004, 0x006C, 0x157D, 0x006F, 0x0004,
> +        0x2FFD, 0x0028, 0x222D, 0x0004, 0x22AD, 0x008A, 0x00EE, 0x0004, 0x00CC, 0x0018, 0x2EEF, 0x0004, 0x007C,
> +        0x003A, 0x277F, 0x0004, 0x04DD, 0x0028, 0x233D, 0x0004, 0x009D, 0x004A, 0x1BBF, 0x0004, 0x88BD, 0x0018,
> +        0x37FF, 0x0004, 0x115D, 0x1FFD, 0x333F, 0x0002, 0x0088, 0x0002, 0x02ED, 0x0002, 0x00CA, 0x0002, 0x4CCF,
> +        0x0002, 0x0048, 0x0002, 0x23FF, 0x0002, 0x001A, 0x0002, 0x888F, 0x0002, 0x0088, 0x0002, 0x006C, 0x0002,
> +        0x002A, 0x0002, 0x00AF, 0x0002, 0x0048, 0x0002, 0x22EF, 0x0002, 0x00AC, 0x0002, 0x005F, 0x0002, 0x0088,
> +        0x0002, 0x444D, 0x0002, 0x00CA, 0x0002, 0xCCCF, 0x0002, 0x0048, 0x0002, 0x00FE, 0x0002, 0x001A, 0x0002,
> +        0x006F, 0x0002, 0x0088, 0x0002, 0x005C, 0x0002, 0x002A, 0x0002, 0x009F, 0x0002, 0x0048, 0x0002, 0x00DF,
> +        0x0002, 0x03FD, 0x0002, 0x222F, 0x0002, 0x0088, 0x0002, 0x02ED, 0x0002, 0x00CA, 0x0002, 0x8CCF, 0x0002,
> +        0x0048, 0x0002, 0x11FF, 0x0002, 0x001A, 0x0002, 0x007E, 0x0002, 0x0088, 0x0002, 0x006C, 0x0002, 0x002A,
> +        0x0002, 0x007F, 0x0002, 0x0048, 0x0002, 0x00EE, 0x0002, 0x00AC, 0x0002, 0x003E, 0x0002, 0x0088, 0x0002,
> +        0x444D, 0x0002, 0x00CA, 0x0002, 0x00BE, 0x0002, 0x0048, 0x0002, 0x00BF, 0x0002, 0x001A, 0x0002, 0x003F,
> +        0x0002, 0x0088, 0x0002, 0x005C, 0x0002, 0x002A, 0x0002, 0x009E, 0x0002, 0x0048, 0x0002, 0x00DE, 0x0002,
> +        0x03FD, 0x0002, 0x111F, 0x0004, 0x8AED, 0x0048, 0x888D, 0x0004, 0x00DC, 0x00CA, 0x3FFF, 0x0004, 0xCFFD,
> +        0x002A, 0x003D, 0x0004, 0x00BC, 0x005A, 0x8DDF, 0x0004, 0x8FFD, 0x0048, 0x006C, 0x0004, 0x027D, 0x008A,
> +        0x99FF, 0x0004, 0x00EC, 0x00FA, 0x003C, 0x0004, 0x00AC, 0x001A, 0x009F, 0x0004, 0x2FFD, 0x0048, 0x007C,
> +        0x0004, 0x44CD, 0x00CA, 0x67FF, 0x0004, 0x1FFD, 0x002A, 0x444D, 0x0004, 0x00AD, 0x005A, 0x8CCF, 0x0004,
> +        0x4FFD, 0x0048, 0x445D, 0x0004, 0x01BD, 0x008A, 0x4EEF, 0x0004, 0x45DD, 0x00FA, 0x111D, 0x0004, 0x009C,
> +        0x001A, 0x222F, 0x0004, 0x8AED, 0x0048, 0x888D, 0x0004, 0x00DC, 0x00CA, 0xAFFF, 0x0004, 0xCFFD, 0x002A,
> +        0x003D, 0x0004, 0x00BC, 0x005A, 0x11BF, 0x0004, 0x8FFD, 0x0048, 0x006C, 0x0004, 0x027D, 0x008A, 0x22EF,
> +        0x0004, 0x00EC, 0x00FA, 0x003C, 0x0004, 0x00AC, 0x001A, 0x227F, 0x0004, 0x2FFD, 0x0048, 0x007C, 0x0004,
> +        0x44CD, 0x00CA, 0x5DFF, 0x0004, 0x1FFD, 0x002A, 0x444D, 0x0004, 0x00AD, 0x005A, 0x006F, 0x0004, 0x4FFD,
> +        0x0048, 0x445D, 0x0004, 0x01BD, 0x008A, 0x11DF, 0x0004, 0x45DD, 0x00FA, 0x111D, 0x0004, 0x009C, 0x001A,
> +        0x155F, 0x0006, 0x00FC, 0x0018, 0x111D, 0x0048, 0x888D, 0x00AA, 0x4DDF, 0x0006, 0x2AAD, 0x005A, 0x67FF,
> +        0x0028, 0x223D, 0x00BC, 0xAAAF, 0x0006, 0x00EC, 0x0018, 0x5FFF, 0x0048, 0x006C, 0x008A, 0xCCCF, 0x0006,
> +        0x009D, 0x00CA, 0x44EF, 0x0028, 0x003C, 0x8FFD, 0x137F, 0x0006, 0x8EED, 0x0018, 0x1FFF, 0x0048, 0x007C,
> +        0x00AA, 0x4CCF, 0x0006, 0x227D, 0x005A, 0x1DDF, 0x0028, 0x444D, 0x4FFD, 0x155F, 0x0006, 0x00DC, 0x0018,
> +        0x2EEF, 0x0048, 0x445D, 0x008A, 0x22BF, 0x0006, 0x009C, 0x00CA, 0x8CDF, 0x0028, 0x222D, 0x2FFD, 0x226F,
> +        0x0006, 0x00FC, 0x0018, 0x111D, 0x0048, 0x888D, 0x00AA, 0x1BBF, 0x0006, 0x2AAD, 0x005A, 0x33FF, 0x0028,
> +        0x223D, 0x00BC, 0x8AAF, 0x0006, 0x00EC, 0x0018, 0x9BFF, 0x0048, 0x006C, 0x008A, 0x8ABF, 0x0006, 0x009D,
> +        0x00CA, 0x4EEF, 0x0028, 0x003C, 0x8FFD, 0x466F, 0x0006, 0x8EED, 0x0018, 0xCFFF, 0x0048, 0x007C, 0x00AA,
> +        0x8CCF, 0x0006, 0x227D, 0x005A, 0xAEEF, 0x0028, 0x444D, 0x4FFD, 0x477F, 0x0006, 0x00DC, 0x0018, 0xAFFF,
> +        0x0048, 0x445D, 0x008A, 0x2BBF, 0x0006, 0x009C, 0x00CA, 0x44DF, 0x0028, 0x222D, 0x2FFD, 0x133F, 0x00F6,
> +        0xAFFD, 0x1FFB, 0x003C, 0x0008, 0x23BD, 0x007A, 0x11DF, 0x00F6, 0x45DD, 0x2FFB, 0x4EEF, 0x00DA, 0x177D,
> +        0xCFFD, 0x377F, 0x00F6, 0x3FFD, 0x8FFB, 0x111D, 0x0008, 0x009C, 0x005A, 0x1BBF, 0x00F6, 0x00CD, 0x00BA,
> +        0x8DDF, 0x4FFB, 0x006C, 0x9BFD, 0x455F, 0x00F6, 0x67FD, 0x1FFB, 0x002C, 0x0008, 0x00AC, 0x007A, 0x009F,
> +        0x00F6, 0x00AD, 0x2FFB, 0x7FFF, 0x00DA, 0x004C, 0x5FFD, 0x477F, 0x00F6, 0x00EC, 0x8FFB, 0x001C, 0x0008,
> +        0x008C, 0x005A, 0x888F, 0x00F6, 0x00CC, 0x00BA, 0x2EEF, 0x4FFB, 0x115D, 0x8AED, 0x113F, 0x00F6, 0xAFFD,
> +        0x1FFB, 0x003C, 0x0008, 0x23BD, 0x007A, 0x1DDF, 0x00F6, 0x45DD, 0x2FFB, 0xBFFF, 0x00DA, 0x177D, 0xCFFD,
> +        0x447F, 0x00F6, 0x3FFD, 0x8FFB, 0x111D, 0x0008, 0x009C, 0x005A, 0x277F, 0x00F6, 0x00CD, 0x00BA, 0x22EF,
> +        0x4FFB, 0x006C, 0x9BFD, 0x444F, 0x00F6, 0x67FD, 0x1FFB, 0x002C, 0x0008, 0x00AC, 0x007A, 0x11BF, 0x00F6,
> +        0x00AD, 0x2FFB, 0xFFFF, 0x00DA, 0x004C, 0x5FFD, 0x233F, 0x00F6, 0x00EC, 0x8FFB, 0x001C, 0x0008, 0x008C,
> +        0x005A, 0x006F, 0x00F6, 0x00CC, 0x00BA, 0x8BBF, 0x4FFB, 0x115D, 0x8AED, 0x222F};
> +
> +static const uint16_t dec_CxtVLC_table0[1024] = {
> +        0x0026, 0x00AA, 0x0046, 0x006C, 0x0086, 0x8AED, 0x0018, 0x8DDF, 0x0026, 0x01BD, 0x0046, 0x5FFF, 0x0086,
> +        0x027D, 0x005A, 0x155F, 0x0026, 0x003A, 0x0046, 0x444D, 0x0086, 0x4CCD, 0x0018, 0xCCCF, 0x0026, 0x2EFD,
> +        0x0046, 0x99FF, 0x0086, 0x009C, 0x00CA, 0x133F, 0x0026, 0x00AA, 0x0046, 0x445D, 0x0086, 0x8CCD, 0x0018,
> +        0x11DF, 0x0026, 0x4FFD, 0x0046, 0xCFFF, 0x0086, 0x009D, 0x005A, 0x007E, 0x0026, 0x003A, 0x0046, 0x1FFF,
> +        0x0086, 0x88AD, 0x0018, 0x00BE, 0x0026, 0x8FFD, 0x0046, 0x4EEF, 0x0086, 0x888D, 0x00CA, 0x111F, 0x0026,
> +        0x00AA, 0x0046, 0x006C, 0x0086, 0x8AED, 0x0018, 0x45DF, 0x0026, 0x01BD, 0x0046, 0x22EF, 0x0086, 0x027D,
> +        0x005A, 0x227F, 0x0026, 0x003A, 0x0046, 0x444D, 0x0086, 0x4CCD, 0x0018, 0x11BF, 0x0026, 0x2EFD, 0x0046,
> +        0x00FE, 0x0086, 0x009C, 0x00CA, 0x223F, 0x0026, 0x00AA, 0x0046, 0x445D, 0x0086, 0x8CCD, 0x0018, 0x00DE,
> +        0x0026, 0x4FFD, 0x0046, 0xABFF, 0x0086, 0x009D, 0x005A, 0x006F, 0x0026, 0x003A, 0x0046, 0x6EFF, 0x0086,
> +        0x88AD, 0x0018, 0x2AAF, 0x0026, 0x8FFD, 0x0046, 0x00EE, 0x0086, 0x888D, 0x00CA, 0x222F, 0x0004, 0x00CA,
> +        0x0088, 0x027D, 0x0004, 0x4CCD, 0x0028, 0x00FE, 0x0004, 0x2AFD, 0x0048, 0x005C, 0x0004, 0x009D, 0x0018,
> +        0x00DE, 0x0004, 0x01BD, 0x0088, 0x006C, 0x0004, 0x88AD, 0x0028, 0x11DF, 0x0004, 0x8AED, 0x0048, 0x003C,
> +        0x0004, 0x888D, 0x0018, 0x111F, 0x0004, 0x00CA, 0x0088, 0x006D, 0x0004, 0x88CD, 0x0028, 0x88FF, 0x0004,
> +        0x8BFD, 0x0048, 0x444D, 0x0004, 0x009C, 0x0018, 0x00BE, 0x0004, 0x4EFD, 0x0088, 0x445D, 0x0004, 0x00AC,
> +        0x0028, 0x00EE, 0x0004, 0x45DD, 0x0048, 0x222D, 0x0004, 0x003D, 0x0018, 0x007E, 0x0004, 0x00CA, 0x0088,
> +        0x027D, 0x0004, 0x4CCD, 0x0028, 0x1FFF, 0x0004, 0x2AFD, 0x0048, 0x005C, 0x0004, 0x009D, 0x0018, 0x11BF,
> +        0x0004, 0x01BD, 0x0088, 0x006C, 0x0004, 0x88AD, 0x0028, 0x22EF, 0x0004, 0x8AED, 0x0048, 0x003C, 0x0004,
> +        0x888D, 0x0018, 0x227F, 0x0004, 0x00CA, 0x0088, 0x006D, 0x0004, 0x88CD, 0x0028, 0x4EEF, 0x0004, 0x8BFD,
> +        0x0048, 0x444D, 0x0004, 0x009C, 0x0018, 0x2AAF, 0x0004, 0x4EFD, 0x0088, 0x445D, 0x0004, 0x00AC, 0x0028,
> +        0x8DDF, 0x0004, 0x45DD, 0x0048, 0x222D, 0x0004, 0x003D, 0x0018, 0x155F, 0x0004, 0x005A, 0x0088, 0x006C,
> +        0x0004, 0x88DD, 0x0028, 0x23FF, 0x0004, 0x11FD, 0x0048, 0x444D, 0x0004, 0x00AD, 0x0018, 0x00BE, 0x0004,
> +        0x137D, 0x0088, 0x155D, 0x0004, 0x00CC, 0x0028, 0x00DE, 0x0004, 0x02ED, 0x0048, 0x111D, 0x0004, 0x009D,
> +        0x0018, 0x007E, 0x0004, 0x005A, 0x0088, 0x455D, 0x0004, 0x44CD, 0x0028, 0x00EE, 0x0004, 0x1FFD, 0x0048,
> +        0x003C, 0x0004, 0x00AC, 0x0018, 0x555F, 0x0004, 0x47FD, 0x0088, 0x113D, 0x0004, 0x02BD, 0x0028, 0x477F,
> +        0x0004, 0x4CDD, 0x0048, 0x8FFF, 0x0004, 0x009C, 0x0018, 0x222F, 0x0004, 0x005A, 0x0088, 0x006C, 0x0004,
> +        0x88DD, 0x0028, 0x00FE, 0x0004, 0x11FD, 0x0048, 0x444D, 0x0004, 0x00AD, 0x0018, 0x888F, 0x0004, 0x137D,
> +        0x0088, 0x155D, 0x0004, 0x00CC, 0x0028, 0x8CCF, 0x0004, 0x02ED, 0x0048, 0x111D, 0x0004, 0x009D, 0x0018,
> +        0x006F, 0x0004, 0x005A, 0x0088, 0x455D, 0x0004, 0x44CD, 0x0028, 0x1DDF, 0x0004, 0x1FFD, 0x0048, 0x003C,
> +        0x0004, 0x00AC, 0x0018, 0x227F, 0x0004, 0x47FD, 0x0088, 0x113D, 0x0004, 0x02BD, 0x0028, 0x22BF, 0x0004,
> +        0x4CDD, 0x0048, 0x22EF, 0x0004, 0x009C, 0x0018, 0x233F, 0x0006, 0x4DDD, 0x4FFB, 0xCFFF, 0x0018, 0x113D,
> +        0x005A, 0x888F, 0x0006, 0x23BD, 0x008A, 0x00EE, 0x002A, 0x155D, 0xAAFD, 0x277F, 0x0006, 0x44CD, 0x8FFB,
> +        0x44EF, 0x0018, 0x467D, 0x004A, 0x2AAF, 0x0006, 0x00AC, 0x555B, 0x99DF, 0x1FFB, 0x003C, 0x5FFD, 0x266F,
> +        0x0006, 0x1DDD, 0x4FFB, 0x6EFF, 0x0018, 0x177D, 0x005A, 0x1BBF, 0x0006, 0x88AD, 0x008A, 0x5DDF, 0x002A,
> +        0x444D, 0x2FFD, 0x667F, 0x0006, 0x00CC, 0x8FFB, 0x2EEF, 0x0018, 0x455D, 0x004A, 0x119F, 0x0006, 0x009C,
> +        0x555B, 0x8CCF, 0x1FFB, 0x111D, 0x8CED, 0x006E, 0x0006, 0x4DDD, 0x4FFB, 0x3FFF, 0x0018, 0x113D, 0x005A,
> +        0x11BF, 0x0006, 0x23BD, 0x008A, 0x8DDF, 0x002A, 0x155D, 0xAAFD, 0x222F, 0x0006, 0x44CD, 0x8FFB, 0x00FE,
> +        0x0018, 0x467D, 0x004A, 0x899F, 0x0006, 0x00AC, 0x555B, 0x00DE, 0x1FFB, 0x003C, 0x5FFD, 0x446F, 0x0006,
> +        0x1DDD, 0x4FFB, 0x9BFF, 0x0018, 0x177D, 0x005A, 0x00BE, 0x0006, 0x88AD, 0x008A, 0xCDDF, 0x002A, 0x444D,
> +        0x2FFD, 0x007E, 0x0006, 0x00CC, 0x8FFB, 0x4EEF, 0x0018, 0x455D, 0x004A, 0x377F, 0x0006, 0x009C, 0x555B,
> +        0x8BBF, 0x1FFB, 0x111D, 0x8CED, 0x233F, 0x0004, 0x00AA, 0x0088, 0x047D, 0x0004, 0x01DD, 0x0028, 0x11DF,
> +        0x0004, 0x27FD, 0x0048, 0x005C, 0x0004, 0x8AAD, 0x0018, 0x2BBF, 0x0004, 0x009C, 0x0088, 0x006C, 0x0004,
> +        0x00CC, 0x0028, 0x00EE, 0x0004, 0x8CED, 0x0048, 0x222D, 0x0004, 0x888D, 0x0018, 0x007E, 0x0004, 0x00AA,
> +        0x0088, 0x006D, 0x0004, 0x88CD, 0x0028, 0x00FE, 0x0004, 0x19FD, 0x0048, 0x003C, 0x0004, 0x2AAD, 0x0018,
> +        0xAAAF, 0x0004, 0x8BFD, 0x0088, 0x005D, 0x0004, 0x00BD, 0x0028, 0x4CCF, 0x0004, 0x44ED, 0x0048, 0x4FFF,
> +        0x0004, 0x223D, 0x0018, 0x111F, 0x0004, 0x00AA, 0x0088, 0x047D, 0x0004, 0x01DD, 0x0028, 0x99FF, 0x0004,
> +        0x27FD, 0x0048, 0x005C, 0x0004, 0x8AAD, 0x0018, 0x00BE, 0x0004, 0x009C, 0x0088, 0x006C, 0x0004, 0x00CC,
> +        0x0028, 0x00DE, 0x0004, 0x8CED, 0x0048, 0x222D, 0x0004, 0x888D, 0x0018, 0x444F, 0x0004, 0x00AA, 0x0088,
> +        0x006D, 0x0004, 0x88CD, 0x0028, 0x2EEF, 0x0004, 0x19FD, 0x0048, 0x003C, 0x0004, 0x2AAD, 0x0018, 0x447F,
> +        0x0004, 0x8BFD, 0x0088, 0x005D, 0x0004, 0x00BD, 0x0028, 0x009F, 0x0004, 0x44ED, 0x0048, 0x67FF, 0x0004,
> +        0x223D, 0x0018, 0x133F, 0x0006, 0x00CC, 0x008A, 0x9DFF, 0x2FFB, 0x467D, 0x1FFD, 0x99BF, 0x0006, 0x2AAD,
> +        0x002A, 0x66EF, 0x4FFB, 0x005C, 0x2EED, 0x377F, 0x0006, 0x89BD, 0x004A, 0x00FE, 0x8FFB, 0x006C, 0x67FD,
> +        0x889F, 0x0006, 0x888D, 0x001A, 0x5DDF, 0x00AA, 0x222D, 0x89DD, 0x444F, 0x0006, 0x2BBD, 0x008A, 0xCFFF,
> +        0x2FFB, 0x226D, 0x009C, 0x00BE, 0x0006, 0xAAAD, 0x002A, 0x1DDF, 0x4FFB, 0x003C, 0x4DDD, 0x466F, 0x0006,
> +        0x8AAD, 0x004A, 0xAEEF, 0x8FFB, 0x445D, 0x8EED, 0x177F, 0x0006, 0x233D, 0x001A, 0x4CCF, 0x00AA, 0xAFFF,
> +        0x88CD, 0x133F, 0x0006, 0x00CC, 0x008A, 0x77FF, 0x2FFB, 0x467D, 0x1FFD, 0x3BBF, 0x0006, 0x2AAD, 0x002A,
> +        0x00EE, 0x4FFB, 0x005C, 0x2EED, 0x007E, 0x0006, 0x89BD, 0x004A, 0x4EEF, 0x8FFB, 0x006C, 0x67FD, 0x667F,
> +        0x0006, 0x888D, 0x001A, 0x00DE, 0x00AA, 0x222D, 0x89DD, 0x333F, 0x0006, 0x2BBD, 0x008A, 0x57FF, 0x2FFB,
> +        0x226D, 0x009C, 0x199F, 0x0006, 0xAAAD, 0x002A, 0x99DF, 0x4FFB, 0x003C, 0x4DDD, 0x155F, 0x0006, 0x8AAD,
> +        0x004A, 0xCEEF, 0x8FFB, 0x445D, 0x8EED, 0x277F, 0x0006, 0x233D, 0x001A, 0x1BBF, 0x00AA, 0x3FFF, 0x88CD,
> +        0x111F, 0x0006, 0x45DD, 0x2FFB, 0x111D, 0x0018, 0x467D, 0x8FFD, 0xCCCF, 0x0006, 0x19BD, 0x004A, 0x22EF,
> +        0x002A, 0x222D, 0x3FFD, 0x888F, 0x0006, 0x00CC, 0x008A, 0x00FE, 0x0018, 0x115D, 0xCFFD, 0x8AAF, 0x0006,
> +        0x00AC, 0x003A, 0x8CDF, 0x1FFB, 0x133D, 0x66FD, 0x466F, 0x0006, 0x8CCD, 0x2FFB, 0x5FFF, 0x0018, 0x006C,
> +        0x4FFD, 0xABBF, 0x0006, 0x22AD, 0x004A, 0x00EE, 0x002A, 0x233D, 0xAEFD, 0x377F, 0x0006, 0x2BBD, 0x008A,
> +        0x55DF, 0x0018, 0x005C, 0x177D, 0x119F, 0x0006, 0x009C, 0x003A, 0x4CCF, 0x1FFB, 0x333D, 0x8EED, 0x444F,
> +        0x0006, 0x45DD, 0x2FFB, 0x111D, 0x0018, 0x467D, 0x8FFD, 0x99BF, 0x0006, 0x19BD, 0x004A, 0x2EEF, 0x002A,
> +        0x222D, 0x3FFD, 0x667F, 0x0006, 0x00CC, 0x008A, 0x4EEF, 0x0018, 0x115D, 0xCFFD, 0x899F, 0x0006, 0x00AC,
> +        0x003A, 0x00DE, 0x1FFB, 0x133D, 0x66FD, 0x226F, 0x0006, 0x8CCD, 0x2FFB, 0x9BFF, 0x0018, 0x006C, 0x4FFD,
> +        0x00BE, 0x0006, 0x22AD, 0x004A, 0x1DDF, 0x002A, 0x233D, 0xAEFD, 0x007E, 0x0006, 0x2BBD, 0x008A, 0xCEEF,
> +        0x0018, 0x005C, 0x177D, 0x277F, 0x0006, 0x009C, 0x003A, 0x8BBF, 0x1FFB, 0x333D, 0x8EED, 0x455F, 0x1FF9,
> +        0x1DDD, 0xAFFB, 0x00DE, 0x8FF9, 0x001C, 0xFFFB, 0x477F, 0x4FF9, 0x177D, 0x3FFB, 0x3BBF, 0x2FF9, 0xAEEF,
> +        0x8EED, 0x444F, 0x1FF9, 0x22AD, 0x000A, 0x8BBF, 0x8FF9, 0x00FE, 0xCFFD, 0x007E, 0x4FF9, 0x115D, 0x5FFB,
> +        0x577F, 0x2FF9, 0x8DDF, 0x2EED, 0x333F, 0x1FF9, 0x2BBD, 0xAFFB, 0x88CF, 0x8FF9, 0xBFFF, 0xFFFB, 0x377F,
> +        0x4FF9, 0x006D, 0x3FFB, 0x00BE, 0x2FF9, 0x66EF, 0x9FFD, 0x133F, 0x1FF9, 0x009D, 0x000A, 0xABBF, 0x8FF9,
> +        0xDFFF, 0x6FFD, 0x006E, 0x4FF9, 0x002C, 0x5FFB, 0x888F, 0x2FF9, 0xCDDF, 0x4DDD, 0x222F, 0x1FF9, 0x1DDD,
> +        0xAFFB, 0x4CCF, 0x8FF9, 0x001C, 0xFFFB, 0x277F, 0x4FF9, 0x177D, 0x3FFB, 0x99BF, 0x2FF9, 0xCEEF, 0x8EED,
> +        0x004E, 0x1FF9, 0x22AD, 0x000A, 0x00AE, 0x8FF9, 0x7FFF, 0xCFFD, 0x005E, 0x4FF9, 0x115D, 0x5FFB, 0x009E,
> +        0x2FF9, 0x5DDF, 0x2EED, 0x003E, 0x1FF9, 0x2BBD, 0xAFFB, 0x00CE, 0x8FF9, 0xEFFF, 0xFFFB, 0x667F, 0x4FF9,
> +        0x006D, 0x3FFB, 0x8AAF, 0x2FF9, 0x00EE, 0x9FFD, 0x233F, 0x1FF9, 0x009D, 0x000A, 0x1BBF, 0x8FF9, 0x4EEF,
> +        0x6FFD, 0x455F, 0x4FF9, 0x002C, 0x5FFB, 0x008E, 0x2FF9, 0x99DF, 0x4DDD, 0x111F};
> diff --git a/libavcodec/jpeg2000htdec.h b/libavcodec/jpeg2000htdec.h
> new file mode 100644
> index 0000000000..62cf1450c2
> --- /dev/null
> +++ b/libavcodec/jpeg2000htdec.h
> @@ -0,0 +1,28 @@
> +/*
> +* JPEG2000 High Throughput block decoder
> +* Copyright (c) 2022 Caleb Etemesi <etemesicaleb@gmail.com>
> +*
> +* This file is part of FFmpeg.
> +*
> +* FFmpeg is free software; you can redistribute it and/or
> +* modify it under the terms of the GNU Lesser General Public
> +* License as published by the Free Software Foundation; either
> +* version 2.1 of the License, or (at your option) any later version.
> +*
> +* FFmpeg is distributed in the hope that it will be useful,
> +* but WITHOUT ANY WARRANTY; without even the implied warranty of
> +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +* Lesser General Public License for more details.
> +*
> +* You should have received a copy of the GNU Lesser General Public
> +* License along with FFmpeg; if not, write to the Free Software
> +* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +#ifndef AVCODEC_JPEG2000HTDEC_H
> +#define AVCODEC_JPEG2000HTDEC_H
> +
> +#include "jpeg2000dec.h"
> +
> +int ff_jpeg2000_decode_htj2k(const Jpeg2000DecoderContext *s, Jpeg2000CodingStyle *codsty, Jpeg2000T1Context *t1, Jpeg2000Cblk *cblk, int width, int height, int magp, uint8_t roi_shift);
> +

jpeg2000dec.c seems to be the only intended user of this function, so
one could make a header/template out of jpeg2000htdec.c and include this
in jpeg2000dec.c. This would allow to make this function static and
would avoid exposing the jpeg2000dec.c structures via a header.

> +#endif /* AVCODEC_JPEG2000HTDEC_H */
Caleb Etemesi Dec. 16, 2022, 3:41 p.m. UTC | #2
Hi Andreas, thanks for the review,

> Use "" instead of <>.

Will do

> I really don't like that you are adding a new bitreader here. Can you
explain why this is supposed to be necessary?

The reasons are:
1. The spec has some byte streams read from back to front,I didn't see that
functionality present in get_bits.h(which I assumed contained the bit
reading API).
2. It doesn't handle unstuffing correctly.
3. Doesn't handle EOB correctly, the spec has some arrays when there are no
more bytes, the bit-reader should be  filled with 0, others with 0xFF.

> Generally, it is not ok to read array[-1] even if the result is that is
> vmasked away later. It is ok if array actually points into the middle of
> a buffer, so that array[-1] and the other bytes accessed are actually
> part of the buffer. Your remark about MEL bytestream indicates that this
> might be the case here, but I'm not sure.

Hi that is the case, the stream in question is a backward read array, hence
array[-1] would point to the last byte in the MEL
byte stream which is forward read, the paper on HTJ2K @
https://www.htj2k.com/wp-content/uploads/white-paper.pdf on page 20 shows
an image description
of how the bytes are arranged.

> Branchlessly? I see four branches below.

Sorry stray comment, will remove

> In case buffer->pos + 1 < 4, position + 1 < 0 and therefore the next
> lines will read array[-1]. And given that you are using FFMIN, there is
> an implicit branch anyway. I suggest you use AV_RB32 to read only from a
> position that is known to be in the buffer and then shift unwanted bytes
> away.
This array[-1] covered by the MEL bytestream, but a solution would be to
have a check to see if we are close to the end

Didn't know about AV_RB32, replaced the manual shifts with it.

> Are you sure you want to have <= instead of < here? This could read
> length + 1 bytes of array.

Yes, this is that way because of a bug Tomas Hardin found in the first
iteration of the patches.

> Why is this av_noinline? It is normally good, to inline functions only
> called once.

Will change.

> Why does a function called jpeg2000_decode_ht_cleanup perform
> allocations? And why all the other stuff below?

It decodes the cleanup segment, it's not a cleanup function.

> Could use a smaller scope.
Hi what exactly ?  the whole code below that?

> This is inconsistent with our naming conventions.
Hi, what section of the naming conventions?

> jpeg2000dec.c seems to be the only intended user of this function, so
> one could make a header/template out of jpeg2000htdec.c and include this
> in jpeg2000dec.c. This would allow to make this function static and
> would avoid exposing the jpeg2000dec.c structures via a header.

Sounds like a good idea,

On Fri, Dec 16, 2022 at 3:29 AM Andreas Rheinhardt <
andreas.rheinhardt@outlook.com> wrote:

> etemesicaleb@gmail.com:
> > From: caleb <etemesicaleb@gmail.com>
> >
> > This is a revised patch with suggested changes from earlier and
> satisfies the Google Summer of
> > Code 2022 FFmpeg project to add a HTJ2K decoder to FFmpeg.
> >
> > ---
> >  libavcodec/Makefile        |    2 +-
> >  libavcodec/jpeg2000.h      |    3 +
> >  libavcodec/jpeg2000dec.c   |   69 +-
> >  libavcodec/jpeg2000dec.h   |    2 +
> >  libavcodec/jpeg2000htdec.c | 1441 ++++++++++++++++++++++++++++++++++++
> >  libavcodec/jpeg2000htdec.h |   28 +
> >  6 files changed, 1529 insertions(+), 16 deletions(-)
> >  create mode 100644 libavcodec/jpeg2000htdec.c
> >  create mode 100644 libavcodec/jpeg2000htdec.h
> >
> > diff --git a/libavcodec/Makefile b/libavcodec/Makefile
> > index 98841ed07c..1ba9e09528 100644
> > --- a/libavcodec/Makefile
> > +++ b/libavcodec/Makefile
> > @@ -464,7 +464,7 @@ OBJS-$(CONFIG_JACOSUB_DECODER)         +=
> jacosubdec.o ass.o
> >  OBJS-$(CONFIG_JPEG2000_ENCODER)        += j2kenc.o mqcenc.o mqc.o
> jpeg2000.o \
> >                                            jpeg2000dwt.o
> >  OBJS-$(CONFIG_JPEG2000_DECODER)        += jpeg2000dec.o jpeg2000.o
> jpeg2000dsp.o \
> > -                                          jpeg2000dwt.o mqcdec.o mqc.o
> > +                                          jpeg2000dwt.o mqcdec.o mqc.o
> jpeg2000htdec.o
> >  OBJS-$(CONFIG_JPEGLS_DECODER)          += jpeglsdec.o jpegls.o
> >  OBJS-$(CONFIG_JPEGLS_ENCODER)          += jpeglsenc.o jpegls.o
> >  OBJS-$(CONFIG_JV_DECODER)              += jvdec.o
> > diff --git a/libavcodec/jpeg2000.h b/libavcodec/jpeg2000.h
> > index e5ecb4cbf9..b2a8e13244 100644
> > --- a/libavcodec/jpeg2000.h
> > +++ b/libavcodec/jpeg2000.h
> > @@ -189,6 +189,9 @@ typedef struct Jpeg2000Cblk {
> >      Jpeg2000Pass *passes;
> >      Jpeg2000Layer *layers;
> >      int coord[2][2]; // border coordinates {{x0, x1}, {y0, y1}}
> > +    /*HTJ2K settings */
> > +    int zbp;
> > +    int pass_lengths[2];
> >  } Jpeg2000Cblk; // code block
> >
> >  typedef struct Jpeg2000Prec {
> > diff --git a/libavcodec/jpeg2000dec.c b/libavcodec/jpeg2000dec.c
> > index 7fc09cb558..5c92baa88e 100644
> > --- a/libavcodec/jpeg2000dec.c
> > +++ b/libavcodec/jpeg2000dec.c
> > @@ -43,6 +43,7 @@
> >  #include "jpeg2000dsp.h"
> >  #include "profiles.h"
> >  #include "jpeg2000dec.h"
> > +#include "jpeg2000htdec.h"
> >
> >  /* get_bits functions for JPEG2000 packet bitstream
> >   * It is a get_bit function with a bit-stuffing routine. If the value
> of the
> > @@ -428,12 +429,13 @@ static int get_cox(Jpeg2000DecoderContext *s,
> Jpeg2000CodingStyle *c)
> >      c->cblk_style = bytestream2_get_byteu(&s->g);
> >      if (c->cblk_style != 0) { // cblk style
> >          if (c->cblk_style & JPEG2000_CTSY_HTJ2K_M || c->cblk_style &
> JPEG2000_CTSY_HTJ2K_F) {
> > -            av_log(s->avctx, AV_LOG_ERROR, "Support for High throughput
> JPEG 2000 is not yet available\n");
> > -            return AVERROR_PATCHWELCOME;
> > +            av_log(s->avctx,AV_LOG_TRACE,"High Throughput jpeg 2000
> codestream.\n");
> > +            s->is_htj2k = 1;
> > +        } else {
> > +            av_log(s->avctx, AV_LOG_WARNING, "extra cblk styles %X\n",
> c->cblk_style);
> > +            if (c->cblk_style & JPEG2000_CBLK_BYPASS)
> > +                av_log(s->avctx, AV_LOG_WARNING, "Selective arithmetic
> coding bypass\n");
> >          }
> > -        av_log(s->avctx, AV_LOG_WARNING, "extra cblk styles %X\n",
> c->cblk_style);
> > -        if (c->cblk_style & JPEG2000_CBLK_BYPASS)
> > -            av_log(s->avctx, AV_LOG_WARNING, "Selective arithmetic
> coding bypass\n");
> >      }
> >      c->transform = bytestream2_get_byteu(&s->g); // DWT transformation
> type
> >      /* set integer 9/7 DWT in case of BITEXACT flag */
> > @@ -1058,13 +1060,15 @@ static int
> jpeg2000_decode_packet(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile,
> >                  return incl;
> >
> >              if (!cblk->npasses) {
> > -                int v = expn[bandno] + numgbits - 1 -
> > -                        tag_tree_decode(s, prec->zerobits + cblkno,
> 100);
> > +                int zbp = tag_tree_decode(s,prec->zerobits + cblkno,
> 100);
> > +                int v = expn[bandno] + numgbits - 1 - zbp;
> > +
> >                  if (v < 0 || v > 30) {
> >                      av_log(s->avctx, AV_LOG_ERROR,
> >                             "nonzerobits %d invalid or unsupported\n",
> v);
> >                      return AVERROR_INVALIDDATA;
> >                  }
> > +                cblk->zbp = zbp;
> >                  cblk->nonzerobits = v;
> >              }
> >              if ((newpasses = getnpasses(s)) < 0)
> > @@ -1105,8 +1109,29 @@ static int
> jpeg2000_decode_packet(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile,
> >                      }
> >                  }
> >
> > -                if ((ret = get_bits(s, av_log2(newpasses1) +
> cblk->lblock)) < 0)
> > -                    return ret;
> > +                if (newpasses > 1 && s->is_htj2k) {
> > +                    // Retrieve pass lengths for each pass
> > +                    int href_passes =  (cblk->npasses + newpasses - 1)
> % 3;
> > +                    int segment_passes = newpasses - href_passes;
> > +                    int pass_bound = 2;
> > +                    int eb = 0;
> > +                    int extra_bit = newpasses > 2 ? 1 : 0;
> > +                    while (pass_bound <=segment_passes) {
> > +                        eb++;
> > +                        pass_bound +=pass_bound;
> > +                    }
> > +                    if ((ret = get_bits(s, llen + eb + 3)) < 0)
> > +                        return ret;
> > +                    cblk->pass_lengths[0] = ret;
> > +                    if ((ret = get_bits(s, llen + 3 + extra_bit)) < 0)
> > +                        return ret;
> > +                    cblk->pass_lengths[1] = ret;
> > +                    ret = cblk->pass_lengths[0] + cblk->pass_lengths[1];
> > +                } else {
> > +                    if ((ret = get_bits(s, av_log2(newpasses1) +
> cblk->lblock)) < 0)
> > +                        return ret;
> > +                    cblk->pass_lengths[0]=ret;
> > +                }
> >                  if (ret > cblk->data_allocated) {
> >                      size_t new_size = FFMAX(2*cblk->data_allocated,
> ret);
> >                      void *new = av_realloc(cblk->data, new_size);
> > @@ -1857,7 +1882,10 @@ static inline void tile_codeblocks(const
> Jpeg2000DecoderContext *s, Jpeg2000Tile
> >      for (compno = 0; compno < s->ncomponents; compno++) {
> >          Jpeg2000Component *comp     = tile->comp + compno;
> >          Jpeg2000CodingStyle *codsty = tile->codsty + compno;
> > +        Jpeg2000QuantStyle *quantsty= tile->qntsty + compno;
> > +
> >          int coded = 0;
> > +        int subbandno = 0;
> >
> >          t1.stride = (1<<codsty->log2_cblk_width) + 2;
> >
> > @@ -1865,7 +1893,7 @@ static inline void tile_codeblocks(const
> Jpeg2000DecoderContext *s, Jpeg2000Tile
> >          for (reslevelno = 0; reslevelno < codsty->nreslevels2decode;
> reslevelno++) {
> >              Jpeg2000ResLevel *rlevel = comp->reslevel + reslevelno;
> >              /* Loop on bands */
> > -            for (bandno = 0; bandno < rlevel->nbands; bandno++) {
> > +            for (bandno = 0; bandno < rlevel->nbands; bandno++,
> subbandno++) {
> >                  int nb_precincts, precno;
> >                  Jpeg2000Band *band = rlevel->band + bandno;
> >                  int cblkno = 0, bandpos;
> > @@ -1885,12 +1913,23 @@ static inline void tile_codeblocks(const
> Jpeg2000DecoderContext *s, Jpeg2000Tile
> >                      for (cblkno = 0;
> >                           cblkno < prec->nb_codeblocks_width *
> prec->nb_codeblocks_height;
> >                           cblkno++) {
> > -                        int x, y;
> > +                        int x, y, ret;
> > +                        // Annex E (Equation E-2) ISO/IEC 15444-1:2019
> > +                        int magp = quantsty->expn[subbandno] +
> quantsty->nguardbits - 1;
> > +
> >                          Jpeg2000Cblk *cblk = prec->cblk + cblkno;
> > -                        int ret = decode_cblk(s, codsty, &t1, cblk,
> > -                                    cblk->coord[0][1] -
> cblk->coord[0][0],
> > -                                    cblk->coord[1][1] -
> cblk->coord[1][0],
> > -                                    bandpos, comp->roi_shift);
> > +
> > +                        if (s->is_htj2k)
> > +                            ret = ff_jpeg2000_decode_htj2k(s, codsty,
> &t1, cblk,
> > +
>  cblk->coord[0][1] - cblk->coord[0][0],
> > +
>  cblk->coord[1][1] - cblk->coord[1][0],
> > +                                                           magp,
> comp->roi_shift);
> > +                        else
> > +                            ret = decode_cblk(s, codsty, &t1, cblk,
> > +                                              cblk->coord[0][1] -
> cblk->coord[0][0],
> > +                                              cblk->coord[1][1] -
> cblk->coord[1][0],
> > +                                              bandpos, comp->roi_shift);
> > +
> >                          if (ret)
> >                              coded = 1;
> >                          else
> > diff --git a/libavcodec/jpeg2000dec.h b/libavcodec/jpeg2000dec.h
> > index b6410c1432..9728529026 100644
> > --- a/libavcodec/jpeg2000dec.h
> > +++ b/libavcodec/jpeg2000dec.h
> > @@ -121,6 +121,8 @@ typedef struct Jpeg2000DecoderContext {
> >
> >      /*options parameters*/
> >      int             reduction_factor;
> > +    /*HTJ2K params*/
> > +    uint8_t         is_htj2k;
> >  } Jpeg2000DecoderContext;
> >
> >  #endif //AVCODEC_JPEG2000DEC_H
> > diff --git a/libavcodec/jpeg2000htdec.c b/libavcodec/jpeg2000htdec.c
> > new file mode 100644
> > index 0000000000..8ea901855f
> > --- /dev/null
> > +++ b/libavcodec/jpeg2000htdec.c
> > @@ -0,0 +1,1441 @@
> > +/*
> > + * Copyright (c) 2022 Caleb Etemesi <etemesicaleb@gmail.com>
> > + *
> > + * This file is part of FFmpeg.
> > + *
> > + * FFmpeg is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU Lesser General Public
> > + * License as published by the Free Software Foundation; either
> > + * version 2.1 of the License, or (at your option) any later version.
> > + *
> > + * FFmpeg is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > + * Lesser General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU Lesser General Public
> > + * License along with FFmpeg; if not, write to the Free Software
> > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> 02110-1301 USA
> > + */
> > +#include "jpeg2000htdec.h"
> > +#include "jpeg2000.h"
> > +#include "jpeg2000dec.h"
> > +#include <stdint.h>
> > +#include <libavutil/attributes.h>
> > +#include <libavutil/common.h>
> > +#include <libavutil/avassert.h>
>
> Use "" instead of <>.
>
> > +
> > +#define J2K_Q1 0
> > +
> > +#define J2K_Q2 1
> > +
> > +#define HT_SHIFT_SIGMA 0
> > +#define HT_SHIFT_SCAN 4
> > +#define HT_SHIFT_REF 3
> > +#define HT_SHIFT_REF_IND 2
> > +/**
> > + * @brief Table 2 in clause 7.3.3
> > + */
> > +const static uint8_t MEL_E[13] = {0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 4,
> 5};
> > +// Decode tables, found at the end of this
> > +static const uint16_t dec_CxtVLC_table1[1024];
> > +static const uint16_t dec_CxtVLC_table0[1024];
> > +
> > +typedef struct StateVars {
> > +    int32_t pos;
> > +    uint32_t bits;
> > +    uint32_t tmp;
> > +    uint32_t last;
> > +    uint8_t bits_left;
> > +    uint64_t bit_buf;
> > +} StateVars;
> > +
>
> I really don't like that you are adding a new bitreader here. Can you
> explain why this is supposed to be necessary?
>
> > +typedef struct MelDecoderState {
> > +    uint8_t k;
> > +    uint8_t run;
> > +    uint8_t one;
> > +} MelDecoderState;
> > +
> > +/**
> > + *  Given a precomputed c, checks whether n % d == 0
> > + *
> > + *  precomputed c for d is created by the precompute_c function
> > + */
> > +static av_always_inline uint32_t is_divisible(uint32_t n, uint64_t c)
> > +{
> > +    return n * c <= c - 1;
> > +}
> > +/**
> > + * Precompute a number c given d that when combined with a dividend n
> > + * returns whether n % d == 0
> > + *
> > + * Combined with is_divisible function
> > + */
> > +static av_always_inline uint64_t precompute_c(uint32_t divisor)
> > +{
> > +
> > +    return 1 + (0xfffffffffffffffful / divisor);
> > +}
> > +/* Initializers */
> > +
> > +static void jpeg2000_init_zero(StateVars *s)
> > +{
> > +    s->bits_left = 0;
> > +    s->bit_buf   = 0;
> > +    s->tmp       = 0;
> > +    s->bits      = 0;
> > +    s->pos       = 0;
> > +    s->last      = 0;
> > +}
> > +
> > +static void jpeg2000_init_mel(StateVars *s, uint32_t Pcup)
> > +{
> > +    jpeg2000_init_zero(s);
> > +    s->pos = Pcup;
> > +}
> > +
> > +static void jpeg2000_init_mag_ref(StateVars *s, uint32_t Lref)
> > +{
> > +    s->pos       = Lref - 2;
> > +    s->bits      = 0;
> > +    s->last      = 0xFF;
> > +    s->tmp       = 0;
> > +    s->bits_left = 0;
> > +    s->bit_buf   = 0;
> > +}
> > +
> > +static void jpeg2000_init_mel_decoder(MelDecoderState *mel_state)
> > +{
> > +    mel_state->k   = 0;
> > +    mel_state->run = 0;
> > +    mel_state->one = 0;
> > +}
> > +
> > +/**
> > + * @brief Refill the buffer backwards in little Endian while skipping
> > + * over stuffing bits
> > + *
> > + * Stuffing bits are those that appear in the position of any byte whose
> > + * LSBs are all 1's if the last consumed byte was larger than 0x8F
> > + */
> > +static int jpeg2000_bitbuf_refill_backwards(StateVars *buffer,
> > +                                            const uint8_t *array)
> > +{
> > +    uint64_t tmp = 0;
> > +    int32_t position = buffer->pos;
> > +    uint32_t new_bits = 32;
> > +    uint32_t mask;
> > +
> > +    if (buffer->bits_left > 32)
> > +        return 0; // enough data, no need to pull in more bits
> > +
> > +    /*
> > +     * We are reading bits from end to start, and we need to handle
> them in LE.
> > +     * therefore, if we read bytes ABCD, we need to convert
> > +     * them to DCBA, but the bitstream is constructed in such a way
> that it is
> > +     * BE when reading from back to front,so we need to swap bytes
> > +     * but this doesn't work when position is less than 3,
> > +     * we end up reading bits from the MEL byte-stream which
> > +     * is a recipe for sleepless nights.
> > +     *
> > +     * So the trick is to branchlessly read and mask
> > +     * depending on whatever was the initial position,
> > +     * the mask is simply either 32 bits, 24 bits ,8 bits or 0 bits.
> > +     * depending on how many bytes are available, with this, we don't
> > +     * read past the end of the buffer,we mask
> > +     * bits we already read ensuring we don't read twice.
> > +     * and we can do it branchlessly without checking for positions.
>
> Generally, it is not ok to read array[-1] even if the result is that is
> masked away later. It is ok if array actually points into the middle of
> a buffer, so that array[-1] and the other bytes accessed are actually
> part of the buffer. Your remark about MEL bytestream indicates that this
> might be the case here, but I'm not sure.
>
> > +     *
> > +     * We need to watch out for negative shift values which are UB in C
> hence
> > +     * the MAX declarative.
> > +     */
> > +
> > +    position -= 4;
> > +    mask = (UINT64_C(1) << (FFMIN(4, buffer->pos + 1)) * 8) - 1;
>
> In case buffer->pos + 1 < 4, position + 1 < 0 and therefore the next
> lines will read array[-1]. And given that you are using FFMIN, there is
> an implicit branch anyway. I suggest you use AV_RB32 to read only from a
> position that is known to be in the buffer and then shift unwanted bytes
> away.
>
> > +    tmp  = array[position + 1] * (1ull << 24);
>
> (uint32_t)array[position + 1] << 24 works as well and avoids the 64bit
> intermediate and the multiplication hack.
>
> > +    tmp |= array[position + 2] << 16;
> > +    tmp |= array[position + 3] << 8;
> > +    tmp |= array[position + 4];
> > +    tmp &= mask;
> > +
> > +    // Branchlessly unstuff  bits
>
> Branchlessly? I see four branches below.
>
> > +
> > +    // load temporary byte, which preceeds the position we
> > +    // currently at, to ensure that we can also un-stuff if the
> > +    // stuffed bit is the bottom most bits
> > +    tmp <<= 8;
> > +    tmp |= array[buffer->pos + 1];
> > +
> > +    if ((tmp & 0x7FFF000000) > 0x7F8F000000) {
> > +        tmp &= 0x7FFFFFFFFF;
> > +        new_bits--;
> > +    }
> > +    if ((tmp & 0x007FFF0000) > 0x007F8F0000) {
> > +        tmp = (tmp & 0x007FFFFFFF) + ((tmp & 0xFF00000000) >> 1);
> > +        new_bits--;
> > +    }
> > +    if ((tmp & 0x00007FFF00) > 0x00007F8F00) {
> > +        tmp = (tmp & 0x00007FFFFF) + ((tmp & 0xFFFF000000) >> 1);
> > +        new_bits--;
> > +    }
> > +    if ((tmp & 0x0000007FFF) > 0x0000007F8F) {
> > +        tmp = (tmp & 0x0000007FFF) + ((tmp & 0xFFFFFF0000) >> 1);
> > +        new_bits--;
> > +    }
> > +    /* remove temporary byte loaded. */
> > +    tmp >>= 8;
> > +
> > +    /* Add bits to the MSB of the bit buffer */
> > +    buffer->bit_buf |= tmp << buffer->bits_left;
> > +    buffer->bits_left += new_bits;
> > +    buffer->pos = FFMAX(0, position);
> > +    return 0;
> > +}
> > +
> > +/**
> > + * @brief Refill the bit-buffer reading new bits going forward
> > + * in the stream while skipping over stuffed bits.
> > + */
> > +static void jpeg2000_bitbuf_refill_forward(StateVars *buffer,
> > +                                           const uint8_t *array,
> > +                                           uint32_t length)
> > +{
> > +    while (buffer->bits_left < 32) {
> > +        buffer->tmp = 0xFF;
> > +        buffer->bits = (buffer->last == 0xFF) ? 7 : 8;
> > +        if (buffer->pos <= length) {
>
> Are you sure you want to have <= instead of < here? This could read
> length + 1 bytes of array.
>
> > +            buffer->tmp = array[buffer->pos];
> > +            buffer->pos += 1;
> > +            buffer->last = buffer->tmp;
> > +        }
> > +        buffer->bit_buf |= ((uint64_t) buffer->tmp) <<
> buffer->bits_left;
> > +        buffer->bits_left += buffer->bits;
> > +    }
> > +}
> > +
> > +/**
> > + * @brief Drops bits from lower bits in the bit buffer
> > + *
> > + * @param buf: Struct containing bit buffers
> > + * @param nbits: Number of bits to remove.
> > + */
> > +static av_always_inline void jpeg2000_bitbuf_drop_bits_lsb(StateVars
> *buf,
> > +                                                           uint8_t
> nbits)
> > +{
> > +    if (buf->bits_left < nbits) {
> > +        av_log(NULL, AV_LOG_ERROR, "Invalid bit read of %d, bits in
> buffer are %d\n", nbits, buf->bits_left);
> > +        av_assert0(0);
> > +    }
> > +    buf->bit_buf >>= nbits;
> > +    buf->bits_left -= nbits;
> > +}
> > +
> > +/**
> > + * @brief  Get bits from the bit buffer reading them
> > + * from the least significant bits moving to the most significant bits.
> > + *
> > + * In case there are fewer bits, refill from `buf` moving backwards.
> > + */
> > +static av_always_inline uint64_t jpeg2000_bitbuf_get_bits_lsb(
> > +        StateVars *bit_stream, uint8_t nbits, const uint8_t *buf)
> > +{
> > +    uint64_t bits;
> > +    uint64_t mask = (1ull << nbits) - 1;
> > +    if (bit_stream->bits_left < nbits)
> > +        jpeg2000_bitbuf_refill_backwards(bit_stream, buf);
> > +    bits = bit_stream->bit_buf & mask;
> > +    jpeg2000_bitbuf_drop_bits_lsb(bit_stream, nbits);
> > +    return bits;
> > +}
> > +
> > +/**
> > + * @brief Get bits from the bit buffer reading them
> > + * from the least significant bits moving to the most significant bits
> > + *
> > + * In case there are fewer bits, refill from `buf` moving forward
> > + */
> > +static av_always_inline uint64_t jpeg2000_bitbuf_get_bits_lsb_forward(
> > +        StateVars *bit_stream, uint8_t nbits, const uint8_t *buf,
> uint32_t length)
> > +{
> > +    uint64_t bits;
> > +    uint64_t mask = (1ull << nbits) - 1;
> > +
> > +    if (bit_stream->bits_left <= nbits)
> > +        jpeg2000_bitbuf_refill_forward(bit_stream, buf, length);
> > +    bits = bit_stream->bit_buf & mask;
> > +    jpeg2000_bitbuf_drop_bits_lsb(bit_stream, nbits);
> > +    return bits;
> > +}
> > +
> > +/**
> > + * @brief Look ahead bit buffer without discarding bits
> > + */
> > +static av_always_inline uint64_t
> jpeg2000_bitbuf_peek_bits_lsb(StateVars *stream, uint8_t nbits)
> > +{
> > +    uint64_t mask = (1ull << nbits) - 1;
> > +    return stream->bit_buf & mask;
> > +}
> > +
> > +/**
> > + * Variable Length Decoding Routines
> > + */
> > +static void jpeg2000_init_vlc(StateVars *s, uint32_t Lcup, uint32_t
> Pcup, const uint8_t *Dcup)
> > +{
> > +    s->bits_left = 0;
> > +    s->bit_buf   = 0;
> > +    s->pos       = Lcup - 2 - Pcup;
> > +    s->last      = Dcup[Lcup - 2];
> > +    s->tmp       = (s->last) >> 4;
> > +    s->bits      = ((s->tmp & 7) < 7) ? 4 : 3;
> > +
> > +    jpeg2000_bitbuf_refill_backwards(s, Dcup + Pcup);
> > +    jpeg2000_bitbuf_drop_bits_lsb(s, 4);
> > +}
> > +
> > +/**
> > + * Decode prefix codes for VLC segment.
> > + */
> > +static int av_always_inline jpeg2000_decode_ctx_vlc(const
> Jpeg2000DecoderContext *s,
> > +                                                    StateVars
> *vlc_stream,
> > +                                                    const uint16_t
> *table,
> > +                                                    const uint8_t *Dcup,
> > +                                                    uint8_t *sig_pat,
> > +                                                    uint8_t *res_off,
> > +                                                    uint8_t *emb_pat_k,
> > +                                                    uint8_t *emb_pat_1,
> > +                                                    uint8_t pos,
> > +                                                    uint32_t Pcup,
> > +                                                    uint16_t context)
> > +{
> > +    /* Described in clause 7.3.5 */
> > +    uint32_t value;
> > +    uint8_t len;
> > +    uint64_t index;
> > +    uint64_t code_word;
> > +
> > +    jpeg2000_bitbuf_refill_backwards(vlc_stream, Dcup + Pcup);
> > +
> > +    code_word = vlc_stream->bit_buf & 0x7f;
> > +    index = code_word + (context << 7);
> > +
> > +    /* decode table has 1024 entries so ensure array access is in
> bounds */
> > +    av_assert0(index < 1024);
> > +
> > +    value = table[index];
> > +
> > +    len = (value & 0x000F) >> 1;
> > +
> > +    res_off[pos] = (uint8_t) (value & 1);
> > +    sig_pat[pos] = (uint8_t) ((value & 0x00F0) >> 4);
> > +    emb_pat_k[pos] = (uint8_t) ((value & 0x0F00) >> 8);
> > +    emb_pat_1[pos] = (uint8_t) ((value & 0xF000) >> 12);
> > +
> > +    jpeg2000_bitbuf_drop_bits_lsb(vlc_stream, len);
> > +    return 0;
> > +}
> > +
> > +/**
> > + * @brief Decode variable length u-vlc prefix
> > + */
> > +static av_always_inline uint8_t
> > +vlc_decode_u_prefix(StateVars *vlc_stream, const uint8_t *refill_array)
> > +{
> > +    /* clause 7.3.6
> > +     * procedure : decodeUPrefix.
> > +     */
> > +    static const uint8_t return_value[8] = {5, 1, 2, 1, 3, 1, 2, 1};
> > +    static const uint8_t drop_bits[8] = {3, 1, 2, 1, 3, 1, 2, 1};
> > +
> > +    uint8_t bits;
> > +
> > +    if (vlc_stream->bits_left < 3)
> > +        jpeg2000_bitbuf_refill_backwards(vlc_stream, refill_array);
> > +
> > +    bits = jpeg2000_bitbuf_peek_bits_lsb(vlc_stream, 3);
> > +
> > +    jpeg2000_bitbuf_drop_bits_lsb(vlc_stream, drop_bits[bits]);
> > +    return return_value[bits];
> > +}
> > +
> > +/**
> > + * @brief Decode variable length u-vlc suffix
> > + */
> > +static av_always_inline uint8_t vlc_decode_u_suffix(
> > +        StateVars *vlc_stream, uint8_t suffix, const uint8_t
> *refill_array)
> > +{
> > +    /* clause 7.3.6
> > +     * procedure: decodeUSuffix
> > +     */
> > +    static const int mask[] = {1, 31};
> > +    static const int drop_bits[] = {1, 5};
> > +
> > +    uint8_t bits;
> > +    int cond = suffix != 3;
> > +    if (suffix < 3)
> > +        return 0;
> > +
> > +    if (vlc_stream->bits_left < 5)
> > +        jpeg2000_bitbuf_refill_backwards(vlc_stream, refill_array);
> > +
> > +    bits = jpeg2000_bitbuf_peek_bits_lsb(vlc_stream, 5);
> > +
> > +    jpeg2000_bitbuf_drop_bits_lsb(vlc_stream, drop_bits[cond]);
> > +    return bits & mask[cond];
> > +}
> > +
> > +/**
> > + * @brief Decode variable length u-vlc suffix
> > + */
> > +static av_always_inline uint8_t vlc_decode_u_extension(
> > +        StateVars *vlc_stream, uint8_t suffix, const uint8_t
> *refill_array)
> > +{
> > +    /* clause 7.3.6
> > +     * procedure decodeUExtension.
> > +     */
> > +    return jpeg2000_bitbuf_get_bits_lsb(vlc_stream, 4 * (suffix >= 28),
> refill_array);
> > +}
> > +
> > +/**
> > + * Magnitude and Sign decode procedures
> > + */
> > +static int32_t av_always_inline jpeg2000_decode_mag_sgn(StateVars
> *mag_sgn_stream, int32_t m_n,
> > +                                                        int32_t i_n,
> const uint8_t *buf, uint32_t length)
> > +{
> > +    /* clause 7.3.8
> > +     * procedure: decodeMagSgnValue
> > +     */
> > +    int32_t val = 0;
> > +    if (m_n > 0) {
> > +        val =
> jpeg2000_bitbuf_get_bits_lsb_forward(mag_sgn_stream,m_n,buf,length);
> > +        val += (i_n << m_n);
> > +    }
> > +    return val;
> > +}
> > +
> > +static av_always_inline void
> > +recover_mag_sgn(StateVars *mag_sgn, uint8_t pos, uint16_t q, int32_t
> m_n[2], int32_t known_1[2],
> > +                const uint8_t emb_pat_1[2],
> > +                int32_t v[2][4], int32_t m[2][4], uint8_t *E, uint32_t
> *mu_n, const uint8_t *Dcup, uint32_t Pcup,
> > +                uint32_t pLSB)
> > +{
> > +    for (int i = 0; i < 4; i++) {
> > +        int32_t n = 4 * q + i;
> > +        m_n[pos] = m[pos][i];
> > +        known_1[pos] = (emb_pat_1[pos] >> i) & 1;
> > +        v[pos][i] = jpeg2000_decode_mag_sgn(mag_sgn, m_n[pos],
> known_1[pos], Dcup, Pcup);
> > +
> > +        if (m_n[pos] != 0) {
> > +            E[n] = 32 - ff_clz(v[pos][i] | 1);
> > +            mu_n[n] = (v[pos][i] >> 1) + 1;
> > +            mu_n[n] <<= pLSB;
> > +            mu_n[n] |= ((uint32_t) (v[pos][i] & 1)) << 31; // sign bit.
> > +        }
> > +    }
> > +}
> > +
> > +static int jpeg2000_import_bit(StateVars *stream, const uint8_t *array,
> uint32_t length)
> > +{
> > +    int cond = stream->pos <= length;
> > +    int pos = FFMIN(stream->pos, length);
> > +    if (stream->bits == 0) {
> > +        stream->bits = (stream->tmp == 0xFF) ? 7 : 8;
> > +        stream->pos += cond;
> > +        stream->tmp = cond ? array[pos] : 0xFF;
> > +    }
> > +    stream->bits -= 1;
> > +    return (stream->tmp >> stream->bits) & 1;
> > +}
> > +
> > +static int jpeg2000_peek_bit(StateVars *stream, const uint8_t *array,
> uint32_t length)
> > +{
> > +    if (stream->bits == 0) {
> > +        int cond = stream->pos <= length;
> > +        int pos = FFMIN(stream->pos, length);
> > +        stream->bits = (stream->tmp == 0xFF) ? 7 : 8;
> > +        stream->pos += cond;
> > +        stream->tmp = cond ? array[pos] : 0xFF;
> > +    }
> > +    return (stream->tmp >> stream->bits) & 1;
> > +}
> > +
> > +static int jpeg2000_decode_mel_sym(MelDecoderState *mel_state,
> > +                                   StateVars *mel_stream,
> > +                                   const uint8_t *Dcup,
> > +                                   uint32_t Lcup)
> > +{
> > +
> > +    if (mel_state->run == 0 && mel_state->one == 0) {
> > +        uint8_t eval;
> > +        uint8_t bit;
> > +
> > +        eval = MEL_E[mel_state->k];
> > +        bit = jpeg2000_import_bit(mel_stream, Dcup, Lcup);
> > +        if (bit == 1) {
> > +            mel_state->run = 1 << eval;
> > +            mel_state->k = FFMIN(12, mel_state->k + 1);
> > +        } else {
> > +            mel_state->run = 0;
> > +            while (eval > 0) {
> > +                bit = jpeg2000_import_bit(mel_stream, Dcup, Lcup);
> > +                mel_state->run = (2 * (mel_state->run)) + bit;
> > +                eval -= 1;
> > +            }
> > +            mel_state->k = FFMAX(0, mel_state->k - 1);
> > +            mel_state->one = 1;
> > +        }
> > +    }
> > +    if (mel_state->run > 0) {
> > +        mel_state->run -= 1;
> > +        return 0;
> > +    } else {
> > +        mel_state->one = 0;
> > +        return 1;
> > +    }
> > +}
> > +
> > +/**
> > + * @brief Magref decoding procedures
> > + */
> > +static av_always_inline int jpeg2000_import_magref_bit(StateVars
> *stream, const uint8_t *array, uint32_t length)
> > +{
> > +    return jpeg2000_bitbuf_get_bits_lsb(stream, 1, array);
> > +}
> > +
> > +/**
> > + * @brief Signal EMB decode
> > + */
> > +static int
> > +jpeg2000_decode_sig_emb(const Jpeg2000DecoderContext *s,
> MelDecoderState *mel_state, StateVars *mel_stream,
> > +                        StateVars *vlc_stream, const uint16_t
> *vlc_table, const uint8_t *Dcup, uint8_t *sig_pat,
> > +                        uint8_t *res_off, uint8_t *emb_pat_k, uint8_t
> *emb_pat_1, uint8_t pos, uint16_t context,
> > +                        uint32_t Lcup, uint32_t Pcup)
> > +{
> > +    if (context == 0) {
> > +        uint8_t sym;
> > +        sym = jpeg2000_decode_mel_sym(mel_state, mel_stream, Dcup,
> Lcup);
> > +        if (sym == 0) {
> > +            sig_pat[pos] = 0;
> > +            res_off[pos] = 0;
> > +            emb_pat_k[pos] = 0;
> > +            emb_pat_1[pos] = 0;
> > +            return 0;
> > +        }
> > +    }
> > +    return jpeg2000_decode_ctx_vlc(s, vlc_stream, vlc_table, Dcup,
> sig_pat,
> > +                                   res_off, emb_pat_k, emb_pat_1, pos,
> Pcup,
> > +                                   context);
> > +}
> > +
> > +static av_always_inline int jpeg2000_get_state(int x1, int x2, int
> width, int shift_by, const uint8_t *block_states)
> > +{
> > +    return (block_states[(x1 + 1) * (width + 2) + (x2 + 1)] >>
> shift_by) & 1;
> > +}
> > +
> > +static av_always_inline void jpeg2000_modify_state(int x1, int x2, int
> width, int value, uint8_t *block_states)
> > +{
> > +    block_states[(x1 + 1) * (width + 2) + (x2 + 1)] |= value;
> > +}
> > +
> > +static int av_noinline
>
> Why is this av_noinline? It is normally good, to inline functions only
> called once.
>
> > +jpeg2000_decode_ht_cleanup(const Jpeg2000DecoderContext *s,
> Jpeg2000Cblk *cblk, Jpeg2000T1Context *t1,
> > +                           MelDecoderState *mel_state,
> > +                           StateVars *mel_stream, StateVars
> *vlc_stream, StateVars *mag_sgn_stream, const uint8_t *Dcup,
> > +                           uint32_t Lcup,
> > +                           uint32_t Pcup, uint8_t pLSB, int width, int
> height, int32_t *sample_buf,
> > +                           uint8_t *block_states)
> > +{
> > +    uint16_t q = 0; /* Represents current quad position. */
> > +    uint16_t q1, q2;
> > +    uint16_t context1, context2;
> > +    uint16_t context = 0;
> > +
> > +    uint8_t sig_pat[2]   = {0}; /* significance pattern*/
> > +    uint8_t res_off[2]   = {0}; /* residual offset*/
> > +    uint8_t emb_pat_k[2] = {0}; /* Exponent Max Bound pattern K */
> > +    uint8_t emb_pat_1[2] = {0}; /* Exponent Max Bound pattern 1.*/
> > +    uint8_t gamma[2]     = {0};
> > +
> > +    uint8_t E_n[2]  = {0};
> > +    uint8_t E_ne[2] = {0};
> > +    uint8_t E_nw[2] = {0};
> > +    uint8_t E_nf[2] = {0};
> > +
> > +    uint8_t max_e[2] = {0};
> > +    uint8_t u_pfx[2] = {0};
> > +    uint8_t u_sfx[2] = {0};
> > +    uint8_t u_ext[2] = {0};
> > +
> > +    int32_t u[2]       = {0};
> > +    int32_t U[2]       = {0}; /* Exponent bound (7.3.7) */
> > +    int32_t m_n[2]     = {0};
> > +    int32_t known_1[2] = {0};
> > +
> > +    int32_t m[2][4] = {0};
> > +    int32_t v[2][4] = {0};
> > +
> > +    uint8_t kappa[2] = {1, 1};
> > +    int ret = 0;
> > +
> > +    int sp;
> > +
> > +    uint64_t c;
> > +
> > +    uint8_t *sigma;
> > +    uint32_t *mu;
> > +
> > +    const uint8_t *vlc_buf = Dcup + Pcup;
> > +    /* convert to raster-scan */
> > +    const uint16_t is_border_x = width % 2;
> > +    const uint16_t is_border_y = height % 2;
> > +
> > +    int j1, j2;
> > +    int x1 = 0, x2 = 0, x3 = 0;
> > +
> > +    const uint16_t quad_width = ff_jpeg2000_ceildivpow2(width, 1);
> > +    const uint16_t quad_height = ff_jpeg2000_ceildivpow2(height, 1);
> > +
> > +    size_t buf_size = 4 * quad_width * quad_height;
> > +
> > +    uint8_t *sigma_n = av_calloc(buf_size, sizeof(uint8_t));
> > +    uint8_t *E = av_calloc(buf_size, sizeof(uint8_t));
> > +    uint32_t *mu_n = av_calloc(buf_size, sizeof(uint32_t));
> > +
>
> Why does a function called jpeg2000_decode_ht_cleanup perform
> allocations? And why all the other stuff below?
>
> > +    if (!sigma_n || !E || !mu_n) {
> > +        ret = AVERROR(ENOMEM);
> > +        goto free;
> > +    }
> > +
> > +    sigma = sigma_n;
> > +    mu = mu_n;
> > +
> > +    while (q < quad_width - 1) {
> > +        q1 = q;
> > +        q2 = q1 + 1;
> > +
> > +        if ((ret = jpeg2000_decode_sig_emb(s, mel_state, mel_stream,
> vlc_stream,
> > +                                           dec_CxtVLC_table0, Dcup,
> sig_pat, res_off,
> > +                                           emb_pat_k, emb_pat_1,
> J2K_Q1, context, Lcup,
> > +                                           Pcup)) < 0)
> > +            goto free;
> > +        for (int i = 0; i < 4; i++)
> > +            sigma_n[4 * q1 + i] = (sig_pat[J2K_Q1] >> i) & 1;
> > +
> > +        // calculate context
> > +        context  = sigma_n[4 * q1];           // f
> > +        context |= sigma_n[4 * q1 + 1];      // sf
> > +        context += sigma_n[4 * q1 + 2] << 1; // w << 1
> > +        context += sigma_n[4 * q1 + 3] << 2;
> > +
> > +        if ((ret = jpeg2000_decode_sig_emb(s, mel_state, mel_stream,
> vlc_stream,
> > +                                           dec_CxtVLC_table0, Dcup,
> sig_pat, res_off,
> > +                                           emb_pat_k, emb_pat_1,
> J2K_Q2, context, Lcup,
> > +                                           Pcup)) < 0)
> > +            goto free;
> > +
> > +        for (int i = 0; i < 4; i++)
> > +            sigma_n[4 * q2 + i] = (sig_pat[J2K_Q2] >> i) & 1;
> > +
> > +        // calculate context for the next quad
> > +        context  = sigma_n[4 * q2];           // f
> > +        context |= sigma_n[4 * q2 + 1];      // sf
> > +        context += sigma_n[4 * q2 + 2] << 1; // w << 1
> > +        context += sigma_n[4 * q2 + 3] << 2; // sw << 2
> > +
> > +        u[0] = 0;
> > +        u[1] = 0;
> > +
> > +        jpeg2000_bitbuf_refill_backwards(vlc_stream, vlc_buf);
> > +
> > +        if (res_off[J2K_Q1] == 1 && res_off[J2K_Q2] == 1) {
> > +
> > +            if (jpeg2000_decode_mel_sym(mel_state, mel_stream, Dcup,
> Lcup) == 1) {
> > +
> > +                u_pfx[J2K_Q1] = vlc_decode_u_prefix(vlc_stream,
> vlc_buf);
> > +                u_pfx[J2K_Q2] = vlc_decode_u_prefix(vlc_stream,
> vlc_buf);
> > +
> > +                u_sfx[J2K_Q1] = vlc_decode_u_suffix(vlc_stream,
> u_pfx[J2K_Q1], vlc_buf);
> > +                u_sfx[J2K_Q2] = vlc_decode_u_suffix(vlc_stream,
> u_pfx[J2K_Q2], vlc_buf);
> > +
> > +                u_ext[J2K_Q1] = vlc_decode_u_extension(vlc_stream,
> u_sfx[J2K_Q1], vlc_buf);
> > +                u_ext[J2K_Q2] = vlc_decode_u_extension(vlc_stream,
> u_sfx[J2K_Q2], vlc_buf);
> > +
> > +                u[J2K_Q1] = 2 + u_pfx[J2K_Q1] + u_sfx[J2K_Q1] +
> (u_ext[J2K_Q1] * 4);
> > +                u[J2K_Q2] = 2 + u_pfx[J2K_Q2] + u_sfx[J2K_Q2] +
> (u_ext[J2K_Q2] * 4);
> > +
> > +            } else {
> > +                u_pfx[J2K_Q1] = vlc_decode_u_prefix(vlc_stream,
> vlc_buf);
> > +
> > +                if (u_pfx[J2K_Q1] > 2) {
> > +                    u[J2K_Q2] =
> jpeg2000_bitbuf_get_bits_lsb(vlc_stream, 1, vlc_buf) + 1;
> > +                    u_sfx[J2K_Q1] = vlc_decode_u_suffix(vlc_stream,
> u_pfx[J2K_Q1], vlc_buf);
> > +                    u_ext[J2K_Q1] = vlc_decode_u_extension(vlc_stream,
> u_sfx[J2K_Q1], vlc_buf);
> > +                } else {
> > +                    u_pfx[J2K_Q2] = vlc_decode_u_prefix(vlc_stream,
> vlc_buf);
> > +                    u_sfx[J2K_Q1] = vlc_decode_u_suffix(vlc_stream,
> u_pfx[J2K_Q1], vlc_buf);
> > +                    u_sfx[J2K_Q2] = vlc_decode_u_suffix(vlc_stream,
> u_pfx[J2K_Q2], vlc_buf);
> > +                    u_ext[J2K_Q1] = vlc_decode_u_extension(vlc_stream,
> u_sfx[J2K_Q1], vlc_buf);
> > +                    u_ext[J2K_Q2] = vlc_decode_u_extension(vlc_stream,
> u_sfx[J2K_Q2], vlc_buf);
> > +                    u[J2K_Q2] = u_pfx[J2K_Q2] + u_sfx[J2K_Q2] +
> (u_ext[J2K_Q2] * 4);
> > +                }
> > +                // clause 7.3.6 (3)
> > +                u[J2K_Q1] = u_pfx[J2K_Q1] + u_sfx[J2K_Q1] +
> (u_ext[J2K_Q1] * 4);
> > +            }
> > +
> > +        } else if (res_off[J2K_Q1] == 1 || res_off[J2K_Q2] == 1) {
> > +            uint8_t pos = res_off[J2K_Q1] == 1 ? 0 : 1;
> > +            u_pfx[pos] = vlc_decode_u_prefix(vlc_stream, vlc_buf);
> > +            u_sfx[pos] = vlc_decode_u_suffix(vlc_stream, u_pfx[pos],
> vlc_buf);
> > +            u_ext[pos] = vlc_decode_u_extension(vlc_stream, u_sfx[pos],
> vlc_buf);
> > +            u[pos] = u_pfx[pos] + u_sfx[pos] + (u_ext[pos] * 4);
> > +        }
> > +        U[J2K_Q1] = kappa[J2K_Q1] + u[J2K_Q1];
> > +        U[J2K_Q2] = kappa[J2K_Q2] + u[J2K_Q2];
> > +
> > +        for (int i = 0; i < 4; i++) {
> > +            m[J2K_Q1][i] = sigma_n[4 * q1 + i] * U[J2K_Q1] -
> ((emb_pat_k[J2K_Q1] >> i) & 1);
> > +            m[J2K_Q2][i] = sigma_n[4 * q2 + i] * U[J2K_Q2] -
> ((emb_pat_k[J2K_Q2] >> i) & 1);
> > +        }
> > +
> > +        recover_mag_sgn(mag_sgn_stream, J2K_Q1, q1, m_n, known_1,
> emb_pat_1, v, m,
> > +                        E, mu_n, Dcup, Pcup, pLSB);
> > +
> > +        recover_mag_sgn(mag_sgn_stream, J2K_Q2, q2, m_n, known_1,
> emb_pat_1, v, m,
> > +                        E, mu_n, Dcup, Pcup, pLSB);
> > +
> > +        // prepare context for the next quad
> > +
> > +        // move to the next quad pair
> > +        q += 2;
> > +    }
> > +    if (quad_width % 2 == 1) {
> > +        q1 = q;
> > +
> > +        if ((ret = jpeg2000_decode_sig_emb(s, mel_state, mel_stream,
> vlc_stream,
> > +                                           dec_CxtVLC_table0, Dcup,
> sig_pat, res_off,
> > +                                           emb_pat_k, emb_pat_1,
> J2K_Q1, context, Lcup,
> > +                                           Pcup)) < 0)
> > +            goto free;
> > +
> > +        for (int i = 0; i < 4; i++)
> > +            sigma_n[4 * q1 + i] = (sig_pat[J2K_Q1] >> i) & 1;
> > +
> > +        u[J2K_Q1] = 0;
> > +
> > +        if (res_off[J2K_Q1] == 1) {
> > +            u_pfx[J2K_Q1] = vlc_decode_u_prefix(vlc_stream, vlc_buf);
> > +            u_sfx[J2K_Q1] = vlc_decode_u_suffix(vlc_stream,
> u_pfx[J2K_Q1], vlc_buf);
> > +            u_ext[J2K_Q1] = vlc_decode_u_extension(vlc_stream,
> u_sfx[J2K_Q1], vlc_buf);
> > +            u[J2K_Q1] = u_pfx[J2K_Q1] + u_sfx[J2K_Q1] + (u_ext[J2K_Q1]
> * 4);
> > +        }
> > +
> > +        U[J2K_Q1] = kappa[J2K_Q1] + u[J2K_Q1];
> > +
> > +        for (int i = 0; i < 4; i++)
> > +            m[J2K_Q1][i] = sigma_n[4 * q1 + i] * U[J2K_Q1] -
> ((emb_pat_k[J2K_Q1] >> i) & 1);
> > +
> > +        recover_mag_sgn(mag_sgn_stream, J2K_Q1, q1, m_n, known_1,
> emb_pat_1, v, m,
> > +                        E, mu_n, Dcup, Pcup, pLSB);
> > +
> > +        q++; /* move to next quad pair */
> > +    }
> > +    /* initial line pair end. */
> > +    /*
> > +     * As an optimization, we can replace modulo operations with
> > +     * checking if a number is divisible , since that's the only thing
> we need.
> > +     * this is paired with is_divisible.
> > +     * Credits to Daniel Lemire blog post:
> > +     *
> https://lemire.me/blog/2019/02/08/faster-remainders-when-the-divisor-is-a-constant-beating-compilers-and-libdivide/
> > +     * It's UB on zero, but we can't have a quad being zero, the spec
> doesn't allow,
> > +     * so we error out early in case that's the case.
> > +     */
> > +    c = precompute_c(quad_width);
> > +
> > +    for (int row = 1; row < quad_height; row++) {
> > +        while ((q - (row * quad_width)) < quad_width - 1 && q <
> (quad_height * quad_width)) {
> > +            q1 = q;
> > +            q2 = q + 1;
> > +            context1  = sigma_n[4 * (q1 - quad_width) + 1];
> > +            context1 += sigma_n[4 * (q1 - quad_width) + 3] << 2; // ne
> > +
> > +            if (!is_divisible(q1, c)) {
> > +                context1 |= sigma_n[4 * (q1 - quad_width) - 1];
>        // nw
> > +                context1 += (sigma_n[4 * q1 - 1] | sigma_n[4 * q1 - 2])
> << 1; // sw| q
> > +            }
> > +            if (!is_divisible(q1 + 1, c))
> > +                context1 |= sigma_n[4 * (q1 - quad_width) + 5] << 2;
> > +
> > +            if ((ret = jpeg2000_decode_sig_emb(s, mel_state,
> mel_stream, vlc_stream,
> > +                                               dec_CxtVLC_table1, Dcup,
> sig_pat, res_off,
> > +                                               emb_pat_k, emb_pat_1,
> J2K_Q1, context1, Lcup,
> > +                                               Pcup))
> > +                < 0)
> > +                goto free;
> > +
> > +            for (int i = 0; i < 4; i++)
> > +                sigma_n[4 * q1 + i] = (sig_pat[J2K_Q1] >> i) & 1;
> > +
> > +            context2  = sigma_n[4 * (q2 - quad_width) + 1];
> > +            context2 += sigma_n[4 * (q2 - quad_width) + 3] << 2;
> > +
> > +            if (!is_divisible(q2, c)) {
> > +                context2 |= sigma_n[4 * (q2 - quad_width) - 1];
> > +                context2 += (sigma_n[4 * q2 - 1] | sigma_n[4 * q2 - 2])
> << 1;
> > +            }
> > +            if (!is_divisible(q2 + 1, c))
> > +                context2 |= sigma_n[4 * (q2 - quad_width) + 5] << 2;
> > +
> > +            if ((ret = jpeg2000_decode_sig_emb(s, mel_state,
> mel_stream, vlc_stream,
> > +                                               dec_CxtVLC_table1, Dcup,
> sig_pat, res_off,
> > +                                               emb_pat_k, emb_pat_1,
> J2K_Q2, context2, Lcup,
> > +                                               Pcup))
> > +                < 0)
> > +                goto free;
> > +
> > +            for (int i = 0; i < 4; i++)
> > +                sigma_n[4 * q2 + i] = (sig_pat[J2K_Q2] >> i) & 1;
> > +
> > +            u[J2K_Q1] = 0;
> > +            u[J2K_Q2] = 0;
> > +
> > +            jpeg2000_bitbuf_refill_backwards(vlc_stream, vlc_buf);
> > +
> > +            if (res_off[J2K_Q1] == 1 && res_off[J2K_Q2] == 1) {
> > +                u_pfx[J2K_Q1] = vlc_decode_u_prefix(vlc_stream,
> vlc_buf);
> > +                u_pfx[J2K_Q2] = vlc_decode_u_prefix(vlc_stream,
> vlc_buf);
> > +
> > +                u_sfx[J2K_Q1] = vlc_decode_u_suffix(vlc_stream,
> u_pfx[J2K_Q1], vlc_buf);
> > +                u_sfx[J2K_Q2] = vlc_decode_u_suffix(vlc_stream,
> u_pfx[J2K_Q2], vlc_buf);
> > +
> > +                u_ext[J2K_Q1] = vlc_decode_u_extension(vlc_stream,
> u_sfx[J2K_Q1], vlc_buf);
> > +                u_ext[J2K_Q2] = vlc_decode_u_extension(vlc_stream,
> u_sfx[J2K_Q2], vlc_buf);
> > +
> > +                u[J2K_Q1] = u_pfx[J2K_Q1] + u_sfx[J2K_Q1] +
> (u_ext[J2K_Q1] << 2);
> > +                u[J2K_Q2] = u_pfx[J2K_Q2] + u_sfx[J2K_Q2] +
> (u_ext[J2K_Q2] << 2);
> > +
> > +            } else if (res_off[J2K_Q1] == 1 || res_off[J2K_Q2] == 1) {
> > +                uint8_t pos = res_off[J2K_Q1] == 1 ? 0 : 1;
> > +
> > +                u_pfx[pos] = vlc_decode_u_prefix(vlc_stream, vlc_buf);
> > +                u_sfx[pos] = vlc_decode_u_suffix(vlc_stream,
> u_pfx[pos], vlc_buf);
> > +                u_ext[pos] = vlc_decode_u_extension(vlc_stream,
> u_sfx[pos], vlc_buf);
> > +
> > +                u[pos] = u_pfx[pos] + u_sfx[pos] + (u_ext[pos] << 2);
> > +            }
> > +            sp = sig_pat[J2K_Q1];
> > +
> > +            gamma[J2K_Q1] = 1;
> > +
> > +            if (sp == 0 || sp == 1 || sp == 2 || sp == 4 || sp == 8)
> > +                gamma[J2K_Q1] = 0;
> > +
> > +            sp = sig_pat[J2K_Q2];
> > +
> > +            gamma[J2K_Q2] = 1;
> > +
> > +            if (sp == 0 || sp == 1 || sp == 2 || sp == 4 || sp == 8)
> > +                gamma[J2K_Q2] = 0;
> > +
> > +            E_n[J2K_Q1] = E[4 * (q1 - quad_width) + 1];
> > +            E_n[J2K_Q2] = E[4 * (q2 - quad_width) + 1];
> > +
> > +            E_ne[J2K_Q1] = E[4 * (q1 - quad_width) + 3];
> > +            E_ne[J2K_Q2] = E[4 * (q2 - quad_width) + 3];
> > +
> > +            E_nw[J2K_Q1] = (!is_divisible(q1, c)) * E[FFMAX((4 * (q1 -
> quad_width) - 1), 0)];
> > +            E_nw[J2K_Q2] = (!is_divisible(q2, c)) * E[FFMAX((4 * (q2 -
> quad_width) - 1), 0)];
> > +
> > +            E_nf[J2K_Q1] = (!is_divisible(q1 + 1, c)) * E[4 * (q1 -
> quad_width) + 5];
> > +            E_nf[J2K_Q2] = (!is_divisible(q2 + 1, c)) * E[4 * (q2 -
> quad_width) + 5];
> > +
> > +            max_e[J2K_Q1] = FFMAX(E_nw[J2K_Q1], FFMAX3(E_n[J2K_Q1],
> E_ne[J2K_Q1], E_nf[J2K_Q1]));
> > +            max_e[J2K_Q2] = FFMAX(E_nw[J2K_Q2], FFMAX3(E_n[J2K_Q2],
> E_ne[J2K_Q2], E_nf[J2K_Q2]));
> > +
> > +            kappa[J2K_Q1] = FFMAX(1, gamma[J2K_Q1] * (max_e[J2K_Q1] -
> 1));
> > +            kappa[J2K_Q2] = FFMAX(1, gamma[J2K_Q2] * (max_e[J2K_Q2] -
> 1));
> > +
> > +            U[J2K_Q1] = kappa[J2K_Q1] + u[J2K_Q1];
> > +            U[J2K_Q2] = kappa[J2K_Q2] + u[J2K_Q2];
> > +
> > +            for (int i = 0; i < 4; i++) {
> > +                m[J2K_Q1][i] = sigma_n[4 * q1 + i] * U[J2K_Q1] -
> ((emb_pat_k[J2K_Q1] >> i) & 1);
> > +                m[J2K_Q2][i] = sigma_n[4 * q2 + i] * U[J2K_Q2] -
> ((emb_pat_k[J2K_Q2] >> i) & 1);
> > +            }
> > +            recover_mag_sgn(mag_sgn_stream, J2K_Q1, q1, m_n, known_1,
> emb_pat_1, v, m,
> > +                            E, mu_n, Dcup, Pcup, pLSB);
> > +
> > +            recover_mag_sgn(mag_sgn_stream, J2K_Q2, q2, m_n, known_1,
> emb_pat_1, v, m,
> > +                            E, mu_n, Dcup, Pcup, pLSB);
> > +
> > +            /* move to the next quad pair */
> > +            q += 2;
> > +        }
> > +        if (quad_width % 2 == 1) {
> > +            q1 = q;
> > +            /* calculate context for current quad */
> > +            context1  = sigma_n[4 * (q1 - quad_width) + 1];
> > +            context1 += (sigma_n[4 * (q1 - quad_width) + 3] << 2);
> > +
> > +            if (!is_divisible(q1, c)) {
> > +                context1 |= sigma_n[4 * (q1 - quad_width) - 1];
> > +                context1 += (sigma_n[4 * q1 - 1] | sigma_n[4 * q1 - 2])
> << 1;
> > +            }
> > +            if (!is_divisible(q1 + 1, c))
> > +                context1 |= sigma_n[4 * (q1 - quad_width) + 5] << 2;
> > +
> > +            if ((ret = jpeg2000_decode_sig_emb(s, mel_state,
> mel_stream, vlc_stream,
> > +                                               dec_CxtVLC_table1, Dcup,
> sig_pat, res_off,
> > +                                               emb_pat_k, emb_pat_1,
> J2K_Q1, context1, Lcup,
> > +                                               Pcup))
> > +                < 0)
> > +                goto free;
> > +
> > +            for (int i = 0; i < 4; i++)
> > +                sigma_n[4 * q1 + i] = (sig_pat[J2K_Q1] >> i) & 1;
> > +
> > +            u[J2K_Q1] = 0;
> > +
> > +            /* Recover mag_sgn value */
> > +            if (res_off[J2K_Q1] == 1) {
> > +                u_pfx[J2K_Q1] = vlc_decode_u_prefix(vlc_stream,
> vlc_buf);
> > +                u_sfx[J2K_Q1] = vlc_decode_u_suffix(vlc_stream,
> u_pfx[J2K_Q1], vlc_buf);
> > +                u_ext[J2K_Q1] = vlc_decode_u_extension(vlc_stream,
> u_sfx[J2K_Q1], vlc_buf);
> > +
> > +                u[J2K_Q1] = u_pfx[J2K_Q1] + u_sfx[J2K_Q1] +
> (u_ext[J2K_Q1] << 2);
> > +            }
> > +
> > +            sp = sig_pat[J2K_Q1];
> > +
> > +            gamma[J2K_Q1] = 1;
> > +
> > +            if (sp == 0 || sp == 1 || sp == 2 || sp == 4 || sp == 8)
> > +                gamma[J2K_Q1] = 0;
> > +
> > +            E_n[J2K_Q1] = E[4 * (q1 - quad_width) + 1];
> > +
> > +            E_ne[J2K_Q1] = E[4 * (q1 - quad_width) + 3];
> > +
> > +            E_nw[J2K_Q1] = (!is_divisible(q1, c)) * E[FFMAX((4 * (q1 -
> quad_width) - 1), 0)];
> > +
> > +            E_nf[J2K_Q1] = (!is_divisible(q1 + 1, c)) * E[4 * (q1 -
> quad_width) + 5];
> > +
> > +            max_e[J2K_Q1] = FFMAX(E_nw[J2K_Q1], FFMAX3(E_n[J2K_Q1],
> E_ne[J2K_Q1], E_nf[J2K_Q1]));
> > +
> > +            kappa[J2K_Q1] = FFMAX(1, gamma[J2K_Q1] * (max_e[J2K_Q1] -
> 1));
> > +
> > +            U[J2K_Q1] = kappa[J2K_Q1] + u[J2K_Q1];
> > +
> > +            for (int i = 0; i < 4; i++)
> > +                m[J2K_Q1][i] = sigma_n[4 * q1 + i] * U[J2K_Q1] -
> ((emb_pat_k[J2K_Q1] >> i) & 1);
> > +
> > +            recover_mag_sgn(mag_sgn_stream, J2K_Q1, q1, m_n, known_1,
> emb_pat_1, v, m,
> > +                            E, mu_n, Dcup, Pcup, pLSB);
> > +            q += 1;
> > +        }
> > +    }
> > +    // convert to raster-scan
> > +    for (int y = 0; y < quad_height; y++) {
> > +        for (int x = 0; x < quad_width; x++) {
> > +            j1 = 2 * y;
> > +            j2 = 2 * x;
> > +
> > +            sample_buf[j2 + (j1 * width)] = (int32_t)*mu;
> > +            jpeg2000_modify_state(j1, j2, width, *sigma, block_states);
> > +            sigma += 1;
> > +            mu += 1;
> > +
> > +            x1 = y != quad_height - 1 || is_border_y == 0;
> > +            sample_buf[j2 + ((j1 + 1) * width)] = ((int32_t)*mu) * x1;
> > +            jpeg2000_modify_state(j1 + 1, j2, width, (*sigma) * x1,
> block_states);
> > +            sigma += 1;
> > +            mu += 1;
> > +
> > +            x2 = x != quad_width - 1 || is_border_x == 0;
> > +            sample_buf[(j2 + 1) + (j1 * width)] = ((int32_t)*mu) * x2;
> > +            jpeg2000_modify_state(j1, j2 + 1, width, (*sigma) * x2,
> block_states);
> > +            sigma += 1;
> > +            mu += 1;
> > +
> > +            x3 = x1 | x2;
> > +            sample_buf[(j2 + 1) + (j1 + 1) * width] = ((int32_t)*mu) *
> x3;
> > +            jpeg2000_modify_state(j1 + 1, j2 + 1, width, (*sigma) * x3,
> block_states);
> > +            sigma += 1;
> > +            mu += 1;
> > +        }
> > +    }
> > +    ret = 1;
> > +free:
> > +    av_freep(&sigma_n);
> > +    av_freep(&E);
> > +    av_freep(&mu_n);
> > +    return ret;
> > +}
> > +
> > +static void
> > +jpeg2000_calc_mbr(uint8_t *mbr, const uint16_t i, const uint16_t j,
> const uint32_t mbr_info, uint8_t causal_cond,
> > +                  uint8_t *block_states, int width)
> > +{
> > +
> > +    int local_mbr = 0;
> > +    local_mbr |= jpeg2000_get_state(i - 1, j - 1, width,
> HT_SHIFT_SIGMA, block_states);
> > +    local_mbr |= jpeg2000_get_state(i - 1, j + 0, width,
> HT_SHIFT_SIGMA, block_states);
> > +    local_mbr |= jpeg2000_get_state(i - 1, j + 1, width,
> HT_SHIFT_SIGMA, block_states);
> > +
> > +    local_mbr |= jpeg2000_get_state(i + 0, j - 1, width,
> HT_SHIFT_SIGMA, block_states);
> > +    local_mbr |= jpeg2000_get_state(i + 0, j + 1, width,
> HT_SHIFT_SIGMA, block_states);
> > +
> > +    local_mbr |= jpeg2000_get_state(i + 1, j - 1, width,
> HT_SHIFT_SIGMA, block_states) * causal_cond;
> > +    local_mbr |= jpeg2000_get_state(i + 1, j + 0, width,
> HT_SHIFT_SIGMA, block_states) * causal_cond;
> > +    local_mbr |= jpeg2000_get_state(i + 1, j + 1, width,
> HT_SHIFT_SIGMA, block_states) * causal_cond;
> > +
> > +    local_mbr |= jpeg2000_get_state(i - 1, j - 1, width, HT_SHIFT_REF,
> block_states) *
> > +                 jpeg2000_get_state(i - 1, j - 1, width, HT_SHIFT_SCAN,
> block_states);
> > +    local_mbr |= jpeg2000_get_state(i - 1, j + 0, width, HT_SHIFT_REF,
> block_states) *
> > +                 jpeg2000_get_state(i - 1, j - 1, width, HT_SHIFT_SCAN,
> block_states);
> > +    local_mbr |= jpeg2000_get_state(i - 1, j + 1, width, HT_SHIFT_REF,
> block_states) *
> > +                 jpeg2000_get_state(i - 1, j + 1, width, HT_SHIFT_SCAN,
> block_states);
> > +
> > +    local_mbr |= jpeg2000_get_state(i + 0, j - 1, width, HT_SHIFT_REF,
> block_states) *
> > +                 jpeg2000_get_state(i + 0, j - 1, width, HT_SHIFT_SCAN,
> block_states);
> > +    local_mbr |= jpeg2000_get_state(i + 0, j + 1, width, HT_SHIFT_REF,
> block_states) *
> > +                 jpeg2000_get_state(i + 0, j + 1, width, HT_SHIFT_SCAN,
> block_states);
> > +
> > +    local_mbr |= jpeg2000_get_state(i + 1, j - 1, width, HT_SHIFT_REF,
> block_states) *
> > +                 jpeg2000_get_state(i + 1, j - 1, width, HT_SHIFT_SCAN,
> block_states) * causal_cond;
> > +    local_mbr |= jpeg2000_get_state(i + 1, j + 0, width, HT_SHIFT_REF,
> block_states) *
> > +                 jpeg2000_get_state(i + 1, j + 0, width, HT_SHIFT_SCAN,
> block_states) * causal_cond;
> > +    local_mbr |= jpeg2000_get_state(i + 1, j + 1, width, HT_SHIFT_REF,
> block_states) *
> > +                 jpeg2000_get_state(i + 1, j + 1, width, HT_SHIFT_SCAN,
> block_states) * causal_cond;
> > +
> > +    *mbr |= local_mbr;
> > +}
> > +
> > +static void
> > +jpeg2000_process_stripes_block(StateVars *sig_prop, int i_s, int j_s,
> int width, int height, int stride, int pLSB,
> > +                               int32_t *sample_buf, uint8_t
> *block_states, uint8_t *magref_segment,
> > +                               uint32_t magref_length)
> > +{
> > +    int32_t *sp;
> > +    uint8_t causal_cond = 0;
> > +    uint8_t bit;
> > +    uint8_t mbr;
> > +    uint32_t mbr_info;
> > +    int modify_state = 0;
> > +    int cond;
> > +
> > +    for (int j = j_s; j < j_s + width; j++) {
> > +        mbr_info = 0;
>
> Could use a smaller scope.
>
> > +        for (int i = i_s; i < i_s + height; i++) {
> > +            sp = &sample_buf[j + (i * (stride - 2))];
> > +            mbr = 0;
> > +            causal_cond = i != (i_s + height - 1);
> > +            if (jpeg2000_get_state(i, j, stride - 2, HT_SHIFT_SIGMA,
> block_states) == 0)
> > +                jpeg2000_calc_mbr(&mbr, i, j, mbr_info & 0x1EF,
> causal_cond, block_states, stride - 2);
> > +            mbr_info >>= 3;
> > +            cond = mbr != 0;
> > +            bit = jpeg2000_peek_bit(sig_prop, magref_segment,
> magref_length);
> > +            *sp |= (bit * cond) << pLSB;
> > +            sig_prop->bits -= cond;
> > +            modify_state = (((1 << HT_SHIFT_REF_IND) | (1 <<
> HT_SHIFT_REF)) * cond) | 1 << HT_SHIFT_SCAN;
> > +            jpeg2000_modify_state(i, j, stride - 2, modify_state,
> block_states);
> > +        }
> > +    }
> > +}
> > +
> > +static void av_noinline
> > +jpeg2000_decode_sigprop(Jpeg2000Cblk *cblk, uint16_t width, uint16_t
> height, uint8_t *magref_segment,
> > +                        uint32_t magref_length, uint8_t pLSB, int32_t
> *sample_buf, uint8_t *block_states)
> > +{
> > +    /* Described in clause 7.4
> > +     * procedure: decodeSigPropMag
> > +     */
> > +    StateVars sp_dec;
> > +    const uint16_t num_v_stripe = height / 4;
> > +    const uint16_t num_h_stripe = width / 4;
> > +    int b_width = 4;
> > +    int b_height = 4;
> > +    int stride = width + 2;
> > +    int last_width;
> > +    uint16_t i = 0, j = 0;
> > +
> > +    jpeg2000_init_zero(&sp_dec);
> > +
> > +    for (int n1 = 0; n1 < num_v_stripe; n1++) {
> > +        j = 0;
> > +        for (int n2 = 0; n2 < num_h_stripe; n2++) {
> > +            jpeg2000_process_stripes_block(&sp_dec, i, j, b_width,
> b_height, stride,
> > +                                           pLSB, sample_buf,
> block_states, magref_segment,
> > +                                           magref_length);
> > +            j += 4;
> > +        }
> > +        last_width = width % 4;
> > +        if (last_width)
> > +            jpeg2000_process_stripes_block(&sp_dec, i, j, last_width,
> b_height, stride,
> > +                                           pLSB, sample_buf,
> block_states, magref_segment,
> > +                                           magref_length);
> > +        i += 4;
> > +    }
> > +    /* decode remaining height stripes */
> > +    b_height = height % 4;
> > +    j = 0;
> > +    for (int n2 = 0; n2 < num_h_stripe; n2++) {
> > +        jpeg2000_process_stripes_block(&sp_dec, i, j, b_width,
> b_height, stride,
> > +                                       pLSB, sample_buf, block_states,
> magref_segment,
> > +                                       magref_length);
> > +        j += 4;
> > +    }
> > +    last_width = width % 4;
> > +    if (last_width)
> > +        jpeg2000_process_stripes_block(&sp_dec, i, j, last_width,
> b_height, stride,
> > +                                       pLSB, sample_buf, block_states,
> magref_segment,
> > +                                       magref_length);
> > +}
> > +
> > +static int av_noinline
> > +jpeg2000_decode_magref(Jpeg2000Cblk *cblk, uint16_t width, uint16_t
> block_height, uint8_t *magref_segment,
> > +                       uint32_t magref_length, uint8_t pLSB, int32_t
> *sample_buf, uint8_t *block_states)
> > +{
> > +    /*
> > +     * Described in clause 7.5
> > +     * procedure: decodeSigPropMag
> > +     */
> > +
> > +    StateVars mag_ref = {0};
> > +    const uint16_t num_v_stripe = block_height / 4;
> > +    uint16_t height = 4;
> > +    uint16_t i_start = 0;
> > +    int32_t *sp;
> > +
> > +    jpeg2000_init_mag_ref(&mag_ref, magref_length);
> > +
> > +    for (int n1 = 0; n1 < num_v_stripe; n1++) {
> > +        for (int j = 0; j < width; j++) {
> > +            for (int i = i_start; i < i_start + height; i++) {
> > +                /* we move column wise, going from one quad to another
> > +                * see figure 7.
> > +                */
> > +                sp = &sample_buf[j + i * width];
> > +                if (jpeg2000_get_state(i, j, width, HT_SHIFT_SIGMA,
> block_states) != 0) {
> > +                    jpeg2000_modify_state(i, j, width, 1 <<
> HT_SHIFT_REF_IND, block_states);
> > +                    *sp |= jpeg2000_import_magref_bit(&mag_ref,
> magref_segment, magref_length) << pLSB;
> > +                }
> > +            }
> > +        }
> > +        i_start += 4;
> > +    }
> > +    height = block_height % 4;
> > +    for (int j = 0; j < width; j++) {
> > +        for (int i = i_start; i < i_start + height; i++) {
> > +            sp = &sample_buf[j + i * width];
> > +            if (jpeg2000_get_state(i, j, width, HT_SHIFT_SIGMA,
> block_states) != 0) {
> > +                jpeg2000_modify_state(i, j, width, 1 <<
> HT_SHIFT_REF_IND, block_states);
> > +                *sp |= jpeg2000_import_magref_bit(&mag_ref,
> magref_segment, magref_length) << pLSB;
> > +            }
> > +        }
> > +    }
> > +    return 1;
> > +}
> > +
> > +
> > +int
> > +ff_jpeg2000_decode_htj2k(const Jpeg2000DecoderContext *s,
> Jpeg2000CodingStyle *codsty, Jpeg2000T1Context *t1, Jpeg2000Cblk *cblk,
> > +                         int width, int height, int magp, uint8_t
> roi_shift)
> > +{
> > +    uint8_t p0 = 0; /* Number of placeholder passes. */
> > +    uint32_t Lcup;  /* Length of HT cleanup segment. */
> > +    uint32_t Lref;  /* Length of Refinement segment. */
> > +    uint32_t Scup;  /* HT cleanup segment suffix length. */
> > +    uint32_t Pcup;  /* HT cleanup segment prefix length. */
> > +
> > +    uint8_t S_blk;  /* Number of skipped magnitude bitplanes; */
> > +    uint8_t pLSB;
> > +
> > +    uint8_t *Dcup; /* Byte of an HT cleanup segment. */
> > +    uint8_t *Dref; /* Byte of an HT refinement segment. */
> > +
> > +    int z_blk;     /* Number of ht coding pass */
> > +
> > +    uint8_t empty_passes;
> > +
> > +    StateVars mag_sgn;  /* Magnitude and Sign */
> > +    StateVars mel;      /* Adaptive run-length coding */
> > +    StateVars vlc;      /* Variable Length coding */
> > +    StateVars sig_prop; /* Significance propagation */
> > +
> > +    MelDecoderState mel_state;
> > +
> > +    int ret;
> > +
> > +    /* Temporary buffers */
> > +    int32_t *sample_buf;
> > +    uint8_t *block_states;
> > +
> > +    /* Post-processing */
> > +    int32_t n, val;
> > +
> > +    int32_t M_b = magp;
> > +    av_assert0(width <= 1024U && height <= 1024U);
> > +    av_assert0(width * height <= 4096);
> > +    av_assert0(width * height > 0);
> > +
> > +    memset(t1->data, 0, t1->stride * height * sizeof(*t1->data));
> > +    memset(t1->flags, 0, t1->stride * (height + 2) *
> sizeof(*t1->flags));
> > +
> > +    if (cblk->npasses == 0)
> > +        return 0;
> > +
> > +    if (cblk->npasses > 3)
> > +        p0 = 0;
> > +    else if (cblk->length == 0)
> > +        p0 = 1;
> > +
> > +    empty_passes = p0 * 3;
> > +    z_blk = cblk->npasses - empty_passes;
> > +
> > +    if (z_blk <= 0)
> > +        /* no passes within this set, continue */
> > +        return 0;
> > +
> > +    Lcup = cblk->pass_lengths[0];
> > +    Lref = cblk->pass_lengths[1];
> > +
> > +    if (Lcup < 2) {
> > +        av_log(s->avctx, AV_LOG_ERROR,
> > +               "Cleanup pass length must be at least 2 bytes in
> length\n");
> > +        return AVERROR_INVALIDDATA;
> > +    }
> > +    Dcup = cblk->data;
> > +    /* Dref comes after the refinement segment. */
> > +    Dref  = cblk->data + Lcup;
> > +    S_blk = p0 + cblk->zbp;
> > +    pLSB  = 30 - S_blk;
> > +
> > +    Scup = (Dcup[Lcup - 1] << 4) + (Dcup[Lcup - 2] & 0x0F);
> > +
> > +    if (Scup < 2 || Scup > Lcup || Scup > 4079) {
> > +        av_log(s->avctx, AV_LOG_ERROR, "Cleanup pass suffix length is
> invalid %d\n",
> > +               Scup);
> > +        ret = AVERROR_INVALIDDATA;
> > +        goto free;
> > +    }
> > +    Pcup = Lcup - Scup;
> > +
> > +    /* modDcup shall be done before the creation of vlc instance. */
> > +    Dcup[Lcup - 1] = 0xFF;
> > +    Dcup[Lcup - 2] |= 0x0F;
> > +    /* Magnitude and refinement */
> > +    jpeg2000_init_zero(&mag_sgn);
> > +    jpeg2000_bitbuf_refill_forward(&mag_sgn, Dcup, Pcup);
> > +    /* Significance propagation */
> > +    jpeg2000_init_zero(&sig_prop);
> > +    /* Adaptive run length */
> > +    jpeg2000_init_mel(&mel, Pcup);
> > +    /* Variable Length coding. */
> > +    jpeg2000_init_vlc(&vlc, Lcup, Pcup, Dcup);
> > +
> > +    jpeg2000_init_mel_decoder(&mel_state);
> > +
> > +    sample_buf = av_calloc((width + 4) * (height + 4), sizeof(int32_t));
> > +    block_states = av_calloc((width + 4) * (height + 4),
> sizeof(uint8_t));
> > +
> > +    if (!sample_buf || !block_states) {
> > +        ret = AVERROR(ENOMEM);
> > +        goto free;
> > +    }
> > +    if ((ret = jpeg2000_decode_ht_cleanup(s, cblk, t1, &mel_state,
> &mel, &vlc,
> > +                                          &mag_sgn, Dcup, Lcup, Pcup,
> pLSB, width,
> > +                                          height, sample_buf,
> block_states))
> > +        < 0)
> > +        goto free;
> > +
> > +    if (cblk->npasses > 1)
> > +        jpeg2000_decode_sigprop(cblk, width, height, Dref, Lref,
> > +                                pLSB - 1, sample_buf, block_states);
> > +
> > +    if (cblk->npasses > 2)
> > +        if ((ret = jpeg2000_decode_magref(cblk, width, height, Dref,
> Lref,
> > +                                          pLSB - 1, sample_buf,
> block_states))
> > +            < 0)
> > +            goto free;
> > +
> > +    pLSB = 31 - M_b;
> > +    /* Reconstruct the values. */
> > +    for (int y = 0; y < height; y++) {
> > +        for (int x = 0; x < width; x++) {
> > +            n = x + (y * t1->stride);
> > +            val = sample_buf[x + (y * width)];
> > +            /* Convert sign-magnitude to twos complement. */
> > +            val = val >> 31 ? 0x80000000 - val : val;
> > +            val >>= (pLSB - 1);
> > +            t1->data[n] = val;
> > +        }
> > +    }
> > +free:
> > +    av_freep(&sample_buf);
> > +    av_freep(&block_states);
> > +    return ret;
> > +}
> > +
> > +/**
> > + * @brief  CtxVLC tables, borrowed from openhtj2k (
> https://github.com/osamu620/OpenHTJ2K) (credits to Osamu Watanabe)
> > + */
> > +static const uint16_t dec_CxtVLC_table1[1024] = {
>
> This is inconsistent with our naming conventions.
>
> > +        0x0016, 0x006A, 0x0046, 0x00DD, 0x0086, 0x888B, 0x0026, 0x444D,
> 0x0016, 0x00AA, 0x0046, 0x88AD, 0x0086,
> > +        0x003A, 0x0026, 0x00DE, 0x0016, 0x00CA, 0x0046, 0x009D, 0x0086,
> 0x005A, 0x0026, 0x222D, 0x0016, 0x009A,
> > +        0x0046, 0x007D, 0x0086, 0x01FD, 0x0026, 0x007E, 0x0016, 0x006A,
> 0x0046, 0x88CD, 0x0086, 0x888B, 0x0026,
> > +        0x111D, 0x0016, 0x00AA, 0x0046, 0x005D, 0x0086, 0x003A, 0x0026,
> 0x00EE, 0x0016, 0x00CA, 0x0046, 0x00BD,
> > +        0x0086, 0x005A, 0x0026, 0x11FF, 0x0016, 0x009A, 0x0046, 0x003D,
> 0x0086, 0x04ED, 0x0026, 0x2AAF, 0x0016,
> > +        0x006A, 0x0046, 0x00DD, 0x0086, 0x888B, 0x0026, 0x444D, 0x0016,
> 0x00AA, 0x0046, 0x88AD, 0x0086, 0x003A,
> > +        0x0026, 0x44EF, 0x0016, 0x00CA, 0x0046, 0x009D, 0x0086, 0x005A,
> 0x0026, 0x222D, 0x0016, 0x009A, 0x0046,
> > +        0x007D, 0x0086, 0x01FD, 0x0026, 0x00BE, 0x0016, 0x006A, 0x0046,
> 0x88CD, 0x0086, 0x888B, 0x0026, 0x111D,
> > +        0x0016, 0x00AA, 0x0046, 0x005D, 0x0086, 0x003A, 0x0026, 0x4CCF,
> 0x0016, 0x00CA, 0x0046, 0x00BD, 0x0086,
> > +        0x005A, 0x0026, 0x00FE, 0x0016, 0x009A, 0x0046, 0x003D, 0x0086,
> 0x04ED, 0x0026, 0x006F, 0x0002, 0x0088,
> > +        0x0002, 0x005C, 0x0002, 0x0018, 0x0002, 0x00DE, 0x0002, 0x0028,
> 0x0002, 0x009C, 0x0002, 0x004A, 0x0002,
> > +        0x007E, 0x0002, 0x0088, 0x0002, 0x00CC, 0x0002, 0x0018, 0x0002,
> 0x888F, 0x0002, 0x0028, 0x0002, 0x00FE,
> > +        0x0002, 0x003A, 0x0002, 0x222F, 0x0002, 0x0088, 0x0002, 0x04FD,
> 0x0002, 0x0018, 0x0002, 0x00BE, 0x0002,
> > +        0x0028, 0x0002, 0x00BF, 0x0002, 0x004A, 0x0002, 0x006E, 0x0002,
> 0x0088, 0x0002, 0x00AC, 0x0002, 0x0018,
> > +        0x0002, 0x444F, 0x0002, 0x0028, 0x0002, 0x00EE, 0x0002, 0x003A,
> 0x0002, 0x113F, 0x0002, 0x0088, 0x0002,
> > +        0x005C, 0x0002, 0x0018, 0x0002, 0x00CF, 0x0002, 0x0028, 0x0002,
> 0x009C, 0x0002, 0x004A, 0x0002, 0x006F,
> > +        0x0002, 0x0088, 0x0002, 0x00CC, 0x0002, 0x0018, 0x0002, 0x009F,
> 0x0002, 0x0028, 0x0002, 0x00EF, 0x0002,
> > +        0x003A, 0x0002, 0x233F, 0x0002, 0x0088, 0x0002, 0x04FD, 0x0002,
> 0x0018, 0x0002, 0x00AF, 0x0002, 0x0028,
> > +        0x0002, 0x44FF, 0x0002, 0x004A, 0x0002, 0x005F, 0x0002, 0x0088,
> 0x0002, 0x00AC, 0x0002, 0x0018, 0x0002,
> > +        0x007F, 0x0002, 0x0028, 0x0002, 0x00DF, 0x0002, 0x003A, 0x0002,
> 0x111F, 0x0002, 0x0028, 0x0002, 0x005C,
> > +        0x0002, 0x008A, 0x0002, 0x00BF, 0x0002, 0x0018, 0x0002, 0x00FE,
> 0x0002, 0x00CC, 0x0002, 0x007E, 0x0002,
> > +        0x0028, 0x0002, 0x8FFF, 0x0002, 0x004A, 0x0002, 0x007F, 0x0002,
> 0x0018, 0x0002, 0x00DF, 0x0002, 0x00AC,
> > +        0x0002, 0x133F, 0x0002, 0x0028, 0x0002, 0x222D, 0x0002, 0x008A,
> 0x0002, 0x00BE, 0x0002, 0x0018, 0x0002,
> > +        0x44EF, 0x0002, 0x2AAD, 0x0002, 0x006E, 0x0002, 0x0028, 0x0002,
> 0x15FF, 0x0002, 0x004A, 0x0002, 0x009E,
> > +        0x0002, 0x0018, 0x0002, 0x00CF, 0x0002, 0x003C, 0x0002, 0x223F,
> 0x0002, 0x0028, 0x0002, 0x005C, 0x0002,
> > +        0x008A, 0x0002, 0x2BBF, 0x0002, 0x0018, 0x0002, 0x04EF, 0x0002,
> 0x00CC, 0x0002, 0x006F, 0x0002, 0x0028,
> > +        0x0002, 0x27FF, 0x0002, 0x004A, 0x0002, 0x009F, 0x0002, 0x0018,
> 0x0002, 0x00DE, 0x0002, 0x00AC, 0x0002,
> > +        0x444F, 0x0002, 0x0028, 0x0002, 0x222D, 0x0002, 0x008A, 0x0002,
> 0x8AAF, 0x0002, 0x0018, 0x0002, 0x00EE,
> > +        0x0002, 0x2AAD, 0x0002, 0x005F, 0x0002, 0x0028, 0x0002, 0x44FF,
> 0x0002, 0x004A, 0x0002, 0x888F, 0x0002,
> > +        0x0018, 0x0002, 0xAAAF, 0x0002, 0x003C, 0x0002, 0x111F, 0x0004,
> 0x8FFD, 0x0028, 0x005C, 0x0004, 0x00BC,
> > +        0x008A, 0x66FF, 0x0004, 0x00CD, 0x0018, 0x111D, 0x0004, 0x009C,
> 0x003A, 0x8AAF, 0x0004, 0x00FC, 0x0028,
> > +        0x133D, 0x0004, 0x00AC, 0x004A, 0x3BBF, 0x0004, 0x2BBD, 0x0018,
> 0x5FFF, 0x0004, 0x006C, 0x157D, 0x455F,
> > +        0x0004, 0x2FFD, 0x0028, 0x222D, 0x0004, 0x22AD, 0x008A, 0x44EF,
> 0x0004, 0x00CC, 0x0018, 0x4FFF, 0x0004,
> > +        0x007C, 0x003A, 0x447F, 0x0004, 0x04DD, 0x0028, 0x233D, 0x0004,
> 0x009D, 0x004A, 0x00DE, 0x0004, 0x88BD,
> > +        0x0018, 0xAFFF, 0x0004, 0x115D, 0x1FFD, 0x444F, 0x0004, 0x8FFD,
> 0x0028, 0x005C, 0x0004, 0x00BC, 0x008A,
> > +        0x8CEF, 0x0004, 0x00CD, 0x0018, 0x111D, 0x0004, 0x009C, 0x003A,
> 0x888F, 0x0004, 0x00FC, 0x0028, 0x133D,
> > +        0x0004, 0x00AC, 0x004A, 0x44DF, 0x0004, 0x2BBD, 0x0018, 0x8AFF,
> 0x0004, 0x006C, 0x157D, 0x006F, 0x0004,
> > +        0x2FFD, 0x0028, 0x222D, 0x0004, 0x22AD, 0x008A, 0x00EE, 0x0004,
> 0x00CC, 0x0018, 0x2EEF, 0x0004, 0x007C,
> > +        0x003A, 0x277F, 0x0004, 0x04DD, 0x0028, 0x233D, 0x0004, 0x009D,
> 0x004A, 0x1BBF, 0x0004, 0x88BD, 0x0018,
> > +        0x37FF, 0x0004, 0x115D, 0x1FFD, 0x333F, 0x0002, 0x0088, 0x0002,
> 0x02ED, 0x0002, 0x00CA, 0x0002, 0x4CCF,
> > +        0x0002, 0x0048, 0x0002, 0x23FF, 0x0002, 0x001A, 0x0002, 0x888F,
> 0x0002, 0x0088, 0x0002, 0x006C, 0x0002,
> > +        0x002A, 0x0002, 0x00AF, 0x0002, 0x0048, 0x0002, 0x22EF, 0x0002,
> 0x00AC, 0x0002, 0x005F, 0x0002, 0x0088,
> > +        0x0002, 0x444D, 0x0002, 0x00CA, 0x0002, 0xCCCF, 0x0002, 0x0048,
> 0x0002, 0x00FE, 0x0002, 0x001A, 0x0002,
> > +        0x006F, 0x0002, 0x0088, 0x0002, 0x005C, 0x0002, 0x002A, 0x0002,
> 0x009F, 0x0002, 0x0048, 0x0002, 0x00DF,
> > +        0x0002, 0x03FD, 0x0002, 0x222F, 0x0002, 0x0088, 0x0002, 0x02ED,
> 0x0002, 0x00CA, 0x0002, 0x8CCF, 0x0002,
> > +        0x0048, 0x0002, 0x11FF, 0x0002, 0x001A, 0x0002, 0x007E, 0x0002,
> 0x0088, 0x0002, 0x006C, 0x0002, 0x002A,
> > +        0x0002, 0x007F, 0x0002, 0x0048, 0x0002, 0x00EE, 0x0002, 0x00AC,
> 0x0002, 0x003E, 0x0002, 0x0088, 0x0002,
> > +        0x444D, 0x0002, 0x00CA, 0x0002, 0x00BE, 0x0002, 0x0048, 0x0002,
> 0x00BF, 0x0002, 0x001A, 0x0002, 0x003F,
> > +        0x0002, 0x0088, 0x0002, 0x005C, 0x0002, 0x002A, 0x0002, 0x009E,
> 0x0002, 0x0048, 0x0002, 0x00DE, 0x0002,
> > +        0x03FD, 0x0002, 0x111F, 0x0004, 0x8AED, 0x0048, 0x888D, 0x0004,
> 0x00DC, 0x00CA, 0x3FFF, 0x0004, 0xCFFD,
> > +        0x002A, 0x003D, 0x0004, 0x00BC, 0x005A, 0x8DDF, 0x0004, 0x8FFD,
> 0x0048, 0x006C, 0x0004, 0x027D, 0x008A,
> > +        0x99FF, 0x0004, 0x00EC, 0x00FA, 0x003C, 0x0004, 0x00AC, 0x001A,
> 0x009F, 0x0004, 0x2FFD, 0x0048, 0x007C,
> > +        0x0004, 0x44CD, 0x00CA, 0x67FF, 0x0004, 0x1FFD, 0x002A, 0x444D,
> 0x0004, 0x00AD, 0x005A, 0x8CCF, 0x0004,
> > +        0x4FFD, 0x0048, 0x445D, 0x0004, 0x01BD, 0x008A, 0x4EEF, 0x0004,
> 0x45DD, 0x00FA, 0x111D, 0x0004, 0x009C,
> > +        0x001A, 0x222F, 0x0004, 0x8AED, 0x0048, 0x888D, 0x0004, 0x00DC,
> 0x00CA, 0xAFFF, 0x0004, 0xCFFD, 0x002A,
> > +        0x003D, 0x0004, 0x00BC, 0x005A, 0x11BF, 0x0004, 0x8FFD, 0x0048,
> 0x006C, 0x0004, 0x027D, 0x008A, 0x22EF,
> > +        0x0004, 0x00EC, 0x00FA, 0x003C, 0x0004, 0x00AC, 0x001A, 0x227F,
> 0x0004, 0x2FFD, 0x0048, 0x007C, 0x0004,
> > +        0x44CD, 0x00CA, 0x5DFF, 0x0004, 0x1FFD, 0x002A, 0x444D, 0x0004,
> 0x00AD, 0x005A, 0x006F, 0x0004, 0x4FFD,
> > +        0x0048, 0x445D, 0x0004, 0x01BD, 0x008A, 0x11DF, 0x0004, 0x45DD,
> 0x00FA, 0x111D, 0x0004, 0x009C, 0x001A,
> > +        0x155F, 0x0006, 0x00FC, 0x0018, 0x111D, 0x0048, 0x888D, 0x00AA,
> 0x4DDF, 0x0006, 0x2AAD, 0x005A, 0x67FF,
> > +        0x0028, 0x223D, 0x00BC, 0xAAAF, 0x0006, 0x00EC, 0x0018, 0x5FFF,
> 0x0048, 0x006C, 0x008A, 0xCCCF, 0x0006,
> > +        0x009D, 0x00CA, 0x44EF, 0x0028, 0x003C, 0x8FFD, 0x137F, 0x0006,
> 0x8EED, 0x0018, 0x1FFF, 0x0048, 0x007C,
> > +        0x00AA, 0x4CCF, 0x0006, 0x227D, 0x005A, 0x1DDF, 0x0028, 0x444D,
> 0x4FFD, 0x155F, 0x0006, 0x00DC, 0x0018,
> > +        0x2EEF, 0x0048, 0x445D, 0x008A, 0x22BF, 0x0006, 0x009C, 0x00CA,
> 0x8CDF, 0x0028, 0x222D, 0x2FFD, 0x226F,
> > +        0x0006, 0x00FC, 0x0018, 0x111D, 0x0048, 0x888D, 0x00AA, 0x1BBF,
> 0x0006, 0x2AAD, 0x005A, 0x33FF, 0x0028,
> > +        0x223D, 0x00BC, 0x8AAF, 0x0006, 0x00EC, 0x0018, 0x9BFF, 0x0048,
> 0x006C, 0x008A, 0x8ABF, 0x0006, 0x009D,
> > +        0x00CA, 0x4EEF, 0x0028, 0x003C, 0x8FFD, 0x466F, 0x0006, 0x8EED,
> 0x0018, 0xCFFF, 0x0048, 0x007C, 0x00AA,
> > +        0x8CCF, 0x0006, 0x227D, 0x005A, 0xAEEF, 0x0028, 0x444D, 0x4FFD,
> 0x477F, 0x0006, 0x00DC, 0x0018, 0xAFFF,
> > +        0x0048, 0x445D, 0x008A, 0x2BBF, 0x0006, 0x009C, 0x00CA, 0x44DF,
> 0x0028, 0x222D, 0x2FFD, 0x133F, 0x00F6,
> > +        0xAFFD, 0x1FFB, 0x003C, 0x0008, 0x23BD, 0x007A, 0x11DF, 0x00F6,
> 0x45DD, 0x2FFB, 0x4EEF, 0x00DA, 0x177D,
> > +        0xCFFD, 0x377F, 0x00F6, 0x3FFD, 0x8FFB, 0x111D, 0x0008, 0x009C,
> 0x005A, 0x1BBF, 0x00F6, 0x00CD, 0x00BA,
> > +        0x8DDF, 0x4FFB, 0x006C, 0x9BFD, 0x455F, 0x00F6, 0x67FD, 0x1FFB,
> 0x002C, 0x0008, 0x00AC, 0x007A, 0x009F,
> > +        0x00F6, 0x00AD, 0x2FFB, 0x7FFF, 0x00DA, 0x004C, 0x5FFD, 0x477F,
> 0x00F6, 0x00EC, 0x8FFB, 0x001C, 0x0008,
> > +        0x008C, 0x005A, 0x888F, 0x00F6, 0x00CC, 0x00BA, 0x2EEF, 0x4FFB,
> 0x115D, 0x8AED, 0x113F, 0x00F6, 0xAFFD,
> > +        0x1FFB, 0x003C, 0x0008, 0x23BD, 0x007A, 0x1DDF, 0x00F6, 0x45DD,
> 0x2FFB, 0xBFFF, 0x00DA, 0x177D, 0xCFFD,
> > +        0x447F, 0x00F6, 0x3FFD, 0x8FFB, 0x111D, 0x0008, 0x009C, 0x005A,
> 0x277F, 0x00F6, 0x00CD, 0x00BA, 0x22EF,
> > +        0x4FFB, 0x006C, 0x9BFD, 0x444F, 0x00F6, 0x67FD, 0x1FFB, 0x002C,
> 0x0008, 0x00AC, 0x007A, 0x11BF, 0x00F6,
> > +        0x00AD, 0x2FFB, 0xFFFF, 0x00DA, 0x004C, 0x5FFD, 0x233F, 0x00F6,
> 0x00EC, 0x8FFB, 0x001C, 0x0008, 0x008C,
> > +        0x005A, 0x006F, 0x00F6, 0x00CC, 0x00BA, 0x8BBF, 0x4FFB, 0x115D,
> 0x8AED, 0x222F};
> > +
> > +static const uint16_t dec_CxtVLC_table0[1024] = {
> > +        0x0026, 0x00AA, 0x0046, 0x006C, 0x0086, 0x8AED, 0x0018, 0x8DDF,
> 0x0026, 0x01BD, 0x0046, 0x5FFF, 0x0086,
> > +        0x027D, 0x005A, 0x155F, 0x0026, 0x003A, 0x0046, 0x444D, 0x0086,
> 0x4CCD, 0x0018, 0xCCCF, 0x0026, 0x2EFD,
> > +        0x0046, 0x99FF, 0x0086, 0x009C, 0x00CA, 0x133F, 0x0026, 0x00AA,
> 0x0046, 0x445D, 0x0086, 0x8CCD, 0x0018,
> > +        0x11DF, 0x0026, 0x4FFD, 0x0046, 0xCFFF, 0x0086, 0x009D, 0x005A,
> 0x007E, 0x0026, 0x003A, 0x0046, 0x1FFF,
> > +        0x0086, 0x88AD, 0x0018, 0x00BE, 0x0026, 0x8FFD, 0x0046, 0x4EEF,
> 0x0086, 0x888D, 0x00CA, 0x111F, 0x0026,
> > +        0x00AA, 0x0046, 0x006C, 0x0086, 0x8AED, 0x0018, 0x45DF, 0x0026,
> 0x01BD, 0x0046, 0x22EF, 0x0086, 0x027D,
> > +        0x005A, 0x227F, 0x0026, 0x003A, 0x0046, 0x444D, 0x0086, 0x4CCD,
> 0x0018, 0x11BF, 0x0026, 0x2EFD, 0x0046,
> > +        0x00FE, 0x0086, 0x009C, 0x00CA, 0x223F, 0x0026, 0x00AA, 0x0046,
> 0x445D, 0x0086, 0x8CCD, 0x0018, 0x00DE,
> > +        0x0026, 0x4FFD, 0x0046, 0xABFF, 0x0086, 0x009D, 0x005A, 0x006F,
> 0x0026, 0x003A, 0x0046, 0x6EFF, 0x0086,
> > +        0x88AD, 0x0018, 0x2AAF, 0x0026, 0x8FFD, 0x0046, 0x00EE, 0x0086,
> 0x888D, 0x00CA, 0x222F, 0x0004, 0x00CA,
> > +        0x0088, 0x027D, 0x0004, 0x4CCD, 0x0028, 0x00FE, 0x0004, 0x2AFD,
> 0x0048, 0x005C, 0x0004, 0x009D, 0x0018,
> > +        0x00DE, 0x0004, 0x01BD, 0x0088, 0x006C, 0x0004, 0x88AD, 0x0028,
> 0x11DF, 0x0004, 0x8AED, 0x0048, 0x003C,
> > +        0x0004, 0x888D, 0x0018, 0x111F, 0x0004, 0x00CA, 0x0088, 0x006D,
> 0x0004, 0x88CD, 0x0028, 0x88FF, 0x0004,
> > +        0x8BFD, 0x0048, 0x444D, 0x0004, 0x009C, 0x0018, 0x00BE, 0x0004,
> 0x4EFD, 0x0088, 0x445D, 0x0004, 0x00AC,
> > +        0x0028, 0x00EE, 0x0004, 0x45DD, 0x0048, 0x222D, 0x0004, 0x003D,
> 0x0018, 0x007E, 0x0004, 0x00CA, 0x0088,
> > +        0x027D, 0x0004, 0x4CCD, 0x0028, 0x1FFF, 0x0004, 0x2AFD, 0x0048,
> 0x005C, 0x0004, 0x009D, 0x0018, 0x11BF,
> > +        0x0004, 0x01BD, 0x0088, 0x006C, 0x0004, 0x88AD, 0x0028, 0x22EF,
> 0x0004, 0x8AED, 0x0048, 0x003C, 0x0004,
> > +        0x888D, 0x0018, 0x227F, 0x0004, 0x00CA, 0x0088, 0x006D, 0x0004,
> 0x88CD, 0x0028, 0x4EEF, 0x0004, 0x8BFD,
> > +        0x0048, 0x444D, 0x0004, 0x009C, 0x0018, 0x2AAF, 0x0004, 0x4EFD,
> 0x0088, 0x445D, 0x0004, 0x00AC, 0x0028,
> > +        0x8DDF, 0x0004, 0x45DD, 0x0048, 0x222D, 0x0004, 0x003D, 0x0018,
> 0x155F, 0x0004, 0x005A, 0x0088, 0x006C,
> > +        0x0004, 0x88DD, 0x0028, 0x23FF, 0x0004, 0x11FD, 0x0048, 0x444D,
> 0x0004, 0x00AD, 0x0018, 0x00BE, 0x0004,
> > +        0x137D, 0x0088, 0x155D, 0x0004, 0x00CC, 0x0028, 0x00DE, 0x0004,
> 0x02ED, 0x0048, 0x111D, 0x0004, 0x009D,
> > +        0x0018, 0x007E, 0x0004, 0x005A, 0x0088, 0x455D, 0x0004, 0x44CD,
> 0x0028, 0x00EE, 0x0004, 0x1FFD, 0x0048,
> > +        0x003C, 0x0004, 0x00AC, 0x0018, 0x555F, 0x0004, 0x47FD, 0x0088,
> 0x113D, 0x0004, 0x02BD, 0x0028, 0x477F,
> > +        0x0004, 0x4CDD, 0x0048, 0x8FFF, 0x0004, 0x009C, 0x0018, 0x222F,
> 0x0004, 0x005A, 0x0088, 0x006C, 0x0004,
> > +        0x88DD, 0x0028, 0x00FE, 0x0004, 0x11FD, 0x0048, 0x444D, 0x0004,
> 0x00AD, 0x0018, 0x888F, 0x0004, 0x137D,
> > +        0x0088, 0x155D, 0x0004, 0x00CC, 0x0028, 0x8CCF, 0x0004, 0x02ED,
> 0x0048, 0x111D, 0x0004, 0x009D, 0x0018,
> > +        0x006F, 0x0004, 0x005A, 0x0088, 0x455D, 0x0004, 0x44CD, 0x0028,
> 0x1DDF, 0x0004, 0x1FFD, 0x0048, 0x003C,
> > +        0x0004, 0x00AC, 0x0018, 0x227F, 0x0004, 0x47FD, 0x0088, 0x113D,
> 0x0004, 0x02BD, 0x0028, 0x22BF, 0x0004,
> > +        0x4CDD, 0x0048, 0x22EF, 0x0004, 0x009C, 0x0018, 0x233F, 0x0006,
> 0x4DDD, 0x4FFB, 0xCFFF, 0x0018, 0x113D,
> > +        0x005A, 0x888F, 0x0006, 0x23BD, 0x008A, 0x00EE, 0x002A, 0x155D,
> 0xAAFD, 0x277F, 0x0006, 0x44CD, 0x8FFB,
> > +        0x44EF, 0x0018, 0x467D, 0x004A, 0x2AAF, 0x0006, 0x00AC, 0x555B,
> 0x99DF, 0x1FFB, 0x003C, 0x5FFD, 0x266F,
> > +        0x0006, 0x1DDD, 0x4FFB, 0x6EFF, 0x0018, 0x177D, 0x005A, 0x1BBF,
> 0x0006, 0x88AD, 0x008A, 0x5DDF, 0x002A,
> > +        0x444D, 0x2FFD, 0x667F, 0x0006, 0x00CC, 0x8FFB, 0x2EEF, 0x0018,
> 0x455D, 0x004A, 0x119F, 0x0006, 0x009C,
> > +        0x555B, 0x8CCF, 0x1FFB, 0x111D, 0x8CED, 0x006E, 0x0006, 0x4DDD,
> 0x4FFB, 0x3FFF, 0x0018, 0x113D, 0x005A,
> > +        0x11BF, 0x0006, 0x23BD, 0x008A, 0x8DDF, 0x002A, 0x155D, 0xAAFD,
> 0x222F, 0x0006, 0x44CD, 0x8FFB, 0x00FE,
> > +        0x0018, 0x467D, 0x004A, 0x899F, 0x0006, 0x00AC, 0x555B, 0x00DE,
> 0x1FFB, 0x003C, 0x5FFD, 0x446F, 0x0006,
> > +        0x1DDD, 0x4FFB, 0x9BFF, 0x0018, 0x177D, 0x005A, 0x00BE, 0x0006,
> 0x88AD, 0x008A, 0xCDDF, 0x002A, 0x444D,
> > +        0x2FFD, 0x007E, 0x0006, 0x00CC, 0x8FFB, 0x4EEF, 0x0018, 0x455D,
> 0x004A, 0x377F, 0x0006, 0x009C, 0x555B,
> > +        0x8BBF, 0x1FFB, 0x111D, 0x8CED, 0x233F, 0x0004, 0x00AA, 0x0088,
> 0x047D, 0x0004, 0x01DD, 0x0028, 0x11DF,
> > +        0x0004, 0x27FD, 0x0048, 0x005C, 0x0004, 0x8AAD, 0x0018, 0x2BBF,
> 0x0004, 0x009C, 0x0088, 0x006C, 0x0004,
> > +        0x00CC, 0x0028, 0x00EE, 0x0004, 0x8CED, 0x0048, 0x222D, 0x0004,
> 0x888D, 0x0018, 0x007E, 0x0004, 0x00AA,
> > +        0x0088, 0x006D, 0x0004, 0x88CD, 0x0028, 0x00FE, 0x0004, 0x19FD,
> 0x0048, 0x003C, 0x0004, 0x2AAD, 0x0018,
> > +        0xAAAF, 0x0004, 0x8BFD, 0x0088, 0x005D, 0x0004, 0x00BD, 0x0028,
> 0x4CCF, 0x0004, 0x44ED, 0x0048, 0x4FFF,
> > +        0x0004, 0x223D, 0x0018, 0x111F, 0x0004, 0x00AA, 0x0088, 0x047D,
> 0x0004, 0x01DD, 0x0028, 0x99FF, 0x0004,
> > +        0x27FD, 0x0048, 0x005C, 0x0004, 0x8AAD, 0x0018, 0x00BE, 0x0004,
> 0x009C, 0x0088, 0x006C, 0x0004, 0x00CC,
> > +        0x0028, 0x00DE, 0x0004, 0x8CED, 0x0048, 0x222D, 0x0004, 0x888D,
> 0x0018, 0x444F, 0x0004, 0x00AA, 0x0088,
> > +        0x006D, 0x0004, 0x88CD, 0x0028, 0x2EEF, 0x0004, 0x19FD, 0x0048,
> 0x003C, 0x0004, 0x2AAD, 0x0018, 0x447F,
> > +        0x0004, 0x8BFD, 0x0088, 0x005D, 0x0004, 0x00BD, 0x0028, 0x009F,
> 0x0004, 0x44ED, 0x0048, 0x67FF, 0x0004,
> > +        0x223D, 0x0018, 0x133F, 0x0006, 0x00CC, 0x008A, 0x9DFF, 0x2FFB,
> 0x467D, 0x1FFD, 0x99BF, 0x0006, 0x2AAD,
> > +        0x002A, 0x66EF, 0x4FFB, 0x005C, 0x2EED, 0x377F, 0x0006, 0x89BD,
> 0x004A, 0x00FE, 0x8FFB, 0x006C, 0x67FD,
> > +        0x889F, 0x0006, 0x888D, 0x001A, 0x5DDF, 0x00AA, 0x222D, 0x89DD,
> 0x444F, 0x0006, 0x2BBD, 0x008A, 0xCFFF,
> > +        0x2FFB, 0x226D, 0x009C, 0x00BE, 0x0006, 0xAAAD, 0x002A, 0x1DDF,
> 0x4FFB, 0x003C, 0x4DDD, 0x466F, 0x0006,
> > +        0x8AAD, 0x004A, 0xAEEF, 0x8FFB, 0x445D, 0x8EED, 0x177F, 0x0006,
> 0x233D, 0x001A, 0x4CCF, 0x00AA, 0xAFFF,
> > +        0x88CD, 0x133F, 0x0006, 0x00CC, 0x008A, 0x77FF, 0x2FFB, 0x467D,
> 0x1FFD, 0x3BBF, 0x0006, 0x2AAD, 0x002A,
> > +        0x00EE, 0x4FFB, 0x005C, 0x2EED, 0x007E, 0x0006, 0x89BD, 0x004A,
> 0x4EEF, 0x8FFB, 0x006C, 0x67FD, 0x667F,
> > +        0x0006, 0x888D, 0x001A, 0x00DE, 0x00AA, 0x222D, 0x89DD, 0x333F,
> 0x0006, 0x2BBD, 0x008A, 0x57FF, 0x2FFB,
> > +        0x226D, 0x009C, 0x199F, 0x0006, 0xAAAD, 0x002A, 0x99DF, 0x4FFB,
> 0x003C, 0x4DDD, 0x155F, 0x0006, 0x8AAD,
> > +        0x004A, 0xCEEF, 0x8FFB, 0x445D, 0x8EED, 0x277F, 0x0006, 0x233D,
> 0x001A, 0x1BBF, 0x00AA, 0x3FFF, 0x88CD,
> > +        0x111F, 0x0006, 0x45DD, 0x2FFB, 0x111D, 0x0018, 0x467D, 0x8FFD,
> 0xCCCF, 0x0006, 0x19BD, 0x004A, 0x22EF,
> > +        0x002A, 0x222D, 0x3FFD, 0x888F, 0x0006, 0x00CC, 0x008A, 0x00FE,
> 0x0018, 0x115D, 0xCFFD, 0x8AAF, 0x0006,
> > +        0x00AC, 0x003A, 0x8CDF, 0x1FFB, 0x133D, 0x66FD, 0x466F, 0x0006,
> 0x8CCD, 0x2FFB, 0x5FFF, 0x0018, 0x006C,
> > +        0x4FFD, 0xABBF, 0x0006, 0x22AD, 0x004A, 0x00EE, 0x002A, 0x233D,
> 0xAEFD, 0x377F, 0x0006, 0x2BBD, 0x008A,
> > +        0x55DF, 0x0018, 0x005C, 0x177D, 0x119F, 0x0006, 0x009C, 0x003A,
> 0x4CCF, 0x1FFB, 0x333D, 0x8EED, 0x444F,
> > +        0x0006, 0x45DD, 0x2FFB, 0x111D, 0x0018, 0x467D, 0x8FFD, 0x99BF,
> 0x0006, 0x19BD, 0x004A, 0x2EEF, 0x002A,
> > +        0x222D, 0x3FFD, 0x667F, 0x0006, 0x00CC, 0x008A, 0x4EEF, 0x0018,
> 0x115D, 0xCFFD, 0x899F, 0x0006, 0x00AC,
> > +        0x003A, 0x00DE, 0x1FFB, 0x133D, 0x66FD, 0x226F, 0x0006, 0x8CCD,
> 0x2FFB, 0x9BFF, 0x0018, 0x006C, 0x4FFD,
> > +        0x00BE, 0x0006, 0x22AD, 0x004A, 0x1DDF, 0x002A, 0x233D, 0xAEFD,
> 0x007E, 0x0006, 0x2BBD, 0x008A, 0xCEEF,
> > +        0x0018, 0x005C, 0x177D, 0x277F, 0x0006, 0x009C, 0x003A, 0x8BBF,
> 0x1FFB, 0x333D, 0x8EED, 0x455F, 0x1FF9,
> > +        0x1DDD, 0xAFFB, 0x00DE, 0x8FF9, 0x001C, 0xFFFB, 0x477F, 0x4FF9,
> 0x177D, 0x3FFB, 0x3BBF, 0x2FF9, 0xAEEF,
> > +        0x8EED, 0x444F, 0x1FF9, 0x22AD, 0x000A, 0x8BBF, 0x8FF9, 0x00FE,
> 0xCFFD, 0x007E, 0x4FF9, 0x115D, 0x5FFB,
> > +        0x577F, 0x2FF9, 0x8DDF, 0x2EED, 0x333F, 0x1FF9, 0x2BBD, 0xAFFB,
> 0x88CF, 0x8FF9, 0xBFFF, 0xFFFB, 0x377F,
> > +        0x4FF9, 0x006D, 0x3FFB, 0x00BE, 0x2FF9, 0x66EF, 0x9FFD, 0x133F,
> 0x1FF9, 0x009D, 0x000A, 0xABBF, 0x8FF9,
> > +        0xDFFF, 0x6FFD, 0x006E, 0x4FF9, 0x002C, 0x5FFB, 0x888F, 0x2FF9,
> 0xCDDF, 0x4DDD, 0x222F, 0x1FF9, 0x1DDD,
> > +        0xAFFB, 0x4CCF, 0x8FF9, 0x001C, 0xFFFB, 0x277F, 0x4FF9, 0x177D,
> 0x3FFB, 0x99BF, 0x2FF9, 0xCEEF, 0x8EED,
> > +        0x004E, 0x1FF9, 0x22AD, 0x000A, 0x00AE, 0x8FF9, 0x7FFF, 0xCFFD,
> 0x005E, 0x4FF9, 0x115D, 0x5FFB, 0x009E,
> > +        0x2FF9, 0x5DDF, 0x2EED, 0x003E, 0x1FF9, 0x2BBD, 0xAFFB, 0x00CE,
> 0x8FF9, 0xEFFF, 0xFFFB, 0x667F, 0x4FF9,
> > +        0x006D, 0x3FFB, 0x8AAF, 0x2FF9, 0x00EE, 0x9FFD, 0x233F, 0x1FF9,
> 0x009D, 0x000A, 0x1BBF, 0x8FF9, 0x4EEF,
> > +        0x6FFD, 0x455F, 0x4FF9, 0x002C, 0x5FFB, 0x008E, 0x2FF9, 0x99DF,
> 0x4DDD, 0x111F};
> > diff --git a/libavcodec/jpeg2000htdec.h b/libavcodec/jpeg2000htdec.h
> > new file mode 100644
> > index 0000000000..62cf1450c2
> > --- /dev/null
> > +++ b/libavcodec/jpeg2000htdec.h
> > @@ -0,0 +1,28 @@
> > +/*
> > +* JPEG2000 High Throughput block decoder
> > +* Copyright (c) 2022 Caleb Etemesi <etemesicaleb@gmail.com>
> > +*
> > +* This file is part of FFmpeg.
> > +*
> > +* FFmpeg is free software; you can redistribute it and/or
> > +* modify it under the terms of the GNU Lesser General Public
> > +* License as published by the Free Software Foundation; either
> > +* version 2.1 of the License, or (at your option) any later version.
> > +*
> > +* FFmpeg is distributed in the hope that it will be useful,
> > +* but WITHOUT ANY WARRANTY; without even the implied warranty of
> > +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > +* Lesser General Public License for more details.
> > +*
> > +* You should have received a copy of the GNU Lesser General Public
> > +* License along with FFmpeg; if not, write to the Free Software
> > +* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> 02110-1301 USA
> > + */
> > +#ifndef AVCODEC_JPEG2000HTDEC_H
> > +#define AVCODEC_JPEG2000HTDEC_H
> > +
> > +#include "jpeg2000dec.h"
> > +
> > +int ff_jpeg2000_decode_htj2k(const Jpeg2000DecoderContext *s,
> Jpeg2000CodingStyle *codsty, Jpeg2000T1Context *t1, Jpeg2000Cblk *cblk, int
> width, int height, int magp, uint8_t roi_shift);
> > +
>
> jpeg2000dec.c seems to be the only intended user of this function, so
> one could make a header/template out of jpeg2000htdec.c and include this
> in jpeg2000dec.c. This would allow to make this function static and
> would avoid exposing the jpeg2000dec.c structures via a header.
>
> > +#endif /* AVCODEC_JPEG2000HTDEC_H */
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
diff mbox series

Patch

diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 98841ed07c..1ba9e09528 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -464,7 +464,7 @@  OBJS-$(CONFIG_JACOSUB_DECODER)         += jacosubdec.o ass.o
 OBJS-$(CONFIG_JPEG2000_ENCODER)        += j2kenc.o mqcenc.o mqc.o jpeg2000.o \
                                           jpeg2000dwt.o
 OBJS-$(CONFIG_JPEG2000_DECODER)        += jpeg2000dec.o jpeg2000.o jpeg2000dsp.o \
-                                          jpeg2000dwt.o mqcdec.o mqc.o
+                                          jpeg2000dwt.o mqcdec.o mqc.o jpeg2000htdec.o
 OBJS-$(CONFIG_JPEGLS_DECODER)          += jpeglsdec.o jpegls.o
 OBJS-$(CONFIG_JPEGLS_ENCODER)          += jpeglsenc.o jpegls.o
 OBJS-$(CONFIG_JV_DECODER)              += jvdec.o
diff --git a/libavcodec/jpeg2000.h b/libavcodec/jpeg2000.h
index e5ecb4cbf9..b2a8e13244 100644
--- a/libavcodec/jpeg2000.h
+++ b/libavcodec/jpeg2000.h
@@ -189,6 +189,9 @@  typedef struct Jpeg2000Cblk {
     Jpeg2000Pass *passes;
     Jpeg2000Layer *layers;
     int coord[2][2]; // border coordinates {{x0, x1}, {y0, y1}}
+    /*HTJ2K settings */
+    int zbp;
+    int pass_lengths[2];
 } Jpeg2000Cblk; // code block
 
 typedef struct Jpeg2000Prec {
diff --git a/libavcodec/jpeg2000dec.c b/libavcodec/jpeg2000dec.c
index 7fc09cb558..5c92baa88e 100644
--- a/libavcodec/jpeg2000dec.c
+++ b/libavcodec/jpeg2000dec.c
@@ -43,6 +43,7 @@ 
 #include "jpeg2000dsp.h"
 #include "profiles.h"
 #include "jpeg2000dec.h"
+#include "jpeg2000htdec.h"
 
 /* get_bits functions for JPEG2000 packet bitstream
  * It is a get_bit function with a bit-stuffing routine. If the value of the
@@ -428,12 +429,13 @@  static int get_cox(Jpeg2000DecoderContext *s, Jpeg2000CodingStyle *c)
     c->cblk_style = bytestream2_get_byteu(&s->g);
     if (c->cblk_style != 0) { // cblk style
         if (c->cblk_style & JPEG2000_CTSY_HTJ2K_M || c->cblk_style & JPEG2000_CTSY_HTJ2K_F) {
-            av_log(s->avctx, AV_LOG_ERROR, "Support for High throughput JPEG 2000 is not yet available\n");
-            return AVERROR_PATCHWELCOME;
+            av_log(s->avctx,AV_LOG_TRACE,"High Throughput jpeg 2000 codestream.\n");
+            s->is_htj2k = 1;
+        } else {
+            av_log(s->avctx, AV_LOG_WARNING, "extra cblk styles %X\n", c->cblk_style);
+            if (c->cblk_style & JPEG2000_CBLK_BYPASS)
+                av_log(s->avctx, AV_LOG_WARNING, "Selective arithmetic coding bypass\n");
         }
-        av_log(s->avctx, AV_LOG_WARNING, "extra cblk styles %X\n", c->cblk_style);
-        if (c->cblk_style & JPEG2000_CBLK_BYPASS)
-            av_log(s->avctx, AV_LOG_WARNING, "Selective arithmetic coding bypass\n");
     }
     c->transform = bytestream2_get_byteu(&s->g); // DWT transformation type
     /* set integer 9/7 DWT in case of BITEXACT flag */
@@ -1058,13 +1060,15 @@  static int jpeg2000_decode_packet(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile,
                 return incl;
 
             if (!cblk->npasses) {
-                int v = expn[bandno] + numgbits - 1 -
-                        tag_tree_decode(s, prec->zerobits + cblkno, 100);
+                int zbp = tag_tree_decode(s,prec->zerobits + cblkno, 100);
+                int v = expn[bandno] + numgbits - 1 - zbp;
+
                 if (v < 0 || v > 30) {
                     av_log(s->avctx, AV_LOG_ERROR,
                            "nonzerobits %d invalid or unsupported\n", v);
                     return AVERROR_INVALIDDATA;
                 }
+                cblk->zbp = zbp;
                 cblk->nonzerobits = v;
             }
             if ((newpasses = getnpasses(s)) < 0)
@@ -1105,8 +1109,29 @@  static int jpeg2000_decode_packet(Jpeg2000DecoderContext *s, Jpeg2000Tile *tile,
                     }
                 }
 
-                if ((ret = get_bits(s, av_log2(newpasses1) + cblk->lblock)) < 0)
-                    return ret;
+                if (newpasses > 1 && s->is_htj2k) {
+                    // Retrieve pass lengths for each pass
+                    int href_passes =  (cblk->npasses + newpasses - 1) % 3;
+                    int segment_passes = newpasses - href_passes;
+                    int pass_bound = 2;
+                    int eb = 0;
+                    int extra_bit = newpasses > 2 ? 1 : 0;
+                    while (pass_bound <=segment_passes) {
+                        eb++;
+                        pass_bound +=pass_bound;
+                    }
+                    if ((ret = get_bits(s, llen + eb + 3)) < 0)
+                        return ret;
+                    cblk->pass_lengths[0] = ret;
+                    if ((ret = get_bits(s, llen + 3 + extra_bit)) < 0)
+                        return ret;
+                    cblk->pass_lengths[1] = ret;
+                    ret = cblk->pass_lengths[0] + cblk->pass_lengths[1];
+                } else {
+                    if ((ret = get_bits(s, av_log2(newpasses1) + cblk->lblock)) < 0)
+                        return ret;
+                    cblk->pass_lengths[0]=ret;
+                }
                 if (ret > cblk->data_allocated) {
                     size_t new_size = FFMAX(2*cblk->data_allocated, ret);
                     void *new = av_realloc(cblk->data, new_size);
@@ -1857,7 +1882,10 @@  static inline void tile_codeblocks(const Jpeg2000DecoderContext *s, Jpeg2000Tile
     for (compno = 0; compno < s->ncomponents; compno++) {
         Jpeg2000Component *comp     = tile->comp + compno;
         Jpeg2000CodingStyle *codsty = tile->codsty + compno;
+        Jpeg2000QuantStyle *quantsty= tile->qntsty + compno;
+
         int coded = 0;
+        int subbandno = 0;
 
         t1.stride = (1<<codsty->log2_cblk_width) + 2;
 
@@ -1865,7 +1893,7 @@  static inline void tile_codeblocks(const Jpeg2000DecoderContext *s, Jpeg2000Tile
         for (reslevelno = 0; reslevelno < codsty->nreslevels2decode; reslevelno++) {
             Jpeg2000ResLevel *rlevel = comp->reslevel + reslevelno;
             /* Loop on bands */
-            for (bandno = 0; bandno < rlevel->nbands; bandno++) {
+            for (bandno = 0; bandno < rlevel->nbands; bandno++, subbandno++) {
                 int nb_precincts, precno;
                 Jpeg2000Band *band = rlevel->band + bandno;
                 int cblkno = 0, bandpos;
@@ -1885,12 +1913,23 @@  static inline void tile_codeblocks(const Jpeg2000DecoderContext *s, Jpeg2000Tile
                     for (cblkno = 0;
                          cblkno < prec->nb_codeblocks_width * prec->nb_codeblocks_height;
                          cblkno++) {
-                        int x, y;
+                        int x, y, ret;
+                        // Annex E (Equation E-2) ISO/IEC 15444-1:2019
+                        int magp = quantsty->expn[subbandno] + quantsty->nguardbits - 1;
+
                         Jpeg2000Cblk *cblk = prec->cblk + cblkno;
-                        int ret = decode_cblk(s, codsty, &t1, cblk,
-                                    cblk->coord[0][1] - cblk->coord[0][0],
-                                    cblk->coord[1][1] - cblk->coord[1][0],
-                                    bandpos, comp->roi_shift);
+
+                        if (s->is_htj2k)
+                            ret = ff_jpeg2000_decode_htj2k(s, codsty, &t1, cblk,
+                                                           cblk->coord[0][1] - cblk->coord[0][0],
+                                                           cblk->coord[1][1] - cblk->coord[1][0],
+                                                           magp, comp->roi_shift);
+                        else
+                            ret = decode_cblk(s, codsty, &t1, cblk,
+                                              cblk->coord[0][1] - cblk->coord[0][0],
+                                              cblk->coord[1][1] - cblk->coord[1][0],
+                                              bandpos, comp->roi_shift);
+
                         if (ret)
                             coded = 1;
                         else
diff --git a/libavcodec/jpeg2000dec.h b/libavcodec/jpeg2000dec.h
index b6410c1432..9728529026 100644
--- a/libavcodec/jpeg2000dec.h
+++ b/libavcodec/jpeg2000dec.h
@@ -121,6 +121,8 @@  typedef struct Jpeg2000DecoderContext {
 
     /*options parameters*/
     int             reduction_factor;
+    /*HTJ2K params*/
+    uint8_t         is_htj2k;
 } Jpeg2000DecoderContext;
 
 #endif //AVCODEC_JPEG2000DEC_H
diff --git a/libavcodec/jpeg2000htdec.c b/libavcodec/jpeg2000htdec.c
new file mode 100644
index 0000000000..8ea901855f
--- /dev/null
+++ b/libavcodec/jpeg2000htdec.c
@@ -0,0 +1,1441 @@ 
+/*
+ * Copyright (c) 2022 Caleb Etemesi <etemesicaleb@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include "jpeg2000htdec.h"
+#include "jpeg2000.h"
+#include "jpeg2000dec.h"
+#include <stdint.h>
+#include <libavutil/attributes.h>
+#include <libavutil/common.h>
+#include <libavutil/avassert.h>
+
+#define J2K_Q1 0
+
+#define J2K_Q2 1
+
+#define HT_SHIFT_SIGMA 0
+#define HT_SHIFT_SCAN 4
+#define HT_SHIFT_REF 3
+#define HT_SHIFT_REF_IND 2
+/**
+ * @brief Table 2 in clause 7.3.3
+ */
+const static uint8_t MEL_E[13] = {0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 5};
+// Decode tables, found at the end of this
+static const uint16_t dec_CxtVLC_table1[1024];
+static const uint16_t dec_CxtVLC_table0[1024];
+
+typedef struct StateVars {
+    int32_t pos;
+    uint32_t bits;
+    uint32_t tmp;
+    uint32_t last;
+    uint8_t bits_left;
+    uint64_t bit_buf;
+} StateVars;
+
+typedef struct MelDecoderState {
+    uint8_t k;
+    uint8_t run;
+    uint8_t one;
+} MelDecoderState;
+
+/**
+ *  Given a precomputed c, checks whether n % d == 0
+ *
+ *  precomputed c for d is created by the precompute_c function
+ */
+static av_always_inline uint32_t is_divisible(uint32_t n, uint64_t c)
+{
+    return n * c <= c - 1;
+}
+/**
+ * Precompute a number c given d that when combined with a dividend n
+ * returns whether n % d == 0
+ *
+ * Combined with is_divisible function
+ */
+static av_always_inline uint64_t precompute_c(uint32_t divisor)
+{
+
+    return 1 + (0xfffffffffffffffful / divisor);
+}
+/* Initializers */
+
+static void jpeg2000_init_zero(StateVars *s)
+{
+    s->bits_left = 0;
+    s->bit_buf   = 0;
+    s->tmp       = 0;
+    s->bits      = 0;
+    s->pos       = 0;
+    s->last      = 0;
+}
+
+static void jpeg2000_init_mel(StateVars *s, uint32_t Pcup)
+{
+    jpeg2000_init_zero(s);
+    s->pos = Pcup;
+}
+
+static void jpeg2000_init_mag_ref(StateVars *s, uint32_t Lref)
+{
+    s->pos       = Lref - 2;
+    s->bits      = 0;
+    s->last      = 0xFF;
+    s->tmp       = 0;
+    s->bits_left = 0;
+    s->bit_buf   = 0;
+}
+
+static void jpeg2000_init_mel_decoder(MelDecoderState *mel_state)
+{
+    mel_state->k   = 0;
+    mel_state->run = 0;
+    mel_state->one = 0;
+}
+
+/**
+ * @brief Refill the buffer backwards in little Endian while skipping
+ * over stuffing bits
+ *
+ * Stuffing bits are those that appear in the position of any byte whose
+ * LSBs are all 1's if the last consumed byte was larger than 0x8F
+ */
+static int jpeg2000_bitbuf_refill_backwards(StateVars *buffer,
+                                            const uint8_t *array)
+{
+    uint64_t tmp = 0;
+    int32_t position = buffer->pos;
+    uint32_t new_bits = 32;
+    uint32_t mask;
+
+    if (buffer->bits_left > 32)
+        return 0; // enough data, no need to pull in more bits
+
+    /*
+     * We are reading bits from end to start, and we need to handle them in LE.
+     * therefore, if we read bytes ABCD, we need to convert
+     * them to DCBA, but the bitstream is constructed in such a way that it is
+     * BE when reading from back to front,so we need to swap bytes
+     * but this doesn't work when position is less than 3,
+     * we end up reading bits from the MEL byte-stream which
+     * is a recipe for sleepless nights.
+     *
+     * So the trick is to branchlessly read and mask
+     * depending on whatever was the initial position,
+     * the mask is simply either 32 bits, 24 bits ,8 bits or 0 bits.
+     * depending on how many bytes are available, with this, we don't
+     * read past the end of the buffer,we mask
+     * bits we already read ensuring we don't read twice.
+     * and we can do it branchlessly without checking for positions.
+     *
+     * We need to watch out for negative shift values which are UB in C hence
+     * the MAX declarative.
+     */
+
+    position -= 4;
+    mask = (UINT64_C(1) << (FFMIN(4, buffer->pos + 1)) * 8) - 1;
+    tmp  = array[position + 1] * (1ull << 24);
+    tmp |= array[position + 2] << 16;
+    tmp |= array[position + 3] << 8;
+    tmp |= array[position + 4];
+    tmp &= mask;
+
+    // Branchlessly unstuff  bits
+
+    // load temporary byte, which preceeds the position we
+    // currently at, to ensure that we can also un-stuff if the
+    // stuffed bit is the bottom most bits
+    tmp <<= 8;
+    tmp |= array[buffer->pos + 1];
+
+    if ((tmp & 0x7FFF000000) > 0x7F8F000000) {
+        tmp &= 0x7FFFFFFFFF;
+        new_bits--;
+    }
+    if ((tmp & 0x007FFF0000) > 0x007F8F0000) {
+        tmp = (tmp & 0x007FFFFFFF) + ((tmp & 0xFF00000000) >> 1);
+        new_bits--;
+    }
+    if ((tmp & 0x00007FFF00) > 0x00007F8F00) {
+        tmp = (tmp & 0x00007FFFFF) + ((tmp & 0xFFFF000000) >> 1);
+        new_bits--;
+    }
+    if ((tmp & 0x0000007FFF) > 0x0000007F8F) {
+        tmp = (tmp & 0x0000007FFF) + ((tmp & 0xFFFFFF0000) >> 1);
+        new_bits--;
+    }
+    /* remove temporary byte loaded. */
+    tmp >>= 8;
+
+    /* Add bits to the MSB of the bit buffer */
+    buffer->bit_buf |= tmp << buffer->bits_left;
+    buffer->bits_left += new_bits;
+    buffer->pos = FFMAX(0, position);
+    return 0;
+}
+
+/**
+ * @brief Refill the bit-buffer reading new bits going forward
+ * in the stream while skipping over stuffed bits.
+ */
+static void jpeg2000_bitbuf_refill_forward(StateVars *buffer,
+                                           const uint8_t *array,
+                                           uint32_t length)
+{
+    while (buffer->bits_left < 32) {
+        buffer->tmp = 0xFF;
+        buffer->bits = (buffer->last == 0xFF) ? 7 : 8;
+        if (buffer->pos <= length) {
+            buffer->tmp = array[buffer->pos];
+            buffer->pos += 1;
+            buffer->last = buffer->tmp;
+        }
+        buffer->bit_buf |= ((uint64_t) buffer->tmp) << buffer->bits_left;
+        buffer->bits_left += buffer->bits;
+    }
+}
+
+/**
+ * @brief Drops bits from lower bits in the bit buffer
+ *
+ * @param buf: Struct containing bit buffers
+ * @param nbits: Number of bits to remove.
+ */
+static av_always_inline void jpeg2000_bitbuf_drop_bits_lsb(StateVars *buf,
+                                                           uint8_t nbits)
+{
+    if (buf->bits_left < nbits) {
+        av_log(NULL, AV_LOG_ERROR, "Invalid bit read of %d, bits in buffer are %d\n", nbits, buf->bits_left);
+        av_assert0(0);
+    }
+    buf->bit_buf >>= nbits;
+    buf->bits_left -= nbits;
+}
+
+/**
+ * @brief  Get bits from the bit buffer reading them
+ * from the least significant bits moving to the most significant bits.
+ *
+ * In case there are fewer bits, refill from `buf` moving backwards.
+ */
+static av_always_inline uint64_t jpeg2000_bitbuf_get_bits_lsb(
+        StateVars *bit_stream, uint8_t nbits, const uint8_t *buf)
+{
+    uint64_t bits;
+    uint64_t mask = (1ull << nbits) - 1;
+    if (bit_stream->bits_left < nbits)
+        jpeg2000_bitbuf_refill_backwards(bit_stream, buf);
+    bits = bit_stream->bit_buf & mask;
+    jpeg2000_bitbuf_drop_bits_lsb(bit_stream, nbits);
+    return bits;
+}
+
+/**
+ * @brief Get bits from the bit buffer reading them
+ * from the least significant bits moving to the most significant bits
+ *
+ * In case there are fewer bits, refill from `buf` moving forward
+ */
+static av_always_inline uint64_t jpeg2000_bitbuf_get_bits_lsb_forward(
+        StateVars *bit_stream, uint8_t nbits, const uint8_t *buf, uint32_t length)
+{
+    uint64_t bits;
+    uint64_t mask = (1ull << nbits) - 1;
+
+    if (bit_stream->bits_left <= nbits)
+        jpeg2000_bitbuf_refill_forward(bit_stream, buf, length);
+    bits = bit_stream->bit_buf & mask;
+    jpeg2000_bitbuf_drop_bits_lsb(bit_stream, nbits);
+    return bits;
+}
+
+/**
+ * @brief Look ahead bit buffer without discarding bits
+ */
+static av_always_inline uint64_t jpeg2000_bitbuf_peek_bits_lsb(StateVars *stream, uint8_t nbits)
+{
+    uint64_t mask = (1ull << nbits) - 1;
+    return stream->bit_buf & mask;
+}
+
+/**
+ * Variable Length Decoding Routines
+ */
+static void jpeg2000_init_vlc(StateVars *s, uint32_t Lcup, uint32_t Pcup, const uint8_t *Dcup)
+{
+    s->bits_left = 0;
+    s->bit_buf   = 0;
+    s->pos       = Lcup - 2 - Pcup;
+    s->last      = Dcup[Lcup - 2];
+    s->tmp       = (s->last) >> 4;
+    s->bits      = ((s->tmp & 7) < 7) ? 4 : 3;
+
+    jpeg2000_bitbuf_refill_backwards(s, Dcup + Pcup);
+    jpeg2000_bitbuf_drop_bits_lsb(s, 4);
+}
+
+/**
+ * Decode prefix codes for VLC segment.
+ */
+static int av_always_inline jpeg2000_decode_ctx_vlc(const Jpeg2000DecoderContext *s,
+                                                    StateVars *vlc_stream,
+                                                    const uint16_t *table,
+                                                    const uint8_t *Dcup,
+                                                    uint8_t *sig_pat,
+                                                    uint8_t *res_off,
+                                                    uint8_t *emb_pat_k,
+                                                    uint8_t *emb_pat_1,
+                                                    uint8_t pos,
+                                                    uint32_t Pcup,
+                                                    uint16_t context)
+{
+    /* Described in clause 7.3.5 */
+    uint32_t value;
+    uint8_t len;
+    uint64_t index;
+    uint64_t code_word;
+
+    jpeg2000_bitbuf_refill_backwards(vlc_stream, Dcup + Pcup);
+
+    code_word = vlc_stream->bit_buf & 0x7f;
+    index = code_word + (context << 7);
+
+    /* decode table has 1024 entries so ensure array access is in bounds */
+    av_assert0(index < 1024);
+
+    value = table[index];
+
+    len = (value & 0x000F) >> 1;
+
+    res_off[pos] = (uint8_t) (value & 1);
+    sig_pat[pos] = (uint8_t) ((value & 0x00F0) >> 4);
+    emb_pat_k[pos] = (uint8_t) ((value & 0x0F00) >> 8);
+    emb_pat_1[pos] = (uint8_t) ((value & 0xF000) >> 12);
+
+    jpeg2000_bitbuf_drop_bits_lsb(vlc_stream, len);
+    return 0;
+}
+
+/**
+ * @brief Decode variable length u-vlc prefix
+ */
+static av_always_inline uint8_t
+vlc_decode_u_prefix(StateVars *vlc_stream, const uint8_t *refill_array)
+{
+    /* clause 7.3.6
+     * procedure : decodeUPrefix.
+     */
+    static const uint8_t return_value[8] = {5, 1, 2, 1, 3, 1, 2, 1};
+    static const uint8_t drop_bits[8] = {3, 1, 2, 1, 3, 1, 2, 1};
+
+    uint8_t bits;
+
+    if (vlc_stream->bits_left < 3)
+        jpeg2000_bitbuf_refill_backwards(vlc_stream, refill_array);
+
+    bits = jpeg2000_bitbuf_peek_bits_lsb(vlc_stream, 3);
+
+    jpeg2000_bitbuf_drop_bits_lsb(vlc_stream, drop_bits[bits]);
+    return return_value[bits];
+}
+
+/**
+ * @brief Decode variable length u-vlc suffix
+ */
+static av_always_inline uint8_t vlc_decode_u_suffix(
+        StateVars *vlc_stream, uint8_t suffix, const uint8_t *refill_array)
+{
+    /* clause 7.3.6
+     * procedure: decodeUSuffix
+     */
+    static const int mask[] = {1, 31};
+    static const int drop_bits[] = {1, 5};
+
+    uint8_t bits;
+    int cond = suffix != 3;
+    if (suffix < 3)
+        return 0;
+
+    if (vlc_stream->bits_left < 5)
+        jpeg2000_bitbuf_refill_backwards(vlc_stream, refill_array);
+
+    bits = jpeg2000_bitbuf_peek_bits_lsb(vlc_stream, 5);
+
+    jpeg2000_bitbuf_drop_bits_lsb(vlc_stream, drop_bits[cond]);
+    return bits & mask[cond];
+}
+
+/**
+ * @brief Decode variable length u-vlc suffix
+ */
+static av_always_inline uint8_t vlc_decode_u_extension(
+        StateVars *vlc_stream, uint8_t suffix, const uint8_t *refill_array)
+{
+    /* clause 7.3.6
+     * procedure decodeUExtension.
+     */
+    return jpeg2000_bitbuf_get_bits_lsb(vlc_stream, 4 * (suffix >= 28), refill_array);
+}
+
+/**
+ * Magnitude and Sign decode procedures
+ */
+static int32_t av_always_inline jpeg2000_decode_mag_sgn(StateVars *mag_sgn_stream, int32_t m_n,
+                                                        int32_t i_n, const uint8_t *buf, uint32_t length)
+{
+    /* clause 7.3.8
+     * procedure: decodeMagSgnValue
+     */
+    int32_t val = 0;
+    if (m_n > 0) {
+        val = jpeg2000_bitbuf_get_bits_lsb_forward(mag_sgn_stream,m_n,buf,length);
+        val += (i_n << m_n);
+    }
+    return val;
+}
+
+static av_always_inline void
+recover_mag_sgn(StateVars *mag_sgn, uint8_t pos, uint16_t q, int32_t m_n[2], int32_t known_1[2],
+                const uint8_t emb_pat_1[2],
+                int32_t v[2][4], int32_t m[2][4], uint8_t *E, uint32_t *mu_n, const uint8_t *Dcup, uint32_t Pcup,
+                uint32_t pLSB)
+{
+    for (int i = 0; i < 4; i++) {
+        int32_t n = 4 * q + i;
+        m_n[pos] = m[pos][i];
+        known_1[pos] = (emb_pat_1[pos] >> i) & 1;
+        v[pos][i] = jpeg2000_decode_mag_sgn(mag_sgn, m_n[pos], known_1[pos], Dcup, Pcup);
+
+        if (m_n[pos] != 0) {
+            E[n] = 32 - ff_clz(v[pos][i] | 1);
+            mu_n[n] = (v[pos][i] >> 1) + 1;
+            mu_n[n] <<= pLSB;
+            mu_n[n] |= ((uint32_t) (v[pos][i] & 1)) << 31; // sign bit.
+        }
+    }
+}
+
+static int jpeg2000_import_bit(StateVars *stream, const uint8_t *array, uint32_t length)
+{
+    int cond = stream->pos <= length;
+    int pos = FFMIN(stream->pos, length);
+    if (stream->bits == 0) {
+        stream->bits = (stream->tmp == 0xFF) ? 7 : 8;
+        stream->pos += cond;
+        stream->tmp = cond ? array[pos] : 0xFF;
+    }
+    stream->bits -= 1;
+    return (stream->tmp >> stream->bits) & 1;
+}
+
+static int jpeg2000_peek_bit(StateVars *stream, const uint8_t *array, uint32_t length)
+{
+    if (stream->bits == 0) {
+        int cond = stream->pos <= length;
+        int pos = FFMIN(stream->pos, length);
+        stream->bits = (stream->tmp == 0xFF) ? 7 : 8;
+        stream->pos += cond;
+        stream->tmp = cond ? array[pos] : 0xFF;
+    }
+    return (stream->tmp >> stream->bits) & 1;
+}
+
+static int jpeg2000_decode_mel_sym(MelDecoderState *mel_state,
+                                   StateVars *mel_stream,
+                                   const uint8_t *Dcup,
+                                   uint32_t Lcup)
+{
+
+    if (mel_state->run == 0 && mel_state->one == 0) {
+        uint8_t eval;
+        uint8_t bit;
+
+        eval = MEL_E[mel_state->k];
+        bit = jpeg2000_import_bit(mel_stream, Dcup, Lcup);
+        if (bit == 1) {
+            mel_state->run = 1 << eval;
+            mel_state->k = FFMIN(12, mel_state->k + 1);
+        } else {
+            mel_state->run = 0;
+            while (eval > 0) {
+                bit = jpeg2000_import_bit(mel_stream, Dcup, Lcup);
+                mel_state->run = (2 * (mel_state->run)) + bit;
+                eval -= 1;
+            }
+            mel_state->k = FFMAX(0, mel_state->k - 1);
+            mel_state->one = 1;
+        }
+    }
+    if (mel_state->run > 0) {
+        mel_state->run -= 1;
+        return 0;
+    } else {
+        mel_state->one = 0;
+        return 1;
+    }
+}
+
+/**
+ * @brief Magref decoding procedures
+ */
+static av_always_inline int jpeg2000_import_magref_bit(StateVars *stream, const uint8_t *array, uint32_t length)
+{
+    return jpeg2000_bitbuf_get_bits_lsb(stream, 1, array);
+}
+
+/**
+ * @brief Signal EMB decode
+ */
+static int
+jpeg2000_decode_sig_emb(const Jpeg2000DecoderContext *s, MelDecoderState *mel_state, StateVars *mel_stream,
+                        StateVars *vlc_stream, const uint16_t *vlc_table, const uint8_t *Dcup, uint8_t *sig_pat,
+                        uint8_t *res_off, uint8_t *emb_pat_k, uint8_t *emb_pat_1, uint8_t pos, uint16_t context,
+                        uint32_t Lcup, uint32_t Pcup)
+{
+    if (context == 0) {
+        uint8_t sym;
+        sym = jpeg2000_decode_mel_sym(mel_state, mel_stream, Dcup, Lcup);
+        if (sym == 0) {
+            sig_pat[pos] = 0;
+            res_off[pos] = 0;
+            emb_pat_k[pos] = 0;
+            emb_pat_1[pos] = 0;
+            return 0;
+        }
+    }
+    return jpeg2000_decode_ctx_vlc(s, vlc_stream, vlc_table, Dcup, sig_pat,
+                                   res_off, emb_pat_k, emb_pat_1, pos, Pcup,
+                                   context);
+}
+
+static av_always_inline int jpeg2000_get_state(int x1, int x2, int width, int shift_by, const uint8_t *block_states)
+{
+    return (block_states[(x1 + 1) * (width + 2) + (x2 + 1)] >> shift_by) & 1;
+}
+
+static av_always_inline void jpeg2000_modify_state(int x1, int x2, int width, int value, uint8_t *block_states)
+{
+    block_states[(x1 + 1) * (width + 2) + (x2 + 1)] |= value;
+}
+
+static int av_noinline
+jpeg2000_decode_ht_cleanup(const Jpeg2000DecoderContext *s, Jpeg2000Cblk *cblk, Jpeg2000T1Context *t1,
+                           MelDecoderState *mel_state,
+                           StateVars *mel_stream, StateVars *vlc_stream, StateVars *mag_sgn_stream, const uint8_t *Dcup,
+                           uint32_t Lcup,
+                           uint32_t Pcup, uint8_t pLSB, int width, int height, int32_t *sample_buf,
+                           uint8_t *block_states)
+{
+    uint16_t q = 0; /* Represents current quad position. */
+    uint16_t q1, q2;
+    uint16_t context1, context2;
+    uint16_t context = 0;
+
+    uint8_t sig_pat[2]   = {0}; /* significance pattern*/
+    uint8_t res_off[2]   = {0}; /* residual offset*/
+    uint8_t emb_pat_k[2] = {0}; /* Exponent Max Bound pattern K */
+    uint8_t emb_pat_1[2] = {0}; /* Exponent Max Bound pattern 1.*/
+    uint8_t gamma[2]     = {0};
+
+    uint8_t E_n[2]  = {0};
+    uint8_t E_ne[2] = {0};
+    uint8_t E_nw[2] = {0};
+    uint8_t E_nf[2] = {0};
+
+    uint8_t max_e[2] = {0};
+    uint8_t u_pfx[2] = {0};
+    uint8_t u_sfx[2] = {0};
+    uint8_t u_ext[2] = {0};
+
+    int32_t u[2]       = {0};
+    int32_t U[2]       = {0}; /* Exponent bound (7.3.7) */
+    int32_t m_n[2]     = {0};
+    int32_t known_1[2] = {0};
+
+    int32_t m[2][4] = {0};
+    int32_t v[2][4] = {0};
+
+    uint8_t kappa[2] = {1, 1};
+    int ret = 0;
+
+    int sp;
+
+    uint64_t c;
+
+    uint8_t *sigma;
+    uint32_t *mu;
+
+    const uint8_t *vlc_buf = Dcup + Pcup;
+    /* convert to raster-scan */
+    const uint16_t is_border_x = width % 2;
+    const uint16_t is_border_y = height % 2;
+
+    int j1, j2;
+    int x1 = 0, x2 = 0, x3 = 0;
+
+    const uint16_t quad_width = ff_jpeg2000_ceildivpow2(width, 1);
+    const uint16_t quad_height = ff_jpeg2000_ceildivpow2(height, 1);
+
+    size_t buf_size = 4 * quad_width * quad_height;
+
+    uint8_t *sigma_n = av_calloc(buf_size, sizeof(uint8_t));
+    uint8_t *E = av_calloc(buf_size, sizeof(uint8_t));
+    uint32_t *mu_n = av_calloc(buf_size, sizeof(uint32_t));
+
+    if (!sigma_n || !E || !mu_n) {
+        ret = AVERROR(ENOMEM);
+        goto free;
+    }
+
+    sigma = sigma_n;
+    mu = mu_n;
+
+    while (q < quad_width - 1) {
+        q1 = q;
+        q2 = q1 + 1;
+
+        if ((ret = jpeg2000_decode_sig_emb(s, mel_state, mel_stream, vlc_stream,
+                                           dec_CxtVLC_table0, Dcup, sig_pat, res_off,
+                                           emb_pat_k, emb_pat_1, J2K_Q1, context, Lcup,
+                                           Pcup)) < 0)
+            goto free;
+        for (int i = 0; i < 4; i++)
+            sigma_n[4 * q1 + i] = (sig_pat[J2K_Q1] >> i) & 1;
+
+        // calculate context
+        context  = sigma_n[4 * q1];           // f
+        context |= sigma_n[4 * q1 + 1];      // sf
+        context += sigma_n[4 * q1 + 2] << 1; // w << 1
+        context += sigma_n[4 * q1 + 3] << 2;
+
+        if ((ret = jpeg2000_decode_sig_emb(s, mel_state, mel_stream, vlc_stream,
+                                           dec_CxtVLC_table0, Dcup, sig_pat, res_off,
+                                           emb_pat_k, emb_pat_1, J2K_Q2, context, Lcup,
+                                           Pcup)) < 0)
+            goto free;
+
+        for (int i = 0; i < 4; i++)
+            sigma_n[4 * q2 + i] = (sig_pat[J2K_Q2] >> i) & 1;
+
+        // calculate context for the next quad
+        context  = sigma_n[4 * q2];           // f
+        context |= sigma_n[4 * q2 + 1];      // sf
+        context += sigma_n[4 * q2 + 2] << 1; // w << 1
+        context += sigma_n[4 * q2 + 3] << 2; // sw << 2
+
+        u[0] = 0;
+        u[1] = 0;
+
+        jpeg2000_bitbuf_refill_backwards(vlc_stream, vlc_buf);
+
+        if (res_off[J2K_Q1] == 1 && res_off[J2K_Q2] == 1) {
+
+            if (jpeg2000_decode_mel_sym(mel_state, mel_stream, Dcup, Lcup) == 1) {
+
+                u_pfx[J2K_Q1] = vlc_decode_u_prefix(vlc_stream, vlc_buf);
+                u_pfx[J2K_Q2] = vlc_decode_u_prefix(vlc_stream, vlc_buf);
+
+                u_sfx[J2K_Q1] = vlc_decode_u_suffix(vlc_stream, u_pfx[J2K_Q1], vlc_buf);
+                u_sfx[J2K_Q2] = vlc_decode_u_suffix(vlc_stream, u_pfx[J2K_Q2], vlc_buf);
+
+                u_ext[J2K_Q1] = vlc_decode_u_extension(vlc_stream, u_sfx[J2K_Q1], vlc_buf);
+                u_ext[J2K_Q2] = vlc_decode_u_extension(vlc_stream, u_sfx[J2K_Q2], vlc_buf);
+
+                u[J2K_Q1] = 2 + u_pfx[J2K_Q1] + u_sfx[J2K_Q1] + (u_ext[J2K_Q1] * 4);
+                u[J2K_Q2] = 2 + u_pfx[J2K_Q2] + u_sfx[J2K_Q2] + (u_ext[J2K_Q2] * 4);
+
+            } else {
+                u_pfx[J2K_Q1] = vlc_decode_u_prefix(vlc_stream, vlc_buf);
+
+                if (u_pfx[J2K_Q1] > 2) {
+                    u[J2K_Q2] = jpeg2000_bitbuf_get_bits_lsb(vlc_stream, 1, vlc_buf) + 1;
+                    u_sfx[J2K_Q1] = vlc_decode_u_suffix(vlc_stream, u_pfx[J2K_Q1], vlc_buf);
+                    u_ext[J2K_Q1] = vlc_decode_u_extension(vlc_stream, u_sfx[J2K_Q1], vlc_buf);
+                } else {
+                    u_pfx[J2K_Q2] = vlc_decode_u_prefix(vlc_stream, vlc_buf);
+                    u_sfx[J2K_Q1] = vlc_decode_u_suffix(vlc_stream, u_pfx[J2K_Q1], vlc_buf);
+                    u_sfx[J2K_Q2] = vlc_decode_u_suffix(vlc_stream, u_pfx[J2K_Q2], vlc_buf);
+                    u_ext[J2K_Q1] = vlc_decode_u_extension(vlc_stream, u_sfx[J2K_Q1], vlc_buf);
+                    u_ext[J2K_Q2] = vlc_decode_u_extension(vlc_stream, u_sfx[J2K_Q2], vlc_buf);
+                    u[J2K_Q2] = u_pfx[J2K_Q2] + u_sfx[J2K_Q2] + (u_ext[J2K_Q2] * 4);
+                }
+                // clause 7.3.6 (3)
+                u[J2K_Q1] = u_pfx[J2K_Q1] + u_sfx[J2K_Q1] + (u_ext[J2K_Q1] * 4);
+            }
+
+        } else if (res_off[J2K_Q1] == 1 || res_off[J2K_Q2] == 1) {
+            uint8_t pos = res_off[J2K_Q1] == 1 ? 0 : 1;
+            u_pfx[pos] = vlc_decode_u_prefix(vlc_stream, vlc_buf);
+            u_sfx[pos] = vlc_decode_u_suffix(vlc_stream, u_pfx[pos], vlc_buf);
+            u_ext[pos] = vlc_decode_u_extension(vlc_stream, u_sfx[pos], vlc_buf);
+            u[pos] = u_pfx[pos] + u_sfx[pos] + (u_ext[pos] * 4);
+        }
+        U[J2K_Q1] = kappa[J2K_Q1] + u[J2K_Q1];
+        U[J2K_Q2] = kappa[J2K_Q2] + u[J2K_Q2];
+
+        for (int i = 0; i < 4; i++) {
+            m[J2K_Q1][i] = sigma_n[4 * q1 + i] * U[J2K_Q1] - ((emb_pat_k[J2K_Q1] >> i) & 1);
+            m[J2K_Q2][i] = sigma_n[4 * q2 + i] * U[J2K_Q2] - ((emb_pat_k[J2K_Q2] >> i) & 1);
+        }
+
+        recover_mag_sgn(mag_sgn_stream, J2K_Q1, q1, m_n, known_1, emb_pat_1, v, m,
+                        E, mu_n, Dcup, Pcup, pLSB);
+
+        recover_mag_sgn(mag_sgn_stream, J2K_Q2, q2, m_n, known_1, emb_pat_1, v, m,
+                        E, mu_n, Dcup, Pcup, pLSB);
+
+        // prepare context for the next quad
+
+        // move to the next quad pair
+        q += 2;
+    }
+    if (quad_width % 2 == 1) {
+        q1 = q;
+
+        if ((ret = jpeg2000_decode_sig_emb(s, mel_state, mel_stream, vlc_stream,
+                                           dec_CxtVLC_table0, Dcup, sig_pat, res_off,
+                                           emb_pat_k, emb_pat_1, J2K_Q1, context, Lcup,
+                                           Pcup)) < 0)
+            goto free;
+
+        for (int i = 0; i < 4; i++)
+            sigma_n[4 * q1 + i] = (sig_pat[J2K_Q1] >> i) & 1;
+
+        u[J2K_Q1] = 0;
+
+        if (res_off[J2K_Q1] == 1) {
+            u_pfx[J2K_Q1] = vlc_decode_u_prefix(vlc_stream, vlc_buf);
+            u_sfx[J2K_Q1] = vlc_decode_u_suffix(vlc_stream, u_pfx[J2K_Q1], vlc_buf);
+            u_ext[J2K_Q1] = vlc_decode_u_extension(vlc_stream, u_sfx[J2K_Q1], vlc_buf);
+            u[J2K_Q1] = u_pfx[J2K_Q1] + u_sfx[J2K_Q1] + (u_ext[J2K_Q1] * 4);
+        }
+
+        U[J2K_Q1] = kappa[J2K_Q1] + u[J2K_Q1];
+
+        for (int i = 0; i < 4; i++)
+            m[J2K_Q1][i] = sigma_n[4 * q1 + i] * U[J2K_Q1] - ((emb_pat_k[J2K_Q1] >> i) & 1);
+
+        recover_mag_sgn(mag_sgn_stream, J2K_Q1, q1, m_n, known_1, emb_pat_1, v, m,
+                        E, mu_n, Dcup, Pcup, pLSB);
+
+        q++; /* move to next quad pair */
+    }
+    /* initial line pair end. */
+    /*
+     * As an optimization, we can replace modulo operations with
+     * checking if a number is divisible , since that's the only thing we need.
+     * this is paired with is_divisible.
+     * Credits to Daniel Lemire blog post:
+     * https://lemire.me/blog/2019/02/08/faster-remainders-when-the-divisor-is-a-constant-beating-compilers-and-libdivide/
+     * It's UB on zero, but we can't have a quad being zero, the spec doesn't allow,
+     * so we error out early in case that's the case.
+     */
+    c = precompute_c(quad_width);
+
+    for (int row = 1; row < quad_height; row++) {
+        while ((q - (row * quad_width)) < quad_width - 1 && q < (quad_height * quad_width)) {
+            q1 = q;
+            q2 = q + 1;
+            context1  = sigma_n[4 * (q1 - quad_width) + 1];
+            context1 += sigma_n[4 * (q1 - quad_width) + 3] << 2; // ne
+
+            if (!is_divisible(q1, c)) {
+                context1 |= sigma_n[4 * (q1 - quad_width) - 1];               // nw
+                context1 += (sigma_n[4 * q1 - 1] | sigma_n[4 * q1 - 2]) << 1; // sw| q
+            }
+            if (!is_divisible(q1 + 1, c))
+                context1 |= sigma_n[4 * (q1 - quad_width) + 5] << 2;
+
+            if ((ret = jpeg2000_decode_sig_emb(s, mel_state, mel_stream, vlc_stream,
+                                               dec_CxtVLC_table1, Dcup, sig_pat, res_off,
+                                               emb_pat_k, emb_pat_1, J2K_Q1, context1, Lcup,
+                                               Pcup))
+                < 0)
+                goto free;
+
+            for (int i = 0; i < 4; i++)
+                sigma_n[4 * q1 + i] = (sig_pat[J2K_Q1] >> i) & 1;
+
+            context2  = sigma_n[4 * (q2 - quad_width) + 1];
+            context2 += sigma_n[4 * (q2 - quad_width) + 3] << 2;
+
+            if (!is_divisible(q2, c)) {
+                context2 |= sigma_n[4 * (q2 - quad_width) - 1];
+                context2 += (sigma_n[4 * q2 - 1] | sigma_n[4 * q2 - 2]) << 1;
+            }
+            if (!is_divisible(q2 + 1, c))
+                context2 |= sigma_n[4 * (q2 - quad_width) + 5] << 2;
+
+            if ((ret = jpeg2000_decode_sig_emb(s, mel_state, mel_stream, vlc_stream,
+                                               dec_CxtVLC_table1, Dcup, sig_pat, res_off,
+                                               emb_pat_k, emb_pat_1, J2K_Q2, context2, Lcup,
+                                               Pcup))
+                < 0)
+                goto free;
+
+            for (int i = 0; i < 4; i++)
+                sigma_n[4 * q2 + i] = (sig_pat[J2K_Q2] >> i) & 1;
+
+            u[J2K_Q1] = 0;
+            u[J2K_Q2] = 0;
+
+            jpeg2000_bitbuf_refill_backwards(vlc_stream, vlc_buf);
+
+            if (res_off[J2K_Q1] == 1 && res_off[J2K_Q2] == 1) {
+                u_pfx[J2K_Q1] = vlc_decode_u_prefix(vlc_stream, vlc_buf);
+                u_pfx[J2K_Q2] = vlc_decode_u_prefix(vlc_stream, vlc_buf);
+
+                u_sfx[J2K_Q1] = vlc_decode_u_suffix(vlc_stream, u_pfx[J2K_Q1], vlc_buf);
+                u_sfx[J2K_Q2] = vlc_decode_u_suffix(vlc_stream, u_pfx[J2K_Q2], vlc_buf);
+
+                u_ext[J2K_Q1] = vlc_decode_u_extension(vlc_stream, u_sfx[J2K_Q1], vlc_buf);
+                u_ext[J2K_Q2] = vlc_decode_u_extension(vlc_stream, u_sfx[J2K_Q2], vlc_buf);
+
+                u[J2K_Q1] = u_pfx[J2K_Q1] + u_sfx[J2K_Q1] + (u_ext[J2K_Q1] << 2);
+                u[J2K_Q2] = u_pfx[J2K_Q2] + u_sfx[J2K_Q2] + (u_ext[J2K_Q2] << 2);
+
+            } else if (res_off[J2K_Q1] == 1 || res_off[J2K_Q2] == 1) {
+                uint8_t pos = res_off[J2K_Q1] == 1 ? 0 : 1;
+
+                u_pfx[pos] = vlc_decode_u_prefix(vlc_stream, vlc_buf);
+                u_sfx[pos] = vlc_decode_u_suffix(vlc_stream, u_pfx[pos], vlc_buf);
+                u_ext[pos] = vlc_decode_u_extension(vlc_stream, u_sfx[pos], vlc_buf);
+
+                u[pos] = u_pfx[pos] + u_sfx[pos] + (u_ext[pos] << 2);
+            }
+            sp = sig_pat[J2K_Q1];
+
+            gamma[J2K_Q1] = 1;
+
+            if (sp == 0 || sp == 1 || sp == 2 || sp == 4 || sp == 8)
+                gamma[J2K_Q1] = 0;
+
+            sp = sig_pat[J2K_Q2];
+
+            gamma[J2K_Q2] = 1;
+
+            if (sp == 0 || sp == 1 || sp == 2 || sp == 4 || sp == 8)
+                gamma[J2K_Q2] = 0;
+
+            E_n[J2K_Q1] = E[4 * (q1 - quad_width) + 1];
+            E_n[J2K_Q2] = E[4 * (q2 - quad_width) + 1];
+
+            E_ne[J2K_Q1] = E[4 * (q1 - quad_width) + 3];
+            E_ne[J2K_Q2] = E[4 * (q2 - quad_width) + 3];
+
+            E_nw[J2K_Q1] = (!is_divisible(q1, c)) * E[FFMAX((4 * (q1 - quad_width) - 1), 0)];
+            E_nw[J2K_Q2] = (!is_divisible(q2, c)) * E[FFMAX((4 * (q2 - quad_width) - 1), 0)];
+
+            E_nf[J2K_Q1] = (!is_divisible(q1 + 1, c)) * E[4 * (q1 - quad_width) + 5];
+            E_nf[J2K_Q2] = (!is_divisible(q2 + 1, c)) * E[4 * (q2 - quad_width) + 5];
+
+            max_e[J2K_Q1] = FFMAX(E_nw[J2K_Q1], FFMAX3(E_n[J2K_Q1], E_ne[J2K_Q1], E_nf[J2K_Q1]));
+            max_e[J2K_Q2] = FFMAX(E_nw[J2K_Q2], FFMAX3(E_n[J2K_Q2], E_ne[J2K_Q2], E_nf[J2K_Q2]));
+
+            kappa[J2K_Q1] = FFMAX(1, gamma[J2K_Q1] * (max_e[J2K_Q1] - 1));
+            kappa[J2K_Q2] = FFMAX(1, gamma[J2K_Q2] * (max_e[J2K_Q2] - 1));
+
+            U[J2K_Q1] = kappa[J2K_Q1] + u[J2K_Q1];
+            U[J2K_Q2] = kappa[J2K_Q2] + u[J2K_Q2];
+
+            for (int i = 0; i < 4; i++) {
+                m[J2K_Q1][i] = sigma_n[4 * q1 + i] * U[J2K_Q1] - ((emb_pat_k[J2K_Q1] >> i) & 1);
+                m[J2K_Q2][i] = sigma_n[4 * q2 + i] * U[J2K_Q2] - ((emb_pat_k[J2K_Q2] >> i) & 1);
+            }
+            recover_mag_sgn(mag_sgn_stream, J2K_Q1, q1, m_n, known_1, emb_pat_1, v, m,
+                            E, mu_n, Dcup, Pcup, pLSB);
+
+            recover_mag_sgn(mag_sgn_stream, J2K_Q2, q2, m_n, known_1, emb_pat_1, v, m,
+                            E, mu_n, Dcup, Pcup, pLSB);
+
+            /* move to the next quad pair */
+            q += 2;
+        }
+        if (quad_width % 2 == 1) {
+            q1 = q;
+            /* calculate context for current quad */
+            context1  = sigma_n[4 * (q1 - quad_width) + 1];
+            context1 += (sigma_n[4 * (q1 - quad_width) + 3] << 2);
+
+            if (!is_divisible(q1, c)) {
+                context1 |= sigma_n[4 * (q1 - quad_width) - 1];
+                context1 += (sigma_n[4 * q1 - 1] | sigma_n[4 * q1 - 2]) << 1;
+            }
+            if (!is_divisible(q1 + 1, c))
+                context1 |= sigma_n[4 * (q1 - quad_width) + 5] << 2;
+
+            if ((ret = jpeg2000_decode_sig_emb(s, mel_state, mel_stream, vlc_stream,
+                                               dec_CxtVLC_table1, Dcup, sig_pat, res_off,
+                                               emb_pat_k, emb_pat_1, J2K_Q1, context1, Lcup,
+                                               Pcup))
+                < 0)
+                goto free;
+
+            for (int i = 0; i < 4; i++)
+                sigma_n[4 * q1 + i] = (sig_pat[J2K_Q1] >> i) & 1;
+
+            u[J2K_Q1] = 0;
+
+            /* Recover mag_sgn value */
+            if (res_off[J2K_Q1] == 1) {
+                u_pfx[J2K_Q1] = vlc_decode_u_prefix(vlc_stream, vlc_buf);
+                u_sfx[J2K_Q1] = vlc_decode_u_suffix(vlc_stream, u_pfx[J2K_Q1], vlc_buf);
+                u_ext[J2K_Q1] = vlc_decode_u_extension(vlc_stream, u_sfx[J2K_Q1], vlc_buf);
+
+                u[J2K_Q1] = u_pfx[J2K_Q1] + u_sfx[J2K_Q1] + (u_ext[J2K_Q1] << 2);
+            }
+
+            sp = sig_pat[J2K_Q1];
+
+            gamma[J2K_Q1] = 1;
+
+            if (sp == 0 || sp == 1 || sp == 2 || sp == 4 || sp == 8)
+                gamma[J2K_Q1] = 0;
+
+            E_n[J2K_Q1] = E[4 * (q1 - quad_width) + 1];
+
+            E_ne[J2K_Q1] = E[4 * (q1 - quad_width) + 3];
+
+            E_nw[J2K_Q1] = (!is_divisible(q1, c)) * E[FFMAX((4 * (q1 - quad_width) - 1), 0)];
+
+            E_nf[J2K_Q1] = (!is_divisible(q1 + 1, c)) * E[4 * (q1 - quad_width) + 5];
+
+            max_e[J2K_Q1] = FFMAX(E_nw[J2K_Q1], FFMAX3(E_n[J2K_Q1], E_ne[J2K_Q1], E_nf[J2K_Q1]));
+
+            kappa[J2K_Q1] = FFMAX(1, gamma[J2K_Q1] * (max_e[J2K_Q1] - 1));
+
+            U[J2K_Q1] = kappa[J2K_Q1] + u[J2K_Q1];
+
+            for (int i = 0; i < 4; i++)
+                m[J2K_Q1][i] = sigma_n[4 * q1 + i] * U[J2K_Q1] - ((emb_pat_k[J2K_Q1] >> i) & 1);
+
+            recover_mag_sgn(mag_sgn_stream, J2K_Q1, q1, m_n, known_1, emb_pat_1, v, m,
+                            E, mu_n, Dcup, Pcup, pLSB);
+            q += 1;
+        }
+    }
+    // convert to raster-scan
+    for (int y = 0; y < quad_height; y++) {
+        for (int x = 0; x < quad_width; x++) {
+            j1 = 2 * y;
+            j2 = 2 * x;
+
+            sample_buf[j2 + (j1 * width)] = (int32_t)*mu;
+            jpeg2000_modify_state(j1, j2, width, *sigma, block_states);
+            sigma += 1;
+            mu += 1;
+
+            x1 = y != quad_height - 1 || is_border_y == 0;
+            sample_buf[j2 + ((j1 + 1) * width)] = ((int32_t)*mu) * x1;
+            jpeg2000_modify_state(j1 + 1, j2, width, (*sigma) * x1, block_states);
+            sigma += 1;
+            mu += 1;
+
+            x2 = x != quad_width - 1 || is_border_x == 0;
+            sample_buf[(j2 + 1) + (j1 * width)] = ((int32_t)*mu) * x2;
+            jpeg2000_modify_state(j1, j2 + 1, width, (*sigma) * x2, block_states);
+            sigma += 1;
+            mu += 1;
+
+            x3 = x1 | x2;
+            sample_buf[(j2 + 1) + (j1 + 1) * width] = ((int32_t)*mu) * x3;
+            jpeg2000_modify_state(j1 + 1, j2 + 1, width, (*sigma) * x3, block_states);
+            sigma += 1;
+            mu += 1;
+        }
+    }
+    ret = 1;
+free:
+    av_freep(&sigma_n);
+    av_freep(&E);
+    av_freep(&mu_n);
+    return ret;
+}
+
+static void
+jpeg2000_calc_mbr(uint8_t *mbr, const uint16_t i, const uint16_t j, const uint32_t mbr_info, uint8_t causal_cond,
+                  uint8_t *block_states, int width)
+{
+
+    int local_mbr = 0;
+    local_mbr |= jpeg2000_get_state(i - 1, j - 1, width, HT_SHIFT_SIGMA, block_states);
+    local_mbr |= jpeg2000_get_state(i - 1, j + 0, width, HT_SHIFT_SIGMA, block_states);
+    local_mbr |= jpeg2000_get_state(i - 1, j + 1, width, HT_SHIFT_SIGMA, block_states);
+
+    local_mbr |= jpeg2000_get_state(i + 0, j - 1, width, HT_SHIFT_SIGMA, block_states);
+    local_mbr |= jpeg2000_get_state(i + 0, j + 1, width, HT_SHIFT_SIGMA, block_states);
+
+    local_mbr |= jpeg2000_get_state(i + 1, j - 1, width, HT_SHIFT_SIGMA, block_states) * causal_cond;
+    local_mbr |= jpeg2000_get_state(i + 1, j + 0, width, HT_SHIFT_SIGMA, block_states) * causal_cond;
+    local_mbr |= jpeg2000_get_state(i + 1, j + 1, width, HT_SHIFT_SIGMA, block_states) * causal_cond;
+
+    local_mbr |= jpeg2000_get_state(i - 1, j - 1, width, HT_SHIFT_REF, block_states) *
+                 jpeg2000_get_state(i - 1, j - 1, width, HT_SHIFT_SCAN, block_states);
+    local_mbr |= jpeg2000_get_state(i - 1, j + 0, width, HT_SHIFT_REF, block_states) *
+                 jpeg2000_get_state(i - 1, j - 1, width, HT_SHIFT_SCAN, block_states);
+    local_mbr |= jpeg2000_get_state(i - 1, j + 1, width, HT_SHIFT_REF, block_states) *
+                 jpeg2000_get_state(i - 1, j + 1, width, HT_SHIFT_SCAN, block_states);
+
+    local_mbr |= jpeg2000_get_state(i + 0, j - 1, width, HT_SHIFT_REF, block_states) *
+                 jpeg2000_get_state(i + 0, j - 1, width, HT_SHIFT_SCAN, block_states);
+    local_mbr |= jpeg2000_get_state(i + 0, j + 1, width, HT_SHIFT_REF, block_states) *
+                 jpeg2000_get_state(i + 0, j + 1, width, HT_SHIFT_SCAN, block_states);
+
+    local_mbr |= jpeg2000_get_state(i + 1, j - 1, width, HT_SHIFT_REF, block_states) *
+                 jpeg2000_get_state(i + 1, j - 1, width, HT_SHIFT_SCAN, block_states) * causal_cond;
+    local_mbr |= jpeg2000_get_state(i + 1, j + 0, width, HT_SHIFT_REF, block_states) *
+                 jpeg2000_get_state(i + 1, j + 0, width, HT_SHIFT_SCAN, block_states) * causal_cond;
+    local_mbr |= jpeg2000_get_state(i + 1, j + 1, width, HT_SHIFT_REF, block_states) *
+                 jpeg2000_get_state(i + 1, j + 1, width, HT_SHIFT_SCAN, block_states) * causal_cond;
+
+    *mbr |= local_mbr;
+}
+
+static void
+jpeg2000_process_stripes_block(StateVars *sig_prop, int i_s, int j_s, int width, int height, int stride, int pLSB,
+                               int32_t *sample_buf, uint8_t *block_states, uint8_t *magref_segment,
+                               uint32_t magref_length)
+{
+    int32_t *sp;
+    uint8_t causal_cond = 0;
+    uint8_t bit;
+    uint8_t mbr;
+    uint32_t mbr_info;
+    int modify_state = 0;
+    int cond;
+
+    for (int j = j_s; j < j_s + width; j++) {
+        mbr_info = 0;
+        for (int i = i_s; i < i_s + height; i++) {
+            sp = &sample_buf[j + (i * (stride - 2))];
+            mbr = 0;
+            causal_cond = i != (i_s + height - 1);
+            if (jpeg2000_get_state(i, j, stride - 2, HT_SHIFT_SIGMA, block_states) == 0)
+                jpeg2000_calc_mbr(&mbr, i, j, mbr_info & 0x1EF, causal_cond, block_states, stride - 2);
+            mbr_info >>= 3;
+            cond = mbr != 0;
+            bit = jpeg2000_peek_bit(sig_prop, magref_segment, magref_length);
+            *sp |= (bit * cond) << pLSB;
+            sig_prop->bits -= cond;
+            modify_state = (((1 << HT_SHIFT_REF_IND) | (1 << HT_SHIFT_REF)) * cond) | 1 << HT_SHIFT_SCAN;
+            jpeg2000_modify_state(i, j, stride - 2, modify_state, block_states);
+        }
+    }
+}
+
+static void av_noinline
+jpeg2000_decode_sigprop(Jpeg2000Cblk *cblk, uint16_t width, uint16_t height, uint8_t *magref_segment,
+                        uint32_t magref_length, uint8_t pLSB, int32_t *sample_buf, uint8_t *block_states)
+{
+    /* Described in clause 7.4
+     * procedure: decodeSigPropMag
+     */
+    StateVars sp_dec;
+    const uint16_t num_v_stripe = height / 4;
+    const uint16_t num_h_stripe = width / 4;
+    int b_width = 4;
+    int b_height = 4;
+    int stride = width + 2;
+    int last_width;
+    uint16_t i = 0, j = 0;
+
+    jpeg2000_init_zero(&sp_dec);
+
+    for (int n1 = 0; n1 < num_v_stripe; n1++) {
+        j = 0;
+        for (int n2 = 0; n2 < num_h_stripe; n2++) {
+            jpeg2000_process_stripes_block(&sp_dec, i, j, b_width, b_height, stride,
+                                           pLSB, sample_buf, block_states, magref_segment,
+                                           magref_length);
+            j += 4;
+        }
+        last_width = width % 4;
+        if (last_width)
+            jpeg2000_process_stripes_block(&sp_dec, i, j, last_width, b_height, stride,
+                                           pLSB, sample_buf, block_states, magref_segment,
+                                           magref_length);
+        i += 4;
+    }
+    /* decode remaining height stripes */
+    b_height = height % 4;
+    j = 0;
+    for (int n2 = 0; n2 < num_h_stripe; n2++) {
+        jpeg2000_process_stripes_block(&sp_dec, i, j, b_width, b_height, stride,
+                                       pLSB, sample_buf, block_states, magref_segment,
+                                       magref_length);
+        j += 4;
+    }
+    last_width = width % 4;
+    if (last_width)
+        jpeg2000_process_stripes_block(&sp_dec, i, j, last_width, b_height, stride,
+                                       pLSB, sample_buf, block_states, magref_segment,
+                                       magref_length);
+}
+
+static int av_noinline
+jpeg2000_decode_magref(Jpeg2000Cblk *cblk, uint16_t width, uint16_t block_height, uint8_t *magref_segment,
+                       uint32_t magref_length, uint8_t pLSB, int32_t *sample_buf, uint8_t *block_states)
+{
+    /*
+     * Described in clause 7.5
+     * procedure: decodeSigPropMag
+     */
+
+    StateVars mag_ref = {0};
+    const uint16_t num_v_stripe = block_height / 4;
+    uint16_t height = 4;
+    uint16_t i_start = 0;
+    int32_t *sp;
+
+    jpeg2000_init_mag_ref(&mag_ref, magref_length);
+
+    for (int n1 = 0; n1 < num_v_stripe; n1++) {
+        for (int j = 0; j < width; j++) {
+            for (int i = i_start; i < i_start + height; i++) {
+                /* we move column wise, going from one quad to another
+                * see figure 7.
+                */
+                sp = &sample_buf[j + i * width];
+                if (jpeg2000_get_state(i, j, width, HT_SHIFT_SIGMA, block_states) != 0) {
+                    jpeg2000_modify_state(i, j, width, 1 << HT_SHIFT_REF_IND, block_states);
+                    *sp |= jpeg2000_import_magref_bit(&mag_ref, magref_segment, magref_length) << pLSB;
+                }
+            }
+        }
+        i_start += 4;
+    }
+    height = block_height % 4;
+    for (int j = 0; j < width; j++) {
+        for (int i = i_start; i < i_start + height; i++) {
+            sp = &sample_buf[j + i * width];
+            if (jpeg2000_get_state(i, j, width, HT_SHIFT_SIGMA, block_states) != 0) {
+                jpeg2000_modify_state(i, j, width, 1 << HT_SHIFT_REF_IND, block_states);
+                *sp |= jpeg2000_import_magref_bit(&mag_ref, magref_segment, magref_length) << pLSB;
+            }
+        }
+    }
+    return 1;
+}
+
+
+int
+ff_jpeg2000_decode_htj2k(const Jpeg2000DecoderContext *s, Jpeg2000CodingStyle *codsty, Jpeg2000T1Context *t1, Jpeg2000Cblk *cblk,
+                         int width, int height, int magp, uint8_t roi_shift)
+{
+    uint8_t p0 = 0; /* Number of placeholder passes. */
+    uint32_t Lcup;  /* Length of HT cleanup segment. */
+    uint32_t Lref;  /* Length of Refinement segment. */
+    uint32_t Scup;  /* HT cleanup segment suffix length. */
+    uint32_t Pcup;  /* HT cleanup segment prefix length. */
+
+    uint8_t S_blk;  /* Number of skipped magnitude bitplanes; */
+    uint8_t pLSB;
+
+    uint8_t *Dcup; /* Byte of an HT cleanup segment. */
+    uint8_t *Dref; /* Byte of an HT refinement segment. */
+
+    int z_blk;     /* Number of ht coding pass */
+
+    uint8_t empty_passes;
+
+    StateVars mag_sgn;  /* Magnitude and Sign */
+    StateVars mel;      /* Adaptive run-length coding */
+    StateVars vlc;      /* Variable Length coding */
+    StateVars sig_prop; /* Significance propagation */
+
+    MelDecoderState mel_state;
+
+    int ret;
+
+    /* Temporary buffers */
+    int32_t *sample_buf;
+    uint8_t *block_states;
+
+    /* Post-processing */
+    int32_t n, val;
+
+    int32_t M_b = magp;
+    av_assert0(width <= 1024U && height <= 1024U);
+    av_assert0(width * height <= 4096);
+    av_assert0(width * height > 0);
+
+    memset(t1->data, 0, t1->stride * height * sizeof(*t1->data));
+    memset(t1->flags, 0, t1->stride * (height + 2) * sizeof(*t1->flags));
+
+    if (cblk->npasses == 0)
+        return 0;
+
+    if (cblk->npasses > 3)
+        p0 = 0;
+    else if (cblk->length == 0)
+        p0 = 1;
+
+    empty_passes = p0 * 3;
+    z_blk = cblk->npasses - empty_passes;
+
+    if (z_blk <= 0)
+        /* no passes within this set, continue */
+        return 0;
+
+    Lcup = cblk->pass_lengths[0];
+    Lref = cblk->pass_lengths[1];
+
+    if (Lcup < 2) {
+        av_log(s->avctx, AV_LOG_ERROR,
+               "Cleanup pass length must be at least 2 bytes in length\n");
+        return AVERROR_INVALIDDATA;
+    }
+    Dcup = cblk->data;
+    /* Dref comes after the refinement segment. */
+    Dref  = cblk->data + Lcup;
+    S_blk = p0 + cblk->zbp;
+    pLSB  = 30 - S_blk;
+
+    Scup = (Dcup[Lcup - 1] << 4) + (Dcup[Lcup - 2] & 0x0F);
+
+    if (Scup < 2 || Scup > Lcup || Scup > 4079) {
+        av_log(s->avctx, AV_LOG_ERROR, "Cleanup pass suffix length is invalid %d\n",
+               Scup);
+        ret = AVERROR_INVALIDDATA;
+        goto free;
+    }
+    Pcup = Lcup - Scup;
+
+    /* modDcup shall be done before the creation of vlc instance. */
+    Dcup[Lcup - 1] = 0xFF;
+    Dcup[Lcup - 2] |= 0x0F;
+    /* Magnitude and refinement */
+    jpeg2000_init_zero(&mag_sgn);
+    jpeg2000_bitbuf_refill_forward(&mag_sgn, Dcup, Pcup);
+    /* Significance propagation */
+    jpeg2000_init_zero(&sig_prop);
+    /* Adaptive run length */
+    jpeg2000_init_mel(&mel, Pcup);
+    /* Variable Length coding. */
+    jpeg2000_init_vlc(&vlc, Lcup, Pcup, Dcup);
+
+    jpeg2000_init_mel_decoder(&mel_state);
+
+    sample_buf = av_calloc((width + 4) * (height + 4), sizeof(int32_t));
+    block_states = av_calloc((width + 4) * (height + 4), sizeof(uint8_t));
+
+    if (!sample_buf || !block_states) {
+        ret = AVERROR(ENOMEM);
+        goto free;
+    }
+    if ((ret = jpeg2000_decode_ht_cleanup(s, cblk, t1, &mel_state, &mel, &vlc,
+                                          &mag_sgn, Dcup, Lcup, Pcup, pLSB, width,
+                                          height, sample_buf, block_states))
+        < 0)
+        goto free;
+
+    if (cblk->npasses > 1)
+        jpeg2000_decode_sigprop(cblk, width, height, Dref, Lref,
+                                pLSB - 1, sample_buf, block_states);
+
+    if (cblk->npasses > 2)
+        if ((ret = jpeg2000_decode_magref(cblk, width, height, Dref, Lref,
+                                          pLSB - 1, sample_buf, block_states))
+            < 0)
+            goto free;
+
+    pLSB = 31 - M_b;
+    /* Reconstruct the values. */
+    for (int y = 0; y < height; y++) {
+        for (int x = 0; x < width; x++) {
+            n = x + (y * t1->stride);
+            val = sample_buf[x + (y * width)];
+            /* Convert sign-magnitude to twos complement. */
+            val = val >> 31 ? 0x80000000 - val : val;
+            val >>= (pLSB - 1);
+            t1->data[n] = val;
+        }
+    }
+free:
+    av_freep(&sample_buf);
+    av_freep(&block_states);
+    return ret;
+}
+
+/**
+ * @brief  CtxVLC tables, borrowed from openhtj2k (https://github.com/osamu620/OpenHTJ2K) (credits to Osamu Watanabe)
+ */
+static const uint16_t dec_CxtVLC_table1[1024] = {
+        0x0016, 0x006A, 0x0046, 0x00DD, 0x0086, 0x888B, 0x0026, 0x444D, 0x0016, 0x00AA, 0x0046, 0x88AD, 0x0086,
+        0x003A, 0x0026, 0x00DE, 0x0016, 0x00CA, 0x0046, 0x009D, 0x0086, 0x005A, 0x0026, 0x222D, 0x0016, 0x009A,
+        0x0046, 0x007D, 0x0086, 0x01FD, 0x0026, 0x007E, 0x0016, 0x006A, 0x0046, 0x88CD, 0x0086, 0x888B, 0x0026,
+        0x111D, 0x0016, 0x00AA, 0x0046, 0x005D, 0x0086, 0x003A, 0x0026, 0x00EE, 0x0016, 0x00CA, 0x0046, 0x00BD,
+        0x0086, 0x005A, 0x0026, 0x11FF, 0x0016, 0x009A, 0x0046, 0x003D, 0x0086, 0x04ED, 0x0026, 0x2AAF, 0x0016,
+        0x006A, 0x0046, 0x00DD, 0x0086, 0x888B, 0x0026, 0x444D, 0x0016, 0x00AA, 0x0046, 0x88AD, 0x0086, 0x003A,
+        0x0026, 0x44EF, 0x0016, 0x00CA, 0x0046, 0x009D, 0x0086, 0x005A, 0x0026, 0x222D, 0x0016, 0x009A, 0x0046,
+        0x007D, 0x0086, 0x01FD, 0x0026, 0x00BE, 0x0016, 0x006A, 0x0046, 0x88CD, 0x0086, 0x888B, 0x0026, 0x111D,
+        0x0016, 0x00AA, 0x0046, 0x005D, 0x0086, 0x003A, 0x0026, 0x4CCF, 0x0016, 0x00CA, 0x0046, 0x00BD, 0x0086,
+        0x005A, 0x0026, 0x00FE, 0x0016, 0x009A, 0x0046, 0x003D, 0x0086, 0x04ED, 0x0026, 0x006F, 0x0002, 0x0088,
+        0x0002, 0x005C, 0x0002, 0x0018, 0x0002, 0x00DE, 0x0002, 0x0028, 0x0002, 0x009C, 0x0002, 0x004A, 0x0002,
+        0x007E, 0x0002, 0x0088, 0x0002, 0x00CC, 0x0002, 0x0018, 0x0002, 0x888F, 0x0002, 0x0028, 0x0002, 0x00FE,
+        0x0002, 0x003A, 0x0002, 0x222F, 0x0002, 0x0088, 0x0002, 0x04FD, 0x0002, 0x0018, 0x0002, 0x00BE, 0x0002,
+        0x0028, 0x0002, 0x00BF, 0x0002, 0x004A, 0x0002, 0x006E, 0x0002, 0x0088, 0x0002, 0x00AC, 0x0002, 0x0018,
+        0x0002, 0x444F, 0x0002, 0x0028, 0x0002, 0x00EE, 0x0002, 0x003A, 0x0002, 0x113F, 0x0002, 0x0088, 0x0002,
+        0x005C, 0x0002, 0x0018, 0x0002, 0x00CF, 0x0002, 0x0028, 0x0002, 0x009C, 0x0002, 0x004A, 0x0002, 0x006F,
+        0x0002, 0x0088, 0x0002, 0x00CC, 0x0002, 0x0018, 0x0002, 0x009F, 0x0002, 0x0028, 0x0002, 0x00EF, 0x0002,
+        0x003A, 0x0002, 0x233F, 0x0002, 0x0088, 0x0002, 0x04FD, 0x0002, 0x0018, 0x0002, 0x00AF, 0x0002, 0x0028,
+        0x0002, 0x44FF, 0x0002, 0x004A, 0x0002, 0x005F, 0x0002, 0x0088, 0x0002, 0x00AC, 0x0002, 0x0018, 0x0002,
+        0x007F, 0x0002, 0x0028, 0x0002, 0x00DF, 0x0002, 0x003A, 0x0002, 0x111F, 0x0002, 0x0028, 0x0002, 0x005C,
+        0x0002, 0x008A, 0x0002, 0x00BF, 0x0002, 0x0018, 0x0002, 0x00FE, 0x0002, 0x00CC, 0x0002, 0x007E, 0x0002,
+        0x0028, 0x0002, 0x8FFF, 0x0002, 0x004A, 0x0002, 0x007F, 0x0002, 0x0018, 0x0002, 0x00DF, 0x0002, 0x00AC,
+        0x0002, 0x133F, 0x0002, 0x0028, 0x0002, 0x222D, 0x0002, 0x008A, 0x0002, 0x00BE, 0x0002, 0x0018, 0x0002,
+        0x44EF, 0x0002, 0x2AAD, 0x0002, 0x006E, 0x0002, 0x0028, 0x0002, 0x15FF, 0x0002, 0x004A, 0x0002, 0x009E,
+        0x0002, 0x0018, 0x0002, 0x00CF, 0x0002, 0x003C, 0x0002, 0x223F, 0x0002, 0x0028, 0x0002, 0x005C, 0x0002,
+        0x008A, 0x0002, 0x2BBF, 0x0002, 0x0018, 0x0002, 0x04EF, 0x0002, 0x00CC, 0x0002, 0x006F, 0x0002, 0x0028,
+        0x0002, 0x27FF, 0x0002, 0x004A, 0x0002, 0x009F, 0x0002, 0x0018, 0x0002, 0x00DE, 0x0002, 0x00AC, 0x0002,
+        0x444F, 0x0002, 0x0028, 0x0002, 0x222D, 0x0002, 0x008A, 0x0002, 0x8AAF, 0x0002, 0x0018, 0x0002, 0x00EE,
+        0x0002, 0x2AAD, 0x0002, 0x005F, 0x0002, 0x0028, 0x0002, 0x44FF, 0x0002, 0x004A, 0x0002, 0x888F, 0x0002,
+        0x0018, 0x0002, 0xAAAF, 0x0002, 0x003C, 0x0002, 0x111F, 0x0004, 0x8FFD, 0x0028, 0x005C, 0x0004, 0x00BC,
+        0x008A, 0x66FF, 0x0004, 0x00CD, 0x0018, 0x111D, 0x0004, 0x009C, 0x003A, 0x8AAF, 0x0004, 0x00FC, 0x0028,
+        0x133D, 0x0004, 0x00AC, 0x004A, 0x3BBF, 0x0004, 0x2BBD, 0x0018, 0x5FFF, 0x0004, 0x006C, 0x157D, 0x455F,
+        0x0004, 0x2FFD, 0x0028, 0x222D, 0x0004, 0x22AD, 0x008A, 0x44EF, 0x0004, 0x00CC, 0x0018, 0x4FFF, 0x0004,
+        0x007C, 0x003A, 0x447F, 0x0004, 0x04DD, 0x0028, 0x233D, 0x0004, 0x009D, 0x004A, 0x00DE, 0x0004, 0x88BD,
+        0x0018, 0xAFFF, 0x0004, 0x115D, 0x1FFD, 0x444F, 0x0004, 0x8FFD, 0x0028, 0x005C, 0x0004, 0x00BC, 0x008A,
+        0x8CEF, 0x0004, 0x00CD, 0x0018, 0x111D, 0x0004, 0x009C, 0x003A, 0x888F, 0x0004, 0x00FC, 0x0028, 0x133D,
+        0x0004, 0x00AC, 0x004A, 0x44DF, 0x0004, 0x2BBD, 0x0018, 0x8AFF, 0x0004, 0x006C, 0x157D, 0x006F, 0x0004,
+        0x2FFD, 0x0028, 0x222D, 0x0004, 0x22AD, 0x008A, 0x00EE, 0x0004, 0x00CC, 0x0018, 0x2EEF, 0x0004, 0x007C,
+        0x003A, 0x277F, 0x0004, 0x04DD, 0x0028, 0x233D, 0x0004, 0x009D, 0x004A, 0x1BBF, 0x0004, 0x88BD, 0x0018,
+        0x37FF, 0x0004, 0x115D, 0x1FFD, 0x333F, 0x0002, 0x0088, 0x0002, 0x02ED, 0x0002, 0x00CA, 0x0002, 0x4CCF,
+        0x0002, 0x0048, 0x0002, 0x23FF, 0x0002, 0x001A, 0x0002, 0x888F, 0x0002, 0x0088, 0x0002, 0x006C, 0x0002,
+        0x002A, 0x0002, 0x00AF, 0x0002, 0x0048, 0x0002, 0x22EF, 0x0002, 0x00AC, 0x0002, 0x005F, 0x0002, 0x0088,
+        0x0002, 0x444D, 0x0002, 0x00CA, 0x0002, 0xCCCF, 0x0002, 0x0048, 0x0002, 0x00FE, 0x0002, 0x001A, 0x0002,
+        0x006F, 0x0002, 0x0088, 0x0002, 0x005C, 0x0002, 0x002A, 0x0002, 0x009F, 0x0002, 0x0048, 0x0002, 0x00DF,
+        0x0002, 0x03FD, 0x0002, 0x222F, 0x0002, 0x0088, 0x0002, 0x02ED, 0x0002, 0x00CA, 0x0002, 0x8CCF, 0x0002,
+        0x0048, 0x0002, 0x11FF, 0x0002, 0x001A, 0x0002, 0x007E, 0x0002, 0x0088, 0x0002, 0x006C, 0x0002, 0x002A,
+        0x0002, 0x007F, 0x0002, 0x0048, 0x0002, 0x00EE, 0x0002, 0x00AC, 0x0002, 0x003E, 0x0002, 0x0088, 0x0002,
+        0x444D, 0x0002, 0x00CA, 0x0002, 0x00BE, 0x0002, 0x0048, 0x0002, 0x00BF, 0x0002, 0x001A, 0x0002, 0x003F,
+        0x0002, 0x0088, 0x0002, 0x005C, 0x0002, 0x002A, 0x0002, 0x009E, 0x0002, 0x0048, 0x0002, 0x00DE, 0x0002,
+        0x03FD, 0x0002, 0x111F, 0x0004, 0x8AED, 0x0048, 0x888D, 0x0004, 0x00DC, 0x00CA, 0x3FFF, 0x0004, 0xCFFD,
+        0x002A, 0x003D, 0x0004, 0x00BC, 0x005A, 0x8DDF, 0x0004, 0x8FFD, 0x0048, 0x006C, 0x0004, 0x027D, 0x008A,
+        0x99FF, 0x0004, 0x00EC, 0x00FA, 0x003C, 0x0004, 0x00AC, 0x001A, 0x009F, 0x0004, 0x2FFD, 0x0048, 0x007C,
+        0x0004, 0x44CD, 0x00CA, 0x67FF, 0x0004, 0x1FFD, 0x002A, 0x444D, 0x0004, 0x00AD, 0x005A, 0x8CCF, 0x0004,
+        0x4FFD, 0x0048, 0x445D, 0x0004, 0x01BD, 0x008A, 0x4EEF, 0x0004, 0x45DD, 0x00FA, 0x111D, 0x0004, 0x009C,
+        0x001A, 0x222F, 0x0004, 0x8AED, 0x0048, 0x888D, 0x0004, 0x00DC, 0x00CA, 0xAFFF, 0x0004, 0xCFFD, 0x002A,
+        0x003D, 0x0004, 0x00BC, 0x005A, 0x11BF, 0x0004, 0x8FFD, 0x0048, 0x006C, 0x0004, 0x027D, 0x008A, 0x22EF,
+        0x0004, 0x00EC, 0x00FA, 0x003C, 0x0004, 0x00AC, 0x001A, 0x227F, 0x0004, 0x2FFD, 0x0048, 0x007C, 0x0004,
+        0x44CD, 0x00CA, 0x5DFF, 0x0004, 0x1FFD, 0x002A, 0x444D, 0x0004, 0x00AD, 0x005A, 0x006F, 0x0004, 0x4FFD,
+        0x0048, 0x445D, 0x0004, 0x01BD, 0x008A, 0x11DF, 0x0004, 0x45DD, 0x00FA, 0x111D, 0x0004, 0x009C, 0x001A,
+        0x155F, 0x0006, 0x00FC, 0x0018, 0x111D, 0x0048, 0x888D, 0x00AA, 0x4DDF, 0x0006, 0x2AAD, 0x005A, 0x67FF,
+        0x0028, 0x223D, 0x00BC, 0xAAAF, 0x0006, 0x00EC, 0x0018, 0x5FFF, 0x0048, 0x006C, 0x008A, 0xCCCF, 0x0006,
+        0x009D, 0x00CA, 0x44EF, 0x0028, 0x003C, 0x8FFD, 0x137F, 0x0006, 0x8EED, 0x0018, 0x1FFF, 0x0048, 0x007C,
+        0x00AA, 0x4CCF, 0x0006, 0x227D, 0x005A, 0x1DDF, 0x0028, 0x444D, 0x4FFD, 0x155F, 0x0006, 0x00DC, 0x0018,
+        0x2EEF, 0x0048, 0x445D, 0x008A, 0x22BF, 0x0006, 0x009C, 0x00CA, 0x8CDF, 0x0028, 0x222D, 0x2FFD, 0x226F,
+        0x0006, 0x00FC, 0x0018, 0x111D, 0x0048, 0x888D, 0x00AA, 0x1BBF, 0x0006, 0x2AAD, 0x005A, 0x33FF, 0x0028,
+        0x223D, 0x00BC, 0x8AAF, 0x0006, 0x00EC, 0x0018, 0x9BFF, 0x0048, 0x006C, 0x008A, 0x8ABF, 0x0006, 0x009D,
+        0x00CA, 0x4EEF, 0x0028, 0x003C, 0x8FFD, 0x466F, 0x0006, 0x8EED, 0x0018, 0xCFFF, 0x0048, 0x007C, 0x00AA,
+        0x8CCF, 0x0006, 0x227D, 0x005A, 0xAEEF, 0x0028, 0x444D, 0x4FFD, 0x477F, 0x0006, 0x00DC, 0x0018, 0xAFFF,
+        0x0048, 0x445D, 0x008A, 0x2BBF, 0x0006, 0x009C, 0x00CA, 0x44DF, 0x0028, 0x222D, 0x2FFD, 0x133F, 0x00F6,
+        0xAFFD, 0x1FFB, 0x003C, 0x0008, 0x23BD, 0x007A, 0x11DF, 0x00F6, 0x45DD, 0x2FFB, 0x4EEF, 0x00DA, 0x177D,
+        0xCFFD, 0x377F, 0x00F6, 0x3FFD, 0x8FFB, 0x111D, 0x0008, 0x009C, 0x005A, 0x1BBF, 0x00F6, 0x00CD, 0x00BA,
+        0x8DDF, 0x4FFB, 0x006C, 0x9BFD, 0x455F, 0x00F6, 0x67FD, 0x1FFB, 0x002C, 0x0008, 0x00AC, 0x007A, 0x009F,
+        0x00F6, 0x00AD, 0x2FFB, 0x7FFF, 0x00DA, 0x004C, 0x5FFD, 0x477F, 0x00F6, 0x00EC, 0x8FFB, 0x001C, 0x0008,
+        0x008C, 0x005A, 0x888F, 0x00F6, 0x00CC, 0x00BA, 0x2EEF, 0x4FFB, 0x115D, 0x8AED, 0x113F, 0x00F6, 0xAFFD,
+        0x1FFB, 0x003C, 0x0008, 0x23BD, 0x007A, 0x1DDF, 0x00F6, 0x45DD, 0x2FFB, 0xBFFF, 0x00DA, 0x177D, 0xCFFD,
+        0x447F, 0x00F6, 0x3FFD, 0x8FFB, 0x111D, 0x0008, 0x009C, 0x005A, 0x277F, 0x00F6, 0x00CD, 0x00BA, 0x22EF,
+        0x4FFB, 0x006C, 0x9BFD, 0x444F, 0x00F6, 0x67FD, 0x1FFB, 0x002C, 0x0008, 0x00AC, 0x007A, 0x11BF, 0x00F6,
+        0x00AD, 0x2FFB, 0xFFFF, 0x00DA, 0x004C, 0x5FFD, 0x233F, 0x00F6, 0x00EC, 0x8FFB, 0x001C, 0x0008, 0x008C,
+        0x005A, 0x006F, 0x00F6, 0x00CC, 0x00BA, 0x8BBF, 0x4FFB, 0x115D, 0x8AED, 0x222F};
+
+static const uint16_t dec_CxtVLC_table0[1024] = {
+        0x0026, 0x00AA, 0x0046, 0x006C, 0x0086, 0x8AED, 0x0018, 0x8DDF, 0x0026, 0x01BD, 0x0046, 0x5FFF, 0x0086,
+        0x027D, 0x005A, 0x155F, 0x0026, 0x003A, 0x0046, 0x444D, 0x0086, 0x4CCD, 0x0018, 0xCCCF, 0x0026, 0x2EFD,
+        0x0046, 0x99FF, 0x0086, 0x009C, 0x00CA, 0x133F, 0x0026, 0x00AA, 0x0046, 0x445D, 0x0086, 0x8CCD, 0x0018,
+        0x11DF, 0x0026, 0x4FFD, 0x0046, 0xCFFF, 0x0086, 0x009D, 0x005A, 0x007E, 0x0026, 0x003A, 0x0046, 0x1FFF,
+        0x0086, 0x88AD, 0x0018, 0x00BE, 0x0026, 0x8FFD, 0x0046, 0x4EEF, 0x0086, 0x888D, 0x00CA, 0x111F, 0x0026,
+        0x00AA, 0x0046, 0x006C, 0x0086, 0x8AED, 0x0018, 0x45DF, 0x0026, 0x01BD, 0x0046, 0x22EF, 0x0086, 0x027D,
+        0x005A, 0x227F, 0x0026, 0x003A, 0x0046, 0x444D, 0x0086, 0x4CCD, 0x0018, 0x11BF, 0x0026, 0x2EFD, 0x0046,
+        0x00FE, 0x0086, 0x009C, 0x00CA, 0x223F, 0x0026, 0x00AA, 0x0046, 0x445D, 0x0086, 0x8CCD, 0x0018, 0x00DE,
+        0x0026, 0x4FFD, 0x0046, 0xABFF, 0x0086, 0x009D, 0x005A, 0x006F, 0x0026, 0x003A, 0x0046, 0x6EFF, 0x0086,
+        0x88AD, 0x0018, 0x2AAF, 0x0026, 0x8FFD, 0x0046, 0x00EE, 0x0086, 0x888D, 0x00CA, 0x222F, 0x0004, 0x00CA,
+        0x0088, 0x027D, 0x0004, 0x4CCD, 0x0028, 0x00FE, 0x0004, 0x2AFD, 0x0048, 0x005C, 0x0004, 0x009D, 0x0018,
+        0x00DE, 0x0004, 0x01BD, 0x0088, 0x006C, 0x0004, 0x88AD, 0x0028, 0x11DF, 0x0004, 0x8AED, 0x0048, 0x003C,
+        0x0004, 0x888D, 0x0018, 0x111F, 0x0004, 0x00CA, 0x0088, 0x006D, 0x0004, 0x88CD, 0x0028, 0x88FF, 0x0004,
+        0x8BFD, 0x0048, 0x444D, 0x0004, 0x009C, 0x0018, 0x00BE, 0x0004, 0x4EFD, 0x0088, 0x445D, 0x0004, 0x00AC,
+        0x0028, 0x00EE, 0x0004, 0x45DD, 0x0048, 0x222D, 0x0004, 0x003D, 0x0018, 0x007E, 0x0004, 0x00CA, 0x0088,
+        0x027D, 0x0004, 0x4CCD, 0x0028, 0x1FFF, 0x0004, 0x2AFD, 0x0048, 0x005C, 0x0004, 0x009D, 0x0018, 0x11BF,
+        0x0004, 0x01BD, 0x0088, 0x006C, 0x0004, 0x88AD, 0x0028, 0x22EF, 0x0004, 0x8AED, 0x0048, 0x003C, 0x0004,
+        0x888D, 0x0018, 0x227F, 0x0004, 0x00CA, 0x0088, 0x006D, 0x0004, 0x88CD, 0x0028, 0x4EEF, 0x0004, 0x8BFD,
+        0x0048, 0x444D, 0x0004, 0x009C, 0x0018, 0x2AAF, 0x0004, 0x4EFD, 0x0088, 0x445D, 0x0004, 0x00AC, 0x0028,
+        0x8DDF, 0x0004, 0x45DD, 0x0048, 0x222D, 0x0004, 0x003D, 0x0018, 0x155F, 0x0004, 0x005A, 0x0088, 0x006C,
+        0x0004, 0x88DD, 0x0028, 0x23FF, 0x0004, 0x11FD, 0x0048, 0x444D, 0x0004, 0x00AD, 0x0018, 0x00BE, 0x0004,
+        0x137D, 0x0088, 0x155D, 0x0004, 0x00CC, 0x0028, 0x00DE, 0x0004, 0x02ED, 0x0048, 0x111D, 0x0004, 0x009D,
+        0x0018, 0x007E, 0x0004, 0x005A, 0x0088, 0x455D, 0x0004, 0x44CD, 0x0028, 0x00EE, 0x0004, 0x1FFD, 0x0048,
+        0x003C, 0x0004, 0x00AC, 0x0018, 0x555F, 0x0004, 0x47FD, 0x0088, 0x113D, 0x0004, 0x02BD, 0x0028, 0x477F,
+        0x0004, 0x4CDD, 0x0048, 0x8FFF, 0x0004, 0x009C, 0x0018, 0x222F, 0x0004, 0x005A, 0x0088, 0x006C, 0x0004,
+        0x88DD, 0x0028, 0x00FE, 0x0004, 0x11FD, 0x0048, 0x444D, 0x0004, 0x00AD, 0x0018, 0x888F, 0x0004, 0x137D,
+        0x0088, 0x155D, 0x0004, 0x00CC, 0x0028, 0x8CCF, 0x0004, 0x02ED, 0x0048, 0x111D, 0x0004, 0x009D, 0x0018,
+        0x006F, 0x0004, 0x005A, 0x0088, 0x455D, 0x0004, 0x44CD, 0x0028, 0x1DDF, 0x0004, 0x1FFD, 0x0048, 0x003C,
+        0x0004, 0x00AC, 0x0018, 0x227F, 0x0004, 0x47FD, 0x0088, 0x113D, 0x0004, 0x02BD, 0x0028, 0x22BF, 0x0004,
+        0x4CDD, 0x0048, 0x22EF, 0x0004, 0x009C, 0x0018, 0x233F, 0x0006, 0x4DDD, 0x4FFB, 0xCFFF, 0x0018, 0x113D,
+        0x005A, 0x888F, 0x0006, 0x23BD, 0x008A, 0x00EE, 0x002A, 0x155D, 0xAAFD, 0x277F, 0x0006, 0x44CD, 0x8FFB,
+        0x44EF, 0x0018, 0x467D, 0x004A, 0x2AAF, 0x0006, 0x00AC, 0x555B, 0x99DF, 0x1FFB, 0x003C, 0x5FFD, 0x266F,
+        0x0006, 0x1DDD, 0x4FFB, 0x6EFF, 0x0018, 0x177D, 0x005A, 0x1BBF, 0x0006, 0x88AD, 0x008A, 0x5DDF, 0x002A,
+        0x444D, 0x2FFD, 0x667F, 0x0006, 0x00CC, 0x8FFB, 0x2EEF, 0x0018, 0x455D, 0x004A, 0x119F, 0x0006, 0x009C,
+        0x555B, 0x8CCF, 0x1FFB, 0x111D, 0x8CED, 0x006E, 0x0006, 0x4DDD, 0x4FFB, 0x3FFF, 0x0018, 0x113D, 0x005A,
+        0x11BF, 0x0006, 0x23BD, 0x008A, 0x8DDF, 0x002A, 0x155D, 0xAAFD, 0x222F, 0x0006, 0x44CD, 0x8FFB, 0x00FE,
+        0x0018, 0x467D, 0x004A, 0x899F, 0x0006, 0x00AC, 0x555B, 0x00DE, 0x1FFB, 0x003C, 0x5FFD, 0x446F, 0x0006,
+        0x1DDD, 0x4FFB, 0x9BFF, 0x0018, 0x177D, 0x005A, 0x00BE, 0x0006, 0x88AD, 0x008A, 0xCDDF, 0x002A, 0x444D,
+        0x2FFD, 0x007E, 0x0006, 0x00CC, 0x8FFB, 0x4EEF, 0x0018, 0x455D, 0x004A, 0x377F, 0x0006, 0x009C, 0x555B,
+        0x8BBF, 0x1FFB, 0x111D, 0x8CED, 0x233F, 0x0004, 0x00AA, 0x0088, 0x047D, 0x0004, 0x01DD, 0x0028, 0x11DF,
+        0x0004, 0x27FD, 0x0048, 0x005C, 0x0004, 0x8AAD, 0x0018, 0x2BBF, 0x0004, 0x009C, 0x0088, 0x006C, 0x0004,
+        0x00CC, 0x0028, 0x00EE, 0x0004, 0x8CED, 0x0048, 0x222D, 0x0004, 0x888D, 0x0018, 0x007E, 0x0004, 0x00AA,
+        0x0088, 0x006D, 0x0004, 0x88CD, 0x0028, 0x00FE, 0x0004, 0x19FD, 0x0048, 0x003C, 0x0004, 0x2AAD, 0x0018,
+        0xAAAF, 0x0004, 0x8BFD, 0x0088, 0x005D, 0x0004, 0x00BD, 0x0028, 0x4CCF, 0x0004, 0x44ED, 0x0048, 0x4FFF,
+        0x0004, 0x223D, 0x0018, 0x111F, 0x0004, 0x00AA, 0x0088, 0x047D, 0x0004, 0x01DD, 0x0028, 0x99FF, 0x0004,
+        0x27FD, 0x0048, 0x005C, 0x0004, 0x8AAD, 0x0018, 0x00BE, 0x0004, 0x009C, 0x0088, 0x006C, 0x0004, 0x00CC,
+        0x0028, 0x00DE, 0x0004, 0x8CED, 0x0048, 0x222D, 0x0004, 0x888D, 0x0018, 0x444F, 0x0004, 0x00AA, 0x0088,
+        0x006D, 0x0004, 0x88CD, 0x0028, 0x2EEF, 0x0004, 0x19FD, 0x0048, 0x003C, 0x0004, 0x2AAD, 0x0018, 0x447F,
+        0x0004, 0x8BFD, 0x0088, 0x005D, 0x0004, 0x00BD, 0x0028, 0x009F, 0x0004, 0x44ED, 0x0048, 0x67FF, 0x0004,
+        0x223D, 0x0018, 0x133F, 0x0006, 0x00CC, 0x008A, 0x9DFF, 0x2FFB, 0x467D, 0x1FFD, 0x99BF, 0x0006, 0x2AAD,
+        0x002A, 0x66EF, 0x4FFB, 0x005C, 0x2EED, 0x377F, 0x0006, 0x89BD, 0x004A, 0x00FE, 0x8FFB, 0x006C, 0x67FD,
+        0x889F, 0x0006, 0x888D, 0x001A, 0x5DDF, 0x00AA, 0x222D, 0x89DD, 0x444F, 0x0006, 0x2BBD, 0x008A, 0xCFFF,
+        0x2FFB, 0x226D, 0x009C, 0x00BE, 0x0006, 0xAAAD, 0x002A, 0x1DDF, 0x4FFB, 0x003C, 0x4DDD, 0x466F, 0x0006,
+        0x8AAD, 0x004A, 0xAEEF, 0x8FFB, 0x445D, 0x8EED, 0x177F, 0x0006, 0x233D, 0x001A, 0x4CCF, 0x00AA, 0xAFFF,
+        0x88CD, 0x133F, 0x0006, 0x00CC, 0x008A, 0x77FF, 0x2FFB, 0x467D, 0x1FFD, 0x3BBF, 0x0006, 0x2AAD, 0x002A,
+        0x00EE, 0x4FFB, 0x005C, 0x2EED, 0x007E, 0x0006, 0x89BD, 0x004A, 0x4EEF, 0x8FFB, 0x006C, 0x67FD, 0x667F,
+        0x0006, 0x888D, 0x001A, 0x00DE, 0x00AA, 0x222D, 0x89DD, 0x333F, 0x0006, 0x2BBD, 0x008A, 0x57FF, 0x2FFB,
+        0x226D, 0x009C, 0x199F, 0x0006, 0xAAAD, 0x002A, 0x99DF, 0x4FFB, 0x003C, 0x4DDD, 0x155F, 0x0006, 0x8AAD,
+        0x004A, 0xCEEF, 0x8FFB, 0x445D, 0x8EED, 0x277F, 0x0006, 0x233D, 0x001A, 0x1BBF, 0x00AA, 0x3FFF, 0x88CD,
+        0x111F, 0x0006, 0x45DD, 0x2FFB, 0x111D, 0x0018, 0x467D, 0x8FFD, 0xCCCF, 0x0006, 0x19BD, 0x004A, 0x22EF,
+        0x002A, 0x222D, 0x3FFD, 0x888F, 0x0006, 0x00CC, 0x008A, 0x00FE, 0x0018, 0x115D, 0xCFFD, 0x8AAF, 0x0006,
+        0x00AC, 0x003A, 0x8CDF, 0x1FFB, 0x133D, 0x66FD, 0x466F, 0x0006, 0x8CCD, 0x2FFB, 0x5FFF, 0x0018, 0x006C,
+        0x4FFD, 0xABBF, 0x0006, 0x22AD, 0x004A, 0x00EE, 0x002A, 0x233D, 0xAEFD, 0x377F, 0x0006, 0x2BBD, 0x008A,
+        0x55DF, 0x0018, 0x005C, 0x177D, 0x119F, 0x0006, 0x009C, 0x003A, 0x4CCF, 0x1FFB, 0x333D, 0x8EED, 0x444F,
+        0x0006, 0x45DD, 0x2FFB, 0x111D, 0x0018, 0x467D, 0x8FFD, 0x99BF, 0x0006, 0x19BD, 0x004A, 0x2EEF, 0x002A,
+        0x222D, 0x3FFD, 0x667F, 0x0006, 0x00CC, 0x008A, 0x4EEF, 0x0018, 0x115D, 0xCFFD, 0x899F, 0x0006, 0x00AC,
+        0x003A, 0x00DE, 0x1FFB, 0x133D, 0x66FD, 0x226F, 0x0006, 0x8CCD, 0x2FFB, 0x9BFF, 0x0018, 0x006C, 0x4FFD,
+        0x00BE, 0x0006, 0x22AD, 0x004A, 0x1DDF, 0x002A, 0x233D, 0xAEFD, 0x007E, 0x0006, 0x2BBD, 0x008A, 0xCEEF,
+        0x0018, 0x005C, 0x177D, 0x277F, 0x0006, 0x009C, 0x003A, 0x8BBF, 0x1FFB, 0x333D, 0x8EED, 0x455F, 0x1FF9,
+        0x1DDD, 0xAFFB, 0x00DE, 0x8FF9, 0x001C, 0xFFFB, 0x477F, 0x4FF9, 0x177D, 0x3FFB, 0x3BBF, 0x2FF9, 0xAEEF,
+        0x8EED, 0x444F, 0x1FF9, 0x22AD, 0x000A, 0x8BBF, 0x8FF9, 0x00FE, 0xCFFD, 0x007E, 0x4FF9, 0x115D, 0x5FFB,
+        0x577F, 0x2FF9, 0x8DDF, 0x2EED, 0x333F, 0x1FF9, 0x2BBD, 0xAFFB, 0x88CF, 0x8FF9, 0xBFFF, 0xFFFB, 0x377F,
+        0x4FF9, 0x006D, 0x3FFB, 0x00BE, 0x2FF9, 0x66EF, 0x9FFD, 0x133F, 0x1FF9, 0x009D, 0x000A, 0xABBF, 0x8FF9,
+        0xDFFF, 0x6FFD, 0x006E, 0x4FF9, 0x002C, 0x5FFB, 0x888F, 0x2FF9, 0xCDDF, 0x4DDD, 0x222F, 0x1FF9, 0x1DDD,
+        0xAFFB, 0x4CCF, 0x8FF9, 0x001C, 0xFFFB, 0x277F, 0x4FF9, 0x177D, 0x3FFB, 0x99BF, 0x2FF9, 0xCEEF, 0x8EED,
+        0x004E, 0x1FF9, 0x22AD, 0x000A, 0x00AE, 0x8FF9, 0x7FFF, 0xCFFD, 0x005E, 0x4FF9, 0x115D, 0x5FFB, 0x009E,
+        0x2FF9, 0x5DDF, 0x2EED, 0x003E, 0x1FF9, 0x2BBD, 0xAFFB, 0x00CE, 0x8FF9, 0xEFFF, 0xFFFB, 0x667F, 0x4FF9,
+        0x006D, 0x3FFB, 0x8AAF, 0x2FF9, 0x00EE, 0x9FFD, 0x233F, 0x1FF9, 0x009D, 0x000A, 0x1BBF, 0x8FF9, 0x4EEF,
+        0x6FFD, 0x455F, 0x4FF9, 0x002C, 0x5FFB, 0x008E, 0x2FF9, 0x99DF, 0x4DDD, 0x111F};
diff --git a/libavcodec/jpeg2000htdec.h b/libavcodec/jpeg2000htdec.h
new file mode 100644
index 0000000000..62cf1450c2
--- /dev/null
+++ b/libavcodec/jpeg2000htdec.h
@@ -0,0 +1,28 @@ 
+/*
+* JPEG2000 High Throughput block decoder
+* Copyright (c) 2022 Caleb Etemesi <etemesicaleb@gmail.com>
+*
+* This file is part of FFmpeg.
+*
+* FFmpeg is free software; you can redistribute it and/or
+* modify it under the terms of the GNU Lesser General Public
+* License as published by the Free Software Foundation; either
+* version 2.1 of the License, or (at your option) any later version.
+*
+* FFmpeg is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+* Lesser General Public License for more details.
+*
+* You should have received a copy of the GNU Lesser General Public
+* License along with FFmpeg; if not, write to the Free Software
+* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#ifndef AVCODEC_JPEG2000HTDEC_H
+#define AVCODEC_JPEG2000HTDEC_H
+
+#include "jpeg2000dec.h"
+
+int ff_jpeg2000_decode_htj2k(const Jpeg2000DecoderContext *s, Jpeg2000CodingStyle *codsty, Jpeg2000T1Context *t1, Jpeg2000Cblk *cblk, int width, int height, int magp, uint8_t roi_shift);
+
+#endif /* AVCODEC_JPEG2000HTDEC_H */