diff mbox series

[FFmpeg-devel,1/2] Add dirac vulkan hwaccel usage to diracdec.c

Message ID 20240814121856.757459-2-mozil.petryk@gmail.com
State New
Headers show
Series [FFmpeg-devel,1/2] Add dirac vulkan hwaccel usage to diracdec.c | expand

Commit Message

Petro Mozil Aug. 14, 2024, 12:18 p.m. UTC
This patch adds a vc2 hwaccel to ffmpeg.
Tested on ffmpeg vc encoder and vc2-conformance software.

Here is a set of commands to verify correctness

# Encode vid as vc2
ffmpeg -i <in-file> -vcodec vc2 input.vc2

# Decode with hwaccel
ffmpeg -init_hw_device "vulkan=vk:0" -hwaccel vulkan -i input.vc2 output.mkv

# Decode without hwaccel
ffmpeg -i input.vc2 output.mkv

Signed-off-by: Petro Mozil <mozil.petryk@gmail.com>
---
 libavcodec/diracdec.c | 336 +++++++++++-------------------------------
 libavcodec/diracdec.h | 267 +++++++++++++++++++++++++++++++++
 2 files changed, 355 insertions(+), 248 deletions(-)
 create mode 100644 libavcodec/diracdec.h
diff mbox series

Patch

diff --git a/libavcodec/diracdec.c b/libavcodec/diracdec.c
index 76209aebba..542824f6e1 100644
--- a/libavcodec/diracdec.c
+++ b/libavcodec/diracdec.c
@@ -26,228 +26,11 @@ 
  * @author Marco Gerards <marco@gnu.org>, David Conrad, Jordi Ortiz <nenjordi@gmail.com>
  */
 
-#include "libavutil/mem.h"
-#include "libavutil/mem_internal.h"
-#include "libavutil/pixdesc.h"
-#include "libavutil/thread.h"
-#include "avcodec.h"
-#include "get_bits.h"
-#include "codec_internal.h"
-#include "decode.h"
-#include "golomb.h"
-#include "dirac_arith.h"
-#include "dirac_vlc.h"
-#include "mpegvideoencdsp.h"
-#include "dirac_dwt.h"
-#include "dirac.h"
-#include "diractab.h"
-#include "diracdsp.h"
-#include "videodsp.h"
-
-#define EDGE_WIDTH 16
-
-/**
- * The spec limits this to 3 for frame coding, but in practice can be as high as 6
- */
-#define MAX_REFERENCE_FRAMES 8
-#define MAX_DELAY 5         /* limit for main profile for frame coding (TODO: field coding) */
-#define MAX_FRAMES (MAX_REFERENCE_FRAMES + MAX_DELAY + 1)
-#define MAX_QUANT 255        /* max quant for VC-2 */
-#define MAX_BLOCKSIZE 32    /* maximum xblen/yblen we support */
-
-/**
- * DiracBlock->ref flags, if set then the block does MC from the given ref
- */
-#define DIRAC_REF_MASK_REF1   1
-#define DIRAC_REF_MASK_REF2   2
-#define DIRAC_REF_MASK_GLOBAL 4
-
-/**
- * Value of Picture.reference when Picture is not a reference picture, but
- * is held for delayed output.
- */
-#define DELAYED_PIC_REF 4
-
-#define CALC_PADDING(size, depth)                       \
-    (((size + (1 << depth) - 1) >> depth) << depth)
-
-#define DIVRNDUP(a, b) (((a) + (b) - 1) / (b))
-
-typedef struct {
-    AVFrame *avframe;
-    int interpolated[3];    /* 1 if hpel[] is valid */
-    uint8_t *hpel[3][4];
-    uint8_t *hpel_base[3][4];
-    int reference;
-    unsigned picture_number;
-} DiracFrame;
-
-typedef struct {
-    union {
-        int16_t mv[2][2];
-        int16_t dc[3];
-    } u; /* anonymous unions aren't in C99 :( */
-    uint8_t ref;
-} DiracBlock;
-
-typedef struct SubBand {
-    int level;
-    int orientation;
-    int stride; /* in bytes */
-    int width;
-    int height;
-    int pshift;
-    int quant;
-    uint8_t *ibuf;
-    struct SubBand *parent;
-
-    /* for low delay */
-    unsigned length;
-    const uint8_t *coeff_data;
-} SubBand;
-
-typedef struct Plane {
-    DWTPlane idwt;
-
-    int width;
-    int height;
-    ptrdiff_t stride;
-
-    /* block length */
-    uint8_t xblen;
-    uint8_t yblen;
-    /* block separation (block n+1 starts after this many pixels in block n) */
-    uint8_t xbsep;
-    uint8_t ybsep;
-    /* amount of overspill on each edge (half of the overlap between blocks) */
-    uint8_t xoffset;
-    uint8_t yoffset;
-
-    SubBand band[MAX_DWT_LEVELS][4];
-} Plane;
-
-/* Used by Low Delay and High Quality profiles */
-typedef struct DiracSlice {
-    GetBitContext gb;
-    int slice_x;
-    int slice_y;
-    int bytes;
-} DiracSlice;
-
-typedef struct DiracContext {
-    AVCodecContext *avctx;
-    MpegvideoEncDSPContext mpvencdsp;
-    VideoDSPContext vdsp;
-    DiracDSPContext diracdsp;
-    DiracVersionInfo version;
-    GetBitContext gb;
-    AVDiracSeqHeader seq;
-    int seen_sequence_header;
-    int64_t frame_number;       /* number of the next frame to display       */
-    Plane plane[3];
-    int chroma_x_shift;
-    int chroma_y_shift;
-
-    int bit_depth;              /* bit depth                                 */
-    int pshift;                 /* pixel shift = bit_depth > 8               */
-
-    int zero_res;               /* zero residue flag                         */
-    int is_arith;               /* whether coeffs use arith or golomb coding */
-    int core_syntax;            /* use core syntax only                      */
-    int low_delay;              /* use the low delay syntax                  */
-    int hq_picture;             /* high quality picture, enables low_delay   */
-    int ld_picture;             /* use low delay picture, turns on low_delay */
-    int dc_prediction;          /* has dc prediction                         */
-    int globalmc_flag;          /* use global motion compensation            */
-    int num_refs;               /* number of reference pictures              */
-
-    /* wavelet decoding */
-    unsigned wavelet_depth;     /* depth of the IDWT                         */
-    unsigned wavelet_idx;
-
-    /**
-     * schroedinger older than 1.0.8 doesn't store
-     * quant delta if only one codebook exists in a band
-     */
-    unsigned old_delta_quant;
-    unsigned codeblock_mode;
-
-    unsigned num_x;              /* number of horizontal slices               */
-    unsigned num_y;              /* number of vertical slices                 */
-
-    uint8_t *thread_buf;         /* Per-thread buffer for coefficient storage */
-    int threads_num_buf;         /* Current # of buffers allocated            */
-    int thread_buf_size;         /* Each thread has a buffer this size        */
-
-    DiracSlice *slice_params_buf;
-    int slice_params_num_buf;
-
-    struct {
-        unsigned width;
-        unsigned height;
-    } codeblock[MAX_DWT_LEVELS+1];
-
-    struct {
-        AVRational bytes;       /* average bytes per slice                   */
-        uint8_t quant[MAX_DWT_LEVELS][4]; /* [DIRAC_STD] E.1 */
-    } lowdelay;
-
-    struct {
-        unsigned prefix_bytes;
-        uint64_t size_scaler;
-    } highquality;
-
-    struct {
-        int pan_tilt[2];        /* pan/tilt vector                           */
-        int zrs[2][2];          /* zoom/rotate/shear matrix                  */
-        int perspective[2];     /* perspective vector                        */
-        unsigned zrs_exp;
-        unsigned perspective_exp;
-    } globalmc[2];
-
-    /* motion compensation */
-    uint8_t mv_precision;       /* [DIRAC_STD] REFS_WT_PRECISION             */
-    int16_t weight[2];          /* [DIRAC_STD] REF1_WT and REF2_WT           */
-    unsigned weight_log2denom;  /* [DIRAC_STD] REFS_WT_PRECISION             */
-
-    int blwidth;                /* number of blocks (horizontally)           */
-    int blheight;               /* number of blocks (vertically)             */
-    int sbwidth;                /* number of superblocks (horizontally)      */
-    int sbheight;               /* number of superblocks (vertically)        */
-
-    uint8_t *sbsplit;
-    DiracBlock *blmotion;
-
-    uint8_t *edge_emu_buffer[4];
-    uint8_t *edge_emu_buffer_base;
-
-    uint16_t *mctmp;            /* buffer holding the MC data multiplied by OBMC weights */
-    uint8_t *mcscratch;
-    int buffer_stride;
-
-    DECLARE_ALIGNED(16, uint8_t, obmc_weight)[3][MAX_BLOCKSIZE*MAX_BLOCKSIZE];
-
-    void (*put_pixels_tab[4])(uint8_t *dst, const uint8_t *src[5], int stride, int h);
-    void (*avg_pixels_tab[4])(uint8_t *dst, const uint8_t *src[5], int stride, int h);
-    void (*add_obmc)(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen);
-    dirac_weight_func weight_func;
-    dirac_biweight_func biweight_func;
-
-    DiracFrame *current_picture;
-    DiracFrame *ref_pics[2];
-
-    DiracFrame *ref_frames[MAX_REFERENCE_FRAMES+1];
-    DiracFrame *delay_frames[MAX_DELAY+1];
-    DiracFrame all_frames[MAX_FRAMES];
-} DiracContext;
-
-enum dirac_subband {
-    subband_ll = 0,
-    subband_hl = 1,
-    subband_lh = 2,
-    subband_hh = 3,
-    subband_nb,
-};
+#include "diracdec.h"
+#include "hwaccels.h"
+#include "hwconfig.h"
+#include "libavutil/imgutils.c"
+#include "config_components.h"
 
 /* magic number division by 3 from schroedinger */
 static inline int divide3(int x)
@@ -351,7 +134,7 @@  static int alloc_buffers(DiracContext *s, int stride)
     return 0;
 }
 
-static av_cold void free_sequence_buffers(DiracContext *s)
+static void free_sequence_buffers(DiracContext *s)
 {
     int i, j, k;
 
@@ -403,8 +186,11 @@  static av_cold int dirac_decode_init(AVCodecContext *avctx)
 
     for (i = 0; i < MAX_FRAMES; i++) {
         s->all_frames[i].avframe = av_frame_alloc();
-        if (!s->all_frames[i].avframe)
+        if (!s->all_frames[i].avframe) {
+            while (i > 0)
+                av_frame_free(&s->all_frames[--i].avframe);
             return AVERROR(ENOMEM);
+        }
     }
     ret = ff_thread_once(&dirac_arith_init, ff_dirac_init_arith_tables);
     if (ret != 0)
@@ -413,7 +199,7 @@  static av_cold int dirac_decode_init(AVCodecContext *avctx)
     return 0;
 }
 
-static av_cold void dirac_decode_flush(AVCodecContext *avctx)
+static void dirac_decode_flush(AVCodecContext *avctx)
 {
     DiracContext *s = avctx->priv_data;
     free_sequence_buffers(s);
@@ -426,9 +212,7 @@  static av_cold int dirac_decode_end(AVCodecContext *avctx)
     DiracContext *s = avctx->priv_data;
     int i;
 
-    // Necessary in case dirac_decode_init() failed
-    if (s->all_frames[MAX_FRAMES - 1].avframe)
-        free_sequence_buffers(s);
+    dirac_decode_flush(avctx);
     for (i = 0; i < MAX_FRAMES; i++)
         av_frame_free(&s->all_frames[i].avframe);
 
@@ -812,14 +596,6 @@  static int decode_lowdelay_slice(AVCodecContext *avctx, void *arg)
     return 0;
 }
 
-typedef struct SliceCoeffs {
-    int left;
-    int top;
-    int tot_h;
-    int tot_v;
-    int tot;
-} SliceCoeffs;
-
 static int subband_coeffs(const DiracContext *s, int x, int y, int p,
                           SliceCoeffs c[MAX_DWT_LEVELS])
 {
@@ -1006,7 +782,10 @@  static int decode_lowdelay(DiracContext *s)
             return AVERROR_INVALIDDATA;
         }
 
-        avctx->execute2(avctx, decode_hq_slice_row, slices, NULL, s->num_y);
+        if (avctx->hwaccel)
+            FF_HW_CALL(avctx, decode_slice, NULL, 0);
+        else
+            avctx->execute2(avctx, decode_hq_slice_row, slices, NULL, s->num_y);
     } else {
         for (slice_y = 0; bufsize > 0 && slice_y < s->num_y; slice_y++) {
             for (slice_x = 0; bufsize > 0 && slice_x < s->num_x; slice_x++) {
@@ -1873,7 +1652,13 @@  static int dirac_decode_frame_internal(DiracContext *s)
 {
     DWTContext d;
     int y, i, comp, dsty;
-    int ret;
+    int ret = -1;
+
+    if (s->avctx->hwaccel) {
+        ret = FF_HW_CALL(s->avctx, start_frame, NULL, 0);
+        if (ret < 0)
+            return ret;
+    }
 
     if (s->low_delay) {
         /* [DIRAC_STD] 13.5.1 low_delay_transform_data() */
@@ -1889,6 +1674,14 @@  static int dirac_decode_frame_internal(DiracContext *s)
         }
     }
 
+    if (s->avctx->hwaccel) {
+        ret = ffhwaccel(s->avctx->hwaccel)->end_frame(s->avctx);
+        if (ret == 0) {
+            /* Hwaccel failed - fall back on software decoder */
+        }
+            return ret;
+    }
+
     for (comp = 0; comp < 3; comp++) {
         Plane *p       = &s->plane[comp];
         uint8_t *frame = s->current_picture->avframe->data[comp];
@@ -1904,6 +1697,7 @@  static int dirac_decode_frame_internal(DiracContext *s)
             if (ret < 0)
                 return ret;
         }
+
         ret = ff_spatial_idwt_init(&d, &p->idwt, s->wavelet_idx+2,
                                    s->wavelet_depth, s->bit_depth);
         if (ret < 0)
@@ -1970,15 +1764,23 @@  static int get_buffer_with_edge(AVCodecContext *avctx, AVFrame *f, int flags)
 {
     int ret, i;
     int chroma_x_shift, chroma_y_shift;
-    ret = av_pix_fmt_get_chroma_sub_sample(avctx->pix_fmt, &chroma_x_shift,
+    DiracContext *s = avctx->priv_data;
+    ret = av_pix_fmt_get_chroma_sub_sample(s->sof_pix_fmt, &chroma_x_shift,
                                            &chroma_y_shift);
     if (ret < 0)
         return ret;
 
+    /*if (avctx->hwaccel) {*/
+    /*    f->width   = s->plane[0].width;*/
+    /*    f->height  = s->plane[0].height;*/
+    /*    ret = ff_get_buffer(avctx, f, flags);*/
+    /*    return ret;*/
+    /*}*/
+
     f->width  = avctx->width  + 2 * EDGE_WIDTH;
     f->height = avctx->height + 2 * EDGE_WIDTH + 2;
     ret = ff_get_buffer(avctx, f, flags);
-    if (ret < 0)
+    if (ret < 0 || avctx->hwaccel)
         return ret;
 
     for (i = 0; f->data[i]; i++) {
@@ -2136,6 +1938,7 @@  static int dirac_decode_data_unit(AVCodecContext *avctx, const uint8_t *buf, int
     init_get_bits(&s->gb, &buf[13], 8*(size - DATA_UNIT_HEADER_SIZE));
 
     if (parse_code == DIRAC_PCODE_SEQ_HEADER) {
+        enum AVPixelFormat *pix_fmts;
         if (s->seen_sequence_header)
             return 0;
 
@@ -2156,6 +1959,7 @@  static int dirac_decode_data_unit(AVCodecContext *avctx, const uint8_t *buf, int
         }
 
         ff_set_sar(avctx, dsh->sample_aspect_ratio);
+        s->sof_pix_fmt         = dsh->pix_fmt;
         avctx->pix_fmt         = dsh->pix_fmt;
         avctx->color_range     = dsh->color_range;
         avctx->color_trc       = dsh->color_trc;
@@ -2172,7 +1976,20 @@  static int dirac_decode_data_unit(AVCodecContext *avctx, const uint8_t *buf, int
 
         s->pshift = s->bit_depth > 8;
 
-        ret = av_pix_fmt_get_chroma_sub_sample(avctx->pix_fmt,
+        /*if (s->pshift) {*/
+        /*    avctx->pix_fmt = s->sof_pix_fmt;*/
+        /*} else {*/
+            pix_fmts = (enum AVPixelFormat[]){
+#if CONFIG_DIRAC_VULKAN_HWACCEL
+                AV_PIX_FMT_VULKAN,
+#endif
+                s->sof_pix_fmt,
+                AV_PIX_FMT_NONE,
+            };
+            avctx->pix_fmt = ff_get_format(s->avctx, pix_fmts);
+        /*}*/
+
+        ret = av_pix_fmt_get_chroma_sub_sample(s->sof_pix_fmt,
                                                &s->chroma_x_shift,
                                                &s->chroma_y_shift);
         if (ret < 0)
@@ -2202,9 +2019,10 @@  static int dirac_decode_data_unit(AVCodecContext *avctx, const uint8_t *buf, int
         }
 
         /* find an unused frame */
-        for (i = 0; i < MAX_FRAMES; i++)
+        for (i = 0; i < MAX_FRAMES; i++) 
             if (s->all_frames[i].avframe->data[0] == NULL)
                 pic = &s->all_frames[i];
+
         if (!pic) {
             av_log(avctx, AV_LOG_ERROR, "framelist full\n");
             return AVERROR_INVALIDDATA;
@@ -2244,12 +2062,28 @@  static int dirac_decode_data_unit(AVCodecContext *avctx, const uint8_t *buf, int
         if ((ret = get_buffer_with_edge(avctx, pic->avframe, (parse_code & 0x0C) == 0x0C ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
             return ret;
         s->current_picture = pic;
-        s->plane[0].stride = pic->avframe->linesize[0];
-        s->plane[1].stride = pic->avframe->linesize[1];
-        s->plane[2].stride = pic->avframe->linesize[2];
 
-        if (alloc_buffers(s, FFMAX3(FFABS(s->plane[0].stride), FFABS(s->plane[1].stride), FFABS(s->plane[2].stride))) < 0)
-            return AVERROR(ENOMEM);
+        if (s->avctx->hwaccel) {
+            if (!(s->low_delay && s->hq_picture)) {
+                av_log(avctx, AV_LOG_ERROR, "The HWaccel only supports VC-2\n");
+                return AVERROR_INVALIDDATA;
+            }
+
+            if (!s->hwaccel_picture_private) {
+                const FFHWAccel *hwaccel = ffhwaccel(s->avctx->hwaccel);
+                s->hwaccel_picture_private =
+                    av_mallocz(hwaccel->frame_priv_data_size);
+                if (!s->hwaccel_picture_private)
+                    return AVERROR(ENOMEM);
+            }
+        } else {
+            s->plane[0].stride = pic->avframe->linesize[0];
+            s->plane[1].stride = pic->avframe->linesize[1];
+            s->plane[2].stride = pic->avframe->linesize[2];
+
+            if (alloc_buffers(s, FFMAX3(FFABS(s->plane[0].stride), FFABS(s->plane[1].stride), FFABS(s->plane[2].stride))) < 0)
+                return AVERROR(ENOMEM);
+        }
 
         /* [DIRAC_STD] 11.1 Picture parse. picture_parse() */
         ret = dirac_decode_picture_header(s);
@@ -2359,6 +2193,7 @@  static int dirac_decode_frame(AVCodecContext *avctx, AVFrame *picture,
     return buf_idx;
 }
 
+
 const FFCodec ff_dirac_decoder = {
     .p.name         = "dirac",
     CODEC_LONG_NAME("BBC Dirac VC-2"),
@@ -2370,5 +2205,10 @@  const FFCodec ff_dirac_decoder = {
     FF_CODEC_DECODE_CB(dirac_decode_frame),
     .p.capabilities = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_SLICE_THREADS | AV_CODEC_CAP_DR1,
     .flush          = dirac_decode_flush,
-    .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP,
+    .hw_configs     = (const AVCodecHWConfigInternal *const []) {
+#if CONFIG_DIRAC_VULKAN_HWACCEL
+        HWACCEL_VULKAN(dirac),
+#endif
+        NULL
+    },
 };
diff --git a/libavcodec/diracdec.h b/libavcodec/diracdec.h
new file mode 100644
index 0000000000..9c8dc14127
--- /dev/null
+++ b/libavcodec/diracdec.h
@@ -0,0 +1,267 @@ 
+/*
+ * Copyright (C) 2007 Marco Gerards <marco@gnu.org>
+ * Copyright (C) 2009 David Conrad
+ * Copyright (C) 2011 Jordi Ortiz
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Dirac Decoder
+ * @author Marco Gerards <marco@gnu.org>, David Conrad, Jordi Ortiz <nenjordi@gmail.com>
+ */
+
+
+#ifndef DIRACDEC_H
+#define DIRACDEC_H
+
+#include "libavutil/mem.h"
+#include "libavutil/mem_internal.h"
+#include "libavutil/pixdesc.h"
+#include "libavutil/thread.h"
+#include "avcodec.h"
+#include "get_bits.h"
+#include "codec_internal.h"
+#include "decode.h"
+#include "golomb.h"
+#include "dirac_arith.h"
+#include "dirac_vlc.h"
+#include "mpegvideoencdsp.h"
+#include "dirac_dwt.h"
+#include "dirac.h"
+#include "diractab.h"
+#include "diracdsp.h"
+#include "videodsp.h"
+#include "hwaccel_internal.h"
+
+#define EDGE_WIDTH 16
+
+/**
+ * The spec limits this to 3 for frame coding, but in practice can be as high as 6
+ */
+#define MAX_REFERENCE_FRAMES 8
+#define MAX_DELAY 5         /* limit for main profile for frame coding (TODO: field coding) */
+#define MAX_FRAMES (MAX_REFERENCE_FRAMES + MAX_DELAY + 1)
+#define MAX_QUANT 255        /* max quant for VC-2 */
+#define MAX_BLOCKSIZE 32    /* maximum xblen/yblen we support */
+
+/**
+ * DiracBlock->ref flags, if set then the block does MC from the given ref
+ */
+#define DIRAC_REF_MASK_REF1   1
+#define DIRAC_REF_MASK_REF2   2
+#define DIRAC_REF_MASK_GLOBAL 4
+
+/**
+ * Value of Picture.reference when Picture is not a reference picture, but
+ * is held for delayed output.
+ */
+#define DELAYED_PIC_REF 4
+
+#define CALC_PADDING(size, depth)                       \
+    (((size + (1 << depth) - 1) >> depth) << depth)
+
+#define DIVRNDUP(a, b) (((a) + (b) - 1) / (b))
+
+typedef struct {
+    AVFrame *avframe;
+    int interpolated[3];    /* 1 if hpel[] is valid */
+    uint8_t *hpel[3][4];
+    uint8_t *hpel_base[3][4];
+    int reference;
+    unsigned picture_number;
+} DiracFrame;
+
+typedef struct {
+    union {
+        int16_t mv[2][2];
+        int16_t dc[3];
+    } u; /* anonymous unions aren't in C99 :( */
+    uint8_t ref;
+} DiracBlock;
+
+typedef struct SubBand {
+    int level;
+    int orientation;
+    int stride; /* in bytes */
+    int width;
+    int height;
+    int pshift;
+    int quant;
+    uint8_t *ibuf;
+    struct SubBand *parent;
+
+    /* for low delay */
+    unsigned length;
+    const uint8_t *coeff_data;
+} SubBand;
+
+typedef struct Plane {
+    DWTPlane idwt;
+
+    int width;
+    int height;
+    ptrdiff_t stride;
+
+    /* block length */
+    uint8_t xblen;
+    uint8_t yblen;
+    /* block separation (block n+1 starts after this many pixels in block n) */
+    uint8_t xbsep;
+    uint8_t ybsep;
+    /* amount of overspill on each edge (half of the overlap between blocks) */
+    uint8_t xoffset;
+    uint8_t yoffset;
+
+    SubBand band[MAX_DWT_LEVELS][4];
+} Plane;
+
+/* Used by Low Delay and High Quality profiles */
+typedef struct DiracSlice {
+    GetBitContext gb;
+    int slice_x;
+    int slice_y;
+    int bytes;
+} DiracSlice;
+
+typedef struct DiracContext {
+    AVCodecContext *avctx;
+    MpegvideoEncDSPContext mpvencdsp;
+    VideoDSPContext vdsp;
+    DiracDSPContext diracdsp;
+    DiracVersionInfo version;
+    GetBitContext gb;
+    AVDiracSeqHeader seq;
+    enum AVPixelFormat sof_pix_fmt;
+    void *hwaccel_picture_private;
+    int seen_sequence_header;
+    int64_t frame_number;       /* number of the next frame to display       */
+    Plane plane[3];
+    int chroma_x_shift;
+    int chroma_y_shift;
+
+    int bit_depth;              /* bit depth                                 */
+    int pshift;                 /* pixel shift = bit_depth > 8               */
+
+    int zero_res;               /* zero residue flag                         */
+    int is_arith;               /* whether coeffs use arith or golomb coding */
+    int core_syntax;            /* use core syntax only                      */
+    int low_delay;              /* use the low delay syntax                  */
+    int hq_picture;             /* high quality picture, enables low_delay   */
+    int ld_picture;             /* use low delay picture, turns on low_delay */
+    int dc_prediction;          /* has dc prediction                         */
+    int globalmc_flag;          /* use global motion compensation            */
+    int num_refs;               /* number of reference pictures              */
+
+    /* wavelet decoding */
+    unsigned wavelet_depth;     /* depth of the IDWT                         */
+    unsigned wavelet_idx;
+
+    /**
+     * schroedinger older than 1.0.8 doesn't store
+     * quant delta if only one codebook exists in a band
+     */
+    unsigned old_delta_quant;
+    unsigned codeblock_mode;
+
+    unsigned num_x;              /* number of horizontal slices               */
+    unsigned num_y;              /* number of vertical slices                 */
+
+    uint8_t *thread_buf;         /* Per-thread buffer for coefficient storage */
+    int threads_num_buf;         /* Current # of buffers allocated            */
+    int thread_buf_size;         /* Each thread has a buffer this size        */
+
+    DiracSlice *slice_params_buf;
+    int slice_params_num_buf;
+
+    struct {
+        unsigned width;
+        unsigned height;
+    } codeblock[MAX_DWT_LEVELS+1];
+
+    struct {
+        AVRational bytes;       /* average bytes per slice                   */
+        uint8_t quant[MAX_DWT_LEVELS][4]; /* [DIRAC_STD] E.1 */
+    } lowdelay;
+
+    struct {
+        unsigned prefix_bytes;
+        uint64_t size_scaler;
+    } highquality;
+
+    struct {
+        int pan_tilt[2];        /* pan/tilt vector                           */
+        int zrs[2][2];          /* zoom/rotate/shear matrix                  */
+        int perspective[2];     /* perspective vector                        */
+        unsigned zrs_exp;
+        unsigned perspective_exp;
+    } globalmc[2];
+
+    /* motion compensation */
+    uint8_t mv_precision;       /* [DIRAC_STD] REFS_WT_PRECISION             */
+    int16_t weight[2];          /* [DIRAC_STD] REF1_WT and REF2_WT           */
+    unsigned weight_log2denom;  /* [DIRAC_STD] REFS_WT_PRECISION             */
+
+    int blwidth;                /* number of blocks (horizontally)           */
+    int blheight;               /* number of blocks (vertically)             */
+    int sbwidth;                /* number of superblocks (horizontally)      */
+    int sbheight;               /* number of superblocks (vertically)        */
+
+    uint8_t *sbsplit;
+    DiracBlock *blmotion;
+
+    uint8_t *edge_emu_buffer[4];
+    uint8_t *edge_emu_buffer_base;
+
+    uint16_t *mctmp;            /* buffer holding the MC data multiplied by OBMC weights */
+    uint8_t *mcscratch;
+    int buffer_stride;
+
+    DECLARE_ALIGNED(16, uint8_t, obmc_weight)[3][MAX_BLOCKSIZE*MAX_BLOCKSIZE];
+
+    void (*put_pixels_tab[4])(uint8_t *dst, const uint8_t *src[5], int stride, int h);
+    void (*avg_pixels_tab[4])(uint8_t *dst, const uint8_t *src[5], int stride, int h);
+    void (*add_obmc)(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen);
+    dirac_weight_func weight_func;
+    dirac_biweight_func biweight_func;
+
+    DiracFrame *current_picture;
+    DiracFrame *ref_pics[2];
+
+    DiracFrame *ref_frames[MAX_REFERENCE_FRAMES+1];
+    DiracFrame *delay_frames[MAX_DELAY+1];
+    DiracFrame all_frames[MAX_FRAMES];
+} DiracContext;
+
+enum dirac_subband {
+    subband_ll = 0,
+    subband_hl = 1,
+    subband_lh = 2,
+    subband_hh = 3,
+    subband_nb,
+};
+
+typedef struct SliceCoeffs {
+    int left;
+    int top;
+    int tot_h;
+    int tot_v;
+    int tot;
+} SliceCoeffs;
+
+#endif