diff mbox series

[FFmpeg-devel] JPEG-XL : Image Format Parser

Message ID 20200315184154.28685-2-varun.gupta140@gmail.com
State New
Headers show
Series [FFmpeg-devel] JPEG-XL : Image Format Parser
Related show

Checks

Context Check Description
andriy/ffmpeg-patchwork pending
andriy/ffmpeg-patchwork success Applied patch
andriy/ffmpeg-patchwork success Configure finished
andriy/ffmpeg-patchwork success Make finished
andriy/ffmpeg-patchwork success Make fate finished

Commit Message

Varun Gupta March 15, 2020, 6:41 p.m. UTC
From: Varun Gupta <gupvar@amazon.com>

---
 libavcodec/avcodec.h        |   1 +
 libavcodec/jpeg-xl.h        | 180 +++++++++
 libavcodec/jpeg-xl_parser.c | 707 ++++++++++++++++++++++++++++++++++++
 3 files changed, 888 insertions(+)
 create mode 100644 libavcodec/jpeg-xl.h
 create mode 100644 libavcodec/jpeg-xl_parser.c

Comments

Jai Luthra March 18, 2020, 10:46 a.m. UTC | #1
Hi Varun,

Set [RFC]/[WIP]/[GSOC] and other subject labels for patches that are not 
intended for merge review. From your first email it seems like your mailer 
mangled it. You can edit the .patch file before sending it via git send-email.

On Mon, Mar 16, 2020 at 12:11:54AM +0530, Varun Gupta wrote:
> [...]
>+
>+typedef struct JPEGXLParseContext {
>+    ParseContext pc;
>+    int state;
>+    int index;  // keeps track of number of bits read from the media file
>+    SizeHeader size;
>+    PreviewHeader preview;
>+    ImageMetadata metadata;
>+    AnimationHeader animation;
>+} JPEGXLParseContext;

Most of the decoding specific headers should be read in the decoder itself. 
The parser should only find the end of frames, and the decoder can initialize other
parameters from the first packet (which would be header + frame) and use the same
initialized contexts for the subsequent frame packets. Take a look at other 
video and image parsers.

If you think that it won't be possible to find frame ends without reading all 
the headers then that is a different case. Even then you need to make sure 
your parameters reach the decoder module via AVCodecContext->priv_data.

But it is good that you read the spec and figured out the bitstream 
organization.

>+
>+static unsigned int bits_offset_enum(GetBitContext gb, int n, int offset) {
>+    unsigned int read_n_bits = get_bits(&gb, n);
>+    return read_n_bits + offset;
>+}
>+
>+// TODO -> add checks for buffer size overflow in between and ill formed checks
>+static int jpegxl_find_frame_end(JPEGXLParseContext *context, const uint8_t *buf,
>+                                 int buf_size) {
>+    int index, next = END_NOT_FOUND;
>+    GetBitContext gb;
>+    init_get_bits(&gb, buf, buf_size*8);

init_get_bits8 can be used here as your buf_size is in bytes.

>+    for (index = 0; index < buf_size*8; ) {
>+        if (!context->state) {
>+            if (get_bits(&gb, 8) == JPEG_XL_SIG_FF) {
>+                context->state = JPEGXL_SIG;

Any particular reason for choosing state as type int and not the enum type it 
is being set to?

You can add a state called JPEGXL_UNDEFINED=0 if you are using int just to
handle that case.

>+            } else {
>+                // TODO -> Bitstream is ill formed
>+            }
>+            index += 8;

GetBitContext maintains an internal index as well, look into how you can use 
it for your case.

>+            context->index += 8;
>+        } else if (context->state == JPEGXL_SIG) {
>+            if (get_bits(&gb, 8) == JPEG_XL_SIG_TYPE) {
>+                context->state = JPEGXL_SIZE_HEADER;
>+            } else {
>+                // TODO -> Bitstream is ill formed
>+            }
>+            index +=8;
>+            context->index += 8;
>+        } else if (context->state == JPEGXL_SIZE_HEADER) {
>+            // default values
>+            context->size.ysize_div8_minus_1 = 0;
>+            context->size.ysize_minus_1 = 0;
>+            context->size.xsize_div8_minus_1 = 0;
>+            context->size.xsize_minus_1 = 0;

To simplify the code, you can zero-initialize all structs at the start by setting
the context struct to 0. Then you only need to set the non-zero initial values.

>+
>+            unsigned int small = get_bits(&gb, 1);

I do not think this will compile with FFmpeg which is C89 standard. Declare 
all variables at top of block. Same with your usage of loop variables, define 
them beforehand.

>+            index++;
>+            context->index++;
>+            if (small) {
>+                context->size.ysize_div8_minus_1 = get_bits(&gb, 5);
>+                index += 5;
>+                context->index += 5;
>+            } else {
>+                unsigned int input = get_bits(&gb, 2); // U32() first reads 2 bits
>+                index += 2;
>+                context->index += 2;
>+                if (input == 0) {   // d0 = Bits(9)
>+                    context->size.ysize_minus_1 = get_bits(&gb, 9);
>+                    index += 9;
>+                    context->index += 9;
>+                } else if (input == 1) {   // d1 = Bits(13)
>+                    context->size.ysize_minus_1 = get_bits(&gb, 17);
>+                    index += 13;
>+                    context->index += 13;
>+                } else if (input == 2) {   // d2 = Bits(18)
>+                    context->size.ysize_minus_1 = get_bits_long(&gb, 18);
>+                    index += 18;
>+                    context->index += 18;
>+                } else {   // d3 = Bits(30)
>+                    context->size.ysize_minus_1 = get_bits_long(&gb, 30);
>+                    index += 30;
>+                    context->index += 30;
>+                }
>+            }

You could simplify this a lot by using a switch case to set a temporary variable n
to the number of bits to read, and then reading n bits into ysize_minus_1.

Similar simplifications can be done in a lot of places.

>+            unsigned int ratio = get_bits(&gb, 3);
>+            index += 3;
>+            context->index += 3;
>+            if (ratio == 0) {
>+                if (small) {
>+                    context->size.xsize_div8_minus_1 = get_bits(&gb, 5);
>+                    index += 5;
>+                    context->index += 5;
>+                } else {
>+                    unsigned int input = get_bits(&gb, 2); // U32() first reads 2 bits
>+                    index += 2;
>+                    context->index += 2;
>+                    if (input == 0) {   // d0 = Bits(9)
>+                        context->size.xsize_minus_1 = get_bits(&gb, 9);
>+                        index += 9;
>+                        context->index += 9;
>+                    } else if (input == 1) {   // d1 = Bits(13)
>+                        context->size.xsize_minus_1 = get_bits(&gb, 17);
>+                        index += 13;
>+                        context->index += 13;
>+                    } else if (input == 2) {   // d2 = Bits(18)
>+                        context->size.xsize_minus_1 = get_bits_long(&gb, 18);
>+                        index += 18;
>+                        context->index += 18;
>+                    } else {   // d3 = Bits(30)
>+                        context->size.xsize_minus_1 = get_bits_long(&gb, 30);
>+                        index += 30;
>+                        context->index += 30;
>+                    }
>+                }
>+            }
>+            context->state = JPEGXL_IMAGE_METADATA;
>+        } else if (context->state == JPEGXL_IMAGE_METADATA) {
>+            // setting up default values
>+            context->metadata.have_icc = 0;
>+            context->metadata.alpha_bits = 0;
>+            context->metadata.bits_per_sample = 8;
>+            context->metadata.target_nits_div50 = 5;
>+            context->metadata.colour_encoding.received_icc = 0;
>+            context->metadata.colour_encoding.opaque_icc = 0;
>+            context->metadata.colour_encoding.colour_space = kRGB;
>+            context->metadata.colour_encoding.white_point = kD65;
>+            context->metadata.colour_encoding.primaries = kSRGB;
>+            context->metadata.colour_encoding.have_gamma = 0;
>+            context->metadata.colour_encoding.gamma = 0;
>+            context->metadata.colour_encoding.transfer_function = kSRGBTransferFunction;
>+            context->metadata.colour_encoding.rendering_intent = kRelative;
>+            context->metadata.m2.have_preview = 0;
>+            context->metadata.m2.have_animation = 0;
>+            context->metadata.m2.orientation_minus_1 = 0;
>+            context->metadata.m2.depth_bits = 0;
>+            context->metadata.m2.depth_shift = 0;
>+            context->metadata.m2.num_extra_channels = 0;
>+            context->metadata.m2.extra_channel_bits = 0;
>+
>+            context->metadata.all_default = get_bits(&gb, 1);
>+            index++;
>+            context->index++;
>+            if (!context->metadata.all_default) {
>+                context->metadata.have_icc = get_bits(&gb, 1);
>+                index++;
>+                context->index++;
>+
>+                unsigned int input = get_bits(&gb, 2); // U32() first reads 2 bits
>+                index += 2;
>+                context->index += 2;
>+                if (input == 0) {   // d0 = Val(8)
>+                    context->metadata.bits_per_sample = 8;
>+                } else if (input == 1) {  // d1 = Val(16)
>+                    context->metadata.bits_per_sample = 16;
>+                } else if (input == 2) {  // d2 = Val(32)
>+                    context->metadata.bits_per_sample = 32;

if (input >= 0 && input <= 2)
     context->metadata.bits_per_sample = 1 << (3 + input);

but the current way is also OK for this case.

>+                } else {  // d3 = Bits(5)
>+                    context->metadata.bits_per_sample = get_bits(&gb, 5);
>+                    index += 5;
>+                    context->index += 5;
>+                }
>+
>+                context->metadata.colour_encoding.all_default = get_bits(&gb, 1);
>+                index++;
>+                context->index++;
>+                if(!context->metadata.colour_encoding.all_default) {
>+                    context->metadata.colour_encoding.received_icc = get_bits(&gb ,1);
>+                    index++;
>+                    context->index++;
>+                }
>+                if(context->metadata.colour_encoding.received_icc) {
>+                    context->metadata.colour_encoding.opaque_icc = get_bits(&gb, 1);
>+                    index++;
>+                    context->index++;
>+                }
>+                int use_desc = !context->metadata.colour_encoding.all_default &&
>+                               !context->metadata.colour_encoding.opaque_icc;
>+                if(use_desc) {  // colour_space enum
>+                    unsigned int input = get_bits(&gb, 2);
>+                    index += 2;
>+                    context->index += 2;
>+                    unsigned int enum_value;
>+                    if (input == 0) {
>+                        enum_value = 0;
>+                    } else if (input == 1) {
>+                        enum_value = 1;
>+                    } else if (input == 2) {
>+                        enum_value = bits_offset_enum(gb, 4, 2);
>+                        index += 4;
>+                        context->index += 4;
>+                    } else {
>+                        enum_value = bits_offset_enum(gb, 6, 18);
>+                        index += 6;
>+                        context->index += 6;
>+                    }
>+
>+                    if (checkIfValueInColourSpace(enum_value)) {
>+                        context->metadata.colour_encoding.colour_space = enum_value;
>+                    } else {
>+                        // TODO -> Bitstream is ill formed
>+                    }
>+                }
>+                int not_xy = context->metadata.colour_encoding.colour_space != kXYZ &&
>+                             context->metadata.colour_encoding.colour_space != kXYB;
>+                if(use_desc && not_xy) {  // white_point enum
>+                    unsigned int input = get_bits(&gb, 2);
>+                    index += 2;
>+                    context->index += 2;
>+                    unsigned int enum_value;
>+                    if (input == 0) {
>+                        enum_value = 0;
>+                    } else if (input == 1) {
>+                        enum_value = 1;
>+                    } else if (input == 2) {
>+                        enum_value = bits_offset_enum(gb, 4, 2);
>+                        index += 4;
>+                        context->index += 4;
>+                    } else {
>+                        enum_value = bits_offset_enum(gb, 6, 18);
>+                        index += 6;
>+                        context->index += 6;
>+                    }
>+
>+                    if (checkIfValueInWhitePoint(enum_value)) {
>+                        context->metadata.colour_encoding.white_point = enum_value;
>+                    } else {
>+                        // TODO -> Bitstream is ill formed
>+                    }
>+                }
>+                if (use_desc && context->metadata.colour_encoding.white_point == kCustom) {
>+                    // TODO -> Implement custom xy for white
>+                }
>+                if (use_desc && not_xy && context->metadata.colour_encoding.colour_space != kGrey) {   // primaries enum
>+                    unsigned int input = get_bits(&gb, 2);
>+                    index += 2;
>+                    context->index += 2;
>+                    unsigned int enum_value;
>+                    if (input == 0) {
>+                        enum_value = 0;
>+                    } else if (input == 1) {
>+                        enum_value = 1;
>+                    } else if (input == 2) {
>+                        enum_value = bits_offset_enum(gb, 4, 2);
>+                        index += 4;
>+                        context->index += 4;
>+                    } else {
>+                        enum_value = bits_offset_enum(gb, 6, 18);
>+                        index += 6;
>+                        context->index += 6;
>+                    }

This exact chunk of code was seen before (and is seen later as well); consider
moving it to a function.

>+
>+                    if (checkIfValueInPrimaries(enum_value)) {
>+                        context->metadata.colour_encoding.primaries = enum_value;
>+                    } else {
>+                        // TODO -> Bitstream is ill formed
>+                    }
>+                }
>+                if (use_desc && context->metadata.colour_encoding.primaries == kCustomPrimary) {
>+                    // TODO -> Implement custom xy for red
>+                }
>+                if (use_desc && context->metadata.colour_encoding.primaries == kCustomPrimary) {
>+                    // TODO -> Implement custom xy for green
>+                }
>+                if (use_desc && context->metadata.colour_encoding.primaries == kCustomPrimary) {
>+                    // TODO -> Implement custom xy for blue
>+                }
>+                if (use_desc && not_xy) {
>+                    context->metadata.colour_encoding.have_gamma = get_bits(&gb, 1);
>+                    index++;
>+                    context->index++;
>+                }
>+                if (use_desc && context->metadata.colour_encoding.have_gamma) {
>+                    get_bits_long(&gb, 24);
>+                    index += 24;
>+                    context->index += 24;
>+                }
>+                if (use_desc && !context->metadata.colour_encoding.have_gamma && not_xy) { // transfer_function enum
>+                    unsigned int input = get_bits(&gb, 2);
>+                    index += 2;
>+                    context->index += 2;
>+                    unsigned int enum_value;
>+                    if (input == 0) {
>+                        enum_value = 0;
>+                    } else if (input == 1) {
>+                        enum_value = 1;
>+                    } else if (input == 2) {
>+                        enum_value = bits_offset_enum(gb, 4, 2);
>+                        index += 4;
>+                        context->index += 4;
>+                    } else {
>+                        enum_value = bits_offset_enum(gb, 6, 18);
>+                        index += 6;
>+                        context->index += 6;
>+                    }
>+
>+                    if (checkIfValueInTransferFunction(enum_value)) {
>+                        context->metadata.colour_encoding.transfer_function = enum_value;
>+                    } else {
>+                        // TODO -> Bitstream is ill formed
>+                    }
>+                }
>+                if (use_desc && context->metadata.colour_encoding.colour_space != kGrey && not_xy) { // rendering_intent enum
>+                    unsigned int input = get_bits(&gb, 2);
>+                    index += 2;
>+                    context->index += 2;
>+                    unsigned int enum_value;
>+                    if (input == 0) {
>+                        enum_value = 0;
>+                    } else if (input == 1) {
>+                        enum_value = 1;
>+                    } else if (input == 2) {
>+                        enum_value = bits_offset_enum(gb, 4, 2);
>+                        index += 4;
>+                        context->index += 4;
>+                    } else {
>+                        enum_value = bits_offset_enum(gb, 6, 18);
>+                        index += 6;
>+                        context->index += 6;
>+                    }
>+
>+                    if (checkIfValueInRenderingIntent(enum_value)) {
>+                        context->metadata.colour_encoding.rendering_intent = enum_value;
>+                    } else {
>+                        // TODO -> Bitstream is ill formed
>+                    }
>+                }
>+
>+                input = get_bits(&gb, 2); // U32() first reads 2 bits
>+                index += 2;
>+                context->index += 2;
>+                if (input == 0) {   // d0 = Val(0)
>+                    context->metadata.alpha_bits = 0;
>+                } else if (input == 1) {  // d1 = Val(8)
>+                    context->metadata.alpha_bits = 8;
>+                } else if (input == 2) {  // d2 = Val(16)
>+                    context->metadata.alpha_bits = 16;
>+                } else {  // d3 = Bits(4)
>+                    context->metadata.alpha_bits = get_bits(&gb, 4);
>+                    index += 4;
>+                    context->index += 4;
>+                }

Again, this is very similar to an earlier block. It is OK if you wrote everything
explicitly for testing, but do clean it up at some point.

> [...]


Cheers

--
Jai (darkapex)
diff mbox series

Patch

diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index 5a0fc3405c..ecfa2e0009 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -463,6 +463,7 @@  enum AVCodecID {
     AV_CODEC_ID_MVDV,
     AV_CODEC_ID_MVHA,
     AV_CODEC_ID_CDTOONS,
+    AV_CODEC_ID_JPEGXL,
 
     /* various PCM "codecs" */
     AV_CODEC_ID_FIRST_AUDIO = 0x10000,     ///< A dummy id pointing at the start of audio codecs
diff --git a/libavcodec/jpeg-xl.h b/libavcodec/jpeg-xl.h
new file mode 100644
index 0000000000..7a5b5a351d
--- /dev/null
+++ b/libavcodec/jpeg-xl.h
@@ -0,0 +1,180 @@ 
+/*
+ * JPEG-XL format definitions
+ * Copyright (c) 2020 Varun Gupta
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * JPEG-XL format definitions.
+ */
+
+#ifndef AVCODEC_JPEG_XL_H
+#define AVCODEC_JPEG_XL_H
+
+#include <stdint.h>
+
+#define JPEG_XL_SIG_FF    0xff
+#define JPEG_XL_SIG_TYPE  0x0a
+
+typedef enum ColourSpace {
+    kRGB = 0,
+    kGrey,
+    kXYB,
+    kUnknown,
+    kXYZ
+} ColourSpace;
+
+typedef enum Primaries {
+    kSRGB = 1,
+    kCustomPrimary = 2,
+    k2100 = 9,
+    kP3 = 11
+} Primaries;
+
+typedef enum RenderingIntent {
+    kPerceptual = 0,
+    kRelative,
+    kSaturation,
+    kAbsolute
+} RenderingIntent;
+
+typedef enum WhitePoint {
+    kD65 = 1,
+    kCustom = 2,
+    kE = 10,
+    kDCI = 11
+} WhitePoint;
+
+typedef enum TransferFunction {
+    k709 = 1,
+    kUnknownTransferFunction = 2,
+    kLinear = 8,
+    kSRGBTransferFunction = 13,
+    kPQ = 16,
+    kDCITransferFunction = 17,
+    kHLG = 18
+} TransferFunction;
+
+typedef struct Customxy {
+    unsigned int x;
+    unsigned int y;
+} Customxy;
+
+typedef struct ExtraChannelInfo {
+    unsigned int meaning;
+    float red;
+    float green;
+    float blue;
+    float solidity;
+} ExtraChannelInfo;
+
+typedef enum JPEGXLParseStates {
+    JPEGXL_SIG = 1,
+    JPEGXL_SIZE_HEADER,
+    JPEGXL_IMAGE_METADATA,
+    JPEGXL_PREVIEW_HEADER,
+    JPEGXL_ANIMATION_HEADER,
+    JPEGXL_ICC_CODEC,
+    JPEGXL_PREVIEW_FRAME,
+    JPEGXL_FRAMES
+} jpegxl_states;
+
+typedef struct ColourEncoding {
+    unsigned int all_default;
+    unsigned int received_icc;
+    unsigned int opaque_icc;
+    ColourSpace colour_space;
+    WhitePoint white_point;
+    Customxy white;
+    Primaries primaries;
+    Customxy red;
+    Customxy green;
+    Customxy blue;
+    unsigned int have_gamma;
+    unsigned long int gamma;
+    TransferFunction transfer_function;
+    RenderingIntent rendering_intent;
+} ColourEncoding;
+
+typedef struct ImageMetadata2 {
+    unsigned int all_default;
+    unsigned int have_preview;
+    unsigned int have_animation;
+    unsigned int orientation_minus_1;
+    unsigned int depth_bits;
+    unsigned int depth_shift;
+    unsigned int num_extra_channels;
+    unsigned int extra_channel_bits;
+    ExtraChannelInfo* extra_channel_info;
+} ImageMetadata2;
+
+typedef struct SizeHeader {
+    unsigned long int ysize_div8_minus_1;
+    unsigned long int ysize_minus_1;
+    unsigned long int xsize_div8_minus_1;
+    unsigned long int xsize_minus_1;
+} SizeHeader;
+
+typedef struct PreviewHeader {
+    unsigned long int ysize_div8_minus_1;
+    unsigned long int ysize_minus_1;
+    unsigned long int xsize_div8_minus_1;
+    unsigned long int xsize_minus_1;
+} PreviewHeader;
+
+typedef struct ImageMetadata {
+    unsigned int all_default;
+    unsigned int have_icc;
+    unsigned int bits_per_sample;
+    unsigned int alpha_bits;
+    unsigned int target_nits_div50;
+    ColourEncoding colour_encoding;
+    ImageMetadata2 m2;
+} ImageMetadata;
+
+typedef struct AnimationHeader {
+    unsigned int composite_still;
+    unsigned long int tps_numerator_minus_1;
+    unsigned int tps_denominator_minus_1;
+    unsigned long int num_loops;
+    unsigned int have_timecodes;
+} AnimationHeader;
+
+static int checkIfValueInColourSpace (unsigned int value) {
+    return value >=0 && value <= 4;
+}
+
+static int checkIfValueInPrimaries (unsigned int value) {
+    return value == 1 || value == 2 || value == 9 || value == 11;
+}
+
+static int checkIfValueInRenderingIntent (unsigned int value) {
+    return value >=0 && value <= 3;
+}
+
+static int checkIfValueInWhitePoint (unsigned int value) {
+    return value == 1 || value == 2 || value == 10 || value == 11;
+}
+
+static int checkIfValueInTransferFunction (unsigned int value) {
+    return value == 1 || value == 2 || value == 8 || value == 13 ||
+         value == 16 || value == 17 || value == 18;
+}
+
+#endif //AVCODEC_JPEG_XL_H
diff --git a/libavcodec/jpeg-xl_parser.c b/libavcodec/jpeg-xl_parser.c
new file mode 100644
index 0000000000..e8d9007970
--- /dev/null
+++ b/libavcodec/jpeg-xl_parser.c
@@ -0,0 +1,707 @@ 
+/*
+ * JPEG-XL parser
+ * Copyright (c) 2020 Varun Gupta
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * JPEG-XL parser
+ */
+
+#include "libavutil/avassert.h"
+#include "libavutil/bswap.h"
+#include "libavutil/common.h"
+
+#include "jpeg-xl.h"
+#include "parser.h"
+#include "get_bits.h"
+
+typedef struct JPEGXLParseContext {
+    ParseContext pc;
+    int state;
+    int index;  // keeps track of number of bits read from the media file
+    SizeHeader size;
+    PreviewHeader preview;
+    ImageMetadata metadata;
+    AnimationHeader animation;
+} JPEGXLParseContext;
+
+static unsigned int bits_offset_enum(GetBitContext gb, int n, int offset) {
+    unsigned int read_n_bits = get_bits(&gb, n);
+    return read_n_bits + offset;
+}
+
+// TODO -> add checks for buffer size overflow in between and ill formed checks
+static int jpegxl_find_frame_end(JPEGXLParseContext *context, const uint8_t *buf,
+                                 int buf_size) {
+    int index, next = END_NOT_FOUND;
+    GetBitContext gb;
+    init_get_bits(&gb, buf, buf_size*8);
+    for (index = 0; index < buf_size*8; ) {
+        if (!context->state) {
+            if (get_bits(&gb, 8) == JPEG_XL_SIG_FF) {
+                context->state = JPEGXL_SIG;
+            } else {
+                // TODO -> Bitstream is ill formed
+            }
+            index += 8;
+            context->index += 8;
+        } else if (context->state == JPEGXL_SIG) {
+            if (get_bits(&gb, 8) == JPEG_XL_SIG_TYPE) {
+                context->state = JPEGXL_SIZE_HEADER;
+            } else {
+                // TODO -> Bitstream is ill formed
+            }
+            index +=8;
+            context->index += 8;
+        } else if (context->state == JPEGXL_SIZE_HEADER) {
+            // default values
+            context->size.ysize_div8_minus_1 = 0;
+            context->size.ysize_minus_1 = 0;
+            context->size.xsize_div8_minus_1 = 0;
+            context->size.xsize_minus_1 = 0;
+
+            unsigned int small = get_bits(&gb, 1);
+            index++;
+            context->index++;
+            if (small) {
+                context->size.ysize_div8_minus_1 = get_bits(&gb, 5);
+                index += 5;
+                context->index += 5;
+            } else {
+                unsigned int input = get_bits(&gb, 2); // U32() first reads 2 bits
+                index += 2;
+                context->index += 2;
+                if (input == 0) {   // d0 = Bits(9)
+                    context->size.ysize_minus_1 = get_bits(&gb, 9);
+                    index += 9;
+                    context->index += 9;
+                } else if (input == 1) {   // d1 = Bits(13)
+                    context->size.ysize_minus_1 = get_bits(&gb, 17);
+                    index += 13;
+                    context->index += 13;
+                } else if (input == 2) {   // d2 = Bits(18)
+                    context->size.ysize_minus_1 = get_bits_long(&gb, 18);
+                    index += 18;
+                    context->index += 18;
+                } else {   // d3 = Bits(30)
+                    context->size.ysize_minus_1 = get_bits_long(&gb, 30);
+                    index += 30;
+                    context->index += 30;
+                }
+            }
+            unsigned int ratio = get_bits(&gb, 3);
+            index += 3;
+            context->index += 3;
+            if (ratio == 0) {
+                if (small) {
+                    context->size.xsize_div8_minus_1 = get_bits(&gb, 5);
+                    index += 5;
+                    context->index += 5;
+                } else {
+                    unsigned int input = get_bits(&gb, 2); // U32() first reads 2 bits
+                    index += 2;
+                    context->index += 2;
+                    if (input == 0) {   // d0 = Bits(9)
+                        context->size.xsize_minus_1 = get_bits(&gb, 9);
+                        index += 9;
+                        context->index += 9;
+                    } else if (input == 1) {   // d1 = Bits(13)
+                        context->size.xsize_minus_1 = get_bits(&gb, 17);
+                        index += 13;
+                        context->index += 13;
+                    } else if (input == 2) {   // d2 = Bits(18)
+                        context->size.xsize_minus_1 = get_bits_long(&gb, 18);
+                        index += 18;
+                        context->index += 18;
+                    } else {   // d3 = Bits(30)
+                        context->size.xsize_minus_1 = get_bits_long(&gb, 30);
+                        index += 30;
+                        context->index += 30;
+                    }
+                }
+            }
+            context->state = JPEGXL_IMAGE_METADATA;
+        } else if (context->state == JPEGXL_IMAGE_METADATA) {
+            // setting up default values
+            context->metadata.have_icc = 0;
+            context->metadata.alpha_bits = 0;
+            context->metadata.bits_per_sample = 8;
+            context->metadata.target_nits_div50 = 5;
+            context->metadata.colour_encoding.received_icc = 0;
+            context->metadata.colour_encoding.opaque_icc = 0;
+            context->metadata.colour_encoding.colour_space = kRGB;
+            context->metadata.colour_encoding.white_point = kD65;
+            context->metadata.colour_encoding.primaries = kSRGB;
+            context->metadata.colour_encoding.have_gamma = 0;
+            context->metadata.colour_encoding.gamma = 0;
+            context->metadata.colour_encoding.transfer_function = kSRGBTransferFunction;
+            context->metadata.colour_encoding.rendering_intent = kRelative;
+            context->metadata.m2.have_preview = 0;
+            context->metadata.m2.have_animation = 0;
+            context->metadata.m2.orientation_minus_1 = 0;
+            context->metadata.m2.depth_bits = 0;
+            context->metadata.m2.depth_shift = 0;
+            context->metadata.m2.num_extra_channels = 0;
+            context->metadata.m2.extra_channel_bits = 0;
+
+            context->metadata.all_default = get_bits(&gb, 1);
+            index++;
+            context->index++;
+            if (!context->metadata.all_default) {
+                context->metadata.have_icc = get_bits(&gb, 1);
+                index++;
+                context->index++;
+
+                unsigned int input = get_bits(&gb, 2); // U32() first reads 2 bits
+                index += 2;
+                context->index += 2;
+                if (input == 0) {   // d0 = Val(8)
+                    context->metadata.bits_per_sample = 8;
+                } else if (input == 1) {  // d1 = Val(16)
+                    context->metadata.bits_per_sample = 16;
+                } else if (input == 2) {  // d2 = Val(32)
+                    context->metadata.bits_per_sample = 32;
+                } else {  // d3 = Bits(5)
+                    context->metadata.bits_per_sample = get_bits(&gb, 5);
+                    index += 5;
+                    context->index += 5;
+                }
+
+                context->metadata.colour_encoding.all_default = get_bits(&gb, 1);
+                index++;
+                context->index++;
+                if(!context->metadata.colour_encoding.all_default) {
+                    context->metadata.colour_encoding.received_icc = get_bits(&gb ,1);
+                    index++;
+                    context->index++;
+                }
+                if(context->metadata.colour_encoding.received_icc) {
+                    context->metadata.colour_encoding.opaque_icc = get_bits(&gb, 1);
+                    index++;
+                    context->index++;
+                }
+                int use_desc = !context->metadata.colour_encoding.all_default &&
+                               !context->metadata.colour_encoding.opaque_icc;
+                if(use_desc) {  // colour_space enum
+                    unsigned int input = get_bits(&gb, 2);
+                    index += 2;
+                    context->index += 2;
+                    unsigned int enum_value;
+                    if (input == 0) {
+                        enum_value = 0;
+                    } else if (input == 1) {
+                        enum_value = 1;
+                    } else if (input == 2) {
+                        enum_value = bits_offset_enum(gb, 4, 2);
+                        index += 4;
+                        context->index += 4;
+                    } else {
+                        enum_value = bits_offset_enum(gb, 6, 18);
+                        index += 6;
+                        context->index += 6;
+                    }
+
+                    if (checkIfValueInColourSpace(enum_value)) {
+                        context->metadata.colour_encoding.colour_space = enum_value;
+                    } else {
+                        // TODO -> Bitstream is ill formed
+                    }
+                }
+                int not_xy = context->metadata.colour_encoding.colour_space != kXYZ &&
+                             context->metadata.colour_encoding.colour_space != kXYB;
+                if(use_desc && not_xy) {  // white_point enum
+                    unsigned int input = get_bits(&gb, 2);
+                    index += 2;
+                    context->index += 2;
+                    unsigned int enum_value;
+                    if (input == 0) {
+                        enum_value = 0;
+                    } else if (input == 1) {
+                        enum_value = 1;
+                    } else if (input == 2) {
+                        enum_value = bits_offset_enum(gb, 4, 2);
+                        index += 4;
+                        context->index += 4;
+                    } else {
+                        enum_value = bits_offset_enum(gb, 6, 18);
+                        index += 6;
+                        context->index += 6;
+                    }
+
+                    if (checkIfValueInWhitePoint(enum_value)) {
+                        context->metadata.colour_encoding.white_point = enum_value;
+                    } else {
+                        // TODO -> Bitstream is ill formed
+                    }
+                }
+                if (use_desc && context->metadata.colour_encoding.white_point == kCustom) {
+                    // TODO -> Implement custom xy for white
+                }
+                if (use_desc && not_xy && context->metadata.colour_encoding.colour_space != kGrey) {   // primaries enum
+                    unsigned int input = get_bits(&gb, 2);
+                    index += 2;
+                    context->index += 2;
+                    unsigned int enum_value;
+                    if (input == 0) {
+                        enum_value = 0;
+                    } else if (input == 1) {
+                        enum_value = 1;
+                    } else if (input == 2) {
+                        enum_value = bits_offset_enum(gb, 4, 2);
+                        index += 4;
+                        context->index += 4;
+                    } else {
+                        enum_value = bits_offset_enum(gb, 6, 18);
+                        index += 6;
+                        context->index += 6;
+                    }
+
+                    if (checkIfValueInPrimaries(enum_value)) {
+                        context->metadata.colour_encoding.primaries = enum_value;
+                    } else {
+                        // TODO -> Bitstream is ill formed
+                    }
+                }
+                if (use_desc && context->metadata.colour_encoding.primaries == kCustomPrimary) {
+                    // TODO -> Implement custom xy for red
+                }
+                if (use_desc && context->metadata.colour_encoding.primaries == kCustomPrimary) {
+                    // TODO -> Implement custom xy for green
+                }
+                if (use_desc && context->metadata.colour_encoding.primaries == kCustomPrimary) {
+                    // TODO -> Implement custom xy for blue
+                }
+                if (use_desc && not_xy) {
+                    context->metadata.colour_encoding.have_gamma = get_bits(&gb, 1);
+                    index++;
+                    context->index++;
+                }
+                if (use_desc && context->metadata.colour_encoding.have_gamma) {
+                    get_bits_long(&gb, 24);
+                    index += 24;
+                    context->index += 24;
+                }
+                if (use_desc && !context->metadata.colour_encoding.have_gamma && not_xy) { // transfer_function enum
+                    unsigned int input = get_bits(&gb, 2);
+                    index += 2;
+                    context->index += 2;
+                    unsigned int enum_value;
+                    if (input == 0) {
+                        enum_value = 0;
+                    } else if (input == 1) {
+                        enum_value = 1;
+                    } else if (input == 2) {
+                        enum_value = bits_offset_enum(gb, 4, 2);
+                        index += 4;
+                        context->index += 4;
+                    } else {
+                        enum_value = bits_offset_enum(gb, 6, 18);
+                        index += 6;
+                        context->index += 6;
+                    }
+
+                    if (checkIfValueInTransferFunction(enum_value)) {
+                        context->metadata.colour_encoding.transfer_function = enum_value;
+                    } else {
+                        // TODO -> Bitstream is ill formed
+                    }
+                }
+                if (use_desc && context->metadata.colour_encoding.colour_space != kGrey && not_xy) { // rendering_intent enum
+                    unsigned int input = get_bits(&gb, 2);
+                    index += 2;
+                    context->index += 2;
+                    unsigned int enum_value;
+                    if (input == 0) {
+                        enum_value = 0;
+                    } else if (input == 1) {
+                        enum_value = 1;
+                    } else if (input == 2) {
+                        enum_value = bits_offset_enum(gb, 4, 2);
+                        index += 4;
+                        context->index += 4;
+                    } else {
+                        enum_value = bits_offset_enum(gb, 6, 18);
+                        index += 6;
+                        context->index += 6;
+                    }
+
+                    if (checkIfValueInRenderingIntent(enum_value)) {
+                        context->metadata.colour_encoding.rendering_intent = enum_value;
+                    } else {
+                        // TODO -> Bitstream is ill formed
+                    }
+                }
+
+                input = get_bits(&gb, 2); // U32() first reads 2 bits
+                index += 2;
+                context->index += 2;
+                if (input == 0) {   // d0 = Val(0)
+                    context->metadata.alpha_bits = 0;
+                } else if (input == 1) {  // d1 = Val(8)
+                    context->metadata.alpha_bits = 8;
+                } else if (input == 2) {  // d2 = Val(16)
+                    context->metadata.alpha_bits = 16;
+                } else {  // d3 = Bits(4)
+                    context->metadata.alpha_bits = get_bits(&gb, 4);
+                    index += 4;
+                    context->index += 4;
+                }
+
+                input = get_bits(&gb, 2); // U32() first reads 2 bits
+                index += 2;
+                context->index += 2;
+                if (input == 0) {   // d0 = Val(5)
+                    context->metadata.target_nits_div50 = 5;
+                } else if (input == 1) {  // d1 = Val(20)
+                    context->metadata.target_nits_div50 = 20;
+                } else if (input == 2) {  // d2 = Val(80)
+                    context->metadata.target_nits_div50 = 80;
+                } else {  // d3 = BitsOffset(10,1)
+                    context->metadata.target_nits_div50 = get_bits(&gb, 10) + 1;
+                    index += 10;
+                    context->index += 10;
+                }
+
+                context->metadata.m2.all_default = get_bits(&gb, 1);
+                index++;
+                context->index++;
+                if (!context->metadata.m2.all_default) {
+                    context->metadata.m2.have_preview = get_bits(&gb, 1);
+                    index++;
+                    context->index++;
+
+                    context->metadata.m2.have_animation = get_bits(&gb, 1);
+                    index++;
+                    context->index++;
+
+                    context->metadata.m2.orientation_minus_1 = get_bits(&gb, 3);
+                    index +=3;
+                    context->index +=3;
+
+                    input = get_bits(&gb, 2); // U32() first reads 2 bits
+                    index += 2;
+                    context->index += 2;
+                    if (input == 0) {   // d0 = Val(0)
+                        context->metadata.m2.depth_bits = 0;
+                    } else if (input == 1) {  // d1 = Val(8)
+                        context->metadata.m2.depth_bits = 8;
+                    } else if (input == 2) {  // d2 = Val(16)
+                        context->metadata.m2.depth_bits = 16;
+                    } else {  // d3 = Bits(4)
+                        context->metadata.m2.depth_bits = get_bits(&gb, 4);
+                        index += 4;
+                        context->index += 4;
+                    }
+
+                    input = get_bits(&gb, 2); // U32() first reads 2 bits
+                    index += 2;
+                    context->index += 2;
+                    if (input == 0) {   // d0 = Val(0)
+                        context->metadata.m2.depth_shift = 0;
+                    } else if (input == 1) {  // d1 = Val(3)
+                        context->metadata.m2.depth_shift = 3;
+                    } else if (input == 2) {  // d2 = Val(4)
+                        context->metadata.m2.depth_shift = 4;
+                    } else {  // d3 = BitsOffset(3,1)
+                        context->metadata.m2.depth_shift = get_bits(&gb, 3) + 1;
+                        index += 3;
+                        context->index += 3;
+                    }
+
+                    input = get_bits(&gb, 2); // U32() first reads 2 bits
+                    index += 2;
+                    context->index += 2;
+                    if (input == 0) {   // d0 = Val(0)
+                        context->metadata.m2.num_extra_channels = 0;
+                    } else if (input == 1) {  // d1 = Bits(4)
+                        context->metadata.m2.num_extra_channels = get_bits(&gb, 4);
+                        index += 4;
+                        context->index += 4;
+                    } else if (input == 2) {  // d2 = BitsOffset(8,16)
+                        context->metadata.m2.num_extra_channels = get_bits(&gb, 8) + 16;
+                        index += 8;
+                        context->index += 8;
+                    } else {  // d3 = BitsOffset(12,1)
+                        context->metadata.m2.num_extra_channels = get_bits(&gb, 12) + 1;
+                        index += 12;
+                        context->index += 12;
+                    }
+                }
+                if (context->metadata.m2.num_extra_channels > 0) {
+                    input = get_bits(&gb, 2); // U32() first reads 2 bits
+                    index += 2;
+                    context->index += 2;
+                    if (input == 0) {   // d0 = Val(0)
+                        context->metadata.m2.extra_channel_bits = 0;
+                    } else if (input == 1) {  // d1 = Val(8)
+                        context->metadata.m2.extra_channel_bits = 8;
+                    } else if (input == 2) {  // d2 = Val(16)
+                        context->metadata.m2.extra_channel_bits = 16;
+                    } else {  // d3 = Bits(4)
+                        context->metadata.m2.extra_channel_bits = get_bits(&gb, 4);
+                        index += 4;
+                        context->index += 4;
+                    }
+                }
+                context->metadata.m2.extra_channel_info = (ExtraChannelInfo*) malloc (context->metadata.m2.num_extra_channels * sizeof(ExtraChannelInfo));
+                for (int channel = 0; channel < context->metadata.m2.num_extra_channels; channel++) {
+                    input = get_bits(&gb, 2); // U32() first reads 2 bits
+                    index += 2;
+                    context->index += 2;
+                    if (input == 0) {   // d0 = Val(0)
+                        context->metadata.m2.extra_channel_info[channel].meaning = 0;
+                    } else if (input == 1) {  // d1 = Val(1)
+                        context->metadata.m2.extra_channel_info[channel].meaning = 1;
+                    } else if (input == 2) {  // d2 = Val(2)
+                      context->metadata.m2.extra_channel_info[channel].meaning = 2;
+                    } else {  // d3 = Bits(6)
+                        context->metadata.m2.extra_channel_info[channel].meaning = get_bits(&gb, 6);
+                        index += 6;
+                        context->index += 6;
+                    }
+
+                    // default values
+                    context->metadata.m2.extra_channel_info[channel].red = 0;
+                    context->metadata.m2.extra_channel_info[channel].green = 0;
+                    context->metadata.m2.extra_channel_info[channel].blue = 0;
+                    context->metadata.m2.extra_channel_info[channel].solidity = 0;
+
+                    if (context->metadata.m2.extra_channel_info[channel].meaning == 1) {
+                        // TODO -> Implement f16() for blue, red, green, solidity
+                    }
+                }
+                if (!context->metadata.m2.all_default) {
+                    // TODO -> Implement extensions
+                }
+            }
+            context->state = JPEGXL_PREVIEW_HEADER;
+        } else if (context->state == JPEGXL_PREVIEW_HEADER) {
+            // default values
+            context->preview.xsize_minus_1 = 0;
+            context->preview.xsize_div8_minus_1 = 0;
+            context->preview.ysize_minus_1 = 0;
+            context->preview.ysize_div8_minus_1 = 0;
+
+            if (!context->metadata.m2.have_preview) {
+                  context->state = JPEGXL_ANIMATION_HEADER;
+                  continue;
+            }
+
+            unsigned int div8 = get_bits(&gb, 1);
+            index++;
+            context->index++;
+            if (div8) {
+                unsigned int input = get_bits(&gb, 2); // U32() first reads 2 bits
+                index += 2;
+                context->index += 2;
+                if (input == 0) {   // d0 = Val(15)
+                    context->preview.ysize_div8_minus_1 = 15;
+                } else if (input == 1) {   // d1 = Val(31)
+                    context->preview.ysize_div8_minus_1 = 31;
+                } else if (input == 2) {   // d2 = Bits(5)
+                    context->preview.ysize_div8_minus_1 = get_bits(&gb, 5);
+                    index += 5;
+                    context->index += 5;
+                } else {   // d3 = BitsOffset(9,32)
+                    context->preview.ysize_div8_minus_1 = get_bits(&gb, 9) + 32;
+                    index += 9;
+                    context->index += 9;
+                }
+            } else {
+                unsigned int input = get_bits(&gb, 2); // U32() first reads 2 bits
+                index += 2;
+                context->index += 2;
+                if (input == 0) {   // d0 = Bits(6)
+                    context->preview.ysize_minus_1 = get_bits(&gb, 6);
+                    index += 6;
+                    context->index += 6;
+                } else if (input == 1) {   // d1 = BitsOffset(8,64)
+                    context->preview.ysize_minus_1 = get_bits(&gb, 8) + 64;
+                    index += 8;
+                    context->index += 8;
+                } else if (input == 2) {   // d2 = BitsOffset(10,320)
+                    context->preview.ysize_minus_1 = get_bits(&gb, 10) + 320;
+                    index += 10;
+                    context->index += 10;
+                } else {   // d3 = BitsOffset(12,1344)
+                    context->preview.ysize_minus_1 = get_bits(&gb, 12) + 1344;
+                    index += 12;
+                    context->index += 12;
+                }
+            }
+            unsigned int ratio = get_bits(&gb, 3);
+            index += 3;
+            context->index += 3;
+            if (ratio == 0) {
+                if (div8) {
+                    unsigned int input = get_bits(&gb, 2); // U32() first reads 2 bits
+                    index += 2;
+                    context->index += 2;
+                    if (input == 0) {   // d0 = Val(15)
+                        context->preview.xsize_div8_minus_1 = 15;
+                    } else if (input == 1) {   // d1 = Val(31)
+                        context->preview.xsize_div8_minus_1 = 31;
+                    } else if (input == 2) {   // d2 = Bits(5)
+                        context->preview.xsize_div8_minus_1 = get_bits(&gb, 5);
+                        index += 5;
+                        context->index += 5;
+                    } else {   // d3 = BitsOffset(9,32)
+                        context->preview.xsize_div8_minus_1 = get_bits(&gb, 9) + 32;
+                        index += 9;
+                        context->index += 9;
+                    }
+                } else {
+                    unsigned int input = get_bits(&gb, 2); // U32() first reads 2 bits
+                    index += 2;
+                    context->index += 2;
+                    if (input == 0) {   // d0 = Bits(6)
+                        context->preview.xsize_minus_1 = get_bits(&gb, 6);
+                        index += 6;
+                        context->index += 6;
+                    } else if (input == 1) {   // d1 = BitsOffset(8,64)
+                        context->preview.xsize_minus_1 = get_bits(&gb, 8) + 64;
+                        index += 8;
+                        context->index += 8;
+                    } else if (input == 2) {   // d2 = BitsOffset(10,320)
+                        context->preview.xsize_minus_1 = get_bits(&gb, 10) + 320;
+                        index += 10;
+                        context->index += 10;
+                    } else {   // d3 = BitsOffset(12,1344)
+                        context->preview.xsize_minus_1 = get_bits(&gb, 12) + 1344;
+                        index += 12;
+                        context->index += 12;
+                    }
+                }
+            }
+            context->state = JPEGXL_ANIMATION_HEADER;
+        } else if (context->state == JPEGXL_ANIMATION_HEADER) {
+            // default values
+            context->animation.num_loops = 0;
+            context->animation.tps_denominator_minus_1 = 0;
+            context->animation.tps_numerator_minus_1 = 0;
+            context->animation.have_timecodes = 0;
+
+            if (!context->metadata.m2.have_animation) {
+                context->state = JPEGXL_ICC_CODEC;
+                continue;
+            }
+
+            context->animation.composite_still = get_bits(&gb, 1);
+            index++;
+            context->index++;
+            if (!context->animation.composite_still) {
+                unsigned int input = get_bits(&gb, 2); // U32() first reads 2 bits
+                index += 2;
+                context->index += 2;
+                if (input == 0) {   // d0 = Val(99)
+                    context->animation.tps_numerator_minus_1 = 99;
+                } else if (input == 1) {   // d1 = Val(999)
+                    context->animation.tps_numerator_minus_1 = 999;
+                } else if (input == 2) {   // d2 = Bits(6)
+                    context->animation.tps_numerator_minus_1 = get_bits(&gb, 6);
+                    index += 6;
+                    context->index += 6;
+                } else {   // d3 = Bits(18)
+                    context->animation.tps_numerator_minus_1 = get_bits_long(&gb, 18);
+                    index += 18;
+                    context->index += 18;
+                }
+
+                input = get_bits(&gb, 2); // U32() first reads 2 bits
+                index += 2;
+                context->index += 2;
+                if (input == 0) {   // d0 = Val(0)
+                    context->animation.tps_denominator_minus_1 = 0;
+                } else if (input == 1) {   // d1 = Val(1000)
+                    context->animation.tps_denominator_minus_1 = 1000;
+                } else if (input == 2) {   // d2 = Bits(8)
+                    context->animation.tps_denominator_minus_1 = get_bits(&gb, 8);
+                    index += 8;
+                    context->index += 8;
+                } else {   // d3 = Bits(10)
+                    context->animation.tps_denominator_minus_1 = get_bits_long(&gb, 10);
+                    index += 10;
+                    context->index += 10;
+                }
+
+                input = get_bits(&gb, 2); // U32() first reads 2 bits
+                index += 2;
+                context->index += 2;
+                if (input == 0) {   // d0 = Val(0)
+                    context->animation.num_loops = 0;
+                } else if (input == 1) {   // d1 = Bits(3)
+                    context->animation.num_loops = get_bits(&gb, 3);
+                    index += 3;
+                    context->index += 3;
+                } else if (input == 2) {   // d2 = Bits(16)
+                    context->animation.num_loops = get_bits(&gb, 16);
+                    index += 16;
+                    context->index += 16;
+                } else {   // d3 = Bits(32)
+                    context->animation.num_loops = get_bits_long(&gb, 32);
+                    index += 32;
+                    context->index += 32;
+                }
+
+                context->animation.have_timecodes = get_bits(&gb, 1);
+                index++;
+                context->index++;
+            }
+            context->state = JPEGXL_ICC_CODEC;
+        } else if (context->state == JPEGXL_ICC_CODEC) {
+            if (!context->metadata.have_icc) {
+                context->state = JPEGXL_PREVIEW_FRAME;
+                continue;
+            }
+            // TODO -> Handle ICC profile
+        } else if (context->state == JPEGXL_PREVIEW_FRAME) {
+            // TODO
+        } else if (context->state == JPEGXL_FRAMES) {
+            //TODO
+        }
+    }
+
+    return next;
+}
+
+/**
+ * Parser callback installed as .parser_parse in ff_jpegxl_parser.
+ *
+ * The input is passed to jpegxl_find_frame_end() and the value it returns
+ * is forwarded to ff_combine_frame() together with the parse context
+ * stored in the private data.
+ *
+ * @param s            parser context; s->priv_data is the JPEGXLParseContext
+ * @param avctx        codec context (not used here)
+ * @param poutbuf      receives the output buffer, or NULL when
+ *                     ff_combine_frame() reports a negative result
+ * @param poutbuf_size receives the output size, or 0 in that same case
+ * @param buf          input data
+ * @param buf_size     size of the input data
+ * @return the value from jpegxl_find_frame_end() when ff_combine_frame()
+ *         succeeds; buf_size when ff_combine_frame() returns a negative
+ *         value (presumably "frame not complete yet" or an error --
+ *         confirm against libavcodec/parser.c)
+ */
+static int jpegxl_parse(AVCodecParserContext *s, AVCodecContext *avctx,
+                        const uint8_t **poutbuf, int *poutbuf_size,
+                        const uint8_t *buf, int buf_size)
+{
+    JPEGXLParseContext *jxl = s->priv_data;
+    const int frame_end = jpegxl_find_frame_end(jxl, buf, buf_size);
+
+    /* ff_combine_frame() may redirect buf/buf_size to its own buffer. */
+    if (ff_combine_frame(&jxl->pc, frame_end, &buf, &buf_size) >= 0) {
+        *poutbuf      = buf;
+        *poutbuf_size = buf_size;
+        return frame_end;
+    }
+
+    /* Nothing to output for this call. */
+    *poutbuf      = NULL;
+    *poutbuf_size = 0;
+    return buf_size;
+}
+
+/*
+ * Parser descriptor for AV_CODEC_ID_JPEGXL: per-parser state is a
+ * JPEGXLParseContext, parsing is done by jpegxl_parse() and teardown
+ * is delegated to ff_parse_close().
+ */
+AVCodecParser ff_jpegxl_parser = {
+    .codec_ids      = { AV_CODEC_ID_JPEGXL },
+    .priv_data_size = sizeof(JPEGXLParseContext),
+    .parser_parse   = jpegxl_parse,
+    .parser_close   = ff_parse_close,
+};
\ No newline at end of file