diff mbox series

[FFmpeg-devel,6/6] avcodec/dovi_rpu: parse extension blocks

Message ID 20240323173735.26224-6-ffmpeg@haasn.xyz
State New
Headers show
Series [FFmpeg-devel,1/6] avutil/dovi_meta: add AVDOVIDataMapping.nlq_pivots | expand

Checks

Context Check Description
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished
yinshiyou/configure_loongarch64 warning Failed to apply patch

Commit Message

Niklas Haas March 23, 2024, 5:37 p.m. UTC
From: Niklas Haas <git@haasn.dev>

We split the inner loop between v1 and v2 extension blocks to print
a warning where an extension block was encountered in an unexpected
context.

Co-authored-by: quietvoid <tcChlisop0@gmail.com>
---
 libavcodec/dovi_rpu.c | 178 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 178 insertions(+)

Comments

Andreas Rheinhardt March 23, 2024, 6 p.m. UTC | #1
Niklas Haas:
> From: Niklas Haas <git@haasn.dev>
> 
> We split the inner loop between v1 and v2 extension blocks to print
> a warning where an extension block was encountered in an unexpected
> context.
> 
> Co-authored-by: quietvoid <tcChlisop0@gmail.com>
> ---
>  libavcodec/dovi_rpu.c | 178 ++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 178 insertions(+)
> 
> diff --git a/libavcodec/dovi_rpu.c b/libavcodec/dovi_rpu.c
> index 21cb1850e3e..99a983f65dd 100644
> --- a/libavcodec/dovi_rpu.c
> +++ b/libavcodec/dovi_rpu.c
> @@ -199,6 +199,174 @@ static inline unsigned get_variable_bits(GetBitContext *gb, int n)
>          }                                                                       \
>      } while (0)
>  
> +static void parse_ext_v1(DOVIContext *s, GetBitContext *gb, AVDOVIDmData *dm)
> +{
> +    switch (dm->level) {
> +    case 1:
> +        dm->l1.min_pq = get_bits(gb, 12);
> +        dm->l1.max_pq = get_bits(gb, 12);
> +        dm->l1.avg_pq = get_bits(gb, 12);
> +        break;
> +    case 2:
> +        dm->l2.target_max_pq = get_bits(gb, 12);
> +        dm->l2.trim_slope = get_bits(gb, 12);
> +        dm->l2.trim_offset = get_bits(gb, 12);
> +        dm->l2.trim_power = get_bits(gb, 12);
> +        dm->l2.trim_chroma_weight = get_bits(gb, 12);
> +        dm->l2.trim_saturation_gain = get_bits(gb, 12);
> +        dm->l2.ms_weight = get_bits(gb, 13) - 8192;
> +        break;
> +    case 4:
> +        dm->l4.anchor_pq = get_bits(gb, 12);
> +        dm->l4.anchor_power = get_bits(gb, 12);
> +        break;
> +    case 5:
> +        dm->l5.left_offset = get_bits(gb, 13);
> +        dm->l5.right_offset = get_bits(gb, 13);
> +        dm->l5.top_offset = get_bits(gb, 13);
> +        dm->l5.bottom_offset = get_bits(gb, 13);
> +        break;
> +    case 6:
> +        dm->l6.max_luminance = get_bits(gb, 16);
> +        dm->l6.min_luminance = get_bits(gb, 16);
> +        dm->l6.max_cll = get_bits(gb, 16);
> +        dm->l6.max_fall = get_bits(gb, 16);
> +        break;
> +    case 255:
> +        dm->l255.dm_run_mode = get_bits(gb, 8);
> +        dm->l255.dm_run_version = get_bits(gb, 8);
> +        for (int i = 0; i < 4; i++)
> +            dm->l255.dm_debug[i] = get_bits(gb, 8);
> +        break;
> +    default:
> +        av_log(s->logctx, AV_LOG_WARNING,
> +               "Unknown Dolby Vision DM v1 level: %u\n", dm->level);
> +    }
> +}
> +
> +static inline AVCIExy get_cie_xy(GetBitContext *gb)
> +{
> +    const int denom = 32767;
> +    return (AVCIExy) {
> +        .x = { get_sbits(gb, 16), denom },
> +        .y = { get_sbits(gb, 16), denom },
> +    };

The order of initializations is undefined.

> +}
> +
> +static void parse_ext_v2(DOVIContext *s, GetBitContext *gb, AVDOVIDmData *dm,
> +                         int ext_block_length)
> +{
> +    switch (dm->level) {
> +    case 3:
> +        dm->l3.min_pq_offset = get_bits(gb, 12);
> +        dm->l3.max_pq_offset = get_bits(gb, 12);
> +        dm->l3.avg_pq_offset = get_bits(gb, 12);
> +        break;
> +    case 8:
> +        dm->l8.target_display_index = get_bits(gb, 8);
> +        dm->l8.trim_slope = get_bits(gb, 12);
> +        dm->l8.trim_offset = get_bits(gb, 12);
> +        dm->l8.trim_power = get_bits(gb, 12);
> +        dm->l8.trim_chroma_weight = get_bits(gb, 12);
> +        dm->l8.trim_saturation_gain = get_bits(gb, 12);
> +        dm->l8.ms_weight = get_bits(gb, 12) - 8192;
> +        if (ext_block_length < 12)
> +            break;
> +        dm->l8.target_mid_contrast = get_bits(gb, 12);
> +        if (ext_block_length < 13)
> +            break;
> +        dm->l8.clip_trim = get_bits(gb, 12);
> +        if (ext_block_length < 19)
> +            break;
> +        for (int i = 0; i < 6; i++)
> +            dm->l8.saturation_vector_field[i] = get_bits(gb, 8);
> +        if (ext_block_length < 25)
> +            break;
> +        for (int i = 0; i < 6; i++)
> +            dm->l8.hue_vector_field[i] = get_bits(gb, 8);
> +        break;
> +    case 9:
> +        dm->l9.source_primary_index = get_bits(gb, 8);
> +        if (ext_block_length < 17)
> +            break;
> +        dm->l9.source_display_primaries.prim.r = get_cie_xy(gb);
> +        dm->l9.source_display_primaries.prim.g = get_cie_xy(gb);
> +        dm->l9.source_display_primaries.prim.b = get_cie_xy(gb);
> +        dm->l9.source_display_primaries.wp = get_cie_xy(gb);
> +        break;
> +    case 10:
> +        dm->l10.target_display_index = get_bits(gb, 8);
> +        dm->l10.target_max_pq = get_bits(gb, 12);
> +        dm->l10.target_min_pq = get_bits(gb, 12);
> +        dm->l10.target_primary_index = get_bits(gb, 8);
> +        if (ext_block_length < 21)
> +            break;
> +        dm->l10.target_display_primaries.prim.r = get_cie_xy(gb);
> +        dm->l10.target_display_primaries.prim.g = get_cie_xy(gb);
> +        dm->l10.target_display_primaries.prim.b = get_cie_xy(gb);
> +        dm->l10.target_display_primaries.wp = get_cie_xy(gb);
> +        break;
> +    case 11:
> +        dm->l11.content_type = get_bits(gb, 8);
> +        dm->l11.whitepoint = get_bits(gb, 4);
> +        dm->l11.reference_mode_flag = get_bits(gb, 1);
> +        skip_bits(gb, 3); /* reserved */
> +        dm->l11.sharpness = get_bits(gb, 2);
> +        dm->l11.noise_reduction = get_bits(gb, 2);
> +        dm->l11.mpeg_noise_reduction = get_bits(gb, 2);
> +        dm->l11.frame_rate_conversion = get_bits(gb, 2);
> +        dm->l11.brightness = get_bits(gb, 2);
> +        dm->l11.color = get_bits(gb, 2);
> +        break;
> +    case 254:
> +        dm->l254.dm_mode = get_bits(gb, 8);
> +        dm->l254.dm_version_index = get_bits(gb, 8);
> +        break;
> +    default:
> +        av_log(s->logctx, AV_LOG_WARNING,
> +               "Unknown Dolby Vision DM v2 level: %u\n", dm->level);
> +    }
> +}
> +
> +static int parse_ext_blocks(DOVIContext *s, GetBitContext *gb, int ver)
> +{
> +    int num_ext_blocks, ext_block_length, start_pos, parsed_bits;
> +    AVDOVIDmData *ext_blocks;
> +    size_t alloc_sz;
> +
> +    num_ext_blocks = get_ue_golomb_31(gb);

get_ue_golomb_31() currently returns small values when encountering
golomb values outside of the 0-31 range; but this is not guaranteed to
be so.

> +    align_get_bits(gb);
> +
> +    alloc_sz = (s->num_ext_blocks + num_ext_blocks) * sizeof(AVDOVIDmData);
> +    ext_blocks = av_fast_realloc(s->ext_blocks, &s->ext_blocks_sz, alloc_sz);
> +    if (!ext_blocks) {
> +        ff_dovi_ctx_unref(s);
> +        return AVERROR(ENOMEM);
> +    }
> +    s->ext_blocks = ext_blocks;
> +
> +    while (num_ext_blocks--) {
> +        AVDOVIDmData *dm = &s->ext_blocks[s->num_ext_blocks++];
> +        ext_block_length = get_ue_golomb_31(gb);
> +        dm->level = get_bits(gb, 8);
> +        start_pos = get_bits_count(gb);
> +
> +        switch (ver) {
> +        case 1: parse_ext_v1(s, gb, dm); break;
> +        case 2: parse_ext_v2(s, gb, dm, ext_block_length); break;
> +        }
> +
> +        parsed_bits = get_bits_count(gb) - start_pos;
> +        if (parsed_bits > ext_block_length * 8) {
> +            ff_dovi_ctx_unref(s);
> +            return AVERROR_INVALIDDATA;
> +        }
> +        skip_bits(gb, ext_block_length * 8 - parsed_bits);
> +    }
> +
> +    return 0;
> +}
> +
>  int ff_dovi_rpu_parse(DOVIContext *s, const uint8_t *rpu, size_t rpu_size,
>                        int err_recognition)
>  {
> @@ -524,6 +692,16 @@ int ff_dovi_rpu_parse(DOVIContext *s, const uint8_t *rpu, size_t rpu_size,
>          color->source_diagonal = get_bits(gb, 10);
>      }
>  
> +    /* Parse extension blocks */
> +    s->num_ext_blocks = 0;
> +    if ((ret = parse_ext_blocks(s, gb, 1)) < 0)
> +        return ret;
> +
> +    if (get_bits_left(gb) > 48 /* padding + CRC32 + terminator */) {
> +        if ((ret = parse_ext_blocks(s, gb, 2)) < 0)
> +            return ret;
> +    }
> +
>      return 0;
>  
>  fail:

Why do you implement parsing for something that does not get used at all?
Niklas Haas March 23, 2024, 6:23 p.m. UTC | #2
On Sat, 23 Mar 2024 19:00:59 +0100 Andreas Rheinhardt <andreas.rheinhardt@outlook.com> wrote:
> Niklas Haas:
> > From: Niklas Haas <git@haasn.dev>
> > 
> > We split the inner loop between v1 and v2 extension blocks to print
> > a warning where an extension block was encountered in an unexpected
> > context.
> > 
> > Co-authored-by: quietvoid <tcChlisop0@gmail.com>
> > ---
> >  libavcodec/dovi_rpu.c | 178 ++++++++++++++++++++++++++++++++++++++++++
> >  1 file changed, 178 insertions(+)
> > 
> > diff --git a/libavcodec/dovi_rpu.c b/libavcodec/dovi_rpu.c
> > index 21cb1850e3e..99a983f65dd 100644
> > --- a/libavcodec/dovi_rpu.c
> > +++ b/libavcodec/dovi_rpu.c
> > @@ -199,6 +199,174 @@ static inline unsigned get_variable_bits(GetBitContext *gb, int n)
> >          }                                                                       \
> >      } while (0)
> >  
> > +static void parse_ext_v1(DOVIContext *s, GetBitContext *gb, AVDOVIDmData *dm)
> > +{
> > +    switch (dm->level) {
> > +    case 1:
> > +        dm->l1.min_pq = get_bits(gb, 12);
> > +        dm->l1.max_pq = get_bits(gb, 12);
> > +        dm->l1.avg_pq = get_bits(gb, 12);
> > +        break;
> > +    case 2:
> > +        dm->l2.target_max_pq = get_bits(gb, 12);
> > +        dm->l2.trim_slope = get_bits(gb, 12);
> > +        dm->l2.trim_offset = get_bits(gb, 12);
> > +        dm->l2.trim_power = get_bits(gb, 12);
> > +        dm->l2.trim_chroma_weight = get_bits(gb, 12);
> > +        dm->l2.trim_saturation_gain = get_bits(gb, 12);
> > +        dm->l2.ms_weight = get_bits(gb, 13) - 8192;
> > +        break;
> > +    case 4:
> > +        dm->l4.anchor_pq = get_bits(gb, 12);
> > +        dm->l4.anchor_power = get_bits(gb, 12);
> > +        break;
> > +    case 5:
> > +        dm->l5.left_offset = get_bits(gb, 13);
> > +        dm->l5.right_offset = get_bits(gb, 13);
> > +        dm->l5.top_offset = get_bits(gb, 13);
> > +        dm->l5.bottom_offset = get_bits(gb, 13);
> > +        break;
> > +    case 6:
> > +        dm->l6.max_luminance = get_bits(gb, 16);
> > +        dm->l6.min_luminance = get_bits(gb, 16);
> > +        dm->l6.max_cll = get_bits(gb, 16);
> > +        dm->l6.max_fall = get_bits(gb, 16);
> > +        break;
> > +    case 255:
> > +        dm->l255.dm_run_mode = get_bits(gb, 8);
> > +        dm->l255.dm_run_version = get_bits(gb, 8);
> > +        for (int i = 0; i < 4; i++)
> > +            dm->l255.dm_debug[i] = get_bits(gb, 8);
> > +        break;
> > +    default:
> > +        av_log(s->logctx, AV_LOG_WARNING,
> > +               "Unknown Dolby Vision DM v1 level: %u\n", dm->level);
> > +    }
> > +}
> > +
> > +static inline AVCIExy get_cie_xy(GetBitContext *gb)
> > +{
> > +    const int denom = 32767;
> > +    return (AVCIExy) {
> > +        .x = { get_sbits(gb, 16), denom },
> > +        .y = { get_sbits(gb, 16), denom },
> > +    };
> 
> The order of initializations is undefined.

Fixed.

> > +}
> > +
> > +static void parse_ext_v2(DOVIContext *s, GetBitContext *gb, AVDOVIDmData *dm,
> > +                         int ext_block_length)
> > +{
> > +    switch (dm->level) {
> > +    case 3:
> > +        dm->l3.min_pq_offset = get_bits(gb, 12);
> > +        dm->l3.max_pq_offset = get_bits(gb, 12);
> > +        dm->l3.avg_pq_offset = get_bits(gb, 12);
> > +        break;
> > +    case 8:
> > +        dm->l8.target_display_index = get_bits(gb, 8);
> > +        dm->l8.trim_slope = get_bits(gb, 12);
> > +        dm->l8.trim_offset = get_bits(gb, 12);
> > +        dm->l8.trim_power = get_bits(gb, 12);
> > +        dm->l8.trim_chroma_weight = get_bits(gb, 12);
> > +        dm->l8.trim_saturation_gain = get_bits(gb, 12);
> > +        dm->l8.ms_weight = get_bits(gb, 12) - 8192;
> > +        if (ext_block_length < 12)
> > +            break;
> > +        dm->l8.target_mid_contrast = get_bits(gb, 12);
> > +        if (ext_block_length < 13)
> > +            break;
> > +        dm->l8.clip_trim = get_bits(gb, 12);
> > +        if (ext_block_length < 19)
> > +            break;
> > +        for (int i = 0; i < 6; i++)
> > +            dm->l8.saturation_vector_field[i] = get_bits(gb, 8);
> > +        if (ext_block_length < 25)
> > +            break;
> > +        for (int i = 0; i < 6; i++)
> > +            dm->l8.hue_vector_field[i] = get_bits(gb, 8);
> > +        break;
> > +    case 9:
> > +        dm->l9.source_primary_index = get_bits(gb, 8);
> > +        if (ext_block_length < 17)
> > +            break;
> > +        dm->l9.source_display_primaries.prim.r = get_cie_xy(gb);
> > +        dm->l9.source_display_primaries.prim.g = get_cie_xy(gb);
> > +        dm->l9.source_display_primaries.prim.b = get_cie_xy(gb);
> > +        dm->l9.source_display_primaries.wp = get_cie_xy(gb);
> > +        break;
> > +    case 10:
> > +        dm->l10.target_display_index = get_bits(gb, 8);
> > +        dm->l10.target_max_pq = get_bits(gb, 12);
> > +        dm->l10.target_min_pq = get_bits(gb, 12);
> > +        dm->l10.target_primary_index = get_bits(gb, 8);
> > +        if (ext_block_length < 21)
> > +            break;
> > +        dm->l10.target_display_primaries.prim.r = get_cie_xy(gb);
> > +        dm->l10.target_display_primaries.prim.g = get_cie_xy(gb);
> > +        dm->l10.target_display_primaries.prim.b = get_cie_xy(gb);
> > +        dm->l10.target_display_primaries.wp = get_cie_xy(gb);
> > +        break;
> > +    case 11:
> > +        dm->l11.content_type = get_bits(gb, 8);
> > +        dm->l11.whitepoint = get_bits(gb, 4);
> > +        dm->l11.reference_mode_flag = get_bits(gb, 1);
> > +        skip_bits(gb, 3); /* reserved */
> > +        dm->l11.sharpness = get_bits(gb, 2);
> > +        dm->l11.noise_reduction = get_bits(gb, 2);
> > +        dm->l11.mpeg_noise_reduction = get_bits(gb, 2);
> > +        dm->l11.frame_rate_conversion = get_bits(gb, 2);
> > +        dm->l11.brightness = get_bits(gb, 2);
> > +        dm->l11.color = get_bits(gb, 2);
> > +        break;
> > +    case 254:
> > +        dm->l254.dm_mode = get_bits(gb, 8);
> > +        dm->l254.dm_version_index = get_bits(gb, 8);
> > +        break;
> > +    default:
> > +        av_log(s->logctx, AV_LOG_WARNING,
> > +               "Unknown Dolby Vision DM v2 level: %u\n", dm->level);
> > +    }
> > +}
> > +
> > +static int parse_ext_blocks(DOVIContext *s, GetBitContext *gb, int ver)
> > +{
> > +    int num_ext_blocks, ext_block_length, start_pos, parsed_bits;
> > +    AVDOVIDmData *ext_blocks;
> > +    size_t alloc_sz;
> > +
> > +    num_ext_blocks = get_ue_golomb_31(gb);
> 
> get_ue_golomb_31() currently returns small values when encountering
> golomb values outside of the 0-31 range; but this is not guaranteed to
> be so.

Fixed.

And in doing so, I noticed that the total number of blocks is actually
fairly constrained, so there's probably no point in allocating this
dynamically at all.

> 
> > +    align_get_bits(gb);
> > +
> > +    alloc_sz = (s->num_ext_blocks + num_ext_blocks) * sizeof(AVDOVIDmData);
> > +    ext_blocks = av_fast_realloc(s->ext_blocks, &s->ext_blocks_sz, alloc_sz);
> > +    if (!ext_blocks) {
> > +        ff_dovi_ctx_unref(s);
> > +        return AVERROR(ENOMEM);
> > +    }
> > +    s->ext_blocks = ext_blocks;
> > +
> > +    while (num_ext_blocks--) {
> > +        AVDOVIDmData *dm = &s->ext_blocks[s->num_ext_blocks++];
> > +        ext_block_length = get_ue_golomb_31(gb);
> > +        dm->level = get_bits(gb, 8);
> > +        start_pos = get_bits_count(gb);
> > +
> > +        switch (ver) {
> > +        case 1: parse_ext_v1(s, gb, dm); break;
> > +        case 2: parse_ext_v2(s, gb, dm, ext_block_length); break;
> > +        }
> > +
> > +        parsed_bits = get_bits_count(gb) - start_pos;
> > +        if (parsed_bits > ext_block_length * 8) {
> > +            ff_dovi_ctx_unref(s);
> > +            return AVERROR_INVALIDDATA;
> > +        }
> > +        skip_bits(gb, ext_block_length * 8 - parsed_bits);
> > +    }
> > +
> > +    return 0;
> > +}
> > +
> >  int ff_dovi_rpu_parse(DOVIContext *s, const uint8_t *rpu, size_t rpu_size,
> >                        int err_recognition)
> >  {
> > @@ -524,6 +692,16 @@ int ff_dovi_rpu_parse(DOVIContext *s, const uint8_t *rpu, size_t rpu_size,
> >          color->source_diagonal = get_bits(gb, 10);
> >      }
> >  
> > +    /* Parse extension blocks */
> > +    s->num_ext_blocks = 0;
> > +    if ((ret = parse_ext_blocks(s, gb, 1)) < 0)
> > +        return ret;
> > +
> > +    if (get_bits_left(gb) > 48 /* padding + CRC32 + terminator */) {
> > +        if ((ret = parse_ext_blocks(s, gb, 2)) < 0)
> > +            return ret;
> > +    }
> > +
> >      return 0;
> >  
> >  fail:
> 
> Why do you implement parsing for something that does not get used at all?

Oops, I forgot to submit the commit that actually exposes this in
ff_dovi_attach_side_data()!

For context, there are at least two valid use-cases:

1. Downstream clients want access to these (e.g. VLC, mpv); libplacebo
   currently uses custom code to parse the RPU but having it done inside
   FFmpeg would be better for everybody involved.

2. For transcoding, we want to be able to preserve the information
   inside the extension blocks. This requires parsing them in the first
   place, so we can synthesize them back into binary form.

> 
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
diff mbox series

Patch

diff --git a/libavcodec/dovi_rpu.c b/libavcodec/dovi_rpu.c
index 21cb1850e3e..99a983f65dd 100644
--- a/libavcodec/dovi_rpu.c
+++ b/libavcodec/dovi_rpu.c
@@ -199,6 +199,174 @@  static inline unsigned get_variable_bits(GetBitContext *gb, int n)
         }                                                                       \
     } while (0)
 
+static void parse_ext_v1(DOVIContext *s, GetBitContext *gb, AVDOVIDmData *dm)
+{
+    switch (dm->level) {
+    case 1:
+        dm->l1.min_pq = get_bits(gb, 12);
+        dm->l1.max_pq = get_bits(gb, 12);
+        dm->l1.avg_pq = get_bits(gb, 12);
+        break;
+    case 2:
+        dm->l2.target_max_pq = get_bits(gb, 12);
+        dm->l2.trim_slope = get_bits(gb, 12);
+        dm->l2.trim_offset = get_bits(gb, 12);
+        dm->l2.trim_power = get_bits(gb, 12);
+        dm->l2.trim_chroma_weight = get_bits(gb, 12);
+        dm->l2.trim_saturation_gain = get_bits(gb, 12);
+        dm->l2.ms_weight = get_bits(gb, 13) - 8192;
+        break;
+    case 4:
+        dm->l4.anchor_pq = get_bits(gb, 12);
+        dm->l4.anchor_power = get_bits(gb, 12);
+        break;
+    case 5:
+        dm->l5.left_offset = get_bits(gb, 13);
+        dm->l5.right_offset = get_bits(gb, 13);
+        dm->l5.top_offset = get_bits(gb, 13);
+        dm->l5.bottom_offset = get_bits(gb, 13);
+        break;
+    case 6:
+        dm->l6.max_luminance = get_bits(gb, 16);
+        dm->l6.min_luminance = get_bits(gb, 16);
+        dm->l6.max_cll = get_bits(gb, 16);
+        dm->l6.max_fall = get_bits(gb, 16);
+        break;
+    case 255:
+        dm->l255.dm_run_mode = get_bits(gb, 8);
+        dm->l255.dm_run_version = get_bits(gb, 8);
+        for (int i = 0; i < 4; i++)
+            dm->l255.dm_debug[i] = get_bits(gb, 8);
+        break;
+    default:
+        av_log(s->logctx, AV_LOG_WARNING,
+               "Unknown Dolby Vision DM v1 level: %u\n", dm->level);
+    }
+}
+
+static inline AVCIExy get_cie_xy(GetBitContext *gb)
+{
+    const int denom = 32767;
+    return (AVCIExy) {
+        .x = { get_sbits(gb, 16), denom },
+        .y = { get_sbits(gb, 16), denom },
+    };
+}
+
+static void parse_ext_v2(DOVIContext *s, GetBitContext *gb, AVDOVIDmData *dm,
+                         int ext_block_length)
+{
+    switch (dm->level) {
+    case 3:
+        dm->l3.min_pq_offset = get_bits(gb, 12);
+        dm->l3.max_pq_offset = get_bits(gb, 12);
+        dm->l3.avg_pq_offset = get_bits(gb, 12);
+        break;
+    case 8:
+        dm->l8.target_display_index = get_bits(gb, 8);
+        dm->l8.trim_slope = get_bits(gb, 12);
+        dm->l8.trim_offset = get_bits(gb, 12);
+        dm->l8.trim_power = get_bits(gb, 12);
+        dm->l8.trim_chroma_weight = get_bits(gb, 12);
+        dm->l8.trim_saturation_gain = get_bits(gb, 12);
+        dm->l8.ms_weight = get_bits(gb, 12) - 8192;
+        if (ext_block_length < 12)
+            break;
+        dm->l8.target_mid_contrast = get_bits(gb, 12);
+        if (ext_block_length < 13)
+            break;
+        dm->l8.clip_trim = get_bits(gb, 12);
+        if (ext_block_length < 19)
+            break;
+        for (int i = 0; i < 6; i++)
+            dm->l8.saturation_vector_field[i] = get_bits(gb, 8);
+        if (ext_block_length < 25)
+            break;
+        for (int i = 0; i < 6; i++)
+            dm->l8.hue_vector_field[i] = get_bits(gb, 8);
+        break;
+    case 9:
+        dm->l9.source_primary_index = get_bits(gb, 8);
+        if (ext_block_length < 17)
+            break;
+        dm->l9.source_display_primaries.prim.r = get_cie_xy(gb);
+        dm->l9.source_display_primaries.prim.g = get_cie_xy(gb);
+        dm->l9.source_display_primaries.prim.b = get_cie_xy(gb);
+        dm->l9.source_display_primaries.wp = get_cie_xy(gb);
+        break;
+    case 10:
+        dm->l10.target_display_index = get_bits(gb, 8);
+        dm->l10.target_max_pq = get_bits(gb, 12);
+        dm->l10.target_min_pq = get_bits(gb, 12);
+        dm->l10.target_primary_index = get_bits(gb, 8);
+        if (ext_block_length < 21)
+            break;
+        dm->l10.target_display_primaries.prim.r = get_cie_xy(gb);
+        dm->l10.target_display_primaries.prim.g = get_cie_xy(gb);
+        dm->l10.target_display_primaries.prim.b = get_cie_xy(gb);
+        dm->l10.target_display_primaries.wp = get_cie_xy(gb);
+        break;
+    case 11:
+        dm->l11.content_type = get_bits(gb, 8);
+        dm->l11.whitepoint = get_bits(gb, 4);
+        dm->l11.reference_mode_flag = get_bits(gb, 1);
+        skip_bits(gb, 3); /* reserved */
+        dm->l11.sharpness = get_bits(gb, 2);
+        dm->l11.noise_reduction = get_bits(gb, 2);
+        dm->l11.mpeg_noise_reduction = get_bits(gb, 2);
+        dm->l11.frame_rate_conversion = get_bits(gb, 2);
+        dm->l11.brightness = get_bits(gb, 2);
+        dm->l11.color = get_bits(gb, 2);
+        break;
+    case 254:
+        dm->l254.dm_mode = get_bits(gb, 8);
+        dm->l254.dm_version_index = get_bits(gb, 8);
+        break;
+    default:
+        av_log(s->logctx, AV_LOG_WARNING,
+               "Unknown Dolby Vision DM v2 level: %u\n", dm->level);
+    }
+}
+
+static int parse_ext_blocks(DOVIContext *s, GetBitContext *gb, int ver)
+{
+    int num_ext_blocks, ext_block_length, start_pos, parsed_bits;
+    AVDOVIDmData *ext_blocks;
+    size_t alloc_sz;
+
+    num_ext_blocks = get_ue_golomb_31(gb);
+    align_get_bits(gb);
+
+    alloc_sz = (s->num_ext_blocks + num_ext_blocks) * sizeof(AVDOVIDmData);
+    ext_blocks = av_fast_realloc(s->ext_blocks, &s->ext_blocks_sz, alloc_sz);
+    if (!ext_blocks) {
+        ff_dovi_ctx_unref(s);
+        return AVERROR(ENOMEM);
+    }
+    s->ext_blocks = ext_blocks;
+
+    while (num_ext_blocks--) {
+        AVDOVIDmData *dm = &s->ext_blocks[s->num_ext_blocks++];
+        ext_block_length = get_ue_golomb_31(gb);
+        dm->level = get_bits(gb, 8);
+        start_pos = get_bits_count(gb);
+
+        switch (ver) {
+        case 1: parse_ext_v1(s, gb, dm); break;
+        case 2: parse_ext_v2(s, gb, dm, ext_block_length); break;
+        }
+
+        parsed_bits = get_bits_count(gb) - start_pos;
+        if (parsed_bits > ext_block_length * 8) {
+            ff_dovi_ctx_unref(s);
+            return AVERROR_INVALIDDATA;
+        }
+        skip_bits(gb, ext_block_length * 8 - parsed_bits);
+    }
+
+    return 0;
+}
+
 int ff_dovi_rpu_parse(DOVIContext *s, const uint8_t *rpu, size_t rpu_size,
                       int err_recognition)
 {
@@ -524,6 +692,16 @@  int ff_dovi_rpu_parse(DOVIContext *s, const uint8_t *rpu, size_t rpu_size,
         color->source_diagonal = get_bits(gb, 10);
     }
 
+    /* Parse extension blocks */
+    s->num_ext_blocks = 0;
+    if ((ret = parse_ext_blocks(s, gb, 1)) < 0)
+        return ret;
+
+    if (get_bits_left(gb) > 48 /* padding + CRC32 + terminator */) {
+        if ((ret = parse_ext_blocks(s, gb, 2)) < 0)
+            return ret;
+    }
+
     return 0;
 
 fail: