diff mbox series

[FFmpeg-devel,v9,6/6] lavc/hevcdec: Parse DOVI RPU NALs

Message ID 20211222151447.57681-6-ffmpeg@haasn.xyz
State New
Headers show
Series [FFmpeg-devel,v9,1/6] lavu/frame: Add Dolby Vision metadata side data type | expand

Checks

Context Check Description
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished
andriy/make_ppc success Make finished
andriy/make_fate_ppc success Make fate finished
andriy/makeppc warning New warnings during build

Commit Message

Niklas Haas Dec. 22, 2021, 3:14 p.m. UTC
From: Niklas Haas <git@haasn.dev>

And expose the parsed values as frame side data. Update FATE results to
match.

It's worth documenting that this relies on the dovi configuration record
being present on the first AVPacket fed to the decoder, which in
practice is the case if if the API user has called something like
av_format_inject_global_side_data, which is unfortunately not the
default.

This commit is not the time and place to change that behavior, though.

Signed-off-by: Niklas Haas <git@haasn.dev>
---
 configure                  |   2 +-
 libavcodec/hevcdec.c       |  63 +++++++++--
 libavcodec/hevcdec.h       |   3 +
 tests/ref/fate/hevc-dv-rpu | 224 +++++++++++++++++++++++++++++++++++++
 4 files changed, 283 insertions(+), 9 deletions(-)

Comments

Andreas Rheinhardt Dec. 22, 2021, 7:55 p.m. UTC | #1
Niklas Haas:
> From: Niklas Haas <git@haasn.dev>
> 
> And expose the parsed values as frame side data. Update FATE results to
> match.
> 
> It's worth documenting that this relies on the dovi configuration record
> being present on the first AVPacket fed to the decoder, which in
> practice is the case if if the API user has called something like
> av_format_inject_global_side_data, which is unfortunately not the
> default.
> 
> This commit is not the time and place to change that behavior, though.
> 
> Signed-off-by: Niklas Haas <git@haasn.dev>
> ---
>  configure                  |   2 +-
>  libavcodec/hevcdec.c       |  63 +++++++++--
>  libavcodec/hevcdec.h       |   3 +
>  tests/ref/fate/hevc-dv-rpu | 224 +++++++++++++++++++++++++++++++++++++
>  4 files changed, 283 insertions(+), 9 deletions(-)
> 
> diff --git a/configure b/configure
> index 68658a847f..7803aa47af 100755
> --- a/configure
> +++ b/configure
> @@ -2826,7 +2826,7 @@ h264_decoder_suggest="error_resilience"
>  hap_decoder_select="snappy texturedsp"
>  hap_encoder_deps="libsnappy"
>  hap_encoder_select="texturedspenc"
> -hevc_decoder_select="atsc_a53 bswapdsp cabac golomb hevcparse videodsp"
> +hevc_decoder_select="atsc_a53 bswapdsp cabac dovi_rpu golomb hevcparse videodsp"
>  huffyuv_decoder_select="bswapdsp huffyuvdsp llviddsp"
>  huffyuv_encoder_select="bswapdsp huffman huffyuvencdsp llvidencdsp"
>  hymt_decoder_select="huffyuv_decoder"
> diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c
> index 46d9edf8eb..298d89fea6 100644
> --- a/libavcodec/hevcdec.c
> +++ b/libavcodec/hevcdec.c
> @@ -2723,6 +2723,7 @@ error:
>  static int set_side_data(HEVCContext *s)
>  {
>      AVFrame *out = s->ref->frame;
> +    int ret;
>  
>      if (s->sei.frame_packing.present &&
>          s->sei.frame_packing.arrangement_type >= 3 &&
> @@ -2967,6 +2968,9 @@ static int set_side_data(HEVCContext *s)
>          s->rpu_buf = NULL;
>      }
>  
> +    if ((ret = ff_dovi_attach_side_data(&s->dovi_ctx, out)) < 0)
> +        return ret;
> +
>      return 0;
>  }
>  
> @@ -3298,16 +3302,24 @@ static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
>      if (s->pkt.nb_nals > 1 && s->pkt.nals[s->pkt.nb_nals - 1].type == HEVC_NAL_UNSPEC62 &&
>          s->pkt.nals[s->pkt.nb_nals - 1].size > 2 && !s->pkt.nals[s->pkt.nb_nals - 1].nuh_layer_id
>          && !s->pkt.nals[s->pkt.nb_nals - 1].temporal_id) {
> +        H2645NAL *nal = &s->pkt.nals[s->pkt.nb_nals - 1];
>          if (s->rpu_buf) {
>              av_buffer_unref(&s->rpu_buf);
>              av_log(s->avctx, AV_LOG_WARNING, "Multiple Dolby Vision RPUs found in one AU. Skipping previous.\n");
>          }
>  
> -        s->rpu_buf = av_buffer_alloc(s->pkt.nals[s->pkt.nb_nals - 1].raw_size - 2);
> +        s->rpu_buf = av_buffer_alloc(nal->raw_size - 2);
>          if (!s->rpu_buf)
>              return AVERROR(ENOMEM);
> +        memcpy(s->rpu_buf->data, nal->raw_data + 2, nal->raw_size - 2);
>  
> -        memcpy(s->rpu_buf->data, s->pkt.nals[s->pkt.nb_nals - 1].raw_data + 2, s->pkt.nals[s->pkt.nb_nals - 1].raw_size - 2);
> +        s->dovi_ctx.config = s->dovi_cfg ? (void *) s->dovi_cfg->data : NULL;
> +        ret = ff_dovi_rpu_parse(&s->dovi_ctx, nal->data + 2, nal->size - 2);
> +        if (ret < 0) {
> +            av_buffer_unref(&s->rpu_buf);
> +            av_log(s->avctx, AV_LOG_WARNING, "Error parsing DOVI NAL unit.\n");
> +            /* ignore */
> +        }
>      }
>  
>      /* decode the NAL units */
> @@ -3440,8 +3452,8 @@ static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
>                               AVPacket *avpkt)
>  {
>      int ret;
> -    size_t new_extradata_size;
> -    uint8_t *new_extradata;
> +    uint8_t *sd;
> +    size_t sd_size;
>      HEVCContext *s = avctx->priv_data;
>  
>      if (!avpkt->size) {
> @@ -3453,14 +3465,37 @@ static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
>          return 0;
>      }
>  
> -    new_extradata = av_packet_get_side_data(avpkt, AV_PKT_DATA_NEW_EXTRADATA,
> -                                            &new_extradata_size);
> -    if (new_extradata && new_extradata_size > 0) {
> -        ret = hevc_decode_extradata(s, new_extradata, new_extradata_size, 0);
> +    sd = av_packet_get_side_data(avpkt, AV_PKT_DATA_NEW_EXTRADATA, &sd_size);
> +    if (sd && sd_size > 0) {
> +        ret = hevc_decode_extradata(s, sd, sd_size, 0);
>          if (ret < 0)
>              return ret;
>      }
>  
> +    sd = av_packet_get_side_data(avpkt, AV_PKT_DATA_DOVI_CONF, &sd_size);
> +    if (sd && sd_size > 0) {
> +        if (s->dovi_cfg) {
> +            /* Reuse existing buffer */
> +            if ((ret = av_buffer_make_writable(&s->dovi_cfg)) < 0)
> +                return ret;
> +        } else {
> +            /* Allocate new buffer */
> +            AVDOVIDecoderConfigurationRecord *cfg;
> +            size_t cfg_size;
> +            cfg = av_dovi_alloc(&cfg_size);
> +            if (!cfg)
> +                return AVERROR(ENOMEM);
> +            s->dovi_cfg = av_buffer_create((uint8_t *) cfg, cfg_size, NULL, NULL, 0);
> +            if (!s->dovi_cfg) {
> +                av_free(cfg);
> +                return AVERROR(ENOMEM);
> +            }
> +        }
> +
> +        av_assert0(sd_size >= s->dovi_cfg->size);

Actually, your dovi_cfg comes from libavutil (allocated via
av_dovi_alloc()), so it should be the newest and biggest dovi struct
there is. And the side data's AV_PKT_DATA_DOVI_CONF should
also emanate from av_dovi_alloc(), so its size should coincide with
dovi_cfg->size.

> +        memcpy(s->dovi_cfg->data, sd, s->dovi_cfg->size);
> +    }
> +
>      s->ref = NULL;
>      ret    = decode_nal_units(s, avpkt->data, avpkt->size);
>      if (ret < 0)
> @@ -3553,6 +3588,8 @@ static av_cold int hevc_decode_free(AVCodecContext *avctx)
>  
>      pic_arrays_free(s);
>  
> +    ff_dovi_ctx_unref(&s->dovi_ctx);
> +    av_buffer_unref(&s->dovi_cfg);
>      av_buffer_unref(&s->rpu_buf);
>  
>      av_freep(&s->md5_ctx);
> @@ -3637,6 +3674,7 @@ static av_cold int hevc_init_context(AVCodecContext *avctx)
>  
>      ff_bswapdsp_init(&s->bdsp);
>  
> +    s->dovi_ctx.logctx = avctx;
>      s->context_initialized = 1;
>      s->eos = 0;
>  
> @@ -3745,6 +3783,14 @@ static int hevc_update_thread_context(AVCodecContext *dst,
>      if (ret < 0)
>          return ret;
>  
> +    ret = av_buffer_replace(&s->dovi_cfg, s0->dovi_cfg);
> +    if (ret < 0)
> +        return ret;
> +
> +    ret = ff_dovi_ctx_replace(&s->dovi_ctx, &s0->dovi_ctx);
> +    if (ret < 0)
> +        return ret;
> +
>      s->sei.frame_packing        = s0->sei.frame_packing;
>      s->sei.display_orientation  = s0->sei.display_orientation;
>      s->sei.mastering_display    = s0->sei.mastering_display;
> @@ -3801,6 +3847,7 @@ static void hevc_decode_flush(AVCodecContext *avctx)
>      HEVCContext *s = avctx->priv_data;
>      ff_hevc_flush_dpb(s);
>      ff_hevc_reset_sei(&s->sei);
> +    ff_dovi_ctx_unref(&s->dovi_ctx);
>      av_buffer_unref(&s->rpu_buf);
>      s->max_ra = INT_MAX;
>      s->eos = 1;
> diff --git a/libavcodec/hevcdec.h b/libavcodec/hevcdec.h
> index 870ff178d4..c8dde6fd17 100644
> --- a/libavcodec/hevcdec.h
> +++ b/libavcodec/hevcdec.h
> @@ -32,6 +32,7 @@
>  #include "avcodec.h"
>  #include "bswapdsp.h"
>  #include "cabac.h"
> +#include "dovi_rpu.h"
>  #include "get_bits.h"
>  #include "hevcpred.h"
>  #include "h2645_parse.h"
> @@ -574,6 +575,8 @@ typedef struct HEVCContext {
>      int nuh_layer_id;
>  
>      AVBufferRef *rpu_buf;       ///< 0 or 1 Dolby Vision RPUs.
> +    AVBufferRef *dovi_cfg;      ///< contains AVDOVIDecoderConfigurationRecord
> +    DOVIContext dovi_ctx;       ///< Dolby Vision decoding context
>  } HEVCContext;
>  
>  /**
1. Patchwork said there was a new warning when applying this patch.
Andreas Rheinhardt Jan. 2, 2022, 4:31 a.m. UTC | #2
Niklas Haas:
> From: Niklas Haas <git@haasn.dev>
> 
> And expose the parsed values as frame side data. Update FATE results to
> match.
> 
> It's worth documenting that this relies on the dovi configuration record
> being present on the first AVPacket fed to the decoder, which in
> practice is the case if if the API user has called something like
> av_format_inject_global_side_data, which is unfortunately not the
> default.
> 
> This commit is not the time and place to change that behavior, though.
> 
> Signed-off-by: Niklas Haas <git@haasn.dev>
> ---
>  configure                  |   2 +-
>  libavcodec/hevcdec.c       |  63 +++++++++--
>  libavcodec/hevcdec.h       |   3 +
>  tests/ref/fate/hevc-dv-rpu | 224 +++++++++++++++++++++++++++++++++++++
>  4 files changed, 283 insertions(+), 9 deletions(-)
> 
> diff --git a/configure b/configure
> index 68658a847f..7803aa47af 100755
> --- a/configure
> +++ b/configure
> @@ -2826,7 +2826,7 @@ h264_decoder_suggest="error_resilience"
>  hap_decoder_select="snappy texturedsp"
>  hap_encoder_deps="libsnappy"
>  hap_encoder_select="texturedspenc"
> -hevc_decoder_select="atsc_a53 bswapdsp cabac golomb hevcparse videodsp"
> +hevc_decoder_select="atsc_a53 bswapdsp cabac dovi_rpu golomb hevcparse videodsp"
>  huffyuv_decoder_select="bswapdsp huffyuvdsp llviddsp"
>  huffyuv_encoder_select="bswapdsp huffman huffyuvencdsp llvidencdsp"
>  hymt_decoder_select="huffyuv_decoder"
> diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c
> index 46d9edf8eb..298d89fea6 100644
> --- a/libavcodec/hevcdec.c
> +++ b/libavcodec/hevcdec.c
> @@ -2723,6 +2723,7 @@ error:
>  static int set_side_data(HEVCContext *s)
>  {
>      AVFrame *out = s->ref->frame;
> +    int ret;
>  
>      if (s->sei.frame_packing.present &&
>          s->sei.frame_packing.arrangement_type >= 3 &&
> @@ -2967,6 +2968,9 @@ static int set_side_data(HEVCContext *s)
>          s->rpu_buf = NULL;
>      }
>  
> +    if ((ret = ff_dovi_attach_side_data(&s->dovi_ctx, out)) < 0)
> +        return ret;
> +
>      return 0;
>  }
>  
> @@ -3298,16 +3302,24 @@ static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
>      if (s->pkt.nb_nals > 1 && s->pkt.nals[s->pkt.nb_nals - 1].type == HEVC_NAL_UNSPEC62 &&
>          s->pkt.nals[s->pkt.nb_nals - 1].size > 2 && !s->pkt.nals[s->pkt.nb_nals - 1].nuh_layer_id
>          && !s->pkt.nals[s->pkt.nb_nals - 1].temporal_id) {
> +        H2645NAL *nal = &s->pkt.nals[s->pkt.nb_nals - 1];
>          if (s->rpu_buf) {
>              av_buffer_unref(&s->rpu_buf);
>              av_log(s->avctx, AV_LOG_WARNING, "Multiple Dolby Vision RPUs found in one AU. Skipping previous.\n");
>          }
>  
> -        s->rpu_buf = av_buffer_alloc(s->pkt.nals[s->pkt.nb_nals - 1].raw_size - 2);
> +        s->rpu_buf = av_buffer_alloc(nal->raw_size - 2);
>          if (!s->rpu_buf)
>              return AVERROR(ENOMEM);
> +        memcpy(s->rpu_buf->data, nal->raw_data + 2, nal->raw_size - 2);
>  
> -        memcpy(s->rpu_buf->data, s->pkt.nals[s->pkt.nb_nals - 1].raw_data + 2, s->pkt.nals[s->pkt.nb_nals - 1].raw_size - 2);
> +        s->dovi_ctx.config = s->dovi_cfg ? (void *) s->dovi_cfg->data : NULL;
> +        ret = ff_dovi_rpu_parse(&s->dovi_ctx, nal->data + 2, nal->size - 2);
> +        if (ret < 0) {
> +            av_buffer_unref(&s->rpu_buf);
> +            av_log(s->avctx, AV_LOG_WARNING, "Error parsing DOVI NAL unit.\n");
> +            /* ignore */
> +        }
>      }
>  
>      /* decode the NAL units */
> @@ -3440,8 +3452,8 @@ static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
>                               AVPacket *avpkt)
>  {
>      int ret;
> -    size_t new_extradata_size;
> -    uint8_t *new_extradata;
> +    uint8_t *sd;
> +    size_t sd_size;
>      HEVCContext *s = avctx->priv_data;
>  
>      if (!avpkt->size) {
> @@ -3453,14 +3465,37 @@ static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
>          return 0;
>      }
>  
> -    new_extradata = av_packet_get_side_data(avpkt, AV_PKT_DATA_NEW_EXTRADATA,
> -                                            &new_extradata_size);
> -    if (new_extradata && new_extradata_size > 0) {
> -        ret = hevc_decode_extradata(s, new_extradata, new_extradata_size, 0);
> +    sd = av_packet_get_side_data(avpkt, AV_PKT_DATA_NEW_EXTRADATA, &sd_size);
> +    if (sd && sd_size > 0) {
> +        ret = hevc_decode_extradata(s, sd, sd_size, 0);
>          if (ret < 0)
>              return ret;
>      }
>  
> +    sd = av_packet_get_side_data(avpkt, AV_PKT_DATA_DOVI_CONF, &sd_size);
> +    if (sd && sd_size > 0) {
> +        if (s->dovi_cfg) {
> +            /* Reuse existing buffer */
> +            if ((ret = av_buffer_make_writable(&s->dovi_cfg)) < 0)
> +                return ret;
> +        } else {
> +            /* Allocate new buffer */
> +            AVDOVIDecoderConfigurationRecord *cfg;
> +            size_t cfg_size;
> +            cfg = av_dovi_alloc(&cfg_size);
> +            if (!cfg)
> +                return AVERROR(ENOMEM);
> +            s->dovi_cfg = av_buffer_create((uint8_t *) cfg, cfg_size, NULL, NULL, 0);
> +            if (!s->dovi_cfg) {
> +                av_free(cfg);
> +                return AVERROR(ENOMEM);
> +            }
> +        }
> +
> +        av_assert0(sd_size >= s->dovi_cfg->size);
> +        memcpy(s->dovi_cfg->data, sd, s->dovi_cfg->size);

dovi_cfg is only used for exactly one thing: To read its dv_profile in
ff_dovi_rpu_parse() later. This is quite a lot of effort to get this bit
of data. Why not add a ff_dovi_parse_config() that just copies the
needed field(s) from the user-supplied AVDOVIDecoderConfigurationRecord
to the DOVIContext instead?
(Furthermore, said config is explicitly intended to survive an unref as
happens in a flush, leading to a dangling pointer in case the user
unrefs dovi_cfg in flush. (Yes, the hevc decoder ensures that config is
always set correctly before every ff_dovi_rpu_parse() call, but it is
nevertheless nowhere stated that the profile is intended to be persistent.))

> +    }
> +
>      s->ref = NULL;
>      ret    = decode_nal_units(s, avpkt->data, avpkt->size);
>      if (ret < 0)
> @@ -3553,6 +3588,8 @@ static av_cold int hevc_decode_free(AVCodecContext *avctx)
>  
>      pic_arrays_free(s);
>  
> +    ff_dovi_ctx_unref(&s->dovi_ctx);
> +    av_buffer_unref(&s->dovi_cfg);
>      av_buffer_unref(&s->rpu_buf);
>  
>      av_freep(&s->md5_ctx);
> @@ -3637,6 +3674,7 @@ static av_cold int hevc_init_context(AVCodecContext *avctx)
>  
>      ff_bswapdsp_init(&s->bdsp);
>  
> +    s->dovi_ctx.logctx = avctx;
>      s->context_initialized = 1;
>      s->eos = 0;
>  
> @@ -3745,6 +3783,14 @@ static int hevc_update_thread_context(AVCodecContext *dst,
>      if (ret < 0)
>          return ret;
>  
> +    ret = av_buffer_replace(&s->dovi_cfg, s0->dovi_cfg);
> +    if (ret < 0)
> +        return ret;
> +
> +    ret = ff_dovi_ctx_replace(&s->dovi_ctx, &s0->dovi_ctx);
> +    if (ret < 0)
> +        return ret;
> +
>      s->sei.frame_packing        = s0->sei.frame_packing;
>      s->sei.display_orientation  = s0->sei.display_orientation;
>      s->sei.mastering_display    = s0->sei.mastering_display;
> @@ -3801,6 +3847,7 @@ static void hevc_decode_flush(AVCodecContext *avctx)
>      HEVCContext *s = avctx->priv_data;
>      ff_hevc_flush_dpb(s);
>      ff_hevc_reset_sei(&s->sei);
> +    ff_dovi_ctx_unref(&s->dovi_ctx);
>      av_buffer_unref(&s->rpu_buf);
>      s->max_ra = INT_MAX;
>      s->eos = 1;
> diff --git a/libavcodec/hevcdec.h b/libavcodec/hevcdec.h
> index 870ff178d4..c8dde6fd17 100644
> --- a/libavcodec/hevcdec.h
> +++ b/libavcodec/hevcdec.h
> @@ -32,6 +32,7 @@
>  #include "avcodec.h"
>  #include "bswapdsp.h"
>  #include "cabac.h"
> +#include "dovi_rpu.h"
>  #include "get_bits.h"
>  #include "hevcpred.h"
>  #include "h2645_parse.h"
> @@ -574,6 +575,8 @@ typedef struct HEVCContext {
>      int nuh_layer_id;
>  
>      AVBufferRef *rpu_buf;       ///< 0 or 1 Dolby Vision RPUs.
> +    AVBufferRef *dovi_cfg;      ///< contains AVDOVIDecoderConfigurationRecord
> +    DOVIContext dovi_ctx;       ///< Dolby Vision decoding context
>  } HEVCContext;
>  
>  /**
diff mbox series

Patch

diff --git a/configure b/configure
index 68658a847f..7803aa47af 100755
--- a/configure
+++ b/configure
@@ -2826,7 +2826,7 @@  h264_decoder_suggest="error_resilience"
 hap_decoder_select="snappy texturedsp"
 hap_encoder_deps="libsnappy"
 hap_encoder_select="texturedspenc"
-hevc_decoder_select="atsc_a53 bswapdsp cabac golomb hevcparse videodsp"
+hevc_decoder_select="atsc_a53 bswapdsp cabac dovi_rpu golomb hevcparse videodsp"
 huffyuv_decoder_select="bswapdsp huffyuvdsp llviddsp"
 huffyuv_encoder_select="bswapdsp huffman huffyuvencdsp llvidencdsp"
 hymt_decoder_select="huffyuv_decoder"
diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c
index 46d9edf8eb..298d89fea6 100644
--- a/libavcodec/hevcdec.c
+++ b/libavcodec/hevcdec.c
@@ -2723,6 +2723,7 @@  error:
 static int set_side_data(HEVCContext *s)
 {
     AVFrame *out = s->ref->frame;
+    int ret;
 
     if (s->sei.frame_packing.present &&
         s->sei.frame_packing.arrangement_type >= 3 &&
@@ -2967,6 +2968,9 @@  static int set_side_data(HEVCContext *s)
         s->rpu_buf = NULL;
     }
 
+    if ((ret = ff_dovi_attach_side_data(&s->dovi_ctx, out)) < 0)
+        return ret;
+
     return 0;
 }
 
@@ -3298,16 +3302,24 @@  static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
     if (s->pkt.nb_nals > 1 && s->pkt.nals[s->pkt.nb_nals - 1].type == HEVC_NAL_UNSPEC62 &&
         s->pkt.nals[s->pkt.nb_nals - 1].size > 2 && !s->pkt.nals[s->pkt.nb_nals - 1].nuh_layer_id
         && !s->pkt.nals[s->pkt.nb_nals - 1].temporal_id) {
+        H2645NAL *nal = &s->pkt.nals[s->pkt.nb_nals - 1];
         if (s->rpu_buf) {
             av_buffer_unref(&s->rpu_buf);
             av_log(s->avctx, AV_LOG_WARNING, "Multiple Dolby Vision RPUs found in one AU. Skipping previous.\n");
         }
 
-        s->rpu_buf = av_buffer_alloc(s->pkt.nals[s->pkt.nb_nals - 1].raw_size - 2);
+        s->rpu_buf = av_buffer_alloc(nal->raw_size - 2);
         if (!s->rpu_buf)
             return AVERROR(ENOMEM);
+        memcpy(s->rpu_buf->data, nal->raw_data + 2, nal->raw_size - 2);
 
-        memcpy(s->rpu_buf->data, s->pkt.nals[s->pkt.nb_nals - 1].raw_data + 2, s->pkt.nals[s->pkt.nb_nals - 1].raw_size - 2);
+        s->dovi_ctx.config = s->dovi_cfg ? (void *) s->dovi_cfg->data : NULL;
+        ret = ff_dovi_rpu_parse(&s->dovi_ctx, nal->data + 2, nal->size - 2);
+        if (ret < 0) {
+            av_buffer_unref(&s->rpu_buf);
+            av_log(s->avctx, AV_LOG_WARNING, "Error parsing DOVI NAL unit.\n");
+            /* ignore */
+        }
     }
 
     /* decode the NAL units */
@@ -3440,8 +3452,8 @@  static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
                              AVPacket *avpkt)
 {
     int ret;
-    size_t new_extradata_size;
-    uint8_t *new_extradata;
+    uint8_t *sd;
+    size_t sd_size;
     HEVCContext *s = avctx->priv_data;
 
     if (!avpkt->size) {
@@ -3453,14 +3465,37 @@  static int hevc_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
         return 0;
     }
 
-    new_extradata = av_packet_get_side_data(avpkt, AV_PKT_DATA_NEW_EXTRADATA,
-                                            &new_extradata_size);
-    if (new_extradata && new_extradata_size > 0) {
-        ret = hevc_decode_extradata(s, new_extradata, new_extradata_size, 0);
+    sd = av_packet_get_side_data(avpkt, AV_PKT_DATA_NEW_EXTRADATA, &sd_size);
+    if (sd && sd_size > 0) {
+        ret = hevc_decode_extradata(s, sd, sd_size, 0);
         if (ret < 0)
             return ret;
     }
 
+    sd = av_packet_get_side_data(avpkt, AV_PKT_DATA_DOVI_CONF, &sd_size);
+    if (sd && sd_size > 0) {
+        if (s->dovi_cfg) {
+            /* Reuse existing buffer */
+            if ((ret = av_buffer_make_writable(&s->dovi_cfg)) < 0)
+                return ret;
+        } else {
+            /* Allocate new buffer */
+            AVDOVIDecoderConfigurationRecord *cfg;
+            size_t cfg_size;
+            cfg = av_dovi_alloc(&cfg_size);
+            if (!cfg)
+                return AVERROR(ENOMEM);
+            s->dovi_cfg = av_buffer_create((uint8_t *) cfg, cfg_size, NULL, NULL, 0);
+            if (!s->dovi_cfg) {
+                av_free(cfg);
+                return AVERROR(ENOMEM);
+            }
+        }
+
+        av_assert0(sd_size >= s->dovi_cfg->size);
+        memcpy(s->dovi_cfg->data, sd, s->dovi_cfg->size);
+    }
+
     s->ref = NULL;
     ret    = decode_nal_units(s, avpkt->data, avpkt->size);
     if (ret < 0)
@@ -3553,6 +3588,8 @@  static av_cold int hevc_decode_free(AVCodecContext *avctx)
 
     pic_arrays_free(s);
 
+    ff_dovi_ctx_unref(&s->dovi_ctx);
+    av_buffer_unref(&s->dovi_cfg);
     av_buffer_unref(&s->rpu_buf);
 
     av_freep(&s->md5_ctx);
@@ -3637,6 +3674,7 @@  static av_cold int hevc_init_context(AVCodecContext *avctx)
 
     ff_bswapdsp_init(&s->bdsp);
 
+    s->dovi_ctx.logctx = avctx;
     s->context_initialized = 1;
     s->eos = 0;
 
@@ -3745,6 +3783,14 @@  static int hevc_update_thread_context(AVCodecContext *dst,
     if (ret < 0)
         return ret;
 
+    ret = av_buffer_replace(&s->dovi_cfg, s0->dovi_cfg);
+    if (ret < 0)
+        return ret;
+
+    ret = ff_dovi_ctx_replace(&s->dovi_ctx, &s0->dovi_ctx);
+    if (ret < 0)
+        return ret;
+
     s->sei.frame_packing        = s0->sei.frame_packing;
     s->sei.display_orientation  = s0->sei.display_orientation;
     s->sei.mastering_display    = s0->sei.mastering_display;
@@ -3801,6 +3847,7 @@  static void hevc_decode_flush(AVCodecContext *avctx)
     HEVCContext *s = avctx->priv_data;
     ff_hevc_flush_dpb(s);
     ff_hevc_reset_sei(&s->sei);
+    ff_dovi_ctx_unref(&s->dovi_ctx);
     av_buffer_unref(&s->rpu_buf);
     s->max_ra = INT_MAX;
     s->eos = 1;
diff --git a/libavcodec/hevcdec.h b/libavcodec/hevcdec.h
index 870ff178d4..c8dde6fd17 100644
--- a/libavcodec/hevcdec.h
+++ b/libavcodec/hevcdec.h
@@ -32,6 +32,7 @@ 
 #include "avcodec.h"
 #include "bswapdsp.h"
 #include "cabac.h"
+#include "dovi_rpu.h"
 #include "get_bits.h"
 #include "hevcpred.h"
 #include "h2645_parse.h"
@@ -574,6 +575,8 @@  typedef struct HEVCContext {
     int nuh_layer_id;
 
     AVBufferRef *rpu_buf;       ///< 0 or 1 Dolby Vision RPUs.
+    AVBufferRef *dovi_cfg;      ///< contains AVDOVIDecoderConfigurationRecord
+    DOVIContext dovi_ctx;       ///< Dolby Vision decoding context
 } HEVCContext;
 
 /**
diff --git a/tests/ref/fate/hevc-dv-rpu b/tests/ref/fate/hevc-dv-rpu
index 416d9c51a6..1980ab13ea 100644
--- a/tests/ref/fate/hevc-dv-rpu
+++ b/tests/ref/fate/hevc-dv-rpu
@@ -5,9 +5,233 @@  side_data_type=H.26[45] User Data Unregistered SEI message
 [SIDE_DATA]
 side_data_type=Dolby Vision RPU Data
 [/SIDE_DATA]
+[SIDE_DATA]
+side_data_type=Dolby Vision Metadata
+rpu_type=2
+rpu_format=18
+vdr_rpu_profile=1
+vdr_rpu_level=0
+chroma_resampling_explicit_filter_flag=0
+coef_data_type=0
+coef_log2_denom=23
+vdr_rpu_normalized_idc=1
+bl_video_full_range_flag=0
+bl_bit_depth=10
+el_bit_depth=10
+vdr_bit_depth=12
+spatial_resampling_filter_flag=0
+el_spatial_resampling_filter_flag=0
+disable_residual_flag=1
+vdr_rpu_id=0
+mapping_color_space=0
+mapping_chroma_format_idc=0
+nlq_method_idc=-1
+nlq_method_idc_name=none
+num_x_partitions=1
+num_y_partitions=1
+[COMPONENT]
+pivots=63 132 362 618 874 911 927 935 942
+[SECTION]
+mapping_idc=0
+mapping_idc_name=polynomial
+poly_order=2
+poly_coef=-409680 16721463 -20276640
+[/SECTION]
+[SECTION]
+mapping_idc=0
+mapping_idc_name=polynomial
+poly_order=2
+poly_coef=-119056 13575212 -12867889
+[/SECTION]
+[SECTION]
+mapping_idc=0
+mapping_idc_name=polynomial
+poly_order=2
+poly_coef=1317527 5338528 -948122
+[/SECTION]
+[SECTION]
+mapping_idc=0
+mapping_idc_name=polynomial
+poly_order=2
+poly_coef=2119979 2065496 2288524
+[/SECTION]
+[SECTION]
+mapping_idc=0
+mapping_idc_name=polynomial
+poly_order=2
+poly_coef=7982780 -11367226 9973944
+[/SECTION]
+[SECTION]
+mapping_idc=0
+mapping_idc_name=polynomial
+poly_order=2
+poly_coef=53792084 -114243184 67724328
+[/SECTION]
+[SECTION]
+mapping_idc=0
+mapping_idc_name=polynomial
+poly_order=2
+poly_coef=112973872 -244837568 139764480
+[/SECTION]
+[SECTION]
+mapping_idc=0
+mapping_idc_name=polynomial
+poly_order=2
+poly_coef=236828416 -518849056 291306944
+[/SECTION]
+[/COMPONENT]
+[COMPONENT]
+pivots=0 1023
+[SECTION]
+mapping_idc=1
+mapping_idc_name=mmr
+mmr_order=3
+mmr_constant=-4200453
+mmr_coef=9094176 31944476 739652 -27103344 -3431599 -11015088 22758042 -2028643 -30021218 -906885 26272992 7291404 16488745 -78087176 -1487777 12496543 762460 -4281948 -5768035 -7843163 103636960
+[/SECTION]
+[/COMPONENT]
+[COMPONENT]
+pivots=0 1023
+[SECTION]
+mapping_idc=1
+mapping_idc_name=mmr
+mmr_order=3
+mmr_constant=-10387889
+mmr_coef=29604202 3214133 46206184 -8564340 -53383996 1628407 5426045 -21634202 -5251953 -50812292 19221972 76726656 -425892 -35041240 5917361 2863974 25030554 -14404292 -41230120 1229046 53575140
+[/SECTION]
+[/COMPONENT]
+dm_metadata_id=0
+scene_refresh_flag=1
+ycc_to_rgb_matrix=9574/8192 0/8192 13802/8192 9574/8192 -1540/8192 -5348/8192 9574/8192 17610/8192 0/8192
+ycc_to_rgb_offset=16777216/268435456 134217728/268435456 134217728/268435456
+rgb_to_lms_matrix=7222/16384 8771/16384 390/16384 2654/16384 12430/16384 1300/16384 0/16384 422/16384 15962/16384
+signal_eotf=65535
+signal_eotf_param0=0
+signal_eotf_param1=0
+signal_eotf_param2=0
+signal_bit_depth=12
+signal_color_space=0
+signal_chroma_format=0
+signal_full_range_flag=1
+source_min_pq=0
+source_max_pq=3079
+source_diagonal=42
+[/SIDE_DATA]
 [/FRAME]
 [FRAME]
 [SIDE_DATA]
 side_data_type=Dolby Vision RPU Data
 [/SIDE_DATA]
+[SIDE_DATA]
+side_data_type=Dolby Vision Metadata
+rpu_type=2
+rpu_format=18
+vdr_rpu_profile=1
+vdr_rpu_level=0
+chroma_resampling_explicit_filter_flag=0
+coef_data_type=0
+coef_log2_denom=23
+vdr_rpu_normalized_idc=1
+bl_video_full_range_flag=0
+bl_bit_depth=10
+el_bit_depth=10
+vdr_bit_depth=12
+spatial_resampling_filter_flag=0
+el_spatial_resampling_filter_flag=0
+disable_residual_flag=1
+vdr_rpu_id=0
+mapping_color_space=0
+mapping_chroma_format_idc=0
+nlq_method_idc=-1
+nlq_method_idc_name=none
+num_x_partitions=1
+num_y_partitions=1
+[COMPONENT]
+pivots=63 132 362 618 874 911 927 935 942
+[SECTION]
+mapping_idc=0
+mapping_idc_name=polynomial
+poly_order=2
+poly_coef=-409680 16721463 -20276640
+[/SECTION]
+[SECTION]
+mapping_idc=0
+mapping_idc_name=polynomial
+poly_order=2
+poly_coef=-119056 13575212 -12867889
+[/SECTION]
+[SECTION]
+mapping_idc=0
+mapping_idc_name=polynomial
+poly_order=2
+poly_coef=1317527 5338528 -948122
+[/SECTION]
+[SECTION]
+mapping_idc=0
+mapping_idc_name=polynomial
+poly_order=2
+poly_coef=2119979 2065496 2288524
+[/SECTION]
+[SECTION]
+mapping_idc=0
+mapping_idc_name=polynomial
+poly_order=2
+poly_coef=7982780 -11367226 9973944
+[/SECTION]
+[SECTION]
+mapping_idc=0
+mapping_idc_name=polynomial
+poly_order=2
+poly_coef=53792084 -114243184 67724328
+[/SECTION]
+[SECTION]
+mapping_idc=0
+mapping_idc_name=polynomial
+poly_order=2
+poly_coef=112973872 -244837568 139764480
+[/SECTION]
+[SECTION]
+mapping_idc=0
+mapping_idc_name=polynomial
+poly_order=2
+poly_coef=236828416 -518849056 291306944
+[/SECTION]
+[/COMPONENT]
+[COMPONENT]
+pivots=0 1023
+[SECTION]
+mapping_idc=1
+mapping_idc_name=mmr
+mmr_order=3
+mmr_constant=-4200453
+mmr_coef=9094176 31944476 739652 -27103344 -3431599 -11015088 22758042 -2028643 -30021218 -906885 26272992 7291404 16488745 -78087176 -1487777 12496543 762460 -4281948 -5768035 -7843163 103636960
+[/SECTION]
+[/COMPONENT]
+[COMPONENT]
+pivots=0 1023
+[SECTION]
+mapping_idc=1
+mapping_idc_name=mmr
+mmr_order=3
+mmr_constant=-10387889
+mmr_coef=29604202 3214133 46206184 -8564340 -53383996 1628407 5426045 -21634202 -5251953 -50812292 19221972 76726656 -425892 -35041240 5917361 2863974 25030554 -14404292 -41230120 1229046 53575140
+[/SECTION]
+[/COMPONENT]
+dm_metadata_id=0
+scene_refresh_flag=1
+ycc_to_rgb_matrix=9574/8192 0/8192 13802/8192 9574/8192 -1540/8192 -5348/8192 9574/8192 17610/8192 0/8192
+ycc_to_rgb_offset=16777216/268435456 134217728/268435456 134217728/268435456
+rgb_to_lms_matrix=7222/16384 8771/16384 390/16384 2654/16384 12430/16384 1300/16384 0/16384 422/16384 15962/16384
+signal_eotf=65535
+signal_eotf_param0=0
+signal_eotf_param1=0
+signal_eotf_param2=0
+signal_bit_depth=12
+signal_color_space=0
+signal_chroma_format=0
+signal_full_range_flag=1
+source_min_pq=0
+source_max_pq=3079
+source_diagonal=42
+[/SIDE_DATA]
 [/FRAME]