diff mbox series

[FFmpeg-devel,2/3] avcodec/hevcdec: Export Dolby Vision RPUs as side data

Message ID 20211021134843.1436241-3-derek.buitenhuis@gmail.com
State New
Headers show
Series Dolby Vision RPU Side Data | expand

Checks

Context Check Description
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished
andriy/make_ppc success Make finished
andriy/make_fate_ppc success Make fate finished

Commit Message

Derek Buitenhuis Oct. 21, 2021, 1:48 p.m. UTC
Signed-off-by: Derek Buitenhuis <derek.buitenhuis@gmail.com>
---
* The NAL reordering approach is a result of discussions with Anton and James
  a few months ago.
* I've put the NAL reordering in ff_h2645_packet_split, rather than a bitstream
  filter for a few reasons:
    * I don't think having a decoder's behavior rely on the presence of a bitstream
      filter is good architecture / design.
    * This spliting is only usd or useful to decoding.
---
 libavcodec/h2645_parse.c | 28 ++++++++++++++++++++++++++++
 libavcodec/hevcdec.c     | 32 ++++++++++++++++++++++++++++++++
 libavcodec/hevcdec.h     |  3 +++
 libavcodec/version.h     |  2 +-
 4 files changed, 64 insertions(+), 1 deletion(-)

Comments

James Almer Oct. 23, 2021, 2:18 p.m. UTC | #1
On 10/21/2021 10:48 AM, Derek Buitenhuis wrote:
> Signed-off-by: Derek Buitenhuis <derek.buitenhuis@gmail.com>
> ---
> * The NAL reordering approach is a result of discussions with Anton and James
>    a few months ago.
> * I've put the NAL reordering in ff_h2645_packet_split, rather than a bitstream
>    filter for a few reasons:
>      * I don't think having a decoder's behavior rely on the presence of a bitstream
>        filter is good architecture / design.
>      * This spliting is only usd or useful to decoding.
> ---
>   libavcodec/h2645_parse.c | 28 ++++++++++++++++++++++++++++
>   libavcodec/hevcdec.c     | 32 ++++++++++++++++++++++++++++++++
>   libavcodec/hevcdec.h     |  3 +++
>   libavcodec/version.h     |  2 +-
>   4 files changed, 64 insertions(+), 1 deletion(-)
> 
> diff --git a/libavcodec/h2645_parse.c b/libavcodec/h2645_parse.c
> index 6fbe97ad4a..04348c3a73 100644
> --- a/libavcodec/h2645_parse.c
> +++ b/libavcodec/h2645_parse.c
> @@ -517,6 +517,34 @@ int ff_h2645_packet_split(H2645Packet *pkt, const uint8_t *buf, int length,
>           pkt->nb_nals++;
>       }
>   
> +    /*
> +     * Due to limitions in avcodec's current frame threading code, it cannot
> +     * handle adding frame side data in any place except before the slice has
> +     * started decoding. Since Dolby Vision RPUs (which appear as NAL type 62)
> +     * are appended to the AU, this is a poblematic. As a hack around this, we
> +     * move any RPUs to before the first slice NAL.
> +     */
> +    if (codec_id == AV_CODEC_ID_HEVC && pkt->nb_nals > 1 && pkt->nals[pkt->nb_nals - 1].type == HEVC_NAL_UNSPEC62 &&
> +        pkt->nals[pkt->nb_nals - 1].data[0] == 0x7C && pkt->nals[pkt->nb_nals - 1].data[1] == 0x01) {

0x7c01 is just the NAL header that signals type 62, nuh_layer_id 0 and 
nuh_temporal_id 0.

forbidden_zero_bit    =      0
nal_unit_type         = 111110
nuh_layer_id          = 000000
nuh_temporal_id_plus1 =    001

So the last two checks can be changed to !pkt->nals[pkt->nb_nals - 
1].nuh_layer_id && !pkt->nals[pkt->nb_nals - 1].temporal_id

> +
> +        int first_non_slice = 0;
> +        H2645NAL *tmp = av_malloc(pkt->nb_nals * sizeof(H2645NAL));
> +        if (!tmp)
> +            return AVERROR(ENOMEM);
> +
> +        for (int i = pkt->nb_nals - 1; i >= 0; i--) {
> +            if (pkt->nals[i].type < HEVC_NAL_VPS)
> +                first_non_slice = i;
> +            tmp[i] = pkt->nals[i];
> +        }
> +
> +        pkt->nals[first_non_slice] = pkt->nals[pkt->nb_nals - 1];
> +        for (int i = first_non_slice + 1; i < pkt->nb_nals; i++)
> +            pkt->nals[i] = tmp[i - 1];
> +
> +        av_free(tmp);

This is horrible, but i guess there's no alternative without 
autoinserting a bsf.

> +    }
> +
>       return 0;
>   }
>   
> diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c
> index 246ffd7d80..5ccee2aa4e 100644
> --- a/libavcodec/hevcdec.c
> +++ b/libavcodec/hevcdec.c
> @@ -2950,6 +2950,17 @@ static int set_side_data(HEVCContext *s)
>           }
>       }
>   
> +    if (s->rpu_buf) {
> +        AVFrameSideData *rpu = av_frame_new_side_data(out, AV_FRAME_DATA_DOVI_RPU, s->rpu_buf_size);
> +        if (!rpu)
> +            return AVERROR(ENOMEM);
> +
> +        memcpy(rpu->data, s->rpu_buf, s->rpu_buf_size);
> +
> +        av_freep(&s->rpu_buf);
> +        s->rpu_buf_size = 0;
> +    }
> +
>       return 0;
>   }
>   
> @@ -3224,6 +3235,23 @@ static int decode_nal_unit(HEVCContext *s, const H2645NAL *nal)
>       case HEVC_NAL_AUD:
>       case HEVC_NAL_FD_NUT:
>           break;
> +    case HEVC_NAL_UNSPEC62:
> +        /*
> +         * Check for RPU delimiter.
> +         *
> +         * Dolby Vision RPUs masquerade as unregistered NALs of type 62 and start with
> +         * 0x7C01.

What i said above. It's nothing Dolby specific. Any NAL type 62 could be 
like that. If there's no spec we could use to actually parse the 
bitstream after the NAL header, then we will end up propagating pretty 
much anything using NAL type 62 as Dolby RPU.

> +         */
> +        if (nal->size > 2 && nal->data[0] == 0x7C && nal->data[1] == 0x01) {
> +            s->rpu_buf = av_malloc(nal->raw_size - 2);

You should use an AVBufferRef for this that you then attach to the frame 
using av_frame_new_side_data_from_buf().

And you need to keep threads in sync by replacing the dst context's 
rpu_buf with the src context rpu_buf with av_buffer_replace() in 
hevc_update_thread_context().

> +            if (!s->rpu_buf)
> +                return AVERROR(ENOMEM);
> +
> +            memcpy(s->rpu_buf, nal->raw_data + 2, nal->raw_size - 2);
> +
> +            s->rpu_buf_size = nal->raw_size - 2;
> +        }
> +        break;
>       default:
>           av_log(s->avctx, AV_LOG_INFO,
>                  "Skipping NAL unit %d\n", s->nal_unit_type);
> @@ -3512,6 +3540,8 @@ static av_cold int hevc_decode_free(AVCodecContext *avctx)
>   
>       pic_arrays_free(s);
>   
> +    av_freep(&s->rpu_buf);
> +
>       av_freep(&s->md5_ctx);
>   
>       av_freep(&s->cabac_state);
> @@ -3754,6 +3784,8 @@ static void hevc_decode_flush(AVCodecContext *avctx)
>       HEVCContext *s = avctx->priv_data;
>       ff_hevc_flush_dpb(s);
>       ff_hevc_reset_sei(&s->sei);
> +    av_freep(&s->rpu_buf);
> +    s->rpu_buf_size = 0;
>       s->max_ra = INT_MAX;
>       s->eos = 1;
>   }
> diff --git a/libavcodec/hevcdec.h b/libavcodec/hevcdec.h
> index 77fdf90da1..8a9b516759 100644
> --- a/libavcodec/hevcdec.h
> +++ b/libavcodec/hevcdec.h
> @@ -572,6 +572,9 @@ typedef struct HEVCContext {
>   
>       int nal_length_size;    ///< Number of bytes used for nal length (1, 2 or 4)
>       int nuh_layer_id;
> +
> +    uint8_t *rpu_buf;       ///< 0 or 1 Dolby Vision RPUs.
> +    size_t rpu_buf_size;
>   } HEVCContext;
>   
>   /**
> diff --git a/libavcodec/version.h b/libavcodec/version.h
> index 74b8baa5f3..76af066d32 100644
> --- a/libavcodec/version.h
> +++ b/libavcodec/version.h
> @@ -28,7 +28,7 @@
>   #include "libavutil/version.h"
>   
>   #define LIBAVCODEC_VERSION_MAJOR  59
> -#define LIBAVCODEC_VERSION_MINOR  12
> +#define LIBAVCODEC_VERSION_MINOR  13
>   #define LIBAVCODEC_VERSION_MICRO 100
>   
>   #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
Derek Buitenhuis Oct. 23, 2021, 5:52 p.m. UTC | #2
On 10/23/2021 3:18 PM, James Almer wrote:
> 0x7c01 is just the NAL header that signals type 62, nuh_layer_id 0 and 
> nuh_temporal_id 0.
> 
> forbidden_zero_bit    =      0
> nal_unit_type         = 111110
> nuh_layer_id          = 000000
> nuh_temporal_id_plus1 =    001
> 
> So the last two checks can be changed to !pkt->nals[pkt->nb_nals - 
> 1].nuh_layer_id && !pkt->nals[pkt->nb_nals - 1].temporal_id

Do'h, will fix.
 
> This is horrible, but i guess there's no alternative without 
> autoinserting a bsf.

Yeah, I couldn't think of one - and the BSF option seemed even worse.

> What i said above. It's nothing Dolby specific. Any NAL type 62 could be 
> like that. If there's no spec we could use to actually parse the 
> bitstream after the NAL header, then we will end up propagating pretty 
> much anything using NAL type 62 as Dolby RPU.

Indeed, there is no spec. As far as I know, nothing else uses UNSPEC62,
though, and this is consistent with what e.g. Dolby's open source muxer
and mkvtoolnix do.

What do you suggest? Rename the side data to _UNSPEC62 maybe and leave it
to the caller to handle it?

Or, what we could do is check for the existence of stream side data containing
the DOVI configuration record, which is needed to use these RPUs, I think.

>> +         */
>> +        if (nal->size > 2 && nal->data[0] == 0x7C && nal->data[1] == 0x01) {
>> +            s->rpu_buf = av_malloc(nal->raw_size - 2);
> 
> You should use an AVBufferRef for this that you then attach to the frame 
> using av_frame_new_side_data_from_buf().

Will do.

> And you need to keep threads in sync by replacing the dst context's 
> rpu_buf with the src context rpu_buf with av_buffer_replace() in 
> hevc_update_thread_context().

Will do. Interestingly, I haven't run into any issues here with MT at all,
in months of use. Luck?

- Derek
James Almer Oct. 23, 2021, 6:15 p.m. UTC | #3
On 10/23/2021 2:52 PM, Derek Buitenhuis wrote:
> On 10/23/2021 3:18 PM, James Almer wrote:
>> 0x7c01 is just the NAL header that signals type 62, nuh_layer_id 0 and
>> nuh_temporal_id 0.
>>
>> forbidden_zero_bit    =      0
>> nal_unit_type         = 111110
>> nuh_layer_id          = 000000
>> nuh_temporal_id_plus1 =    001
>>
>> So the last two checks can be changed to !pkt->nals[pkt->nb_nals -
>> 1].nuh_layer_id && !pkt->nals[pkt->nb_nals - 1].temporal_id
> 
> Do'h, will fix.

That being said, would Dolby RPU NALUs like this using other values for 
temporal and layer id be valid? Or can we just assume that's never 
happening?

>   
>> This is horrible, but i guess there's no alternative without
>> autoinserting a bsf.
> 
> Yeah, I couldn't think of one - and the BSF option seemed even worse.
> 
>> What i said above. It's nothing Dolby specific. Any NAL type 62 could be
>> like that. If there's no spec we could use to actually parse the
>> bitstream after the NAL header, then we will end up propagating pretty
>> much anything using NAL type 62 as Dolby RPU.
> 
> Indeed, there is no spec. As far as I know, nothing else uses UNSPEC62,
> though, and this is consistent with what e.g. Dolby's open source muxer
> and mkvtoolnix do.
> 
> What do you suggest? Rename the side data to _UNSPEC62 maybe and leave it
> to the caller to handle it?

No, i prefer having the side data be about Dolby Vision RPU. Who knows, 
maybe in the future Dolby realizes they had the Unregistered and even 
Registered User Data SEI readily available for this kind of proprietary 
information and start using that instead...

> 
> Or, what we could do is check for the existence of stream side data containing
> the DOVI configuration record, which is needed to use these RPUs, I think.

That sounds ideal, yes.

> 
>>> +         */
>>> +        if (nal->size > 2 && nal->data[0] == 0x7C && nal->data[1] == 0x01) {
>>> +            s->rpu_buf = av_malloc(nal->raw_size - 2);
>>
>> You should use an AVBufferRef for this that you then attach to the frame
>> using av_frame_new_side_data_from_buf().
> 
> Will do.
> 
>> And you need to keep threads in sync by replacing the dst context's
>> rpu_buf with the src context rpu_buf with av_buffer_replace() in
>> hevc_update_thread_context().
> 
> Will do. Interestingly, I haven't run into any issues here with MT at all,
> in months of use. Luck?

I guess it may be because there's one such NALU per AU in this sample, 
so no frame depends on other threads having finished parsing/decoding 
their own frames and syncing the stuff stored in their thread-specific 
contexts.

> 
> - Derek
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
Jan Ekström Oct. 23, 2021, 6:25 p.m. UTC | #4
On Sat, Oct 23, 2021 at 9:16 PM James Almer <jamrial@gmail.com> wrote:
>
> On 10/23/2021 2:52 PM, Derek Buitenhuis wrote:
> > On 10/23/2021 3:18 PM, James Almer wrote:
> >> 0x7c01 is just the NAL header that signals type 62, nuh_layer_id 0 and
> >> nuh_temporal_id 0.
> >>
> >> forbidden_zero_bit    =      0
> >> nal_unit_type         = 111110
> >> nuh_layer_id          = 000000
> >> nuh_temporal_id_plus1 =    001
> >>
> >> So the last two checks can be changed to !pkt->nals[pkt->nb_nals -
> >> 1].nuh_layer_id && !pkt->nals[pkt->nb_nals - 1].temporal_id
> >
> > Do'h, will fix.
>
> That being said, would Dolby RPU NALUs like this using other values for
> temporal and layer id be valid? Or can we just assume that's never
> happening?
>
> >
> >> This is horrible, but i guess there's no alternative without
> >> autoinserting a bsf.
> >
> > Yeah, I couldn't think of one - and the BSF option seemed even worse.
> >
> >> What i said above. It's nothing Dolby specific. Any NAL type 62 could be
> >> like that. If there's no spec we could use to actually parse the
> >> bitstream after the NAL header, then we will end up propagating pretty
> >> much anything using NAL type 62 as Dolby RPU.
> >
> > Indeed, there is no spec. As far as I know, nothing else uses UNSPEC62,
> > though, and this is consistent with what e.g. Dolby's open source muxer
> > and mkvtoolnix do.
> >
> > What do you suggest? Rename the side data to _UNSPEC62 maybe and leave it
> > to the caller to handle it?
>
> No, i prefer having the side data be about Dolby Vision RPU. Who knows,
> maybe in the future Dolby realizes they had the Unregistered and even
> Registered User Data SEI readily available for this kind of proprietary
> information and start using that instead...
>

They already utilize something like this for their ST.2094-10
brightness metadata (which is actually specified through ETSI etc
[1]), which hilariously is also called "DoVi" in their marketing.

Since effectively D has taken over NAL unit 62 for this stuff, just
call it RPU_BUFFER or so, and take in everything NAL unit 62?

Jan

[1] https://www.etsi.org/deliver/etsi_ts/103500_103599/103572/01.03.01_60/ts_103572v010301p.pdf
Derek Buitenhuis Oct. 23, 2021, 6:33 p.m. UTC | #5
On 10/23/2021 7:15 PM, James Almer wrote:
>> Do'h, will fix.
> 
> That being said, would Dolby RPU NALUs like this using other values for 
> temporal and layer id be valid? Or can we just assume that's never 
> happening?

They also use 63 for an EL, according to other codebases an docs. I didn't
include it in this patch, since I have no such samples or way to test.


> No, i prefer having the side data be about Dolby Vision RPU. Who knows, 
> maybe in the future Dolby realizes they had the Unregistered and even 
> Registered User Data SEI readily available for this kind of proprietary 
> information and start using that instead...

OK.
>> Or, what we could do is check for the existence of stream side data containing
>> the DOVI configuration record, which is needed to use these RPUs, I think.
> 
> That sounds ideal, yes.

Will do.

> I guess it may be because there's one such NALU per AU in this sample, 
> so no frame depends on other threads having finished parsing/decoding 
> their own frames and syncing the stuff stored in their thread-specific 
> contexts.

There must be a exactly one RPU per AU, is my understanding - anything else is
malformed.

I guess malformed files would run into trouble, so this should be done regardless.

- Derek
Derek Buitenhuis Oct. 23, 2021, 6:35 p.m. UTC | #6
On 10/23/2021 7:25 PM, Jan Ekström wrote:
> Since effectively D has taken over NAL unit 62 for this stuff, just
> call it RPU_BUFFER or so, and take in everything NAL unit 62?

Sounds OK to me if others are OK.

- Derek
diff mbox series

Patch

diff --git a/libavcodec/h2645_parse.c b/libavcodec/h2645_parse.c
index 6fbe97ad4a..04348c3a73 100644
--- a/libavcodec/h2645_parse.c
+++ b/libavcodec/h2645_parse.c
@@ -517,6 +517,34 @@  int ff_h2645_packet_split(H2645Packet *pkt, const uint8_t *buf, int length,
         pkt->nb_nals++;
     }
 
+    /*
+     * Due to limitions in avcodec's current frame threading code, it cannot
+     * handle adding frame side data in any place except before the slice has
+     * started decoding. Since Dolby Vision RPUs (which appear as NAL type 62)
+     * are appended to the AU, this is a poblematic. As a hack around this, we
+     * move any RPUs to before the first slice NAL.
+     */
+    if (codec_id == AV_CODEC_ID_HEVC && pkt->nb_nals > 1 && pkt->nals[pkt->nb_nals - 1].type == HEVC_NAL_UNSPEC62 &&
+        pkt->nals[pkt->nb_nals - 1].data[0] == 0x7C && pkt->nals[pkt->nb_nals - 1].data[1] == 0x01) {
+
+        int first_non_slice = 0;
+        H2645NAL *tmp = av_malloc(pkt->nb_nals * sizeof(H2645NAL));
+        if (!tmp)
+            return AVERROR(ENOMEM);
+
+        for (int i = pkt->nb_nals - 1; i >= 0; i--) {
+            if (pkt->nals[i].type < HEVC_NAL_VPS)
+                first_non_slice = i;
+            tmp[i] = pkt->nals[i];
+        }
+
+        pkt->nals[first_non_slice] = pkt->nals[pkt->nb_nals - 1];
+        for (int i = first_non_slice + 1; i < pkt->nb_nals; i++)
+            pkt->nals[i] = tmp[i - 1];
+
+        av_free(tmp);
+    }
+
     return 0;
 }
 
diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c
index 246ffd7d80..5ccee2aa4e 100644
--- a/libavcodec/hevcdec.c
+++ b/libavcodec/hevcdec.c
@@ -2950,6 +2950,17 @@  static int set_side_data(HEVCContext *s)
         }
     }
 
+    if (s->rpu_buf) {
+        AVFrameSideData *rpu = av_frame_new_side_data(out, AV_FRAME_DATA_DOVI_RPU, s->rpu_buf_size);
+        if (!rpu)
+            return AVERROR(ENOMEM);
+
+        memcpy(rpu->data, s->rpu_buf, s->rpu_buf_size);
+
+        av_freep(&s->rpu_buf);
+        s->rpu_buf_size = 0;
+    }
+
     return 0;
 }
 
@@ -3224,6 +3235,23 @@  static int decode_nal_unit(HEVCContext *s, const H2645NAL *nal)
     case HEVC_NAL_AUD:
     case HEVC_NAL_FD_NUT:
         break;
+    case HEVC_NAL_UNSPEC62:
+        /*
+         * Check for RPU delimiter.
+         *
+         * Dolby Vision RPUs masquerade as unregistered NALs of type 62 and start with
+         * 0x7C01.
+         */
+        if (nal->size > 2 && nal->data[0] == 0x7C && nal->data[1] == 0x01) {
+            s->rpu_buf = av_malloc(nal->raw_size - 2);
+            if (!s->rpu_buf)
+                return AVERROR(ENOMEM);
+
+            memcpy(s->rpu_buf, nal->raw_data + 2, nal->raw_size - 2);
+
+            s->rpu_buf_size = nal->raw_size - 2;
+        }
+        break;
     default:
         av_log(s->avctx, AV_LOG_INFO,
                "Skipping NAL unit %d\n", s->nal_unit_type);
@@ -3512,6 +3540,8 @@  static av_cold int hevc_decode_free(AVCodecContext *avctx)
 
     pic_arrays_free(s);
 
+    av_freep(&s->rpu_buf);
+
     av_freep(&s->md5_ctx);
 
     av_freep(&s->cabac_state);
@@ -3754,6 +3784,8 @@  static void hevc_decode_flush(AVCodecContext *avctx)
     HEVCContext *s = avctx->priv_data;
     ff_hevc_flush_dpb(s);
     ff_hevc_reset_sei(&s->sei);
+    av_freep(&s->rpu_buf);
+    s->rpu_buf_size = 0;
     s->max_ra = INT_MAX;
     s->eos = 1;
 }
diff --git a/libavcodec/hevcdec.h b/libavcodec/hevcdec.h
index 77fdf90da1..8a9b516759 100644
--- a/libavcodec/hevcdec.h
+++ b/libavcodec/hevcdec.h
@@ -572,6 +572,9 @@  typedef struct HEVCContext {
 
     int nal_length_size;    ///< Number of bytes used for nal length (1, 2 or 4)
     int nuh_layer_id;
+
+    uint8_t *rpu_buf;       ///< 0 or 1 Dolby Vision RPUs.
+    size_t rpu_buf_size;
 } HEVCContext;
 
 /**
diff --git a/libavcodec/version.h b/libavcodec/version.h
index 74b8baa5f3..76af066d32 100644
--- a/libavcodec/version.h
+++ b/libavcodec/version.h
@@ -28,7 +28,7 @@ 
 #include "libavutil/version.h"
 
 #define LIBAVCODEC_VERSION_MAJOR  59
-#define LIBAVCODEC_VERSION_MINOR  12
+#define LIBAVCODEC_VERSION_MINOR  13
 #define LIBAVCODEC_VERSION_MICRO 100
 
 #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \