diff mbox series

[FFmpeg-devel,2/3,v2] avcodec/hevcdec: Export Dolby Vision RPUs as side data

Message ID 20211101203227.1648805-3-derek.buitenhuis@gmail.com
State New
Headers show
Series Dolby Vision RPU Side Data | expand

Checks

Context Check Description
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished
andriy/make_ppc success Make finished
andriy/make_fate_ppc success Make fate finished

Commit Message

Derek Buitenhuis Nov. 1, 2021, 8:32 p.m. UTC
Signed-off-by: Derek Buitenhuis <derek.buitenhuis@gmail.com>
---
 libavcodec/h2645_parse.c | 28 ++++++++++++++++++++++++++++
 libavcodec/hevcdec.c     | 29 +++++++++++++++++++++++++++++
 libavcodec/hevcdec.h     |  2 ++
 libavcodec/version.h     |  2 +-
 4 files changed, 60 insertions(+), 1 deletion(-)

Comments

Andreas Rheinhardt Nov. 6, 2021, 11:36 p.m. UTC | #1
Derek Buitenhuis:
> Signed-off-by: Derek Buitenhuis <derek.buitenhuis@gmail.com>
> ---
>  libavcodec/h2645_parse.c | 28 ++++++++++++++++++++++++++++
>  libavcodec/hevcdec.c     | 29 +++++++++++++++++++++++++++++
>  libavcodec/hevcdec.h     |  2 ++
>  libavcodec/version.h     |  2 +-
>  4 files changed, 60 insertions(+), 1 deletion(-)
> 
> diff --git a/libavcodec/h2645_parse.c b/libavcodec/h2645_parse.c
> index 6fbe97ad4a..7adf5b7ef1 100644
> --- a/libavcodec/h2645_parse.c
> +++ b/libavcodec/h2645_parse.c
> @@ -517,6 +517,34 @@ int ff_h2645_packet_split(H2645Packet *pkt, const uint8_t *buf, int length,
>          pkt->nb_nals++;
>      }
>  
> +    /*
> +     * Due to limitions in avcodec's current frame threading code, it cannot
> +     * handle adding frame side data in any place except before the slice has
> +     * started decoding. Since Dolby Vision RPUs (which appear as NAL type 62)
> +     * are appended to the AU, this is a poblematic. As a hack around this, we
> +     * move any RPUs to before the first slice NAL.
> +     */
> +    if (codec_id == AV_CODEC_ID_HEVC && pkt->nb_nals > 1 && pkt->nals[pkt->nb_nals - 1].type == HEVC_NAL_UNSPEC62 &&

1. This code is also used CBS, not only the HEVC decoder. So
unconditionally moving the NAL is unacceptable.
(Does this type of NAL actually abide by the typical 0x03 escaping
scheme? If not, CBS has a problem.)

> +        !pkt->nals[pkt->nb_nals - 1].nuh_layer_id && !pkt->nals[pkt->nb_nals - 1].temporal_id) {
> +
> +        int first_non_slice = 0;
> +        H2645NAL *tmp = av_malloc(pkt->nb_nals * sizeof(H2645NAL));
> +        if (!tmp)
> +            return AVERROR(ENOMEM);
> +
> +        for (int i = pkt->nb_nals - 1; i >= 0; i--) {
> +            if (pkt->nals[i].type < HEVC_NAL_VPS)
> +                first_non_slice = i;
> +            tmp[i] = pkt->nals[i];
> +        }
> +
> +        pkt->nals[first_non_slice] = pkt->nals[pkt->nb_nals - 1];
> +        for (int i = first_non_slice + 1; i < pkt->nb_nals; i++)
> +            pkt->nals[i] = tmp[i - 1];
> +
> +        av_free(tmp);
> +    }

2. This is unnecessarily complicated:
    H2645NAL tmp = pkt->nals[pkt->nb_nals - 1];
    memmove(&pkt->nals[first_non_slice + 1],
            &pkt->nals[first_non_slice],
            (pkt->nb_nals - 1 - first_non_slice) * sizeof(*pkt->nals));
    pkt->nals[first_non_slice] = tmp;
(The naming of first_non_slice is actually misleading: If there are
slices, then it is the index of the first slice before moving. It would
be easier to use an approach as follows:
    int dst_index;
    for (dst_index = 0; dst_index < pkt->nb_nals - 1; dst_index)
        if (pkt->nals[dst_index] < HEVC_NAL_VPS)
            break;
)

> +
>      return 0;
>  }
>  
> diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c
> index 246ffd7d80..dd286deaa7 100644
> --- a/libavcodec/hevcdec.c
> +++ b/libavcodec/hevcdec.c
> @@ -2950,6 +2950,14 @@ static int set_side_data(HEVCContext *s)
>          }
>      }
>  
> +    if (s->rpu_buf) {
> +        AVFrameSideData *rpu = av_frame_new_side_data_from_buf(out, AV_FRAME_DATA_DOVI_RPU_BUFFER, s->rpu_buf);
> +        if (!rpu)
> +            return AVERROR(ENOMEM);
> +
> +        s->rpu_buf = NULL;
> +    }
> +
>      return 0;
>  }
>  
> @@ -3224,6 +3232,20 @@ static int decode_nal_unit(HEVCContext *s, const H2645NAL *nal)
>      case HEVC_NAL_AUD:
>      case HEVC_NAL_FD_NUT:
>          break;
> +    case HEVC_NAL_UNSPEC62:
> +        /*
> +         * Check for RPU delimiter.
> +         *
> +         * Dolby Vision RPUs masquerade as unregistered NALs of type 62.
> +         */
> +        if (nal->size > 2 && !nal->nuh_layer_id && !nal->temporal_id) {
> +            s->rpu_buf = av_buffer_alloc(nal->raw_size - 2);

3. This may leak in case there are multiple HEVC_NAL_UNSPEC62 in an
H2645Packet. (Don't know whether this is legal, but it is unchecked.)

4. My preferred solution for this is adding the following after the call
to ff_h2645_packet_split():

    if (pkt->nb_nals > 1 && pkt->nals[pkt->nb_nals - 1].type ==
HEVC_NAL_UNSPEC62 && nal->size > 2 && !nal->nuh_layer_id &&
!nal->temporal_id) {
        // create rpu_buf here

This is less of a hack than reordering the nalus.
(This of course also needs a check to rule out leaks: After all, the
preceding packet might have given us an rpu, but no first slice. Btw:
That's the reason why this buffer is synced between threads, isn't it?)

> +            if (!s->rpu_buf)
> +                return AVERROR(ENOMEM);
> +
> +            memcpy(s->rpu_buf->data, nal->raw_data + 2, nal->raw_size - 2);
> +        }
> +        break;
>      default:
>          av_log(s->avctx, AV_LOG_INFO,
>                 "Skipping NAL unit %d\n", s->nal_unit_type);
Derek Buitenhuis Nov. 9, 2021, 3:54 p.m. UTC | #2
On 11/6/2021 11:36 PM, Andreas Rheinhardt wrote:
>> +    /*
>> +     * Due to limitions in avcodec's current frame threading code, it cannot
>> +     * handle adding frame side data in any place except before the slice has
>> +     * started decoding. Since Dolby Vision RPUs (which appear as NAL type 62)
>> +     * are appended to the AU, this is a poblematic. As a hack around this, we
>> +     * move any RPUs to before the first slice NAL.
>> +     */
>> +    if (codec_id == AV_CODEC_ID_HEVC && pkt->nb_nals > 1 && pkt->nals[pkt->nb_nals - 1].type == HEVC_NAL_UNSPEC62 &&
> 
> 1. This code is also used CBS, not only the HEVC decoder. So
> unconditionally moving the NAL is unacceptable.

[...]

> (Does this type of NAL actually abide by the typical 0x03 escaping
> scheme? If not, CBS has a problem.)

It does, so this at least is not a problem.


> 2. This is unnecessarily complicated:
>     H2645NAL tmp = pkt->nals[pkt->nb_nals - 1];
>     memmove(&pkt->nals[first_non_slice + 1],
>             &pkt->nals[first_non_slice],
>             (pkt->nb_nals - 1 - first_non_slice) * sizeof(*pkt->nals));
>     pkt->nals[first_non_slice] = tmp;
> (The naming of first_non_slice is actually misleading: If there are
> slices, then it is the index of the first slice before moving. It would
> be easier to use an approach as follows:
>     int dst_index;
>     for (dst_index = 0; dst_index < pkt->nb_nals - 1; dst_index)
>         if (pkt->nals[dst_index] < HEVC_NAL_VPS)
>             break;
> )

Will fix, thanks.

> 3. This may leak in case there are multiple HEVC_NAL_UNSPEC62 in an
> H2645Packet. (Don't know whether this is legal, but it is unchecked.)

It's not legal - Dolby Vision requires a single RPU (UNPSEC62) placed at the
end of the AU. However, that doesn't preclude crafted files or corrupt files
from having it like that.

> 4. My preferred solution for this is adding the following after the call
> to ff_h2645_packet_split():
> 
>     if (pkt->nb_nals > 1 && pkt->nals[pkt->nb_nals - 1].type ==
> HEVC_NAL_UNSPEC62 && nal->size > 2 && !nal->nuh_layer_id &&
> !nal->temporal_id) {
>         // create rpu_buf here

I'm not sure this is 100% correct. I believe RPUs must either be the
last NAL in the AU - in which case this is correct, *or* they must be
the NAL in the AU before the EOB/EOS NAL, which, by HEVC spec, must be
last.

> This is less of a hack than reordering the nalus.
> (This of course also needs a check to rule out leaks: After all, the
> preceding packet might have given us an rpu, but no first slice. Btw:
> That's the reason why this buffer is synced between threads, isn't it?)

Yes that is the only reason it is synce - corrupt or crafted files, as per
James' last review which requested that.

- Derek
Andreas Rheinhardt Nov. 9, 2021, 4:05 p.m. UTC | #3
Derek Buitenhuis:
> On 11/6/2021 11:36 PM, Andreas Rheinhardt wrote:
>>> +    /*
>>> +     * Due to limitions in avcodec's current frame threading code, it cannot
>>> +     * handle adding frame side data in any place except before the slice has
>>> +     * started decoding. Since Dolby Vision RPUs (which appear as NAL type 62)
>>> +     * are appended to the AU, this is a poblematic. As a hack around this, we
>>> +     * move any RPUs to before the first slice NAL.
>>> +     */
>>> +    if (codec_id == AV_CODEC_ID_HEVC && pkt->nb_nals > 1 && pkt->nals[pkt->nb_nals - 1].type == HEVC_NAL_UNSPEC62 &&
>>
>> 1. This code is also used CBS, not only the HEVC decoder. So
>> unconditionally moving the NAL is unacceptable.
> 
> [...]
> 
>> (Does this type of NAL actually abide by the typical 0x03 escaping
>> scheme? If not, CBS has a problem.)
> 
> It does, so this at least is not a problem.
> 

Good to hear.

> 
>> 2. This is unnecessarily complicated:
>>     H2645NAL tmp = pkt->nals[pkt->nb_nals - 1];
>>     memmove(&pkt->nals[first_non_slice + 1],
>>             &pkt->nals[first_non_slice],
>>             (pkt->nb_nals - 1 - first_non_slice) * sizeof(*pkt->nals));
>>     pkt->nals[first_non_slice] = tmp;
>> (The naming of first_non_slice is actually misleading: If there are
>> slices, then it is the index of the first slice before moving. It would
>> be easier to use an approach as follows:
>>     int dst_index;
>>     for (dst_index = 0; dst_index < pkt->nb_nals - 1; dst_index)
>>         if (pkt->nals[dst_index] < HEVC_NAL_VPS)
>>             break;
>> )
> 
> Will fix, thanks.
> 
>> 3. This may leak in case there are multiple HEVC_NAL_UNSPEC62 in an
>> H2645Packet. (Don't know whether this is legal, but it is unchecked.)
> 
> It's not legal - Dolby Vision requires a single RPU (UNPSEC62) placed at the
> end of the AU. However, that doesn't preclude crafted files or corrupt files
> from having it like that.
> 
>> 4. My preferred solution for this is adding the following after the call
>> to ff_h2645_packet_split():
>>
>>     if (pkt->nb_nals > 1 && pkt->nals[pkt->nb_nals - 1].type ==
>> HEVC_NAL_UNSPEC62 && nal->size > 2 && !nal->nuh_layer_id &&
>> !nal->temporal_id) {
>>         // create rpu_buf here
> 
> I'm not sure this is 100% correct. I believe RPUs must either be the
> last NAL in the AU - in which case this is correct, *or* they must be
> the NAL in the AU before the EOB/EOS NAL, which, by HEVC spec, must be
> last.
> 

There is already a loop that checks for EOB/EOS NALs. You could use it
to check for HEVC_NAL_UNSPEC62 and create an rpu_buf from it before
ff_thread_finish_setup().

- Andreas
diff mbox series

Patch

diff --git a/libavcodec/h2645_parse.c b/libavcodec/h2645_parse.c
index 6fbe97ad4a..7adf5b7ef1 100644
--- a/libavcodec/h2645_parse.c
+++ b/libavcodec/h2645_parse.c
@@ -517,6 +517,34 @@  int ff_h2645_packet_split(H2645Packet *pkt, const uint8_t *buf, int length,
         pkt->nb_nals++;
     }
 
+    /*
+     * Due to limitions in avcodec's current frame threading code, it cannot
+     * handle adding frame side data in any place except before the slice has
+     * started decoding. Since Dolby Vision RPUs (which appear as NAL type 62)
+     * are appended to the AU, this is a poblematic. As a hack around this, we
+     * move any RPUs to before the first slice NAL.
+     */
+    if (codec_id == AV_CODEC_ID_HEVC && pkt->nb_nals > 1 && pkt->nals[pkt->nb_nals - 1].type == HEVC_NAL_UNSPEC62 &&
+        !pkt->nals[pkt->nb_nals - 1].nuh_layer_id && !pkt->nals[pkt->nb_nals - 1].temporal_id) {
+
+        int first_non_slice = 0;
+        H2645NAL *tmp = av_malloc(pkt->nb_nals * sizeof(H2645NAL));
+        if (!tmp)
+            return AVERROR(ENOMEM);
+
+        for (int i = pkt->nb_nals - 1; i >= 0; i--) {
+            if (pkt->nals[i].type < HEVC_NAL_VPS)
+                first_non_slice = i;
+            tmp[i] = pkt->nals[i];
+        }
+
+        pkt->nals[first_non_slice] = pkt->nals[pkt->nb_nals - 1];
+        for (int i = first_non_slice + 1; i < pkt->nb_nals; i++)
+            pkt->nals[i] = tmp[i - 1];
+
+        av_free(tmp);
+    }
+
     return 0;
 }
 
diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c
index 246ffd7d80..dd286deaa7 100644
--- a/libavcodec/hevcdec.c
+++ b/libavcodec/hevcdec.c
@@ -2950,6 +2950,14 @@  static int set_side_data(HEVCContext *s)
         }
     }
 
+    if (s->rpu_buf) {
+        AVFrameSideData *rpu = av_frame_new_side_data_from_buf(out, AV_FRAME_DATA_DOVI_RPU_BUFFER, s->rpu_buf);
+        if (!rpu)
+            return AVERROR(ENOMEM);
+
+        s->rpu_buf = NULL;
+    }
+
     return 0;
 }
 
@@ -3224,6 +3232,20 @@  static int decode_nal_unit(HEVCContext *s, const H2645NAL *nal)
     case HEVC_NAL_AUD:
     case HEVC_NAL_FD_NUT:
         break;
+    case HEVC_NAL_UNSPEC62:
+        /*
+         * Check for RPU delimiter.
+         *
+         * Dolby Vision RPUs masquerade as unregistered NALs of type 62.
+         */
+        if (nal->size > 2 && !nal->nuh_layer_id && !nal->temporal_id) {
+            s->rpu_buf = av_buffer_alloc(nal->raw_size - 2);
+            if (!s->rpu_buf)
+                return AVERROR(ENOMEM);
+
+            memcpy(s->rpu_buf->data, nal->raw_data + 2, nal->raw_size - 2);
+        }
+        break;
     default:
         av_log(s->avctx, AV_LOG_INFO,
                "Skipping NAL unit %d\n", s->nal_unit_type);
@@ -3512,6 +3534,8 @@  static av_cold int hevc_decode_free(AVCodecContext *avctx)
 
     pic_arrays_free(s);
 
+    av_buffer_unref(&s->rpu_buf);
+
     av_freep(&s->md5_ctx);
 
     av_freep(&s->cabac_state);
@@ -3698,6 +3722,10 @@  static int hevc_update_thread_context(AVCodecContext *dst,
     if (ret < 0)
         return ret;
 
+    ret = av_buffer_replace(&s->rpu_buf, s0->rpu_buf);
+    if (ret < 0)
+        return ret;
+
     s->sei.frame_packing        = s0->sei.frame_packing;
     s->sei.display_orientation  = s0->sei.display_orientation;
     s->sei.mastering_display    = s0->sei.mastering_display;
@@ -3754,6 +3782,7 @@  static void hevc_decode_flush(AVCodecContext *avctx)
     HEVCContext *s = avctx->priv_data;
     ff_hevc_flush_dpb(s);
     ff_hevc_reset_sei(&s->sei);
+    av_buffer_unref(&s->rpu_buf);
     s->max_ra = INT_MAX;
     s->eos = 1;
 }
diff --git a/libavcodec/hevcdec.h b/libavcodec/hevcdec.h
index 77fdf90da1..870ff178d4 100644
--- a/libavcodec/hevcdec.h
+++ b/libavcodec/hevcdec.h
@@ -572,6 +572,8 @@  typedef struct HEVCContext {
 
     int nal_length_size;    ///< Number of bytes used for nal length (1, 2 or 4)
     int nuh_layer_id;
+
+    AVBufferRef *rpu_buf;       ///< 0 or 1 Dolby Vision RPUs.
 } HEVCContext;
 
 /**
diff --git a/libavcodec/version.h b/libavcodec/version.h
index 74b8baa5f3..76af066d32 100644
--- a/libavcodec/version.h
+++ b/libavcodec/version.h
@@ -28,7 +28,7 @@ 
 #include "libavutil/version.h"
 
 #define LIBAVCODEC_VERSION_MAJOR  59
-#define LIBAVCODEC_VERSION_MINOR  12
+#define LIBAVCODEC_VERSION_MINOR  13
 #define LIBAVCODEC_VERSION_MICRO 100
 
 #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \