Message ID | 20211021134843.1436241-3-derek.buitenhuis@gmail.com |
---|---|
State | New |
Headers | show |
Series | Dolby Vision RPU Side Data | expand |
Context | Check | Description |
---|---|---|
andriy/make_x86 | success | Make finished |
andriy/make_fate_x86 | success | Make fate finished |
andriy/make_ppc | success | Make finished |
andriy/make_fate_ppc | success | Make fate finished |
On 10/21/2021 10:48 AM, Derek Buitenhuis wrote: > Signed-off-by: Derek Buitenhuis <derek.buitenhuis@gmail.com> > --- > * The NAL reordering approach is a result of discussions with Anton and James > a few months ago. > * I've put the NAL reordering in ff_h2645_packet_split, rather than a bitstream > filter for a few reasons: > * I don't think having a decoder's behavior rely on the presence of a bitstream > filter is good architecture / design. > * This spliting is only usd or useful to decoding. > --- > libavcodec/h2645_parse.c | 28 ++++++++++++++++++++++++++++ > libavcodec/hevcdec.c | 32 ++++++++++++++++++++++++++++++++ > libavcodec/hevcdec.h | 3 +++ > libavcodec/version.h | 2 +- > 4 files changed, 64 insertions(+), 1 deletion(-) > > diff --git a/libavcodec/h2645_parse.c b/libavcodec/h2645_parse.c > index 6fbe97ad4a..04348c3a73 100644 > --- a/libavcodec/h2645_parse.c > +++ b/libavcodec/h2645_parse.c > @@ -517,6 +517,34 @@ int ff_h2645_packet_split(H2645Packet *pkt, const uint8_t *buf, int length, > pkt->nb_nals++; > } > > + /* > + * Due to limitions in avcodec's current frame threading code, it cannot > + * handle adding frame side data in any place except before the slice has > + * started decoding. Since Dolby Vision RPUs (which appear as NAL type 62) > + * are appended to the AU, this is a poblematic. As a hack around this, we > + * move any RPUs to before the first slice NAL. > + */ > + if (codec_id == AV_CODEC_ID_HEVC && pkt->nb_nals > 1 && pkt->nals[pkt->nb_nals - 1].type == HEVC_NAL_UNSPEC62 && > + pkt->nals[pkt->nb_nals - 1].data[0] == 0x7C && pkt->nals[pkt->nb_nals - 1].data[1] == 0x01) { 0x7c01 is just the NAL header that signals type 62, nuh_layer_id 0 and nuh_temporal_id 0. forbidden_zero_bit = 0 nal_unit_type = 111110 nuh_layer_id = 000000 nuh_temporal_id_plus1 = 001 So the last two checks can be changed to !pkt->nals[pkt->nb_nals - 1].nuh_layer_id && !pkt->nals[pkt->nb_nals - 1].temporal_id > + > + int first_non_slice = 0; > + H2645NAL *tmp = av_malloc(pkt->nb_nals * sizeof(H2645NAL)); > + if (!tmp) > + return AVERROR(ENOMEM); > + > + for (int i = pkt->nb_nals - 1; i >= 0; i--) { > + if (pkt->nals[i].type < HEVC_NAL_VPS) > + first_non_slice = i; > + tmp[i] = pkt->nals[i]; > + } > + > + pkt->nals[first_non_slice] = pkt->nals[pkt->nb_nals - 1]; > + for (int i = first_non_slice + 1; i < pkt->nb_nals; i++) > + pkt->nals[i] = tmp[i - 1]; > + > + av_free(tmp); This is horrible, but i guess there's no alternative without autoinserting a bsf. > + } > + > return 0; > } > > diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c > index 246ffd7d80..5ccee2aa4e 100644 > --- a/libavcodec/hevcdec.c > +++ b/libavcodec/hevcdec.c > @@ -2950,6 +2950,17 @@ static int set_side_data(HEVCContext *s) > } > } > > + if (s->rpu_buf) { > + AVFrameSideData *rpu = av_frame_new_side_data(out, AV_FRAME_DATA_DOVI_RPU, s->rpu_buf_size); > + if (!rpu) > + return AVERROR(ENOMEM); > + > + memcpy(rpu->data, s->rpu_buf, s->rpu_buf_size); > + > + av_freep(&s->rpu_buf); > + s->rpu_buf_size = 0; > + } > + > return 0; > } > > @@ -3224,6 +3235,23 @@ static int decode_nal_unit(HEVCContext *s, const H2645NAL *nal) > case HEVC_NAL_AUD: > case HEVC_NAL_FD_NUT: > break; > + case HEVC_NAL_UNSPEC62: > + /* > + * Check for RPU delimiter. > + * > + * Dolby Vision RPUs masquerade as unregistered NALs of type 62 and start with > + * 0x7C01. What i said above. It's nothing Dolby specific. Any NAL type 62 could be like that. If there's no spec we could use to actually parse the bitstream after the NAL header, then we will end up propagating pretty much anything using NAL type 62 as Dolby RPU. > + */ > + if (nal->size > 2 && nal->data[0] == 0x7C && nal->data[1] == 0x01) { > + s->rpu_buf = av_malloc(nal->raw_size - 2); You should use an AVBufferRef for this that you then attach to the frame using av_frame_new_side_data_from_buf(). And you need to keep threads in sync by replacing the dst context's rpu_buf with the src context rpu_buf with av_buffer_replace() in hevc_update_thread_context(). > + if (!s->rpu_buf) > + return AVERROR(ENOMEM); > + > + memcpy(s->rpu_buf, nal->raw_data + 2, nal->raw_size - 2); > + > + s->rpu_buf_size = nal->raw_size - 2; > + } > + break; > default: > av_log(s->avctx, AV_LOG_INFO, > "Skipping NAL unit %d\n", s->nal_unit_type); > @@ -3512,6 +3540,8 @@ static av_cold int hevc_decode_free(AVCodecContext *avctx) > > pic_arrays_free(s); > > + av_freep(&s->rpu_buf); > + > av_freep(&s->md5_ctx); > > av_freep(&s->cabac_state); > @@ -3754,6 +3784,8 @@ static void hevc_decode_flush(AVCodecContext *avctx) > HEVCContext *s = avctx->priv_data; > ff_hevc_flush_dpb(s); > ff_hevc_reset_sei(&s->sei); > + av_freep(&s->rpu_buf); > + s->rpu_buf_size = 0; > s->max_ra = INT_MAX; > s->eos = 1; > } > diff --git a/libavcodec/hevcdec.h b/libavcodec/hevcdec.h > index 77fdf90da1..8a9b516759 100644 > --- a/libavcodec/hevcdec.h > +++ b/libavcodec/hevcdec.h > @@ -572,6 +572,9 @@ typedef struct HEVCContext { > > int nal_length_size; ///< Number of bytes used for nal length (1, 2 or 4) > int nuh_layer_id; > + > + uint8_t *rpu_buf; ///< 0 or 1 Dolby Vision RPUs. > + size_t rpu_buf_size; > } HEVCContext; > > /** > diff --git a/libavcodec/version.h b/libavcodec/version.h > index 74b8baa5f3..76af066d32 100644 > --- a/libavcodec/version.h > +++ b/libavcodec/version.h > @@ -28,7 +28,7 @@ > #include "libavutil/version.h" > > #define LIBAVCODEC_VERSION_MAJOR 59 > -#define LIBAVCODEC_VERSION_MINOR 12 > +#define LIBAVCODEC_VERSION_MINOR 13 > #define LIBAVCODEC_VERSION_MICRO 100 > > #define LIBAVCODEC_VERSION_INT AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
On 10/23/2021 3:18 PM, James Almer wrote: > 0x7c01 is just the NAL header that signals type 62, nuh_layer_id 0 and > nuh_temporal_id 0. > > forbidden_zero_bit = 0 > nal_unit_type = 111110 > nuh_layer_id = 000000 > nuh_temporal_id_plus1 = 001 > > So the last two checks can be changed to !pkt->nals[pkt->nb_nals - > 1].nuh_layer_id && !pkt->nals[pkt->nb_nals - 1].temporal_id Do'h, will fix. > This is horrible, but i guess there's no alternative without > autoinserting a bsf. Yeah, I couldn't think of one - and the BSF option seemed even worse. > What i said above. It's nothing Dolby specific. Any NAL type 62 could be > like that. If there's no spec we could use to actually parse the > bitstream after the NAL header, then we will end up propagating pretty > much anything using NAL type 62 as Dolby RPU. Indeed, there is no spec. As far as I know, nothing else uses UNSPEC62, though, and this is consistent with what e.g. Dolby's open source muxer and mkvtoolnix do. What do you suggest? Rename the side data to _UNSPEC62 maybe and leave it to the caller to handle it? Or, what we could do is check for the existence of stream side data containing the DOVI configuration record, which is needed to use these RPUs, I think. >> + */ >> + if (nal->size > 2 && nal->data[0] == 0x7C && nal->data[1] == 0x01) { >> + s->rpu_buf = av_malloc(nal->raw_size - 2); > > You should use an AVBufferRef for this that you then attach to the frame > using av_frame_new_side_data_from_buf(). Will do. > And you need to keep threads in sync by replacing the dst context's > rpu_buf with the src context rpu_buf with av_buffer_replace() in > hevc_update_thread_context(). Will do. Interestingly, I haven't run into any issues here with MT at all, in months of use. Luck? - Derek
On 10/23/2021 2:52 PM, Derek Buitenhuis wrote: > On 10/23/2021 3:18 PM, James Almer wrote: >> 0x7c01 is just the NAL header that signals type 62, nuh_layer_id 0 and >> nuh_temporal_id 0. >> >> forbidden_zero_bit = 0 >> nal_unit_type = 111110 >> nuh_layer_id = 000000 >> nuh_temporal_id_plus1 = 001 >> >> So the last two checks can be changed to !pkt->nals[pkt->nb_nals - >> 1].nuh_layer_id && !pkt->nals[pkt->nb_nals - 1].temporal_id > > Do'h, will fix. That being said, would Dolby RPU NALUs like this using other values for temporal and layer id be valid? Or can we just assume that's never happening? > >> This is horrible, but i guess there's no alternative without >> autoinserting a bsf. > > Yeah, I couldn't think of one - and the BSF option seemed even worse. > >> What i said above. It's nothing Dolby specific. Any NAL type 62 could be >> like that. If there's no spec we could use to actually parse the >> bitstream after the NAL header, then we will end up propagating pretty >> much anything using NAL type 62 as Dolby RPU. > > Indeed, there is no spec. As far as I know, nothing else uses UNSPEC62, > though, and this is consistent with what e.g. Dolby's open source muxer > and mkvtoolnix do. > > What do you suggest? Rename the side data to _UNSPEC62 maybe and leave it > to the caller to handle it? No, i prefer having the side data be about Dolby Vision RPU. Who knows, maybe in the future Dolby realizes they had the Unregistered and even Registered User Data SEI readily available for this kind of proprietary information and start using that instead... > > Or, what we could do is check for the existence of stream side data containing > the DOVI configuration record, which is needed to use these RPUs, I think. That sounds ideal, yes. > >>> + */ >>> + if (nal->size > 2 && nal->data[0] == 0x7C && nal->data[1] == 0x01) { >>> + s->rpu_buf = av_malloc(nal->raw_size - 2); >> >> You should use an AVBufferRef for this that you then attach to the frame >> using av_frame_new_side_data_from_buf(). > > Will do. > >> And you need to keep threads in sync by replacing the dst context's >> rpu_buf with the src context rpu_buf with av_buffer_replace() in >> hevc_update_thread_context(). > > Will do. Interestingly, I haven't run into any issues here with MT at all, > in months of use. Luck? I guess it may be because there's one such NALU per AU in this sample, so no frame depends on other threads having finished parsing/decoding their own frames and syncing the stuff stored in their thread-specific contexts. > > - Derek > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". >
On Sat, Oct 23, 2021 at 9:16 PM James Almer <jamrial@gmail.com> wrote: > > On 10/23/2021 2:52 PM, Derek Buitenhuis wrote: > > On 10/23/2021 3:18 PM, James Almer wrote: > >> 0x7c01 is just the NAL header that signals type 62, nuh_layer_id 0 and > >> nuh_temporal_id 0. > >> > >> forbidden_zero_bit = 0 > >> nal_unit_type = 111110 > >> nuh_layer_id = 000000 > >> nuh_temporal_id_plus1 = 001 > >> > >> So the last two checks can be changed to !pkt->nals[pkt->nb_nals - > >> 1].nuh_layer_id && !pkt->nals[pkt->nb_nals - 1].temporal_id > > > > Do'h, will fix. > > That being said, would Dolby RPU NALUs like this using other values for > temporal and layer id be valid? Or can we just assume that's never > happening? > > > > >> This is horrible, but i guess there's no alternative without > >> autoinserting a bsf. > > > > Yeah, I couldn't think of one - and the BSF option seemed even worse. > > > >> What i said above. It's nothing Dolby specific. Any NAL type 62 could be > >> like that. If there's no spec we could use to actually parse the > >> bitstream after the NAL header, then we will end up propagating pretty > >> much anything using NAL type 62 as Dolby RPU. > > > > Indeed, there is no spec. As far as I know, nothing else uses UNSPEC62, > > though, and this is consistent with what e.g. Dolby's open source muxer > > and mkvtoolnix do. > > > > What do you suggest? Rename the side data to _UNSPEC62 maybe and leave it > > to the caller to handle it? > > No, i prefer having the side data be about Dolby Vision RPU. Who knows, > maybe in the future Dolby realizes they had the Unregistered and even > Registered User Data SEI readily available for this kind of proprietary > information and start using that instead... > They already utilize something like this for their ST.2094-10 brightness metadata (which is actually specified through ETSI etc [1]), which hilariously is also called "DoVi" in their marketing. Since effectively D has taken over NAL unit 62 for this stuff, just call it RPU_BUFFER or so, and take in everything NAL unit 62? Jan [1] https://www.etsi.org/deliver/etsi_ts/103500_103599/103572/01.03.01_60/ts_103572v010301p.pdf
On 10/23/2021 7:15 PM, James Almer wrote: >> Do'h, will fix. > > That being said, would Dolby RPU NALUs like this using other values for > temporal and layer id be valid? Or can we just assume that's never > happening? They also use 63 for an EL, according to other codebases an docs. I didn't include it in this patch, since I have no such samples or way to test. > No, i prefer having the side data be about Dolby Vision RPU. Who knows, > maybe in the future Dolby realizes they had the Unregistered and even > Registered User Data SEI readily available for this kind of proprietary > information and start using that instead... OK. >> Or, what we could do is check for the existence of stream side data containing >> the DOVI configuration record, which is needed to use these RPUs, I think. > > That sounds ideal, yes. Will do. > I guess it may be because there's one such NALU per AU in this sample, > so no frame depends on other threads having finished parsing/decoding > their own frames and syncing the stuff stored in their thread-specific > contexts. There must be a exactly one RPU per AU, is my understanding - anything else is malformed. I guess malformed files would run into trouble, so this should be done regardless. - Derek
On 10/23/2021 7:25 PM, Jan Ekström wrote: > Since effectively D has taken over NAL unit 62 for this stuff, just > call it RPU_BUFFER or so, and take in everything NAL unit 62? Sounds OK to me if others are OK. - Derek
diff --git a/libavcodec/h2645_parse.c b/libavcodec/h2645_parse.c index 6fbe97ad4a..04348c3a73 100644 --- a/libavcodec/h2645_parse.c +++ b/libavcodec/h2645_parse.c @@ -517,6 +517,34 @@ int ff_h2645_packet_split(H2645Packet *pkt, const uint8_t *buf, int length, pkt->nb_nals++; } + /* + * Due to limitions in avcodec's current frame threading code, it cannot + * handle adding frame side data in any place except before the slice has + * started decoding. Since Dolby Vision RPUs (which appear as NAL type 62) + * are appended to the AU, this is a poblematic. As a hack around this, we + * move any RPUs to before the first slice NAL. + */ + if (codec_id == AV_CODEC_ID_HEVC && pkt->nb_nals > 1 && pkt->nals[pkt->nb_nals - 1].type == HEVC_NAL_UNSPEC62 && + pkt->nals[pkt->nb_nals - 1].data[0] == 0x7C && pkt->nals[pkt->nb_nals - 1].data[1] == 0x01) { + + int first_non_slice = 0; + H2645NAL *tmp = av_malloc(pkt->nb_nals * sizeof(H2645NAL)); + if (!tmp) + return AVERROR(ENOMEM); + + for (int i = pkt->nb_nals - 1; i >= 0; i--) { + if (pkt->nals[i].type < HEVC_NAL_VPS) + first_non_slice = i; + tmp[i] = pkt->nals[i]; + } + + pkt->nals[first_non_slice] = pkt->nals[pkt->nb_nals - 1]; + for (int i = first_non_slice + 1; i < pkt->nb_nals; i++) + pkt->nals[i] = tmp[i - 1]; + + av_free(tmp); + } + return 0; } diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c index 246ffd7d80..5ccee2aa4e 100644 --- a/libavcodec/hevcdec.c +++ b/libavcodec/hevcdec.c @@ -2950,6 +2950,17 @@ static int set_side_data(HEVCContext *s) } } + if (s->rpu_buf) { + AVFrameSideData *rpu = av_frame_new_side_data(out, AV_FRAME_DATA_DOVI_RPU, s->rpu_buf_size); + if (!rpu) + return AVERROR(ENOMEM); + + memcpy(rpu->data, s->rpu_buf, s->rpu_buf_size); + + av_freep(&s->rpu_buf); + s->rpu_buf_size = 0; + } + return 0; } @@ -3224,6 +3235,23 @@ static int decode_nal_unit(HEVCContext *s, const H2645NAL *nal) case HEVC_NAL_AUD: case HEVC_NAL_FD_NUT: break; + case HEVC_NAL_UNSPEC62: + /* + * Check for RPU delimiter. + * + * Dolby Vision RPUs masquerade as unregistered NALs of type 62 and start with + * 0x7C01. + */ + if (nal->size > 2 && nal->data[0] == 0x7C && nal->data[1] == 0x01) { + s->rpu_buf = av_malloc(nal->raw_size - 2); + if (!s->rpu_buf) + return AVERROR(ENOMEM); + + memcpy(s->rpu_buf, nal->raw_data + 2, nal->raw_size - 2); + + s->rpu_buf_size = nal->raw_size - 2; + } + break; default: av_log(s->avctx, AV_LOG_INFO, "Skipping NAL unit %d\n", s->nal_unit_type); @@ -3512,6 +3540,8 @@ static av_cold int hevc_decode_free(AVCodecContext *avctx) pic_arrays_free(s); + av_freep(&s->rpu_buf); + av_freep(&s->md5_ctx); av_freep(&s->cabac_state); @@ -3754,6 +3784,8 @@ static void hevc_decode_flush(AVCodecContext *avctx) HEVCContext *s = avctx->priv_data; ff_hevc_flush_dpb(s); ff_hevc_reset_sei(&s->sei); + av_freep(&s->rpu_buf); + s->rpu_buf_size = 0; s->max_ra = INT_MAX; s->eos = 1; } diff --git a/libavcodec/hevcdec.h b/libavcodec/hevcdec.h index 77fdf90da1..8a9b516759 100644 --- a/libavcodec/hevcdec.h +++ b/libavcodec/hevcdec.h @@ -572,6 +572,9 @@ typedef struct HEVCContext { int nal_length_size; ///< Number of bytes used for nal length (1, 2 or 4) int nuh_layer_id; + + uint8_t *rpu_buf; ///< 0 or 1 Dolby Vision RPUs. + size_t rpu_buf_size; } HEVCContext; /** diff --git a/libavcodec/version.h b/libavcodec/version.h index 74b8baa5f3..76af066d32 100644 --- a/libavcodec/version.h +++ b/libavcodec/version.h @@ -28,7 +28,7 @@ #include "libavutil/version.h" #define LIBAVCODEC_VERSION_MAJOR 59 -#define LIBAVCODEC_VERSION_MINOR 12 +#define LIBAVCODEC_VERSION_MINOR 13 #define LIBAVCODEC_VERSION_MICRO 100 #define LIBAVCODEC_VERSION_INT AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
Signed-off-by: Derek Buitenhuis <derek.buitenhuis@gmail.com> --- * The NAL reordering approach is a result of discussions with Anton and James a few months ago. * I've put the NAL reordering in ff_h2645_packet_split, rather than a bitstream filter for a few reasons: * I don't think having a decoder's behavior rely on the presence of a bitstream filter is good architecture / design. * This spliting is only usd or useful to decoding. --- libavcodec/h2645_parse.c | 28 ++++++++++++++++++++++++++++ libavcodec/hevcdec.c | 32 ++++++++++++++++++++++++++++++++ libavcodec/hevcdec.h | 3 +++ libavcodec/version.h | 2 +- 4 files changed, 64 insertions(+), 1 deletion(-)