diff mbox series

[FFmpeg-devel] avformat/mxfenc: add h264_mp4toannexb bitstream filter if needed when muxing h264

Message ID 20240223001943.31168-1-cus@passwd.hu
State Accepted
Commit 64634e809f2e7b6c1a1ea3f6952a17c5915c3f22
Headers show
Series [FFmpeg-devel] avformat/mxfenc: add h264_mp4toannexb bitstream filter if needed when muxing h264 | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Marton Balint Feb. 23, 2024, 12:19 a.m. UTC
Partially fixes ticket #10395.

Signed-off-by: Marton Balint <cus@passwd.hu>
---
 libavformat/mxfenc.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

Comments

Andreas Rheinhardt Feb. 23, 2024, 1:26 a.m. UTC | #1
Marton Balint:
> Partially fixes ticket #10395.
> 
> Signed-off-by: Marton Balint <cus@passwd.hu>
> ---
>  libavformat/mxfenc.c | 12 ++++++++++++
>  1 file changed, 12 insertions(+)
> 
> diff --git a/libavformat/mxfenc.c b/libavformat/mxfenc.c
> index 61ed6fc3db..adc31c1cf4 100644
> --- a/libavformat/mxfenc.c
> +++ b/libavformat/mxfenc.c
> @@ -3542,6 +3542,16 @@ static int mxf_interleave(AVFormatContext *s, AVPacket *pkt,
>      return mxf_interleave_get_packet(s, pkt, flush);
>  }
>  
> +static int mxf_check_bitstream(AVFormatContext *s, AVStream *st, const AVPacket *pkt)
> +{
> +    if (st->codecpar->codec_id == AV_CODEC_ID_H264) {
> +        if (pkt->size >= 5 && AV_RB32(pkt->data) != 0x0000001 &&
> +                              AV_RB24(pkt->data) != 0x000001)
> +            return ff_stream_add_bitstream_filter(st, "h264_mp4toannexb", NULL);
> +    }
> +    return 1;
> +}
> +
>  #define MXF_COMMON_OPTIONS \
>      { "signal_standard", "Force/set Signal Standard",\
>        offsetof(MXFContext, signal_standard), AV_OPT_TYPE_INT, {.i64 = -1}, -1, 7, AV_OPT_FLAG_ENCODING_PARAM, .unit = "signal_standard"},\
> @@ -3623,6 +3633,7 @@ const FFOutputFormat ff_mxf_muxer = {
>      .p.flags           = AVFMT_NOTIMESTAMPS,
>      .interleave_packet = mxf_interleave,
>      .p.priv_class      = &mxf_muxer_class,
> +    .check_bitstream   = mxf_check_bitstream,
>  };
>  
>  const FFOutputFormat ff_mxf_d10_muxer = {
> @@ -3656,4 +3667,5 @@ const FFOutputFormat ff_mxf_opatom_muxer = {
>      .p.flags           = AVFMT_NOTIMESTAMPS,
>      .interleave_packet = mxf_interleave,
>      .p.priv_class      = &mxf_opatom_muxer_class,
> +    .check_bitstream   = mxf_check_bitstream,
>  };

I sent the very same patch long ago [1]. Tomas Härdin opposed it [2],
[3], because he sees stuff like this as hack.

- Andreas

[1]: https://ffmpeg.org/pipermail/ffmpeg-devel/2021-November/287638.html
[2]: https://ffmpeg.org/pipermail/ffmpeg-devel/2021-November/287702.html
[3]: https://ffmpeg.org/pipermail/ffmpeg-devel/2021-November/287703.html
Marton Balint Feb. 23, 2024, 7:55 p.m. UTC | #2
On Fri, 23 Feb 2024, Andreas Rheinhardt wrote:

> Marton Balint:
>> Partially fixes ticket #10395.
>> 
>> Signed-off-by: Marton Balint <cus@passwd.hu>
>> ---
>>  libavformat/mxfenc.c | 12 ++++++++++++
>>  1 file changed, 12 insertions(+)
>> 
>> diff --git a/libavformat/mxfenc.c b/libavformat/mxfenc.c
>> index 61ed6fc3db..adc31c1cf4 100644
>> --- a/libavformat/mxfenc.c
>> +++ b/libavformat/mxfenc.c
>> @@ -3542,6 +3542,16 @@ static int mxf_interleave(AVFormatContext *s, AVPacket *pkt,
>>      return mxf_interleave_get_packet(s, pkt, flush);
>>  }
>> 
>> +static int mxf_check_bitstream(AVFormatContext *s, AVStream *st, const AVPacket *pkt)
>> +{
>> +    if (st->codecpar->codec_id == AV_CODEC_ID_H264) {
>> +        if (pkt->size >= 5 && AV_RB32(pkt->data) != 0x0000001 &&
>> +                              AV_RB24(pkt->data) != 0x000001)
>> +            return ff_stream_add_bitstream_filter(st, "h264_mp4toannexb", NULL);
>> +    }
>> +    return 1;
>> +}
>> +
>>  #define MXF_COMMON_OPTIONS \
>>      { "signal_standard", "Force/set Signal Standard",\
>>        offsetof(MXFContext, signal_standard), AV_OPT_TYPE_INT, {.i64 = -1}, -1, 7, AV_OPT_FLAG_ENCODING_PARAM, .unit = "signal_standard"},\
>> @@ -3623,6 +3633,7 @@ const FFOutputFormat ff_mxf_muxer = {
>>      .p.flags           = AVFMT_NOTIMESTAMPS,
>>      .interleave_packet = mxf_interleave,
>>      .p.priv_class      = &mxf_muxer_class,
>> +    .check_bitstream   = mxf_check_bitstream,
>>  };
>>
>>  const FFOutputFormat ff_mxf_d10_muxer = {
>> @@ -3656,4 +3667,5 @@ const FFOutputFormat ff_mxf_opatom_muxer = {
>>      .p.flags           = AVFMT_NOTIMESTAMPS,
>>      .interleave_packet = mxf_interleave,
>>      .p.priv_class      = &mxf_opatom_muxer_class,
>> +    .check_bitstream   = mxf_check_bitstream,
>>  };
>
> I sent the very same patch long ago [1]. Tomas Härdin opposed it [2],
> [3], because he sees stuff like this as hack.
>
> - Andreas
>
> [1]: https://ffmpeg.org/pipermail/ffmpeg-devel/2021-November/287638.html
> [2]: https://ffmpeg.org/pipermail/ffmpeg-devel/2021-November/287702.html
> [3]: https://ffmpeg.org/pipermail/ffmpeg-devel/2021-November/287703.html

Maybe we should discuss this again, define what is missing 
exactly, considering that we both think this is a step in the right 
direction.

The specs says this:

The Picture Element Values shall be the AVC NAL unit stream or the AVC 
byte stream. The bit streams carried in the Value field shall contain 
complete NAL units including their relevant parameter sets, other 
Supplemental Enhancement Information (SEI) and padding zeroes.

(https://pub.smpte.org/doc/st381-3/20170830-pub/)

Regards,
Marton
Tomas Härdin Feb. 24, 2024, 11:38 a.m. UTC | #3
> > > +static int mxf_check_bitstream(AVFormatContext *s, AVStream *st,
> > > const AVPacket *pkt)
> > > +{
> > > +    if (st->codecpar->codec_id == AV_CODEC_ID_H264) {
> > > +        if (pkt->size >= 5 && AV_RB32(pkt->data) != 0x0000001 &&
> > > +                              AV_RB24(pkt->data) != 0x000001)
> > > +            return ff_stream_add_bitstream_filter(st,
> > > "h264_mp4toannexb", NULL);

Regardless of the comments below, this is wrong. ST 381-3 says this:

> The byte stream format can be constructed from the NAL unit stream by
> prefixing each NAL unit with a start
> code prefix and zero or more zero-valued bytes to form a stream of
> bytes.

Note the wording is "zero or more", not "zero or one".

The correct way to do this is to inspect byte 14 of the EC UL, per
section 8.1 of ST 381-3.
> 

> > I sent the very same patch long ago [1]. Tomas Härdin opposed it
> > [2],
> > [3], because he sees stuff like this as hack.

No, I oppose it because it is potentially against spec. The MXF
ecosystem is bad enough as it is without us encouraging out-of-spec
behavior.

Any behavior we put in to handle out-of-spec behavior should be limited
by Identification. But even that would be making our responsibility
what is really the responsibility of companies making broken MXF
muxers.

/Tomas
Andreas Rheinhardt Feb. 24, 2024, 2:13 p.m. UTC | #4
Tomas Härdin:
>>>> +static int mxf_check_bitstream(AVFormatContext *s, AVStream *st,
>>>> const AVPacket *pkt)
>>>> +{
>>>> +    if (st->codecpar->codec_id == AV_CODEC_ID_H264) {
>>>> +        if (pkt->size >= 5 && AV_RB32(pkt->data) != 0x0000001 &&
>>>> +                              AV_RB24(pkt->data) != 0x000001)
>>>> +            return ff_stream_add_bitstream_filter(st,
>>>> "h264_mp4toannexb", NULL);
> 
> Regardless of the comments below, this is wrong. ST 381-3 says this:
> 
>> The byte stream format can be constructed from the NAL unit stream by
>> prefixing each NAL unit with a start
>> code prefix and zero or more zero-valued bytes to form a stream of
>> bytes.
> 
> Note the wording is "zero or more", not "zero or one".

IMO all the code should only look at extradata to decide whether a
stream is annex B or ISOBMFF (no extradata->annex B, no ISOBMFF
extradata->annex B, else ISOBMFF). But that is a separate issue.
(There is a slight possibility of misdetection here: E.g. a 0x00 00 01
at the start of a packet can actually be the start of the length code of
an ISOBMFF NALU with length in the range 256-511; on the other hand, it
is legal for an annex B packet to start with four or more zero bytes, as
you mentioned.)

> The correct way to do this is to inspect byte 14 of the EC UL, per
> section 8.1 of ST 381-3.

This is a patch for the muxer, not the demuxer. There is no byte 14 of
the EC UL to inspect; or at least: It is what this muxer writes for it.
This muxer always indicates that the output is an annex B (aka AVC byte
stream), so it should always convert the input from the user to actually
be annex B.

>>> I sent the very same patch long ago [1]. Tomas Härdin opposed it
>>> [2],
>>> [3], because he sees stuff like this as hack.
> 
> No, I oppose it because it is potentially against spec. The MXF
> ecosystem is bad enough as it is without us encouraging out-of-spec
> behavior.

If the user's input is ISOBMFF, then the output will definitely be
against spec without a conversion. With a patch like this ISOBMFF data
will be converted to annex B.

Anyway, FFmpeg aims to support two framings for H.264: Annex B and
ISOBMFF. Sending ISOBMFF-framed data to a muxer is therefore not
"out-of-spec behavior". It is just supposed to work and the onus is on
the muxer/libavformat to convert as necessary.

Also note that other missing checks for whether the input is really
conforming to the specs should be separate from inserting this BSF.
After all, the user could insert the BSF himself and even in this case
it would be this muxer's responsibility to ensure that the output is
spec-compliant. Inserting the BSF simplifies this task, because it means
that the muxer can assume that the input is already annex B (and does
not need separate logic for handling ISOBMFF input); that is the only
point of inserting it.

> Any behavior we put in to handle out-of-spec behavior should be limited
> by Identification. But even that would be making our responsibility
> what is really the responsibility of companies making broken MXF
> muxers.

Once again: This is a muxer, we do not parse and identify a file here.
For the same reason it makes no sense to complain about other companies'
broken MXF muxers.

- Andreas
Marton Balint March 5, 2024, 11:14 p.m. UTC | #5
On Sat, 24 Feb 2024, Andreas Rheinhardt wrote:

> Tomas Härdin:
>>>>> +static int mxf_check_bitstream(AVFormatContext *s, AVStream *st,
>>>>> const AVPacket *pkt)
>>>>> +{
>>>>> +    if (st->codecpar->codec_id == AV_CODEC_ID_H264) {
>>>>> +        if (pkt->size >= 5 && AV_RB32(pkt->data) != 0x0000001 &&
>>>>> +                              AV_RB24(pkt->data) != 0x000001)
>>>>> +            return ff_stream_add_bitstream_filter(st,
>>>>> "h264_mp4toannexb", NULL);
>> 
>> Regardless of the comments below, this is wrong. ST 381-3 says this:
>> 
>>> The byte stream format can be constructed from the NAL unit stream by
>>> prefixing each NAL unit with a start
>>> code prefix and zero or more zero-valued bytes to form a stream of
>>> bytes.
>> 
>> Note the wording is "zero or more", not "zero or one".
>
> IMO all the code should only look at extradata to decide whether a
> stream is annex B or ISOBMFF (no extradata->annex B, no ISOBMFF
> extradata->annex B, else ISOBMFF). But that is a separate issue.
> (There is a slight possibility of misdetection here: E.g. a 0x00 00 01
> at the start of a packet can actually be the start of the length code of
> an ISOBMFF NALU with length in the range 256-511; on the other hand, it
> is legal for an annex B packet to start with four or more zero bytes, as
> you mentioned.)
>
>> The correct way to do this is to inspect byte 14 of the EC UL, per
>> section 8.1 of ST 381-3.
>
> This is a patch for the muxer, not the demuxer. There is no byte 14 of
> the EC UL to inspect; or at least: It is what this muxer writes for it.
> This muxer always indicates that the output is an annex B (aka AVC byte
> stream), so it should always convert the input from the user to actually
> be annex B.
>
>>>> I sent the very same patch long ago [1]. Tomas Härdin opposed it
>>>> [2],
>>>> [3], because he sees stuff like this as hack.
>> 
>> No, I oppose it because it is potentially against spec. The MXF
>> ecosystem is bad enough as it is without us encouraging out-of-spec
>> behavior.
>
> If the user's input is ISOBMFF, then the output will definitely be
> against spec without a conversion. With a patch like this ISOBMFF data
> will be converted to annex B.
>
> Anyway, FFmpeg aims to support two framings for H.264: Annex B and
> ISOBMFF. Sending ISOBMFF-framed data to a muxer is therefore not
> "out-of-spec behavior". It is just supposed to work and the onus is on
> the muxer/libavformat to convert as necessary.
>
> Also note that other missing checks for whether the input is really
> conforming to the specs should be separate from inserting this BSF.
> After all, the user could insert the BSF himself and even in this case
> it would be this muxer's responsibility to ensure that the output is
> spec-compliant. Inserting the BSF simplifies this task, because it means
> that the muxer can assume that the input is already annex B (and does
> not need separate logic for handling ISOBMFF input); that is the only
> point of inserting it.
>
>> Any behavior we put in to handle out-of-spec behavior should be limited
>> by Identification. But even that would be making our responsibility
>> what is really the responsibility of companies making broken MXF
>> muxers.
>
> Once again: This is a muxer, we do not parse and identify a file here.
> For the same reason it makes no sense to complain about other companies'
> broken MXF muxers.

I agree with what you wrote, so IMHO we should apply this, unless Tomas 
still has an objection.

Thanks,
Marton
Tomas Härdin March 6, 2024, 9:59 p.m. UTC | #6
lör 2024-02-24 klockan 15:13 +0100 skrev Andreas Rheinhardt:
> Tomas Härdin:
> > > > > +static int mxf_check_bitstream(AVFormatContext *s, AVStream
> > > > > *st,
> > > > > const AVPacket *pkt)
> > > > > +{
> > > > > +    if (st->codecpar->codec_id == AV_CODEC_ID_H264) {
> > > > > +        if (pkt->size >= 5 && AV_RB32(pkt->data) !=
> > > > > 0x0000001 &&
> > > > > +                              AV_RB24(pkt->data) !=
> > > > > 0x000001)
> > > > > +            return ff_stream_add_bitstream_filter(st,
> > > > > "h264_mp4toannexb", NULL);
> > 
> > Regardless of the comments below, this is wrong. ST 381-3 says
> > this:
> > 
> > > The byte stream format can be constructed from the NAL unit
> > > stream by
> > > prefixing each NAL unit with a start
> > > code prefix and zero or more zero-valued bytes to form a stream
> > > of
> > > bytes.
> > 
> > Note the wording is "zero or more", not "zero or one".
> 
> IMO all the code should only look at extradata to decide whether a
> stream is annex B or ISOBMFF (no extradata->annex B, no ISOBMFF
> extradata->annex B, else ISOBMFF). But that is a separate issue.
> (There is a slight possibility of misdetection here: E.g. a 0x00 00
> 01
> at the start of a packet can actually be the start of the length code
> of
> an ISOBMFF NALU with length in the range 256-511; on the other hand,
> it
> is legal for an annex B packet to start with four or more zero bytes,
> as
> you mentioned.)
> 
> > The correct way to do this is to inspect byte 14 of the EC UL, per
> > section 8.1 of ST 381-3.
> 
> This is a patch for the muxer, not the demuxer.

D'oh! Then it's an entirely different thing of course. Then the onus
falls on lavf internals to behave correctly.

> There is no byte 14 of
> the EC UL to inspect; or at least: It is what this muxer writes for
> it.
> This muxer always indicates that the output is an annex B (aka AVC
> byte
> stream), so it should always convert the input from the user to
> actually
> be annex B.

We could do that, or we could write an appropriate UL. Either is fine I
suppose.

/Tomas
Marton Balint March 8, 2024, 1:27 a.m. UTC | #7
On Wed, 6 Mar 2024, Tomas Härdin wrote:

> lör 2024-02-24 klockan 15:13 +0100 skrev Andreas Rheinhardt:
>> Tomas Härdin:
>> > > > > +static int mxf_check_bitstream(AVFormatContext *s, AVStream
>> > > > > *st,
>> > > > > const AVPacket *pkt)
>> > > > > +{
>> > > > > +    if (st->codecpar->codec_id == AV_CODEC_ID_H264) {
>> > > > > +        if (pkt->size >= 5 && AV_RB32(pkt->data) !=
>> > > > > 0x0000001 &&
>> > > > > +                              AV_RB24(pkt->data) !=
>> > > > > 0x000001)
>> > > > > +            return ff_stream_add_bitstream_filter(st,
>> > > > > "h264_mp4toannexb", NULL);
>> > 
>> > Regardless of the comments below, this is wrong. ST 381-3 says
>> > this:
>> > 
>> > > The byte stream format can be constructed from the NAL unit
>> > > stream by
>> > > prefixing each NAL unit with a start
>> > > code prefix and zero or more zero-valued bytes to form a stream
>> > > of
>> > > bytes.
>> > 
>> > Note the wording is "zero or more", not "zero or one".
>> 
>> IMO all the code should only look at extradata to decide whether a
>> stream is annex B or ISOBMFF (no extradata->annex B, no ISOBMFF
>> extradata->annex B, else ISOBMFF). But that is a separate issue.
>> (There is a slight possibility of misdetection here: E.g. a 0x00 00
>> 01
>> at the start of a packet can actually be the start of the length code
>> of
>> an ISOBMFF NALU with length in the range 256-511; on the other hand,
>> it
>> is legal for an annex B packet to start with four or more zero bytes,
>> as
>> you mentioned.)
>> 
>> > The correct way to do this is to inspect byte 14 of the EC UL, per
>> > section 8.1 of ST 381-3.
>> 
>> This is a patch for the muxer, not the demuxer.
>
> D'oh! Then it's an entirely different thing of course. Then the onus
> falls on lavf internals to behave correctly.
>
>> There is no byte 14 of
>> the EC UL to inspect; or at least: It is what this muxer writes for
>> it.
>> This muxer always indicates that the output is an annex B (aka AVC
>> byte
>> stream), so it should always convert the input from the user to
>> actually
>> be annex B.
>
> We could do that, or we could write an appropriate UL. Either is fine I
> suppose.

Will apply.

Thanks,
Marton
diff mbox series

Patch

diff --git a/libavformat/mxfenc.c b/libavformat/mxfenc.c
index 61ed6fc3db..adc31c1cf4 100644
--- a/libavformat/mxfenc.c
+++ b/libavformat/mxfenc.c
@@ -3542,6 +3542,16 @@  static int mxf_interleave(AVFormatContext *s, AVPacket *pkt,
     return mxf_interleave_get_packet(s, pkt, flush);
 }
 
+static int mxf_check_bitstream(AVFormatContext *s, AVStream *st, const AVPacket *pkt)
+{
+    if (st->codecpar->codec_id == AV_CODEC_ID_H264) {
+        if (pkt->size >= 5 && AV_RB32(pkt->data) != 0x0000001 &&
+                              AV_RB24(pkt->data) != 0x000001)
+            return ff_stream_add_bitstream_filter(st, "h264_mp4toannexb", NULL);
+    }
+    return 1;
+}
+
 #define MXF_COMMON_OPTIONS \
     { "signal_standard", "Force/set Signal Standard",\
       offsetof(MXFContext, signal_standard), AV_OPT_TYPE_INT, {.i64 = -1}, -1, 7, AV_OPT_FLAG_ENCODING_PARAM, .unit = "signal_standard"},\
@@ -3623,6 +3633,7 @@  const FFOutputFormat ff_mxf_muxer = {
     .p.flags           = AVFMT_NOTIMESTAMPS,
     .interleave_packet = mxf_interleave,
     .p.priv_class      = &mxf_muxer_class,
+    .check_bitstream   = mxf_check_bitstream,
 };
 
 const FFOutputFormat ff_mxf_d10_muxer = {
@@ -3656,4 +3667,5 @@  const FFOutputFormat ff_mxf_opatom_muxer = {
     .p.flags           = AVFMT_NOTIMESTAMPS,
     .interleave_packet = mxf_interleave,
     .p.priv_class      = &mxf_opatom_muxer_class,
+    .check_bitstream   = mxf_check_bitstream,
 };