diff mbox series

[FFmpeg-devel,v2] libavformat/mpegtsenc.c: correctly re-emit extradata ahead of IDR pictures

Message ID 20230212230607.180234-1-jpcoiner@gmail.com
State New
Headers show
Series [FFmpeg-devel,v2] libavformat/mpegtsenc.c: correctly re-emit extradata ahead of IDR pictures | expand

Checks

Context Check Description
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

John Coiner Feb. 12, 2023, 11:06 p.m. UTC
This is v2 of the patch for https://trac.ffmpeg.org/ticket/10148.

It implements the handling described at http://ffmpeg.org/pipermail/ffmpeg-devel/2023-February/306542.html, that is:
  * If we receive [AUD][IDR], we emit [AUD][SPS][PPS][IDR]
  * If we receive [SEI][AUD][IDR], we emit [AUD][SPS][PPS][SEI][IDR]

This is speculative; it would be good to hear from Marton or others about whether this is the right handling.

The other possible handling would be to simply prepend the extradata to the existing frame's bitstream without relocating the AUD to be in front. So if we received:
  [AUD][IDR]
We would emit:
  [SPS][PPS][AUD][IDR]
That's not quite compliant with the H.264 spec, which says an AUD shall be the first NAL in a frame when it's present. I doubt real decoders care and it would be simpler to implement. Let me know if you'd prefer the simpler handling.

---
 libavformat/mpegtsenc.c | 55 ++++++++++++++++++++++++++++++++++++-----
 1 file changed, 49 insertions(+), 6 deletions(-)

Comments

Marton Balint Feb. 17, 2023, 12:36 a.m. UTC | #1
On Sun, 12 Feb 2023, John Coiner wrote:

> This is v2 of the patch for https://trac.ffmpeg.org/ticket/10148.
>
> It implements the handling described at http://ffmpeg.org/pipermail/ffmpeg-devel/2023-February/306542.html, that is:
>  * If we receive [AUD][IDR], we emit [AUD][SPS][PPS][IDR]
>  * If we receive [SEI][AUD][IDR], we emit [AUD][SPS][PPS][SEI][IDR]
>
> This is speculative; it would be good to hear from Marton or others about whether this is the right handling.

Looks fine to me, but I am no expert in H264 either.

Can you check and test the attached patch? It contains cosmetic changes to
your work. If that looks OK to you as well, then I will apply it in a few
days.

Thanks,
Marton

>
> The other possible handling would be to simply prepend the extradata to the existing frame's bitstream without relocating the AUD to be in front. So if we received:
>  [AUD][IDR]
> We would emit:
>  [SPS][PPS][AUD][IDR]
> That's not quite compliant with the H.264 spec, which says an AUD shall be the first NAL in a frame when it's present. I doubt real decoders care and it would be simpler to implement. Let me know if you'd prefer the simpler handling.
>
> ---
> libavformat/mpegtsenc.c | 55 ++++++++++++++++++++++++++++++++++++-----
> 1 file changed, 49 insertions(+), 6 deletions(-)
>
> diff --git a/libavformat/mpegtsenc.c b/libavformat/mpegtsenc.c
> index 00ad426086..a4a2d8cdaf 100644
> --- a/libavformat/mpegtsenc.c
> +++ b/libavformat/mpegtsenc.c
> @@ -1835,6 +1835,13 @@ static int opus_get_packet_samples(AVFormatContext *s, AVPacket *pkt)
>     return duration;
> }
>
> +// Copies `size_bytes` from `source` to `dest`.
> +// Returns a pointer to the next destination address after the range written.
> +static uint8_t* append(uint8_t* dest, const uint8_t* source, int size_bytes) {
> +  memcpy(dest, source, size_bytes);
> +  return dest + size_bytes;
> +}
> +
> static int mpegts_write_packet_internal(AVFormatContext *s, AVPacket *pkt)
> {
>     AVStream *st = s->streams[pkt->stream_index];
> @@ -1877,6 +1884,7 @@ static int mpegts_write_packet_internal(AVFormatContext *s, AVPacket *pkt)
>
>     if (st->codecpar->codec_id == AV_CODEC_ID_H264) {
>         const uint8_t *p = buf, *buf_end = p + size;
> +        const uint8_t *found_aud = NULL, *found_aud_end = NULL;
>         uint32_t state = -1;
>         int extradd = (pkt->flags & AV_PKT_FLAG_KEY) ? st->codecpar->extradata_size : 0;
>         int ret = ff_check_h264_startcode(s, st, pkt);
> @@ -1886,17 +1894,34 @@ static int mpegts_write_packet_internal(AVFormatContext *s, AVPacket *pkt)
>         if (extradd && AV_RB24(st->codecpar->extradata) > 1)
>             extradd = 0;
>
> +        // Ensure that all pictures are prefixed with an AUD, and that
> +        // IDR pictures are also prefixed with SPS and PPS. SPS and PPS
> +        // are assumed to be available in 'extradata' if not found in-band.
>         do {
>             p = avpriv_find_start_code(p, buf_end, &state);
>             av_log(s, AV_LOG_TRACE, "nal %"PRId32"\n", state & 0x1f);
> -            if ((state & 0x1f) == 7)
> +            if ((state & 0x1f) == 7)  // SPS NAL
>                 extradd = 0;
> -        } while (p < buf_end && (state & 0x1f) != 9 &&
> -                 (state & 0x1f) != 5 && (state & 0x1f) != 1);
> -
> -        if ((state & 0x1f) != 5)
> +            if ((state & 0x1f) == 9) {  // AUD NAL
> +                found_aud = p - 4;  // start of the 0x000001 start code.
> +                found_aud_end = p + 1; // first byte past the AUD.
> +
> +                if (found_aud < buf)
> +                    found_aud = buf;
> +                if (buf_end < found_aud_end)
> +                    found_aud_end = buf_end;
> +            }
> +        } while (p < buf_end
> +                 && (state & 0x1f) != 5  // IDR picture
> +                 && (state & 0x1f) != 1  // non-IDR picture
> +                 && (extradd > 0 || !found_aud));
> +        if ((state & 0x1f) != 5) {
> +            // Did not find an IDR picture; do not emit extradata.
>             extradd = 0;
> -        if ((state & 0x1f) != 9) { // AUD NAL
> +        }
> +
> +        if (!found_aud) {
> +            // Prefix 'buf' with the missing AUD, and extradata if needed.
>             data = av_malloc(pkt->size + 6 + extradd);
>             if (!data)
>                 return AVERROR(ENOMEM);
> @@ -1907,6 +1932,24 @@ static int mpegts_write_packet_internal(AVFormatContext *s, AVPacket *pkt)
>             data[5] = 0xf0; // any slice type (0xe) + rbsp stop one bit
>             buf     = data;
>             size    = pkt->size + 6 + extradd;
> +        } else if (extradd != 0) {
> +            // Move the AUD up to the beginning of the frame, where the H.264
> +            // spec requires it to appear. Emit the extradata after it.
> +            const int aud_size = found_aud_end - found_aud;
> +            const int new_pkt_size = pkt->size + 1 + extradd;
> +            uint8_t *pos;
> +            data = av_malloc(new_pkt_size);
> +            if (!data)
> +                return AVERROR(ENOMEM);
> +            AV_WB8(data, 0x0);
> +            pos = data + 1;
> +            pos = append(pos, found_aud, aud_size);
> +            pos = append(pos, st->codecpar->extradata, extradd);
> +            pos = append(pos, pkt->data, found_aud - pkt->data);
> +            pos = append(pos, found_aud_end, buf_end - found_aud_end);
> +            av_assert0(data + new_pkt_size == pos);
> +            buf     = data;
> +            size    = new_pkt_size;
>         }
>     } else if (st->codecpar->codec_id == AV_CODEC_ID_AAC) {
>         if (pkt->size < 2) {
> -- 
> 2.39.1.581.gbfd45094c4-goog
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
John Coiner Feb. 18, 2023, 7:35 p.m. UTC | #2
On Thu, Feb 16, 2023 at 7:36 PM Marton Balint <cus@passwd.hu> wrote:
>
>
>
> On Sun, 12 Feb 2023, John Coiner wrote:
>
> > This is v2 of the patch for https://trac.ffmpeg.org/ticket/10148.
> >
> > It implements the handling described at http://ffmpeg.org/pipermail/ffmpeg-devel/2023-February/306542.html, that is:
> >  * If we receive [AUD][IDR], we emit [AUD][SPS][PPS][IDR]
> >  * If we receive [SEI][AUD][IDR], we emit [AUD][SPS][PPS][SEI][IDR]
> >
> > This is speculative; it would be good to hear from Marton or others about whether this is the right handling.
>
> Looks fine to me, but I am no expert in H264 either.
>
> Can you check and test the attached patch? It contains cosmetic changes to
> your work. If that looks OK to you as well, then I will apply it in a few
> days.
>
> Thanks,
> Marton
>

Thank you for the cleanups! I reviewed and tested this, it's working
and seems to be equivalent to my last patch. It looks ready to apply.

Cheers,
John
Anton Khirnov Feb. 20, 2023, 3:28 p.m. UTC | #3
I'm wondering if there isn't a bsf that could do this in a cleaner way
than messing with codec internals in a muxer.
Marton Balint Feb. 20, 2023, 10:21 p.m. UTC | #4
On Mon, 20 Feb 2023, Anton Khirnov wrote:

> I'm wondering if there isn't a bsf that could do this in a cleaner way
> than messing with codec internals in a muxer.

I thought about that too, but in the end I stuck with the simpler
approach, because I could not decide which would be better: adding a
single new bsf which works for any codec you may want to mux into mpegts
(h264, hevc, av1), or hacking some new options into the existing *_metadata
bsf-s to make them create mpegts-compliant output.

The problem with using *_metadata is that I am not sure whether more complex
insertion rules belong in those filters, and because of the way they work -
parsing everything with the CBS framework - they have significant CPU
overhead.

A separate BSF could use the same or very similar code as mpegtsenc.c 
right now, but by having it separate we have to maintain it separately 
from mpegtsenc, and that can also cause problems, if a newer version of 
mpegtsenc expects the BSF to do something different than the older 
version...

Regards,
Marton
diff mbox series

Patch

diff --git a/libavformat/mpegtsenc.c b/libavformat/mpegtsenc.c
index 00ad426086..a4a2d8cdaf 100644
--- a/libavformat/mpegtsenc.c
+++ b/libavformat/mpegtsenc.c
@@ -1835,6 +1835,13 @@  static int opus_get_packet_samples(AVFormatContext *s, AVPacket *pkt)
     return duration;
 }
 
+// Copies `size_bytes` from `source` to `dest`.
+// Returns a pointer to the next destination address after the range written.
+static uint8_t* append(uint8_t* dest, const uint8_t* source, int size_bytes) {
+  memcpy(dest, source, size_bytes);
+  return dest + size_bytes;
+}
+
 static int mpegts_write_packet_internal(AVFormatContext *s, AVPacket *pkt)
 {
     AVStream *st = s->streams[pkt->stream_index];
@@ -1877,6 +1884,7 @@  static int mpegts_write_packet_internal(AVFormatContext *s, AVPacket *pkt)
 
     if (st->codecpar->codec_id == AV_CODEC_ID_H264) {
         const uint8_t *p = buf, *buf_end = p + size;
+        const uint8_t *found_aud = NULL, *found_aud_end = NULL;
         uint32_t state = -1;
         int extradd = (pkt->flags & AV_PKT_FLAG_KEY) ? st->codecpar->extradata_size : 0;
         int ret = ff_check_h264_startcode(s, st, pkt);
@@ -1886,17 +1894,34 @@  static int mpegts_write_packet_internal(AVFormatContext *s, AVPacket *pkt)
         if (extradd && AV_RB24(st->codecpar->extradata) > 1)
             extradd = 0;
 
+        // Ensure that all pictures are prefixed with an AUD, and that
+        // IDR pictures are also prefixed with SPS and PPS. SPS and PPS
+        // are assumed to be available in 'extradata' if not found in-band.
         do {
             p = avpriv_find_start_code(p, buf_end, &state);
             av_log(s, AV_LOG_TRACE, "nal %"PRId32"\n", state & 0x1f);
-            if ((state & 0x1f) == 7)
+            if ((state & 0x1f) == 7)  // SPS NAL
                 extradd = 0;
-        } while (p < buf_end && (state & 0x1f) != 9 &&
-                 (state & 0x1f) != 5 && (state & 0x1f) != 1);
-
-        if ((state & 0x1f) != 5)
+            if ((state & 0x1f) == 9) {  // AUD NAL
+                found_aud = p - 4;  // start of the 0x000001 start code.
+                found_aud_end = p + 1; // first byte past the AUD.
+
+                if (found_aud < buf)
+                    found_aud = buf;
+                if (buf_end < found_aud_end)
+                    found_aud_end = buf_end;
+            }
+        } while (p < buf_end
+                 && (state & 0x1f) != 5  // IDR picture
+                 && (state & 0x1f) != 1  // non-IDR picture
+                 && (extradd > 0 || !found_aud));
+        if ((state & 0x1f) != 5) {
+            // Did not find an IDR picture; do not emit extradata.
             extradd = 0;
-        if ((state & 0x1f) != 9) { // AUD NAL
+        }
+
+        if (!found_aud) {
+            // Prefix 'buf' with the missing AUD, and extradata if needed.
             data = av_malloc(pkt->size + 6 + extradd);
             if (!data)
                 return AVERROR(ENOMEM);
@@ -1907,6 +1932,24 @@  static int mpegts_write_packet_internal(AVFormatContext *s, AVPacket *pkt)
             data[5] = 0xf0; // any slice type (0xe) + rbsp stop one bit
             buf     = data;
             size    = pkt->size + 6 + extradd;
+        } else if (extradd != 0) {
+            // Move the AUD up to the beginning of the frame, where the H.264
+            // spec requires it to appear. Emit the extradata after it.
+            const int aud_size = found_aud_end - found_aud;
+            const int new_pkt_size = pkt->size + 1 + extradd;
+            uint8_t *pos;
+            data = av_malloc(new_pkt_size);
+            if (!data)
+                return AVERROR(ENOMEM);
+            AV_WB8(data, 0x0);
+            pos = data + 1;
+            pos = append(pos, found_aud, aud_size);
+            pos = append(pos, st->codecpar->extradata, extradd);
+            pos = append(pos, pkt->data, found_aud - pkt->data);
+            pos = append(pos, found_aud_end, buf_end - found_aud_end);
+            av_assert0(data + new_pkt_size == pos);
+            buf     = data;
+            size    = new_pkt_size;
         }
     } else if (st->codecpar->codec_id == AV_CODEC_ID_AAC) {
         if (pkt->size < 2) {