diff mbox series

[FFmpeg-devel] avcodec/mpeg12dec: extract only one type of CC substream

Message ID 20240311001856.128390-1-marth64@proxyid.net
State New
Headers show
Series [FFmpeg-devel] avcodec/mpeg12dec: extract only one type of CC substream | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Marth64 March 11, 2024, 12:18 a.m. UTC
In MPEG-2 user data, there can be different types of Closed Captions
formats embedded (A53, SCTE-20, or DVD). The current behavior of the
CC extraction code in the MPEG-2 decoder is to not be aware of
multiple formats if multiple exist, therefore allowing one format
to overwrite the other during the extraction process since the CC
extraction shares one output buffer for the normalized bytes.

This causes sources that have two CC formats to produce flawed output.
There exist real-world samples which contain both A53 and SCTE-20 captions
in the same MPEG-2 stream, and that manifest this problem. Example of symptom:
THANK YOU (expected) --> THTHANANK K YOYOUU (actual)

The solution is to pick only the first CC substream observed with valid bytes,
and ignore the other types. Additionally, provide an option for users
to manually "force" a type in the event that this matters for a particular source.

In tandem, I am working on an improvement to src_movie to allow passing decoder
options (as src_movie via lavfi is the "de facto" way to extract CCs right now).
This way, users can realize the newly added option.

Signed-off-by: Marth64 <marth64@proxyid.net>
---
 libavcodec/mpeg12dec.c | 49 +++++++++++++++++++++++++++++++++++++++---
 1 file changed, 46 insertions(+), 3 deletions(-)

Comments

Stefano Sabatini March 11, 2024, 4:27 p.m. UTC | #1
On date Sunday 2024-03-10 19:18:56 -0500, Marth64 wrote:
> In MPEG-2 user data, there can be different types of Closed Captions
> formats embedded (A53, SCTE-20, or DVD). The current behavior of the
> CC extraction code in the MPEG-2 decoder is to not be aware of
> multiple formats if multiple exist, therefore allowing one format
> to overwrite the other during the extraction process since the CC
> extraction shares one output buffer for the normalized bytes.
> 
> This causes sources that have two CC formats to produce flawed output.
> There exist real-world samples which contain both A53 and SCTE-20 captions
> in the same MPEG-2 stream, and that manifest this problem. Example of symptom:
> THANK YOU (expected) --> THTHANANK K YOYOUU (actual)
> 
> The solution is to pick only the first CC substream observed with valid bytes,
> and ignore the other types. Additionally, provide an option for users
> to manually "force" a type in the event that this matters for a particular source.
> 
> In tandem, I am working on an improvement to src_movie to allow passing decoder
> options (as src_movie via lavfi is the "de facto" way to extract CCs right now).
> This way, users can realize the newly added option.
> 
> Signed-off-by: Marth64 <marth64@proxyid.net>
> ---
>  libavcodec/mpeg12dec.c | 49 +++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 46 insertions(+), 3 deletions(-)
> 
> diff --git a/libavcodec/mpeg12dec.c b/libavcodec/mpeg12dec.c
> index 3a2f17e508..a42e1c661f 100644
> --- a/libavcodec/mpeg12dec.c
> +++ b/libavcodec/mpeg12dec.c
> @@ -62,6 +62,13 @@
>  
>  #define A53_MAX_CC_COUNT 2000
>  
> +enum Mpeg2ClosedCaptionsFormat {
> +    CC_FORMAT_AUTO,
> +    CC_FORMAT_A53_PART4,
> +    CC_FORMAT_SCTE20,
> +    CC_FORMAT_DVD
> +};
> +
>  typedef struct Mpeg1Context {
>      MpegEncContext mpeg_enc_ctx;
>      int mpeg_enc_ctx_allocated; /* true if decoding context allocated */
> @@ -70,6 +77,7 @@ typedef struct Mpeg1Context {
>      AVStereo3D stereo3d;
>      int has_stereo3d;
>      AVBufferRef *a53_buf_ref;
> +    enum Mpeg2ClosedCaptionsFormat cc_format;
>      uint8_t afd;
>      int has_afd;
>      int slice_count;
> @@ -1908,7 +1916,8 @@ static int mpeg_decode_a53_cc(AVCodecContext *avctx,
>  {
>      Mpeg1Context *s1 = avctx->priv_data;
>  
> -    if (buf_size >= 6 &&
> +    if ((!s1->cc_format || s1->cc_format == CC_FORMAT_A53_PART4) &&
> +        buf_size >= 6 &&
>          p[0] == 'G' && p[1] == 'A' && p[2] == '9' && p[3] == '4' &&
>          p[4] == 3 && (p[5] & 0x40)) {
>          /* extract A53 Part 4 CC data */
> @@ -1927,9 +1936,15 @@ static int mpeg_decode_a53_cc(AVCodecContext *avctx,
>                  memcpy(s1->a53_buf_ref->data + old_size, p + 7, cc_count * UINT64_C(3));
>  
>              avctx->properties |= FF_CODEC_PROPERTY_CLOSED_CAPTIONS;
> +
> +            if (!s1->cc_format) {
> +                s1->cc_format = CC_FORMAT_A53_PART4;
> +                av_log(avctx, AV_LOG_DEBUG, "CC: first seen substream is A53 format\n");
> +            }
>          }
>          return 1;
> -    } else if (buf_size >= 2 &&
> +    } else if ((!s1->cc_format || s1->cc_format == CC_FORMAT_SCTE20) &&
> +               buf_size >= 2 &&
>                 p[0] == 0x03 && (p[1]&0x7f) == 0x01) {
>          /* extract SCTE-20 CC data */
>          GetBitContext gb;
> @@ -1974,9 +1989,15 @@ static int mpeg_decode_a53_cc(AVCodecContext *avctx,
>                  }
>              }
>              avctx->properties |= FF_CODEC_PROPERTY_CLOSED_CAPTIONS;
> +
> +            if (!s1->cc_format) {
> +                s1->cc_format = CC_FORMAT_SCTE20;
> +                av_log(avctx, AV_LOG_DEBUG, "CC: first seen substream is SCTE-20 format\n");
> +            }
>          }
>          return 1;
> -    } else if (buf_size >= 11 &&
> +    } else if ((!s1->cc_format || s1->cc_format == CC_FORMAT_DVD) &&
> +               buf_size >= 11 &&
>                 p[0] == 'C' && p[1] == 'C' && p[2] == 0x01 && p[3] == 0xf8) {
>          /* extract DVD CC data
>           *
> @@ -2034,6 +2055,11 @@ static int mpeg_decode_a53_cc(AVCodecContext *avctx,
>                  }
>              }

>              avctx->properties |= FF_CODEC_PROPERTY_CLOSED_CAPTIONS;

> +
> +            if (!s1->cc_format) {
> +                s1->cc_format = CC_FORMAT_DVD;
> +                av_log(avctx, AV_LOG_DEBUG, "CC: first seen substream is DVD format\n");
> +            }

nit: probably this might be factorized, either with a function or a
macro

>          }
>          return 1;
>      }
> @@ -2598,11 +2624,28 @@ const FFCodec ff_mpeg1video_decoder = {
>                             },
>  };
>  
> +#define M2V_OFFSET(x) offsetof(Mpeg1Context, x)
> +#define M2V_PARAM     AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM
> +

> +static const AVOption mpeg2video_options[] = {
> +    { "cc_format", "extract a specific Closed Captions format (0=auto)", M2V_OFFSET(cc_format), AV_OPT_TYPE_INT, { .i64 = 0 }, CC_FORMAT_AUTO, CC_FORMAT_DVD, M2V_PARAM },
> +    { NULL }

ideally this should be documented in the doc, but it is missing for
this one so probably it's not needed

[...]

Patch looks good to me, but let's wait a few days in case there are
more comments.
Marth64 March 12, 2024, 12:33 a.m. UTC | #2
Agreed on both points, I will address them. Thanks.
diff mbox series

Patch

diff --git a/libavcodec/mpeg12dec.c b/libavcodec/mpeg12dec.c
index 3a2f17e508..a42e1c661f 100644
--- a/libavcodec/mpeg12dec.c
+++ b/libavcodec/mpeg12dec.c
@@ -62,6 +62,13 @@ 
 
 #define A53_MAX_CC_COUNT 2000
 
+enum Mpeg2ClosedCaptionsFormat {
+    CC_FORMAT_AUTO,
+    CC_FORMAT_A53_PART4,
+    CC_FORMAT_SCTE20,
+    CC_FORMAT_DVD
+};
+
 typedef struct Mpeg1Context {
     MpegEncContext mpeg_enc_ctx;
     int mpeg_enc_ctx_allocated; /* true if decoding context allocated */
@@ -70,6 +77,7 @@  typedef struct Mpeg1Context {
     AVStereo3D stereo3d;
     int has_stereo3d;
     AVBufferRef *a53_buf_ref;
+    enum Mpeg2ClosedCaptionsFormat cc_format;
     uint8_t afd;
     int has_afd;
     int slice_count;
@@ -1908,7 +1916,8 @@  static int mpeg_decode_a53_cc(AVCodecContext *avctx,
 {
     Mpeg1Context *s1 = avctx->priv_data;
 
-    if (buf_size >= 6 &&
+    if ((!s1->cc_format || s1->cc_format == CC_FORMAT_A53_PART4) &&
+        buf_size >= 6 &&
         p[0] == 'G' && p[1] == 'A' && p[2] == '9' && p[3] == '4' &&
         p[4] == 3 && (p[5] & 0x40)) {
         /* extract A53 Part 4 CC data */
@@ -1927,9 +1936,15 @@  static int mpeg_decode_a53_cc(AVCodecContext *avctx,
                 memcpy(s1->a53_buf_ref->data + old_size, p + 7, cc_count * UINT64_C(3));
 
             avctx->properties |= FF_CODEC_PROPERTY_CLOSED_CAPTIONS;
+
+            if (!s1->cc_format) {
+                s1->cc_format = CC_FORMAT_A53_PART4;
+                av_log(avctx, AV_LOG_DEBUG, "CC: first seen substream is A53 format\n");
+            }
         }
         return 1;
-    } else if (buf_size >= 2 &&
+    } else if ((!s1->cc_format || s1->cc_format == CC_FORMAT_SCTE20) &&
+               buf_size >= 2 &&
                p[0] == 0x03 && (p[1]&0x7f) == 0x01) {
         /* extract SCTE-20 CC data */
         GetBitContext gb;
@@ -1974,9 +1989,15 @@  static int mpeg_decode_a53_cc(AVCodecContext *avctx,
                 }
             }
             avctx->properties |= FF_CODEC_PROPERTY_CLOSED_CAPTIONS;
+
+            if (!s1->cc_format) {
+                s1->cc_format = CC_FORMAT_SCTE20;
+                av_log(avctx, AV_LOG_DEBUG, "CC: first seen substream is SCTE-20 format\n");
+            }
         }
         return 1;
-    } else if (buf_size >= 11 &&
+    } else if ((!s1->cc_format || s1->cc_format == CC_FORMAT_DVD) &&
+               buf_size >= 11 &&
                p[0] == 'C' && p[1] == 'C' && p[2] == 0x01 && p[3] == 0xf8) {
         /* extract DVD CC data
          *
@@ -2034,6 +2055,11 @@  static int mpeg_decode_a53_cc(AVCodecContext *avctx,
                 }
             }
             avctx->properties |= FF_CODEC_PROPERTY_CLOSED_CAPTIONS;
+
+            if (!s1->cc_format) {
+                s1->cc_format = CC_FORMAT_DVD;
+                av_log(avctx, AV_LOG_DEBUG, "CC: first seen substream is DVD format\n");
+            }
         }
         return 1;
     }
@@ -2598,11 +2624,28 @@  const FFCodec ff_mpeg1video_decoder = {
                            },
 };
 
+#define M2V_OFFSET(x) offsetof(Mpeg1Context, x)
+#define M2V_PARAM     AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM
+
+static const AVOption mpeg2video_options[] = {
+    { "cc_format", "extract a specific Closed Captions format (0=auto)", M2V_OFFSET(cc_format), AV_OPT_TYPE_INT, { .i64 = 0 }, CC_FORMAT_AUTO, CC_FORMAT_DVD, M2V_PARAM },
+    { NULL }
+};
+
+static const AVClass mpeg2video_class = {
+    .class_name = "MPEG-2 video",
+    .item_name  = av_default_item_name,
+    .option     = mpeg2video_options,
+    .version    = LIBAVUTIL_VERSION_INT,
+    .category   = AV_CLASS_CATEGORY_DECODER,
+};
+
 const FFCodec ff_mpeg2video_decoder = {
     .p.name         = "mpeg2video",
     CODEC_LONG_NAME("MPEG-2 video"),
     .p.type         = AVMEDIA_TYPE_VIDEO,
     .p.id           = AV_CODEC_ID_MPEG2VIDEO,
+    .p.priv_class   = &mpeg2video_class,
     .priv_data_size = sizeof(Mpeg1Context),
     .init           = mpeg_decode_init,
     .close          = mpeg_decode_end,