diff mbox

[FFmpeg-devel,4/8] h264_metadata: Add support for A/53 closed captions

Message ID 20180311183021.25556-4-sw@jkqxz.net
State Superseded
Headers show

Commit Message

Mark Thompson March 11, 2018, 6:30 p.m. UTC
---
 libavcodec/h264_metadata_bsf.c | 121 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 121 insertions(+)

Comments

Aman Karmani March 12, 2018, 7:19 a.m. UTC | #1
On Sun, Mar 11, 2018 at 11:30 AM, Mark Thompson <sw@jkqxz.net> wrote:

> ---
>  libavcodec/h264_metadata_bsf.c | 121 ++++++++++++++++++++++++++++++
> +++++++++++
>  1 file changed, 121 insertions(+)
>
> diff --git a/libavcodec/h264_metadata_bsf.c b/libavcodec/h264_metadata_
> bsf.c
> index 36047887ca..d340c55990 100644
> --- a/libavcodec/h264_metadata_bsf.c
> +++ b/libavcodec/h264_metadata_bsf.c
> @@ -77,6 +77,8 @@ typedef struct H264MetadataContext {
>      int display_orientation;
>      double rotate;
>      int flip;
> +
> +    int a53_cc;
>  } H264MetadataContext;
>
>
> @@ -225,6 +227,8 @@ static int h264_metadata_filter(AVBSFContext *bsf,
> AVPacket *out)
>      int err, i, j, has_sps;
>      uint8_t *displaymatrix_side_data = NULL;
>      size_t displaymatrix_side_data_size = 0;
> +    uint8_t *a53_side_data = NULL;
> +    size_t a53_side_data_size = 0;
>
>      err = ff_bsf_get_packet(bsf, &in);
>      if (err < 0)
> @@ -514,6 +518,104 @@ static int h264_metadata_filter(AVBSFContext *bsf,
> AVPacket *out)
>          }
>      }
>
> +    if (ctx->a53_cc == INSERT) {
> +        uint8_t *data;
> +        int size;
> +
> +        data = av_packet_get_side_data(in, AV_PKT_DATA_A53_CC, &size);
> +        if (data) {
> +            H264RawSEIPayload payload = {
> +                .payload_type = H264_SEI_TYPE_USER_DATA_REGISTERED,
> +            };
> +            H264RawSEIUserDataRegistered *udr =
> +                &payload.payload.user_data_registered;
> +
> +            av_log(bsf, AV_LOG_WARNING, "A53 CC insert: %d bytes.\n",
> size);
> +
> +            udr->data_length = size + 10;
> +            udr->data_ref    = av_buffer_alloc(udr->data_length);
> +            if (!udr->data_ref) {
> +                err = AVERROR(ENOMEM);
> +                goto fail;
> +            }
> +            udr->data = udr->data_ref->data;
> +
> +            udr->itu_t_t35_country_code = 181;
> +            udr->data[0] = 0;
> +            udr->data[1] = 49;
> +            AV_WB32(udr->data + 2, MKBETAG('G', 'A', '9', '4'));
> +            udr->data[6] = 3;
> +            udr->data[7] = ((size / 3) & 0x1f) | 0x40;
> +            udr->data[8] = 0;
> +            memcpy(udr->data + 9, data, size);
> +            udr->data[size + 9] = 0xff;
> +
> +            err = ff_cbs_h264_add_sei_message(ctx->cbc, au, &payload);
> +            if (err < 0) {
> +                av_log(bsf, AV_LOG_ERROR, "Failed to add user data SEI "
> +                       "message to access unit.\n");
> +                av_buffer_unref(&udr->data_ref);
> +                goto fail;
> +            }
> +        }
> +
> +    } else if (ctx->a53_cc == REMOVE || ctx->a53_cc == EXTRACT) {
> +        for (i = 0; i < au->nb_units; i++) {
> +            H264RawSEI *sei;
> +            if (au->units[i].type != H264_NAL_SEI)
> +                continue;
> +            sei = au->units[i].content;
> +
> +            for (j = 0; j < sei->payload_count; j++) {
> +                H264RawSEIUserDataRegistered *udr;
> +                uint32_t tag;
> +                uint8_t type_code, count;
> +
> +                if (sei->payload[j].payload_type !=
> +                    H264_SEI_TYPE_USER_DATA_REGISTERED)
> +                    continue;
> +                udr = &sei->payload[j].payload.user_data_registered;
> +                tag = AV_RB32(udr->data + 2);
> +                type_code = udr->data[6];
> +                if (tag != MKBETAG('G', 'A', '9', '4') || type_code != 3)
> +                    continue;
> +
> +                if (ctx->a53_cc == REMOVE) {
> +                    err = ff_cbs_h264_delete_sei_message(ctx->cbc, au,
> +                                                         &au->units[i],
> j);
> +                    if (err < 0) {
> +                        av_log(bsf, AV_LOG_ERROR, "Failed to delete "
> +                               "A53 CC SEI message.\n");
> +                        goto fail;
> +                    }
> +                    av_log(bsf, AV_LOG_WARNING, "A53 CC remove!.\n");

+
> +                    --i;
> +                    break;
> +                }
> +
> +                // Extract.
> +                count = udr->data[7] & 0x1f;
> +                if (3 * count + 10 > udr->data_length) {
> +                    av_log(bsf, AV_LOG_ERROR, "Invalid A/53 closed
> caption "
> +                           "data: count %d overflows length %zu.\n",
> +                           count, udr->data_length);
> +                    continue;
> +                }
> +
> +                av_log(bsf, AV_LOG_WARNING, "A53 CC extract: %zu
> bytes.\n", udr->data_length);
>

I assume these are WARNINGs from testing? Seems like TRACE or DEBUG would
be fine.


> +
> +                err = av_reallocp(&a53_side_data,
> +                                  a53_side_data_size + 3 * count);
> +                if (err)
> +                    goto fail;
> +                memcpy(a53_side_data + a53_side_data_size,
> +                       udr->data + 9, 3 * count);
> +                a53_side_data_size += 3 * count;
> +            }
> +        }
> +    }
> +
>      err = ff_cbs_write_packet(ctx->cbc, out, au);
>      if (err < 0) {
>          av_log(bsf, AV_LOG_ERROR, "Failed to write packet.\n");
> @@ -535,6 +637,16 @@ static int h264_metadata_filter(AVBSFContext *bsf,
> AVPacket *out)
>          }
>          displaymatrix_side_data = NULL;
>      }
> +    if (a53_side_data) {
> +        err = av_packet_add_side_data(out, AV_PKT_DATA_A53_CC,
> +                                      a53_side_data, a53_side_data_size);
> +        if (err) {
> +            av_log(bsf, AV_LOG_ERROR, "Failed to attach extracted A/53 "
> +                   "side data to packet.\n");
> +            goto fail;
> +        }
> +        a53_side_data = NULL;
> +    }
>
>      ctx->done_first_au = 1;
>
> @@ -542,6 +654,7 @@ static int h264_metadata_filter(AVBSFContext *bsf,
> AVPacket *out)
>  fail:
>      ff_cbs_fragment_uninit(ctx->cbc, au);
>      av_freep(&displaymatrix_side_data);
> +    av_freep(&a53_side_data);
>
>      av_packet_free(&in);
>
> @@ -670,6 +783,14 @@ static const AVOption h264_metadata_options[] = {
>      { "vertical",   "Set ver_flip",
>          0, AV_OPT_TYPE_CONST, { .i64 = FLIP_VERTICAL },   .unit ="flip" },
>
> +    { "a53_cc", "A/53 Closed Captions in SEI NAL units",
> +        OFFSET(a53_cc), AV_OPT_TYPE_INT,
> +        { .i64 = PASS }, PASS, EXTRACT, 0, "a53_cc" },
> +    { "pass",    NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PASS    }, .unit =
> "a53_cc" },
> +    { "insert",  NULL, 0, AV_OPT_TYPE_CONST, { .i64 = INSERT  }, .unit =
> "a53_cc" },
> +    { "remove",  NULL, 0, AV_OPT_TYPE_CONST, { .i64 = REMOVE  }, .unit =
> "a53_cc" },
> +    { "extract", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = EXTRACT }, .unit =
> "a53_cc" },
>

Thanks for writing this patch!

I tested the remove and extract modes on a h264 stream and they work as
expected.

I was also able to port these over to the mpeg2_metadata bsf and will
submit a patch shortly.

Aman


> +
>      { NULL }
>  };
>
> --
> 2.16.1
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
Hendrik Leppkes March 12, 2018, 9:54 a.m. UTC | #2
On Sun, Mar 11, 2018 at 7:30 PM, Mark Thompson <sw@jkqxz.net> wrote:
> ---
>  libavcodec/h264_metadata_bsf.c | 121 +++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 121 insertions(+)
>
> diff --git a/libavcodec/h264_metadata_bsf.c b/libavcodec/h264_metadata_bsf.c
> index 36047887ca..d340c55990 100644
> --- a/libavcodec/h264_metadata_bsf.c
> +++ b/libavcodec/h264_metadata_bsf.c
> @@ -77,6 +77,8 @@ typedef struct H264MetadataContext {
>      int display_orientation;
>      double rotate;
>      int flip;
> +
> +    int a53_cc;
>  } H264MetadataContext;
>
>
> @@ -225,6 +227,8 @@ static int h264_metadata_filter(AVBSFContext *bsf, AVPacket *out)
>      int err, i, j, has_sps;
>      uint8_t *displaymatrix_side_data = NULL;
>      size_t displaymatrix_side_data_size = 0;
> +    uint8_t *a53_side_data = NULL;
> +    size_t a53_side_data_size = 0;
>
>      err = ff_bsf_get_packet(bsf, &in);
>      if (err < 0)
> @@ -514,6 +518,104 @@ static int h264_metadata_filter(AVBSFContext *bsf, AVPacket *out)
>          }
>      }
>
> +    if (ctx->a53_cc == INSERT) {
> +        uint8_t *data;
> +        int size;
> +
> +        data = av_packet_get_side_data(in, AV_PKT_DATA_A53_CC, &size);
> +        if (data) {
> +            H264RawSEIPayload payload = {
> +                .payload_type = H264_SEI_TYPE_USER_DATA_REGISTERED,
> +            };
> +            H264RawSEIUserDataRegistered *udr =
> +                &payload.payload.user_data_registered;
> +
> +            av_log(bsf, AV_LOG_WARNING, "A53 CC insert: %d bytes.\n", size);
> +
> +            udr->data_length = size + 10;
> +            udr->data_ref    = av_buffer_alloc(udr->data_length);
> +            if (!udr->data_ref) {
> +                err = AVERROR(ENOMEM);
> +                goto fail;
> +            }
> +            udr->data = udr->data_ref->data;
> +
> +            udr->itu_t_t35_country_code = 181;
> +            udr->data[0] = 0;
> +            udr->data[1] = 49;
> +            AV_WB32(udr->data + 2, MKBETAG('G', 'A', '9', '4'));
> +            udr->data[6] = 3;
> +            udr->data[7] = ((size / 3) & 0x1f) | 0x40;
> +            udr->data[8] = 0;
> +            memcpy(udr->data + 9, data, size);
> +            udr->data[size + 9] = 0xff;
> +
> +            err = ff_cbs_h264_add_sei_message(ctx->cbc, au, &payload);
> +            if (err < 0) {
> +                av_log(bsf, AV_LOG_ERROR, "Failed to add user data SEI "
> +                       "message to access unit.\n");
> +                av_buffer_unref(&udr->data_ref);
> +                goto fail;
> +            }
> +        }
> +
> +    } else if (ctx->a53_cc == REMOVE || ctx->a53_cc == EXTRACT) {
> +        for (i = 0; i < au->nb_units; i++) {
> +            H264RawSEI *sei;
> +            if (au->units[i].type != H264_NAL_SEI)
> +                continue;
> +            sei = au->units[i].content;
> +
> +            for (j = 0; j < sei->payload_count; j++) {
> +                H264RawSEIUserDataRegistered *udr;
> +                uint32_t tag;
> +                uint8_t type_code, count;
> +
> +                if (sei->payload[j].payload_type !=
> +                    H264_SEI_TYPE_USER_DATA_REGISTERED)
> +                    continue;
> +                udr = &sei->payload[j].payload.user_data_registered;
> +                tag = AV_RB32(udr->data + 2);
> +                type_code = udr->data[6];
> +                if (tag != MKBETAG('G', 'A', '9', '4') || type_code != 3)
> +                    continue;
> +
> +                if (ctx->a53_cc == REMOVE) {
> +                    err = ff_cbs_h264_delete_sei_message(ctx->cbc, au,
> +                                                         &au->units[i], j);
> +                    if (err < 0) {
> +                        av_log(bsf, AV_LOG_ERROR, "Failed to delete "
> +                               "A53 CC SEI message.\n");
> +                        goto fail;
> +                    }
> +                    av_log(bsf, AV_LOG_WARNING, "A53 CC remove!.\n");
> +
> +                    --i;
> +                    break;
> +                }
> +
> +                // Extract.
> +                count = udr->data[7] & 0x1f;
> +                if (3 * count + 10 > udr->data_length) {
> +                    av_log(bsf, AV_LOG_ERROR, "Invalid A/53 closed caption "
> +                           "data: count %d overflows length %zu.\n",
> +                           count, udr->data_length);
> +                    continue;
> +                }
> +
> +                av_log(bsf, AV_LOG_WARNING, "A53 CC extract: %zu bytes.\n", udr->data_length);
> +
> +                err = av_reallocp(&a53_side_data,
> +                                  a53_side_data_size + 3 * count);
> +                if (err)
> +                    goto fail;
> +                memcpy(a53_side_data + a53_side_data_size,
> +                       udr->data + 9, 3 * count);
> +                a53_side_data_size += 3 * count;
> +            }
> +        }
> +    }
> +
>      err = ff_cbs_write_packet(ctx->cbc, out, au);
>      if (err < 0) {
>          av_log(bsf, AV_LOG_ERROR, "Failed to write packet.\n");
> @@ -535,6 +637,16 @@ static int h264_metadata_filter(AVBSFContext *bsf, AVPacket *out)
>          }
>          displaymatrix_side_data = NULL;
>      }
> +    if (a53_side_data) {
> +        err = av_packet_add_side_data(out, AV_PKT_DATA_A53_CC,
> +                                      a53_side_data, a53_side_data_size);
> +        if (err) {
> +            av_log(bsf, AV_LOG_ERROR, "Failed to attach extracted A/53 "
> +                   "side data to packet.\n");
> +            goto fail;
> +        }
> +        a53_side_data = NULL;
> +    }
>
>      ctx->done_first_au = 1;
>
> @@ -542,6 +654,7 @@ static int h264_metadata_filter(AVBSFContext *bsf, AVPacket *out)
>  fail:
>      ff_cbs_fragment_uninit(ctx->cbc, au);
>      av_freep(&displaymatrix_side_data);
> +    av_freep(&a53_side_data);
>
>      av_packet_free(&in);
>
> @@ -670,6 +783,14 @@ static const AVOption h264_metadata_options[] = {
>      { "vertical",   "Set ver_flip",
>          0, AV_OPT_TYPE_CONST, { .i64 = FLIP_VERTICAL },   .unit ="flip" },
>
> +    { "a53_cc", "A/53 Closed Captions in SEI NAL units",
> +        OFFSET(a53_cc), AV_OPT_TYPE_INT,
> +        { .i64 = PASS }, PASS, EXTRACT, 0, "a53_cc" },
> +    { "pass",    NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PASS    }, .unit = "a53_cc" },
> +    { "insert",  NULL, 0, AV_OPT_TYPE_CONST, { .i64 = INSERT  }, .unit = "a53_cc" },
> +    { "remove",  NULL, 0, AV_OPT_TYPE_CONST, { .i64 = REMOVE  }, .unit = "a53_cc" },
> +    { "extract", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = EXTRACT }, .unit = "a53_cc" },
> +

Does extracting really make sense? Doesn't the data end up out of
order and basically unusable?

- Hendrik
Devin Heitmueller March 12, 2018, 1:04 p.m. UTC | #3
> Does extracting really make sense? Doesn't the data end up out of
> order and basically unusable?

For what it’s worth, I’ve got a video filter which extracts the A53 side data and saves it into an MCC file (Telestream MacCaption format).  If people think that’s something that would be useful, I’ll see about getting it upstreamed.

Devin
Mark Thompson March 12, 2018, 1:26 p.m. UTC | #4
On 12/03/18 07:19, Aman Gupta wrote:
> On Sun, Mar 11, 2018 at 11:30 AM, Mark Thompson <sw@jkqxz.net> wrote:
> 
>> ---
>>  libavcodec/h264_metadata_bsf.c | 121 ++++++++++++++++++++++++++++++
>> +++++++++++
>>  1 file changed, 121 insertions(+)
>>
>> diff --git a/libavcodec/h264_metadata_bsf.c b/libavcodec/h264_metadata_
>> bsf.c
>> index 36047887ca..d340c55990 100644
>> --- a/libavcodec/h264_metadata_bsf.c
>> +++ b/libavcodec/h264_metadata_bsf.c
>> @@ -77,6 +77,8 @@ typedef struct H264MetadataContext {
>>      int display_orientation;
>>      double rotate;
>>      int flip;
>> +
>> +    int a53_cc;
>>  } H264MetadataContext;
>>
>>
>> @@ -225,6 +227,8 @@ static int h264_metadata_filter(AVBSFContext *bsf,
>> AVPacket *out)
>>      int err, i, j, has_sps;
>>      uint8_t *displaymatrix_side_data = NULL;
>>      size_t displaymatrix_side_data_size = 0;
>> +    uint8_t *a53_side_data = NULL;
>> +    size_t a53_side_data_size = 0;
>>
>>      err = ff_bsf_get_packet(bsf, &in);
>>      if (err < 0)
>> @@ -514,6 +518,104 @@ static int h264_metadata_filter(AVBSFContext *bsf,
>> AVPacket *out)
>>          }
>>      }
>>
>> +    if (ctx->a53_cc == INSERT) {
>> +        uint8_t *data;
>> +        int size;
>> +
>> +        data = av_packet_get_side_data(in, AV_PKT_DATA_A53_CC, &size);
>> +        if (data) {
>> +            H264RawSEIPayload payload = {
>> +                .payload_type = H264_SEI_TYPE_USER_DATA_REGISTERED,
>> +            };
>> +            H264RawSEIUserDataRegistered *udr =
>> +                &payload.payload.user_data_registered;
>> +
>> +            av_log(bsf, AV_LOG_WARNING, "A53 CC insert: %d bytes.\n",
>> size);
>> +
>> +            udr->data_length = size + 10;
>> +            udr->data_ref    = av_buffer_alloc(udr->data_length);
>> +            if (!udr->data_ref) {
>> +                err = AVERROR(ENOMEM);
>> +                goto fail;
>> +            }
>> +            udr->data = udr->data_ref->data;
>> +
>> +            udr->itu_t_t35_country_code = 181;
>> +            udr->data[0] = 0;
>> +            udr->data[1] = 49;
>> +            AV_WB32(udr->data + 2, MKBETAG('G', 'A', '9', '4'));
>> +            udr->data[6] = 3;
>> +            udr->data[7] = ((size / 3) & 0x1f) | 0x40;
>> +            udr->data[8] = 0;
>> +            memcpy(udr->data + 9, data, size);
>> +            udr->data[size + 9] = 0xff;
>> +
>> +            err = ff_cbs_h264_add_sei_message(ctx->cbc, au, &payload);
>> +            if (err < 0) {
>> +                av_log(bsf, AV_LOG_ERROR, "Failed to add user data SEI "
>> +                       "message to access unit.\n");
>> +                av_buffer_unref(&udr->data_ref);
>> +                goto fail;
>> +            }
>> +        }
>> +
>> +    } else if (ctx->a53_cc == REMOVE || ctx->a53_cc == EXTRACT) {
>> +        for (i = 0; i < au->nb_units; i++) {
>> +            H264RawSEI *sei;
>> +            if (au->units[i].type != H264_NAL_SEI)
>> +                continue;
>> +            sei = au->units[i].content;
>> +
>> +            for (j = 0; j < sei->payload_count; j++) {
>> +                H264RawSEIUserDataRegistered *udr;
>> +                uint32_t tag;
>> +                uint8_t type_code, count;
>> +
>> +                if (sei->payload[j].payload_type !=
>> +                    H264_SEI_TYPE_USER_DATA_REGISTERED)
>> +                    continue;
>> +                udr = &sei->payload[j].payload.user_data_registered;
>> +                tag = AV_RB32(udr->data + 2);
>> +                type_code = udr->data[6];
>> +                if (tag != MKBETAG('G', 'A', '9', '4') || type_code != 3)
>> +                    continue;
>> +
>> +                if (ctx->a53_cc == REMOVE) {
>> +                    err = ff_cbs_h264_delete_sei_message(ctx->cbc, au,
>> +                                                         &au->units[i],
>> j);
>> +                    if (err < 0) {
>> +                        av_log(bsf, AV_LOG_ERROR, "Failed to delete "
>> +                               "A53 CC SEI message.\n");
>> +                        goto fail;
>> +                    }
>> +                    av_log(bsf, AV_LOG_WARNING, "A53 CC remove!.\n");
> 
> +
>> +                    --i;
>> +                    break;
>> +                }
>> +
>> +                // Extract.
>> +                count = udr->data[7] & 0x1f;
>> +                if (3 * count + 10 > udr->data_length) {
>> +                    av_log(bsf, AV_LOG_ERROR, "Invalid A/53 closed
>> caption "
>> +                           "data: count %d overflows length %zu.\n",
>> +                           count, udr->data_length);
>> +                    continue;
>> +                }
>> +
>> +                av_log(bsf, AV_LOG_WARNING, "A53 CC extract: %zu
>> bytes.\n", udr->data_length);
>>
> 
> I assume these are WARNINGs from testing? Seems like TRACE or DEBUG would
> be fine.

Oops, yeah.  Removed this one and others.

(The filter probably does want some more logging to explain what it's doing (at VERBOSE or DEBUG?), but not like this.)

>> +
>> +                err = av_reallocp(&a53_side_data,
>> +                                  a53_side_data_size + 3 * count);
>> +                if (err)
>> +                    goto fail;
>> +                memcpy(a53_side_data + a53_side_data_size,
>> +                       udr->data + 9, 3 * count);
>> +                a53_side_data_size += 3 * count;
>> +            }
>> +        }
>> +    }
>> +
>>      err = ff_cbs_write_packet(ctx->cbc, out, au);
>>      if (err < 0) {
>>          av_log(bsf, AV_LOG_ERROR, "Failed to write packet.\n");
>> @@ -535,6 +637,16 @@ static int h264_metadata_filter(AVBSFContext *bsf,
>> AVPacket *out)
>>          }
>>          displaymatrix_side_data = NULL;
>>      }
>> +    if (a53_side_data) {
>> +        err = av_packet_add_side_data(out, AV_PKT_DATA_A53_CC,
>> +                                      a53_side_data, a53_side_data_size);
>> +        if (err) {
>> +            av_log(bsf, AV_LOG_ERROR, "Failed to attach extracted A/53 "
>> +                   "side data to packet.\n");
>> +            goto fail;
>> +        }
>> +        a53_side_data = NULL;
>> +    }
>>
>>      ctx->done_first_au = 1;
>>
>> @@ -542,6 +654,7 @@ static int h264_metadata_filter(AVBSFContext *bsf,
>> AVPacket *out)
>>  fail:
>>      ff_cbs_fragment_uninit(ctx->cbc, au);
>>      av_freep(&displaymatrix_side_data);
>> +    av_freep(&a53_side_data);
>>
>>      av_packet_free(&in);
>>
>> @@ -670,6 +783,14 @@ static const AVOption h264_metadata_options[] = {
>>      { "vertical",   "Set ver_flip",
>>          0, AV_OPT_TYPE_CONST, { .i64 = FLIP_VERTICAL },   .unit ="flip" },
>>
>> +    { "a53_cc", "A/53 Closed Captions in SEI NAL units",
>> +        OFFSET(a53_cc), AV_OPT_TYPE_INT,
>> +        { .i64 = PASS }, PASS, EXTRACT, 0, "a53_cc" },
>> +    { "pass",    NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PASS    }, .unit =
>> "a53_cc" },
>> +    { "insert",  NULL, 0, AV_OPT_TYPE_CONST, { .i64 = INSERT  }, .unit =
>> "a53_cc" },
>> +    { "remove",  NULL, 0, AV_OPT_TYPE_CONST, { .i64 = REMOVE  }, .unit =
>> "a53_cc" },
>> +    { "extract", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = EXTRACT }, .unit =
>> "a53_cc" },
>>
> 
> Thanks for writing this patch!
> 
> I tested the remove and extract modes on a h264 stream and they work as
> expected.

Yay!

> I was also able to port these over to the mpeg2_metadata bsf and will
> submit a patch shortly.

I wonder whether this should be put further into cbs to avoid duplication, since presumably H.265 would want identical code as well.  Maybe cbs_misc.c with template code for reading/writing MPEG-2 user_data and H.264 (and H.265) user_data_registered in the same way?

Thanks,

- Mark
Mark Thompson March 12, 2018, 1:38 p.m. UTC | #5
On 12/03/18 09:54, Hendrik Leppkes wrote:
> On Sun, Mar 11, 2018 at 7:30 PM, Mark Thompson <sw@jkqxz.net> wrote:
>> ---
>>  libavcodec/h264_metadata_bsf.c | 121 +++++++++++++++++++++++++++++++++++++++++
>>  1 file changed, 121 insertions(+)
>>
>> diff --git a/libavcodec/h264_metadata_bsf.c b/libavcodec/h264_metadata_bsf.c
>> index 36047887ca..d340c55990 100644
>> --- a/libavcodec/h264_metadata_bsf.c
>> +++ b/libavcodec/h264_metadata_bsf.c
>> @@ -77,6 +77,8 @@ typedef struct H264MetadataContext {
>>      int display_orientation;
>>      double rotate;
>>      int flip;
>> +
>> +    int a53_cc;
>>  } H264MetadataContext;
>>
>>
>> @@ -225,6 +227,8 @@ static int h264_metadata_filter(AVBSFContext *bsf, AVPacket *out)
>>      int err, i, j, has_sps;
>>      uint8_t *displaymatrix_side_data = NULL;
>>      size_t displaymatrix_side_data_size = 0;
>> +    uint8_t *a53_side_data = NULL;
>> +    size_t a53_side_data_size = 0;
>>
>>      err = ff_bsf_get_packet(bsf, &in);
>>      if (err < 0)
>> @@ -514,6 +518,104 @@ static int h264_metadata_filter(AVBSFContext *bsf, AVPacket *out)
>>          }
>>      }
>>
>> +    if (ctx->a53_cc == INSERT) {
>> +        uint8_t *data;
>> +        int size;
>> +
>> +        data = av_packet_get_side_data(in, AV_PKT_DATA_A53_CC, &size);
>> +        if (data) {
>> +            H264RawSEIPayload payload = {
>> +                .payload_type = H264_SEI_TYPE_USER_DATA_REGISTERED,
>> +            };
>> +            H264RawSEIUserDataRegistered *udr =
>> +                &payload.payload.user_data_registered;
>> +
>> +            av_log(bsf, AV_LOG_WARNING, "A53 CC insert: %d bytes.\n", size);
>> +
>> +            udr->data_length = size + 10;
>> +            udr->data_ref    = av_buffer_alloc(udr->data_length);
>> +            if (!udr->data_ref) {
>> +                err = AVERROR(ENOMEM);
>> +                goto fail;
>> +            }
>> +            udr->data = udr->data_ref->data;
>> +
>> +            udr->itu_t_t35_country_code = 181;
>> +            udr->data[0] = 0;
>> +            udr->data[1] = 49;
>> +            AV_WB32(udr->data + 2, MKBETAG('G', 'A', '9', '4'));
>> +            udr->data[6] = 3;
>> +            udr->data[7] = ((size / 3) & 0x1f) | 0x40;
>> +            udr->data[8] = 0;
>> +            memcpy(udr->data + 9, data, size);
>> +            udr->data[size + 9] = 0xff;
>> +
>> +            err = ff_cbs_h264_add_sei_message(ctx->cbc, au, &payload);
>> +            if (err < 0) {
>> +                av_log(bsf, AV_LOG_ERROR, "Failed to add user data SEI "
>> +                       "message to access unit.\n");
>> +                av_buffer_unref(&udr->data_ref);
>> +                goto fail;
>> +            }
>> +        }
>> +
>> +    } else if (ctx->a53_cc == REMOVE || ctx->a53_cc == EXTRACT) {
>> +        for (i = 0; i < au->nb_units; i++) {
>> +            H264RawSEI *sei;
>> +            if (au->units[i].type != H264_NAL_SEI)
>> +                continue;
>> +            sei = au->units[i].content;
>> +
>> +            for (j = 0; j < sei->payload_count; j++) {
>> +                H264RawSEIUserDataRegistered *udr;
>> +                uint32_t tag;
>> +                uint8_t type_code, count;
>> +
>> +                if (sei->payload[j].payload_type !=
>> +                    H264_SEI_TYPE_USER_DATA_REGISTERED)
>> +                    continue;
>> +                udr = &sei->payload[j].payload.user_data_registered;
>> +                tag = AV_RB32(udr->data + 2);
>> +                type_code = udr->data[6];
>> +                if (tag != MKBETAG('G', 'A', '9', '4') || type_code != 3)
>> +                    continue;
>> +
>> +                if (ctx->a53_cc == REMOVE) {
>> +                    err = ff_cbs_h264_delete_sei_message(ctx->cbc, au,
>> +                                                         &au->units[i], j);
>> +                    if (err < 0) {
>> +                        av_log(bsf, AV_LOG_ERROR, "Failed to delete "
>> +                               "A53 CC SEI message.\n");
>> +                        goto fail;
>> +                    }
>> +                    av_log(bsf, AV_LOG_WARNING, "A53 CC remove!.\n");
>> +
>> +                    --i;
>> +                    break;
>> +                }
>> +
>> +                // Extract.
>> +                count = udr->data[7] & 0x1f;
>> +                if (3 * count + 10 > udr->data_length) {
>> +                    av_log(bsf, AV_LOG_ERROR, "Invalid A/53 closed caption "
>> +                           "data: count %d overflows length %zu.\n",
>> +                           count, udr->data_length);
>> +                    continue;
>> +                }
>> +
>> +                av_log(bsf, AV_LOG_WARNING, "A53 CC extract: %zu bytes.\n", udr->data_length);
>> +
>> +                err = av_reallocp(&a53_side_data,
>> +                                  a53_side_data_size + 3 * count);
>> +                if (err)
>> +                    goto fail;
>> +                memcpy(a53_side_data + a53_side_data_size,
>> +                       udr->data + 9, 3 * count);
>> +                a53_side_data_size += 3 * count;
>> +            }
>> +        }
>> +    }
>> +
>>      err = ff_cbs_write_packet(ctx->cbc, out, au);
>>      if (err < 0) {
>>          av_log(bsf, AV_LOG_ERROR, "Failed to write packet.\n");
>> @@ -535,6 +637,16 @@ static int h264_metadata_filter(AVBSFContext *bsf, AVPacket *out)
>>          }
>>          displaymatrix_side_data = NULL;
>>      }
>> +    if (a53_side_data) {
>> +        err = av_packet_add_side_data(out, AV_PKT_DATA_A53_CC,
>> +                                      a53_side_data, a53_side_data_size);
>> +        if (err) {
>> +            av_log(bsf, AV_LOG_ERROR, "Failed to attach extracted A/53 "
>> +                   "side data to packet.\n");
>> +            goto fail;
>> +        }
>> +        a53_side_data = NULL;
>> +    }
>>
>>      ctx->done_first_au = 1;
>>
>> @@ -542,6 +654,7 @@ static int h264_metadata_filter(AVBSFContext *bsf, AVPacket *out)
>>  fail:
>>      ff_cbs_fragment_uninit(ctx->cbc, au);
>>      av_freep(&displaymatrix_side_data);
>> +    av_freep(&a53_side_data);
>>
>>      av_packet_free(&in);
>>
>> @@ -670,6 +783,14 @@ static const AVOption h264_metadata_options[] = {
>>      { "vertical",   "Set ver_flip",
>>          0, AV_OPT_TYPE_CONST, { .i64 = FLIP_VERTICAL },   .unit ="flip" },
>>
>> +    { "a53_cc", "A/53 Closed Captions in SEI NAL units",
>> +        OFFSET(a53_cc), AV_OPT_TYPE_INT,
>> +        { .i64 = PASS }, PASS, EXTRACT, 0, "a53_cc" },
>> +    { "pass",    NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PASS    }, .unit = "a53_cc" },
>> +    { "insert",  NULL, 0, AV_OPT_TYPE_CONST, { .i64 = INSERT  }, .unit = "a53_cc" },
>> +    { "remove",  NULL, 0, AV_OPT_TYPE_CONST, { .i64 = REMOVE  }, .unit = "a53_cc" },
>> +    { "extract", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = EXTRACT }, .unit = "a53_cc" },
>> +
> 
> Does extracting really make sense? Doesn't the data end up out of
> order and basically unusable?

Well, it's up to whatever follows to deal with that.  If the stream has correct timestamps (ha) then you can use those directly.  If you're feeding the output to an opaque hardware decoder then having some way to associate input packets with output frames is sufficient to get the right ordering.  A BSF to deal with reordering somehow is also possible.

- Mark
Hendrik Leppkes March 12, 2018, 3:10 p.m. UTC | #6
On Mon, Mar 12, 2018 at 2:38 PM, Mark Thompson <sw@jkqxz.net> wrote:
> On 12/03/18 09:54, Hendrik Leppkes wrote:
>> On Sun, Mar 11, 2018 at 7:30 PM, Mark Thompson <sw@jkqxz.net> wrote:
>>> ---
>>>  libavcodec/h264_metadata_bsf.c | 121 +++++++++++++++++++++++++++++++++++++++++
>>>  1 file changed, 121 insertions(+)
>>>
>>> diff --git a/libavcodec/h264_metadata_bsf.c b/libavcodec/h264_metadata_bsf.c
>>> index 36047887ca..d340c55990 100644
>>> --- a/libavcodec/h264_metadata_bsf.c
>>> +++ b/libavcodec/h264_metadata_bsf.c
>>> @@ -77,6 +77,8 @@ typedef struct H264MetadataContext {
>>>      int display_orientation;
>>>      double rotate;
>>>      int flip;
>>> +
>>> +    int a53_cc;
>>>  } H264MetadataContext;
>>>
>>>
>>> @@ -225,6 +227,8 @@ static int h264_metadata_filter(AVBSFContext *bsf, AVPacket *out)
>>>      int err, i, j, has_sps;
>>>      uint8_t *displaymatrix_side_data = NULL;
>>>      size_t displaymatrix_side_data_size = 0;
>>> +    uint8_t *a53_side_data = NULL;
>>> +    size_t a53_side_data_size = 0;
>>>
>>>      err = ff_bsf_get_packet(bsf, &in);
>>>      if (err < 0)
>>> @@ -514,6 +518,104 @@ static int h264_metadata_filter(AVBSFContext *bsf, AVPacket *out)
>>>          }
>>>      }
>>>
>>> +    if (ctx->a53_cc == INSERT) {
>>> +        uint8_t *data;
>>> +        int size;
>>> +
>>> +        data = av_packet_get_side_data(in, AV_PKT_DATA_A53_CC, &size);
>>> +        if (data) {
>>> +            H264RawSEIPayload payload = {
>>> +                .payload_type = H264_SEI_TYPE_USER_DATA_REGISTERED,
>>> +            };
>>> +            H264RawSEIUserDataRegistered *udr =
>>> +                &payload.payload.user_data_registered;
>>> +
>>> +            av_log(bsf, AV_LOG_WARNING, "A53 CC insert: %d bytes.\n", size);
>>> +
>>> +            udr->data_length = size + 10;
>>> +            udr->data_ref    = av_buffer_alloc(udr->data_length);
>>> +            if (!udr->data_ref) {
>>> +                err = AVERROR(ENOMEM);
>>> +                goto fail;
>>> +            }
>>> +            udr->data = udr->data_ref->data;
>>> +
>>> +            udr->itu_t_t35_country_code = 181;
>>> +            udr->data[0] = 0;
>>> +            udr->data[1] = 49;
>>> +            AV_WB32(udr->data + 2, MKBETAG('G', 'A', '9', '4'));
>>> +            udr->data[6] = 3;
>>> +            udr->data[7] = ((size / 3) & 0x1f) | 0x40;
>>> +            udr->data[8] = 0;
>>> +            memcpy(udr->data + 9, data, size);
>>> +            udr->data[size + 9] = 0xff;
>>> +
>>> +            err = ff_cbs_h264_add_sei_message(ctx->cbc, au, &payload);
>>> +            if (err < 0) {
>>> +                av_log(bsf, AV_LOG_ERROR, "Failed to add user data SEI "
>>> +                       "message to access unit.\n");
>>> +                av_buffer_unref(&udr->data_ref);
>>> +                goto fail;
>>> +            }
>>> +        }
>>> +
>>> +    } else if (ctx->a53_cc == REMOVE || ctx->a53_cc == EXTRACT) {
>>> +        for (i = 0; i < au->nb_units; i++) {
>>> +            H264RawSEI *sei;
>>> +            if (au->units[i].type != H264_NAL_SEI)
>>> +                continue;
>>> +            sei = au->units[i].content;
>>> +
>>> +            for (j = 0; j < sei->payload_count; j++) {
>>> +                H264RawSEIUserDataRegistered *udr;
>>> +                uint32_t tag;
>>> +                uint8_t type_code, count;
>>> +
>>> +                if (sei->payload[j].payload_type !=
>>> +                    H264_SEI_TYPE_USER_DATA_REGISTERED)
>>> +                    continue;
>>> +                udr = &sei->payload[j].payload.user_data_registered;
>>> +                tag = AV_RB32(udr->data + 2);
>>> +                type_code = udr->data[6];
>>> +                if (tag != MKBETAG('G', 'A', '9', '4') || type_code != 3)
>>> +                    continue;
>>> +
>>> +                if (ctx->a53_cc == REMOVE) {
>>> +                    err = ff_cbs_h264_delete_sei_message(ctx->cbc, au,
>>> +                                                         &au->units[i], j);
>>> +                    if (err < 0) {
>>> +                        av_log(bsf, AV_LOG_ERROR, "Failed to delete "
>>> +                               "A53 CC SEI message.\n");
>>> +                        goto fail;
>>> +                    }
>>> +                    av_log(bsf, AV_LOG_WARNING, "A53 CC remove!.\n");
>>> +
>>> +                    --i;
>>> +                    break;
>>> +                }
>>> +
>>> +                // Extract.
>>> +                count = udr->data[7] & 0x1f;
>>> +                if (3 * count + 10 > udr->data_length) {
>>> +                    av_log(bsf, AV_LOG_ERROR, "Invalid A/53 closed caption "
>>> +                           "data: count %d overflows length %zu.\n",
>>> +                           count, udr->data_length);
>>> +                    continue;
>>> +                }
>>> +
>>> +                av_log(bsf, AV_LOG_WARNING, "A53 CC extract: %zu bytes.\n", udr->data_length);
>>> +
>>> +                err = av_reallocp(&a53_side_data,
>>> +                                  a53_side_data_size + 3 * count);
>>> +                if (err)
>>> +                    goto fail;
>>> +                memcpy(a53_side_data + a53_side_data_size,
>>> +                       udr->data + 9, 3 * count);
>>> +                a53_side_data_size += 3 * count;
>>> +            }
>>> +        }
>>> +    }
>>> +
>>>      err = ff_cbs_write_packet(ctx->cbc, out, au);
>>>      if (err < 0) {
>>>          av_log(bsf, AV_LOG_ERROR, "Failed to write packet.\n");
>>> @@ -535,6 +637,16 @@ static int h264_metadata_filter(AVBSFContext *bsf, AVPacket *out)
>>>          }
>>>          displaymatrix_side_data = NULL;
>>>      }
>>> +    if (a53_side_data) {
>>> +        err = av_packet_add_side_data(out, AV_PKT_DATA_A53_CC,
>>> +                                      a53_side_data, a53_side_data_size);
>>> +        if (err) {
>>> +            av_log(bsf, AV_LOG_ERROR, "Failed to attach extracted A/53 "
>>> +                   "side data to packet.\n");
>>> +            goto fail;
>>> +        }
>>> +        a53_side_data = NULL;
>>> +    }
>>>
>>>      ctx->done_first_au = 1;
>>>
>>> @@ -542,6 +654,7 @@ static int h264_metadata_filter(AVBSFContext *bsf, AVPacket *out)
>>>  fail:
>>>      ff_cbs_fragment_uninit(ctx->cbc, au);
>>>      av_freep(&displaymatrix_side_data);
>>> +    av_freep(&a53_side_data);
>>>
>>>      av_packet_free(&in);
>>>
>>> @@ -670,6 +783,14 @@ static const AVOption h264_metadata_options[] = {
>>>      { "vertical",   "Set ver_flip",
>>>          0, AV_OPT_TYPE_CONST, { .i64 = FLIP_VERTICAL },   .unit ="flip" },
>>>
>>> +    { "a53_cc", "A/53 Closed Captions in SEI NAL units",
>>> +        OFFSET(a53_cc), AV_OPT_TYPE_INT,
>>> +        { .i64 = PASS }, PASS, EXTRACT, 0, "a53_cc" },
>>> +    { "pass",    NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PASS    }, .unit = "a53_cc" },
>>> +    { "insert",  NULL, 0, AV_OPT_TYPE_CONST, { .i64 = INSERT  }, .unit = "a53_cc" },
>>> +    { "remove",  NULL, 0, AV_OPT_TYPE_CONST, { .i64 = REMOVE  }, .unit = "a53_cc" },
>>> +    { "extract", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = EXTRACT }, .unit = "a53_cc" },
>>> +
>>
>> Does extracting really make sense? Doesn't the data end up out of
>> order and basically unusable?
>
> Well, it's up to whatever follows to deal with that.  If the stream has correct timestamps (ha) then you can use those directly.  If you're feeding the output to an opaque hardware decoder then having some way to associate input packets with output frames is sufficient to get the right ordering.  A BSF to deal with reordering somehow is also possible.
>

That seems like a problem. You generate side-data like any other, but
in reality it's invalid, and if you process it like any other A53
side-data you get corrupt output.
The same goes for "insert", I guess: how do I figure out in which
order to pass things to it to get any sort of functional output? That's
a very specific setup which requires extremely custom and careful
usage; does that really fit in a generic filter?

- Hendrik
Mark Thompson March 12, 2018, 4:25 p.m. UTC | #7
On 12/03/18 15:10, Hendrik Leppkes wrote:
> On Mon, Mar 12, 2018 at 2:38 PM, Mark Thompson <sw@jkqxz.net> wrote:
>> On 12/03/18 09:54, Hendrik Leppkes wrote:
>>> On Sun, Mar 11, 2018 at 7:30 PM, Mark Thompson <sw@jkqxz.net> wrote:
>>>> ---
>>>>  libavcodec/h264_metadata_bsf.c | 121 +++++++++++++++++++++++++++++++++++++++++
>>>>  1 file changed, 121 insertions(+)
>>>>
>>>> diff --git a/libavcodec/h264_metadata_bsf.c b/libavcodec/h264_metadata_bsf.c
>>>> index 36047887ca..d340c55990 100644
>>>> --- a/libavcodec/h264_metadata_bsf.c
>>>> +++ b/libavcodec/h264_metadata_bsf.c
>>>> @@ -77,6 +77,8 @@ typedef struct H264MetadataContext {
>>>>      int display_orientation;
>>>>      double rotate;
>>>>      int flip;
>>>> +
>>>> +    int a53_cc;
>>>>  } H264MetadataContext;
>>>>
>>>>
>>>> @@ -225,6 +227,8 @@ static int h264_metadata_filter(AVBSFContext *bsf, AVPacket *out)
>>>>      int err, i, j, has_sps;
>>>>      uint8_t *displaymatrix_side_data = NULL;
>>>>      size_t displaymatrix_side_data_size = 0;
>>>> +    uint8_t *a53_side_data = NULL;
>>>> +    size_t a53_side_data_size = 0;
>>>>
>>>>      err = ff_bsf_get_packet(bsf, &in);
>>>>      if (err < 0)
>>>> @@ -514,6 +518,104 @@ static int h264_metadata_filter(AVBSFContext *bsf, AVPacket *out)
>>>>          }
>>>>      }
>>>>
>>>> +    if (ctx->a53_cc == INSERT) {
>>>> +        uint8_t *data;
>>>> +        int size;
>>>> +
>>>> +        data = av_packet_get_side_data(in, AV_PKT_DATA_A53_CC, &size);
>>>> +        if (data) {
>>>> +            H264RawSEIPayload payload = {
>>>> +                .payload_type = H264_SEI_TYPE_USER_DATA_REGISTERED,
>>>> +            };
>>>> +            H264RawSEIUserDataRegistered *udr =
>>>> +                &payload.payload.user_data_registered;
>>>> +
>>>> +            av_log(bsf, AV_LOG_WARNING, "A53 CC insert: %d bytes.\n", size);
>>>> +
>>>> +            udr->data_length = size + 10;
>>>> +            udr->data_ref    = av_buffer_alloc(udr->data_length);
>>>> +            if (!udr->data_ref) {
>>>> +                err = AVERROR(ENOMEM);
>>>> +                goto fail;
>>>> +            }
>>>> +            udr->data = udr->data_ref->data;
>>>> +
>>>> +            udr->itu_t_t35_country_code = 181;
>>>> +            udr->data[0] = 0;
>>>> +            udr->data[1] = 49;
>>>> +            AV_WB32(udr->data + 2, MKBETAG('G', 'A', '9', '4'));
>>>> +            udr->data[6] = 3;
>>>> +            udr->data[7] = ((size / 3) & 0x1f) | 0x40;
>>>> +            udr->data[8] = 0;
>>>> +            memcpy(udr->data + 9, data, size);
>>>> +            udr->data[size + 9] = 0xff;
>>>> +
>>>> +            err = ff_cbs_h264_add_sei_message(ctx->cbc, au, &payload);
>>>> +            if (err < 0) {
>>>> +                av_log(bsf, AV_LOG_ERROR, "Failed to add user data SEI "
>>>> +                       "message to access unit.\n");
>>>> +                av_buffer_unref(&udr->data_ref);
>>>> +                goto fail;
>>>> +            }
>>>> +        }
>>>> +
>>>> +    } else if (ctx->a53_cc == REMOVE || ctx->a53_cc == EXTRACT) {
>>>> +        for (i = 0; i < au->nb_units; i++) {
>>>> +            H264RawSEI *sei;
>>>> +            if (au->units[i].type != H264_NAL_SEI)
>>>> +                continue;
>>>> +            sei = au->units[i].content;
>>>> +
>>>> +            for (j = 0; j < sei->payload_count; j++) {
>>>> +                H264RawSEIUserDataRegistered *udr;
>>>> +                uint32_t tag;
>>>> +                uint8_t type_code, count;
>>>> +
>>>> +                if (sei->payload[j].payload_type !=
>>>> +                    H264_SEI_TYPE_USER_DATA_REGISTERED)
>>>> +                    continue;
>>>> +                udr = &sei->payload[j].payload.user_data_registered;
>>>> +                tag = AV_RB32(udr->data + 2);
>>>> +                type_code = udr->data[6];
>>>> +                if (tag != MKBETAG('G', 'A', '9', '4') || type_code != 3)
>>>> +                    continue;
>>>> +
>>>> +                if (ctx->a53_cc == REMOVE) {
>>>> +                    err = ff_cbs_h264_delete_sei_message(ctx->cbc, au,
>>>> +                                                         &au->units[i], j);
>>>> +                    if (err < 0) {
>>>> +                        av_log(bsf, AV_LOG_ERROR, "Failed to delete "
>>>> +                               "A53 CC SEI message.\n");
>>>> +                        goto fail;
>>>> +                    }
>>>> +                    av_log(bsf, AV_LOG_WARNING, "A53 CC remove!.\n");
>>>> +
>>>> +                    --i;
>>>> +                    break;
>>>> +                }
>>>> +
>>>> +                // Extract.
>>>> +                count = udr->data[7] & 0x1f;
>>>> +                if (3 * count + 10 > udr->data_length) {
>>>> +                    av_log(bsf, AV_LOG_ERROR, "Invalid A/53 closed caption "
>>>> +                           "data: count %d overflows length %zu.\n",
>>>> +                           count, udr->data_length);
>>>> +                    continue;
>>>> +                }
>>>> +
>>>> +                av_log(bsf, AV_LOG_WARNING, "A53 CC extract: %zu bytes.\n", udr->data_length);
>>>> +
>>>> +                err = av_reallocp(&a53_side_data,
>>>> +                                  a53_side_data_size + 3 * count);
>>>> +                if (err)
>>>> +                    goto fail;
>>>> +                memcpy(a53_side_data + a53_side_data_size,
>>>> +                       udr->data + 9, 3 * count);
>>>> +                a53_side_data_size += 3 * count;
>>>> +            }
>>>> +        }
>>>> +    }
>>>> +
>>>>      err = ff_cbs_write_packet(ctx->cbc, out, au);
>>>>      if (err < 0) {
>>>>          av_log(bsf, AV_LOG_ERROR, "Failed to write packet.\n");
>>>> @@ -535,6 +637,16 @@ static int h264_metadata_filter(AVBSFContext *bsf, AVPacket *out)
>>>>          }
>>>>          displaymatrix_side_data = NULL;
>>>>      }
>>>> +    if (a53_side_data) {
>>>> +        err = av_packet_add_side_data(out, AV_PKT_DATA_A53_CC,
>>>> +                                      a53_side_data, a53_side_data_size);
>>>> +        if (err) {
>>>> +            av_log(bsf, AV_LOG_ERROR, "Failed to attach extracted A/53 "
>>>> +                   "side data to packet.\n");
>>>> +            goto fail;
>>>> +        }
>>>> +        a53_side_data = NULL;
>>>> +    }
>>>>
>>>>      ctx->done_first_au = 1;
>>>>
>>>> @@ -542,6 +654,7 @@ static int h264_metadata_filter(AVBSFContext *bsf, AVPacket *out)
>>>>  fail:
>>>>      ff_cbs_fragment_uninit(ctx->cbc, au);
>>>>      av_freep(&displaymatrix_side_data);
>>>> +    av_freep(&a53_side_data);
>>>>
>>>>      av_packet_free(&in);
>>>>
>>>> @@ -670,6 +783,14 @@ static const AVOption h264_metadata_options[] = {
>>>>      { "vertical",   "Set ver_flip",
>>>>          0, AV_OPT_TYPE_CONST, { .i64 = FLIP_VERTICAL },   .unit ="flip" },
>>>>
>>>> +    { "a53_cc", "A/53 Closed Captions in SEI NAL units",
>>>> +        OFFSET(a53_cc), AV_OPT_TYPE_INT,
>>>> +        { .i64 = PASS }, PASS, EXTRACT, 0, "a53_cc" },
>>>> +    { "pass",    NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PASS    }, .unit = "a53_cc" },
>>>> +    { "insert",  NULL, 0, AV_OPT_TYPE_CONST, { .i64 = INSERT  }, .unit = "a53_cc" },
>>>> +    { "remove",  NULL, 0, AV_OPT_TYPE_CONST, { .i64 = REMOVE  }, .unit = "a53_cc" },
>>>> +    { "extract", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = EXTRACT }, .unit = "a53_cc" },
>>>> +
>>>
>>> Does extracting really make sense? Doesn't the data end up out of
>>> order and basically unusable?
>>
>> Well, it's up to whatever follows to deal with that.  If the stream has correct timestamps (ha) then you can use those directly.  If you're feeding the output to an opaque hardware decoder then having some way to associate input packets with output frames is sufficient to get the right ordering.  A BSF to deal with reordering somehow is also possible.
>>
> 
> That seems like a problem. You generate side-data like any other, but
> in reality it's invalid, and if you process it like any other A53
> side-data you get corrupt output.

Each packet is being tagged with the side-data extracted from the contents of that packet.  Like the non-side-data part of a packet, this needs to be passed through a decoder (of some kind) to turn a packet into a frame before you can display it sensibly.

> The same goes for "insert", I guess: how do I figure out in which
> order to pass things to it to get any sort of functional output? That's
> a very specific setup which requires extremely custom and careful
> usage; does that really fit in a generic filter?
As with the decoder case, it can work with any encoder which can match output packets to input frames.  (I haven't sent any patches to pass side-data through an encoder yet, but I plan to do so at least for VAAPI.)

- Mark
Aman Karmani March 20, 2018, 11:02 p.m. UTC | #8
On Mon, Mar 12, 2018 at 9:25 AM, Mark Thompson <sw@jkqxz.net> wrote:

> On 12/03/18 15:10, Hendrik Leppkes wrote:
> > On Mon, Mar 12, 2018 at 2:38 PM, Mark Thompson <sw@jkqxz.net> wrote:
> >> On 12/03/18 09:54, Hendrik Leppkes wrote:
> >>> On Sun, Mar 11, 2018 at 7:30 PM, Mark Thompson <sw@jkqxz.net> wrote:
> >>>> ---
> >>>>  libavcodec/h264_metadata_bsf.c | 121 ++++++++++++++++++++++++++++++
> +++++++++++
> >>>>  1 file changed, 121 insertions(+)
> >>>>
> >>>> diff --git a/libavcodec/h264_metadata_bsf.c
> b/libavcodec/h264_metadata_bsf.c
> >>>> index 36047887ca..d340c55990 100644
> >>>> --- a/libavcodec/h264_metadata_bsf.c
> >>>> +++ b/libavcodec/h264_metadata_bsf.c
> >>>> @@ -77,6 +77,8 @@ typedef struct H264MetadataContext {
> >>>>      int display_orientation;
> >>>>      double rotate;
> >>>>      int flip;
> >>>> +
> >>>> +    int a53_cc;
> >>>>  } H264MetadataContext;
> >>>>
> >>>>
> >>>> @@ -225,6 +227,8 @@ static int h264_metadata_filter(AVBSFContext
> *bsf, AVPacket *out)
> >>>>      int err, i, j, has_sps;
> >>>>      uint8_t *displaymatrix_side_data = NULL;
> >>>>      size_t displaymatrix_side_data_size = 0;
> >>>> +    uint8_t *a53_side_data = NULL;
> >>>> +    size_t a53_side_data_size = 0;
> >>>>
> >>>>      err = ff_bsf_get_packet(bsf, &in);
> >>>>      if (err < 0)
> >>>> @@ -514,6 +518,104 @@ static int h264_metadata_filter(AVBSFContext
> *bsf, AVPacket *out)
> >>>>          }
> >>>>      }
> >>>>
> >>>> +    if (ctx->a53_cc == INSERT) {
> >>>> +        uint8_t *data;
> >>>> +        int size;
> >>>> +
> >>>> +        data = av_packet_get_side_data(in, AV_PKT_DATA_A53_CC,
> &size);
> >>>> +        if (data) {
> >>>> +            H264RawSEIPayload payload = {
> >>>> +                .payload_type = H264_SEI_TYPE_USER_DATA_REGISTERED,
> >>>> +            };
> >>>> +            H264RawSEIUserDataRegistered *udr =
> >>>> +                &payload.payload.user_data_registered;
> >>>> +
> >>>> +            av_log(bsf, AV_LOG_WARNING, "A53 CC insert: %d
> bytes.\n", size);
> >>>> +
> >>>> +            udr->data_length = size + 10;
> >>>> +            udr->data_ref    = av_buffer_alloc(udr->data_length);
> >>>> +            if (!udr->data_ref) {
> >>>> +                err = AVERROR(ENOMEM);
> >>>> +                goto fail;
> >>>> +            }
> >>>> +            udr->data = udr->data_ref->data;
> >>>> +
> >>>> +            udr->itu_t_t35_country_code = 181;
> >>>> +            udr->data[0] = 0;
> >>>> +            udr->data[1] = 49;
> >>>> +            AV_WB32(udr->data + 2, MKBETAG('G', 'A', '9', '4'));
> >>>> +            udr->data[6] = 3;
> >>>> +            udr->data[7] = ((size / 3) & 0x1f) | 0x40;
> >>>> +            udr->data[8] = 0;
> >>>> +            memcpy(udr->data + 9, data, size);
> >>>> +            udr->data[size + 9] = 0xff;
> >>>> +
> >>>> +            err = ff_cbs_h264_add_sei_message(ctx->cbc, au,
> &payload);
> >>>> +            if (err < 0) {
> >>>> +                av_log(bsf, AV_LOG_ERROR, "Failed to add user data
> SEI "
> >>>> +                       "message to access unit.\n");
> >>>> +                av_buffer_unref(&udr->data_ref);
> >>>> +                goto fail;
> >>>> +            }
> >>>> +        }
> >>>> +
> >>>> +    } else if (ctx->a53_cc == REMOVE || ctx->a53_cc == EXTRACT) {
> >>>> +        for (i = 0; i < au->nb_units; i++) {
> >>>> +            H264RawSEI *sei;
> >>>> +            if (au->units[i].type != H264_NAL_SEI)
> >>>> +                continue;
> >>>> +            sei = au->units[i].content;
> >>>> +
> >>>> +            for (j = 0; j < sei->payload_count; j++) {
> >>>> +                H264RawSEIUserDataRegistered *udr;
> >>>> +                uint32_t tag;
> >>>> +                uint8_t type_code, count;
> >>>> +
> >>>> +                if (sei->payload[j].payload_type !=
> >>>> +                    H264_SEI_TYPE_USER_DATA_REGISTERED)
> >>>> +                    continue;
> >>>> +                udr = &sei->payload[j].payload.user_data_registered;
> >>>> +                tag = AV_RB32(udr->data + 2);
> >>>> +                type_code = udr->data[6];
> >>>> +                if (tag != MKBETAG('G', 'A', '9', '4') || type_code
> != 3)
> >>>> +                    continue;
> >>>> +
> >>>> +                if (ctx->a53_cc == REMOVE) {
> >>>> +                    err = ff_cbs_h264_delete_sei_message(ctx->cbc,
> au,
> >>>> +
>  &au->units[i], j);
> >>>> +                    if (err < 0) {
> >>>> +                        av_log(bsf, AV_LOG_ERROR, "Failed to delete "
> >>>> +                               "A53 CC SEI message.\n");
> >>>> +                        goto fail;
> >>>> +                    }
> >>>> +                    av_log(bsf, AV_LOG_WARNING, "A53 CC remove!.\n");
> >>>> +
> >>>> +                    --i;
> >>>> +                    break;
> >>>> +                }
> >>>> +
> >>>> +                // Extract.
> >>>> +                count = udr->data[7] & 0x1f;
> >>>> +                if (3 * count + 10 > udr->data_length) {
> >>>> +                    av_log(bsf, AV_LOG_ERROR, "Invalid A/53 closed
> caption "
> >>>> +                           "data: count %d overflows length %zu.\n",
> >>>> +                           count, udr->data_length);
> >>>> +                    continue;
> >>>> +                }
> >>>> +
> >>>> +                av_log(bsf, AV_LOG_WARNING, "A53 CC extract: %zu
> bytes.\n", udr->data_length);
> >>>> +
> >>>> +                err = av_reallocp(&a53_side_data,
> >>>> +                                  a53_side_data_size + 3 * count);
> >>>> +                if (err)
> >>>> +                    goto fail;
> >>>> +                memcpy(a53_side_data + a53_side_data_size,
> >>>> +                       udr->data + 9, 3 * count);
> >>>> +                a53_side_data_size += 3 * count;
> >>>> +            }
> >>>> +        }
> >>>> +    }
> >>>> +
> >>>>      err = ff_cbs_write_packet(ctx->cbc, out, au);
> >>>>      if (err < 0) {
> >>>>          av_log(bsf, AV_LOG_ERROR, "Failed to write packet.\n");
> >>>> @@ -535,6 +637,16 @@ static int h264_metadata_filter(AVBSFContext
> *bsf, AVPacket *out)
> >>>>          }
> >>>>          displaymatrix_side_data = NULL;
> >>>>      }
> >>>> +    if (a53_side_data) {
> >>>> +        err = av_packet_add_side_data(out, AV_PKT_DATA_A53_CC,
> >>>> +                                      a53_side_data,
> a53_side_data_size);
> >>>> +        if (err) {
> >>>> +            av_log(bsf, AV_LOG_ERROR, "Failed to attach extracted
> A/53 "
> >>>> +                   "side data to packet.\n");
> >>>> +            goto fail;
> >>>> +        }
> >>>> +        a53_side_data = NULL;
> >>>> +    }
> >>>>
> >>>>      ctx->done_first_au = 1;
> >>>>
> >>>> @@ -542,6 +654,7 @@ static int h264_metadata_filter(AVBSFContext
> *bsf, AVPacket *out)
> >>>>  fail:
> >>>>      ff_cbs_fragment_uninit(ctx->cbc, au);
> >>>>      av_freep(&displaymatrix_side_data);
> >>>> +    av_freep(&a53_side_data);
> >>>>
> >>>>      av_packet_free(&in);
> >>>>
> >>>> @@ -670,6 +783,14 @@ static const AVOption h264_metadata_options[] = {
> >>>>      { "vertical",   "Set ver_flip",
> >>>>          0, AV_OPT_TYPE_CONST, { .i64 = FLIP_VERTICAL },   .unit
> ="flip" },
> >>>>
> >>>> +    { "a53_cc", "A/53 Closed Captions in SEI NAL units",
> >>>> +        OFFSET(a53_cc), AV_OPT_TYPE_INT,
> >>>> +        { .i64 = PASS }, PASS, EXTRACT, 0, "a53_cc" },
> >>>> +    { "pass",    NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PASS    },
> .unit = "a53_cc" },
> >>>> +    { "insert",  NULL, 0, AV_OPT_TYPE_CONST, { .i64 = INSERT  },
> .unit = "a53_cc" },
> >>>> +    { "remove",  NULL, 0, AV_OPT_TYPE_CONST, { .i64 = REMOVE  },
> .unit = "a53_cc" },
> >>>> +    { "extract", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = EXTRACT },
> .unit = "a53_cc" },
> >>>> +
> >>>
> >>> Does extracting really make sense? Doesn't the data end up out of
> >>> order and basically unusable?
> >>
> >> Well, it's up to whatever follows to deal with that.  If the stream has
> correct timestamps (ha) then you can use those directly.  If you're
> feeding the output to an opaque hardware decoder then having some way to
> associate input packets with output frames is sufficient to get the right
> ordering.  A BSF to deal with reordering somehow is also possible.
> >>
> >
> > That seems like a problem. You generate side-data like any other, but
> > in reality it's invalid, and if you process it like any other A53
> > side-data you get corrupt output.
>
> Each packet is being tagged with the side-data extracted from the contents
> of that packet.  Like the non-side-data part of a packet, this needs to be
> passed through a decoder (of some kind) to turn a packet into a frame
> before you can display it sensibly.
>
> > The same goes for "insert", I guess: how do I figure out in which
> > order to pass things to it to get any sort of functional output? That's
> > a very specific setup which requires extremely custom and careful
> > usage; does that really fit in a generic filter?
> As with the decoder case, it can work with any encoder which can match
> output packets to input frames.  (I haven't sent any patches to pass
> side-data through an encoder yet, but I plan to do so at least for VAAPI.)
>

The videotoolbox encoder currently implements its own A53 insertion logic
(which is also buggy and produces broken bitstreams in some cases). It does
so by passing the side-data into the system encoder along with the video
data, and then inserting it into the SEI manually when the encoder returns a
slice.

Using this new INSERT bitstream mode would clean up that encoder
significantly, and also probably fix the bug in the manual SEI manipulation
code.

Aman


>
> - Mark
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
diff mbox

Patch

diff --git a/libavcodec/h264_metadata_bsf.c b/libavcodec/h264_metadata_bsf.c
index 36047887ca..d340c55990 100644
--- a/libavcodec/h264_metadata_bsf.c
+++ b/libavcodec/h264_metadata_bsf.c
@@ -77,6 +77,8 @@  typedef struct H264MetadataContext {
     int display_orientation;
     double rotate;
     int flip;
+
+    int a53_cc;
 } H264MetadataContext;
 
 
@@ -225,6 +227,8 @@  static int h264_metadata_filter(AVBSFContext *bsf, AVPacket *out)
     int err, i, j, has_sps;
     uint8_t *displaymatrix_side_data = NULL;
     size_t displaymatrix_side_data_size = 0;
+    uint8_t *a53_side_data = NULL;
+    size_t a53_side_data_size = 0;
 
     err = ff_bsf_get_packet(bsf, &in);
     if (err < 0)
@@ -514,6 +518,104 @@  static int h264_metadata_filter(AVBSFContext *bsf, AVPacket *out)
         }
     }
 
+    if (ctx->a53_cc == INSERT) {
+        uint8_t *data;
+        int size;
+
+        data = av_packet_get_side_data(in, AV_PKT_DATA_A53_CC, &size);
+        if (data) {
+            H264RawSEIPayload payload = {
+                .payload_type = H264_SEI_TYPE_USER_DATA_REGISTERED,
+            };
+            H264RawSEIUserDataRegistered *udr =
+                &payload.payload.user_data_registered;
+
+            av_log(bsf, AV_LOG_WARNING, "A53 CC insert: %d bytes.\n", size);
+
+            udr->data_length = size + 10;
+            udr->data_ref    = av_buffer_alloc(udr->data_length);
+            if (!udr->data_ref) {
+                err = AVERROR(ENOMEM);
+                goto fail;
+            }
+            udr->data = udr->data_ref->data;
+
+            udr->itu_t_t35_country_code = 181;
+            udr->data[0] = 0;
+            udr->data[1] = 49;
+            AV_WB32(udr->data + 2, MKBETAG('G', 'A', '9', '4'));
+            udr->data[6] = 3;
+            udr->data[7] = ((size / 3) & 0x1f) | 0x40;
+            udr->data[8] = 0;
+            memcpy(udr->data + 9, data, size);
+            udr->data[size + 9] = 0xff;
+
+            err = ff_cbs_h264_add_sei_message(ctx->cbc, au, &payload);
+            if (err < 0) {
+                av_log(bsf, AV_LOG_ERROR, "Failed to add user data SEI "
+                       "message to access unit.\n");
+                av_buffer_unref(&udr->data_ref);
+                goto fail;
+            }
+        }
+
+    } else if (ctx->a53_cc == REMOVE || ctx->a53_cc == EXTRACT) {
+        for (i = 0; i < au->nb_units; i++) {
+            H264RawSEI *sei;
+            if (au->units[i].type != H264_NAL_SEI)
+                continue;
+            sei = au->units[i].content;
+
+            for (j = 0; j < sei->payload_count; j++) {
+                H264RawSEIUserDataRegistered *udr;
+                uint32_t tag;
+                uint8_t type_code, count;
+
+                if (sei->payload[j].payload_type !=
+                    H264_SEI_TYPE_USER_DATA_REGISTERED)
+                    continue;
+                udr = &sei->payload[j].payload.user_data_registered;
+                tag = AV_RB32(udr->data + 2);
+                type_code = udr->data[6];
+                if (tag != MKBETAG('G', 'A', '9', '4') || type_code != 3)
+                    continue;
+
+                if (ctx->a53_cc == REMOVE) {
+                    err = ff_cbs_h264_delete_sei_message(ctx->cbc, au,
+                                                         &au->units[i], j);
+                    if (err < 0) {
+                        av_log(bsf, AV_LOG_ERROR, "Failed to delete "
+                               "A53 CC SEI message.\n");
+                        goto fail;
+                    }
+                    av_log(bsf, AV_LOG_WARNING, "A53 CC remove!.\n");
+
+                    --i;
+                    break;
+                }
+
+                // Extract.
+                count = udr->data[7] & 0x1f;
+                if (3 * count + 10 > udr->data_length) {
+                    av_log(bsf, AV_LOG_ERROR, "Invalid A/53 closed caption "
+                           "data: count %d overflows length %zu.\n",
+                           count, udr->data_length);
+                    continue;
+                }
+
+                av_log(bsf, AV_LOG_WARNING, "A53 CC extract: %zu bytes.\n", udr->data_length);
+
+                err = av_reallocp(&a53_side_data,
+                                  a53_side_data_size + 3 * count);
+                if (err)
+                    goto fail;
+                memcpy(a53_side_data + a53_side_data_size,
+                       udr->data + 9, 3 * count);
+                a53_side_data_size += 3 * count;
+            }
+        }
+    }
+
     err = ff_cbs_write_packet(ctx->cbc, out, au);
     if (err < 0) {
         av_log(bsf, AV_LOG_ERROR, "Failed to write packet.\n");
@@ -535,6 +637,16 @@  static int h264_metadata_filter(AVBSFContext *bsf, AVPacket *out)
         }
         displaymatrix_side_data = NULL;
     }
+    if (a53_side_data) {
+        err = av_packet_add_side_data(out, AV_PKT_DATA_A53_CC,
+                                      a53_side_data, a53_side_data_size);
+        if (err) {
+            av_log(bsf, AV_LOG_ERROR, "Failed to attach extracted A/53 "
+                   "side data to packet.\n");
+            goto fail;
+        }
+        a53_side_data = NULL;
+    }
 
     ctx->done_first_au = 1;
 
@@ -542,6 +654,7 @@  static int h264_metadata_filter(AVBSFContext *bsf, AVPacket *out)
 fail:
     ff_cbs_fragment_uninit(ctx->cbc, au);
     av_freep(&displaymatrix_side_data);
+    av_freep(&a53_side_data);
 
     av_packet_free(&in);
 
@@ -670,6 +783,14 @@  static const AVOption h264_metadata_options[] = {
     { "vertical",   "Set ver_flip",
         0, AV_OPT_TYPE_CONST, { .i64 = FLIP_VERTICAL },   .unit ="flip" },
 
+    { "a53_cc", "A/53 Closed Captions in SEI NAL units",
+        OFFSET(a53_cc), AV_OPT_TYPE_INT,
+        { .i64 = PASS }, PASS, EXTRACT, 0, "a53_cc" },
+    { "pass",    NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PASS    }, .unit = "a53_cc" },
+    { "insert",  NULL, 0, AV_OPT_TYPE_CONST, { .i64 = INSERT  }, .unit = "a53_cc" },
+    { "remove",  NULL, 0, AV_OPT_TYPE_CONST, { .i64 = REMOVE  }, .unit = "a53_cc" },
+    { "extract", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = EXTRACT }, .unit = "a53_cc" },
+
     { NULL }
 };