diff mbox series

[FFmpeg-devel,v6,21/22] h264_metadata_bsf: Refactor the filter function into smaller parts

Message ID 20200727163237.23371-22-sw@jkqxz.net
State New
Headers show
Series CBS unit type tables and assorted related stuff
Related show

Checks

Context Check Description
andriy/default pending
andriy/make success Make finished
andriy/make_fate success Make fate finished

Commit Message

Mark Thompson July 27, 2020, 4:32 p.m. UTC
---
 libavcodec/h264_metadata_bsf.c | 443 ++++++++++++++++++---------------
 1 file changed, 242 insertions(+), 201 deletions(-)

Comments

Andreas Rheinhardt Aug. 12, 2020, 1:55 a.m. UTC | #1
First of all: I only looked at the sei_user_data stuff yet.

Mark Thompson:
> ---
>  libavcodec/h264_metadata_bsf.c | 443 ++++++++++++++++++---------------
>  1 file changed, 242 insertions(+), 201 deletions(-)
> 
> diff --git a/libavcodec/h264_metadata_bsf.c b/libavcodec/h264_metadata_bsf.c
> index eb1503159b..1d00ccdfb8 100644
> --- a/libavcodec/h264_metadata_bsf.c
> +++ b/libavcodec/h264_metadata_bsf.c
> @@ -56,6 +56,7 @@ typedef struct H264MetadataContext {
>      int done_first_au;
>  
>      int aud;
> +    H264RawAUD aud_nal;
>  
>      AVRational sample_aspect_ratio;
>  
> @@ -78,6 +79,7 @@ typedef struct H264MetadataContext {
>      int crop_bottom;
>  
>      const char *sei_user_data;
> +    H264RawSEIPayload sei_user_data_payload;
>  
>      int delete_filler;
>  
> @@ -89,6 +91,59 @@ typedef struct H264MetadataContext {
>  } H264MetadataContext;
>  
>  
> +static int h264_metadata_insert_aud(AVBSFContext *bsf,
> +                                    CodedBitstreamFragment *au)
> +{
> +    H264MetadataContext *ctx = bsf->priv_data;
> +    int primary_pic_type_mask = 0xff;
> +    int err, i, j;
> +
> +    static const int primary_pic_type_table[] = {
> +        0x084, // 2, 7
> +        0x0a5, // 0, 2, 5, 7
> +        0x0e7, // 0, 1, 2, 5, 6, 7
> +        0x210, // 4, 9
> +        0x318, // 3, 4, 8, 9
> +        0x294, // 2, 4, 7, 9
> +        0x3bd, // 0, 2, 3, 4, 5, 7, 8, 9
> +        0x3ff, // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9
> +    };
> +
> +    for (i = 0; i < au->nb_units; i++) {
> +        if (au->units[i].type == H264_NAL_SLICE ||
> +            au->units[i].type == H264_NAL_IDR_SLICE) {
> +            H264RawSlice *slice = au->units[i].content;
> +            for (j = 0; j < FF_ARRAY_ELEMS(primary_pic_type_table); j++) {
> +                if (!(primary_pic_type_table[j] &
> +                      (1 << slice->header.slice_type)))
> +                    primary_pic_type_mask &= ~(1 << j);
> +            }
> +        }
> +    }
> +    for (j = 0; j < FF_ARRAY_ELEMS(primary_pic_type_table); j++)
> +        if (primary_pic_type_mask & (1 << j))
> +            break;
> +    if (j >= FF_ARRAY_ELEMS(primary_pic_type_table)) {
> +        av_log(bsf, AV_LOG_ERROR, "No usable primary_pic_type: "
> +               "invalid slice types?\n");
> +        return AVERROR_INVALIDDATA;
> +    }
> +
> +    ctx->aud_nal = (H264RawAUD) {
> +        .nal_unit_header.nal_unit_type = H264_NAL_AUD,
> +        .primary_pic_type = j,
> +    };
> +
> +    err = ff_cbs_insert_unit_content(au, 0, H264_NAL_AUD,
> +                                     &ctx->aud_nal, NULL);
> +    if (err < 0) {
> +        av_log(bsf, AV_LOG_ERROR, "Failed to insert AUD.\n");
> +        return err;
> +    }
> +
> +    return 0;
> +}
> +
>  static int h264_metadata_update_sps(AVBSFContext *bsf,
>                                      H264RawSPS *sps)
>  {
> @@ -320,219 +375,59 @@ static int h264_metadata_update_side_data(AVBSFContext *bsf, AVPacket *pkt)
>      return 0;
>  }
>  
> -static int h264_metadata_filter(AVBSFContext *bsf, AVPacket *pkt)
> +static int h264_metadata_handle_display_orientation(AVBSFContext *bsf,
> +                                                    AVPacket *pkt,
> +                                                    CodedBitstreamFragment *au,
> +                                                    int seek_point)
>  {
>      H264MetadataContext *ctx = bsf->priv_data;
> -    CodedBitstreamFragment *au = &ctx->access_unit;
> -    int err, i, j, has_sps;
> -    H264RawAUD aud;
> -
> -    err = ff_bsf_get_packet_ref(bsf, pkt);
> -    if (err < 0)
> -        return err;
> -
> -    err = h264_metadata_update_side_data(bsf, pkt);
> -    if (err < 0)
> -        goto fail;
> -
> -    err = ff_cbs_read_packet(ctx->input, au, pkt);
> -    if (err < 0) {
> -        av_log(bsf, AV_LOG_ERROR, "Failed to read packet.\n");
> -        goto fail;
> -    }
> -
> -    if (au->nb_units == 0) {
> -        av_log(bsf, AV_LOG_ERROR, "No NAL units in packet.\n");
> -        err = AVERROR_INVALIDDATA;
> -        goto fail;
> -    }
> -
> -    // If an AUD is present, it must be the first NAL unit.
> -    if (au->units[0].type == H264_NAL_AUD) {
> -        if (ctx->aud == REMOVE)
> -            ff_cbs_delete_unit(au, 0);
> -    } else {
> -        if (ctx->aud == INSERT) {
> -            static const int primary_pic_type_table[] = {
> -                0x084, // 2, 7
> -                0x0a5, // 0, 2, 5, 7
> -                0x0e7, // 0, 1, 2, 5, 6, 7
> -                0x210, // 4, 9
> -                0x318, // 3, 4, 8, 9
> -                0x294, // 2, 4, 7, 9
> -                0x3bd, // 0, 2, 3, 4, 5, 7, 8, 9
> -                0x3ff, // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9
> -            };
> -            int primary_pic_type_mask = 0xff;
> -
> -            for (i = 0; i < au->nb_units; i++) {
> -                if (au->units[i].type == H264_NAL_SLICE ||
> -                    au->units[i].type == H264_NAL_IDR_SLICE) {
> -                    H264RawSlice *slice = au->units[i].content;
> -                    for (j = 0; j < FF_ARRAY_ELEMS(primary_pic_type_table); j++) {
> -                         if (!(primary_pic_type_table[j] &
> -                               (1 << slice->header.slice_type)))
> -                             primary_pic_type_mask &= ~(1 << j);
> -                    }
> -                }
> -            }
> -            for (j = 0; j < FF_ARRAY_ELEMS(primary_pic_type_table); j++)
> -                if (primary_pic_type_mask & (1 << j))
> -                    break;
> -            if (j >= FF_ARRAY_ELEMS(primary_pic_type_table)) {
> -                av_log(bsf, AV_LOG_ERROR, "No usable primary_pic_type: "
> -                       "invalid slice types?\n");
> -                err = AVERROR_INVALIDDATA;
> -                goto fail;
> -            }
> -
> -            aud = (H264RawAUD) {
> -                .nal_unit_header.nal_unit_type = H264_NAL_AUD,
> -                .primary_pic_type = j,
> -            };
> +    int err, i, j;
>  
> -            err = ff_cbs_insert_unit_content(au,
> -                                             0, H264_NAL_AUD, &aud, NULL);
> -            if (err < 0) {
> -                av_log(bsf, AV_LOG_ERROR, "Failed to insert AUD.\n");
> -                goto fail;
> -            }
> -        }
> -    }
> -
> -    has_sps = 0;
> -    for (i = 0; i < au->nb_units; i++) {
> -        if (au->units[i].type == H264_NAL_SPS) {
> -            err = h264_metadata_update_sps(bsf, au->units[i].content);
> -            if (err < 0)
> -                goto fail;
> -            has_sps = 1;
> -        }
> -    }
> +    for (i = au->nb_units - 1; i >= 0; i--) {
> +        H264RawSEI *sei;
> +        if (au->units[i].type != H264_NAL_SEI)
> +            continue;
> +        sei = au->units[i].content;
>  
> -    // Only insert the SEI in access units containing SPSs, and also
> -    // unconditionally in the first access unit we ever see.
> -    if (ctx->sei_user_data && (has_sps || !ctx->done_first_au)) {
> -        H264RawSEIPayload payload = {
> -            .payload_type = H264_SEI_TYPE_USER_DATA_UNREGISTERED,
> -        };
> -        H264RawSEIUserDataUnregistered *udu =
> -            &payload.payload.user_data_unregistered;
> +        for (j = sei->payload_count - 1; j >= 0; j--) {
> +            H264RawSEIDisplayOrientation *disp;
> +            int32_t *matrix;
>  
> -        for (i = j = 0; j < 32 && ctx->sei_user_data[i]; i++) {
> -            int c, v;
> -            c = ctx->sei_user_data[i];
> -            if (c == '-') {
> +            if (sei->payload[j].payload_type !=
> +                H264_SEI_TYPE_DISPLAY_ORIENTATION)
>                  continue;
> -            } else if (av_isxdigit(c)) {
> -                c = av_tolower(c);
> -                v = (c <= '9' ? c - '0' : c - 'a' + 10);
> -            } else {
> -                goto invalid_user_data;
> -            }
> -            if (j & 1)
> -                udu->uuid_iso_iec_11578[j / 2] |= v;
> -            else
> -                udu->uuid_iso_iec_11578[j / 2] = v << 4;
> -            ++j;
> -        }
> -        if (j == 32 && ctx->sei_user_data[i] == '+') {
> -            size_t len = strlen(ctx->sei_user_data + i + 1);
> -
> -            udu->data_ref = av_buffer_alloc(len + 1);
> -            if (!udu->data_ref) {
> -                err = AVERROR(ENOMEM);
> -                goto fail;
> -            }
> -
> -            udu->data        = udu->data_ref->data;
> -            udu->data_length = len + 1;
> -            memcpy(udu->data, ctx->sei_user_data + i + 1, len + 1);
> +            disp = &sei->payload[j].payload.display_orientation;
>  
> -            err = ff_cbs_h264_add_sei_message(au, &payload);
> -            if (err < 0) {
> -                av_log(bsf, AV_LOG_ERROR, "Failed to add user data SEI "
> -                       "message to access unit.\n");
> -                goto fail;
> -            }
> -
> -        } else {
> -        invalid_user_data:
> -            av_log(bsf, AV_LOG_ERROR, "Invalid user data: "
> -                   "must be \"UUID+string\".\n");
> -            err = AVERROR(EINVAL);
> -            goto fail;
> -        }
> -    }
> -
> -    if (ctx->delete_filler) {
> -        for (i = au->nb_units - 1; i >= 0; i--) {
> -            if (au->units[i].type == H264_NAL_FILLER_DATA) {
> -                ff_cbs_delete_unit(au, i);
> +            if (ctx->display_orientation == REMOVE ||
> +                ctx->display_orientation == INSERT) {
> +                ff_cbs_h264_delete_sei_message(au, &au->units[i], j);
>                  continue;
>              }
>  
> -            if (au->units[i].type == H264_NAL_SEI) {
> -                // Filler SEI messages.
> -                H264RawSEI *sei = au->units[i].content;
> -
> -                for (j = sei->payload_count - 1; j >= 0; j--) {
> -                    if (sei->payload[j].payload_type ==
> -                        H264_SEI_TYPE_FILLER_PAYLOAD)
> -                        ff_cbs_h264_delete_sei_message(au, &au->units[i], j);
> -                }
> +            matrix = av_malloc(9 * sizeof(int32_t));
> +            if (!matrix)
> +                return AVERROR(ENOMEM);
> +
> +            av_display_rotation_set(matrix,
> +                                    disp->anticlockwise_rotation *
> +                                    180.0 / 65536.0);
> +            av_display_matrix_flip(matrix, disp->hor_flip, disp->ver_flip);
> +
> +            // If there are multiple display orientation messages in an
> +            // access unit, then the last one added to the packet (i.e.
> +            // the first one in the access unit) will prevail.
> +            err = av_packet_add_side_data(pkt, AV_PKT_DATA_DISPLAYMATRIX,
> +                                          (uint8_t*)matrix,
> +                                          9 * sizeof(int32_t));
> +            if (err < 0) {
> +                av_log(bsf, AV_LOG_ERROR, "Failed to attach extracted "
> +                       "displaymatrix side data to packet.\n");
> +                av_free(matrix);
> +                return AVERROR(ENOMEM);
>              }
>          }
>      }
>  
> -    if (ctx->display_orientation != PASS) {
> -        for (i = au->nb_units - 1; i >= 0; i--) {
> -            H264RawSEI *sei;
> -            if (au->units[i].type != H264_NAL_SEI)
> -                continue;
> -            sei = au->units[i].content;
> -
> -            for (j = sei->payload_count - 1; j >= 0; j--) {
> -                H264RawSEIDisplayOrientation *disp;
> -                int32_t *matrix;
> -
> -                if (sei->payload[j].payload_type !=
> -                    H264_SEI_TYPE_DISPLAY_ORIENTATION)
> -                    continue;
> -                disp = &sei->payload[j].payload.display_orientation;
> -
> -                if (ctx->display_orientation == REMOVE ||
> -                    ctx->display_orientation == INSERT) {
> -                    ff_cbs_h264_delete_sei_message(au, &au->units[i], j);
> -                    continue;
> -                }
> -
> -                matrix = av_malloc(9 * sizeof(int32_t));
> -                if (!matrix) {
> -                    err = AVERROR(ENOMEM);
> -                    goto fail;
> -                }
> -
> -                av_display_rotation_set(matrix,
> -                                        disp->anticlockwise_rotation *
> -                                        180.0 / 65536.0);
> -                av_display_matrix_flip(matrix, disp->hor_flip, disp->ver_flip);
> -
> -                // If there are multiple display orientation messages in an
> -                // access unit, then the last one added to the packet (i.e.
> -                // the first one in the access unit) will prevail.
> -                err = av_packet_add_side_data(pkt, AV_PKT_DATA_DISPLAYMATRIX,
> -                                              (uint8_t*)matrix,
> -                                              9 * sizeof(int32_t));
> -                if (err < 0) {
> -                    av_log(bsf, AV_LOG_ERROR, "Failed to attach extracted "
> -                           "displaymatrix side data to packet.\n");
> -                    av_free(matrix);
> -                    goto fail;
> -                }
> -            }
> -        }
> -    }
>      if (ctx->display_orientation == INSERT) {
>          H264RawSEIPayload payload = {
>              .payload_type = H264_SEI_TYPE_DISPLAY_ORIENTATION,
> @@ -576,7 +471,7 @@ static int h264_metadata_filter(AVBSFContext *bsf, AVPacket *pkt)
>              }
>          }
>  
> -        if (has_sps || !ctx->done_first_au) {
> +        if (seek_point) {
>              if (!isnan(ctx->rotate)) {
>                  disp->anticlockwise_rotation =
>                      (uint16_t)rint((ctx->rotate >= 0.0 ? ctx->rotate
> @@ -598,11 +493,107 @@ static int h264_metadata_filter(AVBSFContext *bsf, AVPacket *pkt)
>              if (err < 0) {
>                  av_log(bsf, AV_LOG_ERROR, "Failed to add display orientation "
>                         "SEI message to access unit.\n");
> +                return err;
> +            }
> +        }
> +    }
> +
> +    return 0;
> +}
> +
> +static int h264_metadata_filter(AVBSFContext *bsf, AVPacket *pkt)
> +{
> +    H264MetadataContext *ctx = bsf->priv_data;
> +    CodedBitstreamFragment *au = &ctx->access_unit;
> +    int err, i, j, has_sps, seek_point;
> +
> +    err = ff_bsf_get_packet_ref(bsf, pkt);
> +    if (err < 0)
> +        return err;
> +
> +    err = h264_metadata_update_side_data(bsf, pkt);
> +    if (err < 0)
> +        goto fail;
> +
> +    err = ff_cbs_read_packet(ctx->input, au, pkt);
> +    if (err < 0) {
> +        av_log(bsf, AV_LOG_ERROR, "Failed to read packet.\n");
> +        goto fail;
> +    }
> +
> +    if (au->nb_units == 0) {
> +        av_log(bsf, AV_LOG_ERROR, "No NAL units in packet.\n");
> +        err = AVERROR_INVALIDDATA;
> +        goto fail;
> +    }
> +
> +    // If an AUD is present, it must be the first NAL unit.
> +    if (au->units[0].type == H264_NAL_AUD) {
> +        if (ctx->aud == REMOVE)
> +            ff_cbs_delete_unit(au, 0);
> +    } else {
> +        if (ctx->aud == INSERT) {
> +            err = h264_metadata_insert_aud(bsf, au);
> +            if (err < 0)
>                  goto fail;
> +        }
> +    }
> +
> +    has_sps = 0;
> +    for (i = 0; i < au->nb_units; i++) {
> +        if (au->units[i].type == H264_NAL_SPS) {
> +            err = h264_metadata_update_sps(bsf, au->units[i].content);
> +            if (err < 0)
> +                goto fail;
> +            has_sps = 1;
> +        }
> +    }
> +
> +    // The current packet should be treated as a seek point for metadata
> +    // insertion if any of:
> +    // - It is the first packet in the stream.
> +    // - It contains an SPS, indicating that a sequence might start here.
> +    // - It is marked as containing a key frame.
> +    seek_point = !ctx->done_first_au || has_sps ||
> +        (pkt->flags & AV_PKT_FLAG_KEY);
> +
> +    if (ctx->sei_user_data && seek_point) {
> +        err = ff_cbs_h264_add_sei_message(au, &ctx->sei_user_data_payload);
> +        if (err < 0) {
> +            av_log(bsf, AV_LOG_ERROR, "Failed to add user data SEI "
> +                   "message to access unit.\n");
> +            goto fail;
> +        }

Did you test this? With a sample with more than one seekpoint? It
shouldn't work. The reason is that the ownership of the SEI message
moves to the unit the SEI message will be attached to* (on success; on
failure, the SEI message will be freed for you) and (on success) it will
be unreferenced when the access unit gets reset. Notice that in this
case ctx->sei_user_data_payload won't be changed, but its pointers will
become dangling pointers and the second seek point will lead to a
use-after-free.

I see two ways to fix this:
My preferred solution is not to set the data_ref of the
H264RawSEIUserDataUnregistered at all. In this case one does not need to
allocate a buffer and copy the string at all -- the pointer can point
directly into the ctx->sei_user_data string. It should work; in this
case the point below about len + 1 not being in the range of int becomes
moot, of course. (Notice that due to using a PutBitContext which
currently uses an int anything longer than INT_MAX / 8 won't work anyway
-- but maybe one should nevertheless use a size_t for the loop that
writes the data in the user_data_(un)registered function?)

The second solution would be to keep a reference to the data_ref
containing the string from which to create references which are then
used in the SEI message; furthermore one would need to add code to free
this reference in the close function (yes, this is missing right now --
the buffer leaks if e.g. init fails after the buffer's allocation).

In any case I wonder whether the semantics of
ff_cbs_h264_add_sei_message() are good as is: They were designed for a
case in which the SEI message passed to it would go out of scope
immediately after the function call, so leaving it in a consistent state
was irrelevant. E.g. if one adds an SEI message with an internal ref,
there would now be two pointers to the corresponding AVBufferRef.
Furthermore freeing a user-data-unregistered SEI message currently only
unreferences the AVBufferRef; it does not reset the other fields (the
pointer might become dangling in this scenario). If the other fields
were reset, too, then one would need to store them separately outside of
the SEI message (maybe one should keep only the pointer to the data as
well as its length in the context and keep using SEI messages on the
stack?).

Moreover, I want to mention that allowing an internal buffer that is not
refcounted will cause slight complications if we ever wanted to copy the
containing unit or make it writable. But I am nevertheless in favour of
doing it here.

Furthermore, we need better FATE-tests: This bsf seems to be only tested
by passthrough tests. No options are set. This bug here would probably
have been found by you earlier if there was a test with more than one
seekpoint.

> +    }
> +
> +    if (ctx->delete_filler) {
> +        for (i = au->nb_units - 1; i >= 0; i--) {
> +            if (au->units[i].type == H264_NAL_FILLER_DATA) {
> +                ff_cbs_delete_unit(au, i);
> +                continue;
> +            }
> +
> +            if (au->units[i].type == H264_NAL_SEI) {
> +                // Filler SEI messages.
> +                H264RawSEI *sei = au->units[i].content;
> +
> +                for (j = sei->payload_count - 1; j >= 0; j--) {
> +                    if (sei->payload[j].payload_type ==
> +                        H264_SEI_TYPE_FILLER_PAYLOAD)
> +                        ff_cbs_h264_delete_sei_message(au,
> +                                                       &au->units[i], j);
> +                }
>              }
>          }
>      }
>  
> +    if (ctx->display_orientation != PASS) {
> +        err = h264_metadata_handle_display_orientation(bsf, pkt, au,
> +                                                       seek_point);
> +        if (err < 0)
> +            goto fail;
> +    }
> +
>      err = ff_cbs_write_packet(ctx->output, pkt, au);
>      if (err < 0) {
>          av_log(bsf, AV_LOG_ERROR, "Failed to write packet.\n");
> @@ -625,7 +616,57 @@ static int h264_metadata_init(AVBSFContext *bsf)
>  {
>      H264MetadataContext *ctx = bsf->priv_data;
>      CodedBitstreamFragment *au = &ctx->access_unit;
> -    int err, i;
> +    int err, i, j;
> +
> +    if (ctx->sei_user_data) {
> +        H264RawSEIUserDataUnregistered *udu;
> +        int c, v;

These two could be moved to loop body scope.

> +        size_t len;

The scope of this one should be the "Skip over the '+'." block below.

> +
> +        ctx->sei_user_data_payload = (H264RawSEIPayload) {
> +            .payload_type = H264_SEI_TYPE_USER_DATA_UNREGISTERED,
> +        };

ctx->sei_user_data_payload.payload_type =
    H264_SEI_TYPE_USER_DATA_UNREGISTERED;

is enough as ctx is already zero initialized.

> +        udu = &ctx->sei_user_data_payload.payload.user_data_unregistered;
> +
> +        // Parse UUID.  It must be a hex string of length 32, possibly
> +        // containing '-'s which we ignore.
> +        for (i = j = 0; j < 32 && ctx->sei_user_data[i]; i++) {

This code has a potential for undefined behaviour/overflow (that already
existed before this patch): There is nothing that precludes the length
of ctx->sei_user_data from being huge (you just have to set
av_max_alloc() to a huge value). And if you have lots of '-', i might
overflow.

The easiest fix for this is to not use an index, but a pointer to be
incremented directly. This also means that you do not have to add a
second loop counter variable.

> +            c = ctx->sei_user_data[i];
> +            if (c == '-') {
> +                continue;
> +            } else if (av_isxdigit(c)) {
> +                c = av_tolower(c);
> +                v = (c <= '9' ? c - '0' : c - 'a' + 10);
> +            } else {
> +                break;
> +            }
> +            if (j & 1)
> +                udu->uuid_iso_iec_11578[j / 2] |= v;
> +            else
> +                udu->uuid_iso_iec_11578[j / 2] = v << 4;
> +            ++j;
> +        }
> +        if (j < 32 || ctx->sei_user_data[i] != '+') {
> +            av_log(bsf, AV_LOG_ERROR, "Invalid user data: "
> +                   "must be of the form \"UUID+string\".\n");
> +            return AVERROR(EINVAL);
> +        } else {
> +            // Skip over the '+'.
> +            ++i;
> +
> +            // Length of the actual data to insert (could be zero).
> +            len = strlen(ctx->sei_user_data + i);
> +
> +            udu->data_ref = av_buffer_alloc(len + 1);

len + 1 might not fit into an int.

> +            if (!udu->data_ref)
> +                return AVERROR(ENOMEM);
> +
> +            udu->data_length = len + 1;

Is it really to be expected that the terminating zero be written (yes, I
know, the old code did it, too; just asking)?

> +            udu->data        = udu->data_ref->data;
> +            memcpy(udu->data, ctx->sei_user_data + 1, len);
> +            udu->data[len]   = 0;

This is unnecessary: Just copy len + 1 elements (as the old code did).

Finally, I like that you move parsing of the sei_user_data string to
init; that's the proper place for it. But this should be done in a patch
of its own (it changes behaviour, something that I don't expect from a
pure refactoring patch). With a bit of luck git will produce a nice diff
from the other changes and not this mess here (but I don't really expect
it -- the diff is just to large).

- Andreas

*: After all, it can't copy the SEI message, because this would
basically be the same as copying an SEI NAL unit and that is unimplemented.
Mark Thompson Aug. 12, 2020, 11:16 p.m. UTC | #2
On 12/08/2020 02:55, Andreas Rheinhardt wrote:
> First of all: I only looked at the sei_user_data stuff yet.
> 
> Mark Thompson:
>> ---
>>   libavcodec/h264_metadata_bsf.c | 443 ++++++++++++++++++---------------
>>   1 file changed, 242 insertions(+), 201 deletions(-)
>>
>> diff --git a/libavcodec/h264_metadata_bsf.c b/libavcodec/h264_metadata_bsf.c
>> index eb1503159b..1d00ccdfb8 100644
>> --- a/libavcodec/h264_metadata_bsf.c
>> +++ b/libavcodec/h264_metadata_bsf.c
>> ...
>> @@ -78,6 +79,7 @@ typedef struct H264MetadataContext {
>>       int crop_bottom;
>>   
>>       const char *sei_user_data;
>> +    H264RawSEIPayload sei_user_data_payload;
>>   
>>       int delete_filler;
>>   
>> ...
>> +
>> +    // The current packet should be treated as a seek point for metadata
>> +    // insertion if any of:
>> +    // - It is the first packet in the stream.
>> +    // - It contains an SPS, indicating that a sequence might start here.
>> +    // - It is marked as containing a key frame.
>> +    seek_point = !ctx->done_first_au || has_sps ||
>> +        (pkt->flags & AV_PKT_FLAG_KEY);
>> +
>> +    if (ctx->sei_user_data && seek_point) {
>> +        err = ff_cbs_h264_add_sei_message(au, &ctx->sei_user_data_payload);
>> +        if (err < 0) {
>> +            av_log(bsf, AV_LOG_ERROR, "Failed to add user data SEI "
>> +                   "message to access unit.\n");
>> +            goto fail;
>> +        }
> 
> Did you test this? With a sample with more than one seekpoint? It
> shouldn't work. The reason is that the ownership of the SEI message
> moves to the unit the SEI message will be attached to* (on success; on
> failure, the SEI message will be freed for you) and (on success) it will
> be unreferenced when the access unit gets reset. Notice that in this
> case ctx->sei_user_data_payload won't be changed, but its pointers will
> become dangling pointers and the second seek point will lead to a
> use-after-free.
> 
> I see two ways to fix this:
> My preferred solution is not to set the data_ref of the
> H264RawSEIUserDataUnregistered at all. In this case one does not need to
> allocate a buffer and copy the string at all -- the pointer can point
> directly into the ctx->sei_user_data string. It should work; in this
> case the point below about len + 1 not being in the range of int becomes
> moot, of course. (Notice that due to using a PutBitContext which
> currently uses an int anything longer than INT_MAX / 8 won't work anyway
> -- but maybe one should nevertheless use a size_t for the loop that
> writes the data in the user_data_(un)registered function?)
> 
> The second solution would be to keep a reference to the data_ref
> containing the string from which to create references which are then
> used in the SEI message; furthermore one would need to add code to free
> this reference in the close function (yes, this is missing right now --
> the buffer leaks if e.g. init fails after the buffer's allocation).
> 
> In any case I wonder whether the semantics of
> ff_cbs_h264_add_sei_message() are good as is: They were designed for a
> case in which the SEI message passed to it would go out of scope
> immediately after the function call, so leaving it in a consistent state
> was irrelevant. E.g. if one adds an SEI message with an internal ref,
> there would now be two pointers to the corresponding AVBufferRef.
> Furthermore freeing a user-data-unregistered SEI message currently only
> unreferences the AVBufferRef; it does not reset the other fields (the
> pointer might become dangling in this scenario). If the other fields
> were reset, too, then one would need to store them separately outside of
> the SEI message (maybe one should keep only the pointer to the data as
> well as its length in the context and keep using SEI messages on the
> stack?).
> 
> Moreover, I want to mention that allowing an internal buffer that is not
> refcounted will cause slight complications if we ever wanted to copy the
> containing unit or make it writable. But I am nevertheless in favour of
> doing it here.

The simplicity of your preferred answer here is so much greater that I have to agree.

> Furthermore, we need better FATE-tests: This bsf seems to be only tested
> by passthrough tests. No options are set. This bug here would probably
> have been found by you earlier if there was a test with more than one
> seekpoint.
> 
>> +    }
>> +
>> +    if (ctx->delete_filler) {
>> +        for (i = au->nb_units - 1; i >= 0; i--) {
>> +            if (au->units[i].type == H264_NAL_FILLER_DATA) {
>> +                ff_cbs_delete_unit(au, i);
>> +                continue;
>> +            }
>> +
>> +            if (au->units[i].type == H264_NAL_SEI) {
>> +                // Filler SEI messages.
>> +                H264RawSEI *sei = au->units[i].content;
>> +
>> +                for (j = sei->payload_count - 1; j >= 0; j--) {
>> +                    if (sei->payload[j].payload_type ==
>> +                        H264_SEI_TYPE_FILLER_PAYLOAD)
>> +                        ff_cbs_h264_delete_sei_message(au,
>> +                                                       &au->units[i], j);
>> +                }
>>               }
>>           }
>>       }
>>   
>> +    if (ctx->display_orientation != PASS) {
>> +        err = h264_metadata_handle_display_orientation(bsf, pkt, au,
>> +                                                       seek_point);
>> +        if (err < 0)
>> +            goto fail;
>> +    }
>> +
>>       err = ff_cbs_write_packet(ctx->output, pkt, au);
>>       if (err < 0) {
>>           av_log(bsf, AV_LOG_ERROR, "Failed to write packet.\n");
>> @@ -625,7 +616,57 @@ static int h264_metadata_init(AVBSFContext *bsf)
>>   {
>>       H264MetadataContext *ctx = bsf->priv_data;
>>       CodedBitstreamFragment *au = &ctx->access_unit;
>> -    int err, i;
>> +    int err, i, j;
>> +
>> +    if (ctx->sei_user_data) {
>> +        H264RawSEIUserDataUnregistered *udu;
>> +        int c, v;
> 
> These two could be moved to loop body scope.

Yep, moved.

>> +        size_t len;
> 
> The scope of this one should be the "Skip over the '+'." block below.

This variable disappeared anyway.

>> +
>> +        ctx->sei_user_data_payload = (H264RawSEIPayload) {
>> +            .payload_type = H264_SEI_TYPE_USER_DATA_UNREGISTERED,
>> +        };
> 
> ctx->sei_user_data_payload.payload_type =
>      H264_SEI_TYPE_USER_DATA_UNREGISTERED;
> 
> is enough as ctx is already zero initialized.

Sure, changed.

>> +        udu = &ctx->sei_user_data_payload.payload.user_data_unregistered;
>> +
>> +        // Parse UUID.  It must be a hex string of length 32, possibly
>> +        // containing '-'s which we ignore.
>> +        for (i = j = 0; j < 32 && ctx->sei_user_data[i]; i++) {
> 
> This code has a potential for undefined behaviour/overflow (that already
> existed before this patch): There is nothing that precludes the length
> of ctx->sei_user_data from being huge (you just have to set
> av_max_alloc() to a huge value). And if you have lots of '-', i might
> overflow.
> 
> The easiest fix for this is to not use an index, but a pointer to be
> incremented directly. This also means that you do not have to add a
> second loop counter variable.

I decided to just ban having infinitely many -, because that's ridiculous.  The worst non-stupid (well, not-quite-as-stupid) case is a '-' between every digit, so we bound i.

>> +            c = ctx->sei_user_data[i];
>> +            if (c == '-') {
>> +                continue;
>> +            } else if (av_isxdigit(c)) {
>> +                c = av_tolower(c);
>> +                v = (c <= '9' ? c - '0' : c - 'a' + 10);
>> +            } else {
>> +                break;
>> +            }
>> +            if (j & 1)
>> +                udu->uuid_iso_iec_11578[j / 2] |= v;
>> +            else
>> +                udu->uuid_iso_iec_11578[j / 2] = v << 4;
>> +            ++j;
>> +        }
>> +        if (j < 32 || ctx->sei_user_data[i] != '+') {
>> +            av_log(bsf, AV_LOG_ERROR, "Invalid user data: "
>> +                   "must be of the form \"UUID+string\".\n");
>> +            return AVERROR(EINVAL);
>> +        } else {
>> +            // Skip over the '+'.
>> +            ++i;
>> +
>> +            // Length of the actual data to insert (could be zero).
>> +            len = strlen(ctx->sei_user_data + i);
>> +
>> +            udu->data_ref = av_buffer_alloc(len + 1);
> 
> len + 1 might not fit into an int.
> 
>> +            if (!udu->data_ref)
>> +                return AVERROR(ENOMEM);
>> +
>> +            udu->data_length = len + 1;
> 
> Is it really to be expected that the terminating zero be written (yes, I
> know, the old code did it, too; just asking)?

I followed the pattern of the x264 version SEI message, which includes the terminator (since one of the original uses of this was forging that message).

>> +            udu->data        = udu->data_ref->data;
>> +            memcpy(udu->data, ctx->sei_user_data + 1, len);
>> +            udu->data[len]   = 0;
> 
> This is unnecessary: Just copy len + 1 elements (as the old code did).
> 
> Finally, I like that you move parsing of the sei_user_data string to
> init; that's the proper place for it. But this should be done in a patch
> of its own (it changes behaviour, something that I don't expect from a
> pure refactoring patch).

Ok, split into two patches (following).

>                          With a bit of luck git will produce a nice diff
> from the other changes and not this mess here (but I don't really expect
> it -- the diff is just to large).

Nope, as expected :)

Thanks,

- Mark
diff mbox series

Patch

diff --git a/libavcodec/h264_metadata_bsf.c b/libavcodec/h264_metadata_bsf.c
index eb1503159b..1d00ccdfb8 100644
--- a/libavcodec/h264_metadata_bsf.c
+++ b/libavcodec/h264_metadata_bsf.c
@@ -56,6 +56,7 @@  typedef struct H264MetadataContext {
     int done_first_au;
 
     int aud;
+    H264RawAUD aud_nal;
 
     AVRational sample_aspect_ratio;
 
@@ -78,6 +79,7 @@  typedef struct H264MetadataContext {
     int crop_bottom;
 
     const char *sei_user_data;
+    H264RawSEIPayload sei_user_data_payload;
 
     int delete_filler;
 
@@ -89,6 +91,59 @@  typedef struct H264MetadataContext {
 } H264MetadataContext;
 
 
+static int h264_metadata_insert_aud(AVBSFContext *bsf,
+                                    CodedBitstreamFragment *au)
+{
+    H264MetadataContext *ctx = bsf->priv_data;
+    int primary_pic_type_mask = 0xff;
+    int err, i, j;
+
+    static const int primary_pic_type_table[] = {
+        0x084, // 2, 7
+        0x0a5, // 0, 2, 5, 7
+        0x0e7, // 0, 1, 2, 5, 6, 7
+        0x210, // 4, 9
+        0x318, // 3, 4, 8, 9
+        0x294, // 2, 4, 7, 9
+        0x3bd, // 0, 2, 3, 4, 5, 7, 8, 9
+        0x3ff, // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9
+    };
+
+    for (i = 0; i < au->nb_units; i++) {
+        if (au->units[i].type == H264_NAL_SLICE ||
+            au->units[i].type == H264_NAL_IDR_SLICE) {
+            H264RawSlice *slice = au->units[i].content;
+            for (j = 0; j < FF_ARRAY_ELEMS(primary_pic_type_table); j++) {
+                if (!(primary_pic_type_table[j] &
+                      (1 << slice->header.slice_type)))
+                    primary_pic_type_mask &= ~(1 << j);
+            }
+        }
+    }
+    for (j = 0; j < FF_ARRAY_ELEMS(primary_pic_type_table); j++)
+        if (primary_pic_type_mask & (1 << j))
+            break;
+    if (j >= FF_ARRAY_ELEMS(primary_pic_type_table)) {
+        av_log(bsf, AV_LOG_ERROR, "No usable primary_pic_type: "
+               "invalid slice types?\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    ctx->aud_nal = (H264RawAUD) {
+        .nal_unit_header.nal_unit_type = H264_NAL_AUD,
+        .primary_pic_type = j,
+    };
+
+    err = ff_cbs_insert_unit_content(au, 0, H264_NAL_AUD,
+                                     &ctx->aud_nal, NULL);
+    if (err < 0) {
+        av_log(bsf, AV_LOG_ERROR, "Failed to insert AUD.\n");
+        return err;
+    }
+
+    return 0;
+}
+
 static int h264_metadata_update_sps(AVBSFContext *bsf,
                                     H264RawSPS *sps)
 {
@@ -320,219 +375,59 @@  static int h264_metadata_update_side_data(AVBSFContext *bsf, AVPacket *pkt)
     return 0;
 }
 
-static int h264_metadata_filter(AVBSFContext *bsf, AVPacket *pkt)
+static int h264_metadata_handle_display_orientation(AVBSFContext *bsf,
+                                                    AVPacket *pkt,
+                                                    CodedBitstreamFragment *au,
+                                                    int seek_point)
 {
     H264MetadataContext *ctx = bsf->priv_data;
-    CodedBitstreamFragment *au = &ctx->access_unit;
-    int err, i, j, has_sps;
-    H264RawAUD aud;
-
-    err = ff_bsf_get_packet_ref(bsf, pkt);
-    if (err < 0)
-        return err;
-
-    err = h264_metadata_update_side_data(bsf, pkt);
-    if (err < 0)
-        goto fail;
-
-    err = ff_cbs_read_packet(ctx->input, au, pkt);
-    if (err < 0) {
-        av_log(bsf, AV_LOG_ERROR, "Failed to read packet.\n");
-        goto fail;
-    }
-
-    if (au->nb_units == 0) {
-        av_log(bsf, AV_LOG_ERROR, "No NAL units in packet.\n");
-        err = AVERROR_INVALIDDATA;
-        goto fail;
-    }
-
-    // If an AUD is present, it must be the first NAL unit.
-    if (au->units[0].type == H264_NAL_AUD) {
-        if (ctx->aud == REMOVE)
-            ff_cbs_delete_unit(au, 0);
-    } else {
-        if (ctx->aud == INSERT) {
-            static const int primary_pic_type_table[] = {
-                0x084, // 2, 7
-                0x0a5, // 0, 2, 5, 7
-                0x0e7, // 0, 1, 2, 5, 6, 7
-                0x210, // 4, 9
-                0x318, // 3, 4, 8, 9
-                0x294, // 2, 4, 7, 9
-                0x3bd, // 0, 2, 3, 4, 5, 7, 8, 9
-                0x3ff, // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9
-            };
-            int primary_pic_type_mask = 0xff;
-
-            for (i = 0; i < au->nb_units; i++) {
-                if (au->units[i].type == H264_NAL_SLICE ||
-                    au->units[i].type == H264_NAL_IDR_SLICE) {
-                    H264RawSlice *slice = au->units[i].content;
-                    for (j = 0; j < FF_ARRAY_ELEMS(primary_pic_type_table); j++) {
-                         if (!(primary_pic_type_table[j] &
-                               (1 << slice->header.slice_type)))
-                             primary_pic_type_mask &= ~(1 << j);
-                    }
-                }
-            }
-            for (j = 0; j < FF_ARRAY_ELEMS(primary_pic_type_table); j++)
-                if (primary_pic_type_mask & (1 << j))
-                    break;
-            if (j >= FF_ARRAY_ELEMS(primary_pic_type_table)) {
-                av_log(bsf, AV_LOG_ERROR, "No usable primary_pic_type: "
-                       "invalid slice types?\n");
-                err = AVERROR_INVALIDDATA;
-                goto fail;
-            }
-
-            aud = (H264RawAUD) {
-                .nal_unit_header.nal_unit_type = H264_NAL_AUD,
-                .primary_pic_type = j,
-            };
+    int err, i, j;
 
-            err = ff_cbs_insert_unit_content(au,
-                                             0, H264_NAL_AUD, &aud, NULL);
-            if (err < 0) {
-                av_log(bsf, AV_LOG_ERROR, "Failed to insert AUD.\n");
-                goto fail;
-            }
-        }
-    }
-
-    has_sps = 0;
-    for (i = 0; i < au->nb_units; i++) {
-        if (au->units[i].type == H264_NAL_SPS) {
-            err = h264_metadata_update_sps(bsf, au->units[i].content);
-            if (err < 0)
-                goto fail;
-            has_sps = 1;
-        }
-    }
+    for (i = au->nb_units - 1; i >= 0; i--) {
+        H264RawSEI *sei;
+        if (au->units[i].type != H264_NAL_SEI)
+            continue;
+        sei = au->units[i].content;
 
-    // Only insert the SEI in access units containing SPSs, and also
-    // unconditionally in the first access unit we ever see.
-    if (ctx->sei_user_data && (has_sps || !ctx->done_first_au)) {
-        H264RawSEIPayload payload = {
-            .payload_type = H264_SEI_TYPE_USER_DATA_UNREGISTERED,
-        };
-        H264RawSEIUserDataUnregistered *udu =
-            &payload.payload.user_data_unregistered;
+        for (j = sei->payload_count - 1; j >= 0; j--) {
+            H264RawSEIDisplayOrientation *disp;
+            int32_t *matrix;
 
-        for (i = j = 0; j < 32 && ctx->sei_user_data[i]; i++) {
-            int c, v;
-            c = ctx->sei_user_data[i];
-            if (c == '-') {
+            if (sei->payload[j].payload_type !=
+                H264_SEI_TYPE_DISPLAY_ORIENTATION)
                 continue;
-            } else if (av_isxdigit(c)) {
-                c = av_tolower(c);
-                v = (c <= '9' ? c - '0' : c - 'a' + 10);
-            } else {
-                goto invalid_user_data;
-            }
-            if (j & 1)
-                udu->uuid_iso_iec_11578[j / 2] |= v;
-            else
-                udu->uuid_iso_iec_11578[j / 2] = v << 4;
-            ++j;
-        }
-        if (j == 32 && ctx->sei_user_data[i] == '+') {
-            size_t len = strlen(ctx->sei_user_data + i + 1);
-
-            udu->data_ref = av_buffer_alloc(len + 1);
-            if (!udu->data_ref) {
-                err = AVERROR(ENOMEM);
-                goto fail;
-            }
-
-            udu->data        = udu->data_ref->data;
-            udu->data_length = len + 1;
-            memcpy(udu->data, ctx->sei_user_data + i + 1, len + 1);
+            disp = &sei->payload[j].payload.display_orientation;
 
-            err = ff_cbs_h264_add_sei_message(au, &payload);
-            if (err < 0) {
-                av_log(bsf, AV_LOG_ERROR, "Failed to add user data SEI "
-                       "message to access unit.\n");
-                goto fail;
-            }
-
-        } else {
-        invalid_user_data:
-            av_log(bsf, AV_LOG_ERROR, "Invalid user data: "
-                   "must be \"UUID+string\".\n");
-            err = AVERROR(EINVAL);
-            goto fail;
-        }
-    }
-
-    if (ctx->delete_filler) {
-        for (i = au->nb_units - 1; i >= 0; i--) {
-            if (au->units[i].type == H264_NAL_FILLER_DATA) {
-                ff_cbs_delete_unit(au, i);
+            if (ctx->display_orientation == REMOVE ||
+                ctx->display_orientation == INSERT) {
+                ff_cbs_h264_delete_sei_message(au, &au->units[i], j);
                 continue;
             }
 
-            if (au->units[i].type == H264_NAL_SEI) {
-                // Filler SEI messages.
-                H264RawSEI *sei = au->units[i].content;
-
-                for (j = sei->payload_count - 1; j >= 0; j--) {
-                    if (sei->payload[j].payload_type ==
-                        H264_SEI_TYPE_FILLER_PAYLOAD)
-                        ff_cbs_h264_delete_sei_message(au, &au->units[i], j);
-                }
+            matrix = av_malloc(9 * sizeof(int32_t));
+            if (!matrix)
+                return AVERROR(ENOMEM);
+
+            av_display_rotation_set(matrix,
+                                    disp->anticlockwise_rotation *
+                                    180.0 / 65536.0);
+            av_display_matrix_flip(matrix, disp->hor_flip, disp->ver_flip);
+
+            // If there are multiple display orientation messages in an
+            // access unit, then the last one added to the packet (i.e.
+            // the first one in the access unit) will prevail.
+            err = av_packet_add_side_data(pkt, AV_PKT_DATA_DISPLAYMATRIX,
+                                          (uint8_t*)matrix,
+                                          9 * sizeof(int32_t));
+            if (err < 0) {
+                av_log(bsf, AV_LOG_ERROR, "Failed to attach extracted "
+                       "displaymatrix side data to packet.\n");
+                av_free(matrix);
+                return AVERROR(ENOMEM);
             }
         }
     }
 
-    if (ctx->display_orientation != PASS) {
-        for (i = au->nb_units - 1; i >= 0; i--) {
-            H264RawSEI *sei;
-            if (au->units[i].type != H264_NAL_SEI)
-                continue;
-            sei = au->units[i].content;
-
-            for (j = sei->payload_count - 1; j >= 0; j--) {
-                H264RawSEIDisplayOrientation *disp;
-                int32_t *matrix;
-
-                if (sei->payload[j].payload_type !=
-                    H264_SEI_TYPE_DISPLAY_ORIENTATION)
-                    continue;
-                disp = &sei->payload[j].payload.display_orientation;
-
-                if (ctx->display_orientation == REMOVE ||
-                    ctx->display_orientation == INSERT) {
-                    ff_cbs_h264_delete_sei_message(au, &au->units[i], j);
-                    continue;
-                }
-
-                matrix = av_malloc(9 * sizeof(int32_t));
-                if (!matrix) {
-                    err = AVERROR(ENOMEM);
-                    goto fail;
-                }
-
-                av_display_rotation_set(matrix,
-                                        disp->anticlockwise_rotation *
-                                        180.0 / 65536.0);
-                av_display_matrix_flip(matrix, disp->hor_flip, disp->ver_flip);
-
-                // If there are multiple display orientation messages in an
-                // access unit, then the last one added to the packet (i.e.
-                // the first one in the access unit) will prevail.
-                err = av_packet_add_side_data(pkt, AV_PKT_DATA_DISPLAYMATRIX,
-                                              (uint8_t*)matrix,
-                                              9 * sizeof(int32_t));
-                if (err < 0) {
-                    av_log(bsf, AV_LOG_ERROR, "Failed to attach extracted "
-                           "displaymatrix side data to packet.\n");
-                    av_free(matrix);
-                    goto fail;
-                }
-            }
-        }
-    }
     if (ctx->display_orientation == INSERT) {
         H264RawSEIPayload payload = {
             .payload_type = H264_SEI_TYPE_DISPLAY_ORIENTATION,
@@ -576,7 +471,7 @@  static int h264_metadata_filter(AVBSFContext *bsf, AVPacket *pkt)
             }
         }
 
-        if (has_sps || !ctx->done_first_au) {
+        if (seek_point) {
             if (!isnan(ctx->rotate)) {
                 disp->anticlockwise_rotation =
                     (uint16_t)rint((ctx->rotate >= 0.0 ? ctx->rotate
@@ -598,11 +493,107 @@  static int h264_metadata_filter(AVBSFContext *bsf, AVPacket *pkt)
             if (err < 0) {
                 av_log(bsf, AV_LOG_ERROR, "Failed to add display orientation "
                        "SEI message to access unit.\n");
+                return err;
+            }
+        }
+    }
+
+    return 0;
+}
+
+static int h264_metadata_filter(AVBSFContext *bsf, AVPacket *pkt)
+{
+    H264MetadataContext *ctx = bsf->priv_data;
+    CodedBitstreamFragment *au = &ctx->access_unit;
+    int err, i, j, has_sps, seek_point;
+
+    err = ff_bsf_get_packet_ref(bsf, pkt);
+    if (err < 0)
+        return err;
+
+    err = h264_metadata_update_side_data(bsf, pkt);
+    if (err < 0)
+        goto fail;
+
+    err = ff_cbs_read_packet(ctx->input, au, pkt);
+    if (err < 0) {
+        av_log(bsf, AV_LOG_ERROR, "Failed to read packet.\n");
+        goto fail;
+    }
+
+    if (au->nb_units == 0) {
+        av_log(bsf, AV_LOG_ERROR, "No NAL units in packet.\n");
+        err = AVERROR_INVALIDDATA;
+        goto fail;
+    }
+
+    // If an AUD is present, it must be the first NAL unit.
+    if (au->units[0].type == H264_NAL_AUD) {
+        if (ctx->aud == REMOVE)
+            ff_cbs_delete_unit(au, 0);
+    } else {
+        if (ctx->aud == INSERT) {
+            err = h264_metadata_insert_aud(bsf, au);
+            if (err < 0)
                 goto fail;
+        }
+    }
+
+    has_sps = 0;
+    for (i = 0; i < au->nb_units; i++) {
+        if (au->units[i].type == H264_NAL_SPS) {
+            err = h264_metadata_update_sps(bsf, au->units[i].content);
+            if (err < 0)
+                goto fail;
+            has_sps = 1;
+        }
+    }
+
+    // The current packet should be treated as a seek point for metadata
+    // insertion if any of:
+    // - It is the first packet in the stream.
+    // - It contains an SPS, indicating that a sequence might start here.
+    // - It is marked as containing a key frame.
+    seek_point = !ctx->done_first_au || has_sps ||
+        (pkt->flags & AV_PKT_FLAG_KEY);
+
+    if (ctx->sei_user_data && seek_point) {
+        err = ff_cbs_h264_add_sei_message(au, &ctx->sei_user_data_payload);
+        if (err < 0) {
+            av_log(bsf, AV_LOG_ERROR, "Failed to add user data SEI "
+                   "message to access unit.\n");
+            goto fail;
+        }
+    }
+
+    if (ctx->delete_filler) {
+        for (i = au->nb_units - 1; i >= 0; i--) {
+            if (au->units[i].type == H264_NAL_FILLER_DATA) {
+                ff_cbs_delete_unit(au, i);
+                continue;
+            }
+
+            if (au->units[i].type == H264_NAL_SEI) {
+                // Filler SEI messages.
+                H264RawSEI *sei = au->units[i].content;
+
+                for (j = sei->payload_count - 1; j >= 0; j--) {
+                    if (sei->payload[j].payload_type ==
+                        H264_SEI_TYPE_FILLER_PAYLOAD)
+                        ff_cbs_h264_delete_sei_message(au,
+                                                       &au->units[i], j);
+                }
             }
         }
     }
 
+    if (ctx->display_orientation != PASS) {
+        err = h264_metadata_handle_display_orientation(bsf, pkt, au,
+                                                       seek_point);
+        if (err < 0)
+            goto fail;
+    }
+
     err = ff_cbs_write_packet(ctx->output, pkt, au);
     if (err < 0) {
         av_log(bsf, AV_LOG_ERROR, "Failed to write packet.\n");
@@ -625,7 +616,57 @@  static int h264_metadata_init(AVBSFContext *bsf)
 {
     H264MetadataContext *ctx = bsf->priv_data;
     CodedBitstreamFragment *au = &ctx->access_unit;
-    int err, i;
+    int err, i, j;
+
+    if (ctx->sei_user_data) {
+        H264RawSEIUserDataUnregistered *udu;
+        int c, v;
+        size_t len;
+
+        ctx->sei_user_data_payload = (H264RawSEIPayload) {
+            .payload_type = H264_SEI_TYPE_USER_DATA_UNREGISTERED,
+        };
+        udu = &ctx->sei_user_data_payload.payload.user_data_unregistered;
+
+        // Parse UUID.  It must be a hex string of length 32, possibly
+        // containing '-'s which we ignore.
+        for (i = j = 0; j < 32 && ctx->sei_user_data[i]; i++) {
+            c = ctx->sei_user_data[i];
+            if (c == '-') {
+                continue;
+            } else if (av_isxdigit(c)) {
+                c = av_tolower(c);
+                v = (c <= '9' ? c - '0' : c - 'a' + 10);
+            } else {
+                break;
+            }
+            if (j & 1)
+                udu->uuid_iso_iec_11578[j / 2] |= v;
+            else
+                udu->uuid_iso_iec_11578[j / 2] = v << 4;
+            ++j;
+        }
+        if (j < 32 || ctx->sei_user_data[i] != '+') {
+            av_log(bsf, AV_LOG_ERROR, "Invalid user data: "
+                   "must be of the form \"UUID+string\".\n");
+            return AVERROR(EINVAL);
+        } else {
+            // Skip over the '+'.
+            ++i;
+
+            // Length of the actual data to insert (could be zero).
+            len = strlen(ctx->sei_user_data + i);
+
+            udu->data_ref = av_buffer_alloc(len + 1);
+            if (!udu->data_ref)
+                return AVERROR(ENOMEM);
+
+            udu->data_length = len + 1;
+            udu->data        = udu->data_ref->data;
+            memcpy(udu->data, ctx->sei_user_data + 1, len);
+            udu->data[len]   = 0;
+        }
+    }
 
     err = ff_cbs_init(&ctx->input,  AV_CODEC_ID_H264, bsf);
     if (err < 0)