
[FFmpeg-devel,v3] avcodec/h264dec: apply H.274 film grain

Message ID 20210817195456.27224-1-ffmpeg@haasn.xyz
State Accepted
Commit 66845cffc3bbb17f91294d15cd6f57f3df3bce97
Series [FFmpeg-devel,v3] avcodec/h264dec: apply H.274 film grain

Checks

Context Check Description
andriy/x86_make fail Make failed
andriy/PPC64_make warning Make failed

Commit Message

Niklas Haas Aug. 17, 2021, 7:54 p.m. UTC
From: Niklas Haas <git@haasn.dev>

Because we need access to ref frames without film grain applied, we have
to add an extra AVFrame to H264Picture to avoid messing with the
original. This requires some amount of overhead to make the reference
moves work out, but it allows us to benefit from frame multithreading
for film grain application "for free".

Unfortunately, this approach requires twice as much RAM to be constantly
allocated for ref frames, due to the need for an extra buffer per
H264Picture. In theory, we could get away with freeing up this memory as
soon as it's no longer needed (since ref frames do not need film grain
buffers any longer), but trying to call ff_thread_release_buffer() from
output_frame() conflicts with possible later accesses to that same frame
and I'm not sure how to synchronize that well.

Tested on all three cases of (no fg), (fg present but exported) and (fg
present and not exported), with and without threading.

Signed-off-by: Niklas Haas <git@haasn.dev>
---
 libavcodec/h264_picture.c | 35 +++++++++++++++++++++++--
 libavcodec/h264_slice.c   | 16 ++++++++++--
 libavcodec/h264dec.c      | 55 ++++++++++++++++++++++++++-------------
 libavcodec/h264dec.h      |  6 +++++
 4 files changed, 90 insertions(+), 22 deletions(-)
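
Whether the decoder synthesizes the grain or merely passes the parameters
through is selected by the caller via AVCodecContext.export_side_data. The
caller-side sketch below shows the two modes under the standard libavcodec
API; it is illustrative only (the helper name open_h264_decoder is not part
of the patch):

#include <libavcodec/avcodec.h>

/* Sketch only, not part of the patch: open an H.264 decoder and pick one of
 * the two film-grain modes this series distinguishes.
 *   - flag cleared (default): grain is applied to the decoded frames and the
 *     AV_FRAME_DATA_FILM_GRAIN_PARAMS side data is stripped on output
 *   - flag set: pixels stay clean and the parameters are exported as frame
 *     side data for the caller (or a filter) to apply later */
static AVCodecContext *open_h264_decoder(int export_grain_params)
{
    const AVCodec *codec = avcodec_find_decoder(AV_CODEC_ID_H264);
    AVCodecContext *dec  = codec ? avcodec_alloc_context3(codec) : NULL;
    if (!dec)
        return NULL;

    if (export_grain_params)
        dec->export_side_data |= AV_CODEC_EXPORT_DATA_FILM_GRAIN;

    if (avcodec_open2(dec, codec, NULL) < 0)
        avcodec_free_context(&dec);
    return dec;
}

The same selection is available on the ffmpeg command line through the
export_side_data option (e.g. -export_side_data film_grain).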

Comments

James Almer Aug. 17, 2021, 10:53 p.m. UTC | #1
On 8/17/2021 4:54 PM, Niklas Haas wrote:
> From: Niklas Haas <git@haasn.dev>
> 
> Because we need access to ref frames without film grain applied, we have
> to add an extra AVFrame to H264Picture to avoid messing with the
> original. This requires some amount of overhead to make the reference
> moves work out, but it allows us to benefit from frame multithreading
> for film grain application "for free".
> 
> Unfortunately, this approach requires twice as much RAM to be constantly
> allocated for ref frames, due to the need for an extra buffer per
> H264Picture. In theory, we could get away with freeing up this memory as
> soon as it's no longer needed (since ref frames do not need film grain
> buffers any longer), but trying to call ff_thread_release_buffer() from
> output_frame() conflicts with possible later accesses to that same frame
> and I'm not sure how to synchronize that well.
> 
> Tested on all three cases of (no fg), (fg present but exported) and (fg
> present and not exported), with and without threading.
> 
> Signed-off-by: Niklas Haas <git@haasn.dev>
> ---
>   libavcodec/h264_picture.c | 35 +++++++++++++++++++++++--
>   libavcodec/h264_slice.c   | 16 ++++++++++--
>   libavcodec/h264dec.c      | 55 ++++++++++++++++++++++++++-------------
>   libavcodec/h264dec.h      |  6 +++++
>   4 files changed, 90 insertions(+), 22 deletions(-)

Looks good now, so I'll apply it soon.
Eoff, Ullysses A Aug. 25, 2021, 6:06 p.m. UTC | #2
> -----Original Message-----
> From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of Niklas Haas
> Sent: Tuesday, August 17, 2021 12:55 PM
> To: ffmpeg-devel@ffmpeg.org
> Cc: Niklas Haas <git@haasn.dev>
> Subject: [FFmpeg-devel] [PATCH v3] avcodec/h264dec: apply H.274 film grain
> 
> [...]

Sorry, replied to v2 first...

This causes a regression, reported here: https://trac.ffmpeg.org/ticket/9389

James Almer Aug. 25, 2021, 9:01 p.m. UTC | #3
On 8/25/2021 3:06 PM, Eoff, Ullysses A wrote:
>> -----Original Message-----
>> From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of Niklas Haas
>> Sent: Tuesday, August 17, 2021 12:55 PM
>> To: ffmpeg-devel@ffmpeg.org
>> Cc: Niklas Haas <git@haasn.dev>
>> Subject: [FFmpeg-devel] [PATCH v3] avcodec/h264dec: apply H.274 film grain
>>
>> [...]
> 
> Sorry, replied to v2 first...
> 
> This causes a regression, reported here: https://trac.ffmpeg.org/ticket/9389

Should be fixed.
Michael Niedermayer Sept. 13, 2021, 9:38 p.m. UTC | #4
On Tue, Aug 17, 2021 at 09:54:56PM +0200, Niklas Haas wrote:
> From: Niklas Haas <git@haasn.dev>
> 
> Because we need access to ref frames without film grain applied, we have
> to add an extra AVFrame to H264Picture to avoid messing with the
> original. This requires some amount of overhead to make the reference
> moves work out, but it allows us to benefit from frame multithreading
> for film grain application "for free".
> 
> Unfortunately, this approach requires twice as much RAM to be constantly
> allocated for ref frames, due to the need for an extra buffer per
> H264Picture. In theory, we could get away with freeing up this memory as
> soon as it's no longer needed (since ref frames do not need film grain
> buffers any longer), but trying to call ff_thread_release_buffer() from
> output_frame() conflicts with possible later accesses to that same frame
> and I'm not sure how to synchronize that well.
> 
> Tested on all three cases of (no fg), (fg present but exported) and (fg
> present and not exported), with and without threading.
> 
> Signed-off-by: Niklas Haas <git@haasn.dev>
> ---
>  libavcodec/h264_picture.c | 35 +++++++++++++++++++++++--
>  libavcodec/h264_slice.c   | 16 ++++++++++--
>  libavcodec/h264dec.c      | 55 ++++++++++++++++++++++++++-------------
>  libavcodec/h264dec.h      |  6 +++++
>  4 files changed, 90 insertions(+), 22 deletions(-)

This causes aborts (tested latest git master as of this mail)

Assertion sd failed at libavcodec/h264_picture.c:255
==26500== ERROR: libFuzzer: deadly signal
    #0 0x49f9d1 in __sanitizer_print_stack_trace /b/swarming/w/ir/cache/builder/src/third_party/llvm/compiler-rt/lib/asan/asan_stack.cc:86:3
    #1 0x18c45de in fuzzer::Fuzzer::CrashCallback() /home/michael/libfuzzer-2017-10-newclang/Fuzzer/build/../FuzzerLoop.cpp:196:5
    #2 0x18c45ad in fuzzer::Fuzzer::StaticCrashSignalCallback() /home/michael/libfuzzer-2017-10-newclang/Fuzzer/build/../FuzzerLoop.cpp:175:6
    #3 0x7f200a40997f  (/lib/x86_64-linux-gnu/libpthread.so.0+0x1297f)
    #4 0x7f2008b31fb6 in __libc_signal_restore_set /build/glibc-S9d2JN/glibc-2.27/signal/../sysdeps/unix/sysv/linux/nptl-signals.h:80
    #5 0x7f2008b31fb6 in gsignal /build/glibc-S9d2JN/glibc-2.27/signal/../sysdeps/unix/sysv/linux/raise.c:48
    #6 0x7f2008b33920 in abort /build/glibc-S9d2JN/glibc-2.27/stdlib/abort.c:79
    #7 0xafcd86 in ff_h264_field_end /home/michael/ffmpeg-git/ffmpeg/libavcodec/h264_picture.c:255:9
    #8 0x5514da in h264_decode_frame /home/michael/ffmpeg-git/ffmpeg/libavcodec/h264dec.c:1041:20
    #9 0x523712 in decode_simple_internal /home/michael/ffmpeg-git/ffmpeg/libavcodec/decode.c:326:15
    #10 0x502a21 in decode_simple_receive_frame /home/michael/ffmpeg-git/ffmpeg/libavcodec/decode.c:517:15
    #11 0x502a21 in decode_receive_frame_internal /home/michael/ffmpeg-git/ffmpeg/libavcodec/decode.c:537
    #12 0x50204f in avcodec_send_packet /home/michael/ffmpeg-git/ffmpeg/libavcodec/decode.c:604:15
    #13 0x4cc093 in LLVMFuzzerTestOneInput /home/michael/ffmpeg-git/ffmpeg/tools/target_dec_fuzzer.c:387:25
    #14 0x18c55cd in fuzzer::Fuzzer::ExecuteCallback(unsigned char const*, unsigned long) /home/michael/libfuzzer-2017-10-newclang/Fuzzer/build/../FuzzerLoop.cpp:495:13
    #15 0x18ba1a2 in fuzzer::RunOneTest(fuzzer::Fuzzer*, char const*, unsigned long) /home/michael/libfuzzer-2017-10-newclang/Fuzzer/build/../FuzzerDriver.cpp:273:6
    #16 0x18bf3a1 in fuzzer::FuzzerDriver(int*, char***, int (*)(unsigned char const*, unsigned long)) /home/michael/libfuzzer-2017-10-newclang/Fuzzer/build/../FuzzerDriver.cpp:690:9
    #17 0x18b9e80 in main /home/michael/libfuzzer-2017-10-newclang/Fuzzer/build/../FuzzerMain.cpp:20:10
    #18 0x7f2008b14bf6 in __libc_start_main /build/glibc-S9d2JN/glibc-2.27/csu/../csu/libc-start.c:310
    #19 0x41f129 in _start (/home/michael/ffmpeg-git/ffmpeg/tools/target_dec_h264_fuzzer+0x41f129)

[...]

Patch

diff --git a/libavcodec/h264_picture.c b/libavcodec/h264_picture.c
index ff30166b4d..5944798394 100644
--- a/libavcodec/h264_picture.c
+++ b/libavcodec/h264_picture.c
@@ -43,13 +43,14 @@ 
 
 void ff_h264_unref_picture(H264Context *h, H264Picture *pic)
 {
-    int off = offsetof(H264Picture, tf) + sizeof(pic->tf);
+    int off = offsetof(H264Picture, tf_grain) + sizeof(pic->tf_grain);
     int i;
 
     if (!pic->f || !pic->f->buf[0])
         return;
 
     ff_thread_release_buffer(h->avctx, &pic->tf);
+    ff_thread_release_buffer(h->avctx, &pic->tf_grain);
     av_buffer_unref(&pic->hwaccel_priv_buf);
 
     av_buffer_unref(&pic->qscale_table_buf);
@@ -93,6 +94,7 @@  static void h264_copy_picture_params(H264Picture *dst, const H264Picture *src)
     dst->mb_width      = src->mb_width;
     dst->mb_height     = src->mb_height;
     dst->mb_stride     = src->mb_stride;
+    dst->needs_fg      = src->needs_fg;
 }
 
 int ff_h264_ref_picture(H264Context *h, H264Picture *dst, H264Picture *src)
@@ -108,6 +110,14 @@  int ff_h264_ref_picture(H264Context *h, H264Picture *dst, H264Picture *src)
     if (ret < 0)
         goto fail;
 
+    if (src->needs_fg) {
+        av_assert0(src->tf_grain.f == src->f_grain);
+        dst->tf_grain.f = dst->f_grain;
+        ret = ff_thread_ref_frame(&dst->tf_grain, &src->tf_grain);
+        if (ret < 0)
+            goto fail;
+    }
+
     dst->qscale_table_buf = av_buffer_ref(src->qscale_table_buf);
     dst->mb_type_buf      = av_buffer_ref(src->mb_type_buf);
     dst->pps_buf          = av_buffer_ref(src->pps_buf);
@@ -159,6 +169,15 @@  int ff_h264_replace_picture(H264Context *h, H264Picture *dst, const H264Picture
     if (ret < 0)
         goto fail;
 
+    if (src->needs_fg) {
+        av_assert0(src->tf_grain.f == src->f_grain);
+        dst->tf_grain.f = dst->f_grain;
+        ff_thread_release_buffer(h->avctx, &dst->tf_grain);
+        ret = ff_thread_ref_frame(&dst->tf_grain, &src->tf_grain);
+        if (ret < 0)
+            goto fail;
+    }
+
     ret  = av_buffer_replace(&dst->qscale_table_buf, src->qscale_table_buf);
     ret |= av_buffer_replace(&dst->mb_type_buf, src->mb_type_buf);
     ret |= av_buffer_replace(&dst->pps_buf, src->pps_buf);
@@ -212,6 +231,7 @@  void ff_h264_set_erpic(ERPicture *dst, H264Picture *src)
 int ff_h264_field_end(H264Context *h, H264SliceContext *sl, int in_setup)
 {
     AVCodecContext *const avctx = h->avctx;
+    H264Picture *cur = h->cur_pic_ptr;
     int err = 0;
     h->mb_y = 0;
 
@@ -230,10 +250,21 @@  int ff_h264_field_end(H264Context *h, H264SliceContext *sl, int in_setup)
         if (err < 0)
             av_log(avctx, AV_LOG_ERROR,
                    "hardware accelerator failed to decode picture\n");
+    } else if (!in_setup && cur->needs_fg) {
+        AVFrameSideData *sd = av_frame_get_side_data(cur->f, AV_FRAME_DATA_FILM_GRAIN_PARAMS);
+        av_assert0(sd); // always present if `cur->needs_fg`
+        err = ff_h274_apply_film_grain(cur->f_grain, cur->f, &h->h274db,
+                                       (AVFilmGrainParams *) sd->data);
+        if (err < 0) {
+            av_log(h->avctx, AV_LOG_WARNING, "Failed synthesizing film "
+                   "grain, ignoring: %s\n", av_err2str(err));
+            cur->needs_fg = 0;
+            err = 0;
+        }
     }
 
     if (!in_setup && !h->droppable)
-        ff_thread_report_progress(&h->cur_pic_ptr->tf, INT_MAX,
+        ff_thread_report_progress(&cur->tf, INT_MAX,
                                   h->picture_structure == PICT_BOTTOM_FIELD);
     emms_c();
 
diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c
index 9244d2d5dd..98ca8836db 100644
--- a/libavcodec/h264_slice.c
+++ b/libavcodec/h264_slice.c
@@ -197,6 +197,16 @@  static int alloc_picture(H264Context *h, H264Picture *pic)
     if (ret < 0)
         goto fail;
 
+    if (pic->needs_fg) {
+        pic->tf_grain.f = pic->f_grain;
+        pic->f_grain->format = pic->f->format;
+        pic->f_grain->width = pic->f->width;
+        pic->f_grain->height = pic->f->height;
+        ret = ff_thread_get_buffer(h->avctx, &pic->tf_grain, 0);
+        if (ret < 0)
+            goto fail;
+    }
+
     if (h->avctx->hwaccel) {
         const AVHWAccel *hwaccel = h->avctx->hwaccel;
         av_assert0(!pic->hwaccel_picture_private);
@@ -517,6 +527,9 @@  static int h264_frame_start(H264Context *h)
     pic->f->crop_top    = h->crop_top;
     pic->f->crop_bottom = h->crop_bottom;
 
+    pic->needs_fg = h->sei.film_grain_characteristics.present &&
+        !(h->avctx->export_side_data & AV_CODEC_EXPORT_DATA_FILM_GRAIN);
+
     if ((ret = alloc_picture(h, pic)) < 0)
         return ret;
 
@@ -1328,8 +1341,7 @@  static int h264_export_frame_props(H264Context *h)
     }
     h->sei.unregistered.nb_buf_ref = 0;
 
-    if (h->sei.film_grain_characteristics.present &&
-        (h->avctx->export_side_data & AV_CODEC_EXPORT_DATA_FILM_GRAIN)) {
+    if (h->sei.film_grain_characteristics.present) {
         H264SEIFilmGrainCharacteristics *fgc = &h->sei.film_grain_characteristics;
         AVFilmGrainParams *fgp = av_film_grain_params_create_side_data(out);
         if (!fgp)
diff --git a/libavcodec/h264dec.c b/libavcodec/h264dec.c
index dc99ee995e..a64c93eca4 100644
--- a/libavcodec/h264dec.c
+++ b/libavcodec/h264dec.c
@@ -275,9 +275,22 @@  int ff_h264_slice_context_init(H264Context *h, H264SliceContext *sl)
     return 0;
 }
 
+static int h264_init_pic(H264Picture *pic)
+{
+    pic->f = av_frame_alloc();
+    if (!pic->f)
+        return AVERROR(ENOMEM);
+
+    pic->f_grain = av_frame_alloc();
+    if (!pic->f_grain)
+        return AVERROR(ENOMEM);
+
+    return 0;
+}
+
 static int h264_init_context(AVCodecContext *avctx, H264Context *h)
 {
-    int i;
+    int i, ret;
 
     h->avctx                 = avctx;
     h->cur_chroma_format_idc = -1;
@@ -308,18 +321,15 @@  static int h264_init_context(AVCodecContext *avctx, H264Context *h)
     }
 
     for (i = 0; i < H264_MAX_PICTURE_COUNT; i++) {
-        h->DPB[i].f = av_frame_alloc();
-        if (!h->DPB[i].f)
-            return AVERROR(ENOMEM);
+        if ((ret = h264_init_pic(&h->DPB[i])) < 0)
+            return ret;
     }
 
-    h->cur_pic.f = av_frame_alloc();
-    if (!h->cur_pic.f)
-        return AVERROR(ENOMEM);
+    if ((ret = h264_init_pic(&h->cur_pic)) < 0)
+        return ret;
 
-    h->last_pic_for_ec.f = av_frame_alloc();
-    if (!h->last_pic_for_ec.f)
-        return AVERROR(ENOMEM);
+    if ((ret = h264_init_pic(&h->last_pic_for_ec)) < 0)
+        return ret;
 
     for (i = 0; i < h->nb_slice_ctx; i++)
         h->slice_ctx[i].h264 = h;
@@ -327,6 +337,13 @@  static int h264_init_context(AVCodecContext *avctx, H264Context *h)
     return 0;
 }
 
+static void h264_free_pic(H264Context *h, H264Picture *pic)
+{
+    ff_h264_unref_picture(h, pic);
+    av_frame_free(&pic->f);
+    av_frame_free(&pic->f_grain);
+}
+
 static av_cold int h264_decode_end(AVCodecContext *avctx)
 {
     H264Context *h = avctx->priv_data;
@@ -336,8 +353,7 @@  static av_cold int h264_decode_end(AVCodecContext *avctx)
     ff_h264_free_tables(h);
 
     for (i = 0; i < H264_MAX_PICTURE_COUNT; i++) {
-        ff_h264_unref_picture(h, &h->DPB[i]);
-        av_frame_free(&h->DPB[i].f);
+        h264_free_pic(h, &h->DPB[i]);
     }
     memset(h->delayed_pic, 0, sizeof(h->delayed_pic));
 
@@ -351,10 +367,8 @@  static av_cold int h264_decode_end(AVCodecContext *avctx)
 
     ff_h2645_packet_uninit(&h->pkt);
 
-    ff_h264_unref_picture(h, &h->cur_pic);
-    av_frame_free(&h->cur_pic.f);
-    ff_h264_unref_picture(h, &h->last_pic_for_ec);
-    av_frame_free(&h->last_pic_for_ec.f);
+    h264_free_pic(h, &h->cur_pic);
+    h264_free_pic(h, &h->last_pic_for_ec);
 
     return 0;
 }
@@ -837,13 +851,15 @@  static int h264_export_enc_params(AVFrame *f, H264Picture *p)
 
 static int output_frame(H264Context *h, AVFrame *dst, H264Picture *srcp)
 {
-    AVFrame *src = srcp->f;
     int ret;
 
-    ret = av_frame_ref(dst, src);
+    ret = av_frame_ref(dst, srcp->needs_fg ? srcp->f_grain : srcp->f);
     if (ret < 0)
         return ret;
 
+    if (srcp->needs_fg && (ret = av_frame_copy_props(dst, srcp->f)) < 0)
+        return ret;
+
     av_dict_set(&dst->metadata, "stereo_mode", ff_h264_sei_stereo_mode(&h->sei.frame_packing), 0);
 
     if (srcp->sei_recovery_frame_cnt == 0)
@@ -855,6 +871,9 @@  static int output_frame(H264Context *h, AVFrame *dst, H264Picture *srcp)
             goto fail;
     }
 
+    if (!(h->avctx->export_side_data & AV_CODEC_EXPORT_DATA_FILM_GRAIN))
+        av_frame_remove_side_data(dst, AV_FRAME_DATA_FILM_GRAIN_PARAMS);
+
     return 0;
 fail:
     av_frame_unref(dst);
diff --git a/libavcodec/h264dec.h b/libavcodec/h264dec.h
index 7c419de051..87c4e4e539 100644
--- a/libavcodec/h264dec.h
+++ b/libavcodec/h264dec.h
@@ -43,6 +43,7 @@ 
 #include "h264dsp.h"
 #include "h264pred.h"
 #include "h264qpel.h"
+#include "h274.h"
 #include "internal.h"
 #include "mpegutils.h"
 #include "parser.h"
@@ -130,6 +131,9 @@  typedef struct H264Picture {
     AVFrame *f;
     ThreadFrame tf;
 
+    AVFrame *f_grain;
+    ThreadFrame tf_grain;
+
     AVBufferRef *qscale_table_buf;
     int8_t *qscale_table;
 
@@ -162,6 +166,7 @@  typedef struct H264Picture {
     int recovered;          ///< picture at IDR or recovery point + recovery count
     int invalid_gap;
     int sei_recovery_frame_cnt;
+    int needs_fg;           ///< whether picture needs film grain synthesis (see `f_grain`)
 
     AVBufferRef *pps_buf;
     const PPS   *pps;
@@ -349,6 +354,7 @@  typedef struct H264Context {
     H264DSPContext h264dsp;
     H264ChromaContext h264chroma;
     H264QpelContext h264qpel;
+    H274FilmGrainDatabase h274db;
 
     H264Picture DPB[H264_MAX_PICTURE_COUNT];
     H264Picture *cur_pic_ptr;