diff mbox series

[FFmpeg-devel,4/4] avcodec/h264dec: apply H.274 film grain

Message ID 20210814113620.39290-4-ffmpeg@haasn.xyz
State New
Headers show
Series [FFmpeg-devel,1/4] avcodec/h264_sei: fix H.274 film grain parsing | expand

Checks

Context Check Description
andriy/x86_make success Make finished
andriy/x86_make_fate success Make fate finished
andriy/PPC64_make success Make finished
andriy/PPC64_make_fate success Make fate finished

Commit Message

Niklas Haas Aug. 14, 2021, 11:36 a.m. UTC
From: Niklas Haas <git@haasn.dev>

Because we need access to ref frames without film grain applied, we have
to add an extra AVFrame to H264Picture to avoid messing with the
original. This requires some amount of overhead to make the reference
moves work out, but it allows us to benefit from frame multithreading
for film grain application "for free".

Unfortunately, this approach requires twice as much RAM to be constantly
allocated, due to the need for an extra buffer per H264Picture. In
theory, we could get away with freeing up this memory as soon as it's no
longer needed, but trying to call ff_thread_release_buffer() from
output_frame() simply deadlocks the decoder and I haven't figured out
why. Patches welcome(tm)

Tested on all three cases of (no fg), (fg present but exported) and (fg
present and not exported), with and without threading.
---
 libavcodec/h264_picture.c | 24 +++++++++++++++++++-
 libavcodec/h264_slice.c   | 18 +++++++++++++--
 libavcodec/h264dec.c      | 48 +++++++++++++++++++++++++++++++--------
 libavcodec/h264dec.h      |  6 +++++
 4 files changed, 83 insertions(+), 13 deletions(-)

Comments

James Almer Aug. 14, 2021, 1:07 p.m. UTC | #1
On 8/14/2021 8:36 AM, Niklas Haas wrote:
> From: Niklas Haas <git@haasn.dev>
> 
> Because we need access to ref frames without film grain applied, we have
> to add an extra AVFrame to H264Picture to avoid messing with the
> original. This requires some amount of overhead to make the reference
> moves work out, but it allows us to benefit from frame multithreading
> for film grain application "for free".
> 
> Unfortunately, this approach requires twice as much RAM to be constantly
> allocated, due to the need for an extra buffer per H264Picture. In
> theory, we could get away with freeing up this memory as soon as it's no
> longer needed, but trying to call ff_thread_release_buffer() from
> output_frame() simply deadlocks the decoder and I haven't figured out
> why. Patches welcome(tm)
> 
> Tested on all three cases of (no fg), (fg present but exported) and (fg
> present and not exported), with and without threading.
> ---
>   libavcodec/h264_picture.c | 24 +++++++++++++++++++-
>   libavcodec/h264_slice.c   | 18 +++++++++++++--
>   libavcodec/h264dec.c      | 48 +++++++++++++++++++++++++++++++--------
>   libavcodec/h264dec.h      |  6 +++++
>   4 files changed, 83 insertions(+), 13 deletions(-)
> 
> diff --git a/libavcodec/h264_picture.c b/libavcodec/h264_picture.c
> index b79944b794..8284d12158 100644
> --- a/libavcodec/h264_picture.c
> +++ b/libavcodec/h264_picture.c
> @@ -44,7 +44,7 @@
>   
>   void ff_h264_unref_picture(H264Context *h, H264Picture *pic)
>   {
> -    int off = offsetof(H264Picture, tf) + sizeof(pic->tf);
> +    int off = offsetof(H264Picture, tf_grain) + sizeof(pic->tf_grain);
>       int i;
>   
>       if (!pic->f || !pic->f->buf[0])
> @@ -61,6 +61,9 @@ void ff_h264_unref_picture(H264Context *h, H264Picture *pic)
>           av_buffer_unref(&pic->ref_index_buf[i]);
>       }
>   
> +    if (pic->needs_fg)
> +        ff_thread_release_buffer(h->avctx, &pic->tf_grain);
> +
>       memset((uint8_t*)pic + off, 0, sizeof(*pic) - off);
>   }
>   
> @@ -109,6 +112,15 @@ int ff_h264_ref_picture(H264Context *h, H264Picture *dst, H264Picture *src)
>       if (ret < 0)
>           goto fail;
>   
> +    if (src->needs_fg) {
> +        av_assert0(src->tf_grain.f == src->f_grain);
> +        dst->tf_grain.f = dst->f_grain;
> +        ret = ff_thread_ref_frame(&dst->tf_grain, &src->tf_grain);
> +        if (ret < 0)
> +            goto fail;
> +        dst->needs_fg = 1;
> +    }
> +
>       dst->qscale_table_buf = av_buffer_ref(src->qscale_table_buf);
>       dst->mb_type_buf      = av_buffer_ref(src->mb_type_buf);
>       dst->pps_buf          = av_buffer_ref(src->pps_buf);
> @@ -160,6 +172,16 @@ int ff_h264_replace_picture(H264Context *h, H264Picture *dst, const H264Picture
>       if (ret < 0)
>           goto fail;
>   
> +    if (src->needs_fg) {
> +        av_assert0(src->tf_grain.f == src->f_grain);
> +        dst->tf_grain.f = dst->f_grain;
> +        ff_thread_release_buffer(h->avctx, &dst->tf_grain);
> +        ret = ff_thread_ref_frame(&dst->tf_grain, &src->tf_grain);
> +        if (ret < 0)
> +            goto fail;
> +        dst->needs_fg = 1;
> +    }
> +
>       ret  = av_buffer_replace(&dst->qscale_table_buf, src->qscale_table_buf);
>       ret |= av_buffer_replace(&dst->mb_type_buf, src->mb_type_buf);
>       ret |= av_buffer_replace(&dst->pps_buf, src->pps_buf);
> diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c
> index 65d0259a71..2b85f807df 100644
> --- a/libavcodec/h264_slice.c
> +++ b/libavcodec/h264_slice.c
> @@ -197,6 +197,21 @@ static int alloc_picture(H264Context *h, H264Picture *pic)
>       if (ret < 0)
>           goto fail;
>   
> +    if (h->sei.film_grain_characteristics.present &&
> +        !(h->avctx->export_side_data & AV_CODEC_EXPORT_DATA_FILM_GRAIN))
> +    {
> +        // Extra buffer required for film grain synthesis in the decoder
> +        pic->tf_grain.f = pic->f_grain;
> +        pic->f_grain->format = pic->f->format;
> +        pic->f_grain->width = pic->f->width;
> +        pic->f_grain->height = pic->f->height;
> +        ret = ff_thread_get_buffer(h->avctx, &pic->tf_grain, 0);
> +        if (ret < 0)
> +            goto fail;
> +        pic->needs_fg = 1;
> +    }
> +
> +
>       if (h->avctx->hwaccel) {
>           const AVHWAccel *hwaccel = h->avctx->hwaccel;
>           av_assert0(!pic->hwaccel_picture_private);
> @@ -1329,8 +1344,7 @@ static int h264_export_frame_props(H264Context *h)
>       }
>       h->sei.unregistered.nb_buf_ref = 0;
>   
> -    if (h->sei.film_grain_characteristics.present &&
> -        (h->avctx->export_side_data & AV_CODEC_EXPORT_DATA_FILM_GRAIN)) {
> +    if (h->sei.film_grain_characteristics.present) {
>           H264SEIFilmGrainCharacteristics *fgc = &h->sei.film_grain_characteristics;
>           AVFilmGrainParams *fgp = av_film_grain_params_create_side_data(out);
>           if (!fgp)
> diff --git a/libavcodec/h264dec.c b/libavcodec/h264dec.c
> index 38f8967265..d5878b8a13 100644
> --- a/libavcodec/h264dec.c
> +++ b/libavcodec/h264dec.c
> @@ -275,9 +275,22 @@ int ff_h264_slice_context_init(H264Context *h, H264SliceContext *sl)
>       return 0;
>   }
>   
> +static int h264_init_pic(H264Picture *pic)
> +{
> +    pic->f = av_frame_alloc();
> +    if (!pic->f)
> +        return AVERROR(ENOMEM);
> +
> +    pic->f_grain = av_frame_alloc();
> +    if (!pic->f_grain)
> +        return AVERROR(ENOMEM);
> +
> +    return 0;
> +}
> +
>   static int h264_init_context(AVCodecContext *avctx, H264Context *h)
>   {
> -    int i;
> +    int i, ret;
>   
>       h->avctx                 = avctx;
>       h->cur_chroma_format_idc = -1;
> @@ -308,18 +321,15 @@ static int h264_init_context(AVCodecContext *avctx, H264Context *h)
>       }
>   
>       for (i = 0; i < H264_MAX_PICTURE_COUNT; i++) {
> -        h->DPB[i].f = av_frame_alloc();
> -        if (!h->DPB[i].f)
> -            return AVERROR(ENOMEM);
> +        if ((ret = h264_init_pic(&h->DPB[i])) < 0)
> +            return ret;
>       }
>   
> -    h->cur_pic.f = av_frame_alloc();
> -    if (!h->cur_pic.f)
> -        return AVERROR(ENOMEM);
> +    if ((ret = h264_init_pic(&h->cur_pic)) < 0)
> +        return ret;
>   
> -    h->last_pic_for_ec.f = av_frame_alloc();
> -    if (!h->last_pic_for_ec.f)
> -        return AVERROR(ENOMEM);
> +    if ((ret = h264_init_pic(&h->last_pic_for_ec)) < 0)
> +        return ret;
>   
>       for (i = 0; i < h->nb_slice_ctx; i++)
>           h->slice_ctx[i].h264 = h;
> @@ -826,6 +836,21 @@ static int output_frame(H264Context *h, AVFrame *dst, H264Picture *srcp)
>       AVFrame *src = srcp->f;
>       int ret;
>   
> +    if (srcp->needs_fg) {
> +        AVFrameSideData *sd = av_frame_get_side_data(src, AV_FRAME_DATA_FILM_GRAIN_PARAMS);
> +        av_assert0(sd);
> +        ret = ff_h274_apply_film_grain(srcp->f_grain, src, &h->h274db,
> +                                       (AVFilmGrainParams *) sd->data);
> +        if (!ret) {
> +            if ((ret = av_frame_copy_props(srcp->f_grain, src)) < 0)

TSAN reports a race here. update_context_from_thread() from the next 
thread is already running at this point as is trying to sync the 
H264Pictures, so you're replacing fields in srcp->f_grain at the same 
time they are being read to be copied onto another thread's context.

Since this threads owns dst, you'd can copy the props directly to it 
instead.

> +                return ret;
> +            src = srcp->f_grain;
> +        } else {
> +            av_log(h->avctx, AV_LOG_WARNING, "Failed synthesizing film "
> +                   "grain, ignoring: %s\n", av_err2str(ret));
> +        }
> +    }
> +
>       ret = av_frame_ref(dst, src);
>       if (ret < 0)
>           return ret;
> @@ -841,6 +866,9 @@ static int output_frame(H264Context *h, AVFrame *dst, H264Picture *srcp)
>               goto fail;
>       }
>   
> +    if (!(h->avctx->export_side_data & AV_CODEC_EXPORT_DATA_FILM_GRAIN))
> +        av_frame_remove_side_data(dst, AV_FRAME_DATA_FILM_GRAIN_PARAMS);
> +
>       return 0;
>   fail:
>       av_frame_unref(dst);
> diff --git a/libavcodec/h264dec.h b/libavcodec/h264dec.h
> index 7c419de051..7d11d81f99 100644
> --- a/libavcodec/h264dec.h
> +++ b/libavcodec/h264dec.h
> @@ -43,6 +43,7 @@
>   #include "h264dsp.h"
>   #include "h264pred.h"
>   #include "h264qpel.h"
> +#include "h274.h"
>   #include "internal.h"
>   #include "mpegutils.h"
>   #include "parser.h"
> @@ -130,6 +131,9 @@ typedef struct H264Picture {
>       AVFrame *f;
>       ThreadFrame tf;
>   
> +    AVFrame *f_grain;   ///< extra buffer for film grain synthesis
> +    ThreadFrame tf_grain;
> +
>       AVBufferRef *qscale_table_buf;
>       int8_t *qscale_table;
>   
> @@ -162,6 +166,7 @@ typedef struct H264Picture {
>       int recovered;          ///< picture at IDR or recovery point + recovery count
>       int invalid_gap;
>       int sei_recovery_frame_cnt;
> +    int needs_fg;           ///< whether picture needs film grain synthesis (see `f_grain`)
>   
>       AVBufferRef *pps_buf;
>       const PPS   *pps;
> @@ -349,6 +354,7 @@ typedef struct H264Context {
>       H264DSPContext h264dsp;
>       H264ChromaContext h264chroma;
>       H264QpelContext h264qpel;
> +    H274FilmGrainDatabase h274db;
>   
>       H264Picture DPB[H264_MAX_PICTURE_COUNT];
>       H264Picture *cur_pic_ptr;
>
Michael Niedermayer Aug. 15, 2021, 5:11 p.m. UTC | #2
On Sat, Aug 14, 2021 at 01:36:20PM +0200, Niklas Haas wrote:
> From: Niklas Haas <git@haasn.dev>
> 
> Because we need access to ref frames without film grain applied, we have
> to add an extra AVFrame to H264Picture to avoid messing with the
> original. This requires some amount of overhead to make the reference
> moves work out, but it allows us to benefit from frame multithreading
> for film grain application "for free".
> 
> Unfortunately, this approach requires twice as much RAM to be constantly
> allocated, due to the need for an extra buffer per H264Picture. In
> theory, we could get away with freeing up this memory as soon as it's no
> longer needed, but trying to call ff_thread_release_buffer() from
> output_frame() simply deadlocks the decoder and I haven't figured out
> why. Patches welcome(tm)
> 
> Tested on all three cases of (no fg), (fg present but exported) and (fg
> present and not exported), with and without threading.
> ---
>  libavcodec/h264_picture.c | 24 +++++++++++++++++++-
>  libavcodec/h264_slice.c   | 18 +++++++++++++--
>  libavcodec/h264dec.c      | 48 +++++++++++++++++++++++++++++++--------
>  libavcodec/h264dec.h      |  6 +++++
>  4 files changed, 83 insertions(+), 13 deletions(-)

[...]
> @@ -826,6 +836,21 @@ static int output_frame(H264Context *h, AVFrame *dst, H264Picture *srcp)
>      AVFrame *src = srcp->f;
>      int ret;
>  
> +    if (srcp->needs_fg) {

> +        AVFrameSideData *sd = av_frame_get_side_data(src, AV_FRAME_DATA_FILM_GRAIN_PARAMS);
> +        av_assert0(sd);

Assertion is not correct to check for failure

This would kill the process with the lib and app


[...]
Niklas Haas Aug. 17, 2021, 7:24 p.m. UTC | #3
On Sun, 15 Aug 2021 19:11:42 +0200 Michael Niedermayer <michael@niedermayer.cc> wrote:
> On Sat, Aug 14, 2021 at 01:36:20PM +0200, Niklas Haas wrote:
> > From: Niklas Haas <git@haasn.dev>
> > 
> > Because we need access to ref frames without film grain applied, we have
> > to add an extra AVFrame to H264Picture to avoid messing with the
> > original. This requires some amount of overhead to make the reference
> > moves work out, but it allows us to benefit from frame multithreading
> > for film grain application "for free".
> > 
> > Unfortunately, this approach requires twice as much RAM to be constantly
> > allocated, due to the need for an extra buffer per H264Picture. In
> > theory, we could get away with freeing up this memory as soon as it's no
> > longer needed, but trying to call ff_thread_release_buffer() from
> > output_frame() simply deadlocks the decoder and I haven't figured out
> > why. Patches welcome(tm)
> > 
> > Tested on all three cases of (no fg), (fg present but exported) and (fg
> > present and not exported), with and without threading.
> > ---
> >  libavcodec/h264_picture.c | 24 +++++++++++++++++++-
> >  libavcodec/h264_slice.c   | 18 +++++++++++++--
> >  libavcodec/h264dec.c      | 48 +++++++++++++++++++++++++++++++--------
> >  libavcodec/h264dec.h      |  6 +++++
> >  4 files changed, 83 insertions(+), 13 deletions(-)
> 
> [...]
> > @@ -826,6 +836,21 @@ static int output_frame(H264Context *h, AVFrame *dst, H264Picture *srcp)
> >      AVFrame *src = srcp->f;
> >      int ret;
> >  
> > +    if (srcp->needs_fg) {
> 
> > +        AVFrameSideData *sd = av_frame_get_side_data(src, AV_FRAME_DATA_FILM_GRAIN_PARAMS);
> > +        av_assert0(sd);
> 
> Assertion is not correct to check for failure
> 
> This would kill the process with the lib and app

This is not a failure check. The point is that it should be impossible
for `srcp->needs_fg` to be true but the side data to be absent. The
assert is just there to communicate/enforce this.

I've added an extra comment in v2.
Michael Niedermayer Aug. 18, 2021, 8:58 p.m. UTC | #4
On Tue, Aug 17, 2021 at 09:24:38PM +0200, Niklas Haas wrote:
> On Sun, 15 Aug 2021 19:11:42 +0200 Michael Niedermayer <michael@niedermayer.cc> wrote:
> > On Sat, Aug 14, 2021 at 01:36:20PM +0200, Niklas Haas wrote:
> > > From: Niklas Haas <git@haasn.dev>
> > > 
> > > Because we need access to ref frames without film grain applied, we have
> > > to add an extra AVFrame to H264Picture to avoid messing with the
> > > original. This requires some amount of overhead to make the reference
> > > moves work out, but it allows us to benefit from frame multithreading
> > > for film grain application "for free".
> > > 
> > > Unfortunately, this approach requires twice as much RAM to be constantly
> > > allocated, due to the need for an extra buffer per H264Picture. In
> > > theory, we could get away with freeing up this memory as soon as it's no
> > > longer needed, but trying to call ff_thread_release_buffer() from
> > > output_frame() simply deadlocks the decoder and I haven't figured out
> > > why. Patches welcome(tm)
> > > 
> > > Tested on all three cases of (no fg), (fg present but exported) and (fg
> > > present and not exported), with and without threading.
> > > ---
> > >  libavcodec/h264_picture.c | 24 +++++++++++++++++++-
> > >  libavcodec/h264_slice.c   | 18 +++++++++++++--
> > >  libavcodec/h264dec.c      | 48 +++++++++++++++++++++++++++++++--------
> > >  libavcodec/h264dec.h      |  6 +++++
> > >  4 files changed, 83 insertions(+), 13 deletions(-)
> > 
> > [...]
> > > @@ -826,6 +836,21 @@ static int output_frame(H264Context *h, AVFrame *dst, H264Picture *srcp)
> > >      AVFrame *src = srcp->f;
> > >      int ret;
> > >  
> > > +    if (srcp->needs_fg) {
> > 
> > > +        AVFrameSideData *sd = av_frame_get_side_data(src, AV_FRAME_DATA_FILM_GRAIN_PARAMS);
> > > +        av_assert0(sd);
> > 
> > Assertion is not correct to check for failure
> > 
> > This would kill the process with the lib and app
> 
> This is not a failure check. The point is that it should be impossible
> for `srcp->needs_fg` to be true but the side data to be absent. The
> assert is just there to communicate/enforce this.

I thought i saw it fail but it seems not to replicate now, either way there
where all kinds of other patches applied too so it might have been something
else

thx

[...]
diff mbox series

Patch

diff --git a/libavcodec/h264_picture.c b/libavcodec/h264_picture.c
index b79944b794..8284d12158 100644
--- a/libavcodec/h264_picture.c
+++ b/libavcodec/h264_picture.c
@@ -44,7 +44,7 @@ 
 
 void ff_h264_unref_picture(H264Context *h, H264Picture *pic)
 {
-    int off = offsetof(H264Picture, tf) + sizeof(pic->tf);
+    int off = offsetof(H264Picture, tf_grain) + sizeof(pic->tf_grain);
     int i;
 
     if (!pic->f || !pic->f->buf[0])
@@ -61,6 +61,9 @@  void ff_h264_unref_picture(H264Context *h, H264Picture *pic)
         av_buffer_unref(&pic->ref_index_buf[i]);
     }
 
+    if (pic->needs_fg)
+        ff_thread_release_buffer(h->avctx, &pic->tf_grain);
+
     memset((uint8_t*)pic + off, 0, sizeof(*pic) - off);
 }
 
@@ -109,6 +112,15 @@  int ff_h264_ref_picture(H264Context *h, H264Picture *dst, H264Picture *src)
     if (ret < 0)
         goto fail;
 
+    if (src->needs_fg) {
+        av_assert0(src->tf_grain.f == src->f_grain);
+        dst->tf_grain.f = dst->f_grain;
+        ret = ff_thread_ref_frame(&dst->tf_grain, &src->tf_grain);
+        if (ret < 0)
+            goto fail;
+        dst->needs_fg = 1;
+    }
+
     dst->qscale_table_buf = av_buffer_ref(src->qscale_table_buf);
     dst->mb_type_buf      = av_buffer_ref(src->mb_type_buf);
     dst->pps_buf          = av_buffer_ref(src->pps_buf);
@@ -160,6 +172,16 @@  int ff_h264_replace_picture(H264Context *h, H264Picture *dst, const H264Picture
     if (ret < 0)
         goto fail;
 
+    if (src->needs_fg) {
+        av_assert0(src->tf_grain.f == src->f_grain);
+        dst->tf_grain.f = dst->f_grain;
+        ff_thread_release_buffer(h->avctx, &dst->tf_grain);
+        ret = ff_thread_ref_frame(&dst->tf_grain, &src->tf_grain);
+        if (ret < 0)
+            goto fail;
+        dst->needs_fg = 1;
+    }
+
     ret  = av_buffer_replace(&dst->qscale_table_buf, src->qscale_table_buf);
     ret |= av_buffer_replace(&dst->mb_type_buf, src->mb_type_buf);
     ret |= av_buffer_replace(&dst->pps_buf, src->pps_buf);
diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c
index 65d0259a71..2b85f807df 100644
--- a/libavcodec/h264_slice.c
+++ b/libavcodec/h264_slice.c
@@ -197,6 +197,21 @@  static int alloc_picture(H264Context *h, H264Picture *pic)
     if (ret < 0)
         goto fail;
 
+    if (h->sei.film_grain_characteristics.present &&
+        !(h->avctx->export_side_data & AV_CODEC_EXPORT_DATA_FILM_GRAIN))
+    {
+        // Extra buffer required for film grain synthesis in the decoder
+        pic->tf_grain.f = pic->f_grain;
+        pic->f_grain->format = pic->f->format;
+        pic->f_grain->width = pic->f->width;
+        pic->f_grain->height = pic->f->height;
+        ret = ff_thread_get_buffer(h->avctx, &pic->tf_grain, 0);
+        if (ret < 0)
+            goto fail;
+        pic->needs_fg = 1;
+    }
+
+
     if (h->avctx->hwaccel) {
         const AVHWAccel *hwaccel = h->avctx->hwaccel;
         av_assert0(!pic->hwaccel_picture_private);
@@ -1329,8 +1344,7 @@  static int h264_export_frame_props(H264Context *h)
     }
     h->sei.unregistered.nb_buf_ref = 0;
 
-    if (h->sei.film_grain_characteristics.present &&
-        (h->avctx->export_side_data & AV_CODEC_EXPORT_DATA_FILM_GRAIN)) {
+    if (h->sei.film_grain_characteristics.present) {
         H264SEIFilmGrainCharacteristics *fgc = &h->sei.film_grain_characteristics;
         AVFilmGrainParams *fgp = av_film_grain_params_create_side_data(out);
         if (!fgp)
diff --git a/libavcodec/h264dec.c b/libavcodec/h264dec.c
index 38f8967265..d5878b8a13 100644
--- a/libavcodec/h264dec.c
+++ b/libavcodec/h264dec.c
@@ -275,9 +275,22 @@  int ff_h264_slice_context_init(H264Context *h, H264SliceContext *sl)
     return 0;
 }
 
+static int h264_init_pic(H264Picture *pic)
+{
+    pic->f = av_frame_alloc();
+    if (!pic->f)
+        return AVERROR(ENOMEM);
+
+    pic->f_grain = av_frame_alloc();
+    if (!pic->f_grain)
+        return AVERROR(ENOMEM);
+
+    return 0;
+}
+
 static int h264_init_context(AVCodecContext *avctx, H264Context *h)
 {
-    int i;
+    int i, ret;
 
     h->avctx                 = avctx;
     h->cur_chroma_format_idc = -1;
@@ -308,18 +321,15 @@  static int h264_init_context(AVCodecContext *avctx, H264Context *h)
     }
 
     for (i = 0; i < H264_MAX_PICTURE_COUNT; i++) {
-        h->DPB[i].f = av_frame_alloc();
-        if (!h->DPB[i].f)
-            return AVERROR(ENOMEM);
+        if ((ret = h264_init_pic(&h->DPB[i])) < 0)
+            return ret;
     }
 
-    h->cur_pic.f = av_frame_alloc();
-    if (!h->cur_pic.f)
-        return AVERROR(ENOMEM);
+    if ((ret = h264_init_pic(&h->cur_pic)) < 0)
+        return ret;
 
-    h->last_pic_for_ec.f = av_frame_alloc();
-    if (!h->last_pic_for_ec.f)
-        return AVERROR(ENOMEM);
+    if ((ret = h264_init_pic(&h->last_pic_for_ec)) < 0)
+        return ret;
 
     for (i = 0; i < h->nb_slice_ctx; i++)
         h->slice_ctx[i].h264 = h;
@@ -826,6 +836,21 @@  static int output_frame(H264Context *h, AVFrame *dst, H264Picture *srcp)
     AVFrame *src = srcp->f;
     int ret;
 
+    if (srcp->needs_fg) {
+        AVFrameSideData *sd = av_frame_get_side_data(src, AV_FRAME_DATA_FILM_GRAIN_PARAMS);
+        av_assert0(sd);
+        ret = ff_h274_apply_film_grain(srcp->f_grain, src, &h->h274db,
+                                       (AVFilmGrainParams *) sd->data);
+        if (!ret) {
+            if ((ret = av_frame_copy_props(srcp->f_grain, src)) < 0)
+                return ret;
+            src = srcp->f_grain;
+        } else {
+            av_log(h->avctx, AV_LOG_WARNING, "Failed synthesizing film "
+                   "grain, ignoring: %s\n", av_err2str(ret));
+        }
+    }
+
     ret = av_frame_ref(dst, src);
     if (ret < 0)
         return ret;
@@ -841,6 +866,9 @@  static int output_frame(H264Context *h, AVFrame *dst, H264Picture *srcp)
             goto fail;
     }
 
+    if (!(h->avctx->export_side_data & AV_CODEC_EXPORT_DATA_FILM_GRAIN))
+        av_frame_remove_side_data(dst, AV_FRAME_DATA_FILM_GRAIN_PARAMS);
+
     return 0;
 fail:
     av_frame_unref(dst);
diff --git a/libavcodec/h264dec.h b/libavcodec/h264dec.h
index 7c419de051..7d11d81f99 100644
--- a/libavcodec/h264dec.h
+++ b/libavcodec/h264dec.h
@@ -43,6 +43,7 @@ 
 #include "h264dsp.h"
 #include "h264pred.h"
 #include "h264qpel.h"
+#include "h274.h"
 #include "internal.h"
 #include "mpegutils.h"
 #include "parser.h"
@@ -130,6 +131,9 @@  typedef struct H264Picture {
     AVFrame *f;
     ThreadFrame tf;
 
+    AVFrame *f_grain;   ///< extra buffer for film grain synthesis
+    ThreadFrame tf_grain;
+
     AVBufferRef *qscale_table_buf;
     int8_t *qscale_table;
 
@@ -162,6 +166,7 @@  typedef struct H264Picture {
     int recovered;          ///< picture at IDR or recovery point + recovery count
     int invalid_gap;
     int sei_recovery_frame_cnt;
+    int needs_fg;           ///< whether picture needs film grain synthesis (see `f_grain`)
 
     AVBufferRef *pps_buf;
     const PPS   *pps;
@@ -349,6 +354,7 @@  typedef struct H264Context {
     H264DSPContext h264dsp;
     H264ChromaContext h264chroma;
     H264QpelContext h264qpel;
+    H274FilmGrainDatabase h274db;
 
     H264Picture DPB[H264_MAX_PICTURE_COUNT];
     H264Picture *cur_pic_ptr;