Message ID | 20210817195456.27224-1-ffmpeg@haasn.xyz |
---|---|
State | Accepted |
Commit | 66845cffc3bbb17f91294d15cd6f57f3df3bce97 |
Headers | show |
Series | [FFmpeg-devel,v3] avcodec/h264dec: apply H.274 film grain | expand |
Context | Check | Description |
---|---|---|
andriy/x86_make | fail | Make failed |
andriy/PPC64_make | warning | Make failed |
On 8/17/2021 4:54 PM, Niklas Haas wrote: > From: Niklas Haas <git@haasn.dev> > > Because we need access to ref frames without film grain applied, we have > to add an extra AVFrame to H264Picture to avoid messing with the > original. This requires some amount of overhead to make the reference > moves work out, but it allows us to benefit from frame multithreading > for film grain application "for free". > > Unfortunately, this approach requires twice as much RAM to be constantly > allocated for ref frames, due to the need for an extra buffer per > H264Picture. In theory, we could get away with freeing up this memory as > soon as it's no longer needed (since ref frames do not need film grain > buffers any longer), but trying to call ff_thread_release_buffer() from > output_frame() conflicts with possible later accesses to that same frame > and I'm not sure how to synchronize that well. > > Tested on all three cases of (no fg), (fg present but exported) and (fg > present and not exported), with and without threading. > > Signed-off-by: Niklas Haas <git@haasn.dev> > --- > libavcodec/h264_picture.c | 35 +++++++++++++++++++++++-- > libavcodec/h264_slice.c | 16 ++++++++++-- > libavcodec/h264dec.c | 55 ++++++++++++++++++++++++++------------- > libavcodec/h264dec.h | 6 +++++ > 4 files changed, 90 insertions(+), 22 deletions(-) Looks good now, so I'll apply it soon.
> -----Original Message----- > From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of Niklas Haas > Sent: Tuesday, August 17, 2021 12:55 PM > To: ffmpeg-devel@ffmpeg.org > Cc: Niklas Haas <git@haasn.dev> > Subject: [FFmpeg-devel] [PATCH v3] avcodec/h264dec: apply H.274 film grain > > From: Niklas Haas <git@haasn.dev> > > Because we need access to ref frames without film grain applied, we have > to add an extra AVFrame to H264Picture to avoid messing with the > original. This requires some amount of overhead to make the reference > moves work out, but it allows us to benefit from frame multithreading > for film grain application "for free". > > Unfortunately, this approach requires twice as much RAM to be constantly > allocated for ref frames, due to the need for an extra buffer per > H264Picture. In theory, we could get away with freeing up this memory as > soon as it's no longer needed (since ref frames do not need film grain > buffers any longer), but trying to call ff_thread_release_buffer() from > output_frame() conflicts with possible later accesses to that same frame > and I'm not sure how to synchronize that well. > > Tested on all three cases of (no fg), (fg present but exported) and (fg > present and not exported), with and without threading. > > Signed-off-by: Niklas Haas <git@haasn.dev> > --- > libavcodec/h264_picture.c | 35 +++++++++++++++++++++++-- > libavcodec/h264_slice.c | 16 ++++++++++-- > libavcodec/h264dec.c | 55 ++++++++++++++++++++++++++------------- > libavcodec/h264dec.h | 6 +++++ > 4 files changed, 90 insertions(+), 22 deletions(-) > > diff --git a/libavcodec/h264_picture.c b/libavcodec/h264_picture.c > index ff30166b4d..5944798394 100644 > --- a/libavcodec/h264_picture.c > +++ b/libavcodec/h264_picture.c > @@ -43,13 +43,14 @@ > > void ff_h264_unref_picture(H264Context *h, H264Picture *pic) > { > - int off = offsetof(H264Picture, tf) + sizeof(pic->tf); > + int off = offsetof(H264Picture, tf_grain) + sizeof(pic->tf_grain); > int i; > > if (!pic->f || !pic->f->buf[0]) > return; > > ff_thread_release_buffer(h->avctx, &pic->tf); > + ff_thread_release_buffer(h->avctx, &pic->tf_grain); > av_buffer_unref(&pic->hwaccel_priv_buf); > > av_buffer_unref(&pic->qscale_table_buf); > @@ -93,6 +94,7 @@ static void h264_copy_picture_params(H264Picture *dst, const H264Picture *src) > dst->mb_width = src->mb_width; > dst->mb_height = src->mb_height; > dst->mb_stride = src->mb_stride; > + dst->needs_fg = src->needs_fg; > } > > int ff_h264_ref_picture(H264Context *h, H264Picture *dst, H264Picture *src) > @@ -108,6 +110,14 @@ int ff_h264_ref_picture(H264Context *h, H264Picture *dst, H264Picture *src) > if (ret < 0) > goto fail; > > + if (src->needs_fg) { > + av_assert0(src->tf_grain.f == src->f_grain); > + dst->tf_grain.f = dst->f_grain; > + ret = ff_thread_ref_frame(&dst->tf_grain, &src->tf_grain); > + if (ret < 0) > + goto fail; > + } > + > dst->qscale_table_buf = av_buffer_ref(src->qscale_table_buf); > dst->mb_type_buf = av_buffer_ref(src->mb_type_buf); > dst->pps_buf = av_buffer_ref(src->pps_buf); > @@ -159,6 +169,15 @@ int ff_h264_replace_picture(H264Context *h, H264Picture *dst, const H264Picture > if (ret < 0) > goto fail; > > + if (src->needs_fg) { > + av_assert0(src->tf_grain.f == src->f_grain); > + dst->tf_grain.f = dst->f_grain; > + ff_thread_release_buffer(h->avctx, &dst->tf_grain); > + ret = ff_thread_ref_frame(&dst->tf_grain, &src->tf_grain); > + if (ret < 0) > + goto fail; > + } > + > ret = av_buffer_replace(&dst->qscale_table_buf, src->qscale_table_buf); > ret |= av_buffer_replace(&dst->mb_type_buf, src->mb_type_buf); > ret |= av_buffer_replace(&dst->pps_buf, src->pps_buf); > @@ -212,6 +231,7 @@ void ff_h264_set_erpic(ERPicture *dst, H264Picture *src) > int ff_h264_field_end(H264Context *h, H264SliceContext *sl, int in_setup) > { > AVCodecContext *const avctx = h->avctx; > + H264Picture *cur = h->cur_pic_ptr; > int err = 0; > h->mb_y = 0; > > @@ -230,10 +250,21 @@ int ff_h264_field_end(H264Context *h, H264SliceContext *sl, int in_setup) > if (err < 0) > av_log(avctx, AV_LOG_ERROR, > "hardware accelerator failed to decode picture\n"); > + } else if (!in_setup && cur->needs_fg) { > + AVFrameSideData *sd = av_frame_get_side_data(cur->f, AV_FRAME_DATA_FILM_GRAIN_PARAMS); > + av_assert0(sd); // always present if `cur->needs_fg` > + err = ff_h274_apply_film_grain(cur->f_grain, cur->f, &h->h274db, > + (AVFilmGrainParams *) sd->data); > + if (err < 0) { > + av_log(h->avctx, AV_LOG_WARNING, "Failed synthesizing film " > + "grain, ignoring: %s\n", av_err2str(err)); > + cur->needs_fg = 0; > + err = 0; > + } > } > > if (!in_setup && !h->droppable) > - ff_thread_report_progress(&h->cur_pic_ptr->tf, INT_MAX, > + ff_thread_report_progress(&cur->tf, INT_MAX, > h->picture_structure == PICT_BOTTOM_FIELD); > emms_c(); > > diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c > index 9244d2d5dd..98ca8836db 100644 > --- a/libavcodec/h264_slice.c > +++ b/libavcodec/h264_slice.c > @@ -197,6 +197,16 @@ static int alloc_picture(H264Context *h, H264Picture *pic) > if (ret < 0) > goto fail; > > + if (pic->needs_fg) { > + pic->tf_grain.f = pic->f_grain; > + pic->f_grain->format = pic->f->format; > + pic->f_grain->width = pic->f->width; > + pic->f_grain->height = pic->f->height; > + ret = ff_thread_get_buffer(h->avctx, &pic->tf_grain, 0); > + if (ret < 0) > + goto fail; > + } > + > if (h->avctx->hwaccel) { > const AVHWAccel *hwaccel = h->avctx->hwaccel; > av_assert0(!pic->hwaccel_picture_private); > @@ -517,6 +527,9 @@ static int h264_frame_start(H264Context *h) > pic->f->crop_top = h->crop_top; > pic->f->crop_bottom = h->crop_bottom; > > + pic->needs_fg = h->sei.film_grain_characteristics.present && > + !(h->avctx->export_side_data & AV_CODEC_EXPORT_DATA_FILM_GRAIN); > + > if ((ret = alloc_picture(h, pic)) < 0) > return ret; > > @@ -1328,8 +1341,7 @@ static int h264_export_frame_props(H264Context *h) > } > h->sei.unregistered.nb_buf_ref = 0; > > - if (h->sei.film_grain_characteristics.present && > - (h->avctx->export_side_data & AV_CODEC_EXPORT_DATA_FILM_GRAIN)) { > + if (h->sei.film_grain_characteristics.present) { > H264SEIFilmGrainCharacteristics *fgc = &h->sei.film_grain_characteristics; > AVFilmGrainParams *fgp = av_film_grain_params_create_side_data(out); > if (!fgp) > diff --git a/libavcodec/h264dec.c b/libavcodec/h264dec.c > index dc99ee995e..a64c93eca4 100644 > --- a/libavcodec/h264dec.c > +++ b/libavcodec/h264dec.c > @@ -275,9 +275,22 @@ int ff_h264_slice_context_init(H264Context *h, H264SliceContext *sl) > return 0; > } > > +static int h264_init_pic(H264Picture *pic) > +{ > + pic->f = av_frame_alloc(); > + if (!pic->f) > + return AVERROR(ENOMEM); > + > + pic->f_grain = av_frame_alloc(); > + if (!pic->f_grain) > + return AVERROR(ENOMEM); > + > + return 0; > +} > + > static int h264_init_context(AVCodecContext *avctx, H264Context *h) > { > - int i; > + int i, ret; > > h->avctx = avctx; > h->cur_chroma_format_idc = -1; > @@ -308,18 +321,15 @@ static int h264_init_context(AVCodecContext *avctx, H264Context *h) > } > > for (i = 0; i < H264_MAX_PICTURE_COUNT; i++) { > - h->DPB[i].f = av_frame_alloc(); > - if (!h->DPB[i].f) > - return AVERROR(ENOMEM); > + if ((ret = h264_init_pic(&h->DPB[i])) < 0) > + return ret; > } > > - h->cur_pic.f = av_frame_alloc(); > - if (!h->cur_pic.f) > - return AVERROR(ENOMEM); > + if ((ret = h264_init_pic(&h->cur_pic)) < 0) > + return ret; > > - h->last_pic_for_ec.f = av_frame_alloc(); > - if (!h->last_pic_for_ec.f) > - return AVERROR(ENOMEM); > + if ((ret = h264_init_pic(&h->last_pic_for_ec)) < 0) > + return ret; > > for (i = 0; i < h->nb_slice_ctx; i++) > h->slice_ctx[i].h264 = h; > @@ -327,6 +337,13 @@ static int h264_init_context(AVCodecContext *avctx, H264Context *h) > return 0; > } > > +static void h264_free_pic(H264Context *h, H264Picture *pic) > +{ > + ff_h264_unref_picture(h, pic); > + av_frame_free(&pic->f); > + av_frame_free(&pic->f_grain); > +} > + > static av_cold int h264_decode_end(AVCodecContext *avctx) > { > H264Context *h = avctx->priv_data; > @@ -336,8 +353,7 @@ static av_cold int h264_decode_end(AVCodecContext *avctx) > ff_h264_free_tables(h); > > for (i = 0; i < H264_MAX_PICTURE_COUNT; i++) { > - ff_h264_unref_picture(h, &h->DPB[i]); > - av_frame_free(&h->DPB[i].f); > + h264_free_pic(h, &h->DPB[i]); > } > memset(h->delayed_pic, 0, sizeof(h->delayed_pic)); > > @@ -351,10 +367,8 @@ static av_cold int h264_decode_end(AVCodecContext *avctx) > > ff_h2645_packet_uninit(&h->pkt); > > - ff_h264_unref_picture(h, &h->cur_pic); > - av_frame_free(&h->cur_pic.f); > - ff_h264_unref_picture(h, &h->last_pic_for_ec); > - av_frame_free(&h->last_pic_for_ec.f); > + h264_free_pic(h, &h->cur_pic); > + h264_free_pic(h, &h->last_pic_for_ec); > > return 0; > } > @@ -837,13 +851,15 @@ static int h264_export_enc_params(AVFrame *f, H264Picture *p) > > static int output_frame(H264Context *h, AVFrame *dst, H264Picture *srcp) > { > - AVFrame *src = srcp->f; > int ret; > > - ret = av_frame_ref(dst, src); > + ret = av_frame_ref(dst, srcp->needs_fg ? srcp->f_grain : srcp->f); > if (ret < 0) > return ret; > > + if (srcp->needs_fg && (ret = av_frame_copy_props(dst, srcp->f)) < 0) > + return ret; > + > av_dict_set(&dst->metadata, "stereo_mode", ff_h264_sei_stereo_mode(&h->sei.frame_packing), 0); > > if (srcp->sei_recovery_frame_cnt == 0) > @@ -855,6 +871,9 @@ static int output_frame(H264Context *h, AVFrame *dst, H264Picture *srcp) > goto fail; > } > > + if (!(h->avctx->export_side_data & AV_CODEC_EXPORT_DATA_FILM_GRAIN)) > + av_frame_remove_side_data(dst, AV_FRAME_DATA_FILM_GRAIN_PARAMS); > + > return 0; > fail: > av_frame_unref(dst); > diff --git a/libavcodec/h264dec.h b/libavcodec/h264dec.h > index 7c419de051..87c4e4e539 100644 > --- a/libavcodec/h264dec.h > +++ b/libavcodec/h264dec.h > @@ -43,6 +43,7 @@ > #include "h264dsp.h" > #include "h264pred.h" > #include "h264qpel.h" > +#include "h274.h" > #include "internal.h" > #include "mpegutils.h" > #include "parser.h" > @@ -130,6 +131,9 @@ typedef struct H264Picture { > AVFrame *f; > ThreadFrame tf; > > + AVFrame *f_grain; > + ThreadFrame tf_grain; > + > AVBufferRef *qscale_table_buf; > int8_t *qscale_table; > > @@ -162,6 +166,7 @@ typedef struct H264Picture { > int recovered; ///< picture at IDR or recovery point + recovery count > int invalid_gap; > int sei_recovery_frame_cnt; > + int needs_fg; ///< whether picture needs film grain synthesis (see `f_grain`) > > AVBufferRef *pps_buf; > const PPS *pps; > @@ -349,6 +354,7 @@ typedef struct H264Context { > H264DSPContext h264dsp; > H264ChromaContext h264chroma; > H264QpelContext h264qpel; > + H274FilmGrainDatabase h274db; > > H264Picture DPB[H264_MAX_PICTURE_COUNT]; > H264Picture *cur_pic_ptr; > -- > 2.32.0 > Sorry, replied to v2 first... This causes regression reported here https://trac.ffmpeg.org/ticket/9389 > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
On 8/25/2021 3:06 PM, Eoff, Ullysses A wrote: >> -----Original Message----- >> From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of Niklas Haas >> Sent: Tuesday, August 17, 2021 12:55 PM >> To: ffmpeg-devel@ffmpeg.org >> Cc: Niklas Haas <git@haasn.dev> >> Subject: [FFmpeg-devel] [PATCH v3] avcodec/h264dec: apply H.274 film grain >> >> From: Niklas Haas <git@haasn.dev> >> >> Because we need access to ref frames without film grain applied, we have >> to add an extra AVFrame to H264Picture to avoid messing with the >> original. This requires some amount of overhead to make the reference >> moves work out, but it allows us to benefit from frame multithreading >> for film grain application "for free". >> >> Unfortunately, this approach requires twice as much RAM to be constantly >> allocated for ref frames, due to the need for an extra buffer per >> H264Picture. In theory, we could get away with freeing up this memory as >> soon as it's no longer needed (since ref frames do not need film grain >> buffers any longer), but trying to call ff_thread_release_buffer() from >> output_frame() conflicts with possible later accesses to that same frame >> and I'm not sure how to synchronize that well. >> >> Tested on all three cases of (no fg), (fg present but exported) and (fg >> present and not exported), with and without threading. >> >> Signed-off-by: Niklas Haas <git@haasn.dev> >> --- >> libavcodec/h264_picture.c | 35 +++++++++++++++++++++++-- >> libavcodec/h264_slice.c | 16 ++++++++++-- >> libavcodec/h264dec.c | 55 ++++++++++++++++++++++++++------------- >> libavcodec/h264dec.h | 6 +++++ >> 4 files changed, 90 insertions(+), 22 deletions(-) >> >> diff --git a/libavcodec/h264_picture.c b/libavcodec/h264_picture.c >> index ff30166b4d..5944798394 100644 >> --- a/libavcodec/h264_picture.c >> +++ b/libavcodec/h264_picture.c >> @@ -43,13 +43,14 @@ >> >> void ff_h264_unref_picture(H264Context *h, H264Picture *pic) >> { >> - int off = offsetof(H264Picture, tf) + sizeof(pic->tf); >> + int off = offsetof(H264Picture, tf_grain) + sizeof(pic->tf_grain); >> int i; >> >> if (!pic->f || !pic->f->buf[0]) >> return; >> >> ff_thread_release_buffer(h->avctx, &pic->tf); >> + ff_thread_release_buffer(h->avctx, &pic->tf_grain); >> av_buffer_unref(&pic->hwaccel_priv_buf); >> >> av_buffer_unref(&pic->qscale_table_buf); >> @@ -93,6 +94,7 @@ static void h264_copy_picture_params(H264Picture *dst, const H264Picture *src) >> dst->mb_width = src->mb_width; >> dst->mb_height = src->mb_height; >> dst->mb_stride = src->mb_stride; >> + dst->needs_fg = src->needs_fg; >> } >> >> int ff_h264_ref_picture(H264Context *h, H264Picture *dst, H264Picture *src) >> @@ -108,6 +110,14 @@ int ff_h264_ref_picture(H264Context *h, H264Picture *dst, H264Picture *src) >> if (ret < 0) >> goto fail; >> >> + if (src->needs_fg) { >> + av_assert0(src->tf_grain.f == src->f_grain); >> + dst->tf_grain.f = dst->f_grain; >> + ret = ff_thread_ref_frame(&dst->tf_grain, &src->tf_grain); >> + if (ret < 0) >> + goto fail; >> + } >> + >> dst->qscale_table_buf = av_buffer_ref(src->qscale_table_buf); >> dst->mb_type_buf = av_buffer_ref(src->mb_type_buf); >> dst->pps_buf = av_buffer_ref(src->pps_buf); >> @@ -159,6 +169,15 @@ int ff_h264_replace_picture(H264Context *h, H264Picture *dst, const H264Picture >> if (ret < 0) >> goto fail; >> >> + if (src->needs_fg) { >> + av_assert0(src->tf_grain.f == src->f_grain); >> + dst->tf_grain.f = dst->f_grain; >> + ff_thread_release_buffer(h->avctx, &dst->tf_grain); >> + ret = ff_thread_ref_frame(&dst->tf_grain, &src->tf_grain); >> + if (ret < 0) >> + goto fail; >> + } >> + >> ret = av_buffer_replace(&dst->qscale_table_buf, src->qscale_table_buf); >> ret |= av_buffer_replace(&dst->mb_type_buf, src->mb_type_buf); >> ret |= av_buffer_replace(&dst->pps_buf, src->pps_buf); >> @@ -212,6 +231,7 @@ void ff_h264_set_erpic(ERPicture *dst, H264Picture *src) >> int ff_h264_field_end(H264Context *h, H264SliceContext *sl, int in_setup) >> { >> AVCodecContext *const avctx = h->avctx; >> + H264Picture *cur = h->cur_pic_ptr; >> int err = 0; >> h->mb_y = 0; >> >> @@ -230,10 +250,21 @@ int ff_h264_field_end(H264Context *h, H264SliceContext *sl, int in_setup) >> if (err < 0) >> av_log(avctx, AV_LOG_ERROR, >> "hardware accelerator failed to decode picture\n"); >> + } else if (!in_setup && cur->needs_fg) { >> + AVFrameSideData *sd = av_frame_get_side_data(cur->f, AV_FRAME_DATA_FILM_GRAIN_PARAMS); >> + av_assert0(sd); // always present if `cur->needs_fg` >> + err = ff_h274_apply_film_grain(cur->f_grain, cur->f, &h->h274db, >> + (AVFilmGrainParams *) sd->data); >> + if (err < 0) { >> + av_log(h->avctx, AV_LOG_WARNING, "Failed synthesizing film " >> + "grain, ignoring: %s\n", av_err2str(err)); >> + cur->needs_fg = 0; >> + err = 0; >> + } >> } >> >> if (!in_setup && !h->droppable) >> - ff_thread_report_progress(&h->cur_pic_ptr->tf, INT_MAX, >> + ff_thread_report_progress(&cur->tf, INT_MAX, >> h->picture_structure == PICT_BOTTOM_FIELD); >> emms_c(); >> >> diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c >> index 9244d2d5dd..98ca8836db 100644 >> --- a/libavcodec/h264_slice.c >> +++ b/libavcodec/h264_slice.c >> @@ -197,6 +197,16 @@ static int alloc_picture(H264Context *h, H264Picture *pic) >> if (ret < 0) >> goto fail; >> >> + if (pic->needs_fg) { >> + pic->tf_grain.f = pic->f_grain; >> + pic->f_grain->format = pic->f->format; >> + pic->f_grain->width = pic->f->width; >> + pic->f_grain->height = pic->f->height; >> + ret = ff_thread_get_buffer(h->avctx, &pic->tf_grain, 0); >> + if (ret < 0) >> + goto fail; >> + } >> + >> if (h->avctx->hwaccel) { >> const AVHWAccel *hwaccel = h->avctx->hwaccel; >> av_assert0(!pic->hwaccel_picture_private); >> @@ -517,6 +527,9 @@ static int h264_frame_start(H264Context *h) >> pic->f->crop_top = h->crop_top; >> pic->f->crop_bottom = h->crop_bottom; >> >> + pic->needs_fg = h->sei.film_grain_characteristics.present && >> + !(h->avctx->export_side_data & AV_CODEC_EXPORT_DATA_FILM_GRAIN); >> + >> if ((ret = alloc_picture(h, pic)) < 0) >> return ret; >> >> @@ -1328,8 +1341,7 @@ static int h264_export_frame_props(H264Context *h) >> } >> h->sei.unregistered.nb_buf_ref = 0; >> >> - if (h->sei.film_grain_characteristics.present && >> - (h->avctx->export_side_data & AV_CODEC_EXPORT_DATA_FILM_GRAIN)) { >> + if (h->sei.film_grain_characteristics.present) { >> H264SEIFilmGrainCharacteristics *fgc = &h->sei.film_grain_characteristics; >> AVFilmGrainParams *fgp = av_film_grain_params_create_side_data(out); >> if (!fgp) >> diff --git a/libavcodec/h264dec.c b/libavcodec/h264dec.c >> index dc99ee995e..a64c93eca4 100644 >> --- a/libavcodec/h264dec.c >> +++ b/libavcodec/h264dec.c >> @@ -275,9 +275,22 @@ int ff_h264_slice_context_init(H264Context *h, H264SliceContext *sl) >> return 0; >> } >> >> +static int h264_init_pic(H264Picture *pic) >> +{ >> + pic->f = av_frame_alloc(); >> + if (!pic->f) >> + return AVERROR(ENOMEM); >> + >> + pic->f_grain = av_frame_alloc(); >> + if (!pic->f_grain) >> + return AVERROR(ENOMEM); >> + >> + return 0; >> +} >> + >> static int h264_init_context(AVCodecContext *avctx, H264Context *h) >> { >> - int i; >> + int i, ret; >> >> h->avctx = avctx; >> h->cur_chroma_format_idc = -1; >> @@ -308,18 +321,15 @@ static int h264_init_context(AVCodecContext *avctx, H264Context *h) >> } >> >> for (i = 0; i < H264_MAX_PICTURE_COUNT; i++) { >> - h->DPB[i].f = av_frame_alloc(); >> - if (!h->DPB[i].f) >> - return AVERROR(ENOMEM); >> + if ((ret = h264_init_pic(&h->DPB[i])) < 0) >> + return ret; >> } >> >> - h->cur_pic.f = av_frame_alloc(); >> - if (!h->cur_pic.f) >> - return AVERROR(ENOMEM); >> + if ((ret = h264_init_pic(&h->cur_pic)) < 0) >> + return ret; >> >> - h->last_pic_for_ec.f = av_frame_alloc(); >> - if (!h->last_pic_for_ec.f) >> - return AVERROR(ENOMEM); >> + if ((ret = h264_init_pic(&h->last_pic_for_ec)) < 0) >> + return ret; >> >> for (i = 0; i < h->nb_slice_ctx; i++) >> h->slice_ctx[i].h264 = h; >> @@ -327,6 +337,13 @@ static int h264_init_context(AVCodecContext *avctx, H264Context *h) >> return 0; >> } >> >> +static void h264_free_pic(H264Context *h, H264Picture *pic) >> +{ >> + ff_h264_unref_picture(h, pic); >> + av_frame_free(&pic->f); >> + av_frame_free(&pic->f_grain); >> +} >> + >> static av_cold int h264_decode_end(AVCodecContext *avctx) >> { >> H264Context *h = avctx->priv_data; >> @@ -336,8 +353,7 @@ static av_cold int h264_decode_end(AVCodecContext *avctx) >> ff_h264_free_tables(h); >> >> for (i = 0; i < H264_MAX_PICTURE_COUNT; i++) { >> - ff_h264_unref_picture(h, &h->DPB[i]); >> - av_frame_free(&h->DPB[i].f); >> + h264_free_pic(h, &h->DPB[i]); >> } >> memset(h->delayed_pic, 0, sizeof(h->delayed_pic)); >> >> @@ -351,10 +367,8 @@ static av_cold int h264_decode_end(AVCodecContext *avctx) >> >> ff_h2645_packet_uninit(&h->pkt); >> >> - ff_h264_unref_picture(h, &h->cur_pic); >> - av_frame_free(&h->cur_pic.f); >> - ff_h264_unref_picture(h, &h->last_pic_for_ec); >> - av_frame_free(&h->last_pic_for_ec.f); >> + h264_free_pic(h, &h->cur_pic); >> + h264_free_pic(h, &h->last_pic_for_ec); >> >> return 0; >> } >> @@ -837,13 +851,15 @@ static int h264_export_enc_params(AVFrame *f, H264Picture *p) >> >> static int output_frame(H264Context *h, AVFrame *dst, H264Picture *srcp) >> { >> - AVFrame *src = srcp->f; >> int ret; >> >> - ret = av_frame_ref(dst, src); >> + ret = av_frame_ref(dst, srcp->needs_fg ? srcp->f_grain : srcp->f); >> if (ret < 0) >> return ret; >> >> + if (srcp->needs_fg && (ret = av_frame_copy_props(dst, srcp->f)) < 0) >> + return ret; >> + >> av_dict_set(&dst->metadata, "stereo_mode", ff_h264_sei_stereo_mode(&h->sei.frame_packing), 0); >> >> if (srcp->sei_recovery_frame_cnt == 0) >> @@ -855,6 +871,9 @@ static int output_frame(H264Context *h, AVFrame *dst, H264Picture *srcp) >> goto fail; >> } >> >> + if (!(h->avctx->export_side_data & AV_CODEC_EXPORT_DATA_FILM_GRAIN)) >> + av_frame_remove_side_data(dst, AV_FRAME_DATA_FILM_GRAIN_PARAMS); >> + >> return 0; >> fail: >> av_frame_unref(dst); >> diff --git a/libavcodec/h264dec.h b/libavcodec/h264dec.h >> index 7c419de051..87c4e4e539 100644 >> --- a/libavcodec/h264dec.h >> +++ b/libavcodec/h264dec.h >> @@ -43,6 +43,7 @@ >> #include "h264dsp.h" >> #include "h264pred.h" >> #include "h264qpel.h" >> +#include "h274.h" >> #include "internal.h" >> #include "mpegutils.h" >> #include "parser.h" >> @@ -130,6 +131,9 @@ typedef struct H264Picture { >> AVFrame *f; >> ThreadFrame tf; >> >> + AVFrame *f_grain; >> + ThreadFrame tf_grain; >> + >> AVBufferRef *qscale_table_buf; >> int8_t *qscale_table; >> >> @@ -162,6 +166,7 @@ typedef struct H264Picture { >> int recovered; ///< picture at IDR or recovery point + recovery count >> int invalid_gap; >> int sei_recovery_frame_cnt; >> + int needs_fg; ///< whether picture needs film grain synthesis (see `f_grain`) >> >> AVBufferRef *pps_buf; >> const PPS *pps; >> @@ -349,6 +354,7 @@ typedef struct H264Context { >> H264DSPContext h264dsp; >> H264ChromaContext h264chroma; >> H264QpelContext h264qpel; >> + H274FilmGrainDatabase h274db; >> >> H264Picture DPB[H264_MAX_PICTURE_COUNT]; >> H264Picture *cur_pic_ptr; >> -- >> 2.32.0 >> > > Sorry, replied to v2 first... > > This causes regression reported here https://trac.ffmpeg.org/ticket/9389 Should be fixed.
On Tue, Aug 17, 2021 at 09:54:56PM +0200, Niklas Haas wrote: > From: Niklas Haas <git@haasn.dev> > > Because we need access to ref frames without film grain applied, we have > to add an extra AVFrame to H264Picture to avoid messing with the > original. This requires some amount of overhead to make the reference > moves work out, but it allows us to benefit from frame multithreading > for film grain application "for free". > > Unfortunately, this approach requires twice as much RAM to be constantly > allocated for ref frames, due to the need for an extra buffer per > H264Picture. In theory, we could get away with freeing up this memory as > soon as it's no longer needed (since ref frames do not need film grain > buffers any longer), but trying to call ff_thread_release_buffer() from > output_frame() conflicts with possible later accesses to that same frame > and I'm not sure how to synchronize that well. > > Tested on all three cases of (no fg), (fg present but exported) and (fg > present and not exported), with and without threading. > > Signed-off-by: Niklas Haas <git@haasn.dev> > --- > libavcodec/h264_picture.c | 35 +++++++++++++++++++++++-- > libavcodec/h264_slice.c | 16 ++++++++++-- > libavcodec/h264dec.c | 55 ++++++++++++++++++++++++++------------- > libavcodec/h264dec.h | 6 +++++ > 4 files changed, 90 insertions(+), 22 deletions(-) This causes aborts (tested latest git master as of this mail) Assertion sd failed at libavcodec/h264_picture.c:255 ==26500== ERROR: libFuzzer: deadly signal #0 0x49f9d1 in __sanitizer_print_stack_trace /b/swarming/w/ir/cache/builder/src/third_party/llvm/compiler-rt/lib/asan/asan_stack.cc:86:3 #1 0x18c45de in fuzzer::Fuzzer::CrashCallback() /home/michael/libfuzzer-2017-10-newclang/Fuzzer/build/../FuzzerLoop.cpp:196:5 #2 0x18c45ad in fuzzer::Fuzzer::StaticCrashSignalCallback() /home/michael/libfuzzer-2017-10-newclang/Fuzzer/build/../FuzzerLoop.cpp:175:6 #3 0x7f200a40997f (/lib/x86_64-linux-gnu/libpthread.so.0+0x1297f) #4 0x7f2008b31fb6 in __libc_signal_restore_set /build/glibc-S9d2JN/glibc-2.27/signal/../sysdeps/unix/sysv/linux/nptl-signals.h:80 #5 0x7f2008b31fb6 in gsignal /build/glibc-S9d2JN/glibc-2.27/signal/../sysdeps/unix/sysv/linux/raise.c:48 #6 0x7f2008b33920 in abort /build/glibc-S9d2JN/glibc-2.27/stdlib/abort.c:79 #7 0xafcd86 in ff_h264_field_end /home/michael/ffmpeg-git/ffmpeg/libavcodec/h264_picture.c:255:9 #8 0x5514da in h264_decode_frame /home/michael/ffmpeg-git/ffmpeg/libavcodec/h264dec.c:1041:20 #9 0x523712 in decode_simple_internal /home/michael/ffmpeg-git/ffmpeg/libavcodec/decode.c:326:15 #10 0x502a21 in decode_simple_receive_frame /home/michael/ffmpeg-git/ffmpeg/libavcodec/decode.c:517:15 #11 0x502a21 in decode_receive_frame_internal /home/michael/ffmpeg-git/ffmpeg/libavcodec/decode.c:537 #12 0x50204f in avcodec_send_packet /home/michael/ffmpeg-git/ffmpeg/libavcodec/decode.c:604:15 #13 0x4cc093 in LLVMFuzzerTestOneInput /home/michael/ffmpeg-git/ffmpeg/tools/target_dec_fuzzer.c:387:25 #14 0x18c55cd in fuzzer::Fuzzer::ExecuteCallback(unsigned char const*, unsigned long) /home/michael/libfuzzer-2017-10-newclang/Fuzzer/build/../FuzzerLoop.cpp:495:13 #15 0x18ba1a2 in fuzzer::RunOneTest(fuzzer::Fuzzer*, char const*, unsigned long) /home/michael/libfuzzer-2017-10-newclang/Fuzzer/build/../FuzzerDriver.cpp:273:6 #16 0x18bf3a1 in fuzzer::FuzzerDriver(int*, char***, int (*)(unsigned char const*, unsigned long)) /home/michael/libfuzzer-2017-10-newclang/Fuzzer/build/../FuzzerDriver.cpp:690:9 #17 0x18b9e80 in main /home/michael/libfuzzer-2017-10-newclang/Fuzzer/build/../FuzzerMain.cpp:20:10 #18 0x7f2008b14bf6 in __libc_start_main /build/glibc-S9d2JN/glibc-2.27/csu/../csu/libc-start.c:310 #19 0x41f129 in _start (/home/michael/ffmpeg-git/ffmpeg/tools/target_dec_h264_fuzzer+0x41f129) [...]
diff --git a/libavcodec/h264_picture.c b/libavcodec/h264_picture.c index ff30166b4d..5944798394 100644 --- a/libavcodec/h264_picture.c +++ b/libavcodec/h264_picture.c @@ -43,13 +43,14 @@ void ff_h264_unref_picture(H264Context *h, H264Picture *pic) { - int off = offsetof(H264Picture, tf) + sizeof(pic->tf); + int off = offsetof(H264Picture, tf_grain) + sizeof(pic->tf_grain); int i; if (!pic->f || !pic->f->buf[0]) return; ff_thread_release_buffer(h->avctx, &pic->tf); + ff_thread_release_buffer(h->avctx, &pic->tf_grain); av_buffer_unref(&pic->hwaccel_priv_buf); av_buffer_unref(&pic->qscale_table_buf); @@ -93,6 +94,7 @@ static void h264_copy_picture_params(H264Picture *dst, const H264Picture *src) dst->mb_width = src->mb_width; dst->mb_height = src->mb_height; dst->mb_stride = src->mb_stride; + dst->needs_fg = src->needs_fg; } int ff_h264_ref_picture(H264Context *h, H264Picture *dst, H264Picture *src) @@ -108,6 +110,14 @@ int ff_h264_ref_picture(H264Context *h, H264Picture *dst, H264Picture *src) if (ret < 0) goto fail; + if (src->needs_fg) { + av_assert0(src->tf_grain.f == src->f_grain); + dst->tf_grain.f = dst->f_grain; + ret = ff_thread_ref_frame(&dst->tf_grain, &src->tf_grain); + if (ret < 0) + goto fail; + } + dst->qscale_table_buf = av_buffer_ref(src->qscale_table_buf); dst->mb_type_buf = av_buffer_ref(src->mb_type_buf); dst->pps_buf = av_buffer_ref(src->pps_buf); @@ -159,6 +169,15 @@ int ff_h264_replace_picture(H264Context *h, H264Picture *dst, const H264Picture if (ret < 0) goto fail; + if (src->needs_fg) { + av_assert0(src->tf_grain.f == src->f_grain); + dst->tf_grain.f = dst->f_grain; + ff_thread_release_buffer(h->avctx, &dst->tf_grain); + ret = ff_thread_ref_frame(&dst->tf_grain, &src->tf_grain); + if (ret < 0) + goto fail; + } + ret = av_buffer_replace(&dst->qscale_table_buf, src->qscale_table_buf); ret |= av_buffer_replace(&dst->mb_type_buf, src->mb_type_buf); ret |= av_buffer_replace(&dst->pps_buf, src->pps_buf); @@ -212,6 +231,7 @@ void ff_h264_set_erpic(ERPicture *dst, H264Picture *src) int ff_h264_field_end(H264Context *h, H264SliceContext *sl, int in_setup) { AVCodecContext *const avctx = h->avctx; + H264Picture *cur = h->cur_pic_ptr; int err = 0; h->mb_y = 0; @@ -230,10 +250,21 @@ int ff_h264_field_end(H264Context *h, H264SliceContext *sl, int in_setup) if (err < 0) av_log(avctx, AV_LOG_ERROR, "hardware accelerator failed to decode picture\n"); + } else if (!in_setup && cur->needs_fg) { + AVFrameSideData *sd = av_frame_get_side_data(cur->f, AV_FRAME_DATA_FILM_GRAIN_PARAMS); + av_assert0(sd); // always present if `cur->needs_fg` + err = ff_h274_apply_film_grain(cur->f_grain, cur->f, &h->h274db, + (AVFilmGrainParams *) sd->data); + if (err < 0) { + av_log(h->avctx, AV_LOG_WARNING, "Failed synthesizing film " + "grain, ignoring: %s\n", av_err2str(err)); + cur->needs_fg = 0; + err = 0; + } } if (!in_setup && !h->droppable) - ff_thread_report_progress(&h->cur_pic_ptr->tf, INT_MAX, + ff_thread_report_progress(&cur->tf, INT_MAX, h->picture_structure == PICT_BOTTOM_FIELD); emms_c(); diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c index 9244d2d5dd..98ca8836db 100644 --- a/libavcodec/h264_slice.c +++ b/libavcodec/h264_slice.c @@ -197,6 +197,16 @@ static int alloc_picture(H264Context *h, H264Picture *pic) if (ret < 0) goto fail; + if (pic->needs_fg) { + pic->tf_grain.f = pic->f_grain; + pic->f_grain->format = pic->f->format; + pic->f_grain->width = pic->f->width; + pic->f_grain->height = pic->f->height; + ret = ff_thread_get_buffer(h->avctx, &pic->tf_grain, 0); + if (ret < 0) + goto fail; + } + if (h->avctx->hwaccel) { const AVHWAccel *hwaccel = h->avctx->hwaccel; av_assert0(!pic->hwaccel_picture_private); @@ -517,6 +527,9 @@ static int h264_frame_start(H264Context *h) pic->f->crop_top = h->crop_top; pic->f->crop_bottom = h->crop_bottom; + pic->needs_fg = h->sei.film_grain_characteristics.present && + !(h->avctx->export_side_data & AV_CODEC_EXPORT_DATA_FILM_GRAIN); + if ((ret = alloc_picture(h, pic)) < 0) return ret; @@ -1328,8 +1341,7 @@ static int h264_export_frame_props(H264Context *h) } h->sei.unregistered.nb_buf_ref = 0; - if (h->sei.film_grain_characteristics.present && - (h->avctx->export_side_data & AV_CODEC_EXPORT_DATA_FILM_GRAIN)) { + if (h->sei.film_grain_characteristics.present) { H264SEIFilmGrainCharacteristics *fgc = &h->sei.film_grain_characteristics; AVFilmGrainParams *fgp = av_film_grain_params_create_side_data(out); if (!fgp) diff --git a/libavcodec/h264dec.c b/libavcodec/h264dec.c index dc99ee995e..a64c93eca4 100644 --- a/libavcodec/h264dec.c +++ b/libavcodec/h264dec.c @@ -275,9 +275,22 @@ int ff_h264_slice_context_init(H264Context *h, H264SliceContext *sl) return 0; } +static int h264_init_pic(H264Picture *pic) +{ + pic->f = av_frame_alloc(); + if (!pic->f) + return AVERROR(ENOMEM); + + pic->f_grain = av_frame_alloc(); + if (!pic->f_grain) + return AVERROR(ENOMEM); + + return 0; +} + static int h264_init_context(AVCodecContext *avctx, H264Context *h) { - int i; + int i, ret; h->avctx = avctx; h->cur_chroma_format_idc = -1; @@ -308,18 +321,15 @@ static int h264_init_context(AVCodecContext *avctx, H264Context *h) } for (i = 0; i < H264_MAX_PICTURE_COUNT; i++) { - h->DPB[i].f = av_frame_alloc(); - if (!h->DPB[i].f) - return AVERROR(ENOMEM); + if ((ret = h264_init_pic(&h->DPB[i])) < 0) + return ret; } - h->cur_pic.f = av_frame_alloc(); - if (!h->cur_pic.f) - return AVERROR(ENOMEM); + if ((ret = h264_init_pic(&h->cur_pic)) < 0) + return ret; - h->last_pic_for_ec.f = av_frame_alloc(); - if (!h->last_pic_for_ec.f) - return AVERROR(ENOMEM); + if ((ret = h264_init_pic(&h->last_pic_for_ec)) < 0) + return ret; for (i = 0; i < h->nb_slice_ctx; i++) h->slice_ctx[i].h264 = h; @@ -327,6 +337,13 @@ static int h264_init_context(AVCodecContext *avctx, H264Context *h) return 0; } +static void h264_free_pic(H264Context *h, H264Picture *pic) +{ + ff_h264_unref_picture(h, pic); + av_frame_free(&pic->f); + av_frame_free(&pic->f_grain); +} + static av_cold int h264_decode_end(AVCodecContext *avctx) { H264Context *h = avctx->priv_data; @@ -336,8 +353,7 @@ static av_cold int h264_decode_end(AVCodecContext *avctx) ff_h264_free_tables(h); for (i = 0; i < H264_MAX_PICTURE_COUNT; i++) { - ff_h264_unref_picture(h, &h->DPB[i]); - av_frame_free(&h->DPB[i].f); + h264_free_pic(h, &h->DPB[i]); } memset(h->delayed_pic, 0, sizeof(h->delayed_pic)); @@ -351,10 +367,8 @@ static av_cold int h264_decode_end(AVCodecContext *avctx) ff_h2645_packet_uninit(&h->pkt); - ff_h264_unref_picture(h, &h->cur_pic); - av_frame_free(&h->cur_pic.f); - ff_h264_unref_picture(h, &h->last_pic_for_ec); - av_frame_free(&h->last_pic_for_ec.f); + h264_free_pic(h, &h->cur_pic); + h264_free_pic(h, &h->last_pic_for_ec); return 0; } @@ -837,13 +851,15 @@ static int h264_export_enc_params(AVFrame *f, H264Picture *p) static int output_frame(H264Context *h, AVFrame *dst, H264Picture *srcp) { - AVFrame *src = srcp->f; int ret; - ret = av_frame_ref(dst, src); + ret = av_frame_ref(dst, srcp->needs_fg ? srcp->f_grain : srcp->f); if (ret < 0) return ret; + if (srcp->needs_fg && (ret = av_frame_copy_props(dst, srcp->f)) < 0) + return ret; + av_dict_set(&dst->metadata, "stereo_mode", ff_h264_sei_stereo_mode(&h->sei.frame_packing), 0); if (srcp->sei_recovery_frame_cnt == 0) @@ -855,6 +871,9 @@ static int output_frame(H264Context *h, AVFrame *dst, H264Picture *srcp) goto fail; } + if (!(h->avctx->export_side_data & AV_CODEC_EXPORT_DATA_FILM_GRAIN)) + av_frame_remove_side_data(dst, AV_FRAME_DATA_FILM_GRAIN_PARAMS); + return 0; fail: av_frame_unref(dst); diff --git a/libavcodec/h264dec.h b/libavcodec/h264dec.h index 7c419de051..87c4e4e539 100644 --- a/libavcodec/h264dec.h +++ b/libavcodec/h264dec.h @@ -43,6 +43,7 @@ #include "h264dsp.h" #include "h264pred.h" #include "h264qpel.h" +#include "h274.h" #include "internal.h" #include "mpegutils.h" #include "parser.h" @@ -130,6 +131,9 @@ typedef struct H264Picture { AVFrame *f; ThreadFrame tf; + AVFrame *f_grain; + ThreadFrame tf_grain; + AVBufferRef *qscale_table_buf; int8_t *qscale_table; @@ -162,6 +166,7 @@ typedef struct H264Picture { int recovered; ///< picture at IDR or recovery point + recovery count int invalid_gap; int sei_recovery_frame_cnt; + int needs_fg; ///< whether picture needs film grain synthesis (see `f_grain`) AVBufferRef *pps_buf; const PPS *pps; @@ -349,6 +354,7 @@ typedef struct H264Context { H264DSPContext h264dsp; H264ChromaContext h264chroma; H264QpelContext h264qpel; + H274FilmGrainDatabase h274db; H264Picture DPB[H264_MAX_PICTURE_COUNT]; H264Picture *cur_pic_ptr;