diff mbox series

[FFmpeg-devel,V2,2/3] libavcodec/vaapi_encode: Change the way to call async to increase performance

Message ID 20220105024810.435597-2-wenbin.chen@intel.com
State New
Headers show
Series [FFmpeg-devel,V2,1/3] libavcodec/vaapi_encode: Add new API adaption to vaapi_encode
Related show

Checks

Context Check Description
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished
andriy/make_ppc success Make finished
andriy/make_fate_ppc success Make fate finished

Commit Message

Wenbin Chen Jan. 5, 2022, 2:48 a.m. UTC
Fix: #7706. After commit 5fdcf85bbffe7451c2, the vaapi encoder's performance
decreased. The reason is that vaRenderPicture() and vaSyncBuffer() are
called together (vaRenderPicture() is always followed by a
vaSyncBuffer()). When we encode a stream with B-frames, we need a buffer to
reorder frames, so we can send several frames to the HW at once to increase
performance. Now I have changed them to be called in an asynchronous way, which
makes better use of the hardware. 1080p transcoding fps increases by about 17%
in my environment.

This change relies on vaSyncBuffer(), so if the driver does not support
vaSyncBuffer(), the previous behavior is kept.

Signed-off-by: Wenbin Chen <wenbin.chen@intel.com>
---
 libavcodec/vaapi_encode.c | 64 ++++++++++++++++++++++++++++++++-------
 libavcodec/vaapi_encode.h |  5 +++
 2 files changed, 58 insertions(+), 11 deletions(-)

Comments

Andreas Rheinhardt Jan. 5, 2022, 5:45 a.m. UTC | #1
Wenbin Chen:
> Fix: #7706. After commit 5fdcf85bbffe7451c2, vaapi encoder's performance
> decrease. The reason is that vaRenderPicture() and vaSyncBuffer() are
> called at the same time (vaRenderPicture() always followed by a
> vaSyncBuffer()). When we encode stream with B frames, we need buffer to
> reorder frames, so we can send serveral frames to HW at once to increase
> performance. Now I changed them to be called in a asynchronous way, which
> will make better use of hardware. 1080p transcoding increases about 17%
> fps on my environment.
> 
> This change fits vaSyncBuffer(), so if driver does not support
> vaSyncBuffer, it will keep previous operation.
> 
> Signed-off-by: Wenbin Chen <wenbin.chen@intel.com>
> ---
>  libavcodec/vaapi_encode.c | 64 ++++++++++++++++++++++++++++++++-------
>  libavcodec/vaapi_encode.h |  5 +++
>  2 files changed, 58 insertions(+), 11 deletions(-)
> 
> diff --git a/libavcodec/vaapi_encode.c b/libavcodec/vaapi_encode.c
> index b87b58a42b..9a3b3ba4ad 100644
> --- a/libavcodec/vaapi_encode.c
> +++ b/libavcodec/vaapi_encode.c
> @@ -984,8 +984,10 @@ static int vaapi_encode_pick_next(AVCodecContext *avctx,
>      if (!pic && ctx->end_of_stream) {
>          --b_counter;
>          pic = ctx->pic_end;
> -        if (pic->encode_issued)
> +        if (pic->encode_complete)
>              return AVERROR_EOF;
> +        else if (pic->encode_issued)
> +            return AVERROR(EAGAIN);
>      }
>  
>      if (!pic) {
> @@ -1210,18 +1212,45 @@ int ff_vaapi_encode_receive_packet(AVCodecContext *avctx, AVPacket *pkt)
>              return AVERROR(EAGAIN);
>      }
>  
> -    pic = NULL;
> -    err = vaapi_encode_pick_next(avctx, &pic);
> -    if (err < 0)
> -        return err;
> -    av_assert0(pic);
> +#if VA_CHECK_VERSION(1, 9, 0)
> +    if (ctx->has_sync_buffer_func) {
> +        while (av_fifo_size(ctx->encode_fifo) <=
> +               MAX_PICTURE_REFERENCES * sizeof(VAAPIEncodePicture *)) {
> +            pic = NULL;
> +            err = vaapi_encode_pick_next(avctx, &pic);
> +            if (err < 0)
> +                break;
> +
> +            av_assert0(pic);
> +            pic->encode_order = ctx->encode_order +
> +                (av_fifo_size(ctx->encode_fifo) / sizeof(VAAPIEncodePicture *));
> +            err = vaapi_encode_issue(avctx, pic);
> +            if (err < 0) {
> +                av_log(avctx, AV_LOG_ERROR, "Encode failed: %d.\n", err);
> +                return err;
> +            }
> +            av_fifo_generic_write(ctx->encode_fifo, &pic, sizeof(pic), NULL);
> +        }
> +        if (!av_fifo_size(ctx->encode_fifo))
> +            return err;
> +        av_fifo_generic_read(ctx->encode_fifo, &pic, sizeof(pic), NULL);
> +        ctx->encode_order = pic->encode_order + 1;
> +    } else
> +#endif
> +    {
> +        pic = NULL;
> +        err = vaapi_encode_pick_next(avctx, &pic);
> +        if (err < 0)
> +            return err;
> +        av_assert0(pic);
>  
> -    pic->encode_order = ctx->encode_order++;
> +        pic->encode_order = ctx->encode_order++;
>  
> -    err = vaapi_encode_issue(avctx, pic);
> -    if (err < 0) {
> -        av_log(avctx, AV_LOG_ERROR, "Encode failed: %d.\n", err);
> -        return err;
> +        err = vaapi_encode_issue(avctx, pic);
> +        if (err < 0) {
> +            av_log(avctx, AV_LOG_ERROR, "Encode failed: %d.\n", err);
> +            return err;
> +        }
>      }
>  
>      err = vaapi_encode_output(avctx, pic, pkt);
> @@ -2555,6 +2584,18 @@ av_cold int ff_vaapi_encode_init(AVCodecContext *avctx)
>          }
>      }
>  
> +#if VA_CHECK_VERSION(1, 9, 0)
> +    //check vaSyncBuffer function
> +    vas = vaSyncBuffer(ctx->hwctx->display, 0, 0);
> +    if (vas != VA_STATUS_ERROR_UNIMPLEMENTED) {
> +        ctx->has_sync_buffer_func = 1;
> +        ctx->encode_fifo = av_fifo_alloc((MAX_PICTURE_REFERENCES + 1) *
> +                                        sizeof(VAAPIEncodePicture *));
> +        if (!ctx->encode_fifo)
> +            return AVERROR(ENOMEM);
> +    }
> +#endif
> +
>      return 0;
>  
>  fail:
> @@ -2592,6 +2633,7 @@ av_cold int ff_vaapi_encode_close(AVCodecContext *avctx)
>  
>      av_freep(&ctx->codec_sequence_params);
>      av_freep(&ctx->codec_picture_params);
> +    av_fifo_freep(&ctx->encode_fifo);

Is it guaranteed that the fifo is empty at this point? I don't think so.

>  
>      av_buffer_unref(&ctx->recon_frames_ref);
>      av_buffer_unref(&ctx->input_frames_ref);
> diff --git a/libavcodec/vaapi_encode.h b/libavcodec/vaapi_encode.h
> index b41604a883..560a1c42a9 100644
> --- a/libavcodec/vaapi_encode.h
> +++ b/libavcodec/vaapi_encode.h
> @@ -29,6 +29,7 @@
>  
>  #include "libavutil/hwcontext.h"
>  #include "libavutil/hwcontext_vaapi.h"
> +#include "libavutil/fifo.h"
>  
>  #include "avcodec.h"
>  #include "hwconfig.h"
> @@ -345,6 +346,10 @@ typedef struct VAAPIEncodeContext {
>      int             roi_warned;
>  
>      AVFrame         *frame;
> +    //Store buffered pic
> +    AVFifoBuffer *encode_fifo;
> +    //Whether the driver support vaSyncBuffer
> +    int has_sync_buffer_func;
>  } VAAPIEncodeContext;
>  
>  enum {
>
Wenbin Chen Jan. 5, 2022, 7:59 a.m. UTC | #2
> Wenbin Chen:
> > Fix: #7706. After commit 5fdcf85bbffe7451c2, vaapi encoder's performance
> > decrease. The reason is that vaRenderPicture() and vaSyncBuffer() are
> > called at the same time (vaRenderPicture() always followed by a
> > vaSyncBuffer()). When we encode stream with B frames, we need buffer to
> > reorder frames, so we can send serveral frames to HW at once to increase
> > performance. Now I changed them to be called in a asynchronous way,
> which
> > will make better use of hardware. 1080p transcoding increases about 17%
> > fps on my environment.
> >
> > This change fits vaSyncBuffer(), so if driver does not support
> > vaSyncBuffer, it will keep previous operation.
> >
> > Signed-off-by: Wenbin Chen <wenbin.chen@intel.com>
> > ---
> >  libavcodec/vaapi_encode.c | 64 ++++++++++++++++++++++++++++++++-----
> --
> >  libavcodec/vaapi_encode.h |  5 +++
> >  2 files changed, 58 insertions(+), 11 deletions(-)
> >
> > diff --git a/libavcodec/vaapi_encode.c b/libavcodec/vaapi_encode.c
> > index b87b58a42b..9a3b3ba4ad 100644
> > --- a/libavcodec/vaapi_encode.c
> > +++ b/libavcodec/vaapi_encode.c
> > @@ -984,8 +984,10 @@ static int
> vaapi_encode_pick_next(AVCodecContext *avctx,
> >      if (!pic && ctx->end_of_stream) {
> >          --b_counter;
> >          pic = ctx->pic_end;
> > -        if (pic->encode_issued)
> > +        if (pic->encode_complete)
> >              return AVERROR_EOF;
> > +        else if (pic->encode_issued)
> > +            return AVERROR(EAGAIN);
> >      }
> >
> >      if (!pic) {
> > @@ -1210,18 +1212,45 @@ int
> ff_vaapi_encode_receive_packet(AVCodecContext *avctx, AVPacket *pkt)
> >              return AVERROR(EAGAIN);
> >      }
> >
> > -    pic = NULL;
> > -    err = vaapi_encode_pick_next(avctx, &pic);
> > -    if (err < 0)
> > -        return err;
> > -    av_assert0(pic);
> > +#if VA_CHECK_VERSION(1, 9, 0)
> > +    if (ctx->has_sync_buffer_func) {
> > +        while (av_fifo_size(ctx->encode_fifo) <=
> > +               MAX_PICTURE_REFERENCES * sizeof(VAAPIEncodePicture *)) {
> > +            pic = NULL;
> > +            err = vaapi_encode_pick_next(avctx, &pic);
> > +            if (err < 0)
> > +                break;
> > +
> > +            av_assert0(pic);
> > +            pic->encode_order = ctx->encode_order +
> > +                (av_fifo_size(ctx->encode_fifo) / sizeof(VAAPIEncodePicture *));
> > +            err = vaapi_encode_issue(avctx, pic);
> > +            if (err < 0) {
> > +                av_log(avctx, AV_LOG_ERROR, "Encode failed: %d.\n", err);
> > +                return err;
> > +            }
> > +            av_fifo_generic_write(ctx->encode_fifo, &pic, sizeof(pic), NULL);
> > +        }
> > +        if (!av_fifo_size(ctx->encode_fifo))
> > +            return err;
> > +        av_fifo_generic_read(ctx->encode_fifo, &pic, sizeof(pic), NULL);
> > +        ctx->encode_order = pic->encode_order + 1;
> > +    } else
> > +#endif
> > +    {
> > +        pic = NULL;
> > +        err = vaapi_encode_pick_next(avctx, &pic);
> > +        if (err < 0)
> > +            return err;
> > +        av_assert0(pic);
> >
> > -    pic->encode_order = ctx->encode_order++;
> > +        pic->encode_order = ctx->encode_order++;
> >
> > -    err = vaapi_encode_issue(avctx, pic);
> > -    if (err < 0) {
> > -        av_log(avctx, AV_LOG_ERROR, "Encode failed: %d.\n", err);
> > -        return err;
> > +        err = vaapi_encode_issue(avctx, pic);
> > +        if (err < 0) {
> > +            av_log(avctx, AV_LOG_ERROR, "Encode failed: %d.\n", err);
> > +            return err;
> > +        }
> >      }
> >
> >      err = vaapi_encode_output(avctx, pic, pkt);
> > @@ -2555,6 +2584,18 @@ av_cold int
> ff_vaapi_encode_init(AVCodecContext *avctx)
> >          }
> >      }
> >
> > +#if VA_CHECK_VERSION(1, 9, 0)
> > +    //check vaSyncBuffer function
> > +    vas = vaSyncBuffer(ctx->hwctx->display, 0, 0);
> > +    if (vas != VA_STATUS_ERROR_UNIMPLEMENTED) {
> > +        ctx->has_sync_buffer_func = 1;
> > +        ctx->encode_fifo = av_fifo_alloc((MAX_PICTURE_REFERENCES + 1) *
> > +                                        sizeof(VAAPIEncodePicture *));
> > +        if (!ctx->encode_fifo)
> > +            return AVERROR(ENOMEM);
> > +    }
> > +#endif
> > +
> >      return 0;
> >
> >  fail:
> > @@ -2592,6 +2633,7 @@ av_cold int
> ff_vaapi_encode_close(AVCodecContext *avctx)
> >
> >      av_freep(&ctx->codec_sequence_params);
> >      av_freep(&ctx->codec_picture_params);
> > +    av_fifo_freep(&ctx->encode_fifo);
> 
> Is it guaranteed that the fifo is empty at this point? I don't think so.

I don't check the fifo size, because in ff_vaapi_encode_close() all pics
are already freed and encode_fifo only buffers pic pointers.
```
    for (pic = ctx->pic_start; pic; pic = next) {
        next = pic->next;
        vaapi_encode_free(avctx, pic);
    }
```

> 
> >
> >      av_buffer_unref(&ctx->recon_frames_ref);
> >      av_buffer_unref(&ctx->input_frames_ref);
> > diff --git a/libavcodec/vaapi_encode.h b/libavcodec/vaapi_encode.h
> > index b41604a883..560a1c42a9 100644
> > --- a/libavcodec/vaapi_encode.h
> > +++ b/libavcodec/vaapi_encode.h
> > @@ -29,6 +29,7 @@
> >
> >  #include "libavutil/hwcontext.h"
> >  #include "libavutil/hwcontext_vaapi.h"
> > +#include "libavutil/fifo.h"
> >
> >  #include "avcodec.h"
> >  #include "hwconfig.h"
> > @@ -345,6 +346,10 @@ typedef struct VAAPIEncodeContext {
> >      int             roi_warned;
> >
> >      AVFrame         *frame;
> > +    //Store buffered pic
> > +    AVFifoBuffer *encode_fifo;
> > +    //Whether the driver support vaSyncBuffer
> > +    int has_sync_buffer_func;
> >  } VAAPIEncodeContext;
> >
> >  enum {
> >
> 
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
Andreas Rheinhardt Jan. 6, 2022, 8:41 a.m. UTC | #3
Chen, Wenbin:
>> Wenbin Chen:
>>> Fix: #7706. After commit 5fdcf85bbffe7451c2, vaapi encoder's performance
>>> decrease. The reason is that vaRenderPicture() and vaSyncBuffer() are
>>> called at the same time (vaRenderPicture() always followed by a
>>> vaSyncBuffer()). When we encode stream with B frames, we need buffer to
>>> reorder frames, so we can send serveral frames to HW at once to increase
>>> performance. Now I changed them to be called in a asynchronous way,
>> which
>>> will make better use of hardware. 1080p transcoding increases about 17%
>>> fps on my environment.
>>>
>>> This change fits vaSyncBuffer(), so if driver does not support
>>> vaSyncBuffer, it will keep previous operation.
>>>
>>> Signed-off-by: Wenbin Chen <wenbin.chen@intel.com>
>>> ---
>>>  libavcodec/vaapi_encode.c | 64 ++++++++++++++++++++++++++++++++-----
>> --
>>>  libavcodec/vaapi_encode.h |  5 +++
>>>  2 files changed, 58 insertions(+), 11 deletions(-)
>>>
>>> diff --git a/libavcodec/vaapi_encode.c b/libavcodec/vaapi_encode.c
>>> index b87b58a42b..9a3b3ba4ad 100644
>>> --- a/libavcodec/vaapi_encode.c
>>> +++ b/libavcodec/vaapi_encode.c
>>> @@ -984,8 +984,10 @@ static int
>> vaapi_encode_pick_next(AVCodecContext *avctx,
>>>      if (!pic && ctx->end_of_stream) {
>>>          --b_counter;
>>>          pic = ctx->pic_end;
>>> -        if (pic->encode_issued)
>>> +        if (pic->encode_complete)
>>>              return AVERROR_EOF;
>>> +        else if (pic->encode_issued)
>>> +            return AVERROR(EAGAIN);
>>>      }
>>>
>>>      if (!pic) {
>>> @@ -1210,18 +1212,45 @@ int
>> ff_vaapi_encode_receive_packet(AVCodecContext *avctx, AVPacket *pkt)
>>>              return AVERROR(EAGAIN);
>>>      }
>>>
>>> -    pic = NULL;
>>> -    err = vaapi_encode_pick_next(avctx, &pic);
>>> -    if (err < 0)
>>> -        return err;
>>> -    av_assert0(pic);
>>> +#if VA_CHECK_VERSION(1, 9, 0)
>>> +    if (ctx->has_sync_buffer_func) {
>>> +        while (av_fifo_size(ctx->encode_fifo) <=
>>> +               MAX_PICTURE_REFERENCES * sizeof(VAAPIEncodePicture *)) {
>>> +            pic = NULL;
>>> +            err = vaapi_encode_pick_next(avctx, &pic);
>>> +            if (err < 0)
>>> +                break;
>>> +
>>> +            av_assert0(pic);
>>> +            pic->encode_order = ctx->encode_order +
>>> +                (av_fifo_size(ctx->encode_fifo) / sizeof(VAAPIEncodePicture *));
>>> +            err = vaapi_encode_issue(avctx, pic);
>>> +            if (err < 0) {
>>> +                av_log(avctx, AV_LOG_ERROR, "Encode failed: %d.\n", err);
>>> +                return err;
>>> +            }
>>> +            av_fifo_generic_write(ctx->encode_fifo, &pic, sizeof(pic), NULL);
>>> +        }
>>> +        if (!av_fifo_size(ctx->encode_fifo))
>>> +            return err;
>>> +        av_fifo_generic_read(ctx->encode_fifo, &pic, sizeof(pic), NULL);
>>> +        ctx->encode_order = pic->encode_order + 1;
>>> +    } else
>>> +#endif
>>> +    {
>>> +        pic = NULL;
>>> +        err = vaapi_encode_pick_next(avctx, &pic);
>>> +        if (err < 0)
>>> +            return err;
>>> +        av_assert0(pic);
>>>
>>> -    pic->encode_order = ctx->encode_order++;
>>> +        pic->encode_order = ctx->encode_order++;
>>>
>>> -    err = vaapi_encode_issue(avctx, pic);
>>> -    if (err < 0) {
>>> -        av_log(avctx, AV_LOG_ERROR, "Encode failed: %d.\n", err);
>>> -        return err;
>>> +        err = vaapi_encode_issue(avctx, pic);
>>> +        if (err < 0) {
>>> +            av_log(avctx, AV_LOG_ERROR, "Encode failed: %d.\n", err);
>>> +            return err;
>>> +        }
>>>      }
>>>
>>>      err = vaapi_encode_output(avctx, pic, pkt);
>>> @@ -2555,6 +2584,18 @@ av_cold int
>> ff_vaapi_encode_init(AVCodecContext *avctx)
>>>          }
>>>      }
>>>
>>> +#if VA_CHECK_VERSION(1, 9, 0)
>>> +    //check vaSyncBuffer function
>>> +    vas = vaSyncBuffer(ctx->hwctx->display, 0, 0);
>>> +    if (vas != VA_STATUS_ERROR_UNIMPLEMENTED) {
>>> +        ctx->has_sync_buffer_func = 1;
>>> +        ctx->encode_fifo = av_fifo_alloc((MAX_PICTURE_REFERENCES + 1) *
>>> +                                        sizeof(VAAPIEncodePicture *));
>>> +        if (!ctx->encode_fifo)
>>> +            return AVERROR(ENOMEM);
>>> +    }
>>> +#endif
>>> +
>>>      return 0;
>>>
>>>  fail:
>>> @@ -2592,6 +2633,7 @@ av_cold int
>> ff_vaapi_encode_close(AVCodecContext *avctx)
>>>
>>>      av_freep(&ctx->codec_sequence_params);
>>>      av_freep(&ctx->codec_picture_params);
>>> +    av_fifo_freep(&ctx->encode_fifo);
>>
>> Is it guaranteed that the fifo is empty at this point? I don't think so.
> 
> I don't check the fifo size, because in ff_vaapi_encode_close() all pics
> are already freed and encode_fifo only buffer pic.
> ```
>     for (pic = ctx->pic_start; pic; pic = next) {
>         next = pic->next;
>         vaapi_encode_free(avctx, pic);
>     }
> ```
> 

Ok, seems like the FIFO does not have ownership of the pics. Alright then.

>>
>>>
>>>      av_buffer_unref(&ctx->recon_frames_ref);
>>>      av_buffer_unref(&ctx->input_frames_ref);
>>> diff --git a/libavcodec/vaapi_encode.h b/libavcodec/vaapi_encode.h
>>> index b41604a883..560a1c42a9 100644
>>> --- a/libavcodec/vaapi_encode.h
>>> +++ b/libavcodec/vaapi_encode.h
>>> @@ -29,6 +29,7 @@
>>>
>>>  #include "libavutil/hwcontext.h"
>>>  #include "libavutil/hwcontext_vaapi.h"
>>> +#include "libavutil/fifo.h"
>>>
>>>  #include "avcodec.h"
>>>  #include "hwconfig.h"
>>> @@ -345,6 +346,10 @@ typedef struct VAAPIEncodeContext {
>>>      int             roi_warned;
>>>
>>>      AVFrame         *frame;
>>> +    //Store buffered pic
>>> +    AVFifoBuffer *encode_fifo;
>>> +    //Whether the driver support vaSyncBuffer
>>> +    int has_sync_buffer_func;
>>>  } VAAPIEncodeContext;
>>>
>>>  enum {
>>>
>>
diff mbox series

Patch

diff --git a/libavcodec/vaapi_encode.c b/libavcodec/vaapi_encode.c
index b87b58a42b..9a3b3ba4ad 100644
--- a/libavcodec/vaapi_encode.c
+++ b/libavcodec/vaapi_encode.c
@@ -984,8 +984,10 @@  static int vaapi_encode_pick_next(AVCodecContext *avctx,
     if (!pic && ctx->end_of_stream) {
         --b_counter;
         pic = ctx->pic_end;
-        if (pic->encode_issued)
+        if (pic->encode_complete)
             return AVERROR_EOF;
+        else if (pic->encode_issued)
+            return AVERROR(EAGAIN);
     }
 
     if (!pic) {
@@ -1210,18 +1212,45 @@  int ff_vaapi_encode_receive_packet(AVCodecContext *avctx, AVPacket *pkt)
             return AVERROR(EAGAIN);
     }
 
-    pic = NULL;
-    err = vaapi_encode_pick_next(avctx, &pic);
-    if (err < 0)
-        return err;
-    av_assert0(pic);
+#if VA_CHECK_VERSION(1, 9, 0)
+    if (ctx->has_sync_buffer_func) {
+        while (av_fifo_size(ctx->encode_fifo) <=
+               MAX_PICTURE_REFERENCES * sizeof(VAAPIEncodePicture *)) {
+            pic = NULL;
+            err = vaapi_encode_pick_next(avctx, &pic);
+            if (err < 0)
+                break;
+
+            av_assert0(pic);
+            pic->encode_order = ctx->encode_order +
+                (av_fifo_size(ctx->encode_fifo) / sizeof(VAAPIEncodePicture *));
+            err = vaapi_encode_issue(avctx, pic);
+            if (err < 0) {
+                av_log(avctx, AV_LOG_ERROR, "Encode failed: %d.\n", err);
+                return err;
+            }
+            av_fifo_generic_write(ctx->encode_fifo, &pic, sizeof(pic), NULL);
+        }
+        if (!av_fifo_size(ctx->encode_fifo))
+            return err;
+        av_fifo_generic_read(ctx->encode_fifo, &pic, sizeof(pic), NULL);
+        ctx->encode_order = pic->encode_order + 1;
+    } else
+#endif
+    {
+        pic = NULL;
+        err = vaapi_encode_pick_next(avctx, &pic);
+        if (err < 0)
+            return err;
+        av_assert0(pic);
 
-    pic->encode_order = ctx->encode_order++;
+        pic->encode_order = ctx->encode_order++;
 
-    err = vaapi_encode_issue(avctx, pic);
-    if (err < 0) {
-        av_log(avctx, AV_LOG_ERROR, "Encode failed: %d.\n", err);
-        return err;
+        err = vaapi_encode_issue(avctx, pic);
+        if (err < 0) {
+            av_log(avctx, AV_LOG_ERROR, "Encode failed: %d.\n", err);
+            return err;
+        }
     }
 
     err = vaapi_encode_output(avctx, pic, pkt);
@@ -2555,6 +2584,18 @@  av_cold int ff_vaapi_encode_init(AVCodecContext *avctx)
         }
     }
 
+#if VA_CHECK_VERSION(1, 9, 0)
+    //check vaSyncBuffer function
+    vas = vaSyncBuffer(ctx->hwctx->display, 0, 0);
+    if (vas != VA_STATUS_ERROR_UNIMPLEMENTED) {
+        ctx->has_sync_buffer_func = 1;
+        ctx->encode_fifo = av_fifo_alloc((MAX_PICTURE_REFERENCES + 1) *
+                                        sizeof(VAAPIEncodePicture *));
+        if (!ctx->encode_fifo)
+            return AVERROR(ENOMEM);
+    }
+#endif
+
     return 0;
 
 fail:
@@ -2592,6 +2633,7 @@  av_cold int ff_vaapi_encode_close(AVCodecContext *avctx)
 
     av_freep(&ctx->codec_sequence_params);
     av_freep(&ctx->codec_picture_params);
+    av_fifo_freep(&ctx->encode_fifo);
 
     av_buffer_unref(&ctx->recon_frames_ref);
     av_buffer_unref(&ctx->input_frames_ref);
diff --git a/libavcodec/vaapi_encode.h b/libavcodec/vaapi_encode.h
index b41604a883..560a1c42a9 100644
--- a/libavcodec/vaapi_encode.h
+++ b/libavcodec/vaapi_encode.h
@@ -29,6 +29,7 @@ 
 
 #include "libavutil/hwcontext.h"
 #include "libavutil/hwcontext_vaapi.h"
+#include "libavutil/fifo.h"
 
 #include "avcodec.h"
 #include "hwconfig.h"
@@ -345,6 +346,10 @@  typedef struct VAAPIEncodeContext {
     int             roi_warned;
 
     AVFrame         *frame;
+    //Store buffered pic
+    AVFifoBuffer *encode_fifo;
+    //Whether the driver support vaSyncBuffer
+    int has_sync_buffer_func;
 } VAAPIEncodeContext;
 
 enum {