diff mbox series

[FFmpeg-devel,v3,5/6] lavc/vaapi_encode: Separate reference frame into previous/future list

Message ID 20230803060132.501741-5-fei.w.wang@intel.com
State New
Headers show
Series [FFmpeg-devel,v3,1/6] avcodec/cbs_av1: Add tx mode enum values | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Wang, Fei W Aug. 3, 2023, 6:01 a.m. UTC
From: Fei Wang <fei.w.wang@intel.com>

To support more reference frames from different directions.

Signed-off-by: Fei Wang <fei.w.wang@intel.com>
---
 libavcodec/vaapi_encode.c       | 112 +++++++++++++++++++++++++-------
 libavcodec/vaapi_encode.h       |  15 +++--
 libavcodec/vaapi_encode_h264.c  |  94 +++++++++++++--------------
 libavcodec/vaapi_encode_h265.c  |  76 +++++++++++++---------
 libavcodec/vaapi_encode_mpeg2.c |   6 +-
 libavcodec/vaapi_encode_vp8.c   |   6 +-
 libavcodec/vaapi_encode_vp9.c   |  26 ++++----
 7 files changed, 208 insertions(+), 127 deletions(-)

Comments

Mark Thompson Aug. 7, 2023, 8:28 p.m. UTC | #1
On 03/08/2023 07:01, fei.w.wang-at-intel.com@ffmpeg.org wrote:
> From: Fei Wang <fei.w.wang@intel.com>
> 
> To support more reference frames from different directions.
> 
> Signed-off-by: Fei Wang <fei.w.wang@intel.com>
> ---
>   libavcodec/vaapi_encode.c       | 112 +++++++++++++++++++++++++-------
>   libavcodec/vaapi_encode.h       |  15 +++--
>   libavcodec/vaapi_encode_h264.c  |  94 +++++++++++++--------------
>   libavcodec/vaapi_encode_h265.c  |  76 +++++++++++++---------
>   libavcodec/vaapi_encode_mpeg2.c |   6 +-
>   libavcodec/vaapi_encode_vp8.c   |   6 +-
>   libavcodec/vaapi_encode_vp9.c   |  26 ++++----
>   7 files changed, 208 insertions(+), 127 deletions(-)

But why?

VAAPI at the top level doesn't care which direction the reference is in, it only cares about the set of reference pictures in the DPB and used as reference.

It's up to the per-codec code to decide how it needs to structure that, like how it builds L0/L1 for H.264 but for VP9 it's just a flat set.

Thanks,

- Mark
diff mbox series

Patch

diff --git a/libavcodec/vaapi_encode.c b/libavcodec/vaapi_encode.c
index c8545cd8db..2604f12b9e 100644
--- a/libavcodec/vaapi_encode.c
+++ b/libavcodec/vaapi_encode.c
@@ -276,21 +276,34 @@  static int vaapi_encode_issue(AVCodecContext *avctx,
     av_log(avctx, AV_LOG_DEBUG, "Issuing encode for pic %"PRId64"/%"PRId64" "
            "as type %s.\n", pic->display_order, pic->encode_order,
            picture_type_name[pic->type]);
-    if (pic->nb_refs == 0) {
+    if (pic->nb_refs[0] == 0 && pic->nb_refs[1] == 0) {
         av_log(avctx, AV_LOG_DEBUG, "No reference pictures.\n");
     } else {
-        av_log(avctx, AV_LOG_DEBUG, "Refers to:");
-        for (i = 0; i < pic->nb_refs; i++) {
+        av_log(avctx, AV_LOG_DEBUG, "L0 refers to");
+        for (i = 0; i < pic->nb_refs[0]; i++) {
             av_log(avctx, AV_LOG_DEBUG, " %"PRId64"/%"PRId64,
-                   pic->refs[i]->display_order, pic->refs[i]->encode_order);
+                   pic->refs[0][i]->display_order, pic->refs[0][i]->encode_order);
         }
         av_log(avctx, AV_LOG_DEBUG, ".\n");
+
+        if (pic->nb_refs[1]) {
+            av_log(avctx, AV_LOG_DEBUG, "L1 refers to");
+            for (i = 0; i < pic->nb_refs[1]; i++) {
+                av_log(avctx, AV_LOG_DEBUG, " %"PRId64"/%"PRId64,
+                       pic->refs[1][i]->display_order, pic->refs[1][i]->encode_order);
+            }
+            av_log(avctx, AV_LOG_DEBUG, ".\n");
+        }
     }
 
     av_assert0(!pic->encode_issued);
-    for (i = 0; i < pic->nb_refs; i++) {
-        av_assert0(pic->refs[i]);
-        av_assert0(pic->refs[i]->encode_issued);
+    for (i = 0; i < pic->nb_refs[0]; i++) {
+        av_assert0(pic->refs[0][i]);
+        av_assert0(pic->refs[0][i]->encode_issued);
+    }
+    for (i = 0; i < pic->nb_refs[1]; i++) {
+        av_assert0(pic->refs[1][i]);
+        av_assert0(pic->refs[1][i]->encode_issued);
     }
 
     av_log(avctx, AV_LOG_DEBUG, "Input surface is %#x.\n", pic->input_surface);
@@ -832,8 +845,12 @@  static void vaapi_encode_add_ref(AVCodecContext *avctx,
 
     if (is_ref) {
         av_assert0(pic != target);
-        av_assert0(pic->nb_refs < MAX_PICTURE_REFERENCES);
-        pic->refs[pic->nb_refs++] = target;
+        av_assert0(pic->nb_refs[0] < MAX_PICTURE_REFERENCES &&
+                   pic->nb_refs[1] < MAX_PICTURE_REFERENCES);
+        if (target->display_order < pic->display_order)
+            pic->refs[0][pic->nb_refs[0]++] = target;
+        else
+            pic->refs[1][pic->nb_refs[1]++] = target;
         ++refs;
     }
 
@@ -862,10 +879,16 @@  static void vaapi_encode_remove_refs(AVCodecContext *avctx,
     if (pic->ref_removed[level])
         return;
 
-    for (i = 0; i < pic->nb_refs; i++) {
-        av_assert0(pic->refs[i]);
-        --pic->refs[i]->ref_count[level];
-        av_assert0(pic->refs[i]->ref_count[level] >= 0);
+    for (i = 0; i < pic->nb_refs[0]; i++) {
+        av_assert0(pic->refs[0][i]);
+        --pic->refs[0][i]->ref_count[level];
+        av_assert0(pic->refs[0][i]->ref_count[level] >= 0);
+    }
+
+    for (i = 0; i < pic->nb_refs[1]; i++) {
+        av_assert0(pic->refs[1][i]);
+        --pic->refs[1][i]->ref_count[level];
+        av_assert0(pic->refs[1][i]->ref_count[level] >= 0);
     }
 
     for (i = 0; i < pic->nb_dpb_pics; i++) {
@@ -910,7 +933,7 @@  static void vaapi_encode_set_b_pictures(AVCodecContext *avctx,
             vaapi_encode_add_ref(avctx, pic, end,   1, 1, 0);
             vaapi_encode_add_ref(avctx, pic, prev,  0, 0, 1);
 
-            for (ref = end->refs[1]; ref; ref = ref->refs[1])
+            for (ref = end->refs[1][0]; ref; ref = ref->refs[1][0])
                 vaapi_encode_add_ref(avctx, pic, ref, 0, 1, 0);
         }
         *last = prev;
@@ -933,7 +956,7 @@  static void vaapi_encode_set_b_pictures(AVCodecContext *avctx,
         vaapi_encode_add_ref(avctx, pic, end,   1, 1, 0);
         vaapi_encode_add_ref(avctx, pic, prev,  0, 0, 1);
 
-        for (ref = end->refs[1]; ref; ref = ref->refs[1])
+        for (ref = end->refs[1][0]; ref; ref = ref->refs[1][0])
             vaapi_encode_add_ref(avctx, pic, ref, 0, 1, 0);
 
         if (i > 1)
@@ -947,11 +970,44 @@  static void vaapi_encode_set_b_pictures(AVCodecContext *avctx,
     }
 }
 
+static void vaapi_encode_add_next_prev(AVCodecContext *avctx,
+                                       VAAPIEncodePicture *pic)
+{
+    VAAPIEncodeContext *ctx = avctx->priv_data;
+    int i;
+
+    if (!pic)
+        return;
+
+    if (pic->type == PICTURE_TYPE_IDR) {
+        for (i = 0; i < ctx->nb_next_prev; i++) {
+            --ctx->next_prev[i]->ref_count[0];
+            ctx->next_prev[i] = NULL;
+        }
+        ctx->next_prev[0] = pic;
+        ++pic->ref_count[0];
+        ctx->nb_next_prev = 1;
+
+        return;
+    }
+
+    if (ctx->nb_next_prev < MAX_PICTURE_REFERENCES) {
+        ctx->next_prev[ctx->nb_next_prev++] = pic;
+        ++pic->ref_count[0];
+    } else {
+        --ctx->next_prev[0]->ref_count[0];
+        for (i = 0; i < MAX_PICTURE_REFERENCES - 1; i++)
+            ctx->next_prev[i] = ctx->next_prev[i + 1];
+        ctx->next_prev[i] = pic;
+        ++pic->ref_count[0];
+    }
+}
+
 static int vaapi_encode_pick_next(AVCodecContext *avctx,
                                   VAAPIEncodePicture **pic_out)
 {
     VAAPIEncodeContext *ctx = avctx->priv_data;
-    VAAPIEncodePicture *pic = NULL, *next, *start;
+    VAAPIEncodePicture *pic = NULL, *prev = NULL, *next, *start;
     int i, b_counter, closed_gop_end;
 
     // If there are any B-frames already queued, the next one to encode
@@ -962,11 +1018,18 @@  static int vaapi_encode_pick_next(AVCodecContext *avctx,
             continue;
         if (pic->type != PICTURE_TYPE_B)
             continue;
-        for (i = 0; i < pic->nb_refs; i++) {
-            if (!pic->refs[i]->encode_issued)
+        for (i = 0; i < pic->nb_refs[0]; i++) {
+            if (!pic->refs[0][i]->encode_issued)
                 break;
         }
-        if (i == pic->nb_refs)
+        if (i != pic->nb_refs[0])
+            continue;
+
+        for (i = 0; i < pic->nb_refs[1]; i++) {
+            if (!pic->refs[1][i]->encode_issued)
+                break;
+        }
+        if (i == pic->nb_refs[1])
             break;
     }
 
@@ -1068,18 +1131,17 @@  static int vaapi_encode_pick_next(AVCodecContext *avctx,
         vaapi_encode_add_ref(avctx, pic, start,
                              pic->type == PICTURE_TYPE_P,
                              b_counter > 0, 0);
-        vaapi_encode_add_ref(avctx, pic, ctx->next_prev, 0, 0, 1);
+        vaapi_encode_add_ref(avctx, pic, ctx->next_prev[ctx->nb_next_prev - 1], 0, 0, 1);
     }
-    if (ctx->next_prev)
-        --ctx->next_prev->ref_count[0];
 
     if (b_counter > 0) {
         vaapi_encode_set_b_pictures(avctx, start, pic, pic, 1,
-                                    &ctx->next_prev);
+                                    &prev);
     } else {
-        ctx->next_prev = pic;
+        prev = pic;
     }
-    ++ctx->next_prev->ref_count[0];
+    vaapi_encode_add_next_prev(avctx, prev);
+
     return 0;
 }
 
diff --git a/libavcodec/vaapi_encode.h b/libavcodec/vaapi_encode.h
index a1e639f56b..d5452a37b3 100644
--- a/libavcodec/vaapi_encode.h
+++ b/libavcodec/vaapi_encode.h
@@ -49,6 +49,7 @@  enum {
     // A.4.1: table A.6 allows at most 20 tile columns for any level.
     MAX_TILE_COLS          = 20,
     MAX_ASYNC_DEPTH        = 64,
+    MAX_REFERENCE_LIST_NUM = 2,
 };
 
 extern const AVCodecHWConfigInternal *const ff_vaapi_encode_hw_configs[];
@@ -116,10 +117,11 @@  typedef struct VAAPIEncodePicture {
     // but not if it isn't.
     int                     nb_dpb_pics;
     struct VAAPIEncodePicture *dpb[MAX_DPB_SIZE];
-    // The reference pictures used in decoding this picture.  If they are
-    // used by later pictures they will also appear in the DPB.
-    int                     nb_refs;
-    struct VAAPIEncodePicture *refs[MAX_PICTURE_REFERENCES];
+    // The reference pictures used in decoding this picture. If they are
+    // used by later pictures they will also appear in the DPB. ref[0][] for
+    // previous reference frames. ref[1][] for future reference frames.
+    int                     nb_refs[MAX_REFERENCE_LIST_NUM];
+    struct VAAPIEncodePicture *refs[MAX_REFERENCE_LIST_NUM][MAX_PICTURE_REFERENCES];
     // The previous reference picture in encode order.  Must be in at least
     // one of the reference list and DPB list.
     struct VAAPIEncodePicture *prev;
@@ -290,8 +292,9 @@  typedef struct VAAPIEncodeContext {
     // Current encoding window, in display (input) order.
     VAAPIEncodePicture *pic_start, *pic_end;
     // The next picture to use as the previous reference picture in
-    // encoding order.
-    VAAPIEncodePicture *next_prev;
+    // encoding order. Order from small to large in encoding order.
+    VAAPIEncodePicture *next_prev[MAX_PICTURE_REFERENCES];
+    int                 nb_next_prev;
 
     // Next input order index (display order).
     int64_t         input_order;
diff --git a/libavcodec/vaapi_encode_h264.c b/libavcodec/vaapi_encode_h264.c
index 9ad017d540..11283f0bf7 100644
--- a/libavcodec/vaapi_encode_h264.c
+++ b/libavcodec/vaapi_encode_h264.c
@@ -628,7 +628,7 @@  static int vaapi_encode_h264_init_picture_params(AVCodecContext *avctx,
     VAAPIEncodePicture              *prev = pic->prev;
     VAAPIEncodeH264Picture         *hprev = prev ? prev->priv_data : NULL;
     VAEncPictureParameterBufferH264 *vpic = pic->codec_picture_params;
-    int i;
+    int i, j = 0;
 
     if (pic->type == PICTURE_TYPE_IDR) {
         av_assert0(pic->display_order == pic->encode_order);
@@ -729,24 +729,26 @@  static int vaapi_encode_h264_init_picture_params(AVCodecContext *avctx,
         .TopFieldOrderCnt    = hpic->pic_order_cnt,
         .BottomFieldOrderCnt = hpic->pic_order_cnt,
     };
-
-    for (i = 0; i < pic->nb_refs; i++) {
-        VAAPIEncodePicture      *ref = pic->refs[i];
-        VAAPIEncodeH264Picture *href;
-
-        av_assert0(ref && ref->encode_order < pic->encode_order);
-        href = ref->priv_data;
-
-        vpic->ReferenceFrames[i] = (VAPictureH264) {
-            .picture_id          = ref->recon_surface,
-            .frame_idx           = href->frame_num,
-            .flags               = VA_PICTURE_H264_SHORT_TERM_REFERENCE,
-            .TopFieldOrderCnt    = href->pic_order_cnt,
-            .BottomFieldOrderCnt = href->pic_order_cnt,
-        };
+    for (int k = 0; k < MAX_REFERENCE_LIST_NUM; k++) {
+        for (i = 0; i < pic->nb_refs[k]; i++) {
+            VAAPIEncodePicture      *ref = pic->refs[k][i];
+            VAAPIEncodeH264Picture *href;
+
+            av_assert0(ref && ref->encode_order < pic->encode_order);
+            href = ref->priv_data;
+
+            vpic->ReferenceFrames[j++] = (VAPictureH264) {
+                .picture_id          = ref->recon_surface,
+                .frame_idx           = href->frame_num,
+                .flags               = VA_PICTURE_H264_SHORT_TERM_REFERENCE,
+                .TopFieldOrderCnt    = href->pic_order_cnt,
+                .BottomFieldOrderCnt = href->pic_order_cnt,
+            };
+        }
     }
-    for (; i < FF_ARRAY_ELEMS(vpic->ReferenceFrames); i++) {
-        vpic->ReferenceFrames[i] = (VAPictureH264) {
+
+    for (; j < FF_ARRAY_ELEMS(vpic->ReferenceFrames); j++) {
+        vpic->ReferenceFrames[j] = (VAPictureH264) {
             .picture_id = VA_INVALID_ID,
             .flags      = VA_PICTURE_H264_INVALID,
         };
@@ -948,17 +950,17 @@  static int vaapi_encode_h264_init_slice_params(AVCodecContext *avctx,
 
         if (pic->type == PICTURE_TYPE_P) {
             int need_rplm = 0;
-            for (i = 0; i < pic->nb_refs; i++) {
-                av_assert0(pic->refs[i]);
-                if (pic->refs[i] != def_l0[i])
+            for (i = 0; i < pic->nb_refs[0]; i++) {
+                av_assert0(pic->refs[0][i]);
+                if (pic->refs[0][i] != def_l0[i])
                     need_rplm = 1;
             }
 
             sh->ref_pic_list_modification_flag_l0 = need_rplm;
             if (need_rplm) {
                 int pic_num = hpic->frame_num;
-                for (i = 0; i < pic->nb_refs; i++) {
-                    href = pic->refs[i]->priv_data;
+                for (i = 0; i < pic->nb_refs[0]; i++) {
+                    href = pic->refs[0][i]->priv_data;
                     av_assert0(href->frame_num != pic_num);
                     if (href->frame_num < pic_num) {
                         sh->rplm_l0[i].modification_of_pic_nums_idc = 0;
@@ -977,28 +979,29 @@  static int vaapi_encode_h264_init_slice_params(AVCodecContext *avctx,
         } else {
             int need_rplm_l0 = 0, need_rplm_l1 = 0;
             int n0 = 0, n1 = 0;
-            for (i = 0; i < pic->nb_refs; i++) {
-                av_assert0(pic->refs[i]);
-                href = pic->refs[i]->priv_data;
-                av_assert0(href->pic_order_cnt != hpic->pic_order_cnt);
-                if (href->pic_order_cnt < hpic->pic_order_cnt) {
-                    if (pic->refs[i] != def_l0[n0])
-                        need_rplm_l0 = 1;
-                    ++n0;
-                } else {
-                    if (pic->refs[i] != def_l1[n1])
-                        need_rplm_l1 = 1;
-                    ++n1;
-                }
+            for (i = 0; i < pic->nb_refs[0]; i++) {
+                av_assert0(pic->refs[0][i]);
+                href = pic->refs[0][i]->priv_data;
+                av_assert0(href->pic_order_cnt < hpic->pic_order_cnt);
+                if (pic->refs[0][i] != def_l0[n0])
+                    need_rplm_l0 = 1;
+                ++n0;
+            }
+
+            for (i = 0; i < pic->nb_refs[1]; i++) {
+                av_assert0(pic->refs[1][i]);
+                href = pic->refs[1][i]->priv_data;
+                av_assert0(href->pic_order_cnt > hpic->pic_order_cnt);
+                if (pic->refs[1][i] != def_l1[n1])
+                    need_rplm_l1 = 1;
+                ++n1;
             }
 
             sh->ref_pic_list_modification_flag_l0 = need_rplm_l0;
             if (need_rplm_l0) {
                 int pic_num = hpic->frame_num;
-                for (i = j = 0; i < pic->nb_refs; i++) {
-                    href = pic->refs[i]->priv_data;
-                    if (href->pic_order_cnt > hpic->pic_order_cnt)
-                        continue;
+                for (i = j = 0; i < pic->nb_refs[0]; i++) {
+                    href = pic->refs[0][i]->priv_data;
                     av_assert0(href->frame_num != pic_num);
                     if (href->frame_num < pic_num) {
                         sh->rplm_l0[j].modification_of_pic_nums_idc = 0;
@@ -1019,10 +1022,8 @@  static int vaapi_encode_h264_init_slice_params(AVCodecContext *avctx,
             sh->ref_pic_list_modification_flag_l1 = need_rplm_l1;
             if (need_rplm_l1) {
                 int pic_num = hpic->frame_num;
-                for (i = j = 0; i < pic->nb_refs; i++) {
-                    href = pic->refs[i]->priv_data;
-                    if (href->pic_order_cnt < hpic->pic_order_cnt)
-                        continue;
+                for (i = j = 0; i < pic->nb_refs[1]; i++) {
+                    href = pic->refs[1][i]->priv_data;
                     av_assert0(href->frame_num != pic_num);
                     if (href->frame_num < pic_num) {
                         sh->rplm_l1[j].modification_of_pic_nums_idc = 0;
@@ -1062,14 +1063,13 @@  static int vaapi_encode_h264_init_slice_params(AVCodecContext *avctx,
         vslice->RefPicList1[i].flags      = VA_PICTURE_H264_INVALID;
     }
 
-    av_assert0(pic->nb_refs <= 2);
-    if (pic->nb_refs >= 1) {
+    if (pic->nb_refs[0]) {
         // Backward reference for P- or B-frame.
         av_assert0(pic->type == PICTURE_TYPE_P ||
                    pic->type == PICTURE_TYPE_B);
         vslice->RefPicList0[0] = vpic->ReferenceFrames[0];
     }
-    if (pic->nb_refs >= 2) {
+    if (pic->nb_refs[1]) {
         // Forward reference for B-frame.
         av_assert0(pic->type == PICTURE_TYPE_B);
         vslice->RefPicList1[0] = vpic->ReferenceFrames[1];
diff --git a/libavcodec/vaapi_encode_h265.c b/libavcodec/vaapi_encode_h265.c
index aa7e532f9a..f015e6f3ce 100644
--- a/libavcodec/vaapi_encode_h265.c
+++ b/libavcodec/vaapi_encode_h265.c
@@ -764,7 +764,7 @@  static int vaapi_encode_h265_init_picture_params(AVCodecContext *avctx,
     VAAPIEncodePicture              *prev = pic->prev;
     VAAPIEncodeH265Picture         *hprev = prev ? prev->priv_data : NULL;
     VAEncPictureParameterBufferHEVC *vpic = pic->codec_picture_params;
-    int i;
+    int i, j = 0;
 
     if (pic->type == PICTURE_TYPE_IDR) {
         av_assert0(pic->display_order == pic->encode_order);
@@ -789,8 +789,8 @@  static int vaapi_encode_h265_init_picture_params(AVCodecContext *avctx,
             hpic->pic_type       = 1;
         } else {
             VAAPIEncodePicture *irap_ref;
-            av_assert0(pic->refs[0] && pic->refs[1]);
-            for (irap_ref = pic; irap_ref; irap_ref = irap_ref->refs[1]) {
+            av_assert0(pic->refs[0][0] && pic->refs[1][0]);
+            for (irap_ref = pic; irap_ref; irap_ref = irap_ref->refs[1][0]) {
                 if (irap_ref->type == PICTURE_TYPE_I)
                     break;
             }
@@ -915,24 +915,27 @@  static int vaapi_encode_h265_init_picture_params(AVCodecContext *avctx,
         .flags         = 0,
     };
 
-    for (i = 0; i < pic->nb_refs; i++) {
-        VAAPIEncodePicture      *ref = pic->refs[i];
-        VAAPIEncodeH265Picture *href;
-
-        av_assert0(ref && ref->encode_order < pic->encode_order);
-        href = ref->priv_data;
-
-        vpic->reference_frames[i] = (VAPictureHEVC) {
-            .picture_id    = ref->recon_surface,
-            .pic_order_cnt = href->pic_order_cnt,
-            .flags = (ref->display_order < pic->display_order ?
-                      VA_PICTURE_HEVC_RPS_ST_CURR_BEFORE : 0) |
-                     (ref->display_order > pic->display_order ?
-                      VA_PICTURE_HEVC_RPS_ST_CURR_AFTER  : 0),
-        };
+    for (int k = 0; k < MAX_REFERENCE_LIST_NUM; k++) {
+        for (i = 0; i < pic->nb_refs[k]; i++) {
+            VAAPIEncodePicture      *ref = pic->refs[k][i];
+            VAAPIEncodeH265Picture *href;
+
+            av_assert0(ref && ref->encode_order < pic->encode_order);
+            href = ref->priv_data;
+
+            vpic->reference_frames[j++] = (VAPictureHEVC) {
+                .picture_id    = ref->recon_surface,
+                .pic_order_cnt = href->pic_order_cnt,
+                .flags = (ref->display_order < pic->display_order ?
+                          VA_PICTURE_HEVC_RPS_ST_CURR_BEFORE : 0) |
+                          (ref->display_order > pic->display_order ?
+                          VA_PICTURE_HEVC_RPS_ST_CURR_AFTER  : 0),
+            };
+        }
     }
-    for (; i < FF_ARRAY_ELEMS(vpic->reference_frames); i++) {
-        vpic->reference_frames[i] = (VAPictureHEVC) {
+
+    for (; j < FF_ARRAY_ELEMS(vpic->reference_frames); j++) {
+        vpic->reference_frames[j] = (VAPictureHEVC) {
             .picture_id = VA_INVALID_ID,
             .flags      = VA_PICTURE_HEVC_INVALID,
         };
@@ -1016,21 +1019,33 @@  static int vaapi_encode_h265_init_slice_params(AVCodecContext *avctx,
         memset(rps, 0, sizeof(*rps));
 
         rps_pics = 0;
-        for (i = 0; i < pic->nb_refs; i++) {
-            strp = pic->refs[i]->priv_data;
-            rps_poc[rps_pics]  = strp->pic_order_cnt;
-            rps_used[rps_pics] = 1;
-            ++rps_pics;
+        for (i = 0; i < MAX_REFERENCE_LIST_NUM; i++) {
+            for (j = 0; j < pic->nb_refs[i]; j++) {
+                strp = pic->refs[i][j]->priv_data;
+                rps_poc[rps_pics]  = strp->pic_order_cnt;
+                rps_used[rps_pics] = 1;
+                ++rps_pics;
+            }
         }
+
         for (i = 0; i < pic->nb_dpb_pics; i++) {
             if (pic->dpb[i] == pic)
                 continue;
-            for (j = 0; j < pic->nb_refs; j++) {
-                if (pic->dpb[i] == pic->refs[j])
+
+            for (j = 0; j < pic->nb_refs[0]; j++) {
+                if (pic->dpb[i] == pic->refs[0][j])
+                    break;
+            }
+            if (j < pic->nb_refs[0])
+                continue;
+
+            for (j = 0; j < pic->nb_refs[1]; j++) {
+                if (pic->dpb[i] == pic->refs[1][j])
                     break;
             }
-            if (j < pic->nb_refs)
+            if (j < pic->nb_refs[1])
                 continue;
+
             strp = pic->dpb[i]->priv_data;
             rps_poc[rps_pics]  = strp->pic_order_cnt;
             rps_used[rps_pics] = 0;
@@ -1155,8 +1170,7 @@  static int vaapi_encode_h265_init_slice_params(AVCodecContext *avctx,
         vslice->ref_pic_list1[i].flags      = VA_PICTURE_HEVC_INVALID;
     }
 
-    av_assert0(pic->nb_refs <= 2);
-    if (pic->nb_refs >= 1) {
+    if (pic->nb_refs[0]) {
         // Backward reference for P- or B-frame.
         av_assert0(pic->type == PICTURE_TYPE_P ||
                    pic->type == PICTURE_TYPE_B);
@@ -1165,7 +1179,7 @@  static int vaapi_encode_h265_init_slice_params(AVCodecContext *avctx,
             // Reference for GPB B-frame, L0 == L1
             vslice->ref_pic_list1[0] = vpic->reference_frames[0];
     }
-    if (pic->nb_refs >= 2) {
+    if (pic->nb_refs[1]) {
         // Forward reference for B-frame.
         av_assert0(pic->type == PICTURE_TYPE_B);
         vslice->ref_pic_list1[0] = vpic->reference_frames[1];
diff --git a/libavcodec/vaapi_encode_mpeg2.c b/libavcodec/vaapi_encode_mpeg2.c
index 9261d19a83..8000ae19cb 100644
--- a/libavcodec/vaapi_encode_mpeg2.c
+++ b/libavcodec/vaapi_encode_mpeg2.c
@@ -458,12 +458,12 @@  static int vaapi_encode_mpeg2_init_picture_params(AVCodecContext *avctx,
         break;
     case PICTURE_TYPE_P:
         vpic->picture_type = VAEncPictureTypePredictive;
-        vpic->forward_reference_picture = pic->refs[0]->recon_surface;
+        vpic->forward_reference_picture = pic->refs[0][0]->recon_surface;
         break;
     case PICTURE_TYPE_B:
         vpic->picture_type = VAEncPictureTypeBidirectional;
-        vpic->forward_reference_picture  = pic->refs[0]->recon_surface;
-        vpic->backward_reference_picture = pic->refs[1]->recon_surface;
+        vpic->forward_reference_picture  = pic->refs[0][0]->recon_surface;
+        vpic->backward_reference_picture = pic->refs[1][0]->recon_surface;
         break;
     default:
         av_assert0(0 && "invalid picture type");
diff --git a/libavcodec/vaapi_encode_vp8.c b/libavcodec/vaapi_encode_vp8.c
index ae6a8d313c..b96e4cd65f 100644
--- a/libavcodec/vaapi_encode_vp8.c
+++ b/libavcodec/vaapi_encode_vp8.c
@@ -86,7 +86,7 @@  static int vaapi_encode_vp8_init_picture_params(AVCodecContext *avctx,
     switch (pic->type) {
     case PICTURE_TYPE_IDR:
     case PICTURE_TYPE_I:
-        av_assert0(pic->nb_refs == 0);
+        av_assert0(pic->nb_refs[0] == 0 && pic->nb_refs[1] == 0);
         vpic->ref_flags.bits.force_kf = 1;
         vpic->ref_last_frame =
         vpic->ref_gf_frame   =
@@ -94,14 +94,14 @@  static int vaapi_encode_vp8_init_picture_params(AVCodecContext *avctx,
             VA_INVALID_SURFACE;
         break;
     case PICTURE_TYPE_P:
-        av_assert0(pic->nb_refs == 1);
+        av_assert0(!pic->nb_refs[1]);
         vpic->ref_flags.bits.no_ref_last = 0;
         vpic->ref_flags.bits.no_ref_gf   = 1;
         vpic->ref_flags.bits.no_ref_arf  = 1;
         vpic->ref_last_frame =
         vpic->ref_gf_frame   =
         vpic->ref_arf_frame  =
-            pic->refs[0]->recon_surface;
+            pic->refs[0][0]->recon_surface;
         break;
     default:
         av_assert0(0 && "invalid picture type");
diff --git a/libavcodec/vaapi_encode_vp9.c b/libavcodec/vaapi_encode_vp9.c
index af1353cea8..d9e8ba6a08 100644
--- a/libavcodec/vaapi_encode_vp9.c
+++ b/libavcodec/vaapi_encode_vp9.c
@@ -96,15 +96,15 @@  static int vaapi_encode_vp9_init_picture_params(AVCodecContext *avctx,
 
     switch (pic->type) {
     case PICTURE_TYPE_IDR:
-        av_assert0(pic->nb_refs == 0);
+        av_assert0(pic->nb_refs[0] == 0 && pic->nb_refs[1] == 0);
         vpic->ref_flags.bits.force_kf = 1;
         vpic->refresh_frame_flags = 0xff;
         hpic->slot = 0;
         break;
     case PICTURE_TYPE_P:
-        av_assert0(pic->nb_refs == 1);
+        av_assert0(!pic->nb_refs[1]);
         {
-            VAAPIEncodeVP9Picture *href = pic->refs[0]->priv_data;
+            VAAPIEncodeVP9Picture *href = pic->refs[0][0]->priv_data;
             av_assert0(href->slot == 0 || href->slot == 1);
 
             if (ctx->max_b_depth > 0) {
@@ -120,10 +120,10 @@  static int vaapi_encode_vp9_init_picture_params(AVCodecContext *avctx,
         }
         break;
     case PICTURE_TYPE_B:
-        av_assert0(pic->nb_refs == 2);
+        av_assert0(pic->nb_refs[0] && pic->nb_refs[1]);
         {
-            VAAPIEncodeVP9Picture *href0 = pic->refs[0]->priv_data,
-                                  *href1 = pic->refs[1]->priv_data;
+            VAAPIEncodeVP9Picture *href0 = pic->refs[0][0]->priv_data,
+                                  *href1 = pic->refs[1][0]->priv_data;
             av_assert0(href0->slot < pic->b_depth + 1 &&
                        href1->slot < pic->b_depth + 1);
 
@@ -157,12 +157,14 @@  static int vaapi_encode_vp9_init_picture_params(AVCodecContext *avctx,
     for (i = 0; i < FF_ARRAY_ELEMS(vpic->reference_frames); i++)
         vpic->reference_frames[i] = VA_INVALID_SURFACE;
 
-    for (i = 0; i < pic->nb_refs; i++) {
-        VAAPIEncodePicture *ref_pic = pic->refs[i];
-        int slot;
-        slot = ((VAAPIEncodeVP9Picture*)ref_pic->priv_data)->slot;
-        av_assert0(vpic->reference_frames[slot] == VA_INVALID_SURFACE);
-        vpic->reference_frames[slot] = ref_pic->recon_surface;
+    for (i = 0; i < MAX_REFERENCE_LIST_NUM; i++) {
+        for (int j = 0; j < pic->nb_refs[i]; j++) {
+            VAAPIEncodePicture *ref_pic = pic->refs[i][j];
+            int slot;
+            slot = ((VAAPIEncodeVP9Picture*)ref_pic->priv_data)->slot;
+            av_assert0(vpic->reference_frames[slot] == VA_INVALID_SURFACE);
+            vpic->reference_frames[slot] = ref_pic->recon_surface;
+        }
     }
 
     vpic->pic_flags.bits.frame_type = (pic->type != PICTURE_TYPE_IDR);