diff mbox series

[FFmpeg-devel,v1,5/6] lavc/vaapi_encode: Separate reference frame into previous/future list

Message ID 20230710073941.425658-5-fei.w.wang@intel.com
State New
Headers show
Series [FFmpeg-devel,v1,1/6] avcodec/cbs_av1: Add tx mode enum values | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Wang, Fei W July 10, 2023, 7:39 a.m. UTC
To support more reference frames from different directions.

Signed-off-by: Fei Wang <fei.w.wang@intel.com>
---
 libavcodec/vaapi_encode.c       | 112 +++++++++++++++++++++++++-------
 libavcodec/vaapi_encode.h       |  15 +++--
 libavcodec/vaapi_encode_h264.c  |  94 +++++++++++++--------------
 libavcodec/vaapi_encode_h265.c  |  76 +++++++++++++---------
 libavcodec/vaapi_encode_mpeg2.c |   6 +-
 libavcodec/vaapi_encode_vp8.c   |   6 +-
 libavcodec/vaapi_encode_vp9.c   |  26 ++++----
 7 files changed, 208 insertions(+), 127 deletions(-)
diff mbox series

Patch

diff --git a/libavcodec/vaapi_encode.c b/libavcodec/vaapi_encode.c
index c8545cd8db..2604f12b9e 100644
--- a/libavcodec/vaapi_encode.c
+++ b/libavcodec/vaapi_encode.c
@@ -276,21 +276,34 @@  static int vaapi_encode_issue(AVCodecContext *avctx,
     av_log(avctx, AV_LOG_DEBUG, "Issuing encode for pic %"PRId64"/%"PRId64" "
            "as type %s.\n", pic->display_order, pic->encode_order,
            picture_type_name[pic->type]);
-    if (pic->nb_refs == 0) {
+    if (pic->nb_refs[0] == 0 && pic->nb_refs[1] == 0) {
         av_log(avctx, AV_LOG_DEBUG, "No reference pictures.\n");
     } else {
-        av_log(avctx, AV_LOG_DEBUG, "Refers to:");
-        for (i = 0; i < pic->nb_refs; i++) {
+        av_log(avctx, AV_LOG_DEBUG, "L0 refers to");
+        for (i = 0; i < pic->nb_refs[0]; i++) {
             av_log(avctx, AV_LOG_DEBUG, " %"PRId64"/%"PRId64,
-                   pic->refs[i]->display_order, pic->refs[i]->encode_order);
+                   pic->refs[0][i]->display_order, pic->refs[0][i]->encode_order);
         }
         av_log(avctx, AV_LOG_DEBUG, ".\n");
+
+        if (pic->nb_refs[1]) {
+            av_log(avctx, AV_LOG_DEBUG, "L1 refers to");
+            for (i = 0; i < pic->nb_refs[1]; i++) {
+                av_log(avctx, AV_LOG_DEBUG, " %"PRId64"/%"PRId64,
+                       pic->refs[1][i]->display_order, pic->refs[1][i]->encode_order);
+            }
+            av_log(avctx, AV_LOG_DEBUG, ".\n");
+        }
     }
 
     av_assert0(!pic->encode_issued);
-    for (i = 0; i < pic->nb_refs; i++) {
-        av_assert0(pic->refs[i]);
-        av_assert0(pic->refs[i]->encode_issued);
+    for (i = 0; i < pic->nb_refs[0]; i++) {
+        av_assert0(pic->refs[0][i]);
+        av_assert0(pic->refs[0][i]->encode_issued);
+    }
+    for (i = 0; i < pic->nb_refs[1]; i++) {
+        av_assert0(pic->refs[1][i]);
+        av_assert0(pic->refs[1][i]->encode_issued);
     }
 
     av_log(avctx, AV_LOG_DEBUG, "Input surface is %#x.\n", pic->input_surface);
@@ -832,8 +845,12 @@  static void vaapi_encode_add_ref(AVCodecContext *avctx,
 
     if (is_ref) {
         av_assert0(pic != target);
-        av_assert0(pic->nb_refs < MAX_PICTURE_REFERENCES);
-        pic->refs[pic->nb_refs++] = target;
+        av_assert0(pic->nb_refs[0] < MAX_PICTURE_REFERENCES &&
+                   pic->nb_refs[1] < MAX_PICTURE_REFERENCES);
+        if (target->display_order < pic->display_order)
+            pic->refs[0][pic->nb_refs[0]++] = target;
+        else
+            pic->refs[1][pic->nb_refs[1]++] = target;
         ++refs;
     }
 
@@ -862,10 +879,16 @@  static void vaapi_encode_remove_refs(AVCodecContext *avctx,
     if (pic->ref_removed[level])
         return;
 
-    for (i = 0; i < pic->nb_refs; i++) {
-        av_assert0(pic->refs[i]);
-        --pic->refs[i]->ref_count[level];
-        av_assert0(pic->refs[i]->ref_count[level] >= 0);
+    for (i = 0; i < pic->nb_refs[0]; i++) {
+        av_assert0(pic->refs[0][i]);
+        --pic->refs[0][i]->ref_count[level];
+        av_assert0(pic->refs[0][i]->ref_count[level] >= 0);
+    }
+
+    for (i = 0; i < pic->nb_refs[1]; i++) {
+        av_assert0(pic->refs[1][i]);
+        --pic->refs[1][i]->ref_count[level];
+        av_assert0(pic->refs[1][i]->ref_count[level] >= 0);
     }
 
     for (i = 0; i < pic->nb_dpb_pics; i++) {
@@ -910,7 +933,7 @@  static void vaapi_encode_set_b_pictures(AVCodecContext *avctx,
             vaapi_encode_add_ref(avctx, pic, end,   1, 1, 0);
             vaapi_encode_add_ref(avctx, pic, prev,  0, 0, 1);
 
-            for (ref = end->refs[1]; ref; ref = ref->refs[1])
+            for (ref = end->refs[1][0]; ref; ref = ref->refs[1][0])
                 vaapi_encode_add_ref(avctx, pic, ref, 0, 1, 0);
         }
         *last = prev;
@@ -933,7 +956,7 @@  static void vaapi_encode_set_b_pictures(AVCodecContext *avctx,
         vaapi_encode_add_ref(avctx, pic, end,   1, 1, 0);
         vaapi_encode_add_ref(avctx, pic, prev,  0, 0, 1);
 
-        for (ref = end->refs[1]; ref; ref = ref->refs[1])
+        for (ref = end->refs[1][0]; ref; ref = ref->refs[1][0])
             vaapi_encode_add_ref(avctx, pic, ref, 0, 1, 0);
 
         if (i > 1)
@@ -947,11 +970,44 @@  static void vaapi_encode_set_b_pictures(AVCodecContext *avctx,
     }
 }
 
+static void vaapi_encode_add_next_prev(AVCodecContext *avctx,
+                                       VAAPIEncodePicture *pic)
+{
+    VAAPIEncodeContext *ctx = avctx->priv_data;
+    int i;
+
+    if (!pic)
+        return;
+
+    if (pic->type == PICTURE_TYPE_IDR) {
+        for (i = 0; i < ctx->nb_next_prev; i++) {
+            --ctx->next_prev[i]->ref_count[0];
+            ctx->next_prev[i] = NULL;
+        }
+        ctx->next_prev[0] = pic;
+        ++pic->ref_count[0];
+        ctx->nb_next_prev = 1;
+
+        return;
+    }
+
+    if (ctx->nb_next_prev < MAX_PICTURE_REFERENCES) {
+        ctx->next_prev[ctx->nb_next_prev++] = pic;
+        ++pic->ref_count[0];
+    } else {
+        --ctx->next_prev[0]->ref_count[0];
+        for (i = 0; i < MAX_PICTURE_REFERENCES - 1; i++)
+            ctx->next_prev[i] = ctx->next_prev[i + 1];
+        ctx->next_prev[i] = pic;
+        ++pic->ref_count[0];
+    }
+}
+
 static int vaapi_encode_pick_next(AVCodecContext *avctx,
                                   VAAPIEncodePicture **pic_out)
 {
     VAAPIEncodeContext *ctx = avctx->priv_data;
-    VAAPIEncodePicture *pic = NULL, *next, *start;
+    VAAPIEncodePicture *pic = NULL, *prev = NULL, *next, *start;
     int i, b_counter, closed_gop_end;
 
     // If there are any B-frames already queued, the next one to encode
@@ -962,11 +1018,18 @@  static int vaapi_encode_pick_next(AVCodecContext *avctx,
             continue;
         if (pic->type != PICTURE_TYPE_B)
             continue;
-        for (i = 0; i < pic->nb_refs; i++) {
-            if (!pic->refs[i]->encode_issued)
+        for (i = 0; i < pic->nb_refs[0]; i++) {
+            if (!pic->refs[0][i]->encode_issued)
                 break;
         }
-        if (i == pic->nb_refs)
+        if (i != pic->nb_refs[0])
+            continue;
+
+        for (i = 0; i < pic->nb_refs[1]; i++) {
+            if (!pic->refs[1][i]->encode_issued)
+                break;
+        }
+        if (i == pic->nb_refs[1])
             break;
     }
 
@@ -1068,18 +1131,17 @@  static int vaapi_encode_pick_next(AVCodecContext *avctx,
         vaapi_encode_add_ref(avctx, pic, start,
                              pic->type == PICTURE_TYPE_P,
                              b_counter > 0, 0);
-        vaapi_encode_add_ref(avctx, pic, ctx->next_prev, 0, 0, 1);
+        vaapi_encode_add_ref(avctx, pic, ctx->next_prev[ctx->nb_next_prev - 1], 0, 0, 1);
     }
-    if (ctx->next_prev)
-        --ctx->next_prev->ref_count[0];
 
     if (b_counter > 0) {
         vaapi_encode_set_b_pictures(avctx, start, pic, pic, 1,
-                                    &ctx->next_prev);
+                                    &prev);
     } else {
-        ctx->next_prev = pic;
+        prev = pic;
     }
-    ++ctx->next_prev->ref_count[0];
+    vaapi_encode_add_next_prev(avctx, prev);
+
     return 0;
 }
 
diff --git a/libavcodec/vaapi_encode.h b/libavcodec/vaapi_encode.h
index a1e639f56b..d5452a37b3 100644
--- a/libavcodec/vaapi_encode.h
+++ b/libavcodec/vaapi_encode.h
@@ -49,6 +49,7 @@  enum {
     // A.4.1: table A.6 allows at most 20 tile columns for any level.
     MAX_TILE_COLS          = 20,
     MAX_ASYNC_DEPTH        = 64,
+    MAX_REFERENCE_LIST_NUM = 2,
 };
 
 extern const AVCodecHWConfigInternal *const ff_vaapi_encode_hw_configs[];
@@ -116,10 +117,11 @@  typedef struct VAAPIEncodePicture {
     // but not if it isn't.
     int                     nb_dpb_pics;
     struct VAAPIEncodePicture *dpb[MAX_DPB_SIZE];
-    // The reference pictures used in decoding this picture.  If they are
-    // used by later pictures they will also appear in the DPB.
-    int                     nb_refs;
-    struct VAAPIEncodePicture *refs[MAX_PICTURE_REFERENCES];
+    // The reference pictures used in decoding this picture. If they are
+    // used by later pictures they will also appear in the DPB. ref[0][] for
+    // previous reference frames. ref[1][] for future reference frames.
+    int                     nb_refs[MAX_REFERENCE_LIST_NUM];
+    struct VAAPIEncodePicture *refs[MAX_REFERENCE_LIST_NUM][MAX_PICTURE_REFERENCES];
     // The previous reference picture in encode order.  Must be in at least
     // one of the reference list and DPB list.
     struct VAAPIEncodePicture *prev;
@@ -290,8 +292,9 @@  typedef struct VAAPIEncodeContext {
     // Current encoding window, in display (input) order.
     VAAPIEncodePicture *pic_start, *pic_end;
     // The next picture to use as the previous reference picture in
-    // encoding order.
-    VAAPIEncodePicture *next_prev;
+    // encoding order. Order from small to large in encoding order.
+    VAAPIEncodePicture *next_prev[MAX_PICTURE_REFERENCES];
+    int                 nb_next_prev;
 
     // Next input order index (display order).
     int64_t         input_order;
diff --git a/libavcodec/vaapi_encode_h264.c b/libavcodec/vaapi_encode_h264.c
index 9ad017d540..11283f0bf7 100644
--- a/libavcodec/vaapi_encode_h264.c
+++ b/libavcodec/vaapi_encode_h264.c
@@ -628,7 +628,7 @@  static int vaapi_encode_h264_init_picture_params(AVCodecContext *avctx,
     VAAPIEncodePicture              *prev = pic->prev;
     VAAPIEncodeH264Picture         *hprev = prev ? prev->priv_data : NULL;
     VAEncPictureParameterBufferH264 *vpic = pic->codec_picture_params;
-    int i;
+    int i, j = 0;
 
     if (pic->type == PICTURE_TYPE_IDR) {
         av_assert0(pic->display_order == pic->encode_order);
@@ -729,24 +729,26 @@  static int vaapi_encode_h264_init_picture_params(AVCodecContext *avctx,
         .TopFieldOrderCnt    = hpic->pic_order_cnt,
         .BottomFieldOrderCnt = hpic->pic_order_cnt,
     };
-
-    for (i = 0; i < pic->nb_refs; i++) {
-        VAAPIEncodePicture      *ref = pic->refs[i];
-        VAAPIEncodeH264Picture *href;
-
-        av_assert0(ref && ref->encode_order < pic->encode_order);
-        href = ref->priv_data;
-
-        vpic->ReferenceFrames[i] = (VAPictureH264) {
-            .picture_id          = ref->recon_surface,
-            .frame_idx           = href->frame_num,
-            .flags               = VA_PICTURE_H264_SHORT_TERM_REFERENCE,
-            .TopFieldOrderCnt    = href->pic_order_cnt,
-            .BottomFieldOrderCnt = href->pic_order_cnt,
-        };
+    for (int k = 0; k < MAX_REFERENCE_LIST_NUM; k++) {
+        for (i = 0; i < pic->nb_refs[k]; i++) {
+            VAAPIEncodePicture      *ref = pic->refs[k][i];
+            VAAPIEncodeH264Picture *href;
+
+            av_assert0(ref && ref->encode_order < pic->encode_order);
+            href = ref->priv_data;
+
+            vpic->ReferenceFrames[j++] = (VAPictureH264) {
+                .picture_id          = ref->recon_surface,
+                .frame_idx           = href->frame_num,
+                .flags               = VA_PICTURE_H264_SHORT_TERM_REFERENCE,
+                .TopFieldOrderCnt    = href->pic_order_cnt,
+                .BottomFieldOrderCnt = href->pic_order_cnt,
+            };
+        }
     }
-    for (; i < FF_ARRAY_ELEMS(vpic->ReferenceFrames); i++) {
-        vpic->ReferenceFrames[i] = (VAPictureH264) {
+
+    for (; j < FF_ARRAY_ELEMS(vpic->ReferenceFrames); j++) {
+        vpic->ReferenceFrames[j] = (VAPictureH264) {
             .picture_id = VA_INVALID_ID,
             .flags      = VA_PICTURE_H264_INVALID,
         };
@@ -948,17 +950,17 @@  static int vaapi_encode_h264_init_slice_params(AVCodecContext *avctx,
 
         if (pic->type == PICTURE_TYPE_P) {
             int need_rplm = 0;
-            for (i = 0; i < pic->nb_refs; i++) {
-                av_assert0(pic->refs[i]);
-                if (pic->refs[i] != def_l0[i])
+            for (i = 0; i < pic->nb_refs[0]; i++) {
+                av_assert0(pic->refs[0][i]);
+                if (pic->refs[0][i] != def_l0[i])
                     need_rplm = 1;
             }
 
             sh->ref_pic_list_modification_flag_l0 = need_rplm;
             if (need_rplm) {
                 int pic_num = hpic->frame_num;
-                for (i = 0; i < pic->nb_refs; i++) {
-                    href = pic->refs[i]->priv_data;
+                for (i = 0; i < pic->nb_refs[0]; i++) {
+                    href = pic->refs[0][i]->priv_data;
                     av_assert0(href->frame_num != pic_num);
                     if (href->frame_num < pic_num) {
                         sh->rplm_l0[i].modification_of_pic_nums_idc = 0;
@@ -977,28 +979,29 @@  static int vaapi_encode_h264_init_slice_params(AVCodecContext *avctx,
         } else {
             int need_rplm_l0 = 0, need_rplm_l1 = 0;
             int n0 = 0, n1 = 0;
-            for (i = 0; i < pic->nb_refs; i++) {
-                av_assert0(pic->refs[i]);
-                href = pic->refs[i]->priv_data;
-                av_assert0(href->pic_order_cnt != hpic->pic_order_cnt);
-                if (href->pic_order_cnt < hpic->pic_order_cnt) {
-                    if (pic->refs[i] != def_l0[n0])
-                        need_rplm_l0 = 1;
-                    ++n0;
-                } else {
-                    if (pic->refs[i] != def_l1[n1])
-                        need_rplm_l1 = 1;
-                    ++n1;
-                }
+            for (i = 0; i < pic->nb_refs[0]; i++) {
+                av_assert0(pic->refs[0][i]);
+                href = pic->refs[0][i]->priv_data;
+                av_assert0(href->pic_order_cnt < hpic->pic_order_cnt);
+                if (pic->refs[0][i] != def_l0[n0])
+                    need_rplm_l0 = 1;
+                ++n0;
+            }
+
+            for (i = 0; i < pic->nb_refs[1]; i++) {
+                av_assert0(pic->refs[1][i]);
+                href = pic->refs[1][i]->priv_data;
+                av_assert0(href->pic_order_cnt > hpic->pic_order_cnt);
+                if (pic->refs[1][i] != def_l1[n1])
+                    need_rplm_l1 = 1;
+                ++n1;
             }
 
             sh->ref_pic_list_modification_flag_l0 = need_rplm_l0;
             if (need_rplm_l0) {
                 int pic_num = hpic->frame_num;
-                for (i = j = 0; i < pic->nb_refs; i++) {
-                    href = pic->refs[i]->priv_data;
-                    if (href->pic_order_cnt > hpic->pic_order_cnt)
-                        continue;
+                for (i = j = 0; i < pic->nb_refs[0]; i++) {
+                    href = pic->refs[0][i]->priv_data;
                     av_assert0(href->frame_num != pic_num);
                     if (href->frame_num < pic_num) {
                         sh->rplm_l0[j].modification_of_pic_nums_idc = 0;
@@ -1019,10 +1022,8 @@  static int vaapi_encode_h264_init_slice_params(AVCodecContext *avctx,
             sh->ref_pic_list_modification_flag_l1 = need_rplm_l1;
             if (need_rplm_l1) {
                 int pic_num = hpic->frame_num;
-                for (i = j = 0; i < pic->nb_refs; i++) {
-                    href = pic->refs[i]->priv_data;
-                    if (href->pic_order_cnt < hpic->pic_order_cnt)
-                        continue;
+                for (i = j = 0; i < pic->nb_refs[1]; i++) {
+                    href = pic->refs[1][i]->priv_data;
                     av_assert0(href->frame_num != pic_num);
                     if (href->frame_num < pic_num) {
                         sh->rplm_l1[j].modification_of_pic_nums_idc = 0;
@@ -1062,14 +1063,13 @@  static int vaapi_encode_h264_init_slice_params(AVCodecContext *avctx,
         vslice->RefPicList1[i].flags      = VA_PICTURE_H264_INVALID;
     }
 
-    av_assert0(pic->nb_refs <= 2);
-    if (pic->nb_refs >= 1) {
+    if (pic->nb_refs[0]) {
         // Backward reference for P- or B-frame.
         av_assert0(pic->type == PICTURE_TYPE_P ||
                    pic->type == PICTURE_TYPE_B);
         vslice->RefPicList0[0] = vpic->ReferenceFrames[0];
     }
-    if (pic->nb_refs >= 2) {
+    if (pic->nb_refs[1]) {
         // Forward reference for B-frame.
         av_assert0(pic->type == PICTURE_TYPE_B);
         vslice->RefPicList1[0] = vpic->ReferenceFrames[1];
diff --git a/libavcodec/vaapi_encode_h265.c b/libavcodec/vaapi_encode_h265.c
index aa7e532f9a..f015e6f3ce 100644
--- a/libavcodec/vaapi_encode_h265.c
+++ b/libavcodec/vaapi_encode_h265.c
@@ -764,7 +764,7 @@  static int vaapi_encode_h265_init_picture_params(AVCodecContext *avctx,
     VAAPIEncodePicture              *prev = pic->prev;
     VAAPIEncodeH265Picture         *hprev = prev ? prev->priv_data : NULL;
     VAEncPictureParameterBufferHEVC *vpic = pic->codec_picture_params;
-    int i;
+    int i, j = 0;
 
     if (pic->type == PICTURE_TYPE_IDR) {
         av_assert0(pic->display_order == pic->encode_order);
@@ -789,8 +789,8 @@  static int vaapi_encode_h265_init_picture_params(AVCodecContext *avctx,
             hpic->pic_type       = 1;
         } else {
             VAAPIEncodePicture *irap_ref;
-            av_assert0(pic->refs[0] && pic->refs[1]);
-            for (irap_ref = pic; irap_ref; irap_ref = irap_ref->refs[1]) {
+            av_assert0(pic->refs[0][0] && pic->refs[1][0]);
+            for (irap_ref = pic; irap_ref; irap_ref = irap_ref->refs[1][0]) {
                 if (irap_ref->type == PICTURE_TYPE_I)
                     break;
             }
@@ -915,24 +915,27 @@  static int vaapi_encode_h265_init_picture_params(AVCodecContext *avctx,
         .flags         = 0,
     };
 
-    for (i = 0; i < pic->nb_refs; i++) {
-        VAAPIEncodePicture      *ref = pic->refs[i];
-        VAAPIEncodeH265Picture *href;
-
-        av_assert0(ref && ref->encode_order < pic->encode_order);
-        href = ref->priv_data;
-
-        vpic->reference_frames[i] = (VAPictureHEVC) {
-            .picture_id    = ref->recon_surface,
-            .pic_order_cnt = href->pic_order_cnt,
-            .flags = (ref->display_order < pic->display_order ?
-                      VA_PICTURE_HEVC_RPS_ST_CURR_BEFORE : 0) |
-                     (ref->display_order > pic->display_order ?
-                      VA_PICTURE_HEVC_RPS_ST_CURR_AFTER  : 0),
-        };
+    for (int k = 0; k < MAX_REFERENCE_LIST_NUM; k++) {
+        for (i = 0; i < pic->nb_refs[k]; i++) {
+            VAAPIEncodePicture      *ref = pic->refs[k][i];
+            VAAPIEncodeH265Picture *href;
+
+            av_assert0(ref && ref->encode_order < pic->encode_order);
+            href = ref->priv_data;
+
+            vpic->reference_frames[j++] = (VAPictureHEVC) {
+                .picture_id    = ref->recon_surface,
+                .pic_order_cnt = href->pic_order_cnt,
+                .flags = (ref->display_order < pic->display_order ?
+                          VA_PICTURE_HEVC_RPS_ST_CURR_BEFORE : 0) |
+                          (ref->display_order > pic->display_order ?
+                          VA_PICTURE_HEVC_RPS_ST_CURR_AFTER  : 0),
+            };
+        }
     }
-    for (; i < FF_ARRAY_ELEMS(vpic->reference_frames); i++) {
-        vpic->reference_frames[i] = (VAPictureHEVC) {
+
+    for (; j < FF_ARRAY_ELEMS(vpic->reference_frames); j++) {
+        vpic->reference_frames[j] = (VAPictureHEVC) {
             .picture_id = VA_INVALID_ID,
             .flags      = VA_PICTURE_HEVC_INVALID,
         };
@@ -1016,21 +1019,33 @@  static int vaapi_encode_h265_init_slice_params(AVCodecContext *avctx,
         memset(rps, 0, sizeof(*rps));
 
         rps_pics = 0;
-        for (i = 0; i < pic->nb_refs; i++) {
-            strp = pic->refs[i]->priv_data;
-            rps_poc[rps_pics]  = strp->pic_order_cnt;
-            rps_used[rps_pics] = 1;
-            ++rps_pics;
+        for (i = 0; i < MAX_REFERENCE_LIST_NUM; i++) {
+            for (j = 0; j < pic->nb_refs[i]; j++) {
+                strp = pic->refs[i][j]->priv_data;
+                rps_poc[rps_pics]  = strp->pic_order_cnt;
+                rps_used[rps_pics] = 1;
+                ++rps_pics;
+            }
         }
+
         for (i = 0; i < pic->nb_dpb_pics; i++) {
             if (pic->dpb[i] == pic)
                 continue;
-            for (j = 0; j < pic->nb_refs; j++) {
-                if (pic->dpb[i] == pic->refs[j])
+
+            for (j = 0; j < pic->nb_refs[0]; j++) {
+                if (pic->dpb[i] == pic->refs[0][j])
+                    break;
+            }
+            if (j < pic->nb_refs[0])
+                continue;
+
+            for (j = 0; j < pic->nb_refs[1]; j++) {
+                if (pic->dpb[i] == pic->refs[1][j])
                     break;
             }
-            if (j < pic->nb_refs)
+            if (j < pic->nb_refs[1])
                 continue;
+
             strp = pic->dpb[i]->priv_data;
             rps_poc[rps_pics]  = strp->pic_order_cnt;
             rps_used[rps_pics] = 0;
@@ -1155,8 +1170,7 @@  static int vaapi_encode_h265_init_slice_params(AVCodecContext *avctx,
         vslice->ref_pic_list1[i].flags      = VA_PICTURE_HEVC_INVALID;
     }
 
-    av_assert0(pic->nb_refs <= 2);
-    if (pic->nb_refs >= 1) {
+    if (pic->nb_refs[0]) {
         // Backward reference for P- or B-frame.
         av_assert0(pic->type == PICTURE_TYPE_P ||
                    pic->type == PICTURE_TYPE_B);
@@ -1165,7 +1179,7 @@  static int vaapi_encode_h265_init_slice_params(AVCodecContext *avctx,
             // Reference for GPB B-frame, L0 == L1
             vslice->ref_pic_list1[0] = vpic->reference_frames[0];
     }
-    if (pic->nb_refs >= 2) {
+    if (pic->nb_refs[1]) {
         // Forward reference for B-frame.
         av_assert0(pic->type == PICTURE_TYPE_B);
         vslice->ref_pic_list1[0] = vpic->reference_frames[1];
diff --git a/libavcodec/vaapi_encode_mpeg2.c b/libavcodec/vaapi_encode_mpeg2.c
index 9261d19a83..8000ae19cb 100644
--- a/libavcodec/vaapi_encode_mpeg2.c
+++ b/libavcodec/vaapi_encode_mpeg2.c
@@ -458,12 +458,12 @@  static int vaapi_encode_mpeg2_init_picture_params(AVCodecContext *avctx,
         break;
     case PICTURE_TYPE_P:
         vpic->picture_type = VAEncPictureTypePredictive;
-        vpic->forward_reference_picture = pic->refs[0]->recon_surface;
+        vpic->forward_reference_picture = pic->refs[0][0]->recon_surface;
         break;
     case PICTURE_TYPE_B:
         vpic->picture_type = VAEncPictureTypeBidirectional;
-        vpic->forward_reference_picture  = pic->refs[0]->recon_surface;
-        vpic->backward_reference_picture = pic->refs[1]->recon_surface;
+        vpic->forward_reference_picture  = pic->refs[0][0]->recon_surface;
+        vpic->backward_reference_picture = pic->refs[1][0]->recon_surface;
         break;
     default:
         av_assert0(0 && "invalid picture type");
diff --git a/libavcodec/vaapi_encode_vp8.c b/libavcodec/vaapi_encode_vp8.c
index ae6a8d313c..b96e4cd65f 100644
--- a/libavcodec/vaapi_encode_vp8.c
+++ b/libavcodec/vaapi_encode_vp8.c
@@ -86,7 +86,7 @@  static int vaapi_encode_vp8_init_picture_params(AVCodecContext *avctx,
     switch (pic->type) {
     case PICTURE_TYPE_IDR:
     case PICTURE_TYPE_I:
-        av_assert0(pic->nb_refs == 0);
+        av_assert0(pic->nb_refs[0] == 0 && pic->nb_refs[1] == 0);
         vpic->ref_flags.bits.force_kf = 1;
         vpic->ref_last_frame =
         vpic->ref_gf_frame   =
@@ -94,14 +94,14 @@  static int vaapi_encode_vp8_init_picture_params(AVCodecContext *avctx,
             VA_INVALID_SURFACE;
         break;
     case PICTURE_TYPE_P:
-        av_assert0(pic->nb_refs == 1);
+        av_assert0(!pic->nb_refs[1]);
         vpic->ref_flags.bits.no_ref_last = 0;
         vpic->ref_flags.bits.no_ref_gf   = 1;
         vpic->ref_flags.bits.no_ref_arf  = 1;
         vpic->ref_last_frame =
         vpic->ref_gf_frame   =
         vpic->ref_arf_frame  =
-            pic->refs[0]->recon_surface;
+            pic->refs[0][0]->recon_surface;
         break;
     default:
         av_assert0(0 && "invalid picture type");
diff --git a/libavcodec/vaapi_encode_vp9.c b/libavcodec/vaapi_encode_vp9.c
index af1353cea8..d9e8ba6a08 100644
--- a/libavcodec/vaapi_encode_vp9.c
+++ b/libavcodec/vaapi_encode_vp9.c
@@ -96,15 +96,15 @@  static int vaapi_encode_vp9_init_picture_params(AVCodecContext *avctx,
 
     switch (pic->type) {
     case PICTURE_TYPE_IDR:
-        av_assert0(pic->nb_refs == 0);
+        av_assert0(pic->nb_refs[0] == 0 && pic->nb_refs[1] == 0);
         vpic->ref_flags.bits.force_kf = 1;
         vpic->refresh_frame_flags = 0xff;
         hpic->slot = 0;
         break;
     case PICTURE_TYPE_P:
-        av_assert0(pic->nb_refs == 1);
+        av_assert0(!pic->nb_refs[1]);
         {
-            VAAPIEncodeVP9Picture *href = pic->refs[0]->priv_data;
+            VAAPIEncodeVP9Picture *href = pic->refs[0][0]->priv_data;
             av_assert0(href->slot == 0 || href->slot == 1);
 
             if (ctx->max_b_depth > 0) {
@@ -120,10 +120,10 @@  static int vaapi_encode_vp9_init_picture_params(AVCodecContext *avctx,
         }
         break;
     case PICTURE_TYPE_B:
-        av_assert0(pic->nb_refs == 2);
+        av_assert0(pic->nb_refs[0] && pic->nb_refs[1]);
         {
-            VAAPIEncodeVP9Picture *href0 = pic->refs[0]->priv_data,
-                                  *href1 = pic->refs[1]->priv_data;
+            VAAPIEncodeVP9Picture *href0 = pic->refs[0][0]->priv_data,
+                                  *href1 = pic->refs[1][0]->priv_data;
             av_assert0(href0->slot < pic->b_depth + 1 &&
                        href1->slot < pic->b_depth + 1);
 
@@ -157,12 +157,14 @@  static int vaapi_encode_vp9_init_picture_params(AVCodecContext *avctx,
     for (i = 0; i < FF_ARRAY_ELEMS(vpic->reference_frames); i++)
         vpic->reference_frames[i] = VA_INVALID_SURFACE;
 
-    for (i = 0; i < pic->nb_refs; i++) {
-        VAAPIEncodePicture *ref_pic = pic->refs[i];
-        int slot;
-        slot = ((VAAPIEncodeVP9Picture*)ref_pic->priv_data)->slot;
-        av_assert0(vpic->reference_frames[slot] == VA_INVALID_SURFACE);
-        vpic->reference_frames[slot] = ref_pic->recon_surface;
+    for (i = 0; i < MAX_REFERENCE_LIST_NUM; i++) {
+        for (int j = 0; j < pic->nb_refs[i]; j++) {
+            VAAPIEncodePicture *ref_pic = pic->refs[i][j];
+            int slot;
+            slot = ((VAAPIEncodeVP9Picture*)ref_pic->priv_data)->slot;
+            av_assert0(vpic->reference_frames[slot] == VA_INVALID_SURFACE);
+            vpic->reference_frames[slot] = ref_pic->recon_surface;
+        }
     }
 
     vpic->pic_flags.bits.frame_type = (pic->type != PICTURE_TYPE_IDR);