diff mbox series

[FFmpeg-devel,08/10] vulkan_video: move imageview creation and DPB fields to common context

Message ID 20240901000314.379276-7-dev@lynne.ee
State New
Headers show
Series None | expand

Commit Message

Lynne Sept. 1, 2024, 12:03 a.m. UTC
Shared between decoders and encoders.
---
 libavcodec/vulkan_av1.c    |   3 +-
 libavcodec/vulkan_decode.c | 137 +++++++++----------------------------
 libavcodec/vulkan_decode.h |   9 ---
 libavcodec/vulkan_h264.c   |   3 +-
 libavcodec/vulkan_hevc.c   |   3 +-
 libavcodec/vulkan_video.c  |  80 +++++++++++++++++++++-
 libavcodec/vulkan_video.h  |  18 ++++-
 7 files changed, 133 insertions(+), 120 deletions(-)
diff mbox series

Patch

diff --git a/libavcodec/vulkan_av1.c b/libavcodec/vulkan_av1.c
index a550215e32..a0befb9c4e 100644
--- a/libavcodec/vulkan_av1.c
+++ b/libavcodec/vulkan_av1.c
@@ -79,6 +79,7 @@  static int vk_av1_fill_pict(AVCodecContext *avctx, const AV1Frame **ref_src,
                             const uint8_t *saved_order_hints)
 {
     FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+    FFVulkanDecodeShared *ctx = dec->shared_ctx;
     AV1VulkanDecodePicture *hp = pic->hwaccel_picture_private;
     FFVulkanDecodePicture *vkpic = &hp->vp;
 
@@ -119,7 +120,7 @@  static int vk_av1_fill_pict(AVCodecContext *avctx, const AV1Frame **ref_src,
         .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR,
         .codedOffset = (VkOffset2D){ 0, 0 },
         .codedExtent = (VkExtent2D){ pic->f->width, pic->f->height },
-        .baseArrayLayer = ((has_grain || dec->dedicated_dpb) && dec->layered_dpb) ?
+        .baseArrayLayer = ((has_grain || dec->dedicated_dpb) && ctx->common.layered_dpb) ?
                           hp->frame_id : 0,
         .imageViewBinding = vkpic->img_view_ref,
     };
diff --git a/libavcodec/vulkan_decode.c b/libavcodec/vulkan_decode.c
index e73d4f93c2..35966cd60d 100644
--- a/libavcodec/vulkan_decode.c
+++ b/libavcodec/vulkan_decode.c
@@ -111,7 +111,6 @@  int ff_vk_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
     }
 
     dst_ctx->dedicated_dpb = src_ctx->dedicated_dpb;
-    dst_ctx->layered_dpb = src_ctx->layered_dpb;
     dst_ctx->external_fg = src_ctx->external_fg;
     dst_ctx->frame_id_alloc_mask = src_ctx->frame_id_alloc_mask;
 
@@ -125,51 +124,6 @@  int ff_vk_params_invalidate(AVCodecContext *avctx, int t, const uint8_t *b, uint
     return 0;
 }
 
-static int vk_decode_create_view(FFVulkanDecodeContext *dec, VkImageView *dst_view,
-                                 VkImageAspectFlags *aspect, AVVkFrame *src,
-                                 VkFormat vkf, int is_current)
-{
-    VkResult ret;
-    FFVulkanDecodeShared *ctx = dec->shared_ctx;
-    FFVulkanFunctions *vk = &ctx->s.vkfn;
-    VkImageAspectFlags aspect_mask = ff_vk_aspect_bits_from_vkfmt(vkf);
-
-    VkSamplerYcbcrConversionInfo yuv_sampler_info = {
-        .sType = VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_INFO,
-        .conversion = ctx->yuv_sampler,
-    };
-    VkImageViewCreateInfo img_view_create_info = {
-        .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
-        .pNext = &yuv_sampler_info,
-        .viewType = dec->layered_dpb && !is_current ?
-                    VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D,
-        .format = vkf,
-        .image = src->img[0],
-        .components = (VkComponentMapping) {
-            .r = VK_COMPONENT_SWIZZLE_IDENTITY,
-            .g = VK_COMPONENT_SWIZZLE_IDENTITY,
-            .b = VK_COMPONENT_SWIZZLE_IDENTITY,
-            .a = VK_COMPONENT_SWIZZLE_IDENTITY,
-        },
-        .subresourceRange = (VkImageSubresourceRange) {
-            .aspectMask     = VK_IMAGE_ASPECT_COLOR_BIT,
-            .baseArrayLayer = 0,
-            .layerCount     = dec->layered_dpb && !is_current ?
-                              VK_REMAINING_ARRAY_LAYERS : 1,
-            .levelCount     = 1,
-        },
-    };
-
-    ret = vk->CreateImageView(ctx->s.hwctx->act_dev, &img_view_create_info,
-                              ctx->s.hwctx->alloc, dst_view);
-    if (ret != VK_SUCCESS)
-        return AVERROR_EXTERNAL;
-
-    *aspect = aspect_mask;
-
-    return 0;
-}
-
 static AVFrame *vk_get_dpb_pool(FFVulkanDecodeShared *ctx)
 {
     int err;
@@ -177,7 +131,7 @@  static AVFrame *vk_get_dpb_pool(FFVulkanDecodeShared *ctx)
     if (!avf)
         return NULL;
 
-    err = av_hwframe_get_buffer(ctx->dpb_hwfc_ref, avf, 0x0);
+    err = av_hwframe_get_buffer(ctx->common.dpb_hwfc_ref, avf, 0x0);
     if (err < 0)
         av_frame_free(&avf);
 
@@ -207,21 +161,21 @@  int ff_vk_decode_prepare_frame(FFVulkanDecodeContext *dec, AVFrame *pic,
     vkpic->destroy_image_view = vk->DestroyImageView;
     vkpic->wait_semaphores = vk->WaitSemaphores;
 
-    if (dec->layered_dpb && alloc_dpb) {
-        vkpic->img_view_ref = ctx->layered_view;
-        vkpic->img_aspect_ref = ctx->layered_aspect;
+    if (ctx->common.layered_dpb && alloc_dpb) {
+        vkpic->img_view_ref = ctx->common.layered_view;
+        vkpic->img_aspect_ref = ctx->common.layered_aspect;
     } else if (alloc_dpb) {
-        AVHWFramesContext *dpb_frames = (AVHWFramesContext *)ctx->dpb_hwfc_ref->data;
+        AVHWFramesContext *dpb_frames = (AVHWFramesContext *)ctx->common.dpb_hwfc_ref->data;
         AVVulkanFramesContext *dpb_hwfc = dpb_frames->hwctx;
 
         vkpic->dpb_frame = vk_get_dpb_pool(ctx);
         if (!vkpic->dpb_frame)
             return AVERROR(ENOMEM);
 
-        err = vk_decode_create_view(dec, &vkpic->img_view_ref,
-                                    &vkpic->img_aspect_ref,
-                                    (AVVkFrame *)vkpic->dpb_frame->data[0],
-                                    dpb_hwfc->format[0], is_current);
+        err = ff_vk_create_view(&ctx->s, &ctx->common,
+                                &vkpic->img_view_ref, &vkpic->img_aspect_ref,
+                                (AVVkFrame *)vkpic->dpb_frame->data[0],
+                                dpb_hwfc->format[0], !is_current);
         if (err < 0)
             return err;
 
@@ -232,10 +186,10 @@  int ff_vk_decode_prepare_frame(FFVulkanDecodeContext *dec, AVFrame *pic,
         AVHWFramesContext *frames = (AVHWFramesContext *)pic->hw_frames_ctx->data;
         AVVulkanFramesContext *hwfc = frames->hwctx;
 
-        err = vk_decode_create_view(dec, &vkpic->img_view_out,
-                                    &vkpic->img_aspect,
-                                    (AVVkFrame *)pic->data[0],
-                                    hwfc->format[0], is_current);
+        err = ff_vk_create_view(&ctx->s, &ctx->common,
+                                &vkpic->img_view_out, &vkpic->img_aspect,
+                                (AVVkFrame *)pic->data[0],
+                                hwfc->format[0], !is_current);
         if (err < 0)
             return err;
 
@@ -366,7 +320,7 @@  int ff_vk_decode_frame(AVCodecContext *avctx,
     AVVkFrame *vkf = (AVVkFrame *)pic->buf[0]->data;
 
     /* Quirks */
-    const int layered_dpb = dec->layered_dpb;
+    const int layered_dpb = ctx->common.layered_dpb;
 
     VkVideoSessionParametersKHR *par = (VkVideoSessionParametersKHR *)dec->session_params->data;
     VkVideoBeginCodingInfoKHR decode_start = {
@@ -470,7 +424,7 @@  int ff_vk_decode_frame(AVCodecContext *avctx,
         .srcAccessMask = VK_ACCESS_2_NONE,
         .dstAccessMask = VK_ACCESS_2_VIDEO_DECODE_WRITE_BIT_KHR,
         .oldLayout = vkf->layout[0],
-        .newLayout = (dec->layered_dpb || vp->dpb_frame) ?
+        .newLayout = (layered_dpb || vp->dpb_frame) ?
                      VK_IMAGE_LAYOUT_VIDEO_DECODE_DST_KHR :
                      VK_IMAGE_LAYOUT_VIDEO_DECODE_DPB_KHR, /* Spec, 07252 utter madness */
         .srcQueueFamilyIndex = vkf->queue_family[0],
@@ -545,7 +499,7 @@  int ff_vk_decode_frame(AVCodecContext *avctx,
     } else if (vp->decode_info.referenceSlotCount ||
                vp->img_view_out != vp->img_view_ref) {
         /* Single barrier for a single layered ref */
-        err = ff_vk_exec_add_dep_frame(&ctx->s, exec, ctx->layered_frame,
+        err = ff_vk_exec_add_dep_frame(&ctx->s, exec, ctx->common.layered_frame,
                                        VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR,
                                        VK_PIPELINE_STAGE_2_VIDEO_DECODE_BIT_KHR);
         if (err < 0)
@@ -616,13 +570,8 @@  static void free_common(FFRefStructOpaque unused, void *obj)
     FFVulkanContext *s = &ctx->s;
     FFVulkanFunctions *vk = &ctx->s.vkfn;
 
-    /* Destroy layered view */
-    if (ctx->layered_view)
-        vk->DestroyImageView(s->hwctx->act_dev, ctx->layered_view, s->hwctx->alloc);
-
     /* This also frees all references from this pool */
-    av_frame_free(&ctx->layered_frame);
-    av_buffer_unref(&ctx->dpb_hwfc_ref);
+    av_frame_free(&ctx->common.layered_frame);
 
     /* Destroy parameters */
     if (ctx->empty_session_params)
@@ -634,10 +583,6 @@  static void free_common(FFRefStructOpaque unused, void *obj)
 
     ff_vk_video_common_uninit(s, &ctx->common);
 
-    if (ctx->yuv_sampler)
-        vk->DestroySamplerYcbcrConversion(s->hwctx->act_dev, ctx->yuv_sampler,
-                                          s->hwctx->alloc);
-
     ff_vk_uninit(s);
 }
 
@@ -924,8 +869,8 @@  static int vulkan_decode_get_profile(AVCodecContext *avctx, AVBufferRef *frames_
     }
 
     dec->dedicated_dpb = !(dec_caps->flags & VK_VIDEO_DECODE_CAPABILITY_DPB_AND_OUTPUT_COINCIDE_BIT_KHR);
-    dec->layered_dpb = !dec->dedicated_dpb ? 0 :
-                       !(caps->flags & VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR);
+    ctx->common.layered_dpb = !dec->dedicated_dpb ? 0 :
+                              !(caps->flags & VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR);
 
     if (dec->dedicated_dpb) {
         fmt_info.imageUsage = VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR;
@@ -1128,7 +1073,7 @@  int ff_vk_decode_uninit(AVCodecContext *avctx)
 
 int ff_vk_decode_init(AVCodecContext *avctx)
 {
-    int err, cxpos = 0, cypos = 0, nb_q = 0;
+    int err, nb_q = 0;
     VkResult ret;
     FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
     FFVulkanDecodeShared *ctx;
@@ -1159,12 +1104,6 @@  int ff_vk_decode_init(AVCodecContext *avctx)
     VkVideoSessionCreateInfoKHR session_create = {
         .sType = VK_STRUCTURE_TYPE_VIDEO_SESSION_CREATE_INFO_KHR,
     };
-    VkSamplerYcbcrConversionCreateInfo yuv_sampler_info = {
-        .sType = VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_CREATE_INFO,
-        .components = ff_comp_identity_map,
-        .ycbcrModel = VK_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY,
-        .ycbcrRange = avctx->color_range == AVCOL_RANGE_MPEG, /* Ignored */
-    };
 
     err = ff_decode_get_hw_frames_ctx(avctx, AV_HWDEVICE_TYPE_VULKAN);
     if (err < 0)
@@ -1230,30 +1169,18 @@  int ff_vk_decode_init(AVCodecContext *avctx)
     if (err < 0)
         goto fail;
 
-    /* Get sampler */
-    av_chroma_location_enum_to_pos(&cxpos, &cypos, avctx->chroma_sample_location);
-    yuv_sampler_info.xChromaOffset = cxpos >> 7;
-    yuv_sampler_info.yChromaOffset = cypos >> 7;
-    yuv_sampler_info.format = s->hwfc->format[0];
-    ret = vk->CreateSamplerYcbcrConversion(s->hwctx->act_dev, &yuv_sampler_info,
-                                           s->hwctx->alloc, &ctx->yuv_sampler);
-    if (ret != VK_SUCCESS) {
-        err = AVERROR_EXTERNAL;
-        goto fail;
-    }
-
     /* If doing an out-of-place decoding, create a DPB pool */
     if (dec->dedicated_dpb || avctx->codec_id == AV_CODEC_ID_AV1) {
         AVHWFramesContext *dpb_frames;
         AVVulkanFramesContext *dpb_hwfc;
 
-        ctx->dpb_hwfc_ref = av_hwframe_ctx_alloc(s->frames->device_ref);
-        if (!ctx->dpb_hwfc_ref) {
+        ctx->common.dpb_hwfc_ref = av_hwframe_ctx_alloc(s->frames->device_ref);
+        if (!ctx->common.dpb_hwfc_ref) {
             err = AVERROR(ENOMEM);
             goto fail;
         }
 
-        dpb_frames = (AVHWFramesContext *)ctx->dpb_hwfc_ref->data;
+        dpb_frames = (AVHWFramesContext *)ctx->common.dpb_hwfc_ref->data;
         dpb_frames->format    = s->frames->format;
         dpb_frames->sw_format = s->frames->sw_format;
         dpb_frames->width     = avctx->coded_width;
@@ -1267,23 +1194,25 @@  int ff_vk_decode_init(AVCodecContext *avctx)
         dpb_hwfc->usage        = VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR |
                                  VK_IMAGE_USAGE_SAMPLED_BIT; /* Shuts validator up. */
 
-        if (dec->layered_dpb)
+        if (ctx->common.layered_dpb)
             dpb_hwfc->nb_layers = ctx->caps.maxDpbSlots;
 
-        err = av_hwframe_ctx_init(ctx->dpb_hwfc_ref);
+        err = av_hwframe_ctx_init(ctx->common.dpb_hwfc_ref);
         if (err < 0)
             goto fail;
 
-        if (dec->layered_dpb) {
-            ctx->layered_frame = vk_get_dpb_pool(ctx);
-            if (!ctx->layered_frame) {
+        if (ctx->common.layered_dpb) {
+            ctx->common.layered_frame = vk_get_dpb_pool(ctx);
+            if (!ctx->common.layered_frame) {
                 err = AVERROR(ENOMEM);
                 goto fail;
             }
 
-            err = vk_decode_create_view(dec, &ctx->layered_view, &ctx->layered_aspect,
-                                        (AVVkFrame *)ctx->layered_frame->data[0],
-                                        s->hwfc->format[0], 0);
+            err = ff_vk_create_view(&ctx->s, &ctx->common,
+                                    &ctx->common.layered_view,
+                                    &ctx->common.layered_aspect,
+                                    (AVVkFrame *)ctx->common.layered_frame->data[0],
+                                    s->hwfc->format[0], 1);
             if (err < 0)
                 goto fail;
         }
diff --git a/libavcodec/vulkan_decode.h b/libavcodec/vulkan_decode.h
index c181277cdc..76e60836b5 100644
--- a/libavcodec/vulkan_decode.h
+++ b/libavcodec/vulkan_decode.h
@@ -53,15 +53,7 @@  typedef struct FFVulkanDecodeShared {
     VkVideoCapabilitiesKHR caps;
     VkVideoDecodeCapabilitiesKHR dec_caps;
 
-    AVBufferRef *dpb_hwfc_ref;  /* Only used for dedicated_dpb */
-
-    AVFrame *layered_frame;     /* Only used for layered_dpb   */
-    VkImageView layered_view;
-    VkImageAspectFlags layered_aspect;
-
     VkVideoSessionParametersKHR empty_session_params;
-
-    VkSamplerYcbcrConversion yuv_sampler;
 } FFVulkanDecodeShared;
 
 typedef struct FFVulkanDecodeContext {
@@ -70,7 +62,6 @@  typedef struct FFVulkanDecodeContext {
     FFVkExecPool exec_pool;
 
     int dedicated_dpb; /* Oddity  #1 - separate DPB images */
-    int layered_dpb;   /* Madness #1 - layered  DPB images */
     int external_fg;   /* Oddity  #2 - hardware can't apply film grain */
     uint32_t frame_id_alloc_mask; /* For AV1 only */
 
diff --git a/libavcodec/vulkan_h264.c b/libavcodec/vulkan_h264.c
index 0b296b3cc3..05ac884138 100644
--- a/libavcodec/vulkan_h264.c
+++ b/libavcodec/vulkan_h264.c
@@ -60,6 +60,7 @@  static int vk_h264_fill_pict(AVCodecContext *avctx, H264Picture **ref_src,
                              int dpb_slot_index)
 {
     FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+    FFVulkanDecodeShared *ctx = dec->shared_ctx;
     H264VulkanDecodePicture *hp = pic->hwaccel_picture_private;
     FFVulkanDecodePicture *vkpic = &hp->vp;
 
@@ -95,7 +96,7 @@  static int vk_h264_fill_pict(AVCodecContext *avctx, H264Picture **ref_src,
         .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR,
         .codedOffset = (VkOffset2D){ 0, 0 },
         .codedExtent = (VkExtent2D){ pic->f->width, pic->f->height },
-        .baseArrayLayer = dec->layered_dpb ? dpb_slot_index : 0,
+        .baseArrayLayer = ctx->common.layered_dpb ? dpb_slot_index : 0,
         .imageViewBinding = vkpic->img_view_ref,
     };
 
diff --git a/libavcodec/vulkan_hevc.c b/libavcodec/vulkan_hevc.c
index 5228e41ad5..f64c854a60 100644
--- a/libavcodec/vulkan_hevc.c
+++ b/libavcodec/vulkan_hevc.c
@@ -136,6 +136,7 @@  static int vk_hevc_fill_pict(AVCodecContext *avctx, HEVCFrame **ref_src,
                              HEVCFrame *pic, int is_current, int pic_id)
 {
     FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+    FFVulkanDecodeShared *ctx = dec->shared_ctx;
     HEVCVulkanDecodePicture *hp = pic->hwaccel_picture_private;
     FFVulkanDecodePicture *vkpic = &hp->vp;
 
@@ -161,7 +162,7 @@  static int vk_hevc_fill_pict(AVCodecContext *avctx, HEVCFrame **ref_src,
         .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR,
         .codedOffset = (VkOffset2D){ 0, 0 },
         .codedExtent = (VkExtent2D){ pic->f->width, pic->f->height },
-        .baseArrayLayer = dec->layered_dpb ? pic_id : 0,
+        .baseArrayLayer = ctx->common.layered_dpb ? pic_id : 0,
         .imageViewBinding = vkpic->img_view_ref,
     };
 
diff --git a/libavcodec/vulkan_video.c b/libavcodec/vulkan_video.c
index a676f0332a..b7e42476bb 100644
--- a/libavcodec/vulkan_video.c
+++ b/libavcodec/vulkan_video.c
@@ -16,7 +16,6 @@ 
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#include "defs.h"
 #include "libavutil/mem.h"
 #include "vulkan_video.h"
 
@@ -240,6 +239,50 @@  int ff_vk_video_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
     return AVERROR(ENOTSUP);
 }
 
+int ff_vk_create_view(FFVulkanContext *s, FFVkVideoCommon *common,
+                      VkImageView *view, VkImageAspectFlags *aspect,
+                      AVVkFrame *src, VkFormat vkf, int is_dpb)
+{
+    VkResult ret;
+    FFVulkanFunctions *vk = &s->vkfn;
+    VkImageAspectFlags aspect_mask = ff_vk_aspect_bits_from_vkfmt(vkf); /* handed back via *aspect */
+
+    VkSamplerYcbcrConversionInfo yuv_sampler_info = {
+        .sType = VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_INFO,
+        .conversion = common->yuv_sampler, /* created and destroyed with the common context */
+    };
+    VkImageViewCreateInfo img_view_create_info = {
+        .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+        .pNext = &yuv_sampler_info, /* attach the YCbCr conversion to the view */
+        .viewType = common->layered_dpb && is_dpb ? /* array view only for a layered DPB */
+                    VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D,
+        .format = vkf,
+        .image = src->img[0], /* only img[0] of the frame is viewed */
+        .components = (VkComponentMapping) {
+            .r = VK_COMPONENT_SWIZZLE_IDENTITY,
+            .g = VK_COMPONENT_SWIZZLE_IDENTITY,
+            .b = VK_COMPONENT_SWIZZLE_IDENTITY,
+            .a = VK_COMPONENT_SWIZZLE_IDENTITY,
+        },
+        .subresourceRange = (VkImageSubresourceRange) {
+            .aspectMask     = VK_IMAGE_ASPECT_COLOR_BIT, /* fixed; aspect_mask is not used here */
+            .baseArrayLayer = 0,
+            .layerCount     = common->layered_dpb && is_dpb ? /* all layers of a layered DPB image */
+                              VK_REMAINING_ARRAY_LAYERS : 1,
+            .levelCount     = 1,
+        },
+    };
+
+    ret = vk->CreateImageView(s->hwctx->act_dev, &img_view_create_info,
+                              s->hwctx->alloc, view);
+    if (ret != VK_SUCCESS)
+        return AVERROR_EXTERNAL;
+
+    *aspect = aspect_mask; /* written only when view creation succeeded */
+
+    return 0;
+}
+
 av_cold void ff_vk_video_common_uninit(FFVulkanContext *s,
                                        FFVkVideoCommon *common)
 {
@@ -256,9 +299,21 @@  av_cold void ff_vk_video_common_uninit(FFVulkanContext *s,
             vk->FreeMemory(s->hwctx->act_dev, common->mem[i], s->hwctx->alloc);
 
     av_freep(&common->mem);
+
+    if (common->layered_view)
+        vk->DestroyImageView(s->hwctx->act_dev, common->layered_view,
+                             s->hwctx->alloc);
+
+    av_frame_free(&common->layered_frame);
+
+    av_buffer_unref(&common->dpb_hwfc_ref);
+
+    if (common->yuv_sampler)
+        vk->DestroySamplerYcbcrConversion(s->hwctx->act_dev, common->yuv_sampler,
+                                          s->hwctx->alloc);
 }
 
-av_cold int ff_vk_video_common_init(void *log, FFVulkanContext *s,
+av_cold int ff_vk_video_common_init(AVCodecContext *avctx, FFVulkanContext *s,
                                     FFVkVideoCommon *common,
                                     VkVideoSessionCreateInfoKHR *session_create)
 {
@@ -268,6 +323,25 @@  av_cold int ff_vk_video_common_init(void *log, FFVulkanContext *s,
     VkVideoSessionMemoryRequirementsKHR *mem = NULL;
     VkBindVideoSessionMemoryInfoKHR *bind_mem = NULL;
 
+    int cxpos = 0, cypos = 0;
+    VkSamplerYcbcrConversionCreateInfo yuv_sampler_info = {
+        .sType      = VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_CREATE_INFO,
+        .components = ff_comp_identity_map,
+        .ycbcrModel = VK_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY,
+        .ycbcrRange = avctx->color_range == AVCOL_RANGE_MPEG, /* Ignored */
+        .format     = session_create->pictureFormat,
+    };
+
+    /* Create identity YUV sampler (VkImageViews of YUV formats require it).
+     * cxpos/cypos stay 0 if the chroma location lookup fails (unspecified). */
+    av_chroma_location_enum_to_pos(&cxpos, &cypos, avctx->chroma_sample_location);
+    yuv_sampler_info.xChromaOffset = cxpos >> 7;
+    yuv_sampler_info.yChromaOffset = cypos >> 7;
+    ret = vk->CreateSamplerYcbcrConversion(s->hwctx->act_dev, &yuv_sampler_info,
+                                           s->hwctx->alloc, &common->yuv_sampler);
+    if (ret != VK_SUCCESS)
+        return AVERROR_EXTERNAL;
+
     /* Create session */
     ret = vk->CreateVideoSessionKHR(s->hwctx->act_dev, session_create,
                                     s->hwctx->alloc, &common->session);
@@ -333,7 +407,7 @@  av_cold int ff_vk_video_common_init(void *log, FFVulkanContext *s,
             .memorySize = mem[i].memoryRequirements.size,
         };
 
-        av_log(log, AV_LOG_VERBOSE, "Allocating %"PRIu64" bytes in bind index %i for video session\n",
+        av_log(avctx, AV_LOG_VERBOSE, "Allocating %"PRIu64" bytes in bind index %i for video session\n",
                bind_mem[i].memorySize, bind_mem[i].memoryBindIndex);
     }
 
diff --git a/libavcodec/vulkan_video.h b/libavcodec/vulkan_video.h
index a5f69c576c..01659f6501 100644
--- a/libavcodec/vulkan_video.h
+++ b/libavcodec/vulkan_video.h
@@ -19,6 +19,7 @@ 
 #ifndef AVCODEC_VULKAN_VIDEO_H
 #define AVCODEC_VULKAN_VIDEO_H
 
+#include "avcodec.h"
 #include "vulkan.h"
 
 #include <vk_video/vulkan_video_codecs_common.h>
@@ -32,6 +33,14 @@  typedef struct FFVkVideoSession {
     VkVideoSessionKHR session;
     VkDeviceMemory *mem;
     uint32_t nb_mem;
+
+    VkSamplerYcbcrConversion yuv_sampler;
+
+    AVBufferRef *dpb_hwfc_ref;
+    int layered_dpb;
+    AVFrame *layered_frame;
+    VkImageView layered_view;
+    VkImageAspectFlags layered_aspect;
 } FFVkVideoCommon;
 
 /**
@@ -74,10 +83,17 @@  StdVideoH264LevelIdc ff_vk_h264_level_to_vk(int level_idc);
 StdVideoH264ProfileIdc ff_vk_h264_profile_to_vk(int profile);
 int ff_vk_h264_profile_to_av(StdVideoH264ProfileIdc profile);
 
+/**
+ * Creates an image view of src->img[0]; returns 0 or AVERROR_EXTERNAL.
+ */
+int ff_vk_create_view(FFVulkanContext *s, FFVkVideoCommon *common,
+                      VkImageView *view, VkImageAspectFlags *aspect,
+                      AVVkFrame *src, VkFormat vkf, int is_dpb);
+
 /**
  * Initialize video session, allocating and binding necessary memory.
  */
-int ff_vk_video_common_init(void *log, FFVulkanContext *s,
+int ff_vk_video_common_init(AVCodecContext *avctx, FFVulkanContext *s,
                             FFVkVideoCommon *common,
                             VkVideoSessionCreateInfoKHR *session_create);