diff mbox series

[FFmpeg-devel,1/2] lavc: add Vulkan video encoding base code

Message ID 20240909103759.371919-1-dev@lynne.ee
State New
Headers show
Series [FFmpeg-devel,1/2] lavc: add Vulkan video encoding base code | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Lynne Sept. 9, 2024, 10:37 a.m. UTC
This commit adds the common Vulkan video encoding framework.
It makes full use of the asynchronous features of our new common
hardware encoding code, and of Vulkan.
The code is able to handle anything from H264 to AV1 and MJPEG.
---
 configure                  |   2 +
 libavcodec/Makefile        |   2 +-
 libavcodec/vulkan_encode.c | 979 +++++++++++++++++++++++++++++++++++++
 libavcodec/vulkan_encode.h | 243 +++++++++
 4 files changed, 1225 insertions(+), 1 deletion(-)
 create mode 100644 libavcodec/vulkan_encode.c
 create mode 100644 libavcodec/vulkan_encode.h

Comments

Benjamin Cheng Sept. 10, 2024, 1:29 p.m. UTC | #1
On Mon Sep 9, 2024 at 6:37 AM EDT, Lynne via ffmpeg-devel wrote:
> This commit adds the common Vulkan video encoding framework.
> It makes full use of the asynchronous features of our new common
> hardware encoding code, and of Vulkan.
> The code is able to handle anything from H264 to AV1 and MJPEG.
> ---
>  configure                  |   2 +
>  libavcodec/Makefile        |   2 +-
>  libavcodec/vulkan_encode.c | 979 +++++++++++++++++++++++++++++++++++++
>  libavcodec/vulkan_encode.h | 243 +++++++++
>  4 files changed, 1225 insertions(+), 1 deletion(-)
>  create mode 100644 libavcodec/vulkan_encode.c
>  create mode 100644 libavcodec/vulkan_encode.h
>
> diff --git a/configure b/configure
> index a8e67d230c..6cfb736a86 100755
> --- a/configure
> +++ b/configure
> @@ -2638,6 +2638,7 @@ CONFIG_EXTRA="
>      vp3dsp
>      vp56dsp
>      vp8dsp
> +    vulkan_encode
>      wma_freqs
>      wmv2dsp
>  "
> @@ -3299,6 +3300,7 @@ qsvdec_select="qsv"
>  qsvenc_select="qsv"
>  qsvvpp_select="qsv"
>  vaapi_encode_deps="vaapi"
> +vulkan_encode_deps="vulkan"
>  v4l2_m2m_deps="linux_videodev2_h sem_timedwait"
>  
>  bilateral_cuda_filter_deps="ffnvcodec"
> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
> index 27ef4638ce..ff6a3c4efc 100644
> --- a/libavcodec/Makefile
> +++ b/libavcodec/Makefile
> @@ -1282,7 +1282,7 @@ SKIPHEADERS-$(CONFIG_QSVENC)           += qsvenc.h
>  SKIPHEADERS-$(CONFIG_VAAPI)            += vaapi_decode.h vaapi_hevc.h vaapi_encode.h
>  SKIPHEADERS-$(CONFIG_VDPAU)            += vdpau.h vdpau_internal.h
>  SKIPHEADERS-$(CONFIG_VIDEOTOOLBOX)     += videotoolbox.h vt_internal.h
> -SKIPHEADERS-$(CONFIG_VULKAN)           += vulkan.h vulkan_video.h vulkan_decode.h
> +SKIPHEADERS-$(CONFIG_VULKAN)           += vulkan.h vulkan_video.h vulkan_encode.h vulkan_decode.h
>  SKIPHEADERS-$(CONFIG_V4L2_M2M)         += v4l2_buffers.h v4l2_context.h v4l2_m2m.h
>  SKIPHEADERS-$(CONFIG_ZLIB)             += zlib_wrapper.h
>  
> diff --git a/libavcodec/vulkan_encode.c b/libavcodec/vulkan_encode.c
> new file mode 100644
> index 0000000000..5e87d4c073
> --- /dev/null
> +++ b/libavcodec/vulkan_encode.c
> @@ -0,0 +1,979 @@
> +/*
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include "libavutil/mem.h"
> +#include "libavutil/avassert.h"
> +#include "vulkan_encode.h"
> +#include "config.h"
> +
> +#include "libavutil/vulkan_loader.h"
> +
> +const AVCodecHWConfigInternal *const ff_vulkan_encode_hw_configs[] = {
> +    HW_CONFIG_ENCODER_FRAMES(VULKAN, VULKAN),
> +    NULL,
> +};
> +
> +av_cold void ff_vulkan_encode_uninit(FFVulkanEncodeContext *ctx)
> +{
> +    FFVulkanContext *s = &ctx->s;
> +    FFVulkanFunctions *vk = &s->vkfn;
> +
> +    /* Wait on and free execution pool */
> +    ff_vk_exec_pool_free(s, &ctx->enc_pool);
> +
> +    /* Destroy the session params */
> +    if (ctx->session_params)
> +        vk->DestroyVideoSessionParametersKHR(s->hwctx->act_dev,
> +                                             ctx->session_params,
> +                                             s->hwctx->alloc);
> +
> +    ff_hw_base_encode_close(&ctx->base);
> +
> +    av_buffer_pool_uninit(&ctx->buf_pool);
> +
> +    ff_vk_video_common_uninit(s, &ctx->common);
> +
> +    ff_vk_uninit(s);
> +}
> +
> +static int vulkan_encode_init(AVCodecContext *avctx, FFHWBaseEncodePicture *pic)
> +{
> +    int err;
> +    FFVulkanEncodeContext *ctx = avctx->priv_data;
> +    FFVulkanEncodePicture *vp = pic->priv;
> +
> +    AVFrame *f = pic->input_image;
> +    AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data;
> +    AVVulkanFramesContext *vkfc = hwfc->hwctx;
> +    AVVkFrame *vkf = (AVVkFrame *)f->data[0];
> +
> +    if (ctx->codec->picture_priv_data_size > 0) {
> +        pic->codec_priv = av_mallocz(ctx->codec->picture_priv_data_size);
> +        if (!pic->codec_priv)
> +            return AVERROR(ENOMEM);
> +    }
> +
> +    /* Input image view */
> +    err = ff_vk_create_view(&ctx->s, &ctx->common,
> +                            &vp->in.view, &vp->in.aspect,
> +                            vkf, vkfc->format[0], 0);
> +    if (err < 0)
> +        return err;
> +
> +    /* Reference view */
> +    if (!ctx->common.layered_dpb) {
> +        AVFrame *rf = pic->recon_image;
> +        AVVkFrame *rvkf = (AVVkFrame *)rf->data[0];
> +        err = ff_vk_create_view(&ctx->s, &ctx->common,
> +                                &vp->dpb.view, &vp->dpb.aspect,
> +                                rvkf, ctx->pic_format, 1);
> +        if (err < 0)
> +            return err;
> +    } else {
> +        vp->dpb.view = ctx->common.layered_view;
> +        vp->dpb.aspect = ctx->common.layered_aspect;
> +    }
> +
> +    return 0;
> +}
> +
> +static int vulkan_encode_free(AVCodecContext *avctx, FFHWBaseEncodePicture *pic)
> +{
> +    FFVulkanEncodeContext *ctx = avctx->priv_data;
> +    FFVulkanFunctions *vk = &ctx->s.vkfn;
> +
> +    FFVulkanEncodePicture *vp = pic->priv;
> +
> +    if (vp->in.view)
> +        vk->DestroyImageView(ctx->s.hwctx->act_dev, vp->in.view,
> +                             ctx->s.hwctx->alloc);
> +
> +    if (!ctx->common.layered_dpb && vp->dpb.view)
> +        vk->DestroyImageView(ctx->s.hwctx->act_dev, vp->dpb.view,
> +                             ctx->s.hwctx->alloc);
> +
> +    ctx->slots[vp->dpb_slot.slotIndex] = 0;
> +
> +    return 0;
> +}
> +
> +static int init_pic_rc(AVCodecContext *avctx, FFHWBaseEncodePicture *pic,
> +                       VkVideoEncodeRateControlInfoKHR *rc_info,
> +                       VkVideoEncodeRateControlLayerInfoKHR *rc_layer /* Goes in ^ */)
> +{
> +    FFVulkanEncodeContext *ctx = avctx->priv_data;
> +
> +    *rc_info = (VkVideoEncodeRateControlInfoKHR) {
> +        .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_RATE_CONTROL_INFO_KHR,
> +        .rateControlMode = ctx->opts.rc_mode,
> +    };
> +
> +    if (ctx->opts.rc_mode > VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR) {
> +        *rc_layer = (VkVideoEncodeRateControlLayerInfoKHR) {
> +            .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_RATE_CONTROL_LAYER_INFO_KHR,
> +            .averageBitrate = avctx->bit_rate,
> +            .maxBitrate = avctx->rc_max_rate ? avctx->rc_max_rate : avctx->bit_rate,
> +            .frameRateNumerator = avctx->framerate.num,
> +            .frameRateDenominator = avctx->framerate.den,
> +        };
> +        rc_info->layerCount++;
> +        rc_info->pLayers = rc_layer;
> +    }
> +
> +    return ctx->codec->init_pic_rc(avctx, pic, rc_info, rc_layer);
> +}
> +
> +static int vulkan_encode_issue(AVCodecContext *avctx,
> +                               FFHWBaseEncodePicture *base_pic)
> +{
> +    FFVulkanEncodeContext *ctx = avctx->priv_data;
> +    FFVulkanFunctions *vk = &ctx->s.vkfn;
> +
> +    const size_t size_align = ctx->caps.minBitstreamBufferSizeAlignment;
> +
> +    FFVulkanEncodePicture *vp = base_pic->priv;
> +    AVFrame *src = (AVFrame *)base_pic->input_image;
> +    AVVkFrame *vkf = (AVVkFrame *)src->data[0];
> +
> +    int err, max_pkt_size;
> +
> +    FFVkBuffer *sd_buf;
> +
> +    int slot_index = -1;
> +    FFVkExecContext *exec;
> +    VkCommandBuffer cmd_buf;
> +    VkImageMemoryBarrier2 img_bar[37];
> +    int nb_img_bar = 0;
> +
> +    /* Coding start/end */
> +    VkVideoBeginCodingInfoKHR encode_start;
> +    VkVideoEndCodingInfoKHR encode_end = {
> +        .sType = VK_STRUCTURE_TYPE_VIDEO_END_CODING_INFO_KHR,
> +    };
> +
> +    VkVideoEncodeRateControlLayerInfoKHR rc_layer;
> +    VkVideoEncodeRateControlInfoKHR rc_info;
> +    VkVideoEncodeQualityLevelInfoKHR q_info;
> +    VkVideoCodingControlInfoKHR encode_ctrl;
> +
> +    VkVideoReferenceSlotInfoKHR ref_slot[37];
> +    VkVideoEncodeInfoKHR encode_info;
> +
> +    /* Create packet data buffer */
> +    max_pkt_size = FFALIGN(3 * ctx->base.surface_width * ctx->base.surface_height + (1 << 16),
> +                           ctx->caps.minBitstreamBufferSizeAlignment);
> +
> +    err = ff_vk_get_pooled_buffer(&ctx->s, &ctx->buf_pool, &vp->pkt_buf,
> +                                  VK_BUFFER_USAGE_VIDEO_ENCODE_DST_BIT_KHR,
> +                                  &ctx->profile_list, max_pkt_size,
> +                                  VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
> +                                  VK_MEMORY_PROPERTY_HOST_CACHED_BIT);
> +    if (err < 0)
> +        return err;
> +
> +    sd_buf = (FFVkBuffer *)vp->pkt_buf->data;
> +
> +    /* Setup rate control */
> +    err = init_pic_rc(avctx, base_pic, &rc_info, &rc_layer);
> +    if (err < 0)
> +        return err;
> +
> +    q_info = (VkVideoEncodeQualityLevelInfoKHR) {
> +        .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_QUALITY_LEVEL_INFO_KHR,
> +        .pNext = &rc_info,
> +        .qualityLevel = ctx->opts.quality,
> +    };
> +    encode_ctrl = (VkVideoCodingControlInfoKHR) {
> +        .sType = VK_STRUCTURE_TYPE_VIDEO_CODING_CONTROL_INFO_KHR,
> +        .pNext = &q_info,
> +        .flags = VK_VIDEO_CODING_CONTROL_ENCODE_QUALITY_LEVEL_BIT_KHR |
> +                 VK_VIDEO_CODING_CONTROL_ENCODE_RATE_CONTROL_BIT_KHR |
> +                 (base_pic->force_idr ? VK_VIDEO_CODING_CONTROL_RESET_BIT_KHR : 0),
> +    };
> +
> +    /* Current picture's ref slot */
> +    vp->dpb_res = (VkVideoPictureResourceInfoKHR) {
> +        .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR,
> +        .pNext = NULL,
> +        .codedOffset = { 0 },
> +        .codedExtent = (VkExtent2D){ ctx->base.surface_width,
> +                                     ctx->base.surface_height },
> +        .baseArrayLayer = 0,
> +        .imageViewBinding = vp->dpb.view,
> +    };
> +
> +    for (int i = 0; i < ctx->caps.maxDpbSlots; i++) {
> +        if (!ctx->slots[i]) {
> +            slot_index = i;
> +            ctx->slots[i] = 1;
> +            break;
> +        }
> +    }
> +    av_assert0(slot_index >= 0);
> +
> +    vp->dpb_slot = (VkVideoReferenceSlotInfoKHR) {
> +        .sType = VK_STRUCTURE_TYPE_VIDEO_REFERENCE_SLOT_INFO_KHR,
> +        .pNext = NULL,  // Set later
> +        .slotIndex = slot_index,
> +        .pPictureResource = &vp->dpb_res,
> +    };
> +
> +    encode_info = (VkVideoEncodeInfoKHR) {
> +        .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_INFO_KHR,
> +        .pNext = NULL, // Set later
> +        .flags = 0x0,
> +        .srcPictureResource = (VkVideoPictureResourceInfoKHR) {
> +            .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR,
> +            .pNext = NULL,
> +            .codedOffset = { 0, 0 },
> +            .codedExtent = (VkExtent2D){ base_pic->input_image->width,
> +                                         base_pic->input_image->height },
> +            .baseArrayLayer = 0,
> +            .imageViewBinding = vp->in.view,
> +        },
> +        .pSetupReferenceSlot = &vp->dpb_slot,
> +        .referenceSlotCount = 0,
> +        .pReferenceSlots = ref_slot,
> +        .dstBuffer = sd_buf->buf,
> +        .dstBufferOffset = 0,
> +        .dstBufferRange = sd_buf->size,
> +        .precedingExternallyEncodedBytes = 0,
> +    };
> +
> +    for (int i = 0; i < MAX_REFERENCE_LIST_NUM; i++) {
> +        for (int j = 0; j < base_pic->nb_refs[i]; j++) {
> +            FFHWBaseEncodePicture *ref = base_pic->refs[i][j];
> +            FFVulkanEncodePicture *rvp = ref->priv;
> +            ref_slot[encode_info.referenceSlotCount++] = rvp->dpb_slot;
> +        }
> +    }
> +
> +    /* Setup picture parameters */
> +    err = ctx->codec->init_pic_params(avctx, base_pic,
> +                                      &encode_info);
> +    if (err < 0)
> +        return err;
> +
> +    encode_start = (VkVideoBeginCodingInfoKHR) {
> +        .sType = VK_STRUCTURE_TYPE_VIDEO_BEGIN_CODING_INFO_KHR,
> +        .pNext = !base_pic->force_idr ? &rc_info : NULL,
> +        .videoSession = ctx->common.session,
> +        .videoSessionParameters = ctx->session_params,
> +        .referenceSlotCount = encode_info.referenceSlotCount,
> +        .pReferenceSlots = ref_slot,
> +    };
> +
> +    /* Calling vkCmdBeginVideoCodingKHR requires to declare all references
> +     * being enabled upfront, including the current frame's output ref.
> +     * If layered DBPs are used, make sure its not included twice. */
> +    if (!ctx->common.layered_dpb || !encode_info.referenceSlotCount) {
> +        ref_slot[encode_info.referenceSlotCount] = vp->dpb_slot;
> +        ref_slot[encode_info.referenceSlotCount].slotIndex = -1;
> +        encode_start.referenceSlotCount++;
> +    }
> +
> +    /* Write header */
> +    if (base_pic->type == FF_HW_PICTURE_TYPE_IDR) {
> +        uint8_t *hdr_dst = sd_buf->mapped_mem + encode_info.dstBufferOffset;
> +        size_t data_size = encode_info.dstBufferRange;
> +        err = ctx->codec->write_sequence_headers(avctx, base_pic, hdr_dst, &data_size);
> +        if (err < 0)
> +            goto fail;
> +        encode_info.dstBufferOffset += data_size;
> +        encode_info.dstBufferRange  -= data_size;
> +    }
> +
> +    /* Write extra units */
> +    if (ctx->codec->write_extra_headers) {
> +        uint8_t *hdr_dst = sd_buf->mapped_mem + encode_info.dstBufferOffset;
> +        size_t data_size = encode_info.dstBufferRange;
> +        err = ctx->codec->write_extra_headers(avctx, base_pic, hdr_dst, &data_size);
> +        if (err < 0)
> +            goto fail;
> +        encode_info.dstBufferOffset += data_size;
> +        encode_info.dstBufferRange  -= data_size;
> +    }
> +
> +    /* Align buffer offset to the required value with filler units */
> +    if (ctx->codec->write_filler) {
> +        uint8_t *hdr_dst = sd_buf->mapped_mem + encode_info.dstBufferOffset;
> +        size_t data_size = encode_info.dstBufferRange;
> +
> +        uint32_t offset = encode_info.dstBufferOffset;
> +        size_t offset_align = ctx->caps.minBitstreamBufferOffsetAlignment;
> +
> +        uint32_t filler_data = FFALIGN(offset, offset_align) - offset;
> +
> +        if (filler_data) {
> +            while (filler_data < ctx->codec->filler_header_size)
> +                filler_data += offset_align;
> +
> +            filler_data -= ctx->codec->filler_header_size;
> +
> +            err = ctx->codec->write_filler(avctx, filler_data,
> +                                           hdr_dst, &data_size);
> +            if (err < 0)
> +                goto fail;
> +
> +            encode_info.dstBufferOffset += data_size;
> +            encode_info.dstBufferRange  -= data_size;
> +        }
> +    }
> +
> +    vp->slices_offset = encode_info.dstBufferOffset;
> +
> +    /* Align buffer size to the nearest lower alignment requirement. */
> +    encode_info.dstBufferRange -= size_align;
> +    encode_info.dstBufferRange = FFALIGN(encode_info.dstBufferRange,
> +                                         size_align);
> +
> +    /* Start command buffer recording */
> +    exec = vp->exec = ff_vk_exec_get(&ctx->enc_pool);
> +    ff_vk_exec_start(&ctx->s, exec);
> +    cmd_buf = exec->buf;
> +
> +    /* Output packet buffer */
> +    err = ff_vk_exec_add_dep_buf(&ctx->s, exec, &vp->pkt_buf, 1, 1);
> +    if (err < 0)
> +        goto fail;
> +
> +    /* Source image */
> +    err = ff_vk_exec_add_dep_frame(&ctx->s, exec, src,
> +                                   VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
> +                                   VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR);
> +    if (err < 0)
> +        goto fail;
> +
> +    /* Source image layout conversion */
> +    img_bar[nb_img_bar] = (VkImageMemoryBarrier2) {
> +        .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
> +        .pNext = NULL,
> +        .srcStageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
> +        .srcAccessMask = vkf->access[0],
> +        .dstStageMask = VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR,
> +        .dstAccessMask = VK_ACCESS_2_VIDEO_ENCODE_READ_BIT_KHR,
> +        .oldLayout = vkf->layout[0],
> +        .newLayout = VK_IMAGE_LAYOUT_VIDEO_ENCODE_SRC_KHR,
> +        .srcQueueFamilyIndex = vkf->queue_family[0],
> +        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
> +        .image = vkf->img[0],
> +        .subresourceRange = (VkImageSubresourceRange) {
> +            .aspectMask = vp->in.aspect,
> +            .layerCount = 1,
> +            .levelCount = 1,
> +        },
> +    };
> +    ff_vk_exec_update_frame(&ctx->s, exec, src,
> +                            &img_bar[nb_img_bar], &nb_img_bar);
> +
> +    if (!ctx->common.layered_dpb) {
> +        /* Source image's ref slot.
> +         * No need to do a layout conversion, since the frames which are allocated
> +         * with a DPB usage are automatically converted. */
> +        err = ff_vk_exec_add_dep_frame(&ctx->s, exec, base_pic->recon_image,
> +                                       VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
> +                                       VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR);
> +        if (err < 0)
> +            return err;
> +
> +        /* All references */
> +        for (int i = 0; i < MAX_REFERENCE_LIST_NUM; i++) {
> +            for (int j = 0; j < base_pic->nb_refs[i]; j++) {
> +                FFHWBaseEncodePicture *ref = base_pic->refs[i][j];
> +                err = ff_vk_exec_add_dep_frame(&ctx->s, exec, ref->recon_image,
> +                                               VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
> +                                               VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR);
> +                if (err < 0)
> +                    return err;
> +            }
> +        }
> +    } else {
> +        err = ff_vk_exec_add_dep_frame(&ctx->s, exec, ctx->common.layered_frame,
> +                                       VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR,
> +                                       VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR);
> +        if (err < 0)
> +            return err;
> +    }
> +
> +    /* Change image layout */
> +    vk->CmdPipelineBarrier2(cmd_buf, &(VkDependencyInfo) {
> +            .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
> +            .pImageMemoryBarriers = img_bar,
> +            .imageMemoryBarrierCount = nb_img_bar,
> +        });
> +
> +    /* Start, use parameters */
> +    vk->CmdBeginVideoCodingKHR(cmd_buf, &encode_start);
> +
> +    /* Send control data */
> +    if (base_pic->force_idr)
> +        vk->CmdControlVideoCodingKHR(cmd_buf, &encode_ctrl);
> +
> +    /* Encode */
> +    vk->CmdBeginQuery(cmd_buf, ctx->enc_pool.query_pool, exec->query_idx + 0, 0);
> +    vk->CmdEncodeVideoKHR(cmd_buf, &encode_info);
> +    vk->CmdEndQuery(cmd_buf, ctx->enc_pool.query_pool, exec->query_idx + 0);
> +
> +    /* End encoding */
> +    vk->CmdEndVideoCodingKHR(cmd_buf, &encode_end);
> +
> +    /* End recording and submit for execution */
> +    err = ff_vk_exec_submit(&ctx->s, vp->exec);
> +    if (err < 0)
> +        goto fail;
> +
> +    /* We don't need to keep the input image any longer, its already ref'd */
> +    av_frame_free(&base_pic->input_image);
> +
> +    return 0;
> +
> +fail:
> +    return err;
> +}
> +
> +static void vulkan_encode_wait(AVCodecContext *avctx,
> +                               FFHWBaseEncodePicture *base_pic)
> +{
> +    FFVulkanEncodeContext *ctx = avctx->priv_data;
> +    FFVulkanEncodePicture *vp = base_pic->priv;
> +
> +    av_assert0(base_pic->encode_issued);
> +
> +    if (base_pic->encode_complete)
> +        return;
> +
> +    ff_vk_exec_wait(&ctx->s, vp->exec);
> +    base_pic->encode_complete = 1;
> +}
> +
> +static int vulkan_encode_output(AVCodecContext *avctx,
> +                                FFHWBaseEncodePicture *base_pic, AVPacket *pkt)
> +{
> +    VkResult ret;
> +    FFVulkanEncodePicture *vp = base_pic->priv;
> +    FFVulkanEncodeContext *ctx = avctx->priv_data;
> +    FFVkBuffer *sd_buf = (FFVkBuffer *)vp->pkt_buf->data;
> +    uint32_t *query_data;
> +
> +    vulkan_encode_wait(avctx, base_pic);
> +
> +    ret = ff_vk_exec_get_query(&ctx->s, vp->exec, (void **)&query_data, 0);
> +    if (ret == VK_NOT_READY) {
> +        av_log(avctx, AV_LOG_ERROR, "Unable to perform query: %s!\n",
> +               ff_vk_ret2str(ret));
> +        return AVERROR(EINVAL);
> +    }
> +
> +    if (ret != VK_NOT_READY && ret != VK_SUCCESS) {
> +        av_log(avctx, AV_LOG_ERROR, "Unable to perform query: %s!\n",
> +               ff_vk_ret2str(ret));
> +        return AVERROR_EXTERNAL;
> +    }
> 
> +    if (query_data[2] != VK_QUERY_RESULT_STATUS_COMPLETE_KHR) {
> +        av_log(avctx, AV_LOG_ERROR, "Unable to encode: %u\n", query_data[2]);
> +        return AVERROR_EXTERNAL;
> +    }

The query pool is created with ctx->enc_caps.supportedEncodeFeedbackFlags. If
the implementation reports all feedback flags defined by the spec,
query_data[2] would refer to the HAS_OVERRIDES field of the feedback. In
this case, the status would live in query_data[3].

> +
> +    /* Invalidate buffer if needed */
> +    if (!(sd_buf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
> +        FFVulkanFunctions *vk = &ctx->s.vkfn;
> +        VkMappedMemoryRange invalidate_buf;
> +
> +        int offs = vp->slices_offset;
> +        /* If the non-coherent alignment is greater than the bitstream buffer
> +         * offset's alignment, and the offs value is not aligned already,
> +         * align it to the previous alignment point. */
> +        if (ctx->s.props.properties.limits.nonCoherentAtomSize >
> +            ctx->caps.minBitstreamBufferOffsetAlignment && offs &&
> +            (FFALIGN(offs, ctx->s.props.properties.limits.nonCoherentAtomSize) != offs)) {
> +            offs -= ctx->s.props.properties.limits.nonCoherentAtomSize;
> +            offs = FFALIGN(FFMAX(offs, 0), ctx->s.props.properties.limits.nonCoherentAtomSize);
> +        }
> +
> +        invalidate_buf = (VkMappedMemoryRange) {
> +            .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
> +            .memory = sd_buf->mem,
> +            .offset = offs,
> +            .size = VK_WHOLE_SIZE,
> +        };
> +
> +        vk->FlushMappedMemoryRanges(ctx->s.hwctx->act_dev, 1, &invalidate_buf);
> +    }
> +
> +    pkt->data = sd_buf->mapped_mem;
> +    pkt->size = vp->slices_offset + /* base offset */
> +                query_data[0]       /* secondary offset */ +
> +                query_data[1]       /* size */;
> +
> +    /* Move reference */
> +    pkt->buf = vp->pkt_buf;
> +    vp->pkt_buf = NULL;
> +
> +    av_log(avctx, AV_LOG_DEBUG, "Frame %"PRId64"/%"PRId64 " encoded\n",
> +           base_pic->display_order, base_pic->encode_order);
> +
> +    ff_hw_base_encode_set_output_property(&ctx->base, avctx,
> +                                          base_pic, pkt,
> +                                          ctx->codec->flags & VK_ENC_FLAG_NO_DELAY);
> +
> +    return 0;
> +}
> +
> +static const FFHWEncodePictureOperation vulkan_base_encode_ops = {
> +    .priv_size = sizeof(FFVulkanEncodePicture),
> +    .init   = &vulkan_encode_init,
> +    .issue  = &vulkan_encode_issue,
> +    .output = &vulkan_encode_output,
> +    .free   = &vulkan_encode_free,
> +};
> +
> +int ff_vulkan_encode_receive_packet(AVCodecContext *avctx, AVPacket *pkt)
> +{
> +    FFVulkanEncodeContext *ctx = avctx->priv_data;
> +    return ff_hw_base_encode_receive_packet(&ctx->base, avctx, pkt);
> +}
> +
> +static int vulkan_encode_create_dpb(AVCodecContext *avctx, FFVulkanEncodeContext *ctx)
> +{
> +    int err;
> +    FFHWBaseEncodeContext *base_ctx = &ctx->base;
> +    AVVulkanFramesContext *hwfc;
> +
> +    enum AVPixelFormat dpb_format;
> +    err = ff_hw_base_get_recon_format(base_ctx, NULL, &dpb_format);
> +    if (err < 0)
> +        return err;
> +
> +    base_ctx->recon_frames_ref = av_hwframe_ctx_alloc(base_ctx->device_ref);
> +    if (!base_ctx->recon_frames_ref)
> +        return AVERROR(ENOMEM);
> +
> +    base_ctx->recon_frames = (AVHWFramesContext *)base_ctx->recon_frames_ref->data;
> +    hwfc = (AVVulkanFramesContext *)base_ctx->recon_frames->hwctx;
> +
> +    base_ctx->recon_frames->format    = AV_PIX_FMT_VULKAN;
> +    base_ctx->recon_frames->sw_format = dpb_format;
> +    base_ctx->recon_frames->width     = base_ctx->surface_width;
> +    base_ctx->recon_frames->height    = base_ctx->surface_height;
> +
> +    hwfc->format[0]    = ctx->pic_format;
> +    hwfc->create_pnext = &ctx->profile_list;
> +    hwfc->tiling       = VK_IMAGE_TILING_OPTIMAL;
> +    hwfc->usage        = VK_IMAGE_USAGE_SAMPLED_BIT              |
> +                         VK_IMAGE_USAGE_VIDEO_ENCODE_DPB_BIT_KHR;
> +
> +    if (ctx->common.layered_dpb)
> +        hwfc->nb_layers = ctx->caps.maxDpbSlots;
> +
> +    err = av_hwframe_ctx_init(base_ctx->recon_frames_ref);
> +    if (err < 0) {
> +        av_log(avctx, AV_LOG_ERROR, "Failed to initialise DPB frame context: %s\n",
> +               av_err2str(err));
> +        return err;
> +    }
> +
> +    if (ctx->common.layered_dpb) {
> +        ctx->common.layered_frame = av_frame_alloc();
> +        if (!ctx->common.layered_frame)
> +            return AVERROR(ENOMEM);
> +
> +        err = av_hwframe_get_buffer(base_ctx->recon_frames_ref,
> +                                    ctx->common.layered_frame, 0);
> +        if (err < 0)
> +            return AVERROR(ENOMEM);
> +
> +        err = ff_vk_create_view(&ctx->s, &ctx->common,
> +                                &ctx->common.layered_view,
> +                                &ctx->common.layered_aspect,
> +                                (AVVkFrame *)ctx->common.layered_frame->data[0],
> +                                hwfc->format[0], 1);
> +        if (err < 0)
> +            return err;
> +
> +        av_buffer_unref(&base_ctx->recon_frames_ref);
> +    }
> +
> +    return 0;
> +}
> +
> +static av_cold int init_rc(AVCodecContext *avctx, FFVulkanEncodeContext *ctx)
> +{
> +    if (ctx->opts.rc_mode == FF_VK_RC_MODE_AUTO) {
> +        if (ctx->opts.qp >= 0) {
> +            ctx->opts.rc_mode = VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR;
> +        } else if (avctx->global_quality > 0) {
> +            if (avctx->flags & AV_CODEC_FLAG_QSCALE)
> +                ctx->opts.qp = avctx->global_quality / FF_QP2LAMBDA;
> +            else
> +                ctx->opts.qp = avctx->global_quality;
> +            ctx->opts.rc_mode = VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR;
> +        } else if (avctx->bit_rate) {
> +            if (ctx->enc_caps.rateControlModes & VK_VIDEO_ENCODE_RATE_CONTROL_MODE_VBR_BIT_KHR)
> +                ctx->opts.rc_mode = VK_VIDEO_ENCODE_RATE_CONTROL_MODE_VBR_BIT_KHR;
> +            else if (ctx->enc_caps.rateControlModes & VK_VIDEO_ENCODE_RATE_CONTROL_MODE_CBR_BIT_KHR)
> +                ctx->opts.rc_mode = VK_VIDEO_ENCODE_RATE_CONTROL_MODE_CBR_BIT_KHR;
> +            else
> +                ctx->opts.rc_mode = VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DEFAULT_KHR;
> +        } else {
> +            ctx->opts.qp = 18;
> +            ctx->opts.rc_mode = VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR;
> +            av_log(avctx, AV_LOG_WARNING, "No rate control settings specified, using fixed QP = %i\n",
> +                   ctx->opts.qp);
> +        }
> +    }
> +
> +    if (ctx->opts.rc_mode && !(ctx->enc_caps.rateControlModes & ctx->opts.rc_mode)) {
> +        static const char *rc_modes[] = {
> +            [VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DEFAULT_KHR] = "default",
> +            [VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR] = "cqp",
> +            [VK_VIDEO_ENCODE_RATE_CONTROL_MODE_CBR_BIT_KHR] = "cbr",
> +            [VK_VIDEO_ENCODE_RATE_CONTROL_MODE_VBR_BIT_KHR] = "vbr",
> +        };
> +        av_log(avctx, AV_LOG_ERROR, "Unsupported rate control mode %i, supported are:\n",
> +               ctx->opts.rc_mode);
> +        for (int i = 0; i < av_popcount(ctx->enc_caps.rateControlModes); i++) {
> +            if (!(ctx->enc_caps.rateControlModes & (1 << i)))
> +                continue;
> +            av_log(avctx, AV_LOG_ERROR, "    %i: %s\n", i, rc_modes[i]);
> +        }
> +        return AVERROR(ENOTSUP);
> +    }
> +
> +    return 0;
> +}
> +
> +av_cold int ff_vulkan_encode_init(AVCodecContext *avctx, FFVulkanEncodeContext *ctx,
> +                                  const FFVulkanEncodeDescriptor *vk_desc,
> +                                  const FFVulkanCodec *codec,
> +                                  void *codec_caps, void *quality_pnext)
> +{
> +    int i, err;
> +    VkResult ret;
> +    FFVulkanFunctions *vk = &ctx->s.vkfn;
> +    FFVulkanContext *s = &ctx->s;
> +    FFHWBaseEncodeContext *base_ctx = &ctx->base;
> +
> +    const AVPixFmtDescriptor *desc;
> +
> +    VkVideoFormatPropertiesKHR *ret_info;
> +    uint32_t nb_out_fmts = 0;
> +
> +    VkPhysicalDeviceVideoEncodeQualityLevelInfoKHR quality_info;
> +
> +    VkQueryPoolVideoEncodeFeedbackCreateInfoKHR query_create;
> +
> +    VkVideoSessionCreateInfoKHR session_create = {
> +        .sType = VK_STRUCTURE_TYPE_VIDEO_SESSION_CREATE_INFO_KHR,
> +    };
> +    VkPhysicalDeviceVideoFormatInfoKHR fmt_info = {
> +        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VIDEO_FORMAT_INFO_KHR,
> +        .pNext = &ctx->profile_list,
> +    };
> +
> +    if (!avctx->hw_frames_ctx) {
> +        av_log(avctx, AV_LOG_ERROR, "A hardware frames reference is "
> +               "required to associate the encoding device.\n");
> +        return AVERROR(EINVAL);
> +    }
> +
> +    ctx->base.op = &vulkan_base_encode_ops;
> +    ctx->codec = codec;
> +
> +    s->frames_ref = av_buffer_ref(avctx->hw_frames_ctx);
> +    s->frames = (AVHWFramesContext *)s->frames_ref->data;
> +    s->hwfc = s->frames->hwctx;
> +
> +    s->device = (AVHWDeviceContext *)s->frames->device_ref->data;
> +    s->hwctx = s->device->hwctx;
> +
> +    desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
> +    if (!desc)
> +        return AVERROR(EINVAL);
> +
> +    s->extensions = ff_vk_extensions_to_mask(s->hwctx->enabled_dev_extensions,
> +                                             s->hwctx->nb_enabled_dev_extensions);
> +
> +    if (!(s->extensions & FF_VK_EXT_VIDEO_ENCODE_QUEUE)) {
> +        av_log(avctx, AV_LOG_ERROR, "Device does not support the %s extension!\n",
> +               VK_KHR_VIDEO_ENCODE_QUEUE_EXTENSION_NAME);
> +        return AVERROR(ENOSYS);
> +    } else if (!(s->extensions & FF_VK_EXT_VIDEO_MAINTENANCE_1)) {
> +        av_log(avctx, AV_LOG_ERROR, "Device does not support the %s extension!\n",
> +               VK_KHR_VIDEO_MAINTENANCE_1_EXTENSION_NAME);
> +        return AVERROR(ENOSYS);
> +    } else if (!(s->extensions & vk_desc->encode_extension)) {
> +        av_log(avctx, AV_LOG_ERROR, "Device does not support decoding %s!\n",
> +               avcodec_get_name(avctx->codec_id));
> +        return AVERROR(ENOSYS);
> +    }
> +
> +    /* Load functions */
> +    err = ff_vk_load_functions(s->device, vk, s->extensions, 1, 1);
> +    if (err < 0)
> +        return err;
> +
> +    /* Create queue context */
> +    err = ff_vk_video_qf_init(s, &ctx->qf_enc,
> +                              VK_QUEUE_VIDEO_ENCODE_BIT_KHR,
> +                              vk_desc->encode_op);
> +    if (err < 0) {
> +        av_log(avctx, AV_LOG_ERROR, "Decoding of %s is not supported by this device\n",
> +               avcodec_get_name(avctx->codec_id));
> +        return err;
> +    }
> +
> +    /* Load all properties */
> +    err = ff_vk_load_props(s);
> +    if (err < 0)
> +        return err;
> +
> +    /* Set tuning */
> +    ctx->usage_info = (VkVideoEncodeUsageInfoKHR) {
> +        .sType             = VK_STRUCTURE_TYPE_VIDEO_ENCODE_USAGE_INFO_KHR,
> +        .videoUsageHints   = ctx->opts.usage,
> +        .videoContentHints = ctx->opts.content,
> +        .tuningMode        = ctx->opts.tune,
> +    };
> +
> +    /* Load up the profile now, needed for caps and to create a query pool */
> +    ctx->profile.sType               = VK_STRUCTURE_TYPE_VIDEO_PROFILE_INFO_KHR;
> +    ctx->profile.pNext               = &ctx->usage_info;
> +    ctx->profile.videoCodecOperation = vk_desc->encode_op;
> +    ctx->profile.chromaSubsampling   = ff_vk_subsampling_from_av_desc(desc);
> +    ctx->profile.lumaBitDepth        = ff_vk_depth_from_av_depth(desc->comp[0].depth);
> +    ctx->profile.chromaBitDepth      = ctx->profile.lumaBitDepth;
> +
> +    /* Setup a profile */
> +    err = codec->init_profile(avctx, &ctx->profile, &ctx->usage_info);
> +    if (err < 0)
> +        return err;
> +
> +    ctx->profile_list.sType        = VK_STRUCTURE_TYPE_VIDEO_PROFILE_LIST_INFO_KHR;
> +    ctx->profile_list.profileCount = 1;
> +    ctx->profile_list.pProfiles    = &ctx->profile;
> +
> +    /* Get the capabilities of the encoder for the given profile */
> +    ctx->enc_caps.sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_CAPABILITIES_KHR;
> +    ctx->enc_caps.pNext = codec_caps;
> +    ctx->caps.sType = VK_STRUCTURE_TYPE_VIDEO_CAPABILITIES_KHR;
> +    ctx->caps.pNext = &ctx->enc_caps;
> +
> +    ret = vk->GetPhysicalDeviceVideoCapabilitiesKHR(s->hwctx->phys_dev,
> +                                                    &ctx->profile,
> +                                                    &ctx->caps);
> +    if (ret == VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR) {
> +        av_log(avctx, AV_LOG_ERROR, "Unable to initialize encoding: "
> +               "%s profile \"%s\" not supported!\n",
> +               avcodec_get_name(avctx->codec_id),
> +               avcodec_profile_name(avctx->codec_id, avctx->profile));
> +        return AVERROR(EINVAL);
> +    } else if (ret == VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR) {
> +        av_log(avctx, AV_LOG_ERROR, "Unable to initialize encoding: "
> +               "format (%s) not supported!\n",
> +               av_get_pix_fmt_name(avctx->sw_pix_fmt));
> +        return AVERROR(EINVAL);
> +    } else if (ret == VK_ERROR_FEATURE_NOT_PRESENT ||
> +               ret == VK_ERROR_FORMAT_NOT_SUPPORTED) {
> +        return AVERROR(EINVAL);
> +    } else if (ret != VK_SUCCESS) {
> +        return AVERROR_EXTERNAL;
> +    }
> +
> +    err = init_rc(avctx, ctx);
> +    if (err < 0)
> +        return err;
> +
> +    /* Create command and query pool */
> +    query_create = (VkQueryPoolVideoEncodeFeedbackCreateInfoKHR) {
> +        .sType = VK_STRUCTURE_TYPE_QUERY_POOL_VIDEO_ENCODE_FEEDBACK_CREATE_INFO_KHR,
> +        .pNext = &ctx->profile,
> +        .encodeFeedbackFlags = ctx->enc_caps.supportedEncodeFeedbackFlags,
> +    };
> +    err = ff_vk_exec_pool_init(s, &ctx->qf_enc, &ctx->enc_pool, base_ctx->async_depth,
> +                               1, VK_QUERY_TYPE_VIDEO_ENCODE_FEEDBACK_KHR, 0,
> +                               &query_create);
> +    if (err < 0)
> +        return err;
> +
> +    if (ctx->opts.quality > ctx->enc_caps.maxQualityLevels) {
> +        av_log(avctx, AV_LOG_ERROR, "Invalid quality level %i: allowed range is "
> +                                    "0 to %i\n",
> +               ctx->opts.quality, ctx->enc_caps.maxQualityLevels);
> +        return AVERROR(EINVAL);
> +    }
> +
> +    /* Get quality properties for the profile and quality level */
> +    quality_info = (VkPhysicalDeviceVideoEncodeQualityLevelInfoKHR) {
> +        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VIDEO_ENCODE_QUALITY_LEVEL_INFO_KHR,
> +        .pVideoProfile = &ctx->profile,
> +        .qualityLevel = ctx->opts.quality,
> +    };
> +    ctx->quality_props = (VkVideoEncodeQualityLevelPropertiesKHR) {
> +        .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_QUALITY_LEVEL_PROPERTIES_KHR,
> +        .pNext = quality_pnext,
> +    };
> +    ret = vk->GetPhysicalDeviceVideoEncodeQualityLevelPropertiesKHR(s->hwctx->phys_dev,
> +                                                                    &quality_info,
> +                                                                    &ctx->quality_props);
> +    if (ret != VK_SUCCESS)
> +        return AVERROR_EXTERNAL;
> +
> +    /* Printout informative properties */
> +    av_log(avctx, AV_LOG_VERBOSE, "Encoder capabilities for %s profile \"%s\":\n",
> +           avcodec_get_name(avctx->codec_id),
> +           avcodec_profile_name(avctx->codec_id, avctx->profile));
> +    av_log(avctx, AV_LOG_VERBOSE, "    Width: from %i to %i\n",
> +           ctx->caps.minCodedExtent.width, ctx->caps.maxCodedExtent.width);
> +    av_log(avctx, AV_LOG_VERBOSE, "    Height: from %i to %i\n",
> +           ctx->caps.minCodedExtent.height, ctx->caps.maxCodedExtent.height);
> +    av_log(avctx, AV_LOG_VERBOSE, "    Width alignment: %i\n",
> +           ctx->caps.pictureAccessGranularity.width);
> +    av_log(avctx, AV_LOG_VERBOSE, "    Height alignment: %i\n",
> +           ctx->caps.pictureAccessGranularity.height);
> +    av_log(avctx, AV_LOG_VERBOSE, "    Bitstream offset alignment: %"PRIu64"\n",
> +           ctx->caps.minBitstreamBufferOffsetAlignment);
> +    av_log(avctx, AV_LOG_VERBOSE, "    Bitstream size alignment: %"PRIu64"\n",
> +           ctx->caps.minBitstreamBufferSizeAlignment);
> +    av_log(avctx, AV_LOG_VERBOSE, "    Maximum references: %u\n",
> +           ctx->caps.maxDpbSlots);
> +    av_log(avctx, AV_LOG_VERBOSE, "    Maximum active references: %u\n",
> +           ctx->caps.maxActiveReferencePictures);
> +    av_log(avctx, AV_LOG_VERBOSE, "    Codec header version: %i.%i.%i (driver), %i.%i.%i (compiled)\n",
> +           CODEC_VER(ctx->caps.stdHeaderVersion.specVersion),
> +           CODEC_VER(vk_desc->ext_props.specVersion));
> +    av_log(avctx, AV_LOG_VERBOSE, "    Encoder max quality: %i\n",
> +           ctx->enc_caps.maxQualityLevels);
> +    av_log(avctx, AV_LOG_VERBOSE, "    Encoder image width alignment: %i\n",
> +           ctx->enc_caps.encodeInputPictureGranularity.width);
> +    av_log(avctx, AV_LOG_VERBOSE, "    Encoder image height alignment: %i\n",
> +           ctx->enc_caps.encodeInputPictureGranularity.height);
> +    av_log(avctx, AV_LOG_VERBOSE, "    Capability flags:%s%s%s\n",
> +           ctx->caps.flags ? "" :
> +               " none",
> +           ctx->caps.flags & VK_VIDEO_CAPABILITY_PROTECTED_CONTENT_BIT_KHR ?
> +               " protected" : "",
> +           ctx->caps.flags & VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR ?
> +               " separate_references" : "");
> +
> +    /* Setup width/height alignment */
> +    base_ctx->surface_width = avctx->coded_width =
> +        FFALIGN(avctx->width, ctx->enc_caps.encodeInputPictureGranularity.width);
> +    base_ctx->surface_height = avctx->coded_height =
> +        FFALIGN(avctx->height, ctx->enc_caps.encodeInputPictureGranularity.height);
> +
> +    /* Check if encoding is possible with the given parameters */
> +    if (avctx->coded_width  < ctx->caps.minCodedExtent.width   ||
> +        avctx->coded_height < ctx->caps.minCodedExtent.height  ||
> +        avctx->coded_width  > ctx->caps.maxCodedExtent.width   ||
> +        avctx->coded_height > ctx->caps.maxCodedExtent.height) {
> +        av_log(avctx, AV_LOG_ERROR, "Input of %ix%i too large for encoder limits: %ix%i max\n",
> +               avctx->coded_width, avctx->coded_height,
> +               ctx->caps.minCodedExtent.width, ctx->caps.minCodedExtent.height);
> +        return AVERROR(EINVAL);
> +    }
> +
> +    fmt_info.imageUsage = VK_IMAGE_USAGE_VIDEO_ENCODE_DPB_BIT_KHR |
> +                          VK_IMAGE_USAGE_VIDEO_ENCODE_DST_BIT_KHR;
> +
> +    ctx->common.layered_dpb = !(ctx->caps.flags & VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR);
> +
> +    /* Get the supported image formats */
> +    ret = vk->GetPhysicalDeviceVideoFormatPropertiesKHR(s->hwctx->phys_dev,
> +                                                        &fmt_info,
> +                                                        &nb_out_fmts, NULL);
> +    if (ret == VK_ERROR_FORMAT_NOT_SUPPORTED ||
> +        (!nb_out_fmts && ret == VK_SUCCESS)) {
> +        return AVERROR(EINVAL);
> +    } else if (ret != VK_SUCCESS) {
> +        av_log(avctx, AV_LOG_ERROR, "Unable to get Vulkan format properties: %s!\n",
> +               ff_vk_ret2str(ret));
> +        return AVERROR_EXTERNAL;
> +    }
> +
> +    ret_info = av_mallocz(sizeof(*ret_info)*nb_out_fmts);
> +    if (!ret_info)
> +        return AVERROR(ENOMEM);
> +
> +    for (int i = 0; i < nb_out_fmts; i++)
> +        ret_info[i].sType = VK_STRUCTURE_TYPE_VIDEO_FORMAT_PROPERTIES_KHR;
> +
> +    ret = vk->GetPhysicalDeviceVideoFormatPropertiesKHR(s->hwctx->phys_dev,
> +                                                        &fmt_info,
> +                                                        &nb_out_fmts, ret_info);
> +    if (ret == VK_ERROR_FORMAT_NOT_SUPPORTED ||
> +        (!nb_out_fmts && ret == VK_SUCCESS)) {
> +        av_free(ret_info);
> +        return AVERROR(EINVAL);
> +    } else if (ret != VK_SUCCESS) {
> +        av_log(avctx, AV_LOG_ERROR, "Unable to get Vulkan format properties: %s!\n",
> +               ff_vk_ret2str(ret));
> +        av_free(ret_info);
> +        return AVERROR_EXTERNAL;
> +    }
> +
> +    av_log(avctx, AV_LOG_VERBOSE, "Supported input formats:\n");
> +    for (i = 0; i < nb_out_fmts; i++)
> +        av_log(avctx, AV_LOG_VERBOSE, "    %i: %i\n", i, ret_info[i].format);
> +
> +    for (i = 0; i < nb_out_fmts; i++) {
> +        if (ff_vk_pix_fmt_from_vkfmt(ret_info[i].format) == s->frames->sw_format) {
> +            ctx->pic_format = ret_info[i].format;
> +            break;
> +        }
> +    }
> +
> +    av_free(ret_info);
> +
> +    if (i == nb_out_fmts) {
> +        av_log(avctx, AV_LOG_ERROR, "Pixel format %s of input frames not supported!\n",
> +               av_get_pix_fmt_name(s->frames->sw_format));
> +        return AVERROR(EINVAL);
> +    }
> +
> +    /* Create session */
> +    session_create.pVideoProfile = &ctx->profile;
> +    session_create.flags = 0x0;
> +    session_create.queueFamilyIndex = ctx->qf_enc.queue_family;
> +    session_create.maxCodedExtent = ctx->caps.maxCodedExtent;
> +    session_create.maxDpbSlots = ctx->caps.maxDpbSlots;
> +    session_create.maxActiveReferencePictures = ctx->caps.maxActiveReferencePictures;
> +    session_create.pictureFormat = ctx->pic_format;
> +    session_create.referencePictureFormat = session_create.pictureFormat;
> +    session_create.pStdHeaderVersion = &vk_desc->ext_props;
> +
> +    err = ff_vk_video_common_init(avctx, s, &ctx->common, &session_create);
> +    if (err < 0)
> +        return err;
> +
> +    err = ff_hw_base_encode_init(avctx, &ctx->base);
> +    if (err < 0)
> +        return err;
> +
> +    err = vulkan_encode_create_dpb(avctx, ctx);
> +    if (err < 0)
> +        return err;
> +
> +    base_ctx->async_encode = 1;
> +    base_ctx->encode_fifo = av_fifo_alloc2(base_ctx->async_depth,
> +                                           sizeof(FFVulkanEncodePicture *), 0);
> +    if (!base_ctx->encode_fifo)
> +        return AVERROR(ENOMEM);
> +
> +    return 0;
> +}
> diff --git a/libavcodec/vulkan_encode.h b/libavcodec/vulkan_encode.h
> new file mode 100644
> index 0000000000..995befeca2
> --- /dev/null
> +++ b/libavcodec/vulkan_encode.h
> @@ -0,0 +1,243 @@
> +/*
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#ifndef AVCODEC_VULKAN_ENCODE_H
> +#define AVCODEC_VULKAN_ENCODE_H
> +
> +#include "codec_id.h"
> +#include "internal.h"
> +
> +#include "encode.h"
> +#include "hwconfig.h"
> +
> +#include "vulkan_video.h"
> +#include "hw_base_encode.h"
> +
> +typedef struct FFVulkanEncodeDescriptor {
> +    enum AVCodecID                   codec_id;
> +    FFVulkanExtensions               encode_extension;
> +    VkVideoCodecOperationFlagBitsKHR encode_op;
> +
> +    VkExtensionProperties ext_props;
> +} FFVulkanEncodeDescriptor;
> +
> +typedef struct FFVulkanEncodePicture {
> +    FFHWBaseEncodePicture  base;
> +    VkVideoPictureResourceInfoKHR dpb_res;
> +    VkVideoReferenceSlotInfoKHR dpb_slot;
> +
> +    struct {
> +        VkImageView        view;
> +        VkImageAspectFlags aspect;
> +    } in;
> +
> +    struct {
> +        VkImageView        view;
> +        VkImageAspectFlags aspect;
> +    } dpb;
> +
> +    void                  *codec_layer;
> +    void                  *codec_rc_layer;
> +
> +    FFVkExecContext       *exec;
> +    AVBufferRef           *pkt_buf;
> +    int                    slices_offset;
> +} FFVulkanEncodePicture;
> +
> +/**
> + * Callback for writing stream-level headers.
> + */
> +typedef int (*vkenc_cb_write_stream_headers)(AVCodecContext *avctx,
> +                                             uint8_t *data, size_t *data_len);
> +
> +/**
> + * Callback for initializing codec-specific picture headers.
> + */
> +typedef int (*vkenc_cb_init_pic_headers)(AVCodecContext *avctx,
> +                                         FFVulkanEncodePicture *pic);
> +
> +/**
> + * Callback for writing alignment data.
> + * Align is the value to align offset to.
> + */
> +typedef int (*vkenc_cb_write_filler)(AVCodecContext *avctx, uint32_t filler,
> +                                     uint8_t *data, size_t *data_len);
> +
> +/**
> + * Callback for writing any extra units requested. data_len must be set
> + * to the available size, and its value will be overwritten by the #bytes written
> + * to the output buffer.
> + */
> +typedef int (*vkenc_cb_write_extra_headers)(AVCodecContext *avctx,
> +                                            FFVulkanEncodePicture *pic,
> +                                            uint8_t *data, size_t *data_len);
> +
> +typedef struct FFVulkanCodec {
> +    /**
> +     * Codec feature flags.
> +     */
> +    int flags;
> +/* Codec output packet without timestamp delay, which means the
> + * output packet has same PTS and DTS. For AV1. */
> +#define VK_ENC_FLAG_NO_DELAY 1 << 6
> +
> +    /**
> +     * Size of the codec-specific picture struct.
> +     */
> +    size_t picture_priv_data_size;
> +
> +    /**
> +     * Size of the filler header.
> +     */
> +    size_t filler_header_size;
> +
> +    /**
> +     * Initialize codec-specific structs in a Vulkan profile.
> +     */
> +    int (*init_profile)(AVCodecContext *avctx, VkVideoProfileInfoKHR *profile,
> +                        void *pnext);
> +
> +    /**
> +     * Initialize codec-specific rate control structures for a picture.
> +     */
> +    int (*init_pic_rc)(AVCodecContext *avctx, FFHWBaseEncodePicture *pic,
> +                       VkVideoEncodeRateControlInfoKHR *rc_info,
> +                       VkVideoEncodeRateControlLayerInfoKHR *rc_layer);
> +
> +    /**
> +     * Initialize codec-specific picture parameters.
> +     */
> +    int (*init_pic_params)(AVCodecContext *avctx, FFHWBaseEncodePicture *pic,
> +                           VkVideoEncodeInfoKHR *encode_info);
> +
> +    /**
> +     * Callback for writing stream headers.
> +     */
> +    int (*write_sequence_headers)(AVCodecContext *avctx,
> +                                  FFHWBaseEncodePicture *base_pic,
> +                                  uint8_t *data, size_t *data_len);
> +
> +    /**
> +     * Callback for writing alignment data.
> +     */
> +    int (*write_filler)(AVCodecContext *avctx, uint32_t filler,
> +                        uint8_t *data, size_t *data_len);
> +
> +    /**
> +     * Callback for writing any extra units requested. data_len must be set
> +     * to the available size, and its value will be overwritten by the #bytes written
> +     * to the output buffer.
> +     */
> +    int (*write_extra_headers)(AVCodecContext *avctx, FFHWBaseEncodePicture *pic,
> +                               uint8_t *data, size_t *data_len);
> +} FFVulkanCodec;
> +
> +typedef struct FFVkEncodeCommonOptions {
> +    int qp;
> +    int quality;
> +    int profile;
> +    int level;
> +    int async_depth;
> +    VkVideoEncodeUsageFlagBitsKHR usage;
> +    VkVideoEncodeContentFlagBitsKHR content;
> +    VkVideoEncodeTuningModeKHR tune;
> +
> +    VkVideoEncodeRateControlModeFlagBitsKHR rc_mode;
> +#define FF_VK_RC_MODE_AUTO 0xFFFFFFFF
> +} FFVkEncodeCommonOptions;
> +
> +typedef struct FFVulkanEncodeContext {
> +    FFVulkanContext s;
> +    FFVkVideoCommon common;
> +    FFHWBaseEncodeContext base;
> +    const FFVulkanCodec *codec;
> +
> +    /* Session parameters object, initialized by each codec independently
> +     * and set here. */
> +    VkVideoSessionParametersKHR session_params;
> +
> +    AVBufferPool *buf_pool;
> +
> +    VkFormat pic_format;
> +
> +    FFVkEncodeCommonOptions opts;
> +
> +    VkVideoProfileInfoKHR profile;
> +    VkVideoProfileListInfoKHR profile_list;
> +    VkVideoCapabilitiesKHR caps;
> +    VkVideoEncodeQualityLevelPropertiesKHR quality_props;
> +    VkVideoEncodeCapabilitiesKHR enc_caps;
> +    VkVideoEncodeUsageInfoKHR usage_info;
> +
> +    FFVkQueueFamilyCtx qf_enc;
> +    FFVkExecPool enc_pool;
> +
> +    uint32_t slots[32];
> +} FFVulkanEncodeContext;
> +
> +#define VULKAN_ENCODE_COMMON_OPTIONS \
> +    { "qp", "Use an explicit constant quantizer for the whole stream", OFFSET(common.opts.qp), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 255, FLAGS }, \
> +    { "quality", "Set encode quality (trades off against speed, higher is faster)", OFFSET(common.opts.quality), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, FLAGS }, \
> +    { "rc_mode", "Select rate control type", OFFSET(common.opts.rc_mode), AV_OPT_TYPE_INT, { .i64 = FF_VK_RC_MODE_AUTO }, 0, FF_VK_RC_MODE_AUTO, FLAGS, "rc_mode" }, \
> +        { "auto",    "Choose mode automatically based on parameters", 0, AV_OPT_TYPE_CONST, { .i64 = FF_VK_RC_MODE_AUTO }, INT_MIN, INT_MAX, FLAGS, "rc_mode" }, \
> +        { "driver",  "Driver-specific rate control", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DEFAULT_KHR      }, INT_MIN, INT_MAX, FLAGS, "rc_mode" }, \
> +        { "cqp",     "Constant quantizer mode", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR }, INT_MIN, INT_MAX, FLAGS, "rc_mode" }, \
> +        { "cbr",     "Constant bitrate mode", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_RATE_CONTROL_MODE_CBR_BIT_KHR      }, INT_MIN, INT_MAX, FLAGS, "rc_mode" }, \
> +        { "vbr",     "Variable bitrate mode", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_RATE_CONTROL_MODE_VBR_BIT_KHR      }, INT_MIN, INT_MAX, FLAGS, "rc_mode" }, \
> +    { "tune", "Select tuning type", OFFSET(common.opts.tune), AV_OPT_TYPE_INT, { .i64 = VK_VIDEO_ENCODE_TUNING_MODE_DEFAULT_KHR }, 0, INT_MAX, FLAGS, "tune" }, \
> +        { "default",  "Default tuning", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_TUNING_MODE_DEFAULT_KHR           }, INT_MIN, INT_MAX, FLAGS, "tune" }, \
> +        { "hq",       "High quality tuning", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_TUNING_MODE_HIGH_QUALITY_KHR      }, INT_MIN, INT_MAX, FLAGS, "tune" }, \
> +        { "ll",       "Low-latency tuning", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_TUNING_MODE_LOW_LATENCY_KHR       }, INT_MIN, INT_MAX, FLAGS, "tune" }, \
> +        { "ull",      "Ultra low-latency tuning", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_TUNING_MODE_ULTRA_LOW_LATENCY_KHR }, INT_MIN, INT_MAX, FLAGS, "tune" }, \
> +        { "lossless", "Lossless mode tuning", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_TUNING_MODE_LOSSLESS_KHR          }, INT_MIN, INT_MAX, FLAGS, "tune" }, \
> +    { "usage", "Select usage type", OFFSET(common.opts.usage), AV_OPT_TYPE_FLAGS, { .i64 = VK_VIDEO_ENCODE_USAGE_DEFAULT_KHR }, 0, INT_MAX, FLAGS, "usage" }, \
> +        { "default",    "Default optimizations", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_USAGE_DEFAULT_KHR          }, INT_MIN, INT_MAX, FLAGS, "usage" }, \
> +        { "transcode",  "Optimize for transcoding", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_USAGE_TRANSCODING_BIT_KHR  }, INT_MIN, INT_MAX, FLAGS, "usage" }, \
> +        { "stream",     "Optimize for streaming", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_USAGE_STREAMING_BIT_KHR    }, INT_MIN, INT_MAX, FLAGS, "usage" }, \
> +        { "record",     "Optimize for offline recording", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_USAGE_RECORDING_BIT_KHR    }, INT_MIN, INT_MAX, FLAGS, "usage" }, \
> +        { "conference", "Optimize for teleconferencing", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_USAGE_CONFERENCING_BIT_KHR }, INT_MIN, INT_MAX, FLAGS, "usage" }, \
> +    { "content", "Select content type", OFFSET(common.opts.content), AV_OPT_TYPE_FLAGS, { .i64 = VK_VIDEO_ENCODE_CONTENT_DEFAULT_KHR }, 0, INT_MAX, FLAGS, "content" }, \
> +        { "default",  "Default content", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_CONTENT_DEFAULT_KHR      }, INT_MIN, INT_MAX, FLAGS, "content" }, \
> +        { "camera",   "Camera footage", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_CONTENT_CAMERA_BIT_KHR   }, INT_MIN, INT_MAX, FLAGS, "content" }, \
> +        { "desktop",  "Screen recording", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_CONTENT_DESKTOP_BIT_KHR  }, INT_MIN, INT_MAX, FLAGS, "content" }, \
> +        { "rendered", "Game or 3D content", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_CONTENT_RENDERED_BIT_KHR }, INT_MIN, INT_MAX, FLAGS, "content" }
> +
> +/**
> + * Initialize encoder.
> + */
> +av_cold int ff_vulkan_encode_init(AVCodecContext *avctx, FFVulkanEncodeContext *ctx,
> +                                  const FFVulkanEncodeDescriptor *vk_desc,
> +                                  const FFVulkanCodec *codec,
> +                                  void *codec_caps, void *quality_pnext);
> +
> +/**
> + * Encode.
> + */
> +int ff_vulkan_encode_receive_packet(AVCodecContext *avctx, AVPacket *pkt);
> +
> +/**
> + * Uninitialize encoder.
> + */
> +void ff_vulkan_encode_uninit(FFVulkanEncodeContext *ctx);
> +
> +/**
> + * Paperwork.
> + */
> +extern const AVCodecHWConfigInternal *const ff_vulkan_encode_hw_configs[];
> +
> +#endif /* AVCODEC_VULKAN_ENCODE_H */
Lynne Sept. 11, 2024, 4:03 a.m. UTC | #2
On 10/09/2024 15:29, Benjamin Cheng wrote:
> On Mon Sep 9, 2024 at 6:37 AM EDT, Lynne via ffmpeg-devel wrote:
>> This commit adds the common Vulkan video encoding framework.
>> It makes full use of the asynchronous features of our new common
>> hardware encoding code, and of Vulkan.
>> The code is able to handle anything from H264 to AV1 and MJPEG.
>> ---
>>   configure                  |   2 +
>>   libavcodec/Makefile        |   2 +-
>>   libavcodec/vulkan_encode.c | 979 +++++++++++++++++++++++++++++++++++++
>>   libavcodec/vulkan_encode.h | 243 +++++++++
>>   4 files changed, 1225 insertions(+), 1 deletion(-)
>>   create mode 100644 libavcodec/vulkan_encode.c
>>   create mode 100644 libavcodec/vulkan_encode.h
>>
>> diff --git a/configure b/configure
>> index a8e67d230c..6cfb736a86 100755
>> --- a/configure
>> +++ b/configure
>> @@ -2638,6 +2638,7 @@ CONFIG_EXTRA="
>>       vp3dsp
>>       vp56dsp
>>       vp8dsp
>> +    vulkan_encode
>>       wma_freqs
>>       wmv2dsp
>>   "
>> @@ -3299,6 +3300,7 @@ qsvdec_select="qsv"
>>   qsvenc_select="qsv"
>>   qsvvpp_select="qsv"
>>   vaapi_encode_deps="vaapi"
>> +vulkan_encode_deps="vulkan"
>>   v4l2_m2m_deps="linux_videodev2_h sem_timedwait"
>>   
>>   bilateral_cuda_filter_deps="ffnvcodec"
>> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
>> index 27ef4638ce..ff6a3c4efc 100644
>> --- a/libavcodec/Makefile
>> +++ b/libavcodec/Makefile
>> @@ -1282,7 +1282,7 @@ SKIPHEADERS-$(CONFIG_QSVENC)           += qsvenc.h
>>   SKIPHEADERS-$(CONFIG_VAAPI)            += vaapi_decode.h vaapi_hevc.h vaapi_encode.h
>>   SKIPHEADERS-$(CONFIG_VDPAU)            += vdpau.h vdpau_internal.h
>>   SKIPHEADERS-$(CONFIG_VIDEOTOOLBOX)     += videotoolbox.h vt_internal.h
>> -SKIPHEADERS-$(CONFIG_VULKAN)           += vulkan.h vulkan_video.h vulkan_decode.h
>> +SKIPHEADERS-$(CONFIG_VULKAN)           += vulkan.h vulkan_video.h vulkan_encode.h vulkan_decode.h
>>   SKIPHEADERS-$(CONFIG_V4L2_M2M)         += v4l2_buffers.h v4l2_context.h v4l2_m2m.h
>>   SKIPHEADERS-$(CONFIG_ZLIB)             += zlib_wrapper.h
>>   
>> diff --git a/libavcodec/vulkan_encode.c b/libavcodec/vulkan_encode.c
>> new file mode 100644
>> index 0000000000..5e87d4c073
>> --- /dev/null
>> +++ b/libavcodec/vulkan_encode.c
>> @@ -0,0 +1,979 @@
>> +/*
>> + * This file is part of FFmpeg.
>> + *
>> + * FFmpeg is free software; you can redistribute it and/or
>> + * modify it under the terms of the GNU Lesser General Public
>> + * License as published by the Free Software Foundation; either
>> + * version 2.1 of the License, or (at your option) any later version.
>> + *
>> + * FFmpeg is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> + * Lesser General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU Lesser General Public
>> + * License along with FFmpeg; if not, write to the Free Software
>> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
>> + */
>> +
>> +#include "libavutil/mem.h"
>> +#include "libavutil/avassert.h"
>> +#include "vulkan_encode.h"
>> +#include "config.h"
>> +
>> +#include "libavutil/vulkan_loader.h"
>> +
>> +const AVCodecHWConfigInternal *const ff_vulkan_encode_hw_configs[] = {
>> +    HW_CONFIG_ENCODER_FRAMES(VULKAN, VULKAN),
>> +    NULL,
>> +};
>> +
>> +av_cold void ff_vulkan_encode_uninit(FFVulkanEncodeContext *ctx)
>> +{
>> +    FFVulkanContext *s = &ctx->s;
>> +    FFVulkanFunctions *vk = &s->vkfn;
>> +
>> +    /* Wait on and free execution pool */
>> +    ff_vk_exec_pool_free(s, &ctx->enc_pool);
>> +
>> +    /* Destroy the session params */
>> +    if (ctx->session_params)
>> +        vk->DestroyVideoSessionParametersKHR(s->hwctx->act_dev,
>> +                                             ctx->session_params,
>> +                                             s->hwctx->alloc);
>> +
>> +    ff_hw_base_encode_close(&ctx->base);
>> +
>> +    av_buffer_pool_uninit(&ctx->buf_pool);
>> +
>> +    ff_vk_video_common_uninit(s, &ctx->common);
>> +
>> +    ff_vk_uninit(s);
>> +}
>> +
>> +static int vulkan_encode_init(AVCodecContext *avctx, FFHWBaseEncodePicture *pic)
>> +{
>> +    int err;
>> +    FFVulkanEncodeContext *ctx = avctx->priv_data;
>> +    FFVulkanEncodePicture *vp = pic->priv;
>> +
>> +    AVFrame *f = pic->input_image;
>> +    AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data;
>> +    AVVulkanFramesContext *vkfc = hwfc->hwctx;
>> +    AVVkFrame *vkf = (AVVkFrame *)f->data[0];
>> +
>> +    if (ctx->codec->picture_priv_data_size > 0) {
>> +        pic->codec_priv = av_mallocz(ctx->codec->picture_priv_data_size);
>> +        if (!pic->codec_priv)
>> +            return AVERROR(ENOMEM);
>> +    }
>> +
>> +    /* Input image view */
>> +    err = ff_vk_create_view(&ctx->s, &ctx->common,
>> +                            &vp->in.view, &vp->in.aspect,
>> +                            vkf, vkfc->format[0], 0);
>> +    if (err < 0)
>> +        return err;
>> +
>> +    /* Reference view */
>> +    if (!ctx->common.layered_dpb) {
>> +        AVFrame *rf = pic->recon_image;
>> +        AVVkFrame *rvkf = (AVVkFrame *)rf->data[0];
>> +        err = ff_vk_create_view(&ctx->s, &ctx->common,
>> +                                &vp->dpb.view, &vp->dpb.aspect,
>> +                                rvkf, ctx->pic_format, 1);
>> +        if (err < 0)
>> +            return err;
>> +    } else {
>> +        vp->dpb.view = ctx->common.layered_view;
>> +        vp->dpb.aspect = ctx->common.layered_aspect;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static int vulkan_encode_free(AVCodecContext *avctx, FFHWBaseEncodePicture *pic)
>> +{
>> +    FFVulkanEncodeContext *ctx = avctx->priv_data;
>> +    FFVulkanFunctions *vk = &ctx->s.vkfn;
>> +
>> +    FFVulkanEncodePicture *vp = pic->priv;
>> +
>> +    if (vp->in.view)
>> +        vk->DestroyImageView(ctx->s.hwctx->act_dev, vp->in.view,
>> +                             ctx->s.hwctx->alloc);
>> +
>> +    if (!ctx->common.layered_dpb && vp->dpb.view)
>> +        vk->DestroyImageView(ctx->s.hwctx->act_dev, vp->dpb.view,
>> +                             ctx->s.hwctx->alloc);
>> +
>> +    ctx->slots[vp->dpb_slot.slotIndex] = 0;
>> +
>> +    return 0;
>> +}
>> +
>> +static int init_pic_rc(AVCodecContext *avctx, FFHWBaseEncodePicture *pic,
>> +                       VkVideoEncodeRateControlInfoKHR *rc_info,
>> +                       VkVideoEncodeRateControlLayerInfoKHR *rc_layer /* Goes in ^ */)
>> +{
>> +    FFVulkanEncodeContext *ctx = avctx->priv_data;
>> +
>> +    *rc_info = (VkVideoEncodeRateControlInfoKHR) {
>> +        .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_RATE_CONTROL_INFO_KHR,
>> +        .rateControlMode = ctx->opts.rc_mode,
>> +    };
>> +
>> +    if (ctx->opts.rc_mode > VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR) {
>> +        *rc_layer = (VkVideoEncodeRateControlLayerInfoKHR) {
>> +            .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_RATE_CONTROL_LAYER_INFO_KHR,
>> +            .averageBitrate = avctx->bit_rate,
>> +            .maxBitrate = avctx->rc_max_rate ? avctx->rc_max_rate : avctx->bit_rate,
>> +            .frameRateNumerator = avctx->framerate.num,
>> +            .frameRateDenominator = avctx->framerate.den,
>> +        };
>> +        rc_info->layerCount++;
>> +        rc_info->pLayers = rc_layer;
>> +    }
>> +
>> +    return ctx->codec->init_pic_rc(avctx, pic, rc_info, rc_layer);
>> +}
>> +
>> +static int vulkan_encode_issue(AVCodecContext *avctx,
>> +                               FFHWBaseEncodePicture *base_pic)
>> +{
>> +    FFVulkanEncodeContext *ctx = avctx->priv_data;
>> +    FFVulkanFunctions *vk = &ctx->s.vkfn;
>> +
>> +    const size_t size_align = ctx->caps.minBitstreamBufferSizeAlignment;
>> +
>> +    FFVulkanEncodePicture *vp = base_pic->priv;
>> +    AVFrame *src = (AVFrame *)base_pic->input_image;
>> +    AVVkFrame *vkf = (AVVkFrame *)src->data[0];
>> +
>> +    int err, max_pkt_size;
>> +
>> +    FFVkBuffer *sd_buf;
>> +
>> +    int slot_index = -1;
>> +    FFVkExecContext *exec;
>> +    VkCommandBuffer cmd_buf;
>> +    VkImageMemoryBarrier2 img_bar[37];
>> +    int nb_img_bar = 0;
>> +
>> +    /* Coding start/end */
>> +    VkVideoBeginCodingInfoKHR encode_start;
>> +    VkVideoEndCodingInfoKHR encode_end = {
>> +        .sType = VK_STRUCTURE_TYPE_VIDEO_END_CODING_INFO_KHR,
>> +    };
>> +
>> +    VkVideoEncodeRateControlLayerInfoKHR rc_layer;
>> +    VkVideoEncodeRateControlInfoKHR rc_info;
>> +    VkVideoEncodeQualityLevelInfoKHR q_info;
>> +    VkVideoCodingControlInfoKHR encode_ctrl;
>> +
>> +    VkVideoReferenceSlotInfoKHR ref_slot[37];
>> +    VkVideoEncodeInfoKHR encode_info;
>> +
>> +    /* Create packet data buffer */
>> +    max_pkt_size = FFALIGN(3 * ctx->base.surface_width * ctx->base.surface_height + (1 << 16),
>> +                           ctx->caps.minBitstreamBufferSizeAlignment);
>> +
>> +    err = ff_vk_get_pooled_buffer(&ctx->s, &ctx->buf_pool, &vp->pkt_buf,
>> +                                  VK_BUFFER_USAGE_VIDEO_ENCODE_DST_BIT_KHR,
>> +                                  &ctx->profile_list, max_pkt_size,
>> +                                  VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
>> +                                  VK_MEMORY_PROPERTY_HOST_CACHED_BIT);
>> +    if (err < 0)
>> +        return err;
>> +
>> +    sd_buf = (FFVkBuffer *)vp->pkt_buf->data;
>> +
>> +    /* Setup rate control */
>> +    err = init_pic_rc(avctx, base_pic, &rc_info, &rc_layer);
>> +    if (err < 0)
>> +        return err;
>> +
>> +    q_info = (VkVideoEncodeQualityLevelInfoKHR) {
>> +        .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_QUALITY_LEVEL_INFO_KHR,
>> +        .pNext = &rc_info,
>> +        .qualityLevel = ctx->opts.quality,
>> +    };
>> +    encode_ctrl = (VkVideoCodingControlInfoKHR) {
>> +        .sType = VK_STRUCTURE_TYPE_VIDEO_CODING_CONTROL_INFO_KHR,
>> +        .pNext = &q_info,
>> +        .flags = VK_VIDEO_CODING_CONTROL_ENCODE_QUALITY_LEVEL_BIT_KHR |
>> +                 VK_VIDEO_CODING_CONTROL_ENCODE_RATE_CONTROL_BIT_KHR |
>> +                 (base_pic->force_idr ? VK_VIDEO_CODING_CONTROL_RESET_BIT_KHR : 0),
>> +    };
>> +
>> +    /* Current picture's ref slot */
>> +    vp->dpb_res = (VkVideoPictureResourceInfoKHR) {
>> +        .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR,
>> +        .pNext = NULL,
>> +        .codedOffset = { 0 },
>> +        .codedExtent = (VkExtent2D){ ctx->base.surface_width,
>> +                                     ctx->base.surface_height },
>> +        .baseArrayLayer = 0,
>> +        .imageViewBinding = vp->dpb.view,
>> +    };
>> +
>> +    for (int i = 0; i < ctx->caps.maxDpbSlots; i++) {
>> +        if (!ctx->slots[i]) {
>> +            slot_index = i;
>> +            ctx->slots[i] = 1;
>> +            break;
>> +        }
>> +    }
>> +    av_assert0(slot_index >= 0);
>> +
>> +    vp->dpb_slot = (VkVideoReferenceSlotInfoKHR) {
>> +        .sType = VK_STRUCTURE_TYPE_VIDEO_REFERENCE_SLOT_INFO_KHR,
>> +        .pNext = NULL,  // Set later
>> +        .slotIndex = slot_index,
>> +        .pPictureResource = &vp->dpb_res,
>> +    };
>> +
>> +    encode_info = (VkVideoEncodeInfoKHR) {
>> +        .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_INFO_KHR,
>> +        .pNext = NULL, // Set later
>> +        .flags = 0x0,
>> +        .srcPictureResource = (VkVideoPictureResourceInfoKHR) {
>> +            .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR,
>> +            .pNext = NULL,
>> +            .codedOffset = { 0, 0 },
>> +            .codedExtent = (VkExtent2D){ base_pic->input_image->width,
>> +                                         base_pic->input_image->height },
>> +            .baseArrayLayer = 0,
>> +            .imageViewBinding = vp->in.view,
>> +        },
>> +        .pSetupReferenceSlot = &vp->dpb_slot,
>> +        .referenceSlotCount = 0,
>> +        .pReferenceSlots = ref_slot,
>> +        .dstBuffer = sd_buf->buf,
>> +        .dstBufferOffset = 0,
>> +        .dstBufferRange = sd_buf->size,
>> +        .precedingExternallyEncodedBytes = 0,
>> +    };
>> +
>> +    for (int i = 0; i < MAX_REFERENCE_LIST_NUM; i++) {
>> +        for (int j = 0; j < base_pic->nb_refs[i]; j++) {
>> +            FFHWBaseEncodePicture *ref = base_pic->refs[i][j];
>> +            FFVulkanEncodePicture *rvp = ref->priv;
>> +            ref_slot[encode_info.referenceSlotCount++] = rvp->dpb_slot;
>> +        }
>> +    }
>> +
>> +    /* Setup picture parameters */
>> +    err = ctx->codec->init_pic_params(avctx, base_pic,
>> +                                      &encode_info);
>> +    if (err < 0)
>> +        return err;
>> +
>> +    encode_start = (VkVideoBeginCodingInfoKHR) {
>> +        .sType = VK_STRUCTURE_TYPE_VIDEO_BEGIN_CODING_INFO_KHR,
>> +        .pNext = !base_pic->force_idr ? &rc_info : NULL,
>> +        .videoSession = ctx->common.session,
>> +        .videoSessionParameters = ctx->session_params,
>> +        .referenceSlotCount = encode_info.referenceSlotCount,
>> +        .pReferenceSlots = ref_slot,
>> +    };
>> +
>> +    /* Calling vkCmdBeginVideoCodingKHR requires to declare all references
>> +     * being enabled upfront, including the current frame's output ref.
>> +     * If layered DBPs are used, make sure its not included twice. */
>> +    if (!ctx->common.layered_dpb || !encode_info.referenceSlotCount) {
>> +        ref_slot[encode_info.referenceSlotCount] = vp->dpb_slot;
>> +        ref_slot[encode_info.referenceSlotCount].slotIndex = -1;
>> +        encode_start.referenceSlotCount++;
>> +    }
>> +
>> +    /* Write header */
>> +    if (base_pic->type == FF_HW_PICTURE_TYPE_IDR) {
>> +        uint8_t *hdr_dst = sd_buf->mapped_mem + encode_info.dstBufferOffset;
>> +        size_t data_size = encode_info.dstBufferRange;
>> +        err = ctx->codec->write_sequence_headers(avctx, base_pic, hdr_dst, &data_size);
>> +        if (err < 0)
>> +            goto fail;
>> +        encode_info.dstBufferOffset += data_size;
>> +        encode_info.dstBufferRange  -= data_size;
>> +    }
>> +
>> +    /* Write extra units */
>> +    if (ctx->codec->write_extra_headers) {
>> +        uint8_t *hdr_dst = sd_buf->mapped_mem + encode_info.dstBufferOffset;
>> +        size_t data_size = encode_info.dstBufferRange;
>> +        err = ctx->codec->write_extra_headers(avctx, base_pic, hdr_dst, &data_size);
>> +        if (err < 0)
>> +            goto fail;
>> +        encode_info.dstBufferOffset += data_size;
>> +        encode_info.dstBufferRange  -= data_size;
>> +    }
>> +
>> +    /* Align buffer offset to the required value with filler units */
>> +    if (ctx->codec->write_filler) {
>> +        uint8_t *hdr_dst = sd_buf->mapped_mem + encode_info.dstBufferOffset;
>> +        size_t data_size = encode_info.dstBufferRange;
>> +
>> +        uint32_t offset = encode_info.dstBufferOffset;
>> +        size_t offset_align = ctx->caps.minBitstreamBufferOffsetAlignment;
>> +
>> +        uint32_t filler_data = FFALIGN(offset, offset_align) - offset;
>> +
>> +        if (filler_data) {
>> +            while (filler_data < ctx->codec->filler_header_size)
>> +                filler_data += offset_align;
>> +
>> +            filler_data -= ctx->codec->filler_header_size;
>> +
>> +            err = ctx->codec->write_filler(avctx, filler_data,
>> +                                           hdr_dst, &data_size);
>> +            if (err < 0)
>> +                goto fail;
>> +
>> +            encode_info.dstBufferOffset += data_size;
>> +            encode_info.dstBufferRange  -= data_size;
>> +        }
>> +    }
>> +
>> +    vp->slices_offset = encode_info.dstBufferOffset;
>> +
>> +    /* Align buffer size to the nearest lower alignment requirement. */
>> +    encode_info.dstBufferRange -= size_align;
>> +    encode_info.dstBufferRange = FFALIGN(encode_info.dstBufferRange,
>> +                                         size_align);
>> +
>> +    /* Start command buffer recording */
>> +    exec = vp->exec = ff_vk_exec_get(&ctx->enc_pool);
>> +    ff_vk_exec_start(&ctx->s, exec);
>> +    cmd_buf = exec->buf;
>> +
>> +    /* Output packet buffer */
>> +    err = ff_vk_exec_add_dep_buf(&ctx->s, exec, &vp->pkt_buf, 1, 1);
>> +    if (err < 0)
>> +        goto fail;
>> +
>> +    /* Source image */
>> +    err = ff_vk_exec_add_dep_frame(&ctx->s, exec, src,
>> +                                   VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
>> +                                   VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR);
>> +    if (err < 0)
>> +        goto fail;
>> +
>> +    /* Source image layout conversion */
>> +    img_bar[nb_img_bar] = (VkImageMemoryBarrier2) {
>> +        .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
>> +        .pNext = NULL,
>> +        .srcStageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
>> +        .srcAccessMask = vkf->access[0],
>> +        .dstStageMask = VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR,
>> +        .dstAccessMask = VK_ACCESS_2_VIDEO_ENCODE_READ_BIT_KHR,
>> +        .oldLayout = vkf->layout[0],
>> +        .newLayout = VK_IMAGE_LAYOUT_VIDEO_ENCODE_SRC_KHR,
>> +        .srcQueueFamilyIndex = vkf->queue_family[0],
>> +        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
>> +        .image = vkf->img[0],
>> +        .subresourceRange = (VkImageSubresourceRange) {
>> +            .aspectMask = vp->in.aspect,
>> +            .layerCount = 1,
>> +            .levelCount = 1,
>> +        },
>> +    };
>> +    ff_vk_exec_update_frame(&ctx->s, exec, src,
>> +                            &img_bar[nb_img_bar], &nb_img_bar);
>> +
>> +    if (!ctx->common.layered_dpb) {
>> +        /* Source image's ref slot.
>> +         * No need to do a layout conversion, since the frames which are allocated
>> +         * with a DPB usage are automatically converted. */
>> +        err = ff_vk_exec_add_dep_frame(&ctx->s, exec, base_pic->recon_image,
>> +                                       VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
>> +                                       VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR);
>> +        if (err < 0)
>> +            return err;
>> +
>> +        /* All references */
>> +        for (int i = 0; i < MAX_REFERENCE_LIST_NUM; i++) {
>> +            for (int j = 0; j < base_pic->nb_refs[i]; j++) {
>> +                FFHWBaseEncodePicture *ref = base_pic->refs[i][j];
>> +                err = ff_vk_exec_add_dep_frame(&ctx->s, exec, ref->recon_image,
>> +                                               VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
>> +                                               VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR);
>> +                if (err < 0)
>> +                    return err;
>> +            }
>> +        }
>> +    } else {
>> +        err = ff_vk_exec_add_dep_frame(&ctx->s, exec, ctx->common.layered_frame,
>> +                                       VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR,
>> +                                       VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR);
>> +        if (err < 0)
>> +            return err;
>> +    }
>> +
>> +    /* Change image layout */
>> +    vk->CmdPipelineBarrier2(cmd_buf, &(VkDependencyInfo) {
>> +            .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
>> +            .pImageMemoryBarriers = img_bar,
>> +            .imageMemoryBarrierCount = nb_img_bar,
>> +        });
>> +
>> +    /* Start, use parameters */
>> +    vk->CmdBeginVideoCodingKHR(cmd_buf, &encode_start);
>> +
>> +    /* Send control data */
>> +    if (base_pic->force_idr)
>> +        vk->CmdControlVideoCodingKHR(cmd_buf, &encode_ctrl);
>> +
>> +    /* Encode */
>> +    vk->CmdBeginQuery(cmd_buf, ctx->enc_pool.query_pool, exec->query_idx + 0, 0);
>> +    vk->CmdEncodeVideoKHR(cmd_buf, &encode_info);
>> +    vk->CmdEndQuery(cmd_buf, ctx->enc_pool.query_pool, exec->query_idx + 0);
>> +
>> +    /* End encoding */
>> +    vk->CmdEndVideoCodingKHR(cmd_buf, &encode_end);
>> +
>> +    /* End recording and submit for execution */
>> +    err = ff_vk_exec_submit(&ctx->s, vp->exec);
>> +    if (err < 0)
>> +        goto fail;
>> +
>> +    /* We don't need to keep the input image any longer, its already ref'd */
>> +    av_frame_free(&base_pic->input_image);
>> +
>> +    return 0;
>> +
>> +fail:
>> +    return err;
>> +}
>> +
>> +static void vulkan_encode_wait(AVCodecContext *avctx,
>> +                               FFHWBaseEncodePicture *base_pic)
>> +{
>> +    FFVulkanEncodeContext *ctx = avctx->priv_data;
>> +    FFVulkanEncodePicture *vp = base_pic->priv;
>> +
>> +    av_assert0(base_pic->encode_issued);
>> +
>> +    if (base_pic->encode_complete)
>> +        return;
>> +
>> +    ff_vk_exec_wait(&ctx->s, vp->exec);
>> +    base_pic->encode_complete = 1;
>> +}
>> +
>> +static int vulkan_encode_output(AVCodecContext *avctx,
>> +                                FFHWBaseEncodePicture *base_pic, AVPacket *pkt)
>> +{
>> +    VkResult ret;
>> +    FFVulkanEncodePicture *vp = base_pic->priv;
>> +    FFVulkanEncodeContext *ctx = avctx->priv_data;
>> +    FFVkBuffer *sd_buf = (FFVkBuffer *)vp->pkt_buf->data;
>> +    uint32_t *query_data;
>> +
>> +    vulkan_encode_wait(avctx, base_pic);
>> +
>> +    ret = ff_vk_exec_get_query(&ctx->s, vp->exec, (void **)&query_data, 0);
>> +    if (ret == VK_NOT_READY) {
>> +        av_log(avctx, AV_LOG_ERROR, "Unable to perform query: %s!\n",
>> +               ff_vk_ret2str(ret));
>> +        return AVERROR(EINVAL);
>> +    }
>> +
>> +    if (ret != VK_NOT_READY && ret != VK_SUCCESS) {
>> +        av_log(avctx, AV_LOG_ERROR, "Unable to perform query: %s!\n",
>> +               ff_vk_ret2str(ret));
>> +        return AVERROR_EXTERNAL;
>> +    }
>>
>> +    if (query_data[2] != VK_QUERY_RESULT_STATUS_COMPLETE_KHR) {
>> +        av_log(avctx, AV_LOG_ERROR, "Unable to encode: %u\n", query_data[2]);
>> +        return AVERROR_EXTERNAL;
>> +    }
> 
> The query pool is created with ctx->enc_caps.supportedEncodeFeedbackFlags. If
> the implementation reports all feedback flags defined by the spec,
> query_data[2] would refer to the HAS_OVERRIDES field of the feedback. In
> this case, the status would live in query_data[3].


v2 sent to the ML which fixes both issues you mentioned, plus other bugs 
I and others encountered.

The spec text says:
 > When retrieving the results of video encode feedback queries, the
 > values corresponding to each enabled video encode feedback are written
 > in the order of the bits defined above

The order they're given in is:
VK_VIDEO_ENCODE_FEEDBACK_BITSTREAM_BUFFER_OFFSET_BIT_KHR = 0x00000001,
VK_VIDEO_ENCODE_FEEDBACK_BITSTREAM_BYTES_WRITTEN_BIT_KHR = 0x00000002,
VK_VIDEO_ENCODE_FEEDBACK_BITSTREAM_HAS_OVERRIDES_BIT_KHR = 0x00000004,

So I think the code is correct on this.

Thanks for reviewing/testing
Benjamin Cheng Sept. 12, 2024, 1:20 p.m. UTC | #3
On Wed Sep 11, 2024 at 12:03 AM EDT, Lynne wrote:
> On 10/09/2024 15:29, Benjamin Cheng wrote:
> > On Mon Sep 9, 2024 at 6:37 AM EDT, Lynne via ffmpeg-devel wrote:
> >> This commit adds the common Vulkan video encoding framework.
> >> It makes full use of the asynchronous features of our new common
> >> hardware encoding code, and of Vulkan.
> >> The code is able to handle anything from H264 to AV1 and MJPEG.
> >> ---
> >>   configure                  |   2 +
> >>   libavcodec/Makefile        |   2 +-
> >>   libavcodec/vulkan_encode.c | 979 +++++++++++++++++++++++++++++++++++++
> >>   libavcodec/vulkan_encode.h | 243 +++++++++
> >>   4 files changed, 1225 insertions(+), 1 deletion(-)
> >>   create mode 100644 libavcodec/vulkan_encode.c
> >>   create mode 100644 libavcodec/vulkan_encode.h
> >>
> >> diff --git a/configure b/configure
> >> index a8e67d230c..6cfb736a86 100755
> >> --- a/configure
> >> +++ b/configure
> >> @@ -2638,6 +2638,7 @@ CONFIG_EXTRA="
> >>       vp3dsp
> >>       vp56dsp
> >>       vp8dsp
> >> +    vulkan_encode
> >>       wma_freqs
> >>       wmv2dsp
> >>   "
> >> @@ -3299,6 +3300,7 @@ qsvdec_select="qsv"
> >>   qsvenc_select="qsv"
> >>   qsvvpp_select="qsv"
> >>   vaapi_encode_deps="vaapi"
> >> +vulkan_encode_deps="vulkan"
> >>   v4l2_m2m_deps="linux_videodev2_h sem_timedwait"
> >>   
> >>   bilateral_cuda_filter_deps="ffnvcodec"
> >> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
> >> index 27ef4638ce..ff6a3c4efc 100644
> >> --- a/libavcodec/Makefile
> >> +++ b/libavcodec/Makefile
> >> @@ -1282,7 +1282,7 @@ SKIPHEADERS-$(CONFIG_QSVENC)           += qsvenc.h
> >>   SKIPHEADERS-$(CONFIG_VAAPI)            += vaapi_decode.h vaapi_hevc.h vaapi_encode.h
> >>   SKIPHEADERS-$(CONFIG_VDPAU)            += vdpau.h vdpau_internal.h
> >>   SKIPHEADERS-$(CONFIG_VIDEOTOOLBOX)     += videotoolbox.h vt_internal.h
> >> -SKIPHEADERS-$(CONFIG_VULKAN)           += vulkan.h vulkan_video.h vulkan_decode.h
> >> +SKIPHEADERS-$(CONFIG_VULKAN)           += vulkan.h vulkan_video.h vulkan_encode.h vulkan_decode.h
> >>   SKIPHEADERS-$(CONFIG_V4L2_M2M)         += v4l2_buffers.h v4l2_context.h v4l2_m2m.h
> >>   SKIPHEADERS-$(CONFIG_ZLIB)             += zlib_wrapper.h
> >>   
> >> diff --git a/libavcodec/vulkan_encode.c b/libavcodec/vulkan_encode.c
> >> new file mode 100644
> >> index 0000000000..5e87d4c073
> >> --- /dev/null
> >> +++ b/libavcodec/vulkan_encode.c
> >> @@ -0,0 +1,979 @@
> >> +/*
> >> + * This file is part of FFmpeg.
> >> + *
> >> + * FFmpeg is free software; you can redistribute it and/or
> >> + * modify it under the terms of the GNU Lesser General Public
> >> + * License as published by the Free Software Foundation; either
> >> + * version 2.1 of the License, or (at your option) any later version.
> >> + *
> >> + * FFmpeg is distributed in the hope that it will be useful,
> >> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> >> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> >> + * Lesser General Public License for more details.
> >> + *
> >> + * You should have received a copy of the GNU Lesser General Public
> >> + * License along with FFmpeg; if not, write to the Free Software
> >> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> >> + */
> >> +
> >> +#include "libavutil/mem.h"
> >> +#include "libavutil/avassert.h"
> >> +#include "vulkan_encode.h"
> >> +#include "config.h"
> >> +
> >> +#include "libavutil/vulkan_loader.h"
> >> +
> >> +const AVCodecHWConfigInternal *const ff_vulkan_encode_hw_configs[] = {
> >> +    HW_CONFIG_ENCODER_FRAMES(VULKAN, VULKAN),
> >> +    NULL,
> >> +};
> >> +
> >> +av_cold void ff_vulkan_encode_uninit(FFVulkanEncodeContext *ctx)
> >> +{
> >> +    FFVulkanContext *s = &ctx->s;
> >> +    FFVulkanFunctions *vk = &s->vkfn;
> >> +
> >> +    /* Wait on and free execution pool */
> >> +    ff_vk_exec_pool_free(s, &ctx->enc_pool);
> >> +
> >> +    /* Destroy the session params */
> >> +    if (ctx->session_params)
> >> +        vk->DestroyVideoSessionParametersKHR(s->hwctx->act_dev,
> >> +                                             ctx->session_params,
> >> +                                             s->hwctx->alloc);
> >> +
> >> +    ff_hw_base_encode_close(&ctx->base);
> >> +
> >> +    av_buffer_pool_uninit(&ctx->buf_pool);
> >> +
> >> +    ff_vk_video_common_uninit(s, &ctx->common);
> >> +
> >> +    ff_vk_uninit(s);
> >> +}
> >> +
> >> +static int vulkan_encode_init(AVCodecContext *avctx, FFHWBaseEncodePicture *pic)
> >> +{
> >> +    int err;
> >> +    FFVulkanEncodeContext *ctx = avctx->priv_data;
> >> +    FFVulkanEncodePicture *vp = pic->priv;
> >> +
> >> +    AVFrame *f = pic->input_image;
> >> +    AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data;
> >> +    AVVulkanFramesContext *vkfc = hwfc->hwctx;
> >> +    AVVkFrame *vkf = (AVVkFrame *)f->data[0];
> >> +
> >> +    if (ctx->codec->picture_priv_data_size > 0) {
> >> +        pic->codec_priv = av_mallocz(ctx->codec->picture_priv_data_size);
> >> +        if (!pic->codec_priv)
> >> +            return AVERROR(ENOMEM);
> >> +    }
> >> +
> >> +    /* Input image view */
> >> +    err = ff_vk_create_view(&ctx->s, &ctx->common,
> >> +                            &vp->in.view, &vp->in.aspect,
> >> +                            vkf, vkfc->format[0], 0);
> >> +    if (err < 0)
> >> +        return err;
> >> +
> >> +    /* Reference view */
> >> +    if (!ctx->common.layered_dpb) {
> >> +        AVFrame *rf = pic->recon_image;
> >> +        AVVkFrame *rvkf = (AVVkFrame *)rf->data[0];
> >> +        err = ff_vk_create_view(&ctx->s, &ctx->common,
> >> +                                &vp->dpb.view, &vp->dpb.aspect,
> >> +                                rvkf, ctx->pic_format, 1);
> >> +        if (err < 0)
> >> +            return err;
> >> +    } else {
> >> +        vp->dpb.view = ctx->common.layered_view;
> >> +        vp->dpb.aspect = ctx->common.layered_aspect;
> >> +    }
> >> +
> >> +    return 0;
> >> +}
> >> +
> >> +static int vulkan_encode_free(AVCodecContext *avctx, FFHWBaseEncodePicture *pic)
> >> +{
> >> +    FFVulkanEncodeContext *ctx = avctx->priv_data;
> >> +    FFVulkanFunctions *vk = &ctx->s.vkfn;
> >> +
> >> +    FFVulkanEncodePicture *vp = pic->priv;
> >> +
> >> +    if (vp->in.view)
> >> +        vk->DestroyImageView(ctx->s.hwctx->act_dev, vp->in.view,
> >> +                             ctx->s.hwctx->alloc);
> >> +
> >> +    if (!ctx->common.layered_dpb && vp->dpb.view)
> >> +        vk->DestroyImageView(ctx->s.hwctx->act_dev, vp->dpb.view,
> >> +                             ctx->s.hwctx->alloc);
> >> +
> >> +    ctx->slots[vp->dpb_slot.slotIndex] = 0;
> >> +
> >> +    return 0;
> >> +}
> >> +
> >> +static int init_pic_rc(AVCodecContext *avctx, FFHWBaseEncodePicture *pic,
> >> +                       VkVideoEncodeRateControlInfoKHR *rc_info,
> >> +                       VkVideoEncodeRateControlLayerInfoKHR *rc_layer /* Goes in ^ */)
> >> +{
> >> +    FFVulkanEncodeContext *ctx = avctx->priv_data;
> >> +
> >> +    *rc_info = (VkVideoEncodeRateControlInfoKHR) {
> >> +        .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_RATE_CONTROL_INFO_KHR,
> >> +        .rateControlMode = ctx->opts.rc_mode,
> >> +    };
> >> +
> >> +    if (ctx->opts.rc_mode > VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR) {
> >> +        *rc_layer = (VkVideoEncodeRateControlLayerInfoKHR) {
> >> +            .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_RATE_CONTROL_LAYER_INFO_KHR,
> >> +            .averageBitrate = avctx->bit_rate,
> >> +            .maxBitrate = avctx->rc_max_rate ? avctx->rc_max_rate : avctx->bit_rate,
> >> +            .frameRateNumerator = avctx->framerate.num,
> >> +            .frameRateDenominator = avctx->framerate.den,
> >> +        };
> >> +        rc_info->layerCount++;
> >> +        rc_info->pLayers = rc_layer;
> >> +    }
> >> +
> >> +    return ctx->codec->init_pic_rc(avctx, pic, rc_info, rc_layer);
> >> +}
> >> +
> >> +static int vulkan_encode_issue(AVCodecContext *avctx,
> >> +                               FFHWBaseEncodePicture *base_pic)
> >> +{
> >> +    FFVulkanEncodeContext *ctx = avctx->priv_data;
> >> +    FFVulkanFunctions *vk = &ctx->s.vkfn;
> >> +
> >> +    const size_t size_align = ctx->caps.minBitstreamBufferSizeAlignment;
> >> +
> >> +    FFVulkanEncodePicture *vp = base_pic->priv;
> >> +    AVFrame *src = (AVFrame *)base_pic->input_image;
> >> +    AVVkFrame *vkf = (AVVkFrame *)src->data[0];
> >> +
> >> +    int err, max_pkt_size;
> >> +
> >> +    FFVkBuffer *sd_buf;
> >> +
> >> +    int slot_index = -1;
> >> +    FFVkExecContext *exec;
> >> +    VkCommandBuffer cmd_buf;
> >> +    VkImageMemoryBarrier2 img_bar[37];
> >> +    int nb_img_bar = 0;
> >> +
> >> +    /* Coding start/end */
> >> +    VkVideoBeginCodingInfoKHR encode_start;
> >> +    VkVideoEndCodingInfoKHR encode_end = {
> >> +        .sType = VK_STRUCTURE_TYPE_VIDEO_END_CODING_INFO_KHR,
> >> +    };
> >> +
> >> +    VkVideoEncodeRateControlLayerInfoKHR rc_layer;
> >> +    VkVideoEncodeRateControlInfoKHR rc_info;
> >> +    VkVideoEncodeQualityLevelInfoKHR q_info;
> >> +    VkVideoCodingControlInfoKHR encode_ctrl;
> >> +
> >> +    VkVideoReferenceSlotInfoKHR ref_slot[37];
> >> +    VkVideoEncodeInfoKHR encode_info;
> >> +
> >> +    /* Create packet data buffer */
> >> +    max_pkt_size = FFALIGN(3 * ctx->base.surface_width * ctx->base.surface_height + (1 << 16),
> >> +                           ctx->caps.minBitstreamBufferSizeAlignment);
> >> +
> >> +    err = ff_vk_get_pooled_buffer(&ctx->s, &ctx->buf_pool, &vp->pkt_buf,
> >> +                                  VK_BUFFER_USAGE_VIDEO_ENCODE_DST_BIT_KHR,
> >> +                                  &ctx->profile_list, max_pkt_size,
> >> +                                  VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
> >> +                                  VK_MEMORY_PROPERTY_HOST_CACHED_BIT);
> >> +    if (err < 0)
> >> +        return err;
> >> +
> >> +    sd_buf = (FFVkBuffer *)vp->pkt_buf->data;
> >> +
> >> +    /* Setup rate control */
> >> +    err = init_pic_rc(avctx, base_pic, &rc_info, &rc_layer);
> >> +    if (err < 0)
> >> +        return err;
> >> +
> >> +    q_info = (VkVideoEncodeQualityLevelInfoKHR) {
> >> +        .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_QUALITY_LEVEL_INFO_KHR,
> >> +        .pNext = &rc_info,
> >> +        .qualityLevel = ctx->opts.quality,
> >> +    };
> >> +    encode_ctrl = (VkVideoCodingControlInfoKHR) {
> >> +        .sType = VK_STRUCTURE_TYPE_VIDEO_CODING_CONTROL_INFO_KHR,
> >> +        .pNext = &q_info,
> >> +        .flags = VK_VIDEO_CODING_CONTROL_ENCODE_QUALITY_LEVEL_BIT_KHR |
> >> +                 VK_VIDEO_CODING_CONTROL_ENCODE_RATE_CONTROL_BIT_KHR |
> >> +                 (base_pic->force_idr ? VK_VIDEO_CODING_CONTROL_RESET_BIT_KHR : 0),
> >> +    };
> >> +
> >> +    /* Current picture's ref slot */
> >> +    vp->dpb_res = (VkVideoPictureResourceInfoKHR) {
> >> +        .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR,
> >> +        .pNext = NULL,
> >> +        .codedOffset = { 0 },
> >> +        .codedExtent = (VkExtent2D){ ctx->base.surface_width,
> >> +                                     ctx->base.surface_height },
> >> +        .baseArrayLayer = 0,
> >> +        .imageViewBinding = vp->dpb.view,
> >> +    };
> >> +
> >> +    for (int i = 0; i < ctx->caps.maxDpbSlots; i++) {
> >> +        if (!ctx->slots[i]) {
> >> +            slot_index = i;
> >> +            ctx->slots[i] = 1;
> >> +            break;
> >> +        }
> >> +    }
> >> +    av_assert0(slot_index >= 0);
> >> +
> >> +    vp->dpb_slot = (VkVideoReferenceSlotInfoKHR) {
> >> +        .sType = VK_STRUCTURE_TYPE_VIDEO_REFERENCE_SLOT_INFO_KHR,
> >> +        .pNext = NULL,  // Set later
> >> +        .slotIndex = slot_index,
> >> +        .pPictureResource = &vp->dpb_res,
> >> +    };
> >> +
> >> +    encode_info = (VkVideoEncodeInfoKHR) {
> >> +        .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_INFO_KHR,
> >> +        .pNext = NULL, // Set later
> >> +        .flags = 0x0,
> >> +        .srcPictureResource = (VkVideoPictureResourceInfoKHR) {
> >> +            .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR,
> >> +            .pNext = NULL,
> >> +            .codedOffset = { 0, 0 },
> >> +            .codedExtent = (VkExtent2D){ base_pic->input_image->width,
> >> +                                         base_pic->input_image->height },
> >> +            .baseArrayLayer = 0,
> >> +            .imageViewBinding = vp->in.view,
> >> +        },
> >> +        .pSetupReferenceSlot = &vp->dpb_slot,
> >> +        .referenceSlotCount = 0,
> >> +        .pReferenceSlots = ref_slot,
> >> +        .dstBuffer = sd_buf->buf,
> >> +        .dstBufferOffset = 0,
> >> +        .dstBufferRange = sd_buf->size,
> >> +        .precedingExternallyEncodedBytes = 0,
> >> +    };
> >> +
> >> +    for (int i = 0; i < MAX_REFERENCE_LIST_NUM; i++) {
> >> +        for (int j = 0; j < base_pic->nb_refs[i]; j++) {
> >> +            FFHWBaseEncodePicture *ref = base_pic->refs[i][j];
> >> +            FFVulkanEncodePicture *rvp = ref->priv;
> >> +            ref_slot[encode_info.referenceSlotCount++] = rvp->dpb_slot;
> >> +        }
> >> +    }
> >> +
> >> +    /* Setup picture parameters */
> >> +    err = ctx->codec->init_pic_params(avctx, base_pic,
> >> +                                      &encode_info);
> >> +    if (err < 0)
> >> +        return err;
> >> +
> >> +    encode_start = (VkVideoBeginCodingInfoKHR) {
> >> +        .sType = VK_STRUCTURE_TYPE_VIDEO_BEGIN_CODING_INFO_KHR,
> >> +        .pNext = !base_pic->force_idr ? &rc_info : NULL,
> >> +        .videoSession = ctx->common.session,
> >> +        .videoSessionParameters = ctx->session_params,
> >> +        .referenceSlotCount = encode_info.referenceSlotCount,
> >> +        .pReferenceSlots = ref_slot,
> >> +    };
> >> +
> >> +    /* Calling vkCmdBeginVideoCodingKHR requires to declare all references
> >> +     * being enabled upfront, including the current frame's output ref.
> >> +     * If layered DBPs are used, make sure its not included twice. */
> >> +    if (!ctx->common.layered_dpb || !encode_info.referenceSlotCount) {
> >> +        ref_slot[encode_info.referenceSlotCount] = vp->dpb_slot;
> >> +        ref_slot[encode_info.referenceSlotCount].slotIndex = -1;
> >> +        encode_start.referenceSlotCount++;
> >> +    }
> >> +
> >> +    /* Write header */
> >> +    if (base_pic->type == FF_HW_PICTURE_TYPE_IDR) {
> >> +        uint8_t *hdr_dst = sd_buf->mapped_mem + encode_info.dstBufferOffset;
> >> +        size_t data_size = encode_info.dstBufferRange;
> >> +        err = ctx->codec->write_sequence_headers(avctx, base_pic, hdr_dst, &data_size);
> >> +        if (err < 0)
> >> +            goto fail;
> >> +        encode_info.dstBufferOffset += data_size;
> >> +        encode_info.dstBufferRange  -= data_size;
> >> +    }
> >> +
> >> +    /* Write extra units */
> >> +    if (ctx->codec->write_extra_headers) {
> >> +        uint8_t *hdr_dst = sd_buf->mapped_mem + encode_info.dstBufferOffset;
> >> +        size_t data_size = encode_info.dstBufferRange;
> >> +        err = ctx->codec->write_extra_headers(avctx, base_pic, hdr_dst, &data_size);
> >> +        if (err < 0)
> >> +            goto fail;
> >> +        encode_info.dstBufferOffset += data_size;
> >> +        encode_info.dstBufferRange  -= data_size;
> >> +    }
> >> +
> >> +    /* Align buffer offset to the required value with filler units */
> >> +    if (ctx->codec->write_filler) {
> >> +        uint8_t *hdr_dst = sd_buf->mapped_mem + encode_info.dstBufferOffset;
> >> +        size_t data_size = encode_info.dstBufferRange;
> >> +
> >> +        uint32_t offset = encode_info.dstBufferOffset;
> >> +        size_t offset_align = ctx->caps.minBitstreamBufferOffsetAlignment;
> >> +
> >> +        uint32_t filler_data = FFALIGN(offset, offset_align) - offset;
> >> +
> >> +        if (filler_data) {
> >> +            while (filler_data < ctx->codec->filler_header_size)
> >> +                filler_data += offset_align;
> >> +
> >> +            filler_data -= ctx->codec->filler_header_size;
> >> +
> >> +            err = ctx->codec->write_filler(avctx, filler_data,
> >> +                                           hdr_dst, &data_size);
> >> +            if (err < 0)
> >> +                goto fail;
> >> +
> >> +            encode_info.dstBufferOffset += data_size;
> >> +            encode_info.dstBufferRange  -= data_size;
> >> +        }
> >> +    }
> >> +
> >> +    vp->slices_offset = encode_info.dstBufferOffset;
> >> +
> >> +    /* Align buffer size to the nearest lower alignment requirement. */
> >> +    encode_info.dstBufferRange -= size_align;
> >> +    encode_info.dstBufferRange = FFALIGN(encode_info.dstBufferRange,
> >> +                                         size_align);
> >> +
> >> +    /* Start command buffer recording */
> >> +    exec = vp->exec = ff_vk_exec_get(&ctx->enc_pool);
> >> +    ff_vk_exec_start(&ctx->s, exec);
> >> +    cmd_buf = exec->buf;
> >> +
> >> +    /* Output packet buffer */
> >> +    err = ff_vk_exec_add_dep_buf(&ctx->s, exec, &vp->pkt_buf, 1, 1);
> >> +    if (err < 0)
> >> +        goto fail;
> >> +
> >> +    /* Source image */
> >> +    err = ff_vk_exec_add_dep_frame(&ctx->s, exec, src,
> >> +                                   VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
> >> +                                   VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR);
> >> +    if (err < 0)
> >> +        goto fail;
> >> +
> >> +    /* Source image layout conversion */
> >> +    img_bar[nb_img_bar] = (VkImageMemoryBarrier2) {
> >> +        .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
> >> +        .pNext = NULL,
> >> +        .srcStageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
> >> +        .srcAccessMask = vkf->access[0],
> >> +        .dstStageMask = VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR,
> >> +        .dstAccessMask = VK_ACCESS_2_VIDEO_ENCODE_READ_BIT_KHR,
> >> +        .oldLayout = vkf->layout[0],
> >> +        .newLayout = VK_IMAGE_LAYOUT_VIDEO_ENCODE_SRC_KHR,
> >> +        .srcQueueFamilyIndex = vkf->queue_family[0],
> >> +        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
> >> +        .image = vkf->img[0],
> >> +        .subresourceRange = (VkImageSubresourceRange) {
> >> +            .aspectMask = vp->in.aspect,
> >> +            .layerCount = 1,
> >> +            .levelCount = 1,
> >> +        },
> >> +    };
> >> +    ff_vk_exec_update_frame(&ctx->s, exec, src,
> >> +                            &img_bar[nb_img_bar], &nb_img_bar);
> >> +
> >> +    if (!ctx->common.layered_dpb) {
> >> +        /* Source image's ref slot.
> >> +         * No need to do a layout conversion, since the frames which are allocated
> >> +         * with a DPB usage are automatically converted. */
> >> +        err = ff_vk_exec_add_dep_frame(&ctx->s, exec, base_pic->recon_image,
> >> +                                       VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
> >> +                                       VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR);
> >> +        if (err < 0)
> >> +            return err;
> >> +
> >> +        /* All references */
> >> +        for (int i = 0; i < MAX_REFERENCE_LIST_NUM; i++) {
> >> +            for (int j = 0; j < base_pic->nb_refs[i]; j++) {
> >> +                FFHWBaseEncodePicture *ref = base_pic->refs[i][j];
> >> +                err = ff_vk_exec_add_dep_frame(&ctx->s, exec, ref->recon_image,
> >> +                                               VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
> >> +                                               VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR);
> >> +                if (err < 0)
> >> +                    return err;
> >> +            }
> >> +        }
> >> +    } else {
> >> +        err = ff_vk_exec_add_dep_frame(&ctx->s, exec, ctx->common.layered_frame,
> >> +                                       VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR,
> >> +                                       VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR);
> >> +        if (err < 0)
> >> +            return err;
> >> +    }
> >> +
> >> +    /* Change image layout */
> >> +    vk->CmdPipelineBarrier2(cmd_buf, &(VkDependencyInfo) {
> >> +            .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
> >> +            .pImageMemoryBarriers = img_bar,
> >> +            .imageMemoryBarrierCount = nb_img_bar,
> >> +        });
> >> +
> >> +    /* Start, use parameters */
> >> +    vk->CmdBeginVideoCodingKHR(cmd_buf, &encode_start);
> >> +
> >> +    /* Send control data */
> >> +    if (base_pic->force_idr)
> >> +        vk->CmdControlVideoCodingKHR(cmd_buf, &encode_ctrl);
> >> +
> >> +    /* Encode */
> >> +    vk->CmdBeginQuery(cmd_buf, ctx->enc_pool.query_pool, exec->query_idx + 0, 0);
> >> +    vk->CmdEncodeVideoKHR(cmd_buf, &encode_info);
> >> +    vk->CmdEndQuery(cmd_buf, ctx->enc_pool.query_pool, exec->query_idx + 0);
> >> +
> >> +    /* End encoding */
> >> +    vk->CmdEndVideoCodingKHR(cmd_buf, &encode_end);
> >> +
> >> +    /* End recording and submit for execution */
> >> +    err = ff_vk_exec_submit(&ctx->s, vp->exec);
> >> +    if (err < 0)
> >> +        goto fail;
> >> +
> >> +    /* We don't need to keep the input image any longer, its already ref'd */
> >> +    av_frame_free(&base_pic->input_image);
> >> +
> >> +    return 0;
> >> +
> >> +fail:
> >> +    return err;
> >> +}
> >> +
> >> +static void vulkan_encode_wait(AVCodecContext *avctx,
> >> +                               FFHWBaseEncodePicture *base_pic)
> >> +{
> >> +    FFVulkanEncodeContext *ctx = avctx->priv_data;
> >> +    FFVulkanEncodePicture *vp = base_pic->priv;
> >> +
> >> +    av_assert0(base_pic->encode_issued);
> >> +
> >> +    if (base_pic->encode_complete)
> >> +        return;
> >> +
> >> +    ff_vk_exec_wait(&ctx->s, vp->exec);
> >> +    base_pic->encode_complete = 1;
> >> +}
> >> +
> >> +static int vulkan_encode_output(AVCodecContext *avctx,
> >> +                                FFHWBaseEncodePicture *base_pic, AVPacket *pkt)
> >> +{
> >> +    VkResult ret;
> >> +    FFVulkanEncodePicture *vp = base_pic->priv;
> >> +    FFVulkanEncodeContext *ctx = avctx->priv_data;
> >> +    FFVkBuffer *sd_buf = (FFVkBuffer *)vp->pkt_buf->data;
> >> +    uint32_t *query_data;
> >> +
> >> +    vulkan_encode_wait(avctx, base_pic);
> >> +
> >> +    ret = ff_vk_exec_get_query(&ctx->s, vp->exec, (void **)&query_data, 0);
> >> +    if (ret == VK_NOT_READY) {
> >> +        av_log(avctx, AV_LOG_ERROR, "Unable to perform query: %s!\n",
> >> +               ff_vk_ret2str(ret));
> >> +        return AVERROR(EINVAL);
> >> +    }
> >> +
> >> +    if (ret != VK_NOT_READY && ret != VK_SUCCESS) {
> >> +        av_log(avctx, AV_LOG_ERROR, "Unable to perform query: %s!\n",
> >> +               ff_vk_ret2str(ret));
> >> +        return AVERROR_EXTERNAL;
> >> +    }
> >>
> >> +    if (query_data[2] != VK_QUERY_RESULT_STATUS_COMPLETE_KHR) {
> >> +        av_log(avctx, AV_LOG_ERROR, "Unable to encode: %u\n", query_data[2]);
> >> +        return AVERROR_EXTERNAL;
> >> +    }
> > 
> > The query pool is created with ctx->enc_caps.supportedEncodeFeedbackFlags. If
> > the implementation reports all feedback flags defined by the spec,
> > query_data[2] would refer to the HAS_OVERRIDES field of the feedback. In
> > this case, the status would live in query_data[3].
>
>
> v2 sent to the ML which fixes both issues you mentioned, plus other bugs 
> I and others encountered.
>
> The spec text says:
>  > When retrieving the results of video encode feedback queries, the
>  > values corresponding to each enabled video encode feedback are written
>  > in the order of the bits defined above
>
> The order they're given in is:
> VK_VIDEO_ENCODE_FEEDBACK_BITSTREAM_BUFFER_OFFSET_BIT_KHR = 0x00000001,
> VK_VIDEO_ENCODE_FEEDBACK_BITSTREAM_BYTES_WRITTEN_BIT_KHR = 0x00000002,
> VK_VIDEO_ENCODE_FEEDBACK_BITSTREAM_HAS_OVERRIDES_BIT_KHR = 0x00000004,

Yes, so if all these feedback bits are reported as supported the query
feedback buffer would look like:
 
- query_data[0]: BUFFER_OFFSET
- query_data[1]: BYTES_WRITTEN
- query_data[2]: HAS_OVERRIDES

Then, the result status appears at query_data[3] (if
VK_QUERY_RESULT_WITH_STATUS_BIT_KHR is set).

>
> So I think the code is correct on this.
>
> Thanks for reviewing/testing
Lynne Sept. 15, 2024, 9:54 a.m. UTC | #4
On 12/09/2024 15:20, Benjamin Cheng wrote:
> On Wed Sep 11, 2024 at 12:03 AM EDT, Lynne wrote:
>> On 10/09/2024 15:29, Benjamin Cheng wrote:
>>> On Mon Sep 9, 2024 at 6:37 AM EDT, Lynne via ffmpeg-devel wrote:
>>>> This commit adds the common Vulkan video encoding framework.
>>>> It makes full use of the asynchronous features of our new common
>>>> hardware encoding code, and of Vulkan.
>>>> The code is able to handle anything from H264 to AV1 and MJPEG.
>>>> ---
>>>>    configure                  |   2 +
>>>>    libavcodec/Makefile        |   2 +-
>>>>    libavcodec/vulkan_encode.c | 979 +++++++++++++++++++++++++++++++++++++
>>>>    libavcodec/vulkan_encode.h | 243 +++++++++
>>>>    4 files changed, 1225 insertions(+), 1 deletion(-)
>>>>    create mode 100644 libavcodec/vulkan_encode.c
>>>>    create mode 100644 libavcodec/vulkan_encode.h
>>>>
>>>> diff --git a/configure b/configure
>>>> index a8e67d230c..6cfb736a86 100755
>>>> --- a/configure
>>>> +++ b/configure
>>>> @@ -2638,6 +2638,7 @@ CONFIG_EXTRA="
>>>>        vp3dsp
>>>>        vp56dsp
>>>>        vp8dsp
>>>> +    vulkan_encode
>>>>        wma_freqs
>>>>        wmv2dsp
>>>>    "
>>>> @@ -3299,6 +3300,7 @@ qsvdec_select="qsv"
>>>>    qsvenc_select="qsv"
>>>>    qsvvpp_select="qsv"
>>>>    vaapi_encode_deps="vaapi"
>>>> +vulkan_encode_deps="vulkan"
>>>>    v4l2_m2m_deps="linux_videodev2_h sem_timedwait"
>>>>    
>>>>    bilateral_cuda_filter_deps="ffnvcodec"
>>>> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
>>>> index 27ef4638ce..ff6a3c4efc 100644
>>>> --- a/libavcodec/Makefile
>>>> +++ b/libavcodec/Makefile
>>>> @@ -1282,7 +1282,7 @@ SKIPHEADERS-$(CONFIG_QSVENC)           += qsvenc.h
>>>>    SKIPHEADERS-$(CONFIG_VAAPI)            += vaapi_decode.h vaapi_hevc.h vaapi_encode.h
>>>>    SKIPHEADERS-$(CONFIG_VDPAU)            += vdpau.h vdpau_internal.h
>>>>    SKIPHEADERS-$(CONFIG_VIDEOTOOLBOX)     += videotoolbox.h vt_internal.h
>>>> -SKIPHEADERS-$(CONFIG_VULKAN)           += vulkan.h vulkan_video.h vulkan_decode.h
>>>> +SKIPHEADERS-$(CONFIG_VULKAN)           += vulkan.h vulkan_video.h vulkan_encode.h vulkan_decode.h
>>>>    SKIPHEADERS-$(CONFIG_V4L2_M2M)         += v4l2_buffers.h v4l2_context.h v4l2_m2m.h
>>>>    SKIPHEADERS-$(CONFIG_ZLIB)             += zlib_wrapper.h
>>>>    
>>>> diff --git a/libavcodec/vulkan_encode.c b/libavcodec/vulkan_encode.c
>>>> new file mode 100644
>>>> index 0000000000..5e87d4c073
>>>> --- /dev/null
>>>> +++ b/libavcodec/vulkan_encode.c
>>>> @@ -0,0 +1,979 @@
>>>> +/*
>>>> + * This file is part of FFmpeg.
>>>> + *
>>>> + * FFmpeg is free software; you can redistribute it and/or
>>>> + * modify it under the terms of the GNU Lesser General Public
>>>> + * License as published by the Free Software Foundation; either
>>>> + * version 2.1 of the License, or (at your option) any later version.
>>>> + *
>>>> + * FFmpeg is distributed in the hope that it will be useful,
>>>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>>>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>>>> + * Lesser General Public License for more details.
>>>> + *
>>>> + * You should have received a copy of the GNU Lesser General Public
>>>> + * License along with FFmpeg; if not, write to the Free Software
>>>> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
>>>> + */
>>>> +
>>>> +#include "libavutil/mem.h"
>>>> +#include "libavutil/avassert.h"
>>>> +#include "vulkan_encode.h"
>>>> +#include "config.h"
>>>> +
>>>> +#include "libavutil/vulkan_loader.h"
>>>> +
>>>> +const AVCodecHWConfigInternal *const ff_vulkan_encode_hw_configs[] = {
>>>> +    HW_CONFIG_ENCODER_FRAMES(VULKAN, VULKAN),
>>>> +    NULL,
>>>> +};
>>>> +
>>>> +av_cold void ff_vulkan_encode_uninit(FFVulkanEncodeContext *ctx)
>>>> +{
>>>> +    FFVulkanContext *s = &ctx->s;
>>>> +    FFVulkanFunctions *vk = &s->vkfn;
>>>> +
>>>> +    /* Wait on and free execution pool */
>>>> +    ff_vk_exec_pool_free(s, &ctx->enc_pool);
>>>> +
>>>> +    /* Destroy the session params */
>>>> +    if (ctx->session_params)
>>>> +        vk->DestroyVideoSessionParametersKHR(s->hwctx->act_dev,
>>>> +                                             ctx->session_params,
>>>> +                                             s->hwctx->alloc);
>>>> +
>>>> +    ff_hw_base_encode_close(&ctx->base);
>>>> +
>>>> +    av_buffer_pool_uninit(&ctx->buf_pool);
>>>> +
>>>> +    ff_vk_video_common_uninit(s, &ctx->common);
>>>> +
>>>> +    ff_vk_uninit(s);
>>>> +}
>>>> +
>>>> +static int vulkan_encode_init(AVCodecContext *avctx, FFHWBaseEncodePicture *pic)
>>>> +{
>>>> +    int err;
>>>> +    FFVulkanEncodeContext *ctx = avctx->priv_data;
>>>> +    FFVulkanEncodePicture *vp = pic->priv;
>>>> +
>>>> +    AVFrame *f = pic->input_image;
>>>> +    AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data;
>>>> +    AVVulkanFramesContext *vkfc = hwfc->hwctx;
>>>> +    AVVkFrame *vkf = (AVVkFrame *)f->data[0];
>>>> +
>>>> +    if (ctx->codec->picture_priv_data_size > 0) {
>>>> +        pic->codec_priv = av_mallocz(ctx->codec->picture_priv_data_size);
>>>> +        if (!pic->codec_priv)
>>>> +            return AVERROR(ENOMEM);
>>>> +    }
>>>> +
>>>> +    /* Input image view */
>>>> +    err = ff_vk_create_view(&ctx->s, &ctx->common,
>>>> +                            &vp->in.view, &vp->in.aspect,
>>>> +                            vkf, vkfc->format[0], 0);
>>>> +    if (err < 0)
>>>> +        return err;
>>>> +
>>>> +    /* Reference view */
>>>> +    if (!ctx->common.layered_dpb) {
>>>> +        AVFrame *rf = pic->recon_image;
>>>> +        AVVkFrame *rvkf = (AVVkFrame *)rf->data[0];
>>>> +        err = ff_vk_create_view(&ctx->s, &ctx->common,
>>>> +                                &vp->dpb.view, &vp->dpb.aspect,
>>>> +                                rvkf, ctx->pic_format, 1);
>>>> +        if (err < 0)
>>>> +            return err;
>>>> +    } else {
>>>> +        vp->dpb.view = ctx->common.layered_view;
>>>> +        vp->dpb.aspect = ctx->common.layered_aspect;
>>>> +    }
>>>> +
>>>> +    return 0;
>>>> +}
>>>> +
>>>> +static int vulkan_encode_free(AVCodecContext *avctx, FFHWBaseEncodePicture *pic)
>>>> +{
>>>> +    FFVulkanEncodeContext *ctx = avctx->priv_data;
>>>> +    FFVulkanFunctions *vk = &ctx->s.vkfn;
>>>> +
>>>> +    FFVulkanEncodePicture *vp = pic->priv;
>>>> +
>>>> +    if (vp->in.view)
>>>> +        vk->DestroyImageView(ctx->s.hwctx->act_dev, vp->in.view,
>>>> +                             ctx->s.hwctx->alloc);
>>>> +
>>>> +    if (!ctx->common.layered_dpb && vp->dpb.view)
>>>> +        vk->DestroyImageView(ctx->s.hwctx->act_dev, vp->dpb.view,
>>>> +                             ctx->s.hwctx->alloc);
>>>> +
>>>> +    ctx->slots[vp->dpb_slot.slotIndex] = 0;
>>>> +
>>>> +    return 0;
>>>> +}
>>>> +
>>>> +static int init_pic_rc(AVCodecContext *avctx, FFHWBaseEncodePicture *pic,
>>>> +                       VkVideoEncodeRateControlInfoKHR *rc_info,
>>>> +                       VkVideoEncodeRateControlLayerInfoKHR *rc_layer /* Goes in ^ */)
>>>> +{
>>>> +    FFVulkanEncodeContext *ctx = avctx->priv_data;
>>>> +
>>>> +    *rc_info = (VkVideoEncodeRateControlInfoKHR) {
>>>> +        .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_RATE_CONTROL_INFO_KHR,
>>>> +        .rateControlMode = ctx->opts.rc_mode,
>>>> +    };
>>>> +
>>>> +    if (ctx->opts.rc_mode > VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR) {
>>>> +        *rc_layer = (VkVideoEncodeRateControlLayerInfoKHR) {
>>>> +            .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_RATE_CONTROL_LAYER_INFO_KHR,
>>>> +            .averageBitrate = avctx->bit_rate,
>>>> +            .maxBitrate = avctx->rc_max_rate ? avctx->rc_max_rate : avctx->bit_rate,
>>>> +            .frameRateNumerator = avctx->framerate.num,
>>>> +            .frameRateDenominator = avctx->framerate.den,
>>>> +        };
>>>> +        rc_info->layerCount++;
>>>> +        rc_info->pLayers = rc_layer;
>>>> +    }
>>>> +
>>>> +    return ctx->codec->init_pic_rc(avctx, pic, rc_info, rc_layer);
>>>> +}
>>>> +
>>>> +static int vulkan_encode_issue(AVCodecContext *avctx,
>>>> +                               FFHWBaseEncodePicture *base_pic)
>>>> +{
>>>> +    FFVulkanEncodeContext *ctx = avctx->priv_data;
>>>> +    FFVulkanFunctions *vk = &ctx->s.vkfn;
>>>> +
>>>> +    const size_t size_align = ctx->caps.minBitstreamBufferSizeAlignment;
>>>> +
>>>> +    FFVulkanEncodePicture *vp = base_pic->priv;
>>>> +    AVFrame *src = (AVFrame *)base_pic->input_image;
>>>> +    AVVkFrame *vkf = (AVVkFrame *)src->data[0];
>>>> +
>>>> +    int err, max_pkt_size;
>>>> +
>>>> +    FFVkBuffer *sd_buf;
>>>> +
>>>> +    int slot_index = -1;
>>>> +    FFVkExecContext *exec;
>>>> +    VkCommandBuffer cmd_buf;
>>>> +    VkImageMemoryBarrier2 img_bar[37];
>>>> +    int nb_img_bar = 0;
>>>> +
>>>> +    /* Coding start/end */
>>>> +    VkVideoBeginCodingInfoKHR encode_start;
>>>> +    VkVideoEndCodingInfoKHR encode_end = {
>>>> +        .sType = VK_STRUCTURE_TYPE_VIDEO_END_CODING_INFO_KHR,
>>>> +    };
>>>> +
>>>> +    VkVideoEncodeRateControlLayerInfoKHR rc_layer;
>>>> +    VkVideoEncodeRateControlInfoKHR rc_info;
>>>> +    VkVideoEncodeQualityLevelInfoKHR q_info;
>>>> +    VkVideoCodingControlInfoKHR encode_ctrl;
>>>> +
>>>> +    VkVideoReferenceSlotInfoKHR ref_slot[37];
>>>> +    VkVideoEncodeInfoKHR encode_info;
>>>> +
>>>> +    /* Create packet data buffer */
>>>> +    max_pkt_size = FFALIGN(3 * ctx->base.surface_width * ctx->base.surface_height + (1 << 16),
>>>> +                           ctx->caps.minBitstreamBufferSizeAlignment);
>>>> +
>>>> +    err = ff_vk_get_pooled_buffer(&ctx->s, &ctx->buf_pool, &vp->pkt_buf,
>>>> +                                  VK_BUFFER_USAGE_VIDEO_ENCODE_DST_BIT_KHR,
>>>> +                                  &ctx->profile_list, max_pkt_size,
>>>> +                                  VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
>>>> +                                  VK_MEMORY_PROPERTY_HOST_CACHED_BIT);
>>>> +    if (err < 0)
>>>> +        return err;
>>>> +
>>>> +    sd_buf = (FFVkBuffer *)vp->pkt_buf->data;
>>>> +
>>>> +    /* Setup rate control */
>>>> +    err = init_pic_rc(avctx, base_pic, &rc_info, &rc_layer);
>>>> +    if (err < 0)
>>>> +        return err;
>>>> +
>>>> +    q_info = (VkVideoEncodeQualityLevelInfoKHR) {
>>>> +        .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_QUALITY_LEVEL_INFO_KHR,
>>>> +        .pNext = &rc_info,
>>>> +        .qualityLevel = ctx->opts.quality,
>>>> +    };
>>>> +    encode_ctrl = (VkVideoCodingControlInfoKHR) {
>>>> +        .sType = VK_STRUCTURE_TYPE_VIDEO_CODING_CONTROL_INFO_KHR,
>>>> +        .pNext = &q_info,
>>>> +        .flags = VK_VIDEO_CODING_CONTROL_ENCODE_QUALITY_LEVEL_BIT_KHR |
>>>> +                 VK_VIDEO_CODING_CONTROL_ENCODE_RATE_CONTROL_BIT_KHR |
>>>> +                 (base_pic->force_idr ? VK_VIDEO_CODING_CONTROL_RESET_BIT_KHR : 0),
>>>> +    };
>>>> +
>>>> +    /* Current picture's ref slot */
>>>> +    vp->dpb_res = (VkVideoPictureResourceInfoKHR) {
>>>> +        .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR,
>>>> +        .pNext = NULL,
>>>> +        .codedOffset = { 0 },
>>>> +        .codedExtent = (VkExtent2D){ ctx->base.surface_width,
>>>> +                                     ctx->base.surface_height },
>>>> +        .baseArrayLayer = 0,
>>>> +        .imageViewBinding = vp->dpb.view,
>>>> +    };
>>>> +
>>>> +    for (int i = 0; i < ctx->caps.maxDpbSlots; i++) {
>>>> +        if (!ctx->slots[i]) {
>>>> +            slot_index = i;
>>>> +            ctx->slots[i] = 1;
>>>> +            break;
>>>> +        }
>>>> +    }
>>>> +    av_assert0(slot_index >= 0);
>>>> +
>>>> +    vp->dpb_slot = (VkVideoReferenceSlotInfoKHR) {
>>>> +        .sType = VK_STRUCTURE_TYPE_VIDEO_REFERENCE_SLOT_INFO_KHR,
>>>> +        .pNext = NULL,  // Set later
>>>> +        .slotIndex = slot_index,
>>>> +        .pPictureResource = &vp->dpb_res,
>>>> +    };
>>>> +
>>>> +    encode_info = (VkVideoEncodeInfoKHR) {
>>>> +        .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_INFO_KHR,
>>>> +        .pNext = NULL, // Set later
>>>> +        .flags = 0x0,
>>>> +        .srcPictureResource = (VkVideoPictureResourceInfoKHR) {
>>>> +            .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR,
>>>> +            .pNext = NULL,
>>>> +            .codedOffset = { 0, 0 },
>>>> +            .codedExtent = (VkExtent2D){ base_pic->input_image->width,
>>>> +                                         base_pic->input_image->height },
>>>> +            .baseArrayLayer = 0,
>>>> +            .imageViewBinding = vp->in.view,
>>>> +        },
>>>> +        .pSetupReferenceSlot = &vp->dpb_slot,
>>>> +        .referenceSlotCount = 0,
>>>> +        .pReferenceSlots = ref_slot,
>>>> +        .dstBuffer = sd_buf->buf,
>>>> +        .dstBufferOffset = 0,
>>>> +        .dstBufferRange = sd_buf->size,
>>>> +        .precedingExternallyEncodedBytes = 0,
>>>> +    };
>>>> +
>>>> +    for (int i = 0; i < MAX_REFERENCE_LIST_NUM; i++) {
>>>> +        for (int j = 0; j < base_pic->nb_refs[i]; j++) {
>>>> +            FFHWBaseEncodePicture *ref = base_pic->refs[i][j];
>>>> +            FFVulkanEncodePicture *rvp = ref->priv;
>>>> +            ref_slot[encode_info.referenceSlotCount++] = rvp->dpb_slot;
>>>> +        }
>>>> +    }
>>>> +
>>>> +    /* Setup picture parameters */
>>>> +    err = ctx->codec->init_pic_params(avctx, base_pic,
>>>> +                                      &encode_info);
>>>> +    if (err < 0)
>>>> +        return err;
>>>> +
>>>> +    encode_start = (VkVideoBeginCodingInfoKHR) {
>>>> +        .sType = VK_STRUCTURE_TYPE_VIDEO_BEGIN_CODING_INFO_KHR,
>>>> +        .pNext = !base_pic->force_idr ? &rc_info : NULL,
>>>> +        .videoSession = ctx->common.session,
>>>> +        .videoSessionParameters = ctx->session_params,
>>>> +        .referenceSlotCount = encode_info.referenceSlotCount,
>>>> +        .pReferenceSlots = ref_slot,
>>>> +    };
>>>> +
>>>> +    /* Calling vkCmdBeginVideoCodingKHR requires to declare all references
>>>> +     * being enabled upfront, including the current frame's output ref.
>>>> +     * If layered DBPs are used, make sure its not included twice. */
>>>> +    if (!ctx->common.layered_dpb || !encode_info.referenceSlotCount) {
>>>> +        ref_slot[encode_info.referenceSlotCount] = vp->dpb_slot;
>>>> +        ref_slot[encode_info.referenceSlotCount].slotIndex = -1;
>>>> +        encode_start.referenceSlotCount++;
>>>> +    }
>>>> +
>>>> +    /* Write header */
>>>> +    if (base_pic->type == FF_HW_PICTURE_TYPE_IDR) {
>>>> +        uint8_t *hdr_dst = sd_buf->mapped_mem + encode_info.dstBufferOffset;
>>>> +        size_t data_size = encode_info.dstBufferRange;
>>>> +        err = ctx->codec->write_sequence_headers(avctx, base_pic, hdr_dst, &data_size);
>>>> +        if (err < 0)
>>>> +            goto fail;
>>>> +        encode_info.dstBufferOffset += data_size;
>>>> +        encode_info.dstBufferRange  -= data_size;
>>>> +    }
>>>> +
>>>> +    /* Write extra units */
>>>> +    if (ctx->codec->write_extra_headers) {
>>>> +        uint8_t *hdr_dst = sd_buf->mapped_mem + encode_info.dstBufferOffset;
>>>> +        size_t data_size = encode_info.dstBufferRange;
>>>> +        err = ctx->codec->write_extra_headers(avctx, base_pic, hdr_dst, &data_size);
>>>> +        if (err < 0)
>>>> +            goto fail;
>>>> +        encode_info.dstBufferOffset += data_size;
>>>> +        encode_info.dstBufferRange  -= data_size;
>>>> +    }
>>>> +
>>>> +    /* Align buffer offset to the required value with filler units */
>>>> +    if (ctx->codec->write_filler) {
>>>> +        uint8_t *hdr_dst = sd_buf->mapped_mem + encode_info.dstBufferOffset;
>>>> +        size_t data_size = encode_info.dstBufferRange;
>>>> +
>>>> +        uint32_t offset = encode_info.dstBufferOffset;
>>>> +        size_t offset_align = ctx->caps.minBitstreamBufferOffsetAlignment;
>>>> +
>>>> +        uint32_t filler_data = FFALIGN(offset, offset_align) - offset;
>>>> +
>>>> +        if (filler_data) {
>>>> +            while (filler_data < ctx->codec->filler_header_size)
>>>> +                filler_data += offset_align;
>>>> +
>>>> +            filler_data -= ctx->codec->filler_header_size;
>>>> +
>>>> +            err = ctx->codec->write_filler(avctx, filler_data,
>>>> +                                           hdr_dst, &data_size);
>>>> +            if (err < 0)
>>>> +                goto fail;
>>>> +
>>>> +            encode_info.dstBufferOffset += data_size;
>>>> +            encode_info.dstBufferRange  -= data_size;
>>>> +        }
>>>> +    }
>>>> +
>>>> +    vp->slices_offset = encode_info.dstBufferOffset;
>>>> +
>>>> +    /* Align buffer size to the nearest lower alignment requirement. */
>>>> +    encode_info.dstBufferRange -= size_align;
>>>> +    encode_info.dstBufferRange = FFALIGN(encode_info.dstBufferRange,
>>>> +                                         size_align);
>>>> +
>>>> +    /* Start command buffer recording */
>>>> +    exec = vp->exec = ff_vk_exec_get(&ctx->enc_pool);
>>>> +    ff_vk_exec_start(&ctx->s, exec);
>>>> +    cmd_buf = exec->buf;
>>>> +
>>>> +    /* Output packet buffer */
>>>> +    err = ff_vk_exec_add_dep_buf(&ctx->s, exec, &vp->pkt_buf, 1, 1);
>>>> +    if (err < 0)
>>>> +        goto fail;
>>>> +
>>>> +    /* Source image */
>>>> +    err = ff_vk_exec_add_dep_frame(&ctx->s, exec, src,
>>>> +                                   VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
>>>> +                                   VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR);
>>>> +    if (err < 0)
>>>> +        goto fail;
>>>> +
>>>> +    /* Source image layout conversion */
>>>> +    img_bar[nb_img_bar] = (VkImageMemoryBarrier2) {
>>>> +        .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
>>>> +        .pNext = NULL,
>>>> +        .srcStageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
>>>> +        .srcAccessMask = vkf->access[0],
>>>> +        .dstStageMask = VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR,
>>>> +        .dstAccessMask = VK_ACCESS_2_VIDEO_ENCODE_READ_BIT_KHR,
>>>> +        .oldLayout = vkf->layout[0],
>>>> +        .newLayout = VK_IMAGE_LAYOUT_VIDEO_ENCODE_SRC_KHR,
>>>> +        .srcQueueFamilyIndex = vkf->queue_family[0],
>>>> +        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
>>>> +        .image = vkf->img[0],
>>>> +        .subresourceRange = (VkImageSubresourceRange) {
>>>> +            .aspectMask = vp->in.aspect,
>>>> +            .layerCount = 1,
>>>> +            .levelCount = 1,
>>>> +        },
>>>> +    };
>>>> +    ff_vk_exec_update_frame(&ctx->s, exec, src,
>>>> +                            &img_bar[nb_img_bar], &nb_img_bar);
>>>> +
>>>> +    if (!ctx->common.layered_dpb) {
>>>> +        /* Source image's ref slot.
>>>> +         * No need to do a layout conversion, since the frames which are allocated
>>>> +         * with a DPB usage are automatically converted. */
>>>> +        err = ff_vk_exec_add_dep_frame(&ctx->s, exec, base_pic->recon_image,
>>>> +                                       VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
>>>> +                                       VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR);
>>>> +        if (err < 0)
>>>> +            return err;
>>>> +
>>>> +        /* All references */
>>>> +        for (int i = 0; i < MAX_REFERENCE_LIST_NUM; i++) {
>>>> +            for (int j = 0; j < base_pic->nb_refs[i]; j++) {
>>>> +                FFHWBaseEncodePicture *ref = base_pic->refs[i][j];
>>>> +                err = ff_vk_exec_add_dep_frame(&ctx->s, exec, ref->recon_image,
>>>> +                                               VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
>>>> +                                               VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR);
>>>> +                if (err < 0)
>>>> +                    return err;
>>>> +            }
>>>> +        }
>>>> +    } else {
>>>> +        err = ff_vk_exec_add_dep_frame(&ctx->s, exec, ctx->common.layered_frame,
>>>> +                                       VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR,
>>>> +                                       VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR);
>>>> +        if (err < 0)
>>>> +            return err;
>>>> +    }
>>>> +
>>>> +    /* Change image layout */
>>>> +    vk->CmdPipelineBarrier2(cmd_buf, &(VkDependencyInfo) {
>>>> +            .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
>>>> +            .pImageMemoryBarriers = img_bar,
>>>> +            .imageMemoryBarrierCount = nb_img_bar,
>>>> +        });
>>>> +
>>>> +    /* Start, use parameters */
>>>> +    vk->CmdBeginVideoCodingKHR(cmd_buf, &encode_start);
>>>> +
>>>> +    /* Send control data */
>>>> +    if (base_pic->force_idr)
>>>> +        vk->CmdControlVideoCodingKHR(cmd_buf, &encode_ctrl);
>>>> +
>>>> +    /* Encode */
>>>> +    vk->CmdBeginQuery(cmd_buf, ctx->enc_pool.query_pool, exec->query_idx + 0, 0);
>>>> +    vk->CmdEncodeVideoKHR(cmd_buf, &encode_info);
>>>> +    vk->CmdEndQuery(cmd_buf, ctx->enc_pool.query_pool, exec->query_idx + 0);
>>>> +
>>>> +    /* End encoding */
>>>> +    vk->CmdEndVideoCodingKHR(cmd_buf, &encode_end);
>>>> +
>>>> +    /* End recording and submit for execution */
>>>> +    err = ff_vk_exec_submit(&ctx->s, vp->exec);
>>>> +    if (err < 0)
>>>> +        goto fail;
>>>> +
>>>> +    /* We don't need to keep the input image any longer, its already ref'd */
>>>> +    av_frame_free(&base_pic->input_image);
>>>> +
>>>> +    return 0;
>>>> +
>>>> +fail:
>>>> +    return err;
>>>> +}
>>>> +
>>>> +static void vulkan_encode_wait(AVCodecContext *avctx,
>>>> +                               FFHWBaseEncodePicture *base_pic)
>>>> +{
>>>> +    FFVulkanEncodeContext *ctx = avctx->priv_data;
>>>> +    FFVulkanEncodePicture *vp = base_pic->priv;
>>>> +
>>>> +    av_assert0(base_pic->encode_issued);
>>>> +
>>>> +    if (base_pic->encode_complete)
>>>> +        return;
>>>> +
>>>> +    ff_vk_exec_wait(&ctx->s, vp->exec);
>>>> +    base_pic->encode_complete = 1;
>>>> +}
>>>> +
>>>> +static int vulkan_encode_output(AVCodecContext *avctx,
>>>> +                                FFHWBaseEncodePicture *base_pic, AVPacket *pkt)
>>>> +{
>>>> +    VkResult ret;
>>>> +    FFVulkanEncodePicture *vp = base_pic->priv;
>>>> +    FFVulkanEncodeContext *ctx = avctx->priv_data;
>>>> +    FFVkBuffer *sd_buf = (FFVkBuffer *)vp->pkt_buf->data;
>>>> +    uint32_t *query_data;
>>>> +
>>>> +    vulkan_encode_wait(avctx, base_pic);
>>>> +
>>>> +    ret = ff_vk_exec_get_query(&ctx->s, vp->exec, (void **)&query_data, 0);
>>>> +    if (ret == VK_NOT_READY) {
>>>> +        av_log(avctx, AV_LOG_ERROR, "Unable to perform query: %s!\n",
>>>> +               ff_vk_ret2str(ret));
>>>> +        return AVERROR(EINVAL);
>>>> +    }
>>>> +
>>>> +    if (ret != VK_NOT_READY && ret != VK_SUCCESS) {
>>>> +        av_log(avctx, AV_LOG_ERROR, "Unable to perform query: %s!\n",
>>>> +               ff_vk_ret2str(ret));
>>>> +        return AVERROR_EXTERNAL;
>>>> +    }
>>>>
>>>> +    if (query_data[2] != VK_QUERY_RESULT_STATUS_COMPLETE_KHR) {
>>>> +        av_log(avctx, AV_LOG_ERROR, "Unable to encode: %u\n", query_data[2]);
>>>> +        return AVERROR_EXTERNAL;
>>>> +    }
>>>
>>> The query pool is created with ctx->enc_caps.supportedEncodeFeedbackFlags. If
>>> the implementation reports all feedback flags defined by the spec,
>>> query_data[2] would refer to the HAS_OVERRIDES field of the feedback. In
>>> this case, the status would live in query_data[3].
>>
>>
>> v2 sent to the ML which fixes both issues you mentioned, plus other bugs
>> I and others encountered.
>>
>> The spec text says:
>>   > When retrieving the results of video encode feedback queries, the
>>   > values corresponding to each enabled video encode feedback are written
>>   > in the order of the bits defined above
>>
>> The order they're given in is:
>> VK_VIDEO_ENCODE_FEEDBACK_BITSTREAM_BUFFER_OFFSET_BIT_KHR = 0x00000001,
>> VK_VIDEO_ENCODE_FEEDBACK_BITSTREAM_BYTES_WRITTEN_BIT_KHR = 0x00000002,
>> VK_VIDEO_ENCODE_FEEDBACK_BITSTREAM_HAS_OVERRIDES_BIT_KHR = 0x00000004,
> 
> Yes, so if all these feedback bits are reported as supported the query
> feedback buffer would look like:
>   
> - query_data[0]: BUFFER_OFFSET
> - query_data[1]: BYTES_WRITTEN
> - query_data[2]: HAS_OVERRIDES
> 
> Then, the result status appears at query_data[3] (if
> VK_QUERY_RESULT_WITH_STATUS_BIT_KHR is set).
> 
>>
>> So I think the code is correct on this.
>>
>> Thanks for reviewing/testing

Fixed this in my branch, thanks
Lynne Sept. 15, 2024, 9:55 a.m. UTC | #5
On 09/09/2024 12:37, Lynne wrote:
> This commit adds the common Vulkan video encoding framework.
> It makes full use of the asynchronous features of our new common
> hardware encoding code, and of Vulkan.
> The code is able to handle anything from H264 to AV1 and MJPEG.
> ---
>   configure                  |   2 +
>   libavcodec/Makefile        |   2 +-
>   libavcodec/vulkan_encode.c | 979 +++++++++++++++++++++++++++++++++++++
>   libavcodec/vulkan_encode.h | 243 +++++++++
>   4 files changed, 1225 insertions(+), 1 deletion(-)
>   create mode 100644 libavcodec/vulkan_encode.c
>   create mode 100644 libavcodec/vulkan_encode.h
> 
> diff --git a/configure b/configure
> index a8e67d230c..6cfb736a86 100755
> --- a/configure
> +++ b/configure
> @@ -2638,6 +2638,7 @@ CONFIG_EXTRA="
>       vp3dsp
>       vp56dsp
>       vp8dsp
> +    vulkan_encode
>       wma_freqs
>       wmv2dsp
>   "
> @@ -3299,6 +3300,7 @@ qsvdec_select="qsv"
>   qsvenc_select="qsv"
>   qsvvpp_select="qsv"
>   vaapi_encode_deps="vaapi"
> +vulkan_encode_deps="vulkan"
>   v4l2_m2m_deps="linux_videodev2_h sem_timedwait"
>   
>   bilateral_cuda_filter_deps="ffnvcodec"
> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
> index 27ef4638ce..ff6a3c4efc 100644
> --- a/libavcodec/Makefile
> +++ b/libavcodec/Makefile
> @@ -1282,7 +1282,7 @@ SKIPHEADERS-$(CONFIG_QSVENC)           += qsvenc.h
>   SKIPHEADERS-$(CONFIG_VAAPI)            += vaapi_decode.h vaapi_hevc.h vaapi_encode.h
>   SKIPHEADERS-$(CONFIG_VDPAU)            += vdpau.h vdpau_internal.h
>   SKIPHEADERS-$(CONFIG_VIDEOTOOLBOX)     += videotoolbox.h vt_internal.h
> -SKIPHEADERS-$(CONFIG_VULKAN)           += vulkan.h vulkan_video.h vulkan_decode.h
> +SKIPHEADERS-$(CONFIG_VULKAN)           += vulkan.h vulkan_video.h vulkan_encode.h vulkan_decode.h
>   SKIPHEADERS-$(CONFIG_V4L2_M2M)         += v4l2_buffers.h v4l2_context.h v4l2_m2m.h
>   SKIPHEADERS-$(CONFIG_ZLIB)             += zlib_wrapper.h
>   
> diff --git a/libavcodec/vulkan_encode.c b/libavcodec/vulkan_encode.c
> new file mode 100644
> index 0000000000..5e87d4c073
> --- /dev/null
> +++ b/libavcodec/vulkan_encode.c
> @@ -0,0 +1,979 @@
> +/*
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include "libavutil/mem.h"
> +#include "libavutil/avassert.h"
> +#include "vulkan_encode.h"
> +#include "config.h"
> +
> +#include "libavutil/vulkan_loader.h"
> +
> +const AVCodecHWConfigInternal *const ff_vulkan_encode_hw_configs[] = {
> +    HW_CONFIG_ENCODER_FRAMES(VULKAN, VULKAN),
> +    NULL,
> +};
> +
> +av_cold void ff_vulkan_encode_uninit(FFVulkanEncodeContext *ctx)
> +{
> +    FFVulkanContext *s = &ctx->s;
> +    FFVulkanFunctions *vk = &s->vkfn;
> +
> +    /* Wait on and free execution pool */
> +    ff_vk_exec_pool_free(s, &ctx->enc_pool);
> +
> +    /* Destroy the session params */
> +    if (ctx->session_params)
> +        vk->DestroyVideoSessionParametersKHR(s->hwctx->act_dev,
> +                                             ctx->session_params,
> +                                             s->hwctx->alloc);
> +
> +    ff_hw_base_encode_close(&ctx->base);
> +
> +    av_buffer_pool_uninit(&ctx->buf_pool);
> +
> +    ff_vk_video_common_uninit(s, &ctx->common);
> +
> +    ff_vk_uninit(s);
> +}
> +
> +static int vulkan_encode_init(AVCodecContext *avctx, FFHWBaseEncodePicture *pic)
> +{
> +    int err;
> +    FFVulkanEncodeContext *ctx = avctx->priv_data;
> +    FFVulkanEncodePicture *vp = pic->priv;
> +
> +    AVFrame *f = pic->input_image;
> +    AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data;
> +    AVVulkanFramesContext *vkfc = hwfc->hwctx;
> +    AVVkFrame *vkf = (AVVkFrame *)f->data[0];
> +
> +    if (ctx->codec->picture_priv_data_size > 0) {
> +        pic->codec_priv = av_mallocz(ctx->codec->picture_priv_data_size);
> +        if (!pic->codec_priv)
> +            return AVERROR(ENOMEM);
> +    }
> +
> +    /* Input image view */
> +    err = ff_vk_create_view(&ctx->s, &ctx->common,
> +                            &vp->in.view, &vp->in.aspect,
> +                            vkf, vkfc->format[0], 0);
> +    if (err < 0)
> +        return err;
> +
> +    /* Reference view */
> +    if (!ctx->common.layered_dpb) {
> +        AVFrame *rf = pic->recon_image;
> +        AVVkFrame *rvkf = (AVVkFrame *)rf->data[0];
> +        err = ff_vk_create_view(&ctx->s, &ctx->common,
> +                                &vp->dpb.view, &vp->dpb.aspect,
> +                                rvkf, ctx->pic_format, 1);
> +        if (err < 0)
> +            return err;
> +    } else {
> +        vp->dpb.view = ctx->common.layered_view;
> +        vp->dpb.aspect = ctx->common.layered_aspect;
> +    }
> +
> +    return 0;
> +}
> +
> +static int vulkan_encode_free(AVCodecContext *avctx, FFHWBaseEncodePicture *pic)
> +{
> +    FFVulkanEncodeContext *ctx = avctx->priv_data;
> +    FFVulkanFunctions *vk = &ctx->s.vkfn;
> +
> +    FFVulkanEncodePicture *vp = pic->priv;
> +
> +    if (vp->in.view)
> +        vk->DestroyImageView(ctx->s.hwctx->act_dev, vp->in.view,
> +                             ctx->s.hwctx->alloc);
> +
> +    if (!ctx->common.layered_dpb && vp->dpb.view)
> +        vk->DestroyImageView(ctx->s.hwctx->act_dev, vp->dpb.view,
> +                             ctx->s.hwctx->alloc);
> +
> +    ctx->slots[vp->dpb_slot.slotIndex] = 0;
> +
> +    return 0;
> +}
> +
> +static int init_pic_rc(AVCodecContext *avctx, FFHWBaseEncodePicture *pic,
> +                       VkVideoEncodeRateControlInfoKHR *rc_info,
> +                       VkVideoEncodeRateControlLayerInfoKHR *rc_layer /* Goes in ^ */)
> +{
> +    FFVulkanEncodeContext *ctx = avctx->priv_data;
> +
> +    *rc_info = (VkVideoEncodeRateControlInfoKHR) {
> +        .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_RATE_CONTROL_INFO_KHR,
> +        .rateControlMode = ctx->opts.rc_mode,
> +    };
> +
> +    if (ctx->opts.rc_mode > VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR) {
> +        *rc_layer = (VkVideoEncodeRateControlLayerInfoKHR) {
> +            .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_RATE_CONTROL_LAYER_INFO_KHR,
> +            .averageBitrate = avctx->bit_rate,
> +            .maxBitrate = avctx->rc_max_rate ? avctx->rc_max_rate : avctx->bit_rate,
> +            .frameRateNumerator = avctx->framerate.num,
> +            .frameRateDenominator = avctx->framerate.den,
> +        };
> +        rc_info->layerCount++;
> +        rc_info->pLayers = rc_layer;
> +    }
> +
> +    return ctx->codec->init_pic_rc(avctx, pic, rc_info, rc_layer);
> +}
> +
> +static int vulkan_encode_issue(AVCodecContext *avctx,
> +                               FFHWBaseEncodePicture *base_pic)
> +{
> +    FFVulkanEncodeContext *ctx = avctx->priv_data;
> +    FFVulkanFunctions *vk = &ctx->s.vkfn;
> +
> +    const size_t size_align = ctx->caps.minBitstreamBufferSizeAlignment;
> +
> +    FFVulkanEncodePicture *vp = base_pic->priv;
> +    AVFrame *src = (AVFrame *)base_pic->input_image;
> +    AVVkFrame *vkf = (AVVkFrame *)src->data[0];
> +
> +    int err, max_pkt_size;
> +
> +    FFVkBuffer *sd_buf;
> +
> +    int slot_index = -1;
> +    FFVkExecContext *exec;
> +    VkCommandBuffer cmd_buf;
> +    VkImageMemoryBarrier2 img_bar[37];
> +    int nb_img_bar = 0;
> +
> +    /* Coding start/end */
> +    VkVideoBeginCodingInfoKHR encode_start;
> +    VkVideoEndCodingInfoKHR encode_end = {
> +        .sType = VK_STRUCTURE_TYPE_VIDEO_END_CODING_INFO_KHR,
> +    };
> +
> +    VkVideoEncodeRateControlLayerInfoKHR rc_layer;
> +    VkVideoEncodeRateControlInfoKHR rc_info;
> +    VkVideoEncodeQualityLevelInfoKHR q_info;
> +    VkVideoCodingControlInfoKHR encode_ctrl;
> +
> +    VkVideoReferenceSlotInfoKHR ref_slot[37];
> +    VkVideoEncodeInfoKHR encode_info;
> +
> +    /* Create packet data buffer */
> +    max_pkt_size = FFALIGN(3 * ctx->base.surface_width * ctx->base.surface_height + (1 << 16),
> +                           ctx->caps.minBitstreamBufferSizeAlignment);
> +
> +    err = ff_vk_get_pooled_buffer(&ctx->s, &ctx->buf_pool, &vp->pkt_buf,
> +                                  VK_BUFFER_USAGE_VIDEO_ENCODE_DST_BIT_KHR,
> +                                  &ctx->profile_list, max_pkt_size,
> +                                  VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
> +                                  VK_MEMORY_PROPERTY_HOST_CACHED_BIT);
> +    if (err < 0)
> +        return err;
> +
> +    sd_buf = (FFVkBuffer *)vp->pkt_buf->data;
> +
> +    /* Setup rate control */
> +    err = init_pic_rc(avctx, base_pic, &rc_info, &rc_layer);
> +    if (err < 0)
> +        return err;
> +
> +    q_info = (VkVideoEncodeQualityLevelInfoKHR) {
> +        .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_QUALITY_LEVEL_INFO_KHR,
> +        .pNext = &rc_info,
> +        .qualityLevel = ctx->opts.quality,
> +    };
> +    encode_ctrl = (VkVideoCodingControlInfoKHR) {
> +        .sType = VK_STRUCTURE_TYPE_VIDEO_CODING_CONTROL_INFO_KHR,
> +        .pNext = &q_info,
> +        .flags = VK_VIDEO_CODING_CONTROL_ENCODE_QUALITY_LEVEL_BIT_KHR |
> +                 VK_VIDEO_CODING_CONTROL_ENCODE_RATE_CONTROL_BIT_KHR |
> +                 (base_pic->force_idr ? VK_VIDEO_CODING_CONTROL_RESET_BIT_KHR : 0),
> +    };
> +
> +    /* Current picture's ref slot */
> +    vp->dpb_res = (VkVideoPictureResourceInfoKHR) {
> +        .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR,
> +        .pNext = NULL,
> +        .codedOffset = { 0 },
> +        .codedExtent = (VkExtent2D){ ctx->base.surface_width,
> +                                     ctx->base.surface_height },
> +        .baseArrayLayer = 0,
> +        .imageViewBinding = vp->dpb.view,
> +    };
> +
> +    for (int i = 0; i < ctx->caps.maxDpbSlots; i++) {
> +        if (!ctx->slots[i]) {
> +            slot_index = i;
> +            ctx->slots[i] = 1;
> +            break;
> +        }
> +    }
> +    av_assert0(slot_index >= 0);
> +
> +    vp->dpb_slot = (VkVideoReferenceSlotInfoKHR) {
> +        .sType = VK_STRUCTURE_TYPE_VIDEO_REFERENCE_SLOT_INFO_KHR,
> +        .pNext = NULL,  // Set later
> +        .slotIndex = slot_index,
> +        .pPictureResource = &vp->dpb_res,
> +    };
> +
> +    encode_info = (VkVideoEncodeInfoKHR) {
> +        .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_INFO_KHR,
> +        .pNext = NULL, // Set later
> +        .flags = 0x0,
> +        .srcPictureResource = (VkVideoPictureResourceInfoKHR) {
> +            .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR,
> +            .pNext = NULL,
> +            .codedOffset = { 0, 0 },
> +            .codedExtent = (VkExtent2D){ base_pic->input_image->width,
> +                                         base_pic->input_image->height },
> +            .baseArrayLayer = 0,
> +            .imageViewBinding = vp->in.view,
> +        },
> +        .pSetupReferenceSlot = &vp->dpb_slot,
> +        .referenceSlotCount = 0,
> +        .pReferenceSlots = ref_slot,
> +        .dstBuffer = sd_buf->buf,
> +        .dstBufferOffset = 0,
> +        .dstBufferRange = sd_buf->size,
> +        .precedingExternallyEncodedBytes = 0,
> +    };
> +
> +    for (int i = 0; i < MAX_REFERENCE_LIST_NUM; i++) {
> +        for (int j = 0; j < base_pic->nb_refs[i]; j++) {
> +            FFHWBaseEncodePicture *ref = base_pic->refs[i][j];
> +            FFVulkanEncodePicture *rvp = ref->priv;
> +            ref_slot[encode_info.referenceSlotCount++] = rvp->dpb_slot;
> +        }
> +    }
> +
> +    /* Setup picture parameters */
> +    err = ctx->codec->init_pic_params(avctx, base_pic,
> +                                      &encode_info);
> +    if (err < 0)
> +        return err;
> +
> +    encode_start = (VkVideoBeginCodingInfoKHR) {
> +        .sType = VK_STRUCTURE_TYPE_VIDEO_BEGIN_CODING_INFO_KHR,
> +        .pNext = !base_pic->force_idr ? &rc_info : NULL,
> +        .videoSession = ctx->common.session,
> +        .videoSessionParameters = ctx->session_params,
> +        .referenceSlotCount = encode_info.referenceSlotCount,
> +        .pReferenceSlots = ref_slot,
> +    };
> +
> +    /* Calling vkCmdBeginVideoCodingKHR requires to declare all references
> +     * being enabled upfront, including the current frame's output ref.
> +     * If layered DBPs are used, make sure its not included twice. */
> +    if (!ctx->common.layered_dpb || !encode_info.referenceSlotCount) {
> +        ref_slot[encode_info.referenceSlotCount] = vp->dpb_slot;
> +        ref_slot[encode_info.referenceSlotCount].slotIndex = -1;
> +        encode_start.referenceSlotCount++;
> +    }
> +
> +    /* Write header */
> +    if (base_pic->type == FF_HW_PICTURE_TYPE_IDR) {
> +        uint8_t *hdr_dst = sd_buf->mapped_mem + encode_info.dstBufferOffset;
> +        size_t data_size = encode_info.dstBufferRange;
> +        err = ctx->codec->write_sequence_headers(avctx, base_pic, hdr_dst, &data_size);
> +        if (err < 0)
> +            goto fail;
> +        encode_info.dstBufferOffset += data_size;
> +        encode_info.dstBufferRange  -= data_size;
> +    }
> +
> +    /* Write extra units */
> +    if (ctx->codec->write_extra_headers) {
> +        uint8_t *hdr_dst = sd_buf->mapped_mem + encode_info.dstBufferOffset;
> +        size_t data_size = encode_info.dstBufferRange;
> +        err = ctx->codec->write_extra_headers(avctx, base_pic, hdr_dst, &data_size);
> +        if (err < 0)
> +            goto fail;
> +        encode_info.dstBufferOffset += data_size;
> +        encode_info.dstBufferRange  -= data_size;
> +    }
> +
> +    /* Align buffer offset to the required value with filler units */
> +    if (ctx->codec->write_filler) {
> +        uint8_t *hdr_dst = sd_buf->mapped_mem + encode_info.dstBufferOffset;
> +        size_t data_size = encode_info.dstBufferRange;
> +
> +        uint32_t offset = encode_info.dstBufferOffset;
> +        size_t offset_align = ctx->caps.minBitstreamBufferOffsetAlignment;
> +
> +        uint32_t filler_data = FFALIGN(offset, offset_align) - offset;
> +
> +        if (filler_data) {
> +            while (filler_data < ctx->codec->filler_header_size)
> +                filler_data += offset_align;
> +
> +            filler_data -= ctx->codec->filler_header_size;
> +
> +            err = ctx->codec->write_filler(avctx, filler_data,
> +                                           hdr_dst, &data_size);
> +            if (err < 0)
> +                goto fail;
> +
> +            encode_info.dstBufferOffset += data_size;
> +            encode_info.dstBufferRange  -= data_size;
> +        }
> +    }
> +
> +    vp->slices_offset = encode_info.dstBufferOffset;
> +
> +    /* Align buffer size to the nearest lower alignment requirement. */
> +    encode_info.dstBufferRange -= size_align;
> +    encode_info.dstBufferRange = FFALIGN(encode_info.dstBufferRange,
> +                                         size_align);
> +
> +    /* Start command buffer recording */
> +    exec = vp->exec = ff_vk_exec_get(&ctx->enc_pool);
> +    ff_vk_exec_start(&ctx->s, exec);
> +    cmd_buf = exec->buf;
> +
> +    /* Output packet buffer */
> +    err = ff_vk_exec_add_dep_buf(&ctx->s, exec, &vp->pkt_buf, 1, 1);
> +    if (err < 0)
> +        goto fail;
> +
> +    /* Source image */
> +    err = ff_vk_exec_add_dep_frame(&ctx->s, exec, src,
> +                                   VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
> +                                   VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR);
> +    if (err < 0)
> +        goto fail;
> +
> +    /* Source image layout conversion */
> +    img_bar[nb_img_bar] = (VkImageMemoryBarrier2) {
> +        .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
> +        .pNext = NULL,
> +        .srcStageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
> +        .srcAccessMask = vkf->access[0],
> +        .dstStageMask = VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR,
> +        .dstAccessMask = VK_ACCESS_2_VIDEO_ENCODE_READ_BIT_KHR,
> +        .oldLayout = vkf->layout[0],
> +        .newLayout = VK_IMAGE_LAYOUT_VIDEO_ENCODE_SRC_KHR,
> +        .srcQueueFamilyIndex = vkf->queue_family[0],
> +        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
> +        .image = vkf->img[0],
> +        .subresourceRange = (VkImageSubresourceRange) {
> +            .aspectMask = vp->in.aspect,
> +            .layerCount = 1,
> +            .levelCount = 1,
> +        },
> +    };
> +    ff_vk_exec_update_frame(&ctx->s, exec, src,
> +                            &img_bar[nb_img_bar], &nb_img_bar);
> +
> +    if (!ctx->common.layered_dpb) {
> +        /* Source image's ref slot.
> +         * No need to do a layout conversion, since the frames which are allocated
> +         * with a DPB usage are automatically converted. */
> +        err = ff_vk_exec_add_dep_frame(&ctx->s, exec, base_pic->recon_image,
> +                                       VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
> +                                       VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR);
> +        if (err < 0)
> +            return err;
> +
> +        /* All references */
> +        for (int i = 0; i < MAX_REFERENCE_LIST_NUM; i++) {
> +            for (int j = 0; j < base_pic->nb_refs[i]; j++) {
> +                FFHWBaseEncodePicture *ref = base_pic->refs[i][j];
> +                err = ff_vk_exec_add_dep_frame(&ctx->s, exec, ref->recon_image,
> +                                               VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
> +                                               VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR);
> +                if (err < 0)
> +                    return err;
> +            }
> +        }
> +    } else {
> +        err = ff_vk_exec_add_dep_frame(&ctx->s, exec, ctx->common.layered_frame,
> +                                       VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR,
> +                                       VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR);
> +        if (err < 0)
> +            return err;
> +    }
> +
> +    /* Change image layout */
> +    vk->CmdPipelineBarrier2(cmd_buf, &(VkDependencyInfo) {
> +            .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
> +            .pImageMemoryBarriers = img_bar,
> +            .imageMemoryBarrierCount = nb_img_bar,
> +        });
> +
> +    /* Start, use parameters */
> +    vk->CmdBeginVideoCodingKHR(cmd_buf, &encode_start);
> +
> +    /* Send control data */
> +    if (base_pic->force_idr)
> +        vk->CmdControlVideoCodingKHR(cmd_buf, &encode_ctrl);
> +
> +    /* Encode */
> +    vk->CmdBeginQuery(cmd_buf, ctx->enc_pool.query_pool, exec->query_idx + 0, 0);
> +    vk->CmdEncodeVideoKHR(cmd_buf, &encode_info);
> +    vk->CmdEndQuery(cmd_buf, ctx->enc_pool.query_pool, exec->query_idx + 0);
> +
> +    /* End encoding */
> +    vk->CmdEndVideoCodingKHR(cmd_buf, &encode_end);
> +
> +    /* End recording and submit for execution */
> +    err = ff_vk_exec_submit(&ctx->s, vp->exec);
> +    if (err < 0)
> +        goto fail;
> +
> +    /* We don't need to keep the input image any longer, its already ref'd */
> +    av_frame_free(&base_pic->input_image);
> +
> +    return 0;
> +
> +fail:
> +    return err;
> +}
> +
> +static void vulkan_encode_wait(AVCodecContext *avctx,
> +                               FFHWBaseEncodePicture *base_pic)
> +{
> +    FFVulkanEncodeContext *ctx = avctx->priv_data;
> +    FFVulkanEncodePicture *vp = base_pic->priv;
> +
> +    av_assert0(base_pic->encode_issued);
> +
> +    if (base_pic->encode_complete)
> +        return;
> +
> +    ff_vk_exec_wait(&ctx->s, vp->exec);
> +    base_pic->encode_complete = 1;
> +}
> +
> +static int vulkan_encode_output(AVCodecContext *avctx,
> +                                FFHWBaseEncodePicture *base_pic, AVPacket *pkt)
> +{
> +    VkResult ret;
> +    FFVulkanEncodePicture *vp = base_pic->priv;
> +    FFVulkanEncodeContext *ctx = avctx->priv_data;
> +    FFVkBuffer *sd_buf = (FFVkBuffer *)vp->pkt_buf->data;
> +    uint32_t *query_data;
> +
> +    vulkan_encode_wait(avctx, base_pic);
> +
> +    ret = ff_vk_exec_get_query(&ctx->s, vp->exec, (void **)&query_data, 0);
> +    if (ret == VK_NOT_READY) {
> +        av_log(avctx, AV_LOG_ERROR, "Unable to perform query: %s!\n",
> +               ff_vk_ret2str(ret));
> +        return AVERROR(EINVAL);
> +    }
> +
> +    if (ret != VK_NOT_READY && ret != VK_SUCCESS) {
> +        av_log(avctx, AV_LOG_ERROR, "Unable to perform query: %s!\n",
> +               ff_vk_ret2str(ret));
> +        return AVERROR_EXTERNAL;
> +    }
> +
> +    if (query_data[2] != VK_QUERY_RESULT_STATUS_COMPLETE_KHR) {
> +        av_log(avctx, AV_LOG_ERROR, "Unable to encode: %u\n", query_data[2]);
> +        return AVERROR_EXTERNAL;
> +    }
> +
> +    /* Invalidate buffer if needed */
> +    if (!(sd_buf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
> +        FFVulkanFunctions *vk = &ctx->s.vkfn;
> +        VkMappedMemoryRange invalidate_buf;
> +
> +        int offs = vp->slices_offset;
> +        /* If the non-coherent alignment is greater than the bitstream buffer
> +         * offset's alignment, and the offs value is not aligned already,
> +         * align it to the previous alignment point. */
> +        if (ctx->s.props.properties.limits.nonCoherentAtomSize >
> +            ctx->caps.minBitstreamBufferOffsetAlignment && offs &&
> +            (FFALIGN(offs, ctx->s.props.properties.limits.nonCoherentAtomSize) != offs)) {
> +            offs -= ctx->s.props.properties.limits.nonCoherentAtomSize;
> +            offs = FFALIGN(FFMAX(offs, 0), ctx->s.props.properties.limits.nonCoherentAtomSize);
> +        }
> +
> +        invalidate_buf = (VkMappedMemoryRange) {
> +            .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
> +            .memory = sd_buf->mem,
> +            .offset = offs,
> +            .size = VK_WHOLE_SIZE,
> +        };
> +
> +        vk->FlushMappedMemoryRanges(ctx->s.hwctx->act_dev, 1, &invalidate_buf);
> +    }
> +
> +    pkt->data = sd_buf->mapped_mem;
> +    pkt->size = vp->slices_offset + /* base offset */
> +                query_data[0]       /* secondary offset */ +
> +                query_data[1]       /* size */;
> +
> +    /* Move reference */
> +    pkt->buf = vp->pkt_buf;
> +    vp->pkt_buf = NULL;
> +
> +    av_log(avctx, AV_LOG_DEBUG, "Frame %"PRId64"/%"PRId64 " encoded\n",
> +           base_pic->display_order, base_pic->encode_order);
> +
> +    ff_hw_base_encode_set_output_property(&ctx->base, avctx,
> +                                          base_pic, pkt,
> +                                          ctx->codec->flags & VK_ENC_FLAG_NO_DELAY);
> +
> +    return 0;
> +}
> +
> +static const FFHWEncodePictureOperation vulkan_base_encode_ops = {
> +    .priv_size = sizeof(FFVulkanEncodePicture),
> +    .init   = &vulkan_encode_init,
> +    .issue  = &vulkan_encode_issue,
> +    .output = &vulkan_encode_output,
> +    .free   = &vulkan_encode_free,
> +};
> +
> +int ff_vulkan_encode_receive_packet(AVCodecContext *avctx, AVPacket *pkt)
> +{
> +    FFVulkanEncodeContext *ctx = avctx->priv_data;
> +    return ff_hw_base_encode_receive_packet(&ctx->base, avctx, pkt);
> +}
> +
> +static int vulkan_encode_create_dpb(AVCodecContext *avctx, FFVulkanEncodeContext *ctx)
> +{
> +    int err;
> +    FFHWBaseEncodeContext *base_ctx = &ctx->base;
> +    AVVulkanFramesContext *hwfc;
> +
> +    enum AVPixelFormat dpb_format;
> +    err = ff_hw_base_get_recon_format(base_ctx, NULL, &dpb_format);
> +    if (err < 0)
> +        return err;
> +
> +    base_ctx->recon_frames_ref = av_hwframe_ctx_alloc(base_ctx->device_ref);
> +    if (!base_ctx->recon_frames_ref)
> +        return AVERROR(ENOMEM);
> +
> +    base_ctx->recon_frames = (AVHWFramesContext *)base_ctx->recon_frames_ref->data;
> +    hwfc = (AVVulkanFramesContext *)base_ctx->recon_frames->hwctx;
> +
> +    base_ctx->recon_frames->format    = AV_PIX_FMT_VULKAN;
> +    base_ctx->recon_frames->sw_format = dpb_format;
> +    base_ctx->recon_frames->width     = base_ctx->surface_width;
> +    base_ctx->recon_frames->height    = base_ctx->surface_height;
> +
> +    hwfc->format[0]    = ctx->pic_format;
> +    hwfc->create_pnext = &ctx->profile_list;
> +    hwfc->tiling       = VK_IMAGE_TILING_OPTIMAL;
> +    hwfc->usage        = VK_IMAGE_USAGE_SAMPLED_BIT              |
> +                         VK_IMAGE_USAGE_VIDEO_ENCODE_DPB_BIT_KHR;
> +
> +    if (ctx->common.layered_dpb)
> +        hwfc->nb_layers = ctx->caps.maxDpbSlots;
> +
> +    err = av_hwframe_ctx_init(base_ctx->recon_frames_ref);
> +    if (err < 0) {
> +        av_log(avctx, AV_LOG_ERROR, "Failed to initialise DPB frame context: %s\n",
> +               av_err2str(err));
> +        return err;
> +    }
> +
> +    if (ctx->common.layered_dpb) {
> +        ctx->common.layered_frame = av_frame_alloc();
> +        if (!ctx->common.layered_frame)
> +            return AVERROR(ENOMEM);
> +
> +        err = av_hwframe_get_buffer(base_ctx->recon_frames_ref,
> +                                    ctx->common.layered_frame, 0);
> +        if (err < 0)
> +            return AVERROR(ENOMEM);
> +
> +        err = ff_vk_create_view(&ctx->s, &ctx->common,
> +                                &ctx->common.layered_view,
> +                                &ctx->common.layered_aspect,
> +                                (AVVkFrame *)ctx->common.layered_frame->data[0],
> +                                hwfc->format[0], 1);
> +        if (err < 0)
> +            return err;
> +
> +        av_buffer_unref(&base_ctx->recon_frames_ref);
> +    }
> +
> +    return 0;
> +}
> +
> +static av_cold int init_rc(AVCodecContext *avctx, FFVulkanEncodeContext *ctx)
> +{
> +    if (ctx->opts.rc_mode == FF_VK_RC_MODE_AUTO) {
> +        if (ctx->opts.qp >= 0) {
> +            ctx->opts.rc_mode = VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR;
> +        } else if (avctx->global_quality > 0) {
> +            if (avctx->flags & AV_CODEC_FLAG_QSCALE)
> +                ctx->opts.qp = avctx->global_quality / FF_QP2LAMBDA;
> +            else
> +                ctx->opts.qp = avctx->global_quality;
> +            ctx->opts.rc_mode = VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR;
> +        } else if (avctx->bit_rate) {
> +            if (ctx->enc_caps.rateControlModes & VK_VIDEO_ENCODE_RATE_CONTROL_MODE_VBR_BIT_KHR)
> +                ctx->opts.rc_mode = VK_VIDEO_ENCODE_RATE_CONTROL_MODE_VBR_BIT_KHR;
> +            else if (ctx->enc_caps.rateControlModes & VK_VIDEO_ENCODE_RATE_CONTROL_MODE_CBR_BIT_KHR)
> +                ctx->opts.rc_mode = VK_VIDEO_ENCODE_RATE_CONTROL_MODE_CBR_BIT_KHR;
> +            else
> +                ctx->opts.rc_mode = VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DEFAULT_KHR;
> +        } else {
> +            ctx->opts.qp = 18;
> +            ctx->opts.rc_mode = VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR;
> +            av_log(avctx, AV_LOG_WARNING, "No rate control settings specified, using fixed QP = %i\n",
> +                   ctx->opts.qp);
> +        }
> +    }
> +
> +    if (ctx->opts.rc_mode && !(ctx->enc_caps.rateControlModes & ctx->opts.rc_mode)) {
> +        static const char *rc_modes[] = {
> +            [VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DEFAULT_KHR] = "default",
> +            [VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR] = "cqp",
> +            [VK_VIDEO_ENCODE_RATE_CONTROL_MODE_CBR_BIT_KHR] = "cbr",
> +            [VK_VIDEO_ENCODE_RATE_CONTROL_MODE_VBR_BIT_KHR] = "vbr",
> +        };
> +        av_log(avctx, AV_LOG_ERROR, "Unsupported rate control mode %i, supported are:\n",
> +               ctx->opts.rc_mode);
> +        for (int i = 0; i < av_popcount(ctx->enc_caps.rateControlModes); i++) {
> +            if (!(ctx->enc_caps.rateControlModes & (1 << i)))
> +                continue;
> +            av_log(avctx, AV_LOG_ERROR, "    %i: %s\n", i, rc_modes[i]);
> +        }
> +        return AVERROR(ENOTSUP);
> +    }
> +
> +    return 0;
> +}
> +
> +av_cold int ff_vulkan_encode_init(AVCodecContext *avctx, FFVulkanEncodeContext *ctx,
> +                                  const FFVulkanEncodeDescriptor *vk_desc,
> +                                  const FFVulkanCodec *codec,
> +                                  void *codec_caps, void *quality_pnext)
> +{
> +    int i, err;
> +    VkResult ret;
> +    FFVulkanFunctions *vk = &ctx->s.vkfn;
> +    FFVulkanContext *s = &ctx->s;
> +    FFHWBaseEncodeContext *base_ctx = &ctx->base;
> +
> +    const AVPixFmtDescriptor *desc;
> +
> +    VkVideoFormatPropertiesKHR *ret_info;
> +    uint32_t nb_out_fmts = 0;
> +
> +    VkPhysicalDeviceVideoEncodeQualityLevelInfoKHR quality_info;
> +
> +    VkQueryPoolVideoEncodeFeedbackCreateInfoKHR query_create;
> +
> +    VkVideoSessionCreateInfoKHR session_create = {
> +        .sType = VK_STRUCTURE_TYPE_VIDEO_SESSION_CREATE_INFO_KHR,
> +    };
> +    VkPhysicalDeviceVideoFormatInfoKHR fmt_info = {
> +        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VIDEO_FORMAT_INFO_KHR,
> +        .pNext = &ctx->profile_list,
> +    };
> +
> +    if (!avctx->hw_frames_ctx) {
> +        av_log(avctx, AV_LOG_ERROR, "A hardware frames reference is "
> +               "required to associate the encoding device.\n");
> +        return AVERROR(EINVAL);
> +    }
> +
> +    ctx->base.op = &vulkan_base_encode_ops;
> +    ctx->codec = codec;
> +
> +    s->frames_ref = av_buffer_ref(avctx->hw_frames_ctx);
> +    s->frames = (AVHWFramesContext *)s->frames_ref->data;
> +    s->hwfc = s->frames->hwctx;
> +
> +    s->device = (AVHWDeviceContext *)s->frames->device_ref->data;
> +    s->hwctx = s->device->hwctx;
> +
> +    desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
> +    if (!desc)
> +        return AVERROR(EINVAL);
> +
> +    s->extensions = ff_vk_extensions_to_mask(s->hwctx->enabled_dev_extensions,
> +                                             s->hwctx->nb_enabled_dev_extensions);
> +
> +    if (!(s->extensions & FF_VK_EXT_VIDEO_ENCODE_QUEUE)) {
> +        av_log(avctx, AV_LOG_ERROR, "Device does not support the %s extension!\n",
> +               VK_KHR_VIDEO_ENCODE_QUEUE_EXTENSION_NAME);
> +        return AVERROR(ENOSYS);
> +    } else if (!(s->extensions & FF_VK_EXT_VIDEO_MAINTENANCE_1)) {
> +        av_log(avctx, AV_LOG_ERROR, "Device does not support the %s extension!\n",
> +               VK_KHR_VIDEO_MAINTENANCE_1_EXTENSION_NAME);
> +        return AVERROR(ENOSYS);
> +    } else if (!(s->extensions & vk_desc->encode_extension)) {
> +        av_log(avctx, AV_LOG_ERROR, "Device does not support decoding %s!\n",
> +               avcodec_get_name(avctx->codec_id));
> +        return AVERROR(ENOSYS);
> +    }
> +
> +    /* Load functions */
> +    err = ff_vk_load_functions(s->device, vk, s->extensions, 1, 1);
> +    if (err < 0)
> +        return err;
> +
> +    /* Create queue context */
> +    err = ff_vk_video_qf_init(s, &ctx->qf_enc,
> +                              VK_QUEUE_VIDEO_ENCODE_BIT_KHR,
> +                              vk_desc->encode_op);
> +    if (err < 0) {
> +        av_log(avctx, AV_LOG_ERROR, "Decoding of %s is not supported by this device\n",
> +               avcodec_get_name(avctx->codec_id));
> +        return err;
> +    }
> +
> +    /* Load all properties */
> +    err = ff_vk_load_props(s);
> +    if (err < 0)
> +        return err;
> +
> +    /* Set tuning */
> +    ctx->usage_info = (VkVideoEncodeUsageInfoKHR) {
> +        .sType             = VK_STRUCTURE_TYPE_VIDEO_ENCODE_USAGE_INFO_KHR,
> +        .videoUsageHints   = ctx->opts.usage,
> +        .videoContentHints = ctx->opts.content,
> +        .tuningMode        = ctx->opts.tune,
> +    };
> +
> +    /* Load up the profile now, needed for caps and to create a query pool */
> +    ctx->profile.sType               = VK_STRUCTURE_TYPE_VIDEO_PROFILE_INFO_KHR;
> +    ctx->profile.pNext               = &ctx->usage_info;
> +    ctx->profile.videoCodecOperation = vk_desc->encode_op;
> +    ctx->profile.chromaSubsampling   = ff_vk_subsampling_from_av_desc(desc);
> +    ctx->profile.lumaBitDepth        = ff_vk_depth_from_av_depth(desc->comp[0].depth);
> +    ctx->profile.chromaBitDepth      = ctx->profile.lumaBitDepth;
> +
> +    /* Setup a profile */
> +    err = codec->init_profile(avctx, &ctx->profile, &ctx->usage_info);
> +    if (err < 0)
> +        return err;
> +
> +    ctx->profile_list.sType        = VK_STRUCTURE_TYPE_VIDEO_PROFILE_LIST_INFO_KHR;
> +    ctx->profile_list.profileCount = 1;
> +    ctx->profile_list.pProfiles    = &ctx->profile;
> +
> +    /* Get the capabilities of the encoder for the given profile */
> +    ctx->enc_caps.sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_CAPABILITIES_KHR;
> +    ctx->enc_caps.pNext = codec_caps;
> +    ctx->caps.sType = VK_STRUCTURE_TYPE_VIDEO_CAPABILITIES_KHR;
> +    ctx->caps.pNext = &ctx->enc_caps;
> +
> +    ret = vk->GetPhysicalDeviceVideoCapabilitiesKHR(s->hwctx->phys_dev,
> +                                                    &ctx->profile,
> +                                                    &ctx->caps);
> +    if (ret == VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR) {
> +        av_log(avctx, AV_LOG_ERROR, "Unable to initialize encoding: "
> +               "%s profile \"%s\" not supported!\n",
> +               avcodec_get_name(avctx->codec_id),
> +               avcodec_profile_name(avctx->codec_id, avctx->profile));
> +        return AVERROR(EINVAL);
> +    } else if (ret == VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR) {
> +        av_log(avctx, AV_LOG_ERROR, "Unable to initialize encoding: "
> +               "format (%s) not supported!\n",
> +               av_get_pix_fmt_name(avctx->sw_pix_fmt));
> +        return AVERROR(EINVAL);
> +    } else if (ret == VK_ERROR_FEATURE_NOT_PRESENT ||
> +               ret == VK_ERROR_FORMAT_NOT_SUPPORTED) {
> +        return AVERROR(EINVAL);
> +    } else if (ret != VK_SUCCESS) {
> +        return AVERROR_EXTERNAL;
> +    }
> +
> +    err = init_rc(avctx, ctx);
> +    if (err < 0)
> +        return err;
> +
> +    /* Create command and query pool */
> +    query_create = (VkQueryPoolVideoEncodeFeedbackCreateInfoKHR) {
> +        .sType = VK_STRUCTURE_TYPE_QUERY_POOL_VIDEO_ENCODE_FEEDBACK_CREATE_INFO_KHR,
> +        .pNext = &ctx->profile,
> +        .encodeFeedbackFlags = ctx->enc_caps.supportedEncodeFeedbackFlags,
> +    };
> +    err = ff_vk_exec_pool_init(s, &ctx->qf_enc, &ctx->enc_pool, base_ctx->async_depth,
> +                               1, VK_QUERY_TYPE_VIDEO_ENCODE_FEEDBACK_KHR, 0,
> +                               &query_create);
> +    if (err < 0)
> +        return err;
> +
> +    if (ctx->opts.quality > ctx->enc_caps.maxQualityLevels) {
> +        av_log(avctx, AV_LOG_ERROR, "Invalid quality level %i: allowed range is "
> +                                    "0 to %i\n",
> +               ctx->opts.quality, ctx->enc_caps.maxQualityLevels);
> +        return AVERROR(EINVAL);
> +    }
> +
> +    /* Get quality properties for the profile and quality level */
> +    quality_info = (VkPhysicalDeviceVideoEncodeQualityLevelInfoKHR) {
> +        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VIDEO_ENCODE_QUALITY_LEVEL_INFO_KHR,
> +        .pVideoProfile = &ctx->profile,
> +        .qualityLevel = ctx->opts.quality,
> +    };
> +    ctx->quality_props = (VkVideoEncodeQualityLevelPropertiesKHR) {
> +        .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_QUALITY_LEVEL_PROPERTIES_KHR,
> +        .pNext = quality_pnext,
> +    };
> +    ret = vk->GetPhysicalDeviceVideoEncodeQualityLevelPropertiesKHR(s->hwctx->phys_dev,
> +                                                                    &quality_info,
> +                                                                    &ctx->quality_props);
> +    if (ret != VK_SUCCESS)
> +        return AVERROR_EXTERNAL;
> +
> +    /* Printout informative properties */
> +    av_log(avctx, AV_LOG_VERBOSE, "Encoder capabilities for %s profile \"%s\":\n",
> +           avcodec_get_name(avctx->codec_id),
> +           avcodec_profile_name(avctx->codec_id, avctx->profile));
> +    av_log(avctx, AV_LOG_VERBOSE, "    Width: from %i to %i\n",
> +           ctx->caps.minCodedExtent.width, ctx->caps.maxCodedExtent.width);
> +    av_log(avctx, AV_LOG_VERBOSE, "    Height: from %i to %i\n",
> +           ctx->caps.minCodedExtent.height, ctx->caps.maxCodedExtent.height);
> +    av_log(avctx, AV_LOG_VERBOSE, "    Width alignment: %i\n",
> +           ctx->caps.pictureAccessGranularity.width);
> +    av_log(avctx, AV_LOG_VERBOSE, "    Height alignment: %i\n",
> +           ctx->caps.pictureAccessGranularity.height);
> +    av_log(avctx, AV_LOG_VERBOSE, "    Bitstream offset alignment: %"PRIu64"\n",
> +           ctx->caps.minBitstreamBufferOffsetAlignment);
> +    av_log(avctx, AV_LOG_VERBOSE, "    Bitstream size alignment: %"PRIu64"\n",
> +           ctx->caps.minBitstreamBufferSizeAlignment);
> +    av_log(avctx, AV_LOG_VERBOSE, "    Maximum references: %u\n",
> +           ctx->caps.maxDpbSlots);
> +    av_log(avctx, AV_LOG_VERBOSE, "    Maximum active references: %u\n",
> +           ctx->caps.maxActiveReferencePictures);
> +    av_log(avctx, AV_LOG_VERBOSE, "    Codec header version: %i.%i.%i (driver), %i.%i.%i (compiled)\n",
> +           CODEC_VER(ctx->caps.stdHeaderVersion.specVersion),
> +           CODEC_VER(vk_desc->ext_props.specVersion));
> +    av_log(avctx, AV_LOG_VERBOSE, "    Encoder max quality: %i\n",
> +           ctx->enc_caps.maxQualityLevels);
> +    av_log(avctx, AV_LOG_VERBOSE, "    Encoder image width alignment: %i\n",
> +           ctx->enc_caps.encodeInputPictureGranularity.width);
> +    av_log(avctx, AV_LOG_VERBOSE, "    Encoder image height alignment: %i\n",
> +           ctx->enc_caps.encodeInputPictureGranularity.height);
> +    av_log(avctx, AV_LOG_VERBOSE, "    Capability flags:%s%s%s\n",
> +           ctx->caps.flags ? "" :
> +               " none",
> +           ctx->caps.flags & VK_VIDEO_CAPABILITY_PROTECTED_CONTENT_BIT_KHR ?
> +               " protected" : "",
> +           ctx->caps.flags & VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR ?
> +               " separate_references" : "");
> +
> +    /* Setup width/height alignment */
> +    base_ctx->surface_width = avctx->coded_width =
> +        FFALIGN(avctx->width, ctx->enc_caps.encodeInputPictureGranularity.width);
> +    base_ctx->surface_height = avctx->coded_height =
> +        FFALIGN(avctx->height, ctx->enc_caps.encodeInputPictureGranularity.height);
> +
> +    /* Check if encoding is possible with the given parameters */
> +    if (avctx->coded_width  < ctx->caps.minCodedExtent.width   ||
> +        avctx->coded_height < ctx->caps.minCodedExtent.height  ||
> +        avctx->coded_width  > ctx->caps.maxCodedExtent.width   ||
> +        avctx->coded_height > ctx->caps.maxCodedExtent.height) {
> +        av_log(avctx, AV_LOG_ERROR, "Input of %ix%i too large for encoder limits: %ix%i max\n",
> +               avctx->coded_width, avctx->coded_height,
> +               ctx->caps.minCodedExtent.width, ctx->caps.minCodedExtent.height);
> +        return AVERROR(EINVAL);
> +    }
> +
> +    fmt_info.imageUsage = VK_IMAGE_USAGE_VIDEO_ENCODE_DPB_BIT_KHR |
> +                          VK_IMAGE_USAGE_VIDEO_ENCODE_DST_BIT_KHR;
> +
> +    ctx->common.layered_dpb = !(ctx->caps.flags & VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR);
> +
> +    /* Get the supported image formats */
> +    ret = vk->GetPhysicalDeviceVideoFormatPropertiesKHR(s->hwctx->phys_dev,
> +                                                        &fmt_info,
> +                                                        &nb_out_fmts, NULL);
> +    if (ret == VK_ERROR_FORMAT_NOT_SUPPORTED ||
> +        (!nb_out_fmts && ret == VK_SUCCESS)) {
> +        return AVERROR(EINVAL);
> +    } else if (ret != VK_SUCCESS) {
> +        av_log(avctx, AV_LOG_ERROR, "Unable to get Vulkan format properties: %s!\n",
> +               ff_vk_ret2str(ret));
> +        return AVERROR_EXTERNAL;
> +    }
> +
> +    ret_info = av_mallocz(sizeof(*ret_info)*nb_out_fmts);
> +    if (!ret_info)
> +        return AVERROR(ENOMEM);
> +
> +    for (int i = 0; i < nb_out_fmts; i++)
> +        ret_info[i].sType = VK_STRUCTURE_TYPE_VIDEO_FORMAT_PROPERTIES_KHR;
> +
> +    ret = vk->GetPhysicalDeviceVideoFormatPropertiesKHR(s->hwctx->phys_dev,
> +                                                        &fmt_info,
> +                                                        &nb_out_fmts, ret_info);
> +    if (ret == VK_ERROR_FORMAT_NOT_SUPPORTED ||
> +        (!nb_out_fmts && ret == VK_SUCCESS)) {
> +        av_free(ret_info);
> +        return AVERROR(EINVAL);
> +    } else if (ret != VK_SUCCESS) {
> +        av_log(avctx, AV_LOG_ERROR, "Unable to get Vulkan format properties: %s!\n",
> +               ff_vk_ret2str(ret));
> +        av_free(ret_info);
> +        return AVERROR_EXTERNAL;
> +    }
> +
> +    av_log(avctx, AV_LOG_VERBOSE, "Supported input formats:\n");
> +    for (i = 0; i < nb_out_fmts; i++)
> +        av_log(avctx, AV_LOG_VERBOSE, "    %i: %i\n", i, ret_info[i].format);
> +
> +    for (i = 0; i < nb_out_fmts; i++) {
> +        if (ff_vk_pix_fmt_from_vkfmt(ret_info[i].format) == s->frames->sw_format) {
> +            ctx->pic_format = ret_info[i].format;
> +            break;
> +        }
> +    }
> +
> +    av_free(ret_info);
> +
> +    if (i == nb_out_fmts) {
> +        av_log(avctx, AV_LOG_ERROR, "Pixel format %s of input frames not supported!\n",
> +               av_get_pix_fmt_name(s->frames->sw_format));
> +        return AVERROR(EINVAL);
> +    }
> +
> +    /* Create session */
> +    session_create.pVideoProfile = &ctx->profile;
> +    session_create.flags = 0x0;
> +    session_create.queueFamilyIndex = ctx->qf_enc.queue_family;
> +    session_create.maxCodedExtent = ctx->caps.maxCodedExtent;
> +    session_create.maxDpbSlots = ctx->caps.maxDpbSlots;
> +    session_create.maxActiveReferencePictures = ctx->caps.maxActiveReferencePictures;
> +    session_create.pictureFormat = ctx->pic_format;
> +    session_create.referencePictureFormat = session_create.pictureFormat;
> +    session_create.pStdHeaderVersion = &vk_desc->ext_props;
> +
> +    err = ff_vk_video_common_init(avctx, s, &ctx->common, &session_create);
> +    if (err < 0)
> +        return err;
> +
> +    err = ff_hw_base_encode_init(avctx, &ctx->base);
> +    if (err < 0)
> +        return err;
> +
> +    err = vulkan_encode_create_dpb(avctx, ctx);
> +    if (err < 0)
> +        return err;
> +
> +    base_ctx->async_encode = 1;
> +    base_ctx->encode_fifo = av_fifo_alloc2(base_ctx->async_depth,
> +                                           sizeof(FFVulkanEncodePicture *), 0);
> +    if (!base_ctx->encode_fifo)
> +        return AVERROR(ENOMEM);
> +
> +    return 0;
> +}
> diff --git a/libavcodec/vulkan_encode.h b/libavcodec/vulkan_encode.h
> new file mode 100644
> index 0000000000..995befeca2
> --- /dev/null
> +++ b/libavcodec/vulkan_encode.h
> @@ -0,0 +1,243 @@
> +/*
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#ifndef AVCODEC_VULKAN_ENCODE_H
> +#define AVCODEC_VULKAN_ENCODE_H
> +
> +#include "codec_id.h"
> +#include "internal.h"
> +
> +#include "encode.h"
> +#include "hwconfig.h"
> +
> +#include "vulkan_video.h"
> +#include "hw_base_encode.h"
> +
> +typedef struct FFVulkanEncodeDescriptor {
> +    enum AVCodecID                   codec_id;
> +    FFVulkanExtensions               encode_extension;
> +    VkVideoCodecOperationFlagBitsKHR encode_op;
> +
> +    VkExtensionProperties ext_props;
> +} FFVulkanEncodeDescriptor;
> +
> +typedef struct FFVulkanEncodePicture {
> +    FFHWBaseEncodePicture  base;
> +    VkVideoPictureResourceInfoKHR dpb_res;
> +    VkVideoReferenceSlotInfoKHR dpb_slot;
> +
> +    struct {
> +        VkImageView        view;
> +        VkImageAspectFlags aspect;
> +    } in;
> +
> +    struct {
> +        VkImageView        view;
> +        VkImageAspectFlags aspect;
> +    } dpb;
> +
> +    void                  *codec_layer;
> +    void                  *codec_rc_layer;
> +
> +    FFVkExecContext       *exec;
> +    AVBufferRef           *pkt_buf;
> +    int                    slices_offset;
> +} FFVulkanEncodePicture;
> +
> +/**
> + * Callback for writing stream-level headers.
> + */
> +typedef int (*vkenc_cb_write_stream_headers)(AVCodecContext *avctx,
> +                                             uint8_t *data, size_t *data_len);
> +
> +/**
> + * Callback for initializing codec-specific picture headers.
> + */
> +typedef int (*vkenc_cb_init_pic_headers)(AVCodecContext *avctx,
> +                                         FFVulkanEncodePicture *pic);
> +
> +/**
> + * Callback for writing alignment data.
> + * Align is the value to align offset to.
> + */
> +typedef int (*vkenc_cb_write_filler)(AVCodecContext *avctx, uint32_t filler,
> +                                     uint8_t *data, size_t *data_len);
> +
> +/**
> + * Callback for writing any extra units requested. data_len must be set
> + * to the available size, and its value will be overwritten by the #bytes written
> + * to the output buffer.
> + */
> +typedef int (*vkenc_cb_write_extra_headers)(AVCodecContext *avctx,
> +                                            FFVulkanEncodePicture *pic,
> +                                            uint8_t *data, size_t *data_len);
> +
> +typedef struct FFVulkanCodec {
> +    /**
> +     * Codec feature flags.
> +     */
> +    int flags;
> +/* Codec output packet without timestamp delay, which means the
> + * output packet has same PTS and DTS. For AV1. */
> +#define VK_ENC_FLAG_NO_DELAY 1 << 6
> +
> +    /**
> +     * Size of the codec-specific picture struct.
> +     */
> +    size_t picture_priv_data_size;
> +
> +    /**
> +     * Size of the filler header.
> +     */
> +    size_t filler_header_size;
> +
> +    /**
> +     * Initialize codec-specific structs in a Vulkan profile.
> +     */
> +    int (*init_profile)(AVCodecContext *avctx, VkVideoProfileInfoKHR *profile,
> +                        void *pnext);
> +
> +    /**
> +     * Initialize codec-specific rate control structures for a picture.
> +     */
> +    int (*init_pic_rc)(AVCodecContext *avctx, FFHWBaseEncodePicture *pic,
> +                       VkVideoEncodeRateControlInfoKHR *rc_info,
> +                       VkVideoEncodeRateControlLayerInfoKHR *rc_layer);
> +
> +    /**
> +     * Initialize codec-specific picture parameters.
> +     */
> +    int (*init_pic_params)(AVCodecContext *avctx, FFHWBaseEncodePicture *pic,
> +                           VkVideoEncodeInfoKHR *encode_info);
> +
> +    /**
> +     * Callback for writing stream headers.
> +     */
> +    int (*write_sequence_headers)(AVCodecContext *avctx,
> +                                  FFHWBaseEncodePicture *base_pic,
> +                                  uint8_t *data, size_t *data_len);
> +
> +    /**
> +     * Callback for writing alignment data.
> +     */
> +    int (*write_filler)(AVCodecContext *avctx, uint32_t filler,
> +                        uint8_t *data, size_t *data_len);
> +
> +    /**
> +     * Callback for writing any extra units requested. data_len must be set
> +     * to the available size, and its value will be overwritten by the #bytes written
> +     * to the output buffer.
> +     */
> +    int (*write_extra_headers)(AVCodecContext *avctx, FFHWBaseEncodePicture *pic,
> +                               uint8_t *data, size_t *data_len);
> +} FFVulkanCodec;
> +
> +typedef struct FFVkEncodeCommonOptions {
> +    int qp;
> +    int quality;
> +    int profile;
> +    int level;
> +    int async_depth;
> +    VkVideoEncodeUsageFlagBitsKHR usage;
> +    VkVideoEncodeContentFlagBitsKHR content;
> +    VkVideoEncodeTuningModeKHR tune;
> +
> +    VkVideoEncodeRateControlModeFlagBitsKHR rc_mode;
> +#define FF_VK_RC_MODE_AUTO 0xFFFFFFFF
> +} FFVkEncodeCommonOptions;
> +
> +typedef struct FFVulkanEncodeContext {
> +    FFVulkanContext s;
> +    FFVkVideoCommon common;
> +    FFHWBaseEncodeContext base;
> +    const FFVulkanCodec *codec;
> +
> +    /* Session parameters object, initialized by each codec independently
> +     * and set here. */
> +    VkVideoSessionParametersKHR session_params;
> +
> +    AVBufferPool *buf_pool;
> +
> +    VkFormat pic_format;
> +
> +    FFVkEncodeCommonOptions opts;
> +
> +    VkVideoProfileInfoKHR profile;
> +    VkVideoProfileListInfoKHR profile_list;
> +    VkVideoCapabilitiesKHR caps;
> +    VkVideoEncodeQualityLevelPropertiesKHR quality_props;
> +    VkVideoEncodeCapabilitiesKHR enc_caps;
> +    VkVideoEncodeUsageInfoKHR usage_info;
> +
> +    FFVkQueueFamilyCtx qf_enc;
> +    FFVkExecPool enc_pool;
> +
> +    uint32_t slots[32];
> +} FFVulkanEncodeContext;
> +
> +#define VULKAN_ENCODE_COMMON_OPTIONS \
> +    { "qp", "Use an explicit constant quantizer for the whole stream", OFFSET(common.opts.qp), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 255, FLAGS }, \
> +    { "quality", "Set encode quality (trades off against speed, higher is faster)", OFFSET(common.opts.quality), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, FLAGS }, \
> +    { "rc_mode", "Select rate control type", OFFSET(common.opts.rc_mode), AV_OPT_TYPE_INT, { .i64 = FF_VK_RC_MODE_AUTO }, 0, FF_VK_RC_MODE_AUTO, FLAGS, "rc_mode" }, \
> +        { "auto",    "Choose mode automatically based on parameters", 0, AV_OPT_TYPE_CONST, { .i64 = FF_VK_RC_MODE_AUTO }, INT_MIN, INT_MAX, FLAGS, "rc_mode" }, \
> +        { "driver",  "Driver-specific rate control", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DEFAULT_KHR      }, INT_MIN, INT_MAX, FLAGS, "rc_mode" }, \
> +        { "cqp",     "Constant quantizer mode", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR }, INT_MIN, INT_MAX, FLAGS, "rc_mode" }, \
> +        { "cbr",     "Constant bitrate mode", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_RATE_CONTROL_MODE_CBR_BIT_KHR      }, INT_MIN, INT_MAX, FLAGS, "rc_mode" }, \
> +        { "vbr",     "Variable bitrate mode", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_RATE_CONTROL_MODE_VBR_BIT_KHR      }, INT_MIN, INT_MAX, FLAGS, "rc_mode" }, \
> +    { "tune", "Select tuning type", OFFSET(common.opts.tune), AV_OPT_TYPE_INT, { .i64 = VK_VIDEO_ENCODE_TUNING_MODE_DEFAULT_KHR }, 0, INT_MAX, FLAGS, "tune" }, \
> +        { "default",  "Default tuning", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_TUNING_MODE_DEFAULT_KHR           }, INT_MIN, INT_MAX, FLAGS, "tune" }, \
> +        { "hq",       "High quality tuning", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_TUNING_MODE_HIGH_QUALITY_KHR      }, INT_MIN, INT_MAX, FLAGS, "tune" }, \
> +        { "ll",       "Low-latency tuning", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_TUNING_MODE_LOW_LATENCY_KHR       }, INT_MIN, INT_MAX, FLAGS, "tune" }, \
> +        { "ull",      "Ultra low-latency tuning", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_TUNING_MODE_ULTRA_LOW_LATENCY_KHR }, INT_MIN, INT_MAX, FLAGS, "tune" }, \
> +        { "lossless", "Lossless mode tuning", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_TUNING_MODE_LOSSLESS_KHR          }, INT_MIN, INT_MAX, FLAGS, "tune" }, \
> +    { "usage", "Select usage type", OFFSET(common.opts.usage), AV_OPT_TYPE_FLAGS, { .i64 = VK_VIDEO_ENCODE_USAGE_DEFAULT_KHR }, 0, INT_MAX, FLAGS, "usage" }, \
> +        { "default",    "Default optimizations", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_USAGE_DEFAULT_KHR          }, INT_MIN, INT_MAX, FLAGS, "usage" }, \
> +        { "transcode",  "Optimize for transcoding", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_USAGE_TRANSCODING_BIT_KHR  }, INT_MIN, INT_MAX, FLAGS, "usage" }, \
> +        { "stream",     "Optimize for streaming", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_USAGE_STREAMING_BIT_KHR    }, INT_MIN, INT_MAX, FLAGS, "usage" }, \
> +        { "record",     "Optimize for offline recording", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_USAGE_RECORDING_BIT_KHR    }, INT_MIN, INT_MAX, FLAGS, "usage" }, \
> +        { "conference", "Optimize for teleconferencing", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_USAGE_CONFERENCING_BIT_KHR }, INT_MIN, INT_MAX, FLAGS, "usage" }, \
> +    { "content", "Select content type", OFFSET(common.opts.content), AV_OPT_TYPE_FLAGS, { .i64 = VK_VIDEO_ENCODE_CONTENT_DEFAULT_KHR }, 0, INT_MAX, FLAGS, "content" }, \
> +        { "default",  "Default content", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_CONTENT_DEFAULT_KHR      }, INT_MIN, INT_MAX, FLAGS, "content" }, \
> +        { "camera",   "Camera footage", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_CONTENT_CAMERA_BIT_KHR   }, INT_MIN, INT_MAX, FLAGS, "content" }, \
> +        { "desktop",  "Screen recording", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_CONTENT_DESKTOP_BIT_KHR  }, INT_MIN, INT_MAX, FLAGS, "content" }, \
> +        { "rendered", "Game or 3D content", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_CONTENT_RENDERED_BIT_KHR }, INT_MIN, INT_MAX, FLAGS, "content" }
> +
> +/**
> + * Initialize encoder.
> + */
> +av_cold int ff_vulkan_encode_init(AVCodecContext *avctx, FFVulkanEncodeContext *ctx,
> +                                  const FFVulkanEncodeDescriptor *vk_desc,
> +                                  const FFVulkanCodec *codec,
> +                                  void *codec_caps, void *quality_pnext);
> +
> +/**
> + * Encode.
> + */
> +int ff_vulkan_encode_receive_packet(AVCodecContext *avctx, AVPacket *pkt);
> +
> +/**
> + * Uninitialize encoder.
> + */
> +void ff_vulkan_encode_uninit(FFVulkanEncodeContext *ctx);
> +
> +/**
> + * Paperwork.
> + */
> +extern const AVCodecHWConfigInternal *const ff_vulkan_encode_hw_configs[];
> +
> +#endif /* AVCODEC_VULKAN_ENCODE_H */

Going to push the Vulkan common code and the H.264 encoder tomorrow 
unless there are objections.
Lynne Sept. 16, 2024, 12:10 p.m. UTC | #6
On 15/09/2024 11:55, Lynne via ffmpeg-devel wrote:
> On 09/09/2024 12:37, Lynne wrote:
>> This commit adds the common Vulkan video encoding framework.
>> It makes full use of the asynchronous features of our new common
>> hardware encoding code, and of Vulkan.
>> The code is able to handle anything from H264 to AV1 and MJPEG.
>> ---
>>   configure                  |   2 +
>>   libavcodec/Makefile        |   2 +-
>>   libavcodec/vulkan_encode.c | 979 +++++++++++++++++++++++++++++++++++++
>>   libavcodec/vulkan_encode.h | 243 +++++++++
>>   4 files changed, 1225 insertions(+), 1 deletion(-)
>>   create mode 100644 libavcodec/vulkan_encode.c
>>   create mode 100644 libavcodec/vulkan_encode.h
>>
>> diff --git a/configure b/configure
>> index a8e67d230c..6cfb736a86 100755
>> --- a/configure
>> +++ b/configure
>> @@ -2638,6 +2638,7 @@ CONFIG_EXTRA="
>>       vp3dsp
>>       vp56dsp
>>       vp8dsp
>> +    vulkan_encode
>>       wma_freqs
>>       wmv2dsp
>>   "
>> @@ -3299,6 +3300,7 @@ qsvdec_select="qsv"
>>   qsvenc_select="qsv"
>>   qsvvpp_select="qsv"
>>   vaapi_encode_deps="vaapi"
>> +vulkan_encode_deps="vulkan"
>>   v4l2_m2m_deps="linux_videodev2_h sem_timedwait"
>>   bilateral_cuda_filter_deps="ffnvcodec"
>> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
>> index 27ef4638ce..ff6a3c4efc 100644
>> --- a/libavcodec/Makefile
>> +++ b/libavcodec/Makefile
>> @@ -1282,7 +1282,7 @@ SKIPHEADERS-$(CONFIG_QSVENC)           += qsvenc.h
>>   SKIPHEADERS-$(CONFIG_VAAPI)            += vaapi_decode.h 
>> vaapi_hevc.h vaapi_encode.h
>>   SKIPHEADERS-$(CONFIG_VDPAU)            += vdpau.h vdpau_internal.h
>>   SKIPHEADERS-$(CONFIG_VIDEOTOOLBOX)     += videotoolbox.h vt_internal.h
>> -SKIPHEADERS-$(CONFIG_VULKAN)           += vulkan.h vulkan_video.h 
>> vulkan_decode.h
>> +SKIPHEADERS-$(CONFIG_VULKAN)           += vulkan.h vulkan_video.h 
>> vulkan_encode.h vulkan_decode.h
>>   SKIPHEADERS-$(CONFIG_V4L2_M2M)         += v4l2_buffers.h 
>> v4l2_context.h v4l2_m2m.h
>>   SKIPHEADERS-$(CONFIG_ZLIB)             += zlib_wrapper.h
>> diff --git a/libavcodec/vulkan_encode.c b/libavcodec/vulkan_encode.c
>> new file mode 100644
>> index 0000000000..5e87d4c073
>> --- /dev/null
>> +++ b/libavcodec/vulkan_encode.c
>> @@ -0,0 +1,979 @@
>> +/*
>> + * This file is part of FFmpeg.
>> + *
>> + * FFmpeg is free software; you can redistribute it and/or
>> + * modify it under the terms of the GNU Lesser General Public
>> + * License as published by the Free Software Foundation; either
>> + * version 2.1 of the License, or (at your option) any later version.
>> + *
>> + * FFmpeg is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> + * Lesser General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU Lesser General Public
>> + * License along with FFmpeg; if not, write to the Free Software
>> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 
>> 02110-1301 USA
>> + */
>> +
>> +#include "libavutil/mem.h"
>> +#include "libavutil/avassert.h"
>> +#include "vulkan_encode.h"
>> +#include "config.h"
>> +
>> +#include "libavutil/vulkan_loader.h"
>> +
>> +const AVCodecHWConfigInternal *const ff_vulkan_encode_hw_configs[] = {
>> +    HW_CONFIG_ENCODER_FRAMES(VULKAN, VULKAN),
>> +    NULL,
>> +};
>> +
>> +av_cold void ff_vulkan_encode_uninit(FFVulkanEncodeContext *ctx)
>> +{
>> +    FFVulkanContext *s = &ctx->s;
>> +    FFVulkanFunctions *vk = &s->vkfn;
>> +
>> +    /* Wait on and free execution pool */
>> +    ff_vk_exec_pool_free(s, &ctx->enc_pool);
>> +
>> +    /* Destroy the session params */
>> +    if (ctx->session_params)
>> +        vk->DestroyVideoSessionParametersKHR(s->hwctx->act_dev,
>> +                                             ctx->session_params,
>> +                                             s->hwctx->alloc);
>> +
>> +    ff_hw_base_encode_close(&ctx->base);
>> +
>> +    av_buffer_pool_uninit(&ctx->buf_pool);
>> +
>> +    ff_vk_video_common_uninit(s, &ctx->common);
>> +
>> +    ff_vk_uninit(s);
>> +}
>> +
>> +static int vulkan_encode_init(AVCodecContext *avctx, 
>> FFHWBaseEncodePicture *pic)
>> +{
>> +    int err;
>> +    FFVulkanEncodeContext *ctx = avctx->priv_data;
>> +    FFVulkanEncodePicture *vp = pic->priv;
>> +
>> +    AVFrame *f = pic->input_image;
>> +    AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx- 
>> >data;
>> +    AVVulkanFramesContext *vkfc = hwfc->hwctx;
>> +    AVVkFrame *vkf = (AVVkFrame *)f->data[0];
>> +
>> +    if (ctx->codec->picture_priv_data_size > 0) {
>> +        pic->codec_priv = av_mallocz(ctx->codec- 
>> >picture_priv_data_size);
>> +        if (!pic->codec_priv)
>> +            return AVERROR(ENOMEM);
>> +    }
>> +
>> +    /* Input image view */
>> +    err = ff_vk_create_view(&ctx->s, &ctx->common,
>> +                            &vp->in.view, &vp->in.aspect,
>> +                            vkf, vkfc->format[0], 0);
>> +    if (err < 0)
>> +        return err;
>> +
>> +    /* Reference view */
>> +    if (!ctx->common.layered_dpb) {
>> +        AVFrame *rf = pic->recon_image;
>> +        AVVkFrame *rvkf = (AVVkFrame *)rf->data[0];
>> +        err = ff_vk_create_view(&ctx->s, &ctx->common,
>> +                                &vp->dpb.view, &vp->dpb.aspect,
>> +                                rvkf, ctx->pic_format, 1);
>> +        if (err < 0)
>> +            return err;
>> +    } else {
>> +        vp->dpb.view = ctx->common.layered_view;
>> +        vp->dpb.aspect = ctx->common.layered_aspect;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static int vulkan_encode_free(AVCodecContext *avctx, 
>> FFHWBaseEncodePicture *pic)
>> +{
>> +    FFVulkanEncodeContext *ctx = avctx->priv_data;
>> +    FFVulkanFunctions *vk = &ctx->s.vkfn;
>> +
>> +    FFVulkanEncodePicture *vp = pic->priv;
>> +
>> +    if (vp->in.view)
>> +        vk->DestroyImageView(ctx->s.hwctx->act_dev, vp->in.view,
>> +                             ctx->s.hwctx->alloc);
>> +
>> +    if (!ctx->common.layered_dpb && vp->dpb.view)
>> +        vk->DestroyImageView(ctx->s.hwctx->act_dev, vp->dpb.view,
>> +                             ctx->s.hwctx->alloc);
>> +
>> +    ctx->slots[vp->dpb_slot.slotIndex] = 0;
>> +
>> +    return 0;
>> +}
>> +
>> +static int init_pic_rc(AVCodecContext *avctx, FFHWBaseEncodePicture 
>> *pic,
>> +                       VkVideoEncodeRateControlInfoKHR *rc_info,
>> +                       VkVideoEncodeRateControlLayerInfoKHR 
>> *rc_layer /* Goes in ^ */)
>> +{
>> +    FFVulkanEncodeContext *ctx = avctx->priv_data;
>> +
>> +    *rc_info = (VkVideoEncodeRateControlInfoKHR) {
>> +        .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_RATE_CONTROL_INFO_KHR,
>> +        .rateControlMode = ctx->opts.rc_mode,
>> +    };
>> +
>> +    if (ctx->opts.rc_mode > 
>> VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR) {
>> +        *rc_layer = (VkVideoEncodeRateControlLayerInfoKHR) {
>> +            .sType = 
>> VK_STRUCTURE_TYPE_VIDEO_ENCODE_RATE_CONTROL_LAYER_INFO_KHR,
>> +            .averageBitrate = avctx->bit_rate,
>> +            .maxBitrate = avctx->rc_max_rate ? avctx->rc_max_rate : 
>> avctx->bit_rate,
>> +            .frameRateNumerator = avctx->framerate.num,
>> +            .frameRateDenominator = avctx->framerate.den,
>> +        };
>> +        rc_info->layerCount++;
>> +        rc_info->pLayers = rc_layer;
>> +    }
>> +
>> +    return ctx->codec->init_pic_rc(avctx, pic, rc_info, rc_layer);
>> +}
>> +
>> +static int vulkan_encode_issue(AVCodecContext *avctx,
>> +                               FFHWBaseEncodePicture *base_pic)
>> +{
>> +    FFVulkanEncodeContext *ctx = avctx->priv_data;
>> +    FFVulkanFunctions *vk = &ctx->s.vkfn;
>> +
>> +    const size_t size_align = ctx->caps.minBitstreamBufferSizeAlignment;
>> +
>> +    FFVulkanEncodePicture *vp = base_pic->priv;
>> +    AVFrame *src = (AVFrame *)base_pic->input_image;
>> +    AVVkFrame *vkf = (AVVkFrame *)src->data[0];
>> +
>> +    int err, max_pkt_size;
>> +
>> +    FFVkBuffer *sd_buf;
>> +
>> +    int slot_index = -1;
>> +    FFVkExecContext *exec;
>> +    VkCommandBuffer cmd_buf;
>> +    VkImageMemoryBarrier2 img_bar[37];
>> +    int nb_img_bar = 0;
>> +
>> +    /* Coding start/end */
>> +    VkVideoBeginCodingInfoKHR encode_start;
>> +    VkVideoEndCodingInfoKHR encode_end = {
>> +        .sType = VK_STRUCTURE_TYPE_VIDEO_END_CODING_INFO_KHR,
>> +    };
>> +
>> +    VkVideoEncodeRateControlLayerInfoKHR rc_layer;
>> +    VkVideoEncodeRateControlInfoKHR rc_info;
>> +    VkVideoEncodeQualityLevelInfoKHR q_info;
>> +    VkVideoCodingControlInfoKHR encode_ctrl;
>> +
>> +    VkVideoReferenceSlotInfoKHR ref_slot[37];
>> +    VkVideoEncodeInfoKHR encode_info;
>> +
>> +    /* Create packet data buffer */
>> +    max_pkt_size = FFALIGN(3 * ctx->base.surface_width * ctx- 
>> >base.surface_height + (1 << 16),
>> +                           ctx->caps.minBitstreamBufferSizeAlignment);
>> +
>> +    err = ff_vk_get_pooled_buffer(&ctx->s, &ctx->buf_pool, &vp->pkt_buf,
>> +                                  
>> VK_BUFFER_USAGE_VIDEO_ENCODE_DST_BIT_KHR,
>> +                                  &ctx->profile_list, max_pkt_size,
>> +                                  VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
>> +                                  VK_MEMORY_PROPERTY_HOST_CACHED_BIT);
>> +    if (err < 0)
>> +        return err;
>> +
>> +    sd_buf = (FFVkBuffer *)vp->pkt_buf->data;
>> +
>> +    /* Setup rate control */
>> +    err = init_pic_rc(avctx, base_pic, &rc_info, &rc_layer);
>> +    if (err < 0)
>> +        return err;
>> +
>> +    q_info = (VkVideoEncodeQualityLevelInfoKHR) {
>> +        .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_QUALITY_LEVEL_INFO_KHR,
>> +        .pNext = &rc_info,
>> +        .qualityLevel = ctx->opts.quality,
>> +    };
>> +    encode_ctrl = (VkVideoCodingControlInfoKHR) {
>> +        .sType = VK_STRUCTURE_TYPE_VIDEO_CODING_CONTROL_INFO_KHR,
>> +        .pNext = &q_info,
>> +        .flags = VK_VIDEO_CODING_CONTROL_ENCODE_QUALITY_LEVEL_BIT_KHR |
>> +                 VK_VIDEO_CODING_CONTROL_ENCODE_RATE_CONTROL_BIT_KHR |
>> +                 (base_pic->force_idr ? 
>> VK_VIDEO_CODING_CONTROL_RESET_BIT_KHR : 0),
>> +    };
>> +
>> +    /* Current picture's ref slot */
>> +    vp->dpb_res = (VkVideoPictureResourceInfoKHR) {
>> +        .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR,
>> +        .pNext = NULL,
>> +        .codedOffset = { 0 },
>> +        .codedExtent = (VkExtent2D){ ctx->base.surface_width,
>> +                                     ctx->base.surface_height },
>> +        .baseArrayLayer = 0,
>> +        .imageViewBinding = vp->dpb.view,
>> +    };
>> +
>> +    for (int i = 0; i < ctx->caps.maxDpbSlots; i++) {
>> +        if (!ctx->slots[i]) {
>> +            slot_index = i;
>> +            ctx->slots[i] = 1;
>> +            break;
>> +        }
>> +    }
>> +    av_assert0(slot_index >= 0);
>> +
>> +    vp->dpb_slot = (VkVideoReferenceSlotInfoKHR) {
>> +        .sType = VK_STRUCTURE_TYPE_VIDEO_REFERENCE_SLOT_INFO_KHR,
>> +        .pNext = NULL,  // Set later
>> +        .slotIndex = slot_index,
>> +        .pPictureResource = &vp->dpb_res,
>> +    };
>> +
>> +    encode_info = (VkVideoEncodeInfoKHR) {
>> +        .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_INFO_KHR,
>> +        .pNext = NULL, // Set later
>> +        .flags = 0x0,
>> +        .srcPictureResource = (VkVideoPictureResourceInfoKHR) {
>> +            .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR,
>> +            .pNext = NULL,
>> +            .codedOffset = { 0, 0 },
>> +            .codedExtent = (VkExtent2D){ base_pic->input_image->width,
>> +                                         base_pic->input_image- 
>> >height },
>> +            .baseArrayLayer = 0,
>> +            .imageViewBinding = vp->in.view,
>> +        },
>> +        .pSetupReferenceSlot = &vp->dpb_slot,
>> +        .referenceSlotCount = 0,
>> +        .pReferenceSlots = ref_slot,
>> +        .dstBuffer = sd_buf->buf,
>> +        .dstBufferOffset = 0,
>> +        .dstBufferRange = sd_buf->size,
>> +        .precedingExternallyEncodedBytes = 0,
>> +    };
>> +
>> +    for (int i = 0; i < MAX_REFERENCE_LIST_NUM; i++) {
>> +        for (int j = 0; j < base_pic->nb_refs[i]; j++) {
>> +            FFHWBaseEncodePicture *ref = base_pic->refs[i][j];
>> +            FFVulkanEncodePicture *rvp = ref->priv;
>> +            ref_slot[encode_info.referenceSlotCount++] = rvp->dpb_slot;
>> +        }
>> +    }
>> +
>> +    /* Setup picture parameters */
>> +    err = ctx->codec->init_pic_params(avctx, base_pic,
>> +                                      &encode_info);
>> +    if (err < 0)
>> +        return err;
>> +
>> +    encode_start = (VkVideoBeginCodingInfoKHR) {
>> +        .sType = VK_STRUCTURE_TYPE_VIDEO_BEGIN_CODING_INFO_KHR,
>> +        .pNext = !base_pic->force_idr ? &rc_info : NULL,
>> +        .videoSession = ctx->common.session,
>> +        .videoSessionParameters = ctx->session_params,
>> +        .referenceSlotCount = encode_info.referenceSlotCount,
>> +        .pReferenceSlots = ref_slot,
>> +    };
>> +
>> +    /* Calling vkCmdBeginVideoCodingKHR requires to declare all 
>> references
>> +     * being enabled upfront, including the current frame's output ref.
>> +     * If layered DBPs are used, make sure its not included twice. */
>> +    if (!ctx->common.layered_dpb || !encode_info.referenceSlotCount) {
>> +        ref_slot[encode_info.referenceSlotCount] = vp->dpb_slot;
>> +        ref_slot[encode_info.referenceSlotCount].slotIndex = -1;
>> +        encode_start.referenceSlotCount++;
>> +    }
>> +
>> +    /* Write header */
>> +    if (base_pic->type == FF_HW_PICTURE_TYPE_IDR) {
>> +        uint8_t *hdr_dst = sd_buf->mapped_mem + 
>> encode_info.dstBufferOffset;
>> +        size_t data_size = encode_info.dstBufferRange;
>> +        err = ctx->codec->write_sequence_headers(avctx, base_pic, 
>> hdr_dst, &data_size);
>> +        if (err < 0)
>> +            goto fail;
>> +        encode_info.dstBufferOffset += data_size;
>> +        encode_info.dstBufferRange  -= data_size;
>> +    }
>> +
>> +    /* Write extra units */
>> +    if (ctx->codec->write_extra_headers) {
>> +        uint8_t *hdr_dst = sd_buf->mapped_mem + 
>> encode_info.dstBufferOffset;
>> +        size_t data_size = encode_info.dstBufferRange;
>> +        err = ctx->codec->write_extra_headers(avctx, base_pic, 
>> hdr_dst, &data_size);
>> +        if (err < 0)
>> +            goto fail;
>> +        encode_info.dstBufferOffset += data_size;
>> +        encode_info.dstBufferRange  -= data_size;
>> +    }
>> +
>> +    /* Align buffer offset to the required value with filler units */
>> +    if (ctx->codec->write_filler) {
>> +        uint8_t *hdr_dst = sd_buf->mapped_mem + 
>> encode_info.dstBufferOffset;
>> +        size_t data_size = encode_info.dstBufferRange;
>> +
>> +        uint32_t offset = encode_info.dstBufferOffset;
>> +        size_t offset_align = ctx- 
>> >caps.minBitstreamBufferOffsetAlignment;
>> +
>> +        uint32_t filler_data = FFALIGN(offset, offset_align) - offset;
>> +
>> +        if (filler_data) {
>> +            while (filler_data < ctx->codec->filler_header_size)
>> +                filler_data += offset_align;
>> +
>> +            filler_data -= ctx->codec->filler_header_size;
>> +
>> +            err = ctx->codec->write_filler(avctx, filler_data,
>> +                                           hdr_dst, &data_size);
>> +            if (err < 0)
>> +                goto fail;
>> +
>> +            encode_info.dstBufferOffset += data_size;
>> +            encode_info.dstBufferRange  -= data_size;
>> +        }
>> +    }
>> +
>> +    vp->slices_offset = encode_info.dstBufferOffset;
>> +
>> +    /* Align buffer size to the nearest lower alignment requirement. */
>> +    encode_info.dstBufferRange -= size_align;
>> +    encode_info.dstBufferRange = FFALIGN(encode_info.dstBufferRange,
>> +                                         size_align);
>> +
>> +    /* Start command buffer recording */
>> +    exec = vp->exec = ff_vk_exec_get(&ctx->enc_pool);
>> +    ff_vk_exec_start(&ctx->s, exec);
>> +    cmd_buf = exec->buf;
>> +
>> +    /* Output packet buffer */
>> +    err = ff_vk_exec_add_dep_buf(&ctx->s, exec, &vp->pkt_buf, 1, 1);
>> +    if (err < 0)
>> +        goto fail;
>> +
>> +    /* Source image */
>> +    err = ff_vk_exec_add_dep_frame(&ctx->s, exec, src,
>> +                                   VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
>> +                                   
>> VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR);
>> +    if (err < 0)
>> +        goto fail;
>> +
>> +    /* Source image layout conversion */
>> +    img_bar[nb_img_bar] = (VkImageMemoryBarrier2) {
>> +        .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
>> +        .pNext = NULL,
>> +        .srcStageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
>> +        .srcAccessMask = vkf->access[0],
>> +        .dstStageMask = VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR,
>> +        .dstAccessMask = VK_ACCESS_2_VIDEO_ENCODE_READ_BIT_KHR,
>> +        .oldLayout = vkf->layout[0],
>> +        .newLayout = VK_IMAGE_LAYOUT_VIDEO_ENCODE_SRC_KHR,
>> +        .srcQueueFamilyIndex = vkf->queue_family[0],
>> +        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
>> +        .image = vkf->img[0],
>> +        .subresourceRange = (VkImageSubresourceRange) {
>> +            .aspectMask = vp->in.aspect,
>> +            .layerCount = 1,
>> +            .levelCount = 1,
>> +        },
>> +    };
>> +    ff_vk_exec_update_frame(&ctx->s, exec, src,
>> +                            &img_bar[nb_img_bar], &nb_img_bar);
>> +
>> +    if (!ctx->common.layered_dpb) {
>> +        /* Source image's ref slot.
>> +         * No need to do a layout conversion, since the frames which 
>> are allocated
>> +         * with a DPB usage are automatically converted. */
>> +        err = ff_vk_exec_add_dep_frame(&ctx->s, exec, base_pic- 
>> >recon_image,
>> +                                       
>> VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
>> +                                       
>> VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR);
>> +        if (err < 0)
>> +            return err;
>> +
>> +        /* All references */
>> +        for (int i = 0; i < MAX_REFERENCE_LIST_NUM; i++) {
>> +            for (int j = 0; j < base_pic->nb_refs[i]; j++) {
>> +                FFHWBaseEncodePicture *ref = base_pic->refs[i][j];
>> +                err = ff_vk_exec_add_dep_frame(&ctx->s, exec, ref- 
>> >recon_image,
>> +                                               
>> VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
>> +                                               
>> VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR);
>> +                if (err < 0)
>> +                    return err;
>> +            }
>> +        }
>> +    } else {
>> +        err = ff_vk_exec_add_dep_frame(&ctx->s, exec, ctx- 
>> >common.layered_frame,
>> +                                       
>> VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR,
>> +                                       
>> VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR);
>> +        if (err < 0)
>> +            return err;
>> +    }
>> +
>> +    /* Change image layout */
>> +    vk->CmdPipelineBarrier2(cmd_buf, &(VkDependencyInfo) {
>> +            .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
>> +            .pImageMemoryBarriers = img_bar,
>> +            .imageMemoryBarrierCount = nb_img_bar,
>> +        });
>> +
>> +    /* Start, use parameters */
>> +    vk->CmdBeginVideoCodingKHR(cmd_buf, &encode_start);
>> +
>> +    /* Send control data */
>> +    if (base_pic->force_idr)
>> +        vk->CmdControlVideoCodingKHR(cmd_buf, &encode_ctrl);
>> +
>> +    /* Encode */
>> +    vk->CmdBeginQuery(cmd_buf, ctx->enc_pool.query_pool, exec- 
>> >query_idx + 0, 0);
>> +    vk->CmdEncodeVideoKHR(cmd_buf, &encode_info);
>> +    vk->CmdEndQuery(cmd_buf, ctx->enc_pool.query_pool, exec- 
>> >query_idx + 0);
>> +
>> +    /* End encoding */
>> +    vk->CmdEndVideoCodingKHR(cmd_buf, &encode_end);
>> +
>> +    /* End recording and submit for execution */
>> +    err = ff_vk_exec_submit(&ctx->s, vp->exec);
>> +    if (err < 0)
>> +        goto fail;
>> +
>> +    /* We don't need to keep the input image any longer, its already 
>> ref'd */
>> +    av_frame_free(&base_pic->input_image);
>> +
>> +    return 0;
>> +
>> +fail:
>> +    return err;
>> +}
>> +
>> +static void vulkan_encode_wait(AVCodecContext *avctx,
>> +                               FFHWBaseEncodePicture *base_pic)
>> +{
>> +    FFVulkanEncodeContext *ctx = avctx->priv_data;
>> +    FFVulkanEncodePicture *vp = base_pic->priv;
>> +
>> +    av_assert0(base_pic->encode_issued);
>> +
>> +    if (base_pic->encode_complete)
>> +        return;
>> +
>> +    ff_vk_exec_wait(&ctx->s, vp->exec);
>> +    base_pic->encode_complete = 1;
>> +}
>> +
>> +static int vulkan_encode_output(AVCodecContext *avctx,
>> +                                FFHWBaseEncodePicture *base_pic, 
>> AVPacket *pkt)
>> +{
>> +    VkResult ret;
>> +    FFVulkanEncodePicture *vp = base_pic->priv;
>> +    FFVulkanEncodeContext *ctx = avctx->priv_data;
>> +    FFVkBuffer *sd_buf = (FFVkBuffer *)vp->pkt_buf->data;
>> +    uint32_t *query_data;
>> +
>> +    vulkan_encode_wait(avctx, base_pic);
>> +
>> +    ret = ff_vk_exec_get_query(&ctx->s, vp->exec, (void 
>> **)&query_data, 0);
>> +    if (ret == VK_NOT_READY) {
>> +        av_log(avctx, AV_LOG_ERROR, "Unable to perform query: %s!\n",
>> +               ff_vk_ret2str(ret));
>> +        return AVERROR(EINVAL);
>> +    }
>> +
>> +    if (ret != VK_NOT_READY && ret != VK_SUCCESS) {
>> +        av_log(avctx, AV_LOG_ERROR, "Unable to perform query: %s!\n",
>> +               ff_vk_ret2str(ret));
>> +        return AVERROR_EXTERNAL;
>> +    }
>> +
>> +    if (query_data[2] != VK_QUERY_RESULT_STATUS_COMPLETE_KHR) {
>> +        av_log(avctx, AV_LOG_ERROR, "Unable to encode: %u\n", 
>> query_data[2]);
>> +        return AVERROR_EXTERNAL;
>> +    }
>> +
>> +    /* Invalidate buffer if needed */
>> +    if (!(sd_buf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
>> +        FFVulkanFunctions *vk = &ctx->s.vkfn;
>> +        VkMappedMemoryRange invalidate_buf;
>> +
>> +        int offs = vp->slices_offset;
>> +        /* If the non-coherent alignment is greater than the 
>> bitstream buffer
>> +         * offset's alignment, and the offs value is not aligned 
>> already,
>> +         * align it to the previous alignment point. */
>> +        if (ctx->s.props.properties.limits.nonCoherentAtomSize >
>> +            ctx->caps.minBitstreamBufferOffsetAlignment && offs &&
>> +            (FFALIGN(offs, ctx- 
>> >s.props.properties.limits.nonCoherentAtomSize) != offs)) {
>> +            offs -= ctx->s.props.properties.limits.nonCoherentAtomSize;
>> +            offs = FFALIGN(FFMAX(offs, 0), ctx- 
>> >s.props.properties.limits.nonCoherentAtomSize);
>> +        }
>> +
>> +        invalidate_buf = (VkMappedMemoryRange) {
>> +            .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
>> +            .memory = sd_buf->mem,
>> +            .offset = offs,
>> +            .size = VK_WHOLE_SIZE,
>> +        };
>> +
>> +        vk->FlushMappedMemoryRanges(ctx->s.hwctx->act_dev, 1, 
>> &invalidate_buf);
>> +    }
>> +
>> +    pkt->data = sd_buf->mapped_mem;
>> +    pkt->size = vp->slices_offset + /* base offset */
>> +                query_data[0]       /* secondary offset */ +
>> +                query_data[1]       /* size */;
>> +
>> +    /* Move reference */
>> +    pkt->buf = vp->pkt_buf;
>> +    vp->pkt_buf = NULL;
>> +
>> +    av_log(avctx, AV_LOG_DEBUG, "Frame %"PRId64"/%"PRId64 " encoded\n",
>> +           base_pic->display_order, base_pic->encode_order);
>> +
>> +    ff_hw_base_encode_set_output_property(&ctx->base, avctx,
>> +                                          base_pic, pkt,
>> +                                          ctx->codec->flags & 
>> VK_ENC_FLAG_NO_DELAY);
>> +
>> +    return 0;
>> +}
>> +
>> +static const FFHWEncodePictureOperation vulkan_base_encode_ops = {
>> +    .priv_size = sizeof(FFVulkanEncodePicture),
>> +    .init   = &vulkan_encode_init,
>> +    .issue  = &vulkan_encode_issue,
>> +    .output = &vulkan_encode_output,
>> +    .free   = &vulkan_encode_free,
>> +};
>> +
>> +int ff_vulkan_encode_receive_packet(AVCodecContext *avctx, AVPacket 
>> *pkt)
>> +{
>> +    FFVulkanEncodeContext *ctx = avctx->priv_data;
>> +    return ff_hw_base_encode_receive_packet(&ctx->base, avctx, pkt);
>> +}
>> +
>> +static int vulkan_encode_create_dpb(AVCodecContext *avctx, 
>> FFVulkanEncodeContext *ctx)
>> +{
>> +    int err;
>> +    FFHWBaseEncodeContext *base_ctx = &ctx->base;
>> +    AVVulkanFramesContext *hwfc;
>> +
>> +    enum AVPixelFormat dpb_format;
>> +    err = ff_hw_base_get_recon_format(base_ctx, NULL, &dpb_format);
>> +    if (err < 0)
>> +        return err;
>> +
>> +    base_ctx->recon_frames_ref = av_hwframe_ctx_alloc(base_ctx- 
>> >device_ref);
>> +    if (!base_ctx->recon_frames_ref)
>> +        return AVERROR(ENOMEM);
>> +
>> +    base_ctx->recon_frames = (AVHWFramesContext *)base_ctx- 
>> >recon_frames_ref->data;
>> +    hwfc = (AVVulkanFramesContext *)base_ctx->recon_frames->hwctx;
>> +
>> +    base_ctx->recon_frames->format    = AV_PIX_FMT_VULKAN;
>> +    base_ctx->recon_frames->sw_format = dpb_format;
>> +    base_ctx->recon_frames->width     = base_ctx->surface_width;
>> +    base_ctx->recon_frames->height    = base_ctx->surface_height;
>> +
>> +    hwfc->format[0]    = ctx->pic_format;
>> +    hwfc->create_pnext = &ctx->profile_list;
>> +    hwfc->tiling       = VK_IMAGE_TILING_OPTIMAL;
>> +    hwfc->usage        = VK_IMAGE_USAGE_SAMPLED_BIT              |
>> +                         VK_IMAGE_USAGE_VIDEO_ENCODE_DPB_BIT_KHR;
>> +
>> +    if (ctx->common.layered_dpb)
>> +        hwfc->nb_layers = ctx->caps.maxDpbSlots;
>> +
>> +    err = av_hwframe_ctx_init(base_ctx->recon_frames_ref);
>> +    if (err < 0) {
>> +        av_log(avctx, AV_LOG_ERROR, "Failed to initialise DPB frame 
>> context: %s\n",
>> +               av_err2str(err));
>> +        return err;
>> +    }
>> +
>> +    if (ctx->common.layered_dpb) {
>> +        ctx->common.layered_frame = av_frame_alloc();
>> +        if (!ctx->common.layered_frame)
>> +            return AVERROR(ENOMEM);
>> +
>> +        err = av_hwframe_get_buffer(base_ctx->recon_frames_ref,
>> +                                    ctx->common.layered_frame, 0);
>> +        if (err < 0)
>> +            return AVERROR(ENOMEM);
>> +
>> +        err = ff_vk_create_view(&ctx->s, &ctx->common,
>> +                                &ctx->common.layered_view,
>> +                                &ctx->common.layered_aspect,
>> +                                (AVVkFrame *)ctx- 
>> >common.layered_frame->data[0],
>> +                                hwfc->format[0], 1);
>> +        if (err < 0)
>> +            return err;
>> +
>> +        av_buffer_unref(&base_ctx->recon_frames_ref);
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static av_cold int init_rc(AVCodecContext *avctx, 
>> FFVulkanEncodeContext *ctx)
>> +{
>> +    if (ctx->opts.rc_mode == FF_VK_RC_MODE_AUTO) {
>> +        if (ctx->opts.qp >= 0) {
>> +            ctx->opts.rc_mode = 
>> VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR;
>> +        } else if (avctx->global_quality > 0) {
>> +            if (avctx->flags & AV_CODEC_FLAG_QSCALE)
>> +                ctx->opts.qp = avctx->global_quality / FF_QP2LAMBDA;
>> +            else
>> +                ctx->opts.qp = avctx->global_quality;
>> +            ctx->opts.rc_mode = 
>> VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR;
>> +        } else if (avctx->bit_rate) {
>> +            if (ctx->enc_caps.rateControlModes & 
>> VK_VIDEO_ENCODE_RATE_CONTROL_MODE_VBR_BIT_KHR)
>> +                ctx->opts.rc_mode = 
>> VK_VIDEO_ENCODE_RATE_CONTROL_MODE_VBR_BIT_KHR;
>> +            else if (ctx->enc_caps.rateControlModes & 
>> VK_VIDEO_ENCODE_RATE_CONTROL_MODE_CBR_BIT_KHR)
>> +                ctx->opts.rc_mode = 
>> VK_VIDEO_ENCODE_RATE_CONTROL_MODE_CBR_BIT_KHR;
>> +            else
>> +                ctx->opts.rc_mode = 
>> VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DEFAULT_KHR;
>> +        } else {
>> +            ctx->opts.qp = 18;
>> +            ctx->opts.rc_mode = 
>> VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR;
>> +            av_log(avctx, AV_LOG_WARNING, "No rate control settings 
>> specified, using fixed QP = %i\n",
>> +                   ctx->opts.qp);
>> +        }
>> +    }
>> +
>> +    if (ctx->opts.rc_mode && !(ctx->enc_caps.rateControlModes & ctx- 
>> >opts.rc_mode)) {
>> +        static const char *rc_modes[] = {
>> +            [VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DEFAULT_KHR] = "default",
>> +            [VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR] = 
>> "cqp",
>> +            [VK_VIDEO_ENCODE_RATE_CONTROL_MODE_CBR_BIT_KHR] = "cbr",
>> +            [VK_VIDEO_ENCODE_RATE_CONTROL_MODE_VBR_BIT_KHR] = "vbr",
>> +        };
>> +        av_log(avctx, AV_LOG_ERROR, "Unsupported rate control mode 
>> %i, supported are:\n",
>> +               ctx->opts.rc_mode);
>> +        for (int i = 0; i < av_popcount(ctx- 
>> >enc_caps.rateControlModes); i++) {
>> +            if (!(ctx->enc_caps.rateControlModes & (1 << i)))
>> +                continue;
>> +            av_log(avctx, AV_LOG_ERROR, "    %i: %s\n", i, rc_modes[i]);
>> +        }
>> +        return AVERROR(ENOTSUP);
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +av_cold int ff_vulkan_encode_init(AVCodecContext *avctx, 
>> FFVulkanEncodeContext *ctx,
>> +                                  const FFVulkanEncodeDescriptor 
>> *vk_desc,
>> +                                  const FFVulkanCodec *codec,
>> +                                  void *codec_caps, void *quality_pnext)
>> +{
>> +    int i, err;
>> +    VkResult ret;
>> +    FFVulkanFunctions *vk = &ctx->s.vkfn;
>> +    FFVulkanContext *s = &ctx->s;
>> +    FFHWBaseEncodeContext *base_ctx = &ctx->base;
>> +
>> +    const AVPixFmtDescriptor *desc;
>> +
>> +    VkVideoFormatPropertiesKHR *ret_info;
>> +    uint32_t nb_out_fmts = 0;
>> +
>> +    VkPhysicalDeviceVideoEncodeQualityLevelInfoKHR quality_info;
>> +
>> +    VkQueryPoolVideoEncodeFeedbackCreateInfoKHR query_create;
>> +
>> +    VkVideoSessionCreateInfoKHR session_create = {
>> +        .sType = VK_STRUCTURE_TYPE_VIDEO_SESSION_CREATE_INFO_KHR,
>> +    };
>> +    VkPhysicalDeviceVideoFormatInfoKHR fmt_info = {
>> +        .sType = 
>> VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VIDEO_FORMAT_INFO_KHR,
>> +        .pNext = &ctx->profile_list,
>> +    };
>> +
>> +    if (!avctx->hw_frames_ctx) {
>> +        av_log(avctx, AV_LOG_ERROR, "A hardware frames reference is "
>> +               "required to associate the encoding device.\n");
>> +        return AVERROR(EINVAL);
>> +    }
>> +
>> +    ctx->base.op = &vulkan_base_encode_ops;
>> +    ctx->codec = codec;
>> +
>> +    s->frames_ref = av_buffer_ref(avctx->hw_frames_ctx);
>> +    s->frames = (AVHWFramesContext *)s->frames_ref->data;
>> +    s->hwfc = s->frames->hwctx;
>> +
>> +    s->device = (AVHWDeviceContext *)s->frames->device_ref->data;
>> +    s->hwctx = s->device->hwctx;
>> +
>> +    desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
>> +    if (!desc)
>> +        return AVERROR(EINVAL);
>> +
>> +    s->extensions = ff_vk_extensions_to_mask(s->hwctx- 
>> >enabled_dev_extensions,
>> +                                             s->hwctx- 
>> >nb_enabled_dev_extensions);
>> +
>> +    if (!(s->extensions & FF_VK_EXT_VIDEO_ENCODE_QUEUE)) {
>> +        av_log(avctx, AV_LOG_ERROR, "Device does not support the %s 
>> extension!\n",
>> +               VK_KHR_VIDEO_ENCODE_QUEUE_EXTENSION_NAME);
>> +        return AVERROR(ENOSYS);
>> +    } else if (!(s->extensions & FF_VK_EXT_VIDEO_MAINTENANCE_1)) {
>> +        av_log(avctx, AV_LOG_ERROR, "Device does not support the %s 
>> extension!\n",
>> +               VK_KHR_VIDEO_MAINTENANCE_1_EXTENSION_NAME);
>> +        return AVERROR(ENOSYS);
>> +    } else if (!(s->extensions & vk_desc->encode_extension)) {
>> +        av_log(avctx, AV_LOG_ERROR, "Device does not support decoding 
>> %s!\n",
>> +               avcodec_get_name(avctx->codec_id));
>> +        return AVERROR(ENOSYS);
>> +    }
>> +
>> +    /* Load functions */
>> +    err = ff_vk_load_functions(s->device, vk, s->extensions, 1, 1);
>> +    if (err < 0)
>> +        return err;
>> +
>> +    /* Create queue context */
>> +    err = ff_vk_video_qf_init(s, &ctx->qf_enc,
>> +                              VK_QUEUE_VIDEO_ENCODE_BIT_KHR,
>> +                              vk_desc->encode_op);
>> +    if (err < 0) {
>> +        av_log(avctx, AV_LOG_ERROR, "Decoding of %s is not supported 
>> by this device\n",
>> +               avcodec_get_name(avctx->codec_id));
>> +        return err;
>> +    }
>> +
>> +    /* Load all properties */
>> +    err = ff_vk_load_props(s);
>> +    if (err < 0)
>> +        return err;
>> +
>> +    /* Set tuning */
>> +    ctx->usage_info = (VkVideoEncodeUsageInfoKHR) {
>> +        .sType             = 
>> VK_STRUCTURE_TYPE_VIDEO_ENCODE_USAGE_INFO_KHR,
>> +        .videoUsageHints   = ctx->opts.usage,
>> +        .videoContentHints = ctx->opts.content,
>> +        .tuningMode        = ctx->opts.tune,
>> +    };
>> +
>> +    /* Load up the profile now, needed for caps and to create a query 
>> pool */
>> +    ctx->profile.sType               = 
>> VK_STRUCTURE_TYPE_VIDEO_PROFILE_INFO_KHR;
>> +    ctx->profile.pNext               = &ctx->usage_info;
>> +    ctx->profile.videoCodecOperation = vk_desc->encode_op;
>> +    ctx->profile.chromaSubsampling   = 
>> ff_vk_subsampling_from_av_desc(desc);
>> +    ctx->profile.lumaBitDepth        = 
>> ff_vk_depth_from_av_depth(desc->comp[0].depth);
>> +    ctx->profile.chromaBitDepth      = ctx->profile.lumaBitDepth;
>> +
>> +    /* Setup a profile */
>> +    err = codec->init_profile(avctx, &ctx->profile, &ctx->usage_info);
>> +    if (err < 0)
>> +        return err;
>> +
>> +    ctx->profile_list.sType        = 
>> VK_STRUCTURE_TYPE_VIDEO_PROFILE_LIST_INFO_KHR;
>> +    ctx->profile_list.profileCount = 1;
>> +    ctx->profile_list.pProfiles    = &ctx->profile;
>> +
>> +    /* Get the capabilities of the encoder for the given profile */
>> +    ctx->enc_caps.sType = 
>> VK_STRUCTURE_TYPE_VIDEO_ENCODE_CAPABILITIES_KHR;
>> +    ctx->enc_caps.pNext = codec_caps;
>> +    ctx->caps.sType = VK_STRUCTURE_TYPE_VIDEO_CAPABILITIES_KHR;
>> +    ctx->caps.pNext = &ctx->enc_caps;
>> +
>> +    ret = vk->GetPhysicalDeviceVideoCapabilitiesKHR(s->hwctx->phys_dev,
>> +                                                    &ctx->profile,
>> +                                                    &ctx->caps);
>> +    if (ret == VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR) {
>> +        av_log(avctx, AV_LOG_ERROR, "Unable to initialize encoding: "
>> +               "%s profile \"%s\" not supported!\n",
>> +               avcodec_get_name(avctx->codec_id),
>> +               avcodec_profile_name(avctx->codec_id, avctx->profile));
>> +        return AVERROR(EINVAL);
>> +    } else if (ret == VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR) {
>> +        av_log(avctx, AV_LOG_ERROR, "Unable to initialize encoding: "
>> +               "format (%s) not supported!\n",
>> +               av_get_pix_fmt_name(avctx->sw_pix_fmt));
>> +        return AVERROR(EINVAL);
>> +    } else if (ret == VK_ERROR_FEATURE_NOT_PRESENT ||
>> +               ret == VK_ERROR_FORMAT_NOT_SUPPORTED) {
>> +        return AVERROR(EINVAL);
>> +    } else if (ret != VK_SUCCESS) {
>> +        return AVERROR_EXTERNAL;
>> +    }
>> +
>> +    err = init_rc(avctx, ctx);
>> +    if (err < 0)
>> +        return err;
>> +
>> +    /* Create command and query pool */
>> +    query_create = (VkQueryPoolVideoEncodeFeedbackCreateInfoKHR) {
>> +        .sType = 
>> VK_STRUCTURE_TYPE_QUERY_POOL_VIDEO_ENCODE_FEEDBACK_CREATE_INFO_KHR,
>> +        .pNext = &ctx->profile,
>> +        .encodeFeedbackFlags = ctx- 
>> >enc_caps.supportedEncodeFeedbackFlags,
>> +    };
>> +    err = ff_vk_exec_pool_init(s, &ctx->qf_enc, &ctx->enc_pool, 
>> base_ctx->async_depth,
>> +                               1, 
>> VK_QUERY_TYPE_VIDEO_ENCODE_FEEDBACK_KHR, 0,
>> +                               &query_create);
>> +    if (err < 0)
>> +        return err;
>> +
>> +    if (ctx->opts.quality > ctx->enc_caps.maxQualityLevels) {
>> +        av_log(avctx, AV_LOG_ERROR, "Invalid quality level %i: 
>> allowed range is "
>> +                                    "0 to %i\n",
>> +               ctx->opts.quality, ctx->enc_caps.maxQualityLevels);
>> +        return AVERROR(EINVAL);
>> +    }
>> +
>> +    /* Get quality properties for the profile and quality level */
>> +    quality_info = (VkPhysicalDeviceVideoEncodeQualityLevelInfoKHR) {
>> +        .sType = 
>> VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VIDEO_ENCODE_QUALITY_LEVEL_INFO_KHR,
>> +        .pVideoProfile = &ctx->profile,
>> +        .qualityLevel = ctx->opts.quality,
>> +    };
>> +    ctx->quality_props = (VkVideoEncodeQualityLevelPropertiesKHR) {
>> +        .sType = 
>> VK_STRUCTURE_TYPE_VIDEO_ENCODE_QUALITY_LEVEL_PROPERTIES_KHR,
>> +        .pNext = quality_pnext,
>> +    };
>> +    ret = vk- 
>> >GetPhysicalDeviceVideoEncodeQualityLevelPropertiesKHR(s->hwctx- 
>> >phys_dev,
>> +                                                                    
>> &quality_info,
>> +                                                                    
>> &ctx->quality_props);
>> +    if (ret != VK_SUCCESS)
>> +        return AVERROR_EXTERNAL;
>> +
>> +    /* Printout informative properties */
>> +    av_log(avctx, AV_LOG_VERBOSE, "Encoder capabilities for %s 
>> profile \"%s\":\n",
>> +           avcodec_get_name(avctx->codec_id),
>> +           avcodec_profile_name(avctx->codec_id, avctx->profile));
>> +    av_log(avctx, AV_LOG_VERBOSE, "    Width: from %i to %i\n",
>> +           ctx->caps.minCodedExtent.width, ctx- 
>> >caps.maxCodedExtent.width);
>> +    av_log(avctx, AV_LOG_VERBOSE, "    Height: from %i to %i\n",
>> +           ctx->caps.minCodedExtent.height, ctx- 
>> >caps.maxCodedExtent.height);
>> +    av_log(avctx, AV_LOG_VERBOSE, "    Width alignment: %i\n",
>> +           ctx->caps.pictureAccessGranularity.width);
>> +    av_log(avctx, AV_LOG_VERBOSE, "    Height alignment: %i\n",
>> +           ctx->caps.pictureAccessGranularity.height);
>> +    av_log(avctx, AV_LOG_VERBOSE, "    Bitstream offset alignment: 
>> %"PRIu64"\n",
>> +           ctx->caps.minBitstreamBufferOffsetAlignment);
>> +    av_log(avctx, AV_LOG_VERBOSE, "    Bitstream size alignment: 
>> %"PRIu64"\n",
>> +           ctx->caps.minBitstreamBufferSizeAlignment);
>> +    av_log(avctx, AV_LOG_VERBOSE, "    Maximum references: %u\n",
>> +           ctx->caps.maxDpbSlots);
>> +    av_log(avctx, AV_LOG_VERBOSE, "    Maximum active references: %u\n",
>> +           ctx->caps.maxActiveReferencePictures);
>> +    av_log(avctx, AV_LOG_VERBOSE, "    Codec header version: %i.%i.%i 
>> (driver), %i.%i.%i (compiled)\n",
>> +           CODEC_VER(ctx->caps.stdHeaderVersion.specVersion),
>> +           CODEC_VER(vk_desc->ext_props.specVersion));
>> +    av_log(avctx, AV_LOG_VERBOSE, "    Encoder max quality: %i\n",
>> +           ctx->enc_caps.maxQualityLevels);
>> +    av_log(avctx, AV_LOG_VERBOSE, "    Encoder image width alignment: 
>> %i\n",
>> +           ctx->enc_caps.encodeInputPictureGranularity.width);
>> +    av_log(avctx, AV_LOG_VERBOSE, "    Encoder image height 
>> alignment: %i\n",
>> +           ctx->enc_caps.encodeInputPictureGranularity.height);
>> +    av_log(avctx, AV_LOG_VERBOSE, "    Capability flags:%s%s%s\n",
>> +           ctx->caps.flags ? "" :
>> +               " none",
>> +           ctx->caps.flags & 
>> VK_VIDEO_CAPABILITY_PROTECTED_CONTENT_BIT_KHR ?
>> +               " protected" : "",
>> +           ctx->caps.flags & 
>> VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR ?
>> +               " separate_references" : "");
>> +
>> +    /* Setup width/height alignment */
>> +    base_ctx->surface_width = avctx->coded_width =
>> +        FFALIGN(avctx->width, ctx- 
>> >enc_caps.encodeInputPictureGranularity.width);
>> +    base_ctx->surface_height = avctx->coded_height =
>> +        FFALIGN(avctx->height, ctx- 
>> >enc_caps.encodeInputPictureGranularity.height);
>> +
>> +    /* Check if encoding is possible with the given parameters */
>> +    if (avctx->coded_width  < ctx->caps.minCodedExtent.width   ||
>> +        avctx->coded_height < ctx->caps.minCodedExtent.height  ||
>> +        avctx->coded_width  > ctx->caps.maxCodedExtent.width   ||
>> +        avctx->coded_height > ctx->caps.maxCodedExtent.height) {
>> +        av_log(avctx, AV_LOG_ERROR, "Input of %ix%i too large for 
>> encoder limits: %ix%i max\n",
>> +               avctx->coded_width, avctx->coded_height,
>> +               ctx->caps.minCodedExtent.width, ctx- 
>> >caps.minCodedExtent.height);
>> +        return AVERROR(EINVAL);
>> +    }
>> +
>> +    fmt_info.imageUsage = VK_IMAGE_USAGE_VIDEO_ENCODE_DPB_BIT_KHR |
>> +                          VK_IMAGE_USAGE_VIDEO_ENCODE_DST_BIT_KHR;
>> +
>> +    ctx->common.layered_dpb = !(ctx->caps.flags & 
>> VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR);
>> +
>> +    /* Get the supported image formats */
>> +    ret = vk->GetPhysicalDeviceVideoFormatPropertiesKHR(s->hwctx- 
>> >phys_dev,
>> +                                                        &fmt_info,
>> +                                                        &nb_out_fmts, 
>> NULL);
>> +    if (ret == VK_ERROR_FORMAT_NOT_SUPPORTED ||
>> +        (!nb_out_fmts && ret == VK_SUCCESS)) {
>> +        return AVERROR(EINVAL);
>> +    } else if (ret != VK_SUCCESS) {
>> +        av_log(avctx, AV_LOG_ERROR, "Unable to get Vulkan format 
>> properties: %s!\n",
>> +               ff_vk_ret2str(ret));
>> +        return AVERROR_EXTERNAL;
>> +    }
>> +
>> +    ret_info = av_mallocz(sizeof(*ret_info)*nb_out_fmts);
>> +    if (!ret_info)
>> +        return AVERROR(ENOMEM);
>> +
>> +    for (int i = 0; i < nb_out_fmts; i++)
>> +        ret_info[i].sType = 
>> VK_STRUCTURE_TYPE_VIDEO_FORMAT_PROPERTIES_KHR;
>> +
>> +    ret = vk->GetPhysicalDeviceVideoFormatPropertiesKHR(s->hwctx- 
>> >phys_dev,
>> +                                                        &fmt_info,
>> +                                                        &nb_out_fmts, 
>> ret_info);
>> +    if (ret == VK_ERROR_FORMAT_NOT_SUPPORTED ||
>> +        (!nb_out_fmts && ret == VK_SUCCESS)) {
>> +        av_free(ret_info);
>> +        return AVERROR(EINVAL);
>> +    } else if (ret != VK_SUCCESS) {
>> +        av_log(avctx, AV_LOG_ERROR, "Unable to get Vulkan format 
>> properties: %s!\n",
>> +               ff_vk_ret2str(ret));
>> +        av_free(ret_info);
>> +        return AVERROR_EXTERNAL;
>> +    }
>> +
>> +    av_log(avctx, AV_LOG_VERBOSE, "Supported input formats:\n");
>> +    for (i = 0; i < nb_out_fmts; i++)
>> +        av_log(avctx, AV_LOG_VERBOSE, "    %i: %i\n", i, 
>> ret_info[i].format);
>> +
>> +    for (i = 0; i < nb_out_fmts; i++) {
>> +        if (ff_vk_pix_fmt_from_vkfmt(ret_info[i].format) == s- 
>> >frames->sw_format) {
>> +            ctx->pic_format = ret_info[i].format;
>> +            break;
>> +        }
>> +    }
>> +
>> +    av_free(ret_info);
>> +
>> +    if (i == nb_out_fmts) {
>> +        av_log(avctx, AV_LOG_ERROR, "Pixel format %s of input frames 
>> not supported!\n",
>> +               av_get_pix_fmt_name(s->frames->sw_format));
>> +        return AVERROR(EINVAL);
>> +    }
>> +
>> +    /* Create session */
>> +    session_create.pVideoProfile = &ctx->profile;
>> +    session_create.flags = 0x0;
>> +    session_create.queueFamilyIndex = ctx->qf_enc.queue_family;
>> +    session_create.maxCodedExtent = ctx->caps.maxCodedExtent;
>> +    session_create.maxDpbSlots = ctx->caps.maxDpbSlots;
>> +    session_create.maxActiveReferencePictures = ctx- 
>> >caps.maxActiveReferencePictures;
>> +    session_create.pictureFormat = ctx->pic_format;
>> +    session_create.referencePictureFormat = 
>> session_create.pictureFormat;
>> +    session_create.pStdHeaderVersion = &vk_desc->ext_props;
>> +
>> +    err = ff_vk_video_common_init(avctx, s, &ctx->common, 
>> &session_create);
>> +    if (err < 0)
>> +        return err;
>> +
>> +    err = ff_hw_base_encode_init(avctx, &ctx->base);
>> +    if (err < 0)
>> +        return err;
>> +
>> +    err = vulkan_encode_create_dpb(avctx, ctx);
>> +    if (err < 0)
>> +        return err;
>> +
>> +    base_ctx->async_encode = 1;
>> +    base_ctx->encode_fifo = av_fifo_alloc2(base_ctx->async_depth,
>> +                                           
>> sizeof(FFVulkanEncodePicture *), 0);
>> +    if (!base_ctx->encode_fifo)
>> +        return AVERROR(ENOMEM);
>> +
>> +    return 0;
>> +}
>> diff --git a/libavcodec/vulkan_encode.h b/libavcodec/vulkan_encode.h
>> new file mode 100644
>> index 0000000000..995befeca2
>> --- /dev/null
>> +++ b/libavcodec/vulkan_encode.h
>> @@ -0,0 +1,243 @@
>> +/*
>> + * This file is part of FFmpeg.
>> + *
>> + * FFmpeg is free software; you can redistribute it and/or
>> + * modify it under the terms of the GNU Lesser General Public
>> + * License as published by the Free Software Foundation; either
>> + * version 2.1 of the License, or (at your option) any later version.
>> + *
>> + * FFmpeg is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> + * Lesser General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU Lesser General Public
>> + * License along with FFmpeg; if not, write to the Free Software
>> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 
>> 02110-1301 USA
>> + */
>> +
>> +#ifndef AVCODEC_VULKAN_ENCODE_H
>> +#define AVCODEC_VULKAN_ENCODE_H
>> +
>> +#include "codec_id.h"
>> +#include "internal.h"
>> +
>> +#include "encode.h"
>> +#include "hwconfig.h"
>> +
>> +#include "vulkan_video.h"
>> +#include "hw_base_encode.h"
>> +
>> +typedef struct FFVulkanEncodeDescriptor {
>> +    enum AVCodecID                   codec_id;
>> +    FFVulkanExtensions               encode_extension;
>> +    VkVideoCodecOperationFlagBitsKHR encode_op;
>> +
>> +    VkExtensionProperties ext_props;
>> +} FFVulkanEncodeDescriptor;
>> +
>> +typedef struct FFVulkanEncodePicture {
>> +    FFHWBaseEncodePicture  base;
>> +    VkVideoPictureResourceInfoKHR dpb_res;
>> +    VkVideoReferenceSlotInfoKHR dpb_slot;
>> +
>> +    struct {
>> +        VkImageView        view;
>> +        VkImageAspectFlags aspect;
>> +    } in;
>> +
>> +    struct {
>> +        VkImageView        view;
>> +        VkImageAspectFlags aspect;
>> +    } dpb;
>> +
>> +    void                  *codec_layer;
>> +    void                  *codec_rc_layer;
>> +
>> +    FFVkExecContext       *exec;
>> +    AVBufferRef           *pkt_buf;
>> +    int                    slices_offset;
>> +} FFVulkanEncodePicture;
>> +
>> +/**
>> + * Callback for writing stream-level headers.
>> + */
>> +typedef int (*vkenc_cb_write_stream_headers)(AVCodecContext *avctx,
>> +                                             uint8_t *data, size_t 
>> *data_len);
>> +
>> +/**
>> + * Callback for initializing codec-specific picture headers.
>> + */
>> +typedef int (*vkenc_cb_init_pic_headers)(AVCodecContext *avctx,
>> +                                         FFVulkanEncodePicture *pic);
>> +
>> +/**
>> + * Callback for writing alignment data.
>> + * Align is the value to align offset to.
>> + */
>> +typedef int (*vkenc_cb_write_filler)(AVCodecContext *avctx, uint32_t 
>> filler,
>> +                                     uint8_t *data, size_t *data_len);
>> +
>> +/**
>> + * Callback for writing any extra units requested. data_len must be set
>> + * to the available size, and its value will be overwritten by the 
>> #bytes written
>> + * to the output buffer.
>> + */
>> +typedef int (*vkenc_cb_write_extra_headers)(AVCodecContext *avctx,
>> +                                            FFVulkanEncodePicture *pic,
>> +                                            uint8_t *data, size_t 
>> *data_len);
>> +
>> +typedef struct FFVulkanCodec {
>> +    /**
>> +     * Codec feature flags.
>> +     */
>> +    int flags;
>> +/* Codec output packet without timestamp delay, which means the
>> + * output packet has same PTS and DTS. For AV1. */
>> +#define VK_ENC_FLAG_NO_DELAY 1 << 6
>> +
>> +    /**
>> +     * Size of the codec-specific picture struct.
>> +     */
>> +    size_t picture_priv_data_size;
>> +
>> +    /**
>> +     * Size of the filler header.
>> +     */
>> +    size_t filler_header_size;
>> +
>> +    /**
>> +     * Initialize codec-specific structs in a Vulkan profile.
>> +     */
>> +    int (*init_profile)(AVCodecContext *avctx, VkVideoProfileInfoKHR 
>> *profile,
>> +                        void *pnext);
>> +
>> +    /**
>> +     * Initialize codec-specific rate control structures for a picture.
>> +     */
>> +    int (*init_pic_rc)(AVCodecContext *avctx, FFHWBaseEncodePicture 
>> *pic,
>> +                       VkVideoEncodeRateControlInfoKHR *rc_info,
>> +                       VkVideoEncodeRateControlLayerInfoKHR *rc_layer);
>> +
>> +    /**
>> +     * Initialize codec-specific picture parameters.
>> +     */
>> +    int (*init_pic_params)(AVCodecContext *avctx, 
>> FFHWBaseEncodePicture *pic,
>> +                           VkVideoEncodeInfoKHR *encode_info);
>> +
>> +    /**
>> +     * Callback for writing stream headers.
>> +     */
>> +    int (*write_sequence_headers)(AVCodecContext *avctx,
>> +                                  FFHWBaseEncodePicture *base_pic,
>> +                                  uint8_t *data, size_t *data_len);
>> +
>> +    /**
>> +     * Callback for writing alignment data.
>> +     */
>> +    int (*write_filler)(AVCodecContext *avctx, uint32_t filler,
>> +                        uint8_t *data, size_t *data_len);
>> +
>> +    /**
>> +     * Callback for writing any extra units requested. data_len must 
>> be set
>> +     * to the available size, and its value will be overwritten by 
>> the #bytes written
>> +     * to the output buffer.
>> +     */
>> +    int (*write_extra_headers)(AVCodecContext *avctx, 
>> FFHWBaseEncodePicture *pic,
>> +                               uint8_t *data, size_t *data_len);
>> +} FFVulkanCodec;
>> +
>> +typedef struct FFVkEncodeCommonOptions {
>> +    int qp;
>> +    int quality;
>> +    int profile;
>> +    int level;
>> +    int async_depth;
>> +    VkVideoEncodeUsageFlagBitsKHR usage;
>> +    VkVideoEncodeContentFlagBitsKHR content;
>> +    VkVideoEncodeTuningModeKHR tune;
>> +
>> +    VkVideoEncodeRateControlModeFlagBitsKHR rc_mode;
>> +#define FF_VK_RC_MODE_AUTO 0xFFFFFFFF
>> +} FFVkEncodeCommonOptions;
>> +
>> +typedef struct FFVulkanEncodeContext {
>> +    FFVulkanContext s;
>> +    FFVkVideoCommon common;
>> +    FFHWBaseEncodeContext base;
>> +    const FFVulkanCodec *codec;
>> +
>> +    /* Session parameters object, initialized by each codec 
>> independently
>> +     * and set here. */
>> +    VkVideoSessionParametersKHR session_params;
>> +
>> +    AVBufferPool *buf_pool;
>> +
>> +    VkFormat pic_format;
>> +
>> +    FFVkEncodeCommonOptions opts;
>> +
>> +    VkVideoProfileInfoKHR profile;
>> +    VkVideoProfileListInfoKHR profile_list;
>> +    VkVideoCapabilitiesKHR caps;
>> +    VkVideoEncodeQualityLevelPropertiesKHR quality_props;
>> +    VkVideoEncodeCapabilitiesKHR enc_caps;
>> +    VkVideoEncodeUsageInfoKHR usage_info;
>> +
>> +    FFVkQueueFamilyCtx qf_enc;
>> +    FFVkExecPool enc_pool;
>> +
>> +    uint32_t slots[32];
>> +} FFVulkanEncodeContext;
>> +
>> +#define VULKAN_ENCODE_COMMON_OPTIONS \
>> +    { "qp", "Use an explicit constant quantizer for the whole 
>> stream", OFFSET(common.opts.qp), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 
>> 255, FLAGS }, \
>> +    { "quality", "Set encode quality (trades off against speed, 
>> higher is faster)", OFFSET(common.opts.quality), AV_OPT_TYPE_INT, 
>> { .i64 = 0 }, 0, INT_MAX, FLAGS }, \
>> +    { "rc_mode", "Select rate control type", 
>> OFFSET(common.opts.rc_mode), AV_OPT_TYPE_INT, { .i64 = 
>> FF_VK_RC_MODE_AUTO }, 0, FF_VK_RC_MODE_AUTO, FLAGS, "rc_mode" }, \
>> +        { "auto",    "Choose mode automatically based on parameters", 
>> 0, AV_OPT_TYPE_CONST, { .i64 = FF_VK_RC_MODE_AUTO }, INT_MIN, INT_MAX, 
>> FLAGS, "rc_mode" }, \
>> +        { "driver",  "Driver-specific rate control", 0, 
>> AV_OPT_TYPE_CONST, { .i64 = 
>> VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DEFAULT_KHR      }, INT_MIN, 
>> INT_MAX, FLAGS, "rc_mode" }, \
>> +        { "cqp",     "Constant quantizer mode", 0, AV_OPT_TYPE_CONST, 
>> { .i64 = VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR }, 
>> INT_MIN, INT_MAX, FLAGS, "rc_mode" }, \
>> +        { "cbr",     "Constant bitrate mode", 0, AV_OPT_TYPE_CONST, 
>> { .i64 = VK_VIDEO_ENCODE_RATE_CONTROL_MODE_CBR_BIT_KHR      }, 
>> INT_MIN, INT_MAX, FLAGS, "rc_mode" }, \
>> +        { "vbr",     "Variable bitrate mode", 0, AV_OPT_TYPE_CONST, 
>> { .i64 = VK_VIDEO_ENCODE_RATE_CONTROL_MODE_VBR_BIT_KHR      }, 
>> INT_MIN, INT_MAX, FLAGS, "rc_mode" }, \
>> +    { "tune", "Select tuning type", OFFSET(common.opts.tune), 
>> AV_OPT_TYPE_INT, { .i64 = VK_VIDEO_ENCODE_TUNING_MODE_DEFAULT_KHR }, 
>> 0, INT_MAX, FLAGS, "tune" }, \
>> +        { "default",  "Default tuning", 0, AV_OPT_TYPE_CONST, { .i64 
>> = VK_VIDEO_ENCODE_TUNING_MODE_DEFAULT_KHR           }, INT_MIN, 
>> INT_MAX, FLAGS, "tune" }, \
>> +        { "hq",       "High quality tuning", 0, AV_OPT_TYPE_CONST, 
>> { .i64 = VK_VIDEO_ENCODE_TUNING_MODE_HIGH_QUALITY_KHR      }, INT_MIN, 
>> INT_MAX, FLAGS, "tune" }, \
>> +        { "ll",       "Low-latency tuning", 0, AV_OPT_TYPE_CONST, 
>> { .i64 = VK_VIDEO_ENCODE_TUNING_MODE_LOW_LATENCY_KHR       }, INT_MIN, 
>> INT_MAX, FLAGS, "tune" }, \
>> +        { "ull",      "Ultra low-latency tuning", 0, 
>> AV_OPT_TYPE_CONST, { .i64 = 
>> VK_VIDEO_ENCODE_TUNING_MODE_ULTRA_LOW_LATENCY_KHR }, INT_MIN, INT_MAX, 
>> FLAGS, "tune" }, \
>> +        { "lossless", "Lossless mode tuning", 0, AV_OPT_TYPE_CONST, 
>> { .i64 = VK_VIDEO_ENCODE_TUNING_MODE_LOSSLESS_KHR          }, INT_MIN, 
>> INT_MAX, FLAGS, "tune" }, \
>> +    { "usage", "Select usage type", OFFSET(common.opts.usage), 
>> AV_OPT_TYPE_FLAGS, { .i64 = VK_VIDEO_ENCODE_USAGE_DEFAULT_KHR }, 0, 
>> INT_MAX, FLAGS, "usage" }, \
>> +        { "default",    "Default optimizations", 0, 
>> AV_OPT_TYPE_CONST, { .i64 = 
>> VK_VIDEO_ENCODE_USAGE_DEFAULT_KHR          }, INT_MIN, INT_MAX, FLAGS, 
>> "usage" }, \
>> +        { "transcode",  "Optimize for transcoding", 0, 
>> AV_OPT_TYPE_CONST, { .i64 = 
>> VK_VIDEO_ENCODE_USAGE_TRANSCODING_BIT_KHR  }, INT_MIN, INT_MAX, FLAGS, 
>> "usage" }, \
>> +        { "stream",     "Optimize for streaming", 0, 
>> AV_OPT_TYPE_CONST, { .i64 = 
>> VK_VIDEO_ENCODE_USAGE_STREAMING_BIT_KHR    }, INT_MIN, INT_MAX, FLAGS, 
>> "usage" }, \
>> +        { "record",     "Optimize for offline recording", 0, 
>> AV_OPT_TYPE_CONST, { .i64 = 
>> VK_VIDEO_ENCODE_USAGE_RECORDING_BIT_KHR    }, INT_MIN, INT_MAX, FLAGS, 
>> "usage" }, \
>> +        { "conference", "Optimize for teleconferencing", 0, 
>> AV_OPT_TYPE_CONST, { .i64 = 
>> VK_VIDEO_ENCODE_USAGE_CONFERENCING_BIT_KHR }, INT_MIN, INT_MAX, FLAGS, 
>> "usage" }, \
>> +    { "content", "Select content type", OFFSET(common.opts.content), 
>> AV_OPT_TYPE_FLAGS, { .i64 = VK_VIDEO_ENCODE_CONTENT_DEFAULT_KHR }, 0, 
>> INT_MAX, FLAGS, "content" }, \
>> +        { "default",  "Default content", 0, AV_OPT_TYPE_CONST, { .i64 
>> = VK_VIDEO_ENCODE_CONTENT_DEFAULT_KHR      }, INT_MIN, INT_MAX, FLAGS, 
>> "content" }, \
>> +        { "camera",   "Camera footage", 0, AV_OPT_TYPE_CONST, { .i64 
>> = VK_VIDEO_ENCODE_CONTENT_CAMERA_BIT_KHR   }, INT_MIN, INT_MAX, FLAGS, 
>> "content" }, \
>> +        { "desktop",  "Screen recording", 0, AV_OPT_TYPE_CONST, 
>> { .i64 = VK_VIDEO_ENCODE_CONTENT_DESKTOP_BIT_KHR  }, INT_MIN, INT_MAX, 
>> FLAGS, "content" }, \
>> +        { "rendered", "Game or 3D content", 0, AV_OPT_TYPE_CONST, 
>> { .i64 = VK_VIDEO_ENCODE_CONTENT_RENDERED_BIT_KHR }, INT_MIN, INT_MAX, 
>> FLAGS, "content" }
>> +
>> +/**
>> + * Initialize encoder.
>> + */
>> +av_cold int ff_vulkan_encode_init(AVCodecContext *avctx, 
>> FFVulkanEncodeContext *ctx,
>> +                                  const FFVulkanEncodeDescriptor 
>> *vk_desc,
>> +                                  const FFVulkanCodec *codec,
>> +                                  void *codec_caps, void 
>> *quality_pnext);
>> +
>> +/**
>> + * Encode.
>> + */
>> +int ff_vulkan_encode_receive_packet(AVCodecContext *avctx, AVPacket 
>> *pkt);
>> +
>> +/**
>> + * Uninitialize encoder.
>> + */
>> +void ff_vulkan_encode_uninit(FFVulkanEncodeContext *ctx);
>> +
>> +/**
>> + * Paperwork.
>> + */
>> +extern const AVCodecHWConfigInternal *const 
>> ff_vulkan_encode_hw_configs[];
>> +
>> +#endif /* AVCODEC_VULKAN_ENCODE_H */
> 
> Going to push the Vulkan common code and the H.264 encoder tomorrow 
> unless there are objections.

Patchset pushed. HEVC is up next.
The encoder code is rather straightforward once you ignore all structs 
that Vulkan requires you to fill in (but from which drivers end up using 
no more than 10% of).
diff mbox series

Patch

diff --git a/configure b/configure
index a8e67d230c..6cfb736a86 100755
--- a/configure
+++ b/configure
@@ -2638,6 +2638,7 @@  CONFIG_EXTRA="
     vp3dsp
     vp56dsp
     vp8dsp
+    vulkan_encode
     wma_freqs
     wmv2dsp
 "
@@ -3299,6 +3300,7 @@  qsvdec_select="qsv"
 qsvenc_select="qsv"
 qsvvpp_select="qsv"
 vaapi_encode_deps="vaapi"
+vulkan_encode_deps="vulkan"
 v4l2_m2m_deps="linux_videodev2_h sem_timedwait"
 
 bilateral_cuda_filter_deps="ffnvcodec"
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 27ef4638ce..ff6a3c4efc 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -1282,7 +1282,7 @@  SKIPHEADERS-$(CONFIG_QSVENC)           += qsvenc.h
 SKIPHEADERS-$(CONFIG_VAAPI)            += vaapi_decode.h vaapi_hevc.h vaapi_encode.h
 SKIPHEADERS-$(CONFIG_VDPAU)            += vdpau.h vdpau_internal.h
 SKIPHEADERS-$(CONFIG_VIDEOTOOLBOX)     += videotoolbox.h vt_internal.h
-SKIPHEADERS-$(CONFIG_VULKAN)           += vulkan.h vulkan_video.h vulkan_decode.h
+SKIPHEADERS-$(CONFIG_VULKAN)           += vulkan.h vulkan_video.h vulkan_encode.h vulkan_decode.h
 SKIPHEADERS-$(CONFIG_V4L2_M2M)         += v4l2_buffers.h v4l2_context.h v4l2_m2m.h
 SKIPHEADERS-$(CONFIG_ZLIB)             += zlib_wrapper.h
 
diff --git a/libavcodec/vulkan_encode.c b/libavcodec/vulkan_encode.c
new file mode 100644
index 0000000000..5e87d4c073
--- /dev/null
+++ b/libavcodec/vulkan_encode.c
@@ -0,0 +1,979 @@ 
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/mem.h"
+#include "libavutil/avassert.h"
+#include "vulkan_encode.h"
+#include "config.h"
+
+#include "libavutil/vulkan_loader.h"
+
+const AVCodecHWConfigInternal *const ff_vulkan_encode_hw_configs[] = {
+    HW_CONFIG_ENCODER_FRAMES(VULKAN, VULKAN),
+    NULL,
+};
+
+av_cold void ff_vulkan_encode_uninit(FFVulkanEncodeContext *ctx)
+{
+    FFVulkanContext *s = &ctx->s;
+    FFVulkanFunctions *vk = &s->vkfn;
+
+    /* Wait on and free execution pool */
+    ff_vk_exec_pool_free(s, &ctx->enc_pool);
+
+    /* Destroy the session params */
+    if (ctx->session_params)
+        vk->DestroyVideoSessionParametersKHR(s->hwctx->act_dev,
+                                             ctx->session_params,
+                                             s->hwctx->alloc);
+
+    ff_hw_base_encode_close(&ctx->base);
+
+    av_buffer_pool_uninit(&ctx->buf_pool);
+
+    ff_vk_video_common_uninit(s, &ctx->common);
+
+    ff_vk_uninit(s);
+}
+
+static int vulkan_encode_init(AVCodecContext *avctx, FFHWBaseEncodePicture *pic)
+{
+    int err;
+    FFVulkanEncodeContext *ctx = avctx->priv_data;
+    FFVulkanEncodePicture *vp = pic->priv;
+
+    AVFrame *f = pic->input_image;
+    AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data;
+    AVVulkanFramesContext *vkfc = hwfc->hwctx;
+    AVVkFrame *vkf = (AVVkFrame *)f->data[0];
+
+    if (ctx->codec->picture_priv_data_size > 0) {
+        pic->codec_priv = av_mallocz(ctx->codec->picture_priv_data_size);
+        if (!pic->codec_priv)
+            return AVERROR(ENOMEM);
+    }
+
+    /* Input image view */
+    err = ff_vk_create_view(&ctx->s, &ctx->common,
+                            &vp->in.view, &vp->in.aspect,
+                            vkf, vkfc->format[0], 0);
+    if (err < 0)
+        return err;
+
+    /* Reference view */
+    if (!ctx->common.layered_dpb) {
+        AVFrame *rf = pic->recon_image;
+        AVVkFrame *rvkf = (AVVkFrame *)rf->data[0];
+        err = ff_vk_create_view(&ctx->s, &ctx->common,
+                                &vp->dpb.view, &vp->dpb.aspect,
+                                rvkf, ctx->pic_format, 1);
+        if (err < 0)
+            return err;
+    } else {
+        vp->dpb.view = ctx->common.layered_view;
+        vp->dpb.aspect = ctx->common.layered_aspect;
+    }
+
+    return 0;
+}
+
+static int vulkan_encode_free(AVCodecContext *avctx, FFHWBaseEncodePicture *pic)
+{
+    FFVulkanEncodeContext *ctx = avctx->priv_data;
+    FFVulkanFunctions *vk = &ctx->s.vkfn;
+
+    FFVulkanEncodePicture *vp = pic->priv;
+
+    if (vp->in.view)
+        vk->DestroyImageView(ctx->s.hwctx->act_dev, vp->in.view,
+                             ctx->s.hwctx->alloc);
+
+    if (!ctx->common.layered_dpb && vp->dpb.view)
+        vk->DestroyImageView(ctx->s.hwctx->act_dev, vp->dpb.view,
+                             ctx->s.hwctx->alloc);
+
+    ctx->slots[vp->dpb_slot.slotIndex] = 0;
+
+    return 0;
+}
+
+static int init_pic_rc(AVCodecContext *avctx, FFHWBaseEncodePicture *pic,
+                       VkVideoEncodeRateControlInfoKHR *rc_info,
+                       VkVideoEncodeRateControlLayerInfoKHR *rc_layer /* Goes in ^ */)
+{
+    FFVulkanEncodeContext *ctx = avctx->priv_data;
+
+    *rc_info = (VkVideoEncodeRateControlInfoKHR) {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_RATE_CONTROL_INFO_KHR,
+        .rateControlMode = ctx->opts.rc_mode,
+    };
+
+    if (ctx->opts.rc_mode > VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR) {
+        *rc_layer = (VkVideoEncodeRateControlLayerInfoKHR) {
+            .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_RATE_CONTROL_LAYER_INFO_KHR,
+            .averageBitrate = avctx->bit_rate,
+            .maxBitrate = avctx->rc_max_rate ? avctx->rc_max_rate : avctx->bit_rate,
+            .frameRateNumerator = avctx->framerate.num,
+            .frameRateDenominator = avctx->framerate.den,
+        };
+        rc_info->layerCount++;
+        rc_info->pLayers = rc_layer;
+    }
+
+    return ctx->codec->init_pic_rc(avctx, pic, rc_info, rc_layer);
+}
+
+static int vulkan_encode_issue(AVCodecContext *avctx,
+                               FFHWBaseEncodePicture *base_pic)
+{
+    FFVulkanEncodeContext *ctx = avctx->priv_data;
+    FFVulkanFunctions *vk = &ctx->s.vkfn;
+
+    const size_t size_align = ctx->caps.minBitstreamBufferSizeAlignment;
+
+    FFVulkanEncodePicture *vp = base_pic->priv;
+    AVFrame *src = (AVFrame *)base_pic->input_image;
+    AVVkFrame *vkf = (AVVkFrame *)src->data[0];
+
+    int err, max_pkt_size;
+
+    FFVkBuffer *sd_buf;
+
+    int slot_index = -1;
+    FFVkExecContext *exec;
+    VkCommandBuffer cmd_buf;
+    VkImageMemoryBarrier2 img_bar[37];
+    int nb_img_bar = 0;
+
+    /* Coding start/end */
+    VkVideoBeginCodingInfoKHR encode_start;
+    VkVideoEndCodingInfoKHR encode_end = {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_END_CODING_INFO_KHR,
+    };
+
+    VkVideoEncodeRateControlLayerInfoKHR rc_layer;
+    VkVideoEncodeRateControlInfoKHR rc_info;
+    VkVideoEncodeQualityLevelInfoKHR q_info;
+    VkVideoCodingControlInfoKHR encode_ctrl;
+
+    VkVideoReferenceSlotInfoKHR ref_slot[37];
+    VkVideoEncodeInfoKHR encode_info;
+
+    /* Create packet data buffer */
+    max_pkt_size = FFALIGN(3 * ctx->base.surface_width * ctx->base.surface_height + (1 << 16),
+                           ctx->caps.minBitstreamBufferSizeAlignment);
+
+    err = ff_vk_get_pooled_buffer(&ctx->s, &ctx->buf_pool, &vp->pkt_buf,
+                                  VK_BUFFER_USAGE_VIDEO_ENCODE_DST_BIT_KHR,
+                                  &ctx->profile_list, max_pkt_size,
+                                  VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+                                  VK_MEMORY_PROPERTY_HOST_CACHED_BIT);
+    if (err < 0)
+        return err;
+
+    sd_buf = (FFVkBuffer *)vp->pkt_buf->data;
+
+    /* Setup rate control */
+    err = init_pic_rc(avctx, base_pic, &rc_info, &rc_layer);
+    if (err < 0)
+        return err;
+
+    q_info = (VkVideoEncodeQualityLevelInfoKHR) {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_QUALITY_LEVEL_INFO_KHR,
+        .pNext = &rc_info,
+        .qualityLevel = ctx->opts.quality,
+    };
+    encode_ctrl = (VkVideoCodingControlInfoKHR) {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_CODING_CONTROL_INFO_KHR,
+        .pNext = &q_info,
+        .flags = VK_VIDEO_CODING_CONTROL_ENCODE_QUALITY_LEVEL_BIT_KHR |
+                 VK_VIDEO_CODING_CONTROL_ENCODE_RATE_CONTROL_BIT_KHR |
+                 (base_pic->force_idr ? VK_VIDEO_CODING_CONTROL_RESET_BIT_KHR : 0),
+    };
+
+    /* Current picture's ref slot */
+    vp->dpb_res = (VkVideoPictureResourceInfoKHR) {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR,
+        .pNext = NULL,
+        .codedOffset = { 0 },
+        .codedExtent = (VkExtent2D){ ctx->base.surface_width,
+                                     ctx->base.surface_height },
+        .baseArrayLayer = 0,
+        .imageViewBinding = vp->dpb.view,
+    };
+
+    for (int i = 0; i < ctx->caps.maxDpbSlots; i++) {
+        if (!ctx->slots[i]) {
+            slot_index = i;
+            ctx->slots[i] = 1;
+            break;
+        }
+    }
+    av_assert0(slot_index >= 0);
+
+    vp->dpb_slot = (VkVideoReferenceSlotInfoKHR) {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_REFERENCE_SLOT_INFO_KHR,
+        .pNext = NULL,  // Set later
+        .slotIndex = slot_index,
+        .pPictureResource = &vp->dpb_res,
+    };
+
+    encode_info = (VkVideoEncodeInfoKHR) {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_INFO_KHR,
+        .pNext = NULL, // Set later
+        .flags = 0x0,
+        .srcPictureResource = (VkVideoPictureResourceInfoKHR) {
+            .sType = VK_STRUCTURE_TYPE_VIDEO_PICTURE_RESOURCE_INFO_KHR,
+            .pNext = NULL,
+            .codedOffset = { 0, 0 },
+            .codedExtent = (VkExtent2D){ base_pic->input_image->width,
+                                         base_pic->input_image->height },
+            .baseArrayLayer = 0,
+            .imageViewBinding = vp->in.view,
+        },
+        .pSetupReferenceSlot = &vp->dpb_slot,
+        .referenceSlotCount = 0,
+        .pReferenceSlots = ref_slot,
+        .dstBuffer = sd_buf->buf,
+        .dstBufferOffset = 0,
+        .dstBufferRange = sd_buf->size,
+        .precedingExternallyEncodedBytes = 0,
+    };
+
+    for (int i = 0; i < MAX_REFERENCE_LIST_NUM; i++) {
+        for (int j = 0; j < base_pic->nb_refs[i]; j++) {
+            FFHWBaseEncodePicture *ref = base_pic->refs[i][j];
+            FFVulkanEncodePicture *rvp = ref->priv;
+            ref_slot[encode_info.referenceSlotCount++] = rvp->dpb_slot;
+        }
+    }
+
+    /* Setup picture parameters */
+    err = ctx->codec->init_pic_params(avctx, base_pic,
+                                      &encode_info);
+    if (err < 0)
+        return err;
+
+    encode_start = (VkVideoBeginCodingInfoKHR) {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_BEGIN_CODING_INFO_KHR,
+        .pNext = !base_pic->force_idr ? &rc_info : NULL,
+        .videoSession = ctx->common.session,
+        .videoSessionParameters = ctx->session_params,
+        .referenceSlotCount = encode_info.referenceSlotCount,
+        .pReferenceSlots = ref_slot,
+    };
+
+    /* Calling vkCmdBeginVideoCodingKHR requires to declare all references
+     * being enabled upfront, including the current frame's output ref.
+     * If layered DBPs are used, make sure its not included twice. */
+    if (!ctx->common.layered_dpb || !encode_info.referenceSlotCount) {
+        ref_slot[encode_info.referenceSlotCount] = vp->dpb_slot;
+        ref_slot[encode_info.referenceSlotCount].slotIndex = -1;
+        encode_start.referenceSlotCount++;
+    }
+
+    /* Write header */
+    if (base_pic->type == FF_HW_PICTURE_TYPE_IDR) {
+        uint8_t *hdr_dst = sd_buf->mapped_mem + encode_info.dstBufferOffset;
+        size_t data_size = encode_info.dstBufferRange;
+        err = ctx->codec->write_sequence_headers(avctx, base_pic, hdr_dst, &data_size);
+        if (err < 0)
+            goto fail;
+        encode_info.dstBufferOffset += data_size;
+        encode_info.dstBufferRange  -= data_size;
+    }
+
+    /* Write extra units */
+    if (ctx->codec->write_extra_headers) {
+        uint8_t *hdr_dst = sd_buf->mapped_mem + encode_info.dstBufferOffset;
+        size_t data_size = encode_info.dstBufferRange;
+        err = ctx->codec->write_extra_headers(avctx, base_pic, hdr_dst, &data_size);
+        if (err < 0)
+            goto fail;
+        encode_info.dstBufferOffset += data_size;
+        encode_info.dstBufferRange  -= data_size;
+    }
+
+    /* Align buffer offset to the required value with filler units */
+    if (ctx->codec->write_filler) {
+        uint8_t *hdr_dst = sd_buf->mapped_mem + encode_info.dstBufferOffset;
+        size_t data_size = encode_info.dstBufferRange;
+
+        uint32_t offset = encode_info.dstBufferOffset;
+        size_t offset_align = ctx->caps.minBitstreamBufferOffsetAlignment;
+
+        uint32_t filler_data = FFALIGN(offset, offset_align) - offset;
+
+        if (filler_data) {
+            while (filler_data < ctx->codec->filler_header_size)
+                filler_data += offset_align;
+
+            filler_data -= ctx->codec->filler_header_size;
+
+            err = ctx->codec->write_filler(avctx, filler_data,
+                                           hdr_dst, &data_size);
+            if (err < 0)
+                goto fail;
+
+            encode_info.dstBufferOffset += data_size;
+            encode_info.dstBufferRange  -= data_size;
+        }
+    }
+
+    vp->slices_offset = encode_info.dstBufferOffset;
+
+    /* Align buffer size to the nearest lower alignment requirement. */
+    encode_info.dstBufferRange -= size_align;
+    encode_info.dstBufferRange = FFALIGN(encode_info.dstBufferRange,
+                                         size_align);
+
+    /* Start command buffer recording */
+    exec = vp->exec = ff_vk_exec_get(&ctx->enc_pool);
+    ff_vk_exec_start(&ctx->s, exec);
+    cmd_buf = exec->buf;
+
+    /* Output packet buffer */
+    err = ff_vk_exec_add_dep_buf(&ctx->s, exec, &vp->pkt_buf, 1, 1);
+    if (err < 0)
+        goto fail;
+
+    /* Source image */
+    err = ff_vk_exec_add_dep_frame(&ctx->s, exec, src,
+                                   VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                                   VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR);
+    if (err < 0)
+        goto fail;
+
+    /* Source image layout conversion */
+    img_bar[nb_img_bar] = (VkImageMemoryBarrier2) {
+        .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
+        .pNext = NULL,
+        .srcStageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+        .srcAccessMask = vkf->access[0],
+        .dstStageMask = VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR,
+        .dstAccessMask = VK_ACCESS_2_VIDEO_ENCODE_READ_BIT_KHR,
+        .oldLayout = vkf->layout[0],
+        .newLayout = VK_IMAGE_LAYOUT_VIDEO_ENCODE_SRC_KHR,
+        .srcQueueFamilyIndex = vkf->queue_family[0],
+        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+        .image = vkf->img[0],
+        .subresourceRange = (VkImageSubresourceRange) {
+            .aspectMask = vp->in.aspect,
+            .layerCount = 1,
+            .levelCount = 1,
+        },
+    };
+    ff_vk_exec_update_frame(&ctx->s, exec, src,
+                            &img_bar[nb_img_bar], &nb_img_bar);
+
+    if (!ctx->common.layered_dpb) {
+        /* Source image's ref slot.
+         * No need to do a layout conversion, since the frames which are allocated
+         * with a DPB usage are automatically converted. */
+        err = ff_vk_exec_add_dep_frame(&ctx->s, exec, base_pic->recon_image,
+                                       VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                                       VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR);
+        if (err < 0)
+            return err;
+
+        /* All references */
+        for (int i = 0; i < MAX_REFERENCE_LIST_NUM; i++) {
+            for (int j = 0; j < base_pic->nb_refs[i]; j++) {
+                FFHWBaseEncodePicture *ref = base_pic->refs[i][j];
+                err = ff_vk_exec_add_dep_frame(&ctx->s, exec, ref->recon_image,
+                                               VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                                               VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR);
+                if (err < 0)
+                    return err;
+            }
+        }
+    } else {
+        err = ff_vk_exec_add_dep_frame(&ctx->s, exec, ctx->common.layered_frame,
+                                       VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR,
+                                       VK_PIPELINE_STAGE_2_VIDEO_ENCODE_BIT_KHR);
+        if (err < 0)
+            return err;
+    }
+
+    /* Change image layout */
+    vk->CmdPipelineBarrier2(cmd_buf, &(VkDependencyInfo) {
+            .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+            .pImageMemoryBarriers = img_bar,
+            .imageMemoryBarrierCount = nb_img_bar,
+        });
+
+    /* Start, use parameters */
+    vk->CmdBeginVideoCodingKHR(cmd_buf, &encode_start);
+
+    /* Send control data */
+    if (base_pic->force_idr)
+        vk->CmdControlVideoCodingKHR(cmd_buf, &encode_ctrl);
+
+    /* Encode */
+    vk->CmdBeginQuery(cmd_buf, ctx->enc_pool.query_pool, exec->query_idx + 0, 0);
+    vk->CmdEncodeVideoKHR(cmd_buf, &encode_info);
+    vk->CmdEndQuery(cmd_buf, ctx->enc_pool.query_pool, exec->query_idx + 0);
+
+    /* End encoding */
+    vk->CmdEndVideoCodingKHR(cmd_buf, &encode_end);
+
+    /* End recording and submit for execution */
+    err = ff_vk_exec_submit(&ctx->s, vp->exec);
+    if (err < 0)
+        goto fail;
+
+    /* We don't need to keep the input image any longer, its already ref'd */
+    av_frame_free(&base_pic->input_image);
+
+    return 0;
+
+fail:
+    return err;
+}
+
+static void vulkan_encode_wait(AVCodecContext *avctx,
+                               FFHWBaseEncodePicture *base_pic)
+{
+    FFVulkanEncodeContext *ctx = avctx->priv_data;
+    FFVulkanEncodePicture *vp = base_pic->priv;
+
+    av_assert0(base_pic->encode_issued);
+
+    if (base_pic->encode_complete)
+        return;
+
+    ff_vk_exec_wait(&ctx->s, vp->exec);
+    base_pic->encode_complete = 1;
+}
+
+static int vulkan_encode_output(AVCodecContext *avctx,
+                                FFHWBaseEncodePicture *base_pic, AVPacket *pkt)
+{
+    VkResult ret;
+    FFVulkanEncodePicture *vp = base_pic->priv;
+    FFVulkanEncodeContext *ctx = avctx->priv_data;
+    FFVkBuffer *sd_buf = (FFVkBuffer *)vp->pkt_buf->data;
+    uint32_t *query_data;
+
+    vulkan_encode_wait(avctx, base_pic);
+
+    ret = ff_vk_exec_get_query(&ctx->s, vp->exec, (void **)&query_data, 0);
+    if (ret == VK_NOT_READY) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to perform query: %s!\n",
+               ff_vk_ret2str(ret));
+        return AVERROR(EINVAL);
+    }
+
+    if (ret != VK_NOT_READY && ret != VK_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to perform query: %s!\n",
+               ff_vk_ret2str(ret));
+        return AVERROR_EXTERNAL;
+    }
+
+    if (query_data[2] != VK_QUERY_RESULT_STATUS_COMPLETE_KHR) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to encode: %u\n", query_data[2]);
+        return AVERROR_EXTERNAL;
+    }
+
+    /* Invalidate buffer if needed */
+    if (!(sd_buf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
+        FFVulkanFunctions *vk = &ctx->s.vkfn;
+        VkMappedMemoryRange invalidate_buf;
+
+        int offs = vp->slices_offset;
+        /* If the non-coherent alignment is greater than the bitstream buffer
+         * offset's alignment, and the offs value is not aligned already,
+         * align it to the previous alignment point. */
+        if (ctx->s.props.properties.limits.nonCoherentAtomSize >
+            ctx->caps.minBitstreamBufferOffsetAlignment && offs &&
+            (FFALIGN(offs, ctx->s.props.properties.limits.nonCoherentAtomSize) != offs)) {
+            offs -= ctx->s.props.properties.limits.nonCoherentAtomSize;
+            offs = FFALIGN(FFMAX(offs, 0), ctx->s.props.properties.limits.nonCoherentAtomSize);
+        }
+
+        invalidate_buf = (VkMappedMemoryRange) {
+            .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
+            .memory = sd_buf->mem,
+            .offset = offs,
+            .size = VK_WHOLE_SIZE,
+        };
+
+        vk->FlushMappedMemoryRanges(ctx->s.hwctx->act_dev, 1, &invalidate_buf);
+    }
+
+    pkt->data = sd_buf->mapped_mem;
+    pkt->size = vp->slices_offset + /* base offset */
+                query_data[0]       /* secondary offset */ +
+                query_data[1]       /* size */;
+
+    /* Move reference */
+    pkt->buf = vp->pkt_buf;
+    vp->pkt_buf = NULL;
+
+    av_log(avctx, AV_LOG_DEBUG, "Frame %"PRId64"/%"PRId64 " encoded\n",
+           base_pic->display_order, base_pic->encode_order);
+
+    ff_hw_base_encode_set_output_property(&ctx->base, avctx,
+                                          base_pic, pkt,
+                                          ctx->codec->flags & VK_ENC_FLAG_NO_DELAY);
+
+    return 0;
+}
+
+static const FFHWEncodePictureOperation vulkan_base_encode_ops = {
+    .priv_size = sizeof(FFVulkanEncodePicture),
+    .init   = &vulkan_encode_init,
+    .issue  = &vulkan_encode_issue,
+    .output = &vulkan_encode_output,
+    .free   = &vulkan_encode_free,
+};
+
+int ff_vulkan_encode_receive_packet(AVCodecContext *avctx, AVPacket *pkt)
+{
+    FFVulkanEncodeContext *ctx = avctx->priv_data;
+    return ff_hw_base_encode_receive_packet(&ctx->base, avctx, pkt);
+}
+
+static int vulkan_encode_create_dpb(AVCodecContext *avctx, FFVulkanEncodeContext *ctx)
+{
+    int err;
+    FFHWBaseEncodeContext *base_ctx = &ctx->base;
+    AVVulkanFramesContext *hwfc;
+
+    enum AVPixelFormat dpb_format;
+    err = ff_hw_base_get_recon_format(base_ctx, NULL, &dpb_format);
+    if (err < 0)
+        return err;
+
+    base_ctx->recon_frames_ref = av_hwframe_ctx_alloc(base_ctx->device_ref);
+    if (!base_ctx->recon_frames_ref)
+        return AVERROR(ENOMEM);
+
+    base_ctx->recon_frames = (AVHWFramesContext *)base_ctx->recon_frames_ref->data;
+    hwfc = (AVVulkanFramesContext *)base_ctx->recon_frames->hwctx;
+
+    base_ctx->recon_frames->format    = AV_PIX_FMT_VULKAN;
+    base_ctx->recon_frames->sw_format = dpb_format;
+    base_ctx->recon_frames->width     = base_ctx->surface_width;
+    base_ctx->recon_frames->height    = base_ctx->surface_height;
+
+    hwfc->format[0]    = ctx->pic_format;
+    hwfc->create_pnext = &ctx->profile_list;
+    hwfc->tiling       = VK_IMAGE_TILING_OPTIMAL;
+    hwfc->usage        = VK_IMAGE_USAGE_SAMPLED_BIT              |
+                         VK_IMAGE_USAGE_VIDEO_ENCODE_DPB_BIT_KHR;
+
+    if (ctx->common.layered_dpb)
+        hwfc->nb_layers = ctx->caps.maxDpbSlots;
+
+    err = av_hwframe_ctx_init(base_ctx->recon_frames_ref);
+    if (err < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to initialise DPB frame context: %s\n",
+               av_err2str(err));
+        return err;
+    }
+
+    if (ctx->common.layered_dpb) {
+        ctx->common.layered_frame = av_frame_alloc();
+        if (!ctx->common.layered_frame)
+            return AVERROR(ENOMEM);
+
+        err = av_hwframe_get_buffer(base_ctx->recon_frames_ref,
+                                    ctx->common.layered_frame, 0);
+        if (err < 0)
+            return AVERROR(ENOMEM);
+
+        err = ff_vk_create_view(&ctx->s, &ctx->common,
+                                &ctx->common.layered_view,
+                                &ctx->common.layered_aspect,
+                                (AVVkFrame *)ctx->common.layered_frame->data[0],
+                                hwfc->format[0], 1);
+        if (err < 0)
+            return err;
+
+        av_buffer_unref(&base_ctx->recon_frames_ref);
+    }
+
+    return 0;
+}
+
+static av_cold int init_rc(AVCodecContext *avctx, FFVulkanEncodeContext *ctx)
+{
+    if (ctx->opts.rc_mode == FF_VK_RC_MODE_AUTO) {
+        if (ctx->opts.qp >= 0) {
+            ctx->opts.rc_mode = VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR;
+        } else if (avctx->global_quality > 0) {
+            if (avctx->flags & AV_CODEC_FLAG_QSCALE)
+                ctx->opts.qp = avctx->global_quality / FF_QP2LAMBDA;
+            else
+                ctx->opts.qp = avctx->global_quality;
+            ctx->opts.rc_mode = VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR;
+        } else if (avctx->bit_rate) {
+            if (ctx->enc_caps.rateControlModes & VK_VIDEO_ENCODE_RATE_CONTROL_MODE_VBR_BIT_KHR)
+                ctx->opts.rc_mode = VK_VIDEO_ENCODE_RATE_CONTROL_MODE_VBR_BIT_KHR;
+            else if (ctx->enc_caps.rateControlModes & VK_VIDEO_ENCODE_RATE_CONTROL_MODE_CBR_BIT_KHR)
+                ctx->opts.rc_mode = VK_VIDEO_ENCODE_RATE_CONTROL_MODE_CBR_BIT_KHR;
+            else
+                ctx->opts.rc_mode = VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DEFAULT_KHR;
+        } else {
+            ctx->opts.qp = 18;
+            ctx->opts.rc_mode = VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR;
+            av_log(avctx, AV_LOG_WARNING, "No rate control settings specified, using fixed QP = %i\n",
+                   ctx->opts.qp);
+        }
+    }
+
+    if (ctx->opts.rc_mode && !(ctx->enc_caps.rateControlModes & ctx->opts.rc_mode)) {
+        static const char *rc_modes[] = {
+            [VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DEFAULT_KHR] = "default",
+            [VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR] = "cqp",
+            [VK_VIDEO_ENCODE_RATE_CONTROL_MODE_CBR_BIT_KHR] = "cbr",
+            [VK_VIDEO_ENCODE_RATE_CONTROL_MODE_VBR_BIT_KHR] = "vbr",
+        };
+        av_log(avctx, AV_LOG_ERROR, "Unsupported rate control mode %i, supported are:\n",
+               ctx->opts.rc_mode);
+        for (int i = 0; i < av_popcount(ctx->enc_caps.rateControlModes); i++) {
+            if (!(ctx->enc_caps.rateControlModes & (1 << i)))
+                continue;
+            av_log(avctx, AV_LOG_ERROR, "    %i: %s\n", i, rc_modes[i]);
+        }
+        return AVERROR(ENOTSUP);
+    }
+
+    return 0;
+}
+
+av_cold int ff_vulkan_encode_init(AVCodecContext *avctx, FFVulkanEncodeContext *ctx,
+                                  const FFVulkanEncodeDescriptor *vk_desc,
+                                  const FFVulkanCodec *codec,
+                                  void *codec_caps, void *quality_pnext)
+{
+    int i, err;
+    VkResult ret;
+    FFVulkanFunctions *vk = &ctx->s.vkfn;
+    FFVulkanContext *s = &ctx->s;
+    FFHWBaseEncodeContext *base_ctx = &ctx->base;
+
+    const AVPixFmtDescriptor *desc;
+
+    VkVideoFormatPropertiesKHR *ret_info;
+    uint32_t nb_out_fmts = 0;
+
+    VkPhysicalDeviceVideoEncodeQualityLevelInfoKHR quality_info;
+
+    VkQueryPoolVideoEncodeFeedbackCreateInfoKHR query_create;
+
+    VkVideoSessionCreateInfoKHR session_create = {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_SESSION_CREATE_INFO_KHR,
+    };
+    VkPhysicalDeviceVideoFormatInfoKHR fmt_info = {
+        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VIDEO_FORMAT_INFO_KHR,
+        .pNext = &ctx->profile_list,
+    };
+
+    if (!avctx->hw_frames_ctx) {
+        av_log(avctx, AV_LOG_ERROR, "A hardware frames reference is "
+               "required to associate the encoding device.\n");
+        return AVERROR(EINVAL);
+    }
+
+    ctx->base.op = &vulkan_base_encode_ops;
+    ctx->codec = codec;
+
+    s->frames_ref = av_buffer_ref(avctx->hw_frames_ctx);
+    s->frames = (AVHWFramesContext *)s->frames_ref->data;
+    s->hwfc = s->frames->hwctx;
+
+    s->device = (AVHWDeviceContext *)s->frames->device_ref->data;
+    s->hwctx = s->device->hwctx;
+
+    desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
+    if (!desc)
+        return AVERROR(EINVAL);
+
+    s->extensions = ff_vk_extensions_to_mask(s->hwctx->enabled_dev_extensions,
+                                             s->hwctx->nb_enabled_dev_extensions);
+
+    if (!(s->extensions & FF_VK_EXT_VIDEO_ENCODE_QUEUE)) {
+        av_log(avctx, AV_LOG_ERROR, "Device does not support the %s extension!\n",
+               VK_KHR_VIDEO_ENCODE_QUEUE_EXTENSION_NAME);
+        return AVERROR(ENOSYS);
+    } else if (!(s->extensions & FF_VK_EXT_VIDEO_MAINTENANCE_1)) {
+        av_log(avctx, AV_LOG_ERROR, "Device does not support the %s extension!\n",
+               VK_KHR_VIDEO_MAINTENANCE_1_EXTENSION_NAME);
+        return AVERROR(ENOSYS);
+    } else if (!(s->extensions & vk_desc->encode_extension)) {
+        av_log(avctx, AV_LOG_ERROR, "Device does not support decoding %s!\n",
+               avcodec_get_name(avctx->codec_id));
+        return AVERROR(ENOSYS);
+    }
+
+    /* Load functions */
+    err = ff_vk_load_functions(s->device, vk, s->extensions, 1, 1);
+    if (err < 0)
+        return err;
+
+    /* Create queue context */
+    err = ff_vk_video_qf_init(s, &ctx->qf_enc,
+                              VK_QUEUE_VIDEO_ENCODE_BIT_KHR,
+                              vk_desc->encode_op);
+    if (err < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Decoding of %s is not supported by this device\n",
+               avcodec_get_name(avctx->codec_id));
+        return err;
+    }
+
+    /* Load all properties */
+    err = ff_vk_load_props(s);
+    if (err < 0)
+        return err;
+
+    /* Set tuning */
+    ctx->usage_info = (VkVideoEncodeUsageInfoKHR) {
+        .sType             = VK_STRUCTURE_TYPE_VIDEO_ENCODE_USAGE_INFO_KHR,
+        .videoUsageHints   = ctx->opts.usage,
+        .videoContentHints = ctx->opts.content,
+        .tuningMode        = ctx->opts.tune,
+    };
+
+    /* Load up the profile now, needed for caps and to create a query pool */
+    ctx->profile.sType               = VK_STRUCTURE_TYPE_VIDEO_PROFILE_INFO_KHR;
+    ctx->profile.pNext               = &ctx->usage_info;
+    ctx->profile.videoCodecOperation = vk_desc->encode_op;
+    ctx->profile.chromaSubsampling   = ff_vk_subsampling_from_av_desc(desc);
+    ctx->profile.lumaBitDepth        = ff_vk_depth_from_av_depth(desc->comp[0].depth);
+    ctx->profile.chromaBitDepth      = ctx->profile.lumaBitDepth;
+
+    /* Setup a profile */
+    err = codec->init_profile(avctx, &ctx->profile, &ctx->usage_info);
+    if (err < 0)
+        return err;
+
+    ctx->profile_list.sType        = VK_STRUCTURE_TYPE_VIDEO_PROFILE_LIST_INFO_KHR;
+    ctx->profile_list.profileCount = 1;
+    ctx->profile_list.pProfiles    = &ctx->profile;
+
+    /* Get the capabilities of the encoder for the given profile */
+    ctx->enc_caps.sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_CAPABILITIES_KHR;
+    ctx->enc_caps.pNext = codec_caps;
+    ctx->caps.sType = VK_STRUCTURE_TYPE_VIDEO_CAPABILITIES_KHR;
+    ctx->caps.pNext = &ctx->enc_caps;
+
+    ret = vk->GetPhysicalDeviceVideoCapabilitiesKHR(s->hwctx->phys_dev,
+                                                    &ctx->profile,
+                                                    &ctx->caps);
+    if (ret == VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to initialize encoding: "
+               "%s profile \"%s\" not supported!\n",
+               avcodec_get_name(avctx->codec_id),
+               avcodec_profile_name(avctx->codec_id, avctx->profile));
+        return AVERROR(EINVAL);
+    } else if (ret == VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to initialize encoding: "
+               "format (%s) not supported!\n",
+               av_get_pix_fmt_name(avctx->sw_pix_fmt));
+        return AVERROR(EINVAL);
+    } else if (ret == VK_ERROR_FEATURE_NOT_PRESENT ||
+               ret == VK_ERROR_FORMAT_NOT_SUPPORTED) {
+        return AVERROR(EINVAL);
+    } else if (ret != VK_SUCCESS) {
+        return AVERROR_EXTERNAL;
+    }
+
+    err = init_rc(avctx, ctx);
+    if (err < 0)
+        return err;
+
+    /* Create command and query pool */
+    query_create = (VkQueryPoolVideoEncodeFeedbackCreateInfoKHR) {
+        .sType = VK_STRUCTURE_TYPE_QUERY_POOL_VIDEO_ENCODE_FEEDBACK_CREATE_INFO_KHR,
+        .pNext = &ctx->profile,
+        .encodeFeedbackFlags = ctx->enc_caps.supportedEncodeFeedbackFlags,
+    };
+    err = ff_vk_exec_pool_init(s, &ctx->qf_enc, &ctx->enc_pool, base_ctx->async_depth,
+                               1, VK_QUERY_TYPE_VIDEO_ENCODE_FEEDBACK_KHR, 0,
+                               &query_create);
+    if (err < 0)
+        return err;
+
+    if (ctx->opts.quality > ctx->enc_caps.maxQualityLevels) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid quality level %i: allowed range is "
+                                    "0 to %i\n",
+               ctx->opts.quality, ctx->enc_caps.maxQualityLevels);
+        return AVERROR(EINVAL);
+    }
+
+    /* Get quality properties for the profile and quality level */
+    quality_info = (VkPhysicalDeviceVideoEncodeQualityLevelInfoKHR) {
+        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VIDEO_ENCODE_QUALITY_LEVEL_INFO_KHR,
+        .pVideoProfile = &ctx->profile,
+        .qualityLevel = ctx->opts.quality,
+    };
+    ctx->quality_props = (VkVideoEncodeQualityLevelPropertiesKHR) {
+        .sType = VK_STRUCTURE_TYPE_VIDEO_ENCODE_QUALITY_LEVEL_PROPERTIES_KHR,
+        .pNext = quality_pnext,
+    };
+    ret = vk->GetPhysicalDeviceVideoEncodeQualityLevelPropertiesKHR(s->hwctx->phys_dev,
+                                                                    &quality_info,
+                                                                    &ctx->quality_props);
+    if (ret != VK_SUCCESS)
+        return AVERROR_EXTERNAL;
+
+    /* Printout informative properties */
+    av_log(avctx, AV_LOG_VERBOSE, "Encoder capabilities for %s profile \"%s\":\n",
+           avcodec_get_name(avctx->codec_id),
+           avcodec_profile_name(avctx->codec_id, avctx->profile));
+    av_log(avctx, AV_LOG_VERBOSE, "    Width: from %i to %i\n",
+           ctx->caps.minCodedExtent.width, ctx->caps.maxCodedExtent.width);
+    av_log(avctx, AV_LOG_VERBOSE, "    Height: from %i to %i\n",
+           ctx->caps.minCodedExtent.height, ctx->caps.maxCodedExtent.height);
+    av_log(avctx, AV_LOG_VERBOSE, "    Width alignment: %i\n",
+           ctx->caps.pictureAccessGranularity.width);
+    av_log(avctx, AV_LOG_VERBOSE, "    Height alignment: %i\n",
+           ctx->caps.pictureAccessGranularity.height);
+    av_log(avctx, AV_LOG_VERBOSE, "    Bitstream offset alignment: %"PRIu64"\n",
+           ctx->caps.minBitstreamBufferOffsetAlignment);
+    av_log(avctx, AV_LOG_VERBOSE, "    Bitstream size alignment: %"PRIu64"\n",
+           ctx->caps.minBitstreamBufferSizeAlignment);
+    av_log(avctx, AV_LOG_VERBOSE, "    Maximum references: %u\n",
+           ctx->caps.maxDpbSlots);
+    av_log(avctx, AV_LOG_VERBOSE, "    Maximum active references: %u\n",
+           ctx->caps.maxActiveReferencePictures);
+    av_log(avctx, AV_LOG_VERBOSE, "    Codec header version: %i.%i.%i (driver), %i.%i.%i (compiled)\n",
+           CODEC_VER(ctx->caps.stdHeaderVersion.specVersion),
+           CODEC_VER(vk_desc->ext_props.specVersion));
+    av_log(avctx, AV_LOG_VERBOSE, "    Encoder max quality: %i\n",
+           ctx->enc_caps.maxQualityLevels);
+    av_log(avctx, AV_LOG_VERBOSE, "    Encoder image width alignment: %i\n",
+           ctx->enc_caps.encodeInputPictureGranularity.width);
+    av_log(avctx, AV_LOG_VERBOSE, "    Encoder image height alignment: %i\n",
+           ctx->enc_caps.encodeInputPictureGranularity.height);
+    av_log(avctx, AV_LOG_VERBOSE, "    Capability flags:%s%s%s\n",
+           ctx->caps.flags ? "" :
+               " none",
+           ctx->caps.flags & VK_VIDEO_CAPABILITY_PROTECTED_CONTENT_BIT_KHR ?
+               " protected" : "",
+           ctx->caps.flags & VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR ?
+               " separate_references" : "");
+
+    /* Setup width/height alignment */
+    base_ctx->surface_width = avctx->coded_width =
+        FFALIGN(avctx->width, ctx->enc_caps.encodeInputPictureGranularity.width);
+    base_ctx->surface_height = avctx->coded_height =
+        FFALIGN(avctx->height, ctx->enc_caps.encodeInputPictureGranularity.height);
+
+    /* Check if encoding is possible with the given parameters */
+    if (avctx->coded_width  < ctx->caps.minCodedExtent.width   ||
+        avctx->coded_height < ctx->caps.minCodedExtent.height  ||
+        avctx->coded_width  > ctx->caps.maxCodedExtent.width   ||
+        avctx->coded_height > ctx->caps.maxCodedExtent.height) {
+        av_log(avctx, AV_LOG_ERROR, "Input of %ix%i too large for encoder limits: %ix%i max\n",
+               avctx->coded_width, avctx->coded_height,
+               ctx->caps.minCodedExtent.width, ctx->caps.minCodedExtent.height);
+        return AVERROR(EINVAL);
+    }
+
+    fmt_info.imageUsage = VK_IMAGE_USAGE_VIDEO_ENCODE_DPB_BIT_KHR |
+                          VK_IMAGE_USAGE_VIDEO_ENCODE_DST_BIT_KHR;
+
+    ctx->common.layered_dpb = !(ctx->caps.flags & VK_VIDEO_CAPABILITY_SEPARATE_REFERENCE_IMAGES_BIT_KHR);
+
+    /* Get the supported image formats */
+    ret = vk->GetPhysicalDeviceVideoFormatPropertiesKHR(s->hwctx->phys_dev,
+                                                        &fmt_info,
+                                                        &nb_out_fmts, NULL);
+    if (ret == VK_ERROR_FORMAT_NOT_SUPPORTED ||
+        (!nb_out_fmts && ret == VK_SUCCESS)) {
+        return AVERROR(EINVAL);
+    } else if (ret != VK_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to get Vulkan format properties: %s!\n",
+               ff_vk_ret2str(ret));
+        return AVERROR_EXTERNAL;
+    }
+
+    ret_info = av_mallocz(sizeof(*ret_info)*nb_out_fmts);
+    if (!ret_info)
+        return AVERROR(ENOMEM);
+
+    for (int i = 0; i < nb_out_fmts; i++)
+        ret_info[i].sType = VK_STRUCTURE_TYPE_VIDEO_FORMAT_PROPERTIES_KHR;
+
+    ret = vk->GetPhysicalDeviceVideoFormatPropertiesKHR(s->hwctx->phys_dev,
+                                                        &fmt_info,
+                                                        &nb_out_fmts, ret_info);
+    if (ret == VK_ERROR_FORMAT_NOT_SUPPORTED ||
+        (!nb_out_fmts && ret == VK_SUCCESS)) {
+        av_free(ret_info);
+        return AVERROR(EINVAL);
+    } else if (ret != VK_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to get Vulkan format properties: %s!\n",
+               ff_vk_ret2str(ret));
+        av_free(ret_info);
+        return AVERROR_EXTERNAL;
+    }
+
+    av_log(avctx, AV_LOG_VERBOSE, "Supported input formats:\n");
+    for (i = 0; i < nb_out_fmts; i++)
+        av_log(avctx, AV_LOG_VERBOSE, "    %i: %i\n", i, ret_info[i].format);
+
+    for (i = 0; i < nb_out_fmts; i++) {
+        if (ff_vk_pix_fmt_from_vkfmt(ret_info[i].format) == s->frames->sw_format) {
+            ctx->pic_format = ret_info[i].format;
+            break;
+        }
+    }
+
+    av_free(ret_info);
+
+    if (i == nb_out_fmts) {
+        av_log(avctx, AV_LOG_ERROR, "Pixel format %s of input frames not supported!\n",
+               av_get_pix_fmt_name(s->frames->sw_format));
+        return AVERROR(EINVAL);
+    }
+
+    /* Create session */
+    session_create.pVideoProfile = &ctx->profile;
+    session_create.flags = 0x0;
+    session_create.queueFamilyIndex = ctx->qf_enc.queue_family;
+    session_create.maxCodedExtent = ctx->caps.maxCodedExtent;
+    session_create.maxDpbSlots = ctx->caps.maxDpbSlots;
+    session_create.maxActiveReferencePictures = ctx->caps.maxActiveReferencePictures;
+    session_create.pictureFormat = ctx->pic_format;
+    session_create.referencePictureFormat = session_create.pictureFormat;
+    session_create.pStdHeaderVersion = &vk_desc->ext_props;
+
+    err = ff_vk_video_common_init(avctx, s, &ctx->common, &session_create);
+    if (err < 0)
+        return err;
+
+    err = ff_hw_base_encode_init(avctx, &ctx->base);
+    if (err < 0)
+        return err;
+
+    err = vulkan_encode_create_dpb(avctx, ctx);
+    if (err < 0)
+        return err;
+
+    base_ctx->async_encode = 1;
+    base_ctx->encode_fifo = av_fifo_alloc2(base_ctx->async_depth,
+                                           sizeof(FFVulkanEncodePicture *), 0);
+    if (!base_ctx->encode_fifo)
+        return AVERROR(ENOMEM);
+
+    return 0;
+}
diff --git a/libavcodec/vulkan_encode.h b/libavcodec/vulkan_encode.h
new file mode 100644
index 0000000000..995befeca2
--- /dev/null
+++ b/libavcodec/vulkan_encode.h
@@ -0,0 +1,243 @@ 
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_VULKAN_ENCODE_H
+#define AVCODEC_VULKAN_ENCODE_H
+
+#include "codec_id.h"
+#include "internal.h"
+
+#include "encode.h"
+#include "hwconfig.h"
+
+#include "vulkan_video.h"
+#include "hw_base_encode.h"
+
+typedef struct FFVulkanEncodeDescriptor {
+    enum AVCodecID                   codec_id;
+    FFVulkanExtensions               encode_extension;
+    VkVideoCodecOperationFlagBitsKHR encode_op;
+
+    VkExtensionProperties ext_props;
+} FFVulkanEncodeDescriptor;
+
+typedef struct FFVulkanEncodePicture {
+    FFHWBaseEncodePicture  base;
+    VkVideoPictureResourceInfoKHR dpb_res;
+    VkVideoReferenceSlotInfoKHR dpb_slot;
+
+    struct {
+        VkImageView        view;
+        VkImageAspectFlags aspect;
+    } in;
+
+    struct {
+        VkImageView        view;
+        VkImageAspectFlags aspect;
+    } dpb;
+
+    void                  *codec_layer;
+    void                  *codec_rc_layer;
+
+    FFVkExecContext       *exec;
+    AVBufferRef           *pkt_buf;
+    int                    slices_offset;
+} FFVulkanEncodePicture;
+
+/**
+ * Callback for writing stream-level headers.
+ */
+typedef int (*vkenc_cb_write_stream_headers)(AVCodecContext *avctx,
+                                             uint8_t *data, size_t *data_len);
+
+/**
+ * Callback for initializing codec-specific picture headers.
+ */
+typedef int (*vkenc_cb_init_pic_headers)(AVCodecContext *avctx,
+                                         FFVulkanEncodePicture *pic);
+
+/**
+ * Callback for writing alignment data.
+ * Align is the value to align offset to.
+ */
+typedef int (*vkenc_cb_write_filler)(AVCodecContext *avctx, uint32_t filler,
+                                     uint8_t *data, size_t *data_len);
+
+/**
+ * Callback for writing any extra units requested. data_len must be set
+ * to the available size, and its value will be overwritten by the #bytes written
+ * to the output buffer.
+ */
+typedef int (*vkenc_cb_write_extra_headers)(AVCodecContext *avctx,
+                                            FFVulkanEncodePicture *pic,
+                                            uint8_t *data, size_t *data_len);
+
+typedef struct FFVulkanCodec {
+    /**
+     * Codec feature flags.
+     */
+    int flags;
+/* Codec output packet without timestamp delay, which means the
+ * output packet has same PTS and DTS. For AV1. */
+#define VK_ENC_FLAG_NO_DELAY 1 << 6
+
+    /**
+     * Size of the codec-specific picture struct.
+     */
+    size_t picture_priv_data_size;
+
+    /**
+     * Size of the filler header.
+     */
+    size_t filler_header_size;
+
+    /**
+     * Initialize codec-specific structs in a Vulkan profile.
+     */
+    int (*init_profile)(AVCodecContext *avctx, VkVideoProfileInfoKHR *profile,
+                        void *pnext);
+
+    /**
+     * Initialize codec-specific rate control structures for a picture.
+     */
+    int (*init_pic_rc)(AVCodecContext *avctx, FFHWBaseEncodePicture *pic,
+                       VkVideoEncodeRateControlInfoKHR *rc_info,
+                       VkVideoEncodeRateControlLayerInfoKHR *rc_layer);
+
+    /**
+     * Initialize codec-specific picture parameters.
+     */
+    int (*init_pic_params)(AVCodecContext *avctx, FFHWBaseEncodePicture *pic,
+                           VkVideoEncodeInfoKHR *encode_info);
+
+    /**
+     * Callback for writing stream headers.
+     */
+    int (*write_sequence_headers)(AVCodecContext *avctx,
+                                  FFHWBaseEncodePicture *base_pic,
+                                  uint8_t *data, size_t *data_len);
+
+    /**
+     * Callback for writing alignment data.
+     */
+    int (*write_filler)(AVCodecContext *avctx, uint32_t filler,
+                        uint8_t *data, size_t *data_len);
+
+    /**
+     * Callback for writing any extra units requested. data_len must be set
+     * to the available size, and its value will be overwritten by the #bytes written
+     * to the output buffer.
+     */
+    int (*write_extra_headers)(AVCodecContext *avctx, FFHWBaseEncodePicture *pic,
+                               uint8_t *data, size_t *data_len);
+} FFVulkanCodec;
+
+typedef struct FFVkEncodeCommonOptions {
+    int qp;
+    int quality;
+    int profile;
+    int level;
+    int async_depth;
+    VkVideoEncodeUsageFlagBitsKHR usage;
+    VkVideoEncodeContentFlagBitsKHR content;
+    VkVideoEncodeTuningModeKHR tune;
+
+    VkVideoEncodeRateControlModeFlagBitsKHR rc_mode;
+#define FF_VK_RC_MODE_AUTO 0xFFFFFFFF
+} FFVkEncodeCommonOptions;
+
+typedef struct FFVulkanEncodeContext {
+    FFVulkanContext s;
+    FFVkVideoCommon common;
+    FFHWBaseEncodeContext base;
+    const FFVulkanCodec *codec;
+
+    /* Session parameters object, initialized by each codec independently
+     * and set here. */
+    VkVideoSessionParametersKHR session_params;
+
+    AVBufferPool *buf_pool;
+
+    VkFormat pic_format;
+
+    FFVkEncodeCommonOptions opts;
+
+    VkVideoProfileInfoKHR profile;
+    VkVideoProfileListInfoKHR profile_list;
+    VkVideoCapabilitiesKHR caps;
+    VkVideoEncodeQualityLevelPropertiesKHR quality_props;
+    VkVideoEncodeCapabilitiesKHR enc_caps;
+    VkVideoEncodeUsageInfoKHR usage_info;
+
+    FFVkQueueFamilyCtx qf_enc;
+    FFVkExecPool enc_pool;
+
+    uint32_t slots[32];
+} FFVulkanEncodeContext;
+
+#define VULKAN_ENCODE_COMMON_OPTIONS \
+    { "qp", "Use an explicit constant quantizer for the whole stream", OFFSET(common.opts.qp), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 255, FLAGS }, \
+    { "quality", "Set encode quality (trades off against speed, higher is faster)", OFFSET(common.opts.quality), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, FLAGS }, \
+    { "rc_mode", "Select rate control type", OFFSET(common.opts.rc_mode), AV_OPT_TYPE_INT, { .i64 = FF_VK_RC_MODE_AUTO }, 0, FF_VK_RC_MODE_AUTO, FLAGS, "rc_mode" }, \
+        { "auto",    "Choose mode automatically based on parameters", 0, AV_OPT_TYPE_CONST, { .i64 = FF_VK_RC_MODE_AUTO }, INT_MIN, INT_MAX, FLAGS, "rc_mode" }, \
+        { "driver",  "Driver-specific rate control", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DEFAULT_KHR      }, INT_MIN, INT_MAX, FLAGS, "rc_mode" }, \
+        { "cqp",     "Constant quantizer mode", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_RATE_CONTROL_MODE_DISABLED_BIT_KHR }, INT_MIN, INT_MAX, FLAGS, "rc_mode" }, \
+        { "cbr",     "Constant bitrate mode", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_RATE_CONTROL_MODE_CBR_BIT_KHR      }, INT_MIN, INT_MAX, FLAGS, "rc_mode" }, \
+        { "vbr",     "Variable bitrate mode", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_RATE_CONTROL_MODE_VBR_BIT_KHR      }, INT_MIN, INT_MAX, FLAGS, "rc_mode" }, \
+    { "tune", "Select tuning type", OFFSET(common.opts.tune), AV_OPT_TYPE_INT, { .i64 = VK_VIDEO_ENCODE_TUNING_MODE_DEFAULT_KHR }, 0, INT_MAX, FLAGS, "tune" }, \
+        { "default",  "Default tuning", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_TUNING_MODE_DEFAULT_KHR           }, INT_MIN, INT_MAX, FLAGS, "tune" }, \
+        { "hq",       "High quality tuning", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_TUNING_MODE_HIGH_QUALITY_KHR      }, INT_MIN, INT_MAX, FLAGS, "tune" }, \
+        { "ll",       "Low-latency tuning", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_TUNING_MODE_LOW_LATENCY_KHR       }, INT_MIN, INT_MAX, FLAGS, "tune" }, \
+        { "ull",      "Ultra low-latency tuning", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_TUNING_MODE_ULTRA_LOW_LATENCY_KHR }, INT_MIN, INT_MAX, FLAGS, "tune" }, \
+        { "lossless", "Lossless mode tuning", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_TUNING_MODE_LOSSLESS_KHR          }, INT_MIN, INT_MAX, FLAGS, "tune" }, \
+    { "usage", "Select usage type", OFFSET(common.opts.usage), AV_OPT_TYPE_FLAGS, { .i64 = VK_VIDEO_ENCODE_USAGE_DEFAULT_KHR }, 0, INT_MAX, FLAGS, "usage" }, \
+        { "default",    "Default optimizations", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_USAGE_DEFAULT_KHR          }, INT_MIN, INT_MAX, FLAGS, "usage" }, \
+        { "transcode",  "Optimize for transcoding", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_USAGE_TRANSCODING_BIT_KHR  }, INT_MIN, INT_MAX, FLAGS, "usage" }, \
+        { "stream",     "Optimize for streaming", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_USAGE_STREAMING_BIT_KHR    }, INT_MIN, INT_MAX, FLAGS, "usage" }, \
+        { "record",     "Optimize for offline recording", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_USAGE_RECORDING_BIT_KHR    }, INT_MIN, INT_MAX, FLAGS, "usage" }, \
+        { "conference", "Optimize for teleconferencing", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_USAGE_CONFERENCING_BIT_KHR }, INT_MIN, INT_MAX, FLAGS, "usage" }, \
+    { "content", "Select content type", OFFSET(common.opts.content), AV_OPT_TYPE_FLAGS, { .i64 = VK_VIDEO_ENCODE_CONTENT_DEFAULT_KHR }, 0, INT_MAX, FLAGS, "content" }, \
+        { "default",  "Default content", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_CONTENT_DEFAULT_KHR      }, INT_MIN, INT_MAX, FLAGS, "content" }, \
+        { "camera",   "Camera footage", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_CONTENT_CAMERA_BIT_KHR   }, INT_MIN, INT_MAX, FLAGS, "content" }, \
+        { "desktop",  "Screen recording", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_CONTENT_DESKTOP_BIT_KHR  }, INT_MIN, INT_MAX, FLAGS, "content" }, \
+        { "rendered", "Game or 3D content", 0, AV_OPT_TYPE_CONST, { .i64 = VK_VIDEO_ENCODE_CONTENT_RENDERED_BIT_KHR }, INT_MIN, INT_MAX, FLAGS, "content" }
+
+/**
+ * Initialize encoder.
+ */
+av_cold int ff_vulkan_encode_init(AVCodecContext *avctx, FFVulkanEncodeContext *ctx,
+                                  const FFVulkanEncodeDescriptor *vk_desc,
+                                  const FFVulkanCodec *codec,
+                                  void *codec_caps, void *quality_pnext);
+
+/**
+ * Encode.
+ */
+int ff_vulkan_encode_receive_packet(AVCodecContext *avctx, AVPacket *pkt);
+
+/**
+ * Uninitialize encoder.
+ */
+void ff_vulkan_encode_uninit(FFVulkanEncodeContext *ctx);
+
+/**
+ * Paperwork.
+ */
+extern const AVCodecHWConfigInternal *const ff_vulkan_encode_hw_configs[];
+
+#endif /* AVCODEC_VULKAN_ENCODE_H */