diff mbox

[FFmpeg-devel,6/6] lavfi: add a Vulkan overlay filter

Message ID 20180621165543.5198-7-atomnuker@gmail.com
State New
Headers show

Commit Message

Rostislav Pehlivanov June 21, 2018, 4:55 p.m. UTC
Could be done in-place on the main image, but framesync segfaults.

Signed-off-by: Rostislav Pehlivanov <atomnuker@gmail.com>
---
 configure                       |   1 +
 libavfilter/Makefile            |   1 +
 libavfilter/allfilters.c        |   1 +
 libavfilter/vf_overlay_vulkan.c | 461 ++++++++++++++++++++++++++++++++
 4 files changed, 464 insertions(+)
 create mode 100644 libavfilter/vf_overlay_vulkan.c

Comments

Mark Thompson Sept. 2, 2018, 9:50 p.m. UTC | #1
On 21/06/18 17:55, Rostislav Pehlivanov wrote:
> Could be done in-plane with the main image but framesync segfaults.

Is this framesync not working with hardware frames, so it tries to clone an image but can't?

(I have had vague plans for a while to clean that up by adding an av_hwframe_clone() call to hwcontext and making the read/write refcounting work as expected, but there are few real use-cases for it so have yet to bother.)

> Signed-off-by: Rostislav Pehlivanov <atomnuker@gmail.com>
> ---
>  configure                       |   1 +
>  libavfilter/Makefile            |   1 +
>  libavfilter/allfilters.c        |   1 +
>  libavfilter/vf_overlay_vulkan.c | 461 ++++++++++++++++++++++++++++++++
>  4 files changed, 464 insertions(+)
>  create mode 100644 libavfilter/vf_overlay_vulkan.c
> 
> diff --git a/configure b/configure
> index d1ceb9e38d..2edd4e36aa 100755
> --- a/configure
> +++ b/configure
> @@ -3370,6 +3370,7 @@ ocr_filter_deps="libtesseract"
>  ocv_filter_deps="libopencv"
>  openclsrc_filter_deps="opencl"
>  overlay_opencl_filter_deps="opencl"
> +overlay_vulkan_filter_deps="vulkan libshaderc"

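'q' < 'v': this list is kept in alphabetical order, so the overlay_vulkan entry belongs after the overlay_qsv ones.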

>  overlay_qsv_filter_deps="libmfx"
>  overlay_qsv_filter_select="qsvvpp"
>  owdenoise_filter_deps="gpl"
> diff --git a/libavfilter/Makefile b/libavfilter/Makefile
> index 6e2d3681ec..c51add5cb4 100644
> --- a/libavfilter/Makefile
> +++ b/libavfilter/Makefile
> @@ -280,6 +280,7 @@ OBJS-$(CONFIG_OSCILLOSCOPE_FILTER)           += vf_datascope.o
>  OBJS-$(CONFIG_OVERLAY_FILTER)                += vf_overlay.o framesync.o
>  OBJS-$(CONFIG_OVERLAY_OPENCL_FILTER)         += vf_overlay_opencl.o opencl.o \
>                                                  opencl/overlay.o framesync.o
> +OBJS-$(CONFIG_OVERLAY_VULKAN_FILTER)         += vf_overlay_vulkan.o

Also here.

>  OBJS-$(CONFIG_OVERLAY_QSV_FILTER)            += vf_overlay_qsv.o framesync.o
>  OBJS-$(CONFIG_OWDENOISE_FILTER)              += vf_owdenoise.o
>  OBJS-$(CONFIG_PAD_FILTER)                    += vf_pad.o
> diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
> index ee58cc9eee..7c9ff0ab41 100644
> --- a/libavfilter/allfilters.c
> +++ b/libavfilter/allfilters.c
> @@ -269,6 +269,7 @@ extern AVFilter ff_vf_ocv;
>  extern AVFilter ff_vf_oscilloscope;
>  extern AVFilter ff_vf_overlay;
>  extern AVFilter ff_vf_overlay_opencl;
> +extern AVFilter ff_vf_overlay_vulkan;

And here.

>  extern AVFilter ff_vf_overlay_qsv;
>  extern AVFilter ff_vf_owdenoise;
>  extern AVFilter ff_vf_pad;
> diff --git a/libavfilter/vf_overlay_vulkan.c b/libavfilter/vf_overlay_vulkan.c
> new file mode 100644
> index 0000000000..a7d8cc3cf2
> --- /dev/null
> +++ b/libavfilter/vf_overlay_vulkan.c

Code all looks fine.

Are you planning to add alpha as well?
diff mbox

Patch

diff --git a/configure b/configure
index d1ceb9e38d..2edd4e36aa 100755
--- a/configure
+++ b/configure
@@ -3370,6 +3370,7 @@  ocr_filter_deps="libtesseract"
 ocv_filter_deps="libopencv"
 openclsrc_filter_deps="opencl"
 overlay_opencl_filter_deps="opencl"
 overlay_qsv_filter_deps="libmfx"
 overlay_qsv_filter_select="qsvvpp"
+overlay_vulkan_filter_deps="vulkan libshaderc"
 owdenoise_filter_deps="gpl"
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 6e2d3681ec..c51add5cb4 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -280,6 +280,7 @@  OBJS-$(CONFIG_OSCILLOSCOPE_FILTER)           += vf_datascope.o
 OBJS-$(CONFIG_OVERLAY_FILTER)                += vf_overlay.o framesync.o
 OBJS-$(CONFIG_OVERLAY_OPENCL_FILTER)         += vf_overlay_opencl.o opencl.o \
                                                 opencl/overlay.o framesync.o
 OBJS-$(CONFIG_OVERLAY_QSV_FILTER)            += vf_overlay_qsv.o framesync.o
+OBJS-$(CONFIG_OVERLAY_VULKAN_FILTER)         += vf_overlay_vulkan.o framesync.o
 OBJS-$(CONFIG_OWDENOISE_FILTER)              += vf_owdenoise.o
 OBJS-$(CONFIG_PAD_FILTER)                    += vf_pad.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index ee58cc9eee..7c9ff0ab41 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -269,6 +269,7 @@  extern AVFilter ff_vf_ocv;
 extern AVFilter ff_vf_oscilloscope;
 extern AVFilter ff_vf_overlay;
 extern AVFilter ff_vf_overlay_opencl;
 extern AVFilter ff_vf_overlay_qsv;
+extern AVFilter ff_vf_overlay_vulkan;
 extern AVFilter ff_vf_owdenoise;
 extern AVFilter ff_vf_pad;
diff --git a/libavfilter/vf_overlay_vulkan.c b/libavfilter/vf_overlay_vulkan.c
new file mode 100644
index 0000000000..a7d8cc3cf2
--- /dev/null
+++ b/libavfilter/vf_overlay_vulkan.c
@@ -0,0 +1,461 @@ 
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/opt.h"
+#include "vulkan.h"
+#include "internal.h"
+#include "framesync.h"
+
+/**
+ * Private context of the overlay_vulkan filter.
+ */
+typedef struct OverlayVulkanContext {
+    /* Shared Vulkan filter state. NOTE(review): presumably has to stay the
+     * first member so the common ff_vk_* helpers can use the priv pointer
+     * directly - confirm against vulkan.h. */
+    VulkanFilterContext vkctx;
+
+    int initialized;        /* set to 1 at the end of init_filter(), cleared in uninit */
+    FFVkExecContext exec;   /* command buffer, queue and fence used by process_frames() */
+    FFFrameSync fs;         /* pairs up frames from the "main" and "overlay" inputs */
+    FFVkBuffer params_buf;  /* buffer holding the per-plane overlay offsets */
+
+    /* Shader updaters, must be in the main filter struct */
+    VkDescriptorImageInfo main_images[3];    /* per-plane views of the main frame */
+    VkDescriptorImageInfo overlay_images[3]; /* per-plane views of the overlay frame */
+    VkDescriptorImageInfo output_images[3];  /* per-plane views of the output frame */
+    VkDescriptorBufferInfo params_desc;      /* refers to params_buf */
+
+    int overlay_x;          /* "x" option: horizontal offset of the overlay */
+    int overlay_y;          /* "y" option: vertical offset of the overlay */
+} OverlayVulkanContext;
+
+/*
+ * GLSL helper emitted into the compute shader by init_filter().
+ *
+ * For plane i and pixel position pos: when pos lies inside the rectangle
+ * that starts at o_offset[i] and has the size of the overlay image, the
+ * overlay pixel at (pos - o_offset[i]) is written to the output; otherwise
+ * the main image pixel at pos is copied through. No blending is performed;
+ * the overlay simply replaces the main image where it covers it.
+ *
+ * NOTE(review): C() comes from the common Vulkan filter header; presumably
+ * it stringifies its second argument at the given indent level - confirm.
+ */
+static const char overlay_noalpha[] = {
+    C(0, void overlay_noalpha(int i, ivec2 pos)                                )
+    C(0, {                                                                     )
+    C(1,     ivec2 overlay_size = imageSize(overlay_img[i]);                   )
+    C(1,     if ((o_offset[i].x <= pos.x) && (o_offset[i].y <= pos.y) &&
+                 (pos.x < (o_offset[i].x + overlay_size.x)) &&
+                 (pos.y < (o_offset[i].y + overlay_size.y))) {                 )
+    C(2,         vec4 res = imageLoad(overlay_img[i], pos - o_offset[i]);      )
+    C(2,         imageStore(output_img[i], pos, res);                          )
+    C(1,     } else {                                                          )
+    C(2,         vec4 res = imageLoad(main_img[i], pos);                       )
+    C(2,         imageStore(output_img[i], pos, res);                          )
+    C(1,     }                                                                 )
+    C(0, }                                                                     )
+};
+
+/**
+ * Build everything the filter needs before the first dispatch: the compute
+ * shader with its two descriptor sets, the pipeline layout, the buffer with
+ * the per-plane overlay offsets, the execution context and the pipeline.
+ *
+ * Called from overlay_vulkan_blend() the first time a pair of frames is
+ * available; sets s->initialized on success.
+ *
+ * @param ctx filter context whose priv data is an OverlayVulkanContext
+ * @return 0 on success, otherwise the error code of the step that failed
+ */
+static av_cold int init_filter(AVFilterContext *ctx)
+{
+    int err;
+    OverlayVulkanContext *s = ctx->priv;
+
+    { /* Create the shader */
+        const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
+        const int max_planes = sizeof(s->main_images) / sizeof(s->main_images[0]);
+
+        /* The descriptor arrays in the context, o_offset[] in the shader and
+         * the parameter buffer all hold exactly three entries, and
+         * av_pix_fmt_count_planes() reports errors as negative values, so
+         * reject anything outside 1..3 instead of overrunning the arrays. */
+        if (planes < 1 || planes > max_planes) {
+            av_log(ctx, AV_LOG_ERROR, "Unsupported number of planes: %i\n",
+                   planes);
+            return AVERROR(EINVAL);
+        }
+
+        SPIRVShader *shd = ff_vk_init_shader(ctx, "overlay_compute",
+                                             VK_SHADER_STAGE_COMPUTE_BIT);
+        /* NOTE(review): assumes a NULL return signals an allocation failure */
+        if (!shd)
+            return AVERROR(ENOMEM);
+
+        ff_vk_set_compute_shader_sizes(ctx, shd, (int [3]){ 16, 16, 1 });
+
+        /* Set 0: one storage image per plane for main, overlay and output */
+        VulkanDescriptorSetBinding desc_i[3] = {
+            {
+                .name       = "main_img",
+                .type       = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+                .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format),
+                .mem_quali  = "readonly",
+                .dimensions = 2,
+                .elems      = planes,
+                .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
+                .updater    = s->main_images,
+            },
+            {
+                .name       = "overlay_img",
+                .type       = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+                .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format),
+                .mem_quali  = "readonly",
+                .dimensions = 2,
+                .elems      = planes,
+                .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
+                .updater    = s->overlay_images,
+            },
+            {
+                .name       = "output_img",
+                .type       = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+                .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format),
+                .mem_quali  = "writeonly",
+                .dimensions = 2,
+                .elems      = planes,
+                .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
+                .updater    = s->output_images,
+            },
+        };
+
+        /* Set 1: read-only storage buffer with the per-plane offsets */
+        VulkanDescriptorSetBinding desc_b = {
+            .name        = "params",
+            .type        = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+            .mem_quali   = "readonly",
+            .mem_layout  = "std430",
+            .stages      = VK_SHADER_STAGE_COMPUTE_BIT,
+            .updater     = &s->params_desc,
+            .buf_content = "ivec2 o_offset[3];",
+        };
+
+        RET(ff_vk_add_descriptor_set(ctx, shd,  desc_i, 3, 0)); /* set 0 */
+        RET(ff_vk_add_descriptor_set(ctx, shd, &desc_b, 1, 0)); /* set 1 */
+
+        GLSLD(   overlay_noalpha                                              );
+        GLSLC(0, void main()                                                  );
+        GLSLC(0, {                                                            );
+        GLSLC(1,     ivec2 pos = ivec2(gl_GlobalInvocationID.xy);             );
+        GLSLF(1,     int planes = %i;                                  ,planes);
+        GLSLC(1,     for (int i = 0; i < planes; i++) {                       );
+        GLSLC(2,         overlay_noalpha(i, pos);                             );
+        GLSLC(1,     }                                                        );
+        GLSLC(0, }                                                            );
+
+        RET(ff_vk_compile_shader(ctx, shd, "main"));
+    }
+
+    RET(ff_vk_init_pipeline_layout(ctx));
+
+    {
+        const AVPixFmtDescriptor *desc;
+        /* Host-side mirror of "ivec2 o_offset[3]": x/y pairs, one per plane */
+        struct {
+            int32_t o_offset[2*3];
+        } *par;
+
+        /* The buffer is bound through a STORAGE_BUFFER descriptor, so it has
+         * to be created with the matching storage-buffer usage flag. */
+        err = ff_vk_create_buf(ctx, &s->params_buf,
+                               sizeof(*par),
+                               VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
+                               VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
+        if (err)
+            return err;
+
+        err = ff_vk_map_buffers(ctx, &s->params_buf, (uint8_t **)&par, 1, 0);
+        if (err)
+            return err;
+
+        desc = av_pix_fmt_desc_get(s->vkctx.output_format);
+
+        /* Plane 0 uses the offsets as given; planes 1 and 2 use them scaled
+         * down by the chroma subsampling shifts of the output format. */
+        par->o_offset[0] = s->overlay_x;
+        par->o_offset[1] = s->overlay_y;
+        par->o_offset[2] = par->o_offset[0] >> desc->log2_chroma_w;
+        par->o_offset[3] = par->o_offset[1] >> desc->log2_chroma_h;
+        par->o_offset[4] = par->o_offset[0] >> desc->log2_chroma_w;
+        par->o_offset[5] = par->o_offset[1] >> desc->log2_chroma_h;
+
+        err = ff_vk_unmap_buffers(ctx, &s->params_buf, 1, 1);
+        if (err)
+            return err;
+
+        s->params_desc.buffer = s->params_buf.buf;
+        s->params_desc.range  = VK_WHOLE_SIZE;
+
+        ff_vk_update_descriptor_set(ctx, 1);
+    }
+
+    /* Execution context */
+    RET(ff_vk_create_exec_ctx(ctx, &s->exec,
+                              s->vkctx.hwctx->queue_family_comp_index));
+
+    /* The pipeline */
+    RET(ff_vk_init_compute_pipeline(ctx));
+
+    s->initialized = 1;
+
+    return 0;
+
+fail:
+    return err;
+}
+
+/**
+ * Composite one overlay frame onto one main frame with the compute shader.
+ *
+ * Creates per-plane image views for the three frames, records a command
+ * buffer that transitions the images and dispatches the shader, submits it
+ * and blocks until the fence signals. The image views are destroyed again
+ * before returning, on success and on every failure path.
+ *
+ * @param avctx     filter context
+ * @param out_f     destination frame; data[0] is used as an AVVkFrame
+ * @param main_f    frame from the "main" input
+ * @param overlay_f frame from the "overlay" input
+ * @return a non-negative value on success, a negative AVERROR code on failure
+ */
+static int process_frames(AVFilterContext *avctx, AVFrame *out_f,
+                          AVFrame *main_f, AVFrame *overlay_f)
+{
+    int err = 0;
+    VkResult ret;
+    OverlayVulkanContext *s = avctx->priv;
+    int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
+
+    AVVkFrame *out     = (AVVkFrame *)out_f->data[0];
+    AVVkFrame *main    = (AVVkFrame *)main_f->data[0];
+    AVVkFrame *overlay = (AVVkFrame *)overlay_f->data[0];
+
+    AVHWFramesContext *main_fc = (AVHWFramesContext*)main_f->hw_frames_ctx->data;
+    AVHWFramesContext *overlay_fc = (AVHWFramesContext*)overlay_f->hw_frames_ctx->data;
+
+    VkCommandBufferBeginInfo cmd_start = {
+        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
+        .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
+    };
+
+    VkComponentMapping null_map = {
+        .r = VK_COMPONENT_SWIZZLE_IDENTITY,
+        .g = VK_COMPONENT_SWIZZLE_IDENTITY,
+        .b = VK_COMPONENT_SWIZZLE_IDENTITY,
+        .a = VK_COMPONENT_SWIZZLE_IDENTITY,
+    };
+
+    VkSubmitInfo s_info = {
+        .sType                = VK_STRUCTURE_TYPE_SUBMIT_INFO,
+        .commandBufferCount   = 1,
+        .pCommandBuffers      = &s->exec.buf,
+    };
+
+    for (int i = 0; i < planes; i++) {
+        RET(ff_vk_create_imageview(avctx, &s->main_images[i].imageView, main,
+                                   ff_vk_plane_rep_fmt(main_fc->sw_format, i),
+                                   ff_vk_aspect_flags(main_fc->sw_format, i),
+                                   null_map, NULL));
+
+        RET(ff_vk_create_imageview(avctx, &s->overlay_images[i].imageView, overlay,
+                                   ff_vk_plane_rep_fmt(overlay_fc->sw_format, i),
+                                   ff_vk_aspect_flags(overlay_fc->sw_format, i),
+                                   null_map, NULL));
+
+        RET(ff_vk_create_imageview(avctx, &s->output_images[i].imageView, out,
+                                   ff_vk_plane_rep_fmt(s->vkctx.output_format, i),
+                                   ff_vk_aspect_flags(s->vkctx.output_format, i),
+                                   null_map, NULL));
+
+        s->main_images[i].imageLayout    = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+        s->overlay_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+        s->output_images[i].imageLayout  = VK_IMAGE_LAYOUT_GENERAL;
+    }
+
+    ff_vk_update_descriptor_set(avctx, 0);
+
+    ret = vkBeginCommandBuffer(s->exec.buf, &cmd_start);
+    if (ret != VK_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to begin command buffer: %s\n",
+               ff_vk_ret2str(ret));
+        err = AVERROR_EXTERNAL;
+        goto fail;
+    }
+
+    {
+        /* Move the inputs to a shader-readable layout and the output to the
+         * general layout before the compute stage touches them. */
+        VkImageMemoryBarrier bar[3] = {
+            {
+                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+                .srcAccessMask = 0,
+                .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
+                .oldLayout = main->layout,
+                .newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .image = main->img,
+                .subresourceRange.aspectMask = ff_vk_aspect_flags(main_fc->sw_format, -1),
+                .subresourceRange.levelCount = 1,
+                .subresourceRange.layerCount = 1,
+            },
+            {
+                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+                .srcAccessMask = 0,
+                .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
+                .oldLayout = overlay->layout,
+                .newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .image = overlay->img,
+                .subresourceRange.aspectMask = ff_vk_aspect_flags(overlay_fc->sw_format, -1),
+                .subresourceRange.levelCount = 1,
+                .subresourceRange.layerCount = 1,
+            },
+            {
+                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+                .srcAccessMask = 0,
+                .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
+                .oldLayout = out->layout,
+                .newLayout = VK_IMAGE_LAYOUT_GENERAL,
+                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .image = out->img,
+                .subresourceRange.aspectMask = ff_vk_aspect_flags(s->vkctx.output_format, -1),
+                .subresourceRange.levelCount = 1,
+                .subresourceRange.layerCount = 1,
+            },
+        };
+
+        vkCmdPipelineBarrier(s->exec.buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+                            VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0,
+                            0, NULL, 0, NULL, 3, bar);
+
+        /* Record the new state in the frames so later users see it */
+        main->layout    = bar[0].newLayout;
+        main->access    = bar[0].dstAccessMask;
+
+        overlay->layout = bar[1].newLayout;
+        overlay->access = bar[1].dstAccessMask;
+
+        out->layout     = bar[2].newLayout;
+        out->access     = bar[2].dstAccessMask;
+    }
+
+    vkCmdBindPipeline(s->exec.buf, VK_PIPELINE_BIND_POINT_COMPUTE, s->vkctx.pipeline);
+    vkCmdBindDescriptorSets(s->exec.buf, VK_PIPELINE_BIND_POINT_COMPUTE, s->vkctx.pipeline_layout, 0, s->vkctx.descriptor_sets_num, s->vkctx.desc_set, 0, 0);
+    /* One workgroup per local_size tile, rounded up to cover the whole output */
+    vkCmdDispatch(s->exec.buf,
+                  FFALIGN(s->vkctx.output_width,  s->vkctx.shaders[0].local_size[0])/s->vkctx.shaders[0].local_size[0],
+                  FFALIGN(s->vkctx.output_height, s->vkctx.shaders[0].local_size[1])/s->vkctx.shaders[0].local_size[1], 1);
+
+    ret = vkEndCommandBuffer(s->exec.buf);
+    if (ret != VK_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to end command buffer: %s\n",
+               ff_vk_ret2str(ret));
+        err = AVERROR_EXTERNAL;
+        goto fail;
+    }
+
+    ret = vkQueueSubmit(s->exec.queue, 1, &s_info, s->exec.fence);
+    if (ret != VK_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to submit command buffer: %s\n",
+               ff_vk_ret2str(ret));
+        /* Go through the cleanup below instead of returning directly, so the
+         * image views created above are not leaked. */
+        err = AVERROR_EXTERNAL;
+        goto fail;
+    }
+
+    ret = vkWaitForFences(s->vkctx.hwctx->act_dev, 1, &s->exec.fence, VK_TRUE, UINT64_MAX);
+    if (ret != VK_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to wait for fence: %s\n",
+               ff_vk_ret2str(ret));
+        err = AVERROR_EXTERNAL;
+        goto fail;
+    }
+    vkResetFences(s->vkctx.hwctx->act_dev, 1, &s->exec.fence);
+
+fail:
+
+    /* The views only live for one frame. Clearing the handles afterwards
+     * keeps a later call that fails part-way through view creation from
+     * destroying a stale handle a second time. */
+    for (int i = 0; i < planes; i++) {
+        ff_vk_destroy_imageview(avctx, s->main_images[i].imageView);
+        ff_vk_destroy_imageview(avctx, s->overlay_images[i].imageView);
+        ff_vk_destroy_imageview(avctx, s->output_images[i].imageView);
+
+        s->main_images[i].imageView    = VK_NULL_HANDLE;
+        s->overlay_images[i].imageView = VK_NULL_HANDLE;
+        s->output_images[i].imageView  = VK_NULL_HANDLE;
+    }
+
+    return err;
+}
+
+/**
+ * Frame-sync event callback (installed in overlay_vulkan_init()).
+ *
+ * Fetches the current main and overlay frames, initialises the shader and
+ * pipeline on first use, renders into a freshly allocated output frame and
+ * sends it down the output link with the main frame's properties.
+ *
+ * @param fs frame-sync state; fs->parent is the filter context
+ * @return 0 when either input frame is missing, the result of
+ *         ff_filter_frame() on success, a negative AVERROR code on failure
+ */
+static int overlay_vulkan_blend(FFFrameSync *fs)
+{
+    int err;
+    AVFilterContext *ctx = fs->parent;
+    OverlayVulkanContext *s = ctx->priv;
+    AVFilterLink *outlink = ctx->outputs[0];
+    AVFrame *input_main, *input_overlay;
+    /* Must start out NULL: several error paths reach av_frame_free(&out)
+     * at the fail label before any output frame has been allocated. */
+    AVFrame *out = NULL;
+
+    err = ff_framesync_get_frame(fs, 0, &input_main, 0);
+    if (err < 0)
+        goto fail;
+    err = ff_framesync_get_frame(fs, 1, &input_overlay, 0);
+    if (err < 0)
+        goto fail;
+
+    if (!input_main || !input_overlay)
+        return 0;
+
+    if (!s->initialized) {
+        AVHWFramesContext *main_fc = (AVHWFramesContext*)input_main->hw_frames_ctx->data;
+        AVHWFramesContext *overlay_fc = (AVHWFramesContext*)input_overlay->hw_frames_ctx->data;
+        /* The shader declares all images with the output format's layout,
+         * so both inputs have to share one software pixel format. */
+        if (main_fc->sw_format != overlay_fc->sw_format) {
+            av_log(ctx, AV_LOG_ERROR, "Mismatching sw formats!\n");
+            return AVERROR(EINVAL);
+        }
+        RET(init_filter(ctx));
+    }
+
+    out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+    if (!out) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    RET(process_frames(ctx, out, input_main, input_overlay));
+
+    err = av_frame_copy_props(out, input_main);
+    if (err < 0)
+        goto fail;
+
+    return ff_filter_frame(outlink, out);
+
+fail:
+    av_frame_free(&out);
+    return err;
+}
+
+/**
+ * Output pad configuration: run the common Vulkan output setup, then set up
+ * and configure dual-input frame synchronisation.
+ *
+ * @return the first negative error code encountered, otherwise the result
+ *         of ff_framesync_configure()
+ */
+static int overlay_vulkan_config_output(AVFilterLink *outlink)
+{
+    AVFilterContext *avctx = outlink->src;
+    OverlayVulkanContext *s = avctx->priv;
+    int ret = ff_vk_filter_config_output(outlink);
+
+    /* Each step only runs when the previous one did not fail */
+    if (ret >= 0)
+        ret = ff_framesync_init_dualinput(&s->fs, avctx);
+    if (ret >= 0)
+        ret = ff_framesync_configure(&s->fs);
+
+    return ret;
+}
+
+/** Activation callback: hand scheduling over to the frame-sync helper. */
+static int overlay_vulkan_activate(AVFilterContext *avctx)
+{
+    OverlayVulkanContext *priv = avctx->priv;
+    FFFrameSync *sync = &priv->fs;
+
+    return ff_framesync_activate(sync);
+}
+
+/**
+ * Filter init: register overlay_vulkan_blend() as the frame-sync event
+ * handler, then run the common Vulkan filter initialisation.
+ */
+static av_cold int overlay_vulkan_init(AVFilterContext *avctx)
+{
+    OverlayVulkanContext *priv = avctx->priv;
+    FFFrameSync *sync = &priv->fs;
+
+    sync->on_event = overlay_vulkan_blend;
+
+    return ff_vk_filter_init(avctx);
+}
+
+/**
+ * Filter teardown: free the execution context, run the common Vulkan filter
+ * cleanup and release the frame-sync state.
+ */
+static void overlay_vulkan_uninit(AVFilterContext *avctx)
+{
+    OverlayVulkanContext *s = avctx->priv;
+
+    /* NOTE(review): params_buf, created in init_filter(), is not released
+     * here - confirm whether ff_vk_filter_uninit() takes care of it or
+     * whether an explicit free is missing. */
+    ff_vk_free_exec_ctx(avctx, &s->exec);
+    ff_vk_filter_uninit(avctx);
+    ff_framesync_uninit(&s->fs);
+
+    /* overlay_vulkan_blend() re-runs init_filter() whenever this is 0 */
+    s->initialized = 0;
+}
+
+#define OFFSET(x) offsetof(OverlayVulkanContext, x)
+#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
+/* User options: where the overlay's origin is placed on the main frame.
+ * Both offsets default to 0 and are limited to the range 0..INT_MAX. */
+static const AVOption overlay_vulkan_options[] = {
+    {
+        .name        = "x",
+        .help        = "Set horizontal offset",
+        .offset      = OFFSET(overlay_x),
+        .type        = AV_OPT_TYPE_INT,
+        .default_val = { .i64 = 0 },
+        .min         = 0,
+        .max         = INT_MAX,
+        .flags       = FLAGS,
+    },
+    {
+        .name        = "y",
+        .help        = "Set vertical offset",
+        .offset      = OFFSET(overlay_y),
+        .type        = AV_OPT_TYPE_INT,
+        .default_val = { .i64 = 0 },
+        .min         = 0,
+        .max         = INT_MAX,
+        .flags       = FLAGS,
+    },
+    { NULL },
+};
+
+AVFILTER_DEFINE_CLASS(overlay_vulkan);
+
+/* Input pads. Index 0 carries the base picture and index 1 the picture
+ * placed on top of it; overlay_vulkan_blend() reads them in that order. */
+static const AVFilterPad overlay_vulkan_inputs[] = {
+    {
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .name         = "main",
+        .config_props = ff_vk_filter_config_input,
+    },
+    {
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .name         = "overlay",
+        .config_props = ff_vk_filter_config_input,
+    },
+    { NULL }
+};
+
+/* Single video output pad, configured by overlay_vulkan_config_output(). */
+static const AVFilterPad overlay_vulkan_outputs[] = {
+    {
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .name         = "default",
+        .config_props = overlay_vulkan_config_output,
+    },
+    { NULL }
+};
+
+/* Filter definition picked up through the extern declaration in allfilters.c */
+AVFilter ff_vf_overlay_vulkan = {
+    .name           = "overlay_vulkan",
+    .description    = NULL_IF_CONFIG_SMALL("Overlay a source on top of another"),
+    .priv_class     = &overlay_vulkan_class,
+    .priv_size      = sizeof(OverlayVulkanContext),
+    .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
+    /* lifecycle callbacks */
+    .init           = overlay_vulkan_init,
+    .uninit         = overlay_vulkan_uninit,
+    .query_formats  = ff_vk_filter_query_formats,
+    .activate       = overlay_vulkan_activate,
+    /* pads */
+    .inputs         = overlay_vulkan_inputs,
+    .outputs        = overlay_vulkan_outputs,
+};