diff mbox series

[FFmpeg-devel] avfilter: add vf_overlay_videotoolbox

Message ID 20240221011806.2581-1-gnattuoc@me.com
State New
Headers show
Series [FFmpeg-devel] avfilter: add vf_overlay_videotoolbox | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

gnattu Feb. 21, 2024, 1:18 a.m. UTC
Overlay filter for VideoToolbox hwframes. Unlike most hardware
overlay filters, this filter does not require the two inputs to
have the same pixel format; instead, it will perform format
conversion automatically with hardware accelerated methods.

Signed-off-by: Gnattu OC <gnattuoc@me.com>
---
 Changelog                                     |   1 +
 configure                                     |   1 +
 libavfilter/Makefile                          |   3 +
 libavfilter/allfilters.c                      |   1 +
 libavfilter/metal/utils.h                     |   7 +
 libavfilter/metal/utils.m                     |  28 +
 .../metal/vf_overlay_videotoolbox.metal       |  58 ++
 libavfilter/vf_overlay_videotoolbox.m         | 504 ++++++++++++++++++
 8 files changed, 603 insertions(+)
 create mode 100644 libavfilter/metal/vf_overlay_videotoolbox.metal
 create mode 100644 libavfilter/vf_overlay_videotoolbox.m
diff mbox series

Patch

diff --git a/Changelog b/Changelog
index 610ee61dd6..3ecfdab81b 100644
--- a/Changelog
+++ b/Changelog
@@ -27,6 +27,7 @@  version <next>:
 - a C11-compliant compiler is now required; note that this requirement
   will be bumped to C17 in the near future, so consider updating your
   build environment if it lacks C17 support
+- VideoToolbox overlay filter
 
 version 6.1:
 - libaribcaption decoder
diff --git a/configure b/configure
index 23066efa32..a7c349d126 100755
--- a/configure
+++ b/configure
@@ -3807,6 +3807,7 @@  overlay_qsv_filter_deps="libmfx"
 overlay_qsv_filter_select="qsvvpp"
 overlay_vaapi_filter_deps="vaapi VAProcPipelineCaps_blend_flags"
 overlay_vulkan_filter_deps="vulkan spirv_compiler"
+overlay_videotoolbox_filter_deps="metal corevideo coreimage videotoolbox"
 owdenoise_filter_deps="gpl"
 pad_opencl_filter_deps="opencl"
 pan_filter_deps="swresample"
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index f6c1d641d6..330924fadf 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -413,6 +413,9 @@  OBJS-$(CONFIG_OVERLAY_OPENCL_FILTER)         += vf_overlay_opencl.o opencl.o \
 OBJS-$(CONFIG_OVERLAY_QSV_FILTER)            += vf_overlay_qsv.o framesync.o
 OBJS-$(CONFIG_OVERLAY_VAAPI_FILTER)          += vf_overlay_vaapi.o framesync.o vaapi_vpp.o
 OBJS-$(CONFIG_OVERLAY_VULKAN_FILTER)         += vf_overlay_vulkan.o vulkan.o vulkan_filter.o
+OBJS-$(CONFIG_OVERLAY_VIDEOTOOLBOX_FILTER)     += vf_overlay_videotoolbox.o \
+                                                metal/vf_overlay_videotoolbox.metallib.o \
+                                                metal/utils.o
 OBJS-$(CONFIG_OWDENOISE_FILTER)              += vf_owdenoise.o
 OBJS-$(CONFIG_PAD_FILTER)                    += vf_pad.o
 OBJS-$(CONFIG_PAD_OPENCL_FILTER)             += vf_pad_opencl.o opencl.o opencl/pad.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index 149bf50997..ec9d975ecb 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -389,6 +389,7 @@  extern const AVFilter ff_vf_overlay_qsv;
 extern const AVFilter ff_vf_overlay_vaapi;
 extern const AVFilter ff_vf_overlay_vulkan;
 extern const AVFilter ff_vf_overlay_cuda;
+extern const AVFilter ff_vf_overlay_videotoolbox;
 extern const AVFilter ff_vf_owdenoise;
 extern const AVFilter ff_vf_pad;
 extern const AVFilter ff_vf_pad_opencl;
diff --git a/libavfilter/metal/utils.h b/libavfilter/metal/utils.h
index 7350d42a35..a2142b6472 100644
--- a/libavfilter/metal/utils.h
+++ b/libavfilter/metal/utils.h
@@ -56,4 +56,11 @@  CVMetalTextureRef ff_metal_texture_from_pixbuf(void *avclass,
                                                MTLPixelFormat format)
                                                API_AVAILABLE(macos(10.11), ios(8.0));
 
+CVMetalTextureRef ff_metal_texture_from_non_planer_pixbuf(void *avclass,
+                                               CVMetalTextureCacheRef textureCache,
+                                               CVPixelBufferRef pixbuf,
+                                               int plane,
+                                               MTLPixelFormat format)
+API_AVAILABLE(macos(10.11), ios(8.0));
+
 #endif /* AVFILTER_METAL_UTILS_H */
diff --git a/libavfilter/metal/utils.m b/libavfilter/metal/utils.m
index f365d3ceea..b6a4ba16ff 100644
--- a/libavfilter/metal/utils.m
+++ b/libavfilter/metal/utils.m
@@ -74,3 +74,31 @@  CVMetalTextureRef ff_metal_texture_from_pixbuf(void *ctx,
 
     return tex;
 }
+
+CVMetalTextureRef ff_metal_texture_from_non_planer_pixbuf(void *ctx,
+                                               CVMetalTextureCacheRef textureCache,
+                                               CVPixelBufferRef pixbuf,
+                                               int plane,
+                                               MTLPixelFormat format)
+{
+    CVMetalTextureRef tex = NULL;
+    CVReturn ret;
+
+    ret = CVMetalTextureCacheCreateTextureFromImage(
+        NULL,
+        textureCache,
+        pixbuf,
+        NULL,
+        format,
+        CVPixelBufferGetWidth(pixbuf),
+        CVPixelBufferGetHeight(pixbuf),
+        plane,
+        &tex
+    );
+    if (ret != kCVReturnSuccess) {
+        av_log(ctx, AV_LOG_ERROR, "ff_metal_texture_from_non_planer_pixbuf Failed to create CVMetalTexture from image: %d\n", ret);
+        return NULL;
+    }
+
+    return tex;
+}
diff --git a/libavfilter/metal/vf_overlay_videotoolbox.metal b/libavfilter/metal/vf_overlay_videotoolbox.metal
new file mode 100644
index 0000000000..936e57e03e
--- /dev/null
+++ b/libavfilter/metal/vf_overlay_videotoolbox.metal
@@ -0,0 +1,58 @@ 
+/*
+ * Copyright (C) 2024 Gnattu OC <gnattuoc@me.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <metal_stdlib>
+#include <metal_integer>
+#include <metal_texture>
+
+using namespace metal;
+
+struct mtlBlendParams {
+    uint x_position;
+    uint y_position;
+};
+
+/*
+ * Blend shader for premultiplied alpha textures
+ */
+kernel void blend_shader(
+                         texture2d<float, access::read> source [[ texture(0) ]],
+                         texture2d<float, access::read> mask [[ texture(1) ]],
+                         texture2d<float, access::write> dest [[ texture(2) ]],
+                         constant mtlBlendParams& params [[ buffer(3) ]],
+                         uint2 gid [[ thread_position_in_grid ]])
+{
+    const auto mask_size = uint2(mask.get_width(),
+                                 mask.get_height());
+    const auto loc_overlay = uint2(params.x_position, params.y_position);
+    if (gid.x <  loc_overlay.x ||
+        gid.y <  loc_overlay.y ||
+        gid.x >= mask_size.x + loc_overlay.x ||
+        gid.y >= mask_size.y + loc_overlay.y)
+    {
+        float4 source_color = source.read(gid);
+        dest.write(source_color, gid);
+    } else {
+        float4 source_color = source.read(gid);
+        float4 mask_color = mask.read((gid - loc_overlay));
+        float4 result_color = source_color * (1.0f - mask_color.w) + (mask_color * mask_color.w);
+        dest.write(result_color, gid);
+    }
+}
diff --git a/libavfilter/vf_overlay_videotoolbox.m b/libavfilter/vf_overlay_videotoolbox.m
new file mode 100644
index 0000000000..e100523088
--- /dev/null
+++ b/libavfilter/vf_overlay_videotoolbox.m
@@ -0,0 +1,504 @@ 
+/*
+ * Copyright (C) 2024 Gnattu OC <gnattuoc@me.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <CoreImage/CoreImage.h>
+#include <VideoToolbox/VideoToolbox.h>
+#include "internal.h"
+#include "metal/utils.h"
+#include "framesync.h"
+#include "libavutil/hwcontext.h"
+#include "libavutil/pixdesc.h"
+#include "libavutil/opt.h"
+#include "libavutil/objc.h"
+#include "video.h"
+
+#include <assert.h>
+
+extern char ff_vf_overlay_videotoolbox_metallib_data[];
+extern unsigned int ff_vf_overlay_videotoolbox_metallib_len;
+
+typedef struct API_AVAILABLE(macos(10.11), ios(8.0)) OverlayVideoToolboxContext {
+    AVBufferRef *device_ref;
+    FFFrameSync fs;
+    CVMetalTextureCacheRef textureCache;
+    CVPixelBufferRef inputMainPixelBufferCache;
+    CVPixelBufferRef outputPixelBufferCache;
+    CVPixelBufferRef inputOverlayPixelBufferCache;
+    CIContext *coreImageCtx;
+    VTPixelTransferSessionRef vtSession;
+
+    id<MTLDevice> mtlDevice;
+    id<MTLLibrary> mtlLibrary;
+    id<MTLCommandQueue> mtlQueue;
+    id<MTLComputePipelineState> mtlPipeline;
+    id<MTLFunction> mtlFunction;
+    id<MTLBuffer> mtlParamsBuffer;
+
+    int              output_configured;
+    uint              x_position;
+    uint              y_position;
+    enum AVPixelFormat output_format;
+} OverlayVideoToolboxContext API_AVAILABLE(macos(10.11), ios(8.0));
+
+struct mtlBlendParams {
+    uint x_position;
+    uint y_position;
+};
+
+// Using sizeof(OverlayVideoToolboxContext) without an availability check will error
+// if we're targeting an older OS version, so we need to calculate the size ourselves
+// (we'll statically verify it's correct in overlay_videotoolbox_init behind a check)
+#define OVERLAY_VT_CTX_SIZE (sizeof(FFFrameSync) + sizeof(int) * 1 + sizeof(uint) * 2 + sizeof(void*) * 13 + sizeof(enum AVPixelFormat))
+
+static void call_kernel(AVFilterContext *avctx,
+                        id<MTLTexture> dst,
+                        id<MTLTexture> main,
+                        id<MTLTexture> overlay,
+                        uint x_position,
+                        uint y_position) API_AVAILABLE(macos(10.11), ios(8.0))
+{
+    OverlayVideoToolboxContext *ctx = avctx->priv;
+    id<MTLCommandBuffer> buffer = ctx->mtlQueue.commandBuffer;
+    id<MTLComputeCommandEncoder> encoder = buffer.computeCommandEncoder;
+
+    struct mtlBlendParams *params = (struct mtlBlendParams *)ctx->mtlParamsBuffer.contents;
+    *params = (struct mtlBlendParams){
+        .x_position = x_position,
+        .y_position = y_position,
+    };
+    [encoder setTexture:main atIndex:0];
+    [encoder setTexture:overlay atIndex:1];
+    [encoder setTexture:dst atIndex:2];
+    [encoder setBuffer:ctx->mtlParamsBuffer offset:0 atIndex:3];
+    ff_metal_compute_encoder_dispatch(ctx->mtlDevice, ctx->mtlPipeline, encoder, dst.width, dst.height);
+    [encoder endEncoding];
+    [buffer commit];
+    [buffer waitUntilCompleted];
+}
+
+static int overlay_vt_blend(FFFrameSync *fs) API_AVAILABLE(macos(10.11), ios(8.0))
+{
+    AVFilterContext *avctx = fs->parent;
+    OverlayVideoToolboxContext *ctx = avctx->priv;
+    AVFilterLink *outlink = avctx->outputs[0];
+    AVFilterLink *inlink = avctx->inputs[0];
+    AVFilterLink *inlink_overlay = avctx->inputs[1];
+    AVFrame *input_main, *input_overlay;
+    AVFrame *output;
+    AVHWFramesContext *frames_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data;
+    AVHWFramesContext *frames_ctx_overlay = (AVHWFramesContext*)inlink_overlay->hw_frames_ctx->data;
+    const AVPixFmtDescriptor *in_overlay_desc;
+
+    CIImage *main_image = NULL;
+    CIImage *output_image = NULL;
+    CVMetalTextureRef main, dst, overlay;
+    id<MTLCommandBuffer> mtl_buffer = ctx->mtlQueue.commandBuffer;
+    id<MTLTexture> tex_main, tex_overlay, tex_dst;
+
+    MTLPixelFormat format = MTLPixelFormatBGRA8Unorm;
+    int ret;
+    int i, overlay_planes = 0;
+    in_overlay_desc = av_pix_fmt_desc_get(frames_ctx_overlay->sw_format);
+    // read main and overlay frames from inputs
+    ret = ff_framesync_get_frame(fs, 0, &input_main, 0);
+    if (ret < 0)
+        return ret;
+    ret = ff_framesync_get_frame(fs, 1, &input_overlay, 0);
+    if (ret < 0)
+        return ret;
+    if (!input_main)
+        return AVERROR_BUG;
+    if (!input_overlay)
+        return ff_filter_frame(outlink, input_main);
+
+    output = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+    ret = av_frame_copy_props(output, input_main);
+    if (ret < 0)
+        return ret;
+    [mtl_buffer commit];
+    for (i = 0; i < in_overlay_desc->nb_components; i++)
+        overlay_planes = FFMAX(overlay_planes,
+                               in_overlay_desc->comp[i].plane + 1);
+    if (overlay_planes > 1) {
+        if (@available(macOS 10.8, iOS 16.0, *)) {
+            if (!ctx->vtSession) {
+                ret = VTPixelTransferSessionCreate(NULL, &ctx->vtSession);
+                if (ret < 0)
+                    return ret;
+            }
+            if (!ctx->inputOverlayPixelBufferCache) {
+                ret = CVPixelBufferCreate(kCFAllocatorDefault,
+                                          CVPixelBufferGetWidthOfPlane((CVPixelBufferRef)input_overlay->data[3], 0),
+                                          CVPixelBufferGetHeightOfPlane((CVPixelBufferRef)input_overlay->data[3], 0),
+                                          kCVPixelFormatType_32BGRA,
+                                          (__bridge CFDictionaryRef)@{
+                                              (NSString *)kCVPixelBufferCGImageCompatibilityKey: @(YES),
+                                              (NSString *)kCVPixelBufferMetalCompatibilityKey: @(YES)
+                                          },
+                                          &ctx->inputOverlayPixelBufferCache);
+                if (ret < 0)
+                    return ret;
+            }
+            // The YUV formatted overlays will be hwuploaded to kCVPixelFormatType_4444AYpCbCr16, which is not render-able using CoreImage.
+            // As a fallback, use the (much) slower VTPixelTransferSessionTransferImage instead.
+            // This should work on all macOS version provides Metal, but is only available on iOS >=16.
+            ret = VTPixelTransferSessionTransferImage(ctx->vtSession,(CVPixelBufferRef)input_overlay->data[3] ,ctx->inputOverlayPixelBufferCache);
+            if (ret < 0)
+                return ret;
+            overlay = ff_metal_texture_from_non_planer_pixbuf(avctx, ctx->textureCache, ctx->inputOverlayPixelBufferCache, 0, format);
+        } else {
+            av_log(ctx, AV_LOG_ERROR, "VTPixelTransferSessionTransferImage is not available on this OS version\n");
+            av_log(ctx, AV_LOG_ERROR, "Try an overlay with kCVPixelFormatType_32BGRA\n");
+            return AVERROR(ENOSYS);
+        }
+    } else {
+        overlay = ff_metal_texture_from_non_planer_pixbuf(avctx, ctx->textureCache, (CVPixelBufferRef)input_overlay->data[3], 0, format);
+    }
+    main_image = CFBridgingRetain([CIImage imageWithCVPixelBuffer: (CVPixelBufferRef)input_main->data[3]]);
+    if (!ctx->inputMainPixelBufferCache) {
+        ret = CVPixelBufferCreate(kCFAllocatorDefault,
+                                  CVPixelBufferGetWidthOfPlane((CVPixelBufferRef)input_main->data[3], 0),
+                                  CVPixelBufferGetHeightOfPlane((CVPixelBufferRef)input_main->data[3], 0),
+                                  kCVPixelFormatType_32BGRA,
+                                  (__bridge CFDictionaryRef)@{
+                                      (NSString *)kCVPixelBufferCGImageCompatibilityKey: @(YES),
+                                      (NSString *)kCVPixelBufferMetalCompatibilityKey: @(YES)
+                                  },
+                                  &ctx->inputMainPixelBufferCache);
+        if (ret < 0)
+            return ret;
+    }
+    if (!ctx->outputPixelBufferCache) {
+        ret = CVPixelBufferCreate(kCFAllocatorDefault,
+                                  CVPixelBufferGetWidthOfPlane((CVPixelBufferRef)input_main->data[3], 0),
+                                  CVPixelBufferGetHeightOfPlane((CVPixelBufferRef)input_main->data[3], 0),
+                                  kCVPixelFormatType_32BGRA,
+                                  (__bridge CFDictionaryRef)@{
+                                      (NSString *)kCVPixelBufferCGImageCompatibilityKey: @(YES),
+                                      (NSString *)kCVPixelBufferMetalCompatibilityKey: @(YES)
+                                  },
+                                  &ctx->outputPixelBufferCache);
+        if (ret < 0)
+            return ret;
+    }
+    [(__bridge CIContext*)ctx->coreImageCtx render: (__bridge CIImage*)main_image toCVPixelBuffer: ctx->inputMainPixelBufferCache];
+    [mtl_buffer waitUntilCompleted];
+    main = ff_metal_texture_from_non_planer_pixbuf(avctx, ctx->textureCache, ctx->inputMainPixelBufferCache, 0, format);
+    dst = ff_metal_texture_from_non_planer_pixbuf(avctx, ctx->textureCache, ctx->outputPixelBufferCache, 0, format);
+    tex_main = CVMetalTextureGetTexture(main);
+    tex_overlay  = CVMetalTextureGetTexture(overlay);
+    tex_dst = CVMetalTextureGetTexture(dst);
+    call_kernel(avctx, tex_dst, tex_main, tex_overlay, ctx->x_position, ctx->y_position);
+    output_image = CFBridgingRetain([CIImage imageWithCVPixelBuffer: ctx->outputPixelBufferCache]);
+    [(__bridge CIContext*)ctx->coreImageCtx render: (__bridge CIImage*)output_image toCVPixelBuffer: (CVPixelBufferRef)output->data[3]];
+    [mtl_buffer waitUntilCompleted];
+    CFRelease(main);
+    CFRelease(overlay);
+    CFRelease(dst);
+    CFRelease(main_image);
+    CFRelease(output_image);
+    CVBufferPropagateAttachments((CVPixelBufferRef)input_main->data[3], (CVPixelBufferRef)output->data[3]);
+
+    return ff_filter_frame(outlink, output);
+}
+
+static av_cold void do_uninit(AVFilterContext *avctx) API_AVAILABLE(macos(10.11), ios(8.0))
+{
+    OverlayVideoToolboxContext *ctx = avctx->priv;
+    if(ctx->coreImageCtx) {
+        CFRelease(ctx->coreImageCtx);
+        ctx->coreImageCtx = NULL;
+    }
+    if (ctx->output_configured) {
+        av_buffer_unref(&ctx->device_ref);
+    }
+
+    ff_objc_release(&ctx->mtlParamsBuffer);
+    ff_objc_release(&ctx->mtlFunction);
+    ff_objc_release(&ctx->mtlPipeline);
+    ff_objc_release(&ctx->mtlQueue);
+    ff_objc_release(&ctx->mtlLibrary);
+    ff_objc_release(&ctx->mtlDevice);
+
+    if (ctx->textureCache) {
+        CFRelease(ctx->textureCache);
+        ctx->textureCache = NULL;
+    }
+    if (ctx->inputMainPixelBufferCache) {
+        CFRelease(ctx->inputMainPixelBufferCache);
+        ctx->inputMainPixelBufferCache = NULL;
+    }
+    if (ctx->inputOverlayPixelBufferCache) {
+        CFRelease(ctx->inputOverlayPixelBufferCache);
+        ctx->inputOverlayPixelBufferCache = NULL;
+    }
+    if (ctx->outputPixelBufferCache) {
+        CFRelease(ctx->outputPixelBufferCache);
+        ctx->outputPixelBufferCache = NULL;
+    }
+    if(ctx->vtSession) {
+        VTPixelTransferSessionInvalidate(ctx->vtSession);
+        CFRelease(ctx->vtSession);
+        ctx->vtSession = NULL;
+    }
+    ff_framesync_uninit(&ctx->fs);
+}
+
+static av_cold void overlay_videotoolbox_uninit(AVFilterContext *ctx)
+{
+    if (@available(macOS 10.11, iOS 8.0, *)) {
+        do_uninit(ctx);
+    }
+}
+
+static av_cold int do_init(AVFilterContext *ctx) API_AVAILABLE(macos(10.11), ios(8.0))
+{
+    OverlayVideoToolboxContext *s = ctx->priv;
+    NSError *err = nil;
+    CVReturn ret;
+    dispatch_data_t libData;
+
+    s->mtlDevice = MTLCreateSystemDefaultDevice();
+    if (!s->mtlDevice) {
+        av_log(ctx, AV_LOG_ERROR, "Unable to find Metal device\n");
+        goto fail;
+    }
+
+    av_log(ctx, AV_LOG_INFO, "Using Metal device: %s\n", s->mtlDevice.name.UTF8String);
+
+    libData = dispatch_data_create(
+        ff_vf_overlay_videotoolbox_metallib_data,
+        ff_vf_overlay_videotoolbox_metallib_len,
+        nil,
+        nil);
+
+    s->mtlLibrary = [s->mtlDevice newLibraryWithData:libData error:&err];
+    dispatch_release(libData);
+    libData = nil;
+    s->mtlFunction = [s->mtlLibrary newFunctionWithName:@"blend_shader"];
+    if (!s->mtlFunction) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to create Metal function!\n");
+        goto fail;
+    }
+
+    s->mtlQueue = s->mtlDevice.newCommandQueue;
+    if (!s->mtlQueue) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to create Metal command queue!\n");
+        goto fail;
+    }
+
+    s->mtlPipeline = [s->mtlDevice
+        newComputePipelineStateWithFunction:s->mtlFunction
+        error:&err];
+    if (err) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to create Metal compute pipeline: %s\n", err.description.UTF8String);
+        goto fail;
+    }
+
+    s->mtlParamsBuffer = [s->mtlDevice
+        newBufferWithLength:sizeof(struct mtlBlendParams)
+        options:MTLResourceStorageModeShared];
+    if (!s->mtlParamsBuffer) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to create Metal buffer for parameters\n");
+        goto fail;
+    }
+
+    ret = CVMetalTextureCacheCreate(
+        NULL,
+        NULL,
+        s->mtlDevice,
+        NULL,
+        &s->textureCache
+    );
+    if (ret != kCVReturnSuccess) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to create CVMetalTextureCache: %d\n", ret);
+        goto fail;
+    }
+
+    s->coreImageCtx = CFBridgingRetain([CIContext contextWithMTLCommandQueue: s->mtlQueue]);
+    s->fs.on_event = &overlay_vt_blend;
+    s->output_format = AV_PIX_FMT_NONE;
+    av_log(ctx, AV_LOG_INFO, "do_init!\n");
+
+    return 0;
+fail:
+    overlay_videotoolbox_uninit(ctx);
+    return AVERROR_EXTERNAL;
+}
+
+static av_cold int overlay_videotoolbox_init(AVFilterContext *ctx)
+{
+    if (@available(macOS 10.11, iOS 8.0, *)) {
+        // Ensure we calculated OVERLAY_VT_CTX_SIZE correctly
+        static_assert(OVERLAY_VT_CTX_SIZE == sizeof(OverlayVideoToolboxContext), "Incorrect OVERLAY_VT_CTX_SIZE value!");
+        return do_init(ctx);
+    } else {
+        av_log(ctx, AV_LOG_ERROR, "Metal is not available on this OS version\n");
+        return AVERROR(ENOSYS);
+    }
+}
+
+static int do_config_input(AVFilterLink *inlink) API_AVAILABLE(macos(10.11), ios(8.0))
+{
+    AVFilterContext *avctx = inlink->dst;
+    OverlayVideoToolboxContext *ctx = avctx->priv;
+    AVBufferRef *input_ref;
+    AVHWFramesContext *input_frames;
+
+    if (!inlink->hw_frames_ctx) {
+        av_log(avctx, AV_LOG_ERROR, "A hardware frames reference is "
+               "required to associate the processing device.\n");
+        return AVERROR(EINVAL);
+    }
+    input_ref = av_buffer_ref(inlink->hw_frames_ctx);
+    input_frames = (AVHWFramesContext*)input_ref->data;
+    av_assert0(input_frames);
+    ctx->device_ref = av_buffer_ref(input_frames->device_ref);
+
+    if (!ctx->device_ref) {
+        av_log(ctx, AV_LOG_ERROR, "A device reference create "
+                                  "failed.\n");
+        return AVERROR(ENOMEM);
+    }
+    if (ctx->output_format == AV_PIX_FMT_NONE)
+        ctx->output_format = input_frames->sw_format;
+    ctx->output_configured = 1;
+
+    return 0;
+}
+
+static int config_input(AVFilterLink *inlink)
+{
+    AVFilterContext *ctx = inlink->dst;
+    if (@available(macOS 10.13, iOS 9.0, *)) {
+        return do_config_input(inlink);
+    } else {
+        av_log(ctx, AV_LOG_ERROR, "Metal is not available on this OS version\n");
+        return AVERROR(ENOSYS);
+    }
+}
+
+static int do_config_output(AVFilterLink *link) API_AVAILABLE(macos(10.11), ios(8.0))
+{
+    AVHWFramesContext *output_frames;
+    AVFilterContext *avctx = link->src;
+    OverlayVideoToolboxContext *ctx = avctx->priv;
+    int ret = 0;
+
+    av_log(avctx, AV_LOG_INFO, "do_config_output!\n");
+    link->hw_frames_ctx = av_hwframe_ctx_alloc(ctx->device_ref);
+    if (!link->hw_frames_ctx) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to create HW frame context "
+               "for output.\n");
+        ret = AVERROR(ENOMEM);
+        return ret;
+    }
+
+    output_frames = (AVHWFramesContext*)link->hw_frames_ctx->data;
+
+    output_frames->format    = AV_PIX_FMT_VIDEOTOOLBOX;
+    output_frames->sw_format = ctx->output_format;
+    output_frames->width     = avctx->inputs[0]->w;
+    output_frames->height    = avctx->inputs[0]->h;
+
+    ret = ff_filter_init_hw_frames(avctx, link, 10);
+    if (ret < 0)
+        return ret;
+
+    ret = av_hwframe_ctx_init(link->hw_frames_ctx);
+    if (ret < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to initialise VideoToolbox frame "
+               "context for output: %d\n", ret);
+        return ret;
+    }
+
+    ret = ff_framesync_init_dualinput(&ctx->fs, avctx);
+    if (ret < 0)
+        return ret;
+
+    ret = ff_framesync_configure(&ctx->fs);
+    return ret;
+}
+
+static int config_output(AVFilterLink *link)
+{
+    AVFilterContext *ctx = link->src;
+    if (@available(macOS 10.13, iOS 9.0, *)) {
+        return do_config_output(link);
+    } else {
+        av_log(ctx, AV_LOG_ERROR, "Metal is not available on this OS version\n");
+        return AVERROR(ENOSYS);
+    }
+}
+
+static int overlay_videotoolbox_activate(AVFilterContext *avctx) {
+    OverlayVideoToolboxContext *ctx = avctx->priv;
+    return ff_framesync_activate(&ctx->fs);
+}
+
+#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
+#define CONST(name, help, val, unit) { name, help, 0, AV_OPT_TYPE_CONST, {.i64=val}, INT_MIN, INT_MAX, FLAGS, unit }
+#define OFFSET(x) offsetof(OverlayVideoToolboxContext, x)
+
+static const AVOption overlay_videotoolbox_options[] = {
+    { "x", "Overlay x position",
+      OFFSET(x_position), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, .flags = FLAGS },
+    { "y", "Overlay y position",
+      OFFSET(y_position), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, .flags = FLAGS },
+    { NULL },
+};
+
+AVFILTER_DEFINE_CLASS(overlay_videotoolbox);
+
+static const AVFilterPad overlay_videotoolbox_inputs[] = {
+    {
+        .name         = "main",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .config_props = config_input,
+    },
+    {
+        .name         = "overlay",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .config_props = config_input,
+    },
+};
+
+static const AVFilterPad overlay_videotoolbox_outputs[] = {
+    {
+        .name          = "default",
+        .type          = AVMEDIA_TYPE_VIDEO,
+        .config_props  = config_output,
+    },
+};
+
+const AVFilter ff_vf_overlay_videotoolbox = {
+    .name           = "overlay_videotoolbox",
+    .description    = NULL_IF_CONFIG_SMALL("Overlay filter for VideoToolbox frames using Metal compute"),
+    .priv_size      = OVERLAY_VT_CTX_SIZE,
+    .priv_class     = &overlay_videotoolbox_class,
+    .init           = overlay_videotoolbox_init,
+    .uninit         = overlay_videotoolbox_uninit,
+    .activate        = overlay_videotoolbox_activate,
+    FILTER_SINGLE_PIXFMT(AV_PIX_FMT_VIDEOTOOLBOX),
+    FILTER_INPUTS(overlay_videotoolbox_inputs),
+    FILTER_OUTPUTS(overlay_videotoolbox_outputs),
+    .flags          = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL,
+    .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
+};