@@ -19,6 +19,9 @@
*.swp
*.ver
*.version
+*.metal.air
+*.metallib
+*.metallib.c
*.ptx
*.ptx.c
*.ptx.gz
@@ -382,6 +382,7 @@ Toolchain options:
--dep-cc=DEPCC use dependency generator DEPCC [$cc_default]
--nvcc=NVCC use Nvidia CUDA compiler NVCC or clang [$nvcc_default]
--ld=LD use linker LD [$ld_default]
+ --metalcc=METALCC use metal compiler METALCC [$metalcc_default]
--pkg-config=PKGCONFIG use pkg-config tool PKGCONFIG [$pkg_config_default]
--pkg-config-flags=FLAGS pass additional flags to pkgconf []
--ranlib=RANLIB use ranlib RANLIB [$ranlib_default]
@@ -2560,6 +2561,7 @@ CMDLINE_SET="
ln_s
logfile
malloc_prefix
+ metalcc
nm
optflags
nvcc
@@ -3608,6 +3610,7 @@ coreimagesrc_filter_deps="coreimage appkit"
coreimagesrc_filter_extralibs="-framework OpenGL"
cover_rect_filter_deps="avcodec avformat gpl"
cropdetect_filter_deps="gpl"
+deinterlace_metal_filter_deps="metal corevideo videotoolbox"
deinterlace_qsv_filter_deps="libmfx"
deinterlace_vaapi_filter_deps="vaapi"
delogo_filter_deps="gpl"
@@ -3828,6 +3831,7 @@ host_cc_default="gcc"
doxygen_default="doxygen"
install="install"
ln_s_default="ln -s -f"
+metalcc_default="xcrun metal"
nm_default="nm -g"
pkg_config_default=pkg-config
ranlib_default="ranlib"
@@ -4428,7 +4432,7 @@ if enabled cuda_nvcc; then
fi
set_default arch cc cxx doxygen pkg_config ranlib strip sysinclude \
- target_exec x86asmexe
+ target_exec x86asmexe metalcc
enabled cross_compile || host_cc_default=$cc
set_default host_cc
@@ -6316,6 +6320,7 @@ check_apple_framework CoreFoundation
check_apple_framework CoreMedia
check_apple_framework CoreVideo
check_apple_framework CoreAudio
+check_apple_framework Metal
enabled avfoundation && {
disable coregraphics applicationservices
@@ -7606,6 +7611,7 @@ ARFLAGS=$arflags
AR_O=$ar_o
AR_CMD=$ar
NM_CMD=$nm
+METALCC=$metalcc
RANLIB=$ranlib
STRIP=$strip
STRIPTYPE=$striptype
@@ -104,6 +104,15 @@ COMPILE_MSA = $(call COMPILE,CC,MSAFLAGS)
$(BIN2CEXE): ffbuild/bin2c_host.o
$(HOSTLD) $(HOSTLDFLAGS) $(HOSTLD_O) $^ $(HOSTEXTRALIBS)
+%.metal.air: %.metal
+ $(METALCC) $(patsubst $(SRC_PATH)/%,$(SRC_LINK)/%,$<) -o $@
+
+%.metallib: %.metal.air
+ $(METALCC)lib --split-module-without-linking $(patsubst $(SRC_PATH)/%,$(SRC_LINK)/%,$<) -o $@
+
+%.metallib.c: %.metallib
+ $(Q)xxd -i $(patsubst $(SRC_PATH)/%,$(SRC_LINK)/%,$<) | sed -E 's,[a-zA-Z_]*_(vf)_([a-zA-Z_]*)_metallib,\1_\2_metallib,' > $@
+
%.ptx: %.cu $(SRC_PATH)/compat/cuda/cuda_runtime.h
$(COMPILE_NVCC)
@@ -239,6 +239,8 @@ OBJS-$(CONFIG_DECONVOLVE_FILTER) += vf_convolve.o framesync.o
OBJS-$(CONFIG_DEDOT_FILTER) += vf_dedot.o
OBJS-$(CONFIG_DEFLATE_FILTER) += vf_neighbor.o
OBJS-$(CONFIG_DEFLICKER_FILTER) += vf_deflicker.o
+OBJS-$(CONFIG_DEINTERLACE_METAL_FILTER) += vf_deinterlace_metal.o vf_deinterlace_metal.metallib.o \
+ yadif_common.o
OBJS-$(CONFIG_DEINTERLACE_QSV_FILTER) += vf_deinterlace_qsv.o
OBJS-$(CONFIG_DEINTERLACE_VAAPI_FILTER) += vf_deinterlace_vaapi.o vaapi_vpp.o
OBJS-$(CONFIG_DEJUDDER_FILTER) += vf_dejudder.o
@@ -226,6 +226,7 @@ extern const AVFilter ff_vf_deconvolve;
extern const AVFilter ff_vf_dedot;
extern const AVFilter ff_vf_deflate;
extern const AVFilter ff_vf_deflicker;
+extern const AVFilter ff_vf_deinterlace_metal;
extern const AVFilter ff_vf_deinterlace_qsv;
extern const AVFilter ff_vf_deinterlace_vaapi;
extern const AVFilter ff_vf_dejudder;
new file mode 100644
@@ -0,0 +1,453 @@
+/*
+ * Copyright (C) 2018 Philip Langdale <philipl@overt.org>
+ * 2020 Aman Karmani <aman@tmm1.net>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "internal.h"
+#include "yadif.h"
+#include <libavutil/avassert.h>
+#include <libavutil/hwcontext.h>
+
+#import <CoreVideo/CoreVideo.h>
+#import <Metal/Metal.h>
+
+extern char vf_deinterlace_metal_metallib[];
+extern unsigned int vf_deinterlace_metal_metallib_len;
+
+typedef struct DeintMetalContext {
+ YADIFContext yadif;
+
+ AVBufferRef *device_ref;
+ AVBufferRef *input_frames_ref;
+ AVHWFramesContext *input_frames;
+
+ id<MTLDevice> mtlDevice;
+ id<MTLLibrary> mtlLibrary;
+ id<MTLCommandQueue> mtlQueue;
+ id<MTLComputePipelineState> mtlPipeline;
+ id<MTLFunction> mtlFunction;
+ id<MTLBuffer> mtlParamsBuffer;
+
+ CVMetalTextureCacheRef textureCache;
+} DeintMetalContext;
+
+struct mtlYadifParams {
+ uint channels;
+ uint parity;
+ uint tff;
+ bool is_second_field;
+ bool skip_spatial_check;
+ int field_mode;
+};
+
+static void call_kernel(AVFilterContext *ctx,
+ id<MTLTexture> dst,
+ id<MTLTexture> prev,
+ id<MTLTexture> cur,
+ id<MTLTexture> next,
+ int channels,
+ int parity,
+ int tff)
+{
+ DeintMetalContext *s = ctx->priv;
+ id<MTLCommandBuffer> buffer = s->mtlQueue.commandBuffer;
+ id<MTLComputeCommandEncoder> encoder = buffer.computeCommandEncoder;
+ struct mtlYadifParams *params = (struct mtlYadifParams *)s->mtlParamsBuffer.contents;
+ *params = (struct mtlYadifParams){
+ .channels = channels,
+ .parity = parity,
+ .tff = tff,
+ .is_second_field = !(parity ^ tff),
+ .skip_spatial_check = s->yadif.mode&2,
+ .field_mode = s->yadif.current_field
+ };
+
+ [encoder setComputePipelineState:s->mtlPipeline];
+ [encoder setTexture:dst atIndex:0];
+ [encoder setTexture:prev atIndex:1];
+ [encoder setTexture:cur atIndex:2];
+ [encoder setTexture:next atIndex:3];
+ [encoder setBuffer:s->mtlParamsBuffer offset:0 atIndex:4];
+
+ NSUInteger w = s->mtlPipeline.threadExecutionWidth;
+ NSUInteger h = s->mtlPipeline.maxTotalThreadsPerThreadgroup / w;
+ MTLSize threadsPerThreadgroup = MTLSizeMake(w, h, 1);
+ BOOL fallback = YES;
+ if (@available(macOS 10.15, iOS 11, tvOS 14.5, *)) {
+ if ([s->mtlDevice supportsFamily:MTLGPUFamilyCommon3]) {
+ MTLSize threadsPerGrid = MTLSizeMake(dst.width, dst.height, 1);
+ [encoder dispatchThreads:threadsPerGrid threadsPerThreadgroup:threadsPerThreadgroup];
+ fallback = NO;
+ }
+ }
+ if (fallback) {
+ MTLSize threadgroups = MTLSizeMake((dst.width + w - 1) / w,
+ (dst.height + h - 1) / h,
+ 1);
+ [encoder dispatchThreadgroups:threadgroups threadsPerThreadgroup:threadsPerThreadgroup];
+ }
+
+ [encoder endEncoding];
+
+ [buffer commit];
+ [buffer waitUntilCompleted];
+
+ [encoder release];
+ encoder = nil;
+ [buffer release];
+ buffer = nil;
+}
+
+static CVMetalTextureRef pixbuf_to_texture(AVFilterContext *ctx,
+ CVPixelBufferRef pixbuf,
+ int plane,
+ MTLPixelFormat format)
+{
+ DeintMetalContext *s = ctx->priv;
+ CVMetalTextureRef tex = NULL;
+ CVReturn ret;
+
+ ret = CVMetalTextureCacheCreateTextureFromImage(
+ NULL,
+ s->textureCache,
+ pixbuf,
+ NULL,
+ format,
+ CVPixelBufferGetWidthOfPlane(pixbuf, plane),
+ CVPixelBufferGetHeightOfPlane(pixbuf, plane),
+ plane,
+ &tex
+ );
+ if (ret != kCVReturnSuccess) {
+ av_log(ctx, AV_LOG_ERROR, "Failed to create CVMetalTexture from image: %d\n", ret);
+ return NULL;
+ }
+
+ return tex;
+}
+
+static void filter(AVFilterContext *ctx, AVFrame *dst,
+ int parity, int tff)
+{
+ DeintMetalContext *s = ctx->priv;
+ YADIFContext *y = &s->yadif;
+ int i;
+
+ for (i = 0; i < y->csp->nb_components; i++) {
+ int pixel_size, channels;
+ const AVComponentDescriptor *comp = &y->csp->comp[i];
+ CVMetalTextureRef prev, cur, next, dest;
+ id<MTLTexture> tex_prev, tex_cur, tex_next, tex_dest;
+ MTLPixelFormat format;
+
+ if (comp->plane < i) {
+ // We process planes as a whole, so don't reprocess
+ // them for additional components
+ continue;
+ }
+
+ pixel_size = (comp->depth + comp->shift) / 8;
+ channels = comp->step / pixel_size;
+ if (pixel_size > 2 || channels > 2) {
+ av_log(ctx, AV_LOG_ERROR, "Unsupported pixel format: %s\n", y->csp->name);
+ goto exit;
+ }
+ switch (pixel_size) {
+ case 1:
+ format = channels == 1 ? MTLPixelFormatR8Unorm : MTLPixelFormatRG8Unorm;
+ break;
+ case 2:
+ format = channels == 1 ? MTLPixelFormatR16Unorm : MTLPixelFormatRG16Unorm;
+ break;
+ default:
+ av_log(ctx, AV_LOG_ERROR, "Unsupported pixel format: %s\n", y->csp->name);
+ goto exit;
+ }
+ av_log(ctx, AV_LOG_TRACE,
+ "Deinterlacing plane %d: pixel_size: %d channels: %d\n",
+ comp->plane, pixel_size, channels);
+
+ prev = pixbuf_to_texture(ctx, (CVPixelBufferRef)y->prev->data[3], i, format);
+ cur = pixbuf_to_texture(ctx, (CVPixelBufferRef)y->cur->data[3], i, format);
+ next = pixbuf_to_texture(ctx, (CVPixelBufferRef)y->next->data[3], i, format);
+ dest = pixbuf_to_texture(ctx, (CVPixelBufferRef)dst->data[3], i, format);
+
+ tex_prev = CVMetalTextureGetTexture(prev);
+ tex_cur = CVMetalTextureGetTexture(cur);
+ tex_next = CVMetalTextureGetTexture(next);
+ tex_dest = CVMetalTextureGetTexture(dest);
+
+ call_kernel(ctx, tex_dest, tex_prev, tex_cur, tex_next,
+ channels, parity, tff);
+
+ CFRelease(prev);
+ CFRelease(cur);
+ CFRelease(next);
+ CFRelease(dest);
+ }
+
+ CVBufferPropagateAttachments((CVPixelBufferRef)y->cur->data[3], (CVPixelBufferRef)dst->data[3]);
+
+ if (y->current_field == YADIF_FIELD_END) {
+ y->current_field = YADIF_FIELD_NORMAL;
+ }
+
+exit:
+ return;
+}
+
+static av_cold int deint_metal_init(AVFilterContext *ctx)
+{
+ DeintMetalContext *s = ctx->priv;
+ NSError *err = nil;
+ CVReturn ret;
+
+ s->mtlDevice = MTLCreateSystemDefaultDevice();
+ if (!s->mtlDevice) {
+ av_log(ctx, AV_LOG_ERROR, "Unable to find Metal device\n");
+ return AVERROR_EXTERNAL;
+ }
+
+ av_log(ctx, AV_LOG_INFO, "Using Metal device: %s\n", s->mtlDevice.name.UTF8String);
+
+ dispatch_data_t libData = dispatch_data_create(
+ vf_deinterlace_metal_metallib,
+ vf_deinterlace_metal_metallib_len,
+ nil,
+ nil);
+ s->mtlLibrary = [s->mtlDevice
+ newLibraryWithData:libData
+ error:&err];
+ dispatch_release(libData);
+ libData = nil;
+ if (err) {
+ av_log(ctx, AV_LOG_ERROR, "Failed to load Metal library: %s\n", err.description.UTF8String);
+ return AVERROR_EXTERNAL;
+ }
+ s->mtlFunction = [s->mtlLibrary newFunctionWithName:@"deint"];
+
+ s->mtlQueue = s->mtlDevice.newCommandQueue;
+ if (!s->mtlQueue) {
+ av_log(ctx, AV_LOG_ERROR, "Failed to create Metal command queue!\n");
+ return AVERROR_EXTERNAL;
+ }
+
+ s->mtlPipeline = [s->mtlDevice
+ newComputePipelineStateWithFunction:s->mtlFunction
+ error:&err];
+ if (err) {
+ av_log(ctx, AV_LOG_ERROR, "Failed to create Metal compute pipeline: %s\n", err.description.UTF8String);
+ return AVERROR_EXTERNAL;
+ }
+
+ s->mtlParamsBuffer = [s->mtlDevice
+ newBufferWithLength:sizeof(struct mtlYadifParams)
+ options:MTLResourceStorageModeShared];
+
+ ret = CVMetalTextureCacheCreate(
+ NULL,
+ NULL,
+ s->mtlDevice,
+ NULL,
+ &s->textureCache
+ );
+ if (ret != kCVReturnSuccess) {
+ av_log(ctx, AV_LOG_ERROR, "Failed to create CVMetalTextureCache: %d\n", ret);
+ return AVERROR_EXTERNAL;
+ }
+
+ return 0;
+}
+
+static av_cold void deint_metal_uninit(AVFilterContext *ctx)
+{
+ DeintMetalContext *s = ctx->priv;
+ YADIFContext *y = &s->yadif;
+
+ av_frame_free(&y->prev);
+ av_frame_free(&y->cur);
+ av_frame_free(&y->next);
+
+ av_buffer_unref(&s->device_ref);
+ av_buffer_unref(&s->input_frames_ref);
+ s->input_frames = NULL;
+
+ [s->mtlParamsBuffer release];
+ [s->mtlFunction release];
+ [s->mtlPipeline release];
+ [s->mtlQueue release];
+ [s->mtlLibrary release];
+ [s->mtlDevice release];
+
+ s->mtlParamsBuffer = nil;
+ s->mtlFunction = nil;
+ s->mtlPipeline = nil;
+ s->mtlQueue = nil;
+ s->mtlLibrary = nil;
+ s->mtlDevice = nil;
+
+ if (s->textureCache) {
+ CFRelease(s->textureCache);
+ s->textureCache = NULL;
+ }
+}
+
+static int config_input(AVFilterLink *inlink)
+{
+ AVFilterContext *ctx = inlink->dst;
+ DeintMetalContext *s = ctx->priv;
+
+ if (!inlink->hw_frames_ctx) {
+ av_log(ctx, AV_LOG_ERROR, "A hardware frames reference is "
+ "required to associate the processing device.\n");
+ return AVERROR(EINVAL);
+ }
+
+ s->input_frames_ref = av_buffer_ref(inlink->hw_frames_ctx);
+ if (!s->input_frames_ref) {
+ av_log(ctx, AV_LOG_ERROR, "A input frames reference create "
+ "failed.\n");
+ return AVERROR(ENOMEM);
+ }
+ s->input_frames = (AVHWFramesContext*)s->input_frames_ref->data;
+
+ return 0;
+}
+
+static int config_output(AVFilterLink *link)
+{
+ AVHWFramesContext *output_frames;
+ AVFilterContext *ctx = link->src;
+ DeintMetalContext *s = ctx->priv;
+ YADIFContext *y = &s->yadif;
+ int ret = 0;
+
+ av_assert0(s->input_frames);
+ s->device_ref = av_buffer_ref(s->input_frames->device_ref);
+ if (!s->device_ref) {
+ av_log(ctx, AV_LOG_ERROR, "A device reference create "
+ "failed.\n");
+ return AVERROR(ENOMEM);
+ }
+
+ link->hw_frames_ctx = av_hwframe_ctx_alloc(s->device_ref);
+ if (!link->hw_frames_ctx) {
+ av_log(ctx, AV_LOG_ERROR, "Failed to create HW frame context "
+ "for output.\n");
+ ret = AVERROR(ENOMEM);
+ goto exit;
+ }
+
+ output_frames = (AVHWFramesContext*)link->hw_frames_ctx->data;
+
+ output_frames->format = AV_PIX_FMT_VIDEOTOOLBOX;
+ output_frames->sw_format = s->input_frames->sw_format;
+ output_frames->width = ctx->inputs[0]->w;
+ output_frames->height = ctx->inputs[0]->h;
+
+ ret = ff_filter_init_hw_frames(ctx, link, 10);
+ if (ret < 0)
+ goto exit;
+
+ ret = av_hwframe_ctx_init(link->hw_frames_ctx);
+ if (ret < 0) {
+ av_log(ctx, AV_LOG_ERROR, "Failed to initialise VideoToolbox frame "
+ "context for output: %d\n", ret);
+ goto exit;
+ }
+
+ link->time_base.num = ctx->inputs[0]->time_base.num;
+ link->time_base.den = ctx->inputs[0]->time_base.den * 2;
+ link->w = ctx->inputs[0]->w;
+ link->h = ctx->inputs[0]->h;
+
+ if(y->mode & 1)
+ link->frame_rate = av_mul_q(ctx->inputs[0]->frame_rate,
+ (AVRational){2, 1});
+
+ if (link->w < 3 || link->h < 3) {
+ av_log(ctx, AV_LOG_ERROR, "Video of less than 3 columns or lines is not supported\n");
+ ret = AVERROR(EINVAL);
+ goto exit;
+ }
+
+ y->csp = av_pix_fmt_desc_get(output_frames->sw_format);
+ y->filter = filter;
+
+exit:
+ return ret;
+}
+
+#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+#define CONST(name, help, val, unit) { name, help, 0, AV_OPT_TYPE_CONST, {.i64=val}, INT_MIN, INT_MAX, FLAGS, unit }
+
+static const AVOption deinterlace_metal_options[] = {
+ #define OFFSET(x) offsetof(YADIFContext, x)
+ { "mode", "specify the interlacing mode", OFFSET(mode), AV_OPT_TYPE_INT, {.i64=YADIF_MODE_SEND_FRAME}, 0, 3, FLAGS, "mode"},
+ CONST("send_frame", "send one frame for each frame", YADIF_MODE_SEND_FRAME, "mode"),
+ CONST("send_field", "send one frame for each field", YADIF_MODE_SEND_FIELD, "mode"),
+ CONST("send_frame_nospatial", "send one frame for each frame, but skip spatial interlacing check", YADIF_MODE_SEND_FRAME_NOSPATIAL, "mode"),
+ CONST("send_field_nospatial", "send one frame for each field, but skip spatial interlacing check", YADIF_MODE_SEND_FIELD_NOSPATIAL, "mode"),
+
+ { "parity", "specify the assumed picture field parity", OFFSET(parity), AV_OPT_TYPE_INT, {.i64=YADIF_PARITY_AUTO}, -1, 1, FLAGS, "parity" },
+ CONST("tff", "assume top field first", YADIF_PARITY_TFF, "parity"),
+ CONST("bff", "assume bottom field first", YADIF_PARITY_BFF, "parity"),
+ CONST("auto", "auto detect parity", YADIF_PARITY_AUTO, "parity"),
+
+ { "deint", "specify which frames to deinterlace", OFFSET(deint), AV_OPT_TYPE_INT, {.i64=YADIF_DEINT_ALL}, 0, 1, FLAGS, "deint" },
+ CONST("all", "deinterlace all frames", YADIF_DEINT_ALL, "deint"),
+ CONST("interlaced", "only deinterlace frames marked as interlaced", YADIF_DEINT_INTERLACED, "deint"),
+ #undef OFFSET
+
+ { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(deinterlace_metal);
+
+static const AVFilterPad deint_metal_inputs[] = {
+ {
+ .name = "default",
+ .type = AVMEDIA_TYPE_VIDEO,
+ .filter_frame = ff_yadif_filter_frame,
+ .config_props = config_input,
+ },
+};
+
+static const AVFilterPad deint_metal_outputs[] = {
+ {
+ .name = "default",
+ .type = AVMEDIA_TYPE_VIDEO,
+ .request_frame = ff_yadif_request_frame,
+ .config_props = config_output,
+ },
+};
+
+AVFilter ff_vf_deinterlace_metal = {
+ .name = "deinterlace_metal",
+ .description = NULL_IF_CONFIG_SMALL("Deinterlace VideoToolbox frames with Metal compute"),
+ .priv_size = sizeof(DeintMetalContext),
+ .priv_class = &deinterlace_metal_class,
+ .init = deint_metal_init,
+ .uninit = deint_metal_uninit,
+ FILTER_SINGLE_PIXFMT(AV_PIX_FMT_VIDEOTOOLBOX),
+ FILTER_INPUTS(deint_metal_inputs),
+ FILTER_OUTPUTS(deint_metal_outputs),
+ .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL,
+ .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
+};
new file mode 100644
@@ -0,0 +1,269 @@
+/*
+ * Copyright (C) 2018 Philip Langdale <philipl@overt.org>
+ * 2020 Aman Karmani <aman@tmm1.net>
+ * 2020 Stefan Dyulgerov <stefan.dyulgerov@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <metal_stdlib>
+#include <metal_integer>
+#include <metal_texture>
+
+using namespace metal;
+
+/*
+ * Parameters
+ */
+
+struct deintParams {
+ uint channels;
+ uint parity;
+ uint tff;
+ bool is_second_field;
+ bool skip_spatial_check;
+ int field_mode;
+};
+
+/*
+ * Texture access helpers
+ */
+
+#define accesstype access::sample
+const sampler s(coord::pixel);
+
+template <typename T>
+T tex2D(texture2d<float, access::sample> tex, uint x, uint y)
+{
+ return tex.sample(s, float2(x, y)).x;
+}
+
+template <>
+float2 tex2D<float2>(texture2d<float, access::sample> tex, uint x, uint y)
+{
+ return tex.sample(s, float2(x, y)).xy;
+}
+
+template <typename T>
+T tex2D(texture2d<float, access::read> tex, uint x, uint y)
+{
+ return tex.read(uint2(x, y)).x;
+}
+
+template <>
+float2 tex2D<float2>(texture2d<float, access::read> tex, uint x, uint y)
+{
+ return tex.read(uint2(x, y)).xy;
+}
+
+/*
+ * YADIF helpers
+ */
+
+template<typename T>
+T spatial_predictor(T a, T b, T c, T d, T e, T f, T g,
+ T h, T i, T j, T k, T l, T m, T n)
+{
+ T spatial_pred = (d + k)/2;
+ T spatial_score = abs(c - j) + abs(d - k) + abs(e - l);
+
+ T score = abs(b - k) + abs(c - l) + abs(d - m);
+ if (score < spatial_score) {
+ spatial_pred = (c + l)/2;
+ spatial_score = score;
+ score = abs(a - l) + abs(b - m) + abs(c - n);
+ if (score < spatial_score) {
+ spatial_pred = (b + m)/2;
+ spatial_score = score;
+ }
+ }
+ score = abs(d - i) + abs(e - j) + abs(f - k);
+ if (score < spatial_score) {
+ spatial_pred = (e + j)/2;
+ spatial_score = score;
+ score = abs(e - h) + abs(f - i) + abs(g - j);
+ if (score < spatial_score) {
+ spatial_pred = (f + i)/2;
+ spatial_score = score;
+ }
+ }
+ return spatial_pred;
+}
+
+template<typename T>
+T temporal_predictor(T A, T B, T C, T D, T E, T F,
+ T G, T H, T I, T J, T K, T L,
+ T spatial_pred, bool skip_check)
+{
+ T p0 = (C + H) / 2;
+ T p1 = F;
+ T p2 = (D + I) / 2;
+ T p3 = G;
+ T p4 = (E + J) / 2;
+
+ T tdiff0 = abs(D - I);
+ T tdiff1 = (abs(A - F) + abs(B - G)) / 2;
+ T tdiff2 = (abs(K - F) + abs(G - L)) / 2;
+
+ T diff = max3(tdiff0, tdiff1, tdiff2);
+
+ if (!skip_check) {
+ T maxi = max3(p2 - p3, p2 - p1, min(p0 - p1, p4 - p3));
+ T mini = min3(p2 - p3, p2 - p1, max(p0 - p1, p4 - p3));
+ diff = max3(diff, mini, -maxi);
+ }
+
+ return clamp(spatial_pred, p2 - diff, p2 + diff);
+}
+
+#define T float2
+template <>
+T spatial_predictor<T>(T a, T b, T c, T d, T e, T f, T g,
+ T h, T i, T j, T k, T l, T m, T n)
+{
+ return T(
+ spatial_predictor(a.x, b.x, c.x, d.x, e.x, f.x, g.x,
+ h.x, i.x, j.x, k.x, l.x, m.x, n.x),
+ spatial_predictor(a.y, b.y, c.y, d.y, e.y, f.y, g.y,
+ h.y, i.y, j.y, k.y, l.y, m.y, n.y)
+ );
+}
+
+template <>
+T temporal_predictor<T>(T A, T B, T C, T D, T E, T F,
+ T G, T H, T I, T J, T K, T L,
+ T spatial_pred, bool skip_check)
+{
+ return T(
+ temporal_predictor(A.x, B.x, C.x, D.x, E.x, F.x,
+ G.x, H.x, I.x, J.x, K.x, L.x,
+ spatial_pred.x, skip_check),
+ temporal_predictor(A.y, B.y, C.y, D.y, E.y, F.y,
+ G.y, H.y, I.y, J.y, K.y, L.y,
+ spatial_pred.y, skip_check)
+ );
+}
+#undef T
+
+/*
+ * YADIF compute
+ */
+
+template <typename T>
+T yadif_compute_spatial(
+ texture2d<float, accesstype> cur,
+ uint2 pos)
+{
+ // Calculate spatial prediction
+ T a = tex2D<T>(cur, pos.x - 3, pos.y - 1);
+ T b = tex2D<T>(cur, pos.x - 2, pos.y - 1);
+ T c = tex2D<T>(cur, pos.x - 1, pos.y - 1);
+ T d = tex2D<T>(cur, pos.x - 0, pos.y - 1);
+ T e = tex2D<T>(cur, pos.x + 1, pos.y - 1);
+ T f = tex2D<T>(cur, pos.x + 2, pos.y - 1);
+ T g = tex2D<T>(cur, pos.x + 3, pos.y - 1);
+
+ T h = tex2D<T>(cur, pos.x - 3, pos.y + 1);
+ T i = tex2D<T>(cur, pos.x - 2, pos.y + 1);
+ T j = tex2D<T>(cur, pos.x - 1, pos.y + 1);
+ T k = tex2D<T>(cur, pos.x - 0, pos.y + 1);
+ T l = tex2D<T>(cur, pos.x + 1, pos.y + 1);
+ T m = tex2D<T>(cur, pos.x + 2, pos.y + 1);
+ T n = tex2D<T>(cur, pos.x + 3, pos.y + 1);
+
+ return spatial_predictor(a, b, c, d, e, f, g,
+ h, i, j, k, l, m, n);
+}
+
+template <typename T>
+T yadif_compute_temporal(
+ texture2d<float, accesstype> cur,
+ texture2d<float, accesstype> prev2,
+ texture2d<float, accesstype> prev1,
+ texture2d<float, accesstype> next1,
+ texture2d<float, accesstype> next2,
+ T spatial_pred,
+ bool skip_spatial_check,
+ uint2 pos)
+{
+ // Calculate temporal prediction
+ T A = tex2D<T>(prev2, pos.x, pos.y - 1);
+ T B = tex2D<T>(prev2, pos.x, pos.y + 1);
+ T C = tex2D<T>(prev1, pos.x, pos.y - 2);
+ T D = tex2D<T>(prev1, pos.x, pos.y + 0);
+ T E = tex2D<T>(prev1, pos.x, pos.y + 2);
+ T F = tex2D<T>(cur, pos.x, pos.y - 1);
+ T G = tex2D<T>(cur, pos.x, pos.y + 1);
+ T H = tex2D<T>(next1, pos.x, pos.y - 2);
+ T I = tex2D<T>(next1, pos.x, pos.y + 0);
+ T J = tex2D<T>(next1, pos.x, pos.y + 2);
+ T K = tex2D<T>(next2, pos.x, pos.y - 1);
+ T L = tex2D<T>(next2, pos.x, pos.y + 1);
+
+ return temporal_predictor(A, B, C, D, E, F, G, H, I, J, K, L,
+ spatial_pred, skip_spatial_check);
+}
+
+template <typename T>
+T yadif(
+ texture2d<float, access::write> dst,
+ texture2d<float, accesstype> prev,
+ texture2d<float, accesstype> cur,
+ texture2d<float, accesstype> next,
+ constant deintParams& params,
+ uint2 pos)
+{
+ T spatial_pred = yadif_compute_spatial<T>(cur, pos);
+
+ if (params.is_second_field) {
+ return yadif_compute_temporal(cur, prev, cur, next, next, spatial_pred, params.skip_spatial_check, pos);
+ } else {
+ return yadif_compute_temporal(cur, prev, prev, cur, next, spatial_pred, params.skip_spatial_check, pos);
+ }
+}
+
+/*
+ * Kernel dispatch
+ */
+
+kernel void deint(
+ texture2d<float, access::write> dst [[texture(0)]],
+ texture2d<float, accesstype> prev [[texture(1)]],
+ texture2d<float, accesstype> cur [[texture(2)]],
+ texture2d<float, accesstype> next [[texture(3)]],
+ constant deintParams& params [[buffer(4)]],
+ uint2 pos [[thread_position_in_grid]])
+{
+ if ((pos.x >= dst.get_width()) ||
+ (pos.y >= dst.get_height())) {
+ return;
+ }
+
+ // Don't modify the primary field
+ if (pos.y % 2 == params.parity) {
+ float4 in = cur.read(pos);
+ dst.write(in, pos);
+ return;
+ }
+
+ float2 pred;
+ if (params.channels == 1)
+ pred = float2(yadif<float>(dst, prev, cur, next, params, pos));
+ else
+ pred = yadif<float2>(dst, prev, cur, next, params, pos);
+ dst.write(pred.xyyy, pos);
+}