[FFmpeg-devel] avfilter: add yadif implementation using Metal.framework

Message ID	20211214014709.648-1-ffmpeg@tmm1.net
State	New
Headers	show Delivered-To: ffmpegpatchwork2@gmail.com Received-SPF: pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org designates 79.124.17.100 as permitted sender) client-ip=79.124.17.100; From: Aman Karmani <ffmpeg@tmm1.net> To: ffmpeg-devel@ffmpeg.org Date: Mon, 13 Dec 2021 17:47:09 -0800 Message-Id: <20211214014709.648-1-ffmpeg@tmm1.net> MIME-Version: 1.0 Subject: [FFmpeg-devel] [PATCH] avfilter: add yadif implementation using Metal.framework Precedence: list Reply-To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org> Cc: Aman Karmani <aman@tmm1.net> Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" <ffmpeg-devel-bounces@ffmpeg.org>
Series	[FFmpeg-devel] avfilter: add yadif implementation using Metal.framework \| expand [FFmpeg-devel] avfilter: add yadif implementation using Metal.framework

Context	Check	Description
andriy/make_x86	success	Make finished
andriy/make_fate_x86	success	Make fate finished
andriy/make_ppc	success	Make finished
andriy/make_fate_ppc	success	Make fate finished

diff --git a/.gitignore b/.gitignore index 9ed24b542e..1a5bb29ad5 100644 --- a/.gitignore +++ b/.gitignore @@ -19,6 +19,9 @@ *.swp *.ver *.version +*.metal.air +*.metallib +*.metallib.c *.ptx *.ptx.c *.ptx.gz diff --git a/configure b/configure index a7593ec2db..f025ade67c 100755 --- a/configure +++ b/configure @@ -382,6 +382,7 @@ Toolchain options: --dep-cc=DEPCC use dependency generator DEPCC [$cc_default] --nvcc=NVCC use Nvidia CUDA compiler NVCC or clang [$nvcc_default] --ld=LD use linker LD [$ld_default] + --metalcc=METALCC use metal compiler METALCC [$metalcc_default] --pkg-config=PKGCONFIG use pkg-config tool PKGCONFIG [$pkg_config_default] --pkg-config-flags=FLAGS pass additional flags to pkgconf [] --ranlib=RANLIB use ranlib RANLIB [$ranlib_default] @@ -2560,6 +2561,7 @@ CMDLINE_SET=" ln_s logfile malloc_prefix + metalcc nm optflags nvcc @@ -3608,6 +3610,7 @@ coreimagesrc_filter_deps="coreimage appkit" coreimagesrc_filter_extralibs="-framework OpenGL" cover_rect_filter_deps="avcodec avformat gpl" cropdetect_filter_deps="gpl" +deinterlace_metal_filter_deps="metal corevideo videotoolbox" deinterlace_qsv_filter_deps="libmfx" deinterlace_vaapi_filter_deps="vaapi" delogo_filter_deps="gpl" @@ -3828,6 +3831,7 @@ host_cc_default="gcc" doxygen_default="doxygen" install="install" ln_s_default="ln -s -f" +metalcc_default="xcrun metal" nm_default="nm -g" pkg_config_default=pkg-config ranlib_default="ranlib" @@ -4428,7 +4432,7 @@ if enabled cuda_nvcc; then fi set_default arch cc cxx doxygen pkg_config ranlib strip sysinclude \ - target_exec x86asmexe + target_exec x86asmexe metalcc enabled cross_compile || host_cc_default=$cc set_default host_cc @@ -6316,6 +6320,7 @@ check_apple_framework CoreFoundation check_apple_framework CoreMedia check_apple_framework CoreVideo check_apple_framework CoreAudio +check_apple_framework Metal enabled avfoundation && { disable coregraphics applicationservices @@ -7606,6 +7611,7 @@ ARFLAGS=$arflags AR_O=$ar_o AR_CMD=$ar NM_CMD=$nm +METALCC=$metalcc RANLIB=$ranlib STRIP=$strip STRIPTYPE=$striptype diff --git a/ffbuild/common.mak b/ffbuild/common.mak index 268ae61154..5528f71659 100644 --- a/ffbuild/common.mak +++ b/ffbuild/common.mak @@ -104,6 +104,15 @@ COMPILE_MSA = $(call COMPILE,CC,MSAFLAGS) $(BIN2CEXE): ffbuild/bin2c_host.o $(HOSTLD) $(HOSTLDFLAGS) $(HOSTLD_O) $^ $(HOSTEXTRALIBS) +%.metal.air: %.metal + $(METALCC) $(patsubst $(SRC_PATH)/%,$(SRC_LINK)/%,$<) -o $@ + +%.metallib: %.metal.air + $(METALCC)lib --split-module-without-linking $(patsubst $(SRC_PATH)/%,$(SRC_LINK)/%,$<) -o $@ + +%.metallib.c: %.metallib + $(Q)xxd -i $(patsubst $(SRC_PATH)/%,$(SRC_LINK)/%,$<) | sed -E 's,[a-zA-Z_]*_(vf)_([a-zA-Z_]*)_metallib,\1_\2_metallib,' > $@ + %.ptx: %.cu $(SRC_PATH)/compat/cuda/cuda_runtime.h $(COMPILE_NVCC) diff --git a/libavfilter/Makefile b/libavfilter/Makefile index 2fe495df28..1c897cad55 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -239,6 +239,8 @@ OBJS-$(CONFIG_DECONVOLVE_FILTER) += vf_convolve.o framesync.o OBJS-$(CONFIG_DEDOT_FILTER) += vf_dedot.o OBJS-$(CONFIG_DEFLATE_FILTER) += vf_neighbor.o OBJS-$(CONFIG_DEFLICKER_FILTER) += vf_deflicker.o +OBJS-$(CONFIG_DEINTERLACE_METAL_FILTER) += vf_deinterlace_metal.o vf_deinterlace_metal.metallib.o \ + yadif_common.o OBJS-$(CONFIG_DEINTERLACE_QSV_FILTER) += vf_deinterlace_qsv.o OBJS-$(CONFIG_DEINTERLACE_VAAPI_FILTER) += vf_deinterlace_vaapi.o vaapi_vpp.o OBJS-$(CONFIG_DEJUDDER_FILTER) += vf_dejudder.o diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c index ec57a2c49c..8e086c5bab 100644 --- a/libavfilter/allfilters.c +++ b/libavfilter/allfilters.c @@ -226,6 +226,7 @@ extern const AVFilter ff_vf_deconvolve; extern const AVFilter ff_vf_dedot; extern const AVFilter ff_vf_deflate; extern const AVFilter ff_vf_deflicker; +extern const AVFilter ff_vf_deinterlace_metal; extern const AVFilter ff_vf_deinterlace_qsv; extern const AVFilter ff_vf_deinterlace_vaapi; extern const AVFilter ff_vf_dejudder; diff --git a/libavfilter/vf_deinterlace_metal.m b/libavfilter/vf_deinterlace_metal.m new file mode 100644 index 0000000000..2ba15e3fbb --- /dev/null +++ b/libavfilter/vf_deinterlace_metal.m @@ -0,0 +1,453 @@ +/* + * Copyright (C) 2018 Philip Langdale <philipl@overt.org> + * 2020 Aman Karmani <aman@tmm1.net> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "internal.h" +#include "yadif.h" +#include <libavutil/avassert.h> +#include <libavutil/hwcontext.h> + +#import <CoreVideo/CoreVideo.h> +#import <Metal/Metal.h> + +extern char vf_deinterlace_metal_metallib[]; +extern unsigned int vf_deinterlace_metal_metallib_len; + +typedef struct DeintMetalContext { + YADIFContext yadif; + + AVBufferRef *device_ref; + AVBufferRef *input_frames_ref; + AVHWFramesContext *input_frames; + + id<MTLDevice> mtlDevice; + id<MTLLibrary> mtlLibrary; + id<MTLCommandQueue> mtlQueue; + id<MTLComputePipelineState> mtlPipeline; + id<MTLFunction> mtlFunction; + id<MTLBuffer> mtlParamsBuffer; + + CVMetalTextureCacheRef textureCache; +} DeintMetalContext; + +struct mtlYadifParams { + uint channels; + uint parity; + uint tff; + bool is_second_field; + bool skip_spatial_check; + int field_mode; +}; + +static void call_kernel(AVFilterContext *ctx, + id<MTLTexture> dst, + id<MTLTexture> prev, + id<MTLTexture> cur, + id<MTLTexture> next, + int channels, + int parity, + int tff) +{ + DeintMetalContext *s = ctx->priv; + id<MTLCommandBuffer> buffer = s->mtlQueue.commandBuffer; + id<MTLComputeCommandEncoder> encoder = buffer.computeCommandEncoder; + struct mtlYadifParams *params = (struct mtlYadifParams *)s->mtlParamsBuffer.contents; + *params = (struct mtlYadifParams){ + .channels = channels, + .parity = parity, + .tff = tff, + .is_second_field = !(parity ^ tff), + .skip_spatial_check = s->yadif.mode&2, + .field_mode = s->yadif.current_field + }; + + [encoder setComputePipelineState:s->mtlPipeline]; + [encoder setTexture:dst atIndex:0]; + [encoder setTexture:prev atIndex:1]; + [encoder setTexture:cur atIndex:2]; + [encoder setTexture:next atIndex:3]; + [encoder setBuffer:s->mtlParamsBuffer offset:0 atIndex:4]; + + NSUInteger w = s->mtlPipeline.threadExecutionWidth; + NSUInteger h = s->mtlPipeline.maxTotalThreadsPerThreadgroup / w; + MTLSize threadsPerThreadgroup = MTLSizeMake(w, h, 1); + BOOL fallback = YES; + if (@available(macOS 10.15, iOS 11, tvOS 14.5, *)) { + if ([s->mtlDevice supportsFamily:MTLGPUFamilyCommon3]) { + MTLSize threadsPerGrid = MTLSizeMake(dst.width, dst.height, 1); + [encoder dispatchThreads:threadsPerGrid threadsPerThreadgroup:threadsPerThreadgroup]; + fallback = NO; + } + } + if (fallback) { + MTLSize threadgroups = MTLSizeMake((dst.width + w - 1) / w, + (dst.height + h - 1) / h, + 1); + [encoder dispatchThreadgroups:threadgroups threadsPerThreadgroup:threadsPerThreadgroup]; + } + + [encoder endEncoding]; + + [buffer commit]; + [buffer waitUntilCompleted]; + + [encoder release]; + encoder = nil; + [buffer release]; + buffer = nil; +} + +static CVMetalTextureRef pixbuf_to_texture(AVFilterContext *ctx, + CVPixelBufferRef pixbuf, + int plane, + MTLPixelFormat format) +{ + DeintMetalContext *s = ctx->priv; + CVMetalTextureRef tex = NULL; + CVReturn ret; + + ret = CVMetalTextureCacheCreateTextureFromImage( + NULL, + s->textureCache, + pixbuf, + NULL, + format, + CVPixelBufferGetWidthOfPlane(pixbuf, plane), + CVPixelBufferGetHeightOfPlane(pixbuf, plane), + plane, + &tex + ); + if (ret != kCVReturnSuccess) { + av_log(ctx, AV_LOG_ERROR, "Failed to create CVMetalTexture from image: %d\n", ret); + return NULL; + } + + return tex; +} + +static void filter(AVFilterContext *ctx, AVFrame *dst, + int parity, int tff) +{ + DeintMetalContext *s = ctx->priv; + YADIFContext *y = &s->yadif; + int i; + + for (i = 0; i < y->csp->nb_components; i++) { + int pixel_size, channels; + const AVComponentDescriptor *comp = &y->csp->comp[i]; + CVMetalTextureRef prev, cur, next, dest; + id<MTLTexture> tex_prev, tex_cur, tex_next, tex_dest; + MTLPixelFormat format; + + if (comp->plane < i) { + // We process planes as a whole, so don't reprocess + // them for additional components + continue; + } + + pixel_size = (comp->depth + comp->shift) / 8; + channels = comp->step / pixel_size; + if (pixel_size > 2 || channels > 2) { + av_log(ctx, AV_LOG_ERROR, "Unsupported pixel format: %s\n", y->csp->name); + goto exit; + } + switch (pixel_size) { + case 1: + format = channels == 1 ? MTLPixelFormatR8Unorm : MTLPixelFormatRG8Unorm; + break; + case 2: + format = channels == 1 ? MTLPixelFormatR16Unorm : MTLPixelFormatRG16Unorm; + break; + default: + av_log(ctx, AV_LOG_ERROR, "Unsupported pixel format: %s\n", y->csp->name); + goto exit; + } + av_log(ctx, AV_LOG_TRACE, + "Deinterlacing plane %d: pixel_size: %d channels: %d\n", + comp->plane, pixel_size, channels); + + prev = pixbuf_to_texture(ctx, (CVPixelBufferRef)y->prev->data[3], i, format); + cur = pixbuf_to_texture(ctx, (CVPixelBufferRef)y->cur->data[3], i, format); + next = pixbuf_to_texture(ctx, (CVPixelBufferRef)y->next->data[3], i, format); + dest = pixbuf_to_texture(ctx, (CVPixelBufferRef)dst->data[3], i, format); + + tex_prev = CVMetalTextureGetTexture(prev); + tex_cur = CVMetalTextureGetTexture(cur); + tex_next = CVMetalTextureGetTexture(next); + tex_dest = CVMetalTextureGetTexture(dest); + + call_kernel(ctx, tex_dest, tex_prev, tex_cur, tex_next, + channels, parity, tff); + + CFRelease(prev); + CFRelease(cur); + CFRelease(next); + CFRelease(dest); + } + + CVBufferPropagateAttachments((CVPixelBufferRef)y->cur->data[3], (CVPixelBufferRef)dst->data[3]); + + if (y->current_field == YADIF_FIELD_END) { + y->current_field = YADIF_FIELD_NORMAL; + } + +exit: + return; +} + +static av_cold int deint_metal_init(AVFilterContext *ctx) +{ + DeintMetalContext *s = ctx->priv; + NSError *err = nil; + CVReturn ret; + + s->mtlDevice = MTLCreateSystemDefaultDevice(); + if (!s->mtlDevice) { + av_log(ctx, AV_LOG_ERROR, "Unable to find Metal device\n"); + return AVERROR_EXTERNAL; + } + + av_log(ctx, AV_LOG_INFO, "Using Metal device: %s\n", s->mtlDevice.name.UTF8String); + + dispatch_data_t libData = dispatch_data_create( + vf_deinterlace_metal_metallib, + vf_deinterlace_metal_metallib_len, + nil, + nil); + s->mtlLibrary = [s->mtlDevice + newLibraryWithData:libData + error:&err]; + dispatch_release(libData); + libData = nil; + if (err) { + av_log(ctx, AV_LOG_ERROR, "Failed to load Metal library: %s\n", err.description.UTF8String); + return AVERROR_EXTERNAL; + } + s->mtlFunction = [s->mtlLibrary newFunctionWithName:@"deint"]; + + s->mtlQueue = s->mtlDevice.newCommandQueue; + if (!s->mtlQueue) { + av_log(ctx, AV_LOG_ERROR, "Failed to create Metal command queue!\n"); + return AVERROR_EXTERNAL; + } + + s->mtlPipeline = [s->mtlDevice + newComputePipelineStateWithFunction:s->mtlFunction + error:&err]; + if (err) { + av_log(ctx, AV_LOG_ERROR, "Failed to create Metal compute pipeline: %s\n", err.description.UTF8String); + return AVERROR_EXTERNAL; + } + + s->mtlParamsBuffer = [s->mtlDevice + newBufferWithLength:sizeof(struct mtlYadifParams) + options:MTLResourceStorageModeShared]; + + ret = CVMetalTextureCacheCreate( + NULL, + NULL, + s->mtlDevice, + NULL, + &s->textureCache + ); + if (ret != kCVReturnSuccess) { + av_log(ctx, AV_LOG_ERROR, "Failed to create CVMetalTextureCache: %d\n", ret); + return AVERROR_EXTERNAL; + } + + return 0; +} + +static av_cold void deint_metal_uninit(AVFilterContext *ctx) +{ + DeintMetalContext *s = ctx->priv; + YADIFContext *y = &s->yadif; + + av_frame_free(&y->prev); + av_frame_free(&y->cur); + av_frame_free(&y->next); + + av_buffer_unref(&s->device_ref); + av_buffer_unref(&s->input_frames_ref); + s->input_frames = NULL; + + [s->mtlParamsBuffer release]; + [s->mtlFunction release]; + [s->mtlPipeline release]; + [s->mtlQueue release]; + [s->mtlLibrary release]; + [s->mtlDevice release]; + + s->mtlParamsBuffer = nil; + s->mtlFunction = nil; + s->mtlPipeline = nil; + s->mtlQueue = nil; + s->mtlLibrary = nil; + s->mtlDevice = nil; + + if (s->textureCache) { + CFRelease(s->textureCache); + s->textureCache = NULL; + } +} + +static int config_input(AVFilterLink *inlink) +{ + AVFilterContext *ctx = inlink->dst; + DeintMetalContext *s = ctx->priv; + + if (!inlink->hw_frames_ctx) { + av_log(ctx, AV_LOG_ERROR, "A hardware frames reference is " + "required to associate the processing device.\n"); + return AVERROR(EINVAL); + } + + s->input_frames_ref = av_buffer_ref(inlink->hw_frames_ctx); + if (!s->input_frames_ref) { + av_log(ctx, AV_LOG_ERROR, "A input frames reference create " + "failed.\n"); + return AVERROR(ENOMEM); + } + s->input_frames = (AVHWFramesContext*)s->input_frames_ref->data; + + return 0; +} + +static int config_output(AVFilterLink *link) +{ + AVHWFramesContext *output_frames; + AVFilterContext *ctx = link->src; + DeintMetalContext *s = ctx->priv; + YADIFContext *y = &s->yadif; + int ret = 0; + + av_assert0(s->input_frames); + s->device_ref = av_buffer_ref(s->input_frames->device_ref); + if (!s->device_ref) { + av_log(ctx, AV_LOG_ERROR, "A device reference create " + "failed.\n"); + return AVERROR(ENOMEM); + } + + link->hw_frames_ctx = av_hwframe_ctx_alloc(s->device_ref); + if (!link->hw_frames_ctx) { + av_log(ctx, AV_LOG_ERROR, "Failed to create HW frame context " + "for output.\n"); + ret = AVERROR(ENOMEM); + goto exit; + } + + output_frames = (AVHWFramesContext*)link->hw_frames_ctx->data; + + output_frames->format = AV_PIX_FMT_VIDEOTOOLBOX; + output_frames->sw_format = s->input_frames->sw_format; + output_frames->width = ctx->inputs[0]->w; + output_frames->height = ctx->inputs[0]->h; + + ret = ff_filter_init_hw_frames(ctx, link, 10); + if (ret < 0) + goto exit; + + ret = av_hwframe_ctx_init(link->hw_frames_ctx); + if (ret < 0) { + av_log(ctx, AV_LOG_ERROR, "Failed to initialise VideoToolbox frame " + "context for output: %d\n", ret); + goto exit; + } + + link->time_base.num = ctx->inputs[0]->time_base.num; + link->time_base.den = ctx->inputs[0]->time_base.den * 2; + link->w = ctx->inputs[0]->w; + link->h = ctx->inputs[0]->h; + + if(y->mode & 1) + link->frame_rate = av_mul_q(ctx->inputs[0]->frame_rate, + (AVRational){2, 1}); + + if (link->w < 3 || link->h < 3) { + av_log(ctx, AV_LOG_ERROR, "Video of less than 3 columns or lines is not supported\n"); + ret = AVERROR(EINVAL); + goto exit; + } + + y->csp = av_pix_fmt_desc_get(output_frames->sw_format); + y->filter = filter; + +exit: + return ret; +} + +#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM +#define CONST(name, help, val, unit) { name, help, 0, AV_OPT_TYPE_CONST, {.i64=val}, INT_MIN, INT_MAX, FLAGS, unit } + +static const AVOption deinterlace_metal_options[] = { + #define OFFSET(x) offsetof(YADIFContext, x) + { "mode", "specify the interlacing mode", OFFSET(mode), AV_OPT_TYPE_INT, {.i64=YADIF_MODE_SEND_FRAME}, 0, 3, FLAGS, "mode"}, + CONST("send_frame", "send one frame for each frame", YADIF_MODE_SEND_FRAME, "mode"), + CONST("send_field", "send one frame for each field", YADIF_MODE_SEND_FIELD, "mode"), + CONST("send_frame_nospatial", "send one frame for each frame, but skip spatial interlacing check", YADIF_MODE_SEND_FRAME_NOSPATIAL, "mode"), + CONST("send_field_nospatial", "send one frame for each field, but skip spatial interlacing check", YADIF_MODE_SEND_FIELD_NOSPATIAL, "mode"), + + { "parity", "specify the assumed picture field parity", OFFSET(parity), AV_OPT_TYPE_INT, {.i64=YADIF_PARITY_AUTO}, -1, 1, FLAGS, "parity" }, + CONST("tff", "assume top field first", YADIF_PARITY_TFF, "parity"), + CONST("bff", "assume bottom field first", YADIF_PARITY_BFF, "parity"), + CONST("auto", "auto detect parity", YADIF_PARITY_AUTO, "parity"), + + { "deint", "specify which frames to deinterlace", OFFSET(deint), AV_OPT_TYPE_INT, {.i64=YADIF_DEINT_ALL}, 0, 1, FLAGS, "deint" }, + CONST("all", "deinterlace all frames", YADIF_DEINT_ALL, "deint"), + CONST("interlaced", "only deinterlace frames marked as interlaced", YADIF_DEINT_INTERLACED, "deint"), + #undef OFFSET + + { NULL } +}; + +AVFILTER_DEFINE_CLASS(deinterlace_metal); + +static const AVFilterPad deint_metal_inputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .filter_frame = ff_yadif_filter_frame, + .config_props = config_input, + }, +}; + +static const AVFilterPad deint_metal_outputs[] = { + { + .name = "default", + .type = AVMEDIA_TYPE_VIDEO, + .request_frame = ff_yadif_request_frame, + .config_props = config_output, + }, +}; + +AVFilter ff_vf_deinterlace_metal = { + .name = "deinterlace_metal", + .description = NULL_IF_CONFIG_SMALL("Deinterlace VideoToolbox frames with Metal compute"), + .priv_size = sizeof(DeintMetalContext), + .priv_class = &deinterlace_metal_class, + .init = deint_metal_init, + .uninit = deint_metal_uninit, + FILTER_SINGLE_PIXFMT(AV_PIX_FMT_VIDEOTOOLBOX), + FILTER_INPUTS(deint_metal_inputs), + FILTER_OUTPUTS(deint_metal_outputs), + .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL, + .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE, +}; diff --git a/libavfilter/vf_deinterlace_metal.metal b/libavfilter/vf_deinterlace_metal.metal new file mode 100644 index 0000000000..50783f2ffe --- /dev/null +++ b/libavfilter/vf_deinterlace_metal.metal @@ -0,0 +1,269 @@ +/* + * Copyright (C) 2018 Philip Langdale <philipl@overt.org> + * 2020 Aman Karmani <aman@tmm1.net> + * 2020 Stefan Dyulgerov <stefan.dyulgerov@gmail.com> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <metal_stdlib> +#include <metal_integer> +#include <metal_texture> + +using namespace metal; + +/* + * Parameters + */ + +struct deintParams { + uint channels; + uint parity; + uint tff; + bool is_second_field; + bool skip_spatial_check; + int field_mode; +}; + +/* + * Texture access helpers + */ + +#define accesstype access::sample +const sampler s(coord::pixel); + +template <typename T> +T tex2D(texture2d<float, access::sample> tex, uint x, uint y) +{ + return tex.sample(s, float2(x, y)).x; +} + +template <> +float2 tex2D<float2>(texture2d<float, access::sample> tex, uint x, uint y) +{ + return tex.sample(s, float2(x, y)).xy; +} + +template <typename T> +T tex2D(texture2d<float, access::read> tex, uint x, uint y) +{ + return tex.read(uint2(x, y)).x; +} + +template <> +float2 tex2D<float2>(texture2d<float, access::read> tex, uint x, uint y) +{ + return tex.read(uint2(x, y)).xy; +} + +/* + * YADIF helpers + */ + +template<typename T> +T spatial_predictor(T a, T b, T c, T d, T e, T f, T g, + T h, T i, T j, T k, T l, T m, T n) +{ + T spatial_pred = (d + k)/2; + T spatial_score = abs(c - j) + abs(d - k) + abs(e - l); + + T score = abs(b - k) + abs(c - l) + abs(d - m); + if (score < spatial_score) { + spatial_pred = (c + l)/2; + spatial_score = score; + score = abs(a - l) + abs(b - m) + abs(c - n); + if (score < spatial_score) { + spatial_pred = (b + m)/2; + spatial_score = score; + } + } + score = abs(d - i) + abs(e - j) + abs(f - k); + if (score < spatial_score) { + spatial_pred = (e + j)/2; + spatial_score = score; + score = abs(e - h) + abs(f - i) + abs(g - j); + if (score < spatial_score) { + spatial_pred = (f + i)/2; + spatial_score = score; + } + } + return spatial_pred; +} + +template<typename T> +T temporal_predictor(T A, T B, T C, T D, T E, T F, + T G, T H, T I, T J, T K, T L, + T spatial_pred, bool skip_check) +{ + T p0 = (C + H) / 2; + T p1 = F; + T p2 = (D + I) / 2; + T p3 = G; + T p4 = (E + J) / 2; + + T tdiff0 = abs(D - I); + T tdiff1 = (abs(A - F) + abs(B - G)) / 2; + T tdiff2 = (abs(K - F) + abs(G - L)) / 2; + + T diff = max3(tdiff0, tdiff1, tdiff2); + + if (!skip_check) { + T maxi = max3(p2 - p3, p2 - p1, min(p0 - p1, p4 - p3)); + T mini = min3(p2 - p3, p2 - p1, max(p0 - p1, p4 - p3)); + diff = max3(diff, mini, -maxi); + } + + return clamp(spatial_pred, p2 - diff, p2 + diff); +} + +#define T float2 +template <> +T spatial_predictor<T>(T a, T b, T c, T d, T e, T f, T g, + T h, T i, T j, T k, T l, T m, T n) +{ + return T( + spatial_predictor(a.x, b.x, c.x, d.x, e.x, f.x, g.x, + h.x, i.x, j.x, k.x, l.x, m.x, n.x), + spatial_predictor(a.y, b.y, c.y, d.y, e.y, f.y, g.y, + h.y, i.y, j.y, k.y, l.y, m.y, n.y) + ); +} + +template <> +T temporal_predictor<T>(T A, T B, T C, T D, T E, T F, + T G, T H, T I, T J, T K, T L, + T spatial_pred, bool skip_check) +{ + return T( + temporal_predictor(A.x, B.x, C.x, D.x, E.x, F.x, + G.x, H.x, I.x, J.x, K.x, L.x, + spatial_pred.x, skip_check), + temporal_predictor(A.y, B.y, C.y, D.y, E.y, F.y, + G.y, H.y, I.y, J.y, K.y, L.y, + spatial_pred.y, skip_check) + ); +} +#undef T + +/* + * YADIF compute + */ + +template <typename T> +T yadif_compute_spatial( + texture2d<float, accesstype> cur, + uint2 pos) +{ + // Calculate spatial prediction + T a = tex2D<T>(cur, pos.x - 3, pos.y - 1); + T b = tex2D<T>(cur, pos.x - 2, pos.y - 1); + T c = tex2D<T>(cur, pos.x - 1, pos.y - 1); + T d = tex2D<T>(cur, pos.x - 0, pos.y - 1); + T e = tex2D<T>(cur, pos.x + 1, pos.y - 1); + T f = tex2D<T>(cur, pos.x + 2, pos.y - 1); + T g = tex2D<T>(cur, pos.x + 3, pos.y - 1); + + T h = tex2D<T>(cur, pos.x - 3, pos.y + 1); + T i = tex2D<T>(cur, pos.x - 2, pos.y + 1); + T j = tex2D<T>(cur, pos.x - 1, pos.y + 1); + T k = tex2D<T>(cur, pos.x - 0, pos.y + 1); + T l = tex2D<T>(cur, pos.x + 1, pos.y + 1); + T m = tex2D<T>(cur, pos.x + 2, pos.y + 1); + T n = tex2D<T>(cur, pos.x + 3, pos.y + 1); + + return spatial_predictor(a, b, c, d, e, f, g, + h, i, j, k, l, m, n); +} + +template <typename T> +T yadif_compute_temporal( + texture2d<float, accesstype> cur, + texture2d<float, accesstype> prev2, + texture2d<float, accesstype> prev1, + texture2d<float, accesstype> next1, + texture2d<float, accesstype> next2, + T spatial_pred, + bool skip_spatial_check, + uint2 pos) +{ + // Calculate temporal prediction + T A = tex2D<T>(prev2, pos.x, pos.y - 1); + T B = tex2D<T>(prev2, pos.x, pos.y + 1); + T C = tex2D<T>(prev1, pos.x, pos.y - 2); + T D = tex2D<T>(prev1, pos.x, pos.y + 0); + T E = tex2D<T>(prev1, pos.x, pos.y + 2); + T F = tex2D<T>(cur, pos.x, pos.y - 1); + T G = tex2D<T>(cur, pos.x, pos.y + 1); + T H = tex2D<T>(next1, pos.x, pos.y - 2); + T I = tex2D<T>(next1, pos.x, pos.y + 0); + T J = tex2D<T>(next1, pos.x, pos.y + 2); + T K = tex2D<T>(next2, pos.x, pos.y - 1); + T L = tex2D<T>(next2, pos.x, pos.y + 1); + + return temporal_predictor(A, B, C, D, E, F, G, H, I, J, K, L, + spatial_pred, skip_spatial_check); +} + +template <typename T> +T yadif( + texture2d<float, access::write> dst, + texture2d<float, accesstype> prev, + texture2d<float, accesstype> cur, + texture2d<float, accesstype> next, + constant deintParams& params, + uint2 pos) +{ + T spatial_pred = yadif_compute_spatial<T>(cur, pos); + + if (params.is_second_field) { + return yadif_compute_temporal(cur, prev, cur, next, next, spatial_pred, params.skip_spatial_check, pos); + } else { + return yadif_compute_temporal(cur, prev, prev, cur, next, spatial_pred, params.skip_spatial_check, pos); + } +} + +/* + * Kernel dispatch + */ + +kernel void deint( + texture2d<float, access::write> dst [[texture(0)]], + texture2d<float, accesstype> prev [[texture(1)]], + texture2d<float, accesstype> cur [[texture(2)]], + texture2d<float, accesstype> next [[texture(3)]], + constant deintParams& params [[buffer(4)]], + uint2 pos [[thread_position_in_grid]]) +{ + if ((pos.x >= dst.get_width()) || + (pos.y >= dst.get_height())) { + return; + } + + // Don't modify the primary field + if (pos.y % 2 == params.parity) { + float4 in = cur.read(pos); + dst.write(in, pos); + return; + } + + float2 pred; + if (params.channels == 1) + pred = float2(yadif<float>(dst, prev, cur, next, params, pos)); + else + pred = yadif<float2>(dst, prev, cur, next, params, pos); + dst.write(pred.xyyy, pos); +}

[FFmpeg-devel] avfilter: add yadif implementation using Metal.framework

Checks

Commit Message

Comments

Patch