[FFmpeg-devel,v4,5/5] avfilter: add vf_yadif_videotoolbox

Message ID: 20211217200418.68942-5-ffmpeg@tmm1.net
State: New
Series: [FFmpeg-devel,v4,1/5] avfilter/vf_yadif_cuda: simplify filter definition

Checks

Context Check Description
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished
andriy/make_ppc success Make finished
andriy/make_fate_ppc success Make fate finished

Commit Message

Aman Karmani Dec. 17, 2021, 8:04 p.m. UTC
From: Aman Karmani <aman@tmm1.net>

deinterlaces CVPixelBuffers, i.e. AV_PIX_FMT_VIDEOTOOLBOX frames

for example, an interlaced mpeg2 video can be decoded by avcodec,
uploaded into a CVPixelBuffer, deinterlaced by Metal, and then
encoded to h264 by VideoToolbox as follows:

    ffmpeg \
           -init_hw_device videotoolbox \
           -i interlaced.ts \
           -vf hwupload,yadif_videotoolbox \
           -c:v h264_videotoolbox \
           -b:v 2000k \
           -c:a copy \
           -y progressive.ts

(note that uploading AVFrame into CVPixelBuffer via hwupload
 requires 504c60660d3194758823ddd45ceddb86e35d806f)
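
(untested sketch, not part of this patch: when the decoder is asked to
 keep its output on the GPU via -hwaccel videotoolbox together with
 -hwaccel_output_format videotoolbox, the frames should already be
 CVPixelBuffers and the hwupload step becomes unnecessary:

    ffmpeg \
           -hwaccel videotoolbox \
           -hwaccel_output_format videotoolbox \
           -i interlaced.ts \
           -vf yadif_videotoolbox \
           -c:v h264_videotoolbox \
           -b:v 2000k \
           -c:a copy \
           -y progressive.ts
)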

this work is sponsored by Fancy Bits LLC

Reviewed-by: Ridley Combs <rcombs@rcombs.me>
Signed-off-by: Aman Karmani <aman@tmm1.net>
---
 configure                                     |   1 +
 libavfilter/Makefile                          |   4 +
 libavfilter/allfilters.c                      |   1 +
 libavfilter/metal/vf_yadif_videotoolbox.metal | 269 ++++++++++++
 libavfilter/vf_yadif_videotoolbox.m           | 406 ++++++++++++++++++
 5 files changed, 681 insertions(+)
 create mode 100644 libavfilter/metal/vf_yadif_videotoolbox.metal
 create mode 100644 libavfilter/vf_yadif_videotoolbox.m
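
(as with yadif and yadif_cuda, the filter exposes the usual mode, parity
 and deint options from the AVOption table in the patch; e.g. the
 following, untested here, emits one frame per field and only touches
 frames flagged as interlaced:

    -vf hwupload,yadif_videotoolbox=mode=send_field:deint=interlaced
)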

Comments

Philip Langdale Dec. 17, 2021, 9:38 p.m. UTC | #1
On Fri, 17 Dec 2021 12:04:18 -0800
Aman Karmani <ffmpeg@tmm1.net> wrote:

> From: Aman Karmani <aman@tmm1.net>
> 
> deinterlaces CVPixelBuffers, i.e. AV_PIX_FMT_VIDEOTOOLBOX frames
> 
> for example, an interlaced mpeg2 video can be decoded by avcodec,
> uploaded into a CVPixelBuffer, deinterlaced by Metal, and then
> encoded to h264 by VideoToolbox as follows:
> 
>     ffmpeg \
>            -init_hw_device videotoolbox \
>            -i interlaced.ts \
>            -vf hwupload,yadif_videotoolbox \
>            -c:v h264_videotoolbox \
>            -b:v 2000k \
>            -c:a copy \
>            -y progressive.ts
> 
> (note that uploading AVFrame into CVPixelBuffer via hwupload
>  requires 504c60660d3194758823ddd45ceddb86e35d806f)
> 
> this work is sponsored by Fancy Bits LLC
> 
> Reviewed-by: Ridley Combs <rcombs@rcombs.me>
> Signed-off-by: Aman Karmani <aman@tmm1.net>
> ---
>  configure                                     |   1 +
>  libavfilter/Makefile                          |   4 +
>  libavfilter/allfilters.c                      |   1 +
>  libavfilter/metal/vf_yadif_videotoolbox.metal | 269 ++++++++++++
>  libavfilter/vf_yadif_videotoolbox.m           | 406 ++++++++++++++++++
>  5 files changed, 681 insertions(+)
>  create mode 100644 libavfilter/metal/vf_yadif_videotoolbox.metal
>  create mode 100644 libavfilter/vf_yadif_videotoolbox.m
> 
> diff --git a/configure b/configure
> index 32a39f5f5b..d8b07c8e00 100755
> --- a/configure
> +++ b/configure
> @@ -3748,6 +3748,7 @@ vpp_qsv_filter_select="qsvvpp"
>  xfade_opencl_filter_deps="opencl"
>  yadif_cuda_filter_deps="ffnvcodec"
>  yadif_cuda_filter_deps_any="cuda_nvcc cuda_llvm"
> +yadif_videotoolbox_filter_deps="metal corevideo videotoolbox"
>  
>  # examples
>  avio_list_dir_deps="avformat avutil"
> diff --git a/libavfilter/Makefile b/libavfilter/Makefile
> index 2fe495df28..9a061ba3c8 100644
> --- a/libavfilter/Makefile
> +++ b/libavfilter/Makefile
> @@ -519,6 +519,10 @@ OBJS-$(CONFIG_XSTACK_FILTER)                 += vf_stack.o framesync.o
>  OBJS-$(CONFIG_YADIF_FILTER)                  += vf_yadif.o yadif_common.o
>  OBJS-$(CONFIG_YADIF_CUDA_FILTER)             += vf_yadif_cuda.o vf_yadif_cuda.ptx.o \
>                                                  yadif_common.o cuda/load_helper.o
> +OBJS-$(CONFIG_YADIF_VIDEOTOOLBOX_FILTER)     += vf_yadif_videotoolbox.o \
> +                                                metal/vf_yadif_videotoolbox.metallib.o \
> +                                                metal/utils.o \
> +                                                yadif_common.o
>  OBJS-$(CONFIG_YAEPBLUR_FILTER)               += vf_yaepblur.o
>  OBJS-$(CONFIG_ZMQ_FILTER)                    += f_zmq.o
>  OBJS-$(CONFIG_ZOOMPAN_FILTER)                += vf_zoompan.o
> diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
> index ec57a2c49c..26f1c73505 100644
> --- a/libavfilter/allfilters.c
> +++ b/libavfilter/allfilters.c
> @@ -496,6 +496,7 @@ extern const AVFilter ff_vf_xmedian;
>  extern const AVFilter ff_vf_xstack;
>  extern const AVFilter ff_vf_yadif;
>  extern const AVFilter ff_vf_yadif_cuda;
> +extern const AVFilter ff_vf_yadif_videotoolbox;
>  extern const AVFilter ff_vf_yaepblur;
>  extern const AVFilter ff_vf_zmq;
>  extern const AVFilter ff_vf_zoompan;
> diff --git a/libavfilter/metal/vf_yadif_videotoolbox.metal b/libavfilter/metal/vf_yadif_videotoolbox.metal
> new file mode 100644
> index 0000000000..50783f2ffe
> --- /dev/null
> +++ b/libavfilter/metal/vf_yadif_videotoolbox.metal
> @@ -0,0 +1,269 @@
> +/*
> + * Copyright (C) 2018 Philip Langdale <philipl@overt.org>
> + *               2020 Aman Karmani <aman@tmm1.net>
> + *               2020 Stefan Dyulgerov <stefan.dyulgerov@gmail.com>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include <metal_stdlib>
> +#include <metal_integer>
> +#include <metal_texture>
> +
> +using namespace metal;
> +
> +/*
> + * Parameters
> + */
> +
> +struct deintParams {
> +    uint channels;
> +    uint parity;
> +    uint tff;
> +    bool is_second_field;
> +    bool skip_spatial_check;
> +    int field_mode;
> +};
> +
> +/*
> + * Texture access helpers
> + */
> +
> +#define accesstype access::sample
> +const sampler s(coord::pixel);
> +
> +template <typename T>
> +T tex2D(texture2d<float, access::sample> tex, uint x, uint y)
> +{
> +    return tex.sample(s, float2(x, y)).x;
> +}
> +
> +template <>
> +float2 tex2D<float2>(texture2d<float, access::sample> tex, uint x, uint y)
> +{
> +    return tex.sample(s, float2(x, y)).xy;
> +}
> +
> +template <typename T>
> +T tex2D(texture2d<float, access::read> tex, uint x, uint y)
> +{
> +    return tex.read(uint2(x, y)).x;
> +}
> +
> +template <>
> +float2 tex2D<float2>(texture2d<float, access::read> tex, uint x, uint y)
> +{
> +    return tex.read(uint2(x, y)).xy;
> +}
> +
> +/*
> + * YADIF helpers
> + */
> +
> +template<typename T>
> +T spatial_predictor(T a, T b, T c, T d, T e, T f, T g,
> +                    T h, T i, T j, T k, T l, T m, T n)
> +{
> +    T spatial_pred = (d + k)/2;
> +    T spatial_score = abs(c - j) + abs(d - k) + abs(e - l);
> +
> +    T score = abs(b - k) + abs(c - l) + abs(d - m);
> +    if (score < spatial_score) {
> +        spatial_pred = (c + l)/2;
> +        spatial_score = score;
> +        score = abs(a - l) + abs(b - m) + abs(c - n);
> +        if (score < spatial_score) {
> +            spatial_pred = (b + m)/2;
> +            spatial_score = score;
> +        }
> +    }
> +    score = abs(d - i) + abs(e - j) + abs(f - k);
> +    if (score < spatial_score) {
> +        spatial_pred = (e + j)/2;
> +        spatial_score = score;
> +        score = abs(e - h) + abs(f - i) + abs(g - j);
> +        if (score < spatial_score) {
> +            spatial_pred = (f + i)/2;
> +            spatial_score = score;
> +        }
> +    }
> +    return spatial_pred;
> +}
> +
> +template<typename T>
> +T temporal_predictor(T A, T B, T C, T D, T E, T F,
> +                     T G, T H, T I, T J, T K, T L,
> +                     T spatial_pred, bool skip_check)
> +{
> +    T p0 = (C + H) / 2;
> +    T p1 = F;
> +    T p2 = (D + I) / 2;
> +    T p3 = G;
> +    T p4 = (E + J) / 2;
> +
> +    T tdiff0 = abs(D - I);
> +    T tdiff1 = (abs(A - F) + abs(B - G)) / 2;
> +    T tdiff2 = (abs(K - F) + abs(G - L)) / 2;
> +
> +    T diff = max3(tdiff0, tdiff1, tdiff2);
> +
> +    if (!skip_check) {
> +        T maxi = max3(p2 - p3, p2 - p1, min(p0 - p1, p4 - p3));
> +        T mini = min3(p2 - p3, p2 - p1, max(p0 - p1, p4 - p3));
> +        diff = max3(diff, mini, -maxi);
> +    }
> +
> +    return clamp(spatial_pred, p2 - diff, p2 + diff);
> +}
> +
> +#define T float2
> +template <>
> +T spatial_predictor<T>(T a, T b, T c, T d, T e, T f, T g,
> +                       T h, T i, T j, T k, T l, T m, T n)
> +{
> +    return T(
> +        spatial_predictor(a.x, b.x, c.x, d.x, e.x, f.x, g.x,
> +                          h.x, i.x, j.x, k.x, l.x, m.x, n.x),
> +        spatial_predictor(a.y, b.y, c.y, d.y, e.y, f.y, g.y,
> +                          h.y, i.y, j.y, k.y, l.y, m.y, n.y)
> +    );
> +}
> +
> +template <>
> +T temporal_predictor<T>(T A, T B, T C, T D, T E, T F,
> +                        T G, T H, T I, T J, T K, T L,
> +                        T spatial_pred, bool skip_check)
> +{
> +    return T(
> +        temporal_predictor(A.x, B.x, C.x, D.x, E.x, F.x,
> +                           G.x, H.x, I.x, J.x, K.x, L.x,
> +                           spatial_pred.x, skip_check),
> +        temporal_predictor(A.y, B.y, C.y, D.y, E.y, F.y,
> +                           G.y, H.y, I.y, J.y, K.y, L.y,
> +                           spatial_pred.y, skip_check)
> +    );
> +}
> +#undef T
> +
> +/*
> + * YADIF compute
> + */
> +
> +template <typename T>
> +T yadif_compute_spatial(
> +    texture2d<float, accesstype> cur,
> +    uint2 pos)
> +{
> +    // Calculate spatial prediction
> +    T a = tex2D<T>(cur, pos.x - 3, pos.y - 1);
> +    T b = tex2D<T>(cur, pos.x - 2, pos.y - 1);
> +    T c = tex2D<T>(cur, pos.x - 1, pos.y - 1);
> +    T d = tex2D<T>(cur, pos.x - 0, pos.y - 1);
> +    T e = tex2D<T>(cur, pos.x + 1, pos.y - 1);
> +    T f = tex2D<T>(cur, pos.x + 2, pos.y - 1);
> +    T g = tex2D<T>(cur, pos.x + 3, pos.y - 1);
> +
> +    T h = tex2D<T>(cur, pos.x - 3, pos.y + 1);
> +    T i = tex2D<T>(cur, pos.x - 2, pos.y + 1);
> +    T j = tex2D<T>(cur, pos.x - 1, pos.y + 1);
> +    T k = tex2D<T>(cur, pos.x - 0, pos.y + 1);
> +    T l = tex2D<T>(cur, pos.x + 1, pos.y + 1);
> +    T m = tex2D<T>(cur, pos.x + 2, pos.y + 1);
> +    T n = tex2D<T>(cur, pos.x + 3, pos.y + 1);
> +
> +    return spatial_predictor(a, b, c, d, e, f, g,
> +                             h, i, j, k, l, m, n);
> +}
> +
> +template <typename T>
> +T yadif_compute_temporal(
> +    texture2d<float, accesstype> cur,
> +    texture2d<float, accesstype> prev2,
> +    texture2d<float, accesstype> prev1,
> +    texture2d<float, accesstype> next1,
> +    texture2d<float, accesstype> next2,
> +    T spatial_pred,
> +    bool skip_spatial_check,
> +    uint2 pos)
> +{
> +    // Calculate temporal prediction
> +    T A = tex2D<T>(prev2, pos.x, pos.y - 1);
> +    T B = tex2D<T>(prev2, pos.x, pos.y + 1);
> +    T C = tex2D<T>(prev1, pos.x, pos.y - 2);
> +    T D = tex2D<T>(prev1, pos.x, pos.y + 0);
> +    T E = tex2D<T>(prev1, pos.x, pos.y + 2);
> +    T F = tex2D<T>(cur,   pos.x, pos.y - 1);
> +    T G = tex2D<T>(cur,   pos.x, pos.y + 1);
> +    T H = tex2D<T>(next1, pos.x, pos.y - 2);
> +    T I = tex2D<T>(next1, pos.x, pos.y + 0);
> +    T J = tex2D<T>(next1, pos.x, pos.y + 2);
> +    T K = tex2D<T>(next2, pos.x, pos.y - 1);
> +    T L = tex2D<T>(next2, pos.x, pos.y + 1);
> +
> +    return temporal_predictor(A, B, C, D, E, F, G, H, I, J, K, L,
> +                              spatial_pred, skip_spatial_check);
> +}
> +
> +template <typename T>
> +T yadif(
> +    texture2d<float, access::write> dst,
> +    texture2d<float, accesstype> prev,
> +    texture2d<float, accesstype> cur,
> +    texture2d<float, accesstype> next,
> +    constant deintParams& params,
> +    uint2 pos)
> +{
> +    T spatial_pred = yadif_compute_spatial<T>(cur, pos);
> +
> +    if (params.is_second_field) {
> +        return yadif_compute_temporal(cur, prev, cur, next, next, spatial_pred, params.skip_spatial_check, pos);
> +    } else {
> +        return yadif_compute_temporal(cur, prev, prev, cur, next, spatial_pred, params.skip_spatial_check, pos);
> +    }
> +}
> +
> +/*
> + * Kernel dispatch
> + */
> +
> +kernel void deint(
> +    texture2d<float, access::write> dst [[texture(0)]],
> +    texture2d<float, accesstype> prev [[texture(1)]],
> +    texture2d<float, accesstype> cur  [[texture(2)]],
> +    texture2d<float, accesstype> next [[texture(3)]],
> +    constant deintParams& params [[buffer(4)]],
> +    uint2 pos [[thread_position_in_grid]])
> +{
> +    if ((pos.x >= dst.get_width()) ||
> +        (pos.y >= dst.get_height())) {
> +        return;
> +    }
> +
> +    // Don't modify the primary field
> +    if (pos.y % 2 == params.parity) {
> +        float4 in = cur.read(pos);
> +        dst.write(in, pos);
> +        return;
> +    }
> +
> +    float2 pred;
> +    if (params.channels == 1)
> +        pred = float2(yadif<float>(dst, prev, cur, next, params, pos));
> +    else
> +        pred = yadif<float2>(dst, prev, cur, next, params, pos);
> +    dst.write(pred.xyyy, pos);
> +}
> diff --git a/libavfilter/vf_yadif_videotoolbox.m b/libavfilter/vf_yadif_videotoolbox.m
> new file mode 100644
> index 0000000000..af83a73e89
> --- /dev/null
> +++ b/libavfilter/vf_yadif_videotoolbox.m
> @@ -0,0 +1,406 @@
> +/*
> + * Copyright (C) 2018 Philip Langdale <philipl@overt.org>
> + *               2020 Aman Karmani <aman@tmm1.net>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include "internal.h"
> +#include "yadif.h"
> +#include <libavutil/avassert.h>
> +#include <libavutil/hwcontext.h>
> +#include <libavutil/objc.h>
> +#include <libavfilter/metal/utils.h>
> +
> +extern char ff_vf_yadif_videotoolbox_metallib_data[];
> +extern unsigned int ff_vf_yadif_videotoolbox_metallib_len;
> +
> +typedef struct YADIFVTContext {
> +    YADIFContext yadif;
> +
> +    AVBufferRef       *device_ref;
> +    AVBufferRef       *input_frames_ref;
> +    AVHWFramesContext *input_frames;
> +
> +    id<MTLDevice> mtlDevice;
> +    id<MTLLibrary> mtlLibrary;
> +    id<MTLCommandQueue> mtlQueue;
> +    id<MTLComputePipelineState> mtlPipeline;
> +    id<MTLFunction> mtlFunction;
> +    id<MTLBuffer> mtlParamsBuffer;
> +
> +    CVMetalTextureCacheRef textureCache;
> +} YADIFVTContext;
> +
> +struct mtlYadifParams {
> +    uint channels;
> +    uint parity;
> +    uint tff;
> +    bool is_second_field;
> +    bool skip_spatial_check;
> +    int field_mode;
> +};
> +
> +static void call_kernel(AVFilterContext *ctx,
> +                        id<MTLTexture> dst,
> +                        id<MTLTexture> prev,
> +                        id<MTLTexture> cur,
> +                        id<MTLTexture> next,
> +                        int channels,
> +                        int parity,
> +                        int tff)
> +{
> +    YADIFVTContext *s = ctx->priv;
> +    id<MTLCommandBuffer> buffer = s->mtlQueue.commandBuffer;
> +    id<MTLComputeCommandEncoder> encoder = buffer.computeCommandEncoder;
> +    struct mtlYadifParams *params = (struct mtlYadifParams *)s->mtlParamsBuffer.contents;
> +    *params = (struct mtlYadifParams){
> +        .channels = channels,
> +        .parity = parity,
> +        .tff = tff,
> +        .is_second_field = !(parity ^ tff),
> +        .skip_spatial_check = s->yadif.mode&2,
> +        .field_mode = s->yadif.current_field
> +    };
> +
> +    [encoder setTexture:dst  atIndex:0];
> +    [encoder setTexture:prev atIndex:1];
> +    [encoder setTexture:cur  atIndex:2];
> +    [encoder setTexture:next atIndex:3];
> +    [encoder setBuffer:s->mtlParamsBuffer offset:0 atIndex:4];
> +    ff_metal_compute_encoder_dispatch(s->mtlDevice, s->mtlPipeline, encoder, dst.width, dst.height);
> +    [encoder endEncoding];
> +
> +    [buffer commit];
> +    [buffer waitUntilCompleted];
> +
> +    ff_objc_release(&encoder);
> +    ff_objc_release(&buffer);
> +}
> +
> +static void filter(AVFilterContext *ctx, AVFrame *dst,
> +                   int parity, int tff)
> +{
> +    YADIFVTContext *s = ctx->priv;
> +    YADIFContext *y = &s->yadif;
> +    int i;
> +
> +    for (i = 0; i < y->csp->nb_components; i++) {
> +        int pixel_size, channels;
> +        const AVComponentDescriptor *comp = &y->csp->comp[i];
> +        CVMetalTextureRef prev, cur, next, dest;
> +        id<MTLTexture> tex_prev, tex_cur, tex_next, tex_dest;
> +        MTLPixelFormat format;
> +
> +        if (comp->plane < i) {
> +            // We process planes as a whole, so don't reprocess
> +            // them for additional components
> +            continue;
> +        }
> +
> +        pixel_size = (comp->depth + comp->shift) / 8;
> +        channels = comp->step / pixel_size;
> +        if (pixel_size > 2 || channels > 2) {
> +            av_log(ctx, AV_LOG_ERROR, "Unsupported pixel format:
> %s\n", y->csp->name);
> +            goto exit;
> +        }
> +        switch (pixel_size) {
> +        case 1:
> +            format = channels == 1 ? MTLPixelFormatR8Unorm : MTLPixelFormatRG8Unorm;
> +            break;
> +        case 2:
> +            format = channels == 1 ? MTLPixelFormatR16Unorm : MTLPixelFormatRG16Unorm;
> +            break;
> +        default:
> +            av_log(ctx, AV_LOG_ERROR, "Unsupported pixel format:
> %s\n", y->csp->name);
> +            goto exit;
> +        }
> +        av_log(ctx, AV_LOG_TRACE,
> +               "Deinterlacing plane %d: pixel_size: %d channels:
> %d\n",
> +               comp->plane, pixel_size, channels);
> +
> +        prev = ff_metal_texture_from_pixbuf(ctx, s->textureCache, (CVPixelBufferRef)y->prev->data[3], i, format);
> +        cur  = ff_metal_texture_from_pixbuf(ctx, s->textureCache, (CVPixelBufferRef)y->cur->data[3], i, format);
> +        next = ff_metal_texture_from_pixbuf(ctx, s->textureCache, (CVPixelBufferRef)y->next->data[3], i, format);
> +        dest = ff_metal_texture_from_pixbuf(ctx, s->textureCache, (CVPixelBufferRef)dst->data[3], i, format);
> +
> +        tex_prev = CVMetalTextureGetTexture(prev);
> +        tex_cur  = CVMetalTextureGetTexture(cur);
> +        tex_next = CVMetalTextureGetTexture(next);
> +        tex_dest = CVMetalTextureGetTexture(dest);
> +
> +        call_kernel(ctx, tex_dest, tex_prev, tex_cur, tex_next,
> +                         channels, parity, tff);
> +
> +        CFRelease(prev);
> +        CFRelease(cur);
> +        CFRelease(next);
> +        CFRelease(dest);
> +    }
> +
> +    CVBufferPropagateAttachments((CVPixelBufferRef)y->cur->data[3], (CVPixelBufferRef)dst->data[3]);
> +
> +    if (y->current_field == YADIF_FIELD_END) {
> +        y->current_field = YADIF_FIELD_NORMAL;
> +    }
> +
> +exit:
> +    return;
> +}
> +
> +static av_cold void yadif_videotoolbox_uninit(AVFilterContext *ctx)
> +{
> +    YADIFVTContext *s = ctx->priv;
> +    YADIFContext *y = &s->yadif;
> +
> +    av_frame_free(&y->prev);
> +    av_frame_free(&y->cur);
> +    av_frame_free(&y->next);
> +
> +    av_buffer_unref(&s->device_ref);
> +    av_buffer_unref(&s->input_frames_ref);
> +    s->input_frames = NULL;
> +
> +    ff_objc_release(&s->mtlParamsBuffer);
> +    ff_objc_release(&s->mtlFunction);
> +    ff_objc_release(&s->mtlPipeline);
> +    ff_objc_release(&s->mtlQueue);
> +    ff_objc_release(&s->mtlLibrary);
> +    ff_objc_release(&s->mtlDevice);
> +
> +    if (s->textureCache) {
> +        CFRelease(s->textureCache);
> +        s->textureCache = NULL;
> +    }
> +}
> +
> +static av_cold int yadif_videotoolbox_init(AVFilterContext *ctx)
> +{
> +    YADIFVTContext *s = ctx->priv;
> +    NSError *err = nil;
> +    CVReturn ret;
> +
> +    s->mtlDevice = MTLCreateSystemDefaultDevice();
> +    if (!s->mtlDevice) {
> +        av_log(ctx, AV_LOG_ERROR, "Unable to find Metal device\n");
> +        goto fail;
> +    }
> +
> +    av_log(ctx, AV_LOG_INFO, "Using Metal device: %s\n", s->mtlDevice.name.UTF8String);
> +
> +    dispatch_data_t libData = dispatch_data_create(
> +        ff_vf_yadif_videotoolbox_metallib_data,
> +        ff_vf_yadif_videotoolbox_metallib_len,
> +        nil,
> +        nil);
> +    s->mtlLibrary = [s->mtlDevice newLibraryWithData:libData error:&err];
> +    dispatch_release(libData);
> +    libData = nil;
> +    if (err) {
> +        av_log(ctx, AV_LOG_ERROR, "Failed to load Metal library:
> %s\n", err.description.UTF8String);
> +        goto fail;
> +    }
> +
> +    s->mtlFunction = [s->mtlLibrary newFunctionWithName:@"deint"];
> +    if (!s->mtlFunction) {
> +        av_log(ctx, AV_LOG_ERROR, "Failed to create Metal
> function!\n");
> +        goto fail;
> +    }
> +
> +    s->mtlQueue = s->mtlDevice.newCommandQueue;
> +    if (!s->mtlQueue) {
> +        av_log(ctx, AV_LOG_ERROR, "Failed to create Metal command
> queue!\n");
> +        goto fail;
> +    }
> +
> +    s->mtlPipeline = [s->mtlDevice
> +        newComputePipelineStateWithFunction:s->mtlFunction
> +        error:&err];
> +    if (err) {
> +        av_log(ctx, AV_LOG_ERROR, "Failed to create Metal compute
> pipeline: %s\n", err.description.UTF8String);
> +        goto fail;
> +    }
> +
> +    s->mtlParamsBuffer = [s->mtlDevice
> +        newBufferWithLength:sizeof(struct mtlYadifParams)
> +        options:MTLResourceStorageModeShared];
> +    if (!s->mtlParamsBuffer) {
> +        av_log(ctx, AV_LOG_ERROR, "Failed to create Metal buffer for
> parameters\n");
> +        goto fail;
> +    }
> +
> +    ret = CVMetalTextureCacheCreate(
> +        NULL,
> +        NULL,
> +        s->mtlDevice,
> +        NULL,
> +        &s->textureCache
> +    );
> +    if (ret != kCVReturnSuccess) {
> +        av_log(ctx, AV_LOG_ERROR, "Failed to create
> CVMetalTextureCache: %d\n", ret);
> +        goto fail;
> +    }
> +
> +    return 0;
> +fail:
> +    yadif_videotoolbox_uninit(ctx);
> +    return AVERROR_EXTERNAL;
> +}
> +
> +static int config_input(AVFilterLink *inlink)
> +{
> +    AVFilterContext *ctx = inlink->dst;
> +    YADIFVTContext *s = ctx->priv;
> +
> +    if (!inlink->hw_frames_ctx) {
> +        av_log(ctx, AV_LOG_ERROR, "A hardware frames reference is "
> +               "required to associate the processing device.\n");
> +        return AVERROR(EINVAL);
> +    }
> +
> +    s->input_frames_ref = av_buffer_ref(inlink->hw_frames_ctx);
> +    if (!s->input_frames_ref) {
> +        av_log(ctx, AV_LOG_ERROR, "A input frames reference create "
> +               "failed.\n");
> +        return AVERROR(ENOMEM);
> +    }
> +    s->input_frames = (AVHWFramesContext*)s->input_frames_ref->data;
> +
> +    return 0;
> +}
> +
> +static int config_output(AVFilterLink *link)
> +{
> +    AVHWFramesContext *output_frames;
> +    AVFilterContext *ctx = link->src;
> +    YADIFVTContext *s = ctx->priv;
> +    YADIFContext *y = &s->yadif;
> +    int ret = 0;
> +
> +    av_assert0(s->input_frames);
> +    s->device_ref = av_buffer_ref(s->input_frames->device_ref);
> +    if (!s->device_ref) {
> +        av_log(ctx, AV_LOG_ERROR, "A device reference create "
> +               "failed.\n");
> +        return AVERROR(ENOMEM);
> +    }
> +
> +    link->hw_frames_ctx = av_hwframe_ctx_alloc(s->device_ref);
> +    if (!link->hw_frames_ctx) {
> +        av_log(ctx, AV_LOG_ERROR, "Failed to create HW frame context
> "
> +               "for output.\n");
> +        ret = AVERROR(ENOMEM);
> +        goto exit;
> +    }
> +
> +    output_frames = (AVHWFramesContext*)link->hw_frames_ctx->data;
> +
> +    output_frames->format    = AV_PIX_FMT_VIDEOTOOLBOX;
> +    output_frames->sw_format = s->input_frames->sw_format;
> +    output_frames->width     = ctx->inputs[0]->w;
> +    output_frames->height    = ctx->inputs[0]->h;
> +
> +    ret = ff_filter_init_hw_frames(ctx, link, 10);
> +    if (ret < 0)
> +        goto exit;
> +
> +    ret = av_hwframe_ctx_init(link->hw_frames_ctx);
> +    if (ret < 0) {
> +        av_log(ctx, AV_LOG_ERROR, "Failed to initialise VideoToolbox
> frame "
> +               "context for output: %d\n", ret);
> +        goto exit;
> +    }
> +
> +    link->time_base.num = ctx->inputs[0]->time_base.num;
> +    link->time_base.den = ctx->inputs[0]->time_base.den * 2;
> +    link->w             = ctx->inputs[0]->w;
> +    link->h             = ctx->inputs[0]->h;
> +
> +    if(y->mode & 1)
> +        link->frame_rate = av_mul_q(ctx->inputs[0]->frame_rate,
> +                                    (AVRational){2, 1});
> +
> +    if (link->w < 3 || link->h < 3) {
> +        av_log(ctx, AV_LOG_ERROR, "Video of less than 3 columns or
> lines is not supported\n");
> +        ret = AVERROR(EINVAL);
> +        goto exit;
> +    }
> +
> +    y->csp = av_pix_fmt_desc_get(output_frames->sw_format);
> +    y->filter = filter;
> +
> +exit:
> +    return ret;
> +}
> +
> +#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
> +#define CONST(name, help, val, unit) { name, help, 0, AV_OPT_TYPE_CONST, {.i64=val}, INT_MIN, INT_MAX, FLAGS, unit }
> +
> +static const AVOption yadif_videotoolbox_options[] = {
> +    #define OFFSET(x) offsetof(YADIFContext, x)
> +    { "mode",   "specify the interlacing mode", OFFSET(mode),
> AV_OPT_TYPE_INT, {.i64=YADIF_MODE_SEND_FRAME}, 0, 3, FLAGS, "mode"},
> +    CONST("send_frame",           "send one frame for each frame",
>                                   YADIF_MODE_SEND_FRAME,
> "mode"),
> +    CONST("send_field",           "send one frame for each field",
>                                   YADIF_MODE_SEND_FIELD,
> "mode"),
> +    CONST("send_frame_nospatial", "send one frame for each frame,
> but skip spatial interlacing check", YADIF_MODE_SEND_FRAME_NOSPATIAL,
> "mode"),
> +    CONST("send_field_nospatial", "send one frame for each field,
> but skip spatial interlacing check", YADIF_MODE_SEND_FIELD_NOSPATIAL,
> "mode"), +
> +    { "parity", "specify the assumed picture field parity",
> OFFSET(parity), AV_OPT_TYPE_INT, {.i64=YADIF_PARITY_AUTO}, -1, 1,
> FLAGS, "parity" },
> +    CONST("tff",  "assume top field first",    YADIF_PARITY_TFF,
> "parity"),
> +    CONST("bff",  "assume bottom field first", YADIF_PARITY_BFF,
> "parity"),
> +    CONST("auto", "auto detect parity",        YADIF_PARITY_AUTO,
> "parity"), +
> +    { "deint", "specify which frames to deinterlace", OFFSET(deint),
> AV_OPT_TYPE_INT, {.i64=YADIF_DEINT_ALL}, 0, 1, FLAGS, "deint" },
> +    CONST("all",        "deinterlace all frames",
>    YADIF_DEINT_ALL,        "deint"),
> +    CONST("interlaced", "only deinterlace frames marked as
> interlaced", YADIF_DEINT_INTERLACED, "deint"),
> +    #undef OFFSET
> +
> +    { NULL }
> +};
> +
> +AVFILTER_DEFINE_CLASS(yadif_videotoolbox);
> +
> +static const AVFilterPad yadif_videotoolbox_inputs[] = {
> +    {
> +        .name          = "default",
> +        .type          = AVMEDIA_TYPE_VIDEO,
> +        .filter_frame  = ff_yadif_filter_frame,
> +        .config_props  = config_input,
> +    },
> +};
> +
> +static const AVFilterPad yadif_videotoolbox_outputs[] = {
> +    {
> +        .name          = "default",
> +        .type          = AVMEDIA_TYPE_VIDEO,
> +        .request_frame = ff_yadif_request_frame,
> +        .config_props  = config_output,
> +    },
> +};
> +
> +AVFilter ff_vf_yadif_videotoolbox = {
> +    .name           = "yadif_videotoolbox",
> +    .description    = NULL_IF_CONFIG_SMALL("YADIF for VideoToolbox frames using Metal compute"),
> +    .priv_size      = sizeof(YADIFVTContext),
> +    .priv_class     = &yadif_videotoolbox_class,
> +    .init           = yadif_videotoolbox_init,
> +    .uninit         = yadif_videotoolbox_uninit,
> +    FILTER_SINGLE_PIXFMT(AV_PIX_FMT_VIDEOTOOLBOX),
> +    FILTER_INPUTS(yadif_videotoolbox_inputs),
> +    FILTER_OUTPUTS(yadif_videotoolbox_outputs),
> +    .flags          = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL,
> +    .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
> +};

LGTM for the general part. I'll take your word that the Metal-specific
parts work as intended.

--phil
Aman Karmani Dec. 18, 2021, 8:02 p.m. UTC | #2
On Fri, Dec 17, 2021 at 1:38 PM Philip Langdale <philipl@overt.org> wrote:

> On Fri, 17 Dec 2021 12:04:18 -0800
> Aman Karmani <ffmpeg@tmm1.net> wrote:
>
> > From: Aman Karmani <aman@tmm1.net>
> >
> > deinterlaces CVPixelBuffers, i.e. AV_PIX_FMT_VIDEOTOOLBOX frames
> >
> > for example, an interlaced mpeg2 video can be decoded by avcodec,
> > uploaded into a CVPixelBuffer, deinterlaced by Metal, and then
> > encoded to h264 by VideoToolbox as follows:
> >
> >     ffmpeg \
> >            -init_hw_device videotoolbox \
> >            -i interlaced.ts \
> >            -vf hwupload,yadif_videotoolbox \
> >            -c:v h264_videotoolbox \
> >            -b:v 2000k \
> >            -c:a copy \
> >            -y progressive.ts
> >
> > (note that uploading AVFrame into CVPixelBuffer via hwupload
> >  requires 504c60660d3194758823ddd45ceddb86e35d806f)
> >
> > this work is sponsored by Fancy Bits LLC
> >
> > Reviewed-by: Ridley Combs <rcombs@rcombs.me>
> > Signed-off-by: Aman Karmani <aman@tmm1.net>
> > ---
> >  configure                                     |   1 +
> >  libavfilter/Makefile                          |   4 +
> >  libavfilter/allfilters.c                      |   1 +
> >  libavfilter/metal/vf_yadif_videotoolbox.metal | 269 ++++++++++++
> >  libavfilter/vf_yadif_videotoolbox.m           | 406
> > ++++++++++++++++++ 5 files changed, 681 insertions(+)
> >  create mode 100644 libavfilter/metal/vf_yadif_videotoolbox.metal
> >  create mode 100644 libavfilter/vf_yadif_videotoolbox.m
> >
> > diff --git a/configure b/configure
> > index 32a39f5f5b..d8b07c8e00 100755
> > --- a/configure
> > +++ b/configure
> > @@ -3748,6 +3748,7 @@ vpp_qsv_filter_select="qsvvpp"
> >  xfade_opencl_filter_deps="opencl"
> >  yadif_cuda_filter_deps="ffnvcodec"
> >  yadif_cuda_filter_deps_any="cuda_nvcc cuda_llvm"
> > +yadif_videotoolbox_filter_deps="metal corevideo videotoolbox"
> >
> >  # examples
> >  avio_list_dir_deps="avformat avutil"
> > diff --git a/libavfilter/Makefile b/libavfilter/Makefile
> > index 2fe495df28..9a061ba3c8 100644
> > --- a/libavfilter/Makefile
> > +++ b/libavfilter/Makefile
> > @@ -519,6 +519,10 @@ OBJS-$(CONFIG_XSTACK_FILTER)                 +=
> > vf_stack.o framesync.o OBJS-$(CONFIG_YADIF_FILTER)
> > += vf_yadif.o yadif_common.o OBJS-$(CONFIG_YADIF_CUDA_FILTER)
> >     += vf_yadif_cuda.o vf_yadif_cuda.ptx.o \ yadif_common.o
> > cuda/load_helper.o +OBJS-$(CONFIG_YADIF_VIDEOTOOLBOX_FILTER)     +=
> > vf_yadif_videotoolbox.o \
> > +
> > metal/vf_yadif_videotoolbox.metallib.o \
> > +                                                metal/utils.o \
> > +                                                yadif_common.o
> >  OBJS-$(CONFIG_YAEPBLUR_FILTER)               += vf_yaepblur.o
> >  OBJS-$(CONFIG_ZMQ_FILTER)                    += f_zmq.o
> >  OBJS-$(CONFIG_ZOOMPAN_FILTER)                += vf_zoompan.o
> > diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
> > index ec57a2c49c..26f1c73505 100644
> > --- a/libavfilter/allfilters.c
> > +++ b/libavfilter/allfilters.c
> > @@ -496,6 +496,7 @@ extern const AVFilter ff_vf_xmedian;
> >  extern const AVFilter ff_vf_xstack;
> >  extern const AVFilter ff_vf_yadif;
> >  extern const AVFilter ff_vf_yadif_cuda;
> > +extern const AVFilter ff_vf_yadif_videotoolbox;
> >  extern const AVFilter ff_vf_yaepblur;
> >  extern const AVFilter ff_vf_zmq;
> >  extern const AVFilter ff_vf_zoompan;
> > diff --git a/libavfilter/metal/vf_yadif_videotoolbox.metal
> > b/libavfilter/metal/vf_yadif_videotoolbox.metal new file mode 100644
> > index 0000000000..50783f2ffe
> > --- /dev/null
> > +++ b/libavfilter/metal/vf_yadif_videotoolbox.metal
> > @@ -0,0 +1,269 @@
> > +/*
> > + * Copyright (C) 2018 Philip Langdale <philipl@overt.org>
> > + *               2020 Aman Karmani <aman@tmm1.net>
> > + *               2020 Stefan Dyulgerov <stefan.dyulgerov@gmail.com>
> > + *
> > + * This file is part of FFmpeg.
> > + *
> > + * FFmpeg is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU Lesser General Public
> > + * License as published by the Free Software Foundation; either
> > + * version 2.1 of the License, or (at your option) any later version.
> > + *
> > + * FFmpeg is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > + * Lesser General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU Lesser General Public
> > + * License along with FFmpeg; if not, write to the Free Software
> > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> > 02110-1301 USA
> > + */
> > +
> > +#include <metal_stdlib>
> > +#include <metal_integer>
> > +#include <metal_texture>
> > +
> > +using namespace metal;
> > +
> > +/*
> > + * Parameters
> > + */
> > +
> > +struct deintParams {
> > +    uint channels;
> > +    uint parity;
> > +    uint tff;
> > +    bool is_second_field;
> > +    bool skip_spatial_check;
> > +    int field_mode;
> > +};
> > +
> > +/*
> > + * Texture access helpers
> > + */
> > +
> > +#define accesstype access::sample
> > +const sampler s(coord::pixel);
> > +
> > +template <typename T>
> > +T tex2D(texture2d<float, access::sample> tex, uint x, uint y)
> > +{
> > +    return tex.sample(s, float2(x, y)).x;
> > +}
> > +
> > +template <>
> > +float2 tex2D<float2>(texture2d<float, access::sample> tex, uint x,
> > uint y) +{
> > +    return tex.sample(s, float2(x, y)).xy;
> > +}
> > +
> > +template <typename T>
> > +T tex2D(texture2d<float, access::read> tex, uint x, uint y)
> > +{
> > +    return tex.read(uint2(x, y)).x;
> > +}
> > +
> > +template <>
> > +float2 tex2D<float2>(texture2d<float, access::read> tex, uint x,
> > uint y) +{
> > +    return tex.read(uint2(x, y)).xy;
> > +}
> > +
> > +/*
> > + * YADIF helpers
> > + */
> > +
> > +template<typename T>
> > +T spatial_predictor(T a, T b, T c, T d, T e, T f, T g,
> > +                    T h, T i, T j, T k, T l, T m, T n)
> > +{
> > +    T spatial_pred = (d + k)/2;
> > +    T spatial_score = abs(c - j) + abs(d - k) + abs(e - l);
> > +
> > +    T score = abs(b - k) + abs(c - l) + abs(d - m);
> > +    if (score < spatial_score) {
> > +        spatial_pred = (c + l)/2;
> > +        spatial_score = score;
> > +        score = abs(a - l) + abs(b - m) + abs(c - n);
> > +        if (score < spatial_score) {
> > +            spatial_pred = (b + m)/2;
> > +            spatial_score = score;
> > +        }
> > +    }
> > +    score = abs(d - i) + abs(e - j) + abs(f - k);
> > +    if (score < spatial_score) {
> > +        spatial_pred = (e + j)/2;
> > +        spatial_score = score;
> > +        score = abs(e - h) + abs(f - i) + abs(g - j);
> > +        if (score < spatial_score) {
> > +            spatial_pred = (f + i)/2;
> > +            spatial_score = score;
> > +        }
> > +    }
> > +    return spatial_pred;
> > +}
> > +
> > +template<typename T>
> > +T temporal_predictor(T A, T B, T C, T D, T E, T F,
> > +                     T G, T H, T I, T J, T K, T L,
> > +                     T spatial_pred, bool skip_check)
> > +{
> > +    T p0 = (C + H) / 2;
> > +    T p1 = F;
> > +    T p2 = (D + I) / 2;
> > +    T p3 = G;
> > +    T p4 = (E + J) / 2;
> > +
> > +    T tdiff0 = abs(D - I);
> > +    T tdiff1 = (abs(A - F) + abs(B - G)) / 2;
> > +    T tdiff2 = (abs(K - F) + abs(G - L)) / 2;
> > +
> > +    T diff = max3(tdiff0, tdiff1, tdiff2);
> > +
> > +    if (!skip_check) {
> > +        T maxi = max3(p2 - p3, p2 - p1, min(p0 - p1, p4 - p3));
> > +        T mini = min3(p2 - p3, p2 - p1, max(p0 - p1, p4 - p3));
> > +        diff = max3(diff, mini, -maxi);
> > +    }
> > +
> > +    return clamp(spatial_pred, p2 - diff, p2 + diff);
> > +}
> > +
> > +#define T float2
> > +template <>
> > +T spatial_predictor<T>(T a, T b, T c, T d, T e, T f, T g,
> > +                       T h, T i, T j, T k, T l, T m, T n)
> > +{
> > +    return T(
> > +        spatial_predictor(a.x, b.x, c.x, d.x, e.x, f.x, g.x,
> > +                          h.x, i.x, j.x, k.x, l.x, m.x, n.x),
> > +        spatial_predictor(a.y, b.y, c.y, d.y, e.y, f.y, g.y,
> > +                          h.y, i.y, j.y, k.y, l.y, m.y, n.y)
> > +    );
> > +}
> > +
> > +template <>
> > +T temporal_predictor<T>(T A, T B, T C, T D, T E, T F,
> > +                        T G, T H, T I, T J, T K, T L,
> > +                        T spatial_pred, bool skip_check)
> > +{
> > +    return T(
> > +        temporal_predictor(A.x, B.x, C.x, D.x, E.x, F.x,
> > +                           G.x, H.x, I.x, J.x, K.x, L.x,
> > +                           spatial_pred.x, skip_check),
> > +        temporal_predictor(A.y, B.y, C.y, D.y, E.y, F.y,
> > +                           G.y, H.y, I.y, J.y, K.y, L.y,
> > +                           spatial_pred.y, skip_check)
> > +    );
> > +}
> > +#undef T
> > +
> > +/*
> > + * YADIF compute
> > + */
> > +
> > +template <typename T>
> > +T yadif_compute_spatial(
> > +    texture2d<float, accesstype> cur,
> > +    uint2 pos)
> > +{
> > +    // Calculate spatial prediction
> > +    T a = tex2D<T>(cur, pos.x - 3, pos.y - 1);
> > +    T b = tex2D<T>(cur, pos.x - 2, pos.y - 1);
> > +    T c = tex2D<T>(cur, pos.x - 1, pos.y - 1);
> > +    T d = tex2D<T>(cur, pos.x - 0, pos.y - 1);
> > +    T e = tex2D<T>(cur, pos.x + 1, pos.y - 1);
> > +    T f = tex2D<T>(cur, pos.x + 2, pos.y - 1);
> > +    T g = tex2D<T>(cur, pos.x + 3, pos.y - 1);
> > +
> > +    T h = tex2D<T>(cur, pos.x - 3, pos.y + 1);
> > +    T i = tex2D<T>(cur, pos.x - 2, pos.y + 1);
> > +    T j = tex2D<T>(cur, pos.x - 1, pos.y + 1);
> > +    T k = tex2D<T>(cur, pos.x - 0, pos.y + 1);
> > +    T l = tex2D<T>(cur, pos.x + 1, pos.y + 1);
> > +    T m = tex2D<T>(cur, pos.x + 2, pos.y + 1);
> > +    T n = tex2D<T>(cur, pos.x + 3, pos.y + 1);
> > +
> > +    return spatial_predictor(a, b, c, d, e, f, g,
> > +                             h, i, j, k, l, m, n);
> > +}
> > +
> > +template <typename T>
> > +T yadif_compute_temporal(
> > +    texture2d<float, accesstype> cur,
> > +    texture2d<float, accesstype> prev2,
> > +    texture2d<float, accesstype> prev1,
> > +    texture2d<float, accesstype> next1,
> > +    texture2d<float, accesstype> next2,
> > +    T spatial_pred,
> > +    bool skip_spatial_check,
> > +    uint2 pos)
> > +{
> > +    // Calculate temporal prediction
> > +    T A = tex2D<T>(prev2, pos.x, pos.y - 1);
> > +    T B = tex2D<T>(prev2, pos.x, pos.y + 1);
> > +    T C = tex2D<T>(prev1, pos.x, pos.y - 2);
> > +    T D = tex2D<T>(prev1, pos.x, pos.y + 0);
> > +    T E = tex2D<T>(prev1, pos.x, pos.y + 2);
> > +    T F = tex2D<T>(cur,   pos.x, pos.y - 1);
> > +    T G = tex2D<T>(cur,   pos.x, pos.y + 1);
> > +    T H = tex2D<T>(next1, pos.x, pos.y - 2);
> > +    T I = tex2D<T>(next1, pos.x, pos.y + 0);
> > +    T J = tex2D<T>(next1, pos.x, pos.y + 2);
> > +    T K = tex2D<T>(next2, pos.x, pos.y - 1);
> > +    T L = tex2D<T>(next2, pos.x, pos.y + 1);
> > +
> > +    return temporal_predictor(A, B, C, D, E, F, G, H, I, J, K, L,
> > +                              spatial_pred, skip_spatial_check);
> > +}
> > +
> > +template <typename T>
> > +T yadif(
> > +    texture2d<float, access::write> dst,
> > +    texture2d<float, accesstype> prev,
> > +    texture2d<float, accesstype> cur,
> > +    texture2d<float, accesstype> next,
> > +    constant deintParams& params,
> > +    uint2 pos)
> > +{
> > +    T spatial_pred = yadif_compute_spatial<T>(cur, pos);
> > +
> > +    if (params.is_second_field) {
> > +        return yadif_compute_temporal(cur, prev, cur, next, next,
> > spatial_pred, params.skip_spatial_check, pos);
> > +    } else {
> > +        return yadif_compute_temporal(cur, prev, prev, cur, next,
> > spatial_pred, params.skip_spatial_check, pos);
> > +    }
> > +}
> > +
> > +/*
> > + * Kernel dispatch
> > + */
> > +
> > +kernel void deint(
> > +    texture2d<float, access::write> dst [[texture(0)]],
> > +    texture2d<float, accesstype> prev [[texture(1)]],
> > +    texture2d<float, accesstype> cur  [[texture(2)]],
> > +    texture2d<float, accesstype> next [[texture(3)]],
> > +    constant deintParams& params [[buffer(4)]],
> > +    uint2 pos [[thread_position_in_grid]])
> > +{
> > +    if ((pos.x >= dst.get_width()) ||
> > +        (pos.y >= dst.get_height())) {
> > +        return;
> > +    }
> > +
> > +    // Don't modify the primary field
> > +    if (pos.y % 2 == params.parity) {
> > +        float4 in = cur.read(pos);
> > +        dst.write(in, pos);
> > +        return;
> > +    }
> > +
> > +    float2 pred;
> > +    if (params.channels == 1)
> > +        pred = float2(yadif<float>(dst, prev, cur, next, params,
> > pos));
> > +    else
> > +        pred = yadif<float2>(dst, prev, cur, next, params, pos);
> > +    dst.write(pred.xyyy, pos);
> > +}
> > diff --git a/libavfilter/vf_yadif_videotoolbox.m
> > b/libavfilter/vf_yadif_videotoolbox.m new file mode 100644
> > index 0000000000..af83a73e89
> > --- /dev/null
> > +++ b/libavfilter/vf_yadif_videotoolbox.m
> > @@ -0,0 +1,406 @@
> > +/*
> > + * Copyright (C) 2018 Philip Langdale <philipl@overt.org>
> > + *               2020 Aman Karmani <aman@tmm1.net>
> > + *
> > + * This file is part of FFmpeg.
> > + *
> > + * FFmpeg is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU Lesser General Public
> > + * License as published by the Free Software Foundation; either
> > + * version 2.1 of the License, or (at your option) any later version.
> > + *
> > + * FFmpeg is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > + * Lesser General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU Lesser General Public
> > + * License along with FFmpeg; if not, write to the Free Software
> > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> > 02110-1301 USA
> > + */
> > +
> > +#include "internal.h"
> > +#include "yadif.h"
> > +#include <libavutil/avassert.h>
> > +#include <libavutil/hwcontext.h>
> > +#include <libavutil/objc.h>
> > +#include <libavfilter/metal/utils.h>
> > +
> > +extern char ff_vf_yadif_videotoolbox_metallib_data[];
> > +extern unsigned int ff_vf_yadif_videotoolbox_metallib_len;
> > +
> > +typedef struct YADIFVTContext {
> > +    YADIFContext yadif;
> > +
> > +    AVBufferRef       *device_ref;
> > +    AVBufferRef       *input_frames_ref;
> > +    AVHWFramesContext *input_frames;
> > +
> > +    id<MTLDevice> mtlDevice;
> > +    id<MTLLibrary> mtlLibrary;
> > +    id<MTLCommandQueue> mtlQueue;
> > +    id<MTLComputePipelineState> mtlPipeline;
> > +    id<MTLFunction> mtlFunction;
> > +    id<MTLBuffer> mtlParamsBuffer;
> > +
> > +    CVMetalTextureCacheRef textureCache;
> > +} YADIFVTContext;
> > +
> > +struct mtlYadifParams {
> > +    uint channels;
> > +    uint parity;
> > +    uint tff;
> > +    bool is_second_field;
> > +    bool skip_spatial_check;
> > +    int field_mode;
> > +};
> > +
> > +static void call_kernel(AVFilterContext *ctx,
> > +                        id<MTLTexture> dst,
> > +                        id<MTLTexture> prev,
> > +                        id<MTLTexture> cur,
> > +                        id<MTLTexture> next,
> > +                        int channels,
> > +                        int parity,
> > +                        int tff)
> > +{
> > +    YADIFVTContext *s = ctx->priv;
> > +    id<MTLCommandBuffer> buffer = s->mtlQueue.commandBuffer;
> > +    id<MTLComputeCommandEncoder> encoder =
> > buffer.computeCommandEncoder;
> > +    struct mtlYadifParams *params = (struct mtlYadifParams
> > *)s->mtlParamsBuffer.contents;
> > +    *params = (struct mtlYadifParams){
> > +        .channels = channels,
> > +        .parity = parity,
> > +        .tff = tff,
> > +        .is_second_field = !(parity ^ tff),
> > +        .skip_spatial_check = s->yadif.mode&2,
> > +        .field_mode = s->yadif.current_field
> > +    };
> > +
> > +    [encoder setTexture:dst  atIndex:0];
> > +    [encoder setTexture:prev atIndex:1];
> > +    [encoder setTexture:cur  atIndex:2];
> > +    [encoder setTexture:next atIndex:3];
> > +    [encoder setBuffer:s->mtlParamsBuffer offset:0 atIndex:4];
> > +    ff_metal_compute_encoder_dispatch(s->mtlDevice, s->mtlPipeline,
> > encoder, dst.width, dst.height);
> > +    [encoder endEncoding];
> > +
> > +    [buffer commit];
> > +    [buffer waitUntilCompleted];
> > +
> > +    ff_objc_release(&encoder);
> > +    ff_objc_release(&buffer);
> > +}
> > +
> > +static void filter(AVFilterContext *ctx, AVFrame *dst,
> > +                   int parity, int tff)
> > +{
> > +    YADIFVTContext *s = ctx->priv;
> > +    YADIFContext *y = &s->yadif;
> > +    int i;
> > +
> > +    for (i = 0; i < y->csp->nb_components; i++) {
> > +        int pixel_size, channels;
> > +        const AVComponentDescriptor *comp = &y->csp->comp[i];
> > +        CVMetalTextureRef prev, cur, next, dest;
> > +        id<MTLTexture> tex_prev, tex_cur, tex_next, tex_dest;
> > +        MTLPixelFormat format;
> > +
> > +        if (comp->plane < i) {
> > +            // We process planes as a whole, so don't reprocess
> > +            // them for additional components
> > +            continue;
> > +        }
> > +
> > +        pixel_size = (comp->depth + comp->shift) / 8;
> > +        channels = comp->step / pixel_size;
> > +        if (pixel_size > 2 || channels > 2) {
> > +            av_log(ctx, AV_LOG_ERROR, "Unsupported pixel format:
> > %s\n", y->csp->name);
> > +            goto exit;
> > +        }
> > +        switch (pixel_size) {
> > +        case 1:
> > +            format = channels == 1 ? MTLPixelFormatR8Unorm :
> > MTLPixelFormatRG8Unorm;
> > +            break;
> > +        case 2:
> > +            format = channels == 1 ? MTLPixelFormatR16Unorm :
> > MTLPixelFormatRG16Unorm;
> > +            break;
> > +        default:
> > +            av_log(ctx, AV_LOG_ERROR, "Unsupported pixel format:
> > %s\n", y->csp->name);
> > +            goto exit;
> > +        }
> > +        av_log(ctx, AV_LOG_TRACE,
> > +               "Deinterlacing plane %d: pixel_size: %d channels:
> > %d\n",
> > +               comp->plane, pixel_size, channels);
> > +
> > +        prev = ff_metal_texture_from_pixbuf(ctx, s->textureCache,
> > (CVPixelBufferRef)y->prev->data[3], i, format);
> > +        cur  = ff_metal_texture_from_pixbuf(ctx, s->textureCache,
> > (CVPixelBufferRef)y->cur->data[3], i, format);
> > +        next = ff_metal_texture_from_pixbuf(ctx, s->textureCache,
> > (CVPixelBufferRef)y->next->data[3], i, format);
> > +        dest = ff_metal_texture_from_pixbuf(ctx, s->textureCache,
> > (CVPixelBufferRef)dst->data[3], i, format); +
> > +        tex_prev = CVMetalTextureGetTexture(prev);
> > +        tex_cur  = CVMetalTextureGetTexture(cur);
> > +        tex_next = CVMetalTextureGetTexture(next);
> > +        tex_dest = CVMetalTextureGetTexture(dest);
> > +
> > +        call_kernel(ctx, tex_dest, tex_prev, tex_cur, tex_next,
> > +                         channels, parity, tff);
> > +
> > +        CFRelease(prev);
> > +        CFRelease(cur);
> > +        CFRelease(next);
> > +        CFRelease(dest);
> > +    }
> > +
> > +    CVBufferPropagateAttachments((CVPixelBufferRef)y->cur->data[3],
> > (CVPixelBufferRef)dst->data[3]); +
> > +    if (y->current_field == YADIF_FIELD_END) {
> > +        y->current_field = YADIF_FIELD_NORMAL;
> > +    }
> > +
> > +exit:
> > +    return;
> > +}
> > +
> > +static av_cold void yadif_videotoolbox_uninit(AVFilterContext *ctx)
> > +{
> > +    YADIFVTContext *s = ctx->priv;
> > +    YADIFContext *y = &s->yadif;
> > +
> > +    av_frame_free(&y->prev);
> > +    av_frame_free(&y->cur);
> > +    av_frame_free(&y->next);
> > +
> > +    av_buffer_unref(&s->device_ref);
> > +    av_buffer_unref(&s->input_frames_ref);
> > +    s->input_frames = NULL;
> > +
> > +    ff_objc_release(&s->mtlParamsBuffer);
> > +    ff_objc_release(&s->mtlFunction);
> > +    ff_objc_release(&s->mtlPipeline);
> > +    ff_objc_release(&s->mtlQueue);
> > +    ff_objc_release(&s->mtlLibrary);
> > +    ff_objc_release(&s->mtlDevice);
> > +
> > +    if (s->textureCache) {
> > +        CFRelease(s->textureCache);
> > +        s->textureCache = NULL;
> > +    }
> > +}
> > +
> > +static av_cold int yadif_videotoolbox_init(AVFilterContext *ctx)
> > +{
> > +    YADIFVTContext *s = ctx->priv;
> > +    NSError *err = nil;
> > +    CVReturn ret;
> > +
> > +    s->mtlDevice = MTLCreateSystemDefaultDevice();
> > +    if (!s->mtlDevice) {
> > +        av_log(ctx, AV_LOG_ERROR, "Unable to find Metal device\n");
> > +        goto fail;
> > +    }
> > +
> > +    av_log(ctx, AV_LOG_INFO, "Using Metal device: %s\n",
> > s->mtlDevice.name.UTF8String); +
> > +    dispatch_data_t libData = dispatch_data_create(
> > +        ff_vf_yadif_videotoolbox_metallib_data,
> > +        ff_vf_yadif_videotoolbox_metallib_len,
> > +        nil,
> > +        nil);
> > +    s->mtlLibrary = [s->mtlDevice newLibraryWithData:libData
> > error:&err];
> > +    dispatch_release(libData);
> > +    libData = nil;
> > +    if (err) {
> > +        av_log(ctx, AV_LOG_ERROR, "Failed to load Metal library:
> > %s\n", err.description.UTF8String);
> > +        goto fail;
> > +    }
> > +
> > +    s->mtlFunction = [s->mtlLibrary newFunctionWithName:@"deint"];
> > +    if (!s->mtlFunction) {
> > +        av_log(ctx, AV_LOG_ERROR, "Failed to create Metal
> > function!\n");
> > +        goto fail;
> > +    }
> > +
> > +    s->mtlQueue = s->mtlDevice.newCommandQueue;
> > +    if (!s->mtlQueue) {
> > +        av_log(ctx, AV_LOG_ERROR, "Failed to create Metal command
> > queue!\n");
> > +        goto fail;
> > +    }
> > +
> > +    s->mtlPipeline = [s->mtlDevice
> > +        newComputePipelineStateWithFunction:s->mtlFunction
> > +        error:&err];
> > +    if (err) {
> > +        av_log(ctx, AV_LOG_ERROR, "Failed to create Metal compute
> > pipeline: %s\n", err.description.UTF8String);
> > +        goto fail;
> > +    }
> > +
> > +    s->mtlParamsBuffer = [s->mtlDevice
> > +        newBufferWithLength:sizeof(struct mtlYadifParams)
> > +        options:MTLResourceStorageModeShared];
> > +    if (!s->mtlParamsBuffer) {
> > +        av_log(ctx, AV_LOG_ERROR, "Failed to create Metal buffer for
> > parameters\n");
> > +        goto fail;
> > +    }
> > +
> > +    ret = CVMetalTextureCacheCreate(
> > +        NULL,
> > +        NULL,
> > +        s->mtlDevice,
> > +        NULL,
> > +        &s->textureCache
> > +    );
> > +    if (ret != kCVReturnSuccess) {
> > +        av_log(ctx, AV_LOG_ERROR, "Failed to create
> > CVMetalTextureCache: %d\n", ret);
> > +        goto fail;
> > +    }
> > +
> > +    return 0;
> > +fail:
> > +    yadif_videotoolbox_uninit(ctx);
> > +    return AVERROR_EXTERNAL;
> > +}
> > +
> > +static int config_input(AVFilterLink *inlink)
> > +{
> > +    AVFilterContext *ctx = inlink->dst;
> > +    YADIFVTContext *s = ctx->priv;
> > +
> > +    if (!inlink->hw_frames_ctx) {
> > +        av_log(ctx, AV_LOG_ERROR, "A hardware frames reference is "
> > +               "required to associate the processing device.\n");
> > +        return AVERROR(EINVAL);
> > +    }
> > +
> > +    s->input_frames_ref = av_buffer_ref(inlink->hw_frames_ctx);
> > +    if (!s->input_frames_ref) {
> > +        av_log(ctx, AV_LOG_ERROR, "A input frames reference create "
> > +               "failed.\n");
> > +        return AVERROR(ENOMEM);
> > +    }
> > +    s->input_frames = (AVHWFramesContext*)s->input_frames_ref->data;
> > +
> > +    return 0;
> > +}
> > +
> > +static int config_output(AVFilterLink *link)
> > +{
> > +    AVHWFramesContext *output_frames;
> > +    AVFilterContext *ctx = link->src;
> > +    YADIFVTContext *s = ctx->priv;
> > [...]
>
> LGTM for the general part. I'll take your word that the Metal-specific
> parts work as intended.
>

Patchset applied. Thanks to everyone who helped review on and off-list.


>
> --phil
>
Pavel Koshevoy Dec. 19, 2021, 12:48 a.m. UTC | #3
On Sat, Dec 18, 2021 at 1:02 PM Aman Karmani <ffmpeg@tmm1.net> wrote:

> On Fri, Dec 17, 2021 at 1:38 PM Philip Langdale <philipl@overt.org> wrote:
>
> > On Fri, 17 Dec 2021 12:04:18 -0800
> > Aman Karmani <ffmpeg@tmm1.net> wrote:
> >
> > > [...]
> >
> > LGTM for the general part. I'll take your word that the Metal-specific
> > parts work as intended.
> >
>
> Patchset applied. Thanks to everyone who helped review on and off-list.
>
>
>
It appears to break the build for me:

```
MAN doc/libavfilter.3
GEN libswresample/libswresample.ver
LD libswscale/libswscale.6.dylib
LD libpostproc/libpostproc.56.dylib
LD libswresample/libswresample.4.dylib
STRIP libavcodec/x86/vp9itxfm.o
GEN libavcodec/libavcodec.ver
LD libavcodec/libavcodec.59.dylib
ld: warning: could not create compact unwind for _ff_cfhd_init_vlcs: stack subq instruction is too different from dwarf stack size
ld: warning: could not create compact unwind for _ff_rl_init_vlc: stack subq instruction is too different from dwarf stack size
LD libavformat/libavformat.59.dylib
LD libavfilter/libavfilter.8.dylib
clang: error: no such file or directory: 'libavfilter/metal/vf_yadif_videotoolbox.metallib.o'
make: *** [libavfilter/libavfilter.8.dylib] Error 1

real 3m22.511s
user 16m21.483s
sys 1m43.498s
```
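
The missing .metallib.o is presumably meant to be generated from the
.metal source at build time. As a rough, untested sketch (this assumes
the Xcode command-line Metal toolchain; the actual ffbuild rules and
generated symbol names may differ), the pipeline would be something
like:

```
# Compile the shader to AIR, then link it into a metallib:
xcrun -sdk macosx metal -c libavfilter/metal/vf_yadif_videotoolbox.metal \
    -o vf_yadif_videotoolbox.air
xcrun -sdk macosx metallib vf_yadif_videotoolbox.air \
    -o vf_yadif_videotoolbox.metallib

# Embed the metallib as a C byte array and compile that; note xxd -i
# emits <name>[] / <name>_len symbols, which would still need renaming
# to match the ff_vf_yadif_videotoolbox_metallib_data/_len externs
# declared in vf_yadif_videotoolbox.m:
xxd -i vf_yadif_videotoolbox.metallib > vf_yadif_videotoolbox.metallib.c
cc -c vf_yadif_videotoolbox.metallib.c -o vf_yadif_videotoolbox.metallib.o
```

So my guess is that whatever rule produces that object is not firing
here.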


I initially tried --disable-metal, but that didn't work:
```
Unknown option "--disable-metal".
See /Users/pavel/src/ffmpeg/configure --help for available options.

real 0m0.271s
user 0m0.164s
sys 0m0.105s
```

I was able to work around the linker error with
--disable-filter=yadif_videotoolbox
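
In case it is a toolchain issue, a quick (hypothetical) sanity check
would be to see whether the Metal compiler is present at all:

```
xcrun --sdk macosx --find metal || echo "Metal compiler not found"
```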


Pavel.

Patch

diff --git a/configure b/configure
index 32a39f5f5b..d8b07c8e00 100755
--- a/configure
+++ b/configure
@@ -3748,6 +3748,7 @@  vpp_qsv_filter_select="qsvvpp"
 xfade_opencl_filter_deps="opencl"
 yadif_cuda_filter_deps="ffnvcodec"
 yadif_cuda_filter_deps_any="cuda_nvcc cuda_llvm"
+yadif_videotoolbox_filter_deps="metal corevideo videotoolbox"
 
 # examples
 avio_list_dir_deps="avformat avutil"
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 2fe495df28..9a061ba3c8 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -519,6 +519,10 @@  OBJS-$(CONFIG_XSTACK_FILTER)                 += vf_stack.o framesync.o
 OBJS-$(CONFIG_YADIF_FILTER)                  += vf_yadif.o yadif_common.o
 OBJS-$(CONFIG_YADIF_CUDA_FILTER)             += vf_yadif_cuda.o vf_yadif_cuda.ptx.o \
                                                 yadif_common.o cuda/load_helper.o
+OBJS-$(CONFIG_YADIF_VIDEOTOOLBOX_FILTER)     += vf_yadif_videotoolbox.o \
+                                                metal/vf_yadif_videotoolbox.metallib.o \
+                                                metal/utils.o \
+                                                yadif_common.o
 OBJS-$(CONFIG_YAEPBLUR_FILTER)               += vf_yaepblur.o
 OBJS-$(CONFIG_ZMQ_FILTER)                    += f_zmq.o
 OBJS-$(CONFIG_ZOOMPAN_FILTER)                += vf_zoompan.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index ec57a2c49c..26f1c73505 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -496,6 +496,7 @@  extern const AVFilter ff_vf_xmedian;
 extern const AVFilter ff_vf_xstack;
 extern const AVFilter ff_vf_yadif;
 extern const AVFilter ff_vf_yadif_cuda;
+extern const AVFilter ff_vf_yadif_videotoolbox;
 extern const AVFilter ff_vf_yaepblur;
 extern const AVFilter ff_vf_zmq;
 extern const AVFilter ff_vf_zoompan;
diff --git a/libavfilter/metal/vf_yadif_videotoolbox.metal b/libavfilter/metal/vf_yadif_videotoolbox.metal
new file mode 100644
index 0000000000..50783f2ffe
--- /dev/null
+++ b/libavfilter/metal/vf_yadif_videotoolbox.metal
@@ -0,0 +1,269 @@ 
+/*
+ * Copyright (C) 2018 Philip Langdale <philipl@overt.org>
+ *               2020 Aman Karmani <aman@tmm1.net>
+ *               2020 Stefan Dyulgerov <stefan.dyulgerov@gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <metal_stdlib>
+#include <metal_integer>
+#include <metal_texture>
+
+using namespace metal;
+
+/*
+ * Parameters
+ */
+
+struct deintParams {
+    uint channels;
+    uint parity;
+    uint tff;
+    bool is_second_field;
+    bool skip_spatial_check;
+    int field_mode;
+};
+
+/*
+ * Texture access helpers
+ */
+
+#define accesstype access::sample
+const sampler s(coord::pixel);
+
+template <typename T>
+T tex2D(texture2d<float, access::sample> tex, uint x, uint y)
+{
+    return tex.sample(s, float2(x, y)).x;
+}
+
+template <>
+float2 tex2D<float2>(texture2d<float, access::sample> tex, uint x, uint y)
+{
+    return tex.sample(s, float2(x, y)).xy;
+}
+
+template <typename T>
+T tex2D(texture2d<float, access::read> tex, uint x, uint y)
+{
+    return tex.read(uint2(x, y)).x;
+}
+
+template <>
+float2 tex2D<float2>(texture2d<float, access::read> tex, uint x, uint y)
+{
+    return tex.read(uint2(x, y)).xy;
+}
+
+/*
+ * YADIF helpers
+ */
+
+template<typename T>
+T spatial_predictor(T a, T b, T c, T d, T e, T f, T g,
+                    T h, T i, T j, T k, T l, T m, T n)
+{
+    T spatial_pred = (d + k)/2;
+    T spatial_score = abs(c - j) + abs(d - k) + abs(e - l);
+
+    T score = abs(b - k) + abs(c - l) + abs(d - m);
+    if (score < spatial_score) {
+        spatial_pred = (c + l)/2;
+        spatial_score = score;
+        score = abs(a - l) + abs(b - m) + abs(c - n);
+        if (score < spatial_score) {
+            spatial_pred = (b + m)/2;
+            spatial_score = score;
+        }
+    }
+    score = abs(d - i) + abs(e - j) + abs(f - k);
+    if (score < spatial_score) {
+        spatial_pred = (e + j)/2;
+        spatial_score = score;
+        score = abs(e - h) + abs(f - i) + abs(g - j);
+        if (score < spatial_score) {
+            spatial_pred = (f + i)/2;
+            spatial_score = score;
+        }
+    }
+    return spatial_pred;
+}
+
+template<typename T>
+T temporal_predictor(T A, T B, T C, T D, T E, T F,
+                     T G, T H, T I, T J, T K, T L,
+                     T spatial_pred, bool skip_check)
+{
+    T p0 = (C + H) / 2;
+    T p1 = F;
+    T p2 = (D + I) / 2;
+    T p3 = G;
+    T p4 = (E + J) / 2;
+
+    T tdiff0 = abs(D - I);
+    T tdiff1 = (abs(A - F) + abs(B - G)) / 2;
+    T tdiff2 = (abs(K - F) + abs(G - L)) / 2;
+
+    T diff = max3(tdiff0, tdiff1, tdiff2);
+
+    if (!skip_check) {
+        T maxi = max3(p2 - p3, p2 - p1, min(p0 - p1, p4 - p3));
+        T mini = min3(p2 - p3, p2 - p1, max(p0 - p1, p4 - p3));
+        diff = max3(diff, mini, -maxi);
+    }
+
+    return clamp(spatial_pred, p2 - diff, p2 + diff);
+}
+
+#define T float2
+template <>
+T spatial_predictor<T>(T a, T b, T c, T d, T e, T f, T g,
+                       T h, T i, T j, T k, T l, T m, T n)
+{
+    return T(
+        spatial_predictor(a.x, b.x, c.x, d.x, e.x, f.x, g.x,
+                          h.x, i.x, j.x, k.x, l.x, m.x, n.x),
+        spatial_predictor(a.y, b.y, c.y, d.y, e.y, f.y, g.y,
+                          h.y, i.y, j.y, k.y, l.y, m.y, n.y)
+    );
+}
+
+template <>
+T temporal_predictor<T>(T A, T B, T C, T D, T E, T F,
+                        T G, T H, T I, T J, T K, T L,
+                        T spatial_pred, bool skip_check)
+{
+    return T(
+        temporal_predictor(A.x, B.x, C.x, D.x, E.x, F.x,
+                           G.x, H.x, I.x, J.x, K.x, L.x,
+                           spatial_pred.x, skip_check),
+        temporal_predictor(A.y, B.y, C.y, D.y, E.y, F.y,
+                           G.y, H.y, I.y, J.y, K.y, L.y,
+                           spatial_pred.y, skip_check)
+    );
+}
+#undef T
+
+/*
+ * YADIF compute
+ */
+
+template <typename T>
+T yadif_compute_spatial(
+    texture2d<float, accesstype> cur,
+    uint2 pos)
+{
+    // Calculate spatial prediction
+    T a = tex2D<T>(cur, pos.x - 3, pos.y - 1);
+    T b = tex2D<T>(cur, pos.x - 2, pos.y - 1);
+    T c = tex2D<T>(cur, pos.x - 1, pos.y - 1);
+    T d = tex2D<T>(cur, pos.x - 0, pos.y - 1);
+    T e = tex2D<T>(cur, pos.x + 1, pos.y - 1);
+    T f = tex2D<T>(cur, pos.x + 2, pos.y - 1);
+    T g = tex2D<T>(cur, pos.x + 3, pos.y - 1);
+
+    T h = tex2D<T>(cur, pos.x - 3, pos.y + 1);
+    T i = tex2D<T>(cur, pos.x - 2, pos.y + 1);
+    T j = tex2D<T>(cur, pos.x - 1, pos.y + 1);
+    T k = tex2D<T>(cur, pos.x - 0, pos.y + 1);
+    T l = tex2D<T>(cur, pos.x + 1, pos.y + 1);
+    T m = tex2D<T>(cur, pos.x + 2, pos.y + 1);
+    T n = tex2D<T>(cur, pos.x + 3, pos.y + 1);
+
+    return spatial_predictor(a, b, c, d, e, f, g,
+                             h, i, j, k, l, m, n);
+}
+
+template <typename T>
+T yadif_compute_temporal(
+    texture2d<float, accesstype> cur,
+    texture2d<float, accesstype> prev2,
+    texture2d<float, accesstype> prev1,
+    texture2d<float, accesstype> next1,
+    texture2d<float, accesstype> next2,
+    T spatial_pred,
+    bool skip_spatial_check,
+    uint2 pos)
+{
+    // Calculate temporal prediction
+    T A = tex2D<T>(prev2, pos.x, pos.y - 1);
+    T B = tex2D<T>(prev2, pos.x, pos.y + 1);
+    T C = tex2D<T>(prev1, pos.x, pos.y - 2);
+    T D = tex2D<T>(prev1, pos.x, pos.y + 0);
+    T E = tex2D<T>(prev1, pos.x, pos.y + 2);
+    T F = tex2D<T>(cur,   pos.x, pos.y - 1);
+    T G = tex2D<T>(cur,   pos.x, pos.y + 1);
+    T H = tex2D<T>(next1, pos.x, pos.y - 2);
+    T I = tex2D<T>(next1, pos.x, pos.y + 0);
+    T J = tex2D<T>(next1, pos.x, pos.y + 2);
+    T K = tex2D<T>(next2, pos.x, pos.y - 1);
+    T L = tex2D<T>(next2, pos.x, pos.y + 1);
+
+    return temporal_predictor(A, B, C, D, E, F, G, H, I, J, K, L,
+                              spatial_pred, skip_spatial_check);
+}
+
+template <typename T>
+T yadif(
+    texture2d<float, access::write> dst,
+    texture2d<float, accesstype> prev,
+    texture2d<float, accesstype> cur,
+    texture2d<float, accesstype> next,
+    constant deintParams& params,
+    uint2 pos)
+{
+    T spatial_pred = yadif_compute_spatial<T>(cur, pos);
+
+    if (params.is_second_field) {
+        return yadif_compute_temporal(cur, prev, cur, next, next, spatial_pred, params.skip_spatial_check, pos);
+    } else {
+        return yadif_compute_temporal(cur, prev, prev, cur, next, spatial_pred, params.skip_spatial_check, pos);
+    }
+}
+
+/*
+ * Kernel dispatch
+ */
+
+kernel void deint(
+    texture2d<float, access::write> dst [[texture(0)]],
+    texture2d<float, accesstype> prev [[texture(1)]],
+    texture2d<float, accesstype> cur  [[texture(2)]],
+    texture2d<float, accesstype> next [[texture(3)]],
+    constant deintParams& params [[buffer(4)]],
+    uint2 pos [[thread_position_in_grid]])
+{
+    if ((pos.x >= dst.get_width()) ||
+        (pos.y >= dst.get_height())) {
+        return;
+    }
+
+    // Don't modify the primary field
+    if (pos.y % 2 == params.parity) {
+        float4 in = cur.read(pos);
+        dst.write(in, pos);
+        return;
+    }
+
+    float2 pred;
+    if (params.channels == 1)
+        pred = float2(yadif<float>(dst, prev, cur, next, params, pos));
+    else
+        pred = yadif<float2>(dst, prev, cur, next, params, pos);
+    dst.write(pred.xyyy, pos);
+}
diff --git a/libavfilter/vf_yadif_videotoolbox.m b/libavfilter/vf_yadif_videotoolbox.m
new file mode 100644
index 0000000000..af83a73e89
--- /dev/null
+++ b/libavfilter/vf_yadif_videotoolbox.m
@@ -0,0 +1,406 @@ 
+/*
+ * Copyright (C) 2018 Philip Langdale <philipl@overt.org>
+ *               2020 Aman Karmani <aman@tmm1.net>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "internal.h"
+#include "yadif.h"
+#include <libavutil/avassert.h>
+#include <libavutil/hwcontext.h>
+#include <libavutil/objc.h>
+#include <libavfilter/metal/utils.h>
+
+extern char ff_vf_yadif_videotoolbox_metallib_data[];
+extern unsigned int ff_vf_yadif_videotoolbox_metallib_len;
+
+typedef struct YADIFVTContext {
+    YADIFContext yadif;
+
+    AVBufferRef       *device_ref;
+    AVBufferRef       *input_frames_ref;
+    AVHWFramesContext *input_frames;
+
+    id<MTLDevice> mtlDevice;
+    id<MTLLibrary> mtlLibrary;
+    id<MTLCommandQueue> mtlQueue;
+    id<MTLComputePipelineState> mtlPipeline;
+    id<MTLFunction> mtlFunction;
+    id<MTLBuffer> mtlParamsBuffer;
+
+    CVMetalTextureCacheRef textureCache;
+} YADIFVTContext;
+
+struct mtlYadifParams {
+    uint channels;
+    uint parity;
+    uint tff;
+    bool is_second_field;
+    bool skip_spatial_check;
+    int field_mode;
+};
+
+static void call_kernel(AVFilterContext *ctx,
+                        id<MTLTexture> dst,
+                        id<MTLTexture> prev,
+                        id<MTLTexture> cur,
+                        id<MTLTexture> next,
+                        int channels,
+                        int parity,
+                        int tff)
+{
+    YADIFVTContext *s = ctx->priv;
+    id<MTLCommandBuffer> buffer = s->mtlQueue.commandBuffer;
+    id<MTLComputeCommandEncoder> encoder = buffer.computeCommandEncoder;
+    struct mtlYadifParams *params = (struct mtlYadifParams *)s->mtlParamsBuffer.contents;
+    *params = (struct mtlYadifParams){
+        .channels = channels,
+        .parity = parity,
+        .tff = tff,
+        .is_second_field = !(parity ^ tff),
+        .skip_spatial_check = s->yadif.mode&2,
+        .field_mode = s->yadif.current_field
+    };
+
+    [encoder setTexture:dst  atIndex:0];
+    [encoder setTexture:prev atIndex:1];
+    [encoder setTexture:cur  atIndex:2];
+    [encoder setTexture:next atIndex:3];
+    [encoder setBuffer:s->mtlParamsBuffer offset:0 atIndex:4];
+    ff_metal_compute_encoder_dispatch(s->mtlDevice, s->mtlPipeline, encoder, dst.width, dst.height);
+    [encoder endEncoding];
+
+    [buffer commit];
+    [buffer waitUntilCompleted];
+
+    ff_objc_release(&encoder);
+    ff_objc_release(&buffer);
+}
+
+static void filter(AVFilterContext *ctx, AVFrame *dst,
+                   int parity, int tff)
+{
+    YADIFVTContext *s = ctx->priv;
+    YADIFContext *y = &s->yadif;
+    int i;
+
+    for (i = 0; i < y->csp->nb_components; i++) {
+        int pixel_size, channels;
+        const AVComponentDescriptor *comp = &y->csp->comp[i];
+        CVMetalTextureRef prev, cur, next, dest;
+        id<MTLTexture> tex_prev, tex_cur, tex_next, tex_dest;
+        MTLPixelFormat format;
+
+        if (comp->plane < i) {
+            // We process planes as a whole, so don't reprocess
+            // them for additional components
+            continue;
+        }
+
+        pixel_size = (comp->depth + comp->shift) / 8;
+        channels = comp->step / pixel_size;
+        if (pixel_size > 2 || channels > 2) {
+            av_log(ctx, AV_LOG_ERROR, "Unsupported pixel format: %s\n", y->csp->name);
+            goto exit;
+        }
+        switch (pixel_size) {
+        case 1:
+            format = channels == 1 ? MTLPixelFormatR8Unorm : MTLPixelFormatRG8Unorm;
+            break;
+        case 2:
+            format = channels == 1 ? MTLPixelFormatR16Unorm : MTLPixelFormatRG16Unorm;
+            break;
+        default:
+            av_log(ctx, AV_LOG_ERROR, "Unsupported pixel format: %s\n", y->csp->name);
+            goto exit;
+        }
+        av_log(ctx, AV_LOG_TRACE,
+               "Deinterlacing plane %d: pixel_size: %d channels: %d\n",
+               comp->plane, pixel_size, channels);
+
+        prev = ff_metal_texture_from_pixbuf(ctx, s->textureCache, (CVPixelBufferRef)y->prev->data[3], i, format);
+        cur  = ff_metal_texture_from_pixbuf(ctx, s->textureCache, (CVPixelBufferRef)y->cur->data[3], i, format);
+        next = ff_metal_texture_from_pixbuf(ctx, s->textureCache, (CVPixelBufferRef)y->next->data[3], i, format);
+        dest = ff_metal_texture_from_pixbuf(ctx, s->textureCache, (CVPixelBufferRef)dst->data[3], i, format);
+
+        tex_prev = CVMetalTextureGetTexture(prev);
+        tex_cur  = CVMetalTextureGetTexture(cur);
+        tex_next = CVMetalTextureGetTexture(next);
+        tex_dest = CVMetalTextureGetTexture(dest);
+
+        call_kernel(ctx, tex_dest, tex_prev, tex_cur, tex_next,
+                         channels, parity, tff);
+
+        CFRelease(prev);
+        CFRelease(cur);
+        CFRelease(next);
+        CFRelease(dest);
+    }
+
+    CVBufferPropagateAttachments((CVPixelBufferRef)y->cur->data[3], (CVPixelBufferRef)dst->data[3]);
+
+    if (y->current_field == YADIF_FIELD_END) {
+        y->current_field = YADIF_FIELD_NORMAL;
+    }
+
+exit:
+    return;
+}
+
+static av_cold void yadif_videotoolbox_uninit(AVFilterContext *ctx)
+{
+    YADIFVTContext *s = ctx->priv;
+    YADIFContext *y = &s->yadif;
+
+    av_frame_free(&y->prev);
+    av_frame_free(&y->cur);
+    av_frame_free(&y->next);
+
+    av_buffer_unref(&s->device_ref);
+    av_buffer_unref(&s->input_frames_ref);
+    s->input_frames = NULL;
+
+    ff_objc_release(&s->mtlParamsBuffer);
+    ff_objc_release(&s->mtlFunction);
+    ff_objc_release(&s->mtlPipeline);
+    ff_objc_release(&s->mtlQueue);
+    ff_objc_release(&s->mtlLibrary);
+    ff_objc_release(&s->mtlDevice);
+
+    if (s->textureCache) {
+        CFRelease(s->textureCache);
+        s->textureCache = NULL;
+    }
+}
+
+static av_cold int yadif_videotoolbox_init(AVFilterContext *ctx)
+{
+    YADIFVTContext *s = ctx->priv;
+    NSError *err = nil;
+    CVReturn ret;
+
+    s->mtlDevice = MTLCreateSystemDefaultDevice();
+    if (!s->mtlDevice) {
+        av_log(ctx, AV_LOG_ERROR, "Unable to find Metal device\n");
+        goto fail;
+    }
+
+    av_log(ctx, AV_LOG_INFO, "Using Metal device: %s\n", s->mtlDevice.name.UTF8String);
+
+    dispatch_data_t libData = dispatch_data_create(
+        ff_vf_yadif_videotoolbox_metallib_data,
+        ff_vf_yadif_videotoolbox_metallib_len,
+        nil,
+        nil);
+    s->mtlLibrary = [s->mtlDevice newLibraryWithData:libData error:&err];
+    dispatch_release(libData);
+    libData = nil;
+    if (err) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to load Metal library: %s\n", err.description.UTF8String);
+        goto fail;
+    }
+
+    s->mtlFunction = [s->mtlLibrary newFunctionWithName:@"deint"];
+    if (!s->mtlFunction) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to create Metal function!\n");
+        goto fail;
+    }
+
+    s->mtlQueue = s->mtlDevice.newCommandQueue;
+    if (!s->mtlQueue) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to create Metal command queue!\n");
+        goto fail;
+    }
+
+    s->mtlPipeline = [s->mtlDevice
+        newComputePipelineStateWithFunction:s->mtlFunction
+        error:&err];
+    if (err) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to create Metal compute pipeline: %s\n", err.description.UTF8String);
+        goto fail;
+    }
+
+    s->mtlParamsBuffer = [s->mtlDevice
+        newBufferWithLength:sizeof(struct mtlYadifParams)
+        options:MTLResourceStorageModeShared];
+    if (!s->mtlParamsBuffer) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to create Metal buffer for parameters\n");
+        goto fail;
+    }
+
+    ret = CVMetalTextureCacheCreate(
+        NULL,
+        NULL,
+        s->mtlDevice,
+        NULL,
+        &s->textureCache
+    );
+    if (ret != kCVReturnSuccess) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to create CVMetalTextureCache: %d\n", ret);
+        goto fail;
+    }
+
+    return 0;
+fail:
+    yadif_videotoolbox_uninit(ctx);
+    return AVERROR_EXTERNAL;
+}
+
+static int config_input(AVFilterLink *inlink)
+{
+    AVFilterContext *ctx = inlink->dst;
+    YADIFVTContext *s = ctx->priv;
+
+    if (!inlink->hw_frames_ctx) {
+        av_log(ctx, AV_LOG_ERROR, "A hardware frames reference is "
+               "required to associate the processing device.\n");
+        return AVERROR(EINVAL);
+    }
+
+    s->input_frames_ref = av_buffer_ref(inlink->hw_frames_ctx);
+    if (!s->input_frames_ref) {
+        av_log(ctx, AV_LOG_ERROR, "A input frames reference create "
+               "failed.\n");
+        return AVERROR(ENOMEM);
+    }
+    s->input_frames = (AVHWFramesContext*)s->input_frames_ref->data;
+
+    return 0;
+}
+
+static int config_output(AVFilterLink *link)
+{
+    AVHWFramesContext *output_frames;
+    AVFilterContext *ctx = link->src;
+    YADIFVTContext *s = ctx->priv;
+    YADIFContext *y = &s->yadif;
+    int ret = 0;
+
+    av_assert0(s->input_frames);
+    s->device_ref = av_buffer_ref(s->input_frames->device_ref);
+    if (!s->device_ref) {
+        av_log(ctx, AV_LOG_ERROR, "A device reference create "
+               "failed.\n");
+        return AVERROR(ENOMEM);
+    }
+
+    link->hw_frames_ctx = av_hwframe_ctx_alloc(s->device_ref);
+    if (!link->hw_frames_ctx) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to create HW frame context "
+               "for output.\n");
+        ret = AVERROR(ENOMEM);
+        goto exit;
+    }
+
+    output_frames = (AVHWFramesContext*)link->hw_frames_ctx->data;
+
+    output_frames->format    = AV_PIX_FMT_VIDEOTOOLBOX;
+    output_frames->sw_format = s->input_frames->sw_format;
+    output_frames->width     = ctx->inputs[0]->w;
+    output_frames->height    = ctx->inputs[0]->h;
+
+    ret = ff_filter_init_hw_frames(ctx, link, 10);
+    if (ret < 0)
+        goto exit;
+
+    ret = av_hwframe_ctx_init(link->hw_frames_ctx);
+    if (ret < 0) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to initialise VideoToolbox frame "
+               "context for output: %d\n", ret);
+        goto exit;
+    }
+
+    link->time_base.num = ctx->inputs[0]->time_base.num;
+    link->time_base.den = ctx->inputs[0]->time_base.den * 2;
+    link->w             = ctx->inputs[0]->w;
+    link->h             = ctx->inputs[0]->h;
+
+    if (y->mode & 1)
+        link->frame_rate = av_mul_q(ctx->inputs[0]->frame_rate,
+                                    (AVRational){2, 1});
+
+    if (link->w < 3 || link->h < 3) {
+        av_log(ctx, AV_LOG_ERROR, "Video of less than 3 columns or lines is not supported\n");
+        ret = AVERROR(EINVAL);
+        goto exit;
+    }
+
+    y->csp = av_pix_fmt_desc_get(output_frames->sw_format);
+    y->filter = filter;
+
+exit:
+    return ret;
+}
+
+#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+#define CONST(name, help, val, unit) { name, help, 0, AV_OPT_TYPE_CONST, {.i64=val}, INT_MIN, INT_MAX, FLAGS, unit }
+
+static const AVOption yadif_videotoolbox_options[] = {
+    #define OFFSET(x) offsetof(YADIFContext, x)
+    { "mode",   "specify the interlacing mode", OFFSET(mode), AV_OPT_TYPE_INT, {.i64=YADIF_MODE_SEND_FRAME}, 0, 3, FLAGS, "mode"},
+    CONST("send_frame",           "send one frame for each frame",                                     YADIF_MODE_SEND_FRAME,           "mode"),
+    CONST("send_field",           "send one frame for each field",                                     YADIF_MODE_SEND_FIELD,           "mode"),
+    CONST("send_frame_nospatial", "send one frame for each frame, but skip spatial interlacing check", YADIF_MODE_SEND_FRAME_NOSPATIAL, "mode"),
+    CONST("send_field_nospatial", "send one frame for each field, but skip spatial interlacing check", YADIF_MODE_SEND_FIELD_NOSPATIAL, "mode"),
+
+    { "parity", "specify the assumed picture field parity", OFFSET(parity), AV_OPT_TYPE_INT, {.i64=YADIF_PARITY_AUTO}, -1, 1, FLAGS, "parity" },
+    CONST("tff",  "assume top field first",    YADIF_PARITY_TFF,  "parity"),
+    CONST("bff",  "assume bottom field first", YADIF_PARITY_BFF,  "parity"),
+    CONST("auto", "auto detect parity",        YADIF_PARITY_AUTO, "parity"),
+
+    { "deint", "specify which frames to deinterlace", OFFSET(deint), AV_OPT_TYPE_INT, {.i64=YADIF_DEINT_ALL}, 0, 1, FLAGS, "deint" },
+    CONST("all",        "deinterlace all frames",                       YADIF_DEINT_ALL,        "deint"),
+    CONST("interlaced", "only deinterlace frames marked as interlaced", YADIF_DEINT_INTERLACED, "deint"),
+    #undef OFFSET
+
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(yadif_videotoolbox);
+
+static const AVFilterPad yadif_videotoolbox_inputs[] = {
+    {
+        .name          = "default",
+        .type          = AVMEDIA_TYPE_VIDEO,
+        .filter_frame  = ff_yadif_filter_frame,
+        .config_props  = config_input,
+    },
+};
+
+static const AVFilterPad yadif_videotoolbox_outputs[] = {
+    {
+        .name          = "default",
+        .type          = AVMEDIA_TYPE_VIDEO,
+        .request_frame = ff_yadif_request_frame,
+        .config_props  = config_output,
+    },
+};
+
+AVFilter ff_vf_yadif_videotoolbox = {
+    .name           = "yadif_videotoolbox",
+    .description    = NULL_IF_CONFIG_SMALL("YADIF for VideoToolbox frames using Metal compute"),
+    .priv_size      = sizeof(YADIFVTContext),
+    .priv_class     = &yadif_videotoolbox_class,
+    .init           = yadif_videotoolbox_init,
+    .uninit         = yadif_videotoolbox_uninit,
+    FILTER_SINGLE_PIXFMT(AV_PIX_FMT_VIDEOTOOLBOX),
+    FILTER_INPUTS(yadif_videotoolbox_inputs),
+    FILTER_OUTPUTS(yadif_videotoolbox_outputs),
+    .flags          = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL,
+    .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
+};