diff mbox series

[FFmpeg-devel,WIP,RFC] avfilter: add opencl v360 filter

Message ID 20211016092616.572373-1-onemda@gmail.com
State New
Headers show
Series [FFmpeg-devel,WIP,RFC] avfilter: add opencl v360 filter | expand

Checks

Context Check Description
andriy/configurex86 warning Failed to apply patch
andriy/configureppc warning Failed to apply patch

Commit Message

Paul B Mahol Oct. 16, 2021, 9:26 a.m. UTC
Signed-off-by: Paul B Mahol <onemda@gmail.com>
---

For now just equirectangular to flat conversion, but could be
with some effort extended with all formats supported by v360 filter,
minus non-padded stuff, but that is not present in normal usecases.

---
 libavfilter/Makefile         |   2 +
 libavfilter/allfilters.c     |   1 +
 libavfilter/opencl/v360.cl   | 158 +++++++++++
 libavfilter/opencl_source.h  |   1 +
 libavfilter/vf_v360_opencl.c | 505 +++++++++++++++++++++++++++++++++++
 5 files changed, 667 insertions(+)
 create mode 100644 libavfilter/opencl/v360.cl
 create mode 100644 libavfilter/vf_v360_opencl.c
diff mbox series

Patch

diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 358f121cb4..eb5365a739 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -485,6 +485,8 @@  OBJS-$(CONFIG_UNSHARP_OPENCL_FILTER)         += vf_unsharp_opencl.o opencl.o \
 OBJS-$(CONFIG_UNTILE_FILTER)                 += vf_untile.o
 OBJS-$(CONFIG_USPP_FILTER)                   += vf_uspp.o qp_table.o
 OBJS-$(CONFIG_V360_FILTER)                   += vf_v360.o
+OBJS-$(CONFIG_V360_OPENCL_FILTER)            += vf_v360_opencl.o opencl.o \
+                                                opencl/v360.o
 OBJS-$(CONFIG_VAGUEDENOISER_FILTER)          += vf_vaguedenoiser.o
 OBJS-$(CONFIG_VARBLUR_FILTER)                += vf_varblur.o framesync.o
 OBJS-$(CONFIG_VECTORSCOPE_FILTER)            += vf_vectorscope.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index 409ab5d3c4..04f1925c14 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -462,6 +462,7 @@  extern const AVFilter ff_vf_unsharp_opencl;
 extern const AVFilter ff_vf_untile;
 extern const AVFilter ff_vf_uspp;
 extern const AVFilter ff_vf_v360;
+extern const AVFilter ff_vf_v360_opencl;
 extern const AVFilter ff_vf_vaguedenoiser;
 extern const AVFilter ff_vf_varblur;
 extern const AVFilter ff_vf_vectorscope;
diff --git a/libavfilter/opencl/v360.cl b/libavfilter/opencl/v360.cl
new file mode 100644
index 0000000000..003c188249
--- /dev/null
+++ b/libavfilter/opencl/v360.cl
@@ -0,0 +1,158 @@ 
+/*
+ * Copyright (c) 2021 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE | CLK_FILTER_LINEAR);
+
+static float scale(float x, float s)
+{
+    return (0.5 * x + 0.5) * (s - 1.);
+}
+
+static float rescale(int x, float s)
+{
+    return (2. * x + 1.) / s - 1.;
+}
+
+__kernel void equirect_to_xyz(__write_only image2d_t dst)
+{
+    int2 p = (int2)(get_global_id(0), get_global_id(1));
+    int2 size = (int2)(get_global_size(0), get_global_size(1));
+    float2 f;
+
+    f.x = rescale(p.x, size.x);
+    f.y = rescale(p.y, size.y);
+
+    float sin_phi   = sin(f.x);
+    float cos_phi   = cos(f.x);
+    float sin_theta = sin(f.y);
+    float cos_theta = cos(f.y);
+
+    float4 vec;
+
+    vec.x = cos_theta * sin_phi;
+    vec.y = sin_theta;
+    vec.z = cos_theta * cos_phi;
+
+    write_imagef(dst, p, vec);
+}
+
+__kernel void flat_to_xyz(global float3 *dst,
+                          float2 flat_range)
+{
+    int2 p = (int2)(get_global_id(0), get_global_id(1));
+    int2 size = (int2)(get_global_size(0), get_global_size(1));
+    float2 f;
+
+    f.x = flat_range.x * rescale(p.x, size.x);
+    f.y = flat_range.y * rescale(p.y, size.y);
+
+    float3 vec;
+
+    vec.x = f.x;
+    vec.y = f.y;
+    vec.z = 1.0;
+
+    vec = normalize(vec);
+
+    dst[p.y * size.x + p.x] = vec;
+}
+
+__kernel void xyz_to_equirect(global float2 *dst,
+                              float2 iflat_range,
+                              global float3 *m,
+                              __read_only image2d_t src)
+{
+    int2 p = (int2)(get_global_id(0), get_global_id(1));
+    int2 size = (int2)(get_global_size(0), get_global_size(1));
+
+    float3 vec = m[p.x + size.x * p.y];
+
+    const float phi   = atan2(vec.x, vec.z) / iflat_range.x;
+    const float theta = asin(vec.y) / iflat_range.y;
+
+    float2 uv;
+
+    uv.x = scale(phi, size.x);
+    uv.y = scale(theta, size.y);
+
+    dst[p.x + p.y * size.x] = uv;
+}
+
+__kernel void remap(__write_only image2d_t dst,
+                    __read_only image2d_t src,
+                    global float2 *remap)
+{
+    int2 p = (int2)(get_global_id(0), get_global_id(1));
+    int2 size = (int2)(get_global_size(0), get_global_size(1));
+
+    float2 f = remap[p.y * size.x + p.x];
+    float4 v = read_imagef(src, sampler, f.xy);
+
+    write_imagef(dst, p, v);
+}
+
+static float4 multiply_quaternion(float4 a, float4 b)
+{
+    float4 c;
+
+    c.s0 = a.s0 * b.s0 - a.s1 * b.s1 - a.s2 * b.s2 - a.s3 * b.s3;
+    c.s1 = a.s1 * b.s0 + a.s0 * b.s1 + a.s2 * b.s3 - a.s3 * b.s2;
+    c.s2 = a.s2 * b.s0 + a.s0 * b.s2 + a.s3 * b.s1 - a.s1 * b.s3;
+    c.s3 = a.s3 * b.s0 + a.s0 * b.s3 + a.s1 * b.s2 - a.s2 * b.s1;
+
+    return c;
+}
+
+__kernel void rotate(global float3 *dst,
+                     float8 quaternion)
+{
+    int2 size = (int2)(get_global_size(0), get_global_size(1));
+    int2 p = (int2)(get_global_id(0), get_global_id(1));
+    float4 qv;
+    float4 temp;
+    float4 rqv;
+
+    float3 vec = dst[p.y * size.x + p.x];
+
+    qv.x = 0.;
+    qv.s123 = vec.xyz;
+
+    temp = multiply_quaternion(quaternion.s0123, qv);
+    rqv  = multiply_quaternion(temp, quaternion.s4567);
+
+    vec.xyz = rqv.s123;
+
+    vec = normalize(vec);
+
+    dst[p.y * size.x + p.x] = vec;
+}
+
+__kernel void mirror(global float3 *dst,
+                     float3 mirror)
+{
+    int2 size = (int2)(get_global_size(0), get_global_size(1));
+    int2 p = (int2)(get_global_id(0), get_global_id(1));
+
+    float3 vec = dst[p.y * size.x + p.x];
+
+    vec.xyz *= mirror.xyz;
+
+    dst[p.y * size.x + p.x] = vec;
+}
diff --git a/libavfilter/opencl_source.h b/libavfilter/opencl_source.h
index 7e8133090e..5327b5c46b 100644
--- a/libavfilter/opencl_source.h
+++ b/libavfilter/opencl_source.h
@@ -31,6 +31,7 @@  extern const char *ff_opencl_source_pad;
 extern const char *ff_opencl_source_tonemap;
 extern const char *ff_opencl_source_transpose;
 extern const char *ff_opencl_source_unsharp;
+extern const char *ff_opencl_source_v360;
 extern const char *ff_opencl_source_xfade;
 
 #endif /* AVFILTER_OPENCL_SOURCE_H */
diff --git a/libavfilter/vf_v360_opencl.c b/libavfilter/vf_v360_opencl.c
new file mode 100644
index 0000000000..010953a363
--- /dev/null
+++ b/libavfilter/vf_v360_opencl.c
@@ -0,0 +1,505 @@ 
+/*
+ * Copyright (c) 2021 Paul B Mahol
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include <float.h>
+
+#include "libavutil/avassert.h"
+#include "libavutil/common.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/mem.h"
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+
+#include "avfilter.h"
+#include "internal.h"
+#include "opencl.h"
+#include "opencl_source.h"
+#include "v360.h"
+#include "video.h"
+
+typedef struct V360OpenCLContext {
+    OpenCLFilterContext   ocf;
+    int                   initialised;
+
+    int                   in;
+    int                   out;
+    int                   prev_in[2];
+    int                   prev_out[2];
+    float                 h_fov;
+    float                 v_fov;
+    float                 d_fov;
+    float                 ih_fov;
+    float                 iv_fov;
+    float                 id_fov;
+    float                 yaw;
+    float                 pitch;
+    float                 roll;
+    char                 *rorder;
+    int                   ih_flip, iv_flip;
+    int                   h_flip, v_flip, d_flip;
+
+    cl_kernel             in_kernel;
+    cl_kernel             out_kernel;
+    cl_kernel             rotate_kernel;
+    cl_kernel             mirror_kernel;
+    cl_kernel             remap_kernel;
+    cl_mem                vectors[2];
+    cl_mem                remap[2];
+    cl_command_queue      command_queue;
+
+    float                 flat_range[2];
+    float                 iflat_range[2];
+    float                 output_mirror_modifier[3];
+    float                 rot_quaternion[2][4];
+
+    int                   rotation_order[3];
+} V360OpenCLContext;
+
+static int v360_opencl_init(AVFilterContext *avctx, int width, int height)
+{
+    V360OpenCLContext *ctx = avctx->priv;
+    cl_int cle;
+    int err;
+
+    err = ff_opencl_filter_load_program(avctx, &ff_opencl_source_v360, 1);
+    if (err < 0)
+        goto fail;
+
+    ctx->command_queue = clCreateCommandQueue(ctx->ocf.hwctx->context,
+                                              ctx->ocf.hwctx->device_id,
+                                              0, &cle);
+    CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create OpenCL "
+                     "command queue %d.\n", cle);
+
+    ctx->in_kernel = clCreateKernel(ctx->ocf.program,
+                                    "xyz_to_equirect", &cle);
+    CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create "
+                     "input_format kernel %d.\n", cle);
+
+    ctx->rotate_kernel = clCreateKernel(ctx->ocf.program,
+                                        "rotate", &cle);
+    CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create "
+                     "rotate kernel %d.\n", cle);
+
+    ctx->mirror_kernel = clCreateKernel(ctx->ocf.program,
+                                        "mirror", &cle);
+    CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create "
+                     "mirror kernel %d.\n", cle);
+
+    ctx->out_kernel = clCreateKernel(ctx->ocf.program,
+                                     "flat_to_xyz", &cle);
+    CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create "
+                     "output_format kernel %d.\n", cle);
+
+    ctx->remap_kernel = clCreateKernel(ctx->ocf.program,
+                                       "remap", &cle);
+    CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create "
+                     "remap kernel %d.\n", cle);
+
+    ctx->vectors[0] = clCreateBuffer(ctx->ocf.hwctx->context, 0,
+                                     width * height * sizeof(cl_float3),
+                                     NULL, &cle);
+    CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create "
+                     "vectors image %d.\n", cle);
+
+    ctx->vectors[1] = clCreateBuffer(ctx->ocf.hwctx->context, 0,
+                                     width * height * sizeof(cl_float3),
+                                     NULL, &cle);
+    CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create "
+                     "vectors image %d.\n", cle);
+
+    ctx->remap[0] = clCreateBuffer(ctx->ocf.hwctx->context, 0,
+                                   width * height * sizeof(cl_float2),
+                                   NULL, &cle);
+    CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create "
+                     "remap image %d.\n", cle);
+
+    ctx->remap[1] = clCreateBuffer(ctx->ocf.hwctx->context, 0,
+                                   width * height * sizeof(cl_float2),
+                                   NULL, &cle);
+    CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create "
+                     "remap image %d.\n", cle);
+
+    ctx->initialised = 1;
+    return 0;
+
+fail:
+    CL_RELEASE_KERNEL(ctx->in_kernel);
+    CL_RELEASE_KERNEL(ctx->out_kernel);
+    CL_RELEASE_KERNEL(ctx->rotate_kernel);
+    CL_RELEASE_KERNEL(ctx->mirror_kernel);
+    CL_RELEASE_KERNEL(ctx->remap_kernel);
+
+    CL_RELEASE_MEMORY(ctx->vectors[0]);
+    CL_RELEASE_MEMORY(ctx->vectors[1]);
+
+    CL_RELEASE_MEMORY(ctx->remap[0]);
+    CL_RELEASE_MEMORY(ctx->remap[1]);
+
+    CL_RELEASE_QUEUE(ctx->command_queue);
+
+    return err;
+}
+
+static int v360_opencl_config_input(AVFilterLink *inlink)
+{
+    AVFilterContext *avctx = inlink->dst;
+    V360OpenCLContext *ctx = avctx->priv;
+
+    ctx->prev_in[0] = ctx->prev_out[0] = -1;
+    ctx->prev_in[1] = ctx->prev_out[1] = -1;
+
+    ctx->rot_quaternion[0][0] = 1.f;
+    ctx->rot_quaternion[0][1] = ctx->rot_quaternion[0][2] = ctx->rot_quaternion[0][3] = 0.f;
+
+    return ff_opencl_filter_config_input(inlink);
+}
+
+static void multiply_quaternion(float c[4], const float a[4], const float b[4])
+{
+    c[0] = a[0] * b[0] - a[1] * b[1] - a[2] * b[2] - a[3] * b[3];
+    c[1] = a[1] * b[0] + a[0] * b[1] + a[2] * b[3] - a[3] * b[2];
+    c[2] = a[2] * b[0] + a[0] * b[2] + a[3] * b[1] - a[1] * b[3];
+    c[3] = a[3] * b[0] + a[0] * b[3] + a[1] * b[2] - a[2] * b[1];
+}
+
+static void conjugate_quaternion(float d[4], const float q[4])
+{
+    d[0] =  q[0];
+    d[1] = -q[1];
+    d[2] = -q[2];
+    d[3] = -q[3];
+}
+
+static inline void set_mirror_modifier(int h_flip, int v_flip, int d_flip,
+                                       float *modifier)
+{
+    modifier[0] = h_flip ? -1.f : 1.f;
+    modifier[1] = v_flip ? -1.f : 1.f;
+    modifier[2] = d_flip ? -1.f : 1.f;
+}
+
+static inline void input_flip(int16_t u[4][4], int16_t v[4][4], int w, int h, int hflip, int vflip)
+{
+    if (hflip) {
+        for (int i = 0; i < 4; i++) {
+            for (int j = 0; j < 4; j++)
+                u[i][j] = w - 1 - u[i][j];
+        }
+    }
+
+    if (vflip) {
+        for (int i = 0; i < 4; i++) {
+            for (int j = 0; j < 4; j++)
+                v[i][j] = h - 1 - v[i][j];
+        }
+    }
+}
+
+static inline void calculate_rotation(float yaw, float pitch, float roll,
+                                      float rot_quaternion[2][4],
+                                      const int rotation_order[3])
+{
+    const float yaw_rad   = yaw   * M_PI / 180.f;
+    const float pitch_rad = pitch * M_PI / 180.f;
+    const float roll_rad  = roll  * M_PI / 180.f;
+
+    const float sin_yaw   = sinf(yaw_rad   * 0.5f);
+    const float cos_yaw   = cosf(yaw_rad   * 0.5f);
+    const float sin_pitch = sinf(pitch_rad * 0.5f);
+    const float cos_pitch = cosf(pitch_rad * 0.5f);
+    const float sin_roll  = sinf(roll_rad  * 0.5f);
+    const float cos_roll  = cosf(roll_rad  * 0.5f);
+
+    float m[3][4];
+    float tmp[2][4];
+
+    m[0][0] = cos_yaw;   m[0][1] = 0.f;       m[0][2] = sin_yaw; m[0][3] = 0.f;
+    m[1][0] = cos_pitch; m[1][1] = sin_pitch; m[1][2] = 0.f;     m[1][3] = 0.f;
+    m[2][0] = cos_roll;  m[2][1] = 0.f;       m[2][2] = 0.f;     m[2][3] = sin_roll;
+
+    multiply_quaternion(tmp[0], rot_quaternion[0], m[rotation_order[0]]);
+    multiply_quaternion(tmp[1], tmp[0], m[rotation_order[1]]);
+    multiply_quaternion(rot_quaternion[0], tmp[1], m[rotation_order[2]]);
+
+    conjugate_quaternion(rot_quaternion[1], rot_quaternion[0]);
+}
+
+static int get_rorder(char c)
+{
+    switch (c) {
+    case 'Y':
+    case 'y':
+        return YAW;
+    case 'P':
+    case 'p':
+        return PITCH;
+    case 'R':
+    case 'r':
+        return ROLL;
+    default:
+        return -1;
+    }
+}
+
+static int v360_opencl_filter_frame(AVFilterLink *inlink, AVFrame *input)
+{
+    AVFilterContext *avctx = inlink->dst;
+    AVFilterLink *outlink = avctx->outputs[0];
+    V360OpenCLContext *ctx = avctx->priv;
+    AVFrame *output = NULL;
+    AVHWFramesContext *input_frames_ctx;
+    enum AVPixelFormat in_format;
+    size_t global_work[2];
+    cl_mem src, dst;
+    int err, cle;
+
+    if (!input->hw_frames_ctx)
+        return AVERROR(EINVAL);
+    input_frames_ctx = (AVHWFramesContext*)input->hw_frames_ctx->data;
+    in_format = input_frames_ctx->sw_format;
+
+    output = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+    if (!output) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    err = av_frame_copy_props(output, input);
+    if (err < 0)
+        goto fail;
+
+    if (!ctx->initialised) {
+        err = v360_opencl_init(avctx, inlink->w, inlink->h);
+        if (err < 0)
+            goto fail;
+    }
+
+    ctx->flat_range[0] = tanf(0.5f * ctx->h_fov * M_PI / 180.f);
+    ctx->flat_range[1] = tanf(0.5f * ctx->v_fov * M_PI / 180.f);
+
+    ctx->iflat_range[0] = ctx->ih_fov * M_PI / 360.f;
+    ctx->iflat_range[1] = ctx->iv_fov * M_PI / 360.f;
+
+    for (int order = 0; order < NB_RORDERS; order++) {
+        const char c = ctx->rorder[order];
+        int rorder;
+
+        if (c == '\0') {
+            av_log(ctx, AV_LOG_WARNING,
+                   "Incomplete rorder option. Direction for all 3 rotation orders should be specified. Switching to default rorder.\n");
+            ctx->rotation_order[0] = YAW;
+            ctx->rotation_order[1] = PITCH;
+            ctx->rotation_order[2] = ROLL;
+            break;
+        }
+
+        rorder = get_rorder(c);
+        if (rorder == -1) {
+            av_log(ctx, AV_LOG_WARNING,
+                   "Incorrect rotation order symbol '%c' in rorder option. Switching to default rorder.\n", c);
+            ctx->rotation_order[0] = YAW;
+            ctx->rotation_order[1] = PITCH;
+            ctx->rotation_order[2] = ROLL;
+            break;
+        }
+
+        ctx->rotation_order[order] = rorder;
+    }
+
+    calculate_rotation(ctx->yaw, ctx->pitch, ctx->roll,
+                       ctx->rot_quaternion, ctx->rotation_order);
+
+    set_mirror_modifier(ctx->h_flip, ctx->v_flip, ctx->d_flip, ctx->output_mirror_modifier);
+
+    for (int p = 0; p < FF_ARRAY_ELEMS(output->data); p++) {
+        const int pp = p > 0 && p < 3 ? 1 : 0;
+        src = (cl_mem) input->data[p];
+        dst = (cl_mem) output->data[p];
+
+        if (!dst || !src)
+            break;
+
+        err = ff_opencl_filter_work_size_from_image(avctx, global_work,
+                                                    output, p, 0);
+        if (err < 0)
+            goto fail;
+
+        if ((pp == p) && ctx->prev_out[pp] != ctx->out) {
+            CL_SET_KERNEL_ARG(ctx->out_kernel, 0, cl_mem, &ctx->vectors[pp]);
+            CL_SET_KERNEL_ARG(ctx->out_kernel, 1, cl_float2, &ctx->flat_range);
+
+            cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->out_kernel, 2, NULL,
+                                         global_work, NULL, 0, NULL, NULL);
+
+            CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to enqueue output_format kernel: %d.\n", cle);
+
+            ctx->prev_out[pp] = ctx->out;
+        }
+
+        if ((pp == p) && (ctx->yaw != 0.f || ctx->pitch != 0.f || ctx->roll != 0.f)) {
+            CL_SET_KERNEL_ARG(ctx->rotate_kernel, 0, cl_mem, &ctx->vectors[pp]);
+            CL_SET_KERNEL_ARG(ctx->rotate_kernel, 1, cl_float8, &ctx->rot_quaternion);
+
+            cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->rotate_kernel, 2, NULL,
+                                         global_work, NULL, 0, NULL, NULL);
+
+            CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to enqueue rotate kernel: %d.\n", cle);
+
+            ctx->prev_in[pp] = -1;
+        }
+
+        if ((pp == p) && (ctx->h_flip != 0 || ctx->v_flip != 0 || ctx->d_flip != 0)) {
+            CL_SET_KERNEL_ARG(ctx->mirror_kernel, 0, cl_mem, &ctx->vectors[pp]);
+            CL_SET_KERNEL_ARG(ctx->mirror_kernel, 1, cl_float3, &ctx->output_mirror_modifier);
+
+            cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->mirror_kernel, 2, NULL,
+                                         global_work, NULL, 0, NULL, NULL);
+
+            CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to enqueue mirror kernel: %d.\n", cle);
+
+            ctx->prev_in[pp] = -1;
+        }
+
+        if ((pp == p) && ctx->prev_in[pp] != ctx->in) {
+            CL_SET_KERNEL_ARG(ctx->in_kernel, 0, cl_mem, &ctx->remap[pp]);
+            CL_SET_KERNEL_ARG(ctx->in_kernel, 1, cl_float2, &ctx->iflat_range);
+            CL_SET_KERNEL_ARG(ctx->in_kernel, 2, cl_mem, &ctx->vectors[pp]);
+            CL_SET_KERNEL_ARG(ctx->in_kernel, 3, cl_mem, &src);
+
+            cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->in_kernel, 2, NULL,
+                                         global_work, NULL, 0, NULL, NULL);
+
+            CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to enqueue input_format kernel: %d.\n", cle);
+
+            ctx->prev_in[pp] = ctx->in;
+        }
+
+        CL_SET_KERNEL_ARG(ctx->remap_kernel, 0, cl_mem, &dst);
+        CL_SET_KERNEL_ARG(ctx->remap_kernel, 1, cl_mem, &src);
+        CL_SET_KERNEL_ARG(ctx->remap_kernel, 2, cl_mem, &ctx->remap[pp]);
+
+        cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->remap_kernel, 2, NULL,
+                                     global_work, NULL, 0, NULL, NULL);
+
+        CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to enqueue remap kernel: %d.\n", cle);
+
+    }
+
+    cle = clFlush(ctx->command_queue);
+    CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to flush command queue: %d.\n", cle);
+
+    cle = clFinish(ctx->command_queue);
+    CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to finish kernel: %d.\n", cle);
+
+    ctx->yaw = ctx->pitch = ctx->roll = 0.f;
+    ctx->h_flip = ctx->v_flip = ctx->d_flip = 0;
+
+    av_frame_free(&input);
+
+    return ff_filter_frame(outlink, output);
+
+fail:
+    clFinish(ctx->command_queue);
+    av_frame_free(&input);
+    av_frame_free(&output);
+    return err;
+}
+
+static av_cold void v360_opencl_uninit(AVFilterContext *avctx)
+{
+    V360OpenCLContext *ctx = avctx->priv;
+    cl_int cle;
+
+    CL_RELEASE_KERNEL(ctx->out_kernel);
+    CL_RELEASE_KERNEL(ctx->rotate_kernel);
+    CL_RELEASE_KERNEL(ctx->mirror_kernel);
+    CL_RELEASE_KERNEL(ctx->in_kernel);
+    CL_RELEASE_KERNEL(ctx->remap_kernel);
+
+    CL_RELEASE_MEMORY(ctx->vectors[0]);
+    CL_RELEASE_MEMORY(ctx->vectors[1]);
+
+    CL_RELEASE_MEMORY(ctx->remap[0]);
+    CL_RELEASE_MEMORY(ctx->remap[1]);
+
+    CL_RELEASE_QUEUE(ctx->command_queue);
+
+    ff_opencl_filter_uninit(avctx);
+}
+
+#define OFFSET(x) offsetof(V360OpenCLContext, x)
+#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
+#define TFLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_RUNTIME_PARAM)
+static const AVOption v360_opencl_options[] = {
+    { "input",  "set input projection",            OFFSET(in),     AV_OPT_TYPE_INT,    {.i64=EQUIRECTANGULAR}, 0,    NB_PROJECTIONS-1, FLAGS, "in" },
+    {     "e",  "equirectangular",                 0,              AV_OPT_TYPE_CONST,  {.i64=EQUIRECTANGULAR}, 0,                   0, FLAGS, "in" },
+    {"equirect","equirectangular",                 0,              AV_OPT_TYPE_CONST,  {.i64=EQUIRECTANGULAR}, 0,                   0, FLAGS, "in" },
+    { "output", "set output projection",           OFFSET(out),    AV_OPT_TYPE_INT,    {.i64=FLAT},            0,    NB_PROJECTIONS-1, FLAGS, "out" },
+    {   "flat", "regular video",                   0,              AV_OPT_TYPE_CONST,  {.i64=FLAT},            0,                   0, FLAGS, "out" },
+    {    "yaw", "yaw rotation",                    OFFSET(yaw),    AV_OPT_TYPE_FLOAT,  {.dbl=0.f},        -180.f,               180.f,TFLAGS, "yaw"},
+    {  "pitch", "pitch rotation",                  OFFSET(pitch),  AV_OPT_TYPE_FLOAT,  {.dbl=0.f},        -180.f,               180.f,TFLAGS, "pitch"},
+    {   "roll", "roll rotation",                   OFFSET(roll),   AV_OPT_TYPE_FLOAT,  {.dbl=0.f},        -180.f,               180.f,TFLAGS, "roll"},
+    { "rorder", "rotation order",                  OFFSET(rorder), AV_OPT_TYPE_STRING, {.str="ypr"},           0,                   0,TFLAGS, "rorder"},
+    { "h_fov",  "output horizontal field of view", OFFSET(h_fov),  AV_OPT_TYPE_FLOAT,  {.dbl=0.f},           0.f,               360.f,TFLAGS, "h_fov"},
+    { "v_fov",  "output vertical field of view",   OFFSET(v_fov),  AV_OPT_TYPE_FLOAT,  {.dbl=0.f},           0.f,               360.f,TFLAGS, "v_fov"},
+    { "d_fov",  "output diagonal field of view",   OFFSET(d_fov),  AV_OPT_TYPE_FLOAT,  {.dbl=0.f},           0.f,               360.f,TFLAGS, "d_fov"},
+    {"h_flip",  "flip out video horizontally",     OFFSET(h_flip), AV_OPT_TYPE_BOOL,   {.i64=0},               0,                   1,TFLAGS, "h_flip"},
+    {"v_flip",  "flip out video vertically",       OFFSET(v_flip), AV_OPT_TYPE_BOOL,   {.i64=0},               0,                   1,TFLAGS, "v_flip"},
+    {"d_flip",  "flip out video indepth",          OFFSET(d_flip), AV_OPT_TYPE_BOOL,   {.i64=0},               0,                   1,TFLAGS, "d_flip"},
+    {"ih_flip", "flip in video horizontally",      OFFSET(ih_flip), AV_OPT_TYPE_BOOL,  {.i64=0},               0,                   1,TFLAGS, "ih_flip"},
+    {"iv_flip", "flip in video vertically",        OFFSET(iv_flip), AV_OPT_TYPE_BOOL,  {.i64=0},               0,                   1,TFLAGS, "iv_flip"},
+    { "ih_fov", "input horizontal field of view",  OFFSET(ih_fov), AV_OPT_TYPE_FLOAT,  {.dbl=0.f},           0.f,               360.f,TFLAGS, "ih_fov"},
+    { "iv_fov", "input vertical field of view",    OFFSET(iv_fov), AV_OPT_TYPE_FLOAT,  {.dbl=0.f},           0.f,               360.f,TFLAGS, "iv_fov"},
+    { "id_fov", "input diagonal field of view",    OFFSET(id_fov), AV_OPT_TYPE_FLOAT,  {.dbl=0.f},           0.f,               360.f,TFLAGS, "id_fov"},
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(v360_opencl);
+
+static const AVFilterPad v360_opencl_inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = &v360_opencl_filter_frame,
+        .config_props = &v360_opencl_config_input,
+    },
+};
+
+static const AVFilterPad v360_opencl_outputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .config_props = &ff_opencl_filter_config_output,
+    },
+};
+
+AVFilter ff_vf_v360_opencl = {
+    .name            = "v360_opencl",
+    .description     = NULL_IF_CONFIG_SMALL("Convert 360 projection of video via OpenCL."),
+    .priv_size       = sizeof(V360OpenCLContext),
+    .priv_class      = &v360_opencl_class,
+    .init            = &ff_opencl_filter_init,
+    .uninit          = &v360_opencl_uninit,
+    FILTER_INPUTS(v360_opencl_inputs),
+    FILTER_OUTPUTS(v360_opencl_outputs),
+    FILTER_SINGLE_PIXFMT(AV_PIX_FMT_OPENCL),
+    .process_command = ff_filter_process_command,
+    .flags_internal  = FF_FILTER_FLAG_HWFRAME_AWARE,
+};