diff mbox

[FFmpeg-devel] lavfi: add vflip_opencl, hflip_opencl

Message ID 1531103218-31957-1-git-send-email-danyaschenko@gmail.com
State New
Headers show

Commit Message

Danil Iashchenko July 9, 2018, 2:26 a.m. UTC
lavfi: add vflip_opencl, hflip_opencl.
Behaves like existing vflip, hflip filters.
---
 configure                     |   2 +
 libavfilter/Makefile          |   4 +
 libavfilter/allfilters.c      |   2 +
 libavfilter/opencl/vflip.cl   |  60 ++++++++++
 libavfilter/opencl_source.h   |   1 +
 libavfilter/vf_vflip_opencl.c | 270 ++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 339 insertions(+)
 create mode 100644 libavfilter/opencl/vflip.cl
 create mode 100644 libavfilter/vf_vflip_opencl.c

Comments

Mark Thompson July 11, 2018, 10:36 p.m. UTC | #1
On 09/07/18 03:26, Danil Iashchenko wrote:
> lavfi: add vflip_opencl, hflip_opencl.
> Behaves like existing vflip, hflip filters.
> ---
>  configure                     |   2 +
>  libavfilter/Makefile          |   4 +
>  libavfilter/allfilters.c      |   2 +
>  libavfilter/opencl/vflip.cl   |  60 ++++++++++
>  libavfilter/opencl_source.h   |   1 +
>  libavfilter/vf_vflip_opencl.c | 270 ++++++++++++++++++++++++++++++++++++++++++
>  6 files changed, 339 insertions(+)
>  create mode 100644 libavfilter/opencl/vflip.cl
>  create mode 100644 libavfilter/vf_vflip_opencl.c

These two filters feel a bit too trivial to make new files for?  Currently they can be implemented with program_opencl and a handful of lines of code:

hflip.cl:
"""
__kernel void hflip(__write_only image2d_t dst,
                    unsigned int index,
                    __read_only  image2d_t src)
{
    const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE;
    int2 dst_loc = (int2)(get_global_id(0), get_global_id(1));
    int2 src_loc = (int2)(get_image_dim(dst).x - 1 - dst_loc.x, dst_loc.y);
    write_imagef(dst, dst_loc, read_imagef(src, sampler, src_loc));
}
"""
+
-vf ...,program_opencl=source=hflip.cl:kernel=hflip,...

and equivalently for vflip.

> ...
> diff --git a/libavfilter/opencl/vflip.cl b/libavfilter/opencl/vflip.cl
> new file mode 100644
> index 0000000..4ed2f43
> --- /dev/null
> +++ b/libavfilter/opencl/vflip.cl
> @@ -0,0 +1,60 @@
> ...
> +
> +void swap_pix(__write_only image2d_t dst,
> +              __read_only  image2d_t src,
> +              int2 loc,
> +              int2 loc1) {
> +
> +    const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE |
> +                               CLK_ADDRESS_CLAMP_TO_EDGE   |
> +                               CLK_FILTER_NEAREST);
> +
> +    float4 px  = read_imagef(src, sampler, loc );
> +    float4 px1 = read_imagef(src, sampler, loc1);
> +
> +    write_imagef(dst, loc,  px1);
> +    write_imagef(dst, loc1, px );
> +}

Is this swap approach better than just writing the one pixel as above?  Intuitively it feels slightly worse to me - every workitem ends up touching two completely different places in the input and output, which feels bad for optimisation/caching.

> +__kernel void vflip_global(__write_only image2d_t dst,
> +                           __read_only  image2d_t src)
> +{
> +
> +    int2 imgSize = get_image_dim(src);
> +    int2 loc  = (int2)(get_global_id(0), get_global_id(1));
> +    int2 loc1 = (int2)(loc.x, imgSize.y - loc.y - 1);
> +
> +    swap_pix(dst, src, loc, loc1);
> +}
> +
> +
> +__kernel void hflip_global(__write_only image2d_t dst,
> +                           __read_only  image2d_t src)
> +{
> +
> +    int2 imgSize = get_image_dim(src);
> +    int2 loc  = (int2)(get_global_id(0), get_global_id(1));
> +    int2 loc1 = (int2)(imgSize.x - loc.x - 1, loc.y);
> +
> +    swap_pix(dst, src, loc, loc1);
> +}
> ...

- Mark
diff mbox

Patch

diff --git a/configure b/configure
index b1a4dcf..7863056 100755
--- a/configure
+++ b/configure
@@ -3348,6 +3348,7 @@  frei0r_filter_deps="frei0r libdl"
 frei0r_src_filter_deps="frei0r libdl"
 fspp_filter_deps="gpl"
 geq_filter_deps="gpl"
+hflip_opencl_filter_deps="opencl"
 histeq_filter_deps="gpl"
 hqdn3d_filter_deps="gpl"
 interlace_filter_deps="gpl"
@@ -3422,6 +3423,7 @@  uspp_filter_deps="gpl avcodec"
 vaguedenoiser_filter_deps="gpl"
 vidstabdetect_filter_deps="libvidstab"
 vidstabtransform_filter_deps="libvidstab"
+vflip_opencl_filter_deps="opencl"
 libvmaf_filter_deps="libvmaf pthreads"
 zmq_filter_deps="libzmq"
 zoompan_filter_deps="swscale"
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 7735c26..ad7daab 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -230,6 +230,8 @@  OBJS-$(CONFIG_GEQ_FILTER)                    += vf_geq.o
 OBJS-$(CONFIG_GRADFUN_FILTER)                += vf_gradfun.o
 OBJS-$(CONFIG_HALDCLUT_FILTER)               += vf_lut3d.o framesync.o
 OBJS-$(CONFIG_HFLIP_FILTER)                  += vf_hflip.o
+OBJS-$(CONFIG_HFLIP_OPENCL_FILTER)           += vf_vflip_opencl.o opencl.o \
+                                                opencl/vflip.o
 OBJS-$(CONFIG_HISTEQ_FILTER)                 += vf_histeq.o
 OBJS-$(CONFIG_HISTOGRAM_FILTER)              += vf_histogram.o
 OBJS-$(CONFIG_HQDN3D_FILTER)                 += vf_hqdn3d.o
@@ -376,6 +378,8 @@  OBJS-$(CONFIG_USPP_FILTER)                   += vf_uspp.o
 OBJS-$(CONFIG_VAGUEDENOISER_FILTER)          += vf_vaguedenoiser.o
 OBJS-$(CONFIG_VECTORSCOPE_FILTER)            += vf_vectorscope.o
 OBJS-$(CONFIG_VFLIP_FILTER)                  += vf_vflip.o
+OBJS-$(CONFIG_VFLIP_OPENCL_FILTER)           += vf_vflip_opencl.o opencl.o \
+                                                opencl/vflip.o
 OBJS-$(CONFIG_VFRDET_FILTER)                 += vf_vfrdet.o
 OBJS-$(CONFIG_VIDSTABDETECT_FILTER)          += vidstabutils.o vf_vidstabdetect.o
 OBJS-$(CONFIG_VIDSTABTRANSFORM_FILTER)       += vidstabutils.o vf_vidstabtransform.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index 0ded83e..3e7e11b 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -219,6 +219,7 @@  extern AVFilter ff_vf_geq;
 extern AVFilter ff_vf_gradfun;
 extern AVFilter ff_vf_haldclut;
 extern AVFilter ff_vf_hflip;
+extern AVFilter ff_vf_hflip_opencl;
 extern AVFilter ff_vf_histeq;
 extern AVFilter ff_vf_histogram;
 extern AVFilter ff_vf_hqdn3d;
@@ -359,6 +360,7 @@  extern AVFilter ff_vf_uspp;
 extern AVFilter ff_vf_vaguedenoiser;
 extern AVFilter ff_vf_vectorscope;
 extern AVFilter ff_vf_vflip;
+extern AVFilter ff_vf_vflip_opencl;
 extern AVFilter ff_vf_vfrdet;
 extern AVFilter ff_vf_vidstabdetect;
 extern AVFilter ff_vf_vidstabtransform;
diff --git a/libavfilter/opencl/vflip.cl b/libavfilter/opencl/vflip.cl
new file mode 100644
index 0000000..4ed2f43
--- /dev/null
+++ b/libavfilter/opencl/vflip.cl
@@ -0,0 +1,60 @@ 
+/*
+ * Copyright (c) 2018 Danil Iashchenko
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+
+void swap_pix(__write_only image2d_t dst,
+              __read_only  image2d_t src,
+              int2 loc,
+              int2 loc1) {
+
+    const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE |
+                               CLK_ADDRESS_CLAMP_TO_EDGE   |
+                               CLK_FILTER_NEAREST);
+
+    float4 px  = read_imagef(src, sampler, loc );
+    float4 px1 = read_imagef(src, sampler, loc1);
+
+    write_imagef(dst, loc,  px1);
+    write_imagef(dst, loc1, px );
+}
+
+
+__kernel void vflip_global(__write_only image2d_t dst,
+                           __read_only  image2d_t src)
+{
+
+    int2 imgSize = get_image_dim(src);
+    int2 loc  = (int2)(get_global_id(0), get_global_id(1));
+    int2 loc1 = (int2)(loc.x, imgSize.y - loc.y - 1);
+
+    swap_pix(dst, src, loc, loc1);
+}
+
+
+__kernel void hflip_global(__write_only image2d_t dst,
+                           __read_only  image2d_t src)
+{
+
+    int2 imgSize = get_image_dim(src);
+    int2 loc  = (int2)(get_global_id(0), get_global_id(1));
+    int2 loc1 = (int2)(imgSize.x - loc.x - 1, loc.y);
+
+    swap_pix(dst, src, loc, loc1);
+}
diff --git a/libavfilter/opencl_source.h b/libavfilter/opencl_source.h
index de4e66e..a3c757f 100644
--- a/libavfilter/opencl_source.h
+++ b/libavfilter/opencl_source.h
@@ -25,5 +25,6 @@  extern const char *ff_opencl_source_convolution;
 extern const char *ff_opencl_source_overlay;
 extern const char *ff_opencl_source_tonemap;
 extern const char *ff_opencl_source_unsharp;
+extern const char *ff_opencl_source_vflip;
 
 #endif /* AVFILTER_OPENCL_SOURCE_H */
diff --git a/libavfilter/vf_vflip_opencl.c b/libavfilter/vf_vflip_opencl.c
new file mode 100644
index 0000000..eda8036
--- /dev/null
+++ b/libavfilter/vf_vflip_opencl.c
@@ -0,0 +1,270 @@ 
+/*
+ * Copyright (c) 2018 Danil Iashchenko
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/common.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/mem.h"
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+#include "libavutil/avstring.h"
+
+
+#include "avfilter.h"
+#include "internal.h"
+#include "opencl.h"
+#include "opencl_source.h"
+#include "video.h"
+
+typedef struct VflipOpenCLContext {
+    OpenCLFilterContext ocf;
+
+    int              initialised;
+    cl_kernel        kernel;
+    cl_command_queue command_queue;
+
+} VflipOpenCLContext;
+
+
+static int vflip_opencl_init(AVFilterContext *avctx)
+{
+    VflipOpenCLContext *ctx = avctx->priv;
+    const char *kernel_name;
+    cl_int cle;
+    int err;
+
+    err = ff_opencl_filter_load_program(avctx, &ff_opencl_source_vflip, 1);
+    if (err < 0)
+        goto fail;
+
+    ctx->command_queue = clCreateCommandQueue(ctx->ocf.hwctx->context,
+                                              ctx->ocf.hwctx->device_id,
+                                              0, &cle);
+    if (!ctx->command_queue) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to create OpenCL "
+               "command queue: %d.\n", cle);
+        err = AVERROR(EIO);
+        goto fail;
+    }
+
+    if (!strcmp(avctx->filter->name, "hflip_opencl")) {
+        kernel_name = "hflip_global";
+    } else if (!strcmp(avctx->filter->name, "vflip_opencl")) {
+        kernel_name = "vflip_global";
+    }
+    ctx->kernel = clCreateKernel(ctx->ocf.program, kernel_name, &cle);
+    if (!ctx->kernel) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to create kernel: %d.\n", cle);
+        err = AVERROR(EIO);
+        goto fail;
+    }
+
+    ctx->initialised = 1;
+    return 0;
+
+fail:
+    if (ctx->command_queue)
+        clReleaseCommandQueue(ctx->command_queue);
+    if (ctx->kernel)
+        clReleaseKernel(ctx->kernel);
+    return err;
+}
+
+
+static int vflip_opencl_filter_frame(AVFilterLink *inlink, AVFrame *input)
+{
+    AVFilterContext *avctx = inlink->dst;
+    AVFilterLink *outlink = avctx->outputs[0];
+    VflipOpenCLContext *ctx = avctx->priv;
+    AVFrame *output = NULL;
+    cl_int cle;
+    size_t global_work[2];
+    cl_mem src, dst;
+    int err, p;
+
+    av_log(ctx, AV_LOG_DEBUG, "Filter input: %s, %ux%u (%"PRId64").\n",
+           av_get_pix_fmt_name(input->format),
+           input->width, input->height, input->pts);
+
+    if (!input->hw_frames_ctx)
+        return AVERROR(EINVAL);
+
+    if (!ctx->initialised) {
+        err = vflip_opencl_init(avctx);
+        if (err < 0)
+            goto fail;
+    }
+
+    output = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+    if (!output) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    for (p = 0; p < FF_ARRAY_ELEMS(output->data); p++) {
+        src = (cl_mem) input->data[p];
+        dst = (cl_mem)output->data[p];
+
+        if (!dst)
+            break;
+
+        CL_SET_KERNEL_ARG(ctx->kernel, 0, cl_mem, &dst);
+        CL_SET_KERNEL_ARG(ctx->kernel, 1, cl_mem, &src);
+
+        err = ff_opencl_filter_work_size_from_image(avctx, global_work, output, p, 0);
+        if (err < 0)
+            goto fail;
+
+        av_log(avctx, AV_LOG_DEBUG, "Run kernel on plane %d "
+               "(%"SIZE_SPECIFIER"x%"SIZE_SPECIFIER").\n",
+               p, global_work[0], global_work[1]);
+
+        if (!strcmp(avctx->filter->name, "hflip_opencl")) {
+            global_work[0] = global_work[0] / 2;
+        } else if (!strcmp(avctx->filter->name, "vflip_opencl")) {
+            global_work[1] = global_work[1] / 2;
+        }
+
+        cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel, 2, NULL,
+                                     global_work, NULL,
+                                     0, NULL, NULL);
+        if (cle != CL_SUCCESS) {
+            av_log(avctx, AV_LOG_ERROR, "Failed to enqueue kernel: %d.\n",
+                   cle);
+            err = AVERROR(EIO);
+            goto fail;
+        }
+    }
+
+    cle = clFinish(ctx->command_queue);
+    if (cle != CL_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to finish command queue: %d.\n",
+               cle);
+        err = AVERROR(EIO);
+        goto fail;
+    }
+
+    err = av_frame_copy_props(output, input);
+    if (err < 0)
+        goto fail;
+
+    av_frame_free(&input);
+
+    av_log(ctx, AV_LOG_DEBUG, "Filter output: %s, %ux%u (%"PRId64").\n",
+           av_get_pix_fmt_name(output->format),
+           output->width, output->height, output->pts);
+
+    return ff_filter_frame(outlink, output);
+
+fail:
+    clFinish(ctx->command_queue);
+    av_frame_free(&input);
+    av_frame_free(&output);
+    return err;
+}
+
+static av_cold void vflip_opencl_uninit(AVFilterContext *avctx)
+{
+    VflipOpenCLContext *ctx = avctx->priv;
+    int cle;
+
+    if (ctx->kernel) {
+        cle = clReleaseKernel(ctx->kernel);
+        if (cle != CL_SUCCESS)
+            av_log(avctx, AV_LOG_ERROR, "Failed to release "
+                   "kernel: %d.\n", cle);
+    }
+
+    if (ctx->command_queue) {
+        cle = clReleaseCommandQueue(ctx->command_queue);
+        if (cle != CL_SUCCESS)
+            av_log(avctx, AV_LOG_ERROR, "Failed to release "
+                   "command queue: %d.\n", cle);
+    }
+
+    ff_opencl_filter_uninit(avctx);
+}
+
+static const AVFilterPad vflip_opencl_inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = &vflip_opencl_filter_frame,
+        .config_props = &ff_opencl_filter_config_input,
+    },
+    { NULL }
+};
+
+static const AVFilterPad vflip_opencl_outputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .config_props = &ff_opencl_filter_config_output,
+    },
+    { NULL }
+};
+
+#define OFFSET(x) offsetof(VflipOpenCLContext, x)
+#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
+
+#if CONFIG_VFLIP_OPENCL_FILTER
+
+static const AVOption vflip_opencl_options[] = {
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(vflip_opencl);
+
+AVFilter ff_vf_vflip_opencl = {
+    .name           = "vflip_opencl",
+    .description    = NULL_IF_CONFIG_SMALL("Flip the input video vertically."),
+    .priv_size      = sizeof(VflipOpenCLContext),
+    .priv_class     = &vflip_opencl_class,
+    .init           = &ff_opencl_filter_init,
+    .uninit         = &vflip_opencl_uninit,
+    .query_formats  = &ff_opencl_filter_query_formats,
+    .inputs         = vflip_opencl_inputs,
+    .outputs        = vflip_opencl_outputs,
+    .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
+};
+
+#endif /* CONFIG_VFLIP_OPENCL_FILTER */
+
+#if CONFIG_HFLIP_OPENCL_FILTER
+
+static const AVOption hflip_opencl_options[] = {
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(hflip_opencl);
+
+AVFilter ff_vf_hflip_opencl = {
+    .name           = "hflip_opencl",
+    .description    = NULL_IF_CONFIG_SMALL("Horizontally flip the input video."),
+    .priv_size      = sizeof(VflipOpenCLContext),
+    .priv_class     = &hflip_opencl_class,
+    .init           = &ff_opencl_filter_init,
+    .uninit         = &vflip_opencl_uninit,
+    .query_formats  = &ff_opencl_filter_query_formats,
+    .inputs         = vflip_opencl_inputs,
+    .outputs        = vflip_opencl_outputs,
+    .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
+};
+
+#endif /* CONFIG_HFLIP_OPENCL_FILTER */