[FFmpeg-devel] add convolution_opencl

Submitted by Danil Iashchenko on March 11, 2018, 8:19 p.m.

Details

Message ID 1520799566-5548-2-git-send-email-danyaschenko@gmail.com
State New
Headers show

Commit Message

Danil Iashchenko March 11, 2018, 8:19 p.m.
---
 configure                           |   1 +
 libavfilter/Makefile                |   1 +
 libavfilter/allfilters.c            |   1 +
 libavfilter/opencl/convolution.cl   |  42 ++++
 libavfilter/opencl_source.h         |   3 +
 libavfilter/vf_convolution_opencl.c | 464 ++++++++++++++++++++++++++++++++++++
 6 files changed, 512 insertions(+)
 create mode 100644 libavfilter/opencl/convolution.cl
 create mode 100644 libavfilter/vf_convolution_opencl.c

Comments

Mark Thompson March 12, 2018, 10:42 p.m.
On 11/03/18 20:19, Danil Iashchenko wrote:
> ---
>  configure                           |   1 +
>  libavfilter/Makefile                |   1 +
>  libavfilter/allfilters.c            |   1 +
>  libavfilter/opencl/convolution.cl   |  42 ++++
>  libavfilter/opencl_source.h         |   3 +
>  libavfilter/vf_convolution_opencl.c | 464 ++++++++++++++++++++++++++++++++++++
>  6 files changed, 512 insertions(+)
>  create mode 100644 libavfilter/opencl/convolution.cl
>  create mode 100644 libavfilter/vf_convolution_opencl.c

The commit title should have a tag for the library, and maybe say a little more about what it does.

E.g. something like:
"""
lavfi: Add OpenCL convolution filter

Behaves like the existing convolution filter, except working on OpenCL
hardware frames.  Takes exactly the same options.
"""


When applying, git gives the warning:

Applying: add convolution_opencl
.git/rebase-apply/patch:578: new blank line at EOF.
+
warning: 1 line adds whitespace errors.

> diff --git a/configure b/configure
> index 6916b45..7c79e20 100755
> --- a/configure
> +++ b/configure
> @@ -3212,6 +3212,7 @@ bs2b_filter_deps="libbs2b"
>  colormatrix_filter_deps="gpl"
>  convolve_filter_deps="avcodec"
>  convolve_filter_select="fft"
> +convolution_opencl_filter_deps="opencl"

These should be in alphabetical order.

>  coreimage_filter_deps="coreimage appkit"
>  coreimage_filter_extralibs="-framework OpenGL"
>  coreimagesrc_filter_deps="coreimage appkit"
> diff --git a/libavfilter/Makefile b/libavfilter/Makefile
> index 6a60836..d005934 100644
> --- a/libavfilter/Makefile
> +++ b/libavfilter/Makefile
> @@ -156,6 +156,7 @@ OBJS-$(CONFIG_COLORLEVELS_FILTER)            += vf_colorlevels.o
>  OBJS-$(CONFIG_COLORMATRIX_FILTER)            += vf_colormatrix.o
>  OBJS-$(CONFIG_COLORSPACE_FILTER)             += vf_colorspace.o colorspacedsp.o
>  OBJS-$(CONFIG_CONVOLUTION_FILTER)            += vf_convolution.o
> +OBJS-$(CONFIG_CONVOLUTION_OPENCL_FILTER)     += vf_convolution_opencl.o opencl.o opencl/convolution.o
>  OBJS-$(CONFIG_CONVOLVE_FILTER)               += vf_convolve.o framesync.o
>  OBJS-$(CONFIG_COPY_FILTER)                   += vf_copy.o
>  OBJS-$(CONFIG_COREIMAGE_FILTER)              += vf_coreimage.o
> diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
> index 9adb109..f2dc55e 100644
> --- a/libavfilter/allfilters.c
> +++ b/libavfilter/allfilters.c
> @@ -166,6 +166,7 @@ static void register_all(void)
>      REGISTER_FILTER(COLORMATRIX,    colormatrix,    vf);
>      REGISTER_FILTER(COLORSPACE,     colorspace,     vf);
>      REGISTER_FILTER(CONVOLUTION,    convolution,    vf);
> +    REGISTER_FILTER(CONVOLUTION_OPENCL, convolution_opencl, vf);
>      REGISTER_FILTER(CONVOLVE,       convolve,       vf);
>      REGISTER_FILTER(COPY,           copy,           vf);
>      REGISTER_FILTER(COREIMAGE,      coreimage,      vf);
> diff --git a/libavfilter/opencl/convolution.cl b/libavfilter/opencl/convolution.cl
> new file mode 100644
> index 0000000..d074d98
> --- /dev/null
> +++ b/libavfilter/opencl/convolution.cl
> @@ -0,0 +1,42 @@
> +/*
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +__kernel void convolution_global(__write_only image2d_t dst,
> +                                 __read_only  image2d_t src,
> +                                 __constant int *coef_matrix_size,
> +                                 __constant float *coef_matrix,
> +                                 __constant float *div,
> +                                 __constant float *bias)
> +{
> +    const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE | CLK_FILTER_NEAREST);
> +
> +    int2 loc = (int2)(get_global_id(0), get_global_id(1));
> +
> +    float4 convPix = (float4)(0.0f, 0.0f, 0.0f, 0.0f);
> +    for (int i = 0; i < 4; i++) {
> +        for (int conv_i = -(coef_matrix_size[i] / 2); conv_i <= (coef_matrix_size[i] / 2); conv_i++) {
> +            for (int conv_j = -(coef_matrix_size[i] / 2); conv_j <= (coef_matrix_size[i] / 2); conv_j++) {
> +                float4 px = read_imagef(src, sampler, loc + (int2)(conv_j, conv_i));
> +                convPix[i] += px[i] * coef_matrix[(coef_matrix_size[i]*coef_matrix_size[i]*i)
> +                            + (conv_i+(coef_matrix_size[i] / 2)) * coef_matrix_size[i] + (conv_j+(coef_matrix_size[i] / 2))];
> +            }
> +        }
> +        convPix[i] = convPix[i] * div[i] + bias[i];
> +    }

Er, I don't think this is quite what I was expecting.  You're now applying the different matrices to the different components in the same plane?  Look at the convolution filter - that applies the different matrices to each different plane.  I think the CL code that you had in the first version is still what you want here, but the kernel invocation needs to be adjusted to pass the correct matrix for each plane.  (Planar video formats like YUV420P and NV12 are generally much more interesting here than the four-component RGBA-type formats.)

> +    write_imagef(dst, loc, convPix);
> +}
> diff --git a/libavfilter/opencl_source.h b/libavfilter/opencl_source.h
> index 23cdfc6..3029f64 100644
> --- a/libavfilter/opencl_source.h
> +++ b/libavfilter/opencl_source.h
> @@ -19,7 +19,10 @@
>  #ifndef AVFILTER_OPENCL_SOURCE_H
>  #define AVFILTER_OPENCL_SOURCE_H
>  
> +extern const char *ff_opencl_source_convolution;
>  extern const char *ff_opencl_source_overlay;
>  extern const char *ff_opencl_source_unsharp;
>  
> +
> +

Please don't add extra blank lines.  (You've also got a number of places below with extra blank lines.)

>  #endif /* AVFILTER_OPENCL_SOURCE_H */
> diff --git a/libavfilter/vf_convolution_opencl.c b/libavfilter/vf_convolution_opencl.c
> new file mode 100644
> index 0000000..60e2d1f
> --- /dev/null
> +++ b/libavfilter/vf_convolution_opencl.c
> @@ -0,0 +1,464 @@
> +/*
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include "libavutil/common.h"
> +#include "libavutil/imgutils.h"
> +#include "libavutil/mem.h"
> +#include "libavutil/opt.h"
> +#include "libavutil/pixdesc.h"
> +#include "libavutil/avstring.h"
> +
> +
> +#include "avfilter.h"
> +#include "internal.h"
> +#include "opencl.h"
> +#include "opencl_source.h"
> +#include "video.h"
> +
> +typedef struct ConvolutionOpenCLContext {
> +    OpenCLFilterContext ocf;
> +
> +    int              initialised;
> +    cl_kernel        kernel;
> +    cl_command_queue command_queue;
> +
> +    char *matrix_str[4];
> +    cl_int size[4];
> +
> +    cl_int total_matrix_len;
> +    cl_int matrix_length[4];
> +    float rdiv[4];
> +    float bias[4];
> +    cl_mem matrix;
> +    cl_mem rdiv_buffer;
> +    cl_mem bias_buffer;
> +    cl_mem size_buffer;
> +
> +} ConvolutionOpenCLContext;
> +
> +
> +
> +static int convolution_opencl_init(AVFilterContext *avctx)
> +{
> +    ConvolutionOpenCLContext *ctx = avctx->priv;
> +    cl_int cle;
> +    int err;
> +
> +    err = ff_opencl_filter_load_program(avctx, &ff_opencl_source_convolution, 1);
> +    if (err < 0)
> +        goto fail;
> +
> +    ctx->command_queue = clCreateCommandQueue(ctx->ocf.hwctx->context,
> +                                              ctx->ocf.hwctx->device_id,
> +                                              0, &cle);
> +    if (!ctx->command_queue) {
> +        av_log(avctx, AV_LOG_ERROR, "Failed to create OpenCL "
> +               "command queue: %d.\n", cle);
> +        err = AVERROR(EIO);
> +        goto fail;
> +    }
> +
> +    ctx->kernel = clCreateKernel(ctx->ocf.program, "convolution_global", &cle);
> +    if (!ctx->kernel) {
> +        av_log(avctx, AV_LOG_ERROR, "Failed to create kernel: %d.\n", cle);
> +        err = AVERROR(EIO);
> +        goto fail;
> +    }
> +
> +    ctx->initialised = 1;
> +    return 0;
> +
> +fail:
> +    if (ctx->command_queue)
> +        clReleaseCommandQueue(ctx->command_queue);
> +    if (ctx->kernel)
> +        clReleaseKernel(ctx->kernel);
> +    return err;
> +}
> +
> +
> +
> +static int convolution_opencl_make_filter_params(AVFilterContext *avctx)
> +{
> +    ConvolutionOpenCLContext *ctx = avctx->priv;
> +    float *matrix;
> +    float *matrix_rdiv;
> +    float *matrix_bias;
> +    int   *matrix_size;
> +    size_t matrix_bytes;
> +    cl_mem buffer, buffer_rdiv, buffer_bias, buffer_size;
> +    cl_int cle;
> +    int err, sscanf_err;
> +    int i, j, cnt;
> +    char *p, *arg, *saveptr = NULL;
> +    float input_matrix[4][49];
> +
> +    ctx->total_matrix_len = 0;
> +    for (i = 0; i < 4; i++) {
> +        p = ctx->matrix_str[i];
> +        while (ctx->matrix_length[i] < 49) {
> +            if (!(arg = av_strtok(p, " ", &saveptr)))
> +                break;
> +            p = NULL;
> +            sscanf_err = sscanf(arg, "%f", &input_matrix[i][ctx->matrix_length[i]]);
> +            if (!sscanf_err) {

$ cat sscanf.c
#include <stdio.h>

int main(void)
{
    int i;
    printf("%d\n", sscanf("", "%d", &i));
    return 0;
}
$ gcc sscanf.c
$ ./a.out 
-1

> +                av_log(ctx, AV_LOG_ERROR, "Matrix is sequence of 9, 25 or 49 signed numbers\n");
> +                return AVERROR(EINVAL);
> +            }
> +            ctx->matrix_length[i]++;
> +        }
> +        if (ctx->matrix_length[i] == 9) {
> +            ctx->size[i] = 3;
> +        } else if (ctx->matrix_length[i] == 25) {
> +                ctx->size[i] = 5;
> +        } else if (ctx->matrix_length[i] == 49) {
> +                ctx->size[i] = 7;
> +        } else {
> +            av_log(ctx, AV_LOG_ERROR, "Invalid matrix size:%d\n", ctx->matrix_length[i]);
> +            return AVERROR(EINVAL);
> +        }
> +        ctx->total_matrix_len += ctx->matrix_length[i];
> +
> +    }
> +    matrix_bytes = sizeof(float)*ctx->total_matrix_len;
> +    matrix = av_malloc(matrix_bytes);
> +    if (!matrix) {
> +        err = AVERROR(ENOMEM);
> +        goto fail;
> +    }
> +    cnt = 0;
> +    for (i = 0; i < 4; i++) {
> +        for (j = 0; j < ctx->matrix_length[i]; j++) {
> +            matrix[cnt++] = input_matrix[i][j];
> +        }
> +    }
> +
> +    buffer = clCreateBuffer(ctx->ocf.hwctx->context,
> +                            CL_MEM_READ_ONLY |
> +                            CL_MEM_COPY_HOST_PTR |
> +                            CL_MEM_HOST_NO_ACCESS,
> +                            matrix_bytes, matrix, &cle);
> +    if (!buffer) {
> +        av_log(avctx, AV_LOG_ERROR, "Failed to create matrix buffer: "
> +               "%d.\n", cle);
> +        err = AVERROR(EIO);
> +        goto fail;
> +    }
> +    ctx->matrix = buffer;
> +
> +
> +    matrix_bytes = sizeof(float)*4;
> +    matrix_rdiv = av_malloc(matrix_bytes);
> +    if (!matrix_rdiv) {
> +        err = AVERROR(ENOMEM);
> +        av_freep(&matrix_rdiv);
> +        goto fail;
> +    }
> +
> +    for (j = 0; j < 4; j++) {
> +        matrix_rdiv[j] = ctx->rdiv[j];
> +    }
> +    buffer_rdiv = clCreateBuffer(ctx->ocf.hwctx->context,
> +                            CL_MEM_READ_ONLY |
> +                            CL_MEM_COPY_HOST_PTR |
> +                            CL_MEM_HOST_NO_ACCESS,
> +                            matrix_bytes, matrix_rdiv, &cle);
> +    if (!buffer_rdiv) {
> +        av_log(avctx, AV_LOG_ERROR, "Failed to create rdiv buffer: "
> +               "%d.\n", cle);
> +        err = AVERROR(EIO);
> +        av_freep(&matrix_rdiv);
> +        goto fail;
> +    }
> +    ctx->rdiv_buffer = buffer_rdiv;
> +
> +
> +    matrix_bytes = sizeof(float)*4;
> +    matrix_bias = av_malloc(matrix_bytes);
> +    if (!matrix_bias) {
> +        err = AVERROR(ENOMEM);
> +        av_freep(&matrix_rdiv);
> +        av_freep(&matrix_bias);
> +        goto fail;
> +    }
> +
> +    for (j = 0; j < 4; j++) {
> +        matrix_bias[j] = ctx->bias[j];
> +    }
> +    buffer_bias = clCreateBuffer(ctx->ocf.hwctx->context,
> +                            CL_MEM_READ_ONLY |
> +                            CL_MEM_COPY_HOST_PTR |
> +                            CL_MEM_HOST_NO_ACCESS,
> +                            matrix_bytes, matrix_bias, &cle);
> +    if (!buffer_bias) {
> +        av_log(avctx, AV_LOG_ERROR, "Failed to create bias buffer: "
> +               "%d.\n", cle);
> +        err = AVERROR(EIO);
> +        av_freep(&matrix_rdiv);
> +        av_freep(&matrix_bias);
> +        goto fail;
> +    }
> +    ctx->bias_buffer = buffer_bias;
> +
> +    matrix_bytes = sizeof(int)*4;
> +    matrix_size = av_malloc(matrix_bytes);
> +    if (!matrix_size) {
> +        err = AVERROR(ENOMEM);
> +        av_freep(&matrix_rdiv);
> +        av_freep(&matrix_bias);
> +        av_freep(&matrix_size);
> +        goto fail;
> +    }
> +
> +    for (j = 0; j < 4; j++) {
> +        matrix_size[j] = ctx->size[j];
> +    }
> +    buffer_size = clCreateBuffer(ctx->ocf.hwctx->context,
> +                            CL_MEM_READ_ONLY |
> +                            CL_MEM_COPY_HOST_PTR |
> +                            CL_MEM_HOST_NO_ACCESS,
> +                            matrix_bytes, matrix_size, &cle);
> +    if (!buffer_size) {
> +        av_log(avctx, AV_LOG_ERROR, "Failed to create bias buffer: "
> +               "%d.\n", cle);
> +        err = AVERROR(EIO);
> +        av_freep(&matrix_rdiv);
> +        av_freep(&matrix_bias);
> +        av_freep(&matrix_size);
> +        goto fail;
> +    }
> +    ctx->size_buffer = buffer_size;
> +
> +
> +
> +
> +    err = 0;
> +fail:
> +    av_freep(&matrix);
> +    return err;
> +}
> +
> +static int convolution_opencl_filter_frame(AVFilterLink *inlink, AVFrame *input)
> +{
> +    AVFilterContext    *avctx = inlink->dst;
> +    AVFilterLink     *outlink = avctx->outputs[0];
> +    ConvolutionOpenCLContext *ctx = avctx->priv;
> +    AVFrame *output = NULL;
> +    cl_int cle;
> +    size_t global_work[2];
> +    cl_mem src, dst;
> +    int err, p;
> +
> +    av_log(ctx, AV_LOG_DEBUG, "Filter input: %s, %ux%u (%"PRId64").\n",
> +           av_get_pix_fmt_name(input->format),
> +           input->width, input->height, input->pts);
> +
> +    if (!input->hw_frames_ctx)
> +        return AVERROR(EINVAL);
> +
> +    if (!ctx->initialised) {
> +        err = convolution_opencl_init(avctx);
> +        if (err < 0)
> +            goto fail;
> +
> +        err = convolution_opencl_make_filter_params(avctx);
> +        if (err < 0)
> +            goto fail;
> +    }
> +
> +    output = ff_get_video_buffer(outlink, outlink->w, outlink->h);
> +    if (!output) {
> +        err = AVERROR(ENOMEM);
> +        goto fail;
> +    }
> +
> +    for (p = 0; p < FF_ARRAY_ELEMS(output->data); p++) {
> +        src = (cl_mem) input->data[p];
> +        dst = (cl_mem)output->data[p];
> +
> +
> +        if (!dst)
> +            break;
> +
> +        cle = clSetKernelArg(ctx->kernel, 0, sizeof(cl_mem), &dst);
> +        if (cle != CL_SUCCESS) {
> +            av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
> +                   "destination image argument: %d.\n", cle);
> +            goto fail;
> +        }
> +        cle = clSetKernelArg(ctx->kernel, 1, sizeof(cl_mem), &src);
> +        if (cle != CL_SUCCESS) {
> +            av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
> +                   "source image argument: %d.\n", cle);
> +            goto fail;
> +        }
> +        cle = clSetKernelArg(ctx->kernel, 2, sizeof(cl_mem), &ctx->size_buffer);
> +        if (cle != CL_SUCCESS) {
> +            av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
> +                   "matrix size argument: %d.\n", cle);
> +            goto fail;
> +        }
> +        cle = clSetKernelArg(ctx->kernel, 3, sizeof(cl_mem), &ctx->matrix);
> +        if (cle != CL_SUCCESS) {
> +            av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
> +                   "matrix argument: %d.\n", cle);
> +            goto fail;
> +        }
> +        cle = clSetKernelArg(ctx->kernel, 4, sizeof(cl_mem), &ctx->rdiv_buffer);
> +        if (cle != CL_SUCCESS) {
> +            av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
> +                   "div argument: %d.\n", cle);
> +            goto fail;
> +        }
> +        cle = clSetKernelArg(ctx->kernel, 5, sizeof(cl_mem), &ctx->bias_buffer);
> +        if (cle != CL_SUCCESS) {
> +            av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
> +                   "bias argument: %d.\n", cle);
> +            goto fail;
> +        }
> +
> +
> +
> +        global_work[0] = output->width;
> +        global_work[1] = output->height;
> +
> +
> +        av_log(avctx, AV_LOG_DEBUG, "Run kernel on plane %d "
> +               "(%"SIZE_SPECIFIER"x%"SIZE_SPECIFIER").\n",
> +               p, global_work[0], global_work[1]);
> +
> +        cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel, 2, NULL,
> +                                     global_work, NULL,
> +                                     0, NULL, NULL);
> +        if (cle != CL_SUCCESS) {
> +            av_log(avctx, AV_LOG_ERROR, "Failed to enqueue kernel: %d.\n",
> +                   cle);
> +            err = AVERROR(EIO);
> +            goto fail;
> +        }
> +    }
> +
> +    cle = clFinish(ctx->command_queue);
> +    if (cle != CL_SUCCESS) {
> +        av_log(avctx, AV_LOG_ERROR, "Failed to finish command queue: %d.\n",
> +               cle);
> +        err = AVERROR(EIO);
> +        goto fail;
> +    }
> +
> +    err = av_frame_copy_props(output, input);
> +    if (err < 0)
> +        goto fail;
> +
> +    av_frame_free(&input);
> +
> +    av_log(ctx, AV_LOG_DEBUG, "Filter output: %s, %ux%u (%"PRId64").\n",
> +           av_get_pix_fmt_name(output->format),
> +           output->width, output->height, output->pts);
> +
> +    return ff_filter_frame(outlink, output);
> +
> +fail:
> +    clFinish(ctx->command_queue);
> +    av_frame_free(&input);
> +    av_frame_free(&output);
> +    return err;
> +}
> +
> +static av_cold void convolution_opencl_uninit(AVFilterContext *avctx)
> +{
> +    ConvolutionOpenCLContext *ctx = avctx->priv;
> +    cl_int cle;
> +
> +    if (ctx->matrix) {
> +        clReleaseMemObject(ctx->matrix);

You want to release the other buffer objects as well.  (Though I'm not sure they need to be buffers, since each one will only contain a single value.)

> +    }
> +
> +    if (ctx->kernel) {
> +        cle = clReleaseKernel(ctx->kernel);
> +        if (cle != CL_SUCCESS)
> +            av_log(avctx, AV_LOG_ERROR, "Failed to release "
> +                   "kernel: %d.\n", cle);
> +    }
> +
> +    if (ctx->command_queue) {
> +        cle = clReleaseCommandQueue(ctx->command_queue);
> +        if (cle != CL_SUCCESS)
> +            av_log(avctx, AV_LOG_ERROR, "Failed to release "
> +                   "command queue: %d.\n", cle);
> +    }
> +
> +    ff_opencl_filter_uninit(avctx);
> +}
> +
> +#define OFFSET(x) offsetof(ConvolutionOpenCLContext, x)
> +#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
> +static const AVOption convolution_opencl_options[] = {
> +    { "0m", "set matrix for 1st plane", OFFSET(matrix_str[0]), AV_OPT_TYPE_STRING, {.str="0 0 0 0 1 0 0 0 0"}, 0, 0, FLAGS },
> +    { "1m", "set matrix for 2nd plane", OFFSET(matrix_str[1]), AV_OPT_TYPE_STRING, {.str="0 0 0 0 1 0 0 0 0"}, 0, 0, FLAGS },
> +    { "2m", "set matrix for 3rd plane", OFFSET(matrix_str[2]), AV_OPT_TYPE_STRING, {.str="0 0 0 0 1 0 0 0 0"}, 0, 0, FLAGS },
> +    { "3m", "set matrix for 4th plane", OFFSET(matrix_str[3]), AV_OPT_TYPE_STRING, {.str="0 0 0 0 1 0 0 0 0"}, 0, 0, FLAGS },
> +    { "0rdiv", "set rdiv for 1st plane", OFFSET(rdiv[0]), AV_OPT_TYPE_FLOAT, {.dbl=1.0}, 0.0, INT_MAX, FLAGS},
> +    { "1rdiv", "set rdiv for 2nd plane", OFFSET(rdiv[1]), AV_OPT_TYPE_FLOAT, {.dbl=1.0}, 0.0, INT_MAX, FLAGS},
> +    { "2rdiv", "set rdiv for 3rd plane", OFFSET(rdiv[2]), AV_OPT_TYPE_FLOAT, {.dbl=1.0}, 0.0, INT_MAX, FLAGS},
> +    { "3rdiv", "set rdiv for 4th plane", OFFSET(rdiv[3]), AV_OPT_TYPE_FLOAT, {.dbl=1.0}, 0.0, INT_MAX, FLAGS},
> +    { "0bias", "set bias for 1st plane", OFFSET(bias[0]), AV_OPT_TYPE_FLOAT, {.dbl=0.0}, 0.0, INT_MAX, FLAGS},
> +    { "1bias", "set bias for 2nd plane", OFFSET(bias[1]), AV_OPT_TYPE_FLOAT, {.dbl=0.0}, 0.0, INT_MAX, FLAGS},
> +    { "2bias", "set bias for 3rd plane", OFFSET(bias[2]), AV_OPT_TYPE_FLOAT, {.dbl=0.0}, 0.0, INT_MAX, FLAGS},
> +    { "3bias", "set bias for 4th plane", OFFSET(bias[3]), AV_OPT_TYPE_FLOAT, {.dbl=0.0}, 0.0, INT_MAX, FLAGS},
> +    { NULL }
> +};
> +
> +AVFILTER_DEFINE_CLASS(convolution_opencl);
> +
> +
> +
> +static const AVFilterPad convolution_opencl_inputs[] = {
> +    {
> +        .name         = "default",
> +        .type         = AVMEDIA_TYPE_VIDEO,
> +        .filter_frame = &convolution_opencl_filter_frame,
> +        .config_props = &ff_opencl_filter_config_input,
> +    },
> +    { NULL }
> +};
> +
> +static const AVFilterPad convolution_opencl_outputs[] = {
> +    {
> +        .name         = "default",
> +        .type         = AVMEDIA_TYPE_VIDEO,
> +        .config_props = &ff_opencl_filter_config_output,
> +    },
> +    { NULL }
> +};
> +
> +AVFilter ff_vf_convolution_opencl = {
> +    .name           = "convolution_opencl",
> +    .description    = NULL_IF_CONFIG_SMALL("Apply convolution mask to input video"),
> +    .priv_size      = sizeof(ConvolutionOpenCLContext),
> +    .priv_class     = &convolution_opencl_class,
> +    .init           = &ff_opencl_filter_init,
> +    .uninit         = &convolution_opencl_uninit,
> +    .query_formats  = &ff_opencl_filter_query_formats,
> +    .inputs         = convolution_opencl_inputs,
> +    .outputs        = convolution_opencl_outputs,
> +    .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
> +};
> +

Thanks,

- Mark

Patch hide | download patch | download mbox

diff --git a/configure b/configure
index 6916b45..7c79e20 100755
--- a/configure
+++ b/configure
@@ -3212,6 +3212,7 @@  bs2b_filter_deps="libbs2b"
 colormatrix_filter_deps="gpl"
 convolve_filter_deps="avcodec"
 convolve_filter_select="fft"
+convolution_opencl_filter_deps="opencl"
 coreimage_filter_deps="coreimage appkit"
 coreimage_filter_extralibs="-framework OpenGL"
 coreimagesrc_filter_deps="coreimage appkit"
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 6a60836..d005934 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -156,6 +156,7 @@  OBJS-$(CONFIG_COLORLEVELS_FILTER)            += vf_colorlevels.o
 OBJS-$(CONFIG_COLORMATRIX_FILTER)            += vf_colormatrix.o
 OBJS-$(CONFIG_COLORSPACE_FILTER)             += vf_colorspace.o colorspacedsp.o
 OBJS-$(CONFIG_CONVOLUTION_FILTER)            += vf_convolution.o
+OBJS-$(CONFIG_CONVOLUTION_OPENCL_FILTER)     += vf_convolution_opencl.o opencl.o opencl/convolution.o
 OBJS-$(CONFIG_CONVOLVE_FILTER)               += vf_convolve.o framesync.o
 OBJS-$(CONFIG_COPY_FILTER)                   += vf_copy.o
 OBJS-$(CONFIG_COREIMAGE_FILTER)              += vf_coreimage.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index 9adb109..f2dc55e 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -166,6 +166,7 @@  static void register_all(void)
     REGISTER_FILTER(COLORMATRIX,    colormatrix,    vf);
     REGISTER_FILTER(COLORSPACE,     colorspace,     vf);
     REGISTER_FILTER(CONVOLUTION,    convolution,    vf);
+    REGISTER_FILTER(CONVOLUTION_OPENCL, convolution_opencl, vf);
     REGISTER_FILTER(CONVOLVE,       convolve,       vf);
     REGISTER_FILTER(COPY,           copy,           vf);
     REGISTER_FILTER(COREIMAGE,      coreimage,      vf);
diff --git a/libavfilter/opencl/convolution.cl b/libavfilter/opencl/convolution.cl
new file mode 100644
index 0000000..d074d98
--- /dev/null
+++ b/libavfilter/opencl/convolution.cl
@@ -0,0 +1,42 @@ 
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+__kernel void convolution_global(__write_only image2d_t dst,
+                                 __read_only  image2d_t src,
+                                 __constant int *coef_matrix_size,
+                                 __constant float *coef_matrix,
+                                 __constant float *div,
+                                 __constant float *bias)
+{
+    const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE | CLK_FILTER_NEAREST);
+
+    int2 loc = (int2)(get_global_id(0), get_global_id(1));
+
+    float4 convPix = (float4)(0.0f, 0.0f, 0.0f, 0.0f);
+    for (int i = 0; i < 4; i++) {
+        for (int conv_i = -(coef_matrix_size[i] / 2); conv_i <= (coef_matrix_size[i] / 2); conv_i++) {
+            for (int conv_j = -(coef_matrix_size[i] / 2); conv_j <= (coef_matrix_size[i] / 2); conv_j++) {
+                float4 px = read_imagef(src, sampler, loc + (int2)(conv_j, conv_i));
+                convPix[i] += px[i] * coef_matrix[(coef_matrix_size[i]*coef_matrix_size[i]*i)
+                            + (conv_i+(coef_matrix_size[i] / 2)) * coef_matrix_size[i] + (conv_j+(coef_matrix_size[i] / 2))];
+            }
+        }
+        convPix[i] = convPix[i] * div[i] + bias[i];
+    }
+    write_imagef(dst, loc, convPix);
+}
diff --git a/libavfilter/opencl_source.h b/libavfilter/opencl_source.h
index 23cdfc6..3029f64 100644
--- a/libavfilter/opencl_source.h
+++ b/libavfilter/opencl_source.h
@@ -19,7 +19,10 @@ 
 #ifndef AVFILTER_OPENCL_SOURCE_H
 #define AVFILTER_OPENCL_SOURCE_H
 
+extern const char *ff_opencl_source_convolution;
 extern const char *ff_opencl_source_overlay;
 extern const char *ff_opencl_source_unsharp;
 
+
+
 #endif /* AVFILTER_OPENCL_SOURCE_H */
diff --git a/libavfilter/vf_convolution_opencl.c b/libavfilter/vf_convolution_opencl.c
new file mode 100644
index 0000000..60e2d1f
--- /dev/null
+++ b/libavfilter/vf_convolution_opencl.c
@@ -0,0 +1,464 @@ 
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/common.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/mem.h"
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+#include "libavutil/avstring.h"
+
+
+#include "avfilter.h"
+#include "internal.h"
+#include "opencl.h"
+#include "opencl_source.h"
+#include "video.h"
+
+typedef struct ConvolutionOpenCLContext {
+    OpenCLFilterContext ocf;
+
+    int              initialised;
+    cl_kernel        kernel;
+    cl_command_queue command_queue;
+
+    char *matrix_str[4];
+    cl_int size[4];
+
+    cl_int total_matrix_len;
+    cl_int matrix_length[4];
+    float rdiv[4];
+    float bias[4];
+    cl_mem matrix;
+    cl_mem rdiv_buffer;
+    cl_mem bias_buffer;
+    cl_mem size_buffer;
+
+} ConvolutionOpenCLContext;
+
+
+
+static int convolution_opencl_init(AVFilterContext *avctx)
+{
+    ConvolutionOpenCLContext *ctx = avctx->priv;
+    cl_int cle;
+    int err;
+
+    err = ff_opencl_filter_load_program(avctx, &ff_opencl_source_convolution, 1);
+    if (err < 0)
+        goto fail;
+
+    ctx->command_queue = clCreateCommandQueue(ctx->ocf.hwctx->context,
+                                              ctx->ocf.hwctx->device_id,
+                                              0, &cle);
+    if (!ctx->command_queue) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to create OpenCL "
+               "command queue: %d.\n", cle);
+        err = AVERROR(EIO);
+        goto fail;
+    }
+
+    ctx->kernel = clCreateKernel(ctx->ocf.program, "convolution_global", &cle);
+    if (!ctx->kernel) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to create kernel: %d.\n", cle);
+        err = AVERROR(EIO);
+        goto fail;
+    }
+
+    ctx->initialised = 1;
+    return 0;
+
+fail:
+    if (ctx->command_queue)
+        clReleaseCommandQueue(ctx->command_queue);
+    if (ctx->kernel)
+        clReleaseKernel(ctx->kernel);
+    return err;
+}
+
+
+
+static int convolution_opencl_make_filter_params(AVFilterContext *avctx)
+{
+    ConvolutionOpenCLContext *ctx = avctx->priv;
+    float *matrix;
+    float *matrix_rdiv;
+    float *matrix_bias;
+    int   *matrix_size;
+    size_t matrix_bytes;
+    cl_mem buffer, buffer_rdiv, buffer_bias, buffer_size;
+    cl_int cle;
+    int err, sscanf_err;
+    int i, j, cnt;
+    char *p, *arg, *saveptr = NULL;
+    float input_matrix[4][49];
+
+    ctx->total_matrix_len = 0;
+    for (i = 0; i < 4; i++) {
+        p = ctx->matrix_str[i];
+        while (ctx->matrix_length[i] < 49) {
+            if (!(arg = av_strtok(p, " ", &saveptr)))
+                break;
+            p = NULL;
+            sscanf_err = sscanf(arg, "%f", &input_matrix[i][ctx->matrix_length[i]]);
+            if (!sscanf_err) {
+                av_log(ctx, AV_LOG_ERROR, "Matrix is sequence of 9, 25 or 49 signed numbers\n");
+                return AVERROR(EINVAL);
+            }
+            ctx->matrix_length[i]++;
+        }
+        if (ctx->matrix_length[i] == 9) {
+            ctx->size[i] = 3;
+        } else if (ctx->matrix_length[i] == 25) {
+                ctx->size[i] = 5;
+        } else if (ctx->matrix_length[i] == 49) {
+                ctx->size[i] = 7;
+        } else {
+            av_log(ctx, AV_LOG_ERROR, "Invalid matrix size:%d\n", ctx->matrix_length[i]);
+            return AVERROR(EINVAL);
+        }
+        ctx->total_matrix_len += ctx->matrix_length[i];
+
+    }
+    matrix_bytes = sizeof(float)*ctx->total_matrix_len;
+    matrix = av_malloc(matrix_bytes);
+    if (!matrix) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+    cnt = 0;
+    for (i = 0; i < 4; i++) {
+        for (j = 0; j < ctx->matrix_length[i]; j++) {
+            matrix[cnt++] = input_matrix[i][j];
+        }
+    }
+
+    buffer = clCreateBuffer(ctx->ocf.hwctx->context,
+                            CL_MEM_READ_ONLY |
+                            CL_MEM_COPY_HOST_PTR |
+                            CL_MEM_HOST_NO_ACCESS,
+                            matrix_bytes, matrix, &cle);
+    if (!buffer) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to create matrix buffer: "
+               "%d.\n", cle);
+        err = AVERROR(EIO);
+        goto fail;
+    }
+    ctx->matrix = buffer;
+
+
+    matrix_bytes = sizeof(float)*4;
+    matrix_rdiv = av_malloc(matrix_bytes);
+    if (!matrix_rdiv) {
+        err = AVERROR(ENOMEM);
+        av_freep(&matrix_rdiv);
+        goto fail;
+    }
+
+    for (j = 0; j < 4; j++) {
+        matrix_rdiv[j] = ctx->rdiv[j];
+    }
+    buffer_rdiv = clCreateBuffer(ctx->ocf.hwctx->context,
+                            CL_MEM_READ_ONLY |
+                            CL_MEM_COPY_HOST_PTR |
+                            CL_MEM_HOST_NO_ACCESS,
+                            matrix_bytes, matrix_rdiv, &cle);
+    if (!buffer_rdiv) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to create rdiv buffer: "
+               "%d.\n", cle);
+        err = AVERROR(EIO);
+        av_freep(&matrix_rdiv);
+        goto fail;
+    }
+    ctx->rdiv_buffer = buffer_rdiv;
+
+
+    matrix_bytes = sizeof(float)*4;
+    matrix_bias = av_malloc(matrix_bytes);
+    if (!matrix_bias) {
+        err = AVERROR(ENOMEM);
+        av_freep(&matrix_rdiv);
+        av_freep(&matrix_bias);
+        goto fail;
+    }
+
+    for (j = 0; j < 4; j++) {
+        matrix_bias[j] = ctx->bias[j];
+    }
+    buffer_bias = clCreateBuffer(ctx->ocf.hwctx->context,
+                            CL_MEM_READ_ONLY |
+                            CL_MEM_COPY_HOST_PTR |
+                            CL_MEM_HOST_NO_ACCESS,
+                            matrix_bytes, matrix_bias, &cle);
+    if (!buffer_bias) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to create bias buffer: "
+               "%d.\n", cle);
+        err = AVERROR(EIO);
+        av_freep(&matrix_rdiv);
+        av_freep(&matrix_bias);
+        goto fail;
+    }
+    ctx->bias_buffer = buffer_bias;
+
+    matrix_bytes = sizeof(int)*4;
+    matrix_size = av_malloc(matrix_bytes);
+    if (!matrix_size) {
+        err = AVERROR(ENOMEM);
+        av_freep(&matrix_rdiv);
+        av_freep(&matrix_bias);
+        av_freep(&matrix_size);
+        goto fail;
+    }
+
+    for (j = 0; j < 4; j++) {
+        matrix_size[j] = ctx->size[j];
+    }
+    buffer_size = clCreateBuffer(ctx->ocf.hwctx->context,
+                            CL_MEM_READ_ONLY |
+                            CL_MEM_COPY_HOST_PTR |
+                            CL_MEM_HOST_NO_ACCESS,
+                            matrix_bytes, matrix_size, &cle);
+    if (!buffer_size) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to create bias buffer: "
+               "%d.\n", cle);
+        err = AVERROR(EIO);
+        av_freep(&matrix_rdiv);
+        av_freep(&matrix_bias);
+        av_freep(&matrix_size);
+        goto fail;
+    }
+    ctx->size_buffer = buffer_size;
+
+
+
+
+    err = 0;
+fail:
+    av_freep(&matrix);
+    return err;
+}
+
+static int convolution_opencl_filter_frame(AVFilterLink *inlink, AVFrame *input)
+{
+    AVFilterContext    *avctx = inlink->dst;
+    AVFilterLink     *outlink = avctx->outputs[0];
+    ConvolutionOpenCLContext *ctx = avctx->priv;
+    AVFrame *output = NULL;
+    cl_int cle;
+    size_t global_work[2];
+    cl_mem src, dst;
+    int err, p;
+
+    av_log(ctx, AV_LOG_DEBUG, "Filter input: %s, %ux%u (%"PRId64").\n",
+           av_get_pix_fmt_name(input->format),
+           input->width, input->height, input->pts);
+
+    if (!input->hw_frames_ctx)
+        return AVERROR(EINVAL);
+
+    if (!ctx->initialised) {
+        err = convolution_opencl_init(avctx);
+        if (err < 0)
+            goto fail;
+
+        err = convolution_opencl_make_filter_params(avctx);
+        if (err < 0)
+            goto fail;
+    }
+
+    output = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+    if (!output) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    for (p = 0; p < FF_ARRAY_ELEMS(output->data); p++) {
+        src = (cl_mem) input->data[p];
+        dst = (cl_mem)output->data[p];
+
+
+        if (!dst)
+            break;
+
+        cle = clSetKernelArg(ctx->kernel, 0, sizeof(cl_mem), &dst);
+        if (cle != CL_SUCCESS) {
+            av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
+                   "destination image argument: %d.\n", cle);
+            goto fail;
+        }
+        cle = clSetKernelArg(ctx->kernel, 1, sizeof(cl_mem), &src);
+        if (cle != CL_SUCCESS) {
+            av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
+                   "source image argument: %d.\n", cle);
+            goto fail;
+        }
+        cle = clSetKernelArg(ctx->kernel, 2, sizeof(cl_mem), &ctx->size_buffer);
+        if (cle != CL_SUCCESS) {
+            av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
+                   "matrix size argument: %d.\n", cle);
+            goto fail;
+        }
+        cle = clSetKernelArg(ctx->kernel, 3, sizeof(cl_mem), &ctx->matrix);
+        if (cle != CL_SUCCESS) {
+            av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
+                   "matrix argument: %d.\n", cle);
+            goto fail;
+        }
+        cle = clSetKernelArg(ctx->kernel, 4, sizeof(cl_mem), &ctx->rdiv_buffer);
+        if (cle != CL_SUCCESS) {
+            av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
+                   "div argument: %d.\n", cle);
+            goto fail;
+        }
+        cle = clSetKernelArg(ctx->kernel, 5, sizeof(cl_mem), &ctx->bias_buffer);
+        if (cle != CL_SUCCESS) {
+            av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
+                   "bias argument: %d.\n", cle);
+            goto fail;
+        }
+
+
+
+        global_work[0] = output->width;
+        global_work[1] = output->height;
+
+
+        av_log(avctx, AV_LOG_DEBUG, "Run kernel on plane %d "
+               "(%"SIZE_SPECIFIER"x%"SIZE_SPECIFIER").\n",
+               p, global_work[0], global_work[1]);
+
+        cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel, 2, NULL,
+                                     global_work, NULL,
+                                     0, NULL, NULL);
+        if (cle != CL_SUCCESS) {
+            av_log(avctx, AV_LOG_ERROR, "Failed to enqueue kernel: %d.\n",
+                   cle);
+            err = AVERROR(EIO);
+            goto fail;
+        }
+    }
+
+    cle = clFinish(ctx->command_queue);
+    if (cle != CL_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to finish command queue: %d.\n",
+               cle);
+        err = AVERROR(EIO);
+        goto fail;
+    }
+
+    err = av_frame_copy_props(output, input);
+    if (err < 0)
+        goto fail;
+
+    av_frame_free(&input);
+
+    av_log(ctx, AV_LOG_DEBUG, "Filter output: %s, %ux%u (%"PRId64").\n",
+           av_get_pix_fmt_name(output->format),
+           output->width, output->height, output->pts);
+
+    return ff_filter_frame(outlink, output);
+
+fail:
+    clFinish(ctx->command_queue);
+    av_frame_free(&input);
+    av_frame_free(&output);
+    return err;
+}
+
+static av_cold void convolution_opencl_uninit(AVFilterContext *avctx)
+{
+    ConvolutionOpenCLContext *ctx = avctx->priv;
+    cl_int cle;
+
+    if (ctx->matrix) {
+        clReleaseMemObject(ctx->matrix);
+    }
+
+    if (ctx->kernel) {
+        cle = clReleaseKernel(ctx->kernel);
+        if (cle != CL_SUCCESS)
+            av_log(avctx, AV_LOG_ERROR, "Failed to release "
+                   "kernel: %d.\n", cle);
+    }
+
+    if (ctx->command_queue) {
+        cle = clReleaseCommandQueue(ctx->command_queue);
+        if (cle != CL_SUCCESS)
+            av_log(avctx, AV_LOG_ERROR, "Failed to release "
+                   "command queue: %d.\n", cle);
+    }
+
+    ff_opencl_filter_uninit(avctx);
+}
+
+#define OFFSET(x) offsetof(ConvolutionOpenCLContext, x)
+#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
+static const AVOption convolution_opencl_options[] = {
+    { "0m", "set matrix for 1st plane", OFFSET(matrix_str[0]), AV_OPT_TYPE_STRING, {.str="0 0 0 0 1 0 0 0 0"}, 0, 0, FLAGS },
+    { "1m", "set matrix for 2nd plane", OFFSET(matrix_str[1]), AV_OPT_TYPE_STRING, {.str="0 0 0 0 1 0 0 0 0"}, 0, 0, FLAGS },
+    { "2m", "set matrix for 3rd plane", OFFSET(matrix_str[2]), AV_OPT_TYPE_STRING, {.str="0 0 0 0 1 0 0 0 0"}, 0, 0, FLAGS },
+    { "3m", "set matrix for 4th plane", OFFSET(matrix_str[3]), AV_OPT_TYPE_STRING, {.str="0 0 0 0 1 0 0 0 0"}, 0, 0, FLAGS },
+    { "0rdiv", "set rdiv for 1st plane", OFFSET(rdiv[0]), AV_OPT_TYPE_FLOAT, {.dbl=1.0}, 0.0, INT_MAX, FLAGS},
+    { "1rdiv", "set rdiv for 2nd plane", OFFSET(rdiv[1]), AV_OPT_TYPE_FLOAT, {.dbl=1.0}, 0.0, INT_MAX, FLAGS},
+    { "2rdiv", "set rdiv for 3rd plane", OFFSET(rdiv[2]), AV_OPT_TYPE_FLOAT, {.dbl=1.0}, 0.0, INT_MAX, FLAGS},
+    { "3rdiv", "set rdiv for 4th plane", OFFSET(rdiv[3]), AV_OPT_TYPE_FLOAT, {.dbl=1.0}, 0.0, INT_MAX, FLAGS},
+    { "0bias", "set bias for 1st plane", OFFSET(bias[0]), AV_OPT_TYPE_FLOAT, {.dbl=0.0}, 0.0, INT_MAX, FLAGS},
+    { "1bias", "set bias for 2nd plane", OFFSET(bias[1]), AV_OPT_TYPE_FLOAT, {.dbl=0.0}, 0.0, INT_MAX, FLAGS},
+    { "2bias", "set bias for 3rd plane", OFFSET(bias[2]), AV_OPT_TYPE_FLOAT, {.dbl=0.0}, 0.0, INT_MAX, FLAGS},
+    { "3bias", "set bias for 4th plane", OFFSET(bias[3]), AV_OPT_TYPE_FLOAT, {.dbl=0.0}, 0.0, INT_MAX, FLAGS},
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(convolution_opencl);
+
+
+
+static const AVFilterPad convolution_opencl_inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = &convolution_opencl_filter_frame,
+        .config_props = &ff_opencl_filter_config_input,
+    },
+    { NULL }
+};
+
+static const AVFilterPad convolution_opencl_outputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .config_props = &ff_opencl_filter_config_output,
+    },
+    { NULL }
+};
+
+AVFilter ff_vf_convolution_opencl = {
+    .name           = "convolution_opencl",
+    .description    = NULL_IF_CONFIG_SMALL("Apply convolution mask to input video"),
+    .priv_size      = sizeof(ConvolutionOpenCLContext),
+    .priv_class     = &convolution_opencl_class,
+    .init           = &ff_opencl_filter_init,
+    .uninit         = &convolution_opencl_uninit,
+    .query_formats  = &ff_opencl_filter_query_formats,
+    .inputs         = convolution_opencl_inputs,
+    .outputs        = convolution_opencl_outputs,
+    .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
+};
+
+
+