diff mbox

[FFmpeg-devel] lavfi: add sobel, prewitt, roberts filters

Message ID 1529886230-7458-1-git-send-email-danyaschenko@gmail.com
State Superseded
Headers show

Commit Message

Danil Iashchenko June 25, 2018, 12:23 a.m. UTC
Add opencl version of sobel, prewitt, roberts filters.
---
 configure                           |   3 +
 libavfilter/Makefile                |   8 +-
 libavfilter/allfilters.c            |   3 +
 libavfilter/opencl/convolution.cl   |  82 ++++++++++
 libavfilter/vf_convolution_opencl.c | 306 ++++++++++++++++++++++++++++++------
 5 files changed, 353 insertions(+), 49 deletions(-)

Comments

Marton Balint June 25, 2018, 8:41 a.m. UTC | #1
On Mon, 25 Jun 2018, Danil Iashchenko wrote:

> Add opencl version of sobel, prewitt, roberts filters.
> ---
> configure                           |   3 +
> libavfilter/Makefile                |   8 +-
> libavfilter/allfilters.c            |   3 +
> libavfilter/opencl/convolution.cl   |  82 ++++++++++
> libavfilter/vf_convolution_opencl.c | 306 ++++++++++++++++++++++++++++++------
> 5 files changed, 353 insertions(+), 49 deletions(-)
>

Please add proper documentation as well.

Thanks,
Marton
Mark Thompson June 28, 2018, 5:13 p.m. UTC | #2
On 25/06/18 01:23, Danil Iashchenko wrote:
> Add opencl version of sobel, prewitt, roberts filters.
> ---
>  configure                           |   3 +
>  libavfilter/Makefile                |   8 +-
>  libavfilter/allfilters.c            |   3 +
>  libavfilter/opencl/convolution.cl   |  82 ++++++++++
>  libavfilter/vf_convolution_opencl.c | 306 ++++++++++++++++++++++++++++++------
>  5 files changed, 353 insertions(+), 49 deletions(-)
> 
> ...
> diff --git a/libavfilter/opencl/convolution.cl b/libavfilter/opencl/convolution.cl
> index 03ef4ef..a2ddeba 100644
> --- a/libavfilter/opencl/convolution.cl
> +++ b/libavfilter/opencl/convolution.cl
> @@ -43,3 +43,85 @@ __kernel void convolution_global(__write_only image2d_t dst,
>       float4 dstPix = convPix * div + bias;
>       write_imagef(dst, loc, dstPix);
>  }
> +
> +
> +__kernel void sobel_global(__write_only image2d_t dst,
> +                           __read_only  image2d_t src,
> +                             float div,
> +                             float bias)
> +{
> +    const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE |
> +                               CLK_ADDRESS_CLAMP_TO_EDGE   |
> +                               CLK_FILTER_NEAREST);
> +
> +    int2 loc = (int2)(get_global_id(0), get_global_id(1));
> +
> +    float4 sum1 = read_imagef(src, sampler, loc + (int2)(-1,-1)) * -1 +
> +                  read_imagef(src, sampler, loc + (int2)( 0,-1)) * -2 +
> +                  read_imagef(src, sampler, loc + (int2)( 1,-1)) * -1 +
> +                  read_imagef(src, sampler, loc + (int2)(-1, 1)) *  1 +
> +                  read_imagef(src, sampler, loc + (int2)( 0, 1)) *  2 +
> +                  read_imagef(src, sampler, loc + (int2)( 1, 1)) *  1;
> +
> +    float4 sum2 = read_imagef(src, sampler, loc + (int2)(-1,-1)) * -1 +
> +                  read_imagef(src, sampler, loc + (int2)(-1, 0)) * -2 +
> +                  read_imagef(src, sampler, loc + (int2)(-1, 1)) * -1 +
> +                  read_imagef(src, sampler, loc + (int2)( 1,-1)) *  1 +
> +                  read_imagef(src, sampler, loc + (int2)( 1, 0)) *  2 +
> +                  read_imagef(src, sampler, loc + (int2)( 1, 1)) *  1;
> +
> +    float4 dstPix = (sqrt(sum1*sum1 + sum2*sum2)) * div + bias;

                        ^ hypot(sum1, sum2) ?

> +    write_imagef(dst, loc, dstPix);
> +}
> +
> +__kernel void prewitt_global(__write_only image2d_t dst,
> +                             __read_only  image2d_t src,
> +                             float div,
> +                             float bias)
> +{
> +    const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE |
> +                               CLK_ADDRESS_CLAMP_TO_EDGE   |
> +                               CLK_FILTER_NEAREST);
> +
> +    int2 loc = (int2)(get_global_id(0), get_global_id(1));
> +
> +    float4 sum1 = read_imagef(src, sampler, loc + (int2)(-1,-1)) *  1 +
> +                  read_imagef(src, sampler, loc + (int2)( 0,-1)) *  1 +
> +                  read_imagef(src, sampler, loc + (int2)( 1,-1)) *  1 +
> +                  read_imagef(src, sampler, loc + (int2)(-1, 1)) * -1 +
> +                  read_imagef(src, sampler, loc + (int2)( 0, 1)) * -1 +
> +                  read_imagef(src, sampler, loc + (int2)( 1, 1)) * -1;
> +
> +    float4 sum2 = read_imagef(src, sampler, loc + (int2)(-1,-1)) *  1 +
> +                  read_imagef(src, sampler, loc + (int2)(-1, 0)) *  1 +
> +                  read_imagef(src, sampler, loc + (int2)(-1, 1)) *  1 +
> +                  read_imagef(src, sampler, loc + (int2)( 1,-1)) * -1 +
> +                  read_imagef(src, sampler, loc + (int2)( 1, 0)) * -1 +
> +                  read_imagef(src, sampler, loc + (int2)( 1, 1)) * -1;
> +
> +    float4 dstPix = (sqrt(sum1*sum1 + sum2*sum2)) * div + bias;

Also here, and below.

> +    write_imagef(dst, loc, dstPix);
> +}
> +
> +__kernel void roberts_global(__write_only image2d_t dst,
> +                             __read_only  image2d_t src,
> +                             float div,
> +                             float bias)
> +{
> +    const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE |
> +                               CLK_ADDRESS_CLAMP_TO_EDGE   |
> +                               CLK_FILTER_NEAREST);
> +
> +    int2 loc = (int2)(get_global_id(0), get_global_id(1));
> +
> +    float4 sum1 = read_imagef(src, sampler, loc + (int2)(-1,-1)) *  1 +
> +                  read_imagef(src, sampler, loc + (int2)( 0,-1)) * -1;
> +
> +
> +    float4 sum2 = read_imagef(src, sampler, loc + (int2)(-1, 0)) * -1 +
> +                  read_imagef(src, sampler, loc + (int2)( 0, 0)) *  1;
> +
> +
> +    float4 dstPix = (sqrt(sum1*sum1 + sum2*sum2)) * div + bias;
> +    write_imagef(dst, loc, dstPix);
> +}
> diff --git a/libavfilter/vf_convolution_opencl.c b/libavfilter/vf_convolution_opencl.c
> index 4d0ecf8..8d12191 100644
> --- a/libavfilter/vf_convolution_opencl.c
> +++ b/libavfilter/vf_convolution_opencl.c
> @@ -36,7 +36,7 @@ typedef struct ConvolutionOpenCLContext {
>      OpenCLFilterContext ocf;
>  
>      int              initialised;
> -    cl_kernel        kernel;
> +    cl_kernel        kernel, kernel_sobel, kernel_prewitt, kernel_roberts;
>      cl_command_queue command_queue;
>  
>      char *matrix_str[4];
> @@ -47,8 +47,11 @@ typedef struct ConvolutionOpenCLContext {
>      cl_float rdivs[4];
>      cl_float biases[4];
>  
> -} ConvolutionOpenCLContext;
> +    cl_int planes;
> +    cl_float scale;
> +    cl_float delta;
>  
> +} ConvolutionOpenCLContext;
>  
>  static int convolution_opencl_init(AVFilterContext *avctx)
>  {
> @@ -76,6 +79,24 @@ static int convolution_opencl_init(AVFilterContext *avctx)
>          err = AVERROR(EIO);
>          goto fail;
>      }
> +    ctx->kernel_sobel = clCreateKernel(ctx->ocf.program, "sobel_global", &cle);
> +    if (!ctx->kernel_sobel) {
> +        av_log(avctx, AV_LOG_ERROR, "Failed to create kernel: %d.\n", cle);
> +        err = AVERROR(EIO);
> +        goto fail;
> +    }
> +    ctx->kernel_prewitt = clCreateKernel(ctx->ocf.program, "prewitt_global", &cle);
> +    if (!ctx->kernel_prewitt) {
> +        av_log(avctx, AV_LOG_ERROR, "Failed to create kernel: %d.\n", cle);
> +        err = AVERROR(EIO);
> +        goto fail;
> +    }
> +    ctx->kernel_roberts = clCreateKernel(ctx->ocf.program, "roberts_global", &cle);
> +    if (!ctx->kernel_roberts) {
> +        av_log(avctx, AV_LOG_ERROR, "Failed to create kernel: %d.\n", cle);
> +        err = AVERROR(EIO);
> +        goto fail;
> +    }

I think you shouldn't need to make all of these - only one kernel is ever used for the filter, so you should be able to pick the right one here?

>  
>      ctx->initialised = 1;
>      return 0;
> @@ -85,6 +106,12 @@ fail:
>          clReleaseCommandQueue(ctx->command_queue);
>      if (ctx->kernel)
>          clReleaseKernel(ctx->kernel);
> +    if (ctx->kernel_sobel)
> +        clReleaseKernel(ctx->kernel_sobel);
> +    if (ctx->kernel_prewitt)
> +        clReleaseKernel(ctx->kernel_prewitt);
> +    if (ctx->kernel_roberts)
> +        clReleaseKernel(ctx->kernel_roberts);
>      return err;
>  }
>  
> @@ -163,6 +190,16 @@ static int convolution_opencl_make_filter_params(AVFilterContext *avctx)
>      return 0;
>  }
>  
> +static int filters_opencl_make_filter_params(AVFilterContext *avctx)
> +{
> +    ConvolutionOpenCLContext *ctx = avctx->priv;
> +
> +    ctx->delta /= 255.0;
> +
> +    return 0;
> +}

I don't think it's worth making this a separate function.

> +
> +
>  static int convolution_opencl_filter_frame(AVFilterLink *inlink, AVFrame *input)
>  {
>      AVFilterContext *avctx = inlink->dst;
> @@ -170,9 +207,12 @@ static int convolution_opencl_filter_frame(AVFilterLink *inlink, AVFrame *input)
>      ConvolutionOpenCLContext *ctx = avctx->priv;
>      AVFrame *output = NULL;
>      cl_int cle;
> -    size_t global_work[2];
> +    size_t global_work[2], width, height;
>      cl_mem src, dst;
> +    cl_kernel cur_kernel;
>      int err, p;
> +    size_t origin[3] = {0, 0, 0};
> +    size_t region[3] = {0, 0, 1};
>  
>      av_log(ctx, AV_LOG_DEBUG, "Filter input: %s, %ux%u (%"PRId64").\n",
>             av_get_pix_fmt_name(input->format),
> @@ -186,9 +226,16 @@ static int convolution_opencl_filter_frame(AVFilterLink *inlink, AVFrame *input)
>          if (err < 0)
>              goto fail;
>  
> -        err = convolution_opencl_make_filter_params(avctx);
> -        if (err < 0)
> -            goto fail;
> +        if (!strcmp(avctx->filter->name, "convolution_opencl")) {
> +            err = convolution_opencl_make_filter_params(avctx);
> +            if (err < 0)
> +                goto fail;
> +        } else {
> +            err = filters_opencl_make_filter_params(avctx);
> +            if (err < 0)
> +                goto fail;
> +        }
> +
>      }
>  
>      output = ff_get_video_buffer(outlink, outlink->w, outlink->h);
> @@ -198,35 +245,97 @@ static int convolution_opencl_filter_frame(AVFilterLink *inlink, AVFrame *input)
>      }
>  
>      for (p = 0; p < FF_ARRAY_ELEMS(output->data); p++) {
> -        src = (cl_mem) input->data[p];
> -        dst = (cl_mem)output->data[p];
> +        src  = (cl_mem) input->data[p];
> +        dst  = (cl_mem) output->data[p];
>  
>          if (!dst)
>              break;
>  
> -        CL_SET_KERNEL_ARG(ctx->kernel, 0, cl_mem,   &dst);
> -        CL_SET_KERNEL_ARG(ctx->kernel, 1, cl_mem,   &src);
> -        CL_SET_KERNEL_ARG(ctx->kernel, 2, cl_int,   &ctx->dims[p]);
> -        CL_SET_KERNEL_ARG(ctx->kernel, 3, cl_mem,   &ctx->matrix[p]);
> -        CL_SET_KERNEL_ARG(ctx->kernel, 4, cl_float, &ctx->rdivs[p]);
> -        CL_SET_KERNEL_ARG(ctx->kernel, 5, cl_float, &ctx->biases[p]);
> -
> -        err = ff_opencl_filter_work_size_from_image(avctx, global_work, output, p, 0);
> -        if (err < 0)
> -            goto fail;
> -
> -        av_log(avctx, AV_LOG_DEBUG, "Run kernel on plane %d "
> -               "(%"SIZE_SPECIFIER"x%"SIZE_SPECIFIER").\n",
> -               p, global_work[0], global_work[1]);
> -
> -        cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel, 2, NULL,
> -                                     global_work, NULL,
> -                                     0, NULL, NULL);
> -        if (cle != CL_SUCCESS) {
> -            av_log(avctx, AV_LOG_ERROR, "Failed to enqueue kernel: %d.\n",
> -                   cle);
> -            err = AVERROR(EIO);
> -            goto fail;
> +        if (!strcmp(avctx->filter->name, "convolution_opencl")) {
> +            CL_SET_KERNEL_ARG(ctx->kernel, 0, cl_mem,   &dst);
> +            CL_SET_KERNEL_ARG(ctx->kernel, 1, cl_mem,   &src);
> +            CL_SET_KERNEL_ARG(ctx->kernel, 2, cl_int,   &ctx->dims[p]);
> +            CL_SET_KERNEL_ARG(ctx->kernel, 3, cl_mem,   &ctx->matrix[p]);
> +            CL_SET_KERNEL_ARG(ctx->kernel, 4, cl_float, &ctx->rdivs[p]);
> +            CL_SET_KERNEL_ARG(ctx->kernel, 5, cl_float, &ctx->biases[p]);
> +
> +            err = ff_opencl_filter_work_size_from_image(avctx, global_work, output, p, 0);
> +            if (err < 0)
> +                goto fail;
> +
> +            av_log(avctx, AV_LOG_DEBUG, "Run kernel on plane %d "
> +                   "(%"SIZE_SPECIFIER"x%"SIZE_SPECIFIER").\n",
> +                   p, global_work[0], global_work[1]);
> +
> +            cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel, 2, NULL,
> +                                         global_work, NULL,
> +                                         0, NULL, NULL);
> +            if (cle != CL_SUCCESS) {
> +                av_log(avctx, AV_LOG_ERROR, "Failed to enqueue kernel: %d.\n",
> +                       cle);
> +                err = AVERROR(EIO);
> +                goto fail;
> +            }
> +        } else {
> +            if (!(ctx->planes & (1 << p))) {
> +                cle = clGetImageInfo(src, CL_IMAGE_WIDTH,  sizeof(size_t),
> +                                     &width, NULL);
> +                if (cle != CL_SUCCESS) {
> +                    av_log(avctx, AV_LOG_ERROR, "Failed to query plane %d width: %d.\n",
> +                           p, cle);
> +                    err = AVERROR_UNKNOWN;
> +                    goto fail;
> +                }
> +
> +                cle = clGetImageInfo(src, CL_IMAGE_HEIGHT, sizeof(size_t),
> +                                     &height, NULL);
> +                if (cle != CL_SUCCESS) {
> +                    av_log(avctx, AV_LOG_ERROR, "Failed to query plane %d height: %d.\n",
> +                           p, cle);
> +                    err = AVERROR_UNKNOWN;
> +                    goto fail;
> +                }
> +                region[0] = width;
> +                region[1] = height;

I think you could reuse ff_opencl_filter_work_size_from_image() to simplify this?  (Use region as the work_size argument, then set region[2] to 1.)

> +
> +                cle = clEnqueueCopyImage(ctx->command_queue, src, dst, origin, origin, region, 0, NULL, NULL);
> +                if (cle != CL_SUCCESS) {
> +                    av_log(avctx, AV_LOG_ERROR, "Failed to copy plane %d: %d.\n",
> +                           p, cle);
> +                    err = AVERROR(EIO);
> +                    goto fail;
> +                }
> +            } else {
> +                if (!strcmp(avctx->filter->name, "sobel_opencl")) {
> +                    cur_kernel = ctx->kernel_sobel;
> +                } else if (!strcmp(avctx->filter->name, "prewitt_opencl")){
> +                    cur_kernel = ctx->kernel_prewitt;
> +                } else if (!strcmp(avctx->filter->name, "roberts_opencl")){
> +                    cur_kernel = ctx->kernel_roberts;
> +                }
> +                CL_SET_KERNEL_ARG(cur_kernel, 0, cl_mem,   &dst);
> +                CL_SET_KERNEL_ARG(cur_kernel, 1, cl_mem,   &src);
> +                CL_SET_KERNEL_ARG(cur_kernel, 2, cl_float, &ctx->scale);
> +                CL_SET_KERNEL_ARG(cur_kernel, 3, cl_float, &ctx->delta);
> +
> +                err = ff_opencl_filter_work_size_from_image(avctx, global_work, output, p, 0);
> +                if (err < 0)
> +                    goto fail;
> +
> +                av_log(avctx, AV_LOG_DEBUG, "Run kernel on plane %d "
> +                       "(%"SIZE_SPECIFIER"x%"SIZE_SPECIFIER").\n",
> +                       p, global_work[0], global_work[1]);
> +
> +                cle = clEnqueueNDRangeKernel(ctx->command_queue, cur_kernel, 2, NULL,
> +                                                         global_work, NULL,
> +                                                         0, NULL, NULL);
> +                if (cle != CL_SUCCESS) {
> +                    av_log(avctx, AV_LOG_ERROR, "Failed to enqueue kernel: %d.\n",
> +                           cle);
> +                    err = AVERROR(EIO);
> +                    goto fail;
> +                }
> +            }
>          }
>      }
>  
> @@ -273,6 +382,24 @@ static av_cold void convolution_opencl_uninit(AVFilterContext *avctx)
>              av_log(avctx, AV_LOG_ERROR, "Failed to release "
>                     "kernel: %d.\n", cle);
>      }
> +    if (ctx->kernel_sobel) {
> +        cle = clReleaseKernel(ctx->kernel_sobel);
> +        if (cle != CL_SUCCESS)
> +            av_log(avctx, AV_LOG_ERROR, "Failed to release "
> +                   "kernel: %d.\n", cle);
> +    }
> +    if (ctx->kernel_prewitt) {
> +        cle = clReleaseKernel(ctx->kernel_prewitt);
> +        if (cle != CL_SUCCESS)
> +            av_log(avctx, AV_LOG_ERROR, "Failed to release "
> +                   "kernel: %d.\n", cle);
> +    }
> +    if (ctx->kernel_roberts) {
> +        cle = clReleaseKernel(ctx->kernel_roberts);
> +        if (cle != CL_SUCCESS)
> +            av_log(avctx, AV_LOG_ERROR, "Failed to release "
> +                   "kernel: %d.\n", cle);
> +    }
>  
>      if (ctx->command_queue) {
>          cle = clReleaseCommandQueue(ctx->command_queue);
> @@ -284,8 +411,30 @@ static av_cold void convolution_opencl_uninit(AVFilterContext *avctx)
>      ff_opencl_filter_uninit(avctx);
>  }
>  
> +static const AVFilterPad convolution_opencl_inputs[] = {
> +    {
> +        .name         = "default",
> +        .type         = AVMEDIA_TYPE_VIDEO,
> +        .filter_frame = &convolution_opencl_filter_frame,
> +        .config_props = &ff_opencl_filter_config_input,
> +    },
> +    { NULL }
> +};
> +
> +static const AVFilterPad convolution_opencl_outputs[] = {
> +    {
> +        .name         = "default",
> +        .type         = AVMEDIA_TYPE_VIDEO,
> +        .config_props = &ff_opencl_filter_config_output,
> +    },
> +    { NULL }
> +};
> +
>  #define OFFSET(x) offsetof(ConvolutionOpenCLContext, x)
>  #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
> +
> +#if CONFIG_CONVOLUTION_OPENCL_FILTER
> +
>  static const AVOption convolution_opencl_options[] = {
>      { "0m", "set matrix for 2nd plane", OFFSET(matrix_str[0]), AV_OPT_TYPE_STRING, {.str="0 0 0 0 1 0 0 0 0"}, 0, 0, FLAGS },
>      { "1m", "set matrix for 2nd plane", OFFSET(matrix_str[1]), AV_OPT_TYPE_STRING, {.str="0 0 0 0 1 0 0 0 0"}, 0, 0, FLAGS },
> @@ -304,30 +453,63 @@ static const AVOption convolution_opencl_options[] = {
>  
>  AVFILTER_DEFINE_CLASS(convolution_opencl);
>  
> -static const AVFilterPad convolution_opencl_inputs[] = {
> -    {
> -        .name         = "default",
> -        .type         = AVMEDIA_TYPE_VIDEO,
> -        .filter_frame = &convolution_opencl_filter_frame,
> -        .config_props = &ff_opencl_filter_config_input,
> -    },
> +AVFilter ff_vf_convolution_opencl = {
> +    .name           = "convolution_opencl",
> +    .description    = NULL_IF_CONFIG_SMALL("Apply convolution mask to input video"),
> +    .priv_size      = sizeof(ConvolutionOpenCLContext),
> +    .priv_class     = &convolution_opencl_class,
> +    .init           = &ff_opencl_filter_init,
> +    .uninit         = &convolution_opencl_uninit,
> +    .query_formats  = &ff_opencl_filter_query_formats,
> +    .inputs         = convolution_opencl_inputs,
> +    .outputs        = convolution_opencl_outputs,
> +    .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
> +};
> +
> +#endif /* CONFIG_CONVOLUTION_OPENCL_FILTER */
> +
> +#if CONFIG_SOBEL_OPENCL_FILTER
> +
> +static const AVOption sobel_opencl_options[] = {
> +    { "planes", "set planes to filter", OFFSET(planes), AV_OPT_TYPE_INT,  {.i64=15}, 0, 15, FLAGS},
> +    { "scale",  "set scale",            OFFSET(scale), AV_OPT_TYPE_FLOAT, {.dbl=1.0}, 0.0,  65535, FLAGS},
> +    { "delta",  "set delta",            OFFSET(delta), AV_OPT_TYPE_FLOAT, {.dbl=0}, -65535, 65535, FLAGS},
>      { NULL }
>  };
>  
> -static const AVFilterPad convolution_opencl_outputs[] = {
> -    {
> -        .name         = "default",
> -        .type         = AVMEDIA_TYPE_VIDEO,
> -        .config_props = &ff_opencl_filter_config_output,
> -    },
> +AVFILTER_DEFINE_CLASS(sobel_opencl);
> +
> +AVFilter ff_vf_sobel_opencl = {
> +    .name           = "sobel_opencl",
> +    .description    = NULL_IF_CONFIG_SMALL("Apply sobel operator"),
> +    .priv_size      = sizeof(ConvolutionOpenCLContext),
> +    .priv_class     = &sobel_opencl_class,
> +    .init           = &ff_opencl_filter_init,
> +    .uninit         = &convolution_opencl_uninit,
> +    .query_formats  = &ff_opencl_filter_query_formats,
> +    .inputs         = convolution_opencl_inputs,
> +    .outputs        = convolution_opencl_outputs,
> +    .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
> +};
> +
> +#endif /* CONFIG_SOBEL_OPENCL_FILTER */
> +
> +#if CONFIG_PREWITT_OPENCL_FILTER
> +
> +static const AVOption prewitt_opencl_options[] = {
> +    { "planes", "set planes to filter", OFFSET(planes), AV_OPT_TYPE_INT,  {.i64=15}, 0, 15, FLAGS},
> +    { "scale",  "set scale",            OFFSET(scale), AV_OPT_TYPE_FLOAT, {.dbl=1.0}, 0.0,  65535, FLAGS},
> +    { "delta",  "set delta",            OFFSET(delta), AV_OPT_TYPE_FLOAT, {.dbl=0}, -65535, 65535, FLAGS},
>      { NULL }
>  };
>  
> -AVFilter ff_vf_convolution_opencl = {
> -    .name           = "convolution_opencl",
> -    .description    = NULL_IF_CONFIG_SMALL("Apply convolution mask to input video"),
> +AVFILTER_DEFINE_CLASS(prewitt_opencl);
> +
> +AVFilter ff_vf_prewitt_opencl = {
> +    .name           = "prewitt_opencl",
> +    .description    = NULL_IF_CONFIG_SMALL("Apply prewitt operator"),
>      .priv_size      = sizeof(ConvolutionOpenCLContext),
> -    .priv_class     = &convolution_opencl_class,
> +    .priv_class     = &prewitt_opencl_class,
>      .init           = &ff_opencl_filter_init,
>      .uninit         = &convolution_opencl_uninit,
>      .query_formats  = &ff_opencl_filter_query_formats,
> @@ -335,3 +517,31 @@ AVFilter ff_vf_convolution_opencl = {
>      .outputs        = convolution_opencl_outputs,
>      .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
>  };
> +
> +#endif /* CONFIG_PREWITT_OPENCL_FILTER */
> +
> +#if CONFIG_ROBERTS_OPENCL_FILTER
> +
> +static const AVOption roberts_opencl_options[] = {
> +    { "planes", "set planes to filter", OFFSET(planes), AV_OPT_TYPE_INT,  {.i64=15}, 0, 15, FLAGS},
> +    { "scale",  "set scale",            OFFSET(scale), AV_OPT_TYPE_FLOAT, {.dbl=1.0}, 0.0,  65535, FLAGS},
> +    { "delta",  "set delta",            OFFSET(delta), AV_OPT_TYPE_FLOAT, {.dbl=0}, -65535, 65535, FLAGS},
> +    { NULL }
> +};
> +
> +AVFILTER_DEFINE_CLASS(roberts_opencl);
> +
> +AVFilter ff_vf_roberts_opencl = {
> +    .name           = "roberts_opencl",
> +    .description    = NULL_IF_CONFIG_SMALL("Apply roberts operator"),
> +    .priv_size      = sizeof(ConvolutionOpenCLContext),
> +    .priv_class     = &roberts_opencl_class,
> +    .init           = &ff_opencl_filter_init,
> +    .uninit         = &convolution_opencl_uninit,
> +    .query_formats  = &ff_opencl_filter_query_formats,
> +    .inputs         = convolution_opencl_inputs,
> +    .outputs        = convolution_opencl_outputs,
> +    .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
> +};
> +
> +#endif /* CONFIG_ROBERTS_OPENCL_FILTER */
> 

I also did a bit of testing, everything else about this looks good.

Thanks,

- Mark
diff mbox

Patch

diff --git a/configure b/configure
index 6ad5ce8..2c6360d 100755
--- a/configure
+++ b/configure
@@ -3372,12 +3372,14 @@  perspective_filter_deps="gpl"
 phase_filter_deps="gpl"
 pp7_filter_deps="gpl"
 pp_filter_deps="gpl postproc"
+prewitt_opencl_filter_deps="opencl"
 procamp_vaapi_filter_deps="vaapi VAProcPipelineParameterBuffer"
 program_opencl_filter_deps="opencl"
 pullup_filter_deps="gpl"
 removelogo_filter_deps="avcodec avformat swscale"
 repeatfields_filter_deps="gpl"
 resample_filter_deps="avresample"
+roberts_opencl_filter_deps="opencl"
 rubberband_filter_deps="librubberband"
 sab_filter_deps="gpl swscale"
 scale2ref_filter_deps="swscale"
@@ -3396,6 +3398,7 @@  showspectrumpic_filter_deps="avcodec"
 showspectrumpic_filter_select="fft"
 signature_filter_deps="gpl avcodec avformat"
 smartblur_filter_deps="gpl swscale"
+sobel_opencl_filter_deps="opencl"
 sofalizer_filter_deps="libmysofa avcodec"
 sofalizer_filter_select="fft"
 spectrumsynth_filter_deps="avcodec"
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 34333aa..aa94a6d 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -171,7 +171,7 @@  OBJS-$(CONFIG_COLORMATRIX_FILTER)            += vf_colormatrix.o
 OBJS-$(CONFIG_COLORSPACE_FILTER)             += vf_colorspace.o colorspace.o colorspacedsp.o
 OBJS-$(CONFIG_CONVOLUTION_FILTER)            += vf_convolution.o
 OBJS-$(CONFIG_CONVOLUTION_OPENCL_FILTER)     += vf_convolution_opencl.o opencl.o \
-	                                        opencl/convolution.o
+                                                opencl/convolution.o
 OBJS-$(CONFIG_CONVOLVE_FILTER)               += vf_convolve.o framesync.o
 OBJS-$(CONFIG_COPY_FILTER)                   += vf_copy.o
 OBJS-$(CONFIG_COREIMAGE_FILTER)              += vf_coreimage.o
@@ -294,6 +294,8 @@  OBJS-$(CONFIG_PP_FILTER)                     += vf_pp.o
 OBJS-$(CONFIG_PP7_FILTER)                    += vf_pp7.o
 OBJS-$(CONFIG_PREMULTIPLY_FILTER)            += vf_premultiply.o framesync.o
 OBJS-$(CONFIG_PREWITT_FILTER)                += vf_convolution.o
+OBJS-$(CONFIG_PREWITT_OPENCL_FILTER)         += vf_convolution_opencl.o opencl.o \
+                                                opencl/convolution.o
 OBJS-$(CONFIG_PROCAMP_VAAPI_FILTER)          += vf_procamp_vaapi.o vaapi_vpp.o
 OBJS-$(CONFIG_PROGRAM_OPENCL_FILTER)         += vf_program_opencl.o opencl.o framesync.o
 OBJS-$(CONFIG_PSEUDOCOLOR_FILTER)            += vf_pseudocolor.o
@@ -310,6 +312,8 @@  OBJS-$(CONFIG_REMOVELOGO_FILTER)             += bbox.o lswsutils.o lavfutils.o v
 OBJS-$(CONFIG_REPEATFIELDS_FILTER)           += vf_repeatfields.o
 OBJS-$(CONFIG_REVERSE_FILTER)                += f_reverse.o
 OBJS-$(CONFIG_ROBERTS_FILTER)                += vf_convolution.o
+OBJS-$(CONFIG_ROBERTS_OPENCL_FILTER)         += vf_convolution_opencl.o opencl.o \
+                                                opencl/convolution.o
 OBJS-$(CONFIG_ROTATE_FILTER)                 += vf_rotate.o
 OBJS-$(CONFIG_SAB_FILTER)                    += vf_sab.o
 OBJS-$(CONFIG_SCALE_FILTER)                  += vf_scale.o scale.o
@@ -338,6 +342,8 @@  OBJS-$(CONFIG_SIGNALSTATS_FILTER)            += vf_signalstats.o
 OBJS-$(CONFIG_SIGNATURE_FILTER)              += vf_signature.o
 OBJS-$(CONFIG_SMARTBLUR_FILTER)              += vf_smartblur.o
 OBJS-$(CONFIG_SOBEL_FILTER)                  += vf_convolution.o
+OBJS-$(CONFIG_SOBEL_OPENCL_FILTER)           += vf_convolution_opencl.o opencl.o \
+                                                opencl/convolution.o
 OBJS-$(CONFIG_SPLIT_FILTER)                  += split.o
 OBJS-$(CONFIG_SPP_FILTER)                    += vf_spp.o
 OBJS-$(CONFIG_SRCNN_FILTER)                  += vf_srcnn.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index e07fe67..f8bf177 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -282,6 +282,7 @@  extern AVFilter ff_vf_pp;
 extern AVFilter ff_vf_pp7;
 extern AVFilter ff_vf_premultiply;
 extern AVFilter ff_vf_prewitt;
+extern AVFilter ff_vf_prewitt_opencl;
 extern AVFilter ff_vf_procamp_vaapi;
 extern AVFilter ff_vf_program_opencl;
 extern AVFilter ff_vf_pseudocolor;
@@ -298,6 +299,7 @@  extern AVFilter ff_vf_removelogo;
 extern AVFilter ff_vf_repeatfields;
 extern AVFilter ff_vf_reverse;
 extern AVFilter ff_vf_roberts;
+extern AVFilter ff_vf_roberts_opencl;
 extern AVFilter ff_vf_rotate;
 extern AVFilter ff_vf_sab;
 extern AVFilter ff_vf_scale;
@@ -326,6 +328,7 @@  extern AVFilter ff_vf_signalstats;
 extern AVFilter ff_vf_signature;
 extern AVFilter ff_vf_smartblur;
 extern AVFilter ff_vf_sobel;
+extern AVFilter ff_vf_sobel_opencl;
 extern AVFilter ff_vf_split;
 extern AVFilter ff_vf_spp;
 extern AVFilter ff_vf_srcnn;
diff --git a/libavfilter/opencl/convolution.cl b/libavfilter/opencl/convolution.cl
index 03ef4ef..a2ddeba 100644
--- a/libavfilter/opencl/convolution.cl
+++ b/libavfilter/opencl/convolution.cl
@@ -43,3 +43,85 @@  __kernel void convolution_global(__write_only image2d_t dst,
      float4 dstPix = convPix * div + bias;
      write_imagef(dst, loc, dstPix);
 }
+
+
+__kernel void sobel_global(__write_only image2d_t dst,
+                           __read_only  image2d_t src,
+                             float div,
+                             float bias)
+{
+    const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE |
+                               CLK_ADDRESS_CLAMP_TO_EDGE   |
+                               CLK_FILTER_NEAREST);
+
+    int2 loc = (int2)(get_global_id(0), get_global_id(1));
+
+    float4 sum1 = read_imagef(src, sampler, loc + (int2)(-1,-1)) * -1 +
+                  read_imagef(src, sampler, loc + (int2)( 0,-1)) * -2 +
+                  read_imagef(src, sampler, loc + (int2)( 1,-1)) * -1 +
+                  read_imagef(src, sampler, loc + (int2)(-1, 1)) *  1 +
+                  read_imagef(src, sampler, loc + (int2)( 0, 1)) *  2 +
+                  read_imagef(src, sampler, loc + (int2)( 1, 1)) *  1;
+
+    float4 sum2 = read_imagef(src, sampler, loc + (int2)(-1,-1)) * -1 +
+                  read_imagef(src, sampler, loc + (int2)(-1, 0)) * -2 +
+                  read_imagef(src, sampler, loc + (int2)(-1, 1)) * -1 +
+                  read_imagef(src, sampler, loc + (int2)( 1,-1)) *  1 +
+                  read_imagef(src, sampler, loc + (int2)( 1, 0)) *  2 +
+                  read_imagef(src, sampler, loc + (int2)( 1, 1)) *  1;
+
+    float4 dstPix = (sqrt(sum1*sum1 + sum2*sum2)) * div + bias;
+    write_imagef(dst, loc, dstPix);
+}
+
+__kernel void prewitt_global(__write_only image2d_t dst,
+                             __read_only  image2d_t src,
+                             float div,
+                             float bias)
+{
+    const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE |
+                               CLK_ADDRESS_CLAMP_TO_EDGE   |
+                               CLK_FILTER_NEAREST);
+
+    int2 loc = (int2)(get_global_id(0), get_global_id(1));
+
+    float4 sum1 = read_imagef(src, sampler, loc + (int2)(-1,-1)) *  1 +
+                  read_imagef(src, sampler, loc + (int2)( 0,-1)) *  1 +
+                  read_imagef(src, sampler, loc + (int2)( 1,-1)) *  1 +
+                  read_imagef(src, sampler, loc + (int2)(-1, 1)) * -1 +
+                  read_imagef(src, sampler, loc + (int2)( 0, 1)) * -1 +
+                  read_imagef(src, sampler, loc + (int2)( 1, 1)) * -1;
+
+    float4 sum2 = read_imagef(src, sampler, loc + (int2)(-1,-1)) *  1 +
+                  read_imagef(src, sampler, loc + (int2)(-1, 0)) *  1 +
+                  read_imagef(src, sampler, loc + (int2)(-1, 1)) *  1 +
+                  read_imagef(src, sampler, loc + (int2)( 1,-1)) * -1 +
+                  read_imagef(src, sampler, loc + (int2)( 1, 0)) * -1 +
+                  read_imagef(src, sampler, loc + (int2)( 1, 1)) * -1;
+
+    float4 dstPix = (sqrt(sum1*sum1 + sum2*sum2)) * div + bias;
+    write_imagef(dst, loc, dstPix);
+}
+
+__kernel void roberts_global(__write_only image2d_t dst,
+                             __read_only  image2d_t src,
+                             float div,
+                             float bias)
+{
+    const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE |
+                               CLK_ADDRESS_CLAMP_TO_EDGE   |
+                               CLK_FILTER_NEAREST);
+
+    int2 loc = (int2)(get_global_id(0), get_global_id(1));
+
+    float4 sum1 = read_imagef(src, sampler, loc + (int2)(-1,-1)) *  1 +
+                  read_imagef(src, sampler, loc + (int2)( 0,-1)) * -1;
+
+
+    float4 sum2 = read_imagef(src, sampler, loc + (int2)(-1, 0)) * -1 +
+                  read_imagef(src, sampler, loc + (int2)( 0, 0)) *  1;
+
+
+    float4 dstPix = (sqrt(sum1*sum1 + sum2*sum2)) * div + bias;
+    write_imagef(dst, loc, dstPix);
+}
diff --git a/libavfilter/vf_convolution_opencl.c b/libavfilter/vf_convolution_opencl.c
index 4d0ecf8..8d12191 100644
--- a/libavfilter/vf_convolution_opencl.c
+++ b/libavfilter/vf_convolution_opencl.c
@@ -36,7 +36,7 @@  typedef struct ConvolutionOpenCLContext {
     OpenCLFilterContext ocf;
 
     int              initialised;
-    cl_kernel        kernel;
+    cl_kernel        kernel, kernel_sobel, kernel_prewitt, kernel_roberts;
     cl_command_queue command_queue;
 
     char *matrix_str[4];
@@ -47,8 +47,11 @@  typedef struct ConvolutionOpenCLContext {
     cl_float rdivs[4];
     cl_float biases[4];
 
-} ConvolutionOpenCLContext;
+    cl_int planes;
+    cl_float scale;
+    cl_float delta;
 
+} ConvolutionOpenCLContext;
 
 static int convolution_opencl_init(AVFilterContext *avctx)
 {
@@ -76,6 +79,24 @@  static int convolution_opencl_init(AVFilterContext *avctx)
         err = AVERROR(EIO);
         goto fail;
     }
+    ctx->kernel_sobel = clCreateKernel(ctx->ocf.program, "sobel_global", &cle);
+    if (!ctx->kernel_sobel) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to create kernel: %d.\n", cle);
+        err = AVERROR(EIO);
+        goto fail;
+    }
+    ctx->kernel_prewitt = clCreateKernel(ctx->ocf.program, "prewitt_global", &cle);
+    if (!ctx->kernel_prewitt) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to create kernel: %d.\n", cle);
+        err = AVERROR(EIO);
+        goto fail;
+    }
+    ctx->kernel_roberts = clCreateKernel(ctx->ocf.program, "roberts_global", &cle);
+    if (!ctx->kernel_roberts) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to create kernel: %d.\n", cle);
+        err = AVERROR(EIO);
+        goto fail;
+    }
 
     ctx->initialised = 1;
     return 0;
@@ -85,6 +106,12 @@  fail:
         clReleaseCommandQueue(ctx->command_queue);
     if (ctx->kernel)
         clReleaseKernel(ctx->kernel);
+    if (ctx->kernel_sobel)
+        clReleaseKernel(ctx->kernel_sobel);
+    if (ctx->kernel_prewitt)
+        clReleaseKernel(ctx->kernel_prewitt);
+    if (ctx->kernel_roberts)
+        clReleaseKernel(ctx->kernel_roberts);
     return err;
 }
 
@@ -163,6 +190,16 @@  static int convolution_opencl_make_filter_params(AVFilterContext *avctx)
     return 0;
 }
 
+static int filters_opencl_make_filter_params(AVFilterContext *avctx)
+{
+    ConvolutionOpenCLContext *ctx = avctx->priv;
+
+    ctx->delta /= 255.0;
+
+    return 0;
+}
+
+
 static int convolution_opencl_filter_frame(AVFilterLink *inlink, AVFrame *input)
 {
     AVFilterContext *avctx = inlink->dst;
@@ -170,9 +207,12 @@  static int convolution_opencl_filter_frame(AVFilterLink *inlink, AVFrame *input)
     ConvolutionOpenCLContext *ctx = avctx->priv;
     AVFrame *output = NULL;
     cl_int cle;
-    size_t global_work[2];
+    size_t global_work[2], width, height;
     cl_mem src, dst;
+    cl_kernel cur_kernel;
     int err, p;
+    size_t origin[3] = {0, 0, 0};
+    size_t region[3] = {0, 0, 1};
 
     av_log(ctx, AV_LOG_DEBUG, "Filter input: %s, %ux%u (%"PRId64").\n",
            av_get_pix_fmt_name(input->format),
@@ -186,9 +226,16 @@  static int convolution_opencl_filter_frame(AVFilterLink *inlink, AVFrame *input)
         if (err < 0)
             goto fail;
 
-        err = convolution_opencl_make_filter_params(avctx);
-        if (err < 0)
-            goto fail;
+        if (!strcmp(avctx->filter->name, "convolution_opencl")) {
+            err = convolution_opencl_make_filter_params(avctx);
+            if (err < 0)
+                goto fail;
+        } else {
+            err = filters_opencl_make_filter_params(avctx);
+            if (err < 0)
+                goto fail;
+        }
+
     }
 
     output = ff_get_video_buffer(outlink, outlink->w, outlink->h);
@@ -198,35 +245,97 @@  static int convolution_opencl_filter_frame(AVFilterLink *inlink, AVFrame *input)
     }
 
     for (p = 0; p < FF_ARRAY_ELEMS(output->data); p++) {
-        src = (cl_mem) input->data[p];
-        dst = (cl_mem)output->data[p];
+        src  = (cl_mem) input->data[p];
+        dst  = (cl_mem) output->data[p];
 
         if (!dst)
             break;
 
-        CL_SET_KERNEL_ARG(ctx->kernel, 0, cl_mem,   &dst);
-        CL_SET_KERNEL_ARG(ctx->kernel, 1, cl_mem,   &src);
-        CL_SET_KERNEL_ARG(ctx->kernel, 2, cl_int,   &ctx->dims[p]);
-        CL_SET_KERNEL_ARG(ctx->kernel, 3, cl_mem,   &ctx->matrix[p]);
-        CL_SET_KERNEL_ARG(ctx->kernel, 4, cl_float, &ctx->rdivs[p]);
-        CL_SET_KERNEL_ARG(ctx->kernel, 5, cl_float, &ctx->biases[p]);
-
-        err = ff_opencl_filter_work_size_from_image(avctx, global_work, output, p, 0);
-        if (err < 0)
-            goto fail;
-
-        av_log(avctx, AV_LOG_DEBUG, "Run kernel on plane %d "
-               "(%"SIZE_SPECIFIER"x%"SIZE_SPECIFIER").\n",
-               p, global_work[0], global_work[1]);
-
-        cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel, 2, NULL,
-                                     global_work, NULL,
-                                     0, NULL, NULL);
-        if (cle != CL_SUCCESS) {
-            av_log(avctx, AV_LOG_ERROR, "Failed to enqueue kernel: %d.\n",
-                   cle);
-            err = AVERROR(EIO);
-            goto fail;
+        if (!strcmp(avctx->filter->name, "convolution_opencl")) {
+            CL_SET_KERNEL_ARG(ctx->kernel, 0, cl_mem,   &dst);
+            CL_SET_KERNEL_ARG(ctx->kernel, 1, cl_mem,   &src);
+            CL_SET_KERNEL_ARG(ctx->kernel, 2, cl_int,   &ctx->dims[p]);
+            CL_SET_KERNEL_ARG(ctx->kernel, 3, cl_mem,   &ctx->matrix[p]);
+            CL_SET_KERNEL_ARG(ctx->kernel, 4, cl_float, &ctx->rdivs[p]);
+            CL_SET_KERNEL_ARG(ctx->kernel, 5, cl_float, &ctx->biases[p]);
+
+            err = ff_opencl_filter_work_size_from_image(avctx, global_work, output, p, 0);
+            if (err < 0)
+                goto fail;
+
+            av_log(avctx, AV_LOG_DEBUG, "Run kernel on plane %d "
+                   "(%"SIZE_SPECIFIER"x%"SIZE_SPECIFIER").\n",
+                   p, global_work[0], global_work[1]);
+
+            cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel, 2, NULL,
+                                         global_work, NULL,
+                                         0, NULL, NULL);
+            if (cle != CL_SUCCESS) {
+                av_log(avctx, AV_LOG_ERROR, "Failed to enqueue kernel: %d.\n",
+                       cle);
+                err = AVERROR(EIO);
+                goto fail;
+            }
+        } else {
+            if (!(ctx->planes & (1 << p))) {
+                cle = clGetImageInfo(src, CL_IMAGE_WIDTH,  sizeof(size_t),
+                                     &width, NULL);
+                if (cle != CL_SUCCESS) {
+                    av_log(avctx, AV_LOG_ERROR, "Failed to query plane %d width: %d.\n",
+                           p, cle);
+                    err = AVERROR_UNKNOWN;
+                    goto fail;
+                }
+
+                cle = clGetImageInfo(src, CL_IMAGE_HEIGHT, sizeof(size_t),
+                                     &height, NULL);
+                if (cle != CL_SUCCESS) {
+                    av_log(avctx, AV_LOG_ERROR, "Failed to query plane %d height: %d.\n",
+                           p, cle);
+                    err = AVERROR_UNKNOWN;
+                    goto fail;
+                }
+                region[0] = width;
+                region[1] = height;
+
+                cle = clEnqueueCopyImage(ctx->command_queue, src, dst, origin, origin, region, 0, NULL, NULL);
+                if (cle != CL_SUCCESS) {
+                    av_log(avctx, AV_LOG_ERROR, "Failed to copy plane %d: %d.\n",
+                           p, cle);
+                    err = AVERROR(EIO);
+                    goto fail;
+                }
+            } else {
+                if (!strcmp(avctx->filter->name, "sobel_opencl")) {
+                    cur_kernel = ctx->kernel_sobel;
+                } else if (!strcmp(avctx->filter->name, "prewitt_opencl")){
+                    cur_kernel = ctx->kernel_prewitt;
+                } else if (!strcmp(avctx->filter->name, "roberts_opencl")){
+                    cur_kernel = ctx->kernel_roberts;
+                }
+                CL_SET_KERNEL_ARG(cur_kernel, 0, cl_mem,   &dst);
+                CL_SET_KERNEL_ARG(cur_kernel, 1, cl_mem,   &src);
+                CL_SET_KERNEL_ARG(cur_kernel, 2, cl_float, &ctx->scale);
+                CL_SET_KERNEL_ARG(cur_kernel, 3, cl_float, &ctx->delta);
+
+                err = ff_opencl_filter_work_size_from_image(avctx, global_work, output, p, 0);
+                if (err < 0)
+                    goto fail;
+
+                av_log(avctx, AV_LOG_DEBUG, "Run kernel on plane %d "
+                       "(%"SIZE_SPECIFIER"x%"SIZE_SPECIFIER").\n",
+                       p, global_work[0], global_work[1]);
+
+                cle = clEnqueueNDRangeKernel(ctx->command_queue, cur_kernel, 2, NULL,
+                                                         global_work, NULL,
+                                                         0, NULL, NULL);
+                if (cle != CL_SUCCESS) {
+                    av_log(avctx, AV_LOG_ERROR, "Failed to enqueue kernel: %d.\n",
+                           cle);
+                    err = AVERROR(EIO);
+                    goto fail;
+                }
+            }
         }
     }
 
@@ -273,6 +382,24 @@  static av_cold void convolution_opencl_uninit(AVFilterContext *avctx)
             av_log(avctx, AV_LOG_ERROR, "Failed to release "
                    "kernel: %d.\n", cle);
     }
+    if (ctx->kernel_sobel) {
+        cle = clReleaseKernel(ctx->kernel_sobel);
+        if (cle != CL_SUCCESS)
+            av_log(avctx, AV_LOG_ERROR, "Failed to release "
+                   "kernel: %d.\n", cle);
+    }
+    if (ctx->kernel_prewitt) {
+        cle = clReleaseKernel(ctx->kernel_prewitt);
+        if (cle != CL_SUCCESS)
+            av_log(avctx, AV_LOG_ERROR, "Failed to release "
+                   "kernel: %d.\n", cle);
+    }
+    if (ctx->kernel_roberts) {
+        cle = clReleaseKernel(ctx->kernel_roberts);
+        if (cle != CL_SUCCESS)
+            av_log(avctx, AV_LOG_ERROR, "Failed to release "
+                   "kernel: %d.\n", cle);
+    }
 
     if (ctx->command_queue) {
         cle = clReleaseCommandQueue(ctx->command_queue);
@@ -284,8 +411,30 @@  static av_cold void convolution_opencl_uninit(AVFilterContext *avctx)
     ff_opencl_filter_uninit(avctx);
 }
 
+static const AVFilterPad convolution_opencl_inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = &convolution_opencl_filter_frame,
+        .config_props = &ff_opencl_filter_config_input,
+    },
+    { NULL }
+};
+
+static const AVFilterPad convolution_opencl_outputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .config_props = &ff_opencl_filter_config_output,
+    },
+    { NULL }
+};
+
 #define OFFSET(x) offsetof(ConvolutionOpenCLContext, x)
 #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
+
+#if CONFIG_CONVOLUTION_OPENCL_FILTER
+
 static const AVOption convolution_opencl_options[] = {
     { "0m", "set matrix for 2nd plane", OFFSET(matrix_str[0]), AV_OPT_TYPE_STRING, {.str="0 0 0 0 1 0 0 0 0"}, 0, 0, FLAGS },
     { "1m", "set matrix for 2nd plane", OFFSET(matrix_str[1]), AV_OPT_TYPE_STRING, {.str="0 0 0 0 1 0 0 0 0"}, 0, 0, FLAGS },
@@ -304,30 +453,63 @@  static const AVOption convolution_opencl_options[] = {
 
 AVFILTER_DEFINE_CLASS(convolution_opencl);
 
-static const AVFilterPad convolution_opencl_inputs[] = {
-    {
-        .name         = "default",
-        .type         = AVMEDIA_TYPE_VIDEO,
-        .filter_frame = &convolution_opencl_filter_frame,
-        .config_props = &ff_opencl_filter_config_input,
-    },
+AVFilter ff_vf_convolution_opencl = {
+    .name           = "convolution_opencl",
+    .description    = NULL_IF_CONFIG_SMALL("Apply convolution mask to input video"),
+    .priv_size      = sizeof(ConvolutionOpenCLContext),
+    .priv_class     = &convolution_opencl_class,
+    .init           = &ff_opencl_filter_init,
+    .uninit         = &convolution_opencl_uninit,
+    .query_formats  = &ff_opencl_filter_query_formats,
+    .inputs         = convolution_opencl_inputs,
+    .outputs        = convolution_opencl_outputs,
+    .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
+};
+
+#endif /* CONFIG_CONVOLUTION_OPENCL_FILTER */
+
+#if CONFIG_SOBEL_OPENCL_FILTER
+
+static const AVOption sobel_opencl_options[] = {
+    { "planes", "set planes to filter", OFFSET(planes), AV_OPT_TYPE_INT,  {.i64=15}, 0, 15, FLAGS},
+    { "scale",  "set scale",            OFFSET(scale), AV_OPT_TYPE_FLOAT, {.dbl=1.0}, 0.0,  65535, FLAGS},
+    { "delta",  "set delta",            OFFSET(delta), AV_OPT_TYPE_FLOAT, {.dbl=0}, -65535, 65535, FLAGS},
     { NULL }
 };
 
-static const AVFilterPad convolution_opencl_outputs[] = {
-    {
-        .name         = "default",
-        .type         = AVMEDIA_TYPE_VIDEO,
-        .config_props = &ff_opencl_filter_config_output,
-    },
+AVFILTER_DEFINE_CLASS(sobel_opencl);
+
+AVFilter ff_vf_sobel_opencl = {
+    .name           = "sobel_opencl",
+    .description    = NULL_IF_CONFIG_SMALL("Apply sobel operator"),
+    .priv_size      = sizeof(ConvolutionOpenCLContext),
+    .priv_class     = &sobel_opencl_class,
+    .init           = &ff_opencl_filter_init,
+    .uninit         = &convolution_opencl_uninit,
+    .query_formats  = &ff_opencl_filter_query_formats,
+    .inputs         = convolution_opencl_inputs,
+    .outputs        = convolution_opencl_outputs,
+    .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
+};
+
+#endif /* CONFIG_SOBEL_OPENCL_FILTER */
+
+#if CONFIG_PREWITT_OPENCL_FILTER
+
+static const AVOption prewitt_opencl_options[] = {
+    { "planes", "set planes to filter", OFFSET(planes), AV_OPT_TYPE_INT,  {.i64=15}, 0, 15, FLAGS},
+    { "scale",  "set scale",            OFFSET(scale), AV_OPT_TYPE_FLOAT, {.dbl=1.0}, 0.0,  65535, FLAGS},
+    { "delta",  "set delta",            OFFSET(delta), AV_OPT_TYPE_FLOAT, {.dbl=0}, -65535, 65535, FLAGS},
     { NULL }
 };
 
-AVFilter ff_vf_convolution_opencl = {
-    .name           = "convolution_opencl",
-    .description    = NULL_IF_CONFIG_SMALL("Apply convolution mask to input video"),
+AVFILTER_DEFINE_CLASS(prewitt_opencl);
+
+AVFilter ff_vf_prewitt_opencl = {
+    .name           = "prewitt_opencl",
+    .description    = NULL_IF_CONFIG_SMALL("Apply prewitt operator"),
     .priv_size      = sizeof(ConvolutionOpenCLContext),
-    .priv_class     = &convolution_opencl_class,
+    .priv_class     = &prewitt_opencl_class,
     .init           = &ff_opencl_filter_init,
     .uninit         = &convolution_opencl_uninit,
     .query_formats  = &ff_opencl_filter_query_formats,
@@ -335,3 +517,31 @@  AVFilter ff_vf_convolution_opencl = {
     .outputs        = convolution_opencl_outputs,
     .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
 };
+
+#endif /* CONFIG_PREWITT_OPENCL_FILTER */
+
+#if CONFIG_ROBERTS_OPENCL_FILTER
+
+static const AVOption roberts_opencl_options[] = {
+    { "planes", "set planes to filter", OFFSET(planes), AV_OPT_TYPE_INT,  {.i64=15}, 0, 15, FLAGS},
+    { "scale",  "set scale",            OFFSET(scale), AV_OPT_TYPE_FLOAT, {.dbl=1.0}, 0.0,  65535, FLAGS},
+    { "delta",  "set delta",            OFFSET(delta), AV_OPT_TYPE_FLOAT, {.dbl=0}, -65535, 65535, FLAGS},
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(roberts_opencl);
+
+AVFilter ff_vf_roberts_opencl = {
+    .name           = "roberts_opencl",
+    .description    = NULL_IF_CONFIG_SMALL("Apply roberts operator"),
+    .priv_size      = sizeof(ConvolutionOpenCLContext),
+    .priv_class     = &roberts_opencl_class,
+    .init           = &ff_opencl_filter_init,
+    .uninit         = &convolution_opencl_uninit,
+    .query_formats  = &ff_opencl_filter_query_formats,
+    .inputs         = convolution_opencl_inputs,
+    .outputs        = convolution_opencl_outputs,
+    .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
+};
+
+#endif /* CONFIG_ROBERTS_OPENCL_FILTER */