diff mbox

[FFmpeg-devel] lavfi: Add OpenCL avgblur filter

Message ID 20180319023023.19036-1-dylanf123@gmail.com
State New
Headers show

Commit Message

Dylan Fernando March 19, 2018, 2:30 a.m. UTC
From: drfer3 <drfer3@student.monash.edu>

Behaves like the existing avgblur filter, except working on OpenCL
hardware frames. Takes exactly the same options.
---
 configure                       |   1 +
 libavfilter/Makefile            |   2 +
 libavfilter/allfilters.c        |   1 +
 libavfilter/opencl/avgblur.cl   |  60 ++++++++
 libavfilter/opencl_source.h     |   1 +
 libavfilter/vf_avgblur_opencl.c | 328 ++++++++++++++++++++++++++++++++++++++++
 6 files changed, 393 insertions(+)
 create mode 100644 libavfilter/opencl/avgblur.cl
 create mode 100644 libavfilter/vf_avgblur_opencl.c

Comments

Mark Thompson March 19, 2018, 11:34 p.m. UTC | #1
On 19/03/18 02:30, dylanf123@gmail.com wrote:
> From: drfer3 <drfer3@student.monash.edu>
> 
> Behaves like the existing avgblur filter, except working on OpenCL
> hardware frames. Takes exactly the same options.
> ---
>  configure                       |   1 +
>  libavfilter/Makefile            |   2 +
>  libavfilter/allfilters.c        |   1 +
>  libavfilter/opencl/avgblur.cl   |  60 ++++++++
>  libavfilter/opencl_source.h     |   1 +
>  libavfilter/vf_avgblur_opencl.c | 328 ++++++++++++++++++++++++++++++++++++++++
>  6 files changed, 393 insertions(+)
>  create mode 100644 libavfilter/opencl/avgblur.cl
>  create mode 100644 libavfilter/vf_avgblur_opencl.c
> 
> diff --git a/configure b/configure
> index 0c5ed07a07..481d338caf 100755
> --- a/configure
> +++ b/configure
> @@ -3202,6 +3202,7 @@ aresample_filter_deps="swresample"
>  ass_filter_deps="libass"
>  atempo_filter_deps="avcodec"
>  atempo_filter_select="rdft"
> +avgblur_opencl_filter_deps="opencl"
>  azmq_filter_deps="libzmq"
>  blackframe_filter_deps="gpl"
>  boxblur_filter_deps="gpl"
> diff --git a/libavfilter/Makefile b/libavfilter/Makefile
> index fc16512e2c..1043b41d80 100644
> --- a/libavfilter/Makefile
> +++ b/libavfilter/Makefile
> @@ -139,6 +139,8 @@ OBJS-$(CONFIG_ALPHAMERGE_FILTER)             += vf_alphamerge.o
>  OBJS-$(CONFIG_ASS_FILTER)                    += vf_subtitles.o
>  OBJS-$(CONFIG_ATADENOISE_FILTER)             += vf_atadenoise.o
>  OBJS-$(CONFIG_AVGBLUR_FILTER)                += vf_avgblur.o
> +OBJS-$(CONFIG_AVGBLUR_OPENCL_FILTER)         += vf_avgblur_opencl.o opencl.o \
> +                                                opencl/avgblur.o
>  OBJS-$(CONFIG_BBOX_FILTER)                   += bbox.o vf_bbox.o
>  OBJS-$(CONFIG_BENCH_FILTER)                  += f_bench.o
>  OBJS-$(CONFIG_BITPLANENOISE_FILTER)          += vf_bitplanenoise.o
> diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
> index cc423af738..3f67e321bf 100644
> --- a/libavfilter/allfilters.c
> +++ b/libavfilter/allfilters.c
> @@ -149,6 +149,7 @@ static void register_all(void)
>      REGISTER_FILTER(ASS,            ass,            vf);
>      REGISTER_FILTER(ATADENOISE,     atadenoise,     vf);
>      REGISTER_FILTER(AVGBLUR,        avgblur,        vf);
> +    REGISTER_FILTER(AVGBLUR_OPENCL, avgblur_opencl, vf);
>      REGISTER_FILTER(BBOX,           bbox,           vf);
>      REGISTER_FILTER(BENCH,          bench,          vf);
>      REGISTER_FILTER(BITPLANENOISE,  bitplanenoise,  vf);
> diff --git a/libavfilter/opencl/avgblur.cl b/libavfilter/opencl/avgblur.cl
> new file mode 100644
> index 0000000000..6a8d70df93
> --- /dev/null
> +++ b/libavfilter/opencl/avgblur.cl
> @@ -0,0 +1,60 @@
> +/*
> + * Copyright (c) 2018 Dylan Fernando
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +
> +__kernel void avgblur_horiz(__write_only image2d_t dst,
> +                            __read_only  image2d_t src,
> +                            int rad)
> +{
> +    const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE |
> +                               CLK_FILTER_NEAREST);
> +    int2 loc = (int2)(get_global_id(0), get_global_id(1));
> +    int2 size = (int2)(get_global_size(0), get_global_size(1));
> +
> +    int count = 0;
> +    float4 acc = (float4)(0,0,0,0);
> +
> +    for (int xx = max(0, loc.x - rad); xx < min(loc.x + rad + 1, size.x); xx++) {
> +        count++;
> +        acc += read_imagef(src, sampler, (int2)(xx, loc.y));
> +    }
> +
> +    write_imagef(dst, loc, acc / count);
> +}
> +
> +__kernel void avgblur_vert(__write_only image2d_t dst,
> +                           __read_only  image2d_t src,
> +                           int radv)
> +{
> +    const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE |
> +                               CLK_FILTER_NEAREST);
> +    int2 loc = (int2)(get_global_id(0), get_global_id(1));
> +    int2 size = (int2)(get_global_size(0), get_global_size(1));
> +
> +    int count = 0;
> +    float4 acc = (float4)(0,0,0,0);
> +
> +    for (int yy = max(0, loc.y - radv); yy < min(loc.y + radv + 1, size.y); yy++) {
> +        count++;
> +        acc += read_imagef(src, sampler, (int2)(loc.x, yy));
> +    }
> +
> +    write_imagef(dst, loc, acc / count);
> +}
> diff --git a/libavfilter/opencl_source.h b/libavfilter/opencl_source.h
> index 23cdfc6ac9..02bc1723b0 100644
> --- a/libavfilter/opencl_source.h
> +++ b/libavfilter/opencl_source.h
> @@ -19,6 +19,7 @@
>  #ifndef AVFILTER_OPENCL_SOURCE_H
>  #define AVFILTER_OPENCL_SOURCE_H
>  
> +extern const char *ff_opencl_source_avgblur;
>  extern const char *ff_opencl_source_overlay;
>  extern const char *ff_opencl_source_unsharp;
>  
> diff --git a/libavfilter/vf_avgblur_opencl.c b/libavfilter/vf_avgblur_opencl.c
> new file mode 100644
> index 0000000000..a6baa28ac1
> --- /dev/null
> +++ b/libavfilter/vf_avgblur_opencl.c
> @@ -0,0 +1,328 @@
> +/*
> + * Copyright (c) 2018 Dylan Fernando
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include "libavutil/common.h"
> +#include "libavutil/imgutils.h"
> +#include "libavutil/mem.h"
> +#include "libavutil/opt.h"
> +#include "libavutil/pixdesc.h"
> +
> +#include "avfilter.h"
> +#include "internal.h"
> +#include "opencl.h"
> +#include "opencl_source.h"
> +#include "video.h"
> +
> +
> +typedef struct AverageBlurOpenCLContext {
> +    OpenCLFilterContext ocf;
> +
> +    int              initialised;
> +    cl_kernel        kernel_horiz;
> +    cl_kernel        kernel_vert;
> +    cl_command_queue command_queue;
> +
> +    int radius;
> +    int radiusV;
> +    int planes;
> +
> +} AverageBlurOpenCLContext;
> +
> +
> +static int avgblur_opencl_init(AVFilterContext *avctx)
> +{
> +    AverageBlurOpenCLContext *ctx = avctx->priv;
> +    cl_int cle;
> +    int err;
> +
> +    err = ff_opencl_filter_load_program(avctx, &ff_opencl_source_avgblur, 1);
> +    if (err < 0)
> +        goto fail;
> +
> +    ctx->command_queue = clCreateCommandQueue(ctx->ocf.hwctx->context,
> +                                              ctx->ocf.hwctx->device_id,
> +                                              0, &cle);
> +    if (!ctx->command_queue) {
> +        av_log(avctx, AV_LOG_ERROR, "Failed to create OpenCL "
> +               "command queue: %d.\n", cle);
> +        err = AVERROR(EIO);
> +        goto fail;
> +    }
> +
> +    ctx->kernel_horiz = clCreateKernel(ctx->ocf.program,"avgblur_horiz", &cle);
> +    if (!ctx->kernel_horiz) {
> +        av_log(avctx, AV_LOG_ERROR, "Failed to create kernel: %d.\n", cle);
> +        err = AVERROR(EIO);
> +        goto fail;
> +    }
> +
> +    ctx->kernel_vert = clCreateKernel(ctx->ocf.program,"avgblur_vert", &cle);
> +    if (!ctx->kernel_vert) {
> +        av_log(avctx, AV_LOG_ERROR, "Failed to create kernel: %d.\n", cle);
> +        err = AVERROR(EIO);
> +        goto fail;
> +    }
> +
> +    if (ctx->radiusV <= 0) {
> +        ctx->radiusV = ctx->radius;
> +    }
> +
> +    ctx->initialised = 1;
> +    return 0;
> +
> +fail:
> +    if (ctx->command_queue)
> +        clReleaseCommandQueue(ctx->command_queue);
> +    if (ctx->kernel_horiz)
> +        clReleaseKernel(ctx->kernel_horiz);
> +    if (ctx->kernel_vert)
> +        clReleaseKernel(ctx->kernel_vert);
> +    return err;
> +}
> +
> +static int avgblur_opencl_filter_frame(AVFilterLink *inlink, AVFrame *input)
> +{
> +    AVFilterContext    *avctx = inlink->dst;
> +    AVFilterLink     *outlink = avctx->outputs[0];
> +    AverageBlurOpenCLContext *ctx = avctx->priv;
> +    AVFrame *output = NULL;
> +    AVFrame *intermediate = NULL;
> +    cl_int cle;
> +    size_t global_work[2];
> +    cl_mem src, dst, inter;
> +    int err, p;
> +
> +    av_log(ctx, AV_LOG_DEBUG, "Filter input: %s, %ux%u (%"PRId64").\n",
> +           av_get_pix_fmt_name(input->format),
> +           input->width, input->height, input->pts);
> +
> +    if (!input->hw_frames_ctx)
> +        return AVERROR(EINVAL);
> +
> +    if (!ctx->initialised) {
> +        err = avgblur_opencl_init(avctx);
> +        if (err < 0)
> +            goto fail;
> +
> +    }
> +
> +    output = ff_get_video_buffer(outlink, outlink->w, outlink->h);
> +    if (!output) {
> +        err = AVERROR(ENOMEM);
> +        goto fail;
> +    }
> +
> +    intermediate = ff_get_video_buffer(outlink, outlink->w, outlink->h);
> +    if (!intermediate) {
> +        err = AVERROR(ENOMEM);
> +        goto fail;
> +    }
> +
> +    for (p = 0; p < FF_ARRAY_ELEMS(output->data); p++) {
> +        src = (cl_mem) input->data[p];
> +        dst = (cl_mem)output->data[p];
> +        inter = (cl_mem) intermediate->data[p];
> +
> +        if (!dst)
> +            break;
> +
> +        int radius_x = ctx->radius;
> +        int radius_y = ctx->radiusV;

We do still forbid mixed declarations and code, so these should be declared at the start of the block or function.  (You should get a compiler warning for that in at least some cases - I do from gcc 7.)

> +
> +        if (!(ctx->planes & (1 << p))) {
> +            radius_x = 0;
> +            radius_y = 0;
> +        }
> +
> +        cle = clSetKernelArg(ctx->kernel_horiz, 0, sizeof(cl_mem), &inter);
> +        if (cle != CL_SUCCESS) {
> +            av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
> +                   "destination image argument: %d.\n", cle);
> +            goto fail;
> +        }
> +        cle = clSetKernelArg(ctx->kernel_horiz, 1, sizeof(cl_mem), &src);
> +        if (cle != CL_SUCCESS) {
> +            av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
> +                   "source image argument: %d.\n", cle);
> +            goto fail;
> +        }
> +        cle = clSetKernelArg(ctx->kernel_horiz, 2, sizeof(cl_int), &radius_x);
> +        if (cle != CL_SUCCESS) {
> +            av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
> +                   "sizeX argument: %d.\n", cle);
> +            goto fail;
> +        }
> +
> +        global_work[0] = output->width;
> +        global_work[1] = output->height;
> +
> +        av_log(avctx, AV_LOG_DEBUG, "Run kernel on plane %d "
> +               "(%"SIZE_SPECIFIER"x%"SIZE_SPECIFIER").\n",
> +               p, global_work[0], global_work[1]);
> +
> +        cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel_horiz, 2, NULL,
> +                                     global_work, NULL,
> +                                     0, NULL, NULL);
> +        if (cle != CL_SUCCESS) {
> +            av_log(avctx, AV_LOG_ERROR, "Failed to enqueue kernel: %d.\n",
> +                   cle);
> +            err = AVERROR(EIO);
> +            goto fail;
> +        }
> +
> +        cle = clSetKernelArg(ctx->kernel_vert, 0, sizeof(cl_mem), &dst);
> +        if (cle != CL_SUCCESS) {
> +            av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
> +                   "destination image argument: %d.\n", cle);
> +            goto fail;
> +        }
> +        cle = clSetKernelArg(ctx->kernel_vert, 1, sizeof(cl_mem), &inter);
> +        if (cle != CL_SUCCESS) {
> +            av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
> +                   "source image argument: %d.\n", cle);
> +            goto fail;
> +        }
> +        cle = clSetKernelArg(ctx->kernel_vert, 2, sizeof(cl_int), &radius_y);
> +        if (cle != CL_SUCCESS) {
> +            av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
> +                   "sizeY argument: %d.\n", cle);
> +            goto fail;
> +        }
> +
> +        global_work[0] = output->width;
> +        global_work[1] = output->height;
> +
> +        av_log(avctx, AV_LOG_DEBUG, "Run kernel on plane %d "
> +               "(%"SIZE_SPECIFIER"x%"SIZE_SPECIFIER").\n",
> +               p, global_work[0], global_work[1]);
> +
> +        cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel_vert, 2, NULL,
> +                                     global_work, NULL,
> +                                     0, NULL, NULL);
> +        if (cle != CL_SUCCESS) {
> +            av_log(avctx, AV_LOG_ERROR, "Failed to enqueue kernel: %d.\n",
> +                   cle);
> +            err = AVERROR(EIO);
> +            goto fail;
> +        }
> +
> +    }
> +
> +    cle = clFinish(ctx->command_queue);
> +    if (cle != CL_SUCCESS) {
> +        av_log(avctx, AV_LOG_ERROR, "Failed to finish command queue: %d.\n",
> +               cle);
> +        err = AVERROR(EIO);
> +        goto fail;
> +    }
> +
> +    err = av_frame_copy_props(output, input);
> +    if (err < 0)
> +        goto fail;
> +
> +    av_frame_free(&input);
> +    av_frame_free(&intermediate);
> +
> +    av_log(ctx, AV_LOG_DEBUG, "Filter output: %s, %ux%u (%"PRId64").\n",
> +           av_get_pix_fmt_name(output->format),
> +           output->width, output->height, output->pts);
> +
> +    return ff_filter_frame(outlink, output);
> +
> +fail:
> +    clFinish(ctx->command_queue);
> +    av_frame_free(&input);
> +    av_frame_free(&output);
> +    av_frame_free(&intermediate);
> +    return err;
> +}
> +
> +static av_cold void avgblur_opencl_uninit(AVFilterContext *avctx)
> +{
> +    AverageBlurOpenCLContext *ctx = avctx->priv;
> +    cl_int cle;
> +
> +
> +    if (ctx->kernel_horiz) {
> +        cle = clReleaseKernel(ctx->kernel_horiz);
> +        if (cle != CL_SUCCESS)
> +            av_log(avctx, AV_LOG_ERROR, "Failed to release "
> +                   "kernel: %d.\n", cle);
> +    }
> +
> +    if (ctx->kernel_vert) {
> +        cle = clReleaseKernel(ctx->kernel_vert);
> +        if (cle != CL_SUCCESS)
> +            av_log(avctx, AV_LOG_ERROR, "Failed to release "
> +                   "kernel: %d.\n", cle);
> +    }
> +
> +    if (ctx->command_queue) {
> +        cle = clReleaseCommandQueue(ctx->command_queue);
> +        if (cle != CL_SUCCESS)
> +            av_log(avctx, AV_LOG_ERROR, "Failed to release "
> +                   "command queue: %d.\n", cle);
> +    }
> +
> +    ff_opencl_filter_uninit(avctx);
> +}
> +
> +#define OFFSET(x) offsetof(AverageBlurOpenCLContext, x)
> +#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
> +static const AVOption avgblur_opencl_options[] = {
> +    { "sizeX",  "set horizontal size",  OFFSET(radius),  AV_OPT_TYPE_INT, {.i64=1},   1, 1024, FLAGS },
> +    { "planes", "set planes to filter", OFFSET(planes),  AV_OPT_TYPE_INT, {.i64=0xF}, 0,  0xF, FLAGS },
> +    { "sizeY",  "set vertical size",    OFFSET(radiusV), AV_OPT_TYPE_INT, {.i64=0},   0, 1024, FLAGS },
> +    { NULL }
> +};
> +
> +AVFILTER_DEFINE_CLASS(avgblur_opencl);
> +
> +static const AVFilterPad avgblur_opencl_inputs[] = {
> +    {
> +        .name         = "default",
> +        .type         = AVMEDIA_TYPE_VIDEO,
> +        .filter_frame = &avgblur_opencl_filter_frame,
> +        .config_props = &ff_opencl_filter_config_input,
> +    },
> +    { NULL }
> +};
> +
> +static const AVFilterPad avgblur_opencl_outputs[] = {
> +    {
> +        .name         = "default",
> +        .type         = AVMEDIA_TYPE_VIDEO,
> +        .config_props = &ff_opencl_filter_config_output,
> +    },
> +    { NULL }
> +};
> +
> +AVFilter ff_vf_avgblur_opencl = {
> +    .name           = "avgblur_opencl",
> +    .description    = NULL_IF_CONFIG_SMALL("Apply average blur filter"),
> +    .priv_size      = sizeof(AverageBlurOpenCLContext),
> +    .priv_class     = &avgblur_opencl_class,
> +    .init           = &ff_opencl_filter_init,
> +    .uninit         = &avgblur_opencl_uninit,
> +    .query_formats  = &ff_opencl_filter_query_formats,
> +    .inputs         = avgblur_opencl_inputs,
> +    .outputs        = avgblur_opencl_outputs,
> +    .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
> +};
> 

Ignoring the one trivial issue above which I can easily fix myself, this all looks good to me.  I'll try to test on some other platforms (non-Intel, at least AMD and Mali) tomorrow, but I don't anticipate any issues.  Does anyone else have any thoughts?  I'll push this after tomorrow if there isn't anything further.

Thank you!

- Mark
Dylan Fernando March 21, 2018, 1:09 p.m. UTC | #2
On Tue, Mar 20, 2018 at 10:34 AM, Mark Thompson <sw@jkqxz.net> wrote:

> On 19/03/18 02:30, dylanf123@gmail.com wrote:
> > From: drfer3 <drfer3@student.monash.edu>
> >
> > Behaves like the existing avgblur filter, except working on OpenCL
> > hardware frames. Takes exactly the same options.
> > ---
> >  configure                       |   1 +
> >  libavfilter/Makefile            |   2 +
> >  libavfilter/allfilters.c        |   1 +
> >  libavfilter/opencl/avgblur.cl   |  60 ++++++++
> >  libavfilter/opencl_source.h     |   1 +
> >  libavfilter/vf_avgblur_opencl.c | 328 ++++++++++++++++++++++++++++++
> ++++++++++
> >  6 files changed, 393 insertions(+)
> >  create mode 100644 libavfilter/opencl/avgblur.cl
> >  create mode 100644 libavfilter/vf_avgblur_opencl.c
> >
> > diff --git a/configure b/configure
> > index 0c5ed07a07..481d338caf 100755
> > --- a/configure
> > +++ b/configure
> > @@ -3202,6 +3202,7 @@ aresample_filter_deps="swresample"
> >  ass_filter_deps="libass"
> >  atempo_filter_deps="avcodec"
> >  atempo_filter_select="rdft"
> > +avgblur_opencl_filter_deps="opencl"
> >  azmq_filter_deps="libzmq"
> >  blackframe_filter_deps="gpl"
> >  boxblur_filter_deps="gpl"
> > diff --git a/libavfilter/Makefile b/libavfilter/Makefile
> > index fc16512e2c..1043b41d80 100644
> > --- a/libavfilter/Makefile
> > +++ b/libavfilter/Makefile
> > @@ -139,6 +139,8 @@ OBJS-$(CONFIG_ALPHAMERGE_FILTER)             +=
> vf_alphamerge.o
> >  OBJS-$(CONFIG_ASS_FILTER)                    += vf_subtitles.o
> >  OBJS-$(CONFIG_ATADENOISE_FILTER)             += vf_atadenoise.o
> >  OBJS-$(CONFIG_AVGBLUR_FILTER)                += vf_avgblur.o
> > +OBJS-$(CONFIG_AVGBLUR_OPENCL_FILTER)         += vf_avgblur_opencl.o
> opencl.o \
> > +                                                opencl/avgblur.o
> >  OBJS-$(CONFIG_BBOX_FILTER)                   += bbox.o vf_bbox.o
> >  OBJS-$(CONFIG_BENCH_FILTER)                  += f_bench.o
> >  OBJS-$(CONFIG_BITPLANENOISE_FILTER)          += vf_bitplanenoise.o
> > diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
> > index cc423af738..3f67e321bf 100644
> > --- a/libavfilter/allfilters.c
> > +++ b/libavfilter/allfilters.c
> > @@ -149,6 +149,7 @@ static void register_all(void)
> >      REGISTER_FILTER(ASS,            ass,            vf);
> >      REGISTER_FILTER(ATADENOISE,     atadenoise,     vf);
> >      REGISTER_FILTER(AVGBLUR,        avgblur,        vf);
> > +    REGISTER_FILTER(AVGBLUR_OPENCL, avgblur_opencl, vf);
> >      REGISTER_FILTER(BBOX,           bbox,           vf);
> >      REGISTER_FILTER(BENCH,          bench,          vf);
> >      REGISTER_FILTER(BITPLANENOISE,  bitplanenoise,  vf);
> > diff --git a/libavfilter/opencl/avgblur.cl b/libavfilter/opencl/avgblur.
> cl
> > new file mode 100644
> > index 0000000000..6a8d70df93
> > --- /dev/null
> > +++ b/libavfilter/opencl/avgblur.cl
> > @@ -0,0 +1,60 @@
> > +/*
> > + * Copyright (c) 2018 Dylan Fernando
> > + *
> > + * This file is part of FFmpeg.
> > + *
> > + * FFmpeg is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU Lesser General Public
> > + * License as published by the Free Software Foundation; either
> > + * version 2.1 of the License, or (at your option) any later version.
> > + *
> > + * FFmpeg is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > + * Lesser General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU Lesser General Public
> > + * License along with FFmpeg; if not, write to the Free Software
> > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> 02110-1301 USA
> > + */
> > +
> > +
> > +__kernel void avgblur_horiz(__write_only image2d_t dst,
> > +                            __read_only  image2d_t src,
> > +                            int rad)
> > +{
> > +    const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE |
> > +                               CLK_FILTER_NEAREST);
> > +    int2 loc = (int2)(get_global_id(0), get_global_id(1));
> > +    int2 size = (int2)(get_global_size(0), get_global_size(1));
> > +
> > +    int count = 0;
> > +    float4 acc = (float4)(0,0,0,0);
> > +
> > +    for (int xx = max(0, loc.x - rad); xx < min(loc.x + rad + 1,
> size.x); xx++) {
> > +        count++;
> > +        acc += read_imagef(src, sampler, (int2)(xx, loc.y));
> > +    }
> > +
> > +    write_imagef(dst, loc, acc / count);
> > +}
> > +
> > +__kernel void avgblur_vert(__write_only image2d_t dst,
> > +                           __read_only  image2d_t src,
> > +                           int radv)
> > +{
> > +    const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE |
> > +                               CLK_FILTER_NEAREST);
> > +    int2 loc = (int2)(get_global_id(0), get_global_id(1));
> > +    int2 size = (int2)(get_global_size(0), get_global_size(1));
> > +
> > +    int count = 0;
> > +    float4 acc = (float4)(0,0,0,0);
> > +
> > +    for (int yy = max(0, loc.y - radv); yy < min(loc.y + radv + 1,
> size.y); yy++) {
> > +        count++;
> > +        acc += read_imagef(src, sampler, (int2)(loc.x, yy));
> > +    }
> > +
> > +    write_imagef(dst, loc, acc / count);
> > +}
> > diff --git a/libavfilter/opencl_source.h b/libavfilter/opencl_source.h
> > index 23cdfc6ac9..02bc1723b0 100644
> > --- a/libavfilter/opencl_source.h
> > +++ b/libavfilter/opencl_source.h
> > @@ -19,6 +19,7 @@
> >  #ifndef AVFILTER_OPENCL_SOURCE_H
> >  #define AVFILTER_OPENCL_SOURCE_H
> >
> > +extern const char *ff_opencl_source_avgblur;
> >  extern const char *ff_opencl_source_overlay;
> >  extern const char *ff_opencl_source_unsharp;
> >
> > diff --git a/libavfilter/vf_avgblur_opencl.c b/libavfilter/vf_avgblur_
> opencl.c
> > new file mode 100644
> > index 0000000000..a6baa28ac1
> > --- /dev/null
> > +++ b/libavfilter/vf_avgblur_opencl.c
> > @@ -0,0 +1,328 @@
> > +/*
> > + * Copyright (c) 2018 Dylan Fernando
> > + *
> > + * This file is part of FFmpeg.
> > + *
> > + * FFmpeg is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU Lesser General Public
> > + * License as published by the Free Software Foundation; either
> > + * version 2.1 of the License, or (at your option) any later version.
> > + *
> > + * FFmpeg is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > + * Lesser General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU Lesser General Public
> > + * License along with FFmpeg; if not, write to the Free Software
> > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> 02110-1301 USA
> > + */
> > +
> > +#include "libavutil/common.h"
> > +#include "libavutil/imgutils.h"
> > +#include "libavutil/mem.h"
> > +#include "libavutil/opt.h"
> > +#include "libavutil/pixdesc.h"
> > +
> > +#include "avfilter.h"
> > +#include "internal.h"
> > +#include "opencl.h"
> > +#include "opencl_source.h"
> > +#include "video.h"
> > +
> > +
> > +typedef struct AverageBlurOpenCLContext {
> > +    OpenCLFilterContext ocf;
> > +
> > +    int              initialised;
> > +    cl_kernel        kernel_horiz;
> > +    cl_kernel        kernel_vert;
> > +    cl_command_queue command_queue;
> > +
> > +    int radius;
> > +    int radiusV;
> > +    int planes;
> > +
> > +} AverageBlurOpenCLContext;
> > +
> > +
> > +static int avgblur_opencl_init(AVFilterContext *avctx)
> > +{
> > +    AverageBlurOpenCLContext *ctx = avctx->priv;
> > +    cl_int cle;
> > +    int err;
> > +
> > +    err = ff_opencl_filter_load_program(avctx,
> &ff_opencl_source_avgblur, 1);
> > +    if (err < 0)
> > +        goto fail;
> > +
> > +    ctx->command_queue = clCreateCommandQueue(ctx->ocf.hwctx->context,
> > +                                              ctx->ocf.hwctx->device_id,
> > +                                              0, &cle);
> > +    if (!ctx->command_queue) {
> > +        av_log(avctx, AV_LOG_ERROR, "Failed to create OpenCL "
> > +               "command queue: %d.\n", cle);
> > +        err = AVERROR(EIO);
> > +        goto fail;
> > +    }
> > +
> > +    ctx->kernel_horiz = clCreateKernel(ctx->ocf.program,"avgblur_horiz",
> &cle);
> > +    if (!ctx->kernel_horiz) {
> > +        av_log(avctx, AV_LOG_ERROR, "Failed to create kernel: %d.\n",
> cle);
> > +        err = AVERROR(EIO);
> > +        goto fail;
> > +    }
> > +
> > +    ctx->kernel_vert = clCreateKernel(ctx->ocf.program,"avgblur_vert",
> &cle);
> > +    if (!ctx->kernel_vert) {
> > +        av_log(avctx, AV_LOG_ERROR, "Failed to create kernel: %d.\n",
> cle);
> > +        err = AVERROR(EIO);
> > +        goto fail;
> > +    }
> > +
> > +    if (ctx->radiusV <= 0) {
> > +        ctx->radiusV = ctx->radius;
> > +    }
> > +
> > +    ctx->initialised = 1;
> > +    return 0;
> > +
> > +fail:
> > +    if (ctx->command_queue)
> > +        clReleaseCommandQueue(ctx->command_queue);
> > +    if (ctx->kernel_horiz)
> > +        clReleaseKernel(ctx->kernel_horiz);
> > +    if (ctx->kernel_vert)
> > +        clReleaseKernel(ctx->kernel_vert);
> > +    return err;
> > +}
> > +
> > +static int avgblur_opencl_filter_frame(AVFilterLink *inlink, AVFrame
> *input)
> > +{
> > +    AVFilterContext    *avctx = inlink->dst;
> > +    AVFilterLink     *outlink = avctx->outputs[0];
> > +    AverageBlurOpenCLContext *ctx = avctx->priv;
> > +    AVFrame *output = NULL;
> > +    AVFrame *intermediate = NULL;
> > +    cl_int cle;
> > +    size_t global_work[2];
> > +    cl_mem src, dst, inter;
> > +    int err, p;
> > +
> > +    av_log(ctx, AV_LOG_DEBUG, "Filter input: %s, %ux%u (%"PRId64").\n",
> > +           av_get_pix_fmt_name(input->format),
> > +           input->width, input->height, input->pts);
> > +
> > +    if (!input->hw_frames_ctx)
> > +        return AVERROR(EINVAL);
> > +
> > +    if (!ctx->initialised) {
> > +        err = avgblur_opencl_init(avctx);
> > +        if (err < 0)
> > +            goto fail;
> > +
> > +    }
> > +
> > +    output = ff_get_video_buffer(outlink, outlink->w, outlink->h);
> > +    if (!output) {
> > +        err = AVERROR(ENOMEM);
> > +        goto fail;
> > +    }
> > +
> > +    intermediate = ff_get_video_buffer(outlink, outlink->w, outlink->h);
> > +    if (!intermediate) {
> > +        err = AVERROR(ENOMEM);
> > +        goto fail;
> > +    }
> > +
> > +    for (p = 0; p < FF_ARRAY_ELEMS(output->data); p++) {
> > +        src = (cl_mem) input->data[p];
> > +        dst = (cl_mem)output->data[p];
> > +        inter = (cl_mem) intermediate->data[p];
> > +
> > +        if (!dst)
> > +            break;
> > +
> > +        int radius_x = ctx->radius;
> > +        int radius_y = ctx->radiusV;
>
> We do still forbid mixed declarations and code, so these should be
> declared at the start of the block or function.  (You should get a compiler
> warning for that in at least some cases - I do from gcc 7.)
>
> > +
> > +        if (!(ctx->planes & (1 << p))) {
> > +            radius_x = 0;
> > +            radius_y = 0;
> > +        }
> > +
> > +        cle = clSetKernelArg(ctx->kernel_horiz, 0, sizeof(cl_mem),
> &inter);
> > +        if (cle != CL_SUCCESS) {
> > +            av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
> > +                   "destination image argument: %d.\n", cle);
> > +            goto fail;
> > +        }
> > +        cle = clSetKernelArg(ctx->kernel_horiz, 1, sizeof(cl_mem),
> &src);
> > +        if (cle != CL_SUCCESS) {
> > +            av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
> > +                   "source image argument: %d.\n", cle);
> > +            goto fail;
> > +        }
> > +        cle = clSetKernelArg(ctx->kernel_horiz, 2, sizeof(cl_int),
> &radius_x);
> > +        if (cle != CL_SUCCESS) {
> > +            av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
> > +                   "sizeX argument: %d.\n", cle);
> > +            goto fail;
> > +        }
> > +
> > +        global_work[0] = output->width;
> > +        global_work[1] = output->height;
> > +
> > +        av_log(avctx, AV_LOG_DEBUG, "Run kernel on plane %d "
> > +               "(%"SIZE_SPECIFIER"x%"SIZE_SPECIFIER").\n",
> > +               p, global_work[0], global_work[1]);
> > +
> > +        cle = clEnqueueNDRangeKernel(ctx->command_queue,
> ctx->kernel_horiz, 2, NULL,
> > +                                     global_work, NULL,
> > +                                     0, NULL, NULL);
> > +        if (cle != CL_SUCCESS) {
> > +            av_log(avctx, AV_LOG_ERROR, "Failed to enqueue kernel:
> %d.\n",
> > +                   cle);
> > +            err = AVERROR(EIO);
> > +            goto fail;
> > +        }
> > +
> > +        cle = clSetKernelArg(ctx->kernel_vert, 0, sizeof(cl_mem),
> &dst);
> > +        if (cle != CL_SUCCESS) {
> > +            av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
> > +                   "destination image argument: %d.\n", cle);
> > +            goto fail;
> > +        }
> > +        cle = clSetKernelArg(ctx->kernel_vert, 1, sizeof(cl_mem),
> &inter);
> > +        if (cle != CL_SUCCESS) {
> > +            av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
> > +                   "source image argument: %d.\n", cle);
> > +            goto fail;
> > +        }
> > +        cle = clSetKernelArg(ctx->kernel_vert, 2, sizeof(cl_int),
> &radius_y);
> > +        if (cle != CL_SUCCESS) {
> > +            av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
> > +                   "sizeY argument: %d.\n", cle);
> > +            goto fail;
> > +        }
> > +
> > +        global_work[0] = output->width;
> > +        global_work[1] = output->height;
> > +
> > +        av_log(avctx, AV_LOG_DEBUG, "Run kernel on plane %d "
> > +               "(%"SIZE_SPECIFIER"x%"SIZE_SPECIFIER").\n",
> > +               p, global_work[0], global_work[1]);
> > +
> > +        cle = clEnqueueNDRangeKernel(ctx->command_queue,
> ctx->kernel_vert, 2, NULL,
> > +                                     global_work, NULL,
> > +                                     0, NULL, NULL);
> > +        if (cle != CL_SUCCESS) {
> > +            av_log(avctx, AV_LOG_ERROR, "Failed to enqueue kernel:
> %d.\n",
> > +                   cle);
> > +            err = AVERROR(EIO);
> > +            goto fail;
> > +        }
> > +
> > +    }
> > +
> > +    cle = clFinish(ctx->command_queue);
> > +    if (cle != CL_SUCCESS) {
> > +        av_log(avctx, AV_LOG_ERROR, "Failed to finish command queue:
> %d.\n",
> > +               cle);
> > +        err = AVERROR(EIO);
> > +        goto fail;
> > +    }
> > +
> > +    err = av_frame_copy_props(output, input);
> > +    if (err < 0)
> > +        goto fail;
> > +
> > +    av_frame_free(&input);
> > +    av_frame_free(&intermediate);
> > +
> > +    av_log(ctx, AV_LOG_DEBUG, "Filter output: %s, %ux%u (%"PRId64").\n",
> > +           av_get_pix_fmt_name(output->format),
> > +           output->width, output->height, output->pts);
> > +
> > +    return ff_filter_frame(outlink, output);
> > +
> > +fail:
> > +    clFinish(ctx->command_queue);
> > +    av_frame_free(&input);
> > +    av_frame_free(&output);
> > +    av_frame_free(&intermediate);
> > +    return err;
> > +}
> > +
> > +static av_cold void avgblur_opencl_uninit(AVFilterContext *avctx)
> > +{
> > +    AverageBlurOpenCLContext *ctx = avctx->priv;
> > +    cl_int cle;
> > +
> > +
> > +    if (ctx->kernel_horiz) {
> > +        cle = clReleaseKernel(ctx->kernel_horiz);
> > +        if (cle != CL_SUCCESS)
> > +            av_log(avctx, AV_LOG_ERROR, "Failed to release "
> > +                   "kernel: %d.\n", cle);
> > +    }
> > +
> > +    if (ctx->kernel_vert) {
> > +        cle = clReleaseKernel(ctx->kernel_vert);
> > +        if (cle != CL_SUCCESS)
> > +            av_log(avctx, AV_LOG_ERROR, "Failed to release "
> > +                   "kernel: %d.\n", cle);
> > +    }
> > +
> > +    if (ctx->command_queue) {
> > +        cle = clReleaseCommandQueue(ctx->command_queue);
> > +        if (cle != CL_SUCCESS)
> > +            av_log(avctx, AV_LOG_ERROR, "Failed to release "
> > +                   "command queue: %d.\n", cle);
> > +    }
> > +
> > +    ff_opencl_filter_uninit(avctx);
> > +}
> > +
> > +#define OFFSET(x) offsetof(AverageBlurOpenCLContext, x)
> > +#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
> > +static const AVOption avgblur_opencl_options[] = {
> > +    { "sizeX",  "set horizontal size",  OFFSET(radius),
> AV_OPT_TYPE_INT, {.i64=1},   1, 1024, FLAGS },
> > +    { "planes", "set planes to filter", OFFSET(planes),
> AV_OPT_TYPE_INT, {.i64=0xF}, 0,  0xF, FLAGS },
> > +    { "sizeY",  "set vertical size",    OFFSET(radiusV),
> AV_OPT_TYPE_INT, {.i64=0},   0, 1024, FLAGS },
> > +    { NULL }
> > +};
> > +
> > +AVFILTER_DEFINE_CLASS(avgblur_opencl);
> > +
> > +static const AVFilterPad avgblur_opencl_inputs[] = {
> > +    {
> > +        .name         = "default",
> > +        .type         = AVMEDIA_TYPE_VIDEO,
> > +        .filter_frame = &avgblur_opencl_filter_frame,
> > +        .config_props = &ff_opencl_filter_config_input,
> > +    },
> > +    { NULL }
> > +};
> > +
> > +static const AVFilterPad avgblur_opencl_outputs[] = {
> > +    {
> > +        .name         = "default",
> > +        .type         = AVMEDIA_TYPE_VIDEO,
> > +        .config_props = &ff_opencl_filter_config_output,
> > +    },
> > +    { NULL }
> > +};
> > +
> > +AVFilter ff_vf_avgblur_opencl = {
> > +    .name           = "avgblur_opencl",
> > +    .description    = NULL_IF_CONFIG_SMALL("Apply average blur filter"),
> > +    .priv_size      = sizeof(AverageBlurOpenCLContext),
> > +    .priv_class     = &avgblur_opencl_class,
> > +    .init           = &ff_opencl_filter_init,
> > +    .uninit         = &avgblur_opencl_uninit,
> > +    .query_formats  = &ff_opencl_filter_query_formats,
> > +    .inputs         = avgblur_opencl_inputs,
> > +    .outputs        = avgblur_opencl_outputs,
> > +    .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
> > +};
> >
>
> Ignoring the one trivial issue above which I can easily fix myself, this
> all looks good to me.  I'll try to test on some other platforms (non-Intel,
> at least AMD and Mali) tomorrow, but I don't anticipate any issues.  Does
> anyone else have any thoughts?  I'll push this after tomorrow if there
> isn't anything further.
>
> Thank you!
>
> - Mark
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>

Thanks.

What information should I put in my GSoC application? How should I
structure it? Should I give a rough timeline detailing exactly which color
conversion and scaling algorithms I’ll be implementing? If so, which files
should I look at to see the current colour conversion code?

- Dylan
Mark Thompson March 23, 2018, 12:36 a.m. UTC | #3
On 21/03/18 13:09, Dylan Fernando wrote:
> On Tue, Mar 20, 2018 at 10:34 AM, Mark Thompson <sw@jkqxz.net> wrote:
>> On 19/03/18 02:30, dylanf123@gmail.com wrote:
>>> From: drfer3 <drfer3@student.monash.edu>
>>>
>>> Behaves like the existing avgblur filter, except working on OpenCL
>>> hardware frames. Takes exactly the same options.
>>> ---
>>>  configure                       |   1 +
>>>  libavfilter/Makefile            |   2 +
>>>  libavfilter/allfilters.c        |   1 +
>>>  libavfilter/opencl/avgblur.cl   |  60 ++++++++
>>>  libavfilter/opencl_source.h     |   1 +
>>>  libavfilter/vf_avgblur_opencl.c | 328 ++++++++++++++++++++++++++++++
>> ++++++++++
>>>  6 files changed, 393 insertions(+)
>>>  create mode 100644 libavfilter/opencl/avgblur.cl
>>>  create mode 100644 libavfilter/vf_avgblur_opencl.c
>>>
>>> ...
>>
>> Ignoring the one trivial issue above which I can easily fix myself, this
>> all looks good to me.  I'll try to test on some other platforms (non-Intel,
>> at least AMD and Mali) tomorrow, but I don't anticipate any issues.  Does
>> anyone else have any thoughts?  I'll push this after tomorrow if there
>> isn't anything further.

And pushed, thank you!

> What information should I put in my GSoC application? How should I
> structure it? Should I give a rough timeline detailing exactly which color
> conversion and scaling algorithms I’ll be implementing? If so, which files
> should I look at to see the current colour conversion code?

I have to admit I'm not entirely sure what you need to put in the application (I haven't done this bit before, so I'm also reading what the GSoC site says).  Can anyone else give some guidance here about how it has worked in the past in FFmpeg?

Current scaling and colour conversion code is mostly found in libswscale, though there are also other places like the colorspace filter.  I don't know whether any of these will translate suitably to GPU code and what the right approach is here - some investigation will be required.

- Mark
Ruiling Song March 23, 2018, 12:59 a.m. UTC | #4
> 

> Current scaling and colour conversion code is mostly found in libswscale, though

> there are also other places like the colorspace filter.  I don't know whether any

> of these will translate suitably to GPU code and what the right approach is here -

> some investigation will be required.

Hi Mark,

I am currently working on the scaling filter implemented using OpenCL (it is something like scale_cuda),
I still need some time to polish the patch, and will send it out to the mail list once it is good enough.

Ruiling

> 

> - Mark

> _______________________________________________

> ffmpeg-devel mailing list

> ffmpeg-devel@ffmpeg.org

> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
Carl Eugen Hoyos March 23, 2018, 10:10 a.m. UTC | #5
2018-03-21 14:09 GMT+01:00, Dylan Fernando <dylanf123@gmail.com>:

> What information should I put in my GSoC application? How should I
> structure it? Should I give a rough timeline detailing exactly which color
> conversion and scaling algorithms I’ll be implementing? If so, which files
> should I look at to see the current colour conversion code?

Two blogposts that are meant to help you:
https://medium.com/@owtf/google-summer-of-code-writing-a-good-proposal-141b1376f076
http://mirca.fun/gsoc-application/

But please remember that in this project, the qualification task
is more important than the form of the application.
You of course absolutely have to finish an application, without
it you cannot be chosen as student.

Carl Eugen
Dylan Fernando March 26, 2018, 1:16 p.m. UTC | #6
On Fri, Mar 23, 2018 at 9:10 PM, Carl Eugen Hoyos <ceffmpeg@gmail.com>
wrote:

> 2018-03-21 14:09 GMT+01:00, Dylan Fernando <dylanf123@gmail.com>:
>
> > What information should I put in my GSoC application? How should I
> > structure it? Should I give a rough timeline detailing exactly which
> color
> > conversion and scaling algorithms I’ll be implementing? If so, which
> files
> > should I look at to see the current colour conversion code?
>
> Two blogposts that are meant to help you:
> https://medium.com/@owtf/google-summer-of-code-writing-
> a-good-proposal-141b1376f076
> http://mirca.fun/gsoc-application/
>
> But please remember that in this project, the qualification task
> is more important than the form of the application.
> You of course absolutely have to finish an application, without
> it you cannot be chosen as student.
>
> Carl Eugen
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>

Thanks,

I have the project timeline part of my application below. Feel free to
comment any suggestions:

Proposal
Video filtering with OpenCL

Currently, FFmpeg has good support for offloading decoding and encoding
from the CPU, but not such good support for doing anything else with that
video without using the CPU. There is interop support for mapping between
APIs so that generic methods like OpenCL which can be run on any GPU can be
used together with specific video APIs for decoding and encoding, but
currently there are only a few operations which can actually use this so
many use-cases require expensive additional steps to download frames from
the GPU and then upload them again later after performing some processing
on the CPU. Therefore, we would like to add more OpenCL filter support to
libavfilter so that more operations can be offloaded. This includes
implementing a scaler, supporting a choice of scaling algorithms, a
deinterlacer and color conversion.

Timeline
I have a report due for uni in June. This should only take a day or two.
Other than that, I have no commitments. All tasks below include writing
documentation.

Week 1-2:
Implement OpenCL deinterlacer based on yadif

Week 3-6:
Implement OpenCL YUV-to-YUV color conversion based on vf_colorspace

Week 7-9:
Implement OpenCL rgb colorspace conversion

Week 10-14:
Implement scaling algorithms with OpenCL including bilinear, bicubic, and
averaging area
diff mbox

Patch

diff --git a/configure b/configure
index 0c5ed07a07..481d338caf 100755
--- a/configure
+++ b/configure
@@ -3202,6 +3202,7 @@  aresample_filter_deps="swresample"
 ass_filter_deps="libass"
 atempo_filter_deps="avcodec"
 atempo_filter_select="rdft"
+avgblur_opencl_filter_deps="opencl"
 azmq_filter_deps="libzmq"
 blackframe_filter_deps="gpl"
 boxblur_filter_deps="gpl"
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index fc16512e2c..1043b41d80 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -139,6 +139,8 @@  OBJS-$(CONFIG_ALPHAMERGE_FILTER)             += vf_alphamerge.o
 OBJS-$(CONFIG_ASS_FILTER)                    += vf_subtitles.o
 OBJS-$(CONFIG_ATADENOISE_FILTER)             += vf_atadenoise.o
 OBJS-$(CONFIG_AVGBLUR_FILTER)                += vf_avgblur.o
+OBJS-$(CONFIG_AVGBLUR_OPENCL_FILTER)         += vf_avgblur_opencl.o opencl.o \
+                                                opencl/avgblur.o
 OBJS-$(CONFIG_BBOX_FILTER)                   += bbox.o vf_bbox.o
 OBJS-$(CONFIG_BENCH_FILTER)                  += f_bench.o
 OBJS-$(CONFIG_BITPLANENOISE_FILTER)          += vf_bitplanenoise.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index cc423af738..3f67e321bf 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -149,6 +149,7 @@  static void register_all(void)
     REGISTER_FILTER(ASS,            ass,            vf);
     REGISTER_FILTER(ATADENOISE,     atadenoise,     vf);
     REGISTER_FILTER(AVGBLUR,        avgblur,        vf);
+    REGISTER_FILTER(AVGBLUR_OPENCL, avgblur_opencl, vf);
     REGISTER_FILTER(BBOX,           bbox,           vf);
     REGISTER_FILTER(BENCH,          bench,          vf);
     REGISTER_FILTER(BITPLANENOISE,  bitplanenoise,  vf);
diff --git a/libavfilter/opencl/avgblur.cl b/libavfilter/opencl/avgblur.cl
new file mode 100644
index 0000000000..6a8d70df93
--- /dev/null
+++ b/libavfilter/opencl/avgblur.cl
@@ -0,0 +1,60 @@ 
+/*
+ * Copyright (c) 2018 Dylan Fernando
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+
+__kernel void avgblur_horiz(__write_only image2d_t dst,
+                            __read_only  image2d_t src,
+                            int rad)
+{
+    const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE |
+                               CLK_FILTER_NEAREST);
+    int2 loc = (int2)(get_global_id(0), get_global_id(1));
+    int2 size = (int2)(get_global_size(0), get_global_size(1));
+
+    int count = 0;
+    float4 acc = (float4)(0,0,0,0);
+
+    for (int xx = max(0, loc.x - rad); xx < min(loc.x + rad + 1, size.x); xx++) {
+        count++;
+        acc += read_imagef(src, sampler, (int2)(xx, loc.y));
+    }
+
+    write_imagef(dst, loc, acc / count);
+}
+
+__kernel void avgblur_vert(__write_only image2d_t dst,
+                           __read_only  image2d_t src,
+                           int radv)
+{
+    const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE |
+                               CLK_FILTER_NEAREST);
+    int2 loc = (int2)(get_global_id(0), get_global_id(1));
+    int2 size = (int2)(get_global_size(0), get_global_size(1));
+
+    int count = 0;
+    float4 acc = (float4)(0,0,0,0);
+
+    for (int yy = max(0, loc.y - radv); yy < min(loc.y + radv + 1, size.y); yy++) {
+        count++;
+        acc += read_imagef(src, sampler, (int2)(loc.x, yy));
+    }
+
+    write_imagef(dst, loc, acc / count);
+}
diff --git a/libavfilter/opencl_source.h b/libavfilter/opencl_source.h
index 23cdfc6ac9..02bc1723b0 100644
--- a/libavfilter/opencl_source.h
+++ b/libavfilter/opencl_source.h
@@ -19,6 +19,7 @@ 
 #ifndef AVFILTER_OPENCL_SOURCE_H
 #define AVFILTER_OPENCL_SOURCE_H
 
+extern const char *ff_opencl_source_avgblur;
 extern const char *ff_opencl_source_overlay;
 extern const char *ff_opencl_source_unsharp;
 
diff --git a/libavfilter/vf_avgblur_opencl.c b/libavfilter/vf_avgblur_opencl.c
new file mode 100644
index 0000000000..a6baa28ac1
--- /dev/null
+++ b/libavfilter/vf_avgblur_opencl.c
@@ -0,0 +1,328 @@ 
+/*
+ * Copyright (c) 2018 Dylan Fernando
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/common.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/mem.h"
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+
+#include "avfilter.h"
+#include "internal.h"
+#include "opencl.h"
+#include "opencl_source.h"
+#include "video.h"
+
+
+typedef struct AverageBlurOpenCLContext {
+    OpenCLFilterContext ocf;
+
+    int              initialised;
+    cl_kernel        kernel_horiz;
+    cl_kernel        kernel_vert;
+    cl_command_queue command_queue;
+
+    int radius;
+    int radiusV;
+    int planes;
+
+} AverageBlurOpenCLContext;
+
+
+static int avgblur_opencl_init(AVFilterContext *avctx)
+{
+    AverageBlurOpenCLContext *ctx = avctx->priv;
+    cl_int cle;
+    int err;
+
+    err = ff_opencl_filter_load_program(avctx, &ff_opencl_source_avgblur, 1);
+    if (err < 0)
+        goto fail;
+
+    ctx->command_queue = clCreateCommandQueue(ctx->ocf.hwctx->context,
+                                              ctx->ocf.hwctx->device_id,
+                                              0, &cle);
+    if (!ctx->command_queue) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to create OpenCL "
+               "command queue: %d.\n", cle);
+        err = AVERROR(EIO);
+        goto fail;
+    }
+
+    ctx->kernel_horiz = clCreateKernel(ctx->ocf.program,"avgblur_horiz", &cle);
+    if (!ctx->kernel_horiz) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to create kernel: %d.\n", cle);
+        err = AVERROR(EIO);
+        goto fail;
+    }
+
+    ctx->kernel_vert = clCreateKernel(ctx->ocf.program,"avgblur_vert", &cle);
+    if (!ctx->kernel_vert) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to create kernel: %d.\n", cle);
+        err = AVERROR(EIO);
+        goto fail;
+    }
+
+    if (ctx->radiusV <= 0) {
+        ctx->radiusV = ctx->radius;
+    }
+
+    ctx->initialised = 1;
+    return 0;
+
+fail:
+    if (ctx->command_queue)
+        clReleaseCommandQueue(ctx->command_queue);
+    if (ctx->kernel_horiz)
+        clReleaseKernel(ctx->kernel_horiz);
+    if (ctx->kernel_vert)
+        clReleaseKernel(ctx->kernel_vert);
+    return err;
+}
+
+static int avgblur_opencl_filter_frame(AVFilterLink *inlink, AVFrame *input)
+{
+    AVFilterContext    *avctx = inlink->dst;
+    AVFilterLink     *outlink = avctx->outputs[0];
+    AverageBlurOpenCLContext *ctx = avctx->priv;
+    AVFrame *output = NULL;
+    AVFrame *intermediate = NULL;
+    cl_int cle;
+    size_t global_work[2];
+    cl_mem src, dst, inter;
+    int err, p;
+
+    av_log(ctx, AV_LOG_DEBUG, "Filter input: %s, %ux%u (%"PRId64").\n",
+           av_get_pix_fmt_name(input->format),
+           input->width, input->height, input->pts);
+
+    if (!input->hw_frames_ctx)
+        return AVERROR(EINVAL);
+
+    if (!ctx->initialised) {
+        err = avgblur_opencl_init(avctx);
+        if (err < 0)
+            goto fail;
+
+    }
+
+    output = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+    if (!output) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    intermediate = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+    if (!intermediate) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    for (p = 0; p < FF_ARRAY_ELEMS(output->data); p++) {
+        src = (cl_mem) input->data[p];
+        dst = (cl_mem)output->data[p];
+        inter = (cl_mem) intermediate->data[p];
+
+        if (!dst)
+            break;
+
+        int radius_x = ctx->radius;
+        int radius_y = ctx->radiusV;
+
+        if (!(ctx->planes & (1 << p))) {
+            radius_x = 0;
+            radius_y = 0;
+        }
+
+        cle = clSetKernelArg(ctx->kernel_horiz, 0, sizeof(cl_mem), &inter);
+        if (cle != CL_SUCCESS) {
+            av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
+                   "destination image argument: %d.\n", cle);
+            goto fail;
+        }
+        cle = clSetKernelArg(ctx->kernel_horiz, 1, sizeof(cl_mem), &src);
+        if (cle != CL_SUCCESS) {
+            av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
+                   "source image argument: %d.\n", cle);
+            goto fail;
+        }
+        cle = clSetKernelArg(ctx->kernel_horiz, 2, sizeof(cl_int), &radius_x);
+        if (cle != CL_SUCCESS) {
+            av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
+                   "sizeX argument: %d.\n", cle);
+            goto fail;
+        }
+
+        global_work[0] = output->width;
+        global_work[1] = output->height;
+
+        av_log(avctx, AV_LOG_DEBUG, "Run kernel on plane %d "
+               "(%"SIZE_SPECIFIER"x%"SIZE_SPECIFIER").\n",
+               p, global_work[0], global_work[1]);
+
+        cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel_horiz, 2, NULL,
+                                     global_work, NULL,
+                                     0, NULL, NULL);
+        if (cle != CL_SUCCESS) {
+            av_log(avctx, AV_LOG_ERROR, "Failed to enqueue kernel: %d.\n",
+                   cle);
+            err = AVERROR(EIO);
+            goto fail;
+        }
+
+        cle = clSetKernelArg(ctx->kernel_vert, 0, sizeof(cl_mem), &dst);
+        if (cle != CL_SUCCESS) {
+            av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
+                   "destination image argument: %d.\n", cle);
+            goto fail;
+        }
+        cle = clSetKernelArg(ctx->kernel_vert, 1, sizeof(cl_mem), &inter);
+        if (cle != CL_SUCCESS) {
+            av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
+                   "source image argument: %d.\n", cle);
+            goto fail;
+        }
+        cle = clSetKernelArg(ctx->kernel_vert, 2, sizeof(cl_int), &radius_y);
+        if (cle != CL_SUCCESS) {
+            av_log(avctx, AV_LOG_ERROR, "Failed to set kernel "
+                   "sizeY argument: %d.\n", cle);
+            goto fail;
+        }
+
+        global_work[0] = output->width;
+        global_work[1] = output->height;
+
+        av_log(avctx, AV_LOG_DEBUG, "Run kernel on plane %d "
+               "(%"SIZE_SPECIFIER"x%"SIZE_SPECIFIER").\n",
+               p, global_work[0], global_work[1]);
+
+        cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel_vert, 2, NULL,
+                                     global_work, NULL,
+                                     0, NULL, NULL);
+        if (cle != CL_SUCCESS) {
+            av_log(avctx, AV_LOG_ERROR, "Failed to enqueue kernel: %d.\n",
+                   cle);
+            err = AVERROR(EIO);
+            goto fail;
+        }
+
+    }
+
+    cle = clFinish(ctx->command_queue);
+    if (cle != CL_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to finish command queue: %d.\n",
+               cle);
+        err = AVERROR(EIO);
+        goto fail;
+    }
+
+    err = av_frame_copy_props(output, input);
+    if (err < 0)
+        goto fail;
+
+    av_frame_free(&input);
+    av_frame_free(&intermediate);
+
+    av_log(ctx, AV_LOG_DEBUG, "Filter output: %s, %ux%u (%"PRId64").\n",
+           av_get_pix_fmt_name(output->format),
+           output->width, output->height, output->pts);
+
+    return ff_filter_frame(outlink, output);
+
+fail:
+    clFinish(ctx->command_queue);
+    av_frame_free(&input);
+    av_frame_free(&output);
+    av_frame_free(&intermediate);
+    return err;
+}
+
+static av_cold void avgblur_opencl_uninit(AVFilterContext *avctx)
+{
+    AverageBlurOpenCLContext *ctx = avctx->priv;
+    cl_int cle;
+
+
+    if (ctx->kernel_horiz) {
+        cle = clReleaseKernel(ctx->kernel_horiz);
+        if (cle != CL_SUCCESS)
+            av_log(avctx, AV_LOG_ERROR, "Failed to release "
+                   "kernel: %d.\n", cle);
+    }
+
+    if (ctx->kernel_vert) {
+        cle = clReleaseKernel(ctx->kernel_vert);
+        if (cle != CL_SUCCESS)
+            av_log(avctx, AV_LOG_ERROR, "Failed to release "
+                   "kernel: %d.\n", cle);
+    }
+
+    if (ctx->command_queue) {
+        cle = clReleaseCommandQueue(ctx->command_queue);
+        if (cle != CL_SUCCESS)
+            av_log(avctx, AV_LOG_ERROR, "Failed to release "
+                   "command queue: %d.\n", cle);
+    }
+
+    ff_opencl_filter_uninit(avctx);
+}
+
+#define OFFSET(x) offsetof(AverageBlurOpenCLContext, x)
+#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
+static const AVOption avgblur_opencl_options[] = {
+    { "sizeX",  "set horizontal size",  OFFSET(radius),  AV_OPT_TYPE_INT, {.i64=1},   1, 1024, FLAGS },
+    { "planes", "set planes to filter", OFFSET(planes),  AV_OPT_TYPE_INT, {.i64=0xF}, 0,  0xF, FLAGS },
+    { "sizeY",  "set vertical size",    OFFSET(radiusV), AV_OPT_TYPE_INT, {.i64=0},   0, 1024, FLAGS },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(avgblur_opencl);
+
+static const AVFilterPad avgblur_opencl_inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .filter_frame = &avgblur_opencl_filter_frame,
+        .config_props = &ff_opencl_filter_config_input,
+    },
+    { NULL }
+};
+
+static const AVFilterPad avgblur_opencl_outputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .config_props = &ff_opencl_filter_config_output,
+    },
+    { NULL }
+};
+
+AVFilter ff_vf_avgblur_opencl = {
+    .name           = "avgblur_opencl",
+    .description    = NULL_IF_CONFIG_SMALL("Apply average blur filter"),
+    .priv_size      = sizeof(AverageBlurOpenCLContext),
+    .priv_class     = &avgblur_opencl_class,
+    .init           = &ff_opencl_filter_init,
+    .uninit         = &avgblur_opencl_uninit,
+    .query_formats  = &ff_opencl_filter_query_formats,
+    .inputs         = avgblur_opencl_inputs,
+    .outputs        = avgblur_opencl_outputs,
+    .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
+};