diff mbox

[FFmpeg-devel] lavf: add tranpose_opencl filter

Message ID 1543215907-32002-1-git-send-email-ruiling.song@intel.com
State New
Headers show

Commit Message

Ruiling Song Nov. 26, 2018, 7:05 a.m. UTC
Signed-off-by: Ruiling Song <ruiling.song@intel.com>
---
 configure                         |   1 +
 libavfilter/Makefile              |   1 +
 libavfilter/allfilters.c          |   1 +
 libavfilter/opencl/transpose.cl   |  35 +++++
 libavfilter/opencl_source.h       |   1 +
 libavfilter/transpose.h           |  34 +++++
 libavfilter/vf_transpose.c        |  14 +-
 libavfilter/vf_transpose_opencl.c | 294 ++++++++++++++++++++++++++++++++++++++
 8 files changed, 368 insertions(+), 13 deletions(-)
 create mode 100644 libavfilter/opencl/transpose.cl
 create mode 100644 libavfilter/transpose.h
 create mode 100644 libavfilter/vf_transpose_opencl.c

Comments

Mark Thompson Nov. 28, 2018, 12:40 a.m. UTC | #1
On 26/11/2018 07:05, Ruiling Song wrote:
> Signed-off-by: Ruiling Song <ruiling.song@intel.com>
> ---
>  configure                         |   1 +
>  libavfilter/Makefile              |   1 +
>  libavfilter/allfilters.c          |   1 +
>  libavfilter/opencl/transpose.cl   |  35 +++++
>  libavfilter/opencl_source.h       |   1 +
>  libavfilter/transpose.h           |  34 +++++
>  libavfilter/vf_transpose.c        |  14 +-
>  libavfilter/vf_transpose_opencl.c | 294 ++++++++++++++++++++++++++++++++++++++
>  8 files changed, 368 insertions(+), 13 deletions(-)
>  create mode 100644 libavfilter/opencl/transpose.cl
>  create mode 100644 libavfilter/transpose.h
>  create mode 100644 libavfilter/vf_transpose_opencl.c
> 
> diff --git a/configure b/configure
> index b4f944c..dcb3f5f 100755
> --- a/configure
> +++ b/configure
> @@ -3479,6 +3479,7 @@ tinterlace_merge_test_deps="tinterlace_filter"
>  tinterlace_pad_test_deps="tinterlace_filter"
>  tonemap_filter_deps="const_nan"
>  tonemap_opencl_filter_deps="opencl const_nan"
> +transpose_opencl_filter_deps="opencl"
>  unsharp_opencl_filter_deps="opencl"
>  uspp_filter_deps="gpl avcodec"
>  vaguedenoiser_filter_deps="gpl"
> diff --git a/libavfilter/Makefile b/libavfilter/Makefile
> index 1895fa2..6e26581 100644
> --- a/libavfilter/Makefile
> +++ b/libavfilter/Makefile
> @@ -393,6 +393,7 @@ OBJS-$(CONFIG_TONEMAP_OPENCL_FILTER)         += vf_tonemap_opencl.o colorspace.o
>  OBJS-$(CONFIG_TPAD_FILTER)                   += vf_tpad.o
>  OBJS-$(CONFIG_TRANSPOSE_FILTER)              += vf_transpose.o
>  OBJS-$(CONFIG_TRANSPOSE_NPP_FILTER)          += vf_transpose_npp.o cuda_check.o
> +OBJS-$(CONFIG_TRANSPOSE_OPENCL_FILTER)       += vf_transpose_opencl.o opencl.o opencl/transpose.o
>  OBJS-$(CONFIG_TRIM_FILTER)                   += trim.o
>  OBJS-$(CONFIG_UNPREMULTIPLY_FILTER)          += vf_premultiply.o framesync.o
>  OBJS-$(CONFIG_UNSHARP_FILTER)                += vf_unsharp.o
> diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
> index 837c99e..a600069 100644
> --- a/libavfilter/allfilters.c
> +++ b/libavfilter/allfilters.c
> @@ -372,6 +372,7 @@ extern AVFilter ff_vf_tonemap_opencl;
>  extern AVFilter ff_vf_tpad;
>  extern AVFilter ff_vf_transpose;
>  extern AVFilter ff_vf_transpose_npp;
> +extern AVFilter ff_vf_transpose_opencl;
>  extern AVFilter ff_vf_trim;
>  extern AVFilter ff_vf_unpremultiply;
>  extern AVFilter ff_vf_unsharp;
> diff --git a/libavfilter/opencl/transpose.cl b/libavfilter/opencl/transpose.cl
> new file mode 100644
> index 0000000..e6388ab
> --- /dev/null
> +++ b/libavfilter/opencl/transpose.cl
> @@ -0,0 +1,35 @@
> +/*
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +kernel void transpose(__write_only image2d_t dst,
> +                      __read_only image2d_t src,
> +                      int dir) {
> +    const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE |
> +                               CLK_ADDRESS_CLAMP_TO_EDGE   |
> +                               CLK_FILTER_NEAREST);
> +
> +    int2 size = get_image_dim(dst);
> +    int x = get_global_id(0);
> +    int y = get_global_id(1);
> +
> +    int xin = (dir & 2) ? (size.y - 1 - y) : y;
> +    int yin = (dir & 1) ? (size.x - 1 - x) : x;
> +    float4 data = read_imagef(src, sampler, (int2)(xin, yin));
> +
> +    if (x < size.x && y < size.y)
> +        write_imagef(dst, (int2)(x, y), data);
> +}

Does the dependency on dir have any effect on speed here?  Any call is only ever going to use one side of each of the dir cases, so it feels like it might be nicer to hard-code that so they aren't included in the compiled code at all.

> diff --git a/libavfilter/opencl_source.h b/libavfilter/opencl_source.h
> index 2f67d89..4118138 100644
> --- a/libavfilter/opencl_source.h
> +++ b/libavfilter/opencl_source.h
> @@ -25,6 +25,7 @@ extern const char *ff_opencl_source_convolution;
>  extern const char *ff_opencl_source_neighbor;
>  extern const char *ff_opencl_source_overlay;
>  extern const char *ff_opencl_source_tonemap;
> +extern const char *ff_opencl_source_transpose;
>  extern const char *ff_opencl_source_unsharp;
>  
>  #endif /* AVFILTER_OPENCL_SOURCE_H */
> diff --git a/libavfilter/transpose.h b/libavfilter/transpose.h
> new file mode 100644
> index 0000000..da8b28e
> --- /dev/null
> +++ b/libavfilter/transpose.h
> @@ -0,0 +1,34 @@
> +/*
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +#ifndef AVFILTER_TRANSPOSE_H
> +#define AVFILTER_TRANSPOSE_H
> +
> +typedef enum {
> +    TRANSPOSE_PT_TYPE_NONE,
> +    TRANSPOSE_PT_TYPE_LANDSCAPE,
> +    TRANSPOSE_PT_TYPE_PORTRAIT,
> +} PassthroughType;
> +
> +enum TransposeDir {
> +    TRANSPOSE_CCLOCK_FLIP,
> +    TRANSPOSE_CLOCK,
> +    TRANSPOSE_CCLOCK,
> +    TRANSPOSE_CLOCK_FLIP,
> +};

I know this was in the old code, but it's kindof weird that one of these is an anonymous enum typedef and the other a named enum.  Maybe make them the same, or just drop the names entirely since they are never used?

> +
> +#endif
> diff --git a/libavfilter/vf_transpose.c b/libavfilter/vf_transpose.c
> index 74a4bbc..dd54947 100644
> --- a/libavfilter/vf_transpose.c
> +++ b/libavfilter/vf_transpose.c
> @@ -38,19 +38,7 @@
>  #include "formats.h"
>  #include "internal.h"
>  #include "video.h"
> -
> -typedef enum {
> -    TRANSPOSE_PT_TYPE_NONE,
> -    TRANSPOSE_PT_TYPE_LANDSCAPE,
> -    TRANSPOSE_PT_TYPE_PORTRAIT,
> -} PassthroughType;
> -
> -enum TransposeDir {
> -    TRANSPOSE_CCLOCK_FLIP,
> -    TRANSPOSE_CLOCK,
> -    TRANSPOSE_CCLOCK,
> -    TRANSPOSE_CLOCK_FLIP,
> -};
> +#include "transpose.h"
>  
>  typedef struct TransVtable {
>      void (*transpose_8x8)(uint8_t *src, ptrdiff_t src_linesize,
> diff --git a/libavfilter/vf_transpose_opencl.c b/libavfilter/vf_transpose_opencl.c
> new file mode 100644
> index 0000000..efe7a0a
> --- /dev/null
> +++ b/libavfilter/vf_transpose_opencl.c
> @@ -0,0 +1,294 @@
> +/*
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +#include <float.h>
> +
> +#include "libavutil/avassert.h"
> +#include "libavutil/common.h"
> +#include "libavutil/imgutils.h"
> +#include "libavutil/mem.h"
> +#include "libavutil/opt.h"
> +#include "libavutil/pixdesc.h"
> +
> +#include "avfilter.h"
> +#include "internal.h"
> +#include "opencl.h"
> +#include "opencl_source.h"
> +#include "video.h"
> +#include "transpose.h"
> +
> +typedef struct TransposeOpenCLContext {
> +    OpenCLFilterContext ocf;
> +    int                   initialised;
> +    int passthrough;    ///< PassthroughType, landscape passthrough mode enabled
> +    int dir;            ///< TransposeDir
> +    cl_kernel             kernel;
> +    cl_command_queue      command_queue;
> +} TransposeOpenCLContext;
> +
> +static int transpose_opencl_init(AVFilterContext *avctx)
> +{
> +    TransposeOpenCLContext *ctx = avctx->priv;
> +    cl_int cle;
> +    int err;
> +
> +    err = ff_opencl_filter_load_program(avctx, &ff_opencl_source_transpose, 1);
> +    if (err < 0)
> +        goto fail;
> +
> +    ctx->command_queue = clCreateCommandQueue(ctx->ocf.hwctx->context,
> +                                              ctx->ocf.hwctx->device_id,
> +                                              0, &cle);
> +    CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create OpenCL "
> +                     "command queue %d.\n", cle);
> +
> +    ctx->kernel = clCreateKernel(ctx->ocf.program, "transpose", &cle);
> +    CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create kernel %d.\n", cle);
> +
> +
> +    ctx->initialised = 1;
> +    return 0;
> +
> +fail:
> +    if (ctx->command_queue)
> +        clReleaseCommandQueue(ctx->command_queue);
> +    if (ctx->kernel)
> +        clReleaseKernel(ctx->kernel);
> +    return err;
> +}
> +
> +static int transpose_opencl_config_output(AVFilterLink *outlink)
> +{
> +    AVFilterContext *avctx = outlink->src;
> +    TransposeOpenCLContext *s = avctx->priv;
> +    AVFilterLink *inlink = avctx->inputs[0];
> +    const AVPixFmtDescriptor *desc_in  = av_pix_fmt_desc_get(inlink->format);
> +    int ret;
> +
> +    if (s->dir&4) {
> +        av_log(avctx, AV_LOG_WARNING,
> +               "dir values greater than 3 are deprecated, "
> +               "use the passthrough option instead\n");
> +        s->dir &= 3;
> +        s->passthrough = TRANSPOSE_PT_TYPE_LANDSCAPE;

I'm not sure there is any point in including this legacy option - it was deprecated for the transpose filter in 2012.

> +    }
> +
> +    if ((inlink->w >= inlink->h &&
> +         s->passthrough == TRANSPOSE_PT_TYPE_LANDSCAPE) ||
> +        (inlink->w <= inlink->h &&
> +         s->passthrough == TRANSPOSE_PT_TYPE_PORTRAIT)) {
> +        av_log(avctx, AV_LOG_VERBOSE,
> +               "w:%d h:%d -> w:%d h:%d (passthrough mode)\n",
> +               inlink->w, inlink->h, inlink->w, inlink->h);
> +        return ff_opencl_filter_config_output(outlink);

Since you're doing real passthrough in this case, I think you want to set outlink->hw_frames_ctx to another reference to inlink->hw_frames_ctx?

(Calling ff_opencl_filter_config_output() here makes a new frames context.)

> +    } else {
> +        s->passthrough = TRANSPOSE_PT_TYPE_NONE;
> +    }
> +
> +    if (desc_in->log2_chroma_w != desc_in->log2_chroma_h) {
> +        av_log(avctx, AV_LOG_ERROR, "Input format %s not supported.\n",
> +               desc_in->name);
> +        return AVERROR(EINVAL);
> +    }
> +
> +    s->ocf.output_width = inlink->h;
> +    s->ocf.output_height = inlink->w;
> +    ret = ff_opencl_filter_config_output(outlink);
> +    if (ret < 0)
> +        return ret;
> +
> +    if (inlink->sample_aspect_ratio.num)
> +        outlink->sample_aspect_ratio = av_div_q((AVRational) { 1, 1 },
> +                                                inlink->sample_aspect_ratio);
> +    else
> +        outlink->sample_aspect_ratio = inlink->sample_aspect_ratio;
> +
> +    av_log(avctx, AV_LOG_VERBOSE,
> +           "w:%d h:%d dir:%d -> w:%d h:%d rotation:%s vflip:%d\n",
> +           inlink->w, inlink->h, s->dir, outlink->w, outlink->h,
> +           s->dir == 1 || s->dir == 3 ? "clockwise" : "counterclockwise",
> +           s->dir == 0 || s->dir == 3);
> +    return 0;
> +}
> +
> +static AVFrame *get_video_buffer(AVFilterLink *inlink, int w, int h)
> +{
> +    TransposeOpenCLContext *s = inlink->dst->priv;
> +
> +    return s->passthrough ?
> +        ff_null_get_video_buffer   (inlink, w, h) :
> +        ff_default_get_video_buffer(inlink, w, h);
> +}
> +
> +static int transpose_opencl_filter_frame(AVFilterLink *inlink, AVFrame *input)
> +{
> +    AVFilterContext    *avctx = inlink->dst;
> +    AVFilterLink     *outlink = avctx->outputs[0];
> +    TransposeOpenCLContext *ctx = avctx->priv;
> +    AVFrame *output = NULL;
> +    size_t global_work[2];
> +    size_t local_work[2];
> +    cl_mem src, dst;
> +    cl_int cle;
> +    int err, p;
> +
> +    av_log(ctx, AV_LOG_DEBUG, "Filter input: %s, %ux%u (%"PRId64").\n",
> +           av_get_pix_fmt_name(input->format),
> +           input->width, input->height, input->pts);
> +
> +    if (!input->hw_frames_ctx)
> +        return AVERROR(EINVAL);
> +
> +    if (ctx->passthrough)
> +        return ff_filter_frame(outlink, input);
> +
> +    output = ff_get_video_buffer(outlink, outlink->w, outlink->h);
> +    if (!output) {
> +        err = AVERROR(ENOMEM);
> +        goto fail;
> +    }
> +
> +    err = av_frame_copy_props(output, input);
> +    if (err < 0)
> +        goto fail;
> +
> +    if (input->sample_aspect_ratio.num == 0) {
> +        output->sample_aspect_ratio = input->sample_aspect_ratio;
> +    } else {
> +        output->sample_aspect_ratio.num = input->sample_aspect_ratio.den;
> +        output->sample_aspect_ratio.den = input->sample_aspect_ratio.num;
> +    }
> +
> +    if (!ctx->initialised) {
> +        err = transpose_opencl_init(avctx);
> +        if (err < 0)
> +            goto fail;
> +    }
> +
> +    local_work[0]  = 16;
> +    local_work[1]  = 16;

Does the local work size matter anywhere?  I think you may be able to leave this to the driver to choose its own values.

> +
> +    for (p = 0; p < FF_ARRAY_ELEMS(output->data); p++) {
> +        src = (cl_mem) input->data[p];
> +        dst = (cl_mem) output->data[p];
> +
> +        if (!dst)
> +            break;
> +        CL_SET_KERNEL_ARG(ctx->kernel, 0, cl_mem, &dst);
> +        CL_SET_KERNEL_ARG(ctx->kernel, 1, cl_mem, &src);
> +        CL_SET_KERNEL_ARG(ctx->kernel, 2, cl_int, &ctx->dir);
> +
> +        err = ff_opencl_filter_work_size_from_image(avctx, global_work, output,
> +                                                    p, 16);
> +
> +        cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel, 2, NULL,
> +                                     global_work, local_work,
> +                                     0, NULL, NULL);
> +        CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to enqueue kernel: %d.\n", cle);
> +    }
> +    cle = clFinish(ctx->command_queue);
> +    CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to finish command queue: %d.\n", cle);
> +
> +    av_frame_free(&input);
> +
> +    av_log(ctx, AV_LOG_DEBUG, "Filter output: %s, %ux%u (%"PRId64").\n",
> +           av_get_pix_fmt_name(output->format),
> +           output->width, output->height, output->pts);
> +
> +    return ff_filter_frame(outlink, output);
> +
> +fail:
> +    clFinish(ctx->command_queue);
> +    av_frame_free(&input);
> +    av_frame_free(&output);
> +    return err;
> +}
> +
> +static av_cold void transpose_opencl_uninit(AVFilterContext *avctx)
> +{
> +    TransposeOpenCLContext *ctx = avctx->priv;
> +    cl_int cle;
> +
> +    if (ctx->kernel) {
> +        cle = clReleaseKernel(ctx->kernel);
> +        if (cle != CL_SUCCESS)
> +            av_log(avctx, AV_LOG_ERROR, "Failed to release "
> +                   "kernel: %d.\n", cle);
> +    }
> +
> +    if (ctx->command_queue) {
> +        cle = clReleaseCommandQueue(ctx->command_queue);
> +        if (cle != CL_SUCCESS)
> +            av_log(avctx, AV_LOG_ERROR, "Failed to release "
> +                   "command queue: %d.\n", cle);
> +    }
> +
> +    ff_opencl_filter_uninit(avctx);
> +}
> +
> +#define OFFSET(x) offsetof(TransposeOpenCLContext, x)
> +#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
> +static const AVOption transpose_opencl_options[] = {
> +    { "dir", "set transpose direction", OFFSET(dir), AV_OPT_TYPE_INT, { .i64 = TRANSPOSE_CCLOCK_FLIP }, 0, 7, FLAGS, "dir" },

(Following on from comment above, limit here could be 0-3.)

> +        { "cclock_flip", "rotate counter-clockwise with vertical flip", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CCLOCK_FLIP }, .flags=FLAGS, .unit = "dir" },
> +        { "clock",       "rotate clockwise",                            0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CLOCK       }, .flags=FLAGS, .unit = "dir" },
> +        { "cclock",      "rotate counter-clockwise",                    0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CCLOCK      }, .flags=FLAGS, .unit = "dir" },
> +        { "clock_flip",  "rotate clockwise with vertical flip",         0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CLOCK_FLIP  }, .flags=FLAGS, .unit = "dir" },
> +
> +    { "passthrough", "do not apply transposition if the input matches the specified geometry",
> +      OFFSET(passthrough), AV_OPT_TYPE_INT, {.i64=TRANSPOSE_PT_TYPE_NONE},  0, INT_MAX, FLAGS, "passthrough" },
> +        { "none",      "always apply transposition",   0, AV_OPT_TYPE_CONST, {.i64=TRANSPOSE_PT_TYPE_NONE},      INT_MIN, INT_MAX, FLAGS, "passthrough" },
> +        { "portrait",  "preserve portrait geometry",   0, AV_OPT_TYPE_CONST, {.i64=TRANSPOSE_PT_TYPE_PORTRAIT},  INT_MIN, INT_MAX, FLAGS, "passthrough" },
> +        { "landscape", "preserve landscape geometry",  0, AV_OPT_TYPE_CONST, {.i64=TRANSPOSE_PT_TYPE_LANDSCAPE}, INT_MIN, INT_MAX, FLAGS, "passthrough" },
> +
> +    { NULL }
> +};
> +
> +AVFILTER_DEFINE_CLASS(transpose_opencl);
> +
> +static const AVFilterPad transpose_opencl_inputs[] = {
> +    {
> +        .name         = "default",
> +        .type         = AVMEDIA_TYPE_VIDEO,
> +        .get_video_buffer = get_video_buffer,
> +        .filter_frame = &transpose_opencl_filter_frame,
> +        .config_props = &ff_opencl_filter_config_input,
> +    },
> +    { NULL }
> +};
> +
> +static const AVFilterPad transpose_opencl_outputs[] = {
> +    {
> +        .name         = "default",
> +        .type         = AVMEDIA_TYPE_VIDEO,
> +        .config_props = &transpose_opencl_config_output,
> +    },
> +    { NULL }
> +};
> +
> +AVFilter ff_vf_transpose_opencl = {
> +    .name           = "transpose_opencl",
> +    .description    = NULL_IF_CONFIG_SMALL("Tanspose input video"),

Typo in the name here                           ^

> +    .priv_size      = sizeof(TransposeOpenCLContext),
> +    .priv_class     = &transpose_opencl_class,
> +    .init           = &ff_opencl_filter_init,
> +    .uninit         = &transpose_opencl_uninit,
> +    .query_formats  = &ff_opencl_filter_query_formats,
> +    .inputs         = transpose_opencl_inputs,
> +    .outputs        = transpose_opencl_outputs,
> +    .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
> +};
> 

Looks good, and works nicely in the small amount of testing I did.  I'll try it on some more platforms later.

Thanks!

- Mark
Ruiling Song Nov. 28, 2018, 2:12 a.m. UTC | #2
Thanks for your valuable comments, reply inline.

> -----Original Message-----

> From: ffmpeg-devel [mailto:ffmpeg-devel-bounces@ffmpeg.org] On Behalf Of

> Mark Thompson

> Sent: Wednesday, November 28, 2018 8:41 AM

> To: ffmpeg-devel@ffmpeg.org

> Subject: Re: [FFmpeg-devel] [PATCH] lavf: add tranpose_opencl filter

> 

> On 26/11/2018 07:05, Ruiling Song wrote:

> > Signed-off-by: Ruiling Song <ruiling.song@intel.com>

> > ---

> >  configure                         |   1 +

> >  libavfilter/Makefile              |   1 +

> >  libavfilter/allfilters.c          |   1 +

> >  libavfilter/opencl/transpose.cl   |  35 +++++

> >  libavfilter/opencl_source.h       |   1 +

> >  libavfilter/transpose.h           |  34 +++++

> >  libavfilter/vf_transpose.c        |  14 +-

> >  libavfilter/vf_transpose_opencl.c | 294

> ++++++++++++++++++++++++++++++++++++++

> >  8 files changed, 368 insertions(+), 13 deletions(-)

> >  create mode 100644 libavfilter/opencl/transpose.cl

> >  create mode 100644 libavfilter/transpose.h

> >  create mode 100644 libavfilter/vf_transpose_opencl.c

> >

> > diff --git a/configure b/configure

> > index b4f944c..dcb3f5f 100755

> > --- a/configure

> > +++ b/configure

> > @@ -3479,6 +3479,7 @@ tinterlace_merge_test_deps="tinterlace_filter"

> >  tinterlace_pad_test_deps="tinterlace_filter"

> >  tonemap_filter_deps="const_nan"

> >  tonemap_opencl_filter_deps="opencl const_nan"

> > +transpose_opencl_filter_deps="opencl"

> >  unsharp_opencl_filter_deps="opencl"

> >  uspp_filter_deps="gpl avcodec"

> >  vaguedenoiser_filter_deps="gpl"

> > diff --git a/libavfilter/Makefile b/libavfilter/Makefile

> > index 1895fa2..6e26581 100644

> > --- a/libavfilter/Makefile

> > +++ b/libavfilter/Makefile

> > @@ -393,6 +393,7 @@ OBJS-$(CONFIG_TONEMAP_OPENCL_FILTER)         +=

> vf_tonemap_opencl.o colorspace.o

> >  OBJS-$(CONFIG_TPAD_FILTER)                   += vf_tpad.o

> >  OBJS-$(CONFIG_TRANSPOSE_FILTER)              += vf_transpose.o

> >  OBJS-$(CONFIG_TRANSPOSE_NPP_FILTER)          += vf_transpose_npp.o

> cuda_check.o

> > +OBJS-$(CONFIG_TRANSPOSE_OPENCL_FILTER)       += vf_transpose_opencl.o

> opencl.o opencl/transpose.o

> >  OBJS-$(CONFIG_TRIM_FILTER)                   += trim.o

> >  OBJS-$(CONFIG_UNPREMULTIPLY_FILTER)          += vf_premultiply.o

> framesync.o

> >  OBJS-$(CONFIG_UNSHARP_FILTER)                += vf_unsharp.o

> > diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c

> > index 837c99e..a600069 100644

> > --- a/libavfilter/allfilters.c

> > +++ b/libavfilter/allfilters.c

> > @@ -372,6 +372,7 @@ extern AVFilter ff_vf_tonemap_opencl;

> >  extern AVFilter ff_vf_tpad;

> >  extern AVFilter ff_vf_transpose;

> >  extern AVFilter ff_vf_transpose_npp;

> > +extern AVFilter ff_vf_transpose_opencl;

> >  extern AVFilter ff_vf_trim;

> >  extern AVFilter ff_vf_unpremultiply;

> >  extern AVFilter ff_vf_unsharp;

> > diff --git a/libavfilter/opencl/transpose.cl b/libavfilter/opencl/transpose.cl

> > new file mode 100644

> > index 0000000..e6388ab

> > --- /dev/null

> > +++ b/libavfilter/opencl/transpose.cl

> > @@ -0,0 +1,35 @@

> > +/*

> > + * This file is part of FFmpeg.

> > + *

> > + * FFmpeg is free software; you can redistribute it and/or

> > + * modify it under the terms of the GNU Lesser General Public

> > + * License as published by the Free Software Foundation; either

> > + * version 2.1 of the License, or (at your option) any later version.

> > + *

> > + * FFmpeg is distributed in the hope that it will be useful,

> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of

> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU

> > + * Lesser General Public License for more details.

> > + *

> > + * You should have received a copy of the GNU Lesser General Public

> > + * License along with FFmpeg; if not, write to the Free Software

> > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301

> USA

> > + */

> > +kernel void transpose(__write_only image2d_t dst,

> > +                      __read_only image2d_t src,

> > +                      int dir) {

> > +    const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE |

> > +                               CLK_ADDRESS_CLAMP_TO_EDGE   |

> > +                               CLK_FILTER_NEAREST);

> > +

> > +    int2 size = get_image_dim(dst);

> > +    int x = get_global_id(0);

> > +    int y = get_global_id(1);

> > +

> > +    int xin = (dir & 2) ? (size.y - 1 - y) : y;

> > +    int yin = (dir & 1) ? (size.x - 1 - x) : x;

> > +    float4 data = read_imagef(src, sampler, (int2)(xin, yin));

> > +

> > +    if (x < size.x && y < size.y)

> > +        write_imagef(dst, (int2)(x, y), data);

> > +}

> 

> Does the dependency on dir have any effect on speed here?  Any call is only ever

> going to use one side of each of the dir cases, so it feels like it might be nicer to

> hard-code that so they aren't included in the compiled code at all.

For such memory bound OpenCL kernel, some little more arithmetic operation would not affect the overall performance.
I did some more testing, and see no obvious performance difference for different 'dir' parameter. So I just keep it as now.

> 

> > diff --git a/libavfilter/opencl_source.h b/libavfilter/opencl_source.h

> > index 2f67d89..4118138 100644

> > --- a/libavfilter/opencl_source.h

> > +++ b/libavfilter/opencl_source.h

> > @@ -25,6 +25,7 @@ extern const char *ff_opencl_source_convolution;

> >  extern const char *ff_opencl_source_neighbor;

> >  extern const char *ff_opencl_source_overlay;

> >  extern const char *ff_opencl_source_tonemap;

> > +extern const char *ff_opencl_source_transpose;

> >  extern const char *ff_opencl_source_unsharp;

> >

> >  #endif /* AVFILTER_OPENCL_SOURCE_H */

> > diff --git a/libavfilter/transpose.h b/libavfilter/transpose.h

> > new file mode 100644

> > index 0000000..da8b28e

> > --- /dev/null

> > +++ b/libavfilter/transpose.h

> > @@ -0,0 +1,34 @@

> > +/*

> > + * This file is part of FFmpeg.

> > + *

> > + * FFmpeg is free software; you can redistribute it and/or

> > + * modify it under the terms of the GNU Lesser General Public

> > + * License as published by the Free Software Foundation; either

> > + * version 2.1 of the License, or (at your option) any later version.

> > + *

> > + * FFmpeg is distributed in the hope that it will be useful,

> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of

> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU

> > + * Lesser General Public License for more details.

> > + *

> > + * You should have received a copy of the GNU Lesser General Public

> > + * License along with FFmpeg; if not, write to the Free Software

> > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301

> USA

> > + */

> > +#ifndef AVFILTER_TRANSPOSE_H

> > +#define AVFILTER_TRANSPOSE_H

> > +

> > +typedef enum {

> > +    TRANSPOSE_PT_TYPE_NONE,

> > +    TRANSPOSE_PT_TYPE_LANDSCAPE,

> > +    TRANSPOSE_PT_TYPE_PORTRAIT,

> > +} PassthroughType;

> > +

> > +enum TransposeDir {

> > +    TRANSPOSE_CCLOCK_FLIP,

> > +    TRANSPOSE_CLOCK,

> > +    TRANSPOSE_CCLOCK,

> > +    TRANSPOSE_CLOCK_FLIP,

> > +};

> 

> I know this was in the old code, but it's kindof weird that one of these is an

> anonymous enum typedef and the other a named enum.  Maybe make them the

> same, or just drop the names entirely since they are never used?

Will make them the same.

> 

> > +

> > +#endif

> > diff --git a/libavfilter/vf_transpose.c b/libavfilter/vf_transpose.c

> > index 74a4bbc..dd54947 100644

> > --- a/libavfilter/vf_transpose.c

> > +++ b/libavfilter/vf_transpose.c

> > @@ -38,19 +38,7 @@

> >  #include "formats.h"

> >  #include "internal.h"

> >  #include "video.h"

> > -

> > -typedef enum {

> > -    TRANSPOSE_PT_TYPE_NONE,

> > -    TRANSPOSE_PT_TYPE_LANDSCAPE,

> > -    TRANSPOSE_PT_TYPE_PORTRAIT,

> > -} PassthroughType;

> > -

> > -enum TransposeDir {

> > -    TRANSPOSE_CCLOCK_FLIP,

> > -    TRANSPOSE_CLOCK,

> > -    TRANSPOSE_CCLOCK,

> > -    TRANSPOSE_CLOCK_FLIP,

> > -};

> > +#include "transpose.h"

> >

> >  typedef struct TransVtable {

> >      void (*transpose_8x8)(uint8_t *src, ptrdiff_t src_linesize,

> > diff --git a/libavfilter/vf_transpose_opencl.c

> b/libavfilter/vf_transpose_opencl.c

> > new file mode 100644

> > index 0000000..efe7a0a

> > --- /dev/null

> > +++ b/libavfilter/vf_transpose_opencl.c

> > @@ -0,0 +1,294 @@

> > +/*

> > + * This file is part of FFmpeg.

> > + *

> > + * FFmpeg is free software; you can redistribute it and/or

> > + * modify it under the terms of the GNU Lesser General Public

> > + * License as published by the Free Software Foundation; either

> > + * version 2.1 of the License, or (at your option) any later version.

> > + *

> > + * FFmpeg is distributed in the hope that it will be useful,

> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of

> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU

> > + * Lesser General Public License for more details.

> > + *

> > + * You should have received a copy of the GNU Lesser General Public

> > + * License along with FFmpeg; if not, write to the Free Software

> > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301

> USA

> > + */

> > +#include <float.h>

> > +

> > +#include "libavutil/avassert.h"

> > +#include "libavutil/common.h"

> > +#include "libavutil/imgutils.h"

> > +#include "libavutil/mem.h"

> > +#include "libavutil/opt.h"

> > +#include "libavutil/pixdesc.h"

> > +

> > +#include "avfilter.h"

> > +#include "internal.h"

> > +#include "opencl.h"

> > +#include "opencl_source.h"

> > +#include "video.h"

> > +#include "transpose.h"

> > +

> > +typedef struct TransposeOpenCLContext {

> > +    OpenCLFilterContext ocf;

> > +    int                   initialised;

> > +    int passthrough;    ///< PassthroughType, landscape passthrough mode

> enabled

> > +    int dir;            ///< TransposeDir

> > +    cl_kernel             kernel;

> > +    cl_command_queue      command_queue;

> > +} TransposeOpenCLContext;

> > +

> > +static int transpose_opencl_init(AVFilterContext *avctx)

> > +{

> > +    TransposeOpenCLContext *ctx = avctx->priv;

> > +    cl_int cle;

> > +    int err;

> > +

> > +    err = ff_opencl_filter_load_program(avctx, &ff_opencl_source_transpose,

> 1);

> > +    if (err < 0)

> > +        goto fail;

> > +

> > +    ctx->command_queue = clCreateCommandQueue(ctx->ocf.hwctx-

> >context,

> > +                                              ctx->ocf.hwctx->device_id,

> > +                                              0, &cle);

> > +    CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create OpenCL "

> > +                     "command queue %d.\n", cle);

> > +

> > +    ctx->kernel = clCreateKernel(ctx->ocf.program, "transpose", &cle);

> > +    CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create kernel %d.\n", cle);

> > +

> > +

> > +    ctx->initialised = 1;

> > +    return 0;

> > +

> > +fail:

> > +    if (ctx->command_queue)

> > +        clReleaseCommandQueue(ctx->command_queue);

> > +    if (ctx->kernel)

> > +        clReleaseKernel(ctx->kernel);

> > +    return err;

> > +}

> > +

> > +static int transpose_opencl_config_output(AVFilterLink *outlink)

> > +{

> > +    AVFilterContext *avctx = outlink->src;

> > +    TransposeOpenCLContext *s = avctx->priv;

> > +    AVFilterLink *inlink = avctx->inputs[0];

> > +    const AVPixFmtDescriptor *desc_in  = av_pix_fmt_desc_get(inlink->format);

> > +    int ret;

> > +

> > +    if (s->dir&4) {

> > +        av_log(avctx, AV_LOG_WARNING,

> > +               "dir values greater than 3 are deprecated, "

> > +               "use the passthrough option instead\n");

> > +        s->dir &= 3;

> > +        s->passthrough = TRANSPOSE_PT_TYPE_LANDSCAPE;

> 

> I'm not sure there is any point in including this legacy option - it was deprecated

> for the transpose filter in 2012.

Sounds good to not include them here. Will remove.

> 

> > +    }

> > +

> > +    if ((inlink->w >= inlink->h &&

> > +         s->passthrough == TRANSPOSE_PT_TYPE_LANDSCAPE) ||

> > +        (inlink->w <= inlink->h &&

> > +         s->passthrough == TRANSPOSE_PT_TYPE_PORTRAIT)) {

> > +        av_log(avctx, AV_LOG_VERBOSE,

> > +               "w:%d h:%d -> w:%d h:%d (passthrough mode)\n",

> > +               inlink->w, inlink->h, inlink->w, inlink->h);

> > +        return ff_opencl_filter_config_output(outlink);

> 

> Since you're doing real passthrough in this case, I think you want to set outlink-

> >hw_frames_ctx to another reference to inlink->hw_frames_ctx?

Sounds reasonable. Will fix it.
> 

> (Calling ff_opencl_filter_config_output() here makes a new frames context.)

> 

> > +    } else {

> > +        s->passthrough = TRANSPOSE_PT_TYPE_NONE;

> > +    }

> > +

> > +    if (desc_in->log2_chroma_w != desc_in->log2_chroma_h) {

> > +        av_log(avctx, AV_LOG_ERROR, "Input format %s not supported.\n",

> > +               desc_in->name);

> > +        return AVERROR(EINVAL);

> > +    }

> > +

> > +    s->ocf.output_width = inlink->h;

> > +    s->ocf.output_height = inlink->w;

> > +    ret = ff_opencl_filter_config_output(outlink);

> > +    if (ret < 0)

> > +        return ret;

> > +

> > +    if (inlink->sample_aspect_ratio.num)

> > +        outlink->sample_aspect_ratio = av_div_q((AVRational) { 1, 1 },

> > +                                                inlink->sample_aspect_ratio);

> > +    else

> > +        outlink->sample_aspect_ratio = inlink->sample_aspect_ratio;

> > +

> > +    av_log(avctx, AV_LOG_VERBOSE,

> > +           "w:%d h:%d dir:%d -> w:%d h:%d rotation:%s vflip:%d\n",

> > +           inlink->w, inlink->h, s->dir, outlink->w, outlink->h,

> > +           s->dir == 1 || s->dir == 3 ? "clockwise" : "counterclockwise",

> > +           s->dir == 0 || s->dir == 3);

> > +    return 0;

> > +}

> > +

> > +static AVFrame *get_video_buffer(AVFilterLink *inlink, int w, int h)

> > +{

> > +    TransposeOpenCLContext *s = inlink->dst->priv;

> > +

> > +    return s->passthrough ?

> > +        ff_null_get_video_buffer   (inlink, w, h) :

> > +        ff_default_get_video_buffer(inlink, w, h);

> > +}

> > +

> > +static int transpose_opencl_filter_frame(AVFilterLink *inlink, AVFrame *input)

> > +{

> > +    AVFilterContext    *avctx = inlink->dst;

> > +    AVFilterLink     *outlink = avctx->outputs[0];

> > +    TransposeOpenCLContext *ctx = avctx->priv;

> > +    AVFrame *output = NULL;

> > +    size_t global_work[2];

> > +    size_t local_work[2];

> > +    cl_mem src, dst;

> > +    cl_int cle;

> > +    int err, p;

> > +

> > +    av_log(ctx, AV_LOG_DEBUG, "Filter input: %s, %ux%u (%"PRId64").\n",

> > +           av_get_pix_fmt_name(input->format),

> > +           input->width, input->height, input->pts);

> > +

> > +    if (!input->hw_frames_ctx)

> > +        return AVERROR(EINVAL);

> > +

> > +    if (ctx->passthrough)

> > +        return ff_filter_frame(outlink, input);

> > +

> > +    output = ff_get_video_buffer(outlink, outlink->w, outlink->h);

> > +    if (!output) {

> > +        err = AVERROR(ENOMEM);

> > +        goto fail;

> > +    }

> > +

> > +    err = av_frame_copy_props(output, input);

> > +    if (err < 0)

> > +        goto fail;

> > +

> > +    if (input->sample_aspect_ratio.num == 0) {

> > +        output->sample_aspect_ratio = input->sample_aspect_ratio;

> > +    } else {

> > +        output->sample_aspect_ratio.num = input->sample_aspect_ratio.den;

> > +        output->sample_aspect_ratio.den = input->sample_aspect_ratio.num;

> > +    }

> > +

> > +    if (!ctx->initialised) {

> > +        err = transpose_opencl_init(avctx);

> > +        if (err < 0)

> > +            goto fail;

> > +    }

> > +

> > +    local_work[0]  = 16;

> > +    local_work[1]  = 16;

> 

> Does the local work size matter anywhere?  I think you may be able to leave this

> to the driver to choose its own values.

Sounds reasonable. I chose this value when I was experimenting some optimization but later got no benefit.
Will remove it.

> 

> > +

> > +    for (p = 0; p < FF_ARRAY_ELEMS(output->data); p++) {

> > +        src = (cl_mem) input->data[p];

> > +        dst = (cl_mem) output->data[p];

> > +

> > +        if (!dst)

> > +            break;

> > +        CL_SET_KERNEL_ARG(ctx->kernel, 0, cl_mem, &dst);

> > +        CL_SET_KERNEL_ARG(ctx->kernel, 1, cl_mem, &src);

> > +        CL_SET_KERNEL_ARG(ctx->kernel, 2, cl_int, &ctx->dir);

> > +

> > +        err = ff_opencl_filter_work_size_from_image(avctx, global_work,

> output,

> > +                                                    p, 16);

> > +

> > +        cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel, 2,

> NULL,

> > +                                     global_work, local_work,

> > +                                     0, NULL, NULL);

> > +        CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to enqueue kernel: %d.\n",

> cle);

> > +    }

> > +    cle = clFinish(ctx->command_queue);

> > +    CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to finish command

> queue: %d.\n", cle);

> > +

> > +    av_frame_free(&input);

> > +

> > +    av_log(ctx, AV_LOG_DEBUG, "Filter output: %s, %ux%u (%"PRId64").\n",

> > +           av_get_pix_fmt_name(output->format),

> > +           output->width, output->height, output->pts);

> > +

> > +    return ff_filter_frame(outlink, output);

> > +

> > +fail:

> > +    clFinish(ctx->command_queue);

> > +    av_frame_free(&input);

> > +    av_frame_free(&output);

> > +    return err;

> > +}

> > +

> > +static av_cold void transpose_opencl_uninit(AVFilterContext *avctx)

> > +{

> > +    TransposeOpenCLContext *ctx = avctx->priv;

> > +    cl_int cle;

> > +

> > +    if (ctx->kernel) {

> > +        cle = clReleaseKernel(ctx->kernel);

> > +        if (cle != CL_SUCCESS)

> > +            av_log(avctx, AV_LOG_ERROR, "Failed to release "

> > +                   "kernel: %d.\n", cle);

> > +    }

> > +

> > +    if (ctx->command_queue) {

> > +        cle = clReleaseCommandQueue(ctx->command_queue);

> > +        if (cle != CL_SUCCESS)

> > +            av_log(avctx, AV_LOG_ERROR, "Failed to release "

> > +                   "command queue: %d.\n", cle);

> > +    }

> > +

> > +    ff_opencl_filter_uninit(avctx);

> > +}

> > +

> > +#define OFFSET(x) offsetof(TransposeOpenCLContext, x)

> > +#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM |

> AV_OPT_FLAG_VIDEO_PARAM)

> > +static const AVOption transpose_opencl_options[] = {

> > +    { "dir", "set transpose direction", OFFSET(dir), AV_OPT_TYPE_INT, { .i64 =

> TRANSPOSE_CCLOCK_FLIP }, 0, 7, FLAGS, "dir" },

> 

> (Following on from comment above, limit here could be 0-3.)

Will fix it.

> 

> > +        { "cclock_flip", "rotate counter-clockwise with vertical flip", 0,

> AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CCLOCK_FLIP }, .flags=FLAGS, .unit

> = "dir" },

> > +        { "clock",       "rotate clockwise",                            0, AV_OPT_TYPE_CONST,

> { .i64 = TRANSPOSE_CLOCK       }, .flags=FLAGS, .unit = "dir" },

> > +        { "cclock",      "rotate counter-clockwise",                    0,

> AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CCLOCK      }, .flags=FLAGS, .unit =

> "dir" },

> > +        { "clock_flip",  "rotate clockwise with vertical flip",         0,

> AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CLOCK_FLIP  }, .flags=FLAGS, .unit =

> "dir" },

> > +

> > +    { "passthrough", "do not apply transposition if the input matches the

> specified geometry",

> > +      OFFSET(passthrough), AV_OPT_TYPE_INT,

> {.i64=TRANSPOSE_PT_TYPE_NONE},  0, INT_MAX, FLAGS, "passthrough" },

> > +        { "none",      "always apply transposition",   0, AV_OPT_TYPE_CONST,

> {.i64=TRANSPOSE_PT_TYPE_NONE},      INT_MIN, INT_MAX, FLAGS,

> "passthrough" },

> > +        { "portrait",  "preserve portrait geometry",   0, AV_OPT_TYPE_CONST,

> {.i64=TRANSPOSE_PT_TYPE_PORTRAIT},  INT_MIN, INT_MAX, FLAGS,

> "passthrough" },

> > +        { "landscape", "preserve landscape geometry",  0, AV_OPT_TYPE_CONST,

> {.i64=TRANSPOSE_PT_TYPE_LANDSCAPE}, INT_MIN, INT_MAX, FLAGS,

> "passthrough" },

> > +

> > +    { NULL }

> > +};

> > +

> > +AVFILTER_DEFINE_CLASS(transpose_opencl);

> > +

> > +static const AVFilterPad transpose_opencl_inputs[] = {

> > +    {

> > +        .name         = "default",

> > +        .type         = AVMEDIA_TYPE_VIDEO,

> > +        .get_video_buffer = get_video_buffer,

> > +        .filter_frame = &transpose_opencl_filter_frame,

> > +        .config_props = &ff_opencl_filter_config_input,

> > +    },

> > +    { NULL }

> > +};

> > +

> > +static const AVFilterPad transpose_opencl_outputs[] = {

> > +    {

> > +        .name         = "default",

> > +        .type         = AVMEDIA_TYPE_VIDEO,

> > +        .config_props = &transpose_opencl_config_output,

> > +    },

> > +    { NULL }

> > +};

> > +

> > +AVFilter ff_vf_transpose_opencl = {

> > +    .name           = "transpose_opencl",

> > +    .description    = NULL_IF_CONFIG_SMALL("Tanspose input video"),

> 

> Typo in the name here                           ^

Will fix it.
> 

> > +    .priv_size      = sizeof(TransposeOpenCLContext),

> > +    .priv_class     = &transpose_opencl_class,

> > +    .init           = &ff_opencl_filter_init,

> > +    .uninit         = &transpose_opencl_uninit,

> > +    .query_formats  = &ff_opencl_filter_query_formats,

> > +    .inputs         = transpose_opencl_inputs,

> > +    .outputs        = transpose_opencl_outputs,

> > +    .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,

> > +};

> >

> 

> Looks good, and works nicely in the small amount of testing I did.  I'll try it on

> some more platforms later.

Really appreciate it!

Ruiling
> 

> Thanks!

> 

> - Mark

> _______________________________________________

> ffmpeg-devel mailing list

> ffmpeg-devel@ffmpeg.org

> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
diff mbox

Patch

diff --git a/configure b/configure
index b4f944c..dcb3f5f 100755
--- a/configure
+++ b/configure
@@ -3479,6 +3479,7 @@  tinterlace_merge_test_deps="tinterlace_filter"
 tinterlace_pad_test_deps="tinterlace_filter"
 tonemap_filter_deps="const_nan"
 tonemap_opencl_filter_deps="opencl const_nan"
+transpose_opencl_filter_deps="opencl"
 unsharp_opencl_filter_deps="opencl"
 uspp_filter_deps="gpl avcodec"
 vaguedenoiser_filter_deps="gpl"
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 1895fa2..6e26581 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -393,6 +393,7 @@  OBJS-$(CONFIG_TONEMAP_OPENCL_FILTER)         += vf_tonemap_opencl.o colorspace.o
 OBJS-$(CONFIG_TPAD_FILTER)                   += vf_tpad.o
 OBJS-$(CONFIG_TRANSPOSE_FILTER)              += vf_transpose.o
 OBJS-$(CONFIG_TRANSPOSE_NPP_FILTER)          += vf_transpose_npp.o cuda_check.o
+OBJS-$(CONFIG_TRANSPOSE_OPENCL_FILTER)       += vf_transpose_opencl.o opencl.o opencl/transpose.o
 OBJS-$(CONFIG_TRIM_FILTER)                   += trim.o
 OBJS-$(CONFIG_UNPREMULTIPLY_FILTER)          += vf_premultiply.o framesync.o
 OBJS-$(CONFIG_UNSHARP_FILTER)                += vf_unsharp.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index 837c99e..a600069 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -372,6 +372,7 @@  extern AVFilter ff_vf_tonemap_opencl;
 extern AVFilter ff_vf_tpad;
 extern AVFilter ff_vf_transpose;
 extern AVFilter ff_vf_transpose_npp;
+extern AVFilter ff_vf_transpose_opencl;
 extern AVFilter ff_vf_trim;
 extern AVFilter ff_vf_unpremultiply;
 extern AVFilter ff_vf_unsharp;
diff --git a/libavfilter/opencl/transpose.cl b/libavfilter/opencl/transpose.cl
new file mode 100644
index 0000000..e6388ab
--- /dev/null
+++ b/libavfilter/opencl/transpose.cl
@@ -0,0 +1,35 @@ 
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+kernel void transpose(__write_only image2d_t dst,
+                      __read_only image2d_t src,
+                      int dir) {
+    const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE |
+                               CLK_ADDRESS_CLAMP_TO_EDGE   |
+                               CLK_FILTER_NEAREST);
+
+    int2 size = get_image_dim(dst);
+    int x = get_global_id(0);
+    int y = get_global_id(1);
+
+    int xin = (dir & 2) ? (size.y - 1 - y) : y;
+    int yin = (dir & 1) ? (size.x - 1 - x) : x;
+    float4 data = read_imagef(src, sampler, (int2)(xin, yin));
+
+    if (x < size.x && y < size.y)
+        write_imagef(dst, (int2)(x, y), data);
+}
diff --git a/libavfilter/opencl_source.h b/libavfilter/opencl_source.h
index 2f67d89..4118138 100644
--- a/libavfilter/opencl_source.h
+++ b/libavfilter/opencl_source.h
@@ -25,6 +25,7 @@  extern const char *ff_opencl_source_convolution;
 extern const char *ff_opencl_source_neighbor;
 extern const char *ff_opencl_source_overlay;
 extern const char *ff_opencl_source_tonemap;
+extern const char *ff_opencl_source_transpose;
 extern const char *ff_opencl_source_unsharp;
 
 #endif /* AVFILTER_OPENCL_SOURCE_H */
diff --git a/libavfilter/transpose.h b/libavfilter/transpose.h
new file mode 100644
index 0000000..da8b28e
--- /dev/null
+++ b/libavfilter/transpose.h
@@ -0,0 +1,34 @@ 
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#ifndef AVFILTER_TRANSPOSE_H
+#define AVFILTER_TRANSPOSE_H
+
+typedef enum {
+    TRANSPOSE_PT_TYPE_NONE,
+    TRANSPOSE_PT_TYPE_LANDSCAPE,
+    TRANSPOSE_PT_TYPE_PORTRAIT,
+} PassthroughType;
+
+enum TransposeDir {
+    TRANSPOSE_CCLOCK_FLIP,
+    TRANSPOSE_CLOCK,
+    TRANSPOSE_CCLOCK,
+    TRANSPOSE_CLOCK_FLIP,
+};
+
+#endif
diff --git a/libavfilter/vf_transpose.c b/libavfilter/vf_transpose.c
index 74a4bbc..dd54947 100644
--- a/libavfilter/vf_transpose.c
+++ b/libavfilter/vf_transpose.c
@@ -38,19 +38,7 @@ 
 #include "formats.h"
 #include "internal.h"
 #include "video.h"
-
-typedef enum {
-    TRANSPOSE_PT_TYPE_NONE,
-    TRANSPOSE_PT_TYPE_LANDSCAPE,
-    TRANSPOSE_PT_TYPE_PORTRAIT,
-} PassthroughType;
-
-enum TransposeDir {
-    TRANSPOSE_CCLOCK_FLIP,
-    TRANSPOSE_CLOCK,
-    TRANSPOSE_CCLOCK,
-    TRANSPOSE_CLOCK_FLIP,
-};
+#include "transpose.h"
 
 typedef struct TransVtable {
     void (*transpose_8x8)(uint8_t *src, ptrdiff_t src_linesize,
diff --git a/libavfilter/vf_transpose_opencl.c b/libavfilter/vf_transpose_opencl.c
new file mode 100644
index 0000000..efe7a0a
--- /dev/null
+++ b/libavfilter/vf_transpose_opencl.c
@@ -0,0 +1,294 @@ 
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include <float.h>
+
+#include "libavutil/avassert.h"
+#include "libavutil/common.h"
+#include "libavutil/imgutils.h"
+#include "libavutil/mem.h"
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+
+#include "avfilter.h"
+#include "internal.h"
+#include "opencl.h"
+#include "opencl_source.h"
+#include "video.h"
+#include "transpose.h"
+
+typedef struct TransposeOpenCLContext {
+    OpenCLFilterContext ocf;
+    int                   initialised;
+    int passthrough;    ///< PassthroughType, landscape passthrough mode enabled
+    int dir;            ///< TransposeDir
+    cl_kernel             kernel;
+    cl_command_queue      command_queue;
+} TransposeOpenCLContext;
+
+static int transpose_opencl_init(AVFilterContext *avctx)
+{
+    TransposeOpenCLContext *ctx = avctx->priv;
+    cl_int cle;
+    int err;
+
+    err = ff_opencl_filter_load_program(avctx, &ff_opencl_source_transpose, 1);
+    if (err < 0)
+        goto fail;
+
+    ctx->command_queue = clCreateCommandQueue(ctx->ocf.hwctx->context,
+                                              ctx->ocf.hwctx->device_id,
+                                              0, &cle);
+    CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create OpenCL "
+                     "command queue %d.\n", cle);
+
+    ctx->kernel = clCreateKernel(ctx->ocf.program, "transpose", &cle);
+    CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create kernel %d.\n", cle);
+
+
+    ctx->initialised = 1;
+    return 0;
+
+fail:
+    if (ctx->command_queue)
+        clReleaseCommandQueue(ctx->command_queue);
+    if (ctx->kernel)
+        clReleaseKernel(ctx->kernel);
+    return err;
+}
+
+static int transpose_opencl_config_output(AVFilterLink *outlink)
+{
+    AVFilterContext *avctx = outlink->src;
+    TransposeOpenCLContext *s = avctx->priv;
+    AVFilterLink *inlink = avctx->inputs[0];
+    const AVPixFmtDescriptor *desc_in  = av_pix_fmt_desc_get(inlink->format);
+    int ret;
+
+    if (s->dir&4) {
+        av_log(avctx, AV_LOG_WARNING,
+               "dir values greater than 3 are deprecated, "
+               "use the passthrough option instead\n");
+        s->dir &= 3;
+        s->passthrough = TRANSPOSE_PT_TYPE_LANDSCAPE;
+    }
+
+    if ((inlink->w >= inlink->h &&
+         s->passthrough == TRANSPOSE_PT_TYPE_LANDSCAPE) ||
+        (inlink->w <= inlink->h &&
+         s->passthrough == TRANSPOSE_PT_TYPE_PORTRAIT)) {
+        av_log(avctx, AV_LOG_VERBOSE,
+               "w:%d h:%d -> w:%d h:%d (passthrough mode)\n",
+               inlink->w, inlink->h, inlink->w, inlink->h);
+        return ff_opencl_filter_config_output(outlink);
+    } else {
+        s->passthrough = TRANSPOSE_PT_TYPE_NONE;
+    }
+
+    if (desc_in->log2_chroma_w != desc_in->log2_chroma_h) {
+        av_log(avctx, AV_LOG_ERROR, "Input format %s not supported.\n",
+               desc_in->name);
+        return AVERROR(EINVAL);
+    }
+
+    s->ocf.output_width = inlink->h;
+    s->ocf.output_height = inlink->w;
+    ret = ff_opencl_filter_config_output(outlink);
+    if (ret < 0)
+        return ret;
+
+    if (inlink->sample_aspect_ratio.num)
+        outlink->sample_aspect_ratio = av_div_q((AVRational) { 1, 1 },
+                                                inlink->sample_aspect_ratio);
+    else
+        outlink->sample_aspect_ratio = inlink->sample_aspect_ratio;
+
+    av_log(avctx, AV_LOG_VERBOSE,
+           "w:%d h:%d dir:%d -> w:%d h:%d rotation:%s vflip:%d\n",
+           inlink->w, inlink->h, s->dir, outlink->w, outlink->h,
+           s->dir == 1 || s->dir == 3 ? "clockwise" : "counterclockwise",
+           s->dir == 0 || s->dir == 3);
+    return 0;
+}
+
+static AVFrame *get_video_buffer(AVFilterLink *inlink, int w, int h)
+{
+    TransposeOpenCLContext *s = inlink->dst->priv;
+
+    return s->passthrough ?
+        ff_null_get_video_buffer   (inlink, w, h) :
+        ff_default_get_video_buffer(inlink, w, h);
+}
+
+static int transpose_opencl_filter_frame(AVFilterLink *inlink, AVFrame *input)
+{
+    AVFilterContext    *avctx = inlink->dst;
+    AVFilterLink     *outlink = avctx->outputs[0];
+    TransposeOpenCLContext *ctx = avctx->priv;
+    AVFrame *output = NULL;
+    size_t global_work[2];
+    size_t local_work[2];
+    cl_mem src, dst;
+    cl_int cle;
+    int err, p;
+
+    av_log(ctx, AV_LOG_DEBUG, "Filter input: %s, %ux%u (%"PRId64").\n",
+           av_get_pix_fmt_name(input->format),
+           input->width, input->height, input->pts);
+
+    if (!input->hw_frames_ctx)
+        return AVERROR(EINVAL);
+
+    if (ctx->passthrough)
+        return ff_filter_frame(outlink, input);
+
+    output = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+    if (!output) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    err = av_frame_copy_props(output, input);
+    if (err < 0)
+        goto fail;
+
+    if (input->sample_aspect_ratio.num == 0) {
+        output->sample_aspect_ratio = input->sample_aspect_ratio;
+    } else {
+        output->sample_aspect_ratio.num = input->sample_aspect_ratio.den;
+        output->sample_aspect_ratio.den = input->sample_aspect_ratio.num;
+    }
+
+    if (!ctx->initialised) {
+        err = transpose_opencl_init(avctx);
+        if (err < 0)
+            goto fail;
+    }
+
+    local_work[0]  = 16;
+    local_work[1]  = 16;
+
+    for (p = 0; p < FF_ARRAY_ELEMS(output->data); p++) {
+        src = (cl_mem) input->data[p];
+        dst = (cl_mem) output->data[p];
+
+        if (!dst)
+            break;
+        CL_SET_KERNEL_ARG(ctx->kernel, 0, cl_mem, &dst);
+        CL_SET_KERNEL_ARG(ctx->kernel, 1, cl_mem, &src);
+        CL_SET_KERNEL_ARG(ctx->kernel, 2, cl_int, &ctx->dir);
+
+        err = ff_opencl_filter_work_size_from_image(avctx, global_work, output,
+                                                    p, 16);
+
+        cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel, 2, NULL,
+                                     global_work, local_work,
+                                     0, NULL, NULL);
+        CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to enqueue kernel: %d.\n", cle);
+    }
+    cle = clFinish(ctx->command_queue);
+    CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to finish command queue: %d.\n", cle);
+
+    av_frame_free(&input);
+
+    av_log(ctx, AV_LOG_DEBUG, "Filter output: %s, %ux%u (%"PRId64").\n",
+           av_get_pix_fmt_name(output->format),
+           output->width, output->height, output->pts);
+
+    return ff_filter_frame(outlink, output);
+
+fail:
+    clFinish(ctx->command_queue);
+    av_frame_free(&input);
+    av_frame_free(&output);
+    return err;
+}
+
+static av_cold void transpose_opencl_uninit(AVFilterContext *avctx)
+{
+    TransposeOpenCLContext *ctx = avctx->priv;
+    cl_int cle;
+
+    if (ctx->kernel) {
+        cle = clReleaseKernel(ctx->kernel);
+        if (cle != CL_SUCCESS)
+            av_log(avctx, AV_LOG_ERROR, "Failed to release "
+                   "kernel: %d.\n", cle);
+    }
+
+    if (ctx->command_queue) {
+        cle = clReleaseCommandQueue(ctx->command_queue);
+        if (cle != CL_SUCCESS)
+            av_log(avctx, AV_LOG_ERROR, "Failed to release "
+                   "command queue: %d.\n", cle);
+    }
+
+    ff_opencl_filter_uninit(avctx);
+}
+
+#define OFFSET(x) offsetof(TransposeOpenCLContext, x)
+#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
+static const AVOption transpose_opencl_options[] = {
+    { "dir", "set transpose direction", OFFSET(dir), AV_OPT_TYPE_INT, { .i64 = TRANSPOSE_CCLOCK_FLIP }, 0, 7, FLAGS, "dir" },
+        { "cclock_flip", "rotate counter-clockwise with vertical flip", 0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CCLOCK_FLIP }, .flags=FLAGS, .unit = "dir" },
+        { "clock",       "rotate clockwise",                            0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CLOCK       }, .flags=FLAGS, .unit = "dir" },
+        { "cclock",      "rotate counter-clockwise",                    0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CCLOCK      }, .flags=FLAGS, .unit = "dir" },
+        { "clock_flip",  "rotate clockwise with vertical flip",         0, AV_OPT_TYPE_CONST, { .i64 = TRANSPOSE_CLOCK_FLIP  }, .flags=FLAGS, .unit = "dir" },
+
+    { "passthrough", "do not apply transposition if the input matches the specified geometry",
+      OFFSET(passthrough), AV_OPT_TYPE_INT, {.i64=TRANSPOSE_PT_TYPE_NONE},  0, INT_MAX, FLAGS, "passthrough" },
+        { "none",      "always apply transposition",   0, AV_OPT_TYPE_CONST, {.i64=TRANSPOSE_PT_TYPE_NONE},      INT_MIN, INT_MAX, FLAGS, "passthrough" },
+        { "portrait",  "preserve portrait geometry",   0, AV_OPT_TYPE_CONST, {.i64=TRANSPOSE_PT_TYPE_PORTRAIT},  INT_MIN, INT_MAX, FLAGS, "passthrough" },
+        { "landscape", "preserve landscape geometry",  0, AV_OPT_TYPE_CONST, {.i64=TRANSPOSE_PT_TYPE_LANDSCAPE}, INT_MIN, INT_MAX, FLAGS, "passthrough" },
+
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(transpose_opencl);
+
+static const AVFilterPad transpose_opencl_inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .get_video_buffer = get_video_buffer,
+        .filter_frame = &transpose_opencl_filter_frame,
+        .config_props = &ff_opencl_filter_config_input,
+    },
+    { NULL }
+};
+
+static const AVFilterPad transpose_opencl_outputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .config_props = &transpose_opencl_config_output,
+    },
+    { NULL }
+};
+
+AVFilter ff_vf_transpose_opencl = {
+    .name           = "transpose_opencl",
+    .description    = NULL_IF_CONFIG_SMALL("Tanspose input video"),
+    .priv_size      = sizeof(TransposeOpenCLContext),
+    .priv_class     = &transpose_opencl_class,
+    .init           = &ff_opencl_filter_init,
+    .uninit         = &transpose_opencl_uninit,
+    .query_formats  = &ff_opencl_filter_query_formats,
+    .inputs         = transpose_opencl_inputs,
+    .outputs        = transpose_opencl_outputs,
+    .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
+};