diff mbox

[FFmpeg-devel,1/3] lavfi/opencl: Derive global work size from plane image sizes

Message ID 20180319230531.7079-1-sw@jkqxz.net
State Accepted
Commit 2a1542d105dc3161516e34eef77bcd64aa72cab4
Headers show

Commit Message

Mark Thompson March 19, 2018, 11:05 p.m. UTC
Add a new function to find the global work size given the output image and
the required block alignment, then use it in the overlay, program and unsharp
filters.  Fixes the overlay and unsharp filters applying the kernel to
locations outside the frame when subsampled planes are present.
---
 libavfilter/opencl.c            | 64 +++++++++++++++++++++++++++++++++++++++++
 libavfilter/opencl.h            |  8 ++++++
 libavfilter/vf_overlay_opencl.c |  6 ++--
 libavfilter/vf_program_opencl.c |  8 +++---
 libavfilter/vf_unsharp_opencl.c | 16 +++++------
 5 files changed, 87 insertions(+), 15 deletions(-)
diff mbox

Patch

diff --git a/libavfilter/opencl.c b/libavfilter/opencl.c
index 37afc41f8b..ae61667380 100644
--- a/libavfilter/opencl.c
+++ b/libavfilter/opencl.c
@@ -22,6 +22,7 @@ 
 #include "libavutil/hwcontext.h"
 #include "libavutil/hwcontext_opencl.h"
 #include "libavutil/mem.h"
+#include "libavutil/pixdesc.h"
 
 #include "avfilter.h"
 #include "formats.h"
@@ -276,3 +277,66 @@  fail:
     av_freep(&src);
     return err;
 }
+
+int ff_opencl_filter_work_size_from_image(AVFilterContext *avctx,
+                                          size_t *work_size,
+                                          AVFrame *frame, int plane,
+                                          int block_alignment)
+{
+    cl_mem image;
+    cl_mem_object_type type;
+    size_t width, height;
+    cl_int cle;
+
+    if (frame->format != AV_PIX_FMT_OPENCL) {
+        av_log(avctx, AV_LOG_ERROR, "Invalid frame format %s, "
+               "opencl required.\n", av_get_pix_fmt_name(frame->format));
+        return AVERROR(EINVAL);
+    }
+
+    image = (cl_mem)frame->data[plane];
+    if (!image) {
+        av_log(avctx, AV_LOG_ERROR, "Plane %d required but not set.\n",
+               plane);
+        return AVERROR(EINVAL);
+    }
+
+    cle = clGetMemObjectInfo(image, CL_MEM_TYPE, sizeof(type),
+                             &type, NULL);
+    if (cle != CL_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to query object type of "
+               "plane %d: %d.\n", plane, cle);
+        return AVERROR_UNKNOWN;
+    }
+    if (type != CL_MEM_OBJECT_IMAGE2D) {
+        av_log(avctx, AV_LOG_ERROR, "Plane %d is not a 2D image.\n",
+               plane);
+        return AVERROR(EINVAL);
+    }
+
+    cle = clGetImageInfo(image, CL_IMAGE_WIDTH,  sizeof(size_t),
+                         &width, NULL);
+    if (cle != CL_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to query plane %d width: %d.\n",
+               plane, cle);
+        return AVERROR_UNKNOWN;
+    }
+
+    cle = clGetImageInfo(image, CL_IMAGE_HEIGHT, sizeof(size_t),
+                         &height, NULL);
+    if (cle != CL_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to query plane %d height: %d.\n",
+               plane, cle);
+        return AVERROR_UNKNOWN;
+    }
+
+    if (block_alignment) {
+        width  = FFALIGN(width,  block_alignment);
+        height = FFALIGN(height, block_alignment);
+    }
+
+    work_size[0] = width;
+    work_size[1] = height;
+
+    return 0;
+}
diff --git a/libavfilter/opencl.h b/libavfilter/opencl.h
index 4d740c18ab..45fe2a2e27 100644
--- a/libavfilter/opencl.h
+++ b/libavfilter/opencl.h
@@ -84,4 +84,12 @@  int ff_opencl_filter_load_program(AVFilterContext *avctx,
 int ff_opencl_filter_load_program_from_file(AVFilterContext *avctx,
                                             const char *filename);
 
+/**
+ * Find the work size needed for a given plane of an image.
+ */
+int ff_opencl_filter_work_size_from_image(AVFilterContext *avctx,
+                                          size_t *work_size,
+                                          AVFrame *frame, int plane,
+                                          int block_alignment);
+
 #endif /* AVFILTER_OPENCL_H */
diff --git a/libavfilter/vf_overlay_opencl.c b/libavfilter/vf_overlay_opencl.c
index ee8381dfee..16e10f4371 100644
--- a/libavfilter/vf_overlay_opencl.c
+++ b/libavfilter/vf_overlay_opencl.c
@@ -216,8 +216,10 @@  static int overlay_opencl_blend(FFFrameSync *fs)
                 goto fail_kernel_arg;
         }
 
-        global_work[0] = output->width;
-        global_work[1] = output->height;
+        err = ff_opencl_filter_work_size_from_image(avctx, global_work,
+                                                    output, plane, 0);
+        if (err < 0)
+            goto fail;
 
         cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel, 2, NULL,
                                      global_work, NULL, 0, NULL, NULL);
diff --git a/libavfilter/vf_program_opencl.c b/libavfilter/vf_program_opencl.c
index 4ee9668236..0bcf188ac7 100644
--- a/libavfilter/vf_program_opencl.c
+++ b/libavfilter/vf_program_opencl.c
@@ -142,10 +142,10 @@  static int program_opencl_run(AVFilterContext *avctx)
             }
         }
 
-        cle = clGetImageInfo(dst, CL_IMAGE_WIDTH,  sizeof(size_t),
-                             &global_work[0], NULL);
-        cle = clGetImageInfo(dst, CL_IMAGE_HEIGHT, sizeof(size_t),
-                             &global_work[1], NULL);
+        err = ff_opencl_filter_work_size_from_image(avctx, global_work,
+                                                    output, plane, 0);
+        if (err < 0)
+            goto fail;
 
         av_log(avctx, AV_LOG_DEBUG, "Run kernel on plane %d "
                "(%zux%zu).\n", plane, global_work[0], global_work[1]);
diff --git a/libavfilter/vf_unsharp_opencl.c b/libavfilter/vf_unsharp_opencl.c
index 6a453c014b..19c91857cb 100644
--- a/libavfilter/vf_unsharp_opencl.c
+++ b/libavfilter/vf_unsharp_opencl.c
@@ -320,15 +320,13 @@  static int unsharp_opencl_filter_frame(AVFilterLink *inlink, AVFrame *input)
             }
         }
 
-        if (ctx->global) {
-            global_work[0] = output->width;
-            global_work[1] = output->height;
-        } else {
-            global_work[0] = FFALIGN(output->width,  16);
-            global_work[1] = FFALIGN(output->height, 16);
-            local_work[0]  = 16;
-            local_work[1]  = 16;
-        }
+        err = ff_opencl_filter_work_size_from_image(avctx, global_work, output, p,
+                                                    ctx->global ? 0 : 16);
+        if (err < 0)
+            goto fail;
+
+        local_work[0]  = 16;
+        local_work[1]  = 16;
 
         av_log(avctx, AV_LOG_DEBUG, "Run kernel on plane %d "
                "(%"SIZE_SPECIFIER"x%"SIZE_SPECIFIER").\n",