diff mbox

[FFmpeg-devel,5/6] lavfi: add a Vulkan scale filter

Message ID 20180621165543.5198-6-atomnuker@gmail.com
State New
Headers show

Commit Message

Rostislav Pehlivanov June 21, 2018, 4:55 p.m. UTC
Can convert to RGB using very fast fixed-function conversions.

Signed-off-by: Rostislav Pehlivanov <atomnuker@gmail.com>
---
 configure                     |   1 +
 libavfilter/Makefile          |   1 +
 libavfilter/allfilters.c      |   1 +
 libavfilter/vf_scale_vulkan.c | 395 ++++++++++++++++++++++++++++++++++
 4 files changed, 398 insertions(+)
 create mode 100644 libavfilter/vf_scale_vulkan.c

Comments

Mark Thompson Sept. 2, 2018, 9:24 p.m. UTC | #1
On 21/06/18 17:55, Rostislav Pehlivanov wrote:
> Can convert to RGB using very fast fixed-function conversions.
> 
> Signed-off-by: Rostislav Pehlivanov <atomnuker@gmail.com>
> ---
>  configure                     |   1 +
>  libavfilter/Makefile          |   1 +
>  libavfilter/allfilters.c      |   1 +
>  libavfilter/vf_scale_vulkan.c | 395 ++++++++++++++++++++++++++++++++++
>  4 files changed, 398 insertions(+)
>  create mode 100644 libavfilter/vf_scale_vulkan.c

Would it be difficult to add support for RGB->YUV as well?  Having YUV->RGB only limits the conversion use slightly, especially since it has no colourspace information so you can't trust something else to do the conversion in the opposite direction.

From testing on ANV / Coffee Lake:

* When doing non-scaling YUV420P->RGB0 conversion on ANV I get something funny on the bottom and right pixel edges of a 1280x720 frame, as if it's taking some unknown value from beyond the edge.

* Scaling 1920x1080 -> 1280x720 from a mapped image seems to do something ugly with extra 8 pixels at the bottom.  The image appears to be 1080 height everywhere inside FFmpeg, but somehow it has the bottom region included as a fill-down of the last line (very obvious with a diagonal line at the bottom of the frame).

* I see some green bleeding in from the bottom in some YUV cases?  Not sure how to characterise them.  If you haven't seen this I can try to track down exactly when.


No comments for the actual code.

Thanks,

- Mark
diff mbox

Patch

diff --git a/configure b/configure
index cd5229ef2d..d1ceb9e38d 100755
--- a/configure
+++ b/configure
@@ -3428,6 +3428,7 @@  zmq_filter_deps="libzmq"
 zoompan_filter_deps="swscale"
 zscale_filter_deps="libzimg const_nan"
 scale_vaapi_filter_deps="vaapi VAProcPipelineParameterBuffer"
+scale_vulkan_filter_deps="vulkan libshaderc"
 vpp_qsv_filter_deps="libmfx"
 vpp_qsv_filter_select="qsvvpp"
 
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index dbb7a859dd..6e2d3681ec 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -317,6 +317,7 @@  OBJS-$(CONFIG_SCALE_CUDA_FILTER)             += vf_scale_cuda.o vf_scale_cuda.pt
 OBJS-$(CONFIG_SCALE_NPP_FILTER)              += vf_scale_npp.o scale.o
 OBJS-$(CONFIG_SCALE_QSV_FILTER)              += vf_scale_qsv.o
 OBJS-$(CONFIG_SCALE_VAAPI_FILTER)            += vf_scale_vaapi.o scale.o vaapi_vpp.o
+OBJS-$(CONFIG_SCALE_VULKAN_FILTER)           += vf_scale_vulkan.o scale.o vulkan.o
 OBJS-$(CONFIG_SCALE2REF_FILTER)              += vf_scale.o scale.o
 OBJS-$(CONFIG_SELECT_FILTER)                 += f_select.o
 OBJS-$(CONFIG_SELECTIVECOLOR_FILTER)         += vf_selectivecolor.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index 5848ad9b4f..ee58cc9eee 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -306,6 +306,7 @@  extern AVFilter ff_vf_scale_cuda;
 extern AVFilter ff_vf_scale_npp;
 extern AVFilter ff_vf_scale_qsv;
 extern AVFilter ff_vf_scale_vaapi;
+extern AVFilter ff_vf_scale_vulkan;
 extern AVFilter ff_vf_scale2ref;
 extern AVFilter ff_vf_select;
 extern AVFilter ff_vf_selectivecolor;
diff --git a/libavfilter/vf_scale_vulkan.c b/libavfilter/vf_scale_vulkan.c
new file mode 100644
index 0000000000..4a5647072d
--- /dev/null
+++ b/libavfilter/vf_scale_vulkan.c
@@ -0,0 +1,395 @@ 
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/opt.h"
+#include "vulkan.h"
+#include "scale.h"
+#include "internal.h"
+
+enum ScalerFunc { /* Scaling kernels selectable through the "scaler" option */
+    F_BILINEAR = 0, /* option default; init_filter() maps it to VK_FILTER_LINEAR */
+    F_NEAREST,      /* init_filter() maps it to VK_FILTER_NEAREST */
+
+    F_NB,           /* number of entries above; no switch in this file handles it */
+};
+
+typedef struct ScaleVulkanContext { /* Private state of the scale_vulkan filter */
+    VulkanFilterContext vkctx;
+
+    int conv;        /* set in config_output: input is non-RGB and differs from the output format */
+    int initialized; /* set to 1 at the end of init_filter(), cleared in uninit */
+    FFVkExecContext exec;         /* created in init_filter(); its buf/queue/fence are used per frame */
+    const VulkanSampler *sampler; /* result of ff_vk_init_sampler() in init_filter() */
+
+    /* Shader updaters (the .updater targets of the descriptor bindings, rewritten per frame); must be in the main filter struct */
+    VkDescriptorImageInfo input_images[3];
+    VkDescriptorImageInfo output_images[3];
+
+    enum ScalerFunc scaler;     /* "scaler" option */
+    char *output_format_string; /* "format" option; NULL when not given */
+    char *w_expr;               /* "w" option expression, defaults to "iw" */
+    char *h_expr;               /* "h" option expression, defaults to "ih" */
+} ScaleVulkanContext;
+
+static const char scale_bilinear[] = { /* GLSL helper text; init_filter() emits it into the compute shader via GLSLD */
+    C(0, void scale_bilinear(int idx, ivec2 pos)                               )
+    C(0, {                                                                     )
+    C(1,     const vec2 npos = (vec2(pos) + 0.5f) / imageSize(output_img[idx]);) /* pixel centre divided by the output plane size */
+    C(1,     imageStore(output_img[idx], pos, texture(input_img[idx], npos));  ) /* sample input plane idx at npos, store at pos */
+    C(0, }                                                                     )
+};
+
+/**
+ * Build the Vulkan state used for scaling: the sampler, the compute shader
+ * with its single descriptor set, the pipeline layout, the execution context
+ * and the compute pipeline. Sets s->initialized on success.
+ *
+ * @param ctx filter context whose priv is a ScaleVulkanContext
+ * @param in  input frame; handed to ff_vk_init_sampler() only when a format
+ *            conversion was selected (s->conv), otherwise NULL is passed
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
+{
+    int err;
+    VkFilter sampler_mode;
+    ScaleVulkanContext *s = ctx->priv;
+
+    switch (s->scaler) {
+    case F_NEAREST:
+        sampler_mode = VK_FILTER_NEAREST;
+        break;
+    case F_BILINEAR:
+        sampler_mode = VK_FILTER_LINEAR;
+        break;
+    default:
+        /* Without this, sampler_mode would be read uninitialized below */
+        av_log(ctx, AV_LOG_ERROR, "Invalid scaler function %d!\n",
+               (int)s->scaler);
+        return AVERROR(EINVAL);
+    }
+
+    /* Create a sampler */
+    s->sampler = ff_vk_init_sampler(ctx, s->conv ? in : NULL, 0, sampler_mode);
+    if (!s->sampler)
+        return AVERROR_EXTERNAL;
+
+    { /* Create the shader */
+        /* The GLSL* macros below refer to the variable named shd */
+        SPIRVShader *shd;
+        VulkanDescriptorSetBinding desc_i[2] = {
+            {
+                .name       = "input_img",
+                .type       = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+                .dimensions = 2,
+                /* A converting sampler reads the whole frame through one view */
+                .elems      = s->conv ? 1 :
+                              av_pix_fmt_count_planes(s->vkctx.input_format),
+                .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
+                .updater    = s->input_images,
+                .samplers   = DUP_SAMPLER_ARRAY4(s->sampler->sampler),
+            },
+            {
+                .name       = "output_img",
+                .type       = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+                .mem_layout = ff_vk_shader_rep_fmt(s->vkctx.output_format),
+                .mem_quali  = "writeonly",
+                .dimensions = 2,
+                .elems      = av_pix_fmt_count_planes(s->vkctx.output_format),
+                .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
+                .updater    = s->output_images,
+            },
+        };
+
+        shd = ff_vk_init_shader(ctx, "scale_compute",
+                                VK_SHADER_STAGE_COMPUTE_BIT);
+        /* NOTE(review): assumes a NULL return signals allocation failure */
+        if (!shd)
+            return AVERROR(ENOMEM);
+
+        ff_vk_set_compute_shader_sizes(ctx, shd, (int [3]){ 16, 16, 1 });
+
+        RET(ff_vk_add_descriptor_set(ctx, shd, desc_i, 2, 0)); /* set 0 */
+
+        GLSLD(   scale_bilinear                                               );
+        GLSLC(0, void main()                                                  );
+        GLSLC(0, {                                                            );
+        /* One loop iteration per output plane */
+        GLSLF(1,     for (int i = 0; i < %i; i++)             ,desc_i[1].elems);
+
+        /* Both scalers share one shader body; only the sampler filter differs */
+        switch (s->scaler) {
+        case F_NEAREST:
+        case F_BILINEAR:
+            GLSLC(2,     scale_bilinear(i, ivec2(gl_GlobalInvocationID.xy));  );
+            break;
+        }
+
+        GLSLC(0, }                                                            );
+
+        RET(ff_vk_compile_shader(ctx, shd, "main"));
+    }
+
+    RET(ff_vk_init_pipeline_layout(ctx));
+
+    /* Execution context */
+    RET(ff_vk_create_exec_ctx(ctx, &s->exec,
+                              s->vkctx.hwctx->queue_family_comp_index));
+
+    /* The pipeline */
+    RET(ff_vk_init_compute_pipeline(ctx));
+
+    s->initialized = 1;
+
+    return 0;
+
+fail:
+    return err;
+}
+
+/**
+ * Scale/convert one frame: create image views for the input and output
+ * images, update descriptor set 0, record a command buffer that transitions
+ * both images and dispatches the compute shader, submit it and wait on the
+ * fence until execution has finished.
+ *
+ * Every image view created here is destroyed again before returning, on
+ * success as well as on every failure path.
+ *
+ * @param avctx filter context whose priv is a ScaleVulkanContext
+ * @param out   destination Vulkan frame; its layout/access fields are updated
+ * @param in    source Vulkan frame; its layout/access fields are updated
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int process_frames(AVFilterContext *avctx, AVVkFrame *out, AVVkFrame *in)
+{
+    int i, err;
+    /* Number of views actually created, so cleanup never touches stale handles */
+    int nb_in_views = 0, nb_out_views = 0;
+    VkResult ret;
+    ScaleVulkanContext *s = avctx->priv;
+    const int in_planes  = av_pix_fmt_count_planes(s->vkctx.input_format);
+    const int out_planes = av_pix_fmt_count_planes(s->vkctx.output_format);
+
+    VkCommandBufferBeginInfo cmd_start = {
+        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
+        .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
+    };
+
+    VkComponentMapping null_map = {
+        .r = VK_COMPONENT_SWIZZLE_IDENTITY,
+        .g = VK_COMPONENT_SWIZZLE_IDENTITY,
+        .b = VK_COMPONENT_SWIZZLE_IDENTITY,
+        .a = VK_COMPONENT_SWIZZLE_IDENTITY,
+    };
+
+    VkSubmitInfo s_info = {
+        .sType                = VK_STRUCTURE_TYPE_SUBMIT_INFO,
+        .commandBufferCount   = 1,
+        .pCommandBuffers      = &s->exec.buf,
+    };
+
+    if (s->sampler->converting) { /* RGB */
+        /* One view of the whole image, read through the sampler's conversion */
+        RET(ff_vk_create_imageview(avctx, &s->input_images[0].imageView, in,
+                                   av_vkfmt_from_pixfmt(s->vkctx.input_format),
+                                   VK_IMAGE_ASPECT_COLOR_BIT,
+                                   null_map, &s->sampler->yuv_conv));
+        nb_in_views = 1;
+        s->input_images[0].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+    } else {
+        for (i = 0; i < in_planes; i++) {
+            RET(ff_vk_create_imageview(avctx, &s->input_images[i].imageView, in,
+                                       ff_vk_plane_rep_fmt(s->vkctx.input_format, i),
+                                       ff_vk_aspect_flags(s->vkctx.input_format, i),
+                                       null_map, NULL));
+            nb_in_views = i + 1;
+            s->input_images[i].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+        }
+    }
+
+    for (i = 0; i < out_planes; i++) {
+        RET(ff_vk_create_imageview(avctx, &s->output_images[i].imageView, out,
+                                   ff_vk_plane_rep_fmt(s->vkctx.output_format, i),
+                                   ff_vk_aspect_flags(s->vkctx.output_format, i),
+                                   null_map, NULL));
+        nb_out_views = i + 1;
+        s->output_images[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
+    }
+
+    ff_vk_update_descriptor_set(avctx, 0);
+
+    vkBeginCommandBuffer(s->exec.buf, &cmd_start);
+
+    {
+        /* bar[0] moves the input to a shader-readable layout,
+         * bar[1] moves the output to the general layout for image stores */
+        VkImageMemoryBarrier bar[2] = {
+            {
+                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+                .srcAccessMask = 0,
+                .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
+                .oldLayout = in->layout,
+                .newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
+                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .image = in->img,
+                .subresourceRange.aspectMask = ff_vk_aspect_flags(s->vkctx.input_format, -1),
+                .subresourceRange.levelCount = 1,
+                .subresourceRange.layerCount = 1,
+            },
+            {
+                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+                .srcAccessMask = 0,
+                .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
+                .oldLayout = out->layout,
+                .newLayout = VK_IMAGE_LAYOUT_GENERAL,
+                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .image = out->img,
+                .subresourceRange.aspectMask = ff_vk_aspect_flags(s->vkctx.output_format, -1),
+                .subresourceRange.levelCount = 1,
+                .subresourceRange.layerCount = 1,
+            },
+        };
+
+        vkCmdPipelineBarrier(s->exec.buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+                             VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0,
+                             0, NULL, 0, NULL, 2, bar);
+
+        /* Remember the new state in the frames for later users */
+        in->layout  = bar[0].newLayout;
+        in->access  = bar[0].dstAccessMask;
+
+        out->layout = bar[1].newLayout;
+        out->access = bar[1].dstAccessMask;
+    }
+
+    vkCmdBindPipeline(s->exec.buf, VK_PIPELINE_BIND_POINT_COMPUTE,
+                      s->vkctx.pipeline);
+    vkCmdBindDescriptorSets(s->exec.buf, VK_PIPELINE_BIND_POINT_COMPUTE,
+                            s->vkctx.pipeline_layout, 0,
+                            s->vkctx.descriptor_sets_num, s->vkctx.desc_set,
+                            0, 0);
+    /* Round the group count up so the whole output is covered */
+    vkCmdDispatch(s->exec.buf,
+                  FFALIGN(s->vkctx.output_width,  s->vkctx.shaders[0].local_size[0])/s->vkctx.shaders[0].local_size[0],
+                  FFALIGN(s->vkctx.output_height, s->vkctx.shaders[0].local_size[1])/s->vkctx.shaders[0].local_size[1], 1);
+
+    vkEndCommandBuffer(s->exec.buf);
+
+    ret = vkQueueSubmit(s->exec.queue, 1, &s_info, s->exec.fence);
+    if (ret != VK_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to submit command buffer: %s\n",
+               ff_vk_ret2str(ret));
+        /* Go through the cleanup below instead of leaking the image views */
+        err = AVERROR_EXTERNAL;
+        goto fail;
+    }
+
+    vkWaitForFences(s->vkctx.hwctx->act_dev, 1, &s->exec.fence, VK_TRUE, UINT64_MAX);
+    vkResetFences(s->vkctx.hwctx->act_dev, 1, &s->exec.fence);
+
+    err = 0;
+
+fail:
+    /* Input and output view counts are tracked separately */
+    for (i = 0; i < nb_in_views; i++)
+        ff_vk_destroy_imageview(avctx, s->input_images[i].imageView);
+    for (i = 0; i < nb_out_views; i++)
+        ff_vk_destroy_imageview(avctx, s->output_images[i].imageView);
+
+    return err;
+}
+
+/*
+ * filter_frame callback of the input pad. Allocates an output frame, runs
+ * init_filter() on first use, processes the frame on the GPU, copies the
+ * frame properties and forwards the result downstream. The input frame is
+ * always released; the output frame is released on failure.
+ */
+static int scale_vulkan_filter_frame(AVFilterLink *link, AVFrame *in)
+{
+    AVFilterContext *avctx   = link->dst;
+    AVFilterLink *outlink    = avctx->outputs[0];
+    ScaleVulkanContext *priv = avctx->priv;
+    AVFrame *out;
+    int err;
+
+    out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+    if (!out) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    /* Vulkan objects are built lazily, once the first frame is available */
+    if (!priv->initialized) {
+        RET(init_filter(avctx, in));
+    }
+
+    /* data[0] of a Vulkan hardware frame carries the AVVkFrame */
+    RET(process_frames(avctx, (AVVkFrame *)out->data[0],
+                              (AVVkFrame *) in->data[0]));
+
+    if ((err = av_frame_copy_props(out, in)) < 0)
+        goto fail;
+
+    av_frame_free(&in);
+
+    return ff_filter_frame(outlink, out);
+
+fail:
+    av_frame_free(&in);
+    av_frame_free(&out);
+    return err;
+}
+
+/**
+ * config_props callback of the output pad. Evaluates the w/h expressions,
+ * resolves the requested output format, decides whether a conversion is
+ * needed (s->conv), runs the generic Vulkan output configuration and derives
+ * the output sample aspect ratio.
+ *
+ * @param outlink output link being configured
+ * @return 0 on success; AVERROR(EINVAL) for an unknown pixel format,
+ *         AVERROR_PATCHWELCOME for an unsupported conversion, or the error
+ *         returned by ff_scale_eval_dimensions()/ff_vk_filter_config_output()
+ */
+static int scale_vulkan_config_output(AVFilterLink *outlink)
+{
+    int err;
+    const AVPixFmtDescriptor *in_desc, *out_desc;
+    AVFilterContext *avctx = outlink->src;
+    ScaleVulkanContext *s  = avctx->priv;
+    AVFilterLink *inlink   = outlink->src->inputs[0];
+
+    err = ff_scale_eval_dimensions(s, s->w_expr, s->h_expr, inlink, outlink,
+                                   &s->vkctx.output_width,
+                                   &s->vkctx.output_height);
+    if (err < 0)
+        return err;
+
+    if (s->output_format_string) {
+        s->vkctx.output_format = av_get_pix_fmt(s->output_format_string);
+        /* An unknown name yields AV_PIX_FMT_NONE, which has no descriptor */
+        if (s->vkctx.output_format == AV_PIX_FMT_NONE) {
+            av_log(avctx, AV_LOG_ERROR, "Invalid output format \"%s\"!\n",
+                   s->output_format_string);
+            return AVERROR(EINVAL);
+        }
+    }
+
+    in_desc  = av_pix_fmt_desc_get(s->vkctx.input_format);
+    out_desc = av_pix_fmt_desc_get(s->vkctx.output_format);
+    /* av_pix_fmt_desc_get() returns NULL for formats it does not know */
+    if (!in_desc || !out_desc)
+        return AVERROR(EINVAL);
+
+    if ((s->vkctx.input_format != s->vkctx.output_format) &&
+        !(out_desc->flags & AV_PIX_FMT_FLAG_RGB)) {
+        av_log(avctx, AV_LOG_WARNING, "Unsupported conversion %s -> %s! "
+               "Currently output format must either match input format or "
+               "must be some supported RGB format!\n",
+               av_get_pix_fmt_name(s->vkctx.input_format),
+               av_get_pix_fmt_name(s->vkctx.output_format));
+        return AVERROR_PATCHWELCOME;
+    }
+
+    /* Conversion is only needed when a non-RGB input changes format */
+    s->conv = !(in_desc->flags & AV_PIX_FMT_FLAG_RGB) &&
+               (s->vkctx.input_format != s->vkctx.output_format);
+
+    err = ff_vk_filter_config_output(outlink);
+    if (err < 0)
+        return err;
+
+    /* Keep the display aspect ratio: rescale a known SAR by the size change,
+     * pass an unknown (0) SAR through unchanged */
+    if (inlink->sample_aspect_ratio.num)
+        outlink->sample_aspect_ratio = av_mul_q((AVRational){outlink->h * inlink->w, outlink->w * inlink->h}, inlink->sample_aspect_ratio);
+    else
+        outlink->sample_aspect_ratio = inlink->sample_aspect_ratio;
+
+    return 0;
+}
+
+/*
+ * uninit callback: marks the filter as uninitialized, releases the execution
+ * context and then the shared Vulkan filter state.
+ */
+static void scale_vulkan_uninit(AVFilterContext *avctx)
+{
+    ScaleVulkanContext *priv = avctx->priv;
+
+    priv->initialized = 0;
+
+    ff_vk_free_exec_ctx(avctx, &priv->exec);
+    ff_vk_filter_uninit(avctx);
+}
+
+/* Option table of the scale_vulkan filter; OFFSET locates fields inside
+ * ScaleVulkanContext, FLAGS marks every entry as a video filtering option. */
+#define OFFSET(x) offsetof(ScaleVulkanContext, x)
+#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
+static const AVOption scale_vulkan_options[] = {
+    { "w", "Output video width",  OFFSET(w_expr), AV_OPT_TYPE_STRING, {.str = "iw"}, .flags = FLAGS },
+    { "h", "Output video height", OFFSET(h_expr), AV_OPT_TYPE_STRING, {.str = "ih"}, .flags = FLAGS },
+    /* F_NB is only the number of scalers, so the largest valid value is F_NB - 1 */
+    { "scaler", "Scaler function", OFFSET(scaler), AV_OPT_TYPE_INT, {.i64 = F_BILINEAR}, 0, F_NB - 1, .flags = FLAGS, "scaler" },
+        { "bilinear", "Bilinear interpolation (fastest)", 0, AV_OPT_TYPE_CONST, {.i64 = F_BILINEAR}, 0, 0, .flags = FLAGS, "scaler" },
+        { "nearest", "Nearest (useful for pixel art)", 0, AV_OPT_TYPE_CONST, {.i64 = F_NEAREST}, 0, 0, .flags = FLAGS, "scaler" },
+    { "format", "Output video format (software format of hardware frames)", OFFSET(output_format_string), AV_OPT_TYPE_STRING, .flags = FLAGS },
+    { NULL },
+};
+
+AVFILTER_DEFINE_CLASS(scale_vulkan);
+
+/* Single video input pad; frames are handled by scale_vulkan_filter_frame() */
+static const AVFilterPad scale_vulkan_inputs[] = {
+    {
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .name         = "default",
+        .config_props = ff_vk_filter_config_input,
+        .filter_frame = scale_vulkan_filter_frame,
+    },
+    { NULL }
+};
+
+/* Single video output pad; configured by scale_vulkan_config_output() */
+static const AVFilterPad scale_vulkan_outputs[] = {
+    {
+        .type         = AVMEDIA_TYPE_VIDEO,
+        .name         = "default",
+        .config_props = scale_vulkan_config_output,
+    },
+    { NULL }
+};
+
+/* Filter definition exported to libavfilter (declared in allfilters.c) */
+AVFilter ff_vf_scale_vulkan = {
+    .name           = "scale_vulkan",
+    .description    = NULL_IF_CONFIG_SMALL("Scale Vulkan frames"),
+    .priv_class     = &scale_vulkan_class,
+    .priv_size      = sizeof(ScaleVulkanContext),
+    .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
+    .query_formats  = ff_vk_filter_query_formats,
+    .init           = ff_vk_filter_init,
+    .uninit         = scale_vulkan_uninit,
+    .inputs         = scale_vulkan_inputs,
+    .outputs        = scale_vulkan_outputs,
+};