diff mbox series

[FFmpeg-devel] hwcontext_vulkan: use host mapped buffers when uploading and downloading

Message ID M81hC28--J-2@lynne.ee
State Accepted
Headers show
Series [FFmpeg-devel] hwcontext_vulkan: use host mapped buffers when uploading and downloading | expand

Checks

Context Check Description
andriy/default pending
andriy/configure warning Failed to apply patch

Commit Message

Lynne May 23, 2020, 6:04 p.m. UTC
Speeds up both use cases (uploading and downloading) by 30%.

Patch attached.
Subject: [PATCH] hwcontext_vulkan: use host mapped buffers when uploading and
 downloading

Speeds up both use cases (uploading and downloading) by 30%.
---
 libavutil/hwcontext_vulkan.c | 96 +++++++++++++++++++++++++++++++-----
 1 file changed, 83 insertions(+), 13 deletions(-)
diff mbox series

Patch

diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index d45ab23983..f2db9fcd8f 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -208,6 +208,7 @@  enum VulkanExtensions {
     EXT_DRM_MODIFIER_FLAGS     = 1ULL <<  1, /* VK_EXT_image_drm_format_modifier */
     EXT_EXTERNAL_FD_MEMORY     = 1ULL <<  2, /* VK_KHR_external_memory_fd */
     EXT_EXTERNAL_FD_SEM        = 1ULL <<  3, /* VK_KHR_external_semaphore_fd */
+    EXT_EXTERNAL_HOST_MEMORY   = 1ULL <<  4, /* VK_EXT_external_memory_host */
 
     EXT_NO_FLAG                = 1ULL << 63,
 };
@@ -226,6 +227,7 @@  static const VulkanOptExtension optional_device_exts[] = {
     { VK_EXT_EXTERNAL_MEMORY_DMA_BUF_EXTENSION_NAME,          EXT_EXTERNAL_DMABUF_MEMORY, },
     { VK_EXT_IMAGE_DRM_FORMAT_MODIFIER_EXTENSION_NAME,        EXT_DRM_MODIFIER_FLAGS,     },
     { VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME,            EXT_EXTERNAL_FD_SEM,        },
+    { VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME,             EXT_EXTERNAL_HOST_MEMORY,   },
 };
 
 /* Converts return values to strings */
@@ -2630,6 +2632,7 @@  typedef struct ImageBuffer {
     VkBuffer buf;
     VkDeviceMemory mem;
     VkMemoryPropertyFlagBits flags;
+    int host_mapped;
 } ImageBuffer;
 
 static void free_buf(void *opaque, uint8_t *data)
@@ -2668,7 +2671,12 @@  static int create_buf(AVHWDeviceContext *ctx, AVBufferRef **buf,
     if (!vkbuf)
         return AVERROR(ENOMEM);
 
-    *stride = FFALIGN(*stride, p->props.limits.optimalBufferCopyRowPitchAlignment);
+    vkbuf->host_mapped = alloc_pnext != NULL;
+
+    /* This means we're importing memory, so we must not change the stride */
+    if (!vkbuf->host_mapped)
+        *stride = FFALIGN(*stride, p->props.limits.optimalBufferCopyRowPitchAlignment);
+
     buf_spawn.size = height*(*stride);
 
     ret = vkCreateBuffer(hwctx->act_dev, &buf_spawn, NULL, &vkbuf->buf);
@@ -2701,6 +2709,7 @@  static int create_buf(AVHWDeviceContext *ctx, AVBufferRef **buf,
     return 0;
 }
 
+/* Skips mapping of host mapped buffers but still invalidates them */
 static int map_buffers(AVHWDeviceContext *ctx, AVBufferRef **bufs, uint8_t *mem[],
                        int nb_buffers, int invalidate)
 {
@@ -2711,6 +2720,9 @@  static int map_buffers(AVHWDeviceContext *ctx, AVBufferRef **bufs, uint8_t *mem[
 
     for (int i = 0; i < nb_buffers; i++) {
         ImageBuffer *vkbuf = (ImageBuffer *)bufs[i]->data;
+        if (vkbuf->host_mapped)
+            continue;
+
         ret = vkMapMemory(hwctx->act_dev, vkbuf->mem, 0,
                           VK_WHOLE_SIZE, 0, (void **)&mem[i]);
         if (ret != VK_SUCCESS) {
@@ -2780,6 +2792,8 @@  static int unmap_buffers(AVHWDeviceContext *ctx, AVBufferRef **bufs,
 
     for (int i = 0; i < nb_buffers; i++) {
         ImageBuffer *vkbuf = (ImageBuffer *)bufs[i]->data;
+        if (vkbuf->host_mapped)
+            continue;
         vkUnmapMemory(hwctx->act_dev, vkbuf->mem);
     }
 
@@ -2915,7 +2929,11 @@  static int vulkan_transfer_data_from_mem(AVHWFramesContext *hwfc, AVFrame *dst,
     AVHWDeviceContext *dev_ctx = hwfc->device_ctx;
     AVBufferRef *bufs[AV_NUM_DATA_POINTERS] = { 0 };
     const int planes = av_pix_fmt_count_planes(src->format);
-    int log2_chroma = av_pix_fmt_desc_get(src->format)->log2_chroma_h;
+    int log2_chroma_w = av_pix_fmt_desc_get(src->format)->log2_chroma_w;
+    int log2_chroma_h = av_pix_fmt_desc_get(src->format)->log2_chroma_h;
+    VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
+    int host_mapped[AV_NUM_DATA_POINTERS] = { 0 };
+    int map_host = p->extensions & EXT_EXTERNAL_HOST_MEMORY;
 
     if ((src->format != AV_PIX_FMT_NONE && !av_vkfmt_from_pixfmt(src->format))) {
         av_log(hwfc, AV_LOG_ERROR, "Unsupported source pixel format!\n");
@@ -2945,12 +2963,25 @@  static int vulkan_transfer_data_from_mem(AVHWFramesContext *hwfc, AVFrame *dst,
     /* Create buffers */
     for (int i = 0; i < planes; i++) {
         int h = src->height;
-        int p_height = i > 0 ? AV_CEIL_RSHIFT(h, log2_chroma) : h;
+        int p_height = i > 0 ? AV_CEIL_RSHIFT(h, log2_chroma_h) : h;
+
+        VkImportMemoryHostPointerInfoEXT import_desc = {
+            .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT,
+            .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
+            .pHostPointer = src->data[i],
+        };
+
+        /* We can only map images with positive stride and alignment appropriate
+         * for the device. */
+        host_mapped[i] = map_host && src->linesize[i] > 0 &&
+                         !(((uintptr_t)import_desc.pHostPointer) %
+                           p->props.limits.minMemoryMapAlignment);
 
         tmp.linesize[i] = FFABS(src->linesize[i]);
-        err = create_buf(dev_ctx, &bufs[i], p_height,
-                         &tmp.linesize[i], VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
-                         VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, NULL, NULL);
+        err = create_buf(dev_ctx, &bufs[i], p_height, &tmp.linesize[i],
+                         VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
+                         VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, NULL,
+                         host_mapped[i] ? &import_desc : NULL);
         if (err)
             goto end;
     }
@@ -2959,8 +2990,19 @@  static int vulkan_transfer_data_from_mem(AVHWFramesContext *hwfc, AVFrame *dst,
     if ((err = map_buffers(dev_ctx, bufs, tmp.data, planes, 0)))
         goto end;
 
-    av_image_copy(tmp.data, tmp.linesize, (const uint8_t **)src->data,
-                  src->linesize, src->format, src->width, src->height);
+    for (int i = 0; i < planes; i++) {
+        int w = src->width;
+        int h = src->height;
+        int p_width =  i > 0 ? AV_CEIL_RSHIFT(w, log2_chroma_w) : w;
+        int p_height = i > 0 ? AV_CEIL_RSHIFT(h, log2_chroma_h) : h;
+
+        if (host_mapped[i])
+            continue;
+
+        av_image_copy_plane(tmp.data[i], tmp.linesize[i],
+                            (const uint8_t *)src->data[i], src->linesize[i],
+                            p_width, p_height);
+    }
 
     if ((err = unmap_buffers(dev_ctx, bufs, planes, 1)))
         goto end;
@@ -3075,7 +3117,11 @@  static int vulkan_transfer_data_to_mem(AVHWFramesContext *hwfc, AVFrame *dst,
     AVHWDeviceContext *dev_ctx = hwfc->device_ctx;
     AVBufferRef *bufs[AV_NUM_DATA_POINTERS] = { 0 };
     const int planes = av_pix_fmt_count_planes(dst->format);
-    int log2_chroma = av_pix_fmt_desc_get(dst->format)->log2_chroma_h;
+    int log2_chroma_w = av_pix_fmt_desc_get(dst->format)->log2_chroma_w;
+    int log2_chroma_h = av_pix_fmt_desc_get(dst->format)->log2_chroma_h;
+    VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
+    int host_mapped[AV_NUM_DATA_POINTERS] = { 0 };
+    int map_host = p->extensions & EXT_EXTERNAL_HOST_MEMORY;
 
     if (dst->width > hwfc->width || dst->height > hwfc->height)
         return AVERROR(EINVAL);
@@ -3100,12 +3146,25 @@  static int vulkan_transfer_data_to_mem(AVHWFramesContext *hwfc, AVFrame *dst,
     /* Create buffers */
     for (int i = 0; i < planes; i++) {
         int h = dst->height;
-        int p_height = i > 0 ? AV_CEIL_RSHIFT(h, log2_chroma) : h;
+        int p_height = i > 0 ? AV_CEIL_RSHIFT(h, log2_chroma_h) : h;
+
+        VkImportMemoryHostPointerInfoEXT import_desc = {
+            .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT,
+            .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
+            .pHostPointer = dst->data[i],
+        };
+
+        /* We can only map images with positive stride and alignment appropriate
+         * for the device. */
+        host_mapped[i] = map_host && dst->linesize[i] > 0 &&
+                         !(((uintptr_t)import_desc.pHostPointer) %
+                           p->props.limits.minMemoryMapAlignment);
 
         tmp.linesize[i] = FFABS(dst->linesize[i]);
         err = create_buf(dev_ctx, &bufs[i], p_height,
                          &tmp.linesize[i], VK_BUFFER_USAGE_TRANSFER_DST_BIT,
-                         VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, NULL, NULL);
+                         VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, NULL,
+                         host_mapped[i] ? &import_desc : NULL);
         if (err)
             goto end;
     }
@@ -3119,8 +3178,19 @@  static int vulkan_transfer_data_to_mem(AVHWFramesContext *hwfc, AVFrame *dst,
     if ((err = map_buffers(dev_ctx, bufs, tmp.data, planes, 1)))
         goto end;
 
-    av_image_copy(dst->data, dst->linesize, (const uint8_t **)tmp.data,
-                  tmp.linesize, dst->format, dst->width, dst->height);
+    for (int i = 0; i < planes; i++) {
+        int w = src->width;
+        int h = src->height;
+        int p_width =  i > 0 ? AV_CEIL_RSHIFT(w, log2_chroma_w) : w;
+        int p_height = i > 0 ? AV_CEIL_RSHIFT(h, log2_chroma_h) : h;
+
+        if (host_mapped[i])
+            continue;
+
+        av_image_copy_plane(dst->data[i], dst->linesize[i],
+                            (const uint8_t *)tmp.data[i], tmp.linesize[i],
+                            p_width, p_height);
+    }
 
     err = unmap_buffers(dev_ctx, bufs, planes, 0);