From patchwork Sat May 23 18:04:06 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Lynne X-Patchwork-Id: 19817 Return-Path: X-Original-To: patchwork@ffaux-bg.ffmpeg.org Delivered-To: patchwork@ffaux-bg.ffmpeg.org Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org [79.124.17.100]) by ffaux.localdomain (Postfix) with ESMTP id 0176B448DD8 for ; Sat, 23 May 2020 21:04:15 +0300 (EEST) Received: from [127.0.1.1] (localhost [127.0.0.1]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id D3DAD688136; Sat, 23 May 2020 21:04:14 +0300 (EEST) X-Original-To: ffmpeg-devel@ffmpeg.org Delivered-To: ffmpeg-devel@ffmpeg.org Received: from w4.tutanota.de (w4.tutanota.de [81.3.6.165]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTPS id EEC66680BD1 for ; Sat, 23 May 2020 21:04:07 +0300 (EEST) Received: from w3.tutanota.de (unknown [192.168.1.164]) by w4.tutanota.de (Postfix) with ESMTP id BDF021060287 for ; Sat, 23 May 2020 18:04:06 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed; t=1590257046; s=s1; d=lynne.ee; h=From:From:To:To:Subject:Subject:Content-Description:Content-ID:Content-Type:Content-Type:Content-Transfer-Encoding:Cc:Date:Date:In-Reply-To:MIME-Version:MIME-Version:Message-ID:Message-ID:Reply-To:References:Sender; bh=rUZUBvGxi65F38hZspeGQ4IB7YKIY7MavVRYokz8v5c=; b=XGle8Tkgi9hI0eZQbW/g4p1ajyzZA5R7Hc+S6xqsp/GcvLHGMIltAqnZ2sjPyO6N KGZcQIV55Xhnr7EWIKEdeiNoyWptyy8JscqOZXp+OjijJ4UGw+7w5rFrqzCHJ6kO3AZ 17iCKODEMf0kGoimZiU7PMhPuj3tGkraCf3BJTuJO2OT3tIGBKRPkIUJNkUDIdJ+F2N kfqWhED4bmvoktsC3pNWANCR+5pmUGnpyocTLHje/rYOu/hOiZ8BJQsVIaOb8KAG60/ SckmWl4j7jJ8PRc11wMZl5CjjoIAxpR2mEg2FhpHKCasTNRAZtswTZ0ODuWzBgWxlZV L0XKFlHyZw== Date: Sat, 23 May 2020 20:04:06 +0200 (CEST) From: Lynne To: Ffmpeg Devel Message-ID: MIME-Version: 1.0 Subject: [FFmpeg-devel] [PATCH] hwcontext_vulkan: use host mapped buffers when uploading and downloading X-BeenThere: ffmpeg-devel@ffmpeg.org X-Mailman-Version: 2.1.20 Precedence: list List-Id: FFmpeg development discussions and patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: FFmpeg development discussions and patches Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" Speeds up both use cases by 30%. Patch attached. Subject: [PATCH] hwcontext_vulkan: use host mapped buffers when uploading and downloading Speeds up both use cases by 30%. --- libavutil/hwcontext_vulkan.c | 96 +++++++++++++++++++++++++++++++----- 1 file changed, 83 insertions(+), 13 deletions(-) diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c index d45ab23983..f2db9fcd8f 100644 --- a/libavutil/hwcontext_vulkan.c +++ b/libavutil/hwcontext_vulkan.c @@ -208,6 +208,7 @@ enum VulkanExtensions { EXT_DRM_MODIFIER_FLAGS = 1ULL << 1, /* VK_EXT_image_drm_format_modifier */ EXT_EXTERNAL_FD_MEMORY = 1ULL << 2, /* VK_KHR_external_memory_fd */ EXT_EXTERNAL_FD_SEM = 1ULL << 3, /* VK_KHR_external_semaphore_fd */ + EXT_EXTERNAL_HOST_MEMORY = 1ULL << 4, /* VK_EXT_external_memory_host */ EXT_NO_FLAG = 1ULL << 63, }; @@ -226,6 +227,7 @@ static const VulkanOptExtension optional_device_exts[] = { { VK_EXT_EXTERNAL_MEMORY_DMA_BUF_EXTENSION_NAME, EXT_EXTERNAL_DMABUF_MEMORY, }, { VK_EXT_IMAGE_DRM_FORMAT_MODIFIER_EXTENSION_NAME, EXT_DRM_MODIFIER_FLAGS, }, { VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME, EXT_EXTERNAL_FD_SEM, }, + { VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME, EXT_EXTERNAL_HOST_MEMORY, }, }; /* Converts return values to strings */ @@ -2630,6 +2632,7 @@ typedef struct ImageBuffer { VkBuffer buf; VkDeviceMemory mem; VkMemoryPropertyFlagBits flags; + int host_mapped; } ImageBuffer; static void free_buf(void *opaque, uint8_t *data) @@ -2668,7 +2671,12 @@ static int create_buf(AVHWDeviceContext *ctx, AVBufferRef **buf, if (!vkbuf) return AVERROR(ENOMEM); - *stride = FFALIGN(*stride, p->props.limits.optimalBufferCopyRowPitchAlignment); + vkbuf->host_mapped = alloc_pnext != NULL; + + /* This means we're importing memory, so we must not change the stride */ + if (!vkbuf->host_mapped) + *stride = FFALIGN(*stride, p->props.limits.optimalBufferCopyRowPitchAlignment); + buf_spawn.size = height*(*stride); ret = vkCreateBuffer(hwctx->act_dev, &buf_spawn, NULL, &vkbuf->buf); @@ -2701,6 +2709,7 @@ static int create_buf(AVHWDeviceContext *ctx, AVBufferRef **buf, return 0; } +/* Skips mapping of host mapped buffers but still invalidates them */ static int map_buffers(AVHWDeviceContext *ctx, AVBufferRef **bufs, uint8_t *mem[], int nb_buffers, int invalidate) { @@ -2711,6 +2720,9 @@ static int map_buffers(AVHWDeviceContext *ctx, AVBufferRef **bufs, uint8_t *mem[ for (int i = 0; i < nb_buffers; i++) { ImageBuffer *vkbuf = (ImageBuffer *)bufs[i]->data; + if (vkbuf->host_mapped) + continue; + ret = vkMapMemory(hwctx->act_dev, vkbuf->mem, 0, VK_WHOLE_SIZE, 0, (void **)&mem[i]); if (ret != VK_SUCCESS) { @@ -2780,6 +2792,8 @@ static int unmap_buffers(AVHWDeviceContext *ctx, AVBufferRef **bufs, for (int i = 0; i < nb_buffers; i++) { ImageBuffer *vkbuf = (ImageBuffer *)bufs[i]->data; + if (vkbuf->host_mapped) + continue; vkUnmapMemory(hwctx->act_dev, vkbuf->mem); } @@ -2915,7 +2929,11 @@ static int vulkan_transfer_data_from_mem(AVHWFramesContext *hwfc, AVFrame *dst, AVHWDeviceContext *dev_ctx = hwfc->device_ctx; AVBufferRef *bufs[AV_NUM_DATA_POINTERS] = { 0 }; const int planes = av_pix_fmt_count_planes(src->format); - int log2_chroma = av_pix_fmt_desc_get(src->format)->log2_chroma_h; + int log2_chroma_w = av_pix_fmt_desc_get(src->format)->log2_chroma_w; + int log2_chroma_h = av_pix_fmt_desc_get(src->format)->log2_chroma_h; + VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; + int host_mapped[AV_NUM_DATA_POINTERS] = { 0 }; + int map_host = p->extensions & EXT_EXTERNAL_HOST_MEMORY; if ((src->format != AV_PIX_FMT_NONE && !av_vkfmt_from_pixfmt(src->format))) { av_log(hwfc, AV_LOG_ERROR, "Unsupported source pixel format!\n"); @@ -2945,12 +2963,25 @@ static int vulkan_transfer_data_from_mem(AVHWFramesContext *hwfc, AVFrame *dst, /* Create buffers */ for (int i = 0; i < planes; i++) { int h = src->height; - int p_height = i > 0 ? AV_CEIL_RSHIFT(h, log2_chroma) : h; + int p_height = i > 0 ? AV_CEIL_RSHIFT(h, log2_chroma_h) : h; + + VkImportMemoryHostPointerInfoEXT import_desc = { + .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT, + .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT, + .pHostPointer = src->data[i], + }; + + /* We can only map images with positive stride and alignment appropriate + * for the device. */ + host_mapped[i] = map_host && src->linesize[i] > 0 && + !(((uintptr_t)import_desc.pHostPointer) % + p->props.limits.minMemoryMapAlignment); tmp.linesize[i] = FFABS(src->linesize[i]); - err = create_buf(dev_ctx, &bufs[i], p_height, - &tmp.linesize[i], VK_BUFFER_USAGE_TRANSFER_SRC_BIT, - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, NULL, NULL); + err = create_buf(dev_ctx, &bufs[i], p_height, &tmp.linesize[i], + VK_BUFFER_USAGE_TRANSFER_SRC_BIT, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, NULL, + host_mapped[i] ? &import_desc : NULL); if (err) goto end; } @@ -2959,8 +2990,19 @@ static int vulkan_transfer_data_from_mem(AVHWFramesContext *hwfc, AVFrame *dst, if ((err = map_buffers(dev_ctx, bufs, tmp.data, planes, 0))) goto end; - av_image_copy(tmp.data, tmp.linesize, (const uint8_t **)src->data, - src->linesize, src->format, src->width, src->height); + for (int i = 0; i < planes; i++) { + int w = src->width; + int h = src->height; + int p_width = i > 0 ? AV_CEIL_RSHIFT(w, log2_chroma_w) : w; + int p_height = i > 0 ? AV_CEIL_RSHIFT(h, log2_chroma_h) : h; + + if (host_mapped[i]) + continue; + + av_image_copy_plane(tmp.data[i], tmp.linesize[i], + (const uint8_t *)src->data[i], src->linesize[i], + p_width, p_height); + } if ((err = unmap_buffers(dev_ctx, bufs, planes, 1))) goto end; @@ -3075,7 +3117,11 @@ static int vulkan_transfer_data_to_mem(AVHWFramesContext *hwfc, AVFrame *dst, AVHWDeviceContext *dev_ctx = hwfc->device_ctx; AVBufferRef *bufs[AV_NUM_DATA_POINTERS] = { 0 }; const int planes = av_pix_fmt_count_planes(dst->format); - int log2_chroma = av_pix_fmt_desc_get(dst->format)->log2_chroma_h; + int log2_chroma_w = av_pix_fmt_desc_get(dst->format)->log2_chroma_w; + int log2_chroma_h = av_pix_fmt_desc_get(dst->format)->log2_chroma_h; + VulkanDevicePriv *p = hwfc->device_ctx->internal->priv; + int host_mapped[AV_NUM_DATA_POINTERS] = { 0 }; + int map_host = p->extensions & EXT_EXTERNAL_HOST_MEMORY; if (dst->width > hwfc->width || dst->height > hwfc->height) return AVERROR(EINVAL); @@ -3100,12 +3146,25 @@ static int vulkan_transfer_data_to_mem(AVHWFramesContext *hwfc, AVFrame *dst, /* Create buffers */ for (int i = 0; i < planes; i++) { int h = dst->height; - int p_height = i > 0 ? AV_CEIL_RSHIFT(h, log2_chroma) : h; + int p_height = i > 0 ? AV_CEIL_RSHIFT(h, log2_chroma_h) : h; + + VkImportMemoryHostPointerInfoEXT import_desc = { + .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT, + .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT, + .pHostPointer = dst->data[i], + }; + + /* We can only map images with positive stride and alignment appropriate + * for the device. */ + host_mapped[i] = map_host && dst->linesize[i] > 0 && + !(((uintptr_t)import_desc.pHostPointer) % + p->props.limits.minMemoryMapAlignment); tmp.linesize[i] = FFABS(dst->linesize[i]); err = create_buf(dev_ctx, &bufs[i], p_height, &tmp.linesize[i], VK_BUFFER_USAGE_TRANSFER_DST_BIT, - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, NULL, NULL); + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, NULL, + host_mapped[i] ? &import_desc : NULL); if (err) goto end; } @@ -3119,8 +3178,19 @@ static int vulkan_transfer_data_to_mem(AVHWFramesContext *hwfc, AVFrame *dst, if ((err = map_buffers(dev_ctx, bufs, tmp.data, planes, 1))) goto end; - av_image_copy(dst->data, dst->linesize, (const uint8_t **)tmp.data, - tmp.linesize, dst->format, dst->width, dst->height); + for (int i = 0; i < planes; i++) { + int w = src->width; + int h = src->height; + int p_width = i > 0 ? AV_CEIL_RSHIFT(w, log2_chroma_w) : w; + int p_height = i > 0 ? AV_CEIL_RSHIFT(h, log2_chroma_h) : h; + + if (host_mapped[i]) + continue; + + av_image_copy_plane(dst->data[i], dst->linesize[i], + (const uint8_t *)tmp.data[i], tmp.linesize[i], + p_width, p_height); + } err = unmap_buffers(dev_ctx, bufs, planes, 0);