From patchwork Sat Apr 4 16:23:45 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Lynne X-Patchwork-Id: 18645 Return-Path: X-Original-To: patchwork@ffaux-bg.ffmpeg.org Delivered-To: patchwork@ffaux-bg.ffmpeg.org Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org [79.124.17.100]) by ffaux.localdomain (Postfix) with ESMTP id B156944818B for ; Sat, 4 Apr 2020 19:23:52 +0300 (EEST) Received: from [127.0.1.1] (localhost [127.0.0.1]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id 903DF68B035; Sat, 4 Apr 2020 19:23:52 +0300 (EEST) X-Original-To: ffmpeg-devel@ffmpeg.org Delivered-To: ffmpeg-devel@ffmpeg.org Received: from w4.tutanota.de (w4.tutanota.de [81.3.6.165]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTPS id 5DE8268AF49 for ; Sat, 4 Apr 2020 19:23:46 +0300 (EEST) Received: from w3.tutanota.de (unknown [192.168.1.164]) by w4.tutanota.de (Postfix) with ESMTP id 8D1911060147 for ; Sat, 4 Apr 2020 16:23:45 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed; t=1586017425; s=s1; d=lynne.ee; h=From:From:To:To:Subject:Subject:Content-Description:Content-ID:Content-Type:Content-Type:Content-Transfer-Encoding:Cc:Date:Date:In-Reply-To:MIME-Version:MIME-Version:Message-ID:Message-ID:Reply-To:References:Sender; bh=SLHS811xqR/CQmkDOyi/PkGBXbYt1y1c9NScW2fcZ6c=; b=2g2pPsnyM3RNefQhQemF6NaAbNeIKtcBHODRl6Fokn/YY5eEQ5lbQ/NiddpJy3Q1 hO2b+iz3gMl+V6BNITMGI1GnCnBFOXLFx5T/IF4pClxBHgTD8ChAMXUvfGdsKO7ly+N U2fknybs6SXIH9r8p2Wkmmz/6kWxin82I9g64QEwcfNpXxx6vfOJcEHOYsX8hFH84wB rzixH3Uu4nyZh+4CIAFlHh4BqnGpN+ShDDdO9B2195ANxPNEzH6U3UOje+BQZbXEOz7 YvE7d0tBrzkDu515PARLEJ4SUwdvW4eZAf4IMniFPlw8JkBuSzjzpyncYEHmFu1+cn3 2ZtCxVG2kA== Date: Sat, 4 Apr 2020 18:23:45 +0200 (CEST) From: Lynne To: Ffmpeg Devel Message-ID: MIME-Version: 1.0 Subject: [FFmpeg-devel] [PATCH] hwcontext_vulkan: only use one semaphore per image X-BeenThere: ffmpeg-devel@ffmpeg.org X-Mailman-Version: 2.1.20 Precedence: list 
List-Id: FFmpeg development discussions and patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: FFmpeg development discussions and patches Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" The idea was to allow separate planes to be filtered independently, however, in hindsight, literally nothing uses separate per-plane semaphores and it would only work when each plane is backed by separate device memory. Patch attached. From 39506e3d6d703b070d861f88d4cde5689eea7ae2 Mon Sep 17 00:00:00 2001 From: Lynne Date: Sat, 4 Apr 2020 17:19:59 +0100 Subject: [PATCH] hwcontext_vulkan: only use one semaphore per image The idea was to allow separate planes to be filtered independently, however, in hindsight, literally nothing uses separate per-plane semaphores and it would only work when each plane is backed by separate device memory. --- libavfilter/vulkan.c | 38 +++++----- libavutil/hwcontext_vulkan.c | 131 ++++++++++++++++++----------------- libavutil/hwcontext_vulkan.h | 4 +- 3 files changed, 86 insertions(+), 87 deletions(-) diff --git a/libavfilter/vulkan.c b/libavfilter/vulkan.c index ff76ab15e9..c103440529 100644 --- a/libavfilter/vulkan.c +++ b/libavfilter/vulkan.c @@ -390,32 +390,28 @@ int ff_vk_add_exec_dep(AVFilterContext *avctx, FFVkExecContext *e, AVFrame *frame, VkPipelineStageFlagBits in_wait_dst_flag) { AVVkFrame *f = (AVVkFrame *)frame->data[0]; - AVHWFramesContext *fc = (AVHWFramesContext *)frame->hw_frames_ctx->data; - int planes = av_pix_fmt_count_planes(fc->sw_format); - for (int i = 0; i < planes; i++) { - e->sem_wait = av_fast_realloc(e->sem_wait, &e->sem_wait_alloc, - (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait)); - if (!e->sem_wait) - return AVERROR(ENOMEM); + e->sem_wait = av_fast_realloc(e->sem_wait, &e->sem_wait_alloc, + (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait)); + if (!e->sem_wait) + return AVERROR(ENOMEM); - e->sem_wait_dst = av_fast_realloc(e->sem_wait_dst, &e->sem_wait_dst_alloc, - 
(e->sem_wait_cnt + 1)*sizeof(*e->sem_wait_dst)); - if (!e->sem_wait_dst) - return AVERROR(ENOMEM); + e->sem_wait_dst = av_fast_realloc(e->sem_wait_dst, &e->sem_wait_dst_alloc, + (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait_dst)); + if (!e->sem_wait_dst) + return AVERROR(ENOMEM); - e->sem_sig = av_fast_realloc(e->sem_sig, &e->sem_sig_alloc, - (e->sem_sig_cnt + 1)*sizeof(*e->sem_sig)); - if (!e->sem_sig) - return AVERROR(ENOMEM); + e->sem_sig = av_fast_realloc(e->sem_sig, &e->sem_sig_alloc, + (e->sem_sig_cnt + 1)*sizeof(*e->sem_sig)); + if (!e->sem_sig) + return AVERROR(ENOMEM); - e->sem_wait[e->sem_wait_cnt] = f->sem[i]; - e->sem_wait_dst[e->sem_wait_cnt] = in_wait_dst_flag; - e->sem_wait_cnt++; + e->sem_wait[e->sem_wait_cnt] = f->sem; + e->sem_wait_dst[e->sem_wait_cnt] = in_wait_dst_flag; + e->sem_wait_cnt++; - e->sem_sig[e->sem_sig_cnt] = f->sem[i]; - e->sem_sig_cnt++; - } + e->sem_sig[e->sem_sig_cnt] = f->sem; + e->sem_sig_cnt++; return 0; } diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c index ed88979d0d..7eb333b353 100644 --- a/libavutil/hwcontext_vulkan.c +++ b/libavutil/hwcontext_vulkan.c @@ -81,7 +81,7 @@ typedef struct AVVkFrameInternal { CUexternalMemory ext_mem[AV_NUM_DATA_POINTERS]; CUmipmappedArray cu_mma[AV_NUM_DATA_POINTERS]; CUarray cu_array[AV_NUM_DATA_POINTERS]; - CUexternalSemaphore cu_sem[AV_NUM_DATA_POINTERS]; + CUexternalSemaphore cu_sem; #endif } AVVkFrameInternal; @@ -1042,9 +1042,10 @@ static void vulkan_free_internal(AVVkFrameInternal *internal) AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal; CudaFunctions *cu = cu_internal->cuda_dl; + if (internal->cu_sem) + CHECK_CU(cu->cuDestroyExternalSemaphore(internal->cu_sem)); + for (int i = 0; i < planes; i++) { - if (internal->cu_sem[i]) - CHECK_CU(cu->cuDestroyExternalSemaphore(internal->cu_sem[i])); if (internal->cu_mma[i]) CHECK_CU(cu->cuMipmappedArrayDestroy(internal->cu_mma[i])); if (internal->ext_mem[i]) @@ -1070,9 +1071,10 @@ static void 
vulkan_frame_free(void *opaque, uint8_t *data) for (int i = 0; i < planes; i++) { vkDestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc); vkFreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc); - vkDestroySemaphore(hwctx->act_dev, f->sem[i], hwctx->alloc); } + vkDestroySemaphore(hwctx->act_dev, f->sem, hwctx->alloc); + av_free(f); } @@ -1166,8 +1168,8 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx, .commandBufferCount = 1, .pCommandBuffers = &ectx->buf, - .pSignalSemaphores = frame->sem, - .signalSemaphoreCount = planes, + .pSignalSemaphores = &frame->sem, + .signalSemaphoreCount = 1, }; switch (pmode) { @@ -1288,19 +1290,19 @@ static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame, goto fail; } - /* Create semaphore */ - ret = vkCreateSemaphore(hwctx->act_dev, &sem_spawn, - hwctx->alloc, &f->sem[i]); - if (ret != VK_SUCCESS) { - av_log(hwctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n", - vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - f->layout[i] = image_create_info.initialLayout; f->access[i] = 0x0; } + /* Create semaphore */ + ret = vkCreateSemaphore(hwctx->act_dev, &sem_spawn, + hwctx->alloc, &f->sem); + if (ret != VK_SUCCESS) { + av_log(hwctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n", + vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + f->flags = 0x0; f->tiling = tiling; @@ -1622,9 +1624,10 @@ static void vulkan_unmap_from(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap) for (int i = 0; i < planes; i++) { vkDestroyImage(hwctx->act_dev, map->frame->img[i], hwctx->alloc); vkFreeMemory(hwctx->act_dev, map->frame->mem[i], hwctx->alloc); - vkDestroySemaphore(hwctx->act_dev, map->frame->sem[i], hwctx->alloc); } + vkDestroySemaphore(hwctx->act_dev, map->frame->sem, hwctx->alloc); + av_freep(&map->frame); } @@ -1785,19 +1788,6 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f goto fail; } - ret = vkCreateSemaphore(hwctx->act_dev, &sem_spawn, - hwctx->alloc, &f->sem[i]); - if 
(ret != VK_SUCCESS) { - av_log(hwctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n", - vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - - /* We'd import a semaphore onto the one we created using - * vkImportSemaphoreFdKHR but unfortunately neither DRM nor VAAPI - * offer us anything we could import and sync with, so instead - * just signal the semaphore we created. */ - f->layout[i] = image_create_info.initialLayout; f->access[i] = 0x0; @@ -1818,6 +1808,19 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f } } + ret = vkCreateSemaphore(hwctx->act_dev, &sem_spawn, + hwctx->alloc, &f->sem); + if (ret != VK_SUCCESS) { + av_log(hwctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n", + vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + /* We'd import a semaphore onto the one we created using + * vkImportSemaphoreFdKHR but unfortunately neither DRM nor VAAPI + * offer us anything we could import and sync with, so instead + * just signal the semaphore we created. 
*/ + /* Bind the allocated memory to the images */ ret = vkBindImageMemory2(hwctx->act_dev, bind_counts, bind_info); if (ret != VK_SUCCESS) { @@ -1838,12 +1841,11 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f return 0; fail: - for (int i = 0; i < desc->nb_layers; i++) { + for (int i = 0; i < desc->nb_layers; i++) vkDestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc); - vkDestroySemaphore(hwctx->act_dev, f->sem[i], hwctx->alloc); - } for (int i = 0; i < desc->nb_objects; i++) vkFreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc); + vkDestroySemaphore(hwctx->act_dev, f->sem, hwctx->alloc); av_free(f); @@ -1953,6 +1955,15 @@ static int vulkan_export_to_cuda(AVHWFramesContext *hwfc, dst_int = dst_f->internal; if (!dst_int || !dst_int->cuda_fc_ref) { + VkSemaphoreGetFdInfoKHR sem_export = { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR, + .semaphore = dst_f->sem, + .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT, + }; + CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC ext_sem_desc = { + .type = CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD, + }; + if (!dst_f->internal) dst_f->internal = dst_int = av_mallocz(sizeof(*dst_f->internal)); @@ -1991,14 +2002,6 @@ static int vulkan_export_to_cuda(AVHWFramesContext *hwfc, .memory = dst_f->mem[i], .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR, }; - VkSemaphoreGetFdInfoKHR sem_export = { - .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR, - .semaphore = dst_f->sem[i], - .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT, - }; - CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC ext_sem_desc = { - .type = CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD, - }; ret = pfn_vkGetMemoryFdKHR(hwctx->act_dev, &export_info, &ext_desc.handle.fd); @@ -2028,22 +2031,22 @@ static int vulkan_export_to_cuda(AVHWFramesContext *hwfc, err = AVERROR_EXTERNAL; goto fail; } + } - ret = pfn_vkGetSemaphoreFdKHR(hwctx->act_dev, &sem_export, - &ext_sem_desc.handle.fd); - if (ret != VK_SUCCESS) { - 
av_log(ctx, AV_LOG_ERROR, "Failed to export semaphore: %s\n", - vk_ret2str(ret)); - err = AVERROR_EXTERNAL; - goto fail; - } + ret = pfn_vkGetSemaphoreFdKHR(hwctx->act_dev, &sem_export, + &ext_sem_desc.handle.fd); + if (ret != VK_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "Failed to export semaphore: %s\n", + vk_ret2str(ret)); + err = AVERROR_EXTERNAL; + goto fail; + } - ret = CHECK_CU(cu->cuImportExternalSemaphore(&dst_int->cu_sem[i], - &ext_sem_desc)); - if (ret < 0) { - err = AVERROR_EXTERNAL; - goto fail; - } + ret = CHECK_CU(cu->cuImportExternalSemaphore(&dst_int->cu_sem, + &ext_sem_desc)); + if (ret < 0) { + err = AVERROR_EXTERNAL; + goto fail; } } @@ -2069,8 +2072,8 @@ static int vulkan_transfer_data_from_cuda(AVHWFramesContext *hwfc, AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx; AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal; CudaFunctions *cu = cu_internal->cuda_dl; - CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS s_w_par[AV_NUM_DATA_POINTERS] = { 0 }; - CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS s_s_par[AV_NUM_DATA_POINTERS] = { 0 }; + CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS s_w_par = { 0 }; + CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS s_s_par = { 0 }; ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_dev->cuda_ctx)); if (ret < 0) { @@ -2086,8 +2089,8 @@ static int vulkan_transfer_data_from_cuda(AVHWFramesContext *hwfc, } dst_int = dst_f->internal; - ret = CHECK_CU(cu->cuWaitExternalSemaphoresAsync(dst_int->cu_sem, s_w_par, - planes, cuda_dev->stream)); + ret = CHECK_CU(cu->cuWaitExternalSemaphoresAsync(&dst_int->cu_sem, &s_w_par, + 1, cuda_dev->stream)); if (ret < 0) { err = AVERROR_EXTERNAL; goto fail; @@ -2115,8 +2118,8 @@ static int vulkan_transfer_data_from_cuda(AVHWFramesContext *hwfc, } } - ret = CHECK_CU(cu->cuSignalExternalSemaphoresAsync(dst_int->cu_sem, s_s_par, - planes, cuda_dev->stream)); + ret = CHECK_CU(cu->cuSignalExternalSemaphoresAsync(&dst_int->cu_sem, &s_s_par, + 1, cuda_dev->stream)); if (ret < 0) { err = AVERROR_EXTERNAL; goto fail; @@ -2492,11 +2495,11 
@@ static int transfer_image_buf(AVHWDeviceContext *ctx, AVVkFrame *frame, .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, .commandBufferCount = 1, .pCommandBuffers = &s->cmd.buf, - .pSignalSemaphores = frame->sem, - .pWaitSemaphores = frame->sem, + .pSignalSemaphores = &frame->sem, + .pWaitSemaphores = &frame->sem, .pWaitDstStageMask = sem_wait_dst, - .signalSemaphoreCount = planes, - .waitSemaphoreCount = planes, + .signalSemaphoreCount = 1, + .waitSemaphoreCount = 1, }; ret = vkBeginCommandBuffer(s->cmd.buf, &cmd_start); diff --git a/libavutil/hwcontext_vulkan.h b/libavutil/hwcontext_vulkan.h index 01d6d98b01..ebc28916f3 100644 --- a/libavutil/hwcontext_vulkan.h +++ b/libavutil/hwcontext_vulkan.h @@ -133,10 +133,10 @@ typedef struct AVVkFrame { VkImageLayout layout[AV_NUM_DATA_POINTERS]; /** - * Per-image semaphores. Must not be freed manually. Must be waited on + * Per-frame semaphore. Must not be freed manually. Must be waited on * and signalled at every queue submission. */ - VkSemaphore sem[AV_NUM_DATA_POINTERS]; + VkSemaphore sem; /** * Internal data. -- 2.26.0