diff mbox series

[FFmpeg-devel] hwcontext_vulkan: only use one semaphore per image

Message ID M45-KEo--3-2@lynne.ee
State Accepted
Headers show
Series [FFmpeg-devel] hwcontext_vulkan: only use one semaphore per image | expand

Checks

Context Check Description
andriy/ffmpeg-patchwork success Make fate finished

Commit Message

Lynne April 4, 2020, 4:23 p.m. UTC
The idea was to allow separate planes to be filtered independently, however,
in hindsight, literally nothing uses separate per-plane semaphores and it
would only work when each plane is backed by separate device memory.

Patch attached.
diff mbox series

Patch

From 39506e3d6d703b070d861f88d4cde5689eea7ae2 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Sat, 4 Apr 2020 17:19:59 +0100
Subject: [PATCH] hwcontext_vulkan: only use one semaphore per image

The idea was to allow separate planes to be filtered independently, however,
in hindsight, literally nothing uses separate per-plane semaphores and it
would only work when each plane is backed by separate device memory.
---
 libavfilter/vulkan.c         |  38 +++++-----
 libavutil/hwcontext_vulkan.c | 131 ++++++++++++++++++-----------------
 libavutil/hwcontext_vulkan.h |   4 +-
 3 files changed, 86 insertions(+), 87 deletions(-)

diff --git a/libavfilter/vulkan.c b/libavfilter/vulkan.c
index ff76ab15e9..c103440529 100644
--- a/libavfilter/vulkan.c
+++ b/libavfilter/vulkan.c
@@ -390,32 +390,28 @@  int ff_vk_add_exec_dep(AVFilterContext *avctx, FFVkExecContext *e,
                        AVFrame *frame, VkPipelineStageFlagBits in_wait_dst_flag)
 {
     AVVkFrame *f = (AVVkFrame *)frame->data[0];
-    AVHWFramesContext *fc = (AVHWFramesContext *)frame->hw_frames_ctx->data;
-    int planes = av_pix_fmt_count_planes(fc->sw_format);
 
-    for (int i = 0; i < planes; i++) {
-        e->sem_wait = av_fast_realloc(e->sem_wait, &e->sem_wait_alloc,
-                                      (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait));
-        if (!e->sem_wait)
-            return AVERROR(ENOMEM);
+    e->sem_wait = av_fast_realloc(e->sem_wait, &e->sem_wait_alloc,
+                                  (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait));
+    if (!e->sem_wait)
+        return AVERROR(ENOMEM);
 
-        e->sem_wait_dst = av_fast_realloc(e->sem_wait_dst, &e->sem_wait_dst_alloc,
-                                          (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait_dst));
-        if (!e->sem_wait_dst)
-            return AVERROR(ENOMEM);
+    e->sem_wait_dst = av_fast_realloc(e->sem_wait_dst, &e->sem_wait_dst_alloc,
+                                      (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait_dst));
+    if (!e->sem_wait_dst)
+        return AVERROR(ENOMEM);
 
-        e->sem_sig = av_fast_realloc(e->sem_sig, &e->sem_sig_alloc,
-                                     (e->sem_sig_cnt + 1)*sizeof(*e->sem_sig));
-        if (!e->sem_sig)
-            return AVERROR(ENOMEM);
+    e->sem_sig = av_fast_realloc(e->sem_sig, &e->sem_sig_alloc,
+                                 (e->sem_sig_cnt + 1)*sizeof(*e->sem_sig));
+    if (!e->sem_sig)
+        return AVERROR(ENOMEM);
 
-        e->sem_wait[e->sem_wait_cnt] = f->sem[i];
-        e->sem_wait_dst[e->sem_wait_cnt] = in_wait_dst_flag;
-        e->sem_wait_cnt++;
+    e->sem_wait[e->sem_wait_cnt] = f->sem;
+    e->sem_wait_dst[e->sem_wait_cnt] = in_wait_dst_flag;
+    e->sem_wait_cnt++;
 
-        e->sem_sig[e->sem_sig_cnt] = f->sem[i];
-        e->sem_sig_cnt++;
-    }
+    e->sem_sig[e->sem_sig_cnt] = f->sem;
+    e->sem_sig_cnt++;
 
     return 0;
 }
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index ed88979d0d..7eb333b353 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -81,7 +81,7 @@  typedef struct AVVkFrameInternal {
     CUexternalMemory ext_mem[AV_NUM_DATA_POINTERS];
     CUmipmappedArray cu_mma[AV_NUM_DATA_POINTERS];
     CUarray cu_array[AV_NUM_DATA_POINTERS];
-    CUexternalSemaphore cu_sem[AV_NUM_DATA_POINTERS];
+    CUexternalSemaphore cu_sem;
 #endif
 } AVVkFrameInternal;
 
@@ -1042,9 +1042,10 @@  static void vulkan_free_internal(AVVkFrameInternal *internal)
         AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal;
         CudaFunctions *cu = cu_internal->cuda_dl;
 
+        if (internal->cu_sem)
+            CHECK_CU(cu->cuDestroyExternalSemaphore(internal->cu_sem));
+
         for (int i = 0; i < planes; i++) {
-            if (internal->cu_sem[i])
-                CHECK_CU(cu->cuDestroyExternalSemaphore(internal->cu_sem[i]));
             if (internal->cu_mma[i])
                 CHECK_CU(cu->cuMipmappedArrayDestroy(internal->cu_mma[i]));
             if (internal->ext_mem[i])
@@ -1070,9 +1071,10 @@  static void vulkan_frame_free(void *opaque, uint8_t *data)
     for (int i = 0; i < planes; i++) {
         vkDestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc);
         vkFreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc);
-        vkDestroySemaphore(hwctx->act_dev, f->sem[i], hwctx->alloc);
     }
 
+    vkDestroySemaphore(hwctx->act_dev, f->sem, hwctx->alloc);
+
     av_free(f);
 }
 
@@ -1166,8 +1168,8 @@  static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
         .commandBufferCount   = 1,
         .pCommandBuffers      = &ectx->buf,
 
-        .pSignalSemaphores    = frame->sem,
-        .signalSemaphoreCount = planes,
+        .pSignalSemaphores    = &frame->sem,
+        .signalSemaphoreCount = 1,
     };
 
     switch (pmode) {
@@ -1288,19 +1290,19 @@  static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame,
             goto fail;
         }
 
-        /* Create semaphore */
-        ret = vkCreateSemaphore(hwctx->act_dev, &sem_spawn,
-                                hwctx->alloc, &f->sem[i]);
-        if (ret != VK_SUCCESS) {
-            av_log(hwctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n",
-                   vk_ret2str(ret));
-            return AVERROR_EXTERNAL;
-        }
-
         f->layout[i] = image_create_info.initialLayout;
         f->access[i] = 0x0;
     }
 
+    /* Create semaphore */
+    ret = vkCreateSemaphore(hwctx->act_dev, &sem_spawn,
+                            hwctx->alloc, &f->sem);
+    if (ret != VK_SUCCESS) {
+        av_log(hwctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n",
+               vk_ret2str(ret));
+        return AVERROR_EXTERNAL;
+    }
+
     f->flags     = 0x0;
     f->tiling    = tiling;
 
@@ -1622,9 +1624,10 @@  static void vulkan_unmap_from(AVHWFramesContext *hwfc, HWMapDescriptor *hwmap)
     for (int i = 0; i < planes; i++) {
         vkDestroyImage(hwctx->act_dev, map->frame->img[i], hwctx->alloc);
         vkFreeMemory(hwctx->act_dev, map->frame->mem[i], hwctx->alloc);
-        vkDestroySemaphore(hwctx->act_dev, map->frame->sem[i], hwctx->alloc);
     }
 
+    vkDestroySemaphore(hwctx->act_dev, map->frame->sem, hwctx->alloc);
+
     av_freep(&map->frame);
 }
 
@@ -1785,19 +1788,6 @@  static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f
             goto fail;
         }
 
-        ret = vkCreateSemaphore(hwctx->act_dev, &sem_spawn,
-                                hwctx->alloc, &f->sem[i]);
-        if (ret != VK_SUCCESS) {
-            av_log(hwctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n",
-                   vk_ret2str(ret));
-            return AVERROR_EXTERNAL;
-        }
-
-        /* We'd import a semaphore onto the one we created using
-         * vkImportSemaphoreFdKHR but unfortunately neither DRM nor VAAPI
-         * offer us anything we could import and sync with, so instead
-         * just signal the semaphore we created. */
-
         f->layout[i] = image_create_info.initialLayout;
         f->access[i] = 0x0;
 
@@ -1818,6 +1808,19 @@  static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f
         }
     }
 
+    ret = vkCreateSemaphore(hwctx->act_dev, &sem_spawn,
+                            hwctx->alloc, &f->sem);
+    if (ret != VK_SUCCESS) {
+        av_log(hwctx, AV_LOG_ERROR, "Failed to create semaphore: %s\n",
+               vk_ret2str(ret));
+        return AVERROR_EXTERNAL;
+    }
+
+    /* We'd import a semaphore onto the one we created using
+     * vkImportSemaphoreFdKHR but unfortunately neither DRM nor VAAPI
+     * offer us anything we could import and sync with, so instead
+     * just signal the semaphore we created. */
+
     /* Bind the allocated memory to the images */
     ret = vkBindImageMemory2(hwctx->act_dev, bind_counts, bind_info);
     if (ret != VK_SUCCESS) {
@@ -1838,12 +1841,11 @@  static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f
     return 0;
 
 fail:
-    for (int i = 0; i < desc->nb_layers; i++) {
+    for (int i = 0; i < desc->nb_layers; i++)
         vkDestroyImage(hwctx->act_dev, f->img[i], hwctx->alloc);
-        vkDestroySemaphore(hwctx->act_dev, f->sem[i], hwctx->alloc);
-    }
     for (int i = 0; i < desc->nb_objects; i++)
         vkFreeMemory(hwctx->act_dev, f->mem[i], hwctx->alloc);
+    vkDestroySemaphore(hwctx->act_dev, f->sem, hwctx->alloc);
 
     av_free(f);
 
@@ -1953,6 +1955,15 @@  static int vulkan_export_to_cuda(AVHWFramesContext *hwfc,
 
     dst_int = dst_f->internal;
     if (!dst_int || !dst_int->cuda_fc_ref) {
+        VkSemaphoreGetFdInfoKHR sem_export = {
+            .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR,
+            .semaphore = dst_f->sem,
+            .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
+        };
+        CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC ext_sem_desc = {
+            .type = CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD,
+        };
+
         if (!dst_f->internal)
             dst_f->internal = dst_int = av_mallocz(sizeof(*dst_f->internal));
 
@@ -1991,14 +2002,6 @@  static int vulkan_export_to_cuda(AVHWFramesContext *hwfc,
                 .memory     = dst_f->mem[i],
                 .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR,
             };
-            VkSemaphoreGetFdInfoKHR sem_export = {
-                .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR,
-                .semaphore = dst_f->sem[i],
-                .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT,
-            };
-            CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC ext_sem_desc = {
-                .type = CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD,
-            };
 
             ret = pfn_vkGetMemoryFdKHR(hwctx->act_dev, &export_info,
                                        &ext_desc.handle.fd);
@@ -2028,22 +2031,22 @@  static int vulkan_export_to_cuda(AVHWFramesContext *hwfc,
                 err = AVERROR_EXTERNAL;
                 goto fail;
             }
+        }
 
-            ret = pfn_vkGetSemaphoreFdKHR(hwctx->act_dev, &sem_export,
-                                          &ext_sem_desc.handle.fd);
-            if (ret != VK_SUCCESS) {
-                av_log(ctx, AV_LOG_ERROR, "Failed to export semaphore: %s\n",
-                       vk_ret2str(ret));
-                err = AVERROR_EXTERNAL;
-                goto fail;
-            }
+        ret = pfn_vkGetSemaphoreFdKHR(hwctx->act_dev, &sem_export,
+                                      &ext_sem_desc.handle.fd);
+        if (ret != VK_SUCCESS) {
+            av_log(ctx, AV_LOG_ERROR, "Failed to export semaphore: %s\n",
+                   vk_ret2str(ret));
+            err = AVERROR_EXTERNAL;
+            goto fail;
+        }
 
-            ret = CHECK_CU(cu->cuImportExternalSemaphore(&dst_int->cu_sem[i],
-                                                         &ext_sem_desc));
-            if (ret < 0) {
-                err = AVERROR_EXTERNAL;
-                goto fail;
-            }
+        ret = CHECK_CU(cu->cuImportExternalSemaphore(&dst_int->cu_sem,
+                                                     &ext_sem_desc));
+        if (ret < 0) {
+            err = AVERROR_EXTERNAL;
+            goto fail;
         }
     }
 
@@ -2069,8 +2072,8 @@  static int vulkan_transfer_data_from_cuda(AVHWFramesContext *hwfc,
     AVCUDADeviceContext *cuda_dev = cuda_cu->hwctx;
     AVCUDADeviceContextInternal *cu_internal = cuda_dev->internal;
     CudaFunctions *cu = cu_internal->cuda_dl;
-    CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS s_w_par[AV_NUM_DATA_POINTERS] = { 0 };
-    CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS s_s_par[AV_NUM_DATA_POINTERS] = { 0 };
+    CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS s_w_par = { 0 };
+    CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS s_s_par = { 0 };
 
     ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_dev->cuda_ctx));
     if (ret < 0) {
@@ -2086,8 +2089,8 @@  static int vulkan_transfer_data_from_cuda(AVHWFramesContext *hwfc,
     }
     dst_int = dst_f->internal;
 
-    ret = CHECK_CU(cu->cuWaitExternalSemaphoresAsync(dst_int->cu_sem, s_w_par,
-                                                     planes, cuda_dev->stream));
+    ret = CHECK_CU(cu->cuWaitExternalSemaphoresAsync(&dst_int->cu_sem, &s_w_par,
+                                                     1, cuda_dev->stream));
     if (ret < 0) {
         err = AVERROR_EXTERNAL;
         goto fail;
@@ -2115,8 +2118,8 @@  static int vulkan_transfer_data_from_cuda(AVHWFramesContext *hwfc,
         }
     }
 
-    ret = CHECK_CU(cu->cuSignalExternalSemaphoresAsync(dst_int->cu_sem, s_s_par,
-                                                       planes, cuda_dev->stream));
+    ret = CHECK_CU(cu->cuSignalExternalSemaphoresAsync(&dst_int->cu_sem, &s_s_par,
+                                                       1, cuda_dev->stream));
     if (ret < 0) {
         err = AVERROR_EXTERNAL;
         goto fail;
@@ -2492,11 +2495,11 @@  static int transfer_image_buf(AVHWDeviceContext *ctx, AVVkFrame *frame,
         .sType                = VK_STRUCTURE_TYPE_SUBMIT_INFO,
         .commandBufferCount   = 1,
         .pCommandBuffers      = &s->cmd.buf,
-        .pSignalSemaphores    = frame->sem,
-        .pWaitSemaphores      = frame->sem,
+        .pSignalSemaphores    = &frame->sem,
+        .pWaitSemaphores      = &frame->sem,
         .pWaitDstStageMask    = sem_wait_dst,
-        .signalSemaphoreCount = planes,
-        .waitSemaphoreCount   = planes,
+        .signalSemaphoreCount = 1,
+        .waitSemaphoreCount   = 1,
     };
 
     ret = vkBeginCommandBuffer(s->cmd.buf, &cmd_start);
diff --git a/libavutil/hwcontext_vulkan.h b/libavutil/hwcontext_vulkan.h
index 01d6d98b01..ebc28916f3 100644
--- a/libavutil/hwcontext_vulkan.h
+++ b/libavutil/hwcontext_vulkan.h
@@ -133,10 +133,10 @@  typedef struct AVVkFrame {
     VkImageLayout layout[AV_NUM_DATA_POINTERS];
 
     /**
-     * Per-image semaphores. Must not be freed manually. Must be waited on
+     * Per-frame semaphore. Must not be freed manually. Must be waited on
      * and signalled at every queue submission.
      */
-    VkSemaphore sem[AV_NUM_DATA_POINTERS];
+    VkSemaphore sem;
 
     /**
      * Internal data.
-- 
2.26.0