diff mbox series

[FFmpeg-devel,03/13] hwcontext_vulkan: rewrite queue picking system for the new API

Message ID 20240807213347.917235-3-dev@lynne.ee
State New
Headers show
Series [FFmpeg-devel,01/13] hwcontext_vulkan: add a new mechanism to expose used queue families | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished

Commit Message

Lynne Aug. 7, 2024, 9:33 p.m. UTC
This allows us to support different video ops on different queues,
as well as any other arbitrary queues we need.
---
 libavutil/hwcontext_vulkan.c | 262 ++++++++++++++++++++++-------------
 1 file changed, 167 insertions(+), 95 deletions(-)
diff mbox series

Patch

diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 33d856ddd3..5baf68660a 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -1028,16 +1028,51 @@  end:
 }
 
 /* Picks the least used qf with the fewest unneeded flags, or -1 if none found */
-static inline int pick_queue_family(VkQueueFamilyProperties *qf, uint32_t num_qf,
+static inline int pick_queue_family(VkQueueFamilyProperties2 *qf, uint32_t num_qf,
                                     VkQueueFlagBits flags)
 {
     int index = -1;
     uint32_t min_score = UINT32_MAX;
 
     for (int i = 0; i < num_qf; i++) {
-        const VkQueueFlagBits qflags = qf[i].queueFlags;
+        VkQueueFlagBits qflags = qf[i].queueFamilyProperties.queueFlags;
+
+        /* Graphics/compute families may omit the transfer bit (optional per spec) */
+        if ((flags & VK_QUEUE_TRANSFER_BIT) &&
+            (qflags & (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT)))
+            qflags |= VK_QUEUE_TRANSFER_BIT;
+
         if (qflags & flags) {
-            uint32_t score = av_popcount(qflags) + qf[i].timestampValidBits;
+            uint32_t score = av_popcount(qflags) + qf[i].queueFamilyProperties.timestampValidBits;
+            if (score < min_score) {
+                index = i;
+                min_score = score;
+            }
+        }
+    }
+
+    if (index > -1)
+        qf[index].queueFamilyProperties.timestampValidBits++;
+
+    return index;
+}
+
+static inline int pick_video_queue_family(VkQueueFamilyProperties2 *qf,
+                                          VkQueueFamilyVideoPropertiesKHR *qf_vid, uint32_t num_qf,
+                                          VkVideoCodecOperationFlagBitsKHR flags)
+{
+    int index = -1;
+    uint32_t min_score = UINT32_MAX;
+
+    for (int i = 0; i < num_qf; i++) {
+        const VkQueueFlagBits qflags = qf[i].queueFamilyProperties.queueFlags;
+        const VkVideoCodecOperationFlagsKHR vflags = qf_vid[i].videoCodecOperations;
+
+        if (!(qflags & (VK_QUEUE_VIDEO_ENCODE_BIT_KHR | VK_QUEUE_VIDEO_DECODE_BIT_KHR)))
+            continue;
+
+        if (vflags & flags) {
+            uint32_t score = av_popcount(vflags) + qf[i].queueFamilyProperties.timestampValidBits;
             if (score < min_score) {
                 index = i;
                 min_score = score;
@@ -1046,7 +1081,7 @@  static inline int pick_queue_family(VkQueueFamilyProperties *qf, uint32_t num_qf
     }
 
     if (index > -1)
-        qf[index].timestampValidBits++;
+        qf[index].queueFamilyProperties.timestampValidBits++;
 
     return index;
 }
@@ -1054,12 +1089,12 @@  static inline int pick_queue_family(VkQueueFamilyProperties *qf, uint32_t num_qf
 static int setup_queue_families(AVHWDeviceContext *ctx, VkDeviceCreateInfo *cd)
 {
     uint32_t num;
-    float *weights;
-    VkQueueFamilyProperties *qf = NULL;
     VulkanDevicePriv *p = ctx->hwctx;
     AVVulkanDeviceContext *hwctx = &p->p;
     FFVulkanFunctions *vk = &p->vkctx.vkfn;
-    int graph_index, comp_index, tx_index, enc_index, dec_index;
+
+    VkQueueFamilyProperties2 *qf = NULL;
+    VkQueueFamilyVideoPropertiesKHR *qf_vid = NULL;
 
     /* First get the number of queue families */
     vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &num, NULL);
@@ -1069,118 +1104,166 @@  static int setup_queue_families(AVHWDeviceContext *ctx, VkDeviceCreateInfo *cd)
     }
 
     /* Then allocate memory */
-    qf = av_malloc_array(num, sizeof(VkQueueFamilyProperties));
+    qf = av_malloc_array(num, sizeof(VkQueueFamilyProperties2));
     if (!qf)
         return AVERROR(ENOMEM);
 
+    qf_vid = av_malloc_array(num, sizeof(VkQueueFamilyVideoPropertiesKHR));
+    if (!qf_vid) {
+        av_free(qf); /* don't leak the first array when the second fails */
+        return AVERROR(ENOMEM);
+    }
+
+    /* Chain a video properties struct to each entry so that a single
+     * query fills in both the generic and the video capabilities */
+    for (uint32_t i = 0; i < num; i++) {
+        qf_vid[i] = (VkQueueFamilyVideoPropertiesKHR) {
+            .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_VIDEO_PROPERTIES_KHR,
+        };
+        qf[i] = (VkQueueFamilyProperties2) {
+            .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_PROPERTIES_2,
+            .pNext = &qf_vid[i],
+        };
+    }
+
     /* Finally retrieve the queue families */
-    vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &num, qf);
+    vk->GetPhysicalDeviceQueueFamilyProperties2(hwctx->phys_dev, &num, qf);
 
     av_log(ctx, AV_LOG_VERBOSE, "Queue families:\n");
     for (int i = 0; i < num; i++) {
         av_log(ctx, AV_LOG_VERBOSE, "    %i:%s%s%s%s%s%s%s (queues: %i)\n", i,
-               ((qf[i].queueFlags) & VK_QUEUE_GRAPHICS_BIT) ? " graphics" : "",
-               ((qf[i].queueFlags) & VK_QUEUE_COMPUTE_BIT) ? " compute" : "",
-               ((qf[i].queueFlags) & VK_QUEUE_TRANSFER_BIT) ? " transfer" : "",
-               ((qf[i].queueFlags) & VK_QUEUE_VIDEO_ENCODE_BIT_KHR) ? " encode" : "",
-               ((qf[i].queueFlags) & VK_QUEUE_VIDEO_DECODE_BIT_KHR) ? " decode" : "",
-               ((qf[i].queueFlags) & VK_QUEUE_SPARSE_BINDING_BIT) ? " sparse" : "",
-               ((qf[i].queueFlags) & VK_QUEUE_PROTECTED_BIT) ? " protected" : "",
-               qf[i].queueCount);
+               ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_GRAPHICS_BIT) ? " graphics" : "",
+               ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_COMPUTE_BIT) ? " compute" : "",
+               ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_TRANSFER_BIT) ? " transfer" : "",
+               ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_VIDEO_ENCODE_BIT_KHR) ? " encode" : "",
+               ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_VIDEO_DECODE_BIT_KHR) ? " decode" : "",
+               ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_SPARSE_BINDING_BIT) ? " sparse" : "",
+               ((qf[i].queueFamilyProperties.queueFlags) & VK_QUEUE_PROTECTED_BIT) ? " protected" : "",
+               qf[i].queueFamilyProperties.queueCount);
 
         /* We use this field to keep a score of how many times we've used that
          * queue family in order to make better choices. */
-        qf[i].timestampValidBits = 0;
+        qf[i].queueFamilyProperties.timestampValidBits = 0;
     }
 
+    hwctx->nb_qf = 0;
+
     /* Pick each queue family to use */
-    graph_index = pick_queue_family(qf, num, VK_QUEUE_GRAPHICS_BIT);
-    comp_index  = pick_queue_family(qf, num, VK_QUEUE_COMPUTE_BIT);
-    tx_index    = pick_queue_family(qf, num, VK_QUEUE_TRANSFER_BIT);
-    enc_index   = pick_queue_family(qf, num, VK_QUEUE_VIDEO_ENCODE_BIT_KHR);
-    dec_index   = pick_queue_family(qf, num, VK_QUEUE_VIDEO_DECODE_BIT_KHR);
+    /* Each pick is merged into hwctx->qf[]: a family that was already
+     * picked only gets the new flags/video caps OR-ed in, otherwise a new
+     * entry is appended. Nothing is recorded if no family matches. */
+#define PICK_QF(type, vid_op)                                            \
+    do {                                                                 \
+        uint32_t i;                                                      \
+        uint32_t idx;                                                    \
+                                                                         \
+        if (vid_op)                                                      \
+            idx = pick_video_queue_family(qf, qf_vid, num, vid_op);      \
+        else                                                             \
+            idx = pick_queue_family(qf, num, type);                      \
+                                                                         \
+        if (idx == -1)                                                   \
+            break;                                                       \
+                                                                         \
+        for (i = 0; i < hwctx->nb_qf; i++) {                             \
+            if (hwctx->qf[i].idx == idx) {                               \
+                hwctx->qf[i].flags |= type;                              \
+                hwctx->qf[i].video_caps |= vid_op;                       \
+                break;                                                   \
+            }                                                            \
+        }                                                                \
+        if (i == hwctx->nb_qf) {                                         \
+            hwctx->qf[i].idx = idx;                                      \
+            hwctx->qf[i].num = qf[idx].queueFamilyProperties.queueCount; \
+            hwctx->qf[i].flags = type;                                   \
+            hwctx->qf[i].video_caps = vid_op;                            \
+            hwctx->nb_qf++;                                              \
+        }                                                                \
+    } while (0)
+
+    PICK_QF(VK_QUEUE_GRAPHICS_BIT, VK_VIDEO_CODEC_OPERATION_NONE_KHR);
+    PICK_QF(VK_QUEUE_COMPUTE_BIT, VK_VIDEO_CODEC_OPERATION_NONE_KHR);
+    PICK_QF(VK_QUEUE_TRANSFER_BIT, VK_VIDEO_CODEC_OPERATION_NONE_KHR);
+
+    PICK_QF(VK_QUEUE_VIDEO_ENCODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR);
+    PICK_QF(VK_QUEUE_VIDEO_DECODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR);
+
+    PICK_QF(VK_QUEUE_VIDEO_ENCODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR);
+    PICK_QF(VK_QUEUE_VIDEO_DECODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR);
+
+    PICK_QF(VK_QUEUE_VIDEO_DECODE_BIT_KHR, VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR);
+
+    av_free(qf);
+    av_free(qf_vid);
+
+#undef PICK_QF
+
+    cd->pQueueCreateInfos = av_malloc_array(hwctx->nb_qf,
+                                            sizeof(VkDeviceQueueCreateInfo));
+    if (!cd->pQueueCreateInfos)
+        return AVERROR(ENOMEM);
+
+    for (uint32_t i = 0; i < hwctx->nb_qf; i++) {
+        int dup = 0;
+        float *weights = NULL;
+        VkDeviceQueueCreateInfo *pc;
+        /* Skip families that already have a create info entry */
+        for (uint32_t j = 0; j < cd->queueCreateInfoCount; j++) {
+            if (hwctx->qf[i].idx == cd->pQueueCreateInfos[j].queueFamilyIndex) {
+                dup = 1;
+                break;
+            }
+        }
+        if (dup)
+            continue;
+
+        weights = av_malloc_array(hwctx->qf[i].num, sizeof(float));
+        if (!weights) {
+            /* Release the priorities of every entry filled in so far */
+            for (uint32_t j = 0; j < cd->queueCreateInfoCount; j++)
+                av_free((void *)cd->pQueueCreateInfos[j].pQueuePriorities);
+            av_free((void *)cd->pQueueCreateInfos);
+            cd->pQueueCreateInfos = NULL;
+            cd->queueCreateInfoCount = 0;
+            return AVERROR(ENOMEM);
+        }
+
+        for (uint32_t j = 0; j < hwctx->qf[i].num; j++)
+            weights[j] = 1.0;
 
-    /* Signalling the transfer capabilities on a queue family is optional */
-    if (tx_index < 0) {
-        tx_index = pick_queue_family(qf, num, VK_QUEUE_COMPUTE_BIT);
-        if (tx_index < 0)
-            tx_index = pick_queue_family(qf, num, VK_QUEUE_GRAPHICS_BIT);
+        pc = (VkDeviceQueueCreateInfo *)cd->pQueueCreateInfos;
+        pc[cd->queueCreateInfoCount++] = (VkDeviceQueueCreateInfo) {
+            .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
+            .queueFamilyIndex = hwctx->qf[i].idx,
+            .queueCount = hwctx->qf[i].num,
+            .pQueuePriorities = weights,
+        };
     }
 
+    /* Setup deprecated fields */
     hwctx->queue_family_index        = -1;
     hwctx->queue_family_comp_index   = -1;
     hwctx->queue_family_tx_index     = -1;
     hwctx->queue_family_encode_index = -1;
     hwctx->queue_family_decode_index = -1;
 
-#define SETUP_QUEUE(qf_idx)                                                    \
-    if (qf_idx > -1) {                                                         \
-        int fidx = qf_idx;                                                     \
-        int qc = qf[fidx].queueCount;                                          \
-        VkDeviceQueueCreateInfo *pc;                                           \
-                                                                               \
-        if (fidx == graph_index) {                                             \
-            hwctx->queue_family_index = fidx;                                  \
-            hwctx->nb_graphics_queues = qc;                                    \
-            graph_index = -1;                                                  \
-        }                                                                      \
-        if (fidx == comp_index) {                                              \
-            hwctx->queue_family_comp_index = fidx;                             \
-            hwctx->nb_comp_queues = qc;                                        \
-            comp_index = -1;                                                   \
-        }                                                                      \
-        if (fidx == tx_index) {                                                \
-            hwctx->queue_family_tx_index = fidx;                               \
-            hwctx->nb_tx_queues = qc;                                          \
-            tx_index = -1;                                                     \
-        }                                                                      \
-        if (fidx == enc_index) {                                               \
-            hwctx->queue_family_encode_index = fidx;                           \
-            hwctx->nb_encode_queues = qc;                                      \
-            enc_index = -1;                                                    \
-        }                                                                      \
-        if (fidx == dec_index) {                                               \
-            hwctx->queue_family_decode_index = fidx;                           \
-            hwctx->nb_decode_queues = qc;                                      \
-            dec_index = -1;                                                    \
-        }                                                                      \
-                                                                               \
-        pc = av_realloc((void *)cd->pQueueCreateInfos,                         \
-                        sizeof(*pc) * (cd->queueCreateInfoCount + 1));         \
-        if (!pc) {                                                             \
-            av_free(qf);                                                       \
-            return AVERROR(ENOMEM);                                            \
-        }                                                                      \
-        cd->pQueueCreateInfos = pc;                                            \
-        pc = &pc[cd->queueCreateInfoCount];                                    \
-                                                                               \
-        weights = av_malloc(qc * sizeof(float));                               \
-        if (!weights) {                                                        \
-            av_free(qf);                                                       \
-            return AVERROR(ENOMEM);                                            \
-        }                                                                      \
-                                                                               \
-        memset(pc, 0, sizeof(*pc));                                            \
-        pc->sType            = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;     \
-        pc->queueFamilyIndex = fidx;                                           \
-        pc->queueCount       = qc;                                             \
-        pc->pQueuePriorities = weights;                                        \
-                                                                               \
-        for (int i = 0; i < qc; i++)                                           \
-            weights[i] = 1.0f / qc;                                            \
-                                                                               \
-        cd->queueCreateInfoCount++;                                            \
-    }
-
-    SETUP_QUEUE(graph_index)
-    SETUP_QUEUE(comp_index)
-    SETUP_QUEUE(tx_index)
-    SETUP_QUEUE(enc_index)
-    SETUP_QUEUE(dec_index)
-
-#undef SETUP_QUEUE
+#define SET_OLD_QF(field, nb_field, type)             \
+    do {                                              \
+        if (field < 0 && hwctx->qf[i].flags & type) { \
+            field = hwctx->qf[i].idx;                 \
+            nb_field = hwctx->qf[i].num;              \
+        }                                             \
+    } while (0)
 
-    av_free(qf);
+    for (uint32_t i = 0; i < hwctx->nb_qf; i++) {
+        SET_OLD_QF(hwctx->queue_family_index, hwctx->nb_graphics_queues, VK_QUEUE_GRAPHICS_BIT);
+        SET_OLD_QF(hwctx->queue_family_comp_index, hwctx->nb_comp_queues, VK_QUEUE_COMPUTE_BIT);
+        SET_OLD_QF(hwctx->queue_family_tx_index, hwctx->nb_tx_queues, VK_QUEUE_TRANSFER_BIT);
+        SET_OLD_QF(hwctx->queue_family_encode_index, hwctx->nb_encode_queues, VK_QUEUE_VIDEO_ENCODE_BIT_KHR);
+        SET_OLD_QF(hwctx->queue_family_decode_index, hwctx->nb_decode_queues, VK_QUEUE_VIDEO_DECODE_BIT_KHR);
+    }
+
+#undef SET_OLD_QF
 
     return 0;
 }