diff mbox series

[FFmpeg-devel,55/97] Vulkan patchset part 2 - hwcontext rewrite and filtering

Message ID NTnyn9K--3-9@lynne.ee
State New
Headers show
Series None | expand

Commit Message

Lynne April 24, 2023, 3:56 p.m. UTC
This is part two of the vulkan patchset, which contains all the
hwcontext and vulkan.c rewrites, and filtering changes.

55 patches attached.

Comments

Niklas Haas April 28, 2023, 1:28 p.m. UTC | #1
On Mon, 24 Apr 2023 17:56:38 +0200 Lynne <dev@lynne.ee> wrote:
> This is part two of the vulkan patchset, which contains all the
> hwcontext and vulkan.c rewrites, and filtering changes.
> 
> 55 patches attached.

The new Vulkan code looks like a clear improvement over the status quo
to me. I tested it locally and everything works as expected. The new
synchronization code LGTM overall.

As noted over IRC I think it specifies unnecessarily strict
execution/memory dependencies, though I think it's better to merge a
working state first than to play the microoptimization game at this
stage. (It's also worth noting that this code is still significantly
slower than vf_libplacebo's native hwupload/hwdownload primitives, for
as-of-yet-to-be-determined reasons)

That said, I'm still in favor of this subset of your patchset being
merged soon, since it fixes major issues that prevent the current Vulkan
code on master from working properly with mpv/libplacebo.
Lynne May 10, 2023, 7:10 p.m. UTC | #2
Apr 24, 2023, 17:57 by dev@lynne.ee:

> This is part two of the vulkan patchset, which contains all the
> hwcontext and vulkan.c rewrites, and filtering changes.
>
> 55 patches attached.
>

Final patchset pushed here https://github.com/cyanreg/FFmpeg/tree/vulkan
and attached as a single .tar.gz file, as it's a few Kb larger than the maximum
attachment size of 1MiB.
Anton Khirnov May 11, 2023, 3:36 p.m. UTC | #3
Quoting Lynne (2023-04-24 17:56:38)
> From 1de5bf4281b19847fc45556431850d772180269e Mon Sep 17 00:00:00 2001
> From: Lynne <dev@lynne.ee>
> Date: Wed, 23 Nov 2022 15:15:04 +0100
> Subject: [PATCH 23/97] hwcontext_vulkan: initialize and require instance
>  version 1.3

Some comments on why is this needed and what it implies would be nice.
Anton Khirnov May 11, 2023, 4:03 p.m. UTC | #4
Quoting Lynne (2023-04-24 17:56:38)
> From b0c429d0d77d1789b6349bc6b296449ae1f8e9da Mon Sep 17 00:00:00 2001
> From: Lynne <dev@lynne.ee>
> Date: Tue, 15 Mar 2022 23:00:32 +0100
> Subject: [PATCH 26/97] hwcontext_vulkan: support threadsafe queue and frame
>  operations
> 
> ---
>  libavutil/hwcontext_vulkan.c | 176 +++++++++++++++++++++++++----------
>  libavutil/hwcontext_vulkan.h |  40 +++++++-
>  2 files changed, 167 insertions(+), 49 deletions(-)
> 
> diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
> index 894b4b83f3..b0db59b2d8 100644
> --- a/libavutil/hwcontext_vulkan.c
> +++ b/libavutil/hwcontext_vulkan.c
> @@ -27,6 +27,7 @@
>  #include <dlfcn.h>
>  #endif
>  
> +#include <pthread.h>
>  #include <unistd.h>
>  
>  #include "config.h"
> @@ -92,8 +93,10 @@ typedef struct VulkanDevicePriv {
>      VkPhysicalDeviceVulkan13Features device_features_1_3;
>  
>      /* Queues */
> -    uint32_t qfs[5];
> -    int num_qfs;
> +    pthread_mutex_t **qf_mutex;
> +    int nb_tot_qfs;
> +    uint32_t img_qfs[5];
> +    int nb_img_qfs;

This patch would be so much more readable without random renamings.

>      /* Debug callback */
>      VkDebugUtilsMessengerEXT debug_ctx;
> @@ -127,6 +130,8 @@ typedef struct VulkanFramesPriv {
>  } VulkanFramesPriv;
>  
>  typedef struct AVVkFrameInternal {
> +    pthread_mutex_t update_mutex;

As far as I can see, none of the mutices you're adding here are
ever destroyed.

> +
>  #if CONFIG_CUDA
>      /* Importing external memory into cuda is really expensive so we keep the
>       * memory imported all the time */
> @@ -1304,6 +1309,10 @@ static void vulkan_device_free(AVHWDeviceContext *ctx)
>      if (p->libvulkan)
>          dlclose(p->libvulkan);
>  
> +    for (int i = 0; i < p->nb_tot_qfs; i++)
> +        av_freep(&p->qf_mutex[i]);
> +    av_freep(&p->qf_mutex);
> +
>      RELEASE_PROPS(hwctx->enabled_inst_extensions, hwctx->nb_enabled_inst_extensions);
>      RELEASE_PROPS(hwctx->enabled_dev_extensions, hwctx->nb_enabled_dev_extensions);
>  }
> @@ -1436,13 +1445,26 @@ end:
>      return err;
>  }
>  
> +static void lock_queue(AVHWDeviceContext *ctx, int queue_family, int index)

It'd be nice to be consistent with types.
These are uint32 in vulkan, no?

> +{
> +    VulkanDevicePriv *p = ctx->internal->priv;
> +    pthread_mutex_lock(&p->qf_mutex[queue_family][index]);
> +}
> +
> +static void unlock_queue(AVHWDeviceContext *ctx, int queue_family, int index)
> +{
> +    VulkanDevicePriv *p = ctx->internal->priv;
> +    pthread_mutex_unlock(&p->qf_mutex[queue_family][index]);
> +}
> +
>  static int vulkan_device_init(AVHWDeviceContext *ctx)
>  {
>      int err;
> -    uint32_t queue_num;
> +    uint32_t qf_num;
>      AVVulkanDeviceContext *hwctx = ctx->hwctx;
>      VulkanDevicePriv *p = ctx->internal->priv;
>      FFVulkanFunctions *vk = &p->vkfn;
> +    VkQueueFamilyProperties *qf;
>      int graph_index, comp_index, tx_index, enc_index, dec_index;
>  
>      /* Set device extension flags */
> @@ -1481,12 +1503,31 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
>      p->dev_is_nvidia = (p->props.properties.vendorID == 0x10de);
>      p->dev_is_intel  = (p->props.properties.vendorID == 0x8086);
>  
> -    vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &queue_num, NULL);
> -    if (!queue_num) {
> +    vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &qf_num, NULL);
> +    if (!qf_num) {
>          av_log(ctx, AV_LOG_ERROR, "Failed to get queues!\n");
>          return AVERROR_EXTERNAL;
>      }
>  
> +    qf = av_malloc_array(qf_num, sizeof(VkQueueFamilyProperties));
> +    if (!qf)
> +        return AVERROR(ENOMEM);
> +
> +    vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &qf_num, qf);
> +
> +    p->qf_mutex = av_mallocz(qf_num*sizeof(*p->qf_mutex));

av_calloc()

> +    if (!p->qf_mutex)
> +        return AVERROR(ENOMEM);
> +    p->nb_tot_qfs = qf_num;
> +
> +    for (int i = 0; i < qf_num; i++) {
> +        p->qf_mutex[i] = av_mallocz(qf[i].queueCount*sizeof(**p->qf_mutex));

av_calloc()

> +        if (!p->qf_mutex[i])
> +            return AVERROR(ENOMEM);
> +        for (int j = 0; j < qf[i].queueCount; j++)
> +            pthread_mutex_init(&p->qf_mutex[i][j], NULL);

Should be checked.

> +    }
> +
>      graph_index = hwctx->queue_family_index;
>      comp_index  = hwctx->queue_family_comp_index;
>      tx_index    = hwctx->queue_family_tx_index;
> @@ -1501,9 +1542,9 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
>              return AVERROR(EINVAL);                                                             \
>          } else if (fidx < 0 || ctx_qf < 0) {                                                    \
>              break;                                                                              \
> -        } else if (ctx_qf >= queue_num) {                                                       \
> +        } else if (ctx_qf >= qf_num) {                                                          \
>              av_log(ctx, AV_LOG_ERROR, "Invalid %s family index %i (device has %i families)!\n", \
> -                   type, ctx_qf, queue_num);                                                    \
> +                   type, ctx_qf, qf_num);                                                       \
>              return AVERROR(EINVAL);                                                             \
>          }                                                                                       \
>                                                                                                  \
> @@ -1520,7 +1561,7 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
>          tx_index    = (ctx_qf == tx_index)    ? -1 : tx_index;                                  \
>          enc_index   = (ctx_qf == enc_index)   ? -1 : enc_index;                                 \
>          dec_index   = (ctx_qf == dec_index)   ? -1 : dec_index;                                 \
> -        p->qfs[p->num_qfs++] = ctx_qf;                                                          \
> +        p->img_qfs[p->nb_img_qfs++] = ctx_qf;                                                   \
>      } while (0)
>  
>      CHECK_QUEUE("graphics", 0, graph_index, hwctx->queue_family_index,        hwctx->nb_graphics_queues);
> @@ -1531,6 +1572,11 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
>  
>  #undef CHECK_QUEUE
>  
> +    if (!hwctx->lock_queue)
> +        hwctx->lock_queue = lock_queue;
> +    if (!hwctx->unlock_queue)
> +        hwctx->unlock_queue = unlock_queue;
> +
>      /* Get device capabilities */
>      vk->GetPhysicalDeviceMemoryProperties(hwctx->phys_dev, &p->mprops);
>  
> @@ -1732,9 +1778,6 @@ static void vulkan_free_internal(AVVkFrame *f)
>  {
>      AVVkFrameInternal *internal = f->internal;
>  
> -    if (!internal)
> -        return;
> -
>  #if CONFIG_CUDA
>      if (internal->cuda_fc_ref) {
>          AVHWFramesContext *cuda_fc = (AVHWFramesContext *)internal->cuda_fc_ref->data;
> @@ -1923,9 +1966,11 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
>      uint32_t src_qf, dst_qf;
>      VkImageLayout new_layout;
>      VkAccessFlags new_access;
> +    AVVulkanFramesContext *vkfc = hwfc->hwctx;
>      const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
>      VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
>      FFVulkanFunctions *vk = &p->vkfn;
> +    AVFrame tmp = { .data[0] = (uint8_t *)frame };

???
Anton Khirnov May 11, 2023, 4:05 p.m. UTC | #5
Quoting Lynne (2023-04-24 17:56:38)
> From c50347a552f5c7c2e3fcf20ef9a1ad4f1a419918 Mon Sep 17 00:00:00 2001
> From: Lynne <dev@lynne.ee>
> Date: Wed, 23 Nov 2022 20:32:49 +0100
> Subject: [PATCH 27/97] hwcontext_vulkan: remove contiguous memory hack
> 

This needs a lot more context.

What was the hack, why was it added, why is it being removed and what
will be affected by that.

> diff --git a/libavutil/hwcontext_vulkan.h b/libavutil/hwcontext_vulkan.h
> index 406d8709c3..e89fa52927 100644
> --- a/libavutil/hwcontext_vulkan.h
> +++ b/libavutil/hwcontext_vulkan.h
> @@ -160,9 +160,7 @@ typedef enum AVVkFrameFlags {
>       * device and tiling during av_hwframe_ctx_init(). */
>      AV_VK_FRAME_FLAG_NONE              = (1ULL << 0),
>  
> -    /* Image planes will be allocated in a single VkDeviceMemory, rather
> -     * than as per-plane VkDeviceMemory allocations. Required for exporting
> -     * to VAAPI on Intel devices. */
> +    /* DEPRECATED: does nothing. */
>      AV_VK_FRAME_FLAG_CONTIGUOUS_MEMORY = (1ULL << 1),

If it's deprecated then it should actually be deprecated.
Anton Khirnov May 11, 2023, 4:06 p.m. UTC | #6
Quoting Lynne (2023-04-24 17:56:38)
> From 287ec5138511a4760f2c66e94bd80f794cd9f7a3 Mon Sep 17 00:00:00 2001
> From: Lynne <dev@lynne.ee>
> Date: Wed, 23 Nov 2022 20:35:51 +0100
> Subject: [PATCH 28/97] hwcontext_vulkan: rename vk_pixfmt_map to
>  vk_pixfmt_planar_map
> 
> ---
>  libavutil/hwcontext_vulkan.c | 10 +++++-----
>  1 file changed, 5 insertions(+), 5 deletions(-)
> 
> diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
> index 67b4357dd1..9eacbb4d2e 100644
> --- a/libavutil/hwcontext_vulkan.c
> +++ b/libavutil/hwcontext_vulkan.c
> @@ -167,8 +167,8 @@ typedef struct AVVkFrameInternal {
>  
>  static const struct {
>      enum AVPixelFormat pixfmt;
> -    const VkFormat vkfmts[4];
> -} vk_pixfmt_map[] = {
> +    const VkFormat vkfmts[5];

???
This is not a rename.
Anton Khirnov May 11, 2023, 4:14 p.m. UTC | #7
Quoting Lynne (2023-04-24 17:56:38)
> From 956f043e9f233675856336e028cc8ee7e35c71f5 Mon Sep 17 00:00:00 2001
> From: Lynne <dev@lynne.ee>
> Date: Wed, 23 Nov 2022 14:04:28 +0100
> Subject: [PATCH 38/97] vulkan: lock queues before submitting operations
> 
> ---
>  libavutil/vulkan.c | 7 +++++++
>  1 file changed, 7 insertions(+)
> 
> diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
> index 6bf2c214b7..ad13b8f3cb 100644
> --- a/libavutil/vulkan.c
> +++ b/libavutil/vulkan.c
> @@ -625,7 +625,14 @@ int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e)
>          return AVERROR_EXTERNAL;
>      }
>  
> +    s->hwctx->lock_queue((AVHWDeviceContext *)s->device_ref->data,
> +                         e->qf->queue_family, e->qf->cur_queue % e->qf->actual_queues);
> +
>      ret = vk->QueueSubmit(q->queue, 1, &s_info, q->fence);
> +
> +    s->hwctx->unlock_queue((AVHWDeviceContext *)s->device_ref->data,
> +                           e->qf->queue_family, e->qf->cur_queue % e->qf->actual_queues);
> +

Should this patch be right after the one that adds these functions?
Anton Khirnov May 11, 2023, 4:15 p.m. UTC | #8
Quoting Lynne (2023-04-24 17:56:38)
> From d81aa7b001995a8cf65590934a7b75a51a63b192 Mon Sep 17 00:00:00 2001
> From: Lynne <dev@lynne.ee>
> Date: Wed, 23 Nov 2022 14:04:48 +0100
> Subject: [PATCH 39/97] vulkan: define VK_NO_PROTOTYPES

Some context on what this does and why is it needed would be nice.
Anton Khirnov May 11, 2023, 4:21 p.m. UTC | #9
Quoting Lynne (2023-04-24 17:56:38)
> From e20962a956444224b34d82f9a5936fae7e43bdf6 Mon Sep 17 00:00:00 2001
> From: Lynne <dev@lynne.ee>
> Date: Thu, 15 Dec 2022 17:43:27 +0100
> Subject: [PATCH 47/97] vulkan: allow alloc pNext in ff_vk_create_buf
> 
> ---
>  libavutil/vulkan.c | 5 +++--
>  libavutil/vulkan.h | 3 ++-
>  2 files changed, 5 insertions(+), 3 deletions(-)
> 
> diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
> index b1553c6537..0bb5b1eebf 100644
> --- a/libavutil/vulkan.c
> +++ b/libavutil/vulkan.c
> @@ -232,7 +232,8 @@ int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
>      return 0;
>  }
>  
> -int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNext,
> +int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
> +                     void *pNext, void *alloc_pNext,
>                       VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags)
>  {
>      int err;
> @@ -254,7 +255,7 @@ int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNe
>      };
>      VkMemoryDedicatedAllocateInfo ded_alloc = {
>          .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
> -        .pNext = NULL,
> +        .pNext = alloc_pNext,
>      };
>      VkMemoryDedicatedRequirements ded_req = {
>          .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
> diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
> index 85836a7807..d75be26977 100644
> --- a/libavutil/vulkan.h
> +++ b/libavutil/vulkan.h
> @@ -413,7 +413,8 @@ int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e);
>  /**
>   * Create a VkBuffer with the specified parameters.
>   */
> -int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNext,
> +int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
> +                     void *pNext, void *alloc_pNext,
>                       VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags);

Shouldn't you be updating all the callers of this function here?
Anton Khirnov May 11, 2023, 4:29 p.m. UTC | #10
Quoting Lynne (2023-04-24 17:56:38)
> From 786a7d08bc90a88f77057fc31d0943dcb91e4558 Mon Sep 17 00:00:00 2001
> From: Lynne <dev@lynne.ee>
> Date: Thu, 22 Dec 2022 17:37:51 +0100
> Subject: [PATCH 53/97] vulkan: add support for retrieving queue, query and
>  video properties
> 
> ---
>  libavutil/vulkan.c           | 87 ++++++++++++++++++++++++++++++------
>  libavutil/vulkan.h           | 14 ++++--
>  libavutil/vulkan_functions.h |  1 +
>  3 files changed, 85 insertions(+), 17 deletions(-)
> 
> diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
> index de0c300c0e..d045ff83c1 100644
> --- a/libavutil/vulkan.c
> +++ b/libavutil/vulkan.c
> @@ -108,8 +108,9 @@ const char *ff_vk_ret2str(VkResult res)
>  #undef CASE
>  }
>  
> -void ff_vk_load_props(FFVulkanContext *s)
> +int ff_vk_load_props(FFVulkanContext *s)
>  {
> +    uint32_t qc = 0;
>      FFVulkanFunctions *vk = &s->vkfn;
>  
>      s->driver_props = (VkPhysicalDeviceDriverProperties) {
> @@ -120,8 +121,48 @@ void ff_vk_load_props(FFVulkanContext *s)
>          .pNext = &s->driver_props,
>      };
>  
> +
>      vk->GetPhysicalDeviceProperties2(s->hwctx->phys_dev, &s->props);
>      vk->GetPhysicalDeviceMemoryProperties(s->hwctx->phys_dev, &s->mprops);
> +    vk->GetPhysicalDeviceQueueFamilyProperties2(s->hwctx->phys_dev, &qc, s->qf_props);
> +
> +    if (s->qf_props)
> +        return 0;
> +
> +    s->qf_props = av_mallocz(sizeof(*s->qf_props)*qc);

av_calloc()

Also, wouldn't it be better to allocate a single array of
{ qf_props; query_props; video_props; }

> diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
> index 4bd1c9fc00..4c38dbc2e6 100644
> --- a/libavutil/vulkan.h
> +++ b/libavutil/vulkan.h
> @@ -216,6 +216,9 @@ typedef struct FFVulkanContext {
>      VkPhysicalDeviceProperties2 props;
>      VkPhysicalDeviceDriverProperties driver_props;
>      VkPhysicalDeviceMemoryProperties mprops;
> +    VkQueueFamilyQueryResultStatusPropertiesKHR *query_props;
> +    VkQueueFamilyVideoPropertiesKHR *video_props;
> +    VkQueueFamilyProperties2 *qf_props;

How does the user of these know how many elements are in each array?
Lynne May 11, 2023, 4:32 p.m. UTC | #11
May 11, 2023, 17:36 by anton@khirnov.net:

> Quoting Lynne (2023-04-24 17:56:38)
>
>> From 1de5bf4281b19847fc45556431850d772180269e Mon Sep 17 00:00:00 2001
>> From: Lynne <dev@lynne.ee>
>> Date: Wed, 23 Nov 2022 15:15:04 +0100
>> Subject: [PATCH 23/97] hwcontext_vulkan: initialize and require instance
>>  version 1.3
>>
>
> Some comments on why is this needed and what it implies would be nice.
>

This just bumps the required loader library version (libvulkan).
All device-related features, such as video decoding, atomics, etc.
are still optional and the code deals with their loss on a local level
(e.g. the decoder or filter checks for the features it needs, not
the hwcontext).
Bumping the required version essentially packs all maintenance
extensions which correct the spec rather than requiring to enable
them individually.
Anton Khirnov May 11, 2023, 4:34 p.m. UTC | #12
Quoting Lynne (2023-04-24 17:56:38)
> From 6b5301aa29b63b90d04505c9386822b2e207a038 Mon Sep 17 00:00:00 2001
> From: Lynne <dev@lynne.ee>
> Date: Thu, 29 Dec 2022 21:16:21 +0100
> Subject: [PATCH 55/97] vulkan: rewrite to support all necessary features
> 
> ---
>  libavutil/vulkan.c           | 2145 ++++++++++++++++++----------------
>  libavutil/vulkan.h           |  515 ++++----
>  libavutil/vulkan_functions.h |    1 +
>  3 files changed, 1344 insertions(+), 1317 deletions(-)

lol

We stopped doing development like this 15 years ago.
Anton Khirnov May 11, 2023, 4:34 p.m. UTC | #13
Quoting Lynne (2023-04-24 17:56:38)
> From 89e47afc304aaf01c9c25a328ddfde37873e1f89 Mon Sep 17 00:00:00 2001
> From: Lynne <dev@lynne.ee>
> Date: Wed, 11 Jan 2023 09:37:35 +0100
> Subject: [PATCH 59/97] hwcontext_vulkan: rewrite to support multiplane
>  surfaces
> 
> ---
>  libavutil/hwcontext_vulkan.c | 791 +++++++++++++++++++----------------
>  libavutil/hwcontext_vulkan.h |  73 ++--
>  2 files changed, 474 insertions(+), 390 deletions(-)

Again, lol. Not to menion an ABI break.
Anton Khirnov May 11, 2023, 4:40 p.m. UTC | #14
Quoting Lynne (2023-04-24 17:56:38)
> @@ -3685,8 +3547,9 @@ static int vulkan_map_from(AVHWFramesContext *hwfc, AVFrame *dst,
>  #endif
>  #endif
>      default:
> -        return vulkan_map_frame_to_mem(hwfc, dst, src, flags);
> +        break;

This seems like it's also removing the ability to map to memory at all.
Lynne May 11, 2023, 4:40 p.m. UTC | #15
May 11, 2023, 18:05 by anton@khirnov.net:

> Quoting Lynne (2023-04-24 17:56:38)
>
>> From c50347a552f5c7c2e3fcf20ef9a1ad4f1a419918 Mon Sep 17 00:00:00 2001
>> From: Lynne <dev@lynne.ee>
>> Date: Wed, 23 Nov 2022 20:32:49 +0100
>> Subject: [PATCH 27/97] hwcontext_vulkan: remove contiguous memory hack
>>
>
> This needs a lot more context.
>
> What was the hack, why was it added, why is it being removed and what
> will be affected by that.
>

The hack was added to enable exporting of vulkan images to DRM.
On Intel hardware, specifically for DRM images, all planes must be
allocated next to each other, due to hardware limitation, so the hack
used a single large allocation and suballocated all planes from it.

By natively supporting multiplane images, the driver is what decides
the layout, so exporting just works.

It's a hack because it conflicted heavily with image allocation, and
with the whole ecosystem in general, before multiplane images were
supported, which just made it redundant.

This is also the commit which broke the hwcontext hardest and prompted
the entire rewrite in the first place.


>> diff --git a/libavutil/hwcontext_vulkan.h b/libavutil/hwcontext_vulkan.h
>> index 406d8709c3..e89fa52927 100644
>> --- a/libavutil/hwcontext_vulkan.h
>> +++ b/libavutil/hwcontext_vulkan.h
>> @@ -160,9 +160,7 @@ typedef enum AVVkFrameFlags {
>>  * device and tiling during av_hwframe_ctx_init(). */
>>  AV_VK_FRAME_FLAG_NONE              = (1ULL << 0),
>>  
>> -    /* Image planes will be allocated in a single VkDeviceMemory, rather
>> -     * than as per-plane VkDeviceMemory allocations. Required for exporting
>> -     * to VAAPI on Intel devices. */
>> +    /* DEPRECATED: does nothing. */
>>  AV_VK_FRAME_FLAG_CONTIGUOUS_MEMORY = (1ULL << 1),
>>
>
> If it's deprecated then it should actually be deprecated.
>

Done.
Lynne May 11, 2023, 4:45 p.m. UTC | #16
May 11, 2023, 18:06 by anton@khirnov.net:

> Quoting Lynne (2023-04-24 17:56:38)
>
>> From 287ec5138511a4760f2c66e94bd80f794cd9f7a3 Mon Sep 17 00:00:00 2001
>> From: Lynne <dev@lynne.ee>
>> Date: Wed, 23 Nov 2022 20:35:51 +0100
>> Subject: [PATCH 28/97] hwcontext_vulkan: rename vk_pixfmt_map to
>>  vk_pixfmt_planar_map
>>
>> ---
>>  libavutil/hwcontext_vulkan.c | 10 +++++-----
>>  1 file changed, 5 insertions(+), 5 deletions(-)
>>
>> diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
>> index 67b4357dd1..9eacbb4d2e 100644
>> --- a/libavutil/hwcontext_vulkan.c
>> +++ b/libavutil/hwcontext_vulkan.c
>> @@ -167,8 +167,8 @@ typedef struct AVVkFrameInternal {
>>  
>>  static const struct {
>>  enum AVPixelFormat pixfmt;
>> -    const VkFormat vkfmts[4];
>> -} vk_pixfmt_map[] = {
>> +    const VkFormat vkfmts[5];
>>
>
> ???
> This is not a rename.
>

fixed
Lynne May 11, 2023, 4:47 p.m. UTC | #17
May 11, 2023, 18:14 by anton@khirnov.net:

> Quoting Lynne (2023-04-24 17:56:38)
>
>> From 956f043e9f233675856336e028cc8ee7e35c71f5 Mon Sep 17 00:00:00 2001
>> From: Lynne <dev@lynne.ee>
>> Date: Wed, 23 Nov 2022 14:04:28 +0100
>> Subject: [PATCH 38/97] vulkan: lock queues before submitting operations
>>
>> ---
>>  libavutil/vulkan.c | 7 +++++++
>>  1 file changed, 7 insertions(+)
>>
>> diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
>> index 6bf2c214b7..ad13b8f3cb 100644
>> --- a/libavutil/vulkan.c
>> +++ b/libavutil/vulkan.c
>> @@ -625,7 +625,14 @@ int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e)
>>  return AVERROR_EXTERNAL;
>>  }
>>  
>> +    s->hwctx->lock_queue((AVHWDeviceContext *)s->device_ref->data,
>> +                         e->qf->queue_family, e->qf->cur_queue % e->qf->actual_queues);
>> +
>>  ret = vk->QueueSubmit(q->queue, 1, &s_info, q->fence);
>> +
>> +    s->hwctx->unlock_queue((AVHWDeviceContext *)s->device_ref->data,
>> +                           e->qf->queue_family, e->qf->cur_queue % e->qf->actual_queues);
>> +
>>
>
> Should this patch be right after the one that adds these functions?
>

Yes. The patch before added support for them to the hwcontext.
This patch uses them in vulkan.c, which, at the given position in
the patchset is still an independent component only used for lavfi.
I can squash them, but I'd prefer not to, though no strong feelings
about it.
Lynne May 11, 2023, 4:50 p.m. UTC | #18
May 11, 2023, 18:15 by anton@khirnov.net:

> Quoting Lynne (2023-04-24 17:56:38)
>
>> From d81aa7b001995a8cf65590934a7b75a51a63b192 Mon Sep 17 00:00:00 2001
>> From: Lynne <dev@lynne.ee>
>> Date: Wed, 23 Nov 2022 14:04:48 +0100
>> Subject: [PATCH 39/97] vulkan: define VK_NO_PROTOTYPES
>>
>
> Some context on what this does and why is it needed would be nice.
>

This just disables the vulkan headers from defining any symbols
like vkCmdPipelineBarrier2(). Instead, all functions must be loaded
via the loader and used as function pointers as vk->CmdPipelineBarrier2.Mostly just forces developers to write correct code, as using the
symbols can be undesirable in case API users define their own
function wrappers via the loader API.
Anton Khirnov May 11, 2023, 4:59 p.m. UTC | #19
Quoting Lynne (2023-05-11 18:32:48)
> May 11, 2023, 17:36 by anton@khirnov.net:
> 
> > Quoting Lynne (2023-04-24 17:56:38)
> >
> >> From 1de5bf4281b19847fc45556431850d772180269e Mon Sep 17 00:00:00 2001
> >> From: Lynne <dev@lynne.ee>
> >> Date: Wed, 23 Nov 2022 15:15:04 +0100
> >> Subject: [PATCH 23/97] hwcontext_vulkan: initialize and require instance
> >>  version 1.3
> >>
> >
> > Some comments on why is this needed and what it implies would be nice.
> >
> 
> This just bumps the required loader library version (libvulkan).
> All device-related features, such as video decoding, atomics, etc.
> are still optional and the code deals with their loss on a local level
> (e.g. the decoder or filter checks for the features it needs, not
> the hwcontext).
> Bumping the required version essentially packs all maintenance
> extensions which correct the spec rather than requiring to enable
> them individually.

So just write in the commit message a short comment on what this gives
us and why is it worth forcing the users to update their loader.
Anton Khirnov May 11, 2023, 5 p.m. UTC | #20
Quoting Lynne (2023-05-11 18:40:54)
> May 11, 2023, 18:05 by anton@khirnov.net:
> 
> > Quoting Lynne (2023-04-24 17:56:38)
> >
> >> From c50347a552f5c7c2e3fcf20ef9a1ad4f1a419918 Mon Sep 17 00:00:00 2001
> >> From: Lynne <dev@lynne.ee>
> >> Date: Wed, 23 Nov 2022 20:32:49 +0100
> >> Subject: [PATCH 27/97] hwcontext_vulkan: remove contiguous memory hack
> >>
> >
> > This needs a lot more context.
> >
> > What was the hack, why was it added, why is it being removed and what
> > will be affected by that.
> >
> 
> The hack was added to enable exporting of vulkan images to DRM.
> On Intel hardware, specifically for DRM images, all planes must be
> allocated next to each other, due to hardware limitation, so the hack
> used a single large allocation and suballocated all planes from it.
> 
> By natively supporting multiplane images, the driver is what decides
> the layout, so exporting just works.
> 
> It's a hack because it conflicted heavily with image allocation, and
> with the whole ecosystem in general, before multiplane images were
> supported, which just made it redundant.
> 
> This is also the commit which broke the hwcontext hardest and prompted
> the entire rewrite in the first place.

So take this paragraph and put it in the commit message.
Lynne May 11, 2023, 5:12 p.m. UTC | #21
May 11, 2023, 18:34 by anton@khirnov.net:

> Quoting Lynne (2023-04-24 17:56:38)
>
>> From 6b5301aa29b63b90d04505c9386822b2e207a038 Mon Sep 17 00:00:00 2001
>> From: Lynne <dev@lynne.ee>
>> Date: Thu, 29 Dec 2022 21:16:21 +0100
>> Subject: [PATCH 55/97] vulkan: rewrite to support all necessary features
>>
>> ---
>>  libavutil/vulkan.c           | 2145 ++++++++++++++++++----------------
>>  libavutil/vulkan.h           |  515 ++++----
>>  libavutil/vulkan_functions.h |    1 +
>>  3 files changed, 1344 insertions(+), 1317 deletions(-)
>>
>
> lol
>
> We stopped doing development like this 15 years ago.
>

First, I'm criticized for having too many patches. Now, I'm criticized for
having too few. There's no winning.
This touches EVERYTHING, and so it's titled appropriately. If you dislike
the word in the commit message, I can change it to something with
more marketing impact.
Anton Khirnov May 11, 2023, 5:13 p.m. UTC | #22
Quoting Lynne (2023-05-11 18:47:46)
> May 11, 2023, 18:14 by anton@khirnov.net:
> 
> > Quoting Lynne (2023-04-24 17:56:38)
> >
> >> From 956f043e9f233675856336e028cc8ee7e35c71f5 Mon Sep 17 00:00:00 2001
> >> From: Lynne <dev@lynne.ee>
> >> Date: Wed, 23 Nov 2022 14:04:28 +0100
> >> Subject: [PATCH 38/97] vulkan: lock queues before submitting operations
> >>
> >> ---
> >>  libavutil/vulkan.c | 7 +++++++
> >>  1 file changed, 7 insertions(+)
> >>
> >> diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
> >> index 6bf2c214b7..ad13b8f3cb 100644
> >> --- a/libavutil/vulkan.c
> >> +++ b/libavutil/vulkan.c
> >> @@ -625,7 +625,14 @@ int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e)
> >>  return AVERROR_EXTERNAL;
> >>  }
> >>  
> >> +    s->hwctx->lock_queue((AVHWDeviceContext *)s->device_ref->data,
> >> +                         e->qf->queue_family, e->qf->cur_queue % e->qf->actual_queues);
> >> +
> >>  ret = vk->QueueSubmit(q->queue, 1, &s_info, q->fence);
> >> +
> >> +    s->hwctx->unlock_queue((AVHWDeviceContext *)s->device_ref->data,
> >> +                           e->qf->queue_family, e->qf->cur_queue % e->qf->actual_queues);
> >> +
> >>
> >
> > Should this patch be right after the one that adds these functions?
> >
> 
> Yes. The patch before added support for them to the hwcontext.
> This patch uses them in vulkan.c, which, at the given position in
> the patchset is still an independent component only used for lavfi.
> I can squash them, but I'd prefer not to, though no strong feelings
> about it.

I'm not asking you to squash it, I'm asking you to reorder it so that it
immediately follows the patch that adds the locking functions, and isn't
ordered 12 patches later for no apparent reason.
Lynne May 11, 2023, 5:16 p.m. UTC | #23
May 11, 2023, 18:34 by anton@khirnov.net:

> Quoting Lynne (2023-04-24 17:56:38)
>
>> From 89e47afc304aaf01c9c25a328ddfde37873e1f89 Mon Sep 17 00:00:00 2001
>> From: Lynne <dev@lynne.ee>
>> Date: Wed, 11 Jan 2023 09:37:35 +0100
>> Subject: [PATCH 59/97] hwcontext_vulkan: rewrite to support multiplane
>>  surfaces
>>
>> ---
>>  libavutil/hwcontext_vulkan.c | 791 +++++++++++++++++++----------------
>>  libavutil/hwcontext_vulkan.h |  73 ++--
>>  2 files changed, 474 insertions(+), 390 deletions(-)
>>
>
> Again, lol. Not to menion an ABI break.
>

I admit, it's an ABI break as it adds fields introduced before
a few commits prior in the same patch series.
But given that the patchset as a whole does not break the
ABI, I'd like to keep this, as the order is more appropriate,
and swapping the order of the two commits is impossible
without squashing literally every single commit in between
into a giant patch whose commit message starts with a re-
word.
Anton Khirnov May 11, 2023, 5:19 p.m. UTC | #24
Quoting Lynne (2023-05-11 19:12:56)
> May 11, 2023, 18:34 by anton@khirnov.net:
> 
> > Quoting Lynne (2023-04-24 17:56:38)
> >
> >> From 6b5301aa29b63b90d04505c9386822b2e207a038 Mon Sep 17 00:00:00 2001
> >> From: Lynne <dev@lynne.ee>
> >> Date: Thu, 29 Dec 2022 21:16:21 +0100
> >> Subject: [PATCH 55/97] vulkan: rewrite to support all necessary features
> >>
> >> ---
> >>  libavutil/vulkan.c           | 2145 ++++++++++++++++++----------------
> >>  libavutil/vulkan.h           |  515 ++++----
> >>  libavutil/vulkan_functions.h |    1 +
> >>  3 files changed, 1344 insertions(+), 1317 deletions(-)
> >>
> >
> > lol
> >
> > We stopped doing development like this 15 years ago.
> >
> 
> First, I'm criticized for having too many patches. Now, I'm criticized for
> having too few. There's no winning.

I have no issue with too many patches - my work on ffmpeg CLI is over
200 commits this year alone. Small patches are reviewable, this
horrorshow is not.

> This touches EVERYTHING, and so it's titled appropriately. If you dislike
> the word in the commit message, I can change it to something with
> more marketing impact.

No, I dislike the fact that there's a giant uberpatch that rewrites
everything with zero explanation or justification.

Ideally, it should be a series of small individually reviewable changes.
If that is for some reason not feasible, the commit message should
contain
* good justification for why it is not feasible
* detailed explanation on what exactly is being done and why
Lynne May 11, 2023, 5:20 p.m. UTC | #25
May 11, 2023, 18:40 by anton@khirnov.net:

> Quoting Lynne (2023-04-24 17:56:38)
>
>> @@ -3685,8 +3547,9 @@ static int vulkan_map_from(AVHWFramesContext *hwfc, AVFrame *dst,
>>  #endif
>>  #endif
>>  default:
>> -        return vulkan_map_frame_to_mem(hwfc, dst, src, flags);
>> +        break;
>>
>
> This seems like it's also removing the ability to map to memory at all.
>

It is. Due to the driver deciding the layout of multiplane images
(which are used by default), it's not spec-valid to map the memory
used. Rather than keeping complicated code which receives no
use at all, I decided to remove it.
Anton Khirnov May 11, 2023, 5:27 p.m. UTC | #26
Quoting Lynne (2023-05-11 19:20:45)
> May 11, 2023, 18:40 by anton@khirnov.net:
> 
> > Quoting Lynne (2023-04-24 17:56:38)
> >
> >> @@ -3685,8 +3547,9 @@ static int vulkan_map_from(AVHWFramesContext *hwfc, AVFrame *dst,
> >>  #endif
> >>  #endif
> >>  default:
> >> -        return vulkan_map_frame_to_mem(hwfc, dst, src, flags);
> >> +        break;
> >>
> >
> > This seems like it's also removing the ability to map to memory at all.
> >
> 
> It is. Due to the driver deciding the layout of multiplane images
> (which are used by default), it's not spec-valid to map the memory
> used. Rather than keeping complicated code which receives no
> use at all, I decided to remove it.

That should be stated more clearly in the commit message then, along
with the reason for dropping it.
Lynne May 11, 2023, 6:13 p.m. UTC | #27
May 11, 2023, 18:30 by anton@khirnov.net:

> Quoting Lynne (2023-04-24 17:56:38)
>
>> From 786a7d08bc90a88f77057fc31d0943dcb91e4558 Mon Sep 17 00:00:00 2001
>> From: Lynne <dev@lynne.ee>
>> Date: Thu, 22 Dec 2022 17:37:51 +0100
>> Subject: [PATCH 53/97] vulkan: add support for retrieving queue, query and
>>  video properties
>>
>> ---
>>  libavutil/vulkan.c           | 87 ++++++++++++++++++++++++++++++------
>>  libavutil/vulkan.h           | 14 ++++--
>>  libavutil/vulkan_functions.h |  1 +
>>  3 files changed, 85 insertions(+), 17 deletions(-)
>>
>> diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
>> index de0c300c0e..d045ff83c1 100644
>> --- a/libavutil/vulkan.c
>> +++ b/libavutil/vulkan.c
>> @@ -108,8 +108,9 @@ const char *ff_vk_ret2str(VkResult res)
>>  #undef CASE
>>  }
>>  
>> -void ff_vk_load_props(FFVulkanContext *s)
>> +int ff_vk_load_props(FFVulkanContext *s)
>>  {
>> +    uint32_t qc = 0;
>>  FFVulkanFunctions *vk = &s->vkfn;
>>  
>>  s->driver_props = (VkPhysicalDeviceDriverProperties) {
>> @@ -120,8 +121,48 @@ void ff_vk_load_props(FFVulkanContext *s)
>>  .pNext = &s->driver_props,
>>  };
>>  
>> +
>>  vk->GetPhysicalDeviceProperties2(s->hwctx->phys_dev, &s->props);
>>  vk->GetPhysicalDeviceMemoryProperties(s->hwctx->phys_dev, &s->mprops);
>> +    vk->GetPhysicalDeviceQueueFamilyProperties2(s->hwctx->phys_dev, &qc, s->qf_props);
>> +
>> +    if (s->qf_props)
>> +        return 0;
>> +
>> +    s->qf_props = av_mallocz(sizeof(*s->qf_props)*qc);
>>
>
> av_calloc()
>

Fixed (for all 3).


>
> Also, wouldn't it be better to allocate a single array of
> { qf_props; query_props; video_props; }
>

No, the way they're read requires the array to be contiguous.


>> diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
>> index 4bd1c9fc00..4c38dbc2e6 100644
>> --- a/libavutil/vulkan.h
>> +++ b/libavutil/vulkan.h
>> @@ -216,6 +216,9 @@ typedef struct FFVulkanContext {
>>  VkPhysicalDeviceProperties2 props;
>>  VkPhysicalDeviceDriverProperties driver_props;
>>  VkPhysicalDeviceMemoryProperties mprops;
>> +    VkQueueFamilyQueryResultStatusPropertiesKHR *query_props;
>> +    VkQueueFamilyVideoPropertiesKHR *video_props;
>> +    VkQueueFamilyProperties2 *qf_props;
>>
>
> How does the user of these know how many elements are in each array?
>

They don't have to, we read the total number of queues available
for the device, so all indices are always available.
Lynne May 11, 2023, 6:55 p.m. UTC | #28
May 11, 2023, 18:04 by anton@khirnov.net:

> Quoting Lynne (2023-04-24 17:56:38)
>
>> From b0c429d0d77d1789b6349bc6b296449ae1f8e9da Mon Sep 17 00:00:00 2001
>> From: Lynne <dev@lynne.ee>
>> Date: Tue, 15 Mar 2022 23:00:32 +0100
>> Subject: [PATCH 26/97] hwcontext_vulkan: support threadsafe queue and frame
>>  operations
>>
>> ---
>>  libavutil/hwcontext_vulkan.c | 176 +++++++++++++++++++++++++----------
>>  libavutil/hwcontext_vulkan.h |  40 +++++++-
>>  2 files changed, 167 insertions(+), 49 deletions(-)
>>
>> diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
>> index 894b4b83f3..b0db59b2d8 100644
>> --- a/libavutil/hwcontext_vulkan.c
>> +++ b/libavutil/hwcontext_vulkan.c
>> @@ -27,6 +27,7 @@
>>  #include <dlfcn.h>
>>  #endif
>>  
>> +#include <pthread.h>
>>  #include <unistd.h>
>>  
>>  #include "config.h"
>> @@ -92,8 +93,10 @@ typedef struct VulkanDevicePriv {
>>  VkPhysicalDeviceVulkan13Features device_features_1_3;
>>  
>>  /* Queues */
>> -    uint32_t qfs[5];
>> -    int num_qfs;
>> +    pthread_mutex_t **qf_mutex;
>> +    int nb_tot_qfs;
>> +    uint32_t img_qfs[5];
>> +    int nb_img_qfs;
>>
>
> This patch would be so much more readable without random renamings.
>

They're not random, the meaning of each variable is different
to what they meant before.
nb_img_qfs is the total number of enabled queue familiesnb_tot_qfs is the total number of queue families listed by the driver


>> /* Debug callback */
>>  VkDebugUtilsMessengerEXT debug_ctx;
>> @@ -127,6 +130,8 @@ typedef struct VulkanFramesPriv {
>>  } VulkanFramesPriv;
>>  
>>  typedef struct AVVkFrameInternal {
>> +    pthread_mutex_t update_mutex;
>>
>
> As far as I can see, none of the mutices you're adding here are
> ever destroyed.
>

Fixed.


>> +
>>  #if CONFIG_CUDA
>>  /* Importing external memory into cuda is really expensive so we keep the
>>  * memory imported all the time */
>> @@ -1304,6 +1309,10 @@ static void vulkan_device_free(AVHWDeviceContext *ctx)
>>  if (p->libvulkan)
>>  dlclose(p->libvulkan);
>>  
>> +    for (int i = 0; i < p->nb_tot_qfs; i++)
>> +        av_freep(&p->qf_mutex[i]);
>> +    av_freep(&p->qf_mutex);
>> +
>>  RELEASE_PROPS(hwctx->enabled_inst_extensions, hwctx->nb_enabled_inst_extensions);
>>  RELEASE_PROPS(hwctx->enabled_dev_extensions, hwctx->nb_enabled_dev_extensions);
>>  }
>> @@ -1436,13 +1445,26 @@ end:
>>  return err;
>>  }
>>  
>> +static void lock_queue(AVHWDeviceContext *ctx, int queue_family, int index)
>>
>
> It'd be nice to be consistent with types.
> These are uint32 in vulkan, no?
>

Fixed. Though, they're more closely related to the
number of queue families given in the hwcontext, which
are 32-bit ints.


>> +{
>> +    VulkanDevicePriv *p = ctx->internal->priv;
>> +    pthread_mutex_lock(&p->qf_mutex[queue_family][index]);
>> +}
>> +
>> +static void unlock_queue(AVHWDeviceContext *ctx, int queue_family, int index)
>> +{
>> +    VulkanDevicePriv *p = ctx->internal->priv;
>> +    pthread_mutex_unlock(&p->qf_mutex[queue_family][index]);
>> +}
>> +
>>  static int vulkan_device_init(AVHWDeviceContext *ctx)
>>  {
>>  int err;
>> -    uint32_t queue_num;
>> +    uint32_t qf_num;
>>  AVVulkanDeviceContext *hwctx = ctx->hwctx;
>>  VulkanDevicePriv *p = ctx->internal->priv;
>>  FFVulkanFunctions *vk = &p->vkfn;
>> +    VkQueueFamilyProperties *qf;
>>  int graph_index, comp_index, tx_index, enc_index, dec_index;
>>  
>>  /* Set device extension flags */
>> @@ -1481,12 +1503,31 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
>>  p->dev_is_nvidia = (p->props.properties.vendorID == 0x10de);
>>  p->dev_is_intel  = (p->props.properties.vendorID == 0x8086);
>>  
>> -    vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &queue_num, NULL);
>> -    if (!queue_num) {
>> +    vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &qf_num, NULL);
>> +    if (!qf_num) {
>>  av_log(ctx, AV_LOG_ERROR, "Failed to get queues!\n");
>>  return AVERROR_EXTERNAL;
>>  }
>>  
>> +    qf = av_malloc_array(qf_num, sizeof(VkQueueFamilyProperties));
>> +    if (!qf)
>> +        return AVERROR(ENOMEM);
>> +
>> +    vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &qf_num, qf);
>> +
>> +    p->qf_mutex = av_mallocz(qf_num*sizeof(*p->qf_mutex));
>>
>
> av_calloc()
>
>> +    if (!p->qf_mutex)
>> +        return AVERROR(ENOMEM);
>> +    p->nb_tot_qfs = qf_num;
>> +
>> +    for (int i = 0; i < qf_num; i++) {
>> +        p->qf_mutex[i] = av_mallocz(qf[i].queueCount*sizeof(**p->qf_mutex));
>>
>
> av_calloc()
>
>> +        if (!p->qf_mutex[i])
>> +            return AVERROR(ENOMEM);
>> +        for (int j = 0; j < qf[i].queueCount; j++)
>> +            pthread_mutex_init(&p->qf_mutex[i][j], NULL);
>>
>
> Should be checked.
>

Fixed all three.


>> +    }
>> +
>>  graph_index = hwctx->queue_family_index;
>>  comp_index  = hwctx->queue_family_comp_index;
>>  tx_index    = hwctx->queue_family_tx_index;
>> @@ -1501,9 +1542,9 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
>>  return AVERROR(EINVAL);                                                             \
>>  } else if (fidx < 0 || ctx_qf < 0) {                                                    \
>>  break;                                                                              \
>> -        } else if (ctx_qf >= queue_num) {                                                       \
>> +        } else if (ctx_qf >= qf_num) {                                                          \
>>  av_log(ctx, AV_LOG_ERROR, "Invalid %s family index %i (device has %i families)!\n", \
>> -                   type, ctx_qf, queue_num);                                                    \
>> +                   type, ctx_qf, qf_num);                                                       \
>>  return AVERROR(EINVAL);                                                             \
>>  }                                                                                       \
>>  \
>> @@ -1520,7 +1561,7 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
>>  tx_index    = (ctx_qf == tx_index)    ? -1 : tx_index;                                  \
>>  enc_index   = (ctx_qf == enc_index)   ? -1 : enc_index;                                 \
>>  dec_index   = (ctx_qf == dec_index)   ? -1 : dec_index;                                 \
>> -        p->qfs[p->num_qfs++] = ctx_qf;                                                          \
>> +        p->img_qfs[p->nb_img_qfs++] = ctx_qf;                                                   \
>>  } while (0)
>>  
>>  CHECK_QUEUE("graphics", 0, graph_index, hwctx->queue_family_index,        hwctx->nb_graphics_queues);
>> @@ -1531,6 +1572,11 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
>>  
>>  #undef CHECK_QUEUE
>>  
>> +    if (!hwctx->lock_queue)
>> +        hwctx->lock_queue = lock_queue;
>> +    if (!hwctx->unlock_queue)
>> +        hwctx->unlock_queue = unlock_queue;
>> +
>>  /* Get device capabilities */
>>  vk->GetPhysicalDeviceMemoryProperties(hwctx->phys_dev, &p->mprops);
>>  
>> @@ -1732,9 +1778,6 @@ static void vulkan_free_internal(AVVkFrame *f)
>>  {
>>  AVVkFrameInternal *internal = f->internal;
>>  
>> -    if (!internal)
>> -        return;
>> -
>>  #if CONFIG_CUDA
>>  if (internal->cuda_fc_ref) {
>>  AVHWFramesContext *cuda_fc = (AVHWFramesContext *)internal->cuda_fc_ref->data;
>> @@ -1923,9 +1966,11 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
>>  uint32_t src_qf, dst_qf;
>>  VkImageLayout new_layout;
>>  VkAccessFlags new_access;
>> +    AVVulkanFramesContext *vkfc = hwfc->hwctx;
>>  const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
>>  VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
>>  FFVulkanFunctions *vk = &p->vkfn;
>> +    AVFrame tmp = { .data[0] = (uint8_t *)frame };
>>
>
> ???
>

This enables us to use the common dependency/dispatch code.
The prepare_frame function is used for both frame initialization
and frame import/export queue family transfer operations.
In the former case, no AVFrame exists yet, so, as this is purely
libavutil code, we create a temporary frame on stack. Otherwise,
we'd need to allocate multiple frames somewhere, one for each
possible command buffer dispatch.

Comment added to commit message.
Lynne May 11, 2023, 6:58 p.m. UTC | #29
May 11, 2023, 18:22 by anton@khirnov.net:

> Quoting Lynne (2023-04-24 17:56:38)
>
>> From e20962a956444224b34d82f9a5936fae7e43bdf6 Mon Sep 17 00:00:00 2001
>> From: Lynne <dev@lynne.ee>
>> Date: Thu, 15 Dec 2022 17:43:27 +0100
>> Subject: [PATCH 47/97] vulkan: allow alloc pNext in ff_vk_create_buf
>>
>> ---
>>  libavutil/vulkan.c | 5 +++--
>>  libavutil/vulkan.h | 3 ++-
>>  2 files changed, 5 insertions(+), 3 deletions(-)
>>
>> diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
>> index b1553c6537..0bb5b1eebf 100644
>> --- a/libavutil/vulkan.c
>> +++ b/libavutil/vulkan.c
>> @@ -232,7 +232,8 @@ int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
>>  return 0;
>>  }
>>  
>> -int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNext,
>> +int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
>> +                     void *pNext, void *alloc_pNext,
>>  VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags)
>>  {
>>  int err;
>> @@ -254,7 +255,7 @@ int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNe
>>  };
>>  VkMemoryDedicatedAllocateInfo ded_alloc = {
>>  .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
>> -        .pNext = NULL,
>> +        .pNext = alloc_pNext,
>>  };
>>  VkMemoryDedicatedRequirements ded_req = {
>>  .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
>> diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
>> index 85836a7807..d75be26977 100644
>> --- a/libavutil/vulkan.h
>> +++ b/libavutil/vulkan.h
>> @@ -413,7 +413,8 @@ int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e);
>>  /**
>>  * Create a VkBuffer with the specified parameters.
>>  */
>> -int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNext,
>> +int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
>> +                     void *pNext, void *alloc_pNext,
>>  VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags);
>>
>
> Shouldn't you be updating all the callers of this function here?
>

All callers of the function were in filters in libavfilter, and I wanted to keep all
filter commits in libavfilter separate.
Lynne May 11, 2023, 7:11 p.m. UTC | #30
May 11, 2023, 19:27 by anton@khirnov.net:

> Quoting Lynne (2023-05-11 19:20:45)
>
>> May 11, 2023, 18:40 by anton@khirnov.net:
>>
>> > Quoting Lynne (2023-04-24 17:56:38)
>> >
>> >> @@ -3685,8 +3547,9 @@ static int vulkan_map_from(AVHWFramesContext *hwfc, AVFrame *dst,
>> >>  #endif
>> >>  #endif
>> >>  default:
>> >> -        return vulkan_map_frame_to_mem(hwfc, dst, src, flags);
>> >> +        break;
>> >>
>> >
>> > This seems like it's also removing the ability to map to memory at all.
>> >
>>
>> It is. Due to the driver deciding the layout of multiplane images
>> (which are used by default), it's not spec-valid to map the memory
>> used. Rather than keeping complicated code which receives no
>> use at all, I decided to remove it.
>>
>
> That should be stated more clearly in the commit message then, along
> with the reason for dropping it.
>

Branch updated with all feedback.
The top unsquashed commits remains as-is because I need
to know whether "lavc/decode: allow to allocate hwaccel_priv_data early"
is okay, as the commits are cleanups which would make reverting
their squashing bug/time-consuming.
Anton Khirnov May 16, 2023, 1:31 p.m. UTC | #31
Quoting Lynne (2023-05-11 20:55:40)
> May 11, 2023, 18:04 by anton@khirnov.net:
> 
> > Quoting Lynne (2023-04-24 17:56:38)
> >
> >> From b0c429d0d77d1789b6349bc6b296449ae1f8e9da Mon Sep 17 00:00:00 2001
> >> From: Lynne <dev@lynne.ee>
> >> Date: Tue, 15 Mar 2022 23:00:32 +0100
> >> Subject: [PATCH 26/97] hwcontext_vulkan: support threadsafe queue and frame
> >>  operations
> >>
> >> ---
> >>  libavutil/hwcontext_vulkan.c | 176 +++++++++++++++++++++++++----------
> >>  libavutil/hwcontext_vulkan.h |  40 +++++++-
> >>  2 files changed, 167 insertions(+), 49 deletions(-)
> >>
> >> diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
> >> index 894b4b83f3..b0db59b2d8 100644
> >> --- a/libavutil/hwcontext_vulkan.c
> >> +++ b/libavutil/hwcontext_vulkan.c
> >> @@ -27,6 +27,7 @@
> >>  #include <dlfcn.h>
> >>  #endif
> >>  
> >> +#include <pthread.h>
> >>  #include <unistd.h>
> >>  
> >>  #include "config.h"
> >> @@ -92,8 +93,10 @@ typedef struct VulkanDevicePriv {
> >>  VkPhysicalDeviceVulkan13Features device_features_1_3;
> >>  
> >>  /* Queues */
> >> -    uint32_t qfs[5];
> >> -    int num_qfs;
> >> +    pthread_mutex_t **qf_mutex;
> >> +    int nb_tot_qfs;
> >> +    uint32_t img_qfs[5];
> >> +    int nb_img_qfs;
> >>
> >
> > This patch would be so much more readable without random renamings.
> >
> 
> They're not random, the meaning of each variable is different
> to what they meant before.
> nb_img_qfs is the total number of enabled queue familiesnb_tot_qfs is the total number of queue families listed by the driver
> 
> 
> >> /* Debug callback */
> >>  VkDebugUtilsMessengerEXT debug_ctx;
> >> @@ -127,6 +130,8 @@ typedef struct VulkanFramesPriv {
> >>  } VulkanFramesPriv;
> >>  
> >>  typedef struct AVVkFrameInternal {
> >> +    pthread_mutex_t update_mutex;
> >>
> >
> > As far as I can see, none of the mutices you're adding here are
> > ever destroyed.
> >
> 
> Fixed.

In your current tree you're only destrying update_mutex, not qf_mutexes.

And not checking thre creation of update_mutex.
Anton Khirnov May 16, 2023, 1:33 p.m. UTC | #32
Quoting Lynne (2023-05-11 20:58:38)
> May 11, 2023, 18:22 by anton@khirnov.net:
> 
> > Quoting Lynne (2023-04-24 17:56:38)
> >
> >> From e20962a956444224b34d82f9a5936fae7e43bdf6 Mon Sep 17 00:00:00 2001
> >> From: Lynne <dev@lynne.ee>
> >> Date: Thu, 15 Dec 2022 17:43:27 +0100
> >> Subject: [PATCH 47/97] vulkan: allow alloc pNext in ff_vk_create_buf
> >>
> >> ---
> >>  libavutil/vulkan.c | 5 +++--
> >>  libavutil/vulkan.h | 3 ++-
> >>  2 files changed, 5 insertions(+), 3 deletions(-)
> >>
> >> diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
> >> index b1553c6537..0bb5b1eebf 100644
> >> --- a/libavutil/vulkan.c
> >> +++ b/libavutil/vulkan.c
> >> @@ -232,7 +232,8 @@ int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
> >>  return 0;
> >>  }
> >>  
> >> -int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNext,
> >> +int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
> >> +                     void *pNext, void *alloc_pNext,
> >>  VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags)
> >>  {
> >>  int err;
> >> @@ -254,7 +255,7 @@ int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNe
> >>  };
> >>  VkMemoryDedicatedAllocateInfo ded_alloc = {
> >>  .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
> >> -        .pNext = NULL,
> >> +        .pNext = alloc_pNext,
> >>  };
> >>  VkMemoryDedicatedRequirements ded_req = {
> >>  .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
> >> diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
> >> index 85836a7807..d75be26977 100644
> >> --- a/libavutil/vulkan.h
> >> +++ b/libavutil/vulkan.h
> >> @@ -413,7 +413,8 @@ int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e);
> >>  /**
> >>  * Create a VkBuffer with the specified parameters.
> >>  */
> >> -int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNext,
> >> +int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
> >> +                     void *pNext, void *alloc_pNext,
> >>  VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags);
> >>
> >
> > Shouldn't you be updating all the callers of this function here?
> >
> 
> All callers of the function were in filters in libavfilter, and I wanted to keep all
> filter commits in libavfilter separate.

Every commit must be buildable on its own.
Anton Khirnov May 16, 2023, 1:40 p.m. UTC | #33
Quoting Lynne (2023-05-11 20:13:29)
> >> diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
> >> index 4bd1c9fc00..4c38dbc2e6 100644
> >> --- a/libavutil/vulkan.h
> >> +++ b/libavutil/vulkan.h
> >> @@ -216,6 +216,9 @@ typedef struct FFVulkanContext {
> >>  VkPhysicalDeviceProperties2 props;
> >>  VkPhysicalDeviceDriverProperties driver_props;
> >>  VkPhysicalDeviceMemoryProperties mprops;
> >> +    VkQueueFamilyQueryResultStatusPropertiesKHR *query_props;
> >> +    VkQueueFamilyVideoPropertiesKHR *video_props;
> >> +    VkQueueFamilyProperties2 *qf_props;
> >>
> >
> > How does the user of these know how many elements are in each array?
> >
> 
> They don't have to, we read the total number of queues available
> for the device, so all indices are always available.

"all indices"?

The allocated size of these arrays is purely local to
ff_vk_load_props(), so there is no safe way for any outside caller to
know it.
Lynne May 16, 2023, 2:41 p.m. UTC | #34
May 16, 2023, 15:33 by anton@khirnov.net:

> Quoting Lynne (2023-05-11 20:58:38)
>
>> May 11, 2023, 18:22 by anton@khirnov.net:
>>
>> > Quoting Lynne (2023-04-24 17:56:38)
>> >
>> >> From e20962a956444224b34d82f9a5936fae7e43bdf6 Mon Sep 17 00:00:00 2001
>> >> From: Lynne <dev@lynne.ee>
>> >> Date: Thu, 15 Dec 2022 17:43:27 +0100
>> >> Subject: [PATCH 47/97] vulkan: allow alloc pNext in ff_vk_create_buf
>> >>
>> >> ---
>> >>  libavutil/vulkan.c | 5 +++--
>> >>  libavutil/vulkan.h | 3 ++-
>> >>  2 files changed, 5 insertions(+), 3 deletions(-)
>> >>
>> >> diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
>> >> index b1553c6537..0bb5b1eebf 100644
>> >> --- a/libavutil/vulkan.c
>> >> +++ b/libavutil/vulkan.c
>> >> @@ -232,7 +232,8 @@ int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
>> >>  return 0;
>> >>  }
>> >> 
>> >> -int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNext,
>> >> +int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
>> >> +                     void *pNext, void *alloc_pNext,
>> >>  VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags)
>> >>  {
>> >>  int err;
>> >> @@ -254,7 +255,7 @@ int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNe
>> >>  };
>> >>  VkMemoryDedicatedAllocateInfo ded_alloc = {
>> >>  .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
>> >> -        .pNext = NULL,
>> >> +        .pNext = alloc_pNext,
>> >>  };
>> >>  VkMemoryDedicatedRequirements ded_req = {
>> >>  .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
>> >> diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
>> >> index 85836a7807..d75be26977 100644
>> >> --- a/libavutil/vulkan.h
>> >> +++ b/libavutil/vulkan.h
>> >> @@ -413,7 +413,8 @@ int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e);
>> >>  /**
>> >>  * Create a VkBuffer with the specified parameters.
>> >>  */
>> >> -int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNext,
>> >> +int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
>> >> +                     void *pNext, void *alloc_pNext,
>> >>  VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags);
>> >>
>> >
>> > Shouldn't you be updating all the callers of this function here?
>> >
>>
>> All callers of the function were in filters in libavfilter, and I wanted to keep all
>> filter commits in libavfilter separate.
>>
>
> Every commit must be buildable on its own.
>

fixed
Lynne May 16, 2023, 2:46 p.m. UTC | #35
May 16, 2023, 15:41 by anton@khirnov.net:

> Quoting Lynne (2023-05-11 20:13:29)
>
>> >> diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
>> >> index 4bd1c9fc00..4c38dbc2e6 100644
>> >> --- a/libavutil/vulkan.h
>> >> +++ b/libavutil/vulkan.h
>> >> @@ -216,6 +216,9 @@ typedef struct FFVulkanContext {
>> >>  VkPhysicalDeviceProperties2 props;
>> >>  VkPhysicalDeviceDriverProperties driver_props;
>> >>  VkPhysicalDeviceMemoryProperties mprops;
>> >> +    VkQueueFamilyQueryResultStatusPropertiesKHR *query_props;
>> >> +    VkQueueFamilyVideoPropertiesKHR *video_props;
>> >> +    VkQueueFamilyProperties2 *qf_props;
>> >>
>> >
>> > How does the user of these know how many elements are in each array?
>> >
>>
>> They don't have to, we read the total number of queues available
>> for the device, so all indices are always available.
>>
>
> "all indices"?
>
> The allocated size of these arrays is purely local to
> ff_vk_load_props(), so there is no safe way for any outside caller to
> know it.
>

The init function queries the driver for the total number of queue family indices,
allocates an array of that amount for each structure, and reads the properties
into the array.
API users then index into the array based on the queue family index.
API users cannot index outside of the array ever, as the queue family index
they receive is always AVVulkanDeviceContext.queue_family_index (or the
transfer, compute, encode, or decode queue family index member of that structure).
The queue family index members of that structure are checked upon initialization
to not be larger than what the driver returns.

Hence, there's no need for them to know how large the array is, as
it is allocated for all possible indices.
Lynne May 16, 2023, 2:47 p.m. UTC | #36
May 16, 2023, 15:32 by anton@khirnov.net:

> Quoting Lynne (2023-05-11 20:55:40)
>
>> May 11, 2023, 18:04 by anton@khirnov.net:
>>
>> > Quoting Lynne (2023-04-24 17:56:38)
>> >
>> >> From b0c429d0d77d1789b6349bc6b296449ae1f8e9da Mon Sep 17 00:00:00 2001
>> >> From: Lynne <dev@lynne.ee>
>> >> Date: Tue, 15 Mar 2022 23:00:32 +0100
>> >> Subject: [PATCH 26/97] hwcontext_vulkan: support threadsafe queue and frame
>> >>  operations
>> >>
>> >> ---
>> >>  libavutil/hwcontext_vulkan.c | 176 +++++++++++++++++++++++++----------
>> >>  libavutil/hwcontext_vulkan.h |  40 +++++++-
>> >>  2 files changed, 167 insertions(+), 49 deletions(-)
>> >>
>> >> diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
>> >> index 894b4b83f3..b0db59b2d8 100644
>> >> --- a/libavutil/hwcontext_vulkan.c
>> >> +++ b/libavutil/hwcontext_vulkan.c
>> >> @@ -27,6 +27,7 @@
>> >>  #include <dlfcn.h>
>> >>  #endif
>> >> 
>> >> +#include <pthread.h>
>> >>  #include <unistd.h>
>> >> 
>> >>  #include "config.h"
>> >> @@ -92,8 +93,10 @@ typedef struct VulkanDevicePriv {
>> >>  VkPhysicalDeviceVulkan13Features device_features_1_3;
>> >> 
>> >>  /* Queues */
>> >> -    uint32_t qfs[5];
>> >> -    int num_qfs;
>> >> +    pthread_mutex_t **qf_mutex;
>> >> +    int nb_tot_qfs;
>> >> +    uint32_t img_qfs[5];
>> >> +    int nb_img_qfs;
>> >>
>> >
>> > This patch would be so much more readable without random renamings.
>> >
>>
>> They're not random, the meaning of each variable is different
>> to what they meant before.
>> nb_img_qfs is the total number of enabled queue familiesnb_tot_qfs is the total number of queue families listed by the driver
>>
>>
>> >> /* Debug callback */
>> >>  VkDebugUtilsMessengerEXT debug_ctx;
>> >> @@ -127,6 +130,8 @@ typedef struct VulkanFramesPriv {
>> >>  } VulkanFramesPriv;
>> >> 
>> >>  typedef struct AVVkFrameInternal {
>> >> +    pthread_mutex_t update_mutex;
>> >>
>> >
>> > As far as I can see, none of the mutices you're adding here are
>> > ever destroyed.
>> >
>>
>> Fixed.
>>
>
> In your current tree you're only destrying update_mutex, not qf_mutexes.
>
> And not checking thre creation of update_mutex.
>

fixed both
Anton Khirnov May 18, 2023, 8:29 a.m. UTC | #37
Quoting Lynne (2023-05-16 16:46:45)
> May 16, 2023, 15:41 by anton@khirnov.net:
> 
> > Quoting Lynne (2023-05-11 20:13:29)
> >
> >> >> diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
> >> >> index 4bd1c9fc00..4c38dbc2e6 100644
> >> >> --- a/libavutil/vulkan.h
> >> >> +++ b/libavutil/vulkan.h
> >> >> @@ -216,6 +216,9 @@ typedef struct FFVulkanContext {
> >> >>  VkPhysicalDeviceProperties2 props;
> >> >>  VkPhysicalDeviceDriverProperties driver_props;
> >> >>  VkPhysicalDeviceMemoryProperties mprops;
> >> >> +    VkQueueFamilyQueryResultStatusPropertiesKHR *query_props;
> >> >> +    VkQueueFamilyVideoPropertiesKHR *video_props;
> >> >> +    VkQueueFamilyProperties2 *qf_props;
> >> >>
> >> >
> >> > How does the user of these know how many elements are in each array?
> >> >
> >>
> >> They don't have to, we read the total number of queues available
> >> for the device, so all indices are always available.
> >>
> >
> > "all indices"?
> >
> > The allocated size of these arrays is purely local to
> > ff_vk_load_props(), so there is no safe way for any outside caller to
> > know it.
> >
> 
> The init function queries the driver for the total number of queue family indices,
> allocates an array of that amount for each structure, and reads the properties
> into the array.
> API users then index into the array based on the queue family index.
> API users cannot index outside of the array ever, as the queue family index
> they receive is always AVVulkanDeviceContext.queue_family_index (or the
> transfer, compute, encode, or decode queue family index member of that structure).
> The queue family index members of that structure are checked upon initialization
> to not be larger than what the driver returns.
> 
> Hence, there's no need for them to know how large the array is, as
> it is allocated for all possible indices.

That's way too much indirection and way too much code that has to make
the exact same unstated assumption on what the actual size is. In my
experience, it is almost always a good idea to be explicit: store the
exact array size right next to the array itself.

If nothing else, it will be very helpful for the person debugging the
inevitable invalid accesses.
Anton Khirnov May 18, 2023, 8:34 a.m. UTC | #38
> commit 3257feba101053b0b3689147c1a8850f68448f62
> Author: Lynne <dev@lynne.ee>
> Date:   Sun Dec 18 08:31:03 2022 +0100
> 
>     libavcodec: add Vulkan common video code
> 
> +static AVBufferRef *alloc_data_buf(void *opaque, size_t size)
> +{
> +    uint8_t *buf = av_mallocz(size);
> +    if (!buf)
> +        return NULL;
> +
> +    return av_buffer_create(buf, size, free_data_buf, opaque, 0);

leaks buf on av_buffer_create() failure.

> +av_cold int ff_vk_video_common_init(void *log, FFVulkanContext *s,
> +                                    FFVkVideoCommon *common,
> +                                    VkVideoSessionCreateInfoKHR *session_create)
> +{
> +    int err;
> +    VkResult ret;
> +    FFVulkanFunctions *vk = &s->vkfn;
> +    VkMemoryRequirements2 *mem_req = NULL;
> +    VkVideoSessionMemoryRequirementsKHR *mem = NULL;
> +    VkBindVideoSessionMemoryInfoKHR *bind_mem = NULL;
> +
> +    /* Create session */
> +    ret = vk->CreateVideoSessionKHR(s->hwctx->act_dev, session_create,
> +                                    s->hwctx->alloc, &common->session);
> +    if (ret != VK_SUCCESS)
> +        return AVERROR_EXTERNAL;
> +
> +    /* Get memory requirements */
> +    ret = vk->GetVideoSessionMemoryRequirementsKHR(s->hwctx->act_dev,
> +                                                   common->session,
> +                                                   &common->nb_mem,
> +                                                   NULL);
> +    if (ret != VK_SUCCESS) {
> +        err = AVERROR_EXTERNAL;
> +        goto fail;
> +    }
> +
> +    /* Allocate all memory needed to actually allocate memory */
> +    common->mem = av_mallocz(sizeof(*common->mem)*common->nb_mem);

av_calloc(), same below
Anton Khirnov May 18, 2023, 8:54 a.m. UTC | #39
> commit adb671b921d006255597ac126f85adb05f9d6677
> Author: Lynne <dev@lynne.ee>
> Date:   Mon Jan 16 07:23:27 2023 +0100
> 
>     libavcodec: add Vulkan common video decoding code
> 
> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
> index d99f7bd25a..362ea31e3e 100644
> --- a/libavcodec/Makefile
> +++ b/libavcodec/Makefile
> @@ -1289,7 +1289,7 @@ SKIPHEADERS-$(CONFIG_XVMC)             += xvmc.h
>  SKIPHEADERS-$(CONFIG_VAAPI)            += vaapi_decode.h vaapi_hevc.h vaapi_encode.h
>  SKIPHEADERS-$(CONFIG_VDPAU)            += vdpau.h vdpau_internal.h
>  SKIPHEADERS-$(CONFIG_VIDEOTOOLBOX)     += videotoolbox.h vt_internal.h
> -SKIPHEADERS-$(CONFIG_VULKAN)           += vulkan.h vulkan_video.h
> +SKIPHEADERS-$(CONFIG_VULKAN)           += vulkan.h vulkan_video.h vulkan_decode.h
>  SKIPHEADERS-$(CONFIG_V4L2_M2M)         += v4l2_buffers.h v4l2_context.h v4l2_m2m.h
>  SKIPHEADERS-$(CONFIG_ZLIB)             += zlib_wrapper.h
>  
> diff --git a/libavcodec/vulkan_decode.c b/libavcodec/vulkan_decode.c
> new file mode 100644
> index 0000000000..d07b9aa3eb
> --- /dev/null
> +++ b/libavcodec/vulkan_decode.c
> @@ -0,0 +1,1182 @@
> +/*
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include "vulkan_video.h"
> +#include "vulkan_decode.h"
> +#include "config_components.h"
> +#include "libavutil/hwcontext_internal.h"

what for?

> +static AVFrame *vk_get_dpb_pool(FFVulkanDecodeShared *ctx)
> +{
> +    AVFrame *avf = av_frame_alloc();
> +    AVHWFramesContext *dpb_frames = (AVHWFramesContext *)ctx->dpb_hwfc_ref->data;
> +    if (!avf)
> +        return NULL;
> +
> +    avf->hw_frames_ctx = av_buffer_ref(ctx->dpb_hwfc_ref);
> +    if (!avf->hw_frames_ctx)
> +        av_frame_free(&avf);
> +    avf->buf[0] = av_buffer_pool_get(dpb_frames->pool);
> +    if (!avf->buf[0])
> +        av_frame_free(&avf);
> +    avf->data[0] = avf->buf[0]->data;

Why is this not av_hwframe_get_buffer()?

> +void ff_vk_decode_free_frame(FFVulkanDecodeContext *dec, FFVulkanDecodePicture *vp)
> +{
> +    FFVulkanFunctions *vk;
> +    VkSemaphoreWaitInfo sem_wait;
> +    FFVulkanDecodeShared *ctx;
> +
> +    // TODO: investigate why this happens
> +    if (!dec || !dec->shared_ref) {

My guess is that this is called from a different thread than the one
whose hwaccel_priv_data you gave to ff_hwaccel_frame_priv_alloc().
You have to attach everything you need to hwaccel_priv_buf itself.

> +/* Since to even get decoder capabilities, we have to initialize quite a lot,
> + * this function does initialization and saves it to hwaccel_priv_data if
> + * available. */
> +static int vulkan_decode_check_init(AVCodecContext *avctx, AVBufferRef *frames_ref,
> +                                    VulkanVideoProfile *profile_data,
> +                                    int *width_align, int *height_align,
> +                                    enum AVPixelFormat *pix_fmt, VkFormat *vk_fmt,
> +                                    int *dpb_dedicate)
> +{
> +    VkResult ret;
> +    int err, max_level;
> +    const struct FFVkCodecMap *vk_codec = &ff_vk_codec_map[avctx->codec_id];
> +    AVHWFramesContext *frames = (AVHWFramesContext *)frames_ref->data;
> +    AVHWDeviceContext *device = (AVHWDeviceContext *)frames->device_ref->data;
> +    AVVulkanDeviceContext *hwctx = device->hwctx;
> +    enum AVPixelFormat source_format;
> +    enum AVPixelFormat best_format;
> +    VkFormat best_vkfmt;
> +    int base_profile, cur_profile = avctx->profile;
> +
> +    int dedicated_dpb;
> +    int layered_dpb;
> +
> +    FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
> +    FFVulkanDecodeShared *ctx = (FFVulkanDecodeShared *)dec->shared_ref->data;
> +
> +    FFVulkanExtensions local_extensions = 0x0;
> +    FFVulkanExtensions *extensions = ctx ? &ctx->s.extensions : &local_extensions;
> +    FFVulkanFunctions local_vk = { 0 };
> +    FFVulkanFunctions *vk = ctx ? &ctx->s.vkfn : &local_vk;
> +    VkVideoCapabilitiesKHR local_caps = { 0 };
> +    VkVideoCapabilitiesKHR *caps = ctx ? &ctx->common.caps : &local_caps;
> +    VkVideoDecodeCapabilitiesKHR local_dec_caps = { 0 };
> +    VkVideoDecodeCapabilitiesKHR *dec_caps = ctx ? &ctx->dec_caps : &local_dec_caps;
> +
> +    VkVideoDecodeH264ProfileInfoKHR local_h264_profile = { 0 };
> +    VkVideoDecodeH264ProfileInfoKHR *h264_profile = profile_data ?
> +                                                    &profile_data->h264_profile :
> +                                                    &local_h264_profile;
> +
> +    VkVideoDecodeH264ProfileInfoKHR local_h265_profile = { 0 };
> +    VkVideoDecodeH264ProfileInfoKHR *h265_profile = profile_data ?
> +                                                    &profile_data->h265_profile :
> +                                                    &local_h265_profile;
> +
> +    VkVideoDecodeUsageInfoKHR local_usage = { 0 };
> +    VkVideoDecodeUsageInfoKHR *usage = profile_data ?
> +                                       &profile_data->usage : &local_usage;
> +    VkVideoProfileInfoKHR local_profile = { 0 };
> +    VkVideoProfileInfoKHR *profile = profile_data ?
> +                                     &profile_data->profile : &local_profile;
> +    VkVideoProfileListInfoKHR local_profile_list = { 0 };
> +    VkVideoProfileListInfoKHR *profile_list = profile_data ?
> +                                              &profile_data->profile_list :
> +                                              &local_profile_list;
> +
> +    VkPhysicalDeviceVideoFormatInfoKHR fmt_info = {
> +        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VIDEO_FORMAT_INFO_KHR,
> +        .pNext = profile_list,
> +    };
> +    VkVideoDecodeH264CapabilitiesKHR h264_caps = {
> +        .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_CAPABILITIES_KHR,
> +    };
> +    VkVideoDecodeH265CapabilitiesKHR h265_caps = {
> +        .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_CAPABILITIES_KHR,
> +    };
> +    VkVideoFormatPropertiesKHR *ret_info;
> +    uint32_t nb_out_fmts = 0;
> +
> +    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
> +    if (!desc)
> +        return AVERROR(EINVAL);
> +
> +    if (ctx && ctx->init)
> +        return 0;
> +
> +    if (!vk_codec->decode_op)
> +        return AVERROR(EINVAL);
> +
> +    *extensions = ff_vk_extensions_to_mask(hwctx->enabled_dev_extensions,
> +                                           hwctx->nb_enabled_dev_extensions);
> +
> +    if (!(*extensions & FF_VK_EXT_VIDEO_DECODE_QUEUE)) {
> +        av_log(avctx, AV_LOG_ERROR, "Device does not support the %s extension!\n",
> +               VK_KHR_VIDEO_DECODE_QUEUE_EXTENSION_NAME);
> +        return AVERROR(ENOSYS);
> +    } else if (!vk_codec->decode_extension) {
> +        av_log(avctx, AV_LOG_ERROR, "Unsupported codec for Vulkan decoding: %s!\n",
> +               avcodec_get_name(avctx->codec_id));
> +        return AVERROR(ENOSYS);
> +    } else if (!(vk_codec->decode_extension & *extensions)) {
> +        av_log(avctx, AV_LOG_ERROR, "Device does not support decoding %s!\n",
> +               avcodec_get_name(avctx->codec_id));
> +        return AVERROR(ENOSYS);
> +    }
> +
> +    err = ff_vk_load_functions(device, vk, *extensions, 1, 1);
> +    if (err < 0)
> +        return err;
> +
> +repeat:
> +    if (avctx->codec_id == AV_CODEC_ID_H264) {
> +        base_profile = FF_PROFILE_H264_CONSTRAINED_BASELINE;
> +        dec_caps->pNext = &h264_caps;
> +        usage->pNext = h264_profile;
> +        h264_profile->sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_PROFILE_INFO_KHR;
> +        h264_profile->stdProfileIdc = cur_profile;
> +        h264_profile->pictureLayout = avctx->field_order == AV_FIELD_UNKNOWN ||
> +                                      avctx->field_order == AV_FIELD_PROGRESSIVE ?
> +                                      VK_VIDEO_DECODE_H264_PICTURE_LAYOUT_PROGRESSIVE_KHR :
> +                                      VK_VIDEO_DECODE_H264_PICTURE_LAYOUT_INTERLACED_INTERLEAVED_LINES_BIT_KHR;
> +    } else if (avctx->codec_id == AV_CODEC_ID_H265) {
> +        base_profile = FF_PROFILE_HEVC_MAIN;
> +        dec_caps->pNext = &h265_caps;
> +        usage->pNext = h265_profile;
> +        h265_profile->sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_PROFILE_INFO_KHR;
> +        h265_profile->stdProfileIdc = cur_profile;
> +    }
> +
> +    usage->sType           = VK_STRUCTURE_TYPE_VIDEO_DECODE_USAGE_INFO_KHR;
> +    usage->videoUsageHints = VK_VIDEO_DECODE_USAGE_DEFAULT_KHR;
> +
> +    profile->sType               = VK_STRUCTURE_TYPE_VIDEO_PROFILE_INFO_KHR;
> +    profile->pNext               = usage;
> +    profile->videoCodecOperation = vk_codec->decode_op;
> +    profile->chromaSubsampling   = ff_vk_subsampling_from_av_desc(desc);
> +    profile->lumaBitDepth        = ff_vk_depth_from_av_depth(desc->comp[0].depth);
> +    profile->chromaBitDepth      = profile->lumaBitDepth;
> +
> +    profile_list->sType        = VK_STRUCTURE_TYPE_VIDEO_PROFILE_LIST_INFO_KHR;
> +    profile_list->profileCount = 1;
> +    profile_list->pProfiles    = profile;
> +
> +    /* Get the capabilities of the decoder for the given profile */
> +    caps->sType = VK_STRUCTURE_TYPE_VIDEO_CAPABILITIES_KHR;
> +    caps->pNext = dec_caps;
> +    dec_caps->sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_CAPABILITIES_KHR;
> +    /* dec_caps->pNext already filled in */
> +
> +    ret = vk->GetPhysicalDeviceVideoCapabilitiesKHR(hwctx->phys_dev, profile,
> +                                                    caps);
> +    if (ret == VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR &&
> +        avctx->flags & AV_HWACCEL_FLAG_ALLOW_PROFILE_MISMATCH &&
> +        cur_profile != base_profile) {
> +        cur_profile = base_profile;
> +        av_log(avctx, AV_LOG_VERBOSE, "%s profile %s not supported, attempting "
> +               "again with profile %s\n",
> +               avcodec_get_name(avctx->codec_id),
> +               avcodec_profile_name(avctx->codec_id, avctx->profile),
> +               avcodec_profile_name(avctx->codec_id, base_profile));
> +        goto repeat;

This function is long and ugly enough even without backward gotos. What
would Dijkstra say?

> +#endif /* AVCODEC_VULKAN_DECODE_H */
> diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
> index 0ab90c8f3c..db47956198 100644
> --- a/libavutil/vulkan.c
> +++ b/libavutil/vulkan.c
> @@ -510,8 +510,8 @@ void ff_vk_exec_discard_deps(FFVulkanContext *s, FFVkExecContext *e)
>              AVVkFrame *vkf = (AVVkFrame *)f->data[0];
>              vkfc->unlock_frame(hwfc, vkf);
>              e->frame_locked[j] = 0;
> -            e->frame_update[j] = 0;
>          }
> +        e->frame_update[j] = 0;

unrelated?
Lynne May 18, 2023, 11:07 a.m. UTC | #40
May 18, 2023, 10:34 by anton@khirnov.net:

>> commit 3257feba101053b0b3689147c1a8850f68448f62
>> Author: Lynne <dev@lynne.ee>
>> Date:   Sun Dec 18 08:31:03 2022 +0100
>>
>>  libavcodec: add Vulkan common video code
>>
>> +static AVBufferRef *alloc_data_buf(void *opaque, size_t size)
>> +{
>> +    uint8_t *buf = av_mallocz(size);
>> +    if (!buf)
>> +        return NULL;
>> +
>> +    return av_buffer_create(buf, size, free_data_buf, opaque, 0);
>>
>
> leaks buf on av_buffer_create() failure.
>

fixed


>> +av_cold int ff_vk_video_common_init(void *log, FFVulkanContext *s,
>> +                                    FFVkVideoCommon *common,
>> +                                    VkVideoSessionCreateInfoKHR *session_create)
>> +{
>> +    int err;
>> +    VkResult ret;
>> +    FFVulkanFunctions *vk = &s->vkfn;
>> +    VkMemoryRequirements2 *mem_req = NULL;
>> +    VkVideoSessionMemoryRequirementsKHR *mem = NULL;
>> +    VkBindVideoSessionMemoryInfoKHR *bind_mem = NULL;
>> +
>> +    /* Create session */
>> +    ret = vk->CreateVideoSessionKHR(s->hwctx->act_dev, session_create,
>> +                                    s->hwctx->alloc, &common->session);
>> +    if (ret != VK_SUCCESS)
>> +        return AVERROR_EXTERNAL;
>> +
>> +    /* Get memory requirements */
>> +    ret = vk->GetVideoSessionMemoryRequirementsKHR(s->hwctx->act_dev,
>> +                                                   common->session,
>> +                                                   &common->nb_mem,
>> +                                                   NULL);
>> +    if (ret != VK_SUCCESS) {
>> +        err = AVERROR_EXTERNAL;
>> +        goto fail;
>> +    }
>> +
>> +    /* Allocate all memory needed to actually allocate memory */
>> +    common->mem = av_mallocz(sizeof(*common->mem)*common->nb_mem);
>>
>
> av_calloc(), same below
>

Fixed all av_mallocz usage in the file
Lynne May 18, 2023, 12:27 p.m. UTC | #41
May 18, 2023, 10:54 by anton@khirnov.net:

>> commit adb671b921d006255597ac126f85adb05f9d6677
>> Author: Lynne <dev@lynne.ee>
>> Date:   Mon Jan 16 07:23:27 2023 +0100
>>
>>  libavcodec: add Vulkan common video decoding code
>>
>> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
>> index d99f7bd25a..362ea31e3e 100644
>> --- a/libavcodec/Makefile
>> +++ b/libavcodec/Makefile
>> @@ -1289,7 +1289,7 @@ SKIPHEADERS-$(CONFIG_XVMC)             += xvmc.h
>>  SKIPHEADERS-$(CONFIG_VAAPI)            += vaapi_decode.h vaapi_hevc.h vaapi_encode.h
>>  SKIPHEADERS-$(CONFIG_VDPAU)            += vdpau.h vdpau_internal.h
>>  SKIPHEADERS-$(CONFIG_VIDEOTOOLBOX)     += videotoolbox.h vt_internal.h
>> -SKIPHEADERS-$(CONFIG_VULKAN)           += vulkan.h vulkan_video.h
>> +SKIPHEADERS-$(CONFIG_VULKAN)           += vulkan.h vulkan_video.h vulkan_decode.h
>>  SKIPHEADERS-$(CONFIG_V4L2_M2M)         += v4l2_buffers.h v4l2_context.h v4l2_m2m.h
>>  SKIPHEADERS-$(CONFIG_ZLIB)             += zlib_wrapper.h
>>  
>> diff --git a/libavcodec/vulkan_decode.c b/libavcodec/vulkan_decode.c
>> new file mode 100644
>> index 0000000000..d07b9aa3eb
>> --- /dev/null
>> +++ b/libavcodec/vulkan_decode.c
>> @@ -0,0 +1,1182 @@
>> +/*
>> + * This file is part of FFmpeg.
>> + *
>> + * FFmpeg is free software; you can redistribute it and/or
>> + * modify it under the terms of the GNU Lesser General Public
>> + * License as published by the Free Software Foundation; either
>> + * version 2.1 of the License, or (at your option) any later version.
>> + *
>> + * FFmpeg is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> + * Lesser General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU Lesser General Public
>> + * License along with FFmpeg; if not, write to the Free Software
>> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
>> + */
>> +
>> +#include "vulkan_video.h"
>> +#include "vulkan_decode.h"
>> +#include "config_components.h"
>> +#include "libavutil/hwcontext_internal.h"
>>
>
> what for?
>
>> +static AVFrame *vk_get_dpb_pool(FFVulkanDecodeShared *ctx)
>> +{
>> +    AVFrame *avf = av_frame_alloc();
>> +    AVHWFramesContext *dpb_frames = (AVHWFramesContext *)ctx->dpb_hwfc_ref->data;
>> +    if (!avf)
>> +        return NULL;
>> +
>> +    avf->hw_frames_ctx = av_buffer_ref(ctx->dpb_hwfc_ref);
>> +    if (!avf->hw_frames_ctx)
>> +        av_frame_free(&avf);
>> +    avf->buf[0] = av_buffer_pool_get(dpb_frames->pool);
>> +    if (!avf->buf[0])
>> +        av_frame_free(&avf);
>> +    avf->data[0] = avf->buf[0]->data;
>>
>
> Why is this not av_hwframe_get_buffer()?
>

Didn't occur to me. Fixed.


>> +void ff_vk_decode_free_frame(FFVulkanDecodeContext *dec, FFVulkanDecodePicture *vp)
>> +{
>> +    FFVulkanFunctions *vk;
>> +    VkSemaphoreWaitInfo sem_wait;
>> +    FFVulkanDecodeShared *ctx;
>> +
>> +    // TODO: investigate why this happens
>> +    if (!dec || !dec->shared_ref) {
>>
>
> My guess is that this is called from a different thread than the one
> whose hwaccel_priv_data you gave to ff_hwaccel_frame_priv_alloc().
> You have to attach everything you need to hwaccel_priv_buf itself.
>

This was an old todo which I fixed previously. Removed.


>> +               avcodec_get_name(avctx->codec_id),
>> +               avcodec_profile_name(avctx->codec_id, avctx->profile),
>> +               avcodec_profile_name(avctx->codec_id, base_profile));
>> +        goto repeat;
>>
>
> This function is long and ugly enough even without backward gotos. What
> would Dijkstra say?
>

I tried to do it with a function, but the result was more sphagetti
and code duplication, due to needing to handle the return code.
I've commented the goto parts better.


>> +#endif /* AVCODEC_VULKAN_DECODE_H */
>> diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
>> index 0ab90c8f3c..db47956198 100644
>> --- a/libavutil/vulkan.c
>> +++ b/libavutil/vulkan.c
>> @@ -510,8 +510,8 @@ void ff_vk_exec_discard_deps(FFVulkanContext *s, FFVkExecContext *e)
>>  AVVkFrame *vkf = (AVVkFrame *)f->data[0];
>>  vkfc->unlock_frame(hwfc, vkf);
>>  e->frame_locked[j] = 0;
>> -            e->frame_update[j] = 0;
>>  }
>> +        e->frame_update[j] = 0;
>>
>
> unrelated?
>

Fixed.
Lynne May 18, 2023, 12:28 p.m. UTC | #42
May 18, 2023, 10:30 by anton@khirnov.net:

> Quoting Lynne (2023-05-16 16:46:45)
>
>> May 16, 2023, 15:41 by anton@khirnov.net:
>>
>> > Quoting Lynne (2023-05-11 20:13:29)
>> >
>> >> >> diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
>> >> >> index 4bd1c9fc00..4c38dbc2e6 100644
>> >> >> --- a/libavutil/vulkan.h
>> >> >> +++ b/libavutil/vulkan.h
>> >> >> @@ -216,6 +216,9 @@ typedef struct FFVulkanContext {
>> >> >>  VkPhysicalDeviceProperties2 props;
>> >> >>  VkPhysicalDeviceDriverProperties driver_props;
>> >> >>  VkPhysicalDeviceMemoryProperties mprops;
>> >> >> +    VkQueueFamilyQueryResultStatusPropertiesKHR *query_props;
>> >> >> +    VkQueueFamilyVideoPropertiesKHR *video_props;
>> >> >> +    VkQueueFamilyProperties2 *qf_props;
>> >> >>
>> >> >
>> >> > How does the user of these know how many elements are in each array?
>> >> >
>> >>
>> >> They don't have to, we read the total number of queues available
>> >> for the device, so all indices are always available.
>> >>
>> >
>> > "all indices"?
>> >
>> > The allocated size of these arrays is purely local to
>> > ff_vk_load_props(), so there is no safe way for any outside caller to
>> > know it.
>> >
>>
>> The init function queries the driver for the total number of queue family indices,
>> allocates an array of that amount for each structure, and reads the properties
>> into the array.
>> API users then index into the array based on the queue family index.
>> API users cannot index outside of the array ever, as the queue family index
>> they receive is always AVVulkanDeviceContext.queue_family_index (or the
>> transfer, compute, encode, or decode queue family index member of that structure).
>> The queue family index members of that structure are checked upon initialization
>> to not be larger than what the driver returns.
>>
>> Hence, there's no need for them to know how large the array is, as
>> it is allocated for all possible indices.
>>
>
> That's way too much indirection and way too much code that has to make
> the exact same unstated assumption on what the actual size is. In my
> experience, it is almost always a good idea to be explicit: store the
> exact array size right next to the array itself.
>
> If nothing else, it will be very helpful for the person debugging the
> inevitable invalid accesses.
>

added a counter
Leo Izen May 19, 2023, 12:11 p.m. UTC | #43
On 4/24/23 11:56, Lynne wrote:
> This is part two of the vulkan patchset, which contains all the
> hwcontext and vulkan.c rewrites, and filtering changes.
> 
> 55 patches attached.
> 
> 

[PATCH 21/97] lavu: add 12-bit 2-plane 422 and 444 pixel formats

iirc new pixel formats need an APIChanges entry, a lavu micro bump, or 
both. I'm not really sure what the policy is but I remember being told 
something like that when I added a NE macro a few months ago.

[various]

 > static int vulkan_device_create_internal(AVHWDeviceContext *ctx,
 >    ...
 >    p->device_features_1_3.foo = dev_features_1_3.foo;

There's a lot of these in various patches, why do these need to be done 
manually?

[PATCH 26/97] hwcontext_vulkan: support threadsafe queue and frame

 > p->qf_mutex = av_mallocz(qf_num*sizeof(*p->qf_mutex));

av_calloc

+    for (int i = 0; i < qf_num; i++) {
+        p->qf_mutex[i] = 
av_mallocz(qf[i].queueCount*sizeof(**p->qf_mutex));
+        if (!p->qf_mutex[i])
+            return AVERROR(ENOMEM);
+        for (int j = 0; j < qf[i].queueCount; j++)
+            pthread_mutex_init(&p->qf_mutex[i][j], NULL);
+    }

If the allocation fails for i > 0, you end up with some initialized 
mutexes, is this going to be an issue ever?

@@ -1732,9 +1778,6 @@ static void vulkan_free_internal(AVVkFrame *f)
  {
      AVVkFrameInternal *internal = f->internal;

-    if (!internal)
-        return;
-
  #if CONFIG_CUDA
      if (internal->cuda_fc_ref) {
          AVHWFramesContext *cuda_fc = (AVHWFramesContext 
*)internal->cuda_fc_ref->data;

What happens if (!internal) and #defined(CONFIG_CUDA), do we just segfault?

- Leo Izen
Lynne May 22, 2023, 8:26 a.m. UTC | #44
Planning on pushing this partially (no encoding) tomorrow unless there are more comments.
All known issues have been fixed, and if there are more issues, they can be found as users test it.
Lynne May 25, 2023, 12:31 a.m. UTC | #45
May 22, 2023, 10:26 by dev@lynne.ee:

> Planning on pushing this partially (no encoding) tomorrow unless there are more comments.
> All known issues have been fixed, and if there are more issues, they can be found as users test it.
>

Added APIchanges and bumped minor for lavu and lavc.
Planning to push this in 2 days unless there are more comments.
All known issues have been addressed.
Lynne May 26, 2023, 5:52 p.m. UTC | #46
May 25, 2023, 02:31 by dev@lynne.ee:

> May 22, 2023, 10:26 by dev@lynne.ee:
>
>> Planning on pushing this partially (no encoding) tomorrow unless there are more comments.
>> All known issues have been fixed, and if there are more issues, they can be found as users test it.
>>
>
> Added APIchanges and bumped minor for lavu and lavc.
> Planning to push this in 2 days unless there are more comments.
> All known issues have been addressed.
>

Planning to push this tomorrow morning.
Anton Khirnov May 26, 2023, 7:19 p.m. UTC | #47
Quoting Lynne (2023-05-26 19:52:24)
> May 25, 2023, 02:31 by dev@lynne.ee:
> 
> > May 22, 2023, 10:26 by dev@lynne.ee:
> >
> >> Planning on pushing this partially (no encoding) tomorrow unless there are more comments.
> >> All known issues have been fixed, and if there are more issues, they can be found as users test it.
> >>
> >
> > Added APIchanges and bumped minor for lavu and lavc.
> > Planning to push this in 2 days unless there are more comments.
> > All known issues have been addressed.
> >
> 
> Planning to push this tomorrow morning.

I did not approve your internal_ref hack, and I strongly object to it.

Not to mention you never even sent it to the ML.
Lynne May 26, 2023, 8:50 p.m. UTC | #48
May 26, 2023, 21:19 by anton@khirnov.net:

> Quoting Lynne (2023-05-26 19:52:24)
>
>> May 25, 2023, 02:31 by dev@lynne.ee:
>>
>> > May 22, 2023, 10:26 by dev@lynne.ee:
>> >
>> >> Planning on pushing this partially (no encoding) tomorrow unless there are more comments.
>> >> All known issues have been fixed, and if there are more issues, they can be found as users test it.
>> >>
>> >
>> > Added APIchanges and bumped minor for lavu and lavc.
>> > Planning to push this in 2 days unless there are more comments.
>> > All known issues have been addressed.
>> >
>>
>> Planning to push this tomorrow morning.
>>
>
> I did not approve your internal_ref hack, and I strongly object to it.
>

Glad to hear it. You didn't say anything, and I was starting to get worried.


> Not to mention you never even sent it to the ML.
>

You discussed this with me on IRC.
Everyone on the ML knows where my tree is and that this
is what is for review.
You'll have to find a better reason than this.

I could've played unfair, and pushed the patches without
bumping this. But I didn't.
You refuse to talk to me, despite the vast majority
of issues with my patchset being solvable in no more than 20 minutes
of talking about better ways to fix a problem. This is what is unfair.

You didn't even specify *why* you object to it, let alone a way
to fix this, despite knowing about it since Monday, discussing it,
me pinging you, and you not responding. This is not acceptable,
and easily confused with trolling.

Either propose a better way, or I will look for the opinion of others
without taking yours into consideration.
diff mbox series

Patch

From 317ef8322e0d9109e1109a4b06f38470176ec12a Mon Sep 17 00:00:00 2001
From: Niklas Haas <git@haasn.dev>
Date: Mon, 30 Jan 2023 14:18:34 +0100
Subject: [PATCH 75/97] avfilter/vf_libplacebo: forward queue locking
 primitives

For thread safety.
---
 libavfilter/vf_libplacebo.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/libavfilter/vf_libplacebo.c b/libavfilter/vf_libplacebo.c
index ba852de08d..d6afcdab0a 100644
--- a/libavfilter/vf_libplacebo.c
+++ b/libavfilter/vf_libplacebo.c
@@ -289,6 +289,11 @@  static int init_vulkan(AVFilterContext *avctx)
         .extensions     = hwctx->enabled_dev_extensions,
         .num_extensions = hwctx->nb_enabled_dev_extensions,
         .features       = &hwctx->device_features,
+#if PL_API_VER >= 201
+        .lock_queue     = (void (*)(void *, int, int)) hwctx->lock_queue,
+        .unlock_queue   = (void (*)(void *, int, int)) hwctx->unlock_queue,
+        .queue_ctx      = (void *) avhwctx,
+#endif
         .queue_graphics = {
             .index = hwctx->queue_family_index,
             .count = hwctx->nb_graphics_queues,
-- 
2.40.0