diff mbox series

[FFmpeg-devel,v2,3/3] hwcontext_vaapi: Use PRIME_2 memory type for modifiers.

Message ID 20201113231548.869559-3-bas@basnieuwenhuizen.nl
State New
Headers show
Series [FFmpeg-devel,v2,1/3] kmsgrab: Use invalid modifier if modifiers weren't used.
Related show

Checks

Context Check Description
andriy/x86_make success Make finished
andriy/x86_make_fate success Make fate finished
andriy/PPC64_make success Make finished
andriy/PPC64_make_fate success Make fate finished

Commit Message

Bas Nieuwenhuizen Nov. 13, 2020, 11:15 p.m. UTC
This way we can pass explicit modifiers in. Sometimes the
modifier matters for the number of memory planes that
libva accepts, in particular when dealing with
driver-compressed textures. Furthermore the driver might
not actually be able to determine the implicit modifier
if all the buffer-passing has used explicit modifier.
All these issues should be resolved by passing in the
modifier, and for that we switch to using the PRIME_2
memory type.

Tested with experimental radeonsi patches for modifiers
and kmsgrab. Also tested with radeonsi without the
patches to double-check it works without PRIME_2 support.

v2:
  Cache PRIME_2 support to avoid doing two calls every time on
  libva drivers that do not support it.
---
 libavutil/hwcontext_vaapi.c | 158 ++++++++++++++++++++++++++----------
 1 file changed, 115 insertions(+), 43 deletions(-)

Comments

Mark Thompson Jan. 13, 2021, 11:21 p.m. UTC | #1
On 13/11/2020 23:15, Bas Nieuwenhuizen wrote:
> This way we can pass explicit modifiers in. Sometimes the
> modifier matters for the number of memory planes that
> libva accepts, in particular when dealing with
> driver-compressed textures. Furthermore the driver might
> not actually be able to determine the implicit modifier
> if all the buffer-passing has used explicit modifier.
> All these issues should be resolved by passing in the
> modifier, and for that we switch to using the PRIME_2
> memory type.
> 
> Tested with experimental radeonsi patches for modifiers
> and kmsgrab. Also tested with radeonsi without the
> patches to double-check it works without PRIME_2 support.
> 
> v2:
>    Cache PRIME_2 support to avoid doing two calls every time on
>    libva drivers that do not support it.
> ---
>   libavutil/hwcontext_vaapi.c | 158 ++++++++++++++++++++++++++----------
>   1 file changed, 115 insertions(+), 43 deletions(-)
> 
> diff --git a/libavutil/hwcontext_vaapi.c b/libavutil/hwcontext_vaapi.c
> index 2227d6ed69..62b5a163ee 100644
> --- a/libavutil/hwcontext_vaapi.c
> +++ b/libavutil/hwcontext_vaapi.c
> @@ -79,6 +79,9 @@ typedef struct VAAPIFramesContext {
>       unsigned int rt_format;
>       // Whether vaDeriveImage works.
>       int derive_works;
> +    // Caches whether VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2 is unsupported for
> +    // surface imports.
> +    int prime_2_import_unsupported;
>   } VAAPIFramesContext;
>   
>   typedef struct VAAPIMapping {
> @@ -1022,6 +1025,7 @@ static void vaapi_unmap_from_drm(AVHWFramesContext *dst_fc,
>   static int vaapi_map_from_drm(AVHWFramesContext *src_fc, AVFrame *dst,
>                                 const AVFrame *src, int flags)
>   {
> +    VAAPIFramesContext     *src_vafc = src_fc->internal->priv;
>       AVHWFramesContext      *dst_fc =
>           (AVHWFramesContext*)dst->hw_frames_ctx->data;
>       AVVAAPIDeviceContext  *dst_dev = dst_fc->device_ctx->hwctx;
> @@ -1029,25 +1033,10 @@ static int vaapi_map_from_drm(AVHWFramesContext *src_fc, AVFrame *dst,
>       const VAAPIFormatDescriptor *format_desc;
>       VASurfaceID surface_id;
>       VAStatus vas;
> +    VAStatus prime2_vas = VA_STATUS_SUCCESS;
> +    int use_prime2;
>       uint32_t va_fourcc;
> -    int err, i, j, k;
> -
> -    unsigned long buffer_handle;
> -    VASurfaceAttribExternalBuffers buffer_desc;
> -    VASurfaceAttrib attrs[2] = {
> -        {
> -            .type  = VASurfaceAttribMemoryType,
> -            .flags = VA_SURFACE_ATTRIB_SETTABLE,
> -            .value.type    = VAGenericValueTypeInteger,
> -            .value.value.i = VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME,
> -        },
> -        {
> -            .type  = VASurfaceAttribExternalBufferDescriptor,
> -            .flags = VA_SURFACE_ATTRIB_SETTABLE,
> -            .value.type    = VAGenericValueTypePointer,
> -            .value.value.p = &buffer_desc,
> -        }
> -    };
> +    int err, i, j;
>   
>       desc = (AVDRMFrameDescriptor*)src->data[0];
>   
> @@ -1083,35 +1072,115 @@ static int vaapi_map_from_drm(AVHWFramesContext *src_fc, AVFrame *dst,
>       format_desc = vaapi_format_from_fourcc(va_fourcc);
>       av_assert0(format_desc);
>   
> -    buffer_handle = desc->objects[0].fd;
> -    buffer_desc.pixel_format = va_fourcc;
> -    buffer_desc.width        = src_fc->width;
> -    buffer_desc.height       = src_fc->height;
> -    buffer_desc.data_size    = desc->objects[0].size;
> -    buffer_desc.buffers      = &buffer_handle;
> -    buffer_desc.num_buffers  = 1;
> -    buffer_desc.flags        = 0;
> -
> -    k = 0;
> -    for (i = 0; i < desc->nb_layers; i++) {
> -        for (j = 0; j < desc->layers[i].nb_planes; j++) {
> -            buffer_desc.pitches[k] = desc->layers[i].planes[j].pitch;
> -            buffer_desc.offsets[k] = desc->layers[i].planes[j].offset;
> -            ++k;
> +    use_prime2 = !src_vafc->prime_2_import_unsupported &&
> +                 desc->objects[0].format_modifier != DRM_FORMAT_MOD_INVALID;
> +    if (use_prime2) {
> +        VADRMPRIMESurfaceDescriptor prime_desc;
> +        VASurfaceAttrib prime_attrs[2] = {
> +            {
> +                .type  = VASurfaceAttribMemoryType,
> +                .flags = VA_SURFACE_ATTRIB_SETTABLE,
> +                .value.type    = VAGenericValueTypeInteger,
> +                .value.value.i = VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2,
> +            },
> +            {
> +                .type  = VASurfaceAttribExternalBufferDescriptor,
> +                .flags = VA_SURFACE_ATTRIB_SETTABLE,
> +                .value.type    = VAGenericValueTypePointer,
> +                .value.value.p = &prime_desc,
> +            }
> +        };
> +        prime_desc.fourcc = va_fourcc;
> +        prime_desc.width = src_fc->width;
> +        prime_desc.height = src_fc->height;
> +        prime_desc.num_objects = desc->nb_objects;
> +        for (i = 0; i < desc->nb_objects; ++i) {
> +            prime_desc.objects[i].fd = desc->objects[i].fd;
> +            prime_desc.objects[i].size = desc->objects[i].size;
> +            prime_desc.objects[i].drm_format_modifier =
> +                    desc->objects[i].format_modifier;
> +        }
> +
> +        prime_desc.num_layers = desc->nb_layers;
> +        for (i = 0; i < desc->nb_layers; ++i) {
> +            prime_desc.layers[i].drm_format = desc->layers[i].format;
> +            prime_desc.layers[i].num_planes = desc->layers[i].nb_planes;
> +            for (j = 0; j < desc->layers[i].nb_planes; ++j) {
> +                prime_desc.layers[i].object_index[j] =
> +                        desc->layers[i].planes[j].object_index;
> +                prime_desc.layers[i].offset[j] = desc->layers[i].planes[j].offset;
> +                prime_desc.layers[i].pitch[j] = desc->layers[i].planes[j].pitch;
> +            }
> +
> +            if (format_desc->chroma_planes_swapped &&
> +                desc->layers[i].nb_planes == 3) {
> +                FFSWAP(uint32_t, prime_desc.layers[i].pitch[1],
> +                    prime_desc.layers[i].pitch[2]);
> +                FFSWAP(uint32_t, prime_desc.layers[i].offset[1],
> +                    prime_desc.layers[i].offset[2]);
> +            }
>           }
> -    }
> -    buffer_desc.num_planes = k;
>   
> -    if (format_desc->chroma_planes_swapped &&
> -        buffer_desc.num_planes == 3) {
> -        FFSWAP(uint32_t, buffer_desc.pitches[1], buffer_desc.pitches[2]);
> -        FFSWAP(uint32_t, buffer_desc.offsets[1], buffer_desc.offsets[2]);
> +        /*
> +        * We can query for PRIME_2 support with vaQuerySurfaceAttributes, but that
> +        * that needs the config_id which we don't have here . Both Intel and
> +        * Gallium seem to do the correct error checks, so lets just try the
> +        * PRIME_2 import first.
> +        */
> +        prime2_vas = vaCreateSurfaces(dst_dev->display, format_desc->rt_format,
> +                                      src->width, src->height, &surface_id, 1,
> +                                      prime_attrs, FF_ARRAY_ELEMS(prime_attrs));
>       }
>   
> -    vas = vaCreateSurfaces(dst_dev->display, format_desc->rt_format,
> -                           src->width, src->height,
> -                           &surface_id, 1,
> -                           attrs, FF_ARRAY_ELEMS(attrs));
> +    if (prime2_vas != VA_STATUS_SUCCESS || !use_prime2) {
> +        int k;
> +        unsigned long buffer_handle;
> +        VASurfaceAttribExternalBuffers buffer_desc;
> +        VASurfaceAttrib buffer_attrs[2] = {
> +            {
> +                .type  = VASurfaceAttribMemoryType,
> +                .flags = VA_SURFACE_ATTRIB_SETTABLE,
> +                .value.type    = VAGenericValueTypeInteger,
> +                .value.value.i = VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME,
> +            },
> +            {
> +                .type  = VASurfaceAttribExternalBufferDescriptor,
> +                .flags = VA_SURFACE_ATTRIB_SETTABLE,
> +                .value.type    = VAGenericValueTypePointer,
> +                .value.value.p = &buffer_desc,
> +            }
> +        };
> +
> +        buffer_handle = desc->objects[0].fd;
> +        buffer_desc.pixel_format = va_fourcc;
> +        buffer_desc.width        = src_fc->width;
> +        buffer_desc.height       = src_fc->height;
> +        buffer_desc.data_size    = desc->objects[0].size;
> +        buffer_desc.buffers      = &buffer_handle;
> +        buffer_desc.num_buffers  = 1;
> +        buffer_desc.flags        = 0;
> +
> +        k = 0;
> +        for (i = 0; i < desc->nb_layers; i++) {
> +            for (j = 0; j < desc->layers[i].nb_planes; j++) {
> +                buffer_desc.pitches[k] = desc->layers[i].planes[j].pitch;
> +                buffer_desc.offsets[k] = desc->layers[i].planes[j].offset;
> +                ++k;
> +            }
> +        }
> +        buffer_desc.num_planes = k;
> +
> +        if (format_desc->chroma_planes_swapped &&
> +            buffer_desc.num_planes == 3) {
> +            FFSWAP(uint32_t, buffer_desc.pitches[1], buffer_desc.pitches[2]);
> +            FFSWAP(uint32_t, buffer_desc.offsets[1], buffer_desc.offsets[2]);
> +        }
> +
> +        vas = vaCreateSurfaces(dst_dev->display, format_desc->rt_format,
> +                               src->width, src->height,
> +                               &surface_id, 1,
> +                               buffer_attrs, FF_ARRAY_ELEMS(buffer_attrs));
> +    }
>       if (vas != VA_STATUS_SUCCESS) {

The status variables are confused here - vas is uninitialised if PRIME_2 import succeeded, giving random error results.

Maybe it would be cleaner to use vas everywhere ...

>           av_log(dst_fc, AV_LOG_ERROR, "Failed to create surface from DRM "
>                  "object: %d (%s).\n", vas, vaErrorStr(vas));
> @@ -1119,6 +1188,9 @@ static int vaapi_map_from_drm(AVHWFramesContext *src_fc, AVFrame *dst,
>       }
>       av_log(dst_fc, AV_LOG_DEBUG, "Create surface %#x.\n", surface_id);
>   
> +    if (prime2_vas != VA_STATUS_SUCCESS)
> +        src_vafc->prime_2_import_unsupported = 1;

... while moving this test to immediately after the PRIME_2 vaCreateSurfaces() call.

> +
>       err = ff_hwframe_map_create(dst->hw_frames_ctx, dst, src,
>                                   &vaapi_unmap_from_drm,
>                                   (void*)(uintptr_t)surface_id);
> 

Thanks,

- Mark
diff mbox series

Patch

diff --git a/libavutil/hwcontext_vaapi.c b/libavutil/hwcontext_vaapi.c
index 2227d6ed69..62b5a163ee 100644
--- a/libavutil/hwcontext_vaapi.c
+++ b/libavutil/hwcontext_vaapi.c
@@ -79,6 +79,9 @@  typedef struct VAAPIFramesContext {
     unsigned int rt_format;
     // Whether vaDeriveImage works.
     int derive_works;
+    // Caches whether VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2 is unsupported for
+    // surface imports.
+    int prime_2_import_unsupported;
 } VAAPIFramesContext;
 
 typedef struct VAAPIMapping {
@@ -1022,6 +1025,7 @@  static void vaapi_unmap_from_drm(AVHWFramesContext *dst_fc,
 static int vaapi_map_from_drm(AVHWFramesContext *src_fc, AVFrame *dst,
                               const AVFrame *src, int flags)
 {
+    VAAPIFramesContext     *src_vafc = src_fc->internal->priv;
     AVHWFramesContext      *dst_fc =
         (AVHWFramesContext*)dst->hw_frames_ctx->data;
     AVVAAPIDeviceContext  *dst_dev = dst_fc->device_ctx->hwctx;
@@ -1029,25 +1033,10 @@  static int vaapi_map_from_drm(AVHWFramesContext *src_fc, AVFrame *dst,
     const VAAPIFormatDescriptor *format_desc;
     VASurfaceID surface_id;
     VAStatus vas;
+    VAStatus prime2_vas = VA_STATUS_SUCCESS;
+    int use_prime2;
     uint32_t va_fourcc;
-    int err, i, j, k;
-
-    unsigned long buffer_handle;
-    VASurfaceAttribExternalBuffers buffer_desc;
-    VASurfaceAttrib attrs[2] = {
-        {
-            .type  = VASurfaceAttribMemoryType,
-            .flags = VA_SURFACE_ATTRIB_SETTABLE,
-            .value.type    = VAGenericValueTypeInteger,
-            .value.value.i = VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME,
-        },
-        {
-            .type  = VASurfaceAttribExternalBufferDescriptor,
-            .flags = VA_SURFACE_ATTRIB_SETTABLE,
-            .value.type    = VAGenericValueTypePointer,
-            .value.value.p = &buffer_desc,
-        }
-    };
+    int err, i, j;
 
     desc = (AVDRMFrameDescriptor*)src->data[0];
 
@@ -1083,35 +1072,115 @@  static int vaapi_map_from_drm(AVHWFramesContext *src_fc, AVFrame *dst,
     format_desc = vaapi_format_from_fourcc(va_fourcc);
     av_assert0(format_desc);
 
-    buffer_handle = desc->objects[0].fd;
-    buffer_desc.pixel_format = va_fourcc;
-    buffer_desc.width        = src_fc->width;
-    buffer_desc.height       = src_fc->height;
-    buffer_desc.data_size    = desc->objects[0].size;
-    buffer_desc.buffers      = &buffer_handle;
-    buffer_desc.num_buffers  = 1;
-    buffer_desc.flags        = 0;
-
-    k = 0;
-    for (i = 0; i < desc->nb_layers; i++) {
-        for (j = 0; j < desc->layers[i].nb_planes; j++) {
-            buffer_desc.pitches[k] = desc->layers[i].planes[j].pitch;
-            buffer_desc.offsets[k] = desc->layers[i].planes[j].offset;
-            ++k;
+    use_prime2 = !src_vafc->prime_2_import_unsupported &&
+                 desc->objects[0].format_modifier != DRM_FORMAT_MOD_INVALID;
+    if (use_prime2) {
+        VADRMPRIMESurfaceDescriptor prime_desc;
+        VASurfaceAttrib prime_attrs[2] = {
+            {
+                .type  = VASurfaceAttribMemoryType,
+                .flags = VA_SURFACE_ATTRIB_SETTABLE,
+                .value.type    = VAGenericValueTypeInteger,
+                .value.value.i = VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2,
+            },
+            {
+                .type  = VASurfaceAttribExternalBufferDescriptor,
+                .flags = VA_SURFACE_ATTRIB_SETTABLE,
+                .value.type    = VAGenericValueTypePointer,
+                .value.value.p = &prime_desc,
+            }
+        };
+        prime_desc.fourcc = va_fourcc;
+        prime_desc.width = src_fc->width;
+        prime_desc.height = src_fc->height;
+        prime_desc.num_objects = desc->nb_objects;
+        for (i = 0; i < desc->nb_objects; ++i) {
+            prime_desc.objects[i].fd = desc->objects[i].fd;
+            prime_desc.objects[i].size = desc->objects[i].size;
+            prime_desc.objects[i].drm_format_modifier =
+                    desc->objects[i].format_modifier;
+        }
+
+        prime_desc.num_layers = desc->nb_layers;
+        for (i = 0; i < desc->nb_layers; ++i) {
+            prime_desc.layers[i].drm_format = desc->layers[i].format;
+            prime_desc.layers[i].num_planes = desc->layers[i].nb_planes;
+            for (j = 0; j < desc->layers[i].nb_planes; ++j) {
+                prime_desc.layers[i].object_index[j] =
+                        desc->layers[i].planes[j].object_index;
+                prime_desc.layers[i].offset[j] = desc->layers[i].planes[j].offset;
+                prime_desc.layers[i].pitch[j] = desc->layers[i].planes[j].pitch;
+            }
+
+            if (format_desc->chroma_planes_swapped &&
+                desc->layers[i].nb_planes == 3) {
+                FFSWAP(uint32_t, prime_desc.layers[i].pitch[1],
+                    prime_desc.layers[i].pitch[2]);
+                FFSWAP(uint32_t, prime_desc.layers[i].offset[1],
+                    prime_desc.layers[i].offset[2]);
+            }
         }
-    }
-    buffer_desc.num_planes = k;
 
-    if (format_desc->chroma_planes_swapped &&
-        buffer_desc.num_planes == 3) {
-        FFSWAP(uint32_t, buffer_desc.pitches[1], buffer_desc.pitches[2]);
-        FFSWAP(uint32_t, buffer_desc.offsets[1], buffer_desc.offsets[2]);
+        /*
+        * We can query for PRIME_2 support with vaQuerySurfaceAttributes, but that
+        * that needs the config_id which we don't have here . Both Intel and
+        * Gallium seem to do the correct error checks, so lets just try the
+        * PRIME_2 import first.
+        */
+        prime2_vas = vaCreateSurfaces(dst_dev->display, format_desc->rt_format,
+                                      src->width, src->height, &surface_id, 1,
+                                      prime_attrs, FF_ARRAY_ELEMS(prime_attrs));
     }
 
-    vas = vaCreateSurfaces(dst_dev->display, format_desc->rt_format,
-                           src->width, src->height,
-                           &surface_id, 1,
-                           attrs, FF_ARRAY_ELEMS(attrs));
+    if (prime2_vas != VA_STATUS_SUCCESS || !use_prime2) {
+        int k;
+        unsigned long buffer_handle;
+        VASurfaceAttribExternalBuffers buffer_desc;
+        VASurfaceAttrib buffer_attrs[2] = {
+            {
+                .type  = VASurfaceAttribMemoryType,
+                .flags = VA_SURFACE_ATTRIB_SETTABLE,
+                .value.type    = VAGenericValueTypeInteger,
+                .value.value.i = VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME,
+            },
+            {
+                .type  = VASurfaceAttribExternalBufferDescriptor,
+                .flags = VA_SURFACE_ATTRIB_SETTABLE,
+                .value.type    = VAGenericValueTypePointer,
+                .value.value.p = &buffer_desc,
+            }
+        };
+
+        buffer_handle = desc->objects[0].fd;
+        buffer_desc.pixel_format = va_fourcc;
+        buffer_desc.width        = src_fc->width;
+        buffer_desc.height       = src_fc->height;
+        buffer_desc.data_size    = desc->objects[0].size;
+        buffer_desc.buffers      = &buffer_handle;
+        buffer_desc.num_buffers  = 1;
+        buffer_desc.flags        = 0;
+
+        k = 0;
+        for (i = 0; i < desc->nb_layers; i++) {
+            for (j = 0; j < desc->layers[i].nb_planes; j++) {
+                buffer_desc.pitches[k] = desc->layers[i].planes[j].pitch;
+                buffer_desc.offsets[k] = desc->layers[i].planes[j].offset;
+                ++k;
+            }
+        }
+        buffer_desc.num_planes = k;
+
+        if (format_desc->chroma_planes_swapped &&
+            buffer_desc.num_planes == 3) {
+            FFSWAP(uint32_t, buffer_desc.pitches[1], buffer_desc.pitches[2]);
+            FFSWAP(uint32_t, buffer_desc.offsets[1], buffer_desc.offsets[2]);
+        }
+
+        vas = vaCreateSurfaces(dst_dev->display, format_desc->rt_format,
+                               src->width, src->height,
+                               &surface_id, 1,
+                               buffer_attrs, FF_ARRAY_ELEMS(buffer_attrs));
+    }
     if (vas != VA_STATUS_SUCCESS) {
         av_log(dst_fc, AV_LOG_ERROR, "Failed to create surface from DRM "
                "object: %d (%s).\n", vas, vaErrorStr(vas));
@@ -1119,6 +1188,9 @@  static int vaapi_map_from_drm(AVHWFramesContext *src_fc, AVFrame *dst,
     }
     av_log(dst_fc, AV_LOG_DEBUG, "Create surface %#x.\n", surface_id);
 
+    if (prime2_vas != VA_STATUS_SUCCESS)
+        src_vafc->prime_2_import_unsupported = 1;
+
     err = ff_hwframe_map_create(dst->hw_frames_ctx, dst, src,
                                 &vaapi_unmap_from_drm,
                                 (void*)(uintptr_t)surface_id);