diff mbox series

[FFmpeg-devel,2/2] lavc/vaapi_av1: Avoid sending the same slice buffer multiple times

Message ID 20240428072650.168514-2-nowrep@gmail.com
State New
Headers show
Series [FFmpeg-devel,1/2] lavc/vaapi_decode: Make it possible to send multiple slice params buffers | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished

Commit Message

David Rosca April 28, 2024, 7:26 a.m. UTC
When there are multiple tiles in one slice buffer, use multiple slice
params to avoid sending the same slice buffer multiple times and thus
increasing the bitstream size the driver will need to upload to hw.
---
 libavcodec/vaapi_av1.c | 37 +++++++++++++++++++++++--------------
 1 file changed, 23 insertions(+), 14 deletions(-)

Comments

Mark Thompson May 7, 2024, 8:01 p.m. UTC | #1
On 28/04/2024 08:26, David Rosca wrote:
> When there are multiple tiles in one slice buffer, use multiple slice
> params to avoid sending the same slice buffer multiple times and thus
> increasing the bitstream size the driver will need to upload to hw.
> ---
>  libavcodec/vaapi_av1.c | 37 +++++++++++++++++++++++--------------
>  1 file changed, 23 insertions(+), 14 deletions(-)

Can you confirm that this works on at least iHD (Intel) and Mesa (AMD)?  (I don't expect any issue, but it's good to check in case of something strange going on matching up to what this was previously doing.)

> diff --git a/libavcodec/vaapi_av1.c b/libavcodec/vaapi_av1.c
> index 4a90db1e09..567f505fbd 100644
> --- a/libavcodec/vaapi_av1.c
> +++ b/libavcodec/vaapi_av1.c
> @@ -19,6 +19,7 @@
>   */
>  
>  #include "libavutil/frame.h"
> +#include "libavutil/mem.h"
>  #include "hwaccel_internal.h"
>  #include "vaapi_decode.h"
>  #include "internal.h"
> @@ -393,13 +394,17 @@ static int vaapi_av1_decode_slice(AVCodecContext *avctx,
>  {
>      const AV1DecContext *s = avctx->priv_data;
>      VAAPIDecodePicture *pic = s->cur_frame.hwaccel_picture_private;
> -    VASliceParameterBufferAV1 slice_param;
> -    int err = 0;
> +    VASliceParameterBufferAV1 *slice_params;
> +    int err = 0, nb_params = 0;

Remove the spurious initialisation on err?

>  
> -    for (int i = s->tg_start; i <= s->tg_end; i++) {
> -        memset(&slice_param, 0, sizeof(VASliceParameterBufferAV1));
> +    slice_params = av_calloc(s->tg_end - s->tg_start + 1, sizeof(*slice_params));

I suggest allocating this into VAAPIAV1DecContext to avoid the alloc/free on every call.  (Only reallocate if it needs to be bigger than the previous maximum.)

> +    if (!slice_params) {
> +        err = AVERROR(ENOMEM);
> +        goto fail;
> +    }
>  
> -        slice_param = (VASliceParameterBufferAV1) {
> +    for (int i = s->tg_start; i <= s->tg_end; i++) {
> +        slice_params[nb_params++] = (VASliceParameterBufferAV1) {
>              .slice_data_size   = s->tile_group_info[i].tile_size,
>              .slice_data_offset = s->tile_group_info[i].tile_offset,
>              .slice_data_flag   = VA_SLICE_DATA_FLAG_ALL,
> @@ -408,18 +413,22 @@ static int vaapi_av1_decode_slice(AVCodecContext *avctx,
>              .tg_start          = s->tg_start,
>              .tg_end            = s->tg_end,
>          };
> -
> -        err = ff_vaapi_decode_make_slice_buffer(avctx, pic, &slice_param, 1,
> -                                                sizeof(VASliceParameterBufferAV1),
> -                                                buffer,
> -                                                size);
> -        if (err) {
> -            ff_vaapi_decode_cancel(avctx, pic);
> -            return err;
> -        }
>      }
>  
> +    err = ff_vaapi_decode_make_slice_buffer(avctx, pic, slice_params, nb_params,
> +                                            sizeof(VASliceParameterBufferAV1),
> +                                            buffer,
> +                                            size);
> +    av_free(slice_params);
> +
> +    if (err)
> +        goto fail;
> +
>      return 0;
> +
> +fail:
> +    ff_vaapi_decode_cancel(avctx, pic);
> +    return err;
>  }
>  
>  const FFHWAccel ff_av1_vaapi_hwaccel = {

It's amusing that this quadratic behaviour was around for so long!  (I guess people don't use many tiles.)

Thanks,

- Mark
David Rosca May 8, 2024, 7:17 a.m. UTC | #2
On Tue, May 7, 2024 at 10:01 PM Mark Thompson <sw@jkqxz.net> wrote:
>
> On 28/04/2024 08:26, David Rosca wrote:
> > When there are multiple tiles in one slice buffer, use multiple slice
> > params to avoid sending the same slice buffer multiple times and thus
> > increasing the bitstream size the driver will need to upload to hw.
> > ---
> >  libavcodec/vaapi_av1.c | 37 +++++++++++++++++++++++--------------
> >  1 file changed, 23 insertions(+), 14 deletions(-)
>
> Can you confirm that this works on at least iHD (Intel) and Mesa (AMD)?  (I don't expect any issue, but it's good to check in case of something strange going on matching up to what this was previously doing.)

I've tested with Mesa and it works correctly. Gstreamer also does the same.

https://gitlab.freedesktop.org/gstreamer/gstreamer/-/blob/da35ed69164ba8c8599d337b00ba54074215f7e7/subprojects/gst-plugins-bad/sys/va/gstvaav1dec.c#L891-L912

>
> > diff --git a/libavcodec/vaapi_av1.c b/libavcodec/vaapi_av1.c
> > index 4a90db1e09..567f505fbd 100644
> > --- a/libavcodec/vaapi_av1.c
> > +++ b/libavcodec/vaapi_av1.c
> > @@ -19,6 +19,7 @@
> >   */
> >
> >  #include "libavutil/frame.h"
> > +#include "libavutil/mem.h"
> >  #include "hwaccel_internal.h"
> >  #include "vaapi_decode.h"
> >  #include "internal.h"
> > @@ -393,13 +394,17 @@ static int vaapi_av1_decode_slice(AVCodecContext *avctx,
> >  {
> >      const AV1DecContext *s = avctx->priv_data;
> >      VAAPIDecodePicture *pic = s->cur_frame.hwaccel_picture_private;
> > -    VASliceParameterBufferAV1 slice_param;
> > -    int err = 0;
> > +    VASliceParameterBufferAV1 *slice_params;
> > +    int err = 0, nb_params = 0;
>
> Remove the spurious initialisation on err?

Done in v2.

>
> >
> > -    for (int i = s->tg_start; i <= s->tg_end; i++) {
> > -        memset(&slice_param, 0, sizeof(VASliceParameterBufferAV1));
> > +    slice_params = av_calloc(s->tg_end - s->tg_start + 1, sizeof(*slice_params));
>
> I suggest allocating this into VAAPIAV1DecContext to avoid the alloc/free on every call.  (Only reallocate if it needs to be bigger than the previous maximum.)

Done in v2.

Thanks,
David

>
> > +    if (!slice_params) {
> > +        err = AVERROR(ENOMEM);
> > +        goto fail;
> > +    }
> >
> > -        slice_param = (VASliceParameterBufferAV1) {
> > +    for (int i = s->tg_start; i <= s->tg_end; i++) {
> > +        slice_params[nb_params++] = (VASliceParameterBufferAV1) {
> >              .slice_data_size   = s->tile_group_info[i].tile_size,
> >              .slice_data_offset = s->tile_group_info[i].tile_offset,
> >              .slice_data_flag   = VA_SLICE_DATA_FLAG_ALL,
> > @@ -408,18 +413,22 @@ static int vaapi_av1_decode_slice(AVCodecContext *avctx,
> >              .tg_start          = s->tg_start,
> >              .tg_end            = s->tg_end,
> >          };
> > -
> > -        err = ff_vaapi_decode_make_slice_buffer(avctx, pic, &slice_param, 1,
> > -                                                sizeof(VASliceParameterBufferAV1),
> > -                                                buffer,
> > -                                                size);
> > -        if (err) {
> > -            ff_vaapi_decode_cancel(avctx, pic);
> > -            return err;
> > -        }
> >      }
> >
> > +    err = ff_vaapi_decode_make_slice_buffer(avctx, pic, slice_params, nb_params,
> > +                                            sizeof(VASliceParameterBufferAV1),
> > +                                            buffer,
> > +                                            size);
> > +    av_free(slice_params);
> > +
> > +    if (err)
> > +        goto fail;
> > +
> >      return 0;
> > +
> > +fail:
> > +    ff_vaapi_decode_cancel(avctx, pic);
> > +    return err;
> >  }
> >
> >  const FFHWAccel ff_av1_vaapi_hwaccel = {
>
> It's amusing that this quadratic behaviour was around for so long!  (I guess people don't use many tiles.)
>
> Thanks,
>
> - Mark
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
diff mbox series

Patch

diff --git a/libavcodec/vaapi_av1.c b/libavcodec/vaapi_av1.c
index 4a90db1e09..567f505fbd 100644
--- a/libavcodec/vaapi_av1.c
+++ b/libavcodec/vaapi_av1.c
@@ -19,6 +19,7 @@ 
  */
 
 #include "libavutil/frame.h"
+#include "libavutil/mem.h"
 #include "hwaccel_internal.h"
 #include "vaapi_decode.h"
 #include "internal.h"
@@ -393,13 +394,17 @@  static int vaapi_av1_decode_slice(AVCodecContext *avctx,
 {
     const AV1DecContext *s = avctx->priv_data;
     VAAPIDecodePicture *pic = s->cur_frame.hwaccel_picture_private;
-    VASliceParameterBufferAV1 slice_param;
-    int err = 0;
+    VASliceParameterBufferAV1 *slice_params;
+    int err = 0, nb_params = 0;
 
-    for (int i = s->tg_start; i <= s->tg_end; i++) {
-        memset(&slice_param, 0, sizeof(VASliceParameterBufferAV1));
+    slice_params = av_calloc(s->tg_end - s->tg_start + 1, sizeof(*slice_params));
+    if (!slice_params) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
 
-        slice_param = (VASliceParameterBufferAV1) {
+    for (int i = s->tg_start; i <= s->tg_end; i++) {
+        slice_params[nb_params++] = (VASliceParameterBufferAV1) {
             .slice_data_size   = s->tile_group_info[i].tile_size,
             .slice_data_offset = s->tile_group_info[i].tile_offset,
             .slice_data_flag   = VA_SLICE_DATA_FLAG_ALL,
@@ -408,18 +413,22 @@  static int vaapi_av1_decode_slice(AVCodecContext *avctx,
             .tg_start          = s->tg_start,
             .tg_end            = s->tg_end,
         };
-
-        err = ff_vaapi_decode_make_slice_buffer(avctx, pic, &slice_param, 1,
-                                                sizeof(VASliceParameterBufferAV1),
-                                                buffer,
-                                                size);
-        if (err) {
-            ff_vaapi_decode_cancel(avctx, pic);
-            return err;
-        }
     }
 
+    err = ff_vaapi_decode_make_slice_buffer(avctx, pic, slice_params, nb_params,
+                                            sizeof(VASliceParameterBufferAV1),
+                                            buffer,
+                                            size);
+    av_free(slice_params);
+
+    if (err)
+        goto fail;
+
     return 0;
+
+fail:
+    ff_vaapi_decode_cancel(avctx, pic);
+    return err;
 }
 
 const FFHWAccel ff_av1_vaapi_hwaccel = {