diff mbox series

[FFmpeg-devel] set ulMaxDisplayDelay cuviddec parser option to zero if low_delay flag is on

Message ID 20210222180546.136572-1-clime7@gmail.com
State New
Headers show
Series [FFmpeg-devel] set ulMaxDisplayDelay cuviddec parser option to zero if low_delay flag is on
Related show

Checks

Context Check Description
andriy/x86_make success Make finished
andriy/x86_make_fate success Make fate finished
andriy/PPC64_make success Make finished
andriy/PPC64_make_fate success Make fate finished

Commit Message

clime Feb. 22, 2021, 6:05 p.m. UTC
From: Michal Novotny <michal.novotny@comprimato.com>

* zero is the recommended value in NVIDIA's sample code for low-latency use cases

Signed-off-by: Michal Novotny <michal.novotny@comprimato.com>
---
 libavcodec/cuviddec.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

Comments

clime March 16, 2021, 5:15 p.m. UTC | #1
Some measurements:

cpu:  Intel(R) Xeon(R) Silver 4210R CPU @ 2.40GHz
gpu: Quadro RTX 4000

GPU H264 DECODING
=================

NOTE:
- the first value after each file name is the maximum moving average over the past 10 s
- the second value is the overall maximum during the measurement (at least 1 min)
- measured in a custom app using the new FFmpeg API; a few ms could perhaps
be trimmed off by a more efficient implementation
- the "1x LOW DELAY OFF" section is included for comparison

1x LOW DELAY ON
---------------
h264_1920_1080_420_8b_120p_bcount:0_crf:14.ts 14ms 26ms
h264_1920_1080_420_8b_120p_bcount:2_crf:14.ts 28ms 69ms
h264_1920_1080_420_8b_120p_bcount:4_crf:14.ts 28ms 78ms

h264_1920_1080_420_8b_30p_bcount:0_crf:14.ts 41ms  53ms
h264_1920_1080_420_8b_30p_bcount:2_crf:14.ts 105ms 244ms
h264_1920_1080_420_8b_30p_bcount:4_crf:14.ts 105ms 280ms

h264_1920_1080_420_8b_60p_bcount:0_crf:14.ts 23ms 36ms
h264_1920_1080_420_8b_60p_bcount:2_crf:14.ts 53ms 108ms
h264_1920_1080_420_8b_60p_bcount:4_crf:14.ts 54ms 129ms

h264_2880_1620_420_8b_60p_bcount:0_crf:14.ts 27ms 41ms
h264_2880_1620_420_8b_60p_bcount:2_crf:14.ts 55ms 111ms
h264_2880_1620_420_8b_60p_bcount:4_crf:14.ts 55ms 131ms

h264_2880_1620_420_8b_120p_bcount:0_crf:14.ts 20ms 46ms
h264_2880_1620_420_8b_120p_bcount:2_crf:14.ts 30ms 70ms
h264_2880_1620_420_8b_120p_bcount:4_crf:14.ts 31ms 79ms

1x LOW DELAY OFF
----------------
h264_1920_1080_420_8b_120p_bcount:0_crf:14.ts 36ms 48ms
h264_1920_1080_420_8b_120p_bcount:2_crf:14.ts 44ms 85ms
h264_1920_1080_420_8b_120p_bcount:4_crf:14.ts 53ms 103ms

h264_1920_1080_420_8b_30p_bcount:0_crf:14.ts 137ms 151ms
h264_1920_1080_420_8b_30p_bcount:2_crf:14.ts 172ms 309ms
h264_1920_1080_420_8b_30p_bcount:4_crf:14.ts 206ms 380ms

h264_1920_1080_420_8b_60p_bcount:0_crf:14.ts 69ms  82ms
h264_1920_1080_420_8b_60p_bcount:2_crf:14.ts 86ms  162ms
h264_1920_1080_420_8b_60p_bcount:4_crf:14.ts 104ms 210ms

h264_2880_1620_420_8b_60p_bcount:0_crf:14.ts 71ms  86ms
h264_2880_1620_420_8b_60p_bcount:2_crf:14.ts 88ms  161ms
h264_2880_1620_420_8b_60p_bcount:4_crf:14.ts 105ms 212ms

h264_2880_1620_420_8b_120p_bcount:0_crf:14.ts 38ms 52ms
h264_2880_1620_420_8b_120p_bcount:2_crf:14.ts 46ms 85ms
h264_2880_1620_420_8b_120p_bcount:4_crf:14.ts 54ms 106ms

3x LOW DELAY ON
---------------
(nvdec: 40% load)
h264_1920_1080_420_8b_120p_bcount:0_crf:14.ts 14ms 31ms
h264_1920_1080_420_8b_120p_bcount:2_crf:14.ts 27ms 67ms
h264_1920_1080_420_8b_120p_bcount:4_crf:14.ts 28ms 77ms

(nvdec: 24% load)
h264_1920_1080_420_8b_30p_bcount:0_crf:14.ts 41ms  58ms
h264_1920_1080_420_8b_30p_bcount:2_crf:14.ts 105ms 246ms
h264_1920_1080_420_8b_30p_bcount:4_crf:14.ts 105ms 277ms

(nvdec: 35% load)
h264_1920_1080_420_8b_60p_bcount:0_crf:14.ts 23ms 35ms
h264_1920_1080_420_8b_60p_bcount:2_crf:14.ts 53ms 109ms
h264_1920_1080_420_8b_60p_bcount:4_crf:14.ts 53ms 130ms

(nvdec: 36% load)
h264_2880_1620_420_8b_60p_bcount:0_crf:14.ts 26ms 44ms
h264_2880_1620_420_8b_60p_bcount:2_crf:14.ts 54ms 112ms
h264_2880_1620_420_8b_60p_bcount:4_crf:14.ts 55ms 133ms

(nvdec: 62% load)
h264_2880_1620_420_8b_120p_bcount:0_crf:14.ts 18ms 43ms
h264_2880_1620_420_8b_120p_bcount:2_crf:14.ts 30ms 71ms
h264_2880_1620_420_8b_120p_bcount:4_crf:14.ts 31ms 80ms

7x LOW DELAY ON
---------------
(nvdec: 70% load)
h264_1920_1080_420_8b_120p_bcount:0_crf:14.ts 15ms 40ms
h264_1920_1080_420_8b_120p_bcount:2_crf:14.ts 28ms 69ms
h264_1920_1080_420_8b_120p_bcount:4_crf:14.ts 29ms 78ms

11x LOW DELAY ON
----------------
(nvdec: 65% load)
h264_1920_1080_420_8b_60p_bcount:0_crf:14.ts 23ms 45ms
h264_1920_1080_420_8b_60p_bcount:2_crf:14.ts 53ms 108ms
h264_1920_1080_420_8b_60p_bcount:4_crf:14.ts 54ms 131ms

6x LOW DELAY ON
---------------
(nvdec: 70% load)
h264_2880_1620_420_8b_60p_bcount:0_crf:14.ts 28ms 53ms
h264_2880_1620_420_8b_60p_bcount:2_crf:14.ts 55ms 112ms
h264_2880_1620_420_8b_60p_bcount:4_crf:14.ts 55ms 134ms

4x LOW DELAY ON
---------------
(nvdec: 78% load)
h264_2880_1620_420_8b_120p_bcount:0_crf:14.ts 24ms 58ms
h264_2880_1620_420_8b_120p_bcount:2_crf:14.ts 31ms 70ms
h264_2880_1620_420_8b_120p_bcount:4_crf:14.ts 33ms 79ms


Sample bitrates for bcount:0 inputs:
h264_1920_1080_420_8b_120p_bcount:0_crf:14.ts: 89890 kb/s
h264_1920_1080_420_8b_30p_bcount:0_crf:14.ts: 66513 kb/s
h264_1920_1080_420_8b_60p_bcount:0_crf:14.ts: 85399 kb/s
h264_2880_1620_420_8b_60p_bcount:0_crf:14.ts: 138562 kb/s
h264_2880_1620_420_8b_120p_bcount:0_crf:14.ts: 149715 kb/s

Best regards
Michal Novotny

On Mon, 22 Feb 2021 at 18:06, clime <clime7@gmail.com> wrote:
>
> From: Michal Novotny <michal.novotny@comprimato.com>
>
> * zero is recommended value in Nvidia coding samples for low latency use-cases
>
> Signed-off-by: Michal Novotny <michal.novotny@comprimato.com>
> ---
>  libavcodec/cuviddec.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/libavcodec/cuviddec.c b/libavcodec/cuviddec.c
> index 49775b5a09..ec57afdefe 100644
> --- a/libavcodec/cuviddec.c
> +++ b/libavcodec/cuviddec.c
> @@ -999,7 +999,7 @@ static av_cold int cuvid_decode_init(AVCodecContext *avctx)
>      }
>
>      ctx->cuparseinfo.ulMaxNumDecodeSurfaces = ctx->nb_surfaces;
> -    ctx->cuparseinfo.ulMaxDisplayDelay = 4;
> +    ctx->cuparseinfo.ulMaxDisplayDelay = (avctx->flags & AV_CODEC_FLAG_LOW_DELAY) ? 0 : 4;
>      ctx->cuparseinfo.pUserData = avctx;
>      ctx->cuparseinfo.pfnSequenceCallback = cuvid_handle_video_sequence;
>      ctx->cuparseinfo.pfnDecodePicture = cuvid_handle_picture_decode;
> --
> 2.30.1
>
diff mbox series

Patch

diff --git a/libavcodec/cuviddec.c b/libavcodec/cuviddec.c
index 49775b5a09..ec57afdefe 100644
--- a/libavcodec/cuviddec.c
+++ b/libavcodec/cuviddec.c
@@ -999,7 +999,7 @@  static av_cold int cuvid_decode_init(AVCodecContext *avctx)
     }
 
     ctx->cuparseinfo.ulMaxNumDecodeSurfaces = ctx->nb_surfaces;
-    ctx->cuparseinfo.ulMaxDisplayDelay = 4;
+    ctx->cuparseinfo.ulMaxDisplayDelay = (avctx->flags & AV_CODEC_FLAG_LOW_DELAY) ? 0 : 4;
     ctx->cuparseinfo.pUserData = avctx;
     ctx->cuparseinfo.pfnSequenceCallback = cuvid_handle_video_sequence;
     ctx->cuparseinfo.pfnDecodePicture = cuvid_handle_picture_decode;