diff mbox

[FFmpeg-devel] CUVID: Allow to set number of used surfaces for decoding (resend)

Message ID 5832E40B.7010803@email.cz
State Superseded
Headers show

Commit Message

Miroslav Slugeň Nov. 21, 2016, 12:09 p.m. UTC
We run multiple decoding threads at once, so it is sometimes useful to
lower the number of surfaces used for decoding, which can reduce GPU memory usage.

Comments

Timo Rothenpieler Nov. 21, 2016, 12:29 p.m. UTC | #1
Does not compile:

libavcodec/cuvid.c:861:19: error: 'CuvidContext' has no member named
'surfaces'
 #define OFFSET(x) offsetof(CuvidContext, x)
diff mbox

Patch

>From 30ddf173c87a9da16ba4725f8beea67d8fa6f537 Mon Sep 17 00:00:00 2001
From: Miroslav Slugen <thunder.m@email.cz>
Date: Mon, 21 Nov 2016 10:51:25 +0100
Subject: [PATCH] CUVID: Allow to set number of used surfaces for decoding

---
 libavcodec/cuvid.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/libavcodec/cuvid.c b/libavcodec/cuvid.c
index eafce0a..fbe32fb 100644
--- a/libavcodec/cuvid.c
+++ b/libavcodec/cuvid.c
@@ -32,8 +32,6 @@ 
 
 #include "compat/cuda/nvcuvid.h"
 
-#define MAX_FRAME_COUNT 25
-
 typedef struct CuvidContext
 {
     AVClass *avclass;
@@ -42,6 +40,7 @@  typedef struct CuvidContext
     CUvideoparser cuparser;
 
     char *cu_gpu;
+    int nb_surfaces;
 
     AVBufferRef *hwdevice;
     AVBufferRef *hwframe;
@@ -185,7 +184,7 @@  static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form
     cuinfo.target_rect.right = cuinfo.ulWidth;
     cuinfo.target_rect.bottom = cuinfo.ulHeight;
 
-    cuinfo.ulNumDecodeSurfaces = MAX_FRAME_COUNT;
+    cuinfo.ulNumDecodeSurfaces = ctx->nb_surfaces;
     cuinfo.ulNumOutputSurfaces = 1;
     cuinfo.ulCreationFlags = cudaVideoCreate_PreferCUVID;
     cuinfo.bitDepthMinus8 = format->bit_depth_luma_minus8;
@@ -268,7 +267,7 @@  static int cuvid_decode_packet(AVCodecContext *avctx, const AVPacket *avpkt)
     if (is_flush && avpkt && avpkt->size)
         return AVERROR_EOF;
 
-    if (av_fifo_size(ctx->frame_queue) / sizeof(CuvidParsedFrame) > MAX_FRAME_COUNT - 2 && avpkt && avpkt->size)
+    if ((av_fifo_size(ctx->frame_queue) / sizeof(CuvidParsedFrame)) + 2 > ctx->nb_surfaces && avpkt && avpkt->size)
         return AVERROR(EAGAIN);
 
     if (ctx->bsf && avpkt && avpkt->size) {
@@ -576,7 +575,7 @@  static int cuvid_test_dummy_decoder(AVCodecContext *avctx, CUVIDPARSERPARAMS *cu
     cuinfo.target_rect.right = cuinfo.ulWidth;
     cuinfo.target_rect.bottom = cuinfo.ulHeight;
 
-    cuinfo.ulNumDecodeSurfaces = MAX_FRAME_COUNT;
+    cuinfo.ulNumDecodeSurfaces = ctx->nb_surfaces;
     cuinfo.ulNumOutputSurfaces = 1;
     cuinfo.ulCreationFlags = cudaVideoCreate_PreferCUVID;
     cuinfo.bitDepthMinus8 = 0;
@@ -616,7 +615,7 @@  static av_cold int cuvid_decode_init(AVCodecContext *avctx)
         return ret;
     }
 
-    ctx->frame_queue = av_fifo_alloc(MAX_FRAME_COUNT * sizeof(CuvidParsedFrame));
+    ctx->frame_queue = av_fifo_alloc(ctx->nb_surfaces * sizeof(CuvidParsedFrame));
     if (!ctx->frame_queue) {
         ret = AVERROR(ENOMEM);
         goto error;
@@ -743,7 +742,7 @@  static av_cold int cuvid_decode_init(AVCodecContext *avctx)
                FFMIN(sizeof(ctx->cuparse_ext.raw_seqhdr_data), avctx->extradata_size));
     }
 
-    ctx->cuparseinfo.ulMaxNumDecodeSurfaces = MAX_FRAME_COUNT;
+    ctx->cuparseinfo.ulMaxNumDecodeSurfaces = ctx->nb_surfaces;
     ctx->cuparseinfo.ulMaxDisplayDelay = 4;
     ctx->cuparseinfo.pUserData = avctx;
     ctx->cuparseinfo.pfnSequenceCallback = cuvid_handle_video_sequence;
@@ -802,7 +801,7 @@  static void cuvid_flush(AVCodecContext *avctx)
 
     av_fifo_freep(&ctx->frame_queue);
 
-    ctx->frame_queue = av_fifo_alloc(MAX_FRAME_COUNT * sizeof(CuvidParsedFrame));
+    ctx->frame_queue = av_fifo_alloc(ctx->nb_surfaces * sizeof(CuvidParsedFrame));
     if (!ctx->frame_queue) {
         av_log(avctx, AV_LOG_ERROR, "Failed to recreate frame queue on flush\n");
         return;
@@ -851,6 +850,7 @@  static const AVOption options[] = {
     { "bob",      "Bob deinterlacing",                       0, AV_OPT_TYPE_CONST, { .i64 = cudaVideoDeinterlaceMode_Bob      }, 0, 0, VD, "deint" },
     { "adaptive", "Adaptive deinterlacing",                  0, AV_OPT_TYPE_CONST, { .i64 = cudaVideoDeinterlaceMode_Adaptive }, 0, 0, VD, "deint" },
     { "gpu",      "GPU to be used for decoding", OFFSET(cu_gpu), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, VD },
+    { "surfaces", "Maximum surfaces to be used for decoding", OFFSET(surfaces), AV_OPT_TYPE_INT, { .i64 = 25 }, 0, INT_MAX, VD },
     { NULL }
 };
 
-- 
2.1.4