[FFmpeg-devel,1/1] avcodec: NVENC improve debug and print hw capabilities

Submitted by Andreas Håkon on June 2, 2017, 1:19 p.m.

Details

Message ID hQHjEEwosKJxirODfujlgEgTvhWg4cYBCduIYEwAeoeTtWhickjxiqX635AvK_fUiZnfOj09wIMKJGoeAbWYBAqCbATXpjiTwavFZYcbkXw=@protonmail.com
State New
Headers show

Commit Message

Andreas Håkon June 2, 2017, 1:19 p.m.
Hi,

This patch focuses on improving the debug output of NVENC.

Capabilities added:

- Prints HW capabilities of the device.

*** Note: based on work done by Jean-Paul Saman
*** http://mailman.videolan.org/pipermail/vlc-devel/2016-April/107005.html
*** However this updated version doesn’t crash if unknown capabilities exist.

- Describes the setup and indicates if the CUDA context is “global” or “local”

# global = with “-hwaccel cuvid”
# local = without “-hwaccel cuvid”

- Lists other parameters: Used Surfaces, active B-Frames, etc.

- Logs the use of two relevant structures: Surface Queues and Output Frames.

The objective of this patch is to help both developers and users.
Users can check the capabilities of the hardware on which FFmpeg is running.
Developers can use the extra output to troubleshoot problems with “hwaccel cuvid”.
(see the bug described at http://trac.ffmpeg.org/ticket/6420)

As the code isn’t intrusive and is clean, it can be committed to the main branch.
I hope it helps!
A.H.
From 19366759f7dc77ec01e35761d0012aab00a278af Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andreas=20H=C3=A5kon?= <andreas.hakon@protonmail.com>
Date: Fri, 2 Jun 2017 13:58:46 +0100
Subject: [PATCH 1/1] avcodec: NVENC improve debug and print hw capabilities

---
 libavcodec/nvenc.c |  112 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 112 insertions(+)

Patch hide | download patch | download mbox

diff --git a/libavcodec/nvenc.c b/libavcodec/nvenc.c
index 160e642..682e868 100644
--- a/libavcodec/nvenc.c
+++ b/libavcodec/nvenc.c
@@ -240,11 +240,106 @@  static int nvenc_check_cap(AVCodecContext *avctx, NV_ENC_CAPS cap)
     return 0;
 }
 
+
+/* Translate a capability value of 0, 1 or 2 into a label for log output. */
+static const char *nvenc_decode_cap_int(uint32_t val)
+{
+    static const char *const labels[] = { "not supported", "supported", "both" };
+
+    if (val < sizeof(labels) / sizeof(labels[0]))
+        return labels[val];
+    return "unknown";
+}
+
+typedef struct NvencCapabilities {
+    NV_ENC_CAPS caps;                   /* capability identifier */
+    const char *descr;                  /* label printed in the log; NULL in the terminating entry */
+    const char * (*func)(uint32_t val); /* optional value-to-text decoder; NULL when the raw value is printed */
+} NvencCapabilities;
+
+static const NvencCapabilities nvenc_capabilities[] = { /* indexed by NV_ENC_CAPS value */
+    [NV_ENC_CAPS_NUM_MAX_BFRAMES] = { NV_ENC_CAPS_NUM_MAX_BFRAMES, "Maximum number of B-frames", NULL },
+    [NV_ENC_CAPS_SUPPORTED_RATECONTROL_MODES] = { NV_ENC_CAPS_SUPPORTED_RATECONTROL_MODES, "Rate control modes", NULL },
+    [NV_ENC_CAPS_SUPPORT_FIELD_ENCODING] = { NV_ENC_CAPS_SUPPORT_FIELD_ENCODING, "Field mode encoding (interlaced)", nvenc_decode_cap_int },
+    [NV_ENC_CAPS_SUPPORT_MONOCHROME] = { NV_ENC_CAPS_SUPPORT_MONOCHROME, "Monochrome encoding", nvenc_decode_cap_int },
+    [NV_ENC_CAPS_SUPPORT_FMO] = { NV_ENC_CAPS_SUPPORT_FMO, "FMO", nvenc_decode_cap_int },
+    [NV_ENC_CAPS_SUPPORT_QPELMV] = { NV_ENC_CAPS_SUPPORT_QPELMV, "Quarter pel motion estimation", nvenc_decode_cap_int },
+    [NV_ENC_CAPS_SUPPORT_BDIRECT_MODE] = { NV_ENC_CAPS_SUPPORT_BDIRECT_MODE, "BDirect modes", nvenc_decode_cap_int },
+    [NV_ENC_CAPS_SUPPORT_CABAC] = { NV_ENC_CAPS_SUPPORT_CABAC, "CABAC entropy coding mode", nvenc_decode_cap_int },
+    [NV_ENC_CAPS_SUPPORT_ADAPTIVE_TRANSFORM] = { NV_ENC_CAPS_SUPPORT_ADAPTIVE_TRANSFORM, "Adaptive transforms", nvenc_decode_cap_int },
+    [NV_ENC_CAPS_SUPPORT_RESERVED] = { NV_ENC_CAPS_SUPPORT_RESERVED, "Reserved", NULL },
+    [NV_ENC_CAPS_NUM_MAX_TEMPORAL_LAYERS] = { NV_ENC_CAPS_NUM_MAX_TEMPORAL_LAYERS, "Temporal layers", nvenc_decode_cap_int },
+    [NV_ENC_CAPS_SUPPORT_HIERARCHICAL_PFRAMES] = { NV_ENC_CAPS_SUPPORT_HIERARCHICAL_PFRAMES, "Hierarchical P frames", nvenc_decode_cap_int },
+    [NV_ENC_CAPS_SUPPORT_HIERARCHICAL_BFRAMES] = { NV_ENC_CAPS_SUPPORT_HIERARCHICAL_BFRAMES, "Hierarchical B frames", nvenc_decode_cap_int },
+    [NV_ENC_CAPS_LEVEL_MAX] = { NV_ENC_CAPS_LEVEL_MAX, "Maximum encoding level", NULL },
+    [NV_ENC_CAPS_LEVEL_MIN] = { NV_ENC_CAPS_LEVEL_MIN, "Minimum encoding level", NULL },
+    [NV_ENC_CAPS_SEPARATE_COLOUR_PLANE] = { NV_ENC_CAPS_SEPARATE_COLOUR_PLANE, "Separate colour plane encoding", nvenc_decode_cap_int },
+    [NV_ENC_CAPS_WIDTH_MAX] = { NV_ENC_CAPS_WIDTH_MAX, "Maximum output width", NULL },
+    [NV_ENC_CAPS_HEIGHT_MAX] = { NV_ENC_CAPS_HEIGHT_MAX, "Maximum output height", NULL },
+    [NV_ENC_CAPS_SUPPORT_TEMPORAL_SVC] = { NV_ENC_CAPS_SUPPORT_TEMPORAL_SVC, "Temporal Scalability", nvenc_decode_cap_int },
+    [NV_ENC_CAPS_SUPPORT_DYN_RES_CHANGE] = { NV_ENC_CAPS_SUPPORT_DYN_RES_CHANGE, "Dynamic encode resolution change", nvenc_decode_cap_int },
+    [NV_ENC_CAPS_SUPPORT_DYN_BITRATE_CHANGE] = { NV_ENC_CAPS_SUPPORT_DYN_BITRATE_CHANGE, "Dynamic encode bitrate change", nvenc_decode_cap_int },
+    [NV_ENC_CAPS_SUPPORT_DYN_FORCE_CONSTQP] = { NV_ENC_CAPS_SUPPORT_DYN_FORCE_CONSTQP, "Forcing constant QP on the fly", nvenc_decode_cap_int },
+    [NV_ENC_CAPS_SUPPORT_DYN_RCMODE_CHANGE] = { NV_ENC_CAPS_SUPPORT_DYN_RCMODE_CHANGE, "Dynamic rate control mode change", nvenc_decode_cap_int },
+    [NV_ENC_CAPS_SUPPORT_SUBFRAME_READBACK] = { NV_ENC_CAPS_SUPPORT_SUBFRAME_READBACK, "Subframe readback support (slice-based encoding)", nvenc_decode_cap_int },
+    [NV_ENC_CAPS_SUPPORT_CONSTRAINED_ENCODING] = { NV_ENC_CAPS_SUPPORT_CONSTRAINED_ENCODING, "Constrained encoding mode", nvenc_decode_cap_int },
+    [NV_ENC_CAPS_SUPPORT_INTRA_REFRESH] = { NV_ENC_CAPS_SUPPORT_INTRA_REFRESH, "Intra refresh mode support", nvenc_decode_cap_int },
+    [NV_ENC_CAPS_SUPPORT_CUSTOM_VBV_BUF_SIZE] = { NV_ENC_CAPS_SUPPORT_CUSTOM_VBV_BUF_SIZE, "Custom VBV buffer size", nvenc_decode_cap_int },
+    [NV_ENC_CAPS_SUPPORT_DYNAMIC_SLICE_MODE] = { NV_ENC_CAPS_SUPPORT_DYNAMIC_SLICE_MODE, "Dynamic slice mode", nvenc_decode_cap_int },
+    [NV_ENC_CAPS_SUPPORT_REF_PIC_INVALIDATION] = { NV_ENC_CAPS_SUPPORT_REF_PIC_INVALIDATION, "Reference Picture Invalidation", nvenc_decode_cap_int },
+    [NV_ENC_CAPS_PREPROC_SUPPORT] = { NV_ENC_CAPS_PREPROC_SUPPORT, "Preprocessing mask", NULL },
+    [NV_ENC_CAPS_ASYNC_ENCODE_SUPPORT] = { NV_ENC_CAPS_ASYNC_ENCODE_SUPPORT, "Async mode", nvenc_decode_cap_int },
+    [NV_ENC_CAPS_MB_NUM_MAX] = { NV_ENC_CAPS_MB_NUM_MAX, "Maximum MBs per frame", NULL },
+    [NV_ENC_CAPS_MB_PER_SEC_MAX] = { NV_ENC_CAPS_MB_PER_SEC_MAX, "Maximum aggregate throughput (MB/s)", NULL },
+    [NV_ENC_CAPS_SUPPORT_YUV444_ENCODE] = { NV_ENC_CAPS_SUPPORT_YUV444_ENCODE, "YUV444 mode encoding", nvenc_decode_cap_int },
+    [NV_ENC_CAPS_SUPPORT_LOSSLESS_ENCODE] = { NV_ENC_CAPS_SUPPORT_LOSSLESS_ENCODE, "Lossless encoding", nvenc_decode_cap_int },
+    [NV_ENC_CAPS_SUPPORT_SAO] = { NV_ENC_CAPS_SUPPORT_SAO, "Sample Adaptive offset", nvenc_decode_cap_int },
+    [NV_ENC_CAPS_SUPPORT_MEONLY_MODE] = { NV_ENC_CAPS_SUPPORT_MEONLY_MODE, "ME only mode", nvenc_decode_cap_int },
+    /* Sentinel at the last enum index: sizes the table so any capability not listed above is a zeroed (NULL descr) slot. */
+    [NV_ENC_CAPS_EXPOSED_COUNT] = { NV_ENC_CAPS_EXPOSED_COUNT, NULL, NULL },
+};
+
 static int nvenc_check_capabilities(AVCodecContext *avctx)
 {
     NvencContext *ctx = avctx->priv_data;
+    NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &ctx->nvenc_dload_funcs.nvenc_funcs;
+    NVENCSTATUS nvstatus;
     int ret;
 
+    int val = 0;
+    int last = NV_ENC_CAPS_EXPOSED_COUNT;
+    av_log(avctx, AV_LOG_DEBUG, "HW Device Capabilities: (%d)\n",last);
+    for (int capability = 0;
+         capability < NV_ENC_CAPS_EXPOSED_COUNT;
+         capability++) {
+        NV_ENC_CAPS_PARAM param = {0};
+        param.version = NV_ENC_CAPS_PARAM_VER;
+        param.capsToQuery = (NV_ENC_CAPS)capability;
+        if ( nvenc_capabilities[capability].descr == NULL ) {
+            last = capability;
+        }
+        nvstatus = p_nvenc->nvEncGetEncodeCaps(ctx->nvencoder, ctx->init_encode_params.encodeGUID,
+                                               &param, &val);
+        if (nvstatus == NV_ENC_SUCCESS) {
+            if (capability >= last) {
+                 av_log(avctx, AV_LOG_DEBUG, " * (unknown): (0x%x) %d \n",
+                     val, val);
+            } else if (nvenc_capabilities[capability].func) {
+                 av_log(avctx, AV_LOG_DEBUG, " * %s: (%d) %s \n",
+                     nvenc_capabilities[capability].descr, val,
+                     nvenc_capabilities[capability].func(val));
+            } else if (capability == NV_ENC_CAPS_SUPPORTED_RATECONTROL_MODES) {
+                 av_log(avctx, AV_LOG_DEBUG, " * %s: (0x%x) \n", nvenc_capabilities[capability].descr, val);
+            } else if (capability != NV_ENC_CAPS_SUPPORT_RESERVED) {
+                 av_log(avctx, AV_LOG_DEBUG, " * %s: (0x%x) %d \n",
+                     nvenc_capabilities[capability].descr,
+                     val, val);
+            }
+        } else
+             av_log(avctx, AV_LOG_DEBUG, "\tCould not obtain capability '%s' from hardware",
+                     nvenc_capabilities[capability].descr);
+    }
+
     ret = nvenc_check_codec_support(avctx);
     if (ret < 0) {
         av_log(avctx, AV_LOG_VERBOSE, "Codec not supported\n");
@@ -425,6 +520,8 @@  static av_cold int nvenc_setup_device(AVCodecContext *avctx)
         AVCUDADeviceContext *device_hwctx;
         int ret;
 
+        av_log(avctx, AV_LOG_VERBOSE, "NVENC device setup with global hwaccel\n");
+
         if (avctx->hw_frames_ctx) {
             frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
             device_hwctx = frames_ctx->device_ctx->hwctx;
@@ -449,6 +546,8 @@  static av_cold int nvenc_setup_device(AVCodecContext *avctx)
     } else {
         int i, nb_devices = 0;
 
+        av_log(avctx, AV_LOG_VERBOSE, "NVENC device setup with local hwaccel\n");
+
         if ((dl_fn->cuda_dl->cuInit(0)) != CUDA_SUCCESS) {
             av_log(avctx, AV_LOG_ERROR,
                    "Cannot init CUDA\n");
@@ -1116,6 +1215,7 @@  static av_cold int nvenc_setup_encoder(AVCodecContext *avctx)
     cpb_props->avg_bitrate = avctx->bit_rate;
     cpb_props->buffer_size = ctx->encode_config.rcParams.vbvBufferSize;
 
+    av_log(avctx, AV_LOG_VERBOSE, "Setup NVENC completed! (B-Frames config: max=%d,has=%d)\n",avctx->max_b_frames,avctx->has_b_frames);
     return 0;
 }
 
@@ -1226,6 +1326,7 @@  static av_cold int nvenc_setup_surfaces(AVCodecContext *avctx)
         if ((res = nvenc_alloc_surface(avctx, i)) < 0)
             return res;
     }
+    av_log(ctx, AV_LOG_VERBOSE, "Initialized device with %d surfaces\n", ctx->nb_surfaces);
 
     return 0;
 }
@@ -1367,6 +1468,11 @@  static NvencSurface *get_free_frame(NvencContext *ctx)
 {
     NvencSurface *tmp_surf;
 
+    av_log(ctx, AV_LOG_TRACE, "GetFreeFrames Queues: Unused=%03d Output=%03d Outready=%03d (total=%d)\n",
+             av_fifo_size(ctx->unused_surface_queue) / (int)sizeof(tmp_surf),
+             av_fifo_size(ctx->output_surface_queue) / (int)sizeof(tmp_surf),
+             av_fifo_size(ctx->output_surface_ready_queue) / (int)sizeof(tmp_surf),
+             ctx->nb_surfaces);
     if (!(av_fifo_size(ctx->unused_surface_queue) > 0))
         // queue empty
         return NULL;
@@ -1668,17 +1774,22 @@  static int process_output_surface(AVCodecContext *avctx, AVPacket *pkt, NvencSur
 
     switch (lock_params.pictureType) {
     case NV_ENC_PIC_TYPE_IDR:
+        av_log(avctx, AV_LOG_DEBUG, "Output Type IDR Frame\n");
         pkt->flags |= AV_PKT_FLAG_KEY;
     case NV_ENC_PIC_TYPE_I:
+        av_log(avctx, AV_LOG_DEBUG, "Output Type  I  Frame\n");
         pict_type = AV_PICTURE_TYPE_I;
         break;
     case NV_ENC_PIC_TYPE_P:
+        av_log(avctx, AV_LOG_DEBUG, "Output Type  P  Frame\n");
         pict_type = AV_PICTURE_TYPE_P;
         break;
     case NV_ENC_PIC_TYPE_B:
+        av_log(avctx, AV_LOG_DEBUG, "Output Type  B  Frame\n");
         pict_type = AV_PICTURE_TYPE_B;
         break;
     case NV_ENC_PIC_TYPE_BI:
+        av_log(avctx, AV_LOG_DEBUG, "Output Type  BI Frame\n");
         pict_type = AV_PICTURE_TYPE_BI;
         break;
     default:
@@ -1727,6 +1838,7 @@  static int output_ready(AVCodecContext *avctx, int flush)
 
     nb_ready   = av_fifo_size(ctx->output_surface_ready_queue)   / sizeof(NvencSurface*);
     nb_pending = av_fifo_size(ctx->output_surface_queue)         / sizeof(NvencSurface*);
+    av_log(avctx, AV_LOG_TRACE, "OutputReady Frames: Flush=%03d Ready=%03d Pending=%03d (async-depth=%d)\n",flush,nb_ready,nb_pending,ctx->async_depth);
     if (flush)
         return nb_ready > 0;
     return (nb_ready > 0) && (nb_ready + nb_pending >= ctx->async_depth);