From patchwork Thu Oct 15 13:17:25 2020
X-Patchwork-Submitter: "Guo, Yejun" <yejun.guo@intel.com>
X-Patchwork-Id: 22965
From: "Guo, Yejun" <yejun.guo@intel.com>
To: ffmpeg-devel@ffmpeg.org
Cc: yejun.guo@intel.com
Date: Thu, 15 Oct 2020 21:17:25 +0800
Message-Id: <20201015131725.23415-1-yejun.guo@intel.com>
X-Mailer: git-send-email 2.17.1
Subject: [FFmpeg-devel] [PATCH V2] dnn_backend_tf.c: add option sess_config for tf backend

The TensorFlow C library accepts a config for session options to set
different parameters for the inference. This patch exports this interface.

The config is a serialized tensorflow.ConfigProto proto, so two steps are
needed to use it:

1. Generate the serialized proto with python (see the script example below).
   The output looks like 0xab...cd, where 0xcd is the least significant byte
   and 0xab is the most significant byte.

2. Pass the python script output into ffmpeg with
   dnn_processing=options=sess_config=0xab...cd

The script example below specifies one GPU. If the system contains 3 GPU
cards, visible_device_list could be '0', '1', '2', '0,1', etc. Note that '0'
does not necessarily mean physical GPU card 0; which card it maps to has to
be found by trying. More options can also be added to the script to generate
other serialized protos.
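The script example below relies on Python 2 string handling (ord() over the
serialized string) and the TF 1.x API. On Python 3 with TensorFlow 2.x, a
roughly equivalent sketch, assuming the tf.compat.v1 compatibility API is
available, could be:

import tensorflow as tf

# Python 3 sketch of the generation step; same idea as the script example below
gpu_options = tf.compat.v1.GPUOptions(visible_device_list='0')
config = tf.compat.v1.ConfigProto(gpu_options=gpu_options)
s = config.SerializeToString()                  # serialized ConfigProto as bytes
b = ''.join('%02x' % byte for byte in s[::-1])  # hex in reversed byte order, as load_tf_model() expects
print('0x%s' % b)

The printed 0x... string is what step 2 above passes via options=sess_config.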
script example to generate serialized proto which specifies one GPU:

import tensorflow as tf
gpu_options = tf.GPUOptions(visible_device_list='0')
config = tf.ConfigProto(gpu_options=gpu_options)
s = config.SerializeToString()
b = ''.join("%02x" % int(ord(b)) for b in s[::-1])
print('0x%s' % b)

Signed-off-by: Guo, Yejun <yejun.guo@intel.com>
---
v2: add the script example as comment within the code

 libavfilter/dnn/dnn_backend_tf.c | 94 ++++++++++++++++++++++++++++++--
 1 file changed, 88 insertions(+), 6 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_tf.c b/libavfilter/dnn/dnn_backend_tf.c
index 7923e1db69..76cc037b94 100644
--- a/libavfilter/dnn/dnn_backend_tf.c
+++ b/libavfilter/dnn/dnn_backend_tf.c
@@ -29,14 +29,20 @@
 #include "dnn_backend_native_layer_depth2space.h"
 #include "libavformat/avio.h"
 #include "libavutil/avassert.h"
+#include "../internal.h"
 #include "dnn_backend_native_layer_pad.h"
 #include "dnn_backend_native_layer_maximum.h"
 #include "dnn_io_proc.h"
 
 #include <tensorflow/c/c_api.h>
 
+typedef struct TFOptions{
+    char *sess_config;
+} TFOptions;
+
 typedef struct TFContext {
     const AVClass *class;
+    TFOptions options;
 } TFContext;
 
 typedef struct TFModel{
@@ -47,14 +53,15 @@ typedef struct TFModel{
     TF_Status *status;
 } TFModel;
 
-static const AVClass dnn_tensorflow_class = {
-    .class_name = "dnn_tensorflow",
-    .item_name  = av_default_item_name,
-    .option     = NULL,
-    .version    = LIBAVUTIL_VERSION_INT,
-    .category   = AV_CLASS_CATEGORY_FILTER,
+#define OFFSET(x) offsetof(TFContext, x)
+#define FLAGS AV_OPT_FLAG_FILTERING_PARAM
+static const AVOption dnn_tensorflow_options[] = {
+    { "sess_config", "config for SessionOptions", OFFSET(options.sess_config), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS },
+    { NULL }
 };
 
+AVFILTER_DEFINE_CLASS(dnn_tensorflow);
+
 static DNNReturnType execute_model_tf(const DNNModel *model, const char *input_name, AVFrame *in_frame,
                                       const char **output_names, uint32_t nb_output, AVFrame *out_frame,
                                       int do_ioproc);
@@ -194,10 +201,64 @@ static DNNReturnType load_tf_model(TFModel *tf_model, const char *model_filename
     TF_ImportGraphDefOptions *graph_opts;
     TF_SessionOptions *sess_opts;
     const TF_Operation *init_op;
+    uint8_t *sess_config = NULL;
+    int sess_config_length = 0;
+
+    // prepare the sess config data
+    if (tf_model->ctx.options.sess_config != NULL) {
+        /*
+        tf_model->ctx.options.sess_config is hex to present the serialized proto
+        required by TF_SetConfig below, so we need to first generate the serialized
+        proto in a python script, the following is a script example to generate
+        serialized proto which specifies one GPU, we can change the script to add
+        more options.
+
+        import tensorflow as tf
+        gpu_options = tf.GPUOptions(visible_device_list='0')
+        config = tf.ConfigProto(gpu_options=gpu_options)
+        s = config.SerializeToString()
+        b = ''.join("%02x" % int(ord(b)) for b in s[::-1])
+        print('0x%s' % b)
+
+        the script output looks like: 0xab...cd, and then pass 0xab...cd to sess_config.
+        */
+        char tmp[3];
+        tmp[2] = '\0';
+
+        if (strncmp(tf_model->ctx.options.sess_config, "0x", 2) != 0) {
+            av_log(ctx, AV_LOG_ERROR, "sess_config should start with '0x'\n");
+            return DNN_ERROR;
+        }
+
+        sess_config_length = strlen(tf_model->ctx.options.sess_config);
+        if (sess_config_length % 2 != 0) {
+            av_log(ctx, AV_LOG_ERROR, "the length of sess_config is not even (%s), "
+                                      "please re-generate the config.\n",
+                                      tf_model->ctx.options.sess_config);
+            return DNN_ERROR;
+        }
+
+        sess_config_length -= 2; //ignore the first '0x'
+        sess_config_length /= 2; //get the data length in byte
+
+        sess_config = av_malloc(sess_config_length);
+        if (!sess_config) {
+            av_log(ctx, AV_LOG_ERROR, "failed to allocate memory\n");
+            return DNN_ERROR;
+        }
+
+        for (int i = 0; i < sess_config_length; i++) {
+            int index = 2 + (sess_config_length - 1 - i) * 2;
+            tmp[0] = tf_model->ctx.options.sess_config[index];
+            tmp[1] = tf_model->ctx.options.sess_config[index + 1];
+            sess_config[i] = strtol(tmp, NULL, 16);
+        }
+    }
 
     graph_def = read_graph(model_filename);
     if (!graph_def){
         av_log(ctx, AV_LOG_ERROR, "Failed to read model \"%s\" graph\n", model_filename);
+        av_freep(&sess_config);
         return DNN_ERROR;
     }
     tf_model->graph = TF_NewGraph();
@@ -210,11 +271,23 @@ static DNNReturnType load_tf_model(TFModel *tf_model, const char *model_filename
         TF_DeleteGraph(tf_model->graph);
         TF_DeleteStatus(tf_model->status);
         av_log(ctx, AV_LOG_ERROR, "Failed to import serialized graph to model graph\n");
+        av_freep(&sess_config);
         return DNN_ERROR;
     }
 
     init_op = TF_GraphOperationByName(tf_model->graph, "init");
     sess_opts = TF_NewSessionOptions();
+
+    if (sess_config) {
+        TF_SetConfig(sess_opts, sess_config, sess_config_length,tf_model->status);
+        av_freep(&sess_config);
+        if (TF_GetCode(tf_model->status) != TF_OK) {
+            av_log(ctx, AV_LOG_ERROR, "Failed to set config for sess options with %s\n",
+                   tf_model->ctx.options.sess_config);
+            return DNN_ERROR;
+        }
+    }
+
     tf_model->session = TF_NewSession(tf_model->graph, sess_opts, tf_model->status);
     TF_DeleteSessionOptions(sess_opts);
     if (TF_GetCode(tf_model->status) != TF_OK)
@@ -609,6 +682,15 @@ DNNModel *ff_dnn_load_model_tf(const char *model_filename, const char *options,
     tf_model->ctx.class = &dnn_tensorflow_class;
     tf_model->model = model;
 
+    //parse options
+    av_opt_set_defaults(&tf_model->ctx);
+    if (av_opt_set_from_string(&tf_model->ctx, options, NULL, "=", "&") < 0) {
+        av_log(&tf_model->ctx, AV_LOG_ERROR, "Failed to parse options \"%s\"\n", options);
+        av_freep(&tf_model);
+        av_freep(&model);
+        return NULL;
+    }
+
     if (load_tf_model(tf_model, model_filename) != DNN_SUCCESS){
         if (load_native_model(tf_model, model_filename) != DNN_SUCCESS){
             av_freep(&tf_model);
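To double-check a generated string before wiring it into a filter graph, the
sketch below (an illustration only, assuming Python 3 and the tf.compat.v1
API) decodes a sess_config value the same way load_tf_model() does and parses
it back into a ConfigProto, so the selected options can be printed and
inspected:

import sys
import tensorflow as tf

def parse_sess_config(hex_str):
    # mirror the C code: require the '0x' prefix and an even number of hex
    # digits, then undo the byte reversal done at generation time
    if not hex_str.startswith('0x') or len(hex_str) % 2 != 0:
        raise ValueError("sess_config must look like 0x<even number of hex digits>")
    data = bytes.fromhex(hex_str[2:])[::-1]
    config = tf.compat.v1.ConfigProto()
    config.ParseFromString(data)
    return config

if __name__ == '__main__':
    # pass the generated 0x... string as the first command line argument
    print(parse_sess_config(sys.argv[1]))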