From patchwork Wed Oct 16 02:52:44 2019
X-Patchwork-Submitter: "Guo, Yejun"
X-Patchwork-Id: 15787
From: "Guo, Yejun" <yejun.guo@intel.com>
To: ffmpeg-devel@ffmpeg.org
Cc: yejun.guo@intel.com
Date: Wed, 16 Oct 2019 10:52:44 +0800
Message-Id: <1571194364-16593-1-git-send-email-yejun.guo@intel.com>
X-Mailer: git-send-email 2.7.4
Subject: [FFmpeg-devel] [PATCH 1/4] dnn: add tf.nn.conv2d support for native model

Unlike the conv2d layers generated by the other tf.*.conv2d wrappers, a
direct tf.nn.conv2d call does not create a group of nodes within a scope
in the graph; it behaves like the other simple layers and creates only a
single Conv2D node, with no internal nodes such as 'kernel'.

The native model file format is also changed: a has_bias flag is added,
so the version number is bumped.
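For illustration, this is the kind of graph the converter can now handle; a
minimal TensorFlow 1.x sketch (not part of this patch, names and shapes are
made up) where tf.nn.conv2d is called directly with a constant kernel, so the
frozen graph contains one Conv2D node whose kernel input is a Const node and
no '<scope>/kernel' internal nodes:

    import numpy as np
    import tensorflow as tf

    # a single Conv2D node: kernel supplied as a Const node, no bias applied
    x = tf.placeholder(tf.float32, shape=[1, None, None, 3], name='dnn_in')
    kernel = tf.constant(np.random.rand(3, 3, 3, 2).astype(np.float32))
    y = tf.nn.conv2d(x, kernel, strides=[1, 1, 1, 1], padding='SAME', name='dnn_out')

Such a node is written by the new dump_simple_conv2d_to_file() with has_bias=0,
while the existing scope-based pattern keeps its biases via
dump_complex_conv2d_to_file().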
Signed-off-by: Guo, Yejun <yejun.guo@intel.com>
---
 libavfilter/dnn/dnn_backend_native.c              |  2 +-
 libavfilter/dnn/dnn_backend_native_layer_conv2d.c | 37 +++++++++++-----
 libavfilter/dnn/dnn_backend_native_layer_conv2d.h |  1 +
 tests/dnn/dnn-layer-conv2d-test.c                 |  2 +
 tools/python/convert_from_tensorflow.py           | 54 ++++++++++++++++++++---
 tools/python/convert_header.py                    |  4 +-
 6 files changed, 82 insertions(+), 18 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_native.c b/libavfilter/dnn/dnn_backend_native.c
index 06b010d..ff280b5 100644
--- a/libavfilter/dnn/dnn_backend_native.c
+++ b/libavfilter/dnn/dnn_backend_native.c
@@ -98,7 +98,7 @@ DNNModel *ff_dnn_load_model_native(const char *model_filename)
     char header_expected[] = "FFMPEGDNNNATIVE";
     char *buf;
     size_t size;
-    int version, header_size, major_version_expected = 0;
+    int version, header_size, major_version_expected = 1;
     ConvolutionalNetwork *network = NULL;
     AVIOContext *model_file_context;
     int file_size, dnn_size, parsed_size;
diff --git a/libavfilter/dnn/dnn_backend_native_layer_conv2d.c b/libavfilter/dnn/dnn_backend_native_layer_conv2d.c
index 0de8902..6ec0fa7 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_conv2d.c
+++ b/libavfilter/dnn/dnn_backend_native_layer_conv2d.c
@@ -38,27 +38,41 @@ int dnn_load_layer_conv2d(Layer *layer, AVIOContext *model_file_context, int fil
     conv_params->input_num = (int32_t)avio_rl32(model_file_context);
     conv_params->output_num = (int32_t)avio_rl32(model_file_context);
     conv_params->kernel_size = (int32_t)avio_rl32(model_file_context);
+    conv_params->has_bias = (int32_t)avio_rl32(model_file_context);
+    dnn_size += 28;
+
     kernel_size = conv_params->input_num * conv_params->output_num *
-                  conv_params->kernel_size * conv_params->kernel_size;
-    dnn_size += 24 + (kernel_size + conv_params->output_num << 2);
+                      conv_params->kernel_size * conv_params->kernel_size;
+    dnn_size += kernel_size * 4;
+    if (conv_params->has_bias)
+        dnn_size += conv_params->output_num * 4;
+
     if (dnn_size > file_size || conv_params->input_num <= 0 ||
         conv_params->output_num <= 0 || conv_params->kernel_size <= 0){
         av_freep(&conv_params);
         return 0;
     }
+
     conv_params->kernel = av_malloc(kernel_size * sizeof(float));
-    conv_params->biases = av_malloc(conv_params->output_num * sizeof(float));
-    if (!conv_params->kernel || !conv_params->biases){
-        av_freep(&conv_params->kernel);
-        av_freep(&conv_params->biases);
+    if (!conv_params->kernel) {
         av_freep(&conv_params);
         return 0;
     }
-    for (int i = 0; i < kernel_size; ++i){
+    for (int i = 0; i < kernel_size; ++i) {
         conv_params->kernel[i] = av_int2float(avio_rl32(model_file_context));
     }
-    for (int i = 0; i < conv_params->output_num; ++i){
-        conv_params->biases[i] = av_int2float(avio_rl32(model_file_context));
+
+    conv_params->biases = NULL;
+    if (conv_params->has_bias) {
+        conv_params->biases = av_malloc(conv_params->output_num * sizeof(float));
+        if (!conv_params->biases){
+            av_freep(&conv_params->kernel);
+            av_freep(&conv_params);
+            return 0;
+        }
+        for (int i = 0; i < conv_params->output_num; ++i){
+            conv_params->biases[i] = av_int2float(avio_rl32(model_file_context));
+        }
     }
 
     layer->params = conv_params;
@@ -103,7 +117,10 @@ int dnn_execute_layer_conv2d(DnnOperand *operands, const int32_t *input_operand_
     for (int y = pad_size; y < height - pad_size; ++y) {
         for (int x = pad_size; x < width - pad_size; ++x) {
             for (int n_filter = 0; n_filter < conv_params->output_num; ++n_filter) {
-                output[n_filter] = conv_params->biases[n_filter];
+                if (conv_params->has_bias)
+                    output[n_filter] = conv_params->biases[n_filter];
+                else
+                    output[n_filter] = 0.f;
 
                 for (int ch = 0; ch < conv_params->input_num; ++ch) {
                     for (int kernel_y = 0; kernel_y < conv_params->kernel_size; ++kernel_y) {
diff --git a/libavfilter/dnn/dnn_backend_native_layer_conv2d.h b/libavfilter/dnn/dnn_backend_native_layer_conv2d.h
index db90b2b..bf87264 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_conv2d.h
+++ b/libavfilter/dnn/dnn_backend_native_layer_conv2d.h
@@ -31,6 +31,7 @@ typedef struct ConvolutionalParams{
     DNNActivationFunc activation;
     DNNConvPaddingParam padding_method;
     int32_t dilation;
+    int32_t has_bias;
     float *kernel;
     float *biases;
 } ConvolutionalParams;
diff --git a/tests/dnn/dnn-layer-conv2d-test.c b/tests/dnn/dnn-layer-conv2d-test.c
index 9d13da3..2da01e5 100644
--- a/tests/dnn/dnn-layer-conv2d-test.c
+++ b/tests/dnn/dnn-layer-conv2d-test.c
@@ -97,6 +97,7 @@ static int test_with_same_dilate(void)
     float bias[2] = { -1.6574852, -0.72915393 };
 
     params.activation = TANH;
+    params.has_bias = 1;
     params.biases = bias;
     params.dilation = 2;
     params.input_num = 3;
@@ -196,6 +197,7 @@ static int test_with_valid(void)
     float bias[2] = { -0.4773722, -0.19620377 };
 
     params.activation = TANH;
+    params.has_bias = 1;
     params.biases = bias;
     params.dilation = 1;
     params.input_num = 3;
diff --git a/tools/python/convert_from_tensorflow.py b/tools/python/convert_from_tensorflow.py
index a663b34..605158a 100644
--- a/tools/python/convert_from_tensorflow.py
+++ b/tools/python/convert_from_tensorflow.py
@@ -118,7 +118,7 @@ class TFConverter:
         return knode, bnode, dnode, anode
 
 
-    def dump_conv2d_to_file(self, node, f):
+    def dump_complex_conv2d_to_file(self, node, f):
         assert(node.op == 'Conv2D')
         self.layer_number = self.layer_number + 1
         self.converted_nodes.add(node.name)
@@ -153,7 +153,8 @@
         kernel = kernel.reshape(filter_height, filter_width, in_channels, out_channels)
         kernel = np.transpose(kernel, [3, 0, 1, 2])
 
-        np.array([self.op2code[node.op], dilation, padding, self.conv_activations[activation], in_channels, out_channels, filter_height], dtype=np.uint32).tofile(f)
+        has_bias = 1
+        np.array([self.op2code[node.op], dilation, padding, self.conv_activations[activation], in_channels, out_channels, filter_height, has_bias], dtype=np.uint32).tofile(f)
         kernel.tofile(f)
 
         btensor = bnode.attr['value'].tensor
@@ -173,6 +174,41 @@
         np.array([input_operand_index, output_operand_index], dtype=np.uint32).tofile(f)
 
 
+    def dump_simple_conv2d_to_file(self, node, f):
+        assert(node.op == 'Conv2D')
+        self.layer_number = self.layer_number + 1
+        self.converted_nodes.add(node.name)
+
+        node0 = self.name_node_dict[node.input[0]]
+        node1 = self.name_node_dict[node.input[1]]
+        if node0.op == 'Const':
+            knode = node0
+            input_name = node.input[1]
+        else:
+            knode = node1
+            input_name = node.input[0]
+
+        ktensor = knode.attr['value'].tensor
+        filter_height = ktensor.tensor_shape.dim[0].size
+        filter_width = ktensor.tensor_shape.dim[1].size
+        in_channels = ktensor.tensor_shape.dim[2].size
+        out_channels = ktensor.tensor_shape.dim[3].size
+        kernel = np.frombuffer(ktensor.tensor_content, dtype=np.float32)
+        kernel = kernel.reshape(filter_height, filter_width, in_channels, out_channels)
+        kernel = np.transpose(kernel, [3, 0, 1, 2])
+
+        has_bias = 0
+        dilation = 1
+        padding = node.attr['padding'].s.decode("utf-8")
+        np.array([self.op2code[node.op], dilation, self.conv_paddings[padding], self.conv_activations['None'],
+                  in_channels, out_channels, filter_height, has_bias], dtype=np.uint32).tofile(f)
+        kernel.tofile(f)
+
+        input_operand_index = self.add_operand(input_name, Operand.IOTYPE_INPUT)
+        output_operand_index = self.add_operand(node.name, Operand.IOTYPE_OUTPUT)
+        np.array([input_operand_index, output_operand_index], dtype=np.uint32).tofile(f)
+
+
     def dump_depth2space_to_file(self, node, f):
         assert(node.op == 'DepthToSpace')
         self.layer_number = self.layer_number + 1
@@ -222,10 +258,12 @@
             scope_name = TFConverter.get_scope_name(node.name)
             if scope_name in self.conv2d_scope_names:
                 if node.op == 'Conv2D':
-                    self.dump_conv2d_to_file(node, f)
+                    self.dump_complex_conv2d_to_file(node, f)
                 continue
 
-            if node.op == 'DepthToSpace':
+            if node.op == 'Conv2D':
+                self.dump_simple_conv2d_to_file(node, f)
+            elif node.op == 'DepthToSpace':
                 self.dump_depth2space_to_file(node, f)
             elif node.op == 'MirrorPad':
                 self.dump_mirrorpad_to_file(node, f)
@@ -312,10 +350,16 @@
 
 
     def generate_conv2d_scope_info(self):
-        # conv2d is a sub block in graph, get the scope name
+        # mostly, conv2d is a sub block in graph, get the scope name
         for node in self.nodes:
             if node.op == 'Conv2D':
                 scope = TFConverter.get_scope_name(node.name)
+                # for the case tf.nn.conv2d is called directly
+                if scope == '':
+                    continue
+                # for the case tf.nn.conv2d is called within a scope
+                if scope + '/kernel' not in self.name_node_dict:
+                    continue
                 self.conv2d_scope_names.add(scope)
 
         # get the input name to the conv2d sub block
diff --git a/tools/python/convert_header.py b/tools/python/convert_header.py
index 3c2acd5..67672b2 100644
--- a/tools/python/convert_header.py
+++ b/tools/python/convert_header.py
@@ -20,7 +20,7 @@
 str = 'FFMPEGDNNNATIVE'
 
 # increase major and reset minor when we have to re-convert the model file
-major = 0
+major = 1
 
 # increase minor when we don't have to re-convert the model file
-minor = 2
+minor = 0