[FFmpeg-devel,1/2] lavfi/dnn/dnn_backend_native_layer_batchnormalization: add BN support

Message ID 20210601220448.108767-1-wenlong.ding@intel.com
State New
Series [FFmpeg-devel,1/2] lavfi/dnn/dnn_backend_native_layer_batchnormalization: add BN support

Checks

Context Check Description
andriy/x86_make success Make finished
andriy/x86_make_fate success Make fate finished
andriy/PPC64_make success Make finished
andriy/PPC64_make_fate success Make fate finished

Commit Message

Wenlong Ding June 1, 2021, 10:04 p.m. UTC
Signed-off-by: Wenlong Ding <wenlong.ding@intel.com>
---
 libavfilter/dnn/Makefile                      |   1 +
 libavfilter/dnn/dnn_backend_native.h          |   1 +
 ..._backend_native_layer_batchnormalization.c | 119 ++++++++++++++++++
 ..._backend_native_layer_batchnormalization.h |  44 +++++++
 libavfilter/dnn/dnn_backend_native_layers.c   |   2 +
 tools/python/convert_from_tensorflow.py       |  95 +++++++++++++-
 6 files changed, 258 insertions(+), 4 deletions(-)
 create mode 100644 libavfilter/dnn/dnn_backend_native_layer_batchnormalization.c
 create mode 100644 libavfilter/dnn/dnn_backend_native_layer_batchnormalization.h
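
For reviewers unfamiliar with the operation: per element, the new native layer applies the
standard batch-normalization transform channel-wise over NHWC data, i.e. the same computation
as the loop in ff_dnn_execute_layer_batchnormalization() in the patch below. A rough NumPy
sketch of that computation (illustrative only, not part of the patch):

    import numpy as np

    def batchnorm_reference(src, mean, variance, variance_eps, scale, offset):
        # src is a flattened NHWC buffer; mean/variance hold one value per channel
        channel = mean.shape[0]
        dst = np.empty_like(src)
        for i in range(src.size):
            c = i % channel
            dst[i] = scale * (src[i] - mean[c]) / np.sqrt(variance[c] + variance_eps) + offset
        return dst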

Patch

diff --git a/libavfilter/dnn/Makefile b/libavfilter/dnn/Makefile
index 4cfbce0efc..5f9952f447 100644
--- a/libavfilter/dnn/Makefile
+++ b/libavfilter/dnn/Makefile
@@ -13,6 +13,7 @@  OBJS-$(CONFIG_DNN)                           += dnn/dnn_backend_native_layer_dep
 OBJS-$(CONFIG_DNN)                           += dnn/dnn_backend_native_layer_maximum.o
 OBJS-$(CONFIG_DNN)                           += dnn/dnn_backend_native_layer_mathbinary.o
 OBJS-$(CONFIG_DNN)                           += dnn/dnn_backend_native_layer_mathunary.o
+OBJS-$(CONFIG_DNN)                           += dnn/dnn_backend_native_layer_batchnormalization.o
 
 DNN-OBJS-$(CONFIG_LIBTENSORFLOW)             += dnn/dnn_backend_tf.o
 DNN-OBJS-$(CONFIG_LIBOPENVINO)               += dnn/dnn_backend_openvino.o
diff --git a/libavfilter/dnn/dnn_backend_native.h b/libavfilter/dnn/dnn_backend_native.h
index 89bcb8e358..1f5915d547 100644
--- a/libavfilter/dnn/dnn_backend_native.h
+++ b/libavfilter/dnn/dnn_backend_native.h
@@ -46,6 +46,7 @@  typedef enum {
     DLT_MATH_UNARY = 6,
     DLT_AVG_POOL = 7,
     DLT_DENSE = 8,
+    DLT_BATCHNORMALIZATION = 9,
     DLT_COUNT
 } DNNLayerType;
 
diff --git a/libavfilter/dnn/dnn_backend_native_layer_batchnormalization.c b/libavfilter/dnn/dnn_backend_native_layer_batchnormalization.c
new file mode 100644
index 0000000000..3daac75367
--- /dev/null
+++ b/libavfilter/dnn/dnn_backend_native_layer_batchnormalization.c
@@ -0,0 +1,119 @@ 
+/*
+ * Copyright (c) 2021
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * DNN native backend implementation of the batch normalization layer.
+ */
+
+#include "dnn_backend_native.h"
+#include "libavutil/avassert.h"
+#include "dnn_backend_native_layer_batchnormalization.h"
+
+int ff_dnn_load_layer_batchnormalization(Layer *layer, AVIOContext *model_file_context, int file_size, int operands_num)
+{
+    DnnLayerBatchnormalizationParams *params;
+    int dnn_size = 0;
+    params = av_malloc(sizeof(*params));
+    if (!params)
+        return 0;
+
+    params->channel = avio_rl32(model_file_context);
+    dnn_size += 4;
+
+    params->mean = av_malloc_array(params->channel, sizeof(*params->mean));
+    if (!params->mean) {
+        av_freep(&params);
+        return 0;
+    }
+
+    params->variance = av_malloc_array(params->channel, sizeof(*params->variance));
+    if (!params->variance) {
+        av_freep(&params->mean);
+        av_freep(&params);
+        return 0;
+    }
+
+    for (int32_t i = 0; i < params->channel; ++i) {
+        params->mean[i] = av_int2float(avio_rl32(model_file_context));
+    }
+    for (int32_t i = 0; i < params->channel; ++i) {
+        params->variance[i] = av_int2float(avio_rl32(model_file_context));
+    }
+    dnn_size += params->channel * 4 * 2;
+
+    params->variance_eps = av_int2float(avio_rl32(model_file_context));
+    params->scale = av_int2float(avio_rl32(model_file_context));
+    params->offset = av_int2float(avio_rl32(model_file_context));
+    dnn_size += 12;
+
+    layer->params = params;
+    layer->input_operand_indexes[0] = (int32_t)avio_rl32(model_file_context);
+    layer->output_operand_index = (int32_t)avio_rl32(model_file_context);
+    dnn_size += 8;
+    if (layer->input_operand_indexes[0] >= operands_num || layer->output_operand_index >= operands_num) {
+        return 0;
+    }
+
+    return dnn_size;
+}
+
+int ff_dnn_execute_layer_batchnormalization(DnnOperand *operands, const int32_t *input_operand_indexes,
+                                            int32_t output_operand_index, const void *parameters, NativeContext *ctx)
+{
+    const DnnOperand *input = &operands[input_operand_indexes[0]];
+    DnnOperand *output = &operands[output_operand_index];
+    const DnnLayerBatchnormalizationParams *params = parameters;
+    int dims_count;
+    const float *src;
+    float *dst;
+    int32_t channel_num = params->channel;
+    const float *mean = params->mean;
+    const float *variance = params->variance;
+    float variance_eps = params->variance_eps;
+    float scale = params->scale;
+    float offset = params->offset;
+
+    if (params->channel != input->dims[3])
+        return DNN_ERROR;
+
+    for (int i = 0; i < 4; ++i)
+        output->dims[i] = input->dims[i];
+
+    output->data_type = input->data_type;
+    output->length = ff_calculate_operand_data_length(output);
+    if (output->length <= 0) {
+        av_log(ctx, AV_LOG_ERROR, "The output data length overflow\n");
+        return DNN_ERROR;
+    }
+    output->data = av_realloc(output->data, output->length);
+    if (!output->data) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to reallocate memory for output\n");
+        return DNN_ERROR;
+    }
+
+    dims_count = ff_calculate_operand_dims_count(output);
+    src = input->data;
+    dst = output->data;
+    for (int i = 0; i < dims_count; ++i) {
+        dst[i] = scale * (src[i] - mean[i % channel_num]) / sqrtf(variance[i % channel_num] + variance_eps) + offset;
+    }
+    return 0;
+}
diff --git a/libavfilter/dnn/dnn_backend_native_layer_batchnormalization.h b/libavfilter/dnn/dnn_backend_native_layer_batchnormalization.h
new file mode 100644
index 0000000000..8d87430fcc
--- /dev/null
+++ b/libavfilter/dnn/dnn_backend_native_layer_batchnormalization.h
@@ -0,0 +1,44 @@ 
+/*
+ * Copyright (c) 2021
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * DNN inference functions interface for the native backend batch normalization layer.
+ */
+
+#ifndef AVFILTER_DNN_DNN_BACKEND_NATIVE_LAYER_BATCHNORMALIZATION_H
+#define AVFILTER_DNN_DNN_BACKEND_NATIVE_LAYER_BATCHNORMALIZATION_H
+
+#include "dnn_backend_native.h"
+
+typedef struct DnnLayerBatchnormalizationParams {
+    int32_t channel;
+    float *mean;
+    float *variance;
+    float offset;
+    float scale;
+    float variance_eps;
+} DnnLayerBatchnormalizationParams;
+
+int ff_dnn_load_layer_batchnormalization(Layer *layer, AVIOContext *model_file_context, int file_size, int operands_num);
+int ff_dnn_execute_layer_batchnormalization(DnnOperand *operands, const int32_t *input_operand_indexes,
+                                            int32_t output_operand_index, const void *parameters, NativeContext *ctx);
+
+#endif
diff --git a/libavfilter/dnn/dnn_backend_native_layers.c b/libavfilter/dnn/dnn_backend_native_layers.c
index 492939fd36..da1cd6e541 100644
--- a/libavfilter/dnn/dnn_backend_native_layers.c
+++ b/libavfilter/dnn/dnn_backend_native_layers.c
@@ -28,6 +28,7 @@ 
 #include "dnn_backend_native_layer_mathunary.h"
 #include "dnn_backend_native_layer_avgpool.h"
 #include "dnn_backend_native_layer_dense.h"
+#include "dnn_backend_native_layer_batchnormalization.h"
 
 const LayerFunc ff_layer_funcs[DLT_COUNT] = {
     {NULL, NULL},
@@ -39,4 +40,5 @@  const LayerFunc ff_layer_funcs[DLT_COUNT] = {
     {ff_dnn_execute_layer_math_unary,  ff_dnn_load_layer_math_unary},
     {ff_dnn_execute_layer_avg_pool,    ff_dnn_load_layer_avg_pool},
     {ff_dnn_execute_layer_dense,       ff_dnn_load_layer_dense},
+    {ff_dnn_execute_layer_batchnormalization, ff_dnn_load_layer_batchnormalization},
 };
diff --git a/tools/python/convert_from_tensorflow.py b/tools/python/convert_from_tensorflow.py
index 38e64c1c94..3c7e9909fe 100644
--- a/tools/python/convert_from_tensorflow.py
+++ b/tools/python/convert_from_tensorflow.py
@@ -74,7 +74,8 @@  class TFConverter:
         self.dense_scope_names = set()
         self.dense_scopename_inputname_dict = {}
         self.op2code = {'Conv2D':1, 'DepthToSpace':2, 'MirrorPad':3, 'Maximum':4,
-                        'MathBinary':5, 'MathUnary':6, 'AvgPool':7, 'MatMul':8}
+                        'MathBinary':5, 'MathUnary':6, 'AvgPool':7, 'MatMul':8,
+                        'BatchNormalization':9}
         self.mathbin2code = {'Sub':0, 'Add':1, 'Mul':2, 'RealDiv':3, 'Minimum':4, 'FloorMod':5}
         self.mathun2code  = {'Abs':0, 'Sin':1, 'Cos':2, 'Tan':3, 'Asin':4,
                 'Acos':5, 'Atan':6, 'Sinh':7, 'Cosh':8, 'Tanh':9, 'Asinh':10,
@@ -82,6 +83,8 @@  class TFConverter:
                 'Exp':16}
         self.mirrorpad_mode = {'CONSTANT':0, 'REFLECT':1, 'SYMMETRIC':2}
         self.name_operand_dict = {}
+        self.batchnorm_scope_names = set()
+        self.batchnorm_scopename_inputname_dict = {}
 
 
     def add_operand(self, name, type):
@@ -145,6 +148,29 @@  class TFConverter:
         return knode, bnode, anode
 
 
+    def get_batchnorm_params(self, batchnorm_scope_name):
+        variance_node_name = self.name_node_dict[batchnorm_scope_name + '/add'].input[0]
+        variance_eps_node_name = self.name_node_dict[batchnorm_scope_name + '/add'].input[1]
+        scale_node_name = self.name_node_dict[batchnorm_scope_name + '/mul'].input[1]
+        mean_node_name = self.name_node_dict[batchnorm_scope_name + '/mul_2'].input[0]
+        offset_node_name = self.name_node_dict[batchnorm_scope_name + '/sub'].input[0]
+
+        variance_node = self.name_node_dict[variance_node_name]
+        variance_eps_node = self.name_node_dict[variance_eps_node_name]
+        scale_node = self.name_node_dict[scale_node_name]
+        mean_node = self.name_node_dict[mean_node_name]
+        offset_node = self.name_node_dict[offset_node_name]
+        # the add_1 node may have been renamed to the output name:
+        # if add_1 is not in self.name_node_dict, the node following sub is the last op, which is an Identity
+        if batchnorm_scope_name + '/add_1' in self.name_node_dict:
+            output_node = self.name_node_dict[batchnorm_scope_name + '/add_1']
+        elif batchnorm_scope_name + '/sub' in self.edges:
+            output_node = self.edges[batchnorm_scope_name + '/sub'][0]
+        else:
+            output_node = None
+        return variance_node, variance_eps_node, scale_node, mean_node, offset_node, output_node
+
+
     def dump_complex_conv2d_to_file(self, node, f):
         assert(node.op == 'Conv2D')
         self.layer_number = self.layer_number + 1
@@ -372,6 +398,38 @@  class TFConverter:
         np.array([output_operand_index],dtype=np.uint32).tofile(f)
 
 
+    def dump_batchnormalization_to_file(self, node, f):
+        self.layer_number = self.layer_number + 1
+        self.converted_nodes.add(node.name)
+        scope_name = TFConverter.get_scope_name(node.name)
+        variance_node, variance_eps_node, scale_node, mean_node, offset_node, output_node = self.get_batchnorm_params(scope_name.split('/')[0])
+
+        bn_tensor_mean = mean_node.attr['value']
+        channel_mean = bn_tensor_mean.tensor.tensor_shape.dim[0].size
+        mean = np.frombuffer(bn_tensor_mean.tensor.tensor_content, np.float32)
+        bn_tensor_variance = variance_node.attr['value']
+        channel_variance = bn_tensor_variance.tensor.tensor_shape.dim[0].size
+        variance = np.frombuffer(bn_tensor_variance.tensor.tensor_content, np.float32)
+
+        offset = offset_node.attr['value'].tensor.float_val
+        scale = scale_node.attr['value'].tensor.float_val
+        variance_eps = variance_eps_node.attr['value'].tensor.float_val
+
+        channel = channel_mean
+        np.array([self.op2code['BatchNormalization'], channel], dtype=np.uint32).tofile(f)
+        np.array([mean], dtype=np.float32).tofile(f)
+        np.array([variance], dtype=np.float32).tofile(f)
+        np.array([variance_eps, scale, offset], dtype=np.float32).tofile(f)
+
+        input_name = self.batchnorm_scopename_inputname_dict[scope_name.split('/')[0]]
+        input_operand_index = self.add_operand(input_name, Operand.IOTYPE_INPUT)
+        if output_node is not None:
+            output_operand_index = self.add_operand(output_node.name, Operand.IOTYPE_OUTPUT)
+            np.array([input_operand_index, output_operand_index],dtype=np.uint32).tofile(f)
+        else:
+            print('error: batchnormalization node has no output')
+
+
     def dump_avg_pool_to_file(self, node, f):
         assert(node.op == 'AvgPool')
         self.layer_number = self.layer_number + 1
@@ -417,7 +475,10 @@  class TFConverter:
                 if node.op == 'MatMul':
                     self.dump_dense_to_file(node, f)
                 continue
-
+            if self.in_batchnorm_scope(node.name):
+                if node.op == 'Sub':
+                    self.dump_batchnormalization_to_file(node, f)
+                continue
 
             if node.op == 'Conv2D':
                 self.dump_simple_conv2d_to_file(node, f)
@@ -541,8 +602,20 @@  class TFConverter:
                 return True
         return False
 
+
+    def in_batchnorm_scope(self, name):
+        inner_scope = TFConverter.get_scope_name(name)
+        if inner_scope == "":
+            return False
+        for scope in self.batchnorm_scope_names:
+            index = inner_scope.find(scope)
+            if index == 0:
+                return True
+        return False
+
+
     def generate_sub_block_op_scope_info(self):
-        # mostly, conv2d/dense is a sub block in graph, get the scope name
+        # mostly, conv2d/dense/batchnorm is a sub block in graph, get the scope name
         for node in self.nodes:
             if node.op == 'Conv2D':
                 scope = TFConverter.get_scope_name(node.name)
@@ -562,8 +635,17 @@  class TFConverter:
                 if scope + '/kernel' not in self.name_node_dict and scope.split('/Tensordot')[0] + '/kernel' not in self.name_node_dict:
                     continue
                 self.dense_scope_names.add(scope.split('/Tensordot')[0])
+            elif node.op == 'Sub':
+                scope = TFConverter.get_scope_name(node.name)
+                # for the case tf.nn.batch_normalization is called directly
+                if scope == '':
+                    continue
+                # for the case tf.nn.batch_normalization is called with a scope
+                if scope + '/Rsqrt' not in self.name_node_dict and scope + '/add' not in self.name_node_dict and scope + '/mul' not in self.name_node_dict and scope + '/mul_1' not in self.name_node_dict and scope + '/sub' not in self.name_node_dict and scope + '/mul_2' not in self.name_node_dict:
+                    continue
+                self.batchnorm_scope_names.add(scope)
 
-        # get the input name to the conv2d/dense sub block
+        # get the input name to the conv2d/dense/batchnorm sub block
         for node in self.nodes:
             scope = TFConverter.get_scope_name(node.name)
             if scope in self.conv2d_scope_names:
@@ -581,6 +663,11 @@  class TFConverter:
                     for inp in node.input:
                         if TFConverter.get_scope_name(inp).find(scope)<0 and TFConverter.get_scope_name(inp).find(scope.split('/')[0])<0:
                             self.dense_scopename_inputname_dict[scope.split('/Tensordot')[0]] = inp
+            elif scope in self.batchnorm_scope_names:
+                if node.op == 'Mul' and scope + '/mul_1' == node.name:
+                    for inp in node.input:
+                        if TFConverter.get_scope_name(inp) != scope:
+                             self.batchnorm_scopename_inputname_dict[scope] = inp
 
 
     def run(self):
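
For context (not part of this patch): the converter changes key on the node pattern that
tf.nn.batch_normalization() expands into in a TF1 graph (add, Rsqrt, mul, mul_1, mul_2, sub,
add_1 under one scope), which is the pattern get_batchnorm_params() walks to recover the
constants, and which the scope detection on 'Sub' nodes checks for. A minimal sketch of a
graph this is intended to recognize, with illustrative names and shapes (my assumptions, not
taken from the patch):

    import numpy as np
    import tensorflow.compat.v1 as tf
    tf.disable_eager_execution()

    x = tf.placeholder(tf.float32, shape=[1, None, None, 3], name='dnn_in')
    mean = tf.constant(np.zeros(3, dtype=np.float32))
    variance = tf.constant(np.ones(3, dtype=np.float32))
    # scale/offset/variance_epsilon are plain scalars here, matching the single
    # float values that dump_batchnormalization_to_file() serializes
    y = tf.nn.batch_normalization(x, mean, variance, offset=0.1, scale=1.5,
                                  variance_epsilon=1e-5)
    y = tf.identity(y, name='dnn_out')

After freezing such a graph to a .pb, it would go through tools/python/convert.py as usual;
the 'Sub' node inside the batchnorm scope is what triggers dump_batchnormalization_to_file().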