diff mbox

[FFmpeg-devel,4/4] dnn: convert tf.pad to native model in python script, and load/execute it in the c code.

Message ID 1563256566-28165-1-git-send-email-yejun.guo@intel.com
State Accepted
Commit ccbab41039af424237eaac5c302c293ab97540f8
Headers show

Commit Message

Guo, Yejun July 16, 2019, 5:56 a.m. UTC
since tf.pad is enabled, the conv2d(valid) changes back to its original behavior.

Signed-off-by: Guo, Yejun <yejun.guo@intel.com>
---
 libavfilter/dnn/dnn_backend_native.c    | 35 +++++++++++++++++++++++++++++++++
 libavfilter/dnn/dnn_backend_native.h    |  2 +-
 tools/python/convert_from_tensorflow.py | 23 +++++++++++++++++-----
 3 files changed, 54 insertions(+), 6 deletions(-)

Comments

Guo, Yejun July 24, 2019, 7:34 a.m. UTC | #1
> -----Original Message-----
> From: Guo, Yejun
> Sent: Tuesday, July 16, 2019 1:56 PM
> To: ffmpeg-devel@ffmpeg.org
> Cc: Guo, Yejun <yejun.guo@intel.com>
> Subject: [PATCH 4/4] dnn: convert tf.pad to native model in python script, and
> load/execute it in the c code.
> 
> since tf.pad is enabled, the conv2d(valid) changes back to its original behavior.
> 
> Signed-off-by: Guo, Yejun <yejun.guo@intel.com>
> ---
>  libavfilter/dnn/dnn_backend_native.c    | 35
> +++++++++++++++++++++++++++++++++
>  libavfilter/dnn/dnn_backend_native.h    |  2 +-
>  tools/python/convert_from_tensorflow.py | 23 +++++++++++++++++-----
>  3 files changed, 54 insertions(+), 6 deletions(-)
> 
> diff --git a/libavfilter/dnn/dnn_backend_native.c
> b/libavfilter/dnn/dnn_backend_native.c
> index 82e900b..09c583b 100644
> --- a/libavfilter/dnn/dnn_backend_native.c
> +++ b/libavfilter/dnn/dnn_backend_native.c
> @@ -25,6 +25,7 @@

This patch set is awaiting review, thanks.

I've locally finished more patches to improve the dnn module, and plan to send them set by set, since the patches have dependencies.

Just in case you are interested in my new patches, I've uploaded to https://github.com/guoyejun/ffmpeg/tree/dnn0716. 
For your convenience, I also copy the one-line log here for each patch (from newer to older), grouped into 3 patch sets.

50a3353 fate: add unit test for dnn depth_to_space layer
af9e3ab dnn: separate depth_to_space layer from dnn_backend_native.c to a new file
41b97e4 fate: add unit test for dnn conv2d layer
4143485 dnn: separate conv2d layer from dnn_backend_native.c to a new file

870383e dnn: export operand info in python script and load in c code
650d576 dnn: change .model file format to put layer number at the end of file
d029bf8 dnn: introduce dnn operand (in c code) to hold operand infos within network

c9b9e1c doc/filters: update how to generate native model for derain filter
064aa45 convert_from_tensorflow.py: support conv2d with dilation
1c419a5 convert_from_tensorflow.py: add option to dump graph for visualization in tensorboard
diff mbox

Patch

diff --git a/libavfilter/dnn/dnn_backend_native.c b/libavfilter/dnn/dnn_backend_native.c
index 82e900b..09c583b 100644
--- a/libavfilter/dnn/dnn_backend_native.c
+++ b/libavfilter/dnn/dnn_backend_native.c
@@ -25,6 +25,7 @@ 
 
 #include "dnn_backend_native.h"
 #include "libavutil/avassert.h"
+#include "dnn_backend_native_layer_pad.h"
 
 static DNNReturnType set_input_output_native(void *model, DNNInputData *input, const char *input_name, const char **output_names, uint32_t nb_output)
 {
@@ -32,6 +33,7 @@  static DNNReturnType set_input_output_native(void *model, DNNInputData *input, c
     InputParams *input_params;
     ConvolutionalParams *conv_params;
     DepthToSpaceParams *depth_to_space_params;
+    LayerPadParams *pad_params;
     int cur_width, cur_height, cur_channels;
     int32_t layer;
 
@@ -77,6 +79,12 @@  static DNNReturnType set_input_output_native(void *model, DNNInputData *input, c
             cur_height *= depth_to_space_params->block_size;
             cur_width *= depth_to_space_params->block_size;
             break;
+        case MIRROR_PAD:
+            pad_params = (LayerPadParams *)network->layers[layer].params;
+            cur_height = cur_height + pad_params->paddings[1][0] + pad_params->paddings[1][1];
+            cur_width = cur_width + pad_params->paddings[2][0] + pad_params->paddings[2][1];
+            cur_channels = cur_channels + pad_params->paddings[3][0] + pad_params->paddings[3][1];
+            break;
         default:
             return DNN_ERROR;
         }
@@ -110,6 +118,7 @@  DNNModel *ff_dnn_load_model_native(const char *model_filename)
     DNNLayerType layer_type;
     ConvolutionalParams *conv_params;
     DepthToSpaceParams *depth_to_space_params;
+    LayerPadParams *pad_params;
 
     model = av_malloc(sizeof(DNNModel));
     if (!model){
@@ -207,6 +216,23 @@  DNNModel *ff_dnn_load_model_native(const char *model_filename)
             network->layers[layer].type = DEPTH_TO_SPACE;
             network->layers[layer].params = depth_to_space_params;
             break;
+        case MIRROR_PAD:
+            pad_params = av_malloc(sizeof(LayerPadParams));
+            if (!pad_params){
+                avio_closep(&model_file_context);
+                ff_dnn_free_model_native(&model);
+                return NULL;
+            }
+            pad_params->mode = (int32_t)avio_rl32(model_file_context);
+            dnn_size += 4;
+            for (i = 0; i < 4; ++i) {
+                pad_params->paddings[i][0] = avio_rl32(model_file_context);
+                pad_params->paddings[i][1] = avio_rl32(model_file_context);
+                dnn_size += 8;
+            }
+            network->layers[layer].type = MIRROR_PAD;
+            network->layers[layer].params = pad_params;
+            break;
         default:
             avio_closep(&model_file_context);
             ff_dnn_free_model_native(&model);
@@ -314,6 +340,7 @@  DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, DNNData *output
     InputParams *input_params;
     ConvolutionalParams *conv_params;
     DepthToSpaceParams *depth_to_space_params;
+    LayerPadParams *pad_params;
 
     if (network->layers_num <= 0 || network->layers[0].type != INPUT || !network->layers[0].output){
         return DNN_ERROR;
@@ -348,6 +375,14 @@  DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, DNNData *output
             cur_width *= depth_to_space_params->block_size;
             cur_channels /= depth_to_space_params->block_size * depth_to_space_params->block_size;
             break;
+        case MIRROR_PAD:
+            pad_params = (LayerPadParams *)network->layers[layer].params;
+            dnn_execute_layer_pad(network->layers[layer - 1].output, network->layers[layer].output,
+                                  pad_params, 1, cur_height, cur_width, cur_channels);
+            cur_height = cur_height + pad_params->paddings[1][0] + pad_params->paddings[1][1];
+            cur_width = cur_width + pad_params->paddings[2][0] + pad_params->paddings[2][1];
+            cur_channels = cur_channels + pad_params->paddings[3][0] + pad_params->paddings[3][1];
+            break;
         case INPUT:
             return DNN_ERROR;
         }
diff --git a/libavfilter/dnn/dnn_backend_native.h b/libavfilter/dnn/dnn_backend_native.h
index 532103c..c615d59 100644
--- a/libavfilter/dnn/dnn_backend_native.h
+++ b/libavfilter/dnn/dnn_backend_native.h
@@ -30,7 +30,7 @@ 
 #include "../dnn_interface.h"
 #include "libavformat/avio.h"
 
-typedef enum {INPUT, CONV, DEPTH_TO_SPACE} DNNLayerType;
+typedef enum {INPUT, CONV, DEPTH_TO_SPACE, MIRROR_PAD} DNNLayerType;
 
 typedef enum {RELU, TANH, SIGMOID, NONE, LEAKY_RELU} DNNActivationFunc;
 
diff --git a/tools/python/convert_from_tensorflow.py b/tools/python/convert_from_tensorflow.py
index 37049e5..041c82c 100644
--- a/tools/python/convert_from_tensorflow.py
+++ b/tools/python/convert_from_tensorflow.py
@@ -23,9 +23,6 @@  import sys, struct
 
 __all__ = ['convert_from_tensorflow']
 
-# as the first step to be compatible with vf_sr, it is not general.
-# it will be refined step by step.
-
 class TFConverter:
     def __init__(self, graph_def, nodes, outfile):
         self.graph_def = graph_def
@@ -36,9 +33,10 @@  class TFConverter:
         self.name_node_dict = {}
         self.edges = {}
         self.conv_activations = {'Relu':0, 'Tanh':1, 'Sigmoid':2, 'LeakyRelu':4}
-        self.conv_paddings = {'VALID':2, 'SAME':1}
+        self.conv_paddings = {'VALID':0, 'SAME':1}
         self.converted_nodes = set()
-        self.op2code = {'Conv2D':1, 'DepthToSpace':2}
+        self.op2code = {'Conv2D':1, 'DepthToSpace':2, 'MirrorPad':3}
+        self.mirrorpad_mode = {'CONSTANT':0, 'REFLECT':1, 'SYMMETRIC':2}
 
 
     def dump_for_tensorboard(self):
@@ -101,6 +99,19 @@  class TFConverter:
         self.converted_nodes.add(node.name)
 
 
+    def dump_mirrorpad_to_file(self, node, f):
+        assert(node.op == 'MirrorPad')
+        self.layer_number = self.layer_number + 1
+        mode = node.attr['mode'].s
+        mode = self.mirrorpad_mode[mode.decode("utf-8")]
+        np.array([self.op2code[node.op], mode], dtype=np.uint32).tofile(f)
+        pnode = self.name_node_dict[node.input[1]]
+        self.converted_nodes.add(pnode.name)
+        paddings = pnode.attr['value'].tensor.tensor_content
+        f.write(paddings)
+        self.converted_nodes.add(node.name)
+
+
     def generate_layer_number(self):
         # in current hard code implementation, the layer number is the first data written to the native model file
         # it is not easy to know it at the beginning time in the general converter, so first do a dry run for compatibility
@@ -118,6 +129,8 @@  class TFConverter:
                 self.dump_conv2d_to_file(node, f)
             elif node.op == 'DepthToSpace':
                 self.dump_depth2space_to_file(node, f)
+            elif node.op == 'MirrorPad':
+                self.dump_mirrorpad_to_file(node, f)
 
 
     def dump_to_file(self):