[FFmpeg-devel,3/3] dnn: export operand info in python script and load in c code

Message ID 1566291034-12857-1-git-send-email-yejun.guo@intel.com
State Accepted
Commit 83e0b71f66f2f0cc293305e2bd138d87660a8c5b

Commit Message

Guo, Yejun Aug. 20, 2019, 8:50 a.m. UTC
Signed-off-by: Guo, Yejun <yejun.guo@intel.com>
---
 libavfilter/dnn/dnn_backend_native.c    |  49 +++++++++++---
 libavfilter/dnn/dnn_backend_native.h    |   2 +-
 libavfilter/dnn_interface.h             |   2 +-
 tools/python/convert_from_tensorflow.py | 111 +++++++++++++++++++++++++++++---
 4 files changed, 142 insertions(+), 22 deletions(-)

Comments

Pedro Arthur Aug. 27, 2019, 2:45 p.m. UTC | #1
hi,

On Tue, Aug 20, 2019 at 05:54, Guo, Yejun <yejun.guo@intel.com> wrote:
>
> Signed-off-by: Guo, Yejun <yejun.guo@intel.com>
> ---
>  libavfilter/dnn/dnn_backend_native.c    |  49 +++++++++++---
>  libavfilter/dnn/dnn_backend_native.h    |   2 +-
>  libavfilter/dnn_interface.h             |   2 +-
>  tools/python/convert_from_tensorflow.py | 111 +++++++++++++++++++++++++++++---
>  4 files changed, 142 insertions(+), 22 deletions(-)
>
> diff --git a/libavfilter/dnn/dnn_backend_native.c b/libavfilter/dnn/dnn_backend_native.c
> index 0ba4e44..eeae711 100644
> --- a/libavfilter/dnn/dnn_backend_native.c
> +++ b/libavfilter/dnn/dnn_backend_native.c
> @@ -72,7 +72,6 @@ DNNModel *ff_dnn_load_model_native(const char *model_filename)
>      ConvolutionalParams *conv_params;
>      DepthToSpaceParams *depth_to_space_params;
>      LayerPadParams *pad_params;
> -    int32_t operand_index = 0;
>
>      model = av_malloc(sizeof(DNNModel));
>      if (!model){
> @@ -93,9 +92,10 @@ DNNModel *ff_dnn_load_model_native(const char *model_filename)
>      }
>      model->model = (void *)network;
>
> -    avio_seek(model_file_context, file_size - 4, SEEK_SET);
> +    avio_seek(model_file_context, file_size - 8, SEEK_SET);
>      network->layers_num = (int32_t)avio_rl32(model_file_context);
> -    dnn_size = 4;
> +    network->operands_num = (int32_t)avio_rl32(model_file_context);
> +    dnn_size = 8;
>      avio_seek(model_file_context, 0, SEEK_SET);
>
I think it is worth adding some means to assert that the input file is
indeed a DNN file; as it stands, the code may allocate an undefined amount of
memory if the file passed in is malformed or corrupted.
Maybe add a magic number + the file size (or something else) at the
beginning of the file and skip parsing early if it does not match?
However, that may require two passes to generate the file, which goes
against your previous patch.

Otherwise I can push it as is, as this behavior was already there
before the patch.
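
Something like the following on the loading side would be enough to bail out
early on a file that is not a native DNN model (the magic value, the size
field and the helper name are purely illustrative, not part of this patch):

#include <stdio.h>
#include <stdint.h>
#include "libavformat/avio.h"
#include "libavutil/macros.h"

/* hypothetical magic written by the converter as the first dword */
#define DNN_NATIVE_MAGIC MKTAG('F', 'F', 'D', 'N')

/* returns 0 when the file starts with the expected magic and declares its
 * own size, so foreign or truncated files are rejected before av_mallocz() */
static int check_model_header(AVIOContext *ctx, int64_t file_size)
{
    avio_seek(ctx, 0, SEEK_SET);
    if (avio_rl32(ctx) != DNN_NATIVE_MAGIC)
        return -1;                 /* not a native DNN model file */
    if ((int64_t)avio_rl32(ctx) != file_size)
        return -1;                 /* declared size does not match the real size */
    return 0;
}

The converter would have to emit those two dwords first, which is where the
two-pass concern above comes from.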

>      network->layers = av_mallocz(network->layers_num * sizeof(Layer));
> @@ -105,11 +105,6 @@ DNNModel *ff_dnn_load_model_native(const char *model_filename)
>          return NULL;
>      }
>
> -    /**
> -     * Operands should be read from model file, the whole change will be huge.
> -     * to make things step by step, we first mock the operands, instead of reading from model file.
> -     */
> -    network->operands_num = network->layers_num + 1;
>      network->operands = av_mallocz(network->operands_num * sizeof(DnnOperand));
>      if (!network->operands){
>          avio_closep(&model_file_context);
> @@ -120,8 +115,6 @@ DNNModel *ff_dnn_load_model_native(const char *model_filename)
>      for (layer = 0; layer < network->layers_num; ++layer){
>          layer_type = (int32_t)avio_rl32(model_file_context);
>          dnn_size += 4;
> -        network->layers[layer].input_operand_indexes[0] = operand_index++;
> -        network->layers[layer].output_operand_index = operand_index;
>          switch (layer_type){
>          case CONV:
>              conv_params = av_malloc(sizeof(ConvolutionalParams));
> @@ -162,6 +155,9 @@ DNNModel *ff_dnn_load_model_native(const char *model_filename)
>              for (i = 0; i < conv_params->output_num; ++i){
>                  conv_params->biases[i] = av_int2float(avio_rl32(model_file_context));
>              }
> +            network->layers[layer].input_operand_indexes[0] = (int32_t)avio_rl32(model_file_context);
> +            network->layers[layer].output_operand_index = (int32_t)avio_rl32(model_file_context);
> +            dnn_size += 8;
>              network->layers[layer].type = CONV;
>              network->layers[layer].params = conv_params;
>              break;
> @@ -174,6 +170,9 @@ DNNModel *ff_dnn_load_model_native(const char *model_filename)
>              }
>              depth_to_space_params->block_size = (int32_t)avio_rl32(model_file_context);
>              dnn_size += 4;
> +            network->layers[layer].input_operand_indexes[0] = (int32_t)avio_rl32(model_file_context);
> +            network->layers[layer].output_operand_index = (int32_t)avio_rl32(model_file_context);
> +            dnn_size += 8;
>              network->layers[layer].type = DEPTH_TO_SPACE;
>              network->layers[layer].params = depth_to_space_params;
>              break;
> @@ -191,6 +190,9 @@ DNNModel *ff_dnn_load_model_native(const char *model_filename)
>                  pad_params->paddings[i][1] = avio_rl32(model_file_context);
>                  dnn_size += 8;
>              }
> +            network->layers[layer].input_operand_indexes[0] = (int32_t)avio_rl32(model_file_context);
> +            network->layers[layer].output_operand_index = (int32_t)avio_rl32(model_file_context);
> +            dnn_size += 8;
>              network->layers[layer].type = MIRROR_PAD;
>              network->layers[layer].params = pad_params;
>              break;
> @@ -201,6 +203,33 @@ DNNModel *ff_dnn_load_model_native(const char *model_filename)
>          }
>      }
>
> +    for (int32_t i = 0; i < network->operands_num; ++i){
> +        DnnOperand *oprd;
> +        int32_t name_len;
> +        int32_t operand_index = (int32_t)avio_rl32(model_file_context);
> +        dnn_size += 4;
> +
> +        oprd = &network->operands[operand_index];
> +        name_len = (int32_t)avio_rl32(model_file_context);
> +        dnn_size += 4;
> +
> +        avio_get_str(model_file_context, name_len, oprd->name, sizeof(oprd->name));
> +        dnn_size += name_len;
> +
> +        oprd->type = (int32_t)avio_rl32(model_file_context);
> +        dnn_size += 4;
> +
> +        oprd->data_type = (int32_t)avio_rl32(model_file_context);
> +        dnn_size += 4;
> +
> +        for (int32_t dim = 0; dim < 4; ++dim) {
> +            oprd->dims[dim] = (int32_t)avio_rl32(model_file_context);
> +            dnn_size += 4;
> +        }
> +
> +        oprd->isNHWC = 1;
> +    }
> +
>      avio_closep(&model_file_context);
>
>      if (dnn_size != file_size){
> diff --git a/libavfilter/dnn/dnn_backend_native.h b/libavfilter/dnn/dnn_backend_native.h
> index d7737ac..172e1e7 100644
> --- a/libavfilter/dnn/dnn_backend_native.h
> +++ b/libavfilter/dnn/dnn_backend_native.h
> @@ -36,7 +36,7 @@ typedef enum {RELU, TANH, SIGMOID, NONE, LEAKY_RELU} DNNActivationFunc;
>
>  typedef enum {VALID, SAME, SAME_CLAMP_TO_EDGE} DNNConvPaddingParam;
>
> -typedef enum {DOT_INPUT, DOT_INTERMEDIATE, DOT_OUTPUT} DNNOperandType;
> +typedef enum {DOT_INPUT = 1, DOT_OUTPUT = 2, DOT_INTERMEDIATE = DOT_INPUT | DOT_OUTPUT} DNNOperandType;
>
>  typedef struct Layer{
>      DNNLayerType type;
> diff --git a/libavfilter/dnn_interface.h b/libavfilter/dnn_interface.h
> index c24df0e..057005f 100644
> --- a/libavfilter/dnn_interface.h
> +++ b/libavfilter/dnn_interface.h
> @@ -32,7 +32,7 @@ typedef enum {DNN_SUCCESS, DNN_ERROR} DNNReturnType;
>
>  typedef enum {DNN_NATIVE, DNN_TF} DNNBackendType;
>
> -typedef enum {DNN_FLOAT, DNN_UINT8} DNNDataType;
> +typedef enum {DNN_FLOAT = 1, DNN_UINT8 = 4} DNNDataType;
>
>  typedef struct DNNInputData{
>      void *data;
> diff --git a/tools/python/convert_from_tensorflow.py b/tools/python/convert_from_tensorflow.py
> index cbc76a9..bab11a5 100644
> --- a/tools/python/convert_from_tensorflow.py
> +++ b/tools/python/convert_from_tensorflow.py
> @@ -23,6 +23,37 @@ import sys, struct
>
>  __all__ = ['convert_from_tensorflow']
>
> +class Operand(object):
> +    IOTYPE_INPUT = 1
> +    IOTYPE_OUTPUT = 2
> +    IOTYPE_INTERMEDIATE = IOTYPE_INPUT | IOTYPE_OUTPUT
> +    DTYPE_FLOAT = 1
> +    DTYPE_UINT8 = 4
> +    index = 0
> +    def __init__(self, name, dtype, dims):
> +        self.name = name
> +        self.dtype = dtype
> +        self.dims = dims
> +        self.iotype = 0
> +        self.used_count = 0
> +        self.index = Operand.index
> +        Operand.index = Operand.index + 1
> +        self.iotype2str = {Operand.IOTYPE_INPUT: 'in', Operand.IOTYPE_OUTPUT: 'out', Operand.IOTYPE_INTERMEDIATE: 'inout'}
> +        self.dtype2str = {Operand.DTYPE_FLOAT: 'DT_FLOAT', Operand.DTYPE_UINT8: 'DT_UINT8'}
> +
> +    def add_iotype(self, iotype):
> +        self.iotype = self.iotype | iotype
> +        if iotype == Operand.IOTYPE_INPUT:
> +            self.used_count = self.used_count + 1
> +
> +    def __str__(self):
> +        return "{}: (name: {}, iotype: {}, dtype: {}, dims: ({},{},{},{}) used_count: {})".format(self.index,
> +                            self.name, self.iotype2str[self.iotype], self.dtype2str[self.dtype],
> +                            self.dims[0], self.dims[1], self.dims[2], self.dims[3], self.used_count)
> +
> +    def __lt__(self, other):
> +        return self.index < other.index
> +
>  class TFConverter:
>      def __init__(self, graph_def, nodes, outfile, dump4tb):
>          self.graph_def = graph_def
> @@ -37,8 +68,28 @@ class TFConverter:
>          self.conv_paddings = {'VALID':0, 'SAME':1}
>          self.converted_nodes = set()
>          self.conv2d_scope_names = set()
> +        self.conv2d_scopename_inputname_dict = {}
>          self.op2code = {'Conv2D':1, 'DepthToSpace':2, 'MirrorPad':3}
>          self.mirrorpad_mode = {'CONSTANT':0, 'REFLECT':1, 'SYMMETRIC':2}
> +        self.name_operand_dict = {}
> +
> +
> +    def add_operand(self, name, type):
> +        node = self.name_node_dict[name]
> +        if name not in self.name_operand_dict:
> +            dtype = node.attr['dtype'].type
> +            if dtype == 0:
> +                dtype = node.attr['T'].type
> +            dims = [-1,-1,-1,-1]
> +            if 'shape' in node.attr:
> +                dims[0] = node.attr['shape'].shape.dim[0].size
> +                dims[1] = node.attr['shape'].shape.dim[1].size
> +                dims[2] = node.attr['shape'].shape.dim[2].size
> +                dims[3] = node.attr['shape'].shape.dim[3].size
> +            operand = Operand(name, dtype, dims)
> +            self.name_operand_dict[name] = operand
> +        self.name_operand_dict[name].add_iotype(type)
> +        return self.name_operand_dict[name].index
>
>
>      def dump_for_tensorboard(self):
> @@ -60,11 +111,10 @@ class TFConverter:
>          # the BiasAdd name is possible be changed into the output name,
>          # if activation is None, and BiasAdd.next is the last op which is Identity
>          if conv2d_scope_name + '/BiasAdd' in self.edges:
> -            activation = self.edges[conv2d_scope_name + '/BiasAdd'][0]
> -            activation = activation.op
> +            anode = self.edges[conv2d_scope_name + '/BiasAdd'][0]
>          else:
> -            activation = 'None'
> -        return knode, bnode, dnode, activation
> +            anode = None
> +        return knode, bnode, dnode, anode
>
>
>      def dump_conv2d_to_file(self, node, f):
> @@ -73,16 +123,21 @@ class TFConverter:
>          self.converted_nodes.add(node.name)
>
>          scope_name = TFConverter.get_scope_name(node.name)
> -        #knode for kernel, bnode for bias, dnode for dilation
> -        knode, bnode, dnode, activation = self.get_conv2d_params(scope_name)
> +        #knode for kernel, bnode for bias, dnode for dilation, anode for activation
> +        knode, bnode, dnode, anode = self.get_conv2d_params(scope_name)
>
>          if dnode is not None:
>              dilation = struct.unpack('i', dnode.attr['value'].tensor.tensor_content[0:4])[0]
>          else:
>              dilation = 1
>
> +        if anode is not None:
> +            activation = anode.op
> +        else:
> +            activation = 'None'
> +
>          padding = node.attr['padding'].s.decode("utf-8")
> -        # conv2d with dilation > 1 generates tens of nodes, not easy to parse them, so use tricky.
> +        # conv2d with dilation > 1 generates tens of nodes, not easy to parse them, so use this tricky method.
>          if dilation > 1 and scope_name + '/stack' in self.name_node_dict:
>              if self.name_node_dict[scope_name + '/stack'].op == "Const":
>                  padding = 'SAME'
> @@ -107,6 +162,15 @@ class TFConverter:
>              bias = btensor.tensor_content
>          f.write(bias)
>
> +        input_name = self.conv2d_scopename_inputname_dict[scope_name]
> +        input_operand_index = self.add_operand(input_name, Operand.IOTYPE_INPUT)
> +
> +        if anode is not None:
> +            output_operand_index = self.add_operand(anode.name, Operand.IOTYPE_OUTPUT)
> +        else:
> +            output_operand_index = self.add_operand(self.edges[bnode.name][0].name, Operand.IOTYPE_OUTPUT)
> +        np.array([input_operand_index, output_operand_index], dtype=np.uint32).tofile(f)
> +
>
>      def dump_depth2space_to_file(self, node, f):
>          assert(node.op == 'DepthToSpace')
> @@ -114,6 +178,9 @@ class TFConverter:
>          block_size = node.attr['block_size'].i
>          np.array([self.op2code[node.op], block_size], dtype=np.uint32).tofile(f)
>          self.converted_nodes.add(node.name)
> +        input_operand_index = self.add_operand(node.input[0], Operand.IOTYPE_INPUT)
> +        output_operand_index = self.add_operand(node.name, Operand.IOTYPE_OUTPUT)
> +        np.array([input_operand_index, output_operand_index], dtype=np.uint32).tofile(f)
>
>
>      def dump_mirrorpad_to_file(self, node, f):
> @@ -127,6 +194,9 @@ class TFConverter:
>          paddings = pnode.attr['value'].tensor.tensor_content
>          f.write(paddings)
>          self.converted_nodes.add(node.name)
> +        input_operand_index = self.add_operand(node.input[0], Operand.IOTYPE_INPUT)
> +        output_operand_index = self.add_operand(node.name, Operand.IOTYPE_OUTPUT)
> +        np.array([input_operand_index, output_operand_index], dtype=np.uint32).tofile(f)
>
>
>      def dump_layers_to_file(self, f):
> @@ -147,10 +217,21 @@ class TFConverter:
>                  self.dump_mirrorpad_to_file(node, f)
>
>
> +    def dump_operands_to_file(self, f):
> +        operands = sorted(self.name_operand_dict.values())
> +        for operand in operands:
> +            #print('{}'.format(operand))
> +            np.array([operand.index, len(operand.name)], dtype=np.uint32).tofile(f)
> +            f.write(operand.name.encode('utf-8'))
> +            np.array([operand.iotype, operand.dtype], dtype=np.uint32).tofile(f)
> +            np.array([operand.dims[0], operand.dims[1], operand.dims[2], operand.dims[3]], dtype=np.uint32).tofile(f)
> +
> +
>      def dump_to_file(self):
>          with open(self.outfile, 'wb') as f:
>              self.dump_layers_to_file(f)
> -            np.array([self.layer_number], dtype=np.uint32).tofile(f)
> +            self.dump_operands_to_file(f)
> +            np.array([self.layer_number, len(self.name_operand_dict)], dtype=np.uint32).tofile(f)
>
>
>      def generate_name_node_dict(self):
> @@ -212,19 +293,29 @@ class TFConverter:
>          return name[0:index]
>
>
> -    def generate_conv2d_scope_names(self):
> +    def generate_conv2d_scope_info(self):
> +        # conv2d is a sub block in graph, get the scope name
>          for node in self.nodes:
>              if node.op == 'Conv2D':
>                  scope = TFConverter.get_scope_name(node.name)
>                  self.conv2d_scope_names.add(scope)
>
> +        # get the input name to the conv2d sub block
> +        for node in self.nodes:
> +            scope = TFConverter.get_scope_name(node.name)
> +            if scope in self.conv2d_scope_names:
> +                if node.op == 'Conv2D' or node.op == 'Shape':
> +                    for inp in node.input:
> +                        if TFConverter.get_scope_name(inp) != scope:
> +                            self.conv2d_scopename_inputname_dict[scope] = inp
> +
>
>      def run(self):
>          self.generate_name_node_dict()
>          self.generate_output_names()
>          self.remove_identity()
>          self.generate_edges()
> -        self.generate_conv2d_scope_names()
> +        self.generate_conv2d_scope_info()
>
>          if self.dump4tb:
>              self.dump_for_tensorboard()
> --
> 2.7.4
>
Guo, Yejun Aug. 29, 2019, 5:41 a.m. UTC | #2
> -----Original Message-----
> From: ffmpeg-devel [mailto:ffmpeg-devel-bounces@ffmpeg.org] On Behalf Of
> Pedro Arthur
> Sent: Tuesday, August 27, 2019 10:46 PM
> To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org>
> Subject: Re: [FFmpeg-devel] [PATCH 3/3] dnn: export operand info in python
> script and load in c code
> 
> hi,
> 
> On Tue, Aug 20, 2019 at 05:54, Guo, Yejun <yejun.guo@intel.com> wrote:
> >
> > Signed-off-by: Guo, Yejun <yejun.guo@intel.com>
> > ---
> >  libavfilter/dnn/dnn_backend_native.c    |  49 +++++++++++---
> >  libavfilter/dnn/dnn_backend_native.h    |   2 +-
> >  libavfilter/dnn_interface.h             |   2 +-
> >  tools/python/convert_from_tensorflow.py | 111
> +++++++++++++++++++++++++++++---
> >  4 files changed, 142 insertions(+), 22 deletions(-)
> >
> > diff --git a/libavfilter/dnn/dnn_backend_native.c
> b/libavfilter/dnn/dnn_backend_native.c
> > index 0ba4e44..eeae711 100644
> > --- a/libavfilter/dnn/dnn_backend_native.c
> > +++ b/libavfilter/dnn/dnn_backend_native.c
> > @@ -72,7 +72,6 @@ DNNModel *ff_dnn_load_model_native(const char
> *model_filename)
> >      ConvolutionalParams *conv_params;
> >      DepthToSpaceParams *depth_to_space_params;
> >      LayerPadParams *pad_params;
> > -    int32_t operand_index = 0;
> >
> >      model = av_malloc(sizeof(DNNModel));
> >      if (!model){
> > @@ -93,9 +92,10 @@ DNNModel *ff_dnn_load_model_native(const char
> *model_filename)
> >      }
> >      model->model = (void *)network;
> >
> > -    avio_seek(model_file_context, file_size - 4, SEEK_SET);
> > +    avio_seek(model_file_context, file_size - 8, SEEK_SET);
> >      network->layers_num = (int32_t)avio_rl32(model_file_context);
> > -    dnn_size = 4;
> > +    network->operands_num = (int32_t)avio_rl32(model_file_context);
> > +    dnn_size = 8;
> >      avio_seek(model_file_context, 0, SEEK_SET);
> >
> I think it is worth adding some means to assert that the input file is
> indeed a DNN file; as it stands, the code may allocate an undefined amount of
> memory if the file passed in is malformed or corrupted.
> Maybe add a magic number + the file size (or something else) at the
> beginning of the file and skip parsing early if it does not match?
> However, that may require two passes to generate the file, which goes
> against your previous patch.
> 
> Otherwise I can push it as is, as this behavior was already there
> before the patch.

Good point. How about adding "FFMPEGDNNNATIVE" + a version_number at the beginning of the file?
Or we can use another magic number instead of "FFMPEGDNNNATIVE". Once we change the model file
format, the version_number should be increased. I can send a new patch after this patch set is pushed.
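
A rough sketch of how the check could look on the loading side (the string,
the version value and the helper name are only placeholders until the actual
follow-up patch):

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include "libavformat/avio.h"

/* placeholder values, to be defined by the follow-up patch */
static const char dnn_magic[] = "FFMPEGDNNNATIVE";
#define DNN_NATIVE_VERSION 1

static int check_dnn_native_header(AVIOContext *ctx)
{
    char buf[sizeof(dnn_magic)] = { 0 };

    avio_seek(ctx, 0, SEEK_SET);
    if (avio_read(ctx, (unsigned char *)buf, sizeof(dnn_magic) - 1) != sizeof(dnn_magic) - 1)
        return -1;
    if (memcmp(buf, dnn_magic, sizeof(dnn_magic) - 1))
        return -1;                 /* not a native DNN model file */
    if (avio_rl32(ctx) != DNN_NATIVE_VERSION)
        return -1;                 /* model file format version mismatch */
    return 0;
}

On the python side, dump_to_file() would just write the same string and a
uint32 version before dumping the layers.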

I think it doesn't matter whether the info is put at the beginning or at the end of the file; avio_seek
does not allocate memory. And layers_num and operands_num serve a similar purpose to file_size.

> 
> >      network->layers = av_mallocz(network->layers_num * sizeof(Layer));
> > @@ -105,11 +105,6 @@ DNNModel *ff_dnn_load_model_native(const char
> *model_filename)
> >          return NULL;
> >      }
Pedro Arthur Aug. 30, 2019, 2:49 p.m. UTC | #3
On Thu, Aug 29, 2019 at 02:42, Guo, Yejun <yejun.guo@intel.com> wrote:
>
>
>
> > -----Original Message-----
> > From: ffmpeg-devel [mailto:ffmpeg-devel-bounces@ffmpeg.org] On Behalf Of
> > Pedro Arthur
> > Sent: Tuesday, August 27, 2019 10:46 PM
> > To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org>
> > Subject: Re: [FFmpeg-devel] [PATCH 3/3] dnn: export operand info in python
> > script and load in c code
> >
> > hi,
> >
> > On Tue, Aug 20, 2019 at 05:54, Guo, Yejun <yejun.guo@intel.com> wrote:
> > >
> > > Signed-off-by: Guo, Yejun <yejun.guo@intel.com>
> > > ---
> > >  libavfilter/dnn/dnn_backend_native.c    |  49 +++++++++++---
> > >  libavfilter/dnn/dnn_backend_native.h    |   2 +-
> > >  libavfilter/dnn_interface.h             |   2 +-
> > >  tools/python/convert_from_tensorflow.py | 111
> > +++++++++++++++++++++++++++++---
> > >  4 files changed, 142 insertions(+), 22 deletions(-)
> > >
> > > diff --git a/libavfilter/dnn/dnn_backend_native.c
> > b/libavfilter/dnn/dnn_backend_native.c
> > > index 0ba4e44..eeae711 100644
> > > --- a/libavfilter/dnn/dnn_backend_native.c
> > > +++ b/libavfilter/dnn/dnn_backend_native.c
> > > @@ -72,7 +72,6 @@ DNNModel *ff_dnn_load_model_native(const char
> > *model_filename)
> > >      ConvolutionalParams *conv_params;
> > >      DepthToSpaceParams *depth_to_space_params;
> > >      LayerPadParams *pad_params;
> > > -    int32_t operand_index = 0;
> > >
> > >      model = av_malloc(sizeof(DNNModel));
> > >      if (!model){
> > > @@ -93,9 +92,10 @@ DNNModel *ff_dnn_load_model_native(const char
> > *model_filename)
> > >      }
> > >      model->model = (void *)network;
> > >
> > > -    avio_seek(model_file_context, file_size - 4, SEEK_SET);
> > > +    avio_seek(model_file_context, file_size - 8, SEEK_SET);
> > >      network->layers_num = (int32_t)avio_rl32(model_file_context);
> > > -    dnn_size = 4;
> > > +    network->operands_num = (int32_t)avio_rl32(model_file_context);
> > > +    dnn_size = 8;
> > >      avio_seek(model_file_context, 0, SEEK_SET);
> > >
> > I think it is worth adding some means to assert that the input file is
> > indeed a DNN file; as it stands, the code may allocate an undefined amount of
> > memory if the file passed in is malformed or corrupted.
> > Maybe add a magic number + the file size (or something else) at the
> > beginning of the file and skip parsing early if it does not match?
> > However, that may require two passes to generate the file, which goes
> > against your previous patch.
> >
> > Otherwise I can push it as is, as this behavior was already there
> > before the patch.
>
> Good point. How about adding "FFMPEGDNNNATIVE" + a version_number at the beginning of the file?
> Or we can use another magic number instead of "FFMPEGDNNNATIVE". Once we change the model file
> format, the version_number should be increased. I can send a new patch after this patch set is pushed.
>
I was thinking of using a single dword but anything will do.


Patch pushed, thanks!
> I think it doesn't matter whether the info is put at the beginning or at the end of the file; avio_seek
> does not allocate memory. And layers_num and operands_num serve a similar purpose to file_size.
>
> >
> > >      network->layers = av_mallocz(network->layers_num * sizeof(Layer));
> > > @@ -105,11 +105,6 @@ DNNModel *ff_dnn_load_model_native(const char
> > *model_filename)
> > >          return NULL;
> > >      }

Patch

diff --git a/libavfilter/dnn/dnn_backend_native.c b/libavfilter/dnn/dnn_backend_native.c
index 0ba4e44..eeae711 100644
--- a/libavfilter/dnn/dnn_backend_native.c
+++ b/libavfilter/dnn/dnn_backend_native.c
@@ -72,7 +72,6 @@  DNNModel *ff_dnn_load_model_native(const char *model_filename)
     ConvolutionalParams *conv_params;
     DepthToSpaceParams *depth_to_space_params;
     LayerPadParams *pad_params;
-    int32_t operand_index = 0;
 
     model = av_malloc(sizeof(DNNModel));
     if (!model){
@@ -93,9 +92,10 @@  DNNModel *ff_dnn_load_model_native(const char *model_filename)
     }
     model->model = (void *)network;
 
-    avio_seek(model_file_context, file_size - 4, SEEK_SET);
+    avio_seek(model_file_context, file_size - 8, SEEK_SET);
     network->layers_num = (int32_t)avio_rl32(model_file_context);
-    dnn_size = 4;
+    network->operands_num = (int32_t)avio_rl32(model_file_context);
+    dnn_size = 8;
     avio_seek(model_file_context, 0, SEEK_SET);
 
     network->layers = av_mallocz(network->layers_num * sizeof(Layer));
@@ -105,11 +105,6 @@  DNNModel *ff_dnn_load_model_native(const char *model_filename)
         return NULL;
     }
 
-    /**
-     * Operands should be read from model file, the whole change will be huge.
-     * to make things step by step, we first mock the operands, instead of reading from model file.
-     */
-    network->operands_num = network->layers_num + 1;
     network->operands = av_mallocz(network->operands_num * sizeof(DnnOperand));
     if (!network->operands){
         avio_closep(&model_file_context);
@@ -120,8 +115,6 @@  DNNModel *ff_dnn_load_model_native(const char *model_filename)
     for (layer = 0; layer < network->layers_num; ++layer){
         layer_type = (int32_t)avio_rl32(model_file_context);
         dnn_size += 4;
-        network->layers[layer].input_operand_indexes[0] = operand_index++;
-        network->layers[layer].output_operand_index = operand_index;
         switch (layer_type){
         case CONV:
             conv_params = av_malloc(sizeof(ConvolutionalParams));
@@ -162,6 +155,9 @@  DNNModel *ff_dnn_load_model_native(const char *model_filename)
             for (i = 0; i < conv_params->output_num; ++i){
                 conv_params->biases[i] = av_int2float(avio_rl32(model_file_context));
             }
+            network->layers[layer].input_operand_indexes[0] = (int32_t)avio_rl32(model_file_context);
+            network->layers[layer].output_operand_index = (int32_t)avio_rl32(model_file_context);
+            dnn_size += 8;
             network->layers[layer].type = CONV;
             network->layers[layer].params = conv_params;
             break;
@@ -174,6 +170,9 @@  DNNModel *ff_dnn_load_model_native(const char *model_filename)
             }
             depth_to_space_params->block_size = (int32_t)avio_rl32(model_file_context);
             dnn_size += 4;
+            network->layers[layer].input_operand_indexes[0] = (int32_t)avio_rl32(model_file_context);
+            network->layers[layer].output_operand_index = (int32_t)avio_rl32(model_file_context);
+            dnn_size += 8;
             network->layers[layer].type = DEPTH_TO_SPACE;
             network->layers[layer].params = depth_to_space_params;
             break;
@@ -191,6 +190,9 @@  DNNModel *ff_dnn_load_model_native(const char *model_filename)
                 pad_params->paddings[i][1] = avio_rl32(model_file_context);
                 dnn_size += 8;
             }
+            network->layers[layer].input_operand_indexes[0] = (int32_t)avio_rl32(model_file_context);
+            network->layers[layer].output_operand_index = (int32_t)avio_rl32(model_file_context);
+            dnn_size += 8;
             network->layers[layer].type = MIRROR_PAD;
             network->layers[layer].params = pad_params;
             break;
@@ -201,6 +203,33 @@  DNNModel *ff_dnn_load_model_native(const char *model_filename)
         }
     }
 
+    for (int32_t i = 0; i < network->operands_num; ++i){
+        DnnOperand *oprd;
+        int32_t name_len;
+        int32_t operand_index = (int32_t)avio_rl32(model_file_context);
+        dnn_size += 4;
+
+        oprd = &network->operands[operand_index];
+        name_len = (int32_t)avio_rl32(model_file_context);
+        dnn_size += 4;
+
+        avio_get_str(model_file_context, name_len, oprd->name, sizeof(oprd->name));
+        dnn_size += name_len;
+
+        oprd->type = (int32_t)avio_rl32(model_file_context);
+        dnn_size += 4;
+
+        oprd->data_type = (int32_t)avio_rl32(model_file_context);
+        dnn_size += 4;
+
+        for (int32_t dim = 0; dim < 4; ++dim) {
+            oprd->dims[dim] = (int32_t)avio_rl32(model_file_context);
+            dnn_size += 4;
+        }
+
+        oprd->isNHWC = 1;
+    }
+
     avio_closep(&model_file_context);
 
     if (dnn_size != file_size){
diff --git a/libavfilter/dnn/dnn_backend_native.h b/libavfilter/dnn/dnn_backend_native.h
index d7737ac..172e1e7 100644
--- a/libavfilter/dnn/dnn_backend_native.h
+++ b/libavfilter/dnn/dnn_backend_native.h
@@ -36,7 +36,7 @@  typedef enum {RELU, TANH, SIGMOID, NONE, LEAKY_RELU} DNNActivationFunc;
 
 typedef enum {VALID, SAME, SAME_CLAMP_TO_EDGE} DNNConvPaddingParam;
 
-typedef enum {DOT_INPUT, DOT_INTERMEDIATE, DOT_OUTPUT} DNNOperandType;
+typedef enum {DOT_INPUT = 1, DOT_OUTPUT = 2, DOT_INTERMEDIATE = DOT_INPUT | DOT_OUTPUT} DNNOperandType;
 
 typedef struct Layer{
     DNNLayerType type;
diff --git a/libavfilter/dnn_interface.h b/libavfilter/dnn_interface.h
index c24df0e..057005f 100644
--- a/libavfilter/dnn_interface.h
+++ b/libavfilter/dnn_interface.h
@@ -32,7 +32,7 @@  typedef enum {DNN_SUCCESS, DNN_ERROR} DNNReturnType;
 
 typedef enum {DNN_NATIVE, DNN_TF} DNNBackendType;
 
-typedef enum {DNN_FLOAT, DNN_UINT8} DNNDataType;
+typedef enum {DNN_FLOAT = 1, DNN_UINT8 = 4} DNNDataType;
 
 typedef struct DNNInputData{
     void *data;
diff --git a/tools/python/convert_from_tensorflow.py b/tools/python/convert_from_tensorflow.py
index cbc76a9..bab11a5 100644
--- a/tools/python/convert_from_tensorflow.py
+++ b/tools/python/convert_from_tensorflow.py
@@ -23,6 +23,37 @@  import sys, struct
 
 __all__ = ['convert_from_tensorflow']
 
+class Operand(object):
+    IOTYPE_INPUT = 1
+    IOTYPE_OUTPUT = 2
+    IOTYPE_INTERMEDIATE = IOTYPE_INPUT | IOTYPE_OUTPUT
+    DTYPE_FLOAT = 1
+    DTYPE_UINT8 = 4
+    index = 0
+    def __init__(self, name, dtype, dims):
+        self.name = name
+        self.dtype = dtype
+        self.dims = dims
+        self.iotype = 0
+        self.used_count = 0
+        self.index = Operand.index
+        Operand.index = Operand.index + 1
+        self.iotype2str = {Operand.IOTYPE_INPUT: 'in', Operand.IOTYPE_OUTPUT: 'out', Operand.IOTYPE_INTERMEDIATE: 'inout'}
+        self.dtype2str = {Operand.DTYPE_FLOAT: 'DT_FLOAT', Operand.DTYPE_UINT8: 'DT_UINT8'}
+
+    def add_iotype(self, iotype):
+        self.iotype = self.iotype | iotype
+        if iotype == Operand.IOTYPE_INPUT:
+            self.used_count = self.used_count + 1
+
+    def __str__(self):
+        return "{}: (name: {}, iotype: {}, dtype: {}, dims: ({},{},{},{}) used_count: {})".format(self.index,
+                            self.name, self.iotype2str[self.iotype], self.dtype2str[self.dtype],
+                            self.dims[0], self.dims[1], self.dims[2], self.dims[3], self.used_count)
+
+    def __lt__(self, other):
+        return self.index < other.index
+
 class TFConverter:
     def __init__(self, graph_def, nodes, outfile, dump4tb):
         self.graph_def = graph_def
@@ -37,8 +68,28 @@  class TFConverter:
         self.conv_paddings = {'VALID':0, 'SAME':1}
         self.converted_nodes = set()
         self.conv2d_scope_names = set()
+        self.conv2d_scopename_inputname_dict = {}
         self.op2code = {'Conv2D':1, 'DepthToSpace':2, 'MirrorPad':3}
         self.mirrorpad_mode = {'CONSTANT':0, 'REFLECT':1, 'SYMMETRIC':2}
+        self.name_operand_dict = {}
+
+
+    def add_operand(self, name, type):
+        node = self.name_node_dict[name]
+        if name not in self.name_operand_dict:
+            dtype = node.attr['dtype'].type
+            if dtype == 0:
+                dtype = node.attr['T'].type
+            dims = [-1,-1,-1,-1]
+            if 'shape' in node.attr:
+                dims[0] = node.attr['shape'].shape.dim[0].size
+                dims[1] = node.attr['shape'].shape.dim[1].size
+                dims[2] = node.attr['shape'].shape.dim[2].size
+                dims[3] = node.attr['shape'].shape.dim[3].size
+            operand = Operand(name, dtype, dims)
+            self.name_operand_dict[name] = operand
+        self.name_operand_dict[name].add_iotype(type)
+        return self.name_operand_dict[name].index
 
 
     def dump_for_tensorboard(self):
@@ -60,11 +111,10 @@  class TFConverter:
         # the BiasAdd name is possible be changed into the output name,
         # if activation is None, and BiasAdd.next is the last op which is Identity
         if conv2d_scope_name + '/BiasAdd' in self.edges:
-            activation = self.edges[conv2d_scope_name + '/BiasAdd'][0]
-            activation = activation.op
+            anode = self.edges[conv2d_scope_name + '/BiasAdd'][0]
         else:
-            activation = 'None'
-        return knode, bnode, dnode, activation
+            anode = None
+        return knode, bnode, dnode, anode
 
 
     def dump_conv2d_to_file(self, node, f):
@@ -73,16 +123,21 @@  class TFConverter:
         self.converted_nodes.add(node.name)
 
         scope_name = TFConverter.get_scope_name(node.name)
-        #knode for kernel, bnode for bias, dnode for dilation
-        knode, bnode, dnode, activation = self.get_conv2d_params(scope_name)
+        #knode for kernel, bnode for bias, dnode for dilation, anode for activation
+        knode, bnode, dnode, anode = self.get_conv2d_params(scope_name)
 
         if dnode is not None:
             dilation = struct.unpack('i', dnode.attr['value'].tensor.tensor_content[0:4])[0]
         else:
             dilation = 1
 
+        if anode is not None:
+            activation = anode.op
+        else:
+            activation = 'None'
+
         padding = node.attr['padding'].s.decode("utf-8")
-        # conv2d with dilation > 1 generates tens of nodes, not easy to parse them, so use tricky.
+        # conv2d with dilation > 1 generates tens of nodes, not easy to parse them, so use this tricky method.
         if dilation > 1 and scope_name + '/stack' in self.name_node_dict:
             if self.name_node_dict[scope_name + '/stack'].op == "Const":
                 padding = 'SAME'
@@ -107,6 +162,15 @@  class TFConverter:
             bias = btensor.tensor_content
         f.write(bias)
 
+        input_name = self.conv2d_scopename_inputname_dict[scope_name]
+        input_operand_index = self.add_operand(input_name, Operand.IOTYPE_INPUT)
+
+        if anode is not None:
+            output_operand_index = self.add_operand(anode.name, Operand.IOTYPE_OUTPUT)
+        else:
+            output_operand_index = self.add_operand(self.edges[bnode.name][0].name, Operand.IOTYPE_OUTPUT)
+        np.array([input_operand_index, output_operand_index], dtype=np.uint32).tofile(f)
+
 
     def dump_depth2space_to_file(self, node, f):
         assert(node.op == 'DepthToSpace')
@@ -114,6 +178,9 @@  class TFConverter:
         block_size = node.attr['block_size'].i
         np.array([self.op2code[node.op], block_size], dtype=np.uint32).tofile(f)
         self.converted_nodes.add(node.name)
+        input_operand_index = self.add_operand(node.input[0], Operand.IOTYPE_INPUT)
+        output_operand_index = self.add_operand(node.name, Operand.IOTYPE_OUTPUT)
+        np.array([input_operand_index, output_operand_index], dtype=np.uint32).tofile(f)
 
 
     def dump_mirrorpad_to_file(self, node, f):
@@ -127,6 +194,9 @@  class TFConverter:
         paddings = pnode.attr['value'].tensor.tensor_content
         f.write(paddings)
         self.converted_nodes.add(node.name)
+        input_operand_index = self.add_operand(node.input[0], Operand.IOTYPE_INPUT)
+        output_operand_index = self.add_operand(node.name, Operand.IOTYPE_OUTPUT)
+        np.array([input_operand_index, output_operand_index], dtype=np.uint32).tofile(f)
 
 
     def dump_layers_to_file(self, f):
@@ -147,10 +217,21 @@  class TFConverter:
                 self.dump_mirrorpad_to_file(node, f)
 
 
+    def dump_operands_to_file(self, f):
+        operands = sorted(self.name_operand_dict.values())
+        for operand in operands:
+            #print('{}'.format(operand))
+            np.array([operand.index, len(operand.name)], dtype=np.uint32).tofile(f)
+            f.write(operand.name.encode('utf-8'))
+            np.array([operand.iotype, operand.dtype], dtype=np.uint32).tofile(f)
+            np.array([operand.dims[0], operand.dims[1], operand.dims[2], operand.dims[3]], dtype=np.uint32).tofile(f)
+
+
     def dump_to_file(self):
         with open(self.outfile, 'wb') as f:
             self.dump_layers_to_file(f)
-            np.array([self.layer_number], dtype=np.uint32).tofile(f)
+            self.dump_operands_to_file(f)
+            np.array([self.layer_number, len(self.name_operand_dict)], dtype=np.uint32).tofile(f)
 
 
     def generate_name_node_dict(self):
@@ -212,19 +293,29 @@  class TFConverter:
         return name[0:index]
 
 
-    def generate_conv2d_scope_names(self):
+    def generate_conv2d_scope_info(self):
+        # conv2d is a sub block in graph, get the scope name
         for node in self.nodes:
             if node.op == 'Conv2D':
                 scope = TFConverter.get_scope_name(node.name)
                 self.conv2d_scope_names.add(scope)
 
+        # get the input name to the conv2d sub block
+        for node in self.nodes:
+            scope = TFConverter.get_scope_name(node.name)
+            if scope in self.conv2d_scope_names:
+                if node.op == 'Conv2D' or node.op == 'Shape':
+                    for inp in node.input:
+                        if TFConverter.get_scope_name(inp) != scope:
+                            self.conv2d_scopename_inputname_dict[scope] = inp
+
 
     def run(self):
         self.generate_name_node_dict()
         self.generate_output_names()
         self.remove_identity()
         self.generate_edges()
-        self.generate_conv2d_scope_names()
+        self.generate_conv2d_scope_info()
 
         if self.dump4tb:
             self.dump_for_tensorboard()