diff mbox

[FFmpeg-devel,1/4] libavfilter/dnn: add layer maximum for native mode.

Message ID 1568951748-6023-1-git-send-email-yejun.guo@intel.com
State Accepted
Commit b2683c66b215ee3b67628880b93f7371d21bc946
Headers show

Commit Message

Guo, Yejun Sept. 20, 2019, 3:55 a.m. UTC
The reason to add this layer is that it is used by srcnn in vf_sr.
This layer is currently ignored in native mode. After this patch,
we can add multiple outputs support for native mode.

Signed-off-by: Guo, Yejun <yejun.guo@intel.com>
---
 libavfilter/dnn/Makefile                           |  1 +
 libavfilter/dnn/dnn_backend_native.c               | 36 ++++++++++++++-
 libavfilter/dnn/dnn_backend_native.h               |  6 +--
 libavfilter/dnn/dnn_backend_native_layer_maximum.c | 54 ++++++++++++++++++++++
 libavfilter/dnn/dnn_backend_native_layer_maximum.h | 42 +++++++++++++++++
 libavfilter/dnn/dnn_backend_tf.c                   | 47 +++++++++++++++++++
 tools/python/convert_from_tensorflow.py            | 17 ++++++-
 tools/python/convert_header.py                     |  2 +-
 8 files changed, 198 insertions(+), 7 deletions(-)
 create mode 100644 libavfilter/dnn/dnn_backend_native_layer_maximum.c
 create mode 100644 libavfilter/dnn/dnn_backend_native_layer_maximum.h

Comments

Pedro Arthur Sept. 20, 2019, 2:17 p.m. UTC | #1
Hi,

Em sex, 20 de set de 2019 às 01:00, Guo, Yejun <yejun.guo@intel.com> escreveu:
>
> The reason to add this layer is that it is used by srcnn in vf_sr.
> This layer is currently ignored in native mode. After this patch,
> we can add multiple outputs support for native mode.
>
I did not quite understand the commit message. Where does srcnn need
a max layer?
What is the relation between the max layer and supporting multiple outputs?

> Signed-off-by: Guo, Yejun <yejun.guo@intel.com>
> ---
>  libavfilter/dnn/Makefile                           |  1 +
>  libavfilter/dnn/dnn_backend_native.c               | 36 ++++++++++++++-
>  libavfilter/dnn/dnn_backend_native.h               |  6 +--
>  libavfilter/dnn/dnn_backend_native_layer_maximum.c | 54 ++++++++++++++++++++++
>  libavfilter/dnn/dnn_backend_native_layer_maximum.h | 42 +++++++++++++++++
>  libavfilter/dnn/dnn_backend_tf.c                   | 47 +++++++++++++++++++
>  tools/python/convert_from_tensorflow.py            | 17 ++++++-
>  tools/python/convert_header.py                     |  2 +-
>  8 files changed, 198 insertions(+), 7 deletions(-)
>  create mode 100644 libavfilter/dnn/dnn_backend_native_layer_maximum.c
>  create mode 100644 libavfilter/dnn/dnn_backend_native_layer_maximum.h
>
> diff --git a/libavfilter/dnn/Makefile b/libavfilter/dnn/Makefile
> index 63a35e7..721094d 100644
> --- a/libavfilter/dnn/Makefile
> +++ b/libavfilter/dnn/Makefile
> @@ -3,6 +3,7 @@ OBJS-$(CONFIG_DNN)                           += dnn/dnn_backend_native.o
>  OBJS-$(CONFIG_DNN)                           += dnn/dnn_backend_native_layer_pad.o
>  OBJS-$(CONFIG_DNN)                           += dnn/dnn_backend_native_layer_conv2d.o
>  OBJS-$(CONFIG_DNN)                           += dnn/dnn_backend_native_layer_depth2space.o
> +OBJS-$(CONFIG_DNN)                           += dnn/dnn_backend_native_layer_maximum.o
>
>  DNN-OBJS-$(CONFIG_LIBTENSORFLOW)             += dnn/dnn_backend_tf.o
>
> diff --git a/libavfilter/dnn/dnn_backend_native.c b/libavfilter/dnn/dnn_backend_native.c
> index be548c6..22a9a33 100644
> --- a/libavfilter/dnn/dnn_backend_native.c
> +++ b/libavfilter/dnn/dnn_backend_native.c
> @@ -28,6 +28,7 @@
>  #include "dnn_backend_native_layer_pad.h"
>  #include "dnn_backend_native_layer_conv2d.h"
>  #include "dnn_backend_native_layer_depth2space.h"
> +#include "dnn_backend_native_layer_maximum.h"
>
>  static DNNReturnType set_input_output_native(void *model, DNNInputData *input, const char *input_name, const char **output_names, uint32_t nb_output)
>  {
> @@ -78,6 +79,7 @@ DNNModel *ff_dnn_load_model_native(const char *model_filename)
>      ConvolutionalParams *conv_params;
>      DepthToSpaceParams *depth_to_space_params;
>      LayerPadParams *pad_params;
> +    DnnLayerMaximumParams *maximum_params;
>
>      model = av_malloc(sizeof(DNNModel));
>      if (!model){
> @@ -237,6 +239,21 @@ DNNModel *ff_dnn_load_model_native(const char *model_filename)
>              network->layers[layer].type = MIRROR_PAD;
>              network->layers[layer].params = pad_params;
>              break;
> +        case MAXIMUM:
> +            maximum_params = av_malloc(sizeof(*maximum_params));
> +            if (!maximum_params){
> +                avio_closep(&model_file_context);
> +                ff_dnn_free_model_native(&model);
> +                return NULL;
> +            }
> +            maximum_params->val.u32 = avio_rl32(model_file_context);
> +            dnn_size += 4;
> +            network->layers[layer].type = MAXIMUM;
> +            network->layers[layer].params = maximum_params;
> +            network->layers[layer].input_operand_indexes[0] = (int32_t)avio_rl32(model_file_context);
> +            network->layers[layer].output_operand_index = (int32_t)avio_rl32(model_file_context);
> +            dnn_size += 8;
> +            break;
>          default:
>              avio_closep(&model_file_context);
>              ff_dnn_free_model_native(&model);
> @@ -290,6 +307,7 @@ DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, DNNData *output
>      ConvolutionalParams *conv_params;
>      DepthToSpaceParams *depth_to_space_params;
>      LayerPadParams *pad_params;
> +    DnnLayerMaximumParams *maximum_params;
>
>      if (network->layers_num <= 0 || network->operands_num <= 0)
>          return DNN_ERROR;
> @@ -313,6 +331,11 @@ DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, DNNData *output
>              dnn_execute_layer_pad(network->operands, network->layers[layer].input_operand_indexes,
>                                    network->layers[layer].output_operand_index, pad_params);
>              break;
> +        case MAXIMUM:
> +            maximum_params = (DnnLayerMaximumParams *)network->layers[layer].params;
> +            dnn_execute_layer_maximum(network->operands, network->layers[layer].input_operand_indexes,
> +                                  network->layers[layer].output_operand_index, maximum_params);
> +            break;
>          case INPUT:
>              return DNN_ERROR;
>          }
> @@ -333,10 +356,19 @@ DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, DNNData *output
>      return DNN_SUCCESS;
>  }
>
> -int32_t calculate_operand_data_length(DnnOperand* operand)
> +int32_t calculate_operand_dims_count(const DnnOperand *oprd)
> +{
> +    int32_t result = 1;
> +    for (int i = 0; i < 4; ++i)
> +        result *= oprd->dims[i];
> +
> +    return result;
> +}
> +
> +int32_t calculate_operand_data_length(const DnnOperand* oprd)
>  {
>      // currently, we just support DNN_FLOAT
> -    return operand->dims[0] * operand->dims[1] * operand->dims[2] * operand->dims[3] * sizeof(float);
> +    return oprd->dims[0] * oprd->dims[1] * oprd->dims[2] * oprd->dims[3] * sizeof(float);
>  }
>
>  void ff_dnn_free_model_native(DNNModel **model)
> diff --git a/libavfilter/dnn/dnn_backend_native.h b/libavfilter/dnn/dnn_backend_native.h
> index a74d138..b238d18 100644
> --- a/libavfilter/dnn/dnn_backend_native.h
> +++ b/libavfilter/dnn/dnn_backend_native.h
> @@ -30,7 +30,7 @@
>  #include "../dnn_interface.h"
>  #include "libavformat/avio.h"
>
> -typedef enum {INPUT, CONV, DEPTH_TO_SPACE, MIRROR_PAD} DNNLayerType;
> +typedef enum {INPUT = 0, CONV = 1, DEPTH_TO_SPACE = 2, MIRROR_PAD = 3, MAXIMUM = 4} DNNLayerType;
>
>  typedef enum {DOT_INPUT = 1, DOT_OUTPUT = 2, DOT_INTERMEDIATE = DOT_INPUT | DOT_INPUT} DNNOperandType;
>
> @@ -104,6 +104,6 @@ DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, DNNData *output
>
>  void ff_dnn_free_model_native(DNNModel **model);
>
> -int32_t calculate_operand_data_length(DnnOperand *operand);
> -
> +int32_t calculate_operand_data_length(const DnnOperand *oprd);
> +int32_t calculate_operand_dims_count(const DnnOperand *oprd);
>  #endif
> diff --git a/libavfilter/dnn/dnn_backend_native_layer_maximum.c b/libavfilter/dnn/dnn_backend_native_layer_maximum.c
> new file mode 100644
> index 0000000..a2669af
> --- /dev/null
> +++ b/libavfilter/dnn/dnn_backend_native_layer_maximum.c
> @@ -0,0 +1,54 @@
> +/*
> + * Copyright (c) 2019 Guo Yejun
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +/**
> + * @file
> + * DNN native backend implementation.
> + */
> +
> +#include "dnn_backend_native.h"
> +#include "libavutil/avassert.h"
> +#include "dnn_backend_native_layer_maximum.h"
> +
> +int dnn_execute_layer_maximum(DnnOperand *operands, const int32_t *input_operand_indexes, int32_t output_operand_index, const DnnLayerMaximumParams *params)
> +{
> +    const DnnOperand *input = &operands[input_operand_indexes[0]];
> +    DnnOperand *output = &operands[output_operand_index];
> +    int dims_count;
> +    const float *src;
> +    float *dst;
> +
> +    for (int i = 0; i < 4; ++i)
> +        output->dims[i] = input->dims[i];
> +
> +    output->data_type = input->data_type;
> +    output->length = calculate_operand_data_length(output);
> +    output->data = av_realloc(output->data, output->length);
> +    if (!output->data)
> +        return DNN_ERROR;
> +
> +    dims_count = calculate_operand_dims_count(output);
> +    src = input->data;
> +    dst = output->data;
> +    for (int i = 0; i < dims_count; ++i)
> +        dst[i] = FFMAX(src[i], params->val.y);
> +
> +    return 0;
> +}
> diff --git a/libavfilter/dnn/dnn_backend_native_layer_maximum.h b/libavfilter/dnn/dnn_backend_native_layer_maximum.h
> new file mode 100644
> index 0000000..6396e58
> --- /dev/null
> +++ b/libavfilter/dnn/dnn_backend_native_layer_maximum.h
> @@ -0,0 +1,42 @@
> +/*
> + * Copyright (c) 2019 Guo Yejun
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +/**
> + * @file
> + * DNN inference functions interface for native backend.
> + */
> +
> +
> +#ifndef AVFILTER_DNN_DNN_BACKEND_NATIVE_LAYER_MAXIMUM_H
> +#define AVFILTER_DNN_DNN_BACKEND_NATIVE_LAYER_MAXIMUM_H
> +
> +#include "libavformat/avio.h"
> +#include "dnn_backend_native.h"
> +
> +typedef struct DnnLayerMaximumParams{
> +    union {
> +        uint32_t u32;
> +        float y;
> +    }val;
> +} DnnLayerMaximumParams;
> +
> +int dnn_execute_layer_maximum(DnnOperand *operands, const int32_t *input_operand_indexes, int32_t output_operand_index, const DnnLayerMaximumParams *params);
> +
> +#endif
> diff --git a/libavfilter/dnn/dnn_backend_tf.c b/libavfilter/dnn/dnn_backend_tf.c
> index 8a3e40a..612d2e0 100644
> --- a/libavfilter/dnn/dnn_backend_tf.c
> +++ b/libavfilter/dnn/dnn_backend_tf.c
> @@ -30,6 +30,7 @@
>  #include "libavformat/avio.h"
>  #include "libavutil/avassert.h"
>  #include "dnn_backend_native_layer_pad.h"
> +#include "dnn_backend_native_layer_maximum.h"
>
>  #include <tensorflow/c/c_api.h>
>
> @@ -401,6 +402,48 @@ static DNNReturnType add_pad_layer(TFModel *tf_model, TF_Operation **cur_op,
>      return DNN_SUCCESS;
>  }
>
> +static DNNReturnType add_maximum_layer(TFModel *tf_model, TF_Operation **cur_op,
> +                                       DnnLayerMaximumParams *params, const int layer)
> +{
> +    TF_Operation *op;
> +    TF_Tensor *tensor;
> +    TF_OperationDescription *op_desc;
> +    TF_Output input;
> +    float *y;
> +
> +    char name_buffer[NAME_BUFFER_SIZE];
> +    snprintf(name_buffer, NAME_BUFFER_SIZE, "maximum/y%d", layer);
> +
> +    op_desc = TF_NewOperation(tf_model->graph, "Const", name_buffer);
> +    TF_SetAttrType(op_desc, "dtype", TF_FLOAT);
> +    tensor = TF_AllocateTensor(TF_FLOAT, NULL, 0, TF_DataTypeSize(TF_FLOAT));
> +    y = (float *)TF_TensorData(tensor);
> +    *y = params->val.y;
> +    TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
> +    if (TF_GetCode(tf_model->status) != TF_OK){
> +        return DNN_ERROR;
> +    }
> +    op = TF_FinishOperation(op_desc, tf_model->status);
> +    if (TF_GetCode(tf_model->status) != TF_OK){
> +        return DNN_ERROR;
> +    }
> +
> +    snprintf(name_buffer, NAME_BUFFER_SIZE, "maximum%d", layer);
> +    op_desc = TF_NewOperation(tf_model->graph, "Maximum", name_buffer);
> +    input.oper = *cur_op;
> +    input.index = 0;
> +    TF_AddInput(op_desc, input);
> +    input.oper = op;
> +    TF_AddInput(op_desc, input);
> +    TF_SetAttrType(op_desc, "T", TF_FLOAT);
> +    *cur_op = TF_FinishOperation(op_desc, tf_model->status);
> +    if (TF_GetCode(tf_model->status) != TF_OK){
> +        return DNN_ERROR;
> +    }
> +
> +    return DNN_SUCCESS;
> +}
> +
>  static DNNReturnType load_native_model(TFModel *tf_model, const char *model_filename)
>  {
>      int32_t layer;
> @@ -471,6 +514,10 @@ static DNNReturnType load_native_model(TFModel *tf_model, const char *model_file
>              layer_add_res = add_pad_layer(tf_model, &op,
>                                            (LayerPadParams *)conv_network->layers[layer].params, layer);
>              break;
> +        case MAXIMUM:
> +            layer_add_res = add_maximum_layer(tf_model, &op,
> +                                          (DnnLayerMaximumParams *)conv_network->layers[layer].params, layer);
> +            break;
>          default:
>              CLEANUP_ON_ERROR(tf_model);
>          }
> diff --git a/tools/python/convert_from_tensorflow.py b/tools/python/convert_from_tensorflow.py
> index 1437ad3..a663b34 100644
> --- a/tools/python/convert_from_tensorflow.py
> +++ b/tools/python/convert_from_tensorflow.py
> @@ -70,7 +70,7 @@ class TFConverter:
>          self.converted_nodes = set()
>          self.conv2d_scope_names = set()
>          self.conv2d_scopename_inputname_dict = {}
> -        self.op2code = {'Conv2D':1, 'DepthToSpace':2, 'MirrorPad':3}
> +        self.op2code = {'Conv2D':1, 'DepthToSpace':2, 'MirrorPad':3, 'Maximum':4}
>          self.mirrorpad_mode = {'CONSTANT':0, 'REFLECT':1, 'SYMMETRIC':2}
>          self.name_operand_dict = {}
>
> @@ -200,6 +200,19 @@ class TFConverter:
>          np.array([input_operand_index, output_operand_index], dtype=np.uint32).tofile(f)
>
>
> +    def dump_maximum_to_file(self, node, f):
> +        assert(node.op == 'Maximum')
> +        self.layer_number = self.layer_number + 1
> +        ynode = self.name_node_dict[node.input[1]]
> +        y = ynode.attr['value'].tensor.float_val[0]
> +        np.array([self.op2code[node.op]], dtype=np.uint32).tofile(f)
> +        np.array([y], dtype=np.float32).tofile(f)
> +        self.converted_nodes.add(node.name)
> +        input_operand_index = self.add_operand(node.input[0], Operand.IOTYPE_INPUT)
> +        output_operand_index = self.add_operand(node.name, Operand.IOTYPE_OUTPUT)
> +        np.array([input_operand_index, output_operand_index], dtype=np.uint32).tofile(f)
> +
> +
>      def dump_layers_to_file(self, f):
>          for node in self.nodes:
>              if node.name in self.converted_nodes:
> @@ -216,6 +229,8 @@ class TFConverter:
>                  self.dump_depth2space_to_file(node, f)
>              elif node.op == 'MirrorPad':
>                  self.dump_mirrorpad_to_file(node, f)
> +            elif node.op == 'Maximum':
> +                self.dump_maximum_to_file(node, f)
>
>
>      def dump_operands_to_file(self, f):
> diff --git a/tools/python/convert_header.py b/tools/python/convert_header.py
> index 6a7e4af..3c2acd5 100644
> --- a/tools/python/convert_header.py
> +++ b/tools/python/convert_header.py
> @@ -23,4 +23,4 @@ str = 'FFMPEGDNNNATIVE'
>  major = 0
>
>  # increase minor when we don't have to re-convert the model file
> -minor = 1
> +minor = 2
> --
> 2.7.4
>

rest LGTM.

> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
Guo, Yejun Sept. 20, 2019, 2:50 p.m. UTC | #2
> -----Original Message-----

> From: ffmpeg-devel [mailto:ffmpeg-devel-bounces@ffmpeg.org] On Behalf Of

> Pedro Arthur

> Sent: Friday, September 20, 2019 10:17 PM

> To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org>

> Subject: Re: [FFmpeg-devel] [PATCH 1/4] libavfilter/dnn: add layer maximum for

> native mode.

> 

> Hi,

> 

> Em sex, 20 de set de 2019 às 01:00, Guo, Yejun <yejun.guo@intel.com>

> escreveu:

> >

> > The reason to add this layer is that it is used by srcnn in vf_sr.

> > This layer is currently ignored in native mode. After this patch,

> > we can add multiple outputs support for native mode.

> >

> I did not quite understand the commit message. Where does srcnn need

> a max layer?


See https://github.com/HighVoltageRocknRoll/sr/blob/master/models/model_srcnn.py#L39 ;
the maximum layer is the last layer of the model.

> What is the relation between max layer and supporting multiple outputs?


Thanks. I did not describe it explicitly; I will add more detail below.

The direct relation is between the max layer and the model output name: multiple outputs
can be supported only once output-name matching is supported.

Suppose the output name of srcnn is 'y'. This means that the output name of the max layer is 'y',
since the max layer is the last layer. Suppose also that the input name of the max layer is 'z'. The network
then looks like:
... -> 'z' -> (max layer) -> 'y'

In the current implementation, the max layer is ignored in native mode, which means that 'y' is also
discarded in native mode. The output name of the native model becomes 'z', and so we cannot
find the correct output operand with the name 'y'.

The reason the current implementation works is that we simply treat the last operand as the
model output and ignore name matching.

To support multiple outputs, we have to recognize output operands by name. To support looking up outputs
by name, we must add 'y' back to srcnn (that is, handle the max layer), so that vf_sr works in both tf mode and native mode.


> 

> > Signed-off-by: Guo, Yejun <yejun.guo@intel.com>

> > ---

> >  libavfilter/dnn/Makefile                           |  1 +

> >  libavfilter/dnn/dnn_backend_native.c               | 36

> ++++++++++++++-

> >  libavfilter/dnn/dnn_backend_native.h               |  6 +--

> >  libavfilter/dnn/dnn_backend_native_layer_maximum.c | 54

> ++++++++++++++++++++++

> >  libavfilter/dnn/dnn_backend_native_layer_maximum.h | 42

> +++++++++++++++++

> >  libavfilter/dnn/dnn_backend_tf.c                   | 47

> +++++++++++++++++++

> >  tools/python/convert_from_tensorflow.py            | 17 ++++++-

> >  tools/python/convert_header.py                     |  2 +-

> >  8 files changed, 198 insertions(+), 7 deletions(-)

> >  create mode 100644

> libavfilter/dnn/dnn_backend_native_layer_maximum.c

> >  create mode 100644

> libavfilter/dnn/dnn_backend_native_layer_maximum.h

> >

> > diff --git a/libavfilter/dnn/Makefile b/libavfilter/dnn/Makefile

> > index 63a35e7..721094d 100644

> > --- a/libavfilter/dnn/Makefile

> > +++ b/libavfilter/dnn/Makefile

> > @@ -3,6 +3,7 @@ OBJS-$(CONFIG_DNN)

> += dnn/dnn_backend_native.o

> >  OBJS-$(CONFIG_DNN)                           +=

> dnn/dnn_backend_native_layer_pad.o

> >  OBJS-$(CONFIG_DNN)                           +=

> dnn/dnn_backend_native_layer_conv2d.o

> >  OBJS-$(CONFIG_DNN)                           +=

> dnn/dnn_backend_native_layer_depth2space.o

> > +OBJS-$(CONFIG_DNN)                           +=

> dnn/dnn_backend_native_layer_maximum.o

> >

> >  DNN-OBJS-$(CONFIG_LIBTENSORFLOW)             +=

> dnn/dnn_backend_tf.o

> >

> > diff --git a/libavfilter/dnn/dnn_backend_native.c

> b/libavfilter/dnn/dnn_backend_native.c

> > index be548c6..22a9a33 100644

> > --- a/libavfilter/dnn/dnn_backend_native.c

> > +++ b/libavfilter/dnn/dnn_backend_native.c

> > @@ -28,6 +28,7 @@

> >  #include "dnn_backend_native_layer_pad.h"

> >  #include "dnn_backend_native_layer_conv2d.h"

> >  #include "dnn_backend_native_layer_depth2space.h"

> > +#include "dnn_backend_native_layer_maximum.h"

> >

> >  static DNNReturnType set_input_output_native(void *model,

> DNNInputData *input, const char *input_name, const char **output_names,

> uint32_t nb_output)

> >  {

> > @@ -78,6 +79,7 @@ DNNModel *ff_dnn_load_model_native(const char

> *model_filename)

> >      ConvolutionalParams *conv_params;

> >      DepthToSpaceParams *depth_to_space_params;

> >      LayerPadParams *pad_params;

> > +    DnnLayerMaximumParams *maximum_params;

> >

> >      model = av_malloc(sizeof(DNNModel));

> >      if (!model){

> > @@ -237,6 +239,21 @@ DNNModel *ff_dnn_load_model_native(const char

> *model_filename)

> >              network->layers[layer].type = MIRROR_PAD;

> >              network->layers[layer].params = pad_params;

> >              break;

> > +        case MAXIMUM:

> > +            maximum_params = av_malloc(sizeof(*maximum_params));

> > +            if (!maximum_params){

> > +                avio_closep(&model_file_context);

> > +                ff_dnn_free_model_native(&model);

> > +                return NULL;

> > +            }

> > +            maximum_params->val.u32 = avio_rl32(model_file_context);

> > +            dnn_size += 4;

> > +            network->layers[layer].type = MAXIMUM;

> > +            network->layers[layer].params = maximum_params;

> > +            network->layers[layer].input_operand_indexes[0] =

> (int32_t)avio_rl32(model_file_context);

> > +            network->layers[layer].output_operand_index =

> (int32_t)avio_rl32(model_file_context);

> > +            dnn_size += 8;

> > +            break;

> >          default:

> >              avio_closep(&model_file_context);

> >              ff_dnn_free_model_native(&model);

> > @@ -290,6 +307,7 @@ DNNReturnType

> ff_dnn_execute_model_native(const DNNModel *model, DNNData *output

> >      ConvolutionalParams *conv_params;

> >      DepthToSpaceParams *depth_to_space_params;

> >      LayerPadParams *pad_params;

> > +    DnnLayerMaximumParams *maximum_params;

> >

> >      if (network->layers_num <= 0 || network->operands_num <= 0)

> >          return DNN_ERROR;

> > @@ -313,6 +331,11 @@ DNNReturnType

> ff_dnn_execute_model_native(const DNNModel *model, DNNData *output

> >              dnn_execute_layer_pad(network->operands,

> network->layers[layer].input_operand_indexes,

> >

> network->layers[layer].output_operand_index, pad_params);

> >              break;

> > +        case MAXIMUM:

> > +            maximum_params = (DnnLayerMaximumParams

> *)network->layers[layer].params;

> > +            dnn_execute_layer_maximum(network->operands,

> network->layers[layer].input_operand_indexes,

> > +

> network->layers[layer].output_operand_index, maximum_params);

> > +            break;

> >          case INPUT:

> >              return DNN_ERROR;

> >          }

> > @@ -333,10 +356,19 @@ DNNReturnType

> ff_dnn_execute_model_native(const DNNModel *model, DNNData *output

> >      return DNN_SUCCESS;

> >  }

> >

> > -int32_t calculate_operand_data_length(DnnOperand* operand)

> > +int32_t calculate_operand_dims_count(const DnnOperand *oprd)

> > +{

> > +    int32_t result = 1;

> > +    for (int i = 0; i < 4; ++i)

> > +        result *= oprd->dims[i];

> > +

> > +    return result;

> > +}

> > +

> > +int32_t calculate_operand_data_length(const DnnOperand* oprd)

> >  {

> >      // currently, we just support DNN_FLOAT

> > -    return operand->dims[0] * operand->dims[1] * operand->dims[2] *

> operand->dims[3] * sizeof(float);

> > +    return oprd->dims[0] * oprd->dims[1] * oprd->dims[2] * oprd->dims[3] *

> sizeof(float);

> >  }

> >

> >  void ff_dnn_free_model_native(DNNModel **model)

> > diff --git a/libavfilter/dnn/dnn_backend_native.h

> b/libavfilter/dnn/dnn_backend_native.h

> > index a74d138..b238d18 100644

> > --- a/libavfilter/dnn/dnn_backend_native.h

> > +++ b/libavfilter/dnn/dnn_backend_native.h

> > @@ -30,7 +30,7 @@

> >  #include "../dnn_interface.h"

> >  #include "libavformat/avio.h"

> >

> > -typedef enum {INPUT, CONV, DEPTH_TO_SPACE, MIRROR_PAD}

> DNNLayerType;

> > +typedef enum {INPUT = 0, CONV = 1, DEPTH_TO_SPACE = 2, MIRROR_PAD =

> 3, MAXIMUM = 4} DNNLayerType;

> >

> >  typedef enum {DOT_INPUT = 1, DOT_OUTPUT = 2, DOT_INTERMEDIATE =

> DOT_INPUT | DOT_INPUT} DNNOperandType;

> >

> > @@ -104,6 +104,6 @@ DNNReturnType

> ff_dnn_execute_model_native(const DNNModel *model, DNNData *output

> >

> >  void ff_dnn_free_model_native(DNNModel **model);

> >

> > -int32_t calculate_operand_data_length(DnnOperand *operand);

> > -

> > +int32_t calculate_operand_data_length(const DnnOperand *oprd);

> > +int32_t calculate_operand_dims_count(const DnnOperand *oprd);

> >  #endif

> > diff --git a/libavfilter/dnn/dnn_backend_native_layer_maximum.c

> b/libavfilter/dnn/dnn_backend_native_layer_maximum.c

> > new file mode 100644

> > index 0000000..a2669af

> > --- /dev/null

> > +++ b/libavfilter/dnn/dnn_backend_native_layer_maximum.c

> > @@ -0,0 +1,54 @@

> > +/*

> > + * Copyright (c) 2019 Guo Yejun

> > + *

> > + * This file is part of FFmpeg.

> > + *

> > + * FFmpeg is free software; you can redistribute it and/or

> > + * modify it under the terms of the GNU Lesser General Public

> > + * License as published by the Free Software Foundation; either

> > + * version 2.1 of the License, or (at your option) any later version.

> > + *

> > + * FFmpeg is distributed in the hope that it will be useful,

> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of

> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

> GNU

> > + * Lesser General Public License for more details.

> > + *

> > + * You should have received a copy of the GNU Lesser General Public

> > + * License along with FFmpeg; if not, write to the Free Software

> > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301

> USA

> > + */

> > +

> > +/**

> > + * @file

> > + * DNN native backend implementation.

> > + */

> > +

> > +#include "dnn_backend_native.h"

> > +#include "libavutil/avassert.h"

> > +#include "dnn_backend_native_layer_maximum.h"

> > +

> > +int dnn_execute_layer_maximum(DnnOperand *operands, const int32_t

> *input_operand_indexes, int32_t output_operand_index, const

> DnnLayerMaximumParams *params)

> > +{

> > +    const DnnOperand *input = &operands[input_operand_indexes[0]];

> > +    DnnOperand *output = &operands[output_operand_index];

> > +    int dims_count;

> > +    const float *src;

> > +    float *dst;

> > +

> > +    for (int i = 0; i < 4; ++i)

> > +        output->dims[i] = input->dims[i];

> > +

> > +    output->data_type = input->data_type;

> > +    output->length = calculate_operand_data_length(output);

> > +    output->data = av_realloc(output->data, output->length);

> > +    if (!output->data)

> > +        return DNN_ERROR;

> > +

> > +    dims_count = calculate_operand_dims_count(output);

> > +    src = input->data;

> > +    dst = output->data;

> > +    for (int i = 0; i < dims_count; ++i)

> > +        dst[i] = FFMAX(src[i], params->val.y);

> > +

> > +    return 0;

> > +}

> > diff --git a/libavfilter/dnn/dnn_backend_native_layer_maximum.h

> b/libavfilter/dnn/dnn_backend_native_layer_maximum.h

> > new file mode 100644

> > index 0000000..6396e58

> > --- /dev/null

> > +++ b/libavfilter/dnn/dnn_backend_native_layer_maximum.h

> > @@ -0,0 +1,42 @@

> > +/*

> > + * Copyright (c) 2019 Guo Yejun

> > + *

> > + * This file is part of FFmpeg.

> > + *

> > + * FFmpeg is free software; you can redistribute it and/or

> > + * modify it under the terms of the GNU Lesser General Public

> > + * License as published by the Free Software Foundation; either

> > + * version 2.1 of the License, or (at your option) any later version.

> > + *

> > + * FFmpeg is distributed in the hope that it will be useful,

> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of

> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

> GNU

> > + * Lesser General Public License for more details.

> > + *

> > + * You should have received a copy of the GNU Lesser General Public

> > + * License along with FFmpeg; if not, write to the Free Software

> > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301

> USA

> > + */

> > +

> > +/**

> > + * @file

> > + * DNN inference functions interface for native backend.

> > + */

> > +

> > +

> > +#ifndef AVFILTER_DNN_DNN_BACKEND_NATIVE_LAYER_MAXIMUM_H

> > +#define AVFILTER_DNN_DNN_BACKEND_NATIVE_LAYER_MAXIMUM_H

> > +

> > +#include "libavformat/avio.h"

> > +#include "dnn_backend_native.h"

> > +

> > +typedef struct DnnLayerMaximumParams{

> > +    union {

> > +        uint32_t u32;

> > +        float y;

> > +    }val;

> > +} DnnLayerMaximumParams;

> > +

> > +int dnn_execute_layer_maximum(DnnOperand *operands, const int32_t

> *input_operand_indexes, int32_t output_operand_index, const

> DnnLayerMaximumParams *params);

> > +

> > +#endif

> > diff --git a/libavfilter/dnn/dnn_backend_tf.c

> b/libavfilter/dnn/dnn_backend_tf.c

> > index 8a3e40a..612d2e0 100644

> > --- a/libavfilter/dnn/dnn_backend_tf.c

> > +++ b/libavfilter/dnn/dnn_backend_tf.c

> > @@ -30,6 +30,7 @@

> >  #include "libavformat/avio.h"

> >  #include "libavutil/avassert.h"

> >  #include "dnn_backend_native_layer_pad.h"

> > +#include "dnn_backend_native_layer_maximum.h"

> >

> >  #include <tensorflow/c/c_api.h>

> >

> > @@ -401,6 +402,48 @@ static DNNReturnType add_pad_layer(TFModel

> *tf_model, TF_Operation **cur_op,

> >      return DNN_SUCCESS;

> >  }

> >

> > +static DNNReturnType add_maximum_layer(TFModel *tf_model,

> TF_Operation **cur_op,

> > +                                       DnnLayerMaximumParams

> *params, const int layer)

> > +{

> > +    TF_Operation *op;

> > +    TF_Tensor *tensor;

> > +    TF_OperationDescription *op_desc;

> > +    TF_Output input;

> > +    float *y;

> > +

> > +    char name_buffer[NAME_BUFFER_SIZE];

> > +    snprintf(name_buffer, NAME_BUFFER_SIZE, "maximum/y%d", layer);

> > +

> > +    op_desc = TF_NewOperation(tf_model->graph, "Const", name_buffer);

> > +    TF_SetAttrType(op_desc, "dtype", TF_FLOAT);

> > +    tensor = TF_AllocateTensor(TF_FLOAT, NULL, 0,

> TF_DataTypeSize(TF_FLOAT));

> > +    y = (float *)TF_TensorData(tensor);

> > +    *y = params->val.y;

> > +    TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);

> > +    if (TF_GetCode(tf_model->status) != TF_OK){

> > +        return DNN_ERROR;

> > +    }

> > +    op = TF_FinishOperation(op_desc, tf_model->status);

> > +    if (TF_GetCode(tf_model->status) != TF_OK){

> > +        return DNN_ERROR;

> > +    }

> > +

> > +    snprintf(name_buffer, NAME_BUFFER_SIZE, "maximum%d", layer);

> > +    op_desc = TF_NewOperation(tf_model->graph, "Maximum",

> name_buffer);

> > +    input.oper = *cur_op;

> > +    input.index = 0;

> > +    TF_AddInput(op_desc, input);

> > +    input.oper = op;

> > +    TF_AddInput(op_desc, input);

> > +    TF_SetAttrType(op_desc, "T", TF_FLOAT);

> > +    *cur_op = TF_FinishOperation(op_desc, tf_model->status);

> > +    if (TF_GetCode(tf_model->status) != TF_OK){

> > +        return DNN_ERROR;

> > +    }

> > +

> > +    return DNN_SUCCESS;

> > +}

> > +

> >  static DNNReturnType load_native_model(TFModel *tf_model, const char

> *model_filename)

> >  {

> >      int32_t layer;

> > @@ -471,6 +514,10 @@ static DNNReturnType load_native_model(TFModel

> *tf_model, const char *model_file

> >              layer_add_res = add_pad_layer(tf_model, &op,

> >                                            (LayerPadParams

> *)conv_network->layers[layer].params, layer);

> >              break;

> > +        case MAXIMUM:

> > +            layer_add_res = add_maximum_layer(tf_model, &op,

> > +

> (DnnLayerMaximumParams *)conv_network->layers[layer].params, layer);

> > +            break;

> >          default:

> >              CLEANUP_ON_ERROR(tf_model);

> >          }

> > diff --git a/tools/python/convert_from_tensorflow.py

> b/tools/python/convert_from_tensorflow.py

> > index 1437ad3..a663b34 100644

> > --- a/tools/python/convert_from_tensorflow.py

> > +++ b/tools/python/convert_from_tensorflow.py

> > @@ -70,7 +70,7 @@ class TFConverter:

> >          self.converted_nodes = set()

> >          self.conv2d_scope_names = set()

> >          self.conv2d_scopename_inputname_dict = {}

> > -        self.op2code = {'Conv2D':1, 'DepthToSpace':2, 'MirrorPad':3}

> > +        self.op2code = {'Conv2D':1, 'DepthToSpace':2, 'MirrorPad':3,

> 'Maximum':4}

> >          self.mirrorpad_mode = {'CONSTANT':0, 'REFLECT':1,

> 'SYMMETRIC':2}

> >          self.name_operand_dict = {}

> >

> > @@ -200,6 +200,19 @@ class TFConverter:

> >          np.array([input_operand_index, output_operand_index],

> dtype=np.uint32).tofile(f)

> >

> >

> > +    def dump_maximum_to_file(self, node, f):

> > +        assert(node.op == 'Maximum')

> > +        self.layer_number = self.layer_number + 1

> > +        ynode = self.name_node_dict[node.input[1]]

> > +        y = ynode.attr['value'].tensor.float_val[0]

> > +        np.array([self.op2code[node.op]], dtype=np.uint32).tofile(f)

> > +        np.array([y], dtype=np.float32).tofile(f)

> > +        self.converted_nodes.add(node.name)

> > +        input_operand_index = self.add_operand(node.input[0],

> Operand.IOTYPE_INPUT)

> > +        output_operand_index = self.add_operand(node.name,

> Operand.IOTYPE_OUTPUT)

> > +        np.array([input_operand_index, output_operand_index],

> dtype=np.uint32).tofile(f)

> > +

> > +

> >      def dump_layers_to_file(self, f):

> >          for node in self.nodes:

> >              if node.name in self.converted_nodes:

> > @@ -216,6 +229,8 @@ class TFConverter:

> >                  self.dump_depth2space_to_file(node, f)

> >              elif node.op == 'MirrorPad':

> >                  self.dump_mirrorpad_to_file(node, f)

> > +            elif node.op == 'Maximum':

> > +                self.dump_maximum_to_file(node, f)

> >

> >

> >      def dump_operands_to_file(self, f):

> > diff --git a/tools/python/convert_header.py

> b/tools/python/convert_header.py

> > index 6a7e4af..3c2acd5 100644

> > --- a/tools/python/convert_header.py

> > +++ b/tools/python/convert_header.py

> > @@ -23,4 +23,4 @@ str = 'FFMPEGDNNNATIVE'

> >  major = 0

> >

> >  # increase minor when we don't have to re-convert the model file

> > -minor = 1

> > +minor = 2

> > --

> > 2.7.4

> >

> 

> rest LGTM.

> 

> > _______________________________________________

> > ffmpeg-devel mailing list

> > ffmpeg-devel@ffmpeg.org

> > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

> >

> > To unsubscribe, visit link above, or email

> > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

> _______________________________________________

> ffmpeg-devel mailing list

> ffmpeg-devel@ffmpeg.org

> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

> 

> To unsubscribe, visit link above, or email

> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
Pedro Arthur Sept. 20, 2019, 3:13 p.m. UTC | #3
Em sex, 20 de set de 2019 às 11:50, Guo, Yejun <yejun.guo@intel.com> escreveu:
>
>
>
> > -----Original Message-----
> > From: ffmpeg-devel [mailto:ffmpeg-devel-bounces@ffmpeg.org] On Behalf Of
> > Pedro Arthur
> > Sent: Friday, September 20, 2019 10:17 PM
> > To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org>
> > Subject: Re: [FFmpeg-devel] [PATCH 1/4] libavfilter/dnn: add layer maximum for
> > native mode.
> >
> > Hi,
> >
> > Em sex, 20 de set de 2019 às 01:00, Guo, Yejun <yejun.guo@intel.com>
> > escreveu:
> > >
> > > The reason to add this layer is that it is used by srcnn in vf_sr.
> > > This layer is currently ignored in native mode. After this patch,
> > > we can add multiple outputs support for native mode.
> > >
> > I did not quite understand the commit message. Where does srcnn needs
> > max a layer?
>
> see https://github.com/HighVoltageRocknRoll/sr/blob/master/models/model_srcnn.py#L39 ,
> the maximum layer is the last layer of the model.
I see, indeed if I'm not missing something this max layer is
superfluous, as the relu activation already does this, right?
What we have to guarantee is that the output is in the range [0, 1],
that means we should have had a layer min(y, 1) instead of the max or
guarantee the conversion from float to integer properly saturates y >
1.

>
> > What is the relation between max layer and supporting multiple outputs?
>
> thanks, I did not describe it explicitly, will add more detail as below.
>
> The direct relation is the max layer and the model output name, and then multiple outputs
> can be supported after the output name matching is supported.
>
> suppose the output name of srcnn is 'y', it means that the output name of max layer is 'y'
> since max layer is the last layer. And suppose the input name of max layer is 'z', the network
> looks like:
> ... -> 'z' -> (max layer) -> 'y'
>
> In current implementation, the max layer is ignored in native mode, it means that 'y' is also
> discarded in native mode. The output name of the native model becomes 'z'. And so we could not
> find the correct output operand with name 'y'.
>
> The reason that current implementation works is that we just consider the last operand as the
> model output, ignoring the name matching.
>
> To support multiple outputs, we have to recognize output operands by name. To support searching for an output
> by name, we must add 'y' back to srcnn (that is, handle the max layer), so that vf_sr works in both tf mode and native mode.
>
thanks, in any case the patch is useful, I should push it soon.

>
> >
> > > Signed-off-by: Guo, Yejun <yejun.guo@intel.com>
> > > ---
> > >  libavfilter/dnn/Makefile                           |  1 +
> > >  libavfilter/dnn/dnn_backend_native.c               | 36
> > ++++++++++++++-
> > >  libavfilter/dnn/dnn_backend_native.h               |  6 +--
> > >  libavfilter/dnn/dnn_backend_native_layer_maximum.c | 54
> > ++++++++++++++++++++++
> > >  libavfilter/dnn/dnn_backend_native_layer_maximum.h | 42
> > +++++++++++++++++
> > >  libavfilter/dnn/dnn_backend_tf.c                   | 47
> > +++++++++++++++++++
> > >  tools/python/convert_from_tensorflow.py            | 17 ++++++-
> > >  tools/python/convert_header.py                     |  2 +-
> > >  8 files changed, 198 insertions(+), 7 deletions(-)
> > >  create mode 100644
> > libavfilter/dnn/dnn_backend_native_layer_maximum.c
> > >  create mode 100644
> > libavfilter/dnn/dnn_backend_native_layer_maximum.h
> > >
> > > diff --git a/libavfilter/dnn/Makefile b/libavfilter/dnn/Makefile
> > > index 63a35e7..721094d 100644
> > > --- a/libavfilter/dnn/Makefile
> > > +++ b/libavfilter/dnn/Makefile
> > > @@ -3,6 +3,7 @@ OBJS-$(CONFIG_DNN)
> > += dnn/dnn_backend_native.o
> > >  OBJS-$(CONFIG_DNN)                           +=
> > dnn/dnn_backend_native_layer_pad.o
> > >  OBJS-$(CONFIG_DNN)                           +=
> > dnn/dnn_backend_native_layer_conv2d.o
> > >  OBJS-$(CONFIG_DNN)                           +=
> > dnn/dnn_backend_native_layer_depth2space.o
> > > +OBJS-$(CONFIG_DNN)                           +=
> > dnn/dnn_backend_native_layer_maximum.o
> > >
> > >  DNN-OBJS-$(CONFIG_LIBTENSORFLOW)             +=
> > dnn/dnn_backend_tf.o
> > >
> > > diff --git a/libavfilter/dnn/dnn_backend_native.c
> > b/libavfilter/dnn/dnn_backend_native.c
> > > index be548c6..22a9a33 100644
> > > --- a/libavfilter/dnn/dnn_backend_native.c
> > > +++ b/libavfilter/dnn/dnn_backend_native.c
> > > @@ -28,6 +28,7 @@
> > >  #include "dnn_backend_native_layer_pad.h"
> > >  #include "dnn_backend_native_layer_conv2d.h"
> > >  #include "dnn_backend_native_layer_depth2space.h"
> > > +#include "dnn_backend_native_layer_maximum.h"
> > >
> > >  static DNNReturnType set_input_output_native(void *model,
> > DNNInputData *input, const char *input_name, const char **output_names,
> > uint32_t nb_output)
> > >  {
> > > @@ -78,6 +79,7 @@ DNNModel *ff_dnn_load_model_native(const char
> > *model_filename)
> > >      ConvolutionalParams *conv_params;
> > >      DepthToSpaceParams *depth_to_space_params;
> > >      LayerPadParams *pad_params;
> > > +    DnnLayerMaximumParams *maximum_params;
> > >
> > >      model = av_malloc(sizeof(DNNModel));
> > >      if (!model){
> > > @@ -237,6 +239,21 @@ DNNModel *ff_dnn_load_model_native(const char
> > *model_filename)
> > >              network->layers[layer].type = MIRROR_PAD;
> > >              network->layers[layer].params = pad_params;
> > >              break;
> > > +        case MAXIMUM:
> > > +            maximum_params = av_malloc(sizeof(*maximum_params));
> > > +            if (!maximum_params){
> > > +                avio_closep(&model_file_context);
> > > +                ff_dnn_free_model_native(&model);
> > > +                return NULL;
> > > +            }
> > > +            maximum_params->val.u32 = avio_rl32(model_file_context);
> > > +            dnn_size += 4;
> > > +            network->layers[layer].type = MAXIMUM;
> > > +            network->layers[layer].params = maximum_params;
> > > +            network->layers[layer].input_operand_indexes[0] =
> > (int32_t)avio_rl32(model_file_context);
> > > +            network->layers[layer].output_operand_index =
> > (int32_t)avio_rl32(model_file_context);
> > > +            dnn_size += 8;
> > > +            break;
> > >          default:
> > >              avio_closep(&model_file_context);
> > >              ff_dnn_free_model_native(&model);
> > > @@ -290,6 +307,7 @@ DNNReturnType
> > ff_dnn_execute_model_native(const DNNModel *model, DNNData *output
> > >      ConvolutionalParams *conv_params;
> > >      DepthToSpaceParams *depth_to_space_params;
> > >      LayerPadParams *pad_params;
> > > +    DnnLayerMaximumParams *maximum_params;
> > >
> > >      if (network->layers_num <= 0 || network->operands_num <= 0)
> > >          return DNN_ERROR;
> > > @@ -313,6 +331,11 @@ DNNReturnType
> > ff_dnn_execute_model_native(const DNNModel *model, DNNData *output
> > >              dnn_execute_layer_pad(network->operands,
> > network->layers[layer].input_operand_indexes,
> > >
> > network->layers[layer].output_operand_index, pad_params);
> > >              break;
> > > +        case MAXIMUM:
> > > +            maximum_params = (DnnLayerMaximumParams
> > *)network->layers[layer].params;
> > > +            dnn_execute_layer_maximum(network->operands,
> > network->layers[layer].input_operand_indexes,
> > > +
> > network->layers[layer].output_operand_index, maximum_params);
> > > +            break;
> > >          case INPUT:
> > >              return DNN_ERROR;
> > >          }
> > > @@ -333,10 +356,19 @@ DNNReturnType
> > ff_dnn_execute_model_native(const DNNModel *model, DNNData *output
> > >      return DNN_SUCCESS;
> > >  }
> > >
> > > -int32_t calculate_operand_data_length(DnnOperand* operand)
> > > +int32_t calculate_operand_dims_count(const DnnOperand *oprd)
> > > +{
> > > +    int32_t result = 1;
> > > +    for (int i = 0; i < 4; ++i)
> > > +        result *= oprd->dims[i];
> > > +
> > > +    return result;
> > > +}
> > > +
> > > +int32_t calculate_operand_data_length(const DnnOperand* oprd)
> > >  {
> > >      // currently, we just support DNN_FLOAT
> > > -    return operand->dims[0] * operand->dims[1] * operand->dims[2] *
> > operand->dims[3] * sizeof(float);
> > > +    return oprd->dims[0] * oprd->dims[1] * oprd->dims[2] * oprd->dims[3] *
> > sizeof(float);
> > >  }
> > >
> > >  void ff_dnn_free_model_native(DNNModel **model)
> > > diff --git a/libavfilter/dnn/dnn_backend_native.h
> > b/libavfilter/dnn/dnn_backend_native.h
> > > index a74d138..b238d18 100644
> > > --- a/libavfilter/dnn/dnn_backend_native.h
> > > +++ b/libavfilter/dnn/dnn_backend_native.h
> > > @@ -30,7 +30,7 @@
> > >  #include "../dnn_interface.h"
> > >  #include "libavformat/avio.h"
> > >
> > > -typedef enum {INPUT, CONV, DEPTH_TO_SPACE, MIRROR_PAD}
> > DNNLayerType;
> > > +typedef enum {INPUT = 0, CONV = 1, DEPTH_TO_SPACE = 2, MIRROR_PAD =
> > 3, MAXIMUM = 4} DNNLayerType;
> > >
> > >  typedef enum {DOT_INPUT = 1, DOT_OUTPUT = 2, DOT_INTERMEDIATE =
> > DOT_INPUT | DOT_INPUT} DNNOperandType;
> > >
> > > @@ -104,6 +104,6 @@ DNNReturnType
> > ff_dnn_execute_model_native(const DNNModel *model, DNNData *output
> > >
> > >  void ff_dnn_free_model_native(DNNModel **model);
> > >
> > > -int32_t calculate_operand_data_length(DnnOperand *operand);
> > > -
> > > +int32_t calculate_operand_data_length(const DnnOperand *oprd);
> > > +int32_t calculate_operand_dims_count(const DnnOperand *oprd);
> > >  #endif
> > > diff --git a/libavfilter/dnn/dnn_backend_native_layer_maximum.c
> > b/libavfilter/dnn/dnn_backend_native_layer_maximum.c
> > > new file mode 100644
> > > index 0000000..a2669af
> > > --- /dev/null
> > > +++ b/libavfilter/dnn/dnn_backend_native_layer_maximum.c
> > > @@ -0,0 +1,54 @@
> > > +/*
> > > + * Copyright (c) 2019 Guo Yejun
> > > + *
> > > + * This file is part of FFmpeg.
> > > + *
> > > + * FFmpeg is free software; you can redistribute it and/or
> > > + * modify it under the terms of the GNU Lesser General Public
> > > + * License as published by the Free Software Foundation; either
> > > + * version 2.1 of the License, or (at your option) any later version.
> > > + *
> > > + * FFmpeg is distributed in the hope that it will be useful,
> > > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> > GNU
> > > + * Lesser General Public License for more details.
> > > + *
> > > + * You should have received a copy of the GNU Lesser General Public
> > > + * License along with FFmpeg; if not, write to the Free Software
> > > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301
> > USA
> > > + */
> > > +
> > > +/**
> > > + * @file
> > > + * DNN native backend implementation.
> > > + */
> > > +
> > > +#include "dnn_backend_native.h"
> > > +#include "libavutil/avassert.h"
> > > +#include "dnn_backend_native_layer_maximum.h"
> > > +
> > > +int dnn_execute_layer_maximum(DnnOperand *operands, const int32_t
> > *input_operand_indexes, int32_t output_operand_index, const
> > DnnLayerMaximumParams *params)
> > > +{
> > > +    const DnnOperand *input = &operands[input_operand_indexes[0]];
> > > +    DnnOperand *output = &operands[output_operand_index];
> > > +    int dims_count;
> > > +    const float *src;
> > > +    float *dst;
> > > +
> > > +    for (int i = 0; i < 4; ++i)
> > > +        output->dims[i] = input->dims[i];
> > > +
> > > +    output->data_type = input->data_type;
> > > +    output->length = calculate_operand_data_length(output);
> > > +    output->data = av_realloc(output->data, output->length);
> > > +    if (!output->data)
> > > +        return DNN_ERROR;
> > > +
> > > +    dims_count = calculate_operand_dims_count(output);
> > > +    src = input->data;
> > > +    dst = output->data;
> > > +    for (int i = 0; i < dims_count; ++i)
> > > +        dst[i] = FFMAX(src[i], params->val.y);
> > > +
> > > +    return 0;
> > > +}
> > > diff --git a/libavfilter/dnn/dnn_backend_native_layer_maximum.h
> > b/libavfilter/dnn/dnn_backend_native_layer_maximum.h
> > > new file mode 100644
> > > index 0000000..6396e58
> > > --- /dev/null
> > > +++ b/libavfilter/dnn/dnn_backend_native_layer_maximum.h
> > > @@ -0,0 +1,42 @@
> > > +/*
> > > + * Copyright (c) 2019 Guo Yejun
> > > + *
> > > + * This file is part of FFmpeg.
> > > + *
> > > + * FFmpeg is free software; you can redistribute it and/or
> > > + * modify it under the terms of the GNU Lesser General Public
> > > + * License as published by the Free Software Foundation; either
> > > + * version 2.1 of the License, or (at your option) any later version.
> > > + *
> > > + * FFmpeg is distributed in the hope that it will be useful,
> > > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> > GNU
> > > + * Lesser General Public License for more details.
> > > + *
> > > + * You should have received a copy of the GNU Lesser General Public
> > > + * License along with FFmpeg; if not, write to the Free Software
> > > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301
> > USA
> > > + */
> > > +
> > > +/**
> > > + * @file
> > > + * DNN inference functions interface for native backend.
> > > + */
> > > +
> > > +
> > > +#ifndef AVFILTER_DNN_DNN_BACKEND_NATIVE_LAYER_MAXIMUM_H
> > > +#define AVFILTER_DNN_DNN_BACKEND_NATIVE_LAYER_MAXIMUM_H
> > > +
> > > +#include "libavformat/avio.h"
> > > +#include "dnn_backend_native.h"
> > > +
> > > +typedef struct DnnLayerMaximumParams{
> > > +    union {
> > > +        uint32_t u32;
> > > +        float y;
> > > +    }val;
> > > +} DnnLayerMaximumParams;
> > > +
> > > +int dnn_execute_layer_maximum(DnnOperand *operands, const int32_t
> > *input_operand_indexes, int32_t output_operand_index, const
> > DnnLayerMaximumParams *params);
> > > +
> > > +#endif
> > > diff --git a/libavfilter/dnn/dnn_backend_tf.c
> > b/libavfilter/dnn/dnn_backend_tf.c
> > > index 8a3e40a..612d2e0 100644
> > > --- a/libavfilter/dnn/dnn_backend_tf.c
> > > +++ b/libavfilter/dnn/dnn_backend_tf.c
> > > @@ -30,6 +30,7 @@
> > >  #include "libavformat/avio.h"
> > >  #include "libavutil/avassert.h"
> > >  #include "dnn_backend_native_layer_pad.h"
> > > +#include "dnn_backend_native_layer_maximum.h"
> > >
> > >  #include <tensorflow/c/c_api.h>
> > >
> > > @@ -401,6 +402,48 @@ static DNNReturnType add_pad_layer(TFModel
> > *tf_model, TF_Operation **cur_op,
> > >      return DNN_SUCCESS;
> > >  }
> > >
> > > +static DNNReturnType add_maximum_layer(TFModel *tf_model,
> > TF_Operation **cur_op,
> > > +                                       DnnLayerMaximumParams
> > *params, const int layer)
> > > +{
> > > +    TF_Operation *op;
> > > +    TF_Tensor *tensor;
> > > +    TF_OperationDescription *op_desc;
> > > +    TF_Output input;
> > > +    float *y;
> > > +
> > > +    char name_buffer[NAME_BUFFER_SIZE];
> > > +    snprintf(name_buffer, NAME_BUFFER_SIZE, "maximum/y%d", layer);
> > > +
> > > +    op_desc = TF_NewOperation(tf_model->graph, "Const", name_buffer);
> > > +    TF_SetAttrType(op_desc, "dtype", TF_FLOAT);
> > > +    tensor = TF_AllocateTensor(TF_FLOAT, NULL, 0,
> > TF_DataTypeSize(TF_FLOAT));
> > > +    y = (float *)TF_TensorData(tensor);
> > > +    *y = params->val.y;
> > > +    TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
> > > +    if (TF_GetCode(tf_model->status) != TF_OK){
> > > +        return DNN_ERROR;
> > > +    }
> > > +    op = TF_FinishOperation(op_desc, tf_model->status);
> > > +    if (TF_GetCode(tf_model->status) != TF_OK){
> > > +        return DNN_ERROR;
> > > +    }
> > > +
> > > +    snprintf(name_buffer, NAME_BUFFER_SIZE, "maximum%d", layer);
> > > +    op_desc = TF_NewOperation(tf_model->graph, "Maximum",
> > name_buffer);
> > > +    input.oper = *cur_op;
> > > +    input.index = 0;
> > > +    TF_AddInput(op_desc, input);
> > > +    input.oper = op;
> > > +    TF_AddInput(op_desc, input);
> > > +    TF_SetAttrType(op_desc, "T", TF_FLOAT);
> > > +    *cur_op = TF_FinishOperation(op_desc, tf_model->status);
> > > +    if (TF_GetCode(tf_model->status) != TF_OK){
> > > +        return DNN_ERROR;
> > > +    }
> > > +
> > > +    return DNN_SUCCESS;
> > > +}
> > > +
> > >  static DNNReturnType load_native_model(TFModel *tf_model, const char
> > *model_filename)
> > >  {
> > >      int32_t layer;
> > > @@ -471,6 +514,10 @@ static DNNReturnType load_native_model(TFModel
> > *tf_model, const char *model_file
> > >              layer_add_res = add_pad_layer(tf_model, &op,
> > >                                            (LayerPadParams
> > *)conv_network->layers[layer].params, layer);
> > >              break;
> > > +        case MAXIMUM:
> > > +            layer_add_res = add_maximum_layer(tf_model, &op,
> > > +
> > (DnnLayerMaximumParams *)conv_network->layers[layer].params, layer);
> > > +            break;
> > >          default:
> > >              CLEANUP_ON_ERROR(tf_model);
> > >          }
> > > diff --git a/tools/python/convert_from_tensorflow.py
> > b/tools/python/convert_from_tensorflow.py
> > > index 1437ad3..a663b34 100644
> > > --- a/tools/python/convert_from_tensorflow.py
> > > +++ b/tools/python/convert_from_tensorflow.py
> > > @@ -70,7 +70,7 @@ class TFConverter:
> > >          self.converted_nodes = set()
> > >          self.conv2d_scope_names = set()
> > >          self.conv2d_scopename_inputname_dict = {}
> > > -        self.op2code = {'Conv2D':1, 'DepthToSpace':2, 'MirrorPad':3}
> > > +        self.op2code = {'Conv2D':1, 'DepthToSpace':2, 'MirrorPad':3,
> > 'Maximum':4}
> > >          self.mirrorpad_mode = {'CONSTANT':0, 'REFLECT':1,
> > 'SYMMETRIC':2}
> > >          self.name_operand_dict = {}
> > >
> > > @@ -200,6 +200,19 @@ class TFConverter:
> > >          np.array([input_operand_index, output_operand_index],
> > dtype=np.uint32).tofile(f)
> > >
> > >
> > > +    def dump_maximum_to_file(self, node, f):
> > > +        assert(node.op == 'Maximum')
> > > +        self.layer_number = self.layer_number + 1
> > > +        ynode = self.name_node_dict[node.input[1]]
> > > +        y = ynode.attr['value'].tensor.float_val[0]
> > > +        np.array([self.op2code[node.op]], dtype=np.uint32).tofile(f)
> > > +        np.array([y], dtype=np.float32).tofile(f)
> > > +        self.converted_nodes.add(node.name)
> > > +        input_operand_index = self.add_operand(node.input[0],
> > Operand.IOTYPE_INPUT)
> > > +        output_operand_index = self.add_operand(node.name,
> > Operand.IOTYPE_OUTPUT)
> > > +        np.array([input_operand_index, output_operand_index],
> > dtype=np.uint32).tofile(f)
> > > +
> > > +
> > >      def dump_layers_to_file(self, f):
> > >          for node in self.nodes:
> > >              if node.name in self.converted_nodes:
> > > @@ -216,6 +229,8 @@ class TFConverter:
> > >                  self.dump_depth2space_to_file(node, f)
> > >              elif node.op == 'MirrorPad':
> > >                  self.dump_mirrorpad_to_file(node, f)
> > > +            elif node.op == 'Maximum':
> > > +                self.dump_maximum_to_file(node, f)
> > >
> > >
> > >      def dump_operands_to_file(self, f):
> > > diff --git a/tools/python/convert_header.py
> > b/tools/python/convert_header.py
> > > index 6a7e4af..3c2acd5 100644
> > > --- a/tools/python/convert_header.py
> > > +++ b/tools/python/convert_header.py
> > > @@ -23,4 +23,4 @@ str = 'FFMPEGDNNNATIVE'
> > >  major = 0
> > >
> > >  # increase minor when we don't have to re-convert the model file
> > > -minor = 1
> > > +minor = 2
> > > --
> > > 2.7.4
> > >
> >
> > rest LGTM.
> >
> > > _______________________________________________
> > > ffmpeg-devel mailing list
> > > ffmpeg-devel@ffmpeg.org
> > > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> > >
> > > To unsubscribe, visit link above, or email
> > > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
> > _______________________________________________
> > ffmpeg-devel mailing list
> > ffmpeg-devel@ffmpeg.org
> > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> >
> > To unsubscribe, visit link above, or email
> > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
Guo, Yejun Sept. 21, 2019, 7:04 a.m. UTC | #4
> -----Original Message-----

> From: ffmpeg-devel [mailto:ffmpeg-devel-bounces@ffmpeg.org] On Behalf Of

> Pedro Arthur

> Sent: Friday, September 20, 2019 11:14 PM

> To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org>

> Subject: Re: [FFmpeg-devel] [PATCH 1/4] libavfilter/dnn: add layer maximum for

> native mode.

> 

> Em sex, 20 de set de 2019 às 11:50, Guo, Yejun <yejun.guo@intel.com>

> escreveu:

> >

> >

> >

> > > -----Original Message-----

> > > From: ffmpeg-devel [mailto:ffmpeg-devel-bounces@ffmpeg.org] On Behalf

> Of

> > > Pedro Arthur

> > > Sent: Friday, September 20, 2019 10:17 PM

> > > To: FFmpeg development discussions and patches

> <ffmpeg-devel@ffmpeg.org>

> > > Subject: Re: [FFmpeg-devel] [PATCH 1/4] libavfilter/dnn: add layer maximum

> for

> > > native mode.

> > >

> > > Hi,

> > >

> > > Em sex, 20 de set de 2019 às 01:00, Guo, Yejun <yejun.guo@intel.com>

> > > escreveu:

> > > >

> > > > The reason to add this layer is that it is used by srcnn in vf_sr.

> > > > This layer is currently ignored in native mode. After this patch,

> > > > we can add multiple outputs support for native mode.

> > > >

> > > I did not quite understand the commit message. Where does srcnn needs

> > > max a layer?

> >

> > see

> https://github.com/HighVoltageRocknRoll/sr/blob/master/models/model_srcn

> n.py#L39 ,

> > the maximum layer is the last layer of the model.

> I see, indeed if I'm not missing something this max layer is

> superfluous, as the relu activation already does this, right?

> What we have to guarantee is that the output is in the range [0, 1],

> that means we should have had a layer min(y, 1) instead of the max or

> guarantee the conversion from float to integer properly saturates y >

> 1.


yes, I think so.

> 

> >

> > > What is the relation between max layer and supporting multiple outputs?

> >

> > thanks, I did not describe it explicitly, will add more detail as below.

> >

> > The direct relation is the max layer and the model output name, and then

> multiple outputs

> > can be supported after the output name matching is supported.

> >

> > suppose the output name of srcnn is 'y', it means that the output name of

> max layer is 'y'

> > since max layer is the last layer. And suppose the input name of max layer is

> 'z', the network

> > looks like:

> > ... -> 'z' -> (max layer) -> 'y'

> >

> > In current implementation, the max layer is ignored in native mode, it means

> that 'y' is also

> > discarded in native mode. The output name of the native model becomes 'z'.

> And so we could not

> > find the correct output operand with name 'y'.

> >

> > The reason that current implementation works is that we just consider the

> last operand as the

> > model output, ignoring the name matching.

> >

> > to support multiple outputs, we have to recognize output operands by names.

> To support the output searching

> > with name, we must add 'y' back to srcnn (that is to handle max layer), so the

> vf_sr is compatible to work in both tf mode and native mode.

> >

> thanks, in any case the patch is useful, I should push it soon.


thanks.
diff mbox

Patch

diff --git a/libavfilter/dnn/Makefile b/libavfilter/dnn/Makefile
index 63a35e7..721094d 100644
--- a/libavfilter/dnn/Makefile
+++ b/libavfilter/dnn/Makefile
@@ -3,6 +3,7 @@  OBJS-$(CONFIG_DNN)                           += dnn/dnn_backend_native.o
 OBJS-$(CONFIG_DNN)                           += dnn/dnn_backend_native_layer_pad.o
 OBJS-$(CONFIG_DNN)                           += dnn/dnn_backend_native_layer_conv2d.o
 OBJS-$(CONFIG_DNN)                           += dnn/dnn_backend_native_layer_depth2space.o
+OBJS-$(CONFIG_DNN)                           += dnn/dnn_backend_native_layer_maximum.o
 
 DNN-OBJS-$(CONFIG_LIBTENSORFLOW)             += dnn/dnn_backend_tf.o
 
diff --git a/libavfilter/dnn/dnn_backend_native.c b/libavfilter/dnn/dnn_backend_native.c
index be548c6..22a9a33 100644
--- a/libavfilter/dnn/dnn_backend_native.c
+++ b/libavfilter/dnn/dnn_backend_native.c
@@ -28,6 +28,7 @@ 
 #include "dnn_backend_native_layer_pad.h"
 #include "dnn_backend_native_layer_conv2d.h"
 #include "dnn_backend_native_layer_depth2space.h"
+#include "dnn_backend_native_layer_maximum.h"
 
 static DNNReturnType set_input_output_native(void *model, DNNInputData *input, const char *input_name, const char **output_names, uint32_t nb_output)
 {
@@ -78,6 +79,7 @@  DNNModel *ff_dnn_load_model_native(const char *model_filename)
     ConvolutionalParams *conv_params;
     DepthToSpaceParams *depth_to_space_params;
     LayerPadParams *pad_params;
+    DnnLayerMaximumParams *maximum_params;
 
     model = av_malloc(sizeof(DNNModel));
     if (!model){
@@ -237,6 +239,21 @@  DNNModel *ff_dnn_load_model_native(const char *model_filename)
             network->layers[layer].type = MIRROR_PAD;
             network->layers[layer].params = pad_params;
             break;
+        case MAXIMUM:
+            maximum_params = av_malloc(sizeof(*maximum_params));
+            if (!maximum_params){
+                avio_closep(&model_file_context);
+                ff_dnn_free_model_native(&model);
+                return NULL;
+            }
+            maximum_params->val.u32 = avio_rl32(model_file_context);
+            dnn_size += 4;
+            network->layers[layer].type = MAXIMUM;
+            network->layers[layer].params = maximum_params;
+            network->layers[layer].input_operand_indexes[0] = (int32_t)avio_rl32(model_file_context);
+            network->layers[layer].output_operand_index = (int32_t)avio_rl32(model_file_context);
+            dnn_size += 8;
+            break;
         default:
             avio_closep(&model_file_context);
             ff_dnn_free_model_native(&model);
@@ -290,6 +307,7 @@  DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, DNNData *output
     ConvolutionalParams *conv_params;
     DepthToSpaceParams *depth_to_space_params;
     LayerPadParams *pad_params;
+    DnnLayerMaximumParams *maximum_params;
 
     if (network->layers_num <= 0 || network->operands_num <= 0)
         return DNN_ERROR;
@@ -313,6 +331,11 @@  DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, DNNData *output
             dnn_execute_layer_pad(network->operands, network->layers[layer].input_operand_indexes,
                                   network->layers[layer].output_operand_index, pad_params);
             break;
+        case MAXIMUM:
+            maximum_params = (DnnLayerMaximumParams *)network->layers[layer].params;
+            dnn_execute_layer_maximum(network->operands, network->layers[layer].input_operand_indexes,
+                                  network->layers[layer].output_operand_index, maximum_params);
+            break;
         case INPUT:
             return DNN_ERROR;
         }
@@ -333,10 +356,19 @@  DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, DNNData *output
     return DNN_SUCCESS;
 }
 
-int32_t calculate_operand_data_length(DnnOperand* operand)
+int32_t calculate_operand_dims_count(const DnnOperand *oprd)
+{
+    // number of elements in the operand: the product of its four dimensions
+    int32_t count = oprd->dims[0];
+    for (int d = 1; d < 4; ++d)
+        count *= oprd->dims[d];
+    return count;
+}
+
+int32_t calculate_operand_data_length(const DnnOperand* oprd)
 {
     // currently, we just support DNN_FLOAT
-    return operand->dims[0] * operand->dims[1] * operand->dims[2] * operand->dims[3] * sizeof(float);
+    return oprd->dims[0] * oprd->dims[1] * oprd->dims[2] * oprd->dims[3] * sizeof(float);
 }
 
 void ff_dnn_free_model_native(DNNModel **model)
diff --git a/libavfilter/dnn/dnn_backend_native.h b/libavfilter/dnn/dnn_backend_native.h
index a74d138..b238d18 100644
--- a/libavfilter/dnn/dnn_backend_native.h
+++ b/libavfilter/dnn/dnn_backend_native.h
@@ -30,7 +30,7 @@ 
 #include "../dnn_interface.h"
 #include "libavformat/avio.h"
 
-typedef enum {INPUT, CONV, DEPTH_TO_SPACE, MIRROR_PAD} DNNLayerType;
+typedef enum {INPUT = 0, CONV = 1, DEPTH_TO_SPACE = 2, MIRROR_PAD = 3, MAXIMUM = 4} DNNLayerType; // numeric codes match op2code in tools/python/convert_from_tensorflow.py
 
 typedef enum {DOT_INPUT = 1, DOT_OUTPUT = 2, DOT_INTERMEDIATE = DOT_INPUT | DOT_INPUT} DNNOperandType;
 
@@ -104,6 +104,6 @@  DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, DNNData *output
 
 void ff_dnn_free_model_native(DNNModel **model);
 
-int32_t calculate_operand_data_length(DnnOperand *operand);
-
+int32_t calculate_operand_data_length(const DnnOperand *oprd);
+int32_t calculate_operand_dims_count(const DnnOperand *oprd);
 #endif
diff --git a/libavfilter/dnn/dnn_backend_native_layer_maximum.c b/libavfilter/dnn/dnn_backend_native_layer_maximum.c
new file mode 100644
index 0000000..a2669af
--- /dev/null
+++ b/libavfilter/dnn/dnn_backend_native_layer_maximum.c
@@ -0,0 +1,54 @@ 
+/*
+ * Copyright (c) 2019 Guo Yejun
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * DNN native backend implementation.
+ */
+
+#include "dnn_backend_native.h"
+#include "libavutil/avassert.h"
+#include "dnn_backend_native_layer_maximum.h"
+
+int dnn_execute_layer_maximum(DnnOperand *operands, const int32_t *input_operand_indexes, int32_t output_operand_index, const DnnLayerMaximumParams *params)
+{
+    // element-wise max(input, params->val.y); the output operand takes the input's dims and data type
+    const DnnOperand *input = &operands[input_operand_indexes[0]];
+    DnnOperand *output = &operands[output_operand_index];
+    int dims_count;
+    const float *src;
+    float *dst;
+
+    for (int i = 0; i < 4; ++i)
+        output->dims[i] = input->dims[i];
+    output->data_type = input->data_type;
+    output->length = calculate_operand_data_length(output);
+    // av_reallocp() frees the old buffer on failure; storing av_realloc()'s NULL result would leak it
+    if (av_reallocp(&output->data, output->length) < 0)
+        return DNN_ERROR;
+
+    dims_count = calculate_operand_dims_count(output);
+    src = input->data; // read only after the reallocation above
+    dst = output->data;
+    for (int i = 0; i < dims_count; ++i)
+        dst[i] = FFMAX(src[i], params->val.y);
+
+    return DNN_SUCCESS;
+}
diff --git a/libavfilter/dnn/dnn_backend_native_layer_maximum.h b/libavfilter/dnn/dnn_backend_native_layer_maximum.h
new file mode 100644
index 0000000..6396e58
--- /dev/null
+++ b/libavfilter/dnn/dnn_backend_native_layer_maximum.h
@@ -0,0 +1,42 @@ 
+/*
+ * Copyright (c) 2019 Guo Yejun
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * DNN inference functions interface for native backend.
+ */
+
+
+#ifndef AVFILTER_DNN_DNN_BACKEND_NATIVE_LAYER_MAXIMUM_H
+#define AVFILTER_DNN_DNN_BACKEND_NATIVE_LAYER_MAXIMUM_H
+
+#include "libavformat/avio.h"
+#include "dnn_backend_native.h"
+
+typedef struct DnnLayerMaximumParams {
+    union {
+        uint32_t u32; ///< raw 32-bit pattern, filled from the model file by the loader
+        float y;      ///< the same bits viewed as the float constant of the maximum
+    } val;
+} DnnLayerMaximumParams;
+
+int dnn_execute_layer_maximum(DnnOperand *operands, const int32_t *input_operand_indexes, int32_t output_operand_index, const DnnLayerMaximumParams *params);
+
+#endif
diff --git a/libavfilter/dnn/dnn_backend_tf.c b/libavfilter/dnn/dnn_backend_tf.c
index 8a3e40a..612d2e0 100644
--- a/libavfilter/dnn/dnn_backend_tf.c
+++ b/libavfilter/dnn/dnn_backend_tf.c
@@ -30,6 +30,7 @@ 
 #include "libavformat/avio.h"
 #include "libavutil/avassert.h"
 #include "dnn_backend_native_layer_pad.h"
+#include "dnn_backend_native_layer_maximum.h"
 
 #include <tensorflow/c/c_api.h>
 
@@ -401,6 +402,48 @@  static DNNReturnType add_pad_layer(TFModel *tf_model, TF_Operation **cur_op,
     return DNN_SUCCESS;
 }
 
+static DNNReturnType add_maximum_layer(TFModel *tf_model, TF_Operation **cur_op,
+                                       DnnLayerMaximumParams *params, const int layer)
+{
+    TF_Operation *op;
+    TF_Tensor *tensor;
+    TF_OperationDescription *op_desc;
+    TF_Output input;
+
+    char name_buffer[NAME_BUFFER_SIZE];
+    snprintf(name_buffer, NAME_BUFFER_SIZE, "maximum/y%d", layer);
+
+    // scalar float Const node holding params->val.y, used as the second input of Maximum
+    op_desc = TF_NewOperation(tf_model->graph, "Const", name_buffer);
+    TF_SetAttrType(op_desc, "dtype", TF_FLOAT);
+    tensor = TF_AllocateTensor(TF_FLOAT, NULL, 0, TF_DataTypeSize(TF_FLOAT));
+    *(float *)TF_TensorData(tensor) = params->val.y;
+    TF_SetAttrTensor(op_desc, "value", tensor, tf_model->status);
+    TF_DeleteTensor(tensor); // caller still owns the tensor after TF_SetAttrTensor; it leaked on every path before
+    if (TF_GetCode(tf_model->status) != TF_OK){
+        return DNN_ERROR;
+    }
+    op = TF_FinishOperation(op_desc, tf_model->status);
+    if (TF_GetCode(tf_model->status) != TF_OK){
+        return DNN_ERROR;
+    }
+
+    snprintf(name_buffer, NAME_BUFFER_SIZE, "maximum%d", layer);
+    op_desc = TF_NewOperation(tf_model->graph, "Maximum", name_buffer);
+    input.oper = *cur_op;
+    input.index = 0;
+    TF_AddInput(op_desc, input);
+    input.oper = op;
+    TF_AddInput(op_desc, input);
+    TF_SetAttrType(op_desc, "T", TF_FLOAT);
+    *cur_op = TF_FinishOperation(op_desc, tf_model->status);
+    if (TF_GetCode(tf_model->status) != TF_OK){
+        return DNN_ERROR;
+    }
+
+    return DNN_SUCCESS;
+}
+
 static DNNReturnType load_native_model(TFModel *tf_model, const char *model_filename)
 {
     int32_t layer;
@@ -471,6 +514,10 @@  static DNNReturnType load_native_model(TFModel *tf_model, const char *model_file
             layer_add_res = add_pad_layer(tf_model, &op,
                                           (LayerPadParams *)conv_network->layers[layer].params, layer);
             break;
+        case MAXIMUM:
+            layer_add_res = add_maximum_layer(tf_model, &op,
+                                          (DnnLayerMaximumParams *)conv_network->layers[layer].params, layer);
+            break;
         default:
             CLEANUP_ON_ERROR(tf_model);
         }
diff --git a/tools/python/convert_from_tensorflow.py b/tools/python/convert_from_tensorflow.py
index 1437ad3..a663b34 100644
--- a/tools/python/convert_from_tensorflow.py
+++ b/tools/python/convert_from_tensorflow.py
@@ -70,7 +70,7 @@  class TFConverter:
         self.converted_nodes = set()
         self.conv2d_scope_names = set()
         self.conv2d_scopename_inputname_dict = {}
-        self.op2code = {'Conv2D':1, 'DepthToSpace':2, 'MirrorPad':3}
+        self.op2code = {'Conv2D':1, 'DepthToSpace':2, 'MirrorPad':3, 'Maximum':4}
         self.mirrorpad_mode = {'CONSTANT':0, 'REFLECT':1, 'SYMMETRIC':2}
         self.name_operand_dict = {}
 
@@ -200,6 +200,19 @@  class TFConverter:
         np.array([input_operand_index, output_operand_index], dtype=np.uint32).tofile(f)
 
 
+    def dump_maximum_to_file(self, node, f):
+        """Write a Maximum layer: op code (uint32), constant y (float32), input/output operand indexes (uint32)."""
+        assert(node.op == 'Maximum')
+        self.layer_number += 1
+        threshold = self.name_node_dict[node.input[1]].attr['value'].tensor.float_val[0]
+        np.array([self.op2code[node.op]], dtype=np.uint32).tofile(f)
+        np.array([threshold], dtype=np.float32).tofile(f)
+        self.converted_nodes.add(node.name)
+        in_index = self.add_operand(node.input[0], Operand.IOTYPE_INPUT)
+        out_index = self.add_operand(node.name, Operand.IOTYPE_OUTPUT)
+        np.array([in_index, out_index], dtype=np.uint32).tofile(f)
+
+
     def dump_layers_to_file(self, f):
         for node in self.nodes:
             if node.name in self.converted_nodes:
@@ -216,6 +229,8 @@  class TFConverter:
                 self.dump_depth2space_to_file(node, f)
             elif node.op == 'MirrorPad':
                 self.dump_mirrorpad_to_file(node, f)
+            elif node.op == 'Maximum':
+                self.dump_maximum_to_file(node, f)
 
 
     def dump_operands_to_file(self, f):
diff --git a/tools/python/convert_header.py b/tools/python/convert_header.py
index 6a7e4af..3c2acd5 100644
--- a/tools/python/convert_header.py
+++ b/tools/python/convert_header.py
@@ -23,4 +23,4 @@  str = 'FFMPEGDNNNATIVE'
 major = 0
 
 # increase minor when we don't have to re-convert the model file
-minor = 1
+minor = 2