Message ID | 20200717152313.27672-1-ting.fu@intel.com |
---|---|
State | Superseded |
Headers | show |
Series | [FFmpeg-devel,1/2] dnn/native: add native support for avg_pool | expand |
Context | Check | Description |
---|---|---|
andriy/default | pending | |
andriy/make | success | Make finished |
andriy/make_fate | success | Make fate finished |
> -----Original Message----- > From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of Ting Fu > Sent: 2020年7月17日 23:23 > To: ffmpeg-devel@ffmpeg.org > Subject: [FFmpeg-devel] [PATCH 1/2] dnn/native: add native support for > avg_pool > > It can be tested with the model generated with below python script: > > import tensorflow as tf > import numpy as np > import imageio > > in_img = imageio.imread('input_odd.jpg') > in_img = in_img.astype(np.float32)/255.0 > in_data = in_img[np.newaxis, :] > > x = tf.placeholder(tf.float32, shape=[1, None, None, 3], name='dnn_in') > x_pool = tf.nn.avg_pool(x, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME') > #please alter the params as needed > y = tf.identity(x_pool, name='dnn_out') > > sess=tf.Session() > sess.run(tf.global_variables_initializer()) > > graph_def = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, > ['dnn_out']) > tf.train.write_graph(graph_def, '.', 'image_process.pb', as_text=False) > > print("image_process.pb generated, please use \ > path_to_ffmpeg/tools/python/convert.py to generate image_process.model\n") > > output = sess.run(y, feed_dict={x: in_data}) > imageio.imsave("out.jpg", np.squeeze(output)) > > Signed-off-by: Ting Fu <ting.fu@intel.com> > --- > libavfilter/dnn/Makefile | 1 + > libavfilter/dnn/dnn_backend_native.h | 2 + > .../dnn/dnn_backend_native_layer_avgpool.c | 136 ++++++++++++++++++ > .../dnn/dnn_backend_native_layer_avgpool.h | 35 +++++ > .../dnn/dnn_backend_native_layer_conv2d.h | 3 +- > libavfilter/dnn/dnn_backend_native_layers.c | 2 + > tools/python/convert_from_tensorflow.py | 31 +++- > 7 files changed, 207 insertions(+), 3 deletions(-) > create mode 100644 libavfilter/dnn/dnn_backend_native_layer_avgpool.c > create mode 100644 libavfilter/dnn/dnn_backend_native_layer_avgpool.h > > diff --git a/libavfilter/dnn/Makefile b/libavfilter/dnn/Makefile > index d90137ec42..e0957073ee 100644 > --- a/libavfilter/dnn/Makefile > +++ b/libavfilter/dnn/Makefile > @@ -1,6 
+1,7 @@ > OBJS-$(CONFIG_DNN) += > dnn/dnn_interface.o > OBJS-$(CONFIG_DNN) += > dnn/dnn_backend_native.o > OBJS-$(CONFIG_DNN) += > dnn/dnn_backend_native_layers.o > +OBJS-$(CONFIG_DNN) += > dnn/dnn_backend_native_layer_avgpool.o > OBJS-$(CONFIG_DNN) += > dnn/dnn_backend_native_layer_pad.o > OBJS-$(CONFIG_DNN) += > dnn/dnn_backend_native_layer_conv2d.o > OBJS-$(CONFIG_DNN) += > dnn/dnn_backend_native_layer_depth2space.o > diff --git a/libavfilter/dnn/dnn_backend_native.h > b/libavfilter/dnn/dnn_backend_native.h > index 62191ffe88..26e9a33387 100644 > --- a/libavfilter/dnn/dnn_backend_native.h > +++ b/libavfilter/dnn/dnn_backend_native.h > @@ -43,10 +43,12 @@ typedef enum { > DLT_MAXIMUM = 4, > DLT_MATH_BINARY = 5, > DLT_MATH_UNARY = 6, > + DLT_AVG_POOL = 7, > DLT_COUNT > } DNNLayerType; > > typedef enum {DOT_INPUT = 1, DOT_OUTPUT = 2, DOT_INTERMEDIATE = > DOT_INPUT | DOT_OUTPUT} DNNOperandType; > +typedef enum {VALID, SAME, SAME_CLAMP_TO_EDGE} DNNPaddingParam; > > typedef struct Layer{ > DNNLayerType type; > diff --git a/libavfilter/dnn/dnn_backend_native_layer_avgpool.c > b/libavfilter/dnn/dnn_backend_native_layer_avgpool.c > new file mode 100644 > index 0000000000..f5a3f4a0dc > --- /dev/null > +++ b/libavfilter/dnn/dnn_backend_native_layer_avgpool.c > @@ -0,0 +1,136 @@ > +/* > + * Copyright (c) 2020 > + * > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. 
> + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA > + */ > + > +/** > + * @file > + * DNN native backend implementation. > + */ > + > +#include "libavutil/avassert.h" > +#include "dnn_backend_native_layer_avgpool.h" > + > +int dnn_load_layer_avg_pool(Layer *layer, AVIOContext *model_file_context, > int file_size, int operands_num) > +{ > + AvgPoolParams *avgpool_params; > + int dnn_size = 0; > + avgpool_params = av_malloc(sizeof(*avgpool_params)); > + if(!avgpool_params) > + return 0; > + > + avgpool_params->strides = (int32_t)avio_rl32(model_file_context); > + avgpool_params->padding_method = > (int32_t)avio_rl32(model_file_context); > + avgpool_params->in_channels = (int32_t)avio_rl32(model_file_context); > + avgpool_params->out_channels = (int32_t)avio_rl32(model_file_context); > + avgpool_params->kernel_size = (int32_t)avio_rl32(model_file_context); > + dnn_size += 20; > + > + if (dnn_size > file_size || avgpool_params->in_channels <= 0 || > + avgpool_params->out_channels <= 0 || > avgpool_params->kernel_size <= 0 || > + avgpool_params->strides <=0){ > + av_freep(&avgpool_params); > + return 0; > + } > + > + layer->params = avgpool_params; > + layer->input_operand_indexes[0] = > (int32_t)avio_rl32(model_file_context); > + layer->output_operand_index = (int32_t)avio_rl32(model_file_context); > + dnn_size += 8; > + > + if (layer->input_operand_indexes[0] >= operands_num || > layer->output_operand_index >= operands_num) { > + return 0; > + } > + return dnn_size; > +} > + > +int dnn_execute_layer_avg_pool(DnnOperand *operands, const int32_t > *input_operand_indexes, > + int32_t output_operand_index, const void > *parameters) > +{ > + float *output; > + int height_end, width_end, height_radius, width_radius, output_height, > output_width, kernel_area; > + int32_t input_operand_index = 
input_operand_indexes[0]; > + int number = operands[input_operand_index].dims[0]; > + int height = operands[input_operand_index].dims[1]; > + int width = operands[input_operand_index].dims[2]; > + int channel = operands[input_operand_index].dims[3]; the input channel should come from here, not in AvgPoolParams. And so as output channel. > + const float *input = operands[input_operand_index].data; > + const AvgPoolParams *avgpool_params = (const AvgPoolParams > *)parameters; > + > + float kernel_strides = avgpool_params->strides; why float? > + int src_linesize = width * avgpool_params->in_channels; > + DnnOperand *output_operand = &operands[output_operand_index]; > + > + if (avgpool_params->padding_method == SAME) { > + height_end = height; > + width_end = width; > + height_radius = (avgpool_params->kernel_size - ((height - 1) % (int) > kernel_strides + 1)); don't need the first '(' and last ')'. why we need to consider kernel_strides here? > + width_radius = (avgpool_params->kernel_size - ((width - 1) % (int) > kernel_strides + 1)); same as above. > + height_radius = height_radius < 0 ? 0 : height_radius >> 1; > + width_radius = width_radius < 0 ? 
0 : width_radius >> 1; > + output_height = ceil(height / kernel_strides); > + output_width = ceil(width / kernel_strides); > + } else { > + height_end = height - avgpool_params->kernel_size + 1; > + width_end = width - avgpool_params->kernel_size + 1; > + height_radius = 0; > + width_radius = 0; > + output_height = ceil((height - avgpool_params->kernel_size + 1) / > kernel_strides); > + output_width = ceil((width - avgpool_params->kernel_size + 1) / > kernel_strides); > + } > + > + output_operand->dims[0] = number; > + output_operand->dims[1] = output_height; > + output_operand->dims[2] = output_width; > + output_operand->dims[3] = avgpool_params->out_channels; > + output_operand->data_type = > operands[input_operand_index].data_type; > + output_operand->length = > calculate_operand_data_length(output_operand); > + output_operand->data = av_realloc(output_operand->data, > output_operand->length); > + if (!output_operand->data) > + return -1; > + output = output_operand->data; > + > + av_assert0(channel == avgpool_params->in_channels); > + > + for (int y = 0; y < height_end; y += kernel_strides) { > + for (int x = 0; x < width_end; x += kernel_strides) { > + for (int n_filter = 0; n_filter < avgpool_params->out_channels; > ++n_filter) { [] better to use n_channel, instead of n_filter. 
> + output[n_filter] = 0.0; > + kernel_area = 0; > + for (int kernel_y = 0; kernel_y < > avgpool_params->kernel_size; ++kernel_y) { > + for (int kernel_x = 0; kernel_x < > avgpool_params->kernel_size; ++kernel_x) { > + float input_pel; > + int y_pos = y + (kernel_y - height_radius); > + int x_pos = x + (kernel_x - width_radius); > + if (x_pos < 0 || x_pos >= width || y_pos < 0 || > y_pos >= height) { > + input_pel = 0.0; > + } else { > + kernel_area++; > + input_pel = input[y_pos * src_linesize + > x_pos * avgpool_params->in_channels + n_filter]; > + } > + output[n_filter] += input_pel; > + } > + } > + output[n_filter] /= kernel_area; > + } > + output += avgpool_params->out_channels; > + } > + } > + > + return 0; > +} > diff --git a/libavfilter/dnn/dnn_backend_native_layer_avgpool.h > b/libavfilter/dnn/dnn_backend_native_layer_avgpool.h > new file mode 100644 > index 0000000000..0b37a8f64b > --- /dev/null > +++ b/libavfilter/dnn/dnn_backend_native_layer_avgpool.h > @@ -0,0 +1,35 @@ > +/* > + * Copyright (c) 2018 Sergey Lavrushkin > + * > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2.1 of the License, or (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. 
> + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with FFmpeg; if not, write to the Free Software > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA > + */ > + > +#ifndef AVFILTER_DNN_DNN_BACKEND_NATIVE_LAYER_AVGPOOL_H > +#define AVFILTER_DNN_DNN_BACKEND_NATIVE_LAYER_AVGPOOL_H > + > +#include "dnn_backend_native.h" > + > +typedef struct AvgPoolParams{ > + int32_t strides, in_channels, out_channels, kernel_size; > + DNNPaddingParam padding_method; > +} AvgPoolParams; > + > +int dnn_load_layer_avg_pool(Layer *layer, AVIOContext *model_file_context, > int file_size, int operands_num); > +int dnn_execute_layer_avg_pool(DnnOperand *operands, const int32_t > *input_operand_indexes, > + int32_t output_operand_index, const void > *parameters); > + > +#endif > diff --git a/libavfilter/dnn/dnn_backend_native_layer_conv2d.h > b/libavfilter/dnn/dnn_backend_native_layer_conv2d.h > index eeb15fdf01..b240b7ef6b 100644 > --- a/libavfilter/dnn/dnn_backend_native_layer_conv2d.h > +++ b/libavfilter/dnn/dnn_backend_native_layer_conv2d.h > @@ -24,12 +24,11 @@ > #include "dnn_backend_native.h" > > typedef enum {RELU, TANH, SIGMOID, NONE, LEAKY_RELU} > DNNActivationFunc; > -typedef enum {VALID, SAME, SAME_CLAMP_TO_EDGE} > DNNConvPaddingParam; > > typedef struct ConvolutionalParams{ > int32_t input_num, output_num, kernel_size; > DNNActivationFunc activation; > - DNNConvPaddingParam padding_method; > + DNNPaddingParam padding_method; > int32_t dilation; > int32_t has_bias; > float *kernel; > diff --git a/libavfilter/dnn/dnn_backend_native_layers.c > b/libavfilter/dnn/dnn_backend_native_layers.c > index 70f9a5f958..4f42f62abb 100644 > --- a/libavfilter/dnn/dnn_backend_native_layers.c > +++ b/libavfilter/dnn/dnn_backend_native_layers.c > @@ -26,6 +26,7 @@ > #include "dnn_backend_native_layer_maximum.h" > #include "dnn_backend_native_layer_mathbinary.h" > #include "dnn_backend_native_layer_mathunary.h" > +#include 
"dnn_backend_native_layer_avgpool.h" > > LayerFunc layer_funcs[DLT_COUNT] = { > {NULL, NULL}, > @@ -35,4 +36,5 @@ LayerFunc layer_funcs[DLT_COUNT] = { > {dnn_execute_layer_maximum, dnn_load_layer_maximum}, > {dnn_execute_layer_math_binary, dnn_load_layer_math_binary}, > {dnn_execute_layer_math_unary, dnn_load_layer_math_unary}, > + {dnn_execute_layer_avg_pool, dnn_load_layer_avg_pool}, > }; > diff --git a/tools/python/convert_from_tensorflow.py > b/tools/python/convert_from_tensorflow.py > index 85db7bf710..975381e720 100644 > --- a/tools/python/convert_from_tensorflow.py > +++ b/tools/python/convert_from_tensorflow.py > @@ -67,10 +67,12 @@ class TFConverter: > self.edges = {} > self.conv_activations = {'Relu':0, 'Tanh':1, 'Sigmoid':2, 'None':3, > 'LeakyRelu':4} > self.conv_paddings = {'VALID':0, 'SAME':1} > + self.pool_paddings = {'VALID':0, 'SAME':1} > self.converted_nodes = set() > self.conv2d_scope_names = set() > self.conv2d_scopename_inputname_dict = {} > - self.op2code = {'Conv2D':1, 'DepthToSpace':2, 'MirrorPad':3, > 'Maximum':4, 'MathBinary':5, 'MathUnary':6} > + self.op2code = {'Conv2D':1, 'DepthToSpace':2, 'MirrorPad':3, > 'Maximum':4, > + 'MathBinary':5, 'MathUnary':6, 'AvgPool':7} > self.mathbin2code = {'Sub':0, 'Add':1, 'Mul':2, 'RealDiv':3, > 'Minimum':4} > self.mathun2code = {'Abs':0, 'Sin':1, 'Cos':2, 'Tan':3, 'Asin':4, 'Acos':5, > 'Atan':6, 'Sinh':7, 'Cosh':8, 'Tanh':9, 'Asinh':10, 'Acosh':11, 'Atanh':12} > self.mirrorpad_mode = {'CONSTANT':0, 'REFLECT':1, 'SYMMETRIC':2} > @@ -298,6 +300,31 @@ class TFConverter: > np.array([output_operand_index],dtype=np.uint32).tofile(f) > > > + def dump_avg_pool_to_file(self, node, f): > + assert(node.op == 'AvgPool') > + self.layer_number = self.layer_number + 1 > + self.converted_nodes.add(node.name) > + node0 = self.name_node_dict[node.input[0]] > + strides = node.attr['strides'] > + assert(strides.list.i[1]==strides.list.i[2]) > + strides = strides.list.i[1] > + filter_node = node.attr['ksize'] > + 
input_name = node.input[0] [] we can save strides[4] and ksize[4] in .model file, and do part support in .c file. > + > + filter_height = filter_node.list.i[1] > + filter_width = filter_node.list.i[2] > + > + in_channels = node0.attr['shape'].shape.dim[3].size > + out_channels = in_channels > + padding = node.attr['padding'].s.decode("utf-8") > + np.array([self.op2code[node.op], strides, self.pool_paddings[padding], > in_channels, out_channels, > + filter_height],dtype=np.uint32).tofile(f) > + > + input_operand_index = self.add_operand(input_name, > Operand.IOTYPE_INPUT) > + output_operand_index = self.add_operand(node.name, > Operand.IOTYPE_OUTPUT) > + np.array([input_operand_index, > output_operand_index],dtype=np.uint32).tofile(f) > + > + > def dump_layers_to_file(self, f): > for node in self.nodes: > if node.name in self.converted_nodes: > @@ -311,6 +338,8 @@ class TFConverter: > > if node.op == 'Conv2D': > self.dump_simple_conv2d_to_file(node, f) > + if node.op == 'AvgPool': > + self.dump_avg_pool_to_file(node, f) > elif node.op == 'DepthToSpace': > self.dump_depth2space_to_file(node, f) > elif node.op == 'MirrorPad': > -- > 2.17.1 > > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
> -----Original Message----- > From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of Guo, > Yejun > Sent: Monday, July 20, 2020 01:46 PM > To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org> > Subject: Re: [FFmpeg-devel] [PATCH 1/2] dnn/native: add native support for > avg_pool > > > > > -----Original Message----- > > From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of Ting > > Fu > > Sent: 2020年7月17日 23:23 > > To: ffmpeg-devel@ffmpeg.org > > Subject: [FFmpeg-devel] [PATCH 1/2] dnn/native: add native support for > > avg_pool > > > > It can be tested with the model generated with below python script: > > > > import tensorflow as tf > > import numpy as np > > import imageio > > > > in_img = imageio.imread('input_odd.jpg') in_img = > > in_img.astype(np.float32)/255.0 in_data = in_img[np.newaxis, :] > > > > x = tf.placeholder(tf.float32, shape=[1, None, None, 3], > > name='dnn_in') x_pool = tf.nn.avg_pool(x, ksize=[1,2,2,1], > > strides=[1,2,2,1], padding='SAME') #please alter the params as needed > > y = tf.identity(x_pool, name='dnn_out') > > > > sess=tf.Session() > > sess.run(tf.global_variables_initializer()) > > > > graph_def = tf.graph_util.convert_variables_to_constants(sess, > > sess.graph_def, > > ['dnn_out']) > > tf.train.write_graph(graph_def, '.', 'image_process.pb', > > as_text=False) > > > > print("image_process.pb generated, please use \ > > path_to_ffmpeg/tools/python/convert.py to generate > > image_process.model\n") > > > > output = sess.run(y, feed_dict={x: in_data}) imageio.imsave("out.jpg", > > np.squeeze(output)) > > > > Signed-off-by: Ting Fu <ting.fu@intel.com> > > --- > > libavfilter/dnn/Makefile | 1 + > > libavfilter/dnn/dnn_backend_native.h | 2 + > > .../dnn/dnn_backend_native_layer_avgpool.c | 136 ++++++++++++++++++ > > .../dnn/dnn_backend_native_layer_avgpool.h | 35 +++++ > > .../dnn/dnn_backend_native_layer_conv2d.h | 3 +- > > libavfilter/dnn/dnn_backend_native_layers.c | 2 + > > 
tools/python/convert_from_tensorflow.py | 31 +++- > > 7 files changed, 207 insertions(+), 3 deletions(-) create mode > > 100644 libavfilter/dnn/dnn_backend_native_layer_avgpool.c > > create mode 100644 libavfilter/dnn/dnn_backend_native_layer_avgpool.h > > [...] > > + int32_t input_operand_index = input_operand_indexes[0]; > > + int number = operands[input_operand_index].dims[0]; > > + int height = operands[input_operand_index].dims[1]; > > + int width = operands[input_operand_index].dims[2]; > > + int channel = operands[input_operand_index].dims[3]; > > the input channel should come from here, not in AvgPoolParams. > And so as output channel. Hi Yejun, I understand that in_channel should come from here. Does 'so as output channel' mean out_channel = in_channel here (since pooling across channels is not supported)? > > > + const float *input = operands[input_operand_index].data; > > + const AvgPoolParams *avgpool_params = (const AvgPoolParams > > *)parameters; > > + > > + float kernel_strides = avgpool_params->strides; > > why float? It is a float so that height/kernel_strides is computed as a floating-point division before being passed to ceil() below. Or should I multiply kernel_strides by 1.0 when calling ceil()? > > > + int src_linesize = width * avgpool_params->in_channels; > > + DnnOperand *output_operand = &operands[output_operand_index]; > > + > > + if (avgpool_params->padding_method == SAME) { > > + height_end = height; > > + width_end = width; > > + height_radius = (avgpool_params->kernel_size - ((height - 1) > > + % (int) > > kernel_strides + 1)); > > don't need the first '(' and last ')'. OK > > why we need to consider kernel_strides here? Because when padding_method=SAME, TensorFlow pads only as many zero pixels as the kernel needs beyond the remainder pixels, and puts half of them (rounded down) before the image. E.g. if the width is 1080 and strides=11, then 1080%11=2. If ksize=5, it pads (5-2)>>1=1 column before the image and 2 columns after the image.
And if ksize=2, then 2-2=0: the remainder pixels are exactly enough for one more pooling window, so no zero pixels are padded. This means the number of padded zero pixels depends on the number of remainder pixels. Does the example make sense? > > > + width_radius = (avgpool_params->kernel_size - ((width - 1) % > > + (int) > > kernel_strides + 1)); > > same as above. > > > + height_radius = height_radius < 0 ? 0 : height_radius >> 1; > > + width_radius = width_radius < 0 ? 0 : width_radius >> 1; [...] > > + for (int y = 0; y < height_end; y += kernel_strides) { > > + for (int x = 0; x < width_end; x += kernel_strides) { > > + for (int n_filter = 0; n_filter < > > + avgpool_params->out_channels; > > ++n_filter) { > [] > better to use n_channel, instead of n_filter. Sure > > > + output[n_filter] = 0.0; > > + kernel_area = 0; [...] > > + def dump_avg_pool_to_file(self, node, f): > > + assert(node.op == 'AvgPool') > > + self.layer_number = self.layer_number + 1 > > + self.converted_nodes.add(node.name) > > + node0 = self.name_node_dict[node.input[0]] > > + strides = node.attr['strides'] > > + assert(strides.list.i[1]==strides.list.i[2]) > > + strides = strides.list.i[1] > > + filter_node = node.attr['ksize'] > > + input_name = node.input[0] > [] > we can save strides[4] and ksize[4] in .model file, and do part support in .c file. Do you mean saving all 4 values of strides and ksize in the .model file, and extracting the ones we need in the .c file?
> > > + > > + filter_height = filter_node.list.i[1] > > + filter_width = filter_node.list.i[2] > > + > > + in_channels = node0.attr['shape'].shape.dim[3].size > > + out_channels = in_channels > > + padding = node.attr['padding'].s.decode("utf-8") > > + np.array([self.op2code[node.op], strides, > > + self.pool_paddings[padding], > > in_channels, out_channels, > > + filter_height],dtype=np.uint32).tofile(f) > > + > > + input_operand_index = self.add_operand(input_name, > > Operand.IOTYPE_INPUT) > > + output_operand_index = self.add_operand(node.name, > > Operand.IOTYPE_OUTPUT) > > + np.array([input_operand_index, > > output_operand_index],dtype=np.uint32).tofile(f) > > + > > + > > def dump_layers_to_file(self, f): > > for node in self.nodes: > > if node.name in self.converted_nodes: > > @@ -311,6 +338,8 @@ class TFConverter: > > > > if node.op == 'Conv2D': > > self.dump_simple_conv2d_to_file(node, f) > > + if node.op == 'AvgPool': > > + self.dump_avg_pool_to_file(node, f) > > elif node.op == 'DepthToSpace': > > self.dump_depth2space_to_file(node, f) > > elif node.op == 'MirrorPad': > > -- > > 2.17.1 > > > > _______________________________________________ > > ffmpeg-devel mailing list > > ffmpeg-devel@ffmpeg.org > > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > > > To unsubscribe, visit link above, or email > > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe". > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org > with subject "unsubscribe".
diff --git a/libavfilter/dnn/Makefile b/libavfilter/dnn/Makefile index d90137ec42..e0957073ee 100644 --- a/libavfilter/dnn/Makefile +++ b/libavfilter/dnn/Makefile @@ -1,6 +1,7 @@ OBJS-$(CONFIG_DNN) += dnn/dnn_interface.o OBJS-$(CONFIG_DNN) += dnn/dnn_backend_native.o OBJS-$(CONFIG_DNN) += dnn/dnn_backend_native_layers.o +OBJS-$(CONFIG_DNN) += dnn/dnn_backend_native_layer_avgpool.o OBJS-$(CONFIG_DNN) += dnn/dnn_backend_native_layer_pad.o OBJS-$(CONFIG_DNN) += dnn/dnn_backend_native_layer_conv2d.o OBJS-$(CONFIG_DNN) += dnn/dnn_backend_native_layer_depth2space.o diff --git a/libavfilter/dnn/dnn_backend_native.h b/libavfilter/dnn/dnn_backend_native.h index 62191ffe88..26e9a33387 100644 --- a/libavfilter/dnn/dnn_backend_native.h +++ b/libavfilter/dnn/dnn_backend_native.h @@ -43,10 +43,12 @@ typedef enum { DLT_MAXIMUM = 4, DLT_MATH_BINARY = 5, DLT_MATH_UNARY = 6, + DLT_AVG_POOL = 7, DLT_COUNT } DNNLayerType; typedef enum {DOT_INPUT = 1, DOT_OUTPUT = 2, DOT_INTERMEDIATE = DOT_INPUT | DOT_OUTPUT} DNNOperandType; +typedef enum {VALID, SAME, SAME_CLAMP_TO_EDGE} DNNPaddingParam; typedef struct Layer{ DNNLayerType type; diff --git a/libavfilter/dnn/dnn_backend_native_layer_avgpool.c b/libavfilter/dnn/dnn_backend_native_layer_avgpool.c new file mode 100644 index 0000000000..f5a3f4a0dc --- /dev/null +++ b/libavfilter/dnn/dnn_backend_native_layer_avgpool.c @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2020 + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * DNN native backend implementation. + */ + +#include "libavutil/avassert.h" +#include "dnn_backend_native_layer_avgpool.h" + +int dnn_load_layer_avg_pool(Layer *layer, AVIOContext *model_file_context, int file_size, int operands_num) +{ + AvgPoolParams *avgpool_params; + int dnn_size = 0; + avgpool_params = av_malloc(sizeof(*avgpool_params)); + if(!avgpool_params) + return 0; + + avgpool_params->strides = (int32_t)avio_rl32(model_file_context); + avgpool_params->padding_method = (int32_t)avio_rl32(model_file_context); + avgpool_params->in_channels = (int32_t)avio_rl32(model_file_context); + avgpool_params->out_channels = (int32_t)avio_rl32(model_file_context); + avgpool_params->kernel_size = (int32_t)avio_rl32(model_file_context); + dnn_size += 20; + + if (dnn_size > file_size || avgpool_params->in_channels <= 0 || + avgpool_params->out_channels <= 0 || avgpool_params->kernel_size <= 0 || + avgpool_params->strides <=0){ + av_freep(&avgpool_params); + return 0; + } + + layer->params = avgpool_params; + layer->input_operand_indexes[0] = (int32_t)avio_rl32(model_file_context); + layer->output_operand_index = (int32_t)avio_rl32(model_file_context); + dnn_size += 8; + + if (layer->input_operand_indexes[0] >= operands_num || layer->output_operand_index >= operands_num) { + return 0; + } + return dnn_size; +} + +int dnn_execute_layer_avg_pool(DnnOperand *operands, const int32_t *input_operand_indexes, + int32_t output_operand_index, const void *parameters) +{ + float *output; + int height_end, width_end, height_radius, width_radius, output_height, output_width, kernel_area; + int32_t input_operand_index = input_operand_indexes[0]; + int number = operands[input_operand_index].dims[0]; + int height = 
operands[input_operand_index].dims[1]; + int width = operands[input_operand_index].dims[2]; + int channel = operands[input_operand_index].dims[3]; + const float *input = operands[input_operand_index].data; + const AvgPoolParams *avgpool_params = (const AvgPoolParams *)parameters; + + float kernel_strides = avgpool_params->strides; + int src_linesize = width * avgpool_params->in_channels; + DnnOperand *output_operand = &operands[output_operand_index]; + + if (avgpool_params->padding_method == SAME) { + height_end = height; + width_end = width; + height_radius = (avgpool_params->kernel_size - ((height - 1) % (int) kernel_strides + 1)); + width_radius = (avgpool_params->kernel_size - ((width - 1) % (int) kernel_strides + 1)); + height_radius = height_radius < 0 ? 0 : height_radius >> 1; + width_radius = width_radius < 0 ? 0 : width_radius >> 1; + output_height = ceil(height / kernel_strides); + output_width = ceil(width / kernel_strides); + } else { + height_end = height - avgpool_params->kernel_size + 1; + width_end = width - avgpool_params->kernel_size + 1; + height_radius = 0; + width_radius = 0; + output_height = ceil((height - avgpool_params->kernel_size + 1) / kernel_strides); + output_width = ceil((width - avgpool_params->kernel_size + 1) / kernel_strides); + } + + output_operand->dims[0] = number; + output_operand->dims[1] = output_height; + output_operand->dims[2] = output_width; + output_operand->dims[3] = avgpool_params->out_channels; + output_operand->data_type = operands[input_operand_index].data_type; + output_operand->length = calculate_operand_data_length(output_operand); + output_operand->data = av_realloc(output_operand->data, output_operand->length); + if (!output_operand->data) + return -1; + output = output_operand->data; + + av_assert0(channel == avgpool_params->in_channels); + + for (int y = 0; y < height_end; y += kernel_strides) { + for (int x = 0; x < width_end; x += kernel_strides) { + for (int n_filter = 0; n_filter < 
avgpool_params->out_channels; ++n_filter) { + output[n_filter] = 0.0; + kernel_area = 0; + for (int kernel_y = 0; kernel_y < avgpool_params->kernel_size; ++kernel_y) { + for (int kernel_x = 0; kernel_x < avgpool_params->kernel_size; ++kernel_x) { + float input_pel; + int y_pos = y + (kernel_y - height_radius); + int x_pos = x + (kernel_x - width_radius); + if (x_pos < 0 || x_pos >= width || y_pos < 0 || y_pos >= height) { + input_pel = 0.0; + } else { + kernel_area++; + input_pel = input[y_pos * src_linesize + x_pos * avgpool_params->in_channels + n_filter]; + } + output[n_filter] += input_pel; + } + } + output[n_filter] /= kernel_area; + } + output += avgpool_params->out_channels; + } + } + + return 0; +} diff --git a/libavfilter/dnn/dnn_backend_native_layer_avgpool.h b/libavfilter/dnn/dnn_backend_native_layer_avgpool.h new file mode 100644 index 0000000000..0b37a8f64b --- /dev/null +++ b/libavfilter/dnn/dnn_backend_native_layer_avgpool.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2018 Sergey Lavrushkin + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVFILTER_DNN_DNN_BACKEND_NATIVE_LAYER_AVGPOOL_H +#define AVFILTER_DNN_DNN_BACKEND_NATIVE_LAYER_AVGPOOL_H + +#include "dnn_backend_native.h" + +typedef struct AvgPoolParams{ + int32_t strides, in_channels, out_channels, kernel_size; + DNNPaddingParam padding_method; +} AvgPoolParams; + +int dnn_load_layer_avg_pool(Layer *layer, AVIOContext *model_file_context, int file_size, int operands_num); +int dnn_execute_layer_avg_pool(DnnOperand *operands, const int32_t *input_operand_indexes, + int32_t output_operand_index, const void *parameters); + +#endif diff --git a/libavfilter/dnn/dnn_backend_native_layer_conv2d.h b/libavfilter/dnn/dnn_backend_native_layer_conv2d.h index eeb15fdf01..b240b7ef6b 100644 --- a/libavfilter/dnn/dnn_backend_native_layer_conv2d.h +++ b/libavfilter/dnn/dnn_backend_native_layer_conv2d.h @@ -24,12 +24,11 @@ #include "dnn_backend_native.h" typedef enum {RELU, TANH, SIGMOID, NONE, LEAKY_RELU} DNNActivationFunc; -typedef enum {VALID, SAME, SAME_CLAMP_TO_EDGE} DNNConvPaddingParam; typedef struct ConvolutionalParams{ int32_t input_num, output_num, kernel_size; DNNActivationFunc activation; - DNNConvPaddingParam padding_method; + DNNPaddingParam padding_method; int32_t dilation; int32_t has_bias; float *kernel; diff --git a/libavfilter/dnn/dnn_backend_native_layers.c b/libavfilter/dnn/dnn_backend_native_layers.c index 70f9a5f958..4f42f62abb 100644 --- a/libavfilter/dnn/dnn_backend_native_layers.c +++ b/libavfilter/dnn/dnn_backend_native_layers.c @@ -26,6 +26,7 @@ #include "dnn_backend_native_layer_maximum.h" #include "dnn_backend_native_layer_mathbinary.h" #include "dnn_backend_native_layer_mathunary.h" +#include "dnn_backend_native_layer_avgpool.h" LayerFunc layer_funcs[DLT_COUNT] = { {NULL, NULL}, @@ -35,4 +36,5 @@ 
LayerFunc layer_funcs[DLT_COUNT] = { {dnn_execute_layer_maximum, dnn_load_layer_maximum}, {dnn_execute_layer_math_binary, dnn_load_layer_math_binary}, {dnn_execute_layer_math_unary, dnn_load_layer_math_unary}, + {dnn_execute_layer_avg_pool, dnn_load_layer_avg_pool}, }; diff --git a/tools/python/convert_from_tensorflow.py b/tools/python/convert_from_tensorflow.py index 85db7bf710..975381e720 100644 --- a/tools/python/convert_from_tensorflow.py +++ b/tools/python/convert_from_tensorflow.py @@ -67,10 +67,12 @@ class TFConverter: self.edges = {} self.conv_activations = {'Relu':0, 'Tanh':1, 'Sigmoid':2, 'None':3, 'LeakyRelu':4} self.conv_paddings = {'VALID':0, 'SAME':1} + self.pool_paddings = {'VALID':0, 'SAME':1} self.converted_nodes = set() self.conv2d_scope_names = set() self.conv2d_scopename_inputname_dict = {} - self.op2code = {'Conv2D':1, 'DepthToSpace':2, 'MirrorPad':3, 'Maximum':4, 'MathBinary':5, 'MathUnary':6} + self.op2code = {'Conv2D':1, 'DepthToSpace':2, 'MirrorPad':3, 'Maximum':4, + 'MathBinary':5, 'MathUnary':6, 'AvgPool':7} self.mathbin2code = {'Sub':0, 'Add':1, 'Mul':2, 'RealDiv':3, 'Minimum':4} self.mathun2code = {'Abs':0, 'Sin':1, 'Cos':2, 'Tan':3, 'Asin':4, 'Acos':5, 'Atan':6, 'Sinh':7, 'Cosh':8, 'Tanh':9, 'Asinh':10, 'Acosh':11, 'Atanh':12} self.mirrorpad_mode = {'CONSTANT':0, 'REFLECT':1, 'SYMMETRIC':2} @@ -298,6 +300,31 @@ class TFConverter: np.array([output_operand_index],dtype=np.uint32).tofile(f) + def dump_avg_pool_to_file(self, node, f): + assert(node.op == 'AvgPool') + self.layer_number = self.layer_number + 1 + self.converted_nodes.add(node.name) + node0 = self.name_node_dict[node.input[0]] + strides = node.attr['strides'] + assert(strides.list.i[1]==strides.list.i[2]) + strides = strides.list.i[1] + filter_node = node.attr['ksize'] + input_name = node.input[0] + + filter_height = filter_node.list.i[1] + filter_width = filter_node.list.i[2] + + in_channels = node0.attr['shape'].shape.dim[3].size + out_channels = in_channels + padding = 
node.attr['padding'].s.decode("utf-8") + np.array([self.op2code[node.op], strides, self.pool_paddings[padding], in_channels, out_channels, + filter_height],dtype=np.uint32).tofile(f) + + input_operand_index = self.add_operand(input_name, Operand.IOTYPE_INPUT) + output_operand_index = self.add_operand(node.name, Operand.IOTYPE_OUTPUT) + np.array([input_operand_index, output_operand_index],dtype=np.uint32).tofile(f) + + def dump_layers_to_file(self, f): for node in self.nodes: if node.name in self.converted_nodes: @@ -311,6 +338,8 @@ class TFConverter: if node.op == 'Conv2D': self.dump_simple_conv2d_to_file(node, f) + if node.op == 'AvgPool': + self.dump_avg_pool_to_file(node, f) elif node.op == 'DepthToSpace': self.dump_depth2space_to_file(node, f) elif node.op == 'MirrorPad':
It can be tested with the model generated with the Python script below:

import tensorflow as tf
import numpy as np
import imageio

in_img = imageio.imread('input_odd.jpg')
in_img = in_img.astype(np.float32)/255.0
in_data = in_img[np.newaxis, :]

x = tf.placeholder(tf.float32, shape=[1, None, None, 3], name='dnn_in')
x_pool = tf.nn.avg_pool(x, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME') #please alter the params as needed
y = tf.identity(x_pool, name='dnn_out')

sess=tf.Session()
sess.run(tf.global_variables_initializer())

graph_def = tf.graph_util.convert_variables_to_constants(sess, sess.graph_def, ['dnn_out'])
tf.train.write_graph(graph_def, '.', 'image_process.pb', as_text=False)

print("image_process.pb generated, please use \
path_to_ffmpeg/tools/python/convert.py to generate image_process.model\n")

output = sess.run(y, feed_dict={x: in_data})
imageio.imsave("out.jpg", np.squeeze(output))

Signed-off-by: Ting Fu <ting.fu@intel.com>
---
 libavfilter/dnn/Makefile | 1 +
 libavfilter/dnn/dnn_backend_native.h | 2 +
 .../dnn/dnn_backend_native_layer_avgpool.c | 136 ++++++++++++++++++
 .../dnn/dnn_backend_native_layer_avgpool.h | 35 +++++
 .../dnn/dnn_backend_native_layer_conv2d.h | 3 +-
 libavfilter/dnn/dnn_backend_native_layers.c | 2 +
 tools/python/convert_from_tensorflow.py | 31 +++-
 7 files changed, 207 insertions(+), 3 deletions(-)
 create mode 100644 libavfilter/dnn/dnn_backend_native_layer_avgpool.c
 create mode 100644 libavfilter/dnn/dnn_backend_native_layer_avgpool.h