From patchwork Thu Apr 25 02:14:33 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Guo, Yejun" X-Patchwork-Id: 12898 Return-Path: X-Original-To: patchwork@ffaux-bg.ffmpeg.org Delivered-To: patchwork@ffaux-bg.ffmpeg.org Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org [79.124.17.100]) by ffaux.localdomain (Postfix) with ESMTP id 6785344897B for ; Thu, 25 Apr 2019 05:14:40 +0300 (EEST) Received: from [127.0.1.1] (localhost [127.0.0.1]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id 4EAE868A8AC; Thu, 25 Apr 2019 05:14:40 +0300 (EEST) X-Original-To: ffmpeg-devel@ffmpeg.org Delivered-To: ffmpeg-devel@ffmpeg.org Received: from mga03.intel.com (mga03.intel.com [134.134.136.65]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTPS id 2181F6804C2 for ; Thu, 25 Apr 2019 05:14:38 +0300 (EEST) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from fmsmga006.fm.intel.com ([10.253.24.20]) by orsmga103.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 24 Apr 2019 19:14:37 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.60,392,1549958400"; d="scan'208";a="340574314" Received: from yguo18-skl-u1604.sh.intel.com ([10.239.13.25]) by fmsmga006.fm.intel.com with ESMTP; 24 Apr 2019 19:14:36 -0700 From: "Guo, Yejun" To: ffmpeg-devel@ffmpeg.org Date: Thu, 25 Apr 2019 10:14:33 +0800 Message-Id: <1556158473-15260-1-git-send-email-yejun.guo@intel.com> X-Mailer: git-send-email 2.7.4 Subject: [FFmpeg-devel] [PATCH V2 6/7] libavfilter/dnn: support multiple outputs for tensorflow model X-BeenThere: ffmpeg-devel@ffmpeg.org X-Mailman-Version: 2.1.20 Precedence: list List-Id: FFmpeg development discussions and patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: FFmpeg development discussions and patches Cc: yejun.guo@intel.com MIME-Version: 1.0 Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" some models such as ssd, yolo 
have more than one output. The cleanup code in this patch is a little complex because set_input_output_tf can be called many times together with ff_dnn_execute_model_tf, so we have to clean up resources for the case where the two interfaces are called in an interleaved fashion. Signed-off-by: Guo, Yejun --- libavfilter/dnn_backend_native.c | 15 +++++--- libavfilter/dnn_backend_native.h | 2 +- libavfilter/dnn_backend_tf.c | 80 ++++++++++++++++++++++++++++++++-------- libavfilter/dnn_backend_tf.h | 2 +- libavfilter/dnn_interface.h | 6 ++- libavfilter/vf_sr.c | 11 +++--- 6 files changed, 85 insertions(+), 31 deletions(-) diff --git a/libavfilter/dnn_backend_native.c b/libavfilter/dnn_backend_native.c index 18735c0..8a83c63 100644 --- a/libavfilter/dnn_backend_native.c +++ b/libavfilter/dnn_backend_native.c @@ -25,7 +25,7 @@ #include "dnn_backend_native.h" -static DNNReturnType set_input_output_native(void *model, DNNData *input, const char *input_name, const char *output_name) +static DNNReturnType set_input_output_native(void *model, DNNData *input, const char *input_name, const char **output_names, uint32_t nb_output) { ConvolutionalNetwork *network = (ConvolutionalNetwork *)model; InputParams *input_params; @@ -275,7 +275,7 @@ static void depth_to_space(const float *input, float *output, int block_size, in } } -DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, DNNData *output) +DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, DNNData *outputs, uint32_t nb_output) { ConvolutionalNetwork *network = (ConvolutionalNetwork *)model->model; int cur_width, cur_height, cur_channels; @@ -317,10 +317,13 @@ DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, DNNData *output } } - output->data = network->layers[network->layers_num - 1].output; - output->height = cur_height; - output->width = cur_width; - output->channels = cur_channels; + // native mode does not support multiple outputs yet + if (nb_output > 1) + return DNN_ERROR; + 
outputs[0].data = network->layers[network->layers_num - 1].output; + outputs[0].height = cur_height; + outputs[0].width = cur_width; + outputs[0].channels = cur_channels; return DNN_SUCCESS; } diff --git a/libavfilter/dnn_backend_native.h b/libavfilter/dnn_backend_native.h index adaf4a7..e13a68a 100644 --- a/libavfilter/dnn_backend_native.h +++ b/libavfilter/dnn_backend_native.h @@ -63,7 +63,7 @@ typedef struct ConvolutionalNetwork{ DNNModel *ff_dnn_load_model_native(const char *model_filename); -DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, DNNData *output); +DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, DNNData *outputs, uint32_t nb_output); void ff_dnn_free_model_native(DNNModel **model); diff --git a/libavfilter/dnn_backend_tf.c b/libavfilter/dnn_backend_tf.c index be8401e..ca6472d 100644 --- a/libavfilter/dnn_backend_tf.c +++ b/libavfilter/dnn_backend_tf.c @@ -26,6 +26,7 @@ #include "dnn_backend_tf.h" #include "dnn_backend_native.h" #include "libavformat/avio.h" +#include "libavutil/avassert.h" #include @@ -33,9 +34,11 @@ typedef struct TFModel{ TF_Graph *graph; TF_Session *session; TF_Status *status; - TF_Output input, output; + TF_Output input; TF_Tensor *input_tensor; - TF_Tensor *output_tensor; + TF_Output *outputs; + TF_Tensor **output_tensors; + uint32_t nb_output; } TFModel; static void free_buffer(void *data, size_t length) @@ -76,7 +79,7 @@ static TF_Buffer *read_graph(const char *model_filename) return graph_buf; } -static DNNReturnType set_input_output_tf(void *model, DNNData *input, const char *input_name, const char *output_name) +static DNNReturnType set_input_output_tf(void *model, DNNData *input, const char *input_name, const char **output_names, uint32_t nb_output) { TFModel *tf_model = (TFModel *)model; int64_t input_dims[] = {1, input->height, input->width, input->channels}; @@ -100,11 +103,38 @@ static DNNReturnType set_input_output_tf(void *model, DNNData *input, const char input->data = (float 
*)TF_TensorData(tf_model->input_tensor); // Output operation - tf_model->output.oper = TF_GraphOperationByName(tf_model->graph, output_name); - if (!tf_model->output.oper){ + if (nb_output == 0) + return DNN_ERROR; + + av_freep(&tf_model->outputs); + tf_model->outputs = av_malloc_array(nb_output, sizeof(*tf_model->outputs)); + if (!tf_model->outputs) + return DNN_ERROR; + for (int i = 0; i < nb_output; ++i) { + tf_model->outputs[i].oper = TF_GraphOperationByName(tf_model->graph, output_names[i]); + if (!tf_model->outputs[i].oper){ + av_freep(&tf_model->outputs); + return DNN_ERROR; + } + tf_model->outputs[i].index = 0; + } + + if (tf_model->output_tensors) { + for (uint32_t i = 0; i < tf_model->nb_output; ++i) { + if (tf_model->output_tensors[i]) { + TF_DeleteTensor(tf_model->output_tensors[i]); + tf_model->output_tensors[i] = NULL; + } + } + } + av_freep(&tf_model->output_tensors); + tf_model->output_tensors = av_mallocz_array(nb_output, sizeof(*tf_model->output_tensors)); + if (!tf_model->output_tensors) { + av_freep(&tf_model->outputs); return DNN_ERROR; } - tf_model->output.index = 0; + + tf_model->nb_output = nb_output; if (tf_model->session){ TF_CloseSession(tf_model->session, tf_model->status); @@ -484,25 +514,36 @@ DNNModel *ff_dnn_load_model_tf(const char *model_filename) -DNNReturnType ff_dnn_execute_model_tf(const DNNModel *model, DNNData *output) +DNNReturnType ff_dnn_execute_model_tf(const DNNModel *model, DNNData *outputs, uint32_t nb_output) { TFModel *tf_model = (TFModel *)model->model; - if (tf_model->output_tensor) - TF_DeleteTensor(tf_model->output_tensor); + uint32_t nb = FFMIN(nb_output, tf_model->nb_output); + if (nb == 0) + return DNN_ERROR; + + av_assert0(tf_model->output_tensors); + for (uint32_t i = 0; i < tf_model->nb_output; ++i) { + if (tf_model->output_tensors[i]) { + TF_DeleteTensor(tf_model->output_tensors[i]); + tf_model->output_tensors[i] = NULL; + } + } TF_SessionRun(tf_model->session, NULL, &tf_model->input, 
&tf_model->input_tensor, 1, - &tf_model->output, &tf_model->output_tensor, 1, + tf_model->outputs, tf_model->output_tensors, nb, NULL, 0, NULL, tf_model->status); if (TF_GetCode(tf_model->status) != TF_OK){ return DNN_ERROR; } - output->height = TF_Dim(tf_model->output_tensor, 1); - output->width = TF_Dim(tf_model->output_tensor, 2); - output->channels = TF_Dim(tf_model->output_tensor, 3); - output->data = TF_TensorData(tf_model->output_tensor); + for (uint32_t i = 0; i < nb; ++i) { + outputs[i].height = TF_Dim(tf_model->output_tensors[i], 1); + outputs[i].width = TF_Dim(tf_model->output_tensors[i], 2); + outputs[i].channels = TF_Dim(tf_model->output_tensors[i], 3); + outputs[i].data = TF_TensorData(tf_model->output_tensors[i]); + } return DNN_SUCCESS; } @@ -526,9 +567,16 @@ void ff_dnn_free_model_tf(DNNModel **model) if (tf_model->input_tensor){ TF_DeleteTensor(tf_model->input_tensor); } - if (tf_model->output_tensor){ - TF_DeleteTensor(tf_model->output_tensor); + if (tf_model->output_tensors) { + for (uint32_t i = 0; i < tf_model->nb_output; ++i) { + if (tf_model->output_tensors[i]) { + TF_DeleteTensor(tf_model->output_tensors[i]); + tf_model->output_tensors[i] = NULL; + } + } } + av_freep(&tf_model->outputs); + av_freep(&tf_model->output_tensors); av_freep(&tf_model); av_freep(model); } diff --git a/libavfilter/dnn_backend_tf.h b/libavfilter/dnn_backend_tf.h index 47a24ec..07877b1 100644 --- a/libavfilter/dnn_backend_tf.h +++ b/libavfilter/dnn_backend_tf.h @@ -31,7 +31,7 @@ DNNModel *ff_dnn_load_model_tf(const char *model_filename); -DNNReturnType ff_dnn_execute_model_tf(const DNNModel *model, DNNData *output); +DNNReturnType ff_dnn_execute_model_tf(const DNNModel *model, DNNData *outputs, uint32_t nb_output); void ff_dnn_free_model_tf(DNNModel **model); diff --git a/libavfilter/dnn_interface.h b/libavfilter/dnn_interface.h index 822f6e5..73d226e 100644 --- a/libavfilter/dnn_interface.h +++ b/libavfilter/dnn_interface.h @@ -26,6 +26,8 @@ #ifndef 
AVFILTER_DNN_INTERFACE_H #define AVFILTER_DNN_INTERFACE_H +#include + typedef enum {DNN_SUCCESS, DNN_ERROR} DNNReturnType; typedef enum {DNN_NATIVE, DNN_TF} DNNBackendType; @@ -40,7 +42,7 @@ typedef struct DNNModel{ void *model; // Sets model input and output. // Should be called at least once before model execution. - DNNReturnType (*set_input_output)(void *model, DNNData *input, const char *input_name, const char *output_name); + DNNReturnType (*set_input_output)(void *model, DNNData *input, const char *input_name, const char **output_names, uint32_t nb_output); } DNNModel; // Stores pointers to functions for loading, executing, freeing DNN models for one of the backends. @@ -48,7 +50,7 @@ typedef struct DNNModule{ // Loads model and parameters from given file. Returns NULL if it is not possible. DNNModel *(*load_model)(const char *model_filename); // Executes model with specified input and output. Returns DNN_ERROR otherwise. - DNNReturnType (*execute_model)(const DNNModel *model, DNNData *output); + DNNReturnType (*execute_model)(const DNNModel *model, DNNData *outputs, uint32_t nb_output); // Frees memory allocated for model. 
void (*free_model)(DNNModel **model); } DNNModule; diff --git a/libavfilter/vf_sr.c b/libavfilter/vf_sr.c index 53bd8ea..b4d4165 100644 --- a/libavfilter/vf_sr.c +++ b/libavfilter/vf_sr.c @@ -117,18 +117,19 @@ static int config_props(AVFilterLink *inlink) AVFilterLink *outlink = context->outputs[0]; DNNReturnType result; int sws_src_h, sws_src_w, sws_dst_h, sws_dst_w; + const char *model_output_name = "y"; sr_context->input.width = inlink->w * sr_context->scale_factor; sr_context->input.height = inlink->h * sr_context->scale_factor; sr_context->input.channels = 1; - result = (sr_context->model->set_input_output)(sr_context->model->model, &sr_context->input, "x", "y"); + result = (sr_context->model->set_input_output)(sr_context->model->model, &sr_context->input, "x", &model_output_name, 1); if (result != DNN_SUCCESS){ av_log(context, AV_LOG_ERROR, "could not set input and output for the model\n"); return AVERROR(EIO); } - result = (sr_context->dnn_module->execute_model)(sr_context->model, &sr_context->output); + result = (sr_context->dnn_module->execute_model)(sr_context->model, &sr_context->output, 1); if (result != DNN_SUCCESS){ av_log(context, AV_LOG_ERROR, "failed to execute loaded model\n"); return AVERROR(EIO); @@ -137,12 +138,12 @@ static int config_props(AVFilterLink *inlink) if (sr_context->input.height != sr_context->output.height || sr_context->input.width != sr_context->output.width){ sr_context->input.width = inlink->w; sr_context->input.height = inlink->h; - result = (sr_context->model->set_input_output)(sr_context->model->model, &sr_context->input, "x", "y"); + result = (sr_context->model->set_input_output)(sr_context->model->model, &sr_context->input, "x", &model_output_name, 1); if (result != DNN_SUCCESS){ av_log(context, AV_LOG_ERROR, "could not set input and output for the model\n"); return AVERROR(EIO); } - result = (sr_context->dnn_module->execute_model)(sr_context->model, &sr_context->output); + result = 
(sr_context->dnn_module->execute_model)(sr_context->model, &sr_context->output, 1); if (result != DNN_SUCCESS){ av_log(context, AV_LOG_ERROR, "failed to execute loaded model\n"); return AVERROR(EIO); @@ -259,7 +260,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in) } av_frame_free(&in); - dnn_result = (sr_context->dnn_module->execute_model)(sr_context->model, &sr_context->output); + dnn_result = (sr_context->dnn_module->execute_model)(sr_context->model, &sr_context->output, 1); if (dnn_result != DNN_SUCCESS){ av_log(context, AV_LOG_ERROR, "failed to execute loaded model\n"); return AVERROR(EIO);