From patchwork Mon Sep 14 06:28:34 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Guo, Yejun" X-Patchwork-Id: 22371 Return-Path: X-Original-To: patchwork@ffaux-bg.ffmpeg.org Delivered-To: patchwork@ffaux-bg.ffmpeg.org Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org [79.124.17.100]) by ffaux.localdomain (Postfix) with ESMTP id 8A65144A756 for ; Mon, 14 Sep 2020 09:34:39 +0300 (EEST) Received: from [127.0.1.1] (localhost [127.0.0.1]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id 6A84968BACD; Mon, 14 Sep 2020 09:34:39 +0300 (EEST) X-Original-To: ffmpeg-devel@ffmpeg.org Delivered-To: ffmpeg-devel@ffmpeg.org Received: from mga05.intel.com (mga05.intel.com [192.55.52.43]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTPS id 0FE0968B80A for ; Mon, 14 Sep 2020 09:34:31 +0300 (EEST) IronPort-SDR: T6uav/eGmC619H2xIUj9HvoFtu+Rl2ANYi1HFa5fyfRRpRj1h0MDIyaACTbVAvabWFwQT3yA+4 wWD64H3obXEQ== X-IronPort-AV: E=McAfee;i="6000,8403,9743"; a="243862222" X-IronPort-AV: E=Sophos;i="5.76,425,1592895600"; d="scan'208";a="243862222" X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from fmsmga005.fm.intel.com ([10.253.24.32]) by fmsmga105.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 13 Sep 2020 23:34:29 -0700 IronPort-SDR: a3E6KYjSdnmABY+NijQC3zDgubLm4bSunoVpxq6pvyAkfy1CjRr5JEtgajfm2FZsuOgpX1dDRy mmJHaO8KPdXA== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.76,425,1592895600"; d="scan'208";a="507023687" Received: from yguo18-skl-u1604.sh.intel.com ([10.239.159.53]) by fmsmga005.fm.intel.com with ESMTP; 13 Sep 2020 23:34:28 -0700 From: "Guo, Yejun" To: ffmpeg-devel@ffmpeg.org Date: Mon, 14 Sep 2020 14:28:34 +0800 Message-Id: <20200914062834.22033-1-yejun.guo@intel.com> X-Mailer: git-send-email 2.17.1 Subject: [FFmpeg-devel] [PATCH 1/4] dnn: add userdata for load model parameter X-BeenThere: ffmpeg-devel@ffmpeg.org X-Mailman-Version: 2.1.20 Precedence: list 
List-Id: FFmpeg development discussions and patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: FFmpeg development discussions and patches Cc: yejun.guo@intel.com MIME-Version: 1.0 Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" the userdata will be used for the interaction between AVFrame and DNNData Signed-off-by: Guo, Yejun --- libavfilter/dnn/dnn_backend_native.c | 3 ++- libavfilter/dnn/dnn_backend_native.h | 2 +- libavfilter/dnn/dnn_backend_openvino.c | 3 ++- libavfilter/dnn/dnn_backend_openvino.h | 2 +- libavfilter/dnn/dnn_backend_tf.c | 5 +++-- libavfilter/dnn/dnn_backend_tf.h | 2 +- libavfilter/dnn_interface.h | 4 +++- libavfilter/vf_derain.c | 2 +- libavfilter/vf_dnn_processing.c | 2 +- libavfilter/vf_sr.c | 2 +- 10 files changed, 16 insertions(+), 11 deletions(-) diff --git a/libavfilter/dnn/dnn_backend_native.c b/libavfilter/dnn/dnn_backend_native.c index a9ecbdc88b..830ec19c80 100644 --- a/libavfilter/dnn/dnn_backend_native.c +++ b/libavfilter/dnn/dnn_backend_native.c @@ -123,7 +123,7 @@ static DNNReturnType set_input_native(void *model, DNNData *input, const char *i // layers_num,layer_type,layer_parameterss,layer_type,layer_parameters... 
// For CONV layer: activation_function, input_num, output_num, kernel_size, kernel, biases // For DEPTH_TO_SPACE layer: block_size -DNNModel *ff_dnn_load_model_native(const char *model_filename, const char *options) +DNNModel *ff_dnn_load_model_native(const char *model_filename, const char *options, void *userdata) { DNNModel *model = NULL; char header_expected[] = "FFMPEGDNNNATIVE"; @@ -265,6 +265,7 @@ DNNModel *ff_dnn_load_model_native(const char *model_filename, const char *optio model->set_input = &set_input_native; model->get_input = &get_input_native; + model->userdata = userdata; return model; diff --git a/libavfilter/dnn/dnn_backend_native.h b/libavfilter/dnn/dnn_backend_native.h index b1f8f3d6bf..33634118a8 100644 --- a/libavfilter/dnn/dnn_backend_native.h +++ b/libavfilter/dnn/dnn_backend_native.h @@ -125,7 +125,7 @@ typedef struct NativeModel{ int32_t operands_num; } NativeModel; -DNNModel *ff_dnn_load_model_native(const char *model_filename, const char *options); +DNNModel *ff_dnn_load_model_native(const char *model_filename, const char *options, void *userdata); DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, DNNData *outputs, const char **output_names, uint32_t nb_output); diff --git a/libavfilter/dnn/dnn_backend_openvino.c b/libavfilter/dnn/dnn_backend_openvino.c index e5842906d1..01e1a1d4c8 100644 --- a/libavfilter/dnn/dnn_backend_openvino.c +++ b/libavfilter/dnn/dnn_backend_openvino.c @@ -174,7 +174,7 @@ err: return DNN_ERROR; } -DNNModel *ff_dnn_load_model_ov(const char *model_filename, const char *options) +DNNModel *ff_dnn_load_model_ov(const char *model_filename, const char *options, void *userdata) { char *all_dev_names = NULL; DNNModel *model = NULL; @@ -230,6 +230,7 @@ DNNModel *ff_dnn_load_model_ov(const char *model_filename, const char *options) model->set_input = &set_input_ov; model->get_input = &get_input_ov; model->options = options; + model->userdata = userdata; return model; diff --git 
a/libavfilter/dnn/dnn_backend_openvino.h b/libavfilter/dnn/dnn_backend_openvino.h index b2a86e0125..f69bc5ca0c 100644 --- a/libavfilter/dnn/dnn_backend_openvino.h +++ b/libavfilter/dnn/dnn_backend_openvino.h @@ -29,7 +29,7 @@ #include "../dnn_interface.h" -DNNModel *ff_dnn_load_model_ov(const char *model_filename, const char *options); +DNNModel *ff_dnn_load_model_ov(const char *model_filename, const char *options, void *userdata); DNNReturnType ff_dnn_execute_model_ov(const DNNModel *model, DNNData *outputs, const char **output_names, uint32_t nb_output); diff --git a/libavfilter/dnn/dnn_backend_tf.c b/libavfilter/dnn/dnn_backend_tf.c index 5e7f37bb12..bac7d8c420 100644 --- a/libavfilter/dnn/dnn_backend_tf.c +++ b/libavfilter/dnn/dnn_backend_tf.c @@ -502,7 +502,7 @@ static DNNReturnType load_native_model(TFModel *tf_model, const char *model_file DNNModel *model = NULL; NativeModel *native_model; - model = ff_dnn_load_model_native(model_filename, NULL); + model = ff_dnn_load_model_native(model_filename, NULL, NULL); if (!model){ av_log(ctx, AV_LOG_ERROR, "Failed to load native model\n"); return DNN_ERROR; @@ -586,7 +586,7 @@ static DNNReturnType load_native_model(TFModel *tf_model, const char *model_file return DNN_SUCCESS; } -DNNModel *ff_dnn_load_model_tf(const char *model_filename, const char *options) +DNNModel *ff_dnn_load_model_tf(const char *model_filename, const char *options, void *userdata) { DNNModel *model = NULL; TFModel *tf_model = NULL; @@ -616,6 +616,7 @@ DNNModel *ff_dnn_load_model_tf(const char *model_filename, const char *options) model->set_input = &set_input_tf; model->get_input = &get_input_tf; model->options = options; + model->userdata = userdata; return model; } diff --git a/libavfilter/dnn/dnn_backend_tf.h b/libavfilter/dnn/dnn_backend_tf.h index 6c7b6d2590..1cf5cc9e76 100644 --- a/libavfilter/dnn/dnn_backend_tf.h +++ b/libavfilter/dnn/dnn_backend_tf.h @@ -29,7 +29,7 @@ #include "../dnn_interface.h" -DNNModel *ff_dnn_load_model_tf(const 
char *model_filename, const char *options); +DNNModel *ff_dnn_load_model_tf(const char *model_filename, const char *options, void *userdata); DNNReturnType ff_dnn_execute_model_tf(const DNNModel *model, DNNData *outputs, const char **output_names, uint32_t nb_output); diff --git a/libavfilter/dnn_interface.h b/libavfilter/dnn_interface.h index 365c9a4729..702c8306e0 100644 --- a/libavfilter/dnn_interface.h +++ b/libavfilter/dnn_interface.h @@ -45,6 +45,8 @@ typedef struct DNNModel{ void *model; // Stores options when the model is executed by the backend const char *options; + // Stores userdata used for the interaction between AVFrame and DNNData + void *userdata; // Gets model input information // Just reuse struct DNNData here, actually the DNNData.data field is not needed. DNNReturnType (*get_input)(void *model, DNNData *input, const char *input_name); @@ -56,7 +58,7 @@ typedef struct DNNModel{ // Stores pointers to functions for loading, executing, freeing DNN models for one of the backends. typedef struct DNNModule{ // Loads model and parameters from given file. Returns NULL if it is not possible. - DNNModel *(*load_model)(const char *model_filename, const char *options); + DNNModel *(*load_model)(const char *model_filename, const char *options, void *userdata); // Executes model with specified input and output. Returns DNN_ERROR otherwise. DNNReturnType (*execute_model)(const DNNModel *model, DNNData *outputs, const char **output_names, uint32_t nb_output); // Frees memory allocated for model. 
diff --git a/libavfilter/vf_derain.c b/libavfilter/vf_derain.c index ff7d8d6308..c251d55ee7 100644 --- a/libavfilter/vf_derain.c +++ b/libavfilter/vf_derain.c @@ -161,7 +161,7 @@ static av_cold int init(AVFilterContext *ctx) return AVERROR(EINVAL); } - dr_context->model = (dr_context->dnn_module->load_model)(dr_context->model_filename, NULL); + dr_context->model = (dr_context->dnn_module->load_model)(dr_context->model_filename, NULL, NULL); if (!dr_context->model) { av_log(ctx, AV_LOG_ERROR, "could not load DNN model\n"); return AVERROR(EINVAL); diff --git a/libavfilter/vf_dnn_processing.c b/libavfilter/vf_dnn_processing.c index f78800f7c0..f120bf9df4 100644 --- a/libavfilter/vf_dnn_processing.c +++ b/libavfilter/vf_dnn_processing.c @@ -103,7 +103,7 @@ static av_cold int init(AVFilterContext *context) return AVERROR(EINVAL); } - ctx->model = (ctx->dnn_module->load_model)(ctx->model_filename, ctx->backend_options); + ctx->model = (ctx->dnn_module->load_model)(ctx->model_filename, ctx->backend_options, NULL); if (!ctx->model) { av_log(ctx, AV_LOG_ERROR, "could not load DNN model\n"); return AVERROR(EINVAL); diff --git a/libavfilter/vf_sr.c b/libavfilter/vf_sr.c index 37e1107145..445777f0c6 100644 --- a/libavfilter/vf_sr.c +++ b/libavfilter/vf_sr.c @@ -81,7 +81,7 @@ static av_cold int init(AVFilterContext *context) av_log(context, AV_LOG_ERROR, "load_model for network was not specified\n"); return AVERROR(EIO); } - sr_context->model = (sr_context->dnn_module->load_model)(sr_context->model_filename, NULL); + sr_context->model = (sr_context->dnn_module->load_model)(sr_context->model_filename, NULL, NULL); if (!sr_context->model){ av_log(context, AV_LOG_ERROR, "could not load DNN model\n"); return AVERROR(EIO); From patchwork Mon Sep 14 06:28:41 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Guo, Yejun" X-Patchwork-Id: 22372 Return-Path: X-Original-To: patchwork@ffaux-bg.ffmpeg.org Delivered-To: 
patchwork@ffaux-bg.ffmpeg.org Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org [79.124.17.100]) by ffaux.localdomain (Postfix) with ESMTP id CB72444A756 for ; Mon, 14 Sep 2020 09:34:44 +0300 (EEST) Received: from [127.0.1.1] (localhost [127.0.0.1]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id B627A68BAF6; Mon, 14 Sep 2020 09:34:44 +0300 (EEST) X-Original-To: ffmpeg-devel@ffmpeg.org Delivered-To: ffmpeg-devel@ffmpeg.org Received: from mga04.intel.com (mga04.intel.com [192.55.52.120]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTPS id 489CB68B9A6 for ; Mon, 14 Sep 2020 09:34:38 +0300 (EEST) IronPort-SDR: sE8AshnL8J5+x8tktTq4uytKgvtfFYZg7DEmIMsKnAPA6NL2kQqJILhjHUDYNuuJ2vDx4ivKgY QiufmozTkP8A== X-IronPort-AV: E=McAfee;i="6000,8403,9743"; a="156427165" X-IronPort-AV: E=Sophos;i="5.76,425,1592895600"; d="scan'208";a="156427165" X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from fmsmga004.fm.intel.com ([10.253.24.48]) by fmsmga104.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 13 Sep 2020 23:34:36 -0700 IronPort-SDR: WuF25+kcJbUGDpiSY4auWObZHHrT6ZXCubriRfS9N7fzq9kACO3ztP/bOix3DizJCQfWsVx4yF /Pw9dQIZCxvA== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.76,425,1592895600"; d="scan'208";a="330604122" Received: from yguo18-skl-u1604.sh.intel.com ([10.239.159.53]) by fmsmga004.fm.intel.com with ESMTP; 13 Sep 2020 23:34:34 -0700 From: "Guo, Yejun" To: ffmpeg-devel@ffmpeg.org Date: Mon, 14 Sep 2020 14:28:41 +0800 Message-Id: <20200914062841.22082-1-yejun.guo@intel.com> X-Mailer: git-send-email 2.17.1 Subject: [FFmpeg-devel] [PATCH 2/4] dnn: change dnn interface to replace DNNData* with AVFrame* X-BeenThere: ffmpeg-devel@ffmpeg.org X-Mailman-Version: 2.1.20 Precedence: list List-Id: FFmpeg development discussions and patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: FFmpeg development discussions and patches Cc: yejun.guo@intel.com MIME-Version: 1.0 Errors-To: 
ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" Currently, every filter needs to provide code to transfer data from AVFrame* to the model input (DNNData*), and also from the model output (DNNData*) to AVFrame*. Such transfers can be implemented within the DNN module, so that each filter can focus on its own business logic. The DNN module also exports the function pointers pre_proc and post_proc in struct DNNModel, in case a filter has its own special logic to transfer data between AVFrame* and DNNData*. The default implementation within the DNN module is used if the filter does not set pre/post_proc. Signed-off-by: Guo, Yejun --- configure | 2 +- libavfilter/dnn/Makefile | 1 + libavfilter/dnn/dnn_backend_native.c | 53 ++++-- libavfilter/dnn/dnn_backend_native.h | 3 +- libavfilter/dnn/dnn_backend_openvino.c | 71 +++++--- libavfilter/dnn/dnn_backend_openvino.h | 2 +- libavfilter/dnn/dnn_backend_tf.c | 90 ++++++---- libavfilter/dnn/dnn_backend_tf.h | 2 +- libavfilter/dnn/dnn_io_proc.c | 135 ++++++++++++++ libavfilter/dnn/dnn_io_proc.h | 36 ++++ libavfilter/dnn_interface.h | 17 +- libavfilter/vf_derain.c | 59 ++---- libavfilter/vf_dnn_processing.c | 240 +++++-------------------- libavfilter/vf_sr.c | 166 +++++++---------- 14 files changed, 451 insertions(+), 426 deletions(-) create mode 100644 libavfilter/dnn/dnn_io_proc.c create mode 100644 libavfilter/dnn/dnn_io_proc.h diff --git a/configure b/configure index 5d68695192..39fabb4ad5 100755 --- a/configure +++ b/configure @@ -2628,6 +2628,7 @@ cbs_vp9_select="cbs" dct_select="rdft" dirac_parse_select="golomb" dnn_suggest="libtensorflow libopenvino" +dnn_deps="swscale" error_resilience_select="me_cmp" faandct_deps="faan" faandct_select="fdctdsp" @@ -3532,7 +3533,6 @@ derain_filter_select="dnn" deshake_filter_select="pixelutils" deshake_opencl_filter_deps="opencl" dilation_opencl_filter_deps="opencl" -dnn_processing_filter_deps="swscale" dnn_processing_filter_select="dnn" drawtext_filter_deps="libfreetype" 
drawtext_filter_suggest="libfontconfig libfribidi" diff --git a/libavfilter/dnn/Makefile b/libavfilter/dnn/Makefile index e0957073ee..ee08cc5243 100644 --- a/libavfilter/dnn/Makefile +++ b/libavfilter/dnn/Makefile @@ -1,4 +1,5 @@ OBJS-$(CONFIG_DNN) += dnn/dnn_interface.o +OBJS-$(CONFIG_DNN) += dnn/dnn_io_proc.o OBJS-$(CONFIG_DNN) += dnn/dnn_backend_native.o OBJS-$(CONFIG_DNN) += dnn/dnn_backend_native_layers.o OBJS-$(CONFIG_DNN) += dnn/dnn_backend_native_layer_avgpool.o diff --git a/libavfilter/dnn/dnn_backend_native.c b/libavfilter/dnn/dnn_backend_native.c index 830ec19c80..14e878b6b8 100644 --- a/libavfilter/dnn/dnn_backend_native.c +++ b/libavfilter/dnn/dnn_backend_native.c @@ -27,6 +27,7 @@ #include "libavutil/avassert.h" #include "dnn_backend_native_layer_conv2d.h" #include "dnn_backend_native_layers.h" +#include "dnn_io_proc.h" #define OFFSET(x) offsetof(NativeContext, x) #define FLAGS AV_OPT_FLAG_FILTERING_PARAM @@ -69,11 +70,12 @@ static DNNReturnType get_input_native(void *model, DNNData *input, const char *i return DNN_ERROR; } -static DNNReturnType set_input_native(void *model, DNNData *input, const char *input_name) +static DNNReturnType set_input_native(void *model, AVFrame *frame, const char *input_name) { NativeModel *native_model = (NativeModel *)model; NativeContext *ctx = &native_model->ctx; DnnOperand *oprd = NULL; + DNNData input; if (native_model->layers_num <= 0 || native_model->operands_num <= 0) { av_log(ctx, AV_LOG_ERROR, "No operands or layers in model\n"); @@ -97,10 +99,8 @@ static DNNReturnType set_input_native(void *model, DNNData *input, const char *i return DNN_ERROR; } - oprd->dims[0] = 1; - oprd->dims[1] = input->height; - oprd->dims[2] = input->width; - oprd->dims[3] = input->channels; + oprd->dims[1] = frame->height; + oprd->dims[2] = frame->width; av_freep(&oprd->data); oprd->length = calculate_operand_data_length(oprd); @@ -114,7 +114,16 @@ static DNNReturnType set_input_native(void *model, DNNData *input, const char *i return 
DNN_ERROR; } - input->data = oprd->data; + input.height = oprd->dims[1]; + input.width = oprd->dims[2]; + input.channels = oprd->dims[3]; + input.data = oprd->data; + input.dt = oprd->data_type; + if (native_model->model->pre_proc != NULL) { + native_model->model->pre_proc(frame, &input, native_model->model->userdata); + } else { + proc_from_frame_to_dnn(frame, &input, ctx); + } return DNN_SUCCESS; } @@ -185,6 +194,7 @@ DNNModel *ff_dnn_load_model_native(const char *model_filename, const char *optio if (av_opt_set_from_string(&native_model->ctx, model->options, NULL, "=", "&") < 0) goto fail; model->model = (void *)native_model; + native_model->model = model; #if !HAVE_PTHREAD_CANCEL if (native_model->ctx.options.conv2d_threads > 1){ @@ -275,11 +285,19 @@ fail: return NULL; } -DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, DNNData *outputs, const char **output_names, uint32_t nb_output) +DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, const char **output_names, uint32_t nb_output, AVFrame *out_frame) { NativeModel *native_model = (NativeModel *)model->model; NativeContext *ctx = &native_model->ctx; int32_t layer; + DNNData output; + + if (nb_output != 1) { + // currently, the filter does not need multiple outputs, + // so we just pending the support until we really need it. 
+ av_log(ctx, AV_LOG_ERROR, "do not support multiple outputs\n"); + return DNN_ERROR; + } if (native_model->layers_num <= 0 || native_model->operands_num <= 0) { av_log(ctx, AV_LOG_ERROR, "No operands or layers in model\n"); @@ -317,11 +335,22 @@ DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, DNNData *output return DNN_ERROR; } - outputs[i].data = oprd->data; - outputs[i].height = oprd->dims[1]; - outputs[i].width = oprd->dims[2]; - outputs[i].channels = oprd->dims[3]; - outputs[i].dt = oprd->data_type; + output.data = oprd->data; + output.height = oprd->dims[1]; + output.width = oprd->dims[2]; + output.channels = oprd->dims[3]; + output.dt = oprd->data_type; + + if (out_frame->width != output.width || out_frame->height != output.height) { + out_frame->width = output.width; + out_frame->height = output.height; + } else { + if (native_model->model->post_proc != NULL) { + native_model->model->post_proc(out_frame, &output, native_model->model->userdata); + } else { + proc_from_dnn_to_frame(out_frame, &output, ctx); + } + } } return DNN_SUCCESS; diff --git a/libavfilter/dnn/dnn_backend_native.h b/libavfilter/dnn/dnn_backend_native.h index 33634118a8..553438bd22 100644 --- a/libavfilter/dnn/dnn_backend_native.h +++ b/libavfilter/dnn/dnn_backend_native.h @@ -119,6 +119,7 @@ typedef struct NativeContext { // Represents simple feed-forward convolutional network. 
typedef struct NativeModel{ NativeContext ctx; + DNNModel *model; Layer *layers; int32_t layers_num; DnnOperand *operands; @@ -127,7 +128,7 @@ typedef struct NativeModel{ DNNModel *ff_dnn_load_model_native(const char *model_filename, const char *options, void *userdata); -DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, DNNData *outputs, const char **output_names, uint32_t nb_output); +DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, const char **output_names, uint32_t nb_output, AVFrame *out_frame); void ff_dnn_free_model_native(DNNModel **model); diff --git a/libavfilter/dnn/dnn_backend_openvino.c b/libavfilter/dnn/dnn_backend_openvino.c index 01e1a1d4c8..b1bad3f659 100644 --- a/libavfilter/dnn/dnn_backend_openvino.c +++ b/libavfilter/dnn/dnn_backend_openvino.c @@ -24,6 +24,7 @@ */ #include "dnn_backend_openvino.h" +#include "dnn_io_proc.h" #include "libavformat/avio.h" #include "libavutil/avassert.h" #include "libavutil/opt.h" @@ -42,6 +43,7 @@ typedef struct OVContext { typedef struct OVModel{ OVContext ctx; + DNNModel *model; ie_core_t *core; ie_network_t *network; ie_executable_network_t *exe_network; @@ -131,7 +133,7 @@ static DNNReturnType get_input_ov(void *model, DNNData *input, const char *input return DNN_ERROR; } -static DNNReturnType set_input_ov(void *model, DNNData *input, const char *input_name) +static DNNReturnType set_input_ov(void *model, AVFrame *frame, const char *input_name) { OVModel *ov_model = (OVModel *)model; OVContext *ctx = &ov_model->ctx; @@ -139,10 +141,7 @@ static DNNReturnType set_input_ov(void *model, DNNData *input, const char *input dimensions_t dims; precision_e precision; ie_blob_buffer_t blob_buffer; - - status = ie_exec_network_create_infer_request(ov_model->exe_network, &ov_model->infer_request); - if (status != OK) - goto err; + DNNData input; status = ie_infer_request_get_blob(ov_model->infer_request, input_name, &ov_model->input_blob); if (status != OK) @@ -153,23 +152,26 @@ static 
DNNReturnType set_input_ov(void *model, DNNData *input, const char *input if (status != OK) goto err; - av_assert0(input->channels == dims.dims[1]); - av_assert0(input->height == dims.dims[2]); - av_assert0(input->width == dims.dims[3]); - av_assert0(input->dt == precision_to_datatype(precision)); - status = ie_blob_get_buffer(ov_model->input_blob, &blob_buffer); if (status != OK) goto err; - input->data = blob_buffer.buffer; + + input.height = dims.dims[2]; + input.width = dims.dims[3]; + input.channels = dims.dims[1]; + input.data = blob_buffer.buffer; + input.dt = precision_to_datatype(precision); + if (ov_model->model->pre_proc != NULL) { + ov_model->model->pre_proc(frame, &input, ov_model->model->userdata); + } else { + proc_from_frame_to_dnn(frame, &input, ctx); + } return DNN_SUCCESS; err: if (ov_model->input_blob) ie_blob_free(&ov_model->input_blob); - if (ov_model->infer_request) - ie_infer_request_free(&ov_model->infer_request); av_log(ctx, AV_LOG_ERROR, "Failed to create inference instance or get input data/dims/precision/memory\n"); return DNN_ERROR; } @@ -184,7 +186,7 @@ DNNModel *ff_dnn_load_model_ov(const char *model_filename, const char *options, ie_config_t config = {NULL, NULL, NULL}; ie_available_devices_t a_dev; - model = av_malloc(sizeof(DNNModel)); + model = av_mallocz(sizeof(DNNModel)); if (!model){ return NULL; } @@ -192,6 +194,7 @@ DNNModel *ff_dnn_load_model_ov(const char *model_filename, const char *options, ov_model = av_mallocz(sizeof(OVModel)); if (!ov_model) goto err; + ov_model->model = model; ov_model->ctx.class = &dnn_openvino_class; ctx = &ov_model->ctx; @@ -226,6 +229,10 @@ DNNModel *ff_dnn_load_model_ov(const char *model_filename, const char *options, goto err; } + status = ie_exec_network_create_infer_request(ov_model->exe_network, &ov_model->infer_request); + if (status != OK) + goto err; + model->model = (void *)ov_model; model->set_input = &set_input_ov; model->get_input = &get_input_ov; @@ -238,6 +245,8 @@ err: if (model) 
av_freep(&model); if (ov_model) { + if (ov_model->infer_request) + ie_infer_request_free(&ov_model->infer_request); if (ov_model->exe_network) ie_exec_network_free(&ov_model->exe_network); if (ov_model->network) @@ -249,7 +258,7 @@ err: return NULL; } -DNNReturnType ff_dnn_execute_model_ov(const DNNModel *model, DNNData *outputs, const char **output_names, uint32_t nb_output) +DNNReturnType ff_dnn_execute_model_ov(const DNNModel *model, const char **output_names, uint32_t nb_output, AVFrame *out_frame) { char *model_output_name = NULL; char *all_output_names = NULL; @@ -258,8 +267,18 @@ DNNReturnType ff_dnn_execute_model_ov(const DNNModel *model, DNNData *outputs, c ie_blob_buffer_t blob_buffer; OVModel *ov_model = (OVModel *)model->model; OVContext *ctx = &ov_model->ctx; - IEStatusCode status = ie_infer_request_infer(ov_model->infer_request); + IEStatusCode status; size_t model_output_count = 0; + DNNData output; + + if (nb_output != 1) { + // currently, the filter does not need multiple outputs, + // so we just pending the support until we really need it. 
+ av_log(ctx, AV_LOG_ERROR, "do not support multiple outputs\n"); + return DNN_ERROR; + } + + status = ie_infer_request_infer(ov_model->infer_request); if (status != OK) { av_log(ctx, AV_LOG_ERROR, "Failed to start synchronous model inference\n"); return DNN_ERROR; @@ -296,11 +315,21 @@ DNNReturnType ff_dnn_execute_model_ov(const DNNModel *model, DNNData *outputs, c return DNN_ERROR; } - outputs[i].channels = dims.dims[1]; - outputs[i].height = dims.dims[2]; - outputs[i].width = dims.dims[3]; - outputs[i].dt = precision_to_datatype(precision); - outputs[i].data = blob_buffer.buffer; + output.channels = dims.dims[1]; + output.height = dims.dims[2]; + output.width = dims.dims[3]; + output.dt = precision_to_datatype(precision); + output.data = blob_buffer.buffer; + if (out_frame->width != output.width || out_frame->height != output.height) { + out_frame->width = output.width; + out_frame->height = output.height; + } else { + if (ov_model->model->post_proc != NULL) { + ov_model->model->post_proc(out_frame, &output, ov_model->model->userdata); + } else { + proc_from_dnn_to_frame(out_frame, &output, ctx); + } + } } return DNN_SUCCESS; diff --git a/libavfilter/dnn/dnn_backend_openvino.h b/libavfilter/dnn/dnn_backend_openvino.h index f69bc5ca0c..efb349cb49 100644 --- a/libavfilter/dnn/dnn_backend_openvino.h +++ b/libavfilter/dnn/dnn_backend_openvino.h @@ -31,7 +31,7 @@ DNNModel *ff_dnn_load_model_ov(const char *model_filename, const char *options, void *userdata); -DNNReturnType ff_dnn_execute_model_ov(const DNNModel *model, DNNData *outputs, const char **output_names, uint32_t nb_output); +DNNReturnType ff_dnn_execute_model_ov(const DNNModel *model, const char **output_names, uint32_t nb_output, AVFrame *out_frame); void ff_dnn_free_model_ov(DNNModel **model); diff --git a/libavfilter/dnn/dnn_backend_tf.c b/libavfilter/dnn/dnn_backend_tf.c index bac7d8c420..c2d8c06931 100644 --- a/libavfilter/dnn/dnn_backend_tf.c +++ b/libavfilter/dnn/dnn_backend_tf.c @@ -31,6 +31,7 @@ 
#include "libavutil/avassert.h" #include "dnn_backend_native_layer_pad.h" #include "dnn_backend_native_layer_maximum.h" +#include "dnn_io_proc.h" #include @@ -40,13 +41,12 @@ typedef struct TFContext { typedef struct TFModel{ TFContext ctx; + DNNModel *model; TF_Graph *graph; TF_Session *session; TF_Status *status; TF_Output input; TF_Tensor *input_tensor; - TF_Tensor **output_tensors; - uint32_t nb_output; } TFModel; static const AVClass dnn_tensorflow_class = { @@ -152,13 +152,19 @@ static DNNReturnType get_input_tf(void *model, DNNData *input, const char *input return DNN_SUCCESS; } -static DNNReturnType set_input_tf(void *model, DNNData *input, const char *input_name) +static DNNReturnType set_input_tf(void *model, AVFrame *frame, const char *input_name) { TFModel *tf_model = (TFModel *)model; TFContext *ctx = &tf_model->ctx; + DNNData input; TF_SessionOptions *sess_opts; const TF_Operation *init_op = TF_GraphOperationByName(tf_model->graph, "init"); + if (get_input_tf(model, &input, input_name) != DNN_SUCCESS) + return DNN_ERROR; + input.height = frame->height; + input.width = frame->width; + // Input operation tf_model->input.oper = TF_GraphOperationByName(tf_model->graph, input_name); if (!tf_model->input.oper){ @@ -169,12 +175,18 @@ static DNNReturnType set_input_tf(void *model, DNNData *input, const char *input if (tf_model->input_tensor){ TF_DeleteTensor(tf_model->input_tensor); } - tf_model->input_tensor = allocate_input_tensor(input); + tf_model->input_tensor = allocate_input_tensor(&input); if (!tf_model->input_tensor){ av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory for input tensor\n"); return DNN_ERROR; } - input->data = (float *)TF_TensorData(tf_model->input_tensor); + input.data = (float *)TF_TensorData(tf_model->input_tensor); + + if (tf_model->model->pre_proc != NULL) { + tf_model->model->pre_proc(frame, &input, tf_model->model->userdata); + } else { + proc_from_frame_to_dnn(frame, &input, ctx); + } // session if (tf_model->session){ @@ 
-591,7 +603,7 @@ DNNModel *ff_dnn_load_model_tf(const char *model_filename, const char *options, DNNModel *model = NULL; TFModel *tf_model = NULL; - model = av_malloc(sizeof(DNNModel)); + model = av_mallocz(sizeof(DNNModel)); if (!model){ return NULL; } @@ -602,6 +614,7 @@ DNNModel *ff_dnn_load_model_tf(const char *model_filename, const char *options, return NULL; } tf_model->ctx.class = &dnn_tensorflow_class; + tf_model->model = model; if (load_tf_model(tf_model, model_filename) != DNN_SUCCESS){ if (load_native_model(tf_model, model_filename) != DNN_SUCCESS){ @@ -621,11 +634,20 @@ DNNModel *ff_dnn_load_model_tf(const char *model_filename, const char *options, return model; } -DNNReturnType ff_dnn_execute_model_tf(const DNNModel *model, DNNData *outputs, const char **output_names, uint32_t nb_output) +DNNReturnType ff_dnn_execute_model_tf(const DNNModel *model, const char **output_names, uint32_t nb_output, AVFrame *out_frame) { TF_Output *tf_outputs; TFModel *tf_model = (TFModel *)model->model; TFContext *ctx = &tf_model->ctx; + DNNData output; + TF_Tensor **output_tensors; + + if (nb_output != 1) { + // currently, the filter does not need multiple outputs, + // so we just pending the support until we really need it. 
+ av_log(ctx, AV_LOG_ERROR, "do not support multiple outputs\n"); + return DNN_ERROR; + } tf_outputs = av_malloc_array(nb_output, sizeof(*tf_outputs)); if (tf_outputs == NULL) { @@ -633,18 +655,8 @@ DNNReturnType ff_dnn_execute_model_tf(const DNNModel *model, DNNData *outputs, c return DNN_ERROR; } - if (tf_model->output_tensors) { - for (uint32_t i = 0; i < tf_model->nb_output; ++i) { - if (tf_model->output_tensors[i]) { - TF_DeleteTensor(tf_model->output_tensors[i]); - tf_model->output_tensors[i] = NULL; - } - } - } - av_freep(&tf_model->output_tensors); - tf_model->nb_output = nb_output; - tf_model->output_tensors = av_mallocz_array(nb_output, sizeof(*tf_model->output_tensors)); - if (!tf_model->output_tensors) { + output_tensors = av_mallocz_array(nb_output, sizeof(*output_tensors)); + if (!output_tensors) { av_freep(&tf_outputs); av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory for output tensor\n"); \ return DNN_ERROR; @@ -654,6 +666,7 @@ DNNReturnType ff_dnn_execute_model_tf(const DNNModel *model, DNNData *outputs, c tf_outputs[i].oper = TF_GraphOperationByName(tf_model->graph, output_names[i]); if (!tf_outputs[i].oper) { av_freep(&tf_outputs); + av_freep(&output_tensors); av_log(ctx, AV_LOG_ERROR, "Could not find output \"%s\" in model\n", output_names[i]); \ return DNN_ERROR; } @@ -662,22 +675,40 @@ DNNReturnType ff_dnn_execute_model_tf(const DNNModel *model, DNNData *outputs, c TF_SessionRun(tf_model->session, NULL, &tf_model->input, &tf_model->input_tensor, 1, - tf_outputs, tf_model->output_tensors, nb_output, + tf_outputs, output_tensors, nb_output, NULL, 0, NULL, tf_model->status); if (TF_GetCode(tf_model->status) != TF_OK) { av_freep(&tf_outputs); + av_freep(&output_tensors); av_log(ctx, AV_LOG_ERROR, "Failed to run session when executing model\n"); return DNN_ERROR; } for (uint32_t i = 0; i < nb_output; ++i) { - outputs[i].height = TF_Dim(tf_model->output_tensors[i], 1); - outputs[i].width = TF_Dim(tf_model->output_tensors[i], 2); - 
outputs[i].channels = TF_Dim(tf_model->output_tensors[i], 3); - outputs[i].data = TF_TensorData(tf_model->output_tensors[i]); - outputs[i].dt = TF_TensorType(tf_model->output_tensors[i]); + output.height = TF_Dim(output_tensors[i], 1); + output.width = TF_Dim(output_tensors[i], 2); + output.channels = TF_Dim(output_tensors[i], 3); + output.data = TF_TensorData(output_tensors[i]); + output.dt = TF_TensorType(output_tensors[i]); + + if (out_frame->width != output.width || out_frame->height != output.height) { + out_frame->width = output.width; + out_frame->height = output.height; + } else { + if (tf_model->model->post_proc != NULL) { + tf_model->model->post_proc(out_frame, &output, tf_model->model->userdata); + } else { + proc_from_dnn_to_frame(out_frame, &output, ctx); + } + } } + for (uint32_t i = 0; i < nb_output; ++i) { + if (output_tensors[i]) { + TF_DeleteTensor(output_tensors[i]); + } + } + av_freep(&output_tensors); av_freep(&tf_outputs); return DNN_SUCCESS; } @@ -701,15 +732,6 @@ void ff_dnn_free_model_tf(DNNModel **model) if (tf_model->input_tensor){ TF_DeleteTensor(tf_model->input_tensor); } - if (tf_model->output_tensors) { - for (uint32_t i = 0; i < tf_model->nb_output; ++i) { - if (tf_model->output_tensors[i]) { - TF_DeleteTensor(tf_model->output_tensors[i]); - tf_model->output_tensors[i] = NULL; - } - } - } - av_freep(&tf_model->output_tensors); av_freep(&tf_model); av_freep(model); } diff --git a/libavfilter/dnn/dnn_backend_tf.h b/libavfilter/dnn/dnn_backend_tf.h index 1cf5cc9e76..f379e83d8d 100644 --- a/libavfilter/dnn/dnn_backend_tf.h +++ b/libavfilter/dnn/dnn_backend_tf.h @@ -31,7 +31,7 @@ DNNModel *ff_dnn_load_model_tf(const char *model_filename, const char *options, void *userdata); -DNNReturnType ff_dnn_execute_model_tf(const DNNModel *model, DNNData *outputs, const char **output_names, uint32_t nb_output); +DNNReturnType ff_dnn_execute_model_tf(const DNNModel *model, const char **output_names, uint32_t nb_output, AVFrame *out_frame); void 
ff_dnn_free_model_tf(DNNModel **model); diff --git a/libavfilter/dnn/dnn_io_proc.c b/libavfilter/dnn/dnn_io_proc.c new file mode 100644 index 0000000000..8ce1959b42 --- /dev/null +++ b/libavfilter/dnn/dnn_io_proc.c @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2020 + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "dnn_io_proc.h" +#include "libavutil/imgutils.h" +#include "libswscale/swscale.h" + +DNNReturnType proc_from_dnn_to_frame(AVFrame *frame, DNNData *output, void *log_ctx) +{ + struct SwsContext *sws_ctx; + int bytewidth = av_image_get_linesize(frame->format, frame->width, 0); + if (output->dt != DNN_FLOAT) { + av_log(log_ctx, AV_LOG_ERROR, "do not support data type rather than DNN_FLOAT\n"); + return DNN_ERROR; + } + + switch (frame->format) { + case AV_PIX_FMT_RGB24: + case AV_PIX_FMT_BGR24: + sws_ctx = sws_getContext(frame->width * 3, + frame->height, + AV_PIX_FMT_GRAYF32, + frame->width * 3, + frame->height, + AV_PIX_FMT_GRAY8, + 0, NULL, NULL, NULL); + sws_scale(sws_ctx, (const uint8_t *[4]){(const uint8_t *)output->data, 0, 0, 0}, + (const int[4]){frame->width * 3 * sizeof(float), 0, 0, 0}, 0, frame->height, + (uint8_t * const*)frame->data, frame->linesize); + sws_freeContext(sws_ctx); + return DNN_SUCCESS; + case 
AV_PIX_FMT_GRAYF32: + av_image_copy_plane(frame->data[0], frame->linesize[0], + output->data, bytewidth, + bytewidth, frame->height); + return DNN_SUCCESS; + case AV_PIX_FMT_YUV420P: + case AV_PIX_FMT_YUV422P: + case AV_PIX_FMT_YUV444P: + case AV_PIX_FMT_YUV410P: + case AV_PIX_FMT_YUV411P: + case AV_PIX_FMT_GRAY8: + sws_ctx = sws_getContext(frame->width, + frame->height, + AV_PIX_FMT_GRAYF32, + frame->width, + frame->height, + AV_PIX_FMT_GRAY8, + 0, NULL, NULL, NULL); + sws_scale(sws_ctx, (const uint8_t *[4]){(const uint8_t *)output->data, 0, 0, 0}, + (const int[4]){frame->width * sizeof(float), 0, 0, 0}, 0, frame->height, + (uint8_t * const*)frame->data, frame->linesize); + sws_freeContext(sws_ctx); + return DNN_SUCCESS; + default: + av_log(log_ctx, AV_LOG_ERROR, "do not support frame format %d\n", frame->format); + return DNN_ERROR; + } + + return DNN_SUCCESS; +} + +DNNReturnType proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, void *log_ctx) +{ + struct SwsContext *sws_ctx; + int bytewidth = av_image_get_linesize(frame->format, frame->width, 0); + if (input->dt != DNN_FLOAT) { + av_log(log_ctx, AV_LOG_ERROR, "do not support data type rather than DNN_FLOAT\n"); + return DNN_ERROR; + } + + switch (frame->format) { + case AV_PIX_FMT_RGB24: + case AV_PIX_FMT_BGR24: + sws_ctx = sws_getContext(frame->width * 3, + frame->height, + AV_PIX_FMT_GRAY8, + frame->width * 3, + frame->height, + AV_PIX_FMT_GRAYF32, + 0, NULL, NULL, NULL); + sws_scale(sws_ctx, (const uint8_t **)frame->data, + frame->linesize, 0, frame->height, + (uint8_t * const*)(&input->data), + (const int [4]){frame->width * 3 * sizeof(float), 0, 0, 0}); + sws_freeContext(sws_ctx); + break; + case AV_PIX_FMT_GRAYF32: + av_image_copy_plane(input->data, bytewidth, + frame->data[0], frame->linesize[0], + bytewidth, frame->height); + break; + case AV_PIX_FMT_YUV420P: + case AV_PIX_FMT_YUV422P: + case AV_PIX_FMT_YUV444P: + case AV_PIX_FMT_YUV410P: + case AV_PIX_FMT_YUV411P: + case AV_PIX_FMT_GRAY8: + sws_ctx 
= sws_getContext(frame->width, + frame->height, + AV_PIX_FMT_GRAY8, + frame->width, + frame->height, + AV_PIX_FMT_GRAYF32, + 0, NULL, NULL, NULL); + sws_scale(sws_ctx, (const uint8_t **)frame->data, + frame->linesize, 0, frame->height, + (uint8_t * const*)(&input->data), + (const int [4]){frame->width * sizeof(float), 0, 0, 0}); + sws_freeContext(sws_ctx); + break; + default: + av_log(log_ctx, AV_LOG_ERROR, "do not support frame format %d\n", frame->format); + return DNN_ERROR; + } + + return DNN_SUCCESS; +} diff --git a/libavfilter/dnn/dnn_io_proc.h b/libavfilter/dnn/dnn_io_proc.h new file mode 100644 index 0000000000..4c7dc7c1a2 --- /dev/null +++ b/libavfilter/dnn/dnn_io_proc.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2020 + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * DNN input&output process between AVFrame and DNNData. 
+ */ + + +#ifndef AVFILTER_DNN_DNN_IO_PROC_H +#define AVFILTER_DNN_DNN_IO_PROC_H + +#include "../dnn_interface.h" +#include "libavutil/frame.h" + +DNNReturnType proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, void *log_ctx); +DNNReturnType proc_from_dnn_to_frame(AVFrame *frame, DNNData *output, void *log_ctx); + +#endif diff --git a/libavfilter/dnn_interface.h b/libavfilter/dnn_interface.h index 702c8306e0..6debc50607 100644 --- a/libavfilter/dnn_interface.h +++ b/libavfilter/dnn_interface.h @@ -27,6 +27,7 @@ #define AVFILTER_DNN_INTERFACE_H #include +#include "libavutil/frame.h" typedef enum {DNN_SUCCESS, DNN_ERROR} DNNReturnType; @@ -50,17 +51,23 @@ typedef struct DNNModel{ // Gets model input information // Just reuse struct DNNData here, actually the DNNData.data field is not needed. DNNReturnType (*get_input)(void *model, DNNData *input, const char *input_name); - // Sets model input and output. - // Should be called at least once before model execution. - DNNReturnType (*set_input)(void *model, DNNData *input, const char *input_name); + // Sets model input. + // Should be called every time before model execution. + DNNReturnType (*set_input)(void *model, AVFrame *frame, const char *input_name); + // set the pre process to transfer data from AVFrame to DNNData + // the default implementation within DNN is used if it is not provided by the filter + int (*pre_proc)(AVFrame *frame_in, DNNData *model_input, void *user_data); + // set the post process to transfer data from DNNData to AVFrame + // the default implementation within DNN is used if it is not provided by the filter + int (*post_proc)(AVFrame *frame_out, DNNData *model_output, void *user_data); } DNNModel; // Stores pointers to functions for loading, executing, freeing DNN models for one of the backends. typedef struct DNNModule{ // Loads model and parameters from given file. Returns NULL if it is not possible. 
DNNModel *(*load_model)(const char *model_filename, const char *options, void *userdata); - // Executes model with specified input and output. Returns DNN_ERROR otherwise. - DNNReturnType (*execute_model)(const DNNModel *model, DNNData *outputs, const char **output_names, uint32_t nb_output); + // Executes model with specified output. Returns DNN_ERROR otherwise. + DNNReturnType (*execute_model)(const DNNModel *model, const char **output_names, uint32_t nb_output, AVFrame *out_frame); // Frees memory allocated for model. void (*free_model)(DNNModel **model); } DNNModule; diff --git a/libavfilter/vf_derain.c b/libavfilter/vf_derain.c index c251d55ee7..a59cd6e941 100644 --- a/libavfilter/vf_derain.c +++ b/libavfilter/vf_derain.c @@ -39,11 +39,8 @@ typedef struct DRContext { DNNBackendType backend_type; DNNModule *dnn_module; DNNModel *model; - DNNData input; - DNNData output; } DRContext; -#define CLIP(x, min, max) (x < min ? min : (x > max ? max : x)) #define OFFSET(x) offsetof(DRContext, x) #define FLAGS AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM static const AVOption derain_options[] = { @@ -74,25 +71,6 @@ static int query_formats(AVFilterContext *ctx) return ff_set_common_formats(ctx, formats); } -static int config_inputs(AVFilterLink *inlink) -{ - AVFilterContext *ctx = inlink->dst; - DRContext *dr_context = ctx->priv; - DNNReturnType result; - - dr_context->input.width = inlink->w; - dr_context->input.height = inlink->h; - dr_context->input.channels = 3; - - result = (dr_context->model->set_input)(dr_context->model->model, &dr_context->input, "x"); - if (result != DNN_SUCCESS) { - av_log(ctx, AV_LOG_ERROR, "could not set input and output for the model\n"); - return AVERROR(EIO); - } - - return 0; -} - static int filter_frame(AVFilterLink *inlink, AVFrame *in) { AVFilterContext *ctx = inlink->dst; @@ -100,43 +78,30 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in) DRContext *dr_context = ctx->priv; DNNReturnType dnn_result; const char 
*model_output_name = "y"; + AVFrame *out; - AVFrame *out = ff_get_video_buffer(outlink, outlink->w, outlink->h); + dnn_result = (dr_context->model->set_input)(dr_context->model->model, in, "x"); + if (dnn_result != DNN_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "could not set input for the model\n"); + av_frame_free(&in); + return AVERROR(EIO); + } + + out = ff_get_video_buffer(outlink, outlink->w, outlink->h); if (!out) { av_log(ctx, AV_LOG_ERROR, "could not allocate memory for output frame\n"); av_frame_free(&in); return AVERROR(ENOMEM); } - av_frame_copy_props(out, in); - for (int i = 0; i < in->height; i++){ - for(int j = 0; j < in->width * 3; j++){ - int k = i * in->linesize[0] + j; - int t = i * in->width * 3 + j; - ((float *)dr_context->input.data)[t] = in->data[0][k] / 255.0; - } - } - - dnn_result = (dr_context->dnn_module->execute_model)(dr_context->model, &dr_context->output, &model_output_name, 1); + dnn_result = (dr_context->dnn_module->execute_model)(dr_context->model, &model_output_name, 1, out); if (dnn_result != DNN_SUCCESS){ av_log(ctx, AV_LOG_ERROR, "failed to execute model\n"); + av_frame_free(&in); return AVERROR(EIO); } - out->height = dr_context->output.height; - out->width = dr_context->output.width; - outlink->h = dr_context->output.height; - outlink->w = dr_context->output.width; - - for (int i = 0; i < out->height; i++){ - for(int j = 0; j < out->width * 3; j++){ - int k = i * out->linesize[0] + j; - int t = i * out->width * 3 + j; - out->data[0][k] = CLIP((int)((((float *)dr_context->output.data)[t]) * 255), 0, 255); - } - } - av_frame_free(&in); return ff_filter_frame(outlink, out); @@ -146,7 +111,6 @@ static av_cold int init(AVFilterContext *ctx) { DRContext *dr_context = ctx->priv; - dr_context->input.dt = DNN_FLOAT; dr_context->dnn_module = ff_get_dnn_module(dr_context->backend_type); if (!dr_context->dnn_module) { av_log(ctx, AV_LOG_ERROR, "could not create DNN module for requested backend\n"); @@ -184,7 +148,6 @@ static const 
AVFilterPad derain_inputs[] = { { .name = "default", .type = AVMEDIA_TYPE_VIDEO, - .config_props = config_inputs, .filter_frame = filter_frame, }, { NULL } diff --git a/libavfilter/vf_dnn_processing.c b/libavfilter/vf_dnn_processing.c index f120bf9df4..d7462bc828 100644 --- a/libavfilter/vf_dnn_processing.c +++ b/libavfilter/vf_dnn_processing.c @@ -46,12 +46,6 @@ typedef struct DnnProcessingContext { DNNModule *dnn_module; DNNModel *model; - // input & output of the model at execution time - DNNData input; - DNNData output; - - struct SwsContext *sws_gray8_to_grayf32; - struct SwsContext *sws_grayf32_to_gray8; struct SwsContext *sws_uv_scale; int sws_uv_height; } DnnProcessingContext; @@ -103,7 +97,7 @@ static av_cold int init(AVFilterContext *context) return AVERROR(EINVAL); } - ctx->model = (ctx->dnn_module->load_model)(ctx->model_filename, ctx->backend_options, NULL); + ctx->model = (ctx->dnn_module->load_model)(ctx->model_filename, ctx->backend_options, ctx); if (!ctx->model) { av_log(ctx, AV_LOG_ERROR, "could not load DNN model\n"); return AVERROR(EINVAL); @@ -148,6 +142,10 @@ static int check_modelinput_inlink(const DNNData *model_input, const AVFilterLin model_input->width, inlink->w); return AVERROR(EIO); } + if (model_input->dt != DNN_FLOAT) { + av_log(ctx, AV_LOG_ERROR, "only support dnn models with input data type as float32.\n"); + return AVERROR(EIO); + } switch (fmt) { case AV_PIX_FMT_RGB24: @@ -156,20 +154,6 @@ static int check_modelinput_inlink(const DNNData *model_input, const AVFilterLin LOG_FORMAT_CHANNEL_MISMATCH(); return AVERROR(EIO); } - if (model_input->dt != DNN_FLOAT && model_input->dt != DNN_UINT8) { - av_log(ctx, AV_LOG_ERROR, "only support dnn models with input data type as float32 and uint8.\n"); - return AVERROR(EIO); - } - return 0; - case AV_PIX_FMT_GRAY8: - if (model_input->channels != 1) { - LOG_FORMAT_CHANNEL_MISMATCH(); - return AVERROR(EIO); - } - if (model_input->dt != DNN_UINT8) { - av_log(ctx, AV_LOG_ERROR, "only support dnn 
models with input data type uint8.\n"); - return AVERROR(EIO); - } return 0; case AV_PIX_FMT_GRAYF32: case AV_PIX_FMT_YUV420P: @@ -181,10 +165,6 @@ static int check_modelinput_inlink(const DNNData *model_input, const AVFilterLin LOG_FORMAT_CHANNEL_MISMATCH(); return AVERROR(EIO); } - if (model_input->dt != DNN_FLOAT) { - av_log(ctx, AV_LOG_ERROR, "only support dnn models with input data type float32.\n"); - return AVERROR(EIO); - } return 0; default: av_log(ctx, AV_LOG_ERROR, "%s not supported.\n", av_get_pix_fmt_name(fmt)); @@ -213,74 +193,24 @@ static int config_input(AVFilterLink *inlink) return check; } - ctx->input.width = inlink->w; - ctx->input.height = inlink->h; - ctx->input.channels = model_input.channels; - ctx->input.dt = model_input.dt; - - result = (ctx->model->set_input)(ctx->model->model, - &ctx->input, ctx->model_inputname); - if (result != DNN_SUCCESS) { - av_log(ctx, AV_LOG_ERROR, "could not set input and output for the model\n"); - return AVERROR(EIO); - } - return 0; } -static int prepare_sws_context(AVFilterLink *outlink) +static av_always_inline int isPlanarYUV(enum AVPixelFormat pix_fmt) +{ + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); + av_assert0(desc); + return !(desc->flags & AV_PIX_FMT_FLAG_RGB) && desc->nb_components == 3; +} + +static int prepare_uv_scale(AVFilterLink *outlink) { AVFilterContext *context = outlink->src; DnnProcessingContext *ctx = context->priv; AVFilterLink *inlink = context->inputs[0]; enum AVPixelFormat fmt = inlink->format; - DNNDataType input_dt = ctx->input.dt; - DNNDataType output_dt = ctx->output.dt; - - switch (fmt) { - case AV_PIX_FMT_RGB24: - case AV_PIX_FMT_BGR24: - if (input_dt == DNN_FLOAT) { - ctx->sws_gray8_to_grayf32 = sws_getContext(inlink->w * 3, - inlink->h, - AV_PIX_FMT_GRAY8, - inlink->w * 3, - inlink->h, - AV_PIX_FMT_GRAYF32, - 0, NULL, NULL, NULL); - } - if (output_dt == DNN_FLOAT) { - ctx->sws_grayf32_to_gray8 = sws_getContext(outlink->w * 3, - outlink->h, - 
AV_PIX_FMT_GRAYF32, - outlink->w * 3, - outlink->h, - AV_PIX_FMT_GRAY8, - 0, NULL, NULL, NULL); - } - return 0; - case AV_PIX_FMT_YUV420P: - case AV_PIX_FMT_YUV422P: - case AV_PIX_FMT_YUV444P: - case AV_PIX_FMT_YUV410P: - case AV_PIX_FMT_YUV411P: - av_assert0(input_dt == DNN_FLOAT); - av_assert0(output_dt == DNN_FLOAT); - ctx->sws_gray8_to_grayf32 = sws_getContext(inlink->w, - inlink->h, - AV_PIX_FMT_GRAY8, - inlink->w, - inlink->h, - AV_PIX_FMT_GRAYF32, - 0, NULL, NULL, NULL); - ctx->sws_grayf32_to_gray8 = sws_getContext(outlink->w, - outlink->h, - AV_PIX_FMT_GRAYF32, - outlink->w, - outlink->h, - AV_PIX_FMT_GRAY8, - 0, NULL, NULL, NULL); + if (isPlanarYUV(fmt)) { if (inlink->w != outlink->w || inlink->h != outlink->h) { const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt); int sws_src_h = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h); @@ -292,10 +222,6 @@ static int prepare_sws_context(AVFilterLink *outlink) SWS_BICUBIC, NULL, NULL, NULL); ctx->sws_uv_height = sws_src_h; } - return 0; - default: - //do nothing - break; } return 0; @@ -306,120 +232,34 @@ static int config_output(AVFilterLink *outlink) AVFilterContext *context = outlink->src; DnnProcessingContext *ctx = context->priv; DNNReturnType result; + AVFilterLink *inlink = context->inputs[0]; + AVFrame *out = NULL; - // have a try run in case that the dnn model resize the frame - result = (ctx->dnn_module->execute_model)(ctx->model, &ctx->output, (const char **)&ctx->model_outputname, 1); - if (result != DNN_SUCCESS){ - av_log(ctx, AV_LOG_ERROR, "failed to execute model\n"); + AVFrame *fake_in = ff_get_video_buffer(inlink, inlink->w, inlink->h); + result = (ctx->model->set_input)(ctx->model->model, fake_in, ctx->model_inputname); + if (result != DNN_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "could not set input for the model\n"); return AVERROR(EIO); } - outlink->w = ctx->output.width; - outlink->h = ctx->output.height; - - prepare_sws_context(outlink); - - return 0; -} - -static int 
copy_from_frame_to_dnn(DnnProcessingContext *ctx, const AVFrame *frame) -{ - int bytewidth = av_image_get_linesize(frame->format, frame->width, 0); - DNNData *dnn_input = &ctx->input; - - switch (frame->format) { - case AV_PIX_FMT_RGB24: - case AV_PIX_FMT_BGR24: - if (dnn_input->dt == DNN_FLOAT) { - sws_scale(ctx->sws_gray8_to_grayf32, (const uint8_t **)frame->data, frame->linesize, - 0, frame->height, (uint8_t * const*)(&dnn_input->data), - (const int [4]){frame->width * 3 * sizeof(float), 0, 0, 0}); - } else { - av_assert0(dnn_input->dt == DNN_UINT8); - av_image_copy_plane(dnn_input->data, bytewidth, - frame->data[0], frame->linesize[0], - bytewidth, frame->height); - } - return 0; - case AV_PIX_FMT_GRAY8: - case AV_PIX_FMT_GRAYF32: - av_image_copy_plane(dnn_input->data, bytewidth, - frame->data[0], frame->linesize[0], - bytewidth, frame->height); - return 0; - case AV_PIX_FMT_YUV420P: - case AV_PIX_FMT_YUV422P: - case AV_PIX_FMT_YUV444P: - case AV_PIX_FMT_YUV410P: - case AV_PIX_FMT_YUV411P: - sws_scale(ctx->sws_gray8_to_grayf32, (const uint8_t **)frame->data, frame->linesize, - 0, frame->height, (uint8_t * const*)(&dnn_input->data), - (const int [4]){frame->width * sizeof(float), 0, 0, 0}); - return 0; - default: + // have a try run in case that the dnn model resize the frame + out = ff_get_video_buffer(inlink, inlink->w, inlink->h); + result = (ctx->dnn_module->execute_model)(ctx->model, (const char **)&ctx->model_outputname, 1, out); + if (result != DNN_SUCCESS){ + av_log(ctx, AV_LOG_ERROR, "failed to execute model\n"); return AVERROR(EIO); } - return 0; -} + outlink->w = out->width; + outlink->h = out->height; -static int copy_from_dnn_to_frame(DnnProcessingContext *ctx, AVFrame *frame) -{ - int bytewidth = av_image_get_linesize(frame->format, frame->width, 0); - DNNData *dnn_output = &ctx->output; - - switch (frame->format) { - case AV_PIX_FMT_RGB24: - case AV_PIX_FMT_BGR24: - if (dnn_output->dt == DNN_FLOAT) { - sws_scale(ctx->sws_grayf32_to_gray8, (const 
uint8_t *[4]){(const uint8_t *)dnn_output->data, 0, 0, 0}, - (const int[4]){frame->width * 3 * sizeof(float), 0, 0, 0}, - 0, frame->height, (uint8_t * const*)frame->data, frame->linesize); - - } else { - av_assert0(dnn_output->dt == DNN_UINT8); - av_image_copy_plane(frame->data[0], frame->linesize[0], - dnn_output->data, bytewidth, - bytewidth, frame->height); - } - return 0; - case AV_PIX_FMT_GRAY8: - // it is possible that data type of dnn output is float32, - // need to add support for such case when needed. - av_assert0(dnn_output->dt == DNN_UINT8); - av_image_copy_plane(frame->data[0], frame->linesize[0], - dnn_output->data, bytewidth, - bytewidth, frame->height); - return 0; - case AV_PIX_FMT_GRAYF32: - av_assert0(dnn_output->dt == DNN_FLOAT); - av_image_copy_plane(frame->data[0], frame->linesize[0], - dnn_output->data, bytewidth, - bytewidth, frame->height); - return 0; - case AV_PIX_FMT_YUV420P: - case AV_PIX_FMT_YUV422P: - case AV_PIX_FMT_YUV444P: - case AV_PIX_FMT_YUV410P: - case AV_PIX_FMT_YUV411P: - sws_scale(ctx->sws_grayf32_to_gray8, (const uint8_t *[4]){(const uint8_t *)dnn_output->data, 0, 0, 0}, - (const int[4]){frame->width * sizeof(float), 0, 0, 0}, - 0, frame->height, (uint8_t * const*)frame->data, frame->linesize); - return 0; - default: - return AVERROR(EIO); - } + av_frame_free(&fake_in); + av_frame_free(&out); + prepare_uv_scale(outlink); return 0; } -static av_always_inline int isPlanarYUV(enum AVPixelFormat pix_fmt) -{ - const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); - av_assert0(desc); - return !(desc->flags & AV_PIX_FMT_FLAG_RGB) && desc->nb_components == 3; -} - static int copy_uv_planes(DnnProcessingContext *ctx, AVFrame *out, const AVFrame *in) { const AVPixFmtDescriptor *desc; @@ -453,11 +293,9 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in) DNNReturnType dnn_result; AVFrame *out; - copy_from_frame_to_dnn(ctx, in); - - dnn_result = (ctx->dnn_module->execute_model)(ctx->model, &ctx->output, (const char 
**)&ctx->model_outputname, 1); - if (dnn_result != DNN_SUCCESS){ - av_log(ctx, AV_LOG_ERROR, "failed to execute model\n"); + dnn_result = (ctx->model->set_input)(ctx->model->model, in, ctx->model_inputname); + if (dnn_result != DNN_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "could not set input for the model\n"); av_frame_free(&in); return AVERROR(EIO); } @@ -467,9 +305,15 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in) av_frame_free(&in); return AVERROR(ENOMEM); } - av_frame_copy_props(out, in); - copy_from_dnn_to_frame(ctx, out); + + dnn_result = (ctx->dnn_module->execute_model)(ctx->model, (const char **)&ctx->model_outputname, 1, out); + if (dnn_result != DNN_SUCCESS){ + av_log(ctx, AV_LOG_ERROR, "failed to execute model\n"); + av_frame_free(&in); + av_frame_free(&out); + return AVERROR(EIO); + } if (isPlanarYUV(in->format)) copy_uv_planes(ctx, out, in); @@ -482,8 +326,6 @@ static av_cold void uninit(AVFilterContext *ctx) { DnnProcessingContext *context = ctx->priv; - sws_freeContext(context->sws_gray8_to_grayf32); - sws_freeContext(context->sws_grayf32_to_gray8); sws_freeContext(context->sws_uv_scale); if (context->dnn_module) diff --git a/libavfilter/vf_sr.c b/libavfilter/vf_sr.c index 445777f0c6..2eda8c3219 100644 --- a/libavfilter/vf_sr.c +++ b/libavfilter/vf_sr.c @@ -41,11 +41,10 @@ typedef struct SRContext { DNNBackendType backend_type; DNNModule *dnn_module; DNNModel *model; - DNNData input; - DNNData output; int scale_factor; - struct SwsContext *sws_contexts[3]; - int sws_slice_h, sws_input_linesize, sws_output_linesize; + struct SwsContext *sws_uv_scale; + int sws_uv_height; + struct SwsContext *sws_pre_scale; } SRContext; #define OFFSET(x) offsetof(SRContext, x) @@ -87,11 +86,6 @@ static av_cold int init(AVFilterContext *context) return AVERROR(EIO); } - sr_context->input.dt = DNN_FLOAT; - sr_context->sws_contexts[0] = NULL; - sr_context->sws_contexts[1] = NULL; - sr_context->sws_contexts[2] = NULL; - return 0; } @@ -111,95 +105,63 @@ 
static int query_formats(AVFilterContext *context) return ff_set_common_formats(context, formats_list); } -static int config_props(AVFilterLink *inlink) +static int config_output(AVFilterLink *outlink) { - AVFilterContext *context = inlink->dst; - SRContext *sr_context = context->priv; - AVFilterLink *outlink = context->outputs[0]; + AVFilterContext *context = outlink->src; + SRContext *ctx = context->priv; DNNReturnType result; - int sws_src_h, sws_src_w, sws_dst_h, sws_dst_w; + AVFilterLink *inlink = context->inputs[0]; + AVFrame *out = NULL; const char *model_output_name = "y"; - sr_context->input.width = inlink->w * sr_context->scale_factor; - sr_context->input.height = inlink->h * sr_context->scale_factor; - sr_context->input.channels = 1; - - result = (sr_context->model->set_input)(sr_context->model->model, &sr_context->input, "x"); - if (result != DNN_SUCCESS){ - av_log(context, AV_LOG_ERROR, "could not set input and output for the model\n"); + AVFrame *fake_in = ff_get_video_buffer(inlink, inlink->w, inlink->h); + result = (ctx->model->set_input)(ctx->model->model, fake_in, "x"); + if (result != DNN_SUCCESS) { + av_log(context, AV_LOG_ERROR, "could not set input for the model\n"); return AVERROR(EIO); } - result = (sr_context->dnn_module->execute_model)(sr_context->model, &sr_context->output, &model_output_name, 1); + // have a try run in case that the dnn model resize the frame + out = ff_get_video_buffer(inlink, inlink->w, inlink->h); + result = (ctx->dnn_module->execute_model)(ctx->model, (const char **)&model_output_name, 1, out); if (result != DNN_SUCCESS){ av_log(context, AV_LOG_ERROR, "failed to execute loaded model\n"); return AVERROR(EIO); } - if (sr_context->input.height != sr_context->output.height || sr_context->input.width != sr_context->output.width){ - sr_context->input.width = inlink->w; - sr_context->input.height = inlink->h; - result = (sr_context->model->set_input)(sr_context->model->model, &sr_context->input, "x"); - if (result != 
DNN_SUCCESS){ - av_log(context, AV_LOG_ERROR, "could not set input and output for the model\n"); - return AVERROR(EIO); - } - result = (sr_context->dnn_module->execute_model)(sr_context->model, &sr_context->output, &model_output_name, 1); - if (result != DNN_SUCCESS){ - av_log(context, AV_LOG_ERROR, "failed to execute loaded model\n"); - return AVERROR(EIO); - } - sr_context->scale_factor = 0; - } - outlink->h = sr_context->output.height; - outlink->w = sr_context->output.width; - sr_context->sws_contexts[1] = sws_getContext(sr_context->input.width, sr_context->input.height, AV_PIX_FMT_GRAY8, - sr_context->input.width, sr_context->input.height, AV_PIX_FMT_GRAYF32, - 0, NULL, NULL, NULL); - sr_context->sws_input_linesize = sr_context->input.width << 2; - sr_context->sws_contexts[2] = sws_getContext(sr_context->output.width, sr_context->output.height, AV_PIX_FMT_GRAYF32, - sr_context->output.width, sr_context->output.height, AV_PIX_FMT_GRAY8, - 0, NULL, NULL, NULL); - sr_context->sws_output_linesize = sr_context->output.width << 2; - if (!sr_context->sws_contexts[1] || !sr_context->sws_contexts[2]){ - av_log(context, AV_LOG_ERROR, "could not create SwsContext for conversions\n"); - return AVERROR(ENOMEM); - } - if (sr_context->scale_factor){ - sr_context->sws_contexts[0] = sws_getContext(inlink->w, inlink->h, inlink->format, - outlink->w, outlink->h, outlink->format, - SWS_BICUBIC, NULL, NULL, NULL); - if (!sr_context->sws_contexts[0]){ - av_log(context, AV_LOG_ERROR, "could not create SwsContext for scaling\n"); - return AVERROR(ENOMEM); - } - sr_context->sws_slice_h = inlink->h; - } else { + if (fake_in->width != out->width || fake_in->height != out->height) { + //espcn + outlink->w = out->width; + outlink->h = out->height; if (inlink->format != AV_PIX_FMT_GRAY8){ const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format); - sws_src_h = AV_CEIL_RSHIFT(sr_context->input.height, desc->log2_chroma_h); - sws_src_w = AV_CEIL_RSHIFT(sr_context->input.width, 
desc->log2_chroma_w); - sws_dst_h = AV_CEIL_RSHIFT(sr_context->output.height, desc->log2_chroma_h); - sws_dst_w = AV_CEIL_RSHIFT(sr_context->output.width, desc->log2_chroma_w); - - sr_context->sws_contexts[0] = sws_getContext(sws_src_w, sws_src_h, AV_PIX_FMT_GRAY8, - sws_dst_w, sws_dst_h, AV_PIX_FMT_GRAY8, - SWS_BICUBIC, NULL, NULL, NULL); - if (!sr_context->sws_contexts[0]){ - av_log(context, AV_LOG_ERROR, "could not create SwsContext for scaling\n"); - return AVERROR(ENOMEM); - } - sr_context->sws_slice_h = sws_src_h; + int sws_src_h = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h); + int sws_src_w = AV_CEIL_RSHIFT(inlink->w, desc->log2_chroma_w); + int sws_dst_h = AV_CEIL_RSHIFT(outlink->h, desc->log2_chroma_h); + int sws_dst_w = AV_CEIL_RSHIFT(outlink->w, desc->log2_chroma_w); + ctx->sws_uv_scale = sws_getContext(sws_src_w, sws_src_h, AV_PIX_FMT_GRAY8, + sws_dst_w, sws_dst_h, AV_PIX_FMT_GRAY8, + SWS_BICUBIC, NULL, NULL, NULL); + ctx->sws_uv_height = sws_src_h; } + } else { + //srcnn + outlink->w = out->width * ctx->scale_factor; + outlink->h = out->height * ctx->scale_factor; + ctx->sws_pre_scale = sws_getContext(inlink->w, inlink->h, inlink->format, + outlink->w, outlink->h, outlink->format, + SWS_BICUBIC, NULL, NULL, NULL); } + av_frame_free(&fake_in); + av_frame_free(&out); return 0; } static int filter_frame(AVFilterLink *inlink, AVFrame *in) { AVFilterContext *context = inlink->dst; - SRContext *sr_context = context->priv; + SRContext *ctx = context->priv; AVFilterLink *outlink = context->outputs[0]; AVFrame *out = ff_get_video_buffer(outlink, outlink->w, outlink->h); DNNReturnType dnn_result; @@ -211,45 +173,44 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in) return AVERROR(ENOMEM); } av_frame_copy_props(out, in); - out->height = sr_context->output.height; - out->width = sr_context->output.width; - if (sr_context->scale_factor){ - sws_scale(sr_context->sws_contexts[0], (const uint8_t **)in->data, in->linesize, - 0, sr_context->sws_slice_h, 
out->data, out->linesize); - sws_scale(sr_context->sws_contexts[1], (const uint8_t **)out->data, out->linesize, - 0, out->height, (uint8_t * const*)(&sr_context->input.data), - (const int [4]){sr_context->sws_input_linesize, 0, 0, 0}); + if (ctx->sws_pre_scale) { + sws_scale(ctx->sws_pre_scale, + (const uint8_t **)in->data, in->linesize, 0, in->height, + out->data, out->linesize); + dnn_result = (ctx->model->set_input)(ctx->model->model, out, "x"); } else { - if (sr_context->sws_contexts[0]){ - sws_scale(sr_context->sws_contexts[0], (const uint8_t **)(in->data + 1), in->linesize + 1, - 0, sr_context->sws_slice_h, out->data + 1, out->linesize + 1); - sws_scale(sr_context->sws_contexts[0], (const uint8_t **)(in->data + 2), in->linesize + 2, - 0, sr_context->sws_slice_h, out->data + 2, out->linesize + 2); - } + dnn_result = (ctx->model->set_input)(ctx->model->model, in, "x"); + } - sws_scale(sr_context->sws_contexts[1], (const uint8_t **)in->data, in->linesize, - 0, in->height, (uint8_t * const*)(&sr_context->input.data), - (const int [4]){sr_context->sws_input_linesize, 0, 0, 0}); + if (dnn_result != DNN_SUCCESS) { + av_frame_free(&in); + av_frame_free(&out); + av_log(context, AV_LOG_ERROR, "could not set input for the model\n"); + return AVERROR(EIO); } - av_frame_free(&in); - dnn_result = (sr_context->dnn_module->execute_model)(sr_context->model, &sr_context->output, &model_output_name, 1); + dnn_result = (ctx->dnn_module->execute_model)(ctx->model, (const char **)&model_output_name, 1, out); if (dnn_result != DNN_SUCCESS){ - av_log(context, AV_LOG_ERROR, "failed to execute loaded model\n"); + av_log(ctx, AV_LOG_ERROR, "failed to execute loaded model\n"); + av_frame_free(&in); + av_frame_free(&out); return AVERROR(EIO); } - sws_scale(sr_context->sws_contexts[2], (const uint8_t *[4]){(const uint8_t *)sr_context->output.data, 0, 0, 0}, - (const int[4]){sr_context->sws_output_linesize, 0, 0, 0}, - 0, out->height, (uint8_t * const*)out->data, out->linesize); + if 
(ctx->sws_uv_scale) { + sws_scale(ctx->sws_uv_scale, (const uint8_t **)(in->data + 1), in->linesize + 1, + 0, ctx->sws_uv_height, out->data + 1, out->linesize + 1); + sws_scale(ctx->sws_uv_scale, (const uint8_t **)(in->data + 2), in->linesize + 2, + 0, ctx->sws_uv_height, out->data + 2, out->linesize + 2); + } + av_frame_free(&in); return ff_filter_frame(outlink, out); } static av_cold void uninit(AVFilterContext *context) { - int i; SRContext *sr_context = context->priv; if (sr_context->dnn_module){ @@ -257,16 +218,14 @@ static av_cold void uninit(AVFilterContext *context) av_freep(&sr_context->dnn_module); } - for (i = 0; i < 3; ++i){ - sws_freeContext(sr_context->sws_contexts[i]); - } + sws_freeContext(sr_context->sws_uv_scale); + sws_freeContext(sr_context->sws_pre_scale); } static const AVFilterPad sr_inputs[] = { { .name = "default", .type = AVMEDIA_TYPE_VIDEO, - .config_props = config_props, .filter_frame = filter_frame, }, { NULL } @@ -275,6 +234,7 @@ static const AVFilterPad sr_inputs[] = { static const AVFilterPad sr_outputs[] = { { .name = "default", + .config_props = config_output, .type = AVMEDIA_TYPE_VIDEO, }, { NULL } From patchwork Mon Sep 14 06:28:50 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Guo, Yejun" X-Patchwork-Id: 22373 Return-Path: X-Original-To: patchwork@ffaux-bg.ffmpeg.org Delivered-To: patchwork@ffaux-bg.ffmpeg.org Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org [79.124.17.100]) by ffaux.localdomain (Postfix) with ESMTP id 9FBEE44A756 for ; Mon, 14 Sep 2020 09:34:54 +0300 (EEST) Received: from [127.0.1.1] (localhost [127.0.0.1]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id 8400A68BB3C; Mon, 14 Sep 2020 09:34:54 +0300 (EEST) X-Original-To: ffmpeg-devel@ffmpeg.org Delivered-To: ffmpeg-devel@ffmpeg.org Received: from mga18.intel.com (mga18.intel.com [134.134.136.126]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTPS id 810B568BADE for ; Mon, 14 Sep 
2020 09:34:47 +0300 (EEST) IronPort-SDR: 4J7yB/YkVeLarqC6r7+Ck4v8PzDVzivvj5hsPIJTDQo/FcCG783JMoNjwxpjUi0n/XeSxXCGYt Xk2Uu62H+LaQ== X-IronPort-AV: E=McAfee;i="6000,8403,9743"; a="146770637" X-IronPort-AV: E=Sophos;i="5.76,425,1592895600"; d="scan'208";a="146770637" X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from fmsmga004.fm.intel.com ([10.253.24.48]) by orsmga106.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 13 Sep 2020 23:34:44 -0700 IronPort-SDR: vbUX3zTqLY2w1NpNH2Q+dYQUkVrjhATT5B9sZvUDzzvIQjIjLAhCVCSL29772cTQOtQxzjXPOT Z/wqexUgn7OQ== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.76,425,1592895600"; d="scan'208";a="330604156" Received: from yguo18-skl-u1604.sh.intel.com ([10.239.159.53]) by fmsmga004.fm.intel.com with ESMTP; 13 Sep 2020 23:34:43 -0700 From: "Guo, Yejun" To: ffmpeg-devel@ffmpeg.org Date: Mon, 14 Sep 2020 14:28:50 +0800 Message-Id: <20200914062850.22130-1-yejun.guo@intel.com> X-Mailer: git-send-email 2.17.1 Subject: [FFmpeg-devel] [PATCH 3/4] dnn: put DNNModel.set_input and DNNModule.execute_model together X-BeenThere: ffmpeg-devel@ffmpeg.org X-Mailman-Version: 2.1.20 Precedence: list List-Id: FFmpeg development discussions and patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: FFmpeg development discussions and patches Cc: yejun.guo@intel.com MIME-Version: 1.0 Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" suppose we have a detect and classify filter in the future, the detect filter generates some bounding boxes (BBox) as AVFrame sidedata, and the classify filter executes DNN model for each BBox. For each BBox, we need to crop the AVFrame, copy data to DNN model input and do the model execution. So we have to save the in_frame at DNNModel.set_input and use it at DNNModule.execute_model, such saving is not feasible when we support async execute_model. 
This patch sets the in_frame as execution_model parameter, and so all the information are put together within the same function for each inference. It also makes easy to support BBox async inference. Signed-off-by: Guo, Yejun --- libavfilter/dnn/dnn_backend_native.c | 119 +++++++++++-------------- libavfilter/dnn/dnn_backend_native.h | 3 +- libavfilter/dnn/dnn_backend_openvino.c | 85 ++++++++---------- libavfilter/dnn/dnn_backend_openvino.h | 3 +- libavfilter/dnn/dnn_backend_tf.c | 118 ++++++++++-------------- libavfilter/dnn/dnn_backend_tf.h | 3 +- libavfilter/dnn_interface.h | 8 +- libavfilter/vf_derain.c | 9 +- libavfilter/vf_dnn_processing.c | 18 +--- libavfilter/vf_sr.c | 25 ++---- 10 files changed, 156 insertions(+), 235 deletions(-) diff --git a/libavfilter/dnn/dnn_backend_native.c b/libavfilter/dnn/dnn_backend_native.c index 14e878b6b8..dc47c9b542 100644 --- a/libavfilter/dnn/dnn_backend_native.c +++ b/libavfilter/dnn/dnn_backend_native.c @@ -70,64 +70,6 @@ static DNNReturnType get_input_native(void *model, DNNData *input, const char *i return DNN_ERROR; } -static DNNReturnType set_input_native(void *model, AVFrame *frame, const char *input_name) -{ - NativeModel *native_model = (NativeModel *)model; - NativeContext *ctx = &native_model->ctx; - DnnOperand *oprd = NULL; - DNNData input; - - if (native_model->layers_num <= 0 || native_model->operands_num <= 0) { - av_log(ctx, AV_LOG_ERROR, "No operands or layers in model\n"); - return DNN_ERROR; - } - - /* inputs */ - for (int i = 0; i < native_model->operands_num; ++i) { - oprd = &native_model->operands[i]; - if (strcmp(oprd->name, input_name) == 0) { - if (oprd->type != DOT_INPUT) { - av_log(ctx, AV_LOG_ERROR, "Found \"%s\" in model, but it is not input node\n", input_name); - return DNN_ERROR; - } - break; - } - oprd = NULL; - } - if (!oprd) { - av_log(ctx, AV_LOG_ERROR, "Could not find \"%s\" in model\n", input_name); - return DNN_ERROR; - } - - oprd->dims[1] = frame->height; - oprd->dims[2] = 
frame->width; - - av_freep(&oprd->data); - oprd->length = calculate_operand_data_length(oprd); - if (oprd->length <= 0) { - av_log(ctx, AV_LOG_ERROR, "The input data length overflow\n"); - return DNN_ERROR; - } - oprd->data = av_malloc(oprd->length); - if (!oprd->data) { - av_log(ctx, AV_LOG_ERROR, "Failed to malloc memory for input data\n"); - return DNN_ERROR; - } - - input.height = oprd->dims[1]; - input.width = oprd->dims[2]; - input.channels = oprd->dims[3]; - input.data = oprd->data; - input.dt = oprd->data_type; - if (native_model->model->pre_proc != NULL) { - native_model->model->pre_proc(frame, &input, native_model->model->userdata); - } else { - proc_from_frame_to_dnn(frame, &input, ctx); - } - - return DNN_SUCCESS; -} - // Loads model and its parameters that are stored in a binary file with following structure: // layers_num,layer_type,layer_parameterss,layer_type,layer_parameters... // For CONV layer: activation_function, input_num, output_num, kernel_size, kernel, biases @@ -273,7 +215,6 @@ DNNModel *ff_dnn_load_model_native(const char *model_filename, const char *optio return NULL; } - model->set_input = &set_input_native; model->get_input = &get_input_native; model->userdata = userdata; @@ -285,26 +226,66 @@ fail: return NULL; } -DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, const char **output_names, uint32_t nb_output, AVFrame *out_frame) +DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, const char *input_name, AVFrame *in_frame, + const char **output_names, uint32_t nb_output, AVFrame *out_frame) { NativeModel *native_model = (NativeModel *)model->model; NativeContext *ctx = &native_model->ctx; int32_t layer; - DNNData output; + DNNData input, output; + DnnOperand *oprd = NULL; - if (nb_output != 1) { - // currently, the filter does not need multiple outputs, - // so we just pending the support until we really need it. 
- av_log(ctx, AV_LOG_ERROR, "do not support multiple outputs\n"); + if (native_model->layers_num <= 0 || native_model->operands_num <= 0) { + av_log(ctx, AV_LOG_ERROR, "No operands or layers in model\n"); return DNN_ERROR; } - if (native_model->layers_num <= 0 || native_model->operands_num <= 0) { - av_log(ctx, AV_LOG_ERROR, "No operands or layers in model\n"); + for (int i = 0; i < native_model->operands_num; ++i) { + oprd = &native_model->operands[i]; + if (strcmp(oprd->name, input_name) == 0) { + if (oprd->type != DOT_INPUT) { + av_log(ctx, AV_LOG_ERROR, "Found \"%s\" in model, but it is not input node\n", input_name); + return DNN_ERROR; + } + break; + } + oprd = NULL; + } + if (!oprd) { + av_log(ctx, AV_LOG_ERROR, "Could not find \"%s\" in model\n", input_name); + return DNN_ERROR; + } + + oprd->dims[1] = in_frame->height; + oprd->dims[2] = in_frame->width; + + av_freep(&oprd->data); + oprd->length = calculate_operand_data_length(oprd); + if (oprd->length <= 0) { + av_log(ctx, AV_LOG_ERROR, "The input data length overflow\n"); return DNN_ERROR; } - if (!native_model->operands[0].data) { - av_log(ctx, AV_LOG_ERROR, "Empty model input data\n"); + oprd->data = av_malloc(oprd->length); + if (!oprd->data) { + av_log(ctx, AV_LOG_ERROR, "Failed to malloc memory for input data\n"); + return DNN_ERROR; + } + + input.height = oprd->dims[1]; + input.width = oprd->dims[2]; + input.channels = oprd->dims[3]; + input.data = oprd->data; + input.dt = oprd->data_type; + if (native_model->model->pre_proc != NULL) { + native_model->model->pre_proc(in_frame, &input, native_model->model->userdata); + } else { + proc_from_frame_to_dnn(in_frame, &input, ctx); + } + + if (nb_output != 1) { + // currently, the filter does not need multiple outputs, + // so we just pending the support until we really need it. 
+ av_log(ctx, AV_LOG_ERROR, "do not support multiple outputs\n"); return DNN_ERROR; } diff --git a/libavfilter/dnn/dnn_backend_native.h b/libavfilter/dnn/dnn_backend_native.h index 553438bd22..2f8d73fcf6 100644 --- a/libavfilter/dnn/dnn_backend_native.h +++ b/libavfilter/dnn/dnn_backend_native.h @@ -128,7 +128,8 @@ typedef struct NativeModel{ DNNModel *ff_dnn_load_model_native(const char *model_filename, const char *options, void *userdata); -DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, const char **output_names, uint32_t nb_output, AVFrame *out_frame); +DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, const char *input_name, AVFrame *in_frame, + const char **output_names, uint32_t nb_output, AVFrame *out_frame); void ff_dnn_free_model_native(DNNModel **model); diff --git a/libavfilter/dnn/dnn_backend_openvino.c b/libavfilter/dnn/dnn_backend_openvino.c index b1bad3f659..0dba1c1adc 100644 --- a/libavfilter/dnn/dnn_backend_openvino.c +++ b/libavfilter/dnn/dnn_backend_openvino.c @@ -48,7 +48,6 @@ typedef struct OVModel{ ie_network_t *network; ie_executable_network_t *exe_network; ie_infer_request_t *infer_request; - ie_blob_t *input_blob; } OVModel; #define APPEND_STRING(generated_string, iterate_string) \ @@ -133,49 +132,6 @@ static DNNReturnType get_input_ov(void *model, DNNData *input, const char *input return DNN_ERROR; } -static DNNReturnType set_input_ov(void *model, AVFrame *frame, const char *input_name) -{ - OVModel *ov_model = (OVModel *)model; - OVContext *ctx = &ov_model->ctx; - IEStatusCode status; - dimensions_t dims; - precision_e precision; - ie_blob_buffer_t blob_buffer; - DNNData input; - - status = ie_infer_request_get_blob(ov_model->infer_request, input_name, &ov_model->input_blob); - if (status != OK) - goto err; - - status |= ie_blob_get_dims(ov_model->input_blob, &dims); - status |= ie_blob_get_precision(ov_model->input_blob, &precision); - if (status != OK) - goto err; - - status = 
ie_blob_get_buffer(ov_model->input_blob, &blob_buffer); - if (status != OK) - goto err; - - input.height = dims.dims[2]; - input.width = dims.dims[3]; - input.channels = dims.dims[1]; - input.data = blob_buffer.buffer; - input.dt = precision_to_datatype(precision); - if (ov_model->model->pre_proc != NULL) { - ov_model->model->pre_proc(frame, &input, ov_model->model->userdata); - } else { - proc_from_frame_to_dnn(frame, &input, ctx); - } - - return DNN_SUCCESS; - -err: - if (ov_model->input_blob) - ie_blob_free(&ov_model->input_blob); - av_log(ctx, AV_LOG_ERROR, "Failed to create inference instance or get input data/dims/precision/memory\n"); - return DNN_ERROR; -} - DNNModel *ff_dnn_load_model_ov(const char *model_filename, const char *options, void *userdata) { char *all_dev_names = NULL; @@ -234,7 +190,6 @@ DNNModel *ff_dnn_load_model_ov(const char *model_filename, const char *options, goto err; model->model = (void *)ov_model; - model->set_input = &set_input_ov; model->get_input = &get_input_ov; model->options = options; model->userdata = userdata; @@ -258,7 +213,8 @@ err: return NULL; } -DNNReturnType ff_dnn_execute_model_ov(const DNNModel *model, const char **output_names, uint32_t nb_output, AVFrame *out_frame) +DNNReturnType ff_dnn_execute_model_ov(const DNNModel *model, const char *input_name, AVFrame *in_frame, + const char **output_names, uint32_t nb_output, AVFrame *out_frame) { char *model_output_name = NULL; char *all_output_names = NULL; @@ -269,7 +225,39 @@ DNNReturnType ff_dnn_execute_model_ov(const DNNModel *model, const char **output OVContext *ctx = &ov_model->ctx; IEStatusCode status; size_t model_output_count = 0; - DNNData output; + DNNData input, output; + ie_blob_t *input_blob = NULL; + + status = ie_infer_request_get_blob(ov_model->infer_request, input_name, &input_blob); + if (status != OK) { + av_log(ctx, AV_LOG_ERROR, "Failed to get input blob\n"); + return DNN_ERROR; + } + + status |= ie_blob_get_dims(input_blob, &dims); + status |= 
ie_blob_get_precision(input_blob, &precision); + if (status != OK) { + av_log(ctx, AV_LOG_ERROR, "Failed to get input blob dims/precision\n"); + return DNN_ERROR; + } + + status = ie_blob_get_buffer(input_blob, &blob_buffer); + if (status != OK) { + av_log(ctx, AV_LOG_ERROR, "Failed to get input blob buffer\n"); + return DNN_ERROR; + } + + input.height = dims.dims[2]; + input.width = dims.dims[3]; + input.channels = dims.dims[1]; + input.data = blob_buffer.buffer; + input.dt = precision_to_datatype(precision); + if (ov_model->model->pre_proc != NULL) { + ov_model->model->pre_proc(in_frame, &input, ov_model->model->userdata); + } else { + proc_from_frame_to_dnn(in_frame, &input, ctx); + } + ie_blob_free(&input_blob); if (nb_output != 1) { // currently, the filter does not need multiple outputs, @@ -330,6 +318,7 @@ DNNReturnType ff_dnn_execute_model_ov(const DNNModel *model, const char **output proc_from_dnn_to_frame(out_frame, &output, ctx); } } + ie_blob_free(&output_blob); } return DNN_SUCCESS; @@ -339,8 +328,6 @@ void ff_dnn_free_model_ov(DNNModel **model) { if (*model){ OVModel *ov_model = (OVModel *)(*model)->model; - if (ov_model->input_blob) - ie_blob_free(&ov_model->input_blob); if (ov_model->infer_request) ie_infer_request_free(&ov_model->infer_request); if (ov_model->exe_network) diff --git a/libavfilter/dnn/dnn_backend_openvino.h b/libavfilter/dnn/dnn_backend_openvino.h index efb349cb49..3f8f01da60 100644 --- a/libavfilter/dnn/dnn_backend_openvino.h +++ b/libavfilter/dnn/dnn_backend_openvino.h @@ -31,7 +31,8 @@ DNNModel *ff_dnn_load_model_ov(const char *model_filename, const char *options, void *userdata); -DNNReturnType ff_dnn_execute_model_ov(const DNNModel *model, const char **output_names, uint32_t nb_output, AVFrame *out_frame); +DNNReturnType ff_dnn_execute_model_ov(const DNNModel *model, const char *input_name, AVFrame *in_frame, + const char **output_names, uint32_t nb_output, AVFrame *out_frame); void ff_dnn_free_model_ov(DNNModel **model); diff 
--git a/libavfilter/dnn/dnn_backend_tf.c b/libavfilter/dnn/dnn_backend_tf.c index c2d8c06931..8467f8a459 100644 --- a/libavfilter/dnn/dnn_backend_tf.c +++ b/libavfilter/dnn/dnn_backend_tf.c @@ -45,8 +45,6 @@ typedef struct TFModel{ TF_Graph *graph; TF_Session *session; TF_Status *status; - TF_Output input; - TF_Tensor *input_tensor; } TFModel; static const AVClass dnn_tensorflow_class = { @@ -152,48 +150,33 @@ static DNNReturnType get_input_tf(void *model, DNNData *input, const char *input return DNN_SUCCESS; } -static DNNReturnType set_input_tf(void *model, AVFrame *frame, const char *input_name) +static DNNReturnType load_tf_model(TFModel *tf_model, const char *model_filename) { - TFModel *tf_model = (TFModel *)model; TFContext *ctx = &tf_model->ctx; - DNNData input; + TF_Buffer *graph_def; + TF_ImportGraphDefOptions *graph_opts; TF_SessionOptions *sess_opts; - const TF_Operation *init_op = TF_GraphOperationByName(tf_model->graph, "init"); - - if (get_input_tf(model, &input, input_name) != DNN_SUCCESS) - return DNN_ERROR; - input.height = frame->height; - input.width = frame->width; + const TF_Operation *init_op; - // Input operation - tf_model->input.oper = TF_GraphOperationByName(tf_model->graph, input_name); - if (!tf_model->input.oper){ - av_log(ctx, AV_LOG_ERROR, "Could not find \"%s\" in model\n", input_name); + graph_def = read_graph(model_filename); + if (!graph_def){ + av_log(ctx, AV_LOG_ERROR, "Failed to read model \"%s\" graph\n", model_filename); return DNN_ERROR; } - tf_model->input.index = 0; - if (tf_model->input_tensor){ - TF_DeleteTensor(tf_model->input_tensor); - } - tf_model->input_tensor = allocate_input_tensor(&input); - if (!tf_model->input_tensor){ - av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory for input tensor\n"); + tf_model->graph = TF_NewGraph(); + tf_model->status = TF_NewStatus(); + graph_opts = TF_NewImportGraphDefOptions(); + TF_GraphImportGraphDef(tf_model->graph, graph_def, graph_opts, tf_model->status); + 
TF_DeleteImportGraphDefOptions(graph_opts); + TF_DeleteBuffer(graph_def); + if (TF_GetCode(tf_model->status) != TF_OK){ + TF_DeleteGraph(tf_model->graph); + TF_DeleteStatus(tf_model->status); + av_log(ctx, AV_LOG_ERROR, "Failed to import serialized graph to model graph\n"); return DNN_ERROR; } - input.data = (float *)TF_TensorData(tf_model->input_tensor); - - if (tf_model->model->pre_proc != NULL) { - tf_model->model->pre_proc(frame, &input, tf_model->model->userdata); - } else { - proc_from_frame_to_dnn(frame, &input, ctx); - } - - // session - if (tf_model->session){ - TF_CloseSession(tf_model->session, tf_model->status); - TF_DeleteSession(tf_model->session, tf_model->status); - } + init_op = TF_GraphOperationByName(tf_model->graph, "init"); sess_opts = TF_NewSessionOptions(); tf_model->session = TF_NewSession(tf_model->graph, sess_opts, tf_model->status); TF_DeleteSessionOptions(sess_opts); @@ -219,33 +202,6 @@ static DNNReturnType set_input_tf(void *model, AVFrame *frame, const char *input return DNN_SUCCESS; } -static DNNReturnType load_tf_model(TFModel *tf_model, const char *model_filename) -{ - TFContext *ctx = &tf_model->ctx; - TF_Buffer *graph_def; - TF_ImportGraphDefOptions *graph_opts; - - graph_def = read_graph(model_filename); - if (!graph_def){ - av_log(ctx, AV_LOG_ERROR, "Failed to read model \"%s\" graph\n", model_filename); - return DNN_ERROR; - } - tf_model->graph = TF_NewGraph(); - tf_model->status = TF_NewStatus(); - graph_opts = TF_NewImportGraphDefOptions(); - TF_GraphImportGraphDef(tf_model->graph, graph_def, graph_opts, tf_model->status); - TF_DeleteImportGraphDefOptions(graph_opts); - TF_DeleteBuffer(graph_def); - if (TF_GetCode(tf_model->status) != TF_OK){ - TF_DeleteGraph(tf_model->graph); - TF_DeleteStatus(tf_model->status); - av_log(ctx, AV_LOG_ERROR, "Failed to import serialized graph to model graph\n"); - return DNN_ERROR; - } - - return DNN_SUCCESS; -} - #define NAME_BUFFER_SIZE 256 static DNNReturnType add_conv_layer(TFModel 
*tf_model, TF_Operation *transpose_op, TF_Operation **cur_op, @@ -626,7 +582,6 @@ DNNModel *ff_dnn_load_model_tf(const char *model_filename, const char *options, } model->model = (void *)tf_model; - model->set_input = &set_input_tf; model->get_input = &get_input_tf; model->options = options; model->userdata = userdata; @@ -634,13 +589,40 @@ DNNModel *ff_dnn_load_model_tf(const char *model_filename, const char *options, return model; } -DNNReturnType ff_dnn_execute_model_tf(const DNNModel *model, const char **output_names, uint32_t nb_output, AVFrame *out_frame) +DNNReturnType ff_dnn_execute_model_tf(const DNNModel *model, const char *input_name, AVFrame *in_frame, + const char **output_names, uint32_t nb_output, AVFrame *out_frame) { TF_Output *tf_outputs; TFModel *tf_model = (TFModel *)model->model; TFContext *ctx = &tf_model->ctx; - DNNData output; + DNNData input, output; TF_Tensor **output_tensors; + TF_Output tf_input; + TF_Tensor *input_tensor; + + if (get_input_tf(tf_model, &input, input_name) != DNN_SUCCESS) + return DNN_ERROR; + input.height = in_frame->height; + input.width = in_frame->width; + + tf_input.oper = TF_GraphOperationByName(tf_model->graph, input_name); + if (!tf_input.oper){ + av_log(ctx, AV_LOG_ERROR, "Could not find \"%s\" in model\n", input_name); + return DNN_ERROR; + } + tf_input.index = 0; + input_tensor = allocate_input_tensor(&input); + if (!input_tensor){ + av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory for input tensor\n"); + return DNN_ERROR; + } + input.data = (float *)TF_TensorData(input_tensor); + + if (tf_model->model->pre_proc != NULL) { + tf_model->model->pre_proc(in_frame, &input, tf_model->model->userdata); + } else { + proc_from_frame_to_dnn(in_frame, &input, ctx); + } if (nb_output != 1) { // currently, the filter does not need multiple outputs, @@ -674,7 +656,7 @@ DNNReturnType ff_dnn_execute_model_tf(const DNNModel *model, const char **output } TF_SessionRun(tf_model->session, NULL, - &tf_model->input, 
&tf_model->input_tensor, 1, + &tf_input, &input_tensor, 1, tf_outputs, output_tensors, nb_output, NULL, 0, NULL, tf_model->status); if (TF_GetCode(tf_model->status) != TF_OK) { @@ -708,6 +690,7 @@ DNNReturnType ff_dnn_execute_model_tf(const DNNModel *model, const char **output TF_DeleteTensor(output_tensors[i]); } } + TF_DeleteTensor(input_tensor); av_freep(&output_tensors); av_freep(&tf_outputs); return DNN_SUCCESS; @@ -729,9 +712,6 @@ void ff_dnn_free_model_tf(DNNModel **model) if (tf_model->status){ TF_DeleteStatus(tf_model->status); } - if (tf_model->input_tensor){ - TF_DeleteTensor(tf_model->input_tensor); - } av_freep(&tf_model); av_freep(model); } diff --git a/libavfilter/dnn/dnn_backend_tf.h b/libavfilter/dnn/dnn_backend_tf.h index f379e83d8d..1e00669736 100644 --- a/libavfilter/dnn/dnn_backend_tf.h +++ b/libavfilter/dnn/dnn_backend_tf.h @@ -31,7 +31,8 @@ DNNModel *ff_dnn_load_model_tf(const char *model_filename, const char *options, void *userdata); -DNNReturnType ff_dnn_execute_model_tf(const DNNModel *model, const char **output_names, uint32_t nb_output, AVFrame *out_frame); +DNNReturnType ff_dnn_execute_model_tf(const DNNModel *model, const char *input_name, AVFrame *in_frame, + const char **output_names, uint32_t nb_output, AVFrame *out_frame); void ff_dnn_free_model_tf(DNNModel **model); diff --git a/libavfilter/dnn_interface.h b/libavfilter/dnn_interface.h index 6debc50607..0369ee4f71 100644 --- a/libavfilter/dnn_interface.h +++ b/libavfilter/dnn_interface.h @@ -51,9 +51,6 @@ typedef struct DNNModel{ // Gets model input information // Just reuse struct DNNData here, actually the DNNData.data field is not needed. DNNReturnType (*get_input)(void *model, DNNData *input, const char *input_name); - // Sets model input. - // Should be called every time before model execution. 
- DNNReturnType (*set_input)(void *model, AVFrame *frame, const char *input_name); // set the pre process to transfer data from AVFrame to DNNData // the default implementation within DNN is used if it is not provided by the filter int (*pre_proc)(AVFrame *frame_in, DNNData *model_input, void *user_data); @@ -66,8 +63,9 @@ typedef struct DNNModel{ typedef struct DNNModule{ // Loads model and parameters from given file. Returns NULL if it is not possible. DNNModel *(*load_model)(const char *model_filename, const char *options, void *userdata); - // Executes model with specified output. Returns DNN_ERROR otherwise. - DNNReturnType (*execute_model)(const DNNModel *model, const char **output_names, uint32_t nb_output, AVFrame *out_frame); + // Executes model with specified input and output. Returns DNN_ERROR otherwise. + DNNReturnType (*execute_model)(const DNNModel *model, const char *input_name, AVFrame *in_frame, + const char **output_names, uint32_t nb_output, AVFrame *out_frame); // Frees memory allocated for model. 
void (*free_model)(DNNModel **model); } DNNModule; diff --git a/libavfilter/vf_derain.c b/libavfilter/vf_derain.c index a59cd6e941..77dd401263 100644 --- a/libavfilter/vf_derain.c +++ b/libavfilter/vf_derain.c @@ -80,13 +80,6 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in) const char *model_output_name = "y"; AVFrame *out; - dnn_result = (dr_context->model->set_input)(dr_context->model->model, in, "x"); - if (dnn_result != DNN_SUCCESS) { - av_log(ctx, AV_LOG_ERROR, "could not set input for the model\n"); - av_frame_free(&in); - return AVERROR(EIO); - } - out = ff_get_video_buffer(outlink, outlink->w, outlink->h); if (!out) { av_log(ctx, AV_LOG_ERROR, "could not allocate memory for output frame\n"); @@ -95,7 +88,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in) } av_frame_copy_props(out, in); - dnn_result = (dr_context->dnn_module->execute_model)(dr_context->model, &model_output_name, 1, out); + dnn_result = (dr_context->dnn_module->execute_model)(dr_context->model, "x", in, &model_output_name, 1, out); if (dnn_result != DNN_SUCCESS){ av_log(ctx, AV_LOG_ERROR, "failed to execute model\n"); av_frame_free(&in); diff --git a/libavfilter/vf_dnn_processing.c b/libavfilter/vf_dnn_processing.c index d7462bc828..2c8578c9b0 100644 --- a/libavfilter/vf_dnn_processing.c +++ b/libavfilter/vf_dnn_processing.c @@ -236,15 +236,11 @@ static int config_output(AVFilterLink *outlink) AVFrame *out = NULL; AVFrame *fake_in = ff_get_video_buffer(inlink, inlink->w, inlink->h); - result = (ctx->model->set_input)(ctx->model->model, fake_in, ctx->model_inputname); - if (result != DNN_SUCCESS) { - av_log(ctx, AV_LOG_ERROR, "could not set input for the model\n"); - return AVERROR(EIO); - } // have a try run in case that the dnn model resize the frame out = ff_get_video_buffer(inlink, inlink->w, inlink->h); - result = (ctx->dnn_module->execute_model)(ctx->model, (const char **)&ctx->model_outputname, 1, out); + result = (ctx->dnn_module->execute_model)(ctx->model, 
ctx->model_inputname, fake_in, + (const char **)&ctx->model_outputname, 1, out); if (result != DNN_SUCCESS){ av_log(ctx, AV_LOG_ERROR, "failed to execute model\n"); return AVERROR(EIO); @@ -293,13 +289,6 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in) DNNReturnType dnn_result; AVFrame *out; - dnn_result = (ctx->model->set_input)(ctx->model->model, in, ctx->model_inputname); - if (dnn_result != DNN_SUCCESS) { - av_log(ctx, AV_LOG_ERROR, "could not set input for the model\n"); - av_frame_free(&in); - return AVERROR(EIO); - } - out = ff_get_video_buffer(outlink, outlink->w, outlink->h); if (!out) { av_frame_free(&in); @@ -307,7 +296,8 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in) } av_frame_copy_props(out, in); - dnn_result = (ctx->dnn_module->execute_model)(ctx->model, (const char **)&ctx->model_outputname, 1, out); + dnn_result = (ctx->dnn_module->execute_model)(ctx->model, ctx->model_inputname, in, + (const char **)&ctx->model_outputname, 1, out); if (dnn_result != DNN_SUCCESS){ av_log(ctx, AV_LOG_ERROR, "failed to execute model\n"); av_frame_free(&in); diff --git a/libavfilter/vf_sr.c b/libavfilter/vf_sr.c index 2eda8c3219..72a3137262 100644 --- a/libavfilter/vf_sr.c +++ b/libavfilter/vf_sr.c @@ -114,16 +114,11 @@ static int config_output(AVFilterLink *outlink) AVFrame *out = NULL; const char *model_output_name = "y"; - AVFrame *fake_in = ff_get_video_buffer(inlink, inlink->w, inlink->h); - result = (ctx->model->set_input)(ctx->model->model, fake_in, "x"); - if (result != DNN_SUCCESS) { - av_log(context, AV_LOG_ERROR, "could not set input for the model\n"); - return AVERROR(EIO); - } - // have a try run in case that the dnn model resize the frame + AVFrame *fake_in = ff_get_video_buffer(inlink, inlink->w, inlink->h); out = ff_get_video_buffer(inlink, inlink->w, inlink->h); - result = (ctx->dnn_module->execute_model)(ctx->model, (const char **)&model_output_name, 1, out); + result = (ctx->dnn_module->execute_model)(ctx->model, "x", 
fake_in, + (const char **)&model_output_name, 1, out); if (result != DNN_SUCCESS){ av_log(context, AV_LOG_ERROR, "failed to execute loaded model\n"); return AVERROR(EIO); @@ -178,19 +173,13 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in) sws_scale(ctx->sws_pre_scale, (const uint8_t **)in->data, in->linesize, 0, in->height, out->data, out->linesize); - dnn_result = (ctx->model->set_input)(ctx->model->model, out, "x"); + dnn_result = (ctx->dnn_module->execute_model)(ctx->model, "x", out, + (const char **)&model_output_name, 1, out); } else { - dnn_result = (ctx->model->set_input)(ctx->model->model, in, "x"); - } - - if (dnn_result != DNN_SUCCESS) { - av_frame_free(&in); - av_frame_free(&out); - av_log(context, AV_LOG_ERROR, "could not set input for the model\n"); - return AVERROR(EIO); + dnn_result = (ctx->dnn_module->execute_model)(ctx->model, "x", in, + (const char **)&model_output_name, 1, out); } - dnn_result = (ctx->dnn_module->execute_model)(ctx->model, (const char **)&model_output_name, 1, out); if (dnn_result != DNN_SUCCESS){ av_log(ctx, AV_LOG_ERROR, "failed to execute loaded model\n"); av_frame_free(&in); From patchwork Mon Sep 14 06:28:57 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Guo, Yejun" X-Patchwork-Id: 22374 Return-Path: X-Original-To: patchwork@ffaux-bg.ffmpeg.org Delivered-To: patchwork@ffaux-bg.ffmpeg.org Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org [79.124.17.100]) by ffaux.localdomain (Postfix) with ESMTP id AACA344A756 for ; Mon, 14 Sep 2020 09:34:57 +0300 (EEST) Received: from [127.0.1.1] (localhost [127.0.0.1]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id 8E55C68BB5A; Mon, 14 Sep 2020 09:34:57 +0300 (EEST) X-Original-To: ffmpeg-devel@ffmpeg.org Delivered-To: ffmpeg-devel@ffmpeg.org Received: from mga03.intel.com (mga03.intel.com [134.134.136.65]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTPS id 1DB1768BADE for ; Mon, 14 Sep 2020 
09:34:54 +0300 (EEST) IronPort-SDR: BOXirTsxA46P7Bi09jeFEnNLb3u5PPqqeCqjcLv9CHiRZKV979p7lHA3Vt9xVDbxLCzl9kmsus hcn/20aq7cHQ== X-IronPort-AV: E=McAfee;i="6000,8403,9743"; a="159068475" X-IronPort-AV: E=Sophos;i="5.76,425,1592895600"; d="scan'208";a="159068475" X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from fmsmga004.fm.intel.com ([10.253.24.48]) by orsmga103.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 13 Sep 2020 23:34:52 -0700 IronPort-SDR: jGYg2odeSdjyRbe9hq/+hSiNSW5wtslMk+nzuuU9FCGx9I7IjVUci6dsb3yL9OBYwk8K7nzMMd 7WSqMe44803g== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.76,425,1592895600"; d="scan'208";a="330604180" Received: from yguo18-skl-u1604.sh.intel.com ([10.239.159.53]) by fmsmga004.fm.intel.com with ESMTP; 13 Sep 2020 23:34:50 -0700 From: "Guo, Yejun" To: ffmpeg-devel@ffmpeg.org Date: Mon, 14 Sep 2020 14:28:57 +0800 Message-Id: <20200914062857.22178-1-yejun.guo@intel.com> X-Mailer: git-send-email 2.17.1 Subject: [FFmpeg-devel] [PATCH 4/4] dnn: add a new interface DNNModel.get_output X-BeenThere: ffmpeg-devel@ffmpeg.org X-Mailman-Version: 2.1.20 Precedence: list List-Id: FFmpeg development discussions and patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: FFmpeg development discussions and patches Cc: yejun.guo@intel.com MIME-Version: 1.0 Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" for some cases (for example, super resolution), the DNN model changes the frame size which impacts the filter behavior, so the filter needs to know the out frame size at the very beginning. Currently, the filter reuses DNNModule.execute_model to query the out frame size, which is not clear from an interface perspective, so add a new explicit interface DNNModel.get_output for such query.
Signed-off-by: Guo, Yejun --- libavfilter/dnn/dnn_backend_native.c | 66 ++++++++++++++++++++++---- libavfilter/dnn/dnn_backend_openvino.c | 66 ++++++++++++++++++++++---- libavfilter/dnn/dnn_backend_tf.c | 66 ++++++++++++++++++++++---- libavfilter/dnn_interface.h | 3 ++ libavfilter/vf_dnn_processing.c | 17 ++----- libavfilter/vf_sr.c | 25 ++++------ 6 files changed, 185 insertions(+), 58 deletions(-) diff --git a/libavfilter/dnn/dnn_backend_native.c b/libavfilter/dnn/dnn_backend_native.c index dc47c9b542..d45e211f0c 100644 --- a/libavfilter/dnn/dnn_backend_native.c +++ b/libavfilter/dnn/dnn_backend_native.c @@ -44,6 +44,10 @@ const AVClass dnn_native_class = { .category = AV_CLASS_CATEGORY_FILTER, }; +static DNNReturnType execute_model_native(const DNNModel *model, const char *input_name, AVFrame *in_frame, + const char **output_names, uint32_t nb_output, AVFrame *out_frame, + int do_ioproc); + static DNNReturnType get_input_native(void *model, DNNData *input, const char *input_name) { NativeModel *native_model = (NativeModel *)model; @@ -70,6 +74,25 @@ static DNNReturnType get_input_native(void *model, DNNData *input, const char *i return DNN_ERROR; } +static DNNReturnType get_output_native(void *model, const char *input_name, int input_width, int input_height, + const char *output_name, int *output_width, int *output_height) +{ + DNNReturnType ret; + NativeModel *native_model = (NativeModel *)model; + AVFrame *in_frame = av_frame_alloc(); + AVFrame *out_frame = av_frame_alloc(); + in_frame->width = input_width; + in_frame->height = input_height; + + ret = execute_model_native(native_model->model, input_name, in_frame, &output_name, 1, out_frame, 0); + *output_width = out_frame->width; + *output_height = out_frame->height; + + av_frame_free(&out_frame); + av_frame_free(&in_frame); + return ret; +} + // Loads model and its parameters that are stored in a binary file with following structure: // layers_num,layer_type,layer_parameterss,layer_type,layer_parameters... 
// For CONV layer: activation_function, input_num, output_num, kernel_size, kernel, biases @@ -216,6 +239,7 @@ DNNModel *ff_dnn_load_model_native(const char *model_filename, const char *optio } model->get_input = &get_input_native; + model->get_output = &get_output_native; model->userdata = userdata; return model; @@ -226,8 +250,9 @@ fail: return NULL; } -DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, const char *input_name, AVFrame *in_frame, - const char **output_names, uint32_t nb_output, AVFrame *out_frame) +static DNNReturnType execute_model_native(const DNNModel *model, const char *input_name, AVFrame *in_frame, + const char **output_names, uint32_t nb_output, AVFrame *out_frame, + int do_ioproc) { NativeModel *native_model = (NativeModel *)model->model; NativeContext *ctx = &native_model->ctx; @@ -276,10 +301,12 @@ DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, const char *inp input.channels = oprd->dims[3]; input.data = oprd->data; input.dt = oprd->data_type; - if (native_model->model->pre_proc != NULL) { - native_model->model->pre_proc(in_frame, &input, native_model->model->userdata); - } else { - proc_from_frame_to_dnn(in_frame, &input, ctx); + if (do_ioproc) { + if (native_model->model->pre_proc != NULL) { + native_model->model->pre_proc(in_frame, &input, native_model->model->userdata); + } else { + proc_from_frame_to_dnn(in_frame, &input, ctx); + } } if (nb_output != 1) { @@ -322,21 +349,40 @@ DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, const char *inp output.channels = oprd->dims[3]; output.dt = oprd->data_type; - if (out_frame->width != output.width || out_frame->height != output.height) { - out_frame->width = output.width; - out_frame->height = output.height; - } else { + if (do_ioproc) { if (native_model->model->post_proc != NULL) { native_model->model->post_proc(out_frame, &output, native_model->model->userdata); } else { proc_from_dnn_to_frame(out_frame, &output, ctx); } + } else { + 
out_frame->width = output.width; + out_frame->height = output.height; } } return DNN_SUCCESS; } +DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, const char *input_name, AVFrame *in_frame, + const char **output_names, uint32_t nb_output, AVFrame *out_frame) +{ + NativeModel *native_model = (NativeModel *)model->model; + NativeContext *ctx = &native_model->ctx; + + if (!in_frame) { + av_log(ctx, AV_LOG_ERROR, "in frame is NULL when execute model.\n"); + return DNN_ERROR; + } + + if (!out_frame) { + av_log(ctx, AV_LOG_ERROR, "out frame is NULL when execute model.\n"); + return DNN_ERROR; + } + + return execute_model_native(model, input_name, in_frame, output_names, nb_output, out_frame, 1); +} + int32_t calculate_operand_dims_count(const DnnOperand *oprd) { int32_t result = 1; diff --git a/libavfilter/dnn/dnn_backend_openvino.c b/libavfilter/dnn/dnn_backend_openvino.c index 0dba1c1adc..495225d0b3 100644 --- a/libavfilter/dnn/dnn_backend_openvino.c +++ b/libavfilter/dnn/dnn_backend_openvino.c @@ -63,6 +63,10 @@ static const AVOption dnn_openvino_options[] = { AVFILTER_DEFINE_CLASS(dnn_openvino); +static DNNReturnType execute_model_ov(const DNNModel *model, const char *input_name, AVFrame *in_frame, + const char **output_names, uint32_t nb_output, AVFrame *out_frame, + int do_ioproc); + static DNNDataType precision_to_datatype(precision_e precision) { switch (precision) @@ -132,6 +136,25 @@ static DNNReturnType get_input_ov(void *model, DNNData *input, const char *input return DNN_ERROR; } +static DNNReturnType get_output_ov(void *model, const char *input_name, int input_width, int input_height, + const char *output_name, int *output_width, int *output_height) +{ + DNNReturnType ret; + OVModel *ov_model = (OVModel *)model; + AVFrame *in_frame = av_frame_alloc(); + AVFrame *out_frame = av_frame_alloc(); + in_frame->width = input_width; + in_frame->height = input_height; + + ret = execute_model_ov(ov_model->model, input_name, in_frame, &output_name, 1, 
out_frame, 0); + *output_width = out_frame->width; + *output_height = out_frame->height; + + av_frame_free(&out_frame); + av_frame_free(&in_frame); + return ret; +} + DNNModel *ff_dnn_load_model_ov(const char *model_filename, const char *options, void *userdata) { char *all_dev_names = NULL; @@ -191,6 +214,7 @@ DNNModel *ff_dnn_load_model_ov(const char *model_filename, const char *options, model->model = (void *)ov_model; model->get_input = &get_input_ov; + model->get_output = &get_output_ov; model->options = options; model->userdata = userdata; @@ -213,8 +237,9 @@ err: return NULL; } -DNNReturnType ff_dnn_execute_model_ov(const DNNModel *model, const char *input_name, AVFrame *in_frame, - const char **output_names, uint32_t nb_output, AVFrame *out_frame) +static DNNReturnType execute_model_ov(const DNNModel *model, const char *input_name, AVFrame *in_frame, + const char **output_names, uint32_t nb_output, AVFrame *out_frame, + int do_ioproc) { char *model_output_name = NULL; char *all_output_names = NULL; @@ -252,10 +277,12 @@ DNNReturnType ff_dnn_execute_model_ov(const DNNModel *model, const char *input_n input.channels = dims.dims[1]; input.data = blob_buffer.buffer; input.dt = precision_to_datatype(precision); - if (ov_model->model->pre_proc != NULL) { - ov_model->model->pre_proc(in_frame, &input, ov_model->model->userdata); - } else { - proc_from_frame_to_dnn(in_frame, &input, ctx); + if (do_ioproc) { + if (ov_model->model->pre_proc != NULL) { + ov_model->model->pre_proc(in_frame, &input, ov_model->model->userdata); + } else { + proc_from_frame_to_dnn(in_frame, &input, ctx); + } } ie_blob_free(&input_blob); @@ -308,15 +335,15 @@ DNNReturnType ff_dnn_execute_model_ov(const DNNModel *model, const char *input_n output.width = dims.dims[3]; output.dt = precision_to_datatype(precision); output.data = blob_buffer.buffer; - if (out_frame->width != output.width || out_frame->height != output.height) { - out_frame->width = output.width; - out_frame->height = 
output.height; - } else { + if (do_ioproc) { if (ov_model->model->post_proc != NULL) { ov_model->model->post_proc(out_frame, &output, ov_model->model->userdata); } else { proc_from_dnn_to_frame(out_frame, &output, ctx); } + } else { + out_frame->width = output.width; + out_frame->height = output.height; } ie_blob_free(&output_blob); } @@ -324,6 +351,25 @@ DNNReturnType ff_dnn_execute_model_ov(const DNNModel *model, const char *input_n return DNN_SUCCESS; } +DNNReturnType ff_dnn_execute_model_ov(const DNNModel *model, const char *input_name, AVFrame *in_frame, + const char **output_names, uint32_t nb_output, AVFrame *out_frame) +{ + OVModel *ov_model = (OVModel *)model->model; + OVContext *ctx = &ov_model->ctx; + + if (!in_frame) { + av_log(ctx, AV_LOG_ERROR, "in frame is NULL when execute model.\n"); + return DNN_ERROR; + } + + if (!out_frame) { + av_log(ctx, AV_LOG_ERROR, "out frame is NULL when execute model.\n"); + return DNN_ERROR; + } + + return execute_model_ov(model, input_name, in_frame, output_names, nb_output, out_frame, 1); +} + void ff_dnn_free_model_ov(DNNModel **model) { if (*model){ diff --git a/libavfilter/dnn/dnn_backend_tf.c b/libavfilter/dnn/dnn_backend_tf.c index 8467f8a459..be860b11b5 100644 --- a/libavfilter/dnn/dnn_backend_tf.c +++ b/libavfilter/dnn/dnn_backend_tf.c @@ -55,6 +55,10 @@ static const AVClass dnn_tensorflow_class = { .category = AV_CLASS_CATEGORY_FILTER, }; +static DNNReturnType execute_model_tf(const DNNModel *model, const char *input_name, AVFrame *in_frame, + const char **output_names, uint32_t nb_output, AVFrame *out_frame, + int do_ioproc); + static void free_buffer(void *data, size_t length) { av_freep(&data); @@ -150,6 +154,25 @@ static DNNReturnType get_input_tf(void *model, DNNData *input, const char *input return DNN_SUCCESS; } +static DNNReturnType get_output_tf(void *model, const char *input_name, int input_width, int input_height, + const char *output_name, int *output_width, int *output_height) +{ + DNNReturnType 
ret; + TFModel *tf_model = (TFModel *)model; + AVFrame *in_frame = av_frame_alloc(); + AVFrame *out_frame = av_frame_alloc(); + in_frame->width = input_width; + in_frame->height = input_height; + + ret = execute_model_tf(tf_model->model, input_name, in_frame, &output_name, 1, out_frame, 0); + *output_width = out_frame->width; + *output_height = out_frame->height; + + av_frame_free(&out_frame); + av_frame_free(&in_frame); + return ret; +} + static DNNReturnType load_tf_model(TFModel *tf_model, const char *model_filename) { TFContext *ctx = &tf_model->ctx; @@ -583,14 +606,16 @@ DNNModel *ff_dnn_load_model_tf(const char *model_filename, const char *options, model->model = (void *)tf_model; model->get_input = &get_input_tf; + model->get_output = &get_output_tf; model->options = options; model->userdata = userdata; return model; } -DNNReturnType ff_dnn_execute_model_tf(const DNNModel *model, const char *input_name, AVFrame *in_frame, - const char **output_names, uint32_t nb_output, AVFrame *out_frame) +static DNNReturnType execute_model_tf(const DNNModel *model, const char *input_name, AVFrame *in_frame, + const char **output_names, uint32_t nb_output, AVFrame *out_frame, + int do_ioproc) { TF_Output *tf_outputs; TFModel *tf_model = (TFModel *)model->model; @@ -618,10 +643,12 @@ DNNReturnType ff_dnn_execute_model_tf(const DNNModel *model, const char *input_n } input.data = (float *)TF_TensorData(input_tensor); - if (tf_model->model->pre_proc != NULL) { - tf_model->model->pre_proc(in_frame, &input, tf_model->model->userdata); - } else { - proc_from_frame_to_dnn(in_frame, &input, ctx); + if (do_ioproc) { + if (tf_model->model->pre_proc != NULL) { + tf_model->model->pre_proc(in_frame, &input, tf_model->model->userdata); + } else { + proc_from_frame_to_dnn(in_frame, &input, ctx); + } } if (nb_output != 1) { @@ -673,15 +700,15 @@ DNNReturnType ff_dnn_execute_model_tf(const DNNModel *model, const char *input_n output.data = TF_TensorData(output_tensors[i]); output.dt = 
TF_TensorType(output_tensors[i]); - if (out_frame->width != output.width || out_frame->height != output.height) { - out_frame->width = output.width; - out_frame->height = output.height; - } else { + if (do_ioproc) { if (tf_model->model->post_proc != NULL) { tf_model->model->post_proc(out_frame, &output, tf_model->model->userdata); } else { proc_from_dnn_to_frame(out_frame, &output, ctx); } + } else { + out_frame->width = output.width; + out_frame->height = output.height; } } @@ -696,6 +723,25 @@ DNNReturnType ff_dnn_execute_model_tf(const DNNModel *model, const char *input_n return DNN_SUCCESS; } +DNNReturnType ff_dnn_execute_model_tf(const DNNModel *model, const char *input_name, AVFrame *in_frame, + const char **output_names, uint32_t nb_output, AVFrame *out_frame) +{ + TFModel *tf_model = (TFModel *)model->model; + TFContext *ctx = &tf_model->ctx; + + if (!in_frame) { + av_log(ctx, AV_LOG_ERROR, "in frame is NULL when execute model.\n"); + return DNN_ERROR; + } + + if (!out_frame) { + av_log(ctx, AV_LOG_ERROR, "out frame is NULL when execute model.\n"); + return DNN_ERROR; + } + + return execute_model_tf(model, input_name, in_frame, output_names, nb_output, out_frame, 1); +} + void ff_dnn_free_model_tf(DNNModel **model) { TFModel *tf_model; diff --git a/libavfilter/dnn_interface.h b/libavfilter/dnn_interface.h index 0369ee4f71..2f129d535e 100644 --- a/libavfilter/dnn_interface.h +++ b/libavfilter/dnn_interface.h @@ -51,6 +51,9 @@ typedef struct DNNModel{ // Gets model input information // Just reuse struct DNNData here, actually the DNNData.data field is not needed. 
DNNReturnType (*get_input)(void *model, DNNData *input, const char *input_name); + // Gets model output width/height with given input w/h + DNNReturnType (*get_output)(void *model, const char *input_name, int input_width, int input_height, + const char *output_name, int *output_width, int *output_height); // set the pre process to transfer data from AVFrame to DNNData // the default implementation within DNN is used if it is not provided by the filter int (*pre_proc)(AVFrame *frame_in, DNNData *model_input, void *user_data); diff --git a/libavfilter/vf_dnn_processing.c b/libavfilter/vf_dnn_processing.c index 2c8578c9b0..334243bd2b 100644 --- a/libavfilter/vf_dnn_processing.c +++ b/libavfilter/vf_dnn_processing.c @@ -233,24 +233,15 @@ static int config_output(AVFilterLink *outlink) DnnProcessingContext *ctx = context->priv; DNNReturnType result; AVFilterLink *inlink = context->inputs[0]; - AVFrame *out = NULL; - - AVFrame *fake_in = ff_get_video_buffer(inlink, inlink->w, inlink->h); // have a try run in case that the dnn model resize the frame - out = ff_get_video_buffer(inlink, inlink->w, inlink->h); - result = (ctx->dnn_module->execute_model)(ctx->model, ctx->model_inputname, fake_in, - (const char **)&ctx->model_outputname, 1, out); - if (result != DNN_SUCCESS){ - av_log(ctx, AV_LOG_ERROR, "failed to execute model\n"); + result = ctx->model->get_output(ctx->model->model, ctx->model_inputname, inlink->w, inlink->h, + ctx->model_outputname, &outlink->w, &outlink->h); + if (result != DNN_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "could not get output from the model\n"); return AVERROR(EIO); } - outlink->w = out->width; - outlink->h = out->height; - - av_frame_free(&fake_in); - av_frame_free(&out); prepare_uv_scale(outlink); return 0; diff --git a/libavfilter/vf_sr.c b/libavfilter/vf_sr.c index 72a3137262..fe6c5d3c0d 100644 --- a/libavfilter/vf_sr.c +++ b/libavfilter/vf_sr.c @@ -111,23 +111,20 @@ static int config_output(AVFilterLink *outlink) SRContext *ctx = 
context->priv; DNNReturnType result; AVFilterLink *inlink = context->inputs[0]; - AVFrame *out = NULL; - const char *model_output_name = "y"; + int out_width, out_height; // have a try run in case that the dnn model resize the frame - AVFrame *fake_in = ff_get_video_buffer(inlink, inlink->w, inlink->h); - out = ff_get_video_buffer(inlink, inlink->w, inlink->h); - result = (ctx->dnn_module->execute_model)(ctx->model, "x", fake_in, - (const char **)&model_output_name, 1, out); - if (result != DNN_SUCCESS){ - av_log(context, AV_LOG_ERROR, "failed to execute loaded model\n"); + result = ctx->model->get_output(ctx->model->model, "x", inlink->w, inlink->h, + "y", &out_width, &out_height); + if (result != DNN_SUCCESS) { + av_log(ctx, AV_LOG_ERROR, "could not get output from the model\n"); return AVERROR(EIO); } - if (fake_in->width != out->width || fake_in->height != out->height) { + if (inlink->w != out_width || inlink->h != out_height) { //espcn - outlink->w = out->width; - outlink->h = out->height; + outlink->w = out_width; + outlink->h = out_height; if (inlink->format != AV_PIX_FMT_GRAY8){ const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format); int sws_src_h = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h); @@ -141,15 +138,13 @@ static int config_output(AVFilterLink *outlink) } } else { //srcnn - outlink->w = out->width * ctx->scale_factor; - outlink->h = out->height * ctx->scale_factor; + outlink->w = out_width * ctx->scale_factor; + outlink->h = out_height * ctx->scale_factor; ctx->sws_pre_scale = sws_getContext(inlink->w, inlink->h, inlink->format, outlink->w, outlink->h, outlink->format, SWS_BICUBIC, NULL, NULL, NULL); } - av_frame_free(&fake_in); - av_frame_free(&out); return 0; }