From patchwork Wed Feb 10 04:58:35 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Guo, Yejun" X-Patchwork-Id: 25534 Return-Path: X-Original-To: patchwork@ffaux-bg.ffmpeg.org Delivered-To: patchwork@ffaux-bg.ffmpeg.org Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org [79.124.17.100]) by ffaux.localdomain (Postfix) with ESMTP id 8723C44BDDC for ; Wed, 10 Feb 2021 07:08:41 +0200 (EET) Received: from [127.0.1.1] (localhost [127.0.0.1]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id 5E72E68A403; Wed, 10 Feb 2021 07:08:41 +0200 (EET) X-Original-To: ffmpeg-devel@ffmpeg.org Delivered-To: ffmpeg-devel@ffmpeg.org Received: from mga09.intel.com (mga09.intel.com [134.134.136.24]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTPS id B1D2A689FB4 for ; Wed, 10 Feb 2021 07:08:39 +0200 (EET) IronPort-SDR: /1FmQVyzSMaM4ZosHht2JqeQFlhlvjtRso84x9r/x37KDaJMcAV9udsJvxqtQz0Ft3g3+9dCjq Itp8OBeJQaGQ== X-IronPort-AV: E=McAfee;i="6000,8403,9890"; a="182157795" X-IronPort-AV: E=Sophos;i="5.81,167,1610438400"; d="scan'208";a="182157795" Received: from fmsmga005.fm.intel.com ([10.253.24.32]) by orsmga102.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 09 Feb 2021 21:08:37 -0800 IronPort-SDR: KgvaWL9I+aAdDjrb7LAt89w9eMkEqVTapZriegl8Q5XK8CJCj6Hbi8Eoqd7l52Id60fJlZ5SG1 Dk6vjNpTqmJQ== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.81,167,1610438400"; d="scan'208";a="587228198" Received: from yguo18-skl-u1604.sh.intel.com ([10.239.159.53]) by fmsmga005.fm.intel.com with ESMTP; 09 Feb 2021 21:08:36 -0800 From: "Guo, Yejun" To: ffmpeg-devel@ffmpeg.org Date: Wed, 10 Feb 2021 12:58:35 +0800 Message-Id: <20210210045835.14868-1-yejun.guo@intel.com> X-Mailer: git-send-email 2.17.1 Subject: [FFmpeg-devel] [PATCH 06/10] dnn: add color conversion for analytic case X-BeenThere: ffmpeg-devel@ffmpeg.org X-Mailman-Version: 2.1.20 Precedence: list List-Id: FFmpeg development discussions and patches List-Unsubscribe: , 
List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: FFmpeg development discussions and patches Cc: yejun.guo@intel.com MIME-Version: 1.0 Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" Signed-off-by: Guo, Yejun --- libavfilter/dnn/dnn_backend_native.c | 2 +- libavfilter/dnn/dnn_backend_openvino.c | 23 ++++++++++- libavfilter/dnn/dnn_backend_tf.c | 2 +- libavfilter/dnn/dnn_io_proc.c | 56 +++++++++++++++++++++++++- libavfilter/dnn/dnn_io_proc.h | 2 +- 5 files changed, 80 insertions(+), 5 deletions(-) diff --git a/libavfilter/dnn/dnn_backend_native.c b/libavfilter/dnn/dnn_backend_native.c index be6451367a..3bc253c1ad 100644 --- a/libavfilter/dnn/dnn_backend_native.c +++ b/libavfilter/dnn/dnn_backend_native.c @@ -321,7 +321,7 @@ static DNNReturnType execute_model_native(const DNNModel *model, const char *inp if (native_model->model->pre_proc != NULL) { native_model->model->pre_proc(in_frame, &input, native_model->model->filter_ctx); } else { - ff_proc_from_frame_to_dnn(in_frame, &input, ctx); + ff_proc_from_frame_to_dnn(in_frame, &input, native_model->model->func_type, ctx); } } diff --git a/libavfilter/dnn/dnn_backend_openvino.c b/libavfilter/dnn/dnn_backend_openvino.c index 7c1abb3eeb..cca155a52c 100644 --- a/libavfilter/dnn/dnn_backend_openvino.c +++ b/libavfilter/dnn/dnn_backend_openvino.c @@ -99,6 +99,8 @@ static DNNDataType precision_to_datatype(precision_e precision) { case FP32: return DNN_FLOAT; + case U8: + return DNN_UINT8; default: av_assert0(!"not supported yet."); return DNN_FLOAT; @@ -111,6 +113,8 @@ static int get_datatype_size(DNNDataType dt) { case DNN_FLOAT: return sizeof(float); + case DNN_UINT8: + return sizeof(uint8_t); default: av_assert0(!"not supported yet."); return 1; @@ -152,6 +156,9 @@ static DNNReturnType fill_model_input_ov(OVModel *ov_model, RequestItem *request input.channels = dims.dims[1]; input.data = blob_buffer.buffer; input.dt = precision_to_datatype(precision); + // all models in openvino open model 
zoo use BGR as input, + // change to be an option when necessary. + input.order = DCO_BGR; av_assert0(request->task_count <= dims.dims[0]); for (int i = 0; i < request->task_count; ++i) { @@ -160,7 +167,7 @@ static DNNReturnType fill_model_input_ov(OVModel *ov_model, RequestItem *request if (ov_model->model->pre_proc != NULL) { ov_model->model->pre_proc(task->in_frame, &input, ov_model->model->filter_ctx); } else { - ff_proc_from_frame_to_dnn(task->in_frame, &input, ctx); + ff_proc_from_frame_to_dnn(task->in_frame, &input, ov_model->model->func_type, ctx); } } input.data = (uint8_t *)input.data @@ -290,6 +297,20 @@ static DNNReturnType init_model_ov(OVModel *ov_model, const char *input_name, co goto err; } + // all models in openvino open model zoo use BGR with range [0.0f, 255.0f] as input, + // we don't have an AVPixelFormat to describe it, so we'll use AV_PIX_FMT_BGR24 and + // ask openvino to do the conversion internally. + // the current supported SR model (frame processing) is generated from tensorflow model, + // and its input is Y channel as float with range [0.0f, 1.0f], so do not set for this case. + // TODO: we need to get a final clear&general solution with all backends/formats considered. 
+ if (ov_model->model->func_type != DFT_PROCESS_FRAME) { + status = ie_network_set_input_precision(ov_model->network, input_name, U8); + if (status != OK) { + av_log(ctx, AV_LOG_ERROR, "Failed to set input precision as U8 for %s\n", input_name); + return DNN_ERROR; + } + } + status = ie_core_load_network(ov_model->core, ov_model->network, ctx->options.device_type, &config, &ov_model->exe_network); if (status != OK) { av_log(ctx, AV_LOG_ERROR, "Failed to load OpenVINO model network\n"); diff --git a/libavfilter/dnn/dnn_backend_tf.c b/libavfilter/dnn/dnn_backend_tf.c index e7e5f221f3..750a476726 100644 --- a/libavfilter/dnn/dnn_backend_tf.c +++ b/libavfilter/dnn/dnn_backend_tf.c @@ -744,7 +744,7 @@ static DNNReturnType execute_model_tf(const DNNModel *model, const char *input_n if (tf_model->model->pre_proc != NULL) { tf_model->model->pre_proc(in_frame, &input, tf_model->model->filter_ctx); } else { - ff_proc_from_frame_to_dnn(in_frame, &input, ctx); + ff_proc_from_frame_to_dnn(in_frame, &input, tf_model->model->func_type, ctx); } } diff --git a/libavfilter/dnn/dnn_io_proc.c b/libavfilter/dnn/dnn_io_proc.c index bee1423342..e104cc5064 100644 --- a/libavfilter/dnn/dnn_io_proc.c +++ b/libavfilter/dnn/dnn_io_proc.c @@ -21,6 +21,7 @@ #include "dnn_io_proc.h" #include "libavutil/imgutils.h" #include "libswscale/swscale.h" +#include "libavutil/avassert.h" DNNReturnType ff_proc_from_dnn_to_frame(AVFrame *frame, DNNData *output, void *log_ctx) { @@ -92,7 +93,7 @@ DNNReturnType ff_proc_from_dnn_to_frame(AVFrame *frame, DNNData *output, void *l return DNN_SUCCESS; } -DNNReturnType ff_proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, void *log_ctx) +static DNNReturnType proc_from_frame_to_dnn_frameprocessing(AVFrame *frame, DNNData *input, void *log_ctx) { struct SwsContext *sws_ctx; int bytewidth = av_image_get_linesize(frame->format, frame->width, 0); @@ -163,3 +164,56 @@ DNNReturnType ff_proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, void *lo return DNN_SUCCESS; 
} + +static enum AVPixelFormat get_pixel_format(DNNData *data) +{ + if (data->dt == DNN_UINT8 && data->order == DCO_BGR) { + return AV_PIX_FMT_BGR24; + } + + av_assert0(!"not supported yet.\n"); + return AV_PIX_FMT_BGR24; +} + +static DNNReturnType proc_from_frame_to_dnn_analytics(AVFrame *frame, DNNData *input, void *log_ctx) +{ + struct SwsContext *sws_ctx; + int linesizes[4]; + enum AVPixelFormat fmt = get_pixel_format(input); + sws_ctx = sws_getContext(frame->width, frame->height, frame->format, + input->width, input->height, fmt, + SWS_FAST_BILINEAR, NULL, NULL, NULL); + if (!sws_ctx) { + av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion " + "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n", + av_get_pix_fmt_name(frame->format), frame->width, frame->height, + av_get_pix_fmt_name(fmt), input->width, input->height); + return DNN_ERROR; + } + + if (av_image_fill_linesizes(linesizes, fmt, input->width) < 0) { + av_log(log_ctx, AV_LOG_ERROR, "unable to get linesizes with av_image_fill_linesizes"); + sws_freeContext(sws_ctx); + return DNN_ERROR; + } + + sws_scale(sws_ctx, (const uint8_t *const *)frame->data, frame->linesize, 0, frame->height, + (uint8_t *const *)(&input->data), linesizes); + + sws_freeContext(sws_ctx); + return DNN_SUCCESS; +} + +DNNReturnType ff_proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, DNNFunctionType func_type, void *log_ctx) +{ + switch (func_type) + { + case DFT_PROCESS_FRAME: + return proc_from_frame_to_dnn_frameprocessing(frame, input, log_ctx); + case DFT_ANALYTICS_DETECT: + return proc_from_frame_to_dnn_analytics(frame, input, log_ctx); + default: + avpriv_report_missing_feature(log_ctx, "model function type %d", func_type); + return DNN_ERROR; + } +} diff --git a/libavfilter/dnn/dnn_io_proc.h b/libavfilter/dnn/dnn_io_proc.h index 6a410ccc7b..91ad3cb261 100644 --- a/libavfilter/dnn/dnn_io_proc.h +++ b/libavfilter/dnn/dnn_io_proc.h @@ -30,7 +30,7 @@ #include "../dnn_interface.h" #include "libavutil/frame.h" 
-DNNReturnType ff_proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, void *log_ctx); +DNNReturnType ff_proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, DNNFunctionType func_type, void *log_ctx); DNNReturnType ff_proc_from_dnn_to_frame(AVFrame *frame, DNNData *output, void *log_ctx); #endif