From patchwork Wed Feb 10 04:58:35 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Guo, Yejun" X-Patchwork-Id: 25534 Return-Path: X-Original-To: patchwork@ffaux-bg.ffmpeg.org Delivered-To: patchwork@ffaux-bg.ffmpeg.org Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org [79.124.17.100]) by ffaux.localdomain (Postfix) with ESMTP id 8723C44BDDC for ; Wed, 10 Feb 2021 07:08:41 +0200 (EET) Received: from [127.0.1.1] (localhost [127.0.0.1]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id 5E72E68A403; Wed, 10 Feb 2021 07:08:41 +0200 (EET) X-Original-To: ffmpeg-devel@ffmpeg.org Delivered-To: ffmpeg-devel@ffmpeg.org Received: from mga09.intel.com (mga09.intel.com [134.134.136.24]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTPS id B1D2A689FB4 for ; Wed, 10 Feb 2021 07:08:39 +0200 (EET) IronPort-SDR: /1FmQVyzSMaM4ZosHht2JqeQFlhlvjtRso84x9r/x37KDaJMcAV9udsJvxqtQz0Ft3g3+9dCjq Itp8OBeJQaGQ== X-IronPort-AV: E=McAfee;i="6000,8403,9890"; a="182157795" X-IronPort-AV: E=Sophos;i="5.81,167,1610438400"; d="scan'208";a="182157795" Received: from fmsmga005.fm.intel.com ([10.253.24.32]) by orsmga102.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 09 Feb 2021 21:08:37 -0800 IronPort-SDR: KgvaWL9I+aAdDjrb7LAt89w9eMkEqVTapZriegl8Q5XK8CJCj6Hbi8Eoqd7l52Id60fJlZ5SG1 Dk6vjNpTqmJQ== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.81,167,1610438400"; d="scan'208";a="587228198" Received: from yguo18-skl-u1604.sh.intel.com ([10.239.159.53]) by fmsmga005.fm.intel.com with ESMTP; 09 Feb 2021 21:08:36 -0800 From: "Guo, Yejun" To: ffmpeg-devel@ffmpeg.org Date: Wed, 10 Feb 2021 12:58:35 +0800 Message-Id: <20210210045835.14868-1-yejun.guo@intel.com> X-Mailer: git-send-email 2.17.1 Subject: [FFmpeg-devel] [PATCH 06/10] dnn: add color conversion for analytic case X-BeenThere: ffmpeg-devel@ffmpeg.org X-Mailman-Version: 2.1.20 Precedence: list List-Id: FFmpeg development discussions and patches List-Unsubscribe: , 
List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: FFmpeg development discussions and patches Cc: yejun.guo@intel.com MIME-Version: 1.0 Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" Signed-off-by: Guo, Yejun --- libavfilter/dnn/dnn_backend_native.c | 2 +- libavfilter/dnn/dnn_backend_openvino.c | 23 ++++++++++- libavfilter/dnn/dnn_backend_tf.c | 2 +- libavfilter/dnn/dnn_io_proc.c | 56 +++++++++++++++++++++++++- libavfilter/dnn/dnn_io_proc.h | 2 +- 5 files changed, 80 insertions(+), 5 deletions(-) diff --git a/libavfilter/dnn/dnn_backend_native.c b/libavfilter/dnn/dnn_backend_native.c index be6451367a..3bc253c1ad 100644 --- a/libavfilter/dnn/dnn_backend_native.c +++ b/libavfilter/dnn/dnn_backend_native.c @@ -321,7 +321,7 @@ static DNNReturnType execute_model_native(const DNNModel *model, const char *inp if (native_model->model->pre_proc != NULL) { native_model->model->pre_proc(in_frame, &input, native_model->model->filter_ctx); } else { - ff_proc_from_frame_to_dnn(in_frame, &input, ctx); + ff_proc_from_frame_to_dnn(in_frame, &input, native_model->model->func_type, ctx); } } diff --git a/libavfilter/dnn/dnn_backend_openvino.c b/libavfilter/dnn/dnn_backend_openvino.c index 7c1abb3eeb..cca155a52c 100644 --- a/libavfilter/dnn/dnn_backend_openvino.c +++ b/libavfilter/dnn/dnn_backend_openvino.c @@ -99,6 +99,8 @@ static DNNDataType precision_to_datatype(precision_e precision) { case FP32: return DNN_FLOAT; + case U8: + return DNN_UINT8; default: av_assert0(!"not supported yet."); return DNN_FLOAT; @@ -111,6 +113,8 @@ static int get_datatype_size(DNNDataType dt) { case DNN_FLOAT: return sizeof(float); + case DNN_UINT8: + return sizeof(uint8_t); default: av_assert0(!"not supported yet."); return 1; @@ -152,6 +156,9 @@ static DNNReturnType fill_model_input_ov(OVModel *ov_model, RequestItem *request input.channels = dims.dims[1]; input.data = blob_buffer.buffer; input.dt = precision_to_datatype(precision); + // all models in openvino open model 
zoo use BGR as input, + // change to be an option when necessary. + input.order = DCO_BGR; av_assert0(request->task_count <= dims.dims[0]); for (int i = 0; i < request->task_count; ++i) { @@ -160,7 +167,7 @@ static DNNReturnType fill_model_input_ov(OVModel *ov_model, RequestItem *request if (ov_model->model->pre_proc != NULL) { ov_model->model->pre_proc(task->in_frame, &input, ov_model->model->filter_ctx); } else { - ff_proc_from_frame_to_dnn(task->in_frame, &input, ctx); + ff_proc_from_frame_to_dnn(task->in_frame, &input, ov_model->model->func_type, ctx); } } input.data = (uint8_t *)input.data @@ -290,6 +297,20 @@ static DNNReturnType init_model_ov(OVModel *ov_model, const char *input_name, co goto err; } + // all models in openvino open model zoo use BGR with range [0.0f, 255.0f] as input, + // we don't have an AVPixelFormat to describe it, so we'll use AV_PIX_FMT_BGR24 and + // ask openvino to do the conversion internally. + // the current supported SR model (frame processing) is generated from tensorflow model, + // and its input is Y channel as float with range [0.0f, 1.0f], so do not set for this case. + // TODO: we need to get a final clear&general solution with all backends/formats considered. 
+ if (ov_model->model->func_type != DFT_PROCESS_FRAME) { + status = ie_network_set_input_precision(ov_model->network, input_name, U8); + if (status != OK) { + av_log(ctx, AV_LOG_ERROR, "Failed to set input precision as U8 for %s\n", input_name); + return DNN_ERROR; + } + } + status = ie_core_load_network(ov_model->core, ov_model->network, ctx->options.device_type, &config, &ov_model->exe_network); if (status != OK) { av_log(ctx, AV_LOG_ERROR, "Failed to load OpenVINO model network\n"); diff --git a/libavfilter/dnn/dnn_backend_tf.c b/libavfilter/dnn/dnn_backend_tf.c index e7e5f221f3..750a476726 100644 --- a/libavfilter/dnn/dnn_backend_tf.c +++ b/libavfilter/dnn/dnn_backend_tf.c @@ -744,7 +744,7 @@ static DNNReturnType execute_model_tf(const DNNModel *model, const char *input_n if (tf_model->model->pre_proc != NULL) { tf_model->model->pre_proc(in_frame, &input, tf_model->model->filter_ctx); } else { - ff_proc_from_frame_to_dnn(in_frame, &input, ctx); + ff_proc_from_frame_to_dnn(in_frame, &input, tf_model->model->func_type, ctx); } } diff --git a/libavfilter/dnn/dnn_io_proc.c b/libavfilter/dnn/dnn_io_proc.c index bee1423342..e104cc5064 100644 --- a/libavfilter/dnn/dnn_io_proc.c +++ b/libavfilter/dnn/dnn_io_proc.c @@ -21,6 +21,7 @@ #include "dnn_io_proc.h" #include "libavutil/imgutils.h" #include "libswscale/swscale.h" +#include "libavutil/avassert.h" DNNReturnType ff_proc_from_dnn_to_frame(AVFrame *frame, DNNData *output, void *log_ctx) { @@ -92,7 +93,7 @@ DNNReturnType ff_proc_from_dnn_to_frame(AVFrame *frame, DNNData *output, void *l return DNN_SUCCESS; } -DNNReturnType ff_proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, void *log_ctx) +static DNNReturnType proc_from_frame_to_dnn_frameprocessing(AVFrame *frame, DNNData *input, void *log_ctx) { struct SwsContext *sws_ctx; int bytewidth = av_image_get_linesize(frame->format, frame->width, 0); @@ -163,3 +164,56 @@ DNNReturnType ff_proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, void *lo return DNN_SUCCESS; 
} + +static enum AVPixelFormat get_pixel_format(DNNData *data) +{ + if (data->dt == DNN_UINT8 && data->order == DCO_BGR) { + return AV_PIX_FMT_BGR24; + } + + av_assert0(!"not supported yet.\n"); + return AV_PIX_FMT_BGR24; +} + +static DNNReturnType proc_from_frame_to_dnn_analytics(AVFrame *frame, DNNData *input, void *log_ctx) +{ + struct SwsContext *sws_ctx; + int linesizes[4]; + enum AVPixelFormat fmt = get_pixel_format(input); + sws_ctx = sws_getContext(frame->width, frame->height, frame->format, + input->width, input->height, fmt, + SWS_FAST_BILINEAR, NULL, NULL, NULL); + if (!sws_ctx) { + av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion " + "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n", + av_get_pix_fmt_name(frame->format), frame->width, frame->height, + av_get_pix_fmt_name(fmt), input->width, input->height); + return DNN_ERROR; + } + + if (av_image_fill_linesizes(linesizes, fmt, input->width) < 0) { + av_log(log_ctx, AV_LOG_ERROR, "unable to get linesizes with av_image_fill_linesizes"); + sws_freeContext(sws_ctx); + return DNN_ERROR; + } + + sws_scale(sws_ctx, (const uint8_t *const *)frame->data, frame->linesize, 0, frame->height, + (uint8_t *const *)(&input->data), linesizes); + + sws_freeContext(sws_ctx); + return DNN_SUCCESS; +} + +DNNReturnType ff_proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, DNNFunctionType func_type, void *log_ctx) +{ + switch (func_type) + { + case DFT_PROCESS_FRAME: + return proc_from_frame_to_dnn_frameprocessing(frame, input, log_ctx); + case DFT_ANALYTICS_DETECT: + return proc_from_frame_to_dnn_analytics(frame, input, log_ctx); + default: + avpriv_report_missing_feature(log_ctx, "model function type %d", func_type); + return DNN_ERROR; + } +} diff --git a/libavfilter/dnn/dnn_io_proc.h b/libavfilter/dnn/dnn_io_proc.h index 6a410ccc7b..91ad3cb261 100644 --- a/libavfilter/dnn/dnn_io_proc.h +++ b/libavfilter/dnn/dnn_io_proc.h @@ -30,7 +30,7 @@ #include "../dnn_interface.h" #include "libavutil/frame.h" 
-DNNReturnType ff_proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, void *log_ctx); +DNNReturnType ff_proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, DNNFunctionType func_type, void *log_ctx); DNNReturnType ff_proc_from_dnn_to_frame(AVFrame *frame, DNNData *output, void *log_ctx); #endif