From patchwork Wed Feb 10 09:34:23 2021
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: "Guo, Yejun" <yejun.guo@intel.com>
X-Patchwork-Id: 25540
Return-Path: <ffmpeg-devel-bounces@ffmpeg.org>
X-Original-To: patchwork@ffaux-bg.ffmpeg.org
Delivered-To: patchwork@ffaux-bg.ffmpeg.org
Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org [79.124.17.100])
	by ffaux.localdomain (Postfix) with ESMTP id 545CF449CE2
	for <patchwork@ffaux-bg.ffmpeg.org>; Wed, 10 Feb 2021 11:44:46 +0200 (EET)
Received: from [127.0.1.1] (localhost [127.0.0.1])
	by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id 2F06A68A403;
	Wed, 10 Feb 2021 11:44:46 +0200 (EET)
X-Original-To: ffmpeg-devel@ffmpeg.org
Delivered-To: ffmpeg-devel@ffmpeg.org
Received: from mga01.intel.com (mga01.intel.com [192.55.52.88])
 by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTPS id 7496668A31F
 for <ffmpeg-devel@ffmpeg.org>; Wed, 10 Feb 2021 11:44:39 +0200 (EET)
IronPort-SDR: 
 0JKN0nYfLxTqjNX0u5QaQJYCfzRJJHdNTb0AJRyo/m24vNOc4wCMJoQcRyYC1+Uu3VwgarDUIw
 Yc2jxjJVTNAg==
X-IronPort-AV: E=McAfee;i="6000,8403,9890"; a="201144850"
X-IronPort-AV: E=Sophos;i="5.81,167,1610438400"; d="scan'208";a="201144850"
Received: from fmsmga001.fm.intel.com ([10.253.24.23])
 by fmsmga101.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384;
 10 Feb 2021 01:44:35 -0800
IronPort-SDR: 
 8tfuoLDFulf8aUasol9PsGFLgFKPB5aYrCAxrFaDl5NdwYjEkHE8K7tH3UyM6HOtEwqfs37uYl
 NJZEJ52EdtiQ==
X-ExtLoop1: 1
X-IronPort-AV: E=Sophos;i="5.81,167,1610438400"; d="scan'208";a="488706545"
Received: from yguo18-skl-u1604.sh.intel.com ([10.239.159.53])
 by fmsmga001.fm.intel.com with ESMTP; 10 Feb 2021 01:44:33 -0800
From: "Guo, Yejun" <yejun.guo@intel.com>
To: ffmpeg-devel@ffmpeg.org
Date: Wed, 10 Feb 2021 17:34:23 +0800
Message-Id: <20210210093432.9135-1-yejun.guo@intel.com>
X-Mailer: git-send-email 2.17.1
Subject: [FFmpeg-devel] [PATCH V2 01/10] dnn_backend_openvino.c: fix
	mismatch between ffmpeg(NHWC) and openvino(NCHW)
X-BeenThere: ffmpeg-devel@ffmpeg.org
X-Mailman-Version: 2.1.20
Precedence: list
List-Id: FFmpeg development discussions and patches <ffmpeg-devel.ffmpeg.org>
List-Unsubscribe: <https://ffmpeg.org/mailman/options/ffmpeg-devel>,
 <mailto:ffmpeg-devel-request@ffmpeg.org?subject=unsubscribe>
List-Archive: <https://ffmpeg.org/pipermail/ffmpeg-devel>
List-Post: <mailto:ffmpeg-devel@ffmpeg.org>
List-Help: <mailto:ffmpeg-devel-request@ffmpeg.org?subject=help>
List-Subscribe: <https://ffmpeg.org/mailman/listinfo/ffmpeg-devel>,
 <mailto:ffmpeg-devel-request@ffmpeg.org?subject=subscribe>
Reply-To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org>
Cc: yejun.guo@intel.com
MIME-Version: 1.0
Errors-To: ffmpeg-devel-bounces@ffmpeg.org
Sender: "ffmpeg-devel" <ffmpeg-devel-bounces@ffmpeg.org>

Signed-off-by: Guo, Yejun <yejun.guo@intel.com>
---
 libavfilter/dnn/dnn_backend_openvino.c | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_openvino.c b/libavfilter/dnn/dnn_backend_openvino.c
index beca256390..48f5ba50be 100644
--- a/libavfilter/dnn/dnn_backend_openvino.c
+++ b/libavfilter/dnn/dnn_backend_openvino.c
@@ -250,7 +250,7 @@ static void infer_completion_callback(void *args)
     }
 }
 
-static DNNReturnType init_model_ov(OVModel *ov_model)
+static DNNReturnType init_model_ov(OVModel *ov_model, const char *input_name, const char *output_name)
 {
     OVContext *ctx = &ov_model->ctx;
     IEStatusCode status;
@@ -276,6 +276,19 @@ static DNNReturnType init_model_ov(OVModel *ov_model)
             goto err;
     }
 
+    // OpenVINO always uses a fixed dims order, NCHW, for 4-D data, while FFmpeg
+    // passes NHWC data to OpenVINO, so declare the input/output layout as NHWC.
+    status = ie_network_set_input_layout(ov_model->network, input_name, NHWC);
+    if (status != OK) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to set layout as NHWC for input %s\n", input_name);
+        goto err;
+    }
+    status = ie_network_set_output_layout(ov_model->network, output_name, NHWC);
+    if (status != OK) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to set layout as NHWC for output %s\n", output_name);
+        goto err;
+    }
+
     status = ie_core_load_network(ov_model->core, ov_model->network, ctx->options.device_type, &config, &ov_model->exe_network);
     if (status != OK) {
         av_log(ctx, AV_LOG_ERROR, "Failed to load OpenVINO model network\n");
@@ -482,7 +495,7 @@ static DNNReturnType get_output_ov(void *model, const char *input_name, int inpu
     }
 
     if (!ov_model->exe_network) {
-        if (init_model_ov(ov_model) != DNN_SUCCESS) {
+        if (init_model_ov(ov_model, input_name, output_name) != DNN_SUCCESS) {
             av_log(ctx, AV_LOG_ERROR, "Failed init OpenVINO exectuable network or inference request\n");
             return DNN_ERROR;
         }
@@ -598,7 +611,7 @@ DNNReturnType ff_dnn_execute_model_ov(const DNNModel *model, const char *input_n
     }
 
     if (!ov_model->exe_network) {
-        if (init_model_ov(ov_model) != DNN_SUCCESS) {
+        if (init_model_ov(ov_model, input_name, output_names[0]) != DNN_SUCCESS) {
             av_log(ctx, AV_LOG_ERROR, "Failed init OpenVINO exectuable network or inference request\n");
             return DNN_ERROR;
         }
@@ -645,7 +658,7 @@ DNNReturnType ff_dnn_execute_model_async_ov(const DNNModel *model, const char *i
     }
 
     if (!ov_model->exe_network) {
-        if (init_model_ov(ov_model) != DNN_SUCCESS) {
+        if (init_model_ov(ov_model, input_name, output_names[0]) != DNN_SUCCESS) {
             av_log(ctx, AV_LOG_ERROR, "Failed init OpenVINO exectuable network or inference request\n");
             return DNN_ERROR;
         }

From patchwork Wed Feb 10 09:34:24 2021
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: "Guo, Yejun" <yejun.guo@intel.com>
X-Patchwork-Id: 25539
Return-Path: <ffmpeg-devel-bounces@ffmpeg.org>
X-Original-To: patchwork@ffaux-bg.ffmpeg.org
Delivered-To: patchwork@ffaux-bg.ffmpeg.org
Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org [79.124.17.100])
	by ffaux.localdomain (Postfix) with ESMTP id 49315449CE2
	for <patchwork@ffaux-bg.ffmpeg.org>; Wed, 10 Feb 2021 11:44:44 +0200 (EET)
Received: from [127.0.1.1] (localhost [127.0.0.1])
	by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id 2477168A2BB;
	Wed, 10 Feb 2021 11:44:44 +0200 (EET)
X-Original-To: ffmpeg-devel@ffmpeg.org
Delivered-To: ffmpeg-devel@ffmpeg.org
Received: from mga01.intel.com (mga01.intel.com [192.55.52.88])
 by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTPS id B8166689EEC
 for <ffmpeg-devel@ffmpeg.org>; Wed, 10 Feb 2021 11:44:37 +0200 (EET)
IronPort-SDR: 
 476FSrXxvQi4+wjfEqV0ulKo4Ssu7fAZfd5thCAcox7VzNnyBggW5/gv6v1Qewh6Al5DXkSr7J
 eSRHp5ssMaNQ==
X-IronPort-AV: E=McAfee;i="6000,8403,9890"; a="201144853"
X-IronPort-AV: E=Sophos;i="5.81,167,1610438400"; d="scan'208";a="201144853"
Received: from fmsmga001.fm.intel.com ([10.253.24.23])
 by fmsmga101.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384;
 10 Feb 2021 01:44:35 -0800
IronPort-SDR: 
 80ZjCPSrn0wbPWHVGHpSPkNkXqsh7g9ZlTRXtCbxc4gXAxsqm9DaEfh4uoOvGdD6G3qvxJkgV7
 tWNSqlUacqaQ==
X-ExtLoop1: 1
X-IronPort-AV: E=Sophos;i="5.81,167,1610438400"; d="scan'208";a="488706552"
Received: from yguo18-skl-u1604.sh.intel.com ([10.239.159.53])
 by fmsmga001.fm.intel.com with ESMTP; 10 Feb 2021 01:44:34 -0800
From: "Guo, Yejun" <yejun.guo@intel.com>
To: ffmpeg-devel@ffmpeg.org
Date: Wed, 10 Feb 2021 17:34:24 +0800
Message-Id: <20210210093432.9135-2-yejun.guo@intel.com>
X-Mailer: git-send-email 2.17.1
In-Reply-To: <20210210093432.9135-1-yejun.guo@intel.com>
References: <20210210093432.9135-1-yejun.guo@intel.com>
Subject: [FFmpeg-devel] [PATCH V2 02/10] dnn_backend_openvino.c: fix
	multi-thread issue for async execution
X-BeenThere: ffmpeg-devel@ffmpeg.org
X-Mailman-Version: 2.1.20
Precedence: list
List-Id: FFmpeg development discussions and patches <ffmpeg-devel.ffmpeg.org>
List-Unsubscribe: <https://ffmpeg.org/mailman/options/ffmpeg-devel>,
 <mailto:ffmpeg-devel-request@ffmpeg.org?subject=unsubscribe>
List-Archive: <https://ffmpeg.org/pipermail/ffmpeg-devel>
List-Post: <mailto:ffmpeg-devel@ffmpeg.org>
List-Help: <mailto:ffmpeg-devel-request@ffmpeg.org?subject=help>
List-Subscribe: <https://ffmpeg.org/mailman/listinfo/ffmpeg-devel>,
 <mailto:ffmpeg-devel-request@ffmpeg.org?subject=subscribe>
Reply-To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org>
Cc: yejun.guo@intel.com
MIME-Version: 1.0
Errors-To: ffmpeg-devel-bounces@ffmpeg.org
Sender: "ffmpeg-devel" <ffmpeg-devel-bounces@ffmpeg.org>

Once the task is marked as done in infer_completion_callback, another
thread may release it in ff_dnn_get_async_result_ov immediately afterwards,
so we need to save the request queue pointer first instead of reading
task->ov_model->request_queue later.

Signed-off-by: Guo, Yejun <yejun.guo@intel.com>
---
 libavfilter/dnn/dnn_backend_openvino.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/libavfilter/dnn/dnn_backend_openvino.c b/libavfilter/dnn/dnn_backend_openvino.c
index 48f5ba50be..ed41b721fc 100644
--- a/libavfilter/dnn/dnn_backend_openvino.c
+++ b/libavfilter/dnn/dnn_backend_openvino.c
@@ -178,6 +178,7 @@ static void infer_completion_callback(void *args)
     IEStatusCode status;
     RequestItem *request = args;
     TaskItem *task = request->tasks[0];
+    SafeQueue *requestq = task->ov_model->request_queue;
     ie_blob_t *output_blob = NULL;
     ie_blob_buffer_t blob_buffer;
     DNNData output;
@@ -243,7 +244,7 @@ static void infer_completion_callback(void *args)
     request->task_count = 0;
 
     if (task->async) {
-        if (ff_safe_queue_push_back(task->ov_model->request_queue, request) < 0) {
+        if (ff_safe_queue_push_back(requestq, request) < 0) {
             av_log(ctx, AV_LOG_ERROR, "Failed to push back request_queue.\n");
             return;
         }

From patchwork Wed Feb 10 09:34:25 2021
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: "Guo, Yejun" <yejun.guo@intel.com>
X-Patchwork-Id: 25541
Return-Path: <ffmpeg-devel-bounces@ffmpeg.org>
X-Original-To: patchwork@ffaux-bg.ffmpeg.org
Delivered-To: patchwork@ffaux-bg.ffmpeg.org
Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org [79.124.17.100])
	by ffaux.localdomain (Postfix) with ESMTP id 63BD8449CE2
	for <patchwork@ffaux-bg.ffmpeg.org>; Wed, 10 Feb 2021 11:44:48 +0200 (EET)
Received: from [127.0.1.1] (localhost [127.0.0.1])
	by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id 49DC268A3BF;
	Wed, 10 Feb 2021 11:44:48 +0200 (EET)
X-Original-To: ffmpeg-devel@ffmpeg.org
Delivered-To: ffmpeg-devel@ffmpeg.org
Received: from mga01.intel.com (mga01.intel.com [192.55.52.88])
 by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTPS id 88441680821
 for <ffmpeg-devel@ffmpeg.org>; Wed, 10 Feb 2021 11:44:40 +0200 (EET)
IronPort-SDR: 
 UrJ30wbffFnSTqBAeec0YK4H3sRW+MTEX3Wn4Z4j235Jw0ZRjS/T+uGSTJPo0o5BdfHXAthLnU
 nJpwKAf32+gg==
X-IronPort-AV: E=McAfee;i="6000,8403,9890"; a="201144865"
X-IronPort-AV: E=Sophos;i="5.81,167,1610438400"; d="scan'208";a="201144865"
Received: from fmsmga001.fm.intel.com ([10.253.24.23])
 by fmsmga101.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384;
 10 Feb 2021 01:44:36 -0800
IronPort-SDR: 
 XMVoml1b0Tlcotl1cOtBrWwMFkZcDlaS1AAgftbm+cAtVBfBGb8mf2KKfh3lIOJsrblbU2HzrD
 zQbaaZvavhiw==
X-ExtLoop1: 1
X-IronPort-AV: E=Sophos;i="5.81,167,1610438400"; d="scan'208";a="488706559"
Received: from yguo18-skl-u1604.sh.intel.com ([10.239.159.53])
 by fmsmga001.fm.intel.com with ESMTP; 10 Feb 2021 01:44:35 -0800
From: "Guo, Yejun" <yejun.guo@intel.com>
To: ffmpeg-devel@ffmpeg.org
Date: Wed, 10 Feb 2021 17:34:25 +0800
Message-Id: <20210210093432.9135-3-yejun.guo@intel.com>
X-Mailer: git-send-email 2.17.1
In-Reply-To: <20210210093432.9135-1-yejun.guo@intel.com>
References: <20210210093432.9135-1-yejun.guo@intel.com>
Subject: [FFmpeg-devel] [PATCH V2 03/10] dnn: extract common functions used
	by different filters
X-BeenThere: ffmpeg-devel@ffmpeg.org
X-Mailman-Version: 2.1.20
Precedence: list
List-Id: FFmpeg development discussions and patches <ffmpeg-devel.ffmpeg.org>
List-Unsubscribe: <https://ffmpeg.org/mailman/options/ffmpeg-devel>,
 <mailto:ffmpeg-devel-request@ffmpeg.org?subject=unsubscribe>
List-Archive: <https://ffmpeg.org/pipermail/ffmpeg-devel>
List-Post: <mailto:ffmpeg-devel@ffmpeg.org>
List-Help: <mailto:ffmpeg-devel-request@ffmpeg.org?subject=help>
List-Subscribe: <https://ffmpeg.org/mailman/listinfo/ffmpeg-devel>,
 <mailto:ffmpeg-devel-request@ffmpeg.org?subject=subscribe>
Reply-To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org>
Cc: yejun.guo@intel.com
MIME-Version: 1.0
Errors-To: ffmpeg-devel-bounces@ffmpeg.org
Sender: "ffmpeg-devel" <ffmpeg-devel-bounces@ffmpeg.org>

Signed-off-by: Guo, Yejun <yejun.guo@intel.com>
---
 libavfilter/Makefile            |   1 +
 libavfilter/dnn_filter_common.c | 106 ++++++++++++++++++++++++++++++++
 libavfilter/dnn_filter_common.h |  59 ++++++++++++++++++
 libavfilter/vf_derain.c         |  46 +++-----------
 libavfilter/vf_dnn_processing.c |  93 +++++-----------------------
 libavfilter/vf_sr.c             |  53 ++++------------
 6 files changed, 201 insertions(+), 157 deletions(-)
 create mode 100644 libavfilter/dnn_filter_common.c
 create mode 100644 libavfilter/dnn_filter_common.h

diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 3ec28df411..b43933be64 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -27,6 +27,7 @@ OBJS-$(HAVE_THREADS)                         += pthread.o
 # subsystems
 OBJS-$(CONFIG_QSVVPP)                        += qsvvpp.o
 OBJS-$(CONFIG_SCENE_SAD)                     += scene_sad.o
+OBJS-$(CONFIG_DNN)                           += dnn_filter_common.o
 include $(SRC_PATH)/libavfilter/dnn/Makefile
 
 # audio filters
diff --git a/libavfilter/dnn_filter_common.c b/libavfilter/dnn_filter_common.c
new file mode 100644
index 0000000000..5d0d7d3b90
--- /dev/null
+++ b/libavfilter/dnn_filter_common.c
@@ -0,0 +1,106 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "dnn_filter_common.h"
+
+int ff_dnn_init(DnnContext *ctx, AVFilterContext *filter_ctx)
+{
+    if (!ctx->model_filename) {
+        av_log(filter_ctx, AV_LOG_ERROR, "model file for network is not specified\n");
+        return AVERROR(EINVAL);
+    }
+    if (!ctx->model_inputname) {
+        av_log(filter_ctx, AV_LOG_ERROR, "input name of the model network is not specified\n");
+        return AVERROR(EINVAL);
+    }
+    if (!ctx->model_outputname) {
+        av_log(filter_ctx, AV_LOG_ERROR, "output name of the model network is not specified\n");
+        return AVERROR(EINVAL);
+    }
+
+    ctx->dnn_module = ff_get_dnn_module(ctx->backend_type);
+    if (!ctx->dnn_module) {
+        av_log(filter_ctx, AV_LOG_ERROR, "could not create DNN module for requested backend\n");
+        return AVERROR(ENOMEM);
+    }
+    if (!ctx->dnn_module->load_model) {
+        av_log(filter_ctx, AV_LOG_ERROR, "load_model for network is not specified\n");
+        return AVERROR(EINVAL);
+    }
+
+    ctx->model = (ctx->dnn_module->load_model)(ctx->model_filename, ctx->backend_options, filter_ctx);
+    if (!ctx->model) {
+        av_log(filter_ctx, AV_LOG_ERROR, "could not load DNN model\n");
+        return AVERROR(EINVAL);
+    }
+
+    if (!ctx->dnn_module->execute_model_async && ctx->async) {
+        ctx->async = 0;
+        av_log(filter_ctx, AV_LOG_WARNING, "this backend does not support async execution, roll back to sync.\n");
+    }
+
+#if !HAVE_PTHREAD_CANCEL
+    if (ctx->async) {
+        ctx->async = 0;
+        av_log(filter_ctx, AV_LOG_WARNING, "pthread is not supported, roll back to sync.\n");
+    }
+#endif
+
+    return 0;
+}
+
+DNNReturnType ff_dnn_get_input(DnnContext *ctx, DNNData *input)
+{
+    return ctx->model->get_input(ctx->model->model, input, ctx->model_inputname);
+}
+
+DNNReturnType ff_dnn_get_output(DnnContext *ctx, int input_width, int input_height, int *output_width, int *output_height)
+{
+    return ctx->model->get_output(ctx->model->model, ctx->model_inputname, input_width, input_height,
+                                    ctx->model_outputname, output_width, output_height);
+}
+
+DNNReturnType ff_dnn_execute_model(DnnContext *ctx, AVFrame *in_frame, AVFrame *out_frame)
+{
+    return (ctx->dnn_module->execute_model)(ctx->model, ctx->model_inputname, in_frame,
+                                            (const char **)&ctx->model_outputname, 1, out_frame);
+}
+
+DNNReturnType ff_dnn_execute_model_async(DnnContext *ctx, AVFrame *in_frame, AVFrame *out_frame)
+{
+    return (ctx->dnn_module->execute_model_async)(ctx->model, ctx->model_inputname, in_frame,
+                                                  (const char **)&ctx->model_outputname, 1, out_frame);
+}
+
+DNNAsyncStatusType ff_dnn_get_async_result(DnnContext *ctx, AVFrame **in_frame, AVFrame **out_frame)
+{
+    return (ctx->dnn_module->get_async_result)(ctx->model, in_frame, out_frame);
+}
+
+DNNReturnType ff_dnn_flush(DnnContext *ctx)
+{
+    return (ctx->dnn_module->flush)(ctx->model);
+}
+
+void ff_dnn_uninit(DnnContext *ctx)
+{
+    if (ctx->dnn_module) {
+        (ctx->dnn_module->free_model)(&ctx->model);
+        av_freep(&ctx->dnn_module);
+    }
+}
diff --git a/libavfilter/dnn_filter_common.h b/libavfilter/dnn_filter_common.h
new file mode 100644
index 0000000000..ab49a992ed
--- /dev/null
+++ b/libavfilter/dnn_filter_common.h
@@ -0,0 +1,59 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * common functions for the dnn based filters
+ */
+
+#ifndef AVFILTER_DNN_FILTER_COMMON_H
+#define AVFILTER_DNN_FILTER_COMMON_H
+
+#include "dnn_interface.h"
+
+typedef struct DnnContext {
+    char *model_filename;
+    DNNBackendType backend_type;
+    char *model_inputname;
+    char *model_outputname;
+    char *backend_options;
+    int async;
+
+    DNNModule *dnn_module;
+    DNNModel *model;
+} DnnContext;
+
+#define DNN_COMMON_OPTIONS \
+    { "model",              "path to model file",         OFFSET(model_filename),   AV_OPT_TYPE_STRING,    { .str = NULL }, 0, 0, FLAGS },\
+    { "input",              "input name of the model",    OFFSET(model_inputname),  AV_OPT_TYPE_STRING,    { .str = NULL }, 0, 0, FLAGS },\
+    { "output",             "output name of the model",   OFFSET(model_outputname), AV_OPT_TYPE_STRING,    { .str = NULL }, 0, 0, FLAGS },\
+    { "backend_configs",    "backend configs",            OFFSET(backend_options),  AV_OPT_TYPE_STRING,    { .str = NULL }, 0, 0, FLAGS },\
+    { "options",            "backend configs",            OFFSET(backend_options),  AV_OPT_TYPE_STRING,    { .str = NULL }, 0, 0, FLAGS },\
+    { "async",              "use DNN async inference",    OFFSET(async),            AV_OPT_TYPE_BOOL,      { .i64 = 1},     0, 1, FLAGS},
+
+
+int ff_dnn_init(DnnContext *ctx, AVFilterContext *filter_ctx);
+DNNReturnType ff_dnn_get_input(DnnContext *ctx, DNNData *input);
+DNNReturnType ff_dnn_get_output(DnnContext *ctx, int input_width, int input_height, int *output_width, int *output_height);
+DNNReturnType ff_dnn_execute_model(DnnContext *ctx, AVFrame *in_frame, AVFrame *out_frame);
+DNNReturnType ff_dnn_execute_model_async(DnnContext *ctx, AVFrame *in_frame, AVFrame *out_frame);
+DNNAsyncStatusType ff_dnn_get_async_result(DnnContext *ctx, AVFrame **in_frame, AVFrame **out_frame);
+DNNReturnType ff_dnn_flush(DnnContext *ctx);
+void ff_dnn_uninit(DnnContext *ctx);
+
+#endif
diff --git a/libavfilter/vf_derain.c b/libavfilter/vf_derain.c
index 77dd401263..ec9853d957 100644
--- a/libavfilter/vf_derain.c
+++ b/libavfilter/vf_derain.c
@@ -27,18 +27,14 @@
 #include "libavformat/avio.h"
 #include "libavutil/opt.h"
 #include "avfilter.h"
-#include "dnn_interface.h"
+#include "dnn_filter_common.h"
 #include "formats.h"
 #include "internal.h"
 
 typedef struct DRContext {
     const AVClass *class;
-
+    DnnContext dnnctx;
     int                filter_type;
-    char              *model_filename;
-    DNNBackendType     backend_type;
-    DNNModule         *dnn_module;
-    DNNModel          *model;
 } DRContext;
 
 #define OFFSET(x) offsetof(DRContext, x)
@@ -47,12 +43,14 @@ static const AVOption derain_options[] = {
     { "filter_type", "filter type(derain/dehaze)",  OFFSET(filter_type),    AV_OPT_TYPE_INT,    { .i64 = 0 },    0, 1, FLAGS, "type" },
     { "derain",      "derain filter flag",          0,                      AV_OPT_TYPE_CONST,  { .i64 = 0 },    0, 0, FLAGS, "type" },
     { "dehaze",      "dehaze filter flag",          0,                      AV_OPT_TYPE_CONST,  { .i64 = 1 },    0, 0, FLAGS, "type" },
-    { "dnn_backend", "DNN backend",                 OFFSET(backend_type),   AV_OPT_TYPE_INT,    { .i64 = 0 },    0, 1, FLAGS, "backend" },
+    { "dnn_backend", "DNN backend",                 OFFSET(dnnctx.backend_type),   AV_OPT_TYPE_INT,    { .i64 = 0 },    0, 1, FLAGS, "backend" },
     { "native",      "native backend flag",         0,                      AV_OPT_TYPE_CONST,  { .i64 = 0 },    0, 0, FLAGS, "backend" },
 #if (CONFIG_LIBTENSORFLOW == 1)
     { "tensorflow",  "tensorflow backend flag",     0,                      AV_OPT_TYPE_CONST,  { .i64 = 1 },    0, 0, FLAGS, "backend" },
 #endif
-    { "model",       "path to model file",          OFFSET(model_filename), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS },
+    { "model",       "path to model file",          OFFSET(dnnctx.model_filename),   AV_OPT_TYPE_STRING,    { .str = NULL }, 0, 0, FLAGS },
+    { "input",       "input name of the model",     OFFSET(dnnctx.model_inputname),  AV_OPT_TYPE_STRING,    { .str = "x" },  0, 0, FLAGS },
+    { "output",      "output name of the model",    OFFSET(dnnctx.model_outputname), AV_OPT_TYPE_STRING,    { .str = "y" },  0, 0, FLAGS },
     { NULL }
 };
 
@@ -77,7 +75,6 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
     AVFilterLink *outlink = ctx->outputs[0];
     DRContext *dr_context = ctx->priv;
     DNNReturnType dnn_result;
-    const char *model_output_name = "y";
     AVFrame *out;
 
     out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
@@ -88,7 +85,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
     }
     av_frame_copy_props(out, in);
 
-    dnn_result = (dr_context->dnn_module->execute_model)(dr_context->model, "x", in, &model_output_name, 1, out);
+    dnn_result = ff_dnn_execute_model(&dr_context->dnnctx, in, out);
     if (dnn_result != DNN_SUCCESS){
         av_log(ctx, AV_LOG_ERROR, "failed to execute model\n");
         av_frame_free(&in);
@@ -103,38 +100,13 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
 static av_cold int init(AVFilterContext *ctx)
 {
     DRContext *dr_context = ctx->priv;
-
-    dr_context->dnn_module = ff_get_dnn_module(dr_context->backend_type);
-    if (!dr_context->dnn_module) {
-        av_log(ctx, AV_LOG_ERROR, "could not create DNN module for requested backend\n");
-        return AVERROR(ENOMEM);
-    }
-    if (!dr_context->model_filename) {
-        av_log(ctx, AV_LOG_ERROR, "model file for network is not specified\n");
-        return AVERROR(EINVAL);
-    }
-    if (!dr_context->dnn_module->load_model) {
-        av_log(ctx, AV_LOG_ERROR, "load_model for network is not specified\n");
-        return AVERROR(EINVAL);
-    }
-
-    dr_context->model = (dr_context->dnn_module->load_model)(dr_context->model_filename, NULL, NULL);
-    if (!dr_context->model) {
-        av_log(ctx, AV_LOG_ERROR, "could not load DNN model\n");
-        return AVERROR(EINVAL);
-    }
-
-    return 0;
+    return ff_dnn_init(&dr_context->dnnctx, ctx);
 }
 
 static av_cold void uninit(AVFilterContext *ctx)
 {
     DRContext *dr_context = ctx->priv;
-
-    if (dr_context->dnn_module) {
-        (dr_context->dnn_module->free_model)(&dr_context->model);
-        av_freep(&dr_context->dnn_module);
-    }
+    ff_dnn_uninit(&dr_context->dnnctx);
 }
 
 static const AVFilterPad derain_inputs[] = {
diff --git a/libavfilter/vf_dnn_processing.c b/libavfilter/vf_dnn_processing.c
index 09e1699b9e..08ebf122c9 100644
--- a/libavfilter/vf_dnn_processing.c
+++ b/libavfilter/vf_dnn_processing.c
@@ -29,7 +29,7 @@
 #include "libavutil/avassert.h"
 #include "libavutil/imgutils.h"
 #include "filters.h"
-#include "dnn_interface.h"
+#include "dnn_filter_common.h"
 #include "formats.h"
 #include "internal.h"
 #include "libswscale/swscale.h"
@@ -37,22 +37,12 @@
 
 typedef struct DnnProcessingContext {
     const AVClass *class;
-
-    char *model_filename;
-    DNNBackendType backend_type;
-    char *model_inputname;
-    char *model_outputname;
-    char *backend_options;
-    int async;
-
-    DNNModule *dnn_module;
-    DNNModel *model;
-
+    DnnContext dnnctx;
     struct SwsContext *sws_uv_scale;
     int sws_uv_height;
 } DnnProcessingContext;
 
-#define OFFSET(x) offsetof(DnnProcessingContext, x)
+#define OFFSET(x) offsetof(DnnProcessingContext, dnnctx.x)
 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM
 static const AVOption dnn_processing_options[] = {
     { "dnn_backend", "DNN backend",                OFFSET(backend_type),     AV_OPT_TYPE_INT,       { .i64 = 0 },    INT_MIN, INT_MAX, FLAGS, "backend" },
@@ -63,11 +53,7 @@ static const AVOption dnn_processing_options[] = {
 #if (CONFIG_LIBOPENVINO == 1)
     { "openvino",    "openvino backend flag",      0,                        AV_OPT_TYPE_CONST,     { .i64 = 2 },    0, 0, FLAGS, "backend" },
 #endif
-    { "model",       "path to model file",         OFFSET(model_filename),   AV_OPT_TYPE_STRING,    { .str = NULL }, 0, 0, FLAGS },
-    { "input",       "input name of the model",    OFFSET(model_inputname),  AV_OPT_TYPE_STRING,    { .str = NULL }, 0, 0, FLAGS },
-    { "output",      "output name of the model",   OFFSET(model_outputname), AV_OPT_TYPE_STRING,    { .str = NULL }, 0, 0, FLAGS },
-    { "options",     "backend options",            OFFSET(backend_options),  AV_OPT_TYPE_STRING,    { .str = NULL }, 0, 0, FLAGS },
-    { "async",       "use DNN async inference",    OFFSET(async),            AV_OPT_TYPE_BOOL,      { .i64 = 1},     0, 1, FLAGS},
+    DNN_COMMON_OPTIONS
     { NULL }
 };
 
@@ -76,49 +62,7 @@ AVFILTER_DEFINE_CLASS(dnn_processing);
 static av_cold int init(AVFilterContext *context)
 {
     DnnProcessingContext *ctx = context->priv;
-
-    if (!ctx->model_filename) {
-        av_log(ctx, AV_LOG_ERROR, "model file for network is not specified\n");
-        return AVERROR(EINVAL);
-    }
-    if (!ctx->model_inputname) {
-        av_log(ctx, AV_LOG_ERROR, "input name of the model network is not specified\n");
-        return AVERROR(EINVAL);
-    }
-    if (!ctx->model_outputname) {
-        av_log(ctx, AV_LOG_ERROR, "output name of the model network is not specified\n");
-        return AVERROR(EINVAL);
-    }
-
-    ctx->dnn_module = ff_get_dnn_module(ctx->backend_type);
-    if (!ctx->dnn_module) {
-        av_log(ctx, AV_LOG_ERROR, "could not create DNN module for requested backend\n");
-        return AVERROR(ENOMEM);
-    }
-    if (!ctx->dnn_module->load_model) {
-        av_log(ctx, AV_LOG_ERROR, "load_model for network is not specified\n");
-        return AVERROR(EINVAL);
-    }
-
-    ctx->model = (ctx->dnn_module->load_model)(ctx->model_filename, ctx->backend_options, context);
-    if (!ctx->model) {
-        av_log(ctx, AV_LOG_ERROR, "could not load DNN model\n");
-        return AVERROR(EINVAL);
-    }
-
-    if (!ctx->dnn_module->execute_model_async && ctx->async) {
-        ctx->async = 0;
-        av_log(ctx, AV_LOG_WARNING, "this backend does not support async execution, roll back to sync.\n");
-    }
-
-#if !HAVE_PTHREAD_CANCEL
-    if (ctx->async) {
-        ctx->async = 0;
-        av_log(ctx, AV_LOG_WARNING, "pthread is not supported, roll back to sync.\n");
-    }
-#endif
-
-    return 0;
+    return ff_dnn_init(&ctx->dnnctx, context);
 }
 
 static int query_formats(AVFilterContext *context)
@@ -199,7 +143,7 @@ static int config_input(AVFilterLink *inlink)
     DNNData model_input;
     int check;
 
-    result = ctx->model->get_input(ctx->model->model, &model_input, ctx->model_inputname);
+    result = ff_dnn_get_input(&ctx->dnnctx, &model_input);
     if (result != DNN_SUCCESS) {
         av_log(ctx, AV_LOG_ERROR, "could not get input from the model\n");
         return AVERROR(EIO);
@@ -259,8 +203,7 @@ static int config_output(AVFilterLink *outlink)
     AVFilterLink *inlink = context->inputs[0];
 
     // have a try run in case that the dnn model resize the frame
-    result = ctx->model->get_output(ctx->model->model, ctx->model_inputname, inlink->w, inlink->h,
-                                    ctx->model_outputname, &outlink->w, &outlink->h);
+    result = ff_dnn_get_output(&ctx->dnnctx, inlink->w, inlink->h, &outlink->w, &outlink->h);
     if (result != DNN_SUCCESS) {
         av_log(ctx, AV_LOG_ERROR, "could not get output from the model\n");
         return AVERROR(EIO);
@@ -314,8 +257,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
     }
     av_frame_copy_props(out, in);
 
-    dnn_result = (ctx->dnn_module->execute_model)(ctx->model, ctx->model_inputname, in,
-                                                  (const char **)&ctx->model_outputname, 1, out);
+    dnn_result = ff_dnn_execute_model(&ctx->dnnctx, in, out);
     if (dnn_result != DNN_SUCCESS){
         av_log(ctx, AV_LOG_ERROR, "failed to execute model\n");
         av_frame_free(&in);
@@ -376,7 +318,7 @@ static int flush_frame(AVFilterLink *outlink, int64_t pts, int64_t *out_pts)
     int ret;
     DNNAsyncStatusType async_state;
 
-    ret = (ctx->dnn_module->flush)(ctx->model);
+    ret = ff_dnn_flush(&ctx->dnnctx);
     if (ret != DNN_SUCCESS) {
         return -1;
     }
@@ -384,7 +326,7 @@ static int flush_frame(AVFilterLink *outlink, int64_t pts, int64_t *out_pts)
     do {
         AVFrame *in_frame = NULL;
         AVFrame *out_frame = NULL;
-        async_state = (ctx->dnn_module->get_async_result)(ctx->model, &in_frame, &out_frame);
+        async_state = ff_dnn_get_async_result(&ctx->dnnctx, &in_frame, &out_frame);
         if (out_frame) {
             if (isPlanarYUV(in_frame->format))
                 copy_uv_planes(ctx, out_frame, in_frame);
@@ -405,7 +347,7 @@ static int activate_async(AVFilterContext *filter_ctx)
 {
     AVFilterLink *inlink = filter_ctx->inputs[0];
     AVFilterLink *outlink = filter_ctx->outputs[0];
-    DnnProcessingContext *ctx = (DnnProcessingContext *)filter_ctx->priv;
+    DnnProcessingContext *ctx = filter_ctx->priv;
     AVFrame *in = NULL, *out = NULL;
     int64_t pts;
     int ret, status;
@@ -426,8 +368,7 @@ static int activate_async(AVFilterContext *filter_ctx)
                 return AVERROR(ENOMEM);
             }
             av_frame_copy_props(out, in);
-            if ((ctx->dnn_module->execute_model_async)(ctx->model, ctx->model_inputname, in,
-                                                       (const char **)&ctx->model_outputname, 1, out) != DNN_SUCCESS) {
+            if (ff_dnn_execute_model_async(&ctx->dnnctx, in, out) != DNN_SUCCESS) {
                 return AVERROR(EIO);
             }
         }
@@ -437,7 +378,7 @@ static int activate_async(AVFilterContext *filter_ctx)
     do {
         AVFrame *in_frame = NULL;
         AVFrame *out_frame = NULL;
-        async_state = (ctx->dnn_module->get_async_result)(ctx->model, &in_frame, &out_frame);
+        async_state = ff_dnn_get_async_result(&ctx->dnnctx, &in_frame, &out_frame);
         if (out_frame) {
             if (isPlanarYUV(in_frame->format))
                 copy_uv_planes(ctx, out_frame, in_frame);
@@ -471,7 +412,7 @@ static int activate(AVFilterContext *filter_ctx)
 {
     DnnProcessingContext *ctx = filter_ctx->priv;
 
-    if (ctx->async)
+    if (ctx->dnnctx.async)
         return activate_async(filter_ctx);
     else
         return activate_sync(filter_ctx);
@@ -482,11 +423,7 @@ static av_cold void uninit(AVFilterContext *ctx)
     DnnProcessingContext *context = ctx->priv;
 
     sws_freeContext(context->sws_uv_scale);
-
-    if (context->dnn_module)
-        (context->dnn_module->free_model)(&context->model);
-
-    av_freep(&context->dnn_module);
+    ff_dnn_uninit(&context->dnnctx);
 }
 
 static const AVFilterPad dnn_processing_inputs[] = {
diff --git a/libavfilter/vf_sr.c b/libavfilter/vf_sr.c
index fe6c5d3c0d..20334a84c4 100644
--- a/libavfilter/vf_sr.c
+++ b/libavfilter/vf_sr.c
@@ -32,15 +32,11 @@
 #include "libavutil/pixdesc.h"
 #include "libavformat/avio.h"
 #include "libswscale/swscale.h"
-#include "dnn_interface.h"
+#include "dnn_filter_common.h"
 
 typedef struct SRContext {
     const AVClass *class;
-
-    char *model_filename;
-    DNNBackendType backend_type;
-    DNNModule *dnn_module;
-    DNNModel *model;
+    DnnContext dnnctx;
     int scale_factor;
     struct SwsContext *sws_uv_scale;
     int sws_uv_height;
@@ -50,13 +46,15 @@ typedef struct SRContext {
 #define OFFSET(x) offsetof(SRContext, x)
 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM
 static const AVOption sr_options[] = {
-    { "dnn_backend", "DNN backend used for model execution", OFFSET(backend_type), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, FLAGS, "backend" },
+    { "dnn_backend", "DNN backend used for model execution", OFFSET(dnnctx.backend_type), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, FLAGS, "backend" },
     { "native", "native backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = 0 }, 0, 0, FLAGS, "backend" },
 #if (CONFIG_LIBTENSORFLOW == 1)
     { "tensorflow", "tensorflow backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, 0, 0, FLAGS, "backend" },
 #endif
     { "scale_factor", "scale factor for SRCNN model", OFFSET(scale_factor), AV_OPT_TYPE_INT, { .i64 = 2 }, 2, 4, FLAGS },
-    { "model", "path to model file specifying network architecture and its parameters", OFFSET(model_filename), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS },
+    { "model", "path to model file specifying network architecture and its parameters", OFFSET(dnnctx.model_filename), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS },
+    { "input",       "input name of the model",     OFFSET(dnnctx.model_inputname),  AV_OPT_TYPE_STRING,    { .str = "x" },  0, 0, FLAGS },
+    { "output",      "output name of the model",    OFFSET(dnnctx.model_outputname), AV_OPT_TYPE_STRING,    { .str = "y" },  0, 0, FLAGS },
     { NULL }
 };
 
@@ -65,28 +63,7 @@ AVFILTER_DEFINE_CLASS(sr);
 static av_cold int init(AVFilterContext *context)
 {
     SRContext *sr_context = context->priv;
-
-    sr_context->dnn_module = ff_get_dnn_module(sr_context->backend_type);
-    if (!sr_context->dnn_module){
-        av_log(context, AV_LOG_ERROR, "could not create DNN module for requested backend\n");
-        return AVERROR(ENOMEM);
-    }
-
-    if (!sr_context->model_filename){
-        av_log(context, AV_LOG_ERROR, "model file for network was not specified\n");
-        return AVERROR(EIO);
-    }
-    if (!sr_context->dnn_module->load_model) {
-        av_log(context, AV_LOG_ERROR, "load_model for network was not specified\n");
-        return AVERROR(EIO);
-    }
-    sr_context->model = (sr_context->dnn_module->load_model)(sr_context->model_filename, NULL, NULL);
-    if (!sr_context->model){
-        av_log(context, AV_LOG_ERROR, "could not load DNN model\n");
-        return AVERROR(EIO);
-    }
-
-    return 0;
+    return ff_dnn_init(&sr_context->dnnctx, context);
 }
 
 static int query_formats(AVFilterContext *context)
@@ -114,8 +91,7 @@ static int config_output(AVFilterLink *outlink)
     int out_width, out_height;
 
     // have a try run in case that the dnn model resize the frame
-    result = ctx->model->get_output(ctx->model->model, "x", inlink->w, inlink->h,
-                                    "y", &out_width, &out_height);
+    result = ff_dnn_get_output(&ctx->dnnctx, inlink->w, inlink->h, &out_width, &out_height);
     if (result != DNN_SUCCESS) {
         av_log(ctx, AV_LOG_ERROR, "could not get output from the model\n");
         return AVERROR(EIO);
@@ -155,7 +131,6 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
     AVFilterLink *outlink = context->outputs[0];
     AVFrame *out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
     DNNReturnType dnn_result;
-    const char *model_output_name = "y";
 
     if (!out){
         av_log(context, AV_LOG_ERROR, "could not allocate memory for output frame\n");
@@ -168,11 +143,9 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
         sws_scale(ctx->sws_pre_scale,
                     (const uint8_t **)in->data, in->linesize, 0, in->height,
                     out->data, out->linesize);
-        dnn_result = (ctx->dnn_module->execute_model)(ctx->model, "x", out,
-                                                      (const char **)&model_output_name, 1, out);
+        dnn_result = ff_dnn_execute_model(&ctx->dnnctx, out, out);
     } else {
-        dnn_result = (ctx->dnn_module->execute_model)(ctx->model, "x", in,
-                                                      (const char **)&model_output_name, 1, out);
+        dnn_result = ff_dnn_execute_model(&ctx->dnnctx, in, out);
     }
 
     if (dnn_result != DNN_SUCCESS){
@@ -197,11 +170,7 @@ static av_cold void uninit(AVFilterContext *context)
 {
     SRContext *sr_context = context->priv;
 
-    if (sr_context->dnn_module){
-        (sr_context->dnn_module->free_model)(&sr_context->model);
-        av_freep(&sr_context->dnn_module);
-    }
-
+    ff_dnn_uninit(&sr_context->dnnctx);
     sws_freeContext(sr_context->sws_uv_scale);
     sws_freeContext(sr_context->sws_pre_scale);
 }

From patchwork Wed Feb 10 09:34:26 2021
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: "Guo, Yejun" <yejun.guo@intel.com>
X-Patchwork-Id: 25542
Return-Path: <ffmpeg-devel-bounces@ffmpeg.org>
X-Original-To: patchwork@ffaux-bg.ffmpeg.org
Delivered-To: patchwork@ffaux-bg.ffmpeg.org
Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org [79.124.17.100])
	by ffaux.localdomain (Postfix) with ESMTP id 73DF5449CE2
	for <patchwork@ffaux-bg.ffmpeg.org>; Wed, 10 Feb 2021 11:44:50 +0200 (EET)
Received: from [127.0.1.1] (localhost [127.0.0.1])
	by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id 5A04D68A477;
	Wed, 10 Feb 2021 11:44:50 +0200 (EET)
X-Original-To: ffmpeg-devel@ffmpeg.org
Delivered-To: ffmpeg-devel@ffmpeg.org
Received: from mga01.intel.com (mga01.intel.com [192.55.52.88])
 by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTPS id 9F13468A4C3
 for <ffmpeg-devel@ffmpeg.org>; Wed, 10 Feb 2021 11:44:43 +0200 (EET)
IronPort-SDR: 
 lycbmQvG4x7F+H1ak75lK7g6k+XXC0oF5uMYmSHk4ywrtg19URAXiPASPACgO9W6UOakGmz9ba
 9BmZR3MDQEaw==
X-IronPort-AV: E=McAfee;i="6000,8403,9890"; a="201144870"
X-IronPort-AV: E=Sophos;i="5.81,167,1610438400"; d="scan'208";a="201144870"
Received: from fmsmga001.fm.intel.com ([10.253.24.23])
 by fmsmga101.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384;
 10 Feb 2021 01:44:37 -0800
IronPort-SDR: 
 RD37FkR9ACbTBZzqbIYwsmnZl0xubffDyTb8tP28BJotlt6bZCf8OZgwWh8ojxix4neYmrTCNu
 GhktvILTVu9w==
X-ExtLoop1: 1
X-IronPort-AV: E=Sophos;i="5.81,167,1610438400"; d="scan'208";a="488706562"
Received: from yguo18-skl-u1604.sh.intel.com ([10.239.159.53])
 by fmsmga001.fm.intel.com with ESMTP; 10 Feb 2021 01:44:37 -0800
From: "Guo, Yejun" <yejun.guo@intel.com>
To: ffmpeg-devel@ffmpeg.org
Date: Wed, 10 Feb 2021 17:34:26 +0800
Message-Id: <20210210093432.9135-4-yejun.guo@intel.com>
X-Mailer: git-send-email 2.17.1
In-Reply-To: <20210210093432.9135-1-yejun.guo@intel.com>
References: <20210210093432.9135-1-yejun.guo@intel.com>
Subject: [FFmpeg-devel] [PATCH V2 04/10] dnn: add function type for model
X-BeenThere: ffmpeg-devel@ffmpeg.org
X-Mailman-Version: 2.1.20
Precedence: list
List-Id: FFmpeg development discussions and patches <ffmpeg-devel.ffmpeg.org>
List-Unsubscribe: <https://ffmpeg.org/mailman/options/ffmpeg-devel>,
 <mailto:ffmpeg-devel-request@ffmpeg.org?subject=unsubscribe>
List-Archive: <https://ffmpeg.org/pipermail/ffmpeg-devel>
List-Post: <mailto:ffmpeg-devel@ffmpeg.org>
List-Help: <mailto:ffmpeg-devel-request@ffmpeg.org?subject=help>
List-Subscribe: <https://ffmpeg.org/mailman/listinfo/ffmpeg-devel>,
 <mailto:ffmpeg-devel-request@ffmpeg.org?subject=subscribe>
Reply-To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org>
Cc: yejun.guo@intel.com
MIME-Version: 1.0
Errors-To: ffmpeg-devel-bounces@ffmpeg.org
Sender: "ffmpeg-devel" <ffmpeg-devel-bounces@ffmpeg.org>

Add a function type to the model so that the backend knows whether the
model is used for frame processing, detection, classification, etc.
Each function type has different behavior in the backend when handling
the input/output data of the model.

Signed-off-by: Guo, Yejun <yejun.guo@intel.com>
---
 libavfilter/dnn/dnn_backend_native.c   |  3 ++-
 libavfilter/dnn/dnn_backend_native.h   |  2 +-
 libavfilter/dnn/dnn_backend_openvino.c |  3 ++-
 libavfilter/dnn/dnn_backend_openvino.h |  2 +-
 libavfilter/dnn/dnn_backend_tf.c       |  5 +++--
 libavfilter/dnn/dnn_backend_tf.h       |  2 +-
 libavfilter/dnn_filter_common.c        |  4 ++--
 libavfilter/dnn_filter_common.h        |  2 +-
 libavfilter/dnn_interface.h            | 11 ++++++++++-
 libavfilter/vf_derain.c                |  2 +-
 libavfilter/vf_dnn_processing.c        |  2 +-
 libavfilter/vf_sr.c                    |  2 +-
 12 files changed, 26 insertions(+), 14 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_native.c b/libavfilter/dnn/dnn_backend_native.c
index 87f3568cc2..be6451367a 100644
--- a/libavfilter/dnn/dnn_backend_native.c
+++ b/libavfilter/dnn/dnn_backend_native.c
@@ -112,7 +112,7 @@ static DNNReturnType get_output_native(void *model, const char *input_name, int
 // layers_num,layer_type,layer_parameterss,layer_type,layer_parameters...
 // For CONV layer: activation_function, input_num, output_num, kernel_size, kernel, biases
 // For DEPTH_TO_SPACE layer: block_size
-DNNModel *ff_dnn_load_model_native(const char *model_filename, const char *options, AVFilterContext *filter_ctx)
+DNNModel *ff_dnn_load_model_native(const char *model_filename, DNNFunctionType func_type, const char *options, AVFilterContext *filter_ctx)
 {
     DNNModel *model = NULL;
     char header_expected[] = "FFMPEGDNNNATIVE";
@@ -256,6 +256,7 @@ DNNModel *ff_dnn_load_model_native(const char *model_filename, const char *optio
     model->get_input = &get_input_native;
     model->get_output = &get_output_native;
     model->filter_ctx = filter_ctx;
+    model->func_type = func_type;
 
     return model;
 
diff --git a/libavfilter/dnn/dnn_backend_native.h b/libavfilter/dnn/dnn_backend_native.h
index 5c8ce82b35..d313c48f3a 100644
--- a/libavfilter/dnn/dnn_backend_native.h
+++ b/libavfilter/dnn/dnn_backend_native.h
@@ -128,7 +128,7 @@ typedef struct NativeModel{
     int32_t operands_num;
 } NativeModel;
 
-DNNModel *ff_dnn_load_model_native(const char *model_filename, const char *options, AVFilterContext *filter_ctx);
+DNNModel *ff_dnn_load_model_native(const char *model_filename, DNNFunctionType func_type, const char *options, AVFilterContext *filter_ctx);
 
 DNNReturnType ff_dnn_execute_model_native(const DNNModel *model, const char *input_name, AVFrame *in_frame,
                                           const char **output_names, uint32_t nb_output, AVFrame *out_frame);
diff --git a/libavfilter/dnn/dnn_backend_openvino.c b/libavfilter/dnn/dnn_backend_openvino.c
index ed41b721fc..7c1abb3eeb 100644
--- a/libavfilter/dnn/dnn_backend_openvino.c
+++ b/libavfilter/dnn/dnn_backend_openvino.c
@@ -524,7 +524,7 @@ static DNNReturnType get_output_ov(void *model, const char *input_name, int inpu
     return ret;
 }
 
-DNNModel *ff_dnn_load_model_ov(const char *model_filename, const char *options, AVFilterContext *filter_ctx)
+DNNModel *ff_dnn_load_model_ov(const char *model_filename, DNNFunctionType func_type, const char *options, AVFilterContext *filter_ctx)
 {
     DNNModel *model = NULL;
     OVModel *ov_model = NULL;
@@ -572,6 +572,7 @@ DNNModel *ff_dnn_load_model_ov(const char *model_filename, const char *options,
     model->get_output = &get_output_ov;
     model->options = options;
     model->filter_ctx = filter_ctx;
+    model->func_type = func_type;
 
     return model;
 
diff --git a/libavfilter/dnn/dnn_backend_openvino.h b/libavfilter/dnn/dnn_backend_openvino.h
index 23b819440e..a484a7be32 100644
--- a/libavfilter/dnn/dnn_backend_openvino.h
+++ b/libavfilter/dnn/dnn_backend_openvino.h
@@ -29,7 +29,7 @@
 
 #include "../dnn_interface.h"
 
-DNNModel *ff_dnn_load_model_ov(const char *model_filename, const char *options, AVFilterContext *filter_ctx);
+DNNModel *ff_dnn_load_model_ov(const char *model_filename, DNNFunctionType func_type, const char *options, AVFilterContext *filter_ctx);
 
 DNNReturnType ff_dnn_execute_model_ov(const DNNModel *model, const char *input_name, AVFrame *in_frame,
                                       const char **output_names, uint32_t nb_output, AVFrame *out_frame);
diff --git a/libavfilter/dnn/dnn_backend_tf.c b/libavfilter/dnn/dnn_backend_tf.c
index 71a2a308b5..e7e5f221f3 100644
--- a/libavfilter/dnn/dnn_backend_tf.c
+++ b/libavfilter/dnn/dnn_backend_tf.c
@@ -580,7 +580,7 @@ static DNNReturnType load_native_model(TFModel *tf_model, const char *model_file
     DNNModel *model = NULL;
     NativeModel *native_model;
 
-    model = ff_dnn_load_model_native(model_filename, NULL, NULL);
+    model = ff_dnn_load_model_native(model_filename, DFT_PROCESS_FRAME, NULL, NULL);
     if (!model){
         av_log(ctx, AV_LOG_ERROR, "Failed to load native model\n");
         return DNN_ERROR;
@@ -664,7 +664,7 @@ static DNNReturnType load_native_model(TFModel *tf_model, const char *model_file
     return DNN_SUCCESS;
 }
 
-DNNModel *ff_dnn_load_model_tf(const char *model_filename, const char *options, AVFilterContext *filter_ctx)
+DNNModel *ff_dnn_load_model_tf(const char *model_filename, DNNFunctionType func_type, const char *options, AVFilterContext *filter_ctx)
 {
     DNNModel *model = NULL;
     TFModel *tf_model = NULL;
@@ -705,6 +705,7 @@ DNNModel *ff_dnn_load_model_tf(const char *model_filename, const char *options,
     model->get_output = &get_output_tf;
     model->options = options;
     model->filter_ctx = filter_ctx;
+    model->func_type = func_type;
 
     return model;
 }
diff --git a/libavfilter/dnn/dnn_backend_tf.h b/libavfilter/dnn/dnn_backend_tf.h
index cac8936729..8cec04748e 100644
--- a/libavfilter/dnn/dnn_backend_tf.h
+++ b/libavfilter/dnn/dnn_backend_tf.h
@@ -29,7 +29,7 @@
 
 #include "../dnn_interface.h"
 
-DNNModel *ff_dnn_load_model_tf(const char *model_filename, const char *options, AVFilterContext *filter_ctx);
+DNNModel *ff_dnn_load_model_tf(const char *model_filename, DNNFunctionType func_type, const char *options, AVFilterContext *filter_ctx);
 
 DNNReturnType ff_dnn_execute_model_tf(const DNNModel *model, const char *input_name, AVFrame *in_frame,
                                       const char **output_names, uint32_t nb_output, AVFrame *out_frame);
diff --git a/libavfilter/dnn_filter_common.c b/libavfilter/dnn_filter_common.c
index 5d0d7d3b90..413adba406 100644
--- a/libavfilter/dnn_filter_common.c
+++ b/libavfilter/dnn_filter_common.c
@@ -18,7 +18,7 @@
 
 #include "dnn_filter_common.h"
 
-int ff_dnn_init(DnnContext *ctx, AVFilterContext *filter_ctx)
+int ff_dnn_init(DnnContext *ctx, DNNFunctionType func_type, AVFilterContext *filter_ctx)
 {
     if (!ctx->model_filename) {
         av_log(filter_ctx, AV_LOG_ERROR, "model file for network is not specified\n");
@@ -43,7 +43,7 @@ int ff_dnn_init(DnnContext *ctx, AVFilterContext *filter_ctx)
         return AVERROR(EINVAL);
     }
 
-    ctx->model = (ctx->dnn_module->load_model)(ctx->model_filename, ctx->backend_options, filter_ctx);
+    ctx->model = (ctx->dnn_module->load_model)(ctx->model_filename, func_type, ctx->backend_options, filter_ctx);
     if (!ctx->model) {
         av_log(filter_ctx, AV_LOG_ERROR, "could not load DNN model\n");
         return AVERROR(EINVAL);
diff --git a/libavfilter/dnn_filter_common.h b/libavfilter/dnn_filter_common.h
index ab49a992ed..79c4d3efe3 100644
--- a/libavfilter/dnn_filter_common.h
+++ b/libavfilter/dnn_filter_common.h
@@ -47,7 +47,7 @@ typedef struct DnnContext {
     { "async",              "use DNN async inference",    OFFSET(async),            AV_OPT_TYPE_BOOL,      { .i64 = 1},     0, 1, FLAGS},
 
 
-int ff_dnn_init(DnnContext *ctx, AVFilterContext *filter_ctx);
+int ff_dnn_init(DnnContext *ctx, DNNFunctionType func_type, AVFilterContext *filter_ctx);
 DNNReturnType ff_dnn_get_input(DnnContext *ctx, DNNData *input);
 DNNReturnType ff_dnn_get_output(DnnContext *ctx, int input_width, int input_height, int *output_width, int *output_height);
 DNNReturnType ff_dnn_execute_model(DnnContext *ctx, AVFrame *in_frame, AVFrame *out_frame);
diff --git a/libavfilter/dnn_interface.h b/libavfilter/dnn_interface.h
index ff338ea084..2fb9b15676 100644
--- a/libavfilter/dnn_interface.h
+++ b/libavfilter/dnn_interface.h
@@ -43,6 +43,13 @@ typedef enum {
     DAST_SUCCESS            // got a result frame successfully
 } DNNAsyncStatusType;
 
+typedef enum {
+    DFT_NONE,
+    DFT_PROCESS_FRAME,      // process the whole frame
+    DFT_ANALYTICS_DETECT,   // detect from the whole frame
+    // we can add more such as detect_from_crop, classify_from_bbox, etc.
+}DNNFunctionType;
+
 typedef struct DNNData{
     void *data;
     DNNDataType dt;
@@ -56,6 +63,8 @@ typedef struct DNNModel{
     const char *options;
     // Stores FilterContext used for the interaction between AVFrame and DNNData
     AVFilterContext *filter_ctx;
+    // Stores function type of the model
+    DNNFunctionType func_type;
     // Gets model input information
     // Just reuse struct DNNData here, actually the DNNData.data field is not needed.
     DNNReturnType (*get_input)(void *model, DNNData *input, const char *input_name);
@@ -73,7 +82,7 @@ typedef struct DNNModel{
 // Stores pointers to functions for loading, executing, freeing DNN models for one of the backends.
 typedef struct DNNModule{
     // Loads model and parameters from given file. Returns NULL if it is not possible.
-    DNNModel *(*load_model)(const char *model_filename, const char *options, AVFilterContext *filter_ctx);
+    DNNModel *(*load_model)(const char *model_filename, DNNFunctionType func_type, const char *options, AVFilterContext *filter_ctx);
     // Executes model with specified input and output. Returns DNN_ERROR otherwise.
     DNNReturnType (*execute_model)(const DNNModel *model, const char *input_name, AVFrame *in_frame,
                                    const char **output_names, uint32_t nb_output, AVFrame *out_frame);
diff --git a/libavfilter/vf_derain.c b/libavfilter/vf_derain.c
index ec9853d957..7814fc1e03 100644
--- a/libavfilter/vf_derain.c
+++ b/libavfilter/vf_derain.c
@@ -100,7 +100,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
 static av_cold int init(AVFilterContext *ctx)
 {
     DRContext *dr_context = ctx->priv;
-    return ff_dnn_init(&dr_context->dnnctx, ctx);
+    return ff_dnn_init(&dr_context->dnnctx, DFT_PROCESS_FRAME, ctx);
 }
 
 static av_cold void uninit(AVFilterContext *ctx)
diff --git a/libavfilter/vf_dnn_processing.c b/libavfilter/vf_dnn_processing.c
index 08ebf122c9..88e95e8ae3 100644
--- a/libavfilter/vf_dnn_processing.c
+++ b/libavfilter/vf_dnn_processing.c
@@ -62,7 +62,7 @@ AVFILTER_DEFINE_CLASS(dnn_processing);
 static av_cold int init(AVFilterContext *context)
 {
     DnnProcessingContext *ctx = context->priv;
-    return ff_dnn_init(&ctx->dnnctx, context);
+    return ff_dnn_init(&ctx->dnnctx, DFT_PROCESS_FRAME, context);
 }
 
 static int query_formats(AVFilterContext *context)
diff --git a/libavfilter/vf_sr.c b/libavfilter/vf_sr.c
index 20334a84c4..45f941acdb 100644
--- a/libavfilter/vf_sr.c
+++ b/libavfilter/vf_sr.c
@@ -63,7 +63,7 @@ AVFILTER_DEFINE_CLASS(sr);
 static av_cold int init(AVFilterContext *context)
 {
     SRContext *sr_context = context->priv;
-    return ff_dnn_init(&sr_context->dnnctx, context);
+    return ff_dnn_init(&sr_context->dnnctx, DFT_PROCESS_FRAME, context);
 }
 
 static int query_formats(AVFilterContext *context)

From patchwork Wed Feb 10 09:34:27 2021
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: "Guo, Yejun" <yejun.guo@intel.com>
X-Patchwork-Id: 25543
Return-Path: <ffmpeg-devel-bounces@ffmpeg.org>
X-Original-To: patchwork@ffaux-bg.ffmpeg.org
Delivered-To: patchwork@ffaux-bg.ffmpeg.org
Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org [79.124.17.100])
	by ffaux.localdomain (Postfix) with ESMTP id 7A178449CE2
	for <patchwork@ffaux-bg.ffmpeg.org>; Wed, 10 Feb 2021 11:44:52 +0200 (EET)
Received: from [127.0.1.1] (localhost [127.0.0.1])
	by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id 6573568A562;
	Wed, 10 Feb 2021 11:44:52 +0200 (EET)
X-Original-To: ffmpeg-devel@ffmpeg.org
Delivered-To: ffmpeg-devel@ffmpeg.org
Received: from mga01.intel.com (mga01.intel.com [192.55.52.88])
 by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTPS id 5A69368A4C5
 for <ffmpeg-devel@ffmpeg.org>; Wed, 10 Feb 2021 11:44:45 +0200 (EET)
IronPort-SDR: 
 WtRySQBdrK9Wl93+b5bNeJ+ejlXZt1HGr3DL3P90J+F1LnYwaF6ommNs/PxboYjwGKas586Lci
 kMDxola6HyQw==
X-IronPort-AV: E=McAfee;i="6000,8403,9890"; a="201144876"
X-IronPort-AV: E=Sophos;i="5.81,167,1610438400"; d="scan'208";a="201144876"
Received: from fmsmga001.fm.intel.com ([10.253.24.23])
 by fmsmga101.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384;
 10 Feb 2021 01:44:38 -0800
IronPort-SDR: 
 75BQN/snEULM14yg6LxyYxMkUUG/ZvQ105Ig/chVrC6BWeT51o8n4EFogR1NxKL6irqmw0/9J2
 8v/0eM/f3w9A==
X-ExtLoop1: 1
X-IronPort-AV: E=Sophos;i="5.81,167,1610438400"; d="scan'208";a="488706567"
Received: from yguo18-skl-u1604.sh.intel.com ([10.239.159.53])
 by fmsmga001.fm.intel.com with ESMTP; 10 Feb 2021 01:44:37 -0800
From: "Guo, Yejun" <yejun.guo@intel.com>
To: ffmpeg-devel@ffmpeg.org
Date: Wed, 10 Feb 2021 17:34:27 +0800
Message-Id: <20210210093432.9135-5-yejun.guo@intel.com>
X-Mailer: git-send-email 2.17.1
In-Reply-To: <20210210093432.9135-1-yejun.guo@intel.com>
References: <20210210093432.9135-1-yejun.guo@intel.com>
Subject: [FFmpeg-devel] [PATCH V2 05/10] dnn_interface.h: add enum
	DNNColorOrder
X-BeenThere: ffmpeg-devel@ffmpeg.org
X-Mailman-Version: 2.1.20
Precedence: list
List-Id: FFmpeg development discussions and patches <ffmpeg-devel.ffmpeg.org>
List-Unsubscribe: <https://ffmpeg.org/mailman/options/ffmpeg-devel>,
 <mailto:ffmpeg-devel-request@ffmpeg.org?subject=unsubscribe>
List-Archive: <https://ffmpeg.org/pipermail/ffmpeg-devel>
List-Post: <mailto:ffmpeg-devel@ffmpeg.org>
List-Help: <mailto:ffmpeg-devel-request@ffmpeg.org?subject=help>
List-Subscribe: <https://ffmpeg.org/mailman/listinfo/ffmpeg-devel>,
 <mailto:ffmpeg-devel-request@ffmpeg.org?subject=subscribe>
Reply-To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org>
Cc: yejun.guo@intel.com
MIME-Version: 1.0
Errors-To: ffmpeg-devel-bounces@ffmpeg.org
Sender: "ffmpeg-devel" <ffmpeg-devel-bounces@ffmpeg.org>

The data type and the color order together determine the color format.
AVPixelFormat cannot be used directly because it does not cover all
the possible formats.

Signed-off-by: Guo, Yejun <yejun.guo@intel.com>
---
 libavfilter/dnn_interface.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/libavfilter/dnn_interface.h b/libavfilter/dnn_interface.h
index 2fb9b15676..d3a0c58a61 100644
--- a/libavfilter/dnn_interface.h
+++ b/libavfilter/dnn_interface.h
@@ -36,6 +36,11 @@ typedef enum {DNN_NATIVE, DNN_TF, DNN_OV} DNNBackendType;
 
 typedef enum {DNN_FLOAT = 1, DNN_UINT8 = 4} DNNDataType;
 
+typedef enum {
+    DCO_NONE,
+    DCO_BGR,
+} DNNColorOrder;
+
 typedef enum {
     DAST_FAIL,              // something wrong
     DAST_EMPTY_QUEUE,       // no more inference result to get
@@ -52,8 +57,10 @@ typedef enum {
 
 typedef struct DNNData{
     void *data;
-    DNNDataType dt;
     int width, height, channels;
+    // dt and order together decide the color format
+    DNNDataType dt;
+    DNNColorOrder order;
 } DNNData;
 
 typedef struct DNNModel{

From patchwork Wed Feb 10 09:34:28 2021
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: "Guo, Yejun" <yejun.guo@intel.com>
X-Patchwork-Id: 25544
Return-Path: <ffmpeg-devel-bounces@ffmpeg.org>
X-Original-To: patchwork@ffaux-bg.ffmpeg.org
Delivered-To: patchwork@ffaux-bg.ffmpeg.org
Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org [79.124.17.100])
	by ffaux.localdomain (Postfix) with ESMTP id 9CFAE449CE2
	for <patchwork@ffaux-bg.ffmpeg.org>; Wed, 10 Feb 2021 11:44:53 +0200 (EET)
Received: from [127.0.1.1] (localhost [127.0.0.1])
	by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id 7F1D968A4C3;
	Wed, 10 Feb 2021 11:44:53 +0200 (EET)
X-Original-To: ffmpeg-devel@ffmpeg.org
Delivered-To: ffmpeg-devel@ffmpeg.org
Received: from mga01.intel.com (mga01.intel.com [192.55.52.88])
 by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTPS id A5C2C68A4FE
 for <ffmpeg-devel@ffmpeg.org>; Wed, 10 Feb 2021 11:44:46 +0200 (EET)
IronPort-SDR: 
 YuECt8l633wvedcb20HnpodCViCZpCAI+AvtpWwKD+nMfxR4zdYMBCx1+vSfdx/q11MWHEGUMW
 WsPfyH7ralow==
X-IronPort-AV: E=McAfee;i="6000,8403,9890"; a="201144878"
X-IronPort-AV: E=Sophos;i="5.81,167,1610438400"; d="scan'208";a="201144878"
Received: from fmsmga001.fm.intel.com ([10.253.24.23])
 by fmsmga101.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384;
 10 Feb 2021 01:44:39 -0800
IronPort-SDR: 
 BmMWxEs0YPbosn3YMWI9xPKBA8UltSiCe37cy7evl9g/GKJPcLTYPafYDEDS2/QUsPCDx5/5lO
 H+HuGGv3dyhA==
X-ExtLoop1: 1
X-IronPort-AV: E=Sophos;i="5.81,167,1610438400"; d="scan'208";a="488706571"
Received: from yguo18-skl-u1604.sh.intel.com ([10.239.159.53])
 by fmsmga001.fm.intel.com with ESMTP; 10 Feb 2021 01:44:39 -0800
From: "Guo, Yejun" <yejun.guo@intel.com>
To: ffmpeg-devel@ffmpeg.org
Date: Wed, 10 Feb 2021 17:34:28 +0800
Message-Id: <20210210093432.9135-6-yejun.guo@intel.com>
X-Mailer: git-send-email 2.17.1
In-Reply-To: <20210210093432.9135-1-yejun.guo@intel.com>
References: <20210210093432.9135-1-yejun.guo@intel.com>
Subject: [FFmpeg-devel] [PATCH V2 06/10] dnn: add color conversion for
	analytic case
X-BeenThere: ffmpeg-devel@ffmpeg.org
X-Mailman-Version: 2.1.20
Precedence: list
List-Id: FFmpeg development discussions and patches <ffmpeg-devel.ffmpeg.org>
List-Unsubscribe: <https://ffmpeg.org/mailman/options/ffmpeg-devel>,
 <mailto:ffmpeg-devel-request@ffmpeg.org?subject=unsubscribe>
List-Archive: <https://ffmpeg.org/pipermail/ffmpeg-devel>
List-Post: <mailto:ffmpeg-devel@ffmpeg.org>
List-Help: <mailto:ffmpeg-devel-request@ffmpeg.org?subject=help>
List-Subscribe: <https://ffmpeg.org/mailman/listinfo/ffmpeg-devel>,
 <mailto:ffmpeg-devel-request@ffmpeg.org?subject=subscribe>
Reply-To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org>
Cc: yejun.guo@intel.com
MIME-Version: 1.0
Errors-To: ffmpeg-devel-bounces@ffmpeg.org
Sender: "ffmpeg-devel" <ffmpeg-devel-bounces@ffmpeg.org>

Signed-off-by: Guo, Yejun <yejun.guo@intel.com>
---
 libavfilter/dnn/dnn_backend_native.c   |  2 +-
 libavfilter/dnn/dnn_backend_openvino.c | 23 ++++++++++-
 libavfilter/dnn/dnn_backend_tf.c       |  2 +-
 libavfilter/dnn/dnn_io_proc.c          | 56 +++++++++++++++++++++++++-
 libavfilter/dnn/dnn_io_proc.h          |  2 +-
 5 files changed, 80 insertions(+), 5 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_native.c b/libavfilter/dnn/dnn_backend_native.c
index be6451367a..3bc253c1ad 100644
--- a/libavfilter/dnn/dnn_backend_native.c
+++ b/libavfilter/dnn/dnn_backend_native.c
@@ -321,7 +321,7 @@ static DNNReturnType execute_model_native(const DNNModel *model, const char *inp
         if (native_model->model->pre_proc != NULL) {
             native_model->model->pre_proc(in_frame, &input, native_model->model->filter_ctx);
         } else {
-            ff_proc_from_frame_to_dnn(in_frame, &input, ctx);
+            ff_proc_from_frame_to_dnn(in_frame, &input, native_model->model->func_type, ctx);
         }
     }
 
diff --git a/libavfilter/dnn/dnn_backend_openvino.c b/libavfilter/dnn/dnn_backend_openvino.c
index 7c1abb3eeb..cca155a52c 100644
--- a/libavfilter/dnn/dnn_backend_openvino.c
+++ b/libavfilter/dnn/dnn_backend_openvino.c
@@ -99,6 +99,8 @@ static DNNDataType precision_to_datatype(precision_e precision)
     {
     case FP32:
         return DNN_FLOAT;
+    case U8:
+        return DNN_UINT8;
     default:
         av_assert0(!"not supported yet.");
         return DNN_FLOAT;
@@ -111,6 +113,8 @@ static int get_datatype_size(DNNDataType dt)
     {
     case DNN_FLOAT:
         return sizeof(float);
+    case DNN_UINT8:
+        return sizeof(uint8_t);
     default:
         av_assert0(!"not supported yet.");
         return 1;
@@ -152,6 +156,9 @@ static DNNReturnType fill_model_input_ov(OVModel *ov_model, RequestItem *request
     input.channels = dims.dims[1];
     input.data = blob_buffer.buffer;
     input.dt = precision_to_datatype(precision);
+    // all models in openvino open model zoo use BGR as input,
+    // change to be an option when necessary.
+    input.order = DCO_BGR;
 
     av_assert0(request->task_count <= dims.dims[0]);
     for (int i = 0; i < request->task_count; ++i) {
@@ -160,7 +167,7 @@ static DNNReturnType fill_model_input_ov(OVModel *ov_model, RequestItem *request
             if (ov_model->model->pre_proc != NULL) {
                 ov_model->model->pre_proc(task->in_frame, &input, ov_model->model->filter_ctx);
             } else {
-                ff_proc_from_frame_to_dnn(task->in_frame, &input, ctx);
+                ff_proc_from_frame_to_dnn(task->in_frame, &input, ov_model->model->func_type, ctx);
             }
         }
         input.data = (uint8_t *)input.data
@@ -290,6 +297,21 @@ static DNNReturnType init_model_ov(OVModel *ov_model, const char *input_name, co
         goto err;
     }
 
+    // all models in openvino open model zoo use BGR with range [0.0f, 255.0f] as input,
+    // we don't have an AVPixelFormat to describe it, so we'll use AV_PIX_FMT_BGR24 and
+    // ask openvino to do the conversion internally.
+    // the current supported SR model (frame processing) is generated from tensorflow model,
+    // and its input is Y channel as float with range [0.0f, 1.0f], so do not set for this case.
+    // TODO: we need to get a final clear&general solution with all backends/formats considered.
+    if (ov_model->model->func_type != DFT_PROCESS_FRAME) {
+        status = ie_network_set_input_precision(ov_model->network, input_name, U8);
+        if (status != OK) {
+            av_log(ctx, AV_LOG_ERROR, "Failed to set input precision as U8 for %s\n", input_name);
+            // leave through the common error path of this function, like the failure handled above
+            goto err;
+        }
+    }
+
     status = ie_core_load_network(ov_model->core, ov_model->network, ctx->options.device_type, &config, &ov_model->exe_network);
     if (status != OK) {
         av_log(ctx, AV_LOG_ERROR, "Failed to load OpenVINO model network\n");
diff --git a/libavfilter/dnn/dnn_backend_tf.c b/libavfilter/dnn/dnn_backend_tf.c
index e7e5f221f3..750a476726 100644
--- a/libavfilter/dnn/dnn_backend_tf.c
+++ b/libavfilter/dnn/dnn_backend_tf.c
@@ -744,7 +744,7 @@ static DNNReturnType execute_model_tf(const DNNModel *model, const char *input_n
         if (tf_model->model->pre_proc != NULL) {
             tf_model->model->pre_proc(in_frame, &input, tf_model->model->filter_ctx);
         } else {
-            ff_proc_from_frame_to_dnn(in_frame, &input, ctx);
+            ff_proc_from_frame_to_dnn(in_frame, &input, tf_model->model->func_type, ctx);
         }
     }
 
diff --git a/libavfilter/dnn/dnn_io_proc.c b/libavfilter/dnn/dnn_io_proc.c
index bee1423342..e104cc5064 100644
--- a/libavfilter/dnn/dnn_io_proc.c
+++ b/libavfilter/dnn/dnn_io_proc.c
@@ -21,6 +21,7 @@
 #include "dnn_io_proc.h"
 #include "libavutil/imgutils.h"
 #include "libswscale/swscale.h"
+#include "libavutil/avassert.h"
 
 DNNReturnType ff_proc_from_dnn_to_frame(AVFrame *frame, DNNData *output, void *log_ctx)
 {
@@ -92,7 +93,7 @@ DNNReturnType ff_proc_from_dnn_to_frame(AVFrame *frame, DNNData *output, void *l
     return DNN_SUCCESS;
 }
 
-DNNReturnType ff_proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, void *log_ctx)
+static DNNReturnType proc_from_frame_to_dnn_frameprocessing(AVFrame *frame, DNNData *input, void *log_ctx)
 {
     struct SwsContext *sws_ctx;
     int bytewidth = av_image_get_linesize(frame->format, frame->width, 0);
@@ -163,3 +164,64 @@ DNNReturnType ff_proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, void *lo
 
     return DNN_SUCCESS;
 }
+
+// map the data type and channel order of a DNNData to the matching AVPixelFormat,
+// only uint8 data in BGR order is supported for now (anything else asserts).
+static enum AVPixelFormat get_pixel_format(DNNData *data)
+{
+    if (data->dt == DNN_UINT8 && data->order == DCO_BGR) {
+        return AV_PIX_FMT_BGR24;
+    }
+
+    av_assert0(!"not supported yet.\n");
+    return AV_PIX_FMT_BGR24;
+}
+
+// scale the frame to input->width x input->height, convert it to the pixel format
+// given by get_pixel_format(input) and store the result in input->data.
+// returns DNN_ERROR if the scale context or the destination linesizes cannot be set up.
+static DNNReturnType proc_from_frame_to_dnn_analytics(AVFrame *frame, DNNData *input, void *log_ctx)
+{
+    struct SwsContext *sws_ctx;
+    int linesizes[4];
+    enum AVPixelFormat fmt = get_pixel_format(input);
+    sws_ctx = sws_getContext(frame->width, frame->height, frame->format,
+                             input->width, input->height, fmt,
+                             SWS_FAST_BILINEAR, NULL, NULL, NULL);
+    if (!sws_ctx) {
+        av_log(log_ctx, AV_LOG_ERROR, "Impossible to create scale context for the conversion "
+            "fmt:%s s:%dx%d -> fmt:%s s:%dx%d\n",
+            av_get_pix_fmt_name(frame->format), frame->width, frame->height,
+            av_get_pix_fmt_name(fmt), input->width, input->height);
+        return DNN_ERROR;
+    }
+
+    if (av_image_fill_linesizes(linesizes, fmt, input->width) < 0) {
+        av_log(log_ctx, AV_LOG_ERROR, "unable to get linesizes with av_image_fill_linesizes\n");
+        sws_freeContext(sws_ctx);
+        return DNN_ERROR;
+    }
+
+    // sws_scale() expects one destination pointer per plane, so pass a real
+    // 4-entry array instead of the address of the single input->data pointer.
+    sws_scale(sws_ctx, (const uint8_t *const *)frame->data, frame->linesize, 0, frame->height,
+                       (uint8_t *const [4]){input->data, 0, 0, 0}, linesizes);
+
+    sws_freeContext(sws_ctx);
+    return DNN_SUCCESS;
+}
+
+// dispatch the frame -> model input conversion according to the model function type.
+DNNReturnType ff_proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, DNNFunctionType func_type, void *log_ctx)
+{
+    switch (func_type)
+    {
+    case DFT_PROCESS_FRAME:
+        return proc_from_frame_to_dnn_frameprocessing(frame, input, log_ctx);
+    case DFT_ANALYTICS_DETECT:
+        return proc_from_frame_to_dnn_analytics(frame, input, log_ctx);
+    default:
+        avpriv_report_missing_feature(log_ctx, "model function type %d", func_type);
+        return DNN_ERROR;
+    }
+}
diff --git a/libavfilter/dnn/dnn_io_proc.h b/libavfilter/dnn/dnn_io_proc.h
index 6a410ccc7b..91ad3cb261 100644
--- a/libavfilter/dnn/dnn_io_proc.h
+++ b/libavfilter/dnn/dnn_io_proc.h
@@ -30,7 +30,7 @@
 #include "../dnn_interface.h"
 #include "libavutil/frame.h"
 
-DNNReturnType ff_proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, void *log_ctx);
+DNNReturnType ff_proc_from_frame_to_dnn(AVFrame *frame, DNNData *input, DNNFunctionType func_type, void *log_ctx);
 DNNReturnType ff_proc_from_dnn_to_frame(AVFrame *frame, DNNData *output, void *log_ctx);
 
 #endif

From patchwork Wed Feb 10 09:34:29 2021
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: "Guo, Yejun" <yejun.guo@intel.com>
X-Patchwork-Id: 25547
Return-Path: <ffmpeg-devel-bounces@ffmpeg.org>
X-Original-To: patchwork@ffaux-bg.ffmpeg.org
Delivered-To: patchwork@ffaux-bg.ffmpeg.org
Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org [79.124.17.100])
	by ffaux.localdomain (Postfix) with ESMTP id 2FFB9449CE2
	for <patchwork@ffaux-bg.ffmpeg.org>; Wed, 10 Feb 2021 11:44:57 +0200 (EET)
Received: from [127.0.1.1] (localhost [127.0.0.1])
	by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id 1AE5068A5E4;
	Wed, 10 Feb 2021 11:44:57 +0200 (EET)
X-Original-To: ffmpeg-devel@ffmpeg.org
Delivered-To: ffmpeg-devel@ffmpeg.org
Received: from mga01.intel.com (mga01.intel.com [192.55.52.88])
 by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTPS id 8D14F68A56C
 for <ffmpeg-devel@ffmpeg.org>; Wed, 10 Feb 2021 11:44:49 +0200 (EET)
IronPort-SDR: 
 wLJF9PA64S0OpbbkBfZfQrgiNqmlMvzkZ2wiwZxHN008f1mRcRY/e6QIuJVAs6LQ5REvUI99UM
 zmhfNplKZJ4g==
X-IronPort-AV: E=McAfee;i="6000,8403,9890"; a="201144880"
X-IronPort-AV: E=Sophos;i="5.81,167,1610438400"; d="scan'208";a="201144880"
Received: from fmsmga001.fm.intel.com ([10.253.24.23])
 by fmsmga101.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384;
 10 Feb 2021 01:44:41 -0800
IronPort-SDR: 
 lhoNvN9hcpb6O2sKCMCpdqEAFhT9qqSo3SmU5EN8gWjDZ53A4Wu0J40aOlXcGQW5hU+VEAmuNx
 8ikye3UwiIdQ==
X-ExtLoop1: 1
X-IronPort-AV: E=Sophos;i="5.81,167,1610438400"; d="scan'208";a="488706575"
Received: from yguo18-skl-u1604.sh.intel.com ([10.239.159.53])
 by fmsmga001.fm.intel.com with ESMTP; 10 Feb 2021 01:44:40 -0800
From: "Guo, Yejun" <yejun.guo@intel.com>
To: ffmpeg-devel@ffmpeg.org
Date: Wed, 10 Feb 2021 17:34:29 +0800
Message-Id: <20210210093432.9135-7-yejun.guo@intel.com>
X-Mailer: git-send-email 2.17.1
In-Reply-To: <20210210093432.9135-1-yejun.guo@intel.com>
References: <20210210093432.9135-1-yejun.guo@intel.com>
Subject: [FFmpeg-devel] [PATCH V2 07/10] dnn_backend_openvino.c: allow
	out_frame as NULL for analytic case
X-BeenThere: ffmpeg-devel@ffmpeg.org
X-Mailman-Version: 2.1.20
Precedence: list
List-Id: FFmpeg development discussions and patches <ffmpeg-devel.ffmpeg.org>
List-Unsubscribe: <https://ffmpeg.org/mailman/options/ffmpeg-devel>,
 <mailto:ffmpeg-devel-request@ffmpeg.org?subject=unsubscribe>
List-Archive: <https://ffmpeg.org/pipermail/ffmpeg-devel>
List-Post: <mailto:ffmpeg-devel@ffmpeg.org>
List-Help: <mailto:ffmpeg-devel-request@ffmpeg.org?subject=help>
List-Subscribe: <https://ffmpeg.org/mailman/listinfo/ffmpeg-devel>,
 <mailto:ffmpeg-devel-request@ffmpeg.org?subject=subscribe>
Reply-To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org>
Cc: yejun.guo@intel.com
MIME-Version: 1.0
Errors-To: ffmpeg-devel-bounces@ffmpeg.org
Sender: "ffmpeg-devel" <ffmpeg-devel-bounces@ffmpeg.org>

From: Ting Fu <ting.fu@intel.com>
---
 libavfilter/dnn/dnn_backend_openvino.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_openvino.c b/libavfilter/dnn/dnn_backend_openvino.c
index cca155a52c..5be053b7f8 100644
--- a/libavfilter/dnn/dnn_backend_openvino.c
+++ b/libavfilter/dnn/dnn_backend_openvino.c
@@ -616,7 +616,7 @@ DNNReturnType ff_dnn_execute_model_ov(const DNNModel *model, const char *input_n
         return DNN_ERROR;
     }
 
-    if (!out_frame) {
+    if (!out_frame && model->func_type == DFT_PROCESS_FRAME) {
         av_log(ctx, AV_LOG_ERROR, "out frame is NULL when execute model.\n");
         return DNN_ERROR;
     }
@@ -669,7 +669,7 @@ DNNReturnType ff_dnn_execute_model_async_ov(const DNNModel *model, const char *i
         return DNN_ERROR;
     }
 
-    if (!out_frame) {
+    if (!out_frame && model->func_type == DFT_PROCESS_FRAME) {
         av_log(ctx, AV_LOG_ERROR, "out frame is NULL when async execute model.\n");
         return DNN_ERROR;
     }

From patchwork Wed Feb 10 09:34:30 2021
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: "Guo, Yejun" <yejun.guo@intel.com>
X-Patchwork-Id: 25548
Return-Path: <ffmpeg-devel-bounces@ffmpeg.org>
X-Original-To: patchwork@ffaux-bg.ffmpeg.org
Delivered-To: patchwork@ffaux-bg.ffmpeg.org
Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org [79.124.17.100])
	by ffaux.localdomain (Postfix) with ESMTP id B3E34449CE2
	for <patchwork@ffaux-bg.ffmpeg.org>; Wed, 10 Feb 2021 11:45:00 +0200 (EET)
Received: from [127.0.1.1] (localhost [127.0.0.1])
	by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id 9B02868A636;
	Wed, 10 Feb 2021 11:45:00 +0200 (EET)
X-Original-To: ffmpeg-devel@ffmpeg.org
Delivered-To: ffmpeg-devel@ffmpeg.org
Received: from mga01.intel.com (mga01.intel.com [192.55.52.88])
 by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTPS id 42A1C68A5D7
 for <ffmpeg-devel@ffmpeg.org>; Wed, 10 Feb 2021 11:44:51 +0200 (EET)
IronPort-SDR: 
 yiDvZIclxrAMWj3861c8/gV0sXc1C6oQX8c9GeZjwT+aNg9Zqh9wAq04ilpHGe70M3GXQiRjSS
 B4i1A3iKnNng==
X-IronPort-AV: E=McAfee;i="6000,8403,9890"; a="201144882"
X-IronPort-AV: E=Sophos;i="5.81,167,1610438400"; d="scan'208";a="201144882"
Received: from fmsmga001.fm.intel.com ([10.253.24.23])
 by fmsmga101.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384;
 10 Feb 2021 01:44:41 -0800
IronPort-SDR: 
 HdXGU4LH9V2DjGrPyxUbWvcHM03xadwC0us3vCz8LY+riFTMre5Gqh9y6rGdPMVPyfTUyPFBMR
 2YkhOcAB5xug==
X-ExtLoop1: 1
X-IronPort-AV: E=Sophos;i="5.81,167,1610438400"; d="scan'208";a="488706582"
Received: from yguo18-skl-u1604.sh.intel.com ([10.239.159.53])
 by fmsmga001.fm.intel.com with ESMTP; 10 Feb 2021 01:44:41 -0800
From: "Guo, Yejun" <yejun.guo@intel.com>
To: ffmpeg-devel@ffmpeg.org
Date: Wed, 10 Feb 2021 17:34:30 +0800
Message-Id: <20210210093432.9135-8-yejun.guo@intel.com>
X-Mailer: git-send-email 2.17.1
In-Reply-To: <20210210093432.9135-1-yejun.guo@intel.com>
References: <20210210093432.9135-1-yejun.guo@intel.com>
Subject: [FFmpeg-devel] [PATCH V2 08/10] libavutil: add side data
	AVDnnBoundingBox for dnn based detect/classify filters
X-BeenThere: ffmpeg-devel@ffmpeg.org
X-Mailman-Version: 2.1.20
Precedence: list
List-Id: FFmpeg development discussions and patches <ffmpeg-devel.ffmpeg.org>
List-Unsubscribe: <https://ffmpeg.org/mailman/options/ffmpeg-devel>,
 <mailto:ffmpeg-devel-request@ffmpeg.org?subject=unsubscribe>
List-Archive: <https://ffmpeg.org/pipermail/ffmpeg-devel>
List-Post: <mailto:ffmpeg-devel@ffmpeg.org>
List-Help: <mailto:ffmpeg-devel-request@ffmpeg.org?subject=help>
List-Subscribe: <https://ffmpeg.org/mailman/listinfo/ffmpeg-devel>,
 <mailto:ffmpeg-devel-request@ffmpeg.org?subject=subscribe>
Reply-To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org>
Cc: yejun.guo@intel.com
MIME-Version: 1.0
Errors-To: ffmpeg-devel-bounces@ffmpeg.org
Sender: "ffmpeg-devel" <ffmpeg-devel-bounces@ffmpeg.org>

Signed-off-by: Guo, Yejun <yejun.guo@intel.com>
---
 doc/APIchanges       |  2 ++
 libavutil/Makefile   |  1 +
 libavutil/dnn_bbox.h | 68 ++++++++++++++++++++++++++++++++++++++++++++
 libavutil/frame.c    |  1 +
 libavutil/frame.h    |  7 +++++
 libavutil/version.h  |  2 +-
 6 files changed, 80 insertions(+), 1 deletion(-)
 create mode 100644 libavutil/dnn_bbox.h

diff --git a/doc/APIchanges b/doc/APIchanges
index 1332694820..7cbfa9fafa 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -14,6 +14,8 @@ libavutil:     2017-10-21
 
 
 API changes, most recent first:
+2021-02-08 - xxxxxxxxxx - lavu 56.65.100 - frame.h
+  Add AV_FRAME_DATA_DNN_BBOXES
 
 2021-01-26 - xxxxxxxxxx - lavu 56.64.100 - common.h
   Add FFABSU()
diff --git a/libavutil/Makefile b/libavutil/Makefile
index 27bafe9e12..b99cb7084f 100644
--- a/libavutil/Makefile
+++ b/libavutil/Makefile
@@ -23,6 +23,7 @@ HEADERS = adler32.h                                                     \
           des.h                                                         \
           dict.h                                                        \
           display.h                                                     \
+          dnn_bbox.h                                                    \
           dovi_meta.h                                                   \
           downmix_info.h                                                \
           encryption_info.h                                             \
diff --git a/libavutil/dnn_bbox.h b/libavutil/dnn_bbox.h
new file mode 100644
index 0000000000..50899c4486
--- /dev/null
+++ b/libavutil/dnn_bbox.h
@@ -0,0 +1,68 @@
+/*
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_DNN_BBOX_H
+#define AVUTIL_DNN_BBOX_H
+
+#include "rational.h"
+
+typedef struct AVDnnBoundingBox {
+    /**
+     * Must be set to the size of this data structure (that is,
+     * sizeof(AVDnnBoundingBox)).
+     */
+    uint32_t self_size;
+
+    /**
+     * Object detection is usually applied to a smaller image that
+     * is scaled down from the original frame.
+     * width and height are attributes of the scaled image, in pixel.
+     */
+    int model_input_width;
+    int model_input_height;
+
+    /**
+     * Distance in pixels from the top edge of the scaled image to top
+     * and bottom, and from the left edge of the scaled image to left and
+     * right, defining the bounding box.
+     */
+    int top;
+    int left;
+    int bottom;
+    int right;
+
+    /**
+     * Detect result
+     */
+    int detect_label;
+    AVRational detect_conf;
+
+    /**
+     * At most 4 classifications based on the detected bounding box.
+     * For example, we can get max 4 different attributes with 4 different
+     * DNN models on one bounding box.
+     * classify_count is zero if no classification.
+     */
+#define AV_NUM_BBOX_CLASSIFY 4
+    uint32_t classify_count;
+    int classify_labels[AV_NUM_BBOX_CLASSIFY];
+    AVRational classify_confs[AV_NUM_BBOX_CLASSIFY];
+} AVDnnBoundingBox;
+
+#endif
diff --git a/libavutil/frame.c b/libavutil/frame.c
index eab51b6a32..4308507827 100644
--- a/libavutil/frame.c
+++ b/libavutil/frame.c
@@ -852,6 +852,7 @@ const char *av_frame_side_data_name(enum AVFrameSideDataType type)
     case AV_FRAME_DATA_VIDEO_ENC_PARAMS:            return "Video encoding parameters";
     case AV_FRAME_DATA_SEI_UNREGISTERED:            return "H.26[45] User Data Unregistered SEI message";
     case AV_FRAME_DATA_FILM_GRAIN_PARAMS:           return "Film grain parameters";
+    case AV_FRAME_DATA_DNN_BBOXES:                  return "DNN bounding boxes";
     }
     return NULL;
 }
diff --git a/libavutil/frame.h b/libavutil/frame.h
index 1aeafef6de..a4dcfd27c9 100644
--- a/libavutil/frame.h
+++ b/libavutil/frame.h
@@ -198,6 +198,13 @@ enum AVFrameSideDataType {
      * Must be present for every frame which should have film grain applied.
      */
     AV_FRAME_DATA_FILM_GRAIN_PARAMS,
+
+    /**
+     * Bounding box generated by dnn based filters for object detection and classification,
+     * the data is an array of AVDnnBoundingBox, the number of array elements is implied by
+     * AVFrameSideData.size / AVDnnBoundingBox.self_size.
+     */
+    AV_FRAME_DATA_DNN_BBOXES,
 };
 
 enum AVActiveFormatDescription {
diff --git a/libavutil/version.h b/libavutil/version.h
index 21136e6cb7..b2165754f9 100644
--- a/libavutil/version.h
+++ b/libavutil/version.h
@@ -79,7 +79,7 @@
  */
 
 #define LIBAVUTIL_VERSION_MAJOR  56
-#define LIBAVUTIL_VERSION_MINOR  64
+#define LIBAVUTIL_VERSION_MINOR  65
 #define LIBAVUTIL_VERSION_MICRO 100
 
 #define LIBAVUTIL_VERSION_INT   AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \

From patchwork Wed Feb 10 09:34:31 2021
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: "Guo, Yejun" <yejun.guo@intel.com>
X-Patchwork-Id: 25545
Return-Path: <ffmpeg-devel-bounces@ffmpeg.org>
X-Original-To: patchwork@ffaux-bg.ffmpeg.org
Delivered-To: patchwork@ffaux-bg.ffmpeg.org
Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org [79.124.17.100])
	by ffaux.localdomain (Postfix) with ESMTP id 9A8C5449CE2
	for <patchwork@ffaux-bg.ffmpeg.org>; Wed, 10 Feb 2021 11:44:55 +0200 (EET)
Received: from [127.0.1.1] (localhost [127.0.0.1])
	by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id 8855368A640;
	Wed, 10 Feb 2021 11:44:55 +0200 (EET)
X-Original-To: ffmpeg-devel@ffmpeg.org
Delivered-To: ffmpeg-devel@ffmpeg.org
Received: from mga01.intel.com (mga01.intel.com [192.55.52.88])
 by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTPS id B9C8368A57E
 for <ffmpeg-devel@ffmpeg.org>; Wed, 10 Feb 2021 11:44:52 +0200 (EET)
IronPort-SDR: 
 cSA1oSUupvYyF14r2/dRsH0kR4DY62tNrfOhtgnVS2UeUptu8dTZlmcNbgMUkAKy/3Jgx3Mo+H
 21Mm4Lyl98Pg==
X-IronPort-AV: E=McAfee;i="6000,8403,9890"; a="201144887"
X-IronPort-AV: E=Sophos;i="5.81,167,1610438400"; d="scan'208";a="201144887"
Received: from fmsmga001.fm.intel.com ([10.253.24.23])
 by fmsmga101.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384;
 10 Feb 2021 01:44:42 -0800
IronPort-SDR: 
 SdwRWPQwdpXiVrIzZ8UUFpJ56LBWXeo7gV9YsOx7a1Rs1ghLxtrMeVLX7gszo2HerU+lYRyYh0
 HBAYhvtuUvkQ==
X-ExtLoop1: 1
X-IronPort-AV: E=Sophos;i="5.81,167,1610438400"; d="scan'208";a="488706586"
Received: from yguo18-skl-u1604.sh.intel.com ([10.239.159.53])
 by fmsmga001.fm.intel.com with ESMTP; 10 Feb 2021 01:44:42 -0800
From: "Guo, Yejun" <yejun.guo@intel.com>
To: ffmpeg-devel@ffmpeg.org
Date: Wed, 10 Feb 2021 17:34:31 +0800
Message-Id: <20210210093432.9135-9-yejun.guo@intel.com>
X-Mailer: git-send-email 2.17.1
In-Reply-To: <20210210093432.9135-1-yejun.guo@intel.com>
References: <20210210093432.9135-1-yejun.guo@intel.com>
Subject: [FFmpeg-devel] [PATCH V2 09/10] libavfilter: show side data of dnn
	bounding box
X-BeenThere: ffmpeg-devel@ffmpeg.org
X-Mailman-Version: 2.1.20
Precedence: list
List-Id: FFmpeg development discussions and patches <ffmpeg-devel.ffmpeg.org>
List-Unsubscribe: <https://ffmpeg.org/mailman/options/ffmpeg-devel>,
 <mailto:ffmpeg-devel-request@ffmpeg.org?subject=unsubscribe>
List-Archive: <https://ffmpeg.org/pipermail/ffmpeg-devel>
List-Post: <mailto:ffmpeg-devel@ffmpeg.org>
List-Help: <mailto:ffmpeg-devel-request@ffmpeg.org?subject=help>
List-Subscribe: <https://ffmpeg.org/mailman/listinfo/ffmpeg-devel>,
 <mailto:ffmpeg-devel-request@ffmpeg.org?subject=subscribe>
Reply-To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org>
Cc: yejun.guo@intel.com
MIME-Version: 1.0
Errors-To: ffmpeg-devel-bounces@ffmpeg.org
Sender: "ffmpeg-devel" <ffmpeg-devel-bounces@ffmpeg.org>

Signed-off-by: Guo, Yejun <yejun.guo@intel.com>
---
 libavfilter/f_sidedata.c  |  2 ++
 libavfilter/vf_showinfo.c | 34 ++++++++++++++++++++++++++++++++++
 2 files changed, 36 insertions(+)

diff --git a/libavfilter/f_sidedata.c b/libavfilter/f_sidedata.c
index 3757723375..08ee96c4ef 100644
--- a/libavfilter/f_sidedata.c
+++ b/libavfilter/f_sidedata.c
@@ -71,6 +71,7 @@ static const AVOption filt_name##_options[] = { \
     {   "S12M_TIMECOD",               "", 0,             AV_OPT_TYPE_CONST,  {.i64 = AV_FRAME_DATA_S12M_TIMECODE              }, 0, 0, FLAGS, "type" }, \
     {   "DYNAMIC_HDR_PLUS",           "", 0,             AV_OPT_TYPE_CONST,  {.i64 = AV_FRAME_DATA_DYNAMIC_HDR_PLUS           }, 0, 0, FLAGS, "type" }, \
     {   "REGIONS_OF_INTEREST",        "", 0,             AV_OPT_TYPE_CONST,  {.i64 = AV_FRAME_DATA_REGIONS_OF_INTEREST        }, 0, 0, FLAGS, "type" }, \
+    {   "DNN_BOUNDING_BOXES",         "", 0,             AV_OPT_TYPE_CONST,  {.i64 = AV_FRAME_DATA_DNN_BBOXES                 }, 0, 0, FLAGS, "type" }, \
     {   "SEI_UNREGISTERED",           "", 0,             AV_OPT_TYPE_CONST,  {.i64 = AV_FRAME_DATA_SEI_UNREGISTERED           }, 0, 0, FLAGS, "type" }, \
     { NULL } \
 }
@@ -100,6 +101,7 @@ static const AVOption filt_name##_options[] = { \
     {   "S12M_TIMECOD",               "", 0,             AV_OPT_TYPE_CONST,  {.i64 = AV_FRAME_DATA_S12M_TIMECODE              }, 0, 0, FLAGS, "type" }, \
     {   "DYNAMIC_HDR_PLUS",           "", 0,             AV_OPT_TYPE_CONST,  {.i64 = AV_FRAME_DATA_DYNAMIC_HDR_PLUS           }, 0, 0, FLAGS, "type" }, \
     {   "REGIONS_OF_INTEREST",        "", 0,             AV_OPT_TYPE_CONST,  {.i64 = AV_FRAME_DATA_REGIONS_OF_INTEREST        }, 0, 0, FLAGS, "type" }, \
+    {   "DNN_BOUNDING_BOXES",         "", 0,             AV_OPT_TYPE_CONST,  {.i64 = AV_FRAME_DATA_DNN_BBOXES                 }, 0, 0, FLAGS, "type" }, \
     {   "SEI_UNREGISTERED",           "", 0,             AV_OPT_TYPE_CONST,  {.i64 = AV_FRAME_DATA_SEI_UNREGISTERED           }, 0, 0, FLAGS, "type" }, \
     { NULL } \
 }
diff --git a/libavfilter/vf_showinfo.c b/libavfilter/vf_showinfo.c
index 6208892005..67653b17ec 100644
--- a/libavfilter/vf_showinfo.c
+++ b/libavfilter/vf_showinfo.c
@@ -38,6 +38,7 @@
 #include "libavutil/timecode.h"
 #include "libavutil/mastering_display_metadata.h"
 #include "libavutil/video_enc_params.h"
+#include "libavutil/dnn_bbox.h"
 
 #include "avfilter.h"
 #include "internal.h"
@@ -153,6 +154,55 @@ static void dump_roi(AVFilterContext *ctx, const AVFrameSideData *sd)
     }
 }
 
+/**
+ * Log every AVDnnBoundingBox stored in an AV_FRAME_DATA_DNN_BBOXES side data entry.
+ *
+ * The payload is read as an array of AVDnnBoundingBox. Only an error is logged
+ * when the payload is shorter than one entry or when self_size differs from
+ * sizeof(AVDnnBoundingBox).
+ */
+static void dump_dnnbbox(AVFilterContext *ctx, const AVFrameSideData *sd)
+{
+    int nb_bbox;
+    const AVDnnBoundingBox *bbox;
+    uint32_t bbox_size;
+
+    // self_size is stored inside the payload, so one full entry must be present before reading it
+    if (sd->size < sizeof(*bbox)) {
+        av_log(ctx, AV_LOG_ERROR, "Invalid AVDnnBoundingBox side data size.\n");
+        return;
+    }
+
+    bbox = (const AVDnnBoundingBox *)sd->data;
+    bbox_size = bbox->self_size;
+    if (!bbox_size || sd->size % bbox_size != 0 || bbox_size != sizeof(*bbox)) {
+        av_log(ctx, AV_LOG_ERROR, "Invalid AVDnnBoundingBox.self_size.\n");
+        return;
+    }
+    nb_bbox = sd->size / bbox_size;
+
+    av_log(ctx, AV_LOG_INFO, "Dnn bounding boxes:\n");
+    for (int i = 0; i < nb_bbox; i++) {
+        uint32_t nb_classify;
+
+        bbox = (const AVDnnBoundingBox *)(sd->data + bbox_size * i);
+        av_log(ctx, AV_LOG_INFO, "index: %d, region: (%d/%d, %d/%d) -> (%d/%d, %d/%d), label: %d, confidence: %d/%d.\n",
+               i, bbox->left, bbox->model_input_width, bbox->top, bbox->model_input_height,
+               bbox->right, bbox->model_input_width, bbox->bottom, bbox->model_input_height,
+               bbox->detect_label, bbox->detect_conf.num, bbox->detect_conf.den);
+
+        // classify_labels[] and classify_confs[] hold AV_NUM_BBOX_CLASSIFY entries, do not read past them
+        nb_classify = bbox->classify_count;
+        if (nb_classify > AV_NUM_BBOX_CLASSIFY) {
+            nb_classify = AV_NUM_BBOX_CLASSIFY;
+        }
+        for (uint32_t j = 0; j < nb_classify; j++) {
+            av_log(ctx, AV_LOG_INFO, "\t\tclassify:  label: %d, confidence: %d/%d.\n",
+                   bbox->classify_labels[j], bbox->classify_confs[j].num, bbox->classify_confs[j].den);
+        }
+    }
+}
+
 static void dump_mastering_display(AVFilterContext *ctx, const AVFrameSideData *sd)
 {
     const AVMasteringDisplayMetadata *mastering_display;
@@ -494,6 +544,9 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
         case AV_FRAME_DATA_REGIONS_OF_INTEREST:
             dump_roi(ctx, sd);
             break;
+        case AV_FRAME_DATA_DNN_BBOXES:
+            dump_dnnbbox(ctx, sd);
+            break;
         case AV_FRAME_DATA_MASTERING_DISPLAY_METADATA:
             dump_mastering_display(ctx, sd);
             break;

From patchwork Wed Feb 10 09:34:32 2021
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: "Guo, Yejun" <yejun.guo@intel.com>
X-Patchwork-Id: 25546
Return-Path: <ffmpeg-devel-bounces@ffmpeg.org>
X-Original-To: patchwork@ffaux-bg.ffmpeg.org
Delivered-To: patchwork@ffaux-bg.ffmpeg.org
Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org [79.124.17.100])
	by ffaux.localdomain (Postfix) with ESMTP id 549AA449CE2
	for <patchwork@ffaux-bg.ffmpeg.org>; Wed, 10 Feb 2021 11:44:56 +0200 (EET)
Received: from [127.0.1.1] (localhost [127.0.0.1])
	by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id 437A668A67E;
	Wed, 10 Feb 2021 11:44:56 +0200 (EET)
X-Original-To: ffmpeg-devel@ffmpeg.org
Delivered-To: ffmpeg-devel@ffmpeg.org
Received: from mga01.intel.com (mga01.intel.com [192.55.52.88])
 by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTPS id BA1B968A56C
 for <ffmpeg-devel@ffmpeg.org>; Wed, 10 Feb 2021 11:44:53 +0200 (EET)
IronPort-SDR: 
 8ysNelTl8RglAHz9XbeU/VAZQXY58WCeJfXc5MQtbhCFj841Atoojo929/l7xB8PhPhJZQbI4w
 0uITRwuqUSsw==
X-IronPort-AV: E=McAfee;i="6000,8403,9890"; a="201144888"
X-IronPort-AV: E=Sophos;i="5.81,167,1610438400"; d="scan'208";a="201144888"
Received: from fmsmga001.fm.intel.com ([10.253.24.23])
 by fmsmga101.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384;
 10 Feb 2021 01:44:43 -0800
IronPort-SDR: 
 KLvOHMkl1Ru5RgNo3qVvlKK3DnHjRdVVe5Pum4JqIiWKc032/sx9Vqbqt2LAoN4d+PhF+7rALx
 zDDP8fsGGm3w==
X-ExtLoop1: 1
X-IronPort-AV: E=Sophos;i="5.81,167,1610438400"; d="scan'208";a="488706593"
Received: from yguo18-skl-u1604.sh.intel.com ([10.239.159.53])
 by fmsmga001.fm.intel.com with ESMTP; 10 Feb 2021 01:44:43 -0800
From: "Guo, Yejun" <yejun.guo@intel.com>
To: ffmpeg-devel@ffmpeg.org
Date: Wed, 10 Feb 2021 17:34:32 +0800
Message-Id: <20210210093432.9135-10-yejun.guo@intel.com>
X-Mailer: git-send-email 2.17.1
In-Reply-To: <20210210093432.9135-1-yejun.guo@intel.com>
References: <20210210093432.9135-1-yejun.guo@intel.com>
Subject: [FFmpeg-devel] [PATCH V2 10/10] libavfilter: add filter dnn_detect
	for object detection
X-BeenThere: ffmpeg-devel@ffmpeg.org
X-Mailman-Version: 2.1.20
Precedence: list
List-Id: FFmpeg development discussions and patches <ffmpeg-devel.ffmpeg.org>
List-Unsubscribe: <https://ffmpeg.org/mailman/options/ffmpeg-devel>,
 <mailto:ffmpeg-devel-request@ffmpeg.org?subject=unsubscribe>
List-Archive: <https://ffmpeg.org/pipermail/ffmpeg-devel>
List-Post: <mailto:ffmpeg-devel@ffmpeg.org>
List-Help: <mailto:ffmpeg-devel-request@ffmpeg.org?subject=help>
List-Subscribe: <https://ffmpeg.org/mailman/listinfo/ffmpeg-devel>,
 <mailto:ffmpeg-devel-request@ffmpeg.org?subject=subscribe>
Reply-To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org>
Cc: yejun.guo@intel.com
MIME-Version: 1.0
Errors-To: ffmpeg-devel-bounces@ffmpeg.org
Sender: "ffmpeg-devel" <ffmpeg-devel-bounces@ffmpeg.org>

Below are the example steps to do object detection:

1. download and install l_openvino_toolkit_p_2021.1.110.tgz from
https://software.intel.com/content/www/us/en/develop/tools/openvino-toolkit/download.html
  or, we can get the source code (tag 2021.1), then build and install it.
2. export LD_LIBRARY_PATH with openvino settings, for example:
.../deployment_tools/inference_engine/lib/intel64/:.../deployment_tools/inference_engine/external/tbb/lib/
3. rebuild ffmpeg from source code with configure option:
--enable-libopenvino
--extra-cflags='-I.../deployment_tools/inference_engine/include/'
--extra-ldflags='-L.../deployment_tools/inference_engine/lib/intel64'
4. download model files and test image
wget https://github.com/guoyejun/ffmpeg_dnn/raw/main/models/openvino/2021.1/face-detection-adas-0001.bin
wget https://github.com/guoyejun/ffmpeg_dnn/raw/main/models/openvino/2021.1/face-detection-adas-0001.xml
wget https://github.com/guoyejun/ffmpeg_dnn/raw/main/images/cici.jpg
5. run ffmpeg with:
./ffmpeg -i cici.jpg -vf dnn_detect=dnn_backend=openvino:model=face-detection-adas-0001.xml:input=data:output=detection_out:conf=0.6,showinfo -f null -

We'll see the detect result as below:
[Parsed_showinfo_1 @ 0x55db3ffb60c0]   side data - Dnn bounding boxes:
[Parsed_showinfo_1 @ 0x55db3ffb60c0] index: 0, region: (330/672, 203/384) -> (356/672, 226/384), label: 1, confidence: 10000/10000.
[Parsed_showinfo_1 @ 0x55db3ffb60c0] index: 1, region: (291/672, 209/384) -> (317/672, 231/384), label: 1, confidence: 6917/10000.

There are two faces detected with confidence 100% and 69.17%, and
the input image size of the model is 672x384. The two bounding boxes
in this image are (330, 203)->(356, 226) and (291, 209)->(317, 231).

Since the original input image size is 2048x1536, the two bounding
boxes in the original image are
(330/672*2048=1006, 203/384*1536=812) -> (1085, 904) and
(887, 836) -> (966, 924), and we can check them manually.

Signed-off-by: Guo, Yejun <yejun.guo@intel.com>

Next, we'll add tensorflow backend and update filter vf_drawbox etc
to visualize the detect result.
---
 configure                              |   1 +
 doc/filters.texi                       |  33 +++
 libavfilter/Makefile                   |   1 +
 libavfilter/allfilters.c               |   1 +
 libavfilter/dnn/dnn_backend_openvino.c |  12 +
 libavfilter/dnn_filter_common.c        |   7 +
 libavfilter/dnn_filter_common.h        |   1 +
 libavfilter/dnn_interface.h            |   6 +-
 libavfilter/vf_dnn_detect.c            | 356 +++++++++++++++++++++++++
 9 files changed, 416 insertions(+), 2 deletions(-)
 create mode 100644 libavfilter/vf_dnn_detect.c

diff --git a/configure b/configure
index a76c2ec4ae..2d2668571d 100755
--- a/configure
+++ b/configure
@@ -3548,6 +3548,7 @@ derain_filter_select="dnn"
 deshake_filter_select="pixelutils"
 deshake_opencl_filter_deps="opencl"
 dilation_opencl_filter_deps="opencl"
+dnn_detect_filter_select="dnn"
 dnn_processing_filter_select="dnn"
 drawtext_filter_deps="libfreetype"
 drawtext_filter_suggest="libfontconfig libfribidi"
diff --git a/doc/filters.texi b/doc/filters.texi
index 079bba9a1e..340402e650 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -10073,6 +10073,39 @@ ffmpeg -i INPUT -f lavfi -i nullsrc=hd720,geq='r=128+80*(sin(sqrt((X-W/2)*(X-W/2
 @end example
 @end itemize
 
+@section dnn_detect
+
+Do object detection with deep neural networks.
+
+The filter accepts the following options:
+
+@table @option
+@item dnn_backend
+Specify which DNN backend to use for model loading and execution. This option accepts
+only openvino for now; the tensorflow backend will be added later.
+
+@item model
+Set path to model file specifying network architecture and its parameters.
+Note that different backends use different file formats.
+
+@item input
+Set the input name of the dnn network.
+
+@item output
+Set the output name of the dnn network.
+
+@item conf
+Set the confidence threshold (default: 0.5).
+
+@item backend_configs
+Set the configs to be passed to the backend.
+
+@item async
+Use DNN async execution if set (default: set),
+roll back to sync execution if the backend does not support async.
+
+@end table
+
 @anchor{dnn_processing}
 @section dnn_processing
 
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index b43933be64..6c39e7111b 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -244,6 +244,7 @@ OBJS-$(CONFIG_DILATION_FILTER)               += vf_neighbor.o
 OBJS-$(CONFIG_DILATION_OPENCL_FILTER)        += vf_neighbor_opencl.o opencl.o \
                                                 opencl/neighbor.o
 OBJS-$(CONFIG_DISPLACE_FILTER)               += vf_displace.o framesync.o
+OBJS-$(CONFIG_DNN_DETECT_FILTER)             += vf_dnn_detect.o
 OBJS-$(CONFIG_DNN_PROCESSING_FILTER)         += vf_dnn_processing.o
 OBJS-$(CONFIG_DOUBLEWEAVE_FILTER)            += vf_weave.o
 OBJS-$(CONFIG_DRAWBOX_FILTER)                += vf_drawbox.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index 73d859ce5e..37bb276685 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -229,6 +229,7 @@ extern AVFilter ff_vf_detelecine;
 extern AVFilter ff_vf_dilation;
 extern AVFilter ff_vf_dilation_opencl;
 extern AVFilter ff_vf_displace;
+extern AVFilter ff_vf_dnn_detect;
 extern AVFilter ff_vf_dnn_processing;
 extern AVFilter ff_vf_doubleweave;
 extern AVFilter ff_vf_drawbox;
diff --git a/libavfilter/dnn/dnn_backend_openvino.c b/libavfilter/dnn/dnn_backend_openvino.c
index 5be053b7f8..928d84b744 100644
--- a/libavfilter/dnn/dnn_backend_openvino.c
+++ b/libavfilter/dnn/dnn_backend_openvino.c
@@ -621,6 +621,12 @@ DNNReturnType ff_dnn_execute_model_ov(const DNNModel *model, const char *input_n
         return DNN_ERROR;
     }
 
+    if (model->func_type != DFT_PROCESS_FRAME) {
+        if (!out_frame) {
+            out_frame = in_frame;
+        }
+    }
+
     if (nb_output != 1) {
         // currently, the filter does not need multiple outputs,
         // so we just pending the support until we really need it.
@@ -674,6 +680,12 @@ DNNReturnType ff_dnn_execute_model_async_ov(const DNNModel *model, const char *i
         return DNN_ERROR;
     }
 
+    if (model->func_type != DFT_PROCESS_FRAME) {
+        if (!out_frame) {
+            out_frame = in_frame;
+        }
+    }
+
     task = av_malloc(sizeof(*task));
     if (!task) {
         av_log(ctx, AV_LOG_ERROR, "unable to alloc memory for task item.\n");
diff --git a/libavfilter/dnn_filter_common.c b/libavfilter/dnn_filter_common.c
index 413adba406..92b696e710 100644
--- a/libavfilter/dnn_filter_common.c
+++ b/libavfilter/dnn_filter_common.c
@@ -64,6 +64,13 @@ int ff_dnn_init(DnnContext *ctx, DNNFunctionType func_type, AVFilterContext *fil
     return 0;
 }
 
+int ff_dnn_set_proc(DnnContext *ctx, PRE_POST_PROC pre_proc, PRE_POST_PROC post_proc)
+{   // store the filter-supplied pre/post-processing callbacks on ctx->model
+    ctx->model->pre_proc = pre_proc;
+    ctx->model->post_proc = post_proc;
+    return 0; // this function always returns 0
+}
+
 DNNReturnType ff_dnn_get_input(DnnContext *ctx, DNNData *input)
 {
     return ctx->model->get_input(ctx->model->model, input, ctx->model_inputname);
diff --git a/libavfilter/dnn_filter_common.h b/libavfilter/dnn_filter_common.h
index 79c4d3efe3..0e88b88bdd 100644
--- a/libavfilter/dnn_filter_common.h
+++ b/libavfilter/dnn_filter_common.h
@@ -48,6 +48,7 @@ typedef struct DnnContext {
 
 
 int ff_dnn_init(DnnContext *ctx, DNNFunctionType func_type, AVFilterContext *filter_ctx);
+int ff_dnn_set_proc(DnnContext *ctx, PRE_POST_PROC pre_proc, PRE_POST_PROC post_proc);
 DNNReturnType ff_dnn_get_input(DnnContext *ctx, DNNData *input);
 DNNReturnType ff_dnn_get_output(DnnContext *ctx, int input_width, int input_height, int *output_width, int *output_height);
 DNNReturnType ff_dnn_execute_model(DnnContext *ctx, AVFrame *in_frame, AVFrame *out_frame);
diff --git a/libavfilter/dnn_interface.h b/libavfilter/dnn_interface.h
index d3a0c58a61..90a08129f4 100644
--- a/libavfilter/dnn_interface.h
+++ b/libavfilter/dnn_interface.h
@@ -63,6 +63,8 @@ typedef struct DNNData{
     DNNColorOrder order;
 } DNNData;
 
+typedef int (*PRE_POST_PROC)(AVFrame *frame, DNNData *model, AVFilterContext *filter_ctx);
+
 typedef struct DNNModel{
     // Stores model that can be different for different backends.
     void *model;
@@ -80,10 +82,10 @@ typedef struct DNNModel{
                                 const char *output_name, int *output_width, int *output_height);
     // set the pre process to transfer data from AVFrame to DNNData
     // the default implementation within DNN is used if it is not provided by the filter
-    int (*pre_proc)(AVFrame *frame_in, DNNData *model_input, AVFilterContext *filter_ctx);
+    PRE_POST_PROC pre_proc;
     // set the post process to transfer data from DNNData to AVFrame
     // the default implementation within DNN is used if it is not provided by the filter
-    int (*post_proc)(AVFrame *frame_out, DNNData *model_output, AVFilterContext *filter_ctx);
+    PRE_POST_PROC post_proc;
 } DNNModel;
 
 // Stores pointers to functions for loading, executing, freeing DNN models for one of the backends.
diff --git a/libavfilter/vf_dnn_detect.c b/libavfilter/vf_dnn_detect.c
new file mode 100644
index 0000000000..bac5035ae8
--- /dev/null
+++ b/libavfilter/vf_dnn_detect.c
@@ -0,0 +1,356 @@
+/*
+ * Copyright (c) 2021
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * implementing an object detecting filter using deep learning networks.
+ */
+
+#include "libavformat/avio.h"
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+#include "libavutil/avassert.h"
+#include "libavutil/imgutils.h"
+#include "filters.h"
+#include "dnn_filter_common.h"
+#include "formats.h"
+#include "internal.h"
+#include "libavutil/time.h"
+#include "libavutil/dnn_bbox.h"
+
+typedef struct DnnDetectContext { // private context of the dnn_detect filter (see priv_size)
+    const AVClass *class;   // class pointer; the filter's priv_class is dnn_detect_class
+    DnnContext dnnctx;      // common DNN state: set up by ff_dnn_init(), released by ff_dnn_uninit()
+    float conf;             // "conf" option: detections scoring below this are skipped
+    int model_input_width;  // model input width, read via ff_dnn_get_input() in dnn_detect_init()
+    int model_input_height; // model input height, read via ff_dnn_get_input() in dnn_detect_init()
+} DnnDetectContext;
+
+#define OFFSET(x) offsetof(DnnDetectContext, dnnctx.x)  // offset of a field of the embedded DnnContext
+#define OFFSET2(x) offsetof(DnnDetectContext, x)        // offset of a field of DnnDetectContext itself
+#define FLAGS AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM
+static const AVOption dnn_detect_options[] = { // option table; the default backend value 2 equals the "openvino" constant
+    { "dnn_backend", "DNN backend",                OFFSET(backend_type),     AV_OPT_TYPE_INT,       { .i64 = 2 },    INT_MIN, INT_MAX, FLAGS, "backend" },
+#if (CONFIG_LIBOPENVINO == 1) // the named constant is only compiled in when this config macro is 1
+    { "openvino",    "openvino backend flag",      0,                        AV_OPT_TYPE_CONST,     { .i64 = 2 },    0, 0, FLAGS, "backend" },
+#endif
+    DNN_COMMON_OPTIONS // option entries from a macro that is not defined in this file
+    { "conf",       "threshold of confidence",    OFFSET2(conf),             AV_OPT_TYPE_FLOAT,     { .dbl = 0.5 },  0, 1, FLAGS},
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(dnn_detect); // provides dnn_detect_class, used as ff_vf_dnn_detect.priv_class
+
+/**
+ * Post-process callback: turn the model's detection rows into AV_FRAME_DATA_DNN_BBOXES side data.
+ * output is read as output->height rows of output->width floats, each row laid out as
+ * [image_id, label, conf, x0, y0, x1, y1]; rows with conf below the "conf" option are skipped.
+ * @return 0 on success (also when nothing is detected), a negative value on error
+ */
+static int dnn_detect_post_proc(AVFrame *frame, DNNData *output, AVFilterContext *filter_ctx)
+{
+    DnnDetectContext *ctx = filter_ctx->priv;
+    float conf_threshold = ctx->conf;
+    int proposal_count = output->height;
+    int detect_size = output->width;
+    float *detections = output->data;
+    int nb_bbox = 0;
+    AVFrameSideData *sd;
+    AVDnnBoundingBox *bbox;
+
+    // every row is indexed up to element 6 below; a narrower row would be read out of bounds
+    if (detect_size < 7) {
+        av_log(filter_ctx, AV_LOG_ERROR, "invalid detection size %d, at least 7 is expected.\n", detect_size);
+        return AVERROR(EINVAL);
+    }
+
+    sd = av_frame_get_side_data(frame, AV_FRAME_DATA_DNN_BBOXES);
+    if (sd) {
+        av_log(filter_ctx, AV_LOG_ERROR, "already have dnn bounding boxes in side data.\n");
+        return -1;
+    }
+
+    // count the kept detections first so the side data can be sized exactly
+    for (int i = 0; i < proposal_count; ++i) {
+        if (detections[i * detect_size + 2] < conf_threshold)
+            continue;
+        nb_bbox++;
+    }
+
+    if (nb_bbox == 0) {
+        av_log(filter_ctx, AV_LOG_VERBOSE, "nothing detected in this frame.\n");
+        return 0;
+    }
+
+    sd = av_frame_new_side_data(frame, AV_FRAME_DATA_DNN_BBOXES,
+                                sizeof(AVDnnBoundingBox) * nb_bbox);
+    if (!sd) {
+        av_log(filter_ctx, AV_LOG_ERROR, "failed to allocate side data for AV_FRAME_DATA_DNN_BBOXES with %d bboxes\n", nb_bbox);
+        return -1;
+    }
+
+    bbox = (AVDnnBoundingBox *)sd->data;
+    for (int i = 0; i < proposal_count; ++i) {
+        const float *det = detections + i * detect_size;
+        float conf = det[2];
+        if (conf < conf_threshold)
+            continue;
+        *bbox = (AVDnnBoundingBox) {
+            .self_size = sizeof(*bbox),
+            .model_input_width = ctx->model_input_width,
+            .model_input_height = ctx->model_input_height,
+            .left = (int)(det[3] * ctx->model_input_width),
+            .right = (int)(det[5] * ctx->model_input_width),
+            .top = (int)(det[4] * ctx->model_input_height),
+            .bottom = (int)(det[6] * ctx->model_input_height),
+            .detect_label = (int)det[1],
+            .detect_conf = av_make_q((int)(conf * 10000), 10000),
+            .classify_count = 0,
+        };
+        // the last counted box is written: stop without stepping past the allocation
+        if (--nb_bbox == 0)
+            break;
+        bbox++;
+    }
+    return 0;
+}
+
+// Filter init: set up the common DNN context, install the detection post-processor, cache the model input size.
+static av_cold int dnn_detect_init(AVFilterContext *context)
+{
+    DnnDetectContext *s = context->priv;
+    DnnContext *dnn = &s->dnnctx;
+    DNNData input_desc;
+    int err = ff_dnn_init(dnn, DFT_ANALYTICS_DETECT, context);
+    if (err < 0)
+        return err;
+    // no custom pre-processing (NULL); dnn_detect_post_proc consumes the model output
+    ff_dnn_set_proc(dnn, NULL, dnn_detect_post_proc);
+
+    if (ff_dnn_get_input(dnn, &input_desc) != DNN_SUCCESS) {
+        av_log(s, AV_LOG_ERROR, "could not get input from the model.\n");
+        return AVERROR(EIO);
+    }
+    s->model_input_width  = input_desc.width;
+    s->model_input_height = input_desc.height;
+    return 0;
+}
+
+// Build the list of pixel formats this filter supports and set it as the common format list.
+static int dnn_detect_query_formats(AVFilterContext *context)
+{
+    static const enum AVPixelFormat supported_fmts[] = {
+        AV_PIX_FMT_RGB24,   AV_PIX_FMT_BGR24,
+        AV_PIX_FMT_GRAY8,   AV_PIX_FMT_GRAYF32,
+        AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV444P,
+        AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV411P, AV_PIX_FMT_NV12,
+        AV_PIX_FMT_NONE     // list terminator
+    };
+
+    return ff_set_common_formats(context, ff_make_format_list(supported_fmts));
+}
+
+/* Sync path for one frame: run the model on it and forward that same frame downstream.
+ * If execution fails, the frame is freed and AVERROR(EIO) is returned. */
+static int dnn_detect_filter_frame(AVFilterLink *inlink, AVFrame *in)
+{
+    AVFilterContext *filter = inlink->dst;
+    AVFilterLink *next = filter->outputs[0];
+    DnnDetectContext *s = filter->priv;
+
+    // NULL output frame: the input frame itself is what gets forwarded
+    if (ff_dnn_execute_model(&s->dnnctx, in, NULL) == DNN_SUCCESS)
+        return ff_filter_frame(next, in);
+
+    av_log(s, AV_LOG_ERROR, "failed to execute model\n");
+    av_frame_free(&in);
+    return AVERROR(EIO);
+}
+
+/* activate() body for sync mode: consume queued input frames, run detection on them
+ * and relay EOF and frame requests between the input and output links. */
+static int dnn_detect_activate_sync(AVFilterContext *filter_ctx)
+{
+    AVFilterLink *src = filter_ctx->inputs[0];
+    AVFilterLink *dst = filter_ctx->outputs[0];
+    AVFrame *frame = NULL;
+    int64_t status_pts;
+    int rc, link_status;
+    int forwarded = 0;
+
+    FF_FILTER_FORWARD_STATUS_BACK(dst, src);
+
+    // after a frame is handled, rc holds dnn_detect_filter_frame()'s result, which decides whether to loop
+    do {
+        rc = ff_inlink_consume_frame(src, &frame);
+        if (rc < 0)
+            return rc;
+        if (rc == 0)
+            break;
+        rc = dnn_detect_filter_frame(src, frame);
+        if (rc < 0)
+            return rc;
+        forwarded = 1;
+    } while (rc > 0);
+
+    // a frame went downstream: return so the next filter gets scheduled
+    if (forwarded)
+        return 0;
+
+    if (ff_inlink_acknowledge_status(src, &link_status, &status_pts) && link_status == AVERROR_EOF) {
+        ff_outlink_set_status(dst, link_status, status_pts);
+        return rc;
+    }
+
+    FF_FILTER_FORWARD_WANTED(dst, src);
+
+    return FFERROR_NOT_READY;
+}
+
+/* Flush the DNN backend and forward every frame it still hands back. If out_pts is
+ * non-NULL, it is set to (pts of the frame just forwarded + pts). Returns 0 or a negative value. */
+static int dnn_detect_flush_frame(AVFilterLink *outlink, int64_t pts, int64_t *out_pts)
+{
+    DnnDetectContext *ctx = outlink->src->priv;
+    int ret;
+    DNNAsyncStatusType async_state;
+
+    if (ff_dnn_flush(&ctx->dnnctx) != DNN_SUCCESS)
+        return -1;
+
+    do {
+        AVFrame *in_frame = NULL, *out_frame = NULL;
+        async_state = ff_dnn_get_async_result(&ctx->dnnctx, &in_frame, &out_frame);
+        if (out_frame) {
+            // ff_filter_frame() takes over the frame reference, so read pts before the call
+            int64_t frame_pts = out_frame->pts;
+            av_assert0(in_frame == out_frame);
+            ret = ff_filter_frame(outlink, out_frame);
+            if (ret < 0)
+                return ret;
+            if (out_pts)
+                *out_pts = frame_pts + pts;
+        }
+        av_usleep(5000); // wait 5000 us before polling the backend again
+    } while (async_state >= DAST_NOT_READY);
+    return 0;
+}
+
+/* activate() body for async mode: submit every available input frame for inference,
+ * forward the frames whose results are ready, and flush the backend once EOF is seen. */
+static int dnn_detect_activate_async(AVFilterContext *filter_ctx)
+{
+    DnnDetectContext *s = filter_ctx->priv;
+    AVFilterLink *src = filter_ctx->inputs[0];
+    AVFilterLink *dst = filter_ctx->outputs[0];
+    AVFrame *frame = NULL;
+    int64_t status_pts;
+    int rc, link_status;
+    int forwarded = 0;
+    int poll_state;
+
+    FF_FILTER_FORWARD_STATUS_BACK(dst, src);
+
+    // submit every queued input frame to the backend
+    while ((rc = ff_inlink_consume_frame(src, &frame)) > 0) {
+        // NOTE(review): frame is not freed on this error path - confirm who owns it
+        // when ff_dnn_execute_model_async() fails
+        if (ff_dnn_execute_model_async(&s->dnnctx, frame, NULL) != DNN_SUCCESS)
+            return AVERROR(EIO);
+    }
+    if (rc < 0)
+        return rc;
+
+    // collect finished frames for as long as the backend reports DAST_SUCCESS
+    do {
+        AVFrame *done_in = NULL;
+        AVFrame *done_out = NULL;
+
+        poll_state = ff_dnn_get_async_result(&s->dnnctx, &done_in, &done_out);
+        if (!done_out)
+            continue;
+        av_assert0(done_in == done_out);
+        rc = ff_filter_frame(dst, done_out);
+        if (rc < 0)
+            return rc;
+        forwarded = 1;
+    } while (poll_state == DAST_SUCCESS);
+
+    // something went downstream: return so the next filter gets scheduled
+    if (forwarded)
+        return 0;
+
+    if (ff_inlink_acknowledge_status(src, &link_status, &status_pts) && link_status == AVERROR_EOF) {
+        int64_t eof_pts = status_pts;
+        // at EOF, forward whatever the flush returns before setting the output status
+        rc = dnn_detect_flush_frame(dst, status_pts, &eof_pts);
+        ff_outlink_set_status(dst, link_status, eof_pts);
+        return rc;
+    }
+
+    FF_FILTER_FORWARD_WANTED(dst, src);
+
+    return 0;
+}
+
+/* Filter activate callback: run the async or the sync implementation,
+ * depending on the async flag stored in the DNN context. */
+static int dnn_detect_activate(AVFilterContext *filter_ctx)
+{
+    const DnnDetectContext *s = filter_ctx->priv;
+
+    return s->dnnctx.async ? dnn_detect_activate_async(filter_ctx)
+                           : dnn_detect_activate_sync(filter_ctx);
+}
+
+/* Filter teardown: release the embedded DNN context set up by dnn_detect_init(). */
+static av_cold void dnn_detect_uninit(AVFilterContext *ctx)
+{
+    ff_dnn_uninit(&((DnnDetectContext *)ctx->priv)->dnnctx);
+}
+
+static const AVFilterPad dnn_detect_inputs[] = {  // the filter's only input: one video pad
+    {
+        .type = AVMEDIA_TYPE_VIDEO,
+        .name = "default",
+    },
+    { NULL }  // end-of-list entry
+};
+
+static const AVFilterPad dnn_detect_outputs[] = { // the filter's only output: one video pad
+    {
+        .type = AVMEDIA_TYPE_VIDEO,
+        .name = "default",
+    },
+    { NULL }  // end-of-list entry
+};
+
+AVFilter ff_vf_dnn_detect = { // filter definition, declared extern in allfilters.c
+    .name          = "dnn_detect",
+    .description   = NULL_IF_CONFIG_SMALL("Apply DNN detect filter to the input."),
+    .priv_class    = &dnn_detect_class,
+    .priv_size     = sizeof(DnnDetectContext),
+    .inputs        = dnn_detect_inputs,
+    .outputs       = dnn_detect_outputs,
+    .query_formats = dnn_detect_query_formats,
+    .init          = dnn_detect_init,
+    .activate      = dnn_detect_activate,
+    .uninit        = dnn_detect_uninit,
+};