From patchwork Fri Jan 8 08:36:34 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Guo, Yejun" X-Patchwork-Id: 24845 Return-Path: X-Original-To: patchwork@ffaux-bg.ffmpeg.org Delivered-To: patchwork@ffaux-bg.ffmpeg.org Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org [79.124.17.100]) by ffaux.localdomain (Postfix) with ESMTP id 0E9E744AC32 for ; Fri, 8 Jan 2021 10:38:20 +0200 (EET) Received: from [127.0.1.1] (localhost [127.0.0.1]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id D97C7689AB4; Fri, 8 Jan 2021 10:38:19 +0200 (EET) X-Original-To: ffmpeg-devel@ffmpeg.org Delivered-To: ffmpeg-devel@ffmpeg.org Received: from mga07.intel.com (mga07.intel.com [134.134.136.100]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTPS id 1D93C6880F8 for ; Fri, 8 Jan 2021 10:38:12 +0200 (EET) IronPort-SDR: x7CWSMDwTnkWyuPYaE0gMKw/M0BQjKJLGuU3Lk7wAAmGf4/A8APnJ52XE87LJ5h0Ujn8gck3XI nSUYF81tbrIQ== X-IronPort-AV: E=McAfee;i="6000,8403,9857"; a="241637751" X-IronPort-AV: E=Sophos;i="5.79,330,1602572400"; d="scan'208";a="241637751" Received: from orsmga008.jf.intel.com ([10.7.209.65]) by orsmga105.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 08 Jan 2021 00:38:09 -0800 IronPort-SDR: rlHjrIUnP5Lu8cGQjOfEUOOX08f+eaMm0bw+4Iv/DWjQOZLc3ld5GuXnU5iQT9x7k0v96a8mZJ +E5A3j9DXrQQ== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.79,330,1602572400"; d="scan'208";a="380054352" Received: from yguo18-skl-u1604.sh.intel.com ([10.239.159.53]) by orsmga008.jf.intel.com with ESMTP; 08 Jan 2021 00:38:03 -0800 From: "Guo, Yejun" To: ffmpeg-devel@ffmpeg.org Date: Fri, 8 Jan 2021 16:36:34 +0800 Message-Id: <20210108083634.16805-1-yejun.guo@intel.com> X-Mailer: git-send-email 2.17.1 Subject: [FFmpeg-devel] [PATCH] libavfilter/dnn: add batch mode for async execution X-BeenThere: ffmpeg-devel@ffmpeg.org X-Mailman-Version: 2.1.20 Precedence: list List-Id: FFmpeg development discussions and patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: FFmpeg development discussions and patches Cc: yejun.guo@intel.com MIME-Version: 1.0 Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" the default number of batch_size is 1 Signed-off-by: Xie, Lin Signed-off-by: Wu Zhiwen Signed-off-by: Guo, Yejun --- libavfilter/dnn/dnn_backend_openvino.c | 157 +++++++++++++++++++++---- libavfilter/dnn/dnn_backend_openvino.h | 1 + libavfilter/dnn/dnn_interface.c | 1 + libavfilter/dnn_interface.h | 2 + libavfilter/vf_dnn_processing.c | 36 +++++- 5 files changed, 173 insertions(+), 24 deletions(-) diff --git a/libavfilter/dnn/dnn_backend_openvino.c b/libavfilter/dnn/dnn_backend_openvino.c index d27e451eea..cb1bc3d22d 100644 --- a/libavfilter/dnn/dnn_backend_openvino.c +++ b/libavfilter/dnn/dnn_backend_openvino.c @@ -37,6 +37,7 @@ typedef struct OVOptions{ char *device_type; int nireq; + int batch_size; } OVOptions; typedef struct OVContext { @@ -70,7 +71,8 @@ typedef struct TaskItem { typedef struct RequestItem { ie_infer_request_t *infer_request; - TaskItem *task; + TaskItem **tasks; + int task_count; ie_complete_call_back_t callback; } RequestItem; @@ -83,6 +85,7 @@ typedef struct RequestItem { static const AVOption dnn_openvino_options[] = { { "device", "device to run model", OFFSET(options.device_type), AV_OPT_TYPE_STRING, { .str = "CPU" }, 0, 0, FLAGS }, { "nireq", "number of request", OFFSET(options.nireq), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, FLAGS }, + { "batch_size", "batch size per request", OFFSET(options.batch_size), AV_OPT_TYPE_INT, { .i64 = 1 }, 1, 1000, FLAGS}, { NULL } }; @@ -100,7 +103,19 @@ static DNNDataType precision_to_datatype(precision_e precision) } } -static DNNReturnType fill_model_input_ov(OVModel *ov_model, TaskItem *task, RequestItem *request) +static int get_datatype_size(DNNDataType dt) +{ + switch (dt) + { + case DNN_FLOAT: + return sizeof(float); + default: + av_assert0(!"not supported yet."); + return 1; + } +} + +static DNNReturnType fill_model_input_ov(OVModel *ov_model, RequestItem *request) { dimensions_t dims; precision_e precision; @@ -109,6 +124,7 @@ static DNNReturnType fill_model_input_ov(OVModel *ov_model, TaskItem *task, Requ IEStatusCode status; DNNData input; ie_blob_t *input_blob = NULL; + TaskItem *task = request->tasks[0]; status = ie_infer_request_get_blob(request->infer_request, task->input_name, &input_blob); if (status != OK) { @@ -134,12 +150,19 @@ static DNNReturnType fill_model_input_ov(OVModel *ov_model, TaskItem *task, Requ input.channels = dims.dims[1]; input.data = blob_buffer.buffer; input.dt = precision_to_datatype(precision); - if (task->do_ioproc) { - if (ov_model->model->pre_proc != NULL) { - ov_model->model->pre_proc(task->in_frame, &input, ov_model->model->filter_ctx); - } else { - proc_from_frame_to_dnn(task->in_frame, &input, ctx); + + av_assert0(request->task_count <= dims.dims[0]); + for (int i = 0; i < request->task_count; ++i) { + task = request->tasks[i]; + if (task->do_ioproc) { + if (ov_model->model->pre_proc != NULL) { + ov_model->model->pre_proc(task->in_frame, &input, ov_model->model->filter_ctx); + } else { + proc_from_frame_to_dnn(task->in_frame, &input, ctx); + } } + input.data = (uint8_t *)input.data + + input.width * input.height * input.channels * get_datatype_size(input.dt); } ie_blob_free(&input_blob); @@ -152,7 +175,7 @@ static void infer_completion_callback(void *args) precision_e precision; IEStatusCode status; RequestItem *request = args; - TaskItem *task = request->task; + TaskItem *task = request->tasks[0]; ie_blob_t *output_blob = NULL; ie_blob_buffer_t blob_buffer; DNNData output; @@ -194,41 +217,65 @@ static void infer_completion_callback(void *args) output.width = dims.dims[3]; output.dt = precision_to_datatype(precision); output.data = blob_buffer.buffer; - if (task->do_ioproc) { - if (task->ov_model->model->post_proc != NULL) { - task->ov_model->model->post_proc(task->out_frame, &output, task->ov_model->model->filter_ctx); + + av_assert0(request->task_count <= dims.dims[0]); + for (int i = 0; i < request->task_count; ++i) { + task = request->tasks[i]; + if (task->do_ioproc) { + if (task->ov_model->model->post_proc != NULL) { + task->ov_model->model->post_proc(task->out_frame, &output, task->ov_model->model->filter_ctx); + } else { + proc_from_dnn_to_frame(task->out_frame, &output, ctx); + } } else { - proc_from_dnn_to_frame(task->out_frame, &output, ctx); + task->out_frame->width = output.width; + task->out_frame->height = output.height; } - } else { - task->out_frame->width = output.width; - task->out_frame->height = output.height; + task->done = 1; + output.data = (uint8_t *)output.data + + output.width * output.height * output.channels * get_datatype_size(output.dt); } ie_blob_free(&output_blob); + request->task_count = 0; + if (task->async) { - request->task = NULL; if (ff_safe_queue_push_back(task->ov_model->request_queue, request) < 0) { av_log(ctx, AV_LOG_ERROR, "Failed to push back request_queue.\n"); return; } } - - task->done = 1; } static DNNReturnType execute_model_ov(TaskItem *task, RequestItem *request) { IEStatusCode status; + DNNReturnType ret; OVContext *ctx = &task->ov_model->ctx; - DNNReturnType ret = fill_model_input_ov(task->ov_model, task, request); - if (ret != DNN_SUCCESS) { - return ret; + if (!request->tasks) { + request->tasks = av_malloc_array(ctx->options.batch_size, sizeof(*request->tasks)); + if (!request->tasks) { + av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory for tasks\n"); + return DNN_ERROR; + } + request->task_count = 0; } + request->tasks[request->task_count++] = task; + if (task->async) { - request->task = task; + if (request->task_count < ctx->options.batch_size) { + if (ff_safe_queue_push_back(task->ov_model->request_queue, request) < 0) { + av_log(ctx, AV_LOG_ERROR, "Failed to push back request_queue.\n"); + return DNN_ERROR; + } + return DNN_SUCCESS; + } + ret = fill_model_input_ov(task->ov_model, request); + if (ret != DNN_SUCCESS) { + return ret; + } status = ie_infer_set_completion_callback(request->infer_request, &request->callback); if (status != OK) { av_log(ctx, AV_LOG_ERROR, "Failed to set completion callback for inference\n"); @@ -241,12 +288,15 @@ static DNNReturnType execute_model_ov(TaskItem *task, RequestItem *request) } return DNN_SUCCESS; } else { + ret = fill_model_input_ov(task->ov_model, request); + if (ret != DNN_SUCCESS) { + return ret; + } status = ie_infer_request_infer(request->infer_request); if (status != OK) { av_log(ctx, AV_LOG_ERROR, "Failed to start synchronous model inference\n"); return DNN_ERROR; } - request->task = task; infer_completion_callback(request); return task->done ? DNN_SUCCESS : DNN_ERROR; } @@ -343,6 +393,8 @@ static DNNReturnType get_output_ov(void *model, const char *input_name, int inpu task.ov_model = ov_model; request.infer_request = ov_model->infer_request; + request.tasks = NULL; + request.task_count = 0; ret = execute_model_ov(&task, &request); *output_width = out_frame->width; @@ -393,6 +445,23 @@ DNNModel *ff_dnn_load_model_ov(const char *model_filename, const char *options, if (status != OK) goto err; + if (ctx->options.batch_size <= 0) { + ctx->options.batch_size = 1; + } + + if (ctx->options.batch_size > 1) { + input_shapes_t input_shapes; + status = ie_network_get_input_shapes(ov_model->network, &input_shapes); + if (status != OK) + goto err; + for (int i = 0; i < input_shapes.shape_num; i++) + input_shapes.shapes[i].shape.dims[0] = ctx->options.batch_size; + status = ie_network_reshape(ov_model->network, input_shapes); + ie_network_input_shapes_free(&input_shapes); + if (status != OK) + goto err; + } + status = ie_core_load_network(ov_model->core, ov_model->network, ctx->options.device_type, &config, &ov_model->exe_network); if (status != OK) { av_log(ctx, AV_LOG_ERROR, "Failed to init OpenVINO model\n"); @@ -497,6 +566,8 @@ DNNReturnType ff_dnn_execute_model_ov(const DNNModel *model, const char *input_n task.ov_model = ov_model; request.infer_request = ov_model->infer_request; + request.tasks = NULL; + request.task_count = 0; return execute_model_ov(&task, &request); } @@ -569,6 +640,43 @@ DNNAsyncStatusType ff_dnn_get_async_result_ov(const DNNModel *model, AVFrame **i return DAST_SUCCESS; } +DNNReturnType ff_dnn_flush_ov(const DNNModel *model) +{ + OVModel *ov_model = (OVModel *)model->model; + OVContext *ctx = &ov_model->ctx; + RequestItem *request; + IEStatusCode status; + DNNReturnType ret; + + request = ff_safe_queue_pop_front(ov_model->request_queue); + if (!request) { + av_log(ctx, AV_LOG_ERROR, "unable to get infer request.\n"); + return DNN_ERROR; + } + + if (request->task_count == 0) { + // no pending task need to flush + return DNN_ERROR; + } + + ret = fill_model_input_ov(ov_model, request); + if (ret != DNN_SUCCESS) { + return ret; + } + status = ie_infer_set_completion_callback(request->infer_request, &request->callback); + if (status != OK) { + av_log(ctx, AV_LOG_ERROR, "Failed to set completion callback for inference\n"); + return DNN_ERROR; + } + status = ie_infer_request_infer_async(request->infer_request); + if (status != OK) { + av_log(ctx, AV_LOG_ERROR, "Failed to start async inference\n"); + return DNN_ERROR; + } + + return DNN_SUCCESS; +} + void ff_dnn_free_model_ov(DNNModel **model) { if (*model){ @@ -578,12 +686,15 @@ void ff_dnn_free_model_ov(DNNModel **model) if (item && item->infer_request) { ie_infer_request_free(&item->infer_request); } + av_freep(&item->tasks); av_freep(&item); } ff_safe_queue_destroy(ov_model->request_queue); while (ff_queue_size(ov_model->task_queue) != 0) { TaskItem *item = ff_queue_pop_front(ov_model->task_queue); + av_frame_free(&item->in_frame); + av_frame_free(&item->out_frame); av_freep(&item); } ff_queue_destroy(ov_model->task_queue); diff --git a/libavfilter/dnn/dnn_backend_openvino.h b/libavfilter/dnn/dnn_backend_openvino.h index 1b70150040..23b819440e 100644 --- a/libavfilter/dnn/dnn_backend_openvino.h +++ b/libavfilter/dnn/dnn_backend_openvino.h @@ -36,6 +36,7 @@ DNNReturnType ff_dnn_execute_model_ov(const DNNModel *model, const char *input_n DNNReturnType ff_dnn_execute_model_async_ov(const DNNModel *model, const char *input_name, AVFrame *in_frame, const char **output_names, uint32_t nb_output, AVFrame *out_frame); DNNAsyncStatusType ff_dnn_get_async_result_ov(const DNNModel *model, AVFrame **in, AVFrame **out); +DNNReturnType ff_dnn_flush_ov(const DNNModel *model); void ff_dnn_free_model_ov(DNNModel **model); diff --git a/libavfilter/dnn/dnn_interface.c b/libavfilter/dnn/dnn_interface.c index e1b41a21e1..02e532fc1b 100644 --- a/libavfilter/dnn/dnn_interface.c +++ b/libavfilter/dnn/dnn_interface.c @@ -60,6 +60,7 @@ DNNModule *ff_get_dnn_module(DNNBackendType backend_type) dnn_module->execute_model = &ff_dnn_execute_model_ov; dnn_module->execute_model_async = &ff_dnn_execute_model_async_ov; dnn_module->get_async_result = &ff_dnn_get_async_result_ov; + dnn_module->flush = &ff_dnn_flush_ov; dnn_module->free_model = &ff_dnn_free_model_ov; #else av_freep(&dnn_module); diff --git a/libavfilter/dnn_interface.h b/libavfilter/dnn_interface.h index 9533c88829..ff338ea084 100644 --- a/libavfilter/dnn_interface.h +++ b/libavfilter/dnn_interface.h @@ -82,6 +82,8 @@ typedef struct DNNModule{ const char **output_names, uint32_t nb_output, AVFrame *out_frame); // Retrieve inference result. DNNAsyncStatusType (*get_async_result)(const DNNModel *model, AVFrame **in, AVFrame **out); + // Flush all the pending tasks. + DNNReturnType (*flush)(const DNNModel *model); // Frees memory allocated for model. void (*free_model)(DNNModel **model); } DNNModule; diff --git a/libavfilter/vf_dnn_processing.c b/libavfilter/vf_dnn_processing.c index fff5696a31..5033bbf482 100644 --- a/libavfilter/vf_dnn_processing.c +++ b/libavfilter/vf_dnn_processing.c @@ -33,6 +33,7 @@ #include "formats.h" #include "internal.h" #include "libswscale/swscale.h" +#include "libavutil/time.h" typedef struct DnnProcessingContext { const AVClass *class; @@ -329,6 +330,37 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in) return ff_filter_frame(outlink, out); } +static int flush_frame(AVFilterLink *outlink, int64_t pts, int64_t *out_pts) +{ + DnnProcessingContext *ctx = outlink->src->priv; + DNNReturnType ret; + DNNAsyncStatusType async_state; + + ret = (ctx->dnn_module->flush)(ctx->model); + if (ret != DNN_SUCCESS) { + return -1; + } + + do { + AVFrame *in_frame = NULL; + AVFrame *out_frame = NULL; + async_state = (ctx->dnn_module->get_async_result)(ctx->model, &in_frame, &out_frame); + if (out_frame) { + if (isPlanarYUV(in_frame->format)) + copy_uv_planes(ctx, out_frame, in_frame); + av_frame_free(&in_frame); + ret = ff_filter_frame(outlink, out_frame); + if (ret < 0) + return ret; + if (out_pts) + *out_pts = out_frame->pts + pts; + } + av_usleep(5000); + } while (async_state >= DAST_NOT_READY); + + return 0; +} + static int activate_sync(AVFilterContext *filter_ctx) { AVFilterLink *inlink = filter_ctx->inputs[0]; @@ -423,7 +455,9 @@ static int activate_async(AVFilterContext *filter_ctx) if (ff_inlink_acknowledge_status(inlink, &status, &pts)) { if (status == AVERROR_EOF) { - ff_outlink_set_status(outlink, status, pts); + int64_t out_pts = pts; + ret = flush_frame(outlink, pts, &out_pts); + ff_outlink_set_status(outlink, status, out_pts); return ret; } }