Message ID | 20210808105539.18822-9-shubhanshu.e01@gmail.com |
---|---|
State | Accepted |
Commit | 2063745a93f3a17ee14ffd01905878b8fdc17023 |
Headers | show |
Series | [FFmpeg-devel,v3,1/9,GSoC] lavfi/dnn: Add Async Execution Mechanism and Documentation | expand |
Context | Check | Description |
---|---|---|
andriy/x86_make | success | Make finished |
andriy/x86_make_fate | success | Make fate finished |
andriy/PPC64_make | success | Make finished |
andriy/PPC64_make_fate | success | Make fate finished |
> -----Original Message----- > From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of > Shubhanshu Saxena > Sent: 2021年8月8日 18:56 > To: ffmpeg-devel@ffmpeg.org > Cc: Shubhanshu Saxena <shubhanshu.e01@gmail.com> > Subject: [FFmpeg-devel] [PATCH v3 9/9] [GSoC] lavfi/dnn: > DNNAsyncExecModule Execution Failure Handling > > This commit adds the case handling if the asynchronous execution of a request > fails by checking the exit status of the thread when joining before starting > another execution. On failure, it does the cleanup as well. > > Signed-off-by: Shubhanshu Saxena <shubhanshu.e01@gmail.com> > --- > libavfilter/dnn/dnn_backend_common.c | 23 +++++++++++++++++++---- > libavfilter/dnn/dnn_backend_tf.c | 10 +++++++++- > 2 files changed, 28 insertions(+), 5 deletions(-) > > diff --git a/libavfilter/dnn/dnn_backend_common.c > b/libavfilter/dnn/dnn_backend_common.c > index 470fffa2ae..426683b73d 100644 > --- a/libavfilter/dnn/dnn_backend_common.c > +++ b/libavfilter/dnn/dnn_backend_common.c > @@ -23,6 +23,9 @@ > > #include "dnn_backend_common.h" > > +#define DNN_ASYNC_SUCCESS (void *)0 > +#define DNN_ASYNC_FAIL (void *)-1 > + > int ff_check_exec_params(void *ctx, DNNBackendType backend, > DNNFunctionType func_type, DNNExecBaseParams *exec_params) { > if (!exec_params) { > @@ -79,18 +82,25 @@ static void *async_thread_routine(void *args) > DNNAsyncExecModule *async_module = args; > void *request = async_module->args; > > - async_module->start_inference(request); > + if (async_module->start_inference(request) != DNN_SUCCESS) { > + return DNN_ASYNC_FAIL; > + } > async_module->callback(request); > - return NULL; > + return DNN_ASYNC_SUCCESS; > } > > DNNReturnType ff_dnn_async_module_cleanup(DNNAsyncExecModule > *async_module) { > + void *status = 0; > if (!async_module) { > return DNN_ERROR; > } > #if HAVE_PTHREAD_CANCEL > - pthread_join(async_module->thread_id, NULL); > + pthread_join(async_module->thread_id, &status); > + if (status == DNN_ASYNC_FAIL) { > + 
av_log(NULL, AV_LOG_ERROR, "Last Inference Failed.\n"); > + return DNN_ERROR; > + } > #endif > async_module->start_inference = NULL; > async_module->callback = NULL; > @@ -101,6 +111,7 @@ DNNReturnType > ff_dnn_async_module_cleanup(DNNAsyncExecModule *async_module) > DNNReturnType ff_dnn_start_inference_async(void *ctx, > DNNAsyncExecModule *async_module) { > int ret; > + void *status = 0; > > if (!async_module) { > av_log(ctx, AV_LOG_ERROR, "async_module is null when starting async > inference.\n"); @@ -108,7 +119,11 @@ DNNReturnType > ff_dnn_start_inference_async(void *ctx, DNNAsyncExecModule *async_ > } > > #if HAVE_PTHREAD_CANCEL > - pthread_join(async_module->thread_id, NULL); > + pthread_join(async_module->thread_id, &status); > + if (status == DNN_ASYNC_FAIL) { > + av_log(ctx, AV_LOG_ERROR, "Unable to start inference as previous > inference failed.\n"); > + return DNN_ERROR; > + } > ret = pthread_create(&async_module->thread_id, NULL, > async_thread_routine, async_module); > if (ret != 0) { > av_log(ctx, AV_LOG_ERROR, "Unable to start async inference.\n"); diff --git > a/libavfilter/dnn/dnn_backend_tf.c b/libavfilter/dnn/dnn_backend_tf.c > index fb3f6f5ea6..ffec1b1328 100644 > --- a/libavfilter/dnn/dnn_backend_tf.c > +++ b/libavfilter/dnn/dnn_backend_tf.c > @@ -91,6 +91,7 @@ AVFILTER_DEFINE_CLASS(dnn_tensorflow); > > static DNNReturnType execute_model_tf(TFRequestItem *request, Queue > *inference_queue); static void infer_completion_callback(void *args); > +static inline void destroy_request_item(TFRequestItem **arg); > > static void free_buffer(void *data, size_t length) { @@ -172,6 +173,10 @@ > static DNNReturnType tf_start_inference(void *args) > request->status); > if (TF_GetCode(request->status) != TF_OK) { > av_log(&tf_model->ctx, AV_LOG_ERROR, "%s", TF_Message(request- > >status)); > + tf_free_request(infer_request); > + if (ff_safe_queue_push_back(tf_model->request_queue, request) < 0) { > + destroy_request_item(&request); > + } > return DNN_ERROR; > 
} > return DNN_SUCCESS; > @@ -1095,7 +1100,10 @@ static DNNReturnType > execute_model_tf(TFRequestItem *request, Queue *inference_q > } > > if (task->async) { > - return ff_dnn_start_inference_async(ctx, &request->exec_module); > + if (ff_dnn_start_inference_async(ctx, &request->exec_module) != > DNN_SUCCESS) { > + goto err; > + } > + return DNN_SUCCESS; > } else { > if (tf_start_inference(request) != DNN_SUCCESS) { > goto err; > -- > 2.25.1 LGTM, those patches function well and tensorflow backend performs much better. > > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org > with subject "unsubscribe".
> -----Original Message----- > From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of Fu, > Ting > Sent: 2021年8月9日 18:13 > To: FFmpeg development discussions and patches <ffmpeg- > devel@ffmpeg.org> > Subject: Re: [FFmpeg-devel] [PATCH v3 9/9] [GSoC] lavfi/dnn: > DNNAsyncExecModule Execution Failure Handling > > > > > -----Original Message----- > > From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of > > Shubhanshu Saxena > > Sent: 2021年8月8日 18:56 > > To: ffmpeg-devel@ffmpeg.org > > Cc: Shubhanshu Saxena <shubhanshu.e01@gmail.com> > > Subject: [FFmpeg-devel] [PATCH v3 9/9] [GSoC] lavfi/dnn: > > DNNAsyncExecModule Execution Failure Handling > > > > This commit adds the case handling if the asynchronous execution of a > > request fails by checking the exit status of the thread when joining > > before starting another execution. On failure, it does the cleanup as well. > > > > Signed-off-by: Shubhanshu Saxena <shubhanshu.e01@gmail.com> > > --- > > libavfilter/dnn/dnn_backend_common.c | 23 +++++++++++++++++++---- > > libavfilter/dnn/dnn_backend_tf.c | 10 +++++++++- > > 2 files changed, 28 insertions(+), 5 deletions(-) > > > > diff --git a/libavfilter/dnn/dnn_backend_common.c > > b/libavfilter/dnn/dnn_backend_common.c > > index 470fffa2ae..426683b73d 100644 > > --- a/libavfilter/dnn/dnn_backend_common.c > > +++ b/libavfilter/dnn/dnn_backend_common.c > > @@ -23,6 +23,9 @@ > > > > #include "dnn_backend_common.h" > > > > +#define DNN_ASYNC_SUCCESS (void *)0 > > +#define DNN_ASYNC_FAIL (void *)-1 > > + > > int ff_check_exec_params(void *ctx, DNNBackendType backend, > > DNNFunctionType func_type, DNNExecBaseParams *exec_params) { > > if (!exec_params) { > > @@ -79,18 +82,25 @@ static void *async_thread_routine(void *args) > > DNNAsyncExecModule *async_module = args; > > void *request = async_module->args; > > > > - async_module->start_inference(request); > > + if (async_module->start_inference(request) != DNN_SUCCESS) { > > + return DNN_ASYNC_FAIL; > 
> + } > > async_module->callback(request); > > - return NULL; > > + return DNN_ASYNC_SUCCESS; > > } > > > > DNNReturnType ff_dnn_async_module_cleanup(DNNAsyncExecModule > > *async_module) { > > + void *status = 0; > > if (!async_module) { > > return DNN_ERROR; > > } > > #if HAVE_PTHREAD_CANCEL > > - pthread_join(async_module->thread_id, NULL); > > + pthread_join(async_module->thread_id, &status); > > + if (status == DNN_ASYNC_FAIL) { > > + av_log(NULL, AV_LOG_ERROR, "Last Inference Failed.\n"); > > + return DNN_ERROR; > > + } > > #endif > > async_module->start_inference = NULL; > > async_module->callback = NULL; > > @@ -101,6 +111,7 @@ DNNReturnType > > ff_dnn_async_module_cleanup(DNNAsyncExecModule *async_module) > > DNNReturnType ff_dnn_start_inference_async(void *ctx, > > DNNAsyncExecModule *async_module) { > > int ret; > > + void *status = 0; > > > > if (!async_module) { > > av_log(ctx, AV_LOG_ERROR, "async_module is null when starting > > async inference.\n"); @@ -108,7 +119,11 @@ DNNReturnType > > ff_dnn_start_inference_async(void *ctx, DNNAsyncExecModule *async_ > > } > > > > #if HAVE_PTHREAD_CANCEL > > - pthread_join(async_module->thread_id, NULL); > > + pthread_join(async_module->thread_id, &status); > > + if (status == DNN_ASYNC_FAIL) { > > + av_log(ctx, AV_LOG_ERROR, "Unable to start inference as > > + previous > > inference failed.\n"); > > + return DNN_ERROR; > > + } > > ret = pthread_create(&async_module->thread_id, NULL, > > async_thread_routine, async_module); > > if (ret != 0) { > > av_log(ctx, AV_LOG_ERROR, "Unable to start async > > inference.\n"); diff --git a/libavfilter/dnn/dnn_backend_tf.c > > b/libavfilter/dnn/dnn_backend_tf.c > > index fb3f6f5ea6..ffec1b1328 100644 > > --- a/libavfilter/dnn/dnn_backend_tf.c > > +++ b/libavfilter/dnn/dnn_backend_tf.c > > @@ -91,6 +91,7 @@ AVFILTER_DEFINE_CLASS(dnn_tensorflow); > > > > static DNNReturnType execute_model_tf(TFRequestItem *request, > Queue > > *inference_queue); static void 
infer_completion_callback(void *args); > > +static inline void destroy_request_item(TFRequestItem **arg); > > > > static void free_buffer(void *data, size_t length) { @@ -172,6 > > +173,10 @@ static DNNReturnType tf_start_inference(void *args) > > request->status); > > if (TF_GetCode(request->status) != TF_OK) { > > av_log(&tf_model->ctx, AV_LOG_ERROR, "%s", > > TF_Message(request- > > >status)); > > + tf_free_request(infer_request); > > + if (ff_safe_queue_push_back(tf_model->request_queue, request) < > 0) { > > + destroy_request_item(&request); > > + } > > return DNN_ERROR; > > } > > return DNN_SUCCESS; > > @@ -1095,7 +1100,10 @@ static DNNReturnType > > execute_model_tf(TFRequestItem *request, Queue *inference_q > > } > > > > if (task->async) { > > - return ff_dnn_start_inference_async(ctx, &request->exec_module); > > + if (ff_dnn_start_inference_async(ctx, &request->exec_module) > > + != > > DNN_SUCCESS) { > > + goto err; > > + } > > + return DNN_SUCCESS; > > } else { > > if (tf_start_inference(request) != DNN_SUCCESS) { > > goto err; > > -- > > 2.25.1 > > LGTM, those patches function well and tensorflow backend performs much > better. > Thanks for the review, will push tomorrow.
diff --git a/libavfilter/dnn/dnn_backend_common.c b/libavfilter/dnn/dnn_backend_common.c index 470fffa2ae..426683b73d 100644 --- a/libavfilter/dnn/dnn_backend_common.c +++ b/libavfilter/dnn/dnn_backend_common.c @@ -23,6 +23,9 @@ #include "dnn_backend_common.h" +#define DNN_ASYNC_SUCCESS (void *)0 +#define DNN_ASYNC_FAIL (void *)-1 + int ff_check_exec_params(void *ctx, DNNBackendType backend, DNNFunctionType func_type, DNNExecBaseParams *exec_params) { if (!exec_params) { @@ -79,18 +82,25 @@ static void *async_thread_routine(void *args) DNNAsyncExecModule *async_module = args; void *request = async_module->args; - async_module->start_inference(request); + if (async_module->start_inference(request) != DNN_SUCCESS) { + return DNN_ASYNC_FAIL; + } async_module->callback(request); - return NULL; + return DNN_ASYNC_SUCCESS; } DNNReturnType ff_dnn_async_module_cleanup(DNNAsyncExecModule *async_module) { + void *status = 0; if (!async_module) { return DNN_ERROR; } #if HAVE_PTHREAD_CANCEL - pthread_join(async_module->thread_id, NULL); + pthread_join(async_module->thread_id, &status); + if (status == DNN_ASYNC_FAIL) { + av_log(NULL, AV_LOG_ERROR, "Last Inference Failed.\n"); + return DNN_ERROR; + } #endif async_module->start_inference = NULL; async_module->callback = NULL; @@ -101,6 +111,7 @@ DNNReturnType ff_dnn_async_module_cleanup(DNNAsyncExecModule *async_module) DNNReturnType ff_dnn_start_inference_async(void *ctx, DNNAsyncExecModule *async_module) { int ret; + void *status = 0; if (!async_module) { av_log(ctx, AV_LOG_ERROR, "async_module is null when starting async inference.\n"); @@ -108,7 +119,11 @@ DNNReturnType ff_dnn_start_inference_async(void *ctx, DNNAsyncExecModule *async_ } #if HAVE_PTHREAD_CANCEL - pthread_join(async_module->thread_id, NULL); + pthread_join(async_module->thread_id, &status); + if (status == DNN_ASYNC_FAIL) { + av_log(ctx, AV_LOG_ERROR, "Unable to start inference as previous inference failed.\n"); + return DNN_ERROR; + } ret = 
pthread_create(&async_module->thread_id, NULL, async_thread_routine, async_module); if (ret != 0) { av_log(ctx, AV_LOG_ERROR, "Unable to start async inference.\n"); diff --git a/libavfilter/dnn/dnn_backend_tf.c b/libavfilter/dnn/dnn_backend_tf.c index fb3f6f5ea6..ffec1b1328 100644 --- a/libavfilter/dnn/dnn_backend_tf.c +++ b/libavfilter/dnn/dnn_backend_tf.c @@ -91,6 +91,7 @@ AVFILTER_DEFINE_CLASS(dnn_tensorflow); static DNNReturnType execute_model_tf(TFRequestItem *request, Queue *inference_queue); static void infer_completion_callback(void *args); +static inline void destroy_request_item(TFRequestItem **arg); static void free_buffer(void *data, size_t length) { @@ -172,6 +173,10 @@ static DNNReturnType tf_start_inference(void *args) request->status); if (TF_GetCode(request->status) != TF_OK) { av_log(&tf_model->ctx, AV_LOG_ERROR, "%s", TF_Message(request->status)); + tf_free_request(infer_request); + if (ff_safe_queue_push_back(tf_model->request_queue, request) < 0) { + destroy_request_item(&request); + } return DNN_ERROR; } return DNN_SUCCESS; @@ -1095,7 +1100,10 @@ static DNNReturnType execute_model_tf(TFRequestItem *request, Queue *inference_q } if (task->async) { - return ff_dnn_start_inference_async(ctx, &request->exec_module); + if (ff_dnn_start_inference_async(ctx, &request->exec_module) != DNN_SUCCESS) { + goto err; + } + return DNN_SUCCESS; } else { if (tf_start_inference(request) != DNN_SUCCESS) { goto err;
This commit adds handling for the case where the asynchronous execution of a request fails: the exit status of the thread is checked when it is joined, before another execution is started. On failure, it performs the cleanup as well. Signed-off-by: Shubhanshu Saxena <shubhanshu.e01@gmail.com> --- libavfilter/dnn/dnn_backend_common.c | 23 +++++++++++++++++++---- libavfilter/dnn/dnn_backend_tf.c | 10 +++++++++- 2 files changed, 28 insertions(+), 5 deletions(-)