diff mbox series

[FFmpeg-devel,v3,9/9,GSoC] lavfi/dnn: DNNAsyncExecModule Execution Failure Handling

Message ID 20210808105539.18822-9-shubhanshu.e01@gmail.com
State Accepted
Commit 2063745a93f3a17ee14ffd01905878b8fdc17023
Headers show
Series [FFmpeg-devel,v3,1/9,GSoC] lavfi/dnn: Add Async Execution Mechanism and Documentation
Related show

Checks

Context Check Description
andriy/x86_make success Make finished
andriy/x86_make_fate success Make fate finished
andriy/PPC64_make success Make finished
andriy/PPC64_make_fate success Make fate finished

Commit Message

Shubhanshu Saxena Aug. 8, 2021, 10:55 a.m. UTC
This commit adds handling for the case where the asynchronous execution
of a request fails. The exit status of the thread is checked when it is
joined before another execution is started, and on failure the
cleanup is performed as well.

Signed-off-by: Shubhanshu Saxena <shubhanshu.e01@gmail.com>
---
 libavfilter/dnn/dnn_backend_common.c | 23 +++++++++++++++++++----
 libavfilter/dnn/dnn_backend_tf.c     | 10 +++++++++-
 2 files changed, 28 insertions(+), 5 deletions(-)

Comments

Fu, Ting Aug. 9, 2021, 10:12 a.m. UTC | #1
> -----Original Message-----
> From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of
> Shubhanshu Saxena
> Sent: 2021年8月8日 18:56
> To: ffmpeg-devel@ffmpeg.org
> Cc: Shubhanshu Saxena <shubhanshu.e01@gmail.com>
> Subject: [FFmpeg-devel] [PATCH v3 9/9] [GSoC] lavfi/dnn:
> DNNAsyncExecModule Execution Failure Handling
> 
> This commit adds the case handling if the asynchronous execution of a request
> fails by checking the exit status of the thread when joining before starting
> another execution. On failure, it does the cleanup as well.
> 
> Signed-off-by: Shubhanshu Saxena <shubhanshu.e01@gmail.com>
> ---
>  libavfilter/dnn/dnn_backend_common.c | 23 +++++++++++++++++++----
>  libavfilter/dnn/dnn_backend_tf.c     | 10 +++++++++-
>  2 files changed, 28 insertions(+), 5 deletions(-)
> 
> diff --git a/libavfilter/dnn/dnn_backend_common.c
> b/libavfilter/dnn/dnn_backend_common.c
> index 470fffa2ae..426683b73d 100644
> --- a/libavfilter/dnn/dnn_backend_common.c
> +++ b/libavfilter/dnn/dnn_backend_common.c
> @@ -23,6 +23,9 @@
> 
>  #include "dnn_backend_common.h"
> 
> +#define DNN_ASYNC_SUCCESS (void *)0
> +#define DNN_ASYNC_FAIL (void *)-1
> +
>  int ff_check_exec_params(void *ctx, DNNBackendType backend,
> DNNFunctionType func_type, DNNExecBaseParams *exec_params)  {
>      if (!exec_params) {
> @@ -79,18 +82,25 @@ static void *async_thread_routine(void *args)
>      DNNAsyncExecModule *async_module = args;
>      void *request = async_module->args;
> 
> -    async_module->start_inference(request);
> +    if (async_module->start_inference(request) != DNN_SUCCESS) {
> +        return DNN_ASYNC_FAIL;
> +    }
>      async_module->callback(request);
> -    return NULL;
> +    return DNN_ASYNC_SUCCESS;
>  }
> 
>  DNNReturnType ff_dnn_async_module_cleanup(DNNAsyncExecModule
> *async_module)  {
> +    void *status = 0;
>      if (!async_module) {
>          return DNN_ERROR;
>      }
>  #if HAVE_PTHREAD_CANCEL
> -    pthread_join(async_module->thread_id, NULL);
> +    pthread_join(async_module->thread_id, &status);
> +    if (status == DNN_ASYNC_FAIL) {
> +        av_log(NULL, AV_LOG_ERROR, "Last Inference Failed.\n");
> +        return DNN_ERROR;
> +    }
>  #endif
>      async_module->start_inference = NULL;
>      async_module->callback = NULL;
> @@ -101,6 +111,7 @@ DNNReturnType
> ff_dnn_async_module_cleanup(DNNAsyncExecModule *async_module)
> DNNReturnType ff_dnn_start_inference_async(void *ctx,
> DNNAsyncExecModule *async_module)  {
>      int ret;
> +    void *status = 0;
> 
>      if (!async_module) {
>          av_log(ctx, AV_LOG_ERROR, "async_module is null when starting async
> inference.\n"); @@ -108,7 +119,11 @@ DNNReturnType
> ff_dnn_start_inference_async(void *ctx, DNNAsyncExecModule *async_
>      }
> 
>  #if HAVE_PTHREAD_CANCEL
> -    pthread_join(async_module->thread_id, NULL);
> +    pthread_join(async_module->thread_id, &status);
> +    if (status == DNN_ASYNC_FAIL) {
> +        av_log(ctx, AV_LOG_ERROR, "Unable to start inference as previous
> inference failed.\n");
> +        return DNN_ERROR;
> +    }
>      ret = pthread_create(&async_module->thread_id, NULL,
> async_thread_routine, async_module);
>      if (ret != 0) {
>          av_log(ctx, AV_LOG_ERROR, "Unable to start async inference.\n"); diff --git
> a/libavfilter/dnn/dnn_backend_tf.c b/libavfilter/dnn/dnn_backend_tf.c
> index fb3f6f5ea6..ffec1b1328 100644
> --- a/libavfilter/dnn/dnn_backend_tf.c
> +++ b/libavfilter/dnn/dnn_backend_tf.c
> @@ -91,6 +91,7 @@ AVFILTER_DEFINE_CLASS(dnn_tensorflow);
> 
>  static DNNReturnType execute_model_tf(TFRequestItem *request, Queue
> *inference_queue);  static void infer_completion_callback(void *args);
> +static inline void destroy_request_item(TFRequestItem **arg);
> 
>  static void free_buffer(void *data, size_t length)  { @@ -172,6 +173,10 @@
> static DNNReturnType tf_start_inference(void *args)
>                    request->status);
>      if (TF_GetCode(request->status) != TF_OK) {
>          av_log(&tf_model->ctx, AV_LOG_ERROR, "%s", TF_Message(request-
> >status));
> +        tf_free_request(infer_request);
> +        if (ff_safe_queue_push_back(tf_model->request_queue, request) < 0) {
> +            destroy_request_item(&request);
> +        }
>          return DNN_ERROR;
>      }
>      return DNN_SUCCESS;
> @@ -1095,7 +1100,10 @@ static DNNReturnType
> execute_model_tf(TFRequestItem *request, Queue *inference_q
>      }
> 
>      if (task->async) {
> -        return ff_dnn_start_inference_async(ctx, &request->exec_module);
> +        if (ff_dnn_start_inference_async(ctx, &request->exec_module) !=
> DNN_SUCCESS) {
> +            goto err;
> +        }
> +        return DNN_SUCCESS;
>      } else {
>          if (tf_start_inference(request) != DNN_SUCCESS) {
>              goto err;
> --
> 2.25.1

LGTM, these patches function well and the TensorFlow backend performs much better.

> 
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email ffmpeg-devel-request@ffmpeg.org
> with subject "unsubscribe".
Guo, Yejun Aug. 9, 2021, 12:36 p.m. UTC | #2
> -----Original Message-----
> From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of Fu,
> Ting
> Sent: 2021年8月9日 18:13
> To: FFmpeg development discussions and patches <ffmpeg-
> devel@ffmpeg.org>
> Subject: Re: [FFmpeg-devel] [PATCH v3 9/9] [GSoC] lavfi/dnn:
> DNNAsyncExecModule Execution Failure Handling
> 
> 
> 
> > -----Original Message-----
> > From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of
> > Shubhanshu Saxena
> > Sent: 2021年8月8日 18:56
> > To: ffmpeg-devel@ffmpeg.org
> > Cc: Shubhanshu Saxena <shubhanshu.e01@gmail.com>
> > Subject: [FFmpeg-devel] [PATCH v3 9/9] [GSoC] lavfi/dnn:
> > DNNAsyncExecModule Execution Failure Handling
> >
> > This commit adds the case handling if the asynchronous execution of a
> > request fails by checking the exit status of the thread when joining
> > before starting another execution. On failure, it does the cleanup as well.
> >
> > Signed-off-by: Shubhanshu Saxena <shubhanshu.e01@gmail.com>
> > ---
> >  libavfilter/dnn/dnn_backend_common.c | 23 +++++++++++++++++++----
> >  libavfilter/dnn/dnn_backend_tf.c     | 10 +++++++++-
> >  2 files changed, 28 insertions(+), 5 deletions(-)
> >
> > diff --git a/libavfilter/dnn/dnn_backend_common.c
> > b/libavfilter/dnn/dnn_backend_common.c
> > index 470fffa2ae..426683b73d 100644
> > --- a/libavfilter/dnn/dnn_backend_common.c
> > +++ b/libavfilter/dnn/dnn_backend_common.c
> > @@ -23,6 +23,9 @@
> >
> >  #include "dnn_backend_common.h"
> >
> > +#define DNN_ASYNC_SUCCESS (void *)0
> > +#define DNN_ASYNC_FAIL (void *)-1
> > +
> >  int ff_check_exec_params(void *ctx, DNNBackendType backend,
> > DNNFunctionType func_type, DNNExecBaseParams *exec_params)  {
> >      if (!exec_params) {
> > @@ -79,18 +82,25 @@ static void *async_thread_routine(void *args)
> >      DNNAsyncExecModule *async_module = args;
> >      void *request = async_module->args;
> >
> > -    async_module->start_inference(request);
> > +    if (async_module->start_inference(request) != DNN_SUCCESS) {
> > +        return DNN_ASYNC_FAIL;
> > +    }
> >      async_module->callback(request);
> > -    return NULL;
> > +    return DNN_ASYNC_SUCCESS;
> >  }
> >
> >  DNNReturnType ff_dnn_async_module_cleanup(DNNAsyncExecModule
> > *async_module)  {
> > +    void *status = 0;
> >      if (!async_module) {
> >          return DNN_ERROR;
> >      }
> >  #if HAVE_PTHREAD_CANCEL
> > -    pthread_join(async_module->thread_id, NULL);
> > +    pthread_join(async_module->thread_id, &status);
> > +    if (status == DNN_ASYNC_FAIL) {
> > +        av_log(NULL, AV_LOG_ERROR, "Last Inference Failed.\n");
> > +        return DNN_ERROR;
> > +    }
> >  #endif
> >      async_module->start_inference = NULL;
> >      async_module->callback = NULL;
> > @@ -101,6 +111,7 @@ DNNReturnType
> > ff_dnn_async_module_cleanup(DNNAsyncExecModule *async_module)
> > DNNReturnType ff_dnn_start_inference_async(void *ctx,
> > DNNAsyncExecModule *async_module)  {
> >      int ret;
> > +    void *status = 0;
> >
> >      if (!async_module) {
> >          av_log(ctx, AV_LOG_ERROR, "async_module is null when starting
> > async inference.\n"); @@ -108,7 +119,11 @@ DNNReturnType
> > ff_dnn_start_inference_async(void *ctx, DNNAsyncExecModule *async_
> >      }
> >
> >  #if HAVE_PTHREAD_CANCEL
> > -    pthread_join(async_module->thread_id, NULL);
> > +    pthread_join(async_module->thread_id, &status);
> > +    if (status == DNN_ASYNC_FAIL) {
> > +        av_log(ctx, AV_LOG_ERROR, "Unable to start inference as
> > + previous
> > inference failed.\n");
> > +        return DNN_ERROR;
> > +    }
> >      ret = pthread_create(&async_module->thread_id, NULL,
> > async_thread_routine, async_module);
> >      if (ret != 0) {
> >          av_log(ctx, AV_LOG_ERROR, "Unable to start async
> > inference.\n"); diff --git a/libavfilter/dnn/dnn_backend_tf.c
> > b/libavfilter/dnn/dnn_backend_tf.c
> > index fb3f6f5ea6..ffec1b1328 100644
> > --- a/libavfilter/dnn/dnn_backend_tf.c
> > +++ b/libavfilter/dnn/dnn_backend_tf.c
> > @@ -91,6 +91,7 @@ AVFILTER_DEFINE_CLASS(dnn_tensorflow);
> >
> >  static DNNReturnType execute_model_tf(TFRequestItem *request,
> Queue
> > *inference_queue);  static void infer_completion_callback(void *args);
> > +static inline void destroy_request_item(TFRequestItem **arg);
> >
> >  static void free_buffer(void *data, size_t length)  { @@ -172,6
> > +173,10 @@ static DNNReturnType tf_start_inference(void *args)
> >                    request->status);
> >      if (TF_GetCode(request->status) != TF_OK) {
> >          av_log(&tf_model->ctx, AV_LOG_ERROR, "%s",
> > TF_Message(request-
> > >status));
> > +        tf_free_request(infer_request);
> > +        if (ff_safe_queue_push_back(tf_model->request_queue, request) <
> 0) {
> > +            destroy_request_item(&request);
> > +        }
> >          return DNN_ERROR;
> >      }
> >      return DNN_SUCCESS;
> > @@ -1095,7 +1100,10 @@ static DNNReturnType
> > execute_model_tf(TFRequestItem *request, Queue *inference_q
> >      }
> >
> >      if (task->async) {
> > -        return ff_dnn_start_inference_async(ctx, &request->exec_module);
> > +        if (ff_dnn_start_inference_async(ctx, &request->exec_module)
> > + !=
> > DNN_SUCCESS) {
> > +            goto err;
> > +        }
> > +        return DNN_SUCCESS;
> >      } else {
> >          if (tf_start_inference(request) != DNN_SUCCESS) {
> >              goto err;
> > --
> > 2.25.1
> 
> LGTM, those patches function well and tensorflow backend performs much
> better.
> 
Thanks for the review, will push tomorrow.
diff mbox series

Patch

diff --git a/libavfilter/dnn/dnn_backend_common.c b/libavfilter/dnn/dnn_backend_common.c
index 470fffa2ae..426683b73d 100644
--- a/libavfilter/dnn/dnn_backend_common.c
+++ b/libavfilter/dnn/dnn_backend_common.c
@@ -23,6 +23,9 @@ 
 
 #include "dnn_backend_common.h"
 
+#define DNN_ASYNC_SUCCESS (void *)0
+#define DNN_ASYNC_FAIL (void *)-1
+
 int ff_check_exec_params(void *ctx, DNNBackendType backend, DNNFunctionType func_type, DNNExecBaseParams *exec_params)
 {
     if (!exec_params) {
@@ -79,18 +82,25 @@  static void *async_thread_routine(void *args)
     DNNAsyncExecModule *async_module = args;
     void *request = async_module->args;
 
-    async_module->start_inference(request);
+    if (async_module->start_inference(request) != DNN_SUCCESS) {
+        return DNN_ASYNC_FAIL;
+    }
     async_module->callback(request);
-    return NULL;
+    return DNN_ASYNC_SUCCESS;
 }
 
 DNNReturnType ff_dnn_async_module_cleanup(DNNAsyncExecModule *async_module)
 {
+    void *status = 0;
     if (!async_module) {
         return DNN_ERROR;
     }
 #if HAVE_PTHREAD_CANCEL
-    pthread_join(async_module->thread_id, NULL);
+    pthread_join(async_module->thread_id, &status);
+    if (status == DNN_ASYNC_FAIL) {
+        av_log(NULL, AV_LOG_ERROR, "Last Inference Failed.\n");
+        return DNN_ERROR;
+    }
 #endif
     async_module->start_inference = NULL;
     async_module->callback = NULL;
@@ -101,6 +111,7 @@  DNNReturnType ff_dnn_async_module_cleanup(DNNAsyncExecModule *async_module)
 DNNReturnType ff_dnn_start_inference_async(void *ctx, DNNAsyncExecModule *async_module)
 {
     int ret;
+    void *status = 0;
 
     if (!async_module) {
         av_log(ctx, AV_LOG_ERROR, "async_module is null when starting async inference.\n");
@@ -108,7 +119,11 @@  DNNReturnType ff_dnn_start_inference_async(void *ctx, DNNAsyncExecModule *async_
     }
 
 #if HAVE_PTHREAD_CANCEL
-    pthread_join(async_module->thread_id, NULL);
+    pthread_join(async_module->thread_id, &status);
+    if (status == DNN_ASYNC_FAIL) {
+        av_log(ctx, AV_LOG_ERROR, "Unable to start inference as previous inference failed.\n");
+        return DNN_ERROR;
+    }
     ret = pthread_create(&async_module->thread_id, NULL, async_thread_routine, async_module);
     if (ret != 0) {
         av_log(ctx, AV_LOG_ERROR, "Unable to start async inference.\n");
diff --git a/libavfilter/dnn/dnn_backend_tf.c b/libavfilter/dnn/dnn_backend_tf.c
index fb3f6f5ea6..ffec1b1328 100644
--- a/libavfilter/dnn/dnn_backend_tf.c
+++ b/libavfilter/dnn/dnn_backend_tf.c
@@ -91,6 +91,7 @@  AVFILTER_DEFINE_CLASS(dnn_tensorflow);
 
 static DNNReturnType execute_model_tf(TFRequestItem *request, Queue *inference_queue);
 static void infer_completion_callback(void *args);
+static inline void destroy_request_item(TFRequestItem **arg);
 
 static void free_buffer(void *data, size_t length)
 {
@@ -172,6 +173,10 @@  static DNNReturnType tf_start_inference(void *args)
                   request->status);
     if (TF_GetCode(request->status) != TF_OK) {
         av_log(&tf_model->ctx, AV_LOG_ERROR, "%s", TF_Message(request->status));
+        tf_free_request(infer_request);
+        if (ff_safe_queue_push_back(tf_model->request_queue, request) < 0) {
+            destroy_request_item(&request);
+        }
         return DNN_ERROR;
     }
     return DNN_SUCCESS;
@@ -1095,7 +1100,10 @@  static DNNReturnType execute_model_tf(TFRequestItem *request, Queue *inference_q
     }
 
     if (task->async) {
-        return ff_dnn_start_inference_async(ctx, &request->exec_module);
+        if (ff_dnn_start_inference_async(ctx, &request->exec_module) != DNN_SUCCESS) {
+            goto err;
+        }
+        return DNN_SUCCESS;
     } else {
         if (tf_start_inference(request) != DNN_SUCCESS) {
             goto err;