diff mbox series

[FFmpeg-devel] avfilter/dnn/dnn_backend_common: check thread create status before join thread

Message ID 20211119124805.77429-1-lq@chinaffmpeg.org
State New
Headers show
Series [FFmpeg-devel] avfilter/dnn/dnn_backend_common: check thread create status before join thread
Related show

Checks

Context Check Description
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished
andriy/make_ppc success Make finished
andriy/make_fate_ppc success Make fate finished

Commit Message

Steven Liu Nov. 19, 2021, 12:48 p.m. UTC
From: Steven Liu <liuqi05@kuaishou.com>

Fix a SIGSEGV in pthread_join(): check whether the thread was actually
created before joining it. Initialize the thread_created status to 0 when
the DNNAsyncExecModule is set up, and set it to 1 after pthread_create() succeeds.

coredump backtrace info:
[Thread 0x7fff4778e700 (LWP 323218) exited]

Program received signal SIGSEGV, Segmentation fault.
0x00007fffed71af81 in pthread_join () from /lib64/libpthread.so.0
(gdb) bt
0  0x00007fffed71af81 in pthread_join () from /lib64/libpthread.so.0
1  0x0000000000872e3a in ff_dnn_start_inference_async (ctx=0x30cbe80, async_module=0x4848c58) at libavfilter/dnn/dnn_backend_common.c:122
2  0x0000000000870f70 in execute_model_tf (request=0x4848c40, lltask_queue=0x484c7c0) at libavfilter/dnn/dnn_backend_tf.c:1111
3  0x0000000000871195 in ff_dnn_execute_model_tf (model=0x30c9700, exec_params=0x7fffffffafb0) at libavfilter/dnn/dnn_backend_tf.c:1168
4  0x000000000084a475 in ff_dnn_execute_model (ctx=0x30f8388, in_frame=0x4890fc0, out_frame=0x485f780) at libavfilter/dnn_filter_common.c:129
5  0x0000000000524d69 in activate (filter_ctx=0x3100a40) at libavfilter/vf_dnn_processing.c:299
6  0x000000000046bc68 in ff_filter_activate (filter=0x3100a40) at libavfilter/avfilter.c:1364
7  0x00000000004701fd in ff_filter_graph_run_once (graph=0x3114cc0) at libavfilter/avfiltergraph.c:1341
8  0x0000000000471331 in push_frame (graph=0x3114cc0) at libavfilter/buffersrc.c:156
9  0x0000000000471861 in av_buffersrc_add_frame_flags (ctx=0x484ce00, frame=0x41670c0, flags=4) at libavfilter/buffersrc.c:224
10 0x000000000042d415 in ifilter_send_frame (ifilter=0x314e300, frame=0x41670c0) at fftools/ffmpeg.c:2249
11 0x000000000042d682 in send_frame_to_filters (ist=0x30ff1c0, decoded_frame=0x41670c0) at fftools/ffmpeg.c:2323
12 0x000000000042e3b5 in decode_video (ist=0x30ff1c0, pkt=0x30b0b40, got_output=0x7fffffffb524, duration_pts=0x7fffffffb528, eof=0, decode_failed=0x7fffffffb520)
   at fftools/ffmpeg.c:2525
13 0x000000000042ecd4 in process_input_packet (ist=0x30ff1c0, pkt=0x3148cc0, no_eof=0) at fftools/ffmpeg.c:2681
14 0x0000000000435b2d in process_input (file_index=0) at fftools/ffmpeg.c:4579
15 0x0000000000435fe8 in transcode_step () at fftools/ffmpeg.c:4719
16 0x000000000043610b in transcode () at fftools/ffmpeg.c:4773
17 0x00000000004368a7 in main (argc=8, argv=0x7fffffffbd68) at fftools/ffmpeg.c:4977

Reported-by: Yu Yang <yuyang14@kuaishou.com>
Signed-off-by: Steven Liu <liuqi05@kuaishou.com>
---
 libavfilter/dnn/dnn_backend_common.c | 23 +++++++++++++++--------
 libavfilter/dnn/dnn_backend_common.h |  1 +
 libavfilter/dnn/dnn_backend_tf.c     |  4 +++-
 3 files changed, 19 insertions(+), 9 deletions(-)

Comments

Guo, Yejun Nov. 24, 2021, 8:20 a.m. UTC | #1
-----Original Message-----
From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of Steven Liu
Sent: 2021年11月19日 20:48
To: ffmpeg-devel@ffmpeg.org
Cc: Steven Liu <liuqi05@kuaishou.com>; Yu Yang <yuyang14@kuaishou.com>
Subject: [FFmpeg-devel] [PATCH] avfilter/dnn/dnn_backend_common: check thread create status before join thread

From: Steven Liu <liuqi05@kuaishou.com>

Fix a SIGSEGV in pthread_join(): check whether the thread was actually
created before joining it. Initialize the thread_created status to 0 when
the DNNAsyncExecModule is set up, and set it to 1 after pthread_create() succeeds.

coredump backtrace info:
[Thread 0x7fff4778e700 (LWP 323218) exited]

Program received signal SIGSEGV, Segmentation fault.
0x00007fffed71af81 in pthread_join () from /lib64/libpthread.so.0
(gdb) bt
0  0x00007fffed71af81 in pthread_join () from /lib64/libpthread.so.0
1  0x0000000000872e3a in ff_dnn_start_inference_async (ctx=0x30cbe80, async_module=0x4848c58) at libavfilter/dnn/dnn_backend_common.c:122
2  0x0000000000870f70 in execute_model_tf (request=0x4848c40, lltask_queue=0x484c7c0) at libavfilter/dnn/dnn_backend_tf.c:1111
3  0x0000000000871195 in ff_dnn_execute_model_tf (model=0x30c9700, exec_params=0x7fffffffafb0) at libavfilter/dnn/dnn_backend_tf.c:1168
4  0x000000000084a475 in ff_dnn_execute_model (ctx=0x30f8388, in_frame=0x4890fc0, out_frame=0x485f780) at libavfilter/dnn_filter_common.c:129
5  0x0000000000524d69 in activate (filter_ctx=0x3100a40) at libavfilter/vf_dnn_processing.c:299
6  0x000000000046bc68 in ff_filter_activate (filter=0x3100a40) at libavfilter/avfilter.c:1364
7  0x00000000004701fd in ff_filter_graph_run_once (graph=0x3114cc0) at libavfilter/avfiltergraph.c:1341
8  0x0000000000471331 in push_frame (graph=0x3114cc0) at libavfilter/buffersrc.c:156
9  0x0000000000471861 in av_buffersrc_add_frame_flags (ctx=0x484ce00, frame=0x41670c0, flags=4) at libavfilter/buffersrc.c:224
10 0x000000000042d415 in ifilter_send_frame (ifilter=0x314e300, frame=0x41670c0) at fftools/ffmpeg.c:2249
11 0x000000000042d682 in send_frame_to_filters (ist=0x30ff1c0, decoded_frame=0x41670c0) at fftools/ffmpeg.c:2323
12 0x000000000042e3b5 in decode_video (ist=0x30ff1c0, pkt=0x30b0b40, got_output=0x7fffffffb524, duration_pts=0x7fffffffb528, eof=0, decode_failed=0x7fffffffb520)
   at fftools/ffmpeg.c:2525
13 0x000000000042ecd4 in process_input_packet (ist=0x30ff1c0, pkt=0x3148cc0, no_eof=0) at fftools/ffmpeg.c:2681
14 0x0000000000435b2d in process_input (file_index=0) at fftools/ffmpeg.c:4579
15 0x0000000000435fe8 in transcode_step () at fftools/ffmpeg.c:4719
16 0x000000000043610b in transcode () at fftools/ffmpeg.c:4773
17 0x00000000004368a7 in main (argc=8, argv=0x7fffffffbd68) at fftools/ffmpeg.c:4977

Reported-by: Yu Yang <yuyang14@kuaishou.com>
Signed-off-by: Steven Liu <liuqi05@kuaishou.com>
---
 libavfilter/dnn/dnn_backend_common.c | 23 +++++++++++++++--------
 libavfilter/dnn/dnn_backend_common.h |  1 +
 libavfilter/dnn/dnn_backend_tf.c     |  4 +++-
 3 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_common.c b/libavfilter/dnn/dnn_backend_common.c
index 6a9c4cc87f..a25d0eded1 100644
--- a/libavfilter/dnn/dnn_backend_common.c
+++ b/libavfilter/dnn/dnn_backend_common.c
@@ -96,10 +96,13 @@ DNNReturnType ff_dnn_async_module_cleanup(DNNAsyncExecModule *async_module)
         return DNN_ERROR;
     }
 #if HAVE_PTHREAD_CANCEL
-    pthread_join(async_module->thread_id, &status);
-    if (status == DNN_ASYNC_FAIL) {
-        av_log(NULL, AV_LOG_ERROR, "Last Inference Failed.\n");
-        return DNN_ERROR;
+    if (async_module->thread_created) {
+        pthread_join(async_module->thread_id, &status);
+        if (status == DNN_ASYNC_FAIL) {
+            av_log(NULL, AV_LOG_ERROR, "Last Inference Failed.\n");
+            return DNN_ERROR;
+        }
+        async_module->thread_created = 0;
     }
 #endif
     async_module->start_inference = NULL;
@@ -119,16 +122,20 @@ DNNReturnType ff_dnn_start_inference_async(void *ctx, DNNAsyncExecModule *async_
     }
 
 #if HAVE_PTHREAD_CANCEL
-    pthread_join(async_module->thread_id, &status);
-    if (status == DNN_ASYNC_FAIL) {
-        av_log(ctx, AV_LOG_ERROR, "Unable to start inference as previous inference failed.\n");
-        return DNN_ERROR;
+    if (async_module->thread_created) {
+        pthread_join(async_module->thread_id, &status);
+        if (status == DNN_ASYNC_FAIL) {
+            av_log(ctx, AV_LOG_ERROR, "Unable to start inference as previous inference failed.\n");
+            return DNN_ERROR;
+        }
+        async_module->thread_created = 0;
     }
     ret = pthread_create(&async_module->thread_id, NULL, async_thread_routine, async_module);
     if (ret != 0) {
         av_log(ctx, AV_LOG_ERROR, "Unable to start async inference.\n");
         return DNN_ERROR;
     }
+    async_module->thread_created = 1;
 #else
     if (async_module->start_inference(async_module->args) != DNN_SUCCESS) {
         return DNN_ERROR;
diff --git a/libavfilter/dnn/dnn_backend_common.h b/libavfilter/dnn/dnn_backend_common.h
index 6b6a5e21ae..6c4909a489 100644
--- a/libavfilter/dnn/dnn_backend_common.h
+++ b/libavfilter/dnn/dnn_backend_common.h
@@ -75,6 +75,7 @@ typedef struct DNNAsyncExecModule {
      */
     void *args;
 #if HAVE_PTHREAD_CANCEL
+    int thread_created;
     pthread_t thread_id;
     pthread_attr_t thread_attr;
 #endif
diff --git a/libavfilter/dnn/dnn_backend_tf.c b/libavfilter/dnn/dnn_backend_tf.c
index 7dd48fb612..644c794612 100644
--- a/libavfilter/dnn/dnn_backend_tf.c
+++ b/libavfilter/dnn/dnn_backend_tf.c
@@ -912,7 +912,9 @@ DNNModel *ff_dnn_load_model_tf(const char *model_filename, DNNFunctionType func_
         item->exec_module.start_inference = &tf_start_inference;
         item->exec_module.callback = &infer_completion_callback;
         item->exec_module.args = item;
-
+#if HAVE_PTHREAD_CANCEL
+        item->exec_module.thread_created = 0;
+#endif
         if (ff_safe_queue_push_back(tf_model->request_queue, item) < 0) {
             destroy_request_item(&item);
             goto err;
diff mbox series

Patch

diff --git a/libavfilter/dnn/dnn_backend_common.c b/libavfilter/dnn/dnn_backend_common.c
index 6a9c4cc87f..a25d0eded1 100644
--- a/libavfilter/dnn/dnn_backend_common.c
+++ b/libavfilter/dnn/dnn_backend_common.c
@@ -96,10 +96,13 @@  DNNReturnType ff_dnn_async_module_cleanup(DNNAsyncExecModule *async_module)
         return DNN_ERROR;
     }
 #if HAVE_PTHREAD_CANCEL
-    pthread_join(async_module->thread_id, &status);
-    if (status == DNN_ASYNC_FAIL) {
-        av_log(NULL, AV_LOG_ERROR, "Last Inference Failed.\n");
-        return DNN_ERROR;
+    if (async_module->thread_created) {
+        pthread_join(async_module->thread_id, &status);
+        if (status == DNN_ASYNC_FAIL) {
+            av_log(NULL, AV_LOG_ERROR, "Last Inference Failed.\n");
+            return DNN_ERROR;
+        }
+        async_module->thread_created = 0;
     }
 #endif
     async_module->start_inference = NULL;
@@ -119,16 +122,20 @@  DNNReturnType ff_dnn_start_inference_async(void *ctx, DNNAsyncExecModule *async_
     }
 
 #if HAVE_PTHREAD_CANCEL
-    pthread_join(async_module->thread_id, &status);
-    if (status == DNN_ASYNC_FAIL) {
-        av_log(ctx, AV_LOG_ERROR, "Unable to start inference as previous inference failed.\n");
-        return DNN_ERROR;
+    if (async_module->thread_created) {
+        pthread_join(async_module->thread_id, &status);
+        if (status == DNN_ASYNC_FAIL) {
+            av_log(ctx, AV_LOG_ERROR, "Unable to start inference as previous inference failed.\n");
+            return DNN_ERROR;
+        }
+        async_module->thread_created = 0;
     }
     ret = pthread_create(&async_module->thread_id, NULL, async_thread_routine, async_module);
     if (ret != 0) {
         av_log(ctx, AV_LOG_ERROR, "Unable to start async inference.\n");
         return DNN_ERROR;
     }
+    async_module->thread_created = 1;
 #else
     if (async_module->start_inference(async_module->args) != DNN_SUCCESS) {
         return DNN_ERROR;
diff --git a/libavfilter/dnn/dnn_backend_common.h b/libavfilter/dnn/dnn_backend_common.h
index 6b6a5e21ae..6c4909a489 100644
--- a/libavfilter/dnn/dnn_backend_common.h
+++ b/libavfilter/dnn/dnn_backend_common.h
@@ -75,6 +75,7 @@  typedef struct DNNAsyncExecModule {
      */
     void *args;
 #if HAVE_PTHREAD_CANCEL
+    int thread_created;
     pthread_t thread_id;
     pthread_attr_t thread_attr;
 #endif
diff --git a/libavfilter/dnn/dnn_backend_tf.c b/libavfilter/dnn/dnn_backend_tf.c
index 7dd48fb612..644c794612 100644
--- a/libavfilter/dnn/dnn_backend_tf.c
+++ b/libavfilter/dnn/dnn_backend_tf.c
@@ -912,7 +912,9 @@  DNNModel *ff_dnn_load_model_tf(const char *model_filename, DNNFunctionType func_
         item->exec_module.start_inference = &tf_start_inference;
         item->exec_module.callback = &infer_completion_callback;
         item->exec_module.args = item;
-
+#if HAVE_PTHREAD_CANCEL
+        item->exec_module.thread_created = 0;
+#endif
         if (ff_safe_queue_push_back(tf_model->request_queue, item) < 0) {
             destroy_request_item(&item);
             goto err;