diff mbox series

[FFmpeg-devel] avcodec/vvcdec: fix potential deadlock in report_frame_progress

Message ID TYSPR06MB64331F8C7162D79EDD6D04B0AA8A2@TYSPR06MB6433.apcprd06.prod.outlook.com
State New
Headers show
Series [FFmpeg-devel] avcodec/vvcdec: fix potential deadlock in report_frame_progress | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Nuo Mi Aug. 25, 2024, 4:35 a.m. UTC
Fixes:
https://fate.ffmpeg.org/report.cgi?slot=x86_64-archlinux-gcc-tsan&time=20240823175808

Reproduction steps:
./configure --enable-memory-poisoning --toolchain=gcc-tsan --disable-stripping && make fate-vvc

Root cause:
We hold the current frame's lock while updating progress for other frames.
This process also requires acquiring other frame locks. It could lead to a deadlock.

Example scenario:
- Frame F0 holds mutex M0 and then tries to lock M1.
- Frame F1, meanwhile, holds mutex M1 and attempts to lock M0.
---
 libavcodec/vvc/refs.c   | 13 +++++++------
 libavcodec/vvc/thread.c |  8 ++++++--
 2 files changed, 13 insertions(+), 8 deletions(-)

Comments

Nuo Mi Aug. 25, 2024, 8:01 a.m. UTC | #1
On Sun, Aug 25, 2024 at 12:36 PM Nuo Mi <nuomi2021@gmail.com> wrote:

> Fixes:
>
> https://fate.ffmpeg.org/report.cgi?slot=x86_64-archlinux-gcc-tsan&time=20240823175808
>
> Reproduction steps:
> ./configure --enable-memory-poisoning --toolchain=gcc-tsan
> --disable-stripping && make fate-vvc
>
> Root cause:
> We hold the current frame's lock while updating progress for other frames.
> This process also requires acquiring other frame locks. It could lead to a
> deadlock.
>
> Example scenario:
> - Frame F0 holds mutex M0 and then tries to lock M1.
> - Frame F1, meanwhile, holds mutex M1 and attempts to lock M0.
>
Hi Anton and James.
Upon reconsideration, the scenario described in the commit log might be
incorrect, as there's no cyclic reference.
If F0 reports progress to F1, F1 won't report progress back to F0.

Could this be a false positive?
Someone report an issue like this
https://github.com/google/sanitizers/issues/1419
And our PR test will test 200+ clips every time. I did not observe any
deadlock at least for half a year.

> ---
>  libavcodec/vvc/refs.c   | 13 +++++++------
>  libavcodec/vvc/thread.c |  8 ++++++--
>  2 files changed, 13 insertions(+), 8 deletions(-)
>
> diff --git a/libavcodec/vvc/refs.c b/libavcodec/vvc/refs.c
> index c1fc6132c2..bebcef7fd6 100644
> --- a/libavcodec/vvc/refs.c
> +++ b/libavcodec/vvc/refs.c
> @@ -588,12 +588,13 @@ void ff_vvc_report_progress(VVCFrame *frame, const
> VVCProgress vp, const int y)
>      VVCProgressListener *l = NULL;
>
>      ff_mutex_lock(&p->lock);
> -
> -    av_assert0(p->progress[vp] < y || p->progress[vp] == INT_MAX);
> -    p->progress[vp] = y;
> -    l = get_done_listener(p, vp);
> -    ff_cond_signal(&p->cond);
> -
> +    if (p->progress[vp] < y) {
> +        // Due to the nature of thread scheduling, later progress may
> reach this point before earlier progress.
> +        // Therefore, we only update the progress when p->progress[vp] <
> y.
> +        p->progress[vp] = y;
> +        l = get_done_listener(p, vp);
> +        ff_cond_signal(&p->cond);
> +    }
>      ff_mutex_unlock(&p->lock);
>
>      while (l) {
> diff --git a/libavcodec/vvc/thread.c b/libavcodec/vvc/thread.c
> index 74f8e4e9d0..86a7753c6a 100644
> --- a/libavcodec/vvc/thread.c
> +++ b/libavcodec/vvc/thread.c
> @@ -466,12 +466,16 @@ static void report_frame_progress(VVCFrameContext
> *fc,
>          y = old = ft->row_progress[idx];
>          while (y < ft->ctu_height &&
> atomic_load(&ft->rows[y].col_progress[idx]) == ft->ctu_width)
>              y++;
> +        if (old != y)
> +            ft->row_progress[idx] = y;
> +        // ff_vvc_report_progress will acquire other frames' locks, which
> could lead to a deadlock
> +        // We need to unlock ft->lock first
> +        ff_mutex_unlock(&ft->lock);
> +
>          if (old != y) {
>              const int progress = y == ft->ctu_height ? INT_MAX : y *
> ctu_size;
> -            ft->row_progress[idx] = y;
>              ff_vvc_report_progress(fc->ref, idx, progress);
>          }
> -        ff_mutex_unlock(&ft->lock);
>      }
>  }
>
> --
> 2.34.1
>
>
diff mbox series

Patch

diff --git a/libavcodec/vvc/refs.c b/libavcodec/vvc/refs.c
index c1fc6132c2..bebcef7fd6 100644
--- a/libavcodec/vvc/refs.c
+++ b/libavcodec/vvc/refs.c
@@ -588,12 +588,13 @@  void ff_vvc_report_progress(VVCFrame *frame, const VVCProgress vp, const int y)
     VVCProgressListener *l = NULL;
 
     ff_mutex_lock(&p->lock);
-
-    av_assert0(p->progress[vp] < y || p->progress[vp] == INT_MAX);
-    p->progress[vp] = y;
-    l = get_done_listener(p, vp);
-    ff_cond_signal(&p->cond);
-
+    if (p->progress[vp] < y) {
+        // Due to the nature of thread scheduling, later progress may reach this point before earlier progress.
+        // Therefore, we only update the progress when p->progress[vp] < y.
+        p->progress[vp] = y;
+        l = get_done_listener(p, vp);
+        ff_cond_signal(&p->cond);
+    }
     ff_mutex_unlock(&p->lock);
 
     while (l) {
diff --git a/libavcodec/vvc/thread.c b/libavcodec/vvc/thread.c
index 74f8e4e9d0..86a7753c6a 100644
--- a/libavcodec/vvc/thread.c
+++ b/libavcodec/vvc/thread.c
@@ -466,12 +466,16 @@  static void report_frame_progress(VVCFrameContext *fc,
         y = old = ft->row_progress[idx];
         while (y < ft->ctu_height && atomic_load(&ft->rows[y].col_progress[idx]) == ft->ctu_width)
             y++;
+        if (old != y)
+            ft->row_progress[idx] = y;
+        // ff_vvc_report_progress will acquire other frames' locks, which could lead to a deadlock
+        // We need to unlock ft->lock first
+        ff_mutex_unlock(&ft->lock);
+
         if (old != y) {
             const int progress = y == ft->ctu_height ? INT_MAX : y * ctu_size;
-            ft->row_progress[idx] = y;
             ff_vvc_report_progress(fc->ref, idx, progress);
         }
-        ff_mutex_unlock(&ft->lock);
     }
 }