diff mbox series

[FFmpeg-devel,v4,1/3] avcodec: make a local copy of executor

Message ID 20241002014358.296769-1-nuomi2021@gmail.com
State New
Headers show
Series [FFmpeg-devel,v4,1/3] avcodec: make a local copy of executor | expand

Checks

Context Check Description
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Nuo Mi Oct. 2, 2024, 1:43 a.m. UTC
We still need several refactors to improve the current VVC decoder's performance,
which will frequently break the API/ABI. To mitigate this, we've copied the executor from
avutil to avcodec. Once the API/ABI is stable, we will move it back to avutil.
---
 libavcodec/Makefile     |   1 +
 libavcodec/executor.c   | 221 ++++++++++++++++++++++++++++++++++++++++
 libavcodec/executor.h   |  73 +++++++++++++
 libavcodec/vvc/dec.h    |   2 +-
 libavcodec/vvc/thread.c |  22 ++--
 libavcodec/vvc/thread.h |   4 +-
 6 files changed, 309 insertions(+), 14 deletions(-)
 create mode 100644 libavcodec/executor.c
 create mode 100644 libavcodec/executor.h
diff mbox series

Patch

diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index a4fcce3b42..da1a1aa945 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -43,6 +43,7 @@  OBJS = ac3_parser.o                                                     \
        dirac.o                                                          \
        dv_profile.o                                                     \
        encode.o                                                         \
+       executor.o                                                       \
        get_buffer.o                                                     \
        imgconvert.o                                                     \
        jni.o                                                            \
diff --git a/libavcodec/executor.c b/libavcodec/executor.c
new file mode 100644
index 0000000000..574c5c7be7
--- /dev/null
+++ b/libavcodec/executor.c
@@ -0,0 +1,221 @@ 
+/*
+ * Copyright (C) 2024 Nuo Mi
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+
+#include <stdbool.h>
+
+#include "libavutil/mem.h"
+#include "libavutil/thread.h"
+
+#include "executor.h"
+
+#if !HAVE_THREADS
+
+#define ExecutorThread  char
+
+#define executor_thread_create(t, a, s, ar)      0
+#define executor_thread_join(t, r)               do {} while(0)
+
+#else
+
+#define ExecutorThread  pthread_t
+
+#define executor_thread_create(t, a, s, ar)      pthread_create(t, a, s, ar)
+#define executor_thread_join(t, r)               pthread_join(t, r)
+
+#endif //!HAVE_THREADS
+
+typedef struct ThreadInfo {
+    FFExecutor *e;
+    ExecutorThread thread;
+} ThreadInfo;
+
+struct FFExecutor {
+    FFTaskCallbacks cb;
+    int thread_count;
+    bool recursive;
+
+    ThreadInfo *threads;
+    uint8_t *local_contexts;
+
+    AVMutex lock;
+    AVCond cond;
+    int die;
+
+    FFTask *tasks;
+};
+
+static FFTask* remove_task(FFTask **prev, FFTask *t)
+{
+    *prev  = t->next;
+    t->next = NULL;
+    return t;
+}
+
+static void add_task(FFTask **prev, FFTask *t)
+{
+    t->next = *prev;
+    *prev   = t;
+}
+
+static int run_one_task(FFExecutor *e, void *lc)
+{
+    FFTaskCallbacks *cb = &e->cb;
+    FFTask **prev;
+
+    for (prev = &e->tasks; *prev && !cb->ready(*prev, cb->user_data); prev = &(*prev)->next)
+        /* nothing */;
+    if (*prev) {
+        FFTask *t = remove_task(prev, *prev);
+        if (e->thread_count > 0)
+            ff_mutex_unlock(&e->lock);
+        cb->run(t, lc, cb->user_data);
+        if (e->thread_count > 0)
+            ff_mutex_lock(&e->lock);
+        return 1;
+    }
+    return 0;
+}
+
+#if HAVE_THREADS
+static void *executor_worker_task(void *data)
+{
+    ThreadInfo *ti = (ThreadInfo*)data;
+    FFExecutor *e  = ti->e;
+    void *lc       = e->local_contexts + (ti - e->threads) * e->cb.local_context_size;
+
+    ff_mutex_lock(&e->lock);
+    while (1) {
+        if (e->die) break;
+
+        if (!run_one_task(e, lc)) {
+            // no ready task was found in this pass, wait until signalled
+            ff_cond_wait(&e->cond, &e->lock);
+        }
+    }
+    ff_mutex_unlock(&e->lock);
+    return NULL;
+}
+#endif
+
+static void executor_free(FFExecutor *e, const int has_lock, const int has_cond)
+{
+    if (e->thread_count) {
+        //signal die
+        ff_mutex_lock(&e->lock);
+        e->die = 1;
+        ff_cond_broadcast(&e->cond);
+        ff_mutex_unlock(&e->lock);
+
+        for (int i = 0; i < e->thread_count; i++)
+            executor_thread_join(e->threads[i].thread, NULL);
+    }
+    if (has_cond)
+        ff_cond_destroy(&e->cond);
+    if (has_lock)
+        ff_mutex_destroy(&e->lock);
+
+    av_free(e->threads);
+    av_free(e->local_contexts);
+
+    av_free(e);
+}
+
+FFExecutor* ff_executor_alloc(const FFTaskCallbacks *cb, int thread_count)
+{
+    FFExecutor *e;
+    int has_lock = 0, has_cond = 0;
+    if (!cb || !cb->user_data || !cb->ready || !cb->run || !cb->priority_higher)
+        return NULL;
+
+    e = av_mallocz(sizeof(*e));
+    if (!e)
+        return NULL;
+    e->cb = *cb;
+
+    e->local_contexts = av_calloc(FFMAX(thread_count, 1), e->cb.local_context_size);
+    if (!e->local_contexts)
+        goto free_executor;
+
+    e->threads = av_calloc(FFMAX(thread_count, 1), sizeof(*e->threads));
+    if (!e->threads)
+        goto free_executor;
+
+    if (!thread_count)
+        return e;
+
+    has_lock = !ff_mutex_init(&e->lock, NULL);
+    has_cond = !ff_cond_init(&e->cond, NULL);
+
+    if (!has_lock || !has_cond)
+        goto free_executor;
+
+    for (/* nothing */; e->thread_count < thread_count; e->thread_count++) {
+        ThreadInfo *ti = e->threads + e->thread_count;
+        ti->e = e;
+        if (executor_thread_create(&ti->thread, NULL, executor_worker_task, ti))
+            goto free_executor;
+    }
+    return e;
+
+free_executor:
+    executor_free(e, has_lock, has_cond);
+    return NULL;
+}
+
+void ff_executor_free(FFExecutor **executor)
+{
+    int thread_count;
+
+    if (!executor || !*executor)
+        return;
+    thread_count = (*executor)->thread_count;
+    executor_free(*executor, thread_count, thread_count);
+    *executor = NULL;
+}
+
+void ff_executor_execute(FFExecutor *e, FFTask *t)
+{
+    FFTaskCallbacks *cb = &e->cb;
+    FFTask **prev;
+
+    if (e->thread_count)
+        ff_mutex_lock(&e->lock);
+    if (t) {
+        for (prev = &e->tasks; *prev && cb->priority_higher(*prev, t); prev = &(*prev)->next)
+            /* nothing */;
+        add_task(prev, t);
+    }
+    if (e->thread_count) {
+        ff_cond_signal(&e->cond);
+        ff_mutex_unlock(&e->lock);
+    }
+
+    if (!e->thread_count || !HAVE_THREADS) {
+        if (e->recursive)
+            return;
+        e->recursive = true;
+        // We are running in a single-threaded environment, so we must handle all tasks ourselves
+        while (run_one_task(e, e->local_contexts))
+            /* nothing */;
+        e->recursive = false;
+    }
+}
diff --git a/libavcodec/executor.h b/libavcodec/executor.h
new file mode 100644
index 0000000000..2d02734ad6
--- /dev/null
+++ b/libavcodec/executor.h
@@ -0,0 +1,73 @@ 
+/*
+ * Copyright (C) 2024 Nuo Mi
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/*
+ * We still need several refactors to improve the current VVC decoder's performance,
+ * which will frequently break the API/ABI. To mitigate this, we've copied the executor from
+ * avutil to avcodec. Once the API/ABI is stable, we will move it back to avutil.
+ */
+
+#ifndef AVCODEC_EXECUTOR_H
+#define AVCODEC_EXECUTOR_H
+
+typedef struct FFExecutor FFExecutor;
+typedef struct FFTask FFTask;
+
+struct FFTask {
+    FFTask *next;
+};
+
+typedef struct FFTaskCallbacks {
+    void *user_data;
+
+    int local_context_size;
+
+    // return 1 if a's priority > b's priority
+    int (*priority_higher)(const FFTask *a, const FFTask *b);
+
+    // return non-zero if the task is ready to run
+    int (*ready)(const FFTask *t, void *user_data);
+
+    // run the task
+    int (*run)(FFTask *t, void *local_context, void *user_data);
+} FFTaskCallbacks;
+
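+/**
+ * Allocate an executor
+ * @param callbacks callback structure for the executor; it is copied into the executor
+ * @param thread_count number of worker threads, 0 to run tasks directly on the caller's thread
+ * @return the new executor, or NULL on failure or if a required callback field is missing
+ */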
+FFExecutor* ff_executor_alloc(const FFTaskCallbacks *callbacks, int thread_count);
+
+/**
+ * Free the executor
+ * @param e  pointer to the executor pointer; *e is set to NULL after the executor is freed
+ */
+void ff_executor_free(FFExecutor **e);
+
+/**
+ * Add a task to the executor
+ * @param e pointer to the executor
+ * @param t pointer to the task. If NULL, no task is added and one worker thread is woken up
+ */
+void ff_executor_execute(FFExecutor *e, FFTask *t);
+
+#endif //AVCODEC_EXECUTOR_H
diff --git a/libavcodec/vvc/dec.h b/libavcodec/vvc/dec.h
index d27cf52ca2..159c60942b 100644
--- a/libavcodec/vvc/dec.h
+++ b/libavcodec/vvc/dec.h
@@ -236,7 +236,7 @@  typedef struct VVCContext {
     uint16_t seq_decode;
     uint16_t seq_output;
 
-    struct AVExecutor *executor;
+    struct FFExecutor *executor;
 
     VVCFrameContext *fcs;
     int nb_fcs;
diff --git a/libavcodec/vvc/thread.c b/libavcodec/vvc/thread.c
index 86a7753c6a..e6907fd764 100644
--- a/libavcodec/vvc/thread.c
+++ b/libavcodec/vvc/thread.c
@@ -22,7 +22,7 @@ 
 
 #include <stdatomic.h>
 
-#include "libavutil/executor.h"
+#include "libavcodec/executor.h"
 #include "libavutil/mem.h"
 #include "libavutil/thread.h"
 
@@ -55,7 +55,7 @@  typedef enum VVCTaskStage {
 typedef struct VVCTask {
     union {
         struct VVCTask *next;                //for executor debug only
-        AVTask task;
+        FFTask task;
     } u;
 
     VVCTaskStage stage;
@@ -109,7 +109,7 @@  static void add_task(VVCContext *s, VVCTask *t)
 
     atomic_fetch_add(&ft->nb_scheduled_tasks, 1);
 
-    av_executor_execute(s->executor, &t->u.task);
+    ff_executor_execute(s->executor, &t->u.task);
 }
 
 static void task_init(VVCTask *t, VVCTaskStage stage, VVCFrameContext *fc, const int rx, const int ry)
@@ -372,7 +372,7 @@  static int task_is_stage_ready(VVCTask *t, int add)
     return task_has_target_score(t, stage, score);
 }
 
-static int task_ready(const AVTask *_t, void *user_data)
+static int task_ready(const FFTask *_t, void *user_data)
 {
     VVCTask *t = (VVCTask*)_t;
 
@@ -385,7 +385,7 @@  static int task_ready(const AVTask *_t, void *user_data)
             return (a) < (b);               \
     } while (0)
 
-static int task_priority_higher(const AVTask *_a, const AVTask *_b)
+static int task_priority_higher(const FFTask *_a, const FFTask *_b)
 {
     const VVCTask *a = (const VVCTask*)_a;
     const VVCTask *b = (const VVCTask*)_b;
@@ -661,7 +661,7 @@  static void task_run_stage(VVCTask *t, VVCContext *s, VVCLocalContext *lc)
     return;
 }
 
-static int task_run(AVTask *_t, void *local_context, void *user_data)
+static int task_run(FFTask *_t, void *local_context, void *user_data)
 {
     VVCTask *t          = (VVCTask*)_t;
     VVCContext *s       = (VVCContext *)user_data;
@@ -683,21 +683,21 @@  static int task_run(AVTask *_t, void *local_context, void *user_data)
     return 0;
 }
 
-AVExecutor* ff_vvc_executor_alloc(VVCContext *s, const int thread_count)
+FFExecutor* ff_vvc_executor_alloc(VVCContext *s, const int thread_count)
 {
-    AVTaskCallbacks callbacks = {
+    FFTaskCallbacks callbacks = {
         s,
         sizeof(VVCLocalContext),
         task_priority_higher,
         task_ready,
         task_run,
     };
-    return av_executor_alloc(&callbacks, thread_count);
+    return ff_executor_alloc(&callbacks, thread_count);
 }
 
-void ff_vvc_executor_free(AVExecutor **e)
+void ff_vvc_executor_free(FFExecutor **e)
 {
-    av_executor_free(e);
+    ff_executor_free(e);
 }
 
 void ff_vvc_frame_thread_free(VVCFrameContext *fc)
diff --git a/libavcodec/vvc/thread.h b/libavcodec/vvc/thread.h
index 7b15dbee59..b89aee3b32 100644
--- a/libavcodec/vvc/thread.h
+++ b/libavcodec/vvc/thread.h
@@ -25,8 +25,8 @@ 
 
 #include "dec.h"
 
-struct AVExecutor* ff_vvc_executor_alloc(VVCContext *s, int thread_count);
-void ff_vvc_executor_free(struct AVExecutor **e);
+struct FFExecutor* ff_vvc_executor_alloc(VVCContext *s, int thread_count);
+void ff_vvc_executor_free(struct FFExecutor **e);
 
 int ff_vvc_frame_thread_init(VVCFrameContext *fc);
 void ff_vvc_frame_thread_free(VVCFrameContext *fc);