[FFmpeg-devel] Parallelize vf_lut

Submitted by Britt Cyr on Feb. 28, 2019, 7:35 p.m.

Details

Message ID 20190228193552.45519-1-cyr@google.com
State New
Headers show

Commit Message

Britt Cyr Feb. 28, 2019, 7:35 p.m.
Process each frame with up to ff_filter_get_nb_threads(ctx) slice threads.
In my testing on a 4K video this was about 4x faster.
---
 libavfilter/vf_lut.c | 106 ++++++++++++++++++++++++++++---------------
 1 file changed, 70 insertions(+), 36 deletions(-)

Patch hide | download patch | download mbox

diff --git a/libavfilter/vf_lut.c b/libavfilter/vf_lut.c
index c815ddc194..9e5527e4a1 100644
--- a/libavfilter/vf_lut.c
+++ b/libavfilter/vf_lut.c
@@ -72,6 +72,12 @@  typedef struct LutContext {
     int negate_alpha; /* only used by negate */
 } LutContext;
 
+typedef struct ThreadData {
+    AVFrame *in;        /* source frame; lookup_slice only reads it */
+    AVFrame *out;       /* destination frame (same as in when filtering in place) */
+    AVFilterLink *link; /* input link the frame arrived on */
+} ThreadData;
+
 #define Y 0
 #define U 1
 #define V 2
@@ -337,26 +343,13 @@  static int config_props(AVFilterLink *inlink)
     return 0;
 }
 
-static int filter_frame(AVFilterLink *inlink, AVFrame *in)
-{
-    AVFilterContext *ctx = inlink->dst;
+static int lookup_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) {
     LutContext *s = ctx->priv;
-    AVFilterLink *outlink = ctx->outputs[0];
-    AVFrame *out;
-    int i, j, plane, direct = 0;
-
-    if (av_frame_is_writable(in)) {
-        direct = 1;
-        out = in;
-    } else {
-        out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
-        if (!out) {
-            av_frame_free(&in);
-            return AVERROR(ENOMEM);
-        }
-        av_frame_copy_props(out, in);
-    }
-
+    int i, j, plane = 0;
+    const ThreadData *td = arg; /* frames and link filled in by apply_lut() */
+    const AVFrame *in  = td->in;
+    AVFrame *out = td->out;
+    const AVFilterLink *inlink = td->link;
     if (s->is_rgb && s->is_16bit && !s->is_planar) {
         /* packed, 16-bit */
         uint16_t *inrow, *outrow, *inrow0, *outrow0;
@@ -366,11 +359,13 @@  static int filter_frame(AVFilterLink *inlink, AVFrame *in)
         const int in_linesize  =  in->linesize[0] / 2;
         const int out_linesize = out->linesize[0] / 2;
         const int step = s->step;
+        const int row_min = (h *  jobnr     ) / nb_jobs; /* multiply first: jobnr / nb_jobs truncates to 0 */
+        const int row_max = (h * (jobnr + 1)) / nb_jobs; /* exclusive end of this job's rows */
 
-        inrow0  = (uint16_t*) in ->data[0];
-        outrow0 = (uint16_t*) out->data[0];
+        inrow0  = (uint16_t *)in ->data[0] + row_min * in_linesize;
+        outrow0 = (uint16_t *)out->data[0] + row_min * out_linesize;
 
-        for (i = 0; i < h; i ++) {
+        for (i = row_min; i < row_max; i ++) {
             inrow  = inrow0;
             outrow = outrow0;
             for (j = 0; j < w; j++) {
@@ -403,11 +398,13 @@  static int filter_frame(AVFilterLink *inlink, AVFrame *in)
         const int in_linesize  =  in->linesize[0];
         const int out_linesize = out->linesize[0];
         const int step = s->step;
+        const int row_min = (h *  jobnr     ) / nb_jobs; /* multiply first: jobnr / nb_jobs truncates to 0 */
+        const int row_max = (h * (jobnr + 1)) / nb_jobs; /* exclusive end of this job's rows */
 
-        inrow0  = in ->data[0];
-        outrow0 = out->data[0];
+        inrow0  = in ->data[0] + row_min * in_linesize;
+        outrow0 = out->data[0] + row_min * out_linesize;
 
-        for (i = 0; i < h; i ++) {
+        for (i = row_min; i < row_max; i ++) {
             inrow  = inrow0;
             outrow = outrow0;
             for (j = 0; j < w; j++) {
@@ -435,11 +432,13 @@  static int filter_frame(AVFilterLink *inlink, AVFrame *in)
             const uint16_t *tab = s->lut[plane];
             const int in_linesize  =  in->linesize[plane] / 2;
             const int out_linesize = out->linesize[plane] / 2;
+            const int row_min = (h *  jobnr     ) / nb_jobs; /* multiply first: jobnr / nb_jobs truncates to 0 */
+            const int row_max = (h * (jobnr + 1)) / nb_jobs; /* exclusive end of this job's rows */
 
-            inrow  = (uint16_t *)in ->data[plane];
-            outrow = (uint16_t *)out->data[plane];
+            inrow  = (uint16_t *)in ->data[plane] + row_min * in_linesize;
+            outrow = (uint16_t *)out->data[plane] + row_min * out_linesize;
 
-            for (i = 0; i < h; i++) {
+            for (i = row_min; i < row_max; i++) {
                 for (j = 0; j < w; j++) {
 #if HAVE_BIGENDIAN
                     outrow[j] = av_bswap16(tab[av_bswap16(inrow[j])]);
@@ -463,11 +462,13 @@  static int filter_frame(AVFilterLink *inlink, AVFrame *in)
             const uint16_t *tab = s->lut[plane];
             const int in_linesize  =  in->linesize[plane];
             const int out_linesize = out->linesize[plane];
+            const int row_min = (h *  jobnr     ) / nb_jobs; /* multiply first: jobnr / nb_jobs truncates to 0 */
+            const int row_max = (h * (jobnr + 1)) / nb_jobs; /* exclusive end of this job's rows */
 
-            inrow  = in ->data[plane];
-            outrow = out->data[plane];
+            inrow  = in ->data[plane] + row_min * in_linesize;
+            outrow = out->data[plane] + row_min * out_linesize;
 
-            for (i = 0; i < h; i++) {
+            for (i = row_min; i < row_max; i++) {
                 for (j = 0; j < w; j++)
                     outrow[j] = tab[inrow[j]];
                 inrow  += in_linesize;
@@ -476,9 +477,42 @@  static int filter_frame(AVFilterLink *inlink, AVFrame *in)
         }
     }
 
-    if (!direct)
+    return 0;
+}
+
+static AVFrame *apply_lut(AVFilterLink *inlink, AVFrame *in) {
+    AVFilterContext *ctx = inlink->dst;
+    AVFilterLink *outlink = ctx->outputs[0];
+    AVFrame *out;
+    ThreadData td;
+
+    if (av_frame_is_writable(in)) {
+        out = in; /* input is writable: filter in place */
+    } else {
+        out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+        if (!out) {
+            av_frame_free(&in);
+            return NULL; /* filter_frame() reports this as AVERROR(ENOMEM) */
+        }
+        av_frame_copy_props(out, in);
+    }
+    td.in  = in;
+    td.out = out;
+    td.link = inlink;
+    ctx->internal->execute(ctx, lookup_slice, &td, NULL, FFMIN(outlink->h, ff_filter_get_nb_threads(ctx)));
+
+    if (out != in) /* input was not reused as the output: release it */
         av_frame_free(&in);
 
+    return out;
+}
+
+static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+{
+    AVFilterLink *outlink = inlink->dst->outputs[0];
+    AVFrame *out = apply_lut(inlink, in);
+    if (!out)
+        return AVERROR(ENOMEM);
     return ff_filter_frame(outlink, out);
 }
 
@@ -497,18 +531,18 @@  static const AVFilterPad outputs[] = {
     { NULL }
 };
 
-#define DEFINE_LUT_FILTER(name_, description_)                          \
-    AVFilter ff_vf_##name_ = {                                          \
-        .name          = #name_,                                        \
-        .description   = NULL_IF_CONFIG_SMALL(description_),            \
-        .priv_size     = sizeof(LutContext),                            \
-        .priv_class    = &name_ ## _class,                              \
-        .init          = name_##_init,                                  \
-        .uninit        = uninit,                                        \
-        .query_formats = query_formats,                                 \
-        .inputs        = inputs,                                        \
-        .outputs       = outputs,                                       \
-        .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC,        \
+#define DEFINE_LUT_FILTER(name_, description_)                                                  \
+    AVFilter ff_vf_##name_ = {                                                                  \
+        .name          = #name_,                                                                \
+        .description   = NULL_IF_CONFIG_SMALL(description_),                                    \
+        .priv_size     = sizeof(LutContext),                                                    \
+        .priv_class    = &name_ ## _class,                                                      \
+        .init          = name_##_init,                                                          \
+        .uninit        = uninit,                                                                \
+        .query_formats = query_formats,                                                         \
+        .inputs        = inputs,                                                                \
+        .outputs       = outputs,                                                               \
+        .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC | AVFILTER_FLAG_SLICE_THREADS,  \
     }
 
 #if CONFIG_LUT_FILTER