From patchwork Thu Feb 28 19:35:52 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Britt Cyr X-Patchwork-Id: 12172 Return-Path: X-Original-To: patchwork@ffaux-bg.ffmpeg.org Delivered-To: patchwork@ffaux-bg.ffmpeg.org Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org [79.124.17.100]) by ffaux.localdomain (Postfix) with ESMTP id 9708E446C87 for ; Thu, 28 Feb 2019 21:36:05 +0200 (EET) Received: from [127.0.1.1] (localhost [127.0.0.1]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id 7655F6805D6; Thu, 28 Feb 2019 21:36:05 +0200 (EET) X-Original-To: ffmpeg-devel@ffmpeg.org Delivered-To: ffmpeg-devel@ffmpeg.org Received: from mail-vk1-f202.google.com (mail-vk1-f202.google.com [209.85.221.202]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTPS id BBECB6805D0 for ; Thu, 28 Feb 2019 21:35:58 +0200 (EET) Received: by mail-vk1-f202.google.com with SMTP id 202so10974817vkv.11 for ; Thu, 28 Feb 2019 11:35:58 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=20161025; h=date:in-reply-to:message-id:mime-version:references:subject:from:to :cc; bh=sDskWEcczOVp+IBB41sr7s8RSd4LVYf1PfMAiLwoH0I=; b=MZt5XWQYucU5Ge1ayL1IYYMIKoL0Tp8BYyC0JW8uFmdcY90fAcrGolikhrADUGA32D lBxQa+Mm5lJQFkM8vKBEIKSqRPUMje4F4iT5fJ/NxY62aOpcE8m+35HINhte2zAbFpE4 2Z3IcISRV9CMjMICJq+hUdvOm5oyAzO4ZBrF2QYMLYhMBoJeVJrl/FwxI3lKXTcNqRXS WaL47jIMBzFWKPvgg/yXDpZ2/0UJypZkAFbLvnB7jU05mijndnyg6D4WoHaLwIV8xH60 j0VQqqTcZDExkYi+hBrk+WikQw26iGfZLiIMMoAp12Qq4KMDMnI1Qx4e4Lx4U4rJfq56 JSUw== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20161025; h=x-gm-message-state:date:in-reply-to:message-id:mime-version :references:subject:from:to:cc; bh=sDskWEcczOVp+IBB41sr7s8RSd4LVYf1PfMAiLwoH0I=; b=ILFc5CO6mMt7VpwSNTjCRWuAKY56GoLiLvyfmUcaAwTngNqsvRD/PVLtbtweCpbx5f 3oM95dKrF4RzK08L2BnNeWYSb9CrPXg0PxVOpIGqnReK9p1Ct0iB4cksiv2rzDXP12Cn mX3BeHN2fG6iCpXfPaAYHJ5h+z1EQPYChy59i4m0W04ptPa7th8DiV90OPKdV62VVXeO 
4avND3wD+k0UHd2DhWVxyQ74o1KrnctxvGGWM8u3NErwOezuNM6Veis8Qh6P3L73vIsV KsfMcJjaZrZvGrR7h7dfVRnzi7BTaRxjQlyGD+Tkjr+0DJ5mRTmk8DXTclSTuSUgdX3S sAHA== X-Gm-Message-State: APjAAAVFG6awK+8oMxJxCeqsh/EO3m5DmZkdJ/dG/bIlqQf6EcZ3x1FM j7zSlDXrC9apepew1CCkHc6pDF16pS9pZFpj+ULG4DMAU5Uu8jsSTfupj90enNdPNYnFZt46w1J 2Kiyaah67KlvB9aettuusPlp75p4jeIqVZRVq97wZiRwPQCpsq8Jt7F0= X-Google-Smtp-Source: APXvYqxNyrsh053VD3FFscpb+bzKrijavp0jvf9HzvIib3j0Wncgrbcdv9pQJWWH3q9zduXizSXYMKQ= X-Received: by 2002:a67:e10c:: with SMTP id d12mr656832vsl.22.1551382556922; Thu, 28 Feb 2019 11:35:56 -0800 (PST) Date: Thu, 28 Feb 2019 14:35:52 -0500 In-Reply-To: <20190227152805.GO3501@michaelspb> Message-Id: <20190228193552.45519-1-cyr@google.com> Mime-Version: 1.0 References: <20190227152805.GO3501@michaelspb> X-Mailer: git-send-email 2.21.0.rc2.261.ga7da99ff1b-goog From: Britt Cyr To: ffmpeg-devel@ffmpeg.org Subject: [FFmpeg-devel] [PATCH] Parallelize vf_lut X-BeenThere: ffmpeg-devel@ffmpeg.org X-Mailman-Version: 2.1.20 Precedence: list List-Id: FFmpeg development discussions and patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: FFmpeg development discussions and patches Cc: Britt Cyr Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" This will use ff_filter_get_nb_threads(ctx) threads, which was 4x faster when I tested on a 4K video --- libavfilter/vf_lut.c | 106 ++++++++++++++++++++++++++++--------------- 1 file changed, 70 insertions(+), 36 deletions(-) diff --git a/libavfilter/vf_lut.c b/libavfilter/vf_lut.c index c815ddc194..9e5527e4a1 100644 --- a/libavfilter/vf_lut.c +++ b/libavfilter/vf_lut.c @@ -72,6 +72,12 @@ typedef struct LutContext { int negate_alpha; /* only used by negate */ } LutContext; +typedef struct ThreadData { + AVFrame *in; + AVFrame *out; + AVFilterLink *link; +} ThreadData; + #define Y 0 #define U 1 #define V 2 @@ -337,26 +343,13 @@ static int config_props(AVFilterLink *inlink) return 0; } -static int 
filter_frame(AVFilterLink *inlink, AVFrame *in) -{ - AVFilterContext *ctx = inlink->dst; +static int lookup_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) { LutContext *s = ctx->priv; - AVFilterLink *outlink = ctx->outputs[0]; - AVFrame *out; - int i, j, plane, direct = 0; - - if (av_frame_is_writable(in)) { - direct = 1; - out = in; - } else { - out = ff_get_video_buffer(outlink, outlink->w, outlink->h); - if (!out) { - av_frame_free(&in); - return AVERROR(ENOMEM); - } - av_frame_copy_props(out, in); - } - + int i, j, plane = 0; + const ThreadData *td = arg; + const AVFrame *in = td->in; + AVFrame *out = td->out; + const AVFilterLink *inlink = td->link; if (s->is_rgb && s->is_16bit && !s->is_planar) { /* packed, 16-bit */ uint16_t *inrow, *outrow, *inrow0, *outrow0; @@ -366,11 +359,13 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in) const int in_linesize = in->linesize[0] / 2; const int out_linesize = out->linesize[0] / 2; const int step = s->step; + const int row_min = jobnr / nb_jobs * h; + const int row_max = (jobnr + 1) / nb_jobs * h; inrow0 = (uint16_t*) in ->data[0]; outrow0 = (uint16_t*) out->data[0]; - for (i = 0; i < h; i ++) { + for (i = row_min; i < row_max; i ++) { inrow = inrow0; outrow = outrow0; for (j = 0; j < w; j++) { @@ -403,11 +398,13 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in) const int in_linesize = in->linesize[0]; const int out_linesize = out->linesize[0]; const int step = s->step; + const int row_min = jobnr / nb_jobs * h; + const int row_max = (jobnr + 1) / nb_jobs * h; inrow0 = in ->data[0]; outrow0 = out->data[0]; - for (i = 0; i < h; i ++) { + for (i = row_min; i < row_max; i ++) { inrow = inrow0; outrow = outrow0; for (j = 0; j < w; j++) { @@ -435,11 +432,13 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in) const uint16_t *tab = s->lut[plane]; const int in_linesize = in->linesize[plane] / 2; const int out_linesize = out->linesize[plane] / 2; + const int row_min = jobnr / 
nb_jobs * h; + const int row_max = (jobnr + 1) / nb_jobs * h; inrow = (uint16_t *)in ->data[plane]; outrow = (uint16_t *)out->data[plane]; - for (i = 0; i < h; i++) { + for (i = row_min; i < row_max; i++) { for (j = 0; j < w; j++) { #if HAVE_BIGENDIAN outrow[j] = av_bswap16(tab[av_bswap16(inrow[j])]); @@ -463,11 +462,13 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in) const uint16_t *tab = s->lut[plane]; const int in_linesize = in->linesize[plane]; const int out_linesize = out->linesize[plane]; + const int row_min = jobnr / nb_jobs * h; + const int row_max = (jobnr + 1) / nb_jobs * h; inrow = in ->data[plane]; outrow = out->data[plane]; - for (i = 0; i < h; i++) { + for (i = row_min; i < row_max; i++) { for (j = 0; j < w; j++) outrow[j] = tab[inrow[j]]; inrow += in_linesize; @@ -476,9 +477,42 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in) } } - if (!direct) + return 0; +} + +static AVFrame *apply_lut(AVFilterLink *inlink, AVFrame *in) { + AVFilterContext *ctx = inlink->dst; + AVFilterLink *outlink = ctx->outputs[0]; + AVFrame *out; + ThreadData td; + + if (av_frame_is_writable(in)) { + out = in; + } else { + out = ff_get_video_buffer(outlink, outlink->w, outlink->h); + if (!out) { + av_frame_free(&in); + return NULL; + } + av_frame_copy_props(out, in); + } + td.in = in; + td.out = out; + td.link = inlink; + ctx->internal->execute(ctx, lookup_slice, &td, NULL, FFMIN(outlink->h, ff_filter_get_nb_threads(ctx))); + + if (out != in) av_frame_free(&in); + return out; +} + +static int filter_frame(AVFilterLink *inlink, AVFrame *in) +{ + AVFilterLink *outlink = inlink->dst->outputs[0]; + AVFrame *out = apply_lut(inlink, in); + if (!out) + return AVERROR(ENOMEM); return ff_filter_frame(outlink, out); } @@ -497,18 +531,18 @@ static const AVFilterPad outputs[] = { { NULL } }; -#define DEFINE_LUT_FILTER(name_, description_) \ - AVFilter ff_vf_##name_ = { \ - .name = #name_, \ - .description = NULL_IF_CONFIG_SMALL(description_), \ - .priv_size = 
sizeof(LutContext), \ - .priv_class = &name_ ## _class, \ - .init = name_##_init, \ - .uninit = uninit, \ - .query_formats = query_formats, \ - .inputs = inputs, \ - .outputs = outputs, \ - .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC, \ +#define DEFINE_LUT_FILTER(name_, description_) \ + AVFilter ff_vf_##name_ = { \ + .name = #name_, \ + .description = NULL_IF_CONFIG_SMALL(description_), \ + .priv_size = sizeof(LutContext), \ + .priv_class = &name_ ## _class, \ + .init = name_##_init, \ + .uninit = uninit, \ + .query_formats = query_formats, \ + .inputs = inputs, \ + .outputs = outputs, \ + .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC | AVFILTER_FLAG_SLICE_THREADS, \ } #if CONFIG_LUT_FILTER