From patchwork Sat May 25 02:36:14 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Jun Zhao X-Patchwork-Id: 13287 Return-Path: X-Original-To: patchwork@ffaux-bg.ffmpeg.org Delivered-To: patchwork@ffaux-bg.ffmpeg.org Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org [79.124.17.100]) by ffaux.localdomain (Postfix) with ESMTP id 9E3924495EB for ; Sat, 25 May 2019 05:36:31 +0300 (EEST) Received: from [127.0.1.1] (localhost [127.0.0.1]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id 88FB768A375; Sat, 25 May 2019 05:36:31 +0300 (EEST) X-Original-To: ffmpeg-devel@ffmpeg.org Delivered-To: ffmpeg-devel@ffmpeg.org Received: from mail-pg1-f195.google.com (mail-pg1-f195.google.com [209.85.215.195]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTPS id 89048689990 for ; Sat, 25 May 2019 05:36:25 +0300 (EEST) Received: by mail-pg1-f195.google.com with SMTP id h2so2945972pgg.1 for ; Fri, 24 May 2019 19:36:25 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20161025; h=from:to:cc:subject:date:message-id:in-reply-to:references; bh=1ANzJM5l/vZspdufPugFuQ1M5mfyb7r65yAkkAkyN9M=; b=q0V7gXmclTs/zCnCgFFzh9SXm8Q8X1FgBHFpjAjBs5nYUBqKR5zzdMH/zHotqZH/EE K94a6rxpsLJkqzUN7SF8QKFm1YJ1VbVPPOEZeAw7GGAiMV24C2NwY6Qd6Yj8afNmpkJy iYwHXhiwirp5/07Kh/TT/TuGtmiD1P3/0FGf8Z8Dr+c+g+9ALrh73FFUrDcH81SnC0ma N5JYdnSMA7xMsC1nUi92nm/EsmAQSeGrwHfhcXsXl3yK1OreyaTBljNzU3GmWK89xSNg QLXL1vNopsz6BKOOjnaArOTTU8J8QvnpHjOFsqTfAXbLMHB/gIcfAzK7GtoaKm1bmOh2 OTNA== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20161025; h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to :references; bh=1ANzJM5l/vZspdufPugFuQ1M5mfyb7r65yAkkAkyN9M=; b=jmMzUi6gvS3ybcV1TgFi3JjfEPWqKY5pWOroKHkZcDcEBmkO8CVMPN1Ww+/6dLfmhn yryctUMvQZ5u8K3VhBcfPgK702+p5GUX7FfwiHWdOwrsTJ18RWS4opOW2Q0C6ECihsZq YPjEH99FZxdAl3YHIidXALS8waMGe6AU1DnPLZRSdBelhT6wBteFcYeJdYfPP9Tb06xS 
C6TScK9VVy4iGs+nmcnSl4V4dP3eBirES+BGDOUD4H/qxD8Ah8HmGNA//nyUgTXqpqaH 8Qg+OtCt4/w5N3mRjHkPxA30A1OUG4hXKy6Jy8V1mtDVAIC/bcRtNOi6j2zyPUzK8bHj sX1g== X-Gm-Message-State: APjAAAV+zyoso0Y+D9JhaRvpa6r2owYhaI9/MP+IbQq0IJxan6W5yO8D o4HWhZHkhIYNjFKy3bkDNdjKNPD8 X-Google-Smtp-Source: APXvYqyiFciiuEsvXI8mCgOBWjbTbr91Tc5KlxzScQTRFaMa75zOa0OA+wT/ohJkrF4vBCdISEmUpQ== X-Received: by 2002:a65:628d:: with SMTP id f13mr8023596pgv.177.1558751783892; Fri, 24 May 2019 19:36:23 -0700 (PDT) Received: from localhost.localdomain ([47.90.47.25]) by smtp.gmail.com with ESMTPSA id s2sm3962457pfe.105.2019.05.24.19.36.22 (version=TLS1_2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128); Fri, 24 May 2019 19:36:23 -0700 (PDT) From: Jun Zhao To: ffmpeg-devel@ffmpeg.org Date: Sat, 25 May 2019 10:36:14 +0800 Message-Id: <1558751774-4204-2-git-send-email-mypopydev@gmail.com> X-Mailer: git-send-email 1.7.1 In-Reply-To: <1558751774-4204-1-git-send-email-mypopydev@gmail.com> References: <1558751774-4204-1-git-send-email-mypopydev@gmail.com> Subject: [FFmpeg-devel] [PATCH V4] lavfi/colorlevels: Add slice threading support X-BeenThere: ffmpeg-devel@ffmpeg.org X-Mailman-Version: 2.1.20 Precedence: list List-Id: FFmpeg development discussions and patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: FFmpeg development discussions and patches Cc: Jun Zhao MIME-Version: 1.0 Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" From: Jun Zhao Add slice threading support. Using a command like: ./ffmpeg -i input -vf colorlevels -f null /dev/null with a 1080p H.264 clip, throughput improves from 39 fps to 79 fps on the local machine (Intel(R) Core(TM) i5-8265U CPU @ 1.60GHz). Signed-off-by: Jun Zhao --- libavfilter/vf_colorlevels.c | 121 +++++++++++++++++++++++++++++++++++------- 1 files changed, 102 insertions(+), 19 deletions(-) diff --git a/libavfilter/vf_colorlevels.c b/libavfilter/vf_colorlevels.c index 5385a5e..4e47089 100644 --- a/libavfilter/vf_colorlevels.c +++ 
b/libavfilter/vf_colorlevels.c @@ -105,6 +105,79 @@ static int config_input(AVFilterLink *inlink) return 0; } +struct thread_data { + const uint8_t *srcrow; + uint8_t *dstrow; + int dst_linesize; + int src_linesize; + + double coeff; + uint8_t offset; + + int h; + + int imin; + int omin; +}; + +static int colorlevel_slice_8(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) +{ + ColorLevelsContext *s = ctx->priv; + const struct thread_data *td = arg; + + int process_h = td->h; + const int slice_start = (process_h * jobnr ) / nb_jobs; + const int slice_end = (process_h * (jobnr+1)) / nb_jobs; + int x, y; + const uint8_t *srcrow = td->srcrow; + uint8_t *dstrow = td->dstrow; + const int step = s->step; + const uint8_t offset = td->offset; + + int imin = td->imin; + int omin = td->omin; + double coeff = td->coeff; + + for (y = slice_start; y < slice_end; y++) { + const uint8_t *src = srcrow + y * td->src_linesize; + uint8_t *dst = dstrow + y * td->dst_linesize; + + for (x = 0; x < s->linesize; x += step) + dst[x + offset] = av_clip_uint8((src[x + offset] - imin) * coeff + omin); + } + + return 0; +} + +static int colorlevel_slice_16(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) +{ + ColorLevelsContext *s = ctx->priv; + const struct thread_data *td = arg; + + int process_h = td->h; + const int slice_start = (process_h * jobnr ) / nb_jobs; + const int slice_end = (process_h * (jobnr+1)) / nb_jobs; + int x, y; + const uint8_t *srcrow = td->srcrow; + uint8_t *dstrow = td->dstrow; + const int step = s->step; + const uint8_t offset = td->offset; + + int imin = td->imin; + int omin = td->omin; + double coeff = td->coeff; + + for (y = slice_start; y < slice_end; y++) { + const uint16_t *src = (const uint16_t *)(srcrow + y * td->src_linesize); + uint16_t *dst = (uint16_t *)(dstrow + y * td->dst_linesize); + + for (x = 0; x < s->linesize; x += step) + dst[x + offset] = av_clip_uint16((src[x + offset] - imin) * coeff + omin); + } + + return 0; +} + static int 
filter_frame(AVFilterLink *inlink, AVFrame *in) { AVFilterContext *ctx = inlink->dst; @@ -137,6 +210,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in) int omin = lrint(r->out_min * UINT8_MAX); int omax = lrint(r->out_max * UINT8_MAX); double coeff; + struct thread_data td; if (imin < 0) { imin = UINT8_MAX; @@ -162,15 +236,19 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in) srcrow = in->data[0]; coeff = (omax - omin) / (double)(imax - imin); - for (y = 0; y < inlink->h; y++) { - const uint8_t *src = srcrow; - uint8_t *dst = dstrow; - - for (x = 0; x < s->linesize; x += step) - dst[x + offset] = av_clip_uint8((src[x + offset] - imin) * coeff + omin); - dstrow += out->linesize[0]; - srcrow += in->linesize[0]; - } + + td.srcrow = srcrow; + td.dstrow = dstrow; + td.dst_linesize = out->linesize[0]; + td.src_linesize = in->linesize[0]; + td.coeff = coeff; + td.offset = offset; + td.h = inlink->h; + td.imin = imin; + td.omin = omin; + + ctx->internal->execute(ctx, colorlevel_slice_8, &td, NULL, + FFMIN(inlink->h, ff_filter_get_nb_threads(ctx))); } break; case 2: @@ -184,6 +262,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in) int omin = lrint(r->out_min * UINT16_MAX); int omax = lrint(r->out_max * UINT16_MAX); double coeff; + struct thread_data td; if (imin < 0) { imin = UINT16_MAX; @@ -209,15 +288,19 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in) srcrow = in->data[0]; coeff = (omax - omin) / (double)(imax - imin); - for (y = 0; y < inlink->h; y++) { - const uint16_t *src = (const uint16_t*)srcrow; - uint16_t *dst = (uint16_t *)dstrow; - - for (x = 0; x < s->linesize; x += step) - dst[x + offset] = av_clip_uint16((src[x + offset] - imin) * coeff + omin); - dstrow += out->linesize[0]; - srcrow += in->linesize[0]; - } + + td.srcrow = srcrow; + td.dstrow = dstrow; + td.dst_linesize = out->linesize[0]; + td.src_linesize = in->linesize[0]; + td.coeff = coeff; + td.offset = offset; + td.h = inlink->h; + td.imin = imin; + 
td.omin = omin; + + ctx->internal->execute(ctx, colorlevel_slice_16, &td, NULL, + FFMIN(inlink->h, ff_filter_get_nb_threads(ctx))); } } @@ -252,5 +335,5 @@ AVFilter ff_vf_colorlevels = { .query_formats = query_formats, .inputs = colorlevels_inputs, .outputs = colorlevels_outputs, - .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC, + .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC | AVFILTER_FLAG_SLICE_THREADS, };