From patchwork Tue Feb 16 22:10:39 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Paul B Mahol X-Patchwork-Id: 25667 Return-Path: X-Original-To: patchwork@ffaux-bg.ffmpeg.org Delivered-To: patchwork@ffaux-bg.ffmpeg.org Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org [79.124.17.100]) by ffaux.localdomain (Postfix) with ESMTP id 517CB4490F3 for ; Wed, 17 Feb 2021 00:10:57 +0200 (EET) Received: from [127.0.1.1] (localhost [127.0.0.1]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id 31A6D689D7A; Wed, 17 Feb 2021 00:10:57 +0200 (EET) X-Original-To: ffmpeg-devel@ffmpeg.org Delivered-To: ffmpeg-devel@ffmpeg.org Received: from mail-ej1-f54.google.com (mail-ej1-f54.google.com [209.85.218.54]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTPS id AADC36880C4 for ; Wed, 17 Feb 2021 00:10:48 +0200 (EET) Received: by mail-ej1-f54.google.com with SMTP id lu16so4199471ejb.9 for ; Tue, 16 Feb 2021 14:10:48 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20161025; h=from:to:subject:date:message-id; bh=wiHH3UgD18n05jdm7IA8ox23Ff8K2DOazeIH/6/NPbo=; b=NusKVArnolyM8/WLrm56P5RjiRY+lqE8w31+CEfPFcyrIircl829+v3yGb+hfozkjl NQGmJxW/I5vwREA4nRRB8s5yV+KpDzqmfXCpSeYOuXAZgosXigLEDAq0JdDjskrqbCW0 eQQXfEWZ54pICPtBhzI3jFfpynzAZzV+jWqajYvXxyOdWtGjdQqqhZyqXJ2QU6IO13MQ WYheaN9iWSp35lqVlxPdCTCnkzqS94FnS2DkYCXd3c5PNacAZ2Y5416gvtYvYlDHmvU0 NAkCUfx+gfaVb+gdYnFWnfU6wxhv85L8AqRhx9ww1OWzXgrdZ1Xj0vHaK4ZAZnfkVzqx Crdg== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20161025; h=x-gm-message-state:from:to:subject:date:message-id; bh=wiHH3UgD18n05jdm7IA8ox23Ff8K2DOazeIH/6/NPbo=; b=e7R4n/Kf6fGJgojlJsv2dn4a73WgmKmn5oPvbw4yllJBeYeGdwevV+3l6B14A/Roql tS91sV/GjuvBv71QMbrHs5GHkl2u+RDQxrf5ipZlGyC4qVYeHZiS75VRECLtjs79GYNM TOv0armC9m0MJy2yCE9DFeM4KSWwnKX3l3KFn3cibOAiI2C5kadXXuzRoRahFs+bowKV 74AQHFvFZXL+VdNS0Y89LMwRu8zVIXIszcexpMH3KtSZTYBtb7FSb36yF3476tqyj62K 8AlZd4l0zcVTRg7nQ8PVxPl/RYpyJV0dhuiWFEDzldMg5NBQDvD7xsClkSTOn5CY1/K3 kPVA== X-Gm-Message-State: AOAM533vcMt8kJqQmm5jA1MSsyXfouyg6MUdU3o5NO4ay9aQLWa74DBN e4n990RoXWjME6ncjL3m2PIypSYh+P+7mQ== X-Google-Smtp-Source: ABdhPJwcO6KTMMd3Pv7BQ/JasOPliiKE6gEuGFQCPWOBEe+pFqbD7rnAQohF+NuTizQwf7VtwRYwoA== X-Received: by 2002:a17:906:7e42:: with SMTP id z2mr22625914ejr.177.1613513447905; Tue, 16 Feb 2021 14:10:47 -0800 (PST) Received: from localhost.localdomain ([94.250.162.225]) by smtp.gmail.com with ESMTPSA id q20sm88593ejs.17.2021.02.16.14.10.46 for (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 16 Feb 2021 14:10:47 -0800 (PST) From: Paul B Mahol To: ffmpeg-devel@ffmpeg.org Date: Tue, 16 Feb 2021 23:10:39 +0100 Message-Id: <20210216221039.9806-1-onemda@gmail.com> X-Mailer: git-send-email 2.17.1 Subject: [FFmpeg-devel] [PATCH] avfilter/vf_ssim: add slice threading X-BeenThere: ffmpeg-devel@ffmpeg.org X-Mailman-Version: 2.1.20 Precedence: list List-Id: FFmpeg development discussions and patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: FFmpeg development discussions and patches MIME-Version: 1.0 Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" Signed-off-by: Paul B Mahol --- libavfilter/vf_ssim.c | 196 ++++++++++++++++++++++++++++++------------ 1 file changed, 141 insertions(+), 55 deletions(-) diff --git a/libavfilter/vf_ssim.c b/libavfilter/vf_ssim.c index 4efc807f37..dc0af79dbf 100644 --- a/libavfilter/vf_ssim.c +++ b/libavfilter/vf_ssim.c @@ -51,6 +51,7 @@ typedef struct SSIMContext { FILE *stats_file; char *stats_file_str; int nb_components; + int nb_threads; int max; uint64_t nb_frames; double ssim[4], ssim_total; @@ -59,13 +60,11 @@ typedef struct SSIMContext { uint8_t rgba_map[4]; int planewidth[4]; int planeheight[4]; - int *temp; + int **temp; int is_rgb; - double (*ssim_plane)(SSIMDSPContext *dsp, - uint8_t *main, int main_stride, - uint8_t *ref, int ref_stride, - int width, int height, void *temp, - int max); + double **score; + int (*ssim_plane)(AVFilterContext *ctx, void *arg, + int jobnr, int nb_jobs); SSIMDSPContext dsp; } SSIMContext; @@ -221,60 +220,104 @@ static double ssim_endn_8bit(const int (*sum0)[4], const int (*sum1)[4], int wid #define SUM_LEN(w) (((w) >> 2) + 3) -static double ssim_plane_16bit(SSIMDSPContext *dsp, - uint8_t *main, int main_stride, - uint8_t *ref, int ref_stride, - int width, int height, void *temp, - int max) +typedef struct ThreadData { + const uint8_t *main_data[4]; + const uint8_t *ref_data[4]; + int main_linesize[4]; + int ref_linesize[4]; + int planewidth[4]; + int planeheight[4]; + double **score; + int **temp; + int nb_components; + int max; + SSIMDSPContext *dsp; +} ThreadData; + +static int ssim_plane_16bit(AVFilterContext *ctx, void *arg, + int jobnr, int nb_jobs) { - int z = 0, y; - double ssim = 0.0; - int64_t (*sum0)[4] = temp; - int64_t (*sum1)[4] = sum0 + SUM_LEN(width); - - width >>= 2; - height >>= 2; - - for (y = 1; y < height; y++) { - for (; z <= y; z++) { - FFSWAP(void*, sum0, sum1); - ssim_4x4xn_16bit(&main[4 * z * main_stride], main_stride, - &ref[4 * z * ref_stride], ref_stride, - sum0, width); + ThreadData *td = arg; + double *score = td->score[jobnr]; + void *temp = td->temp[jobnr]; + const int max = td->max; + + for (int c = 0; c < td->nb_components; c++) { + const uint8_t *main = td->main_data[c]; + const uint8_t *ref = td->ref_data[c]; + const int main_stride = td->main_linesize[c]; + const int ref_stride = td->ref_linesize[c]; + int width = td->planewidth[c]; + int height = td->planeheight[c]; + const int slice_start = (((height + 3) >> 2) * jobnr) / nb_jobs; + const int slice_end = (((height + 3) >> 2) * (jobnr+1)) / nb_jobs; + const int ystart = FFMAX(1, slice_start); + int z = ystart - 1; + double ssim = 0.0; + int64_t (*sum0)[4] = temp; + int64_t (*sum1)[4] = sum0 + SUM_LEN(width); + + width >>= 2; + height >>= 2; + + for (int y = ystart; y < slice_end; y++) { + for (; z <= y; z++) { + FFSWAP(void*, sum0, sum1); + ssim_4x4xn_16bit(&main[4 * z * main_stride], main_stride, + &ref[4 * z * ref_stride], ref_stride, + sum0, width); + } + + ssim += ssim_endn_16bit((const int64_t (*)[4])sum0, (const int64_t (*)[4])sum1, width - 1, max); } - ssim += ssim_endn_16bit((const int64_t (*)[4])sum0, (const int64_t (*)[4])sum1, width - 1, max); + score[c] = ssim / ((height - 1) * (width - 1)); } - return ssim / ((height - 1) * (width - 1)); + return 0; } -static double ssim_plane(SSIMDSPContext *dsp, - uint8_t *main, int main_stride, - uint8_t *ref, int ref_stride, - int width, int height, void *temp, - int max) +static int ssim_plane(AVFilterContext *ctx, void *arg, + int jobnr, int nb_jobs) { - int z = 0, y; - double ssim = 0.0; - int (*sum0)[4] = temp; - int (*sum1)[4] = sum0 + SUM_LEN(width); - - width >>= 2; - height >>= 2; - - for (y = 1; y < height; y++) { - for (; z <= y; z++) { - FFSWAP(void*, sum0, sum1); - dsp->ssim_4x4_line(&main[4 * z * main_stride], main_stride, - &ref[4 * z * ref_stride], ref_stride, - sum0, width); + ThreadData *td = arg; + double *score = td->score[jobnr]; + void *temp = td->temp[jobnr]; + SSIMDSPContext *dsp = td->dsp; + + for (int c = 0; c < td->nb_components; c++) { + const uint8_t *main = td->main_data[c]; + const uint8_t *ref = td->ref_data[c]; + const int main_stride = td->main_linesize[c]; + const int ref_stride = td->ref_linesize[c]; + int width = td->planewidth[c]; + int height = td->planeheight[c]; + const int slice_start = (((height + 3) >> 2) * jobnr) / nb_jobs; + const int slice_end = (((height + 3) >> 2) * (jobnr+1)) / nb_jobs; + const int ystart = FFMAX(1, slice_start); + int z = ystart - 1; + double ssim = 0.0; + int (*sum0)[4] = temp; + int (*sum1)[4] = sum0 + SUM_LEN(width); + + width >>= 2; + height >>= 2; + + for (int y = ystart; y < slice_end; y++) { + for (; z <= y; z++) { + FFSWAP(void*, sum0, sum1); + dsp->ssim_4x4_line(&main[4 * z * main_stride], main_stride, + &ref[4 * z * ref_stride], ref_stride, + sum0, width); + } + + ssim += dsp->ssim_end_line((const int (*)[4])sum0, (const int (*)[4])sum1, width - 1); } - ssim += dsp->ssim_end_line((const int (*)[4])sum0, (const int (*)[4])sum1, width - 1); + score[c] = ssim / ((height - 1) * (width - 1)); } - return ssim / ((height - 1) * (width - 1)); + return 0; } static double ssim_db(double ssim, double weight) @@ -288,7 +331,8 @@ static int do_ssim(FFFrameSync *fs) SSIMContext *s = ctx->priv; AVFrame *master, *ref; AVDictionary **metadata; - double c[4] = { 0 }, ssimv = 0.0; + double c[4] = {0}, ssimv = 0.0; + ThreadData td; int ret, i; ret = ff_framesync_dualinput_get(fs, &master, &ref); @@ -300,14 +344,33 @@ static int do_ssim(FFFrameSync *fs) s->nb_frames++; + td.nb_components = s->nb_components; + td.dsp = &s->dsp; + td.score = s->score; + td.temp = s->temp; + td.max = s->max; + + for (int n = 0; n < s->nb_components; n++) { + td.main_data[n] = master->data[n]; + td.ref_data[n] = ref->data[n]; + td.main_linesize[n] = master->linesize[n]; + td.ref_linesize[n] = ref->linesize[n]; + td.planewidth[n] = s->planewidth[n]; + td.planeheight[n] = s->planeheight[n]; + } + + ctx->internal->execute(ctx, s->ssim_plane, &td, NULL, FFMIN((s->planeheight[1] + 3) >> 2, s->nb_threads)); + + for (i = 0; i < s->nb_components; i++) { + for (int j = 0; j < s->nb_threads; j++) + c[i] += s->score[j][i]; + } + for (i = 0; i < s->nb_components; i++) { - c[i] = s->ssim_plane(&s->dsp, master->data[i], master->linesize[i], - ref->data[i], ref->linesize[i], - s->planewidth[i], s->planeheight[i], s->temp, - s->max); ssimv += s->coefs[i] * c[i]; s->ssim[i] += c[i]; } + for (i = 0; i < s->nb_components; i++) { int cidx = s->is_rgb ? s->rgba_map[i] : i; set_meta(metadata, "lavfi.ssim.", s->comps[i], c[cidx]); @@ -383,6 +446,7 @@ static int config_input_ref(AVFilterLink *inlink) SSIMContext *s = ctx->priv; int sum = 0, i; + s->nb_threads = ff_filter_get_nb_threads(ctx); s->nb_components = desc->nb_components; if (ctx->inputs[0]->w != ctx->inputs[1]->w || @@ -410,9 +474,15 @@ static int config_input_ref(AVFilterLink *inlink) for (i = 0; i < s->nb_components; i++) s->coefs[i] = (double) s->planeheight[i] * s->planewidth[i] / sum; - s->temp = av_mallocz_array(2 * SUM_LEN(inlink->w), (desc->comp[0].depth > 8) ? sizeof(int64_t[4]) : sizeof(int[4])); + s->temp = av_calloc(s->nb_threads, sizeof(*s->temp)); if (!s->temp) return AVERROR(ENOMEM); + + for (int t = 0; t < s->nb_threads; t++) { + s->temp[t] = av_mallocz_array(2 * SUM_LEN(inlink->w), (desc->comp[0].depth > 8) ? sizeof(int64_t[4]) : sizeof(int[4])); + if (!s->temp[t]) + return AVERROR(ENOMEM); + } s->max = (1 << desc->comp[0].depth) - 1; s->ssim_plane = desc->comp[0].depth > 8 ? ssim_plane_16bit : ssim_plane; @@ -421,6 +491,16 @@ static int config_input_ref(AVFilterLink *inlink) if (ARCH_X86) ff_ssim_init_x86(&s->dsp); + s->score = av_calloc(s->nb_threads, sizeof(*s->score)); + if (!s->score) + return AVERROR(ENOMEM); + + for (int t = 0; t < s->nb_threads && s->score; t++) { + s->score[t] = av_calloc(s->nb_components, sizeof(*s->score[0])); + if (!s->score[t]) + return AVERROR(ENOMEM); + } + return 0; } @@ -482,6 +562,12 @@ static av_cold void uninit(AVFilterContext *ctx) if (s->stats_file && s->stats_file != stdout) fclose(s->stats_file); + for (int t = 0; t < s->nb_threads && s->score; t++) + av_freep(&s->score[t]); + av_freep(&s->score); + + for (int t = 0; t < s->nb_threads && s->temp; t++) + av_freep(&s->temp[t]); av_freep(&s->temp); } @@ -518,5 +604,5 @@ AVFilter ff_vf_ssim = { .priv_class = &ssim_class, .inputs = ssim_inputs, .outputs = ssim_outputs, - .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL, + .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL | AVFILTER_FLAG_SLICE_THREADS, };