From patchwork Fri Jan 31 09:04:54 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Paul B Mahol X-Patchwork-Id: 17628 Return-Path: X-Original-To: patchwork@ffaux-bg.ffmpeg.org Delivered-To: patchwork@ffaux-bg.ffmpeg.org Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org [79.124.17.100]) by ffaux.localdomain (Postfix) with ESMTP id 22994449086 for ; Fri, 31 Jan 2020 11:05:13 +0200 (EET) Received: from [127.0.1.1] (localhost [127.0.0.1]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id 0206068AA5F; Fri, 31 Jan 2020 11:05:13 +0200 (EET) X-Original-To: ffmpeg-devel@ffmpeg.org Delivered-To: ffmpeg-devel@ffmpeg.org Received: from mail-wr1-f65.google.com (mail-wr1-f65.google.com [209.85.221.65]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTPS id A8F99689F22 for ; Fri, 31 Jan 2020 11:05:05 +0200 (EET) Received: by mail-wr1-f65.google.com with SMTP id a6so7619044wrx.12 for ; Fri, 31 Jan 2020 01:05:05 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20161025; h=from:to:subject:date:message-id; bh=M4UjDsQMxZvhWo9lON7gD1b/OXbETCY2im8cl95Raz0=; b=e6i1pQgfz3BNv9F3jhFq+O+xft7ytKJFPmZMQFJY7GrvsYSdId8m6ZGrvbPsg0JBhm Oa0nRDa62Fmb4oX/TayW/4p8gfTCi0GkQcR6M48PnvjfKO6PbfBg2lGGGc0ZYIt0LixH uYRAaQ0/cL0EL2E5fvl/aVXP/1jNxZCK+01YVHXAbHZAwK8j+gx3w2q5vMGyH+JQzYEa HUJ/n9FFTxjmOAYiIt3/o4ABu7sKuj6t7jD93edx/T45g4h/oUqwZ8kR+ssrQX8uOnNr wtXvyveyS5xG2GIlOBPsh8Qoo+jPlSPHYwtGAnnzbeOHYDpJsvxy7c9D6RGa6290THtb OZSg== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20161025; h=x-gm-message-state:from:to:subject:date:message-id; bh=M4UjDsQMxZvhWo9lON7gD1b/OXbETCY2im8cl95Raz0=; b=coSugNfbbhkIcGONfq6K91OXcWaYsO+RkYwDSMnr8eIe6yP8ydZvzSTamrmVeQ16Yf RcL4qx3IxFbxwwnXp9Ng1er+q1W0xxP0odjB5SLWJKz4mokFui+Z5E4IdCBI+mCIoAwz aHok0yUFhIqbqGuGOXUZGZA6GYCu6o7Zd5LRDF4QgK8ocABHoMXSICEHiGGpONQiYbuA 0AydTw9f4RlI+1ghQI3qzh1M8UlBKByC1CA8YUpcNxKHDprksOhG0W8B7t/CJSgMTNkV rulzh5+cMinBJ04Rq6NAonuKljb9pK1ESO89PDl5zXvjkW9KTGqW9UgpWSxdi8qt8I4S z4eQ== X-Gm-Message-State: APjAAAW+NDiIGipLECp4MccKkFxKow0FvDhd6uCH8pQfKntiOKnVai2b MIF/oV/bIOtbFH7zMyFw0H7UPmnAg2k= X-Google-Smtp-Source: APXvYqwCpqZnyRD04g271SUtSom2DUNEeS7F0/Rh/VkfRcAUqyU7UlALDi9pNWkUYmBzZrU3rhBg6w== X-Received: by 2002:a5d:4fd0:: with SMTP id h16mr11327098wrw.255.1580461504389; Fri, 31 Jan 2020 01:05:04 -0800 (PST) Received: from localhost.localdomain ([109.227.58.1]) by smtp.gmail.com with ESMTPSA id x10sm11291216wrp.58.2020.01.31.01.05.02 for (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Fri, 31 Jan 2020 01:05:03 -0800 (PST) From: Paul B Mahol To: ffmpeg-devel@ffmpeg.org Date: Fri, 31 Jan 2020 10:04:54 +0100 Message-Id: <20200131090454.17838-1-onemda@gmail.com> X-Mailer: git-send-email 2.17.1 Subject: [FFmpeg-devel] [PATCH] avfilter/vf_ssim: improve precision X-BeenThere: ffmpeg-devel@ffmpeg.org X-Mailman-Version: 2.1.20 Precedence: list List-Id: FFmpeg development discussions and patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: FFmpeg development discussions and patches MIME-Version: 1.0 Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" Use doubles for accumulating floats. Signed-off-by: Paul B Mahol --- libavfilter/ssim.h | 2 +- libavfilter/vf_ssim.c | 18 +++++------ libavfilter/x86/vf_ssim.asm | 36 ++++++++++++++-------- libavfilter/x86/vf_ssim_init.c | 2 +- tests/ref/fate/filter-refcmp-ssim-rgb | 44 +++++++++++++-------------- tests/ref/fate/filter-refcmp-ssim-yuv | 26 ++++++++-------- 6 files changed, 70 insertions(+), 58 deletions(-) diff --git a/libavfilter/ssim.h b/libavfilter/ssim.h index ac0395a22a..a6a41aabe6 100644 --- a/libavfilter/ssim.h +++ b/libavfilter/ssim.h @@ -28,7 +28,7 @@ typedef struct SSIMDSPContext { void (*ssim_4x4_line)(const uint8_t *buf, ptrdiff_t buf_stride, const uint8_t *ref, ptrdiff_t ref_stride, int (*sums)[4], int w); - float (*ssim_end_line)(const int (*sum0)[4], const int (*sum1)[4], int w); + double (*ssim_end_line)(const int (*sum0)[4], const int (*sum1)[4], int w); } SSIMDSPContext; void ff_ssim_init_x86(SSIMDSPContext *dsp); diff --git a/libavfilter/vf_ssim.c b/libavfilter/vf_ssim.c index c08fbcdcc2..17dce8e8e8 100644 --- a/libavfilter/vf_ssim.c +++ b/libavfilter/vf_ssim.c @@ -55,13 +55,13 @@ typedef struct SSIMContext { uint64_t nb_frames; double ssim[4], ssim_total; char comps[4]; - float coefs[4]; + double coefs[4]; uint8_t rgba_map[4]; int planewidth[4]; int planeheight[4]; int *temp; int is_rgb; - float (*ssim_plane)(SSIMDSPContext *dsp, + double (*ssim_plane)(SSIMDSPContext *dsp, uint8_t *main, int main_stride, uint8_t *ref, int ref_stride, int width, int height, void *temp, @@ -206,9 +206,9 @@ static float ssim_endn_16bit(const int64_t (*sum0)[4], const int64_t (*sum1)[4], return ssim; } -static float ssim_endn_8bit(const int (*sum0)[4], const int (*sum1)[4], int width) +static double ssim_endn_8bit(const int (*sum0)[4], const int (*sum1)[4], int width) { - float ssim = 0.0; + double ssim = 0.0; int i; for (i = 0; i < width; i++) @@ -221,14 +221,14 @@ static float ssim_endn_8bit(const int (*sum0)[4], const int (*sum1)[4], int widt #define SUM_LEN(w) (((w) >> 2) + 3) -static float ssim_plane_16bit(SSIMDSPContext *dsp, +static double ssim_plane_16bit(SSIMDSPContext *dsp, uint8_t *main, int main_stride, uint8_t *ref, int ref_stride, int width, int height, void *temp, int max) { int z = 0, y; - float ssim = 0.0; + double ssim = 0.0; int64_t (*sum0)[4] = temp; int64_t (*sum1)[4] = sum0 + SUM_LEN(width); @@ -249,14 +249,14 @@ static float ssim_plane_16bit(SSIMDSPContext *dsp, return ssim / ((height - 1) * (width - 1)); } -static float ssim_plane(SSIMDSPContext *dsp, +static double ssim_plane(SSIMDSPContext *dsp, uint8_t *main, int main_stride, uint8_t *ref, int ref_stride, int width, int height, void *temp, int max) { int z = 0, y; - float ssim = 0.0; + double ssim = 0.0; int (*sum0)[4] = temp; int (*sum1)[4] = sum0 + SUM_LEN(width); @@ -288,7 +288,7 @@ static int do_ssim(FFFrameSync *fs) SSIMContext *s = ctx->priv; AVFrame *master, *ref; AVDictionary **metadata; - float c[4], ssimv = 0.0; + double c[4], ssimv = 0.0; int ret, i; ret = ff_framesync_dualinput_get(fs, &master, &ref); diff --git a/libavfilter/x86/vf_ssim.asm b/libavfilter/x86/vf_ssim.asm index 3293e66701..4cd6293b59 100644 --- a/libavfilter/x86/vf_ssim.asm +++ b/libavfilter/x86/vf_ssim.asm @@ -169,8 +169,9 @@ SSIM_4X4_LINE 8 %endif INIT_XMM sse4 -cglobal ssim_end_line, 3, 3, 6, sum0, sum1, w +cglobal ssim_end_line, 3, 3, 7, sum0, sum1, w pxor m0, m0 + pxor m6, m6 .loop: mova m1, [sum0q+mmsize*0] mova m2, [sum0q+mmsize*1] @@ -214,34 +215,45 @@ cglobal ssim_end_line, 3, 3, 6, sum0, sum1, w mulps m4, m5 mulps m3, m1 divps m4, m3 ; ssim_endl - addps m0, m4 ; ssim + mova m5, m4 + cvtps2pd m3, m5 + movhlps m5, m5 + cvtps2pd m5, m5 + addpd m0, m3 ; ssim + addpd m6, m5 ; ssim add sum0q, mmsize*4 add sum1q, mmsize*4 sub wd, 4 jg .loop - ; subps the ones we added too much + ; subpd the ones we added too much test wd, wd jz .end add wd, 4 + test wd, 3 + jz .skip3 test wd, 2 jz .skip2 - psrldq m4, 8 -.skip2: test wd, 1 jz .skip1 - psrldq m4, 4 +.skip3: + psrldq m5, 8 + subpd m6, m5 + jmp .end +.skip2: + psrldq m3, 8 + subpd m0, m3 + jmp .end .skip1: - subps m0, m4 + psrldq m5, 8 + subpd m6, m5 .end: + addpd m0, m6 movhlps m4, m0 - addps m0, m4 - movss m4, m0 - shufps m0, m0, 1 - addss m0, m4 + addpd m0, m4 %if ARCH_X86_32 - movss r0m, m0 + movsd r0m, m0 fld r0mp %endif RET diff --git a/libavfilter/x86/vf_ssim_init.c b/libavfilter/x86/vf_ssim_init.c index 599c928403..cbaa20ef16 100644 --- a/libavfilter/x86/vf_ssim_init.c +++ b/libavfilter/x86/vf_ssim_init.c @@ -28,7 +28,7 @@ void ff_ssim_4x4_line_ssse3(const uint8_t *buf, ptrdiff_t buf_stride, void ff_ssim_4x4_line_xop (const uint8_t *buf, ptrdiff_t buf_stride, const uint8_t *ref, ptrdiff_t ref_stride, int (*sums)[4], int w); -float ff_ssim_end_line_sse4(const int (*sum0)[4], const int (*sum1)[4], int w); +double ff_ssim_end_line_sse4(const int (*sum0)[4], const int (*sum1)[4], int w); void ff_ssim_init_x86(SSIMDSPContext *dsp) { diff --git a/tests/ref/fate/filter-refcmp-ssim-rgb b/tests/ref/fate/filter-refcmp-ssim-rgb index 8c23c60b37..cb3b5f2e12 100644 --- a/tests/ref/fate/filter-refcmp-ssim-rgb +++ b/tests/ref/fate/filter-refcmp-ssim-rgb @@ -1,30 +1,30 @@ frame:0 pts:0 pts_time:0 +lavfi.ssim.R=0.73 +lavfi.ssim.G=0.77 +lavfi.ssim.B=0.90 +lavfi.ssim.All=0.80 +lavfi.ssim.dB=7.01 +frame:1 pts:1 pts_time:1 lavfi.ssim.R=0.72 lavfi.ssim.G=0.76 -lavfi.ssim.B=0.89 -lavfi.ssim.All=0.79 -lavfi.ssim.dB=6.74 -frame:1 pts:1 pts_time:1 -lavfi.ssim.R=0.70 -lavfi.ssim.G=0.74 -lavfi.ssim.B=0.85 -lavfi.ssim.All=0.77 -lavfi.ssim.dB=6.31 +lavfi.ssim.B=0.86 +lavfi.ssim.All=0.78 +lavfi.ssim.dB=6.56 frame:2 pts:2 pts_time:2 -lavfi.ssim.R=0.71 +lavfi.ssim.R=0.72 +lavfi.ssim.G=0.76 +lavfi.ssim.B=0.85 +lavfi.ssim.All=0.78 +lavfi.ssim.dB=6.53 +frame:3 pts:3 pts_time:3 +lavfi.ssim.R=0.72 lavfi.ssim.G=0.75 lavfi.ssim.B=0.84 +lavfi.ssim.All=0.77 +lavfi.ssim.dB=6.35 +frame:4 pts:4 pts_time:4 +lavfi.ssim.R=0.72 +lavfi.ssim.G=0.75 +lavfi.ssim.B=0.82 lavfi.ssim.All=0.76 lavfi.ssim.dB=6.29 -frame:3 pts:3 pts_time:3 -lavfi.ssim.R=0.70 -lavfi.ssim.G=0.73 -lavfi.ssim.B=0.83 -lavfi.ssim.All=0.76 -lavfi.ssim.dB=6.11 -frame:4 pts:4 pts_time:4 -lavfi.ssim.R=0.71 -lavfi.ssim.G=0.74 -lavfi.ssim.B=0.80 -lavfi.ssim.All=0.75 -lavfi.ssim.dB=6.05 diff --git a/tests/ref/fate/filter-refcmp-ssim-yuv b/tests/ref/fate/filter-refcmp-ssim-yuv index 5c8ffb9483..209c8bd600 100644 --- a/tests/ref/fate/filter-refcmp-ssim-yuv +++ b/tests/ref/fate/filter-refcmp-ssim-yuv @@ -1,30 +1,30 @@ frame:0 pts:0 pts_time:0 -lavfi.ssim.Y=0.80 +lavfi.ssim.Y=0.82 lavfi.ssim.U=0.76 lavfi.ssim.V=0.69 -lavfi.ssim.All=0.76 -lavfi.ssim.dB=6.25 +lavfi.ssim.All=0.77 +lavfi.ssim.dB=6.37 frame:1 pts:1 pts_time:1 -lavfi.ssim.Y=0.80 +lavfi.ssim.Y=0.81 lavfi.ssim.U=0.73 lavfi.ssim.V=0.68 -lavfi.ssim.All=0.75 -lavfi.ssim.dB=6.08 +lavfi.ssim.All=0.76 +lavfi.ssim.dB=6.20 frame:2 pts:2 pts_time:2 -lavfi.ssim.Y=0.80 +lavfi.ssim.Y=0.82 lavfi.ssim.U=0.73 lavfi.ssim.V=0.68 -lavfi.ssim.All=0.75 -lavfi.ssim.dB=6.10 +lavfi.ssim.All=0.76 +lavfi.ssim.dB=6.22 frame:3 pts:3 pts_time:3 -lavfi.ssim.Y=0.79 +lavfi.ssim.Y=0.81 lavfi.ssim.U=0.72 lavfi.ssim.V=0.68 lavfi.ssim.All=0.75 -lavfi.ssim.dB=5.94 +lavfi.ssim.dB=6.06 frame:4 pts:4 pts_time:4 -lavfi.ssim.Y=0.80 +lavfi.ssim.Y=0.81 lavfi.ssim.U=0.72 lavfi.ssim.V=0.68 lavfi.ssim.All=0.75 -lavfi.ssim.dB=5.97 +lavfi.ssim.dB=6.05