From patchwork Thu Jan 30 21:02:45 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Paul B Mahol X-Patchwork-Id: 17625 Return-Path: X-Original-To: patchwork@ffaux-bg.ffmpeg.org Delivered-To: patchwork@ffaux-bg.ffmpeg.org Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org [79.124.17.100]) by ffaux.localdomain (Postfix) with ESMTP id E6B3344BD15 for ; Thu, 30 Jan 2020 23:03:03 +0200 (EET) Received: from [127.0.1.1] (localhost [127.0.0.1]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id C540668AE26; Thu, 30 Jan 2020 23:03:03 +0200 (EET) X-Original-To: ffmpeg-devel@ffmpeg.org Delivered-To: ffmpeg-devel@ffmpeg.org Received: from mail-wr1-f67.google.com (mail-wr1-f67.google.com [209.85.221.67]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTPS id 8D1BD68818B for ; Thu, 30 Jan 2020 23:02:57 +0200 (EET) Received: by mail-wr1-f67.google.com with SMTP id y11so5886914wrt.6 for ; Thu, 30 Jan 2020 13:02:57 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20161025; h=from:to:subject:date:message-id; bh=Pi0JyJVs2WnRVJOWUaa7buWv7JSNSWM6sHY271LfhFQ=; b=IsYTNR6A5NveaKrXzKy6BAkocjvbN+3zvnqau5TnXWui+cR8UK+qCFEsymQpjyjVOa niOmIR6vxbV7Vfm6vlqXX+/u2jK7dsQcS2Pvvl0K9kDCq6F9f5RdxBPP8eHQe0/5WF2k iz1Iutfuhgd2R3FOxSkmuQjRfuy/+vZXVOLR7jZHHvrVRqea3W5yJswkJQKh/nPfQ6Pn Z0pnu15Jj3g5Yfw1trIxHt0vFI60XuHiz4an/PDbeg1Nk7uxDF8iJWJuxTkFGPuDViR3 D/BdWeYNL/4Q4aOLAxKYPyRGuBncah+FefQ9S8y0nKtVAMgO8sNjltmmxmBTLLo4PNeP gnUw== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20161025; h=x-gm-message-state:from:to:subject:date:message-id; bh=Pi0JyJVs2WnRVJOWUaa7buWv7JSNSWM6sHY271LfhFQ=; b=j3yChSwMBp0YYAQa6jhPbzBGGCk7XX9Or1fPY+vvJxhccUr7ap4jmFHbLPRc+A7FjE VdRuSuCyhhdzLM0mzCpA5wjK2y12EX9rCpLTabuZaEUZKZuBpZweGgRVHL8luqrIGq6k iNBGSYZvG4Ftnr5Urma1uvpENStBqYAhRhzs3+ZdFFbj4YujLFsPe3WYAkYOCX4/KnlA rg/T3/BHAbxV+A0/7h9ZmqFdlPE9P5oGT45Zmmzmny4FMkXzCllRnECcqXQox1K5kS5N k+4mufpArOiM4SnMbeHdyDQHt0BFN7iEnuY6svi0z6pes6Fo4Mrq1gQvQ1kAY8W7T6pM 36vQ== X-Gm-Message-State: APjAAAWBBEY5AG/Ym2LLQCbd8si7uehLkDzJeH/e6vPM51TXt5z8CCK4 Lrf/9aRGRBEx2xTF7OM/T/YZzKUY5oI= X-Google-Smtp-Source: APXvYqxeMUJdzLDI2gIMg8i6gDFn7SUP6pDCZrVudf7xOsc1Ua1Pz2/yeuiT56SSxTCZ3FTkckNxpA== X-Received: by 2002:a5d:5267:: with SMTP id l7mr7475477wrc.84.1580418176504; Thu, 30 Jan 2020 13:02:56 -0800 (PST) Received: from localhost.localdomain ([109.227.58.1]) by smtp.gmail.com with ESMTPSA id z3sm8991876wrs.94.2020.01.30.13.02.54 for (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Thu, 30 Jan 2020 13:02:55 -0800 (PST) From: Paul B Mahol To: ffmpeg-devel@ffmpeg.org Date: Thu, 30 Jan 2020 22:02:45 +0100 Message-Id: <20200130210245.23676-1-onemda@gmail.com> X-Mailer: git-send-email 2.17.1 Subject: [FFmpeg-devel] [PATCH] avfilter/vf_ssim: improve precision X-BeenThere: ffmpeg-devel@ffmpeg.org X-Mailman-Version: 2.1.20 Precedence: list List-Id: FFmpeg development discussions and patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: FFmpeg development discussions and patches MIME-Version: 1.0 Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" Use doubles for accumulating floats. Signed-off-by: Paul B Mahol --- libavfilter/ssim.h | 2 +- libavfilter/vf_ssim.c | 18 ++++++++--------- libavfilter/x86/vf_ssim.asm | 36 ++++++++++++++++++++++------------ libavfilter/x86/vf_ssim_init.c | 2 +- 4 files changed, 35 insertions(+), 23 deletions(-) diff --git a/libavfilter/ssim.h b/libavfilter/ssim.h index ac0395a22a..a6a41aabe6 100644 --- a/libavfilter/ssim.h +++ b/libavfilter/ssim.h @@ -28,7 +28,7 @@ typedef struct SSIMDSPContext { void (*ssim_4x4_line)(const uint8_t *buf, ptrdiff_t buf_stride, const uint8_t *ref, ptrdiff_t ref_stride, int (*sums)[4], int w); - float (*ssim_end_line)(const int (*sum0)[4], const int (*sum1)[4], int w); + double (*ssim_end_line)(const int (*sum0)[4], const int (*sum1)[4], int w); } SSIMDSPContext; void ff_ssim_init_x86(SSIMDSPContext *dsp); diff --git a/libavfilter/vf_ssim.c b/libavfilter/vf_ssim.c index c08fbcdcc2..17dce8e8e8 100644 --- a/libavfilter/vf_ssim.c +++ b/libavfilter/vf_ssim.c @@ -55,13 +55,13 @@ typedef struct SSIMContext { uint64_t nb_frames; double ssim[4], ssim_total; char comps[4]; - float coefs[4]; + double coefs[4]; uint8_t rgba_map[4]; int planewidth[4]; int planeheight[4]; int *temp; int is_rgb; - float (*ssim_plane)(SSIMDSPContext *dsp, + double (*ssim_plane)(SSIMDSPContext *dsp, uint8_t *main, int main_stride, uint8_t *ref, int ref_stride, int width, int height, void *temp, @@ -206,9 +206,9 @@ static float ssim_endn_16bit(const int64_t (*sum0)[4], const int64_t (*sum1)[4], return ssim; } -static float ssim_endn_8bit(const int (*sum0)[4], const int (*sum1)[4], int width) +static double ssim_endn_8bit(const int (*sum0)[4], const int (*sum1)[4], int width) { - float ssim = 0.0; + double ssim = 0.0; int i; for (i = 0; i < width; i++) @@ -221,14 +221,14 @@ static float ssim_endn_8bit(const int (*sum0)[4], const int (*sum1)[4], int widt #define SUM_LEN(w) (((w) >> 2) + 3) -static float ssim_plane_16bit(SSIMDSPContext *dsp, +static double ssim_plane_16bit(SSIMDSPContext *dsp, uint8_t *main, int main_stride, uint8_t *ref, int ref_stride, int width, int height, void *temp, int max) { int z = 0, y; - float ssim = 0.0; + double ssim = 0.0; int64_t (*sum0)[4] = temp; int64_t (*sum1)[4] = sum0 + SUM_LEN(width); @@ -249,14 +249,14 @@ static float ssim_plane_16bit(SSIMDSPContext *dsp, return ssim / ((height - 1) * (width - 1)); } -static float ssim_plane(SSIMDSPContext *dsp, +static double ssim_plane(SSIMDSPContext *dsp, uint8_t *main, int main_stride, uint8_t *ref, int ref_stride, int width, int height, void *temp, int max) { int z = 0, y; - float ssim = 0.0; + double ssim = 0.0; int (*sum0)[4] = temp; int (*sum1)[4] = sum0 + SUM_LEN(width); @@ -288,7 +288,7 @@ static int do_ssim(FFFrameSync *fs) SSIMContext *s = ctx->priv; AVFrame *master, *ref; AVDictionary **metadata; - float c[4], ssimv = 0.0; + double c[4], ssimv = 0.0; int ret, i; ret = ff_framesync_dualinput_get(fs, &master, &ref); diff --git a/libavfilter/x86/vf_ssim.asm b/libavfilter/x86/vf_ssim.asm index 3293e66701..4cd6293b59 100644 --- a/libavfilter/x86/vf_ssim.asm +++ b/libavfilter/x86/vf_ssim.asm @@ -169,8 +169,9 @@ SSIM_4X4_LINE 8 %endif INIT_XMM sse4 -cglobal ssim_end_line, 3, 3, 6, sum0, sum1, w +cglobal ssim_end_line, 3, 3, 7, sum0, sum1, w pxor m0, m0 + pxor m6, m6 .loop: mova m1, [sum0q+mmsize*0] mova m2, [sum0q+mmsize*1] @@ -214,34 +215,45 @@ cglobal ssim_end_line, 3, 3, 6, sum0, sum1, w mulps m4, m5 mulps m3, m1 divps m4, m3 ; ssim_endl - addps m0, m4 ; ssim + mova m5, m4 + cvtps2pd m3, m5 + movhlps m5, m5 + cvtps2pd m5, m5 + addpd m0, m3 ; ssim + addpd m6, m5 ; ssim add sum0q, mmsize*4 add sum1q, mmsize*4 sub wd, 4 jg .loop - ; subps the ones we added too much + ; subpd the ones we added too much test wd, wd jz .end add wd, 4 + test wd, 3 + jz .skip3 test wd, 2 jz .skip2 - psrldq m4, 8 -.skip2: test wd, 1 jz .skip1 - psrldq m4, 4 +.skip3: + psrldq m5, 8 + subpd m6, m5 + jmp .end +.skip2: + psrldq m3, 8 + subpd m0, m3 + jmp .end .skip1: - subps m0, m4 + psrldq m5, 8 + subpd m6, m5 .end: + addpd m0, m6 movhlps m4, m0 - addps m0, m4 - movss m4, m0 - shufps m0, m0, 1 - addss m0, m4 + addpd m0, m4 %if ARCH_X86_32 - movss r0m, m0 + movsd r0m, m0 fld r0mp %endif RET diff --git a/libavfilter/x86/vf_ssim_init.c b/libavfilter/x86/vf_ssim_init.c index 599c928403..cbaa20ef16 100644 --- a/libavfilter/x86/vf_ssim_init.c +++ b/libavfilter/x86/vf_ssim_init.c @@ -28,7 +28,7 @@ void ff_ssim_4x4_line_ssse3(const uint8_t *buf, ptrdiff_t buf_stride, void ff_ssim_4x4_line_xop (const uint8_t *buf, ptrdiff_t buf_stride, const uint8_t *ref, ptrdiff_t ref_stride, int (*sums)[4], int w); -float ff_ssim_end_line_sse4(const int (*sum0)[4], const int (*sum1)[4], int w); +double ff_ssim_end_line_sse4(const int (*sum0)[4], const int (*sum1)[4], int w); void ff_ssim_init_x86(SSIMDSPContext *dsp) {