From patchwork Sat Apr 4 10:33:53 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: =?utf-8?q?FR=C3=89D=C3=89RIC_RECOULES?= X-Patchwork-Id: 18635 Return-Path: X-Original-To: patchwork@ffaux-bg.ffmpeg.org Delivered-To: patchwork@ffaux-bg.ffmpeg.org Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org [79.124.17.100]) by ffaux.localdomain (Postfix) with ESMTP id C5BF0448C59 for ; Sat, 4 Apr 2020 13:33:59 +0300 (EEST) Received: from [127.0.1.1] (localhost [127.0.0.1]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id 9D4C768B20F; Sat, 4 Apr 2020 13:33:59 +0300 (EEST) X-Original-To: ffmpeg-devel@ffmpeg.org Delivered-To: ffmpeg-devel@ffmpeg.org Received: from zm-mta-out-3.u-ga.fr (zm-mta-out-3.u-ga.fr [152.77.200.56]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTPS id 9F0D8689C58 for ; Sat, 4 Apr 2020 13:33:53 +0300 (EEST) Received: from zm-mta-out.u-ga.fr (zm-mta-out.u-ga.fr [152.77.200.53]) by zm-mta-out-3.u-ga.fr (Postfix) with ESMTP id 2A7D540FE4; Sat, 4 Apr 2020 12:33:53 +0200 (CEST) Received: from zm-mbx06.u-ga.fr (zm-mbx06.u-ga.fr [152.77.200.20]) by zm-mta-out.u-ga.fr (Postfix) with ESMTP id 24F66807FA; Sat, 4 Apr 2020 12:33:53 +0200 (CEST) Date: Sat, 4 Apr 2020 12:33:53 +0200 (CEST) From: =?utf-8?b?RlLDiUTDiVJJQw==?= RECOULES To: ffmpeg-devel Message-ID: <1941150656.3741620.1585996433124.JavaMail.zimbra@univ-grenoble-alpes.fr> MIME-Version: 1.0 X-Originating-IP: [46.193.2.18] X-Mailer: Zimbra 8.8.15_GA_3918 (ZimbraWebClient - FF72 (Linux)/8.8.15_GA_3895) Thread-Index: LCPPifz1nU0ZDPUQdqBM/DS0+RLOgQ== Thread-Topic: x86 inline assembly compliance X-Content-Filtered-By: Mailman/MimeDel 2.1.20 Subject: [FFmpeg-devel] [PATCH 2/5] x86 inline assembly compliance X-BeenThere: ffmpeg-devel@ffmpeg.org X-Mailman-Version: 2.1.20 Precedence: list List-Id: FFmpeg development discussions and patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: FFmpeg development discussions and patches Cc: Richard Bonichon , =?utf-8?q?S=C3=A9bastien?= Bardin Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" [inline assembly] merges contiguous assembly statements --- libavcodec/x86/hpeldsp_init.c | 8 ++++++++ libavcodec/x86/rnd_template.c | 14 +++++++------- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/libavcodec/x86/hpeldsp_init.c b/libavcodec/x86/hpeldsp_init.c index d89928cec6..c99513035b 100644 --- a/libavcodec/x86/hpeldsp_init.c +++ b/libavcodec/x86/hpeldsp_init.c @@ -95,6 +95,8 @@ void ff_avg_approx_pixels8_xy2_3dnow(uint8_t *block, const uint8_t *pixels, /* MMX no rounding */ #define DEF(x, y) x ## _no_rnd_ ## y ## _mmx #define SET_RND MOVQ_WONE +#define SET_RND_TPL MOVQ_WONE_TPL +#define COMMA_SET_RND_IN #define PAVGBP(a, b, c, d, e, f) PAVGBP_MMX_NO_RND(a, b, c, d, e, f) #define PAVGB(a, b, c, e) PAVGB_MMX_NO_RND(a, b, c, e) #define STATIC static @@ -104,6 +106,8 @@ void ff_avg_approx_pixels8_xy2_3dnow(uint8_t *block, const uint8_t *pixels, #undef DEF #undef SET_RND +#undef SET_RND_TPL +#undef COMMA_SET_RND_IN #undef PAVGBP #undef PAVGB #undef STATIC @@ -121,6 +125,8 @@ CALL_2X_PIXELS(put_no_rnd_pixels16_xy2_mmx, put_no_rnd_pixels8_xy2_mmx, 8) #define DEF(x, y) x ## _ ## y ## _mmx #define SET_RND MOVQ_WTWO +#define SET_RND_TPL MOVQ_WTWO_TPL +#define COMMA_SET_RND_IN COMMA_MOVQ_WTWO_IN #define PAVGBP(a, b, c, d, e, f) PAVGBP_MMX(a, b, c, d, e, f) #define PAVGB(a, b, c, e) PAVGB_MMX(a, b, c, e) @@ -134,6 +140,8 @@ CALL_2X_PIXELS(put_no_rnd_pixels16_xy2_mmx, put_no_rnd_pixels8_xy2_mmx, 8) #undef DEF #undef SET_RND +#undef SET_RND_TPL +#undef COMMA_SET_RND_IN #undef PAVGBP #undef PAVGB diff --git a/libavcodec/x86/rnd_template.c b/libavcodec/x86/rnd_template.c index 09946bd23f..a98fbc10ab 100644 --- a/libavcodec/x86/rnd_template.c +++ b/libavcodec/x86/rnd_template.c @@ -33,9 +33,9 @@ av_unused STATIC void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) { - MOVQ_ZERO(mm7); - SET_RND(mm6); // =2 for rnd and =1 for no_rnd version __asm__ volatile( + MOVQ_ZERO_TPL(mm7) + SET_RND_TPL(mm6) // =2 for rnd and =1 for no_rnd version "movq (%1), %%mm0 \n\t" "movq 1(%1), %%mm4 \n\t" "movq %%mm0, %%mm1 \n\t" @@ -93,7 +93,7 @@ av_unused STATIC void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixel "subl $2, %0 \n\t" "jnz 1b \n\t" :"+g"(h), "+S"(pixels) - :"D"(block), "r"((x86_reg)line_size) + :"D"(block), "r"((x86_reg)line_size) COMMA_SET_RND_IN :FF_REG_a, "memory"); } @@ -102,10 +102,10 @@ av_unused STATIC void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixel av_unused STATIC void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) { - MOVQ_ZERO(mm7); - SET_RND(mm6); // =2 for rnd and =1 for no_rnd version __asm__ volatile( - "movq (%1), %%mm0 \n\t" + MOVQ_ZERO_TPL(mm7) + SET_RND_TPL(mm6) // =2 for rnd and =1 for no_rnd version + "movq (%1), %%mm0 \n\t" "movq 1(%1), %%mm4 \n\t" "movq %%mm0, %%mm1 \n\t" "movq %%mm4, %%mm5 \n\t" @@ -170,6 +170,6 @@ av_unused STATIC void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixel "subl $2, %0 \n\t" "jnz 1b \n\t" :"+g"(h), "+S"(pixels) - :"D"(block), "r"((x86_reg)line_size) + :"D"(block), "r"((x86_reg)line_size) COMMA_SET_RND_IN :FF_REG_a, "memory"); }