From patchwork Mon Jul 22 18:44:20 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: =?utf-8?q?R=C3=A9mi_Denis-Courmont?= X-Patchwork-Id: 50682 Delivered-To: ffmpegpatchwork2@gmail.com Received: by 2002:a59:a742:0:b0:482:c625:d099 with SMTP id f2csp2197929vqm; Mon, 22 Jul 2024 11:44:43 -0700 (PDT) X-Forwarded-Encrypted: i=2; AJvYcCURk0CPQMNm/fNJs0t7spfOLdSiC/QBb3sN02lN4B88nWBYNaxpVthBdz62gzRVz2oB0OD9FPXRAJnrVSt8sE11xtOjfpZ9aUlgGg== X-Google-Smtp-Source: AGHT+IEcoFJgp35jHm9/rseFLPsS7Vdyiua9nrFSr/NY81JcQNmHay65UltpgRAhBKUpUvMW8QC7 X-Received: by 2002:a17:907:944b:b0:a77:eb34:3b42 with SMTP id a640c23a62f3a-a7a4bfbb653mr600309966b.12.1721673882899; Mon, 22 Jul 2024 11:44:42 -0700 (PDT) ARC-Seal: i=1; a=rsa-sha256; t=1721673882; cv=none; d=google.com; s=arc-20160816; b=pcW8c3AWi4evyiXS0brQy97n40P4whYafbjeVSrPexNrMJnsFqfwKKfEr5LKB6Sn+d jexXspynhLgTMCOILzYVqqVcvLEWdlftpJkrXu2Kn1tiKC1PnIKlZGcCrh+SU9s6rG5C Rr51GtFrl48GQ+udgKcBJvjUv2N9b03oAJPCkr+w48DHV7PH0XQmvib8ObYDmjCVNkXR fCPgDa/zQm8Hc8ZMTqIlQHfcJiusoswhtVBDPAP0HHW3xwdJeizNVjKLh87nySi6BGl4 RICC0f4INHr9LgjiNBpT+L7qiRF4j6u1ba9Pkwk6E2xfwz2G3T2i4kJ7IgOKzEZuQPUC WjAQ== ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=arc-20160816; h=sender:errors-to:content-transfer-encoding:reply-to:list-subscribe :list-help:list-post:list-archive:list-unsubscribe:list-id :precedence:subject:mime-version:message-id:date:to:from :delivered-to; bh=A6wvj9XmpVBrRCUz5R0NIHLuq3TLbQFvfnD0MtWNFkY=; fh=YOA8vD9MJZuwZ71F/05pj6KdCjf6jQRmzLS+CATXUQk=; b=lU+zLiyDnpkNIQPblmpK4PVtMqdIB5ss2eDJGovDKA5FaZT4x0pgoj94xaBtUcWSal VyNWxlPWOThhplR4AWFfUSfEdm19jJmJxxXyC0Avn+1UiTbKaQ491KiACtRHH29laLxI xZ2ScnoSujBW8j8qICw1U6qQEN69LLse3NcahKSpuTlX5lWUI7ynUiMl4u8psvBEQeql rt8tACK1tvgffF12AsBidVL0CYYEAMl5SzzuVinyf4UdZCUgnB9ZCAAX/SSW28aASDE9 F7Kq6rTjX6cQnabhbT8ZJ5VbfLH9wXkWHKVnoD/B8FUW4jWgDdAAmhJyvl04oKwdMChv mcwA==; dara=google.com ARC-Authentication-Results: i=1; 
mx.google.com; spf=pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org designates 79.124.17.100 as permitted sender) smtp.mailfrom=ffmpeg-devel-bounces@ffmpeg.org Return-Path: Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org. [79.124.17.100]) by mx.google.com with ESMTP id a640c23a62f3a-a7a3c926bf6si442768666b.643.2024.07.22.11.44.42; Mon, 22 Jul 2024 11:44:42 -0700 (PDT) Received-SPF: pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org designates 79.124.17.100 as permitted sender) client-ip=79.124.17.100; Authentication-Results: mx.google.com; spf=pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org designates 79.124.17.100 as permitted sender) smtp.mailfrom=ffmpeg-devel-bounces@ffmpeg.org Received: from [127.0.1.1] (localhost [127.0.0.1]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id C0D5268D58F; Mon, 22 Jul 2024 21:44:38 +0300 (EEST) X-Original-To: ffmpeg-devel@ffmpeg.org Delivered-To: ffmpeg-devel@ffmpeg.org Received: from ursule.remlab.net (vps-a2bccee9.vps.ovh.net [51.75.19.47]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id 288AD68D07E for ; Mon, 22 Jul 2024 21:44:32 +0300 (EEST) Received: from basile.remlab.net (localhost [IPv6:::1]) by ursule.remlab.net (Postfix) with ESMTP id C1E5BC0069 for ; Mon, 22 Jul 2024 21:44:31 +0300 (EEST) From: =?utf-8?q?R=C3=A9mi_Denis-Courmont?= To: ffmpeg-devel@ffmpeg.org Date: Mon, 22 Jul 2024 21:44:20 +0300 Message-ID: <20240722184431.40853-1-remi@remlab.net> X-Mailer: git-send-email 2.45.2 MIME-Version: 1.0 Subject: [FFmpeg-devel] [PATCH 1/9] lavu/riscv: allow any number of extensions X-BeenThere: ffmpeg-devel@ffmpeg.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: FFmpeg development discussions and patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: FFmpeg development discussions and patches Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" X-TUID: OMEgexD0xiQw This reworks the func/endfunc macros to support any number 
of ISA extensions as parameters. --- libavutil/riscv/asm.S | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/libavutil/riscv/asm.S b/libavutil/riscv/asm.S index 2cf4f7b7ab..78e9defbd4 100644 --- a/libavutil/riscv/asm.S +++ b/libavutil/riscv/asm.S @@ -36,17 +36,18 @@ #define HWD #endif - .macro func sym, ext1=, ext2= + .macro archadd ext=, more:vararg + .ifnb \ext + .option arch, +\ext + archadd \more + .endif + .endm + + .macro func sym, exts:vararg .text .align 2 - .option push - .ifnb \ext1 - .option arch, +\ext1 - .ifnb \ext2 - .option arch, +\ext2 - .endif - .endif + archadd \exts .global \sym .hidden \sym From patchwork Mon Jul 22 18:44:21 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: =?utf-8?q?R=C3=A9mi_Denis-Courmont?= X-Patchwork-Id: 50688 Delivered-To: ffmpegpatchwork2@gmail.com Received: by 2002:a59:a742:0:b0:482:c625:d099 with SMTP id f2csp2200882vqm; Mon, 22 Jul 2024 11:51:56 -0700 (PDT) X-Forwarded-Encrypted: i=2; AJvYcCW0ic5dQ9aeaPX5nquiH1f5HkFGYcfIXubXzfeysLLG5DBlILUNad1AzA5T8e4NMHwbRjLw6nTiOP70leI5x+6DPcVzaEBOX6tJ0g== X-Google-Smtp-Source: AGHT+IHm1m0xGpV5zCxEbBbhB2Xl6n+l7RebXE/o5rsXEZd/q38SD23moELsBLwtd4dzcC4m8ntt X-Received: by 2002:a05:6512:2216:b0:52e:9f17:8418 with SMTP id 2adb3069b0e04-52efb524068mr4751735e87.12.1721674316596; Mon, 22 Jul 2024 11:51:56 -0700 (PDT) ARC-Seal: i=1; a=rsa-sha256; t=1721674316; cv=none; d=google.com; s=arc-20160816; b=0VyiNAyyc13wYyKSWv4XP1HBxZzo01v4wmHOYxXqRpS3QXW4GB7eBVg/tC8bIVuu+v EfjPTYX8xSzZoEl3e7V1u/3p6XBQ9yJbvJxOLFucHWxMxO6Fu8CKJlwfnXND/HcD80oC 6L04DNnly8Lld0w7xVHe+4QamPsHwx8kfr9+GZgk8sAaSEuVSLFyvScRPGcNFQ+ASgFv rkFLqjWTeizglCr/fwGWcRiRHkLjZz1+5ThKqtU7qSpTHSgoiFVZtyQyCiAFRnQGwtiY rkkcDJRv8+6p7Bdw/mzC92SxK8F7ikZE/auNRa8aUku17rxvfb5f6AGQPSelUT/8pH2g QAgw== ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=arc-20160816; 
h=sender:errors-to:content-transfer-encoding:reply-to:list-subscribe :list-help:list-post:list-archive:list-unsubscribe:list-id :precedence:subject:mime-version:references:in-reply-to:message-id :date:to:from:delivered-to; bh=4P2Xqu+mbTnEapULrdDbaGRvJaTcsl/LhsIWOSlI6z0=; fh=YOA8vD9MJZuwZ71F/05pj6KdCjf6jQRmzLS+CATXUQk=; b=RySXCNRxcmshH60VgSjRN81g7CoZDdn8nMu7wEPZ9sCD7noVbj/Y9oCUhjcCIs2Grf 3tIhz8bQPMq03QH73FMIqJAXxlN/oSv9PRgJ+2Fz2xrUoH+5pvWsScqa3SMDVOnfPaL8 dUGywRwYo6BXufKDPY7HZl810qVk1mIIngNx9jn3LouCnfTsQzYNNyD2VEZb8tETq63m 9Uc5RimZTArwU8hDu0MxclIcBNE/zC3VRY+Ak4M0zQmMvkHKk6or1+THvPoDgDOZFi0g DNL07o1ixqg3sgYK3I1WhF4rbF8OWDYyfxzFPqyRGhF6B4BmnMIRLVfYEFTsE4yPEbZN DONQ==; dara=google.com ARC-Authentication-Results: i=1; mx.google.com; spf=pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org designates 79.124.17.100 as permitted sender) smtp.mailfrom=ffmpeg-devel-bounces@ffmpeg.org Return-Path: Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org. [79.124.17.100]) by mx.google.com with ESMTP id 2adb3069b0e04-52fc327633asi228631e87.56.2024.07.22.11.51.56; Mon, 22 Jul 2024 11:51:56 -0700 (PDT) Received-SPF: pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org designates 79.124.17.100 as permitted sender) client-ip=79.124.17.100; Authentication-Results: mx.google.com; spf=pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org designates 79.124.17.100 as permitted sender) smtp.mailfrom=ffmpeg-devel-bounces@ffmpeg.org Received: from [127.0.1.1] (localhost [127.0.0.1]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id EEF2268D6C1; Mon, 22 Jul 2024 21:44:39 +0300 (EEST) X-Original-To: ffmpeg-devel@ffmpeg.org Delivered-To: ffmpeg-devel@ffmpeg.org Received: from ursule.remlab.net (vps-a2bccee9.vps.ovh.net [51.75.19.47]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id 79BAE68D2D8 for ; Mon, 22 Jul 2024 21:44:32 +0300 (EEST) Received: from basile.remlab.net (localhost [IPv6:::1]) by ursule.remlab.net (Postfix) with ESMTP id E9E56C0186 for ; Mon, 22 Jul 
2024 21:44:31 +0300 (EEST) From: =?utf-8?q?R=C3=A9mi_Denis-Courmont?= To: ffmpeg-devel@ffmpeg.org Date: Mon, 22 Jul 2024 21:44:21 +0300 Message-ID: <20240722184431.40853-2-remi@remlab.net> X-Mailer: git-send-email 2.45.2 In-Reply-To: <20240722184431.40853-1-remi@remlab.net> References: <20240722184431.40853-1-remi@remlab.net> MIME-Version: 1.0 Subject: [FFmpeg-devel] [PATCH 2/9] lavu/riscv: grok B as an extension X-BeenThere: ffmpeg-devel@ffmpeg.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: FFmpeg development discussions and patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: FFmpeg development discussions and patches Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" X-TUID: gZoi+HN7Jtc2 The RISC-V B bit manipulation extension was ratified only two months ago. But it is strictly equivalent to the union of the zba, zbb and zbs extensions, which were defined almost 3 years earlier. Rather than require a new assembler, we can just match the extension name manually and translate it into its constituent parts. --- libavutil/riscv/asm.S | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/libavutil/riscv/asm.S b/libavutil/riscv/asm.S index 78e9defbd4..0c29680d84 100644 --- a/libavutil/riscv/asm.S +++ b/libavutil/riscv/asm.S @@ -38,7 +38,12 @@ .macro archadd ext=, more:vararg .ifnb \ext - .option arch, +\ext + .ifc \ext, b + # B was defined later, is known to fewer assemblers. 
+ archadd zba, zbb, zbs + .else + .option arch, +\ext + .endif archadd \more .endif .endm From patchwork Mon Jul 22 18:44:22 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Patchwork-Submitter: =?utf-8?q?R=C3=A9mi_Denis-Courmont?= X-Patchwork-Id: 50691 Delivered-To: ffmpegpatchwork2@gmail.com Received: by 2002:a59:a742:0:b0:482:c625:d099 with SMTP id f2csp2205621vqm; Mon, 22 Jul 2024 12:01:53 -0700 (PDT) X-Forwarded-Encrypted: i=2; AJvYcCWoKArQlUjOXufxi1hzycuPB00u6cr2+kl+HxUsPU/aRunJ688inCKGSMQkbls931GiV/kzK+YI0mVwE8mfCglf2S8tKz25eXqN2w== X-Google-Smtp-Source: AGHT+IG5QYVGbVyJ9I5MZYiyN69ZDcv1b51csFMp3kr5cyMPd6376N2yILRo5PDoUmN4VoJ49AAG X-Received: by 2002:a05:6512:131d:b0:52f:c2fa:b213 with SMTP id 2adb3069b0e04-52fc4075b0dmr608320e87.55.1721674913607; Mon, 22 Jul 2024 12:01:53 -0700 (PDT) ARC-Seal: i=1; a=rsa-sha256; t=1721674913; cv=none; d=google.com; s=arc-20160816; b=AKg/OE6HbJc+pV5I7LN01Opl98IvnLl6hm0o69p6ERXw+wL5sKeJQUhnayqY9x2tIL +RTNhBvZBtF+LUgA6Uya6yhVjTmvkrscx6Fvso6vrd1+g8vDAeLnm7cPjJkocn2FaVmr 2ja/q4eyVLX10/GZ1VVU+ihoKsTV1PyKruPuS4pAb6c/XSYHEW2I3pkkSaAIIENkjsV4 JtQaSrZfpBlcjPOcI42i1efcHIjJmm530YsuCYRDNY/oFyVMZq9m93Rr2LY9K7TLr/8h xT6IR6mi7UVzKgIWUg4WSlXtZXfKmMvt597srM/qIqElGIW6G/ZqIWkKec3zgP6Kg7b9 99iQ== ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=arc-20160816; h=sender:errors-to:content-transfer-encoding:reply-to:list-subscribe :list-help:list-post:list-archive:list-unsubscribe:list-id :precedence:subject:mime-version:references:in-reply-to:message-id :date:to:from:delivered-to; bh=khUsmbrQ8RiTc3mYSFcpWLYb+Y+qMcoh+VUF94p9iNc=; fh=YOA8vD9MJZuwZ71F/05pj6KdCjf6jQRmzLS+CATXUQk=; b=tBjeqC2T7ngT3ibBnkYHPAOvo47XKVVpLiTvwoRltlQXqykX6ibIyzMc2bXQaIecOk /pfVtkSk/6LUyRSX0I3oGHcn04fGUBKZ09zvvx2fw3O2ClN8doKj8znPO/muur/5xPz2 3c1AlGiQwIdBcFU4qQrRMnhmthUsQWEG+T9vmh3q+1TSpzFD9F+JzU3ofgZctFtbMMSP y2bYqK8e9KrHPB8ocyDvl7PNtnCRjw0Hxx0kQAIFLQMgMZUYWoG14gOCmPzIp2c42Blo 
0jpoaFEp/iFaN2l4DpmuwVLviqUMNTRp3rpGIB006tZiKCwF60JPdJmbaL9RCcGywOhY fD/A==; dara=google.com ARC-Authentication-Results: i=1; mx.google.com; spf=pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org designates 79.124.17.100 as permitted sender) smtp.mailfrom=ffmpeg-devel-bounces@ffmpeg.org Return-Path: Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org. [79.124.17.100]) by mx.google.com with ESMTP id 2adb3069b0e04-52ef54f80edsi2176052e87.32.2024.07.22.12.01.53; Mon, 22 Jul 2024 12:01:53 -0700 (PDT) Received-SPF: pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org designates 79.124.17.100 as permitted sender) client-ip=79.124.17.100; Authentication-Results: mx.google.com; spf=pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org designates 79.124.17.100 as permitted sender) smtp.mailfrom=ffmpeg-devel-bounces@ffmpeg.org Received: from [127.0.1.1] (localhost [127.0.0.1]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id EC74E68D6CC; Mon, 22 Jul 2024 21:44:40 +0300 (EEST) X-Original-To: ffmpeg-devel@ffmpeg.org Delivered-To: ffmpeg-devel@ffmpeg.org Received: from ursule.remlab.net (vps-a2bccee9.vps.ovh.net [51.75.19.47]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id 84A0068D3C9 for ; Mon, 22 Jul 2024 21:44:32 +0300 (EEST) Received: from basile.remlab.net (localhost [IPv6:::1]) by ursule.remlab.net (Postfix) with ESMTP id 267EAC01F0 for ; Mon, 22 Jul 2024 21:44:32 +0300 (EEST) From: =?utf-8?q?R=C3=A9mi_Denis-Courmont?= To: ffmpeg-devel@ffmpeg.org Date: Mon, 22 Jul 2024 21:44:22 +0300 Message-ID: <20240722184431.40853-3-remi@remlab.net> X-Mailer: git-send-email 2.45.2 In-Reply-To: <20240722184431.40853-1-remi@remlab.net> References: <20240722184431.40853-1-remi@remlab.net> MIME-Version: 1.0 Subject: [FFmpeg-devel] [PATCH 3/9] lavc/riscv: require B or zba explicitly X-BeenThere: ffmpeg-devel@ffmpeg.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: FFmpeg development discussions and patches List-Unsubscribe: , List-Archive: List-Post: 
List-Help: List-Subscribe: , Reply-To: FFmpeg development discussions and patches Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" X-TUID: RsImKkEUtkAa --- libavcodec/riscv/aacencdsp_rvv.S | 4 +-- libavcodec/riscv/aacpsdsp_rvv.S | 10 +++---- libavcodec/riscv/ac3dsp_rvv.S | 6 ++-- libavcodec/riscv/ac3dsp_rvvb.S | 2 +- libavcodec/riscv/alacdsp_rvv.S | 6 ++-- libavcodec/riscv/audiodsp_rvv.S | 6 ++-- libavcodec/riscv/bswapdsp_rvb.S | 2 +- libavcodec/riscv/bswapdsp_rvv.S | 2 +- libavcodec/riscv/exrdsp_rvv.S | 2 +- libavcodec/riscv/fixed_vtype.S | 48 ++++++++++++++++++++++++++++++ libavcodec/riscv/flacdsp_rvv.S | 42 +++++++++++++------------- libavcodec/riscv/fmtconvert_rvv.S | 4 +-- libavcodec/riscv/h264_mc_chroma.S | 4 +-- libavcodec/riscv/h264idct_rvv.S | 2 +- libavcodec/riscv/huffyuvdsp_rvv.S | 4 +-- libavcodec/riscv/jpeg2000dsp_rvv.S | 4 +-- libavcodec/riscv/llauddsp_rvv.S | 4 +-- libavcodec/riscv/lpc_rvv.S | 4 +-- libavcodec/riscv/opusdsp_rvv.S | 2 +- libavcodec/riscv/rv40dsp_rvv.S | 4 +-- libavcodec/riscv/sbrdsp_rvv.S | 16 +++++----- libavcodec/riscv/svqenc_rvv.S | 2 +- libavcodec/riscv/takdsp_rvv.S | 8 ++--- libavcodec/riscv/utvideodsp_rvv.S | 4 +-- libavcodec/riscv/vc1dsp_rvv.S | 6 ++-- libavcodec/riscv/vorbisdsp_rvv.S | 2 +- libavcodec/riscv/vp7dsp_rvv.S | 2 +- libavcodec/riscv/vp8dsp_rvv.S | 4 +-- libavcodec/riscv/vp9_intra_rvi.S | 6 ++-- 29 files changed, 129 insertions(+), 83 deletions(-) create mode 100644 libavcodec/riscv/fixed_vtype.S diff --git a/libavcodec/riscv/aacencdsp_rvv.S b/libavcodec/riscv/aacencdsp_rvv.S index 21e66a77ae..05a603b6f6 100644 --- a/libavcodec/riscv/aacencdsp_rvv.S +++ b/libavcodec/riscv/aacencdsp_rvv.S @@ -21,7 +21,7 @@ #include "libavutil/riscv/asm.S" -func ff_abs_pow34_rvv, zve32f +func ff_abs_pow34_rvv, zve32f, zba 1: vsetvli t0, a2, e32, m8, ta, ma sub a2, a2, t0 @@ -38,7 +38,7 @@ func ff_abs_pow34_rvv, zve32f ret endfunc -func ff_aac_quant_bands_rvv, zve32f +func ff_aac_quant_bands_rvv, zve32f, zba NOHWF 
fmv.w.x fa0, a6 NOHWF fmv.w.x fa1, a7 fcvt.s.w ft0, a5 diff --git a/libavcodec/riscv/aacpsdsp_rvv.S b/libavcodec/riscv/aacpsdsp_rvv.S index 2d6858688a..72e2103c22 100644 --- a/libavcodec/riscv/aacpsdsp_rvv.S +++ b/libavcodec/riscv/aacpsdsp_rvv.S @@ -20,7 +20,7 @@ #include "libavutil/riscv/asm.S" -func ff_ps_add_squares_rvv, zve64f +func ff_ps_add_squares_rvv, zve64f, zba li t1, 32 1: vsetvli t0, a2, e32, m4, ta, ma @@ -39,7 +39,7 @@ func ff_ps_add_squares_rvv, zve64f ret endfunc -func ff_ps_mul_pair_single_rvv, zve32f +func ff_ps_mul_pair_single_rvv, zve32f, zba 1: vsetvli t0, a3, e32, m4, ta, ma vlseg2e32.v v24, (a1) @@ -134,7 +134,7 @@ NOHWD flw fs\n, (4 * \n)(sp) .purgem filter endfunc -func ff_ps_hybrid_analysis_ileave_rvv, zve32x /* no needs for zve32f here */ +func ff_ps_hybrid_analysis_ileave_rvv, zve32x /* no zve32f here */, zba slli t0, a2, 5 + 1 + 2 // ctz(32 * 2 * 4) sh2add a1, a2, a1 add a0, a0, t0 @@ -169,7 +169,7 @@ func ff_ps_hybrid_analysis_ileave_rvv, zve32x /* no needs for zve32f here */ ret endfunc -func ff_ps_hybrid_synthesis_deint_rvv, zve64x +func ff_ps_hybrid_synthesis_deint_rvv, zve64x, zba slli t0, a2, 5 + 1 + 2 sh2add a0, a2, a0 add a1, a1, t0 @@ -207,7 +207,7 @@ func ff_ps_hybrid_synthesis_deint_rvv, zve64x ret endfunc -func ff_ps_stereo_interpolate_rvv, zve32f, zbb +func ff_ps_stereo_interpolate_rvv, zve32f, b vsetvli t0, zero, e32, m2, ta, ma vid.v v24 flw ft0, (a2) diff --git a/libavcodec/riscv/ac3dsp_rvv.S b/libavcodec/riscv/ac3dsp_rvv.S index 1b5f67a9ec..c733733286 100644 --- a/libavcodec/riscv/ac3dsp_rvv.S +++ b/libavcodec/riscv/ac3dsp_rvv.S @@ -43,7 +43,7 @@ func ff_ac3_exponent_min_rvv, zve32x ret endfunc -func ff_float_to_fixed24_rvv, zve32f +func ff_float_to_fixed24_rvv, zve32f, zba li t1, 1 << 24 fcvt.s.w f0, t1 1: @@ -61,7 +61,7 @@ func ff_float_to_fixed24_rvv, zve32f endfunc #if __riscv_xlen >= 64 -func ff_sum_square_butterfly_int32_rvv, zve64x +func ff_sum_square_butterfly_int32_rvv, zve64x, zba vsetvli t0, zero, e64, m8, 
ta, ma vmv.v.x v0, zero vmv.v.x v8, zero @@ -101,7 +101,7 @@ func ff_sum_square_butterfly_int32_rvv, zve64x endfunc #endif -func ff_sum_square_butterfly_float_rvv, zve32f +func ff_sum_square_butterfly_float_rvv, zve32f, zba vsetvli t0, zero, e32, m8, ta, ma vmv.v.x v0, zero vmv.v.x v8, zero diff --git a/libavcodec/riscv/ac3dsp_rvvb.S b/libavcodec/riscv/ac3dsp_rvvb.S index 64766b56be..5bffb40bba 100644 --- a/libavcodec/riscv/ac3dsp_rvvb.S +++ b/libavcodec/riscv/ac3dsp_rvvb.S @@ -21,7 +21,7 @@ #include "config.h" #include "libavutil/riscv/asm.S" -func ff_extract_exponents_rvvb, zve32x, zvbb +func ff_extract_exponents_rvvb, zve32x, zvbb, zba 1: vsetvli t0, a2, e32, m8, ta, ma vle32.v v8, (a1) diff --git a/libavcodec/riscv/alacdsp_rvv.S b/libavcodec/riscv/alacdsp_rvv.S index 8efb04e0c8..19714bd6e3 100644 --- a/libavcodec/riscv/alacdsp_rvv.S +++ b/libavcodec/riscv/alacdsp_rvv.S @@ -21,7 +21,7 @@ #include "libavutil/riscv/asm.S" #if (__riscv_xlen == 64) -func ff_alac_decorrelate_stereo_rvv, zve32x +func ff_alac_decorrelate_stereo_rvv, zve32x, zba ld a4, 8(a0) ld a0, 0(a0) 1: @@ -43,7 +43,7 @@ func ff_alac_decorrelate_stereo_rvv, zve32x ret endfunc -func ff_alac_append_extra_bits_mono_rvv, zve32x +func ff_alac_append_extra_bits_mono_rvv, zve32x, zba ld a0, (a0) ld a1, (a1) 1: @@ -61,7 +61,7 @@ func ff_alac_append_extra_bits_mono_rvv, zve32x ret endfunc -func ff_alac_append_extra_bits_stereo_rvv, zve32x +func ff_alac_append_extra_bits_stereo_rvv, zve32x, zba ld a6, 8(a0) ld a0, (a0) ld a7, 8(a1) diff --git a/libavcodec/riscv/audiodsp_rvv.S b/libavcodec/riscv/audiodsp_rvv.S index f0b23bab5e..b7134de523 100644 --- a/libavcodec/riscv/audiodsp_rvv.S +++ b/libavcodec/riscv/audiodsp_rvv.S @@ -20,7 +20,7 @@ #include "libavutil/riscv/asm.S" -func ff_scalarproduct_int16_rvv, zve32x +func ff_scalarproduct_int16_rvv, zve32x, zba vsetvli t0, zero, e32, m8, ta, ma vmv.v.x v8, zero vmv.s.x v0, zero @@ -40,7 +40,7 @@ func ff_scalarproduct_int16_rvv, zve32x ret endfunc -func 
ff_vector_clip_int32_rvv, zve32x +func ff_vector_clip_int32_rvv, zve32x, zba 1: vsetvli t0, a4, e32, m8, ta, ma vle32.v v8, (a1) @@ -55,7 +55,7 @@ func ff_vector_clip_int32_rvv, zve32x ret endfunc -func ff_vector_clipf_rvv, zve32f +func ff_vector_clipf_rvv, zve32f, zba NOHWF fmv.w.x fa0, a3 NOHWF fmv.w.x fa1, a4 1: diff --git a/libavcodec/riscv/bswapdsp_rvb.S b/libavcodec/riscv/bswapdsp_rvb.S index 0786bd3f36..17cfd5d7ef 100644 --- a/libavcodec/riscv/bswapdsp_rvb.S +++ b/libavcodec/riscv/bswapdsp_rvb.S @@ -23,7 +23,7 @@ #include "libavutil/riscv/bswap_rvb.S" #if (__riscv_xlen >= 64) -func ff_bswap32_buf_rvb, zbb +func ff_bswap32_buf_rvb, zba, zbb bswap32_rvb a0, a1, a2 endfunc #endif diff --git a/libavcodec/riscv/bswapdsp_rvv.S b/libavcodec/riscv/bswapdsp_rvv.S index b37fe26255..14484a772d 100644 --- a/libavcodec/riscv/bswapdsp_rvv.S +++ b/libavcodec/riscv/bswapdsp_rvv.S @@ -21,7 +21,7 @@ #include "config.h" #include "libavutil/riscv/asm.S" -func ff_bswap16_buf_rvv, zve32x +func ff_bswap16_buf_rvv, zve32x, zba 1: vsetvli t0, a2, e16, m8, ta, ma vle16.v v8, (a1) diff --git a/libavcodec/riscv/exrdsp_rvv.S b/libavcodec/riscv/exrdsp_rvv.S index f4a35f58ff..c1d7dfcb86 100644 --- a/libavcodec/riscv/exrdsp_rvv.S +++ b/libavcodec/riscv/exrdsp_rvv.S @@ -20,7 +20,7 @@ #include "libavutil/riscv/asm.S" -func ff_reorder_pixels_rvv, zve32x +func ff_reorder_pixels_rvv, zve32x, zba srai a2, a2, 1 add t1, a1, a2 1: diff --git a/libavcodec/riscv/fixed_vtype.S b/libavcodec/riscv/fixed_vtype.S new file mode 100644 index 0000000000..7aac70bda8 --- /dev/null +++ b/libavcodec/riscv/fixed_vtype.S @@ -0,0 +1,48 @@ +/* + * Copyright © 2024 Rémi Denis-Courmont. + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#define VILL -0x8000 + + .data +ff_vtype_e8: +ff_vtype_e8_1: + .half VILL +ff_vtype_e8_2: + .half VILL +ff_vtype_e8_4: + .half VILL +ff_vtype_e8_8: + .half VILL +ff_vtype_e8_16: + .half VILL +ff_vtype_e8_32: + .half VILL + +ff_vtype_e16: +ff_vtype_e16_1: + .half VILL +ff_vtype_e16_2: + .half VILL +ff_vtype_e16_4: + .half VILL +ff_vtype_e16_8: + .half VILL +ff_vtype_e16_16: + .half VILL diff --git a/libavcodec/riscv/flacdsp_rvv.S b/libavcodec/riscv/flacdsp_rvv.S index 1724aee9d7..69505c694a 100644 --- a/libavcodec/riscv/flacdsp_rvv.S +++ b/libavcodec/riscv/flacdsp_rvv.S @@ -20,7 +20,7 @@ #include "libavutil/riscv/asm.S" -func ff_flac_lpc16_rvv, zve32x, zbb +func ff_flac_lpc16_rvv, zve32x, b vtype_vli t0, a2, t2, e32, ta, ma vsetvl zero, a2, t0 vle32.v v8, (a1) @@ -45,7 +45,7 @@ func ff_flac_lpc16_rvv, zve32x, zbb endfunc #if (__riscv_xlen == 64) -func ff_flac_lpc32_rvv, zve64x +func ff_flac_lpc32_rvv, zve64x, zba addi t2, a2, -16 ble t2, zero, ff_flac_lpc32_rvv_simple vsetivli zero, 1, e64, m1, ta, ma @@ -76,7 +76,7 @@ func ff_flac_lpc32_rvv, zve64x ret endfunc -func ff_flac_lpc32_rvv_simple, zve64x, zbb +func ff_flac_lpc32_rvv_simple, zve64x, b vtype_vli t3, a2, t1, e64, ta, ma vntypei t2, t3 vsetvl zero, a2, t3 // e64 @@ -104,7 +104,7 @@ func ff_flac_lpc32_rvv_simple, zve64x, zbb ret endfunc -func ff_flac_lpc33_rvv, zve64x, zbb +func ff_flac_lpc33_rvv, zve64x, b vtype_vli t0, a3, t1, e64, ta, ma vsetvl zero, a3, t0 vmv.s.x v0, zero @@ -132,7 +132,7 @@ func ff_flac_lpc33_rvv, zve64x, zbb 
endfunc #endif -func ff_flac_wasted32_rvv, zve32x +func ff_flac_wasted32_rvv, zve32x, zba 1: vsetvli t0, a2, e32, m8, ta, ma vle32.v v8, (a0) @@ -145,7 +145,7 @@ func ff_flac_wasted32_rvv, zve32x ret endfunc -func ff_flac_wasted33_rvv, zve64x +func ff_flac_wasted33_rvv, zve64x, zba srli t0, a2, 5 li t1, 1 bnez t0, 2f @@ -177,7 +177,7 @@ func ff_flac_wasted33_rvv, zve64x endfunc #if (__riscv_xlen == 64) -func ff_flac_decorrelate_indep2_16_rvv, zve32x +func ff_flac_decorrelate_indep2_16_rvv, zve32x, zba ld a0, (a0) ld a2, 8(a1) ld a1, (a1) @@ -200,7 +200,7 @@ func ff_flac_decorrelate_indep2_16_rvv, zve32x ret endfunc -func ff_flac_decorrelate_indep4_16_rvv, zve32x +func ff_flac_decorrelate_indep4_16_rvv, zve32x, zba ld a0, (a0) ld a2, 8(a1) ld t1, 16(a1) @@ -233,7 +233,7 @@ func ff_flac_decorrelate_indep4_16_rvv, zve32x ret endfunc -func ff_flac_decorrelate_indep6_16_rvv, zve32x +func ff_flac_decorrelate_indep6_16_rvv, zve32x, zba ld a0, (a0) ld a2, 8(a1) ld t1, 16(a1) @@ -278,7 +278,7 @@ func ff_flac_decorrelate_indep6_16_rvv, zve32x ret endfunc -func ff_flac_decorrelate_indep8_16_rvv, zve32x +func ff_flac_decorrelate_indep8_16_rvv, zve32x, zba ld a0, (a0) ld a2, 8(a1) ld t1, 16(a1) @@ -332,9 +332,7 @@ func ff_flac_decorrelate_indep8_16_rvv, zve32x ret endfunc - - -func ff_flac_decorrelate_ls_16_rvv, zve32x +func ff_flac_decorrelate_ls_16_rvv, zve32x, zba ld a0, (a0) ld a2, 8(a1) ld a1, (a1) @@ -358,7 +356,7 @@ func ff_flac_decorrelate_ls_16_rvv, zve32x ret endfunc -func ff_flac_decorrelate_rs_16_rvv, zve32x +func ff_flac_decorrelate_rs_16_rvv, zve32x, zba ld a0, (a0) ld a2, 8(a1) ld a1, (a1) @@ -382,7 +380,7 @@ func ff_flac_decorrelate_rs_16_rvv, zve32x ret endfunc -func ff_flac_decorrelate_ms_16_rvv, zve32x +func ff_flac_decorrelate_ms_16_rvv, zve32x, zba ld a0, (a0) ld a2, 8(a1) ld a1, (a1) @@ -408,7 +406,7 @@ func ff_flac_decorrelate_ms_16_rvv, zve32x ret endfunc -func ff_flac_decorrelate_indep2_32_rvv, zve32x +func ff_flac_decorrelate_indep2_32_rvv, zve32x, zba 
ld a0, (a0) ld a2, 8(a1) ld a1, (a1) @@ -428,7 +426,7 @@ func ff_flac_decorrelate_indep2_32_rvv, zve32x ret endfunc -func ff_flac_decorrelate_indep4_32_rvv, zve32x +func ff_flac_decorrelate_indep4_32_rvv, zve32x, zba ld a0, (a0) ld a2, 8(a1) ld t1, 16(a1) @@ -457,7 +455,7 @@ func ff_flac_decorrelate_indep4_32_rvv, zve32x ret endfunc -func ff_flac_decorrelate_indep6_32_rvv, zve32x +func ff_flac_decorrelate_indep6_32_rvv, zve32x, zba ld a0, (a0) ld a2, 8(a1) ld t1, 16(a1) @@ -495,7 +493,7 @@ func ff_flac_decorrelate_indep6_32_rvv, zve32x ret endfunc -func ff_flac_decorrelate_indep8_32_rvv, zve32x +func ff_flac_decorrelate_indep8_32_rvv, zve32x, zba ld a0, (a0) ld a2, 8(a1) ld t1, 16(a1) @@ -540,7 +538,7 @@ func ff_flac_decorrelate_indep8_32_rvv, zve32x ret endfunc -func ff_flac_decorrelate_ls_32_rvv, zve32x +func ff_flac_decorrelate_ls_32_rvv, zve32x, zba ld a0, (a0) ld a2, 8(a1) ld a1, (a1) @@ -561,7 +559,7 @@ func ff_flac_decorrelate_ls_32_rvv, zve32x ret endfunc -func ff_flac_decorrelate_rs_32_rvv, zve32x +func ff_flac_decorrelate_rs_32_rvv, zve32x, zba ld a0, (a0) ld a2, 8(a1) ld a1, (a1) @@ -582,7 +580,7 @@ func ff_flac_decorrelate_rs_32_rvv, zve32x ret endfunc -func ff_flac_decorrelate_ms_32_rvv, zve32x +func ff_flac_decorrelate_ms_32_rvv, zve32x, zba ld a0, (a0) ld a2, 8(a1) ld a1, (a1) diff --git a/libavcodec/riscv/fmtconvert_rvv.S b/libavcodec/riscv/fmtconvert_rvv.S index d0e2f106d5..05cd3b38a5 100644 --- a/libavcodec/riscv/fmtconvert_rvv.S +++ b/libavcodec/riscv/fmtconvert_rvv.S @@ -20,7 +20,7 @@ #include "libavutil/riscv/asm.S" -func ff_int32_to_float_fmul_scalar_rvv, zve32f +func ff_int32_to_float_fmul_scalar_rvv, zve32f, zba NOHWF fmv.w.x fa0, a2 NOHWF mv a2, a3 1: @@ -37,7 +37,7 @@ NOHWF mv a2, a3 ret endfunc -func ff_int32_to_float_fmul_array8_rvv, zve32f +func ff_int32_to_float_fmul_array8_rvv, zve32f, zba srai a4, a4, 3 1: vsetvli t0, a4, e32, m1, ta, ma diff --git a/libavcodec/riscv/h264_mc_chroma.S b/libavcodec/riscv/h264_mc_chroma.S index 
ce99bda44d..b6c0e1c635 100644 --- a/libavcodec/riscv/h264_mc_chroma.S +++ b/libavcodec/riscv/h264_mc_chroma.S @@ -325,7 +325,7 @@ ret .endm -func h264_put_chroma_mc_rvv, zve32x +func h264_put_chroma_mc_rvv, zve32x, zba 11: li a7, 3 blt a3, a7, 12f @@ -334,7 +334,7 @@ func h264_put_chroma_mc_rvv, zve32x do_chroma_mc put 0 endfunc -func h264_avg_chroma_mc_rvv, zve32x +func h264_avg_chroma_mc_rvv, zve32x, zba 21: li a7, 3 blt a3, a7, 22f diff --git a/libavcodec/riscv/h264idct_rvv.S b/libavcodec/riscv/h264idct_rvv.S index c74ea18c19..514c849bce 100644 --- a/libavcodec/riscv/h264idct_rvv.S +++ b/libavcodec/riscv/h264idct_rvv.S @@ -298,7 +298,7 @@ func ff_h264_idct8_add_8_rvv, zve32x ret endfunc -func ff_h264_idct8_add_16_rvv, zve32x +func ff_h264_idct8_add_16_rvv, zve32x, zba csrwi vxrm, 0 .Lidct8_add_16_rvv: li a4, 8 diff --git a/libavcodec/riscv/huffyuvdsp_rvv.S b/libavcodec/riscv/huffyuvdsp_rvv.S index d334f5c6d0..54d1d94059 100644 --- a/libavcodec/riscv/huffyuvdsp_rvv.S +++ b/libavcodec/riscv/huffyuvdsp_rvv.S @@ -20,7 +20,7 @@ #include "libavutil/riscv/asm.S" -func ff_add_int16_rvv, zve32x +func ff_add_int16_rvv, zve32x, zba 1: vsetvli t0, a3, e16, m8, ta, ma vle16.v v16, (a0) @@ -36,7 +36,7 @@ func ff_add_int16_rvv, zve32x ret endfunc -func ff_add_hfyu_left_pred_bgr32_rvv, zve32x, zbb +func ff_add_hfyu_left_pred_bgr32_rvv, zve32x, b vtype_ivli t1, 4, e8, ta, ma li t0, 4 vsetvl zero, t0, t1 diff --git a/libavcodec/riscv/jpeg2000dsp_rvv.S b/libavcodec/riscv/jpeg2000dsp_rvv.S index 10efe6b0db..77c6fd2d32 100644 --- a/libavcodec/riscv/jpeg2000dsp_rvv.S +++ b/libavcodec/riscv/jpeg2000dsp_rvv.S @@ -20,7 +20,7 @@ #include "libavutil/riscv/asm.S" -func ff_ict_float_rvv, zve32f +func ff_ict_float_rvv, zve32f, zba lla t0, ff_jpeg2000_f_ict_params flw ft0, 0(t0) flw ft1, 4(t0) @@ -48,7 +48,7 @@ func ff_ict_float_rvv, zve32f ret endfunc -func ff_rct_int_rvv, zve32x +func ff_rct_int_rvv, zve32x, zba 1: vsetvli t0, a3, e32, m8, ta, ma vle32.v v16, (a1) diff --git 
a/libavcodec/riscv/llauddsp_rvv.S b/libavcodec/riscv/llauddsp_rvv.S index 5569864832..6af2e6a882 100644 --- a/libavcodec/riscv/llauddsp_rvv.S +++ b/libavcodec/riscv/llauddsp_rvv.S @@ -20,7 +20,7 @@ #include "libavutil/riscv/asm.S" -func ff_scalarproduct_and_madd_int16_rvv, zve32x +func ff_scalarproduct_and_madd_int16_rvv, zve32x, zba vsetvli t0, zero, e32, m8, ta, ma vmv.v.x v0, zero 1: @@ -44,7 +44,7 @@ func ff_scalarproduct_and_madd_int16_rvv, zve32x ret endfunc -func ff_scalarproduct_and_madd_int32_rvv, zve32x +func ff_scalarproduct_and_madd_int32_rvv, zve32x, zba vsetvli t0, zero, e32, m8, ta, ma vmv.v.x v0, zero 1: diff --git a/libavcodec/riscv/lpc_rvv.S b/libavcodec/riscv/lpc_rvv.S index fe80305d9a..8fd1ddbbf0 100644 --- a/libavcodec/riscv/lpc_rvv.S +++ b/libavcodec/riscv/lpc_rvv.S @@ -21,7 +21,7 @@ #include "libavutil/riscv/asm.S" #if __riscv_xlen >= 64 -func ff_lpc_apply_welch_window_rvv, zve64d +func ff_lpc_apply_welch_window_rvv, zve64d, zba vsetvli t0, zero, e64, m8, ta, ma vid.v v0 addi t2, a1, -1 @@ -86,7 +86,7 @@ func ff_lpc_apply_welch_window_rvv, zve64d ret endfunc -func ff_lpc_compute_autocorr_rvv, zve64d, zbb +func ff_lpc_compute_autocorr_rvv, zve64d, b vtype_vli t1, a2, t2, e64, ta, ma, 1 addi a2, a2, 1 li t0, 1 diff --git a/libavcodec/riscv/opusdsp_rvv.S b/libavcodec/riscv/opusdsp_rvv.S index 42d845a370..389dc744f5 100644 --- a/libavcodec/riscv/opusdsp_rvv.S +++ b/libavcodec/riscv/opusdsp_rvv.S @@ -20,7 +20,7 @@ #include "libavutil/riscv/asm.S" -func ff_opus_postfilter_rvv, zve32f, zbb +func ff_opus_postfilter_rvv, zve32f, b flw fa0, 0(a2) // g0 slli t1, a1, 2 flw fa1, 4(a2) // g1 diff --git a/libavcodec/riscv/rv40dsp_rvv.S b/libavcodec/riscv/rv40dsp_rvv.S index e49345ef70..53d3d1d6f9 100644 --- a/libavcodec/riscv/rv40dsp_rvv.S +++ b/libavcodec/riscv/rv40dsp_rvv.S @@ -332,7 +332,7 @@ ret .endm -func ff_put_rv40_chroma_mc_rvv, zve32x +func ff_put_rv40_chroma_mc_rvv, zve32x, zba 11: li a7, 3 blt a3, a7, 12f @@ -341,7 +341,7 @@ func 
ff_put_rv40_chroma_mc_rvv, zve32x do_chroma_mc put 0 endfunc -func ff_avg_rv40_chroma_mc_rvv, zve32x +func ff_avg_rv40_chroma_mc_rvv, zve32x, zba 21: li a7, 3 blt a3, a7, 22f diff --git a/libavcodec/riscv/sbrdsp_rvv.S b/libavcodec/riscv/sbrdsp_rvv.S index 331b88022c..7c90a8addf 100644 --- a/libavcodec/riscv/sbrdsp_rvv.S +++ b/libavcodec/riscv/sbrdsp_rvv.S @@ -20,7 +20,7 @@ #include "libavutil/riscv/asm.S" -func ff_sbr_sum64x5_rvv, zve32f +func ff_sbr_sum64x5_rvv, zve32f, zba li a5, 64 addi a1, a0, 64 * 4 addi a2, a0, 128 * 4 @@ -49,7 +49,7 @@ func ff_sbr_sum64x5_rvv, zve32f ret endfunc -func ff_sbr_sum_square_rvv, zve32f +func ff_sbr_sum_square_rvv, zve32f, zba vsetvli t0, zero, e32, m8, ta, ma slli a1, a1, 1 vmv.v.x v8, zero @@ -157,7 +157,7 @@ func ff_sbr_autocorrelate_rvv, zve32f ret endfunc -func ff_sbr_hf_gen_rvv, zve32f +func ff_sbr_hf_gen_rvv, zve32f, zba NOHWF fmv.w.x fa0, a4 NOHWF mv a4, a5 NOHWF mv a5, a6 @@ -207,7 +207,7 @@ NOHWF mv a5, a6 ret endfunc -func ff_sbr_hf_g_filt_rvv, zve32f +func ff_sbr_hf_g_filt_rvv, zve32f, zba li t1, 40 * 2 * 4 sh3add a1, a4, a1 1: @@ -272,16 +272,16 @@ endfunc ret .endm -func ff_sbr_hf_apply_noise_0_rvv, zve32f, zbb +func ff_sbr_hf_apply_noise_0_rvv, zve32f, b hf_apply_noise 0 endfunc -func ff_sbr_hf_apply_noise_3_rvv, zve32f, zbb +func ff_sbr_hf_apply_noise_3_rvv, zve32f, b not a4, a4 // invert parity of kx // fall through endfunc -func ff_sbr_hf_apply_noise_1_rvv, zve32f, zbb +func ff_sbr_hf_apply_noise_1_rvv, zve32f, b vsetvli t0, zero, e32, m4, ta, ma vid.v v4 vxor.vx v4, v4, a4 @@ -289,6 +289,6 @@ func ff_sbr_hf_apply_noise_1_rvv, zve32f, zbb hf_apply_noise 1 endfunc -func ff_sbr_hf_apply_noise_2_rvv, zve32f, zbb +func ff_sbr_hf_apply_noise_2_rvv, zve32f, b hf_apply_noise 2 endfunc diff --git a/libavcodec/riscv/svqenc_rvv.S b/libavcodec/riscv/svqenc_rvv.S index cfc27154dd..8b7a8b0400 100644 --- a/libavcodec/riscv/svqenc_rvv.S +++ b/libavcodec/riscv/svqenc_rvv.S @@ -20,7 +20,7 @@ #include "libavutil/riscv/asm.S" -func 
ff_ssd_int8_vs_int16_rvv, zve32x +func ff_ssd_int8_vs_int16_rvv, zve32x, zba vsetvli t0, zero, e32, m8, ta, ma vmv.v.x v24, zero 1: diff --git a/libavcodec/riscv/takdsp_rvv.S b/libavcodec/riscv/takdsp_rvv.S index fa942a3be6..f3a230ccec 100644 --- a/libavcodec/riscv/takdsp_rvv.S +++ b/libavcodec/riscv/takdsp_rvv.S @@ -21,7 +21,7 @@ #include "libavutil/riscv/asm.S" -func ff_decorrelate_ls_rvv, zve32x +func ff_decorrelate_ls_rvv, zve32x, zba 1: vsetvli t0, a2, e32, m8, ta, ma sub a2, a2, t0 @@ -35,7 +35,7 @@ func ff_decorrelate_ls_rvv, zve32x ret endfunc -func ff_decorrelate_sr_rvv, zve32x +func ff_decorrelate_sr_rvv, zve32x, zba 1: vsetvli t0, a2, e32, m8, ta, ma vle32.v v0, (a0) @@ -49,7 +49,7 @@ func ff_decorrelate_sr_rvv, zve32x ret endfunc -func ff_decorrelate_sm_rvv, zve32x +func ff_decorrelate_sm_rvv, zve32x, zba 1: vsetvli t0, a2, e32, m8, ta, ma vle32.v v8, (a1) @@ -67,7 +67,7 @@ func ff_decorrelate_sm_rvv, zve32x ret endfunc -func ff_decorrelate_sf_rvv, zve32x +func ff_decorrelate_sf_rvv, zve32x, zba csrwi vxrm, 0 1: vsetvli t0, a2, e32, m8, ta, ma diff --git a/libavcodec/riscv/utvideodsp_rvv.S b/libavcodec/riscv/utvideodsp_rvv.S index fa70d0eb34..5e833eeb3c 100644 --- a/libavcodec/riscv/utvideodsp_rvv.S +++ b/libavcodec/riscv/utvideodsp_rvv.S @@ -20,7 +20,7 @@ #include "libavutil/riscv/asm.S" -func ff_restore_rgb_planes_rvv, zve32x +func ff_restore_rgb_planes_rvv, zve32x, zba li t1, -0x80 sub a3, a3, a6 sub a4, a4, a6 @@ -52,7 +52,7 @@ func ff_restore_rgb_planes_rvv, zve32x ret endfunc -func ff_restore_rgb_planes10_rvv, zve32x +func ff_restore_rgb_planes10_rvv, zve32x, zba li t1, -0x200 li t2, 0x3FF sub a3, a3, a6 diff --git a/libavcodec/riscv/vc1dsp_rvv.S b/libavcodec/riscv/vc1dsp_rvv.S index d8b62579aa..aede87ccc0 100644 --- a/libavcodec/riscv/vc1dsp_rvv.S +++ b/libavcodec/riscv/vc1dsp_rvv.S @@ -21,7 +21,7 @@ #include "libavutil/riscv/asm.S" -func ff_vc1_inv_trans_8x8_dc_rvv, zve64x +func ff_vc1_inv_trans_8x8_dc_rvv, zve64x, zba lh t2, (a2) vsetivli zero, 
8, e8, mf2, ta, ma vlse64.v v0, (a0), a1 @@ -43,7 +43,7 @@ func ff_vc1_inv_trans_8x8_dc_rvv, zve64x ret endfunc -func ff_vc1_inv_trans_4x8_dc_rvv, zve32x +func ff_vc1_inv_trans_4x8_dc_rvv, zve32x, zba lh t2, (a2) vsetivli zero, 8, e8, mf2, ta, ma vlse32.v v0, (a0), a1 @@ -67,7 +67,7 @@ func ff_vc1_inv_trans_4x8_dc_rvv, zve32x ret endfunc -func ff_vc1_inv_trans_8x4_dc_rvv, zve64x +func ff_vc1_inv_trans_8x4_dc_rvv, zve64x, zba lh t2, (a2) vsetivli zero, 4, e8, mf4, ta, ma vlse64.v v0, (a0), a1 diff --git a/libavcodec/riscv/vorbisdsp_rvv.S b/libavcodec/riscv/vorbisdsp_rvv.S index 81a6c62a65..d136188d2e 100644 --- a/libavcodec/riscv/vorbisdsp_rvv.S +++ b/libavcodec/riscv/vorbisdsp_rvv.S @@ -20,7 +20,7 @@ #include "libavutil/riscv/asm.S" -func ff_vorbis_inverse_coupling_rvv, zve32f +func ff_vorbis_inverse_coupling_rvv, zve32f, zba fmv.w.x ft0, zero 1: vsetvli t0, a2, e32, m4, ta, ma diff --git a/libavcodec/riscv/vp7dsp_rvv.S b/libavcodec/riscv/vp7dsp_rvv.S index 856b0e8c96..bfcc220273 100644 --- a/libavcodec/riscv/vp7dsp_rvv.S +++ b/libavcodec/riscv/vp7dsp_rvv.S @@ -21,7 +21,7 @@ #include "libavutil/riscv/asm.S" #if __riscv_xlen >= 64 -func ff_vp7_luma_dc_wht_rvv, zve32x +func ff_vp7_luma_dc_wht_rvv, zve32x, zba li a2, 4 * 16 * 2 li a7, 16 * 2 jal t0, 1f diff --git a/libavcodec/riscv/vp8dsp_rvv.S b/libavcodec/riscv/vp8dsp_rvv.S index d366748a0a..4675f4c76b 100644 --- a/libavcodec/riscv/vp8dsp_rvv.S +++ b/libavcodec/riscv/vp8dsp_rvv.S @@ -440,7 +440,7 @@ endconst .endm .macro epel len size type -func ff_put_vp8_epel\len\()_\type\()\size\()_rvv, zve32x +func ff_put_vp8_epel\len\()_\type\()\size\()_rvv, zve32x, zba epel_filter \size \type t vsetvlstatic8 \len 1: @@ -455,7 +455,7 @@ endfunc .endm .macro epel_hv len hsize vsize -func ff_put_vp8_epel\len\()_h\hsize\()v\vsize\()_rvv, zve32x +func ff_put_vp8_epel\len\()_h\hsize\()v\vsize\()_rvv, zve32x, zba #if __riscv_xlen == 64 addi sp, sp, -48 .irp n,0,1,2,3,4,5 diff --git a/libavcodec/riscv/vp9_intra_rvi.S 
b/libavcodec/riscv/vp9_intra_rvi.S index 16b6bdb25a..dadd4be194 100644 --- a/libavcodec/riscv/vp9_intra_rvi.S +++ b/libavcodec/riscv/vp9_intra_rvi.S @@ -21,7 +21,7 @@ #include "libavutil/riscv/asm.S" #if __riscv_xlen >= 64 -func ff_v_32x32_rvi +func ff_v_32x32_rvi, zba ld t0, (a3) ld t1, 8(a3) ld t2, 16(a3) @@ -42,7 +42,7 @@ func ff_v_32x32_rvi ret endfunc -func ff_v_16x16_rvi +func ff_v_16x16_rvi, zba ld t0, (a3) ld t1, 8(a3) .rept 8 @@ -57,7 +57,7 @@ func ff_v_16x16_rvi ret endfunc -func ff_v_8x8_rvi +func ff_v_8x8_rvi, zba ld t0, (a3) .rept 4 add a7, a0, a1 From patchwork Mon Jul 22 18:44:23 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: =?utf-8?q?R=C3=A9mi_Denis-Courmont?= X-Patchwork-Id: 50687 Delivered-To: ffmpegpatchwork2@gmail.com Received: by 2002:a59:a742:0:b0:482:c625:d099 with SMTP id f2csp2200867vqm; Mon, 22 Jul 2024 11:51:54 -0700 (PDT) X-Forwarded-Encrypted: i=2; AJvYcCU1uI/4FT7Akx8y1rc2RXJQ8TY8KJ/UZ7lV8vcSLBLx5f62NlBChwK9UeypxkWq4O0JPcqpQ2TEZQhvNsiF0/Lg5T/ZIt9qM9SbzA== X-Google-Smtp-Source: AGHT+IEL16ai037en/FZTjRtpeeDRpsBRTj5c55ixT++QviVxv0Sfs2yhdvDknXMF7INDmvGfJTx X-Received: by 2002:a50:d703:0:b0:5a1:6c50:a3d with SMTP id 4fb4d7f45d1cf-5a9425dcb32mr556419a12.20.1721674314080; Mon, 22 Jul 2024 11:51:54 -0700 (PDT) ARC-Seal: i=1; a=rsa-sha256; t=1721674314; cv=none; d=google.com; s=arc-20160816; b=f6/yg2z/j7ooEflOMBmW9VGKolDx7NTYtokfRAC6U5hSuyJw0LsiKjcc8QoY+LE884 QpkhK2JDDAJxbfRi7DXXXbAiI4rqYpg2AZDF8g/dy6+dZ0BsPIbsRBTP7qHXSh3C2dT1 s42Y8LRZTlKt8zdpcQOT89Bi2VK92GTR+9glBH2p++BWpkPV6RKqkw3JNmF0LRJyi4Ek m7gk5FSMG6ZhH5htaZ12jiyXQ4uvfqvUKarHgNEo7Sf+5GtD9YFxnRHGbRmfakq3cQBI yFj2JImYkVUPrJ0kstBthU1hoVwjHj888ofE0TgcGsLPaLBf0mKEhs3MlFjjM2US2LSw d1qg== ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=arc-20160816; h=sender:errors-to:content-transfer-encoding:reply-to:list-subscribe :list-help:list-post:list-archive:list-unsubscribe:list-id 
:precedence:subject:mime-version:references:in-reply-to:message-id :date:to:from:delivered-to; bh=fLYPfQRqiifeFM7/u0Bet/CZYg2YrottPm61SceGTVI=; fh=YOA8vD9MJZuwZ71F/05pj6KdCjf6jQRmzLS+CATXUQk=; b=PNtGzTq9IVGaANNyWE9pI9XUETEpZOpfnc6lu2wwPMd3oCwn3YBhA5kYm6EPP6qCHf cLwJoAEHrMvQM/uhMarIHz+NY36sy/SG9odmNB9wmim8XqGR2QA5JlCxvmKh6iaYVgAS JXDcTycYosVbyYiMtBHiNDFzAzNryUVLjPC+fUMi7S9039HLk0LiHIrVgA2RjuWi88WF sXf9TQjqoI8FBblsJPpzn3i9bl7VO7z2f6BmPKqIBvPt6jiQkvbtM5c+5s6VZu17eLlo 9Hif/FmcUJvfUl14sygPPLbzYh9UpsHM2ssZT0JtN4WghbTexgETPmwEB3DN+WGgJkzr nDPw==; dara=google.com ARC-Authentication-Results: i=1; mx.google.com; spf=pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org designates 79.124.17.100 as permitted sender) smtp.mailfrom=ffmpeg-devel-bounces@ffmpeg.org Return-Path: Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org. [79.124.17.100]) by mx.google.com with ESMTP id 4fb4d7f45d1cf-5a309fffca5si4889536a12.127.2024.07.22.11.51.53; Mon, 22 Jul 2024 11:51:54 -0700 (PDT) Received-SPF: pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org designates 79.124.17.100 as permitted sender) client-ip=79.124.17.100; Authentication-Results: mx.google.com; spf=pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org designates 79.124.17.100 as permitted sender) smtp.mailfrom=ffmpeg-devel-bounces@ffmpeg.org Received: from [127.0.1.1] (localhost [127.0.0.1]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id 0BF2E68D3C9; Mon, 22 Jul 2024 21:44:42 +0300 (EEST) X-Original-To: ffmpeg-devel@ffmpeg.org Delivered-To: ffmpeg-devel@ffmpeg.org Received: from ursule.remlab.net (vps-a2bccee9.vps.ovh.net [51.75.19.47]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id A480E68D553 for ; Mon, 22 Jul 2024 21:44:32 +0300 (EEST) Received: from basile.remlab.net (localhost [IPv6:::1]) by ursule.remlab.net (Postfix) with ESMTP id 57DFBC0233 for ; Mon, 22 Jul 2024 21:44:32 +0300 (EEST) From: =?utf-8?q?R=C3=A9mi_Denis-Courmont?= To: ffmpeg-devel@ffmpeg.org Date: Mon, 22 Jul 2024 21:44:23 
+0300 Message-ID: <20240722184431.40853-4-remi@remlab.net> X-Mailer: git-send-email 2.45.2 In-Reply-To: <20240722184431.40853-1-remi@remlab.net> References: <20240722184431.40853-1-remi@remlab.net> MIME-Version: 1.0 Subject: [FFmpeg-devel] [PATCH 4/9] lavfi/riscv: require B or zba explicitly X-BeenThere: ffmpeg-devel@ffmpeg.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: FFmpeg development discussions and patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: FFmpeg development discussions and patches Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" X-TUID: XnLEBtz/+8KV --- libavfilter/riscv/af_afir_rvv.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavfilter/riscv/af_afir_rvv.S b/libavfilter/riscv/af_afir_rvv.S index 04ec2e50d8..2107d97166 100644 --- a/libavfilter/riscv/af_afir_rvv.S +++ b/libavfilter/riscv/af_afir_rvv.S @@ -21,7 +21,7 @@ #include "libavutil/riscv/asm.S" // void ff_fcmul_add(float *sum, const float *t, const float *c, int len) -func ff_fcmul_add_rvv, zve64f +func ff_fcmul_add_rvv, zve64f, zba li t1, 32 1: vsetvli t0, a3, e32, m4, ta, ma From patchwork Mon Jul 22 18:44:24 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: =?utf-8?q?R=C3=A9mi_Denis-Courmont?= X-Patchwork-Id: 50684 Delivered-To: ffmpegpatchwork2@gmail.com Received: by 2002:a59:a742:0:b0:482:c625:d099 with SMTP id f2csp2198194vqm; Mon, 22 Jul 2024 11:45:18 -0700 (PDT) X-Forwarded-Encrypted: i=2; AJvYcCUBoGxAAMRu++SjiQE50yHBOP9Xm/OpmjuwhNI+Eio/sFjDBQg1k9vSl2nIGVAf/PsXh1IKrK7AOR/jjH2aaqFD00b6dXoS04OEFw== X-Google-Smtp-Source: AGHT+IEqyA0ye4eVO6m05f7Ek9eXQCoHZr+gXGZWyHW+J+QHenIg+tXZdRYvIJw3He3lWolD4p+d X-Received: by 2002:a05:6512:3da4:b0:52e:7f87:4e66 with SMTP id 2adb3069b0e04-52fc407504cmr530092e87.49.1721673918575; Mon, 22 Jul 2024 11:45:18 -0700 (PDT) ARC-Seal: i=1; a=rsa-sha256; t=1721673918; cv=none; d=google.com; s=arc-20160816; 
b=BljOuWUfFgbEtktPlSjN9Okp/YaZbezJfYr5DOYkOeRp8z9ntM8eZBuDwFDMwgNVS7 YbIzY9oKdMl6XWfXTsJN2gmzWi8TR8PlLlVVBpdeb795Z1hg71J07bQOcY3HfoURpJ4A iaachj3LtV5e4GNWDHQHJ/TNez40wdMwyW8KtsH6XEBAZ3vLy6iZiH9AJMBskJa/QGF9 1IQFkSYpKgOBiTN5JR/Sb0uNsTJzZFhrOJ2h5zpwXX7pHgaEZmAyZd78PKz2V4P4EOFm jO+hLIIAWHwYuYF7KFiowp02B6KmUOFySUU9aX8JMBAzhimxeGKXeNDBKx/PWTiDyroF 3wSg== ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=arc-20160816; h=sender:errors-to:content-transfer-encoding:reply-to:list-subscribe :list-help:list-post:list-archive:list-unsubscribe:list-id :precedence:subject:mime-version:references:in-reply-to:message-id :date:to:from:delivered-to; bh=JGXt1bzakZlwhKFbH3LIhEEunJ2sQNch7VKkJVvb51Q=; fh=YOA8vD9MJZuwZ71F/05pj6KdCjf6jQRmzLS+CATXUQk=; b=jIGO8z/Ia6e/0147lpS9U1kJP0xjtplgA2P5ePRKqJQ69RYVxX7+bJo+7mJ6udOxeW QAJMmO5UiA6zTojRa1yY0NsZnogNjV0Stvd/UIktO91YbtN0OT8wga5Kj2SWVJXrrPRf CIkXZfz5qBo+AQ0Rky6N+3ZHhx4edNnsq65uiRnJtMWV9WyigneZoQNl/yMa5gg/KgYw fRFUZ9oadm/KWlOQwqz4wa8V2YVm6a18tVGLrEbPnUHQQRCzzk29z8pTXoCoK1hAt6pI vppbg8zi8zH2MVjAocs3WLdssxc70SdmyK3LhtZ0vmKTItoCZn97BYYT3bP0Iv0EJ3ps jDUw==; dara=google.com ARC-Authentication-Results: i=1; mx.google.com; spf=pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org designates 79.124.17.100 as permitted sender) smtp.mailfrom=ffmpeg-devel-bounces@ffmpeg.org Return-Path: Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org. 
[79.124.17.100]) by mx.google.com with ESMTP id 2adb3069b0e04-52ef55428c6si2184794e87.354.2024.07.22.11.45.18; Mon, 22 Jul 2024 11:45:18 -0700 (PDT) Received-SPF: pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org designates 79.124.17.100 as permitted sender) client-ip=79.124.17.100; Authentication-Results: mx.google.com; spf=pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org designates 79.124.17.100 as permitted sender) smtp.mailfrom=ffmpeg-devel-bounces@ffmpeg.org Received: from [127.0.1.1] (localhost [127.0.0.1]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id 07B9D68D6D2; Mon, 22 Jul 2024 21:44:43 +0300 (EEST) X-Original-To: ffmpeg-devel@ffmpeg.org Delivered-To: ffmpeg-devel@ffmpeg.org Received: from ursule.remlab.net (vps-a2bccee9.vps.ovh.net [51.75.19.47]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id E872968D553 for ; Mon, 22 Jul 2024 21:44:32 +0300 (EEST) Received: from basile.remlab.net (localhost [IPv6:::1]) by ursule.remlab.net (Postfix) with ESMTP id 8697BC0236 for ; Mon, 22 Jul 2024 21:44:32 +0300 (EEST) From: =?utf-8?q?R=C3=A9mi_Denis-Courmont?= To: ffmpeg-devel@ffmpeg.org Date: Mon, 22 Jul 2024 21:44:24 +0300 Message-ID: <20240722184431.40853-5-remi@remlab.net> X-Mailer: git-send-email 2.45.2 In-Reply-To: <20240722184431.40853-1-remi@remlab.net> References: <20240722184431.40853-1-remi@remlab.net> MIME-Version: 1.0 Subject: [FFmpeg-devel] [PATCH 5/9] sws/riscv: require B or zba explicitly X-BeenThere: ffmpeg-devel@ffmpeg.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: FFmpeg development discussions and patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: FFmpeg development discussions and patches Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" X-TUID: 0MACfIXR9tNH --- libswscale/riscv/input_rvv.S | 12 ++++++------ libswscale/riscv/range_rvv.S | 8 ++++---- libswscale/riscv/rgb2rgb_rvb.S | 2 +- libswscale/riscv/rgb2rgb_rvv.S | 12 ++++++------ 4 files changed, 17 
insertions(+), 17 deletions(-) diff --git a/libswscale/riscv/input_rvv.S b/libswscale/riscv/input_rvv.S index 1d7de59c66..d07db43b55 100644 --- a/libswscale/riscv/input_rvv.S +++ b/libswscale/riscv/input_rvv.S @@ -26,7 +26,7 @@ func ff_bgr24ToY_rvv, zve32x j 1f endfunc -func ff_rgb24ToY_rvv, zve32x +func ff_rgb24ToY_rvv, zve32x, zba lw t1, 0(a5) # RY lw t3, 8(a5) # BY 1: @@ -62,7 +62,7 @@ func ff_bgr24ToUV_rvv, zve32x j 1f endfunc -func ff_rgb24ToUV_rvv, zve32x +func ff_rgb24ToUV_rvv, zve32x, zba lw t1, 12(a6) # RU lw t4, 24(a6) # RV lw t3, 20(a6) # BU @@ -108,7 +108,7 @@ func ff_bgr24ToUV_half_rvv, zve32x j 1f endfunc -func ff_rgb24ToUV_half_rvv, zve32x +func ff_rgb24ToUV_half_rvv, zve32x, zba lw t1, 12(a6) # RU lw t4, 24(a6) # RV lw t3, 20(a6) # BU @@ -157,7 +157,7 @@ func ff_\chr1\()ToY_rvv, zve32x j 1f endfunc -func ff_\chr0\()ToY_rvv, zve32x +func ff_\chr0\()ToY_rvv, zve32x, zba lw t1, 0(a5) # RY lw t3, 8(a5) # BY 1: @@ -199,7 +199,7 @@ func ff_\chr1\()ToUV_rvv, zve32x j 1f endfunc -func ff_\chr0\()ToUV_rvv, zve32x +func ff_\chr0\()ToUV_rvv, zve32x, zba lw t1, 12(a6) # RU lw t4, 24(a6) # RV lw t3, 20(a6) # BU @@ -251,7 +251,7 @@ func ff_\chr1\()ToUV_half_rvv, zve32x j 1f endfunc -func ff_\chr0\()ToUV_half_rvv, zve32x +func ff_\chr0\()ToUV_half_rvv, zve32x, zba lw t1, 12(a6) # RU lw t4, 24(a6) # RV lw t3, 20(a6) # BU diff --git a/libswscale/riscv/range_rvv.S b/libswscale/riscv/range_rvv.S index 9da80e6199..19a74eba79 100644 --- a/libswscale/riscv/range_rvv.S +++ b/libswscale/riscv/range_rvv.S @@ -20,7 +20,7 @@ #include "libavutil/riscv/asm.S" -func ff_range_lum_to_jpeg_16_rvv, zve32x +func ff_range_lum_to_jpeg_16_rvv, zve32x, zba li t1, 30189 li t2, 19077 li t3, -39057361 @@ -41,7 +41,7 @@ func ff_range_lum_to_jpeg_16_rvv, zve32x ret endfunc -func ff_range_lum_from_jpeg_16_rvv, zve32x +func ff_range_lum_from_jpeg_16_rvv, zve32x, zba li t1, 14071 li t2, 33561947 1: @@ -60,7 +60,7 @@ func ff_range_lum_from_jpeg_16_rvv, zve32x ret endfunc -func 
ff_range_chr_to_jpeg_16_rvv, zve32x +func ff_range_chr_to_jpeg_16_rvv, zve32x, zba li t1, 30775 li t2, 4663 li t3, -9289992 @@ -88,7 +88,7 @@ func ff_range_chr_to_jpeg_16_rvv, zve32x ret endfunc -func ff_range_chr_from_jpeg_16_rvv, zve32x +func ff_range_chr_from_jpeg_16_rvv, zve32x, zba li t1, 1799 li t2, 4081085 1: diff --git a/libswscale/riscv/rgb2rgb_rvb.S b/libswscale/riscv/rgb2rgb_rvb.S index af127b32ed..d18e5ba01b 100644 --- a/libswscale/riscv/rgb2rgb_rvb.S +++ b/libswscale/riscv/rgb2rgb_rvb.S @@ -23,7 +23,7 @@ #include "libavutil/riscv/bswap_rvb.S" #if (__riscv_xlen >= 64) -func ff_shuffle_bytes_3210_rvb, zbb +func ff_shuffle_bytes_3210_rvb, zba, zbb srli a2, a2, 2 bswap32_rvb a1, a0, a2 endfunc diff --git a/libswscale/riscv/rgb2rgb_rvv.S b/libswscale/riscv/rgb2rgb_rvv.S index 19f7aaf67d..e1270ac0df 100644 --- a/libswscale/riscv/rgb2rgb_rvv.S +++ b/libswscale/riscv/rgb2rgb_rvv.S @@ -25,7 +25,7 @@ func ff_shuffle_bytes_0321_rvv, zve32x j 1f endfunc -func ff_shuffle_bytes_2103_rvv, zve32x +func ff_shuffle_bytes_2103_rvv, zve32x, zba li t1, ~0x00ff00ff 1: not t2, t1 @@ -54,7 +54,7 @@ func ff_shuffle_bytes_1230_rvv, zve32x j 3f endfunc -func ff_shuffle_bytes_3012_rvv, zve32x +func ff_shuffle_bytes_3012_rvv, zve32x, zba li t1, 8 li t2, 24 3: @@ -74,7 +74,7 @@ func ff_shuffle_bytes_3012_rvv, zve32x ret endfunc -func ff_interleave_bytes_rvv, zve32x +func ff_interleave_bytes_rvv, zve32x, zba 1: mv t0, a0 mv t1, a1 @@ -100,7 +100,7 @@ func ff_interleave_bytes_rvv, zve32x ret endfunc -func ff_deinterleave_bytes_rvv, zve32x +func ff_deinterleave_bytes_rvv, zve32x, zba 1: mv t0, a0 mv t1, a1 @@ -165,10 +165,10 @@ endfunc ret .endm -func ff_uyvytoyuv422_rvv, zve32x, zbb +func ff_uyvytoyuv422_rvv, zve32x, b yuy2_to_i422p v20, v16 endfunc -func ff_yuyvtoyuv422_rvv, zve32x, zbb +func ff_yuyvtoyuv422_rvv, zve32x, b yuy2_to_i422p v16, v20 endfunc From patchwork Mon Jul 22 18:44:25 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit 
X-Patchwork-Submitter: =?utf-8?q?R=C3=A9mi_Denis-Courmont?= X-Patchwork-Id: 50685 Delivered-To: ffmpegpatchwork2@gmail.com Received: by 2002:a59:a742:0:b0:482:c625:d099 with SMTP id f2csp2198251vqm; Mon, 22 Jul 2024 11:45:27 -0700 (PDT) X-Forwarded-Encrypted: i=2; AJvYcCW6bj7hVUiFKzGwF4OfhmiBXTgmrBuvnGbC6eJGrIvrZvrBxOtgL1oy9F8AP7NG6Klckf7MBfU54gZp+qhNv1J3jnUi8lwuhaHolA== X-Google-Smtp-Source: AGHT+IFZYN9ESWYK6M/4ILFtLpKFos5j58L6C0ROboa6Dma6MEeY7AGt7SNVcECA+O9E/Q2ObW0S X-Received: by 2002:a05:6512:2810:b0:52e:9425:3cc8 with SMTP id 2adb3069b0e04-52ef8a1b94emr2295233e87.19.1721673927326; Mon, 22 Jul 2024 11:45:27 -0700 (PDT) ARC-Seal: i=1; a=rsa-sha256; t=1721673927; cv=none; d=google.com; s=arc-20160816; b=mZbmiY4Wo0lbPMJ0RCDdfG3cg/PGrnYjBZb0b7+hGDAtGRfafZeYfrNb6zW3q/iiLm JGwe4CxYIhMlJ2GSlYZOrWqxx8BgrlXcHHfYdPFvrH3Wt+CpEmJOeEe67sKCqtLfbHak Y38KUeoUb1sb7djltcmMYz5brS2A9JcOo2hQbiQwFcEKgVYkCS043+uMqkTfxLm8znVI SIxp+p8fjrLdmQUoU3yWMAG9umkRMllvhG1EC5hWU3d+NZV6YhVAtOeCARMcgkoB3e20 l3SDVli7FnrvU/ChhcdVRRaxKwUreZ3j/GqJ/PR7uDBbeM+lGI2ZBfFigMp/BjffclaR BKdQ== ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=arc-20160816; h=sender:errors-to:content-transfer-encoding:reply-to:list-subscribe :list-help:list-post:list-archive:list-unsubscribe:list-id :precedence:subject:mime-version:references:in-reply-to:message-id :date:to:from:delivered-to; bh=UufFmWYVPZHQrAeINetjHRjy+ZQRTcP8lQbozKx/jYM=; fh=YOA8vD9MJZuwZ71F/05pj6KdCjf6jQRmzLS+CATXUQk=; b=Ff6jlTcv2OTBeLYaB/N1WKizH4XJiu2DjkTx5P7W+MrQG/BUaxU+/+grRlYwvoeWOC JpvWu25sfw0ieT5PmoI8DtFQOK1aLpKFg3ZZh4Uj6SdwAeHazGlmCLgxBRi+KyJMj4a0 YouCjFDFULmZp+8uI7svBaEbLkE9n8fyVNQalKBJiLIa0aw1NMqHVmmEmCNQJ2FpW890 2CJQcs7+nvPttjAzPyOYsT+8XckCARiGfxKj3N4gHXEFvuS8PWPut1icKZGQlMLCRLMH iVKnoSPWiGk/D1d4EcXBL+Zi/CKShYSSRPIQLfhioeKlquloDAAPQ6jhWu4ipWaalO41 eyFQ==; dara=google.com ARC-Authentication-Results: i=1; mx.google.com; spf=pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org designates 79.124.17.100 as permitted 
sender) smtp.mailfrom=ffmpeg-devel-bounces@ffmpeg.org Return-Path: Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org. [79.124.17.100]) by mx.google.com with ESMTP id 2adb3069b0e04-52f00ec2a42si1114612e87.380.2024.07.22.11.45.26; Mon, 22 Jul 2024 11:45:27 -0700 (PDT) Received-SPF: pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org designates 79.124.17.100 as permitted sender) client-ip=79.124.17.100; Authentication-Results: mx.google.com; spf=pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org designates 79.124.17.100 as permitted sender) smtp.mailfrom=ffmpeg-devel-bounces@ffmpeg.org Received: from [127.0.1.1] (localhost [127.0.0.1]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id 1661868D726; Mon, 22 Jul 2024 21:44:44 +0300 (EEST) X-Original-To: ffmpeg-devel@ffmpeg.org Delivered-To: ffmpeg-devel@ffmpeg.org Received: from ursule.remlab.net (vps-a2bccee9.vps.ovh.net [51.75.19.47]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id 6F88668D6F9 for ; Mon, 22 Jul 2024 21:44:37 +0300 (EEST) Received: from basile.remlab.net (localhost [IPv6:::1]) by ursule.remlab.net (Postfix) with ESMTP id B77FEC023D for ; Mon, 22 Jul 2024 21:44:32 +0300 (EEST) From: =?utf-8?q?R=C3=A9mi_Denis-Courmont?= To: ffmpeg-devel@ffmpeg.org Date: Mon, 22 Jul 2024 21:44:25 +0300 Message-ID: <20240722184431.40853-6-remi@remlab.net> X-Mailer: git-send-email 2.45.2 In-Reply-To: <20240722184431.40853-1-remi@remlab.net> References: <20240722184431.40853-1-remi@remlab.net> MIME-Version: 1.0 Subject: [FFmpeg-devel] [PATCH 6/9] lavu/riscv: require B or zba explicitly X-BeenThere: ffmpeg-devel@ffmpeg.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: FFmpeg development discussions and patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: FFmpeg development discussions and patches Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" X-TUID: Cz/dlE8wtZjA --- libavutil/riscv/fixed_dsp_rvv.S | 14 +++++++------- 
libavutil/riscv/float_dsp_rvv.S | 24 ++++++++++++------------ 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/libavutil/riscv/fixed_dsp_rvv.S b/libavutil/riscv/fixed_dsp_rvv.S index 6bac5813b8..0fa6aab3d4 100644 --- a/libavutil/riscv/fixed_dsp_rvv.S +++ b/libavutil/riscv/fixed_dsp_rvv.S @@ -20,7 +20,7 @@ #include "asm.S" -func ff_vector_fmul_window_scaled_rvv, zve64x +func ff_vector_fmul_window_scaled_rvv, zve64x, zba csrwi vxrm, 0 vsetvli t0, zero, e16, m1, ta, ma sh2add a2, a4, a2 @@ -68,7 +68,7 @@ func ff_vector_fmul_window_scaled_rvv, zve64x ret endfunc -func ff_vector_fmul_window_fixed_rvv, zve64x +func ff_vector_fmul_window_fixed_rvv, zve64x, zba csrwi vxrm, 0 vsetvli t0, zero, e16, m1, ta, ma sh2add a2, a4, a2 @@ -112,7 +112,7 @@ func ff_vector_fmul_window_fixed_rvv, zve64x ret endfunc -func ff_vector_fmul_fixed_rvv, zve32x +func ff_vector_fmul_fixed_rvv, zve32x, zba csrwi vxrm, 0 1: vsetvli t0, a3, e32, m4, ta, ma @@ -129,7 +129,7 @@ func ff_vector_fmul_fixed_rvv, zve32x ret endfunc -func ff_vector_fmul_reverse_fixed_rvv, zve32x +func ff_vector_fmul_reverse_fixed_rvv, zve32x, zba csrwi vxrm, 0 // e16/m4 and e32/m8 are possible but slow the gathers down. 
vsetvli t0, zero, e16, m1, ta, ma @@ -155,7 +155,7 @@ func ff_vector_fmul_reverse_fixed_rvv, zve32x ret endfunc -func ff_vector_fmul_add_fixed_rvv, zve32x +func ff_vector_fmul_add_fixed_rvv, zve32x, zba csrwi vxrm, 0 1: vsetvli t0, a4, e32, m8, ta, ma @@ -175,7 +175,7 @@ func ff_vector_fmul_add_fixed_rvv, zve32x ret endfunc -func ff_scalarproduct_fixed_rvv, zve64x +func ff_scalarproduct_fixed_rvv, zve64x, zba li t1, 1 << 30 vsetvli t0, zero, e64, m8, ta, ma vmv.v.x v8, zero @@ -198,7 +198,7 @@ func ff_scalarproduct_fixed_rvv, zve64x endfunc // (a0) = (a0) + (a1), (a1) = (a0) - (a1) [0..a2-1] -func ff_butterflies_fixed_rvv, zve32x +func ff_butterflies_fixed_rvv, zve32x, zba 1: vsetvli t0, a2, e32, m4, ta, ma vle32.v v16, (a0) diff --git a/libavutil/riscv/float_dsp_rvv.S b/libavutil/riscv/float_dsp_rvv.S index 2f0ade6db6..c7744cf0e8 100644 --- a/libavutil/riscv/float_dsp_rvv.S +++ b/libavutil/riscv/float_dsp_rvv.S @@ -21,7 +21,7 @@ #include "asm.S" // (a0) = (a1) * (a2) [0..a3-1] -func ff_vector_fmul_rvv, zve32f +func ff_vector_fmul_rvv, zve32f, zba 1: vsetvli t0, a3, e32, m8, ta, ma vle32.v v16, (a1) @@ -38,7 +38,7 @@ func ff_vector_fmul_rvv, zve32f endfunc // (a0) += (a1) * fa0 [0..a2-1] -func ff_vector_fmac_scalar_rvv, zve32f +func ff_vector_fmac_scalar_rvv, zve32f, zba NOHWF fmv.w.x fa0, a2 NOHWF mv a2, a3 1: @@ -57,7 +57,7 @@ NOHWF mv a2, a3 endfunc // (a0) = (a1) * fa0 [0..a2-1] -func ff_vector_fmul_scalar_rvv, zve32f +func ff_vector_fmul_scalar_rvv, zve32f, zba NOHWF fmv.w.x fa0, a2 NOHWF mv a2, a3 1: @@ -73,7 +73,7 @@ NOHWF mv a2, a3 ret endfunc -func ff_vector_fmul_window_rvv, zve32f +func ff_vector_fmul_window_rvv, zve32f, zba // a0: dst, a1: src0, a2: src1, a3: window, a4: length // e16/m2 and e32/m4 are possible but slower due to gather. 
vsetvli t0, zero, e16, m1, ta, ma @@ -113,7 +113,7 @@ func ff_vector_fmul_window_rvv, zve32f endfunc // (a0) = (a1) * (a2) + (a3) [0..a4-1] -func ff_vector_fmul_add_rvv, zve32f +func ff_vector_fmul_add_rvv, zve32f, zba 1: vsetvli t0, a4, e32, m8, ta, ma vle32.v v8, (a1) @@ -133,7 +133,7 @@ endfunc // TODO factor vrsub, separate last iteration? // (a0) = (a1) * reverse(a2) [0..a3-1] -func ff_vector_fmul_reverse_rvv, zve32f +func ff_vector_fmul_reverse_rvv, zve32f, zba // e16/m4 and e32/m8 are possible but slower due to gather. vsetvli t0, zero, e16, m1, ta, ma sh2add a2, a3, a2 @@ -159,7 +159,7 @@ func ff_vector_fmul_reverse_rvv, zve32f endfunc // (a0) = (a0) + (a1), (a1) = (a0) - (a1) [0..a2-1] -func ff_butterflies_float_rvv, zve32f +func ff_butterflies_float_rvv, zve32f, zba 1: vsetvli t0, a2, e32, m8, ta, ma vle32.v v16, (a0) @@ -177,7 +177,7 @@ func ff_butterflies_float_rvv, zve32f endfunc // a0 = (a0).(a1) [0..a2-1] -func ff_scalarproduct_float_rvv, zve32f +func ff_scalarproduct_float_rvv, zve32f, zba vsetvli t0, zero, e32, m8, ta, ma vmv.v.x v8, zero vmv.s.x v0, zero @@ -199,7 +199,7 @@ NOHWF fmv.x.w a0, fa0 endfunc // (a0) = (a1) * (a2) [0..a3-1] -func ff_vector_dmul_rvv, zve64d +func ff_vector_dmul_rvv, zve64d, zba 1: vsetvli t0, a3, e64, m8, ta, ma vle64.v v16, (a1) @@ -216,7 +216,7 @@ func ff_vector_dmul_rvv, zve64d endfunc // (a0) += (a1) * fa0 [0..a2-1] -func ff_vector_dmac_scalar_rvv, zve64d +func ff_vector_dmac_scalar_rvv, zve64d, zba NOHWD fmv.d.x fa0, a2 NOHWD mv a2, a3 1: @@ -234,7 +234,7 @@ NOHWD mv a2, a3 endfunc // (a0) = (a1) * fa0 [0..a2-1] -func ff_vector_dmul_scalar_rvv, zve64d +func ff_vector_dmul_scalar_rvv, zve64d, zba NOHWD fmv.d.x fa0, a2 NOHWD mv a2, a3 1: @@ -250,7 +250,7 @@ NOHWD mv a2, a3 ret endfunc -func ff_scalarproduct_double_rvv, zve64f +func ff_scalarproduct_double_rvv, zve64f, zba vsetvli t0, zero, e64, m8, ta, ma vmv.v.x v8, zero vmv.s.x v0, zero From patchwork Mon Jul 22 18:44:26 2024 Content-Type: text/plain; 
charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: =?utf-8?q?R=C3=A9mi_Denis-Courmont?= X-Patchwork-Id: 50690 Delivered-To: ffmpegpatchwork2@gmail.com Received: by 2002:a59:a742:0:b0:482:c625:d099 with SMTP id f2csp2205618vqm; Mon, 22 Jul 2024 12:01:53 -0700 (PDT) X-Forwarded-Encrypted: i=2; AJvYcCXwsZBNp3Cj9yem3eNk7+OP9wL6nds1GVwDSZBqA4oFK3UNAeR2hDAnIhjqwTIaeFB5qFKWKnI78g08kKdMHFrUMDsHuMLiOXJRMA== X-Google-Smtp-Source: AGHT+IF9ab00b4pioZu6P8L6M1dYsyHaLc5Ff4R0fzQt7RZcCU+Ngv+kxza9O0w7MH80S+Mxkcqu X-Received: by 2002:a17:907:1b26:b0:a77:a1f1:cfa0 with SMTP id a640c23a62f3a-a7a41d08905mr771533166b.36.1721674913350; Mon, 22 Jul 2024 12:01:53 -0700 (PDT) ARC-Seal: i=1; a=rsa-sha256; t=1721674913; cv=none; d=google.com; s=arc-20160816; b=P8bTYaZRCsr41Jj253/CQpjyaBoEzylKXmkUwX+nUCzKHrW4J9faXqDUIg0TPULR1s eQlWW4a2y1gediTr7i1jt6tEsCLfhI2c0VHiyNBHLwaJ+39MSis+a/PR5V2C54EhFbzM A7eWDAfXtJghmhpkAwvghYANET4txvRrt43cnqhS0/oo7Zx8a5ZqbQ2ODxU/Uy6GZ1fE uNhMZTM/CVdeGCWXtgtd3CgXoBaZ66bK0WwbTW76cIADwFN55pRga8ngZciDHlXMwmRf snUo8pbuh5TQk/4e9GXtd/fCiWvPA7IyRybr524eEWQiCcL+yZzTZ6zkik/93Vt1lkro zLVg== ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=arc-20160816; h=sender:errors-to:content-transfer-encoding:reply-to:list-subscribe :list-help:list-post:list-archive:list-unsubscribe:list-id :precedence:subject:mime-version:references:in-reply-to:message-id :date:to:from:delivered-to; bh=wB/jR7AYp1L+eeOBku9vVsaPt2FbyOD1NhFDRONUuMU=; fh=YOA8vD9MJZuwZ71F/05pj6KdCjf6jQRmzLS+CATXUQk=; b=zAzqWwKbsa1t8SE+rL5buKnEWQuxBp7JU0WkkdakZ6knyLpeipJBqCDobYe4nNS4DH OlfZKGjcKod1V70uLqPmFy/4mGwZdy/CV1+IPmgCXuoF2Y0SnobR3j7qQPNhRqfZ/a32 gXKwx4DciBKE0Ab6iYfOCUCFcQpzyL3ES5peC2JHVvMbiTmNyW7LNXLr9IfHzbnETqfB JtxPg+MYjv7sIKHnJoLKD5H/erBYYK5Gxr9ZpOF/d5Ee3MUDvvIcfeNR5VL7Nh0ooHx8 SNyKMqyQMeAi1bC/UgCtohT3eAupiYqbC1MsdauTA4bo1z6S3Ch8TaH7XCHLwjxZG8Cm JQYw==; dara=google.com ARC-Authentication-Results: i=1; mx.google.com; spf=pass (google.com: domain of 
ffmpeg-devel-bounces@ffmpeg.org designates 79.124.17.100 as permitted sender) smtp.mailfrom=ffmpeg-devel-bounces@ffmpeg.org Return-Path: Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org. [79.124.17.100]) by mx.google.com with ESMTP id a640c23a62f3a-a7a3c95171bsi439936766b.808.2024.07.22.12.01.53; Mon, 22 Jul 2024 12:01:53 -0700 (PDT) Received-SPF: pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org designates 79.124.17.100 as permitted sender) client-ip=79.124.17.100; Authentication-Results: mx.google.com; spf=pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org designates 79.124.17.100 as permitted sender) smtp.mailfrom=ffmpeg-devel-bounces@ffmpeg.org Received: from [127.0.1.1] (localhost [127.0.0.1]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id 4DD1268D756; Mon, 22 Jul 2024 21:44:45 +0300 (EEST) X-Original-To: ffmpeg-devel@ffmpeg.org Delivered-To: ffmpeg-devel@ffmpeg.org Received: from ursule.remlab.net (vps-a2bccee9.vps.ovh.net [51.75.19.47]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id 7863868D6FD for ; Mon, 22 Jul 2024 21:44:37 +0300 (EEST) Received: from basile.remlab.net (localhost [IPv6:::1]) by ursule.remlab.net (Postfix) with ESMTP id E7CD7C023E for ; Mon, 22 Jul 2024 21:44:32 +0300 (EEST) From: =?utf-8?q?R=C3=A9mi_Denis-Courmont?= To: ffmpeg-devel@ffmpeg.org Date: Mon, 22 Jul 2024 21:44:26 +0300 Message-ID: <20240722184431.40853-7-remi@remlab.net> X-Mailer: git-send-email 2.45.2 In-Reply-To: <20240722184431.40853-1-remi@remlab.net> References: <20240722184431.40853-1-remi@remlab.net> MIME-Version: 1.0 Subject: [FFmpeg-devel] [PATCH 7/9] lavu/riscv: remove bespoke SH{1, 2, 3}ADD assembler X-BeenThere: ffmpeg-devel@ffmpeg.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: FFmpeg development discussions and patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: FFmpeg development discussions and patches Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" X-TUID: 
ZHNBYI7tPqUY configure checks that the assembler supports the B extension (or rather its constituents) anyway. These macros were dodging sanity checks for unsupported instructions and nothing else. --- libavutil/riscv/asm.S | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/libavutil/riscv/asm.S b/libavutil/riscv/asm.S index 0c29680d84..8b96e07b75 100644 --- a/libavutil/riscv/asm.S +++ b/libavutil/riscv/asm.S @@ -83,25 +83,6 @@ .endm .endm -#if !defined (__riscv_zba) - /* SH{1,2,3}ADD definitions for pre-Zba assemblers */ - .macro shnadd n, rd, rs1, rs2 - .insn r OP, 2 * \n, 16, \rd, \rs1, \rs2 - .endm - - .macro sh1add rd, rs1, rs2 - shnadd 1, \rd, \rs1, \rs2 - .endm - - .macro sh2add rd, rs1, rs2 - shnadd 2, \rd, \rs1, \rs2 - .endm - - .macro sh3add rd, rs1, rs2 - shnadd 3, \rd, \rs1, \rs2 - .endm -#endif - #if defined (__riscv_v_elen) # define RV_V_ELEN __riscv_v_elen #else From patchwork Mon Jul 22 18:44:27 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: =?utf-8?q?R=C3=A9mi_Denis-Courmont?= X-Patchwork-Id: 50686 Delivered-To: ffmpegpatchwork2@gmail.com Received: by 2002:a59:a742:0:b0:482:c625:d099 with SMTP id f2csp2198373vqm; Mon, 22 Jul 2024 11:45:44 -0700 (PDT) X-Forwarded-Encrypted: i=2; AJvYcCUf/dNt/qdIGN3hNBlKQW8J3ZDuYs/JCPEG/zDBmaUQZvpXedk56K+hvpr1JPZpMYaduptHpSMYpGl5pAfP0UZQnkpzdHkLO6obqg== X-Google-Smtp-Source: AGHT+IGPY60k0uf59MWj7wpGPlBfGZdgLwOBhp9udJBCvAU5DqjgYQ3t0Cb56Mjhn5vjReudxmYm X-Received: by 2002:a17:907:1c26:b0:a72:8a75:6559 with SMTP id a640c23a62f3a-a7a88486e7cmr56416866b.47.1721673943781; Mon, 22 Jul 2024 11:45:43 -0700 (PDT) ARC-Seal: i=1; a=rsa-sha256; t=1721673943; cv=none; d=google.com; s=arc-20160816; b=Z8vygWCUi3EvB6bZ4Z9nGHl5NDHNAKwz5+82HPAe2wVJrPlffA8sq0mLw/wnGIJQgd giRmN3iWkBfAAy59E1NpE59Nk91P+eEFWz4emEH7oxXNeqJK2Dbf24WqiJWFDn4juiUT tBUSzlpoiRaSagIOxkjZe8iP88pkc5waY8B2VBGCsOd3IyVzDgj1PKTBQxdndcZUvaLC 
ZOXOW3oF9gng4fGndaAkQZXe18vXjliPGnb3KNOZ0bS73WEY1VUErbCHUNUYSYw9DwxV l8Ujwd61sYevuq6T4UfY9Koi3po2UvNqlWIzPLMuEEGSq/OZO6Hzvja6tg5gVWBQqd+e p3kQ== ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=arc-20160816; h=sender:errors-to:content-transfer-encoding:reply-to:list-subscribe :list-help:list-post:list-archive:list-unsubscribe:list-id :precedence:subject:mime-version:references:in-reply-to:message-id :date:to:from:delivered-to; bh=GRIKoY74STMVtm3OldqTSE+3n8ibv9XolKkZRYd6lrA=; fh=YOA8vD9MJZuwZ71F/05pj6KdCjf6jQRmzLS+CATXUQk=; b=xwizld3TfZEB9/tyiTyTJTNHDEv5cIST+bxuildVncCp6LGCBKrG6TYjxK8NCJ6KLt pl2VOSI1nUvmn/FVj/moIWVir8fBw3Wcmqx2uZ/tPXmiusHIAfSg5hdYZfmfXVo3zHTR va/rT6Hsyzv+MQIEMxQSMuIte/Y8XKH/YRDihhn9XluDj7EBYgww5/IWojRTZfNn+9v5 RmRnLnnH8G4txRoAnkyel9E/ZlhvHplhx5tI+JwbDbuFoW437pIouAtPaUYxCShT65t/ 44cYa+lQHLrRaqzt4a6t9RcRF5WKfRMX+ROJvd8SG+lh3gMNAg9nSUcjQtCkfQVyRh64 fQCA==; dara=google.com ARC-Authentication-Results: i=1; mx.google.com; spf=pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org designates 79.124.17.100 as permitted sender) smtp.mailfrom=ffmpeg-devel-bounces@ffmpeg.org Return-Path: Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org. 
[79.124.17.100]) by mx.google.com with ESMTP id a640c23a62f3a-a7a845e0eafsi38439466b.401.2024.07.22.11.45.43; Mon, 22 Jul 2024 11:45:43 -0700 (PDT) Received-SPF: pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org designates 79.124.17.100 as permitted sender) client-ip=79.124.17.100; Authentication-Results: mx.google.com; spf=pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org designates 79.124.17.100 as permitted sender) smtp.mailfrom=ffmpeg-devel-bounces@ffmpeg.org Received: from [127.0.1.1] (localhost [127.0.0.1]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id 688FC68D76E; Mon, 22 Jul 2024 21:44:46 +0300 (EEST) X-Original-To: ffmpeg-devel@ffmpeg.org Delivered-To: ffmpeg-devel@ffmpeg.org Received: from ursule.remlab.net (vps-a2bccee9.vps.ovh.net [51.75.19.47]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id BE94F68D701 for ; Mon, 22 Jul 2024 21:44:37 +0300 (EEST) Received: from basile.remlab.net (localhost [IPv6:::1]) by ursule.remlab.net (Postfix) with ESMTP id 24360C0240 for ; Mon, 22 Jul 2024 21:44:33 +0300 (EEST) From: =?utf-8?q?R=C3=A9mi_Denis-Courmont?= To: ffmpeg-devel@ffmpeg.org Date: Mon, 22 Jul 2024 21:44:27 +0300 Message-ID: <20240722184431.40853-8-remi@remlab.net> X-Mailer: git-send-email 2.45.2 In-Reply-To: <20240722184431.40853-1-remi@remlab.net> References: <20240722184431.40853-1-remi@remlab.net> MIME-Version: 1.0 Subject: [FFmpeg-devel] [PATCH 8/9] lavu/riscv: add CPU flag for B bit manipulations X-BeenThere: ffmpeg-devel@ffmpeg.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: FFmpeg development discussions and patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: FFmpeg development discussions and patches Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" X-TUID: zBEk4jY0ibQq The B extension was finally ratified in May 2024, encompassing: - Zba (addresses), - Zbb (basics) and - Zbs (single bits). It does not include Zbc (base-2 polynomials). 
--- doc/APIchanges | 3 +++ libavutil/cpu.c | 1 + libavutil/cpu.h | 1 + libavutil/riscv/cpu.c | 13 +++++++++++++ libavutil/tests/cpu.c | 1 + tests/checkasm/checkasm.c | 1 + 6 files changed, 20 insertions(+) diff --git a/doc/APIchanges b/doc/APIchanges index 5751216b24..0061b084b8 100644 --- a/doc/APIchanges +++ b/doc/APIchanges @@ -2,6 +2,9 @@ The last version increases of all libraries were on 2024-03-07 API changes, most recent first: +2024-07-22 - xxxxxxxxx - lavu 59.18.100 - cpu.h + Add AV_CPU_FLAG_RVB. + 2024-07-xx - xxxxxxxxxx - lavf 61 - avformat.h Deprecate avformat_transfer_internal_stream_timing_info() and av_stream_get_codec_timebase() without replacement. diff --git a/libavutil/cpu.c b/libavutil/cpu.c index 9ac2f01c20..17afe8858a 100644 --- a/libavutil/cpu.c +++ b/libavutil/cpu.c @@ -186,6 +186,7 @@ int av_parse_cpu_caps(unsigned *flags, const char *s) { "rvi", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_RVI }, .unit = "flags" }, { "rvf", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_RVF }, .unit = "flags" }, { "rvd", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_RVD }, .unit = "flags" }, + { "rvb", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_RVB }, .unit = "flags" }, { "zve32x", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_RVV_I32 }, .unit = "flags" }, { "zve32f", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_RVV_F32 }, .unit = "flags" }, { "zve64x", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_RVV_I64 }, .unit = "flags" }, diff --git a/libavutil/cpu.h b/libavutil/cpu.h index a25901433e..9f419aae02 100644 --- a/libavutil/cpu.h +++ b/libavutil/cpu.h @@ -92,6 +92,7 @@ #define AV_CPU_FLAG_RVB_ADDR (1 << 8) ///< Address bit-manipulations #define AV_CPU_FLAG_RV_ZVBB (1 << 9) ///< Vector basic bit-manipulations #define AV_CPU_FLAG_RV_MISALIGNED (1 <<10) ///< Fast misaligned accesses +#define AV_CPU_FLAG_RVB (1 <<11) ///< B (bit manipulations) /** * Return the flags which specify extensions supported by the CPU. 
diff --git a/libavutil/riscv/cpu.c b/libavutil/riscv/cpu.c index 04ac404bbf..e035f4b024 100644 --- a/libavutil/riscv/cpu.c +++ b/libavutil/riscv/cpu.c @@ -72,6 +72,12 @@ int ff_get_cpu_flags_riscv(void) #ifdef RISCV_HWPROBE_EXT_ZBB if (pairs[1].value & RISCV_HWPROBE_EXT_ZBB) ret |= AV_CPU_FLAG_RVB_BASIC; +#if defined (RISCV_HWPROBE_EXT_ZBA) && defined (RISCV_HWPROBE_EXT_ZBS) + if ((pairs[1].value & RISCV_HWPROBE_EXT_ZBA) && + (pairs[1].value & RISCV_HWPROBE_EXT_ZBB) && + (pairs[1].value & RISCV_HWPROBE_EXT_ZBS)) + ret |= AV_CPU_FLAG_RVB; +#endif #endif #ifdef RISCV_HWPROBE_EXT_ZVBB if (pairs[1].value & RISCV_HWPROBE_EXT_ZVBB) @@ -94,6 +100,9 @@ int ff_get_cpu_flags_riscv(void) ret |= AV_CPU_FLAG_RVF; if (hwcap & HWCAP_RV('D')) ret |= AV_CPU_FLAG_RVD; + if (hwcap & HWCAP_RV('B')) + ret |= AV_CPU_FLAG_RVB_ADDR | AV_CPU_FLAG_RVB_BASIC | + AV_CPU_FLAG_RVB; /* The V extension implies all Zve* functional subsets */ if (hwcap & HWCAP_RV('V')) @@ -118,6 +127,10 @@ int ff_get_cpu_flags_riscv(void) #ifdef __riscv_zbb ret |= AV_CPU_FLAG_RVB_BASIC; #endif +#if defined (__riscv_b) || \ + (defined (__riscv_zba) && defined (__riscv_zbb) && defined (__riscv_zbs)) + ret |= AV_CPU_FLAG_RVB; +#endif /* If RV-V is enabled statically at compile-time, check the details. 
*/ #ifdef __riscv_vector diff --git a/libavutil/tests/cpu.c b/libavutil/tests/cpu.c index 02b98682e3..b4b11775d8 100644 --- a/libavutil/tests/cpu.c +++ b/libavutil/tests/cpu.c @@ -90,6 +90,7 @@ static const struct { { AV_CPU_FLAG_RVD, "rvd" }, { AV_CPU_FLAG_RVB_ADDR, "zba" }, { AV_CPU_FLAG_RVB_BASIC, "zbb" }, + { AV_CPU_FLAG_RVB, "rvb" }, { AV_CPU_FLAG_RVV_I32, "zve32x" }, { AV_CPU_FLAG_RVV_F32, "zve32f" }, { AV_CPU_FLAG_RVV_I64, "zve64x" }, diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c index de0024099a..016f2329b0 100644 --- a/tests/checkasm/checkasm.c +++ b/tests/checkasm/checkasm.c @@ -295,6 +295,7 @@ static const struct { { "RVD", "rvd", AV_CPU_FLAG_RVD }, { "RVBaddr", "rvb_a", AV_CPU_FLAG_RVB_ADDR }, { "RVBbasic", "rvb_b", AV_CPU_FLAG_RVB_BASIC }, + { "RVB", "rvb", AV_CPU_FLAG_RVB }, { "RVVi32", "rvv_i32", AV_CPU_FLAG_RVV_I32 }, { "RVVf32", "rvv_f32", AV_CPU_FLAG_RVV_F32 }, { "RVVi64", "rvv_i64", AV_CPU_FLAG_RVV_I64 }, From patchwork Mon Jul 22 18:44:28 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: =?utf-8?q?R=C3=A9mi_Denis-Courmont?= X-Patchwork-Id: 50689 Delivered-To: ffmpegpatchwork2@gmail.com Received: by 2002:a59:a742:0:b0:482:c625:d099 with SMTP id f2csp2200884vqm; Mon, 22 Jul 2024 11:51:57 -0700 (PDT) X-Forwarded-Encrypted: i=2; AJvYcCWR7qDaHaMD2ppGzooEtXXlxW1ZgqZ866W+IvjQkR5rOyumbA9zByQVKkmJDZy1YLHna3eHO2w3Fum8XnEPlZmoQSdG65btmvVjxw== X-Google-Smtp-Source: AGHT+IEQ1ik++IvZUVvgiU8BPrRhPmP1W6Jr+M9quB8eB9ZedAN2u5FAi0+iwTVbx09xLU/IFV3O X-Received: by 2002:a05:6512:3c98:b0:52c:8b03:99d6 with SMTP id 2adb3069b0e04-52ef8d856b4mr5859273e87.6.1721674317004; Mon, 22 Jul 2024 11:51:57 -0700 (PDT) ARC-Seal: i=1; a=rsa-sha256; t=1721674316; cv=none; d=google.com; s=arc-20160816; b=iYmlBbWiijqkoAiecg9IwcWFdR3TkgUwEbbTIaEgjttj+lDbMNfTvVXmE7oalKiUOY GnAhiV537y5RVxd2Cwoy8YGPkTNFZe3aQ1kwKOpV4F0kB3Q92b2VKmVmIal5jj3SiW4x 
/EcOXLz8Co9FJ2ysbtmgb7J9IR/aMbCZ9poe62D/Q3E89pmI5UCnvPMZ3fUif02T801l 70VR3+Q/w7CZzsWP2ctpj8DfWlVjh6mfUViOVg25U/UeAFsfEY+SYTGmHkYdaSXW7mOM +K2cjXNPstZXlMEAru0USQ1Y3GHiQXVR4aKKfBM6s81ZkNeqG5QAtmtWne8gWIPfWMcL dTyA== ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=arc-20160816; h=sender:errors-to:content-transfer-encoding:reply-to:list-subscribe :list-help:list-post:list-archive:list-unsubscribe:list-id :precedence:subject:mime-version:references:in-reply-to:message-id :date:to:from:delivered-to; bh=335GEm2OyNAhnDqaD/3/tGCpu+wwx4EiQm5GI49TFcg=; fh=YOA8vD9MJZuwZ71F/05pj6KdCjf6jQRmzLS+CATXUQk=; b=bZpyTMqkXYYqTLvYptsxKJY0RC0cwbKnwxzIv456xw3RG0aIA1C6qxJMMCRQjmz6SF j+U+lueh72JYr8B89nri/XM5ZlkLCi7/ceqhmMwVwKt9siHoObtl5WsmuxPa3tvc2wNB VF1vZ06pcPexCEd5z7GnWxyWdKZH6D6BwhkeWeGRq/TWkzKvv3NU4uSPorOvrIwCCT5I QAo++gf+wgeaGsUZRKr8pFJ6ixVWl2KcWvmSir3ILWMJy1EYKxIcQ06vx/SPWTkARmDo bhxT+r+cDM4hsMQ2C64lunUMNBHVFfg0CVTx6ovwFQeNuoEaqYBAyYy0UmSnu8JwuHKq Y6Dw==; dara=google.com ARC-Authentication-Results: i=1; mx.google.com; spf=pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org designates 79.124.17.100 as permitted sender) smtp.mailfrom=ffmpeg-devel-bounces@ffmpeg.org Return-Path: Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org. 
[79.124.17.100]) by mx.google.com with ESMTP id 4fb4d7f45d1cf-5a30bf10c59si4946326a12.243.2024.07.22.11.51.56; Mon, 22 Jul 2024 11:51:56 -0700 (PDT) Received-SPF: pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org designates 79.124.17.100 as permitted sender) client-ip=79.124.17.100; Authentication-Results: mx.google.com; spf=pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org designates 79.124.17.100 as permitted sender) smtp.mailfrom=ffmpeg-devel-bounces@ffmpeg.org Received: from [127.0.1.1] (localhost [127.0.0.1]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id 441E668D6F5; Mon, 22 Jul 2024 21:44:47 +0300 (EEST) X-Original-To: ffmpeg-devel@ffmpeg.org Delivered-To: ffmpeg-devel@ffmpeg.org Received: from ursule.remlab.net (vps-a2bccee9.vps.ovh.net [51.75.19.47]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id BB00A68D6D2 for ; Mon, 22 Jul 2024 21:44:37 +0300 (EEST) Received: from basile.remlab.net (localhost [IPv6:::1]) by ursule.remlab.net (Postfix) with ESMTP id 544E3C0244 for ; Mon, 22 Jul 2024 21:44:33 +0300 (EEST) From: =?utf-8?q?R=C3=A9mi_Denis-Courmont?= To: ffmpeg-devel@ffmpeg.org Date: Mon, 22 Jul 2024 21:44:28 +0300 Message-ID: <20240722184431.40853-9-remi@remlab.net> X-Mailer: git-send-email 2.45.2 In-Reply-To: <20240722184431.40853-1-remi@remlab.net> References: <20240722184431.40853-1-remi@remlab.net> MIME-Version: 1.0 Subject: [FFmpeg-devel] [PATCH 9/9] lavc/h264dsp: use RISC-V B extension X-BeenThere: ffmpeg-devel@ffmpeg.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: FFmpeg development discussions and patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: FFmpeg development discussions and patches Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" X-TUID: md06nu3k0Cg9 This saves one register and one instruction per transform. add16 and add16intra thus become stack-less. 
--- libavcodec/riscv/h264dsp_init.c | 25 ++++++++-------- libavcodec/riscv/h264idct_rvv.S | 51 ++++++++++++++++----------------- 2 files changed, 38 insertions(+), 38 deletions(-) diff --git a/libavcodec/riscv/h264dsp_init.c b/libavcodec/riscv/h264dsp_init.c index 9ae182151c..836c073559 100644 --- a/libavcodec/riscv/h264dsp_init.c +++ b/libavcodec/riscv/h264dsp_init.c @@ -98,13 +98,14 @@ av_cold void ff_h264dsp_init_riscv(H264DSPContext *dsp, const int bit_depth, dsp->h264_idct_add = ff_h264_idct_add_8_rvv; dsp->h264_idct8_add = ff_h264_idct8_add_8_rvv; + dsp->h264_idct_dc_add = ff_h264_idct4_dc_add_8_rvv; + if (flags & AV_CPU_FLAG_RVB) { + dsp->h264_idct_add16 = ff_h264_idct_add16_8_rvv; + dsp->h264_idct_add16intra = ff_h264_idct_add16intra_8_rvv; # if __riscv_xlen == 64 - dsp->h264_idct_add16 = ff_h264_idct_add16_8_rvv; - dsp->h264_idct_add16intra = ff_h264_idct_add16intra_8_rvv; - dsp->h264_idct8_add4 = ff_h264_idct8_add4_8_rvv; + dsp->h264_idct8_add4 = ff_h264_idct8_add4_8_rvv; # endif - if (flags & AV_CPU_FLAG_RVV_I32) - dsp->h264_idct_dc_add = ff_h264_idct4_dc_add_8_rvv; + } if (flags & AV_CPU_FLAG_RVV_I64) { dsp->h264_add_pixels8_clear = ff_h264_add_pixels8_8_rvv; dsp->h264_idct8_dc_add = ff_h264_idct8_dc_add_8_rvv; @@ -118,16 +119,16 @@ av_cold void ff_h264dsp_init_riscv(H264DSPContext *dsp, const int bit_depth, dsp->h264_idct_add = ff_h264_idct_add_##depth##_rvv; \ if (flags & AV_CPU_FLAG_RVB_ADDR) \ dsp->h264_idct8_add = ff_h264_idct8_add_##depth##_rvv; \ - if (zvl128b && (flags & AV_CPU_FLAG_RVB_ADDR)) { \ + if (zvl128b && (flags & AV_CPU_FLAG_RVB)) { \ dsp->h264_idct_dc_add = ff_h264_idct4_dc_add_##depth##_rvv; \ dsp->h264_idct8_dc_add = ff_h264_idct8_dc_add_##depth##_rvv; \ + if (__riscv_xlen == 64) { \ + dsp->h264_idct_add16 = ff_h264_idct_add16_##depth##_rvv; \ + dsp->h264_idct_add16intra = \ + ff_h264_idct_add16intra_##depth##_rvv; \ + } \ } \ - if (__riscv_xlen == 64 && zvl128b) { \ - dsp->h264_idct_add16 = ff_h264_idct_add16_##depth##_rvv; \ - 
dsp->h264_idct_add16intra = \ - ff_h264_idct_add16intra_##depth##_rvv; \ - } \ - if (__riscv_xlen == 64 && (flags & AV_CPU_FLAG_RVB_ADDR)) \ + if (__riscv_xlen == 64 && (flags & AV_CPU_FLAG_RVB)) \ dsp->h264_idct8_add4 = ff_h264_idct8_add4_##depth##_rvv; \ } diff --git a/libavcodec/riscv/h264idct_rvv.S b/libavcodec/riscv/h264idct_rvv.S index 514c849bce..a49a32c47e 100644 --- a/libavcodec/riscv/h264idct_rvv.S +++ b/libavcodec/riscv/h264idct_rvv.S @@ -532,16 +532,11 @@ const ff_h264_scan8 .byte 034, 035, 044, 045, 036, 037, 046, 047 endconst -#if (__riscv_xlen == 64) .macro idct4_adds type, depth -func ff_h264_idct_add\type\()_\depth\()_rvv, zve32x +func ff_h264_idct_add\type\()_\depth\()_rvv, zve32x, b csrwi vxrm, 0 - addi sp, sp, -16 lla t0, ff_h264_scan8 - sd s0, (sp) li t1, 32 * (\depth / 8) - mv s0, sp - sd ra, 8(sp) vsetivli zero, 16, e8, m1, ta, ma vle8.v v8, (t0) .if \depth == 8 @@ -567,20 +562,23 @@ func ff_h264_idct_add\type\()_\depth\()_rvv, zve32x vsetvli zero, zero, e16, m2, ta, ma vmv.x.s a4, v0 vmv.x.s a7, v1 + zext.h a4, a4 + slli a7, a7, 16 mv t4, a0 + or a4, a4, a7 mv t5, a1 mv a1, a2 mv a2, a3 li a3, 16 + mv a7, ra 1: andi t0, a4, 1 addi a3, a3, -1 - srli a4, a4, 1 .ifc \type, 16 beqz t0, 3f # if (nnz) .endif lw t2, (t5) # block_offset[i] - andi t1, a7, 1 + bexti t1, a4, 16 add a0, t4, t2 .ifc \type, 16 bnez t1, 2f # if (nnz == 1 && block[i * 16]) @@ -595,14 +593,12 @@ func ff_h264_idct_add\type\()_\depth\()_rvv, zve32x .endif jal ff_h264_idct4_dc_add_\depth\()_rvv 3: - srli a7, a7, 1 + srli a4, a4, 1 addi t5, t5, 4 addi a1, a1, 16 * 2 * (\depth / 8) bnez a3, 1b - ld ra, 8(sp) - ld s0, 0(sp) - addi sp, sp, 16 + mv ra, a7 ret endfunc .endm @@ -611,9 +607,10 @@ endfunc idct4_adds 16, \depth idct4_adds 16intra, \depth -func ff_h264_idct8_add4_\depth\()_rvv, zve32x +#if (__riscv_xlen == 64) +func ff_h264_idct8_add4_\depth\()_rvv, zve32x, b csrwi vxrm, 0 - addi sp, sp, -64 + addi sp, sp, -48 lla t0, ff_h264_scan8 sd s0, (sp) li t1, 4 * 32 * (\depth / 8) 
@@ -622,9 +619,8 @@ func ff_h264_idct8_add4_\depth\()_rvv, zve32x sd ra, 8(sp) sd s1, 16(sp) sd s2, 24(sp) - sd s3, 32(sp) - sd s4, 40(sp) - sd s5, 48(sp) + sd s4, 32(sp) + sd s5, 40(sp) vsetivli zero, 4, e8, mf4, ta, ma vlse8.v v8, (t0), t2 .if \depth == 8 @@ -644,8 +640,11 @@ func ff_h264_idct8_add4_\depth\()_rvv, zve32x vmsne.vi v0, v12, 0 vmand.mm v1, v1, v2 vmv.x.s s2, v0 - vmv.x.s s3, v1 + vmv.x.s a7, v1 + zext.h s2, s2 + slli a7, a7, 16 li s1, 4 + or s2, s2, a7 mv s4, a0 mv s5, a1 mv a1, a2 @@ -653,10 +652,9 @@ func ff_h264_idct8_add4_\depth\()_rvv, zve32x 1: andi t0, s2, 1 addi s1, s1, -1 - srli s2, s2, 1 beqz t0, 3f # if (nnz) lw t2, (s5) # block_offset[i] - andi t1, s3, 1 + bexti t1, s2, 16 add a0, s4, t2 bnez t1, 2f # if (nnz == 1 && block[i * 16]) jal .Lidct8_add_\depth\()_rvv @@ -670,20 +668,20 @@ func ff_h264_idct8_add4_\depth\()_rvv, zve32x 3: addi a1, a1, 4 * 16 * 2 * (\depth / 8) 4: - srli s3, s3, 1 + srli s2, s2, 1 addi s5, s5, 4 * 4 bnez s1, 1b - ld s5, 48(sp) - ld s4, 40(sp) - ld s3, 32(sp) + ld s5, 40(sp) + ld s4, 32(sp) ld s2, 24(sp) ld s1, 16(sp) ld ra, 8(sp) ld s0, 0(sp) - addi sp, sp, 64 + addi sp, sp, 48 ret endfunc +#endif .endr .irp depth, 9, 10, 12, 14 @@ -697,9 +695,10 @@ func ff_h264_idct_add16intra_\depth\()_rvv, zve32x j ff_h264_idct_add16intra_16_rvv endfunc +#if (__riscv_xlen == 64) func ff_h264_idct8_add4_\depth\()_rvv, zve32x li a5, (1 << \depth) - 1 j ff_h264_idct8_add4_16_rvv endfunc -.endr #endif +.endr