From patchwork Mon Jul 22 18:44:22 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Patchwork-Submitter: =?utf-8?q?R=C3=A9mi_Denis-Courmont?= X-Patchwork-Id: 50691 Delivered-To: ffmpegpatchwork2@gmail.com Received: by 2002:a59:a742:0:b0:482:c625:d099 with SMTP id f2csp2205621vqm; Mon, 22 Jul 2024 12:01:53 -0700 (PDT) X-Forwarded-Encrypted: i=2; AJvYcCWoKArQlUjOXufxi1hzycuPB00u6cr2+kl+HxUsPU/aRunJ688inCKGSMQkbls931GiV/kzK+YI0mVwE8mfCglf2S8tKz25eXqN2w== X-Google-Smtp-Source: AGHT+IG5QYVGbVyJ9I5MZYiyN69ZDcv1b51csFMp3kr5cyMPd6376N2yILRo5PDoUmN4VoJ49AAG X-Received: by 2002:a05:6512:131d:b0:52f:c2fa:b213 with SMTP id 2adb3069b0e04-52fc4075b0dmr608320e87.55.1721674913607; Mon, 22 Jul 2024 12:01:53 -0700 (PDT) ARC-Seal: i=1; a=rsa-sha256; t=1721674913; cv=none; d=google.com; s=arc-20160816; b=AKg/OE6HbJc+pV5I7LN01Opl98IvnLl6hm0o69p6ERXw+wL5sKeJQUhnayqY9x2tIL +RTNhBvZBtF+LUgA6Uya6yhVjTmvkrscx6Fvso6vrd1+g8vDAeLnm7cPjJkocn2FaVmr 2ja/q4eyVLX10/GZ1VVU+ihoKsTV1PyKruPuS4pAb6c/XSYHEW2I3pkkSaAIIENkjsV4 JtQaSrZfpBlcjPOcI42i1efcHIjJmm530YsuCYRDNY/oFyVMZq9m93Rr2LY9K7TLr/8h xT6IR6mi7UVzKgIWUg4WSlXtZXfKmMvt597srM/qIqElGIW6G/ZqIWkKec3zgP6Kg7b9 99iQ== ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=arc-20160816; h=sender:errors-to:content-transfer-encoding:reply-to:list-subscribe :list-help:list-post:list-archive:list-unsubscribe:list-id :precedence:subject:mime-version:references:in-reply-to:message-id :date:to:from:delivered-to; bh=khUsmbrQ8RiTc3mYSFcpWLYb+Y+qMcoh+VUF94p9iNc=; fh=YOA8vD9MJZuwZ71F/05pj6KdCjf6jQRmzLS+CATXUQk=; b=tBjeqC2T7ngT3ibBnkYHPAOvo47XKVVpLiTvwoRltlQXqykX6ibIyzMc2bXQaIecOk /pfVtkSk/6LUyRSX0I3oGHcn04fGUBKZ09zvvx2fw3O2ClN8doKj8znPO/muur/5xPz2 3c1AlGiQwIdBcFU4qQrRMnhmthUsQWEG+T9vmh3q+1TSpzFD9F+JzU3ofgZctFtbMMSP y2bYqK8e9KrHPB8ocyDvl7PNtnCRjw0Hxx0kQAIFLQMgMZUYWoG14gOCmPzIp2c42Blo 0jpoaFEp/iFaN2l4DpmuwVLviqUMNTRp3rpGIB006tZiKCwF60JPdJmbaL9RCcGywOhY fD/A==; dara=google.com ARC-Authentication-Results: i=1; mx.google.com; spf=pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org designates 79.124.17.100 as permitted sender) smtp.mailfrom=ffmpeg-devel-bounces@ffmpeg.org Return-Path: Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org. [79.124.17.100]) by mx.google.com with ESMTP id 2adb3069b0e04-52ef54f80edsi2176052e87.32.2024.07.22.12.01.53; Mon, 22 Jul 2024 12:01:53 -0700 (PDT) Received-SPF: pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org designates 79.124.17.100 as permitted sender) client-ip=79.124.17.100; Authentication-Results: mx.google.com; spf=pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org designates 79.124.17.100 as permitted sender) smtp.mailfrom=ffmpeg-devel-bounces@ffmpeg.org Received: from [127.0.1.1] (localhost [127.0.0.1]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id EC74E68D6CC; Mon, 22 Jul 2024 21:44:40 +0300 (EEST) X-Original-To: ffmpeg-devel@ffmpeg.org Delivered-To: ffmpeg-devel@ffmpeg.org Received: from ursule.remlab.net (vps-a2bccee9.vps.ovh.net [51.75.19.47]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id 84A0068D3C9 for ; Mon, 22 Jul 2024 21:44:32 +0300 (EEST) Received: from basile.remlab.net (localhost [IPv6:::1]) by ursule.remlab.net (Postfix) with ESMTP id 267EAC01F0 for ; Mon, 22 Jul 2024 21:44:32 +0300 (EEST) From: =?utf-8?q?R=C3=A9mi_Denis-Courmont?= To: ffmpeg-devel@ffmpeg.org Date: Mon, 22 Jul 2024 21:44:22 +0300 Message-ID: <20240722184431.40853-3-remi@remlab.net> X-Mailer: git-send-email 2.45.2 In-Reply-To: <20240722184431.40853-1-remi@remlab.net> References: <20240722184431.40853-1-remi@remlab.net> MIME-Version: 1.0 Subject: [FFmpeg-devel] [PATCH 3/9] lavc/riscv: require B or zba explicitly X-BeenThere: ffmpeg-devel@ffmpeg.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: FFmpeg development discussions and patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: FFmpeg development discussions and patches Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" X-TUID: RsImKkEUtkAa --- libavcodec/riscv/aacencdsp_rvv.S | 4 +-- libavcodec/riscv/aacpsdsp_rvv.S | 10 +++---- libavcodec/riscv/ac3dsp_rvv.S | 6 ++-- libavcodec/riscv/ac3dsp_rvvb.S | 2 +- libavcodec/riscv/alacdsp_rvv.S | 6 ++-- libavcodec/riscv/audiodsp_rvv.S | 6 ++-- libavcodec/riscv/bswapdsp_rvb.S | 2 +- libavcodec/riscv/bswapdsp_rvv.S | 2 +- libavcodec/riscv/exrdsp_rvv.S | 2 +- libavcodec/riscv/fixed_vtype.S | 48 ++++++++++++++++++++++++++++++ libavcodec/riscv/flacdsp_rvv.S | 42 +++++++++++++------------- libavcodec/riscv/fmtconvert_rvv.S | 4 +-- libavcodec/riscv/h264_mc_chroma.S | 4 +-- libavcodec/riscv/h264idct_rvv.S | 2 +- libavcodec/riscv/huffyuvdsp_rvv.S | 4 +-- libavcodec/riscv/jpeg2000dsp_rvv.S | 4 +-- libavcodec/riscv/llauddsp_rvv.S | 4 +-- libavcodec/riscv/lpc_rvv.S | 4 +-- libavcodec/riscv/opusdsp_rvv.S | 2 +- libavcodec/riscv/rv40dsp_rvv.S | 4 +-- libavcodec/riscv/sbrdsp_rvv.S | 16 +++++----- libavcodec/riscv/svqenc_rvv.S | 2 +- libavcodec/riscv/takdsp_rvv.S | 8 ++--- libavcodec/riscv/utvideodsp_rvv.S | 4 +-- libavcodec/riscv/vc1dsp_rvv.S | 6 ++-- libavcodec/riscv/vorbisdsp_rvv.S | 2 +- libavcodec/riscv/vp7dsp_rvv.S | 2 +- libavcodec/riscv/vp8dsp_rvv.S | 4 +-- libavcodec/riscv/vp9_intra_rvi.S | 6 ++-- 29 files changed, 129 insertions(+), 83 deletions(-) create mode 100644 libavcodec/riscv/fixed_vtype.S diff --git a/libavcodec/riscv/aacencdsp_rvv.S b/libavcodec/riscv/aacencdsp_rvv.S index 21e66a77ae..05a603b6f6 100644 --- a/libavcodec/riscv/aacencdsp_rvv.S +++ b/libavcodec/riscv/aacencdsp_rvv.S @@ -21,7 +21,7 @@ #include "libavutil/riscv/asm.S" -func ff_abs_pow34_rvv, zve32f +func ff_abs_pow34_rvv, zve32f, zba 1: vsetvli t0, a2, e32, m8, ta, ma sub a2, a2, t0 @@ -38,7 +38,7 @@ func ff_abs_pow34_rvv, zve32f ret endfunc -func ff_aac_quant_bands_rvv, zve32f +func ff_aac_quant_bands_rvv, zve32f, zba NOHWF fmv.w.x fa0, a6 NOHWF fmv.w.x fa1, a7 fcvt.s.w ft0, a5 diff --git a/libavcodec/riscv/aacpsdsp_rvv.S b/libavcodec/riscv/aacpsdsp_rvv.S index 2d6858688a..72e2103c22 100644 --- a/libavcodec/riscv/aacpsdsp_rvv.S +++ b/libavcodec/riscv/aacpsdsp_rvv.S @@ -20,7 +20,7 @@ #include "libavutil/riscv/asm.S" -func ff_ps_add_squares_rvv, zve64f +func ff_ps_add_squares_rvv, zve64f, zba li t1, 32 1: vsetvli t0, a2, e32, m4, ta, ma @@ -39,7 +39,7 @@ func ff_ps_add_squares_rvv, zve64f ret endfunc -func ff_ps_mul_pair_single_rvv, zve32f +func ff_ps_mul_pair_single_rvv, zve32f, zba 1: vsetvli t0, a3, e32, m4, ta, ma vlseg2e32.v v24, (a1) @@ -134,7 +134,7 @@ NOHWD flw fs\n, (4 * \n)(sp) .purgem filter endfunc -func ff_ps_hybrid_analysis_ileave_rvv, zve32x /* no needs for zve32f here */ +func ff_ps_hybrid_analysis_ileave_rvv, zve32x /* no zve32f here */, zba slli t0, a2, 5 + 1 + 2 // ctz(32 * 2 * 4) sh2add a1, a2, a1 add a0, a0, t0 @@ -169,7 +169,7 @@ func ff_ps_hybrid_analysis_ileave_rvv, zve32x /* no needs for zve32f here */ ret endfunc -func ff_ps_hybrid_synthesis_deint_rvv, zve64x +func ff_ps_hybrid_synthesis_deint_rvv, zve64x, zba slli t0, a2, 5 + 1 + 2 sh2add a0, a2, a0 add a1, a1, t0 @@ -207,7 +207,7 @@ func ff_ps_hybrid_synthesis_deint_rvv, zve64x ret endfunc -func ff_ps_stereo_interpolate_rvv, zve32f, zbb +func ff_ps_stereo_interpolate_rvv, zve32f, b vsetvli t0, zero, e32, m2, ta, ma vid.v v24 flw ft0, (a2) diff --git a/libavcodec/riscv/ac3dsp_rvv.S b/libavcodec/riscv/ac3dsp_rvv.S index 1b5f67a9ec..c733733286 100644 --- a/libavcodec/riscv/ac3dsp_rvv.S +++ b/libavcodec/riscv/ac3dsp_rvv.S @@ -43,7 +43,7 @@ func ff_ac3_exponent_min_rvv, zve32x ret endfunc -func ff_float_to_fixed24_rvv, zve32f +func ff_float_to_fixed24_rvv, zve32f, zba li t1, 1 << 24 fcvt.s.w f0, t1 1: @@ -61,7 +61,7 @@ func ff_float_to_fixed24_rvv, zve32f endfunc #if __riscv_xlen >= 64 -func ff_sum_square_butterfly_int32_rvv, zve64x +func ff_sum_square_butterfly_int32_rvv, zve64x, zba vsetvli t0, zero, e64, m8, ta, ma vmv.v.x v0, zero vmv.v.x v8, zero @@ -101,7 +101,7 @@ func ff_sum_square_butterfly_int32_rvv, zve64x endfunc #endif -func ff_sum_square_butterfly_float_rvv, zve32f +func ff_sum_square_butterfly_float_rvv, zve32f, zba vsetvli t0, zero, e32, m8, ta, ma vmv.v.x v0, zero vmv.v.x v8, zero diff --git a/libavcodec/riscv/ac3dsp_rvvb.S b/libavcodec/riscv/ac3dsp_rvvb.S index 64766b56be..5bffb40bba 100644 --- a/libavcodec/riscv/ac3dsp_rvvb.S +++ b/libavcodec/riscv/ac3dsp_rvvb.S @@ -21,7 +21,7 @@ #include "config.h" #include "libavutil/riscv/asm.S" -func ff_extract_exponents_rvvb, zve32x, zvbb +func ff_extract_exponents_rvvb, zve32x, zvbb, zba 1: vsetvli t0, a2, e32, m8, ta, ma vle32.v v8, (a1) diff --git a/libavcodec/riscv/alacdsp_rvv.S b/libavcodec/riscv/alacdsp_rvv.S index 8efb04e0c8..19714bd6e3 100644 --- a/libavcodec/riscv/alacdsp_rvv.S +++ b/libavcodec/riscv/alacdsp_rvv.S @@ -21,7 +21,7 @@ #include "libavutil/riscv/asm.S" #if (__riscv_xlen == 64) -func ff_alac_decorrelate_stereo_rvv, zve32x +func ff_alac_decorrelate_stereo_rvv, zve32x, zba ld a4, 8(a0) ld a0, 0(a0) 1: @@ -43,7 +43,7 @@ func ff_alac_decorrelate_stereo_rvv, zve32x ret endfunc -func ff_alac_append_extra_bits_mono_rvv, zve32x +func ff_alac_append_extra_bits_mono_rvv, zve32x, zba ld a0, (a0) ld a1, (a1) 1: @@ -61,7 +61,7 @@ func ff_alac_append_extra_bits_mono_rvv, zve32x ret endfunc -func ff_alac_append_extra_bits_stereo_rvv, zve32x +func ff_alac_append_extra_bits_stereo_rvv, zve32x, zba ld a6, 8(a0) ld a0, (a0) ld a7, 8(a1) diff --git a/libavcodec/riscv/audiodsp_rvv.S b/libavcodec/riscv/audiodsp_rvv.S index f0b23bab5e..b7134de523 100644 --- a/libavcodec/riscv/audiodsp_rvv.S +++ b/libavcodec/riscv/audiodsp_rvv.S @@ -20,7 +20,7 @@ #include "libavutil/riscv/asm.S" -func ff_scalarproduct_int16_rvv, zve32x +func ff_scalarproduct_int16_rvv, zve32x, zba vsetvli t0, zero, e32, m8, ta, ma vmv.v.x v8, zero vmv.s.x v0, zero @@ -40,7 +40,7 @@ func ff_scalarproduct_int16_rvv, zve32x ret endfunc -func ff_vector_clip_int32_rvv, zve32x +func ff_vector_clip_int32_rvv, zve32x, zba 1: vsetvli t0, a4, e32, m8, ta, ma vle32.v v8, (a1) @@ -55,7 +55,7 @@ func ff_vector_clip_int32_rvv, zve32x ret endfunc -func ff_vector_clipf_rvv, zve32f +func ff_vector_clipf_rvv, zve32f, zba NOHWF fmv.w.x fa0, a3 NOHWF fmv.w.x fa1, a4 1: diff --git a/libavcodec/riscv/bswapdsp_rvb.S b/libavcodec/riscv/bswapdsp_rvb.S index 0786bd3f36..17cfd5d7ef 100644 --- a/libavcodec/riscv/bswapdsp_rvb.S +++ b/libavcodec/riscv/bswapdsp_rvb.S @@ -23,7 +23,7 @@ #include "libavutil/riscv/bswap_rvb.S" #if (__riscv_xlen >= 64) -func ff_bswap32_buf_rvb, zbb +func ff_bswap32_buf_rvb, zba, zbb bswap32_rvb a0, a1, a2 endfunc #endif diff --git a/libavcodec/riscv/bswapdsp_rvv.S b/libavcodec/riscv/bswapdsp_rvv.S index b37fe26255..14484a772d 100644 --- a/libavcodec/riscv/bswapdsp_rvv.S +++ b/libavcodec/riscv/bswapdsp_rvv.S @@ -21,7 +21,7 @@ #include "config.h" #include "libavutil/riscv/asm.S" -func ff_bswap16_buf_rvv, zve32x +func ff_bswap16_buf_rvv, zve32x, zba 1: vsetvli t0, a2, e16, m8, ta, ma vle16.v v8, (a1) diff --git a/libavcodec/riscv/exrdsp_rvv.S b/libavcodec/riscv/exrdsp_rvv.S index f4a35f58ff..c1d7dfcb86 100644 --- a/libavcodec/riscv/exrdsp_rvv.S +++ b/libavcodec/riscv/exrdsp_rvv.S @@ -20,7 +20,7 @@ #include "libavutil/riscv/asm.S" -func ff_reorder_pixels_rvv, zve32x +func ff_reorder_pixels_rvv, zve32x, zba srai a2, a2, 1 add t1, a1, a2 1: diff --git a/libavcodec/riscv/fixed_vtype.S b/libavcodec/riscv/fixed_vtype.S new file mode 100644 index 0000000000..7aac70bda8 --- /dev/null +++ b/libavcodec/riscv/fixed_vtype.S @@ -0,0 +1,48 @@ +/* + * Copyright © 2024 Rémi Denis-Courmont. + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#define VILL -0x8000 + + .data +ff_vtype_e8: +ff_vtype_e8_1: + .half VILL +ff_vtype_e8_2: + .half VILL +ff_vtype_e8_4: + .half VILL +ff_vtype_e8_8: + .half VILL +ff_vtype_e8_16: + .half VILL +ff_vtype_e8_32: + .half VILL + +ff_vtype_e16: +ff_vtype_e16_1: + .half VILL +ff_vtype_e16_2: + .half VILL +ff_vtype_e16_4: + .half VILL +ff_vtype_e16_8: + .half VILL +ff_vtype_e16_16: + .half VILL diff --git a/libavcodec/riscv/flacdsp_rvv.S b/libavcodec/riscv/flacdsp_rvv.S index 1724aee9d7..69505c694a 100644 --- a/libavcodec/riscv/flacdsp_rvv.S +++ b/libavcodec/riscv/flacdsp_rvv.S @@ -20,7 +20,7 @@ #include "libavutil/riscv/asm.S" -func ff_flac_lpc16_rvv, zve32x, zbb +func ff_flac_lpc16_rvv, zve32x, b vtype_vli t0, a2, t2, e32, ta, ma vsetvl zero, a2, t0 vle32.v v8, (a1) @@ -45,7 +45,7 @@ func ff_flac_lpc16_rvv, zve32x, zbb endfunc #if (__riscv_xlen == 64) -func ff_flac_lpc32_rvv, zve64x +func ff_flac_lpc32_rvv, zve64x, zba addi t2, a2, -16 ble t2, zero, ff_flac_lpc32_rvv_simple vsetivli zero, 1, e64, m1, ta, ma @@ -76,7 +76,7 @@ func ff_flac_lpc32_rvv, zve64x ret endfunc -func ff_flac_lpc32_rvv_simple, zve64x, zbb +func ff_flac_lpc32_rvv_simple, zve64x, b vtype_vli t3, a2, t1, e64, ta, ma vntypei t2, t3 vsetvl zero, a2, t3 // e64 @@ -104,7 +104,7 @@ func ff_flac_lpc32_rvv_simple, zve64x, zbb ret endfunc -func ff_flac_lpc33_rvv, zve64x, zbb +func ff_flac_lpc33_rvv, zve64x, b vtype_vli t0, a3, t1, e64, ta, ma vsetvl zero, a3, t0 vmv.s.x v0, zero @@ -132,7 +132,7 @@ func ff_flac_lpc33_rvv, zve64x, zbb endfunc #endif -func ff_flac_wasted32_rvv, zve32x +func ff_flac_wasted32_rvv, zve32x, zba 1: vsetvli t0, a2, e32, m8, ta, ma vle32.v v8, (a0) @@ -145,7 +145,7 @@ func ff_flac_wasted32_rvv, zve32x ret endfunc -func ff_flac_wasted33_rvv, zve64x +func ff_flac_wasted33_rvv, zve64x, zba srli t0, a2, 5 li t1, 1 bnez t0, 2f @@ -177,7 +177,7 @@ func ff_flac_wasted33_rvv, zve64x endfunc #if (__riscv_xlen == 64) -func ff_flac_decorrelate_indep2_16_rvv, zve32x +func ff_flac_decorrelate_indep2_16_rvv, zve32x, zba ld a0, (a0) ld a2, 8(a1) ld a1, (a1) @@ -200,7 +200,7 @@ func ff_flac_decorrelate_indep2_16_rvv, zve32x ret endfunc -func ff_flac_decorrelate_indep4_16_rvv, zve32x +func ff_flac_decorrelate_indep4_16_rvv, zve32x, zba ld a0, (a0) ld a2, 8(a1) ld t1, 16(a1) @@ -233,7 +233,7 @@ func ff_flac_decorrelate_indep4_16_rvv, zve32x ret endfunc -func ff_flac_decorrelate_indep6_16_rvv, zve32x +func ff_flac_decorrelate_indep6_16_rvv, zve32x, zba ld a0, (a0) ld a2, 8(a1) ld t1, 16(a1) @@ -278,7 +278,7 @@ func ff_flac_decorrelate_indep6_16_rvv, zve32x ret endfunc -func ff_flac_decorrelate_indep8_16_rvv, zve32x +func ff_flac_decorrelate_indep8_16_rvv, zve32x, zba ld a0, (a0) ld a2, 8(a1) ld t1, 16(a1) @@ -332,9 +332,7 @@ func ff_flac_decorrelate_indep8_16_rvv, zve32x ret endfunc - - -func ff_flac_decorrelate_ls_16_rvv, zve32x +func ff_flac_decorrelate_ls_16_rvv, zve32x, zba ld a0, (a0) ld a2, 8(a1) ld a1, (a1) @@ -358,7 +356,7 @@ func ff_flac_decorrelate_ls_16_rvv, zve32x ret endfunc -func ff_flac_decorrelate_rs_16_rvv, zve32x +func ff_flac_decorrelate_rs_16_rvv, zve32x, zba ld a0, (a0) ld a2, 8(a1) ld a1, (a1) @@ -382,7 +380,7 @@ func ff_flac_decorrelate_rs_16_rvv, zve32x ret endfunc -func ff_flac_decorrelate_ms_16_rvv, zve32x +func ff_flac_decorrelate_ms_16_rvv, zve32x, zba ld a0, (a0) ld a2, 8(a1) ld a1, (a1) @@ -408,7 +406,7 @@ func ff_flac_decorrelate_ms_16_rvv, zve32x ret endfunc -func ff_flac_decorrelate_indep2_32_rvv, zve32x +func ff_flac_decorrelate_indep2_32_rvv, zve32x, zba ld a0, (a0) ld a2, 8(a1) ld a1, (a1) @@ -428,7 +426,7 @@ func ff_flac_decorrelate_indep2_32_rvv, zve32x ret endfunc -func ff_flac_decorrelate_indep4_32_rvv, zve32x +func ff_flac_decorrelate_indep4_32_rvv, zve32x, zba ld a0, (a0) ld a2, 8(a1) ld t1, 16(a1) @@ -457,7 +455,7 @@ func ff_flac_decorrelate_indep4_32_rvv, zve32x ret endfunc -func ff_flac_decorrelate_indep6_32_rvv, zve32x +func ff_flac_decorrelate_indep6_32_rvv, zve32x, zba ld a0, (a0) ld a2, 8(a1) ld t1, 16(a1) @@ -495,7 +493,7 @@ func ff_flac_decorrelate_indep6_32_rvv, zve32x ret endfunc -func ff_flac_decorrelate_indep8_32_rvv, zve32x +func ff_flac_decorrelate_indep8_32_rvv, zve32x, zba ld a0, (a0) ld a2, 8(a1) ld t1, 16(a1) @@ -540,7 +538,7 @@ func ff_flac_decorrelate_indep8_32_rvv, zve32x ret endfunc -func ff_flac_decorrelate_ls_32_rvv, zve32x +func ff_flac_decorrelate_ls_32_rvv, zve32x, zba ld a0, (a0) ld a2, 8(a1) ld a1, (a1) @@ -561,7 +559,7 @@ func ff_flac_decorrelate_ls_32_rvv, zve32x ret endfunc -func ff_flac_decorrelate_rs_32_rvv, zve32x +func ff_flac_decorrelate_rs_32_rvv, zve32x, zba ld a0, (a0) ld a2, 8(a1) ld a1, (a1) @@ -582,7 +580,7 @@ func ff_flac_decorrelate_rs_32_rvv, zve32x ret endfunc -func ff_flac_decorrelate_ms_32_rvv, zve32x +func ff_flac_decorrelate_ms_32_rvv, zve32x, zba ld a0, (a0) ld a2, 8(a1) ld a1, (a1) diff --git a/libavcodec/riscv/fmtconvert_rvv.S b/libavcodec/riscv/fmtconvert_rvv.S index d0e2f106d5..05cd3b38a5 100644 --- a/libavcodec/riscv/fmtconvert_rvv.S +++ b/libavcodec/riscv/fmtconvert_rvv.S @@ -20,7 +20,7 @@ #include "libavutil/riscv/asm.S" -func ff_int32_to_float_fmul_scalar_rvv, zve32f +func ff_int32_to_float_fmul_scalar_rvv, zve32f, zba NOHWF fmv.w.x fa0, a2 NOHWF mv a2, a3 1: @@ -37,7 +37,7 @@ NOHWF mv a2, a3 ret endfunc -func ff_int32_to_float_fmul_array8_rvv, zve32f +func ff_int32_to_float_fmul_array8_rvv, zve32f, zba srai a4, a4, 3 1: vsetvli t0, a4, e32, m1, ta, ma diff --git a/libavcodec/riscv/h264_mc_chroma.S b/libavcodec/riscv/h264_mc_chroma.S index ce99bda44d..b6c0e1c635 100644 --- a/libavcodec/riscv/h264_mc_chroma.S +++ b/libavcodec/riscv/h264_mc_chroma.S @@ -325,7 +325,7 @@ ret .endm -func h264_put_chroma_mc_rvv, zve32x +func h264_put_chroma_mc_rvv, zve32x, zba 11: li a7, 3 blt a3, a7, 12f @@ -334,7 +334,7 @@ func h264_put_chroma_mc_rvv, zve32x do_chroma_mc put 0 endfunc -func h264_avg_chroma_mc_rvv, zve32x +func h264_avg_chroma_mc_rvv, zve32x, zba 21: li a7, 3 blt a3, a7, 22f diff --git a/libavcodec/riscv/h264idct_rvv.S b/libavcodec/riscv/h264idct_rvv.S index c74ea18c19..514c849bce 100644 --- a/libavcodec/riscv/h264idct_rvv.S +++ b/libavcodec/riscv/h264idct_rvv.S @@ -298,7 +298,7 @@ func ff_h264_idct8_add_8_rvv, zve32x ret endfunc -func ff_h264_idct8_add_16_rvv, zve32x +func ff_h264_idct8_add_16_rvv, zve32x, zba csrwi vxrm, 0 .Lidct8_add_16_rvv: li a4, 8 diff --git a/libavcodec/riscv/huffyuvdsp_rvv.S b/libavcodec/riscv/huffyuvdsp_rvv.S index d334f5c6d0..54d1d94059 100644 --- a/libavcodec/riscv/huffyuvdsp_rvv.S +++ b/libavcodec/riscv/huffyuvdsp_rvv.S @@ -20,7 +20,7 @@ #include "libavutil/riscv/asm.S" -func ff_add_int16_rvv, zve32x +func ff_add_int16_rvv, zve32x, zba 1: vsetvli t0, a3, e16, m8, ta, ma vle16.v v16, (a0) @@ -36,7 +36,7 @@ func ff_add_int16_rvv, zve32x ret endfunc -func ff_add_hfyu_left_pred_bgr32_rvv, zve32x, zbb +func ff_add_hfyu_left_pred_bgr32_rvv, zve32x, b vtype_ivli t1, 4, e8, ta, ma li t0, 4 vsetvl zero, t0, t1 diff --git a/libavcodec/riscv/jpeg2000dsp_rvv.S b/libavcodec/riscv/jpeg2000dsp_rvv.S index 10efe6b0db..77c6fd2d32 100644 --- a/libavcodec/riscv/jpeg2000dsp_rvv.S +++ b/libavcodec/riscv/jpeg2000dsp_rvv.S @@ -20,7 +20,7 @@ #include "libavutil/riscv/asm.S" -func ff_ict_float_rvv, zve32f +func ff_ict_float_rvv, zve32f, zba lla t0, ff_jpeg2000_f_ict_params flw ft0, 0(t0) flw ft1, 4(t0) @@ -48,7 +48,7 @@ func ff_ict_float_rvv, zve32f ret endfunc -func ff_rct_int_rvv, zve32x +func ff_rct_int_rvv, zve32x, zba 1: vsetvli t0, a3, e32, m8, ta, ma vle32.v v16, (a1) diff --git a/libavcodec/riscv/llauddsp_rvv.S b/libavcodec/riscv/llauddsp_rvv.S index 5569864832..6af2e6a882 100644 --- a/libavcodec/riscv/llauddsp_rvv.S +++ b/libavcodec/riscv/llauddsp_rvv.S @@ -20,7 +20,7 @@ #include "libavutil/riscv/asm.S" -func ff_scalarproduct_and_madd_int16_rvv, zve32x +func ff_scalarproduct_and_madd_int16_rvv, zve32x, zba vsetvli t0, zero, e32, m8, ta, ma vmv.v.x v0, zero 1: @@ -44,7 +44,7 @@ func ff_scalarproduct_and_madd_int16_rvv, zve32x ret endfunc -func ff_scalarproduct_and_madd_int32_rvv, zve32x +func ff_scalarproduct_and_madd_int32_rvv, zve32x, zba vsetvli t0, zero, e32, m8, ta, ma vmv.v.x v0, zero 1: diff --git a/libavcodec/riscv/lpc_rvv.S b/libavcodec/riscv/lpc_rvv.S index fe80305d9a..8fd1ddbbf0 100644 --- a/libavcodec/riscv/lpc_rvv.S +++ b/libavcodec/riscv/lpc_rvv.S @@ -21,7 +21,7 @@ #include "libavutil/riscv/asm.S" #if __riscv_xlen >= 64 -func ff_lpc_apply_welch_window_rvv, zve64d +func ff_lpc_apply_welch_window_rvv, zve64d, zba vsetvli t0, zero, e64, m8, ta, ma vid.v v0 addi t2, a1, -1 @@ -86,7 +86,7 @@ func ff_lpc_apply_welch_window_rvv, zve64d ret endfunc -func ff_lpc_compute_autocorr_rvv, zve64d, zbb +func ff_lpc_compute_autocorr_rvv, zve64d, b vtype_vli t1, a2, t2, e64, ta, ma, 1 addi a2, a2, 1 li t0, 1 diff --git a/libavcodec/riscv/opusdsp_rvv.S b/libavcodec/riscv/opusdsp_rvv.S index 42d845a370..389dc744f5 100644 --- a/libavcodec/riscv/opusdsp_rvv.S +++ b/libavcodec/riscv/opusdsp_rvv.S @@ -20,7 +20,7 @@ #include "libavutil/riscv/asm.S" -func ff_opus_postfilter_rvv, zve32f, zbb +func ff_opus_postfilter_rvv, zve32f, b flw fa0, 0(a2) // g0 slli t1, a1, 2 flw fa1, 4(a2) // g1 diff --git a/libavcodec/riscv/rv40dsp_rvv.S b/libavcodec/riscv/rv40dsp_rvv.S index e49345ef70..53d3d1d6f9 100644 --- a/libavcodec/riscv/rv40dsp_rvv.S +++ b/libavcodec/riscv/rv40dsp_rvv.S @@ -332,7 +332,7 @@ ret .endm -func ff_put_rv40_chroma_mc_rvv, zve32x +func ff_put_rv40_chroma_mc_rvv, zve32x, zba 11: li a7, 3 blt a3, a7, 12f @@ -341,7 +341,7 @@ func ff_put_rv40_chroma_mc_rvv, zve32x do_chroma_mc put 0 endfunc -func ff_avg_rv40_chroma_mc_rvv, zve32x +func ff_avg_rv40_chroma_mc_rvv, zve32x, zba 21: li a7, 3 blt a3, a7, 22f diff --git a/libavcodec/riscv/sbrdsp_rvv.S b/libavcodec/riscv/sbrdsp_rvv.S index 331b88022c..7c90a8addf 100644 --- a/libavcodec/riscv/sbrdsp_rvv.S +++ b/libavcodec/riscv/sbrdsp_rvv.S @@ -20,7 +20,7 @@ #include "libavutil/riscv/asm.S" -func ff_sbr_sum64x5_rvv, zve32f +func ff_sbr_sum64x5_rvv, zve32f, zba li a5, 64 addi a1, a0, 64 * 4 addi a2, a0, 128 * 4 @@ -49,7 +49,7 @@ func ff_sbr_sum64x5_rvv, zve32f ret endfunc -func ff_sbr_sum_square_rvv, zve32f +func ff_sbr_sum_square_rvv, zve32f, zba vsetvli t0, zero, e32, m8, ta, ma slli a1, a1, 1 vmv.v.x v8, zero @@ -157,7 +157,7 @@ func ff_sbr_autocorrelate_rvv, zve32f ret endfunc -func ff_sbr_hf_gen_rvv, zve32f +func ff_sbr_hf_gen_rvv, zve32f, zba NOHWF fmv.w.x fa0, a4 NOHWF mv a4, a5 NOHWF mv a5, a6 @@ -207,7 +207,7 @@ NOHWF mv a5, a6 ret endfunc -func ff_sbr_hf_g_filt_rvv, zve32f +func ff_sbr_hf_g_filt_rvv, zve32f, zba li t1, 40 * 2 * 4 sh3add a1, a4, a1 1: @@ -272,16 +272,16 @@ endfunc ret .endm -func ff_sbr_hf_apply_noise_0_rvv, zve32f, zbb +func ff_sbr_hf_apply_noise_0_rvv, zve32f, b hf_apply_noise 0 endfunc -func ff_sbr_hf_apply_noise_3_rvv, zve32f, zbb +func ff_sbr_hf_apply_noise_3_rvv, zve32f, b not a4, a4 // invert parity of kx // fall through endfunc -func ff_sbr_hf_apply_noise_1_rvv, zve32f, zbb +func ff_sbr_hf_apply_noise_1_rvv, zve32f, b vsetvli t0, zero, e32, m4, ta, ma vid.v v4 vxor.vx v4, v4, a4 @@ -289,6 +289,6 @@ func ff_sbr_hf_apply_noise_1_rvv, zve32f, zbb hf_apply_noise 1 endfunc -func ff_sbr_hf_apply_noise_2_rvv, zve32f, zbb +func ff_sbr_hf_apply_noise_2_rvv, zve32f, b hf_apply_noise 2 endfunc diff --git a/libavcodec/riscv/svqenc_rvv.S b/libavcodec/riscv/svqenc_rvv.S index cfc27154dd..8b7a8b0400 100644 --- a/libavcodec/riscv/svqenc_rvv.S +++ b/libavcodec/riscv/svqenc_rvv.S @@ -20,7 +20,7 @@ #include "libavutil/riscv/asm.S" -func ff_ssd_int8_vs_int16_rvv, zve32x +func ff_ssd_int8_vs_int16_rvv, zve32x, zba vsetvli t0, zero, e32, m8, ta, ma vmv.v.x v24, zero 1: diff --git a/libavcodec/riscv/takdsp_rvv.S b/libavcodec/riscv/takdsp_rvv.S index fa942a3be6..f3a230ccec 100644 --- a/libavcodec/riscv/takdsp_rvv.S +++ b/libavcodec/riscv/takdsp_rvv.S @@ -21,7 +21,7 @@ #include "libavutil/riscv/asm.S" -func ff_decorrelate_ls_rvv, zve32x +func ff_decorrelate_ls_rvv, zve32x, zba 1: vsetvli t0, a2, e32, m8, ta, ma sub a2, a2, t0 @@ -35,7 +35,7 @@ func ff_decorrelate_ls_rvv, zve32x ret endfunc -func ff_decorrelate_sr_rvv, zve32x +func ff_decorrelate_sr_rvv, zve32x, zba 1: vsetvli t0, a2, e32, m8, ta, ma vle32.v v0, (a0) @@ -49,7 +49,7 @@ func ff_decorrelate_sr_rvv, zve32x ret endfunc -func ff_decorrelate_sm_rvv, zve32x +func ff_decorrelate_sm_rvv, zve32x, zba 1: vsetvli t0, a2, e32, m8, ta, ma vle32.v v8, (a1) @@ -67,7 +67,7 @@ func ff_decorrelate_sm_rvv, zve32x ret endfunc -func ff_decorrelate_sf_rvv, zve32x +func ff_decorrelate_sf_rvv, zve32x, zba csrwi vxrm, 0 1: vsetvli t0, a2, e32, m8, ta, ma diff --git a/libavcodec/riscv/utvideodsp_rvv.S b/libavcodec/riscv/utvideodsp_rvv.S index fa70d0eb34..5e833eeb3c 100644 --- a/libavcodec/riscv/utvideodsp_rvv.S +++ b/libavcodec/riscv/utvideodsp_rvv.S @@ -20,7 +20,7 @@ #include "libavutil/riscv/asm.S" -func ff_restore_rgb_planes_rvv, zve32x +func ff_restore_rgb_planes_rvv, zve32x, zba li t1, -0x80 sub a3, a3, a6 sub a4, a4, a6 @@ -52,7 +52,7 @@ func ff_restore_rgb_planes_rvv, zve32x ret endfunc -func ff_restore_rgb_planes10_rvv, zve32x +func ff_restore_rgb_planes10_rvv, zve32x, zba li t1, -0x200 li t2, 0x3FF sub a3, a3, a6 diff --git a/libavcodec/riscv/vc1dsp_rvv.S b/libavcodec/riscv/vc1dsp_rvv.S index d8b62579aa..aede87ccc0 100644 --- a/libavcodec/riscv/vc1dsp_rvv.S +++ b/libavcodec/riscv/vc1dsp_rvv.S @@ -21,7 +21,7 @@ #include "libavutil/riscv/asm.S" -func ff_vc1_inv_trans_8x8_dc_rvv, zve64x +func ff_vc1_inv_trans_8x8_dc_rvv, zve64x, zba lh t2, (a2) vsetivli zero, 8, e8, mf2, ta, ma vlse64.v v0, (a0), a1 @@ -43,7 +43,7 @@ func ff_vc1_inv_trans_8x8_dc_rvv, zve64x ret endfunc -func ff_vc1_inv_trans_4x8_dc_rvv, zve32x +func ff_vc1_inv_trans_4x8_dc_rvv, zve32x, zba lh t2, (a2) vsetivli zero, 8, e8, mf2, ta, ma vlse32.v v0, (a0), a1 @@ -67,7 +67,7 @@ func ff_vc1_inv_trans_4x8_dc_rvv, zve32x ret endfunc -func ff_vc1_inv_trans_8x4_dc_rvv, zve64x +func ff_vc1_inv_trans_8x4_dc_rvv, zve64x, zba lh t2, (a2) vsetivli zero, 4, e8, mf4, ta, ma vlse64.v v0, (a0), a1 diff --git a/libavcodec/riscv/vorbisdsp_rvv.S b/libavcodec/riscv/vorbisdsp_rvv.S index 81a6c62a65..d136188d2e 100644 --- a/libavcodec/riscv/vorbisdsp_rvv.S +++ b/libavcodec/riscv/vorbisdsp_rvv.S @@ -20,7 +20,7 @@ #include "libavutil/riscv/asm.S" -func ff_vorbis_inverse_coupling_rvv, zve32f +func ff_vorbis_inverse_coupling_rvv, zve32f, zba fmv.w.x ft0, zero 1: vsetvli t0, a2, e32, m4, ta, ma diff --git a/libavcodec/riscv/vp7dsp_rvv.S b/libavcodec/riscv/vp7dsp_rvv.S index 856b0e8c96..bfcc220273 100644 --- a/libavcodec/riscv/vp7dsp_rvv.S +++ b/libavcodec/riscv/vp7dsp_rvv.S @@ -21,7 +21,7 @@ #include "libavutil/riscv/asm.S" #if __riscv_xlen >= 64 -func ff_vp7_luma_dc_wht_rvv, zve32x +func ff_vp7_luma_dc_wht_rvv, zve32x, zba li a2, 4 * 16 * 2 li a7, 16 * 2 jal t0, 1f diff --git a/libavcodec/riscv/vp8dsp_rvv.S b/libavcodec/riscv/vp8dsp_rvv.S index d366748a0a..4675f4c76b 100644 --- a/libavcodec/riscv/vp8dsp_rvv.S +++ b/libavcodec/riscv/vp8dsp_rvv.S @@ -440,7 +440,7 @@ endconst .endm .macro epel len size type -func ff_put_vp8_epel\len\()_\type\()\size\()_rvv, zve32x +func ff_put_vp8_epel\len\()_\type\()\size\()_rvv, zve32x, zba epel_filter \size \type t vsetvlstatic8 \len 1: @@ -455,7 +455,7 @@ endfunc .endm .macro epel_hv len hsize vsize -func ff_put_vp8_epel\len\()_h\hsize\()v\vsize\()_rvv, zve32x +func ff_put_vp8_epel\len\()_h\hsize\()v\vsize\()_rvv, zve32x, zba #if __riscv_xlen == 64 addi sp, sp, -48 .irp n,0,1,2,3,4,5 diff --git a/libavcodec/riscv/vp9_intra_rvi.S b/libavcodec/riscv/vp9_intra_rvi.S index 16b6bdb25a..dadd4be194 100644 --- a/libavcodec/riscv/vp9_intra_rvi.S +++ b/libavcodec/riscv/vp9_intra_rvi.S @@ -21,7 +21,7 @@ #include "libavutil/riscv/asm.S" #if __riscv_xlen >= 64 -func ff_v_32x32_rvi +func ff_v_32x32_rvi, zba ld t0, (a3) ld t1, 8(a3) ld t2, 16(a3) @@ -42,7 +42,7 @@ func ff_v_32x32_rvi ret endfunc -func ff_v_16x16_rvi +func ff_v_16x16_rvi, zba ld t0, (a3) ld t1, 8(a3) .rept 8 @@ -57,7 +57,7 @@ func ff_v_16x16_rvi ret endfunc -func ff_v_8x8_rvi +func ff_v_8x8_rvi, zba ld t0, (a3) .rept 4 add a7, a0, a1