diff mbox series

[FFmpeg-devel,3/6] lavc/riscv: add forward-edge CFI landing pads

Message ID 20240722193818.50824-3-remi@remlab.net
State New
Headers show
Series [FFmpeg-devel,1/6] lavu/riscv: assembly for zicfilp LPAD | expand

Checks

Context Check Description
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Rémi Denis-Courmont July 22, 2024, 7:38 p.m. UTC
---
 libavcodec/riscv/aacencdsp_rvv.S   |  2 ++
 libavcodec/riscv/aacpsdsp_rvv.S    |  5 +++++
 libavcodec/riscv/ac3dsp_rvb.S      |  2 ++
 libavcodec/riscv/ac3dsp_rvv.S      |  4 ++++
 libavcodec/riscv/ac3dsp_rvvb.S     |  1 +
 libavcodec/riscv/alacdsp_rvv.S     |  3 +++
 libavcodec/riscv/audiodsp_rvf.S    |  1 +
 libavcodec/riscv/audiodsp_rvv.S    |  2 ++
 libavcodec/riscv/blockdsp_rvv.S    |  4 ++++
 libavcodec/riscv/bswapdsp_rvb.S    |  1 +
 libavcodec/riscv/bswapdsp_rvv.S    |  1 +
 libavcodec/riscv/exrdsp_rvv.S      |  1 +
 libavcodec/riscv/flacdsp_rvv.S     | 22 ++++++++++++++++++++--
 libavcodec/riscv/fmtconvert_rvv.S  |  2 ++
 libavcodec/riscv/g722dsp_rvv.S     |  1 +
 libavcodec/riscv/h263dsp_rvv.S     |  2 ++
 libavcodec/riscv/h264_mc_chroma.S  |  8 ++++++++
 libavcodec/riscv/h264addpx_rvv.S   |  4 ++++
 libavcodec/riscv/h264dsp_rvv.S     |  5 +++++
 libavcodec/riscv/h264idct_rvv.S    | 16 ++++++++++++++++
 libavcodec/riscv/huffyuvdsp_rvv.S  |  2 ++
 libavcodec/riscv/idctdsp_rvv.S     |  3 +++
 libavcodec/riscv/jpeg2000dsp_rvv.S |  2 ++
 libavcodec/riscv/llauddsp_rvv.S    |  2 ++
 libavcodec/riscv/llviddsp_rvv.S    |  1 +
 libavcodec/riscv/llvidencdsp_rvv.S |  1 +
 libavcodec/riscv/lpc_rvv.S         |  2 ++
 libavcodec/riscv/me_cmp_rvv.S      | 17 +++++++++++++++++
 libavcodec/riscv/opusdsp_rvv.S     |  1 +
 libavcodec/riscv/pixblockdsp_rvi.S |  2 ++
 libavcodec/riscv/pixblockdsp_rvv.S |  4 ++++
 libavcodec/riscv/rv34dsp_rvv.S     |  2 ++
 libavcodec/riscv/rv40dsp_rvv.S     |  4 ++++
 libavcodec/riscv/sbrdsp_rvv.S      | 13 +++++++++++--
 libavcodec/riscv/startcode_rvb.S   |  1 +
 libavcodec/riscv/startcode_rvv.S   |  1 +
 libavcodec/riscv/svqenc_rvv.S      |  1 +
 libavcodec/riscv/takdsp_rvv.S      |  4 ++++
 libavcodec/riscv/utvideodsp_rvv.S  |  2 ++
 libavcodec/riscv/vc1dsp_rvi.S      |  2 ++
 libavcodec/riscv/vc1dsp_rvv.S      | 11 +++++++++++
 libavcodec/riscv/vorbisdsp_rvv.S   |  1 +
 libavcodec/riscv/vp7dsp_rvv.S      |  3 +++
 libavcodec/riscv/vp8dsp_rvi.S      |  3 +++
 libavcodec/riscv/vp8dsp_rvv.S      | 12 ++++++++++++
 libavcodec/riscv/vp9_intra_rvi.S   |  3 +++
 libavcodec/riscv/vp9_intra_rvv.S   |  7 +++++++
 libavcodec/riscv/vp9_mc_rvi.S      |  5 +++++
 libavcodec/riscv/vp9_mc_rvv.S      |  1 +
 49 files changed, 196 insertions(+), 4 deletions(-)
diff mbox series

Patch

diff --git a/libavcodec/riscv/aacencdsp_rvv.S b/libavcodec/riscv/aacencdsp_rvv.S
index 21e66a77ae..e9e776dc9b 100644
--- a/libavcodec/riscv/aacencdsp_rvv.S
+++ b/libavcodec/riscv/aacencdsp_rvv.S
@@ -22,6 +22,7 @@ 
 #include "libavutil/riscv/asm.S"
 
 func ff_abs_pow34_rvv, zve32f
+        lpad    0
 1:
         vsetvli      t0, a2, e32, m8, ta, ma
         sub          a2, a2, t0
@@ -39,6 +40,7 @@  func ff_abs_pow34_rvv, zve32f
 endfunc
 
 func ff_aac_quant_bands_rvv, zve32f
+        lpad    0
 NOHWF   fmv.w.x     fa0, a6
 NOHWF   fmv.w.x     fa1, a7
         fcvt.s.w    ft0, a5
diff --git a/libavcodec/riscv/aacpsdsp_rvv.S b/libavcodec/riscv/aacpsdsp_rvv.S
index 2d6858688a..6d01bfb734 100644
--- a/libavcodec/riscv/aacpsdsp_rvv.S
+++ b/libavcodec/riscv/aacpsdsp_rvv.S
@@ -21,6 +21,7 @@ 
 #include "libavutil/riscv/asm.S"
 
 func ff_ps_add_squares_rvv, zve64f
+        lpad    0
         li          t1, 32
 1:
         vsetvli     t0, a2, e32, m4, ta, ma
@@ -40,6 +41,7 @@  func ff_ps_add_squares_rvv, zve64f
 endfunc
 
 func ff_ps_mul_pair_single_rvv, zve32f
+        lpad    0
 1:
         vsetvli     t0, a3, e32, m4, ta, ma
         vlseg2e32.v v24, (a1)
@@ -57,6 +59,7 @@  func ff_ps_mul_pair_single_rvv, zve32f
 endfunc
 
 func ff_ps_hybrid_analysis_rvv, zve32f
+        lpad    0
         /* We need 26 FP registers, for 20 scratch ones. Spill fs0-fs5. */
         addi    sp, sp, -48
         .irp n, 0, 1, 2, 3, 4, 5
@@ -135,6 +138,7 @@  NOHWD   flw     fs\n, (4 * \n)(sp)
 endfunc
 
 func ff_ps_hybrid_analysis_ileave_rvv, zve32x /* no needs for zve32f here */
+        lpad    0
         slli        t0, a2, 5 + 1 + 2 // ctz(32 * 2 * 4)
         sh2add      a1, a2, a1
         add         a0, a0, t0
@@ -208,6 +212,7 @@  func ff_ps_hybrid_synthesis_deint_rvv, zve64x
 endfunc
 
 func ff_ps_stereo_interpolate_rvv, zve32f, zbb
+        lpad    0
         vsetvli      t0, zero, e32, m2, ta, ma
         vid.v        v24
         flw          ft0,   (a2)
diff --git a/libavcodec/riscv/ac3dsp_rvb.S b/libavcodec/riscv/ac3dsp_rvb.S
index 0ca56466e1..a3c5187cfe 100644
--- a/libavcodec/riscv/ac3dsp_rvb.S
+++ b/libavcodec/riscv/ac3dsp_rvb.S
@@ -22,6 +22,7 @@ 
 #include "libavutil/riscv/asm.S"
 
 func ff_ac3_exponent_min_rvb, zbb
+        lpad    0
         beqz    a1, 3f
 1:
         addi    a2, a2, -1
@@ -43,6 +44,7 @@  func ff_ac3_exponent_min_rvb, zbb
 endfunc
 
 func ff_extract_exponents_rvb, zbb
+        lpad    0
 1:
         lw       t0, (a1)
         addi     a0, a0, 1
diff --git a/libavcodec/riscv/ac3dsp_rvv.S b/libavcodec/riscv/ac3dsp_rvv.S
index 1b5f67a9ec..0ca1332bf1 100644
--- a/libavcodec/riscv/ac3dsp_rvv.S
+++ b/libavcodec/riscv/ac3dsp_rvv.S
@@ -22,6 +22,7 @@ 
 #include "libavutil/riscv/asm.S"
 
 func ff_ac3_exponent_min_rvv, zve32x
+        lpad    0
         beqz     a1, 3f
 1:
         vsetvli  t2, a2, e8, m8, ta, ma
@@ -44,6 +45,7 @@  func ff_ac3_exponent_min_rvv, zve32x
 endfunc
 
 func ff_float_to_fixed24_rvv, zve32f
+        lpad    0
         li            t1, 1 << 24
         fcvt.s.w      f0, t1
 1:
@@ -62,6 +64,7 @@  endfunc
 
 #if __riscv_xlen >= 64
 func ff_sum_square_butterfly_int32_rvv, zve64x
+        lpad    0
         vsetvli    t0, zero, e64, m8, ta, ma
         vmv.v.x    v0, zero
         vmv.v.x    v8, zero
@@ -102,6 +105,7 @@  endfunc
 #endif
 
 func ff_sum_square_butterfly_float_rvv, zve32f
+        lpad    0
         vsetvli     t0, zero, e32, m8, ta, ma
         vmv.v.x     v0, zero
         vmv.v.x     v8, zero
diff --git a/libavcodec/riscv/ac3dsp_rvvb.S b/libavcodec/riscv/ac3dsp_rvvb.S
index 64766b56be..031e38affd 100644
--- a/libavcodec/riscv/ac3dsp_rvvb.S
+++ b/libavcodec/riscv/ac3dsp_rvvb.S
@@ -22,6 +22,7 @@ 
 #include "libavutil/riscv/asm.S"
 
 func ff_extract_exponents_rvvb, zve32x, zvbb
+        lpad    0
 1:
         vsetvli     t0, a2, e32, m8, ta, ma
         vle32.v     v8, (a1)
diff --git a/libavcodec/riscv/alacdsp_rvv.S b/libavcodec/riscv/alacdsp_rvv.S
index 8efb04e0c8..2a942bad8c 100644
--- a/libavcodec/riscv/alacdsp_rvv.S
+++ b/libavcodec/riscv/alacdsp_rvv.S
@@ -22,6 +22,7 @@ 
 
 #if (__riscv_xlen == 64)
 func ff_alac_decorrelate_stereo_rvv, zve32x
+        lpad    0
         ld          a4, 8(a0)
         ld          a0, 0(a0)
 1:
@@ -44,6 +45,7 @@  func ff_alac_decorrelate_stereo_rvv, zve32x
 endfunc
 
 func ff_alac_append_extra_bits_mono_rvv, zve32x
+        lpad    0
         ld      a0, (a0)
         ld      a1, (a1)
 1:
@@ -62,6 +64,7 @@  func ff_alac_append_extra_bits_mono_rvv, zve32x
 endfunc
 
 func ff_alac_append_extra_bits_stereo_rvv, zve32x
+        lpad    0
         ld      a6, 8(a0)
         ld      a0,  (a0)
         ld      a7, 8(a1)
diff --git a/libavcodec/riscv/audiodsp_rvf.S b/libavcodec/riscv/audiodsp_rvf.S
index 2ec8a11691..97aa930ab5 100644
--- a/libavcodec/riscv/audiodsp_rvf.S
+++ b/libavcodec/riscv/audiodsp_rvf.S
@@ -21,6 +21,7 @@ 
 #include "libavutil/riscv/asm.S"
 
 func ff_vector_clipf_rvf, f
+        lpad    0
 NOHWF   fmv.w.x fa0, a3
 NOHWF   fmv.w.x fa1, a4
 1:
diff --git a/libavcodec/riscv/audiodsp_rvv.S b/libavcodec/riscv/audiodsp_rvv.S
index f0b23bab5e..b749e9ed4a 100644
--- a/libavcodec/riscv/audiodsp_rvv.S
+++ b/libavcodec/riscv/audiodsp_rvv.S
@@ -21,6 +21,7 @@ 
 #include "libavutil/riscv/asm.S"
 
 func ff_scalarproduct_int16_rvv, zve32x
+        lpad    0
         vsetvli     t0, zero, e32, m8, ta, ma
         vmv.v.x     v8, zero
         vmv.s.x     v0, zero
@@ -56,6 +57,7 @@  func ff_vector_clip_int32_rvv, zve32x
 endfunc
 
 func ff_vector_clipf_rvv, zve32f
+        lpad    0
 NOHWF   fmv.w.x  fa0, a3
 NOHWF   fmv.w.x  fa1, a4
 1:
diff --git a/libavcodec/riscv/blockdsp_rvv.S b/libavcodec/riscv/blockdsp_rvv.S
index 18ab17da00..04da265417 100644
--- a/libavcodec/riscv/blockdsp_rvv.S
+++ b/libavcodec/riscv/blockdsp_rvv.S
@@ -21,6 +21,7 @@ 
 #include "libavutil/riscv/asm.S"
 
 func ff_clear_block_rvv, zve64x
+        lpad    0
         vsetivli      zero, 16, e64, m8, ta, ma
         vmv.v.i       v0, 0
         vse64.v       v0, (a0)
@@ -29,6 +30,7 @@  func ff_clear_block_rvv, zve64x
 endfunc
 
 func ff_clear_blocks_rvv, zve64x
+        lpad    0
         vsetivli      zero, 16, e64, m8, ta, ma
         vmv.v.i       v0, 0
 
@@ -42,6 +44,7 @@  func ff_clear_blocks_rvv, zve64x
 endfunc
 
 func ff_fill_block16_rvv, zve32x
+        lpad    0
         vsetivli      t0, 16, e8, m1, ta, ma
         vmv.v.x       v8, a1
 1:
@@ -54,6 +57,7 @@  func ff_fill_block16_rvv, zve32x
 endfunc
 
 func ff_fill_block8_rvv, zve64x
+        lpad    0
         vsetvli       t0, zero, e8, m4, ta, ma
         vmv.v.x       v8, a1
         vsetvli       t0, a3, e64, m4, ta, ma
diff --git a/libavcodec/riscv/bswapdsp_rvb.S b/libavcodec/riscv/bswapdsp_rvb.S
index 0786bd3f36..82e44ec84c 100644
--- a/libavcodec/riscv/bswapdsp_rvb.S
+++ b/libavcodec/riscv/bswapdsp_rvb.S
@@ -24,6 +24,7 @@ 
 
 #if (__riscv_xlen >= 64)
 func ff_bswap32_buf_rvb, zbb
+        lpad    0       /* forward-edge CFI landing pad (Zicfilp), first instruction of the entry point */
         bswap32_rvb a0, a1, a2
 endfunc
 #endif
diff --git a/libavcodec/riscv/bswapdsp_rvv.S b/libavcodec/riscv/bswapdsp_rvv.S
index b37fe26255..af55820fe4 100644
--- a/libavcodec/riscv/bswapdsp_rvv.S
+++ b/libavcodec/riscv/bswapdsp_rvv.S
@@ -22,6 +22,7 @@ 
 #include "libavutil/riscv/asm.S"
 
 func ff_bswap16_buf_rvv, zve32x
+        lpad    0
 1:
         vsetvli t0, a2, e16, m8, ta, ma
         vle16.v v8, (a1)
diff --git a/libavcodec/riscv/exrdsp_rvv.S b/libavcodec/riscv/exrdsp_rvv.S
index f4a35f58ff..f087fce0db 100644
--- a/libavcodec/riscv/exrdsp_rvv.S
+++ b/libavcodec/riscv/exrdsp_rvv.S
@@ -21,6 +21,7 @@ 
 #include "libavutil/riscv/asm.S"
 
 func ff_reorder_pixels_rvv, zve32x
+        lpad    0
         srai    a2, a2, 1
         add     t1, a1, a2
 1:
diff --git a/libavcodec/riscv/flacdsp_rvv.S b/libavcodec/riscv/flacdsp_rvv.S
index 1724aee9d7..0829042bfa 100644
--- a/libavcodec/riscv/flacdsp_rvv.S
+++ b/libavcodec/riscv/flacdsp_rvv.S
@@ -21,6 +21,7 @@ 
 #include "libavutil/riscv/asm.S"
 
 func ff_flac_lpc16_rvv, zve32x, zbb
+        lpad    0
         vtype_vli t0, a2, t2, e32, ta, ma
         vsetvl  zero, a2, t0
         vle32.v v8, (a1)
@@ -46,6 +47,7 @@  endfunc
 
 #if (__riscv_xlen == 64)
 func ff_flac_lpc32_rvv, zve64x
+        lpad    0
         addi    t2, a2, -16
         ble     t2, zero, ff_flac_lpc32_rvv_simple
         vsetivli zero, 1, e64, m1, ta, ma
@@ -77,6 +79,7 @@  func ff_flac_lpc32_rvv, zve64x
 endfunc
 
 func ff_flac_lpc32_rvv_simple, zve64x, zbb
+        lpad    0
         vtype_vli t3, a2, t1, e64, ta, ma
         vntypei t2, t3
         vsetvl  zero, a2, t3 // e64
@@ -105,6 +108,7 @@  func ff_flac_lpc32_rvv_simple, zve64x, zbb
 endfunc
 
 func ff_flac_lpc33_rvv, zve64x, zbb
+        lpad    0
         vtype_vli t0, a3, t1, e64, ta, ma
         vsetvl  zero, a3, t0
         vmv.s.x v0, zero
@@ -133,6 +137,7 @@  endfunc
 #endif
 
 func ff_flac_wasted32_rvv, zve32x
+        lpad    0
 1:
         vsetvli t0, a2, e32, m8, ta, ma
         vle32.v v8, (a0)
@@ -146,6 +151,7 @@  func ff_flac_wasted32_rvv, zve32x
 endfunc
 
 func ff_flac_wasted33_rvv, zve64x
+        lpad    0
         srli         t0, a2, 5
         li           t1, 1
         bnez         t0, 2f
@@ -178,6 +184,7 @@  endfunc
 
 #if (__riscv_xlen == 64)
 func ff_flac_decorrelate_indep2_16_rvv, zve32x
+        lpad    0
         ld      a0,  (a0)
         ld      a2, 8(a1)
         ld      a1,  (a1)
@@ -201,6 +208,7 @@  func ff_flac_decorrelate_indep2_16_rvv, zve32x
 endfunc
 
 func ff_flac_decorrelate_indep4_16_rvv, zve32x
+        lpad    0
         ld      a0,   (a0)
         ld      a2,  8(a1)
         ld      t1, 16(a1)
@@ -234,6 +242,7 @@  func ff_flac_decorrelate_indep4_16_rvv, zve32x
 endfunc
 
 func ff_flac_decorrelate_indep6_16_rvv, zve32x
+        lpad    0
         ld      a0,   (a0)
         ld      a2,  8(a1)
         ld      t1, 16(a1)
@@ -279,6 +288,7 @@  func ff_flac_decorrelate_indep6_16_rvv, zve32x
 endfunc
 
 func ff_flac_decorrelate_indep8_16_rvv, zve32x
+        lpad    0
         ld      a0,   (a0)
         ld      a2,  8(a1)
         ld      t1, 16(a1)
@@ -332,9 +342,8 @@  func ff_flac_decorrelate_indep8_16_rvv, zve32x
         ret
 endfunc
 
-
-
 func ff_flac_decorrelate_ls_16_rvv, zve32x
+        lpad    0
         ld      a0,  (a0)
         ld      a2, 8(a1)
         ld      a1,  (a1)
@@ -359,6 +368,7 @@  func ff_flac_decorrelate_ls_16_rvv, zve32x
 endfunc
 
 func ff_flac_decorrelate_rs_16_rvv, zve32x
+        lpad    0
         ld      a0,  (a0)
         ld      a2, 8(a1)
         ld      a1,  (a1)
@@ -383,6 +393,7 @@  func ff_flac_decorrelate_rs_16_rvv, zve32x
 endfunc
 
 func ff_flac_decorrelate_ms_16_rvv, zve32x
+        lpad    0
         ld      a0,  (a0)
         ld      a2, 8(a1)
         ld      a1,  (a1)
@@ -409,6 +420,7 @@  func ff_flac_decorrelate_ms_16_rvv, zve32x
 endfunc
 
 func ff_flac_decorrelate_indep2_32_rvv, zve32x
+        lpad    0
         ld      a0,  (a0)
         ld      a2, 8(a1)
         ld      a1,  (a1)
@@ -429,6 +441,7 @@  func ff_flac_decorrelate_indep2_32_rvv, zve32x
 endfunc
 
 func ff_flac_decorrelate_indep4_32_rvv, zve32x
+        lpad    0
         ld      a0,   (a0)
         ld      a2,  8(a1)
         ld      t1, 16(a1)
@@ -458,6 +471,7 @@  func ff_flac_decorrelate_indep4_32_rvv, zve32x
 endfunc
 
 func ff_flac_decorrelate_indep6_32_rvv, zve32x
+        lpad    0
         ld      a0,   (a0)
         ld      a2,  8(a1)
         ld      t1, 16(a1)
@@ -496,6 +510,7 @@  func ff_flac_decorrelate_indep6_32_rvv, zve32x
 endfunc
 
 func ff_flac_decorrelate_indep8_32_rvv, zve32x
+        lpad    0
         ld      a0,   (a0)
         ld      a2,  8(a1)
         ld      t1, 16(a1)
@@ -541,6 +556,7 @@  func ff_flac_decorrelate_indep8_32_rvv, zve32x
 endfunc
 
 func ff_flac_decorrelate_ls_32_rvv, zve32x
+        lpad    0
         ld      a0,  (a0)
         ld      a2, 8(a1)
         ld      a1,  (a1)
@@ -562,6 +578,7 @@  func ff_flac_decorrelate_ls_32_rvv, zve32x
 endfunc
 
 func ff_flac_decorrelate_rs_32_rvv, zve32x
+        lpad    0
         ld      a0,  (a0)
         ld      a2, 8(a1)
         ld      a1,  (a1)
@@ -583,6 +600,7 @@  func ff_flac_decorrelate_rs_32_rvv, zve32x
 endfunc
 
 func ff_flac_decorrelate_ms_32_rvv, zve32x
+        lpad    0
         ld      a0,  (a0)
         ld      a2, 8(a1)
         ld      a1,  (a1)
diff --git a/libavcodec/riscv/fmtconvert_rvv.S b/libavcodec/riscv/fmtconvert_rvv.S
index d0e2f106d5..c356196c97 100644
--- a/libavcodec/riscv/fmtconvert_rvv.S
+++ b/libavcodec/riscv/fmtconvert_rvv.S
@@ -21,6 +21,7 @@ 
 #include "libavutil/riscv/asm.S"
 
 func ff_int32_to_float_fmul_scalar_rvv, zve32f
+        lpad    0
 NOHWF   fmv.w.x     fa0, a2
 NOHWF   mv          a2, a3
 1:
@@ -38,6 +39,7 @@  NOHWF   mv          a2, a3
 endfunc
 
 func ff_int32_to_float_fmul_array8_rvv, zve32f
+        lpad    0
         srai        a4, a4, 3
 
 1:      vsetvli     t0, a4, e32, m1, ta, ma
diff --git a/libavcodec/riscv/g722dsp_rvv.S b/libavcodec/riscv/g722dsp_rvv.S
index 981d5cecd8..6ceb70fde1 100644
--- a/libavcodec/riscv/g722dsp_rvv.S
+++ b/libavcodec/riscv/g722dsp_rvv.S
@@ -21,6 +21,7 @@ 
 #include "libavutil/riscv/asm.S"
 
 func ff_g722_apply_qmf_rvv, zve32x
+        lpad    0
         lla         t0, qmf_coeffs
         vsetivli    zero, 12, e16, m2, ta, ma
         vlseg2e16.v v28, (a0)
diff --git a/libavcodec/riscv/h263dsp_rvv.S b/libavcodec/riscv/h263dsp_rvv.S
index 97503d527c..c78483926a 100644
--- a/libavcodec/riscv/h263dsp_rvv.S
+++ b/libavcodec/riscv/h263dsp_rvv.S
@@ -23,6 +23,7 @@ 
         .option push
         .option norelax
 func ff_h263_h_loop_filter_rvv, zve32x
+        lpad    0
         addi        a0, a0, -2
         vsetivli    zero, 8, e8, mf2, ta, ma
         vlsseg4e8.v v8, (a0), a1
@@ -83,6 +84,7 @@  endfunc
         .option pop
 
 func ff_h263_v_loop_filter_rvv, zve32x
+        lpad    0
         sub         a4, a0, a1
         vsetivli    zero, 8, e8, mf2, ta, ma
         vle8.v      v10, (a0)
diff --git a/libavcodec/riscv/h264_mc_chroma.S b/libavcodec/riscv/h264_mc_chroma.S
index ce99bda44d..22ac4ef20b 100644
--- a/libavcodec/riscv/h264_mc_chroma.S
+++ b/libavcodec/riscv/h264_mc_chroma.S
@@ -325,6 +325,7 @@ 
         ret
 .endm
 
+        .variant_cc h264_put_chroma_mc_rvv /* variant calling convention: the mcN stubs below enter at 11b with an extra value in t6 */
 func h264_put_chroma_mc_rvv, zve32x
 11:
         li      a7, 3
@@ -334,6 +335,7 @@  func h264_put_chroma_mc_rvv, zve32x
         do_chroma_mc put 0
 endfunc
 
+        .variant_cc h264_avg_chroma_mc_rvv /* variant calling convention: the mcN stubs below enter at 21b with an extra value in t6 */
 func h264_avg_chroma_mc_rvv, zve32x
 21:
         li      a7, 3
@@ -344,31 +346,37 @@  func h264_avg_chroma_mc_rvv, zve32x
 endfunc
 
 func h264_put_chroma_mc8_rvv, zve32x
+        lpad    0       /* forward-edge CFI landing pad; the stub then sets t6 = 8 and jumps to the shared code at 11b */
         li      t6, 8
         j       11b
 endfunc
 
 func h264_put_chroma_mc4_rvv, zve32x
+        lpad    0       /* forward-edge CFI landing pad; the stub then sets t6 = 4 and jumps to the shared code at 11b */
         li      t6, 4
         j       11b
 endfunc
 
 func h264_put_chroma_mc2_rvv, zve32x
+        lpad    0       /* forward-edge CFI landing pad; the stub then sets t6 = 2 and jumps to the shared code at 11b */
         li      t6, 2
         j       11b
 endfunc
 
 func h264_avg_chroma_mc8_rvv, zve32x
+        lpad    0       /* forward-edge CFI landing pad; the stub then sets t6 = 8 and jumps to the shared code at 21b */
         li      t6, 8
         j       21b
 endfunc
 
 func h264_avg_chroma_mc4_rvv, zve32x
+        lpad    0       /* forward-edge CFI landing pad; the stub then sets t6 = 4 and jumps to the shared code at 21b */
         li      t6, 4
         j       21b
 endfunc
 
 func h264_avg_chroma_mc2_rvv, zve32x
+        lpad    0       /* forward-edge CFI landing pad; the stub then sets t6 = 2 and jumps to the shared code at 21b */
         li      t6, 2
         j       21b
 endfunc
diff --git a/libavcodec/riscv/h264addpx_rvv.S b/libavcodec/riscv/h264addpx_rvv.S
index 3c0700d1d9..82739881d9 100644
--- a/libavcodec/riscv/h264addpx_rvv.S
+++ b/libavcodec/riscv/h264addpx_rvv.S
@@ -37,6 +37,7 @@ 
         .endm
 
 func ff_h264_add_pixels4_8_rvv, zve32x
+        lpad    0
         vsetivli        zero, 4, e8, mf4, ta, ma
         vlse32.v        v8, (a0), a2
         vsetivli        zero, 4 * 4, e8, m1, ta, ma
@@ -54,6 +55,7 @@  func ff_h264_add_pixels4_8_rvv, zve32x
 endfunc
 
 func ff_h264_add_pixels4_16_rvv, zve64x
+        lpad    0
         vsetivli        zero, 4, e16, mf2, ta, ma
         vlse64.v        v8, (a0), a2
         vsetivli        zero, 4 * 4, e16, m2, ta, ma
@@ -71,6 +73,7 @@  func ff_h264_add_pixels4_16_rvv, zve64x
 endfunc
 
 func ff_h264_add_pixels8_8_rvv, zve64x
+        lpad    0
         li      t0, 8 * 8
         vsetivli        zero, 8, e8, mf2, ta, ma
         vlse64.v        v8, (a0), a2
@@ -89,6 +92,7 @@  func ff_h264_add_pixels8_8_rvv, zve64x
 endfunc
 
 func ff_h264_add_pixels8_16_rvv, zve32x
+        lpad    0
         li      t0, 8
         vsetivli    zero, 8, e16, m1, ta, ma
 1:
diff --git a/libavcodec/riscv/h264dsp_rvv.S b/libavcodec/riscv/h264dsp_rvv.S
index ed6a16a9c4..a38bf7ef1d 100644
--- a/libavcodec/riscv/h264dsp_rvv.S
+++ b/libavcodec/riscv/h264dsp_rvv.S
@@ -164,6 +164,7 @@  endfunc
 
 .irp    w, 16, 8, 4, 2
 func ff_h264_weight_pixels\w\()_8_rvv, zve32x
+        lpad    0
         li      a6, \w
         .if     \w == 16
         j       ff_h264_weight_pixels_simple_8_rvv
@@ -173,6 +174,7 @@  func ff_h264_weight_pixels\w\()_8_rvv, zve32x
 endfunc
 
 func ff_h264_biweight_pixels\w\()_8_rvv, zve32x
+        lpad    0
         li      t6, \w
         .if     \w == 16
         j       ff_h264_biweight_pixels_simple_8_rvv
@@ -272,6 +274,7 @@  func ff_h264_loop_filter_luma_8_rvv, zve32x
 endfunc
 
 func ff_h264_v_loop_filter_luma_8_rvv, zve32x
+        lpad    0
         vsetivli  zero, 4, e32, m1, ta, ma
         vle8.v    v4, (a4)
         li        t0, 0x01010101
@@ -299,6 +302,7 @@  func ff_h264_v_loop_filter_luma_8_rvv, zve32x
 endfunc
 
 func ff_h264_h_loop_filter_luma_8_rvv, zve32x
+        lpad    0
         vsetivli    zero, 4, e32, m1, ta, ma
         vle8.v      v4, (a4)
         li          t0, 0x01010101
@@ -313,6 +317,7 @@  func ff_h264_h_loop_filter_luma_8_rvv, zve32x
 endfunc
 
 func ff_h264_h_loop_filter_luma_mbaff_8_rvv, zve32x
+        lpad    0
         vsetivli    zero, 4, e16, mf2, ta, ma
         vle8.v      v4, (a4)
         li          t0, 0x0101
diff --git a/libavcodec/riscv/h264idct_rvv.S b/libavcodec/riscv/h264idct_rvv.S
index c74ea18c19..f01f7c2a90 100644
--- a/libavcodec/riscv/h264idct_rvv.S
+++ b/libavcodec/riscv/h264idct_rvv.S
@@ -55,6 +55,7 @@  func ff_h264_idct4_rvv, zve32x
 endfunc
 
 func ff_h264_idct_add_8_rvv, zve32x
+        lpad    0
         csrwi       vxrm, 0
 .Lidct_add4_8_rvv:
         vsetivli    zero, 4, e16, mf2, ta, ma
@@ -213,6 +214,7 @@  func ff_h264_idct8_rvv, zve32x
 endfunc
 
 func ff_h264_idct8_add_8_rvv, zve32x
+        lpad    0
         csrwi       vxrm, 0
 .Lidct8_add_8_rvv:
         vsetivli    zero, 8, e16, m1, ta, ma
@@ -405,11 +407,13 @@  endfunc
 
 .irp    depth, 9, 10, 12, 14
 func ff_h264_idct_add_\depth\()_rvv, zve32x
+        lpad    0       /* forward-edge CFI landing pad for this per-depth stub, which forwards to the _16 code with a5 = (1 << depth) - 1 */
         li      a5, (1 << \depth) - 1
         j       ff_h264_idct_add_16_rvv
 endfunc
 
 func ff_h264_idct8_add_\depth\()_rvv, zve32x
+        lpad    0       /* forward-edge CFI landing pad for this per-depth stub, which forwards to the _16 code with a5 = (1 << depth) - 1 */
         li      a5, (1 << \depth) - 1
         j       ff_h264_idct8_add_16_rvv
 endfunc
@@ -417,6 +421,7 @@  endfunc
 
 .macro idct_dc_add8 width
 func ff_h264_idct\width\()_dc_add_8_rvv, zve64x
+        lpad    0
 .if \width == 8
         vsetivli        zero, \width, e8, mf2, ta, ma
 .else
@@ -517,11 +522,13 @@  idct_dc_add 8
 
 .irp depth,9,10,12,14
 func ff_h264_idct4_dc_add_\depth\()_rvv, zve64x
+        lpad    0       /* forward-edge CFI landing pad for this per-depth stub, which forwards to the _16 code with a5 = (1 << depth) - 1 */
         li              a5, (1 << \depth) - 1
         j               ff_h264_idct4_dc_add_16_rvv
 endfunc
 
 func ff_h264_idct8_dc_add_\depth\()_rvv, zve64x
+        lpad    0       /* forward-edge CFI landing pad for this per-depth stub, which forwards to the _16 code with a5 = (1 << depth) - 1 */
         li              a5, (1 << \depth) - 1
         j               ff_h264_idct8_dc_add_16_rvv
 endfunc
@@ -535,6 +542,9 @@  endconst
 #if (__riscv_xlen == 64)
 .macro  idct4_adds type, depth
 func ff_h264_idct_add\type\()_\depth\()_rvv, zve32x
+.if \depth == 8
+        lpad    0
+.endif
         csrwi   vxrm, 0
         addi    sp, sp, -16
         lla     t0, ff_h264_scan8
@@ -612,6 +622,9 @@  idct4_adds 16, \depth
 idct4_adds 16intra, \depth
 
 func ff_h264_idct8_add4_\depth\()_rvv, zve32x
+.if \depth == 8
+        lpad    0
+.endif
         csrwi       vxrm, 0
         addi    sp, sp, -64
         lla     t0, ff_h264_scan8
@@ -688,16 +701,19 @@  endfunc
 
 .irp    depth, 9, 10, 12, 14
 func ff_h264_idct_add16_\depth\()_rvv, zve32x
+        lpad    0       /* forward-edge CFI landing pad for this per-depth stub, which forwards to the _16 code with a5 = (1 << depth) - 1 */
         li      a5, (1 << \depth) - 1
         j       ff_h264_idct_add16_16_rvv
 endfunc
 
 func ff_h264_idct_add16intra_\depth\()_rvv, zve32x
+        lpad    0       /* forward-edge CFI landing pad for this per-depth stub, which forwards to the _16 code with a5 = (1 << depth) - 1 */
         li      a5, (1 << \depth) - 1
         j       ff_h264_idct_add16intra_16_rvv
 endfunc
 
 func ff_h264_idct8_add4_\depth\()_rvv, zve32x
+        lpad    0       /* forward-edge CFI landing pad for this per-depth stub, which forwards to the _16 code with a5 = (1 << depth) - 1 */
         li      a5, (1 << \depth) - 1
         j       ff_h264_idct8_add4_16_rvv
 endfunc
diff --git a/libavcodec/riscv/huffyuvdsp_rvv.S b/libavcodec/riscv/huffyuvdsp_rvv.S
index d334f5c6d0..5b8c2473b9 100644
--- a/libavcodec/riscv/huffyuvdsp_rvv.S
+++ b/libavcodec/riscv/huffyuvdsp_rvv.S
@@ -21,6 +21,7 @@ 
 #include "libavutil/riscv/asm.S"
 
 func ff_add_int16_rvv, zve32x
+        lpad    0
 1:
         vsetvli t0, a3, e16, m8, ta, ma
         vle16.v v16, (a0)
@@ -37,6 +38,7 @@  func ff_add_int16_rvv, zve32x
 endfunc
 
 func ff_add_hfyu_left_pred_bgr32_rvv, zve32x, zbb
+        lpad    0
         vtype_ivli t1, 4, e8, ta, ma
         li      t0, 4
         vsetvl  zero, t0, t1
diff --git a/libavcodec/riscv/idctdsp_rvv.S b/libavcodec/riscv/idctdsp_rvv.S
index e93e6b5e7a..de229a9ae7 100644
--- a/libavcodec/riscv/idctdsp_rvv.S
+++ b/libavcodec/riscv/idctdsp_rvv.S
@@ -21,6 +21,7 @@ 
 #include "libavutil/riscv/asm.S"
 
 func ff_put_pixels_clamped_rvv, zve64x
+        lpad    0
         li      t0, 8 * 8
         vsetvli zero, t0, e16, m8, ta, ma
         vle16.v v24, (a0)
@@ -35,6 +36,7 @@  func ff_put_pixels_clamped_rvv, zve64x
 endfunc
 
 func ff_put_signed_pixels_clamped_rvv, zve64x
+        lpad    0
         li      t0, 8 * 8
         vsetvli zero, t0, e8, m4, ta, ma
         vle16.v v24, (a0)
@@ -47,6 +49,7 @@  func ff_put_signed_pixels_clamped_rvv, zve64x
 endfunc
 
 func ff_add_pixels_clamped_rvv, zve64x
+        lpad    0
         vsetivli zero, 8, e8, mf2, ta, ma
         li      t0, 8 * 8
         vlse64.v v16, (a1), a2
diff --git a/libavcodec/riscv/jpeg2000dsp_rvv.S b/libavcodec/riscv/jpeg2000dsp_rvv.S
index 10efe6b0db..73ed78239e 100644
--- a/libavcodec/riscv/jpeg2000dsp_rvv.S
+++ b/libavcodec/riscv/jpeg2000dsp_rvv.S
@@ -21,6 +21,7 @@ 
 #include "libavutil/riscv/asm.S"
 
 func ff_ict_float_rvv, zve32f
+        lpad    0
         lla     t0, ff_jpeg2000_f_ict_params
         flw     ft0,  0(t0)
         flw     ft1,  4(t0)
@@ -49,6 +50,7 @@  func ff_ict_float_rvv, zve32f
 endfunc
 
 func ff_rct_int_rvv, zve32x
+        lpad    0
 1:
         vsetvli t0, a3, e32, m8, ta, ma
         vle32.v v16, (a1)
diff --git a/libavcodec/riscv/llauddsp_rvv.S b/libavcodec/riscv/llauddsp_rvv.S
index 5569864832..54ffbeb666 100644
--- a/libavcodec/riscv/llauddsp_rvv.S
+++ b/libavcodec/riscv/llauddsp_rvv.S
@@ -21,6 +21,7 @@ 
 #include "libavutil/riscv/asm.S"
 
 func ff_scalarproduct_and_madd_int16_rvv, zve32x
+        lpad    0
         vsetvli t0, zero, e32, m8, ta, ma
         vmv.v.x v0, zero
 1:
@@ -45,6 +46,7 @@  func ff_scalarproduct_and_madd_int16_rvv, zve32x
 endfunc
 
 func ff_scalarproduct_and_madd_int32_rvv, zve32x
+        lpad    0
         vsetvli t0, zero, e32, m8, ta, ma
         vmv.v.x v0, zero
 1:
diff --git a/libavcodec/riscv/llviddsp_rvv.S b/libavcodec/riscv/llviddsp_rvv.S
index a4814837b9..9572e92dce 100644
--- a/libavcodec/riscv/llviddsp_rvv.S
+++ b/libavcodec/riscv/llviddsp_rvv.S
@@ -21,6 +21,7 @@ 
 #include "libavutil/riscv/asm.S"
 
 func ff_llvid_add_bytes_rvv, zve32x
+        lpad    0
 1:
         vsetvli t0, a2, e8, m8, ta, ma
         vle8.v  v0, (a1)
diff --git a/libavcodec/riscv/llvidencdsp_rvv.S b/libavcodec/riscv/llvidencdsp_rvv.S
index 0342165127..44bf3ac7e5 100644
--- a/libavcodec/riscv/llvidencdsp_rvv.S
+++ b/libavcodec/riscv/llvidencdsp_rvv.S
@@ -21,6 +21,7 @@ 
 #include "libavutil/riscv/asm.S"
 
 func ff_llvidenc_diff_bytes_rvv, zve32x
+        lpad    0
 1:
         vsetvli t0, a3, e8, m8, ta, ma
         vle8.v  v0, (a1)
diff --git a/libavcodec/riscv/lpc_rvv.S b/libavcodec/riscv/lpc_rvv.S
index fe80305d9a..e70c5f3121 100644
--- a/libavcodec/riscv/lpc_rvv.S
+++ b/libavcodec/riscv/lpc_rvv.S
@@ -22,6 +22,7 @@ 
 
 #if __riscv_xlen >= 64
 func ff_lpc_apply_welch_window_rvv, zve64d
+        lpad    0
         vsetvli   t0, zero, e64, m8, ta, ma
         vid.v     v0
         addi      t2, a1, -1
@@ -87,6 +88,7 @@  func ff_lpc_apply_welch_window_rvv, zve64d
 endfunc
 
 func ff_lpc_compute_autocorr_rvv, zve64d, zbb
+        lpad    0
         vtype_vli t1, a2, t2, e64, ta, ma, 1
         addi      a2, a2, 1
         li        t0, 1
diff --git a/libavcodec/riscv/me_cmp_rvv.S b/libavcodec/riscv/me_cmp_rvv.S
index c9ae5bb6fc..8989c91dde 100644
--- a/libavcodec/riscv/me_cmp_rvv.S
+++ b/libavcodec/riscv/me_cmp_rvv.S
@@ -27,6 +27,7 @@ 
 .endm
 
 func ff_pix_abs16_rvv, zve32x
+        lpad    0
         vsetivli        zero, 1, e32, m1, ta, ma
         vmv.s.x         v0, zero
 1:
@@ -47,6 +48,7 @@  func ff_pix_abs16_rvv, zve32x
 endfunc
 
 func ff_pix_abs8_rvv, zve32x
+        lpad    0
         vsetivli        zero, 1, e32, m1, ta, ma
         vmv.s.x         v0, zero
 1:
@@ -67,6 +69,7 @@  func ff_pix_abs8_rvv, zve32x
 endfunc
 
 func ff_pix_abs16_x2_rvv, zve32x
+        lpad    0
         csrwi           vxrm, 0
         vsetivli        zero, 1, e32, m1, ta, ma
         li              t5, 1
@@ -92,6 +95,7 @@  func ff_pix_abs16_x2_rvv, zve32x
 endfunc
 
 func ff_pix_abs8_x2_rvv, zve32x
+        lpad    0
         csrwi           vxrm, 0
         vsetivli        zero, 1, e32, m1, ta, ma
         li              t5, 1
@@ -117,6 +121,7 @@  func ff_pix_abs8_x2_rvv, zve32x
 endfunc
 
 func ff_pix_abs16_y2_rvv, zve32x
+        lpad    0
         csrwi           vxrm, 0
         vsetivli        zero, 1, e32, m1, ta, ma
         add             t1, a2, a3
@@ -142,6 +147,7 @@  func ff_pix_abs16_y2_rvv, zve32x
 endfunc
 
 func ff_pix_abs8_y2_rvv, zve32x
+        lpad    0
         csrwi           vxrm, 0
         vsetivli        zero, 1, e32, m1, ta, ma
         add             t1, a2, a3
@@ -167,6 +173,7 @@  func ff_pix_abs8_y2_rvv, zve32x
 endfunc
 
 func ff_sse16_rvv, zve32x
+        lpad    0
         vsetivli        t0, 16, e32, m4, ta, ma
         vmv.v.x         v24, zero
         vmv.s.x         v0, zero
@@ -189,6 +196,7 @@  func ff_sse16_rvv, zve32x
 endfunc
 
 func ff_sse8_rvv, zve32x
+        lpad    0
         vsetivli        t0, 8, e32, m2, ta, ma
         vmv.v.x         v24, zero
         vmv.s.x         v0, zero
@@ -211,6 +219,7 @@  func ff_sse8_rvv, zve32x
 endfunc
 
 func ff_sse4_rvv, zve32x
+        lpad    0
         vsetivli        t0, 4, e32, m1, ta, ma
         vmv.v.x         v24, zero
         vmv.s.x         v0, zero
@@ -239,6 +248,7 @@  endfunc
 .endm
 
 .macro  vsad_vsse16 type
+        lpad    0
         vsetivli        t0, 16, e32, m4, ta, ma
         addi            a4, a4, -1
         add             t1, a1, a3
@@ -277,6 +287,7 @@  endfunc
 .endm
 
 .macro  vsad_vsse8 type
+        lpad    0
         vsetivli        t0, 8, e32, m2, ta, ma
         addi            a4, a4, -1
         add             t1, a1, a3
@@ -315,6 +326,7 @@  endfunc
 .endm
 
 .macro  vsad_vsse_intra16 type
+        lpad    0
         vsetivli        t0, 16, e32, m4, ta, ma
         addi            a4, a4, -1
         add             t1, a1, a3
@@ -346,6 +358,7 @@  endfunc
 .endm
 
 .macro  vsad_vsse_intra8 type
+        lpad    0
         vsetivli        t0, 8, e32, m2, ta, ma
         addi            a4, a4, -1
         add             t1, a1, a3
@@ -409,6 +422,8 @@  func ff_vsad_intra8_rvv, zve32x
 endfunc
 
 func ff_nsse16_rvv, zve32x
+        lpad    0
+
         .macro squarediff16
         vsetivli        zero, 16, e8, m1, tu, ma
         vle8.v          v4, (a1)
@@ -468,6 +483,8 @@  func ff_nsse16_rvv, zve32x
 endfunc
 
 func ff_nsse8_rvv, zve32x
+        lpad    0
+
         .macro squarediff8
         vsetivli        zero, 8, e8, mf2, tu, ma
         vle8.v          v4, (a1)
diff --git a/libavcodec/riscv/opusdsp_rvv.S b/libavcodec/riscv/opusdsp_rvv.S
index 42d845a370..bf4b2319e1 100644
--- a/libavcodec/riscv/opusdsp_rvv.S
+++ b/libavcodec/riscv/opusdsp_rvv.S
@@ -21,6 +21,7 @@ 
 #include "libavutil/riscv/asm.S"
 
 func ff_opus_postfilter_rvv, zve32f, zbb
+        lpad    0
         flw     fa0, 0(a2) // g0
         slli    t1, a1, 2
         flw     fa1, 4(a2) // g1
diff --git a/libavcodec/riscv/pixblockdsp_rvi.S b/libavcodec/riscv/pixblockdsp_rvi.S
index efdd422228..ed1af70251 100644
--- a/libavcodec/riscv/pixblockdsp_rvi.S
+++ b/libavcodec/riscv/pixblockdsp_rvi.S
@@ -21,6 +21,7 @@ 
 #include "libavutil/riscv/asm.S"
 
 func ff_get_pixels_8_rvi
+        lpad    0
 .irp row, 0, 1, 2, 3, 4, 5, 6, 7
         ld      t0,    (a1)
         add     a1, a1, a2
@@ -47,6 +48,7 @@  func ff_get_pixels_8_rvi
 endfunc
 
 func ff_get_pixels_16_rvi
+        lpad    0
 .irp row, 0, 1, 2, 3, 4, 5, 6, 7
         ld      t0, 0(a1)
         ld      t1, 8(a1)
diff --git a/libavcodec/riscv/pixblockdsp_rvv.S b/libavcodec/riscv/pixblockdsp_rvv.S
index 4213cd1b85..85233470cf 100644
--- a/libavcodec/riscv/pixblockdsp_rvv.S
+++ b/libavcodec/riscv/pixblockdsp_rvv.S
@@ -21,6 +21,7 @@ 
 #include "libavutil/riscv/asm.S"
 
 func ff_get_pixels_8_rvv, zve64x
+        lpad    0
         vsetivli zero, 8, e8, mf2, ta, ma
         li      t0, 8 * 8
 1:
@@ -32,6 +33,7 @@  func ff_get_pixels_8_rvv, zve64x
 endfunc
 
 func ff_get_pixels_unaligned_8_rvv, zve64x
+        lpad    0
         andi    t1, a1, 7
         vsetivli zero, 8, e64, m4, ta, ma
         li      t0, 8 * 8
@@ -52,6 +54,7 @@  func ff_get_pixels_unaligned_8_rvv, zve64x
 endfunc
 
 func ff_diff_pixels_rvv, zve64x
+        lpad    0
         vsetivli zero, 8, e8, mf2, ta, ma
         li      t0, 8 * 8
         vlse64.v v16, (a1), a3
@@ -63,6 +66,7 @@  func ff_diff_pixels_rvv, zve64x
 endfunc
 
 func ff_diff_pixels_unaligned_rvv, zve32x
+        lpad    0
         vsetivli    zero, 8, e8, mf2, ta, ma
         vlsseg8e8.v v16, (a1), a3
         vlsseg8e8.v v24, (a2), a3
diff --git a/libavcodec/riscv/rv34dsp_rvv.S b/libavcodec/riscv/rv34dsp_rvv.S
index 8eda01665d..478bc0a860 100644
--- a/libavcodec/riscv/rv34dsp_rvv.S
+++ b/libavcodec/riscv/rv34dsp_rvv.S
@@ -21,6 +21,7 @@ 
 #include "libavutil/riscv/asm.S"
 
 func ff_rv34_inv_transform_dc_rvv, zve32x
+        lpad    0
         lh            t1, 0(a0)
         li            t0, 13 * 13 * 3
         mul           t2, t0, t1
@@ -33,6 +34,7 @@  func ff_rv34_inv_transform_dc_rvv, zve32x
 endfunc
 
 func ff_rv34_idct_dc_add_rvv, zve32x
+        lpad    0
         vsetivli      zero, 4, e8, mf4, ta, ma
         vlse32.v      v0, (a0), a1
         li            t1, 169
diff --git a/libavcodec/riscv/rv40dsp_rvv.S b/libavcodec/riscv/rv40dsp_rvv.S
index e49345ef70..edb979ffb0 100644
--- a/libavcodec/riscv/rv40dsp_rvv.S
+++ b/libavcodec/riscv/rv40dsp_rvv.S
@@ -351,21 +351,25 @@  func ff_avg_rv40_chroma_mc_rvv, zve32x
 endfunc
 
 func ff_put_rv40_chroma_mc8_rvv, zve32x
+        lpad    0
         li      t6, 8
         j       11b
 endfunc
 
 func ff_put_rv40_chroma_mc4_rvv, zve32x
+        lpad    0
         li      t6, 4
         j       11b
 endfunc
 
 func ff_avg_rv40_chroma_mc8_rvv, zve32x
+        lpad    0
         li      t6, 8
         j       21b
 endfunc
 
 func ff_avg_rv40_chroma_mc4_rvv, zve32x
+        lpad    0
         li      t6, 4
         j       21b
 endfunc
diff --git a/libavcodec/riscv/sbrdsp_rvv.S b/libavcodec/riscv/sbrdsp_rvv.S
index 331b88022c..7c6103d081 100644
--- a/libavcodec/riscv/sbrdsp_rvv.S
+++ b/libavcodec/riscv/sbrdsp_rvv.S
@@ -21,6 +21,7 @@ 
 #include "libavutil/riscv/asm.S"
 
 func ff_sbr_sum64x5_rvv, zve32f
+        lpad    0
         li      a5, 64
         addi    a1, a0, 64 * 4
         addi    a2, a0, 128 * 4
@@ -50,6 +51,7 @@  func ff_sbr_sum64x5_rvv, zve32f
 endfunc
 
 func ff_sbr_sum_square_rvv, zve32f
+        lpad    0
         vsetvli t0, zero, e32, m8, ta, ma
         slli    a1, a1, 1
         vmv.v.x v8, zero
@@ -69,6 +71,7 @@  NOHWF   fmv.x.w  a0, fa0
 endfunc
 
 func ff_sbr_autocorrelate_rvv, zve32f
+        lpad    0
         vsetvli t0, zero, e32, m4, ta, ma
         vmv.v.x v0, zero
         flw     fa0,   (a0)
@@ -158,6 +161,7 @@  func ff_sbr_autocorrelate_rvv, zve32f
 endfunc
 
 func ff_sbr_hf_gen_rvv, zve32f
+        lpad    0
 NOHWF   fmv.w.x fa0, a4
 NOHWF   mv      a4, a5
 NOHWF   mv      a5, a6
@@ -208,6 +212,7 @@  NOHWF   mv      a5, a6
 endfunc
 
 func ff_sbr_hf_g_filt_rvv, zve32f
+        lpad    0
         li      t1, 40 * 2 * 4
         sh3add  a1, a4, a1
 1:
@@ -273,15 +278,18 @@  endfunc
 .endm
 
 func ff_sbr_hf_apply_noise_0_rvv, zve32f, zbb
+        lpad    0
         hf_apply_noise 0
 endfunc
 
 func ff_sbr_hf_apply_noise_3_rvv, zve32f, zbb
-       not     a4, a4 // invert parity of kx
-       // fall through
+        lpad    0
+        not     a4, a4 // invert parity of kx
+        // fall through
 endfunc
 
 func ff_sbr_hf_apply_noise_1_rvv, zve32f, zbb
+        lpad    0
         vsetvli t0, zero, e32, m4, ta, ma
         vid.v   v4
         vxor.vx v4, v4, a4
@@ -290,5 +298,6 @@  func ff_sbr_hf_apply_noise_1_rvv, zve32f, zbb
 endfunc
 
 func ff_sbr_hf_apply_noise_2_rvv, zve32f, zbb
+        lpad    0
         hf_apply_noise 2
 endfunc
diff --git a/libavcodec/riscv/startcode_rvb.S b/libavcodec/riscv/startcode_rvb.S
index c043d59809..eec92d3340 100644
--- a/libavcodec/riscv/startcode_rvb.S
+++ b/libavcodec/riscv/startcode_rvb.S
@@ -37,6 +37,7 @@ 
         .endm
 
 func ff_startcode_find_candidate_rvb, zbb
+        lpad    0
         add     a1, a0, a1
 
         // Potentially unaligned head
diff --git a/libavcodec/riscv/startcode_rvv.S b/libavcodec/riscv/startcode_rvv.S
index 36a3369431..f4d0a0f087 100644
--- a/libavcodec/riscv/startcode_rvv.S
+++ b/libavcodec/riscv/startcode_rvv.S
@@ -27,6 +27,7 @@ 
 #include "libavutil/riscv/asm.S"
 
 func ff_startcode_find_candidate_rvv, zve32x
+        lpad    0
         mv       t0, a0
 1:
         vsetvli  t1, a1, e8, m8, ta, ma
diff --git a/libavcodec/riscv/svqenc_rvv.S b/libavcodec/riscv/svqenc_rvv.S
index cfc27154dd..55bec57a7b 100644
--- a/libavcodec/riscv/svqenc_rvv.S
+++ b/libavcodec/riscv/svqenc_rvv.S
@@ -21,6 +21,7 @@ 
 #include "libavutil/riscv/asm.S"
 
 func ff_ssd_int8_vs_int16_rvv, zve32x
+        lpad    0
         vsetvli      t0, zero, e32, m8, ta, ma
         vmv.v.x      v24, zero
 1:
diff --git a/libavcodec/riscv/takdsp_rvv.S b/libavcodec/riscv/takdsp_rvv.S
index fa942a3be6..3128d00a7c 100644
--- a/libavcodec/riscv/takdsp_rvv.S
+++ b/libavcodec/riscv/takdsp_rvv.S
@@ -22,6 +22,7 @@ 
 #include "libavutil/riscv/asm.S"
 
 func ff_decorrelate_ls_rvv, zve32x
+        lpad    0
 1:
         vsetvli  t0, a2, e32, m8, ta, ma
         sub      a2, a2, t0
@@ -36,6 +37,7 @@  func ff_decorrelate_ls_rvv, zve32x
 endfunc
 
 func ff_decorrelate_sr_rvv, zve32x
+        lpad    0
 1:
         vsetvli  t0, a2, e32, m8, ta, ma
         vle32.v  v0, (a0)
@@ -50,6 +52,7 @@  func ff_decorrelate_sr_rvv, zve32x
 endfunc
 
 func ff_decorrelate_sm_rvv, zve32x
+        lpad    0
 1:
         vsetvli  t0, a2, e32, m8, ta, ma
         vle32.v  v8, (a1)
@@ -68,6 +71,7 @@  func ff_decorrelate_sm_rvv, zve32x
 endfunc
 
 func ff_decorrelate_sf_rvv, zve32x
+        lpad    0
         csrwi    vxrm, 0
 1:
         vsetvli  t0, a2, e32, m8, ta, ma
diff --git a/libavcodec/riscv/utvideodsp_rvv.S b/libavcodec/riscv/utvideodsp_rvv.S
index fa70d0eb34..4c0c177d30 100644
--- a/libavcodec/riscv/utvideodsp_rvv.S
+++ b/libavcodec/riscv/utvideodsp_rvv.S
@@ -21,6 +21,7 @@ 
 #include "libavutil/riscv/asm.S"
 
 func ff_restore_rgb_planes_rvv, zve32x
+        lpad    0
         li      t1, -0x80
         sub     a3, a3, a6
         sub     a4, a4, a6
@@ -53,6 +54,7 @@  func ff_restore_rgb_planes_rvv, zve32x
 endfunc
 
 func ff_restore_rgb_planes10_rvv, zve32x
+        lpad    0
         li      t1, -0x200
         li      t2, 0x3FF
         sub     a3, a3, a6
diff --git a/libavcodec/riscv/vc1dsp_rvi.S b/libavcodec/riscv/vc1dsp_rvi.S
index d4a1b5bf49..7725bfb628 100644
--- a/libavcodec/riscv/vc1dsp_rvi.S
+++ b/libavcodec/riscv/vc1dsp_rvi.S
@@ -22,6 +22,7 @@ 
 
 #if __riscv_xlen >= 64
 func ff_put_pixels8x8_rvi
+        lpad    0
 .rept 8
         ld t0, (a1)
         sd t0, (a0)
@@ -33,6 +34,7 @@  func ff_put_pixels8x8_rvi
 endfunc
 
 func ff_put_pixels16x16_rvi
+        lpad    0
 .rept 16
         ld t0, (a1)
         ld t1, 8(a1)
diff --git a/libavcodec/riscv/vc1dsp_rvv.S b/libavcodec/riscv/vc1dsp_rvv.S
index d8b62579aa..2fcd125f55 100644
--- a/libavcodec/riscv/vc1dsp_rvv.S
+++ b/libavcodec/riscv/vc1dsp_rvv.S
@@ -22,6 +22,7 @@ 
 #include "libavutil/riscv/asm.S"
 
 func ff_vc1_inv_trans_8x8_dc_rvv, zve64x
+        lpad    0
         lh            t2, (a2)
         vsetivli      zero, 8, e8, mf2, ta, ma
         vlse64.v      v0, (a0), a1
@@ -44,6 +45,7 @@  func ff_vc1_inv_trans_8x8_dc_rvv, zve64x
 endfunc
 
 func ff_vc1_inv_trans_4x8_dc_rvv, zve32x
+        lpad    0
         lh            t2, (a2)
         vsetivli      zero, 8, e8, mf2, ta, ma
         vlse32.v      v0, (a0), a1
@@ -68,6 +70,7 @@  func ff_vc1_inv_trans_4x8_dc_rvv, zve32x
 endfunc
 
 func ff_vc1_inv_trans_8x4_dc_rvv, zve64x
+        lpad    0
         lh            t2, (a2)
         vsetivli      zero, 4, e8, mf4, ta, ma
         vlse64.v      v0, (a0), a1
@@ -91,6 +94,7 @@  func ff_vc1_inv_trans_8x4_dc_rvv, zve64x
 endfunc
 
 func ff_vc1_inv_trans_4x4_dc_rvv, zve32x
+        lpad    0
         lh            t2, (a2)
         vsetivli      zero, 4, e8, mf4, ta, ma
         vlse32.v      v0, (a0), a1
@@ -203,6 +207,7 @@  func ff_vc1_inv_trans_4_rvv, zve32x
 endfunc
 
 func ff_vc1_inv_trans_8x8_rvv, zve32x
+        lpad    0
         csrwi    vxrm, 0
         vsetivli zero, 8, e16, m1, ta, ma
         addi     a1, a0, 1 * 8 * 2
@@ -240,6 +245,7 @@  func ff_vc1_inv_trans_8x8_rvv, zve32x
 endfunc
 
 func ff_vc1_inv_trans_8x4_rvv, zve32x
+        lpad    0
         csrwi       vxrm, 0
         vsetivli    zero, 4, e16, mf2, ta, ma
         vlseg8e16.v v0, (a2)
@@ -285,6 +291,7 @@  func ff_vc1_inv_trans_8x4_rvv, zve32x
 endfunc
 
 func ff_vc1_inv_trans_4x8_rvv, zve32x
+        lpad    0
         li           a3, 8 * 2
         csrwi        vxrm, 0
         vsetivli     zero, 8, e16, m1, ta, ma
@@ -359,6 +366,7 @@  func ff_vc1_inv_trans_4x8_rvv, zve32x
 endfunc
 
 func ff_vc1_inv_trans_4x4_rvv, zve32x
+        lpad    0
         li           a3, 8 * 2
         csrwi        vxrm, 0
         vsetivli     zero, 4, e16, mf2, ta, ma
@@ -422,12 +430,14 @@  endfunc
 .endm
 
 func ff_avg_pixels16x16_rvv, zve32x
+        lpad    0
         li       t0, 16
         vsetivli zero, 16, e8, m1, ta, ma
         j        1f
 endfunc
 
 func ff_avg_pixels8x8_rvv, zve32x
+        lpad    0
         li        t0, 8
         vsetivli  zero, 8, e8, mf2, ta, ma
 1:
@@ -446,6 +456,7 @@  func ff_avg_pixels8x8_rvv, zve32x
 endfunc
 
 func ff_vc1_unescape_buffer_rvv, zve32x
+        lpad    0
         vsetivli       zero, 2, e8, m1, ta, ma
         vmv.v.i        v8, -1
         li             t4, 1
diff --git a/libavcodec/riscv/vorbisdsp_rvv.S b/libavcodec/riscv/vorbisdsp_rvv.S
index 81a6c62a65..14abebb20d 100644
--- a/libavcodec/riscv/vorbisdsp_rvv.S
+++ b/libavcodec/riscv/vorbisdsp_rvv.S
@@ -21,6 +21,7 @@ 
 #include "libavutil/riscv/asm.S"
 
 func ff_vorbis_inverse_coupling_rvv, zve32f
+        lpad    0
         fmv.w.x   ft0, zero
 1:
         vsetvli   t0, a2, e32, m4, ta, ma
diff --git a/libavcodec/riscv/vp7dsp_rvv.S b/libavcodec/riscv/vp7dsp_rvv.S
index 856b0e8c96..a0d257e5df 100644
--- a/libavcodec/riscv/vp7dsp_rvv.S
+++ b/libavcodec/riscv/vp7dsp_rvv.S
@@ -22,6 +22,7 @@ 
 
 #if __riscv_xlen >= 64
 func ff_vp7_luma_dc_wht_rvv, zve32x
+        lpad    0
         li          a2, 4 * 16 * 2
         li          a7, 16 * 2
         jal         t0, 1f
@@ -99,6 +100,7 @@  func ff_vp7_luma_dc_wht_rvv, zve32x
 endfunc
 
 func ff_vp7_idct_add_rvv, zve32x
+        lpad    0
         jal         t0, 1b
         csrwi       vxrm, 2
         vsetvli     zero, zero, e8, mf4, ta, ma
@@ -130,6 +132,7 @@  endfunc
 
 .irp type, y, uv
 func ff_vp7_idct_dc_add4\type\()_rvv, zve32x
+        lpad    0
         li       t0, 32
         vsetivli zero, 4, e16, mf2, ta, ma
         li       t1, 23170
diff --git a/libavcodec/riscv/vp8dsp_rvi.S b/libavcodec/riscv/vp8dsp_rvi.S
index 50ba4f293f..07d5c85032 100644
--- a/libavcodec/riscv/vp8dsp_rvi.S
+++ b/libavcodec/riscv/vp8dsp_rvi.S
@@ -22,6 +22,7 @@ 
 
 #if __riscv_xlen >= 64
 func ff_put_vp8_pixels16_rvi
+        lpad    0
 1:
         addi          a4, a4, -1
         ld            t0, (a2)
@@ -36,6 +37,7 @@  func ff_put_vp8_pixels16_rvi
 endfunc
 
 func ff_put_vp8_pixels8_rvi
+        lpad    0
 1:
         addi          a4, a4, -1
         ld            t0, (a2)
@@ -49,6 +51,7 @@  endfunc
 #endif
 
 func ff_put_vp8_pixels4_rvi
+        lpad    0
 1:
         addi          a4, a4, -1
         lw            t0, (a2)
diff --git a/libavcodec/riscv/vp8dsp_rvv.S b/libavcodec/riscv/vp8dsp_rvv.S
index d366748a0a..e5d5a80bf8 100644
--- a/libavcodec/riscv/vp8dsp_rvv.S
+++ b/libavcodec/riscv/vp8dsp_rvv.S
@@ -45,6 +45,7 @@ 
 
 #if __riscv_xlen >= 64
 func ff_vp8_luma_dc_wht_rvv, zve64x
+        lpad    0
         vsetivli    zero, 1, e64, m1, ta, ma
         vlseg4e64.v v4, (a1)
         vsetivli    zero, 4, e16, mf2, ta, ma
@@ -99,6 +100,7 @@  endfunc
 #endif
 
 func ff_vp8_idct_add_rvv, zve32x
+        lpad    0
         csrwi       vxrm, 0
         vsetivli    zero, 4, e16, mf2, ta, ma
         addi        a3, a1, 1 * 4 * 2
@@ -158,6 +160,7 @@  func ff_vp8_idct_add_rvv, zve32x
 endfunc
 
 func ff_vp8_idct_dc_add_rvv, zve32x
+        lpad    0
         lh      a3, (a1)
         addi    a3, a3, 4
         srai    a3, a3, 3
@@ -182,6 +185,7 @@  func ff_vp78_idct_dc_add_rvv, zve32x
 endfunc
 
 func ff_vp8_idct_dc_add4y_rvv, zve32x
+        lpad    0
         li       t0, 32
         vsetivli zero, 4, e16, mf2, ta, ma
         li       t1, 4 - (128 << 3)
@@ -217,6 +221,7 @@  func ff_vp78_idct_dc_add4y_rvv, zve32x
 endfunc
 
 func ff_vp8_idct_dc_add4uv_rvv, zve32x
+        lpad    0
         li       t0, 32
         vsetivli zero, 4, e16, mf2, ta, ma
         li       t1, 4 - (128 << 3)
@@ -265,6 +270,7 @@  endfunc
 
 .macro put_vp8_bilin_h_v type mn
 func ff_put_vp8_bilin4_\type\()_rvv, zve32x
+        lpad    0
         vsetvlstatic8   4
 .Lbilin_\type:
         li              t1, 8
@@ -310,6 +316,7 @@  put_vp8_bilin_h_v h a5
 put_vp8_bilin_h_v v a6
 
 func ff_put_vp8_bilin4_hv_rvv, zve32x
+        lpad    0
         vsetvlstatic8   4
 .Lbilin_hv:
         li              t3, 8
@@ -335,16 +342,19 @@  endfunc
 
 .irp len,16,8
 func ff_put_vp8_bilin\len\()_h_rvv, zve32x
+        lpad    0
         vsetvlstatic8 \len
         j             .Lbilin_h
 endfunc
 
 func ff_put_vp8_bilin\len\()_v_rvv, zve32x
+        lpad    0
         vsetvlstatic8 \len
         j             .Lbilin_v
 endfunc
 
 func ff_put_vp8_bilin\len\()_hv_rvv, zve32x
+        lpad    0
         vsetvlstatic8 \len
         j             .Lbilin_hv
 endfunc
@@ -441,6 +451,7 @@  endconst
 
 .macro epel len size type
 func ff_put_vp8_epel\len\()_\type\()\size\()_rvv, zve32x
+        lpad    0
         epel_filter     \size \type t
         vsetvlstatic8   \len
 1:
@@ -456,6 +467,7 @@  endfunc
 
 .macro epel_hv len hsize vsize
 func ff_put_vp8_epel\len\()_h\hsize\()v\vsize\()_rvv, zve32x
+        lpad    0
 #if __riscv_xlen == 64
         addi            sp, sp, -48
         .irp n,0,1,2,3,4,5
diff --git a/libavcodec/riscv/vp9_intra_rvi.S b/libavcodec/riscv/vp9_intra_rvi.S
index 16b6bdb25a..d14008f849 100644
--- a/libavcodec/riscv/vp9_intra_rvi.S
+++ b/libavcodec/riscv/vp9_intra_rvi.S
@@ -22,6 +22,7 @@ 
 
 #if __riscv_xlen >= 64
 func ff_v_32x32_rvi
+        lpad    0
         ld           t0, (a3)
         ld           t1, 8(a3)
         ld           t2, 16(a3)
@@ -43,6 +44,7 @@  func ff_v_32x32_rvi
 endfunc
 
 func ff_v_16x16_rvi
+        lpad    0
         ld           t0, (a3)
         ld           t1, 8(a3)
         .rept 8
@@ -58,6 +60,7 @@  func ff_v_16x16_rvi
 endfunc
 
 func ff_v_8x8_rvi
+        lpad    0
         ld           t0, (a3)
         .rept 4
         add          a7, a0, a1
diff --git a/libavcodec/riscv/vp9_intra_rvv.S b/libavcodec/riscv/vp9_intra_rvv.S
index beeb1ff88c..13d695c831 100644
--- a/libavcodec/riscv/vp9_intra_rvv.S
+++ b/libavcodec/riscv/vp9_intra_rvv.S
@@ -91,6 +91,7 @@ 
 
 .macro func_dc name size type n restore ext
 func ff_\()\name\()_\()\size\()x\size\()_rvv, \ext
+        lpad    0
 .if \size == 8
         dc_e64 \type \size \n \restore
 .else
@@ -119,6 +120,7 @@  func_dc dc_top   16  top  4  1  zve32x
 func_dc dc_top   8   top  3  0  zve64x
 
 func ff_h_32x32_rvv, zve32x
+        lpad    0
         li           t0, 32
         addi         a2, a2, 31
         vsetvli      zero, t0, e8, m2, ta, ma
@@ -139,6 +141,7 @@  func ff_h_32x32_rvv, zve32x
 endfunc
 
 func ff_h_16x16_rvv, zve32x
+        lpad    0
         addi         a2, a2, 15
         vsetivli     zero, 16, e8, m1, ta, ma
 
@@ -157,6 +160,7 @@  func ff_h_16x16_rvv, zve32x
 endfunc
 
 func ff_h_8x8_rvv, zve32x
+        lpad    0
         addi         a2, a2, 7
         vsetivli     zero, 8, e8, mf2, ta, ma
 
@@ -190,6 +194,7 @@  endfunc
 .endm
 
 func ff_tm_32x32_rvv, zve32x
+        lpad    0
         lbu          a4, -1(a3)
         li           t5, 32
 
@@ -244,6 +249,7 @@  func ff_tm_16x16_rvv, zve32x
 endfunc
 
 func ff_tm_8x8_rvv, zve32x
+        lpad    0
         vsetivli     zero, 8, e16, m1, ta, ma
         vle8.v       v8, (a3)
         vzext.vf2    v28, v8
@@ -269,6 +275,7 @@  func ff_tm_8x8_rvv, zve32x
 endfunc
 
 func ff_tm_4x4_rvv, zve32x
+        lpad    0
         vsetivli     zero, 4, e16, mf2, ta, ma
         vle8.v       v8, (a3)
         vzext.vf2    v28, v8
diff --git a/libavcodec/riscv/vp9_mc_rvi.S b/libavcodec/riscv/vp9_mc_rvi.S
index 0db14e83c7..4a8371b232 100644
--- a/libavcodec/riscv/vp9_mc_rvi.S
+++ b/libavcodec/riscv/vp9_mc_rvi.S
@@ -22,6 +22,7 @@ 
 
 #if __riscv_xlen >= 64
 func ff_copy64_rvi
+        lpad    0
 1:
         addi          a4, a4, -1
         ld            t0, (a2)
@@ -48,6 +49,7 @@  func ff_copy64_rvi
 endfunc
 
 func ff_copy32_rvi
+        lpad    0
 1:
         addi          a4, a4, -1
         ld            t0, (a2)
@@ -66,6 +68,7 @@  func ff_copy32_rvi
 endfunc
 
 func ff_copy16_rvi
+        lpad    0
 1:
         addi          a4, a4, -1
         ld            t0, (a2)
@@ -80,6 +83,7 @@  func ff_copy16_rvi
 endfunc
 
 func ff_copy8_rvi
+        lpad    0
 1:
         addi          a4, a4, -1
         ld            t0, (a2)
@@ -93,6 +97,7 @@  endfunc
 #endif
 
 func ff_copy4_rvi
+        lpad    0
 1:
         addi          a4, a4, -1
         lw            t0, (a2)
diff --git a/libavcodec/riscv/vp9_mc_rvv.S b/libavcodec/riscv/vp9_mc_rvv.S
index 7cb38ec94a..8d776661d9 100644
--- a/libavcodec/riscv/vp9_mc_rvv.S
+++ b/libavcodec/riscv/vp9_mc_rvv.S
@@ -38,6 +38,7 @@ 
 
 .macro copy_avg len
 func ff_vp9_avg\len\()_rvv, zve32x
+        lpad    0
         csrwi           vxrm, 0
         vsetvlstatic8   \len, t0, 64
 1: