@@ -257,6 +257,9 @@ func ff_vc1_inv_trans_8x4_rvv, zve32x
vsetivli zero, 4, e16, mf2, ta, ma
vlseg8e16.v v0, (a2)
jal t0, ff_vc1_inv_trans_8_rvv
+ .irp n,0,1,2,3,4,5,6,7
+ vssra.vi v\n, v\n, 3
+ .endr
vsseg8e16.v v0, (a2)
addi a3, a2, 1 * 8 * 2
vsetivli zero, 8, e16, m1, ta, ma
@@ -266,10 +269,6 @@ func ff_vc1_inv_trans_8x4_rvv, zve32x
addi a5, a2, 3 * 8 * 2
vle16.v v2, (a4)
vle16.v v3, (a5)
- .irp n,0,1,2,3
- # shift 4 vectors of 8 elems after transpose instead of 8 of 4
- vssra.vi v\n, v\n, 3
- .endr
li t1, 7
jal t0, ff_vc1_inv_trans_4_rvv
add a3, a1, a0