@@ -380,7 +380,7 @@ function ff_vp9_\txfm1\()_\txfm2\()_8x8_add_neon, export=1
.ifc \txfm1\()_\txfm2,idct_idct
movrel x4, idct_coeffs
.else
- movrel x4, iadst8_coeffs
+ movrel x4, iadst8_coeffs
ld1 {v1.8h}, [x4], #16
.endif
ld1 {v0.8h}, [x4]
@@ -480,23 +480,23 @@ itxfm_func8x8 iadst, iadst
function idct16x16_dc_add_neon
- movrel x4, idct_coeffs
+ movrel x4, idct_coeffs
ld1 {v0.4h}, [x4]
- movi v1.4h, #0
+ movi v1.4h, #0
ld1 {v2.h}[0], [x2]
- smull v2.4s, v2.4h, v0.h[0]
- rshrn v2.4h, v2.4s, #14
- smull v2.4s, v2.4h, v0.h[0]
- rshrn v2.4h, v2.4s, #14
+ smull v2.4s, v2.4h, v0.h[0]
+ rshrn v2.4h, v2.4s, #14
+ smull v2.4s, v2.4h, v0.h[0]
+ rshrn v2.4h, v2.4s, #14
dup v2.8h, v2.h[0]
st1 {v1.h}[0], [x2]
- srshr v2.8h, v2.8h, #6
+ srshr v2.8h, v2.8h, #6
- mov x3, x0
- mov x4, #16
+ mov x3, x0
+ mov x4, #16
1:
// Loop to add the constant from v2 into all 16x16 outputs
subs x4, x4, #2
@@ -869,7 +869,7 @@ function ff_vp9_\txfm1\()_\txfm2\()_16x16_add_neon, export=1
.ifc \txfm1,idct
ld1 {v0.8h,v1.8h}, [x10]
.endif
- mov x9, #32
+ mov x9, #32
.ifc \txfm1\()_\txfm2,idct_idct
cmp w3, #10
@@ -1046,10 +1046,10 @@ idct16_partial quarter
idct16_partial half
function idct32x32_dc_add_neon
- movrel x4, idct_coeffs
+ movrel x4, idct_coeffs
ld1 {v0.4h}, [x4]
- movi v1.4h, #0
+ movi v1.4h, #0
ld1 {v2.h}[0], [x2]
smull v2.4s, v2.4h, v0.h[0]
@@ -1059,10 +1059,10 @@ function idct32x32_dc_add_neon
dup v2.8h, v2.h[0]
st1 {v1.h}[0], [x2]
- srshr v0.8h, v2.8h, #6
+ srshr v0.8h, v2.8h, #6
- mov x3, x0
- mov x4, #32
+ mov x3, x0
+ mov x4, #32
1:
// Loop to add the constant v0 into all 32x32 outputs
subs x4, x4, #2
@@ -1230,7 +1230,7 @@ endfunc
// x9 = double input stride
function idct32_1d_8x32_pass1\suffix\()_neon
mov x14, x30
- movi v2.8h, #0
+ movi v2.8h, #0
// v16 = IN(0), v17 = IN(2) ... v31 = IN(30)
.ifb \suffix
@@ -1295,7 +1295,7 @@ function idct32_1d_8x32_pass1\suffix\()_neon
.endif
add x2, x2, #64
- movi v2.8h, #0
+ movi v2.8h, #0
// v16 = IN(1), v17 = IN(3) ... v31 = IN(31)
.ifb \suffix
.irp i, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
@@ -530,7 +530,7 @@ function idct16x16_dc_add_neon
movrel r12, idct_coeffs
vld1.16 {d0}, [r12,:64]
- vmov.i16 q2, #0
+ vmov.i16 q2, #0
vld1.16 {d16[]}, [r2,:16]
vmull.s16 q8, d16, d0[0]
@@ -793,7 +793,7 @@ function \txfm\()16_1d_4x16_pass1_neon
push {lr}
mov r12, #32
- vmov.s16 q2, #0
+ vmov.s16 q2, #0
.irp i, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
vld1.16 {d\i}, [r2,:64]
vst1.16 {d4}, [r2,:64], r12
@@ -1142,7 +1142,7 @@ function idct32x32_dc_add_neon
movrel r12, idct_coeffs
vld1.16 {d0}, [r12,:64]
- vmov.i16 q2, #0
+ vmov.i16 q2, #0
vld1.16 {d16[]}, [r2,:16]
vmull.s16 q8, d16, d0[0]
@@ -1330,7 +1330,7 @@ function idct32_1d_4x32_pass1\suffix\()_neon
@ Double stride of the input, since we only read every other line
mov r12, #128
- vmov.s16 d4, #0
+ vmov.s16 d4, #0
@ d16 = IN(0), d17 = IN(2) ... d31 = IN(30)
.ifb \suffix
@@ -1394,7 +1394,7 @@ function idct32_1d_4x32_pass1\suffix\()_neon
.endif
add r2, r2, #64
- vmov.s16 d8, #0
+ vmov.s16 d8, #0
@ d16 = IN(1), d17 = IN(3) ... d31 = IN(31)
.ifb \suffix
.irp i, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
@@ -1533,9 +1533,9 @@ function idct32_1d_4x32_pass2\suffix\()_neon
.endif
vld1.32 {d12[]}, [r0,:32], r1
vld1.32 {d12[1]}, [r0,:32], r1
- vrshr.s16 q4, q4, #6
+ vrshr.s16 q4, q4, #6
vld1.32 {d13[]}, [r0,:32], r1
- vrshr.s16 q5, q5, #6
+ vrshr.s16 q5, q5, #6
vld1.32 {d13[1]}, [r0,:32], r1
sub r0, r0, r1, lsl #2
vaddw.u8 q4, q4, d12
@@ -828,7 +828,7 @@ function ff_vp9_loop_filter_v_16_16_neon, export=1
endfunc
function vp9_loop_filter_h_16_neon
- sub r12, r0, #8
+ sub r12, r0, #8
vld1.8 {d16}, [r12,:64], r1
vld1.8 {d24}, [r0, :64], r1
vld1.8 {d17}, [r12,:64], r1