Message ID | 20220713204854.3114817-1-martin@martin.st |
---|---|
State | Accepted |
Commit | 02e7853fd94aa78a5f0990ee0105a291172a5eab |
Headers | show |
Series | [FFmpeg-devel,1/5] libavcodec: aarch64: Don't clobber v8 in the h%4 case in ff_pix_abs16_xy2_neon | expand |
Context | Check | Description |
---|---|---|
andriy/make_x86 | success | Make finished |
andriy/make_fate_x86 | success | Make fate finished |
LGTM.

-- Jonathan Swinney

On 7/13/22, 3:49 PM, "Martin Storsjö" <martin@martin.st> wrote:

CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you can confirm the sender and know the content is safe.

Checkasm doesn't currently test this codepath.
---
 libavcodec/aarch64/me_cmp_neon.S | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavcodec/aarch64/me_cmp_neon.S b/libavcodec/aarch64/me_cmp_neon.S
index e49d049fc2..31db3793d9 100644
--- a/libavcodec/aarch64/me_cmp_neon.S
+++ b/libavcodec/aarch64/me_cmp_neon.S
@@ -189,11 +189,11 @@ function ff_pix_abs16_xy2_neon, export=1
         urshr           v16.8h, v16.8h, #2          // shift right by 2 0..7 (rounding shift right)
         urshr           v17.8h, v17.8h, #2          // shift right by 2 8..15

-        uxtl2           v8.8h, v1.16b               // 8->16 bits pix1 8..15
+        uxtl2           v7.8h, v1.16b               // 8->16 bits pix1 8..15
         uxtl            v1.8h, v1.8b                // 8->16 bits pix1 0..7

         uabd            v6.8h, v1.8h, v16.8h        // absolute difference 0..7
-        uaba            v6.8h, v8.8h, v17.8h        // absolute difference accumulate 8..15
+        uaba            v6.8h, v7.8h, v17.8h        // absolute difference accumulate 8..15
         mov             v2.16b, v18.16b             // pix3 -> pix2
         mov             v3.16b, v19.16b             // pix3+1 -> pix2+1
         uaddlv          s6, v6.8h                   // add up accumulator in v6
--
2.25.1
diff --git a/libavcodec/aarch64/me_cmp_neon.S b/libavcodec/aarch64/me_cmp_neon.S
index e49d049fc2..31db3793d9 100644
--- a/libavcodec/aarch64/me_cmp_neon.S
+++ b/libavcodec/aarch64/me_cmp_neon.S
@@ -189,11 +189,11 @@ function ff_pix_abs16_xy2_neon, export=1
         urshr           v16.8h, v16.8h, #2          // shift right by 2 0..7 (rounding shift right)
         urshr           v17.8h, v17.8h, #2          // shift right by 2 8..15

-        uxtl2           v8.8h, v1.16b               // 8->16 bits pix1 8..15
+        uxtl2           v7.8h, v1.16b               // 8->16 bits pix1 8..15
         uxtl            v1.8h, v1.8b                // 8->16 bits pix1 0..7

         uabd            v6.8h, v1.8h, v16.8h        // absolute difference 0..7
-        uaba            v6.8h, v8.8h, v17.8h        // absolute difference accumulate 8..15
+        uaba            v6.8h, v7.8h, v17.8h        // absolute difference accumulate 8..15
         mov             v2.16b, v18.16b             // pix3 -> pix2
         mov             v3.16b, v19.16b             // pix3+1 -> pix2+1
         uaddlv          s6, v6.8h                   // add up accumulator in v6