diff mbox series

[FFmpeg-devel] lavc/aarch64: Move non-neon vp9 copy functions out of neon source file

Message ID CAB0OVGpAZ-27qUQq0CL=dJNE3r01wFE3PPnA=rJ8c_wQhJkMDg@mail.gmail.com
State Accepted
Headers show
Series [FFmpeg-devel] lavc/aarch64: Move non-neon vp9 copy functions out of neon source file | expand

Checks

Context Check Description
andriy/ffmpeg-patchwork success Make fate finished

Commit Message

Carl Eugen Hoyos March 11, 2020, 12:04 p.m. UTC
Hi!

Attached patch fixes part of ticket #8565 (compilation with
--disable-neon is broken on aarch64).

Please comment, Carl Eugen

Comments

Martin Storsjö March 11, 2020, 12:20 p.m. UTC | #1
On Wed, 11 Mar 2020, Carl Eugen Hoyos wrote:

> Hi!
>
> Attached patch fixes part of ticket #8565 (compilation with
> --disable-neon is broken on aarch64).

This looks ok to me.

// Martin
Carl Eugen Hoyos March 11, 2020, 1:31 p.m. UTC | #2
Am Mi., 11. März 2020 um 13:35 Uhr schrieb Martin Storsjö <martin@martin.st>:
>
> On Wed, 11 Mar 2020, Carl Eugen Hoyos wrote:
>
> > Hi!
> >
> > Attached patch fixes part of ticket #8565 (compilation with
> > --disable-neon is broken on aarch64).
>
> This looks ok to me.

Patch applied (and fixed).

Thank you, Carl Eugen
diff mbox series

Patch

From d96c8d26802978077d5d32b7aa2b535eca99cfea Mon Sep 17 00:00:00 2001
From: Carl Eugen Hoyos <ceffmpeg@gmail.com>
Date: Wed, 11 Mar 2020 13:01:02 +0100
Subject: [PATCH] lavc/aarch64: Move non-neon vp9 copy functions out of neon source file.

Fixes part of ticket #8565.
---
 libavcodec/aarch64/Makefile           |  1 +
 libavcodec/aarch64/vp9mc_16bpp_neon.S | 25 ---------
 libavcodec/aarch64/vp9mc_aarch64.S    | 81 +++++++++++++++++++++++++++
 libavcodec/aarch64/vp9mc_neon.S       | 30 ----------
 4 files changed, 82 insertions(+), 55 deletions(-)
 create mode 100644 libavcodec/aarch64/vp9mc_aarch64.S

diff --git a/libavcodec/aarch64/Makefile b/libavcodec/aarch64/Makefile
index 00f93bf59f..90e7210ee0 100644
--- a/libavcodec/aarch64/Makefile
+++ b/libavcodec/aarch64/Makefile
@@ -21,6 +21,7 @@  OBJS-$(CONFIG_VC1DSP)                   += aarch64/vc1dsp_init_aarch64.o
 OBJS-$(CONFIG_VORBIS_DECODER)           += aarch64/vorbisdsp_init.o
 OBJS-$(CONFIG_VP9_DECODER)              += aarch64/vp9dsp_init_10bpp_aarch64.o \
                                            aarch64/vp9dsp_init_12bpp_aarch64.o \
+                                           aarch64/vp9mc_aarch64.o             \
                                            aarch64/vp9dsp_init_aarch64.o
 
 # ARMv8 optimizations
diff --git a/libavcodec/aarch64/vp9mc_16bpp_neon.S b/libavcodec/aarch64/vp9mc_16bpp_neon.S
index cac6428709..53b372c262 100644
--- a/libavcodec/aarch64/vp9mc_16bpp_neon.S
+++ b/libavcodec/aarch64/vp9mc_16bpp_neon.S
@@ -25,31 +25,6 @@ 
 //                            const uint8_t *ref, ptrdiff_t ref_stride,
 //                            int h, int mx, int my);
 
-function ff_vp9_copy128_aarch64, export=1
-1:
-        ldp             x5,  x6,  [x2]
-        ldp             x7,  x8,  [x2, #16]
-        stp             x5,  x6,  [x0]
-        ldp             x9,  x10, [x2, #32]
-        stp             x7,  x8,  [x0, #16]
-        subs            w4,  w4,  #1
-        ldp             x11, x12, [x2, #48]
-        stp             x9,  x10, [x0, #32]
-        stp             x11, x12, [x0, #48]
-        ldp             x5,  x6,  [x2, #64]
-        ldp             x7,  x8,  [x2, #80]
-        stp             x5,  x6,  [x0, #64]
-        ldp             x9,  x10, [x2, #96]
-        stp             x7,  x8,  [x0, #80]
-        ldp             x11, x12, [x2, #112]
-        stp             x9,  x10, [x0, #96]
-        stp             x11, x12, [x0, #112]
-        add             x2,  x2,  x3
-        add             x0,  x0,  x1
-        b.ne            1b
-        ret
-endfunc
-
 function ff_vp9_avg64_16_neon, export=1
         mov             x5,  x0
         sub             x1,  x1,  #64
diff --git a/libavcodec/aarch64/vp9mc_aarch64.S b/libavcodec/aarch64/vp9mc_aarch64.S
new file mode 100644
index 0000000000..f17a8cf04a
--- /dev/null
+++ b/libavcodec/aarch64/vp9mc_aarch64.S
@@ -0,0 +1,81 @@ 
+/*
+ * Copyright (c) 2016 Google Inc.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/aarch64/asm.S"
+
+// All public functions in this file have the following signature:
+// typedef void (*vp9_mc_func)(uint8_t *dst, ptrdiff_t dst_stride,
+//                            const uint8_t *ref, ptrdiff_t ref_stride,
+//                            int h, int mx, int my);
+
+function ff_vp9_copy128_aarch64, export=1       // copy h rows of 128 bytes each from ref to dst, using only x registers (no NEON)
+1:                                              // row loop; in: x0 = dst, x1 = dst_stride, x2 = ref, x3 = ref_stride, w4 = h
+        ldp             x5,  x6,  [x2]          // load source bytes 0-15 of the row
+        ldp             x7,  x8,  [x2, #16]     // load source bytes 16-31
+        stp             x5,  x6,  [x0]          // store bytes 0-15
+        ldp             x9,  x10, [x2, #32]     // load source bytes 32-47
+        stp             x7,  x8,  [x0, #16]     // store bytes 16-31
+        subs            w4,  w4,  #1            // one row fewer to go; the flags set here are consumed by b.ne below
+        ldp             x11, x12, [x2, #48]     // load source bytes 48-63
+        stp             x9,  x10, [x0, #32]     // store bytes 32-47
+        stp             x11, x12, [x0, #48]     // store bytes 48-63
+        ldp             x5,  x6,  [x2, #64]     // second half of the row: x5-x12 are reused for bytes 64-127
+        ldp             x7,  x8,  [x2, #80]     // load source bytes 80-95
+        stp             x5,  x6,  [x0, #64]     // store bytes 64-79
+        ldp             x9,  x10, [x2, #96]     // load source bytes 96-111
+        stp             x7,  x8,  [x0, #80]     // store bytes 80-95
+        ldp             x11, x12, [x2, #112]    // load source bytes 112-127
+        stp             x9,  x10, [x0, #96]     // store bytes 96-111
+        stp             x11, x12, [x0, #112]    // store bytes 112-127
+        add             x2,  x2,  x3            // ref += ref_stride (plain add, leaves the flags untouched)
+        add             x0,  x0,  x1            // dst += dst_stride (plain add, leaves the flags untouched)
+        b.ne            1b                      // repeat while the decremented h != 0; test is at the bottom, so presumably callers pass h >= 1 - TODO confirm
+        ret                                     // clobbered: x0, x2, w4, x5-x12 and the condition flags
+endfunc
+
+function ff_vp9_copy64_aarch64, export=1        // copy h rows of 64 bytes each from ref to dst, using only x registers (no NEON)
+1:                                              // row loop; in: x0 = dst, x1 = dst_stride, x2 = ref, x3 = ref_stride, w4 = h
+        ldp             x5,  x6,  [x2]          // load source bytes 0-15 of the row
+        ldp             x7,  x8,  [x2, #16]     // load source bytes 16-31
+        stp             x5,  x6,  [x0]          // store bytes 0-15
+        ldp             x9,  x10, [x2, #32]     // load source bytes 32-47
+        stp             x7,  x8,  [x0, #16]     // store bytes 16-31
+        subs            w4,  w4,  #1            // one row fewer to go; the flags set here are consumed by b.ne below
+        ldp             x11, x12, [x2, #48]     // load source bytes 48-63
+        stp             x9,  x10, [x0, #32]     // store bytes 32-47
+        stp             x11, x12, [x0, #48]     // store bytes 48-63
+        add             x2,  x2,  x3            // ref += ref_stride (plain add, leaves the flags untouched)
+        add             x0,  x0,  x1            // dst += dst_stride (plain add, leaves the flags untouched)
+        b.ne            1b                      // repeat while the decremented h != 0; test is at the bottom, so presumably callers pass h >= 1 - TODO confirm
+        ret                                     // clobbered: x0, x2, w4, x5-x12 and the condition flags
+endfunc
+
+function ff_vp9_copy32_aarch64, export=1        // copy h rows of 32 bytes each from ref to dst, using only x registers (no NEON)
+1:                                              // row loop; in: x0 = dst, x1 = dst_stride, x2 = ref, x3 = ref_stride, w4 = h
+        ldp             x5,  x6,  [x2]          // load source bytes 0-15 of the row
+        ldp             x7,  x8,  [x2, #16]     // load source bytes 16-31
+        stp             x5,  x6,  [x0]          // store bytes 0-15
+        subs            w4,  w4,  #1            // one row fewer to go; the flags set here are consumed by b.ne below
+        stp             x7,  x8,  [x0, #16]     // store bytes 16-31
+        add             x2,  x2,  x3            // ref += ref_stride (plain add, leaves the flags untouched)
+        add             x0,  x0,  x1            // dst += dst_stride (plain add, leaves the flags untouched)
+        b.ne            1b                      // repeat while the decremented h != 0; test is at the bottom, so presumably callers pass h >= 1 - TODO confirm
+        ret                                     // clobbered: x0, x2, w4, x5-x8 and the condition flags
+endfunc
diff --git a/libavcodec/aarch64/vp9mc_neon.S b/libavcodec/aarch64/vp9mc_neon.S
index f67624ca04..abf2bae9db 100644
--- a/libavcodec/aarch64/vp9mc_neon.S
+++ b/libavcodec/aarch64/vp9mc_neon.S
@@ -25,23 +25,6 @@ 
 //                            const uint8_t *ref, ptrdiff_t ref_stride,
 //                            int h, int mx, int my);
 
-function ff_vp9_copy64_aarch64, export=1
-1:
-        ldp             x5,  x6,  [x2]
-        ldp             x7,  x8,  [x2, #16]
-        stp             x5,  x6,  [x0]
-        ldp             x9,  x10, [x2, #32]
-        stp             x7,  x8,  [x0, #16]
-        subs            w4,  w4,  #1
-        ldp             x11, x12, [x2, #48]
-        stp             x9,  x10, [x0, #32]
-        stp             x11, x12, [x0, #48]
-        add             x2,  x2,  x3
-        add             x0,  x0,  x1
-        b.ne            1b
-        ret
-endfunc
-
 function ff_vp9_avg64_neon, export=1
         mov             x5,  x0
 1:
@@ -64,19 +47,6 @@  function ff_vp9_avg64_neon, export=1
         ret
 endfunc
 
-function ff_vp9_copy32_aarch64, export=1
-1:
-        ldp             x5,  x6,  [x2]
-        ldp             x7,  x8,  [x2, #16]
-        stp             x5,  x6,  [x0]
-        subs            w4,  w4,  #1
-        stp             x7,  x8,  [x0, #16]
-        add             x2,  x2,  x3
-        add             x0,  x0,  x1
-        b.ne            1b
-        ret
-endfunc
-
 function ff_vp9_avg32_neon, export=1
 1:
         ld1             {v2.16b, v3.16b},  [x2], x3
-- 
2.24.1