diff mbox series

[FFmpeg-devel] arm64: Add Armv8.3-A PAC support to assembly files

Message ID DBBPR08MB55477152C288659E8492A8DFEF339@DBBPR08MB5547.eurprd08.prod.outlook.com
State New
Headers show
Series [FFmpeg-devel] arm64: Add Armv8.3-A PAC support to assembly files | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished
andriy/make_ppc success Make finished
andriy/make_fate_ppc success Make fate finished
andriy/make_aarch64_jetson success Make finished
andriy/make_fate_aarch64_jetson success Make fate finished
andriy/make_armv7_RPi4 success Make finished
andriy/make_fate_armv7_RPi4 success Make fate finished

Commit Message

Andre Kempe Feb. 14, 2022, 12:28 p.m. UTC
This patch adds optional support for Arm Pointer Authentication Codes.

PAC support is turned on or off at compile time using additional
compiler flags. Unless any of these is enabled explicitly, no additional
code will be emitted at all.

Signed-off-by: André Kempe <andre.kempe@arm.com>
---
 libavcodec/aarch64/fft_neon.S          |   3 +-
 libavcodec/aarch64/mdct_neon.S         |  17 ++++-
 libavcodec/aarch64/synth_filter_neon.S |   2 +
 libavutil/aarch64/asm.S                | 102 +++++++++++++++++++++++--
 4 files changed, 113 insertions(+), 11 deletions(-)

--
2.25.1

IMPORTANT NOTICE: The contents of this email and any attachments are confidential and may also be privileged. If you are not the intended recipient, please notify the sender immediately and do not disclose the contents to any other person, use it for any purpose, or store or copy the information in any medium. Thank you.

Comments

Martin Storsjö Feb. 22, 2022, 1:53 p.m. UTC | #1
On Mon, 14 Feb 2022, Andre Kempe wrote:

> This patch adds optional support for Arm Pointer Authentication Codes.
>
> PAC support is turned on or off at compile time using additional
> compiler flags. Unless any of these is enabled explicitly, no additional
> code will be emitted at all.
>
> Signed-off-by: André Kempe <andre.kempe@arm.com>
> ---
> libavcodec/aarch64/fft_neon.S          |   3 +-
> libavcodec/aarch64/mdct_neon.S         |  17 ++++-
> libavcodec/aarch64/synth_filter_neon.S |   2 +
> libavutil/aarch64/asm.S                | 102 +++++++++++++++++++++++--
> 4 files changed, 113 insertions(+), 11 deletions(-)

LGTM, this looks generally correct and consistent to me.

But I realized one thing (which probably should be fixed both here and in 
dav1d, but which I guess we can fix separately afterwards too) - I guess 
we should only output the .note.gnu.property section if we're targeting 
ELF. (I guess that PAC/BTI aren't used on other platforms yet, but it's 
plausible that they will be, and then they won't use ELF note sections to 
signal it.)

// Martin
Martin Storsjö March 9, 2022, 1:13 p.m. UTC | #2
On Tue, 22 Feb 2022, Martin Storsjö wrote:

> On Mon, 14 Feb 2022, Andre Kempe wrote:
>
>> This patch adds optional support for Arm Pointer Authentication Codes.
>> 
>> PAC support is turned on or off at compile time using additional
>> compiler flags. Unless any of these is enabled explicitly, no additional
>> code will be emitted at all.
>> 
>> Signed-off-by: André Kempe <andre.kempe@arm.com>
>> ---
>> libavcodec/aarch64/fft_neon.S          |   3 +-
>> libavcodec/aarch64/mdct_neon.S         |  17 ++++-
>> libavcodec/aarch64/synth_filter_neon.S |   2 +
>> libavutil/aarch64/asm.S                | 102 +++++++++++++++++++++++--
>> 4 files changed, 113 insertions(+), 11 deletions(-)
>
> LGTM, this looks generally correct and consistent to me.

Pushed now.

// Martin
diff mbox series

Patch

diff --git a/libavcodec/aarch64/fft_neon.S b/libavcodec/aarch64/fft_neon.S
index b4020fc8c7..dd5f55eb87 100644
--- a/libavcodec/aarch64/fft_neon.S
+++ b/libavcodec/aarch64/fft_neon.S
@@ -340,7 +340,7 @@  endfunc

 .macro  def_fft n, n2, n4
 function fft\n\()_neon, align=6
-        AARCH64_VALID_JUMP_TARGET
+        AARCH64_SIGN_LINK_REGISTER
         sub             sp,  sp,  #16
         stp             x28, x30, [sp]
         add             x28, x0,  #\n4*2*8
@@ -351,6 +351,7 @@  function fft\n\()_neon, align=6
         bl              fft\n4\()_neon
         sub             x0,  x28, #\n4*2*8
         ldp             x28, x30, [sp], #16
+        AARCH64_VALIDATE_LINK_REGISTER
         movrel          x4,  X(ff_cos_\n)
         mov             x2,  #\n4>>1
         b               fft_pass_neon
diff --git a/libavcodec/aarch64/mdct_neon.S b/libavcodec/aarch64/mdct_neon.S
index 1fd199c972..6091e72022 100644
--- a/libavcodec/aarch64/mdct_neon.S
+++ b/libavcodec/aarch64/mdct_neon.S
@@ -25,6 +25,7 @@ 
 function ff_imdct_half_neon, export=1
         sub             sp,  sp,  #32
         stp             x19, x20, [sp]
+        AARCH64_SIGN_LINK_REGISTER
         str             x30, [sp, #16]
         mov             x12, #1
         ldr             w14, [x0, #28]          // mdct_bits
@@ -121,6 +122,7 @@  function ff_imdct_half_neon, export=1

         ldp             x19, x20, [sp]
         ldr             x30, [sp, #16]
+        AARCH64_VALIDATE_LINK_REGISTER
         add             sp,  sp,  #32

         ret
@@ -129,6 +131,7 @@  endfunc
 function ff_imdct_calc_neon, export=1
         sub             sp,  sp,  #32
         stp             x19, x20, [sp]
+        AARCH64_SIGN_LINK_REGISTER
         str             x30, [sp, #16]
         ldr             w3,  [x0, #28]          // mdct_bits
         mov             x19, #1
@@ -160,8 +163,10 @@  function ff_imdct_calc_neon, export=1
         subs            x19, x19,  #16
         b.gt            1b

-        ldp             x19, x20, [sp], #16
-        ldr             x30, [sp], #16
+        ldp             x19, x20, [sp]
+        ldr             x30, [sp, #16]
+        AARCH64_VALIDATE_LINK_REGISTER
+        add             sp,  sp,  #32

         ret
 endfunc
@@ -170,6 +175,7 @@  endfunc
 function ff_mdct_calc_neon, export=1
         sub             sp,  sp,  #32
         stp             x19, x20, [sp]
+        AARCH64_SIGN_LINK_REGISTER
         str             x30, [sp, #16]

         mov             x12, #1
@@ -317,7 +323,10 @@  function ff_mdct_calc_neon, export=1
         st2             {v4.2s,v5.2s},  [x0]
         st2             {v6.2s,v7.2s},  [x8]

-        ldp             x19, x20, [sp], #16
-        ldr             x30, [sp], #16
+        ldp             x19, x20, [sp]
+        ldr             x30, [sp, #16]
+        AARCH64_VALIDATE_LINK_REGISTER
+        add             sp,  sp,  #32
+
         ret
 endfunc
diff --git a/libavcodec/aarch64/synth_filter_neon.S b/libavcodec/aarch64/synth_filter_neon.S
index 8fcd71f252..ba79ba9686 100644
--- a/libavcodec/aarch64/synth_filter_neon.S
+++ b/libavcodec/aarch64/synth_filter_neon.S
@@ -52,6 +52,7 @@  function ff_synth_filter_float_neon, export=1
         stp             x5,  x1,  [sp, #16]
         and             x7,  x7,  #~63
         and             w8,  w8,  #511
+        AARCH64_SIGN_LINK_REGISTER
         stp             x7,  x30, [sp, #32]
         str             w8,  [x2]
         str             s0,  [sp, #48]
@@ -63,6 +64,7 @@  function ff_synth_filter_float_neon, export=1
         ldp             x2,  x4,  [sp]          // synct_buf_2, window
         ldp             x13, x9,  [sp, #16]     // out, synth_buf
         ldp             x0,  x30, [sp, #32]     // *synth_buf_offset
+        AARCH64_VALIDATE_LINK_REGISTER
         ldr             s0,  [sp, #48]

         add             x3,  x2,  #16*4         // synct_buf_2 + 16
diff --git a/libavutil/aarch64/asm.S b/libavutil/aarch64/asm.S
index bee91d59c3..b817eaab22 100644
--- a/libavutil/aarch64/asm.S
+++ b/libavutil/aarch64/asm.S
@@ -36,10 +36,79 @@ 
 #   define __has_feature(x) 0
 #endif

-/* Support macros for the Armv8.5-A Branch Target Identification feature which
- * requires emitting a .note.gnu.property section with the appropriate
- * architecture-dependent feature bits set.
- * Read more: "ELF for the Arm® 64-bit Architecture"
+
+/* Support macros for
+ *   - Armv8.3-A Pointer Authentication and
+ *   - Armv8.5-A Branch Target Identification
+ * features which require emitting a .note.gnu.property section with the
+ * appropriate architecture-dependent feature bits set.
+ *
+ * |AARCH64_SIGN_LINK_REGISTER| and |AARCH64_VALIDATE_LINK_REGISTER| expand to
+ * PACIxSP and AUTIxSP, respectively. |AARCH64_SIGN_LINK_REGISTER| should be
+ * used immediately before saving the LR register (x30) to the stack.
+ * |AARCH64_VALIDATE_LINK_REGISTER| should be used immediately after restoring
+ * it. Note |AARCH64_SIGN_LINK_REGISTER|'s modifications to LR must be undone
+ * with |AARCH64_VALIDATE_LINK_REGISTER| before RET. The SP register must also
+ * have the same value at the two points. For example:
+ *
+ *   .global f
+ *   f:
+ *     AARCH64_SIGN_LINK_REGISTER
+ *     stp x29, x30, [sp, #-96]!
+ *     mov x29, sp
+ *     ...
+ *     ldp x29, x30, [sp], #96
+ *     AARCH64_VALIDATE_LINK_REGISTER
+ *     ret
+ *
+ * |AARCH64_VALID_CALL_TARGET| expands to BTI 'c'. Either it, or
+ * |AARCH64_SIGN_LINK_REGISTER|, must be used at every point that may be an
+ * indirect call target. In particular, all symbols exported from a file must
+ * begin with one of these macros. For example, a leaf function that does not
+ * save LR can instead use |AARCH64_VALID_CALL_TARGET|:
+ *
+ *   .globl return_zero
+ *   return_zero:
+ *     AARCH64_VALID_CALL_TARGET
+ *     mov x0, #0
+ *     ret
+ *
+ * A non-leaf function which does not immediately save LR may need both macros
+ * because |AARCH64_SIGN_LINK_REGISTER| appears late. For example, the function
+ * may jump to an alternate implementation before setting up the stack:
+ *
+ *   .globl with_early_jump
+ *   with_early_jump:
+ *     AARCH64_VALID_CALL_TARGET
+ *     cmp x0, #128
+ *     b.lt .Lwith_early_jump_128
+ *     AARCH64_SIGN_LINK_REGISTER
+ *     stp x29, x30, [sp, #-96]!
+ *     mov x29, sp
+ *     ...
+ *     ldp x29, x30, [sp], #96
+ *     AARCH64_VALIDATE_LINK_REGISTER
+ *     ret
+ *
+ *  .Lwith_early_jump_128:
+ *     ...
+ *     ret
+ *
+ * These annotations are only required with indirect calls. Private symbols that
+ * are only the target of direct calls do not require annotations. Also note
+ * that |AARCH64_VALID_CALL_TARGET| is only valid for indirect calls (BLR), not
+ * indirect jumps (BR). Indirect jumps in assembly are supported through
+ * |AARCH64_VALID_JUMP_TARGET|. Landing pads that must serve both jumps and
+ * calls can be created using |AARCH64_VALID_JUMP_CALL_TARGET|.
+ *
+ * Although not necessary, it is safe to use these macros in 32-bit ARM
+ * assembly. This may be used to simplify dual 32-bit and 64-bit files.
+ *
+ * References:
+ * - "ELF for the Arm® 64-bit Architecture"
+ *   https://github.com/ARM-software/abi-aa/blob/master/aaelf64/aaelf64.rst
+ * - "Providing protection for complex software"
+ *   https://developer.arm.com/architectures/learn-the-architecture/providing-protection-for-complex-software
  */
 #if defined(__ARM_FEATURE_BTI_DEFAULT) && (__ARM_FEATURE_BTI_DEFAULT == 1)
 #   define GNU_PROPERTY_AARCH64_BTI (1 << 0)   // Has BTI
@@ -51,7 +120,28 @@ 
 #   define AARCH64_VALID_JUMP_TARGET
 #endif

-#if (GNU_PROPERTY_AARCH64_BTI != 0)
+#if defined(__ARM_FEATURE_PAC_DEFAULT)
+#   if ((__ARM_FEATURE_PAC_DEFAULT & (1 << 0)) != 0) // authentication using key A
+#       define AARCH64_SIGN_LINK_REGISTER      paciasp
+#       define AARCH64_VALIDATE_LINK_REGISTER  autiasp
+#   elif ((__ARM_FEATURE_PAC_DEFAULT & (1 << 1)) != 0) // authentication using key B
+#       define AARCH64_SIGN_LINK_REGISTER      pacibsp
+#       define AARCH64_VALIDATE_LINK_REGISTER  autibsp
+#   else
+#       error Pointer authentication defines no valid key!
+#   endif
+#   if ((__ARM_FEATURE_PAC_DEFAULT & (1 << 2)) != 0)
+#       error Authentication of leaf functions is enabled but not supported in FFmpeg!
+#   endif
+#   define GNU_PROPERTY_AARCH64_PAC (1 << 1)
+#else
+#   define GNU_PROPERTY_AARCH64_PAC 0
+#   define AARCH64_SIGN_LINK_REGISTER
+#   define AARCH64_VALIDATE_LINK_REGISTER
+#endif
+
+
+#if (GNU_PROPERTY_AARCH64_BTI != 0 || GNU_PROPERTY_AARCH64_PAC != 0)
         .pushsection .note.gnu.property, "a"
         .balign 8
         .long 4
@@ -60,7 +150,7 @@ 
         .asciz "GNU"
         .long 0xc0000000 /* GNU_PROPERTY_AARCH64_FEATURE_1_AND */
         .long 4
-        .long GNU_PROPERTY_AARCH64_BTI
+        .long (GNU_PROPERTY_AARCH64_BTI | GNU_PROPERTY_AARCH64_PAC)
         .long 0
         .popsection
 #endif