diff mbox

[FFmpeg-devel] avcodec/arm/sbcenc: save callee preserved vfp registers

Message ID 20190728214606.15179-1-jcowgill@debian.org
State New
Headers show

Commit Message

James Cowgill July 28, 2019, 9:46 p.m. UTC
When compiling FFmpeg with GCC-9, seemingly random segfaults were
observed in code which had previously called down into the SBC encoder
NEON assembly routines. This was caused by these functions clobbering
some of the callee-saved VFP registers (d8 - d15, aka q4 - q7). GCC was
using these registers to hold local variables, but after these
functions returned, they would contain garbage.

Fix by saving the relevant registers on the stack in the affected
functions.

Signed-off-by: James Cowgill <jcowgill@debian.org>
---
 libavcodec/arm/sbcdsp_neon.S | 6 ++++++
 1 file changed, 6 insertions(+)

Comments

Reimar Döffinger July 29, 2019, 6:59 p.m. UTC | #1
Seems sensible to me, though extra points if you or someone else has numbers on the performance impact,
so that we know whether it would be worthwhile to check if it can be optimized.

On 28.07.2019, at 23:46, James Cowgill <jcowgill@debian.org> wrote:

> When compiling FFmpeg with GCC-9, some very random segfaults were
> observed in code which had previously called down into the SBC encoder
> NEON assembly routines. This was caused by these functions clobbering
> some of the vfp callee saved registers (d8 - d15 aka q4 - q7). GCC was
> using these registers to save local variables, but after these
> functions returned, they would contain garbage.
> 
> Fix by saving the relevant registers on the stack in the affected
> functions.
> 
> Signed-off-by: James Cowgill <jcowgill@debian.org>
> ---
> libavcodec/arm/sbcdsp_neon.S | 6 ++++++
> 1 file changed, 6 insertions(+)
> 
> diff --git a/libavcodec/arm/sbcdsp_neon.S b/libavcodec/arm/sbcdsp_neon.S
> index d83d21d202..aa03800096 100644
> --- a/libavcodec/arm/sbcdsp_neon.S
> +++ b/libavcodec/arm/sbcdsp_neon.S
> @@ -38,6 +38,8 @@ function ff_sbc_analyze_4_neon, export=1
>         /* TODO: merge even and odd cases (or even merge all four calls to this
>          * function) in order to have only aligned reads from 'in' array
>          * and reduce number of load instructions */
> +        vpush           {d8-d11}
> +
>         vld1.16         {d4, d5}, [r0, :64]!
>         vld1.16         {d8, d9}, [r2, :128]!
> 
> @@ -84,6 +86,7 @@ function ff_sbc_analyze_4_neon, export=1
> 
>         vst1.32         {d0, d1}, [r1, :128]
> 
> +        vpop            {d8-d11}
>         bx              lr
> endfunc
> 
> @@ -91,6 +94,8 @@ function ff_sbc_analyze_8_neon, export=1
>         /* TODO: merge even and odd cases (or even merge all four calls to this
>          * function) in order to have only aligned reads from 'in' array
>          * and reduce number of load instructions */
> +        vpush           {d8-d15}
> +
>         vld1.16         {d4, d5}, [r0, :64]!
>         vld1.16         {d8, d9}, [r2, :128]!
> 
> @@ -188,6 +193,7 @@ function ff_sbc_analyze_8_neon, export=1
> 
>         vst1.32         {d0, d1, d2, d3}, [r1, :128]
> 
> +        vpop            {d8-d15}
>         bx              lr
> endfunc
> 
> -- 
> 2.22.0
> 
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
diff mbox

Patch

diff --git a/libavcodec/arm/sbcdsp_neon.S b/libavcodec/arm/sbcdsp_neon.S
index d83d21d202..aa03800096 100644
--- a/libavcodec/arm/sbcdsp_neon.S
+++ b/libavcodec/arm/sbcdsp_neon.S
@@ -38,6 +38,8 @@  function ff_sbc_analyze_4_neon, export=1
         /* TODO: merge even and odd cases (or even merge all four calls to this
          * function) in order to have only aligned reads from 'in' array
          * and reduce number of load instructions */
+        vpush           {d8-d11}
+
         vld1.16         {d4, d5}, [r0, :64]!
         vld1.16         {d8, d9}, [r2, :128]!
 
@@ -84,6 +86,7 @@  function ff_sbc_analyze_4_neon, export=1
 
         vst1.32         {d0, d1}, [r1, :128]
 
+        vpop            {d8-d11}
         bx              lr
 endfunc
 
@@ -91,6 +94,8 @@  function ff_sbc_analyze_8_neon, export=1
         /* TODO: merge even and odd cases (or even merge all four calls to this
          * function) in order to have only aligned reads from 'in' array
          * and reduce number of load instructions */
+        vpush           {d8-d15}
+
         vld1.16         {d4, d5}, [r0, :64]!
         vld1.16         {d8, d9}, [r2, :128]!
 
@@ -188,6 +193,7 @@  function ff_sbc_analyze_8_neon, export=1
 
         vst1.32         {d0, d1, d2, d3}, [r1, :128]
 
+        vpop            {d8-d15}
         bx              lr
 endfunc