diff mbox series

[FFmpeg-devel] avfft: avoid overreads with RDFT API users

Message ID NqDuJcZ--7-9@lynne.ee
State New
Headers show
Series [FFmpeg-devel] avfft: avoid overreads with RDFT API users | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Lynne Feb. 9, 2024, 5:27 p.m. UTC
The new API requires an extra array member at the very end,
which old API users did not do.

This disables in-place RDFT transforms and instead
does the transform out of place by copying once, there shouldn't
be a significant loss of speed as our in-place FFT requires a reorder
which is likely more expensive in the majority of cases to do.

Patch attached.

Comments

Matthieu Bouron Feb. 9, 2024, 7:18 p.m. UTC | #1
On Fri, Feb 09, 2024 at 06:27:50PM +0100, Lynne wrote:
> The new API requires an extra array member at the very end,
> which old API users did not do.
> 
> This disables in-place RDFT transforms and instead
> does the transform out of place by copying once, there shouldn't
> be a significant loss of speed as our in-place FFT requires a reorder
> which is likely more expensive in the majority of cases to do.
> 
> Patch attached.
> 

> From 763f2e8941dc3dd4282ad42574bd8d451a256a53 Mon Sep 17 00:00:00 2001
> From: Lynne <dev@lynne.ee>
> Date: Fri, 9 Feb 2024 18:17:54 +0100
> Subject: [PATCH] avfft: avoid overreads with RDFT API users
> 
> The new API requires an extra array member at the very end,
> which old API users did not do.
> 
> This disables in-place RDFT transforms and instead
> does the transform out of place by copying once, there shouldn't
> be a significant loss of speed as our in-place FFT requires a reorder
> which is likely more expensive in the majority of cases to do.
> ---
>  libavcodec/avfft.c | 31 +++++++++++++++++++++++++------
>  1 file changed, 25 insertions(+), 6 deletions(-)
> 
> diff --git a/libavcodec/avfft.c b/libavcodec/avfft.c
> index 999b5ed79a..485f216427 100644
> --- a/libavcodec/avfft.c
> +++ b/libavcodec/avfft.c
> @@ -152,7 +152,7 @@ RDFTContext *av_rdft_init(int nbits, enum RDFTransformType trans)
>          return NULL;
>  
>      ret = av_tx_init(&s->ctx, &s->fn, AV_TX_FLOAT_RDFT, trans == IDFT_C2R,
> -                     1 << nbits, &scale, AV_TX_INPLACE);
> +                     1 << nbits, &scale, 0x0);
>      if (ret < 0) {
>          av_free(s);
>          return NULL;
> @@ -162,17 +162,35 @@ RDFTContext *av_rdft_init(int nbits, enum RDFTransformType trans)
>      s->len = 1 << nbits;
>      s->inv = trans == IDFT_C2R;
>  
> +    s->tmp = av_malloc((s->len + 2)*sizeof(float));
> +    if (!s->tmp) {
> +        av_tx_uninit(&s->ctx);
> +        av_free(s);
> +        return NULL;
> +    }
> +
>      return (RDFTContext *)s;
>  }
>  
>  void av_rdft_calc(RDFTContext *s, FFTSample *data)
>  {
>      AVTXWrapper *w = (AVTXWrapper *)s;
> -    if (w->inv)
> -        FFSWAP(float, data[1], data[w->len]);
> -    w->fn(w->ctx, data, (void *)data, w->stride);
> -    if (!w->inv)
> -        FFSWAP(float, data[1], data[w->len]);
> +    float *src = w->inv ? w->tmp : (float *)data;
> +    float *dst = w->inv ? (float *)data : w->tmp;
> +
> +    if (w->inv) {
> +        memcpy(src, data, w->len*sizeof(float));
> +
> +        src[w->len] = src[1];
> +        src[1] = 0.0f;
> +    }
> +
> +    w->fn(w->ctx, dst, (void *)src, w->stride);
> +
> +    if (!w->inv) {
> +        dst[1] = dst[w->len];
> +        memcpy(data, dst, w->len*sizeof(float));
> +    }
>  }
>  
>  av_cold void av_rdft_end(RDFTContext *s)
> @@ -180,6 +198,7 @@ av_cold void av_rdft_end(RDFTContext *s)
>      if (s) {
>          AVTXWrapper *w = (AVTXWrapper *)s;
>          av_tx_uninit(&w->ctx);
> +        av_free(&w->tmp);

av_free(w->tmp);

>          av_free(w);
>      }
>  }
> -- 
> 2.43.0.381.gb435a96ce8
> 

Hi,

I tested the patch with the av_free() adjustment and it fixed the crash /
memory corruption I was encountering on macOS.

Thanks,
Matthieu
Lynne Feb. 9, 2024, 7:30 p.m. UTC | #2
Feb 9, 2024, 20:19 by matthieu.bouron@gmail.com:

> On Fri, Feb 09, 2024 at 06:27:50PM +0100, Lynne wrote:
>
>> The new API requires an extra array member at the very end,
>> which old API users did not do.
>>
>> This disables in-place RDFT transforms and instead
>> does the transform out of place by copying once, there shouldn't
>> be a significant loss of speed as our in-place FFT requires a reorder
>> which is likely more expensive in the majority of cases to do.
>>
>> Patch attached.
>>
>> From 763f2e8941dc3dd4282ad42574bd8d451a256a53 Mon Sep 17 00:00:00 2001
>> From: Lynne <dev@lynne.ee>
>> Date: Fri, 9 Feb 2024 18:17:54 +0100
>> Subject: [PATCH] avfft: avoid overreads with RDFT API users
>>
>> The new API requires an extra array member at the very end,
>> which old API users did not do.
>>
>> This disables in-place RDFT transforms and instead
>> does the transform out of place by copying once, there shouldn't
>> be a significant loss of speed as our in-place FFT requires a reorder
>> which is likely more expensive in the majority of cases to do.
>> ---
>>  libavcodec/avfft.c | 31 +++++++++++++++++++++++++------
>>  1 file changed, 25 insertions(+), 6 deletions(-)
>>
>> diff --git a/libavcodec/avfft.c b/libavcodec/avfft.c
>> index 999b5ed79a..485f216427 100644
>> --- a/libavcodec/avfft.c
>> +++ b/libavcodec/avfft.c
>> @@ -152,7 +152,7 @@ RDFTContext *av_rdft_init(int nbits, enum RDFTransformType trans)
>>  return NULL;
>>  
>>  ret = av_tx_init(&s->ctx, &s->fn, AV_TX_FLOAT_RDFT, trans == IDFT_C2R,
>> -                     1 << nbits, &scale, AV_TX_INPLACE);
>> +                     1 << nbits, &scale, 0x0);
>>  if (ret < 0) {
>>  av_free(s);
>>  return NULL;
>> @@ -162,17 +162,35 @@ RDFTContext *av_rdft_init(int nbits, enum RDFTransformType trans)
>>  s->len = 1 << nbits;
>>  s->inv = trans == IDFT_C2R;
>>  
>> +    s->tmp = av_malloc((s->len + 2)*sizeof(float));
>> +    if (!s->tmp) {
>> +        av_tx_uninit(&s->ctx);
>> +        av_free(s);
>> +        return NULL;
>> +    }
>> +
>>  return (RDFTContext *)s;
>>  }
>>  
>>  void av_rdft_calc(RDFTContext *s, FFTSample *data)
>>  {
>>  AVTXWrapper *w = (AVTXWrapper *)s;
>> -    if (w->inv)
>> -        FFSWAP(float, data[1], data[w->len]);
>> -    w->fn(w->ctx, data, (void *)data, w->stride);
>> -    if (!w->inv)
>> -        FFSWAP(float, data[1], data[w->len]);
>> +    float *src = w->inv ? w->tmp : (float *)data;
>> +    float *dst = w->inv ? (float *)data : w->tmp;
>> +
>> +    if (w->inv) {
>> +        memcpy(src, data, w->len*sizeof(float));
>> +
>> +        src[w->len] = src[1];
>> +        src[1] = 0.0f;
>> +    }
>> +
>> +    w->fn(w->ctx, dst, (void *)src, w->stride);
>> +
>> +    if (!w->inv) {
>> +        dst[1] = dst[w->len];
>> +        memcpy(data, dst, w->len*sizeof(float));
>> +    }
>>  }
>>  
>>  av_cold void av_rdft_end(RDFTContext *s)
>> @@ -180,6 +198,7 @@ av_cold void av_rdft_end(RDFTContext *s)
>>  if (s) {
>>  AVTXWrapper *w = (AVTXWrapper *)s;
>>  av_tx_uninit(&w->ctx);
>> +        av_free(&w->tmp);
>>
>
> av_free(w->tmp);
>

Yup, discussed on IRC.


>> av_free(w);
>>  }
>>  }
>> -- 
>> 2.43.0.381.gb435a96ce8
>>
>
> Hi,
>
> I tested the patch with the av_free() adjustment and it fixed the crash /
> memory corruption I was encountering on macOS.
>

Thanks, will push soon.
diff mbox series

Patch

From 763f2e8941dc3dd4282ad42574bd8d451a256a53 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 9 Feb 2024 18:17:54 +0100
Subject: [PATCH] avfft: avoid overreads with RDFT API users

The new API requires an extra array member at the very end,
which old API users did not do.

This disables in-place RDFT transforms and instead
does the transform out of place by copying once, there shouldn't
be a significant loss of speed as our in-place FFT requires a reorder
which is likely more expensive in the majority of cases to do.
---
 libavcodec/avfft.c | 31 +++++++++++++++++++++++++------
 1 file changed, 25 insertions(+), 6 deletions(-)

diff --git a/libavcodec/avfft.c b/libavcodec/avfft.c
index 999b5ed79a..485f216427 100644
--- a/libavcodec/avfft.c
+++ b/libavcodec/avfft.c
@@ -152,7 +152,7 @@  RDFTContext *av_rdft_init(int nbits, enum RDFTransformType trans)
         return NULL;
 
     ret = av_tx_init(&s->ctx, &s->fn, AV_TX_FLOAT_RDFT, trans == IDFT_C2R,
-                     1 << nbits, &scale, AV_TX_INPLACE);
+                     1 << nbits, &scale, 0x0);
     if (ret < 0) {
         av_free(s);
         return NULL;
@@ -162,17 +162,35 @@  RDFTContext *av_rdft_init(int nbits, enum RDFTransformType trans)
     s->len = 1 << nbits;
     s->inv = trans == IDFT_C2R;
 
+    s->tmp = av_malloc((s->len + 2)*sizeof(float));
+    if (!s->tmp) {
+        av_tx_uninit(&s->ctx);
+        av_free(s);
+        return NULL;
+    }
+
     return (RDFTContext *)s;
 }
 
 void av_rdft_calc(RDFTContext *s, FFTSample *data)
 {
     AVTXWrapper *w = (AVTXWrapper *)s;
-    if (w->inv)
-        FFSWAP(float, data[1], data[w->len]);
-    w->fn(w->ctx, data, (void *)data, w->stride);
-    if (!w->inv)
-        FFSWAP(float, data[1], data[w->len]);
+    float *src = w->inv ? w->tmp : (float *)data;
+    float *dst = w->inv ? (float *)data : w->tmp;
+
+    if (w->inv) {
+        memcpy(src, data, w->len*sizeof(float));
+
+        src[w->len] = src[1];
+        src[1] = 0.0f;
+    }
+
+    w->fn(w->ctx, dst, (void *)src, w->stride);
+
+    if (!w->inv) {
+        dst[1] = dst[w->len];
+        memcpy(data, dst, w->len*sizeof(float));
+    }
 }
 
 av_cold void av_rdft_end(RDFTContext *s)
@@ -180,6 +198,7 @@  av_cold void av_rdft_end(RDFTContext *s)
     if (s) {
         AVTXWrapper *w = (AVTXWrapper *)s;
         av_tx_uninit(&w->ctx);
+        av_free(&w->tmp);
         av_free(w);
     }
 }
-- 
2.43.0.381.gb435a96ce8