diff mbox

[FFmpeg-devel,v3] avcodec/fft_template: improve performance of ff_fft_init in fft_template

Message ID 20181226081527.82441-1-lq@chinaffmpeg.org
State Accepted
Commit eb81fd792fa88f9015b5e99e4940a464de9182a5
Headers show

Commit Message

Liu Steven Dec. 26, 2018, 8:15 a.m. UTC
Before patch:
init nbits = 17, get 10000 samples, average cost: 16175 us
After patch:
init nbits = 17, get 10000 samples, average cost: 14989 us

Signed-off-by: Steven Liu <lq@chinaffmpeg.org>
---
 libavcodec/fft_template.c | 46 +++++++++++++++++++++++++++++++++++-----------
 1 file changed, 35 insertions(+), 11 deletions(-)

Comments

Steven Liu Jan. 3, 2019, 7:01 a.m. UTC | #1
Steven Liu <lq@chinaffmpeg.org> 于2018年12月26日周三 下午4:15写道:
>
> Before patch:
> init nbits = 17, get 10000 samples, average cost: 16175 us
> After patch:
> init nbits = 17, get 10000 samples, average cost: 14989 us
>
> Signed-off-by: Steven Liu <lq@chinaffmpeg.org>
> ---
>  libavcodec/fft_template.c | 46 +++++++++++++++++++++++++++++++++++-----------
>  1 file changed, 35 insertions(+), 11 deletions(-)
>
> diff --git a/libavcodec/fft_template.c b/libavcodec/fft_template.c
> index 762c014bc8..20a62e4290 100644
> --- a/libavcodec/fft_template.c
> +++ b/libavcodec/fft_template.c
> @@ -261,17 +261,41 @@ av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
>      if (s->fft_permutation == FF_FFT_PERM_AVX) {
>          fft_perm_avx(s);
>      } else {
> -        for(i=0; i<n; i++) {
> -            int k;
> -            j = i;
> -            if (s->fft_permutation == FF_FFT_PERM_SWAP_LSBS)
> -                j = (j&~3) | ((j>>1)&1) | ((j<<1)&2);
> -            k = -split_radix_permutation(i, n, s->inverse) & (n-1);
> -            if (s->revtab)
> -                s->revtab[k] = j;
> -            if (s->revtab32)
> -                s->revtab32[k] = j;
> -        }
> +#define PROCESS_FFT_PERM_SWAP_LSBS(num) do {\
> +    for(i = 0; i < n; i++) {\
> +        int k;\
> +        j = i;\
> +        j = (j & ~3) | ((j >> 1) & 1) | ((j << 1) & 2);\
> +        k = -split_radix_permutation(i, n, s->inverse) & (n - 1);\
> +        s->revtab##num[k] = j;\
> +    } \
> +} while(0);
> +
> +#define PROCESS_FFT_PERM_DEFAULT(num) do {\
> +    for(i = 0; i < n; i++) {\
> +        int k;\
> +        j = i;\
> +        k = -split_radix_permutation(i, n, s->inverse) & (n - 1);\
> +        s->revtab##num[k] = j;\
> +    } \
> +} while(0);
> +
> +#define SPLIT_RADIX_PERMUTATION(num) do { \
> +    if (s->fft_permutation == FF_FFT_PERM_SWAP_LSBS) {\
> +        PROCESS_FFT_PERM_SWAP_LSBS(num) \
> +    } else {\
> +        PROCESS_FFT_PERM_DEFAULT(num) \
> +    }\
> +} while(0);
> +
> +    if (s->revtab)
> +        SPLIT_RADIX_PERMUTATION()
> +    if (s->revtab32)
> +        SPLIT_RADIX_PERMUTATION(32)
> +
> +#undef PROCESS_FFT_PERM_DEFAULT
> +#undef PROCESS_FFT_PERM_SWAP_LSBS
> +#undef SPLIT_RADIX_PERMUTATION
>      }
>
>      return 0;
> --
> 2.15.2 (Apple Git-101.1)
>
>
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel

ping
Michael Niedermayer Jan. 3, 2019, 7:01 p.m. UTC | #2
On Wed, Dec 26, 2018 at 04:15:27PM +0800, Steven Liu wrote:
> Before patch:
> init nbits = 17, get 10000 samples, average cost: 16175 us
> After patch:
> init nbits = 17, get 10000 samples, average cost: 14989 us
> 
> Signed-off-by: Steven Liu <lq@chinaffmpeg.org>
> ---
>  libavcodec/fft_template.c | 46 +++++++++++++++++++++++++++++++++++-----------
>  1 file changed, 35 insertions(+), 11 deletions(-)

should be ok

thx

[...]
Steven Liu Jan. 4, 2019, 3:23 a.m. UTC | #3
Michael Niedermayer <michael@niedermayer.cc> 于2019年1月4日周五 上午3:01写道:
>
> On Wed, Dec 26, 2018 at 04:15:27PM +0800, Steven Liu wrote:
> > Before patch:
> > init nbits = 17, get 10000 samples, average cost: 16175 us
> > After patch:
> > init nbits = 17, get 10000 samples, average cost: 14989 us
> >
> > Signed-off-by: Steven Liu <lq@chinaffmpeg.org>
> > ---
> >  libavcodec/fft_template.c | 46 +++++++++++++++++++++++++++++++++++-----------
> >  1 file changed, 35 insertions(+), 11 deletions(-)
>
> should be ok

Pushed


Thanks
>
> thx
>
> [...]
> --
> Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
>
> The real ebay dictionary, page 1
> "Used only once"    - "Some unspecified defect prevented a second use"
> "In good condition" - "Can be repaired by experienced expert"
> "As is" - "You wouldn't want it even if you were paid for it, if you knew ..."
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
diff mbox

Patch

diff --git a/libavcodec/fft_template.c b/libavcodec/fft_template.c
index 762c014bc8..20a62e4290 100644
--- a/libavcodec/fft_template.c
+++ b/libavcodec/fft_template.c
@@ -261,17 +261,41 @@  av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
     if (s->fft_permutation == FF_FFT_PERM_AVX) {
         fft_perm_avx(s);
     } else {
-        for(i=0; i<n; i++) {
-            int k;
-            j = i;
-            if (s->fft_permutation == FF_FFT_PERM_SWAP_LSBS)
-                j = (j&~3) | ((j>>1)&1) | ((j<<1)&2);
-            k = -split_radix_permutation(i, n, s->inverse) & (n-1);
-            if (s->revtab)
-                s->revtab[k] = j;
-            if (s->revtab32)
-                s->revtab32[k] = j;
-        }
+#define PROCESS_FFT_PERM_SWAP_LSBS(num) do {\
+    for(i = 0; i < n; i++) {\
+        int k;\
+        j = i;\
+        j = (j & ~3) | ((j >> 1) & 1) | ((j << 1) & 2);\
+        k = -split_radix_permutation(i, n, s->inverse) & (n - 1);\
+        s->revtab##num[k] = j;\
+    } \
+} while(0);
+
+#define PROCESS_FFT_PERM_DEFAULT(num) do {\
+    for(i = 0; i < n; i++) {\
+        int k;\
+        j = i;\
+        k = -split_radix_permutation(i, n, s->inverse) & (n - 1);\
+        s->revtab##num[k] = j;\
+    } \
+} while(0);
+
+#define SPLIT_RADIX_PERMUTATION(num) do { \
+    if (s->fft_permutation == FF_FFT_PERM_SWAP_LSBS) {\
+        PROCESS_FFT_PERM_SWAP_LSBS(num) \
+    } else {\
+        PROCESS_FFT_PERM_DEFAULT(num) \
+    }\
+} while(0);
+
+    if (s->revtab)
+        SPLIT_RADIX_PERMUTATION()
+    if (s->revtab32)
+        SPLIT_RADIX_PERMUTATION(32)
+
+#undef PROCESS_FFT_PERM_DEFAULT
+#undef PROCESS_FFT_PERM_SWAP_LSBS
+#undef SPLIT_RADIX_PERMUTATION
     }
 
     return 0;