diff mbox

[FFmpeg-devel] libavcodec/fft_template: improve performance of the ff_fft_init in fft_template

Message ID 20181217091305.50957-1-lq@chinaffmpeg.org
State New
Headers show

Commit Message

Liu Steven Dec. 17, 2018, 9:13 a.m. UTC
After patch:
init nbits = 17, get 10000 samples, duration: 15221
Before patch:
init nbits = 17, get 10000 samples, duration: 16105

test script:
DURATION=0
for((i=0;i<10000;i++)) do
./libavcodec/tests/fft -n 17 &>output
T_DURATION=`grep "duration" output | awk -F"=" '{ print $2}'`
DURATION=`expr $DURATION + $T_DURATION`
done
TOTAL=`expr $DURATION / 10000`
echo $TOTAL

Signed-off-by: Steven Liu <lq@chinaffmpeg.org>
---
 libavcodec/fft_template.c | 27 +++++++++++++++++----------
 1 file changed, 17 insertions(+), 10 deletions(-)

Comments

Carl Eugen Hoyos Dec. 17, 2018, 2:05 p.m. UTC | #1
2018-12-17 10:13 GMT+01:00, Steven Liu <lq@chinaffmpeg.org>:
> After patch:
> init nbits = 17, get 10000 samples, duration: 15221
> Before patch:
> init nbits = 17, get 10000 samples, duration: 16105
>
> test script:
> DURATION=0
> for((i=0;i<10000;i++)) do
> ./libavcodec/tests/fft -n 17 &>output
> T_DURATION=`grep "duration" output | awk -F"=" '{ print $2}'`
> DURATION=`expr $DURATION + $T_DURATION`
> done
> TOTAL=`expr $DURATION / 10000`
> echo $TOTAL

This script does not allow reproducing the results, afaict.
(There is no "duration" in the output here.)

Carl Eugen
Liu Steven Dec. 17, 2018, 2:56 p.m. UTC | #2
> On Dec 17, 2018, at 22:05, Carl Eugen Hoyos <ceffmpeg@gmail.com> wrote:
> 
> 2018-12-17 10:13 GMT+01:00, Steven Liu <lq@chinaffmpeg.org>:
>> After patch:
>> init nbits = 17, get 10000 samples, duration: 15221
>> Before patch:
>> init nbits = 17, get 10000 samples, duration: 16105
>> 
>> test script:
>> DURATION=0
>> for((i=0;i<10000;i++)) do
>> ./libavcodec/tests/fft -n 17 &>output
>> T_DURATION=`grep "duration" output | awk -F"=" '{ print $2}'`
>> DURATION=`expr $DURATION + $T_DURATION`
>> done
>> TOTAL=`expr $DURATION / 10000`
>> echo $TOTAL
> 
> This script does not allow to reproduce afaict.
> (There is no "duration" here in output)
Do you mean I should submit the libavcodec/tests/fft.c modification as patch 2/3,
or just post the modified diff here?
> 
> Carl Eugen
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel

Thanks
Steven
Carl Eugen Hoyos Dec. 17, 2018, 7:35 p.m. UTC | #3
2018-12-17 15:56 GMT+01:00, Steven Liu <lq@chinaffmpeg.org>:
>
>
>> On Dec 17, 2018, at 22:05, Carl Eugen Hoyos <ceffmpeg@gmail.com> wrote:
>>
>> 2018-12-17 10:13 GMT+01:00, Steven Liu <lq@chinaffmpeg.org>:
>>> After patch:
>>> init nbits = 17, get 10000 samples, duration: 15221
>>> Before patch:
>>> init nbits = 17, get 10000 samples, duration: 16105
>>>
>>> test script:
>>> DURATION=0
>>> for((i=0;i<10000;i++)) do
>>> ./libavcodec/tests/fft -n 17 &>output
>>> T_DURATION=`grep "duration" output | awk -F"=" '{ print $2}'`
>>> DURATION=`expr $DURATION + $T_DURATION`
>>> done
>>> TOTAL=`expr $DURATION / 10000`
>>> echo $TOTAL
>>
>> This script does not allow to reproduce afaict.
>> (There is no "duration" here in output)
>
> Do you mean i should commit 2/3 patch of the libavcodec/tests/fft.c modify?
> or just upload the modify diff here?

I believe your commit message should not contain a script
that cannot be used to reproduce your findings.

Carl Eugen
Michael Niedermayer Dec. 17, 2018, 11:24 p.m. UTC | #4
On Mon, Dec 17, 2018 at 05:13:05PM +0800, Steven Liu wrote:
> After patch:
> init nbits = 17, get 10000 samples, duration: 15221
> Before patch:
> init nbits = 17, get 10000 samples, duration: 16105
> 
> test script:
> DURATION=0
> for((i=0;i<10000;i++)) do
> ./libavcodec/tests/fft -n 17 &>output
> T_DURATION=`grep "duration" output | awk -F"=" '{ print $2}'`
> DURATION=`expr $DURATION + $T_DURATION`
> done
> TOTAL=`expr $DURATION / 10000`
> echo $TOTAL
> 
> Signed-off-by: Steven Liu <lq@chinaffmpeg.org>
> ---
>  libavcodec/fft_template.c | 27 +++++++++++++++++----------
>  1 file changed, 17 insertions(+), 10 deletions(-)

This breaks FATE:
make: *** [fate-unknown_layout-ac3] Error 1
make: *** [fate-acodec-dca] Error 1
make: *** [fate-acodec-dca2] Error 1
make: *** [fate-lavf-rm] Error 1
make: *** [fate-ac3-fixed-2.0] Error 1
make: *** [fate-ac3-fixed-4.0-downmix-mono] Error 1
make: *** [fate-ac3-fixed-5.1-downmix-stereo] Error 1
make: *** [fate-ac3-fixed-5.1-downmix-mono] Error 1
make: *** [fate-ac3-fixed-encode] Error 1
make: *** [fate-atrac1-1] Error 1
make: *** [fate-atrac3p-2] Error 1
make: *** [fate-atrac3p-1] Error 1
make: *** [fate-opus-testvector01] Error 1
make: *** [fate-opus-testvector06] Error 1
make: *** [fate-opus-testvector05] Error 1
make: *** [fate-opus-testvector11] Error 1
make: *** [fate-opus-testvector08] Error 1
make: *** [fate-opus-tron.6ch.tinypkts] Error 1
make: *** [fate-opus-testvector07] Error 1
make: *** [fate-opus-testvector09] Error 1
make: *** [fate-opus-testvector10] Error 1
make: *** [fate-prores-gray] Error 1
make: *** [fate-aac-al04sf_48] Error 1
make: *** [fate-aac-fixed-al05_44] Error 1
make: *** [fate-aac-er_eld2100np_48_ep0] Error 1
make: *** [fate-aac-fixed-al06_44] Error 1
make: *** [fate-aac-fixed-al04_44] Error 1
make: *** [fate-aac-fixed-al15_44] Error 1
make: *** [fate-aac-fixed-al17_44] Error 1
make: *** [fate-aac-fixed-er_ad6000np_44_ep0] Error 1
make: *** [fate-aac-fixed-al18_44] Error 1
make: *** [fate-aac-fixed-ap05_48] Error 1
make: *** [fate-aac-fixed-al_sbr_hq_cm_48_2] Error 1
make: *** [fate-aac-fixed-er_eld1001np_44_ep0] Error 1
make: *** [fate-aac-fixed-er_eld2000np_48_ep0] Error 1
make: *** [fate-aac-fixed-al_sbr_hq_sr_48_2_fsaac48] Error 1
make: *** [fate-aac-fixed-al_sbr_hq_cm_48_5.1] Error 1
make: *** [fate-dca-core_51_24_48_768_1] Error 1
make: *** [fate-dca-core_51_24_48_768_0] Error 1
make: *** [fate-dca-core_51_24_48_768_1-dmix_2] Error 1
make: *** [fate-dca-xxch_71_24_48_2046] Error 1
make: *** [fate-dca-xbr_xxch_71_24_48_3840] Error 1
make: *** [fate-dca-xch_61_24_48_768-dmix_6] Error 1
make: *** [fate-dca-xbr_xch_61_24_48_3840] Error 1
make: *** [fate-dca-xch_61_24_48_768] Error 1
make: *** [fate-dca-xbr_51_24_48_3840] Error 1
make: *** [fate-dts_es] Error 1
make: *** [fate-dca-core] Error 1
...

[...]
diff mbox

Patch

diff --git a/libavcodec/fft_template.c b/libavcodec/fft_template.c
index 762c014bc8..5f6f52275d 100644
--- a/libavcodec/fft_template.c
+++ b/libavcodec/fft_template.c
@@ -257,21 +257,28 @@  av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
     }
 #endif /* FFT_FIXED_32 */
 
+#define SPLIT_RADIX_PERMUTATION(num) \
+    for(i=0; i<n; i++) { \
+        int k; \
+        j = i; \
+        if (s->fft_permutation == FF_FFT_PERM_SWAP_LSBS) \
+            j = (j&~3) | ((j>>1)&1) | ((j<<1)&2); \
+        k = -split_radix_permutation(i, n, s->inverse) & (n-1); \
+        j = (j&~3) | ((j>>1)&1) | ((j<<1)&2); \
+        k = -split_radix_permutation(i, n, s->inverse) & (n-1); \
+        s->revtab##num[k] = j; \
+    }
 
     if (s->fft_permutation == FF_FFT_PERM_AVX) {
         fft_perm_avx(s);
     } else {
-        for(i=0; i<n; i++) {
-            int k;
-            j = i;
-            if (s->fft_permutation == FF_FFT_PERM_SWAP_LSBS)
-                j = (j&~3) | ((j>>1)&1) | ((j<<1)&2);
-            k = -split_radix_permutation(i, n, s->inverse) & (n-1);
-            if (s->revtab)
-                s->revtab[k] = j;
-            if (s->revtab32)
-                s->revtab32[k] = j;
+        if (s->revtab) {
+            SPLIT_RADIX_PERMUTATION()
+        }
+        if (s->revtab32) {
+            SPLIT_RADIX_PERMUTATION(32)
         }
+#undef SPLIT_RADIX_PERMUTATION
     }
 
     return 0;