From patchwork Wed Dec 26 08:15:27 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Liu Steven X-Patchwork-Id: 11553 Return-Path: X-Original-To: patchwork@ffaux-bg.ffmpeg.org Delivered-To: patchwork@ffaux-bg.ffmpeg.org Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org [79.124.17.100]) by ffaux.localdomain (Postfix) with ESMTP id 72B4A44C890 for ; Wed, 26 Dec 2018 10:15:45 +0200 (EET) Received: from [127.0.1.1] (localhost [127.0.0.1]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id 2585768AE45; Wed, 26 Dec 2018 10:15:42 +0200 (EET) X-Original-To: ffmpeg-devel@ffmpeg.org Delivered-To: ffmpeg-devel@ffmpeg.org Received: from smtpbgeu2.qq.com (smtpbgeu2.qq.com [18.194.254.142]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTPS id BD4CA68ABC6 for ; Wed, 26 Dec 2018 10:15:35 +0200 (EET) X-QQ-mid: bizesmtp3t1545812134trk51z26n Received: from localhost (unknown [106.2.229.242]) by esmtp4.qq.com (ESMTP) with id ; Wed, 26 Dec 2018 16:15:33 +0800 (CST) X-QQ-SSF: 01100000002000F0FNF0B00A0000000 X-QQ-FEAT: Q4mUGnBphwOPOXqBKD0u3YyGTYSx/q+3U9Qz+CxPYYoiF3GAXAwGzPtONpNma kM6hJjYwjrBK+fJiw/zdRzUzh6N0mCBhoayNC5dH0TG7EMYa8rAZqZad37/TAWaUWoJoEFE zxUSJTQvk2GApHfwzIYmAx2KeWh10Tu7XomFFs5yWEKXcAt87ILs91PRLijiWniF1/vBMau kNLqdagnebONs2UH9Ut+8GBPOaj+SVS/NOffe6GD+sm2AGLOAPAyPCcLKe0133KIZ8reRaL w/gzyz2aE5iLgW7W5ZaQNhcz4voTOFvqaoJH1FK3xYobh/ X-QQ-GoodBg: 0 From: Steven Liu To: ffmpeg-devel@ffmpeg.org Date: Wed, 26 Dec 2018 16:15:27 +0800 Message-Id: <20181226081527.82441-1-lq@chinaffmpeg.org> X-Mailer: git-send-email 2.15.2 (Apple Git-101.1) In-Reply-To: References: X-QQ-SENDSIZE: 520 Feedback-ID: bizesmtp:chinaffmpeg.org:qybgforeign:qybgforeign1 X-QQ-Bgrelay: 1 Subject: [FFmpeg-devel] [PATCH v3] avcodec/fft_template: improve performance of the ff_fft_init in fft_template X-BeenThere: ffmpeg-devel@ffmpeg.org X-Mailman-Version: 2.1.20 Precedence: list List-Id: FFmpeg development discussions and patches 
List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: FFmpeg development discussions and patches Cc: Steven Liu MIME-Version: 1.0 Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" Before patch: init nbits = 17, get 10000 samples, average cost: 16175 us After patch: init nbits = 17, get 10000 samples, average cost: 14989 us Signed-off-by: Steven Liu --- libavcodec/fft_template.c | 46 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 35 insertions(+), 11 deletions(-) diff --git a/libavcodec/fft_template.c b/libavcodec/fft_template.c index 762c014bc8..20a62e4290 100644 --- a/libavcodec/fft_template.c +++ b/libavcodec/fft_template.c @@ -261,17 +261,41 @@ av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse) if (s->fft_permutation == FF_FFT_PERM_AVX) { fft_perm_avx(s); } else { - for(i=0; i<n; i++) { - int k; - j = i; - if (s->fft_permutation == FF_FFT_PERM_SWAP_LSBS) - j = (j&~3) | ((j>>1)&1) | ((j<<1)&2); - k = -split_radix_permutation(i, n, s->inverse) & (n-1); - if (s->revtab) - s->revtab[k] = j; - if (s->revtab32) - s->revtab32[k] = j; - } +#define PROCESS_FFT_PERM_SWAP_LSBS(num) do {\ + for(i = 0; i < n; i++) {\ + int k;\ + j = i;\ + j = (j & ~3) | ((j >> 1) & 1) | ((j << 1) & 2);\ + k = -split_radix_permutation(i, n, s->inverse) & (n - 1);\ + s->revtab##num[k] = j;\ + } \ +} while(0); + +#define PROCESS_FFT_PERM_DEFAULT(num) do {\ + for(i = 0; i < n; i++) {\ + int k;\ + j = i;\ + k = -split_radix_permutation(i, n, s->inverse) & (n - 1);\ + s->revtab##num[k] = j;\ + } \ +} while(0); + +#define SPLIT_RADIX_PERMUTATION(num) do { \ + if (s->fft_permutation == FF_FFT_PERM_SWAP_LSBS) {\ + PROCESS_FFT_PERM_SWAP_LSBS(num) \ + } else {\ + PROCESS_FFT_PERM_DEFAULT(num) \ + }\ +} while(0); + + if (s->revtab) + SPLIT_RADIX_PERMUTATION() + if (s->revtab32) + SPLIT_RADIX_PERMUTATION(32) + +#undef PROCESS_FFT_PERM_DEFAULT +#undef PROCESS_FFT_PERM_SWAP_LSBS +#undef SPLIT_RADIX_PERMUTATION } return 0;