From patchwork Tue Jan 12 07:18:35 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Lynne X-Patchwork-Id: 24916 Return-Path: X-Original-To: patchwork@ffaux-bg.ffmpeg.org Delivered-To: patchwork@ffaux-bg.ffmpeg.org Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org [79.124.17.100]) by ffaux.localdomain (Postfix) with ESMTP id 899E044B458 for ; Tue, 12 Jan 2021 09:18:44 +0200 (EET) Received: from [127.0.1.1] (localhost [127.0.0.1]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id 52AC668ABB9; Tue, 12 Jan 2021 09:18:44 +0200 (EET) X-Original-To: ffmpeg-devel@ffmpeg.org Delivered-To: ffmpeg-devel@ffmpeg.org Received: from w4.tutanota.de (w4.tutanota.de [81.3.6.165]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTPS id 178676881E8 for ; Tue, 12 Jan 2021 09:18:37 +0200 (EET) Received: from w3.tutanota.de (unknown [192.168.1.164]) by w4.tutanota.de (Postfix) with ESMTP id 27AB410602CF for ; Tue, 12 Jan 2021 07:18:37 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; q=dns/txt; c=relaxed/relaxed; t=1610435915; s=s1; d=lynne.ee; h=From:From:To:To:Subject:Subject:Content-Description:Content-ID:Content-Type:Content-Type:Content-Transfer-Encoding:Cc:Date:Date:In-Reply-To:MIME-Version:MIME-Version:Message-ID:Message-ID:Reply-To:References:Sender; bh=nLalv5ibuhEuy9fxvHtO7g9uG1lr8qamgvQek6NzgC0=; b=hZUb35F/WCdRdLWr2P6U6YdjUGkMJxTxq43kTANGN6M6E1KUOLTyt6tCqHaOVb/L rdxko0CkzM5yT2sPLVCfKWt2Ghks27mz7SV5cGrLWkagvDAygfKLUSV0qTs75P9C34s L/l3l5MrDQBaTTGtd29wpeRc7k3lD/hIXufInCzUnGxbGujntihG3Ob5//58s9QMDxN E8NccMSDmlYVPrE9Nnwl4/g2aEP5BjYWVQNPT64xHFMSeEMg9BF3enigZZgF/y11pvp pmWibOyVHioE6EZKaDahGWe50BuKl6fO6Azeor2459GVLXyvpr232pkpV+FtGqUwNuM ImAFUQvtcg== Date: Tue, 12 Jan 2021 08:18:35 +0100 (CET) From: Lynne To: Ffmpeg Devel Message-ID: MIME-Version: 1.0 Subject: [FFmpeg-devel] [PATCH] lavu: support arbitrary-point FFT and all even (i)MDCT transforms X-BeenThere: ffmpeg-devel@ffmpeg.org X-Mailman-Version: 2.1.20 Precedence: list List-Id: FFmpeg development discussions and patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: FFmpeg development discussions and patches Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" This patch adds support for arbitrary-point FFTs and all even MDCT transforms. Odd MDCTs are not supported yet as they're based on the DCT-II and DCT-III and they're very niche. With this we can now write tests. Patch attached. Subject: [PATCH] lavu: support arbitrary-point FFT and all even (i)MDCT transforms This patch adds support for arbitrary-point FFTs and all even MDCT transforms. Odd MDCTs are not supported yet as they're based on the DCT-II and DCT-III and they're very niche. With this we can now write tests. --- libavutil/tx.h | 5 +- libavutil/tx_priv.h | 3 ++ libavutil/tx_template.c | 101 +++++++++++++++++++++++++++++++++++++--- 3 files changed, 101 insertions(+), 8 deletions(-) diff --git a/libavutil/tx.h b/libavutil/tx.h index 418e8ec1ed..f49eb8c4c7 100644 --- a/libavutil/tx.h +++ b/libavutil/tx.h @@ -51,6 +51,8 @@ enum AVTXType { * For inverse transforms, the stride specifies the spacing between each * sample in the input array in bytes. The output will be a flat array. * Stride must be a non-zero multiple of sizeof(float). + * NOTE: the inverse transform is half-length, meaning the output will not + * contain redundant data. This is what most codecs work with. */ AV_TX_FLOAT_MDCT = 1, /** @@ -93,8 +95,7 @@ typedef void (*av_tx_fn)(AVTXContext *s, void *out, void *in, ptrdiff_t stride); /** * Initialize a transform context with the given configuration - * Currently power of two lengths from 2 to 131072 are supported, along with - * any length decomposable to a power of two and either 3, 5 or 15. + * (i)MDCTs with an odd length are currently not supported. * * @param ctx the context to allocate, will be NULL on error * @param tx pointer to the transform function pointer to set diff --git a/libavutil/tx_priv.h b/libavutil/tx_priv.h index 0ace3e90dc..18a07c312c 100644 --- a/libavutil/tx_priv.h +++ b/libavutil/tx_priv.h @@ -58,6 +58,7 @@ typedef void FFTComplex; (dim) = (are) * (bim) - (aim) * (bre); \ } while (0) +#define UNSCALE(x) (x) #define RESCALE(x) (x) #define FOLD(a, b) ((a) + (b)) @@ -85,6 +86,7 @@ typedef void FFTComplex; (dim) = (int)(((accu) + 0x40000000) >> 31); \ } while (0) +#define UNSCALE(x) ((double)x/2147483648.0) #define RESCALE(x) (av_clip64(lrintf((x) * 2147483648.0), INT32_MIN, INT32_MAX)) #define FOLD(x, y) ((int)((x) + (unsigned)(y) + 32) >> 6) @@ -108,6 +110,7 @@ struct AVTXContext { int m; /* Ptwo part */ int inv; /* Is inverted */ int type; /* Type */ + double scale; /* Scale */ FFTComplex *exptab; /* MDCT exptab */ FFTComplex *tmp; /* Temporary buffer needed for all compound transforms */ diff --git a/libavutil/tx_template.c b/libavutil/tx_template.c index 7f4ca2f31e..a91b8f900c 100644 --- a/libavutil/tx_template.c +++ b/libavutil/tx_template.c @@ -397,6 +397,31 @@ static void monolithic_fft(AVTXContext *s, void *_out, void *_in, fft_dispatch[mb](out); } +static void naive_fft(AVTXContext *s, void *_out, void *_in, + ptrdiff_t stride) +{ + FFTComplex *in = _in; + FFTComplex *out = _out; + const int n = s->n; + double phase = s->inv ? 2.0*M_PI/n : -2.0*M_PI/n; + + for(int i = 0; i < n; i++) { + FFTComplex tmp = { 0 }; + for(int j = 0; j < n; j++) { + const double factor = phase*i*j; + const FFTComplex mult = { + RESCALE(cos(factor)), + RESCALE(sin(factor)), + }; + FFTComplex res; + CMUL3(res, in[j], mult); + tmp.re += res.re; + tmp.im += res.im; + } + out[i] = tmp; + } +} + #define DECL_COMP_IMDCT(N) \ static void compound_imdct_##N##xM(AVTXContext *s, void *_dst, void *_src, \ ptrdiff_t stride) \ @@ -553,6 +578,57 @@ static void monolithic_mdct(AVTXContext *s, void *_dst, void *_src, } } +static void naive_imdct(AVTXContext *s, void *_dst, void *_src, + ptrdiff_t stride) +{ + int len = s->n; + int len2 = len*2; + FFTSample *src = _src; + FFTSample *dst = _dst; + double scale = s->scale; + const double phase = M_PI/(4.0*len2); + + stride /= sizeof(*src); + + for (int i = 0; i < len; i++) { + double sum_d = 0.0; + double sum_u = 0.0; + double i_d = phase * (4*len - 2*i - 1); + double i_u = phase * (3*len2 + 2*i + 1); + for (int j = 0; j < len2; j++) { + double a = (2 * j + 1); + double a_d = cos(a * i_d); + double a_u = cos(a * i_u); + double val = UNSCALE(src[j*stride]); + sum_d += a_d * val; + sum_u += a_u * val; + } + dst[i + 0] = RESCALE( sum_d*scale); + dst[i + len] = RESCALE(-sum_u*scale); + } +} + +static void naive_mdct(AVTXContext *s, void *_dst, void *_src, + ptrdiff_t stride) +{ + int len = s->n*2; + FFTSample *src = _src; + FFTSample *dst = _dst; + double scale = s->scale; + const double phase = M_PI/(4.0*len); + + stride /= sizeof(*dst); + + for (int i = 0; i < len; i++) { + double sum = 0.0; + for (int j = 0; j < len*2; j++) { + int a = (2*j + 1 + len) * (2*i + 1); + sum += UNSCALE(src[j]) * cos(a * phase); + } + dst[i*stride] = RESCALE(sum*scale); + } +} + static int gen_mdct_exptab(AVTXContext *s, int len4, double scale) { const double theta = (scale < 0 ? len4 : 0) + 1.0/8.0; @@ -575,11 +651,13 @@ int TX_NAME(ff_tx_init_mdct_fft)(AVTXContext *s, av_tx_fn *tx, const void *scale, uint64_t flags) { const int is_mdct = ff_tx_type_is_mdct(type); - int err, n = 1, m = 1, max_ptwo = 1 << (FF_ARRAY_ELEMS(fft_dispatch) - 1); + int err, l, n = 1, m = 1, max_ptwo = 1 << (FF_ARRAY_ELEMS(fft_dispatch) - 1); if (is_mdct) len >>= 1; + l = len; + #define CHECK_FACTOR(DST, FACTOR, SRC) \ if (DST == 1 && !(SRC % FACTOR)) { \ DST = FACTOR; \ @@ -601,12 +679,23 @@ int TX_NAME(ff_tx_init_mdct_fft)(AVTXContext *s, av_tx_fn *tx, s->inv = inv; s->type = type; - /* Filter out direct 3, 5 and 15 transforms, too niche */ + /* If we weren't able to split the length into factors we can handle, + * resort to using the naive and slow FT. This also filters out + * direct 3, 5 and 15 transforms as they're too niche. */ if (len > 1 || m == 1) { - av_log(NULL, AV_LOG_ERROR, "Unsupported transform size: n = %i, " - "m = %i, residual = %i!\n", n, m, len); - return AVERROR(EINVAL); - } else if (n > 1 && m > 1) { /* 2D transform case */ + if (is_mdct && (l & 1)) /* Odd (i)MDCTs are not supported yet */ + return AVERROR(ENOTSUP); + s->n = l; + s->m = 1; + *tx = naive_fft; + if (is_mdct) { + s->scale = *((SCALE_TYPE *)scale); + *tx = inv ? naive_imdct : naive_mdct; + } + return 0; + } + + if (n > 1 && m > 1) { /* 2D transform case */ if ((err = ff_tx_gen_compound_mapping(s))) return err; if (!(s->tmp = av_malloc(n*m*sizeof(*s->tmp))))