@@ -522,10 +522,13 @@ static void print_tx_structure(AVTXContext *s, int depth)
cd->type == TX_TYPE_ANY ? "all" :
cd->type == AV_TX_FLOAT_FFT ? "fft_float" :
cd->type == AV_TX_FLOAT_MDCT ? "mdct_float" :
+ cd->type == AV_TX_FLOAT_RDFT ? "rdft_float" :
cd->type == AV_TX_DOUBLE_FFT ? "fft_double" :
cd->type == AV_TX_DOUBLE_MDCT ? "mdct_double" :
+ cd->type == AV_TX_DOUBLE_RDFT ? "rdft_double" :
cd->type == AV_TX_INT32_FFT ? "fft_int32" :
- cd->type == AV_TX_INT32_MDCT ? "mdct_int32" : "unknown",
+ cd->type == AV_TX_INT32_MDCT ? "mdct_int32" :
+ cd->type == AV_TX_INT32_RDFT ? "rdft_double" : "unknown",
s->len, cd->function);
for (int i = 0; i < s->nb_sub; i++)
@@ -83,6 +83,13 @@ enum AVTXType {
*/
AV_TX_INT32_MDCT = 5,
+ /**
+ * Standard real to complex/complex to real DFT.
+ */
+ AV_TX_FLOAT_RDFT = 6,
+ AV_TX_DOUBLE_RDFT = 7,
+ AV_TX_INT32_RDFT = 8,
+
/* Not part of the API, do not use */
AV_TX_NB,
};
@@ -1301,6 +1301,116 @@ DECL_COMP_MDCT(7)
DECL_COMP_MDCT(9)
DECL_COMP_MDCT(15)
+static av_cold int TX_NAME(ff_tx_rdft_init)(AVTXContext *s,
+ const FFTXCodelet *cd,
+ uint64_t flags,
+ FFTXCodeletOptions *opts,
+ int len, int inv,
+ const void *scale)
+{
+ int ret;
+ double f;
+ FFTSample *tab;
+
+ if ((ret = ff_tx_init_subtx(s, TX_TYPE(FFT), flags, NULL, len >> 1, inv, scale)))
+ return ret;
+
+ if (!(s->exp = av_mallocz(((len >> 2) + 1)*sizeof(*s->exp))))
+ return AVERROR(ENOMEM);
+
+ tab = (FFTSample *)s->exp;
+
+ f = 2*M_PI/len;
+
+ for (int i = 0; i < len >> 2; i++)
+ *tab++ = RESCALE(cos(i*f));
+ for (int i = len >> 2; i >= 0; i--)
+ *tab++ = RESCALE(cos(i*f));
+
+ return 0;
+}
+
+#define DECL_RDFT(name, s0, s1, inv) \
+static void TX_NAME(ff_tx_rdft_ ##name)(AVTXContext *s, void *_dst, \
+ void *_src, ptrdiff_t stride) \
+{ \
+ const int len2 = s->len >> 1; \
+ const int len4 = s->len >> 2; \
+ const FFTSample *tcos = (void *)s->exp; \
+ const FFTSample *tsin = tcos + len4; \
+ FFTComplex *data = inv ? _src : _dst; \
+ FFTComplex t[3]; \
+ \
+ if (!inv) \
+ s->fn[0](&s->sub[0], data, _src, sizeof(FFTComplex)); \
+ else \
+ data[0].im = data[len2].re; \
+ \
+ /* The DC value's both components are real, but we need to change them \
+ * into complex values */ \
+ t[0].re = data[0].re; \
+ data[0].re = t[0].re + data[0].im; \
+ data[0].im = t[0].re - data[0].im; \
+ \
+ for (int i = 1; i < len4; i++) { \
+ /* Separate even and odd FFTs */ \
+ t[0].re = RESCALE(0.5 )*(data[i].re + data[len2 - i].re); \
+ t[0].im = RESCALE( - 0.5)*(data[i].im - data[len2 - i].im); \
+ t[1].re = RESCALE( (0.5 - inv))*(data[i].im + data[len2 - i].im); \
+ t[1].im = RESCALE(-(0.5 - inv))*(data[i].re - data[len2 - i].re); \
+ \
+ /* Apply twiddle factors to the odd FFT and add to the even FFT */ \
+ t[2].re = t[1].re*tcos[i] s0 t[1].im*tsin[i]; \
+ t[2].im = t[1].im*tcos[i] s1 t[1].re*tsin[i]; \
+ \
+ data[ i].re = t[0].re + t[2].re; \
+ data[ i].im = t[2].im - t[0].im; \
+ data[len2 - i].re = t[0].re - t[2].re; \
+ data[len2 - i].im = t[2].im + t[0].im; \
+ } \
+ \
+ data[len4].im = -data[len4].im; \
+ \
+ if (inv) { \
+ data[0].re *= RESCALE(0.5); \
+ data[0].im *= RESCALE(0.5); \
+ s->fn[0](&s->sub[0], _dst, data, sizeof(FFTComplex)); \
+ } else { \
+ /* Move [0].im to the last position, as convention requires */ \
+ data[len2].re = data[0].im; \
+ data[ 0].im = 0; \
+ } \
+}
+
+DECL_RDFT(fwd, +, -, 0)
+DECL_RDFT(inv, -, +, 1)
+
+const FFTXCodelet TX_NAME(ff_tx_rdft_fwd_def) = {
+ .name = TX_NAME_STR("rdft_fwd"),
+ .function = TX_NAME(ff_tx_rdft_fwd),
+ .type = TX_TYPE(RDFT),
+ .flags = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE | FF_TX_FORWARD_ONLY,
+ .factors = { 2, TX_FACTOR_ANY },
+ .min_len = 2,
+ .max_len = TX_LEN_UNLIMITED,
+ .init = TX_NAME(ff_tx_rdft_init),
+ .cpu_flags = FF_TX_CPU_FLAGS_ALL,
+ .prio = FF_TX_PRIO_BASE,
+};
+
+const FFTXCodelet TX_NAME(ff_tx_rdft_inv_def) = {
+ .name = TX_NAME_STR("rdft_inv"),
+ .function = TX_NAME(ff_tx_rdft_inv),
+ .type = TX_TYPE(RDFT),
+ .flags = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE | FF_TX_INVERSE_ONLY,
+ .factors = { 2, TX_FACTOR_ANY },
+ .min_len = 2,
+ .max_len = TX_LEN_UNLIMITED,
+ .init = TX_NAME(ff_tx_rdft_init),
+ .cpu_flags = FF_TX_CPU_FLAGS_ALL,
+ .prio = FF_TX_PRIO_BASE,
+};
+
const FFTXCodelet * const TX_NAME(ff_tx_codelet_list)[] = {
/* Split-Radix codelets */
&TX_NAME(ff_tx_fft2_ns_def),
@@ -1345,6 +1455,8 @@ const FFTXCodelet * const TX_NAME(ff_tx_codelet_list)[] = {
&TX_NAME(ff_tx_mdct_naive_fwd_def),
&TX_NAME(ff_tx_mdct_naive_inv_def),
&TX_NAME(ff_tx_mdct_inv_full_def),
+ &TX_NAME(ff_tx_rdft_fwd_def),
+ &TX_NAME(ff_tx_rdft_inv_def),
NULL,
};