diff mbox series

[FFmpeg-devel,2/2] lavu/tx: add DCT-I and DST-I transforms

Message ID NawEKP2--B-9@lynne.ee
State New
Headers show
Series [FFmpeg-devel,1/2] lavu/tx: add real to real and real to imaginary RDFT transforms | expand

Checks

Context Check Description
yinshiyou/configure_loongarch64 warning Failed to apply patch
andriy/configure_x86 warning Failed to apply patch

Commit Message

Lynne Aug. 3, 2023, 4:31 p.m. UTC
These are true, actual DCT-I and DST-I transforms, unlike the
libavcodec versions, which are plainly not.

Error tests via https://github.com/cyanreg/lavu_fft_test

RMS error on a 2048-sample DCT-I:
RMSE   av_tx = 0.000000 (4096 matches, first mismatch at -1)
RMSE  fftw3f = 0.000000 (4096 matches, first mismatch at -1)
RMSE   avfft = 0.011440 (0 matches, first mismatch at 0)

RMS error on a 2048-sample DST-I:
RMSE   av_tx = 0.000000 (4096 matches, first mismatch at -1)
RMSE  fftw3f = 0.000000 (4096 matches, first mismatch at -1)
RMSE   avfft = 0.015316 (0 matches, first mismatch at 0)
diff mbox series

Patch

From 0bbe264a0c597a5a871ffc2bfea06e717bc9e0a1 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Thu, 3 Aug 2023 18:23:02 +0200
Subject: [PATCH 2/2] lavu/tx: add DCT-I and DST-I transforms

These are true, actual DCT-I and DST-I transforms, unlike the
libavcodec versions, which are plainly not.
---
 libavutil/tx.h          |  24 ++++++++++
 libavutil/tx_template.c | 103 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 127 insertions(+)

diff --git a/libavutil/tx.h b/libavutil/tx.h
index d178e8ee9d..4696988cae 100644
--- a/libavutil/tx.h
+++ b/libavutil/tx.h
@@ -105,6 +105,30 @@  enum AVTXType {
     AV_TX_DOUBLE_DCT = 10,
     AV_TX_INT32_DCT  = 11,
 
+    /**
+     * Discrete Cosine Transform I
+     *
+     * The forward transform is a DCT-I.
+     * The inverse transform is a DCT-I multiplied by 2/(N + 1).
+     *
+     * The input array is always overwritten.
+     */
+    AV_TX_FLOAT_DCT_I  = 12,
+    AV_TX_DOUBLE_DCT_I = 13,
+    AV_TX_INT32_DCT_I  = 14,
+
+    /**
+     * Discrete Sine Transform I
+     *
+     * The forward transform is a DST-I.
+     * The inverse transform is a DST-I multiplied by 2/(N + 1).
+     *
+     * The input array is always overwritten.
+     */
+    AV_TX_FLOAT_DST_I  = 15,
+    AV_TX_DOUBLE_DST_I = 16,
+    AV_TX_INT32_DST_I  = 17,
+
     /* Not part of the API, do not use */
     AV_TX_NB,
 };
diff --git a/libavutil/tx_template.c b/libavutil/tx_template.c
index 50c65d00b5..9bdac1e57d 100644
--- a/libavutil/tx_template.c
+++ b/libavutil/tx_template.c
@@ -2004,6 +2004,107 @@  static const FFTXCodelet TX_NAME(ff_tx_dctIII_def) = {
     .prio       = FF_TX_PRIO_BASE,
 };
 
+static av_cold int TX_NAME(ff_tx_dcstI_init)(AVTXContext *s,
+                                             const FFTXCodelet *cd,
+                                             uint64_t flags,
+                                             FFTXCodeletOptions *opts,
+                                             int len, int inv,
+                                             const void *scale)
+{
+    int ret;
+    SCALE_TYPE rsc = *((SCALE_TYPE *)scale);
+
+    if (0 && inv) {
+        len *= 2;
+        s->len *= 2;
+        rsc *= 0.5;
+    }
+
+    /* We want a half-complex RDFT */
+    flags |= cd->type == TX_TYPE(DCT_I) ? AV_TX_REAL_TO_REAL :
+                                          AV_TX_REAL_TO_IMAGINARY;
+
+    if ((ret = ff_tx_init_subtx(s, TX_TYPE(RDFT), flags, NULL,
+                                (len - 1 + 2*(cd->type == TX_TYPE(DST_I)))*2,
+                                0, &rsc)))
+        return ret;
+
+    s->tmp = av_mallocz((len + 1)*2*sizeof(TXSample));
+    if (!s->tmp)
+        return AVERROR(ENOMEM);
+
+    return 0;
+}
+
+static void TX_NAME(ff_tx_dctI)(AVTXContext *s, void *_dst,
+                                void *_src, ptrdiff_t stride)
+{
+    TXSample *dst = _dst;
+    TXSample *src = _src;
+    const int len = s->len - 1;
+    TXSample *tmp = (TXSample *)s->tmp;
+
+    stride /= sizeof(TXSample);
+
+    for (int i = 0; i < len; i++)
+        tmp[i] = tmp[2*len - i] = src[i * stride];
+
+    tmp[len] = src[len * stride]; /* Middle */
+
+    s->fn[0](&s->sub[0], dst, tmp, sizeof(TXSample));
+}
+
+static void TX_NAME(ff_tx_dstI)(AVTXContext *s, void *_dst,
+                                void *_src, ptrdiff_t stride)
+{
+    TXSample *dst = _dst;
+    TXSample *src = _src;
+    const int len = s->len + 1;
+    TXSample *tmp = (void *)s->tmp;
+
+    stride /= sizeof(TXSample);
+
+    tmp[0] = 0;
+
+    for (int i = 1; i < len; i++) {
+        TXSample a = src[(i - 1) * stride];
+        tmp[i] = -a;
+        tmp[2*len - i] = a;
+    }
+
+    tmp[len] = 0; /* i == n, Nyquist */
+
+    s->fn[0](&s->sub[0], dst, tmp, sizeof(float));
+}
+
+static const FFTXCodelet TX_NAME(ff_tx_dctI_def) = {
+    .name       = TX_NAME_STR("dctI"),
+    .function   = TX_NAME(ff_tx_dctI),
+    .type       = TX_TYPE(DCT_I),
+    .flags      = AV_TX_UNALIGNED | AV_TX_INPLACE | FF_TX_OUT_OF_PLACE,
+    .factors    = { 2, TX_FACTOR_ANY },
+    .nb_factors = 2,
+    .min_len    = 2,
+    .max_len    = TX_LEN_UNLIMITED,
+    .init       = TX_NAME(ff_tx_dcstI_init),
+    .cpu_flags  = FF_TX_CPU_FLAGS_ALL,
+    .prio       = FF_TX_PRIO_BASE,
+};
+
+static const FFTXCodelet TX_NAME(ff_tx_dstI_def) = {
+    .name       = TX_NAME_STR("dstI"),
+    .function   = TX_NAME(ff_tx_dstI),
+    .type       = TX_TYPE(DST_I),
+    .flags      = AV_TX_UNALIGNED | AV_TX_INPLACE | FF_TX_OUT_OF_PLACE,
+    .factors    = { 2, TX_FACTOR_ANY },
+    .nb_factors = 2,
+    .min_len    = 2,
+    .max_len    = TX_LEN_UNLIMITED,
+    .init       = TX_NAME(ff_tx_dcstI_init),
+    .cpu_flags  = FF_TX_CPU_FLAGS_ALL,
+    .prio       = FF_TX_PRIO_BASE,
+};
+
 int TX_TAB(ff_tx_mdct_gen_exp)(AVTXContext *s, int *pre_tab)
 {
     int off = 0;
@@ -2101,6 +2202,8 @@  const FFTXCodelet * const TX_NAME(ff_tx_codelet_list)[] = {
     &TX_NAME(ff_tx_rdft_c2r_def),
     &TX_NAME(ff_tx_dctII_def),
     &TX_NAME(ff_tx_dctIII_def),
+    &TX_NAME(ff_tx_dctI_def),
+    &TX_NAME(ff_tx_dstI_def),
 
     NULL,
 };
-- 
2.40.1