diff mbox series

[FFmpeg-devel,v2] wmavoice: convert DCT-I/DST-I to lavu/tx

Message ID NbELai4--3-9@lynne.ee
State New
Headers show
Series [FFmpeg-devel,v2] wmavoice: convert DCT-I/DST-I to lavu/tx | expand

Checks

Context Check Description
andriy/configure_x86 warning Failed to apply patch
yinshiyou/configure_loongarch64 warning Failed to apply patch

Commit Message

Lynne Aug. 7, 2023, 9:35 a.m. UTC
No real changes from v1: the FATE tests now use a CMP_TARGET,
and the suffix of the reference files was changed from _new to _ref.

New references are here, to be uploaded to FATE under the same names:
https://files.lynne.ee/streaming_CBR-7K_ref.pcm
https://files.lynne.ee/streaming_CBR-11K_ref.pcm
https://files.lynne.ee/streaming_CBR-19K_ref.pcm

Before and after comparisons for 11K, if anyone is interested:
https://files.lynne.ee/wmavoice_current.png
https://files.lynne.ee/wmavoice_new.png
https://files.lynne.ee/wmavoice_microsoft.png

Comments

Paul B Mahol Aug. 13, 2023, 1:46 p.m. UTC | #1
LGTM to whole set
diff mbox series

Patch

From a2cdeddb8be7e4c6a93de51edc9533accbff2a87 Mon Sep 17 00:00:00 2001
From: Lynne <dev@lynne.ee>
Date: Fri, 4 Aug 2023 21:16:30 +0200
Subject: [PATCH v2] wmavoice: convert DCT-I/DST-I to lavu/tx

This is the very last user of any lavc transform code.

This also *corrects* wmavoice decoding, as the previous DCT/DST
transforms were incorrect, bringing it closer to Microsoft's
own wmavoice decoder.
---
 libavcodec/wmavoice.c | 29 +++++++++++++++++------------
 tests/fate/wma.mak    | 11 +++++++----
 2 files changed, 24 insertions(+), 16 deletions(-)

diff --git a/libavcodec/wmavoice.c b/libavcodec/wmavoice.c
index 5ae92e2dbc..915315cb8a 100644
--- a/libavcodec/wmavoice.c
+++ b/libavcodec/wmavoice.c
@@ -42,8 +42,6 @@ 
 #include "acelp_vectors.h"
 #include "acelp_filters.h"
 #include "lsp.h"
-#include "dct.h"
-#include "rdft.h"
 #include "sinewin.h"
 
 #define MAX_BLOCKS           8   ///< maximum number of blocks per frame
@@ -266,8 +264,8 @@  typedef struct WMAVoiceContext {
      */
     AVTXContext *rdft, *irdft;    ///< contexts for FFT-calculation in the
     av_tx_fn rdft_fn, irdft_fn;   ///< postfilter (for denoise filter)
-    DCTContext dct, dst;          ///< contexts for phase shift (in Hilbert
-                                  ///< transform, part of postfilter)
+    AVTXContext *dct, *dst;       ///< contexts for phase shift (in Hilbert
+    av_tx_fn dct_fn, dst_fn;      ///< transform, part of postfilter)
     float sin[511], cos[511];     ///< 8-bit cosine/sine windows over [-pi,pi]
                                   ///< range
     float postfilter_agc;         ///< gain control memory, used in
@@ -391,10 +389,6 @@  static av_cold int wmavoice_decode_init(AVCodecContext *ctx)
     if (s->do_apf) {
         float scale = 1.0f;
 
-        if ((ret = ff_dct_init (&s->dct,   6,    DCT_I)) < 0 ||
-            (ret = ff_dct_init (&s->dst,   6,    DST_I)) < 0)
-            return ret;
-
         ret = av_tx_init(&s->rdft, &s->rdft_fn, AV_TX_FLOAT_RDFT, 0, 1 << 7, &scale, 0);
         if (ret < 0)
             return ret;
@@ -403,6 +397,16 @@  static av_cold int wmavoice_decode_init(AVCodecContext *ctx)
         if (ret < 0)
             return ret;
 
+        scale = 1.0 / (1 << 6);
+        ret = av_tx_init(&s->dct, &s->dct_fn, AV_TX_FLOAT_DCT_I, 0, 1 << 6, &scale, 0);
+        if (ret < 0)
+            return ret;
+
+        scale = 1.0 / (1 << 6);
+        ret = av_tx_init(&s->dst, &s->dst_fn, AV_TX_FLOAT_DST_I, 0, 1 << 6, &scale, 0);
+        if (ret < 0)
+            return ret;
+
         ff_sine_window_init(s->cos, 256);
         memcpy(&s->sin[255], s->cos, 256 * sizeof(s->cos[0]));
         for (n = 0; n < 255; n++) {
@@ -612,6 +616,7 @@  static void calc_input_response(WMAVoiceContext *s, float *lpcs_src,
     float irange, angle_mul, gain_mul, range, sq;
     LOCAL_ALIGNED_32(float, coeffs, [0x82]);
     LOCAL_ALIGNED_32(float, lpcs, [0x82]);
+    LOCAL_ALIGNED_32(float, lpcs_dct, [0x82]);
     int n, idx;
 
     memcpy(coeffs, coeffs_dst, 0x82*sizeof(float));
@@ -662,8 +667,8 @@  static void calc_input_response(WMAVoiceContext *s, float *lpcs_src,
      * is a sine input) by doing a phase shift (in theory, H(sin())=cos()).
      * Hilbert_Transform(RDFT(x)) = Laplace_Transform(x), which calculates the
      * "moment" of the LPCs in this filter. */
-    s->dct.dct_calc(&s->dct, lpcs);
-    s->dst.dct_calc(&s->dst, lpcs);
+    s->dct_fn(s->dct, lpcs_dct, lpcs, sizeof(float));
+    s->dst_fn(s->dst, lpcs, lpcs_dct, sizeof(float));
 
     /* Split out the coefficient indexes into phase/magnitude pairs */
     idx = 255 + av_clip(lpcs[64],               -255, 255);
@@ -2003,8 +2008,8 @@  static av_cold int wmavoice_decode_end(AVCodecContext *ctx)
     if (s->do_apf) {
         av_tx_uninit(&s->rdft);
         av_tx_uninit(&s->irdft);
-        ff_dct_end(&s->dct);
-        ff_dct_end(&s->dst);
+        av_tx_uninit(&s->dct);
+        av_tx_uninit(&s->dst);
     }
 
     return 0;
diff --git a/tests/fate/wma.mak b/tests/fate/wma.mak
index c13874ebfc..ed2ac24c65 100644
--- a/tests/fate/wma.mak
+++ b/tests/fate/wma.mak
@@ -20,18 +20,21 @@  fate-wmapro: $(FATE_WMAPRO-yes)
 
 FATE_WMAVOICE-$(call DEMDEC, ASF, WMAVOICE) += fate-wmavoice-7k
 fate-wmavoice-7k: CMD = pcm -i $(TARGET_SAMPLES)/wmavoice/streaming_CBR-7K.wma
-fate-wmavoice-7k: REF = $(SAMPLES)/wmavoice/streaming_CBR-7K.pcm
+fate-wmavoice-7k: REF = $(SAMPLES)/wmavoice/streaming_CBR-7K_ref.pcm
+fate-wmavoice-7k: CMP_TARGET = 1368.61
 fate-wmavoice-7k: FUZZ = 3
 
 FATE_WMAVOICE-$(call DEMDEC, ASF, WMAVOICE) += fate-wmavoice-11k
 fate-wmavoice-11k: CMD = pcm -i $(TARGET_SAMPLES)/wmavoice/streaming_CBR-11K.wma
-fate-wmavoice-11k: REF = $(SAMPLES)/wmavoice/streaming_CBR-11K.pcm
+fate-wmavoice-11k: REF = $(SAMPLES)/wmavoice/streaming_CBR-11K_ref.pcm
+fate-wmavoice-11k: CMP_TARGET = 965.24
 fate-wmavoice-11k: FUZZ = 3
 
 FATE_WMAVOICE-$(call DEMDEC, ASF, WMAVOICE) += fate-wmavoice-19k
 fate-wmavoice-19k: CMD = pcm -i $(TARGET_SAMPLES)/wmavoice/streaming_CBR-19K.wma
-fate-wmavoice-19k: REF = $(SAMPLES)/wmavoice/streaming_CBR-19K.pcm
-fate-wmavoice-19k: FUZZ = 3
+fate-wmavoice-19k: REF = $(SAMPLES)/wmavoice/streaming_CBR-19K_ref.pcm
+fate-wmavoice-19k: CMP_TARGET = 689.33
+fate-wmavoice-19k: FUZZ = 3
 
 $(FATE_WMAVOICE-yes): CMP = stddev
 
-- 
2.40.1