diff mbox series

[FFmpeg-devel] avcodec/wmadec: fix WMA gapless playback

Message ID 20210911194900.17480-1-onemda@gmail.com
State Accepted
Commit 19802d170a304f5853d92e01d0513b9e06897d61
Headers show
Series [FFmpeg-devel] avcodec/wmadec: fix WMA gapless playback | expand

Checks

Context Check Description
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished
andriy/make_ppc success Make finished
andriy/make_fate_ppc success Make fate finished

Commit Message

Paul B Mahol Sept. 11, 2021, 7:49 p.m. UTC
From: bnnm <bananaman255@gmail.com>

Fixes trac issue #7473.

Removes encoder delay (skip samples) and writes remaining frame samples after EOF to get correct sample count.

Output is now accurate vs players that use Microsoft's codecs (Windows Media Format Runtime).

Tested vs encode>decode WMAv2 with MS's codecs and most sample rate/bit rate/channel/mode combinations in ASF/XWMA.
WMAv1 appears to use the same delay, from FFmpeg samples.

Signed-off-by: bnnm <bananaman255@gmail.com>
Signed-off-by: Paul B Mahol <onemda@gmail.com>
---
 libavcodec/wma.h        |  2 ++
 libavcodec/wmadec.c     | 22 ++++++++++++++++++++--
 tests/fate/wma.mak      |  8 ++++----
 tests/ref/fate/flcl1905 |  3 +--
 4 files changed, 27 insertions(+), 8 deletions(-)

Comments

Paul B Mahol Sept. 12, 2021, 6:26 p.m. UTC | #1
will apply soon
Mika Fischer April 28, 2022, 10:58 a.m. UTC | #2
Hi,

I think this patch, which was applied on 2021-09-12 in 
19802d170a304f5853d92e01d0513b9e06897d61 and is included in n5.0 and 
n5.0.1 causes a regression.

This is the (shortened) diff for the test decoding in the patch:
----------------
diff --git a/tests/ref/fate/flcl1905 b/tests/ref/fate/flcl1905
index 5f5245ebcf..d702139db8 100644
--- a/tests/ref/fate/flcl1905
+++ b/tests/ref/fate/flcl1905
@@ -1,6 +1,4 @@
  packet|[...]|pts=0|pts_time=0.000000|[...]
-frame|[...]|pts=0|pts_time=0.000000|[...]
-frame|[...]|pts=N/A|pts_time=N/A|[...]
  frame|[...]|pts=N/A|pts_time=N/A|[...]
----------------

As can be seen, the patch causes the first two frames to be dropped, 
which might be OK to better conform to the MS decoder.

The problem is that only the first frame had the timestamp info from the 
packet. Since that got dropped, now the first frame decoded from the 
packet has no timestamp info.

This causes us issues since we assumed that the first audio frame 
decoded after seeking always has timestamp info.

Best regards,
  Mika Fischer
diff mbox series

Patch

diff --git a/libavcodec/wma.h b/libavcodec/wma.h
index aea7ba28ab..80e52687fd 100644
--- a/libavcodec/wma.h
+++ b/libavcodec/wma.h
@@ -135,6 +135,8 @@  typedef struct WMACodecContext {
     float lsp_pow_m_table2[(1 << LSP_POW_BITS)];
     AVFloatDSPContext *fdsp;
 
+    int eof_done; /* decode flag to output remaining samples after EOF */
+
 #ifdef TRACE
     int frame_count;
 #endif /* TRACE */
diff --git a/libavcodec/wmadec.c b/libavcodec/wmadec.c
index cc67244344..9955aaa7d6 100644
--- a/libavcodec/wmadec.c
+++ b/libavcodec/wmadec.c
@@ -135,6 +135,8 @@  static av_cold int wma_decode_init(AVCodecContext *avctx)
 
     avctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
 
+    avctx->internal->skip_samples = s->frame_len * 2;
+
     return 0;
 }
 
@@ -829,7 +831,20 @@  static int wma_decode_superframe(AVCodecContext *avctx, void *data,
     ff_tlog(avctx, "***decode_superframe:\n");
 
     if (buf_size == 0) {
+        if (s->eof_done)
+            return 0;
+
+        frame->nb_samples = s->frame_len;
+        if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
+            return ret;
+
+        for (i = 0; i < s->avctx->channels; i++)
+            memcpy(frame->extended_data[i], &s->frame_out[i][0],
+                   frame->nb_samples * sizeof(s->frame_out[i][0]));
+
         s->last_superframe_len = 0;
+        s->eof_done = 1;
+        *got_frame_ptr = 1;
         return 0;
     }
     if (buf_size < avctx->block_align) {
@@ -975,6 +990,9 @@  static av_cold void flush(AVCodecContext *avctx)
 
     s->last_bitoffset      =
     s->last_superframe_len = 0;
+
+    s->eof_done = 0;
+    avctx->internal->skip_samples = s->frame_len * 2;
 }
 
 #if CONFIG_WMAV1_DECODER
@@ -988,7 +1006,7 @@  const AVCodec ff_wmav1_decoder = {
     .close          = ff_wma_end,
     .decode         = wma_decode_superframe,
     .flush          = flush,
-    .capabilities   = AV_CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY,
     .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
                                                       AV_SAMPLE_FMT_NONE },
     .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
@@ -1005,7 +1023,7 @@  const AVCodec ff_wmav2_decoder = {
     .close          = ff_wma_end,
     .decode         = wma_decode_superframe,
     .flush          = flush,
-    .capabilities   = AV_CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY,
     .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
                                                       AV_SAMPLE_FMT_NONE },
     .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
diff --git a/tests/fate/wma.mak b/tests/fate/wma.mak
index bc530998e8..c13874ebfc 100644
--- a/tests/fate/wma.mak
+++ b/tests/fate/wma.mak
@@ -40,14 +40,14 @@  fate-wmavoice: $(FATE_WMAVOICE-yes)
 
 FATE_WMA_ENCODE-$(call ENCDEC, WMAV1, ASF) += fate-wmav1-encode
 fate-wmav1-encode: CMD = enc_dec_pcm asf wav s16le $(subst $(SAMPLES),$(TARGET_SAMPLES),$(REF)) -c:a wmav1 -b:a 128k
-fate-wmav1-encode: CMP_SHIFT = -8192
-fate-wmav1-encode: CMP_TARGET = 291.06
+fate-wmav1-encode: CMP_SHIFT = 8192
+fate-wmav1-encode: CMP_TARGET = 299.99
 fate-wmav1-encode: SIZE_TOLERANCE = 4632
 
 FATE_WMA_ENCODE-$(call ENCDEC, WMAV2, ASF) += fate-wmav2-encode
 fate-wmav2-encode: CMD = enc_dec_pcm asf wav s16le $(subst $(SAMPLES),$(TARGET_SAMPLES),$(REF)) -c:a wmav2 -b:a 128k
-fate-wmav2-encode: CMP_SHIFT = -8192
-fate-wmav2-encode: CMP_TARGET = 258.32
+fate-wmav2-encode: CMP_SHIFT = 8192
+fate-wmav2-encode: CMP_TARGET = 267.92
 fate-wmav2-encode: SIZE_TOLERANCE = 4632
 
 $(FATE_WMA_ENCODE-yes): CMP = stddev
diff --git a/tests/ref/fate/flcl1905 b/tests/ref/fate/flcl1905
index 5f5245ebcf..d702139db8 100644
--- a/tests/ref/fate/flcl1905
+++ b/tests/ref/fate/flcl1905
@@ -1,6 +1,4 @@ 
 packet|codec_type=audio|stream_index=0|pts=0|pts_time=0.000000|dts=0|dts_time=0.000000|duration=22528|duration_time=0.510839|size=4092|pos=56|flags=K_
-frame|media_type=audio|stream_index=0|key_frame=1|pts=0|pts_time=0.000000|pkt_dts=0|pkt_dts_time=0.000000|best_effort_timestamp=0|best_effort_timestamp_time=0.000000|pkt_duration=22528|pkt_duration_time=0.510839|pkt_pos=56|pkt_size=4092|sample_fmt=fltp|nb_samples=2048|channels=2|channel_layout=unknown
-frame|media_type=audio|stream_index=0|key_frame=1|pts=N/A|pts_time=N/A|pkt_dts=N/A|pkt_dts_time=N/A|best_effort_timestamp=N/A|best_effort_timestamp_time=N/A|pkt_duration=22528|pkt_duration_time=0.510839|pkt_pos=56|pkt_size=3720|sample_fmt=fltp|nb_samples=2048|channels=2|channel_layout=unknown
 frame|media_type=audio|stream_index=0|key_frame=1|pts=N/A|pts_time=N/A|pkt_dts=N/A|pkt_dts_time=N/A|best_effort_timestamp=N/A|best_effort_timestamp_time=N/A|pkt_duration=22528|pkt_duration_time=0.510839|pkt_pos=56|pkt_size=3348|sample_fmt=fltp|nb_samples=2048|channels=2|channel_layout=unknown
 frame|media_type=audio|stream_index=0|key_frame=1|pts=N/A|pts_time=N/A|pkt_dts=N/A|pkt_dts_time=N/A|best_effort_timestamp=N/A|best_effort_timestamp_time=N/A|pkt_duration=22528|pkt_duration_time=0.510839|pkt_pos=56|pkt_size=2976|sample_fmt=fltp|nb_samples=2048|channels=2|channel_layout=unknown
 frame|media_type=audio|stream_index=0|key_frame=1|pts=N/A|pts_time=N/A|pkt_dts=N/A|pkt_dts_time=N/A|best_effort_timestamp=N/A|best_effort_timestamp_time=N/A|pkt_duration=22528|pkt_duration_time=0.510839|pkt_pos=56|pkt_size=2604|sample_fmt=fltp|nb_samples=2048|channels=2|channel_layout=unknown
@@ -191,3 +189,4 @@  frame|media_type=audio|stream_index=0|key_frame=1|pts=N/A|pts_time=N/A|pkt_dts=N
 frame|media_type=audio|stream_index=0|key_frame=1|pts=N/A|pts_time=N/A|pkt_dts=N/A|pkt_dts_time=N/A|best_effort_timestamp=N/A|best_effort_timestamp_time=N/A|pkt_duration=22528|pkt_duration_time=0.510839|pkt_pos=61436|pkt_size=744|sample_fmt=fltp|nb_samples=2048|channels=2|channel_layout=unknown
 frame|media_type=audio|stream_index=0|key_frame=1|pts=N/A|pts_time=N/A|pkt_dts=N/A|pkt_dts_time=N/A|best_effort_timestamp=N/A|best_effort_timestamp_time=N/A|pkt_duration=22528|pkt_duration_time=0.510839|pkt_pos=61436|pkt_size=372|sample_fmt=fltp|nb_samples=2048|channels=2|channel_layout=unknown
 packet|codec_type=audio|stream_index=0|pts=360448|pts_time=8.173424|dts=360448|dts_time=8.173424|duration=44|duration_time=0.000998|size=8|pos=65528|flags=K_
+frame|media_type=audio|stream_index=0|key_frame=1|pts=N/A|pts_time=N/A|pkt_dts=N/A|pkt_dts_time=N/A|best_effort_timestamp=N/A|best_effort_timestamp_time=N/A|pkt_duration=N/A|pkt_duration_time=N/A|pkt_pos=N/A|pkt_size=0|sample_fmt=fltp|nb_samples=2048|channels=2|channel_layout=unknown