[FFmpeg-devel] avcodec/wmaprodec: improve WMAPRO/XMA gapless output

Submitted by bananaman255@gmail.com on Oct. 9, 2018, 11:15 p.m.

Details

Message ID 20181009231502.5292-1-bananaman255@gmail.com
State New
Headers show

Commit Message

bananaman255@gmail.com Oct. 9, 2018, 11:15 p.m.
From: bnnm <bananaman255@gmail.com>

Improves trac issue #6722. Fixes truncated WMAPRO/XMA output due to delay samples and partially applies bitstream gapless info.

This can be tested with files encoding a small nb_samples (like 400), which couldn't output anything due to missing delay at the end.

Applying end_skip and partial delay samples would require some extra changes, so files are slightly bigger than what should be.

Signed-off-by: bnnm <bananaman255@gmail.com>
---
 libavcodec/wmaprodec.c | 100 ++++++++++++++++++++++++++++++++++++-------------
 tests/fate/wma.mak     |   1 +
 2 files changed, 74 insertions(+), 27 deletions(-)

Patch hide | download patch | download mbox

diff --git a/libavcodec/wmaprodec.c b/libavcodec/wmaprodec.c
index 9439bfa771..e18e2e438f 100644
--- a/libavcodec/wmaprodec.c
+++ b/libavcodec/wmaprodec.c
@@ -216,9 +216,9 @@  typedef struct WMAProDecodeCtx {
     GetBitContext    gb;                            ///< bitstream reader context
     int              buf_bit_size;                  ///< buffer size in bits
     uint8_t          drc_gain;                      ///< gain for the DRC tool
-    int8_t           skip_frame;                    ///< skip output step
     int8_t           parsed_all_subframes;          ///< all subframes decoded?
     uint8_t          skip_packets;                  ///< packets to skip to find next packet in a stream (XMA1/2)
+    int8_t           eof_done;                      ///< set when EOF reached and extra subframe is written (XMA1/2)
 
     /* subframe/block decode state */
     int16_t          subframe_len;                  ///< current subframe length
@@ -379,12 +379,6 @@  static av_cold int decode_init(WMAProDecodeCtx *s, AVCodecContext *avctx, int nu
         return AVERROR_PATCHWELCOME;
     }
 
-    /** frame info */
-    if (avctx->codec_id != AV_CODEC_ID_WMAPRO)
-        s->skip_frame = 0;
-    else
-        s->skip_frame = 1; /* skip first frame */
-
     s->packet_loss = 1;
     s->len_prefix  = (s->decode_flags & 0x40);
 
@@ -1450,21 +1444,38 @@  static int decode_frame(WMAProDecodeCtx *s, AVFrame *frame, int *got_frame_ptr)
         ff_dlog(s->avctx, "drc_gain %i\n", s->drc_gain);
     }
 
-    /** no idea what these are for, might be the number of samples
-        that need to be skipped at the beginning or end of a stream */
+    /** read encoder delay/padding (gapless) info */
     if (get_bits1(gb)) {
-        int av_unused skip;
+        int start_skip, end_skip;
+
 
-        /** usually true for the first frame */
+        /** usually true for the first frame and equal to 1 frame */
         if (get_bits1(gb)) {
-            skip = get_bits(gb, av_log2(s->samples_per_frame * 2));
-            ff_dlog(s->avctx, "start skip: %i\n", skip);
+            start_skip = get_bits(gb, av_log2(s->samples_per_frame * 2));
+            start_skip = FFMIN(start_skip, s->samples_per_frame);
+
+            /* must add for XMA to respect starting skip_samples */
+            s->avctx->internal->skip_samples += start_skip;
         }
 
-        /** sometimes true for the last frame */
+        /** usually true for the last frame and less than 1 frame */
         if (get_bits1(gb)) {
-            skip = get_bits(gb, av_log2(s->samples_per_frame * 2));
-            ff_dlog(s->avctx, "end skip: %i\n", skip);
+            end_skip = get_bits(gb, av_log2(s->samples_per_frame * 2));
+            end_skip = FFMIN(end_skip, s->samples_per_frame);
+
+            //TODO fix end_skip (also needs changes in XMA multistream's handling)
+            /* for XMA/WMAPRO end_skip may span last frame + final delay samples,
+             * so we'd need some counter to (possibly) skip some in this frame and
+             * rest after last frame (buf_size 0), so final samples would be:
+             * samples_per_frame + delay_samples - start_skip - end_skip
+             * (files with 1 packet are possible so both skips are considered).
+             *
+             * WMAPRO 5.1 in XWMA does't do this and audio is padded instead,
+             * and min packets is 2 (assumes only 1/2ch need it). */
+
+            if (s->avctx->codec_id == AV_CODEC_ID_WMAPRO && s->nb_channels > 2) {
+                frame->nb_samples = s->samples_per_frame - end_skip;
+            }
         }
 
     }
@@ -1500,13 +1511,9 @@  static int decode_frame(WMAProDecodeCtx *s, AVFrame *frame, int *got_frame_ptr)
                s->samples_per_frame * sizeof(*s->channel[i].out) >> 1);
     }
 
-    if (s->skip_frame) {
-        s->skip_frame = 0;
-        *got_frame_ptr = 0;
-        av_frame_unref(frame);
-    } else {
-        *got_frame_ptr = 1;
-    }
+    /** frame may be partially discarded with gapless info in the bitstream */
+    *got_frame_ptr = 1;
+
 
     if (s->len_prefix) {
         if (len != (get_bits_count(gb) - s->frame_offset) + 2) {
@@ -1609,7 +1616,35 @@  static int decode_packet(AVCodecContext *avctx, WMAProDecodeCtx *s,
 
     *got_frame_ptr = 0;
 
-    if (s->packet_done || s->packet_loss) {
+    if (!buf_size) {
+        AVFrame *frame = data;
+        int i;
+
+        /** Must output remaining samples after stream end. WMAPRO 5.1 created
+         * by XWMA encoder don't though; assuming only 1/2ch streams need it. */
+        s->packet_done = 0;
+        if (s->eof_done || s->nb_channels > 2)
+            return 0;
+
+        /** clean output buffer and copy last IMCDT samples */
+        for (i = 0; i < s->nb_channels; i++) {
+            memset(frame->extended_data[i], 0,
+            s->samples_per_frame * sizeof(*s->channel[i].out));
+
+            memcpy(frame->extended_data[i], s->channel[i].out,
+                   s->samples_per_frame * sizeof(*s->channel[i].out) >> 1);
+        }
+
+        /* TODO: change nb_samples to delay_samples. XMA always uses 128
+         * (with 512 samples per frame) and WMAPRO may use 768 (2048 s.p.f.).
+         * XMA needs changes in multi-stream handling though. */
+
+        s->eof_done = 1;
+        s->packet_done = 1;
+        *got_frame_ptr = 1;
+        return 0;
+    }
+    else if (s->packet_done || s->packet_loss) {
         s->packet_done = 0;
 
         /** sanity check for the buffer length */
@@ -1896,6 +1931,12 @@  static av_cold int xma_decode_init(AVCodecContext *avctx)
         start_channels += s->xma[i].nb_channels;
     }
 
+    /** XMA seems to require this to create gapless files (WMAPRO encoder
+     * pads files with silence, while XMA can encode any nb_samples).
+     * This value seems to counter the extra subframe output at EOF
+     * (or perhaps, XMA decoder should output 64 samples earlier?) */
+    avctx->internal->skip_samples = 128/2;
+
     return ret;
 }
 
@@ -1922,6 +1963,7 @@  static void flush(WMAProDecodeCtx *s)
                sizeof(*s->channel[i].out));
     s->packet_loss = 1;
     s->skip_packets = 0;
+    s->eof_done = 0;
 }
 
 
@@ -1934,6 +1976,8 @@  static void wmapro_flush(AVCodecContext *avctx)
     WMAProDecodeCtx *s = avctx->priv_data;
 
     flush(s);
+
+    avctx->internal->skip_samples = 0;
 }
 
 static void xma_flush(AVCodecContext *avctx)
@@ -1946,6 +1990,8 @@  static void xma_flush(AVCodecContext *avctx)
 
     memset(s->offset, 0, sizeof(s->offset));
     s->current_stream = 0;
+
+    avctx->internal->skip_samples = 128/2;
 }
 
 
@@ -1961,7 +2007,7 @@  AVCodec ff_wmapro_decoder = {
     .init           = wmapro_decode_init,
     .close          = wmapro_decode_end,
     .decode         = wmapro_decode_packet,
-    .capabilities   = AV_CODEC_CAP_SUBFRAMES | AV_CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_SUBFRAMES | AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY,
     .flush          = wmapro_flush,
     .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
                                                       AV_SAMPLE_FMT_NONE },
@@ -1976,7 +2022,7 @@  AVCodec ff_xma1_decoder = {
     .init           = xma_decode_init,
     .close          = xma_decode_end,
     .decode         = xma_decode_packet,
-    .capabilities   = AV_CODEC_CAP_SUBFRAMES | AV_CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_SUBFRAMES | AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY,
     .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
                                                       AV_SAMPLE_FMT_NONE },
 };
@@ -1991,7 +2037,7 @@  AVCodec ff_xma2_decoder = {
     .close          = xma_decode_end,
     .decode         = xma_decode_packet,
     .flush          = xma_flush,
-    .capabilities   = AV_CODEC_CAP_SUBFRAMES | AV_CODEC_CAP_DR1,
+    .capabilities   = AV_CODEC_CAP_SUBFRAMES | AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY,
     .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
                                                       AV_SAMPLE_FMT_NONE },
 };
diff --git a/tests/fate/wma.mak b/tests/fate/wma.mak
index 12a8fa989a..9c7ce6fd7d 100644
--- a/tests/fate/wma.mak
+++ b/tests/fate/wma.mak
@@ -11,6 +11,7 @@  fate-wmapro-5.1: SIZE_TOLERANCE = 24576
 FATE_WMAPRO-$(call DEMDEC, MOV, WMAPRO) += fate-wmapro-ism
 fate-wmapro-ism: CMD = pcm -i $(TARGET_SAMPLES)/isom/vc1-wmapro.ism -vn
 fate-wmapro-ism: REF = $(SAMPLES)/isom/vc1-wmapro.pcm
+fate-wmapro-ism: SIZE_TOLERANCE = 12976
 
 $(FATE_WMAPRO-yes): CMP = oneoff