diff mbox series

[FFmpeg-devel] avformat/dv: allow returning damaged audio

Message ID 20201029215838.25699-1-michael@niedermayer.cc
State New
Headers show
Series [FFmpeg-devel] avformat/dv: allow returning damaged audio | expand

Checks

Context Check Description
andriy/x86_make success Make finished
andriy/x86_make_fate success Make fate finished
andriy/PPC64_make success Make finished
andriy/PPC64_make_fate success Make fate finished

Commit Message

Michael Niedermayer Oct. 29, 2020, 9:58 p.m. UTC
These potentially damaged packets are marked as corrupt.
The packet length is predicted based on packet length history,
allowing prediction of the common pattern used in NTSC.

Fixes: Ticket8762
Tested-by: Dave Rice <dave@dericed.com>
Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
---
 libavformat/dv.c | 60 ++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 53 insertions(+), 7 deletions(-)

Comments

Carl Eugen Hoyos Oct. 31, 2020, 12:24 p.m. UTC | #1
Am Do., 29. Okt. 2020 um 22:59 Uhr schrieb Michael Niedermayer
<michael@niedermayer.cc>:

> +static const AVOption dv_options[] = {
> +    { "dvaudio_concealment", "", OFFSET(dvaudio_concealment), AV_OPT_TYPE_INT  , {.i64 = AUDIO_CONCEAL_DROP}, 0, INT_MAX, DEC, "dvaudio_concealment"},
> +    { "drop",                "", 0                          , AV_OPT_TYPE_CONST, {.i64 = AUDIO_CONCEAL_DROP}, 0, INT_MAX, DEC, "dvaudio_concealment"},
> +    { "pass",                "", 0                          , AV_OPT_TYPE_CONST, {.i64 = AUDIO_CONCEAL_PASS}, 0, INT_MAX, DEC, "dvaudio_concealment"},

Am I correct that the current lavc option "ec" only works for video?

Shouldn't its documentation be extended so it also works for audio
(to avoid adding a new option)?

Carl Eugen
Marton Balint Oct. 31, 2020, 4:25 p.m. UTC | #2
On Thu, 29 Oct 2020, Michael Niedermayer wrote:

> These potentially damaged packets are marked as corrupt.
> The packet length is predicted based on packet length history,
> allowing prediction of the common pattern used in NTSC.
>
> Fixes: Ticket8762

Sync loss is caused by invalid timestamps; this patch is a possible workaround
for the ticket, but it ignores the core problem. I will post a patch to fix the
timestamps; it does not seem hard.

Regards,
Marton


> Tested-by: Dave Rice <dave@dericed.com>
> Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
> ---
> libavformat/dv.c | 60 ++++++++++++++++++++++++++++++++++++++++++------
> 1 file changed, 53 insertions(+), 7 deletions(-)
>
> diff --git a/libavformat/dv.c b/libavformat/dv.c
> index 3e0d12c0e3..3499cba6f8 100644
> --- a/libavformat/dv.c
> +++ b/libavformat/dv.c
> @@ -34,12 +34,20 @@
> #include "libavcodec/dv_profile.h"
> #include "libavcodec/dv.h"
> #include "libavutil/channel_layout.h"
> +#include "libavutil/opt.h"
> #include "libavutil/intreadwrite.h"
> #include "libavutil/mathematics.h"
> #include "libavutil/timecode.h"
> #include "dv.h"
> #include "libavutil/avassert.h"
> 
> +#define AS_HASH_SIZE 16
> +
> +enum AudioConceal {
> +    AUDIO_CONCEAL_PASS = 1,
> +    AUDIO_CONCEAL_DROP = 0,
> +};
> +
> struct DVDemuxContext {
>     const AVDVProfile*  sys;    /* Current DV profile. E.g.: 525/60, 625/50 */
>     AVFormatContext*  fctx;
> @@ -50,6 +58,9 @@ struct DVDemuxContext {
>     int               ach;
>     int               frames;
>     uint64_t          abytes;
> +    uint8_t           as_pack[AS_HASH_SIZE][5];
> +    uint8_t           as_hash;
> +    int               dvaudio_concealment;
> };
> 
> static inline uint16_t dv_audio_12to16(uint16_t sample)
> @@ -72,7 +83,7 @@ static inline uint16_t dv_audio_12to16(uint16_t sample)
>     return result;
> }
> 
> -static const uint8_t *dv_extract_pack(const uint8_t *frame, enum dv_pack_type t)
> +static const uint8_t *dv_extract_pack(DVDemuxContext *d, const uint8_t *frame, enum dv_pack_type t)
> {
>     int offs;
>     int c;
> @@ -101,6 +112,17 @@ static const uint8_t *dv_extract_pack(const uint8_t *frame, enum dv_pack_type t)
>             break;
>     }
> 
> +    if (t == dv_audio_source || t == dv_audio_control) {
> +        int index = (d->as_hash>>1) & (AS_HASH_SIZE-1);
> +        if (frame[offs] == t) {
> +            memcpy(d->as_pack[index], &frame[offs], sizeof(d->as_pack[index]));
> +        } else if (d->as_pack[index][0] && d->dvaudio_concealment == AUDIO_CONCEAL_PASS) {
> +            return d->as_pack[index];
> +        } else if (d->as_pack[0][0] && d->dvaudio_concealment == AUDIO_CONCEAL_PASS) {
> +            return d->as_pack[0];
> +        }
> +    }
> +
>     return frame[offs] == t ? &frame[offs] : NULL;
> }
> 
> @@ -116,7 +138,7 @@ static const int dv_audio_frequency[3] = {
>  * 3. Audio is always returned as 16-bit linear samples: 12-bit nonlinear samples
>  *    are converted into 16-bit linear ones.
>  */
> -static int dv_extract_audio(const uint8_t *frame, uint8_t **ppcm,
> +static int dv_extract_audio(DVDemuxContext *c, const uint8_t *frame, uint8_t **ppcm,
>                             const AVDVProfile *sys)
> {
>     int size, chan, i, j, d, of, smpls, freq, quant, half_ch;
> @@ -124,7 +146,7 @@ static int dv_extract_audio(const uint8_t *frame, uint8_t **ppcm,
>     const uint8_t *as_pack;
>     uint8_t *pcm, ipcm;
> 
> -    as_pack = dv_extract_pack(frame, dv_audio_source);
> +    as_pack = dv_extract_pack(c, frame, dv_audio_source);
>     if (!as_pack)    /* No audio ? */
>         return 0;
> 
> @@ -137,6 +159,7 @@ static int dv_extract_audio(const uint8_t *frame, uint8_t **ppcm,
>
>     if (freq >= FF_ARRAY_ELEMS(dv_audio_frequency))
>         return AVERROR_INVALIDDATA;
> +    c->as_hash = 2*c->as_hash + smpls;
>
>     size    = (sys->audio_min_samples[freq] + smpls) * 4; /* 2ch, 2bytes */
>     half_ch = sys->difseg_size / 2;
> @@ -153,6 +176,9 @@ static int dv_extract_audio(const uint8_t *frame, uint8_t **ppcm,
>     /* for each DIF channel */
>     for (chan = 0; chan < sys->n_difchan; chan++) {
>         av_assert0(ipcm<4);
> +        c->audio_pkt[ipcm].flags &= ~AV_PKT_FLAG_CORRUPT;
> +        if (as_pack >= c->as_pack[0] && as_pack < c->as_pack[AS_HASH_SIZE])
> +            c->audio_pkt[ipcm].flags |= AV_PKT_FLAG_CORRUPT;
>         pcm = ppcm[ipcm++];
>         if (!pcm)
>             break;
> @@ -224,7 +250,7 @@ static int dv_extract_audio_info(DVDemuxContext *c, const uint8_t *frame)
>     const uint8_t *as_pack;
>     int freq, stype, smpls, quant, i, ach;
> 
> -    as_pack = dv_extract_pack(frame, dv_audio_source);
> +    as_pack = dv_extract_pack(c, frame, dv_audio_source);
>     if (!as_pack || !c->sys) {    /* No audio ? */
>         c->ach = 0;
>         return 0;
> @@ -292,7 +318,7 @@ static int dv_extract_video_info(DVDemuxContext *c, const uint8_t *frame)
>     c->vst->avg_frame_rate = av_inv_q(c->vst->time_base);
>
>     /* finding out SAR is a little bit messy */
> -    vsc_pack = dv_extract_pack(frame, dv_video_control);
> +    vsc_pack = dv_extract_pack(c, frame, dv_video_control);
>     apt      = frame[4] & 0x07;
>     is16_9   = (vsc_pack && ((vsc_pack[2] & 0x07) == 0x02 ||
>                              (!apt && (vsc_pack[2] & 0x07) == 0x07)));
> @@ -312,7 +338,7 @@ static int dv_extract_timecode(DVDemuxContext* c, const uint8_t* frame, char *tc
>     // is only relevant for NTSC systems.
>     int prevent_df = c->sys->ltc_divisor == 25 || c->sys->ltc_divisor == 50;
> 
> -    tc_pack = dv_extract_pack(frame, dv_timecode);
> +    tc_pack = dv_extract_pack(c, frame, dv_timecode);
>     if (!tc_pack)
>         return 0;
>     av_timecode_make_smpte_tc_string2(tc, av_inv_q(c->sys->time_base), AV_RB32(tc_pack + 1), prevent_df, 1);
> @@ -392,7 +418,7 @@ int avpriv_dv_produce_packet(DVDemuxContext *c, AVPacket *pkt,
>         ppcm[i] = c->audio_buf[i];
>     }
>     if (c->ach)
> -        dv_extract_audio(buf, ppcm, c->sys);
> +        dv_extract_audio(c, buf, ppcm, c->sys);
>
>     /* We work with 720p frames split in half, thus even frames have
>      * channels 0,1 and odd 2,3. */
> @@ -460,8 +486,10 @@ void ff_dv_offset_reset(DVDemuxContext *c, int64_t frame_offset)
>  ************************************************************/
> 
> typedef struct RawDVContext {
> +    const AVClass  *class;
>     DVDemuxContext  dv_demux;
>     uint8_t         buf[DV_MAX_FRAME_SIZE];
> +    int             dvaudio_concealment;
> } RawDVContext;
> 
> static int dv_read_timecode(AVFormatContext *s) {
> @@ -502,6 +530,7 @@ static int dv_read_header(AVFormatContext *s)
>
>     if ((ret = dv_init_demux(s, &c->dv_demux)) < 0)
>         return ret;
> +    c->dv_demux.dvaudio_concealment = c->dvaudio_concealment;
>
>     state = avio_rb32(s->pb);
>     while ((state & 0xffffff7f) != 0x1f07003f) {
> @@ -626,6 +655,22 @@ static int dv_probe(const AVProbeData *p)
>     return 0;
> }
> 
> +#define OFFSET(x) offsetof(RawDVContext, x)
> +#define DEC AV_OPT_FLAG_DECODING_PARAM
> +static const AVOption dv_options[] = {
> +    { "dvaudio_concealment", "", OFFSET(dvaudio_concealment), AV_OPT_TYPE_INT  , {.i64 = AUDIO_CONCEAL_DROP}, 0, INT_MAX, DEC, "dvaudio_concealment"},
> +    { "drop",                "", 0                          , AV_OPT_TYPE_CONST, {.i64 = AUDIO_CONCEAL_DROP}, 0, INT_MAX, DEC, "dvaudio_concealment"},
> +    { "pass",                "", 0                          , AV_OPT_TYPE_CONST, {.i64 = AUDIO_CONCEAL_PASS}, 0, INT_MAX, DEC, "dvaudio_concealment"},
> +    { NULL },
> +};
> +
> +static const AVClass dv_demuxer_class = {
> +    .class_name = "DV demuxer",
> +    .item_name  = av_default_item_name,
> +    .option     = dv_options,
> +    .version    = LIBAVUTIL_VERSION_INT,
> +};
> +
> AVInputFormat ff_dv_demuxer = {
>     .name           = "dv",
>     .long_name      = NULL_IF_CONFIG_SMALL("DV (Digital Video)"),
> @@ -635,4 +680,5 @@ AVInputFormat ff_dv_demuxer = {
>     .read_packet    = dv_read_packet,
>     .read_seek      = dv_read_seek,
>     .extensions     = "dv,dif",
> +    .priv_class     = &dv_demuxer_class,
> };
> -- 
> 2.17.1
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
Marton Balint Oct. 31, 2020, 4:46 p.m. UTC | #3
On Sat, 31 Oct 2020, Carl Eugen Hoyos wrote:

> Am Do., 29. Okt. 2020 um 22:59 Uhr schrieb Michael Niedermayer
> <michael@niedermayer.cc>:
>
>> +static const AVOption dv_options[] = {
>> +    { "dvaudio_concealment", "", OFFSET(dvaudio_concealment), AV_OPT_TYPE_INT  , {.i64 = AUDIO_CONCEAL_DROP}, 0, INT_MAX, DEC, "dvaudio_concealment"},
>> +    { "drop",                "", 0                          , AV_OPT_TYPE_CONST, {.i64 = AUDIO_CONCEAL_DROP}, 0, INT_MAX, DEC, "dvaudio_concealment"},
>> +    { "pass",                "", 0                          , AV_OPT_TYPE_CONST, {.i64 = AUDIO_CONCEAL_PASS}, 0, INT_MAX, DEC, "dvaudio_concealment"},
>
> Am I correct that the current lavc option "ec" only works for video?
>
> Shouldn't its documentation be extended so it also works for audio
> (to avoid adding a new option)?

This is a demuxer, not a decoder, so ec cannot be used.

Regards,
Marton
diff mbox series

Patch

diff --git a/libavformat/dv.c b/libavformat/dv.c
index 3e0d12c0e3..3499cba6f8 100644
--- a/libavformat/dv.c
+++ b/libavformat/dv.c
@@ -34,12 +34,20 @@ 
 #include "libavcodec/dv_profile.h"
 #include "libavcodec/dv.h"
 #include "libavutil/channel_layout.h"
+#include "libavutil/opt.h"
 #include "libavutil/intreadwrite.h"
 #include "libavutil/mathematics.h"
 #include "libavutil/timecode.h"
 #include "dv.h"
 #include "libavutil/avassert.h"
 
+#define AS_HASH_SIZE 16
+
+enum AudioConceal {
+    AUDIO_CONCEAL_PASS = 1,
+    AUDIO_CONCEAL_DROP = 0,
+};
+
 struct DVDemuxContext {
     const AVDVProfile*  sys;    /* Current DV profile. E.g.: 525/60, 625/50 */
     AVFormatContext*  fctx;
@@ -50,6 +58,9 @@  struct DVDemuxContext {
     int               ach;
     int               frames;
     uint64_t          abytes;
+    uint8_t           as_pack[AS_HASH_SIZE][5];
+    uint8_t           as_hash;
+    int               dvaudio_concealment;
 };
 
 static inline uint16_t dv_audio_12to16(uint16_t sample)
@@ -72,7 +83,7 @@  static inline uint16_t dv_audio_12to16(uint16_t sample)
     return result;
 }
 
-static const uint8_t *dv_extract_pack(const uint8_t *frame, enum dv_pack_type t)
+static const uint8_t *dv_extract_pack(DVDemuxContext *d, const uint8_t *frame, enum dv_pack_type t)
 {
     int offs;
     int c;
@@ -101,6 +112,17 @@  static const uint8_t *dv_extract_pack(const uint8_t *frame, enum dv_pack_type t)
             break;
     }
 
+    if (t == dv_audio_source || t == dv_audio_control) {
+        int index = (d->as_hash>>1) & (AS_HASH_SIZE-1);
+        if (frame[offs] == t) {
+            memcpy(d->as_pack[index], &frame[offs], sizeof(d->as_pack[index]));
+        } else if (d->as_pack[index][0] && d->dvaudio_concealment == AUDIO_CONCEAL_PASS) {
+            return d->as_pack[index];
+        } else if (d->as_pack[0][0] && d->dvaudio_concealment == AUDIO_CONCEAL_PASS) {
+            return d->as_pack[0];
+        }
+    }
+
     return frame[offs] == t ? &frame[offs] : NULL;
 }
 
@@ -116,7 +138,7 @@  static const int dv_audio_frequency[3] = {
  * 3. Audio is always returned as 16-bit linear samples: 12-bit nonlinear samples
  *    are converted into 16-bit linear ones.
  */
-static int dv_extract_audio(const uint8_t *frame, uint8_t **ppcm,
+static int dv_extract_audio(DVDemuxContext *c, const uint8_t *frame, uint8_t **ppcm,
                             const AVDVProfile *sys)
 {
     int size, chan, i, j, d, of, smpls, freq, quant, half_ch;
@@ -124,7 +146,7 @@  static int dv_extract_audio(const uint8_t *frame, uint8_t **ppcm,
     const uint8_t *as_pack;
     uint8_t *pcm, ipcm;
 
-    as_pack = dv_extract_pack(frame, dv_audio_source);
+    as_pack = dv_extract_pack(c, frame, dv_audio_source);
     if (!as_pack)    /* No audio ? */
         return 0;
 
@@ -137,6 +159,7 @@  static int dv_extract_audio(const uint8_t *frame, uint8_t **ppcm,
 
     if (freq >= FF_ARRAY_ELEMS(dv_audio_frequency))
         return AVERROR_INVALIDDATA;
+    c->as_hash = 2*c->as_hash + smpls;
 
     size    = (sys->audio_min_samples[freq] + smpls) * 4; /* 2ch, 2bytes */
     half_ch = sys->difseg_size / 2;
@@ -153,6 +176,9 @@  static int dv_extract_audio(const uint8_t *frame, uint8_t **ppcm,
     /* for each DIF channel */
     for (chan = 0; chan < sys->n_difchan; chan++) {
         av_assert0(ipcm<4);
+        c->audio_pkt[ipcm].flags &= ~AV_PKT_FLAG_CORRUPT;
+        if (as_pack >= c->as_pack[0] && as_pack < c->as_pack[AS_HASH_SIZE])
+            c->audio_pkt[ipcm].flags |= AV_PKT_FLAG_CORRUPT;
         pcm = ppcm[ipcm++];
         if (!pcm)
             break;
@@ -224,7 +250,7 @@  static int dv_extract_audio_info(DVDemuxContext *c, const uint8_t *frame)
     const uint8_t *as_pack;
     int freq, stype, smpls, quant, i, ach;
 
-    as_pack = dv_extract_pack(frame, dv_audio_source);
+    as_pack = dv_extract_pack(c, frame, dv_audio_source);
     if (!as_pack || !c->sys) {    /* No audio ? */
         c->ach = 0;
         return 0;
@@ -292,7 +318,7 @@  static int dv_extract_video_info(DVDemuxContext *c, const uint8_t *frame)
     c->vst->avg_frame_rate = av_inv_q(c->vst->time_base);
 
     /* finding out SAR is a little bit messy */
-    vsc_pack = dv_extract_pack(frame, dv_video_control);
+    vsc_pack = dv_extract_pack(c, frame, dv_video_control);
     apt      = frame[4] & 0x07;
     is16_9   = (vsc_pack && ((vsc_pack[2] & 0x07) == 0x02 ||
                              (!apt && (vsc_pack[2] & 0x07) == 0x07)));
@@ -312,7 +338,7 @@  static int dv_extract_timecode(DVDemuxContext* c, const uint8_t* frame, char *tc
     // is only relevant for NTSC systems.
     int prevent_df = c->sys->ltc_divisor == 25 || c->sys->ltc_divisor == 50;
 
-    tc_pack = dv_extract_pack(frame, dv_timecode);
+    tc_pack = dv_extract_pack(c, frame, dv_timecode);
     if (!tc_pack)
         return 0;
     av_timecode_make_smpte_tc_string2(tc, av_inv_q(c->sys->time_base), AV_RB32(tc_pack + 1), prevent_df, 1);
@@ -392,7 +418,7 @@  int avpriv_dv_produce_packet(DVDemuxContext *c, AVPacket *pkt,
         ppcm[i] = c->audio_buf[i];
     }
     if (c->ach)
-        dv_extract_audio(buf, ppcm, c->sys);
+        dv_extract_audio(c, buf, ppcm, c->sys);
 
     /* We work with 720p frames split in half, thus even frames have
      * channels 0,1 and odd 2,3. */
@@ -460,8 +486,10 @@  void ff_dv_offset_reset(DVDemuxContext *c, int64_t frame_offset)
  ************************************************************/
 
 typedef struct RawDVContext {
+    const AVClass  *class;
     DVDemuxContext  dv_demux;
     uint8_t         buf[DV_MAX_FRAME_SIZE];
+    int             dvaudio_concealment;
 } RawDVContext;
 
 static int dv_read_timecode(AVFormatContext *s) {
@@ -502,6 +530,7 @@  static int dv_read_header(AVFormatContext *s)
 
     if ((ret = dv_init_demux(s, &c->dv_demux)) < 0)
         return ret;
+    c->dv_demux.dvaudio_concealment = c->dvaudio_concealment;
 
     state = avio_rb32(s->pb);
     while ((state & 0xffffff7f) != 0x1f07003f) {
@@ -626,6 +655,22 @@  static int dv_probe(const AVProbeData *p)
     return 0;
 }
 
+#define OFFSET(x) offsetof(RawDVContext, x)
+#define DEC AV_OPT_FLAG_DECODING_PARAM
+static const AVOption dv_options[] = {
+    { "dvaudio_concealment", "", OFFSET(dvaudio_concealment), AV_OPT_TYPE_INT  , {.i64 = AUDIO_CONCEAL_DROP}, 0, INT_MAX, DEC, "dvaudio_concealment"},
+    { "drop",                "", 0                          , AV_OPT_TYPE_CONST, {.i64 = AUDIO_CONCEAL_DROP}, 0, INT_MAX, DEC, "dvaudio_concealment"},
+    { "pass",                "", 0                          , AV_OPT_TYPE_CONST, {.i64 = AUDIO_CONCEAL_PASS}, 0, INT_MAX, DEC, "dvaudio_concealment"},
+    { NULL },
+};
+
+static const AVClass dv_demuxer_class = {
+    .class_name = "DV demuxer",
+    .item_name  = av_default_item_name,
+    .option     = dv_options,
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
 AVInputFormat ff_dv_demuxer = {
     .name           = "dv",
     .long_name      = NULL_IF_CONFIG_SMALL("DV (Digital Video)"),
@@ -635,4 +680,5 @@  AVInputFormat ff_dv_demuxer = {
     .read_packet    = dv_read_packet,
     .read_seek      = dv_read_seek,
     .extensions     = "dv,dif",
+    .priv_class     = &dv_demuxer_class,
 };