Message ID | 20201029215838.25699-1-michael@niedermayer.cc |
---|---|
State | New |
Headers | show |
Series | [FFmpeg-devel] avformat/dv: allow returning damaged audio | expand |
Context | Check | Description |
---|---|---|
andriy/x86_make | success | Make finished |
andriy/x86_make_fate | success | Make fate finished |
andriy/PPC64_make | success | Make finished |
andriy/PPC64_make_fate | success | Make fate finished |
Am Do., 29. Okt. 2020 um 22:59 Uhr schrieb Michael Niedermayer <michael@niedermayer.cc>:

> +static const AVOption dv_options[] = {
> + { "dvaudio_concealment", "", OFFSET(dvaudio_concealment), AV_OPT_TYPE_INT , {.i64 = AUDIO_CONCEAL_DROP}, 0, INT_MAX, DEC, "dvaudio_concealment"},
> + { "drop", "", 0 , AV_OPT_TYPE_CONST, {.i64 = AUDIO_CONCEAL_DROP}, 0, INT_MAX, DEC, "dvaudio_concealment"},
> + { "pass", "", 0 , AV_OPT_TYPE_CONST, {.i64 = AUDIO_CONCEAL_PASS}, 0, INT_MAX, DEC, "dvaudio_concealment"},

Am I correct that the current lavc option "ec" only works for video?

Shouldn't its documentation be extended so it also works for audio (to avoid adding a new option)?

Carl Eugen
On Thu, 29 Oct 2020, Michael Niedermayer wrote: > These potentially damaged packets are marked as corrupt. > The packet length is predicted based on packet length history, > allowing prediction of the common pattern used in NTSC. > > Fixes: Ticket8762 Sync loss is because of invalid timestamps, this is a possible workaround for the ticket ignoring the core problem. I will post a patch to fix the timestamps, it does not seem hard. Regards, Marton > Tested-by: Dave Rice <dave@dericed.com> > Signed-off-by: Michael Niedermayer <michael@niedermayer.cc> > --- > libavformat/dv.c | 60 ++++++++++++++++++++++++++++++++++++++++++------ > 1 file changed, 53 insertions(+), 7 deletions(-) > > diff --git a/libavformat/dv.c b/libavformat/dv.c > index 3e0d12c0e3..3499cba6f8 100644 > --- a/libavformat/dv.c > +++ b/libavformat/dv.c > @@ -34,12 +34,20 @@ > #include "libavcodec/dv_profile.h" > #include "libavcodec/dv.h" > #include "libavutil/channel_layout.h" > +#include "libavutil/opt.h" > #include "libavutil/intreadwrite.h" > #include "libavutil/mathematics.h" > #include "libavutil/timecode.h" > #include "dv.h" > #include "libavutil/avassert.h" > > +#define AS_HASH_SIZE 16 > + > +enum AudioConceal { > + AUDIO_CONCEAL_PASS = 1, > + AUDIO_CONCEAL_DROP = 0, > +}; > + > struct DVDemuxContext { > const AVDVProfile* sys; /* Current DV profile. 
E.g.: 525/60, 625/50 */ > AVFormatContext* fctx; > @@ -50,6 +58,9 @@ struct DVDemuxContext { > int ach; > int frames; > uint64_t abytes; > + uint8_t as_pack[AS_HASH_SIZE][5]; > + uint8_t as_hash; > + int dvaudio_concealment; > }; > > static inline uint16_t dv_audio_12to16(uint16_t sample) > @@ -72,7 +83,7 @@ static inline uint16_t dv_audio_12to16(uint16_t sample) > return result; > } > > -static const uint8_t *dv_extract_pack(const uint8_t *frame, enum dv_pack_type t) > +static const uint8_t *dv_extract_pack(DVDemuxContext *d, const uint8_t *frame, enum dv_pack_type t) > { > int offs; > int c; > @@ -101,6 +112,17 @@ static const uint8_t *dv_extract_pack(const uint8_t *frame, enum dv_pack_type t) > break; > } > > + if (t == dv_audio_source || t == dv_audio_control) { > + int index = (d->as_hash>>1) & (AS_HASH_SIZE-1); > + if (frame[offs] == t) { > + memcpy(d->as_pack[index], &frame[offs], sizeof(d->as_pack[index])); > + } else if (d->as_pack[index][0] && d->dvaudio_concealment == AUDIO_CONCEAL_PASS) { > + return d->as_pack[index]; > + } else if (d->as_pack[0][0] && d->dvaudio_concealment == AUDIO_CONCEAL_PASS) { > + return d->as_pack[0]; > + } > + } > + > return frame[offs] == t ? &frame[offs] : NULL; > } > > @@ -116,7 +138,7 @@ static const int dv_audio_frequency[3] = { > * 3. Audio is always returned as 16-bit linear samples: 12-bit nonlinear samples > * are converted into 16-bit linear ones. > */ > -static int dv_extract_audio(const uint8_t *frame, uint8_t **ppcm, > +static int dv_extract_audio(DVDemuxContext *c, const uint8_t *frame, uint8_t **ppcm, > const AVDVProfile *sys) > { > int size, chan, i, j, d, of, smpls, freq, quant, half_ch; > @@ -124,7 +146,7 @@ static int dv_extract_audio(const uint8_t *frame, uint8_t **ppcm, > const uint8_t *as_pack; > uint8_t *pcm, ipcm; > > - as_pack = dv_extract_pack(frame, dv_audio_source); > + as_pack = dv_extract_pack(c, frame, dv_audio_source); > if (!as_pack) /* No audio ? 
*/ > return 0; > > @@ -137,6 +159,7 @@ static int dv_extract_audio(const uint8_t *frame, uint8_t **ppcm, > > if (freq >= FF_ARRAY_ELEMS(dv_audio_frequency)) > return AVERROR_INVALIDDATA; > + c->as_hash = 2*c->as_hash + smpls; > > size = (sys->audio_min_samples[freq] + smpls) * 4; /* 2ch, 2bytes */ > half_ch = sys->difseg_size / 2; > @@ -153,6 +176,9 @@ static int dv_extract_audio(const uint8_t *frame, uint8_t **ppcm, > /* for each DIF channel */ > for (chan = 0; chan < sys->n_difchan; chan++) { > av_assert0(ipcm<4); > + c->audio_pkt[ipcm].flags &= ~AV_PKT_FLAG_CORRUPT; > + if (as_pack >= c->as_pack[0] && as_pack < c->as_pack[AS_HASH_SIZE]) > + c->audio_pkt[ipcm].flags |= AV_PKT_FLAG_CORRUPT; > pcm = ppcm[ipcm++]; > if (!pcm) > break; > @@ -224,7 +250,7 @@ static int dv_extract_audio_info(DVDemuxContext *c, const uint8_t *frame) > const uint8_t *as_pack; > int freq, stype, smpls, quant, i, ach; > > - as_pack = dv_extract_pack(frame, dv_audio_source); > + as_pack = dv_extract_pack(c, frame, dv_audio_source); > if (!as_pack || !c->sys) { /* No audio ? */ > c->ach = 0; > return 0; > @@ -292,7 +318,7 @@ static int dv_extract_video_info(DVDemuxContext *c, const uint8_t *frame) > c->vst->avg_frame_rate = av_inv_q(c->vst->time_base); > > /* finding out SAR is a little bit messy */ > - vsc_pack = dv_extract_pack(frame, dv_video_control); > + vsc_pack = dv_extract_pack(c, frame, dv_video_control); > apt = frame[4] & 0x07; > is16_9 = (vsc_pack && ((vsc_pack[2] & 0x07) == 0x02 || > (!apt && (vsc_pack[2] & 0x07) == 0x07))); > @@ -312,7 +338,7 @@ static int dv_extract_timecode(DVDemuxContext* c, const uint8_t* frame, char *tc > // is only relevant for NTSC systems. 
> int prevent_df = c->sys->ltc_divisor == 25 || c->sys->ltc_divisor == 50; > > - tc_pack = dv_extract_pack(frame, dv_timecode); > + tc_pack = dv_extract_pack(c, frame, dv_timecode); > if (!tc_pack) > return 0; > av_timecode_make_smpte_tc_string2(tc, av_inv_q(c->sys->time_base), AV_RB32(tc_pack + 1), prevent_df, 1); > @@ -392,7 +418,7 @@ int avpriv_dv_produce_packet(DVDemuxContext *c, AVPacket *pkt, > ppcm[i] = c->audio_buf[i]; > } > if (c->ach) > - dv_extract_audio(buf, ppcm, c->sys); > + dv_extract_audio(c, buf, ppcm, c->sys); > > /* We work with 720p frames split in half, thus even frames have > * channels 0,1 and odd 2,3. */ > @@ -460,8 +486,10 @@ void ff_dv_offset_reset(DVDemuxContext *c, int64_t frame_offset) > ************************************************************/ > > typedef struct RawDVContext { > + const AVClass *class; > DVDemuxContext dv_demux; > uint8_t buf[DV_MAX_FRAME_SIZE]; > + int dvaudio_concealment; > } RawDVContext; > > static int dv_read_timecode(AVFormatContext *s) { > @@ -502,6 +530,7 @@ static int dv_read_header(AVFormatContext *s) > > if ((ret = dv_init_demux(s, &c->dv_demux)) < 0) > return ret; > + c->dv_demux.dvaudio_concealment = c->dvaudio_concealment; > > state = avio_rb32(s->pb); > while ((state & 0xffffff7f) != 0x1f07003f) { > @@ -626,6 +655,22 @@ static int dv_probe(const AVProbeData *p) > return 0; > } > > +#define OFFSET(x) offsetof(RawDVContext, x) > +#define DEC AV_OPT_FLAG_DECODING_PARAM > +static const AVOption dv_options[] = { > + { "dvaudio_concealment", "", OFFSET(dvaudio_concealment), AV_OPT_TYPE_INT , {.i64 = AUDIO_CONCEAL_DROP}, 0, INT_MAX, DEC, "dvaudio_concealment"}, > + { "drop", "", 0 , AV_OPT_TYPE_CONST, {.i64 = AUDIO_CONCEAL_DROP}, 0, INT_MAX, DEC, "dvaudio_concealment"}, > + { "pass", "", 0 , AV_OPT_TYPE_CONST, {.i64 = AUDIO_CONCEAL_PASS}, 0, INT_MAX, DEC, "dvaudio_concealment"}, > + { NULL }, > +}; > + > +static const AVClass dv_demuxer_class = { > + .class_name = "DV demuxer", > + .item_name = 
av_default_item_name, > + .option = dv_options, > + .version = LIBAVUTIL_VERSION_INT, > +}; > + > AVInputFormat ff_dv_demuxer = { > .name = "dv", > .long_name = NULL_IF_CONFIG_SMALL("DV (Digital Video)"), > @@ -635,4 +680,5 @@ AVInputFormat ff_dv_demuxer = { > .read_packet = dv_read_packet, > .read_seek = dv_read_seek, > .extensions = "dv,dif", > + .priv_class = &dv_demuxer_class, > }; > -- > 2.17.1 > > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
On Sat, 31 Oct 2020, Carl Eugen Hoyos wrote:

> Am Do., 29. Okt. 2020 um 22:59 Uhr schrieb Michael Niedermayer
> <michael@niedermayer.cc>:
>
>> +static const AVOption dv_options[] = {
>> + { "dvaudio_concealment", "", OFFSET(dvaudio_concealment), AV_OPT_TYPE_INT , {.i64 = AUDIO_CONCEAL_DROP}, 0, INT_MAX, DEC, "dvaudio_concealment"},
>> + { "drop", "", 0 , AV_OPT_TYPE_CONST, {.i64 = AUDIO_CONCEAL_DROP}, 0, INT_MAX, DEC, "dvaudio_concealment"},
>> + { "pass", "", 0 , AV_OPT_TYPE_CONST, {.i64 = AUDIO_CONCEAL_PASS}, 0, INT_MAX, DEC, "dvaudio_concealment"},
>
> Am I correct that the current lavc option "ec" only works for video?
>
> Shouldn't its documentation be extended so it also works for audio
> (to avoid adding a new option)?

This is demuxer not decoder, so ec cannot be used.

Regards,
Marton
diff --git a/libavformat/dv.c b/libavformat/dv.c index 3e0d12c0e3..3499cba6f8 100644 --- a/libavformat/dv.c +++ b/libavformat/dv.c @@ -34,12 +34,20 @@ #include "libavcodec/dv_profile.h" #include "libavcodec/dv.h" #include "libavutil/channel_layout.h" +#include "libavutil/opt.h" #include "libavutil/intreadwrite.h" #include "libavutil/mathematics.h" #include "libavutil/timecode.h" #include "dv.h" #include "libavutil/avassert.h" +#define AS_HASH_SIZE 16 + +enum AudioConceal { + AUDIO_CONCEAL_PASS = 1, + AUDIO_CONCEAL_DROP = 0, +}; + struct DVDemuxContext { const AVDVProfile* sys; /* Current DV profile. E.g.: 525/60, 625/50 */ AVFormatContext* fctx; @@ -50,6 +58,9 @@ struct DVDemuxContext { int ach; int frames; uint64_t abytes; + uint8_t as_pack[AS_HASH_SIZE][5]; + uint8_t as_hash; + int dvaudio_concealment; }; static inline uint16_t dv_audio_12to16(uint16_t sample) @@ -72,7 +83,7 @@ static inline uint16_t dv_audio_12to16(uint16_t sample) return result; } -static const uint8_t *dv_extract_pack(const uint8_t *frame, enum dv_pack_type t) +static const uint8_t *dv_extract_pack(DVDemuxContext *d, const uint8_t *frame, enum dv_pack_type t) { int offs; int c; @@ -101,6 +112,17 @@ static const uint8_t *dv_extract_pack(const uint8_t *frame, enum dv_pack_type t) break; } + if (t == dv_audio_source || t == dv_audio_control) { + int index = (d->as_hash>>1) & (AS_HASH_SIZE-1); + if (frame[offs] == t) { + memcpy(d->as_pack[index], &frame[offs], sizeof(d->as_pack[index])); + } else if (d->as_pack[index][0] && d->dvaudio_concealment == AUDIO_CONCEAL_PASS) { + return d->as_pack[index]; + } else if (d->as_pack[0][0] && d->dvaudio_concealment == AUDIO_CONCEAL_PASS) { + return d->as_pack[0]; + } + } + return frame[offs] == t ? &frame[offs] : NULL; } @@ -116,7 +138,7 @@ static const int dv_audio_frequency[3] = { * 3. Audio is always returned as 16-bit linear samples: 12-bit nonlinear samples * are converted into 16-bit linear ones. 
*/ -static int dv_extract_audio(const uint8_t *frame, uint8_t **ppcm, +static int dv_extract_audio(DVDemuxContext *c, const uint8_t *frame, uint8_t **ppcm, const AVDVProfile *sys) { int size, chan, i, j, d, of, smpls, freq, quant, half_ch; @@ -124,7 +146,7 @@ static int dv_extract_audio(const uint8_t *frame, uint8_t **ppcm, const uint8_t *as_pack; uint8_t *pcm, ipcm; - as_pack = dv_extract_pack(frame, dv_audio_source); + as_pack = dv_extract_pack(c, frame, dv_audio_source); if (!as_pack) /* No audio ? */ return 0; @@ -137,6 +159,7 @@ static int dv_extract_audio(const uint8_t *frame, uint8_t **ppcm, if (freq >= FF_ARRAY_ELEMS(dv_audio_frequency)) return AVERROR_INVALIDDATA; + c->as_hash = 2*c->as_hash + smpls; size = (sys->audio_min_samples[freq] + smpls) * 4; /* 2ch, 2bytes */ half_ch = sys->difseg_size / 2; @@ -153,6 +176,9 @@ static int dv_extract_audio(const uint8_t *frame, uint8_t **ppcm, /* for each DIF channel */ for (chan = 0; chan < sys->n_difchan; chan++) { av_assert0(ipcm<4); + c->audio_pkt[ipcm].flags &= ~AV_PKT_FLAG_CORRUPT; + if (as_pack >= c->as_pack[0] && as_pack < c->as_pack[AS_HASH_SIZE]) + c->audio_pkt[ipcm].flags |= AV_PKT_FLAG_CORRUPT; pcm = ppcm[ipcm++]; if (!pcm) break; @@ -224,7 +250,7 @@ static int dv_extract_audio_info(DVDemuxContext *c, const uint8_t *frame) const uint8_t *as_pack; int freq, stype, smpls, quant, i, ach; - as_pack = dv_extract_pack(frame, dv_audio_source); + as_pack = dv_extract_pack(c, frame, dv_audio_source); if (!as_pack || !c->sys) { /* No audio ? 
*/ c->ach = 0; return 0; @@ -292,7 +318,7 @@ static int dv_extract_video_info(DVDemuxContext *c, const uint8_t *frame) c->vst->avg_frame_rate = av_inv_q(c->vst->time_base); /* finding out SAR is a little bit messy */ - vsc_pack = dv_extract_pack(frame, dv_video_control); + vsc_pack = dv_extract_pack(c, frame, dv_video_control); apt = frame[4] & 0x07; is16_9 = (vsc_pack && ((vsc_pack[2] & 0x07) == 0x02 || (!apt && (vsc_pack[2] & 0x07) == 0x07))); @@ -312,7 +338,7 @@ static int dv_extract_timecode(DVDemuxContext* c, const uint8_t* frame, char *tc // is only relevant for NTSC systems. int prevent_df = c->sys->ltc_divisor == 25 || c->sys->ltc_divisor == 50; - tc_pack = dv_extract_pack(frame, dv_timecode); + tc_pack = dv_extract_pack(c, frame, dv_timecode); if (!tc_pack) return 0; av_timecode_make_smpte_tc_string2(tc, av_inv_q(c->sys->time_base), AV_RB32(tc_pack + 1), prevent_df, 1); @@ -392,7 +418,7 @@ int avpriv_dv_produce_packet(DVDemuxContext *c, AVPacket *pkt, ppcm[i] = c->audio_buf[i]; } if (c->ach) - dv_extract_audio(buf, ppcm, c->sys); + dv_extract_audio(c, buf, ppcm, c->sys); /* We work with 720p frames split in half, thus even frames have * channels 0,1 and odd 2,3. 
*/ @@ -460,8 +486,10 @@ void ff_dv_offset_reset(DVDemuxContext *c, int64_t frame_offset) ************************************************************/ typedef struct RawDVContext { + const AVClass *class; DVDemuxContext dv_demux; uint8_t buf[DV_MAX_FRAME_SIZE]; + int dvaudio_concealment; } RawDVContext; static int dv_read_timecode(AVFormatContext *s) { @@ -502,6 +530,7 @@ static int dv_read_header(AVFormatContext *s) if ((ret = dv_init_demux(s, &c->dv_demux)) < 0) return ret; + c->dv_demux.dvaudio_concealment = c->dvaudio_concealment; state = avio_rb32(s->pb); while ((state & 0xffffff7f) != 0x1f07003f) { @@ -626,6 +655,22 @@ static int dv_probe(const AVProbeData *p) return 0; } +#define OFFSET(x) offsetof(RawDVContext, x) +#define DEC AV_OPT_FLAG_DECODING_PARAM +static const AVOption dv_options[] = { + { "dvaudio_concealment", "", OFFSET(dvaudio_concealment), AV_OPT_TYPE_INT , {.i64 = AUDIO_CONCEAL_DROP}, 0, INT_MAX, DEC, "dvaudio_concealment"}, + { "drop", "", 0 , AV_OPT_TYPE_CONST, {.i64 = AUDIO_CONCEAL_DROP}, 0, INT_MAX, DEC, "dvaudio_concealment"}, + { "pass", "", 0 , AV_OPT_TYPE_CONST, {.i64 = AUDIO_CONCEAL_PASS}, 0, INT_MAX, DEC, "dvaudio_concealment"}, + { NULL }, +}; + +static const AVClass dv_demuxer_class = { + .class_name = "DV demuxer", + .item_name = av_default_item_name, + .option = dv_options, + .version = LIBAVUTIL_VERSION_INT, +}; + AVInputFormat ff_dv_demuxer = { .name = "dv", .long_name = NULL_IF_CONFIG_SMALL("DV (Digital Video)"), @@ -635,4 +680,5 @@ AVInputFormat ff_dv_demuxer = { .read_packet = dv_read_packet, .read_seek = dv_read_seek, .extensions = "dv,dif", + .priv_class = &dv_demuxer_class, };