diff mbox

[FFmpeg-devel] lavf/mov.c: Make audio timestamps strictly monotonically increasing inside an edit list. Fixes gapless decoding.

Message ID 1474406986-31418-1-git-send-email-isasi@google.com
State Superseded
Headers show

Commit Message

Sasi Inguva Sept. 20, 2016, 9:29 p.m. UTC
Signed-off-by: Sasi Inguva <isasi@google.com>
---
 libavcodec/utils.c                           | 15 +++---
 libavformat/mov.c                            | 81 ++++++++++++++++++++++++----
 tests/ref/fate/gaplessenc-itunes-to-ipod-aac |  2 +-
 tests/ref/fate/gaplessenc-pcm-to-mov-aac     |  2 +-
 4 files changed, 78 insertions(+), 22 deletions(-)

Comments

Sasi Inguva Sept. 22, 2016, 6:53 a.m. UTC | #1
friendly ping

On Tue, Sep 20, 2016 at 2:29 PM, Sasi Inguva <isasi@google.com> wrote:

> Signed-off-by: Sasi Inguva <isasi@google.com>
> ---
>  libavcodec/utils.c                           | 15 +++---
>  libavformat/mov.c                            | 81
> ++++++++++++++++++++++++----
>  tests/ref/fate/gaplessenc-itunes-to-ipod-aac |  2 +-
>  tests/ref/fate/gaplessenc-pcm-to-mov-aac     |  2 +-
>  4 files changed, 78 insertions(+), 22 deletions(-)
>
> diff --git a/libavcodec/utils.c b/libavcodec/utils.c
> index b0345b6..e18476c 100644
> --- a/libavcodec/utils.c
> +++ b/libavcodec/utils.c
> @@ -2320,7 +2320,6 @@ int attribute_align_arg avcodec_decode_audio4(AVCodecContext
> *avctx,
>          uint32_t discard_padding = 0;
>          uint8_t skip_reason = 0;
>          uint8_t discard_reason = 0;
> -        int demuxer_skip_samples = 0;
>          // copy to ensure we do not change avpkt
>          AVPacket tmp = *avpkt;
>          int did_split = av_packet_split_side_data(&tmp);
> @@ -2328,7 +2327,6 @@ int attribute_align_arg avcodec_decode_audio4(AVCodecContext
> *avctx,
>          if (ret < 0)
>              goto fail;
>
> -        demuxer_skip_samples = avctx->internal->skip_samples;
>          avctx->internal->pkt = &tmp;
>          if (HAVE_THREADS && avctx->active_thread_type & FF_THREAD_FRAME)
>              ret = ff_thread_decode_frame(avctx, frame, got_frame_ptr,
> &tmp);
> @@ -2353,13 +2351,6 @@ int attribute_align_arg avcodec_decode_audio4(AVCodecContext
> *avctx,
>                  frame->sample_rate = avctx->sample_rate;
>          }
>
> -
> -        if (frame->flags & AV_FRAME_FLAG_DISCARD) {
> -            // If using discard frame flag, ignore skip_samples set by
> the decoder.
> -            avctx->internal->skip_samples = demuxer_skip_samples;
> -            *got_frame_ptr = 0;
> -        }
> -
>          side= av_packet_get_side_data(avctx->internal->pkt,
> AV_PKT_DATA_SKIP_SAMPLES, &side_size);
>          if(side && side_size>=10) {
>              avctx->internal->skip_samples = AV_RL32(side);
> @@ -2369,6 +2360,12 @@ int attribute_align_arg avcodec_decode_audio4(AVCodecContext
> *avctx,
>              skip_reason = AV_RL8(side + 8);
>              discard_reason = AV_RL8(side + 9);
>          }
> +
> +        if ((frame->flags & AV_FRAME_FLAG_DISCARD) && *got_frame_ptr) {
> +            avctx->internal->skip_samples -= frame->nb_samples;
> +            *got_frame_ptr = 0;
> +        }
> +
>          if (avctx->internal->skip_samples > 0 && *got_frame_ptr &&
>              !(avctx->flags2 & AV_CODEC_FLAG2_SKIP_MANUAL)) {
>              if(frame->nb_samples <= avctx->internal->skip_samples){
> diff --git a/libavformat/mov.c b/libavformat/mov.c
> index b84d9c0..bb86780 100644
> --- a/libavformat/mov.c
> +++ b/libavformat/mov.c
> @@ -2856,6 +2856,21 @@ static int64_t add_index_entry(AVStream *st,
> int64_t pos, int64_t timestamp,
>  }
>
>  /**
> + * Rewrite timestamps of index entries in the range [end_index -
> frame_duration_buffer_size, end_index)
> + * by subtracting end_ts successively by the amounts given in
> frame_duration_buffer.
> + */
> +static void fix_index_entry_timestamps(AVStream* st, int end_index,
> int64_t end_ts,
> +                                       int64_t* frame_duration_buffer,
> +                                       int frame_duration_buffer_size) {
> +    int i = 0;
> +    av_assert0(end_index >= 0 && end_index <= st->nb_index_entries);
> +    for (i = 0; i < frame_duration_buffer_size; i++) {
> +        end_ts -= frame_duration_buffer[frame_duration_buffer_size - 1 -
> i];
> +        st->index_entries[end_index - 1 - i].timestamp = end_ts;
> +    }
> +}
> +
> +/**
>   * Append a new ctts entry to ctts_data.
>   * Returns the new ctts_count if successful, else returns -1.
>   */
> @@ -2919,7 +2934,10 @@ static void mov_fix_index(MOVContext *mov, AVStream
> *st)
>      int64_t edit_list_media_time_dts = 0;
>      int64_t edit_list_start_encountered = 0;
>      int64_t search_timestamp = 0;
> -
> +    int64_t* frame_duration_buffer = NULL;
> +    int num_discarded_begin = 0;
> +    int first_non_zero_audio_edit = -1;
> +    int packet_skip_samples = 0;
>
>      if (!msc->elst_data || msc->elst_count <= 0) {
>          return;
> @@ -2955,6 +2973,7 @@ static void mov_fix_index(MOVContext *mov, AVStream
> *st)
>          edit_list_index++;
>          edit_list_dts_counter = edit_list_dts_entry_end;
>          edit_list_dts_entry_end += edit_list_duration;
> +        num_discarded_begin = 0;
>          if (edit_list_media_time == -1) {
>              continue;
>          }
> @@ -2962,7 +2981,14 @@ static void mov_fix_index(MOVContext *mov, AVStream
> *st)
>          // If we encounter a non-negative edit list reset the
> skip_samples/start_pad fields and set them
>          // according to the edit list below.
>          if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
> -            st->skip_samples = msc->start_pad = 0;
> +            if (first_non_zero_audio_edit < 0) {
> +                first_non_zero_audio_edit = 1;
> +            } else {
> +                first_non_zero_audio_edit = 0;
> +            }
> +
> +            if (first_non_zero_audio_edit > 0)
> +                st->skip_samples = msc->start_pad = 0;
>          }
>
>          //find closest previous key frame
> @@ -3041,24 +3067,57 @@ static void mov_fix_index(MOVContext *mov,
> AVStream *st)
>              }
>
>              if (curr_cts < edit_list_media_time || curr_cts >=
> (edit_list_duration + edit_list_media_time)) {
> -                if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO &&
> curr_cts < edit_list_media_time &&
> -                    curr_cts + frame_duration > edit_list_media_time &&
> -                    st->skip_samples == 0 && msc->start_pad == 0) {
> -                    st->skip_samples = msc->start_pad =
> edit_list_media_time - curr_cts;
> -
> -                    // Shift the index entry timestamp by skip_samples to
> be correct.
> -                    edit_list_dts_counter -= st->skip_samples;
> +                if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO &&
> st->codecpar->codec_id != AV_CODEC_ID_VORBIS &&
> +                    curr_cts < edit_list_media_time && curr_cts +
> frame_duration > edit_list_media_time &&
> +                    first_non_zero_audio_edit > 0) {
> +                     packet_skip_samples = edit_list_media_time -
> curr_cts;
> +                     st->skip_samples += packet_skip_samples;
> +
> +                    // Shift the index entry timestamp by
> packet_skip_samples to be correct.
> +                    edit_list_dts_counter -= packet_skip_samples;
>                      if (edit_list_start_encountered == 0)  {
> -                      edit_list_start_encountered = 1;
> +                        edit_list_start_encountered = 1;
> +                        // Make timestamps strictly monotonically
> increasing for audio, by rewriting timestamps for
> +                        // discarded packets.
> +                        if (frame_duration_buffer) {
> +                          fix_index_entry_timestamps(st,
> st->nb_index_entries, edit_list_dts_counter,
> +
>  frame_duration_buffer, num_discarded_begin);
> +                          av_freep(&frame_duration_buffer);
> +                        }
>                      }
>
> -                    av_log(mov->fc, AV_LOG_DEBUG, "skip %d audio samples
> from curr_cts: %"PRId64"\n", st->skip_samples, curr_cts);
> +                    av_log(mov->fc, AV_LOG_DEBUG, "skip %d audio samples
> from curr_cts: %"PRId64"\n", packet_skip_samples, curr_cts);
>                  } else {
>                      flags |= AVINDEX_DISCARD_FRAME;
>                      av_log(mov->fc, AV_LOG_DEBUG, "drop a frame at
> curr_cts: %"PRId64" @ %"PRId64"\n", curr_cts, index);
> +
> +                    if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO &&
> edit_list_start_encountered == 0) {
> +                        num_discarded_begin++;
> +                        frame_duration_buffer = av_realloc(frame_duration_
> buffer,
> +
>  num_discarded_begin * sizeof(int64_t));
> +                        if (!frame_duration_buffer) {
> +                            av_log(mov->fc, AV_LOG_ERROR, "Cannot
> reallocate frame duration buffer\n");
> +                            break;
> +                        }
> +                        frame_duration_buffer[num_discarded_begin - 1] =
> frame_duration;
> +
> +                        // Increment skip_samples for the first non-zero
> audio edit list
> +                        if (first_non_zero_audio_edit > 0 &&
> st->codecpar->codec_id != AV_CODEC_ID_VORBIS) {
> +                            st->skip_samples += frame_duration;
> +                            msc->start_pad = st->skip_samples;
> +                        }
> +                    }
>                  }
>              } else if (edit_list_start_encountered == 0) {
>                  edit_list_start_encountered = 1;
> +                // Make timestamps strictly monotonically increasing for
> audio, by rewriting timestamps for
> +                // discarded packets.
> +                if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO &&
> frame_duration_buffer) {
> +                    fix_index_entry_timestamps(st, st->nb_index_entries,
> edit_list_dts_counter,
> +                                               frame_duration_buffer,
> num_discarded_begin);
> +                    av_freep(&frame_duration_buffer);
> +                }
> +
>              }
>
>              if (add_index_entry(st, current->pos, edit_list_dts_counter,
> current->size,
> diff --git a/tests/ref/fate/gaplessenc-itunes-to-ipod-aac
> b/tests/ref/fate/gaplessenc-itunes-to-ipod-aac
> index 043c085..789681f 100644
> --- a/tests/ref/fate/gaplessenc-itunes-to-ipod-aac
> +++ b/tests/ref/fate/gaplessenc-itunes-to-ipod-aac
> @@ -7,7 +7,7 @@ duration_ts=103326
>  start_time=0.000000
>  duration=2.367000
>  [/FORMAT]
> -packet|pts=0|dts=0|duration=N/A
> +packet|pts=-1024|dts=-1024|duration=1024
>  packet|pts=0|dts=0|duration=1024
>  packet|pts=1024|dts=1024|duration=1024
>  packet|pts=2048|dts=2048|duration=1024
> diff --git a/tests/ref/fate/gaplessenc-pcm-to-mov-aac
> b/tests/ref/fate/gaplessenc-pcm-to-mov-aac
> index 8b7e3f6..8702611 100644
> --- a/tests/ref/fate/gaplessenc-pcm-to-mov-aac
> +++ b/tests/ref/fate/gaplessenc-pcm-to-mov-aac
> @@ -7,7 +7,7 @@ duration_ts=529200
>  start_time=0.000000
>  duration=12.024000
>  [/FORMAT]
> -packet|pts=0|dts=0|duration=N/A
> +packet|pts=-1024|dts=-1024|duration=1024
>  packet|pts=0|dts=0|duration=1024
>  packet|pts=1024|dts=1024|duration=1024
>  packet|pts=2048|dts=2048|duration=1024
> --
> 2.8.0.rc3.226.g39d4020
>
>
wm4 Sept. 22, 2016, 12:49 p.m. UTC | #2
On Tue, 20 Sep 2016 14:29:46 -0700
Sasi Inguva <isasi-at-google.com@ffmpeg.org> wrote:

> Signed-off-by: Sasi Inguva <isasi@google.com>
> ---
>  libavcodec/utils.c                           | 15 +++---
>  libavformat/mov.c                            | 81 ++++++++++++++++++++++++----
>  tests/ref/fate/gaplessenc-itunes-to-ipod-aac |  2 +-
>  tests/ref/fate/gaplessenc-pcm-to-mov-aac     |  2 +-
>  4 files changed, 78 insertions(+), 22 deletions(-)
> 
> diff --git a/libavcodec/utils.c b/libavcodec/utils.c
> index b0345b6..e18476c 100644
> --- a/libavcodec/utils.c
> +++ b/libavcodec/utils.c
> @@ -2320,7 +2320,6 @@ int attribute_align_arg avcodec_decode_audio4(AVCodecContext *avctx,
>          uint32_t discard_padding = 0;
>          uint8_t skip_reason = 0;
>          uint8_t discard_reason = 0;
> -        int demuxer_skip_samples = 0;
>          // copy to ensure we do not change avpkt
>          AVPacket tmp = *avpkt;
>          int did_split = av_packet_split_side_data(&tmp);
> @@ -2328,7 +2327,6 @@ int attribute_align_arg avcodec_decode_audio4(AVCodecContext *avctx,
>          if (ret < 0)
>              goto fail;
>  
> -        demuxer_skip_samples = avctx->internal->skip_samples;
>          avctx->internal->pkt = &tmp;
>          if (HAVE_THREADS && avctx->active_thread_type & FF_THREAD_FRAME)
>              ret = ff_thread_decode_frame(avctx, frame, got_frame_ptr, &tmp);
> @@ -2353,13 +2351,6 @@ int attribute_align_arg avcodec_decode_audio4(AVCodecContext *avctx,
>                  frame->sample_rate = avctx->sample_rate;
>          }
>  
> -
> -        if (frame->flags & AV_FRAME_FLAG_DISCARD) {
> -            // If using discard frame flag, ignore skip_samples set by the decoder.
> -            avctx->internal->skip_samples = demuxer_skip_samples;
> -            *got_frame_ptr = 0;
> -        }
> -
>          side= av_packet_get_side_data(avctx->internal->pkt, AV_PKT_DATA_SKIP_SAMPLES, &side_size);
>          if(side && side_size>=10) {
>              avctx->internal->skip_samples = AV_RL32(side);
> @@ -2369,6 +2360,12 @@ int attribute_align_arg avcodec_decode_audio4(AVCodecContext *avctx,
>              skip_reason = AV_RL8(side + 8);
>              discard_reason = AV_RL8(side + 9);
>          }
> +
> +        if ((frame->flags & AV_FRAME_FLAG_DISCARD) && *got_frame_ptr) {
> +            avctx->internal->skip_samples -= frame->nb_samples;
> +            *got_frame_ptr = 0;
> +        }
> +
>          if (avctx->internal->skip_samples > 0 && *got_frame_ptr &&
>              !(avctx->flags2 & AV_CODEC_FLAG2_SKIP_MANUAL)) {
>              if(frame->nb_samples <= avctx->internal->skip_samples){
> diff --git a/libavformat/mov.c b/libavformat/mov.c
> index b84d9c0..bb86780 100644
> --- a/libavformat/mov.c
> +++ b/libavformat/mov.c
> @@ -2856,6 +2856,21 @@ static int64_t add_index_entry(AVStream *st, int64_t pos, int64_t timestamp,
>  }
>  
>  /**
> + * Rewrite timestamps of index entries in the range [end_index - frame_duration_buffer_size, end_index)
> + * by subtracting end_ts successively by the amounts given in frame_duration_buffer.
> + */
> +static void fix_index_entry_timestamps(AVStream* st, int end_index, int64_t end_ts,
> +                                       int64_t* frame_duration_buffer,
> +                                       int frame_duration_buffer_size) {
> +    int i = 0;
> +    av_assert0(end_index >= 0 && end_index <= st->nb_index_entries);
> +    for (i = 0; i < frame_duration_buffer_size; i++) {
> +        end_ts -= frame_duration_buffer[frame_duration_buffer_size - 1 - i];
> +        st->index_entries[end_index - 1 - i].timestamp = end_ts;
> +    }
> +}
> +
> +/**
>   * Append a new ctts entry to ctts_data.
>   * Returns the new ctts_count if successful, else returns -1.
>   */
> @@ -2919,7 +2934,10 @@ static void mov_fix_index(MOVContext *mov, AVStream *st)
>      int64_t edit_list_media_time_dts = 0;
>      int64_t edit_list_start_encountered = 0;
>      int64_t search_timestamp = 0;
> -
> +    int64_t* frame_duration_buffer = NULL;
> +    int num_discarded_begin = 0;
> +    int first_non_zero_audio_edit = -1;
> +    int packet_skip_samples = 0;
>  
>      if (!msc->elst_data || msc->elst_count <= 0) {
>          return;
> @@ -2955,6 +2973,7 @@ static void mov_fix_index(MOVContext *mov, AVStream *st)
>          edit_list_index++;
>          edit_list_dts_counter = edit_list_dts_entry_end;
>          edit_list_dts_entry_end += edit_list_duration;
> +        num_discarded_begin = 0;
>          if (edit_list_media_time == -1) {
>              continue;
>          }
> @@ -2962,7 +2981,14 @@ static void mov_fix_index(MOVContext *mov, AVStream *st)
>          // If we encounter a non-negative edit list reset the skip_samples/start_pad fields and set them
>          // according to the edit list below.
>          if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
> -            st->skip_samples = msc->start_pad = 0;
> +            if (first_non_zero_audio_edit < 0) {
> +                first_non_zero_audio_edit = 1;
> +            } else {
> +                first_non_zero_audio_edit = 0;
> +            }
> +
> +            if (first_non_zero_audio_edit > 0)
> +                st->skip_samples = msc->start_pad = 0;
>          }
>  
>          //find closest previous key frame
> @@ -3041,24 +3067,57 @@ static void mov_fix_index(MOVContext *mov, AVStream *st)
>              }
>  
>              if (curr_cts < edit_list_media_time || curr_cts >= (edit_list_duration + edit_list_media_time)) {
> -                if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO && curr_cts < edit_list_media_time &&
> -                    curr_cts + frame_duration > edit_list_media_time &&
> -                    st->skip_samples == 0 && msc->start_pad == 0) {
> -                    st->skip_samples = msc->start_pad = edit_list_media_time - curr_cts;
> -
> -                    // Shift the index entry timestamp by skip_samples to be correct.
> -                    edit_list_dts_counter -= st->skip_samples;
> +                if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO && st->codecpar->codec_id != AV_CODEC_ID_VORBIS &&
> +                    curr_cts < edit_list_media_time && curr_cts + frame_duration > edit_list_media_time &&
> +                    first_non_zero_audio_edit > 0) {
> +                     packet_skip_samples = edit_list_media_time - curr_cts;
> +                     st->skip_samples += packet_skip_samples;
> +
> +                    // Shift the index entry timestamp by packet_skip_samples to be correct.
> +                    edit_list_dts_counter -= packet_skip_samples;
>                      if (edit_list_start_encountered == 0)  {
> -                      edit_list_start_encountered = 1;
> +                        edit_list_start_encountered = 1;
> +                        // Make timestamps strictly monotonically increasing for audio, by rewriting timestamps for
> +                        // discarded packets.
> +                        if (frame_duration_buffer) {
> +                          fix_index_entry_timestamps(st, st->nb_index_entries, edit_list_dts_counter,
> +                                                     frame_duration_buffer, num_discarded_begin);
> +                          av_freep(&frame_duration_buffer);
> +                        }
>                      }
>  
> -                    av_log(mov->fc, AV_LOG_DEBUG, "skip %d audio samples from curr_cts: %"PRId64"\n", st->skip_samples, curr_cts);
> +                    av_log(mov->fc, AV_LOG_DEBUG, "skip %d audio samples from curr_cts: %"PRId64"\n", packet_skip_samples, curr_cts);
>                  } else {
>                      flags |= AVINDEX_DISCARD_FRAME;
>                      av_log(mov->fc, AV_LOG_DEBUG, "drop a frame at curr_cts: %"PRId64" @ %"PRId64"\n", curr_cts, index);
> +
> +                    if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO && edit_list_start_encountered == 0) {
> +                        num_discarded_begin++;
> +                        frame_duration_buffer = av_realloc(frame_duration_buffer,
> +                                                           num_discarded_begin * sizeof(int64_t));
> +                        if (!frame_duration_buffer) {
> +                            av_log(mov->fc, AV_LOG_ERROR, "Cannot reallocate frame duration buffer\n");
> +                            break;
> +                        }
> +                        frame_duration_buffer[num_discarded_begin - 1] = frame_duration;
> +
> +                        // Increment skip_samples for the first non-zero audio edit list
> +                        if (first_non_zero_audio_edit > 0 && st->codecpar->codec_id != AV_CODEC_ID_VORBIS) {
> +                            st->skip_samples += frame_duration;
> +                            msc->start_pad = st->skip_samples;
> +                        }
> +                    }
>                  }
>              } else if (edit_list_start_encountered == 0) {
>                  edit_list_start_encountered = 1;
> +                // Make timestamps strictly monotonically increasing for audio, by rewriting timestamps for
> +                // discarded packets.
> +                if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO && frame_duration_buffer) {
> +                    fix_index_entry_timestamps(st, st->nb_index_entries, edit_list_dts_counter,
> +                                               frame_duration_buffer, num_discarded_begin);
> +                    av_freep(&frame_duration_buffer);
> +                }
> +
>              }
>  
>              if (add_index_entry(st, current->pos, edit_list_dts_counter, current->size,
> diff --git a/tests/ref/fate/gaplessenc-itunes-to-ipod-aac b/tests/ref/fate/gaplessenc-itunes-to-ipod-aac
> index 043c085..789681f 100644
> --- a/tests/ref/fate/gaplessenc-itunes-to-ipod-aac
> +++ b/tests/ref/fate/gaplessenc-itunes-to-ipod-aac
> @@ -7,7 +7,7 @@ duration_ts=103326
>  start_time=0.000000
>  duration=2.367000
>  [/FORMAT]
> -packet|pts=0|dts=0|duration=N/A
> +packet|pts=-1024|dts=-1024|duration=1024
>  packet|pts=0|dts=0|duration=1024
>  packet|pts=1024|dts=1024|duration=1024
>  packet|pts=2048|dts=2048|duration=1024
> diff --git a/tests/ref/fate/gaplessenc-pcm-to-mov-aac b/tests/ref/fate/gaplessenc-pcm-to-mov-aac
> index 8b7e3f6..8702611 100644
> --- a/tests/ref/fate/gaplessenc-pcm-to-mov-aac
> +++ b/tests/ref/fate/gaplessenc-pcm-to-mov-aac
> @@ -7,7 +7,7 @@ duration_ts=529200
>  start_time=0.000000
>  duration=12.024000
>  [/FORMAT]
> -packet|pts=0|dts=0|duration=N/A
> +packet|pts=-1024|dts=-1024|duration=1024
>  packet|pts=0|dts=0|duration=1024
>  packet|pts=1024|dts=1024|duration=1024
>  packet|pts=2048|dts=2048|duration=1024

This is a complex patch, and builds upon new code that isn't quite
known to me, the result looks like an improvement to me.

Does it work with the "skip_manual" libavcodec option?

I also think the fate tests should be updated to include the packet
flags, since they are just as important as the timestamps.
Sasi Inguva Sept. 22, 2016, 6:38 p.m. UTC | #3
On Thu, Sep 22, 2016 at 5:49 AM, wm4 <nfxjfg@googlemail.com> wrote:

> On Tue, 20 Sep 2016 14:29:46 -0700
> Sasi Inguva <isasi-at-google.com@ffmpeg.org> wrote:
>
> > Signed-off-by: Sasi Inguva <isasi@google.com>
> > ---
> >  libavcodec/utils.c                           | 15 +++---
> >  libavformat/mov.c                            | 81
> ++++++++++++++++++++++++----
> >  tests/ref/fate/gaplessenc-itunes-to-ipod-aac |  2 +-
> >  tests/ref/fate/gaplessenc-pcm-to-mov-aac     |  2 +-
> >  4 files changed, 78 insertions(+), 22 deletions(-)
> >
> > diff --git a/libavcodec/utils.c b/libavcodec/utils.c
> > index b0345b6..e18476c 100644
> > --- a/libavcodec/utils.c
> > +++ b/libavcodec/utils.c
> > @@ -2320,7 +2320,6 @@ int attribute_align_arg avcodec_decode_audio4(AVCodecContext
> *avctx,
> >          uint32_t discard_padding = 0;
> >          uint8_t skip_reason = 0;
> >          uint8_t discard_reason = 0;
> > -        int demuxer_skip_samples = 0;
> >          // copy to ensure we do not change avpkt
> >          AVPacket tmp = *avpkt;
> >          int did_split = av_packet_split_side_data(&tmp);
> > @@ -2328,7 +2327,6 @@ int attribute_align_arg avcodec_decode_audio4(AVCodecContext
> *avctx,
> >          if (ret < 0)
> >              goto fail;
> >
> > -        demuxer_skip_samples = avctx->internal->skip_samples;
> >          avctx->internal->pkt = &tmp;
> >          if (HAVE_THREADS && avctx->active_thread_type & FF_THREAD_FRAME)
> >              ret = ff_thread_decode_frame(avctx, frame, got_frame_ptr,
> &tmp);
> > @@ -2353,13 +2351,6 @@ int attribute_align_arg avcodec_decode_audio4(AVCodecContext
> *avctx,
> >                  frame->sample_rate = avctx->sample_rate;
> >          }
> >
> > -
> > -        if (frame->flags & AV_FRAME_FLAG_DISCARD) {
> > -            // If using discard frame flag, ignore skip_samples set by
> the decoder.
> > -            avctx->internal->skip_samples = demuxer_skip_samples;
> > -            *got_frame_ptr = 0;
> > -        }
> > -
> >          side= av_packet_get_side_data(avctx->internal->pkt,
> AV_PKT_DATA_SKIP_SAMPLES, &side_size);
> >          if(side && side_size>=10) {
> >              avctx->internal->skip_samples = AV_RL32(side);
> > @@ -2369,6 +2360,12 @@ int attribute_align_arg avcodec_decode_audio4(AVCodecContext
> *avctx,
> >              skip_reason = AV_RL8(side + 8);
> >              discard_reason = AV_RL8(side + 9);
> >          }
> > +
> > +        if ((frame->flags & AV_FRAME_FLAG_DISCARD) && *got_frame_ptr) {
> > +            avctx->internal->skip_samples -= frame->nb_samples;
> > +            *got_frame_ptr = 0;
> > +        }
> > +
> >          if (avctx->internal->skip_samples > 0 && *got_frame_ptr &&
> >              !(avctx->flags2 & AV_CODEC_FLAG2_SKIP_MANUAL)) {
> >              if(frame->nb_samples <= avctx->internal->skip_samples){
> > diff --git a/libavformat/mov.c b/libavformat/mov.c
> > index b84d9c0..bb86780 100644
> > --- a/libavformat/mov.c
> > +++ b/libavformat/mov.c
> > @@ -2856,6 +2856,21 @@ static int64_t add_index_entry(AVStream *st,
> int64_t pos, int64_t timestamp,
> >  }
> >
> >  /**
> > + * Rewrite timestamps of index entries in the range [end_index -
> frame_duration_buffer_size, end_index)
> > + * by subtracting end_ts successively by the amounts given in
> frame_duration_buffer.
> > + */
> > +static void fix_index_entry_timestamps(AVStream* st, int end_index,
> int64_t end_ts,
> > +                                       int64_t* frame_duration_buffer,
> > +                                       int frame_duration_buffer_size) {
> > +    int i = 0;
> > +    av_assert0(end_index >= 0 && end_index <= st->nb_index_entries);
> > +    for (i = 0; i < frame_duration_buffer_size; i++) {
> > +        end_ts -= frame_duration_buffer[frame_duration_buffer_size - 1
> - i];
> > +        st->index_entries[end_index - 1 - i].timestamp = end_ts;
> > +    }
> > +}
> > +
> > +/**
> >   * Append a new ctts entry to ctts_data.
> >   * Returns the new ctts_count if successful, else returns -1.
> >   */
> > @@ -2919,7 +2934,10 @@ static void mov_fix_index(MOVContext *mov,
> AVStream *st)
> >      int64_t edit_list_media_time_dts = 0;
> >      int64_t edit_list_start_encountered = 0;
> >      int64_t search_timestamp = 0;
> > -
> > +    int64_t* frame_duration_buffer = NULL;
> > +    int num_discarded_begin = 0;
> > +    int first_non_zero_audio_edit = -1;
> > +    int packet_skip_samples = 0;
> >
> >      if (!msc->elst_data || msc->elst_count <= 0) {
> >          return;
> > @@ -2955,6 +2973,7 @@ static void mov_fix_index(MOVContext *mov,
> AVStream *st)
> >          edit_list_index++;
> >          edit_list_dts_counter = edit_list_dts_entry_end;
> >          edit_list_dts_entry_end += edit_list_duration;
> > +        num_discarded_begin = 0;
> >          if (edit_list_media_time == -1) {
> >              continue;
> >          }
> > @@ -2962,7 +2981,14 @@ static void mov_fix_index(MOVContext *mov,
> AVStream *st)
> >          // If we encounter a non-negative edit list reset the
> skip_samples/start_pad fields and set them
> >          // according to the edit list below.
> >          if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
> > -            st->skip_samples = msc->start_pad = 0;
> > +            if (first_non_zero_audio_edit < 0) {
> > +                first_non_zero_audio_edit = 1;
> > +            } else {
> > +                first_non_zero_audio_edit = 0;
> > +            }
> > +
> > +            if (first_non_zero_audio_edit > 0)
> > +                st->skip_samples = msc->start_pad = 0;
> >          }
> >
> >          //find closest previous key frame
> > @@ -3041,24 +3067,57 @@ static void mov_fix_index(MOVContext *mov,
> AVStream *st)
> >              }
> >
> >              if (curr_cts < edit_list_media_time || curr_cts >=
> (edit_list_duration + edit_list_media_time)) {
> > -                if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO &&
> curr_cts < edit_list_media_time &&
> > -                    curr_cts + frame_duration > edit_list_media_time &&
> > -                    st->skip_samples == 0 && msc->start_pad == 0) {
> > -                    st->skip_samples = msc->start_pad =
> edit_list_media_time - curr_cts;
> > -
> > -                    // Shift the index entry timestamp by skip_samples
> to be correct.
> > -                    edit_list_dts_counter -= st->skip_samples;
> > +                if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO &&
> st->codecpar->codec_id != AV_CODEC_ID_VORBIS &&
> > +                    curr_cts < edit_list_media_time && curr_cts +
> frame_duration > edit_list_media_time &&
> > +                    first_non_zero_audio_edit > 0) {
> > +                     packet_skip_samples = edit_list_media_time -
> curr_cts;
> > +                     st->skip_samples += packet_skip_samples;
> > +
> > +                    // Shift the index entry timestamp by
> packet_skip_samples to be correct.
> > +                    edit_list_dts_counter -= packet_skip_samples;
> >                      if (edit_list_start_encountered == 0)  {
> > -                      edit_list_start_encountered = 1;
> > +                        edit_list_start_encountered = 1;
> > +                        // Make timestamps strictly monotonically
> increasing for audio, by rewriting timestamps for
> > +                        // discarded packets.
> > +                        if (frame_duration_buffer) {
> > +                          fix_index_entry_timestamps(st,
> st->nb_index_entries, edit_list_dts_counter,
> > +
>  frame_duration_buffer, num_discarded_begin);
> > +                          av_freep(&frame_duration_buffer);
> > +                        }
> >                      }
> >
> > -                    av_log(mov->fc, AV_LOG_DEBUG, "skip %d audio
> samples from curr_cts: %"PRId64"\n", st->skip_samples, curr_cts);
> > +                    av_log(mov->fc, AV_LOG_DEBUG, "skip %d audio
> samples from curr_cts: %"PRId64"\n", packet_skip_samples, curr_cts);
> >                  } else {
> >                      flags |= AVINDEX_DISCARD_FRAME;
> >                      av_log(mov->fc, AV_LOG_DEBUG, "drop a frame at
> curr_cts: %"PRId64" @ %"PRId64"\n", curr_cts, index);
> > +
> > +                    if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO
> && edit_list_start_encountered == 0) {
> > +                        num_discarded_begin++;
> > +                        frame_duration_buffer =
> av_realloc(frame_duration_buffer,
> > +
>  num_discarded_begin * sizeof(int64_t));
> > +                        if (!frame_duration_buffer) {
> > +                            av_log(mov->fc, AV_LOG_ERROR, "Cannot
> reallocate frame duration buffer\n");
> > +                            break;
> > +                        }
> > +                        frame_duration_buffer[num_discarded_begin - 1]
> = frame_duration;
> > +
> > +                        // Increment skip_samples for the first
> non-zero audio edit list
> > +                        if (first_non_zero_audio_edit > 0 &&
> st->codecpar->codec_id != AV_CODEC_ID_VORBIS) {
> > +                            st->skip_samples += frame_duration;
> > +                            msc->start_pad = st->skip_samples;
> > +                        }
> > +                    }
> >                  }
> >              } else if (edit_list_start_encountered == 0) {
> >                  edit_list_start_encountered = 1;
> > +                // Make timestamps strictly monotonically increasing
> for audio, by rewriting timestamps for
> > +                // discarded packets.
> > +                if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO &&
> frame_duration_buffer) {
> > +                    fix_index_entry_timestamps(st,
> st->nb_index_entries, edit_list_dts_counter,
> > +                                               frame_duration_buffer,
> num_discarded_begin);
> > +                    av_freep(&frame_duration_buffer);
> > +                }
> > +
> >              }
> >
> >              if (add_index_entry(st, current->pos,
> edit_list_dts_counter, current->size,
> > diff --git a/tests/ref/fate/gaplessenc-itunes-to-ipod-aac
> b/tests/ref/fate/gaplessenc-itunes-to-ipod-aac
> > index 043c085..789681f 100644
> > --- a/tests/ref/fate/gaplessenc-itunes-to-ipod-aac
> > +++ b/tests/ref/fate/gaplessenc-itunes-to-ipod-aac
> > @@ -7,7 +7,7 @@ duration_ts=103326
> >  start_time=0.000000
> >  duration=2.367000
> >  [/FORMAT]
> > -packet|pts=0|dts=0|duration=N/A
> > +packet|pts=-1024|dts=-1024|duration=1024
> >  packet|pts=0|dts=0|duration=1024
> >  packet|pts=1024|dts=1024|duration=1024
> >  packet|pts=2048|dts=2048|duration=1024
> > diff --git a/tests/ref/fate/gaplessenc-pcm-to-mov-aac
> b/tests/ref/fate/gaplessenc-pcm-to-mov-aac
> > index 8b7e3f6..8702611 100644
> > --- a/tests/ref/fate/gaplessenc-pcm-to-mov-aac
> > +++ b/tests/ref/fate/gaplessenc-pcm-to-mov-aac
> > @@ -7,7 +7,7 @@ duration_ts=529200
> >  start_time=0.000000
> >  duration=12.024000
> >  [/FORMAT]
> > -packet|pts=0|dts=0|duration=N/A
> > +packet|pts=-1024|dts=-1024|duration=1024
> >  packet|pts=0|dts=0|duration=1024
> >  packet|pts=1024|dts=1024|duration=1024
> >  packet|pts=2048|dts=2048|duration=1024
>
> This is a complex patch, and builds upon new code that isn't quite
> known to me, the result looks like an improvement to me.
>
> Does it work with the "skip_manual" libavcodec option?
>
> Nice catch. sent the patch again  to make it work with skip_manual

I also think the fate tests should be updated to include the packet
> flags, since they are just as important as the timestamps.
>

I had another patch making ffprobe show the DISCARD flag. If / once that
patch is applied it will be easier to modify the fate test for gapless to
show the discard flag.


> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
Sasi Inguva Sept. 24, 2016, 1:19 a.m. UTC | #4
I have updated the patch with flag values for fate tests.

On Thu, Sep 22, 2016 at 11:38 AM, Sasi Inguva <isasi@google.com> wrote:

>
> On Thu, Sep 22, 2016 at 5:49 AM, wm4 <nfxjfg@googlemail.com> wrote:
>
>> On Tue, 20 Sep 2016 14:29:46 -0700
>> Sasi Inguva <isasi-at-google.com@ffmpeg.org> wrote:
>>
>> > Signed-off-by: Sasi Inguva <isasi@google.com>
>> > ---
>> >  libavcodec/utils.c                           | 15 +++---
>> >  libavformat/mov.c                            | 81
>> ++++++++++++++++++++++++----
>> >  tests/ref/fate/gaplessenc-itunes-to-ipod-aac |  2 +-
>> >  tests/ref/fate/gaplessenc-pcm-to-mov-aac     |  2 +-
>> >  4 files changed, 78 insertions(+), 22 deletions(-)
>> >
>> > diff --git a/libavcodec/utils.c b/libavcodec/utils.c
>> > index b0345b6..e18476c 100644
>> > --- a/libavcodec/utils.c
>> > +++ b/libavcodec/utils.c
>> > @@ -2320,7 +2320,6 @@ int attribute_align_arg
>> avcodec_decode_audio4(AVCodecContext *avctx,
>> >          uint32_t discard_padding = 0;
>> >          uint8_t skip_reason = 0;
>> >          uint8_t discard_reason = 0;
>> > -        int demuxer_skip_samples = 0;
>> >          // copy to ensure we do not change avpkt
>> >          AVPacket tmp = *avpkt;
>> >          int did_split = av_packet_split_side_data(&tmp);
>> > @@ -2328,7 +2327,6 @@ int attribute_align_arg
>> avcodec_decode_audio4(AVCodecContext *avctx,
>> >          if (ret < 0)
>> >              goto fail;
>> >
>> > -        demuxer_skip_samples = avctx->internal->skip_samples;
>> >          avctx->internal->pkt = &tmp;
>> >          if (HAVE_THREADS && avctx->active_thread_type &
>> FF_THREAD_FRAME)
>> >              ret = ff_thread_decode_frame(avctx, frame, got_frame_ptr,
>> &tmp);
>> > @@ -2353,13 +2351,6 @@ int attribute_align_arg
>> avcodec_decode_audio4(AVCodecContext *avctx,
>> >                  frame->sample_rate = avctx->sample_rate;
>> >          }
>> >
>> > -
>> > -        if (frame->flags & AV_FRAME_FLAG_DISCARD) {
>> > -            // If using discard frame flag, ignore skip_samples set by
>> the decoder.
>> > -            avctx->internal->skip_samples = demuxer_skip_samples;
>> > -            *got_frame_ptr = 0;
>> > -        }
>> > -
>> >          side= av_packet_get_side_data(avctx->internal->pkt,
>> AV_PKT_DATA_SKIP_SAMPLES, &side_size);
>> >          if(side && side_size>=10) {
>> >              avctx->internal->skip_samples = AV_RL32(side);
>> > @@ -2369,6 +2360,12 @@ int attribute_align_arg
>> avcodec_decode_audio4(AVCodecContext *avctx,
>> >              skip_reason = AV_RL8(side + 8);
>> >              discard_reason = AV_RL8(side + 9);
>> >          }
>> > +
>> > +        if ((frame->flags & AV_FRAME_FLAG_DISCARD) && *got_frame_ptr) {
>> > +            avctx->internal->skip_samples -= frame->nb_samples;
>> > +            *got_frame_ptr = 0;
>> > +        }
>> > +
>> >          if (avctx->internal->skip_samples > 0 && *got_frame_ptr &&
>> >              !(avctx->flags2 & AV_CODEC_FLAG2_SKIP_MANUAL)) {
>> >              if(frame->nb_samples <= avctx->internal->skip_samples){
>> > diff --git a/libavformat/mov.c b/libavformat/mov.c
>> > index b84d9c0..bb86780 100644
>> > --- a/libavformat/mov.c
>> > +++ b/libavformat/mov.c
>> > @@ -2856,6 +2856,21 @@ static int64_t add_index_entry(AVStream *st,
>> int64_t pos, int64_t timestamp,
>> >  }
>> >
>> >  /**
>> > + * Rewrite timestamps of index entries in the range [end_index -
>> frame_duration_buffer_size, end_index)
>> > + * by subtracting end_ts successively by the amounts given in
>> frame_duration_buffer.
>> > + */
>> > +static void fix_index_entry_timestamps(AVStream* st, int end_index,
>> int64_t end_ts,
>> > +                                       int64_t* frame_duration_buffer,
>> > +                                       int frame_duration_buffer_size)
>> {
>> > +    int i = 0;
>> > +    av_assert0(end_index >= 0 && end_index <= st->nb_index_entries);
>> > +    for (i = 0; i < frame_duration_buffer_size; i++) {
>> > +        end_ts -= frame_duration_buffer[frame_duration_buffer_size -
>> 1 - i];
>> > +        st->index_entries[end_index - 1 - i].timestamp = end_ts;
>> > +    }
>> > +}
>> > +
>> > +/**
>> >   * Append a new ctts entry to ctts_data.
>> >   * Returns the new ctts_count if successful, else returns -1.
>> >   */
>> > @@ -2919,7 +2934,10 @@ static void mov_fix_index(MOVContext *mov,
>> AVStream *st)
>> >      int64_t edit_list_media_time_dts = 0;
>> >      int64_t edit_list_start_encountered = 0;
>> >      int64_t search_timestamp = 0;
>> > -
>> > +    int64_t* frame_duration_buffer = NULL;
>> > +    int num_discarded_begin = 0;
>> > +    int first_non_zero_audio_edit = -1;
>> > +    int packet_skip_samples = 0;
>> >
>> >      if (!msc->elst_data || msc->elst_count <= 0) {
>> >          return;
>> > @@ -2955,6 +2973,7 @@ static void mov_fix_index(MOVContext *mov,
>> AVStream *st)
>> >          edit_list_index++;
>> >          edit_list_dts_counter = edit_list_dts_entry_end;
>> >          edit_list_dts_entry_end += edit_list_duration;
>> > +        num_discarded_begin = 0;
>> >          if (edit_list_media_time == -1) {
>> >              continue;
>> >          }
>> > @@ -2962,7 +2981,14 @@ static void mov_fix_index(MOVContext *mov,
>> AVStream *st)
>> >          // If we encounter a non-negative edit list reset the
>> skip_samples/start_pad fields and set them
>> >          // according to the edit list below.
>> >          if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
>> > -            st->skip_samples = msc->start_pad = 0;
>> > +            if (first_non_zero_audio_edit < 0) {
>> > +                first_non_zero_audio_edit = 1;
>> > +            } else {
>> > +                first_non_zero_audio_edit = 0;
>> > +            }
>> > +
>> > +            if (first_non_zero_audio_edit > 0)
>> > +                st->skip_samples = msc->start_pad = 0;
>> >          }
>> >
>> >          //find closest previous key frame
>> > @@ -3041,24 +3067,57 @@ static void mov_fix_index(MOVContext *mov,
>> AVStream *st)
>> >              }
>> >
>> >              if (curr_cts < edit_list_media_time || curr_cts >=
>> (edit_list_duration + edit_list_media_time)) {
>> > -                if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO &&
>> curr_cts < edit_list_media_time &&
>> > -                    curr_cts + frame_duration > edit_list_media_time &&
>> > -                    st->skip_samples == 0 && msc->start_pad == 0) {
>> > -                    st->skip_samples = msc->start_pad =
>> edit_list_media_time - curr_cts;
>> > -
>> > -                    // Shift the index entry timestamp by skip_samples
>> to be correct.
>> > -                    edit_list_dts_counter -= st->skip_samples;
>> > +                if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO &&
>> st->codecpar->codec_id != AV_CODEC_ID_VORBIS &&
>> > +                    curr_cts < edit_list_media_time && curr_cts +
>> frame_duration > edit_list_media_time &&
>> > +                    first_non_zero_audio_edit > 0) {
>> > +                     packet_skip_samples = edit_list_media_time -
>> curr_cts;
>> > +                     st->skip_samples += packet_skip_samples;
>> > +
>> > +                    // Shift the index entry timestamp by
>> packet_skip_samples to be correct.
>> > +                    edit_list_dts_counter -= packet_skip_samples;
>> >                      if (edit_list_start_encountered == 0)  {
>> > -                      edit_list_start_encountered = 1;
>> > +                        edit_list_start_encountered = 1;
>> > +                        // Make timestamps strictly monotonically
>> increasing for audio, by rewriting timestamps for
>> > +                        // discarded packets.
>> > +                        if (frame_duration_buffer) {
>> > +                          fix_index_entry_timestamps(st,
>> st->nb_index_entries, edit_list_dts_counter,
>> > +
>>  frame_duration_buffer, num_discarded_begin);
>> > +                          av_freep(&frame_duration_buffer);
>> > +                        }
>> >                      }
>> >
>> > -                    av_log(mov->fc, AV_LOG_DEBUG, "skip %d audio
>> samples from curr_cts: %"PRId64"\n", st->skip_samples, curr_cts);
>> > +                    av_log(mov->fc, AV_LOG_DEBUG, "skip %d audio
>> samples from curr_cts: %"PRId64"\n", packet_skip_samples, curr_cts);
>> >                  } else {
>> >                      flags |= AVINDEX_DISCARD_FRAME;
>> >                      av_log(mov->fc, AV_LOG_DEBUG, "drop a frame at
>> curr_cts: %"PRId64" @ %"PRId64"\n", curr_cts, index);
>> > +
>> > +                    if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO
>> && edit_list_start_encountered == 0) {
>> > +                        num_discarded_begin++;
>> > +                        frame_duration_buffer =
>> av_realloc(frame_duration_buffer,
>> > +
>>  num_discarded_begin * sizeof(int64_t));
>> > +                        if (!frame_duration_buffer) {
>> > +                            av_log(mov->fc, AV_LOG_ERROR, "Cannot
>> reallocate frame duration buffer\n");
>> > +                            break;
>> > +                        }
>> > +                        frame_duration_buffer[num_discarded_begin -
>> 1] = frame_duration;
>> > +
>> > +                        // Increment skip_samples for the first
>> non-zero audio edit list
>> > +                        if (first_non_zero_audio_edit > 0 &&
>> st->codecpar->codec_id != AV_CODEC_ID_VORBIS) {
>> > +                            st->skip_samples += frame_duration;
>> > +                            msc->start_pad = st->skip_samples;
>> > +                        }
>> > +                    }
>> >                  }
>> >              } else if (edit_list_start_encountered == 0) {
>> >                  edit_list_start_encountered = 1;
>> > +                // Make timestamps strictly monotonically increasing
>> for audio, by rewriting timestamps for
>> > +                // discarded packets.
>> > +                if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO &&
>> frame_duration_buffer) {
>> > +                    fix_index_entry_timestamps(st,
>> st->nb_index_entries, edit_list_dts_counter,
>> > +                                               frame_duration_buffer,
>> num_discarded_begin);
>> > +                    av_freep(&frame_duration_buffer);
>> > +                }
>> > +
>> >              }
>> >
>> >              if (add_index_entry(st, current->pos,
>> edit_list_dts_counter, current->size,
>> > diff --git a/tests/ref/fate/gaplessenc-itunes-to-ipod-aac
>> b/tests/ref/fate/gaplessenc-itunes-to-ipod-aac
>> > index 043c085..789681f 100644
>> > --- a/tests/ref/fate/gaplessenc-itunes-to-ipod-aac
>> > +++ b/tests/ref/fate/gaplessenc-itunes-to-ipod-aac
>> > @@ -7,7 +7,7 @@ duration_ts=103326
>> >  start_time=0.000000
>> >  duration=2.367000
>> >  [/FORMAT]
>> > -packet|pts=0|dts=0|duration=N/A
>> > +packet|pts=-1024|dts=-1024|duration=1024
>> >  packet|pts=0|dts=0|duration=1024
>> >  packet|pts=1024|dts=1024|duration=1024
>> >  packet|pts=2048|dts=2048|duration=1024
>> > diff --git a/tests/ref/fate/gaplessenc-pcm-to-mov-aac
>> b/tests/ref/fate/gaplessenc-pcm-to-mov-aac
>> > index 8b7e3f6..8702611 100644
>> > --- a/tests/ref/fate/gaplessenc-pcm-to-mov-aac
>> > +++ b/tests/ref/fate/gaplessenc-pcm-to-mov-aac
>> > @@ -7,7 +7,7 @@ duration_ts=529200
>> >  start_time=0.000000
>> >  duration=12.024000
>> >  [/FORMAT]
>> > -packet|pts=0|dts=0|duration=N/A
>> > +packet|pts=-1024|dts=-1024|duration=1024
>> >  packet|pts=0|dts=0|duration=1024
>> >  packet|pts=1024|dts=1024|duration=1024
>> >  packet|pts=2048|dts=2048|duration=1024
>>
>> This is a complex patch, and builds upon new code that isn't quite
>> known to me, the result looks like an improvement to me.
>>
>> Does it work with the "skip_manual" libavcodec option?
>>
>> Nice catch. sent the patch again  to make it work with skip_manual
>
> I also think the fate tests should be updated to include the packet
>> flags, since they are just as important as the timestamps.
>>
>
> I had another patch making ffprobe show the DISCARD flag. If / once that
> patch is applied it will be easier to modify the fate test for gapless to
> show the discard flag.
>
>
>> _______________________________________________
>> ffmpeg-devel mailing list
>> ffmpeg-devel@ffmpeg.org
>> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>>
>
>
diff mbox

Patch

diff --git a/libavcodec/utils.c b/libavcodec/utils.c
index b0345b6..e18476c 100644
--- a/libavcodec/utils.c
+++ b/libavcodec/utils.c
@@ -2320,7 +2320,6 @@  int attribute_align_arg avcodec_decode_audio4(AVCodecContext *avctx,
         uint32_t discard_padding = 0;
         uint8_t skip_reason = 0;
         uint8_t discard_reason = 0;
-        int demuxer_skip_samples = 0;
         // copy to ensure we do not change avpkt
         AVPacket tmp = *avpkt;
         int did_split = av_packet_split_side_data(&tmp);
@@ -2328,7 +2327,6 @@  int attribute_align_arg avcodec_decode_audio4(AVCodecContext *avctx,
         if (ret < 0)
             goto fail;
 
-        demuxer_skip_samples = avctx->internal->skip_samples;
         avctx->internal->pkt = &tmp;
         if (HAVE_THREADS && avctx->active_thread_type & FF_THREAD_FRAME)
             ret = ff_thread_decode_frame(avctx, frame, got_frame_ptr, &tmp);
@@ -2353,13 +2351,6 @@  int attribute_align_arg avcodec_decode_audio4(AVCodecContext *avctx,
                 frame->sample_rate = avctx->sample_rate;
         }
 
-
-        if (frame->flags & AV_FRAME_FLAG_DISCARD) {
-            // If using discard frame flag, ignore skip_samples set by the decoder.
-            avctx->internal->skip_samples = demuxer_skip_samples;
-            *got_frame_ptr = 0;
-        }
-
         side= av_packet_get_side_data(avctx->internal->pkt, AV_PKT_DATA_SKIP_SAMPLES, &side_size);
         if(side && side_size>=10) {
             avctx->internal->skip_samples = AV_RL32(side);
@@ -2369,6 +2360,12 @@  int attribute_align_arg avcodec_decode_audio4(AVCodecContext *avctx,
             skip_reason = AV_RL8(side + 8);
             discard_reason = AV_RL8(side + 9);
         }
+
+        if ((frame->flags & AV_FRAME_FLAG_DISCARD) && *got_frame_ptr) {
+            avctx->internal->skip_samples -= frame->nb_samples;
+            *got_frame_ptr = 0;
+        }
+
         if (avctx->internal->skip_samples > 0 && *got_frame_ptr &&
             !(avctx->flags2 & AV_CODEC_FLAG2_SKIP_MANUAL)) {
             if(frame->nb_samples <= avctx->internal->skip_samples){
diff --git a/libavformat/mov.c b/libavformat/mov.c
index b84d9c0..bb86780 100644
--- a/libavformat/mov.c
+++ b/libavformat/mov.c
@@ -2856,6 +2856,21 @@  static int64_t add_index_entry(AVStream *st, int64_t pos, int64_t timestamp,
 }
 
 /**
+ * Rewrite timestamps of index entries in the range [end_index - frame_duration_buffer_size, end_index)
+ * by subtracting end_ts successively by the amounts given in frame_duration_buffer.
+ */
+static void fix_index_entry_timestamps(AVStream* st, int end_index, int64_t end_ts,
+                                       int64_t* frame_duration_buffer,
+                                       int frame_duration_buffer_size) {
+    int i = 0;
+    av_assert0(end_index >= 0 && end_index <= st->nb_index_entries);
+    for (i = 0; i < frame_duration_buffer_size; i++) {
+        end_ts -= frame_duration_buffer[frame_duration_buffer_size - 1 - i];
+        st->index_entries[end_index - 1 - i].timestamp = end_ts;
+    }
+}
+
+/**
  * Append a new ctts entry to ctts_data.
  * Returns the new ctts_count if successful, else returns -1.
  */
@@ -2919,7 +2934,10 @@  static void mov_fix_index(MOVContext *mov, AVStream *st)
     int64_t edit_list_media_time_dts = 0;
     int64_t edit_list_start_encountered = 0;
     int64_t search_timestamp = 0;
-
+    int64_t* frame_duration_buffer = NULL;
+    int num_discarded_begin = 0;
+    int first_non_zero_audio_edit = -1;
+    int packet_skip_samples = 0;
 
     if (!msc->elst_data || msc->elst_count <= 0) {
         return;
@@ -2955,6 +2973,7 @@  static void mov_fix_index(MOVContext *mov, AVStream *st)
         edit_list_index++;
         edit_list_dts_counter = edit_list_dts_entry_end;
         edit_list_dts_entry_end += edit_list_duration;
+        num_discarded_begin = 0;
         if (edit_list_media_time == -1) {
             continue;
         }
@@ -2962,7 +2981,14 @@  static void mov_fix_index(MOVContext *mov, AVStream *st)
         // If we encounter a non-negative edit list reset the skip_samples/start_pad fields and set them
         // according to the edit list below.
         if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
-            st->skip_samples = msc->start_pad = 0;
+            if (first_non_zero_audio_edit < 0) {
+                first_non_zero_audio_edit = 1;
+            } else {
+                first_non_zero_audio_edit = 0;
+            }
+
+            if (first_non_zero_audio_edit > 0)
+                st->skip_samples = msc->start_pad = 0;
         }
 
         //find closest previous key frame
@@ -3041,24 +3067,57 @@  static void mov_fix_index(MOVContext *mov, AVStream *st)
             }
 
             if (curr_cts < edit_list_media_time || curr_cts >= (edit_list_duration + edit_list_media_time)) {
-                if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO && curr_cts < edit_list_media_time &&
-                    curr_cts + frame_duration > edit_list_media_time &&
-                    st->skip_samples == 0 && msc->start_pad == 0) {
-                    st->skip_samples = msc->start_pad = edit_list_media_time - curr_cts;
-
-                    // Shift the index entry timestamp by skip_samples to be correct.
-                    edit_list_dts_counter -= st->skip_samples;
+                if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO && st->codecpar->codec_id != AV_CODEC_ID_VORBIS &&
+                    curr_cts < edit_list_media_time && curr_cts + frame_duration > edit_list_media_time &&
+                    first_non_zero_audio_edit > 0) {
+                     packet_skip_samples = edit_list_media_time - curr_cts;
+                     st->skip_samples += packet_skip_samples;
+
+                    // Shift the index entry timestamp by packet_skip_samples to be correct.
+                    edit_list_dts_counter -= packet_skip_samples;
                     if (edit_list_start_encountered == 0)  {
-                      edit_list_start_encountered = 1;
+                        edit_list_start_encountered = 1;
+                        // Make timestamps strictly monotonically increasing for audio, by rewriting timestamps for
+                        // discarded packets.
+                        if (frame_duration_buffer) {
+                          fix_index_entry_timestamps(st, st->nb_index_entries, edit_list_dts_counter,
+                                                     frame_duration_buffer, num_discarded_begin);
+                          av_freep(&frame_duration_buffer);
+                        }
                     }
 
-                    av_log(mov->fc, AV_LOG_DEBUG, "skip %d audio samples from curr_cts: %"PRId64"\n", st->skip_samples, curr_cts);
+                    av_log(mov->fc, AV_LOG_DEBUG, "skip %d audio samples from curr_cts: %"PRId64"\n", packet_skip_samples, curr_cts);
                 } else {
                     flags |= AVINDEX_DISCARD_FRAME;
                     av_log(mov->fc, AV_LOG_DEBUG, "drop a frame at curr_cts: %"PRId64" @ %"PRId64"\n", curr_cts, index);
+
+                    if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO && edit_list_start_encountered == 0) {
+                        num_discarded_begin++;
+                        frame_duration_buffer = av_realloc(frame_duration_buffer,
+                                                           num_discarded_begin * sizeof(int64_t));
+                        if (!frame_duration_buffer) {
+                            av_log(mov->fc, AV_LOG_ERROR, "Cannot reallocate frame duration buffer\n");
+                            break;
+                        }
+                        frame_duration_buffer[num_discarded_begin - 1] = frame_duration;
+
+                        // Increment skip_samples for the first non-zero audio edit list
+                        if (first_non_zero_audio_edit > 0 && st->codecpar->codec_id != AV_CODEC_ID_VORBIS) {
+                            st->skip_samples += frame_duration;
+                            msc->start_pad = st->skip_samples;
+                        }
+                    }
                 }
             } else if (edit_list_start_encountered == 0) {
                 edit_list_start_encountered = 1;
+                // Make timestamps strictly monotonically increasing for audio, by rewriting timestamps for
+                // discarded packets.
+                if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO && frame_duration_buffer) {
+                    fix_index_entry_timestamps(st, st->nb_index_entries, edit_list_dts_counter,
+                                               frame_duration_buffer, num_discarded_begin);
+                    av_freep(&frame_duration_buffer);
+                }
+
             }
 
             if (add_index_entry(st, current->pos, edit_list_dts_counter, current->size,
diff --git a/tests/ref/fate/gaplessenc-itunes-to-ipod-aac b/tests/ref/fate/gaplessenc-itunes-to-ipod-aac
index 043c085..789681f 100644
--- a/tests/ref/fate/gaplessenc-itunes-to-ipod-aac
+++ b/tests/ref/fate/gaplessenc-itunes-to-ipod-aac
@@ -7,7 +7,7 @@  duration_ts=103326
 start_time=0.000000
 duration=2.367000
 [/FORMAT]
-packet|pts=0|dts=0|duration=N/A
+packet|pts=-1024|dts=-1024|duration=1024
 packet|pts=0|dts=0|duration=1024
 packet|pts=1024|dts=1024|duration=1024
 packet|pts=2048|dts=2048|duration=1024
diff --git a/tests/ref/fate/gaplessenc-pcm-to-mov-aac b/tests/ref/fate/gaplessenc-pcm-to-mov-aac
index 8b7e3f6..8702611 100644
--- a/tests/ref/fate/gaplessenc-pcm-to-mov-aac
+++ b/tests/ref/fate/gaplessenc-pcm-to-mov-aac
@@ -7,7 +7,7 @@  duration_ts=529200
 start_time=0.000000
 duration=12.024000
 [/FORMAT]
-packet|pts=0|dts=0|duration=N/A
+packet|pts=-1024|dts=-1024|duration=1024
 packet|pts=0|dts=0|duration=1024
 packet|pts=1024|dts=1024|duration=1024
 packet|pts=2048|dts=2048|duration=1024