@@ -305,6 +305,7 @@ typedef struct MOVContext {
int32_t movie_display_matrix[3][3]; ///< display matrix from mvhd
int have_read_mfra_size;
uint32_t mfra_size;
+ uint32_t max_stts_delta;
} MOVContext;
int ff_mp4_read_descr_len(AVIOContext *pb);
@@ -2925,6 +2925,9 @@ static int mov_read_stts(MOVContext *c, AVIOContext *pb, MOVAtom atom)
unsigned int i, entries, alloc_size = 0;
int64_t duration = 0;
int64_t total_sample_count = 0;
+ int64_t current_dts = 0;
+ int64_t last_dts = 0;
+ int64_t dts_correction = 0;
if (c->fc->nb_streams < 1)
return 0;
@@ -2948,6 +2951,7 @@ static int mov_read_stts(MOVContext *c, AVIOContext *pb, MOVAtom atom)
for (i = 0; i < entries && !pb->eof_reached; i++) {
unsigned int sample_duration;
unsigned int sample_count;
+ unsigned int stts_warn = 0;
unsigned int min_entries = FFMIN(FFMAX(i + 1, 1024 * 1024), entries);
MOVStts *stts_data = av_fast_realloc(sc->stts_data, &alloc_size,
min_entries * sizeof(*sc->stts_data));
@@ -2965,13 +2969,41 @@ static int mov_read_stts(MOVContext *c, AVIOContext *pb, MOVAtom atom)
sc->stts_data[i].count= sample_count;
sc->stts_data[i].duration= sample_duration;
- av_log(c->fc, AV_LOG_TRACE, "sample_count=%d, sample_duration=%d\n",
+ av_log(c->fc, AV_LOG_TRACE, "sample_count=%u, sample_duration=%u\n",
sample_count, sample_duration);
- duration+=(int64_t)sample_duration*(uint64_t)sample_count;
- total_sample_count+=sample_count;
- }
+ for (int j = 0; j < sample_count; j++) {
+ /* STTS sample deltas are uint32, but some files store them as int32,
+ * using negative values to correct DTS delays.
+ * Abnormally large values may occur as well. */
+ if (sample_duration > c->max_stts_delta) {
+ // delta exceeds c->max_stts_delta: reinterpret it as int32; a negative value is taken as a DTS correction
+ int32_t delta_magnitude = *((int32_t *)&sample_duration);
+ av_log_once(c->fc, AV_LOG_WARNING, AV_LOG_DEBUG, &stts_warn,
+ "Too large sample offset %u in stts entry %u with count %u in st:%d. Clipping to 1.\n",
+ sample_duration, i, sample_count, st->index);
+ sc->stts_data[i].duration = 1;
+ dts_correction += (delta_magnitude < 0 ? delta_magnitude - 1 : 0);
+ }
+ current_dts += sc->stts_data[i].duration;
+
+ if (!dts_correction || current_dts + dts_correction > last_dts) {
+ current_dts += dts_correction;
+ if (!j)
+ sc->stts_data[i].duration += dts_correction/sample_count;
+ dts_correction = 0;
+ } else {
+ /* Avoid creating non-monotonic DTS */
+ dts_correction += current_dts - last_dts - 1;
+ current_dts = last_dts + 1;
+ }
+ last_dts = current_dts;
+ }
+ duration+=(int64_t)sc->stts_data[i].duration*(uint64_t)sc->stts_data[i].count;
+ total_sample_count+=sc->stts_data[i].count;
+
+ }
sc->stts_count = i;
if (duration > 0 &&
@@ -3856,13 +3888,10 @@ static void mov_build_index(MOVContext *mov, AVStream *st)
unsigned int distance = 0;
unsigned int rap_group_index = 0;
unsigned int rap_group_sample = 0;
- int64_t last_dts = 0;
- int64_t dts_correction = 0;
int rap_group_present = sc->rap_group_count && sc->rap_group;
int key_off = (sc->keyframe_count && sc->keyframes[0] > 0) || (sc->stps_count && sc->stps_data[0] > 0);
current_dts -= sc->dts_shift;
- last_dts = current_dts;
if (!sc->sample_count || sti->nb_index_entries)
return;
@@ -3973,26 +4002,8 @@ static void mov_build_index(MOVContext *mov, AVStream *st)
current_offset += sample_size;
stream_size += sample_size;
- /* A negative sample duration is invalid based on the spec,
- * but some samples need it to correct the DTS. */
- if (sc->stts_data[stts_index].duration < 0) {
- av_log(mov->fc, AV_LOG_WARNING,
- "Invalid SampleDelta %d in STTS, at %d st:%d\n",
- sc->stts_data[stts_index].duration, stts_index,
- st->index);
- dts_correction += sc->stts_data[stts_index].duration - 1;
- sc->stts_data[stts_index].duration = 1;
- }
current_dts += sc->stts_data[stts_index].duration;
- if (!dts_correction || current_dts + dts_correction > last_dts) {
- current_dts += dts_correction;
- dts_correction = 0;
- } else {
- /* Avoid creating non-monotonous DTS */
- dts_correction += current_dts - last_dts - 1;
- current_dts = last_dts + 1;
- }
- last_dts = current_dts;
+
distance++;
stts_sample++;
current_sample++;
@@ -8577,6 +8588,7 @@ static const AVOption mov_options[] = {
{ "decryption_key", "The media decryption key (hex)", OFFSET(decryption_key), AV_OPT_TYPE_BINARY, .flags = AV_OPT_FLAG_DECODING_PARAM },
{ "enable_drefs", "Enable external track support.", OFFSET(enable_drefs), AV_OPT_TYPE_BOOL,
{.i64 = 0}, 0, 1, FLAGS },
+ { "max_stts_delta", "treat offsets above this value as invalid", OFFSET(max_stts_delta), AV_OPT_TYPE_INT, {.i64 = UINT_MAX-48000*10 }, 0, UINT_MAX, .flags = AV_OPT_FLAG_DECODING_PARAM },
{ NULL },
};