diff mbox series

[FFmpeg-devel] avformat/matroskaenc: Allow changing the time stamp precision via option

Message ID 20210520191639.1140-1-michael.dirks@xaymar.com
State Superseded
Headers show
Series [FFmpeg-devel] avformat/matroskaenc: Allow changing the time stamp precision via option | expand

Checks

Context Check Description
andriy/x86_make success Make finished
andriy/x86_make_fate success Make fate finished
andriy/PPC64_make success Make finished
andriy/PPC64_make_fate success Make fate finished

Commit Message

Michael Fabian 'Xaymar' Dirks May 20, 2021, 7:16 p.m. UTC
From: Michael Fabian 'Xaymar' Dirks <michael.dirks@xaymar.com>

Adds "timestamp_precision" to the available option for Matroska/WebM
muxing. The option enables users and developers to change the precision
of the time stamps in the Matroska/WebM container up to 1 nanosecond,
which can aid with the proper detection of constant and variable rate
content.

Work-around fix for: 259, 6406, 7927, 8909 and 9124.

Signed-off-by: Michael Fabian 'Xaymar' Dirks <michael.dirks@xaymar.com>
---
 doc/muxers.texi           |  8 ++++++++
 libavformat/matroskaenc.c | 28 ++++++++++++++++++++--------
 2 files changed, 28 insertions(+), 8 deletions(-)

Comments

Michael Fabian 'Xaymar' Dirks May 20, 2021, 7:35 p.m. UTC | #1
On 2021-05-20 21:16, michael.dirks@xaymar.com wrote:
> From: Michael Fabian 'Xaymar' Dirks <michael.dirks@xaymar.com>
>
> Adds "timestamp_precision" to the available option for Matroska/WebM
> muxing. The option enables users and developers to change the precision
> of the time stamps in the Matroska/WebM container up to 1 nanosecond,
> which can aid with the proper detection of constant and variable rate
> content.
>
> Work-around fix for: 259, 6406, 7927, 8909 and 9124.
>
> Signed-off-by: Michael Fabian 'Xaymar' Dirks <michael.dirks@xaymar.com>
> ---
>   doc/muxers.texi           |  8 ++++++++
>   libavformat/matroskaenc.c | 28 ++++++++++++++++++++--------
>   2 files changed, 28 insertions(+), 8 deletions(-)
>
> diff --git a/doc/muxers.texi b/doc/muxers.texi
> index e1c6ad0829..8655be94ff 100644
> --- a/doc/muxers.texi
> +++ b/doc/muxers.texi
> @@ -1583,6 +1583,14 @@ bitmap is stored bottom-up. Note that this option does not flip the bitmap
>   which has to be done manually beforehand, e.g. by using the vflip filter.
>   Default is @var{false} and indicates bitmap is stored top down.
>   
> +@item timestamp_precision
> +Sets the timestamp precision up to 1 nanosecond for Matroska/WebM, which can
> +improve detection of constant rate content in demuxers. Note that some poorly
> +implemented demuxers may require a timestamp precision of 1 millisecond, so
> +increasing it past that point may result in playback issues. Higher precision
> +also reduces the maximum possible timestamp significantly.
> +Default is @var{1/1000} (1 millisecond).
> +
>   @end table
>   
>   @anchor{md5}
> diff --git a/libavformat/matroskaenc.c b/libavformat/matroskaenc.c
> index 186a25d920..2417ac12cc 100644
> --- a/libavformat/matroskaenc.c
> +++ b/libavformat/matroskaenc.c
> @@ -20,6 +20,7 @@
>    */
>   
>   #include <stdint.h>
> +#include <float.h>
>   
>   #include "av1.h"
>   #include "avc.h"
> @@ -158,6 +159,8 @@ typedef struct MatroskaMuxContext {
>       int                 default_mode;
>   
>       uint32_t            segment_uid[4];
> +
> +    AVRational          time_base;
>   } MatroskaMuxContext;
>   
>   /** 2 bytes * 7 for EBML IDs, 7 1-byte EBML lengths, 6 1-byte uint,
> @@ -1814,6 +1817,7 @@ static int mkv_write_header(AVFormatContext *s)
>       const AVDictionaryEntry *tag;
>       int ret, i, version = 2;
>       int64_t creation_time;
> +    int64_t time_base = 1;
>   
>       if (mkv->mode != MODE_WEBM ||
>           av_dict_get(s->metadata, "stereo_mode", NULL, 0) ||
> @@ -1850,7 +1854,10 @@ static int mkv_write_header(AVFormatContext *s)
>           return ret;
>       pb = mkv->info.bc;
>   
> -    put_ebml_uint(pb, MATROSKA_ID_TIMECODESCALE, 1000000);
> +    time_base = av_rescale_q(time_base, mkv->time_base, (AVRational){1, 1000000000});
> +    av_log(s, AV_LOG_DEBUG, "TimestampScale is: %" PRId64 " ns\n", time_base);
> +    put_ebml_uint(pb, MATROSKA_ID_TIMECODESCALE, time_base);
> +
>       if ((tag = av_dict_get(s->metadata, "title", NULL, 0)))
>           put_ebml_string(pb, MATROSKA_ID_TITLE, tag->value);
>       if (!(s->flags & AVFMT_FLAG_BITEXACT)) {
> @@ -1883,11 +1890,11 @@ static int mkv_write_header(AVFormatContext *s)
>           int64_t metadata_duration = get_metadata_duration(s);
>   
>           if (s->duration > 0) {
> -            int64_t scaledDuration = av_rescale(s->duration, 1000, AV_TIME_BASE);
> +            int64_t scaledDuration = av_rescale_q(s->duration, AV_TIME_BASE_Q, mkv->time_base);
>               put_ebml_float(pb, MATROSKA_ID_DURATION, scaledDuration);
>               av_log(s, AV_LOG_DEBUG, "Write early duration from recording time = %" PRIu64 "\n", scaledDuration);
>           } else if (metadata_duration > 0) {
> -            int64_t scaledDuration = av_rescale(metadata_duration, 1000, AV_TIME_BASE);
> +            int64_t scaledDuration = av_rescale_q(metadata_duration, AV_TIME_BASE_Q, mkv->time_base);
>               put_ebml_float(pb, MATROSKA_ID_DURATION, scaledDuration);
>               av_log(s, AV_LOG_DEBUG, "Write early duration from metadata = %" PRIu64 "\n", scaledDuration);
>           } else if (s->pb->seekable & AVIO_SEEKABLE_NORMAL) {
> @@ -1948,12 +1955,12 @@ static int mkv_write_header(AVFormatContext *s)
>       // after 4k and on a keyframe
>       if (IS_SEEKABLE(pb, mkv)) {
>           if (mkv->cluster_time_limit < 0)
> -            mkv->cluster_time_limit = 5000;
> +            mkv->cluster_time_limit = av_rescale_q(5000, (AVRational){1, 1000}, mkv->time_base);
>           if (mkv->cluster_size_limit < 0)
>               mkv->cluster_size_limit = 5 * 1024 * 1024;
>       } else {
>           if (mkv->cluster_time_limit < 0)
> -            mkv->cluster_time_limit = 1000;
> +            mkv->cluster_time_limit = av_rescale_q(1000, (AVRational){1, 1000}, mkv->time_base);
>           if (mkv->cluster_size_limit < 0)
>               mkv->cluster_size_limit = 32 * 1024;
>       }
> @@ -2323,7 +2330,7 @@ static int mkv_write_packet_internal(AVFormatContext *s, const AVPacket *pkt)
>               ret = mkv_end_cluster(s);
>               if (ret < 0)
>                   return ret;
> -            av_log(s, AV_LOG_WARNING, "Starting new cluster due to timestamp\n");
> +            av_log(s, AV_LOG_VERBOSE, "Starting new cluster as timestamp offset exceeds signed 16-bit integer range.\n");

I've marked this as verbose, as the problem is not exactly something we should even bother warning about. So far in my testing, creating new clusters has not caused issues.

>           }
>       }
>   
> @@ -2738,8 +2745,8 @@ static int mkv_init(struct AVFormatContext *s)
>               track->uid = mkv_get_uid(mkv->tracks, i, &c);
>           }
>   
> -        // ms precision is the de-facto standard timescale for mkv files
> -        avpriv_set_pts_info(st, 64, 1, 1000);
> +        // Use user-defined timescale.
> +        avpriv_set_pts_info(st, 64, mkv->time_base.num, mkv->time_base.den);
>   
>           if (st->codecpar->codec_type == AVMEDIA_TYPE_ATTACHMENT) {
>               if (mkv->mode == MODE_WEBM) {
> @@ -2759,6 +2766,10 @@ static int mkv_init(struct AVFormatContext *s)
>           track->track_num_size = ebml_num_size(track->track_num);
>       }
>   
> +    // Scale the configured cluster_time_limit.
> +    if (mkv->cluster_time_limit >= 0)
> +        mkv->cluster_time_limit = av_rescale_q(mkv->cluster_time_limit, (AVRational){1, 1000}, mkv->time_base);
> +
>       if (mkv->is_dash && nb_tracks != 1)
>           return AVERROR(EINVAL);
>   
> @@ -2826,6 +2837,7 @@ static const AVOption options[] = {
>       { "infer", "For each track type, mark the first track of disposition default as default; if none exists, mark the first track as default.", 0, AV_OPT_TYPE_CONST, { .i64 = DEFAULT_MODE_INFER }, 0, 0, FLAGS, "default_mode" },
>       { "infer_no_subs", "For each track type, mark the first track of disposition default as default; for audio and video: if none exists, mark the first track as default.", 0, AV_OPT_TYPE_CONST, { .i64 = DEFAULT_MODE_INFER_NO_SUBS }, 0, 0, FLAGS, "default_mode" },
>       { "passthrough", "Use the disposition flag as-is", 0, AV_OPT_TYPE_CONST, { .i64 = DEFAULT_MODE_PASSTHROUGH }, 0, 0, FLAGS, "default_mode" },
> +    { "timestamp_precision", "Timestamp precision to use for the entire container", OFFSET(time_base), AV_OPT_TYPE_RATIONAL, { .dbl = 0.001 }, 0.000000001, DBL_MAX, FLAGS},
Reverted the default to 1ms as per discussion. In the future, Matroska hopefully adopts rational per-track time bases so that this parameter can be deprecated.
>       { NULL },
>   };
>
James Almer May 20, 2021, 9:47 p.m. UTC | #2
On 5/20/2021 4:35 PM, Michael Fabian 'Xaymar' Dirks wrote:
> On 2021-05-20 21:16, michael.dirks@xaymar.com wrote:
>> From: Michael Fabian 'Xaymar' Dirks <michael.dirks@xaymar.com>
>>
>> Adds "timestamp_precision" to the available option for Matroska/WebM
>> muxing. The option enables users and developers to change the precision
>> of the time stamps in the Matroska/WebM container up to 1 nanosecond,
>> which can aid with the proper detection of constant and variable rate
>> content.
>>
>> Work-around fix for: 259, 6406, 7927, 8909 and 9124.
>>
>> Signed-off-by: Michael Fabian 'Xaymar' Dirks <michael.dirks@xaymar.com>
>> ---
>>   doc/muxers.texi           |  8 ++++++++
>>   libavformat/matroskaenc.c | 28 ++++++++++++++++++++--------
>>   2 files changed, 28 insertions(+), 8 deletions(-)
>>
>> diff --git a/doc/muxers.texi b/doc/muxers.texi
>> index e1c6ad0829..8655be94ff 100644
>> --- a/doc/muxers.texi
>> +++ b/doc/muxers.texi
>> @@ -1583,6 +1583,14 @@ bitmap is stored bottom-up. Note that this 
>> option does not flip the bitmap
>>   which has to be done manually beforehand, e.g. by using the vflip 
>> filter.
>>   Default is @var{false} and indicates bitmap is stored top down.
>> +@item timestamp_precision
>> +Sets the timestamp precision up to 1 nanosecond for Matroska/WebM, 
>> which can
>> +improve detection of constant rate content in demuxers. Note that 
>> some poorly
>> +implemented demuxers may require a timestamp precision of 1 
>> millisecond, so
>> +increasing it past that point may result in playback issues. Higher 
>> precision
>> +also reduces the maximum possible timestamp significantly.
>> +Default is @var{1/1000} (1 millisecond).
>> +
>>   @end table
>>   @anchor{md5}
>> diff --git a/libavformat/matroskaenc.c b/libavformat/matroskaenc.c
>> index 186a25d920..2417ac12cc 100644
>> --- a/libavformat/matroskaenc.c
>> +++ b/libavformat/matroskaenc.c
>> @@ -20,6 +20,7 @@
>>    */
>>   #include <stdint.h>
>> +#include <float.h>
>>   #include "av1.h"
>>   #include "avc.h"
>> @@ -158,6 +159,8 @@ typedef struct MatroskaMuxContext {
>>       int                 default_mode;
>>       uint32_t            segment_uid[4];
>> +
>> +    AVRational          time_base;
>>   } MatroskaMuxContext;
>>   /** 2 bytes * 7 for EBML IDs, 7 1-byte EBML lengths, 6 1-byte uint,
>> @@ -1814,6 +1817,7 @@ static int mkv_write_header(AVFormatContext *s)
>>       const AVDictionaryEntry *tag;
>>       int ret, i, version = 2;
>>       int64_t creation_time;
>> +    int64_t time_base = 1;
>>       if (mkv->mode != MODE_WEBM ||
>>           av_dict_get(s->metadata, "stereo_mode", NULL, 0) ||
>> @@ -1850,7 +1854,10 @@ static int mkv_write_header(AVFormatContext *s)
>>           return ret;
>>       pb = mkv->info.bc;
>> -    put_ebml_uint(pb, MATROSKA_ID_TIMECODESCALE, 1000000);
>> +    time_base = av_rescale_q(time_base, mkv->time_base, 
>> (AVRational){1, 1000000000});
>> +    av_log(s, AV_LOG_DEBUG, "TimestampScale is: %" PRId64 " ns\n", 
>> time_base);
>> +    put_ebml_uint(pb, MATROSKA_ID_TIMECODESCALE, time_base);
>> +
>>       if ((tag = av_dict_get(s->metadata, "title", NULL, 0)))
>>           put_ebml_string(pb, MATROSKA_ID_TITLE, tag->value);
>>       if (!(s->flags & AVFMT_FLAG_BITEXACT)) {
>> @@ -1883,11 +1890,11 @@ static int mkv_write_header(AVFormatContext *s)
>>           int64_t metadata_duration = get_metadata_duration(s);
>>           if (s->duration > 0) {
>> -            int64_t scaledDuration = av_rescale(s->duration, 1000, 
>> AV_TIME_BASE);
>> +            int64_t scaledDuration = av_rescale_q(s->duration, 
>> AV_TIME_BASE_Q, mkv->time_base);
>>               put_ebml_float(pb, MATROSKA_ID_DURATION, scaledDuration);
>>               av_log(s, AV_LOG_DEBUG, "Write early duration from 
>> recording time = %" PRIu64 "\n", scaledDuration);
>>           } else if (metadata_duration > 0) {
>> -            int64_t scaledDuration = av_rescale(metadata_duration, 
>> 1000, AV_TIME_BASE);
>> +            int64_t scaledDuration = av_rescale_q(metadata_duration, 
>> AV_TIME_BASE_Q, mkv->time_base);
>>               put_ebml_float(pb, MATROSKA_ID_DURATION, scaledDuration);
>>               av_log(s, AV_LOG_DEBUG, "Write early duration from 
>> metadata = %" PRIu64 "\n", scaledDuration);
>>           } else if (s->pb->seekable & AVIO_SEEKABLE_NORMAL) {
>> @@ -1948,12 +1955,12 @@ static int mkv_write_header(AVFormatContext *s)
>>       // after 4k and on a keyframe
>>       if (IS_SEEKABLE(pb, mkv)) {
>>           if (mkv->cluster_time_limit < 0)
>> -            mkv->cluster_time_limit = 5000;
>> +            mkv->cluster_time_limit = av_rescale_q(5000, 
>> (AVRational){1, 1000}, mkv->time_base);
>>           if (mkv->cluster_size_limit < 0)
>>               mkv->cluster_size_limit = 5 * 1024 * 1024;
>>       } else {
>>           if (mkv->cluster_time_limit < 0)
>> -            mkv->cluster_time_limit = 1000;
>> +            mkv->cluster_time_limit = av_rescale_q(1000, 
>> (AVRational){1, 1000}, mkv->time_base);
>>           if (mkv->cluster_size_limit < 0)
>>               mkv->cluster_size_limit = 32 * 1024;
>>       }
>> @@ -2323,7 +2330,7 @@ static int 
>> mkv_write_packet_internal(AVFormatContext *s, const AVPacket *pkt)
>>               ret = mkv_end_cluster(s);
>>               if (ret < 0)
>>                   return ret;
>> -            av_log(s, AV_LOG_WARNING, "Starting new cluster due to 
>> timestamp\n");
>> +            av_log(s, AV_LOG_VERBOSE, "Starting new cluster as 
>> timestamp offset exceeds signed 16-bit integer range.\n");
> 
> I've marked this as verbose, as the problem is not exactly something we 
> should even bother warning about. So far in my testing, creating new 
> clusters has not caused issues.

It's not about having issues with the resulting file, it's about size 
overhead present in it (Each cluster is a top level element, so one per 
block is a lot of bytes), and about meeting the requested limit set with 
cluster_size_limit and cluster_time_limit or not.
Clusters delimitation is what's used to split the file in Dash Webm 
after all, so honoring the requested limits is important and this log 
message is useful to let the user know if the values they requested can 
be achieved.

> 
>>           }
>>       }
>> @@ -2738,8 +2745,8 @@ static int mkv_init(struct AVFormatContext *s)
>>               track->uid = mkv_get_uid(mkv->tracks, i, &c);
>>           }
>> -        // ms precision is the de-facto standard timescale for mkv files
>> -        avpriv_set_pts_info(st, 64, 1, 1000);
>> +        // Use user-defined timescale.
>> +        avpriv_set_pts_info(st, 64, mkv->time_base.num, 
>> mkv->time_base.den);
>>           if (st->codecpar->codec_type == AVMEDIA_TYPE_ATTACHMENT) {
>>               if (mkv->mode == MODE_WEBM) {
>> @@ -2759,6 +2766,10 @@ static int mkv_init(struct AVFormatContext *s)
>>           track->track_num_size = ebml_num_size(track->track_num);
>>       }
>> +    // Scale the configured cluster_time_limit.
>> +    if (mkv->cluster_time_limit >= 0)
>> +        mkv->cluster_time_limit = 
>> av_rescale_q(mkv->cluster_time_limit, (AVRational){1, 1000}, 
>> mkv->time_base);
>> +
>>       if (mkv->is_dash && nb_tracks != 1)
>>           return AVERROR(EINVAL);
>> @@ -2826,6 +2837,7 @@ static const AVOption options[] = {
>>       { "infer", "For each track type, mark the first track of 
>> disposition default as default; if none exists, mark the first track 
>> as default.", 0, AV_OPT_TYPE_CONST, { .i64 = DEFAULT_MODE_INFER }, 0, 
>> 0, FLAGS, "default_mode" },
>>       { "infer_no_subs", "For each track type, mark the first track of 
>> disposition default as default; for audio and video: if none exists, 
>> mark the first track as default.", 0, AV_OPT_TYPE_CONST, { .i64 = 
>> DEFAULT_MODE_INFER_NO_SUBS }, 0, 0, FLAGS, "default_mode" },
>>       { "passthrough", "Use the disposition flag as-is", 0, 
>> AV_OPT_TYPE_CONST, { .i64 = DEFAULT_MODE_PASSTHROUGH }, 0, 0, FLAGS, 
>> "default_mode" },
>> +    { "timestamp_precision", "Timestamp precision to use for the 
>> entire container", OFFSET(time_base), AV_OPT_TYPE_RATIONAL, { .dbl = 
>> 0.001 }, 0.000000001, DBL_MAX, FLAGS},

Is DBL_MAX useful? Can't another max value be used instead?

> Reverted the default to 1ms as per discussion. In the future, Matroska 
> hopefully adopts rational per-track time bases so that this parameter 
> can be deprecated.

Ok.

>>       { NULL },
>>   };
>
Michael Fabian 'Xaymar' Dirks May 20, 2021, 10:04 p.m. UTC | #3
On 2021-05-20 23:47, James Almer wrote:
> On 5/20/2021 4:35 PM, Michael Fabian 'Xaymar' Dirks wrote:
>> On 2021-05-20 21:16, michael.dirks@xaymar.com wrote:
>>> From: Michael Fabian 'Xaymar' Dirks <michael.dirks@xaymar.com>
>>>
>>> Adds "timestamp_precision" to the available option for Matroska/WebM
>>> muxing. The option enables users and developers to change the precision
>>> of the time stamps in the Matroska/WebM container up to 1 nanosecond,
>>> which can aid with the proper detection of constant and variable rate
>>> content.
>>>
>>> Work-around fix for: 259, 6406, 7927, 8909 and 9124.
>>>
>>> Signed-off-by: Michael Fabian 'Xaymar' Dirks <michael.dirks@xaymar.com>
>>> ---
>>>   doc/muxers.texi           |  8 ++++++++
>>>   libavformat/matroskaenc.c | 28 ++++++++++++++++++++--------
>>>   2 files changed, 28 insertions(+), 8 deletions(-)
>>>
>>> diff --git a/doc/muxers.texi b/doc/muxers.texi
>>> index e1c6ad0829..8655be94ff 100644
>>> --- a/doc/muxers.texi
>>> +++ b/doc/muxers.texi
>>> @@ -1583,6 +1583,14 @@ bitmap is stored bottom-up. Note that this option does not flip the bitmap
>>>   which has to be done manually beforehand, e.g. by using the vflip filter.
>>>   Default is @var{false} and indicates bitmap is stored top down.
>>> +@item timestamp_precision
>>> +Sets the timestamp precision up to 1 nanosecond for Matroska/WebM, which can
>>> +improve detection of constant rate content in demuxers. Note that some poorly
>>> +implemented demuxers may require a timestamp precision of 1 millisecond, so
>>> +increasing it past that point may result in playback issues. Higher precision
>>> +also reduces the maximum possible timestamp significantly.
>>> +Default is @var{1/1000} (1 millisecond).
>>> +
>>>   @end table
>>>   @anchor{md5}
>>> diff --git a/libavformat/matroskaenc.c b/libavformat/matroskaenc.c
>>> index 186a25d920..2417ac12cc 100644
>>> --- a/libavformat/matroskaenc.c
>>> +++ b/libavformat/matroskaenc.c
>>> @@ -20,6 +20,7 @@
>>>    */
>>>   #include <stdint.h>
>>> +#include <float.h>
>>>   #include "av1.h"
>>>   #include "avc.h"
>>> @@ -158,6 +159,8 @@ typedef struct MatroskaMuxContext {
>>>       int                 default_mode;
>>>       uint32_t            segment_uid[4];
>>> +
>>> +    AVRational          time_base;
>>>   } MatroskaMuxContext;
>>>   /** 2 bytes * 7 for EBML IDs, 7 1-byte EBML lengths, 6 1-byte uint,
>>> @@ -1814,6 +1817,7 @@ static int mkv_write_header(AVFormatContext *s)
>>>       const AVDictionaryEntry *tag;
>>>       int ret, i, version = 2;
>>>       int64_t creation_time;
>>> +    int64_t time_base = 1;
>>>       if (mkv->mode != MODE_WEBM ||
>>>           av_dict_get(s->metadata, "stereo_mode", NULL, 0) ||
>>> @@ -1850,7 +1854,10 @@ static int mkv_write_header(AVFormatContext *s)
>>>           return ret;
>>>       pb = mkv->info.bc;
>>> -    put_ebml_uint(pb, MATROSKA_ID_TIMECODESCALE, 1000000);
>>> +    time_base = av_rescale_q(time_base, mkv->time_base, (AVRational){1, 1000000000});
>>> +    av_log(s, AV_LOG_DEBUG, "TimestampScale is: %" PRId64 " ns\n", time_base);
>>> +    put_ebml_uint(pb, MATROSKA_ID_TIMECODESCALE, time_base);
>>> +
>>>       if ((tag = av_dict_get(s->metadata, "title", NULL, 0)))
>>>           put_ebml_string(pb, MATROSKA_ID_TITLE, tag->value);
>>>       if (!(s->flags & AVFMT_FLAG_BITEXACT)) {
>>> @@ -1883,11 +1890,11 @@ static int mkv_write_header(AVFormatContext *s)
>>>           int64_t metadata_duration = get_metadata_duration(s);
>>>           if (s->duration > 0) {
>>> -            int64_t scaledDuration = av_rescale(s->duration, 1000, AV_TIME_BASE);
>>> +            int64_t scaledDuration = av_rescale_q(s->duration, AV_TIME_BASE_Q, mkv->time_base);
>>>               put_ebml_float(pb, MATROSKA_ID_DURATION, scaledDuration);
>>>               av_log(s, AV_LOG_DEBUG, "Write early duration from recording time = %" PRIu64 "\n", scaledDuration);
>>>           } else if (metadata_duration > 0) {
>>> -            int64_t scaledDuration = av_rescale(metadata_duration, 1000, AV_TIME_BASE);
>>> +            int64_t scaledDuration = av_rescale_q(metadata_duration, AV_TIME_BASE_Q, mkv->time_base);
>>>               put_ebml_float(pb, MATROSKA_ID_DURATION, scaledDuration);
>>>               av_log(s, AV_LOG_DEBUG, "Write early duration from metadata = %" PRIu64 "\n", scaledDuration);
>>>           } else if (s->pb->seekable & AVIO_SEEKABLE_NORMAL) {
>>> @@ -1948,12 +1955,12 @@ static int mkv_write_header(AVFormatContext *s)
>>>       // after 4k and on a keyframe
>>>       if (IS_SEEKABLE(pb, mkv)) {
>>>           if (mkv->cluster_time_limit < 0)
>>> -            mkv->cluster_time_limit = 5000;
>>> +            mkv->cluster_time_limit = av_rescale_q(5000, (AVRational){1, 1000}, mkv->time_base);
>>>           if (mkv->cluster_size_limit < 0)
>>>               mkv->cluster_size_limit = 5 * 1024 * 1024;
>>>       } else {
>>>           if (mkv->cluster_time_limit < 0)
>>> -            mkv->cluster_time_limit = 1000;
>>> +            mkv->cluster_time_limit = av_rescale_q(1000, (AVRational){1, 1000}, mkv->time_base);
>>>           if (mkv->cluster_size_limit < 0)
>>>               mkv->cluster_size_limit = 32 * 1024;
>>>       }
>>> @@ -2323,7 +2330,7 @@ static int mkv_write_packet_internal(AVFormatContext *s, const AVPacket *pkt)
>>>               ret = mkv_end_cluster(s);
>>>               if (ret < 0)
>>>                   return ret;
>>> -            av_log(s, AV_LOG_WARNING, "Starting new cluster due to timestamp\n");
>>> +            av_log(s, AV_LOG_VERBOSE, "Starting new cluster as timestamp offset exceeds signed 16-bit integer range.\n");
>>
>> I've marked this as verbose, as the problem is not exactly something we should even bother warning about. So far in my testing, creating new clusters has not caused issues.
>
> It's not about having issues with the resulting file, it's about size overhead present in it (Each cluster is a top level element, so one per block is a lot of bytes), and about meeting the requested limit set with cluster_size_limit and cluster_time_limit or not.
> Clusters delimitation is what's used to split the file in Dash Webm after all, so honoring the requested limits is important and this log message is useful to let the user know if the values they requested can be achieved.
Ah then I misunderstood the purpose of the warning. I will revert this part once I hear back on the latter comment.
>
>>
>>>           }
>>>       }
>>> @@ -2738,8 +2745,8 @@ static int mkv_init(struct AVFormatContext *s)
>>>               track->uid = mkv_get_uid(mkv->tracks, i, &c);
>>>           }
>>> -        // ms precision is the de-facto standard timescale for mkv files
>>> -        avpriv_set_pts_info(st, 64, 1, 1000);
>>> +        // Use user-defined timescale.
>>> +        avpriv_set_pts_info(st, 64, mkv->time_base.num, mkv->time_base.den);
>>>           if (st->codecpar->codec_type == AVMEDIA_TYPE_ATTACHMENT) {
>>>               if (mkv->mode == MODE_WEBM) {
>>> @@ -2759,6 +2766,10 @@ static int mkv_init(struct AVFormatContext *s)
>>>           track->track_num_size = ebml_num_size(track->track_num);
>>>       }
>>> +    // Scale the configured cluster_time_limit.
>>> +    if (mkv->cluster_time_limit >= 0)
>>> +        mkv->cluster_time_limit = av_rescale_q(mkv->cluster_time_limit, (AVRational){1, 1000}, mkv->time_base);
>>> +
>>>       if (mkv->is_dash && nb_tracks != 1)
>>>           return AVERROR(EINVAL);
>>> @@ -2826,6 +2837,7 @@ static const AVOption options[] = {
>>>       { "infer", "For each track type, mark the first track of disposition default as default; if none exists, mark the first track as default.", 0, AV_OPT_TYPE_CONST, { .i64 = DEFAULT_MODE_INFER }, 0, 0, FLAGS, "default_mode" },
>>>       { "infer_no_subs", "For each track type, mark the first track of disposition default as default; for audio and video: if none exists, mark the first track as default.", 0, AV_OPT_TYPE_CONST, { .i64 = DEFAULT_MODE_INFER_NO_SUBS }, 0, 0, FLAGS, "default_mode" },
>>>       { "passthrough", "Use the disposition flag as-is", 0, AV_OPT_TYPE_CONST, { .i64 = DEFAULT_MODE_PASSTHROUGH }, 0, 0, FLAGS, "default_mode" },
>>> +    { "timestamp_precision", "Timestamp precision to use for the entire container", OFFSET(time_base), AV_OPT_TYPE_RATIONAL, { .dbl = 0.001 }, 0.000000001, DBL_MAX, FLAGS},
>
> Is DBL_MAX useful? Can't another max value be used instead?

It's not really useful, as the actual limit for AVRational is 2147483647/1, while Matroska/WebM could support as much as 18446744073/1. So perhaps INT_MAX is a better choice here, which is already being used in other places. As I personally do not need lower precision, I have no opinion on what upper limit would be ideal to present to the user.

>
>> Reverted the default to 1ms as per discussion. In the future, Matroska hopefully adopts rational per-track time bases so that this parameter can be deprecated.
>
> Ok.
>
>>>       { NULL },
>>>   };
James Almer May 20, 2021, 10:12 p.m. UTC | #4
On 5/20/2021 7:04 PM, Michael Fabian 'Xaymar' Dirks wrote:
> On 2021-05-20 23:47, James Almer wrote:
>> On 5/20/2021 4:35 PM, Michael Fabian 'Xaymar' Dirks wrote:
>>> On 2021-05-20 21:16, michael.dirks@xaymar.com wrote:
>>>> From: Michael Fabian 'Xaymar' Dirks <michael.dirks@xaymar.com>
>>>>
>>>> Adds "timestamp_precision" to the available option for Matroska/WebM
>>>> muxing. The option enables users and developers to change the precision
>>>> of the time stamps in the Matroska/WebM container up to 1 nanosecond,
>>>> which can aid with the proper detection of constant and variable rate
>>>> content.
>>>>
>>>> Work-around fix for: 259, 6406, 7927, 8909 and 9124.
>>>>
>>>> Signed-off-by: Michael Fabian 'Xaymar' Dirks <michael.dirks@xaymar.com>
>>>> ---
>>>>   doc/muxers.texi           |  8 ++++++++
>>>>   libavformat/matroskaenc.c | 28 ++++++++++++++++++++--------
>>>>   2 files changed, 28 insertions(+), 8 deletions(-)
>>>>
>>>> diff --git a/doc/muxers.texi b/doc/muxers.texi
>>>> index e1c6ad0829..8655be94ff 100644
>>>> --- a/doc/muxers.texi
>>>> +++ b/doc/muxers.texi
>>>> @@ -1583,6 +1583,14 @@ bitmap is stored bottom-up. Note that this 
>>>> option does not flip the bitmap
>>>>   which has to be done manually beforehand, e.g. by using the vflip 
>>>> filter.
>>>>   Default is @var{false} and indicates bitmap is stored top down.
>>>> +@item timestamp_precision
>>>> +Sets the timestamp precision up to 1 nanosecond for Matroska/WebM, 
>>>> which can
>>>> +improve detection of constant rate content in demuxers. Note that 
>>>> some poorly
>>>> +implemented demuxers may require a timestamp precision of 1 
>>>> millisecond, so
>>>> +increasing it past that point may result in playback issues. Higher 
>>>> precision
>>>> +also reduces the maximum possible timestamp significantly.
>>>> +Default is @var{1/1000} (1 millisecond).
>>>> +
>>>>   @end table
>>>>   @anchor{md5}
>>>> diff --git a/libavformat/matroskaenc.c b/libavformat/matroskaenc.c
>>>> index 186a25d920..2417ac12cc 100644
>>>> --- a/libavformat/matroskaenc.c
>>>> +++ b/libavformat/matroskaenc.c
>>>> @@ -20,6 +20,7 @@
>>>>    */
>>>>   #include <stdint.h>
>>>> +#include <float.h>
>>>>   #include "av1.h"
>>>>   #include "avc.h"
>>>> @@ -158,6 +159,8 @@ typedef struct MatroskaMuxContext {
>>>>       int                 default_mode;
>>>>       uint32_t            segment_uid[4];
>>>> +
>>>> +    AVRational          time_base;
>>>>   } MatroskaMuxContext;
>>>>   /** 2 bytes * 7 for EBML IDs, 7 1-byte EBML lengths, 6 1-byte uint,
>>>> @@ -1814,6 +1817,7 @@ static int mkv_write_header(AVFormatContext *s)
>>>>       const AVDictionaryEntry *tag;
>>>>       int ret, i, version = 2;
>>>>       int64_t creation_time;
>>>> +    int64_t time_base = 1;
>>>>       if (mkv->mode != MODE_WEBM ||
>>>>           av_dict_get(s->metadata, "stereo_mode", NULL, 0) ||
>>>> @@ -1850,7 +1854,10 @@ static int mkv_write_header(AVFormatContext *s)
>>>>           return ret;
>>>>       pb = mkv->info.bc;
>>>> -    put_ebml_uint(pb, MATROSKA_ID_TIMECODESCALE, 1000000);
>>>> +    time_base = av_rescale_q(time_base, mkv->time_base, 
>>>> (AVRational){1, 1000000000});
>>>> +    av_log(s, AV_LOG_DEBUG, "TimestampScale is: %" PRId64 " ns\n", 
>>>> time_base);
>>>> +    put_ebml_uint(pb, MATROSKA_ID_TIMECODESCALE, time_base);
>>>> +
>>>>       if ((tag = av_dict_get(s->metadata, "title", NULL, 0)))
>>>>           put_ebml_string(pb, MATROSKA_ID_TITLE, tag->value);
>>>>       if (!(s->flags & AVFMT_FLAG_BITEXACT)) {
>>>> @@ -1883,11 +1890,11 @@ static int mkv_write_header(AVFormatContext *s)
>>>>           int64_t metadata_duration = get_metadata_duration(s);
>>>>           if (s->duration > 0) {
>>>> -            int64_t scaledDuration = av_rescale(s->duration, 1000, 
>>>> AV_TIME_BASE);
>>>> +            int64_t scaledDuration = av_rescale_q(s->duration, 
>>>> AV_TIME_BASE_Q, mkv->time_base);
>>>>               put_ebml_float(pb, MATROSKA_ID_DURATION, scaledDuration);
>>>>               av_log(s, AV_LOG_DEBUG, "Write early duration from 
>>>> recording time = %" PRIu64 "\n", scaledDuration);
>>>>           } else if (metadata_duration > 0) {
>>>> -            int64_t scaledDuration = av_rescale(metadata_duration, 
>>>> 1000, AV_TIME_BASE);
>>>> +            int64_t scaledDuration = 
>>>> av_rescale_q(metadata_duration, AV_TIME_BASE_Q, mkv->time_base);
>>>>               put_ebml_float(pb, MATROSKA_ID_DURATION, scaledDuration);
>>>>               av_log(s, AV_LOG_DEBUG, "Write early duration from 
>>>> metadata = %" PRIu64 "\n", scaledDuration);
>>>>           } else if (s->pb->seekable & AVIO_SEEKABLE_NORMAL) {
>>>> @@ -1948,12 +1955,12 @@ static int mkv_write_header(AVFormatContext *s)
>>>>       // after 4k and on a keyframe
>>>>       if (IS_SEEKABLE(pb, mkv)) {
>>>>           if (mkv->cluster_time_limit < 0)
>>>> -            mkv->cluster_time_limit = 5000;
>>>> +            mkv->cluster_time_limit = av_rescale_q(5000, 
>>>> (AVRational){1, 1000}, mkv->time_base);
>>>>           if (mkv->cluster_size_limit < 0)
>>>>               mkv->cluster_size_limit = 5 * 1024 * 1024;
>>>>       } else {
>>>>           if (mkv->cluster_time_limit < 0)
>>>> -            mkv->cluster_time_limit = 1000;
>>>> +            mkv->cluster_time_limit = av_rescale_q(1000, 
>>>> (AVRational){1, 1000}, mkv->time_base);
>>>>           if (mkv->cluster_size_limit < 0)
>>>>               mkv->cluster_size_limit = 32 * 1024;
>>>>       }
>>>> @@ -2323,7 +2330,7 @@ static int 
>>>> mkv_write_packet_internal(AVFormatContext *s, const AVPacket *pkt)
>>>>               ret = mkv_end_cluster(s);
>>>>               if (ret < 0)
>>>>                   return ret;
>>>> -            av_log(s, AV_LOG_WARNING, "Starting new cluster due to 
>>>> timestamp\n");
>>>> +            av_log(s, AV_LOG_VERBOSE, "Starting new cluster as 
>>>> timestamp offset exceeds signed 16-bit integer range.\n");
>>>
>>> I've marked this as verbose, as the problem is not exactly something 
>>> we should even bother warning about. So far in my testing, creating 
>>> new clusters has not caused issues.
>>
>> It's not about having issues with the resulting file, it's about size 
>> overhead present in it (Each cluster is a top level element, so one 
>> per block is a lot of bytes), and about meeting the requested limit 
>> set with cluster_size_limit and cluster_time_limit or not.
>> Clusters delimitation is what's used to split the file in Dash Webm 
>> after all, so honoring the requested limits is important and this log 
>> message is useful to let the user know if the values they requested 
>> can be achieved.
> Ah then I misunderstood the purpose of the warning. I will revert this 
> part once I hear back on the latter comment.
>>
>>>
>>>>           }
>>>>       }
>>>> @@ -2738,8 +2745,8 @@ static int mkv_init(struct AVFormatContext *s)
>>>>               track->uid = mkv_get_uid(mkv->tracks, i, &c);
>>>>           }
>>>> -        // ms precision is the de-facto standard timescale for mkv 
>>>> files
>>>> -        avpriv_set_pts_info(st, 64, 1, 1000);
>>>> +        // Use user-defined timescale.
>>>> +        avpriv_set_pts_info(st, 64, mkv->time_base.num, 
>>>> mkv->time_base.den);
>>>>           if (st->codecpar->codec_type == AVMEDIA_TYPE_ATTACHMENT) {
>>>>               if (mkv->mode == MODE_WEBM) {
>>>> @@ -2759,6 +2766,10 @@ static int mkv_init(struct AVFormatContext *s)
>>>>           track->track_num_size = ebml_num_size(track->track_num);
>>>>       }
>>>> +    // Scale the configured cluster_time_limit.
>>>> +    if (mkv->cluster_time_limit >= 0)
>>>> +        mkv->cluster_time_limit = 
>>>> av_rescale_q(mkv->cluster_time_limit, (AVRational){1, 1000}, 
>>>> mkv->time_base);
>>>> +
>>>>       if (mkv->is_dash && nb_tracks != 1)
>>>>           return AVERROR(EINVAL);
>>>> @@ -2826,6 +2837,7 @@ static const AVOption options[] = {
>>>>       { "infer", "For each track type, mark the first track of 
>>>> disposition default as default; if none exists, mark the first track 
>>>> as default.", 0, AV_OPT_TYPE_CONST, { .i64 = DEFAULT_MODE_INFER }, 
>>>> 0, 0, FLAGS, "default_mode" },
>>>>       { "infer_no_subs", "For each track type, mark the first track 
>>>> of disposition default as default; for audio and video: if none 
>>>> exists, mark the first track as default.", 0, AV_OPT_TYPE_CONST, { 
>>>> .i64 = DEFAULT_MODE_INFER_NO_SUBS }, 0, 0, FLAGS, "default_mode" },
>>>>       { "passthrough", "Use the disposition flag as-is", 0, 
>>>> AV_OPT_TYPE_CONST, { .i64 = DEFAULT_MODE_PASSTHROUGH }, 0, 0, FLAGS, 
>>>> "default_mode" },
>>>> +    { "timestamp_precision", "Timestamp precision to use for the 
>>>> entire container", OFFSET(time_base), AV_OPT_TYPE_RATIONAL, { .dbl = 
>>>> 0.001 }, 0.000000001, DBL_MAX, FLAGS},
>>
>> Is DBL_MAX useful? Can't another max value be used instead?
> 
> It's not really useful, as the actual limit for AVRational is 
> 2147483647/1, while Matroska/WebM could support as much as 
> 18446744073/1. So perhaps INT_MAX is a better choice here, which is 
> already being used in other places. As I personally do not need lower 
> precision, I have no opinion on what upper limit would be ideal to 
> present to the user.

INT_MAX sounds good, yes.

> 
>>
>>> Reverted the default to 1ms as per discussion. In the future, 
>>> Matroska hopefully adopts rational per-track time bases so that this 
>>> parameter can be deprecated.
>>
>> Ok.
>>
>>>>       { NULL },
>>>>   };
> 
>
diff mbox series

Patch

diff --git a/doc/muxers.texi b/doc/muxers.texi
index e1c6ad0829..8655be94ff 100644
--- a/doc/muxers.texi
+++ b/doc/muxers.texi
@@ -1583,6 +1583,14 @@  bitmap is stored bottom-up. Note that this option does not flip the bitmap
 which has to be done manually beforehand, e.g. by using the vflip filter.
 Default is @var{false} and indicates bitmap is stored top down.
 
+@item timestamp_precision
+Sets the timestamp precision up to 1 nanosecond for Matroska/WebM, which can
+improve detection of constant rate content in demuxers. Note that some poorly
+implemented demuxers may require a timestamp precision of 1 millisecond, so
+increasing it past that point may result in playback issues. Higher precision
+also reduces the maximum possible timestamp significantly.
+Default is @var{1/1000} (1 millisecond).
+
 @end table
 
 @anchor{md5}
diff --git a/libavformat/matroskaenc.c b/libavformat/matroskaenc.c
index 186a25d920..2417ac12cc 100644
--- a/libavformat/matroskaenc.c
+++ b/libavformat/matroskaenc.c
@@ -20,6 +20,7 @@ 
  */
 
 #include <stdint.h>
+#include <float.h>
 
 #include "av1.h"
 #include "avc.h"
@@ -158,6 +159,8 @@  typedef struct MatroskaMuxContext {
     int                 default_mode;
 
     uint32_t            segment_uid[4];
+
+    AVRational          time_base;
 } MatroskaMuxContext;
 
 /** 2 bytes * 7 for EBML IDs, 7 1-byte EBML lengths, 6 1-byte uint,
@@ -1814,6 +1817,7 @@  static int mkv_write_header(AVFormatContext *s)
     const AVDictionaryEntry *tag;
     int ret, i, version = 2;
     int64_t creation_time;
+    int64_t time_base = 1;
 
     if (mkv->mode != MODE_WEBM ||
         av_dict_get(s->metadata, "stereo_mode", NULL, 0) ||
@@ -1850,7 +1854,10 @@  static int mkv_write_header(AVFormatContext *s)
         return ret;
     pb = mkv->info.bc;
 
-    put_ebml_uint(pb, MATROSKA_ID_TIMECODESCALE, 1000000);
+    time_base = av_rescale_q(time_base, mkv->time_base, (AVRational){1, 1000000000});
+    av_log(s, AV_LOG_DEBUG, "TimestampScale is: %" PRId64 " ns\n", time_base);
+    put_ebml_uint(pb, MATROSKA_ID_TIMECODESCALE, time_base);
+
     if ((tag = av_dict_get(s->metadata, "title", NULL, 0)))
         put_ebml_string(pb, MATROSKA_ID_TITLE, tag->value);
     if (!(s->flags & AVFMT_FLAG_BITEXACT)) {
@@ -1883,11 +1890,11 @@  static int mkv_write_header(AVFormatContext *s)
         int64_t metadata_duration = get_metadata_duration(s);
 
         if (s->duration > 0) {
-            int64_t scaledDuration = av_rescale(s->duration, 1000, AV_TIME_BASE);
+            int64_t scaledDuration = av_rescale_q(s->duration, AV_TIME_BASE_Q, mkv->time_base);
             put_ebml_float(pb, MATROSKA_ID_DURATION, scaledDuration);
             av_log(s, AV_LOG_DEBUG, "Write early duration from recording time = %" PRIu64 "\n", scaledDuration);
         } else if (metadata_duration > 0) {
-            int64_t scaledDuration = av_rescale(metadata_duration, 1000, AV_TIME_BASE);
+            int64_t scaledDuration = av_rescale_q(metadata_duration, AV_TIME_BASE_Q, mkv->time_base);
             put_ebml_float(pb, MATROSKA_ID_DURATION, scaledDuration);
             av_log(s, AV_LOG_DEBUG, "Write early duration from metadata = %" PRIu64 "\n", scaledDuration);
         } else if (s->pb->seekable & AVIO_SEEKABLE_NORMAL) {
@@ -1948,12 +1955,12 @@  static int mkv_write_header(AVFormatContext *s)
     // after 4k and on a keyframe
     if (IS_SEEKABLE(pb, mkv)) {
         if (mkv->cluster_time_limit < 0)
-            mkv->cluster_time_limit = 5000;
+            mkv->cluster_time_limit = av_rescale_q(5000, (AVRational){1, 1000}, mkv->time_base);
         if (mkv->cluster_size_limit < 0)
             mkv->cluster_size_limit = 5 * 1024 * 1024;
     } else {
         if (mkv->cluster_time_limit < 0)
-            mkv->cluster_time_limit = 1000;
+            mkv->cluster_time_limit = av_rescale_q(1000, (AVRational){1, 1000}, mkv->time_base);
         if (mkv->cluster_size_limit < 0)
             mkv->cluster_size_limit = 32 * 1024;
     }
@@ -2323,7 +2330,7 @@  static int mkv_write_packet_internal(AVFormatContext *s, const AVPacket *pkt)
             ret = mkv_end_cluster(s);
             if (ret < 0)
                 return ret;
-            av_log(s, AV_LOG_WARNING, "Starting new cluster due to timestamp\n");
+            av_log(s, AV_LOG_VERBOSE, "Starting new cluster as timestamp offset exceeds signed 16-bit integer range.\n");
         }
     }
 
@@ -2738,8 +2745,8 @@  static int mkv_init(struct AVFormatContext *s)
             track->uid = mkv_get_uid(mkv->tracks, i, &c);
         }
 
-        // ms precision is the de-facto standard timescale for mkv files
-        avpriv_set_pts_info(st, 64, 1, 1000);
+        // Use user-defined timescale.
+        avpriv_set_pts_info(st, 64, mkv->time_base.num, mkv->time_base.den);
 
         if (st->codecpar->codec_type == AVMEDIA_TYPE_ATTACHMENT) {
             if (mkv->mode == MODE_WEBM) {
@@ -2759,6 +2766,10 @@  static int mkv_init(struct AVFormatContext *s)
         track->track_num_size = ebml_num_size(track->track_num);
     }
 
+    // Scale the configured cluster_time_limit.
+    if (mkv->cluster_time_limit >= 0)
+        mkv->cluster_time_limit = av_rescale_q(mkv->cluster_time_limit, (AVRational){1, 1000}, mkv->time_base);
+
     if (mkv->is_dash && nb_tracks != 1)
         return AVERROR(EINVAL);
 
@@ -2826,6 +2837,7 @@  static const AVOption options[] = {
     { "infer", "For each track type, mark the first track of disposition default as default; if none exists, mark the first track as default.", 0, AV_OPT_TYPE_CONST, { .i64 = DEFAULT_MODE_INFER }, 0, 0, FLAGS, "default_mode" },
     { "infer_no_subs", "For each track type, mark the first track of disposition default as default; for audio and video: if none exists, mark the first track as default.", 0, AV_OPT_TYPE_CONST, { .i64 = DEFAULT_MODE_INFER_NO_SUBS }, 0, 0, FLAGS, "default_mode" },
     { "passthrough", "Use the disposition flag as-is", 0, AV_OPT_TYPE_CONST, { .i64 = DEFAULT_MODE_PASSTHROUGH }, 0, 0, FLAGS, "default_mode" },
+    { "timestamp_precision", "Timestamp precision to use for the entire container", OFFSET(time_base), AV_OPT_TYPE_RATIONAL, { .dbl = 0.001 }, 0.000000001, DBL_MAX, FLAGS},
     { NULL },
 };