diff mbox

[FFmpeg-devel] flvenc: Fix sequence header update timestamps

Message ID 20180503165021.2120-1-alex.converse@gmail.com
State Superseded
Headers show

Commit Message

Alex Converse May 3, 2018, 4:50 p.m. UTC
From: Alex Converse <alexconv@twitch.tv>

---
 libavformat/flvenc.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

Comments

Jan Ekström May 3, 2018, 5:58 p.m. UTC | #1
On Thu, May 3, 2018 at 7:50 PM, Alex Converse <alex.converse@gmail.com> wrote:
> From: Alex Converse <alexconv@twitch.tv>
>
> ---
>  libavformat/flvenc.c | 10 +++++-----
>  1 file changed, 5 insertions(+), 5 deletions(-)
>
> diff --git a/libavformat/flvenc.c b/libavformat/flvenc.c
> index e8af48cb64..827d798a61 100644
> --- a/libavformat/flvenc.c
> +++ b/libavformat/flvenc.c
> @@ -480,7 +480,7 @@ static int unsupported_codec(AVFormatContext *s,
>      return AVERROR(ENOSYS);
>  }
>
> -static void flv_write_codec_header(AVFormatContext* s, AVCodecParameters* par) {
> +static void flv_write_codec_header(AVFormatContext* s, AVCodecParameters* par, unsigned int ts) {

Hi,

While the timestamp field is 8+24=32 bits, the DTS value is int64_ts.
Thus, while I'm really bad at doing wrap-around logic, wouldn't it be
something a la ´uint32_t wrapped_timestamp = (uint32_t)(ts %
UINT32_MAX)`?

Additionally, I did briefly go through E.4.1 (FLV Tag) in the spec and
it doesn't really seem to define whether this value is PTS or DTS...
Is this defined anywhere proper? Given FLV's age I wouldn't be
surprised that the answer would be "effectively DTS", of course. But
if you've seen what  Adobe's implementation writes with B-frames it'd
be interesting to see.

>      int64_t data_size;
>      AVIOContext *pb = s->pb;
>      FLVContext *flv = s->priv_data;
> @@ -492,8 +492,8 @@ static void flv_write_codec_header(AVFormatContext* s, AVCodecParameters* par) {
>                  par->codec_type == AVMEDIA_TYPE_VIDEO ?
>                          FLV_TAG_TYPE_VIDEO : FLV_TAG_TYPE_AUDIO);
>          avio_wb24(pb, 0); // size patched later
> -        avio_wb24(pb, 0); // ts
> -        avio_w8(pb, 0);   // ts ext
> +        avio_wb24(pb, ts & 0xFFFFFF);
> +        avio_w8(pb, (ts >> 24) & 0x7F);

Looking at the spec this is bottom 24 bits of the timestamp and top 8,
thus why is the second one not & 0xFF ?

>          avio_wb24(pb, 0); // streamid
>          pos = avio_tell(pb);
>          if (par->codec_id == AV_CODEC_ID_AAC) {
> @@ -756,7 +756,7 @@ static int flv_write_header(AVFormatContext *s)
>      }
>
>      for (i = 0; i < s->nb_streams; i++) {
> -        flv_write_codec_header(s, s->streams[i]->codecpar);
> +        flv_write_codec_header(s, s->streams[i]->codecpar, 0);
>      }
>

Is it customary that even if you've got a live stream going, that the
start point of a given ingest is always zero? In that case you might
want to take mention of the initial dts, and then write offsets
compared to that? Otherwise if you have an initial offset of, say, 5s,
and then you a 5s GOP with an update, then while the difference to the
initial timestamp would be 5s, using the original dts field you would
get the second header having a value of 10s there. Or am I
misunderstanding something here?

Otherwise, looks like a nice improvement in the implementation.


Best regards,
Jan
Jan Ekström May 3, 2018, 6 p.m. UTC | #2
On Thu, May 3, 2018 at 8:58 PM, Jan Ekström <jeebjp@gmail.com> wrote:
> On Thu, May 3, 2018 at 7:50 PM, Alex Converse <alex.converse@gmail.com> wrote:
>> From: Alex Converse <alexconv@twitch.tv>
>>
>> ---
>>  libavformat/flvenc.c | 10 +++++-----
>>  1 file changed, 5 insertions(+), 5 deletions(-)
>>
>> diff --git a/libavformat/flvenc.c b/libavformat/flvenc.c
>> index e8af48cb64..827d798a61 100644
>> --- a/libavformat/flvenc.c
>> +++ b/libavformat/flvenc.c
>> @@ -480,7 +480,7 @@ static int unsupported_codec(AVFormatContext *s,
>>      return AVERROR(ENOSYS);
>>  }
>>
>> -static void flv_write_codec_header(AVFormatContext* s, AVCodecParameters* par) {
>> +static void flv_write_codec_header(AVFormatContext* s, AVCodecParameters* par, unsigned int ts) {
>
> Hi,
>
> While the timestamp field is 8+24=32 bits, the DTS value is int64_ts.
> Thus, while I'm really bad at doing wrap-around logic, wouldn't it be
> something a la ´uint32_t wrapped_timestamp = (uint32_t)(ts %
> UINT32_MAX)`?
>
> Additionally, I did briefly go through E.4.1 (FLV Tag) in the spec and
> it doesn't really seem to define whether this value is PTS or DTS...
> Is this defined anywhere proper? Given FLV's age I wouldn't be
> surprised that the answer would be "effectively DTS", of course. But
> if you've seen what  Adobe's implementation writes with B-frames it'd
> be interesting to see.
>
>>      int64_t data_size;
>>      AVIOContext *pb = s->pb;
>>      FLVContext *flv = s->priv_data;
>> @@ -492,8 +492,8 @@ static void flv_write_codec_header(AVFormatContext* s, AVCodecParameters* par) {
>>                  par->codec_type == AVMEDIA_TYPE_VIDEO ?
>>                          FLV_TAG_TYPE_VIDEO : FLV_TAG_TYPE_AUDIO);
>>          avio_wb24(pb, 0); // size patched later
>> -        avio_wb24(pb, 0); // ts
>> -        avio_w8(pb, 0);   // ts ext
>> +        avio_wb24(pb, ts & 0xFFFFFF);
>> +        avio_w8(pb, (ts >> 24) & 0x7F);
>
> Looking at the spec this is bottom 24 bits of the timestamp and top 8,
> thus why is the second one not & 0xFF ?
>

D'oh, I should read better. "...to form a SI32 value". Thus, this is LGTM.

>>          avio_wb24(pb, 0); // streamid
>>          pos = avio_tell(pb);
>>          if (par->codec_id == AV_CODEC_ID_AAC) {
>> @@ -756,7 +756,7 @@ static int flv_write_header(AVFormatContext *s)
>>      }
>>
>>      for (i = 0; i < s->nb_streams; i++) {
>> -        flv_write_codec_header(s, s->streams[i]->codecpar);
>> +        flv_write_codec_header(s, s->streams[i]->codecpar, 0);
>>      }
>>
>
> Is it customary that even if you've got a live stream going, that the
> start point of a given ingest is always zero? In that case you might
> want to take mention of the initial dts, and then write offsets
> compared to that? Otherwise if you have an initial offset of, say, 5s,
> and then you a 5s GOP with an update, then while the difference to the
> initial timestamp would be 5s, using the original dts field you would
> get the second header having a value of 10s there. Or am I
> misunderstanding something here?
>
> Otherwise, looks like a nice improvement in the implementation.
>
>
> Best regards,
> Jan

Jan
Liu Steven May 4, 2018, 3:03 a.m. UTC | #3
> On 4 May 2018, at 02:00, Jan Ekström <jeebjp@gmail.com> wrote:
> 
> On Thu, May 3, 2018 at 8:58 PM, Jan Ekström <jeebjp@gmail.com> wrote:
>> On Thu, May 3, 2018 at 7:50 PM, Alex Converse <alex.converse@gmail.com> wrote:
>>> From: Alex Converse <alexconv@twitch.tv>
>>> 
>>> ---
>>> libavformat/flvenc.c | 10 +++++-----
>>> 1 file changed, 5 insertions(+), 5 deletions(-)
>>> 
>>> diff --git a/libavformat/flvenc.c b/libavformat/flvenc.c
>>> index e8af48cb64..827d798a61 100644
>>> --- a/libavformat/flvenc.c
>>> +++ b/libavformat/flvenc.c
>>> @@ -480,7 +480,7 @@ static int unsupported_codec(AVFormatContext *s,
>>>     return AVERROR(ENOSYS);
>>> }
>>> 
>>> -static void flv_write_codec_header(AVFormatContext* s, AVCodecParameters* par) {
>>> +static void flv_write_codec_header(AVFormatContext* s, AVCodecParameters* par, unsigned int ts) {
>> 
>> Hi,
>> 
>> While the timestamp field is 8+24=32 bits, the DTS value is int64_ts.
>> Thus, while I'm really bad at doing wrap-around logic, wouldn't it be
>> something a la ´uint32_t wrapped_timestamp = (uint32_t)(ts %
>> UINT32_MAX)`?
>> 
>> Additionally, I did briefly go through E.4.1 (FLV Tag) in the spec and
>> it doesn't really seem to define whether this value is PTS or DTS...
>> Is this defined anywhere proper? Given FLV's age I wouldn't be
>> surprised that the answer would be "effectively DTS", of course. But
>> if you've seen what  Adobe's implementation writes with B-frames it'd
>> be interesting to see.
>> 
>>>     int64_t data_size;
>>>     AVIOContext *pb = s->pb;
>>>     FLVContext *flv = s->priv_data;
>>> @@ -492,8 +492,8 @@ static void flv_write_codec_header(AVFormatContext* s, AVCodecParameters* par) {
>>>                 par->codec_type == AVMEDIA_TYPE_VIDEO ?
>>>                         FLV_TAG_TYPE_VIDEO : FLV_TAG_TYPE_AUDIO);
>>>         avio_wb24(pb, 0); // size patched later
>>> -        avio_wb24(pb, 0); // ts
>>> -        avio_w8(pb, 0);   // ts ext
>>> +        avio_wb24(pb, ts & 0xFFFFFF);
>>> +        avio_w8(pb, (ts >> 24) & 0x7F);
>> 
>> Looking at the spec this is bottom 24 bits of the timestamp and top 8,
>> thus why is the second one not & 0xFF ?
>> 
> 
> D'oh, I should read better. "...to form a SI32 value". Thus, this is LGTM.

Will apply.

Thanks
Steven
Jan Ekström May 4, 2018, 9:20 a.m. UTC | #4
WHAT!?

This LGTM was for the bit mask *only*

ALL OTHER POINTS STAND

Jan

On Fri, May 4, 2018, 06:04 Steven Liu <lq@chinaffmpeg.org> wrote:

>
>
> > On 4 May 2018, at 02:00, Jan Ekström <jeebjp@gmail.com> wrote:
> >
> > On Thu, May 3, 2018 at 8:58 PM, Jan Ekström <jeebjp@gmail.com> wrote:
> >> On Thu, May 3, 2018 at 7:50 PM, Alex Converse <alex.converse@gmail.com>
> wrote:
> >>> From: Alex Converse <alexconv@twitch.tv>
> >>>
> >>> ---
> >>> libavformat/flvenc.c | 10 +++++-----
> >>> 1 file changed, 5 insertions(+), 5 deletions(-)
> >>>
> >>> diff --git a/libavformat/flvenc.c b/libavformat/flvenc.c
> >>> index e8af48cb64..827d798a61 100644
> >>> --- a/libavformat/flvenc.c
> >>> +++ b/libavformat/flvenc.c
> >>> @@ -480,7 +480,7 @@ static int unsupported_codec(AVFormatContext *s,
> >>>     return AVERROR(ENOSYS);
> >>> }
> >>>
> >>> -static void flv_write_codec_header(AVFormatContext* s,
> AVCodecParameters* par) {
> >>> +static void flv_write_codec_header(AVFormatContext* s,
> AVCodecParameters* par, unsigned int ts) {
> >>
> >> Hi,
> >>
> >> While the timestamp field is 8+24=32 bits, the DTS value is int64_ts.
> >> Thus, while I'm really bad at doing wrap-around logic, wouldn't it be
> >> something a la ´uint32_t wrapped_timestamp = (uint32_t)(ts %
> >> UINT32_MAX)`?
> >>
> >> Additionally, I did briefly go through E.4.1 (FLV Tag) in the spec and
> >> it doesn't really seem to define whether this value is PTS or DTS...
> >> Is this defined anywhere proper? Given FLV's age I wouldn't be
> >> surprised that the answer would be "effectively DTS", of course. But
> >> if you've seen what  Adobe's implementation writes with B-frames it'd
> >> be interesting to see.
> >>
> >>>     int64_t data_size;
> >>>     AVIOContext *pb = s->pb;
> >>>     FLVContext *flv = s->priv_data;
> >>> @@ -492,8 +492,8 @@ static void
> flv_write_codec_header(AVFormatContext* s, AVCodecParameters* par) {
> >>>                 par->codec_type == AVMEDIA_TYPE_VIDEO ?
> >>>                         FLV_TAG_TYPE_VIDEO : FLV_TAG_TYPE_AUDIO);
> >>>         avio_wb24(pb, 0); // size patched later
> >>> -        avio_wb24(pb, 0); // ts
> >>> -        avio_w8(pb, 0);   // ts ext
> >>> +        avio_wb24(pb, ts & 0xFFFFFF);
> >>> +        avio_w8(pb, (ts >> 24) & 0x7F);
> >>
> >> Looking at the spec this is bottom 24 bits of the timestamp and top 8,
> >> thus why is the second one not & 0xFF ?
> >>
> >
> > D'oh, I should read better. "...to form a SI32 value". Thus, this is
> LGTM.
>
> Will apply.
>
> Thanks
> Steven
>
>
>
>
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
Liu Steven May 4, 2018, 9:31 a.m. UTC | #5
> On 4 May 2018, at 17:20, Jan Ekström <jeebjp@gmail.com> wrote:
> 
> WHAT!?
> 
> This LGTM was for the bit mask *only*
> 
> ALL OTHER POINTS STAND

Don’t worry , all LGTM too, this could fix the resend sequence header timestamp problem, all should be ok.
> 
> Jan



Thanks
Steven
Michael Niedermayer May 4, 2018, 11:26 a.m. UTC | #6
On Fri, May 04, 2018 at 11:03:20AM +0800, Steven Liu wrote:
> 
> 
> > On 4 May 2018, at 02:00, Jan Ekström <jeebjp@gmail.com> wrote:
> > 
> > On Thu, May 3, 2018 at 8:58 PM, Jan Ekström <jeebjp@gmail.com> wrote:
> >> On Thu, May 3, 2018 at 7:50 PM, Alex Converse <alex.converse@gmail.com> wrote:
[...]

> Will apply.

I think Alex Converse has a git write account, so he can push himself if you
prefer

thx

[...]
wm4 May 4, 2018, 12:14 p.m. UTC | #7
On Fri, 4 May 2018 17:31:52 +0800
Steven Liu <lq@chinaffmpeg.org> wrote:

> > On 4 May 2018, at 17:20, Jan Ekström <jeebjp@gmail.com> wrote:
> > 
> > WHAT!?
> > 
> > This LGTM was for the bit mask *only*
> > 
> > ALL OTHER POINTS STAND  
> 
> Don’t worry , all LGTM too, this could fix the resend sequence header timestamp problem, all should be ok.

Well it doesn't LGT him.
Josh Dekker May 4, 2018, 2:11 p.m. UTC | #8
> On 4 May 2018, at 12:26, Michael Niedermayer <michael@niedermayer.cc> wrote:
> 
>> On Fri, May 04, 2018 at 11:03:20AM +0800, Steven Liu wrote:
>> 
>> 
>>> On 4 May 2018, at 02:00, Jan Ekström <jeebjp@gmail.com> wrote:
>>> 
>>> On Thu, May 3, 2018 at 8:58 PM, Jan Ekström <jeebjp@gmail.com> wrote:
>>>> On Thu, May 3, 2018 at 7:50 PM, Alex Converse <alex.converse@gmail.com> wrote:
> [...]
> 
>> Will apply.
> 
> I think Alex Converse has a git write account, so he can push himself if you
> prefer
> 
> thx
> 
> [...]
> -- 
> Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
> 
> In a rich man's house there is no place to spit but his face.
> -- Diogenes of Sinope

Hold off on pushing this for a day or two, I think Jan had some extra comments (he said he was unable to reply until later as he is busy right now).

Thanks,
Alex Converse May 10, 2018, 10:39 p.m. UTC | #9
On Thu, May 3, 2018 at 10:58 AM, Jan Ekström <jeebjp@gmail.com> wrote:
>
> On Thu, May 3, 2018 at 7:50 PM, Alex Converse <alex.converse@gmail.com> wrote:
> > From: Alex Converse <alexconv@twitch.tv>
> >
> > ---
> >  libavformat/flvenc.c | 10 +++++-----
> >  1 file changed, 5 insertions(+), 5 deletions(-)
> >
> > diff --git a/libavformat/flvenc.c b/libavformat/flvenc.c
> > index e8af48cb64..827d798a61 100644
> > --- a/libavformat/flvenc.c
> > +++ b/libavformat/flvenc.c
> > @@ -480,7 +480,7 @@ static int unsupported_codec(AVFormatContext *s,
> >      return AVERROR(ENOSYS);
> >  }
> >
> > -static void flv_write_codec_header(AVFormatContext* s, AVCodecParameters* par) {
> > +static void flv_write_codec_header(AVFormatContext* s, AVCodecParameters* par, unsigned int ts) {
>
> Hi,
>
> While the timestamp field is 8+24=32 bits, the DTS value is int64_ts.
> Thus, while I'm really bad at doing wrap-around logic, wouldn't it be
> something a la ´uint32_t wrapped_timestamp = (uint32_t)(ts %
> UINT32_MAX)`?
>

See the note about timestamps below.

> Additionally, I did briefly go through E.4.1 (FLV Tag) in the spec and
> it doesn't really seem to define whether this value is PTS or DTS...
> Is this defined anywhere proper? Given FLV's age I wouldn't be
> surprised that the answer would be "effectively DTS", of course. But
> if you've seen what  Adobe's implementation writes with B-frames it'd
> be interesting to see.
>

FLV AVC packets have a field called CompositionTime described to match
14496-12. FLV requires this composition offset to be zero for sequence header
type packets. This strongly indicates to me that we want DTS here. In addition
the current muxer and demuxer already use dts pretty consistently for this
value (and use cts = pts - dts).


> >      int64_t data_size;
> >      AVIOContext *pb = s->pb;
> >      FLVContext *flv = s->priv_data;
> > @@ -492,8 +492,8 @@ static void flv_write_codec_header(AVFormatContext* s, AVCodecParameters* par) {
> >                  par->codec_type == AVMEDIA_TYPE_VIDEO ?
> >                          FLV_TAG_TYPE_VIDEO : FLV_TAG_TYPE_AUDIO);
> >          avio_wb24(pb, 0); // size patched later
> > -        avio_wb24(pb, 0); // ts
> > -        avio_w8(pb, 0);   // ts ext
> > +        avio_wb24(pb, ts & 0xFFFFFF);
> > +        avio_w8(pb, (ts >> 24) & 0x7F);
>
> Looking at the spec this is bottom 24 bits of the timestamp and top 8,
> thus why is the second one not & 0xFF ?
>

The mapping from dts to the flv timestamp we write to the stream is identical
for to what we do for non header packets. [1] I can pull this into a separate
function but it's only two lines so it didn't seem worth while.

> >          avio_wb24(pb, 0); // streamid
> >          pos = avio_tell(pb);
> >          if (par->codec_id == AV_CODEC_ID_AAC) {
> > @@ -756,7 +756,7 @@ static int flv_write_header(AVFormatContext *s)
> >      }
> >
> >      for (i = 0; i < s->nb_streams; i++) {
> > -        flv_write_codec_header(s, s->streams[i]->codecpar);
> > +        flv_write_codec_header(s, s->streams[i]->codecpar, 0);
> >      }
> >
>
> Is it customary that even if you've got a live stream going, that the
> start point of a given ingest is always zero? In that case you might
> want to take mention of the initial dts, and then write offsets
> compared to that? Otherwise if you have an initial offset of, say, 5s,
> and then you a 5s GOP with an update, then while the difference to the
> initial timestamp would be 5s, using the original dts field you would
> get the second header having a value of 10s there. Or am I
> misunderstanding something here?

The FLV file (on disk) format requires the initial DTS to be zero. For FLV
over RTMP this is not the case.

At one point the flv muxer handled that itself, but that changed to handle
copyts better.[2]

For non-zero starting DTS should header packets have non-zero
timestamp? Maybe but that's the case irrespective of weather or
not we offset late extradata. Meanwhile many popular demuxers
(avformat included) special case initial extradata.

[1] https://git.videolan.org/?p=ffmpeg.git;a=blob;f=libavformat/flvenc.c;h=e8af48cb6415a0e5795d7e36d82789ae1699f89f#l981
[2] https://git.videolan.org/?p=ffmpeg.git;a=commitdiff;h=a0174f67298ba9494c146183dd360e637b03db64

>
> Otherwise, looks like a nice improvement in the implementation.
>
Alex Converse May 10, 2018, 10:48 p.m. UTC | #10
On Fri, May 4, 2018 at 7:11 AM, Josh de Kock <josh@itanimul.li> wrote:
>
>> On 4 May 2018, at 12:26, Michael Niedermayer <michael@niedermayer.cc> wrote:
>>
>>> On Fri, May 04, 2018 at 11:03:20AM +0800, Steven Liu wrote:
>>>
>>>
>>>> On 4 May 2018, at 02:00, Jan Ekström <jeebjp@gmail.com> wrote:
>>>>
>>>> On Thu, May 3, 2018 at 8:58 PM, Jan Ekström <jeebjp@gmail.com> wrote:
>>>>> On Thu, May 3, 2018 at 7:50 PM, Alex Converse <alex.converse@gmail.com> wrote:
>> [...]
>>
>>> Will apply.
>>
>> I think Alex Converse has a git write account, so he can push himself if you
>> prefer
>>
>> thx
>>
>> [...]
>> --
>> Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
>>
>> In a rich man's house there is no place to spit but his face.
>> -- Diogenes of Sinope
>
> Hold off on pushing this for a day or two, I think Jan had some extra comments (he said he was unable to reply until later as he is busy right now).
>

Sorry about the lateness on my part. Were there any later comments. I
haven't seen any. (And yes I can push this myself when all the
concerns are resolved).
Jan Ekström May 10, 2018, 11:08 p.m. UTC | #11
On Fri, May 11, 2018 at 1:39 AM, Alex Converse <alex.converse@gmail.com> wrote:
> On Thu, May 3, 2018 at 10:58 AM, Jan Ekström <jeebjp@gmail.com> wrote:
>>
>> Hi,
>>
>> While the timestamp field is 8+24=32 bits, the DTS value is int64_ts.
>> Thus, while I'm really bad at doing wrap-around logic, wouldn't it be
>> something a la ´uint32_t wrapped_timestamp = (uint32_t)(ts %
>> UINT32_MAX)`?
>>
>
> See the note about timestamps below.
>
>> Additionally, I did briefly go through E.4.1 (FLV Tag) in the spec and
>> it doesn't really seem to define whether this value is PTS or DTS...
>> Is this defined anywhere proper? Given FLV's age I wouldn't be
>> surprised that the answer would be "effectively DTS", of course. But
>> if you've seen what  Adobe's implementation writes with B-frames it'd
>> be interesting to see.
>>
>
> FLV AVC packets have a field called CompositionTime described to match
> 14496-12. FLV requires this composition offset to be zero for sequence header
> type packets. This strongly indicates to me that we want DTS here. In addition
> the current muxer and demuxer already use dts pretty consistently for this
> value (and use cts = pts - dts).
>

Alright, so dts it is... That only leaves out the fact that we're
stuffing int64_t into an "unsigned int", which most likely will not
warn on 64bit linux, but most likely will on various architectures
and/or systems.

So we:
1) take in the dts as int64_t.
2) apply wrap-over at either 32 or 31 bits depending on if the signed
value is ever supposed to be negative. Depends on how wrap-around in
FLV is supposed to be handled (aka "Is the negative area ever supposed
to be used from the 32bit signed integer field?").
3) Write it there.

>
>> >      int64_t data_size;
>> >      AVIOContext *pb = s->pb;
>> >      FLVContext *flv = s->priv_data;
>> > @@ -492,8 +492,8 @@ static void flv_write_codec_header(AVFormatContext* s, AVCodecParameters* par) {
>> >                  par->codec_type == AVMEDIA_TYPE_VIDEO ?
>> >                          FLV_TAG_TYPE_VIDEO : FLV_TAG_TYPE_AUDIO);
>> >          avio_wb24(pb, 0); // size patched later
>> > -        avio_wb24(pb, 0); // ts
>> > -        avio_w8(pb, 0);   // ts ext
>> > +        avio_wb24(pb, ts & 0xFFFFFF);
>> > +        avio_w8(pb, (ts >> 24) & 0x7F);
>>
>> Looking at the spec this is bottom 24 bits of the timestamp and top 8,
>> thus why is the second one not & 0xFF ?
>>
>
> The mapping from dts to the flv timestamp we write to the stream is identical
> for to what we do for non header packets. [1] I can pull this into a separate
> function but it's only two lines so it didn't seem worth while.

Yes, I think I commented on this that I hadn't noticed the signedness
of the value there :) So this is 100% a-OK.

>
>> >          avio_wb24(pb, 0); // streamid
>> >          pos = avio_tell(pb);
>> >          if (par->codec_id == AV_CODEC_ID_AAC) {
>> > @@ -756,7 +756,7 @@ static int flv_write_header(AVFormatContext *s)
>> >      }
>> >
>> >      for (i = 0; i < s->nb_streams; i++) {
>> > -        flv_write_codec_header(s, s->streams[i]->codecpar);
>> > +        flv_write_codec_header(s, s->streams[i]->codecpar, 0);
>> >      }
>> >
>>
>> Is it customary that even if you've got a live stream going, that the
>> start point of a given ingest is always zero? In that case you might
>> want to take mention of the initial dts, and then write offsets
>> compared to that? Otherwise if you have an initial offset of, say, 5s,
>> and then you a 5s GOP with an update, then while the difference to the
>> initial timestamp would be 5s, using the original dts field you would
>> get the second header having a value of 10s there. Or am I
>> misunderstanding something here?
>
> The FLV file (on disk) format requires the initial DTS to be zero. For FLV
> over RTMP this is not the case.
>

Oh, interesting... makes sense, of course, for continuing a stream
relative to the last timestamp of the previous ingest.

> At one point the flv muxer handled that itself, but that changed to handle
> copyts better.[2]
>
> For non-zero starting DTS should header packets have non-zero
> timestamp? Maybe but that's the case irrespective of weather or
> not we offset late extradata. Meanwhile many popular demuxers
> (avformat included) special case initial extradata.

Interesting, so in theory we should be making the initial
initialization data to be according to the DTS according to your
reading? Or is the initial extradata packet always supposed to be
zero, irrelevant of the actual initial content DTS?

As soon as these questions can be answered I think we've got the ball
rolling on getting this in. As I mentioned already, this is an
improvement.


Best regards,
Jan
Jan Ekström May 10, 2018, 11:15 p.m. UTC | #12
On Fri, May 11, 2018 at 1:48 AM, Alex Converse <alex.converse@gmail.com> wrote:
>
> Sorry about the lateness on my part. Were there any later comments. I
> haven't seen any. (And yes I can push this myself when all the
> concerns are resolved).

It's OK, it has been a rather busy time for me as well.

I was already thinking if you had given up on this and if I should
just apply the changes I thought were correct and re-post it for
review myself:
* making the function take in int64_t.
* DTS is most likely what's wanted so no change there.
* most likely FLV never used negative timestamps (even though the
value is signed) so:
  1.wrap-around at the 31 bits for the signed integer value, and then
  2. keep the 0x7F because the topmost bit of a signed field would
never be used.
* try to figure out if the initial metadata packet should specifically
be starting with a zero timestamp, or the initial DTS of the input?

Jan
Alex Converse May 11, 2018, 1:01 a.m. UTC | #13
On Thu, May 10, 2018 at 4:08 PM, Jan Ekström <jeebjp@gmail.com> wrote:
> On Fri, May 11, 2018 at 1:39 AM, Alex Converse <alex.converse@gmail.com> wrote:
>> On Thu, May 3, 2018 at 10:58 AM, Jan Ekström <jeebjp@gmail.com> wrote:
>>>
>>> Hi,
>>>
>>> While the timestamp field is 8+24=32 bits, the DTS value is int64_ts.
>>> Thus, while I'm really bad at doing wrap-around logic, wouldn't it be
>>> something a la ´uint32_t wrapped_timestamp = (uint32_t)(ts %
>>> UINT32_MAX)`?
>>>
>>
>> See the note about timestamps below.
>>
>>> Additionally, I did briefly go through E.4.1 (FLV Tag) in the spec and
>>> it doesn't really seem to define whether this value is PTS or DTS...
>>> Is this defined anywhere proper? Given FLV's age I wouldn't be
>>> surprised that the answer would be "effectively DTS", of course. But
>>> if you've seen what  Adobe's implementation writes with B-frames it'd
>>> be interesting to see.
>>>
>>
>> FLV AVC packets have a field called CompositionTime described to match
>> 14496-12. FLV requires this composition offset to be zero for sequence header
>> type packets. This strongly indicates to me that we want DTS here. In addition
>> the current muxer and demuxer already use dts pretty consistently for this
>> value (and use cts = pts - dts).
>>
>
> Alright, so dts it is... That only leaves out the fact that we're
> stuffing int64_t into an "unsigned int", which most likely will not
> warn on 64bit linux, but most likely will on various architectures
> and/or systems.
>
> So we:
> 1) take in the dts as int64_t.
> 2) apply wrap-over at either 32 or 31 bits depending on if the signed
> value is ever supposed to be negative. Depends on how wrap-around in
> FLV is supposed to be handled (aka "Is the negative area ever supposed
> to be used from the 32bit signed integer field?").
> 3) Write it there.
>
>>
>>> >      int64_t data_size;
>>> >      AVIOContext *pb = s->pb;
>>> >      FLVContext *flv = s->priv_data;
>>> > @@ -492,8 +492,8 @@ static void flv_write_codec_header(AVFormatContext* s, AVCodecParameters* par) {
>>> >                  par->codec_type == AVMEDIA_TYPE_VIDEO ?
>>> >                          FLV_TAG_TYPE_VIDEO : FLV_TAG_TYPE_AUDIO);
>>> >          avio_wb24(pb, 0); // size patched later
>>> > -        avio_wb24(pb, 0); // ts
>>> > -        avio_w8(pb, 0);   // ts ext
>>> > +        avio_wb24(pb, ts & 0xFFFFFF);
>>> > +        avio_w8(pb, (ts >> 24) & 0x7F);
>>>
>>> Looking at the spec this is bottom 24 bits of the timestamp and top 8,
>>> thus why is the second one not & 0xFF ?
>>>
>>
>> The mapping from dts to the flv timestamp we write to the stream is identical
>> for to what we do for non header packets. [1] I can pull this into a separate
>> function but it's only two lines so it didn't seem worth while.
>
> Yes, I think I commented on this that I hadn't noticed the signedness
> of the value there :) So this is 100% a-OK.
>
>>
>>> >          avio_wb24(pb, 0); // streamid
>>> >          pos = avio_tell(pb);
>>> >          if (par->codec_id == AV_CODEC_ID_AAC) {
>>> > @@ -756,7 +756,7 @@ static int flv_write_header(AVFormatContext *s)
>>> >      }
>>> >
>>> >      for (i = 0; i < s->nb_streams; i++) {
>>> > -        flv_write_codec_header(s, s->streams[i]->codecpar);
>>> > +        flv_write_codec_header(s, s->streams[i]->codecpar, 0);
>>> >      }
>>> >
>>>
>>> Is it customary that even if you've got a live stream going, that the
>>> start point of a given ingest is always zero? In that case you might
>>> want to take mention of the initial dts, and then write offsets
>>> compared to that? Otherwise if you have an initial offset of, say, 5s,
>>> and then you a 5s GOP with an update, then while the difference to the
>>> initial timestamp would be 5s, using the original dts field you would
>>> get the second header having a value of 10s there. Or am I
>>> misunderstanding something here?
>>
>> The FLV file (on disk) format requires the initial DTS to be zero. For FLV
>> over RTMP this is not the case.
>>
>
> Oh, interesting... makes sense, of course, for continuing a stream
> relative to the last timestamp of the previous ingest.
>
>> At one point the flv muxer handled that itself, but that changed to handle
>> copyts better.[2]
>>
>> For non-zero starting DTS should header packets have non-zero
>> timestamp? Maybe but that's the case irrespective of weather or
>> not we offset late extradata. Meanwhile many popular demuxers
>> (avformat included) special case initial extradata.
>
> Interesting, so in theory we should be making the initial
> initialization data to be according to the DTS according to your
> reading? Or is the initial extradata packet always supposed to be
> zero, irrelevant of the actual initial content DTS?
>
> As soon as these questions can be answered I think we've got the ball
> rolling on getting this in. As I mentioned already, this is an
> improvement.
>

There are some subtle differences in how FLV timestamps are handled on the
wire in rtmp vs in files, and I think that's what's creating most of
he complexity here.

In RTMP timestamps can start at any value, are unsigned, and have 32-bit
roll over semantics.

In FLV files timestamps "always" begin at 0 and are 32-bit "signed" but the
spec doesn't give any guidance on how negative values should be interpreted
or what to do in the case that the (logical) timestamp of a packet is
greater than
(signed) INT32_MAX.

Both of these specs are a little underspecified in my opinion and the
behavior of
popular implementations should probably be considered.

On Thu, May 10, 2018 at 4:15 PM, Jan Ekström <jeebjp@gmail.com> wrote:
> On Fri, May 11, 2018 at 1:48 AM, Alex Converse <alex.converse@gmail.com> wrote:
>>
>> Sorry about the lateness on my part. Were there any later comments. I
>> haven't seen any. (And yes I can push this myself when all the
>> concerns are resolved).
>
> It's OK, it has been a rather busy time for me as well.
>
> I was already thinking if you had given up on this and if I should
> just apply the changes I thought were correct and re-post it for
> review myself:

I think changes to broader FLV timestamp semantics ought to be in
separate patches.

> * making the function take in int64_t.
> * DTS is most likely what's wanted so no change there.
> * most likely FLV never used negative timestamps (even though the
> value is signed) so:
>   1.wrap-around at the 31 bits for the signed integer value, and then

I have some doubts that this behavior is correct. In particular it
breaks the copyts behavior that the muxer tries to respect. 31-bit
wrap around is not a concept that any of the specs mention. How do
players handle it.

>   2. keep the 0x7F because the topmost bit of a signed field would
> never be used.
> * try to figure out if the initial metadata packet should specifically
> be starting with a zero timestamp, or the initial DTS of the input?

This maybe different for file-flv and rtmp-flv.

Best Regards,
Alex
diff mbox

Patch

diff --git a/libavformat/flvenc.c b/libavformat/flvenc.c
index e8af48cb64..827d798a61 100644
--- a/libavformat/flvenc.c
+++ b/libavformat/flvenc.c
@@ -480,7 +480,7 @@  static int unsupported_codec(AVFormatContext *s,
     return AVERROR(ENOSYS);
 }
 
-static void flv_write_codec_header(AVFormatContext* s, AVCodecParameters* par) {
+static void flv_write_codec_header(AVFormatContext* s, AVCodecParameters* par, unsigned int ts) {
     int64_t data_size;
     AVIOContext *pb = s->pb;
     FLVContext *flv = s->priv_data;
@@ -492,8 +492,8 @@  static void flv_write_codec_header(AVFormatContext* s, AVCodecParameters* par) {
                 par->codec_type == AVMEDIA_TYPE_VIDEO ?
                         FLV_TAG_TYPE_VIDEO : FLV_TAG_TYPE_AUDIO);
         avio_wb24(pb, 0); // size patched later
-        avio_wb24(pb, 0); // ts
-        avio_w8(pb, 0);   // ts ext
+        avio_wb24(pb, ts & 0xFFFFFF);
+        avio_w8(pb, (ts >> 24) & 0x7F);
         avio_wb24(pb, 0); // streamid
         pos = avio_tell(pb);
         if (par->codec_id == AV_CODEC_ID_AAC) {
@@ -756,7 +756,7 @@  static int flv_write_header(AVFormatContext *s)
     }
 
     for (i = 0; i < s->nb_streams; i++) {
-        flv_write_codec_header(s, s->streams[i]->codecpar);
+        flv_write_codec_header(s, s->streams[i]->codecpar, 0);
     }
 
     flv->datastart_offset = avio_tell(pb);
@@ -900,7 +900,7 @@  static int flv_write_packet(AVFormatContext *s, AVPacket *pkt)
             }
             memcpy(par->extradata, side, side_size);
             par->extradata_size = side_size;
-            flv_write_codec_header(s, par);
+            flv_write_codec_header(s, par, pkt->dts);
         }
     }