diff mbox

[FFmpeg-devel] flvdec: option for dropping negative CTS frames. Initial frames with negative pts can produce video/audio desynchronization when a decoder is not prepared to handle negative pts. For example: QSV transcoding from an RTMP Wowza server.

Message ID 1491413370-18163-1-git-send-email-felipe@astroza.cl
State New
Headers show

Commit Message

Felipe Astroza Araya April 5, 2017, 5:29 p.m. UTC
From: Felipe Astroza <felipe@astroza.cl>

Signed-off-by: Felipe Astroza <felipe@astroza.cl>
---
 libavformat/flvdec.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

Comments

wm4 April 5, 2017, 6:35 p.m. UTC | #1
On Wed,  5 Apr 2017 14:29:30 -0300
felipe@astroza.cl wrote:

> From: Felipe Astroza <felipe@astroza.cl>
> 
> Signed-off-by: Felipe Astroza <felipe@astroza.cl>
> ---
>  libavformat/flvdec.c | 14 +++++++++++---
>  1 file changed, 11 insertions(+), 3 deletions(-)
> 
> diff --git a/libavformat/flvdec.c b/libavformat/flvdec.c
> index 3959a36..1556fe0 100644
> --- a/libavformat/flvdec.c
> +++ b/libavformat/flvdec.c
> @@ -44,6 +44,7 @@
>  typedef struct FLVContext {
>      const AVClass *class; ///< Class for private options.
>      int trust_metadata;   ///< configure streams according onMetaData
> +    int drop_negative_cts;///< drop frames if cts is negative
>      int wrong_dts;        ///< wrong dts due to negative cts
>      uint8_t *new_extradata[FLV_STREAM_TYPE_NB];
>      int new_extradata_size[FLV_STREAM_TYPE_NB];
> @@ -1139,10 +1140,16 @@ retry_duration:
>              int32_t cts = (avio_rb24(s->pb) + 0xff800000) ^ 0xff800000;
>              pts = dts + cts;
>              if (cts < 0) { // dts might be wrong
> -                if (!flv->wrong_dts)
> +                if (flv->drop_negative_cts) {
>                      av_log(s, AV_LOG_WARNING,
> -                        "Negative cts, previous timestamps might be wrong.\n");
> -                flv->wrong_dts = 1;
> +                            "Negative cts, frames will be dropped.\n");
> +                    dts = pts = AV_NOPTS_VALUE;
> +                } else {
> +                    if (!flv->wrong_dts)
> +                        av_log(s, AV_LOG_WARNING,
> +                            "Negative cts, previous timestamps might be wrong.\n");
> +                    flv->wrong_dts = 1;
> +                }
>              } else if (FFABS(dts - pts) > 1000*60*15) {
>                  av_log(s, AV_LOG_WARNING,
>                         "invalid timestamps %"PRId64" %"PRId64"\n", dts, pts);
> @@ -1253,6 +1260,7 @@ static int flv_read_seek(AVFormatContext *s, int stream_index,
>  #define VD AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM
>  static const AVOption options[] = {
>      { "flv_metadata", "Allocate streams according to the onMetaData array", OFFSET(trust_metadata), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VD },
> +    { "flv_drop_negative_cts", "Drop frames with negative composition timestamp", OFFSET(drop_negative_cts), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VD },
>      { "missing_streams", "", OFFSET(missing_streams), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 0xFF, VD | AV_OPT_FLAG_EXPORT | AV_OPT_FLAG_READONLY },
>      { NULL }
>  };

This seems all kinds of wrong. You don't add a hack to a single demuxer
just because a single decoder can't handle unusual things in "some"
files. You don't add it as an option either. (If this is a "fix my problem
the easiest way" hack, you should probably keep it in your own ffmpeg
branch.)

Why do the negative CTS happen, and what are their semantics?

Is this just the audio delay?
Felipe Astroza Araya April 5, 2017, 8:15 p.m. UTC | #2
2017-04-05 15:35 GMT-03:00 wm4 <nfxjfg@googlemail.com>:

> On Wed,  5 Apr 2017 14:29:30 -0300
> felipe@astroza.cl wrote:
>
> > From: Felipe Astroza <felipe@astroza.cl>
> >
> > Signed-off-by: Felipe Astroza <felipe@astroza.cl>
> > ---
> >  libavformat/flvdec.c | 14 +++++++++++---
> >  1 file changed, 11 insertions(+), 3 deletions(-)
> >
> > diff --git a/libavformat/flvdec.c b/libavformat/flvdec.c
> > index 3959a36..1556fe0 100644
> > --- a/libavformat/flvdec.c
> > +++ b/libavformat/flvdec.c
> > @@ -44,6 +44,7 @@
> >  typedef struct FLVContext {
> >      const AVClass *class; ///< Class for private options.
> >      int trust_metadata;   ///< configure streams according onMetaData
> > +    int drop_negative_cts;///< drop frames if cts is negative
> >      int wrong_dts;        ///< wrong dts due to negative cts
> >      uint8_t *new_extradata[FLV_STREAM_TYPE_NB];
> >      int new_extradata_size[FLV_STREAM_TYPE_NB];
> > @@ -1139,10 +1140,16 @@ retry_duration:
> >              int32_t cts = (avio_rb24(s->pb) + 0xff800000) ^ 0xff800000;
> >              pts = dts + cts;
> >              if (cts < 0) { // dts might be wrong
> > -                if (!flv->wrong_dts)
> > +                if (flv->drop_negative_cts) {
> >                      av_log(s, AV_LOG_WARNING,
> > -                        "Negative cts, previous timestamps might be
> wrong.\n");
> > -                flv->wrong_dts = 1;
> > +                            "Negative cts, frames will be dropped.\n");
> > +                    dts = pts = AV_NOPTS_VALUE;
> > +                } else {
> > +                    if (!flv->wrong_dts)
> > +                        av_log(s, AV_LOG_WARNING,
> > +                            "Negative cts, previous timestamps might be
> wrong.\n");
> > +                    flv->wrong_dts = 1;
> > +                }
> >              } else if (FFABS(dts - pts) > 1000*60*15) {
> >                  av_log(s, AV_LOG_WARNING,
> >                         "invalid timestamps %"PRId64" %"PRId64"\n", dts,
> pts);
> > @@ -1253,6 +1260,7 @@ static int flv_read_seek(AVFormatContext *s, int
> stream_index,
> >  #define VD AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM
> >  static const AVOption options[] = {
> >      { "flv_metadata", "Allocate streams according to the onMetaData
> array", OFFSET(trust_metadata), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VD },
> > +    { "flv_drop_negative_cts", "Drop frames with negative composition
> timestamp", OFFSET(drop_negative_cts), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0,
> 1, VD },
> >      { "missing_streams", "", OFFSET(missing_streams), AV_OPT_TYPE_INT,
> { .i64 = 0 }, 0, 0xFF, VD | AV_OPT_FLAG_EXPORT | AV_OPT_FLAG_READONLY },
> >      { NULL }
> >  };
>
> This seems all kind of wrong. You don't add a hack to a single demuxer
> just because a single decoder can't handle unusual things in "some"
> files. You don't add it as option either. (If this is a "fix my problem
> the easiest way" hack, you should probably keep it in your own ffmpeg
> branch.)
>
It was the way I found to avoid the initial frames without a preceding
keyframe (marked with pts < 0) that the RTMP Wowza server sends in live
streams; it only covers the FLV format case :/. And yes, you're right, this is
a hack, because I was not able to patch the QSV decoder.

h264_qsv decoder -> h264_qsv encoder produces a video delayed output
h264_qsv decoder -> libx264 encoder produces a video delayed output
libx264 decoder -> libx264 encoder produces a right output

h264_qsv is the source of my issues. I was passing -itsoffset CONSTANT (0.5
in my case) as a workaround, but it only works 90% of the time and I want a
definitive solution.

Maybe this option does not deserve to be part of master, but it may be useful
for people trying to work with QSV transcoding.

> Why do the negative CTS happen, and what are their semantics?

Context: https://patches.libav.org/patch/56714/

"Some streaming servers (such as wowza) send all preceding packets in
the current GOP (to allow starting decoding immediately, instead of
waiting for the next keyframe)"

> Is this just the audio delay?

Video delay; audio goes first.

> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
wm4 April 6, 2017, 5 a.m. UTC | #3
On Wed, 5 Apr 2017 17:15:26 -0300
Felipe Astroza <felipe@astroza.cl> wrote:

> 2017-04-05 15:35 GMT-03:00 wm4 <nfxjfg@googlemail.com>:
> 
> > On Wed,  5 Apr 2017 14:29:30 -0300
> > felipe@astroza.cl wrote:
> >  
> > > From: Felipe Astroza <felipe@astroza.cl>
> > >
> > > Signed-off-by: Felipe Astroza <felipe@astroza.cl>
> > > ---
> > >  libavformat/flvdec.c | 14 +++++++++++---
> > >  1 file changed, 11 insertions(+), 3 deletions(-)
> > >
> > > diff --git a/libavformat/flvdec.c b/libavformat/flvdec.c
> > > index 3959a36..1556fe0 100644
> > > --- a/libavformat/flvdec.c
> > > +++ b/libavformat/flvdec.c
> > > @@ -44,6 +44,7 @@
> > >  typedef struct FLVContext {
> > >      const AVClass *class; ///< Class for private options.
> > >      int trust_metadata;   ///< configure streams according onMetaData
> > > +    int drop_negative_cts;///< drop frames if cts is negative
> > >      int wrong_dts;        ///< wrong dts due to negative cts
> > >      uint8_t *new_extradata[FLV_STREAM_TYPE_NB];
> > >      int new_extradata_size[FLV_STREAM_TYPE_NB];
> > > @@ -1139,10 +1140,16 @@ retry_duration:
> > >              int32_t cts = (avio_rb24(s->pb) + 0xff800000) ^ 0xff800000;
> > >              pts = dts + cts;
> > >              if (cts < 0) { // dts might be wrong
> > > -                if (!flv->wrong_dts)
> > > +                if (flv->drop_negative_cts) {
> > >                      av_log(s, AV_LOG_WARNING,
> > > -                        "Negative cts, previous timestamps might be  
> > wrong.\n");  
> > > -                flv->wrong_dts = 1;
> > > +                            "Negative cts, frames will be dropped.\n");
> > > +                    dts = pts = AV_NOPTS_VALUE;
> > > +                } else {
> > > +                    if (!flv->wrong_dts)
> > > +                        av_log(s, AV_LOG_WARNING,
> > > +                            "Negative cts, previous timestamps might be  
> > wrong.\n");  
> > > +                    flv->wrong_dts = 1;
> > > +                }
> > >              } else if (FFABS(dts - pts) > 1000*60*15) {
> > >                  av_log(s, AV_LOG_WARNING,
> > >                         "invalid timestamps %"PRId64" %"PRId64"\n", dts,  
> > pts);  
> > > @@ -1253,6 +1260,7 @@ static int flv_read_seek(AVFormatContext *s, int  
> > stream_index,  
> > >  #define VD AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM
> > >  static const AVOption options[] = {
> > >      { "flv_metadata", "Allocate streams according to the onMetaData  
> > array", OFFSET(trust_metadata), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VD },  
> > > +    { "flv_drop_negative_cts", "Drop frames with negative composition  
> > timestamp", OFFSET(drop_negative_cts), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0,
> > 1, VD },  
> > >      { "missing_streams", "", OFFSET(missing_streams), AV_OPT_TYPE_INT,  
> > { .i64 = 0 }, 0, 0xFF, VD | AV_OPT_FLAG_EXPORT | AV_OPT_FLAG_READONLY },  
> > >      { NULL }
> > >  };  
> >
> > This seems all kind of wrong. You don't add a hack to a single demuxer
> > just because a single decoder can't handle unusual things in "some"
> > files. You don't add it as option either. (If this is a "fix my problem
> > the easiest way" hack, you should probably keep it in your own ffmpeg
> > branch.)
> >
> > It was the way I found to avoid the initial frames without a preceding  
> keyframe (marked with pts < 0) that RTMP wowza server sends in live
> streams, just cover flv format case :/. And yes yes, you're right, this is
> a hack because of I was not able to patch QSV decoder.
> 
> h264_qsv decoder -> h264_qsv encoder produces a video delayed output
> h264_qsv decoder -> libx264 encoder produces a video delayed output
> libx264 decoder -> libx264 encoder produces a right output

There's no libx264 decoder - I assume you mean ffmpeg's native decoder.

> h264_qsv is the source of my issues. I was passing -itsoffset CONSTANT(0.5
> in my case) as workaround but it works 90% of the time and I just want a
> definitive solution.

Did you check whether there's some obvious cause, like due to how qsv
represents timestamps? Also there is no reason to use the qsv
_decoder_. The native ffmpeg decoder + hwaccel will do better. Anyway,
still legitimate to want to fix qsv, of course.

> Maybe this option do not deserve be part of master but, maybe useful for
> people trying to work with QSV transcoding
> 
> Why do the negative CTS happen, and what are their semantics?
> >
> >  Context: https://patches.libav.org/patch/56714/  
> 
> "Some streaming servers (such as wowza) send all preceding packets in
> the current GOP (to allow starting decoding immediately, instead of
> waiting for the next keyframe)"
> 
> Is this just the audio delay?
> 
> 
> Video delay, audio goes first

That link is helpful. So in theory these frames with negative cts
should be decoded, and then discarded. I'd really look into whether
qsv can be made to take these timestamps somehow.
Felipe Astroza Araya April 6, 2017, 5:18 p.m. UTC | #4
2017-04-06 2:00 GMT-03:00 wm4 <nfxjfg@googlemail.com>:

> On Wed, 5 Apr 2017 17:15:26 -0300
> Felipe Astroza <felipe@astroza.cl> wrote:
>
> > 2017-04-05 15:35 GMT-03:00 wm4 <nfxjfg@googlemail.com>:
> >
> > > On Wed,  5 Apr 2017 14:29:30 -0300
> > > felipe@astroza.cl wrote:
> > >
> > > > From: Felipe Astroza <felipe@astroza.cl>
> > > >
> > > > Signed-off-by: Felipe Astroza <felipe@astroza.cl>
> > > > ---
> > > >  libavformat/flvdec.c | 14 +++++++++++---
> > > >  1 file changed, 11 insertions(+), 3 deletions(-)
> > > >
> > > > diff --git a/libavformat/flvdec.c b/libavformat/flvdec.c
> > > > index 3959a36..1556fe0 100644
> > > > --- a/libavformat/flvdec.c
> > > > +++ b/libavformat/flvdec.c
> > > > @@ -44,6 +44,7 @@
> > > >  typedef struct FLVContext {
> > > >      const AVClass *class; ///< Class for private options.
> > > >      int trust_metadata;   ///< configure streams according
> onMetaData
> > > > +    int drop_negative_cts;///< drop frames if cts is negative
> > > >      int wrong_dts;        ///< wrong dts due to negative cts
> > > >      uint8_t *new_extradata[FLV_STREAM_TYPE_NB];
> > > >      int new_extradata_size[FLV_STREAM_TYPE_NB];
> > > > @@ -1139,10 +1140,16 @@ retry_duration:
> > > >              int32_t cts = (avio_rb24(s->pb) + 0xff800000) ^
> 0xff800000;
> > > >              pts = dts + cts;
> > > >              if (cts < 0) { // dts might be wrong
> > > > -                if (!flv->wrong_dts)
> > > > +                if (flv->drop_negative_cts) {
> > > >                      av_log(s, AV_LOG_WARNING,
> > > > -                        "Negative cts, previous timestamps might be
> > > wrong.\n");
> > > > -                flv->wrong_dts = 1;
> > > > +                            "Negative cts, frames will be
> dropped.\n");
> > > > +                    dts = pts = AV_NOPTS_VALUE;
> > > > +                } else {
> > > > +                    if (!flv->wrong_dts)
> > > > +                        av_log(s, AV_LOG_WARNING,
> > > > +                            "Negative cts, previous timestamps
> might be
> > > wrong.\n");
> > > > +                    flv->wrong_dts = 1;
> > > > +                }
> > > >              } else if (FFABS(dts - pts) > 1000*60*15) {
> > > >                  av_log(s, AV_LOG_WARNING,
> > > >                         "invalid timestamps %"PRId64" %"PRId64"\n",
> dts,
> > > pts);
> > > > @@ -1253,6 +1260,7 @@ static int flv_read_seek(AVFormatContext *s,
> int
> > > stream_index,
> > > >  #define VD AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM
> > > >  static const AVOption options[] = {
> > > >      { "flv_metadata", "Allocate streams according to the onMetaData
> > > array", OFFSET(trust_metadata), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1,
> VD },
> > > > +    { "flv_drop_negative_cts", "Drop frames with negative
> composition
> > > timestamp", OFFSET(drop_negative_cts), AV_OPT_TYPE_BOOL, { .i64 = 0 },
> 0,
> > > 1, VD },
> > > >      { "missing_streams", "", OFFSET(missing_streams),
> AV_OPT_TYPE_INT,
> > > { .i64 = 0 }, 0, 0xFF, VD | AV_OPT_FLAG_EXPORT | AV_OPT_FLAG_READONLY
> },
> > > >      { NULL }
> > > >  };
> > >
> > > This seems all kind of wrong. You don't add a hack to a single demuxer
> > > just because a single decoder can't handle unusual things in "some"
> > > files. You don't add it as option either. (If this is a "fix my problem
> > > the easiest way" hack, you should probably keep it in your own ffmpeg
> > > branch.)
> > >
> > > It was the way I found to avoid the initial frames without a preceding
> > keyframe (marked with pts < 0) that RTMP wowza server sends in live
> > streams, just cover flv format case :/. And yes yes, you're right, this
> is
> > a hack because of I was not able to patch QSV decoder.
> >
> > h264_qsv decoder -> h264_qsv encoder produces a video delayed output
> > h264_qsv decoder -> libx264 encoder produces a video delayed output
> > libx264 decoder -> libx264 encoder produces a right output
>
> There's no libx264 decoder - I assume you mean ffmpeg's native decoder.
>
> > h264_qsv is the source of my issues. I was passing -itsoffset
> CONSTANT(0.5
> > in my case) as workaround but it works 90% of the time and I just want a
> > definitive solution.
>
> Did you check whether there's some obvious cause, like due to how qsv
> represents timestamps? Also there is no reason to use the qsv
> _decoder_. The native ffmpeg decoder + hwaccel will do getter. Anyway,
> still legitimate to want to fix qsv, of course.
>

I'm not sure of that. Reading input at native frame rate:

* h264 native decoder -> h264_qsv encoder (needs hwcontext)
command: ffmpeg -re -loglevel verbose -hwaccel qsv -qsv_device
/dev/dri/renderD129 -i INPUT -c:v h264_qsv -look_ahead 0 -profile:v high
-preset:v veryfast -bufsize 1000k -r 30 -b:v 3440800 -maxrate 3440800 -c:a
aac test.mp4

Stream mapping:
  Stream #0:1 -> #0:0 (h264 (native) -> h264 (h264_qsv))
  Stream #0:2 -> #0:1 (aac (native) -> aac (native))
[h264_qsv @ 0x25052a0] Warning in encoder initialization: partial
acceleration (4)

*CPU utilization is 125%*

* h264_qsv decoder -> h264_qsv encoder
command: ffmpeg -re -loglevel verbose -hwaccel qsv -qsv_device
/dev/dri/renderD129 -c:v h264_qsv -i INPUT -c:v h264_qsv -look_ahead 0
-profile:v high -preset:v veryfast -bufsize 1000k -r 30 -b:v 3440800
-maxrate 3440800 -c:a aac test.mp4

Stream mapping:
  Stream #0:1 -> #0:0 (h264 (h264_qsv) -> h264 (h264_qsv))
  Stream #0:2 -> #0:1 (aac (native) -> aac (native))

*CPU utilization is 22%*

I am using QSV to take load off the CPU, and native decoding does not
help.

>
> > Maybe this option do not deserve be part of master but, maybe useful for
> > people trying to work with QSV transcoding
> >
> > Why do the negative CTS happen, and what are their semantics?
> > >
> > >  Context: https://patches.libav.org/patch/56714/
> >
> > "Some streaming servers (such as wowza) send all preceding packets in
> > the current GOP (to allow starting decoding immediately, instead of
> > waiting for the next keyframe)"
> >
> > Is this just the audio delay?
> >
> >
> > Video delay, audio goes first
>
> That link is helpful. So in theory these frames with negative cts
> should be decoded, and then discarded. I'd really look into somehow
> checking if qsv can't be made to take these timestamps somehow.
>
I'll look into that.

> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
wm4 April 6, 2017, 5:28 p.m. UTC | #5
On Thu, 6 Apr 2017 14:18:20 -0300
Felipe Astroza <felipe@astroza.cl> wrote:

> 2017-04-06 2:00 GMT-03:00 wm4 <nfxjfg@googlemail.com>:
> 
> > On Wed, 5 Apr 2017 17:15:26 -0300
> > Felipe Astroza <felipe@astroza.cl> wrote:
> >  
> > > 2017-04-05 15:35 GMT-03:00 wm4 <nfxjfg@googlemail.com>:
> > >  
> > > > On Wed,  5 Apr 2017 14:29:30 -0300
> > > > felipe@astroza.cl wrote:
> > > >  
> > > > > From: Felipe Astroza <felipe@astroza.cl>
> > > > >
> > > > > Signed-off-by: Felipe Astroza <felipe@astroza.cl>
> > > > > ---
> > > > >  libavformat/flvdec.c | 14 +++++++++++---
> > > > >  1 file changed, 11 insertions(+), 3 deletions(-)
> > > > >
> > > > > diff --git a/libavformat/flvdec.c b/libavformat/flvdec.c
> > > > > index 3959a36..1556fe0 100644
> > > > > --- a/libavformat/flvdec.c
> > > > > +++ b/libavformat/flvdec.c
> > > > > @@ -44,6 +44,7 @@
> > > > >  typedef struct FLVContext {
> > > > >      const AVClass *class; ///< Class for private options.
> > > > >      int trust_metadata;   ///< configure streams according  
> > onMetaData  
> > > > > +    int drop_negative_cts;///< drop frames if cts is negative
> > > > >      int wrong_dts;        ///< wrong dts due to negative cts
> > > > >      uint8_t *new_extradata[FLV_STREAM_TYPE_NB];
> > > > >      int new_extradata_size[FLV_STREAM_TYPE_NB];
> > > > > @@ -1139,10 +1140,16 @@ retry_duration:
> > > > >              int32_t cts = (avio_rb24(s->pb) + 0xff800000) ^  
> > 0xff800000;  
> > > > >              pts = dts + cts;
> > > > >              if (cts < 0) { // dts might be wrong
> > > > > -                if (!flv->wrong_dts)
> > > > > +                if (flv->drop_negative_cts) {
> > > > >                      av_log(s, AV_LOG_WARNING,
> > > > > -                        "Negative cts, previous timestamps might be  
> > > > wrong.\n");  
> > > > > -                flv->wrong_dts = 1;
> > > > > +                            "Negative cts, frames will be  
> > dropped.\n");  
> > > > > +                    dts = pts = AV_NOPTS_VALUE;
> > > > > +                } else {
> > > > > +                    if (!flv->wrong_dts)
> > > > > +                        av_log(s, AV_LOG_WARNING,
> > > > > +                            "Negative cts, previous timestamps  
> > might be  
> > > > wrong.\n");  
> > > > > +                    flv->wrong_dts = 1;
> > > > > +                }
> > > > >              } else if (FFABS(dts - pts) > 1000*60*15) {
> > > > >                  av_log(s, AV_LOG_WARNING,
> > > > >                         "invalid timestamps %"PRId64" %"PRId64"\n",  
> > dts,  
> > > > pts);  
> > > > > @@ -1253,6 +1260,7 @@ static int flv_read_seek(AVFormatContext *s,  
> > int  
> > > > stream_index,  
> > > > >  #define VD AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM
> > > > >  static const AVOption options[] = {
> > > > >      { "flv_metadata", "Allocate streams according to the onMetaData  
> > > > array", OFFSET(trust_metadata), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1,  
> > VD },  
> > > > > +    { "flv_drop_negative_cts", "Drop frames with negative  
> > composition  
> > > > timestamp", OFFSET(drop_negative_cts), AV_OPT_TYPE_BOOL, { .i64 = 0 },  
> > 0,  
> > > > 1, VD },  
> > > > >      { "missing_streams", "", OFFSET(missing_streams),  
> > AV_OPT_TYPE_INT,  
> > > > { .i64 = 0 }, 0, 0xFF, VD | AV_OPT_FLAG_EXPORT | AV_OPT_FLAG_READONLY  
> > },  
> > > > >      { NULL }
> > > > >  };  
> > > >
> > > > This seems all kind of wrong. You don't add a hack to a single demuxer
> > > > just because a single decoder can't handle unusual things in "some"
> > > > files. You don't add it as option either. (If this is a "fix my problem
> > > > the easiest way" hack, you should probably keep it in your own ffmpeg
> > > > branch.)
> > > >
> > > > It was the way I found to avoid the initial frames without a preceding  
> > > keyframe (marked with pts < 0) that RTMP wowza server sends in live
> > > streams, just cover flv format case :/. And yes yes, you're right, this  
> > is  
> > > a hack because of I was not able to patch QSV decoder.
> > >
> > > h264_qsv decoder -> h264_qsv encoder produces a video delayed output
> > > h264_qsv decoder -> libx264 encoder produces a video delayed output
> > > libx264 decoder -> libx264 encoder produces a right output  
> >
> > There's no libx264 decoder - I assume you mean ffmpeg's native decoder.
> >  
> > > h264_qsv is the source of my issues. I was passing -itsoffset  
> > CONSTANT(0.5  
> > > in my case) as workaround but it works 90% of the time and I just want a
> > > definitive solution.  
> >
> > Did you check whether there's some obvious cause, like due to how qsv
> > represents timestamps? Also there is no reason to use the qsv
> > _decoder_. The native ffmpeg decoder + hwaccel will do getter. Anyway,
> > still legitimate to want to fix qsv, of course.
> >  
> 
> I'm not sure of that. Reading input at native frame rate:
> 
> * h264 native decoder -> h264_qsv encoder (needs hwcontext)
> command: ffmpeg -re -loglevel verbose -hwaccel qsv -qsv_device
> /dev/dri/renderD129 -i INPUT -c:v h264_qsv -look_ahead 0 -profile:v high
> -preset:v veryfast -bufsize 1000k -r 30 -b:v 3440800 -maxrate 3440800 -c:a
> aac test.mp4
> 
> Stream mapping:
>   Stream #0:1 -> #0:0 (h264 (native) -> h264 (h264_qsv))
>   Stream #0:2 -> #0:1 (aac (native) -> aac (native))
> [h264_qsv @ 0x25052a0] Warning in encoder initialization: partial
> acceleration (4)
> 
> *CPU utilization is 125%*
> 
> * h264_qsv decoder -> h264_qsv encoder
> command: ffmpeg -re -loglevel verbose -hwaccel qsv -qsv_device
> /dev/dri/renderD129 -c:v h264_qsv -i INPUT -c:v h264_qsv -look_ahead 0
> -profile:v high -preset:v veryfast -bufsize 1000k -r 30 -b:v 3440800
> -maxrate 3440800 -c:a aac test.mp4
> 
> Stream mapping:
>   Stream #0:1 -> #0:0 (h264 (h264_qsv) -> h264 (h264_qsv))
>   Stream #0:2 -> #0:1 (aac (native) -> aac (native))
> 
> *CPU utilization is 22%*
> 
> I am using with QSV to take off load from CPU and native decoding does not
> help.

That doesn't use hwaccel. "-hwaccel qsv" will do nothing with the
native decoder.

I'm not sure whether the frame mapping stuff (for vaapi->qsv
encoding) has been ported from Libav yet, or how it works. Maybe Mark
Thompson can say something about the expected performance.
diff mbox

Patch

diff --git a/libavformat/flvdec.c b/libavformat/flvdec.c
index 3959a36..1556fe0 100644
--- a/libavformat/flvdec.c
+++ b/libavformat/flvdec.c
@@ -44,6 +44,7 @@ 
 typedef struct FLVContext {
     const AVClass *class; ///< Class for private options.
     int trust_metadata;   ///< configure streams according onMetaData
+    int drop_negative_cts;///< drop frames if cts is negative
     int wrong_dts;        ///< wrong dts due to negative cts
     uint8_t *new_extradata[FLV_STREAM_TYPE_NB];
     int new_extradata_size[FLV_STREAM_TYPE_NB];
@@ -1139,10 +1140,16 @@  retry_duration:
             int32_t cts = (avio_rb24(s->pb) + 0xff800000) ^ 0xff800000;
             pts = dts + cts;
             if (cts < 0) { // dts might be wrong
-                if (!flv->wrong_dts)
+                if (flv->drop_negative_cts) {
                     av_log(s, AV_LOG_WARNING,
-                        "Negative cts, previous timestamps might be wrong.\n");
-                flv->wrong_dts = 1;
+                            "Negative cts, frames will be dropped.\n");
+                    dts = pts = AV_NOPTS_VALUE;
+                } else {
+                    if (!flv->wrong_dts)
+                        av_log(s, AV_LOG_WARNING,
+                            "Negative cts, previous timestamps might be wrong.\n");
+                    flv->wrong_dts = 1;
+                }
             } else if (FFABS(dts - pts) > 1000*60*15) {
                 av_log(s, AV_LOG_WARNING,
                        "invalid timestamps %"PRId64" %"PRId64"\n", dts, pts);
@@ -1253,6 +1260,7 @@  static int flv_read_seek(AVFormatContext *s, int stream_index,
 #define VD AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM
 static const AVOption options[] = {
     { "flv_metadata", "Allocate streams according to the onMetaData array", OFFSET(trust_metadata), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VD },
+    { "flv_drop_negative_cts", "Drop frames with negative composition timestamp", OFFSET(drop_negative_cts), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VD },
     { "missing_streams", "", OFFSET(missing_streams), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 0xFF, VD | AV_OPT_FLAG_EXPORT | AV_OPT_FLAG_READONLY },
     { NULL }
 };