diff mbox

[FFmpeg-devel,5/6] avcodec/qtrle: return last frame even if unchanged

Message ID 20190824181829.25724-5-michael@niedermayer.cc
State New
Headers show

Commit Message

Michael Niedermayer Aug. 24, 2019, 6:18 p.m. UTC
Fixes: Ticket7880

Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
---
 libavcodec/qtrle.c        | 30 ++++++++++++++++++++++++++----
 tests/ref/fate/qtrle-8bit |  1 +
 2 files changed, 27 insertions(+), 4 deletions(-)

Comments

Paul B Mahol Aug. 25, 2019, 6:38 a.m. UTC | #1
Very ugly non-solution. NAK.

On Sat, Aug 24, 2019 at 8:27 PM Michael Niedermayer <michael@niedermayer.cc>
wrote:

> Fixes: Ticket7880
>
> Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
> ---
>  libavcodec/qtrle.c        | 30 ++++++++++++++++++++++++++----
>  tests/ref/fate/qtrle-8bit |  1 +
>  2 files changed, 27 insertions(+), 4 deletions(-)
>
> diff --git a/libavcodec/qtrle.c b/libavcodec/qtrle.c
> index 2c29547e5a..c22a1a582d 100644
> --- a/libavcodec/qtrle.c
> +++ b/libavcodec/qtrle.c
> @@ -45,6 +45,7 @@ typedef struct QtrleContext {
>
>      GetByteContext g;
>      uint32_t pal[256];
> +    AVPacket flush_pkt;
>  } QtrleContext;
>
>  #define CHECK_PIXEL_PTR(n)
>             \
> @@ -454,11 +455,27 @@ static int qtrle_decode_frame(AVCodecContext *avctx,
>      int has_palette = 0;
>      int ret, size;
>
> +    if (!avpkt->data) {
> +        if (avctx->internal->need_flush) {
> +            avctx->internal->need_flush = 0;
> +            ret = ff_setup_buffered_frame_for_return(avctx, data,
> s->frame, &s->flush_pkt);
> +            if (ret < 0)
> +                return ret;
> +            *got_frame = 1;
> +        }
> +        return 0;
> +    }
> +    s->flush_pkt = *avpkt;
> +    s->frame->pkt_dts = s->flush_pkt.dts;
> +
>      bytestream2_init(&s->g, avpkt->data, avpkt->size);
>
>      /* check if this frame is even supposed to change */
> -    if (avpkt->size < 8)
> +    if (avpkt->size < 8) {
> +        avctx->internal->need_flush = 1;
>          return avpkt->size;
> +    }
> +    avctx->internal->need_flush = 0;
>
>      /* start after the chunk size */
>      size = bytestream2_get_be32(&s->g) & 0x3FFFFFFF;
> @@ -471,14 +488,18 @@ static int qtrle_decode_frame(AVCodecContext *avctx,
>
>      /* if a header is present, fetch additional decoding parameters */
>      if (header & 0x0008) {
> -        if (avpkt->size < 14)
> +        if (avpkt->size < 14) {
> +            avctx->internal->need_flush = 1;
>              return avpkt->size;
> +        }
>          start_line = bytestream2_get_be16(&s->g);
>          bytestream2_skip(&s->g, 2);
>          height     = bytestream2_get_be16(&s->g);
>          bytestream2_skip(&s->g, 2);
> -        if (height > s->avctx->height - start_line)
> +        if (height > s->avctx->height - start_line) {
> +            avctx->internal->need_flush = 1;
>              return avpkt->size;
> +        }
>      } else {
>          start_line = 0;
>          height     = s->avctx->height;
> @@ -572,5 +593,6 @@ AVCodec ff_qtrle_decoder = {
>      .init           = qtrle_decode_init,
>      .close          = qtrle_decode_end,
>      .decode         = qtrle_decode_frame,
> -    .capabilities   = AV_CODEC_CAP_DR1,
> +    .caps_internal  = FF_CODEC_CAP_SETS_PKT_DTS |
> FF_CODEC_CAP_SETS_PKT_POS,
> +    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY,
>  };
> diff --git a/tests/ref/fate/qtrle-8bit b/tests/ref/fate/qtrle-8bit
> index 27bb8aad71..39a03b7b6c 100644
> --- a/tests/ref/fate/qtrle-8bit
> +++ b/tests/ref/fate/qtrle-8bit
> @@ -61,3 +61,4 @@
>  0,        160,        160,        1,   921600, 0xcfd6ad2b
>  0,        163,        163,        1,   921600, 0x3b372379
>  0,        165,        165,        1,   921600, 0x36f245f5
> +0,        166,        166,        1,   921600, 0x36f245f5
> --
> 2.23.0
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
James Almer Aug. 25, 2019, 4:22 p.m. UTC | #2
On 8/24/2019 3:18 PM, Michael Niedermayer wrote:
> Fixes: Ticket7880
> 
> Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
> ---
>  libavcodec/qtrle.c        | 30 ++++++++++++++++++++++++++----
>  tests/ref/fate/qtrle-8bit |  1 +
>  2 files changed, 27 insertions(+), 4 deletions(-)
> 
> diff --git a/libavcodec/qtrle.c b/libavcodec/qtrle.c
> index 2c29547e5a..c22a1a582d 100644
> --- a/libavcodec/qtrle.c
> +++ b/libavcodec/qtrle.c
> @@ -45,6 +45,7 @@ typedef struct QtrleContext {
>  
>      GetByteContext g;
>      uint32_t pal[256];
> +    AVPacket flush_pkt;
>  } QtrleContext;
>  
>  #define CHECK_PIXEL_PTR(n)                                                            \
> @@ -454,11 +455,27 @@ static int qtrle_decode_frame(AVCodecContext *avctx,
>      int has_palette = 0;
>      int ret, size;
>  
> +    if (!avpkt->data) {
> +        if (avctx->internal->need_flush) {
> +            avctx->internal->need_flush = 0;
> +            ret = ff_setup_buffered_frame_for_return(avctx, data, s->frame, &s->flush_pkt);
> +            if (ret < 0)
> +                return ret;
> +            *got_frame = 1;
> +        }
> +        return 0;
> +    }
> +    s->flush_pkt = *avpkt;
> +    s->frame->pkt_dts = s->flush_pkt.dts;
> +
>      bytestream2_init(&s->g, avpkt->data, avpkt->size);
>  
>      /* check if this frame is even supposed to change */
> -    if (avpkt->size < 8)
> +    if (avpkt->size < 8) {
> +        avctx->internal->need_flush = 1;
>          return avpkt->size;
> +    }
> +    avctx->internal->need_flush = 0;
>  
>      /* start after the chunk size */
>      size = bytestream2_get_be32(&s->g) & 0x3FFFFFFF;
> @@ -471,14 +488,18 @@ static int qtrle_decode_frame(AVCodecContext *avctx,
>  
>      /* if a header is present, fetch additional decoding parameters */
>      if (header & 0x0008) {
> -        if (avpkt->size < 14)
> +        if (avpkt->size < 14) {
> +            avctx->internal->need_flush = 1;
>              return avpkt->size;
> +        }
>          start_line = bytestream2_get_be16(&s->g);
>          bytestream2_skip(&s->g, 2);
>          height     = bytestream2_get_be16(&s->g);
>          bytestream2_skip(&s->g, 2);
> -        if (height > s->avctx->height - start_line)
> +        if (height > s->avctx->height - start_line) {
> +            avctx->internal->need_flush = 1;
>              return avpkt->size;
> +        }
>      } else {
>          start_line = 0;
>          height     = s->avctx->height;
> @@ -572,5 +593,6 @@ AVCodec ff_qtrle_decoder = {
>      .init           = qtrle_decode_init,
>      .close          = qtrle_decode_end,
>      .decode         = qtrle_decode_frame,
> -    .capabilities   = AV_CODEC_CAP_DR1,
> +    .caps_internal  = FF_CODEC_CAP_SETS_PKT_DTS | FF_CODEC_CAP_SETS_PKT_POS,
> +    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY,
>  };
> diff --git a/tests/ref/fate/qtrle-8bit b/tests/ref/fate/qtrle-8bit
> index 27bb8aad71..39a03b7b6c 100644
> --- a/tests/ref/fate/qtrle-8bit
> +++ b/tests/ref/fate/qtrle-8bit
> @@ -61,3 +61,4 @@
>  0,        160,        160,        1,   921600, 0xcfd6ad2b
>  0,        163,        163,        1,   921600, 0x3b372379
>  0,        165,        165,        1,   921600, 0x36f245f5
> +0,        166,        166,        1,   921600, 0x36f245f5

Following what i said in the nuv patch, do you still experience timeouts
with the current codebase, or even if you revert commit
a9dacdeea6168787a142209bd19fdd74aefc9dd6? Creating a reference to an
existing frame buffer shouldn't be expensive anymore for the fuzzer
after my ref counting changes to target_dec_fuzzer.c

This is a very ugly solution to a problem that was already solved when
reference counting was introduced. Returning duplicate frames to achieve
cfr in decoders where it's expected shouldn't affect performance.
Michael Niedermayer Aug. 25, 2019, 9:46 p.m. UTC | #3
On Sun, Aug 25, 2019 at 01:22:22PM -0300, James Almer wrote:
> On 8/24/2019 3:18 PM, Michael Niedermayer wrote:
> > Fixes: Ticket7880
> > 
> > Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
> > ---
> >  libavcodec/qtrle.c        | 30 ++++++++++++++++++++++++++----
> >  tests/ref/fate/qtrle-8bit |  1 +
> >  2 files changed, 27 insertions(+), 4 deletions(-)
> > 
> > diff --git a/libavcodec/qtrle.c b/libavcodec/qtrle.c
> > index 2c29547e5a..c22a1a582d 100644
> > --- a/libavcodec/qtrle.c
> > +++ b/libavcodec/qtrle.c
> > @@ -45,6 +45,7 @@ typedef struct QtrleContext {
> >  
> >      GetByteContext g;
> >      uint32_t pal[256];
> > +    AVPacket flush_pkt;
> >  } QtrleContext;
> >  
> >  #define CHECK_PIXEL_PTR(n)                                                            \
> > @@ -454,11 +455,27 @@ static int qtrle_decode_frame(AVCodecContext *avctx,
> >      int has_palette = 0;
> >      int ret, size;
> >  
> > +    if (!avpkt->data) {
> > +        if (avctx->internal->need_flush) {
> > +            avctx->internal->need_flush = 0;
> > +            ret = ff_setup_buffered_frame_for_return(avctx, data, s->frame, &s->flush_pkt);
> > +            if (ret < 0)
> > +                return ret;
> > +            *got_frame = 1;
> > +        }
> > +        return 0;
> > +    }
> > +    s->flush_pkt = *avpkt;
> > +    s->frame->pkt_dts = s->flush_pkt.dts;
> > +
> >      bytestream2_init(&s->g, avpkt->data, avpkt->size);
> >  
> >      /* check if this frame is even supposed to change */
> > -    if (avpkt->size < 8)
> > +    if (avpkt->size < 8) {
> > +        avctx->internal->need_flush = 1;
> >          return avpkt->size;
> > +    }
> > +    avctx->internal->need_flush = 0;
> >  
> >      /* start after the chunk size */
> >      size = bytestream2_get_be32(&s->g) & 0x3FFFFFFF;
> > @@ -471,14 +488,18 @@ static int qtrle_decode_frame(AVCodecContext *avctx,
> >  
> >      /* if a header is present, fetch additional decoding parameters */
> >      if (header & 0x0008) {
> > -        if (avpkt->size < 14)
> > +        if (avpkt->size < 14) {
> > +            avctx->internal->need_flush = 1;
> >              return avpkt->size;
> > +        }
> >          start_line = bytestream2_get_be16(&s->g);
> >          bytestream2_skip(&s->g, 2);
> >          height     = bytestream2_get_be16(&s->g);
> >          bytestream2_skip(&s->g, 2);
> > -        if (height > s->avctx->height - start_line)
> > +        if (height > s->avctx->height - start_line) {
> > +            avctx->internal->need_flush = 1;
> >              return avpkt->size;
> > +        }
> >      } else {
> >          start_line = 0;
> >          height     = s->avctx->height;
> > @@ -572,5 +593,6 @@ AVCodec ff_qtrle_decoder = {
> >      .init           = qtrle_decode_init,
> >      .close          = qtrle_decode_end,
> >      .decode         = qtrle_decode_frame,
> > -    .capabilities   = AV_CODEC_CAP_DR1,
> > +    .caps_internal  = FF_CODEC_CAP_SETS_PKT_DTS | FF_CODEC_CAP_SETS_PKT_POS,
> > +    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY,
> >  };
> > diff --git a/tests/ref/fate/qtrle-8bit b/tests/ref/fate/qtrle-8bit
> > index 27bb8aad71..39a03b7b6c 100644
> > --- a/tests/ref/fate/qtrle-8bit
> > +++ b/tests/ref/fate/qtrle-8bit
> > @@ -61,3 +61,4 @@
> >  0,        160,        160,        1,   921600, 0xcfd6ad2b
> >  0,        163,        163,        1,   921600, 0x3b372379
> >  0,        165,        165,        1,   921600, 0x36f245f5
> > +0,        166,        166,        1,   921600, 0x36f245f5
> 
> Following what i said in the nuv patch, do you still experience timeouts
> with the current codebase, or even if you revert commit
> a9dacdeea6168787a142209bd19fdd74aefc9dd6? Creating a reference to an
> existing frame buffer shouldn't be expensive anymore for the fuzzer
> after my ref counting changes to target_dec_fuzzer.c
> 
> This is a very ugly solution to a problem that was already solved when
> reference counting was introduced. Returning duplicate frames to achieve
> cfr in decoders where it's expected shouldn't affect performance.

Maybe i should ask this backward to make it clearer what this is trying
to fix.

Consider a patch that would return every frame twice with the same ref
of course.
Would you consider this to have 0 performance impact ?
if every filter following needs to process frames twice 2x CPU load
and after the filters references would also not be the same anymore
the following encoder would encoder 2x as many frames 2x CPU load,
bigger file lower quality per bitrate. Also playback of the resulting
file would require more cpu time as it has more frames.

I think that would be considered a very bad patch for its performance
impact.
So if we do the opposite of removing duplicates why is this so
controversal ?

This is not about the fuzzer at all ...

Thanks

[...]
Michael Niedermayer Aug. 25, 2019, 10:14 p.m. UTC | #4
On Sun, Aug 25, 2019 at 11:46:36PM +0200, Michael Niedermayer wrote:
> On Sun, Aug 25, 2019 at 01:22:22PM -0300, James Almer wrote:
> > On 8/24/2019 3:18 PM, Michael Niedermayer wrote:
> > > Fixes: Ticket7880
> > > 
> > > Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
> > > ---
> > >  libavcodec/qtrle.c        | 30 ++++++++++++++++++++++++++----
> > >  tests/ref/fate/qtrle-8bit |  1 +
> > >  2 files changed, 27 insertions(+), 4 deletions(-)
> > > 
> > > diff --git a/libavcodec/qtrle.c b/libavcodec/qtrle.c
> > > index 2c29547e5a..c22a1a582d 100644
> > > --- a/libavcodec/qtrle.c
> > > +++ b/libavcodec/qtrle.c
> > > @@ -45,6 +45,7 @@ typedef struct QtrleContext {
> > >  
> > >      GetByteContext g;
> > >      uint32_t pal[256];
> > > +    AVPacket flush_pkt;
> > >  } QtrleContext;
> > >  
> > >  #define CHECK_PIXEL_PTR(n)                                                            \
> > > @@ -454,11 +455,27 @@ static int qtrle_decode_frame(AVCodecContext *avctx,
> > >      int has_palette = 0;
> > >      int ret, size;
> > >  
> > > +    if (!avpkt->data) {
> > > +        if (avctx->internal->need_flush) {
> > > +            avctx->internal->need_flush = 0;
> > > +            ret = ff_setup_buffered_frame_for_return(avctx, data, s->frame, &s->flush_pkt);
> > > +            if (ret < 0)
> > > +                return ret;
> > > +            *got_frame = 1;
> > > +        }
> > > +        return 0;
> > > +    }
> > > +    s->flush_pkt = *avpkt;
> > > +    s->frame->pkt_dts = s->flush_pkt.dts;
> > > +
> > >      bytestream2_init(&s->g, avpkt->data, avpkt->size);
> > >  
> > >      /* check if this frame is even supposed to change */
> > > -    if (avpkt->size < 8)
> > > +    if (avpkt->size < 8) {
> > > +        avctx->internal->need_flush = 1;
> > >          return avpkt->size;
> > > +    }
> > > +    avctx->internal->need_flush = 0;
> > >  
> > >      /* start after the chunk size */
> > >      size = bytestream2_get_be32(&s->g) & 0x3FFFFFFF;
> > > @@ -471,14 +488,18 @@ static int qtrle_decode_frame(AVCodecContext *avctx,
> > >  
> > >      /* if a header is present, fetch additional decoding parameters */
> > >      if (header & 0x0008) {
> > > -        if (avpkt->size < 14)
> > > +        if (avpkt->size < 14) {
> > > +            avctx->internal->need_flush = 1;
> > >              return avpkt->size;
> > > +        }
> > >          start_line = bytestream2_get_be16(&s->g);
> > >          bytestream2_skip(&s->g, 2);
> > >          height     = bytestream2_get_be16(&s->g);
> > >          bytestream2_skip(&s->g, 2);
> > > -        if (height > s->avctx->height - start_line)
> > > +        if (height > s->avctx->height - start_line) {
> > > +            avctx->internal->need_flush = 1;
> > >              return avpkt->size;
> > > +        }
> > >      } else {
> > >          start_line = 0;
> > >          height     = s->avctx->height;
> > > @@ -572,5 +593,6 @@ AVCodec ff_qtrle_decoder = {
> > >      .init           = qtrle_decode_init,
> > >      .close          = qtrle_decode_end,
> > >      .decode         = qtrle_decode_frame,
> > > -    .capabilities   = AV_CODEC_CAP_DR1,
> > > +    .caps_internal  = FF_CODEC_CAP_SETS_PKT_DTS | FF_CODEC_CAP_SETS_PKT_POS,
> > > +    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY,
> > >  };
> > > diff --git a/tests/ref/fate/qtrle-8bit b/tests/ref/fate/qtrle-8bit
> > > index 27bb8aad71..39a03b7b6c 100644
> > > --- a/tests/ref/fate/qtrle-8bit
> > > +++ b/tests/ref/fate/qtrle-8bit
> > > @@ -61,3 +61,4 @@
> > >  0,        160,        160,        1,   921600, 0xcfd6ad2b
> > >  0,        163,        163,        1,   921600, 0x3b372379
> > >  0,        165,        165,        1,   921600, 0x36f245f5
> > > +0,        166,        166,        1,   921600, 0x36f245f5
> > 
> > Following what i said in the nuv patch, do you still experience timeouts
> > with the current codebase, or even if you revert commit
> > a9dacdeea6168787a142209bd19fdd74aefc9dd6? Creating a reference to an
> > existing frame buffer shouldn't be expensive anymore for the fuzzer
> > after my ref counting changes to target_dec_fuzzer.c
> > 
> > This is a very ugly solution to a problem that was already solved when
> > reference counting was introduced. Returning duplicate frames to achieve
> > cfr in decoders where it's expected shouldn't affect performance.
> 
> Maybe i should ask this backward to make it clearer what this is trying
> to fix.
> 
> Consider a patch that would return every frame twice with the same ref
> of course.
> Would you consider this to have 0 performance impact ?
> if every filter following needs to process frames twice 2x CPU load
> and after the filters references would also not be the same anymore
> the following encoder would encoder 2x as many frames 2x CPU load,
> bigger file lower quality per bitrate. Also playback of the resulting
> file would require more cpu time as it has more frames.
> 
> I think that would be considered a very bad patch for its performance
> impact.
> So if we do the opposite of removing duplicates why is this so
> controversal ?
> 
> This is not about the fuzzer at all ...

Also about the implementation itself.
This can of course be done in countless other ways
for example one can probably detect the duplicate ref somewhere in common
code and then optionally drop the frames.
Or it could be done in a filter, that would then only help applications
using libavfilter too though, ...

Iam not arguing in favor of this specific implementation, rather that
these frames should not be processed multiple times 
Also the terse NAK comments this sort of patches receive and did receive
do not motivate one very much to spend alot of time doing a really
perfect design ...

Thanks

[...]
James Almer Aug. 25, 2019, 11:04 p.m. UTC | #5
On 8/25/2019 6:46 PM, Michael Niedermayer wrote:
> On Sun, Aug 25, 2019 at 01:22:22PM -0300, James Almer wrote:
>> On 8/24/2019 3:18 PM, Michael Niedermayer wrote:
>>> Fixes: Ticket7880
>>>
>>> Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
>>> ---
>>>  libavcodec/qtrle.c        | 30 ++++++++++++++++++++++++++----
>>>  tests/ref/fate/qtrle-8bit |  1 +
>>>  2 files changed, 27 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/libavcodec/qtrle.c b/libavcodec/qtrle.c
>>> index 2c29547e5a..c22a1a582d 100644
>>> --- a/libavcodec/qtrle.c
>>> +++ b/libavcodec/qtrle.c
>>> @@ -45,6 +45,7 @@ typedef struct QtrleContext {
>>>  
>>>      GetByteContext g;
>>>      uint32_t pal[256];
>>> +    AVPacket flush_pkt;
>>>  } QtrleContext;
>>>  
>>>  #define CHECK_PIXEL_PTR(n)                                                            \
>>> @@ -454,11 +455,27 @@ static int qtrle_decode_frame(AVCodecContext *avctx,
>>>      int has_palette = 0;
>>>      int ret, size;
>>>  
>>> +    if (!avpkt->data) {
>>> +        if (avctx->internal->need_flush) {
>>> +            avctx->internal->need_flush = 0;
>>> +            ret = ff_setup_buffered_frame_for_return(avctx, data, s->frame, &s->flush_pkt);
>>> +            if (ret < 0)
>>> +                return ret;
>>> +            *got_frame = 1;
>>> +        }
>>> +        return 0;
>>> +    }
>>> +    s->flush_pkt = *avpkt;
>>> +    s->frame->pkt_dts = s->flush_pkt.dts;
>>> +
>>>      bytestream2_init(&s->g, avpkt->data, avpkt->size);
>>>  
>>>      /* check if this frame is even supposed to change */
>>> -    if (avpkt->size < 8)
>>> +    if (avpkt->size < 8) {
>>> +        avctx->internal->need_flush = 1;
>>>          return avpkt->size;
>>> +    }
>>> +    avctx->internal->need_flush = 0;
>>>  
>>>      /* start after the chunk size */
>>>      size = bytestream2_get_be32(&s->g) & 0x3FFFFFFF;
>>> @@ -471,14 +488,18 @@ static int qtrle_decode_frame(AVCodecContext *avctx,
>>>  
>>>      /* if a header is present, fetch additional decoding parameters */
>>>      if (header & 0x0008) {
>>> -        if (avpkt->size < 14)
>>> +        if (avpkt->size < 14) {
>>> +            avctx->internal->need_flush = 1;
>>>              return avpkt->size;
>>> +        }
>>>          start_line = bytestream2_get_be16(&s->g);
>>>          bytestream2_skip(&s->g, 2);
>>>          height     = bytestream2_get_be16(&s->g);
>>>          bytestream2_skip(&s->g, 2);
>>> -        if (height > s->avctx->height - start_line)
>>> +        if (height > s->avctx->height - start_line) {
>>> +            avctx->internal->need_flush = 1;
>>>              return avpkt->size;
>>> +        }
>>>      } else {
>>>          start_line = 0;
>>>          height     = s->avctx->height;
>>> @@ -572,5 +593,6 @@ AVCodec ff_qtrle_decoder = {
>>>      .init           = qtrle_decode_init,
>>>      .close          = qtrle_decode_end,
>>>      .decode         = qtrle_decode_frame,
>>> -    .capabilities   = AV_CODEC_CAP_DR1,
>>> +    .caps_internal  = FF_CODEC_CAP_SETS_PKT_DTS | FF_CODEC_CAP_SETS_PKT_POS,
>>> +    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY,
>>>  };
>>> diff --git a/tests/ref/fate/qtrle-8bit b/tests/ref/fate/qtrle-8bit
>>> index 27bb8aad71..39a03b7b6c 100644
>>> --- a/tests/ref/fate/qtrle-8bit
>>> +++ b/tests/ref/fate/qtrle-8bit
>>> @@ -61,3 +61,4 @@
>>>  0,        160,        160,        1,   921600, 0xcfd6ad2b
>>>  0,        163,        163,        1,   921600, 0x3b372379
>>>  0,        165,        165,        1,   921600, 0x36f245f5
>>> +0,        166,        166,        1,   921600, 0x36f245f5
>>
>> Following what i said in the nuv patch, do you still experience timeouts
>> with the current codebase, or even if you revert commit
>> a9dacdeea6168787a142209bd19fdd74aefc9dd6? Creating a reference to an
>> existing frame buffer shouldn't be expensive anymore for the fuzzer
>> after my ref counting changes to target_dec_fuzzer.c
>>
>> This is a very ugly solution to a problem that was already solved when
>> reference counting was introduced. Returning duplicate frames to achieve
>> cfr in decoders where it's expected shouldn't affect performance.
> 
> Maybe i should ask this backward to make it clearer what this is trying
> to fix.
> 
> Consider a patch that would return every frame twice with the same ref
> of course.
> Would you consider this to have 0 performance impact ?

No, but it would be negligible.

> if every filter following needs to process frames twice 2x CPU load
> and after the filters references would also not be the same anymore
> the following encoder would encoder 2x as many frames 2x CPU load,
> bigger file lower quality per bitrate. Also playback of the resulting
> file would require more cpu time as it has more frames.

What if the filter the user injected is meant to affect each and every
frame in a different way? Think for example a constant fade out. If you
remove the duplicate ones from what's meant to be a several seconds long
completely static scene, would that effect be altered? Between dts x and
dts y there are meant to be 100 frames, but now there's 1.
I'm not sure how libavfilter works here, but if it tries to fill out the
missing frames on its own, then the work of inserting the duplicate
frames for the fade out effect would simply move from lavc to lavfi.

> 
> I think that would be considered a very bad patch for its performance
> impact.
> So if we do the opposite of removing duplicates why is this so
> controversal ?
> 
> This is not about the fuzzer at all ...

a9dacdeea6168787a142209bd19fdd74aefc9dd6 was committed to reduce timeout
runs in the fuzzer. What that commit did was turn a cfr decoder into a
vfr one. This patch here is not about the fuzzer, no, but it's to fix a
regression introduced by the aforementioned commit, when the last frame
is meant to be a duplicate of the previous one, and it achieves it by
introducing a very convoluted way to precisely pass a new reference to
the previous frame, which was the original behavior.

The output for this specific codec seems to be intended to be 1:1. For
every unique packet passed to the decoder there's a frame meant to be
output from it. The standard compliant behavior would then be a cfr one.
This patch is controversial because, as i mentioned above, it's a very
convoluted and dirty way to output something the decoder was not really
meant to.

The user can choose to get vfr output, or at least should, with the
vsynth cli option. There could also be a new option/flag added to
libavcodec to output such frames or not by setting AV_FRAME_FLAG_DISCARD
on them (thus never returned by receive_frame/decode_video2 since the
generic code looks for it and discards them). Or if that's not an
option, maybe with a new frame flag AV_FRAME_FLAG_DISPOSABLE to let the
user (or even libavfilter) decide what to do with the returned frames in
question.

> 
> Thanks
> 
> [...]
> 
> 
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
James Almer Aug. 25, 2019, 11:18 p.m. UTC | #6
On 8/25/2019 7:14 PM, Michael Niedermayer wrote:
> On Sun, Aug 25, 2019 at 11:46:36PM +0200, Michael Niedermayer wrote:
>> On Sun, Aug 25, 2019 at 01:22:22PM -0300, James Almer wrote:
>>> On 8/24/2019 3:18 PM, Michael Niedermayer wrote:
>>>> Fixes: Ticket7880
>>>>
>>>> Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
>>>> ---
>>>>  libavcodec/qtrle.c        | 30 ++++++++++++++++++++++++++----
>>>>  tests/ref/fate/qtrle-8bit |  1 +
>>>>  2 files changed, 27 insertions(+), 4 deletions(-)
>>>>
>>>> diff --git a/libavcodec/qtrle.c b/libavcodec/qtrle.c
>>>> index 2c29547e5a..c22a1a582d 100644
>>>> --- a/libavcodec/qtrle.c
>>>> +++ b/libavcodec/qtrle.c
>>>> @@ -45,6 +45,7 @@ typedef struct QtrleContext {
>>>>  
>>>>      GetByteContext g;
>>>>      uint32_t pal[256];
>>>> +    AVPacket flush_pkt;
>>>>  } QtrleContext;
>>>>  
>>>>  #define CHECK_PIXEL_PTR(n)                                                            \
>>>> @@ -454,11 +455,27 @@ static int qtrle_decode_frame(AVCodecContext *avctx,
>>>>      int has_palette = 0;
>>>>      int ret, size;
>>>>  
>>>> +    if (!avpkt->data) {
>>>> +        if (avctx->internal->need_flush) {
>>>> +            avctx->internal->need_flush = 0;
>>>> +            ret = ff_setup_buffered_frame_for_return(avctx, data, s->frame, &s->flush_pkt);
>>>> +            if (ret < 0)
>>>> +                return ret;
>>>> +            *got_frame = 1;
>>>> +        }
>>>> +        return 0;
>>>> +    }
>>>> +    s->flush_pkt = *avpkt;
>>>> +    s->frame->pkt_dts = s->flush_pkt.dts;
>>>> +
>>>>      bytestream2_init(&s->g, avpkt->data, avpkt->size);
>>>>  
>>>>      /* check if this frame is even supposed to change */
>>>> -    if (avpkt->size < 8)
>>>> +    if (avpkt->size < 8) {
>>>> +        avctx->internal->need_flush = 1;
>>>>          return avpkt->size;
>>>> +    }
>>>> +    avctx->internal->need_flush = 0;
>>>>  
>>>>      /* start after the chunk size */
>>>>      size = bytestream2_get_be32(&s->g) & 0x3FFFFFFF;
>>>> @@ -471,14 +488,18 @@ static int qtrle_decode_frame(AVCodecContext *avctx,
>>>>  
>>>>      /* if a header is present, fetch additional decoding parameters */
>>>>      if (header & 0x0008) {
>>>> -        if (avpkt->size < 14)
>>>> +        if (avpkt->size < 14) {
>>>> +            avctx->internal->need_flush = 1;
>>>>              return avpkt->size;
>>>> +        }
>>>>          start_line = bytestream2_get_be16(&s->g);
>>>>          bytestream2_skip(&s->g, 2);
>>>>          height     = bytestream2_get_be16(&s->g);
>>>>          bytestream2_skip(&s->g, 2);
>>>> -        if (height > s->avctx->height - start_line)
>>>> +        if (height > s->avctx->height - start_line) {
>>>> +            avctx->internal->need_flush = 1;
>>>>              return avpkt->size;
>>>> +        }
>>>>      } else {
>>>>          start_line = 0;
>>>>          height     = s->avctx->height;
>>>> @@ -572,5 +593,6 @@ AVCodec ff_qtrle_decoder = {
>>>>      .init           = qtrle_decode_init,
>>>>      .close          = qtrle_decode_end,
>>>>      .decode         = qtrle_decode_frame,
>>>> -    .capabilities   = AV_CODEC_CAP_DR1,
>>>> +    .caps_internal  = FF_CODEC_CAP_SETS_PKT_DTS | FF_CODEC_CAP_SETS_PKT_POS,
>>>> +    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY,
>>>>  };
>>>> diff --git a/tests/ref/fate/qtrle-8bit b/tests/ref/fate/qtrle-8bit
>>>> index 27bb8aad71..39a03b7b6c 100644
>>>> --- a/tests/ref/fate/qtrle-8bit
>>>> +++ b/tests/ref/fate/qtrle-8bit
>>>> @@ -61,3 +61,4 @@
>>>>  0,        160,        160,        1,   921600, 0xcfd6ad2b
>>>>  0,        163,        163,        1,   921600, 0x3b372379
>>>>  0,        165,        165,        1,   921600, 0x36f245f5
>>>> +0,        166,        166,        1,   921600, 0x36f245f5
>>>
>>> Following what i said in the nuv patch, do you still experience timeouts
>>> with the current codebase, or even if you revert commit
>>> a9dacdeea6168787a142209bd19fdd74aefc9dd6? Creating a reference to an
>>> existing frame buffer shouldn't be expensive anymore for the fuzzer
>>> after my ref counting changes to target_dec_fuzzer.c
>>>
>>> This is a very ugly solution to a problem that was already solved when
>>> reference counting was introduced. Returning duplicate frames to achieve
>>> cfr in decoders where it's expected shouldn't affect performance.
>>
>> Maybe i should ask this backward to make it clearer what this is trying
>> to fix.
>>
>> Consider a patch that would return every frame twice with the same ref
>> of course.
>> Would you consider this to have 0 performance impact ?
>> if every filter following needs to process frames twice 2x CPU load
>> and after the filters references would also not be the same anymore
>> the following encoder would encoder 2x as many frames 2x CPU load,
>> bigger file lower quality per bitrate. Also playback of the resulting
>> file would require more cpu time as it has more frames.
>>
>> I think that would be considered a very bad patch for its performance
>> impact.
>> So if we do the opposite of removing duplicates why is this so
>> controversal ?
>>
>> This is not about the fuzzer at all ...
> 
> Also about the implementation itself.
> This can of course be done in countless other ways
> for example one can probably detect the duplicate ref somewhere in common
> code and then optionally drop the frames.

This is one of the suggestions i made in the email sent a few minutes
ago, yes. Based on a user set option, either dropping the frames in
generic code by flagging them as discard, or flagging them as
"disposable" and letting the library user (external applications, ffmpeg
cli, libavfilter, etc) decide what to do with them.

> Or it could be done in a filter, that would then only help applications
> using libavfilter too though, ...
> 
> Iam not arguing in favor of this specific implementation, rather that
> these frames should not be processed multiple times 
> Also the terse NAK comments this sort of patches receive and did receive
> do not motivate one very much to spend alot of time doing a really
> perfect design ...

That's why i'm not the one writing such replies, and instead is trying
to help coming up with a nicer way to solve this that doesn't involve
unconditionally altering the output of decoders.

> 
> Thanks
> 
> [...]
> 
> 
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
James Almer Aug. 26, 2019, 2:37 a.m. UTC | #7
On 8/24/2019 3:18 PM, Michael Niedermayer wrote:
> Fixes: Ticket7880
> 
> Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
> ---
>  libavcodec/qtrle.c        | 30 ++++++++++++++++++++++++++----
>  tests/ref/fate/qtrle-8bit |  1 +
>  2 files changed, 27 insertions(+), 4 deletions(-)
> 
> diff --git a/libavcodec/qtrle.c b/libavcodec/qtrle.c
> index 2c29547e5a..c22a1a582d 100644
> --- a/libavcodec/qtrle.c
> +++ b/libavcodec/qtrle.c
> @@ -45,6 +45,7 @@ typedef struct QtrleContext {
>  
>      GetByteContext g;
>      uint32_t pal[256];
> +    AVPacket flush_pkt;
>  } QtrleContext;
>  
>  #define CHECK_PIXEL_PTR(n)                                                            \
> @@ -454,11 +455,27 @@ static int qtrle_decode_frame(AVCodecContext *avctx,
>      int has_palette = 0;
>      int ret, size;
>  
> +    if (!avpkt->data) {
> +        if (avctx->internal->need_flush) {
> +            avctx->internal->need_flush = 0;

This same effect can be achieved by instead checking for
s->frame->data[0] != NULL after adding a AVCodec.flush() callback that
unrefs s->frame (Which should be added regardless of this patch).

That way you wont need to add need_flush to AVCodecInternal.

> +            ret = ff_setup_buffered_frame_for_return(avctx, data, s->frame, &s->flush_pkt);
> +            if (ret < 0)
> +                return ret;
> +            *got_frame = 1;
> +        }
> +        return 0;
> +    }
> +    s->flush_pkt = *avpkt;
> +    s->frame->pkt_dts = s->flush_pkt.dts;
> +
>      bytestream2_init(&s->g, avpkt->data, avpkt->size);
>  
>      /* check if this frame is even supposed to change */
> -    if (avpkt->size < 8)
> +    if (avpkt->size < 8) {
> +        avctx->internal->need_flush = 1;
>          return avpkt->size;
> +    }
> +    avctx->internal->need_flush = 0;
>  
>      /* start after the chunk size */
>      size = bytestream2_get_be32(&s->g) & 0x3FFFFFFF;
> @@ -471,14 +488,18 @@ static int qtrle_decode_frame(AVCodecContext *avctx,
>  
>      /* if a header is present, fetch additional decoding parameters */
>      if (header & 0x0008) {
> -        if (avpkt->size < 14)
> +        if (avpkt->size < 14) {
> +            avctx->internal->need_flush = 1;
>              return avpkt->size;
> +        }
>          start_line = bytestream2_get_be16(&s->g);
>          bytestream2_skip(&s->g, 2);
>          height     = bytestream2_get_be16(&s->g);
>          bytestream2_skip(&s->g, 2);
> -        if (height > s->avctx->height - start_line)
> +        if (height > s->avctx->height - start_line) {
> +            avctx->internal->need_flush = 1;
>              return avpkt->size;
> +        }
>      } else {
>          start_line = 0;
>          height     = s->avctx->height;
> @@ -572,5 +593,6 @@ AVCodec ff_qtrle_decoder = {
>      .init           = qtrle_decode_init,
>      .close          = qtrle_decode_end,
>      .decode         = qtrle_decode_frame,
> -    .capabilities   = AV_CODEC_CAP_DR1,
> +    .caps_internal  = FF_CODEC_CAP_SETS_PKT_DTS | FF_CODEC_CAP_SETS_PKT_POS,
> +    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY,
>  };
> diff --git a/tests/ref/fate/qtrle-8bit b/tests/ref/fate/qtrle-8bit
> index 27bb8aad71..39a03b7b6c 100644
> --- a/tests/ref/fate/qtrle-8bit
> +++ b/tests/ref/fate/qtrle-8bit
> @@ -61,3 +61,4 @@
>  0,        160,        160,        1,   921600, 0xcfd6ad2b
>  0,        163,        163,        1,   921600, 0x3b372379
>  0,        165,        165,        1,   921600, 0x36f245f5
> +0,        166,        166,        1,   921600, 0x36f245f5
>
James Almer Aug. 26, 2019, 2:43 a.m. UTC | #8
On 8/25/2019 8:18 PM, James Almer wrote:
> On 8/25/2019 7:14 PM, Michael Niedermayer wrote:
>> On Sun, Aug 25, 2019 at 11:46:36PM +0200, Michael Niedermayer wrote:
>>> On Sun, Aug 25, 2019 at 01:22:22PM -0300, James Almer wrote:
>>>> On 8/24/2019 3:18 PM, Michael Niedermayer wrote:
>>>>> Fixes: Ticket7880
>>>>>
>>>>> Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
>>>>> ---
>>>>>  libavcodec/qtrle.c        | 30 ++++++++++++++++++++++++++----
>>>>>  tests/ref/fate/qtrle-8bit |  1 +
>>>>>  2 files changed, 27 insertions(+), 4 deletions(-)
>>>>>
>>>>> diff --git a/libavcodec/qtrle.c b/libavcodec/qtrle.c
>>>>> index 2c29547e5a..c22a1a582d 100644
>>>>> --- a/libavcodec/qtrle.c
>>>>> +++ b/libavcodec/qtrle.c
>>>>> @@ -45,6 +45,7 @@ typedef struct QtrleContext {
>>>>>  
>>>>>      GetByteContext g;
>>>>>      uint32_t pal[256];
>>>>> +    AVPacket flush_pkt;
>>>>>  } QtrleContext;
>>>>>  
>>>>>  #define CHECK_PIXEL_PTR(n)                                                            \
>>>>> @@ -454,11 +455,27 @@ static int qtrle_decode_frame(AVCodecContext *avctx,
>>>>>      int has_palette = 0;
>>>>>      int ret, size;
>>>>>  
>>>>> +    if (!avpkt->data) {
>>>>> +        if (avctx->internal->need_flush) {
>>>>> +            avctx->internal->need_flush = 0;
>>>>> +            ret = ff_setup_buffered_frame_for_return(avctx, data, s->frame, &s->flush_pkt);
>>>>> +            if (ret < 0)
>>>>> +                return ret;
>>>>> +            *got_frame = 1;
>>>>> +        }
>>>>> +        return 0;
>>>>> +    }
>>>>> +    s->flush_pkt = *avpkt;
>>>>> +    s->frame->pkt_dts = s->flush_pkt.dts;
>>>>> +
>>>>>      bytestream2_init(&s->g, avpkt->data, avpkt->size);
>>>>>  
>>>>>      /* check if this frame is even supposed to change */
>>>>> -    if (avpkt->size < 8)
>>>>> +    if (avpkt->size < 8) {
>>>>> +        avctx->internal->need_flush = 1;
>>>>>          return avpkt->size;
>>>>> +    }
>>>>> +    avctx->internal->need_flush = 0;
>>>>>  
>>>>>      /* start after the chunk size */
>>>>>      size = bytestream2_get_be32(&s->g) & 0x3FFFFFFF;
>>>>> @@ -471,14 +488,18 @@ static int qtrle_decode_frame(AVCodecContext *avctx,
>>>>>  
>>>>>      /* if a header is present, fetch additional decoding parameters */
>>>>>      if (header & 0x0008) {
>>>>> -        if (avpkt->size < 14)
>>>>> +        if (avpkt->size < 14) {
>>>>> +            avctx->internal->need_flush = 1;
>>>>>              return avpkt->size;
>>>>> +        }
>>>>>          start_line = bytestream2_get_be16(&s->g);
>>>>>          bytestream2_skip(&s->g, 2);
>>>>>          height     = bytestream2_get_be16(&s->g);
>>>>>          bytestream2_skip(&s->g, 2);
>>>>> -        if (height > s->avctx->height - start_line)
>>>>> +        if (height > s->avctx->height - start_line) {
>>>>> +            avctx->internal->need_flush = 1;
>>>>>              return avpkt->size;
>>>>> +        }
>>>>>      } else {
>>>>>          start_line = 0;
>>>>>          height     = s->avctx->height;
>>>>> @@ -572,5 +593,6 @@ AVCodec ff_qtrle_decoder = {
>>>>>      .init           = qtrle_decode_init,
>>>>>      .close          = qtrle_decode_end,
>>>>>      .decode         = qtrle_decode_frame,
>>>>> -    .capabilities   = AV_CODEC_CAP_DR1,
>>>>> +    .caps_internal  = FF_CODEC_CAP_SETS_PKT_DTS | FF_CODEC_CAP_SETS_PKT_POS,
>>>>> +    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY,
>>>>>  };
>>>>> diff --git a/tests/ref/fate/qtrle-8bit b/tests/ref/fate/qtrle-8bit
>>>>> index 27bb8aad71..39a03b7b6c 100644
>>>>> --- a/tests/ref/fate/qtrle-8bit
>>>>> +++ b/tests/ref/fate/qtrle-8bit
>>>>> @@ -61,3 +61,4 @@
>>>>>  0,        160,        160,        1,   921600, 0xcfd6ad2b
>>>>>  0,        163,        163,        1,   921600, 0x3b372379
>>>>>  0,        165,        165,        1,   921600, 0x36f245f5
>>>>> +0,        166,        166,        1,   921600, 0x36f245f5
>>>>
>>>> Following what i said in the nuv patch, do you still experience timeouts
>>>> with the current codebase, or even if you revert commit
>>>> a9dacdeea6168787a142209bd19fdd74aefc9dd6? Creating a reference to an
>>>> existing frame buffer shouldn't be expensive anymore for the fuzzer
>>>> after my ref counting changes to target_dec_fuzzer.c
>>>>
>>>> This is a very ugly solution to a problem that was already solved when
>>>> reference counting was introduced. Returning duplicate frames to achieve
>>>> cfr in decoders where it's expected shouldn't affect performance.
>>>
>>> Maybe i should ask this backward to make it clearer what this is trying
>>> to fix.
>>>
>>> Consider a patch that would return every frame twice with the same ref
>>> of course.
>>> Would you consider this to have 0 performance impact ?
>>> if every filter following needs to process frames twice 2x CPU load
>>> and after the filters references would also not be the same anymore
>>> the following encoder would encoder 2x as many frames 2x CPU load,
>>> bigger file lower quality per bitrate. Also playback of the resulting
>>> file would require more cpu time as it has more frames.
>>>
>>> I think that would be considered a very bad patch for its performance
>>> impact.
>>> So if we do the opposite of removing duplicates why is this so
>>> controversal ?
>>>
>>> This is not about the fuzzer at all ...
>>
>> Also about the implementation itself.
>> This can of course be done in countless other ways
>> for example one can probably detect the duplicate ref somewhere in common
>> code and then optionally drop the frames.
> 
> This is one of the suggestions i made in the email sent a few minutes
> ago, yes. Based on a user set option, either dropping the frames in
> generic code by flagging them as discard

Of course, this has the same issue as the regression this patch is
trying to solve. The last frame, being a duplicate, would also be
discarded by the generic code.

> , or flagging them as
> "disposable" and letting the library user (external applications, ffmpeg
> cli, libavfilter, etc) decide what to do with them.

This one would also help making the cli's -vsync vfr option more useful,
i think.

> 
>> Or it could be done in a filter, that would then only help applications
>> using libavfilter too though, ...
>>
>> Iam not arguing in favor of this specific implementation, rather that
>> these frames should not be processed multiple times 
>> Also the terse NAK comments this sort of patches receive and did receive
>> do not motivate one very much to spend alot of time doing a really
>> perfect design ...
> 
> That's why i'm not the one writing such replies, and instead is trying
> to help coming up with a nicer way to solve this that doesn't involve
> unconditionally altering the output of decoders.
> 
>>
>> Thanks
>>
>> [...]
>>
>>
>> _______________________________________________
>> ffmpeg-devel mailing list
>> ffmpeg-devel@ffmpeg.org
>> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>>
>> To unsubscribe, visit link above, or email
>> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>>
>
Michael Niedermayer Aug. 26, 2019, 7:15 a.m. UTC | #9
On Sun, Aug 25, 2019 at 11:37:02PM -0300, James Almer wrote:
> On 8/24/2019 3:18 PM, Michael Niedermayer wrote:
> > Fixes: Ticket7880
> > 
> > Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
> > ---
> >  libavcodec/qtrle.c        | 30 ++++++++++++++++++++++++++----
> >  tests/ref/fate/qtrle-8bit |  1 +
> >  2 files changed, 27 insertions(+), 4 deletions(-)
> > 
> > diff --git a/libavcodec/qtrle.c b/libavcodec/qtrle.c
> > index 2c29547e5a..c22a1a582d 100644
> > --- a/libavcodec/qtrle.c
> > +++ b/libavcodec/qtrle.c
> > @@ -45,6 +45,7 @@ typedef struct QtrleContext {
> >  
> >      GetByteContext g;
> >      uint32_t pal[256];
> > +    AVPacket flush_pkt;
> >  } QtrleContext;
> >  
> >  #define CHECK_PIXEL_PTR(n)                                                            \
> > @@ -454,11 +455,27 @@ static int qtrle_decode_frame(AVCodecContext *avctx,
> >      int has_palette = 0;
> >      int ret, size;
> >  
> > +    if (!avpkt->data) {
> > +        if (avctx->internal->need_flush) {
> > +            avctx->internal->need_flush = 0;
> 
> This same effect can be achieved by instead checking for
> s->frame->data[0] != NULL after adding a AVCodec.flush() callback that
> unrefs s->frame (Which should be added regardless of this patch).
> 
> That way you wont need to add need_flush to AVCodecInternal.

need_flush in common code was done to avoid haveing the same code in
every decoder as shown in the previous iteration of the patchset

Avoiding the need_flush completely by clearing the internal frame
might not work completly because there are more cases than this
1. seek (clear the last frame is possible so here it works)
2. output a frame (here we cannot clear the frame as it may be used by the next
   frame, still this is the same case, if a frame was just output
   theres no need to flush at the end so we also need need_flush = 0

Thanks

[...]
Michael Niedermayer Aug. 26, 2019, 7:32 a.m. UTC | #10
On Sun, Aug 25, 2019 at 08:04:03PM -0300, James Almer wrote:
> On 8/25/2019 6:46 PM, Michael Niedermayer wrote:
> > On Sun, Aug 25, 2019 at 01:22:22PM -0300, James Almer wrote:
> >> On 8/24/2019 3:18 PM, Michael Niedermayer wrote:
> >>> Fixes: Ticket7880
> >>>
> >>> Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
> >>> ---
> >>>  libavcodec/qtrle.c        | 30 ++++++++++++++++++++++++++----
> >>>  tests/ref/fate/qtrle-8bit |  1 +
> >>>  2 files changed, 27 insertions(+), 4 deletions(-)
> >>>
> >>> diff --git a/libavcodec/qtrle.c b/libavcodec/qtrle.c
> >>> index 2c29547e5a..c22a1a582d 100644
> >>> --- a/libavcodec/qtrle.c
> >>> +++ b/libavcodec/qtrle.c
> >>> @@ -45,6 +45,7 @@ typedef struct QtrleContext {
> >>>  
> >>>      GetByteContext g;
> >>>      uint32_t pal[256];
> >>> +    AVPacket flush_pkt;
> >>>  } QtrleContext;
> >>>  
> >>>  #define CHECK_PIXEL_PTR(n)                                                            \
> >>> @@ -454,11 +455,27 @@ static int qtrle_decode_frame(AVCodecContext *avctx,
> >>>      int has_palette = 0;
> >>>      int ret, size;
> >>>  
> >>> +    if (!avpkt->data) {
> >>> +        if (avctx->internal->need_flush) {
> >>> +            avctx->internal->need_flush = 0;
> >>> +            ret = ff_setup_buffered_frame_for_return(avctx, data, s->frame, &s->flush_pkt);
> >>> +            if (ret < 0)
> >>> +                return ret;
> >>> +            *got_frame = 1;
> >>> +        }
> >>> +        return 0;
> >>> +    }
> >>> +    s->flush_pkt = *avpkt;
> >>> +    s->frame->pkt_dts = s->flush_pkt.dts;
> >>> +
> >>>      bytestream2_init(&s->g, avpkt->data, avpkt->size);
> >>>  
> >>>      /* check if this frame is even supposed to change */
> >>> -    if (avpkt->size < 8)
> >>> +    if (avpkt->size < 8) {
> >>> +        avctx->internal->need_flush = 1;
> >>>          return avpkt->size;
> >>> +    }
> >>> +    avctx->internal->need_flush = 0;
> >>>  
> >>>      /* start after the chunk size */
> >>>      size = bytestream2_get_be32(&s->g) & 0x3FFFFFFF;
> >>> @@ -471,14 +488,18 @@ static int qtrle_decode_frame(AVCodecContext *avctx,
> >>>  
> >>>      /* if a header is present, fetch additional decoding parameters */
> >>>      if (header & 0x0008) {
> >>> -        if (avpkt->size < 14)
> >>> +        if (avpkt->size < 14) {
> >>> +            avctx->internal->need_flush = 1;
> >>>              return avpkt->size;
> >>> +        }
> >>>          start_line = bytestream2_get_be16(&s->g);
> >>>          bytestream2_skip(&s->g, 2);
> >>>          height     = bytestream2_get_be16(&s->g);
> >>>          bytestream2_skip(&s->g, 2);
> >>> -        if (height > s->avctx->height - start_line)
> >>> +        if (height > s->avctx->height - start_line) {
> >>> +            avctx->internal->need_flush = 1;
> >>>              return avpkt->size;
> >>> +        }
> >>>      } else {
> >>>          start_line = 0;
> >>>          height     = s->avctx->height;
> >>> @@ -572,5 +593,6 @@ AVCodec ff_qtrle_decoder = {
> >>>      .init           = qtrle_decode_init,
> >>>      .close          = qtrle_decode_end,
> >>>      .decode         = qtrle_decode_frame,
> >>> -    .capabilities   = AV_CODEC_CAP_DR1,
> >>> +    .caps_internal  = FF_CODEC_CAP_SETS_PKT_DTS | FF_CODEC_CAP_SETS_PKT_POS,
> >>> +    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY,
> >>>  };
> >>> diff --git a/tests/ref/fate/qtrle-8bit b/tests/ref/fate/qtrle-8bit
> >>> index 27bb8aad71..39a03b7b6c 100644
> >>> --- a/tests/ref/fate/qtrle-8bit
> >>> +++ b/tests/ref/fate/qtrle-8bit
> >>> @@ -61,3 +61,4 @@
> >>>  0,        160,        160,        1,   921600, 0xcfd6ad2b
> >>>  0,        163,        163,        1,   921600, 0x3b372379
> >>>  0,        165,        165,        1,   921600, 0x36f245f5
> >>> +0,        166,        166,        1,   921600, 0x36f245f5
> >>
> >> Following what i said in the nuv patch, do you still experience timeouts
> >> with the current codebase, or even if you revert commit
> >> a9dacdeea6168787a142209bd19fdd74aefc9dd6? Creating a reference to an
> >> existing frame buffer shouldn't be expensive anymore for the fuzzer
> >> after my ref counting changes to target_dec_fuzzer.c
> >>
> >> This is a very ugly solution to a problem that was already solved when
> >> reference counting was introduced. Returning duplicate frames to achieve
> >> cfr in decoders where it's expected shouldn't affect performance.
> > 
> > Maybe i should ask this backward to make it clearer what this is trying
> > to fix.
> > 
> > Consider a patch that would return every frame twice with the same ref
> > of course.
> > Would you consider this to have 0 performance impact ?
> 
> No, but it would be negligible.

ok, lets see some actual numbers

This comparission is about fixing ticket 7880 (just saying so we dont loose
track of what we compare here)

this qtrle patchset which fixes the last frame
time ./ffmpeg -i clock.mov -vf scale=1920x1080 -qscale 2 -y test-new.avi
real            0m0.233s
user            0m0.404s
sys             0m0.036s
-rw-r----- 1 michael michael 769212 Aug 26 08:38 test-new.avi
time ./ffmpeg -i test-new.avi -f null -
real            0m0.095s
user            0m0.131s
sys             0m0.038s

The alternative of reverting the code that discards duplicate frames
(this i believe is what the people opposing this patchset here want)
git revert  a9dacdeea6168787a142209bd19fdd74aefc9dd6
time ./ffmpeg -i clock.mov -vf scale=1920x1080 -qscale 2 -y test.avi
real            0m1.208s
user            0m2.866s
sys             0m0.168s
-rw-r----- 1 michael michael 3274430 Aug 26 08:44 test.avi
time ./ffmpeg -i test.avi -f null -
real            0m0.185s
user            0m0.685s
sys             0m0.076s

https://samples.ffmpeg.org/ffmpeg-bugs/trac/ticket7880/clock.mov

As we can see the solution preferred by the opposition to this patchset
will make the code 6 times slower and result in 4 times higher bitrate for
the same quality.
I would say this is not "negligible"
you know this is not 0.6%, its not 6% its not 60% it is 600%

I do not understand why we have a discussion here about this.
Do we want 600% slower code ?
Do our users want 600% slower code ?

I do not want 600% slower code

Thanks

[...]
Michael Niedermayer Aug. 26, 2019, 7:37 a.m. UTC | #11
On Sun, Aug 25, 2019 at 11:43:42PM -0300, James Almer wrote:
> On 8/25/2019 8:18 PM, James Almer wrote:
> > On 8/25/2019 7:14 PM, Michael Niedermayer wrote:
> >> On Sun, Aug 25, 2019 at 11:46:36PM +0200, Michael Niedermayer wrote:
> >>> On Sun, Aug 25, 2019 at 01:22:22PM -0300, James Almer wrote:
> >>>> On 8/24/2019 3:18 PM, Michael Niedermayer wrote:
> >>>>> Fixes: Ticket7880
> >>>>>
> >>>>> Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
> >>>>> ---
> >>>>>  libavcodec/qtrle.c        | 30 ++++++++++++++++++++++++++----
> >>>>>  tests/ref/fate/qtrle-8bit |  1 +
> >>>>>  2 files changed, 27 insertions(+), 4 deletions(-)
> >>>>>
> >>>>> diff --git a/libavcodec/qtrle.c b/libavcodec/qtrle.c
> >>>>> index 2c29547e5a..c22a1a582d 100644
> >>>>> --- a/libavcodec/qtrle.c
> >>>>> +++ b/libavcodec/qtrle.c
> >>>>> @@ -45,6 +45,7 @@ typedef struct QtrleContext {
> >>>>>  
> >>>>>      GetByteContext g;
> >>>>>      uint32_t pal[256];
> >>>>> +    AVPacket flush_pkt;
> >>>>>  } QtrleContext;
> >>>>>  
> >>>>>  #define CHECK_PIXEL_PTR(n)                                                            \
> >>>>> @@ -454,11 +455,27 @@ static int qtrle_decode_frame(AVCodecContext *avctx,
> >>>>>      int has_palette = 0;
> >>>>>      int ret, size;
> >>>>>  
> >>>>> +    if (!avpkt->data) {
> >>>>> +        if (avctx->internal->need_flush) {
> >>>>> +            avctx->internal->need_flush = 0;
> >>>>> +            ret = ff_setup_buffered_frame_for_return(avctx, data, s->frame, &s->flush_pkt);
> >>>>> +            if (ret < 0)
> >>>>> +                return ret;
> >>>>> +            *got_frame = 1;
> >>>>> +        }
> >>>>> +        return 0;
> >>>>> +    }
> >>>>> +    s->flush_pkt = *avpkt;
> >>>>> +    s->frame->pkt_dts = s->flush_pkt.dts;
> >>>>> +
> >>>>>      bytestream2_init(&s->g, avpkt->data, avpkt->size);
> >>>>>  
> >>>>>      /* check if this frame is even supposed to change */
> >>>>> -    if (avpkt->size < 8)
> >>>>> +    if (avpkt->size < 8) {
> >>>>> +        avctx->internal->need_flush = 1;
> >>>>>          return avpkt->size;
> >>>>> +    }
> >>>>> +    avctx->internal->need_flush = 0;
> >>>>>  
> >>>>>      /* start after the chunk size */
> >>>>>      size = bytestream2_get_be32(&s->g) & 0x3FFFFFFF;
> >>>>> @@ -471,14 +488,18 @@ static int qtrle_decode_frame(AVCodecContext *avctx,
> >>>>>  
> >>>>>      /* if a header is present, fetch additional decoding parameters */
> >>>>>      if (header & 0x0008) {
> >>>>> -        if (avpkt->size < 14)
> >>>>> +        if (avpkt->size < 14) {
> >>>>> +            avctx->internal->need_flush = 1;
> >>>>>              return avpkt->size;
> >>>>> +        }
> >>>>>          start_line = bytestream2_get_be16(&s->g);
> >>>>>          bytestream2_skip(&s->g, 2);
> >>>>>          height     = bytestream2_get_be16(&s->g);
> >>>>>          bytestream2_skip(&s->g, 2);
> >>>>> -        if (height > s->avctx->height - start_line)
> >>>>> +        if (height > s->avctx->height - start_line) {
> >>>>> +            avctx->internal->need_flush = 1;
> >>>>>              return avpkt->size;
> >>>>> +        }
> >>>>>      } else {
> >>>>>          start_line = 0;
> >>>>>          height     = s->avctx->height;
> >>>>> @@ -572,5 +593,6 @@ AVCodec ff_qtrle_decoder = {
> >>>>>      .init           = qtrle_decode_init,
> >>>>>      .close          = qtrle_decode_end,
> >>>>>      .decode         = qtrle_decode_frame,
> >>>>> -    .capabilities   = AV_CODEC_CAP_DR1,
> >>>>> +    .caps_internal  = FF_CODEC_CAP_SETS_PKT_DTS | FF_CODEC_CAP_SETS_PKT_POS,
> >>>>> +    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY,
> >>>>>  };
> >>>>> diff --git a/tests/ref/fate/qtrle-8bit b/tests/ref/fate/qtrle-8bit
> >>>>> index 27bb8aad71..39a03b7b6c 100644
> >>>>> --- a/tests/ref/fate/qtrle-8bit
> >>>>> +++ b/tests/ref/fate/qtrle-8bit
> >>>>> @@ -61,3 +61,4 @@
> >>>>>  0,        160,        160,        1,   921600, 0xcfd6ad2b
> >>>>>  0,        163,        163,        1,   921600, 0x3b372379
> >>>>>  0,        165,        165,        1,   921600, 0x36f245f5
> >>>>> +0,        166,        166,        1,   921600, 0x36f245f5
> >>>>
> >>>> Following what i said in the nuv patch, do you still experience timeouts
> >>>> with the current codebase, or even if you revert commit
> >>>> a9dacdeea6168787a142209bd19fdd74aefc9dd6? Creating a reference to an
> >>>> existing frame buffer shouldn't be expensive anymore for the fuzzer
> >>>> after my ref counting changes to target_dec_fuzzer.c
> >>>>
> >>>> This is a very ugly solution to a problem that was already solved when
> >>>> reference counting was introduced. Returning duplicate frames to achieve
> >>>> cfr in decoders where it's expected shouldn't affect performance.
> >>>
> >>> Maybe i should ask this backward to make it clearer what this is trying
> >>> to fix.
> >>>
> >>> Consider a patch that would return every frame twice with the same ref
> >>> of course.
> >>> Would you consider this to have 0 performance impact ?
> >>> if every filter following needs to process frames twice 2x CPU load
> >>> and after the filters references would also not be the same anymore
> >>> the following encoder would encoder 2x as many frames 2x CPU load,
> >>> bigger file lower quality per bitrate. Also playback of the resulting
> >>> file would require more cpu time as it has more frames.
> >>>
> >>> I think that would be considered a very bad patch for its performance
> >>> impact.
> >>> So if we do the opposite of removing duplicates why is this so
> >>> controversal ?
> >>>
> >>> This is not about the fuzzer at all ...
> >>
> >> Also about the implementation itself.
> >> This can of course be done in countless other ways
> >> for example one can probably detect the duplicate ref somewhere in common
> >> code and then optionally drop the frames.
> > 
> > This is one of the suggestions i made in the email sent a few minutes
> > ago, yes. Based on a user set option, either dropping the frames in
> > generic code by flagging them as discard
> 
> Of course, this has the same issue as the regression this patch is
> trying to solve. The last frame, being a duplicate, would also be
> discarded by the generic code.

yes, so we need to keep a reference to the last frame and return that at
the end.
can certainly be done, i do not think it will get any other reaction
than a single line "NAK". 
I mean if a single added variable to common code is rejected, this will
need more changes in common code and API
we need to first find a clear consensus what to do before its done
so whoever does it doesnt waste his time ...

Thanks

[...]
Michael Niedermayer Aug. 26, 2019, 7:44 a.m. UTC | #12
On Sun, Aug 25, 2019 at 08:04:03PM -0300, James Almer wrote:
> On 8/25/2019 6:46 PM, Michael Niedermayer wrote:
> > On Sun, Aug 25, 2019 at 01:22:22PM -0300, James Almer wrote:
> >> On 8/24/2019 3:18 PM, Michael Niedermayer wrote:
[...]

> > if every filter following needs to process frames twice 2x CPU load
> > and after the filters references would also not be the same anymore
> > the following encoder would encoder 2x as many frames 2x CPU load,
> > bigger file lower quality per bitrate. Also playback of the resulting
> > file would require more cpu time as it has more frames.
> 
> What if the filter the user injected is meant to affect each and every
> frame in a different way? Think for example a constant fade out. If you
> remove the duplicate ones from what's meant to be a several seconds long
> completely static scene, would that effect be altered? Between dts x and
> dts y there are meant to be 100 frames, but now there's 1.
> I'm not sure how libavfilter works here, but if it tries to fill out the
> missing frames on its own, then the work of inserting the duplicate
> frames for the fade out effect would simply move from lavc to lavfi.

If you run a filter that requires a minimum number of frames per second
then you will need to ensure that to be there.
Theres a wide range of reasons why your input might not have frames
there.
So unless you only work with filter chains designed for a single input
container and codec simply assuming that there would be 25 fps at least
might once in a while bite you with no frame drop patches.
you might even with a single container and codec be hit by a 1fps or
lower slide show once in a while

thx

[...]
Hendrik Leppkes Aug. 26, 2019, 7:51 a.m. UTC | #13
On Mon, Aug 26, 2019 at 9:33 AM Michael Niedermayer
<michael@niedermayer.cc> wrote:
>
> On Sun, Aug 25, 2019 at 08:04:03PM -0300, James Almer wrote:
> > On 8/25/2019 6:46 PM, Michael Niedermayer wrote:
> > > On Sun, Aug 25, 2019 at 01:22:22PM -0300, James Almer wrote:
> > >> On 8/24/2019 3:18 PM, Michael Niedermayer wrote:
> > >>> Fixes: Ticket7880
> > >>>
> > >>> Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
> > >>> ---
> > >>>  libavcodec/qtrle.c        | 30 ++++++++++++++++++++++++++----
> > >>>  tests/ref/fate/qtrle-8bit |  1 +
> > >>>  2 files changed, 27 insertions(+), 4 deletions(-)
> > >>>
> > >>> diff --git a/libavcodec/qtrle.c b/libavcodec/qtrle.c
> > >>> index 2c29547e5a..c22a1a582d 100644
> > >>> --- a/libavcodec/qtrle.c
> > >>> +++ b/libavcodec/qtrle.c
> > >>> @@ -45,6 +45,7 @@ typedef struct QtrleContext {
> > >>>
> > >>>      GetByteContext g;
> > >>>      uint32_t pal[256];
> > >>> +    AVPacket flush_pkt;
> > >>>  } QtrleContext;
> > >>>
> > >>>  #define CHECK_PIXEL_PTR(n)                                                            \
> > >>> @@ -454,11 +455,27 @@ static int qtrle_decode_frame(AVCodecContext *avctx,
> > >>>      int has_palette = 0;
> > >>>      int ret, size;
> > >>>
> > >>> +    if (!avpkt->data) {
> > >>> +        if (avctx->internal->need_flush) {
> > >>> +            avctx->internal->need_flush = 0;
> > >>> +            ret = ff_setup_buffered_frame_for_return(avctx, data, s->frame, &s->flush_pkt);
> > >>> +            if (ret < 0)
> > >>> +                return ret;
> > >>> +            *got_frame = 1;
> > >>> +        }
> > >>> +        return 0;
> > >>> +    }
> > >>> +    s->flush_pkt = *avpkt;
> > >>> +    s->frame->pkt_dts = s->flush_pkt.dts;
> > >>> +
> > >>>      bytestream2_init(&s->g, avpkt->data, avpkt->size);
> > >>>
> > >>>      /* check if this frame is even supposed to change */
> > >>> -    if (avpkt->size < 8)
> > >>> +    if (avpkt->size < 8) {
> > >>> +        avctx->internal->need_flush = 1;
> > >>>          return avpkt->size;
> > >>> +    }
> > >>> +    avctx->internal->need_flush = 0;
> > >>>
> > >>>      /* start after the chunk size */
> > >>>      size = bytestream2_get_be32(&s->g) & 0x3FFFFFFF;
> > >>> @@ -471,14 +488,18 @@ static int qtrle_decode_frame(AVCodecContext *avctx,
> > >>>
> > >>>      /* if a header is present, fetch additional decoding parameters */
> > >>>      if (header & 0x0008) {
> > >>> -        if (avpkt->size < 14)
> > >>> +        if (avpkt->size < 14) {
> > >>> +            avctx->internal->need_flush = 1;
> > >>>              return avpkt->size;
> > >>> +        }
> > >>>          start_line = bytestream2_get_be16(&s->g);
> > >>>          bytestream2_skip(&s->g, 2);
> > >>>          height     = bytestream2_get_be16(&s->g);
> > >>>          bytestream2_skip(&s->g, 2);
> > >>> -        if (height > s->avctx->height - start_line)
> > >>> +        if (height > s->avctx->height - start_line) {
> > >>> +            avctx->internal->need_flush = 1;
> > >>>              return avpkt->size;
> > >>> +        }
> > >>>      } else {
> > >>>          start_line = 0;
> > >>>          height     = s->avctx->height;
> > >>> @@ -572,5 +593,6 @@ AVCodec ff_qtrle_decoder = {
> > >>>      .init           = qtrle_decode_init,
> > >>>      .close          = qtrle_decode_end,
> > >>>      .decode         = qtrle_decode_frame,
> > >>> -    .capabilities   = AV_CODEC_CAP_DR1,
> > >>> +    .caps_internal  = FF_CODEC_CAP_SETS_PKT_DTS | FF_CODEC_CAP_SETS_PKT_POS,
> > >>> +    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY,
> > >>>  };
> > >>> diff --git a/tests/ref/fate/qtrle-8bit b/tests/ref/fate/qtrle-8bit
> > >>> index 27bb8aad71..39a03b7b6c 100644
> > >>> --- a/tests/ref/fate/qtrle-8bit
> > >>> +++ b/tests/ref/fate/qtrle-8bit
> > >>> @@ -61,3 +61,4 @@
> > >>>  0,        160,        160,        1,   921600, 0xcfd6ad2b
> > >>>  0,        163,        163,        1,   921600, 0x3b372379
> > >>>  0,        165,        165,        1,   921600, 0x36f245f5
> > >>> +0,        166,        166,        1,   921600, 0x36f245f5
> > >>
> > >> Following what i said in the nuv patch, do you still experience timeouts
> > >> with the current codebase, or even if you revert commit
> > >> a9dacdeea6168787a142209bd19fdd74aefc9dd6? Creating a reference to an
> > >> existing frame buffer shouldn't be expensive anymore for the fuzzer
> > >> after my ref counting changes to target_dec_fuzzer.c
> > >>
> > >> This is a very ugly solution to a problem that was already solved when
> > >> reference counting was introduced. Returning duplicate frames to achieve
> > >> cfr in decoders where it's expected shouldn't affect performance.
> > >
> > > Maybe i should ask this backward to make it clearer what this is trying
> > > to fix.
> > >
> > > Consider a patch that would return every frame twice with the same ref
> > > of course.
> > > Would you consider this to have 0 performance impact ?
> >
> > No, but it would be negligible.
>
> ok, lets see some actual numbers
>
> This comparission is about fixing ticket 7880 (just saying so we dont loose
> track of what we compare here)
>
> this qtrle patchset which fixes the last frame
> time ./ffmpeg -i clock.mov -vf scale=1920x1080 -qscale 2 -y test-new.avi
> real            0m0.233s
> user            0m0.404s
> sys             0m0.036s
> -rw-r----- 1 michael michael 769212 Aug 26 08:38 test-new.avi
> time ./ffmpeg -i test-new.avi -f null -
> real            0m0.095s
> user            0m0.131s
> sys             0m0.038s
>
> The alternative of reverting the code that discards duplicate frames
> (this i believe is what the people opposing this patchset here want)
> git revert  a9dacdeea6168787a142209bd19fdd74aefc9dd6
> time ./ffmpeg -i clock.mov -vf scale=1920x1080 -qscale 2 -y test.avi
> real            0m1.208s
> user            0m2.866s
> sys             0m0.168s
> -rw-r----- 1 michael michael 3274430 Aug 26 08:44 test.avi
> time ./ffmpeg -i test.avi -f null -
> real            0m0.185s
> user            0m0.685s
> sys             0m0.076s
>
> https://samples.ffmpeg.org/ffmpeg-bugs/trac/ticket7880/clock.mov
>
> As we can see the solution preferred by the opposition to this patchset
> will make the code 6 times slower and result in 4 times higher bitrate for
> the same quality.
> I would say this is not "negligible"
> you know this is not 0.6%, its not 6% its not 60% it is 600%
>
> I do not understand why we have a discussion here about this.
> Do we want 600% slower code ?
> Do our users want 600% slower code ?
>
> I do not want 600% slower code
>

Speed at the expense of correctness should not even be an argument.
You seem to not value at all the correctness of proper consistent CFR
output, while many of us others do.  The need for CFR output cannot be
explained away with performance numbers.

- Hendrik
Paul B Mahol Aug. 26, 2019, 7:51 a.m. UTC | #14
On Mon, Aug 26, 2019 at 9:37 AM Michael Niedermayer <michael@niedermayer.cc>
wrote:

> On Sun, Aug 25, 2019 at 11:43:42PM -0300, James Almer wrote:
> > On 8/25/2019 8:18 PM, James Almer wrote:
> > > On 8/25/2019 7:14 PM, Michael Niedermayer wrote:
> > >> On Sun, Aug 25, 2019 at 11:46:36PM +0200, Michael Niedermayer wrote:
> > >>> On Sun, Aug 25, 2019 at 01:22:22PM -0300, James Almer wrote:
> > >>>> On 8/24/2019 3:18 PM, Michael Niedermayer wrote:
> > >>>>> Fixes: Ticket7880
> > >>>>>
> > >>>>> Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
> > >>>>> ---
> > >>>>>  libavcodec/qtrle.c        | 30 ++++++++++++++++++++++++++----
> > >>>>>  tests/ref/fate/qtrle-8bit |  1 +
> > >>>>>  2 files changed, 27 insertions(+), 4 deletions(-)
> > >>>>>
> > >>>>> diff --git a/libavcodec/qtrle.c b/libavcodec/qtrle.c
> > >>>>> index 2c29547e5a..c22a1a582d 100644
> > >>>>> --- a/libavcodec/qtrle.c
> > >>>>> +++ b/libavcodec/qtrle.c
> > >>>>> @@ -45,6 +45,7 @@ typedef struct QtrleContext {
> > >>>>>
> > >>>>>      GetByteContext g;
> > >>>>>      uint32_t pal[256];
> > >>>>> +    AVPacket flush_pkt;
> > >>>>>  } QtrleContext;
> > >>>>>
> > >>>>>  #define CHECK_PIXEL_PTR(n)
>                     \
> > >>>>> @@ -454,11 +455,27 @@ static int qtrle_decode_frame(AVCodecContext
> *avctx,
> > >>>>>      int has_palette = 0;
> > >>>>>      int ret, size;
> > >>>>>
> > >>>>> +    if (!avpkt->data) {
> > >>>>> +        if (avctx->internal->need_flush) {
> > >>>>> +            avctx->internal->need_flush = 0;
> > >>>>> +            ret = ff_setup_buffered_frame_for_return(avctx, data,
> s->frame, &s->flush_pkt);
> > >>>>> +            if (ret < 0)
> > >>>>> +                return ret;
> > >>>>> +            *got_frame = 1;
> > >>>>> +        }
> > >>>>> +        return 0;
> > >>>>> +    }
> > >>>>> +    s->flush_pkt = *avpkt;
> > >>>>> +    s->frame->pkt_dts = s->flush_pkt.dts;
> > >>>>> +
> > >>>>>      bytestream2_init(&s->g, avpkt->data, avpkt->size);
> > >>>>>
> > >>>>>      /* check if this frame is even supposed to change */
> > >>>>> -    if (avpkt->size < 8)
> > >>>>> +    if (avpkt->size < 8) {
> > >>>>> +        avctx->internal->need_flush = 1;
> > >>>>>          return avpkt->size;
> > >>>>> +    }
> > >>>>> +    avctx->internal->need_flush = 0;
> > >>>>>
> > >>>>>      /* start after the chunk size */
> > >>>>>      size = bytestream2_get_be32(&s->g) & 0x3FFFFFFF;
> > >>>>> @@ -471,14 +488,18 @@ static int qtrle_decode_frame(AVCodecContext
> *avctx,
> > >>>>>
> > >>>>>      /* if a header is present, fetch additional decoding
> parameters */
> > >>>>>      if (header & 0x0008) {
> > >>>>> -        if (avpkt->size < 14)
> > >>>>> +        if (avpkt->size < 14) {
> > >>>>> +            avctx->internal->need_flush = 1;
> > >>>>>              return avpkt->size;
> > >>>>> +        }
> > >>>>>          start_line = bytestream2_get_be16(&s->g);
> > >>>>>          bytestream2_skip(&s->g, 2);
> > >>>>>          height     = bytestream2_get_be16(&s->g);
> > >>>>>          bytestream2_skip(&s->g, 2);
> > >>>>> -        if (height > s->avctx->height - start_line)
> > >>>>> +        if (height > s->avctx->height - start_line) {
> > >>>>> +            avctx->internal->need_flush = 1;
> > >>>>>              return avpkt->size;
> > >>>>> +        }
> > >>>>>      } else {
> > >>>>>          start_line = 0;
> > >>>>>          height     = s->avctx->height;
> > >>>>> @@ -572,5 +593,6 @@ AVCodec ff_qtrle_decoder = {
> > >>>>>      .init           = qtrle_decode_init,
> > >>>>>      .close          = qtrle_decode_end,
> > >>>>>      .decode         = qtrle_decode_frame,
> > >>>>> -    .capabilities   = AV_CODEC_CAP_DR1,
> > >>>>> +    .caps_internal  = FF_CODEC_CAP_SETS_PKT_DTS |
> FF_CODEC_CAP_SETS_PKT_POS,
> > >>>>> +    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY,
> > >>>>>  };
> > >>>>> diff --git a/tests/ref/fate/qtrle-8bit b/tests/ref/fate/qtrle-8bit
> > >>>>> index 27bb8aad71..39a03b7b6c 100644
> > >>>>> --- a/tests/ref/fate/qtrle-8bit
> > >>>>> +++ b/tests/ref/fate/qtrle-8bit
> > >>>>> @@ -61,3 +61,4 @@
> > >>>>>  0,        160,        160,        1,   921600, 0xcfd6ad2b
> > >>>>>  0,        163,        163,        1,   921600, 0x3b372379
> > >>>>>  0,        165,        165,        1,   921600, 0x36f245f5
> > >>>>> +0,        166,        166,        1,   921600, 0x36f245f5
> > >>>>
> > >>>> Following what i said in the nuv patch, do you still experience
> timeouts
> > >>>> with the current codebase, or even if you revert commit
> > >>>> a9dacdeea6168787a142209bd19fdd74aefc9dd6? Creating a reference to an
> > >>>> existing frame buffer shouldn't be expensive anymore for the fuzzer
> > >>>> after my ref counting changes to target_dec_fuzzer.c
> > >>>>
> > >>>> This is a very ugly solution to a problem that was already solved
> when
> > >>>> reference counting was introduced. Returning duplicate frames to
> achieve
> > >>>> cfr in decoders where it's expected shouldn't affect performance.
> > >>>
> > >>> Maybe i should ask this backward to make it clearer what this is
> trying
> > >>> to fix.
> > >>>
> > >>> Consider a patch that would return every frame twice with the same
> ref
> > >>> of course.
> > >>> Would you consider this to have 0 performance impact ?
> > >>> if every filter following needs to process frames twice 2x CPU load
> > >>> and after the filters references would also not be the same anymore
> > >>> the following encoder would encoder 2x as many frames 2x CPU load,
> > >>> bigger file lower quality per bitrate. Also playback of the resulting
> > >>> file would require more cpu time as it has more frames.
> > >>>
> > >>> I think that would be considered a very bad patch for its performance
> > >>> impact.
> > >>> So if we do the opposite of removing duplicates why is this so
> > >>> controversal ?
> > >>>
> > >>> This is not about the fuzzer at all ...
> > >>
> > >> Also about the implementation itself.
> > >> This can of course be done in countless other ways
> > >> for example one can probably detect the duplicate ref somewhere in
> common
> > >> code and then optionally drop the frames.
> > >
> > > This is one of the suggestions i made in the email sent a few minutes
> > > ago, yes. Based on a user set option, either dropping the frames in
> > > generic code by flagging them as discard
> >
> > Of course, this has the same issue as the regression this patch is
> > trying to solve. The last frame, being a duplicate, would also be
> > discarded by the generic code.
>
> yes, so we need to keep a reference to the last frame and return that at
> the end.
> can certainly be done, i do not think it will get any other reaction
> than a single line "NAK".
> I mean if a single added variable to common code is rejected, this will
> need more changes in common code and API
> we need to first find a clear consensus what to do before its done
> so whoever does it doesnt waste his time ...
>


Sure, there is other very large opposition but they are as usual very
silent.
Do you remember your fraps codec change which broke it in same way as qtrle.
You obviously cant remember that deep in past.

If you so care for speed, just abort immediately - no need to decode
anything.




>
> Thanks
>
> [...]
> --
> Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
>
> Many that live deserve death. And some that die deserve life. Can you give
> it to them? Then do not be too eager to deal out death in judgement. For
> even the very wise cannot see all ends. -- Gandalf
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
Paul B Mahol Aug. 26, 2019, 8:28 a.m. UTC | #15
On Mon, Aug 26, 2019 at 9:51 AM Paul B Mahol <onemda@gmail.com> wrote:

>
>
> On Mon, Aug 26, 2019 at 9:37 AM Michael Niedermayer <michael@niedermayer.cc>
> wrote:
>
>> On Sun, Aug 25, 2019 at 11:43:42PM -0300, James Almer wrote:
>> > On 8/25/2019 8:18 PM, James Almer wrote:
>> > > On 8/25/2019 7:14 PM, Michael Niedermayer wrote:
>> > >> On Sun, Aug 25, 2019 at 11:46:36PM +0200, Michael Niedermayer wrote:
>> > >>> On Sun, Aug 25, 2019 at 01:22:22PM -0300, James Almer wrote:
>> > >>>> On 8/24/2019 3:18 PM, Michael Niedermayer wrote:
>> > >>>>> Fixes: Ticket7880
>> > >>>>>
>> > >>>>> Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
>> > >>>>> ---
>> > >>>>>  libavcodec/qtrle.c        | 30 ++++++++++++++++++++++++++----
>> > >>>>>  tests/ref/fate/qtrle-8bit |  1 +
>> > >>>>>  2 files changed, 27 insertions(+), 4 deletions(-)
>> > >>>>>
>> > >>>>> diff --git a/libavcodec/qtrle.c b/libavcodec/qtrle.c
>> > >>>>> index 2c29547e5a..c22a1a582d 100644
>> > >>>>> --- a/libavcodec/qtrle.c
>> > >>>>> +++ b/libavcodec/qtrle.c
>> > >>>>> @@ -45,6 +45,7 @@ typedef struct QtrleContext {
>> > >>>>>
>> > >>>>>      GetByteContext g;
>> > >>>>>      uint32_t pal[256];
>> > >>>>> +    AVPacket flush_pkt;
>> > >>>>>  } QtrleContext;
>> > >>>>>
>> > >>>>>  #define CHECK_PIXEL_PTR(n)
>>                       \
>> > >>>>> @@ -454,11 +455,27 @@ static int
>> qtrle_decode_frame(AVCodecContext *avctx,
>> > >>>>>      int has_palette = 0;
>> > >>>>>      int ret, size;
>> > >>>>>
>> > >>>>> +    if (!avpkt->data) {
>> > >>>>> +        if (avctx->internal->need_flush) {
>> > >>>>> +            avctx->internal->need_flush = 0;
>> > >>>>> +            ret = ff_setup_buffered_frame_for_return(avctx,
>> data, s->frame, &s->flush_pkt);
>> > >>>>> +            if (ret < 0)
>> > >>>>> +                return ret;
>> > >>>>> +            *got_frame = 1;
>> > >>>>> +        }
>> > >>>>> +        return 0;
>> > >>>>> +    }
>> > >>>>> +    s->flush_pkt = *avpkt;
>> > >>>>> +    s->frame->pkt_dts = s->flush_pkt.dts;
>> > >>>>> +
>> > >>>>>      bytestream2_init(&s->g, avpkt->data, avpkt->size);
>> > >>>>>
>> > >>>>>      /* check if this frame is even supposed to change */
>> > >>>>> -    if (avpkt->size < 8)
>> > >>>>> +    if (avpkt->size < 8) {
>> > >>>>> +        avctx->internal->need_flush = 1;
>> > >>>>>          return avpkt->size;
>> > >>>>> +    }
>> > >>>>> +    avctx->internal->need_flush = 0;
>> > >>>>>
>> > >>>>>      /* start after the chunk size */
>> > >>>>>      size = bytestream2_get_be32(&s->g) & 0x3FFFFFFF;
>> > >>>>> @@ -471,14 +488,18 @@ static int
>> qtrle_decode_frame(AVCodecContext *avctx,
>> > >>>>>
>> > >>>>>      /* if a header is present, fetch additional decoding
>> parameters */
>> > >>>>>      if (header & 0x0008) {
>> > >>>>> -        if (avpkt->size < 14)
>> > >>>>> +        if (avpkt->size < 14) {
>> > >>>>> +            avctx->internal->need_flush = 1;
>> > >>>>>              return avpkt->size;
>> > >>>>> +        }
>> > >>>>>          start_line = bytestream2_get_be16(&s->g);
>> > >>>>>          bytestream2_skip(&s->g, 2);
>> > >>>>>          height     = bytestream2_get_be16(&s->g);
>> > >>>>>          bytestream2_skip(&s->g, 2);
>> > >>>>> -        if (height > s->avctx->height - start_line)
>> > >>>>> +        if (height > s->avctx->height - start_line) {
>> > >>>>> +            avctx->internal->need_flush = 1;
>> > >>>>>              return avpkt->size;
>> > >>>>> +        }
>> > >>>>>      } else {
>> > >>>>>          start_line = 0;
>> > >>>>>          height     = s->avctx->height;
>> > >>>>> @@ -572,5 +593,6 @@ AVCodec ff_qtrle_decoder = {
>> > >>>>>      .init           = qtrle_decode_init,
>> > >>>>>      .close          = qtrle_decode_end,
>> > >>>>>      .decode         = qtrle_decode_frame,
>> > >>>>> -    .capabilities   = AV_CODEC_CAP_DR1,
>> > >>>>> +    .caps_internal  = FF_CODEC_CAP_SETS_PKT_DTS |
>> FF_CODEC_CAP_SETS_PKT_POS,
>> > >>>>> +    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY,
>> > >>>>>  };
>> > >>>>> diff --git a/tests/ref/fate/qtrle-8bit b/tests/ref/fate/qtrle-8bit
>> > >>>>> index 27bb8aad71..39a03b7b6c 100644
>> > >>>>> --- a/tests/ref/fate/qtrle-8bit
>> > >>>>> +++ b/tests/ref/fate/qtrle-8bit
>> > >>>>> @@ -61,3 +61,4 @@
>> > >>>>>  0,        160,        160,        1,   921600, 0xcfd6ad2b
>> > >>>>>  0,        163,        163,        1,   921600, 0x3b372379
>> > >>>>>  0,        165,        165,        1,   921600, 0x36f245f5
>> > >>>>> +0,        166,        166,        1,   921600, 0x36f245f5
>> > >>>>
>> > >>>> Following what i said in the nuv patch, do you still experience
>> timeouts
>> > >>>> with the current codebase, or even if you revert commit
>> > >>>> a9dacdeea6168787a142209bd19fdd74aefc9dd6? Creating a reference to
>> an
>> > >>>> existing frame buffer shouldn't be expensive anymore for the fuzzer
>> > >>>> after my ref counting changes to target_dec_fuzzer.c
>> > >>>>
>> > >>>> This is a very ugly solution to a problem that was already solved
>> when
>> > >>>> reference counting was introduced. Returning duplicate frames to
>> achieve
>> > >>>> cfr in decoders where it's expected shouldn't affect performance.
>> > >>>
>> > >>> Maybe i should ask this backward to make it clearer what this is
>> trying
>> > >>> to fix.
>> > >>>
>> > >>> Consider a patch that would return every frame twice with the same
>> ref
>> > >>> of course.
>> > >>> Would you consider this to have 0 performance impact ?
>> > >>> if every filter following needs to process frames twice 2x CPU load
>> > >>> and after the filters references would also not be the same anymore
>> > >>> the following encoder would encoder 2x as many frames 2x CPU load,
>> > >>> bigger file lower quality per bitrate. Also playback of the
>> resulting
>> > >>> file would require more cpu time as it has more frames.
>> > >>>
>> > >>> I think that would be considered a very bad patch for its
>> performance
>> > >>> impact.
>> > >>> So if we do the opposite of removing duplicates why is this so
>> > >>> controversal ?
>> > >>>
>> > >>> This is not about the fuzzer at all ...
>> > >>
>> > >> Also about the implementation itself.
>> > >> This can of course be done in countless other ways
>> > >> for example one can probably detect the duplicate ref somewhere in
>> common
>> > >> code and then optionally drop the frames.
>> > >
>> > > This is one of the suggestions i made in the email sent a few minutes
>> > > ago, yes. Based on a user set option, either dropping the frames in
>> > > generic code by flagging them as discard
>> >
>> > Of course, this has the same issue as the regression this patch is
>> > trying to solve. The last frame, being a duplicate, would also be
>> > discarded by the generic code.
>>
>> yes, so we need to keep a reference to the last frame and return that at
>> the end.
>> can certainly be done, i do not think it will get any other reaction
>> than a single line "NAK".
>> I mean if a single added variable to common code is rejected, this will
>> need more changes in common code and API
>> we need to first find a clear consensus what to do before its done
>> so whoever does it doesnt waste his time ...
>>
>
>
> Sure, there is other very large opposition but they are as usual very
> silent.
> Do you remember your fraps codec change which broke it in same way as
> qtrle.
>

Actually, it was not your change, but Reimar change. Which is little
puzzling why so much before all other changes that follow same agenda.

>
Michael Niedermayer Aug. 26, 2019, 9:09 a.m. UTC | #16
On Mon, Aug 26, 2019 at 09:51:45AM +0200, Hendrik Leppkes wrote:
> On Mon, Aug 26, 2019 at 9:33 AM Michael Niedermayer
> <michael@niedermayer.cc> wrote:
> >
> > On Sun, Aug 25, 2019 at 08:04:03PM -0300, James Almer wrote:
> > > On 8/25/2019 6:46 PM, Michael Niedermayer wrote:
> > > > On Sun, Aug 25, 2019 at 01:22:22PM -0300, James Almer wrote:
> > > >> On 8/24/2019 3:18 PM, Michael Niedermayer wrote:
> > > >>> Fixes: Ticket7880
> > > >>>
> > > >>> Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
> > > >>> ---
> > > >>>  libavcodec/qtrle.c        | 30 ++++++++++++++++++++++++++----
> > > >>>  tests/ref/fate/qtrle-8bit |  1 +
> > > >>>  2 files changed, 27 insertions(+), 4 deletions(-)
> > > >>>
> > > >>> diff --git a/libavcodec/qtrle.c b/libavcodec/qtrle.c
> > > >>> index 2c29547e5a..c22a1a582d 100644
> > > >>> --- a/libavcodec/qtrle.c
> > > >>> +++ b/libavcodec/qtrle.c
> > > >>> @@ -45,6 +45,7 @@ typedef struct QtrleContext {
> > > >>>
> > > >>>      GetByteContext g;
> > > >>>      uint32_t pal[256];
> > > >>> +    AVPacket flush_pkt;
> > > >>>  } QtrleContext;
> > > >>>
> > > >>>  #define CHECK_PIXEL_PTR(n)                                                            \
> > > >>> @@ -454,11 +455,27 @@ static int qtrle_decode_frame(AVCodecContext *avctx,
> > > >>>      int has_palette = 0;
> > > >>>      int ret, size;
> > > >>>
> > > >>> +    if (!avpkt->data) {
> > > >>> +        if (avctx->internal->need_flush) {
> > > >>> +            avctx->internal->need_flush = 0;
> > > >>> +            ret = ff_setup_buffered_frame_for_return(avctx, data, s->frame, &s->flush_pkt);
> > > >>> +            if (ret < 0)
> > > >>> +                return ret;
> > > >>> +            *got_frame = 1;
> > > >>> +        }
> > > >>> +        return 0;
> > > >>> +    }
> > > >>> +    s->flush_pkt = *avpkt;
> > > >>> +    s->frame->pkt_dts = s->flush_pkt.dts;
> > > >>> +
> > > >>>      bytestream2_init(&s->g, avpkt->data, avpkt->size);
> > > >>>
> > > >>>      /* check if this frame is even supposed to change */
> > > >>> -    if (avpkt->size < 8)
> > > >>> +    if (avpkt->size < 8) {
> > > >>> +        avctx->internal->need_flush = 1;
> > > >>>          return avpkt->size;
> > > >>> +    }
> > > >>> +    avctx->internal->need_flush = 0;
> > > >>>
> > > >>>      /* start after the chunk size */
> > > >>>      size = bytestream2_get_be32(&s->g) & 0x3FFFFFFF;
> > > >>> @@ -471,14 +488,18 @@ static int qtrle_decode_frame(AVCodecContext *avctx,
> > > >>>
> > > >>>      /* if a header is present, fetch additional decoding parameters */
> > > >>>      if (header & 0x0008) {
> > > >>> -        if (avpkt->size < 14)
> > > >>> +        if (avpkt->size < 14) {
> > > >>> +            avctx->internal->need_flush = 1;
> > > >>>              return avpkt->size;
> > > >>> +        }
> > > >>>          start_line = bytestream2_get_be16(&s->g);
> > > >>>          bytestream2_skip(&s->g, 2);
> > > >>>          height     = bytestream2_get_be16(&s->g);
> > > >>>          bytestream2_skip(&s->g, 2);
> > > >>> -        if (height > s->avctx->height - start_line)
> > > >>> +        if (height > s->avctx->height - start_line) {
> > > >>> +            avctx->internal->need_flush = 1;
> > > >>>              return avpkt->size;
> > > >>> +        }
> > > >>>      } else {
> > > >>>          start_line = 0;
> > > >>>          height     = s->avctx->height;
> > > >>> @@ -572,5 +593,6 @@ AVCodec ff_qtrle_decoder = {
> > > >>>      .init           = qtrle_decode_init,
> > > >>>      .close          = qtrle_decode_end,
> > > >>>      .decode         = qtrle_decode_frame,
> > > >>> -    .capabilities   = AV_CODEC_CAP_DR1,
> > > >>> +    .caps_internal  = FF_CODEC_CAP_SETS_PKT_DTS | FF_CODEC_CAP_SETS_PKT_POS,
> > > >>> +    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY,
> > > >>>  };
> > > >>> diff --git a/tests/ref/fate/qtrle-8bit b/tests/ref/fate/qtrle-8bit
> > > >>> index 27bb8aad71..39a03b7b6c 100644
> > > >>> --- a/tests/ref/fate/qtrle-8bit
> > > >>> +++ b/tests/ref/fate/qtrle-8bit
> > > >>> @@ -61,3 +61,4 @@
> > > >>>  0,        160,        160,        1,   921600, 0xcfd6ad2b
> > > >>>  0,        163,        163,        1,   921600, 0x3b372379
> > > >>>  0,        165,        165,        1,   921600, 0x36f245f5
> > > >>> +0,        166,        166,        1,   921600, 0x36f245f5
> > > >>
> > > >> Following what i said in the nuv patch, do you still experience timeouts
> > > >> with the current codebase, or even if you revert commit
> > > >> a9dacdeea6168787a142209bd19fdd74aefc9dd6? Creating a reference to an
> > > >> existing frame buffer shouldn't be expensive anymore for the fuzzer
> > > >> after my ref counting changes to target_dec_fuzzer.c
> > > >>
> > > >> This is a very ugly solution to a problem that was already solved when
> > > >> reference counting was introduced. Returning duplicate frames to achieve
> > > >> cfr in decoders where it's expected shouldn't affect performance.
> > > >
> > > > Maybe i should ask this backward to make it clearer what this is trying
> > > > to fix.
> > > >
> > > > Consider a patch that would return every frame twice with the same ref
> > > > of course.
> > > > Would you consider this to have 0 performance impact ?
> > >
> > > No, but it would be negligible.
> >
> > ok, lets see some actual numbers
> >
> > This comparission is about fixing ticket 7880 (just saying so we dont loose
> > track of what we compare here)
> >
> > this qtrle patchset which fixes the last frame
> > time ./ffmpeg -i clock.mov -vf scale=1920x1080 -qscale 2 -y test-new.avi
> > real            0m0.233s
> > user            0m0.404s
> > sys             0m0.036s
> > -rw-r----- 1 michael michael 769212 Aug 26 08:38 test-new.avi
> > time ./ffmpeg -i test-new.avi -f null -
> > real            0m0.095s
> > user            0m0.131s
> > sys             0m0.038s
> >
> > The alternative of reverting the code that discards duplicate frames
> > (this i believe is what the people opposing this patchset here want)
> > git revert  a9dacdeea6168787a142209bd19fdd74aefc9dd6
> > time ./ffmpeg -i clock.mov -vf scale=1920x1080 -qscale 2 -y test.avi
> > real            0m1.208s
> > user            0m2.866s
> > sys             0m0.168s
> > -rw-r----- 1 michael michael 3274430 Aug 26 08:44 test.avi
> > time ./ffmpeg -i test.avi -f null -
> > real            0m0.185s
> > user            0m0.685s
> > sys             0m0.076s
> >
> > https://samples.ffmpeg.org/ffmpeg-bugs/trac/ticket7880/clock.mov
> >
> > As we can see the solution preferred by the opposition to this patchset
> > will make the code 6 times slower and result in 4 times higher bitrate for
> > the same quality.
> > I would say this is not "negligible"
> > you know this is not 0.6%, its not 6% its not 60% it is 600%
> >
> > I do not understand why we have a discussion here about this.
> > Do we want 600% slower code ?
> > Do our users want 600% slower code ?
> >
> > I do not want 600% slower code
> >
> 
> Speed at the expense of correctness should not even be an argument.

and here we are back to square 0 because for all mainstream codecs
we drop duplicates and fail this correctness.
And noone wants to change that no matter which side of the argument one
is on.


> You seem to not value at all the correctness of proper consistent CFR
> output, while many of us others do.  The need for CFR output cannot be
> explained away with performance numbers.

being correct in the "CFR" sense and still being fast is possible, these
are not exclusive. 
But we lack consensus to go that direction as well as exactly how as there
are several possibilities

Thanks

[...]
Paul B Mahol Aug. 26, 2019, 10:21 a.m. UTC | #17
On Mon, Aug 26, 2019 at 11:09 AM Michael Niedermayer <michael@niedermayer.cc>
wrote:

> On Mon, Aug 26, 2019 at 09:51:45AM +0200, Hendrik Leppkes wrote:
> > On Mon, Aug 26, 2019 at 9:33 AM Michael Niedermayer
> > <michael@niedermayer.cc> wrote:
> > >
> > > On Sun, Aug 25, 2019 at 08:04:03PM -0300, James Almer wrote:
> > > > On 8/25/2019 6:46 PM, Michael Niedermayer wrote:
> > > > > On Sun, Aug 25, 2019 at 01:22:22PM -0300, James Almer wrote:
> > > > >> On 8/24/2019 3:18 PM, Michael Niedermayer wrote:
> > > > >>> Fixes: Ticket7880
> > > > >>>
> > > > >>> Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
> > > > >>> ---
> > > > >>>  libavcodec/qtrle.c        | 30 ++++++++++++++++++++++++++----
> > > > >>>  tests/ref/fate/qtrle-8bit |  1 +
> > > > >>>  2 files changed, 27 insertions(+), 4 deletions(-)
> > > > >>>
> > > > >>> diff --git a/libavcodec/qtrle.c b/libavcodec/qtrle.c
> > > > >>> index 2c29547e5a..c22a1a582d 100644
> > > > >>> --- a/libavcodec/qtrle.c
> > > > >>> +++ b/libavcodec/qtrle.c
> > > > >>> @@ -45,6 +45,7 @@ typedef struct QtrleContext {
> > > > >>>
> > > > >>>      GetByteContext g;
> > > > >>>      uint32_t pal[256];
> > > > >>> +    AVPacket flush_pkt;
> > > > >>>  } QtrleContext;
> > > > >>>
> > > > >>>  #define CHECK_PIXEL_PTR(n)
>                       \
> > > > >>> @@ -454,11 +455,27 @@ static int
> qtrle_decode_frame(AVCodecContext *avctx,
> > > > >>>      int has_palette = 0;
> > > > >>>      int ret, size;
> > > > >>>
> > > > >>> +    if (!avpkt->data) {
> > > > >>> +        if (avctx->internal->need_flush) {
> > > > >>> +            avctx->internal->need_flush = 0;
> > > > >>> +            ret = ff_setup_buffered_frame_for_return(avctx,
> data, s->frame, &s->flush_pkt);
> > > > >>> +            if (ret < 0)
> > > > >>> +                return ret;
> > > > >>> +            *got_frame = 1;
> > > > >>> +        }
> > > > >>> +        return 0;
> > > > >>> +    }
> > > > >>> +    s->flush_pkt = *avpkt;
> > > > >>> +    s->frame->pkt_dts = s->flush_pkt.dts;
> > > > >>> +
> > > > >>>      bytestream2_init(&s->g, avpkt->data, avpkt->size);
> > > > >>>
> > > > >>>      /* check if this frame is even supposed to change */
> > > > >>> -    if (avpkt->size < 8)
> > > > >>> +    if (avpkt->size < 8) {
> > > > >>> +        avctx->internal->need_flush = 1;
> > > > >>>          return avpkt->size;
> > > > >>> +    }
> > > > >>> +    avctx->internal->need_flush = 0;
> > > > >>>
> > > > >>>      /* start after the chunk size */
> > > > >>>      size = bytestream2_get_be32(&s->g) & 0x3FFFFFFF;
> > > > >>> @@ -471,14 +488,18 @@ static int
> qtrle_decode_frame(AVCodecContext *avctx,
> > > > >>>
> > > > >>>      /* if a header is present, fetch additional decoding
> parameters */
> > > > >>>      if (header & 0x0008) {
> > > > >>> -        if (avpkt->size < 14)
> > > > >>> +        if (avpkt->size < 14) {
> > > > >>> +            avctx->internal->need_flush = 1;
> > > > >>>              return avpkt->size;
> > > > >>> +        }
> > > > >>>          start_line = bytestream2_get_be16(&s->g);
> > > > >>>          bytestream2_skip(&s->g, 2);
> > > > >>>          height     = bytestream2_get_be16(&s->g);
> > > > >>>          bytestream2_skip(&s->g, 2);
> > > > >>> -        if (height > s->avctx->height - start_line)
> > > > >>> +        if (height > s->avctx->height - start_line) {
> > > > >>> +            avctx->internal->need_flush = 1;
> > > > >>>              return avpkt->size;
> > > > >>> +        }
> > > > >>>      } else {
> > > > >>>          start_line = 0;
> > > > >>>          height     = s->avctx->height;
> > > > >>> @@ -572,5 +593,6 @@ AVCodec ff_qtrle_decoder = {
> > > > >>>      .init           = qtrle_decode_init,
> > > > >>>      .close          = qtrle_decode_end,
> > > > >>>      .decode         = qtrle_decode_frame,
> > > > >>> -    .capabilities   = AV_CODEC_CAP_DR1,
> > > > >>> +    .caps_internal  = FF_CODEC_CAP_SETS_PKT_DTS |
> FF_CODEC_CAP_SETS_PKT_POS,
> > > > >>> +    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY,
> > > > >>>  };
> > > > >>> diff --git a/tests/ref/fate/qtrle-8bit
> b/tests/ref/fate/qtrle-8bit
> > > > >>> index 27bb8aad71..39a03b7b6c 100644
> > > > >>> --- a/tests/ref/fate/qtrle-8bit
> > > > >>> +++ b/tests/ref/fate/qtrle-8bit
> > > > >>> @@ -61,3 +61,4 @@
> > > > >>>  0,        160,        160,        1,   921600, 0xcfd6ad2b
> > > > >>>  0,        163,        163,        1,   921600, 0x3b372379
> > > > >>>  0,        165,        165,        1,   921600, 0x36f245f5
> > > > >>> +0,        166,        166,        1,   921600, 0x36f245f5
> > > > >>
> > > > >> Following what i said in the nuv patch, do you still experience
> timeouts
> > > > >> with the current codebase, or even if you revert commit
> > > > >> a9dacdeea6168787a142209bd19fdd74aefc9dd6? Creating a reference to
> an
> > > > >> existing frame buffer shouldn't be expensive anymore for the
> fuzzer
> > > > >> after my ref counting changes to target_dec_fuzzer.c
> > > > >>
> > > > >> This is a very ugly solution to a problem that was already solved
> when
> > > > >> reference counting was introduced. Returning duplicate frames to
> achieve
> > > > >> cfr in decoders where it's expected shouldn't affect performance.
> > > > >
> > > > > Maybe i should ask this backward to make it clearer what this is
> trying
> > > > > to fix.
> > > > >
> > > > > Consider a patch that would return every frame twice with the same
> ref
> > > > > of course.
> > > > > Would you consider this to have 0 performance impact ?
> > > >
> > > > No, but it would be negligible.
> > >
> > > ok, lets see some actual numbers
> > >
> > > This comparission is about fixing ticket 7880 (just saying so we dont
> loose
> > > track of what we compare here)
> > >
> > > this qtrle patchset which fixes the last frame
> > > time ./ffmpeg -i clock.mov -vf scale=1920x1080 -qscale 2 -y
> test-new.avi
> > > real            0m0.233s
> > > user            0m0.404s
> > > sys             0m0.036s
> > > -rw-r----- 1 michael michael 769212 Aug 26 08:38 test-new.avi
> > > time ./ffmpeg -i test-new.avi -f null -
> > > real            0m0.095s
> > > user            0m0.131s
> > > sys             0m0.038s
> > >
> > > The alternative of reverting the code that discards duplicate frames
> > > (this i believe is what the people opposing this patchset here want)
> > > git revert  a9dacdeea6168787a142209bd19fdd74aefc9dd6
> > > time ./ffmpeg -i clock.mov -vf scale=1920x1080 -qscale 2 -y test.avi
> > > real            0m1.208s
> > > user            0m2.866s
> > > sys             0m0.168s
> > > -rw-r----- 1 michael michael 3274430 Aug 26 08:44 test.avi
> > > time ./ffmpeg -i test.avi -f null -
> > > real            0m0.185s
> > > user            0m0.685s
> > > sys             0m0.076s
> > >
> > > https://samples.ffmpeg.org/ffmpeg-bugs/trac/ticket7880/clock.mov
> > >
> > > As we can see the solution preferred by the opposition to this patchset
> > > will make the code 6 times slower and result in 4 times higher bitrate
> for
> > > the same quality.
> > > I would say this is not "negligible"
> > > you know this is not 0.6%, its not 6% its not 60% it is 600%
> > >
> > > I do not understand why we have a discussion here about this.
> > > Do we want 600% slower code ?
> > > Do our users want 600% slower code ?
> > >
> > > I do not want 600% slower code
> > >
> >
> > Speed at the expense of correctness should not even be an argument.
>
> and here we are back to square 0 because for all mainstream codecs
> we drop duplicates and fail this correctness.
>

So you basically telling now that all mainstream codecs in FFmpeg do not
follow specification?



> And noone wants to change that no matter which side of the argument one
> is on.
>
>
> > You seem to not value at all the correctness of proper consistent CFR
> > output, while many of us others do.  The need for CFR output cannot be
> > explained away with performance numbers.
>
> being correct in the "CFR" sense and still being fast is possible, these
> are not exclusive.
> But we lack consensus to go that direction as well as exactly how as there
> are several possibilities
>
> Thanks
>
> [...]
>
> --
> Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
>
> Those who are too smart to engage in politics are punished by being
> governed by those who are dumber. -- Plato
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
Hendrik Leppkes Aug. 26, 2019, 11:45 a.m. UTC | #18
On Mon, Aug 26, 2019 at 11:09 AM Michael Niedermayer
<michael@niedermayer.cc> wrote:
>
> On Mon, Aug 26, 2019 at 09:51:45AM +0200, Hendrik Leppkes wrote:
> > On Mon, Aug 26, 2019 at 9:33 AM Michael Niedermayer
> > <michael@niedermayer.cc> wrote:
> > >
> > > On Sun, Aug 25, 2019 at 08:04:03PM -0300, James Almer wrote:
> > > > On 8/25/2019 6:46 PM, Michael Niedermayer wrote:
> > > > > On Sun, Aug 25, 2019 at 01:22:22PM -0300, James Almer wrote:
> > > > >> On 8/24/2019 3:18 PM, Michael Niedermayer wrote:
> > > > >>> Fixes: Ticket7880
> > > > >>>
> > > > >>> Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
> > > > >>> ---
> > > > >>>  libavcodec/qtrle.c        | 30 ++++++++++++++++++++++++++----
> > > > >>>  tests/ref/fate/qtrle-8bit |  1 +
> > > > >>>  2 files changed, 27 insertions(+), 4 deletions(-)
> > > > >>>
> > > > >>> diff --git a/libavcodec/qtrle.c b/libavcodec/qtrle.c
> > > > >>> index 2c29547e5a..c22a1a582d 100644
> > > > >>> --- a/libavcodec/qtrle.c
> > > > >>> +++ b/libavcodec/qtrle.c
> > > > >>> @@ -45,6 +45,7 @@ typedef struct QtrleContext {
> > > > >>>
> > > > >>>      GetByteContext g;
> > > > >>>      uint32_t pal[256];
> > > > >>> +    AVPacket flush_pkt;
> > > > >>>  } QtrleContext;
> > > > >>>
> > > > >>>  #define CHECK_PIXEL_PTR(n)                                                            \
> > > > >>> @@ -454,11 +455,27 @@ static int qtrle_decode_frame(AVCodecContext *avctx,
> > > > >>>      int has_palette = 0;
> > > > >>>      int ret, size;
> > > > >>>
> > > > >>> +    if (!avpkt->data) {
> > > > >>> +        if (avctx->internal->need_flush) {
> > > > >>> +            avctx->internal->need_flush = 0;
> > > > >>> +            ret = ff_setup_buffered_frame_for_return(avctx, data, s->frame, &s->flush_pkt);
> > > > >>> +            if (ret < 0)
> > > > >>> +                return ret;
> > > > >>> +            *got_frame = 1;
> > > > >>> +        }
> > > > >>> +        return 0;
> > > > >>> +    }
> > > > >>> +    s->flush_pkt = *avpkt;
> > > > >>> +    s->frame->pkt_dts = s->flush_pkt.dts;
> > > > >>> +
> > > > >>>      bytestream2_init(&s->g, avpkt->data, avpkt->size);
> > > > >>>
> > > > >>>      /* check if this frame is even supposed to change */
> > > > >>> -    if (avpkt->size < 8)
> > > > >>> +    if (avpkt->size < 8) {
> > > > >>> +        avctx->internal->need_flush = 1;
> > > > >>>          return avpkt->size;
> > > > >>> +    }
> > > > >>> +    avctx->internal->need_flush = 0;
> > > > >>>
> > > > >>>      /* start after the chunk size */
> > > > >>>      size = bytestream2_get_be32(&s->g) & 0x3FFFFFFF;
> > > > >>> @@ -471,14 +488,18 @@ static int qtrle_decode_frame(AVCodecContext *avctx,
> > > > >>>
> > > > >>>      /* if a header is present, fetch additional decoding parameters */
> > > > >>>      if (header & 0x0008) {
> > > > >>> -        if (avpkt->size < 14)
> > > > >>> +        if (avpkt->size < 14) {
> > > > >>> +            avctx->internal->need_flush = 1;
> > > > >>>              return avpkt->size;
> > > > >>> +        }
> > > > >>>          start_line = bytestream2_get_be16(&s->g);
> > > > >>>          bytestream2_skip(&s->g, 2);
> > > > >>>          height     = bytestream2_get_be16(&s->g);
> > > > >>>          bytestream2_skip(&s->g, 2);
> > > > >>> -        if (height > s->avctx->height - start_line)
> > > > >>> +        if (height > s->avctx->height - start_line) {
> > > > >>> +            avctx->internal->need_flush = 1;
> > > > >>>              return avpkt->size;
> > > > >>> +        }
> > > > >>>      } else {
> > > > >>>          start_line = 0;
> > > > >>>          height     = s->avctx->height;
> > > > >>> @@ -572,5 +593,6 @@ AVCodec ff_qtrle_decoder = {
> > > > >>>      .init           = qtrle_decode_init,
> > > > >>>      .close          = qtrle_decode_end,
> > > > >>>      .decode         = qtrle_decode_frame,
> > > > >>> -    .capabilities   = AV_CODEC_CAP_DR1,
> > > > >>> +    .caps_internal  = FF_CODEC_CAP_SETS_PKT_DTS | FF_CODEC_CAP_SETS_PKT_POS,
> > > > >>> +    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY,
> > > > >>>  };
> > > > >>> diff --git a/tests/ref/fate/qtrle-8bit b/tests/ref/fate/qtrle-8bit
> > > > >>> index 27bb8aad71..39a03b7b6c 100644
> > > > >>> --- a/tests/ref/fate/qtrle-8bit
> > > > >>> +++ b/tests/ref/fate/qtrle-8bit
> > > > >>> @@ -61,3 +61,4 @@
> > > > >>>  0,        160,        160,        1,   921600, 0xcfd6ad2b
> > > > >>>  0,        163,        163,        1,   921600, 0x3b372379
> > > > >>>  0,        165,        165,        1,   921600, 0x36f245f5
> > > > >>> +0,        166,        166,        1,   921600, 0x36f245f5
> > > > >>
> > > > >> Following what i said in the nuv patch, do you still experience timeouts
> > > > >> with the current codebase, or even if you revert commit
> > > > >> a9dacdeea6168787a142209bd19fdd74aefc9dd6? Creating a reference to an
> > > > >> existing frame buffer shouldn't be expensive anymore for the fuzzer
> > > > >> after my ref counting changes to target_dec_fuzzer.c
> > > > >>
> > > > >> This is a very ugly solution to a problem that was already solved when
> > > > >> reference counting was introduced. Returning duplicate frames to achieve
> > > > >> cfr in decoders where it's expected shouldn't affect performance.
> > > > >
> > > > > Maybe i should ask this backward to make it clearer what this is trying
> > > > > to fix.
> > > > >
> > > > > Consider a patch that would return every frame twice with the same ref
> > > > > of course.
> > > > > Would you consider this to have 0 performance impact ?
> > > >
> > > > No, but it would be negligible.
> > >
> > > ok, lets see some actual numbers
> > >
> > > This comparission is about fixing ticket 7880 (just saying so we dont loose
> > > track of what we compare here)
> > >
> > > this qtrle patchset which fixes the last frame
> > > time ./ffmpeg -i clock.mov -vf scale=1920x1080 -qscale 2 -y test-new.avi
> > > real            0m0.233s
> > > user            0m0.404s
> > > sys             0m0.036s
> > > -rw-r----- 1 michael michael 769212 Aug 26 08:38 test-new.avi
> > > time ./ffmpeg -i test-new.avi -f null -
> > > real            0m0.095s
> > > user            0m0.131s
> > > sys             0m0.038s
> > >
> > > The alternative of reverting the code that discards duplicate frames
> > > (this i believe is what the people opposing this patchset here want)
> > > git revert  a9dacdeea6168787a142209bd19fdd74aefc9dd6
> > > time ./ffmpeg -i clock.mov -vf scale=1920x1080 -qscale 2 -y test.avi
> > > real            0m1.208s
> > > user            0m2.866s
> > > sys             0m0.168s
> > > -rw-r----- 1 michael michael 3274430 Aug 26 08:44 test.avi
> > > time ./ffmpeg -i test.avi -f null -
> > > real            0m0.185s
> > > user            0m0.685s
> > > sys             0m0.076s
> > >
> > > https://samples.ffmpeg.org/ffmpeg-bugs/trac/ticket7880/clock.mov
> > >
> > > As we can see the solution preferred by the opposition to this patchset
> > > will make the code 6 times slower and result in 4 times higher bitrate for
> > > the same quality.
> > > I would say this is not "negligible"
> > > you know this is not 0.6%, its not 6% its not 60% it is 600%
> > >
> > > I do not understand why we have a discussion here about this.
> > > Do we want 600% slower code ?
> > > Do our users want 600% slower code ?
> > >
> > > I do not want 600% slower code
> > >
> >
> > Speed at the expense of correctness should not even be an argument.
>
> and here we are back to square 0 because for all mainstream codecs
> we drop duplicates and fail this correctness.
> And noone wants to change that no matter which side of the argument one
> is on.

You claim this is the same thing, but it really is not. If I encode a
static scene in h264 CFR, I get X frames in the bitstream, and avcodec
gives me X frames on the output, even if every single frame is
perfectly identical.
If I do the same with QTRLE, or whichever other codec you've already
been to, I get X frames in the encoded bitstream, and only Y (Y < X)
on the decoder output (at worst, 1 only).

Every encoded frame should produce a decoded frame, and it should not
be up to the decoder to decide to drop them.
H264 repeats are not frames in the bitstream, they are metadata. They
do not have a timestamp that is being lost, they did not belong to a
distinct input packet. And we convey this metadata to the user, so he
can decide to act on it.

How can you not see this huge gap? This behavior in these decoders
actively loses data. The H264 decoder does not lose any data.

>
>
> > You seem to not value at all the correctness of proper consistent CFR
> > output, while many of us others do.  The need for CFR output cannot be
> > explained away with performance numbers.
>
> being correct in the "CFR" sense and still being fast is possible, these
> are not exclusive.
> But we lack consensus to go that direction as well as exactly how as there
> are several possibilities
>

I don't really recall many people arguing against keeping CFR, beyond
yourself of course.

- Hendrik
Hendrik Leppkes Aug. 26, 2019, 2:35 p.m. UTC | #19
On Mon, Aug 26, 2019 at 1:18 AM James Almer <jamrial@gmail.com> wrote:
>
> On 8/25/2019 7:14 PM, Michael Niedermayer wrote:
> > On Sun, Aug 25, 2019 at 11:46:36PM +0200, Michael Niedermayer wrote:
> >> On Sun, Aug 25, 2019 at 01:22:22PM -0300, James Almer wrote:
> >>> On 8/24/2019 3:18 PM, Michael Niedermayer wrote:
> >>>> Fixes: Ticket7880
> >>>>
> >>>> Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
> >>>> ---
> >>>>  libavcodec/qtrle.c        | 30 ++++++++++++++++++++++++++----
> >>>>  tests/ref/fate/qtrle-8bit |  1 +
> >>>>  2 files changed, 27 insertions(+), 4 deletions(-)
> >>>>
> >>>> diff --git a/libavcodec/qtrle.c b/libavcodec/qtrle.c
> >>>> index 2c29547e5a..c22a1a582d 100644
> >>>> --- a/libavcodec/qtrle.c
> >>>> +++ b/libavcodec/qtrle.c
> >>>> @@ -45,6 +45,7 @@ typedef struct QtrleContext {
> >>>>
> >>>>      GetByteContext g;
> >>>>      uint32_t pal[256];
> >>>> +    AVPacket flush_pkt;
> >>>>  } QtrleContext;
> >>>>
> >>>>  #define CHECK_PIXEL_PTR(n)                                                            \
> >>>> @@ -454,11 +455,27 @@ static int qtrle_decode_frame(AVCodecContext *avctx,
> >>>>      int has_palette = 0;
> >>>>      int ret, size;
> >>>>
> >>>> +    if (!avpkt->data) {
> >>>> +        if (avctx->internal->need_flush) {
> >>>> +            avctx->internal->need_flush = 0;
> >>>> +            ret = ff_setup_buffered_frame_for_return(avctx, data, s->frame, &s->flush_pkt);
> >>>> +            if (ret < 0)
> >>>> +                return ret;
> >>>> +            *got_frame = 1;
> >>>> +        }
> >>>> +        return 0;
> >>>> +    }
> >>>> +    s->flush_pkt = *avpkt;
> >>>> +    s->frame->pkt_dts = s->flush_pkt.dts;
> >>>> +
> >>>>      bytestream2_init(&s->g, avpkt->data, avpkt->size);
> >>>>
> >>>>      /* check if this frame is even supposed to change */
> >>>> -    if (avpkt->size < 8)
> >>>> +    if (avpkt->size < 8) {
> >>>> +        avctx->internal->need_flush = 1;
> >>>>          return avpkt->size;
> >>>> +    }
> >>>> +    avctx->internal->need_flush = 0;
> >>>>
> >>>>      /* start after the chunk size */
> >>>>      size = bytestream2_get_be32(&s->g) & 0x3FFFFFFF;
> >>>> @@ -471,14 +488,18 @@ static int qtrle_decode_frame(AVCodecContext *avctx,
> >>>>
> >>>>      /* if a header is present, fetch additional decoding parameters */
> >>>>      if (header & 0x0008) {
> >>>> -        if (avpkt->size < 14)
> >>>> +        if (avpkt->size < 14) {
> >>>> +            avctx->internal->need_flush = 1;
> >>>>              return avpkt->size;
> >>>> +        }
> >>>>          start_line = bytestream2_get_be16(&s->g);
> >>>>          bytestream2_skip(&s->g, 2);
> >>>>          height     = bytestream2_get_be16(&s->g);
> >>>>          bytestream2_skip(&s->g, 2);
> >>>> -        if (height > s->avctx->height - start_line)
> >>>> +        if (height > s->avctx->height - start_line) {
> >>>> +            avctx->internal->need_flush = 1;
> >>>>              return avpkt->size;
> >>>> +        }
> >>>>      } else {
> >>>>          start_line = 0;
> >>>>          height     = s->avctx->height;
> >>>> @@ -572,5 +593,6 @@ AVCodec ff_qtrle_decoder = {
> >>>>      .init           = qtrle_decode_init,
> >>>>      .close          = qtrle_decode_end,
> >>>>      .decode         = qtrle_decode_frame,
> >>>> -    .capabilities   = AV_CODEC_CAP_DR1,
> >>>> +    .caps_internal  = FF_CODEC_CAP_SETS_PKT_DTS | FF_CODEC_CAP_SETS_PKT_POS,
> >>>> +    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY,
> >>>>  };
> >>>> diff --git a/tests/ref/fate/qtrle-8bit b/tests/ref/fate/qtrle-8bit
> >>>> index 27bb8aad71..39a03b7b6c 100644
> >>>> --- a/tests/ref/fate/qtrle-8bit
> >>>> +++ b/tests/ref/fate/qtrle-8bit
> >>>> @@ -61,3 +61,4 @@
> >>>>  0,        160,        160,        1,   921600, 0xcfd6ad2b
> >>>>  0,        163,        163,        1,   921600, 0x3b372379
> >>>>  0,        165,        165,        1,   921600, 0x36f245f5
> >>>> +0,        166,        166,        1,   921600, 0x36f245f5
> >>>
> >>> Following what i said in the nuv patch, do you still experience timeouts
> >>> with the current codebase, or even if you revert commit
> >>> a9dacdeea6168787a142209bd19fdd74aefc9dd6? Creating a reference to an
> >>> existing frame buffer shouldn't be expensive anymore for the fuzzer
> >>> after my ref counting changes to target_dec_fuzzer.c
> >>>
> >>> This is a very ugly solution to a problem that was already solved when
> >>> reference counting was introduced. Returning duplicate frames to achieve
> >>> cfr in decoders where it's expected shouldn't affect performance.
> >>
> >> Maybe i should ask this backward to make it clearer what this is trying
> >> to fix.
> >>
> >> Consider a patch that would return every frame twice with the same ref
> >> of course.
> >> Would you consider this to have 0 performance impact ?
> >> if every filter following needs to process frames twice 2x CPU load
> >> and after the filters references would also not be the same anymore
> >> the following encoder would encoder 2x as many frames 2x CPU load,
> >> bigger file lower quality per bitrate. Also playback of the resulting
> >> file would require more cpu time as it has more frames.
> >>
> >> I think that would be considered a very bad patch for its performance
> >> impact.
> >> So if we do the opposite of removing duplicates why is this so
> >> controversal ?
> >>
> >> This is not about the fuzzer at all ...
> >
> > Also about the implementation itself.
> > This can of course be done in countless other ways
> > for example one can probably detect the duplicate ref somewhere in common
> > code and then optionally drop the frames.
>
> This is one of the suggestions i made in the email sent a few minutes
> ago, yes. Based on a user set option, either dropping the frames in
> generic code by flagging them as discard, or flagging them as
> "disposable" and letting the library user (external applications, ffmpeg
> cli, libavfilter, etc) decide what to do with them.
>

Flagging identical repeat frames as "disposable" seems like a good
idea to me. "discard" imho doesn't fit, since it has a specific
meaning of "should be discarded", while the semantics of "disposable"
would fit this use-case (ie. this frame is valid and you can keep it,
but you could also drop it if you favor performance).

- Hendrik
James Almer Aug. 26, 2019, 2:41 p.m. UTC | #20
On 8/26/2019 4:32 AM, Michael Niedermayer wrote:
> On Sun, Aug 25, 2019 at 08:04:03PM -0300, James Almer wrote:
>> On 8/25/2019 6:46 PM, Michael Niedermayer wrote:
>>> On Sun, Aug 25, 2019 at 01:22:22PM -0300, James Almer wrote:
>>>> On 8/24/2019 3:18 PM, Michael Niedermayer wrote:
>>>>> Fixes: Ticket7880
>>>>>
>>>>> Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
>>>>> ---
>>>>>  libavcodec/qtrle.c        | 30 ++++++++++++++++++++++++++----
>>>>>  tests/ref/fate/qtrle-8bit |  1 +
>>>>>  2 files changed, 27 insertions(+), 4 deletions(-)
>>>>>
>>>>> diff --git a/libavcodec/qtrle.c b/libavcodec/qtrle.c
>>>>> index 2c29547e5a..c22a1a582d 100644
>>>>> --- a/libavcodec/qtrle.c
>>>>> +++ b/libavcodec/qtrle.c
>>>>> @@ -45,6 +45,7 @@ typedef struct QtrleContext {
>>>>>  
>>>>>      GetByteContext g;
>>>>>      uint32_t pal[256];
>>>>> +    AVPacket flush_pkt;
>>>>>  } QtrleContext;
>>>>>  
>>>>>  #define CHECK_PIXEL_PTR(n)                                                            \
>>>>> @@ -454,11 +455,27 @@ static int qtrle_decode_frame(AVCodecContext *avctx,
>>>>>      int has_palette = 0;
>>>>>      int ret, size;
>>>>>  
>>>>> +    if (!avpkt->data) {
>>>>> +        if (avctx->internal->need_flush) {
>>>>> +            avctx->internal->need_flush = 0;
>>>>> +            ret = ff_setup_buffered_frame_for_return(avctx, data, s->frame, &s->flush_pkt);
>>>>> +            if (ret < 0)
>>>>> +                return ret;
>>>>> +            *got_frame = 1;
>>>>> +        }
>>>>> +        return 0;
>>>>> +    }
>>>>> +    s->flush_pkt = *avpkt;
>>>>> +    s->frame->pkt_dts = s->flush_pkt.dts;
>>>>> +
>>>>>      bytestream2_init(&s->g, avpkt->data, avpkt->size);
>>>>>  
>>>>>      /* check if this frame is even supposed to change */
>>>>> -    if (avpkt->size < 8)
>>>>> +    if (avpkt->size < 8) {
>>>>> +        avctx->internal->need_flush = 1;
>>>>>          return avpkt->size;
>>>>> +    }
>>>>> +    avctx->internal->need_flush = 0;
>>>>>  
>>>>>      /* start after the chunk size */
>>>>>      size = bytestream2_get_be32(&s->g) & 0x3FFFFFFF;
>>>>> @@ -471,14 +488,18 @@ static int qtrle_decode_frame(AVCodecContext *avctx,
>>>>>  
>>>>>      /* if a header is present, fetch additional decoding parameters */
>>>>>      if (header & 0x0008) {
>>>>> -        if (avpkt->size < 14)
>>>>> +        if (avpkt->size < 14) {
>>>>> +            avctx->internal->need_flush = 1;
>>>>>              return avpkt->size;
>>>>> +        }
>>>>>          start_line = bytestream2_get_be16(&s->g);
>>>>>          bytestream2_skip(&s->g, 2);
>>>>>          height     = bytestream2_get_be16(&s->g);
>>>>>          bytestream2_skip(&s->g, 2);
>>>>> -        if (height > s->avctx->height - start_line)
>>>>> +        if (height > s->avctx->height - start_line) {
>>>>> +            avctx->internal->need_flush = 1;
>>>>>              return avpkt->size;
>>>>> +        }
>>>>>      } else {
>>>>>          start_line = 0;
>>>>>          height     = s->avctx->height;
>>>>> @@ -572,5 +593,6 @@ AVCodec ff_qtrle_decoder = {
>>>>>      .init           = qtrle_decode_init,
>>>>>      .close          = qtrle_decode_end,
>>>>>      .decode         = qtrle_decode_frame,
>>>>> -    .capabilities   = AV_CODEC_CAP_DR1,
>>>>> +    .caps_internal  = FF_CODEC_CAP_SETS_PKT_DTS | FF_CODEC_CAP_SETS_PKT_POS,
>>>>> +    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY,
>>>>>  };
>>>>> diff --git a/tests/ref/fate/qtrle-8bit b/tests/ref/fate/qtrle-8bit
>>>>> index 27bb8aad71..39a03b7b6c 100644
>>>>> --- a/tests/ref/fate/qtrle-8bit
>>>>> +++ b/tests/ref/fate/qtrle-8bit
>>>>> @@ -61,3 +61,4 @@
>>>>>  0,        160,        160,        1,   921600, 0xcfd6ad2b
>>>>>  0,        163,        163,        1,   921600, 0x3b372379
>>>>>  0,        165,        165,        1,   921600, 0x36f245f5
>>>>> +0,        166,        166,        1,   921600, 0x36f245f5
>>>>
>>>> Following what i said in the nuv patch, do you still experience timeouts
>>>> with the current codebase, or even if you revert commit
>>>> a9dacdeea6168787a142209bd19fdd74aefc9dd6? Creating a reference to an
>>>> existing frame buffer shouldn't be expensive anymore for the fuzzer
>>>> after my ref counting changes to target_dec_fuzzer.c
>>>>
>>>> This is a very ugly solution to a problem that was already solved when
>>>> reference counting was introduced. Returning duplicate frames to achieve
>>>> cfr in decoders where it's expected shouldn't affect performance.
>>>
>>> Maybe i should ask this backward to make it clearer what this is trying
>>> to fix.
>>>
>>> Consider a patch that would return every frame twice with the same ref
>>> of course.
>>> Would you consider this to have 0 performance impact ?
>>
>> No, but it would be negligible.
> 
> ok, lets see some actual numbers
> 
> This comparission is about fixing ticket 7880 (just saying so we dont loose
> track of what we compare here)
> 
> this qtrle patchset which fixes the last frame
> time ./ffmpeg -i clock.mov -vf scale=1920x1080 -qscale 2 -y test-new.avi
> real            0m0.233s
> user            0m0.404s
> sys             0m0.036s
> -rw-r----- 1 michael michael 769212 Aug 26 08:38 test-new.avi
> time ./ffmpeg -i test-new.avi -f null -
> real            0m0.095s
> user            0m0.131s
> sys             0m0.038s
> 
> The alternative of reverting the code that discards duplicate frames
> (this i believe is what the people opposing this patchset here want)
> git revert  a9dacdeea6168787a142209bd19fdd74aefc9dd6
> time ./ffmpeg -i clock.mov -vf scale=1920x1080 -qscale 2 -y test.avi
> real            0m1.208s
> user            0m2.866s
> sys             0m0.168s
> -rw-r----- 1 michael michael 3274430 Aug 26 08:44 test.avi
> time ./ffmpeg -i test.avi -f null -
> real            0m0.185s
> user            0m0.685s
> sys             0m0.076s
> 
> https://samples.ffmpeg.org/ffmpeg-bugs/trac/ticket7880/clock.mov
> 
> As we can see the solution preferred by the opposition to this patchset
> will make the code 6 times slower and result in 4 times higher bitrate for
> the same quality.
> I would say this is not "negligible"
> you know this is not 0.6%, its not 6% its not 60% it is 600%
> 
> I do not understand why we have a discussion here about this.
> Do we want 600% slower code ?
> Do our users want 600% slower code ?
> 
> I do not want 600% slower code

I was talking about the decoder outputting the frames would have
negligible performance effect. It's after all generating a new reference
to an existing buffer. I never said passing 100 frames instead of 20 to
libavfilter and expect it to process them all would be just as fast,
because that's obviously not going to happen.

By flagging frames as "disposable" however, libavfilter (or any library
user) could drop them on sight, achieving the speed up you mentioned. Or
by flagging them as discard at the user's request, the generic
libavcodec would never propagate them (aka, generate the current forced
vfr behavior post a9dacdeea6).
I'm fine with either solution, but i want other people's opinion. One
requires a new frame flag and no extra work in generic lavc code (what
to do with them is up to the user), whereas the other will require a new
lavc option or avctx flag to let the user request dropping these frames.

libavfilter is uncharted land for me, btw, so i have no idea how to have
it look for the aforementioned disposable flag and properly drop them
without disturbing the filtering process. But perhaps that should be the
job of whatever calls libavfilter instead.
James Almer Aug. 26, 2019, 4:25 p.m. UTC | #21
On 8/26/2019 11:35 AM, Hendrik Leppkes wrote:
> On Mon, Aug 26, 2019 at 1:18 AM James Almer <jamrial@gmail.com> wrote:
>>
>> On 8/25/2019 7:14 PM, Michael Niedermayer wrote:
>>> On Sun, Aug 25, 2019 at 11:46:36PM +0200, Michael Niedermayer wrote:
>>>> On Sun, Aug 25, 2019 at 01:22:22PM -0300, James Almer wrote:
>>>>> On 8/24/2019 3:18 PM, Michael Niedermayer wrote:
>>>>>> Fixes: Ticket7880
>>>>>>
>>>>>> Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
>>>>>> ---
>>>>>>  libavcodec/qtrle.c        | 30 ++++++++++++++++++++++++++----
>>>>>>  tests/ref/fate/qtrle-8bit |  1 +
>>>>>>  2 files changed, 27 insertions(+), 4 deletions(-)
>>>>>>
>>>>>> diff --git a/libavcodec/qtrle.c b/libavcodec/qtrle.c
>>>>>> index 2c29547e5a..c22a1a582d 100644
>>>>>> --- a/libavcodec/qtrle.c
>>>>>> +++ b/libavcodec/qtrle.c
>>>>>> @@ -45,6 +45,7 @@ typedef struct QtrleContext {
>>>>>>
>>>>>>      GetByteContext g;
>>>>>>      uint32_t pal[256];
>>>>>> +    AVPacket flush_pkt;
>>>>>>  } QtrleContext;
>>>>>>
>>>>>>  #define CHECK_PIXEL_PTR(n)                                                            \
>>>>>> @@ -454,11 +455,27 @@ static int qtrle_decode_frame(AVCodecContext *avctx,
>>>>>>      int has_palette = 0;
>>>>>>      int ret, size;
>>>>>>
>>>>>> +    if (!avpkt->data) {
>>>>>> +        if (avctx->internal->need_flush) {
>>>>>> +            avctx->internal->need_flush = 0;
>>>>>> +            ret = ff_setup_buffered_frame_for_return(avctx, data, s->frame, &s->flush_pkt);
>>>>>> +            if (ret < 0)
>>>>>> +                return ret;
>>>>>> +            *got_frame = 1;
>>>>>> +        }
>>>>>> +        return 0;
>>>>>> +    }
>>>>>> +    s->flush_pkt = *avpkt;
>>>>>> +    s->frame->pkt_dts = s->flush_pkt.dts;
>>>>>> +
>>>>>>      bytestream2_init(&s->g, avpkt->data, avpkt->size);
>>>>>>
>>>>>>      /* check if this frame is even supposed to change */
>>>>>> -    if (avpkt->size < 8)
>>>>>> +    if (avpkt->size < 8) {
>>>>>> +        avctx->internal->need_flush = 1;
>>>>>>          return avpkt->size;
>>>>>> +    }
>>>>>> +    avctx->internal->need_flush = 0;
>>>>>>
>>>>>>      /* start after the chunk size */
>>>>>>      size = bytestream2_get_be32(&s->g) & 0x3FFFFFFF;
>>>>>> @@ -471,14 +488,18 @@ static int qtrle_decode_frame(AVCodecContext *avctx,
>>>>>>
>>>>>>      /* if a header is present, fetch additional decoding parameters */
>>>>>>      if (header & 0x0008) {
>>>>>> -        if (avpkt->size < 14)
>>>>>> +        if (avpkt->size < 14) {
>>>>>> +            avctx->internal->need_flush = 1;
>>>>>>              return avpkt->size;
>>>>>> +        }
>>>>>>          start_line = bytestream2_get_be16(&s->g);
>>>>>>          bytestream2_skip(&s->g, 2);
>>>>>>          height     = bytestream2_get_be16(&s->g);
>>>>>>          bytestream2_skip(&s->g, 2);
>>>>>> -        if (height > s->avctx->height - start_line)
>>>>>> +        if (height > s->avctx->height - start_line) {
>>>>>> +            avctx->internal->need_flush = 1;
>>>>>>              return avpkt->size;
>>>>>> +        }
>>>>>>      } else {
>>>>>>          start_line = 0;
>>>>>>          height     = s->avctx->height;
>>>>>> @@ -572,5 +593,6 @@ AVCodec ff_qtrle_decoder = {
>>>>>>      .init           = qtrle_decode_init,
>>>>>>      .close          = qtrle_decode_end,
>>>>>>      .decode         = qtrle_decode_frame,
>>>>>> -    .capabilities   = AV_CODEC_CAP_DR1,
>>>>>> +    .caps_internal  = FF_CODEC_CAP_SETS_PKT_DTS | FF_CODEC_CAP_SETS_PKT_POS,
>>>>>> +    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY,
>>>>>>  };
>>>>>> diff --git a/tests/ref/fate/qtrle-8bit b/tests/ref/fate/qtrle-8bit
>>>>>> index 27bb8aad71..39a03b7b6c 100644
>>>>>> --- a/tests/ref/fate/qtrle-8bit
>>>>>> +++ b/tests/ref/fate/qtrle-8bit
>>>>>> @@ -61,3 +61,4 @@
>>>>>>  0,        160,        160,        1,   921600, 0xcfd6ad2b
>>>>>>  0,        163,        163,        1,   921600, 0x3b372379
>>>>>>  0,        165,        165,        1,   921600, 0x36f245f5
>>>>>> +0,        166,        166,        1,   921600, 0x36f245f5
>>>>>
>>>>> Following what i said in the nuv patch, do you still experience timeouts
>>>>> with the current codebase, or even if you revert commit
>>>>> a9dacdeea6168787a142209bd19fdd74aefc9dd6? Creating a reference to an
>>>>> existing frame buffer shouldn't be expensive anymore for the fuzzer
>>>>> after my ref counting changes to target_dec_fuzzer.c
>>>>>
>>>>> This is a very ugly solution to a problem that was already solved when
>>>>> reference counting was introduced. Returning duplicate frames to achieve
>>>>> cfr in decoders where it's expected shouldn't affect performance.
>>>>
>>>> Maybe i should ask this backward to make it clearer what this is trying
>>>> to fix.
>>>>
>>>> Consider a patch that would return every frame twice with the same ref
>>>> of course.
>>>> Would you consider this to have 0 performance impact ?
>>>> if every filter following needs to process frames twice 2x CPU load
>>>> and after the filters references would also not be the same anymore
>>>> the following encoder would encoder 2x as many frames 2x CPU load,
>>>> bigger file lower quality per bitrate. Also playback of the resulting
>>>> file would require more cpu time as it has more frames.
>>>>
>>>> I think that would be considered a very bad patch for its performance
>>>> impact.
>>>> So if we do the opposite of removing duplicates why is this so
>>>> controversal ?
>>>>
>>>> This is not about the fuzzer at all ...
>>>
>>> Also about the implementation itself.
>>> This can of course be done in countless other ways
>>> for example one can probably detect the duplicate ref somewhere in common
>>> code and then optionally drop the frames.
>>
>> This is one of the suggestions i made in the email sent a few minutes
>> ago, yes. Based on a user set option, either dropping the frames in
>> generic code by flagging them as discard, or flagging them as
>> "disposable" and letting the library user (external applications, ffmpeg
>> cli, libavfilter, etc) decide what to do with them.
>>
> 
> Flagging identical repeat frames as "disposable" seems like a good
> idea to me. "discard" imho doesn't fit, since it has a specific
> meaning of "should be discarded", while the semantics of "disposable"
> would fit this use-case (ie. this frame is valid and you can keep it,
> but you could also drop it if you favor performance).

Ok, just sent patchset to signal frames as disposable, with qtrle as the
first decoder to implement it as a PoC.

What's missing is making some "vfr" setting in the ffmpeg cli look for
it in frames to effectively drop them before instead of passing them to
filters or encoders, at the user's request.
Suggestions or patches for that welcome.

> 
> - Hendrik
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
Baptiste Coudurier Aug. 26, 2019, 5:40 p.m. UTC | #22
Hey guys,


> On Aug 26, 2019, at 9:25 AM, James Almer <jamrial@gmail.com> wrote:
> 
> On 8/26/2019 11:35 AM, Hendrik Leppkes wrote:
>> On Mon, Aug 26, 2019 at 1:18 AM James Almer <jamrial@gmail.com> wrote:
>>> 
>>> On 8/25/2019 7:14 PM, Michael Niedermayer wrote:
>>>> On Sun, Aug 25, 2019 at 11:46:36PM +0200, Michael Niedermayer wrote:
>>>>> On Sun, Aug 25, 2019 at 01:22:22PM -0300, James Almer wrote:
>>>>>> On 8/24/2019 3:18 PM, Michael Niedermayer wrote:
>>>>>>> Fixes: Ticket7880
>>>>>>> 
>>>>>>> Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
>>>>>>> ---
>>>>>>> libavcodec/qtrle.c        | 30 ++++++++++++++++++++++++++----
>>>>>>> tests/ref/fate/qtrle-8bit |  1 +
>>>>>>> 2 files changed, 27 insertions(+), 4 deletions(-)
>>>>>>> 
>>>>>>> diff --git a/libavcodec/qtrle.c b/libavcodec/qtrle.c
>>>>>>> index 2c29547e5a..c22a1a582d 100644
>>>>>>> --- a/libavcodec/qtrle.c
>>>>>>> +++ b/libavcodec/qtrle.c
>>>>>>> @@ -45,6 +45,7 @@ typedef struct QtrleContext {
>>>>>>> 
>>>>>>>     GetByteContext g;
>>>>>>>     uint32_t pal[256];
>>>>>>> +    AVPacket flush_pkt;
>>>>>>> } QtrleContext;
>>>>>>> 
>>>>>>> #define CHECK_PIXEL_PTR(n)                                                            \
>>>>>>> @@ -454,11 +455,27 @@ static int qtrle_decode_frame(AVCodecContext *avctx,
>>>>>>>     int has_palette = 0;
>>>>>>>     int ret, size;
>>>>>>> 
>>>>>>> +    if (!avpkt->data) {
>>>>>>> +        if (avctx->internal->need_flush) {
>>>>>>> +            avctx->internal->need_flush = 0;
>>>>>>> +            ret = ff_setup_buffered_frame_for_return(avctx, data, s->frame, &s->flush_pkt);
>>>>>>> +            if (ret < 0)
>>>>>>> +                return ret;
>>>>>>> +            *got_frame = 1;
>>>>>>> +        }
>>>>>>> +        return 0;
>>>>>>> +    }
>>>>>>> +    s->flush_pkt = *avpkt;
>>>>>>> +    s->frame->pkt_dts = s->flush_pkt.dts;
>>>>>>> +
>>>>>>>     bytestream2_init(&s->g, avpkt->data, avpkt->size);
>>>>>>> 
>>>>>>>     /* check if this frame is even supposed to change */
>>>>>>> -    if (avpkt->size < 8)
>>>>>>> +    if (avpkt->size < 8) {
>>>>>>> +        avctx->internal->need_flush = 1;
>>>>>>>         return avpkt->size;
>>>>>>> +    }
>>>>>>> +    avctx->internal->need_flush = 0;
>>>>>>> 
>>>>>>>     /* start after the chunk size */
>>>>>>>     size = bytestream2_get_be32(&s->g) & 0x3FFFFFFF;
>>>>>>> @@ -471,14 +488,18 @@ static int qtrle_decode_frame(AVCodecContext *avctx,
>>>>>>> 
>>>>>>>     /* if a header is present, fetch additional decoding parameters */
>>>>>>>     if (header & 0x0008) {
>>>>>>> -        if (avpkt->size < 14)
>>>>>>> +        if (avpkt->size < 14) {
>>>>>>> +            avctx->internal->need_flush = 1;
>>>>>>>             return avpkt->size;
>>>>>>> +        }
>>>>>>>         start_line = bytestream2_get_be16(&s->g);
>>>>>>>         bytestream2_skip(&s->g, 2);
>>>>>>>         height     = bytestream2_get_be16(&s->g);
>>>>>>>         bytestream2_skip(&s->g, 2);
>>>>>>> -        if (height > s->avctx->height - start_line)
>>>>>>> +        if (height > s->avctx->height - start_line) {
>>>>>>> +            avctx->internal->need_flush = 1;
>>>>>>>             return avpkt->size;
>>>>>>> +        }
>>>>>>>     } else {
>>>>>>>         start_line = 0;
>>>>>>>         height     = s->avctx->height;
>>>>>>> @@ -572,5 +593,6 @@ AVCodec ff_qtrle_decoder = {
>>>>>>>     .init           = qtrle_decode_init,
>>>>>>>     .close          = qtrle_decode_end,
>>>>>>>     .decode         = qtrle_decode_frame,
>>>>>>> -    .capabilities   = AV_CODEC_CAP_DR1,
>>>>>>> +    .caps_internal  = FF_CODEC_CAP_SETS_PKT_DTS | FF_CODEC_CAP_SETS_PKT_POS,
>>>>>>> +    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY,
>>>>>>> };
>>>>>>> diff --git a/tests/ref/fate/qtrle-8bit b/tests/ref/fate/qtrle-8bit
>>>>>>> index 27bb8aad71..39a03b7b6c 100644
>>>>>>> --- a/tests/ref/fate/qtrle-8bit
>>>>>>> +++ b/tests/ref/fate/qtrle-8bit
>>>>>>> @@ -61,3 +61,4 @@
>>>>>>> 0,        160,        160,        1,   921600, 0xcfd6ad2b
>>>>>>> 0,        163,        163,        1,   921600, 0x3b372379
>>>>>>> 0,        165,        165,        1,   921600, 0x36f245f5
>>>>>>> +0,        166,        166,        1,   921600, 0x36f245f5
>>>>>> 
>>>>>> Following what i said in the nuv patch, do you still experience timeouts
>>>>>> with the current codebase, or even if you revert commit
>>>>>> a9dacdeea6168787a142209bd19fdd74aefc9dd6? Creating a reference to an
>>>>>> existing frame buffer shouldn't be expensive anymore for the fuzzer
>>>>>> after my ref counting changes to target_dec_fuzzer.c
>>>>>> 
>>>>>> This is a very ugly solution to a problem that was already solved when
>>>>>> reference counting was introduced. Returning duplicate frames to achieve
>>>>>> cfr in decoders where it's expected shouldn't affect performance.
>>>>> 
>>>>> Maybe i should ask this backward to make it clearer what this is trying
>>>>> to fix.
>>>>> 
>>>>> Consider a patch that would return every frame twice with the same ref
>>>>> of course.
>>>>> Would you consider this to have 0 performance impact ?
>>>>> if every filter following needs to process frames twice 2x CPU load
>>>>> and after the filters references would also not be the same anymore
>>>>> the following encoder would encoder 2x as many frames 2x CPU load,
>>>>> bigger file lower quality per bitrate. Also playback of the resulting
>>>>> file would require more cpu time as it has more frames.
>>>>> 
>>>>> I think that would be considered a very bad patch for its performance
>>>>> impact.
>>>>> So if we do the opposite of removing duplicates why is this so
>>>>> controversal ?
>>>>> 
>>>>> This is not about the fuzzer at all ...
>>>> 
>>>> Also about the implementation itself.
>>>> This can of course be done in countless other ways
>>>> for example one can probably detect the duplicate ref somewhere in common
>>>> code and then optionally drop the frames.
>>> 
>>> This is one of the suggestions i made in the email sent a few minutes
>>> ago, yes. Based on a user set option, either dropping the frames in
>>> generic code by flagging them as discard, or flagging them as
>>> "disposable" and letting the library user (external applications, ffmpeg
>>> cli, libavfilter, etc) decide what to do with them.
>>> 
>> 
>> Flagging identical repeat frames as "disposable" seems like a good
>> idea to me. "discard" imho doesn't fit, since it has a specific
>> meaning of "should be discarded", while the semantics of "disposable"
>> would fit this use-case (ie. this frame is valid and you can keep it,
>> but you could also drop it if you favor performance).
> 
> Ok, just sent patchset to signal frames as disposable, with qtrle as the
> first decoder to implement it as a PoC.
> 
> What's missing is making some "vfr" setting in the ffmpeg cli look for
> it in frames to effectively drop them before instead of passing them to
> filters or encoders, at the user's request.
> Suggestions or patches for that welcome.

IMHO a decoder should output according to the specifications. 
FFmpeg, a while ago, chose to disagree and ignore features like “repeat first field” in mpeg codecs
and instead signal it so the user/application would do it.
In that spirit, it is understandable that QTRLE decoder behaves the same way for consistency reasons.

Now, I would not be opposed to change decoders to follow specifications and actually repeat the fields
instead of relying on the users of libavcodec to do it, but obviously this is a much bigger undertaking.

— 
Baptiste
Paul B Mahol Aug. 26, 2019, 5:50 p.m. UTC | #23
On Mon, Aug 26, 2019 at 7:47 PM Baptiste Coudurier <
baptiste.coudurier@gmail.com> wrote:

> Hey guys,
>
>
> > On Aug 26, 2019, at 9:25 AM, James Almer <jamrial@gmail.com> wrote:
> >
> > On 8/26/2019 11:35 AM, Hendrik Leppkes wrote:
> >> On Mon, Aug 26, 2019 at 1:18 AM James Almer <jamrial@gmail.com> wrote:
> >>>
> >>> On 8/25/2019 7:14 PM, Michael Niedermayer wrote:
> >>>> On Sun, Aug 25, 2019 at 11:46:36PM +0200, Michael Niedermayer wrote:
> >>>>> On Sun, Aug 25, 2019 at 01:22:22PM -0300, James Almer wrote:
> >>>>>> On 8/24/2019 3:18 PM, Michael Niedermayer wrote:
> >>>>>>> Fixes: Ticket7880
> >>>>>>>
> >>>>>>> Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
> >>>>>>> ---
> >>>>>>> libavcodec/qtrle.c        | 30 ++++++++++++++++++++++++++----
> >>>>>>> tests/ref/fate/qtrle-8bit |  1 +
> >>>>>>> 2 files changed, 27 insertions(+), 4 deletions(-)
> >>>>>>>
> >>>>>>> diff --git a/libavcodec/qtrle.c b/libavcodec/qtrle.c
> >>>>>>> index 2c29547e5a..c22a1a582d 100644
> >>>>>>> --- a/libavcodec/qtrle.c
> >>>>>>> +++ b/libavcodec/qtrle.c
> >>>>>>> @@ -45,6 +45,7 @@ typedef struct QtrleContext {
> >>>>>>>
> >>>>>>>     GetByteContext g;
> >>>>>>>     uint32_t pal[256];
> >>>>>>> +    AVPacket flush_pkt;
> >>>>>>> } QtrleContext;
> >>>>>>>
> >>>>>>> #define CHECK_PIXEL_PTR(n)
>                     \
> >>>>>>> @@ -454,11 +455,27 @@ static int qtrle_decode_frame(AVCodecContext
> *avctx,
> >>>>>>>     int has_palette = 0;
> >>>>>>>     int ret, size;
> >>>>>>>
> >>>>>>> +    if (!avpkt->data) {
> >>>>>>> +        if (avctx->internal->need_flush) {
> >>>>>>> +            avctx->internal->need_flush = 0;
> >>>>>>> +            ret = ff_setup_buffered_frame_for_return(avctx, data,
> s->frame, &s->flush_pkt);
> >>>>>>> +            if (ret < 0)
> >>>>>>> +                return ret;
> >>>>>>> +            *got_frame = 1;
> >>>>>>> +        }
> >>>>>>> +        return 0;
> >>>>>>> +    }
> >>>>>>> +    s->flush_pkt = *avpkt;
> >>>>>>> +    s->frame->pkt_dts = s->flush_pkt.dts;
> >>>>>>> +
> >>>>>>>     bytestream2_init(&s->g, avpkt->data, avpkt->size);
> >>>>>>>
> >>>>>>>     /* check if this frame is even supposed to change */
> >>>>>>> -    if (avpkt->size < 8)
> >>>>>>> +    if (avpkt->size < 8) {
> >>>>>>> +        avctx->internal->need_flush = 1;
> >>>>>>>         return avpkt->size;
> >>>>>>> +    }
> >>>>>>> +    avctx->internal->need_flush = 0;
> >>>>>>>
> >>>>>>>     /* start after the chunk size */
> >>>>>>>     size = bytestream2_get_be32(&s->g) & 0x3FFFFFFF;
> >>>>>>> @@ -471,14 +488,18 @@ static int qtrle_decode_frame(AVCodecContext
> *avctx,
> >>>>>>>
> >>>>>>>     /* if a header is present, fetch additional decoding
> parameters */
> >>>>>>>     if (header & 0x0008) {
> >>>>>>> -        if (avpkt->size < 14)
> >>>>>>> +        if (avpkt->size < 14) {
> >>>>>>> +            avctx->internal->need_flush = 1;
> >>>>>>>             return avpkt->size;
> >>>>>>> +        }
> >>>>>>>         start_line = bytestream2_get_be16(&s->g);
> >>>>>>>         bytestream2_skip(&s->g, 2);
> >>>>>>>         height     = bytestream2_get_be16(&s->g);
> >>>>>>>         bytestream2_skip(&s->g, 2);
> >>>>>>> -        if (height > s->avctx->height - start_line)
> >>>>>>> +        if (height > s->avctx->height - start_line) {
> >>>>>>> +            avctx->internal->need_flush = 1;
> >>>>>>>             return avpkt->size;
> >>>>>>> +        }
> >>>>>>>     } else {
> >>>>>>>         start_line = 0;
> >>>>>>>         height     = s->avctx->height;
> >>>>>>> @@ -572,5 +593,6 @@ AVCodec ff_qtrle_decoder = {
> >>>>>>>     .init           = qtrle_decode_init,
> >>>>>>>     .close          = qtrle_decode_end,
> >>>>>>>     .decode         = qtrle_decode_frame,
> >>>>>>> -    .capabilities   = AV_CODEC_CAP_DR1,
> >>>>>>> +    .caps_internal  = FF_CODEC_CAP_SETS_PKT_DTS |
> FF_CODEC_CAP_SETS_PKT_POS,
> >>>>>>> +    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY,
> >>>>>>> };
> >>>>>>> diff --git a/tests/ref/fate/qtrle-8bit b/tests/ref/fate/qtrle-8bit
> >>>>>>> index 27bb8aad71..39a03b7b6c 100644
> >>>>>>> --- a/tests/ref/fate/qtrle-8bit
> >>>>>>> +++ b/tests/ref/fate/qtrle-8bit
> >>>>>>> @@ -61,3 +61,4 @@
> >>>>>>> 0,        160,        160,        1,   921600, 0xcfd6ad2b
> >>>>>>> 0,        163,        163,        1,   921600, 0x3b372379
> >>>>>>> 0,        165,        165,        1,   921600, 0x36f245f5
> >>>>>>> +0,        166,        166,        1,   921600, 0x36f245f5
> >>>>>>
> >>>>>> Following what i said in the nuv patch, do you still experience
> timeouts
> >>>>>> with the current codebase, or even if you revert commit
> >>>>>> a9dacdeea6168787a142209bd19fdd74aefc9dd6? Creating a reference to an
> >>>>>> existing frame buffer shouldn't be expensive anymore for the fuzzer
> >>>>>> after my ref counting changes to target_dec_fuzzer.c
> >>>>>>
> >>>>>> This is a very ugly solution to a problem that was already solved
> when
> >>>>>> reference counting was introduced. Returning duplicate frames to
> achieve
> >>>>>> cfr in decoders where it's expected shouldn't affect performance.
> >>>>>
> >>>>> Maybe i should ask this backward to make it clearer what this is
> trying
> >>>>> to fix.
> >>>>>
> >>>>> Consider a patch that would return every frame twice with the same
> ref
> >>>>> of course.
> >>>>> Would you consider this to have 0 performance impact ?
> >>>>> if every filter following needs to process frames twice 2x CPU load
> >>>>> and after the filters references would also not be the same anymore
> >>>>> the following encoder would encoder 2x as many frames 2x CPU load,
> >>>>> bigger file lower quality per bitrate. Also playback of the resulting
> >>>>> file would require more cpu time as it has more frames.
> >>>>>
> >>>>> I think that would be considered a very bad patch for its performance
> >>>>> impact.
> >>>>> So if we do the opposite of removing duplicates why is this so
> >>>>> controversal ?
> >>>>>
> >>>>> This is not about the fuzzer at all ...
> >>>>
> >>>> Also about the implementation itself.
> >>>> This can of course be done in countless other ways
> >>>> for example one can probably detect the duplicate ref somewhere in
> common
> >>>> code and then optionally drop the frames.
> >>>
> >>> This is one of the suggestions i made in the email sent a few minutes
> >>> ago, yes. Based on a user set option, either dropping the frames in
> >>> generic code by flagging them as discard, or flagging them as
> >>> "disposable" and letting the library user (external applications,
> ffmpeg
> >>> cli, libavfilter, etc) decide what to do with them.
> >>>
> >>
> >> Flagging identical repeat frames as "disposable" seems like a good
> >> idea to me. "discard" imho doesn't fit, since it has a specific
> >> meaning of "should be discarded", while the semantics of "disposable"
> >> would fit this use-case (ie. this frame is valid and you can keep it,
> >> but you could also drop it if you favor performance).
> >
> > Ok, just sent patchset to signal frames as disposable, with qtrle as the
> > first decoder to implement it as a PoC.
> >
> > What's missing is making some "vfr" setting in the ffmpeg cli look for
> > it in frames to effectively drop them before instead of passing them to
> > filters or encoders, at the user's request.
> > Suggestions or patches for that welcome.
>
> IMHO a decoder should output according to the specifications.
> FFmpeg, a while ago, chose to disagree and ignore features like “repeat
> first field” in mpeg codecs
> and instead signal it so the user/application would do it.
> In that spirit, it is understandable that QTRLE decoder behaves the same
> way for consistency reasons.
>

There is no any consistency at all, qtrle is not repeating any fields.


>
> Now, I would not be opposed to change decoders to follow specifications
> and actually repeat the fields
> instead of relying on the users of libavcodec to do it, but obviously this
> is a much bigger undertaking.
>
> —
> Baptiste
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
Hendrik Leppkes Aug. 26, 2019, 6:23 p.m. UTC | #24
On Mon, Aug 26, 2019 at 7:47 PM Baptiste Coudurier
<baptiste.coudurier@gmail.com> wrote:
>
> Hey guys,
>
>
> > On Aug 26, 2019, at 9:25 AM, James Almer <jamrial@gmail.com> wrote:
> >
> > On 8/26/2019 11:35 AM, Hendrik Leppkes wrote:
> >> On Mon, Aug 26, 2019 at 1:18 AM James Almer <jamrial@gmail.com> wrote:
> >>>
> >>> On 8/25/2019 7:14 PM, Michael Niedermayer wrote:
> >>>> On Sun, Aug 25, 2019 at 11:46:36PM +0200, Michael Niedermayer wrote:
> >>>>> On Sun, Aug 25, 2019 at 01:22:22PM -0300, James Almer wrote:
> >>>>>> On 8/24/2019 3:18 PM, Michael Niedermayer wrote:
> >>>>>>> Fixes: Ticket7880
> >>>>>>>
> >>>>>>> Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
> >>>>>>> ---
> >>>>>>> libavcodec/qtrle.c        | 30 ++++++++++++++++++++++++++----
> >>>>>>> tests/ref/fate/qtrle-8bit |  1 +
> >>>>>>> 2 files changed, 27 insertions(+), 4 deletions(-)
> >>>>>>>
> >>>>>>> diff --git a/libavcodec/qtrle.c b/libavcodec/qtrle.c
> >>>>>>> index 2c29547e5a..c22a1a582d 100644
> >>>>>>> --- a/libavcodec/qtrle.c
> >>>>>>> +++ b/libavcodec/qtrle.c
> >>>>>>> @@ -45,6 +45,7 @@ typedef struct QtrleContext {
> >>>>>>>
> >>>>>>>     GetByteContext g;
> >>>>>>>     uint32_t pal[256];
> >>>>>>> +    AVPacket flush_pkt;
> >>>>>>> } QtrleContext;
> >>>>>>>
> >>>>>>> #define CHECK_PIXEL_PTR(n)                                                            \
> >>>>>>> @@ -454,11 +455,27 @@ static int qtrle_decode_frame(AVCodecContext *avctx,
> >>>>>>>     int has_palette = 0;
> >>>>>>>     int ret, size;
> >>>>>>>
> >>>>>>> +    if (!avpkt->data) {
> >>>>>>> +        if (avctx->internal->need_flush) {
> >>>>>>> +            avctx->internal->need_flush = 0;
> >>>>>>> +            ret = ff_setup_buffered_frame_for_return(avctx, data, s->frame, &s->flush_pkt);
> >>>>>>> +            if (ret < 0)
> >>>>>>> +                return ret;
> >>>>>>> +            *got_frame = 1;
> >>>>>>> +        }
> >>>>>>> +        return 0;
> >>>>>>> +    }
> >>>>>>> +    s->flush_pkt = *avpkt;
> >>>>>>> +    s->frame->pkt_dts = s->flush_pkt.dts;
> >>>>>>> +
> >>>>>>>     bytestream2_init(&s->g, avpkt->data, avpkt->size);
> >>>>>>>
> >>>>>>>     /* check if this frame is even supposed to change */
> >>>>>>> -    if (avpkt->size < 8)
> >>>>>>> +    if (avpkt->size < 8) {
> >>>>>>> +        avctx->internal->need_flush = 1;
> >>>>>>>         return avpkt->size;
> >>>>>>> +    }
> >>>>>>> +    avctx->internal->need_flush = 0;
> >>>>>>>
> >>>>>>>     /* start after the chunk size */
> >>>>>>>     size = bytestream2_get_be32(&s->g) & 0x3FFFFFFF;
> >>>>>>> @@ -471,14 +488,18 @@ static int qtrle_decode_frame(AVCodecContext *avctx,
> >>>>>>>
> >>>>>>>     /* if a header is present, fetch additional decoding parameters */
> >>>>>>>     if (header & 0x0008) {
> >>>>>>> -        if (avpkt->size < 14)
> >>>>>>> +        if (avpkt->size < 14) {
> >>>>>>> +            avctx->internal->need_flush = 1;
> >>>>>>>             return avpkt->size;
> >>>>>>> +        }
> >>>>>>>         start_line = bytestream2_get_be16(&s->g);
> >>>>>>>         bytestream2_skip(&s->g, 2);
> >>>>>>>         height     = bytestream2_get_be16(&s->g);
> >>>>>>>         bytestream2_skip(&s->g, 2);
> >>>>>>> -        if (height > s->avctx->height - start_line)
> >>>>>>> +        if (height > s->avctx->height - start_line) {
> >>>>>>> +            avctx->internal->need_flush = 1;
> >>>>>>>             return avpkt->size;
> >>>>>>> +        }
> >>>>>>>     } else {
> >>>>>>>         start_line = 0;
> >>>>>>>         height     = s->avctx->height;
> >>>>>>> @@ -572,5 +593,6 @@ AVCodec ff_qtrle_decoder = {
> >>>>>>>     .init           = qtrle_decode_init,
> >>>>>>>     .close          = qtrle_decode_end,
> >>>>>>>     .decode         = qtrle_decode_frame,
> >>>>>>> -    .capabilities   = AV_CODEC_CAP_DR1,
> >>>>>>> +    .caps_internal  = FF_CODEC_CAP_SETS_PKT_DTS | FF_CODEC_CAP_SETS_PKT_POS,
> >>>>>>> +    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY,
> >>>>>>> };
> >>>>>>> diff --git a/tests/ref/fate/qtrle-8bit b/tests/ref/fate/qtrle-8bit
> >>>>>>> index 27bb8aad71..39a03b7b6c 100644
> >>>>>>> --- a/tests/ref/fate/qtrle-8bit
> >>>>>>> +++ b/tests/ref/fate/qtrle-8bit
> >>>>>>> @@ -61,3 +61,4 @@
> >>>>>>> 0,        160,        160,        1,   921600, 0xcfd6ad2b
> >>>>>>> 0,        163,        163,        1,   921600, 0x3b372379
> >>>>>>> 0,        165,        165,        1,   921600, 0x36f245f5
> >>>>>>> +0,        166,        166,        1,   921600, 0x36f245f5
> >>>>>>
> >>>>>> Following what i said in the nuv patch, do you still experience timeouts
> >>>>>> with the current codebase, or even if you revert commit
> >>>>>> a9dacdeea6168787a142209bd19fdd74aefc9dd6? Creating a reference to an
> >>>>>> existing frame buffer shouldn't be expensive anymore for the fuzzer
> >>>>>> after my ref counting changes to target_dec_fuzzer.c
> >>>>>>
> >>>>>> This is a very ugly solution to a problem that was already solved when
> >>>>>> reference counting was introduced. Returning duplicate frames to achieve
> >>>>>> cfr in decoders where it's expected shouldn't affect performance.
> >>>>>
> >>>>> Maybe i should ask this backward to make it clearer what this is trying
> >>>>> to fix.
> >>>>>
> >>>>> Consider a patch that would return every frame twice with the same ref
> >>>>> of course.
> >>>>> Would you consider this to have 0 performance impact ?
> >>>>> if every filter following needs to process frames twice 2x CPU load
> >>>>> and after the filters references would also not be the same anymore
> >>>>> the following encoder would encoder 2x as many frames 2x CPU load,
> >>>>> bigger file lower quality per bitrate. Also playback of the resulting
> >>>>> file would require more cpu time as it has more frames.
> >>>>>
> >>>>> I think that would be considered a very bad patch for its performance
> >>>>> impact.
> >>>>> So if we do the opposite of removing duplicates why is this so
> >>>>> controversal ?
> >>>>>
> >>>>> This is not about the fuzzer at all ...
> >>>>
> >>>> Also about the implementation itself.
> >>>> This can of course be done in countless other ways
> >>>> for example one can probably detect the duplicate ref somewhere in common
> >>>> code and then optionally drop the frames.
> >>>
> >>> This is one of the suggestions i made in the email sent a few minutes
> >>> ago, yes. Based on a user set option, either dropping the frames in
> >>> generic code by flagging them as discard, or flagging them as
> >>> "disposable" and letting the library user (external applications, ffmpeg
> >>> cli, libavfilter, etc) decide what to do with them.
> >>>
> >>
> >> Flagging identical repeat frames as "disposable" seems like a good
> >> idea to me. "discard" imho doesn't fit, since it has a specific
> >> meaning of "should be discarded", while the semantics of "disposable"
> >> would fit this use-case (ie. this frame is valid and you can keep it,
> >> but you could also drop it if you favor performance).
> >
> > Ok, just sent patchset to signal frames as disposable, with qtrle as the
> > first decoder to implement it as a PoC.
> >
> > What's missing is making some "vfr" setting in the ffmpeg cli look for
> > it in frames to effectively drop them before instead of passing them to
> > filters or encoders, at the user's request.
> > Suggestions or patches for that welcome.
>
> IMHO a decoder should output according to the specifications.
> FFmpeg, a while ago, chose to disagree and ignore features like “repeat first field” in mpeg codecs
> and instead signal it so the user/application would do it.
> In that spirit, it is understandable that QTRLE decoder behaves the same way for consistency reasons.
>

Honoring metadata (or not) and actively dropping identical frames are
quite different things.
As explained in an earlier mail already, the repeat metadata is
maintained perfectly as metadata, the information about the dropped
frames here is flat out lost.

- Hendrik
Baptiste Coudurier Aug. 26, 2019, 6:43 p.m. UTC | #25
> On Aug 26, 2019, at 11:23 AM, Hendrik Leppkes <h.leppkes@gmail.com> wrote:
> 
> On Mon, Aug 26, 2019 at 7:47 PM Baptiste Coudurier
> <baptiste.coudurier@gmail.com <mailto:baptiste.coudurier@gmail.com>> wrote:
>> 
>> Hey guys,
>> 
>> 
>>> On Aug 26, 2019, at 9:25 AM, James Almer <jamrial@gmail.com> wrote:
>>> 
>>> On 8/26/2019 11:35 AM, Hendrik Leppkes wrote:
>>>> On Mon, Aug 26, 2019 at 1:18 AM James Almer <jamrial@gmail.com> wrote:
>>>>> 
>>>>> On 8/25/2019 7:14 PM, Michael Niedermayer wrote:
>>>>>> On Sun, Aug 25, 2019 at 11:46:36PM +0200, Michael Niedermayer wrote:
>>>>>>> On Sun, Aug 25, 2019 at 01:22:22PM -0300, James Almer wrote:
>>>>>>>> On 8/24/2019 3:18 PM, Michael Niedermayer wrote:
>>>>>>>>> Fixes: Ticket7880
>>>>>>>>> 
>>>>>>>>> Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
>>>>>>>>> ---
>>>>>>>>> libavcodec/qtrle.c        | 30 ++++++++++++++++++++++++++----
>>>>>>>>> tests/ref/fate/qtrle-8bit |  1 +
>>>>>>>>> 2 files changed, 27 insertions(+), 4 deletions(-)
>>>>>>>>> 
>>>>>>>>> diff --git a/libavcodec/qtrle.c b/libavcodec/qtrle.c
>>>>>>>>> index 2c29547e5a..c22a1a582d 100644
>>>>>>>>> --- a/libavcodec/qtrle.c
>>>>>>>>> +++ b/libavcodec/qtrle.c
>>>>>>>>> @@ -45,6 +45,7 @@ typedef struct QtrleContext {
>>>>>>>>> 
>>>>>>>>>    GetByteContext g;
>>>>>>>>>    uint32_t pal[256];
>>>>>>>>> +    AVPacket flush_pkt;
>>>>>>>>> } QtrleContext;
>>>>>>>>> 
>>>>>>>>> #define CHECK_PIXEL_PTR(n)                                                            \
>>>>>>>>> @@ -454,11 +455,27 @@ static int qtrle_decode_frame(AVCodecContext *avctx,
>>>>>>>>>    int has_palette = 0;
>>>>>>>>>    int ret, size;
>>>>>>>>> 
>>>>>>>>> +    if (!avpkt->data) {
>>>>>>>>> +        if (avctx->internal->need_flush) {
>>>>>>>>> +            avctx->internal->need_flush = 0;
>>>>>>>>> +            ret = ff_setup_buffered_frame_for_return(avctx, data, s->frame, &s->flush_pkt);
>>>>>>>>> +            if (ret < 0)
>>>>>>>>> +                return ret;
>>>>>>>>> +            *got_frame = 1;
>>>>>>>>> +        }
>>>>>>>>> +        return 0;
>>>>>>>>> +    }
>>>>>>>>> +    s->flush_pkt = *avpkt;
>>>>>>>>> +    s->frame->pkt_dts = s->flush_pkt.dts;
>>>>>>>>> +
>>>>>>>>>    bytestream2_init(&s->g, avpkt->data, avpkt->size);
>>>>>>>>> 
>>>>>>>>>    /* check if this frame is even supposed to change */
>>>>>>>>> -    if (avpkt->size < 8)
>>>>>>>>> +    if (avpkt->size < 8) {
>>>>>>>>> +        avctx->internal->need_flush = 1;
>>>>>>>>>        return avpkt->size;
>>>>>>>>> +    }
>>>>>>>>> +    avctx->internal->need_flush = 0;
>>>>>>>>> 
>>>>>>>>>    /* start after the chunk size */
>>>>>>>>>    size = bytestream2_get_be32(&s->g) & 0x3FFFFFFF;
>>>>>>>>> @@ -471,14 +488,18 @@ static int qtrle_decode_frame(AVCodecContext *avctx,
>>>>>>>>> 
>>>>>>>>>    /* if a header is present, fetch additional decoding parameters */
>>>>>>>>>    if (header & 0x0008) {
>>>>>>>>> -        if (avpkt->size < 14)
>>>>>>>>> +        if (avpkt->size < 14) {
>>>>>>>>> +            avctx->internal->need_flush = 1;
>>>>>>>>>            return avpkt->size;
>>>>>>>>> +        }
>>>>>>>>>        start_line = bytestream2_get_be16(&s->g);
>>>>>>>>>        bytestream2_skip(&s->g, 2);
>>>>>>>>>        height     = bytestream2_get_be16(&s->g);
>>>>>>>>>        bytestream2_skip(&s->g, 2);
>>>>>>>>> -        if (height > s->avctx->height - start_line)
>>>>>>>>> +        if (height > s->avctx->height - start_line) {
>>>>>>>>> +            avctx->internal->need_flush = 1;
>>>>>>>>>            return avpkt->size;
>>>>>>>>> +        }
>>>>>>>>>    } else {
>>>>>>>>>        start_line = 0;
>>>>>>>>>        height     = s->avctx->height;
>>>>>>>>> @@ -572,5 +593,6 @@ AVCodec ff_qtrle_decoder = {
>>>>>>>>>    .init           = qtrle_decode_init,
>>>>>>>>>    .close          = qtrle_decode_end,
>>>>>>>>>    .decode         = qtrle_decode_frame,
>>>>>>>>> -    .capabilities   = AV_CODEC_CAP_DR1,
>>>>>>>>> +    .caps_internal  = FF_CODEC_CAP_SETS_PKT_DTS | FF_CODEC_CAP_SETS_PKT_POS,
>>>>>>>>> +    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY,
>>>>>>>>> };
>>>>>>>>> diff --git a/tests/ref/fate/qtrle-8bit b/tests/ref/fate/qtrle-8bit
>>>>>>>>> index 27bb8aad71..39a03b7b6c 100644
>>>>>>>>> --- a/tests/ref/fate/qtrle-8bit
>>>>>>>>> +++ b/tests/ref/fate/qtrle-8bit
>>>>>>>>> @@ -61,3 +61,4 @@
>>>>>>>>> 0,        160,        160,        1,   921600, 0xcfd6ad2b
>>>>>>>>> 0,        163,        163,        1,   921600, 0x3b372379
>>>>>>>>> 0,        165,        165,        1,   921600, 0x36f245f5
>>>>>>>>> +0,        166,        166,        1,   921600, 0x36f245f5
>>>>>>>> 
>>>>>>>> Following what i said in the nuv patch, do you still experience timeouts
>>>>>>>> with the current codebase, or even if you revert commit
>>>>>>>> a9dacdeea6168787a142209bd19fdd74aefc9dd6? Creating a reference to an
>>>>>>>> existing frame buffer shouldn't be expensive anymore for the fuzzer
>>>>>>>> after my ref counting changes to target_dec_fuzzer.c
>>>>>>>> 
>>>>>>>> This is a very ugly solution to a problem that was already solved when
>>>>>>>> reference counting was introduced. Returning duplicate frames to achieve
>>>>>>>> cfr in decoders where it's expected shouldn't affect performance.
>>>>>>> 
>>>>>>> Maybe i should ask this backward to make it clearer what this is trying
>>>>>>> to fix.
>>>>>>> 
>>>>>>> Consider a patch that would return every frame twice with the same ref
>>>>>>> of course.
>>>>>>> Would you consider this to have 0 performance impact ?
>>>>>>> if every filter following needs to process frames twice 2x CPU load
>>>>>>> and after the filters references would also not be the same anymore
>>>>>>> the following encoder would encoder 2x as many frames 2x CPU load,
>>>>>>> bigger file lower quality per bitrate. Also playback of the resulting
>>>>>>> file would require more cpu time as it has more frames.
>>>>>>> 
>>>>>>> I think that would be considered a very bad patch for its performance
>>>>>>> impact.
>>>>>>> So if we do the opposite of removing duplicates why is this so
>>>>>>> controversal ?
>>>>>>> 
>>>>>>> This is not about the fuzzer at all ...
>>>>>> 
>>>>>> Also about the implementation itself.
>>>>>> This can of course be done in countless other ways
>>>>>> for example one can probably detect the duplicate ref somewhere in common
>>>>>> code and then optionally drop the frames.
>>>>> 
>>>>> This is one of the suggestions i made in the email sent a few minutes
>>>>> ago, yes. Based on a user set option, either dropping the frames in
>>>>> generic code by flagging them as discard, or flagging them as
>>>>> "disposable" and letting the library user (external applications, ffmpeg
>>>>> cli, libavfilter, etc) decide what to do with them.
>>>>> 
>>>> 
>>>> Flagging identical repeat frames as "disposable" seems like a good
>>>> idea to me. "discard" imho doesn't fit, since it has a specific
>>>> meaning of "should be discarded", while the semantics of "disposable"
>>>> would fit this use-case (ie. this frame is valid and you can keep it,
>>>> but you could also drop it if you favor performance).
>>> 
>>> Ok, just sent patchset to signal frames as disposable, with qtrle as the
>>> first decoder to implement it as a PoC.
>>> 
>>> What's missing is making some "vfr" setting in the ffmpeg cli look for
>>> it in frames to effectively drop them before instead of passing them to
>>> filters or encoders, at the user's request.
>>> Suggestions or patches for that welcome.
>> 
>> IMHO a decoder should output according to the specifications.
>> FFmpeg, a while ago, chose to disagree and ignore features like “repeat first field” in mpeg codecs
>> and instead signal it so the user/application would do it.
>> In that spirit, it is understandable that QTRLE decoder behaves the same way for consistency reasons.
>> 
> 
> Honoring metadata (or not) and actively dropping identical frames are
> quite different things.

There are no specs for QTRLE, the frames _are_ empty. 
How do you know how quicktime behaves ? Both implementations seems valid to me.

> As explained in an earlier mail already, the repeat metadata is
> maintained perfectly as metadata, the information about the dropped
> frames here is flat out lost.


Arguing about the metadata loss is one thing, CFR output is a different topic IMHO.

— 
Baptiste
Michael Niedermayer Aug. 26, 2019, 7:37 p.m. UTC | #26
On Mon, Aug 26, 2019 at 01:45:55PM +0200, Hendrik Leppkes wrote:
> On Mon, Aug 26, 2019 at 11:09 AM Michael Niedermayer
> <michael@niedermayer.cc> wrote:
> >
> > On Mon, Aug 26, 2019 at 09:51:45AM +0200, Hendrik Leppkes wrote:
> > > On Mon, Aug 26, 2019 at 9:33 AM Michael Niedermayer
> > > <michael@niedermayer.cc> wrote:
> > > >
> > > > On Sun, Aug 25, 2019 at 08:04:03PM -0300, James Almer wrote:
> > > > > On 8/25/2019 6:46 PM, Michael Niedermayer wrote:
> > > > > > On Sun, Aug 25, 2019 at 01:22:22PM -0300, James Almer wrote:
> > > > > >> On 8/24/2019 3:18 PM, Michael Niedermayer wrote:
> > > > > >>> Fixes: Ticket7880
> > > > > >>>
> > > > > >>> Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
> > > > > >>> ---
> > > > > >>>  libavcodec/qtrle.c        | 30 ++++++++++++++++++++++++++----
> > > > > >>>  tests/ref/fate/qtrle-8bit |  1 +
> > > > > >>>  2 files changed, 27 insertions(+), 4 deletions(-)
> > > > > >>>
> > > > > >>> diff --git a/libavcodec/qtrle.c b/libavcodec/qtrle.c
> > > > > >>> index 2c29547e5a..c22a1a582d 100644
> > > > > >>> --- a/libavcodec/qtrle.c
> > > > > >>> +++ b/libavcodec/qtrle.c
> > > > > >>> @@ -45,6 +45,7 @@ typedef struct QtrleContext {
> > > > > >>>
> > > > > >>>      GetByteContext g;
> > > > > >>>      uint32_t pal[256];
> > > > > >>> +    AVPacket flush_pkt;
> > > > > >>>  } QtrleContext;
> > > > > >>>
> > > > > >>>  #define CHECK_PIXEL_PTR(n)                                                            \
> > > > > >>> @@ -454,11 +455,27 @@ static int qtrle_decode_frame(AVCodecContext *avctx,
> > > > > >>>      int has_palette = 0;
> > > > > >>>      int ret, size;
> > > > > >>>
> > > > > >>> +    if (!avpkt->data) {
> > > > > >>> +        if (avctx->internal->need_flush) {
> > > > > >>> +            avctx->internal->need_flush = 0;
> > > > > >>> +            ret = ff_setup_buffered_frame_for_return(avctx, data, s->frame, &s->flush_pkt);
> > > > > >>> +            if (ret < 0)
> > > > > >>> +                return ret;
> > > > > >>> +            *got_frame = 1;
> > > > > >>> +        }
> > > > > >>> +        return 0;
> > > > > >>> +    }
> > > > > >>> +    s->flush_pkt = *avpkt;
> > > > > >>> +    s->frame->pkt_dts = s->flush_pkt.dts;
> > > > > >>> +
> > > > > >>>      bytestream2_init(&s->g, avpkt->data, avpkt->size);
> > > > > >>>
> > > > > >>>      /* check if this frame is even supposed to change */
> > > > > >>> -    if (avpkt->size < 8)
> > > > > >>> +    if (avpkt->size < 8) {
> > > > > >>> +        avctx->internal->need_flush = 1;
> > > > > >>>          return avpkt->size;
> > > > > >>> +    }
> > > > > >>> +    avctx->internal->need_flush = 0;
> > > > > >>>
> > > > > >>>      /* start after the chunk size */
> > > > > >>>      size = bytestream2_get_be32(&s->g) & 0x3FFFFFFF;
> > > > > >>> @@ -471,14 +488,18 @@ static int qtrle_decode_frame(AVCodecContext *avctx,
> > > > > >>>
> > > > > >>>      /* if a header is present, fetch additional decoding parameters */
> > > > > >>>      if (header & 0x0008) {
> > > > > >>> -        if (avpkt->size < 14)
> > > > > >>> +        if (avpkt->size < 14) {
> > > > > >>> +            avctx->internal->need_flush = 1;
> > > > > >>>              return avpkt->size;
> > > > > >>> +        }
> > > > > >>>          start_line = bytestream2_get_be16(&s->g);
> > > > > >>>          bytestream2_skip(&s->g, 2);
> > > > > >>>          height     = bytestream2_get_be16(&s->g);
> > > > > >>>          bytestream2_skip(&s->g, 2);
> > > > > >>> -        if (height > s->avctx->height - start_line)
> > > > > >>> +        if (height > s->avctx->height - start_line) {
> > > > > >>> +            avctx->internal->need_flush = 1;
> > > > > >>>              return avpkt->size;
> > > > > >>> +        }
> > > > > >>>      } else {
> > > > > >>>          start_line = 0;
> > > > > >>>          height     = s->avctx->height;
> > > > > >>> @@ -572,5 +593,6 @@ AVCodec ff_qtrle_decoder = {
> > > > > >>>      .init           = qtrle_decode_init,
> > > > > >>>      .close          = qtrle_decode_end,
> > > > > >>>      .decode         = qtrle_decode_frame,
> > > > > >>> -    .capabilities   = AV_CODEC_CAP_DR1,
> > > > > >>> +    .caps_internal  = FF_CODEC_CAP_SETS_PKT_DTS | FF_CODEC_CAP_SETS_PKT_POS,
> > > > > >>> +    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY,
> > > > > >>>  };
> > > > > >>> diff --git a/tests/ref/fate/qtrle-8bit b/tests/ref/fate/qtrle-8bit
> > > > > >>> index 27bb8aad71..39a03b7b6c 100644
> > > > > >>> --- a/tests/ref/fate/qtrle-8bit
> > > > > >>> +++ b/tests/ref/fate/qtrle-8bit
> > > > > >>> @@ -61,3 +61,4 @@
> > > > > >>>  0,        160,        160,        1,   921600, 0xcfd6ad2b
> > > > > >>>  0,        163,        163,        1,   921600, 0x3b372379
> > > > > >>>  0,        165,        165,        1,   921600, 0x36f245f5
> > > > > >>> +0,        166,        166,        1,   921600, 0x36f245f5
> > > > > >>
> > > > > >> Following what i said in the nuv patch, do you still experience timeouts
> > > > > >> with the current codebase, or even if you revert commit
> > > > > >> a9dacdeea6168787a142209bd19fdd74aefc9dd6? Creating a reference to an
> > > > > >> existing frame buffer shouldn't be expensive anymore for the fuzzer
> > > > > >> after my ref counting changes to target_dec_fuzzer.c
> > > > > >>
> > > > > >> This is a very ugly solution to a problem that was already solved when
> > > > > >> reference counting was introduced. Returning duplicate frames to achieve
> > > > > >> cfr in decoders where it's expected shouldn't affect performance.
> > > > > >
> > > > > > Maybe i should ask this backward to make it clearer what this is trying
> > > > > > to fix.
> > > > > >
> > > > > > Consider a patch that would return every frame twice with the same ref
> > > > > > of course.
> > > > > > Would you consider this to have 0 performance impact ?
> > > > >
> > > > > No, but it would be negligible.
> > > >
> > > > ok, lets see some actual numbers
> > > >
> > > > This comparission is about fixing ticket 7880 (just saying so we dont loose
> > > > track of what we compare here)
> > > >
> > > > this qtrle patchset which fixes the last frame
> > > > time ./ffmpeg -i clock.mov -vf scale=1920x1080 -qscale 2 -y test-new.avi
> > > > real            0m0.233s
> > > > user            0m0.404s
> > > > sys             0m0.036s
> > > > -rw-r----- 1 michael michael 769212 Aug 26 08:38 test-new.avi
> > > > time ./ffmpeg -i test-new.avi -f null -
> > > > real            0m0.095s
> > > > user            0m0.131s
> > > > sys             0m0.038s
> > > >
> > > > The alternative of reverting the code that discards duplicate frames
> > > > (this i believe is what the people opposing this patchset here want)
> > > > git revert  a9dacdeea6168787a142209bd19fdd74aefc9dd6
> > > > time ./ffmpeg -i clock.mov -vf scale=1920x1080 -qscale 2 -y test.avi
> > > > real            0m1.208s
> > > > user            0m2.866s
> > > > sys             0m0.168s
> > > > -rw-r----- 1 michael michael 3274430 Aug 26 08:44 test.avi
> > > > time ./ffmpeg -i test.avi -f null -
> > > > real            0m0.185s
> > > > user            0m0.685s
> > > > sys             0m0.076s
> > > >
> > > > https://samples.ffmpeg.org/ffmpeg-bugs/trac/ticket7880/clock.mov
> > > >
> > > > As we can see the solution preferred by the opposition to this patchset
> > > > will make the code 6 times slower and result in 4 times higher bitrate for
> > > > the same quality.
> > > > I would say this is not "negligible"
> > > > you know this is not 0.6%, its not 6% its not 60% it is 600%
> > > >
> > > > I do not understand why we have a discussion here about this.
> > > > Do we want 600% slower code ?
> > > > Do our users want 600% slower code ?
> > > >
> > > > I do not want 600% slower code
> > > >
> > >
> > > Speed at the expense of correctness should not even be an argument.
> >
> > and here we are back to square 0 because for all mainstream codecs
> > we drop duplicates and fail this correctness.
> > And noone wants to change that no matter which side of the argument one
> > is on.
> 
> You claim this is the same thing, but it really is not. If I encode a
> static scene in h264 CFR, I get X frames in the bitstream, and avcodec
> gives me X frames on the output, even if every single frame is
> perfectly identical.
> If I do the same with QTRLE, or whichever other codec you've already
> been to, I get X frames in the encoded bitstream, and only Y (Y < X)
> on the decoder output (at worst, 1 only).
> 
> Every encoded frame should produce a decoded frame, and it should not
> be up to the decoder to decide to drop them.
> H264 repeats are not frames in the bitstream, they are metadata. They
> do not have a timestamp that is being lost, they did not belong to a
> distinct input packet. And we convey this metadata to the user, so he
> can decide to act on it.
> 

> How can you not see this huge gap? This behavior in these decoders
> actively loses data. The H264 decoder does not lose any data.

Lets check H264
claim "They do not have a timestamp"
immedeatly after the pic_struct field which tells you about the repeating
behavior there is a loop for each repeated value with a timestamp.
This timestamp is lost, if at CFR one can call it that way.

about "Every encoded frame should produce a decoded frame"
quote about this timestamp from the h264 spec
"The contents of the clock timestamp syntax elements indicate a time of origin, capture, or alternative ideal display."

"origin, capture" implies that the repeated frame can have come from
a real input packet because otherwise it wouldnt have a capture or origin
time.
So an encoder could use this as a way to represent some input frames
And in that case only a decoder that follows these repeats exactly would
have input == output
which again brings us full circle as this matches what we see in qtrle repeats


Thanks

[...]
Hendrik Leppkes Aug. 26, 2019, 8:39 p.m. UTC | #27
On Mon, Aug 26, 2019 at 9:38 PM Michael Niedermayer
<michael@niedermayer.cc> wrote:
>
> On Mon, Aug 26, 2019 at 01:45:55PM +0200, Hendrik Leppkes wrote:
> > On Mon, Aug 26, 2019 at 11:09 AM Michael Niedermayer
> > <michael@niedermayer.cc> wrote:
> > >
> > > On Mon, Aug 26, 2019 at 09:51:45AM +0200, Hendrik Leppkes wrote:
> > > > On Mon, Aug 26, 2019 at 9:33 AM Michael Niedermayer
> > > > <michael@niedermayer.cc> wrote:
> > > > >
> > > > > On Sun, Aug 25, 2019 at 08:04:03PM -0300, James Almer wrote:
> > > > > > On 8/25/2019 6:46 PM, Michael Niedermayer wrote:
> > > > > > > On Sun, Aug 25, 2019 at 01:22:22PM -0300, James Almer wrote:
> > > > > > >> On 8/24/2019 3:18 PM, Michael Niedermayer wrote:
> > > > > > >>> Fixes: Ticket7880
> > > > > > >>>
> > > > > > >>> Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
> > > > > > >>> ---
> > > > > > >>>  libavcodec/qtrle.c        | 30 ++++++++++++++++++++++++++----
> > > > > > >>>  tests/ref/fate/qtrle-8bit |  1 +
> > > > > > >>>  2 files changed, 27 insertions(+), 4 deletions(-)
> > > > > > >>>
> > > > > > >>> diff --git a/libavcodec/qtrle.c b/libavcodec/qtrle.c
> > > > > > >>> index 2c29547e5a..c22a1a582d 100644
> > > > > > >>> --- a/libavcodec/qtrle.c
> > > > > > >>> +++ b/libavcodec/qtrle.c
> > > > > > >>> @@ -45,6 +45,7 @@ typedef struct QtrleContext {
> > > > > > >>>
> > > > > > >>>      GetByteContext g;
> > > > > > >>>      uint32_t pal[256];
> > > > > > >>> +    AVPacket flush_pkt;
> > > > > > >>>  } QtrleContext;
> > > > > > >>>
> > > > > > >>>  #define CHECK_PIXEL_PTR(n)                                                            \
> > > > > > >>> @@ -454,11 +455,27 @@ static int qtrle_decode_frame(AVCodecContext *avctx,
> > > > > > >>>      int has_palette = 0;
> > > > > > >>>      int ret, size;
> > > > > > >>>
> > > > > > >>> +    if (!avpkt->data) {
> > > > > > >>> +        if (avctx->internal->need_flush) {
> > > > > > >>> +            avctx->internal->need_flush = 0;
> > > > > > >>> +            ret = ff_setup_buffered_frame_for_return(avctx, data, s->frame, &s->flush_pkt);
> > > > > > >>> +            if (ret < 0)
> > > > > > >>> +                return ret;
> > > > > > >>> +            *got_frame = 1;
> > > > > > >>> +        }
> > > > > > >>> +        return 0;
> > > > > > >>> +    }
> > > > > > >>> +    s->flush_pkt = *avpkt;
> > > > > > >>> +    s->frame->pkt_dts = s->flush_pkt.dts;
> > > > > > >>> +
> > > > > > >>>      bytestream2_init(&s->g, avpkt->data, avpkt->size);
> > > > > > >>>
> > > > > > >>>      /* check if this frame is even supposed to change */
> > > > > > >>> -    if (avpkt->size < 8)
> > > > > > >>> +    if (avpkt->size < 8) {
> > > > > > >>> +        avctx->internal->need_flush = 1;
> > > > > > >>>          return avpkt->size;
> > > > > > >>> +    }
> > > > > > >>> +    avctx->internal->need_flush = 0;
> > > > > > >>>
> > > > > > >>>      /* start after the chunk size */
> > > > > > >>>      size = bytestream2_get_be32(&s->g) & 0x3FFFFFFF;
> > > > > > >>> @@ -471,14 +488,18 @@ static int qtrle_decode_frame(AVCodecContext *avctx,
> > > > > > >>>
> > > > > > >>>      /* if a header is present, fetch additional decoding parameters */
> > > > > > >>>      if (header & 0x0008) {
> > > > > > >>> -        if (avpkt->size < 14)
> > > > > > >>> +        if (avpkt->size < 14) {
> > > > > > >>> +            avctx->internal->need_flush = 1;
> > > > > > >>>              return avpkt->size;
> > > > > > >>> +        }
> > > > > > >>>          start_line = bytestream2_get_be16(&s->g);
> > > > > > >>>          bytestream2_skip(&s->g, 2);
> > > > > > >>>          height     = bytestream2_get_be16(&s->g);
> > > > > > >>>          bytestream2_skip(&s->g, 2);
> > > > > > >>> -        if (height > s->avctx->height - start_line)
> > > > > > >>> +        if (height > s->avctx->height - start_line) {
> > > > > > >>> +            avctx->internal->need_flush = 1;
> > > > > > >>>              return avpkt->size;
> > > > > > >>> +        }
> > > > > > >>>      } else {
> > > > > > >>>          start_line = 0;
> > > > > > >>>          height     = s->avctx->height;
> > > > > > >>> @@ -572,5 +593,6 @@ AVCodec ff_qtrle_decoder = {
> > > > > > >>>      .init           = qtrle_decode_init,
> > > > > > >>>      .close          = qtrle_decode_end,
> > > > > > >>>      .decode         = qtrle_decode_frame,
> > > > > > >>> -    .capabilities   = AV_CODEC_CAP_DR1,
> > > > > > >>> +    .caps_internal  = FF_CODEC_CAP_SETS_PKT_DTS | FF_CODEC_CAP_SETS_PKT_POS,
> > > > > > >>> +    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY,
> > > > > > >>>  };
> > > > > > >>> diff --git a/tests/ref/fate/qtrle-8bit b/tests/ref/fate/qtrle-8bit
> > > > > > >>> index 27bb8aad71..39a03b7b6c 100644
> > > > > > >>> --- a/tests/ref/fate/qtrle-8bit
> > > > > > >>> +++ b/tests/ref/fate/qtrle-8bit
> > > > > > >>> @@ -61,3 +61,4 @@
> > > > > > >>>  0,        160,        160,        1,   921600, 0xcfd6ad2b
> > > > > > >>>  0,        163,        163,        1,   921600, 0x3b372379
> > > > > > >>>  0,        165,        165,        1,   921600, 0x36f245f5
> > > > > > >>> +0,        166,        166,        1,   921600, 0x36f245f5
> > > > > > >>
> > > > > > >> Following what i said in the nuv patch, do you still experience timeouts
> > > > > > >> with the current codebase, or even if you revert commit
> > > > > > >> a9dacdeea6168787a142209bd19fdd74aefc9dd6? Creating a reference to an
> > > > > > >> existing frame buffer shouldn't be expensive anymore for the fuzzer
> > > > > > >> after my ref counting changes to target_dec_fuzzer.c
> > > > > > >>
> > > > > > >> This is a very ugly solution to a problem that was already solved when
> > > > > > >> reference counting was introduced. Returning duplicate frames to achieve
> > > > > > >> cfr in decoders where it's expected shouldn't affect performance.
> > > > > > >
> > > > > > > Maybe i should ask this backward to make it clearer what this is trying
> > > > > > > to fix.
> > > > > > >
> > > > > > > Consider a patch that would return every frame twice with the same ref
> > > > > > > of course.
> > > > > > > Would you consider this to have 0 performance impact ?
> > > > > >
> > > > > > No, but it would be negligible.
> > > > >
> > > > > ok, lets see some actual numbers
> > > > >
> > > > > This comparission is about fixing ticket 7880 (just saying so we dont loose
> > > > > track of what we compare here)
> > > > >
> > > > > this qtrle patchset which fixes the last frame
> > > > > time ./ffmpeg -i clock.mov -vf scale=1920x1080 -qscale 2 -y test-new.avi
> > > > > real            0m0.233s
> > > > > user            0m0.404s
> > > > > sys             0m0.036s
> > > > > -rw-r----- 1 michael michael 769212 Aug 26 08:38 test-new.avi
> > > > > time ./ffmpeg -i test-new.avi -f null -
> > > > > real            0m0.095s
> > > > > user            0m0.131s
> > > > > sys             0m0.038s
> > > > >
> > > > > The alternative of reverting the code that discards duplicate frames
> > > > > (this i believe is what the people opposing this patchset here want)
> > > > > git revert  a9dacdeea6168787a142209bd19fdd74aefc9dd6
> > > > > time ./ffmpeg -i clock.mov -vf scale=1920x1080 -qscale 2 -y test.avi
> > > > > real            0m1.208s
> > > > > user            0m2.866s
> > > > > sys             0m0.168s
> > > > > -rw-r----- 1 michael michael 3274430 Aug 26 08:44 test.avi
> > > > > time ./ffmpeg -i test.avi -f null -
> > > > > real            0m0.185s
> > > > > user            0m0.685s
> > > > > sys             0m0.076s
> > > > >
> > > > > https://samples.ffmpeg.org/ffmpeg-bugs/trac/ticket7880/clock.mov
> > > > >
> > > > > As we can see the solution preferred by the opposition to this patchset
> > > > > will make the code 6 times slower and result in 4 times higher bitrate for
> > > > > the same quality.
> > > > > I would say this is not "negligible"
> > > > > you know this is not 0.6%, its not 6% its not 60% it is 600%
> > > > >
> > > > > I do not understand why we have a discussion here about this.
> > > > > Do we want 600% slower code ?
> > > > > Do our users want 600% slower code ?
> > > > >
> > > > > I do not want 600% slower code
> > > > >
> > > >
> > > > Speed at the expense of correctness should not even be an argument.
> > >
> > > and here we are back to square 0 because for all mainstream codecs
> > > we drop duplicates and fail this correctness.
> > > And noone wants to change that no matter which side of the argument one
> > > is on.
> >
> > You claim this is the same thing, but it really is not. If I encode a
> > static scene in h264 CFR, I get X frames in the bitstream, and avcodec
> > gives me X frames on the output, even if every single frame is
> > perfectly identical.
> > If I do the same with QTRLE, or whichever other codec you've already
> > been to, I get X frames in the encoded bitstream, and only Y (Y < X)
> > on the decoder output (at worst, 1 only).
> >
> > Every encoded frame should produce a decoded frame, and it should not
> > be up to the decoder to decide to drop them.
> > H264 repeats are not frames in the bitstream, they are metadata. They
> > do not have a timestamp that is being lost, they did not belong to a
> > distinct input packet. And we convey this metadata to the user, so he
> > can decide to act on it.
> >
>
> > How can you not see this huge gap? This behavior in these decoders
> > actively loses data. The H264 decoder does not lose any data.
>
> Lets check H264
> claim "They do not have a timestamp"
> immedeatly after the pic_struct field which tells you about the repeating
> behavior there is a loop for each repeated value with a timestamp.
> This timestamp is lost, if at CFR one can call it that way.
>

"time of origin, capture" is clearly a timecode, not a timestamp in
the sense we're talking about here (plus that the bitstream codes it
in hours/minutes/seconds). I expect you know the difference.
If these timecodes are considered useful it would be trivial to expose
them from the decoder too, since they are already being parsed and
stored.

> about "Every encoded frame should produce a decoded frame"
> quote about this timestamp from the h264 spec
> "The contents of the clock timestamp syntax elements indicate a time of origin, capture, or alternative ideal display."
>
> "origin, capture" implies that the repeated frame can have come from
> a real input packet because otherwise it wouldnt have a capture or origin
> time.
> So an encoder could use this as a way to represent some input frames
> And in that case only a decoder that follows these repeats exactly would
> have input == output
> which again brings us full circle as this matches what we see in qtrle repeats
>
>

No it does not. There are key differences in how this is handled on a
technical level. I can reproduce the full original video stream from
the output the H264 decoder gives me. I cannot do this with QTRLE.
That is all that it ultimately comes down to, any other arguments are
irrelevant.

- Hendrik
Kieran Kunhya Aug. 26, 2019, 8:57 p.m. UTC | #28
>
>
> Lets check H264
> claim "They do not have a timestamp"
> immedeatly after the pic_struct field which tells you about the repeating
> behavior there is a loop for each repeated value with a timestamp.
> This timestamp is lost, if at CFR one can call it that way.
>
> about "Every encoded frame should produce a decoded frame"
> quote about this timestamp from the h264 spec
> "The contents of the clock timestamp syntax elements indicate a time of
> origin, capture, or alternative ideal display."
>

This is a simple misunderstanding of the difference between timecode and
timestamp.
I point you to numerous publications about the difference.


> "origin, capture" implies that the repeated frame can have come from
> a real input packet because otherwise it wouldnt have a capture or origin
> time.
> So an encoder could use this as a way to represent some input frames
> And in that case only a decoder that follows these repeats exactly would
> have input == output
> which again brings us full circle as this matches what we see in qtrle
> repeats
>

You conflate presentation (with repeated fields/frame) and decoding which
are not the same thing.

Kieran
Kieran Kunhya Aug. 26, 2019, 8:58 p.m. UTC | #29
>
> "time of origin, capture" is clearly a timecode, not a timestamp in
> the sense we're talking about here (plus that the bitstream codes it
> in hours/minutes/seconds). I expect you know the difference.
> If these timecodes are considered useful it would be trivial to expose
> them from the decoder too, since they are already being parsed and
> stored.
>

These timecodes are already outputted as side data.
As Nev says they have *nothing* to do with presentation, they are just
metadata.

Kieran
diff mbox

Patch

diff --git a/libavcodec/qtrle.c b/libavcodec/qtrle.c
index 2c29547e5a..c22a1a582d 100644
--- a/libavcodec/qtrle.c
+++ b/libavcodec/qtrle.c
@@ -45,6 +45,7 @@  typedef struct QtrleContext {
 
     GetByteContext g;
     uint32_t pal[256];
+    AVPacket flush_pkt;
 } QtrleContext;
 
 #define CHECK_PIXEL_PTR(n)                                                            \
@@ -454,11 +455,27 @@  static int qtrle_decode_frame(AVCodecContext *avctx,
     int has_palette = 0;
     int ret, size;
 
+    if (!avpkt->data) {
+        if (avctx->internal->need_flush) {
+            avctx->internal->need_flush = 0;
+            ret = ff_setup_buffered_frame_for_return(avctx, data, s->frame, &s->flush_pkt);
+            if (ret < 0)
+                return ret;
+            *got_frame = 1;
+        }
+        return 0;
+    }
+    s->flush_pkt = *avpkt;
+    s->frame->pkt_dts = s->flush_pkt.dts;
+
     bytestream2_init(&s->g, avpkt->data, avpkt->size);
 
     /* check if this frame is even supposed to change */
-    if (avpkt->size < 8)
+    if (avpkt->size < 8) {
+        avctx->internal->need_flush = 1;
         return avpkt->size;
+    }
+    avctx->internal->need_flush = 0;
 
     /* start after the chunk size */
     size = bytestream2_get_be32(&s->g) & 0x3FFFFFFF;
@@ -471,14 +488,18 @@  static int qtrle_decode_frame(AVCodecContext *avctx,
 
     /* if a header is present, fetch additional decoding parameters */
     if (header & 0x0008) {
-        if (avpkt->size < 14)
+        if (avpkt->size < 14) {
+            avctx->internal->need_flush = 1;
             return avpkt->size;
+        }
         start_line = bytestream2_get_be16(&s->g);
         bytestream2_skip(&s->g, 2);
         height     = bytestream2_get_be16(&s->g);
         bytestream2_skip(&s->g, 2);
-        if (height > s->avctx->height - start_line)
+        if (height > s->avctx->height - start_line) {
+            avctx->internal->need_flush = 1;
             return avpkt->size;
+        }
     } else {
         start_line = 0;
         height     = s->avctx->height;
@@ -572,5 +593,6 @@  AVCodec ff_qtrle_decoder = {
     .init           = qtrle_decode_init,
     .close          = qtrle_decode_end,
     .decode         = qtrle_decode_frame,
-    .capabilities   = AV_CODEC_CAP_DR1,
+    .caps_internal  = FF_CODEC_CAP_SETS_PKT_DTS | FF_CODEC_CAP_SETS_PKT_POS,
+    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY,
 };
diff --git a/tests/ref/fate/qtrle-8bit b/tests/ref/fate/qtrle-8bit
index 27bb8aad71..39a03b7b6c 100644
--- a/tests/ref/fate/qtrle-8bit
+++ b/tests/ref/fate/qtrle-8bit
@@ -61,3 +61,4 @@ 
 0,        160,        160,        1,   921600, 0xcfd6ad2b
 0,        163,        163,        1,   921600, 0x3b372379
 0,        165,        165,        1,   921600, 0x36f245f5
+0,        166,        166,        1,   921600, 0x36f245f5