diff mbox series

[FFmpeg-devel,2/4] fftools/ffmpeg: stop using av_stream_get_parser()

Message ID 20220818134605.12583-2-anton@khirnov.net
State New
Headers show
Series [FFmpeg-devel,1/4] lavf: deprecate av_stream_get_end_pts() | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Anton Khirnov Aug. 18, 2022, 1:46 p.m. UTC
The parser is internal to the demuxer, so its state at any particular
point is not well-defined for the caller. Additionally, it is being
accessed from the main thread, while demuxing runs in a separate thread.

Use a separate parser owned by ffmpeg.c to retrieve the same
information.

Fixes races, e.g. in:
- fate-h264-brokensps-2580
- fate-h264-extradata-reload
- fate-iv8-demux
- fate-m4v-cfr
- fate-m4v
---
 fftools/ffmpeg.c     | 33 +++++++++++++++++++++++++++++++--
 fftools/ffmpeg.h     |  9 +++++++++
 fftools/ffmpeg_opt.c |  9 +++++++++
 3 files changed, 49 insertions(+), 2 deletions(-)

Comments

Andreas Rheinhardt Aug. 18, 2022, 1:58 p.m. UTC | #1
Anton Khirnov:
> The parser is internal to the demuxer, so its state at any particular
> point is not well-defined for the caller. Additionally, it is being
> accessed from the main thread, while demuxing runs in a separate thread.
> 
> Use a separate parser owned by ffmpeg.c to retrieve the same
> information.
> 
> Fixes races, e.g. in:
> - fate-h264-brokensps-2580
> - fate-h264-extradata-reload
> - fate-iv8-demux
> - fate-m4v-cfr
> - fate-m4v
> ---
>  fftools/ffmpeg.c     | 33 +++++++++++++++++++++++++++++++--
>  fftools/ffmpeg.h     |  9 +++++++++
>  fftools/ffmpeg_opt.c |  9 +++++++++
>  3 files changed, 49 insertions(+), 2 deletions(-)
> 
> diff --git a/fftools/ffmpeg.c b/fftools/ffmpeg.c
> index ef7177fc33..580df0443a 100644
> --- a/fftools/ffmpeg.c
> +++ b/fftools/ffmpeg.c
> @@ -610,6 +610,9 @@ static void ffmpeg_cleanup(int ret)
>          avcodec_free_context(&ist->dec_ctx);
>          avcodec_parameters_free(&ist->par);
>  
> +        avcodec_free_context(&ist->parser_dec);
> +        av_parser_close(ist->parser);
> +
>          av_freep(&input_streams[i]);
>      }
>  
> @@ -2421,6 +2424,15 @@ static int process_input_packet(InputStream *ist, const AVPacket *pkt, int no_eo
>          ret = av_packet_ref(avpkt, pkt);
>          if (ret < 0)
>              return ret;
> +
> +        if (ist->parser) {
> +            // we do not use the parsed output, only the
> +            // filled codec context fields
> +            uint8_t *dummy;
> +            int      dummy_size;
> +            av_parser_parse2(ist->parser, ist->parser_dec, &dummy, &dummy_size,
> +                             pkt->data, pkt->size, pkt->pts, pkt->dts, pkt->pos);
> +        }
>      }
>  
>      if (pkt && pkt->dts != AV_NOPTS_VALUE) {
> @@ -2452,7 +2464,8 @@ static int process_input_packet(InputStream *ist, const AVPacket *pkt, int no_eo
>                  if (pkt && pkt->duration) {
>                      duration_dts = av_rescale_q(pkt->duration, ist->st->time_base, AV_TIME_BASE_Q);
>                  } else if(ist->dec_ctx->framerate.num != 0 && ist->dec_ctx->framerate.den != 0) {
> -                    int ticks= av_stream_get_parser(ist->st) ? av_stream_get_parser(ist->st)->repeat_pict+1 : ist->dec_ctx->ticks_per_frame;
> +                    int ticks = ist->parser ? ist->parser->repeat_pict + 1 :
> +                                              ist->dec_ctx->ticks_per_frame;
>                      duration_dts = ((int64_t)AV_TIME_BASE *
>                                      ist->dec_ctx->framerate.den * ticks) /
>                                      ist->dec_ctx->framerate.num / ist->dec_ctx->ticks_per_frame;
> @@ -2555,7 +2568,8 @@ static int process_input_packet(InputStream *ist, const AVPacket *pkt, int no_eo
>              } else if (pkt->duration) {
>                  ist->next_dts += av_rescale_q(pkt->duration, ist->st->time_base, AV_TIME_BASE_Q);
>              } else if(ist->dec_ctx->framerate.num != 0) {
> -                int ticks= av_stream_get_parser(ist->st) ? av_stream_get_parser(ist->st)->repeat_pict + 1 : ist->dec_ctx->ticks_per_frame;
> +                int ticks = ist->parser ? ist->parser->repeat_pict + 1 :
> +                                          ist->dec_ctx->ticks_per_frame;
>                  ist->next_dts += ((int64_t)AV_TIME_BASE *
>                                    ist->dec_ctx->framerate.den * ticks) /
>                                    ist->dec_ctx->framerate.num / ist->dec_ctx->ticks_per_frame;
> @@ -2688,6 +2702,21 @@ static int init_input_stream(int ist_index, char *error, int error_len)
>          assert_avoptions(ist->decoder_opts);
>      }
>  
> +    if (ist->parser) {
> +        AVCodecParameters *par_tmp;
> +
> +        par_tmp = avcodec_parameters_alloc();
> +        if (!par_tmp)
> +            return AVERROR(ENOMEM);
> +
> +        ret = avcodec_parameters_from_context(par_tmp, ist->dec_ctx);
> +        if (ret >= 0)
> +            ret = avcodec_parameters_to_context(ist->parser_dec, par_tmp);
> +        avcodec_parameters_free(&par_tmp);
> +        if (ret < 0)
> +            return ret;
> +    }
> +
>      ist->next_pts = AV_NOPTS_VALUE;
>      ist->next_dts = AV_NOPTS_VALUE;
>  
> diff --git a/fftools/ffmpeg.h b/fftools/ffmpeg.h
> index 44cc23fa84..f67a2f1d1d 100644
> --- a/fftools/ffmpeg.h
> +++ b/fftools/ffmpeg.h
> @@ -334,6 +334,15 @@ typedef struct InputStream {
>      AVFrame *decoded_frame;
>      AVPacket *pkt;
>  
> +    /**
> +     * Parser for timestamp processing.
> +     */
> +    AVCodecParserContext *parser;
> +    /**
> +     * Codec context needed by parsing.
> +     */
> +    AVCodecContext       *parser_dec;
> +
>      int64_t       prev_pkt_pts;
>      int64_t       start;     /* time when read started */
>      /* predicted dts of the next packet read for this stream or (when there are
> diff --git a/fftools/ffmpeg_opt.c b/fftools/ffmpeg_opt.c
> index 30ca5cd609..a505c7b26f 100644
> --- a/fftools/ffmpeg_opt.c
> +++ b/fftools/ffmpeg_opt.c
> @@ -1065,6 +1065,15 @@ static void add_input_streams(OptionsContext *o, AVFormatContext *ic)
>              ist->top_field_first = -1;
>              MATCH_PER_STREAM_OPT(top_field_first, i, ist->top_field_first, ic, st);
>  
> +            ist->parser = av_parser_init(ist->dec->id);
> +            if (ist->parser) {
> +                ist->parser->flags |= PARSER_FLAG_COMPLETE_FRAMES;
> +
> +                ist->parser_dec = avcodec_alloc_context3(NULL);
> +                if (!ist->parser_dec)
> +                    exit_program(1);
> +            }
> +
>              break;
>          case AVMEDIA_TYPE_AUDIO:
>              ist->guess_layout_max = INT_MAX;

Are you aware that some parsers (e.g. the HEVC one) are still slow even
with the PARSER_FLAG_COMPLETE_FRAMES flag and consume non-negligible
amounts of memory?

- Andreas
Anton Khirnov Aug. 18, 2022, 2:14 p.m. UTC | #2
Quoting Andreas Rheinhardt (2022-08-18 15:58:37)
> 
> Are you aware that some parsers (e.g. the HEVC one) are still slow even
> with the PARSER_FLAG_COMPLETE_FRAMES flag and consume non-negligible
> amounts of memory?

Alternative suggestions are welcome.

I would also be extremely happy to drop this disgusting hack, but I
expect that would break some sample from bug 913458 when the stars are
not right.
Andreas Rheinhardt Aug. 18, 2022, 2:34 p.m. UTC | #3
Anton Khirnov:
> Quoting Andreas Rheinhardt (2022-08-18 15:58:37)
>>
>> Are you aware that some parsers (e.g. the HEVC one) are still slow even
>> with the PARSER_FLAG_COMPLETE_FRAMES flag and consume non-negligible
>> amounts of memory?
> 
> Alternative suggestions are welcome.
> 

Can't we pass the relevant information via DemuxMsg to the main thread?

> I would also be extremely happy to drop this disgusting hack, but I
> expect that would break some sample from bug 913458 when the stars are
> not right.
>
Michael Niedermayer Aug. 18, 2022, 3:19 p.m. UTC | #4
On Thu, Aug 18, 2022 at 03:46:03PM +0200, Anton Khirnov wrote:
> The parser is internal to the demuxer, so its state at any particular
> point is not well-defined for the caller. Additionally, it is being
> accessed from the main thread, while demuxing runs in a separate thread.
> 
> Use a separate parser owned by ffmpeg.c to retrieve the same
> information.
> 
> Fixes races, e.g. in:
> - fate-h264-brokensps-2580
> - fate-h264-extradata-reload
> - fate-iv8-demux
> - fate-m4v-cfr
> - fate-m4v
> ---
>  fftools/ffmpeg.c     | 33 +++++++++++++++++++++++++++++++--
>  fftools/ffmpeg.h     |  9 +++++++++
>  fftools/ffmpeg_opt.c |  9 +++++++++
>  3 files changed, 49 insertions(+), 2 deletions(-)

This segfaults:
./ffmpeg -max_alloc 100000 -i ~/tickets/2950/mpeg2_fuzz.mpg  -max_muxing_queue_size 8000 -f null -

==25621== Invalid read of size 4
==25621==    at 0x2F3018: add_input_streams (in ffmpeg_g)
==25621==    by 0x2F3BB0: open_input_file (in ffmpeg_g)
==25621==    by 0x2FA93B: ffmpeg_parse_options (in ffmpeg_g)
==25621==    by 0x2E74B4: main (in ffmpeg_g)
==25621==  Address 0x14 is not stack'd, malloc'd or (recently) free'd

thx

[...]
James Almer Aug. 18, 2022, 4:44 p.m. UTC | #5
On 8/18/2022 10:58 AM, Andreas Rheinhardt wrote:
> Anton Khirnov:
>> The parser is internal to the demuxer, so its state at any particular
>> point is not well-defined for the caller. Additionally, it is being
>> accessed from the main thread, while demuxing runs in a separate thread.
>>
>> Use a separate parser owned by ffmpeg.c to retrieve the same
>> information.
>>
>> Fixes races, e.g. in:
>> - fate-h264-brokensps-2580
>> - fate-h264-extradata-reload
>> - fate-iv8-demux
>> - fate-m4v-cfr
>> - fate-m4v
>> ---
>>   fftools/ffmpeg.c     | 33 +++++++++++++++++++++++++++++++--
>>   fftools/ffmpeg.h     |  9 +++++++++
>>   fftools/ffmpeg_opt.c |  9 +++++++++
>>   3 files changed, 49 insertions(+), 2 deletions(-)
>>
>> diff --git a/fftools/ffmpeg.c b/fftools/ffmpeg.c
>> index ef7177fc33..580df0443a 100644
>> --- a/fftools/ffmpeg.c
>> +++ b/fftools/ffmpeg.c
>> @@ -610,6 +610,9 @@ static void ffmpeg_cleanup(int ret)
>>           avcodec_free_context(&ist->dec_ctx);
>>           avcodec_parameters_free(&ist->par);
>>   
>> +        avcodec_free_context(&ist->parser_dec);
>> +        av_parser_close(ist->parser);
>> +
>>           av_freep(&input_streams[i]);
>>       }
>>   
>> @@ -2421,6 +2424,15 @@ static int process_input_packet(InputStream *ist, const AVPacket *pkt, int no_eo
>>           ret = av_packet_ref(avpkt, pkt);
>>           if (ret < 0)
>>               return ret;
>> +
>> +        if (ist->parser) {
>> +            // we do not use the parsed output, only the
>> +            // filled codec context fields
>> +            uint8_t *dummy;
>> +            int      dummy_size;
>> +            av_parser_parse2(ist->parser, ist->parser_dec, &dummy, &dummy_size,
>> +                             pkt->data, pkt->size, pkt->pts, pkt->dts, pkt->pos);
>> +        }
>>       }
>>   
>>       if (pkt && pkt->dts != AV_NOPTS_VALUE) {
>> @@ -2452,7 +2464,8 @@ static int process_input_packet(InputStream *ist, const AVPacket *pkt, int no_eo
>>                   if (pkt && pkt->duration) {
>>                       duration_dts = av_rescale_q(pkt->duration, ist->st->time_base, AV_TIME_BASE_Q);
>>                   } else if(ist->dec_ctx->framerate.num != 0 && ist->dec_ctx->framerate.den != 0) {
>> -                    int ticks= av_stream_get_parser(ist->st) ? av_stream_get_parser(ist->st)->repeat_pict+1 : ist->dec_ctx->ticks_per_frame;
>> +                    int ticks = ist->parser ? ist->parser->repeat_pict + 1 :
>> +                                              ist->dec_ctx->ticks_per_frame;
>>                       duration_dts = ((int64_t)AV_TIME_BASE *
>>                                       ist->dec_ctx->framerate.den * ticks) /
>>                                       ist->dec_ctx->framerate.num / ist->dec_ctx->ticks_per_frame;
>> @@ -2555,7 +2568,8 @@ static int process_input_packet(InputStream *ist, const AVPacket *pkt, int no_eo
>>               } else if (pkt->duration) {
>>                   ist->next_dts += av_rescale_q(pkt->duration, ist->st->time_base, AV_TIME_BASE_Q);
>>               } else if(ist->dec_ctx->framerate.num != 0) {
>> -                int ticks= av_stream_get_parser(ist->st) ? av_stream_get_parser(ist->st)->repeat_pict + 1 : ist->dec_ctx->ticks_per_frame;
>> +                int ticks = ist->parser ? ist->parser->repeat_pict + 1 :
>> +                                          ist->dec_ctx->ticks_per_frame;
>>                   ist->next_dts += ((int64_t)AV_TIME_BASE *
>>                                     ist->dec_ctx->framerate.den * ticks) /
>>                                     ist->dec_ctx->framerate.num / ist->dec_ctx->ticks_per_frame;
>> @@ -2688,6 +2702,21 @@ static int init_input_stream(int ist_index, char *error, int error_len)
>>           assert_avoptions(ist->decoder_opts);
>>       }
>>   
>> +    if (ist->parser) {
>> +        AVCodecParameters *par_tmp;
>> +
>> +        par_tmp = avcodec_parameters_alloc();
>> +        if (!par_tmp)
>> +            return AVERROR(ENOMEM);
>> +
>> +        ret = avcodec_parameters_from_context(par_tmp, ist->dec_ctx);
>> +        if (ret >= 0)
>> +            ret = avcodec_parameters_to_context(ist->parser_dec, par_tmp);
>> +        avcodec_parameters_free(&par_tmp);
>> +        if (ret < 0)
>> +            return ret;
>> +    }
>> +
>>       ist->next_pts = AV_NOPTS_VALUE;
>>       ist->next_dts = AV_NOPTS_VALUE;
>>   
>> diff --git a/fftools/ffmpeg.h b/fftools/ffmpeg.h
>> index 44cc23fa84..f67a2f1d1d 100644
>> --- a/fftools/ffmpeg.h
>> +++ b/fftools/ffmpeg.h
>> @@ -334,6 +334,15 @@ typedef struct InputStream {
>>       AVFrame *decoded_frame;
>>       AVPacket *pkt;
>>   
>> +    /**
>> +     * Parser for timestamp processing.
>> +     */
>> +    AVCodecParserContext *parser;
>> +    /**
>> +     * Codec context needed by parsing.
>> +     */
>> +    AVCodecContext       *parser_dec;
>> +
>>       int64_t       prev_pkt_pts;
>>       int64_t       start;     /* time when read started */
>>       /* predicted dts of the next packet read for this stream or (when there are
>> diff --git a/fftools/ffmpeg_opt.c b/fftools/ffmpeg_opt.c
>> index 30ca5cd609..a505c7b26f 100644
>> --- a/fftools/ffmpeg_opt.c
>> +++ b/fftools/ffmpeg_opt.c
>> @@ -1065,6 +1065,15 @@ static void add_input_streams(OptionsContext *o, AVFormatContext *ic)
>>               ist->top_field_first = -1;
>>               MATCH_PER_STREAM_OPT(top_field_first, i, ist->top_field_first, ic, st);
>>   
>> +            ist->parser = av_parser_init(ist->dec->id);
>> +            if (ist->parser) {
>> +                ist->parser->flags |= PARSER_FLAG_COMPLETE_FRAMES;
>> +
>> +                ist->parser_dec = avcodec_alloc_context3(NULL);
>> +                if (!ist->parser_dec)
>> +                    exit_program(1);
>> +            }
>> +
>>               break;
>>           case AVMEDIA_TYPE_AUDIO:
>>               ist->guess_layout_max = INT_MAX;
> 
> Are you aware that some parsers (e.g. the HEVC one) are still slow even
> with the PARSER_FLAG_COMPLETE_FRAMES flag and consume non-negligible
> amounts of memory?

Is it because it uses ff_h2645_packet_split()? It used to do like 
h264_parser and feature a custom NAL splitting implementation in order 
to not bother unscaping entire slice NALs (For raw annexb) since it only 
cares about their headers.

Maybe adding a max_read_size parameter to it so it stops parsing NALs 
after that point would help. It would also allow us to use it in 
h264_parser, simplifying it considerably.

> 
> - Andreas
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
Andreas Rheinhardt Aug. 18, 2022, 5:08 p.m. UTC | #6
James Almer:
> On 8/18/2022 10:58 AM, Andreas Rheinhardt wrote:
>> Anton Khirnov:
>>> The parser is internal to the demuxer, so its state at any particular
>>> point is not well-defined for the caller. Additionally, it is being
>>> accessed from the main thread, while demuxing runs in a separate thread.
>>>
>>> Use a separate parser owned by ffmpeg.c to retrieve the same
>>> information.
>>>
>>> Fixes races, e.g. in:
>>> - fate-h264-brokensps-2580
>>> - fate-h264-extradata-reload
>>> - fate-iv8-demux
>>> - fate-m4v-cfr
>>> - fate-m4v
>>> ---
>>>   fftools/ffmpeg.c     | 33 +++++++++++++++++++++++++++++++--
>>>   fftools/ffmpeg.h     |  9 +++++++++
>>>   fftools/ffmpeg_opt.c |  9 +++++++++
>>>   3 files changed, 49 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/fftools/ffmpeg.c b/fftools/ffmpeg.c
>>> index ef7177fc33..580df0443a 100644
>>> --- a/fftools/ffmpeg.c
>>> +++ b/fftools/ffmpeg.c
>>> @@ -610,6 +610,9 @@ static void ffmpeg_cleanup(int ret)
>>>           avcodec_free_context(&ist->dec_ctx);
>>>           avcodec_parameters_free(&ist->par);
>>>   +        avcodec_free_context(&ist->parser_dec);
>>> +        av_parser_close(ist->parser);
>>> +
>>>           av_freep(&input_streams[i]);
>>>       }
>>>   @@ -2421,6 +2424,15 @@ static int process_input_packet(InputStream
>>> *ist, const AVPacket *pkt, int no_eo
>>>           ret = av_packet_ref(avpkt, pkt);
>>>           if (ret < 0)
>>>               return ret;
>>> +
>>> +        if (ist->parser) {
>>> +            // we do not use the parsed output, only the
>>> +            // filled codec context fields
>>> +            uint8_t *dummy;
>>> +            int      dummy_size;
>>> +            av_parser_parse2(ist->parser, ist->parser_dec, &dummy,
>>> &dummy_size,
>>> +                             pkt->data, pkt->size, pkt->pts,
>>> pkt->dts, pkt->pos);
>>> +        }
>>>       }
>>>         if (pkt && pkt->dts != AV_NOPTS_VALUE) {
>>> @@ -2452,7 +2464,8 @@ static int process_input_packet(InputStream
>>> *ist, const AVPacket *pkt, int no_eo
>>>                   if (pkt && pkt->duration) {
>>>                       duration_dts = av_rescale_q(pkt->duration,
>>> ist->st->time_base, AV_TIME_BASE_Q);
>>>                   } else if(ist->dec_ctx->framerate.num != 0 &&
>>> ist->dec_ctx->framerate.den != 0) {
>>> -                    int ticks= av_stream_get_parser(ist->st) ?
>>> av_stream_get_parser(ist->st)->repeat_pict+1 :
>>> ist->dec_ctx->ticks_per_frame;
>>> +                    int ticks = ist->parser ?
>>> ist->parser->repeat_pict + 1 :
>>> +                                             
>>> ist->dec_ctx->ticks_per_frame;
>>>                       duration_dts = ((int64_t)AV_TIME_BASE *
>>>                                       ist->dec_ctx->framerate.den *
>>> ticks) /
>>>                                       ist->dec_ctx->framerate.num /
>>> ist->dec_ctx->ticks_per_frame;
>>> @@ -2555,7 +2568,8 @@ static int process_input_packet(InputStream
>>> *ist, const AVPacket *pkt, int no_eo
>>>               } else if (pkt->duration) {
>>>                   ist->next_dts += av_rescale_q(pkt->duration,
>>> ist->st->time_base, AV_TIME_BASE_Q);
>>>               } else if(ist->dec_ctx->framerate.num != 0) {
>>> -                int ticks= av_stream_get_parser(ist->st) ?
>>> av_stream_get_parser(ist->st)->repeat_pict + 1 :
>>> ist->dec_ctx->ticks_per_frame;
>>> +                int ticks = ist->parser ? ist->parser->repeat_pict +
>>> 1 :
>>> +                                         
>>> ist->dec_ctx->ticks_per_frame;
>>>                   ist->next_dts += ((int64_t)AV_TIME_BASE *
>>>                                     ist->dec_ctx->framerate.den *
>>> ticks) /
>>>                                     ist->dec_ctx->framerate.num /
>>> ist->dec_ctx->ticks_per_frame;
>>> @@ -2688,6 +2702,21 @@ static int init_input_stream(int ist_index,
>>> char *error, int error_len)
>>>           assert_avoptions(ist->decoder_opts);
>>>       }
>>>   +    if (ist->parser) {
>>> +        AVCodecParameters *par_tmp;
>>> +
>>> +        par_tmp = avcodec_parameters_alloc();
>>> +        if (!par_tmp)
>>> +            return AVERROR(ENOMEM);
>>> +
>>> +        ret = avcodec_parameters_from_context(par_tmp, ist->dec_ctx);
>>> +        if (ret >= 0)
>>> +            ret = avcodec_parameters_to_context(ist->parser_dec,
>>> par_tmp);
>>> +        avcodec_parameters_free(&par_tmp);
>>> +        if (ret < 0)
>>> +            return ret;
>>> +    }
>>> +
>>>       ist->next_pts = AV_NOPTS_VALUE;
>>>       ist->next_dts = AV_NOPTS_VALUE;
>>>   diff --git a/fftools/ffmpeg.h b/fftools/ffmpeg.h
>>> index 44cc23fa84..f67a2f1d1d 100644
>>> --- a/fftools/ffmpeg.h
>>> +++ b/fftools/ffmpeg.h
>>> @@ -334,6 +334,15 @@ typedef struct InputStream {
>>>       AVFrame *decoded_frame;
>>>       AVPacket *pkt;
>>>   +    /**
>>> +     * Parser for timestamp processing.
>>> +     */
>>> +    AVCodecParserContext *parser;
>>> +    /**
>>> +     * Codec context needed by parsing.
>>> +     */
>>> +    AVCodecContext       *parser_dec;
>>> +
>>>       int64_t       prev_pkt_pts;
>>>       int64_t       start;     /* time when read started */
>>>       /* predicted dts of the next packet read for this stream or
>>> (when there are
>>> diff --git a/fftools/ffmpeg_opt.c b/fftools/ffmpeg_opt.c
>>> index 30ca5cd609..a505c7b26f 100644
>>> --- a/fftools/ffmpeg_opt.c
>>> +++ b/fftools/ffmpeg_opt.c
>>> @@ -1065,6 +1065,15 @@ static void add_input_streams(OptionsContext
>>> *o, AVFormatContext *ic)
>>>               ist->top_field_first = -1;
>>>               MATCH_PER_STREAM_OPT(top_field_first, i,
>>> ist->top_field_first, ic, st);
>>>   +            ist->parser = av_parser_init(ist->dec->id);
>>> +            if (ist->parser) {
>>> +                ist->parser->flags |= PARSER_FLAG_COMPLETE_FRAMES;
>>> +
>>> +                ist->parser_dec = avcodec_alloc_context3(NULL);
>>> +                if (!ist->parser_dec)
>>> +                    exit_program(1);
>>> +            }
>>> +
>>>               break;
>>>           case AVMEDIA_TYPE_AUDIO:
>>>               ist->guess_layout_max = INT_MAX;
>>
>> Are you aware that some parsers (e.g. the HEVC one) are still slow even
>> with the PARSER_FLAG_COMPLETE_FRAMES flag and consume non-negligible
>> amounts of memory?
> 
> Is it because it uses ff_h2645_packet_split()? It used to do like
> h264_parser and feature a custom NAL splitting implementation in order
> to not bother unscaping entire slice NALs (For raw annexb) since it only
> cares about their headers.
> 
> Maybe adding a max_read_size parameter to it so it stops parsing NALs
> after that point would help. It would also allow us to use it in
> h264_parser, simplifying it considerably.
> 

1. It is because ff_h2645_packet_split().
2. And even if unescaping is stopped after the first few bytes (which is
e.g. not possible for SEIs as it might be the last SEI message that
contains important information), one would still need a buffer to hold
the complete input packet (because it might be that it contains a
billion little NALUs).
3. The simplifications due to ceb085906623dcc64b82151d433d1be2b0c71f73
were in part due to the HEVC parser retaining a list of NALUs instead of
only retaining the current NALU (it only looks at the current NALU
anyway). The H.264 parser does not make this mistake, so any
simplifications (if any) will be smaller.

- Andreas
diff mbox series

Patch

diff --git a/fftools/ffmpeg.c b/fftools/ffmpeg.c
index ef7177fc33..580df0443a 100644
--- a/fftools/ffmpeg.c
+++ b/fftools/ffmpeg.c
@@ -610,6 +610,9 @@  static void ffmpeg_cleanup(int ret)
         avcodec_free_context(&ist->dec_ctx);
         avcodec_parameters_free(&ist->par);
 
+        avcodec_free_context(&ist->parser_dec);
+        av_parser_close(ist->parser);
+
         av_freep(&input_streams[i]);
     }
 
@@ -2421,6 +2424,15 @@  static int process_input_packet(InputStream *ist, const AVPacket *pkt, int no_eo
         ret = av_packet_ref(avpkt, pkt);
         if (ret < 0)
             return ret;
+
+        if (ist->parser) {
+            // we do not use the parsed output, only the
+            // filled codec context fields
+            uint8_t *dummy;
+            int      dummy_size;
+            av_parser_parse2(ist->parser, ist->parser_dec, &dummy, &dummy_size,
+                             pkt->data, pkt->size, pkt->pts, pkt->dts, pkt->pos);
+        }
     }
 
     if (pkt && pkt->dts != AV_NOPTS_VALUE) {
@@ -2452,7 +2464,8 @@  static int process_input_packet(InputStream *ist, const AVPacket *pkt, int no_eo
                 if (pkt && pkt->duration) {
                     duration_dts = av_rescale_q(pkt->duration, ist->st->time_base, AV_TIME_BASE_Q);
                 } else if(ist->dec_ctx->framerate.num != 0 && ist->dec_ctx->framerate.den != 0) {
-                    int ticks= av_stream_get_parser(ist->st) ? av_stream_get_parser(ist->st)->repeat_pict+1 : ist->dec_ctx->ticks_per_frame;
+                    int ticks = ist->parser ? ist->parser->repeat_pict + 1 :
+                                              ist->dec_ctx->ticks_per_frame;
                     duration_dts = ((int64_t)AV_TIME_BASE *
                                     ist->dec_ctx->framerate.den * ticks) /
                                     ist->dec_ctx->framerate.num / ist->dec_ctx->ticks_per_frame;
@@ -2555,7 +2568,8 @@  static int process_input_packet(InputStream *ist, const AVPacket *pkt, int no_eo
             } else if (pkt->duration) {
                 ist->next_dts += av_rescale_q(pkt->duration, ist->st->time_base, AV_TIME_BASE_Q);
             } else if(ist->dec_ctx->framerate.num != 0) {
-                int ticks= av_stream_get_parser(ist->st) ? av_stream_get_parser(ist->st)->repeat_pict + 1 : ist->dec_ctx->ticks_per_frame;
+                int ticks = ist->parser ? ist->parser->repeat_pict + 1 :
+                                          ist->dec_ctx->ticks_per_frame;
                 ist->next_dts += ((int64_t)AV_TIME_BASE *
                                   ist->dec_ctx->framerate.den * ticks) /
                                   ist->dec_ctx->framerate.num / ist->dec_ctx->ticks_per_frame;
@@ -2688,6 +2702,21 @@  static int init_input_stream(int ist_index, char *error, int error_len)
         assert_avoptions(ist->decoder_opts);
     }
 
+    if (ist->parser) {
+        AVCodecParameters *par_tmp;
+
+        par_tmp = avcodec_parameters_alloc();
+        if (!par_tmp)
+            return AVERROR(ENOMEM);
+
+        ret = avcodec_parameters_from_context(par_tmp, ist->dec_ctx);
+        if (ret >= 0)
+            ret = avcodec_parameters_to_context(ist->parser_dec, par_tmp);
+        avcodec_parameters_free(&par_tmp);
+        if (ret < 0)
+            return ret;
+    }
+
     ist->next_pts = AV_NOPTS_VALUE;
     ist->next_dts = AV_NOPTS_VALUE;
 
diff --git a/fftools/ffmpeg.h b/fftools/ffmpeg.h
index 44cc23fa84..f67a2f1d1d 100644
--- a/fftools/ffmpeg.h
+++ b/fftools/ffmpeg.h
@@ -334,6 +334,15 @@  typedef struct InputStream {
     AVFrame *decoded_frame;
     AVPacket *pkt;
 
+    /**
+     * Parser for timestamp processing.
+     */
+    AVCodecParserContext *parser;
+    /**
+     * Codec context needed by parsing.
+     */
+    AVCodecContext       *parser_dec;
+
     int64_t       prev_pkt_pts;
     int64_t       start;     /* time when read started */
     /* predicted dts of the next packet read for this stream or (when there are
diff --git a/fftools/ffmpeg_opt.c b/fftools/ffmpeg_opt.c
index 30ca5cd609..a505c7b26f 100644
--- a/fftools/ffmpeg_opt.c
+++ b/fftools/ffmpeg_opt.c
@@ -1065,6 +1065,15 @@  static void add_input_streams(OptionsContext *o, AVFormatContext *ic)
             ist->top_field_first = -1;
             MATCH_PER_STREAM_OPT(top_field_first, i, ist->top_field_first, ic, st);
 
+            ist->parser = av_parser_init(ist->dec->id);
+            if (ist->parser) {
+                ist->parser->flags |= PARSER_FLAG_COMPLETE_FRAMES;
+
+                ist->parser_dec = avcodec_alloc_context3(NULL);
+                if (!ist->parser_dec)
+                    exit_program(1);
+            }
+
             break;
         case AVMEDIA_TYPE_AUDIO:
             ist->guess_layout_max = INT_MAX;