diff mbox

[FFmpeg-devel,RFC] ffmpeg: Improved the performance of 1:N for adaptive bitrate scenario.

Message ID 1541934438-30717-2-git-send-email-mypopydev@gmail.com
State New
Headers show

Commit Message

Jun Zhao Nov. 11, 2018, 11:07 a.m. UTC
Improved the performance of 1 decode + N filter graphs in the adaptive
bitrate scenario.

With the new option "-abr_pipeline":
1. It enables multiple filter graph concurrency, which brings about a
5%~20% improvement in some 1:N scenarios with CPU or GPU
acceleration.
2. The next step will continue to improve the concurrency of the complex
filter graph, which can support a highly efficient filter network.

Below are some test cases and test results for reference.
(Hardware platform: Intel(R) Core(TM) i7-6700 CPU @ 3.40GHz)
(Software: Intel iHD driver - 16.9.00100, CentOS 7)

Command for Intel GPU acceleration case, 1 decode to N scaling:
ffmpeg -vaapi_device /dev/dri/renderD128 -hwaccel vaapi \
    -hwaccel_output_format vaapi \
    -i ~/Videos/1920x1080p_30.00_x264_qp28.h264 \
    -vf "scale_vaapi=1280:720:format=nv12,hwdownload" \
    -pix_fmt nv12 -f null /dev/null \
    -vf "scale_vaapi=720:480:format=nv12,hwdownload" \
    -pix_fmt nv12 -f null /dev/null \
    -abr_pipeline

    test results:
                2 scale
    Improved       ~34%

Command for CPU only 1 decode to N scaling:
ffmpeg -i ~/Videos/1920x1080p_30.00_x264_qp28.h264 \
    -vf "scale=1280:720" -pix_fmt nv12 -f null /dev/null \
    -vf "scale=720:480" -pix_fmt nv12 -f null /dev/null \
    -abr_pipeline

    test results:
                2 scale
    Improved       ~25%

Command for 1:N transcode by GPU acceleration:
./ffmpeg -vaapi_device /dev/dri/renderD128 -hwaccel vaapi \
    -hwaccel_output_format vaapi \
    -i ~/Videos/1920x1080p_30.00_x264_qp28.h264 \
    -vf "scale_vaapi=1280:720" -c:v h264_vaapi -f null /dev/null \
    -vf "scale_vaapi=720:480" -c:v h264_vaapi -f null /dev/null \
    -abr_pipeline

    test results:
                2 scale+enc
    Improved      ~6.1%

Signed-off-by: Wang, Shaofei <shaofei.wang@intel.com>
Signed-off-by: Jun Zhao <jun.zhao@intel.com>
---
 fftools/ffmpeg.c        |  236 ++++++++++++++++++++++++++++++++++++++++++++---
 fftools/ffmpeg.h        |   12 +++
 fftools/ffmpeg_filter.c |    6 +
 fftools/ffmpeg_opt.c    |    6 +-
 4 files changed, 246 insertions(+), 14 deletions(-)

Comments

Steven Liu Nov. 12, 2018, 9:44 a.m. UTC | #1
Jun Zhao <mypopydev@gmail.com> 于2018年11月11日周日 下午7:07写道:
>
> Improved the performance of 1 decode + N filter graphs and adaptive
> bitrate scenario.
>
> With new option "-abr_pipeline"
> 1. It enabled multiple filter graph concurrency, which bring above
> about 5%~20% improvement in some 1:N scenario by CPU or GPU
> acceleration
> 2. Next step will continue to improve the concurrency of complex
> filter graph which can support high efficiency of filter net
>
> Below are some test cases and test result as reference.
> (Hardware platform: Intel(R) Core(TM) i7-6700 CPU @ 3.40GHz)
> (Software: Intel iHD driver - 16.9.00100, CentOS 7)
>
> Command for Intel GPU acceleration case, 1 decode to N scaling:
> ffmpeg -vaapi_device /dev/dri/renderD128 -hwaccel vaapi \
>     -hwaccel_output_format vaapi \
>     -i ~/Videos/1920x1080p_30.00_x264_qp28.h264 \
>     -vf "scale_vaapi=1280:720:format=nv12,hwdownload" \
>     -pix_fmt nv12 -f null /dev/null \
>     -vf "scale_vaapi=720:480:format=nv12,hwdownload" \
>     -pix_fmt nv12 -f null /dev/null \
>     -abr_pipeline
>
>     test results:
>                 2 scale
>     Improved       ~34%
>
> Command for CPU only 1 decode to N scaling:
> ffmpeg -i ~/Videos/1920x1080p_30.00_x264_qp28.h264 \
>     -vf "scale=1280:720" -pix_fmt nv12 -f null /dev/null \
>     -vf "scale=720:480" -pix_fmt nv12 -f null /dev/null \
>     -abr_pipeline
>
>     test results:
>                 2 scale
>     Improved       ~25%
>
> Command for 1:N transcode by GPU acceleration:
> ./ffmpeg -vaapi_device /dev/dri/renderD128 -hwaccel vaapi \
>     -hwaccel_output_format vaapi \
>     -i ~/Videos/1920x1080p_30.00_x264_qp28.h264 \
>     -vf "scale_vaapi=1280:720" -c:v h264_vaapi -f null /dev/null \
>     -vf "scale_vaapi=720:480" -c:v h264_vaapi -f null /dev/null \
>     -abr_pipeline
>
>     test results:
>                 2 scale+enc
>     Improved      ~6.1%
>
> Signed-off-by: Wang, Shaofei <shaofei.wang@intel.com>
> Signed-off-by: Jun Zhao <jun.zhao@intel.com>
> ---
>  fftools/ffmpeg.c        |  236 ++++++++++++++++++++++++++++++++++++++++++++---
>  fftools/ffmpeg.h        |   12 +++
>  fftools/ffmpeg_filter.c |    6 +
>  fftools/ffmpeg_opt.c    |    6 +-
>  4 files changed, 246 insertions(+), 14 deletions(-)
>
> diff --git a/fftools/ffmpeg.c b/fftools/ffmpeg.c
> index 38c21e9..5dc80fd 100644
> --- a/fftools/ffmpeg.c
> +++ b/fftools/ffmpeg.c
> @@ -1523,6 +1523,110 @@ static int reap_filters(int flush)
>      return 0;
>  }
>
> +static int pipeline_reap_filters(int flush, InputFilter * ifilter)
> +{
> +    AVFrame *filtered_frame = NULL;
> +    int i;
> +
> +    for (i = 0; i < nb_output_streams; i++) {
> +        if (ifilter == output_streams[i]->filter->graph->inputs[0]) break;
> +    }
> +    OutputStream *ost = output_streams[i];
> +    OutputFile    *of = output_files[ost->file_index];
> +    AVFilterContext *filter;
> +    AVCodecContext *enc = ost->enc_ctx;
> +    int ret = 0;
> +
> +    if (!ost->filter || !ost->filter->graph->graph)
> +        return 0;
> +    filter = ost->filter->filter;
> +
> +    if (!ost->initialized) {
> +        char error[1024] = "";
> +        ret = init_output_stream(ost, error, sizeof(error));
> +        if (ret < 0) {
> +            av_log(NULL, AV_LOG_ERROR, "Error initializing output stream %d:%d -- %s\n",
> +                   ost->file_index, ost->index, error);
> +            exit_program(1);
> +        }
> +    }
> +
> +    if (!ost->filtered_frame && !(ost->filtered_frame = av_frame_alloc())) {
> +        return AVERROR(ENOMEM);
> +    }
> +    filtered_frame = ost->filtered_frame;
> +
> +    while (1) {
> +        double float_pts = AV_NOPTS_VALUE; // this is identical to filtered_frame.pts but with higher precision
> +        ret = av_buffersink_get_frame_flags(filter, filtered_frame,
> +                                           AV_BUFFERSINK_FLAG_NO_REQUEST);
> +        if (ret < 0) {
> +            if (ret != AVERROR(EAGAIN) && ret != AVERROR_EOF) {
> +                av_log(NULL, AV_LOG_WARNING,
> +                       "Error in av_buffersink_get_frame_flags(): %s\n", av_err2str(ret));
> +            } else if (flush && ret == AVERROR_EOF) {
> +                if (av_buffersink_get_type(filter) == AVMEDIA_TYPE_VIDEO)
> +                    do_video_out(of, ost, NULL, AV_NOPTS_VALUE);
> +            }
> +            break;
> +        }
> +        if (ost->finished) {
> +            av_frame_unref(filtered_frame);
> +            continue;
> +        }
> +        if (filtered_frame->pts != AV_NOPTS_VALUE) {
> +            int64_t start_time = (of->start_time == AV_NOPTS_VALUE) ? 0 : of->start_time;
> +            AVRational filter_tb = av_buffersink_get_time_base(filter);
> +            AVRational tb = enc->time_base;
> +            int extra_bits = av_clip(29 - av_log2(tb.den), 0, 16);
> +
> +            tb.den <<= extra_bits;
> +            float_pts =
> +                av_rescale_q(filtered_frame->pts, filter_tb, tb) -
> +                av_rescale_q(start_time, AV_TIME_BASE_Q, tb);
> +            float_pts /= 1 << extra_bits;
> +            // avoid exact midoints to reduce the chance of rounding differences, this can be removed in case the fps code is changed to work with integers
> +            float_pts += FFSIGN(float_pts) * 1.0 / (1<<17);
> +
> +            filtered_frame->pts =
> +                av_rescale_q(filtered_frame->pts, filter_tb, enc->time_base) -
> +                av_rescale_q(start_time, AV_TIME_BASE_Q, enc->time_base);
> +        }
> +
> +        switch (av_buffersink_get_type(filter)) {
> +        case AVMEDIA_TYPE_VIDEO:
> +            if (!ost->frame_aspect_ratio.num)
> +                enc->sample_aspect_ratio = filtered_frame->sample_aspect_ratio;
> +
> +            if (debug_ts) {
> +                av_log(NULL, AV_LOG_INFO, "filter -> pts:%s pts_time:%s exact:%f time_base:%d/%d\n",
> +                        av_ts2str(filtered_frame->pts), av_ts2timestr(filtered_frame->pts, &enc->time_base),
> +                        float_pts,
> +                        enc->time_base.num, enc->time_base.den);
> +            }
> +
> +            do_video_out(of, ost, filtered_frame, float_pts);
> +            break;
> +        case AVMEDIA_TYPE_AUDIO:
> +            if (!(enc->codec->capabilities & AV_CODEC_CAP_PARAM_CHANGE) &&
> +                enc->channels != filtered_frame->channels) {
> +                av_log(NULL, AV_LOG_ERROR,
> +                       "Audio filter graph output is not normalized and encoder does not support parameter changes\n");
> +                break;
> +            }
> +            do_audio_out(of, ost, filtered_frame);
> +            break;
> +        default:
> +            // TODO support subtitle filters
> +            av_assert0(0);
> +        }
> +
> +        av_frame_unref(filtered_frame);
> +    }
> +
> +    return 0;
> +}
> +
>  static void print_final_stats(int64_t total_size)
>  {
>      uint64_t video_size = 0, audio_size = 0, extra_size = 0, other_size = 0;
> @@ -2175,7 +2279,15 @@ static int ifilter_send_frame(InputFilter *ifilter, AVFrame *frame)
>              }
>          }
>
> +#if HAVE_THREADS
> +        if (!abr_pipeline) {
> +            ret = reap_filters(1);
> +        } else {
> +            ret = pipeline_reap_filters(1, ifilter);
> +        }
> +#else
>          ret = reap_filters(1);
> +#endif
>          if (ret < 0 && ret != AVERROR_EOF) {
>              av_log(NULL, AV_LOG_ERROR, "Error while filtering: %s\n", av_err2str(ret));
>              return ret;
> @@ -2204,6 +2316,16 @@ static int ifilter_send_eof(InputFilter *ifilter, int64_t pts)
>
>      ifilter->eof = 1;
>
> +#if HAVE_THREADS
> +    if (abr_pipeline) {
> +        ifilter->waited_frm = NULL;
> +        pthread_mutex_lock(&ifilter->process_mutex);
> +        ifilter->t_end = 1;
> +        pthread_cond_signal(&ifilter->process_cond);
> +        pthread_mutex_unlock(&ifilter->process_mutex);
> +        pthread_join(ifilter->f_thread, NULL);
> +    }
> +#endif
>      if (ifilter->filter) {
>          ret = av_buffersrc_close(ifilter->filter, pts, AV_BUFFERSRC_FLAG_PUSH);
>          if (ret < 0)
> @@ -2248,6 +2370,41 @@ static int decode(AVCodecContext *avctx, AVFrame *frame, int *got_frame, AVPacke
>      return 0;
>  }
>
> +#if HAVE_THREADS
> +static void *filter_pipeline(void *arg)
> +{
> +    InputFilter *fl = arg;
> +    AVFrame *frm;
> +    int ret;
> +    while(1) {
> +        pthread_mutex_lock(&fl->process_mutex);
> +        while (fl->waited_frm == NULL && !fl->t_end)
> +            pthread_cond_wait(&fl->process_cond, &fl->process_mutex);
> +        pthread_mutex_unlock(&fl->process_mutex);
> +
> +        if (fl->t_end) break;
> +
> +        frm = fl->waited_frm;
> +        ret = ifilter_send_frame(fl, frm);
> +        if (ret < 0) {
> +            av_log(NULL, AV_LOG_ERROR,
> +                   "Failed to inject frame into filter network: %s\n", av_err2str(ret));
> +        } else
> +            ret = pipeline_reap_filters(0, fl);
> +
> +        fl->t_error = ret;
> +
> +        pthread_mutex_lock(&fl->finish_mutex);
> +        fl->waited_frm = NULL;
> +        pthread_cond_signal(&fl->finish_cond);
> +        pthread_mutex_unlock(&fl->finish_mutex);
> +
> +        if (ret < 0)
> +            break;
> +    }
> +    return;
> +}
> +#endif
>  static int send_frame_to_filters(InputStream *ist, AVFrame *decoded_frame)
>  {
>      int i, ret;
> @@ -2255,22 +2412,72 @@ static int send_frame_to_filters(InputStream *ist, AVFrame *decoded_frame)
>
>      av_assert1(ist->nb_filters > 0); /* ensure ret is initialized */
>      for (i = 0; i < ist->nb_filters; i++) {
> -        if (i < ist->nb_filters - 1) {
> -            f = ist->filter_frame;
> -            ret = av_frame_ref(f, decoded_frame);
> -            if (ret < 0)
> +#if HAVE_THREADS
> +        if (!abr_pipeline) {
> +#endif
> +            if (i < ist->nb_filters - 1) {
> +                f = ist->filter_frame;
> +                ret = av_frame_ref(f, decoded_frame);
> +                if (ret < 0)
> +                    break;
> +            } else
> +                f = decoded_frame;
> +
> +                ret = ifilter_send_frame(ist->filters[i], f);
> +                if (ret == AVERROR_EOF)
> +                    ret = 0; /* ignore */
> +                if (ret < 0) {
> +                    av_log(NULL, AV_LOG_ERROR,
> +                           "Failed to inject frame into filter network: %s\n", av_err2str(ret));
> +                    break;
> +                }
> +#if HAVE_THREADS
> +        } else {
> +            if (i < ist->nb_filters - 1) {
> +                f = &ist->filters[i]->input_frm;
> +                ret = av_frame_ref(f, decoded_frame);
> +                if (ret < 0)
> +                    break;
> +            } else
> +                f = decoded_frame;
> +
> +            if(ist->filters[i]->f_thread == 0) {
> +                if ((ret = pthread_create(&ist->filters[i]->f_thread, NULL, filter_pipeline, ist->filters[i]))) {
> +                    av_log(NULL, AV_LOG_ERROR, "pthread_create failed: %s. Try to increase `ulimit -v` or decrease `ulimit -s`.\n", strerror(ret));
> +                    return AVERROR(ret);
> +                }
> +                pthread_mutex_init(&ist->filters[i]->process_mutex, NULL);
> +                pthread_mutex_init(&ist->filters[i]->finish_mutex, NULL);
> +                pthread_cond_init(&ist->filters[i]->process_cond, NULL);
> +                pthread_cond_init(&ist->filters[i]->finish_cond, NULL);
> +                ist->filters[i]->t_end = 0;
> +                ist->filters[i]->t_error = 0;
> +            }
> +
> +            pthread_mutex_lock(&ist->filters[i]->process_mutex);
> +            ist->filters[i]->waited_frm = f;
> +            pthread_cond_signal(&ist->filters[i]->process_cond);
> +            pthread_mutex_unlock(&ist->filters[i]->process_mutex);
> +        }
> +#endif
> +    }
> +#if HAVE_THREADS
> +    if (abr_pipeline) {
> +        for (i = 0; i < ist->nb_filters; i++) {
> +            pthread_mutex_lock(&ist->filters[i]->finish_mutex);
> +            while(ist->filters[i]->waited_frm != NULL)
> +                pthread_cond_wait(&ist->filters[i]->finish_cond, &ist->filters[i]->finish_mutex);
> +            pthread_mutex_unlock(&ist->filters[i]->finish_mutex);
> +        }
> +        for (i = 0; i < ist->nb_filters; i++) {
> +            if (ist->filters[i]->t_error < 0) {
> +                ret = ist->filters[i]->t_error;
>                  break;
> -        } else
> -            f = decoded_frame;
> -        ret = ifilter_send_frame(ist->filters[i], f);
> -        if (ret == AVERROR_EOF)
> -            ret = 0; /* ignore */
> -        if (ret < 0) {
> -            av_log(NULL, AV_LOG_ERROR,
> -                   "Failed to inject frame into filter network: %s\n", av_err2str(ret));
> -            break;
> +            }
>          }
>      }
> +#endif
> +
>      return ret;
>  }
>
> @@ -4635,6 +4842,9 @@ static int transcode_step(void)
>      if (ret < 0)
>          return ret == AVERROR_EOF ? 0 : ret;
>
> +#if HAVE_THREADS
> +    if (abr_pipeline) return 0;
> +#endif
>      return reap_filters(0);
>  }
>
> diff --git a/fftools/ffmpeg.h b/fftools/ffmpeg.h
> index eb1eaf6..436e428 100644
> --- a/fftools/ffmpeg.h
> +++ b/fftools/ffmpeg.h
> @@ -253,6 +253,17 @@ typedef struct InputFilter {
>
>      AVBufferRef *hw_frames_ctx;
>
> +    // for abr pipeline
> +    AVFrame *waited_frm;
> +    AVFrame input_frm;
> +    pthread_t f_thread;
> +    pthread_cond_t process_cond;
> +    pthread_cond_t finish_cond;
> +    pthread_mutex_t process_mutex;
> +    pthread_mutex_t finish_mutex;
> +    int t_end;
> +    int t_error;
> +
>      int eof;
>  } InputFilter;
>
> @@ -606,6 +617,7 @@ extern int frame_bits_per_raw_sample;
>  extern AVIOContext *progress_avio;
>  extern float max_error_rate;
>  extern char *videotoolbox_pixfmt;
> +extern int abr_pipeline;
>
>  extern int filter_nbthreads;
>  extern int filter_complex_nbthreads;
> diff --git a/fftools/ffmpeg_filter.c b/fftools/ffmpeg_filter.c
> index 6518d50..0323b10 100644
> --- a/fftools/ffmpeg_filter.c
> +++ b/fftools/ffmpeg_filter.c
> @@ -197,6 +197,7 @@ DEF_CHOOSE_FORMAT(channel_layouts, uint64_t, channel_layout, channel_layouts, 0,
>  int init_simple_filtergraph(InputStream *ist, OutputStream *ost)
>  {
>      FilterGraph *fg = av_mallocz(sizeof(*fg));
> +    int i;
>
>      if (!fg)
>          exit_program(1);
> @@ -225,6 +226,11 @@ int init_simple_filtergraph(InputStream *ist, OutputStream *ost)
>      GROW_ARRAY(ist->filters, ist->nb_filters);
>      ist->filters[ist->nb_filters - 1] = fg->inputs[0];
>
> +    if (abr_pipeline) {
> +        for (i = 0; i < ist->nb_filters; i++) {
> +            ist->filters[i]->f_thread = 0;
> +        }
> +    }
>      GROW_ARRAY(filtergraphs, nb_filtergraphs);
>      filtergraphs[nb_filtergraphs - 1] = fg;
>
> diff --git a/fftools/ffmpeg_opt.c b/fftools/ffmpeg_opt.c
> index d4851a2..fa5a556 100644
> --- a/fftools/ffmpeg_opt.c
> +++ b/fftools/ffmpeg_opt.c
> @@ -110,6 +110,7 @@ float max_error_rate  = 2.0/3;
>  int filter_nbthreads = 0;
>  int filter_complex_nbthreads = 0;
>  int vstats_version = 2;
> +int abr_pipeline      = 0;
>
>
>  static int intra_only         = 0;
> @@ -3502,7 +3503,10 @@ const OptionDef options[] = {
>          "set the maximum number of queued packets from the demuxer" },
>      { "find_stream_info", OPT_BOOL | OPT_PERFILE | OPT_INPUT | OPT_EXPERT, { &find_stream_info },
>          "read and decode the streams to fill missing information with heuristics" },
> -
> +#if HAVE_THREADS
> +    { "abr_pipeline",    OPT_BOOL,                                    { &abr_pipeline },
> +        "adaptive bitrate pipeline (1 decode to N filter graphs, and 1 to N transcode" },
> +#endif
>      /* video options */
>      { "vframes",      OPT_VIDEO | HAS_ARG  | OPT_PERFILE | OPT_OUTPUT,           { .func_arg = opt_video_frames },
>          "set the number of video frames to output", "number" },
> --
> 1.7.1
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel

LGTM, and tested OK, but I cannot be sure this works well for other use cases.
Michael Niedermayer Nov. 12, 2018, 6:28 p.m. UTC | #2
On Sun, Nov 11, 2018 at 07:07:18PM +0800, Jun Zhao wrote:
> Improved the performance of 1 decode + N filter graphs and adaptive
> bitrate scenario.
> 
> With new option "-abr_pipeline"
> 1. It enabled multiple filter graph concurrency, which bring above
> about 5%~20% improvement in some 1:N scenario by CPU or GPU
> acceleration
> 2. Next step will continue to improve the concurrency of complex
> filter graph which can support high efficiency of filter net
> 
> Below are some test cases and test result as reference.
> (Hardware platform: Intel(R) Core(TM) i7-6700 CPU @ 3.40GHz)
> (Software: Intel iHD driver - 16.9.00100, CentOS 7)
> 
> Command for Intel GPU acceleration case, 1 decode to N scaling:
> ffmpeg -vaapi_device /dev/dri/renderD128 -hwaccel vaapi \
>     -hwaccel_output_format vaapi \
>     -i ~/Videos/1920x1080p_30.00_x264_qp28.h264 \
>     -vf "scale_vaapi=1280:720:format=nv12,hwdownload" \
>     -pix_fmt nv12 -f null /dev/null \
>     -vf "scale_vaapi=720:480:format=nv12,hwdownload" \
>     -pix_fmt nv12 -f null /dev/null \
>     -abr_pipeline
> 
>     test results:
>                 2 scale
>     Improved       ~34%
> 
> Command for CPU only 1 decode to N scaling:
> ffmpeg -i ~/Videos/1920x1080p_30.00_x264_qp28.h264 \
>     -vf "scale=1280:720" -pix_fmt nv12 -f null /dev/null \
>     -vf "scale=720:480" -pix_fmt nv12 -f null /dev/null \
>     -abr_pipeline
> 
>     test results:
>                 2 scale
>     Improved       ~25%
> 
> Command for 1:N transcode by GPU acceleration:
> ./ffmpeg -vaapi_device /dev/dri/renderD128 -hwaccel vaapi \
>     -hwaccel_output_format vaapi \
>     -i ~/Videos/1920x1080p_30.00_x264_qp28.h264 \
>     -vf "scale_vaapi=1280:720" -c:v h264_vaapi -f null /dev/null \
>     -vf "scale_vaapi=720:480" -c:v h264_vaapi -f null /dev/null \
>     -abr_pipeline
> 
>     test results:
>                 2 scale+enc
>     Improved      ~6.1%
> 
> Signed-off-by: Wang, Shaofei <shaofei.wang@intel.com>
> Signed-off-by: Jun Zhao <jun.zhao@intel.com>
> ---
>  fftools/ffmpeg.c        |  236 ++++++++++++++++++++++++++++++++++++++++++++---
>  fftools/ffmpeg.h        |   12 +++
>  fftools/ffmpeg_filter.c |    6 +
>  fftools/ffmpeg_opt.c    |    6 +-
>  4 files changed, 246 insertions(+), 14 deletions(-)

this breaks build for mingw64

CC	fftools/ffmpeg_filter.o
CC	fftools/ffmpeg.o
src/fftools/ffmpeg_filter.c: In function ‘init_simple_filtergraph’:
src/fftools/ffmpeg_filter.c:231:39: error: incompatible types when assigning to type ‘pthread_t’ from type ‘int’
             ist->filters[i]->f_thread = 0;
                                       ^
make: *** [fftools/ffmpeg_filter.o] Error 1
make: *** Waiting for unfinished jobs....
src/fftools/ffmpeg.c: In function ‘pipeline_reap_filters’:
src/fftools/ffmpeg.c:1534:5: warning: ISO C90 forbids mixed declarations and code [-Wdeclaration-after-statement]
     OutputStream *ost = output_streams[i];
     ^
src/fftools/ffmpeg.c: In function ‘do_streamcopy’:
src/fftools/ffmpeg.c:2177:5: warning: ‘av_copy_packet_side_data’ is deprecated (declared at src/libavcodec/avcodec.h:4423) [-Wdeprecated-declarations]
     av_copy_packet_side_data(&opkt, pkt);
     ^
src/fftools/ffmpeg.c: In function ‘filter_pipeline’:
src/fftools/ffmpeg.c:2405:5: warning: ‘return’ with no value, in function returning non-void [enabled by default]
     return;
     ^
src/fftools/ffmpeg.c: In function ‘send_frame_to_filters’:
src/fftools/ffmpeg.c:2444:42: error: invalid operands to binary == (have ‘pthread_t’ and ‘int’)
             if(ist->filters[i]->f_thread == 0) {
                                          ^
src/fftools/ffmpeg.c: In function ‘init_output_stream’:
src/fftools/ffmpeg.c:3747:9: warning: ‘avcodec_copy_context’ is deprecated (declared at src/libavcodec/avcodec.h:4195) [-Wdeprecated-declarations]
         ret = avcodec_copy_context(ost->st->codec, ost->enc_ctx);
         ^
src/fftools/ffmpeg.c:3747:9: warning: ‘codec’ is deprecated (declared at src/libavformat/avformat.h:878) [-Wdeprecated-declarations]
src/fftools/ffmpeg.c:3793:9: warning: ‘codec’ is deprecated (declared at src/libavformat/avformat.h:878) [-Wdeprecated-declarations]
         ost->st->codec->codec= ost->enc_ctx->codec;
         ^
src/fftools/ffmpeg.c: In function ‘check_keyboard_interaction’:
src/fftools/ffmpeg.c:4174:13: warning: ‘codec’ is deprecated (declared at src/libavformat/avformat.h:878) [-Wdeprecated-declarations]
             debug = input_streams[0]->st->codec->debug<<1;
             ^
src/fftools/ffmpeg.c:4197:13: warning: ‘codec’ is deprecated (declared at src/libavformat/avformat.h:878) [-Wdeprecated-declarations]
             input_streams[i]->st->codec->debug = debug;
             ^
make: *** [fftools/ffmpeg.o] Error 1

[...]
James Almer Nov. 12, 2018, 6:37 p.m. UTC | #3
On 11/12/2018 3:28 PM, Michael Niedermayer wrote:
> On Sun, Nov 11, 2018 at 07:07:18PM +0800, Jun Zhao wrote:
>> Improved the performance of 1 decode + N filter graphs and adaptive
>> bitrate scenario.
>>
>> With new option "-abr_pipeline"
>> 1. It enabled multiple filter graph concurrency, which bring above
>> about 5%~20% improvement in some 1:N scenario by CPU or GPU
>> acceleration
>> 2. Next step will continue to improve the concurrency of complex
>> filter graph which can support high efficiency of filter net
>>
>> Below are some test cases and test result as reference.
>> (Hardware platform: Intel(R) Core(TM) i7-6700 CPU @ 3.40GHz)
>> (Software: Intel iHD driver - 16.9.00100, CentOS 7)
>>
>> Command for Intel GPU acceleration case, 1 decode to N scaling:
>> ffmpeg -vaapi_device /dev/dri/renderD128 -hwaccel vaapi \
>>     -hwaccel_output_format vaapi \
>>     -i ~/Videos/1920x1080p_30.00_x264_qp28.h264 \
>>     -vf "scale_vaapi=1280:720:format=nv12,hwdownload" \
>>     -pix_fmt nv12 -f null /dev/null \
>>     -vf "scale_vaapi=720:480:format=nv12,hwdownload" \
>>     -pix_fmt nv12 -f null /dev/null \
>>     -abr_pipeline
>>
>>     test results:
>>                 2 scale
>>     Improved       ~34%
>>
>> Command for CPU only 1 decode to N scaling:
>> ffmpeg -i ~/Videos/1920x1080p_30.00_x264_qp28.h264 \
>>     -vf "scale=1280:720" -pix_fmt nv12 -f null /dev/null \
>>     -vf "scale=720:480" -pix_fmt nv12 -f null /dev/null \
>>     -abr_pipeline
>>
>>     test results:
>>                 2 scale
>>     Improved       ~25%
>>
>> Command for 1:N transcode by GPU acceleration:
>> ./ffmpeg -vaapi_device /dev/dri/renderD128 -hwaccel vaapi \
>>     -hwaccel_output_format vaapi \
>>     -i ~/Videos/1920x1080p_30.00_x264_qp28.h264 \
>>     -vf "scale_vaapi=1280:720" -c:v h264_vaapi -f null /dev/null \
>>     -vf "scale_vaapi=720:480" -c:v h264_vaapi -f null /dev/null \
>>     -abr_pipeline
>>
>>     test results:
>>                 2 scale+enc
>>     Improved      ~6.1%
>>
>> Signed-off-by: Wang, Shaofei <shaofei.wang@intel.com>
>> Signed-off-by: Jun Zhao <jun.zhao@intel.com>
>> ---
>>  fftools/ffmpeg.c        |  236 ++++++++++++++++++++++++++++++++++++++++++++---
>>  fftools/ffmpeg.h        |   12 +++
>>  fftools/ffmpeg_filter.c |    6 +
>>  fftools/ffmpeg_opt.c    |    6 +-
>>  4 files changed, 246 insertions(+), 14 deletions(-)
> 
> this breaks build for mingw64
> 
> CC	fftools/ffmpeg_filter.o
> CC	fftools/ffmpeg.o
> src/fftools/ffmpeg_filter.c: In function ‘init_simple_filtergraph’:
> src/fftools/ffmpeg_filter.c:231:39: error: incompatible types when assigning to type ‘pthread_t’ from type ‘int’
>              ist->filters[i]->f_thread = 0;
>                                        ^
> make: *** [fftools/ffmpeg_filter.o] Error 1
> make: *** Waiting for unfinished jobs....
> src/fftools/ffmpeg.c: In function ‘pipeline_reap_filters’:
> src/fftools/ffmpeg.c:1534:5: warning: ISO C90 forbids mixed declarations and code [-Wdeclaration-after-statement]
>      OutputStream *ost = output_streams[i];
>      ^
> src/fftools/ffmpeg.c: In function ‘do_streamcopy’:
> src/fftools/ffmpeg.c:2177:5: warning: ‘av_copy_packet_side_data’ is deprecated (declared at src/libavcodec/avcodec.h:4423) [-Wdeprecated-declarations]
>      av_copy_packet_side_data(&opkt, pkt);
>      ^
> src/fftools/ffmpeg.c: In function ‘filter_pipeline’:
> src/fftools/ffmpeg.c:2405:5: warning: ‘return’ with no value, in function returning non-void [enabled by default]
>      return;
>      ^
> src/fftools/ffmpeg.c: In function ‘send_frame_to_filters’:
> src/fftools/ffmpeg.c:2444:42: error: invalid operands to binary == (have ‘pthread_t’ and ‘int’)
>              if(ist->filters[i]->f_thread == 0) {

How old is your mingw64 toolchain? It should be using the w32threads
wrapper or some library like winpthreads, but it's not.

In any case, the issue is a missing #if HAVE_THREADS check for the newly
added ffmpeg.h fields.
Michael Niedermayer Nov. 12, 2018, 7:05 p.m. UTC | #4
On Mon, Nov 12, 2018 at 03:37:05PM -0300, James Almer wrote:
> On 11/12/2018 3:28 PM, Michael Niedermayer wrote:
> > On Sun, Nov 11, 2018 at 07:07:18PM +0800, Jun Zhao wrote:
> >> Improved the performance of 1 decode + N filter graphs and adaptive
> >> bitrate scenario.
> >>
> >> With new option "-abr_pipeline"
> >> 1. It enabled multiple filter graph concurrency, which bring above
> >> about 5%~20% improvement in some 1:N scenario by CPU or GPU
> >> acceleration
> >> 2. Next step will continue to improve the concurrency of complex
> >> filter graph which can support high efficiency of filter net
> >>
> >> Below are some test cases and test result as reference.
> >> (Hardware platform: Intel(R) Core(TM) i7-6700 CPU @ 3.40GHz)
> >> (Software: Intel iHD driver - 16.9.00100, CentOS 7)
> >>
> >> Command for Intel GPU acceleration case, 1 decode to N scaling:
> >> ffmpeg -vaapi_device /dev/dri/renderD128 -hwaccel vaapi \
> >>     -hwaccel_output_format vaapi \
> >>     -i ~/Videos/1920x1080p_30.00_x264_qp28.h264 \
> >>     -vf "scale_vaapi=1280:720:format=nv12,hwdownload" \
> >>     -pix_fmt nv12 -f null /dev/null \
> >>     -vf "scale_vaapi=720:480:format=nv12,hwdownload" \
> >>     -pix_fmt nv12 -f null /dev/null \
> >>     -abr_pipeline
> >>
> >>     test results:
> >>                 2 scale
> >>     Improved       ~34%
> >>
> >> Command for CPU only 1 decode to N scaling:
> >> ffmpeg -i ~/Videos/1920x1080p_30.00_x264_qp28.h264 \
> >>     -vf "scale=1280:720" -pix_fmt nv12 -f null /dev/null \
> >>     -vf "scale=720:480" -pix_fmt nv12 -f null /dev/null \
> >>     -abr_pipeline
> >>
> >>     test results:
> >>                 2 scale
> >>     Improved       ~25%
> >>
> >> Command for 1:N transcode by GPU acceleration:
> >> ./ffmpeg -vaapi_device /dev/dri/renderD128 -hwaccel vaapi \
> >>     -hwaccel_output_format vaapi \
> >>     -i ~/Videos/1920x1080p_30.00_x264_qp28.h264 \
> >>     -vf "scale_vaapi=1280:720" -c:v h264_vaapi -f null /dev/null \
> >>     -vf "scale_vaapi=720:480" -c:v h264_vaapi -f null /dev/null \
> >>     -abr_pipeline
> >>
> >>     test results:
> >>                 2 scale+enc
> >>     Improved      ~6.1%
> >>
> >> Signed-off-by: Wang, Shaofei <shaofei.wang@intel.com>
> >> Signed-off-by: Jun Zhao <jun.zhao@intel.com>
> >> ---
> >>  fftools/ffmpeg.c        |  236 ++++++++++++++++++++++++++++++++++++++++++++---
> >>  fftools/ffmpeg.h        |   12 +++
> >>  fftools/ffmpeg_filter.c |    6 +
> >>  fftools/ffmpeg_opt.c    |    6 +-
> >>  4 files changed, 246 insertions(+), 14 deletions(-)
> > 
> > this breaks build for mingw64
> > 
> > CC	fftools/ffmpeg_filter.o
> > CC	fftools/ffmpeg.o
> > src/fftools/ffmpeg_filter.c: In function ‘init_simple_filtergraph’:
> > src/fftools/ffmpeg_filter.c:231:39: error: incompatible types when assigning to type ‘pthread_t’ from type ‘int’
> >              ist->filters[i]->f_thread = 0;
> >                                        ^
> > make: *** [fftools/ffmpeg_filter.o] Error 1
> > make: *** Waiting for unfinished jobs....
> > src/fftools/ffmpeg.c: In function ‘pipeline_reap_filters’:
> > src/fftools/ffmpeg.c:1534:5: warning: ISO C90 forbids mixed declarations and code [-Wdeclaration-after-statement]
> >      OutputStream *ost = output_streams[i];
> >      ^
> > src/fftools/ffmpeg.c: In function ‘do_streamcopy’:
> > src/fftools/ffmpeg.c:2177:5: warning: ‘av_copy_packet_side_data’ is deprecated (declared at src/libavcodec/avcodec.h:4423) [-Wdeprecated-declarations]
> >      av_copy_packet_side_data(&opkt, pkt);
> >      ^
> > src/fftools/ffmpeg.c: In function ‘filter_pipeline’:
> > src/fftools/ffmpeg.c:2405:5: warning: ‘return’ with no value, in function returning non-void [enabled by default]
> >      return;
> >      ^
> > src/fftools/ffmpeg.c: In function ‘send_frame_to_filters’:
> > src/fftools/ffmpeg.c:2444:42: error: invalid operands to binary == (have ‘pthread_t’ and ‘int’)
> >              if(ist->filters[i]->f_thread == 0) {
> 
> How old is your mingw64 toolchain? It should be using the w32threads
> wrapper or some library like winpthreads, but it's not.

IIRC its the mingw that came with the distribution which is still ubuntu 14.04
It will get upgraded when i update ubuntu on that box. 

[...]
James Almer Nov. 12, 2018, 7:15 p.m. UTC | #5
On 11/12/2018 4:05 PM, Michael Niedermayer wrote:
> On Mon, Nov 12, 2018 at 03:37:05PM -0300, James Almer wrote:
>> On 11/12/2018 3:28 PM, Michael Niedermayer wrote:
>>> On Sun, Nov 11, 2018 at 07:07:18PM +0800, Jun Zhao wrote:
>>>> Improved the performance of 1 decode + N filter graphs and adaptive
>>>> bitrate scenario.
>>>>
>>>> With new option "-abr_pipeline"
>>>> 1. It enabled multiple filter graph concurrency, which bring above
>>>> about 5%~20% improvement in some 1:N scenario by CPU or GPU
>>>> acceleration
>>>> 2. Next step will continue to improve the concurrency of complex
>>>> filter graph which can support high efficiency of filter net
>>>>
>>>> Below are some test cases and test result as reference.
>>>> (Hardware platform: Intel(R) Core(TM) i7-6700 CPU @ 3.40GHz)
>>>> (Software: Intel iHD driver - 16.9.00100, CentOS 7)
>>>>
>>>> Command for Intel GPU acceleration case, 1 decode to N scaling:
>>>> ffmpeg -vaapi_device /dev/dri/renderD128 -hwaccel vaapi \
>>>>     -hwaccel_output_format vaapi \
>>>>     -i ~/Videos/1920x1080p_30.00_x264_qp28.h264 \
>>>>     -vf "scale_vaapi=1280:720:format=nv12,hwdownload" \
>>>>     -pix_fmt nv12 -f null /dev/null \
>>>>     -vf "scale_vaapi=720:480:format=nv12,hwdownload" \
>>>>     -pix_fmt nv12 -f null /dev/null \
>>>>     -abr_pipeline
>>>>
>>>>     test results:
>>>>                 2 scale
>>>>     Improved       ~34%
>>>>
>>>> Command for CPU only 1 decode to N scaling:
>>>> ffmpeg -i ~/Videos/1920x1080p_30.00_x264_qp28.h264 \
>>>>     -vf "scale=1280:720" -pix_fmt nv12 -f null /dev/null \
>>>>     -vf "scale=720:480" -pix_fmt nv12 -f null /dev/null \
>>>>     -abr_pipeline
>>>>
>>>>     test results:
>>>>                 2 scale
>>>>     Improved       ~25%
>>>>
>>>> Command for 1:N transcode by GPU acceleration:
>>>> ./ffmpeg -vaapi_device /dev/dri/renderD128 -hwaccel vaapi \
>>>>     -hwaccel_output_format vaapi \
>>>>     -i ~/Videos/1920x1080p_30.00_x264_qp28.h264 \
>>>>     -vf "scale_vaapi=1280:720" -c:v h264_vaapi -f null /dev/null \
>>>>     -vf "scale_vaapi=720:480" -c:v h264_vaapi -f null /dev/null \
>>>>     -abr_pipeline
>>>>
>>>>     test results:
>>>>                 2 scale+enc
>>>>     Improved      ~6.1%
>>>>
>>>> Signed-off-by: Wang, Shaofei <shaofei.wang@intel.com>
>>>> Signed-off-by: Jun Zhao <jun.zhao@intel.com>
>>>> ---
>>>>  fftools/ffmpeg.c        |  236 ++++++++++++++++++++++++++++++++++++++++++++---
>>>>  fftools/ffmpeg.h        |   12 +++
>>>>  fftools/ffmpeg_filter.c |    6 +
>>>>  fftools/ffmpeg_opt.c    |    6 +-
>>>>  4 files changed, 246 insertions(+), 14 deletions(-)
>>>
>>> this breaks build for mingw64
>>>
>>> CC	fftools/ffmpeg_filter.o
>>> CC	fftools/ffmpeg.o
>>> src/fftools/ffmpeg_filter.c: In function ‘init_simple_filtergraph’:
>>> src/fftools/ffmpeg_filter.c:231:39: error: incompatible types when assigning to type ‘pthread_t’ from type ‘int’
>>>              ist->filters[i]->f_thread = 0;
>>>                                        ^
>>> make: *** [fftools/ffmpeg_filter.o] Error 1
>>> make: *** Waiting for unfinished jobs....
>>> src/fftools/ffmpeg.c: In function ‘pipeline_reap_filters’:
>>> src/fftools/ffmpeg.c:1534:5: warning: ISO C90 forbids mixed declarations and code [-Wdeclaration-after-statement]
>>>      OutputStream *ost = output_streams[i];
>>>      ^
>>> src/fftools/ffmpeg.c: In function ‘do_streamcopy’:
>>> src/fftools/ffmpeg.c:2177:5: warning: ‘av_copy_packet_side_data’ is deprecated (declared at src/libavcodec/avcodec.h:4423) [-Wdeprecated-declarations]
>>>      av_copy_packet_side_data(&opkt, pkt);
>>>      ^
>>> src/fftools/ffmpeg.c: In function ‘filter_pipeline’:
>>> src/fftools/ffmpeg.c:2405:5: warning: ‘return’ with no value, in function returning non-void [enabled by default]
>>>      return;
>>>      ^
>>> src/fftools/ffmpeg.c: In function ‘send_frame_to_filters’:
>>> src/fftools/ffmpeg.c:2444:42: error: invalid operands to binary == (have ‘pthread_t’ and ‘int’)
>>>              if(ist->filters[i]->f_thread == 0) {
>>
>> How old is your mingw64 toolchain? It should be using the w32threads
>> wrapper or some library like winpthreads, but it's not.
> 
> IIRC its the mingw that came with the distribution which is still ubuntu 14.04
> It will get upgraded when i update ubuntu on that box. 

Yeah, a build that old is probably missing CONDITION_VARIABLE and/or
INIT_ONCE, which are needed by the wrapper.
diff mbox

Patch

diff --git a/fftools/ffmpeg.c b/fftools/ffmpeg.c
index 38c21e9..5dc80fd 100644
--- a/fftools/ffmpeg.c
+++ b/fftools/ffmpeg.c
@@ -1523,6 +1523,110 @@  static int reap_filters(int flush)
     return 0;
 }
 
+static int pipeline_reap_filters(int flush, InputFilter *ifilter)
+{
+    AVFrame *filtered_frame = NULL;
+    OutputStream *ost = NULL;
+    OutputFile *of = NULL;
+    AVFilterContext *filter = NULL;
+    AVCodecContext *enc = NULL;
+    int i, ret = 0;
+    /* find the output stream fed by this input filter */
+    for (i = 0; i < nb_output_streams; i++)
+        if (ifilter == output_streams[i]->filter->graph->inputs[0]) { ost = output_streams[i]; break; }
+    if (!ost) /* previously fell through and indexed output_streams out of bounds */
+        return AVERROR_BUG;
+    of  = output_files[ost->file_index];
+    enc = ost->enc_ctx;
+    if (!ost->filter || !ost->filter->graph->graph)
+        return 0;
+    filter = ost->filter->filter;
+
+    if (!ost->initialized) {
+        char error[1024] = "";
+        ret = init_output_stream(ost, error, sizeof(error));
+        if (ret < 0) {
+            av_log(NULL, AV_LOG_ERROR, "Error initializing output stream %d:%d -- %s\n",
+                   ost->file_index, ost->index, error);
+            exit_program(1);
+        }
+    }
+
+    if (!ost->filtered_frame && !(ost->filtered_frame = av_frame_alloc())) {
+        return AVERROR(ENOMEM);
+    }
+    filtered_frame = ost->filtered_frame;
+
+    while (1) {
+        double float_pts = AV_NOPTS_VALUE; // this is identical to filtered_frame.pts but with higher precision
+        ret = av_buffersink_get_frame_flags(filter, filtered_frame,
+                                           AV_BUFFERSINK_FLAG_NO_REQUEST);
+        if (ret < 0) {
+            if (ret != AVERROR(EAGAIN) && ret != AVERROR_EOF) {
+                av_log(NULL, AV_LOG_WARNING,
+                       "Error in av_buffersink_get_frame_flags(): %s\n", av_err2str(ret));
+            } else if (flush && ret == AVERROR_EOF) {
+                if (av_buffersink_get_type(filter) == AVMEDIA_TYPE_VIDEO)
+                    do_video_out(of, ost, NULL, AV_NOPTS_VALUE);
+            }
+            break;
+        }
+        if (ost->finished) {
+            av_frame_unref(filtered_frame);
+            continue;
+        }
+        if (filtered_frame->pts != AV_NOPTS_VALUE) {
+            int64_t start_time = (of->start_time == AV_NOPTS_VALUE) ? 0 : of->start_time;
+            AVRational filter_tb = av_buffersink_get_time_base(filter);
+            AVRational tb = enc->time_base;
+            int extra_bits = av_clip(29 - av_log2(tb.den), 0, 16);
+
+            tb.den <<= extra_bits;
+            float_pts =
+                av_rescale_q(filtered_frame->pts, filter_tb, tb) -
+                av_rescale_q(start_time, AV_TIME_BASE_Q, tb);
+            float_pts /= 1 << extra_bits;
+            // avoid exact midpoints to reduce the chance of rounding differences, this can be removed in case the fps code is changed to work with integers
+            float_pts += FFSIGN(float_pts) * 1.0 / (1<<17);
+
+            filtered_frame->pts =
+                av_rescale_q(filtered_frame->pts, filter_tb, enc->time_base) -
+                av_rescale_q(start_time, AV_TIME_BASE_Q, enc->time_base);
+        }
+
+        switch (av_buffersink_get_type(filter)) {
+        case AVMEDIA_TYPE_VIDEO:
+            if (!ost->frame_aspect_ratio.num)
+                enc->sample_aspect_ratio = filtered_frame->sample_aspect_ratio;
+
+            if (debug_ts) {
+                av_log(NULL, AV_LOG_INFO, "filter -> pts:%s pts_time:%s exact:%f time_base:%d/%d\n",
+                        av_ts2str(filtered_frame->pts), av_ts2timestr(filtered_frame->pts, &enc->time_base),
+                        float_pts,
+                        enc->time_base.num, enc->time_base.den);
+            }
+
+            do_video_out(of, ost, filtered_frame, float_pts);
+            break;
+        case AVMEDIA_TYPE_AUDIO:
+            if (!(enc->codec->capabilities & AV_CODEC_CAP_PARAM_CHANGE) &&
+                enc->channels != filtered_frame->channels) {
+                av_log(NULL, AV_LOG_ERROR,
+                       "Audio filter graph output is not normalized and encoder does not support parameter changes\n");
+                break;
+            }
+            do_audio_out(of, ost, filtered_frame);
+            break;
+        default:
+            // TODO support subtitle filters
+            av_assert0(0);
+        }
+
+        av_frame_unref(filtered_frame);
+    }
+
+    return 0;
+}
+
 static void print_final_stats(int64_t total_size)
 {
     uint64_t video_size = 0, audio_size = 0, extra_size = 0, other_size = 0;
@@ -2175,7 +2279,15 @@  static int ifilter_send_frame(InputFilter *ifilter, AVFrame *frame)
             }
         }
 
+#if HAVE_THREADS
+        if (!abr_pipeline) {
+            ret = reap_filters(1);
+        } else {
+            ret = pipeline_reap_filters(1, ifilter);
+        }
+#else
         ret = reap_filters(1);
+#endif
         if (ret < 0 && ret != AVERROR_EOF) {
             av_log(NULL, AV_LOG_ERROR, "Error while filtering: %s\n", av_err2str(ret));
             return ret;
@@ -2204,6 +2316,16 @@  static int ifilter_send_eof(InputFilter *ifilter, int64_t pts)
 
     ifilter->eof = 1;
 
+#if HAVE_THREADS
+    if (abr_pipeline) {
+        ifilter->waited_frm = NULL;
+        pthread_mutex_lock(&ifilter->process_mutex);
+        ifilter->t_end = 1;
+        pthread_cond_signal(&ifilter->process_cond);
+        pthread_mutex_unlock(&ifilter->process_mutex);
+        pthread_join(ifilter->f_thread, NULL);
+    }
+#endif
     if (ifilter->filter) {
         ret = av_buffersrc_close(ifilter->filter, pts, AV_BUFFERSRC_FLAG_PUSH);
         if (ret < 0)
@@ -2248,6 +2370,41 @@  static int decode(AVCodecContext *avctx, AVFrame *frame, int *got_frame, AVPacke
     return 0;
 }
 
+#if HAVE_THREADS
+static void *filter_pipeline(void *arg)
+{
+    InputFilter *fl = arg;
+    AVFrame *frm;
+    int ret, stop;
+    while(1) {
+        pthread_mutex_lock(&fl->process_mutex);
+        while (fl->waited_frm == NULL && !fl->t_end)
+            pthread_cond_wait(&fl->process_cond, &fl->process_mutex);
+        stop = fl->t_end; /* sample the shared flag while the lock is still held */
+        pthread_mutex_unlock(&fl->process_mutex);
+
+        if (stop) break;
+
+        frm = fl->waited_frm;
+        ret = ifilter_send_frame(fl, frm);
+        if (ret < 0) {
+            av_log(NULL, AV_LOG_ERROR,
+                   "Failed to inject frame into filter network: %s\n", av_err2str(ret));
+        } else
+            ret = pipeline_reap_filters(0, fl);
+
+        fl->t_error = ret;
+
+        pthread_mutex_lock(&fl->finish_mutex);
+        fl->waited_frm = NULL;
+        pthread_cond_signal(&fl->finish_cond);
+        pthread_mutex_unlock(&fl->finish_mutex);
+
+        if (ret < 0)
+            break;
+    }
+    return NULL; /* pthread start routines must return a void* value */
+}
+#endif
 static int send_frame_to_filters(InputStream *ist, AVFrame *decoded_frame)
 {
     int i, ret;
@@ -2255,22 +2412,72 @@  static int send_frame_to_filters(InputStream *ist, AVFrame *decoded_frame)
 
     av_assert1(ist->nb_filters > 0); /* ensure ret is initialized */
     for (i = 0; i < ist->nb_filters; i++) {
-        if (i < ist->nb_filters - 1) {
-            f = ist->filter_frame;
-            ret = av_frame_ref(f, decoded_frame);
-            if (ret < 0)
+#if HAVE_THREADS
+        if (!abr_pipeline) {
+#endif
+            if (i < ist->nb_filters - 1) {
+                f = ist->filter_frame;
+                ret = av_frame_ref(f, decoded_frame);
+                if (ret < 0)
+                    break;
+            } else
+                f = decoded_frame;
+
+                ret = ifilter_send_frame(ist->filters[i], f);
+                if (ret == AVERROR_EOF)
+                    ret = 0; /* ignore */
+                if (ret < 0) {
+                    av_log(NULL, AV_LOG_ERROR,
+                           "Failed to inject frame into filter network: %s\n", av_err2str(ret));
+                    break;
+                }
+#if HAVE_THREADS
+        } else {
+            if (i < ist->nb_filters - 1) {
+                f = &ist->filters[i]->input_frm;
+                ret = av_frame_ref(f, decoded_frame);
+                if (ret < 0)
+                    break;
+            } else
+                f = decoded_frame;
+
+            if(ist->filters[i]->f_thread == 0) {
+                if ((ret = pthread_create(&ist->filters[i]->f_thread, NULL, filter_pipeline, ist->filters[i]))) {
+                    av_log(NULL, AV_LOG_ERROR, "pthread_create failed: %s. Try to increase `ulimit -v` or decrease `ulimit -s`.\n", strerror(ret));
+                    return AVERROR(ret);
+                }
+                pthread_mutex_init(&ist->filters[i]->process_mutex, NULL);
+                pthread_mutex_init(&ist->filters[i]->finish_mutex, NULL);
+                pthread_cond_init(&ist->filters[i]->process_cond, NULL);
+                pthread_cond_init(&ist->filters[i]->finish_cond, NULL);
+                ist->filters[i]->t_end = 0;
+                ist->filters[i]->t_error = 0;
+            }
+
+            pthread_mutex_lock(&ist->filters[i]->process_mutex);
+            ist->filters[i]->waited_frm = f;
+            pthread_cond_signal(&ist->filters[i]->process_cond);
+            pthread_mutex_unlock(&ist->filters[i]->process_mutex);
+        }
+#endif
+    }
+#if HAVE_THREADS
+    if (abr_pipeline) {
+        for (i = 0; i < ist->nb_filters; i++) {
+            pthread_mutex_lock(&ist->filters[i]->finish_mutex);
+            while(ist->filters[i]->waited_frm != NULL)
+                pthread_cond_wait(&ist->filters[i]->finish_cond, &ist->filters[i]->finish_mutex);
+            pthread_mutex_unlock(&ist->filters[i]->finish_mutex);
+        }
+        for (i = 0; i < ist->nb_filters; i++) {
+            if (ist->filters[i]->t_error < 0) {
+                ret = ist->filters[i]->t_error;
                 break;
-        } else
-            f = decoded_frame;
-        ret = ifilter_send_frame(ist->filters[i], f);
-        if (ret == AVERROR_EOF)
-            ret = 0; /* ignore */
-        if (ret < 0) {
-            av_log(NULL, AV_LOG_ERROR,
-                   "Failed to inject frame into filter network: %s\n", av_err2str(ret));
-            break;
+            }
         }
     }
+#endif
+
     return ret;
 }
 
@@ -4635,6 +4842,9 @@  static int transcode_step(void)
     if (ret < 0)
         return ret == AVERROR_EOF ? 0 : ret;
 
+#if HAVE_THREADS
+    if (abr_pipeline) return 0;
+#endif
     return reap_filters(0);
 }
 
diff --git a/fftools/ffmpeg.h b/fftools/ffmpeg.h
index eb1eaf6..436e428 100644
--- a/fftools/ffmpeg.h
+++ b/fftools/ffmpeg.h
@@ -253,6 +253,17 @@  typedef struct InputFilter {
 
     AVBufferRef *hw_frames_ctx;
 
+    // for abr pipeline
+    AVFrame *waited_frm;
+    AVFrame input_frm;
+    pthread_t f_thread;
+    pthread_cond_t process_cond;
+    pthread_cond_t finish_cond;
+    pthread_mutex_t process_mutex;
+    pthread_mutex_t finish_mutex;
+    int t_end;
+    int t_error;
+
     int eof;
 } InputFilter;
 
@@ -606,6 +617,7 @@  extern int frame_bits_per_raw_sample;
 extern AVIOContext *progress_avio;
 extern float max_error_rate;
 extern char *videotoolbox_pixfmt;
+extern int abr_pipeline;
 
 extern int filter_nbthreads;
 extern int filter_complex_nbthreads;
diff --git a/fftools/ffmpeg_filter.c b/fftools/ffmpeg_filter.c
index 6518d50..0323b10 100644
--- a/fftools/ffmpeg_filter.c
+++ b/fftools/ffmpeg_filter.c
@@ -197,6 +197,7 @@  DEF_CHOOSE_FORMAT(channel_layouts, uint64_t, channel_layout, channel_layouts, 0,
 int init_simple_filtergraph(InputStream *ist, OutputStream *ost)
 {
     FilterGraph *fg = av_mallocz(sizeof(*fg));
+    int i;
 
     if (!fg)
         exit_program(1);
@@ -225,6 +226,11 @@  int init_simple_filtergraph(InputStream *ist, OutputStream *ost)
     GROW_ARRAY(ist->filters, ist->nb_filters);
     ist->filters[ist->nb_filters - 1] = fg->inputs[0];
 
+    if (abr_pipeline) {
+        for (i = 0; i < ist->nb_filters; i++) {
+            ist->filters[i]->f_thread = 0;
+        }
+    }
     GROW_ARRAY(filtergraphs, nb_filtergraphs);
     filtergraphs[nb_filtergraphs - 1] = fg;
 
diff --git a/fftools/ffmpeg_opt.c b/fftools/ffmpeg_opt.c
index d4851a2..fa5a556 100644
--- a/fftools/ffmpeg_opt.c
+++ b/fftools/ffmpeg_opt.c
@@ -110,6 +110,7 @@  float max_error_rate  = 2.0/3;
 int filter_nbthreads = 0;
 int filter_complex_nbthreads = 0;
 int vstats_version = 2;
+int abr_pipeline      = 0;
 
 
 static int intra_only         = 0;
@@ -3502,7 +3503,10 @@  const OptionDef options[] = {
         "set the maximum number of queued packets from the demuxer" },
     { "find_stream_info", OPT_BOOL | OPT_PERFILE | OPT_INPUT | OPT_EXPERT, { &find_stream_info },
         "read and decode the streams to fill missing information with heuristics" },
-
+#if HAVE_THREADS
+    { "abr_pipeline",    OPT_BOOL,                                    { &abr_pipeline },
+        "adaptive bitrate pipeline (1 decode to N filter graphs, and 1 to N transcode" },
+#endif
     /* video options */
     { "vframes",      OPT_VIDEO | HAS_ARG  | OPT_PERFILE | OPT_OUTPUT,           { .func_arg = opt_video_frames },
         "set the number of video frames to output", "number" },