Message ID | 20220920175021.60790-3-rcombs@rcombs.me |
---|---|
State | New |
Headers | show |
Series | [FFmpeg-devel,1/7] lavu/cpu: add av_cpu_job_count() | expand |
Context | Check | Description |
---|---|---|
yinshiyou/make_loongarch64 | success | Make finished |
yinshiyou/make_fate_loongarch64 | success | Make fate finished |
andriy/configure_x86 | warning | Failed to run configure |
On Tue, Sep 20, 2022 at 12:50:16PM -0500, rcombs wrote: > This allows for more efficient use of asymmetric-multiprocessing systems. > --- > libswscale/options.c | 2 ++ > libswscale/swscale_internal.h | 1 + > libswscale/utils.c | 9 ++++++--- > libswscale/version.h | 2 +- > 4 files changed, 10 insertions(+), 4 deletions(-) This changes the output from ./ffmpeg -i ~/tickets/1984/00186002.avi -vframes 100 -qscale 1 -an -bitexact file-1984-ref.avi (I suspect it's all yuv410p Indeo videos, not just this one). I presume that's unintended. Also, the jobs number ideally, especially with bitexact, should not change the output, but maybe I am missing something. thx [...]
On Wed, Sep 21, 2022 at 10:37:04AM +0200, Michael Niedermayer wrote: > On Tue, Sep 20, 2022 at 12:50:16PM -0500, rcombs wrote: > > This allows for more efficient use of asymmetric-multiprocessing systems. > > --- > > libswscale/options.c | 2 ++ > > libswscale/swscale_internal.h | 1 + > > libswscale/utils.c | 9 ++++++--- > > libswscale/version.h | 2 +- > > 4 files changed, 10 insertions(+), 4 deletions(-) > > This changes the output from > ./ffmpeg -i ~/tickets/1984/00186002.avi -vframes 100 -qscale 1 -an -bitexact file-1984-ref.avi > (I suspect it's all yuv410p Indeo videos, not just this one) > > I presume that's unintended > also the jobs number ideally, especially with bitexact, should not change > the output > > but maybe I am missing something The "same" issue appears with some Dxtory, Snow and 10-bit H.264 inputs [...]
On 9/20/2022 2:50 PM, rcombs wrote: > This allows for more efficient use of asymmetric-multiprocessing systems. > --- > libswscale/options.c | 2 ++ > libswscale/swscale_internal.h | 1 + > libswscale/utils.c | 9 ++++++--- > libswscale/version.h | 2 +- > 4 files changed, 10 insertions(+), 4 deletions(-) > > diff --git a/libswscale/options.c b/libswscale/options.c > index 4d41b835b1..5765daa100 100644 > --- a/libswscale/options.c > +++ b/libswscale/options.c > @@ -81,6 +81,8 @@ static const AVOption swscale_options[] = { > > { "threads", "number of threads", OFFSET(nb_threads), AV_OPT_TYPE_INT, {.i64 = 1 }, 0, INT_MAX, VE, "threads" }, > { "auto", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 0 }, .flags = VE, "threads" }, > + { "jobs", "number of jobs", OFFSET(nb_jobs), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, INT_MAX, VE, "jobs" }, Default should probably be 1. > + { "auto", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 0 }, .flags = VE, "jobs" }, > > { NULL } > }; > diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h > index abeebbb002..602082e12c 100644 > --- a/libswscale/swscale_internal.h > +++ b/libswscale/swscale_internal.h > @@ -339,6 +339,7 @@ typedef struct SwsContext { > int vChrDrop; ///< Binary logarithm of extra vertical subsampling factor in source image chroma planes specified by user. > int sliceDir; ///< Direction that slices are fed to the scaler (1 = top-to-bottom, -1 = bottom-to-top). > int nb_threads; ///< Number of threads used for scaling > + int nb_jobs; ///< Number of slice jobs used for scaling > double param[2]; ///< Input parameters for scaling algorithms that need them. 
> > AVFrame *frame_src; > diff --git a/libswscale/utils.c b/libswscale/utils.c > index 45baa22b23..c9ff9db957 100644 > --- a/libswscale/utils.c > +++ b/libswscale/utils.c > @@ -1277,18 +1277,21 @@ static int context_init_threaded(SwsContext *c, > ff_sws_slice_worker, NULL, c->nb_threads); > if (ret == AVERROR(ENOSYS)) { > c->nb_threads = 1; > + c->nb_jobs = 1; > return 0; > } else if (ret < 0) > return ret; > > c->nb_threads = ret; > + if (c->nb_jobs < 1) Can c->nb_jobs even be -1? The AVOption range above is 0..INT_MAX > + c->nb_jobs = av_cpu_job_count(); > > - c->slice_ctx = av_calloc(c->nb_threads, sizeof(*c->slice_ctx)); > - c->slice_err = av_calloc(c->nb_threads, sizeof(*c->slice_err)); > + c->slice_ctx = av_calloc(c->nb_jobs, sizeof(*c->slice_ctx)); > + c->slice_err = av_calloc(c->nb_jobs, sizeof(*c->slice_err)); > if (!c->slice_ctx || !c->slice_err) > return AVERROR(ENOMEM); > > - for (int i = 0; i < c->nb_threads; i++) { > + for (int i = 0; i < c->nb_jobs; i++) { > c->slice_ctx[i] = sws_alloc_context(); > if (!c->slice_ctx[i]) > return AVERROR(ENOMEM); > diff --git a/libswscale/version.h b/libswscale/version.h > index 9bb3b171a7..4529a2d7d4 100644 > --- a/libswscale/version.h > +++ b/libswscale/version.h > @@ -29,7 +29,7 @@ > #include "version_major.h" > > #define LIBSWSCALE_VERSION_MINOR 8 > -#define LIBSWSCALE_VERSION_MICRO 112 > +#define LIBSWSCALE_VERSION_MICRO 113 > > #define LIBSWSCALE_VERSION_INT AV_VERSION_INT(LIBSWSCALE_VERSION_MAJOR, \ > LIBSWSCALE_VERSION_MINOR, \
diff --git a/libswscale/options.c b/libswscale/options.c index 4d41b835b1..5765daa100 100644 --- a/libswscale/options.c +++ b/libswscale/options.c @@ -81,6 +81,8 @@ static const AVOption swscale_options[] = { { "threads", "number of threads", OFFSET(nb_threads), AV_OPT_TYPE_INT, {.i64 = 1 }, 0, INT_MAX, VE, "threads" }, { "auto", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 0 }, .flags = VE, "threads" }, + { "jobs", "number of jobs", OFFSET(nb_jobs), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, INT_MAX, VE, "jobs" }, + { "auto", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 0 }, .flags = VE, "jobs" }, { NULL } }; diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h index abeebbb002..602082e12c 100644 --- a/libswscale/swscale_internal.h +++ b/libswscale/swscale_internal.h @@ -339,6 +339,7 @@ typedef struct SwsContext { int vChrDrop; ///< Binary logarithm of extra vertical subsampling factor in source image chroma planes specified by user. int sliceDir; ///< Direction that slices are fed to the scaler (1 = top-to-bottom, -1 = bottom-to-top). int nb_threads; ///< Number of threads used for scaling + int nb_jobs; ///< Number of slice jobs used for scaling double param[2]; ///< Input parameters for scaling algorithms that need them. 
AVFrame *frame_src; diff --git a/libswscale/utils.c b/libswscale/utils.c index 45baa22b23..c9ff9db957 100644 --- a/libswscale/utils.c +++ b/libswscale/utils.c @@ -1277,18 +1277,21 @@ static int context_init_threaded(SwsContext *c, ff_sws_slice_worker, NULL, c->nb_threads); if (ret == AVERROR(ENOSYS)) { c->nb_threads = 1; + c->nb_jobs = 1; return 0; } else if (ret < 0) return ret; c->nb_threads = ret; + if (c->nb_jobs < 1) + c->nb_jobs = av_cpu_job_count(); - c->slice_ctx = av_calloc(c->nb_threads, sizeof(*c->slice_ctx)); - c->slice_err = av_calloc(c->nb_threads, sizeof(*c->slice_err)); + c->slice_ctx = av_calloc(c->nb_jobs, sizeof(*c->slice_ctx)); + c->slice_err = av_calloc(c->nb_jobs, sizeof(*c->slice_err)); if (!c->slice_ctx || !c->slice_err) return AVERROR(ENOMEM); - for (int i = 0; i < c->nb_threads; i++) { + for (int i = 0; i < c->nb_jobs; i++) { c->slice_ctx[i] = sws_alloc_context(); if (!c->slice_ctx[i]) return AVERROR(ENOMEM); diff --git a/libswscale/version.h b/libswscale/version.h index 9bb3b171a7..4529a2d7d4 100644 --- a/libswscale/version.h +++ b/libswscale/version.h @@ -29,7 +29,7 @@ #include "version_major.h" #define LIBSWSCALE_VERSION_MINOR 8 -#define LIBSWSCALE_VERSION_MICRO 112 +#define LIBSWSCALE_VERSION_MICRO 113 #define LIBSWSCALE_VERSION_INT AV_VERSION_INT(LIBSWSCALE_VERSION_MAJOR, \ LIBSWSCALE_VERSION_MINOR, \