Message ID | 20240521123338.29539-1-jdek@itanimul.li |
---|---|
State | New |
Headers | show |
Series | [FFmpeg-devel,v2] checkasm: add sample argument to adjust during bench | expand |
Context | Check | Description |
---|---|---|
yinshiyou/make_loongarch64 | success | Make finished |
yinshiyou/make_fate_loongarch64 | success | Make fate finished |
On Tue, May 21, 2024 at 2:33 PM J. Dekker <jdek@itanimul.li> wrote: > @@ -338,8 +338,9 @@ typedef struct CheckasmPerf { > uint64_t tsum = 0;\ > int ti, tcount = 0;\ > uint64_t t = 0; \ > + const uint64_t truns = bench_runs;\ > checkasm_set_signal_handler_state(1);\ > - for (ti = 0; ti < BENCH_RUNS; ti++) {\ > + for (ti = 0; ti < truns; ti++) {\ This is comparing int with uint64_t. We should probably just use int for the sample count too.
On 21/05/2024 14:32, J. Dekker wrote: > Some timers on certain device and test combinations can produce noisy > results, affecting the reliability of performance measurements. One > notable example of this is the Canaan K230 RISC-V development board. > > An option to adjust the number of samples (--samples) has been added, > allowing developers to increase or adjust the sample count for more > reliable results. > > Signed-off-by: J. Dekker <jdek@itanimul.li> > --- > > Auto-detection can be added later when either a count is omitted or a specific > value or term such as '0' or 'auto' is provided. This is a development tool, > the users will be developers primarily working on master who follow checkasm > changes and/ or add their own tests and functionality; there's no need to > support a feature like this or deprecate it for years if a better solution > is submitted. > > tests/checkasm/checkasm.c | 12 +++++++++++- > tests/checkasm/checkasm.h | 5 +++-- > 2 files changed, 14 insertions(+), 3 deletions(-) > > diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c > index 31ca9f6e2b..b8e5cfb9dd 100644 > --- a/tests/checkasm/checkasm.c > +++ b/tests/checkasm/checkasm.c > @@ -72,6 +72,9 @@ > void (*checkasm_checked_call)(void *func, int dummy, ...) 
= checkasm_checked_call_novfp; > #endif > > +/* Trade-off between speed and accuracy */ > +uint64_t bench_runs = 1000; > + > /* List of tests to invoke */ > static const struct { > const char *name; > @@ -820,7 +823,7 @@ static void bench_uninit(void) > static int usage(const char *path) > { > fprintf(stderr, > - "Usage: %s [--bench] [--test=<pattern>] [--verbose] [seed]\n", > + "Usage: %s [--bench] [--samples=<count>] [--test=<pattern>] [--verbose] [seed]\n", > path); > return 1; > } > @@ -867,6 +870,13 @@ int main(int argc, char *argv[]) > state.test_name = arg + 7; > } else if (!strcmp(arg, "--verbose") || !strcmp(arg, "-v")) { > state.verbose = 1; > + } else if (!strncmp(arg, "--samples=", 10)) { > + l = strtoul(arg + 10, &end, 10); > + if (*end == '\0') { > + bench_runs = l; > + } else { > + return usage(argv[0]); > + } > } else if ((l = strtoul(arg, &end, 10)) <= UINT_MAX && > *end == '\0') { > seed = l; > diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h > index 07fcc751ff..d6921cc50c 100644 > --- a/tests/checkasm/checkasm.h > +++ b/tests/checkasm/checkasm.h > @@ -167,7 +167,7 @@ extern AVLFG checkasm_lfg; > > static av_unused void *func_ref, *func_new; > > -#define BENCH_RUNS 1000 /* Trade-off between accuracy and speed */ > +extern uint64_t bench_runs; > > /* Decide whether or not the specified function needs to be tested */ > #define check_func(func, ...) 
(checkasm_save_context(), func_ref = checkasm_check_func((func_new = func), __VA_ARGS__)) > @@ -338,8 +338,9 @@ typedef struct CheckasmPerf { > uint64_t tsum = 0;\ > int ti, tcount = 0;\ > uint64_t t = 0; \ > + const uint64_t truns = bench_runs;\ > checkasm_set_signal_handler_state(1);\ > - for (ti = 0; ti < BENCH_RUNS; ti++) {\ > + for (ti = 0; ti < truns; ti++) {\ > PERF_START(t);\ > tfunc(__VA_ARGS__);\ > tfunc(__VA_ARGS__);\ While working on the FFT asm with https://github.com/cyanreg/lavu_fft_test which has a built-in benchmark, I've found that exponentiation works best, as adding more and more digits at the end is prone to under/overshoot. For large functions, 1 << 16 is a good starting point, while for very small functions, 1 << 23 becomes more optimal. I suggest replacing --samples with --runs (or --bench-runs, but we're all lazy for that), and documenting it as "--runs=<ptwo>" and rejecting anything large enough to overflow.
diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c index 31ca9f6e2b..b8e5cfb9dd 100644 --- a/tests/checkasm/checkasm.c +++ b/tests/checkasm/checkasm.c @@ -72,6 +72,9 @@ void (*checkasm_checked_call)(void *func, int dummy, ...) = checkasm_checked_call_novfp; #endif +/* Trade-off between speed and accuracy */ +uint64_t bench_runs = 1000; + /* List of tests to invoke */ static const struct { const char *name; @@ -820,7 +823,7 @@ static void bench_uninit(void) static int usage(const char *path) { fprintf(stderr, - "Usage: %s [--bench] [--test=<pattern>] [--verbose] [seed]\n", + "Usage: %s [--bench] [--samples=<count>] [--test=<pattern>] [--verbose] [seed]\n", path); return 1; } @@ -867,6 +870,13 @@ int main(int argc, char *argv[]) state.test_name = arg + 7; } else if (!strcmp(arg, "--verbose") || !strcmp(arg, "-v")) { state.verbose = 1; + } else if (!strncmp(arg, "--samples=", 10)) { + l = strtoul(arg + 10, &end, 10); + if (*end == '\0') { + bench_runs = l; + } else { + return usage(argv[0]); + } } else if ((l = strtoul(arg, &end, 10)) <= UINT_MAX && *end == '\0') { seed = l; diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h index 07fcc751ff..d6921cc50c 100644 --- a/tests/checkasm/checkasm.h +++ b/tests/checkasm/checkasm.h @@ -167,7 +167,7 @@ extern AVLFG checkasm_lfg; static av_unused void *func_ref, *func_new; -#define BENCH_RUNS 1000 /* Trade-off between accuracy and speed */ +extern uint64_t bench_runs; /* Decide whether or not the specified function needs to be tested */ #define check_func(func, ...) (checkasm_save_context(), func_ref = checkasm_check_func((func_new = func), __VA_ARGS__)) @@ -338,8 +338,9 @@ typedef struct CheckasmPerf { uint64_t tsum = 0;\ int ti, tcount = 0;\ uint64_t t = 0; \ + const uint64_t truns = bench_runs;\ checkasm_set_signal_handler_state(1);\ - for (ti = 0; ti < BENCH_RUNS; ti++) {\ + for (ti = 0; ti < truns; ti++) {\ PERF_START(t);\ tfunc(__VA_ARGS__);\ tfunc(__VA_ARGS__);\
Some timers on certain device and test combinations can produce noisy results, affecting the reliability of performance measurements. One notable example of this is the Canaan K230 RISC-V development board. An option to adjust the number of samples (--samples) has been added, allowing developers to increase or adjust the sample count for more reliable results. Signed-off-by: J. Dekker <jdek@itanimul.li> --- Auto-detection can be added later when either a count is omitted or a specific value or term such as '0' or 'auto' is provided. This is a development tool. The users will be developers primarily working on master who follow checkasm changes and/or add their own tests and functionality; there's no need to support a feature like this or deprecate it for years if a better solution is submitted. tests/checkasm/checkasm.c | 12 +++++++++++- tests/checkasm/checkasm.h | 5 +++-- 2 files changed, 14 insertions(+), 3 deletions(-)