diff mbox series

[FFmpeg-devel,v2] checkasm: add sample argument to adjust during bench

Message ID 20240521123338.29539-1-jdek@itanimul.li
State New
Headers show
Series [FFmpeg-devel,v2] checkasm: add sample argument to adjust during bench | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished

Commit Message

J. Dekker May 21, 2024, 12:32 p.m. UTC
Some timers on certain device and test combinations can produce noisy
results, affecting the reliability of performance measurements. One
notable example of this is the Canaan K230 RISC-V development board.

An option to adjust the number of samples (--samples) has been added,
allowing developers to increase or adjust the sample count for more
reliable results.

Signed-off-by: J. Dekker <jdek@itanimul.li>
---

 Auto-detection can be added later when either a count is omitted or a specific
 value or term such as '0' or 'auto' is provided. This is a development tool,
 the users will be developers primarily working on master who follow checkasm
 changes and/or add their own tests and functionality; there's no need to
 support a feature like this or deprecate it for years if a better solution
 is submitted.

 tests/checkasm/checkasm.c | 12 +++++++++++-
 tests/checkasm/checkasm.h |  5 +++--
 2 files changed, 14 insertions(+), 3 deletions(-)

Comments

Henrik Gramner May 21, 2024, 12:48 p.m. UTC | #1
On Tue, May 21, 2024 at 2:33 PM J. Dekker <jdek@itanimul.li> wrote:
> @@ -338,8 +338,9 @@ typedef struct CheckasmPerf {
>              uint64_t tsum = 0;\
>              int ti, tcount = 0;\
>              uint64_t t = 0; \
> +            const uint64_t truns = bench_runs;\
>              checkasm_set_signal_handler_state(1);\
> -            for (ti = 0; ti < BENCH_RUNS; ti++) {\
> +            for (ti = 0; ti < truns; ti++) {\

This is comparing int with uint64_t. We should probably just use int
for the sample count too.
Lynne May 21, 2024, 1 p.m. UTC | #2
On 21/05/2024 14:32, J. Dekker wrote:
> Some timers on certain device and test combinations can produce noisy
> results, affecting the reliability of performance measurements. One
> notable example of this is the Canaan K230 RISC-V development board.
> 
> An option to adjust the number of samples (--samples) has been added,
> allowing developers to increase or adjust the sample count for more
> reliable results.
> 
> Signed-off-by: J. Dekker <jdek@itanimul.li>
> ---
> 
>   Auto-detection can be added later when either a count is omitted or a specific
>   value or term such as '0' or 'auto' is provided. This is a development tool,
>   the users will be developers primarily working on master who follow checkasm
>   changes and/or add their own tests and functionality; there's no need to
>   support a feature like this or deprecate it for years if a better solution
>   is submitted.
> 
>   tests/checkasm/checkasm.c | 12 +++++++++++-
>   tests/checkasm/checkasm.h |  5 +++--
>   2 files changed, 14 insertions(+), 3 deletions(-)
> 
> diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
> index 31ca9f6e2b..b8e5cfb9dd 100644
> --- a/tests/checkasm/checkasm.c
> +++ b/tests/checkasm/checkasm.c
> @@ -72,6 +72,9 @@
>   void (*checkasm_checked_call)(void *func, int dummy, ...) = checkasm_checked_call_novfp;
>   #endif
>   
> +/* Trade-off between speed and accuracy */
> +uint64_t bench_runs = 1000;
> +
>   /* List of tests to invoke */
>   static const struct {
>       const char *name;
> @@ -820,7 +823,7 @@ static void bench_uninit(void)
>   static int usage(const char *path)
>   {
>       fprintf(stderr,
> -            "Usage: %s [--bench] [--test=<pattern>] [--verbose] [seed]\n",
> +            "Usage: %s [--bench] [--samples=<count>] [--test=<pattern>] [--verbose] [seed]\n",
>               path);
>       return 1;
>   }
> @@ -867,6 +870,13 @@ int main(int argc, char *argv[])
>               state.test_name = arg + 7;
>           } else if (!strcmp(arg, "--verbose") || !strcmp(arg, "-v")) {
>               state.verbose = 1;
> +        } else if (!strncmp(arg, "--samples=", 10)) {
> +            l = strtoul(arg + 10, &end, 10);
> +            if (*end == '\0') {
> +                bench_runs = l;
> +            } else {
> +                return usage(argv[0]);
> +            }
>           } else if ((l = strtoul(arg, &end, 10)) <= UINT_MAX &&
>                      *end == '\0') {
>               seed = l;
> diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
> index 07fcc751ff..d6921cc50c 100644
> --- a/tests/checkasm/checkasm.h
> +++ b/tests/checkasm/checkasm.h
> @@ -167,7 +167,7 @@ extern AVLFG checkasm_lfg;
>   
>   static av_unused void *func_ref, *func_new;
>   
> -#define BENCH_RUNS 1000 /* Trade-off between accuracy and speed */
> +extern uint64_t bench_runs;
>   
>   /* Decide whether or not the specified function needs to be tested */
>   #define check_func(func, ...) (checkasm_save_context(), func_ref = checkasm_check_func((func_new = func), __VA_ARGS__))
> @@ -338,8 +338,9 @@ typedef struct CheckasmPerf {
>               uint64_t tsum = 0;\
>               int ti, tcount = 0;\
>               uint64_t t = 0; \
> +            const uint64_t truns = bench_runs;\
>               checkasm_set_signal_handler_state(1);\
> -            for (ti = 0; ti < BENCH_RUNS; ti++) {\
> +            for (ti = 0; ti < truns; ti++) {\
>                   PERF_START(t);\
>                   tfunc(__VA_ARGS__);\
>                   tfunc(__VA_ARGS__);\

While working on the FFT asm with
https://github.com/cyanreg/lavu_fft_test, which has a built-in benchmark,
I've found that exponentiation (i.e. power-of-two run counts) works best,
as adding more and more digits at the end is prone to under/overshoot.
For large functions, 1 << 16 is a good starting point, while for very
small functions, 1 << 23 works better.

I suggest replacing --samples with --runs (or --bench-runs, but we're
all too lazy for that), documenting it as "--runs=<ptwo>", and rejecting
any value large enough to overflow.
diff mbox series

Patch

diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
index 31ca9f6e2b..b8e5cfb9dd 100644
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -72,6 +72,9 @@ 
 void (*checkasm_checked_call)(void *func, int dummy, ...) = checkasm_checked_call_novfp;
 #endif
 
+/* Trade-off between speed and accuracy */
+uint64_t bench_runs = 1000;
+
 /* List of tests to invoke */
 static const struct {
     const char *name;
@@ -820,7 +823,7 @@  static void bench_uninit(void)
 static int usage(const char *path)
 {
     fprintf(stderr,
-            "Usage: %s [--bench] [--test=<pattern>] [--verbose] [seed]\n",
+            "Usage: %s [--bench] [--samples=<count>] [--test=<pattern>] [--verbose] [seed]\n",
             path);
     return 1;
 }
@@ -867,6 +870,13 @@  int main(int argc, char *argv[])
             state.test_name = arg + 7;
         } else if (!strcmp(arg, "--verbose") || !strcmp(arg, "-v")) {
             state.verbose = 1;
+        } else if (!strncmp(arg, "--samples=", 10)) {
+            l = strtoul(arg + 10, &end, 10);
+            if (*end == '\0') {
+                bench_runs = l;
+            } else {
+                return usage(argv[0]);
+            }
         } else if ((l = strtoul(arg, &end, 10)) <= UINT_MAX &&
                    *end == '\0') {
             seed = l;
diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
index 07fcc751ff..d6921cc50c 100644
--- a/tests/checkasm/checkasm.h
+++ b/tests/checkasm/checkasm.h
@@ -167,7 +167,7 @@  extern AVLFG checkasm_lfg;
 
 static av_unused void *func_ref, *func_new;
 
-#define BENCH_RUNS 1000 /* Trade-off between accuracy and speed */
+extern uint64_t bench_runs;
 
 /* Decide whether or not the specified function needs to be tested */
 #define check_func(func, ...) (checkasm_save_context(), func_ref = checkasm_check_func((func_new = func), __VA_ARGS__))
@@ -338,8 +338,9 @@  typedef struct CheckasmPerf {
             uint64_t tsum = 0;\
             int ti, tcount = 0;\
             uint64_t t = 0; \
+            const uint64_t truns = bench_runs;\
             checkasm_set_signal_handler_state(1);\
-            for (ti = 0; ti < BENCH_RUNS; ti++) {\
+            for (ti = 0; ti < truns; ti++) {\
                 PERF_START(t);\
                 tfunc(__VA_ARGS__);\
                 tfunc(__VA_ARGS__);\