diff mbox series

[FFmpeg-devel] checkasm: add (private) kperf timing for macOS

Message ID 20210413004523.6500-1-josh@itanimul.li
State New
Headers show
Series [FFmpeg-devel] checkasm: add (private) kperf timing for macOS | expand

Checks

Context Check Description
andriy/x86_make success Make finished
andriy/x86_make_fate fail Make fate failed
andriy/PPC64_make success Make finished
andriy/PPC64_make_fate warning Make fate failed

Commit Message

Josh Dekker April 13, 2021, 12:45 a.m. UTC
Signed-off-by: Josh Dekker <josh@itanimul.li>
---
 configure                    |   2 +
 tests/checkasm/Makefile      |   1 +
 tests/checkasm/checkasm.c    |  19 ++++-
 tests/checkasm/checkasm.h    |  10 ++-
 tests/checkasm/macos_kperf.c | 143 +++++++++++++++++++++++++++++++++++
 tests/checkasm/macos_kperf.h |  23 ++++++
 6 files changed, 195 insertions(+), 3 deletions(-)
 create mode 100644 tests/checkasm/macos_kperf.c
 create mode 100644 tests/checkasm/macos_kperf.h

Comments

Lynne April 13, 2021, 1:35 a.m. UTC | #1
Apr 13, 2021, 02:45 by josh@itanimul.li:

> Signed-off-by: Josh Dekker <josh@itanimul.li>
> ---
>  configure                    |   2 +
>  tests/checkasm/Makefile      |   1 +
>  tests/checkasm/checkasm.c    |  19 ++++-
>  tests/checkasm/checkasm.h    |  10 ++-
>  tests/checkasm/macos_kperf.c | 143 +++++++++++++++++++++++++++++++++++
>  tests/checkasm/macos_kperf.h |  23 ++++++
>  6 files changed, 195 insertions(+), 3 deletions(-)
>  create mode 100644 tests/checkasm/macos_kperf.c
>  create mode 100644 tests/checkasm/macos_kperf.h
>
> diff --git a/configure b/configure
> index d7a3f507e8..a47e3dea67 100755
> --- a/configure
> +++ b/configure
> @@ -490,6 +490,7 @@ Developer options (useful when working on FFmpeg itself):
>  --ignore-tests=TESTS     comma-separated list (without "fate-" prefix
>  in the name) of tests whose result is ignored
>  --enable-linux-perf      enable Linux Performance Monitor API
> +  --enable-macos-kperf     enable macOS kperf (private) API
>  --disable-large-tests    disable tests that use a large amount of memory
>  
>  NOTE: Object files are built at the place where configure is launched.
> @@ -1949,6 +1950,7 @@ CONFIG_LIST="
>  fontconfig
>  large_tests
>  linux_perf
> +    macos_kperf
>  memory_poisoning
>  neon_clobber_test
>  ossfuzz
> diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile
> index 1827a4e134..4abaef9c63 100644
> --- a/tests/checkasm/Makefile
> +++ b/tests/checkasm/Makefile
> @@ -58,6 +58,7 @@ CHECKASMOBJS-$(CONFIG_AVUTIL)  += $(AVUTILOBJS)
>  CHECKASMOBJS-$(ARCH_AARCH64)            += aarch64/checkasm.o
>  CHECKASMOBJS-$(HAVE_ARMV5TE_EXTERNAL)   += arm/checkasm.o
>  CHECKASMOBJS-$(HAVE_X86ASM)             += x86/checkasm.o
> +CHECKASMOBJS-$(CONFIG_MACOS_KPERF)      += macos_kperf.o
>  
>  CHECKASMOBJS += $(CHECKASMOBJS-yes) checkasm.o
>  CHECKASMOBJS := $(sort $(CHECKASMOBJS:%=tests/checkasm/%))
> diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
> index 8338e8ff58..4c42040244 100644
> --- a/tests/checkasm/checkasm.c
> +++ b/tests/checkasm/checkasm.c
> @@ -26,6 +26,8 @@
>  # ifndef _GNU_SOURCE
>  #  define _GNU_SOURCE // for syscall (performance monitoring API)
>  # endif
> +#elif CONFIG_MACOS_KPERF
> +#include "macos_kperf.h"
>  #endif
>  
>  #include <stdarg.h>
> @@ -637,9 +639,20 @@ static int bench_init_linux(void)
>  }
>  return 0;
>  }
> -#endif
> +#elif CONFIG_MACOS_KPERF
> +static int bench_init_kperf(void)
> +{
> +    if (ff_kperf_init() || ff_kperf_setup())
> +        return -1;
>  
> -#if !CONFIG_LINUX_PERF
> +    if (ff_kperf_cycles(NULL)) {
> +        fprintf(stderr, "checkasm must be run as root to use kperf on macOS\n");
> +        return -1;
> +    }
> +
> +    return 0;
> +}
> +#else
>  static int bench_init_ffmpeg(void)
>  {
>  #ifdef AV_READ_TIME
> @@ -656,6 +669,8 @@ static int bench_init(void)
>  {
>  #if CONFIG_LINUX_PERF
>  int ret = bench_init_linux();
> +#elif CONFIG_MACOS_KPERF
> +    int ret = bench_init_kperf();
>  #else
>  int ret = bench_init_ffmpeg();
>  #endif
> diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
> index ef6645e3a2..4127081d74 100644
> --- a/tests/checkasm/checkasm.h
> +++ b/tests/checkasm/checkasm.h
> @@ -31,6 +31,8 @@
>  #include <sys/ioctl.h>
>  #include <asm/unistd.h>
>  #include <linux/perf_event.h>
> +#elif CONFIG_MACOS_KPERF
> +#include "macos_kperf.h"
>  #endif
>  
>  #include "libavutil/avstring.h"
> @@ -224,7 +226,7 @@ typedef struct CheckasmPerf {
>  int iterations;
>  } CheckasmPerf;
>  
> -#if defined(AV_READ_TIME) || CONFIG_LINUX_PERF
> +#if defined(AV_READ_TIME) || CONFIG_LINUX_PERF || CONFIG_MACOS_KPERF
>  
>  #if CONFIG_LINUX_PERF
>  #define PERF_START(t) do {                              \
> @@ -235,6 +237,12 @@ typedef struct CheckasmPerf {
>  ioctl(sysfd, PERF_EVENT_IOC_DISABLE, 0);            \
>  read(sysfd, &t, sizeof(t));                         \
>  } while (0)
> +#elif CONFIG_MACOS_KPERF
> +#define PERF_START(t) do {                              \
> +    t = 0;                                              \
> +    ff_kperf_cycles(&t);                                \
> +} while (0)
> +#define PERF_STOP(t) ff_kperf_cycles(&t)
>  #else
>  #define PERF_START(t) t = AV_READ_TIME()
>  #define PERF_STOP(t)  t = AV_READ_TIME() - t
> diff --git a/tests/checkasm/macos_kperf.c b/tests/checkasm/macos_kperf.c
> new file mode 100644
> index 0000000000..e6ae316608
> --- /dev/null
> +++ b/tests/checkasm/macos_kperf.c
> @@ -0,0 +1,143 @@
> +/*
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License along
> + * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
> + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
> + */
> +
> +#include "macos_kperf.h"
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <dlfcn.h>
> +
> +#define KPERF_LIST                                             \
> +    F(int, kpc_get_counting, void)                             \
> +    F(int, kpc_force_all_ctrs_set, int)                        \
> +    F(int, kpc_set_counting, uint32_t)                         \
> +    F(int, kpc_set_thread_counting, uint32_t)                  \
> +    F(int, kpc_set_config, uint32_t, void *)                   \
> +    F(int, kpc_get_config, uint32_t, void *)                   \
> +    F(int, kpc_set_period, uint32_t, void *)                   \
> +    F(int, kpc_get_period, uint32_t, void *)                   \
> +    F(uint32_t, kpc_get_counter_count, uint32_t)               \
> +    F(uint32_t, kpc_get_config_count, uint32_t)                \
> +    F(int, kperf_sample_get, int *)                            \
> +    F(int, kpc_get_thread_counters, int, unsigned int, void *)
> +
> +#define F(ret, name, ...)                                      \
> +    typedef ret name##proc(__VA_ARGS__);                       \
> +    static name##proc *name = NULL;
> +KPERF_LIST
> +#undef F
> +
> +#define CFGWORD_EL0A32EN_MASK (0x10000)
> +#define CFGWORD_EL0A64EN_MASK (0x20000)
> +#define CFGWORD_EL1EN_MASK    (0x40000)
> +#define CFGWORD_EL3EN_MASK    (0x80000)
> +#define CFGWORD_ALLMODES_MASK (0xf0000)
> +
> +#define CPMU_NONE 0
> +#define CPMU_CORE_CYCLE 0x02
> +#define CPMU_INST_A64 0x8c
> +#define CPMU_INST_BRANCH 0x8d
> +#define CPMU_SYNC_DC_LOAD_MISS 0xbf
> +#define CPMU_SYNC_DC_STORE_MISS 0xc0
> +#define CPMU_SYNC_DTLB_MISS 0xc1
> +#define CPMU_SYNC_ST_HIT_YNGR_LD 0xc4
> +#define CPMU_SYNC_BR_ANY_MISP 0xcb
> +#define CPMU_FED_IC_MISS_DEM 0xd3
> +#define CPMU_FED_ITLB_MISS 0xd4
> +
> +#define KPC_CLASS_FIXED_MASK        (1 << 0)
> +#define KPC_CLASS_CONFIGURABLE_MASK (1 << 1)
> +#define KPC_CLASS_POWER_MASK        (1 << 2)
> +#define KPC_CLASS_RAWPMU_MASK       (1 << 3)
> +
> +#define COUNTERS_COUNT 10
> +#define CONFIG_COUNT 8
> +#define KPC_MASK (KPC_CLASS_CONFIGURABLE_MASK | KPC_CLASS_FIXED_MASK)
> +
> +int ff_kperf_setup()
> +{
> +    uint64_t config[COUNTERS_COUNT] = {0};
> +    config[0] = CPMU_CORE_CYCLE | CFGWORD_EL0A64EN_MASK;
> +    // config[3] = CPMU_INST_BRANCH | CFGWORD_EL0A64EN_MASK;
> +    // config[4] = CPMU_SYNC_BR_ANY_MISP | CFGWORD_EL0A64EN_MASK;
> +    // config[5] = CPMU_INST_A64 | CFGWORD_EL0A64EN_MASK;
> +
> +    if (kpc_set_config(KPC_MASK, config)) {
> +        fprintf(stderr, "kperf: kpc_set_config failed\n");
> +        return -1;
> +    }
> +
> +    if (kpc_force_all_ctrs_set(1)) {
> +        fprintf(stderr, "kperf: kpc_force_all_ctrs_set failed\n");
> +        return -1;
> +    }
> +
> +    if (kpc_set_counting(KPC_MASK)) {
> +        fprintf(stderr, "kperf: kpc_set_counting failed\n");
> +        return -1;
> +    }
> +
> +    if (kpc_set_thread_counting(KPC_MASK)) {
> +        fprintf(stderr, "kperf: kpc_set_thread_counting failed\n");
> +        return -1;
> +    }
> +
> +    return 0;
> +}
> +
> +int ff_kperf_init()
> +{
> +    void *kperf = dlopen("/System/Library/PrivateFrameworks/kperf.framework/Versions/A/kperf", RTLD_LAZY);
> +    if (!kperf) {
> +        fprintf(stderr, "kperf: kperf = %p\n", kperf);
> +        return -1;
> +    }
> +
> +#define F(ret, name, ...)                            \
> +    name = (name##proc *)(dlsym(kperf, #name));      \
> +    if (!name) {                                     \
> +        fprintf(stderr, "kperf: %s = %p\n", #name, (void *)name);    \
> +        return -1;                                   \
> +    }
> +    KPERF_LIST
> +#undef F
> +
> +    if (kpc_get_counter_count(KPC_MASK) != COUNTERS_COUNT) {
> +        fprintf(stderr, "kperf: wrong fixed counters count\n");
> +        return -1;
> +    }
> +
> +    if (kpc_get_config_count(KPC_MASK) != CONFIG_COUNT) {
> +        fprintf(stderr, "kperf: wrong fixed config count\n");
> +        return -1;
> +    }
> +
> +    return 0;
> +}
> +
> +int ff_kperf_cycles(uint64_t *cycles)
> +{
> +    uint64_t counters[COUNTERS_COUNT];
> +    if (kpc_get_thread_counters(0, COUNTERS_COUNT, counters)) {
> +        return -1;
> +    }
> +
> +    if (cycles)
> +        *cycles = counters[0] - *cycles;
> +
> +    return 0;
> +}
> diff --git a/tests/checkasm/macos_kperf.h b/tests/checkasm/macos_kperf.h
> new file mode 100644
> index 0000000000..dddd6e4d24
> --- /dev/null
> +++ b/tests/checkasm/macos_kperf.h
> @@ -0,0 +1,23 @@
> +/*
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License along
> + * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
> + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
> + */
> +
> +#include <stdint.h>
> +
> +int ff_kperf_setup(void);
> +int ff_kperf_init(void);
> +int ff_kperf_cycles(uint64_t *cycles);
>

Could you move this to libavutil and hook this up to libavutil/timer.h as well?
Martin Storsjö April 13, 2021, 12:35 p.m. UTC | #2
On Tue, 13 Apr 2021, Lynne wrote:

> Could you move this to libavutil and hook this up to libavutil/timer.h 
> as well?

I'm a bit hesitant about that, as the API (just like the existing linux 
perf integration which is predecent for heavyweight timer APIs) requires a 
bit more setup than what libavutil/timer.h normally does.

// Martin
Martin Storsjö April 13, 2021, 12:45 p.m. UTC | #3
On Tue, 13 Apr 2021, Josh Dekker wrote:

> Signed-off-by: Josh Dekker <josh@itanimul.li>
> ---
> configure                    |   2 +
> tests/checkasm/Makefile      |   1 +
> tests/checkasm/checkasm.c    |  19 ++++-
> tests/checkasm/checkasm.h    |  10 ++-
> tests/checkasm/macos_kperf.c | 143 +++++++++++++++++++++++++++++++++++
> tests/checkasm/macos_kperf.h |  23 ++++++
> 6 files changed, 195 insertions(+), 3 deletions(-)
> create mode 100644 tests/checkasm/macos_kperf.c
> create mode 100644 tests/checkasm/macos_kperf.h
>
> diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
> index ef6645e3a2..4127081d74 100644
> --- a/tests/checkasm/checkasm.h
> +++ b/tests/checkasm/checkasm.h
> @@ -31,6 +31,8 @@
> #include <sys/ioctl.h>
> #include <asm/unistd.h>
> #include <linux/perf_event.h>
> +#elif CONFIG_MACOS_KPERF
> +#include "macos_kperf.h"
> #endif
> 
> #include "libavutil/avstring.h"
> @@ -224,7 +226,7 @@ typedef struct CheckasmPerf {
>     int iterations;
> } CheckasmPerf;
> 
> -#if defined(AV_READ_TIME) || CONFIG_LINUX_PERF
> +#if defined(AV_READ_TIME) || CONFIG_LINUX_PERF || CONFIG_MACOS_KPERF
> 
> #if CONFIG_LINUX_PERF
> #define PERF_START(t) do {                              \
> @@ -235,6 +237,12 @@ typedef struct CheckasmPerf {
>     ioctl(sysfd, PERF_EVENT_IOC_DISABLE, 0);            \
>     read(sysfd, &t, sizeof(t));                         \
> } while (0)
> +#elif CONFIG_MACOS_KPERF
> +#define PERF_START(t) do {                              \
> +    t = 0;                                              \
> +    ff_kperf_cycles(&t);                                \
> +} while (0)
> +#define PERF_STOP(t) ff_kperf_cycles(&t)
> #else

I find this implicit subtraction in ff_kperf_cycles() highly surprising 
and hard to reason about.

E.g. if I do t=0; followed by ff_kperf_cycles(&t) three times - what value 
do I have in 't'? With the current code, the answer is "the difference 
between the absolute current timer minus the number of cycles it took 
between the first and second call", and that doesn't sound like something 
that is useful to anybody, ever.

Just make the function return the value as-is and do the subtraction here.


> #define PERF_START(t) t = AV_READ_TIME()
> #define PERF_STOP(t)  t = AV_READ_TIME() - t
> diff --git a/tests/checkasm/macos_kperf.c b/tests/checkasm/macos_kperf.c
> new file mode 100644
> index 0000000000..e6ae316608
> --- /dev/null
> +++ b/tests/checkasm/macos_kperf.c
> @@ -0,0 +1,143 @@
> +/*
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License along
> + * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
> + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
> + */
> +
> +#include "macos_kperf.h"
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <dlfcn.h>
> +
> +#define KPERF_LIST                                             \
> +    F(int, kpc_get_counting, void)                             \
> +    F(int, kpc_force_all_ctrs_set, int)                        \
> +    F(int, kpc_set_counting, uint32_t)                         \
> +    F(int, kpc_set_thread_counting, uint32_t)                  \
> +    F(int, kpc_set_config, uint32_t, void *)                   \
> +    F(int, kpc_get_config, uint32_t, void *)                   \
> +    F(int, kpc_set_period, uint32_t, void *)                   \
> +    F(int, kpc_get_period, uint32_t, void *)                   \
> +    F(uint32_t, kpc_get_counter_count, uint32_t)               \
> +    F(uint32_t, kpc_get_config_count, uint32_t)                \
> +    F(int, kperf_sample_get, int *)                            \
> +    F(int, kpc_get_thread_counters, int, unsigned int, void *)
> +
> +#define F(ret, name, ...)                                      \
> +    typedef ret name##proc(__VA_ARGS__);                       \
> +    static name##proc *name = NULL;
> +KPERF_LIST
> +#undef F
> +
> +#define CFGWORD_EL0A32EN_MASK (0x10000)
> +#define CFGWORD_EL0A64EN_MASK (0x20000)
> +#define CFGWORD_EL1EN_MASK    (0x40000)
> +#define CFGWORD_EL3EN_MASK    (0x80000)
> +#define CFGWORD_ALLMODES_MASK (0xf0000)
> +
> +#define CPMU_NONE 0
> +#define CPMU_CORE_CYCLE 0x02
> +#define CPMU_INST_A64 0x8c
> +#define CPMU_INST_BRANCH 0x8d
> +#define CPMU_SYNC_DC_LOAD_MISS 0xbf
> +#define CPMU_SYNC_DC_STORE_MISS 0xc0
> +#define CPMU_SYNC_DTLB_MISS 0xc1
> +#define CPMU_SYNC_ST_HIT_YNGR_LD 0xc4
> +#define CPMU_SYNC_BR_ANY_MISP 0xcb
> +#define CPMU_FED_IC_MISS_DEM 0xd3
> +#define CPMU_FED_ITLB_MISS 0xd4
> +
> +#define KPC_CLASS_FIXED_MASK        (1 << 0)
> +#define KPC_CLASS_CONFIGURABLE_MASK (1 << 1)
> +#define KPC_CLASS_POWER_MASK        (1 << 2)
> +#define KPC_CLASS_RAWPMU_MASK       (1 << 3)
> +
> +#define COUNTERS_COUNT 10
> +#define CONFIG_COUNT 8

Is this value actually needed for anything, other than verifying the 
return value of kpc_get_config_count? It looks to me like one could omit 
it. (Then again, I know this API is undocumented and all, so it's not 
really known how it's supposed to be used.)

> +#define KPC_MASK (KPC_CLASS_CONFIGURABLE_MASK | KPC_CLASS_FIXED_MASK)
> +
> +int ff_kperf_setup()
> +{
> +    uint64_t config[COUNTERS_COUNT] = {0};
> +    config[0] = CPMU_CORE_CYCLE | CFGWORD_EL0A64EN_MASK;
> +    // config[3] = CPMU_INST_BRANCH | CFGWORD_EL0A64EN_MASK;
> +    // config[4] = CPMU_SYNC_BR_ANY_MISP | CFGWORD_EL0A64EN_MASK;
> +    // config[5] = CPMU_INST_A64 | CFGWORD_EL0A64EN_MASK;
> +
> +    if (kpc_set_config(KPC_MASK, config)) {
> +        fprintf(stderr, "kperf: kpc_set_config failed\n");
> +        return -1;
> +    }
> +
> +    if (kpc_force_all_ctrs_set(1)) {
> +        fprintf(stderr, "kperf: kpc_force_all_ctrs_set failed\n");
> +        return -1;
> +    }
> +
> +    if (kpc_set_counting(KPC_MASK)) {
> +        fprintf(stderr, "kperf: kpc_set_counting failed\n");
> +        return -1;
> +    }
> +
> +    if (kpc_set_thread_counting(KPC_MASK)) {
> +        fprintf(stderr, "kperf: kpc_set_thread_counting failed\n");
> +        return -1;
> +    }
> +
> +    return 0;
> +}
> +
> +int ff_kperf_init()
> +{
> +    void *kperf = dlopen("/System/Library/PrivateFrameworks/kperf.framework/Versions/A/kperf", RTLD_LAZY);
> +    if (!kperf) {
> +        fprintf(stderr, "kperf: kperf = %p\n", kperf);

If it failed, then this will always print (null), so passing the variable 
to it right now is kinda pointless. A more useful error might be available 
via dlerror().

> +        return -1;
> +    }
> +
> +#define F(ret, name, ...)                            \
> +    name = (name##proc *)(dlsym(kperf, #name));      \
> +    if (!name) {                                     \
> +        fprintf(stderr, "kperf: %s = %p\n", #name, (void *)name);    \

The second parameter here will always be null too

> +        return -1;                                   \
> +    }
> +    KPERF_LIST
> +#undef F
> +
> +    if (kpc_get_counter_count(KPC_MASK) != COUNTERS_COUNT) {

This is also in the field of unknown API usage, but I guess in one sense, 
one could consider also allowing it to work if counter_count >= 1 or 
however many we actually use. But I guess we'd need to enlarge the stack 
buffers a bit more in that case, to allow an actual value up to whatever 
we've allocated.

> +        fprintf(stderr, "kperf: wrong fixed counters count\n");
> +        return -1;
> +    }
> +
> +    if (kpc_get_config_count(KPC_MASK) != CONFIG_COUNT) {
> +        fprintf(stderr, "kperf: wrong fixed config count\n");
> +        return -1;
> +    }
> +
> +    return 0;
> +}
> +
> +int ff_kperf_cycles(uint64_t *cycles)
> +{
> +    uint64_t counters[COUNTERS_COUNT];
> +    if (kpc_get_thread_counters(0, COUNTERS_COUNT, counters)) {
> +        return -1;
> +    }
> +
> +    if (cycles)
> +        *cycles = counters[0] - *cycles;

As point out in checkasm.h, move the subtraction there; having it here 
makes the function very strange to reason about.

// Martin
diff mbox series

Patch

diff --git a/configure b/configure
index d7a3f507e8..a47e3dea67 100755
--- a/configure
+++ b/configure
@@ -490,6 +490,7 @@  Developer options (useful when working on FFmpeg itself):
   --ignore-tests=TESTS     comma-separated list (without "fate-" prefix
                            in the name) of tests whose result is ignored
   --enable-linux-perf      enable Linux Performance Monitor API
+  --enable-macos-kperf     enable macOS kperf (private) API
   --disable-large-tests    disable tests that use a large amount of memory
 
 NOTE: Object files are built at the place where configure is launched.
@@ -1949,6 +1950,7 @@  CONFIG_LIST="
     fontconfig
     large_tests
     linux_perf
+    macos_kperf
     memory_poisoning
     neon_clobber_test
     ossfuzz
diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile
index 1827a4e134..4abaef9c63 100644
--- a/tests/checkasm/Makefile
+++ b/tests/checkasm/Makefile
@@ -58,6 +58,7 @@  CHECKASMOBJS-$(CONFIG_AVUTIL)  += $(AVUTILOBJS)
 CHECKASMOBJS-$(ARCH_AARCH64)            += aarch64/checkasm.o
 CHECKASMOBJS-$(HAVE_ARMV5TE_EXTERNAL)   += arm/checkasm.o
 CHECKASMOBJS-$(HAVE_X86ASM)             += x86/checkasm.o
+CHECKASMOBJS-$(CONFIG_MACOS_KPERF)      += macos_kperf.o
 
 CHECKASMOBJS += $(CHECKASMOBJS-yes) checkasm.o
 CHECKASMOBJS := $(sort $(CHECKASMOBJS:%=tests/checkasm/%))
diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
index 8338e8ff58..4c42040244 100644
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -26,6 +26,8 @@ 
 # ifndef _GNU_SOURCE
 #  define _GNU_SOURCE // for syscall (performance monitoring API)
 # endif
+#elif CONFIG_MACOS_KPERF
+#include "macos_kperf.h"
 #endif
 
 #include <stdarg.h>
@@ -637,9 +639,20 @@  static int bench_init_linux(void)
     }
     return 0;
 }
-#endif
+#elif CONFIG_MACOS_KPERF
+static int bench_init_kperf(void)
+{
+    if (ff_kperf_init() || ff_kperf_setup())
+        return -1;
 
-#if !CONFIG_LINUX_PERF
+    if (ff_kperf_cycles(NULL)) {
+        fprintf(stderr, "checkasm must be run as root to use kperf on macOS\n");
+        return -1;
+    }
+
+    return 0;
+}
+#else
 static int bench_init_ffmpeg(void)
 {
 #ifdef AV_READ_TIME
@@ -656,6 +669,8 @@  static int bench_init(void)
 {
 #if CONFIG_LINUX_PERF
     int ret = bench_init_linux();
+#elif CONFIG_MACOS_KPERF
+    int ret = bench_init_kperf();
 #else
     int ret = bench_init_ffmpeg();
 #endif
diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
index ef6645e3a2..4127081d74 100644
--- a/tests/checkasm/checkasm.h
+++ b/tests/checkasm/checkasm.h
@@ -31,6 +31,8 @@ 
 #include <sys/ioctl.h>
 #include <asm/unistd.h>
 #include <linux/perf_event.h>
+#elif CONFIG_MACOS_KPERF
+#include "macos_kperf.h"
 #endif
 
 #include "libavutil/avstring.h"
@@ -224,7 +226,7 @@  typedef struct CheckasmPerf {
     int iterations;
 } CheckasmPerf;
 
-#if defined(AV_READ_TIME) || CONFIG_LINUX_PERF
+#if defined(AV_READ_TIME) || CONFIG_LINUX_PERF || CONFIG_MACOS_KPERF
 
 #if CONFIG_LINUX_PERF
 #define PERF_START(t) do {                              \
@@ -235,6 +237,12 @@  typedef struct CheckasmPerf {
     ioctl(sysfd, PERF_EVENT_IOC_DISABLE, 0);            \
     read(sysfd, &t, sizeof(t));                         \
 } while (0)
+#elif CONFIG_MACOS_KPERF
+#define PERF_START(t) do {                              \
+    t = 0;                                              \
+    ff_kperf_cycles(&t);                                \
+} while (0)
+#define PERF_STOP(t) ff_kperf_cycles(&t)
 #else
 #define PERF_START(t) t = AV_READ_TIME()
 #define PERF_STOP(t)  t = AV_READ_TIME() - t
diff --git a/tests/checkasm/macos_kperf.c b/tests/checkasm/macos_kperf.c
new file mode 100644
index 0000000000..e6ae316608
--- /dev/null
+++ b/tests/checkasm/macos_kperf.c
@@ -0,0 +1,143 @@ 
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include "macos_kperf.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <dlfcn.h>
+
+#define KPERF_LIST                                             \
+    F(int, kpc_get_counting, void)                             \
+    F(int, kpc_force_all_ctrs_set, int)                        \
+    F(int, kpc_set_counting, uint32_t)                         \
+    F(int, kpc_set_thread_counting, uint32_t)                  \
+    F(int, kpc_set_config, uint32_t, void *)                   \
+    F(int, kpc_get_config, uint32_t, void *)                   \
+    F(int, kpc_set_period, uint32_t, void *)                   \
+    F(int, kpc_get_period, uint32_t, void *)                   \
+    F(uint32_t, kpc_get_counter_count, uint32_t)               \
+    F(uint32_t, kpc_get_config_count, uint32_t)                \
+    F(int, kperf_sample_get, int *)                            \
+    F(int, kpc_get_thread_counters, int, unsigned int, void *)
+
+#define F(ret, name, ...)                                      \
+    typedef ret name##proc(__VA_ARGS__);                       \
+    static name##proc *name = NULL;
+KPERF_LIST
+#undef F
+
+#define CFGWORD_EL0A32EN_MASK (0x10000)
+#define CFGWORD_EL0A64EN_MASK (0x20000)
+#define CFGWORD_EL1EN_MASK    (0x40000)
+#define CFGWORD_EL3EN_MASK    (0x80000)
+#define CFGWORD_ALLMODES_MASK (0xf0000)
+
+#define CPMU_NONE 0
+#define CPMU_CORE_CYCLE 0x02
+#define CPMU_INST_A64 0x8c
+#define CPMU_INST_BRANCH 0x8d
+#define CPMU_SYNC_DC_LOAD_MISS 0xbf
+#define CPMU_SYNC_DC_STORE_MISS 0xc0
+#define CPMU_SYNC_DTLB_MISS 0xc1
+#define CPMU_SYNC_ST_HIT_YNGR_LD 0xc4
+#define CPMU_SYNC_BR_ANY_MISP 0xcb
+#define CPMU_FED_IC_MISS_DEM 0xd3
+#define CPMU_FED_ITLB_MISS 0xd4
+
+#define KPC_CLASS_FIXED_MASK        (1 << 0)
+#define KPC_CLASS_CONFIGURABLE_MASK (1 << 1)
+#define KPC_CLASS_POWER_MASK        (1 << 2)
+#define KPC_CLASS_RAWPMU_MASK       (1 << 3)
+
+#define COUNTERS_COUNT 10
+#define CONFIG_COUNT 8
+#define KPC_MASK (KPC_CLASS_CONFIGURABLE_MASK | KPC_CLASS_FIXED_MASK)
+
+int ff_kperf_setup()
+{
+    uint64_t config[COUNTERS_COUNT] = {0};
+    config[0] = CPMU_CORE_CYCLE | CFGWORD_EL0A64EN_MASK;
+    // config[3] = CPMU_INST_BRANCH | CFGWORD_EL0A64EN_MASK;
+    // config[4] = CPMU_SYNC_BR_ANY_MISP | CFGWORD_EL0A64EN_MASK;
+    // config[5] = CPMU_INST_A64 | CFGWORD_EL0A64EN_MASK;
+
+    if (kpc_set_config(KPC_MASK, config)) {
+        fprintf(stderr, "kperf: kpc_set_config failed\n");
+        return -1;
+    }
+
+    if (kpc_force_all_ctrs_set(1)) {
+        fprintf(stderr, "kperf: kpc_force_all_ctrs_set failed\n");
+        return -1;
+    }
+
+    if (kpc_set_counting(KPC_MASK)) {
+        fprintf(stderr, "kperf: kpc_set_counting failed\n");
+        return -1;
+    }
+
+    if (kpc_set_thread_counting(KPC_MASK)) {
+        fprintf(stderr, "kperf: kpc_set_thread_counting failed\n");
+        return -1;
+    }
+
+    return 0;
+}
+
+int ff_kperf_init()
+{
+    void *kperf = dlopen("/System/Library/PrivateFrameworks/kperf.framework/Versions/A/kperf", RTLD_LAZY);
+    if (!kperf) {
+        fprintf(stderr, "kperf: kperf = %p\n", kperf);
+        return -1;
+    }
+
+#define F(ret, name, ...)                            \
+    name = (name##proc *)(dlsym(kperf, #name));      \
+    if (!name) {                                     \
+        fprintf(stderr, "kperf: %s = %p\n", #name, (void *)name);    \
+        return -1;                                   \
+    }
+    KPERF_LIST
+#undef F
+
+    if (kpc_get_counter_count(KPC_MASK) != COUNTERS_COUNT) {
+        fprintf(stderr, "kperf: wrong fixed counters count\n");
+        return -1;
+    }
+
+    if (kpc_get_config_count(KPC_MASK) != CONFIG_COUNT) {
+        fprintf(stderr, "kperf: wrong fixed config count\n");
+        return -1;
+    }
+
+    return 0;
+}
+
+int ff_kperf_cycles(uint64_t *cycles)
+{
+    uint64_t counters[COUNTERS_COUNT];
+    if (kpc_get_thread_counters(0, COUNTERS_COUNT, counters)) {
+        return -1;
+    }
+
+    if (cycles)
+        *cycles = counters[0] - *cycles;
+
+    return 0;
+}
diff --git a/tests/checkasm/macos_kperf.h b/tests/checkasm/macos_kperf.h
new file mode 100644
index 0000000000..dddd6e4d24
--- /dev/null
+++ b/tests/checkasm/macos_kperf.h
@@ -0,0 +1,23 @@ 
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <stdint.h>
+
+int ff_kperf_setup(void);
+int ff_kperf_init(void);
+int ff_kperf_cycles(uint64_t *cycles);