Message ID | 20210413004523.6500-1-josh@itanimul.li |
---|---|
State | New |
Headers | show |
Series | [FFmpeg-devel] checkasm: add (private) kperf timing for macOS | expand |
Context | Check | Description |
---|---|---|
andriy/x86_make | success | Make finished |
andriy/x86_make_fate | fail | Make fate failed |
andriy/PPC64_make | success | Make finished |
andriy/PPC64_make_fate | warning | Make fate failed |
Apr 13, 2021, 02:45 by josh@itanimul.li: > Signed-off-by: Josh Dekker <josh@itanimul.li> > --- > configure | 2 + > tests/checkasm/Makefile | 1 + > tests/checkasm/checkasm.c | 19 ++++- > tests/checkasm/checkasm.h | 10 ++- > tests/checkasm/macos_kperf.c | 143 +++++++++++++++++++++++++++++++++++ > tests/checkasm/macos_kperf.h | 23 ++++++ > 6 files changed, 195 insertions(+), 3 deletions(-) > create mode 100644 tests/checkasm/macos_kperf.c > create mode 100644 tests/checkasm/macos_kperf.h > > diff --git a/configure b/configure > index d7a3f507e8..a47e3dea67 100755 > --- a/configure > +++ b/configure > @@ -490,6 +490,7 @@ Developer options (useful when working on FFmpeg itself): > --ignore-tests=TESTS comma-separated list (without "fate-" prefix > in the name) of tests whose result is ignored > --enable-linux-perf enable Linux Performance Monitor API > + --enable-macos-kperf enable macOS kperf (private) API > --disable-large-tests disable tests that use a large amount of memory > > NOTE: Object files are built at the place where configure is launched. > @@ -1949,6 +1950,7 @@ CONFIG_LIST=" > fontconfig > large_tests > linux_perf > + macos_kperf > memory_poisoning > neon_clobber_test > ossfuzz > diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile > index 1827a4e134..4abaef9c63 100644 > --- a/tests/checkasm/Makefile > +++ b/tests/checkasm/Makefile > @@ -58,6 +58,7 @@ CHECKASMOBJS-$(CONFIG_AVUTIL) += $(AVUTILOBJS) > CHECKASMOBJS-$(ARCH_AARCH64) += aarch64/checkasm.o > CHECKASMOBJS-$(HAVE_ARMV5TE_EXTERNAL) += arm/checkasm.o > CHECKASMOBJS-$(HAVE_X86ASM) += x86/checkasm.o > +CHECKASMOBJS-$(CONFIG_MACOS_KPERF) += macos_kperf.o > > CHECKASMOBJS += $(CHECKASMOBJS-yes) checkasm.o > CHECKASMOBJS := $(sort $(CHECKASMOBJS:%=tests/checkasm/%)) > diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c > index 8338e8ff58..4c42040244 100644 > --- a/tests/checkasm/checkasm.c > +++ b/tests/checkasm/checkasm.c > @@ -26,6 +26,8 @@ > # ifndef _GNU_SOURCE > # define _GNU_SOURCE // for syscall (performance monitoring API) > # endif > +#elif CONFIG_MACOS_KPERF > +#include "macos_kperf.h" > #endif > > #include <stdarg.h> > @@ -637,9 +639,20 @@ static int bench_init_linux(void) > } > return 0; > } > -#endif > +#elif CONFIG_MACOS_KPERF > +static int bench_init_kperf(void) > +{ > + if (ff_kperf_init() || ff_kperf_setup()) > + return -1; > > -#if !CONFIG_LINUX_PERF > + if (ff_kperf_cycles(NULL)) { > + fprintf(stderr, "checkasm must be run as root to use kperf on macOS\n"); > + return -1; > + } > + > + return 0; > +} > +#else > static int bench_init_ffmpeg(void) > { > #ifdef AV_READ_TIME > @@ -656,6 +669,8 @@ static int bench_init(void) > { > #if CONFIG_LINUX_PERF > int ret = bench_init_linux(); > +#elif CONFIG_MACOS_KPERF > + int ret = bench_init_kperf(); > #else > int ret = bench_init_ffmpeg(); > #endif > diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h > index ef6645e3a2..4127081d74 100644 > --- a/tests/checkasm/checkasm.h > +++ b/tests/checkasm/checkasm.h > @@ -31,6 +31,8 @@ > #include <sys/ioctl.h> > #include <asm/unistd.h> > #include <linux/perf_event.h> > +#elif CONFIG_MACOS_KPERF > +#include "macos_kperf.h" > #endif > > #include "libavutil/avstring.h" > @@ -224,7 +226,7 @@ typedef struct CheckasmPerf { > int iterations; > } CheckasmPerf; > > -#if defined(AV_READ_TIME) || CONFIG_LINUX_PERF > +#if defined(AV_READ_TIME) || CONFIG_LINUX_PERF || CONFIG_MACOS_KPERF > > #if CONFIG_LINUX_PERF > #define PERF_START(t) do { \ > @@ -235,6 +237,12 @@ typedef struct CheckasmPerf { > ioctl(sysfd, PERF_EVENT_IOC_DISABLE, 0); \ > read(sysfd, &t, sizeof(t)); \ > } while (0) > +#elif CONFIG_MACOS_KPERF > +#define PERF_START(t) do { \ > + t = 0; \ > + ff_kperf_cycles(&t); \ > +} while (0) > +#define PERF_STOP(t) ff_kperf_cycles(&t) > #else > #define PERF_START(t) t = AV_READ_TIME() > #define PERF_STOP(t) t = AV_READ_TIME() - t > diff --git a/tests/checkasm/macos_kperf.c b/tests/checkasm/macos_kperf.c > new file mode 100644 > index 0000000000..e6ae316608 > --- /dev/null > +++ b/tests/checkasm/macos_kperf.c > @@ -0,0 +1,143 @@ > +/* > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License as published by > + * the Free Software Foundation; either version 2 of the License, or > + * (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. > + * > + * You should have received a copy of the GNU General Public License along > + * with FFmpeg; if not, write to the Free Software Foundation, Inc., > + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. > + */ > + > +#include "macos_kperf.h" > +#include <stdio.h> > +#include <stdlib.h> > +#include <dlfcn.h> > + > +#define KPERF_LIST \ > + F(int, kpc_get_counting, void) \ > + F(int, kpc_force_all_ctrs_set, int) \ > + F(int, kpc_set_counting, uint32_t) \ > + F(int, kpc_set_thread_counting, uint32_t) \ > + F(int, kpc_set_config, uint32_t, void *) \ > + F(int, kpc_get_config, uint32_t, void *) \ > + F(int, kpc_set_period, uint32_t, void *) \ > + F(int, kpc_get_period, uint32_t, void *) \ > + F(uint32_t, kpc_get_counter_count, uint32_t) \ > + F(uint32_t, kpc_get_config_count, uint32_t) \ > + F(int, kperf_sample_get, int *) \ > + F(int, kpc_get_thread_counters, int, unsigned int, void *) > + > +#define F(ret, name, ...) \ > + typedef ret name##proc(__VA_ARGS__); \ > + static name##proc *name = NULL; > +KPERF_LIST > +#undef F > + > +#define CFGWORD_EL0A32EN_MASK (0x10000) > +#define CFGWORD_EL0A64EN_MASK (0x20000) > +#define CFGWORD_EL1EN_MASK (0x40000) > +#define CFGWORD_EL3EN_MASK (0x80000) > +#define CFGWORD_ALLMODES_MASK (0xf0000) > + > +#define CPMU_NONE 0 > +#define CPMU_CORE_CYCLE 0x02 > +#define CPMU_INST_A64 0x8c > +#define CPMU_INST_BRANCH 0x8d > +#define CPMU_SYNC_DC_LOAD_MISS 0xbf > +#define CPMU_SYNC_DC_STORE_MISS 0xc0 > +#define CPMU_SYNC_DTLB_MISS 0xc1 > +#define CPMU_SYNC_ST_HIT_YNGR_LD 0xc4 > +#define CPMU_SYNC_BR_ANY_MISP 0xcb > +#define CPMU_FED_IC_MISS_DEM 0xd3 > +#define CPMU_FED_ITLB_MISS 0xd4 > + > +#define KPC_CLASS_FIXED_MASK (1 << 0) > +#define KPC_CLASS_CONFIGURABLE_MASK (1 << 1) > +#define KPC_CLASS_POWER_MASK (1 << 2) > +#define KPC_CLASS_RAWPMU_MASK (1 << 3) > + > +#define COUNTERS_COUNT 10 > +#define CONFIG_COUNT 8 > +#define KPC_MASK (KPC_CLASS_CONFIGURABLE_MASK | KPC_CLASS_FIXED_MASK) > + > +int ff_kperf_setup() > +{ > + uint64_t config[COUNTERS_COUNT] = {0}; > + config[0] = CPMU_CORE_CYCLE | CFGWORD_EL0A64EN_MASK; > + // config[3] = CPMU_INST_BRANCH | CFGWORD_EL0A64EN_MASK; > + // config[4] = CPMU_SYNC_BR_ANY_MISP | CFGWORD_EL0A64EN_MASK; > + // config[5] = CPMU_INST_A64 | CFGWORD_EL0A64EN_MASK; > + > + if (kpc_set_config(KPC_MASK, config)) { > + fprintf(stderr, "kperf: kpc_set_config failed\n"); > + return -1; > + } > + > + if (kpc_force_all_ctrs_set(1)) { > + fprintf(stderr, "kperf: kpc_force_all_ctrs_set failed\n"); > + return -1; > + } > + > + if (kpc_set_counting(KPC_MASK)) { > + fprintf(stderr, "kperf: kpc_set_counting failed\n"); > + return -1; > + } > + > + if (kpc_set_thread_counting(KPC_MASK)) { > + fprintf(stderr, "kperf: kpc_set_thread_counting failed\n"); > + return -1; > + } > + > + return 0; > +} > + > +int ff_kperf_init() > +{ > + void *kperf = dlopen("/System/Library/PrivateFrameworks/kperf.framework/Versions/A/kperf", RTLD_LAZY); > + if (!kperf) { > + fprintf(stderr, "kperf: kperf = %p\n", kperf); > + return -1; > + } > + > +#define F(ret, name, ...) \ > + name = (name##proc *)(dlsym(kperf, #name)); \ > + if (!name) { \ > + fprintf(stderr, "kperf: %s = %p\n", #name, (void *)name); \ > + return -1; \ > + } > + KPERF_LIST > +#undef F > + > + if (kpc_get_counter_count(KPC_MASK) != COUNTERS_COUNT) { > + fprintf(stderr, "kperf: wrong fixed counters count\n"); > + return -1; > + } > + > + if (kpc_get_config_count(KPC_MASK) != CONFIG_COUNT) { > + fprintf(stderr, "kperf: wrong fixed config count\n"); > + return -1; > + } > + > + return 0; > +} > + > +int ff_kperf_cycles(uint64_t *cycles) > +{ > + uint64_t counters[COUNTERS_COUNT]; > + if (kpc_get_thread_counters(0, COUNTERS_COUNT, counters)) { > + return -1; > + } > + > + if (cycles) > + *cycles = counters[0] - *cycles; > + > + return 0; > +} > diff --git a/tests/checkasm/macos_kperf.h b/tests/checkasm/macos_kperf.h > new file mode 100644 > index 0000000000..dddd6e4d24 > --- /dev/null > +++ b/tests/checkasm/macos_kperf.h > @@ -0,0 +1,23 @@ > +/* > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License as published by > + * the Free Software Foundation; either version 2 of the License, or > + * (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. > + * > + * You should have received a copy of the GNU General Public License along > + * with FFmpeg; if not, write to the Free Software Foundation, Inc., > + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. > + */ > + > +#include <stdint.h> > + > +int ff_kperf_setup(void); > +int ff_kperf_init(void); > +int ff_kperf_cycles(uint64_t *cycles); > Could you move this to libavutil and hook this up to libavutil/timer.h as well?
On Tue, 13 Apr 2021, Lynne wrote: > Could you move this to libavutil and hook this up to libavutil/timer.h > as well? I'm a bit hesitant about that, as the API (just like the existing linux perf integration which is predecent for heavyweight timer APIs) requires a bit more setup than what libavutil/timer.h normally does. // Martin
On Tue, 13 Apr 2021, Josh Dekker wrote: > Signed-off-by: Josh Dekker <josh@itanimul.li> > --- > configure | 2 + > tests/checkasm/Makefile | 1 + > tests/checkasm/checkasm.c | 19 ++++- > tests/checkasm/checkasm.h | 10 ++- > tests/checkasm/macos_kperf.c | 143 +++++++++++++++++++++++++++++++++++ > tests/checkasm/macos_kperf.h | 23 ++++++ > 6 files changed, 195 insertions(+), 3 deletions(-) > create mode 100644 tests/checkasm/macos_kperf.c > create mode 100644 tests/checkasm/macos_kperf.h > > diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h > index ef6645e3a2..4127081d74 100644 > --- a/tests/checkasm/checkasm.h > +++ b/tests/checkasm/checkasm.h > @@ -31,6 +31,8 @@ > #include <sys/ioctl.h> > #include <asm/unistd.h> > #include <linux/perf_event.h> > +#elif CONFIG_MACOS_KPERF > +#include "macos_kperf.h" > #endif > > #include "libavutil/avstring.h" > @@ -224,7 +226,7 @@ typedef struct CheckasmPerf { > int iterations; > } CheckasmPerf; > > -#if defined(AV_READ_TIME) || CONFIG_LINUX_PERF > +#if defined(AV_READ_TIME) || CONFIG_LINUX_PERF || CONFIG_MACOS_KPERF > > #if CONFIG_LINUX_PERF > #define PERF_START(t) do { \ > @@ -235,6 +237,12 @@ typedef struct CheckasmPerf { > ioctl(sysfd, PERF_EVENT_IOC_DISABLE, 0); \ > read(sysfd, &t, sizeof(t)); \ > } while (0) > +#elif CONFIG_MACOS_KPERF > +#define PERF_START(t) do { \ > + t = 0; \ > + ff_kperf_cycles(&t); \ > +} while (0) > +#define PERF_STOP(t) ff_kperf_cycles(&t) > #else I find this implicit subtraction in ff_kperf_cycles() highly surprising and hard to reason about. E.g. if I do t=0; followed by ff_kperf_cycles(&t) three times - what value do I have in 't'? With the current code, the answer is "the difference between the absolute current timer minus the number of cycles it took between the first and second call", and that doesn't sound like something that is useful to anybody, ever. Just make the function return the value as-is and do the subtraction here. > #define PERF_START(t) t = AV_READ_TIME() > #define PERF_STOP(t) t = AV_READ_TIME() - t > diff --git a/tests/checkasm/macos_kperf.c b/tests/checkasm/macos_kperf.c > new file mode 100644 > index 0000000000..e6ae316608 > --- /dev/null > +++ b/tests/checkasm/macos_kperf.c > @@ -0,0 +1,143 @@ > +/* > + * This file is part of FFmpeg. > + * > + * FFmpeg is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License as published by > + * the Free Software Foundation; either version 2 of the License, or > + * (at your option) any later version. > + * > + * FFmpeg is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. > + * > + * You should have received a copy of the GNU General Public License along > + * with FFmpeg; if not, write to the Free Software Foundation, Inc., > + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. > + */ > + > +#include "macos_kperf.h" > +#include <stdio.h> > +#include <stdlib.h> > +#include <dlfcn.h> > + > +#define KPERF_LIST \ > + F(int, kpc_get_counting, void) \ > + F(int, kpc_force_all_ctrs_set, int) \ > + F(int, kpc_set_counting, uint32_t) \ > + F(int, kpc_set_thread_counting, uint32_t) \ > + F(int, kpc_set_config, uint32_t, void *) \ > + F(int, kpc_get_config, uint32_t, void *) \ > + F(int, kpc_set_period, uint32_t, void *) \ > + F(int, kpc_get_period, uint32_t, void *) \ > + F(uint32_t, kpc_get_counter_count, uint32_t) \ > + F(uint32_t, kpc_get_config_count, uint32_t) \ > + F(int, kperf_sample_get, int *) \ > + F(int, kpc_get_thread_counters, int, unsigned int, void *) > + > +#define F(ret, name, ...) \ > + typedef ret name##proc(__VA_ARGS__); \ > + static name##proc *name = NULL; > +KPERF_LIST > +#undef F > + > +#define CFGWORD_EL0A32EN_MASK (0x10000) > +#define CFGWORD_EL0A64EN_MASK (0x20000) > +#define CFGWORD_EL1EN_MASK (0x40000) > +#define CFGWORD_EL3EN_MASK (0x80000) > +#define CFGWORD_ALLMODES_MASK (0xf0000) > + > +#define CPMU_NONE 0 > +#define CPMU_CORE_CYCLE 0x02 > +#define CPMU_INST_A64 0x8c > +#define CPMU_INST_BRANCH 0x8d > +#define CPMU_SYNC_DC_LOAD_MISS 0xbf > +#define CPMU_SYNC_DC_STORE_MISS 0xc0 > +#define CPMU_SYNC_DTLB_MISS 0xc1 > +#define CPMU_SYNC_ST_HIT_YNGR_LD 0xc4 > +#define CPMU_SYNC_BR_ANY_MISP 0xcb > +#define CPMU_FED_IC_MISS_DEM 0xd3 > +#define CPMU_FED_ITLB_MISS 0xd4 > + > +#define KPC_CLASS_FIXED_MASK (1 << 0) > +#define KPC_CLASS_CONFIGURABLE_MASK (1 << 1) > +#define KPC_CLASS_POWER_MASK (1 << 2) > +#define KPC_CLASS_RAWPMU_MASK (1 << 3) > + > +#define COUNTERS_COUNT 10 > +#define CONFIG_COUNT 8 Is this value actually needed for anything, other than verifying the return value of kpc_get_config_count? It looks to me like one could omit it. (Then again, I know this API is undocumented and all, so it's not really known how it's supposed to be used.) > +#define KPC_MASK (KPC_CLASS_CONFIGURABLE_MASK | KPC_CLASS_FIXED_MASK) > + > +int ff_kperf_setup() > +{ > + uint64_t config[COUNTERS_COUNT] = {0}; > + config[0] = CPMU_CORE_CYCLE | CFGWORD_EL0A64EN_MASK; > + // config[3] = CPMU_INST_BRANCH | CFGWORD_EL0A64EN_MASK; > + // config[4] = CPMU_SYNC_BR_ANY_MISP | CFGWORD_EL0A64EN_MASK; > + // config[5] = CPMU_INST_A64 | CFGWORD_EL0A64EN_MASK; > + > + if (kpc_set_config(KPC_MASK, config)) { > + fprintf(stderr, "kperf: kpc_set_config failed\n"); > + return -1; > + } > + > + if (kpc_force_all_ctrs_set(1)) { > + fprintf(stderr, "kperf: kpc_force_all_ctrs_set failed\n"); > + return -1; > + } > + > + if (kpc_set_counting(KPC_MASK)) { > + fprintf(stderr, "kperf: kpc_set_counting failed\n"); > + return -1; > + } > + > + if (kpc_set_thread_counting(KPC_MASK)) { > + fprintf(stderr, "kperf: kpc_set_thread_counting failed\n"); > + return -1; > + } > + > + return 0; > +} > + > +int ff_kperf_init() > +{ > + void *kperf = dlopen("/System/Library/PrivateFrameworks/kperf.framework/Versions/A/kperf", RTLD_LAZY); > + if (!kperf) { > + fprintf(stderr, "kperf: kperf = %p\n", kperf); If it failed, then this will always print (null), so passing the variable to it right now is kinda pointless. A more useful error might be available via dlerror(). > + return -1; > + } > + > +#define F(ret, name, ...) \ > + name = (name##proc *)(dlsym(kperf, #name)); \ > + if (!name) { \ > + fprintf(stderr, "kperf: %s = %p\n", #name, (void *)name); \ The second parameter here will always be null too > + return -1; \ > + } > + KPERF_LIST > +#undef F > + > + if (kpc_get_counter_count(KPC_MASK) != COUNTERS_COUNT) { This is also in the field of unknown API usage, but I guess in one sense, one could consider also allowing it to work if counter_count >= 1 or however many we actually use. But I guess we'd need to enlarge the stack buffers a bit more in that case, to allow an actual value up to whatever we've allocated. > + fprintf(stderr, "kperf: wrong fixed counters count\n"); > + return -1; > + } > + > + if (kpc_get_config_count(KPC_MASK) != CONFIG_COUNT) { > + fprintf(stderr, "kperf: wrong fixed config count\n"); > + return -1; > + } > + > + return 0; > +} > + > +int ff_kperf_cycles(uint64_t *cycles) > +{ > + uint64_t counters[COUNTERS_COUNT]; > + if (kpc_get_thread_counters(0, COUNTERS_COUNT, counters)) { > + return -1; > + } > + > + if (cycles) > + *cycles = counters[0] - *cycles; As point out in checkasm.h, move the subtraction there; having it here makes the function very strange to reason about. // Martin
diff --git a/configure b/configure index d7a3f507e8..a47e3dea67 100755 --- a/configure +++ b/configure @@ -490,6 +490,7 @@ Developer options (useful when working on FFmpeg itself): --ignore-tests=TESTS comma-separated list (without "fate-" prefix in the name) of tests whose result is ignored --enable-linux-perf enable Linux Performance Monitor API + --enable-macos-kperf enable macOS kperf (private) API --disable-large-tests disable tests that use a large amount of memory NOTE: Object files are built at the place where configure is launched. @@ -1949,6 +1950,7 @@ CONFIG_LIST=" fontconfig large_tests linux_perf + macos_kperf memory_poisoning neon_clobber_test ossfuzz diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile index 1827a4e134..4abaef9c63 100644 --- a/tests/checkasm/Makefile +++ b/tests/checkasm/Makefile @@ -58,6 +58,7 @@ CHECKASMOBJS-$(CONFIG_AVUTIL) += $(AVUTILOBJS) CHECKASMOBJS-$(ARCH_AARCH64) += aarch64/checkasm.o CHECKASMOBJS-$(HAVE_ARMV5TE_EXTERNAL) += arm/checkasm.o CHECKASMOBJS-$(HAVE_X86ASM) += x86/checkasm.o +CHECKASMOBJS-$(CONFIG_MACOS_KPERF) += macos_kperf.o CHECKASMOBJS += $(CHECKASMOBJS-yes) checkasm.o CHECKASMOBJS := $(sort $(CHECKASMOBJS:%=tests/checkasm/%)) diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c index 8338e8ff58..4c42040244 100644 --- a/tests/checkasm/checkasm.c +++ b/tests/checkasm/checkasm.c @@ -26,6 +26,8 @@ # ifndef _GNU_SOURCE # define _GNU_SOURCE // for syscall (performance monitoring API) # endif +#elif CONFIG_MACOS_KPERF +#include "macos_kperf.h" #endif #include <stdarg.h> @@ -637,9 +639,20 @@ static int bench_init_linux(void) } return 0; } -#endif +#elif CONFIG_MACOS_KPERF +static int bench_init_kperf(void) +{ + if (ff_kperf_init() || ff_kperf_setup()) + return -1; -#if !CONFIG_LINUX_PERF + if (ff_kperf_cycles(NULL)) { + fprintf(stderr, "checkasm must be run as root to use kperf on macOS\n"); + return -1; + } + + return 0; +} +#else static int bench_init_ffmpeg(void) { #ifdef AV_READ_TIME @@ -656,6 +669,8 @@ static int bench_init(void) { #if CONFIG_LINUX_PERF int ret = bench_init_linux(); +#elif CONFIG_MACOS_KPERF + int ret = bench_init_kperf(); #else int ret = bench_init_ffmpeg(); #endif diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h index ef6645e3a2..4127081d74 100644 --- a/tests/checkasm/checkasm.h +++ b/tests/checkasm/checkasm.h @@ -31,6 +31,8 @@ #include <sys/ioctl.h> #include <asm/unistd.h> #include <linux/perf_event.h> +#elif CONFIG_MACOS_KPERF +#include "macos_kperf.h" #endif #include "libavutil/avstring.h" @@ -224,7 +226,7 @@ typedef struct CheckasmPerf { int iterations; } CheckasmPerf; -#if defined(AV_READ_TIME) || CONFIG_LINUX_PERF +#if defined(AV_READ_TIME) || CONFIG_LINUX_PERF || CONFIG_MACOS_KPERF #if CONFIG_LINUX_PERF #define PERF_START(t) do { \ @@ -235,6 +237,12 @@ typedef struct CheckasmPerf { ioctl(sysfd, PERF_EVENT_IOC_DISABLE, 0); \ read(sysfd, &t, sizeof(t)); \ } while (0) +#elif CONFIG_MACOS_KPERF +#define PERF_START(t) do { \ + t = 0; \ + ff_kperf_cycles(&t); \ +} while (0) +#define PERF_STOP(t) ff_kperf_cycles(&t) #else #define PERF_START(t) t = AV_READ_TIME() #define PERF_STOP(t) t = AV_READ_TIME() - t diff --git a/tests/checkasm/macos_kperf.c b/tests/checkasm/macos_kperf.c new file mode 100644 index 0000000000..e6ae316608 --- /dev/null +++ b/tests/checkasm/macos_kperf.c @@ -0,0 +1,143 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with FFmpeg; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include "macos_kperf.h" +#include <stdio.h> +#include <stdlib.h> +#include <dlfcn.h> + +#define KPERF_LIST \ + F(int, kpc_get_counting, void) \ + F(int, kpc_force_all_ctrs_set, int) \ + F(int, kpc_set_counting, uint32_t) \ + F(int, kpc_set_thread_counting, uint32_t) \ + F(int, kpc_set_config, uint32_t, void *) \ + F(int, kpc_get_config, uint32_t, void *) \ + F(int, kpc_set_period, uint32_t, void *) \ + F(int, kpc_get_period, uint32_t, void *) \ + F(uint32_t, kpc_get_counter_count, uint32_t) \ + F(uint32_t, kpc_get_config_count, uint32_t) \ + F(int, kperf_sample_get, int *) \ + F(int, kpc_get_thread_counters, int, unsigned int, void *) + +#define F(ret, name, ...) \ + typedef ret name##proc(__VA_ARGS__); \ + static name##proc *name = NULL; +KPERF_LIST +#undef F + +#define CFGWORD_EL0A32EN_MASK (0x10000) +#define CFGWORD_EL0A64EN_MASK (0x20000) +#define CFGWORD_EL1EN_MASK (0x40000) +#define CFGWORD_EL3EN_MASK (0x80000) +#define CFGWORD_ALLMODES_MASK (0xf0000) + +#define CPMU_NONE 0 +#define CPMU_CORE_CYCLE 0x02 +#define CPMU_INST_A64 0x8c +#define CPMU_INST_BRANCH 0x8d +#define CPMU_SYNC_DC_LOAD_MISS 0xbf +#define CPMU_SYNC_DC_STORE_MISS 0xc0 +#define CPMU_SYNC_DTLB_MISS 0xc1 +#define CPMU_SYNC_ST_HIT_YNGR_LD 0xc4 +#define CPMU_SYNC_BR_ANY_MISP 0xcb +#define CPMU_FED_IC_MISS_DEM 0xd3 +#define CPMU_FED_ITLB_MISS 0xd4 + +#define KPC_CLASS_FIXED_MASK (1 << 0) +#define KPC_CLASS_CONFIGURABLE_MASK (1 << 1) +#define KPC_CLASS_POWER_MASK (1 << 2) +#define KPC_CLASS_RAWPMU_MASK (1 << 3) + +#define COUNTERS_COUNT 10 +#define CONFIG_COUNT 8 +#define KPC_MASK (KPC_CLASS_CONFIGURABLE_MASK | KPC_CLASS_FIXED_MASK) + +int ff_kperf_setup() +{ + uint64_t config[COUNTERS_COUNT] = {0}; + config[0] = CPMU_CORE_CYCLE | CFGWORD_EL0A64EN_MASK; + // config[3] = CPMU_INST_BRANCH | CFGWORD_EL0A64EN_MASK; + // config[4] = CPMU_SYNC_BR_ANY_MISP | CFGWORD_EL0A64EN_MASK; + // config[5] = CPMU_INST_A64 | CFGWORD_EL0A64EN_MASK; + + if (kpc_set_config(KPC_MASK, config)) { + fprintf(stderr, "kperf: kpc_set_config failed\n"); + return -1; + } + + if (kpc_force_all_ctrs_set(1)) { + fprintf(stderr, "kperf: kpc_force_all_ctrs_set failed\n"); + return -1; + } + + if (kpc_set_counting(KPC_MASK)) { + fprintf(stderr, "kperf: kpc_set_counting failed\n"); + return -1; + } + + if (kpc_set_thread_counting(KPC_MASK)) { + fprintf(stderr, "kperf: kpc_set_thread_counting failed\n"); + return -1; + } + + return 0; +} + +int ff_kperf_init() +{ + void *kperf = dlopen("/System/Library/PrivateFrameworks/kperf.framework/Versions/A/kperf", RTLD_LAZY); + if (!kperf) { + fprintf(stderr, "kperf: kperf = %p\n", kperf); + return -1; + } + +#define F(ret, name, ...) \ + name = (name##proc *)(dlsym(kperf, #name)); \ + if (!name) { \ + fprintf(stderr, "kperf: %s = %p\n", #name, (void *)name); \ + return -1; \ + } + KPERF_LIST +#undef F + + if (kpc_get_counter_count(KPC_MASK) != COUNTERS_COUNT) { + fprintf(stderr, "kperf: wrong fixed counters count\n"); + return -1; + } + + if (kpc_get_config_count(KPC_MASK) != CONFIG_COUNT) { + fprintf(stderr, "kperf: wrong fixed config count\n"); + return -1; + } + + return 0; +} + +int ff_kperf_cycles(uint64_t *cycles) +{ + uint64_t counters[COUNTERS_COUNT]; + if (kpc_get_thread_counters(0, COUNTERS_COUNT, counters)) { + return -1; + } + + if (cycles) + *cycles = counters[0] - *cycles; + + return 0; +} diff --git a/tests/checkasm/macos_kperf.h b/tests/checkasm/macos_kperf.h new file mode 100644 index 0000000000..dddd6e4d24 --- /dev/null +++ b/tests/checkasm/macos_kperf.h @@ -0,0 +1,23 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with FFmpeg; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <stdint.h> + +int ff_kperf_setup(void); +int ff_kperf_init(void); +int ff_kperf_cycles(uint64_t *cycles);
Signed-off-by: Josh Dekker <josh@itanimul.li> --- configure | 2 + tests/checkasm/Makefile | 1 + tests/checkasm/checkasm.c | 19 ++++- tests/checkasm/checkasm.h | 10 ++- tests/checkasm/macos_kperf.c | 143 +++++++++++++++++++++++++++++++++++ tests/checkasm/macos_kperf.h | 23 ++++++ 6 files changed, 195 insertions(+), 3 deletions(-) create mode 100644 tests/checkasm/macos_kperf.c create mode 100644 tests/checkasm/macos_kperf.h