Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

perf bench: Add event synthesis benchmark

Event synthesis may occur at the start or end (tail) of a perf command.
In system-wide mode it can scan every process in /proc, which may add
seconds of latency before event recording starts. Add a new benchmark
that times how long event synthesis takes, both without and with data
mmap synthesis (the data_mmap option).

An example execution looks like:

$ perf bench internals synthesize
# Running 'internals/synthesize' benchmark:
Average synthesis took: 168.253800 usec
Average data synthesis took: 208.104700 usec

Signed-off-by: Ian Rogers <irogers@google.com>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andrey Zhizhikin <andrey.z@gmail.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lore.kernel.org/lkml/20200402154357.107873-2-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

authored by

Ian Rogers and committed by
Arnaldo Carvalho de Melo
2a4b5166 1a2725f3

+117 -2
+8
tools/perf/Documentation/perf-bench.txt
··· 61 61 'epoll':: 62 62 Eventpoll (epoll) stressing benchmarks. 63 63 64 + 'internals':: 65 + Benchmark internal perf functionality. 66 + 64 67 'all':: 65 68 All benchmark subsystems. 66 69 ··· 216 213 217 214 *ctl*:: 218 215 Suite for evaluating multiple epoll_ctl calls. 216 + 217 + SUITES FOR 'internals' 218 + ~~~~~~~~~~~~~~~~~~~~~~ 219 + *synthesize*:: 220 + Suite for evaluating perf's event synthesis performance. 219 221 220 222 SEE ALSO 221 223 --------
+1 -1
tools/perf/bench/Build
··· 6 6 perf-y += futex-wake-parallel.o 7 7 perf-y += futex-requeue.o 8 8 perf-y += futex-lock-pi.o 9 - 10 9 perf-y += epoll-wait.o 11 10 perf-y += epoll-ctl.o 11 + perf-y += synthesize.o 12 12 13 13 perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-lib.o 14 14 perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o
+1 -1
tools/perf/bench/bench.h
··· 41 41 int bench_futex_requeue(int argc, const char **argv); 42 42 /* pi futexes */ 43 43 int bench_futex_lock_pi(int argc, const char **argv); 44 - 45 44 int bench_epoll_wait(int argc, const char **argv); 46 45 int bench_epoll_ctl(int argc, const char **argv); 46 + int bench_synthesize(int argc, const char **argv); 47 47 48 48 #define BENCH_FORMAT_DEFAULT_STR "default" 49 49 #define BENCH_FORMAT_DEFAULT 0
+101
tools/perf/bench/synthesize.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * Benchmark synthesis of perf events such as at the start of a 'perf 4 + * record'. Synthesis is done on the current process and the 'dummy' event 5 + * handlers are invoked that support dump_trace but otherwise do nothing. 6 + * 7 + * Copyright 2019 Google LLC. 8 + */ 9 + #include <stdio.h> 10 + #include "bench.h" 11 + #include "../util/debug.h" 12 + #include "../util/session.h" 13 + #include "../util/synthetic-events.h" 14 + #include "../util/target.h" 15 + #include "../util/thread_map.h" 16 + #include "../util/tool.h" 17 + #include <linux/err.h> 18 + #include <linux/time64.h> 19 + #include <subcmd/parse-options.h> 20 + 21 + static unsigned int iterations = 10000; 22 + 23 + static const struct option options[] = { 24 + OPT_UINTEGER('i', "iterations", &iterations, 25 + "Number of iterations used to compute average"), 26 + OPT_END() 27 + }; 28 + 29 + static const char *const usage[] = { 30 + "perf bench internals synthesize <options>", 31 + NULL 32 + }; 33 + 34 + 35 + static int do_synthesize(struct perf_session *session, 36 + struct perf_thread_map *threads, 37 + struct target *target, bool data_mmap) 38 + { 39 + const unsigned int nr_threads_synthesize = 1; 40 + struct timeval start, end, diff; 41 + u64 runtime_us; 42 + unsigned int i; 43 + double average; 44 + int err; 45 + 46 + gettimeofday(&start, NULL); 47 + for (i = 0; i < iterations; i++) { 48 + err = machine__synthesize_threads(&session->machines.host, 49 + target, threads, data_mmap, 50 + nr_threads_synthesize); 51 + if (err) 52 + return err; 53 + } 54 + 55 + gettimeofday(&end, NULL); 56 + timersub(&end, &start, &diff); 57 + runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec; 58 + average = (double)runtime_us/(double)iterations; 59 + printf("Average %ssynthesis took: %f usec\n", 60 + data_mmap ? 
"data " : "", average); 61 + return 0; 62 + } 63 + 64 + int bench_synthesize(int argc, const char **argv) 65 + { 66 + struct perf_tool tool; 67 + struct perf_session *session; 68 + struct target target = { 69 + .pid = "self", 70 + }; 71 + struct perf_thread_map *threads; 72 + int err; 73 + 74 + argc = parse_options(argc, argv, options, usage, 0); 75 + 76 + session = perf_session__new(NULL, false, NULL); 77 + if (IS_ERR(session)) { 78 + pr_err("Session creation failed.\n"); 79 + return PTR_ERR(session); 80 + } 81 + threads = thread_map__new_by_pid(getpid()); 82 + if (!threads) { 83 + pr_err("Thread map creation failed.\n"); 84 + err = -ENOMEM; 85 + goto err_out; 86 + } 87 + perf_tool__fill_defaults(&tool); 88 + 89 + err = do_synthesize(session, threads, &target, false); 90 + if (err) 91 + goto err_out; 92 + 93 + err = do_synthesize(session, threads, &target, true); 94 + 95 + err_out: 96 + if (threads) 97 + perf_thread_map__put(threads); 98 + 99 + perf_session__delete(session); 100 + return err; 101 + }
+6
tools/perf/builtin-bench.c
··· 76 76 }; 77 77 #endif // HAVE_EVENTFD 78 78 79 + static struct bench internals_benchmarks[] = { 80 + { "synthesize", "Benchmark perf event synthesis", bench_synthesize }, 81 + { NULL, NULL, NULL } 82 + }; 83 + 79 84 struct collection { 80 85 const char *name; 81 86 const char *summary; ··· 97 92 #ifdef HAVE_EVENTFD 98 93 {"epoll", "Epoll stressing benchmarks", epoll_benchmarks }, 99 94 #endif 95 + { "internals", "Perf-internals benchmarks", internals_benchmarks }, 100 96 { "all", "All benchmarks", NULL }, 101 97 { NULL, NULL, NULL } 102 98 };