Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

KVM: arm64: Sync boot clock with the nVHE/pKVM hyp

Configure the hypervisor tracing clock with the kernel boot clock. For
tracing purposes, the boot clock is interesting: it doesn't stop on
suspend. However, it is corrected on a regular basis, which implies the
need to re-evaluate it every once in a while.

Cc: John Stultz <jstultz@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Stephen Boyd <sboyd@kernel.org>
Cc: Christopher S. Hall <christopher.s.hall@intel.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: Vincent Donnefort <vdonnefort@google.com>
Link: https://patch.msgid.link/20260309162516.2623589-26-vdonnefort@google.com
Signed-off-by: Marc Zyngier <maz@kernel.org>

authored by

Vincent Donnefort and committed by
Marc Zyngier
b2288891 3aed038a

+179
+1
arch/arm64/include/asm/kvm_asm.h
··· 93 93 __KVM_HOST_SMCCC_FUNC___tracing_unload, 94 94 __KVM_HOST_SMCCC_FUNC___tracing_enable, 95 95 __KVM_HOST_SMCCC_FUNC___tracing_swap_reader, 96 + __KVM_HOST_SMCCC_FUNC___tracing_update_clock, 96 97 }; 97 98 98 99 #define DECLARE_KVM_VHE_SYM(sym) extern char sym[]
+2
arch/arm64/kvm/hyp/include/nvhe/trace.h
··· 11 11 void __tracing_unload(void); 12 12 int __tracing_enable(bool enable); 13 13 int __tracing_swap_reader(unsigned int cpu); 14 + void __tracing_update_clock(u32 mult, u32 shift, u64 epoch_ns, u64 epoch_cyc); 14 15 #else 15 16 static inline void *tracing_reserve_entry(unsigned long length) { return NULL; } 16 17 static inline void tracing_commit_entry(void) { } ··· 20 19 static inline void __tracing_unload(void) { } 21 20 static inline int __tracing_enable(bool enable) { return -ENODEV; } 22 21 static inline int __tracing_swap_reader(unsigned int cpu) { return -ENODEV; } 22 + static inline void __tracing_update_clock(u32 mult, u32 shift, u64 epoch_ns, u64 epoch_cyc) { } 23 23 #endif 24 24 #endif
+11
arch/arm64/kvm/hyp/nvhe/hyp-main.c
··· 615 615 cpu_reg(host_ctxt, 1) = __tracing_swap_reader(cpu); 616 616 } 617 617 618 + /* Host hypercall handler: reads mult (reg 1), shift (reg 2), epoch_ns (reg 3) and epoch_cyc (reg 4) from host_ctxt via DECLARE_REG and forwards them unchanged to __tracing_update_clock(). Nothing is written back to host_ctxt here. */ static void handle___tracing_update_clock(struct kvm_cpu_context *host_ctxt) 619 + { 620 + DECLARE_REG(u32, mult, host_ctxt, 1); 621 + DECLARE_REG(u32, shift, host_ctxt, 2); 622 + DECLARE_REG(u64, epoch_ns, host_ctxt, 3); 623 + DECLARE_REG(u64, epoch_cyc, host_ctxt, 4); 624 + 625 + __tracing_update_clock(mult, shift, epoch_ns, epoch_cyc); 626 + } 627 + 618 628 typedef void (*hcall_t)(struct kvm_cpu_context *); 619 629 620 630 #define HANDLE_FUNC(x) [__KVM_HOST_SMCCC_FUNC_##x] = (hcall_t)handle_##x ··· 670 660 HANDLE_FUNC(__tracing_unload), 671 661 HANDLE_FUNC(__tracing_enable), 672 662 HANDLE_FUNC(__tracing_swap_reader), 663 + HANDLE_FUNC(__tracing_update_clock), 673 664 }; 674 665 675 666 static void handle_host_hcall(struct kvm_cpu_context *host_ctxt)
+16
arch/arm64/kvm/hyp/nvhe/trace.c
··· 271 271 272 272 return ret; 273 273 } 274 + 275 + void __tracing_update_clock(u32 mult, u32 shift, u64 epoch_ns, u64 epoch_cyc) 276 + { 277 + int cpu; 278 + 279 + /* After this loop, all CPUs are observing the new bank... */ 280 + for (cpu = 0; cpu < hyp_nr_cpus; cpu++) { 281 + struct simple_rb_per_cpu *simple_rb = per_cpu_ptr(trace_buffer.simple_rbs, cpu); 282 + 283 + while (READ_ONCE(simple_rb->status) == SIMPLE_RB_WRITING) 284 + ; 285 + } 286 + 287 + /* ...we can now override the old one and swap. */ 288 + trace_clock_update(mult, shift, epoch_ns, epoch_cyc); 289 + }
+149
arch/arm64/kvm/hyp_trace.c
··· 4 4 * Author: Vincent Donnefort <vdonnefort@google.com> 5 5 */ 6 6 7 + #include <linux/cpumask.h> 7 8 #include <linux/trace_remote.h> 9 + #include <linux/tracefs.h> 8 10 #include <linux/simple_ring_buffer.h> 9 11 12 + #include <asm/arch_timer.h> 10 13 #include <asm/kvm_host.h> 11 14 #include <asm/kvm_hyptrace.h> 12 15 #include <asm/kvm_mmu.h> 13 16 14 17 #include "hyp_trace.h" 18 + 19 + /* Same 10min used by clocksource when width is more than 32-bits */ 20 + #define CLOCK_MAX_CONVERSION_S 600 21 + /* 22 + * Time to give for the clock init. Long enough to get a good mult/shift 23 + * estimation. Short enough to not delay the tracing start too much. 24 + */ 25 + #define CLOCK_INIT_MS 100 26 + /* 27 + * Time between clock checks. Must be small enough to catch clock deviation when 28 + * it is still tiny. 29 + */ 30 + #define CLOCK_UPDATE_MS 500 31 + 32 + static struct hyp_trace_clock { 33 + u64 cycles; 34 + u64 cyc_overflow64; 35 + u64 boot; 36 + u32 mult; 37 + u32 shift; 38 + struct delayed_work work; 39 + struct completion ready; 40 + struct mutex lock; 41 + bool running; 42 + } hyp_clock; 43 + 44 + static void __hyp_clock_work(struct work_struct *work) 45 + { 46 + struct delayed_work *dwork = to_delayed_work(work); 47 + struct hyp_trace_clock *hyp_clock; 48 + struct system_time_snapshot snap; 49 + u64 rate, delta_cycles; 50 + u64 boot, delta_boot; 51 + 52 + hyp_clock = container_of(dwork, struct hyp_trace_clock, work); 53 + 54 + ktime_get_snapshot(&snap); 55 + boot = ktime_to_ns(snap.boot); 56 + 57 + delta_boot = boot - hyp_clock->boot; 58 + delta_cycles = snap.cycles - hyp_clock->cycles; 59 + 60 + /* Compare hyp clock with the kernel boot clock */ 61 + if (hyp_clock->mult) { 62 + u64 err, cur = delta_cycles; 63 + 64 + if (WARN_ON_ONCE(cur >= hyp_clock->cyc_overflow64)) { 65 + __uint128_t tmp = (__uint128_t)cur * hyp_clock->mult; 66 + 67 + cur = tmp >> hyp_clock->shift; 68 + } else { 69 + cur *= hyp_clock->mult; 70 + cur >>= hyp_clock->shift; 71 + } 72 + cur 
+= hyp_clock->boot; 73 + 74 + err = abs_diff(cur, boot); 75 + /* No deviation, only update epoch if necessary */ 76 + if (!err) { 77 + if (delta_cycles >= (hyp_clock->cyc_overflow64 >> 1)) 78 + goto fast_forward; 79 + 80 + goto resched; 81 + } 82 + 83 + /* Warn if the error is above tracing precision (1us) */ 84 + if (err > NSEC_PER_USEC) 85 + pr_warn_ratelimited("hyp trace clock off by %lluus\n", 86 + err / NSEC_PER_USEC); 87 + } 88 + 89 + rate = div64_u64(delta_cycles * NSEC_PER_SEC, delta_boot); 90 + 91 + clocks_calc_mult_shift(&hyp_clock->mult, &hyp_clock->shift, 92 + rate, NSEC_PER_SEC, CLOCK_MAX_CONVERSION_S); 93 + 94 + /* Add a comfortable 50% margin */ 95 + hyp_clock->cyc_overflow64 = (U64_MAX / hyp_clock->mult) >> 1; 96 + 97 + fast_forward: 98 + hyp_clock->cycles = snap.cycles; 99 + hyp_clock->boot = boot; 100 + kvm_call_hyp_nvhe(__tracing_update_clock, hyp_clock->mult, 101 + hyp_clock->shift, hyp_clock->boot, hyp_clock->cycles); 102 + complete(&hyp_clock->ready); 103 + 104 + resched: 105 + schedule_delayed_work(&hyp_clock->work, 106 + msecs_to_jiffies(CLOCK_UPDATE_MS)); 107 + } 108 + 109 + static void hyp_trace_clock_enable(struct hyp_trace_clock *hyp_clock, bool enable) 110 + { 111 + struct system_time_snapshot snap; 112 + 113 + if (hyp_clock->running == enable) 114 + return; 115 + 116 + if (!enable) { 117 + cancel_delayed_work_sync(&hyp_clock->work); 118 + hyp_clock->running = false; 119 + } 120 + 121 + ktime_get_snapshot(&snap); 122 + 123 + hyp_clock->boot = ktime_to_ns(snap.boot); 124 + hyp_clock->cycles = snap.cycles; 125 + hyp_clock->mult = 0; 126 + 127 + init_completion(&hyp_clock->ready); 128 + INIT_DELAYED_WORK(&hyp_clock->work, __hyp_clock_work); 129 + schedule_delayed_work(&hyp_clock->work, msecs_to_jiffies(CLOCK_INIT_MS)); 130 + wait_for_completion(&hyp_clock->ready); 131 + hyp_clock->running = true; 132 + } 15 133 16 134 /* Access to this struct within the trace_remote_callbacks are protected by the trace_remote lock */ 17 135 static struct 
hyp_trace_buffer { ··· 301 183 302 184 static int hyp_trace_enable_tracing(bool enable, void *priv) 303 185 { 186 + hyp_trace_clock_enable(&hyp_clock, enable); 187 + 304 188 return kvm_call_hyp_nvhe(__tracing_enable, enable); 305 189 } 306 190 ··· 321 201 return 0; 322 202 } 323 203 204 + static int hyp_trace_clock_show(struct seq_file *m, void *v) 205 + { 206 + seq_puts(m, "[boot]\n"); 207 + 208 + return 0; 209 + } 210 + DEFINE_SHOW_ATTRIBUTE(hyp_trace_clock); 211 + 212 + static int hyp_trace_init_tracefs(struct dentry *d, void *priv) 213 + { 214 + return tracefs_create_file("trace_clock", 0440, d, NULL, &hyp_trace_clock_fops) ? 215 + 0 : -ENOMEM; 216 + } 217 + 324 218 static struct trace_remote_callbacks trace_remote_callbacks = { 219 + .init = hyp_trace_init_tracefs, 325 220 .load_trace_buffer = hyp_trace_load, 326 221 .unload_trace_buffer = hyp_trace_unload, 327 222 .enable_tracing = hyp_trace_enable_tracing, ··· 347 212 348 213 int __init kvm_hyp_trace_init(void) 349 214 { 215 + int cpu; 216 + 350 217 if (is_kernel_in_hyp_mode()) 351 218 return 0; 219 + 220 + #ifdef CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND 221 + for_each_possible_cpu(cpu) { 222 + const struct arch_timer_erratum_workaround *wa = 223 + per_cpu(timer_unstable_counter_workaround, cpu); 224 + 225 + if (wa && wa->read_cntvct_el0) { 226 + pr_warn("hyp trace can't handle CNTVCT workaround '%s'\n", wa->desc); 227 + return -EOPNOTSUPP; 228 + } 229 + } 230 + #endif 352 231 353 232 return trace_remote_register("hypervisor", &trace_remote_callbacks, &trace_buffer, NULL, 0); 354 233 }