Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

KVM: arm64: Add tracing capability for the nVHE/pKVM hyp

There is currently no way to inspect or log what's happening at EL2
when the nVHE or pKVM hypervisor is used. With the growing set of
features for pKVM, the need for tooling is more pressing. Tracefs, with
its reliability, versatility and support for user-space, is fit for
purpose.

Add support for writing into a tracefs-compatible ring-buffer. The
hypervisor has no way to log events directly into the host tracefs
ring-buffers, so instead let's use our own, where the hypervisor is the
writer and the host is the reader.

Signed-off-by: Vincent Donnefort <vdonnefort@google.com>
Link: https://patch.msgid.link/20260309162516.2623589-24-vdonnefort@google.com
Signed-off-by: Marc Zyngier <maz@kernel.org>

authored by

Vincent Donnefort and committed by
Marc Zyngier
680a04c3 4cdf8dec

+354 -1
+4
arch/arm64/include/asm/kvm_asm.h
··· 89 89 __KVM_HOST_SMCCC_FUNC___pkvm_vcpu_load, 90 90 __KVM_HOST_SMCCC_FUNC___pkvm_vcpu_put, 91 91 __KVM_HOST_SMCCC_FUNC___pkvm_tlb_flush_vmid, 92 + __KVM_HOST_SMCCC_FUNC___tracing_load, 93 + __KVM_HOST_SMCCC_FUNC___tracing_unload, 94 + __KVM_HOST_SMCCC_FUNC___tracing_enable, 95 + __KVM_HOST_SMCCC_FUNC___tracing_swap_reader, 92 96 }; 93 97 94 98 #define DECLARE_KVM_VHE_SYM(sym) extern char sym[]
+13
arch/arm64/include/asm/kvm_hyptrace.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0-only */ 2 + #ifndef __ARM64_KVM_HYPTRACE_H_ 3 + #define __ARM64_KVM_HYPTRACE_H_ 4 + 5 + #include <linux/ring_buffer.h> 6 + 7 + struct hyp_trace_desc { 8 + unsigned long bpages_backing_start; 9 + size_t bpages_backing_size; 10 + struct trace_buffer_desc trace_buffer_desc; 11 + 12 + }; 13 + #endif
+5
arch/arm64/kvm/Kconfig
··· 72 72 73 73 if NVHE_EL2_DEBUG 74 74 75 + config NVHE_EL2_TRACING 76 + bool 77 + depends on TRACING 78 + default y 79 + 75 80 config PKVM_DISABLE_STAGE2_ON_PANIC 76 81 bool "Disable the host stage-2 on panic" 77 82 default n
+23
arch/arm64/kvm/hyp/include/nvhe/trace.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0-only */ 2 + #ifndef __ARM64_KVM_HYP_NVHE_TRACE_H 3 + #define __ARM64_KVM_HYP_NVHE_TRACE_H 4 + #include <asm/kvm_hyptrace.h> 5 + 6 + #ifdef CONFIG_NVHE_EL2_TRACING 7 + void *tracing_reserve_entry(unsigned long length); 8 + void tracing_commit_entry(void); 9 + 10 + int __tracing_load(unsigned long desc_va, size_t desc_size); 11 + void __tracing_unload(void); 12 + int __tracing_enable(bool enable); 13 + int __tracing_swap_reader(unsigned int cpu); 14 + #else 15 + static inline void *tracing_reserve_entry(unsigned long length) { return NULL; } 16 + static inline void tracing_commit_entry(void) { } 17 + 18 + static inline int __tracing_load(unsigned long desc_va, size_t desc_size) { return -ENODEV; } 19 + static inline void __tracing_unload(void) { } 20 + static inline int __tracing_enable(bool enable) { return -ENODEV; } 21 + static inline int __tracing_swap_reader(unsigned int cpu) { return -ENODEV; } 22 + #endif 23 + #endif
+4 -1
arch/arm64/kvm/hyp/nvhe/Makefile
··· 29 29 ../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o 30 30 hyp-obj-y += ../../../kernel/smccc-call.o 31 31 hyp-obj-$(CONFIG_LIST_HARDENED) += list_debug.o 32 - hyp-obj-$(CONFIG_NVHE_EL2_TRACING) += clock.o 32 + hyp-obj-$(CONFIG_NVHE_EL2_TRACING) += clock.o trace.o 33 33 hyp-obj-y += $(lib-objs) 34 + 35 + # Path to simple_ring_buffer.c 36 + CFLAGS_trace.nvhe.o += -I$(objtree)/kernel/trace/ 34 37 35 38 ## 36 39 ## Build rules for compiling nVHE hyp code
+32
arch/arm64/kvm/hyp/nvhe/hyp-main.c
··· 18 18 #include <nvhe/mem_protect.h> 19 19 #include <nvhe/mm.h> 20 20 #include <nvhe/pkvm.h> 21 + #include <nvhe/trace.h> 21 22 #include <nvhe/trap_handler.h> 22 23 23 24 DEFINE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params); ··· 588 587 cpu_reg(host_ctxt, 1) = __pkvm_teardown_vm(handle); 589 588 } 590 589 590 + static void handle___tracing_load(struct kvm_cpu_context *host_ctxt) 591 + { 592 + DECLARE_REG(unsigned long, desc_hva, host_ctxt, 1); 593 + DECLARE_REG(size_t, desc_size, host_ctxt, 2); 594 + 595 + cpu_reg(host_ctxt, 1) = __tracing_load(desc_hva, desc_size); 596 + } 597 + 598 + static void handle___tracing_unload(struct kvm_cpu_context *host_ctxt) 599 + { 600 + __tracing_unload(); 601 + } 602 + 603 + static void handle___tracing_enable(struct kvm_cpu_context *host_ctxt) 604 + { 605 + DECLARE_REG(bool, enable, host_ctxt, 1); 606 + 607 + cpu_reg(host_ctxt, 1) = __tracing_enable(enable); 608 + } 609 + 610 + static void handle___tracing_swap_reader(struct kvm_cpu_context *host_ctxt) 611 + { 612 + DECLARE_REG(unsigned int, cpu, host_ctxt, 1); 613 + 614 + cpu_reg(host_ctxt, 1) = __tracing_swap_reader(cpu); 615 + } 616 + 591 617 typedef void (*hcall_t)(struct kvm_cpu_context *); 592 618 593 619 #define HANDLE_FUNC(x) [__KVM_HOST_SMCCC_FUNC_##x] = (hcall_t)handle_##x ··· 656 628 HANDLE_FUNC(__pkvm_vcpu_load), 657 629 HANDLE_FUNC(__pkvm_vcpu_put), 658 630 HANDLE_FUNC(__pkvm_tlb_flush_vmid), 631 + HANDLE_FUNC(__tracing_load), 632 + HANDLE_FUNC(__tracing_unload), 633 + HANDLE_FUNC(__tracing_enable), 634 + HANDLE_FUNC(__tracing_swap_reader), 659 635 }; 660 636 661 637 static void handle_host_hcall(struct kvm_cpu_context *host_ctxt)
+273
arch/arm64/kvm/hyp/nvhe/trace.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* 3 + * Copyright (C) 2025 Google LLC 4 + * Author: Vincent Donnefort <vdonnefort@google.com> 5 + */ 6 + 7 + #include <nvhe/clock.h> 8 + #include <nvhe/mem_protect.h> 9 + #include <nvhe/mm.h> 10 + #include <nvhe/trace.h> 11 + 12 + #include <asm/percpu.h> 13 + #include <asm/kvm_mmu.h> 14 + #include <asm/local.h> 15 + 16 + #include "simple_ring_buffer.c" 17 + 18 + static DEFINE_PER_CPU(struct simple_rb_per_cpu, __simple_rbs); 19 + 20 + static struct hyp_trace_buffer { 21 + struct simple_rb_per_cpu __percpu *simple_rbs; 22 + void *bpages_backing_start; 23 + size_t bpages_backing_size; 24 + hyp_spinlock_t lock; 25 + } trace_buffer = { 26 + .simple_rbs = &__simple_rbs, 27 + .lock = __HYP_SPIN_LOCK_UNLOCKED, 28 + }; 29 + 30 + static bool hyp_trace_buffer_loaded(struct hyp_trace_buffer *trace_buffer) 31 + { 32 + return trace_buffer->bpages_backing_size > 0; 33 + } 34 + 35 + void *tracing_reserve_entry(unsigned long length) 36 + { 37 + return simple_ring_buffer_reserve(this_cpu_ptr(trace_buffer.simple_rbs), length, 38 + trace_clock()); 39 + } 40 + 41 + void tracing_commit_entry(void) 42 + { 43 + simple_ring_buffer_commit(this_cpu_ptr(trace_buffer.simple_rbs)); 44 + } 45 + 46 + static int __admit_host_mem(void *start, u64 size) 47 + { 48 + if (!PAGE_ALIGNED(start) || !PAGE_ALIGNED(size) || !size) 49 + return -EINVAL; 50 + 51 + if (!is_protected_kvm_enabled()) 52 + return 0; 53 + 54 + return __pkvm_host_donate_hyp(hyp_virt_to_pfn(start), size >> PAGE_SHIFT); 55 + } 56 + 57 + static void __release_host_mem(void *start, u64 size) 58 + { 59 + if (!is_protected_kvm_enabled()) 60 + return; 61 + 62 + WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(start), size >> PAGE_SHIFT)); 63 + } 64 + 65 + static int hyp_trace_buffer_load_bpage_backing(struct hyp_trace_buffer *trace_buffer, 66 + struct hyp_trace_desc *desc) 67 + { 68 + void *start = (void *)kern_hyp_va(desc->bpages_backing_start); 69 + size_t size = 
desc->bpages_backing_size; 70 + int ret; 71 + 72 + ret = __admit_host_mem(start, size); 73 + if (ret) 74 + return ret; 75 + 76 + memset(start, 0, size); 77 + 78 + trace_buffer->bpages_backing_start = start; 79 + trace_buffer->bpages_backing_size = size; 80 + 81 + return 0; 82 + } 83 + 84 + static void hyp_trace_buffer_unload_bpage_backing(struct hyp_trace_buffer *trace_buffer) 85 + { 86 + void *start = trace_buffer->bpages_backing_start; 87 + size_t size = trace_buffer->bpages_backing_size; 88 + 89 + if (!size) 90 + return; 91 + 92 + memset(start, 0, size); 93 + 94 + __release_host_mem(start, size); 95 + 96 + trace_buffer->bpages_backing_start = 0; 97 + trace_buffer->bpages_backing_size = 0; 98 + } 99 + 100 + static void *__pin_shared_page(unsigned long kern_va) 101 + { 102 + void *va = kern_hyp_va((void *)kern_va); 103 + 104 + if (!is_protected_kvm_enabled()) 105 + return va; 106 + 107 + return hyp_pin_shared_mem(va, va + PAGE_SIZE) ? NULL : va; 108 + } 109 + 110 + static void __unpin_shared_page(void *va) 111 + { 112 + if (!is_protected_kvm_enabled()) 113 + return; 114 + 115 + hyp_unpin_shared_mem(va, va + PAGE_SIZE); 116 + } 117 + 118 + static void hyp_trace_buffer_unload(struct hyp_trace_buffer *trace_buffer) 119 + { 120 + int cpu; 121 + 122 + hyp_assert_lock_held(&trace_buffer->lock); 123 + 124 + if (!hyp_trace_buffer_loaded(trace_buffer)) 125 + return; 126 + 127 + for (cpu = 0; cpu < hyp_nr_cpus; cpu++) 128 + simple_ring_buffer_unload_mm(per_cpu_ptr(trace_buffer->simple_rbs, cpu), 129 + __unpin_shared_page); 130 + 131 + hyp_trace_buffer_unload_bpage_backing(trace_buffer); 132 + } 133 + 134 + static int hyp_trace_buffer_load(struct hyp_trace_buffer *trace_buffer, 135 + struct hyp_trace_desc *desc) 136 + { 137 + struct simple_buffer_page *bpages; 138 + struct ring_buffer_desc *rb_desc; 139 + int ret, cpu; 140 + 141 + hyp_assert_lock_held(&trace_buffer->lock); 142 + 143 + if (hyp_trace_buffer_loaded(trace_buffer)) 144 + return -EINVAL; 145 + 146 + ret = 
hyp_trace_buffer_load_bpage_backing(trace_buffer, desc); 147 + if (ret) 148 + return ret; 149 + 150 + bpages = trace_buffer->bpages_backing_start; 151 + for_each_ring_buffer_desc(rb_desc, cpu, &desc->trace_buffer_desc) { 152 + ret = simple_ring_buffer_init_mm(per_cpu_ptr(trace_buffer->simple_rbs, cpu), 153 + bpages, rb_desc, __pin_shared_page, 154 + __unpin_shared_page); 155 + if (ret) 156 + break; 157 + 158 + bpages += rb_desc->nr_page_va; 159 + } 160 + 161 + if (ret) 162 + hyp_trace_buffer_unload(trace_buffer); 163 + 164 + return ret; 165 + } 166 + 167 + static bool hyp_trace_desc_validate(struct hyp_trace_desc *desc, size_t desc_size) 168 + { 169 + struct ring_buffer_desc *rb_desc; 170 + unsigned int cpu; 171 + size_t nr_bpages; 172 + void *desc_end; 173 + 174 + /* 175 + * Both desc_size and bpages_backing_size are untrusted host-provided 176 + * values. We rely on __pkvm_host_donate_hyp() to enforce their validity. 177 + */ 178 + desc_end = (void *)desc + desc_size; 179 + nr_bpages = desc->bpages_backing_size / sizeof(struct simple_buffer_page); 180 + 181 + for_each_ring_buffer_desc(rb_desc, cpu, &desc->trace_buffer_desc) { 182 + /* Can we read nr_page_va? */ 183 + if ((void *)rb_desc + struct_size(rb_desc, page_va, 0) > desc_end) 184 + return false; 185 + 186 + /* Overflow desc? */ 187 + if ((void *)rb_desc + struct_size(rb_desc, page_va, rb_desc->nr_page_va) > desc_end) 188 + return false; 189 + 190 + /* Overflow bpages backing memory? 
*/ 191 + if (nr_bpages < rb_desc->nr_page_va) 192 + return false; 193 + 194 + if (cpu >= hyp_nr_cpus) 195 + return false; 196 + 197 + if (cpu != rb_desc->cpu) 198 + return false; 199 + 200 + nr_bpages -= rb_desc->nr_page_va; 201 + } 202 + 203 + return true; 204 + } 205 + 206 + int __tracing_load(unsigned long desc_hva, size_t desc_size) 207 + { 208 + struct hyp_trace_desc *desc = (struct hyp_trace_desc *)kern_hyp_va(desc_hva); 209 + int ret; 210 + 211 + ret = __admit_host_mem(desc, desc_size); 212 + if (ret) 213 + return ret; 214 + 215 + if (!hyp_trace_desc_validate(desc, desc_size)) 216 + goto err_release_desc; 217 + 218 + hyp_spin_lock(&trace_buffer.lock); 219 + 220 + ret = hyp_trace_buffer_load(&trace_buffer, desc); 221 + 222 + hyp_spin_unlock(&trace_buffer.lock); 223 + 224 + err_release_desc: 225 + __release_host_mem(desc, desc_size); 226 + return ret; 227 + } 228 + 229 + void __tracing_unload(void) 230 + { 231 + hyp_spin_lock(&trace_buffer.lock); 232 + hyp_trace_buffer_unload(&trace_buffer); 233 + hyp_spin_unlock(&trace_buffer.lock); 234 + } 235 + 236 + int __tracing_enable(bool enable) 237 + { 238 + int cpu, ret = enable ? -EINVAL : 0; 239 + 240 + hyp_spin_lock(&trace_buffer.lock); 241 + 242 + if (!hyp_trace_buffer_loaded(&trace_buffer)) 243 + goto unlock; 244 + 245 + for (cpu = 0; cpu < hyp_nr_cpus; cpu++) 246 + simple_ring_buffer_enable_tracing(per_cpu_ptr(trace_buffer.simple_rbs, cpu), 247 + enable); 248 + 249 + ret = 0; 250 + 251 + unlock: 252 + hyp_spin_unlock(&trace_buffer.lock); 253 + 254 + return ret; 255 + } 256 + 257 + int __tracing_swap_reader(unsigned int cpu) 258 + { 259 + int ret = -ENODEV; 260 + 261 + if (cpu >= hyp_nr_cpus) 262 + return -EINVAL; 263 + 264 + hyp_spin_lock(&trace_buffer.lock); 265 + 266 + if (hyp_trace_buffer_loaded(&trace_buffer)) 267 + ret = simple_ring_buffer_swap_reader_page( 268 + per_cpu_ptr(trace_buffer.simple_rbs, cpu)); 269 + 270 + hyp_spin_unlock(&trace_buffer.lock); 271 + 272 + return ret; 273 + }