Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull more kvm updates from Paolo Bonzini:
"This includes the 6.4 changes for RISC-V, and a few bugfix patches for
other architectures. For x86, this closes a longstanding performance
issue in the newer and (usually) more scalable page table management
code.

RISC-V:
- ONE_REG interface to enable/disable SBI extensions
- Zbb extension for Guest/VM
- AIA CSR virtualization

x86:
- Fix a long-standing TDP MMU flaw, where unloading roots on a vCPU
can result in the root being freed even though the root is
completely valid and can be reused as-is (with a TLB flush).

s390:
- A couple of bugfixes"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
KVM: s390: fix race in gmap_make_secure()
KVM: s390: pv: fix asynchronous teardown for small VMs
KVM: x86: Preserve TDP MMU roots until they are explicitly invalidated
RISC-V: KVM: Virtualize per-HART AIA CSRs
RISC-V: KVM: Use bitmap for irqs_pending and irqs_pending_mask
RISC-V: KVM: Add ONE_REG interface for AIA CSRs
RISC-V: KVM: Implement subtype for CSR ONE_REG interface
RISC-V: KVM: Initial skeletal support for AIA
RISC-V: KVM: Drop the _MASK suffix from hgatp.VMID mask defines
RISC-V: Detect AIA CSRs from ISA string
RISC-V: Add AIA related CSR defines
RISC-V: KVM: Allow Zbb extension for Guest/VM
RISC-V: KVM: Add ONE_REG interface to enable/disable SBI extensions
RISC-V: KVM: Alphabetize selects
KVM: RISC-V: Retry fault if vma_lookup() results become invalid

+1211 -180
+100 -7
arch/riscv/include/asm/csr.h
··· 7 7 #define _ASM_RISCV_CSR_H 8 8 9 9 #include <asm/asm.h> 10 - #include <linux/const.h> 10 + #include <linux/bits.h> 11 11 12 12 /* Status register flags */ 13 13 #define SR_SIE _AC(0x00000002, UL) /* Supervisor Interrupt Enable */ ··· 72 72 #define IRQ_S_EXT 9 73 73 #define IRQ_VS_EXT 10 74 74 #define IRQ_M_EXT 11 75 + #define IRQ_S_GEXT 12 75 76 #define IRQ_PMU_OVF 13 77 + #define IRQ_LOCAL_MAX (IRQ_PMU_OVF + 1) 78 + #define IRQ_LOCAL_MASK GENMASK((IRQ_LOCAL_MAX - 1), 0) 76 79 77 80 /* Exception causes */ 78 81 #define EXC_INST_MISALIGNED 0 ··· 130 127 131 128 #define HGATP32_MODE_SHIFT 31 132 129 #define HGATP32_VMID_SHIFT 22 133 - #define HGATP32_VMID_MASK _AC(0x1FC00000, UL) 134 - #define HGATP32_PPN _AC(0x003FFFFF, UL) 130 + #define HGATP32_VMID GENMASK(28, 22) 131 + #define HGATP32_PPN GENMASK(21, 0) 135 132 136 133 #define HGATP64_MODE_SHIFT 60 137 134 #define HGATP64_VMID_SHIFT 44 138 - #define HGATP64_VMID_MASK _AC(0x03FFF00000000000, UL) 139 - #define HGATP64_PPN _AC(0x00000FFFFFFFFFFF, UL) 135 + #define HGATP64_VMID GENMASK(57, 44) 136 + #define HGATP64_PPN GENMASK(43, 0) 140 137 141 138 #define HGATP_PAGE_SHIFT 12 142 139 143 140 #ifdef CONFIG_64BIT 144 141 #define HGATP_PPN HGATP64_PPN 145 142 #define HGATP_VMID_SHIFT HGATP64_VMID_SHIFT 146 - #define HGATP_VMID_MASK HGATP64_VMID_MASK 143 + #define HGATP_VMID HGATP64_VMID 147 144 #define HGATP_MODE_SHIFT HGATP64_MODE_SHIFT 148 145 #else 149 146 #define HGATP_PPN HGATP32_PPN 150 147 #define HGATP_VMID_SHIFT HGATP32_VMID_SHIFT 151 - #define HGATP_VMID_MASK HGATP32_VMID_MASK 148 + #define HGATP_VMID HGATP32_VMID 152 149 #define HGATP_MODE_SHIFT HGATP32_MODE_SHIFT 153 150 #endif 154 151 ··· 157 154 #define VSIP_VALID_MASK ((_AC(1, UL) << IRQ_S_SOFT) | \ 158 155 (_AC(1, UL) << IRQ_S_TIMER) | \ 159 156 (_AC(1, UL) << IRQ_S_EXT)) 157 + 158 + /* AIA CSR bits */ 159 + #define TOPI_IID_SHIFT 16 160 + #define TOPI_IID_MASK GENMASK(11, 0) 161 + #define TOPI_IPRIO_MASK GENMASK(7, 0) 162 + #define 
TOPI_IPRIO_BITS 8 163 + 164 + #define TOPEI_ID_SHIFT 16 165 + #define TOPEI_ID_MASK GENMASK(10, 0) 166 + #define TOPEI_PRIO_MASK GENMASK(10, 0) 167 + 168 + #define ISELECT_IPRIO0 0x30 169 + #define ISELECT_IPRIO15 0x3f 170 + #define ISELECT_MASK GENMASK(8, 0) 171 + 172 + #define HVICTL_VTI BIT(30) 173 + #define HVICTL_IID GENMASK(27, 16) 174 + #define HVICTL_IID_SHIFT 16 175 + #define HVICTL_DPR BIT(9) 176 + #define HVICTL_IPRIOM BIT(8) 177 + #define HVICTL_IPRIO GENMASK(7, 0) 160 178 161 179 /* xENVCFG flags */ 162 180 #define ENVCFG_STCE (_AC(1, ULL) << 63) ··· 273 249 #define CSR_STIMECMP 0x14D 274 250 #define CSR_STIMECMPH 0x15D 275 251 252 + /* Supervisor-Level Window to Indirectly Accessed Registers (AIA) */ 253 + #define CSR_SISELECT 0x150 254 + #define CSR_SIREG 0x151 255 + 256 + /* Supervisor-Level Interrupts (AIA) */ 257 + #define CSR_STOPEI 0x15c 258 + #define CSR_STOPI 0xdb0 259 + 260 + /* Supervisor-Level High-Half CSRs (AIA) */ 261 + #define CSR_SIEH 0x114 262 + #define CSR_SIPH 0x154 263 + 276 264 #define CSR_VSSTATUS 0x200 277 265 #define CSR_VSIE 0x204 278 266 #define CSR_VSTVEC 0x205 ··· 314 278 #define CSR_HGATP 0x680 315 279 #define CSR_HGEIP 0xe12 316 280 281 + /* Virtual Interrupts and Interrupt Priorities (H-extension with AIA) */ 282 + #define CSR_HVIEN 0x608 283 + #define CSR_HVICTL 0x609 284 + #define CSR_HVIPRIO1 0x646 285 + #define CSR_HVIPRIO2 0x647 286 + 287 + /* VS-Level Window to Indirectly Accessed Registers (H-extension with AIA) */ 288 + #define CSR_VSISELECT 0x250 289 + #define CSR_VSIREG 0x251 290 + 291 + /* VS-Level Interrupts (H-extension with AIA) */ 292 + #define CSR_VSTOPEI 0x25c 293 + #define CSR_VSTOPI 0xeb0 294 + 295 + /* Hypervisor and VS-Level High-Half CSRs (H-extension with AIA) */ 296 + #define CSR_HIDELEGH 0x613 297 + #define CSR_HVIENH 0x618 298 + #define CSR_HVIPH 0x655 299 + #define CSR_HVIPRIO1H 0x656 300 + #define CSR_HVIPRIO2H 0x657 301 + #define CSR_VSIEH 0x214 302 + #define CSR_VSIPH 0x254 303 + 317 304 
#define CSR_MSTATUS 0x300 318 305 #define CSR_MISA 0x301 306 + #define CSR_MIDELEG 0x303 319 307 #define CSR_MIE 0x304 320 308 #define CSR_MTVEC 0x305 321 309 #define CSR_MENVCFG 0x30a ··· 356 296 #define CSR_MIMPID 0xf13 357 297 #define CSR_MHARTID 0xf14 358 298 299 + /* Machine-Level Window to Indirectly Accessed Registers (AIA) */ 300 + #define CSR_MISELECT 0x350 301 + #define CSR_MIREG 0x351 302 + 303 + /* Machine-Level Interrupts (AIA) */ 304 + #define CSR_MTOPEI 0x35c 305 + #define CSR_MTOPI 0xfb0 306 + 307 + /* Virtual Interrupts for Supervisor Level (AIA) */ 308 + #define CSR_MVIEN 0x308 309 + #define CSR_MVIP 0x309 310 + 311 + /* Machine-Level High-Half CSRs (AIA) */ 312 + #define CSR_MIDELEGH 0x313 313 + #define CSR_MIEH 0x314 314 + #define CSR_MVIENH 0x318 315 + #define CSR_MVIPH 0x319 316 + #define CSR_MIPH 0x354 317 + 359 318 #ifdef CONFIG_RISCV_M_MODE 360 319 # define CSR_STATUS CSR_MSTATUS 361 320 # define CSR_IE CSR_MIE ··· 384 305 # define CSR_CAUSE CSR_MCAUSE 385 306 # define CSR_TVAL CSR_MTVAL 386 307 # define CSR_IP CSR_MIP 308 + 309 + # define CSR_IEH CSR_MIEH 310 + # define CSR_ISELECT CSR_MISELECT 311 + # define CSR_IREG CSR_MIREG 312 + # define CSR_IPH CSR_MIPH 313 + # define CSR_TOPEI CSR_MTOPEI 314 + # define CSR_TOPI CSR_MTOPI 387 315 388 316 # define SR_IE SR_MIE 389 317 # define SR_PIE SR_MPIE ··· 408 322 # define CSR_CAUSE CSR_SCAUSE 409 323 # define CSR_TVAL CSR_STVAL 410 324 # define CSR_IP CSR_SIP 325 + 326 + # define CSR_IEH CSR_SIEH 327 + # define CSR_ISELECT CSR_SISELECT 328 + # define CSR_IREG CSR_SIREG 329 + # define CSR_IPH CSR_SIPH 330 + # define CSR_TOPEI CSR_STOPEI 331 + # define CSR_TOPI CSR_STOPI 411 332 412 333 # define SR_IE SR_SIE 413 334 # define SR_PIE SR_SPIE
+8
arch/riscv/include/asm/hwcap.h
··· 44 44 #define RISCV_ISA_EXT_ZIHINTPAUSE 32 45 45 #define RISCV_ISA_EXT_SVNAPOT 33 46 46 #define RISCV_ISA_EXT_ZICBOZ 34 47 + #define RISCV_ISA_EXT_SMAIA 35 48 + #define RISCV_ISA_EXT_SSAIA 36 47 49 48 50 #define RISCV_ISA_EXT_MAX 64 49 51 #define RISCV_ISA_EXT_NAME_LEN_MAX 32 52 + 53 + #ifdef CONFIG_RISCV_M_MODE 54 + #define RISCV_ISA_EXT_SxAIA RISCV_ISA_EXT_SMAIA 55 + #else 56 + #define RISCV_ISA_EXT_SxAIA RISCV_ISA_EXT_SSAIA 57 + #endif 50 58 51 59 #ifndef __ASSEMBLY__ 52 60
+127
arch/riscv/include/asm/kvm_aia.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0-only */ 2 + /* 3 + * Copyright (C) 2021 Western Digital Corporation or its affiliates. 4 + * Copyright (C) 2022 Ventana Micro Systems Inc. 5 + * 6 + * Authors: 7 + * Anup Patel <apatel@ventanamicro.com> 8 + */ 9 + 10 + #ifndef __KVM_RISCV_AIA_H 11 + #define __KVM_RISCV_AIA_H 12 + 13 + #include <linux/jump_label.h> 14 + #include <linux/kvm_types.h> 15 + #include <asm/csr.h> 16 + 17 + struct kvm_aia { 18 + /* In-kernel irqchip created */ 19 + bool in_kernel; 20 + 21 + /* In-kernel irqchip initialized */ 22 + bool initialized; 23 + }; 24 + 25 + struct kvm_vcpu_aia_csr { 26 + unsigned long vsiselect; 27 + unsigned long hviprio1; 28 + unsigned long hviprio2; 29 + unsigned long vsieh; 30 + unsigned long hviph; 31 + unsigned long hviprio1h; 32 + unsigned long hviprio2h; 33 + }; 34 + 35 + struct kvm_vcpu_aia { 36 + /* CPU AIA CSR context of Guest VCPU */ 37 + struct kvm_vcpu_aia_csr guest_csr; 38 + 39 + /* CPU AIA CSR context upon Guest VCPU reset */ 40 + struct kvm_vcpu_aia_csr guest_reset_csr; 41 + }; 42 + 43 + #define kvm_riscv_aia_initialized(k) ((k)->arch.aia.initialized) 44 + 45 + #define irqchip_in_kernel(k) ((k)->arch.aia.in_kernel) 46 + 47 + DECLARE_STATIC_KEY_FALSE(kvm_riscv_aia_available); 48 + #define kvm_riscv_aia_available() \ 49 + static_branch_unlikely(&kvm_riscv_aia_available) 50 + 51 + #define KVM_RISCV_AIA_IMSIC_TOPEI (ISELECT_MASK + 1) 52 + static inline int kvm_riscv_vcpu_aia_imsic_rmw(struct kvm_vcpu *vcpu, 53 + unsigned long isel, 54 + unsigned long *val, 55 + unsigned long new_val, 56 + unsigned long wr_mask) 57 + { 58 + return 0; 59 + } 60 + 61 + #ifdef CONFIG_32BIT 62 + void kvm_riscv_vcpu_aia_flush_interrupts(struct kvm_vcpu *vcpu); 63 + void kvm_riscv_vcpu_aia_sync_interrupts(struct kvm_vcpu *vcpu); 64 + #else 65 + static inline void kvm_riscv_vcpu_aia_flush_interrupts(struct kvm_vcpu *vcpu) 66 + { 67 + } 68 + static inline void kvm_riscv_vcpu_aia_sync_interrupts(struct kvm_vcpu *vcpu) 69 + { 70 + } 71 
+ #endif 72 + bool kvm_riscv_vcpu_aia_has_interrupts(struct kvm_vcpu *vcpu, u64 mask); 73 + 74 + void kvm_riscv_vcpu_aia_update_hvip(struct kvm_vcpu *vcpu); 75 + void kvm_riscv_vcpu_aia_load(struct kvm_vcpu *vcpu, int cpu); 76 + void kvm_riscv_vcpu_aia_put(struct kvm_vcpu *vcpu); 77 + int kvm_riscv_vcpu_aia_get_csr(struct kvm_vcpu *vcpu, 78 + unsigned long reg_num, 79 + unsigned long *out_val); 80 + int kvm_riscv_vcpu_aia_set_csr(struct kvm_vcpu *vcpu, 81 + unsigned long reg_num, 82 + unsigned long val); 83 + 84 + int kvm_riscv_vcpu_aia_rmw_topei(struct kvm_vcpu *vcpu, 85 + unsigned int csr_num, 86 + unsigned long *val, 87 + unsigned long new_val, 88 + unsigned long wr_mask); 89 + int kvm_riscv_vcpu_aia_rmw_ireg(struct kvm_vcpu *vcpu, unsigned int csr_num, 90 + unsigned long *val, unsigned long new_val, 91 + unsigned long wr_mask); 92 + #define KVM_RISCV_VCPU_AIA_CSR_FUNCS \ 93 + { .base = CSR_SIREG, .count = 1, .func = kvm_riscv_vcpu_aia_rmw_ireg }, \ 94 + { .base = CSR_STOPEI, .count = 1, .func = kvm_riscv_vcpu_aia_rmw_topei }, 95 + 96 + static inline int kvm_riscv_vcpu_aia_update(struct kvm_vcpu *vcpu) 97 + { 98 + return 1; 99 + } 100 + 101 + static inline void kvm_riscv_vcpu_aia_reset(struct kvm_vcpu *vcpu) 102 + { 103 + } 104 + 105 + static inline int kvm_riscv_vcpu_aia_init(struct kvm_vcpu *vcpu) 106 + { 107 + return 0; 108 + } 109 + 110 + static inline void kvm_riscv_vcpu_aia_deinit(struct kvm_vcpu *vcpu) 111 + { 112 + } 113 + 114 + static inline void kvm_riscv_aia_init_vm(struct kvm *kvm) 115 + { 116 + } 117 + 118 + static inline void kvm_riscv_aia_destroy_vm(struct kvm *kvm) 119 + { 120 + } 121 + 122 + void kvm_riscv_aia_enable(void); 123 + void kvm_riscv_aia_disable(void); 124 + int kvm_riscv_aia_init(void); 125 + void kvm_riscv_aia_exit(void); 126 + 127 + #endif
+11 -3
arch/riscv/include/asm/kvm_host.h
··· 14 14 #include <linux/kvm_types.h> 15 15 #include <linux/spinlock.h> 16 16 #include <asm/hwcap.h> 17 + #include <asm/kvm_aia.h> 17 18 #include <asm/kvm_vcpu_fp.h> 18 19 #include <asm/kvm_vcpu_insn.h> 19 20 #include <asm/kvm_vcpu_sbi.h> ··· 95 94 96 95 /* Guest Timer */ 97 96 struct kvm_guest_timer timer; 97 + 98 + /* AIA Guest/VM context */ 99 + struct kvm_aia aia; 98 100 }; 99 101 100 102 struct kvm_cpu_trap { ··· 204 200 * in irqs_pending. Our approach is modeled around multiple producer 205 201 * and single consumer problem where the consumer is the VCPU itself. 206 202 */ 207 - unsigned long irqs_pending; 208 - unsigned long irqs_pending_mask; 203 + #define KVM_RISCV_VCPU_NR_IRQS 64 204 + DECLARE_BITMAP(irqs_pending, KVM_RISCV_VCPU_NR_IRQS); 205 + DECLARE_BITMAP(irqs_pending_mask, KVM_RISCV_VCPU_NR_IRQS); 209 206 210 207 /* VCPU Timer */ 211 208 struct kvm_vcpu_timer timer; ··· 225 220 226 221 /* SBI context */ 227 222 struct kvm_vcpu_sbi_context sbi_context; 223 + 224 + /* AIA VCPU context */ 225 + struct kvm_vcpu_aia aia_context; 228 226 229 227 /* Cache pages needed to program page tables with spinlock held */ 230 228 struct kvm_mmu_memory_cache mmu_page_cache; ··· 335 327 int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq); 336 328 void kvm_riscv_vcpu_flush_interrupts(struct kvm_vcpu *vcpu); 337 329 void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu); 338 - bool kvm_riscv_vcpu_has_interrupts(struct kvm_vcpu *vcpu, unsigned long mask); 330 + bool kvm_riscv_vcpu_has_interrupts(struct kvm_vcpu *vcpu, u64 mask); 339 331 void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu); 340 332 void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu); 341 333
+7 -1
arch/riscv/include/asm/kvm_vcpu_sbi.h
··· 16 16 17 17 struct kvm_vcpu_sbi_context { 18 18 int return_handled; 19 + bool extension_disabled[KVM_RISCV_SBI_EXT_MAX]; 19 20 }; 20 21 21 22 struct kvm_vcpu_sbi_return { ··· 46 45 struct kvm_run *run, 47 46 u32 type, u64 flags); 48 47 int kvm_riscv_vcpu_sbi_return(struct kvm_vcpu *vcpu, struct kvm_run *run); 49 - const struct kvm_vcpu_sbi_extension *kvm_vcpu_sbi_find_ext(unsigned long extid); 48 + int kvm_riscv_vcpu_set_reg_sbi_ext(struct kvm_vcpu *vcpu, 49 + const struct kvm_one_reg *reg); 50 + int kvm_riscv_vcpu_get_reg_sbi_ext(struct kvm_vcpu *vcpu, 51 + const struct kvm_one_reg *reg); 52 + const struct kvm_vcpu_sbi_extension *kvm_vcpu_sbi_find_ext( 53 + struct kvm_vcpu *vcpu, unsigned long extid); 50 54 int kvm_riscv_vcpu_sbi_ecall(struct kvm_vcpu *vcpu, struct kvm_run *run); 51 55 52 56 #ifdef CONFIG_RISCV_SBI_V01
+50 -1
arch/riscv/include/uapi/asm/kvm.h
··· 12 12 #ifndef __ASSEMBLY__ 13 13 14 14 #include <linux/types.h> 15 + #include <asm/bitsperlong.h> 15 16 #include <asm/ptrace.h> 16 17 17 18 #define __KVM_HAVE_READONLY_MEM ··· 66 65 #define KVM_RISCV_MODE_S 1 67 66 #define KVM_RISCV_MODE_U 0 68 67 69 - /* CSR registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */ 68 + /* General CSR registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */ 70 69 struct kvm_riscv_csr { 71 70 unsigned long sstatus; 72 71 unsigned long sie; ··· 78 77 unsigned long sip; 79 78 unsigned long satp; 80 79 unsigned long scounteren; 80 + }; 81 + 82 + /* AIA CSR registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */ 83 + struct kvm_riscv_aia_csr { 84 + unsigned long siselect; 85 + unsigned long iprio1; 86 + unsigned long iprio2; 87 + unsigned long sieh; 88 + unsigned long siph; 89 + unsigned long iprio1h; 90 + unsigned long iprio2h; 81 91 }; 82 92 83 93 /* TIMER registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */ ··· 119 107 KVM_RISCV_ISA_EXT_ZIHINTPAUSE, 120 108 KVM_RISCV_ISA_EXT_ZICBOM, 121 109 KVM_RISCV_ISA_EXT_ZICBOZ, 110 + KVM_RISCV_ISA_EXT_ZBB, 111 + KVM_RISCV_ISA_EXT_SSAIA, 122 112 KVM_RISCV_ISA_EXT_MAX, 113 + }; 114 + 115 + /* 116 + * SBI extension IDs specific to KVM. This is not the same as the SBI 117 + * extension IDs defined by the RISC-V SBI specification. 
118 + */ 119 + enum KVM_RISCV_SBI_EXT_ID { 120 + KVM_RISCV_SBI_EXT_V01 = 0, 121 + KVM_RISCV_SBI_EXT_TIME, 122 + KVM_RISCV_SBI_EXT_IPI, 123 + KVM_RISCV_SBI_EXT_RFENCE, 124 + KVM_RISCV_SBI_EXT_SRST, 125 + KVM_RISCV_SBI_EXT_HSM, 126 + KVM_RISCV_SBI_EXT_PMU, 127 + KVM_RISCV_SBI_EXT_EXPERIMENTAL, 128 + KVM_RISCV_SBI_EXT_VENDOR, 129 + KVM_RISCV_SBI_EXT_MAX, 123 130 }; 124 131 125 132 /* Possible states for kvm_riscv_timer */ ··· 151 120 /* If you need to interpret the index values, here is the key: */ 152 121 #define KVM_REG_RISCV_TYPE_MASK 0x00000000FF000000 153 122 #define KVM_REG_RISCV_TYPE_SHIFT 24 123 + #define KVM_REG_RISCV_SUBTYPE_MASK 0x0000000000FF0000 124 + #define KVM_REG_RISCV_SUBTYPE_SHIFT 16 154 125 155 126 /* Config registers are mapped as type 1 */ 156 127 #define KVM_REG_RISCV_CONFIG (0x01 << KVM_REG_RISCV_TYPE_SHIFT) ··· 166 133 167 134 /* Control and status registers are mapped as type 3 */ 168 135 #define KVM_REG_RISCV_CSR (0x03 << KVM_REG_RISCV_TYPE_SHIFT) 136 + #define KVM_REG_RISCV_CSR_GENERAL (0x0 << KVM_REG_RISCV_SUBTYPE_SHIFT) 137 + #define KVM_REG_RISCV_CSR_AIA (0x1 << KVM_REG_RISCV_SUBTYPE_SHIFT) 169 138 #define KVM_REG_RISCV_CSR_REG(name) \ 170 139 (offsetof(struct kvm_riscv_csr, name) / sizeof(unsigned long)) 140 + #define KVM_REG_RISCV_CSR_AIA_REG(name) \ 141 + (offsetof(struct kvm_riscv_aia_csr, name) / sizeof(unsigned long)) 171 142 172 143 /* Timer registers are mapped as type 4 */ 173 144 #define KVM_REG_RISCV_TIMER (0x04 << KVM_REG_RISCV_TYPE_SHIFT) ··· 190 153 191 154 /* ISA Extension registers are mapped as type 7 */ 192 155 #define KVM_REG_RISCV_ISA_EXT (0x07 << KVM_REG_RISCV_TYPE_SHIFT) 156 + 157 + /* SBI extension registers are mapped as type 8 */ 158 + #define KVM_REG_RISCV_SBI_EXT (0x08 << KVM_REG_RISCV_TYPE_SHIFT) 159 + #define KVM_REG_RISCV_SBI_SINGLE (0x0 << KVM_REG_RISCV_SUBTYPE_SHIFT) 160 + #define KVM_REG_RISCV_SBI_MULTI_EN (0x1 << KVM_REG_RISCV_SUBTYPE_SHIFT) 161 + #define KVM_REG_RISCV_SBI_MULTI_DIS (0x2 << 
KVM_REG_RISCV_SUBTYPE_SHIFT) 162 + #define KVM_REG_RISCV_SBI_MULTI_REG(__ext_id) \ 163 + ((__ext_id) / __BITS_PER_LONG) 164 + #define KVM_REG_RISCV_SBI_MULTI_MASK(__ext_id) \ 165 + (1UL << ((__ext_id) % __BITS_PER_LONG)) 166 + #define KVM_REG_RISCV_SBI_MULTI_REG_LAST \ 167 + KVM_REG_RISCV_SBI_MULTI_REG(KVM_RISCV_SBI_EXT_MAX - 1) 193 168 194 169 #endif 195 170
+2
arch/riscv/kernel/cpu.c
··· 185 185 __RISCV_ISA_EXT_DATA(zicboz, RISCV_ISA_EXT_ZICBOZ), 186 186 __RISCV_ISA_EXT_DATA(zihintpause, RISCV_ISA_EXT_ZIHINTPAUSE), 187 187 __RISCV_ISA_EXT_DATA(zbb, RISCV_ISA_EXT_ZBB), 188 + __RISCV_ISA_EXT_DATA(smaia, RISCV_ISA_EXT_SMAIA), 189 + __RISCV_ISA_EXT_DATA(ssaia, RISCV_ISA_EXT_SSAIA), 188 190 __RISCV_ISA_EXT_DATA(sscofpmf, RISCV_ISA_EXT_SSCOFPMF), 189 191 __RISCV_ISA_EXT_DATA(sstc, RISCV_ISA_EXT_SSTC), 190 192 __RISCV_ISA_EXT_DATA(svinval, RISCV_ISA_EXT_SVINVAL),
+2
arch/riscv/kernel/cpufeature.c
··· 228 228 } 229 229 } else { 230 230 /* sorted alphabetically */ 231 + SET_ISA_EXT_MAP("smaia", RISCV_ISA_EXT_SMAIA); 232 + SET_ISA_EXT_MAP("ssaia", RISCV_ISA_EXT_SSAIA); 231 233 SET_ISA_EXT_MAP("sscofpmf", RISCV_ISA_EXT_SSCOFPMF); 232 234 SET_ISA_EXT_MAP("sstc", RISCV_ISA_EXT_SSTC); 233 235 SET_ISA_EXT_MAP("svinval", RISCV_ISA_EXT_SVINVAL);
+5 -5
arch/riscv/kvm/Kconfig
··· 20 20 config KVM 21 21 tristate "Kernel-based Virtual Machine (KVM) support (EXPERIMENTAL)" 22 22 depends on RISCV_SBI && MMU 23 + select HAVE_KVM_EVENTFD 24 + select HAVE_KVM_VCPU_ASYNC_IOCTL 25 + select KVM_GENERIC_DIRTYLOG_READ_PROTECT 23 26 select KVM_GENERIC_HARDWARE_ENABLING 27 + select KVM_MMIO 28 + select KVM_XFER_TO_GUEST_WORK 24 29 select MMU_NOTIFIER 25 30 select PREEMPT_NOTIFIERS 26 - select KVM_MMIO 27 - select KVM_GENERIC_DIRTYLOG_READ_PROTECT 28 - select KVM_XFER_TO_GUEST_WORK 29 - select HAVE_KVM_VCPU_ASYNC_IOCTL 30 - select HAVE_KVM_EVENTFD 31 31 help 32 32 Support hosting virtualized guest machines. 33 33
+1
arch/riscv/kvm/Makefile
··· 26 26 kvm-y += vcpu_sbi_hsm.o 27 27 kvm-y += vcpu_timer.o 28 28 kvm-$(CONFIG_RISCV_PMU_SBI) += vcpu_pmu.o vcpu_sbi_pmu.o 29 + kvm-y += aia.o
+388
arch/riscv/kvm/aia.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * Copyright (C) 2021 Western Digital Corporation or its affiliates. 4 + * Copyright (C) 2022 Ventana Micro Systems Inc. 5 + * 6 + * Authors: 7 + * Anup Patel <apatel@ventanamicro.com> 8 + */ 9 + 10 + #include <linux/kernel.h> 11 + #include <linux/kvm_host.h> 12 + #include <asm/hwcap.h> 13 + 14 + DEFINE_STATIC_KEY_FALSE(kvm_riscv_aia_available); 15 + 16 + static void aia_set_hvictl(bool ext_irq_pending) 17 + { 18 + unsigned long hvictl; 19 + 20 + /* 21 + * HVICTL.IID == 9 and HVICTL.IPRIO == 0 represents 22 + * no interrupt in HVICTL. 23 + */ 24 + 25 + hvictl = (IRQ_S_EXT << HVICTL_IID_SHIFT) & HVICTL_IID; 26 + hvictl |= ext_irq_pending; 27 + csr_write(CSR_HVICTL, hvictl); 28 + } 29 + 30 + #ifdef CONFIG_32BIT 31 + void kvm_riscv_vcpu_aia_flush_interrupts(struct kvm_vcpu *vcpu) 32 + { 33 + struct kvm_vcpu_aia_csr *csr = &vcpu->arch.aia_context.guest_csr; 34 + unsigned long mask, val; 35 + 36 + if (!kvm_riscv_aia_available()) 37 + return; 38 + 39 + if (READ_ONCE(vcpu->arch.irqs_pending_mask[1])) { 40 + mask = xchg_acquire(&vcpu->arch.irqs_pending_mask[1], 0); 41 + val = READ_ONCE(vcpu->arch.irqs_pending[1]) & mask; 42 + 43 + csr->hviph &= ~mask; 44 + csr->hviph |= val; 45 + } 46 + } 47 + 48 + void kvm_riscv_vcpu_aia_sync_interrupts(struct kvm_vcpu *vcpu) 49 + { 50 + struct kvm_vcpu_aia_csr *csr = &vcpu->arch.aia_context.guest_csr; 51 + 52 + if (kvm_riscv_aia_available()) 53 + csr->vsieh = csr_read(CSR_VSIEH); 54 + } 55 + #endif 56 + 57 + bool kvm_riscv_vcpu_aia_has_interrupts(struct kvm_vcpu *vcpu, u64 mask) 58 + { 59 + unsigned long seip; 60 + 61 + if (!kvm_riscv_aia_available()) 62 + return false; 63 + 64 + #ifdef CONFIG_32BIT 65 + if (READ_ONCE(vcpu->arch.irqs_pending[1]) & 66 + (vcpu->arch.aia_context.guest_csr.vsieh & upper_32_bits(mask))) 67 + return true; 68 + #endif 69 + 70 + seip = vcpu->arch.guest_csr.vsie; 71 + seip &= (unsigned long)mask; 72 + seip &= BIT(IRQ_S_EXT); 73 + 74 + if 
(!kvm_riscv_aia_initialized(vcpu->kvm) || !seip) 75 + return false; 76 + 77 + return false; 78 + } 79 + 80 + void kvm_riscv_vcpu_aia_update_hvip(struct kvm_vcpu *vcpu) 81 + { 82 + struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; 83 + 84 + if (!kvm_riscv_aia_available()) 85 + return; 86 + 87 + #ifdef CONFIG_32BIT 88 + csr_write(CSR_HVIPH, vcpu->arch.aia_context.guest_csr.hviph); 89 + #endif 90 + aia_set_hvictl(!!(csr->hvip & BIT(IRQ_VS_EXT))); 91 + } 92 + 93 + void kvm_riscv_vcpu_aia_load(struct kvm_vcpu *vcpu, int cpu) 94 + { 95 + struct kvm_vcpu_aia_csr *csr = &vcpu->arch.aia_context.guest_csr; 96 + 97 + if (!kvm_riscv_aia_available()) 98 + return; 99 + 100 + csr_write(CSR_VSISELECT, csr->vsiselect); 101 + csr_write(CSR_HVIPRIO1, csr->hviprio1); 102 + csr_write(CSR_HVIPRIO2, csr->hviprio2); 103 + #ifdef CONFIG_32BIT 104 + csr_write(CSR_VSIEH, csr->vsieh); 105 + csr_write(CSR_HVIPH, csr->hviph); 106 + csr_write(CSR_HVIPRIO1H, csr->hviprio1h); 107 + csr_write(CSR_HVIPRIO2H, csr->hviprio2h); 108 + #endif 109 + } 110 + 111 + void kvm_riscv_vcpu_aia_put(struct kvm_vcpu *vcpu) 112 + { 113 + struct kvm_vcpu_aia_csr *csr = &vcpu->arch.aia_context.guest_csr; 114 + 115 + if (!kvm_riscv_aia_available()) 116 + return; 117 + 118 + csr->vsiselect = csr_read(CSR_VSISELECT); 119 + csr->hviprio1 = csr_read(CSR_HVIPRIO1); 120 + csr->hviprio2 = csr_read(CSR_HVIPRIO2); 121 + #ifdef CONFIG_32BIT 122 + csr->vsieh = csr_read(CSR_VSIEH); 123 + csr->hviph = csr_read(CSR_HVIPH); 124 + csr->hviprio1h = csr_read(CSR_HVIPRIO1H); 125 + csr->hviprio2h = csr_read(CSR_HVIPRIO2H); 126 + #endif 127 + } 128 + 129 + int kvm_riscv_vcpu_aia_get_csr(struct kvm_vcpu *vcpu, 130 + unsigned long reg_num, 131 + unsigned long *out_val) 132 + { 133 + struct kvm_vcpu_aia_csr *csr = &vcpu->arch.aia_context.guest_csr; 134 + 135 + if (reg_num >= sizeof(struct kvm_riscv_aia_csr) / sizeof(unsigned long)) 136 + return -EINVAL; 137 + 138 + *out_val = 0; 139 + if (kvm_riscv_aia_available()) 140 + *out_val = ((unsigned 
long *)csr)[reg_num]; 141 + 142 + return 0; 143 + } 144 + 145 + int kvm_riscv_vcpu_aia_set_csr(struct kvm_vcpu *vcpu, 146 + unsigned long reg_num, 147 + unsigned long val) 148 + { 149 + struct kvm_vcpu_aia_csr *csr = &vcpu->arch.aia_context.guest_csr; 150 + 151 + if (reg_num >= sizeof(struct kvm_riscv_aia_csr) / sizeof(unsigned long)) 152 + return -EINVAL; 153 + 154 + if (kvm_riscv_aia_available()) { 155 + ((unsigned long *)csr)[reg_num] = val; 156 + 157 + #ifdef CONFIG_32BIT 158 + if (reg_num == KVM_REG_RISCV_CSR_AIA_REG(siph)) 159 + WRITE_ONCE(vcpu->arch.irqs_pending_mask[1], 0); 160 + #endif 161 + } 162 + 163 + return 0; 164 + } 165 + 166 + int kvm_riscv_vcpu_aia_rmw_topei(struct kvm_vcpu *vcpu, 167 + unsigned int csr_num, 168 + unsigned long *val, 169 + unsigned long new_val, 170 + unsigned long wr_mask) 171 + { 172 + /* If AIA not available then redirect trap */ 173 + if (!kvm_riscv_aia_available()) 174 + return KVM_INSN_ILLEGAL_TRAP; 175 + 176 + /* If AIA not initialized then forward to user space */ 177 + if (!kvm_riscv_aia_initialized(vcpu->kvm)) 178 + return KVM_INSN_EXIT_TO_USER_SPACE; 179 + 180 + return kvm_riscv_vcpu_aia_imsic_rmw(vcpu, KVM_RISCV_AIA_IMSIC_TOPEI, 181 + val, new_val, wr_mask); 182 + } 183 + 184 + /* 185 + * External IRQ priority always read-only zero. 
This means default 186 + * priority order is always preferred for external IRQs unless 187 + * HVICTL.IID == 9 and HVICTL.IPRIO != 0 188 + */ 189 + static int aia_irq2bitpos[] = { 190 + 0, 8, -1, -1, 16, 24, -1, -1, /* 0 - 7 */ 191 + 32, -1, -1, -1, -1, 40, 48, 56, /* 8 - 15 */ 192 + 64, 72, 80, 88, 96, 104, 112, 120, /* 16 - 23 */ 193 + -1, -1, -1, -1, -1, -1, -1, -1, /* 24 - 31 */ 194 + -1, -1, -1, -1, -1, -1, -1, -1, /* 32 - 39 */ 195 + -1, -1, -1, -1, -1, -1, -1, -1, /* 40 - 47 */ 196 + -1, -1, -1, -1, -1, -1, -1, -1, /* 48 - 55 */ 197 + -1, -1, -1, -1, -1, -1, -1, -1, /* 56 - 63 */ 198 + }; 199 + 200 + static u8 aia_get_iprio8(struct kvm_vcpu *vcpu, unsigned int irq) 201 + { 202 + unsigned long hviprio; 203 + int bitpos = aia_irq2bitpos[irq]; 204 + 205 + if (bitpos < 0) 206 + return 0; 207 + 208 + switch (bitpos / BITS_PER_LONG) { 209 + case 0: 210 + hviprio = csr_read(CSR_HVIPRIO1); 211 + break; 212 + case 1: 213 + #ifndef CONFIG_32BIT 214 + hviprio = csr_read(CSR_HVIPRIO2); 215 + break; 216 + #else 217 + hviprio = csr_read(CSR_HVIPRIO1H); 218 + break; 219 + case 2: 220 + hviprio = csr_read(CSR_HVIPRIO2); 221 + break; 222 + case 3: 223 + hviprio = csr_read(CSR_HVIPRIO2H); 224 + break; 225 + #endif 226 + default: 227 + return 0; 228 + } 229 + 230 + return (hviprio >> (bitpos % BITS_PER_LONG)) & TOPI_IPRIO_MASK; 231 + } 232 + 233 + static void aia_set_iprio8(struct kvm_vcpu *vcpu, unsigned int irq, u8 prio) 234 + { 235 + unsigned long hviprio; 236 + int bitpos = aia_irq2bitpos[irq]; 237 + 238 + if (bitpos < 0) 239 + return; 240 + 241 + switch (bitpos / BITS_PER_LONG) { 242 + case 0: 243 + hviprio = csr_read(CSR_HVIPRIO1); 244 + break; 245 + case 1: 246 + #ifndef CONFIG_32BIT 247 + hviprio = csr_read(CSR_HVIPRIO2); 248 + break; 249 + #else 250 + hviprio = csr_read(CSR_HVIPRIO1H); 251 + break; 252 + case 2: 253 + hviprio = csr_read(CSR_HVIPRIO2); 254 + break; 255 + case 3: 256 + hviprio = csr_read(CSR_HVIPRIO2H); 257 + break; 258 + #endif 259 + default: 260 + 
return; 261 + } 262 + 263 + hviprio &= ~(TOPI_IPRIO_MASK << (bitpos % BITS_PER_LONG)); 264 + hviprio |= (unsigned long)prio << (bitpos % BITS_PER_LONG); 265 + 266 + switch (bitpos / BITS_PER_LONG) { 267 + case 0: 268 + csr_write(CSR_HVIPRIO1, hviprio); 269 + break; 270 + case 1: 271 + #ifndef CONFIG_32BIT 272 + csr_write(CSR_HVIPRIO2, hviprio); 273 + break; 274 + #else 275 + csr_write(CSR_HVIPRIO1H, hviprio); 276 + break; 277 + case 2: 278 + csr_write(CSR_HVIPRIO2, hviprio); 279 + break; 280 + case 3: 281 + csr_write(CSR_HVIPRIO2H, hviprio); 282 + break; 283 + #endif 284 + default: 285 + return; 286 + } 287 + } 288 + 289 + static int aia_rmw_iprio(struct kvm_vcpu *vcpu, unsigned int isel, 290 + unsigned long *val, unsigned long new_val, 291 + unsigned long wr_mask) 292 + { 293 + int i, first_irq, nirqs; 294 + unsigned long old_val; 295 + u8 prio; 296 + 297 + #ifndef CONFIG_32BIT 298 + if (isel & 0x1) 299 + return KVM_INSN_ILLEGAL_TRAP; 300 + #endif 301 + 302 + nirqs = 4 * (BITS_PER_LONG / 32); 303 + first_irq = (isel - ISELECT_IPRIO0) * 4; 304 + 305 + old_val = 0; 306 + for (i = 0; i < nirqs; i++) { 307 + prio = aia_get_iprio8(vcpu, first_irq + i); 308 + old_val |= (unsigned long)prio << (TOPI_IPRIO_BITS * i); 309 + } 310 + 311 + if (val) 312 + *val = old_val; 313 + 314 + if (wr_mask) { 315 + new_val = (old_val & ~wr_mask) | (new_val & wr_mask); 316 + for (i = 0; i < nirqs; i++) { 317 + prio = (new_val >> (TOPI_IPRIO_BITS * i)) & 318 + TOPI_IPRIO_MASK; 319 + aia_set_iprio8(vcpu, first_irq + i, prio); 320 + } 321 + } 322 + 323 + return KVM_INSN_CONTINUE_NEXT_SEPC; 324 + } 325 + 326 + #define IMSIC_FIRST 0x70 327 + #define IMSIC_LAST 0xff 328 + int kvm_riscv_vcpu_aia_rmw_ireg(struct kvm_vcpu *vcpu, unsigned int csr_num, 329 + unsigned long *val, unsigned long new_val, 330 + unsigned long wr_mask) 331 + { 332 + unsigned int isel; 333 + 334 + /* If AIA not available then redirect trap */ 335 + if (!kvm_riscv_aia_available()) 336 + return KVM_INSN_ILLEGAL_TRAP; 337 + 
338 + /* First try to emulate in kernel space */ 339 + isel = csr_read(CSR_VSISELECT) & ISELECT_MASK; 340 + if (isel >= ISELECT_IPRIO0 && isel <= ISELECT_IPRIO15) 341 + return aia_rmw_iprio(vcpu, isel, val, new_val, wr_mask); 342 + else if (isel >= IMSIC_FIRST && isel <= IMSIC_LAST && 343 + kvm_riscv_aia_initialized(vcpu->kvm)) 344 + return kvm_riscv_vcpu_aia_imsic_rmw(vcpu, isel, val, new_val, 345 + wr_mask); 346 + 347 + /* We can't handle it here so redirect to user space */ 348 + return KVM_INSN_EXIT_TO_USER_SPACE; 349 + } 350 + 351 + void kvm_riscv_aia_enable(void) 352 + { 353 + if (!kvm_riscv_aia_available()) 354 + return; 355 + 356 + aia_set_hvictl(false); 357 + csr_write(CSR_HVIPRIO1, 0x0); 358 + csr_write(CSR_HVIPRIO2, 0x0); 359 + #ifdef CONFIG_32BIT 360 + csr_write(CSR_HVIPH, 0x0); 361 + csr_write(CSR_HIDELEGH, 0x0); 362 + csr_write(CSR_HVIPRIO1H, 0x0); 363 + csr_write(CSR_HVIPRIO2H, 0x0); 364 + #endif 365 + } 366 + 367 + void kvm_riscv_aia_disable(void) 368 + { 369 + if (!kvm_riscv_aia_available()) 370 + return; 371 + 372 + aia_set_hvictl(false); 373 + } 374 + 375 + int kvm_riscv_aia_init(void) 376 + { 377 + if (!riscv_isa_extension_available(NULL, SxAIA)) 378 + return -ENODEV; 379 + 380 + /* Enable KVM AIA support */ 381 + static_branch_enable(&kvm_riscv_aia_available); 382 + 383 + return 0; 384 + } 385 + 386 + void kvm_riscv_aia_exit(void) 387 + { 388 + }
+21 -1
arch/riscv/kvm/main.c
··· 44 44 45 45 csr_write(CSR_HVIP, 0); 46 46 47 + kvm_riscv_aia_enable(); 48 + 47 49 return 0; 48 50 } 49 51 50 52 void kvm_arch_hardware_disable(void) 51 53 { 54 + kvm_riscv_aia_disable(); 55 + 52 56 /* 53 57 * After clearing the hideleg CSR, the host kernel will receive 54 58 * spurious interrupts if hvip CSR has pending interrupts and the ··· 67 63 68 64 static int __init riscv_kvm_init(void) 69 65 { 66 + int rc; 70 67 const char *str; 71 68 72 69 if (!riscv_isa_extension_available(NULL, h)) { ··· 88 83 kvm_riscv_gstage_mode_detect(); 89 84 90 85 kvm_riscv_gstage_vmid_detect(); 86 + 87 + rc = kvm_riscv_aia_init(); 88 + if (rc && rc != -ENODEV) 89 + return rc; 91 90 92 91 kvm_info("hypervisor extension available\n"); 93 92 ··· 115 106 116 107 kvm_info("VMID %ld bits available\n", kvm_riscv_gstage_vmid_bits()); 117 108 118 - return kvm_init(sizeof(struct kvm_vcpu), 0, THIS_MODULE); 109 + if (kvm_riscv_aia_available()) 110 + kvm_info("AIA available\n"); 111 + 112 + rc = kvm_init(sizeof(struct kvm_vcpu), 0, THIS_MODULE); 113 + if (rc) { 114 + kvm_riscv_aia_exit(); 115 + return rc; 116 + } 117 + 118 + return 0; 119 119 } 120 120 module_init(riscv_kvm_init); 121 121 122 122 static void __exit riscv_kvm_exit(void) 123 123 { 124 + kvm_riscv_aia_exit(); 125 + 124 126 kvm_exit(); 125 127 } 126 128 module_exit(riscv_kvm_exit);
+17 -11
arch/riscv/kvm/mmu.c
··· 628 628 !(memslot->flags & KVM_MEM_READONLY)) ? true : false; 629 629 unsigned long vma_pagesize, mmu_seq; 630 630 631 + /* We need minimum second+third level pages */ 632 + ret = kvm_mmu_topup_memory_cache(pcache, gstage_pgd_levels); 633 + if (ret) { 634 + kvm_err("Failed to topup G-stage cache\n"); 635 + return ret; 636 + } 637 + 631 638 mmap_read_lock(current->mm); 632 639 633 640 vma = vma_lookup(current->mm, hva); ··· 655 648 if (vma_pagesize == PMD_SIZE || vma_pagesize == PUD_SIZE) 656 649 gfn = (gpa & huge_page_mask(hstate_vma(vma))) >> PAGE_SHIFT; 657 650 651 + /* 652 + * Read mmu_invalidate_seq so that KVM can detect if the results of 653 + * vma_lookup() or gfn_to_pfn_prot() become stale prior to acquiring 654 + * kvm->mmu_lock. 655 + * 656 + * Rely on mmap_read_unlock() for an implicit smp_rmb(), which pairs 657 + * with the smp_wmb() in kvm_mmu_invalidate_end(). 658 + */ 659 + mmu_seq = kvm->mmu_invalidate_seq; 658 660 mmap_read_unlock(current->mm); 659 661 660 662 if (vma_pagesize != PUD_SIZE && ··· 672 656 kvm_err("Invalid VMA page size 0x%lx\n", vma_pagesize); 673 657 return -EFAULT; 674 658 } 675 - 676 - /* We need minimum second+third level pages */ 677 - ret = kvm_mmu_topup_memory_cache(pcache, gstage_pgd_levels); 678 - if (ret) { 679 - kvm_err("Failed to topup G-stage cache\n"); 680 - return ret; 681 - } 682 - 683 - mmu_seq = kvm->mmu_invalidate_seq; 684 659 685 660 hfn = gfn_to_pfn_prot(kvm, gfn, is_write, &writable); 686 661 if (hfn == KVM_PFN_ERR_HWPOISON) { ··· 755 748 unsigned long hgatp = gstage_mode; 756 749 struct kvm_arch *k = &vcpu->kvm->arch; 757 750 758 - hgatp |= (READ_ONCE(k->vmid.vmid) << HGATP_VMID_SHIFT) & 759 - HGATP_VMID_MASK; 751 + hgatp |= (READ_ONCE(k->vmid.vmid) << HGATP_VMID_SHIFT) & HGATP_VMID; 752 hgatp |= (k->pgd_phys >> PAGE_SHIFT) & HGATP_PPN; 753 754 csr_write(CSR_HGATP, hgatp);
+155 -45
arch/riscv/kvm/vcpu.c
··· 58 58 [KVM_RISCV_ISA_EXT_I] = RISCV_ISA_EXT_i, 59 59 [KVM_RISCV_ISA_EXT_M] = RISCV_ISA_EXT_m, 60 60 61 + KVM_ISA_EXT_ARR(SSAIA), 61 62 KVM_ISA_EXT_ARR(SSTC), 62 63 KVM_ISA_EXT_ARR(SVINVAL), 63 64 KVM_ISA_EXT_ARR(SVPBMT), 65 + KVM_ISA_EXT_ARR(ZBB), 64 66 KVM_ISA_EXT_ARR(ZIHINTPAUSE), 65 67 KVM_ISA_EXT_ARR(ZICBOM), 66 68 KVM_ISA_EXT_ARR(ZICBOZ), ··· 99 97 case KVM_RISCV_ISA_EXT_C: 100 98 case KVM_RISCV_ISA_EXT_I: 101 99 case KVM_RISCV_ISA_EXT_M: 100 + case KVM_RISCV_ISA_EXT_SSAIA: 102 101 case KVM_RISCV_ISA_EXT_SSTC: 103 102 case KVM_RISCV_ISA_EXT_SVINVAL: 104 103 case KVM_RISCV_ISA_EXT_ZIHINTPAUSE: 104 + case KVM_RISCV_ISA_EXT_ZBB: 105 105 return false; 106 106 default: 107 107 break; ··· 140 136 141 137 kvm_riscv_vcpu_timer_reset(vcpu); 142 138 143 - WRITE_ONCE(vcpu->arch.irqs_pending, 0); 144 - WRITE_ONCE(vcpu->arch.irqs_pending_mask, 0); 139 + kvm_riscv_vcpu_aia_reset(vcpu); 140 + 141 + bitmap_zero(vcpu->arch.irqs_pending, KVM_RISCV_VCPU_NR_IRQS); 142 + bitmap_zero(vcpu->arch.irqs_pending_mask, KVM_RISCV_VCPU_NR_IRQS); 145 143 146 144 kvm_riscv_vcpu_pmu_reset(vcpu); 147 145 ··· 164 158 165 159 int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) 166 160 { 161 + int rc; 167 162 struct kvm_cpu_context *cntx; 168 163 struct kvm_vcpu_csr *reset_csr = &vcpu->arch.guest_reset_csr; 169 164 unsigned long host_isa, i; ··· 207 200 /* setup performance monitoring */ 208 201 kvm_riscv_vcpu_pmu_init(vcpu); 209 202 203 + /* Setup VCPU AIA */ 204 + rc = kvm_riscv_vcpu_aia_init(vcpu); 205 + if (rc) 206 + return rc; 207 + 210 208 /* Reset VCPU */ 211 209 kvm_riscv_reset_vcpu(vcpu); 212 210 ··· 231 219 232 220 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) 233 221 { 222 + /* Cleanup VCPU AIA context */ 223 + kvm_riscv_vcpu_aia_deinit(vcpu); 224 + 234 225 /* Cleanup VCPU timer */ 235 226 kvm_riscv_vcpu_timer_deinit(vcpu); 236 227 ··· 470 455 return 0; 471 456 } 472 457 473 - static int kvm_riscv_vcpu_get_reg_csr(struct kvm_vcpu *vcpu, 474 - const struct kvm_one_reg *reg) 458 + 
static int kvm_riscv_vcpu_general_get_csr(struct kvm_vcpu *vcpu, 459 + unsigned long reg_num, 460 + unsigned long *out_val) 475 461 { 476 462 struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; 477 - unsigned long __user *uaddr = 478 - (unsigned long __user *)(unsigned long)reg->addr; 479 - unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK | 480 - KVM_REG_SIZE_MASK | 481 - KVM_REG_RISCV_CSR); 482 - unsigned long reg_val; 483 463 484 - if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long)) 485 - return -EINVAL; 486 464 if (reg_num >= sizeof(struct kvm_riscv_csr) / sizeof(unsigned long)) 487 465 return -EINVAL; 488 466 489 467 if (reg_num == KVM_REG_RISCV_CSR_REG(sip)) { 490 468 kvm_riscv_vcpu_flush_interrupts(vcpu); 491 - reg_val = (csr->hvip >> VSIP_TO_HVIP_SHIFT) & VSIP_VALID_MASK; 469 + *out_val = (csr->hvip >> VSIP_TO_HVIP_SHIFT) & VSIP_VALID_MASK; 470 + *out_val |= csr->hvip & ~IRQ_LOCAL_MASK; 492 471 } else 493 - reg_val = ((unsigned long *)csr)[reg_num]; 472 + *out_val = ((unsigned long *)csr)[reg_num]; 473 + 474 + return 0; 475 + } 476 + 477 + static inline int kvm_riscv_vcpu_general_set_csr(struct kvm_vcpu *vcpu, 478 + unsigned long reg_num, 479 + unsigned long reg_val) 480 + { 481 + struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; 482 + 483 + if (reg_num >= sizeof(struct kvm_riscv_csr) / sizeof(unsigned long)) 484 + return -EINVAL; 485 + 486 + if (reg_num == KVM_REG_RISCV_CSR_REG(sip)) { 487 + reg_val &= VSIP_VALID_MASK; 488 + reg_val <<= VSIP_TO_HVIP_SHIFT; 489 + } 490 + 491 + ((unsigned long *)csr)[reg_num] = reg_val; 492 + 493 + if (reg_num == KVM_REG_RISCV_CSR_REG(sip)) 494 + WRITE_ONCE(vcpu->arch.irqs_pending_mask[0], 0); 495 + 496 + return 0; 497 + } 498 + 499 + static int kvm_riscv_vcpu_get_reg_csr(struct kvm_vcpu *vcpu, 500 + const struct kvm_one_reg *reg) 501 + { 502 + int rc; 503 + unsigned long __user *uaddr = 504 + (unsigned long __user *)(unsigned long)reg->addr; 505 + unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK | 506 + 
KVM_REG_SIZE_MASK | 507 + KVM_REG_RISCV_CSR); 508 + unsigned long reg_val, reg_subtype; 509 + 510 + if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long)) 511 + return -EINVAL; 512 + 513 + reg_subtype = reg_num & KVM_REG_RISCV_SUBTYPE_MASK; 514 + reg_num &= ~KVM_REG_RISCV_SUBTYPE_MASK; 515 + switch (reg_subtype) { 516 + case KVM_REG_RISCV_CSR_GENERAL: 517 + rc = kvm_riscv_vcpu_general_get_csr(vcpu, reg_num, &reg_val); 518 + break; 519 + case KVM_REG_RISCV_CSR_AIA: 520 + rc = kvm_riscv_vcpu_aia_get_csr(vcpu, reg_num, &reg_val); 521 + break; 522 + default: 523 + rc = -EINVAL; 524 + break; 525 + } 526 + if (rc) 527 + return rc; 494 528 495 529 if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id))) 496 530 return -EFAULT; ··· 550 486 static int kvm_riscv_vcpu_set_reg_csr(struct kvm_vcpu *vcpu, 551 487 const struct kvm_one_reg *reg) 552 488 { 553 - struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; 489 + int rc; 554 490 unsigned long __user *uaddr = 555 491 (unsigned long __user *)(unsigned long)reg->addr; 556 492 unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK | 557 493 KVM_REG_SIZE_MASK | 558 494 KVM_REG_RISCV_CSR); 559 - unsigned long reg_val; 495 + unsigned long reg_val, reg_subtype; 560 496 561 497 if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long)) 562 - return -EINVAL; 563 - if (reg_num >= sizeof(struct kvm_riscv_csr) / sizeof(unsigned long)) 564 498 return -EINVAL; 565 499 566 500 if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id))) 567 501 return -EFAULT; 568 502 569 - if (reg_num == KVM_REG_RISCV_CSR_REG(sip)) { 570 - reg_val &= VSIP_VALID_MASK; 571 - reg_val <<= VSIP_TO_HVIP_SHIFT; 503 + reg_subtype = reg_num & KVM_REG_RISCV_SUBTYPE_MASK; 504 + reg_num &= ~KVM_REG_RISCV_SUBTYPE_MASK; 505 + switch (reg_subtype) { 506 + case KVM_REG_RISCV_CSR_GENERAL: 507 + rc = kvm_riscv_vcpu_general_set_csr(vcpu, reg_num, reg_val); 508 + break; 509 + case KVM_REG_RISCV_CSR_AIA: 510 + rc = kvm_riscv_vcpu_aia_set_csr(vcpu, reg_num, reg_val); 511 + break; 512 + 
default: 513 + rc = -EINVAL; 514 + break; 572 515 } 573 - 574 - ((unsigned long *)csr)[reg_num] = reg_val; 575 - 576 - if (reg_num == KVM_REG_RISCV_CSR_REG(sip)) 577 - WRITE_ONCE(vcpu->arch.irqs_pending_mask, 0); 516 + if (rc) 517 + return rc; 578 518 579 519 return 0; 580 520 } ··· 677 609 KVM_REG_RISCV_FP_D); 678 610 case KVM_REG_RISCV_ISA_EXT: 679 611 return kvm_riscv_vcpu_set_reg_isa_ext(vcpu, reg); 612 + case KVM_REG_RISCV_SBI_EXT: 613 + return kvm_riscv_vcpu_set_reg_sbi_ext(vcpu, reg); 680 614 default: 681 615 break; 682 616 } ··· 706 636 KVM_REG_RISCV_FP_D); 707 637 case KVM_REG_RISCV_ISA_EXT: 708 638 return kvm_riscv_vcpu_get_reg_isa_ext(vcpu, reg); 639 + case KVM_REG_RISCV_SBI_EXT: 640 + return kvm_riscv_vcpu_get_reg_sbi_ext(vcpu, reg); 709 641 default: 710 642 break; 711 643 } ··· 808 736 struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; 809 737 unsigned long mask, val; 810 738 811 - if (READ_ONCE(vcpu->arch.irqs_pending_mask)) { 812 - mask = xchg_acquire(&vcpu->arch.irqs_pending_mask, 0); 813 - val = READ_ONCE(vcpu->arch.irqs_pending) & mask; 739 + if (READ_ONCE(vcpu->arch.irqs_pending_mask[0])) { 740 + mask = xchg_acquire(&vcpu->arch.irqs_pending_mask[0], 0); 741 + val = READ_ONCE(vcpu->arch.irqs_pending[0]) & mask; 814 742 815 743 csr->hvip &= ~mask; 816 744 csr->hvip |= val; 817 745 } 746 + 747 + /* Flush AIA high interrupts */ 748 + kvm_riscv_vcpu_aia_flush_interrupts(vcpu); 818 749 } 819 750 820 751 void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu) ··· 834 759 if ((csr->hvip ^ hvip) & (1UL << IRQ_VS_SOFT)) { 835 760 if (hvip & (1UL << IRQ_VS_SOFT)) { 836 761 if (!test_and_set_bit(IRQ_VS_SOFT, 837 - &v->irqs_pending_mask)) 838 - set_bit(IRQ_VS_SOFT, &v->irqs_pending); 762 + v->irqs_pending_mask)) 763 + set_bit(IRQ_VS_SOFT, v->irqs_pending); 839 764 } else { 840 765 if (!test_and_set_bit(IRQ_VS_SOFT, 841 - &v->irqs_pending_mask)) 842 - clear_bit(IRQ_VS_SOFT, &v->irqs_pending); 766 + v->irqs_pending_mask)) 767 + clear_bit(IRQ_VS_SOFT, 
v->irqs_pending); 843 768 } 844 769 } 770 + 771 + /* Sync-up AIA high interrupts */ 772 + kvm_riscv_vcpu_aia_sync_interrupts(vcpu); 845 773 846 774 /* Sync-up timer CSRs */ 847 775 kvm_riscv_vcpu_timer_sync(vcpu); ··· 852 774 853 775 int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq) 854 776 { 855 - if (irq != IRQ_VS_SOFT && 777 + /* 778 + * We only allow VS-mode software, timer, and external 779 + * interrupts when irq is one of the local interrupts 780 + * defined by RISC-V privilege specification. 781 + */ 782 + if (irq < IRQ_LOCAL_MAX && 783 + irq != IRQ_VS_SOFT && 856 784 irq != IRQ_VS_TIMER && 857 785 irq != IRQ_VS_EXT) 858 786 return -EINVAL; 859 787 860 - set_bit(irq, &vcpu->arch.irqs_pending); 788 + set_bit(irq, vcpu->arch.irqs_pending); 861 789 smp_mb__before_atomic(); 862 - set_bit(irq, &vcpu->arch.irqs_pending_mask); 790 + set_bit(irq, vcpu->arch.irqs_pending_mask); 863 791 864 792 kvm_vcpu_kick(vcpu); 865 793 ··· 874 790 875 791 int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq) 876 792 { 877 - if (irq != IRQ_VS_SOFT && 793 + /* 794 + * We only allow VS-mode software, timer, and external 795 + * interrupts when irq is one of the local interrupts 796 + * defined by RISC-V privilege specification. 
797 + */ 798 + if (irq < IRQ_LOCAL_MAX && 799 + irq != IRQ_VS_SOFT && 878 800 irq != IRQ_VS_TIMER && 879 801 irq != IRQ_VS_EXT) 880 802 return -EINVAL; 881 803 882 - clear_bit(irq, &vcpu->arch.irqs_pending); 804 + clear_bit(irq, vcpu->arch.irqs_pending); 883 805 smp_mb__before_atomic(); 884 - set_bit(irq, &vcpu->arch.irqs_pending_mask); 806 + set_bit(irq, vcpu->arch.irqs_pending_mask); 885 807 886 808 return 0; 887 809 } 888 810 889 - bool kvm_riscv_vcpu_has_interrupts(struct kvm_vcpu *vcpu, unsigned long mask) 811 + bool kvm_riscv_vcpu_has_interrupts(struct kvm_vcpu *vcpu, u64 mask) 890 812 { 891 - unsigned long ie = ((vcpu->arch.guest_csr.vsie & VSIP_VALID_MASK) 892 - << VSIP_TO_HVIP_SHIFT) & mask; 813 + unsigned long ie; 893 814 894 - return (READ_ONCE(vcpu->arch.irqs_pending) & ie) ? true : false; 815 + ie = ((vcpu->arch.guest_csr.vsie & VSIP_VALID_MASK) 816 + << VSIP_TO_HVIP_SHIFT) & (unsigned long)mask; 817 + ie |= vcpu->arch.guest_csr.vsie & ~IRQ_LOCAL_MASK & 818 + (unsigned long)mask; 819 + if (READ_ONCE(vcpu->arch.irqs_pending[0]) & ie) 820 + return true; 821 + 822 + /* Check AIA high interrupts */ 823 + return kvm_riscv_vcpu_aia_has_interrupts(vcpu, mask); 895 824 } 896 825 897 826 void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu) ··· 1003 906 kvm_riscv_vcpu_guest_fp_restore(&vcpu->arch.guest_context, 1004 907 vcpu->arch.isa); 1005 908 909 + kvm_riscv_vcpu_aia_load(vcpu, cpu); 910 + 1006 911 vcpu->cpu = cpu; 1007 912 } 1008 913 ··· 1013 914 struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; 1014 915 1015 916 vcpu->cpu = -1; 917 + 918 + kvm_riscv_vcpu_aia_put(vcpu); 1016 919 1017 920 kvm_riscv_vcpu_guest_fp_save(&vcpu->arch.guest_context, 1018 921 vcpu->arch.isa); ··· 1083 982 struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; 1084 983 1085 984 csr_write(CSR_HVIP, csr->hvip); 985 + kvm_riscv_vcpu_aia_update_hvip(vcpu); 1086 986 } 1087 987 1088 988 /* ··· 1156 1054 1157 1055 kvm_riscv_check_vcpu_requests(vcpu); 1158 1056 1057 + preempt_disable(); 1058 + 
1059 + /* Update AIA HW state before entering guest */ 1060 + ret = kvm_riscv_vcpu_aia_update(vcpu); 1061 + if (ret <= 0) { 1062 + preempt_enable(); 1063 + continue; 1064 + } 1065 + 1159 1066 local_irq_disable(); 1160 1067 1161 1068 /* ··· 1193 1082 xfer_to_guest_mode_work_pending()) { 1194 1083 vcpu->mode = OUTSIDE_GUEST_MODE; 1195 1084 local_irq_enable(); 1085 + preempt_enable(); 1196 1086 kvm_vcpu_srcu_read_lock(vcpu); 1197 1087 continue; 1198 1088 } ··· 1226 1114 1227 1115 /* Syncup interrupts state with HW */ 1228 1116 kvm_riscv_vcpu_sync_interrupts(vcpu); 1229 - 1230 - preempt_disable(); 1231 1117 1232 1118 /* 1233 1119 * We must ensure that any pending interrupts are taken before
+1
arch/riscv/kvm/vcpu_insn.c
··· 214 214 }; 215 215 216 216 static const struct csr_func csr_funcs[] = { 217 + KVM_RISCV_VCPU_AIA_CSR_FUNCS 217 218 KVM_RISCV_VCPU_HPMCOUNTER_CSR_FUNCS 218 219 }; 219 220
+230 -17
arch/riscv/kvm/vcpu_sbi.c
··· 30 30 }; 31 31 #endif 32 32 33 - static const struct kvm_vcpu_sbi_extension *sbi_ext[] = { 34 - &vcpu_sbi_ext_v01, 35 - &vcpu_sbi_ext_base, 36 - &vcpu_sbi_ext_time, 37 - &vcpu_sbi_ext_ipi, 38 - &vcpu_sbi_ext_rfence, 39 - &vcpu_sbi_ext_srst, 40 - &vcpu_sbi_ext_hsm, 41 - &vcpu_sbi_ext_pmu, 42 - &vcpu_sbi_ext_experimental, 43 - &vcpu_sbi_ext_vendor, 33 + struct kvm_riscv_sbi_extension_entry { 34 + enum KVM_RISCV_SBI_EXT_ID dis_idx; 35 + const struct kvm_vcpu_sbi_extension *ext_ptr; 36 + }; 37 + 38 + static const struct kvm_riscv_sbi_extension_entry sbi_ext[] = { 39 + { 40 + .dis_idx = KVM_RISCV_SBI_EXT_V01, 41 + .ext_ptr = &vcpu_sbi_ext_v01, 42 + }, 43 + { 44 + .dis_idx = KVM_RISCV_SBI_EXT_MAX, /* Can't be disabled */ 45 + .ext_ptr = &vcpu_sbi_ext_base, 46 + }, 47 + { 48 + .dis_idx = KVM_RISCV_SBI_EXT_TIME, 49 + .ext_ptr = &vcpu_sbi_ext_time, 50 + }, 51 + { 52 + .dis_idx = KVM_RISCV_SBI_EXT_IPI, 53 + .ext_ptr = &vcpu_sbi_ext_ipi, 54 + }, 55 + { 56 + .dis_idx = KVM_RISCV_SBI_EXT_RFENCE, 57 + .ext_ptr = &vcpu_sbi_ext_rfence, 58 + }, 59 + { 60 + .dis_idx = KVM_RISCV_SBI_EXT_SRST, 61 + .ext_ptr = &vcpu_sbi_ext_srst, 62 + }, 63 + { 64 + .dis_idx = KVM_RISCV_SBI_EXT_HSM, 65 + .ext_ptr = &vcpu_sbi_ext_hsm, 66 + }, 67 + { 68 + .dis_idx = KVM_RISCV_SBI_EXT_PMU, 69 + .ext_ptr = &vcpu_sbi_ext_pmu, 70 + }, 71 + { 72 + .dis_idx = KVM_RISCV_SBI_EXT_EXPERIMENTAL, 73 + .ext_ptr = &vcpu_sbi_ext_experimental, 74 + }, 75 + { 76 + .dis_idx = KVM_RISCV_SBI_EXT_VENDOR, 77 + .ext_ptr = &vcpu_sbi_ext_vendor, 78 + }, 44 79 }; 45 80 46 81 void kvm_riscv_vcpu_sbi_forward(struct kvm_vcpu *vcpu, struct kvm_run *run) ··· 134 99 return 0; 135 100 } 136 101 137 - const struct kvm_vcpu_sbi_extension *kvm_vcpu_sbi_find_ext(unsigned long extid) 102 + static int riscv_vcpu_set_sbi_ext_single(struct kvm_vcpu *vcpu, 103 + unsigned long reg_num, 104 + unsigned long reg_val) 138 105 { 139 - int i = 0; 106 + unsigned long i; 107 + const struct kvm_riscv_sbi_extension_entry *sext = NULL; 108 + struct 
kvm_vcpu_sbi_context *scontext = &vcpu->arch.sbi_context; 109 + 110 + if (reg_num >= KVM_RISCV_SBI_EXT_MAX || 111 + (reg_val != 1 && reg_val != 0)) 112 + return -EINVAL; 140 113 141 114 for (i = 0; i < ARRAY_SIZE(sbi_ext); i++) { 142 - if (sbi_ext[i]->extid_start <= extid && 143 - sbi_ext[i]->extid_end >= extid) 144 - return sbi_ext[i]; 115 + if (sbi_ext[i].dis_idx == reg_num) { 116 + sext = &sbi_ext[i]; 117 + break; 118 + } 119 + } 120 + if (!sext) 121 + return -ENOENT; 122 + 123 + scontext->extension_disabled[sext->dis_idx] = !reg_val; 124 + 125 + return 0; 126 + } 127 + 128 + static int riscv_vcpu_get_sbi_ext_single(struct kvm_vcpu *vcpu, 129 + unsigned long reg_num, 130 + unsigned long *reg_val) 131 + { 132 + unsigned long i; 133 + const struct kvm_riscv_sbi_extension_entry *sext = NULL; 134 + struct kvm_vcpu_sbi_context *scontext = &vcpu->arch.sbi_context; 135 + 136 + if (reg_num >= KVM_RISCV_SBI_EXT_MAX) 137 + return -EINVAL; 138 + 139 + for (i = 0; i < ARRAY_SIZE(sbi_ext); i++) { 140 + if (sbi_ext[i].dis_idx == reg_num) { 141 + sext = &sbi_ext[i]; 142 + break; 143 + } 144 + } 145 + if (!sext) 146 + return -ENOENT; 147 + 148 + *reg_val = !scontext->extension_disabled[sext->dis_idx]; 149 + 150 + return 0; 151 + } 152 + 153 + static int riscv_vcpu_set_sbi_ext_multi(struct kvm_vcpu *vcpu, 154 + unsigned long reg_num, 155 + unsigned long reg_val, bool enable) 156 + { 157 + unsigned long i, ext_id; 158 + 159 + if (reg_num > KVM_REG_RISCV_SBI_MULTI_REG_LAST) 160 + return -EINVAL; 161 + 162 + for_each_set_bit(i, &reg_val, BITS_PER_LONG) { 163 + ext_id = i + reg_num * BITS_PER_LONG; 164 + if (ext_id >= KVM_RISCV_SBI_EXT_MAX) 165 + break; 166 + 167 + riscv_vcpu_set_sbi_ext_single(vcpu, ext_id, enable); 168 + } 169 + 170 + return 0; 171 + } 172 + 173 + static int riscv_vcpu_get_sbi_ext_multi(struct kvm_vcpu *vcpu, 174 + unsigned long reg_num, 175 + unsigned long *reg_val) 176 + { 177 + unsigned long i, ext_id, ext_val; 178 + 179 + if (reg_num > 
KVM_REG_RISCV_SBI_MULTI_REG_LAST) 180 + return -EINVAL; 181 + 182 + for (i = 0; i < BITS_PER_LONG; i++) { 183 + ext_id = i + reg_num * BITS_PER_LONG; 184 + if (ext_id >= KVM_RISCV_SBI_EXT_MAX) 185 + break; 186 + 187 + ext_val = 0; 188 + riscv_vcpu_get_sbi_ext_single(vcpu, ext_id, &ext_val); 189 + if (ext_val) 190 + *reg_val |= KVM_REG_RISCV_SBI_MULTI_MASK(ext_id); 191 + } 192 + 193 + return 0; 194 + } 195 + 196 + int kvm_riscv_vcpu_set_reg_sbi_ext(struct kvm_vcpu *vcpu, 197 + const struct kvm_one_reg *reg) 198 + { 199 + unsigned long __user *uaddr = 200 + (unsigned long __user *)(unsigned long)reg->addr; 201 + unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK | 202 + KVM_REG_SIZE_MASK | 203 + KVM_REG_RISCV_SBI_EXT); 204 + unsigned long reg_val, reg_subtype; 205 + 206 + if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long)) 207 + return -EINVAL; 208 + 209 + if (vcpu->arch.ran_atleast_once) 210 + return -EBUSY; 211 + 212 + reg_subtype = reg_num & KVM_REG_RISCV_SUBTYPE_MASK; 213 + reg_num &= ~KVM_REG_RISCV_SUBTYPE_MASK; 214 + 215 + if (copy_from_user(&reg_val, uaddr, KVM_REG_SIZE(reg->id))) 216 + return -EFAULT; 217 + 218 + switch (reg_subtype) { 219 + case KVM_REG_RISCV_SBI_SINGLE: 220 + return riscv_vcpu_set_sbi_ext_single(vcpu, reg_num, reg_val); 221 + case KVM_REG_RISCV_SBI_MULTI_EN: 222 + return riscv_vcpu_set_sbi_ext_multi(vcpu, reg_num, reg_val, true); 223 + case KVM_REG_RISCV_SBI_MULTI_DIS: 224 + return riscv_vcpu_set_sbi_ext_multi(vcpu, reg_num, reg_val, false); 225 + default: 226 + return -EINVAL; 227 + } 228 + 229 + return 0; 230 + } 231 + 232 + int kvm_riscv_vcpu_get_reg_sbi_ext(struct kvm_vcpu *vcpu, 233 + const struct kvm_one_reg *reg) 234 + { 235 + int rc; 236 + unsigned long __user *uaddr = 237 + (unsigned long __user *)(unsigned long)reg->addr; 238 + unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK | 239 + KVM_REG_SIZE_MASK | 240 + KVM_REG_RISCV_SBI_EXT); 241 + unsigned long reg_val, reg_subtype; 242 + 243 + if (KVM_REG_SIZE(reg->id) != 
sizeof(unsigned long)) 244 + return -EINVAL; 245 + 246 + reg_subtype = reg_num & KVM_REG_RISCV_SUBTYPE_MASK; 247 + reg_num &= ~KVM_REG_RISCV_SUBTYPE_MASK; 248 + 249 + reg_val = 0; 250 + switch (reg_subtype) { 251 + case KVM_REG_RISCV_SBI_SINGLE: 252 + rc = riscv_vcpu_get_sbi_ext_single(vcpu, reg_num, &reg_val); 253 + break; 254 + case KVM_REG_RISCV_SBI_MULTI_EN: 255 + case KVM_REG_RISCV_SBI_MULTI_DIS: 256 + rc = riscv_vcpu_get_sbi_ext_multi(vcpu, reg_num, &reg_val); 257 + if (!rc && reg_subtype == KVM_REG_RISCV_SBI_MULTI_DIS) 258 + reg_val = ~reg_val; 259 + break; 260 + default: 261 + rc = -EINVAL; 262 + } 263 + if (rc) 264 + return rc; 265 + 266 + if (copy_to_user(uaddr, &reg_val, KVM_REG_SIZE(reg->id))) 267 + return -EFAULT; 268 + 269 + return 0; 270 + } 271 + 272 + const struct kvm_vcpu_sbi_extension *kvm_vcpu_sbi_find_ext( 273 + struct kvm_vcpu *vcpu, unsigned long extid) 274 + { 275 + int i; 276 + const struct kvm_riscv_sbi_extension_entry *sext; 277 + struct kvm_vcpu_sbi_context *scontext = &vcpu->arch.sbi_context; 278 + 279 + for (i = 0; i < ARRAY_SIZE(sbi_ext); i++) { 280 + sext = &sbi_ext[i]; 281 + if (sext->ext_ptr->extid_start <= extid && 282 + sext->ext_ptr->extid_end >= extid) { 283 + if (sext->dis_idx < KVM_RISCV_SBI_EXT_MAX && 284 + scontext->extension_disabled[sext->dis_idx]) 285 + return NULL; 286 + return sbi_ext[i].ext_ptr; 287 + } 145 288 } 146 289 147 290 return NULL; ··· 339 126 }; 340 127 bool ext_is_v01 = false; 341 128 342 - sbi_ext = kvm_vcpu_sbi_find_ext(cp->a7); 129 + sbi_ext = kvm_vcpu_sbi_find_ext(vcpu, cp->a7); 343 130 if (sbi_ext && sbi_ext->handler) { 344 131 #ifdef CONFIG_RISCV_SBI_V01 345 132 if (cp->a7 >= SBI_EXT_0_1_SET_TIMER &&
+1 -1
arch/riscv/kvm/vcpu_sbi_base.c
··· 44 44 kvm_riscv_vcpu_sbi_forward(vcpu, run); 45 45 retdata->uexit = true; 46 46 } else { 47 - sbi_ext = kvm_vcpu_sbi_find_ext(cp->a0); 47 + sbi_ext = kvm_vcpu_sbi_find_ext(vcpu, cp->a0); 48 48 *out_val = sbi_ext && sbi_ext->probe ? 49 49 sbi_ext->probe(vcpu) : !!sbi_ext; 50 50 }
+4
arch/riscv/kvm/vm.c
··· 41 41 return r; 42 42 } 43 43 44 + kvm_riscv_aia_init_vm(kvm); 45 + 44 46 kvm_riscv_guest_timer_init(kvm); 45 47 46 48 return 0; ··· 51 49 void kvm_arch_destroy_vm(struct kvm *kvm) 52 50 { 53 51 kvm_destroy_vcpus(kvm); 52 + 53 + kvm_riscv_aia_destroy_vm(kvm); 54 54 } 55 55 56 56 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
+2 -2
arch/riscv/kvm/vmid.c
··· 26 26 27 27 /* Figure-out number of VMID bits in HW */ 28 28 old = csr_read(CSR_HGATP); 29 - csr_write(CSR_HGATP, old | HGATP_VMID_MASK); 29 + csr_write(CSR_HGATP, old | HGATP_VMID); 30 30 vmid_bits = csr_read(CSR_HGATP); 31 - vmid_bits = (vmid_bits & HGATP_VMID_MASK) >> HGATP_VMID_SHIFT; 31 + vmid_bits = (vmid_bits & HGATP_VMID) >> HGATP_VMID_SHIFT; 32 32 vmid_bits = fls_long(vmid_bits); 33 33 csr_write(CSR_HGATP, old); 34 34
+11 -21
arch/s390/kernel/uv.c
··· 192 192 return res; 193 193 } 194 194 195 - static int make_secure_pte(pte_t *ptep, unsigned long addr, 196 - struct page *exp_page, struct uv_cb_header *uvcb) 195 + static int make_page_secure(struct page *page, struct uv_cb_header *uvcb) 197 196 { 198 - pte_t entry = READ_ONCE(*ptep); 199 - struct page *page; 200 197 int expected, cc = 0; 201 198 202 - if (!pte_present(entry)) 203 - return -ENXIO; 204 - if (pte_val(entry) & _PAGE_INVALID) 205 - return -ENXIO; 206 - 207 - page = pte_page(entry); 208 - if (page != exp_page) 209 - return -ENXIO; 210 199 if (PageWriteback(page)) 211 200 return -EAGAIN; 212 201 expected = expected_page_refs(page); ··· 293 304 goto out; 294 305 295 306 rc = -ENXIO; 296 - page = follow_page(vma, uaddr, FOLL_WRITE); 297 - if (IS_ERR_OR_NULL(page)) 298 - goto out; 299 - 300 - lock_page(page); 301 307 ptep = get_locked_pte(gmap->mm, uaddr, &ptelock); 302 - if (should_export_before_import(uvcb, gmap->mm)) 303 - uv_convert_from_secure(page_to_phys(page)); 304 - rc = make_secure_pte(ptep, uaddr, page, uvcb); 308 + if (pte_present(*ptep) && !(pte_val(*ptep) & _PAGE_INVALID) && pte_write(*ptep)) { 309 + page = pte_page(*ptep); 310 + rc = -EAGAIN; 311 + if (trylock_page(page)) { 312 + if (should_export_before_import(uvcb, gmap->mm)) 313 + uv_convert_from_secure(page_to_phys(page)); 314 + rc = make_page_secure(page, uvcb); 315 + unlock_page(page); 316 + } 317 + } 305 318 pte_unmap_unlock(ptep, ptelock); 306 - unlock_page(page); 307 319 out: 308 320 mmap_read_unlock(gmap->mm); 309 321
+5
arch/s390/kvm/pv.c
··· 314 314 */ 315 315 if (kvm->arch.pv.set_aside) 316 316 return -EINVAL; 317 + 318 + /* Guest with segment type ASCE, refuse to destroy asynchronously */ 319 + if ((kvm->arch.gmap->asce & _ASCE_TYPE_MASK) == _ASCE_TYPE_SEGMENT) 320 + return -EINVAL; 321 + 317 322 priv = kzalloc(sizeof(*priv), GFP_KERNEL); 318 323 if (!priv) 319 324 return -ENOMEM;
+7
arch/s390/mm/gmap.c
··· 2822 2822 * s390_replace_asce - Try to replace the current ASCE of a gmap with a copy 2823 2823 * @gmap: the gmap whose ASCE needs to be replaced 2824 2824 * 2825 + * If the ASCE is a SEGMENT type then this function will return -EINVAL, 2826 + * otherwise the pointers in the host_to_guest radix tree will keep pointing 2827 + * to the wrong pages, causing use-after-free and memory corruption. 2825 2828 * If the allocation of the new top level page table fails, the ASCE is not 2826 2829 * replaced. 2827 2830 * In any case, the old ASCE is always removed from the gmap CRST list. ··· 2838 2835 void *table; 2839 2836 2840 2837 s390_unlist_old_asce(gmap); 2838 + 2839 + /* Replacing segment type ASCEs would cause serious issues */ 2840 + if ((gmap->asce & _ASCE_TYPE_MASK) == _ASCE_TYPE_SEGMENT) 2841 + return -EINVAL; 2841 2842 2842 2843 page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER); 2843 2844 if (!page)
+56 -65
arch/x86/kvm/mmu/tdp_mmu.c
··· 40 40 41 41 void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm) 42 42 { 43 - /* Also waits for any queued work items. */ 43 + /* 44 + * Invalidate all roots, which besides the obvious, schedules all roots 45 + * for zapping and thus puts the TDP MMU's reference to each root, i.e. 46 + * ultimately frees all roots. 47 + */ 48 + kvm_tdp_mmu_invalidate_all_roots(kvm); 49 + 50 + /* 51 + * Destroying a workqueue also first flushes the workqueue, i.e. no 52 + * need to invoke kvm_tdp_mmu_zap_invalidated_roots(). 53 + */ 44 54 destroy_workqueue(kvm->arch.tdp_mmu_zap_wq); 45 55 46 56 WARN_ON(atomic64_read(&kvm->arch.tdp_mmu_pages)); ··· 126 116 queue_work(kvm->arch.tdp_mmu_zap_wq, &root->tdp_mmu_async_work); 127 117 } 128 118 129 - static inline bool kvm_tdp_root_mark_invalid(struct kvm_mmu_page *page) 130 - { 131 - union kvm_mmu_page_role role = page->role; 132 - role.invalid = true; 133 - 134 - /* No need to use cmpxchg, only the invalid bit can change. */ 135 - role.word = xchg(&page->role.word, role.word); 136 - return role.invalid; 137 - } 138 - 139 119 void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root, 140 120 bool shared) 141 121 { ··· 134 134 if (!refcount_dec_and_test(&root->tdp_mmu_root_count)) 135 135 return; 136 136 137 - WARN_ON(!is_tdp_mmu_page(root)); 138 - 139 137 /* 140 - * The root now has refcount=0. It is valid, but readers already 141 - * cannot acquire a reference to it because kvm_tdp_mmu_get_root() 142 - * rejects it. This remains true for the rest of the execution 143 - * of this function, because readers visit valid roots only 144 - * (except for tdp_mmu_zap_root_work(), which however 145 - * does not acquire any reference itself). 146 - * 147 - * Even though there are flows that need to visit all roots for 148 - * correctness, they all take mmu_lock for write, so they cannot yet 149 - * run concurrently. The same is true after kvm_tdp_root_mark_invalid, 150 - * since the root still has refcount=0. 
151 - * 152 - * However, tdp_mmu_zap_root can yield, and writers do not expect to 153 - * see refcount=0 (see for example kvm_tdp_mmu_invalidate_all_roots()). 154 - * So the root temporarily gets an extra reference, going to refcount=1 155 - * while staying invalid. Readers still cannot acquire any reference; 156 - * but writers are now allowed to run if tdp_mmu_zap_root yields and 157 - * they might take an extra reference if they themselves yield. 158 - * Therefore, when the reference is given back by the worker, 159 - * there is no guarantee that the refcount is still 1. If not, whoever 160 - * puts the last reference will free the page, but they will not have to 161 - * zap the root because a root cannot go from invalid to valid. 138 + * The TDP MMU itself holds a reference to each root until the root is 139 + * explicitly invalidated, i.e. the final reference should be never be 140 + * put for a valid root. 162 141 */ 163 - if (!kvm_tdp_root_mark_invalid(root)) { 164 - refcount_set(&root->tdp_mmu_root_count, 1); 165 - 166 - /* 167 - * Zapping the root in a worker is not just "nice to have"; 168 - * it is required because kvm_tdp_mmu_invalidate_all_roots() 169 - * skips already-invalid roots. If kvm_tdp_mmu_put_root() did 170 - * not add the root to the workqueue, kvm_tdp_mmu_zap_all_fast() 171 - * might return with some roots not zapped yet. 172 - */ 173 - tdp_mmu_schedule_zap_root(kvm, root); 174 - return; 175 - } 142 + KVM_BUG_ON(!is_tdp_mmu_page(root) || !root->role.invalid, kvm); 176 143 177 144 spin_lock(&kvm->arch.tdp_mmu_pages_lock); 178 145 list_del_rcu(&root->link); ··· 287 320 root = tdp_mmu_alloc_sp(vcpu); 288 321 tdp_mmu_init_sp(root, NULL, 0, role); 289 322 290 - refcount_set(&root->tdp_mmu_root_count, 1); 323 + /* 324 + * TDP MMU roots are kept until they are explicitly invalidated, either 325 + * by a memslot update or by the destruction of the VM. 
Initialize the 326 + * refcount to two; one reference for the vCPU, and one reference for 327 + * the TDP MMU itself, which is held until the root is invalidated and 328 + * is ultimately put by tdp_mmu_zap_root_work(). 329 + */ 330 + refcount_set(&root->tdp_mmu_root_count, 2); 291 331 292 332 spin_lock(&kvm->arch.tdp_mmu_pages_lock); 293 333 list_add_rcu(&root->link, &kvm->arch.tdp_mmu_roots); ··· 920 946 /* 921 947 * Mark each TDP MMU root as invalid to prevent vCPUs from reusing a root that 922 948 * is about to be zapped, e.g. in response to a memslots update. The actual 923 - * zapping is performed asynchronously, so a reference is taken on all roots. 924 - * Using a separate workqueue makes it easy to ensure that the destruction is 925 - * performed before the "fast zap" completes, without keeping a separate list 926 - * of invalidated roots; the list is effectively the list of work items in 927 - * the workqueue. 949 + * zapping is performed asynchronously. Using a separate workqueue makes it 950 + * easy to ensure that the destruction is performed before the "fast zap" 951 + * completes, without keeping a separate list of invalidated roots; the list is 952 + * effectively the list of work items in the workqueue. 928 953 * 929 - * Get a reference even if the root is already invalid, the asynchronous worker 930 - * assumes it was gifted a reference to the root it processes. Because mmu_lock 931 - * is held for write, it should be impossible to observe a root with zero refcount, 932 - * i.e. the list of roots cannot be stale. 933 - * 934 - * This has essentially the same effect for the TDP MMU 935 - * as updating mmu_valid_gen does for the shadow MMU. 954 + * Note, the asynchronous worker is gifted the TDP MMU's reference. 955 + * See kvm_tdp_mmu_get_vcpu_root_hpa(). 
936 956 */ 937 957 void kvm_tdp_mmu_invalidate_all_roots(struct kvm *kvm) 938 958 { 939 959 struct kvm_mmu_page *root; 940 960 941 - lockdep_assert_held_write(&kvm->mmu_lock); 942 - list_for_each_entry(root, &kvm->arch.tdp_mmu_roots, link) { 943 - if (!root->role.invalid && 944 - !WARN_ON_ONCE(!kvm_tdp_mmu_get_root(root))) { 961 + /* 962 + * mmu_lock must be held for write to ensure that a root doesn't become 963 + * invalid while there are active readers (invalidating a root while 964 + * there are active readers may or may not be problematic in practice, 965 + * but it's uncharted territory and not supported). 966 + * 967 + * Waive the assertion if there are no users of @kvm, i.e. the VM is 968 + * being destroyed after all references have been put, or if no vCPUs 969 + * have been created (which means there are no roots), i.e. the VM is 970 + * being destroyed in an error path of KVM_CREATE_VM. 971 + */ 972 + if (IS_ENABLED(CONFIG_PROVE_LOCKING) && 973 + refcount_read(&kvm->users_count) && kvm->created_vcpus) 974 + lockdep_assert_held_write(&kvm->mmu_lock); 975 + 976 + /* 977 + * As above, mmu_lock isn't held when destroying the VM! There can't 978 + * be other references to @kvm, i.e. nothing else can invalidate roots 979 + * or be consuming roots, but walking the list of roots does need to be 980 + * guarded against roots being deleted by the asynchronous zap worker. 981 + */ 982 + rcu_read_lock(); 983 + 984 + list_for_each_entry_rcu(root, &kvm->arch.tdp_mmu_roots, link) { 985 + if (!root->role.invalid) { 945 986 root->role.invalid = true; 946 987 tdp_mmu_schedule_zap_root(kvm, root); 947 988 } 948 989 } 990 + 991 + rcu_read_unlock(); 949 992 } 950 993 951 994 /*