Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

RISC-V: KVM: Implement VMID allocator

We implement a simple VMID allocator for Guests/VMs which:
1. Detects the number of VMID bits at boot-time
2. Uses a global version counter (written with vmid_lock held, read
without any lock) to track the VMID version and increments it
whenever we run out of VMIDs
3. Flushes Guest TLBs on all host CPUs whenever we run out
of VMIDs
4. Force-updates the HW Stage2 VMID for each Guest VCPU whenever
the VMID changes, using the VCPU request KVM_REQ_UPDATE_HGATP

Signed-off-by: Anup Patel <anup.patel@wdc.com>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Alexander Graf <graf@amazon.com>
Acked-by: Palmer Dabbelt <palmerdabbelt@google.com>

authored by

Anup Patel and committed by
Anup Patel
fd7bb4a2 5a5d79ac

+249 -2
+24
arch/riscv/include/asm/kvm_host.h
··· 26 26 #define KVM_REQ_SLEEP \ 27 27 KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) 28 28 #define KVM_REQ_VCPU_RESET KVM_ARCH_REQ(1) 29 + #define KVM_REQ_UPDATE_HGATP KVM_ARCH_REQ(2) 29 30 30 31 struct kvm_vm_stat { 31 32 struct kvm_vm_stat_generic generic; ··· 44 43 struct kvm_arch_memory_slot { 45 44 }; 46 45 46 + struct kvm_vmid { 47 + /* 48 + * Writes to vmid_version and vmid happen with vmid_lock held 49 + * whereas reads happen without any lock held. 50 + */ 51 + unsigned long vmid_version; 52 + unsigned long vmid; 53 + }; 54 + 47 55 struct kvm_arch { 56 + /* stage2 vmid */ 57 + struct kvm_vmid vmid; 58 + 48 59 /* stage2 page table */ 49 60 pgd_t *pgd; 50 61 phys_addr_t pgd_phys; ··· 186 173 static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} 187 174 static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {} 188 175 176 + void __kvm_riscv_hfence_gvma_vmid_gpa(unsigned long gpa, unsigned long vmid); 177 + void __kvm_riscv_hfence_gvma_vmid(unsigned long vmid); 178 + void __kvm_riscv_hfence_gvma_gpa(unsigned long gpa); 179 + void __kvm_riscv_hfence_gvma_all(void); 180 + 189 181 int kvm_riscv_stage2_map(struct kvm_vcpu *vcpu, 190 182 struct kvm_memory_slot *memslot, 191 183 gpa_t gpa, unsigned long hva, bool is_write); ··· 198 180 int kvm_riscv_stage2_alloc_pgd(struct kvm *kvm); 199 181 void kvm_riscv_stage2_free_pgd(struct kvm *kvm); 200 182 void kvm_riscv_stage2_update_hgatp(struct kvm_vcpu *vcpu); 183 + 184 + void kvm_riscv_stage2_vmid_detect(void); 185 + unsigned long kvm_riscv_stage2_vmid_bits(void); 186 + int kvm_riscv_stage2_vmid_init(struct kvm *kvm); 187 + bool kvm_riscv_stage2_vmid_ver_changed(struct kvm_vmid *vmid); 188 + void kvm_riscv_stage2_vmid_update(struct kvm_vcpu *vcpu); 201 189 202 190 void __kvm_riscv_unpriv_trap(void); 203 191
+12 -2
arch/riscv/kvm/Makefile
··· 9 9 10 10 obj-$(CONFIG_KVM) += kvm.o 11 11 12 - kvm-y += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/binary_stats.o \ 13 - $(KVM)/eventfd.o main.o vm.o mmu.o vcpu.o vcpu_exit.o vcpu_switch.o 12 + kvm-y += $(KVM)/kvm_main.o 13 + kvm-y += $(KVM)/coalesced_mmio.o 14 + kvm-y += $(KVM)/binary_stats.o 15 + kvm-y += $(KVM)/eventfd.o 16 + kvm-y += main.o 17 + kvm-y += vm.o 18 + kvm-y += vmid.o 19 + kvm-y += tlb.o 20 + kvm-y += mmu.o 21 + kvm-y += vcpu.o 22 + kvm-y += vcpu_exit.o 23 + kvm-y += vcpu_switch.o
+4
arch/riscv/kvm/main.c
··· 79 79 return -ENODEV; 80 80 } 81 81 82 + kvm_riscv_stage2_vmid_detect(); 83 + 82 84 kvm_info("hypervisor extension available\n"); 85 + 86 + kvm_info("VMID %ld bits available\n", kvm_riscv_stage2_vmid_bits()); 83 87 84 88 return 0; 85 89 }
+74
arch/riscv/kvm/tlb.S
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + /* 3 + * Copyright (C) 2019 Western Digital Corporation or its affiliates. 4 + * 5 + * Authors: 6 + * Anup Patel <anup.patel@wdc.com> 7 + */ 8 + 9 + #include <linux/linkage.h> 10 + #include <asm/asm.h> 11 + 12 + .text 13 + .altmacro 14 + .option norelax 15 + 16 + /* 17 + * Instruction encoding of hfence.gvma is: 18 + * HFENCE.GVMA rs1, rs2 19 + * HFENCE.GVMA zero, rs2 20 + * HFENCE.GVMA rs1 21 + * HFENCE.GVMA 22 + * 23 + * rs1!=zero and rs2!=zero ==> HFENCE.GVMA rs1, rs2 24 + * rs1==zero and rs2!=zero ==> HFENCE.GVMA zero, rs2 25 + * rs1!=zero and rs2==zero ==> HFENCE.GVMA rs1 26 + * rs1==zero and rs2==zero ==> HFENCE.GVMA 27 + * 28 + * Instruction encoding of HFENCE.GVMA is: 29 + * 0110001 rs2(5) rs1(5) 000 00000 1110011 30 + */ 31 + 32 + ENTRY(__kvm_riscv_hfence_gvma_vmid_gpa) 33 + /* 34 + * rs1 = a0 (GPA) 35 + * rs2 = a1 (VMID) 36 + * HFENCE.GVMA a0, a1 37 + * 0110001 01011 01010 000 00000 1110011 38 + */ 39 + .word 0x62b50073 40 + ret 41 + ENDPROC(__kvm_riscv_hfence_gvma_vmid_gpa) 42 + 43 + ENTRY(__kvm_riscv_hfence_gvma_vmid) 44 + /* 45 + * rs1 = zero 46 + * rs2 = a0 (VMID) 47 + * HFENCE.GVMA zero, a0 48 + * 0110001 01010 00000 000 00000 1110011 49 + */ 50 + .word 0x62a00073 51 + ret 52 + ENDPROC(__kvm_riscv_hfence_gvma_vmid) 53 + 54 + ENTRY(__kvm_riscv_hfence_gvma_gpa) 55 + /* 56 + * rs1 = a0 (GPA) 57 + * rs2 = zero 58 + * HFENCE.GVMA a0 59 + * 0110001 00000 01010 000 00000 1110011 60 + */ 61 + .word 0x62050073 62 + ret 63 + ENDPROC(__kvm_riscv_hfence_gvma_gpa) 64 + 65 + ENTRY(__kvm_riscv_hfence_gvma_all) 66 + /* 67 + * rs1 = zero 68 + * rs2 = zero 69 + * HFENCE.GVMA 70 + * 0110001 00000 00000 000 00000 1110011 71 + */ 72 + .word 0x62000073 73 + ret 74 + ENDPROC(__kvm_riscv_hfence_gvma_all)
+9
arch/riscv/kvm/vcpu.c
··· 622 622 623 623 if (kvm_check_request(KVM_REQ_VCPU_RESET, vcpu)) 624 624 kvm_riscv_reset_vcpu(vcpu); 625 + 626 + if (kvm_check_request(KVM_REQ_UPDATE_HGATP, vcpu)) 627 + kvm_riscv_stage2_update_hgatp(vcpu); 628 + 629 + if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) 630 + __kvm_riscv_hfence_gvma_all(); 625 631 } 626 632 } 627 633 ··· 673 667 /* Check conditions before entering the guest */ 674 668 cond_resched(); 675 669 670 + kvm_riscv_stage2_vmid_update(vcpu); 671 + 676 672 kvm_riscv_check_vcpu_requests(vcpu); 677 673 678 674 preempt_disable(); ··· 711 703 kvm_riscv_update_hvip(vcpu); 712 704 713 705 if (ret <= 0 || 706 + kvm_riscv_stage2_vmid_ver_changed(&vcpu->kvm->arch.vmid) || 714 707 kvm_request_pending(vcpu)) { 715 708 vcpu->mode = OUTSIDE_GUEST_MODE; 716 709 local_irq_enable();
+6
arch/riscv/kvm/vm.c
··· 41 41 if (r) 42 42 return r; 43 43 44 + r = kvm_riscv_stage2_vmid_init(kvm); 45 + if (r) { 46 + kvm_riscv_stage2_free_pgd(kvm); 47 + return r; 48 + } 49 + 44 50 return 0; 45 51 } 46 52
+120
arch/riscv/kvm/vmid.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * Copyright (C) 2019 Western Digital Corporation or its affiliates. 4 + * 5 + * Authors: 6 + * Anup Patel <anup.patel@wdc.com> 7 + */ 8 + 9 + #include <linux/bitops.h> 10 + #include <linux/cpumask.h> 11 + #include <linux/errno.h> 12 + #include <linux/err.h> 13 + #include <linux/module.h> 14 + #include <linux/kvm_host.h> 15 + #include <asm/csr.h> 16 + #include <asm/sbi.h> 17 + 18 + static unsigned long vmid_version = 1; 19 + static unsigned long vmid_next; 20 + static unsigned long vmid_bits; 21 + static DEFINE_SPINLOCK(vmid_lock); 22 + 23 + void kvm_riscv_stage2_vmid_detect(void) 24 + { 25 + unsigned long old; 26 + 27 + /* Figure-out number of VMID bits in HW */ 28 + old = csr_read(CSR_HGATP); 29 + csr_write(CSR_HGATP, old | HGATP_VMID_MASK); 30 + vmid_bits = csr_read(CSR_HGATP); 31 + vmid_bits = (vmid_bits & HGATP_VMID_MASK) >> HGATP_VMID_SHIFT; 32 + vmid_bits = fls_long(vmid_bits); 33 + csr_write(CSR_HGATP, old); 34 + 35 + /* We polluted local TLB so flush all guest TLB */ 36 + __kvm_riscv_hfence_gvma_all(); 37 + 38 + /* We don't use VMID bits if they are not sufficient */ 39 + if ((1UL << vmid_bits) < num_possible_cpus()) 40 + vmid_bits = 0; 41 + } 42 + 43 + unsigned long kvm_riscv_stage2_vmid_bits(void) 44 + { 45 + return vmid_bits; 46 + } 47 + 48 + int kvm_riscv_stage2_vmid_init(struct kvm *kvm) 49 + { 50 + /* Mark the initial VMID and VMID version invalid */ 51 + kvm->arch.vmid.vmid_version = 0; 52 + kvm->arch.vmid.vmid = 0; 53 + 54 + return 0; 55 + } 56 + 57 + bool kvm_riscv_stage2_vmid_ver_changed(struct kvm_vmid *vmid) 58 + { 59 + if (!vmid_bits) 60 + return false; 61 + 62 + return unlikely(READ_ONCE(vmid->vmid_version) != 63 + READ_ONCE(vmid_version)); 64 + } 65 + 66 + void kvm_riscv_stage2_vmid_update(struct kvm_vcpu *vcpu) 67 + { 68 + int i; 69 + struct kvm_vcpu *v; 70 + struct cpumask hmask; 71 + struct kvm_vmid *vmid = &vcpu->kvm->arch.vmid; 72 + 73 + if (!kvm_riscv_stage2_vmid_ver_changed(vmid)) 
74 + return; 75 + 76 + spin_lock(&vmid_lock); 77 + 78 + /* 79 + * We need to re-check the vmid_version here to see whether 80 + * another vcpu already allocated a valid vmid for this vm. 81 + */ 82 + if (!kvm_riscv_stage2_vmid_ver_changed(vmid)) { 83 + spin_unlock(&vmid_lock); 84 + return; 85 + } 86 + 87 + /* First user of a new VMID version? */ 88 + if (unlikely(vmid_next == 0)) { 89 + WRITE_ONCE(vmid_version, READ_ONCE(vmid_version) + 1); 90 + vmid_next = 1; 91 + 92 + /* 93 + * We ran out of VMIDs so we increment vmid_version and 94 + * start assigning VMIDs from 1. 95 + * 96 + * This also means the existing VMID assignment to all Guest 97 + * instances is invalid and we have to force VMID re-assignment 98 + * for all Guest instances. The Guest instances that were not 99 + * running will automatically pick up new VMIDs because they will 100 + * call kvm_riscv_stage2_vmid_update() whenever they enter 101 + * the in-kernel run loop. For Guest instances that are already 102 + * running, we force VM exits on all host CPUs using IPI and 103 + * flush all Guest TLBs. 104 + */ 105 + riscv_cpuid_to_hartid_mask(cpu_online_mask, &hmask); 106 + sbi_remote_hfence_gvma(cpumask_bits(&hmask), 0, 0); 107 + } 108 + 109 + vmid->vmid = vmid_next; 110 + vmid_next++; 111 + vmid_next &= (1 << vmid_bits) - 1; 112 + 113 + WRITE_ONCE(vmid->vmid_version, READ_ONCE(vmid_version)); 114 + 115 + spin_unlock(&vmid_lock); 116 + 117 + /* Request stage2 page table update for all VCPUs */ 118 + kvm_for_each_vcpu(i, v, vcpu->kvm) 119 + kvm_make_request(KVM_REQ_UPDATE_HGATP, v); 120 + }