Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'kvm-riscv-7.1-1' of https://github.com/kvm-riscv/linux into HEAD

KVM/riscv changes for 7.1

- Fix steal time shared memory alignment checks
- Fix vector context allocation leak
- Fix array out-of-bounds in pmu_ctr_read() and pmu_fw_ctr_read_hi()
- Fix double-free of sdata in kvm_pmu_clear_snapshot_area()
- Fix integer overflow in kvm_pmu_validate_counter_mask()
- Fix shift-out-of-bounds in make_xfence_request()
- Fix lost write protection on huge pages during dirty logging
- Split huge pages during fault handling for dirty logging
- Skip CSR restore if VCPU is reloaded on the same core
- Implement kvm_arch_has_default_irqchip() for KVM selftests
- Factor out ISA checks into separate sources
- Add hideleg to struct kvm_vcpu_config
- Factor out VCPU config into separate sources
- Support configuration of per-VM HGATP mode from KVM user space

+956 -501
+44 -7
arch/riscv/include/asm/kvm_gstage.h
··· 15 15 #define KVM_GSTAGE_FLAGS_LOCAL BIT(0) 16 16 unsigned long vmid; 17 17 pgd_t *pgd; 18 + unsigned long pgd_levels; 18 19 }; 19 20 20 21 struct kvm_gstage_mapping { ··· 30 29 #define kvm_riscv_gstage_index_bits 10 31 30 #endif 32 31 33 - extern unsigned long kvm_riscv_gstage_mode; 34 - extern unsigned long kvm_riscv_gstage_pgd_levels; 32 + extern unsigned long kvm_riscv_gstage_max_pgd_levels; 35 33 36 34 #define kvm_riscv_gstage_pgd_xbits 2 37 35 #define kvm_riscv_gstage_pgd_size (1UL << (HGATP_PAGE_SHIFT + kvm_riscv_gstage_pgd_xbits)) 38 - #define kvm_riscv_gstage_gpa_bits (HGATP_PAGE_SHIFT + \ 39 - (kvm_riscv_gstage_pgd_levels * \ 40 - kvm_riscv_gstage_index_bits) + \ 41 - kvm_riscv_gstage_pgd_xbits) 42 - #define kvm_riscv_gstage_gpa_size ((gpa_t)(1ULL << kvm_riscv_gstage_gpa_bits)) 36 + 37 + static inline unsigned long kvm_riscv_gstage_gpa_bits(unsigned long pgd_levels) 38 + { 39 + return (HGATP_PAGE_SHIFT + 40 + pgd_levels * kvm_riscv_gstage_index_bits + 41 + kvm_riscv_gstage_pgd_xbits); 42 + } 43 + 44 + static inline gpa_t kvm_riscv_gstage_gpa_size(unsigned long pgd_levels) 45 + { 46 + return BIT_ULL(kvm_riscv_gstage_gpa_bits(pgd_levels)); 47 + } 43 48 44 49 bool kvm_riscv_gstage_get_leaf(struct kvm_gstage *gstage, gpa_t addr, 45 50 pte_t **ptepp, u32 *ptep_level); ··· 59 52 gpa_t gpa, phys_addr_t hpa, unsigned long page_size, 60 53 bool page_rdonly, bool page_exec, 61 54 struct kvm_gstage_mapping *out_map); 55 + 56 + int kvm_riscv_gstage_split_huge(struct kvm_gstage *gstage, 57 + struct kvm_mmu_memory_cache *pcache, 58 + gpa_t addr, u32 target_level, bool flush); 62 59 63 60 enum kvm_riscv_gstage_op { 64 61 GSTAGE_OP_NOP = 0, /* Nothing */ ··· 79 68 void kvm_riscv_gstage_wp_range(struct kvm_gstage *gstage, gpa_t start, gpa_t end); 80 69 81 70 void kvm_riscv_gstage_mode_detect(void); 71 + 72 + static inline unsigned long kvm_riscv_gstage_mode(unsigned long pgd_levels) 73 + { 74 + switch (pgd_levels) { 75 + case 2: 76 + return HGATP_MODE_SV32X4; 77 + 
case 3: 78 + return HGATP_MODE_SV39X4; 79 + case 4: 80 + return HGATP_MODE_SV48X4; 81 + case 5: 82 + return HGATP_MODE_SV57X4; 83 + default: 84 + WARN_ON_ONCE(1); 85 + return HGATP_MODE_OFF; 86 + } 87 + } 88 + 89 + static inline void kvm_riscv_gstage_init(struct kvm_gstage *gstage, struct kvm *kvm) 90 + { 91 + gstage->kvm = kvm; 92 + gstage->flags = 0; 93 + gstage->vmid = READ_ONCE(kvm->arch.vmid.vmid); 94 + gstage->pgd = kvm->arch.pgd; 95 + gstage->pgd_levels = kvm->arch.pgd_levels; 96 + } 82 97 83 98 #endif
+5 -18
arch/riscv/include/asm/kvm_host.h
··· 18 18 #include <asm/ptrace.h> 19 19 #include <asm/kvm_tlb.h> 20 20 #include <asm/kvm_vmid.h> 21 + #include <asm/kvm_vcpu_config.h> 21 22 #include <asm/kvm_vcpu_fp.h> 22 23 #include <asm/kvm_vcpu_insn.h> 23 24 #include <asm/kvm_vcpu_sbi.h> ··· 47 46 #define KVM_REQ_STEAL_UPDATE KVM_ARCH_REQ(6) 48 47 49 48 #define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE 50 - 51 - #define KVM_HEDELEG_DEFAULT (BIT(EXC_INST_MISALIGNED) | \ 52 - BIT(EXC_INST_ILLEGAL) | \ 53 - BIT(EXC_BREAKPOINT) | \ 54 - BIT(EXC_SYSCALL) | \ 55 - BIT(EXC_INST_PAGE_FAULT) | \ 56 - BIT(EXC_LOAD_PAGE_FAULT) | \ 57 - BIT(EXC_STORE_PAGE_FAULT)) 58 - 59 - #define KVM_HIDELEG_DEFAULT (BIT(IRQ_VS_SOFT) | \ 60 - BIT(IRQ_VS_TIMER) | \ 61 - BIT(IRQ_VS_EXT)) 62 49 63 50 #define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \ 64 51 KVM_DIRTY_LOG_INITIALLY_SET) ··· 83 94 /* G-stage page table */ 84 95 pgd_t *pgd; 85 96 phys_addr_t pgd_phys; 97 + unsigned long pgd_levels; 86 98 87 99 /* Guest Timer */ 88 100 struct kvm_guest_timer timer; ··· 155 165 unsigned long vsatp; 156 166 unsigned long scounteren; 157 167 unsigned long senvcfg; 158 - }; 159 - 160 - struct kvm_vcpu_config { 161 - u64 henvcfg; 162 - u64 hstateen0; 163 - unsigned long hedeleg; 164 168 }; 165 169 166 170 struct kvm_vcpu_smstateen_csr { ··· 256 272 257 273 /* 'static' configurations which are set only once */ 258 274 struct kvm_vcpu_config cfg; 275 + 276 + /* Indicates modified guest CSRs */ 277 + bool csr_dirty; 259 278 260 279 /* SBI steal-time accounting */ 261 280 struct {
+20
arch/riscv/include/asm/kvm_isa.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0-only */ 2 + /* 3 + * Copyright (c) 2026 Qualcomm Technologies, Inc. 4 + */ 5 + 6 + #ifndef __KVM_RISCV_ISA_H 7 + #define __KVM_RISCV_ISA_H 8 + 9 + #include <linux/types.h> 10 + 11 + unsigned long kvm_riscv_base2isa_ext(unsigned long base_ext); 12 + 13 + int __kvm_riscv_isa_check_host(unsigned long ext, unsigned long *base_ext); 14 + #define kvm_riscv_isa_check_host(ext) \ 15 + __kvm_riscv_isa_check_host(KVM_RISCV_ISA_EXT_##ext, NULL) 16 + 17 + bool kvm_riscv_isa_enable_allowed(unsigned long ext); 18 + bool kvm_riscv_isa_disable_allowed(unsigned long ext); 19 + 20 + #endif
+25
arch/riscv/include/asm/kvm_vcpu_config.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0-only */ 2 + /* 3 + * Copyright (c) 2026 Qualcomm Technologies, Inc. 4 + */ 5 + 6 + #ifndef __KVM_VCPU_RISCV_CONFIG_H 7 + #define __KVM_VCPU_RISCV_CONFIG_H 8 + 9 + #include <linux/types.h> 10 + 11 + struct kvm_vcpu; 12 + 13 + struct kvm_vcpu_config { 14 + u64 henvcfg; 15 + u64 hstateen0; 16 + unsigned long hedeleg; 17 + unsigned long hideleg; 18 + }; 19 + 20 + void kvm_riscv_vcpu_config_init(struct kvm_vcpu *vcpu); 21 + void kvm_riscv_vcpu_config_guest_debug(struct kvm_vcpu *vcpu); 22 + void kvm_riscv_vcpu_config_ran_once(struct kvm_vcpu *vcpu); 23 + void kvm_riscv_vcpu_config_load(struct kvm_vcpu *vcpu); 24 + 25 + #endif
+4 -4
arch/riscv/include/uapi/asm/kvm.h
··· 110 110 __u64 state; 111 111 }; 112 112 113 + /* Possible states for kvm_riscv_timer */ 114 + #define KVM_RISCV_TIMER_STATE_OFF 0 115 + #define KVM_RISCV_TIMER_STATE_ON 1 116 + 113 117 /* 114 118 * ISA extension IDs specific to KVM. This is not the same as the host ISA 115 119 * extension IDs as that is internal to the host and should not be exposed ··· 241 237 struct kvm_riscv_sbi_fwft_feature misaligned_deleg; 242 238 struct kvm_riscv_sbi_fwft_feature pointer_masking; 243 239 }; 244 - 245 - /* Possible states for kvm_riscv_timer */ 246 - #define KVM_RISCV_TIMER_STATE_OFF 0 247 - #define KVM_RISCV_TIMER_STATE_ON 1 248 240 249 241 /* If you need to interpret the index values, here is the key: */ 250 242 #define KVM_REG_RISCV_TYPE_MASK 0x00000000FF000000
+2
arch/riscv/kvm/Makefile
··· 15 15 kvm-y += aia_device.o 16 16 kvm-y += aia_imsic.o 17 17 kvm-y += gstage.o 18 + kvm-y += isa.o 18 19 kvm-y += main.o 19 20 kvm-y += mmu.o 20 21 kvm-y += nacl.o 21 22 kvm-y += tlb.o 22 23 kvm-y += vcpu.o 24 + kvm-y += vcpu_config.o 23 25 kvm-y += vcpu_exit.o 24 26 kvm-y += vcpu_fp.o 25 27 kvm-y += vcpu_insn.o
+2 -2
arch/riscv/kvm/aia_device.c
··· 11 11 #include <linux/irqchip/riscv-imsic.h> 12 12 #include <linux/kvm_host.h> 13 13 #include <linux/uaccess.h> 14 - #include <linux/cpufeature.h> 14 + #include <asm/kvm_isa.h> 15 15 16 16 static int aia_create(struct kvm_device *dev, u32 type) 17 17 { ··· 23 23 if (irqchip_in_kernel(kvm)) 24 24 return -EEXIST; 25 25 26 - if (!riscv_isa_extension_available(NULL, SSAIA)) 26 + if (kvm_riscv_isa_check_host(SSAIA)) 27 27 return -ENODEV; 28 28 29 29 ret = -EBUSY;
+160 -38
arch/riscv/kvm/gstage.c
··· 12 12 #include <asm/kvm_gstage.h> 13 13 14 14 #ifdef CONFIG_64BIT 15 - unsigned long kvm_riscv_gstage_mode __ro_after_init = HGATP_MODE_SV39X4; 16 - unsigned long kvm_riscv_gstage_pgd_levels __ro_after_init = 3; 15 + unsigned long kvm_riscv_gstage_max_pgd_levels __ro_after_init = 3; 17 16 #else 18 - unsigned long kvm_riscv_gstage_mode __ro_after_init = HGATP_MODE_SV32X4; 19 - unsigned long kvm_riscv_gstage_pgd_levels __ro_after_init = 2; 17 + unsigned long kvm_riscv_gstage_max_pgd_levels __ro_after_init = 2; 20 18 #endif 21 19 22 20 #define gstage_pte_leaf(__ptep) \ 23 21 (pte_val(*(__ptep)) & (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC)) 24 22 25 - static inline unsigned long gstage_pte_index(gpa_t addr, u32 level) 23 + static inline unsigned long gstage_pte_index(struct kvm_gstage *gstage, 24 + gpa_t addr, u32 level) 26 25 { 27 26 unsigned long mask; 28 27 unsigned long shift = HGATP_PAGE_SHIFT + (kvm_riscv_gstage_index_bits * level); 29 28 30 - if (level == (kvm_riscv_gstage_pgd_levels - 1)) 29 + if (level == gstage->pgd_levels - 1) 31 30 mask = (PTRS_PER_PTE * (1UL << kvm_riscv_gstage_pgd_xbits)) - 1; 32 31 else 33 32 mask = PTRS_PER_PTE - 1; ··· 39 40 return (unsigned long)pfn_to_virt(__page_val_to_pfn(pte_val(pte))); 40 41 } 41 42 42 - static int gstage_page_size_to_level(unsigned long page_size, u32 *out_level) 43 + static int gstage_page_size_to_level(struct kvm_gstage *gstage, unsigned long page_size, 44 + u32 *out_level) 43 45 { 44 46 u32 i; 45 47 unsigned long psz = 1UL << 12; 46 48 47 - for (i = 0; i < kvm_riscv_gstage_pgd_levels; i++) { 49 + for (i = 0; i < gstage->pgd_levels; i++) { 48 50 if (page_size == (psz << (i * kvm_riscv_gstage_index_bits))) { 49 51 *out_level = i; 50 52 return 0; ··· 55 55 return -EINVAL; 56 56 } 57 57 58 - static int gstage_level_to_page_order(u32 level, unsigned long *out_pgorder) 58 + static int gstage_level_to_page_order(struct kvm_gstage *gstage, u32 level, 59 + unsigned long *out_pgorder) 59 60 { 60 - if 
(kvm_riscv_gstage_pgd_levels < level) 61 + if (gstage->pgd_levels < level) 61 62 return -EINVAL; 62 63 63 64 *out_pgorder = 12 + (level * kvm_riscv_gstage_index_bits); 64 65 return 0; 65 66 } 66 67 67 - static int gstage_level_to_page_size(u32 level, unsigned long *out_pgsize) 68 + static int gstage_level_to_page_size(struct kvm_gstage *gstage, u32 level, 69 + unsigned long *out_pgsize) 68 70 { 69 71 int rc; 70 72 unsigned long page_order = PAGE_SHIFT; 71 73 72 - rc = gstage_level_to_page_order(level, &page_order); 74 + rc = gstage_level_to_page_order(gstage, level, &page_order); 73 75 if (rc) 74 76 return rc; 75 77 ··· 83 81 pte_t **ptepp, u32 *ptep_level) 84 82 { 85 83 pte_t *ptep; 86 - u32 current_level = kvm_riscv_gstage_pgd_levels - 1; 84 + u32 current_level = gstage->pgd_levels - 1; 87 85 88 86 *ptep_level = current_level; 89 87 ptep = (pte_t *)gstage->pgd; 90 - ptep = &ptep[gstage_pte_index(addr, current_level)]; 88 + ptep = &ptep[gstage_pte_index(gstage, addr, current_level)]; 91 89 while (ptep && pte_val(ptep_get(ptep))) { 92 90 if (gstage_pte_leaf(ptep)) { 93 91 *ptep_level = current_level; ··· 99 97 current_level--; 100 98 *ptep_level = current_level; 101 99 ptep = (pte_t *)gstage_pte_page_vaddr(ptep_get(ptep)); 102 - ptep = &ptep[gstage_pte_index(addr, current_level)]; 100 + ptep = &ptep[gstage_pte_index(gstage, addr, current_level)]; 103 101 } else { 104 102 ptep = NULL; 105 103 } ··· 112 110 { 113 111 unsigned long order = PAGE_SHIFT; 114 112 115 - if (gstage_level_to_page_order(level, &order)) 113 + if (gstage_level_to_page_order(gstage, level, &order)) 116 114 return; 117 115 addr &= ~(BIT(order) - 1); 118 116 ··· 127 125 struct kvm_mmu_memory_cache *pcache, 128 126 const struct kvm_gstage_mapping *map) 129 127 { 130 - u32 current_level = kvm_riscv_gstage_pgd_levels - 1; 128 + u32 current_level = gstage->pgd_levels - 1; 131 129 pte_t *next_ptep = (pte_t *)gstage->pgd; 132 - pte_t *ptep = &next_ptep[gstage_pte_index(map->addr, current_level)]; 130 + 
pte_t *ptep = &next_ptep[gstage_pte_index(gstage, map->addr, current_level)]; 133 131 134 132 if (current_level < map->level) 135 133 return -EINVAL; ··· 153 151 } 154 152 155 153 current_level--; 156 - ptep = &next_ptep[gstage_pte_index(map->addr, current_level)]; 154 + ptep = &next_ptep[gstage_pte_index(gstage, map->addr, current_level)]; 157 155 } 158 156 159 157 if (pte_val(*ptep) != pte_val(map->pte)) { ··· 165 163 return 0; 166 164 } 167 165 166 + static void kvm_riscv_gstage_update_pte_prot(struct kvm_gstage *gstage, u32 level, 167 + gpa_t addr, pte_t *ptep, pgprot_t prot) 168 + { 169 + pte_t new_pte; 170 + 171 + if (pgprot_val(pte_pgprot(ptep_get(ptep))) == pgprot_val(prot)) 172 + return; 173 + 174 + new_pte = pfn_pte(pte_pfn(ptep_get(ptep)), prot); 175 + new_pte = pte_mkdirty(new_pte); 176 + 177 + set_pte(ptep, new_pte); 178 + 179 + gstage_tlb_flush(gstage, level, addr); 180 + } 181 + 168 182 int kvm_riscv_gstage_map_page(struct kvm_gstage *gstage, 169 183 struct kvm_mmu_memory_cache *pcache, 170 184 gpa_t gpa, phys_addr_t hpa, unsigned long page_size, 171 185 bool page_rdonly, bool page_exec, 172 186 struct kvm_gstage_mapping *out_map) 173 187 { 188 + bool found_leaf; 189 + u32 ptep_level; 174 190 pgprot_t prot; 191 + pte_t *ptep; 175 192 int ret; 176 193 177 194 out_map->addr = gpa; 178 195 out_map->level = 0; 179 196 180 - ret = gstage_page_size_to_level(page_size, &out_map->level); 197 + ret = gstage_page_size_to_level(gstage, page_size, &out_map->level); 181 198 if (ret) 182 199 return ret; 183 200 ··· 224 203 else 225 204 prot = PAGE_WRITE; 226 205 } 206 + 207 + found_leaf = kvm_riscv_gstage_get_leaf(gstage, gpa, &ptep, &ptep_level); 208 + if (found_leaf) { 209 + /* 210 + * ptep_level is the current gstage mapping level of addr, out_map->level 211 + * is the required mapping level during fault handling. 212 + * 213 + * 1) ptep_level > out_map->level 214 + * This happens when dirty logging is enabled and huge pages are used. 
215 + * KVM must track the pages at 4K level, and split the huge mapping 216 + * into 4K mappings. 217 + * 218 + * 2) ptep_level < out_map->level 219 + * This happens when dirty logging is disabled and huge pages are used. 220 + * The gstage is split into 4K mappings, but the out_map level is now 221 + * back to the huge page level. Ignore the out_map level this time, and 222 + * just update the pte prot here. Otherwise, we would fall back to mapping 223 + * the gstage at huge page level in `kvm_riscv_gstage_set_pte`, with the 224 + * overhead of freeing the page tables(not support now), which would slow 225 + * down the vCPUs' performance. 226 + * 227 + * It is better to recover the huge page mapping in the ioctl context when 228 + * disabling dirty logging. 229 + * 230 + * 3) ptep_level == out_map->level 231 + * We already have the ptep, just update the pte prot if the pfn not change. 232 + * There is no need to invoke `kvm_riscv_gstage_set_pte` again. 233 + */ 234 + if (ptep_level > out_map->level) { 235 + kvm_riscv_gstage_split_huge(gstage, pcache, gpa, 236 + out_map->level, true); 237 + } else if (ALIGN_DOWN(PFN_PHYS(pte_pfn(ptep_get(ptep))), page_size) == hpa) { 238 + kvm_riscv_gstage_update_pte_prot(gstage, ptep_level, gpa, ptep, prot); 239 + return 0; 240 + } 241 + } 242 + 227 243 out_map->pte = pfn_pte(PFN_DOWN(hpa), prot); 228 244 out_map->pte = pte_mkdirty(out_map->pte); 229 245 230 246 return kvm_riscv_gstage_set_pte(gstage, pcache, out_map); 247 + } 248 + 249 + static inline unsigned long make_child_pte(unsigned long huge_pte, int index, 250 + unsigned long child_page_size) 251 + { 252 + unsigned long child_pte = huge_pte; 253 + unsigned long child_pfn_offset; 254 + 255 + /* 256 + * The child_pte already has the base address of the huge page being 257 + * split. So we just have to OR in the offset to the page at the next 258 + * lower level for the given index. 
259 + */ 260 + child_pfn_offset = index * (child_page_size / PAGE_SIZE); 261 + child_pte |= pte_val(pfn_pte(child_pfn_offset, __pgprot(0))); 262 + 263 + return child_pte; 264 + } 265 + 266 + int kvm_riscv_gstage_split_huge(struct kvm_gstage *gstage, 267 + struct kvm_mmu_memory_cache *pcache, 268 + gpa_t addr, u32 target_level, bool flush) 269 + { 270 + u32 current_level = gstage->pgd_levels - 1; 271 + pte_t *next_ptep = (pte_t *)gstage->pgd; 272 + unsigned long huge_pte, child_pte; 273 + unsigned long child_page_size; 274 + pte_t *ptep; 275 + int i, ret; 276 + 277 + if (!pcache) 278 + return -ENOMEM; 279 + 280 + while(current_level > target_level) { 281 + ptep = (pte_t *)&next_ptep[gstage_pte_index(gstage, addr, current_level)]; 282 + 283 + if (!pte_val(ptep_get(ptep))) 284 + break; 285 + 286 + if (!gstage_pte_leaf(ptep)) { 287 + next_ptep = (pte_t *)gstage_pte_page_vaddr(ptep_get(ptep)); 288 + current_level--; 289 + continue; 290 + } 291 + 292 + huge_pte = pte_val(ptep_get(ptep)); 293 + 294 + ret = gstage_level_to_page_size(gstage, current_level - 1, &child_page_size); 295 + if (ret) 296 + return ret; 297 + 298 + next_ptep = kvm_mmu_memory_cache_alloc(pcache); 299 + if (!next_ptep) 300 + return -ENOMEM; 301 + 302 + for (i = 0; i < PTRS_PER_PTE; i++) { 303 + child_pte = make_child_pte(huge_pte, i, child_page_size); 304 + set_pte((pte_t *)&next_ptep[i], __pte(child_pte)); 305 + } 306 + 307 + set_pte(ptep, pfn_pte(PFN_DOWN(__pa(next_ptep)), 308 + __pgprot(_PAGE_TABLE))); 309 + 310 + if (flush) 311 + gstage_tlb_flush(gstage, current_level, addr); 312 + 313 + current_level--; 314 + } 315 + 316 + return 0; 231 317 } 232 318 233 319 void kvm_riscv_gstage_op_pte(struct kvm_gstage *gstage, gpa_t addr, ··· 345 217 u32 next_ptep_level; 346 218 unsigned long next_page_size, page_size; 347 219 348 - ret = gstage_level_to_page_size(ptep_level, &page_size); 220 + ret = gstage_level_to_page_size(gstage, ptep_level, &page_size); 349 221 if (ret) 350 222 return; 351 223 ··· 357 229 
if (ptep_level && !gstage_pte_leaf(ptep)) { 358 230 next_ptep = (pte_t *)gstage_pte_page_vaddr(ptep_get(ptep)); 359 231 next_ptep_level = ptep_level - 1; 360 - ret = gstage_level_to_page_size(next_ptep_level, &next_page_size); 232 + ret = gstage_level_to_page_size(gstage, next_ptep_level, &next_page_size); 361 233 if (ret) 362 234 return; 363 235 ··· 391 263 392 264 while (addr < end) { 393 265 found_leaf = kvm_riscv_gstage_get_leaf(gstage, addr, &ptep, &ptep_level); 394 - ret = gstage_level_to_page_size(ptep_level, &page_size); 266 + ret = gstage_level_to_page_size(gstage, ptep_level, &page_size); 395 267 if (ret) 396 268 break; 397 269 ··· 425 297 426 298 while (addr < end) { 427 299 found_leaf = kvm_riscv_gstage_get_leaf(gstage, addr, &ptep, &ptep_level); 428 - ret = gstage_level_to_page_size(ptep_level, &page_size); 300 + ret = gstage_level_to_page_size(gstage, ptep_level, &page_size); 429 301 if (ret) 430 302 break; 431 303 432 304 if (!found_leaf) 433 305 goto next; 434 306 435 - if (!(addr & (page_size - 1)) && ((end - addr) >= page_size)) 436 - kvm_riscv_gstage_op_pte(gstage, addr, ptep, 437 - ptep_level, GSTAGE_OP_WP); 438 - 307 + addr = ALIGN_DOWN(addr, page_size); 308 + kvm_riscv_gstage_op_pte(gstage, addr, ptep, 309 + ptep_level, GSTAGE_OP_WP); 439 310 next: 440 311 addr += page_size; 441 312 } ··· 446 319 /* Try Sv57x4 G-stage mode */ 447 320 csr_write(CSR_HGATP, HGATP_MODE_SV57X4 << HGATP_MODE_SHIFT); 448 321 if ((csr_read(CSR_HGATP) >> HGATP_MODE_SHIFT) == HGATP_MODE_SV57X4) { 449 - kvm_riscv_gstage_mode = HGATP_MODE_SV57X4; 450 - kvm_riscv_gstage_pgd_levels = 5; 322 + kvm_riscv_gstage_max_pgd_levels = 5; 451 323 goto done; 452 324 } 453 325 454 326 /* Try Sv48x4 G-stage mode */ 455 327 csr_write(CSR_HGATP, HGATP_MODE_SV48X4 << HGATP_MODE_SHIFT); 456 328 if ((csr_read(CSR_HGATP) >> HGATP_MODE_SHIFT) == HGATP_MODE_SV48X4) { 457 - kvm_riscv_gstage_mode = HGATP_MODE_SV48X4; 458 - kvm_riscv_gstage_pgd_levels = 4; 329 + kvm_riscv_gstage_max_pgd_levels = 
4; 459 330 goto done; 460 331 } 461 332 462 333 /* Try Sv39x4 G-stage mode */ 463 334 csr_write(CSR_HGATP, HGATP_MODE_SV39X4 << HGATP_MODE_SHIFT); 464 335 if ((csr_read(CSR_HGATP) >> HGATP_MODE_SHIFT) == HGATP_MODE_SV39X4) { 465 - kvm_riscv_gstage_mode = HGATP_MODE_SV39X4; 466 - kvm_riscv_gstage_pgd_levels = 3; 336 + kvm_riscv_gstage_max_pgd_levels = 3; 467 337 goto done; 468 338 } 469 339 #else /* CONFIG_32BIT */ 470 340 /* Try Sv32x4 G-stage mode */ 471 341 csr_write(CSR_HGATP, HGATP_MODE_SV32X4 << HGATP_MODE_SHIFT); 472 342 if ((csr_read(CSR_HGATP) >> HGATP_MODE_SHIFT) == HGATP_MODE_SV32X4) { 473 - kvm_riscv_gstage_mode = HGATP_MODE_SV32X4; 474 - kvm_riscv_gstage_pgd_levels = 2; 343 + kvm_riscv_gstage_max_pgd_levels = 2; 475 344 goto done; 476 345 } 477 346 #endif 478 347 479 348 /* KVM depends on !HGATP_MODE_OFF */ 480 - kvm_riscv_gstage_mode = HGATP_MODE_OFF; 481 - kvm_riscv_gstage_pgd_levels = 0; 349 + kvm_riscv_gstage_max_pgd_levels = 0; 482 350 483 351 done: 484 352 csr_write(CSR_HGATP, 0);
+253
arch/riscv/kvm/isa.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * Copyright (c) 2026 Qualcomm Technologies, Inc. 4 + */ 5 + 6 + #include <linux/cpufeature.h> 7 + #include <linux/errno.h> 8 + #include <linux/kvm_host.h> 9 + #include <linux/nospec.h> 10 + #include <linux/pgtable.h> 11 + #include <asm/kvm_isa.h> 12 + #include <asm/vector.h> 13 + 14 + #define KVM_ISA_EXT_ARR(ext) \ 15 + [KVM_RISCV_ISA_EXT_##ext] = RISCV_ISA_EXT_##ext 16 + 17 + /* Mapping between KVM ISA Extension ID & guest ISA extension ID */ 18 + static const unsigned long kvm_isa_ext_arr[] = { 19 + /* Single letter extensions (alphabetically sorted) */ 20 + [KVM_RISCV_ISA_EXT_A] = RISCV_ISA_EXT_a, 21 + [KVM_RISCV_ISA_EXT_C] = RISCV_ISA_EXT_c, 22 + [KVM_RISCV_ISA_EXT_D] = RISCV_ISA_EXT_d, 23 + [KVM_RISCV_ISA_EXT_F] = RISCV_ISA_EXT_f, 24 + [KVM_RISCV_ISA_EXT_H] = RISCV_ISA_EXT_h, 25 + [KVM_RISCV_ISA_EXT_I] = RISCV_ISA_EXT_i, 26 + [KVM_RISCV_ISA_EXT_M] = RISCV_ISA_EXT_m, 27 + [KVM_RISCV_ISA_EXT_V] = RISCV_ISA_EXT_v, 28 + /* Multi letter extensions (alphabetically sorted) */ 29 + KVM_ISA_EXT_ARR(SMNPM), 30 + KVM_ISA_EXT_ARR(SMSTATEEN), 31 + KVM_ISA_EXT_ARR(SSAIA), 32 + KVM_ISA_EXT_ARR(SSCOFPMF), 33 + KVM_ISA_EXT_ARR(SSNPM), 34 + KVM_ISA_EXT_ARR(SSTC), 35 + KVM_ISA_EXT_ARR(SVADE), 36 + KVM_ISA_EXT_ARR(SVADU), 37 + KVM_ISA_EXT_ARR(SVINVAL), 38 + KVM_ISA_EXT_ARR(SVNAPOT), 39 + KVM_ISA_EXT_ARR(SVPBMT), 40 + KVM_ISA_EXT_ARR(SVVPTC), 41 + KVM_ISA_EXT_ARR(ZAAMO), 42 + KVM_ISA_EXT_ARR(ZABHA), 43 + KVM_ISA_EXT_ARR(ZACAS), 44 + KVM_ISA_EXT_ARR(ZALASR), 45 + KVM_ISA_EXT_ARR(ZALRSC), 46 + KVM_ISA_EXT_ARR(ZAWRS), 47 + KVM_ISA_EXT_ARR(ZBA), 48 + KVM_ISA_EXT_ARR(ZBB), 49 + KVM_ISA_EXT_ARR(ZBC), 50 + KVM_ISA_EXT_ARR(ZBKB), 51 + KVM_ISA_EXT_ARR(ZBKC), 52 + KVM_ISA_EXT_ARR(ZBKX), 53 + KVM_ISA_EXT_ARR(ZBS), 54 + KVM_ISA_EXT_ARR(ZCA), 55 + KVM_ISA_EXT_ARR(ZCB), 56 + KVM_ISA_EXT_ARR(ZCD), 57 + KVM_ISA_EXT_ARR(ZCF), 58 + KVM_ISA_EXT_ARR(ZCLSD), 59 + KVM_ISA_EXT_ARR(ZCMOP), 60 + KVM_ISA_EXT_ARR(ZFA), 61 + KVM_ISA_EXT_ARR(ZFBFMIN), 62 
+ KVM_ISA_EXT_ARR(ZFH), 63 + KVM_ISA_EXT_ARR(ZFHMIN), 64 + KVM_ISA_EXT_ARR(ZICBOM), 65 + KVM_ISA_EXT_ARR(ZICBOP), 66 + KVM_ISA_EXT_ARR(ZICBOZ), 67 + KVM_ISA_EXT_ARR(ZICCRSE), 68 + KVM_ISA_EXT_ARR(ZICNTR), 69 + KVM_ISA_EXT_ARR(ZICOND), 70 + KVM_ISA_EXT_ARR(ZICSR), 71 + KVM_ISA_EXT_ARR(ZIFENCEI), 72 + KVM_ISA_EXT_ARR(ZIHINTNTL), 73 + KVM_ISA_EXT_ARR(ZIHINTPAUSE), 74 + KVM_ISA_EXT_ARR(ZIHPM), 75 + KVM_ISA_EXT_ARR(ZILSD), 76 + KVM_ISA_EXT_ARR(ZIMOP), 77 + KVM_ISA_EXT_ARR(ZKND), 78 + KVM_ISA_EXT_ARR(ZKNE), 79 + KVM_ISA_EXT_ARR(ZKNH), 80 + KVM_ISA_EXT_ARR(ZKR), 81 + KVM_ISA_EXT_ARR(ZKSED), 82 + KVM_ISA_EXT_ARR(ZKSH), 83 + KVM_ISA_EXT_ARR(ZKT), 84 + KVM_ISA_EXT_ARR(ZTSO), 85 + KVM_ISA_EXT_ARR(ZVBB), 86 + KVM_ISA_EXT_ARR(ZVBC), 87 + KVM_ISA_EXT_ARR(ZVFBFMIN), 88 + KVM_ISA_EXT_ARR(ZVFBFWMA), 89 + KVM_ISA_EXT_ARR(ZVFH), 90 + KVM_ISA_EXT_ARR(ZVFHMIN), 91 + KVM_ISA_EXT_ARR(ZVKB), 92 + KVM_ISA_EXT_ARR(ZVKG), 93 + KVM_ISA_EXT_ARR(ZVKNED), 94 + KVM_ISA_EXT_ARR(ZVKNHA), 95 + KVM_ISA_EXT_ARR(ZVKNHB), 96 + KVM_ISA_EXT_ARR(ZVKSED), 97 + KVM_ISA_EXT_ARR(ZVKSH), 98 + KVM_ISA_EXT_ARR(ZVKT), 99 + }; 100 + 101 + unsigned long kvm_riscv_base2isa_ext(unsigned long base_ext) 102 + { 103 + unsigned long i; 104 + 105 + for (i = 0; i < KVM_RISCV_ISA_EXT_MAX; i++) { 106 + if (kvm_isa_ext_arr[i] == base_ext) 107 + return i; 108 + } 109 + 110 + return KVM_RISCV_ISA_EXT_MAX; 111 + } 112 + 113 + int __kvm_riscv_isa_check_host(unsigned long kvm_ext, unsigned long *base_ext) 114 + { 115 + unsigned long host_ext; 116 + 117 + if (kvm_ext >= KVM_RISCV_ISA_EXT_MAX || 118 + kvm_ext >= ARRAY_SIZE(kvm_isa_ext_arr)) 119 + return -ENOENT; 120 + 121 + kvm_ext = array_index_nospec(kvm_ext, ARRAY_SIZE(kvm_isa_ext_arr)); 122 + switch (kvm_isa_ext_arr[kvm_ext]) { 123 + case RISCV_ISA_EXT_SMNPM: 124 + /* 125 + * Pointer masking effective in (H)S-mode is provided by the 126 + * Smnpm extension, so that extension is reported to the guest, 127 + * even though the CSR bits for configuring VS-mode pointer 128 + * masking 
on the host side are part of the Ssnpm extension. 129 + */ 130 + host_ext = RISCV_ISA_EXT_SSNPM; 131 + break; 132 + default: 133 + host_ext = kvm_isa_ext_arr[kvm_ext]; 134 + break; 135 + } 136 + 137 + if (!__riscv_isa_extension_available(NULL, host_ext)) 138 + return -ENOENT; 139 + 140 + if (base_ext) 141 + *base_ext = kvm_isa_ext_arr[kvm_ext]; 142 + 143 + return 0; 144 + } 145 + 146 + bool kvm_riscv_isa_enable_allowed(unsigned long ext) 147 + { 148 + switch (ext) { 149 + case KVM_RISCV_ISA_EXT_H: 150 + return false; 151 + case KVM_RISCV_ISA_EXT_SSCOFPMF: 152 + /* Sscofpmf depends on interrupt filtering defined in ssaia */ 153 + return !kvm_riscv_isa_check_host(SSAIA); 154 + case KVM_RISCV_ISA_EXT_SVADU: 155 + /* 156 + * The henvcfg.ADUE is read-only zero if menvcfg.ADUE is zero. 157 + * Guest OS can use Svadu only when host OS enable Svadu. 158 + */ 159 + return arch_has_hw_pte_young(); 160 + case KVM_RISCV_ISA_EXT_V: 161 + return riscv_v_vstate_ctrl_user_allowed(); 162 + default: 163 + break; 164 + } 165 + 166 + return true; 167 + } 168 + 169 + bool kvm_riscv_isa_disable_allowed(unsigned long ext) 170 + { 171 + switch (ext) { 172 + /* Extensions which don't have any mechanism to disable */ 173 + case KVM_RISCV_ISA_EXT_A: 174 + case KVM_RISCV_ISA_EXT_C: 175 + case KVM_RISCV_ISA_EXT_I: 176 + case KVM_RISCV_ISA_EXT_M: 177 + /* There is not architectural config bit to disable sscofpmf completely */ 178 + case KVM_RISCV_ISA_EXT_SSCOFPMF: 179 + case KVM_RISCV_ISA_EXT_SSNPM: 180 + case KVM_RISCV_ISA_EXT_SSTC: 181 + case KVM_RISCV_ISA_EXT_SVINVAL: 182 + case KVM_RISCV_ISA_EXT_SVNAPOT: 183 + case KVM_RISCV_ISA_EXT_SVVPTC: 184 + case KVM_RISCV_ISA_EXT_ZAAMO: 185 + case KVM_RISCV_ISA_EXT_ZABHA: 186 + case KVM_RISCV_ISA_EXT_ZACAS: 187 + case KVM_RISCV_ISA_EXT_ZALASR: 188 + case KVM_RISCV_ISA_EXT_ZALRSC: 189 + case KVM_RISCV_ISA_EXT_ZAWRS: 190 + case KVM_RISCV_ISA_EXT_ZBA: 191 + case KVM_RISCV_ISA_EXT_ZBB: 192 + case KVM_RISCV_ISA_EXT_ZBC: 193 + case KVM_RISCV_ISA_EXT_ZBKB: 
194 + case KVM_RISCV_ISA_EXT_ZBKC: 195 + case KVM_RISCV_ISA_EXT_ZBKX: 196 + case KVM_RISCV_ISA_EXT_ZBS: 197 + case KVM_RISCV_ISA_EXT_ZCA: 198 + case KVM_RISCV_ISA_EXT_ZCB: 199 + case KVM_RISCV_ISA_EXT_ZCD: 200 + case KVM_RISCV_ISA_EXT_ZCF: 201 + case KVM_RISCV_ISA_EXT_ZCMOP: 202 + case KVM_RISCV_ISA_EXT_ZFA: 203 + case KVM_RISCV_ISA_EXT_ZFBFMIN: 204 + case KVM_RISCV_ISA_EXT_ZFH: 205 + case KVM_RISCV_ISA_EXT_ZFHMIN: 206 + case KVM_RISCV_ISA_EXT_ZICBOP: 207 + case KVM_RISCV_ISA_EXT_ZICCRSE: 208 + case KVM_RISCV_ISA_EXT_ZICNTR: 209 + case KVM_RISCV_ISA_EXT_ZICOND: 210 + case KVM_RISCV_ISA_EXT_ZICSR: 211 + case KVM_RISCV_ISA_EXT_ZIFENCEI: 212 + case KVM_RISCV_ISA_EXT_ZIHINTNTL: 213 + case KVM_RISCV_ISA_EXT_ZIHINTPAUSE: 214 + case KVM_RISCV_ISA_EXT_ZIHPM: 215 + case KVM_RISCV_ISA_EXT_ZIMOP: 216 + case KVM_RISCV_ISA_EXT_ZKND: 217 + case KVM_RISCV_ISA_EXT_ZKNE: 218 + case KVM_RISCV_ISA_EXT_ZKNH: 219 + case KVM_RISCV_ISA_EXT_ZKR: 220 + case KVM_RISCV_ISA_EXT_ZKSED: 221 + case KVM_RISCV_ISA_EXT_ZKSH: 222 + case KVM_RISCV_ISA_EXT_ZKT: 223 + case KVM_RISCV_ISA_EXT_ZTSO: 224 + case KVM_RISCV_ISA_EXT_ZVBB: 225 + case KVM_RISCV_ISA_EXT_ZVBC: 226 + case KVM_RISCV_ISA_EXT_ZVFBFMIN: 227 + case KVM_RISCV_ISA_EXT_ZVFBFWMA: 228 + case KVM_RISCV_ISA_EXT_ZVFH: 229 + case KVM_RISCV_ISA_EXT_ZVFHMIN: 230 + case KVM_RISCV_ISA_EXT_ZVKB: 231 + case KVM_RISCV_ISA_EXT_ZVKG: 232 + case KVM_RISCV_ISA_EXT_ZVKNED: 233 + case KVM_RISCV_ISA_EXT_ZVKNHA: 234 + case KVM_RISCV_ISA_EXT_ZVKNHB: 235 + case KVM_RISCV_ISA_EXT_ZVKSED: 236 + case KVM_RISCV_ISA_EXT_ZVKSH: 237 + case KVM_RISCV_ISA_EXT_ZVKT: 238 + return false; 239 + /* Extensions which can be disabled using Smstateen */ 240 + case KVM_RISCV_ISA_EXT_SSAIA: 241 + return riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN); 242 + case KVM_RISCV_ISA_EXT_SVADE: 243 + /* 244 + * The henvcfg.ADUE is read-only zero if menvcfg.ADUE is zero. 245 + * Svade can't be disabled unless we support Svadu. 
246 + */ 247 + return arch_has_hw_pte_young(); 248 + default: 249 + break; 250 + } 251 + 252 + return true; 253 + }
+8 -8
arch/riscv/kvm/main.c
··· 41 41 if (rc) 42 42 return rc; 43 43 44 - csr_write(CSR_HEDELEG, KVM_HEDELEG_DEFAULT); 45 - csr_write(CSR_HIDELEG, KVM_HIDELEG_DEFAULT); 44 + csr_write(CSR_HEDELEG, 0); 45 + csr_write(CSR_HIDELEG, 0); 46 46 47 47 /* VS should access only the time counter directly. Everything else should trap */ 48 48 csr_write(CSR_HCOUNTEREN, 0x02); ··· 105 105 return rc; 106 106 107 107 kvm_riscv_gstage_mode_detect(); 108 - switch (kvm_riscv_gstage_mode) { 109 - case HGATP_MODE_SV32X4: 108 + switch (kvm_riscv_gstage_max_pgd_levels) { 109 + case 2: 110 110 str = "Sv32x4"; 111 111 break; 112 - case HGATP_MODE_SV39X4: 112 + case 3: 113 113 str = "Sv39x4"; 114 114 break; 115 - case HGATP_MODE_SV48X4: 115 + case 4: 116 116 str = "Sv48x4"; 117 117 break; 118 - case HGATP_MODE_SV57X4: 118 + case 5: 119 119 str = "Sv57x4"; 120 120 break; 121 121 default: ··· 164 164 (rc) ? slist : "no features"); 165 165 } 166 166 167 - kvm_info("using %s G-stage page table format\n", str); 167 + kvm_info("highest G-stage page table mode is %s\n", str); 168 168 169 169 kvm_info("VMID %ld bits available\n", kvm_riscv_gstage_vmid_bits()); 170 170
+22 -48
arch/riscv/kvm/mmu.c
··· 24 24 phys_addr_t end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT; 25 25 struct kvm_gstage gstage; 26 26 27 - gstage.kvm = kvm; 28 - gstage.flags = 0; 29 - gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid); 30 - gstage.pgd = kvm->arch.pgd; 27 + kvm_riscv_gstage_init(&gstage, kvm); 31 28 32 29 spin_lock(&kvm->mmu_lock); 33 30 kvm_riscv_gstage_wp_range(&gstage, start, end); ··· 46 49 struct kvm_gstage_mapping map; 47 50 struct kvm_gstage gstage; 48 51 49 - gstage.kvm = kvm; 50 - gstage.flags = 0; 51 - gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid); 52 - gstage.pgd = kvm->arch.pgd; 52 + kvm_riscv_gstage_init(&gstage, kvm); 53 53 54 54 end = (gpa + size + PAGE_SIZE - 1) & PAGE_MASK; 55 55 pfn = __phys_to_pfn(hpa); ··· 61 67 if (!writable) 62 68 map.pte = pte_wrprotect(map.pte); 63 69 64 - ret = kvm_mmu_topup_memory_cache(&pcache, kvm_riscv_gstage_pgd_levels); 70 + ret = kvm_mmu_topup_memory_cache(&pcache, kvm->arch.pgd_levels); 65 71 if (ret) 66 72 goto out; 67 73 ··· 83 89 { 84 90 struct kvm_gstage gstage; 85 91 86 - gstage.kvm = kvm; 87 - gstage.flags = 0; 88 - gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid); 89 - gstage.pgd = kvm->arch.pgd; 92 + kvm_riscv_gstage_init(&gstage, kvm); 90 93 91 94 spin_lock(&kvm->mmu_lock); 92 95 kvm_riscv_gstage_unmap_range(&gstage, gpa, size, false); ··· 100 109 phys_addr_t end = (base_gfn + __fls(mask) + 1) << PAGE_SHIFT; 101 110 struct kvm_gstage gstage; 102 111 103 - gstage.kvm = kvm; 104 - gstage.flags = 0; 105 - gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid); 106 - gstage.pgd = kvm->arch.pgd; 112 + kvm_riscv_gstage_init(&gstage, kvm); 107 113 108 114 kvm_riscv_gstage_wp_range(&gstage, start, end); 109 115 } ··· 129 141 phys_addr_t size = slot->npages << PAGE_SHIFT; 130 142 struct kvm_gstage gstage; 131 143 132 - gstage.kvm = kvm; 133 - gstage.flags = 0; 134 - gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid); 135 - gstage.pgd = kvm->arch.pgd; 144 + kvm_riscv_gstage_init(&gstage, kvm); 136 145 137 146 spin_lock(&kvm->mmu_lock); 138 
147 kvm_riscv_gstage_unmap_range(&gstage, gpa, size, false); ··· 171 186 * space addressable by the KVM guest GPA space. 172 187 */ 173 188 if ((new->base_gfn + new->npages) >= 174 - (kvm_riscv_gstage_gpa_size >> PAGE_SHIFT)) 189 + kvm_riscv_gstage_gpa_size(kvm->arch.pgd_levels) >> PAGE_SHIFT) 175 190 return -EFAULT; 176 191 177 192 hva = new->userspace_addr; ··· 235 250 if (!kvm->arch.pgd) 236 251 return false; 237 252 238 - gstage.kvm = kvm; 239 - gstage.flags = 0; 240 - gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid); 241 - gstage.pgd = kvm->arch.pgd; 253 + kvm_riscv_gstage_init(&gstage, kvm); 242 254 mmu_locked = spin_trylock(&kvm->mmu_lock); 243 255 kvm_riscv_gstage_unmap_range(&gstage, range->start << PAGE_SHIFT, 244 256 (range->end - range->start) << PAGE_SHIFT, ··· 257 275 258 276 WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PUD_SIZE); 259 277 260 - gstage.kvm = kvm; 261 - gstage.flags = 0; 262 - gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid); 263 - gstage.pgd = kvm->arch.pgd; 278 + kvm_riscv_gstage_init(&gstage, kvm); 264 279 if (!kvm_riscv_gstage_get_leaf(&gstage, range->start << PAGE_SHIFT, 265 280 &ptep, &ptep_level)) 266 281 return false; ··· 277 298 278 299 WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PUD_SIZE); 279 300 280 - gstage.kvm = kvm; 281 - gstage.flags = 0; 282 - gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid); 283 - gstage.pgd = kvm->arch.pgd; 301 + kvm_riscv_gstage_init(&gstage, kvm); 284 302 if (!kvm_riscv_gstage_get_leaf(&gstage, range->start << PAGE_SHIFT, 285 303 &ptep, &ptep_level)) 286 304 return false; ··· 439 463 struct kvm_gstage gstage; 440 464 struct page *page; 441 465 442 - gstage.kvm = kvm; 443 - gstage.flags = 0; 444 - gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid); 445 - gstage.pgd = kvm->arch.pgd; 466 + kvm_riscv_gstage_init(&gstage, kvm); 446 467 447 468 /* Setup initial state of output mapping */ 448 469 memset(out_map, 0, sizeof(*out_map)); 449 470 450 471 /* We need minimum second+third level pages */ 451 
- ret = kvm_mmu_topup_memory_cache(pcache, kvm_riscv_gstage_pgd_levels); 472 + ret = kvm_mmu_topup_memory_cache(pcache, kvm->arch.pgd_levels); 452 473 if (ret) { 453 474 kvm_err("Failed to topup G-stage cache\n"); 454 475 return ret; ··· 548 575 return -ENOMEM; 549 576 kvm->arch.pgd = page_to_virt(pgd_page); 550 577 kvm->arch.pgd_phys = page_to_phys(pgd_page); 578 + kvm->arch.pgd_levels = kvm_riscv_gstage_max_pgd_levels; 551 579 552 580 return 0; 553 581 } ··· 560 586 561 587 spin_lock(&kvm->mmu_lock); 562 588 if (kvm->arch.pgd) { 563 - gstage.kvm = kvm; 564 - gstage.flags = 0; 565 - gstage.vmid = READ_ONCE(kvm->arch.vmid.vmid); 566 - gstage.pgd = kvm->arch.pgd; 567 - kvm_riscv_gstage_unmap_range(&gstage, 0UL, kvm_riscv_gstage_gpa_size, false); 589 + kvm_riscv_gstage_init(&gstage, kvm); 590 + kvm_riscv_gstage_unmap_range(&gstage, 0UL, 591 + kvm_riscv_gstage_gpa_size(kvm->arch.pgd_levels), false); 568 592 pgd = READ_ONCE(kvm->arch.pgd); 569 593 kvm->arch.pgd = NULL; 570 594 kvm->arch.pgd_phys = 0; 595 + kvm->arch.pgd_levels = 0; 571 596 } 572 597 spin_unlock(&kvm->mmu_lock); 573 598 ··· 576 603 577 604 void kvm_riscv_mmu_update_hgatp(struct kvm_vcpu *vcpu) 578 605 { 579 - unsigned long hgatp = kvm_riscv_gstage_mode << HGATP_MODE_SHIFT; 580 - struct kvm_arch *k = &vcpu->kvm->arch; 606 + struct kvm_arch *ka = &vcpu->kvm->arch; 607 + unsigned long hgatp = kvm_riscv_gstage_mode(ka->pgd_levels) 608 + << HGATP_MODE_SHIFT; 581 609 582 - hgatp |= (READ_ONCE(k->vmid.vmid) << HGATP_VMID_SHIFT) & HGATP_VMID; 583 - hgatp |= (k->pgd_phys >> PAGE_SHIFT) & HGATP_PPN; 610 + hgatp |= (READ_ONCE(ka->vmid.vmid) << HGATP_VMID_SHIFT) & HGATP_VMID; 611 + hgatp |= (ka->pgd_phys >> PAGE_SHIFT) & HGATP_PPN; 584 612 585 613 ncsr_write(CSR_HGATP, hgatp); 586 614
+2 -1
arch/riscv/kvm/tlb.c
··· 338 338 bitmap_zero(vcpu_mask, KVM_MAX_VCPUS); 339 339 kvm_for_each_vcpu(i, vcpu, kvm) { 340 340 if (hbase != -1UL) { 341 - if (vcpu->vcpu_id < hbase) 341 + if (vcpu->vcpu_id < hbase || 342 + vcpu->vcpu_id >= hbase + BITS_PER_LONG) 342 343 continue; 343 344 if (!(hmask & (1UL << (vcpu->vcpu_id - hbase)))) 344 345 continue;
+38 -73
arch/riscv/kvm/vcpu.c
··· 24 24 #define CREATE_TRACE_POINTS 25 25 #include "trace.h" 26 26 27 + static DEFINE_PER_CPU(struct kvm_vcpu *, kvm_former_vcpu); 28 + 27 29 const struct kvm_stats_desc kvm_vcpu_stats_desc[] = { 28 30 KVM_GENERIC_VCPU_STATS(), 29 31 STATS_DESC_COUNTER(VCPU, ecall_exit_stat), ··· 135 133 /* Mark this VCPU never ran */ 136 134 vcpu->arch.ran_atleast_once = false; 137 135 138 - vcpu->arch.cfg.hedeleg = KVM_HEDELEG_DEFAULT; 139 136 vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO; 140 137 bitmap_zero(vcpu->arch.isa, RISCV_ISA_EXT_MAX); 138 + 139 + /* Setup VCPU config */ 140 + kvm_riscv_vcpu_config_init(vcpu); 141 141 142 142 /* Setup ISA features available to VCPU */ 143 143 kvm_riscv_vcpu_setup_isa(vcpu); ··· 533 529 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, 534 530 struct kvm_guest_debug *dbg) 535 531 { 536 - if (dbg->control & KVM_GUESTDBG_ENABLE) { 532 + if (dbg->control & KVM_GUESTDBG_ENABLE) 537 533 vcpu->guest_debug = dbg->control; 538 - vcpu->arch.cfg.hedeleg &= ~BIT(EXC_BREAKPOINT); 539 - } else { 534 + else 540 535 vcpu->guest_debug = 0; 541 - vcpu->arch.cfg.hedeleg |= BIT(EXC_BREAKPOINT); 542 - } 543 536 544 537 return 0; 545 - } 546 - 547 - static void kvm_riscv_vcpu_setup_config(struct kvm_vcpu *vcpu) 548 - { 549 - const unsigned long *isa = vcpu->arch.isa; 550 - struct kvm_vcpu_config *cfg = &vcpu->arch.cfg; 551 - 552 - if (riscv_isa_extension_available(isa, SVPBMT)) 553 - cfg->henvcfg |= ENVCFG_PBMTE; 554 - 555 - if (riscv_isa_extension_available(isa, SSTC)) 556 - cfg->henvcfg |= ENVCFG_STCE; 557 - 558 - if (riscv_isa_extension_available(isa, ZICBOM)) 559 - cfg->henvcfg |= (ENVCFG_CBIE | ENVCFG_CBCFE); 560 - 561 - if (riscv_isa_extension_available(isa, ZICBOZ)) 562 - cfg->henvcfg |= ENVCFG_CBZE; 563 - 564 - if (riscv_isa_extension_available(isa, SVADU) && 565 - !riscv_isa_extension_available(isa, SVADE)) 566 - cfg->henvcfg |= ENVCFG_ADUE; 567 - 568 - if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN)) { 569 - cfg->hstateen0 
|= SMSTATEEN0_HSENVCFG; 570 - if (riscv_isa_extension_available(isa, SSAIA)) 571 - cfg->hstateen0 |= SMSTATEEN0_AIA_IMSIC | 572 - SMSTATEEN0_AIA | 573 - SMSTATEEN0_AIA_ISEL; 574 - if (riscv_isa_extension_available(isa, SMSTATEEN)) 575 - cfg->hstateen0 |= SMSTATEEN0_SSTATEEN0; 576 - } 577 - 578 - if (vcpu->guest_debug) 579 - cfg->hedeleg &= ~BIT(EXC_BREAKPOINT); 580 538 } 581 539 582 540 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 583 541 { 584 542 void *nsh; 585 543 struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; 586 - struct kvm_vcpu_config *cfg = &vcpu->arch.cfg; 544 + 545 + /* 546 + * If VCPU is being reloaded on the same physical CPU and no 547 + * other KVM VCPU has run on this CPU since it was last put, 548 + * we can skip the expensive CSR and HGATP writes. 549 + * 550 + * Note: If a new CSR is added to this fast-path skip block, 551 + * make sure that 'csr_dirty' is set to true in any 552 + * ioctl (e.g., KVM_SET_ONE_REG) that modifies it. 553 + */ 554 + if (vcpu != __this_cpu_read(kvm_former_vcpu)) 555 + __this_cpu_write(kvm_former_vcpu, vcpu); 556 + else if (vcpu->arch.last_exit_cpu == cpu && !vcpu->arch.csr_dirty) 557 + goto csr_restore_done; 558 + 559 + vcpu->arch.csr_dirty = false; 560 + 561 + /* 562 + * Load VCPU config CSRs before other CSRs because 563 + * the read/write behaviour of certain CSRs change 564 + * based on VCPU config CSRs. 
565 + */ 566 + kvm_riscv_vcpu_config_load(vcpu); 587 567 588 568 if (kvm_riscv_nacl_sync_csr_available()) { 589 569 nsh = nacl_shmem(); ··· 578 590 nacl_csr_write(nsh, CSR_VSEPC, csr->vsepc); 579 591 nacl_csr_write(nsh, CSR_VSCAUSE, csr->vscause); 580 592 nacl_csr_write(nsh, CSR_VSTVAL, csr->vstval); 581 - nacl_csr_write(nsh, CSR_HEDELEG, cfg->hedeleg); 582 593 nacl_csr_write(nsh, CSR_HVIP, csr->hvip); 583 594 nacl_csr_write(nsh, CSR_VSATP, csr->vsatp); 584 - nacl_csr_write(nsh, CSR_HENVCFG, cfg->henvcfg); 585 - if (IS_ENABLED(CONFIG_32BIT)) 586 - nacl_csr_write(nsh, CSR_HENVCFGH, cfg->henvcfg >> 32); 587 - if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN)) { 588 - nacl_csr_write(nsh, CSR_HSTATEEN0, cfg->hstateen0); 589 - if (IS_ENABLED(CONFIG_32BIT)) 590 - nacl_csr_write(nsh, CSR_HSTATEEN0H, cfg->hstateen0 >> 32); 591 - } 592 595 } else { 593 596 csr_write(CSR_VSSTATUS, csr->vsstatus); 594 597 csr_write(CSR_VSIE, csr->vsie); ··· 588 609 csr_write(CSR_VSEPC, csr->vsepc); 589 610 csr_write(CSR_VSCAUSE, csr->vscause); 590 611 csr_write(CSR_VSTVAL, csr->vstval); 591 - csr_write(CSR_HEDELEG, cfg->hedeleg); 592 612 csr_write(CSR_HVIP, csr->hvip); 593 613 csr_write(CSR_VSATP, csr->vsatp); 594 - csr_write(CSR_HENVCFG, cfg->henvcfg); 595 - if (IS_ENABLED(CONFIG_32BIT)) 596 - csr_write(CSR_HENVCFGH, cfg->henvcfg >> 32); 597 - if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN)) { 598 - csr_write(CSR_HSTATEEN0, cfg->hstateen0); 599 - if (IS_ENABLED(CONFIG_32BIT)) 600 - csr_write(CSR_HSTATEEN0H, cfg->hstateen0 >> 32); 601 - } 602 614 } 603 615 604 616 kvm_riscv_mmu_update_hgatp(vcpu); 605 617 618 + kvm_riscv_vcpu_aia_load(vcpu, cpu); 619 + 620 + csr_restore_done: 606 621 kvm_riscv_vcpu_timer_restore(vcpu); 607 622 608 623 kvm_riscv_vcpu_host_fp_save(&vcpu->arch.host_context); ··· 605 632 kvm_riscv_vcpu_host_vector_save(&vcpu->arch.host_context); 606 633 kvm_riscv_vcpu_guest_vector_restore(&vcpu->arch.guest_context, 607 634 vcpu->arch.isa); 608 - 609 - 
kvm_riscv_vcpu_aia_load(vcpu, cpu); 610 635 611 636 kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu); 612 637 ··· 721 750 { 722 751 struct kvm_vcpu_smstateen_csr *smcsr = &vcpu->arch.smstateen_csr; 723 752 struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; 724 - struct kvm_vcpu_config *cfg = &vcpu->arch.cfg; 725 753 726 754 vcpu->arch.host_scounteren = csr_swap(CSR_SCOUNTEREN, csr->scounteren); 727 755 vcpu->arch.host_senvcfg = csr_swap(CSR_SENVCFG, csr->senvcfg); 728 - if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN) && 729 - (cfg->hstateen0 & SMSTATEEN0_SSTATEEN0)) 730 - vcpu->arch.host_sstateen0 = csr_swap(CSR_SSTATEEN0, 731 - smcsr->sstateen0); 756 + if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN)) 757 + vcpu->arch.host_sstateen0 = csr_swap(CSR_SSTATEEN0, smcsr->sstateen0); 732 758 } 733 759 734 760 static __always_inline void kvm_riscv_vcpu_swap_in_host_state(struct kvm_vcpu *vcpu) 735 761 { 736 762 struct kvm_vcpu_smstateen_csr *smcsr = &vcpu->arch.smstateen_csr; 737 763 struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; 738 - struct kvm_vcpu_config *cfg = &vcpu->arch.cfg; 739 764 740 765 csr->scounteren = csr_swap(CSR_SCOUNTEREN, vcpu->arch.host_scounteren); 741 766 csr->senvcfg = csr_swap(CSR_SENVCFG, vcpu->arch.host_senvcfg); 742 - if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN) && 743 - (cfg->hstateen0 & SMSTATEEN0_SSTATEEN0)) 744 - smcsr->sstateen0 = csr_swap(CSR_SSTATEEN0, 745 - vcpu->arch.host_sstateen0); 767 + if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN)) 768 + smcsr->sstateen0 = csr_swap(CSR_SSTATEEN0, vcpu->arch.host_sstateen0); 746 769 } 747 770 748 771 /* ··· 833 868 struct kvm_run *run = vcpu->run; 834 869 835 870 if (!vcpu->arch.ran_atleast_once) 836 - kvm_riscv_vcpu_setup_config(vcpu); 871 + kvm_riscv_vcpu_config_ran_once(vcpu); 837 872 838 873 /* Mark this VCPU ran at least once */ 839 874 vcpu->arch.ran_atleast_once = true;
+103
arch/riscv/kvm/vcpu_config.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * Copyright (c) 2026 Qualcomm Technologies, Inc. 4 + */ 5 + 6 + #include <linux/kvm_host.h> 7 + #include <asm/kvm_nacl.h> 8 + 9 + #define KVM_HEDELEG_DEFAULT (BIT(EXC_INST_MISALIGNED) | \ 10 + BIT(EXC_INST_ILLEGAL) | \ 11 + BIT(EXC_BREAKPOINT) | \ 12 + BIT(EXC_SYSCALL) | \ 13 + BIT(EXC_INST_PAGE_FAULT) | \ 14 + BIT(EXC_LOAD_PAGE_FAULT) | \ 15 + BIT(EXC_STORE_PAGE_FAULT)) 16 + 17 + #define KVM_HIDELEG_DEFAULT (BIT(IRQ_VS_SOFT) | \ 18 + BIT(IRQ_VS_TIMER) | \ 19 + BIT(IRQ_VS_EXT)) 20 + 21 + void kvm_riscv_vcpu_config_init(struct kvm_vcpu *vcpu) 22 + { 23 + vcpu->arch.cfg.hedeleg = KVM_HEDELEG_DEFAULT; 24 + vcpu->arch.cfg.hideleg = KVM_HIDELEG_DEFAULT; 25 + } 26 + 27 + void kvm_riscv_vcpu_config_guest_debug(struct kvm_vcpu *vcpu) 28 + { 29 + struct kvm_vcpu_config *cfg = &vcpu->arch.cfg; 30 + 31 + if (vcpu->guest_debug) 32 + cfg->hedeleg &= ~BIT(EXC_BREAKPOINT); 33 + else 34 + cfg->hedeleg |= BIT(EXC_BREAKPOINT); 35 + 36 + vcpu->arch.csr_dirty = true; 37 + } 38 + 39 + void kvm_riscv_vcpu_config_ran_once(struct kvm_vcpu *vcpu) 40 + { 41 + const unsigned long *isa = vcpu->arch.isa; 42 + struct kvm_vcpu_config *cfg = &vcpu->arch.cfg; 43 + 44 + if (riscv_isa_extension_available(isa, SVPBMT)) 45 + cfg->henvcfg |= ENVCFG_PBMTE; 46 + 47 + if (riscv_isa_extension_available(isa, SSTC)) 48 + cfg->henvcfg |= ENVCFG_STCE; 49 + 50 + if (riscv_isa_extension_available(isa, ZICBOM)) 51 + cfg->henvcfg |= (ENVCFG_CBIE | ENVCFG_CBCFE); 52 + 53 + if (riscv_isa_extension_available(isa, ZICBOZ)) 54 + cfg->henvcfg |= ENVCFG_CBZE; 55 + 56 + if (riscv_isa_extension_available(isa, SVADU) && 57 + !riscv_isa_extension_available(isa, SVADE)) 58 + cfg->henvcfg |= ENVCFG_ADUE; 59 + 60 + if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN)) { 61 + cfg->hstateen0 |= SMSTATEEN0_HSENVCFG; 62 + if (riscv_isa_extension_available(isa, SSAIA)) 63 + cfg->hstateen0 |= SMSTATEEN0_AIA_IMSIC | 64 + SMSTATEEN0_AIA | 65 + SMSTATEEN0_AIA_ISEL; 66 + if 
(riscv_isa_extension_available(isa, SMSTATEEN)) 67 + cfg->hstateen0 |= SMSTATEEN0_SSTATEEN0; 68 + } 69 + 70 + if (vcpu->guest_debug) 71 + cfg->hedeleg &= ~BIT(EXC_BREAKPOINT); 72 + } 73 + 74 + void kvm_riscv_vcpu_config_load(struct kvm_vcpu *vcpu) 75 + { 76 + struct kvm_vcpu_config *cfg = &vcpu->arch.cfg; 77 + void *nsh; 78 + 79 + if (kvm_riscv_nacl_sync_csr_available()) { 80 + nsh = nacl_shmem(); 81 + nacl_csr_write(nsh, CSR_HEDELEG, cfg->hedeleg); 82 + nacl_csr_write(nsh, CSR_HIDELEG, cfg->hideleg); 83 + nacl_csr_write(nsh, CSR_HENVCFG, cfg->henvcfg); 84 + if (IS_ENABLED(CONFIG_32BIT)) 85 + nacl_csr_write(nsh, CSR_HENVCFGH, cfg->henvcfg >> 32); 86 + if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN)) { 87 + nacl_csr_write(nsh, CSR_HSTATEEN0, cfg->hstateen0); 88 + if (IS_ENABLED(CONFIG_32BIT)) 89 + nacl_csr_write(nsh, CSR_HSTATEEN0H, cfg->hstateen0 >> 32); 90 + } 91 + } else { 92 + csr_write(CSR_HEDELEG, cfg->hedeleg); 93 + csr_write(CSR_HIDELEG, cfg->hideleg); 94 + csr_write(CSR_HENVCFG, cfg->henvcfg); 95 + if (IS_ENABLED(CONFIG_32BIT)) 96 + csr_write(CSR_HENVCFGH, cfg->henvcfg >> 32); 97 + if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN)) { 98 + csr_write(CSR_HSTATEEN0, cfg->hstateen0); 99 + if (IS_ENABLED(CONFIG_32BIT)) 100 + csr_write(CSR_HSTATEEN0H, cfg->hstateen0 >> 32); 101 + } 102 + } 103 + }
+5 -4
arch/riscv/kvm/vcpu_fp.c
··· 13 13 #include <linux/nospec.h> 14 14 #include <linux/uaccess.h> 15 15 #include <asm/cpufeature.h> 16 + #include <asm/kvm_isa.h> 16 17 17 18 #ifdef CONFIG_FPU 18 19 void kvm_riscv_vcpu_fp_reset(struct kvm_vcpu *vcpu) ··· 61 60 void kvm_riscv_vcpu_host_fp_save(struct kvm_cpu_context *cntx) 62 61 { 63 62 /* No need to check host sstatus as it can be modified outside */ 64 - if (riscv_isa_extension_available(NULL, d)) 63 + if (!kvm_riscv_isa_check_host(D)) 65 64 __kvm_riscv_fp_d_save(cntx); 66 - else if (riscv_isa_extension_available(NULL, f)) 65 + else if (!kvm_riscv_isa_check_host(F)) 67 66 __kvm_riscv_fp_f_save(cntx); 68 67 } 69 68 70 69 void kvm_riscv_vcpu_host_fp_restore(struct kvm_cpu_context *cntx) 71 70 { 72 - if (riscv_isa_extension_available(NULL, d)) 71 + if (!kvm_riscv_isa_check_host(D)) 73 72 __kvm_riscv_fp_d_restore(cntx); 74 - else if (riscv_isa_extension_available(NULL, f)) 73 + else if (!kvm_riscv_isa_check_host(F)) 75 74 __kvm_riscv_fp_f_restore(cntx); 76 75 } 77 76 #endif
+23 -261
arch/riscv/kvm/vcpu_onereg.c
··· 15 15 #include <linux/kvm_host.h> 16 16 #include <asm/cacheflush.h> 17 17 #include <asm/cpufeature.h> 18 + #include <asm/kvm_isa.h> 18 19 #include <asm/kvm_vcpu_vector.h> 19 - #include <asm/pgtable.h> 20 - #include <asm/vector.h> 21 20 22 21 #define KVM_RISCV_BASE_ISA_MASK GENMASK(25, 0) 23 - 24 - #define KVM_ISA_EXT_ARR(ext) \ 25 - [KVM_RISCV_ISA_EXT_##ext] = RISCV_ISA_EXT_##ext 26 - 27 - /* Mapping between KVM ISA Extension ID & guest ISA extension ID */ 28 - static const unsigned long kvm_isa_ext_arr[] = { 29 - /* Single letter extensions (alphabetically sorted) */ 30 - [KVM_RISCV_ISA_EXT_A] = RISCV_ISA_EXT_a, 31 - [KVM_RISCV_ISA_EXT_C] = RISCV_ISA_EXT_c, 32 - [KVM_RISCV_ISA_EXT_D] = RISCV_ISA_EXT_d, 33 - [KVM_RISCV_ISA_EXT_F] = RISCV_ISA_EXT_f, 34 - [KVM_RISCV_ISA_EXT_H] = RISCV_ISA_EXT_h, 35 - [KVM_RISCV_ISA_EXT_I] = RISCV_ISA_EXT_i, 36 - [KVM_RISCV_ISA_EXT_M] = RISCV_ISA_EXT_m, 37 - [KVM_RISCV_ISA_EXT_V] = RISCV_ISA_EXT_v, 38 - /* Multi letter extensions (alphabetically sorted) */ 39 - KVM_ISA_EXT_ARR(SMNPM), 40 - KVM_ISA_EXT_ARR(SMSTATEEN), 41 - KVM_ISA_EXT_ARR(SSAIA), 42 - KVM_ISA_EXT_ARR(SSCOFPMF), 43 - KVM_ISA_EXT_ARR(SSNPM), 44 - KVM_ISA_EXT_ARR(SSTC), 45 - KVM_ISA_EXT_ARR(SVADE), 46 - KVM_ISA_EXT_ARR(SVADU), 47 - KVM_ISA_EXT_ARR(SVINVAL), 48 - KVM_ISA_EXT_ARR(SVNAPOT), 49 - KVM_ISA_EXT_ARR(SVPBMT), 50 - KVM_ISA_EXT_ARR(SVVPTC), 51 - KVM_ISA_EXT_ARR(ZAAMO), 52 - KVM_ISA_EXT_ARR(ZABHA), 53 - KVM_ISA_EXT_ARR(ZACAS), 54 - KVM_ISA_EXT_ARR(ZALASR), 55 - KVM_ISA_EXT_ARR(ZALRSC), 56 - KVM_ISA_EXT_ARR(ZAWRS), 57 - KVM_ISA_EXT_ARR(ZBA), 58 - KVM_ISA_EXT_ARR(ZBB), 59 - KVM_ISA_EXT_ARR(ZBC), 60 - KVM_ISA_EXT_ARR(ZBKB), 61 - KVM_ISA_EXT_ARR(ZBKC), 62 - KVM_ISA_EXT_ARR(ZBKX), 63 - KVM_ISA_EXT_ARR(ZBS), 64 - KVM_ISA_EXT_ARR(ZCA), 65 - KVM_ISA_EXT_ARR(ZCB), 66 - KVM_ISA_EXT_ARR(ZCD), 67 - KVM_ISA_EXT_ARR(ZCF), 68 - KVM_ISA_EXT_ARR(ZCLSD), 69 - KVM_ISA_EXT_ARR(ZCMOP), 70 - KVM_ISA_EXT_ARR(ZFA), 71 - KVM_ISA_EXT_ARR(ZFBFMIN), 72 - KVM_ISA_EXT_ARR(ZFH), 73 - 
KVM_ISA_EXT_ARR(ZFHMIN), 74 - KVM_ISA_EXT_ARR(ZICBOM), 75 - KVM_ISA_EXT_ARR(ZICBOP), 76 - KVM_ISA_EXT_ARR(ZICBOZ), 77 - KVM_ISA_EXT_ARR(ZICCRSE), 78 - KVM_ISA_EXT_ARR(ZICNTR), 79 - KVM_ISA_EXT_ARR(ZICOND), 80 - KVM_ISA_EXT_ARR(ZICSR), 81 - KVM_ISA_EXT_ARR(ZIFENCEI), 82 - KVM_ISA_EXT_ARR(ZIHINTNTL), 83 - KVM_ISA_EXT_ARR(ZIHINTPAUSE), 84 - KVM_ISA_EXT_ARR(ZIHPM), 85 - KVM_ISA_EXT_ARR(ZILSD), 86 - KVM_ISA_EXT_ARR(ZIMOP), 87 - KVM_ISA_EXT_ARR(ZKND), 88 - KVM_ISA_EXT_ARR(ZKNE), 89 - KVM_ISA_EXT_ARR(ZKNH), 90 - KVM_ISA_EXT_ARR(ZKR), 91 - KVM_ISA_EXT_ARR(ZKSED), 92 - KVM_ISA_EXT_ARR(ZKSH), 93 - KVM_ISA_EXT_ARR(ZKT), 94 - KVM_ISA_EXT_ARR(ZTSO), 95 - KVM_ISA_EXT_ARR(ZVBB), 96 - KVM_ISA_EXT_ARR(ZVBC), 97 - KVM_ISA_EXT_ARR(ZVFBFMIN), 98 - KVM_ISA_EXT_ARR(ZVFBFWMA), 99 - KVM_ISA_EXT_ARR(ZVFH), 100 - KVM_ISA_EXT_ARR(ZVFHMIN), 101 - KVM_ISA_EXT_ARR(ZVKB), 102 - KVM_ISA_EXT_ARR(ZVKG), 103 - KVM_ISA_EXT_ARR(ZVKNED), 104 - KVM_ISA_EXT_ARR(ZVKNHA), 105 - KVM_ISA_EXT_ARR(ZVKNHB), 106 - KVM_ISA_EXT_ARR(ZVKSED), 107 - KVM_ISA_EXT_ARR(ZVKSH), 108 - KVM_ISA_EXT_ARR(ZVKT), 109 - }; 110 - 111 - static unsigned long kvm_riscv_vcpu_base2isa_ext(unsigned long base_ext) 112 - { 113 - unsigned long i; 114 - 115 - for (i = 0; i < KVM_RISCV_ISA_EXT_MAX; i++) { 116 - if (kvm_isa_ext_arr[i] == base_ext) 117 - return i; 118 - } 119 - 120 - return KVM_RISCV_ISA_EXT_MAX; 121 - } 122 - 123 - static int kvm_riscv_vcpu_isa_check_host(unsigned long kvm_ext, unsigned long *guest_ext) 124 - { 125 - unsigned long host_ext; 126 - 127 - if (kvm_ext >= KVM_RISCV_ISA_EXT_MAX || 128 - kvm_ext >= ARRAY_SIZE(kvm_isa_ext_arr)) 129 - return -ENOENT; 130 - 131 - kvm_ext = array_index_nospec(kvm_ext, ARRAY_SIZE(kvm_isa_ext_arr)); 132 - *guest_ext = kvm_isa_ext_arr[kvm_ext]; 133 - switch (*guest_ext) { 134 - case RISCV_ISA_EXT_SMNPM: 135 - /* 136 - * Pointer masking effective in (H)S-mode is provided by the 137 - * Smnpm extension, so that extension is reported to the guest, 138 - * even though the CSR bits for 
configuring VS-mode pointer 139 - * masking on the host side are part of the Ssnpm extension. 140 - */ 141 - host_ext = RISCV_ISA_EXT_SSNPM; 142 - break; 143 - default: 144 - host_ext = *guest_ext; 145 - break; 146 - } 147 - 148 - if (!__riscv_isa_extension_available(NULL, host_ext)) 149 - return -ENOENT; 150 - 151 - return 0; 152 - } 153 - 154 - static bool kvm_riscv_vcpu_isa_enable_allowed(unsigned long ext) 155 - { 156 - switch (ext) { 157 - case KVM_RISCV_ISA_EXT_H: 158 - return false; 159 - case KVM_RISCV_ISA_EXT_SSCOFPMF: 160 - /* Sscofpmf depends on interrupt filtering defined in ssaia */ 161 - return __riscv_isa_extension_available(NULL, RISCV_ISA_EXT_SSAIA); 162 - case KVM_RISCV_ISA_EXT_SVADU: 163 - /* 164 - * The henvcfg.ADUE is read-only zero if menvcfg.ADUE is zero. 165 - * Guest OS can use Svadu only when host OS enable Svadu. 166 - */ 167 - return arch_has_hw_pte_young(); 168 - case KVM_RISCV_ISA_EXT_V: 169 - return riscv_v_vstate_ctrl_user_allowed(); 170 - default: 171 - break; 172 - } 173 - 174 - return true; 175 - } 176 - 177 - static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext) 178 - { 179 - switch (ext) { 180 - /* Extensions which don't have any mechanism to disable */ 181 - case KVM_RISCV_ISA_EXT_A: 182 - case KVM_RISCV_ISA_EXT_C: 183 - case KVM_RISCV_ISA_EXT_I: 184 - case KVM_RISCV_ISA_EXT_M: 185 - /* There is not architectural config bit to disable sscofpmf completely */ 186 - case KVM_RISCV_ISA_EXT_SSCOFPMF: 187 - case KVM_RISCV_ISA_EXT_SSNPM: 188 - case KVM_RISCV_ISA_EXT_SSTC: 189 - case KVM_RISCV_ISA_EXT_SVINVAL: 190 - case KVM_RISCV_ISA_EXT_SVNAPOT: 191 - case KVM_RISCV_ISA_EXT_SVVPTC: 192 - case KVM_RISCV_ISA_EXT_ZAAMO: 193 - case KVM_RISCV_ISA_EXT_ZABHA: 194 - case KVM_RISCV_ISA_EXT_ZACAS: 195 - case KVM_RISCV_ISA_EXT_ZALASR: 196 - case KVM_RISCV_ISA_EXT_ZALRSC: 197 - case KVM_RISCV_ISA_EXT_ZAWRS: 198 - case KVM_RISCV_ISA_EXT_ZBA: 199 - case KVM_RISCV_ISA_EXT_ZBB: 200 - case KVM_RISCV_ISA_EXT_ZBC: 201 - case 
KVM_RISCV_ISA_EXT_ZBKB: 202 - case KVM_RISCV_ISA_EXT_ZBKC: 203 - case KVM_RISCV_ISA_EXT_ZBKX: 204 - case KVM_RISCV_ISA_EXT_ZBS: 205 - case KVM_RISCV_ISA_EXT_ZCA: 206 - case KVM_RISCV_ISA_EXT_ZCB: 207 - case KVM_RISCV_ISA_EXT_ZCD: 208 - case KVM_RISCV_ISA_EXT_ZCF: 209 - case KVM_RISCV_ISA_EXT_ZCMOP: 210 - case KVM_RISCV_ISA_EXT_ZFA: 211 - case KVM_RISCV_ISA_EXT_ZFBFMIN: 212 - case KVM_RISCV_ISA_EXT_ZFH: 213 - case KVM_RISCV_ISA_EXT_ZFHMIN: 214 - case KVM_RISCV_ISA_EXT_ZICBOP: 215 - case KVM_RISCV_ISA_EXT_ZICCRSE: 216 - case KVM_RISCV_ISA_EXT_ZICNTR: 217 - case KVM_RISCV_ISA_EXT_ZICOND: 218 - case KVM_RISCV_ISA_EXT_ZICSR: 219 - case KVM_RISCV_ISA_EXT_ZIFENCEI: 220 - case KVM_RISCV_ISA_EXT_ZIHINTNTL: 221 - case KVM_RISCV_ISA_EXT_ZIHINTPAUSE: 222 - case KVM_RISCV_ISA_EXT_ZIHPM: 223 - case KVM_RISCV_ISA_EXT_ZIMOP: 224 - case KVM_RISCV_ISA_EXT_ZKND: 225 - case KVM_RISCV_ISA_EXT_ZKNE: 226 - case KVM_RISCV_ISA_EXT_ZKNH: 227 - case KVM_RISCV_ISA_EXT_ZKR: 228 - case KVM_RISCV_ISA_EXT_ZKSED: 229 - case KVM_RISCV_ISA_EXT_ZKSH: 230 - case KVM_RISCV_ISA_EXT_ZKT: 231 - case KVM_RISCV_ISA_EXT_ZTSO: 232 - case KVM_RISCV_ISA_EXT_ZVBB: 233 - case KVM_RISCV_ISA_EXT_ZVBC: 234 - case KVM_RISCV_ISA_EXT_ZVFBFMIN: 235 - case KVM_RISCV_ISA_EXT_ZVFBFWMA: 236 - case KVM_RISCV_ISA_EXT_ZVFH: 237 - case KVM_RISCV_ISA_EXT_ZVFHMIN: 238 - case KVM_RISCV_ISA_EXT_ZVKB: 239 - case KVM_RISCV_ISA_EXT_ZVKG: 240 - case KVM_RISCV_ISA_EXT_ZVKNED: 241 - case KVM_RISCV_ISA_EXT_ZVKNHA: 242 - case KVM_RISCV_ISA_EXT_ZVKNHB: 243 - case KVM_RISCV_ISA_EXT_ZVKSED: 244 - case KVM_RISCV_ISA_EXT_ZVKSH: 245 - case KVM_RISCV_ISA_EXT_ZVKT: 246 - return false; 247 - /* Extensions which can be disabled using Smstateen */ 248 - case KVM_RISCV_ISA_EXT_SSAIA: 249 - return riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN); 250 - case KVM_RISCV_ISA_EXT_SVADE: 251 - /* 252 - * The henvcfg.ADUE is read-only zero if menvcfg.ADUE is zero. 253 - * Svade can't be disabled unless we support Svadu. 
254 - */ 255 - return arch_has_hw_pte_young(); 256 - default: 257 - break; 258 - } 259 - 260 - return true; 261 - } 262 22 263 23 void kvm_riscv_vcpu_setup_isa(struct kvm_vcpu *vcpu) 264 24 { 265 25 unsigned long guest_ext, i; 266 26 267 - for (i = 0; i < ARRAY_SIZE(kvm_isa_ext_arr); i++) { 268 - if (kvm_riscv_vcpu_isa_check_host(i, &guest_ext)) 27 + for (i = 0; i < KVM_RISCV_ISA_EXT_MAX; i++) { 28 + if (__kvm_riscv_isa_check_host(i, &guest_ext)) 269 29 continue; 270 - if (kvm_riscv_vcpu_isa_enable_allowed(i)) 30 + if (kvm_riscv_isa_enable_allowed(i)) 271 31 set_bit(guest_ext, vcpu->arch.isa); 272 32 } 273 33 } ··· 50 290 reg_val = vcpu->arch.isa[0] & KVM_RISCV_BASE_ISA_MASK; 51 291 break; 52 292 case KVM_REG_RISCV_CONFIG_REG(zicbom_block_size): 53 - if (!riscv_isa_extension_available(NULL, ZICBOM)) 293 + if (kvm_riscv_isa_check_host(ZICBOM)) 54 294 return -ENOENT; 55 295 reg_val = riscv_cbom_block_size; 56 296 break; 57 297 case KVM_REG_RISCV_CONFIG_REG(zicboz_block_size): 58 - if (!riscv_isa_extension_available(NULL, ZICBOZ)) 298 + if (kvm_riscv_isa_check_host(ZICBOZ)) 59 299 return -ENOENT; 60 300 reg_val = riscv_cboz_block_size; 61 301 break; 62 302 case KVM_REG_RISCV_CONFIG_REG(zicbop_block_size): 63 - if (!riscv_isa_extension_available(NULL, ZICBOP)) 303 + if (kvm_riscv_isa_check_host(ZICBOP)) 64 304 return -ENOENT; 65 305 reg_val = riscv_cbop_block_size; 66 306 break; ··· 121 361 if (!vcpu->arch.ran_atleast_once) { 122 362 /* Ignore the enable/disable request for certain extensions */ 123 363 for (i = 0; i < RISCV_ISA_EXT_BASE; i++) { 124 - isa_ext = kvm_riscv_vcpu_base2isa_ext(i); 364 + isa_ext = kvm_riscv_base2isa_ext(i); 125 365 if (isa_ext >= KVM_RISCV_ISA_EXT_MAX) { 126 366 reg_val &= ~BIT(i); 127 367 continue; 128 368 } 129 - if (!kvm_riscv_vcpu_isa_enable_allowed(isa_ext)) 369 + if (!kvm_riscv_isa_enable_allowed(isa_ext)) 130 370 if (reg_val & BIT(i)) 131 371 reg_val &= ~BIT(i); 132 - if (!kvm_riscv_vcpu_isa_disable_allowed(isa_ext)) 372 + if 
(!kvm_riscv_isa_disable_allowed(isa_ext)) 133 373 if (!(reg_val & BIT(i))) 134 374 reg_val |= BIT(i); 135 375 } ··· 144 384 } 145 385 break; 146 386 case KVM_REG_RISCV_CONFIG_REG(zicbom_block_size): 147 - if (!riscv_isa_extension_available(NULL, ZICBOM)) 387 + if (kvm_riscv_isa_check_host(ZICBOM)) 148 388 return -ENOENT; 149 389 if (reg_val != riscv_cbom_block_size) 150 390 return -EINVAL; 151 391 break; 152 392 case KVM_REG_RISCV_CONFIG_REG(zicboz_block_size): 153 - if (!riscv_isa_extension_available(NULL, ZICBOZ)) 393 + if (kvm_riscv_isa_check_host(ZICBOZ)) 154 394 return -ENOENT; 155 395 if (reg_val != riscv_cboz_block_size) 156 396 return -EINVAL; 157 397 break; 158 398 case KVM_REG_RISCV_CONFIG_REG(zicbop_block_size): 159 - if (!riscv_isa_extension_available(NULL, ZICBOP)) 399 + if (kvm_riscv_isa_check_host(ZICBOP)) 160 400 return -ENOENT; 161 401 if (reg_val != riscv_cbop_block_size) 162 402 return -EINVAL; ··· 430 670 if (rc) 431 671 return rc; 432 672 673 + vcpu->arch.csr_dirty = true; 674 + 433 675 return 0; 434 676 } 435 677 ··· 442 680 unsigned long guest_ext; 443 681 int ret; 444 682 445 - ret = kvm_riscv_vcpu_isa_check_host(reg_num, &guest_ext); 683 + ret = __kvm_riscv_isa_check_host(reg_num, &guest_ext); 446 684 if (ret) 447 685 return ret; 448 686 ··· 460 698 unsigned long guest_ext; 461 699 int ret; 462 700 463 - ret = kvm_riscv_vcpu_isa_check_host(reg_num, &guest_ext); 701 + ret = __kvm_riscv_isa_check_host(reg_num, &guest_ext); 464 702 if (ret) 465 703 return ret; 466 704 ··· 473 711 * extension can be disabled 474 712 */ 475 713 if (reg_val == 1 && 476 - kvm_riscv_vcpu_isa_enable_allowed(reg_num)) 714 + kvm_riscv_isa_enable_allowed(reg_num)) 477 715 set_bit(guest_ext, vcpu->arch.isa); 478 716 else if (!reg_val && 479 - kvm_riscv_vcpu_isa_disable_allowed(reg_num)) 717 + kvm_riscv_isa_disable_allowed(reg_num)) 480 718 clear_bit(guest_ext, vcpu->arch.isa); 481 719 else 482 720 return -EINVAL; ··· 619 857 * was not available. 
620 858 */ 621 859 if (i == KVM_REG_RISCV_CONFIG_REG(zicbom_block_size) && 622 - !riscv_isa_extension_available(NULL, ZICBOM)) 860 + kvm_riscv_isa_check_host(ZICBOM)) 623 861 continue; 624 862 else if (i == KVM_REG_RISCV_CONFIG_REG(zicboz_block_size) && 625 - !riscv_isa_extension_available(NULL, ZICBOZ)) 863 + kvm_riscv_isa_check_host(ZICBOZ)) 626 864 continue; 627 865 else if (i == KVM_REG_RISCV_CONFIG_REG(zicbop_block_size) && 628 - !riscv_isa_extension_available(NULL, ZICBOP)) 866 + kvm_riscv_isa_check_host(ZICBOP)) 629 867 continue; 630 868 631 869 size = IS_ENABLED(CONFIG_32BIT) ? KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64; ··· 846 1084 KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64; 847 1085 u64 reg = KVM_REG_RISCV | size | KVM_REG_RISCV_ISA_EXT | i; 848 1086 849 - if (kvm_riscv_vcpu_isa_check_host(i, &guest_ext)) 1087 + if (__kvm_riscv_isa_check_host(i, &guest_ext)) 850 1088 continue; 851 1089 852 1090 if (uindices) {
+26 -7
arch/riscv/kvm/vcpu_pmu.c
··· 7 7 */ 8 8 9 9 #define pr_fmt(fmt) "riscv-kvm-pmu: " fmt 10 + #include <linux/bitops.h> 10 11 #include <linux/errno.h> 11 12 #include <linux/err.h> 12 13 #include <linux/kvm_host.h> 13 14 #include <linux/nospec.h> 14 15 #include <linux/perf/riscv_pmu.h> 15 16 #include <asm/csr.h> 17 + #include <asm/kvm_isa.h> 16 18 #include <asm/kvm_vcpu_sbi.h> 17 19 #include <asm/kvm_vcpu_pmu.h> 18 20 #include <asm/sbi.h> 19 - #include <linux/bitops.h> 20 21 21 22 #define kvm_pmu_num_counters(pmu) ((pmu)->num_hw_ctrs + (pmu)->num_fw_ctrs) 22 23 #define get_event_type(x) (((x) & SBI_PMU_EVENT_IDX_TYPE_MASK) >> 16) ··· 227 226 if (pmc->cinfo.type != SBI_PMU_CTR_TYPE_FW) 228 227 return -EINVAL; 229 228 229 + if (pmc->event_idx == SBI_PMU_EVENT_IDX_INVALID) 230 + return -EINVAL; 231 + 230 232 fevent_code = get_event_code(pmc->event_idx); 233 + if (WARN_ONCE(fevent_code >= SBI_PMU_FW_MAX, 234 + "Invalid firmware event code: %d\n", fevent_code)) 235 + return -EINVAL; 236 + 231 237 pmc->counter_val = kvpmu->fw_event[fevent_code].value; 232 238 233 239 *out_val = pmc->counter_val >> 32; ··· 259 251 pmc = &kvpmu->pmc[cidx]; 260 252 261 253 if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) { 254 + if (pmc->event_idx == SBI_PMU_EVENT_IDX_INVALID) 255 + return -EINVAL; 256 + 262 257 fevent_code = get_event_code(pmc->event_idx); 258 + if (WARN_ONCE(fevent_code >= SBI_PMU_FW_MAX, 259 + "Invalid firmware event code: %d\n", fevent_code)) 260 + return -EINVAL; 261 + 263 262 pmc->counter_val = kvpmu->fw_event[fevent_code].value; 264 263 } else if (pmc->perf_event) { 265 264 pmc->counter_val += perf_event_read_value(pmc->perf_event, &enabled, &running); ··· 281 266 static int kvm_pmu_validate_counter_mask(struct kvm_pmu *kvpmu, unsigned long ctr_base, 282 267 unsigned long ctr_mask) 283 268 { 284 - /* Make sure the we have a valid counter mask requested from the caller */ 285 - if (!ctr_mask || (ctr_base + __fls(ctr_mask) >= kvm_pmu_num_counters(kvpmu))) 269 + unsigned long num_ctrs = 
kvm_pmu_num_counters(kvpmu); 270 + 271 + /* Make sure we have a valid counter mask requested from the caller */ 272 + if (!ctr_mask || ctr_base >= num_ctrs || (ctr_base + __fls(ctr_mask) >= num_ctrs)) 286 273 return -EINVAL; 287 274 288 275 return 0; ··· 444 427 saddr = saddr_low; 445 428 446 429 if (saddr_high != 0) { 447 - if (IS_ENABLED(CONFIG_32BIT)) 430 + if (IS_ENABLED(CONFIG_32BIT)) { 448 431 saddr |= ((gpa_t)saddr_high << 32); 449 - else 432 + } else { 450 433 sbiret = SBI_ERR_INVALID_ADDRESS; 451 - goto out; 434 + goto out; 435 + } 452 436 } 453 437 454 438 kvpmu->sdata = kzalloc(snapshot_area_size, GFP_ATOMIC); ··· 459 441 /* No need to check writable slot explicitly as kvm_vcpu_write_guest does it internally */ 460 442 if (kvm_vcpu_write_guest(vcpu, saddr, kvpmu->sdata, snapshot_area_size)) { 461 443 kfree(kvpmu->sdata); 444 + kvpmu->sdata = NULL; 462 445 sbiret = SBI_ERR_INVALID_ADDRESS; 463 446 goto out; 464 447 } ··· 846 827 * filtering is available in the host. Otherwise, guest will always count 847 828 * events while the execution is in hypervisor mode. 848 829 */ 849 - if (!riscv_isa_extension_available(NULL, SSCOFPMF)) 830 + if (kvm_riscv_isa_check_host(SSCOFPMF)) 850 831 return; 851 832 852 833 ret = riscv_pmu_get_hpm_info(&hpm_width, &num_hw_ctrs);
+11 -5
arch/riscv/kvm/vcpu_sbi_sta.c
··· 181 181 unsigned long reg_size, const void *reg_val) 182 182 { 183 183 unsigned long value; 184 + gpa_t new_shmem = INVALID_GPA; 184 185 185 186 if (reg_size != sizeof(unsigned long)) 186 187 return -EINVAL; ··· 192 191 if (IS_ENABLED(CONFIG_32BIT)) { 193 192 gpa_t hi = upper_32_bits(vcpu->arch.sta.shmem); 194 193 195 - vcpu->arch.sta.shmem = value; 196 - vcpu->arch.sta.shmem |= hi << 32; 194 + new_shmem = value; 195 + new_shmem |= hi << 32; 197 196 } else { 198 - vcpu->arch.sta.shmem = value; 197 + new_shmem = value; 199 198 } 200 199 break; 201 200 case KVM_REG_RISCV_SBI_STA_REG(shmem_hi): 202 201 if (IS_ENABLED(CONFIG_32BIT)) { 203 202 gpa_t lo = lower_32_bits(vcpu->arch.sta.shmem); 204 203 205 - vcpu->arch.sta.shmem = ((gpa_t)value << 32); 206 - vcpu->arch.sta.shmem |= lo; 204 + new_shmem = ((gpa_t)value << 32); 205 + new_shmem |= lo; 207 206 } else if (value != 0) { 208 207 return -EINVAL; 209 208 } ··· 211 210 default: 212 211 return -ENOENT; 213 212 } 213 + 214 + if (new_shmem != INVALID_GPA && !IS_ALIGNED(new_shmem, 64)) 215 + return -EINVAL; 216 + 217 + vcpu->arch.sta.shmem = new_shmem; 214 218 215 219 return 0; 216 220 }
+2 -1
arch/riscv/kvm/vcpu_timer.c
··· 12 12 #include <linux/uaccess.h> 13 13 #include <clocksource/timer-riscv.h> 14 14 #include <asm/delay.h> 15 + #include <asm/kvm_isa.h> 15 16 #include <asm/kvm_nacl.h> 16 17 #include <asm/kvm_vcpu_timer.h> 17 18 ··· 254 253 t->next_set = false; 255 254 256 255 /* Enable sstc for every vcpu if available in hardware */ 257 - if (riscv_isa_extension_available(NULL, SSTC)) { 256 + if (!kvm_riscv_isa_check_host(SSTC)) { 258 257 t->sstc_enabled = true; 259 258 hrtimer_setup(&t->hrt, kvm_riscv_vcpu_vstimer_expired, CLOCK_MONOTONIC, 260 259 HRTIMER_MODE_REL);
+8 -3
arch/riscv/kvm/vcpu_vector.c
··· 12 12 #include <linux/kvm_host.h> 13 13 #include <linux/uaccess.h> 14 14 #include <asm/cpufeature.h> 15 + #include <asm/kvm_isa.h> 15 16 #include <asm/kvm_vcpu_vector.h> 16 17 #include <asm/vector.h> 17 18 ··· 64 63 void kvm_riscv_vcpu_host_vector_save(struct kvm_cpu_context *cntx) 65 64 { 66 65 /* No need to check host sstatus as it can be modified outside */ 67 - if (riscv_isa_extension_available(NULL, v)) 66 + if (!kvm_riscv_isa_check_host(V)) 68 67 __kvm_riscv_vector_save(cntx); 69 68 } 70 69 71 70 void kvm_riscv_vcpu_host_vector_restore(struct kvm_cpu_context *cntx) 72 71 { 73 - if (riscv_isa_extension_available(NULL, v)) 72 + if (!kvm_riscv_isa_check_host(V)) 74 73 __kvm_riscv_vector_restore(cntx); 75 74 } 76 75 ··· 81 80 return -ENOMEM; 82 81 83 82 vcpu->arch.host_context.vector.datap = kzalloc(riscv_v_vsize, GFP_KERNEL); 84 - if (!vcpu->arch.host_context.vector.datap) 83 + if (!vcpu->arch.host_context.vector.datap) { 84 + kfree(vcpu->arch.guest_context.vector.datap); 85 + vcpu->arch.guest_context.vector.datap = NULL; 85 86 return -ENOMEM; 87 + } 86 88 87 89 return 0; 88 90 } ··· 131 127 } else if (reg_num <= KVM_REG_RISCV_VECTOR_REG(31)) { 132 128 if (reg_size != vlenb) 133 129 return -EINVAL; 130 + WARN_ON(!cntx->vector.datap); 134 131 *reg_addr = cntx->vector.datap + 135 132 (reg_num - KVM_REG_RISCV_VECTOR_REG(0)) * vlenb; 136 133 } else {
+46 -3
arch/riscv/kvm/vm.c
··· 199 199 r = KVM_USER_MEM_SLOTS; 200 200 break; 201 201 case KVM_CAP_VM_GPA_BITS: 202 - r = kvm_riscv_gstage_gpa_bits; 202 + if (!kvm) 203 + r = kvm_riscv_gstage_gpa_bits(kvm_riscv_gstage_max_pgd_levels); 204 + else 205 + r = kvm_riscv_gstage_gpa_bits(kvm->arch.pgd_levels); 203 206 break; 204 207 default: 205 208 r = 0; ··· 214 211 215 212 int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) 216 213 { 214 + if (cap->flags) 215 + return -EINVAL; 216 + 217 217 switch (cap->cap) { 218 218 case KVM_CAP_RISCV_MP_STATE_RESET: 219 - if (cap->flags) 220 - return -EINVAL; 221 219 kvm->arch.mp_state_reset = true; 222 220 return 0; 221 + case KVM_CAP_VM_GPA_BITS: { 222 + unsigned long gpa_bits = cap->args[0]; 223 + unsigned long new_levels; 224 + int r = 0; 225 + 226 + /* Decide target pgd levels from requested gpa_bits */ 227 + #ifdef CONFIG_64BIT 228 + if (gpa_bits <= 41) 229 + new_levels = 3; /* Sv39x4 */ 230 + else if (gpa_bits <= 50) 231 + new_levels = 4; /* Sv48x4 */ 232 + else if (gpa_bits <= 59) 233 + new_levels = 5; /* Sv57x4 */ 234 + else 235 + return -EINVAL; 236 + #else 237 + /* 32-bit: only Sv32x4*/ 238 + if (gpa_bits <= 34) 239 + new_levels = 2; 240 + else 241 + return -EINVAL; 242 + #endif 243 + if (new_levels > kvm_riscv_gstage_max_pgd_levels) 244 + return -EINVAL; 245 + 246 + /* Follow KVM's lock ordering: kvm->lock -> kvm->slots_lock. */ 247 + mutex_lock(&kvm->lock); 248 + mutex_lock(&kvm->slots_lock); 249 + 250 + if (kvm->created_vcpus || !kvm_are_all_memslots_empty(kvm)) 251 + r = -EBUSY; 252 + else 253 + kvm->arch.pgd_levels = new_levels; 254 + 255 + mutex_unlock(&kvm->slots_lock); 256 + mutex_unlock(&kvm->lock); 257 + 258 + return r; 259 + } 223 260 default: 224 261 return -EINVAL; 225 262 }
+2 -1
arch/riscv/kvm/vmid.c
··· 26 26 void __init kvm_riscv_gstage_vmid_detect(void) 27 27 { 28 28 /* Figure-out number of VMID bits in HW */ 29 - csr_write(CSR_HGATP, (kvm_riscv_gstage_mode << HGATP_MODE_SHIFT) | HGATP_VMID); 29 + csr_write(CSR_HGATP, (kvm_riscv_gstage_mode(kvm_riscv_gstage_max_pgd_levels) << 30 + HGATP_MODE_SHIFT) | HGATP_VMID); 30 31 vmid_bits = csr_read(CSR_HGATP); 31 32 vmid_bits = (vmid_bits & HGATP_VMID) >> HGATP_VMID_SHIFT; 32 33 vmid_bits = fls_long(vmid_bits);
+2
tools/testing/selftests/kvm/include/kvm_util_types.h
··· 17 17 typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */ 18 18 typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */ 19 19 20 + #define INVALID_GPA (~(uint64_t)0) 21 + 20 22 #endif /* SELFTEST_KVM_UTIL_TYPES_H */
+37
tools/testing/selftests/kvm/include/riscv/sbi.h
··· 97 97 SBI_PMU_HW_GENERAL_MAX, 98 98 }; 99 99 100 + enum sbi_pmu_fw_generic_events_t { 101 + SBI_PMU_FW_MISALIGNED_LOAD = 0, 102 + SBI_PMU_FW_MISALIGNED_STORE = 1, 103 + SBI_PMU_FW_ACCESS_LOAD = 2, 104 + SBI_PMU_FW_ACCESS_STORE = 3, 105 + SBI_PMU_FW_ILLEGAL_INSN = 4, 106 + SBI_PMU_FW_SET_TIMER = 5, 107 + SBI_PMU_FW_IPI_SENT = 6, 108 + SBI_PMU_FW_IPI_RCVD = 7, 109 + SBI_PMU_FW_FENCE_I_SENT = 8, 110 + SBI_PMU_FW_FENCE_I_RCVD = 9, 111 + SBI_PMU_FW_SFENCE_VMA_SENT = 10, 112 + SBI_PMU_FW_SFENCE_VMA_RCVD = 11, 113 + SBI_PMU_FW_SFENCE_VMA_ASID_SENT = 12, 114 + SBI_PMU_FW_SFENCE_VMA_ASID_RCVD = 13, 115 + 116 + SBI_PMU_FW_HFENCE_GVMA_SENT = 14, 117 + SBI_PMU_FW_HFENCE_GVMA_RCVD = 15, 118 + SBI_PMU_FW_HFENCE_GVMA_VMID_SENT = 16, 119 + SBI_PMU_FW_HFENCE_GVMA_VMID_RCVD = 17, 120 + 121 + SBI_PMU_FW_HFENCE_VVMA_SENT = 18, 122 + SBI_PMU_FW_HFENCE_VVMA_RCVD = 19, 123 + SBI_PMU_FW_HFENCE_VVMA_ASID_SENT = 20, 124 + SBI_PMU_FW_HFENCE_VVMA_ASID_RCVD = 21, 125 + SBI_PMU_FW_MAX, 126 + }; 127 + 128 + /* SBI PMU event types */ 129 + enum sbi_pmu_event_type { 130 + SBI_PMU_EVENT_TYPE_HW = 0x0, 131 + SBI_PMU_EVENT_TYPE_CACHE = 0x1, 132 + SBI_PMU_EVENT_TYPE_RAW = 0x2, 133 + SBI_PMU_EVENT_TYPE_RAW_V2 = 0x3, 134 + SBI_PMU_EVENT_TYPE_FW = 0xf, 135 + }; 136 + 100 137 /* SBI PMU counter types */ 101 138 enum sbi_pmu_ctr_type { 102 139 SBI_PMU_CTR_TYPE_HW = 0x0,
+5
tools/testing/selftests/kvm/lib/riscv/processor.c
··· 566 566 567 567 return val; 568 568 } 569 + 570 + bool kvm_arch_has_default_irqchip(void) 571 + { 572 + return kvm_check_cap(KVM_CAP_IRQCHIP); 573 + }
+19 -1
tools/testing/selftests/kvm/riscv/sbi_pmu_test.c
··· 436 436 struct sbiret ret; 437 437 int num_counters = 0, i; 438 438 union sbi_pmu_ctr_info ctrinfo; 439 + unsigned long fw_eidx; 439 440 440 441 probe = guest_sbi_probe_extension(SBI_EXT_PMU, &out_val); 441 442 GUEST_ASSERT(probe && out_val == 1); ··· 462 461 pmu_csr_read_num(ctrinfo.csr); 463 462 GUEST_ASSERT(illegal_handler_invoked); 464 463 } else if (ctrinfo.type == SBI_PMU_CTR_TYPE_FW) { 465 - read_fw_counter(i, ctrinfo); 464 + /* Read without configure should fail */ 465 + ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_FW_READ, 466 + i, 0, 0, 0, 0, 0); 467 + GUEST_ASSERT(ret.error == SBI_ERR_INVALID_PARAM); 468 + 469 + /* 470 + * Try to configure with a common firmware event. 471 + * If configuration succeeds, verify we can read it. 472 + */ 473 + fw_eidx = ((unsigned long)SBI_PMU_EVENT_TYPE_FW << 16) | 474 + SBI_PMU_FW_ACCESS_LOAD; 475 + 476 + ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH, 477 + i, 1, 0, fw_eidx, 0, 0); 478 + if (ret.error == 0) { 479 + GUEST_ASSERT(ret.value == i); 480 + read_fw_counter(i, ctrinfo); 481 + } 466 482 } 467 483 } 468 484
+82 -16
tools/testing/selftests/kvm/steal_time.c
··· 69 69 70 70 static void steal_time_init(struct kvm_vcpu *vcpu, uint32_t i) 71 71 { 72 - int ret; 73 - 74 72 /* ST_GPA_BASE is identity mapped */ 75 73 st_gva[i] = (void *)(ST_GPA_BASE + i * STEAL_TIME_SIZE); 76 74 sync_global_to_guest(vcpu->vm, st_gva[i]); 77 - 78 - ret = _vcpu_set_msr(vcpu, MSR_KVM_STEAL_TIME, 79 - (ulong)st_gva[i] | KVM_STEAL_RESERVED_MASK); 80 - TEST_ASSERT(ret == 0, "Bad GPA didn't fail"); 81 75 82 76 vcpu_set_msr(vcpu, MSR_KVM_STEAL_TIME, (ulong)st_gva[i] | KVM_MSR_ENABLED); 83 77 } ··· 91 97 st->pad[0], st->pad[1], st->pad[2], st->pad[3], 92 98 st->pad[4], st->pad[5], st->pad[6], st->pad[7], 93 99 st->pad[8], st->pad[9], st->pad[10]); 100 + } 101 + 102 + static void check_steal_time_uapi(void) 103 + { 104 + struct kvm_vm *vm; 105 + struct kvm_vcpu *vcpu; 106 + int ret; 107 + 108 + vm = vm_create_with_one_vcpu(&vcpu, NULL); 109 + 110 + ret = _vcpu_set_msr(vcpu, MSR_KVM_STEAL_TIME, 111 + (ulong)ST_GPA_BASE | KVM_STEAL_RESERVED_MASK); 112 + TEST_ASSERT(ret == 0, "Bad GPA didn't fail"); 113 + 114 + kvm_vm_free(vm); 94 115 } 95 116 96 117 #elif defined(__aarch64__) ··· 179 170 { 180 171 struct kvm_vm *vm = vcpu->vm; 181 172 uint64_t st_ipa; 182 - int ret; 183 173 184 174 struct kvm_device_attr dev = { 185 175 .group = KVM_ARM_VCPU_PVTIME_CTRL, ··· 186 178 .addr = (uint64_t)&st_ipa, 187 179 }; 188 180 189 - vcpu_ioctl(vcpu, KVM_HAS_DEVICE_ATTR, &dev); 190 - 191 181 /* ST_GPA_BASE is identity mapped */ 192 182 st_gva[i] = (void *)(ST_GPA_BASE + i * STEAL_TIME_SIZE); 193 183 sync_global_to_guest(vm, st_gva[i]); 194 184 195 - st_ipa = (ulong)st_gva[i] | 1; 196 - ret = __vcpu_ioctl(vcpu, KVM_SET_DEVICE_ATTR, &dev); 197 - TEST_ASSERT(ret == -1 && errno == EINVAL, "Bad IPA didn't report EINVAL"); 198 - 199 185 st_ipa = (ulong)st_gva[i]; 200 186 vcpu_ioctl(vcpu, KVM_SET_DEVICE_ATTR, &dev); 201 - 202 - ret = __vcpu_ioctl(vcpu, KVM_SET_DEVICE_ATTR, &dev); 203 - TEST_ASSERT(ret == -1 && errno == EEXIST, "Set IPA twice without EEXIST"); 204 187 } 205 188 
206 189 static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpu_idx) ··· 202 203 ksft_print_msg(" rev: %d\n", st->rev); 203 204 ksft_print_msg(" attr: %d\n", st->attr); 204 205 ksft_print_msg(" st_time: %ld\n", st->st_time); 206 + } 207 + 208 + static void check_steal_time_uapi(void) 209 + { 210 + struct kvm_vm *vm; 211 + struct kvm_vcpu *vcpu; 212 + uint64_t st_ipa; 213 + int ret; 214 + 215 + vm = vm_create_with_one_vcpu(&vcpu, NULL); 216 + 217 + struct kvm_device_attr dev = { 218 + .group = KVM_ARM_VCPU_PVTIME_CTRL, 219 + .attr = KVM_ARM_VCPU_PVTIME_IPA, 220 + .addr = (uint64_t)&st_ipa, 221 + }; 222 + 223 + vcpu_ioctl(vcpu, KVM_HAS_DEVICE_ATTR, &dev); 224 + 225 + st_ipa = (ulong)ST_GPA_BASE | 1; 226 + ret = __vcpu_ioctl(vcpu, KVM_SET_DEVICE_ATTR, &dev); 227 + TEST_ASSERT(ret == -1 && errno == EINVAL, "Bad IPA didn't report EINVAL"); 228 + 229 + st_ipa = (ulong)ST_GPA_BASE; 230 + vcpu_ioctl(vcpu, KVM_SET_DEVICE_ATTR, &dev); 231 + 232 + ret = __vcpu_ioctl(vcpu, KVM_SET_DEVICE_ATTR, &dev); 233 + TEST_ASSERT(ret == -1 && errno == EEXIST, "Set IPA twice without EEXIST"); 234 + 235 + kvm_vm_free(vm); 205 236 } 206 237 207 238 #elif defined(__riscv) ··· 328 299 for (i = 0; i < 47; ++i) 329 300 pr_info("%d", st->pad[i]); 330 301 pr_info("\n"); 302 + } 303 + 304 + static void check_steal_time_uapi(void) 305 + { 306 + struct kvm_vm *vm; 307 + struct kvm_vcpu *vcpu; 308 + struct kvm_one_reg reg; 309 + uint64_t shmem; 310 + int ret; 311 + 312 + vm = vm_create_with_one_vcpu(&vcpu, NULL); 313 + 314 + reg.id = KVM_REG_RISCV | 315 + KVM_REG_SIZE_ULONG | 316 + KVM_REG_RISCV_SBI_STATE | 317 + KVM_REG_RISCV_SBI_STA | 318 + KVM_REG_RISCV_SBI_STA_REG(shmem_lo); 319 + reg.addr = (uint64_t)&shmem; 320 + 321 + shmem = ST_GPA_BASE + 1; 322 + ret = __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, &reg); 323 + TEST_ASSERT(ret == -1 && errno == EINVAL, 324 + "misaligned STA shmem returns -EINVAL"); 325 + 326 + shmem = ST_GPA_BASE; 327 + ret = __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, &reg); 328 + 
TEST_ASSERT(ret == 0, 329 + "aligned STA shmem succeeds"); 330 + 331 + shmem = INVALID_GPA; 332 + ret = __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, &reg); 333 + TEST_ASSERT(ret == 0, 334 + "all-ones for STA shmem succeeds"); 335 + 336 + kvm_vm_free(vm); 331 337 } 332 338 333 339 #elif defined(__loongarch__) ··· 528 464 ksft_print_header(); 529 465 TEST_REQUIRE(is_steal_time_supported(vcpus[0])); 530 466 ksft_set_plan(NR_VCPUS); 467 + 468 + check_steal_time_uapi(); 531 469 532 470 /* Run test on each VCPU */ 533 471 for (i = 0; i < NR_VCPUS; ++i) {