Merge tag 'riscv-for-linus-5.17-mw1' of git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux

+6 -6

Documentation/riscv/vm-layout.rst

··· 47 47 | Kernel-space virtual memory, shared between all processes: 48 48 ____________________________________________________________|___________________________________________________________ 49 49 | | | | 50 - ffffffc000000000 | -256 GB | ffffffc7ffffffff | 32 GB | kasan 51 - ffffffcefee00000 | -196 GB | ffffffcefeffffff | 2 MB | fixmap 52 - ffffffceff000000 | -196 GB | ffffffceffffffff | 16 MB | PCI io 53 - ffffffcf00000000 | -196 GB | ffffffcfffffffff | 4 GB | vmemmap 54 - ffffffd000000000 | -192 GB | ffffffdfffffffff | 64 GB | vmalloc/ioremap space 55 - ffffffe000000000 | -128 GB | ffffffff7fffffff | 124 GB | direct mapping of all physical memory 50 + ffffffc6fee00000 | -228 GB | ffffffc6feffffff | 2 MB | fixmap 51 + ffffffc6ff000000 | -228 GB | ffffffc6ffffffff | 16 MB | PCI io 52 + ffffffc700000000 | -228 GB | ffffffc7ffffffff | 4 GB | vmemmap 53 + ffffffc800000000 | -224 GB | ffffffd7ffffffff | 64 GB | vmalloc/ioremap space 54 + ffffffd800000000 | -160 GB | fffffff6ffffffff | 124 GB | direct mapping of all physical memory 55 + fffffff700000000 | -36 GB | fffffffeffffffff | 32 GB | kasan 56 56 __________________|____________|__________________|_________|____________________________________________________________ 57 57 | 58 58 |

+18 -34

arch/riscv/Kconfig

··· 147 147 Select if you want MMU-based virtualised addressing space 148 148 support by paged memory management. If unsure, say 'Y'. 149 149 150 - config VA_BITS 151 - int 152 - default 32 if 32BIT 153 - default 39 if 64BIT 154 - 155 - config PA_BITS 156 - int 157 - default 34 if 32BIT 158 - default 56 if 64BIT 159 - 160 150 config PAGE_OFFSET 161 151 hex 162 - default 0xC0000000 if 32BIT && MAXPHYSMEM_1GB 152 + default 0xC0000000 if 32BIT 163 153 default 0x80000000 if 64BIT && !MMU 164 - default 0xffffffff80000000 if 64BIT && MAXPHYSMEM_2GB 165 - default 0xffffffe000000000 if 64BIT && MAXPHYSMEM_128GB 154 + default 0xffffaf8000000000 if 64BIT 166 155 167 156 config KASAN_SHADOW_OFFSET 168 157 hex 169 158 depends on KASAN_GENERIC 170 - default 0xdfffffc800000000 if 64BIT 159 + default 0xdfffffff00000000 if 64BIT 171 160 default 0xffffffff if 32BIT 172 161 173 162 config ARCH_FLATMEM_ENABLE ··· 202 213 203 214 config PGTABLE_LEVELS 204 215 int 205 - default 3 if 64BIT 216 + default 4 if 64BIT 206 217 default 2 207 218 208 219 config LOCKDEP_SUPPORT ··· 259 270 config MODULE_SECTIONS 260 271 bool 261 272 select HAVE_MOD_ARCH_SPECIFIC 262 - 263 - choice 264 - prompt "Maximum Physical Memory" 265 - default MAXPHYSMEM_1GB if 32BIT 266 - default MAXPHYSMEM_2GB if 64BIT && CMODEL_MEDLOW 267 - default MAXPHYSMEM_128GB if 64BIT && CMODEL_MEDANY 268 - 269 - config MAXPHYSMEM_1GB 270 - depends on 32BIT 271 - bool "1GiB" 272 - config MAXPHYSMEM_2GB 273 - depends on 64BIT 274 - bool "2GiB" 275 - config MAXPHYSMEM_128GB 276 - depends on 64BIT && CMODEL_MEDANY 277 - bool "128GiB" 278 - endchoice 279 - 280 273 281 274 config SMP 282 275 bool "Symmetric Multi-Processing" ··· 363 392 364 393 config RISCV_SBI_V01 365 394 bool "SBI v0.1 support" 366 - default y 367 395 depends on RISCV_SBI 368 396 help 369 397 This config allows kernel to use SBI v0.1 APIs. This will be 370 398 deprecated in future once legacy M-mode software are no longer in use. 
399 + 400 + config RISCV_BOOT_SPINWAIT 401 + bool "Spinwait booting method" 402 + depends on SMP 403 + default y 404 + help 405 + This enables support for booting Linux via spinwait method. In the 406 + spinwait method, all cores randomly jump to Linux. One of the cores 407 + gets chosen via lottery and all other keep spinning on a percpu 408 + variable. This method cannot support CPU hotplug and sparse hartid 409 + scheme. It should be only enabled for M-mode Linux or platforms relying 410 + on older firmware without SBI HSM extension. All other platforms should 411 + rely on ordered booting via SBI HSM extension which gets chosen 412 + dynamically at runtime if the firmware supports it. 371 413 372 414 config KEXEC 373 415 bool "Kexec system call"

+5

arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts

··· 39 39 clock-frequency = <RTCCLK_FREQ>; 40 40 clock-output-names = "rtcclk"; 41 41 }; 42 + 43 + gpio-poweroff { 44 + compatible = "gpio-poweroff"; 45 + gpios = <&gpio 2 GPIO_ACTIVE_LOW>; 46 + }; 42 47 }; 43 48 44 49 &uart0 {

-1

arch/riscv/configs/nommu_k210_defconfig

··· 29 29 CONFIG_SLOB=y 30 30 # CONFIG_MMU is not set 31 31 CONFIG_SOC_CANAAN=y 32 - CONFIG_MAXPHYSMEM_2GB=y 33 32 CONFIG_SMP=y 34 33 CONFIG_NR_CPUS=2 35 34 CONFIG_CMDLINE="earlycon console=ttySIF0"

-1

arch/riscv/configs/nommu_k210_sdcard_defconfig

··· 21 21 CONFIG_SLOB=y 22 22 # CONFIG_MMU is not set 23 23 CONFIG_SOC_CANAAN=y 24 - CONFIG_MAXPHYSMEM_2GB=y 25 24 CONFIG_SMP=y 26 25 CONFIG_NR_CPUS=2 27 26 CONFIG_CMDLINE="earlycon console=ttySIF0 rootdelay=2 root=/dev/mmcblk0p1 ro"

-2

arch/riscv/configs/nommu_virt_defconfig

··· 24 24 # CONFIG_VM_EVENT_COUNTERS is not set 25 25 # CONFIG_COMPAT_BRK is not set 26 26 CONFIG_SLOB=y 27 - # CONFIG_SLAB_MERGE_DEFAULT is not set 28 27 # CONFIG_MMU is not set 29 28 CONFIG_SOC_VIRT=y 30 - CONFIG_MAXPHYSMEM_2GB=y 31 29 CONFIG_SMP=y 32 30 CONFIG_CMDLINE="root=/dev/vda rw earlycon=uart8250,mmio,0x10000000,115200n8 console=ttyS0" 33 31 CONFIG_CMDLINE_FORCE=y

-2

arch/riscv/include/asm/cpu_ops.h

··· 40 40 41 41 extern const struct cpu_operations *cpu_ops[NR_CPUS]; 42 42 void __init cpu_set_ops(int cpu); 43 - void cpu_update_secondary_bootdata(unsigned int cpuid, 44 - struct task_struct *tidle); 45 43 46 44 #endif /* ifndef __ASM_CPU_OPS_H */

+25

arch/riscv/include/asm/cpu_ops_sbi.h

··· 1 + /* SPDX-License-Identifier: GPL-2.0-only */ 2 + /* 3 + * Copyright (c) 2021 by Rivos Inc. 4 + */ 5 + #ifndef __ASM_CPU_OPS_SBI_H 6 + #define __ASM_CPU_OPS_SBI_H 7 + 8 + #ifndef __ASSEMBLY__ 9 + #include <linux/init.h> 10 + #include <linux/sched.h> 11 + #include <linux/threads.h> 12 + 13 + /** 14 + * struct sbi_hart_boot_data - Hart specific boot used during booting and 15 + * cpu hotplug. 16 + * @task_ptr: A pointer to the hart specific tp 17 + * @stack_ptr: A pointer to the hart specific sp 18 + */ 19 + struct sbi_hart_boot_data { 20 + void *task_ptr; 21 + void *stack_ptr; 22 + }; 23 + #endif 24 + 25 + #endif /* ifndef __ASM_CPU_OPS_SBI_H */

+1 -2

arch/riscv/include/asm/csr.h

··· 40 40 #ifndef CONFIG_64BIT 41 41 #define SATP_PPN _AC(0x003FFFFF, UL) 42 42 #define SATP_MODE_32 _AC(0x80000000, UL) 43 - #define SATP_MODE SATP_MODE_32 44 43 #define SATP_ASID_BITS 9 45 44 #define SATP_ASID_SHIFT 22 46 45 #define SATP_ASID_MASK _AC(0x1FF, UL) 47 46 #else 48 47 #define SATP_PPN _AC(0x00000FFFFFFFFFFF, UL) 49 48 #define SATP_MODE_39 _AC(0x8000000000000000, UL) 50 - #define SATP_MODE SATP_MODE_39 49 + #define SATP_MODE_48 _AC(0x9000000000000000, UL) 51 50 #define SATP_ASID_BITS 16 52 51 #define SATP_ASID_SHIFT 44 53 52 #define SATP_ASID_MASK _AC(0xFFFF, UL)

+1

arch/riscv/include/asm/fixmap.h

··· 24 24 FIX_HOLE, 25 25 FIX_PTE, 26 26 FIX_PMD, 27 + FIX_PUD, 27 28 FIX_TEXT_POKE1, 28 29 FIX_TEXT_POKE0, 29 30 FIX_EARLYCON_MEM_BASE,

+8 -3

arch/riscv/include/asm/kasan.h

··· 27 27 */ 28 28 #define KASAN_SHADOW_SCALE_SHIFT 3 29 29 30 - #define KASAN_SHADOW_SIZE (UL(1) << ((CONFIG_VA_BITS - 1) - KASAN_SHADOW_SCALE_SHIFT)) 31 - #define KASAN_SHADOW_START KERN_VIRT_START 32 - #define KASAN_SHADOW_END (KASAN_SHADOW_START + KASAN_SHADOW_SIZE) 30 + #define KASAN_SHADOW_SIZE (UL(1) << ((VA_BITS - 1) - KASAN_SHADOW_SCALE_SHIFT)) 31 + /* 32 + * Depending on the size of the virtual address space, the region may not be 33 + * aligned on PGDIR_SIZE, so force its alignment to ease its population. 34 + */ 35 + #define KASAN_SHADOW_START ((KASAN_SHADOW_END - KASAN_SHADOW_SIZE) & PGDIR_MASK) 36 + #define KASAN_SHADOW_END MODULES_LOWEST_VADDR 33 37 #define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL) 34 38 35 39 void kasan_init(void); 36 40 asmlinkage void kasan_early_init(void); 41 + void kasan_swapper_init(void); 37 42 38 43 #endif 39 44 #endif

+14 -2

arch/riscv/include/asm/page.h

··· 31 31 * When not using MMU this corresponds to the first free page in 32 32 * physical memory (aligned on a page boundary). 33 33 */ 34 + #ifdef CONFIG_64BIT 35 + #ifdef CONFIG_MMU 36 + #define PAGE_OFFSET kernel_map.page_offset 37 + #else 34 38 #define PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL) 35 - 36 - #define KERN_VIRT_SIZE (-PAGE_OFFSET) 39 + #endif 40 + /* 41 + * By default, CONFIG_PAGE_OFFSET value corresponds to SV48 address space so 42 + * define the PAGE_OFFSET value for SV39. 43 + */ 44 + #define PAGE_OFFSET_L3 _AC(0xffffffd800000000, UL) 45 + #else 46 + #define PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL) 47 + #endif /* CONFIG_64BIT */ 37 48 38 49 #ifndef __ASSEMBLY__ 39 50 ··· 97 86 #endif /* CONFIG_MMU */ 98 87 99 88 struct kernel_mapping { 89 + unsigned long page_offset; 100 90 unsigned long virt_addr; 101 91 uintptr_t phys_addr; 102 92 uintptr_t size;

+40

arch/riscv/include/asm/pgalloc.h

··· 11 11 #include <asm/tlb.h> 12 12 13 13 #ifdef CONFIG_MMU 14 + #define __HAVE_ARCH_PUD_ALLOC_ONE 15 + #define __HAVE_ARCH_PUD_FREE 14 16 #include <asm-generic/pgalloc.h> 15 17 16 18 static inline void pmd_populate_kernel(struct mm_struct *mm, ··· 38 36 39 37 set_pud(pud, __pud((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE)); 40 38 } 39 + 40 + static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4d, pud_t *pud) 41 + { 42 + if (pgtable_l4_enabled) { 43 + unsigned long pfn = virt_to_pfn(pud); 44 + 45 + set_p4d(p4d, __p4d((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE)); 46 + } 47 + } 48 + 49 + static inline void p4d_populate_safe(struct mm_struct *mm, p4d_t *p4d, 50 + pud_t *pud) 51 + { 52 + if (pgtable_l4_enabled) { 53 + unsigned long pfn = virt_to_pfn(pud); 54 + 55 + set_p4d_safe(p4d, 56 + __p4d((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE)); 57 + } 58 + } 59 + 60 + #define pud_alloc_one pud_alloc_one 61 + static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) 62 + { 63 + if (pgtable_l4_enabled) 64 + return __pud_alloc_one(mm, addr); 65 + 66 + return NULL; 67 + } 68 + 69 + #define pud_free pud_free 70 + static inline void pud_free(struct mm_struct *mm, pud_t *pud) 71 + { 72 + if (pgtable_l4_enabled) 73 + __pud_free(mm, pud); 74 + } 75 + 76 + #define __pud_free_tlb(tlb, pud, addr) pud_free((tlb)->mm, pud) 41 77 #endif /* __PAGETABLE_PMD_FOLDED */ 42 78 43 79 static inline pgd_t *pgd_alloc(struct mm_struct *mm)

+107 -1

arch/riscv/include/asm/pgtable-64.h

··· 8 8 9 9 #include <linux/const.h> 10 10 11 - #define PGDIR_SHIFT 30 11 + extern bool pgtable_l4_enabled; 12 + 13 + #define PGDIR_SHIFT_L3 30 14 + #define PGDIR_SHIFT_L4 39 15 + #define PGDIR_SIZE_L3 (_AC(1, UL) << PGDIR_SHIFT_L3) 16 + 17 + #define PGDIR_SHIFT (pgtable_l4_enabled ? PGDIR_SHIFT_L4 : PGDIR_SHIFT_L3) 12 18 /* Size of region mapped by a page global directory */ 13 19 #define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT) 14 20 #define PGDIR_MASK (~(PGDIR_SIZE - 1)) 21 + 22 + /* pud is folded into pgd in case of 3-level page table */ 23 + #define PUD_SHIFT 30 24 + #define PUD_SIZE (_AC(1, UL) << PUD_SHIFT) 25 + #define PUD_MASK (~(PUD_SIZE - 1)) 15 26 16 27 #define PMD_SHIFT 21 17 28 /* Size of region mapped by a page middle directory */ 18 29 #define PMD_SIZE (_AC(1, UL) << PMD_SHIFT) 19 30 #define PMD_MASK (~(PMD_SIZE - 1)) 31 + 32 + /* Page Upper Directory entry */ 33 + typedef struct { 34 + unsigned long pud; 35 + } pud_t; 36 + 37 + #define pud_val(x) ((x).pud) 38 + #define __pud(x) ((pud_t) { (x) }) 39 + #define PTRS_PER_PUD (PAGE_SIZE / sizeof(pud_t)) 20 40 21 41 /* Page Middle Directory entry */ 22 42 typedef struct { ··· 79 59 set_pud(pudp, __pud(0)); 80 60 } 81 61 62 + static inline pud_t pfn_pud(unsigned long pfn, pgprot_t prot) 63 + { 64 + return __pud((pfn << _PAGE_PFN_SHIFT) | pgprot_val(prot)); 65 + } 66 + 67 + static inline unsigned long _pud_pfn(pud_t pud) 68 + { 69 + return pud_val(pud) >> _PAGE_PFN_SHIFT; 70 + } 71 + 82 72 static inline pmd_t *pud_pgtable(pud_t pud) 83 73 { 84 74 return (pmd_t *)pfn_to_virt(pud_val(pud) >> _PAGE_PFN_SHIFT); ··· 98 68 { 99 69 return pfn_to_page(pud_val(pud) >> _PAGE_PFN_SHIFT); 100 70 } 71 + 72 + #define mm_pud_folded mm_pud_folded 73 + static inline bool mm_pud_folded(struct mm_struct *mm) 74 + { 75 + if (pgtable_l4_enabled) 76 + return false; 77 + 78 + return true; 79 + } 80 + 81 + #define pmd_index(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)) 101 82 102 83 static inline pmd_t pfn_pmd(unsigned long 
pfn, pgprot_t prot) 103 84 { ··· 124 83 125 84 #define pmd_ERROR(e) \ 126 85 pr_err("%s:%d: bad pmd %016lx.\n", __FILE__, __LINE__, pmd_val(e)) 86 + 87 + #define pud_ERROR(e) \ 88 + pr_err("%s:%d: bad pud %016lx.\n", __FILE__, __LINE__, pud_val(e)) 89 + 90 + static inline void set_p4d(p4d_t *p4dp, p4d_t p4d) 91 + { 92 + if (pgtable_l4_enabled) 93 + *p4dp = p4d; 94 + else 95 + set_pud((pud_t *)p4dp, (pud_t){ p4d_val(p4d) }); 96 + } 97 + 98 + static inline int p4d_none(p4d_t p4d) 99 + { 100 + if (pgtable_l4_enabled) 101 + return (p4d_val(p4d) == 0); 102 + 103 + return 0; 104 + } 105 + 106 + static inline int p4d_present(p4d_t p4d) 107 + { 108 + if (pgtable_l4_enabled) 109 + return (p4d_val(p4d) & _PAGE_PRESENT); 110 + 111 + return 1; 112 + } 113 + 114 + static inline int p4d_bad(p4d_t p4d) 115 + { 116 + if (pgtable_l4_enabled) 117 + return !p4d_present(p4d); 118 + 119 + return 0; 120 + } 121 + 122 + static inline void p4d_clear(p4d_t *p4d) 123 + { 124 + if (pgtable_l4_enabled) 125 + set_p4d(p4d, __p4d(0)); 126 + } 127 + 128 + static inline pud_t *p4d_pgtable(p4d_t p4d) 129 + { 130 + if (pgtable_l4_enabled) 131 + return (pud_t *)pfn_to_virt(p4d_val(p4d) >> _PAGE_PFN_SHIFT); 132 + 133 + return (pud_t *)pud_pgtable((pud_t) { p4d_val(p4d) }); 134 + } 135 + 136 + static inline struct page *p4d_page(p4d_t p4d) 137 + { 138 + return pfn_to_page(p4d_val(p4d) >> _PAGE_PFN_SHIFT); 139 + } 140 + 141 + #define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) 142 + 143 + #define pud_offset pud_offset 144 + static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address) 145 + { 146 + if (pgtable_l4_enabled) 147 + return p4d_pgtable(*p4d) + pud_index(address); 148 + 149 + return (pud_t *)p4d; 150 + } 127 151 128 152 #endif /* _ASM_RISCV_PGTABLE_64_H */

+52 -13

arch/riscv/include/asm/pgtable.h

··· 24 24 #define KERNEL_LINK_ADDR PAGE_OFFSET 25 25 #endif 26 26 27 + /* Number of entries in the page global directory */ 28 + #define PTRS_PER_PGD (PAGE_SIZE / sizeof(pgd_t)) 29 + /* Number of entries in the page table */ 30 + #define PTRS_PER_PTE (PAGE_SIZE / sizeof(pte_t)) 31 + 32 + /* 33 + * Half of the kernel address space (half of the entries of the page global 34 + * directory) is for the direct mapping. 35 + */ 36 + #define KERN_VIRT_SIZE ((PTRS_PER_PGD / 2 * PGDIR_SIZE) / 2) 37 + 27 38 #define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1) 28 39 #define VMALLOC_END PAGE_OFFSET 29 40 #define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE) ··· 50 39 51 40 /* Modules always live before the kernel */ 52 41 #ifdef CONFIG_64BIT 53 - #define MODULES_VADDR (PFN_ALIGN((unsigned long)&_end) - SZ_2G) 54 - #define MODULES_END (PFN_ALIGN((unsigned long)&_start)) 42 + /* This is used to define the end of the KASAN shadow region */ 43 + #define MODULES_LOWEST_VADDR (KERNEL_LINK_ADDR - SZ_2G) 44 + #define MODULES_VADDR (PFN_ALIGN((unsigned long)&_end) - SZ_2G) 45 + #define MODULES_END (PFN_ALIGN((unsigned long)&_start)) 55 46 #endif 56 47 57 48 /* ··· 61 48 * struct pages to map half the virtual address space. Then 62 49 * position vmemmap directly below the VMALLOC region. 63 50 */ 51 + #ifdef CONFIG_64BIT 52 + #define VA_BITS (pgtable_l4_enabled ? 
48 : 39) 53 + #else 54 + #define VA_BITS 32 55 + #endif 56 + 64 57 #define VMEMMAP_SHIFT \ 65 - (CONFIG_VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT) 58 + (VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT) 66 59 #define VMEMMAP_SIZE BIT(VMEMMAP_SHIFT) 67 60 #define VMEMMAP_END VMALLOC_START 68 61 #define VMEMMAP_START (VMALLOC_START - VMEMMAP_SIZE) ··· 102 83 103 84 #ifndef __ASSEMBLY__ 104 85 105 - /* Page Upper Directory not used in RISC-V */ 106 - #include <asm-generic/pgtable-nopud.h> 86 + #include <asm-generic/pgtable-nop4d.h> 107 87 #include <asm/page.h> 108 88 #include <asm/tlbflush.h> 109 89 #include <linux/mm_types.h> ··· 125 107 #define XIP_FIXUP(addr) (addr) 126 108 #endif /* CONFIG_XIP_KERNEL */ 127 109 128 - #ifdef CONFIG_MMU 129 - /* Number of entries in the page global directory */ 130 - #define PTRS_PER_PGD (PAGE_SIZE / sizeof(pgd_t)) 131 - /* Number of entries in the page table */ 132 - #define PTRS_PER_PTE (PAGE_SIZE / sizeof(pte_t)) 110 + struct pt_alloc_ops { 111 + pte_t *(*get_pte_virt)(phys_addr_t pa); 112 + phys_addr_t (*alloc_pte)(uintptr_t va); 113 + #ifndef __PAGETABLE_PMD_FOLDED 114 + pmd_t *(*get_pmd_virt)(phys_addr_t pa); 115 + phys_addr_t (*alloc_pmd)(uintptr_t va); 116 + pud_t *(*get_pud_virt)(phys_addr_t pa); 117 + phys_addr_t (*alloc_pud)(uintptr_t va); 118 + #endif 119 + }; 133 120 121 + extern struct pt_alloc_ops pt_ops __initdata; 122 + 123 + #ifdef CONFIG_MMU 134 124 /* Number of PGD entries that a user-mode program can use */ 135 125 #define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE) 136 126 ··· 685 659 * and give the kernel the other (upper) half. 686 660 */ 687 661 #ifdef CONFIG_64BIT 688 - #define KERN_VIRT_START (-(BIT(CONFIG_VA_BITS)) + TASK_SIZE) 662 + #define KERN_VIRT_START (-(BIT(VA_BITS)) + TASK_SIZE) 689 663 #else 690 664 #define KERN_VIRT_START FIXADDR_START 691 665 #endif ··· 693 667 /* 694 668 * Task size is 0x4000000000 for RV64 or 0x9fc00000 for RV32. 
695 669 * Note that PGDIR_SIZE must evenly divide TASK_SIZE. 670 + * Task size is: 671 + * - 0x9fc00000 (~2.5GB) for RV32. 672 + * - 0x4000000000 ( 256GB) for RV64 using SV39 mmu 673 + * - 0x800000000000 ( 128TB) for RV64 using SV48 mmu 674 + * 675 + * Note that PGDIR_SIZE must evenly divide TASK_SIZE since "RISC-V 676 + * Instruction Set Manual Volume II: Privileged Architecture" states that 677 + * "load and store effective addresses, which are 64bits, must have bits 678 + * 63–48 all equal to bit 47, or else a page-fault exception will occur." 696 679 */ 697 680 #ifdef CONFIG_64BIT 698 - #define TASK_SIZE (PGDIR_SIZE * PTRS_PER_PGD / 2) 681 + #define TASK_SIZE (PGDIR_SIZE * PTRS_PER_PGD / 2) 682 + #define TASK_SIZE_MIN (PGDIR_SIZE_L3 * PTRS_PER_PGD / 2) 699 683 #else 700 - #define TASK_SIZE FIXADDR_START 684 + #define TASK_SIZE FIXADDR_START 685 + #define TASK_SIZE_MIN TASK_SIZE 701 686 #endif 702 687 703 688 #else /* CONFIG_MMU */ ··· 734 697 #define dtb_early_va _dtb_early_va 735 698 #define dtb_early_pa _dtb_early_pa 736 699 #endif /* CONFIG_XIP_KERNEL */ 700 + extern u64 satp_mode; 701 + extern bool pgtable_l4_enabled; 737 702 738 703 void paging_init(void); 739 704 void misc_mem_init(void);

+10 -9

arch/riscv/include/asm/sbi.h

··· 8 8 #define _ASM_RISCV_SBI_H 9 9 10 10 #include <linux/types.h> 11 + #include <linux/cpumask.h> 11 12 12 13 #ifdef CONFIG_RISCV_SBI 13 14 enum sbi_ext_id { ··· 129 128 void sbi_set_timer(uint64_t stime_value); 130 129 void sbi_shutdown(void); 131 130 void sbi_clear_ipi(void); 132 - int sbi_send_ipi(const unsigned long *hart_mask); 133 - int sbi_remote_fence_i(const unsigned long *hart_mask); 134 - int sbi_remote_sfence_vma(const unsigned long *hart_mask, 131 + int sbi_send_ipi(const struct cpumask *cpu_mask); 132 + int sbi_remote_fence_i(const struct cpumask *cpu_mask); 133 + int sbi_remote_sfence_vma(const struct cpumask *cpu_mask, 135 134 unsigned long start, 136 135 unsigned long size); 137 136 138 - int sbi_remote_sfence_vma_asid(const unsigned long *hart_mask, 137 + int sbi_remote_sfence_vma_asid(const struct cpumask *cpu_mask, 139 138 unsigned long start, 140 139 unsigned long size, 141 140 unsigned long asid); 142 - int sbi_remote_hfence_gvma(const unsigned long *hart_mask, 141 + int sbi_remote_hfence_gvma(const struct cpumask *cpu_mask, 143 142 unsigned long start, 144 143 unsigned long size); 145 - int sbi_remote_hfence_gvma_vmid(const unsigned long *hart_mask, 144 + int sbi_remote_hfence_gvma_vmid(const struct cpumask *cpu_mask, 146 145 unsigned long start, 147 146 unsigned long size, 148 147 unsigned long vmid); 149 - int sbi_remote_hfence_vvma(const unsigned long *hart_mask, 148 + int sbi_remote_hfence_vvma(const struct cpumask *cpu_mask, 150 149 unsigned long start, 151 150 unsigned long size); 152 - int sbi_remote_hfence_vvma_asid(const unsigned long *hart_mask, 151 + int sbi_remote_hfence_vvma_asid(const struct cpumask *cpu_mask, 153 152 unsigned long start, 154 153 unsigned long size, 155 154 unsigned long asid); ··· 184 183 185 184 int sbi_err_map_linux_errno(int err); 186 185 #else /* CONFIG_RISCV_SBI */ 187 - static inline int sbi_remote_fence_i(const unsigned long *hart_mask) { return -1; } 186 + static inline int sbi_remote_fence_i(const 
struct cpumask *cpu_mask) { return -1; } 188 187 static inline void sbi_init(void) {} 189 188 #endif /* CONFIG_RISCV_SBI */ 190 189 #endif /* _ASM_RISCV_SBI_H */

-2

arch/riscv/include/asm/smp.h

··· 92 92 93 93 #endif /* CONFIG_SMP */ 94 94 95 - void riscv_cpuid_to_hartid_mask(const struct cpumask *in, struct cpumask *out); 96 - 97 95 #if defined(CONFIG_HOTPLUG_CPU) && (CONFIG_SMP) 98 96 bool cpu_has_hotplug(unsigned int cpu); 99 97 #else

+5 -1

arch/riscv/include/asm/sparsemem.h

··· 4 4 #define _ASM_RISCV_SPARSEMEM_H 5 5 6 6 #ifdef CONFIG_SPARSEMEM 7 - #define MAX_PHYSMEM_BITS CONFIG_PA_BITS 7 + #ifdef CONFIG_64BIT 8 + #define MAX_PHYSMEM_BITS 56 9 + #else 10 + #define MAX_PHYSMEM_BITS 34 11 + #endif /* CONFIG_64BIT */ 8 12 #define SECTION_SIZE_BITS 27 9 13 #endif /* CONFIG_SPARSEMEM */ 10 14

+2 -1

arch/riscv/kernel/Makefile

··· 43 43 obj-$(CONFIG_SMP) += smpboot.o 44 44 obj-$(CONFIG_SMP) += smp.o 45 45 obj-$(CONFIG_SMP) += cpu_ops.o 46 - obj-$(CONFIG_SMP) += cpu_ops_spinwait.o 46 + 47 + obj-$(CONFIG_RISCV_BOOT_SPINWAIT) += cpu_ops_spinwait.o 47 48 obj-$(CONFIG_MODULES) += module.o 48 49 obj-$(CONFIG_MODULE_SECTIONS) += module-sections.o 49 50

+3

arch/riscv/kernel/asm-offsets.c

··· 12 12 #include <asm/kvm_host.h> 13 13 #include <asm/thread_info.h> 14 14 #include <asm/ptrace.h> 15 + #include <asm/cpu_ops_sbi.h> 15 16 16 17 void asm_offsets(void); 17 18 ··· 469 468 DEFINE(PT_SIZE_ON_STACK, ALIGN(sizeof(struct pt_regs), STACK_ALIGN)); 470 469 471 470 OFFSET(KERNEL_MAP_VIRT_ADDR, kernel_mapping, virt_addr); 471 + OFFSET(SBI_HART_BOOT_TASK_PTR_OFFSET, sbi_hart_boot_data, task_ptr); 472 + OFFSET(SBI_HART_BOOT_STACK_PTR_OFFSET, sbi_hart_boot_data, stack_ptr); 472 473 }

+14 -13

arch/riscv/kernel/cpu.c

··· 7 7 #include <linux/seq_file.h> 8 8 #include <linux/of.h> 9 9 #include <asm/smp.h> 10 + #include <asm/pgtable.h> 10 11 11 12 /* 12 13 * Returns the hart ID of the given device tree node, or -ENODEV if the node ··· 72 71 seq_puts(f, "\n"); 73 72 } 74 73 75 - static void print_mmu(struct seq_file *f, const char *mmu_type) 74 + static void print_mmu(struct seq_file *f) 76 75 { 77 - #if defined(CONFIG_32BIT) 78 - if (strcmp(mmu_type, "riscv,sv32") != 0) 79 - return; 80 - #elif defined(CONFIG_64BIT) 81 - if (strcmp(mmu_type, "riscv,sv39") != 0 && 82 - strcmp(mmu_type, "riscv,sv48") != 0) 83 - return; 84 - #endif 76 + char sv_type[16]; 85 77 86 - seq_printf(f, "mmu\t\t: %s\n", mmu_type+6); 78 + #if defined(CONFIG_32BIT) 79 + strncpy(sv_type, "sv32", 5); 80 + #elif defined(CONFIG_64BIT) 81 + if (pgtable_l4_enabled) 82 + strncpy(sv_type, "sv48", 5); 83 + else 84 + strncpy(sv_type, "sv39", 5); 85 + #endif 86 + seq_printf(f, "mmu\t\t: %s\n", sv_type); 87 87 } 88 88 89 89 static void *c_start(struct seq_file *m, loff_t *pos) ··· 109 107 { 110 108 unsigned long cpu_id = (unsigned long)v - 1; 111 109 struct device_node *node = of_get_cpu_node(cpu_id, NULL); 112 - const char *compat, *isa, *mmu; 110 + const char *compat, *isa; 113 111 114 112 seq_printf(m, "processor\t: %lu\n", cpu_id); 115 113 seq_printf(m, "hart\t\t: %lu\n", cpuid_to_hartid_map(cpu_id)); 116 114 if (!of_property_read_string(node, "riscv,isa", &isa)) 117 115 print_isa(m, isa); 118 - if (!of_property_read_string(node, "mmu-type", &mmu)) 119 - print_mmu(m, mmu); 116 + print_mmu(m); 120 117 if (!of_property_read_string(node, "compatible", &compat) 121 118 && strcmp(compat, "riscv")) 122 119 seq_printf(m, "uarch\t\t: %s\n", compat);

+9 -17

arch/riscv/kernel/cpu_ops.c

··· 8 8 #include <linux/of.h> 9 9 #include <linux/string.h> 10 10 #include <linux/sched.h> 11 - #include <linux/sched/task_stack.h> 12 11 #include <asm/cpu_ops.h> 13 12 #include <asm/sbi.h> 14 13 #include <asm/smp.h> 15 14 16 15 const struct cpu_operations *cpu_ops[NR_CPUS] __ro_after_init; 17 16 18 - void *__cpu_up_stack_pointer[NR_CPUS] __section(".data"); 19 - void *__cpu_up_task_pointer[NR_CPUS] __section(".data"); 20 - 21 17 extern const struct cpu_operations cpu_ops_sbi; 18 + #ifdef CONFIG_RISCV_BOOT_SPINWAIT 22 19 extern const struct cpu_operations cpu_ops_spinwait; 23 - 24 - void cpu_update_secondary_bootdata(unsigned int cpuid, 25 - struct task_struct *tidle) 26 - { 27 - int hartid = cpuid_to_hartid_map(cpuid); 28 - 29 - /* Make sure tidle is updated */ 30 - smp_mb(); 31 - WRITE_ONCE(__cpu_up_stack_pointer[hartid], 32 - task_stack_page(tidle) + THREAD_SIZE); 33 - WRITE_ONCE(__cpu_up_task_pointer[hartid], tidle); 34 - } 20 + #else 21 + const struct cpu_operations cpu_ops_spinwait = { 22 + .name = "", 23 + .cpu_prepare = NULL, 24 + .cpu_start = NULL, 25 + }; 26 + #endif 35 27 36 28 void __init cpu_set_ops(int cpuid) 37 29 { 38 30 #if IS_ENABLED(CONFIG_RISCV_SBI) 39 31 if (sbi_probe_extension(SBI_EXT_HSM) > 0) { 40 32 if (!cpuid) 41 - pr_info("SBI v0.2 HSM extension detected\n"); 33 + pr_info("SBI HSM extension detected\n"); 42 34 cpu_ops[cpuid] = &cpu_ops_sbi; 43 35 } else 44 36 #endif

+19 -5

arch/riscv/kernel/cpu_ops_sbi.c

··· 7 7 8 8 #include <linux/init.h> 9 9 #include <linux/mm.h> 10 + #include <linux/sched/task_stack.h> 10 11 #include <asm/cpu_ops.h> 12 + #include <asm/cpu_ops_sbi.h> 11 13 #include <asm/sbi.h> 12 14 #include <asm/smp.h> 13 15 14 16 extern char secondary_start_sbi[]; 15 17 const struct cpu_operations cpu_ops_sbi; 18 + 19 + /* 20 + * Ordered booting via HSM brings one cpu at a time. However, cpu hotplug can 21 + * be invoked from multiple threads in parallel. Define a per cpu data 22 + * to handle that. 23 + */ 24 + DEFINE_PER_CPU(struct sbi_hart_boot_data, boot_data); 16 25 17 26 static int sbi_hsm_hart_start(unsigned long hartid, unsigned long saddr, 18 27 unsigned long priv) ··· 64 55 65 56 static int sbi_cpu_start(unsigned int cpuid, struct task_struct *tidle) 66 57 { 67 - int rc; 68 58 unsigned long boot_addr = __pa_symbol(secondary_start_sbi); 69 59 int hartid = cpuid_to_hartid_map(cpuid); 60 + unsigned long hsm_data; 61 + struct sbi_hart_boot_data *bdata = &per_cpu(boot_data, cpuid); 70 62 71 - cpu_update_secondary_bootdata(cpuid, tidle); 72 - rc = sbi_hsm_hart_start(hartid, boot_addr, 0); 73 - 74 - return rc; 63 + /* Make sure tidle is updated */ 64 + smp_mb(); 65 + bdata->task_ptr = tidle; 66 + bdata->stack_ptr = task_stack_page(tidle) + THREAD_SIZE; 67 + /* Make sure boot data is updated */ 68 + smp_mb(); 69 + hsm_data = __pa(bdata); 70 + return sbi_hsm_hart_start(hartid, boot_addr, hsm_data); 75 71 } 76 72 77 73 static int sbi_cpu_prepare(unsigned int cpuid)

+26 -1

arch/riscv/kernel/cpu_ops_spinwait.c

··· 6 6 #include <linux/errno.h> 7 7 #include <linux/of.h> 8 8 #include <linux/string.h> 9 + #include <linux/sched/task_stack.h> 9 10 #include <asm/cpu_ops.h> 10 11 #include <asm/sbi.h> 11 12 #include <asm/smp.h> 12 13 13 14 const struct cpu_operations cpu_ops_spinwait; 15 + void *__cpu_spinwait_stack_pointer[NR_CPUS] __section(".data"); 16 + void *__cpu_spinwait_task_pointer[NR_CPUS] __section(".data"); 17 + 18 + static void cpu_update_secondary_bootdata(unsigned int cpuid, 19 + struct task_struct *tidle) 20 + { 21 + int hartid = cpuid_to_hartid_map(cpuid); 22 + 23 + /* 24 + * The hartid must be less than NR_CPUS to avoid out-of-bound access 25 + * errors for __cpu_spinwait_stack/task_pointer. That is not always possible 26 + * for platforms with discontiguous hartid numbering scheme. That's why 27 + * spinwait booting is not the recommended approach for any platforms 28 + * booting Linux in S-mode and can be disabled in the future. 29 + */ 30 + if (hartid == INVALID_HARTID || hartid >= NR_CPUS) 31 + return; 32 + 33 + /* Make sure tidle is updated */ 34 + smp_mb(); 35 + WRITE_ONCE(__cpu_spinwait_stack_pointer[hartid], 36 + task_stack_page(tidle) + THREAD_SIZE); 37 + WRITE_ONCE(__cpu_spinwait_task_pointer[hartid], tidle); 38 + } 14 39 15 40 static int spinwait_cpu_prepare(unsigned int cpuid) 16 41 { ··· 53 28 * selects the first cpu to boot the kernel and causes the remainder 54 29 * of the cpus to spin in a loop waiting for their stack pointer to be 55 30 * setup by that main cpu. Writing to bootdata 56 - * (i.e __cpu_up_stack_pointer) signals to the spinning cpus that they 31 + * (i.e __cpu_spinwait_stack_pointer) signals to the spinning cpus that they 57 32 * can continue the boot process. 58 33 */ 59 34 cpu_update_secondary_bootdata(cpuid, tidle);

+22 -16

arch/riscv/kernel/head.S

··· 11 11 #include <asm/page.h> 12 12 #include <asm/pgtable.h> 13 13 #include <asm/csr.h> 14 + #include <asm/cpu_ops_sbi.h> 14 15 #include <asm/hwcap.h> 15 16 #include <asm/image.h> 16 17 #include "efi-header.S" ··· 106 105 107 106 /* Compute satp for kernel page tables, but don't load it yet */ 108 107 srl a2, a0, PAGE_SHIFT 109 - li a1, SATP_MODE 108 + la a1, satp_mode 109 + REG_L a1, 0(a1) 110 110 or a2, a2, a1 111 111 112 112 /* ··· 169 167 la a3, .Lsecondary_park 170 168 csrw CSR_TVEC, a3 171 169 172 - slli a3, a0, LGREG 173 - la a4, __cpu_up_stack_pointer 174 - XIP_FIXUP_OFFSET a4 175 - la a5, __cpu_up_task_pointer 176 - XIP_FIXUP_OFFSET a5 177 - add a4, a3, a4 178 - add a5, a3, a5 179 - REG_L sp, (a4) 180 - REG_L tp, (a5) 170 + /* a0 contains the hartid & a1 contains boot data */ 171 + li a2, SBI_HART_BOOT_TASK_PTR_OFFSET 172 + XIP_FIXUP_OFFSET a2 173 + add a2, a2, a1 174 + REG_L tp, (a2) 175 + li a3, SBI_HART_BOOT_STACK_PTR_OFFSET 176 + XIP_FIXUP_OFFSET a3 177 + add a3, a3, a1 178 + REG_L sp, (a3) 181 179 182 180 .Lsecondary_start_common: 183 181 ··· 259 257 li t0, SR_FS 260 258 csrc CSR_STATUS, t0 261 259 262 - #ifdef CONFIG_SMP 260 + #ifdef CONFIG_RISCV_BOOT_SPINWAIT 263 261 li t0, CONFIG_NR_CPUS 264 262 blt a0, t0, .Lgood_cores 265 263 tail .Lsecondary_park 266 264 .Lgood_cores: 267 - #endif 268 265 266 + /* The lottery system is only required for spinwait booting method */ 269 267 #ifndef CONFIG_XIP_KERNEL 270 268 /* Pick one hart to run the main boot sequence */ 271 269 la a3, hart_lottery ··· 284 282 /* first time here if hart_lottery in RAM is not set */ 285 283 beq t0, t1, .Lsecondary_start 286 284 285 + #endif /* CONFIG_XIP */ 286 + #endif /* CONFIG_RISCV_BOOT_SPINWAIT */ 287 + 288 + #ifdef CONFIG_XIP_KERNEL 287 289 la sp, _end + THREAD_SIZE 288 290 XIP_FIXUP_OFFSET sp 289 291 mv s0, a0 ··· 344 338 call soc_early_init 345 339 tail start_kernel 346 340 341 + #if CONFIG_RISCV_BOOT_SPINWAIT 347 342 .Lsecondary_start: 348 - #ifdef CONFIG_SMP 349 343 /* 
Set trap vector to spin forever to help debug */ 350 344 la a3, .Lsecondary_park 351 345 csrw CSR_TVEC, a3 352 346 353 347 slli a3, a0, LGREG 354 - la a1, __cpu_up_stack_pointer 348 + la a1, __cpu_spinwait_stack_pointer 355 349 XIP_FIXUP_OFFSET a1 356 - la a2, __cpu_up_task_pointer 350 + la a2, __cpu_spinwait_task_pointer 357 351 XIP_FIXUP_OFFSET a2 358 352 add a1, a3, a1 359 353 add a2, a3, a2 ··· 371 365 fence 372 366 373 367 tail .Lsecondary_start_common 374 - #endif 368 + #endif /* CONFIG_RISCV_BOOT_SPINWAIT */ 375 369 376 370 END(_start_kernel) 377 371

+4 -2

arch/riscv/kernel/head.h

··· 16 16 asmlinkage void __init __copy_data(void); 17 17 #endif 18 18 19 - extern void *__cpu_up_stack_pointer[]; 20 - extern void *__cpu_up_task_pointer[]; 19 + #ifdef CONFIG_RISCV_BOOT_SPINWAIT 20 + extern void *__cpu_spinwait_stack_pointer[]; 21 + extern void *__cpu_spinwait_task_pointer[]; 22 + #endif 21 23 22 24 #endif /* __ASM_HEAD_H */

+1 -3

arch/riscv/kernel/ptrace.c

··· 42 42 unsigned int pos, unsigned int count, 43 43 const void *kbuf, const void __user *ubuf) 44 44 { 45 - int ret; 46 45 struct pt_regs *regs; 47 46 48 47 regs = task_pt_regs(target); 49 - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, regs, 0, -1); 50 - return ret; 48 + return user_regset_copyin(&pos, &count, &kbuf, &ubuf, regs, 0, -1); 51 49 } 52 50 53 51 #ifdef CONFIG_FPU

+106 -83

arch/riscv/kernel/sbi.c

··· 16 16 EXPORT_SYMBOL(sbi_spec_version); 17 17 18 18 static void (*__sbi_set_timer)(uint64_t stime) __ro_after_init; 19 - static int (*__sbi_send_ipi)(const unsigned long *hart_mask) __ro_after_init; 20 - static int (*__sbi_rfence)(int fid, const unsigned long *hart_mask, 19 + static int (*__sbi_send_ipi)(const struct cpumask *cpu_mask) __ro_after_init; 20 + static int (*__sbi_rfence)(int fid, const struct cpumask *cpu_mask, 21 21 unsigned long start, unsigned long size, 22 22 unsigned long arg4, unsigned long arg5) __ro_after_init; 23 23 ··· 67 67 EXPORT_SYMBOL(sbi_err_map_linux_errno); 68 68 69 69 #ifdef CONFIG_RISCV_SBI_V01 70 + static unsigned long __sbi_v01_cpumask_to_hartmask(const struct cpumask *cpu_mask) 71 + { 72 + unsigned long cpuid, hartid; 73 + unsigned long hmask = 0; 74 + 75 + /* 76 + * There is no maximum hartid concept in RISC-V and NR_CPUS must not be 77 + * associated with hartid. As SBI v0.1 is only kept for backward compatibility 78 + * and will be removed in the future, there is no point in supporting hartid 79 + * greater than BITS_PER_LONG (32 for RV32 and 64 for RV64). Ideally, SBI v0.2 80 + * should be used for platforms with hartid greater than BITS_PER_LONG. 81 + */ 82 + for_each_cpu(cpuid, cpu_mask) { 83 + hartid = cpuid_to_hartid_map(cpuid); 84 + if (hartid >= BITS_PER_LONG) { 85 + pr_warn("Unable to send any request to hartid > BITS_PER_LONG for SBI v0.1\n"); 86 + break; 87 + } 88 + hmask |= 1UL << hartid; /* shift an unsigned long, not an int: hartid may reach BITS_PER_LONG - 1 */ 89 + } 90 + 91 + return hmask; 92 + } 93 + 70 94 /** 71 95 * sbi_console_putchar() - Writes given character to the console device. 72 96 * @ch: The data to be written to the console. 
··· 156 132 #endif 157 133 } 158 134 159 - static int __sbi_send_ipi_v01(const unsigned long *hart_mask) 135 + static int __sbi_send_ipi_v01(const struct cpumask *cpu_mask) 160 136 { 161 - sbi_ecall(SBI_EXT_0_1_SEND_IPI, 0, (unsigned long)hart_mask, 137 + unsigned long hart_mask; 138 + 139 + if (!cpu_mask) 140 + cpu_mask = cpu_online_mask; 141 + hart_mask = __sbi_v01_cpumask_to_hartmask(cpu_mask); 142 + 143 + sbi_ecall(SBI_EXT_0_1_SEND_IPI, 0, (unsigned long)(&hart_mask), 162 144 0, 0, 0, 0, 0); 163 145 return 0; 164 146 } 165 147 166 - static int __sbi_rfence_v01(int fid, const unsigned long *hart_mask, 148 + static int __sbi_rfence_v01(int fid, const struct cpumask *cpu_mask, 167 149 unsigned long start, unsigned long size, 168 150 unsigned long arg4, unsigned long arg5) 169 151 { 170 152 int result = 0; 153 + unsigned long hart_mask; 154 + 155 + if (!cpu_mask) 156 + cpu_mask = cpu_online_mask; 157 + hart_mask = __sbi_v01_cpumask_to_hartmask(cpu_mask); 171 158 172 159 /* v0.2 function IDs are equivalent to v0.1 extension IDs */ 173 160 switch (fid) { 174 161 case SBI_EXT_RFENCE_REMOTE_FENCE_I: 175 162 sbi_ecall(SBI_EXT_0_1_REMOTE_FENCE_I, 0, 176 - (unsigned long)hart_mask, 0, 0, 0, 0, 0); 163 + (unsigned long)&hart_mask, 0, 0, 0, 0, 0); 177 164 break; 178 165 case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA: 179 166 sbi_ecall(SBI_EXT_0_1_REMOTE_SFENCE_VMA, 0, 180 - (unsigned long)hart_mask, start, size, 167 + (unsigned long)&hart_mask, start, size, 181 168 0, 0, 0); 182 169 break; 183 170 case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID: 184 171 sbi_ecall(SBI_EXT_0_1_REMOTE_SFENCE_VMA_ASID, 0, 185 - (unsigned long)hart_mask, start, size, 172 + (unsigned long)&hart_mask, start, size, 186 173 arg4, 0, 0); 187 174 break; 188 175 default: ··· 215 180 sbi_major_version(), sbi_minor_version()); 216 181 } 217 182 218 - static int __sbi_send_ipi_v01(const unsigned long *hart_mask) 183 + static int __sbi_send_ipi_v01(const struct cpumask *cpu_mask) 219 184 { 220 185 pr_warn("IPI extension 
is not available in SBI v%lu.%lu\n", 221 186 sbi_major_version(), sbi_minor_version()); ··· 223 188 return 0; 224 189 } 225 190 226 - static int __sbi_rfence_v01(int fid, const unsigned long *hart_mask, 191 + static int __sbi_rfence_v01(int fid, const struct cpumask *cpu_mask, 227 192 unsigned long start, unsigned long size, 228 193 unsigned long arg4, unsigned long arg5) 229 194 { ··· 247 212 #endif 248 213 } 249 214 250 - static int __sbi_send_ipi_v02(const unsigned long *hart_mask) 215 + static int __sbi_send_ipi_v02(const struct cpumask *cpu_mask) 251 216 { 252 - unsigned long hartid, hmask_val, hbase; 253 - struct cpumask tmask; 217 + unsigned long hartid, cpuid, hmask = 0, hbase = 0; 254 218 struct sbiret ret = {0}; 255 219 int result; 256 220 257 - if (!hart_mask || !(*hart_mask)) { 258 - riscv_cpuid_to_hartid_mask(cpu_online_mask, &tmask); 259 - hart_mask = cpumask_bits(&tmask); 260 - } 221 + if (!cpu_mask) 222 + cpu_mask = cpu_online_mask; 261 223 262 - hmask_val = 0; 263 - hbase = 0; 264 - for_each_set_bit(hartid, hart_mask, NR_CPUS) { 265 - if (hmask_val && ((hbase + BITS_PER_LONG) <= hartid)) { 224 + for_each_cpu(cpuid, cpu_mask) { 225 + hartid = cpuid_to_hartid_map(cpuid); 226 + if (hmask && ((hbase + BITS_PER_LONG) <= hartid)) { 266 227 ret = sbi_ecall(SBI_EXT_IPI, SBI_EXT_IPI_SEND_IPI, 267 - hmask_val, hbase, 0, 0, 0, 0); 228 + hmask, hbase, 0, 0, 0, 0); 268 229 if (ret.error) 269 230 goto ecall_failed; 270 - hmask_val = 0; 231 + hmask = 0; 271 232 hbase = 0; 272 233 } 273 - if (!hmask_val) 234 + if (!hmask) 274 235 hbase = hartid; 275 - hmask_val |= 1UL << (hartid - hbase); 236 + hmask |= 1UL << (hartid - hbase); 276 237 } 277 238 278 - if (hmask_val) { 239 + if (hmask) { 279 240 ret = sbi_ecall(SBI_EXT_IPI, SBI_EXT_IPI_SEND_IPI, 280 - hmask_val, hbase, 0, 0, 0, 0); 241 + hmask, hbase, 0, 0, 0, 0); 281 242 if (ret.error) 282 243 goto ecall_failed; 283 244 } ··· 283 252 ecall_failed: 284 253 result = sbi_err_map_linux_errno(ret.error); 285 254 
pr_err("%s: hbase = [%lu] hmask = [0x%lx] failed (error [%d])\n", 286 - __func__, hbase, hmask_val, result); 255 + __func__, hbase, hmask, result); 287 256 return result; 288 257 } 289 258 290 - static int __sbi_rfence_v02_call(unsigned long fid, unsigned long hmask_val, 259 + static int __sbi_rfence_v02_call(unsigned long fid, unsigned long hmask, 291 260 unsigned long hbase, unsigned long start, 292 261 unsigned long size, unsigned long arg4, 293 262 unsigned long arg5) ··· 298 267 299 268 switch (fid) { 300 269 case SBI_EXT_RFENCE_REMOTE_FENCE_I: 301 - ret = sbi_ecall(ext, fid, hmask_val, hbase, 0, 0, 0, 0); 270 + ret = sbi_ecall(ext, fid, hmask, hbase, 0, 0, 0, 0); 302 271 break; 303 272 case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA: 304 - ret = sbi_ecall(ext, fid, hmask_val, hbase, start, 273 + ret = sbi_ecall(ext, fid, hmask, hbase, start, 305 274 size, 0, 0); 306 275 break; 307 276 case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID: 308 - ret = sbi_ecall(ext, fid, hmask_val, hbase, start, 277 + ret = sbi_ecall(ext, fid, hmask, hbase, start, 309 278 size, arg4, 0); 310 279 break; 311 280 312 281 case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA: 313 - ret = sbi_ecall(ext, fid, hmask_val, hbase, start, 282 + ret = sbi_ecall(ext, fid, hmask, hbase, start, 314 283 size, 0, 0); 315 284 break; 316 285 case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA_VMID: 317 - ret = sbi_ecall(ext, fid, hmask_val, hbase, start, 286 + ret = sbi_ecall(ext, fid, hmask, hbase, start, 318 287 size, arg4, 0); 319 288 break; 320 289 case SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA: 321 - ret = sbi_ecall(ext, fid, hmask_val, hbase, start, 290 + ret = sbi_ecall(ext, fid, hmask, hbase, start, 322 291 size, 0, 0); 323 292 break; 324 293 case SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA_ASID: 325 - ret = sbi_ecall(ext, fid, hmask_val, hbase, start, 294 + ret = sbi_ecall(ext, fid, hmask, hbase, start, 326 295 size, arg4, 0); 327 296 break; 328 297 default: ··· 334 303 if (ret.error) { 335 304 result = sbi_err_map_linux_errno(ret.error); 336 305 
pr_err("%s: hbase = [%lu] hmask = [0x%lx] failed (error [%d])\n", 337 - __func__, hbase, hmask_val, result); 306 + __func__, hbase, hmask, result); 338 307 } 339 308 340 309 return result; 341 310 } 342 311 343 - static int __sbi_rfence_v02(int fid, const unsigned long *hart_mask, 312 + static int __sbi_rfence_v02(int fid, const struct cpumask *cpu_mask, 344 313 unsigned long start, unsigned long size, 345 314 unsigned long arg4, unsigned long arg5) 346 315 { 347 - unsigned long hmask_val, hartid, hbase; 348 - struct cpumask tmask; 316 + unsigned long hartid, cpuid, hmask = 0, hbase = 0; 349 317 int result; 350 318 351 - if (!hart_mask || !(*hart_mask)) { 352 - riscv_cpuid_to_hartid_mask(cpu_online_mask, &tmask); 353 - hart_mask = cpumask_bits(&tmask); 354 - } 319 + if (!cpu_mask) 320 + cpu_mask = cpu_online_mask; 355 321 356 - hmask_val = 0; 357 - hbase = 0; 358 - for_each_set_bit(hartid, hart_mask, NR_CPUS) { 359 - if (hmask_val && ((hbase + BITS_PER_LONG) <= hartid)) { 360 - result = __sbi_rfence_v02_call(fid, hmask_val, hbase, 322 + for_each_cpu(cpuid, cpu_mask) { 323 + hartid = cpuid_to_hartid_map(cpuid); 324 + if (hmask && ((hbase + BITS_PER_LONG) <= hartid)) { 325 + result = __sbi_rfence_v02_call(fid, hmask, hbase, 361 326 start, size, arg4, arg5); 362 327 if (result) 363 328 return result; 364 - hmask_val = 0; 329 + hmask = 0; 365 330 hbase = 0; 366 331 } 367 - if (!hmask_val) 332 + if (!hmask) 368 333 hbase = hartid; 369 - hmask_val |= 1UL << (hartid - hbase); 334 + hmask |= 1UL << (hartid - hbase); 370 335 } 371 336 372 - if (hmask_val) { 373 - result = __sbi_rfence_v02_call(fid, hmask_val, hbase, 337 + if (hmask) { 338 + result = __sbi_rfence_v02_call(fid, hmask, hbase, 374 339 start, size, arg4, arg5); 375 340 if (result) 376 341 return result; ··· 388 361 389 362 /** 390 363 * sbi_send_ipi() - Send an IPI to any hart. 391 - * @hart_mask: A cpu mask containing all the target harts. 364 + * @cpu_mask: A cpu mask containing all the target harts. 
392 365 * 393 366 * Return: 0 on success, appropriate linux error code otherwise. 394 367 */ 395 - int sbi_send_ipi(const unsigned long *hart_mask) 368 + int sbi_send_ipi(const struct cpumask *cpu_mask) 396 369 { 397 - return __sbi_send_ipi(hart_mask); 370 + return __sbi_send_ipi(cpu_mask); 398 371 } 399 372 EXPORT_SYMBOL(sbi_send_ipi); 400 373 401 374 /** 402 375 * sbi_remote_fence_i() - Execute FENCE.I instruction on given remote harts. 403 - * @hart_mask: A cpu mask containing all the target harts. 376 + * @cpu_mask: A cpu mask containing all the target harts. 404 377 * 405 378 * Return: 0 on success, appropriate linux error code otherwise. 406 379 */ 407 - int sbi_remote_fence_i(const unsigned long *hart_mask) 380 + int sbi_remote_fence_i(const struct cpumask *cpu_mask) 408 381 { 409 382 return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_FENCE_I, 410 - hart_mask, 0, 0, 0, 0); 383 + cpu_mask, 0, 0, 0, 0); 411 384 } 412 385 EXPORT_SYMBOL(sbi_remote_fence_i); 413 386 414 387 /** 415 388 * sbi_remote_sfence_vma() - Execute SFENCE.VMA instructions on given remote 416 389 * harts for the specified virtual address range. 417 - * @hart_mask: A cpu mask containing all the target harts. 390 + * @cpu_mask: A cpu mask containing all the target harts. 418 391 * @start: Start of the virtual address 419 392 * @size: Total size of the virtual address range. 420 393 * 421 394 * Return: 0 on success, appropriate linux error code otherwise. 
422 395 */ 423 - int sbi_remote_sfence_vma(const unsigned long *hart_mask, 396 + int sbi_remote_sfence_vma(const struct cpumask *cpu_mask, 424 397 unsigned long start, 425 398 unsigned long size) 426 399 { 427 400 return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA, 428 - hart_mask, start, size, 0, 0); 401 + cpu_mask, start, size, 0, 0); 429 402 } 430 403 EXPORT_SYMBOL(sbi_remote_sfence_vma); 431 404 ··· 433 406 * sbi_remote_sfence_vma_asid() - Execute SFENCE.VMA instructions on given 434 407 * remote harts for a virtual address range belonging to a specific ASID. 435 408 * 436 - * @hart_mask: A cpu mask containing all the target harts. 409 + * @cpu_mask: A cpu mask containing all the target harts. 437 410 * @start: Start of the virtual address 438 411 * @size: Total size of the virtual address range. 439 412 * @asid: The value of address space identifier (ASID). 440 413 * 441 414 * Return: 0 on success, appropriate linux error code otherwise. 442 415 */ 443 - int sbi_remote_sfence_vma_asid(const unsigned long *hart_mask, 416 + int sbi_remote_sfence_vma_asid(const struct cpumask *cpu_mask, 444 417 unsigned long start, 445 418 unsigned long size, 446 419 unsigned long asid) 447 420 { 448 421 return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID, 449 - hart_mask, start, size, asid, 0); 422 + cpu_mask, start, size, asid, 0); 450 423 } 451 424 EXPORT_SYMBOL(sbi_remote_sfence_vma_asid); 452 425 453 426 /** 454 427 * sbi_remote_hfence_gvma() - Execute HFENCE.GVMA instructions on given remote 455 428 * harts for the specified guest physical address range. 456 - * @hart_mask: A cpu mask containing all the target harts. 429 + * @cpu_mask: A cpu mask containing all the target harts. 457 430 * @start: Start of the guest physical address 458 431 * @size: Total size of the guest physical address range. 
459 432 * 460 433 * Return: 0 on success, appropriate linux error code otherwise. 461 434 */ 462 - int sbi_remote_hfence_gvma(const unsigned long *hart_mask, 435 + int sbi_remote_hfence_gvma(const struct cpumask *cpu_mask, 463 436 unsigned long start, 464 437 unsigned long size) 465 438 { 466 439 return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA, 467 - hart_mask, start, size, 0, 0); 440 + cpu_mask, start, size, 0, 0); 468 441 } 469 442 EXPORT_SYMBOL_GPL(sbi_remote_hfence_gvma); 470 443 ··· 472 445 * sbi_remote_hfence_gvma_vmid() - Execute HFENCE.GVMA instructions on given 473 446 * remote harts for a guest physical address range belonging to a specific VMID. 474 447 * 475 - * @hart_mask: A cpu mask containing all the target harts. 448 + * @cpu_mask: A cpu mask containing all the target harts. 476 449 * @start: Start of the guest physical address 477 450 * @size: Total size of the guest physical address range. 478 451 * @vmid: The value of guest ID (VMID). 479 452 * 480 453 * Return: 0 on success, appropriate linux error code otherwise. 481 454 */ 482 - int sbi_remote_hfence_gvma_vmid(const unsigned long *hart_mask, 455 + int sbi_remote_hfence_gvma_vmid(const struct cpumask *cpu_mask, 483 456 unsigned long start, 484 457 unsigned long size, 485 458 unsigned long vmid) 486 459 { 487 460 return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA_VMID, 488 - hart_mask, start, size, vmid, 0); 461 + cpu_mask, start, size, vmid, 0); 489 462 } 490 463 EXPORT_SYMBOL(sbi_remote_hfence_gvma_vmid); 491 464 492 465 /** 493 466 * sbi_remote_hfence_vvma() - Execute HFENCE.VVMA instructions on given remote 494 467 * harts for the current guest virtual address range. 495 - * @hart_mask: A cpu mask containing all the target harts. 468 + * @cpu_mask: A cpu mask containing all the target harts. 496 469 * @start: Start of the current guest virtual address 497 470 * @size: Total size of the current guest virtual address range. 
498 471 * 499 472 * Return: 0 on success, appropriate linux error code otherwise. 500 473 */ 501 - int sbi_remote_hfence_vvma(const unsigned long *hart_mask, 474 + int sbi_remote_hfence_vvma(const struct cpumask *cpu_mask, 502 475 unsigned long start, 503 476 unsigned long size) 504 477 { 505 478 return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA, 506 - hart_mask, start, size, 0, 0); 479 + cpu_mask, start, size, 0, 0); 507 480 } 508 481 EXPORT_SYMBOL(sbi_remote_hfence_vvma); 509 482 ··· 512 485 * remote harts for current guest virtual address range belonging to a specific 513 486 * ASID. 514 487 * 515 - * @hart_mask: A cpu mask containing all the target harts. 488 + * @cpu_mask: A cpu mask containing all the target harts. 516 489 * @start: Start of the current guest virtual address 517 490 * @size: Total size of the current guest virtual address range. 518 491 * @asid: The value of address space identifier (ASID). 519 492 * 520 493 * Return: 0 on success, appropriate linux error code otherwise. 521 494 */ 522 - int sbi_remote_hfence_vvma_asid(const unsigned long *hart_mask, 495 + int sbi_remote_hfence_vvma_asid(const struct cpumask *cpu_mask, 523 496 unsigned long start, 524 497 unsigned long size, 525 498 unsigned long asid) 526 499 { 527 500 return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA_ASID, 528 - hart_mask, start, size, asid, 0); 501 + cpu_mask, start, size, asid, 0); 529 502 } 530 503 EXPORT_SYMBOL(sbi_remote_hfence_vvma_asid); 531 504 ··· 618 591 619 592 static void sbi_send_cpumask_ipi(const struct cpumask *target) 620 593 { 621 - struct cpumask hartid_mask; 622 - 623 - riscv_cpuid_to_hartid_mask(target, &hartid_mask); 624 - 625 - sbi_send_ipi(cpumask_bits(&hartid_mask)); 594 + sbi_send_ipi(target); 626 595 } 627 596 628 597 static const struct riscv_ipi_ops sbi_ipi_ops = {

-10

arch/riscv/kernel/setup.c

··· 59 59 unsigned long boot_cpu_hartid; 60 60 static DEFINE_PER_CPU(struct cpu, cpu_devices); 61 61 62 - void riscv_cpuid_to_hartid_mask(const struct cpumask *in, struct cpumask *out) 63 - { 64 - int cpu; 65 - 66 - cpumask_clear(out); 67 - for_each_cpu(cpu, in) 68 - cpumask_set_cpu(cpuid_to_hartid_map(cpu), out); 69 - } 70 - EXPORT_SYMBOL_GPL(riscv_cpuid_to_hartid_mask); 71 - 72 62 /* 73 63 * Place kernel memory regions on the resource tree so that 74 64 * kexec-tools can retrieve them from /proc/iomem. While there

+1 -1

arch/riscv/kernel/smpboot.c

··· 96 96 if (cpuid >= NR_CPUS) { 97 97 pr_warn("Invalid cpuid [%d] for hartid [%d]\n", 98 98 cpuid, hart); 99 - break; 99 + continue; 100 100 } 101 101 102 102 cpuid_to_hartid_map(cpuid) = hart;

+1 -3

arch/riscv/kvm/mmu.c

··· 114 114 115 115 static void stage2_remote_tlb_flush(struct kvm *kvm, u32 level, gpa_t addr) 116 116 { 117 - struct cpumask hmask; 118 117 unsigned long size = PAGE_SIZE; 119 118 struct kvm_vmid *vmid = &kvm->arch.vmid; 120 119 ··· 126 127 * where the Guest/VM is running. 127 128 */ 128 129 preempt_disable(); 129 - riscv_cpuid_to_hartid_mask(cpu_online_mask, &hmask); 130 - sbi_remote_hfence_gvma_vmid(cpumask_bits(&hmask), addr, size, 130 + sbi_remote_hfence_gvma_vmid(cpu_online_mask, addr, size, 131 131 READ_ONCE(vmid->vmid)); 132 132 preempt_enable(); 133 133 }

+4 -7

arch/riscv/kvm/vcpu_sbi_replace.c

··· 82 82 { 83 83 int ret = 0; 84 84 unsigned long i; 85 - struct cpumask cm, hm; 85 + struct cpumask cm; 86 86 struct kvm_vcpu *tmp; 87 87 struct kvm_cpu_context *cp = &vcpu->arch.guest_context; 88 88 unsigned long hmask = cp->a0; ··· 90 90 unsigned long funcid = cp->a6; 91 91 92 92 cpumask_clear(&cm); 93 - cpumask_clear(&hm); 94 93 kvm_for_each_vcpu(i, tmp, vcpu->kvm) { 95 94 if (hbase != -1UL) { 96 95 if (tmp->vcpu_id < hbase) ··· 102 103 cpumask_set_cpu(tmp->cpu, &cm); 103 104 } 104 105 105 - riscv_cpuid_to_hartid_mask(&cm, &hm); 106 - 107 106 switch (funcid) { 108 107 case SBI_EXT_RFENCE_REMOTE_FENCE_I: 109 - ret = sbi_remote_fence_i(cpumask_bits(&hm)); 108 + ret = sbi_remote_fence_i(&cm); 110 109 break; 111 110 case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA: 112 - ret = sbi_remote_hfence_vvma(cpumask_bits(&hm), cp->a2, cp->a3); 111 + ret = sbi_remote_hfence_vvma(&cm, cp->a2, cp->a3); 113 112 break; 114 113 case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID: 115 - ret = sbi_remote_hfence_vvma_asid(cpumask_bits(&hm), cp->a2, 114 + ret = sbi_remote_hfence_vvma_asid(&cm, cp->a2, 116 115 cp->a3, cp->a4); 117 116 break; 118 117 case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA:

+4 -7

arch/riscv/kvm/vcpu_sbi_v01.c

··· 38 38 int i, ret = 0; 39 39 u64 next_cycle; 40 40 struct kvm_vcpu *rvcpu; 41 - struct cpumask cm, hm; 41 + struct cpumask cm; 42 42 struct kvm *kvm = vcpu->kvm; 43 43 struct kvm_cpu_context *cp = &vcpu->arch.guest_context; 44 44 ··· 101 101 continue; 102 102 cpumask_set_cpu(rvcpu->cpu, &cm); 103 103 } 104 - riscv_cpuid_to_hartid_mask(&cm, &hm); 105 104 if (cp->a7 == SBI_EXT_0_1_REMOTE_FENCE_I) 106 - ret = sbi_remote_fence_i(cpumask_bits(&hm)); 105 + ret = sbi_remote_fence_i(&cm); 107 106 else if (cp->a7 == SBI_EXT_0_1_REMOTE_SFENCE_VMA) 108 - ret = sbi_remote_hfence_vvma(cpumask_bits(&hm), 109 - cp->a1, cp->a2); 107 + ret = sbi_remote_hfence_vvma(&cm, cp->a1, cp->a2); 110 108 else 111 - ret = sbi_remote_hfence_vvma_asid(cpumask_bits(&hm), 112 - cp->a1, cp->a2, cp->a3); 109 + ret = sbi_remote_hfence_vvma_asid(&cm, cp->a1, cp->a2, cp->a3); 113 110 break; 114 111 default: 115 112 ret = -EINVAL;

+1 -3

arch/riscv/kvm/vmid.c

··· 67 67 { 68 68 unsigned long i; 69 69 struct kvm_vcpu *v; 70 - struct cpumask hmask; 71 70 struct kvm_vmid *vmid = &vcpu->kvm->arch.vmid; 72 71 73 72 if (!kvm_riscv_stage2_vmid_ver_changed(vmid)) ··· 101 102 * running, we force VM exits on all host CPUs using IPI and 102 103 * flush all Guest TLBs. 103 104 */ 104 - riscv_cpuid_to_hartid_mask(cpu_online_mask, &hmask); 105 - sbi_remote_hfence_gvma(cpumask_bits(&hmask), 0, 0); 105 + sbi_remote_hfence_gvma(cpu_online_mask, 0, 0); 106 106 } 107 107 108 108 vmid->vmid = vmid_next;

+1 -4

arch/riscv/mm/cacheflush.c

··· 67 67 */ 68 68 smp_mb(); 69 69 } else if (IS_ENABLED(CONFIG_RISCV_SBI)) { 70 - cpumask_t hartid_mask; 71 - 72 - riscv_cpuid_to_hartid_mask(&others, &hartid_mask); 73 - sbi_remote_fence_i(cpumask_bits(&hartid_mask)); 70 + sbi_remote_fence_i(&others); 74 71 } else { 75 72 on_each_cpu_mask(&others, ipi_remote_fence_i, NULL, 1); 76 73 }

+2 -2

arch/riscv/mm/context.c

··· 192 192 switch_mm_fast: 193 193 csr_write(CSR_SATP, virt_to_pfn(mm->pgd) | 194 194 ((cntx & asid_mask) << SATP_ASID_SHIFT) | 195 - SATP_MODE); 195 + satp_mode); 196 196 197 197 if (need_flush_tlb) 198 198 local_flush_tlb_all(); ··· 201 201 static void set_mm_noasid(struct mm_struct *mm) 202 202 { 203 203 /* Switch the page table and blindly nuke entire local TLB */ 204 - csr_write(CSR_SATP, virt_to_pfn(mm->pgd) | SATP_MODE); 204 + csr_write(CSR_SATP, virt_to_pfn(mm->pgd) | satp_mode); 205 205 local_flush_tlb_all(); 206 206 } 207 207

+281 -97

arch/riscv/mm/init.c

··· 37 37 #define kernel_map (*(struct kernel_mapping *)XIP_FIXUP(&kernel_map)) 38 38 #endif 39 39 40 + #ifdef CONFIG_64BIT 41 + u64 satp_mode = !IS_ENABLED(CONFIG_XIP_KERNEL) ? SATP_MODE_48 : SATP_MODE_39; 42 + #else 43 + u64 satp_mode = SATP_MODE_32; 44 + #endif 45 + EXPORT_SYMBOL(satp_mode); 46 + 47 + bool pgtable_l4_enabled = IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_XIP_KERNEL); 48 + EXPORT_SYMBOL(pgtable_l4_enabled); 49 + 40 50 phys_addr_t phys_ram_base __ro_after_init; 41 51 EXPORT_SYMBOL(phys_ram_base); 42 - 43 - #ifdef CONFIG_XIP_KERNEL 44 - extern char _xiprom[], _exiprom[], __data_loc; 45 - #endif 46 52 47 53 unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] 48 54 __page_aligned_bss; ··· 58 52 #define DTB_EARLY_BASE_VA PGDIR_SIZE 59 53 void *_dtb_early_va __initdata; 60 54 uintptr_t _dtb_early_pa __initdata; 61 - 62 - struct pt_alloc_ops { 63 - pte_t *(*get_pte_virt)(phys_addr_t pa); 64 - phys_addr_t (*alloc_pte)(uintptr_t va); 65 - #ifndef __PAGETABLE_PMD_FOLDED 66 - pmd_t *(*get_pmd_virt)(phys_addr_t pa); 67 - phys_addr_t (*alloc_pmd)(uintptr_t va); 68 - #endif 69 - }; 70 55 71 56 static phys_addr_t dma32_phys_limit __initdata; 72 57 ··· 99 102 (unsigned long)VMALLOC_END); 100 103 print_mlm("lowmem", (unsigned long)PAGE_OFFSET, 101 104 (unsigned long)high_memory); 102 - #ifdef CONFIG_64BIT 103 - print_mlm("kernel", (unsigned long)KERNEL_LINK_ADDR, 104 - (unsigned long)ADDRESS_SPACE_END); 105 + if (IS_ENABLED(CONFIG_64BIT)) { 106 + #ifdef CONFIG_KASAN 107 + print_mlm("kasan", KASAN_SHADOW_START, KASAN_SHADOW_END); 105 108 #endif 109 + 110 + print_mlm("kernel", (unsigned long)KERNEL_LINK_ADDR, 111 + (unsigned long)ADDRESS_SPACE_END); 112 + } 106 113 } 107 114 #else 108 115 static void print_vm_layout(void) { } ··· 131 130 print_vm_layout(); 132 131 } 133 132 134 - /* 135 - * The default maximal physical memory size is -PAGE_OFFSET for 32-bit kernel, 136 - * whereas for 64-bit kernel, the end of the virtual address space is occupied 
137 - * by the modules/BPF/kernel mappings which reduces the available size of the 138 - * linear mapping. 139 - * Limit the memory size via mem. 140 - */ 141 - #ifdef CONFIG_64BIT 142 - static phys_addr_t memory_limit = -PAGE_OFFSET - SZ_4G; 143 - #else 144 - static phys_addr_t memory_limit = -PAGE_OFFSET; 145 - #endif 133 + /* Limit the memory size via mem. */ 134 + static phys_addr_t memory_limit; 146 135 147 136 static int __init early_mem(char *p) 148 137 { ··· 153 162 static void __init setup_bootmem(void) 154 163 { 155 164 phys_addr_t vmlinux_end = __pa_symbol(&_end); 156 - phys_addr_t vmlinux_start = __pa_symbol(&_start); 157 - phys_addr_t __maybe_unused max_mapped_addr; 158 - phys_addr_t phys_ram_end; 165 + phys_addr_t max_mapped_addr; 166 + phys_addr_t phys_ram_end, vmlinux_start; 159 167 160 - #ifdef CONFIG_XIP_KERNEL 161 - vmlinux_start = __pa_symbol(&_sdata); 162 - #endif 168 + if (IS_ENABLED(CONFIG_XIP_KERNEL)) 169 + vmlinux_start = __pa_symbol(&_sdata); 170 + else 171 + vmlinux_start = __pa_symbol(&_start); 163 172 164 173 memblock_enforce_memory_limit(memory_limit); 165 174 166 - /* 167 - * Reserve from the start of the kernel to the end of the kernel 168 - */ 169 - #if defined(CONFIG_64BIT) && defined(CONFIG_STRICT_KERNEL_RWX) 170 175 /* 171 176 * Make sure we align the reservation on PMD_SIZE since we will 172 177 * map the kernel in the linear mapping as read-only: we do not want 173 178 * any allocation to happen between _end and the next pmd aligned page. 
174 179 */ 175 - vmlinux_end = (vmlinux_end + PMD_SIZE - 1) & PMD_MASK; 176 - #endif 180 + if (IS_ENABLED(CONFIG_64BIT) && IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) 181 + vmlinux_end = (vmlinux_end + PMD_SIZE - 1) & PMD_MASK; 182 + /* 183 + * Reserve from the start of the kernel to the end of the kernel 184 + */ 177 185 memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start); 178 186 179 - 180 187 phys_ram_end = memblock_end_of_DRAM(); 181 - #ifndef CONFIG_XIP_KERNEL 182 - phys_ram_base = memblock_start_of_DRAM(); 183 - #endif 184 - #ifndef CONFIG_64BIT 188 + if (!IS_ENABLED(CONFIG_XIP_KERNEL)) 189 + phys_ram_base = memblock_start_of_DRAM(); 185 190 /* 186 191 * memblock allocator is not aware of the fact that last 4K bytes of 187 192 * the addressable memory can not be mapped because of IS_ERR_VALUE ··· 187 200 * address space is occupied by the kernel mapping then this check must 188 201 * be done as soon as the kernel mapping base address is determined. 189 202 */ 190 - max_mapped_addr = __pa(~(ulong)0); 191 - if (max_mapped_addr == (phys_ram_end - 1)) 192 - memblock_set_current_limit(max_mapped_addr - 4096); 193 - #endif 203 + if (!IS_ENABLED(CONFIG_64BIT)) { 204 + max_mapped_addr = __pa(~(ulong)0); 205 + if (max_mapped_addr == (phys_ram_end - 1)) 206 + memblock_set_current_limit(max_mapped_addr - 4096); 207 + } 194 208 195 209 min_low_pfn = PFN_UP(phys_ram_base); 196 210 max_low_pfn = max_pfn = PFN_DOWN(phys_ram_end); ··· 217 229 } 218 230 219 231 #ifdef CONFIG_MMU 220 - static struct pt_alloc_ops _pt_ops __initdata; 221 - 222 - #ifdef CONFIG_XIP_KERNEL 223 - #define pt_ops (*(struct pt_alloc_ops *)XIP_FIXUP(&_pt_ops)) 224 - #else 225 - #define pt_ops _pt_ops 226 - #endif 232 + struct pt_alloc_ops pt_ops __initdata; 227 233 228 234 unsigned long riscv_pfn_base __ro_after_init; 229 235 EXPORT_SYMBOL(riscv_pfn_base); ··· 227 245 static pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss; 228 246 229 247 pgd_t early_pg_dir[PTRS_PER_PGD] __initdata 
__aligned(PAGE_SIZE); 248 + static pud_t __maybe_unused early_dtb_pud[PTRS_PER_PUD] __initdata __aligned(PAGE_SIZE); 230 249 static pmd_t __maybe_unused early_dtb_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE); 231 250 232 251 #ifdef CONFIG_XIP_KERNEL 252 + #define pt_ops (*(struct pt_alloc_ops *)XIP_FIXUP(&pt_ops)) 233 253 #define trampoline_pg_dir ((pgd_t *)XIP_FIXUP(trampoline_pg_dir)) 234 254 #define fixmap_pte ((pte_t *)XIP_FIXUP(fixmap_pte)) 235 255 #define early_pg_dir ((pgd_t *)XIP_FIXUP(early_pg_dir)) ··· 317 333 #define early_pmd ((pmd_t *)XIP_FIXUP(early_pmd)) 318 334 #endif /* CONFIG_XIP_KERNEL */ 319 335 336 + static pud_t trampoline_pud[PTRS_PER_PUD] __page_aligned_bss; 337 + static pud_t fixmap_pud[PTRS_PER_PUD] __page_aligned_bss; 338 + static pud_t early_pud[PTRS_PER_PUD] __initdata __aligned(PAGE_SIZE); 339 + 340 + #ifdef CONFIG_XIP_KERNEL 341 + #define trampoline_pud ((pud_t *)XIP_FIXUP(trampoline_pud)) 342 + #define fixmap_pud ((pud_t *)XIP_FIXUP(fixmap_pud)) 343 + #define early_pud ((pud_t *)XIP_FIXUP(early_pud)) 344 + #endif /* CONFIG_XIP_KERNEL */ 345 + 320 346 static pmd_t *__init get_pmd_virt_early(phys_addr_t pa) 321 347 { 322 348 /* Before MMU is enabled */ ··· 346 352 347 353 static phys_addr_t __init alloc_pmd_early(uintptr_t va) 348 354 { 349 - BUG_ON((va - kernel_map.virt_addr) >> PGDIR_SHIFT); 355 + BUG_ON((va - kernel_map.virt_addr) >> PUD_SHIFT); 350 356 351 357 return (uintptr_t)early_pmd; 352 358 } ··· 393 399 create_pte_mapping(ptep, va, pa, sz, prot); 394 400 } 395 401 396 - #define pgd_next_t pmd_t 397 - #define alloc_pgd_next(__va) pt_ops.alloc_pmd(__va) 398 - #define get_pgd_next_virt(__pa) pt_ops.get_pmd_virt(__pa) 402 + static pud_t *__init get_pud_virt_early(phys_addr_t pa) 403 + { 404 + return (pud_t *)((uintptr_t)pa); 405 + } 406 + 407 + static pud_t *__init get_pud_virt_fixmap(phys_addr_t pa) 408 + { 409 + clear_fixmap(FIX_PUD); 410 + return (pud_t *)set_fixmap_offset(FIX_PUD, pa); 411 + } 412 + 413 + static pud_t 
*__init get_pud_virt_late(phys_addr_t pa) 414 + { 415 + return (pud_t *)__va(pa); 416 + } 417 + 418 + static phys_addr_t __init alloc_pud_early(uintptr_t va) 419 + { 420 + /* Only one PUD is available for early mapping */ 421 + BUG_ON((va - kernel_map.virt_addr) >> PGDIR_SHIFT); 422 + 423 + return (uintptr_t)early_pud; 424 + } 425 + 426 + static phys_addr_t __init alloc_pud_fixmap(uintptr_t va) 427 + { 428 + return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE); 429 + } 430 + 431 + static phys_addr_t alloc_pud_late(uintptr_t va) 432 + { 433 + unsigned long vaddr; 434 + 435 + vaddr = __get_free_page(GFP_KERNEL); 436 + BUG_ON(!vaddr); 437 + return __pa(vaddr); 438 + } 439 + 440 + static void __init create_pud_mapping(pud_t *pudp, 441 + uintptr_t va, phys_addr_t pa, 442 + phys_addr_t sz, pgprot_t prot) 443 + { 444 + pmd_t *nextp; 445 + phys_addr_t next_phys; 446 + uintptr_t pud_index = pud_index(va); 447 + 448 + if (sz == PUD_SIZE) { 449 + if (pud_val(pudp[pud_index]) == 0) 450 + pudp[pud_index] = pfn_pud(PFN_DOWN(pa), prot); 451 + return; 452 + } 453 + 454 + if (pud_val(pudp[pud_index]) == 0) { 455 + next_phys = pt_ops.alloc_pmd(va); 456 + pudp[pud_index] = pfn_pud(PFN_DOWN(next_phys), PAGE_TABLE); 457 + nextp = pt_ops.get_pmd_virt(next_phys); 458 + memset(nextp, 0, PAGE_SIZE); 459 + } else { 460 + next_phys = PFN_PHYS(_pud_pfn(pudp[pud_index])); 461 + nextp = pt_ops.get_pmd_virt(next_phys); 462 + } 463 + 464 + create_pmd_mapping(nextp, va, pa, sz, prot); 465 + } 466 + 467 + #define pgd_next_t pud_t 468 + #define alloc_pgd_next(__va) (pgtable_l4_enabled ? \ 469 + pt_ops.alloc_pud(__va) : pt_ops.alloc_pmd(__va)) 470 + #define get_pgd_next_virt(__pa) (pgtable_l4_enabled ? \ 471 + pt_ops.get_pud_virt(__pa) : (pgd_next_t *)pt_ops.get_pmd_virt(__pa)) 399 472 #define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot) \ 400 - create_pmd_mapping(__nextp, __va, __pa, __sz, __prot) 401 - #define fixmap_pgd_next fixmap_pmd 473 + (pgtable_l4_enabled ? 
\ 474 + create_pud_mapping(__nextp, __va, __pa, __sz, __prot) : \ 475 + create_pmd_mapping((pmd_t *)__nextp, __va, __pa, __sz, __prot)) 476 + #define fixmap_pgd_next (pgtable_l4_enabled ? \ 477 + (uintptr_t)fixmap_pud : (uintptr_t)fixmap_pmd) 478 + #define trampoline_pgd_next (pgtable_l4_enabled ? \ 479 + (uintptr_t)trampoline_pud : (uintptr_t)trampoline_pmd) 480 + #define early_dtb_pgd_next (pgtable_l4_enabled ? \ 481 + (uintptr_t)early_dtb_pud : (uintptr_t)early_dtb_pmd) 402 482 #else 403 483 #define pgd_next_t pte_t 404 484 #define alloc_pgd_next(__va) pt_ops.alloc_pte(__va) 405 485 #define get_pgd_next_virt(__pa) pt_ops.get_pte_virt(__pa) 406 486 #define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot) \ 407 487 create_pte_mapping(__nextp, __va, __pa, __sz, __prot) 408 - #define fixmap_pgd_next fixmap_pte 488 + #define fixmap_pgd_next ((uintptr_t)fixmap_pte) 489 + #define early_dtb_pgd_next ((uintptr_t)early_dtb_pmd) 490 + #define create_pud_mapping(__pmdp, __va, __pa, __sz, __prot) 409 491 #define create_pmd_mapping(__pmdp, __va, __pa, __sz, __prot) 410 - #endif 492 + #endif /* __PAGETABLE_PMD_FOLDED */ 411 493 412 494 void __init create_pgd_mapping(pgd_t *pgdp, 413 495 uintptr_t va, phys_addr_t pa, ··· 522 452 } 523 453 524 454 #ifdef CONFIG_XIP_KERNEL 455 + extern char _xiprom[], _exiprom[], __data_loc; 456 + 525 457 /* called from head.S with MMU off */ 526 458 asmlinkage void __init __copy_data(void) 527 459 { ··· 571 499 return PAGE_KERNEL_EXEC; 572 500 } 573 501 #endif /* CONFIG_STRICT_KERNEL_RWX */ 502 + 503 + #ifdef CONFIG_64BIT 504 + static void __init disable_pgtable_l4(void) 505 + { 506 + pgtable_l4_enabled = false; 507 + kernel_map.page_offset = PAGE_OFFSET_L3; 508 + satp_mode = SATP_MODE_39; 509 + } 510 + 511 + /* 512 + * There is a simple way to determine if 4-level is supported by the 513 + * underlying hardware: establish 1:1 mapping in 4-level page table mode 514 + * then read SATP to see if the configuration was taken into account 
515 + * meaning sv48 is supported. 516 + */ 517 + static __init void set_satp_mode(void) 518 + { 519 + u64 identity_satp, hw_satp; 520 + uintptr_t set_satp_mode_pmd; 521 + 522 + set_satp_mode_pmd = ((unsigned long)set_satp_mode) & PMD_MASK; 523 + create_pgd_mapping(early_pg_dir, 524 + set_satp_mode_pmd, (uintptr_t)early_pud, 525 + PGDIR_SIZE, PAGE_TABLE); 526 + create_pud_mapping(early_pud, 527 + set_satp_mode_pmd, (uintptr_t)early_pmd, 528 + PUD_SIZE, PAGE_TABLE); 529 + /* Handle the case where set_satp_mode straddles 2 PMDs */ 530 + create_pmd_mapping(early_pmd, 531 + set_satp_mode_pmd, set_satp_mode_pmd, 532 + PMD_SIZE, PAGE_KERNEL_EXEC); 533 + create_pmd_mapping(early_pmd, 534 + set_satp_mode_pmd + PMD_SIZE, 535 + set_satp_mode_pmd + PMD_SIZE, 536 + PMD_SIZE, PAGE_KERNEL_EXEC); 537 + 538 + identity_satp = PFN_DOWN((uintptr_t)&early_pg_dir) | satp_mode; 539 + 540 + local_flush_tlb_all(); 541 + csr_write(CSR_SATP, identity_satp); 542 + hw_satp = csr_swap(CSR_SATP, 0ULL); 543 + local_flush_tlb_all(); 544 + 545 + if (hw_satp != identity_satp) 546 + disable_pgtable_l4(); 547 + 548 + memset(early_pg_dir, 0, PAGE_SIZE); 549 + memset(early_pud, 0, PAGE_SIZE); 550 + memset(early_pmd, 0, PAGE_SIZE); 551 + } 552 + #endif 574 553 575 554 /* 576 555 * setup_vm() is called from head.S with MMU-off. ··· 687 564 uintptr_t pa = dtb_pa & ~(PMD_SIZE - 1); 688 565 689 566 create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA, 690 - IS_ENABLED(CONFIG_64BIT) ? (uintptr_t)early_dtb_pmd : pa, 567 + IS_ENABLED(CONFIG_64BIT) ? early_dtb_pgd_next : pa, 691 568 PGDIR_SIZE, 692 569 IS_ENABLED(CONFIG_64BIT) ? 
PAGE_TABLE : PAGE_KERNEL); 570 + 571 + if (pgtable_l4_enabled) { 572 + create_pud_mapping(early_dtb_pud, DTB_EARLY_BASE_VA, 573 + (uintptr_t)early_dtb_pmd, PUD_SIZE, PAGE_TABLE); 574 + } 693 575 694 576 if (IS_ENABLED(CONFIG_64BIT)) { 695 577 create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA, ··· 717 589 dtb_early_pa = dtb_pa; 718 590 } 719 591 592 + /* 593 + * MMU is not enabled, the page tables are allocated directly using 594 + * early_pmd/pud/p4d and the address returned is the physical one. 595 + */ 596 + void __init pt_ops_set_early(void) 597 + { 598 + pt_ops.alloc_pte = alloc_pte_early; 599 + pt_ops.get_pte_virt = get_pte_virt_early; 600 + #ifndef __PAGETABLE_PMD_FOLDED 601 + pt_ops.alloc_pmd = alloc_pmd_early; 602 + pt_ops.get_pmd_virt = get_pmd_virt_early; 603 + pt_ops.alloc_pud = alloc_pud_early; 604 + pt_ops.get_pud_virt = get_pud_virt_early; 605 + #endif 606 + } 607 + 608 + /* 609 + * MMU is enabled but page table setup is not complete yet. 610 + * fixmap page table alloc functions must be used as a means to temporarily 611 + * map the allocated physical pages since the linear mapping does not exist yet. 612 + * 613 + * Note that this is called with MMU disabled, hence kernel_mapping_pa_to_va, 614 + * but it will be used as described above. 
615 + */ 616 + void __init pt_ops_set_fixmap(void) 617 + { 618 + pt_ops.alloc_pte = kernel_mapping_pa_to_va((uintptr_t)alloc_pte_fixmap); 619 + pt_ops.get_pte_virt = kernel_mapping_pa_to_va((uintptr_t)get_pte_virt_fixmap); 620 + #ifndef __PAGETABLE_PMD_FOLDED 621 + pt_ops.alloc_pmd = kernel_mapping_pa_to_va((uintptr_t)alloc_pmd_fixmap); 622 + pt_ops.get_pmd_virt = kernel_mapping_pa_to_va((uintptr_t)get_pmd_virt_fixmap); 623 + pt_ops.alloc_pud = kernel_mapping_pa_to_va((uintptr_t)alloc_pud_fixmap); 624 + pt_ops.get_pud_virt = kernel_mapping_pa_to_va((uintptr_t)get_pud_virt_fixmap); 625 + #endif 626 + } 627 + 628 + /* 629 + * MMU is enabled and page table setup is complete, so from now, we can use 630 + * generic page allocation functions to setup page table. 631 + */ 632 + void __init pt_ops_set_late(void) 633 + { 634 + pt_ops.alloc_pte = alloc_pte_late; 635 + pt_ops.get_pte_virt = get_pte_virt_late; 636 + #ifndef __PAGETABLE_PMD_FOLDED 637 + pt_ops.alloc_pmd = alloc_pmd_late; 638 + pt_ops.get_pmd_virt = get_pmd_virt_late; 639 + pt_ops.alloc_pud = alloc_pud_late; 640 + pt_ops.get_pud_virt = get_pud_virt_late; 641 + #endif 642 + } 643 + 720 644 asmlinkage void __init setup_vm(uintptr_t dtb_pa) 721 645 { 722 646 pmd_t __maybe_unused fix_bmap_spmd, fix_bmap_epmd; 723 647 724 648 kernel_map.virt_addr = KERNEL_LINK_ADDR; 649 + kernel_map.page_offset = _AC(CONFIG_PAGE_OFFSET, UL); 725 650 726 651 #ifdef CONFIG_XIP_KERNEL 727 652 kernel_map.xiprom = (uintptr_t)CONFIG_XIP_PHYS_ADDR; ··· 789 608 kernel_map.phys_addr = (uintptr_t)(&_start); 790 609 kernel_map.size = (uintptr_t)(&_end) - kernel_map.phys_addr; 791 610 #endif 611 + 612 + #if defined(CONFIG_64BIT) && !defined(CONFIG_XIP_KERNEL) 613 + set_satp_mode(); 614 + #endif 615 + 792 616 kernel_map.va_pa_offset = PAGE_OFFSET - kernel_map.phys_addr; 793 617 kernel_map.va_kernel_pa_offset = kernel_map.virt_addr - kernel_map.phys_addr; 794 618 795 619 riscv_pfn_base = PFN_DOWN(kernel_map.phys_addr); 620 + 621 + /* 622 + * The 
default maximal physical memory size is KERN_VIRT_SIZE for 32-bit 623 + * kernel, whereas for 64-bit kernel, the end of the virtual address 624 + * space is occupied by the modules/BPF/kernel mappings which reduces 625 + * the available size of the linear mapping. 626 + */ 627 + memory_limit = KERN_VIRT_SIZE - (IS_ENABLED(CONFIG_64BIT) ? SZ_4G : 0); 796 628 797 629 /* Sanity check alignment and size */ 798 630 BUG_ON((PAGE_OFFSET % PGDIR_SIZE) != 0); ··· 819 625 BUG_ON((kernel_map.virt_addr + kernel_map.size) > ADDRESS_SPACE_END - SZ_4K); 820 626 #endif 821 627 822 - pt_ops.alloc_pte = alloc_pte_early; 823 - pt_ops.get_pte_virt = get_pte_virt_early; 824 - #ifndef __PAGETABLE_PMD_FOLDED 825 - pt_ops.alloc_pmd = alloc_pmd_early; 826 - pt_ops.get_pmd_virt = get_pmd_virt_early; 827 - #endif 628 + pt_ops_set_early(); 629 + 828 630 /* Setup early PGD for fixmap */ 829 631 create_pgd_mapping(early_pg_dir, FIXADDR_START, 830 - (uintptr_t)fixmap_pgd_next, PGDIR_SIZE, PAGE_TABLE); 632 + fixmap_pgd_next, PGDIR_SIZE, PAGE_TABLE); 831 633 832 634 #ifndef __PAGETABLE_PMD_FOLDED 833 - /* Setup fixmap PMD */ 635 + /* Setup fixmap PUD and PMD */ 636 + if (pgtable_l4_enabled) 637 + create_pud_mapping(fixmap_pud, FIXADDR_START, 638 + (uintptr_t)fixmap_pmd, PUD_SIZE, PAGE_TABLE); 834 639 create_pmd_mapping(fixmap_pmd, FIXADDR_START, 835 640 (uintptr_t)fixmap_pte, PMD_SIZE, PAGE_TABLE); 836 641 /* Setup trampoline PGD and PMD */ 837 642 create_pgd_mapping(trampoline_pg_dir, kernel_map.virt_addr, 838 - (uintptr_t)trampoline_pmd, PGDIR_SIZE, PAGE_TABLE); 643 + trampoline_pgd_next, PGDIR_SIZE, PAGE_TABLE); 644 + if (pgtable_l4_enabled) 645 + create_pud_mapping(trampoline_pud, kernel_map.virt_addr, 646 + (uintptr_t)trampoline_pmd, PUD_SIZE, PAGE_TABLE); 839 647 #ifdef CONFIG_XIP_KERNEL 840 648 create_pmd_mapping(trampoline_pmd, kernel_map.virt_addr, 841 649 kernel_map.xiprom, PMD_SIZE, PAGE_KERNEL_EXEC); ··· 865 669 * Bootime fixmap only can handle PMD_SIZE mapping. 
Thus, boot-ioremap 866 670 * range can not span multiple pmds. 867 671 */ 868 - BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT) 672 + BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT) 869 673 != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT)); 870 674 871 675 #ifndef __PAGETABLE_PMD_FOLDED ··· 890 694 pr_warn("FIX_BTMAP_BEGIN: %d\n", FIX_BTMAP_BEGIN); 891 695 } 892 696 #endif 697 + 698 + pt_ops_set_fixmap(); 893 699 } 894 700 895 701 static void __init setup_vm_final(void) ··· 900 702 phys_addr_t pa, start, end; 901 703 u64 i; 902 704 903 - /** 904 - * MMU is enabled at this point. But page table setup is not complete yet. 905 - * fixmap page table alloc functions should be used at this point 906 - */ 907 - pt_ops.alloc_pte = alloc_pte_fixmap; 908 - pt_ops.get_pte_virt = get_pte_virt_fixmap; 909 - #ifndef __PAGETABLE_PMD_FOLDED 910 - pt_ops.alloc_pmd = alloc_pmd_fixmap; 911 - pt_ops.get_pmd_virt = get_pmd_virt_fixmap; 912 - #endif 913 705 /* Setup swapper PGD for fixmap */ 914 706 create_pgd_mapping(swapper_pg_dir, FIXADDR_START, 915 707 __pa_symbol(fixmap_pgd_next), ··· 924 736 } 925 737 } 926 738 927 - #ifdef CONFIG_64BIT 928 739 /* Map the kernel */ 929 - create_kernel_page_table(swapper_pg_dir, false); 740 + if (IS_ENABLED(CONFIG_64BIT)) 741 + create_kernel_page_table(swapper_pg_dir, false); 742 + 743 + #ifdef CONFIG_KASAN 744 + kasan_swapper_init(); 930 745 #endif 931 746 932 747 /* Clear fixmap PTE and PMD mappings */ 933 748 clear_fixmap(FIX_PTE); 934 749 clear_fixmap(FIX_PMD); 750 + clear_fixmap(FIX_PUD); 935 751 936 752 /* Move to swapper page table */ 937 - csr_write(CSR_SATP, PFN_DOWN(__pa_symbol(swapper_pg_dir)) | SATP_MODE); 753 + csr_write(CSR_SATP, PFN_DOWN(__pa_symbol(swapper_pg_dir)) | satp_mode); 938 754 local_flush_tlb_all(); 939 755 940 - /* generic page allocation functions must be used to setup page table */ 941 - pt_ops.alloc_pte = alloc_pte_late; 942 - pt_ops.get_pte_virt = get_pte_virt_late; 943 - #ifndef __PAGETABLE_PMD_FOLDED 
944 - pt_ops.alloc_pmd = alloc_pmd_late; 945 - pt_ops.get_pmd_virt = get_pmd_virt_late; 946 - #endif 756 + pt_ops_set_late(); 947 757 } 948 758 #else 949 759 asmlinkage void __init setup_vm(uintptr_t dtb_pa) ··· 977 791 * since it doesn't make much sense and we have limited memory 978 792 * resources. 979 793 */ 980 - #ifdef CONFIG_CRASH_DUMP 981 794 if (is_kdump_kernel()) { 982 795 pr_info("crashkernel: ignoring reservation request\n"); 983 796 return; 984 797 } 985 - #endif 986 798 987 799 ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), 988 800 &crash_size, &crash_base);

+191 -57

arch/riscv/mm/kasan_init.c

··· 11 11 #include <asm/fixmap.h> 12 12 #include <asm/pgalloc.h> 13 13 14 + /* 15 + * Kasan shadow region must lie at a fixed address across sv39, sv48 and sv57 16 + * which is right before the kernel. 17 + * 18 + * For sv39, the region is aligned on PGDIR_SIZE so we only need to populate 19 + * the page global directory with kasan_early_shadow_pmd. 20 + * 21 + * For sv48 and sv57, the region is not aligned on PGDIR_SIZE so the mapping 22 + * must be divided as follows: 23 + * - the first PGD entry, although incomplete, is populated with 24 + * kasan_early_shadow_pud/p4d 25 + * - the PGD entries in the middle are populated with kasan_early_shadow_pud/p4d 26 + * - the last PGD entry is shared with the kernel mapping so populated at the 27 + * lower levels pud/p4d 28 + * 29 + * In addition, when shallow populating a kasan region (for example vmalloc), 30 + * this region may also not be aligned on PGDIR size, so we must go down to the 31 + * pud level too. 32 + */ 33 + 14 34 extern pgd_t early_pg_dir[PTRS_PER_PGD]; 15 - asmlinkage void __init kasan_early_init(void) 16 - { 17 - uintptr_t i; 18 - pgd_t *pgd = early_pg_dir + pgd_index(KASAN_SHADOW_START); 19 - 20 - BUILD_BUG_ON(KASAN_SHADOW_OFFSET != 21 - KASAN_SHADOW_END - (1UL << (64 - KASAN_SHADOW_SCALE_SHIFT))); 22 - 23 - for (i = 0; i < PTRS_PER_PTE; ++i) 24 - set_pte(kasan_early_shadow_pte + i, 25 - mk_pte(virt_to_page(kasan_early_shadow_page), 26 - PAGE_KERNEL)); 27 - 28 - for (i = 0; i < PTRS_PER_PMD; ++i) 29 - set_pmd(kasan_early_shadow_pmd + i, 30 - pfn_pmd(PFN_DOWN 31 - (__pa((uintptr_t) kasan_early_shadow_pte)), 32 - __pgprot(_PAGE_TABLE))); 33 - 34 - for (i = KASAN_SHADOW_START; i < KASAN_SHADOW_END; 35 - i += PGDIR_SIZE, ++pgd) 36 - set_pgd(pgd, 37 - pfn_pgd(PFN_DOWN 38 - (__pa(((uintptr_t) kasan_early_shadow_pmd))), 39 - __pgprot(_PAGE_TABLE))); 40 - 41 - /* init for swapper_pg_dir */ 42 - pgd = pgd_offset_k(KASAN_SHADOW_START); 43 - 44 - for (i = KASAN_SHADOW_START; i < KASAN_SHADOW_END; 45 - i += 
PGDIR_SIZE, ++pgd) 46 - set_pgd(pgd, 47 - pfn_pgd(PFN_DOWN 48 - (__pa(((uintptr_t) kasan_early_shadow_pmd))), 49 - __pgprot(_PAGE_TABLE))); 50 - 51 - local_flush_tlb_all(); 52 - } 53 35 54 36 static void __init kasan_populate_pte(pmd_t *pmd, unsigned long vaddr, unsigned long end) 55 37 { ··· 55 73 set_pmd(pmd, pfn_pmd(PFN_DOWN(__pa(base_pte)), PAGE_TABLE)); 56 74 } 57 75 58 - static void __init kasan_populate_pmd(pgd_t *pgd, unsigned long vaddr, unsigned long end) 76 + static void __init kasan_populate_pmd(pud_t *pud, unsigned long vaddr, unsigned long end) 59 77 { 60 78 phys_addr_t phys_addr; 61 79 pmd_t *pmdp, *base_pmd; 62 80 unsigned long next; 63 81 64 - base_pmd = (pmd_t *)pgd_page_vaddr(*pgd); 65 - if (base_pmd == lm_alias(kasan_early_shadow_pmd)) 82 + if (pud_none(*pud)) { 66 83 base_pmd = memblock_alloc(PTRS_PER_PMD * sizeof(pmd_t), PAGE_SIZE); 84 + } else { 85 + base_pmd = (pmd_t *)pud_pgtable(*pud); 86 + if (base_pmd == lm_alias(kasan_early_shadow_pmd)) 87 + base_pmd = memblock_alloc(PTRS_PER_PMD * sizeof(pmd_t), PAGE_SIZE); 88 + } 67 89 68 90 pmdp = base_pmd + pmd_index(vaddr); 69 91 ··· 91 105 * it entirely, memblock could allocate a page at a physical address 92 106 * where KASAN is not populated yet and then we'd get a page fault. 
93 107 */ 94 - set_pgd(pgd, pfn_pgd(PFN_DOWN(__pa(base_pmd)), PAGE_TABLE)); 108 + set_pud(pud, pfn_pud(PFN_DOWN(__pa(base_pmd)), PAGE_TABLE)); 95 109 } 96 110 97 - static void __init kasan_populate_pgd(unsigned long vaddr, unsigned long end) 111 + static void __init kasan_populate_pud(pgd_t *pgd, 112 + unsigned long vaddr, unsigned long end, 113 + bool early) 98 114 { 99 115 phys_addr_t phys_addr; 100 - pgd_t *pgdp = pgd_offset_k(vaddr); 116 + pud_t *pudp, *base_pud; 117 + unsigned long next; 118 + 119 + if (early) { 120 + /* 121 + * We can't use pgd_page_vaddr here as it would return a linear 122 + * mapping address but it is not mapped yet, but when populating 123 + * early_pg_dir, we need the physical address and when populating 124 + * swapper_pg_dir, we need the kernel virtual address so use 125 + * pt_ops facility. 126 + */ 127 + base_pud = pt_ops.get_pud_virt(pfn_to_phys(_pgd_pfn(*pgd))); 128 + } else { 129 + base_pud = (pud_t *)pgd_page_vaddr(*pgd); 130 + if (base_pud == lm_alias(kasan_early_shadow_pud)) 131 + base_pud = memblock_alloc(PTRS_PER_PUD * sizeof(pud_t), PAGE_SIZE); 132 + } 133 + 134 + pudp = base_pud + pud_index(vaddr); 135 + 136 + do { 137 + next = pud_addr_end(vaddr, end); 138 + 139 + if (pud_none(*pudp) && IS_ALIGNED(vaddr, PUD_SIZE) && (next - vaddr) >= PUD_SIZE) { 140 + if (early) { 141 + phys_addr = __pa(((uintptr_t)kasan_early_shadow_pmd)); 142 + set_pud(pudp, pfn_pud(PFN_DOWN(phys_addr), PAGE_TABLE)); 143 + continue; 144 + } else { 145 + phys_addr = memblock_phys_alloc(PUD_SIZE, PUD_SIZE); 146 + if (phys_addr) { 147 + set_pud(pudp, pfn_pud(PFN_DOWN(phys_addr), PAGE_KERNEL)); 148 + continue; 149 + } 150 + } 151 + } 152 + 153 + kasan_populate_pmd(pudp, vaddr, next); 154 + } while (pudp++, vaddr = next, vaddr != end); 155 + 156 + /* 157 + * Wait for the whole PGD to be populated before setting the PGD in 158 + * the page table, otherwise, if we did set the PGD before populating 159 + * it entirely, memblock could allocate a page at a 
physical address 160 + * where KASAN is not populated yet and then we'd get a page fault. 161 + */ 162 + if (!early) 163 + set_pgd(pgd, pfn_pgd(PFN_DOWN(__pa(base_pud)), PAGE_TABLE)); 164 + } 165 + 166 + #define kasan_early_shadow_pgd_next (pgtable_l4_enabled ? \ 167 + (uintptr_t)kasan_early_shadow_pud : \ 168 + (uintptr_t)kasan_early_shadow_pmd) 169 + #define kasan_populate_pgd_next(pgdp, vaddr, next, early) \ 170 + (pgtable_l4_enabled ? \ 171 + kasan_populate_pud(pgdp, vaddr, next, early) : \ 172 + kasan_populate_pmd((pud_t *)pgdp, vaddr, next)) 173 + 174 + static void __init kasan_populate_pgd(pgd_t *pgdp, 175 + unsigned long vaddr, unsigned long end, 176 + bool early) 177 + { 178 + phys_addr_t phys_addr; 101 179 unsigned long next; 102 180 103 181 do { 104 182 next = pgd_addr_end(vaddr, end); 105 183 106 - /* 107 - * pgdp can't be none since kasan_early_init initialized all KASAN 108 - * shadow region with kasan_early_shadow_pmd: if this is stillthe case, 109 - * that means we can try to allocate a hugepage as a replacement. 110 - */ 111 - if (pgd_page_vaddr(*pgdp) == (unsigned long)lm_alias(kasan_early_shadow_pmd) && 112 - IS_ALIGNED(vaddr, PGDIR_SIZE) && (next - vaddr) >= PGDIR_SIZE) { 113 - phys_addr = memblock_phys_alloc(PGDIR_SIZE, PGDIR_SIZE); 114 - if (phys_addr) { 115 - set_pgd(pgdp, pfn_pgd(PFN_DOWN(phys_addr), PAGE_KERNEL)); 184 + if (IS_ALIGNED(vaddr, PGDIR_SIZE) && (next - vaddr) >= PGDIR_SIZE) { 185 + if (early) { 186 + phys_addr = __pa((uintptr_t)kasan_early_shadow_pgd_next); 187 + set_pgd(pgdp, pfn_pgd(PFN_DOWN(phys_addr), PAGE_TABLE)); 116 188 continue; 189 + } else if (pgd_page_vaddr(*pgdp) == 190 + (unsigned long)lm_alias(kasan_early_shadow_pgd_next)) { 191 + /* 192 + * pgdp can't be none since kasan_early_init 193 + * initialized all KASAN shadow region with 194 + * kasan_early_shadow_pud: if this is still the 195 + * case, that means we can try to allocate a 196 + * hugepage as a replacement. 
197 + */ 198 + phys_addr = memblock_phys_alloc(PGDIR_SIZE, PGDIR_SIZE); 199 + if (phys_addr) { 200 + set_pgd(pgdp, pfn_pgd(PFN_DOWN(phys_addr), PAGE_KERNEL)); 201 + continue; 202 + } 117 203 } 118 204 } 119 205 120 - kasan_populate_pmd(pgdp, vaddr, next); 206 + kasan_populate_pgd_next(pgdp, vaddr, next, early); 121 207 } while (pgdp++, vaddr = next, vaddr != end); 208 + } 209 + 210 + asmlinkage void __init kasan_early_init(void) 211 + { 212 + uintptr_t i; 213 + 214 + BUILD_BUG_ON(KASAN_SHADOW_OFFSET != 215 + KASAN_SHADOW_END - (1UL << (64 - KASAN_SHADOW_SCALE_SHIFT))); 216 + 217 + for (i = 0; i < PTRS_PER_PTE; ++i) 218 + set_pte(kasan_early_shadow_pte + i, 219 + mk_pte(virt_to_page(kasan_early_shadow_page), 220 + PAGE_KERNEL)); 221 + 222 + for (i = 0; i < PTRS_PER_PMD; ++i) 223 + set_pmd(kasan_early_shadow_pmd + i, 224 + pfn_pmd(PFN_DOWN 225 + (__pa((uintptr_t)kasan_early_shadow_pte)), 226 + PAGE_TABLE)); 227 + 228 + if (pgtable_l4_enabled) { 229 + for (i = 0; i < PTRS_PER_PUD; ++i) 230 + set_pud(kasan_early_shadow_pud + i, 231 + pfn_pud(PFN_DOWN 232 + (__pa(((uintptr_t)kasan_early_shadow_pmd))), 233 + PAGE_TABLE)); 234 + } 235 + 236 + kasan_populate_pgd(early_pg_dir + pgd_index(KASAN_SHADOW_START), 237 + KASAN_SHADOW_START, KASAN_SHADOW_END, true); 238 + 239 + local_flush_tlb_all(); 240 + } 241 + 242 + void __init kasan_swapper_init(void) 243 + { 244 + kasan_populate_pgd(pgd_offset_k(KASAN_SHADOW_START), 245 + KASAN_SHADOW_START, KASAN_SHADOW_END, true); 246 + 247 + local_flush_tlb_all(); 122 248 } 123 249 124 250 static void __init kasan_populate(void *start, void *end) ··· 238 140 unsigned long vaddr = (unsigned long)start & PAGE_MASK; 239 141 unsigned long vend = PAGE_ALIGN((unsigned long)end); 240 142 241 - kasan_populate_pgd(vaddr, vend); 143 + kasan_populate_pgd(pgd_offset_k(vaddr), vaddr, vend, false); 242 144 243 145 local_flush_tlb_all(); 244 146 memset(start, KASAN_SHADOW_INIT, end - start); 147 + } 148 + 149 + static void __init 
kasan_shallow_populate_pud(pgd_t *pgdp, 150 + unsigned long vaddr, unsigned long end, 151 + bool kasan_populate) 152 + { 153 + unsigned long next; 154 + pud_t *pudp, *base_pud; 155 + pmd_t *base_pmd; 156 + bool is_kasan_pmd; 157 + 158 + base_pud = (pud_t *)pgd_page_vaddr(*pgdp); 159 + pudp = base_pud + pud_index(vaddr); 160 + 161 + if (kasan_populate) 162 + memcpy(base_pud, (void *)kasan_early_shadow_pgd_next, 163 + sizeof(pud_t) * PTRS_PER_PUD); 164 + 165 + do { 166 + next = pud_addr_end(vaddr, end); 167 + is_kasan_pmd = (pud_pgtable(*pudp) == lm_alias(kasan_early_shadow_pmd)); 168 + 169 + if (is_kasan_pmd) { 170 + base_pmd = memblock_alloc(PAGE_SIZE, PAGE_SIZE); 171 + set_pud(pudp, pfn_pud(PFN_DOWN(__pa(base_pmd)), PAGE_TABLE)); 172 + } 173 + } while (pudp++, vaddr = next, vaddr != end); 245 174 } 246 175 247 176 static void __init kasan_shallow_populate_pgd(unsigned long vaddr, unsigned long end) ··· 276 151 unsigned long next; 277 152 void *p; 278 153 pgd_t *pgd_k = pgd_offset_k(vaddr); 154 + bool is_kasan_pgd_next; 279 155 280 156 do { 281 157 next = pgd_addr_end(vaddr, end); 282 - if (pgd_page_vaddr(*pgd_k) == (unsigned long)lm_alias(kasan_early_shadow_pmd)) { 158 + is_kasan_pgd_next = (pgd_page_vaddr(*pgd_k) == 159 + (unsigned long)lm_alias(kasan_early_shadow_pgd_next)); 160 + 161 + if (is_kasan_pgd_next) { 283 162 p = memblock_alloc(PAGE_SIZE, PAGE_SIZE); 284 163 set_pgd(pgd_k, pfn_pgd(PFN_DOWN(__pa(p)), PAGE_TABLE)); 285 164 } 165 + 166 + if (IS_ALIGNED(vaddr, PGDIR_SIZE) && (next - vaddr) >= PGDIR_SIZE) 167 + continue; 168 + 169 + kasan_shallow_populate_pud(pgd_k, vaddr, next, is_kasan_pgd_next); 286 170 } while (pgd_k++, vaddr = next, vaddr != end); 287 171 } 288 172

+2 -7

arch/riscv/mm/tlbflush.c

··· 32 32 unsigned long size, unsigned long stride) 33 33 { 34 34 struct cpumask *cmask = mm_cpumask(mm); 35 - struct cpumask hmask; 36 35 unsigned int cpuid; 37 36 bool broadcast; 38 37 ··· 45 46 unsigned long asid = atomic_long_read(&mm->context.id); 46 47 47 48 if (broadcast) { 48 - riscv_cpuid_to_hartid_mask(cmask, &hmask); 49 - sbi_remote_sfence_vma_asid(cpumask_bits(&hmask), 50 - start, size, asid); 49 + sbi_remote_sfence_vma_asid(cmask, start, size, asid); 51 50 } else if (size <= stride) { 52 51 local_flush_tlb_page_asid(start, asid); 53 52 } else { ··· 53 56 } 54 57 } else { 55 58 if (broadcast) { 56 - riscv_cpuid_to_hartid_mask(cmask, &hmask); 57 - sbi_remote_sfence_vma(cpumask_bits(&hmask), 58 - start, size); 59 + sbi_remote_sfence_vma(cmask, start, size); 59 60 } else if (size <= stride) { 60 61 local_flush_tlb_page(start); 61 62 } else {

+1 -1

arch/riscv/net/bpf_jit_comp64.c

··· 497 497 offset = pc - (long)&ex->insn; 498 498 if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN)) 499 499 return -ERANGE; 500 - ex->insn = pc; 500 + ex->insn = offset; 501 501 502 502 /* 503 503 * Since the extable follows the program, the fixup offset is always

+2

drivers/firmware/efi/libstub/efi-stub.c

··· 40 40 41 41 #ifdef CONFIG_ARM64 42 42 # define EFI_RT_VIRTUAL_LIMIT DEFAULT_MAP_WINDOW_64 43 + #elif defined(CONFIG_RISCV) 44 + # define EFI_RT_VIRTUAL_LIMIT TASK_SIZE_MIN 43 45 #else 44 46 # define EFI_RT_VIRTUAL_LIMIT TASK_SIZE 45 47 #endif

-1

drivers/soc/canaan/Kconfig

··· 5 5 depends on RISCV && SOC_CANAAN && OF 6 6 default SOC_CANAAN 7 7 select PM 8 - select SYSCON 9 8 select MFD_SYSCON 10 9 help 11 10 Canaan Kendryte K210 SoC system controller driver.

+18 -6

include/asm-generic/pgalloc.h

··· 147 147 148 148 #if CONFIG_PGTABLE_LEVELS > 3 149 149 150 + static inline pud_t *__pud_alloc_one(struct mm_struct *mm, unsigned long addr) 151 + { 152 + gfp_t gfp = GFP_PGTABLE_USER; 153 + 154 + if (mm == &init_mm) 155 + gfp = GFP_PGTABLE_KERNEL; 156 + return (pud_t *)get_zeroed_page(gfp); 157 + } 158 + 150 159 #ifndef __HAVE_ARCH_PUD_ALLOC_ONE 151 160 /** 152 161 * pud_alloc_one - allocate a page for PUD-level page table ··· 168 159 */ 169 160 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) 170 161 { 171 - gfp_t gfp = GFP_PGTABLE_USER; 172 - 173 - if (mm == &init_mm) 174 - gfp = GFP_PGTABLE_KERNEL; 175 - return (pud_t *)get_zeroed_page(gfp); 162 + return __pud_alloc_one(mm, addr); 176 163 } 177 164 #endif 178 165 179 - static inline void pud_free(struct mm_struct *mm, pud_t *pud) 166 + static inline void __pud_free(struct mm_struct *mm, pud_t *pud) 180 167 { 181 168 BUG_ON((unsigned long)pud & (PAGE_SIZE-1)); 182 169 free_page((unsigned long)pud); 183 170 } 171 + 172 + #ifndef __HAVE_ARCH_PUD_FREE 173 + static inline void pud_free(struct mm_struct *mm, pud_t *pud) 174 + { 175 + __pud_free(mm, pud); 176 + } 177 + #endif 184 178 185 179 #endif /* CONFIG_PGTABLE_LEVELS > 3 */ 186 180

Configure Feed

Configure Feed