Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'riscv-for-linus-6.1-rc8' of git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux

Pull RISC-V fixes from Palmer Dabbelt:

- build fix for the NR_CPUS Kconfig SBI version dependency

- fixes to early memory initialization, to fix page permissions in EFI
and post-initmem-free

- build fix for the VDSO, to avoid trying to profile the VDSO functions

- fixes for kexec crash handling, to fix multi-core and interrupt
related initialization inside the crash kernel

- fix for a race condition when handling multiple concurrent kernel
stack overflows

* tag 'riscv-for-linus-6.1-rc8' of git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux:
riscv: kexec: Fixup crash_smp_send_stop without multi cores
riscv: kexec: Fixup irq controller broken in kexec crash path
riscv: mm: Proper page permissions after initmem free
riscv: vdso: fix section overlapping under some conditions
riscv: fix race when vmap stack overflow
riscv: Sync efi page table's kernel mappings before switching
riscv: Fix NR_CPUS range conditions

+187 -24
+3 -3
arch/riscv/Kconfig
··· 317 317 config NR_CPUS 318 318 int "Maximum number of CPUs (2-512)" 319 319 depends on SMP 320 - range 2 512 if !SBI_V01 321 - range 2 32 if SBI_V01 && 32BIT 322 - range 2 64 if SBI_V01 && 64BIT 320 + range 2 512 if !RISCV_SBI_V01 321 + range 2 32 if RISCV_SBI_V01 && 32BIT 322 + range 2 64 if RISCV_SBI_V01 && 64BIT 323 323 default "32" if 32BIT 324 324 default "64" if 64BIT 325 325
+1
arch/riscv/include/asm/asm.h
··· 23 23 #define REG_L __REG_SEL(ld, lw) 24 24 #define REG_S __REG_SEL(sd, sw) 25 25 #define REG_SC __REG_SEL(sc.d, sc.w) 26 + #define REG_AMOSWAP_AQ __REG_SEL(amoswap.d.aq, amoswap.w.aq) 26 27 #define REG_ASM __REG_SEL(.dword, .word) 27 28 #define SZREG __REG_SEL(8, 4) 28 29 #define LGREG __REG_SEL(3, 2)
+5 -1
arch/riscv/include/asm/efi.h
··· 10 10 #include <asm/mmu_context.h> 11 11 #include <asm/ptrace.h> 12 12 #include <asm/tlbflush.h> 13 + #include <asm/pgalloc.h> 13 14 14 15 #ifdef CONFIG_EFI 15 16 extern void efi_init(void); ··· 21 20 int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md); 22 21 int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md); 23 22 24 - #define arch_efi_call_virt_setup() efi_virtmap_load() 23 + #define arch_efi_call_virt_setup() ({ \ 24 + sync_kernel_mappings(efi_mm.pgd); \ 25 + efi_virtmap_load(); \ 26 + }) 25 27 #define arch_efi_call_virt_teardown() efi_virtmap_unload() 26 28 27 29 #define ARCH_EFI_IRQ_FLAGS_MASK (SR_IE | SR_SPIE)
+8 -3
arch/riscv/include/asm/pgalloc.h
··· 127 127 #define __p4d_free_tlb(tlb, p4d, addr) p4d_free((tlb)->mm, p4d) 128 128 #endif /* __PAGETABLE_PMD_FOLDED */ 129 129 130 + static inline void sync_kernel_mappings(pgd_t *pgd) 131 + { 132 + memcpy(pgd + USER_PTRS_PER_PGD, 133 + init_mm.pgd + USER_PTRS_PER_PGD, 134 + (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); 135 + } 136 + 130 137 static inline pgd_t *pgd_alloc(struct mm_struct *mm) 131 138 { 132 139 pgd_t *pgd; ··· 142 135 if (likely(pgd != NULL)) { 143 136 memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t)); 144 137 /* Copy kernel mappings */ 145 - memcpy(pgd + USER_PTRS_PER_PGD, 146 - init_mm.pgd + USER_PTRS_PER_PGD, 147 - (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); 138 + sync_kernel_mappings(pgd); 148 139 } 149 140 return pgd; 150 141 }
+3
arch/riscv/include/asm/smp.h
··· 50 50 /* Clear IPI for current CPU */ 51 51 void riscv_clear_ipi(void); 52 52 53 + /* Check other CPUs stop or not */ 54 + bool smp_crash_stop_failed(void); 55 + 53 56 /* Secondary hart entry */ 54 57 asmlinkage void smp_callin(void); 55 58
+13
arch/riscv/kernel/entry.S
··· 404 404 405 405 #ifdef CONFIG_VMAP_STACK 406 406 handle_kernel_stack_overflow: 407 + /* 408 + * Takes the pseudo-spinlock for the shadow stack, in case multiple 409 + * harts are concurrently overflowing their kernel stacks. We could 410 + * store any value here, but since we're overflowing the kernel stack 411 + * already we only have SP to use as a scratch register. So we just 412 + * swap in the address of the spinlock, as that's definitely non-zero. 413 + * 414 + * Pairs with a store_release in handle_bad_stack(). 415 + */ 416 + 1: la sp, spin_shadow_stack 417 + REG_AMOSWAP_AQ sp, sp, (sp) 418 + bnez sp, 1b 419 + 407 420 la sp, shadow_stack 408 421 addi sp, sp, SHADOW_OVERFLOW_STACK_SIZE 409 422
+35 -11
arch/riscv/kernel/machine_kexec.c
··· 15 15 #include <linux/compiler.h> /* For unreachable() */ 16 16 #include <linux/cpu.h> /* For cpu_down() */ 17 17 #include <linux/reboot.h> 18 + #include <linux/interrupt.h> 19 + #include <linux/irq.h> 18 20 19 21 /* 20 22 * kexec_image_info - Print received image details ··· 140 138 #endif 141 139 } 142 140 143 - /* Override the weak function in kernel/panic.c */ 144 - void crash_smp_send_stop(void) 141 + static void machine_kexec_mask_interrupts(void) 145 142 { 146 - static int cpus_stopped; 143 + unsigned int i; 144 + struct irq_desc *desc; 147 145 148 - /* 149 - * This function can be called twice in panic path, but obviously 150 - * we execute this only once. 151 - */ 152 - if (cpus_stopped) 153 - return; 146 + for_each_irq_desc(i, desc) { 147 + struct irq_chip *chip; 148 + int ret; 154 149 155 - smp_send_stop(); 156 - cpus_stopped = 1; 150 + chip = irq_desc_get_chip(desc); 151 + if (!chip) 152 + continue; 153 + 154 + /* 155 + * First try to remove the active state. If this 156 + * fails, try to EOI the interrupt. 
157 + */ 158 + ret = irq_set_irqchip_state(i, IRQCHIP_STATE_ACTIVE, false); 159 + 160 + if (ret && irqd_irq_inprogress(&desc->irq_data) && 161 + chip->irq_eoi) 162 + chip->irq_eoi(&desc->irq_data); 163 + 164 + if (chip->irq_mask) 165 + chip->irq_mask(&desc->irq_data); 166 + 167 + if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data)) 168 + chip->irq_disable(&desc->irq_data); 169 + } 157 170 } 158 171 159 172 /* ··· 186 169 crash_smp_send_stop(); 187 170 188 171 crash_save_cpu(regs, smp_processor_id()); 172 + machine_kexec_mask_interrupts(); 173 + 189 174 pr_info("Starting crashdump kernel...\n"); 190 175 } 191 176 ··· 213 194 unsigned long fdt_addr = internal->fdt_addr; 214 195 void *control_code_buffer = page_address(image->control_code_page); 215 196 riscv_kexec_method kexec_method = NULL; 197 + 198 + #ifdef CONFIG_SMP 199 + WARN(smp_crash_stop_failed(), 200 + "Some CPUs may be stale, kdump will be unreliable.\n"); 201 + #endif 216 202 217 203 if (image->type != KEXEC_TYPE_CRASH) 218 204 kexec_method = control_code_buffer;
+5 -4
arch/riscv/kernel/setup.c
··· 322 322 323 323 void free_initmem(void) 324 324 { 325 - if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) 326 - set_kernel_memory(lm_alias(__init_begin), lm_alias(__init_end), 327 - IS_ENABLED(CONFIG_64BIT) ? 328 - set_memory_rw : set_memory_rw_nx); 325 + if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) { 326 + set_kernel_memory(lm_alias(__init_begin), lm_alias(__init_end), set_memory_rw_nx); 327 + if (IS_ENABLED(CONFIG_64BIT)) 328 + set_kernel_memory(__init_begin, __init_end, set_memory_nx); 329 + } 329 330 330 331 free_initmem_default(POISON_FREE_INITMEM); 331 332 }
+95 -2
arch/riscv/kernel/smp.c
··· 12 12 #include <linux/clockchips.h> 13 13 #include <linux/interrupt.h> 14 14 #include <linux/module.h> 15 + #include <linux/kexec.h> 15 16 #include <linux/profile.h> 16 17 #include <linux/smp.h> 17 18 #include <linux/sched.h> ··· 23 22 #include <asm/sbi.h> 24 23 #include <asm/tlbflush.h> 25 24 #include <asm/cacheflush.h> 25 + #include <asm/cpu_ops.h> 26 26 27 27 enum ipi_message_type { 28 28 IPI_RESCHEDULE, 29 29 IPI_CALL_FUNC, 30 30 IPI_CPU_STOP, 31 + IPI_CPU_CRASH_STOP, 31 32 IPI_IRQ_WORK, 32 33 IPI_TIMER, 33 34 IPI_MAX ··· 73 70 while (1) 74 71 wait_for_interrupt(); 75 72 } 73 + 74 + #ifdef CONFIG_KEXEC_CORE 75 + static atomic_t waiting_for_crash_ipi = ATOMIC_INIT(0); 76 + 77 + static inline void ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs) 78 + { 79 + crash_save_cpu(regs, cpu); 80 + 81 + atomic_dec(&waiting_for_crash_ipi); 82 + 83 + local_irq_disable(); 84 + 85 + #ifdef CONFIG_HOTPLUG_CPU 86 + if (cpu_has_hotplug(cpu)) 87 + cpu_ops[cpu]->cpu_stop(); 88 + #endif 89 + 90 + for(;;) 91 + wait_for_interrupt(); 92 + } 93 + #else 94 + static inline void ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs) 95 + { 96 + unreachable(); 97 + } 98 + #endif 76 99 77 100 static const struct riscv_ipi_ops *ipi_ops __ro_after_init; 78 101 ··· 153 124 154 125 void handle_IPI(struct pt_regs *regs) 155 126 { 156 - unsigned long *pending_ipis = &ipi_data[smp_processor_id()].bits; 157 - unsigned long *stats = ipi_data[smp_processor_id()].stats; 127 + unsigned int cpu = smp_processor_id(); 128 + unsigned long *pending_ipis = &ipi_data[cpu].bits; 129 + unsigned long *stats = ipi_data[cpu].stats; 158 130 159 131 riscv_clear_ipi(); 160 132 ··· 184 154 ipi_stop(); 185 155 } 186 156 157 + if (ops & (1 << IPI_CPU_CRASH_STOP)) { 158 + ipi_cpu_crash_stop(cpu, get_irq_regs()); 159 + } 160 + 187 161 if (ops & (1 << IPI_IRQ_WORK)) { 188 162 stats[IPI_IRQ_WORK]++; 189 163 irq_work_run(); ··· 210 176 [IPI_RESCHEDULE] = "Rescheduling interrupts", 211 177 [IPI_CALL_FUNC] = 
"Function call interrupts", 212 178 [IPI_CPU_STOP] = "CPU stop interrupts", 179 + [IPI_CPU_CRASH_STOP] = "CPU stop (for crash dump) interrupts", 213 180 [IPI_IRQ_WORK] = "IRQ work interrupts", 214 181 [IPI_TIMER] = "Timer broadcast interrupts", 215 182 }; ··· 269 234 pr_warn("SMP: failed to stop secondary CPUs %*pbl\n", 270 235 cpumask_pr_args(cpu_online_mask)); 271 236 } 237 + 238 + #ifdef CONFIG_KEXEC_CORE 239 + /* 240 + * The number of CPUs online, not counting this CPU (which may not be 241 + * fully online and so not counted in num_online_cpus()). 242 + */ 243 + static inline unsigned int num_other_online_cpus(void) 244 + { 245 + unsigned int this_cpu_online = cpu_online(smp_processor_id()); 246 + 247 + return num_online_cpus() - this_cpu_online; 248 + } 249 + 250 + void crash_smp_send_stop(void) 251 + { 252 + static int cpus_stopped; 253 + cpumask_t mask; 254 + unsigned long timeout; 255 + 256 + /* 257 + * This function can be called twice in panic path, but obviously 258 + * we execute this only once. 259 + */ 260 + if (cpus_stopped) 261 + return; 262 + 263 + cpus_stopped = 1; 264 + 265 + /* 266 + * If this cpu is the only one alive at this point in time, online or 267 + * not, there are no stop messages to be sent around, so just back out. 
268 + */ 269 + if (num_other_online_cpus() == 0) 270 + return; 271 + 272 + cpumask_copy(&mask, cpu_online_mask); 273 + cpumask_clear_cpu(smp_processor_id(), &mask); 274 + 275 + atomic_set(&waiting_for_crash_ipi, num_other_online_cpus()); 276 + 277 + pr_crit("SMP: stopping secondary CPUs\n"); 278 + send_ipi_mask(&mask, IPI_CPU_CRASH_STOP); 279 + 280 + /* Wait up to one second for other CPUs to stop */ 281 + timeout = USEC_PER_SEC; 282 + while ((atomic_read(&waiting_for_crash_ipi) > 0) && timeout--) 283 + udelay(1); 284 + 285 + if (atomic_read(&waiting_for_crash_ipi) > 0) 286 + pr_warn("SMP: failed to stop secondary CPUs %*pbl\n", 287 + cpumask_pr_args(&mask)); 288 + } 289 + 290 + bool smp_crash_stop_failed(void) 291 + { 292 + return (atomic_read(&waiting_for_crash_ipi) > 0); 293 + } 294 + #endif 272 295 273 296 void smp_send_reschedule(int cpu) 274 297 {
+18
arch/riscv/kernel/traps.c
··· 221 221 OVERFLOW_STACK_SIZE; 222 222 } 223 223 224 + /* 225 + * A pseudo spinlock to protect the shadow stack from being used by multiple 226 + * harts concurrently. This isn't a real spinlock because the lock side must 227 + * be taken without a valid stack and only a single register, it's only taken 228 + * while in the process of panicking anyway so the performance and error 229 + * checking a proper spinlock gives us doesn't matter. 230 + */ 231 + unsigned long spin_shadow_stack; 232 + 224 233 asmlinkage void handle_bad_stack(struct pt_regs *regs) 225 234 { 226 235 unsigned long tsk_stk = (unsigned long)current->stack; 227 236 unsigned long ovf_stk = (unsigned long)this_cpu_ptr(overflow_stack); 237 + 238 + /* 239 + * We're done with the shadow stack by this point, as we're on the 240 + * overflow stack. Tell any other concurrent overflowing harts that 241 + * they can proceed with panicking by releasing the pseudo-spinlock. 242 + * 243 + * This pairs with an amoswap.aq in handle_kernel_stack_overflow. 244 + */ 245 + smp_store_release(&spin_shadow_stack, 0); 228 246 229 247 console_verbose(); 230 248
+1
arch/riscv/kernel/vdso/Makefile
··· 17 17 obj-vdso = $(patsubst %, %.o, $(vdso-syms)) note.o 18 18 19 19 ccflags-y := -fno-stack-protector 20 + ccflags-y += -DDISABLE_BRANCH_PROFILING 20 21 21 22 ifneq ($(c-gettimeofday-y),) 22 23 CFLAGS_vgettimeofday.o += -fPIC -include $(c-gettimeofday-y)