Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 fixes from Thomas Gleixner:

- the high latency PIT detection fix, which slipped through the cracks
for rc1

- a regression fix for the early printk mechanism

- the x86 part to plug irq/vector related hotplug races

- move the allocation of the espfix pages on cpu hotplug to non atomic
context. The current code triggers a might_sleep() warning.

- a series of KASAN fixes addressing boot crashes and usability

- a trivial typo fix for Kconfig help text

* 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/kconfig: Fix typo in the CONFIG_CMDLINE_BOOL help text
x86/irq: Retrieve irq data after locking irq_desc
x86/irq: Use proper locking in check_irq_vectors_for_cpu_disable()
x86/irq: Plug irq vector hotplug race
x86/earlyprintk: Allow early_printk() to use console style parameters like '115200n8'
x86/espfix: Init espfix on the boot CPU side
x86/espfix: Add 'cpu' parameter to init_espfix_ap()
x86/kasan: Move KASAN_SHADOW_OFFSET to the arch Kconfig
x86/kasan: Add message about KASAN being initialized
x86/kasan: Fix boot crash on AMD processors
x86/kasan: Flush TLBs after switching CR3
x86/kasan: Fix KASAN shadow region page tables
x86/init: Clear 'init_level4_pgt' earlier
x86/tsc: Let high latency PIT fail fast in quick_pit_calibrate()

+116 -91
+6 -1
arch/x86/Kconfig
··· 254 254 config ARCH_SUPPORTS_DEBUG_PAGEALLOC 255 255 def_bool y 256 256 257 + config KASAN_SHADOW_OFFSET 258 + hex 259 + depends on KASAN 260 + default 0xdffffc0000000000 261 + 257 262 config HAVE_INTEL_TXT 258 263 def_bool y 259 264 depends on INTEL_IOMMU && ACPI ··· 2020 2015 2021 2016 To compile command line arguments into the kernel, 2022 2017 set this option to 'Y', then fill in the 2023 - the boot arguments in CONFIG_CMDLINE. 2018 + boot arguments in CONFIG_CMDLINE. 2024 2019 2025 2020 Systems with fully functional boot loaders (i.e. non-embedded) 2026 2021 should leave this option set to 'N'.
+1 -1
arch/x86/include/asm/espfix.h
··· 9 9 DECLARE_PER_CPU_READ_MOSTLY(unsigned long, espfix_waddr); 10 10 11 11 extern void init_espfix_bsp(void); 12 - extern void init_espfix_ap(void); 12 + extern void init_espfix_ap(int cpu); 13 13 14 14 #endif /* CONFIG_X86_64 */ 15 15
+2 -6
arch/x86/include/asm/kasan.h
··· 14 14 15 15 #ifndef __ASSEMBLY__ 16 16 17 - extern pte_t kasan_zero_pte[]; 18 - extern pte_t kasan_zero_pmd[]; 19 - extern pte_t kasan_zero_pud[]; 20 - 21 17 #ifdef CONFIG_KASAN 22 - void __init kasan_map_early_shadow(pgd_t *pgd); 18 + void __init kasan_early_init(void); 23 19 void __init kasan_init(void); 24 20 #else 25 - static inline void kasan_map_early_shadow(pgd_t *pgd) { } 21 + static inline void kasan_early_init(void) { } 26 22 static inline void kasan_init(void) { } 27 23 #endif 28 24
+2 -8
arch/x86/kernel/apic/vector.c
··· 409 409 int irq, vector; 410 410 struct apic_chip_data *data; 411 411 412 - /* 413 - * vector_lock will make sure that we don't run into irq vector 414 - * assignments that might be happening on another cpu in parallel, 415 - * while we setup our initial vector to irq mappings. 416 - */ 417 - raw_spin_lock(&vector_lock); 418 412 /* Mark the inuse vectors */ 419 413 for_each_active_irq(irq) { 420 414 data = apic_chip_data(irq_get_irq_data(irq)); ··· 430 436 if (!cpumask_test_cpu(cpu, data->domain)) 431 437 per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED; 432 438 } 433 - raw_spin_unlock(&vector_lock); 434 439 } 435 440 436 441 /* 437 - * Setup the vector to irq mappings. 442 + * Setup the vector to irq mappings. Must be called with vector_lock held. 438 443 */ 439 444 void setup_vector_irq(int cpu) 440 445 { 441 446 int irq; 442 447 448 + lockdep_assert_held(&vector_lock); 443 449 /* 444 450 * On most of the platforms, legacy PIC delivers the interrupts on the 445 451 * boot cpu. But there are certain platforms where PIC interrupts are
+3 -1
arch/x86/kernel/early_printk.c
··· 175 175 } 176 176 177 177 if (*s) { 178 - if (kstrtoul(s, 0, &baud) < 0 || baud == 0) 178 + baud = simple_strtoull(s, &e, 0); 179 + 180 + if (baud == 0 || s == e) 179 181 baud = DEFAULT_BAUD; 180 182 } 181 183
+16 -12
arch/x86/kernel/espfix_64.c
··· 131 131 init_espfix_random(); 132 132 133 133 /* The rest is the same as for any other processor */ 134 - init_espfix_ap(); 134 + init_espfix_ap(0); 135 135 136 136 137 - void init_espfix_ap(void) 137 + void init_espfix_ap(int cpu) 138 138 { 139 - unsigned int cpu, page; 139 + unsigned int page; 140 140 unsigned long addr; 141 141 pud_t pud, *pud_p; 142 142 pmd_t pmd, *pmd_p; 143 143 pte_t pte, *pte_p; 144 - int n; 144 + int n, node; 145 145 void *stack_page; 146 146 pteval_t ptemask; 147 147 148 148 /* We only have to do this once... */ 149 - if (likely(this_cpu_read(espfix_stack))) 149 + if (likely(per_cpu(espfix_stack, cpu))) 150 150 return; /* Already initialized */ 151 151 152 - cpu = smp_processor_id(); 153 152 addr = espfix_base_addr(cpu); 154 153 page = cpu/ESPFIX_STACKS_PER_PAGE; 155 154 ··· 164 165 if (stack_page) 165 166 goto unlock_done; 166 167 168 + node = cpu_to_node(cpu); 167 169 ptemask = __supported_pte_mask; 168 170 169 171 pud_p = &espfix_pud_page[pud_index(addr)]; 170 172 pud = *pud_p; 171 173 if (!pud_present(pud)) { 172 - pmd_p = (pmd_t *)__get_free_page(PGALLOC_GFP); 174 + struct page *page = alloc_pages_node(node, PGALLOC_GFP, 0); 175 + 176 + pmd_p = (pmd_t *)page_address(page); 173 177 pud = __pud(__pa(pmd_p) | (PGTABLE_PROT & ptemask)); 174 178 paravirt_alloc_pmd(&init_mm, __pa(pmd_p) >> PAGE_SHIFT); 175 179 for (n = 0; n < ESPFIX_PUD_CLONES; n++) ··· 182 180 pmd_p = pmd_offset(&pud, addr); 183 181 pmd = *pmd_p; 184 182 if (!pmd_present(pmd)) { 185 - pte_p = (pte_t *)__get_free_page(PGALLOC_GFP); 183 + struct page *page = alloc_pages_node(node, PGALLOC_GFP, 0); 184 + 185 + pte_p = (pte_t *)page_address(page); 186 186 pmd = __pmd(__pa(pte_p) | (PGTABLE_PROT & ptemask)); 187 187 paravirt_alloc_pte(&init_mm, __pa(pte_p) >> PAGE_SHIFT); 188 188 for (n = 0; n < ESPFIX_PMD_CLONES; n++) ··· 192 188 } 193 189 194 190 pte_p = pte_offset_kernel(&pmd, addr); 195 - stack_page = (void *)__get_free_page(GFP_KERNEL); 191 + stack_page = page_address(alloc_pages_node(node, GFP_KERNEL, 0)); 196 192 pte = __pte(__pa(stack_page) | (__PAGE_KERNEL_RO & ptemask)); 197 193 for (n = 0; n < ESPFIX_PTE_CLONES; n++) 198 194 set_pte(&pte_p[n*PTE_STRIDE], pte); ··· 203 199 unlock_done: 204 200 mutex_unlock(&espfix_init_mutex); 205 201 done: 206 - this_cpu_write(espfix_stack, addr); 207 - this_cpu_write(espfix_waddr, (unsigned long)stack_page 208 - + (addr & ~PAGE_MASK)); 202 + per_cpu(espfix_stack, cpu) = addr; 203 + per_cpu(espfix_waddr, cpu) = (unsigned long)stack_page 204 + + (addr & ~PAGE_MASK); 209 205 }
+4 -6
arch/x86/kernel/head64.c
··· 161 161 /* Kill off the identity-map trampoline */ 162 162 reset_early_page_tables(); 163 163 164 - kasan_map_early_shadow(early_level4_pgt); 165 - 166 - /* clear bss before set_intr_gate with early_idt_handler */ 167 164 clear_bss(); 165 + 166 + clear_page(init_level4_pgt); 167 + 168 + kasan_early_init(); 168 169 169 170 for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) 170 171 set_intr_gate(i, early_idt_handler_array[i]); ··· 178 177 */ 179 178 load_ucode_bsp(); 180 179 181 - clear_page(init_level4_pgt); 182 180 /* set init_level4_pgt kernel high mapping*/ 183 181 init_level4_pgt[511] = early_level4_pgt[511]; 184 - 185 - kasan_map_early_shadow(init_level4_pgt); 186 182 187 183 x86_64_start_reservations(real_mode_data); 188 184 }
-29
arch/x86/kernel/head_64.S
··· 516 516 /* This must match the first entry in level2_kernel_pgt */ 517 517 .quad 0x0000000000000000 518 518 519 - #ifdef CONFIG_KASAN 520 - #define FILL(VAL, COUNT) \ 521 - .rept (COUNT) ; \ 522 - .quad (VAL) ; \ 523 - .endr 524 - 525 - NEXT_PAGE(kasan_zero_pte) 526 - FILL(kasan_zero_page - __START_KERNEL_map + _KERNPG_TABLE, 512) 527 - NEXT_PAGE(kasan_zero_pmd) 528 - FILL(kasan_zero_pte - __START_KERNEL_map + _KERNPG_TABLE, 512) 529 - NEXT_PAGE(kasan_zero_pud) 530 - FILL(kasan_zero_pmd - __START_KERNEL_map + _KERNPG_TABLE, 512) 531 - 532 - #undef FILL 533 - #endif 534 - 535 - 536 519 #include "../../x86/xen/xen-head.S" 537 520 538 521 __PAGE_ALIGNED_BSS 539 522 NEXT_PAGE(empty_zero_page) 540 523 .skip PAGE_SIZE 541 524 542 - #ifdef CONFIG_KASAN 543 - /* 544 - * This page used as early shadow. We don't use empty_zero_page 545 - * at early stages, stack instrumentation could write some garbage 546 - * to this page. 547 - * Latter we reuse it as zero shadow for large ranges of memory 548 - * that allowed to access, but not instrumented by kasan 549 - * (vmalloc/vmemmap ...). 550 - */ 551 - NEXT_PAGE(kasan_zero_page) 552 - .skip PAGE_SIZE 553 - #endif
+18 -2
arch/x86/kernel/irq.c
··· 347 347 if (!desc) 348 348 continue; 349 349 350 + /* 351 + * Protect against concurrent action removal, 352 + * affinity changes etc. 353 + */ 354 + raw_spin_lock(&desc->lock); 350 355 data = irq_desc_get_irq_data(desc); 351 356 cpumask_copy(&affinity_new, data->affinity); 352 357 cpumask_clear_cpu(this_cpu, &affinity_new); 353 358 354 359 /* Do not count inactive or per-cpu irqs. */ 355 - if (!irq_has_action(irq) || irqd_is_per_cpu(data)) 360 + if (!irq_has_action(irq) || irqd_is_per_cpu(data)) { 361 + raw_spin_unlock(&desc->lock); 356 362 continue; 363 + } 357 364 365 + raw_spin_unlock(&desc->lock); 358 366 /* 359 367 * A single irq may be mapped to multiple 360 368 * cpu's vector_irq[] (for example IOAPIC cluster ··· 393 385 * vector. If the vector is marked in the used vectors 394 386 * bitmap or an irq is assigned to it, we don't count 395 387 * it as available. 388 + * 389 + * As this is an inaccurate snapshot anyway, we can do 390 + * this w/o holding vector_lock. 396 391 */ 397 392 for (vector = FIRST_EXTERNAL_VECTOR; 398 393 vector < first_system_vector; vector++) { ··· 497 486 */ 498 487 mdelay(1); 499 488 489 + /* 490 + * We can walk the vector array of this cpu without holding 491 + * vector_lock because the cpu is already marked !online, so 492 + * nothing else will touch it. 493 + */ 500 494 for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { 501 495 unsigned int irr; 502 496 ··· 513 497 irq = __this_cpu_read(vector_irq[vector]); 514 498 515 499 desc = irq_to_desc(irq); 500 + raw_spin_lock(&desc->lock); 516 501 data = irq_desc_get_irq_data(desc); 517 502 chip = irq_data_get_irq_chip(data); 518 - raw_spin_lock(&desc->lock); 519 503 if (chip->irq_retrigger) { 520 504 chip->irq_retrigger(data); 521 505 __this_cpu_write(vector_irq[vector], VECTOR_RETRIGGERED);
+12 -15
arch/x86/kernel/smpboot.c
··· 171 171 apic_ap_setup(); 172 172 173 173 /* 174 - * Need to setup vector mappings before we enable interrupts. 175 - */ 176 - setup_vector_irq(smp_processor_id()); 177 - 178 - /* 179 174 * Save our processor parameters. Note: this information 180 175 * is needed for clock calibration. 181 176 */ ··· 234 239 check_tsc_sync_target(); 235 240 236 241 /* 237 - * Enable the espfix hack for this CPU 238 - */ 239 - #ifdef CONFIG_X86_ESPFIX64 240 - init_espfix_ap(); 241 - #endif 242 - 243 - /* 244 - * We need to hold vector_lock so there the set of online cpus 245 - * does not change while we are assigning vectors to cpus. Holding 246 - * this lock ensures we don't half assign or remove an irq from a cpu. 242 + * Lock vector_lock and initialize the vectors on this cpu 243 + * before setting the cpu online. We must set it online with 244 + * vector_lock held to prevent a concurrent setup/teardown 245 + * from seeing a half valid vector space. 247 246 */ 248 247 lock_vector_lock(); 248 + setup_vector_irq(smp_processor_id()); 249 249 set_cpu_online(smp_processor_id(), true); 250 250 unlock_vector_lock(); 251 251 cpu_set_state_online(smp_processor_id()); ··· 843 853 early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); 844 854 initial_code = (unsigned long)start_secondary; 845 855 stack_start = idle->thread.sp; 856 + 857 + /* 858 + * Enable the espfix hack for this CPU 859 + */ 860 + #ifdef CONFIG_X86_ESPFIX64 861 + init_espfix_ap(cpu); 862 + #endif 846 863 847 864 /* So we see what's up */ 848 865 announce_cpu(cpu, apicid);
+10 -1
arch/x86/kernel/tsc.c
··· 598 598 if (!pit_expect_msb(0xff-i, &delta, &d2)) 599 599 break; 600 600 601 + delta -= tsc; 602 + 603 + /* 604 + * Extrapolate the error and fail fast if the error will 605 + * never be below 500 ppm. 606 + */ 607 + if (i == 1 && 608 + d1 + d2 >= (delta * MAX_QUICK_PIT_ITERATIONS) >> 11) 609 + return 0; 610 + 601 611 /* 602 612 * Iterate until the error is less than 500 ppm 603 613 */ 604 - delta -= tsc; 605 614 if (d1+d2 >= delta >> 11) 606 615 continue; 607 616
+42 -5
arch/x86/mm/kasan_init_64.c
··· 1 + #define pr_fmt(fmt) "kasan: " fmt 1 2 #include <linux/bootmem.h> 2 3 #include <linux/kasan.h> 3 4 #include <linux/kdebug.h> ··· 12 11 extern pgd_t early_level4_pgt[PTRS_PER_PGD]; 13 12 extern struct range pfn_mapped[E820_X_MAX]; 14 13 15 - extern unsigned char kasan_zero_page[PAGE_SIZE]; 14 + static pud_t kasan_zero_pud[PTRS_PER_PUD] __page_aligned_bss; 15 + static pmd_t kasan_zero_pmd[PTRS_PER_PMD] __page_aligned_bss; 16 + static pte_t kasan_zero_pte[PTRS_PER_PTE] __page_aligned_bss; 17 + 18 + /* 19 + * This page used as early shadow. We don't use empty_zero_page 20 + * at early stages, stack instrumentation could write some garbage 21 + * to this page. 22 + * Latter we reuse it as zero shadow for large ranges of memory 23 + * that allowed to access, but not instrumented by kasan 24 + * (vmalloc/vmemmap ...). 25 + */ 26 + static unsigned char kasan_zero_page[PAGE_SIZE] __page_aligned_bss; 16 27 17 28 static int __init map_range(struct range *range) 18 29 { ··· 49 36 pgd_clear(pgd_offset_k(start)); 50 37 } 51 38 52 - void __init kasan_map_early_shadow(pgd_t *pgd) 39 + static void __init kasan_map_early_shadow(pgd_t *pgd) 53 40 { 54 41 int i; 55 42 unsigned long start = KASAN_SHADOW_START; ··· 86 73 while (IS_ALIGNED(addr, PMD_SIZE) && addr + PMD_SIZE <= end) { 87 74 WARN_ON(!pmd_none(*pmd)); 88 75 set_pmd(pmd, __pmd(__pa_nodebug(kasan_zero_pte) 89 - | __PAGE_KERNEL_RO)); 76 + | _KERNPG_TABLE)); 90 77 addr += PMD_SIZE; 91 78 pmd = pmd_offset(pud, addr); 92 79 } ··· 112 99 while (IS_ALIGNED(addr, PUD_SIZE) && addr + PUD_SIZE <= end) { 113 100 WARN_ON(!pud_none(*pud)); 114 101 set_pud(pud, __pud(__pa_nodebug(kasan_zero_pmd) 115 - | __PAGE_KERNEL_RO)); 102 + | _KERNPG_TABLE)); 116 103 addr += PUD_SIZE; 117 104 pud = pud_offset(pgd, addr); 118 105 } ··· 137 124 while (IS_ALIGNED(addr, PGDIR_SIZE) && addr + PGDIR_SIZE <= end) { 138 125 WARN_ON(!pgd_none(*pgd)); 139 126 set_pgd(pgd, __pgd(__pa_nodebug(kasan_zero_pud) 140 - | __PAGE_KERNEL_RO)); 127 + | _KERNPG_TABLE)); 141 128 addr += PGDIR_SIZE; 142 129 pgd = pgd_offset_k(addr); 143 130 } ··· 179 166 }; 180 167 #endif 181 168 169 + void __init kasan_early_init(void) 170 + { 171 + int i; 172 + pteval_t pte_val = __pa_nodebug(kasan_zero_page) | __PAGE_KERNEL; 173 + pmdval_t pmd_val = __pa_nodebug(kasan_zero_pte) | _KERNPG_TABLE; 174 + pudval_t pud_val = __pa_nodebug(kasan_zero_pmd) | _KERNPG_TABLE; 175 + 176 + for (i = 0; i < PTRS_PER_PTE; i++) 177 + kasan_zero_pte[i] = __pte(pte_val); 178 + 179 + for (i = 0; i < PTRS_PER_PMD; i++) 180 + kasan_zero_pmd[i] = __pmd(pmd_val); 181 + 182 + for (i = 0; i < PTRS_PER_PUD; i++) 183 + kasan_zero_pud[i] = __pud(pud_val); 184 + 185 + kasan_map_early_shadow(early_level4_pgt); 186 + kasan_map_early_shadow(init_level4_pgt); 187 + } 188 + 182 189 void __init kasan_init(void) 183 190 { 184 191 int i; ··· 209 176 210 177 memcpy(early_level4_pgt, init_level4_pgt, sizeof(early_level4_pgt)); 211 178 load_cr3(early_level4_pgt); 179 + __flush_tlb_all(); 212 180 213 181 clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END); 214 182 ··· 236 202 memset(kasan_zero_page, 0, PAGE_SIZE); 237 203 238 204 load_cr3(init_level4_pgt); 205 + __flush_tlb_all(); 239 206 init_task.kasan_depth = 0; 207 + 208 + pr_info("Kernel address sanitizer initialized\n"); 240 209 }
-4
lib/Kconfig.kasan
··· 18 18 For better error detection enable CONFIG_STACKTRACE, 19 19 and add slub_debug=U to boot cmdline. 20 20 21 - config KASAN_SHADOW_OFFSET 22 - hex 23 - default 0xdffffc0000000000 if X86_64 24 - 25 21 choice 26 22 prompt "Instrumentation type" 27 23 depends on KASAN