Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 fixes from Ingo Molnar:
"Misc fixes:

- fix the s2ram regression related to confusion around segment
register restoration, plus related cleanups that make the code more
robust

- a guess-unwinder Kconfig dependency fix

- an isoimage build target fix for certain tool chain combinations

- instruction decoder opcode map fixes+updates, and the syncing of
the kernel decoder headers to the objtool headers

- a kmmio tracing fix

- two 5-level paging related fixes

- a topology enumeration fix on certain SMP systems"

* 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
objtool: Resync objtool's instruction decoder source code copy with the kernel's latest version
x86/decoder: Fix and update the opcodes map
x86/power: Make restore_processor_context() sane
x86/power/32: Move SYSENTER MSR restoration to fix_processor_context()
x86/power/64: Use struct desc_ptr for the IDT in struct saved_context
x86/unwinder/guess: Prevent using CONFIG_UNWINDER_GUESS=y with CONFIG_STACKDEPOT=y
x86/build: Don't verify mtools configuration file for isoimage
x86/mm/kmmio: Fix mmiotrace for page unaligned addresses
x86/boot/compressed/64: Print error if 5-level paging is not supported
x86/boot/compressed/64: Detect and handle 5-level paging at boot-time
x86/smpboot: Do not use smp_num_siblings in __max_logical_packages calculation

+174 -81
+1
arch/x86/Kconfig.debug
··· 400 400 config UNWINDER_GUESS 401 401 bool "Guess unwinder" 402 402 depends on EXPERT 403 + depends on !STACKDEPOT 403 404 ---help--- 404 405 This option enables the "guess" unwinder for unwinding kernel stack 405 406 traces. It scans the stack and reports every kernel text address it
+1
arch/x86/boot/compressed/Makefile
··· 80 80 ifdef CONFIG_X86_64 81 81 vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/pagetable.o 82 82 vmlinux-objs-y += $(obj)/mem_encrypt.o 83 + vmlinux-objs-y += $(obj)/pgtable_64.o 83 84 endif 84 85 85 86 $(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone
+12 -4
arch/x86/boot/compressed/head_64.S
··· 305 305 leaq boot_stack_end(%rbx), %rsp 306 306 307 307 #ifdef CONFIG_X86_5LEVEL 308 - /* Check if 5-level paging has already enabled */ 309 - movq %cr4, %rax 310 - testl $X86_CR4_LA57, %eax 311 - jnz lvl5 308 + /* 309 + * Check if we need to enable 5-level paging. 310 + * RSI holds real mode data and need to be preserved across 311 + * a function call. 312 + */ 313 + pushq %rsi 314 + call l5_paging_required 315 + popq %rsi 316 + 317 + /* If l5_paging_required() returned zero, we're done here. */ 318 + cmpq $0, %rax 319 + je lvl5 312 320 313 321 /* 314 322 * At this point we are in long mode with 4-level paging enabled,
+16
arch/x86/boot/compressed/misc.c
··· 169 169 } 170 170 } 171 171 172 + static bool l5_supported(void) 173 + { 174 + /* Check if leaf 7 is supported. */ 175 + if (native_cpuid_eax(0) < 7) 176 + return 0; 177 + 178 + /* Check if la57 is supported. */ 179 + return native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31)); 180 + } 181 + 172 182 #if CONFIG_X86_NEED_RELOCS 173 183 static void handle_relocations(void *output, unsigned long output_len, 174 184 unsigned long virt_addr) ··· 371 361 372 362 console_init(); 373 363 debug_putstr("early console in extract_kernel\n"); 364 + 365 + if (IS_ENABLED(CONFIG_X86_5LEVEL) && !l5_supported()) { 366 + error("This linux kernel as configured requires 5-level paging\n" 367 + "This CPU does not support the required 'cr4.la57' feature\n" 368 + "Unable to boot - please use a kernel appropriate for your CPU\n"); 369 + } 374 370 375 371 free_mem_ptr = heap; /* Heap */ 376 372 free_mem_end_ptr = heap + BOOT_HEAP_SIZE;
+28
arch/x86/boot/compressed/pgtable_64.c
··· 1 + #include <asm/processor.h> 2 + 3 + /* 4 + * __force_order is used by special_insns.h asm code to force instruction 5 + * serialization. 6 + * 7 + * It is not referenced from the code, but GCC < 5 with -fPIE would fail 8 + * due to an undefined symbol. Define it to make these ancient GCCs work. 9 + */ 10 + unsigned long __force_order; 11 + 12 + int l5_paging_required(void) 13 + { 14 + /* Check if leaf 7 is supported. */ 15 + 16 + if (native_cpuid_eax(0) < 7) 17 + return 0; 18 + 19 + /* Check if la57 is supported. */ 20 + if (!(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31)))) 21 + return 0; 22 + 23 + /* Check if 5-level paging has already been enabled. */ 24 + if (native_read_cr4() & X86_CR4_LA57) 25 + return 0; 26 + 27 + return 1; 28 + }
+3 -1
arch/x86/boot/genimage.sh
··· 44 44 45 45 # Make sure the files actually exist 46 46 verify "$FBZIMAGE" 47 - verify "$MTOOLSRC" 48 47 49 48 genbzdisk() { 49 + verify "$MTOOLSRC" 50 50 mformat a: 51 51 syslinux $FIMAGE 52 52 echo "$KCMDLINE" | mcopy - a:syslinux.cfg ··· 57 57 } 58 58 59 59 genfdimage144() { 60 + verify "$MTOOLSRC" 60 61 dd if=/dev/zero of=$FIMAGE bs=1024 count=1440 2> /dev/null 61 62 mformat v: 62 63 syslinux $FIMAGE ··· 69 68 } 70 69 71 70 genfdimage288() { 71 + verify "$MTOOLSRC" 72 72 dd if=/dev/zero of=$FIMAGE bs=1024 count=2880 2> /dev/null 73 73 mformat w: 74 74 syslinux $FIMAGE
+7 -1
arch/x86/include/asm/suspend_32.h
··· 12 12 13 13 /* image of the saved processor state */ 14 14 struct saved_context { 15 - u16 es, fs, gs, ss; 15 + /* 16 + * On x86_32, all segment registers, with the possible exception of 17 + * gs, are saved at kernel entry in pt_regs. 18 + */ 19 + #ifdef CONFIG_X86_32_LAZY_GS 20 + u16 gs; 21 + #endif 16 22 unsigned long cr0, cr2, cr3, cr4; 17 23 u64 misc_enable; 18 24 bool misc_enable_saved;
+15 -4
arch/x86/include/asm/suspend_64.h
··· 20 20 */ 21 21 struct saved_context { 22 22 struct pt_regs regs; 23 - u16 ds, es, fs, gs, ss; 24 - unsigned long gs_base, gs_kernel_base, fs_base; 23 + 24 + /* 25 + * User CS and SS are saved in current_pt_regs(). The rest of the 26 + * segment selectors need to be saved and restored here. 27 + */ 28 + u16 ds, es, fs, gs; 29 + 30 + /* 31 + * Usermode FSBASE and GSBASE may not match the fs and gs selectors, 32 + * so we save them separately. We save the kernelmode GSBASE to 33 + * restore percpu access after resume. 34 + */ 35 + unsigned long kernelmode_gs_base, usermode_gs_base, fs_base; 36 + 25 37 unsigned long cr0, cr2, cr3, cr4, cr8; 26 38 u64 misc_enable; 27 39 bool misc_enable_saved; ··· 42 30 u16 gdt_pad; /* Unused */ 43 31 struct desc_ptr gdt_desc; 44 32 u16 idt_pad; 45 - u16 idt_limit; 46 - unsigned long idt_base; 33 + struct desc_ptr idt; 47 34 u16 ldt; 48 35 u16 tss; 49 36 unsigned long tr;
+2 -2
arch/x86/kernel/smpboot.c
··· 106 106 static unsigned int logical_packages __read_mostly; 107 107 108 108 /* Maximum number of SMT threads on any online core */ 109 - int __max_smt_threads __read_mostly; 109 + int __read_mostly __max_smt_threads = 1; 110 110 111 111 /* Flag to indicate if a complete sched domain rebuild is required */ 112 112 bool x86_topology_update; ··· 1304 1304 * Today neither Intel nor AMD support heterogenous systems so 1305 1305 * extrapolate the boot cpu's data to all packages. 1306 1306 */ 1307 - ncpus = cpu_data(0).booted_cores * smp_num_siblings; 1307 + ncpus = cpu_data(0).booted_cores * topology_max_smt_threads(); 1308 1308 __max_logical_packages = DIV_ROUND_UP(nr_cpu_ids, ncpus); 1309 1309 pr_info("Max logical packages: %u\n", __max_logical_packages); 1310 1310
+11 -2
arch/x86/lib/x86-opcode-map.txt
··· 607 607 fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1) 608 608 fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1) 609 609 fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1) 610 - ff: 610 + ff: UD0 611 611 EndTable 612 612 613 613 Table: 3-byte opcode 1 (0x0f 0x38) ··· 717 717 7e: vpermt2d/q Vx,Hx,Wx (66),(ev) 718 718 7f: vpermt2ps/d Vx,Hx,Wx (66),(ev) 719 719 80: INVEPT Gy,Mdq (66) 720 - 81: INVPID Gy,Mdq (66) 720 + 81: INVVPID Gy,Mdq (66) 721 721 82: INVPCID Gy,Mdq (66) 722 722 83: vpmultishiftqb Vx,Hx,Wx (66),(ev) 723 723 88: vexpandps/d Vpd,Wpd (66),(ev) ··· 970 970 EndTable 971 971 972 972 GrpTable: Grp10 973 + # all are UD1 974 + 0: UD1 975 + 1: UD1 976 + 2: UD1 977 + 3: UD1 978 + 4: UD1 979 + 5: UD1 980 + 6: UD1 981 + 7: UD1 973 982 EndTable 974 983 975 984 # Grp11A and Grp11B are expressed as Grp11 in Intel SDM
+2 -2
arch/x86/mm/ioremap.c
··· 404 404 return; 405 405 } 406 406 407 + mmiotrace_iounmap(addr); 408 + 407 409 addr = (volatile void __iomem *) 408 410 (PAGE_MASK & (unsigned long __force)addr); 409 - 410 - mmiotrace_iounmap(addr); 411 411 412 412 /* Use the vm area unlocked, assuming the caller 413 413 ensures there isn't another iounmap for the same address
+7 -5
arch/x86/mm/kmmio.c
··· 435 435 unsigned long flags; 436 436 int ret = 0; 437 437 unsigned long size = 0; 438 + unsigned long addr = p->addr & PAGE_MASK; 438 439 const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK); 439 440 unsigned int l; 440 441 pte_t *pte; 441 442 442 443 spin_lock_irqsave(&kmmio_lock, flags); 443 - if (get_kmmio_probe(p->addr)) { 444 + if (get_kmmio_probe(addr)) { 444 445 ret = -EEXIST; 445 446 goto out; 446 447 } 447 448 448 - pte = lookup_address(p->addr, &l); 449 + pte = lookup_address(addr, &l); 449 450 if (!pte) { 450 451 ret = -EINVAL; 451 452 goto out; ··· 455 454 kmmio_count++; 456 455 list_add_rcu(&p->list, &kmmio_probes); 457 456 while (size < size_lim) { 458 - if (add_kmmio_fault_page(p->addr + size)) 457 + if (add_kmmio_fault_page(addr + size)) 459 458 pr_err("Unable to set page fault.\n"); 460 459 size += page_level_size(l); 461 460 } ··· 529 528 { 530 529 unsigned long flags; 531 530 unsigned long size = 0; 531 + unsigned long addr = p->addr & PAGE_MASK; 532 532 const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK); 533 533 struct kmmio_fault_page *release_list = NULL; 534 534 struct kmmio_delayed_release *drelease; 535 535 unsigned int l; 536 536 pte_t *pte; 537 537 538 - pte = lookup_address(p->addr, &l); 538 + pte = lookup_address(addr, &l); 539 539 if (!pte) 540 540 return; 541 541 542 542 spin_lock_irqsave(&kmmio_lock, flags); 543 543 while (size < size_lim) { 544 - release_kmmio_fault_page(p->addr + size, &release_list); 544 + release_kmmio_fault_page(addr + size, &release_list); 545 545 size += page_level_size(l); 546 546 } 547 547 list_del_rcu(&p->list);
+46 -55
arch/x86/power/cpu.c
··· 82 82 /* 83 83 * descriptor tables 84 84 */ 85 - #ifdef CONFIG_X86_32 86 85 store_idt(&ctxt->idt); 87 - #else 88 - /* CONFIG_X86_64 */ 89 - store_idt((struct desc_ptr *)&ctxt->idt_limit); 90 - #endif 86 + 91 87 /* 92 88 * We save it here, but restore it only in the hibernate case. 93 89 * For ACPI S3 resume, this is loaded via 'early_gdt_desc' in 64-bit ··· 99 103 /* 100 104 * segment registers 101 105 */ 102 - #ifdef CONFIG_X86_32 103 - savesegment(es, ctxt->es); 104 - savesegment(fs, ctxt->fs); 106 + #ifdef CONFIG_X86_32_LAZY_GS 105 107 savesegment(gs, ctxt->gs); 106 - savesegment(ss, ctxt->ss); 107 - #else 108 - /* CONFIG_X86_64 */ 109 - asm volatile ("movw %%ds, %0" : "=m" (ctxt->ds)); 110 - asm volatile ("movw %%es, %0" : "=m" (ctxt->es)); 111 - asm volatile ("movw %%fs, %0" : "=m" (ctxt->fs)); 112 - asm volatile ("movw %%gs, %0" : "=m" (ctxt->gs)); 113 - asm volatile ("movw %%ss, %0" : "=m" (ctxt->ss)); 108 + #endif 109 + #ifdef CONFIG_X86_64 110 + savesegment(gs, ctxt->gs); 111 + savesegment(fs, ctxt->fs); 112 + savesegment(ds, ctxt->ds); 113 + savesegment(es, ctxt->es); 114 114 115 115 rdmsrl(MSR_FS_BASE, ctxt->fs_base); 116 - rdmsrl(MSR_GS_BASE, ctxt->gs_base); 117 - rdmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base); 116 + rdmsrl(MSR_GS_BASE, ctxt->kernelmode_gs_base); 117 + rdmsrl(MSR_KERNEL_GS_BASE, ctxt->usermode_gs_base); 118 118 mtrr_save_fixed_ranges(NULL); 119 119 120 120 rdmsrl(MSR_EFER, ctxt->efer); ··· 170 178 write_gdt_entry(desc, GDT_ENTRY_TSS, &tss, DESC_TSS); 171 179 172 180 syscall_init(); /* This sets MSR_*STAR and related */ 181 + #else 182 + if (boot_cpu_has(X86_FEATURE_SEP)) 183 + enable_sep_cpu(); 173 184 #endif 174 185 load_TR_desc(); /* This does ltr */ 175 186 load_mm_ldt(current->active_mm); /* This does lldt */ ··· 185 190 } 186 191 187 192 /** 188 - * __restore_processor_state - restore the contents of CPU registers saved 189 - * by __save_processor_state() 190 - * @ctxt - structure to load the registers contents from 193 + * __restore_processor_state - restore the contents of CPU registers saved 194 + * by __save_processor_state() 195 + * @ctxt - structure to load the registers contents from 196 + * 197 + * The asm code that gets us here will have restored a usable GDT, although 198 + * it will be pointing to the wrong alias. 191 199 */ 192 200 static void notrace __restore_processor_state(struct saved_context *ctxt) 193 201 { ··· 213 215 write_cr2(ctxt->cr2); 214 216 write_cr0(ctxt->cr0); 215 217 216 - /* 217 - * now restore the descriptor tables to their proper values 218 - * ltr is done i fix_processor_context(). 219 - */ 220 - #ifdef CONFIG_X86_32 218 + /* Restore the IDT. */ 221 219 load_idt(&ctxt->idt); 222 - #else 223 - /* CONFIG_X86_64 */ 224 - load_idt((const struct desc_ptr *)&ctxt->idt_limit); 225 - #endif 226 220 227 - #ifdef CONFIG_X86_64 228 221 /* 229 - * We need GSBASE restored before percpu access can work. 230 - * percpu access can happen in exception handlers or in complicated 231 - * helpers like load_gs_index(). 222 + * Just in case the asm code got us here with the SS, DS, or ES 223 + * out of sync with the GDT, update them. 232 224 */ 233 - wrmsrl(MSR_GS_BASE, ctxt->gs_base); 225 + loadsegment(ss, __KERNEL_DS); 226 + loadsegment(ds, __USER_DS); 227 + loadsegment(es, __USER_DS); 228 + 229 + /* 230 + * Restore percpu access. Percpu access can happen in exception 231 + * handlers or in complicated helpers like load_gs_index(). 232 + */ 233 + #ifdef CONFIG_X86_64 234 + wrmsrl(MSR_GS_BASE, ctxt->kernelmode_gs_base); 235 + #else 236 + loadsegment(fs, __KERNEL_PERCPU); 237 + loadsegment(gs, __KERNEL_STACK_CANARY); 234 238 #endif 235 239 240 + /* Restore the TSS, RO GDT, LDT, and usermode-relevant MSRs. */ 236 241 fix_processor_context(); 237 242 238 243 /* 239 - * Restore segment registers. This happens after restoring the GDT 240 - * and LDT, which happen in fix_processor_context(). 244 + * Now that we have descriptor tables fully restored and working 245 + * exception handling, restore the usermode segments. 241 246 */ 242 - #ifdef CONFIG_X86_32 247 + #ifdef CONFIG_X86_64 248 + loadsegment(ds, ctxt->es); 243 249 loadsegment(es, ctxt->es); 244 250 loadsegment(fs, ctxt->fs); 245 - loadsegment(gs, ctxt->gs); 246 - loadsegment(ss, ctxt->ss); 247 - 248 - /* 249 - * sysenter MSRs 250 - */ 251 - if (boot_cpu_has(X86_FEATURE_SEP)) 252 - enable_sep_cpu(); 253 - #else 254 - /* CONFIG_X86_64 */ 255 - asm volatile ("movw %0, %%ds" :: "r" (ctxt->ds)); 256 - asm volatile ("movw %0, %%es" :: "r" (ctxt->es)); 257 - asm volatile ("movw %0, %%fs" :: "r" (ctxt->fs)); 258 251 load_gs_index(ctxt->gs); 259 - asm volatile ("movw %0, %%ss" :: "r" (ctxt->ss)); 260 252 261 253 /* 262 - * Restore FSBASE and user GSBASE after reloading the respective 263 - * segment selectors. 254 + * Restore FSBASE and GSBASE after restoring the selectors, since 255 + * restoring the selectors clobbers the bases. Keep in mind 256 + * that MSR_KERNEL_GS_BASE is horribly misnamed. 264 257 */ 265 258 wrmsrl(MSR_FS_BASE, ctxt->fs_base); 266 - wrmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base); 259 + wrmsrl(MSR_KERNEL_GS_BASE, ctxt->usermode_gs_base); 260 + #elif defined(CONFIG_X86_32_LAZY_GS) 261 + loadsegment(gs, ctxt->gs); 267 262 #endif 268 263 269 264 do_fpu_end();
+12 -3
tools/objtool/arch/x86/lib/x86-opcode-map.txt
··· 607 607 fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1) 608 608 fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1) 609 609 fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1) 610 - ff: 610 + ff: UD0 611 611 EndTable 612 612 613 613 Table: 3-byte opcode 1 (0x0f 0x38) ··· 717 717 7e: vpermt2d/q Vx,Hx,Wx (66),(ev) 718 718 7f: vpermt2ps/d Vx,Hx,Wx (66),(ev) 719 719 80: INVEPT Gy,Mdq (66) 720 - 81: INVPID Gy,Mdq (66) 720 + 81: INVVPID Gy,Mdq (66) 721 721 82: INVPCID Gy,Mdq (66) 722 722 83: vpmultishiftqb Vx,Hx,Wx (66),(ev) 723 723 88: vexpandps/d Vpd,Wpd (66),(ev) ··· 896 896 897 897 GrpTable: Grp3_1 898 898 0: TEST Eb,Ib 899 - 1: 899 + 1: TEST Eb,Ib 900 900 2: NOT Eb 901 901 3: NEG Eb 902 902 4: MUL AL,Eb ··· 970 970 EndTable 971 971 972 972 GrpTable: Grp10 973 + # all are UD1 974 + 0: UD1 975 + 1: UD1 976 + 2: UD1 977 + 3: UD1 978 + 4: UD1 979 + 5: UD1 980 + 6: UD1 981 + 7: UD1 973 982 EndTable 974 983 975 984 # Grp11A and Grp11B are expressed as Grp11 in Intel SDM
+11 -2
tools/perf/util/intel-pt-decoder/x86-opcode-map.txt
··· 607 607 fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1) 608 608 fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1) 609 609 fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1) 610 - ff: 610 + ff: UD0 611 611 EndTable 612 612 613 613 Table: 3-byte opcode 1 (0x0f 0x38) ··· 717 717 7e: vpermt2d/q Vx,Hx,Wx (66),(ev) 718 718 7f: vpermt2ps/d Vx,Hx,Wx (66),(ev) 719 719 80: INVEPT Gy,Mdq (66) 720 - 81: INVPID Gy,Mdq (66) 720 + 81: INVVPID Gy,Mdq (66) 721 721 82: INVPCID Gy,Mdq (66) 722 722 83: vpmultishiftqb Vx,Hx,Wx (66),(ev) 723 723 88: vexpandps/d Vpd,Wpd (66),(ev) ··· 970 970 EndTable 971 971 972 972 GrpTable: Grp10 973 + # all are UD1 974 + 0: UD1 975 + 1: UD1 976 + 2: UD1 977 + 3: UD1 978 + 4: UD1 979 + 5: UD1 980 + 6: UD1 981 + 7: UD1 973 982 EndTable 974 983 975 984 # Grp11A and Grp11B are expressed as Grp11 in Intel SDM