Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'x86_cpu_for_6.19-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 CPU feature updates from Dave Hansen:
"The biggest thing of note here is Linear Address Space Separation
(LASS). It represents the first time I can think of that the
upper=>kernel/lower=>user address space convention is actually
recognized by the hardware on x86. It ensures that userspace cannot
even get the hardware to _start_ page walks for the kernel address
space. This, of course, is a really nice generic side channel defense.

This is really only a down payment on LASS support. There are still
some details to work out in its interaction with EFI calls and
vsyscall emulation. For now, LASS is disabled if either of those
features is compiled in (which is almost always the case).

There's also one straggler commit in here which converts an
under-utilized AMD CPU feature leaf into a generic Linux-defined leaf
so more features can be packed in there.

Summary:

- Enable Linear Address Space Separation (LASS)

- Change X86_FEATURE leaf 17 from an AMD leaf to Linux-defined"

* tag 'x86_cpu_for_6.19-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/cpu: Enable LASS during CPU initialization
selftests/x86: Update the negative vsyscall tests to expect a #GP
x86/traps: Communicate a LASS violation in #GP message
x86/kexec: Disable LASS during relocate kernel
x86/alternatives: Disable LASS when patching kernel code
x86/asm: Introduce inline memcpy and memset
x86/cpu: Add an LASS dependency on SMAP
x86/cpufeatures: Enumerate the LASS feature bits
x86/cpufeatures: Make X86_FEATURE leaf 17 Linux-specific

+177 -38
+4
arch/x86/Kconfig.cpufeatures
··· 124 124 def_bool y 125 125 depends on !X86_64 126 126 127 + config X86_DISABLED_FEATURE_LASS 128 + def_bool y 129 + depends on X86_32 130 + 127 131 config X86_DISABLED_FEATURE_PKU 128 132 def_bool y 129 133 depends on !X86_INTEL_MEMORY_PROTECTION_KEYS
+1 -1
arch/x86/include/asm/cpufeature.h
··· 30 30 CPUID_6_EAX, 31 31 CPUID_8000_000A_EDX, 32 32 CPUID_7_ECX, 33 - CPUID_8000_0007_EBX, 33 + CPUID_LNX_6, 34 34 CPUID_7_EDX, 35 35 CPUID_8000_001F_EAX, 36 36 CPUID_8000_0021_EAX,
+5 -1
arch/x86/include/asm/cpufeatures.h
··· 314 314 #define X86_FEATURE_SM4 (12*32+ 2) /* SM4 instructions */ 315 315 #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* "avx_vnni" AVX VNNI instructions */ 316 316 #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* "avx512_bf16" AVX512 BFLOAT16 instructions */ 317 + #define X86_FEATURE_LASS (12*32+ 6) /* "lass" Linear Address Space Separation */ 317 318 #define X86_FEATURE_CMPCCXADD (12*32+ 7) /* CMPccXADD instructions */ 318 319 #define X86_FEATURE_ARCH_PERFMON_EXT (12*32+ 8) /* Intel Architectural PerfMon Extension */ 319 320 #define X86_FEATURE_FZRM (12*32+10) /* Fast zero-length REP MOVSB */ ··· 408 407 #define X86_FEATURE_ENQCMD (16*32+29) /* "enqcmd" ENQCMD and ENQCMDS instructions */ 409 408 #define X86_FEATURE_SGX_LC (16*32+30) /* "sgx_lc" Software Guard Extensions Launch Control */ 410 409 411 - /* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */ 410 + /* 411 + * Linux-defined word for use with scattered/synthetic bits. 412 + */ 412 413 #define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* "overflow_recov" MCA overflow recovery support */ 413 414 #define X86_FEATURE_SUCCOR (17*32+ 1) /* "succor" Uncorrectable error containment and recovery */ 415 + 414 416 #define X86_FEATURE_SMCA (17*32+ 3) /* "smca" Scalable MCA */ 415 417 416 418 /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
+39 -2
arch/x86/include/asm/smap.h
··· 23 23 24 24 #else /* __ASSEMBLER__ */ 25 25 26 + /* 27 + * The CLAC/STAC instructions toggle the enforcement of 28 + * X86_FEATURE_SMAP along with X86_FEATURE_LASS. 29 + * 30 + * SMAP enforcement is based on the _PAGE_BIT_USER bit in the page 31 + * tables. The kernel is not allowed to touch pages with that bit set 32 + * unless the AC bit is set. 33 + * 34 + * Use stac()/clac() when accessing userspace (_PAGE_USER) mappings, 35 + * regardless of location. 36 + * 37 + * Note: a barrier is implicit in alternative(). 38 + */ 39 + 26 40 static __always_inline void clac(void) 27 41 { 28 - /* Note: a barrier is implicit in alternative() */ 29 42 alternative("", "clac", X86_FEATURE_SMAP); 30 43 } 31 44 32 45 static __always_inline void stac(void) 33 46 { 34 - /* Note: a barrier is implicit in alternative() */ 35 47 alternative("", "stac", X86_FEATURE_SMAP); 48 + } 49 + 50 + /* 51 + * LASS enforcement is based on bit 63 of the virtual address. The 52 + * kernel is not allowed to touch memory in the lower half of the 53 + * virtual address space. 54 + * 55 + * Use lass_stac()/lass_clac() to toggle the AC bit for kernel data 56 + * accesses (!_PAGE_USER) that are blocked by LASS, but not by SMAP. 57 + * 58 + * Even with the AC bit set, LASS will continue to block instruction 59 + * fetches from the user half of the address space. To allow those, 60 + * clear CR4.LASS to disable the LASS mechanism entirely. 61 + * 62 + * Note: a barrier is implicit in alternative(). 63 + */ 64 + 65 + static __always_inline void lass_clac(void) 66 + { 67 + alternative("", "clac", X86_FEATURE_LASS); 68 + } 69 + 70 + static __always_inline void lass_stac(void) 71 + { 72 + alternative("", "stac", X86_FEATURE_LASS); 36 73 } 37 74 38 75 static __always_inline unsigned long smap_save(void)
+26
arch/x86/include/asm/string.h
··· 1 1 /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef _ASM_X86_STRING_H 3 + #define _ASM_X86_STRING_H 4 + 2 5 #ifdef CONFIG_X86_32 3 6 # include <asm/string_32.h> 4 7 #else 5 8 # include <asm/string_64.h> 6 9 #endif 10 + 11 + static __always_inline void *__inline_memcpy(void *to, const void *from, size_t len) 12 + { 13 + void *ret = to; 14 + 15 + asm volatile("rep movsb" 16 + : "+D" (to), "+S" (from), "+c" (len) 17 + : : "memory"); 18 + return ret; 19 + } 20 + 21 + static __always_inline void *__inline_memset(void *s, int v, size_t n) 22 + { 23 + void *ret = s; 24 + 25 + asm volatile("rep stosb" 26 + : "+D" (s), "+c" (n) 27 + : "a" ((uint8_t)v) 28 + : "memory"); 29 + return ret; 30 + } 31 + 32 + #endif /* _ASM_X86_STRING_H */
+2
arch/x86/include/uapi/asm/processor-flags.h
··· 136 136 #define X86_CR4_PKE _BITUL(X86_CR4_PKE_BIT) 137 137 #define X86_CR4_CET_BIT 23 /* enable Control-flow Enforcement Technology */ 138 138 #define X86_CR4_CET _BITUL(X86_CR4_CET_BIT) 139 + #define X86_CR4_LASS_BIT 27 /* enable Linear Address Space Separation support */ 140 + #define X86_CR4_LASS _BITUL(X86_CR4_LASS_BIT) 139 141 #define X86_CR4_LAM_SUP_BIT 28 /* LAM for supervisor pointers */ 140 142 #define X86_CR4_LAM_SUP _BITUL(X86_CR4_LAM_SUP_BIT) 141 143
+16 -2
arch/x86/kernel/alternative.c
··· 2453 2453 __ro_after_init struct mm_struct *text_poke_mm; 2454 2454 __ro_after_init unsigned long text_poke_mm_addr; 2455 2455 2456 + /* 2457 + * Text poking creates and uses a mapping in the lower half of the 2458 + * address space. Relax LASS enforcement when accessing the poking 2459 + * address. 2460 + * 2461 + * objtool enforces a strict policy of "no function calls within AC=1 2462 + * regions". Adhere to the policy by using inline versions of 2463 + * memcpy()/memset() that will never result in a function call. 2464 + */ 2465 + 2456 2466 static void text_poke_memcpy(void *dst, const void *src, size_t len) 2457 2467 { 2458 - memcpy(dst, src, len); 2468 + lass_stac(); 2469 + __inline_memcpy(dst, src, len); 2470 + lass_clac(); 2459 2471 } 2460 2472 2461 2473 static void text_poke_memset(void *dst, const void *src, size_t len) 2462 2474 { 2463 2475 int c = *(const int *)src; 2464 2476 2465 - memset(dst, c, len); 2477 + lass_stac(); 2478 + __inline_memset(dst, c, len); 2479 + lass_clac(); 2466 2480 } 2467 2481 2468 2482 typedef void text_poke_f(void *dst, const void *src, size_t len);
+25 -7
arch/x86/kernel/cpu/common.c
··· 406 406 cr4_clear_bits(X86_CR4_UMIP); 407 407 } 408 408 409 + static __always_inline void setup_lass(struct cpuinfo_x86 *c) 410 + { 411 + if (!cpu_feature_enabled(X86_FEATURE_LASS)) 412 + return; 413 + 414 + /* 415 + * Legacy vsyscall page access causes a #GP when LASS is active. 416 + * Disable LASS because the #GP handler doesn't support vsyscall 417 + * emulation. 418 + * 419 + * Also disable LASS when running under EFI, as some runtime and 420 + * boot services rely on 1:1 mappings in the lower half. 421 + */ 422 + if (IS_ENABLED(CONFIG_X86_VSYSCALL_EMULATION) || 423 + IS_ENABLED(CONFIG_EFI)) { 424 + setup_clear_cpu_cap(X86_FEATURE_LASS); 425 + return; 426 + } 427 + 428 + cr4_set_bits(X86_CR4_LASS); 429 + } 430 + 409 431 /* These bits should not change their value after CPU init is finished. */ 410 432 static const unsigned long cr4_pinned_mask = X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP | 411 433 X86_CR4_FSGSBASE | X86_CR4_CET | X86_CR4_FRED; ··· 1048 1026 c->x86_capability[CPUID_8000_0001_EDX] = edx; 1049 1027 } 1050 1028 1051 - if (c->extended_cpuid_level >= 0x80000007) { 1052 - cpuid(0x80000007, &eax, &ebx, &ecx, &edx); 1053 - 1054 - c->x86_capability[CPUID_8000_0007_EBX] = ebx; 1055 - c->x86_power = edx; 1056 - } 1029 + if (c->extended_cpuid_level >= 0x80000007) 1030 + c->x86_power = cpuid_edx(0x80000007); 1057 1031 1058 1032 if (c->extended_cpuid_level >= 0x80000008) { 1059 1033 cpuid(0x80000008, &eax, &ebx, &ecx, &edx); ··· 2034 2016 /* Disable the PN if appropriate */ 2035 2017 squash_the_stupid_serial_number(c); 2036 2018 2037 - /* Set up SMEP/SMAP/UMIP */ 2038 2019 setup_smep(c); 2039 2020 setup_smap(c); 2040 2021 setup_umip(c); 2022 + setup_lass(c); 2041 2023 2042 2024 /* Enable FSGSBASE instructions if available. */ 2043 2025 if (cpu_has(c, X86_FEATURE_FSGSBASE)) {
+1
arch/x86/kernel/cpu/cpuid-deps.c
··· 91 91 { X86_FEATURE_SHSTK, X86_FEATURE_XSAVES }, 92 92 { X86_FEATURE_FRED, X86_FEATURE_LKGS }, 93 93 { X86_FEATURE_SPEC_CTRL_SSBD, X86_FEATURE_SPEC_CTRL }, 94 + { X86_FEATURE_LASS, X86_FEATURE_SMAP }, 94 95 {} 95 96 }; 96 97
+3
arch/x86/kernel/cpu/scattered.c
··· 45 45 { X86_FEATURE_SGX2, CPUID_EAX, 1, 0x00000012, 0 }, 46 46 { X86_FEATURE_SGX_EUPDATESVN, CPUID_EAX, 10, 0x00000012, 0 }, 47 47 { X86_FEATURE_SGX_EDECCSSA, CPUID_EAX, 11, 0x00000012, 0 }, 48 + { X86_FEATURE_OVERFLOW_RECOV, CPUID_EBX, 0, 0x80000007, 0 }, 49 + { X86_FEATURE_SUCCOR, CPUID_EBX, 1, 0x80000007, 0 }, 50 + { X86_FEATURE_SMCA, CPUID_EBX, 3, 0x80000007, 0 }, 48 51 { X86_FEATURE_HW_PSTATE, CPUID_EDX, 7, 0x80000007, 0 }, 49 52 { X86_FEATURE_CPB, CPUID_EDX, 9, 0x80000007, 0 }, 50 53 { X86_FEATURE_PROC_FEEDBACK, CPUID_EDX, 11, 0x80000007, 0 },
+5 -2
arch/x86/kernel/relocate_kernel_64.S
··· 95 95 /* Leave CR4 in %r13 to enable the right paging mode later. */ 96 96 movq %cr4, %r13 97 97 98 - /* Disable global pages immediately to ensure this mapping is RWX */ 98 + /* 99 + * Disable global pages immediately to ensure this mapping is RWX. 100 + * Disable LASS before jumping to the identity mapped page. 101 + */ 99 102 movq %r13, %r12 100 - andq $~(X86_CR4_PGE), %r12 103 + andq $~(X86_CR4_PGE | X86_CR4_LASS), %r12 101 104 movq %r12, %cr4 102 105 103 106 /* Save %rsp and CRs. */
+34 -12
arch/x86/kernel/traps.c
··· 732 732 enum kernel_gp_hint { 733 733 GP_NO_HINT, 734 734 GP_NON_CANONICAL, 735 - GP_CANONICAL 735 + GP_CANONICAL, 736 + GP_LASS_VIOLATION, 737 + GP_NULL_POINTER, 738 + }; 739 + 740 + static const char * const kernel_gp_hint_help[] = { 741 + [GP_NON_CANONICAL] = "probably for non-canonical address", 742 + [GP_CANONICAL] = "maybe for address", 743 + [GP_LASS_VIOLATION] = "probably LASS violation for address", 744 + [GP_NULL_POINTER] = "kernel NULL pointer dereference", 736 745 }; 737 746 738 747 /* 739 748 * When an uncaught #GP occurs, try to determine the memory address accessed by 740 749 * the instruction and return that address to the caller. Also, try to figure 741 - * out whether any part of the access to that address was non-canonical. 750 + * out whether any part of the access to that address was non-canonical or 751 + * across privilege levels. 742 752 */ 743 753 static enum kernel_gp_hint get_kernel_gp_address(struct pt_regs *regs, 744 754 unsigned long *addr) ··· 770 760 return GP_NO_HINT; 771 761 772 762 #ifdef CONFIG_X86_64 773 - /* 774 - * Check that: 775 - * - the operand is not in the kernel half 776 - * - the last byte of the operand is not in the user canonical half 777 - */ 778 - if (*addr < ~__VIRTUAL_MASK && 779 - *addr + insn.opnd_bytes - 1 > __VIRTUAL_MASK) 763 + /* Operand is in the kernel half */ 764 + if (*addr >= ~__VIRTUAL_MASK) 765 + return GP_CANONICAL; 766 + 767 + /* The last byte of the operand is not in the user canonical half */ 768 + if (*addr + insn.opnd_bytes - 1 > __VIRTUAL_MASK) 780 769 return GP_NON_CANONICAL; 770 + 771 + /* 772 + * A NULL pointer dereference usually causes a #PF. However, it 773 + * can result in a #GP when LASS is active. Provide the same 774 + * hint in the rare case that the condition is hit without LASS. 775 + */ 776 + if (*addr < PAGE_SIZE) 777 + return GP_NULL_POINTER; 778 + 779 + /* 780 + * Assume that LASS caused the exception, because the address is 781 + * canonical and in the user half. 
782 + */ 783 + if (cpu_feature_enabled(X86_FEATURE_LASS)) 784 + return GP_LASS_VIOLATION; 781 785 #endif 782 786 783 787 return GP_CANONICAL; ··· 954 930 955 931 if (hint != GP_NO_HINT) 956 932 snprintf(desc, sizeof(desc), GPFSTR ", %s 0x%lx", 957 - (hint == GP_NON_CANONICAL) ? "probably for non-canonical address" 958 - : "maybe for address", 959 - gp_addr); 933 + kernel_gp_hint_help[hint], gp_addr); 960 934 961 935 /* 962 936 * KASAN is interested only in the non-canonical case, clear it
-1
arch/x86/kvm/reverse_cpuid.h
··· 78 78 [CPUID_6_EAX] = { 6, 0, CPUID_EAX}, 79 79 [CPUID_8000_000A_EDX] = {0x8000000a, 0, CPUID_EDX}, 80 80 [CPUID_7_ECX] = { 7, 0, CPUID_ECX}, 81 - [CPUID_8000_0007_EBX] = {0x80000007, 0, CPUID_EBX}, 82 81 [CPUID_7_EDX] = { 7, 0, CPUID_EDX}, 83 82 [CPUID_7_1_EAX] = { 7, 1, CPUID_EAX}, 84 83 [CPUID_12_EAX] = {0x00000012, 0, CPUID_EAX},
+4 -1
tools/arch/x86/include/asm/cpufeatures.h
··· 407 407 #define X86_FEATURE_ENQCMD (16*32+29) /* "enqcmd" ENQCMD and ENQCMDS instructions */ 408 408 #define X86_FEATURE_SGX_LC (16*32+30) /* "sgx_lc" Software Guard Extensions Launch Control */ 409 409 410 - /* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */ 410 + /* 411 + * Linux-defined word for use with scattered/synthetic bits. 412 + */ 411 413 #define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* "overflow_recov" MCA overflow recovery support */ 412 414 #define X86_FEATURE_SUCCOR (17*32+ 1) /* "succor" Uncorrectable error containment and recovery */ 415 + 413 416 #define X86_FEATURE_SMCA (17*32+ 3) /* "smca" Scalable MCA */ 414 417 415 418 /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
+12 -9
tools/testing/selftests/x86/test_vsyscall.c
··· 308 308 #ifdef __x86_64__ 309 309 310 310 static jmp_buf jmpbuf; 311 - static volatile unsigned long segv_err; 311 + static volatile unsigned long segv_err, segv_trapno; 312 312 313 313 static void sigsegv(int sig, siginfo_t *info, void *ctx_void) 314 314 { 315 315 ucontext_t *ctx = (ucontext_t *)ctx_void; 316 316 317 + segv_trapno = ctx->uc_mcontext.gregs[REG_TRAPNO]; 317 318 segv_err = ctx->uc_mcontext.gregs[REG_ERR]; 318 319 siglongjmp(jmpbuf, 1); 319 320 } ··· 337 336 else if (can_read) 338 337 ksft_test_result_pass("We have read access\n"); 339 338 else 340 - ksft_test_result_pass("We do not have read access: #PF(0x%lx)\n", segv_err); 339 + ksft_test_result_pass("We do not have read access (trap=%ld, error=0x%lx)\n", 340 + segv_trapno, segv_err); 341 341 } 342 342 343 343 static void test_vsys_x(void) ··· 349 347 return; 350 348 } 351 349 352 - ksft_print_msg("Make sure that vsyscalls really page fault\n"); 350 + ksft_print_msg("Make sure that vsyscalls really cause a fault\n"); 353 351 354 352 bool can_exec; 355 353 if (sigsetjmp(jmpbuf, 1) == 0) { ··· 360 358 } 361 359 362 360 if (can_exec) 363 - ksft_test_result_fail("Executing the vsyscall did not page fault\n"); 364 - else if (segv_err & (1 << 4)) /* INSTR */ 365 - ksft_test_result_pass("Executing the vsyscall page failed: #PF(0x%lx)\n", 366 - segv_err); 361 + ksft_test_result_fail("Executing the vsyscall did not fault\n"); 362 + /* #GP or #PF (with X86_PF_INSTR) */ 363 + else if ((segv_trapno == 13) || ((segv_trapno == 14) && (segv_err & (1 << 4)))) 364 + ksft_test_result_pass("Executing the vsyscall page failed (trap=%ld, error=0x%lx)\n", 365 + segv_trapno, segv_err); 367 366 else 368 - ksft_test_result_fail("Execution failed with the wrong error: #PF(0x%lx)\n", 369 - segv_err); 367 + ksft_test_result_fail("Execution failed with the wrong error (trap=%ld, error=0x%lx)\n", 368 + segv_trapno, segv_err); 370 369 } 371 370 372 371 /*