Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge branch 'x86-pti-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 pti fixes from Thomas Gleixner:
 "A set of updates for the x86/pti related code:

   - Preserve r8-r11 in int $0x80. r8-r11 need to be preserved, but the
     int $0x80 entry code removed that quite some time ago. Make it
     correct again.

   - A set of fixes for the Global Bit work which went into 4.17 and
     caused a bunch of interesting regressions:

       - Triggering a BUG in the page attribute code due to a missing
         check for the early boot stage

       - Warnings in the page attribute code about holes in the kernel
         text mapping which are caused by the freeing of the init code.
         Handle such holes gracefully.

       - Reduce the amount of kernel memory which is set global to the
         actual kernel text, and do not let it incidentally overlap
         with data.

       - Disable the global bit when RANDSTRUCT is enabled as it
         partially defeats the hardening.

       - Make the page protection setup correct for vma->vm_page_prot
         population again. The adjustment of the protections fell
         through the cracks during the Global bit rework and triggers
         warnings on machines which do not support certain features,
         e.g. NX"

* 'x86-pti-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/entry/64/compat: Preserve r8-r11 in int $0x80
x86/pti: Filter at vma->vm_page_prot population
x86/pti: Disallow global kernel text with RANDSTRUCT
x86/pti: Reduce amount of kernel text allowed to be Global
x86/pti: Fix boot warning from Global-bit setting
x86/pti: Fix boot problems from Global-bit setting

7 files changed, 103 insertions(+), 38 deletions(-)

arch/x86/Kconfig: +4

···
         select ARCH_HAS_DEVMEM_IS_ALLOWED
         select ARCH_HAS_ELF_RANDOMIZE
         select ARCH_HAS_FAST_MULTIPLIER
+        select ARCH_HAS_FILTER_PGPROT
         select ARCH_HAS_FORTIFY_SOURCE
         select ARCH_HAS_GCOV_PROFILE_ALL
         select ARCH_HAS_KCOV if X86_64
···
         def_bool y

 config ARCH_HAS_CACHE_LINE_SIZE
+        def_bool y
+
+config ARCH_HAS_FILTER_PGPROT
         def_bool y

 config HAVE_SETUP_PER_CPU_AREA

arch/x86/entry/entry_64_compat.S: +4 -4

···
         pushq   %rdx                    /* pt_regs->dx */
         pushq   %rcx                    /* pt_regs->cx */
         pushq   $-ENOSYS                /* pt_regs->ax */
-        pushq   $0                      /* pt_regs->r8 = 0 */
+        pushq   %r8                     /* pt_regs->r8 */
         xorl    %r8d, %r8d              /* nospec r8 */
-        pushq   $0                      /* pt_regs->r9 = 0 */
+        pushq   %r9                     /* pt_regs->r9 */
         xorl    %r9d, %r9d              /* nospec r9 */
-        pushq   $0                      /* pt_regs->r10 = 0 */
+        pushq   %r10                    /* pt_regs->r10 */
         xorl    %r10d, %r10d            /* nospec r10 */
-        pushq   $0                      /* pt_regs->r11 = 0 */
+        pushq   %r11                    /* pt_regs->r11 */
         xorl    %r11d, %r11d            /* nospec r11 */
         pushq   %rbx                    /* pt_regs->rbx */
         xorl    %ebx, %ebx              /* nospec rbx */
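
A quick way to see the effect of this hunk from userspace is to pin known
values into r8-r11 and issue int $0x80 directly. The sketch below is
illustrative, not part of the patch: the constants are arbitrary, 20 is the
32-bit __NR_getpid, and it assumes an x86-64 build running on a kernel with
CONFIG_IA32_EMULATION. On a fixed kernel the values survive the syscall; on
an unpatched one r8-r11 come back zeroed.

/* Illustrative check, not from the patch: does int $0x80 preserve r8-r11? */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
        /* Pin values into the registers the int $0x80 path used to zero. */
        register uint64_t r8  asm("r8")  = 0x1111111111111111ULL;
        register uint64_t r9  asm("r9")  = 0x2222222222222222ULL;
        register uint64_t r10 asm("r10") = 0x3333333333333333ULL;
        register uint64_t r11 asm("r11") = 0x4444444444444444ULL;
        long nr = 20;   /* __NR_getpid in the 32-bit syscall table */

        asm volatile("int $0x80"
                     : "+r"(r8), "+r"(r9), "+r"(r10), "+r"(r11), "+a"(nr)
                     : : "memory");

        printf("pid=%ld r8=%#llx r9=%#llx r10=%#llx r11=%#llx\n", nr,
               (unsigned long long)r8, (unsigned long long)r9,
               (unsigned long long)r10, (unsigned long long)r11);
        return 0;
}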

arch/x86/include/asm/pgtable.h: +5

···
 
 #define canon_pgprot(p) __pgprot(massage_pgprot(p))
 
+static inline pgprot_t arch_filter_pgprot(pgprot_t prot)
+{
+        return canon_pgprot(prot);
+}
+
 static inline int is_new_memtype_allowed(u64 paddr, unsigned long size,
                                          enum page_cache_mode pcm,
                                          enum page_cache_mode new_pcm)

arch/x86/mm/pageattr.c: +36 -16

···
 static inline void split_page_count(int level) { }
 #endif
 
+static inline int
+within(unsigned long addr, unsigned long start, unsigned long end)
+{
+        return addr >= start && addr < end;
+}
+
+static inline int
+within_inclusive(unsigned long addr, unsigned long start, unsigned long end)
+{
+        return addr >= start && addr <= end;
+}
+
 #ifdef CONFIG_X86_64
 
 static inline unsigned long highmap_start_pfn(void)
···
         return __pa_symbol(roundup(_brk_end, PMD_SIZE) - 1) >> PAGE_SHIFT;
 }
 
+static bool __cpa_pfn_in_highmap(unsigned long pfn)
+{
+        /*
+         * Kernel text has an alias mapping at a high address, known
+         * here as "highmap".
+         */
+        return within_inclusive(pfn, highmap_start_pfn(), highmap_end_pfn());
+}
+
+#else
+
+static bool __cpa_pfn_in_highmap(unsigned long pfn)
+{
+        /* There is no highmap on 32-bit */
+        return false;
+}
+
 #endif
-
-static inline int
-within(unsigned long addr, unsigned long start, unsigned long end)
-{
-        return addr >= start && addr < end;
-}
-
-static inline int
-within_inclusive(unsigned long addr, unsigned long start, unsigned long end)
-{
-        return addr >= start && addr <= end;
-}
 
 /*
  * Flushing functions
···
 
 static void cpa_flush_all(unsigned long cache)
 {
-        BUG_ON(irqs_disabled());
+        BUG_ON(irqs_disabled() && !early_boot_irqs_disabled);
 
         on_each_cpu(__cpa_flush_all, (void *) cache, 1);
 }
···
         unsigned long do_wbinvd = cache && numpages >= 1024; /* 4M threshold */
 #endif
 
-        BUG_ON(irqs_disabled());
+        BUG_ON(irqs_disabled() && !early_boot_irqs_disabled);
 
         on_each_cpu(__cpa_flush_all, (void *) do_wbinvd, 1);
 
···
                 cpa->numpages = 1;
                 cpa->pfn = __pa(vaddr) >> PAGE_SHIFT;
                 return 0;
+
+        } else if (__cpa_pfn_in_highmap(cpa->pfn)) {
+                /* Faults in the highmap are OK, so do not warn: */
+                return -EFAULT;
         } else {
                 WARN(1, KERN_WARNING "CPA: called for zero pte. "
                      "vaddr = %lx cpa->vaddr = %lx\n", vaddr,
···
          * to touch the high mapped kernel as well:
          */
         if (!within(vaddr, (unsigned long)_text, _brk_end) &&
-            within_inclusive(cpa->pfn, highmap_start_pfn(),
-                             highmap_end_pfn())) {
+            __cpa_pfn_in_highmap(cpa->pfn)) {
                 unsigned long temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) +
                                                __START_KERNEL_map - phys_base;
                 alias_cpa = *cpa;
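
Worth noting about the first two hunks: the two range helpers differ only in
whether the end of the range counts as inside, and the highmap check needs
the inclusive variant because highmap_end_pfn() computes the pfn of the last
mapped byte. A standalone illustration, with the helpers copied verbatim and
the assert values made up:

#include <assert.h>

static int within(unsigned long addr, unsigned long start, unsigned long end)
{
        return addr >= start && addr < end;
}

static int within_inclusive(unsigned long addr, unsigned long start,
                            unsigned long end)
{
        return addr >= start && addr <= end;
}

int main(void)
{
        /* 'end' itself is outside for within() ... */
        assert(!within(0x200, 0x100, 0x200));
        /* ... but inside for within_inclusive(), as the highmap check needs. */
        assert(within_inclusive(0x200, 0x100, 0x200));
        return 0;
}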

arch/x86/mm/pti.c: +23 -3

···
         if (boot_cpu_has(X86_FEATURE_K8))
                 return false;
 
+        /*
+         * RANDSTRUCT derives its hardening benefits from the
+         * attacker's lack of knowledge about the layout of kernel
+         * data structures.  Keep the kernel image non-global in
+         * cases where RANDSTRUCT is in use to help keep the layout a
+         * secret.
+         */
+        if (IS_ENABLED(CONFIG_GCC_PLUGIN_RANDSTRUCT))
+                return false;
+
         return true;
 }
 
···
  */
 void pti_clone_kernel_text(void)
 {
+        /*
+         * rodata is part of the kernel image and is normally
+         * readable on the filesystem or on the web.  But, do not
+         * clone the areas past rodata, they might contain secrets.
+         */
         unsigned long start = PFN_ALIGN(_text);
-        unsigned long end = ALIGN((unsigned long)_end, PMD_PAGE_SIZE);
+        unsigned long end = (unsigned long)__end_rodata_hpage_align;
 
         if (!pti_kernel_image_global_ok())
                 return;
 
+        pr_debug("mapping partial kernel image into user address space\n");
+
+        /*
+         * Note that this will undo _some_ of the work that
+         * pti_set_kernel_image_nonglobal() did to clear the
+         * global bit.
+         */
         pti_clone_pmds(start, end, _PAGE_RW);
 }
···
 
         if (pti_kernel_image_global_ok())
                 return;
-
-        pr_debug("set kernel image non-global\n");
 
         set_memory_nonglobal(start, (end - start) >> PAGE_SHIFT);
 }
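
The size of the change in pti_clone_kernel_text() is easy to underestimate
from the diff: the user-visible clone used to run all the way to _end rounded
up to a 2M boundary, and now stops at the hugepage-aligned end of rodata. A
back-of-the-envelope sketch with made-up addresses (in the kernel these are
linker symbols, not variables):

#include <stdio.h>

#define PMD_PAGE_SIZE   (2UL << 20)     /* 2M pages on x86-64 */
#define ALIGN(x, a)     (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
        /* Hypothetical layout for illustration only. */
        unsigned long _text = 0xffffffff81000000UL;
        unsigned long __end_rodata_hpage_align = 0xffffffff81e00000UL;
        unsigned long _end = 0xffffffff82d80000UL;

        unsigned long old_end = ALIGN(_end, PMD_PAGE_SIZE);

        printf("previously cloned: %lu MB\n", (old_end - _text) >> 20);
        printf("now cloned:        %lu MB\n",
               (__end_rodata_hpage_align - _text) >> 20);
        return 0;
}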

mm/mmap.c: +10 -1

···
         __S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
 };
 
+#ifndef CONFIG_ARCH_HAS_FILTER_PGPROT
+static inline pgprot_t arch_filter_pgprot(pgprot_t prot)
+{
+        return prot;
+}
+#endif
+
 pgprot_t vm_get_page_prot(unsigned long vm_flags)
 {
-        return __pgprot(pgprot_val(protection_map[vm_flags &
+        pgprot_t ret = __pgprot(pgprot_val(protection_map[vm_flags &
                                 (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]) |
                         pgprot_val(arch_vm_get_page_prot(vm_flags)));
+
+        return arch_filter_pgprot(ret);
 }
 EXPORT_SYMBOL(vm_get_page_prot);
 
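
The shape of this hook is easy to model outside the kernel. In the sketch
below everything is a stand-in: the FAKE_* bits and supported_mask play the
role that the real pte bits and x86's supported-bit mask (applied via
canon_pgprot() in the pgtable.h hunk above) play in the patch. The point is
only that the filter runs after the table lookup and strips bits the CPU
cannot honor, such as NX on hardware without it.

#include <stdio.h>

typedef unsigned long pgprot_t;

/* Stand-in page protection bits; the real ones live in arch headers. */
#define FAKE_PAGE_PRESENT       0x1UL
#define FAKE_PAGE_RW            0x2UL
#define FAKE_PAGE_NX            (1UL << 63)

/* Pretend this CPU lacks NX, like the machines that saw the warnings. */
static const unsigned long supported_mask = ~FAKE_PAGE_NX;

/* Default used when an arch does not select ARCH_HAS_FILTER_PGPROT. */
static pgprot_t generic_filter_pgprot(pgprot_t prot)
{
        return prot;
}

/* Model of x86's override: drop bits the hardware does not support. */
static pgprot_t x86_filter_pgprot(pgprot_t prot)
{
        return prot & supported_mask;
}

int main(void)
{
        pgprot_t prot = FAKE_PAGE_PRESENT | FAKE_PAGE_RW | FAKE_PAGE_NX;

        printf("generic: %#lx\n", generic_filter_pgprot(prot));
        printf("x86:     %#lx\n", x86_filter_pgprot(prot));
        return 0;
}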

tools/testing/selftests/x86/test_syscall_vdso.c: +21 -14

···
         "       shl     $32, %r8\n"
         "       orq     $0x7f7f7f7f, %r8\n"
         "       movq    %r8, %r9\n"
-        "       movq    %r8, %r10\n"
-        "       movq    %r8, %r11\n"
-        "       movq    %r8, %r12\n"
-        "       movq    %r8, %r13\n"
-        "       movq    %r8, %r14\n"
-        "       movq    %r8, %r15\n"
+        "       incq    %r9\n"
+        "       movq    %r9, %r10\n"
+        "       incq    %r10\n"
+        "       movq    %r10, %r11\n"
+        "       incq    %r11\n"
+        "       movq    %r11, %r12\n"
+        "       incq    %r12\n"
+        "       movq    %r12, %r13\n"
+        "       incq    %r13\n"
+        "       movq    %r13, %r14\n"
+        "       incq    %r14\n"
+        "       movq    %r14, %r15\n"
+        "       incq    %r15\n"
         "       ret\n"
         "       .code32\n"
         "       .popsection\n"
···
         int err = 0;
         int num = 8;
         uint64_t *r64 = &regs64.r8;
+        uint64_t expected = 0x7f7f7f7f7f7f7f7fULL;
 
         if (!kernel_is_64bit)
                 return 0;
 
         do {
-                if (*r64 == 0x7f7f7f7f7f7f7f7fULL)
+                if (*r64 == expected++)
                         continue;       /* register did not change */
                 if (syscall_addr != (long)&int80) {
···
                                 continue;
                         }
                 } else {
-                        /* INT80 syscall entrypoint can be used by
+                        /*
+                         * INT80 syscall entrypoint can be used by
                          * 64-bit programs too, unlike SYSCALL/SYSENTER.
                          * Therefore it must preserve R12+
                          * (they are callee-saved registers in 64-bit C ABI).
                          *
-                         * This was probably historically not intended,
-                         * but R8..11 are clobbered (cleared to 0).
-                         * IOW: they are the only registers which aren't
-                         * preserved across INT80 syscall.
+                         * Starting in Linux 4.17 (and any kernel that
+                         * backports the change), R8..11 are preserved.
+                         * Historically (and probably unintentionally), they
+                         * were clobbered or zeroed.
                          */
-                        if (*r64 == 0 && num <= 11)
-                                continue;
                 }
                 printf("[FAIL]\tR%d has changed:%016llx\n", num, *r64);
                 err++;
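
To exercise the updated test, the kernel's selftest harness can be run from a
source tree, typically with something like "make -C tools/testing/selftests
TARGETS=x86 run_tests"; test_syscall_vdso builds as a 32-bit binary, so a
toolchain able to produce 32-bit objects is usually required. On a patched
kernel the int $0x80 checks should report all of r8-r15 preserved; on an
unpatched 64-bit kernel the r8-r11 comparisons now fail instead of being
special-cased as they were before this change.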