Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'x86_urgent_for_v5.12_rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 fixes from Borislav Petkov:

- A couple of SEV-ES fixes and robustifications: verify that the user-mode
stack pointer in NMI is not coming from the syscall gap, correctly track
IRQ states in the #VC handler, and access user insn bytes atomically
in the same handler, as the latter cannot sleep.

- Balance the 32-bit fast syscall exit path to do the proper work on exit
and thus not confuse the audit and ptrace frameworks.

- Two fixes for the ORC unwinder going "off the rails" into KASAN
redzones and when ORC data is missing.

* tag 'x86_urgent_for_v5.12_rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/sev-es: Use __copy_from_user_inatomic()
x86/sev-es: Correctly track IRQ states in runtime #VC handler
x86/sev-es: Check regs->sp is trusted before adjusting #VC IST stack
x86/sev-es: Introduce ip_within_syscall_gap() helper
x86/entry: Fix entry/exit mismatch on failed fast 32-bit syscalls
x86/unwind/orc: Silence warnings caused by missing ORC data
x86/unwind/orc: Disable KASAN checking in the ORC unwinder, part 2

+99 -29
+2 -1
arch/x86/entry/common.c
··· 128 128 regs->ax = -EFAULT; 129 129 130 130 instrumentation_end(); 131 - syscall_exit_to_user_mode(regs); 131 + local_irq_disable(); 132 + irqentry_exit_to_user_mode(regs); 132 133 return false; 133 134 } 134 135
+2
arch/x86/entry/entry_64_compat.S
··· 210 210 /* Switch to the kernel stack */ 211 211 movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp 212 212 213 + SYM_INNER_LABEL(entry_SYSCALL_compat_safe_stack, SYM_L_GLOBAL) 214 + 213 215 /* Construct struct pt_regs on stack */ 214 216 pushq $__USER32_DS /* pt_regs->ss */ 215 217 pushq %r8 /* pt_regs->sp */
+2
arch/x86/include/asm/insn-eval.h
··· 23 23 int insn_get_code_seg_params(struct pt_regs *regs); 24 24 int insn_fetch_from_user(struct pt_regs *regs, 25 25 unsigned char buf[MAX_INSN_SIZE]); 26 + int insn_fetch_from_user_inatomic(struct pt_regs *regs, 27 + unsigned char buf[MAX_INSN_SIZE]); 26 28 bool insn_decode(struct insn *insn, struct pt_regs *regs, 27 29 unsigned char buf[MAX_INSN_SIZE], int buf_size); 28 30
+1
arch/x86/include/asm/proto.h
··· 25 25 void entry_SYSENTER_compat(void); 26 26 void __end_entry_SYSENTER_compat(void); 27 27 void entry_SYSCALL_compat(void); 28 + void entry_SYSCALL_compat_safe_stack(void); 28 29 void entry_INT80_compat(void); 29 30 #ifdef CONFIG_XEN_PV 30 31 void xen_entry_INT80_compat(void);
+15
arch/x86/include/asm/ptrace.h
··· 94 94 #include <asm/paravirt_types.h> 95 95 #endif 96 96 97 + #include <asm/proto.h> 98 + 97 99 struct cpuinfo_x86; 98 100 struct task_struct; 99 101 ··· 177 175 #ifdef CONFIG_X86_64 178 176 #define current_user_stack_pointer() current_pt_regs()->sp 179 177 #define compat_user_stack_pointer() current_pt_regs()->sp 178 + 179 + static inline bool ip_within_syscall_gap(struct pt_regs *regs) 180 + { 181 + bool ret = (regs->ip >= (unsigned long)entry_SYSCALL_64 && 182 + regs->ip < (unsigned long)entry_SYSCALL_64_safe_stack); 183 + 184 + #ifdef CONFIG_IA32_EMULATION 185 + ret = ret || (regs->ip >= (unsigned long)entry_SYSCALL_compat && 186 + regs->ip < (unsigned long)entry_SYSCALL_compat_safe_stack); 187 + #endif 188 + 189 + return ret; 190 + } 180 191 #endif 181 192 182 193 static inline unsigned long kernel_stack_pointer(struct pt_regs *regs)
+17 -5
arch/x86/kernel/sev-es.c
··· 121 121 cea_set_pte((void *)vaddr, pa, PAGE_KERNEL); 122 122 } 123 123 124 - static __always_inline bool on_vc_stack(unsigned long sp) 124 + static __always_inline bool on_vc_stack(struct pt_regs *regs) 125 125 { 126 + unsigned long sp = regs->sp; 127 + 128 + /* User-mode RSP is not trusted */ 129 + if (user_mode(regs)) 130 + return false; 131 + 132 + /* SYSCALL gap still has user-mode RSP */ 133 + if (ip_within_syscall_gap(regs)) 134 + return false; 135 + 126 136 return ((sp >= __this_cpu_ist_bottom_va(VC)) && (sp < __this_cpu_ist_top_va(VC))); 127 137 } 128 138 ··· 154 144 old_ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]); 155 145 156 146 /* Make room on the IST stack */ 157 - if (on_vc_stack(regs->sp)) 147 + if (on_vc_stack(regs)) 158 148 new_ist = ALIGN_DOWN(regs->sp, 8) - sizeof(old_ist); 159 149 else 160 150 new_ist = old_ist - sizeof(old_ist); ··· 258 248 int res; 259 249 260 250 if (user_mode(ctxt->regs)) { 261 - res = insn_fetch_from_user(ctxt->regs, buffer); 251 + res = insn_fetch_from_user_inatomic(ctxt->regs, buffer); 262 252 if (!res) { 263 253 ctxt->fi.vector = X86_TRAP_PF; 264 254 ctxt->fi.error_code = X86_PF_INSTR | X86_PF_USER; ··· 1258 1248 DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication) 1259 1249 { 1260 1250 struct sev_es_runtime_data *data = this_cpu_read(runtime_data); 1251 + irqentry_state_t irq_state; 1261 1252 struct ghcb_state state; 1262 1253 struct es_em_ctxt ctxt; 1263 1254 enum es_result result; 1264 1255 struct ghcb *ghcb; 1265 - 1266 - lockdep_assert_irqs_disabled(); 1267 1256 1268 1257 /* 1269 1258 * Handle #DB before calling into !noinstr code to avoid recursive #DB. ··· 1272 1263 return; 1273 1264 } 1274 1265 1266 + irq_state = irqentry_nmi_enter(regs); 1267 + lockdep_assert_irqs_disabled(); 1275 1268 instrumentation_begin(); 1276 1269 1277 1270 /* ··· 1336 1325 1337 1326 out: 1338 1327 instrumentation_end(); 1328 + irqentry_nmi_exit(regs, irq_state); 1339 1329 1340 1330 return; 1341 1331
+1 -2
arch/x86/kernel/traps.c
··· 694 694 * In the SYSCALL entry path the RSP value comes from user-space - don't 695 695 * trust it and switch to the current kernel stack 696 696 */ 697 - if (regs->ip >= (unsigned long)entry_SYSCALL_64 && 698 - regs->ip < (unsigned long)entry_SYSCALL_64_safe_stack) { 697 + if (ip_within_syscall_gap(regs)) { 699 698 sp = this_cpu_read(cpu_current_top_of_stack); 700 699 goto sync; 701 700 }
+7 -7
arch/x86/kernel/unwind_orc.c
··· 13 13 14 14 #define orc_warn_current(args...) \ 15 15 ({ \ 16 - if (state->task == current) \ 16 + if (state->task == current && !state->error) \ 17 17 orc_warn(args); \ 18 18 }) 19 19 ··· 367 367 if (!stack_access_ok(state, addr, sizeof(struct pt_regs))) 368 368 return false; 369 369 370 - *ip = regs->ip; 371 - *sp = regs->sp; 370 + *ip = READ_ONCE_NOCHECK(regs->ip); 371 + *sp = READ_ONCE_NOCHECK(regs->sp); 372 372 return true; 373 373 } 374 374 ··· 380 380 if (!stack_access_ok(state, addr, IRET_FRAME_SIZE)) 381 381 return false; 382 382 383 - *ip = regs->ip; 384 - *sp = regs->sp; 383 + *ip = READ_ONCE_NOCHECK(regs->ip); 384 + *sp = READ_ONCE_NOCHECK(regs->sp); 385 385 return true; 386 386 } 387 387 ··· 402 402 return false; 403 403 404 404 if (state->full_regs) { 405 - *val = ((unsigned long *)state->regs)[reg]; 405 + *val = READ_ONCE_NOCHECK(((unsigned long *)state->regs)[reg]); 406 406 return true; 407 407 } 408 408 409 409 if (state->prev_regs) { 410 - *val = ((unsigned long *)state->prev_regs)[reg]; 410 + *val = READ_ONCE_NOCHECK(((unsigned long *)state->prev_regs)[reg]); 411 411 return true; 412 412 } 413 413
+52 -14
arch/x86/lib/insn-eval.c
··· 1415 1415 } 1416 1416 } 1417 1417 1418 + static unsigned long insn_get_effective_ip(struct pt_regs *regs) 1419 + { 1420 + unsigned long seg_base = 0; 1421 + 1422 + /* 1423 + * If not in user-space long mode, a custom code segment could be in 1424 + * use. This is true in protected mode (if the process defined a local 1425 + * descriptor table), or virtual-8086 mode. In most of the cases 1426 + * seg_base will be zero as in USER_CS. 1427 + */ 1428 + if (!user_64bit_mode(regs)) { 1429 + seg_base = insn_get_seg_base(regs, INAT_SEG_REG_CS); 1430 + if (seg_base == -1L) 1431 + return 0; 1432 + } 1433 + 1434 + return seg_base + regs->ip; 1435 + } 1436 + 1418 1437 /** 1419 1438 * insn_fetch_from_user() - Copy instruction bytes from user-space memory 1420 1439 * @regs: Structure with register values as seen when entering kernel mode ··· 1450 1431 */ 1451 1432 int insn_fetch_from_user(struct pt_regs *regs, unsigned char buf[MAX_INSN_SIZE]) 1452 1433 { 1453 - unsigned long seg_base = 0; 1434 + unsigned long ip; 1454 1435 int not_copied; 1455 1436 1456 - /* 1457 - * If not in user-space long mode, a custom code segment could be in 1458 - * use. This is true in protected mode (if the process defined a local 1459 - * descriptor table), or virtual-8086 mode. In most of the cases 1460 - * seg_base will be zero as in USER_CS. 
1461 - */ 1462 - if (!user_64bit_mode(regs)) { 1463 - seg_base = insn_get_seg_base(regs, INAT_SEG_REG_CS); 1464 - if (seg_base == -1L) 1465 - return 0; 1466 - } 1437 + ip = insn_get_effective_ip(regs); 1438 + if (!ip) 1439 + return 0; 1467 1440 1441 + not_copied = copy_from_user(buf, (void __user *)ip, MAX_INSN_SIZE); 1468 1442 1469 - not_copied = copy_from_user(buf, (void __user *)(seg_base + regs->ip), 1470 - MAX_INSN_SIZE); 1443 + return MAX_INSN_SIZE - not_copied; 1444 + } 1445 + 1446 + /** 1447 + * insn_fetch_from_user_inatomic() - Copy instruction bytes from user-space memory 1448 + * while in atomic code 1449 + * @regs: Structure with register values as seen when entering kernel mode 1450 + * @buf: Array to store the fetched instruction 1451 + * 1452 + * Gets the linear address of the instruction and copies the instruction bytes 1453 + * to the buf. This function must be used in atomic context. 1454 + * 1455 + * Returns: 1456 + * 1457 + * Number of instruction bytes copied. 1458 + * 1459 + * 0 if nothing was copied. 1460 + */ 1461 + int insn_fetch_from_user_inatomic(struct pt_regs *regs, unsigned char buf[MAX_INSN_SIZE]) 1462 + { 1463 + unsigned long ip; 1464 + int not_copied; 1465 + 1466 + ip = insn_get_effective_ip(regs); 1467 + if (!ip) 1468 + return 0; 1469 + 1470 + not_copied = __copy_from_user_inatomic(buf, (void __user *)ip, MAX_INSN_SIZE); 1471 1471 1472 1472 return MAX_INSN_SIZE - not_copied; 1473 1473 }