Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'x86-entry-2020-08-04' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 conversion to generic entry code from Thomas Gleixner:
"The conversion of X86 syscall, interrupt and exception entry/exit
handling to the generic code.

Pretty much a straightforward 1:1 conversion plus the consolidation
of the KVM handling of pending work before entering guest mode."

* tag 'x86-entry-2020-08-04' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/kvm: Use __xfer_to_guest_mode_work_pending() in kvm_run_vcpu()
x86/kvm: Use generic xfer to guest work function
x86/entry: Cleanup idtentry_enter/exit
x86/entry: Use generic interrupt entry/exit code
x86/entry: Cleanup idtentry_entry/exit_user
x86/entry: Use generic syscall exit functionality
x86/entry: Use generic syscall entry function
x86/ptrace: Provide pt_regs helper for entry/exit
x86/entry: Move user return notifier out of loop
x86/entry: Consolidate 32/64 bit syscall entry
x86/entry: Consolidate check_user_regs()
x86: Correct noinstr qualifiers
x86/idtentry: Remove stale comment

+174 -669
+1
arch/x86/Kconfig
··· 115 115 select GENERIC_CPU_AUTOPROBE 116 116 select GENERIC_CPU_VULNERABILITIES 117 117 select GENERIC_EARLY_IOREMAP 118 + select GENERIC_ENTRY 118 119 select GENERIC_FIND_FIRST_BIT 119 120 select GENERIC_IOMAP 120 121 select GENERIC_IRQ_EFFECTIVE_AFF_MASK if SMP
+37 -595
arch/x86/entry/common.c
··· 10 10 #include <linux/kernel.h> 11 11 #include <linux/sched.h> 12 12 #include <linux/sched/task_stack.h> 13 + #include <linux/entry-common.h> 13 14 #include <linux/mm.h> 14 15 #include <linux/smp.h> 15 16 #include <linux/errno.h> 16 17 #include <linux/ptrace.h> 17 - #include <linux/tracehook.h> 18 - #include <linux/audit.h> 19 - #include <linux/seccomp.h> 20 - #include <linux/signal.h> 21 18 #include <linux/export.h> 22 - #include <linux/context_tracking.h> 23 - #include <linux/user-return-notifier.h> 24 19 #include <linux/nospec.h> 25 - #include <linux/uprobes.h> 26 - #include <linux/livepatch.h> 27 20 #include <linux/syscalls.h> 28 21 #include <linux/uaccess.h> 29 22 ··· 35 42 #include <asm/syscall.h> 36 43 #include <asm/irq_stack.h> 37 44 38 - #define CREATE_TRACE_POINTS 39 - #include <trace/events/syscalls.h> 40 - 41 - /* Check that the stack and regs on entry from user mode are sane. */ 42 - static noinstr void check_user_regs(struct pt_regs *regs) 43 - { 44 - if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) { 45 - /* 46 - * Make sure that the entry code gave us a sensible EFLAGS 47 - * register. Native because we want to check the actual CPU 48 - * state, not the interrupt state as imagined by Xen. 49 - */ 50 - unsigned long flags = native_save_fl(); 51 - WARN_ON_ONCE(flags & (X86_EFLAGS_AC | X86_EFLAGS_DF | 52 - X86_EFLAGS_NT)); 53 - 54 - /* We think we came from user mode. Make sure pt_regs agrees. */ 55 - WARN_ON_ONCE(!user_mode(regs)); 56 - 57 - /* 58 - * All entries from user mode (except #DF) should be on the 59 - * normal thread stack and should have user pt_regs in the 60 - * correct location. 61 - */ 62 - WARN_ON_ONCE(!on_thread_stack()); 63 - WARN_ON_ONCE(regs != task_pt_regs(current)); 64 - } 65 - } 66 - 67 - #ifdef CONFIG_CONTEXT_TRACKING 68 - /** 69 - * enter_from_user_mode - Establish state when coming from user mode 70 - * 71 - * Syscall entry disables interrupts, but user mode is traced as interrupts 72 - * enabled. 
Also with NO_HZ_FULL RCU might be idle. 73 - * 74 - * 1) Tell lockdep that interrupts are disabled 75 - * 2) Invoke context tracking if enabled to reactivate RCU 76 - * 3) Trace interrupts off state 77 - */ 78 - static noinstr void enter_from_user_mode(void) 79 - { 80 - enum ctx_state state = ct_state(); 81 - 82 - lockdep_hardirqs_off(CALLER_ADDR0); 83 - user_exit_irqoff(); 84 - 85 - instrumentation_begin(); 86 - CT_WARN_ON(state != CONTEXT_USER); 87 - trace_hardirqs_off_finish(); 88 - instrumentation_end(); 89 - } 90 - #else 91 - static __always_inline void enter_from_user_mode(void) 92 - { 93 - lockdep_hardirqs_off(CALLER_ADDR0); 94 - instrumentation_begin(); 95 - trace_hardirqs_off_finish(); 96 - instrumentation_end(); 97 - } 98 - #endif 99 - 100 - /** 101 - * exit_to_user_mode - Fixup state when exiting to user mode 102 - * 103 - * Syscall exit enables interrupts, but the kernel state is interrupts 104 - * disabled when this is invoked. Also tell RCU about it. 105 - * 106 - * 1) Trace interrupts on state 107 - * 2) Invoke context tracking if enabled to adjust RCU state 108 - * 3) Clear CPU buffers if CPU is affected by MDS and the migitation is on. 
109 - * 4) Tell lockdep that interrupts are enabled 110 - */ 111 - static __always_inline void exit_to_user_mode(void) 112 - { 113 - instrumentation_begin(); 114 - trace_hardirqs_on_prepare(); 115 - lockdep_hardirqs_on_prepare(CALLER_ADDR0); 116 - instrumentation_end(); 117 - 118 - user_enter_irqoff(); 119 - mds_user_clear_cpu_buffers(); 120 - lockdep_hardirqs_on(CALLER_ADDR0); 121 - } 122 - 123 - static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch) 124 - { 125 - #ifdef CONFIG_X86_64 126 - if (arch == AUDIT_ARCH_X86_64) { 127 - audit_syscall_entry(regs->orig_ax, regs->di, 128 - regs->si, regs->dx, regs->r10); 129 - } else 130 - #endif 131 - { 132 - audit_syscall_entry(regs->orig_ax, regs->bx, 133 - regs->cx, regs->dx, regs->si); 134 - } 135 - } 136 - 137 - /* 138 - * Returns the syscall nr to run (which should match regs->orig_ax) or -1 139 - * to skip the syscall. 140 - */ 141 - static long syscall_trace_enter(struct pt_regs *regs) 142 - { 143 - u32 arch = in_ia32_syscall() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64; 144 - 145 - struct thread_info *ti = current_thread_info(); 146 - unsigned long ret = 0; 147 - u32 work; 148 - 149 - work = READ_ONCE(ti->flags); 150 - 151 - if (work & (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU)) { 152 - ret = tracehook_report_syscall_entry(regs); 153 - if (ret || (work & _TIF_SYSCALL_EMU)) 154 - return -1L; 155 - } 156 - 157 - #ifdef CONFIG_SECCOMP 158 - /* 159 - * Do seccomp after ptrace, to catch any tracer changes. 
160 - */ 161 - if (work & _TIF_SECCOMP) { 162 - struct seccomp_data sd; 163 - 164 - sd.arch = arch; 165 - sd.nr = regs->orig_ax; 166 - sd.instruction_pointer = regs->ip; 167 - #ifdef CONFIG_X86_64 168 - if (arch == AUDIT_ARCH_X86_64) { 169 - sd.args[0] = regs->di; 170 - sd.args[1] = regs->si; 171 - sd.args[2] = regs->dx; 172 - sd.args[3] = regs->r10; 173 - sd.args[4] = regs->r8; 174 - sd.args[5] = regs->r9; 175 - } else 176 - #endif 177 - { 178 - sd.args[0] = regs->bx; 179 - sd.args[1] = regs->cx; 180 - sd.args[2] = regs->dx; 181 - sd.args[3] = regs->si; 182 - sd.args[4] = regs->di; 183 - sd.args[5] = regs->bp; 184 - } 185 - 186 - ret = __secure_computing(&sd); 187 - if (ret == -1) 188 - return ret; 189 - } 190 - #endif 191 - 192 - if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) 193 - trace_sys_enter(regs, regs->orig_ax); 194 - 195 - do_audit_syscall_entry(regs, arch); 196 - 197 - return ret ?: regs->orig_ax; 198 - } 199 - 200 - #define EXIT_TO_USERMODE_LOOP_FLAGS \ 201 - (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \ 202 - _TIF_NEED_RESCHED | _TIF_USER_RETURN_NOTIFY | _TIF_PATCH_PENDING) 203 - 204 - static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags) 205 - { 206 - /* 207 - * In order to return to user mode, we need to have IRQs off with 208 - * none of EXIT_TO_USERMODE_LOOP_FLAGS set. Several of these flags 209 - * can be set at any time on preemptible kernels if we have IRQs on, 210 - * so we need to loop. Disabling preemption wouldn't help: doing the 211 - * work to clear some of the flags can sleep. 212 - */ 213 - while (true) { 214 - /* We have work to do. 
*/ 215 - local_irq_enable(); 216 - 217 - if (cached_flags & _TIF_NEED_RESCHED) 218 - schedule(); 219 - 220 - if (cached_flags & _TIF_UPROBE) 221 - uprobe_notify_resume(regs); 222 - 223 - if (cached_flags & _TIF_PATCH_PENDING) 224 - klp_update_patch_state(current); 225 - 226 - /* deal with pending signal delivery */ 227 - if (cached_flags & _TIF_SIGPENDING) 228 - do_signal(regs); 229 - 230 - if (cached_flags & _TIF_NOTIFY_RESUME) { 231 - clear_thread_flag(TIF_NOTIFY_RESUME); 232 - tracehook_notify_resume(regs); 233 - rseq_handle_notify_resume(NULL, regs); 234 - } 235 - 236 - if (cached_flags & _TIF_USER_RETURN_NOTIFY) 237 - fire_user_return_notifiers(); 238 - 239 - /* Disable IRQs and retry */ 240 - local_irq_disable(); 241 - 242 - cached_flags = READ_ONCE(current_thread_info()->flags); 243 - 244 - if (!(cached_flags & EXIT_TO_USERMODE_LOOP_FLAGS)) 245 - break; 246 - } 247 - } 248 - 249 - static void __prepare_exit_to_usermode(struct pt_regs *regs) 250 - { 251 - struct thread_info *ti = current_thread_info(); 252 - u32 cached_flags; 253 - 254 - addr_limit_user_check(); 255 - 256 - lockdep_assert_irqs_disabled(); 257 - lockdep_sys_exit(); 258 - 259 - cached_flags = READ_ONCE(ti->flags); 260 - 261 - if (unlikely(cached_flags & EXIT_TO_USERMODE_LOOP_FLAGS)) 262 - exit_to_usermode_loop(regs, cached_flags); 263 - 264 - /* Reload ti->flags; we may have rescheduled above. */ 265 - cached_flags = READ_ONCE(ti->flags); 266 - 267 - if (unlikely(cached_flags & _TIF_IO_BITMAP)) 268 - tss_update_io_bitmap(); 269 - 270 - fpregs_assert_state_consistent(); 271 - if (unlikely(cached_flags & _TIF_NEED_FPU_LOAD)) 272 - switch_fpu_return(); 273 - 274 - #ifdef CONFIG_COMPAT 275 - /* 276 - * Compat syscalls set TS_COMPAT. Make sure we clear it before 277 - * returning to user mode. We need to clear it *after* signal 278 - * handling, because syscall restart has a fixup for compat 279 - * syscalls. The fixup is exercised by the ptrace_syscall_32 280 - * selftest. 
281 - * 282 - * We also need to clear TS_REGS_POKED_I386: the 32-bit tracer 283 - * special case only applies after poking regs and before the 284 - * very next return to user mode. 285 - */ 286 - ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED); 287 - #endif 288 - } 289 - 290 - static noinstr void prepare_exit_to_usermode(struct pt_regs *regs) 291 - { 292 - instrumentation_begin(); 293 - __prepare_exit_to_usermode(regs); 294 - instrumentation_end(); 295 - exit_to_user_mode(); 296 - } 297 - 298 - #define SYSCALL_EXIT_WORK_FLAGS \ 299 - (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ 300 - _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT) 301 - 302 - static void syscall_slow_exit_work(struct pt_regs *regs, u32 cached_flags) 303 - { 304 - bool step; 305 - 306 - audit_syscall_exit(regs); 307 - 308 - if (cached_flags & _TIF_SYSCALL_TRACEPOINT) 309 - trace_sys_exit(regs, regs->ax); 310 - 311 - /* 312 - * If TIF_SYSCALL_EMU is set, we only get here because of 313 - * TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP). 314 - * We already reported this syscall instruction in 315 - * syscall_trace_enter(). 316 - */ 317 - step = unlikely( 318 - (cached_flags & (_TIF_SINGLESTEP | _TIF_SYSCALL_EMU)) 319 - == _TIF_SINGLESTEP); 320 - if (step || cached_flags & _TIF_SYSCALL_TRACE) 321 - tracehook_report_syscall_exit(regs, step); 322 - } 323 - 324 - static void __syscall_return_slowpath(struct pt_regs *regs) 325 - { 326 - struct thread_info *ti = current_thread_info(); 327 - u32 cached_flags = READ_ONCE(ti->flags); 328 - 329 - CT_WARN_ON(ct_state() != CONTEXT_KERNEL); 330 - 331 - if (IS_ENABLED(CONFIG_PROVE_LOCKING) && 332 - WARN(irqs_disabled(), "syscall %ld left IRQs disabled", regs->orig_ax)) 333 - local_irq_enable(); 334 - 335 - rseq_syscall(regs); 336 - 337 - /* 338 - * First do one-time work. If these work items are enabled, we 339 - * want to run them exactly once per syscall exit with IRQs on. 
340 - */ 341 - if (unlikely(cached_flags & SYSCALL_EXIT_WORK_FLAGS)) 342 - syscall_slow_exit_work(regs, cached_flags); 343 - 344 - local_irq_disable(); 345 - __prepare_exit_to_usermode(regs); 346 - } 347 - 348 - /* 349 - * Called with IRQs on and fully valid regs. Returns with IRQs off in a 350 - * state such that we can immediately switch to user mode. 351 - */ 352 - __visible noinstr void syscall_return_slowpath(struct pt_regs *regs) 353 - { 354 - instrumentation_begin(); 355 - __syscall_return_slowpath(regs); 356 - instrumentation_end(); 357 - exit_to_user_mode(); 358 - } 359 - 360 45 #ifdef CONFIG_X86_64 361 46 __visible noinstr void do_syscall_64(unsigned long nr, struct pt_regs *regs) 362 47 { 363 - struct thread_info *ti; 48 + nr = syscall_enter_from_user_mode(regs, nr); 364 49 365 - check_user_regs(regs); 366 - 367 - enter_from_user_mode(); 368 50 instrumentation_begin(); 369 - 370 - local_irq_enable(); 371 - ti = current_thread_info(); 372 - if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY) 373 - nr = syscall_trace_enter(regs); 374 - 375 51 if (likely(nr < NR_syscalls)) { 376 52 nr = array_index_nospec(nr, NR_syscalls); 377 53 regs->ax = sys_call_table[nr](regs); ··· 52 390 regs->ax = x32_sys_call_table[nr](regs); 53 391 #endif 54 392 } 55 - __syscall_return_slowpath(regs); 56 - 57 393 instrumentation_end(); 58 - exit_to_user_mode(); 394 + syscall_exit_to_user_mode(regs); 59 395 } 60 396 #endif 61 397 62 398 #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) 63 - /* 64 - * Does a 32-bit syscall. Called with IRQs on in CONTEXT_KERNEL. Does 65 - * all entry and exit work and returns with IRQs off. This function is 66 - * extremely hot in workloads that use it, and it's usually called from 67 - * do_fast_syscall_32, so forcibly inline it to improve performance. 
68 - */ 69 - static void do_syscall_32_irqs_on(struct pt_regs *regs) 399 + static __always_inline unsigned int syscall_32_enter(struct pt_regs *regs) 70 400 { 71 - struct thread_info *ti = current_thread_info(); 72 401 unsigned int nr = (unsigned int)regs->orig_ax; 73 402 74 - #ifdef CONFIG_IA32_EMULATION 75 - ti->status |= TS_COMPAT; 76 - #endif 403 + if (IS_ENABLED(CONFIG_IA32_EMULATION)) 404 + current_thread_info()->status |= TS_COMPAT; 405 + /* 406 + * Subtlety here: if ptrace pokes something larger than 2^32-1 into 407 + * orig_ax, the unsigned int return value truncates it. This may 408 + * or may not be necessary, but it matches the old asm behavior. 409 + */ 410 + return (unsigned int)syscall_enter_from_user_mode(regs, nr); 411 + } 77 412 78 - if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY) { 79 - /* 80 - * Subtlety here: if ptrace pokes something larger than 81 - * 2^32-1 into orig_ax, this truncates it. This may or 82 - * may not be necessary, but it matches the old asm 83 - * behavior. 84 - */ 85 - nr = syscall_trace_enter(regs); 86 - } 87 - 413 + /* 414 + * Invoke a 32-bit syscall. Called with IRQs on in CONTEXT_KERNEL. 
415 + */ 416 + static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs, 417 + unsigned int nr) 418 + { 88 419 if (likely(nr < IA32_NR_syscalls)) { 420 + instrumentation_begin(); 89 421 nr = array_index_nospec(nr, IA32_NR_syscalls); 90 422 regs->ax = ia32_sys_call_table[nr](regs); 423 + instrumentation_end(); 91 424 } 92 - 93 - __syscall_return_slowpath(regs); 94 425 } 95 426 96 427 /* Handles int $0x80 */ 97 428 __visible noinstr void do_int80_syscall_32(struct pt_regs *regs) 98 429 { 99 - check_user_regs(regs); 430 + unsigned int nr = syscall_32_enter(regs); 100 431 101 - enter_from_user_mode(); 102 - instrumentation_begin(); 103 - 104 - local_irq_enable(); 105 - do_syscall_32_irqs_on(regs); 106 - 107 - instrumentation_end(); 108 - exit_to_user_mode(); 432 + do_syscall_32_irqs_on(regs, nr); 433 + syscall_exit_to_user_mode(regs); 109 434 } 110 435 111 - static bool __do_fast_syscall_32(struct pt_regs *regs) 436 + static noinstr bool __do_fast_syscall_32(struct pt_regs *regs) 112 437 { 438 + unsigned int nr = syscall_32_enter(regs); 113 439 int res; 114 440 441 + instrumentation_begin(); 115 442 /* Fetch EBP from where the vDSO stashed it. */ 116 443 if (IS_ENABLED(CONFIG_X86_64)) { 117 444 /* ··· 113 462 res = get_user(*(u32 *)&regs->bp, 114 463 (u32 __user __force *)(unsigned long)(u32)regs->sp); 115 464 } 465 + instrumentation_end(); 116 466 117 467 if (res) { 118 468 /* User code screwed up. */ 119 469 regs->ax = -EFAULT; 120 - local_irq_disable(); 121 - __prepare_exit_to_usermode(regs); 470 + syscall_exit_to_user_mode(regs); 122 471 return false; 123 472 } 124 473 125 474 /* Now this is just like a normal syscall. 
*/ 126 - do_syscall_32_irqs_on(regs); 475 + do_syscall_32_irqs_on(regs, nr); 476 + syscall_exit_to_user_mode(regs); 127 477 return true; 128 478 } 129 479 ··· 137 485 */ 138 486 unsigned long landing_pad = (unsigned long)current->mm->context.vdso + 139 487 vdso_image_32.sym_int80_landing_pad; 140 - bool success; 141 - 142 - check_user_regs(regs); 143 488 144 489 /* 145 490 * SYSENTER loses EIP, and even SYSCALL32 needs us to skip forward ··· 145 496 */ 146 497 regs->ip = landing_pad; 147 498 148 - enter_from_user_mode(); 149 - instrumentation_begin(); 150 - 151 - local_irq_enable(); 152 - success = __do_fast_syscall_32(regs); 153 - 154 - instrumentation_end(); 155 - exit_to_user_mode(); 156 - 157 - /* If it failed, keep it simple: use IRET. */ 158 - if (!success) 499 + /* Invoke the syscall. If it failed, keep it simple: use IRET. */ 500 + if (!__do_fast_syscall_32(regs)) 159 501 return 0; 160 502 161 503 #ifdef CONFIG_X86_64 ··· 196 556 SYSCALL_DEFINE0(ni_syscall) 197 557 { 198 558 return -ENOSYS; 199 - } 200 - 201 - /** 202 - * idtentry_enter - Handle state tracking on ordinary idtentries 203 - * @regs: Pointer to pt_regs of interrupted context 204 - * 205 - * Invokes: 206 - * - lockdep irqflag state tracking as low level ASM entry disabled 207 - * interrupts. 208 - * 209 - * - Context tracking if the exception hit user mode. 210 - * 211 - * - The hardirq tracer to keep the state consistent as low level ASM 212 - * entry disabled interrupts. 213 - * 214 - * As a precondition, this requires that the entry came from user mode, 215 - * idle, or a kernel context in which RCU is watching. 216 - * 217 - * For kernel mode entries RCU handling is done conditional. If RCU is 218 - * watching then the only RCU requirement is to check whether the tick has 219 - * to be restarted. If RCU is not watching then rcu_irq_enter() has to be 220 - * invoked on entry and rcu_irq_exit() on exit. 
221 - * 222 - * Avoiding the rcu_irq_enter/exit() calls is an optimization but also 223 - * solves the problem of kernel mode pagefaults which can schedule, which 224 - * is not possible after invoking rcu_irq_enter() without undoing it. 225 - * 226 - * For user mode entries enter_from_user_mode() must be invoked to 227 - * establish the proper context for NOHZ_FULL. Otherwise scheduling on exit 228 - * would not be possible. 229 - * 230 - * Returns: An opaque object that must be passed to idtentry_exit() 231 - * 232 - * The return value must be fed into the state argument of 233 - * idtentry_exit(). 234 - */ 235 - noinstr idtentry_state_t idtentry_enter(struct pt_regs *regs) 236 - { 237 - idtentry_state_t ret = { 238 - .exit_rcu = false, 239 - }; 240 - 241 - if (user_mode(regs)) { 242 - check_user_regs(regs); 243 - enter_from_user_mode(); 244 - return ret; 245 - } 246 - 247 - /* 248 - * If this entry hit the idle task invoke rcu_irq_enter() whether 249 - * RCU is watching or not. 250 - * 251 - * Interupts can nest when the first interrupt invokes softirq 252 - * processing on return which enables interrupts. 253 - * 254 - * Scheduler ticks in the idle task can mark quiescent state and 255 - * terminate a grace period, if and only if the timer interrupt is 256 - * not nested into another interrupt. 257 - * 258 - * Checking for __rcu_is_watching() here would prevent the nesting 259 - * interrupt to invoke rcu_irq_enter(). If that nested interrupt is 260 - * the tick then rcu_flavor_sched_clock_irq() would wrongfully 261 - * assume that it is the first interupt and eventually claim 262 - * quiescient state and end grace periods prematurely. 263 - * 264 - * Unconditionally invoke rcu_irq_enter() so RCU state stays 265 - * consistent. 266 - * 267 - * TINY_RCU does not support EQS, so let the compiler eliminate 268 - * this part when enabled. 
269 - */ 270 - if (!IS_ENABLED(CONFIG_TINY_RCU) && is_idle_task(current)) { 271 - /* 272 - * If RCU is not watching then the same careful 273 - * sequence vs. lockdep and tracing is required 274 - * as in enter_from_user_mode(). 275 - */ 276 - lockdep_hardirqs_off(CALLER_ADDR0); 277 - rcu_irq_enter(); 278 - instrumentation_begin(); 279 - trace_hardirqs_off_finish(); 280 - instrumentation_end(); 281 - 282 - ret.exit_rcu = true; 283 - return ret; 284 - } 285 - 286 - /* 287 - * If RCU is watching then RCU only wants to check whether it needs 288 - * to restart the tick in NOHZ mode. rcu_irq_enter_check_tick() 289 - * already contains a warning when RCU is not watching, so no point 290 - * in having another one here. 291 - */ 292 - instrumentation_begin(); 293 - rcu_irq_enter_check_tick(); 294 - /* Use the combo lockdep/tracing function */ 295 - trace_hardirqs_off(); 296 - instrumentation_end(); 297 - 298 - return ret; 299 - } 300 - 301 - static void idtentry_exit_cond_resched(struct pt_regs *regs, bool may_sched) 302 - { 303 - if (may_sched && !preempt_count()) { 304 - /* Sanity check RCU and thread stack */ 305 - rcu_irq_exit_check_preempt(); 306 - if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) 307 - WARN_ON_ONCE(!on_thread_stack()); 308 - if (need_resched()) 309 - preempt_schedule_irq(); 310 - } 311 - /* Covers both tracing and lockdep */ 312 - trace_hardirqs_on(); 313 - } 314 - 315 - /** 316 - * idtentry_exit - Handle return from exception that used idtentry_enter() 317 - * @regs: Pointer to pt_regs (exception entry regs) 318 - * @state: Return value from matching call to idtentry_enter() 319 - * 320 - * Depending on the return target (kernel/user) this runs the necessary 321 - * preemption and work checks if possible and reguired and returns to 322 - * the caller with interrupts disabled and no further work pending. 323 - * 324 - * This is the last action before returning to the low level ASM code which 325 - * just needs to return to the appropriate context. 
326 - * 327 - * Counterpart to idtentry_enter(). The return value of the entry 328 - * function must be fed into the @state argument. 329 - */ 330 - noinstr void idtentry_exit(struct pt_regs *regs, idtentry_state_t state) 331 - { 332 - lockdep_assert_irqs_disabled(); 333 - 334 - /* Check whether this returns to user mode */ 335 - if (user_mode(regs)) { 336 - prepare_exit_to_usermode(regs); 337 - } else if (regs->flags & X86_EFLAGS_IF) { 338 - /* 339 - * If RCU was not watching on entry this needs to be done 340 - * carefully and needs the same ordering of lockdep/tracing 341 - * and RCU as the return to user mode path. 342 - */ 343 - if (state.exit_rcu) { 344 - instrumentation_begin(); 345 - /* Tell the tracer that IRET will enable interrupts */ 346 - trace_hardirqs_on_prepare(); 347 - lockdep_hardirqs_on_prepare(CALLER_ADDR0); 348 - instrumentation_end(); 349 - rcu_irq_exit(); 350 - lockdep_hardirqs_on(CALLER_ADDR0); 351 - return; 352 - } 353 - 354 - instrumentation_begin(); 355 - idtentry_exit_cond_resched(regs, IS_ENABLED(CONFIG_PREEMPTION)); 356 - instrumentation_end(); 357 - } else { 358 - /* 359 - * IRQ flags state is correct already. Just tell RCU if it 360 - * was not watching on entry. 361 - */ 362 - if (state.exit_rcu) 363 - rcu_irq_exit(); 364 - } 365 - } 366 - 367 - /** 368 - * idtentry_enter_user - Handle state tracking on idtentry from user mode 369 - * @regs: Pointer to pt_regs of interrupted context 370 - * 371 - * Invokes enter_from_user_mode() to establish the proper context for 372 - * NOHZ_FULL. Otherwise scheduling on exit would not be possible. 
373 - */ 374 - noinstr void idtentry_enter_user(struct pt_regs *regs) 375 - { 376 - check_user_regs(regs); 377 - enter_from_user_mode(); 378 - } 379 - 380 - /** 381 - * idtentry_exit_user - Handle return from exception to user mode 382 - * @regs: Pointer to pt_regs (exception entry regs) 383 - * 384 - * Runs the necessary preemption and work checks and returns to the caller 385 - * with interrupts disabled and no further work pending. 386 - * 387 - * This is the last action before returning to the low level ASM code which 388 - * just needs to return to the appropriate context. 389 - * 390 - * Counterpart to idtentry_enter_user(). 391 - */ 392 - noinstr void idtentry_exit_user(struct pt_regs *regs) 393 - { 394 - lockdep_assert_irqs_disabled(); 395 - 396 - prepare_exit_to_usermode(regs); 397 559 } 398 560 399 561 noinstr bool idtentry_enter_nmi(struct pt_regs *regs) ··· 282 840 { 283 841 struct pt_regs *old_regs; 284 842 bool inhcall; 285 - idtentry_state_t state; 843 + irqentry_state_t state; 286 844 287 - state = idtentry_enter(regs); 845 + state = irqentry_enter(regs); 288 846 old_regs = set_irq_regs(regs); 289 847 290 848 instrumentation_begin(); ··· 296 854 inhcall = get_and_clear_inhcall(); 297 855 if (inhcall && !WARN_ON_ONCE(state.exit_rcu)) { 298 856 instrumentation_begin(); 299 - idtentry_exit_cond_resched(regs, true); 857 + irqentry_exit_cond_resched(); 300 858 instrumentation_end(); 301 859 restore_inhcall(inhcall); 302 860 } else { 303 - idtentry_exit(regs, state); 861 + irqentry_exit(regs, state); 304 862 } 305 863 } 306 864 #endif /* CONFIG_XEN_PV */
+1 -1
arch/x86/entry/entry_32.S
··· 846 846 2: 847 847 /* When we fork, we trace the syscall return in the child, too. */ 848 848 movl %esp, %eax 849 - call syscall_return_slowpath 849 + call syscall_exit_to_user_mode 850 850 jmp .Lsyscall_32_done 851 851 852 852 /* kernel thread */
+1 -1
arch/x86/entry/entry_64.S
··· 283 283 2: 284 284 UNWIND_HINT_REGS 285 285 movq %rsp, %rdi 286 - call syscall_return_slowpath /* returns with IRQs disabled */ 286 + call syscall_exit_to_user_mode /* returns with IRQs disabled */ 287 287 jmp swapgs_restore_regs_and_return_to_usermode 288 288 289 289 1:
+76
arch/x86/include/asm/entry-common.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0-only */ 2 + #ifndef _ASM_X86_ENTRY_COMMON_H 3 + #define _ASM_X86_ENTRY_COMMON_H 4 + 5 + #include <linux/user-return-notifier.h> 6 + 7 + #include <asm/nospec-branch.h> 8 + #include <asm/io_bitmap.h> 9 + #include <asm/fpu/api.h> 10 + 11 + /* Check that the stack and regs on entry from user mode are sane. */ 12 + static __always_inline void arch_check_user_regs(struct pt_regs *regs) 13 + { 14 + if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) { 15 + /* 16 + * Make sure that the entry code gave us a sensible EFLAGS 17 + * register. Native because we want to check the actual CPU 18 + * state, not the interrupt state as imagined by Xen. 19 + */ 20 + unsigned long flags = native_save_fl(); 21 + WARN_ON_ONCE(flags & (X86_EFLAGS_AC | X86_EFLAGS_DF | 22 + X86_EFLAGS_NT)); 23 + 24 + /* We think we came from user mode. Make sure pt_regs agrees. */ 25 + WARN_ON_ONCE(!user_mode(regs)); 26 + 27 + /* 28 + * All entries from user mode (except #DF) should be on the 29 + * normal thread stack and should have user pt_regs in the 30 + * correct location. 31 + */ 32 + WARN_ON_ONCE(!on_thread_stack()); 33 + WARN_ON_ONCE(regs != task_pt_regs(current)); 34 + } 35 + } 36 + #define arch_check_user_regs arch_check_user_regs 37 + 38 + #define ARCH_SYSCALL_EXIT_WORK (_TIF_SINGLESTEP) 39 + 40 + static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs, 41 + unsigned long ti_work) 42 + { 43 + if (ti_work & _TIF_USER_RETURN_NOTIFY) 44 + fire_user_return_notifiers(); 45 + 46 + if (unlikely(ti_work & _TIF_IO_BITMAP)) 47 + tss_update_io_bitmap(); 48 + 49 + fpregs_assert_state_consistent(); 50 + if (unlikely(ti_work & _TIF_NEED_FPU_LOAD)) 51 + switch_fpu_return(); 52 + 53 + #ifdef CONFIG_COMPAT 54 + /* 55 + * Compat syscalls set TS_COMPAT. Make sure we clear it before 56 + * returning to user mode. We need to clear it *after* signal 57 + * handling, because syscall restart has a fixup for compat 58 + * syscalls. 
The fixup is exercised by the ptrace_syscall_32 59 + * selftest. 60 + * 61 + * We also need to clear TS_REGS_POKED_I386: the 32-bit tracer 62 + * special case only applies after poking regs and before the 63 + * very next return to user mode. 64 + */ 65 + current_thread_info()->status &= ~(TS_COMPAT | TS_I386_REGS_POKED); 66 + #endif 67 + } 68 + #define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare 69 + 70 + static __always_inline void arch_exit_to_user_mode(void) 71 + { 72 + mds_user_clear_cpu_buffers(); 73 + } 74 + #define arch_exit_to_user_mode arch_exit_to_user_mode 75 + 76 + #endif
+17 -28
arch/x86/include/asm/idtentry.h
··· 6 6 #include <asm/trapnr.h> 7 7 8 8 #ifndef __ASSEMBLY__ 9 + #include <linux/entry-common.h> 9 10 #include <linux/hardirq.h> 10 11 11 12 #include <asm/irq_stack.h> 12 - 13 - void idtentry_enter_user(struct pt_regs *regs); 14 - void idtentry_exit_user(struct pt_regs *regs); 15 - 16 - typedef struct idtentry_state { 17 - bool exit_rcu; 18 - } idtentry_state_t; 19 - 20 - idtentry_state_t idtentry_enter(struct pt_regs *regs); 21 - void idtentry_exit(struct pt_regs *regs, idtentry_state_t state); 22 13 23 14 bool idtentry_enter_nmi(struct pt_regs *regs); 24 15 void idtentry_exit_nmi(struct pt_regs *regs, bool irq_state); ··· 43 52 * The macro is written so it acts as function definition. Append the 44 53 * body with a pair of curly brackets. 45 54 * 46 - * idtentry_enter() contains common code which has to be invoked before 47 - * arbitrary code in the body. idtentry_exit() contains common code 55 + * irqentry_enter() contains common code which has to be invoked before 56 + * arbitrary code in the body. irqentry_exit() contains common code 48 57 * which has to run before returning to the low level assembly code. 
49 58 */ 50 59 #define DEFINE_IDTENTRY(func) \ ··· 52 61 \ 53 62 __visible noinstr void func(struct pt_regs *regs) \ 54 63 { \ 55 - idtentry_state_t state = idtentry_enter(regs); \ 64 + irqentry_state_t state = irqentry_enter(regs); \ 56 65 \ 57 66 instrumentation_begin(); \ 58 67 __##func (regs); \ 59 68 instrumentation_end(); \ 60 - idtentry_exit(regs, state); \ 69 + irqentry_exit(regs, state); \ 61 70 } \ 62 71 \ 63 72 static __always_inline void __##func(struct pt_regs *regs) ··· 99 108 __visible noinstr void func(struct pt_regs *regs, \ 100 109 unsigned long error_code) \ 101 110 { \ 102 - idtentry_state_t state = idtentry_enter(regs); \ 111 + irqentry_state_t state = irqentry_enter(regs); \ 103 112 \ 104 113 instrumentation_begin(); \ 105 114 __##func (regs, error_code); \ 106 115 instrumentation_end(); \ 107 - idtentry_exit(regs, state); \ 116 + irqentry_exit(regs, state); \ 108 117 } \ 109 118 \ 110 119 static __always_inline void __##func(struct pt_regs *regs, \ ··· 159 168 * body with a pair of curly brackets. 160 169 * 161 170 * Contrary to DEFINE_IDTENTRY_ERRORCODE() this does not invoke the 162 - * idtentry_enter/exit() helpers before and after the body invocation. This 171 + * irqentry_enter/exit() helpers before and after the body invocation. This 163 172 * needs to be done in the body itself if applicable. Use if extra work 164 173 * is required before the enter/exit() helpers are invoked. 165 174 */ ··· 185 194 * to the function as error_code argument which needs to be truncated 186 195 * to an u8 because the push is sign extending. 187 196 * 188 - * On 64-bit idtentry_enter/exit() are invoked in the ASM entry code before 189 - * and after switching to the interrupt stack. On 32-bit this happens in C. 190 - * 191 197 * irq_enter/exit_rcu() are invoked before the function body and the 192 - * KVM L1D flush request is set. 198 + * KVM L1D flush request is set. 
Stack switching to the interrupt stack 199 + * has to be done in the function body if necessary. 193 200 */ 194 201 #define DEFINE_IDTENTRY_IRQ(func) \ 195 202 static __always_inline void __##func(struct pt_regs *regs, u8 vector); \ ··· 195 206 __visible noinstr void func(struct pt_regs *regs, \ 196 207 unsigned long error_code) \ 197 208 { \ 198 - idtentry_state_t state = idtentry_enter(regs); \ 209 + irqentry_state_t state = irqentry_enter(regs); \ 199 210 \ 200 211 instrumentation_begin(); \ 201 212 irq_enter_rcu(); \ ··· 203 214 __##func (regs, (u8)error_code); \ 204 215 irq_exit_rcu(); \ 205 216 instrumentation_end(); \ 206 - idtentry_exit(regs, state); \ 217 + irqentry_exit(regs, state); \ 207 218 } \ 208 219 \ 209 220 static __always_inline void __##func(struct pt_regs *regs, u8 vector) ··· 227 238 * DEFINE_IDTENTRY_SYSVEC - Emit code for system vector IDT entry points 228 239 * @func: Function name of the entry point 229 240 * 230 - * idtentry_enter/exit() and irq_enter/exit_rcu() are invoked before the 241 + * irqentry_enter/exit() and irq_enter/exit_rcu() are invoked before the 231 242 * function body. KVM L1D flush request is set. 
232 243 * 233 244 * Runs the function on the interrupt stack if the entry hit kernel mode ··· 237 248 \ 238 249 __visible noinstr void func(struct pt_regs *regs) \ 239 250 { \ 240 - idtentry_state_t state = idtentry_enter(regs); \ 251 + irqentry_state_t state = irqentry_enter(regs); \ 241 252 \ 242 253 instrumentation_begin(); \ 243 254 irq_enter_rcu(); \ ··· 245 256 run_on_irqstack_cond(__##func, regs, regs); \ 246 257 irq_exit_rcu(); \ 247 258 instrumentation_end(); \ 248 - idtentry_exit(regs, state); \ 259 + irqentry_exit(regs, state); \ 249 260 } \ 250 261 \ 251 262 static noinline void __##func(struct pt_regs *regs) ··· 266 277 \ 267 278 __visible noinstr void func(struct pt_regs *regs) \ 268 279 { \ 269 - idtentry_state_t state = idtentry_enter(regs); \ 280 + irqentry_state_t state = irqentry_enter(regs); \ 270 281 \ 271 282 instrumentation_begin(); \ 272 283 __irq_enter_raw(); \ ··· 274 285 __##func (regs); \ 275 286 __irq_exit_raw(); \ 276 287 instrumentation_end(); \ 277 - idtentry_exit(regs, state); \ 288 + irqentry_exit(regs, state); \ 278 289 } \ 279 290 \ 280 291 static __always_inline void __##func(struct pt_regs *regs)
+5
arch/x86/include/asm/ptrace.h
··· 209 209 regs->sp = val; 210 210 } 211 211 212 + static __always_inline bool regs_irqs_disabled(struct pt_regs *regs) 213 + { 214 + return !(regs->flags & X86_EFLAGS_IF); 215 + } 216 + 212 217 /* Query offset/name of register from its name/offset */ 213 218 extern int regs_query_register_offset(const char *name); 214 219 extern const char *regs_query_register_name(unsigned int offset);
-1
arch/x86/include/asm/signal.h
··· 35 35 #endif /* __ASSEMBLY__ */ 36 36 #include <uapi/asm/signal.h> 37 37 #ifndef __ASSEMBLY__ 38 - extern void do_signal(struct pt_regs *regs); 39 38 40 39 #define __ARCH_HAS_SA_RESTORER 41 40
-5
arch/x86/include/asm/thread_info.h
··· 133 133 #define _TIF_X32 (1 << TIF_X32) 134 134 #define _TIF_FSCHECK (1 << TIF_FSCHECK) 135 135 136 - /* Work to do before invoking the actual syscall. */ 137 - #define _TIF_WORK_SYSCALL_ENTRY \ 138 - (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_AUDIT | \ 139 - _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT) 140 - 141 136 /* flags to check in __switch_to() */ 142 137 #define _TIF_WORK_CTXSW_BASE \ 143 138 (_TIF_NOCPUID | _TIF_NOTSC | _TIF_BLOCKSTEP | \
+1 -1
arch/x86/kernel/alternative.c
··· 1047 1047 return 0; 1048 1048 } 1049 1049 1050 - int noinstr poke_int3_handler(struct pt_regs *regs) 1050 + noinstr int poke_int3_handler(struct pt_regs *regs) 1051 1051 { 1052 1052 struct bp_patching_desc *desc; 1053 1053 struct text_poke_loc *tp;
+3 -3
arch/x86/kernel/cpu/mce/core.c
··· 1215 1215 * backing the user stack, tracing that reads the user stack will cause 1216 1216 * potentially infinite recursion. 1217 1217 */ 1218 - void noinstr do_machine_check(struct pt_regs *regs) 1218 + noinstr void do_machine_check(struct pt_regs *regs) 1219 1219 { 1220 1220 DECLARE_BITMAP(valid_banks, MAX_NR_BANKS); 1221 1221 DECLARE_BITMAP(toclear, MAX_NR_BANKS); ··· 1930 1930 1931 1931 static __always_inline void exc_machine_check_user(struct pt_regs *regs) 1932 1932 { 1933 - idtentry_enter_user(regs); 1933 + irqentry_enter_from_user_mode(regs); 1934 1934 instrumentation_begin(); 1935 1935 machine_check_vector(regs); 1936 1936 instrumentation_end(); 1937 - idtentry_exit_user(regs); 1937 + irqentry_exit_to_user_mode(regs); 1938 1938 } 1939 1939 1940 1940 #ifdef CONFIG_X86_64
+3 -3
arch/x86/kernel/kvm.c
··· 233 233 noinstr bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token) 234 234 { 235 235 u32 reason = kvm_read_and_reset_apf_flags(); 236 - idtentry_state_t state; 236 + irqentry_state_t state; 237 237 238 238 switch (reason) { 239 239 case KVM_PV_REASON_PAGE_NOT_PRESENT: ··· 243 243 return false; 244 244 } 245 245 246 - state = idtentry_enter(regs); 246 + state = irqentry_enter(regs); 247 247 instrumentation_begin(); 248 248 249 249 /* ··· 264 264 } 265 265 266 266 instrumentation_end(); 267 - idtentry_exit(regs, state); 267 + irqentry_exit(regs, state); 268 268 return true; 269 269 } 270 270
+2 -1
arch/x86/kernel/signal.c
··· 25 25 #include <linux/user-return-notifier.h> 26 26 #include <linux/uprobes.h> 27 27 #include <linux/context_tracking.h> 28 + #include <linux/entry-common.h> 28 29 #include <linux/syscalls.h> 29 30 30 31 #include <asm/processor.h> ··· 804 803 * want to handle. Thus you cannot kill init even with a SIGKILL even by 805 804 * mistake. 806 805 */ 807 - void do_signal(struct pt_regs *regs) 806 + void arch_do_signal(struct pt_regs *regs) 808 807 { 809 808 struct ksignal ksig; 810 809
+12 -12
arch/x86/kernel/traps.c
··· 245 245 246 246 DEFINE_IDTENTRY_RAW(exc_invalid_op) 247 247 { 248 - idtentry_state_t state; 248 + irqentry_state_t state; 249 249 250 250 /* 251 251 * We use UD2 as a short encoding for 'CALL __WARN', as such ··· 255 255 if (!user_mode(regs) && handle_bug(regs)) 256 256 return; 257 257 258 - state = idtentry_enter(regs); 258 + state = irqentry_enter(regs); 259 259 instrumentation_begin(); 260 260 handle_invalid_op(regs); 261 261 instrumentation_end(); 262 - idtentry_exit(regs, state); 262 + irqentry_exit(regs, state); 263 263 } 264 264 265 265 DEFINE_IDTENTRY(exc_coproc_segment_overrun) ··· 638 638 return; 639 639 640 640 /* 641 - * idtentry_enter_user() uses static_branch_{,un}likely() and therefore 642 - * can trigger INT3, hence poke_int3_handler() must be done 643 - * before. If the entry came from kernel mode, then use nmi_enter() 644 - * because the INT3 could have been hit in any context including 645 - * NMI. 641 + * irqentry_enter_from_user_mode() uses static_branch_{,un}likely() 642 + * and therefore can trigger INT3, hence poke_int3_handler() must 643 + * be done before. If the entry came from kernel mode, then use 644 + * nmi_enter() because the INT3 could have been hit in any context 645 + * including NMI. 
646 646 */ 647 647 if (user_mode(regs)) { 648 - idtentry_enter_user(regs); 648 + irqentry_enter_from_user_mode(regs); 649 649 instrumentation_begin(); 650 650 do_int3_user(regs); 651 651 instrumentation_end(); 652 - idtentry_exit_user(regs); 652 + irqentry_exit_to_user_mode(regs); 653 653 } else { 654 654 bool irq_state = idtentry_enter_nmi(regs); 655 655 instrumentation_begin(); ··· 895 895 */ 896 896 WARN_ON_ONCE(!user_mode(regs)); 897 897 898 - idtentry_enter_user(regs); 898 + irqentry_enter_from_user_mode(regs); 899 899 instrumentation_begin(); 900 900 901 901 handle_debug(regs, dr6, true); 902 902 903 903 instrumentation_end(); 904 - idtentry_exit_user(regs); 904 + irqentry_exit_to_user_mode(regs); 905 905 } 906 906 907 907 #ifdef CONFIG_X86_64
+1
arch/x86/kvm/Kconfig
··· 42 42 select HAVE_KVM_MSI 43 43 select HAVE_KVM_CPU_RELAX_INTERCEPT 44 44 select HAVE_KVM_NO_POLL 45 + select KVM_XFER_TO_GUEST_WORK 45 46 select KVM_GENERIC_DIRTYLOG_READ_PROTECT 46 47 select KVM_VFIO 47 48 select SRCU
+5 -6
arch/x86/kvm/vmx/vmx.c
··· 27 27 #include <linux/slab.h> 28 28 #include <linux/tboot.h> 29 29 #include <linux/trace_events.h> 30 + #include <linux/entry-kvm.h> 30 31 31 32 #include <asm/apic.h> 32 33 #include <asm/asm.h> ··· 5374 5373 } 5375 5374 5376 5375 /* 5377 - * Note, return 1 and not 0, vcpu_run() is responsible for 5378 - * morphing the pending signal into the proper return code. 5376 + * Note, return 1 and not 0, vcpu_run() will invoke 5377 + * xfer_to_guest_mode() which will create a proper return 5378 + * code. 5379 5379 */ 5380 - if (signal_pending(current)) 5380 + if (__xfer_to_guest_mode_work_pending()) 5381 5381 return 1; 5382 - 5383 - if (need_resched()) 5384 - schedule(); 5385 5382 } 5386 5383 5387 5384 return 1;
+6 -9
arch/x86/kvm/x86.c
··· 56 56 #include <linux/sched/stat.h> 57 57 #include <linux/sched/isolation.h> 58 58 #include <linux/mem_encrypt.h> 59 + #include <linux/entry-kvm.h> 59 60 60 61 #include <trace/events/kvm.h> 61 62 ··· 1588 1587 bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu) 1589 1588 { 1590 1589 return vcpu->mode == EXITING_GUEST_MODE || kvm_request_pending(vcpu) || 1591 - need_resched() || signal_pending(current); 1590 + xfer_to_guest_mode_work_pending(); 1592 1591 } 1593 1592 EXPORT_SYMBOL_GPL(kvm_vcpu_exit_request); 1594 1593 ··· 8682 8681 break; 8683 8682 } 8684 8683 8685 - if (signal_pending(current)) { 8686 - r = -EINTR; 8687 - vcpu->run->exit_reason = KVM_EXIT_INTR; 8688 - ++vcpu->stat.signal_exits; 8689 - break; 8690 - } 8691 - if (need_resched()) { 8684 + if (__xfer_to_guest_mode_work_pending()) { 8692 8685 srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); 8693 - cond_resched(); 8686 + r = xfer_to_guest_mode_handle_work(vcpu); 8687 + if (r) 8688 + return r; 8694 8689 vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); 8695 8690 } 8696 8691 }
+3 -3
arch/x86/mm/fault.c
··· 1377 1377 DEFINE_IDTENTRY_RAW_ERRORCODE(exc_page_fault) 1378 1378 { 1379 1379 unsigned long address = read_cr2(); 1380 - idtentry_state_t state; 1380 + irqentry_state_t state; 1381 1381 1382 1382 prefetchw(&current->mm->mmap_lock); 1383 1383 ··· 1412 1412 * code reenabled RCU to avoid subsequent wreckage which helps 1413 1413 * debugability. 1414 1414 */ 1415 - state = idtentry_enter(regs); 1415 + state = irqentry_enter(regs); 1416 1416 1417 1417 instrumentation_begin(); 1418 1418 handle_page_fault(regs, error_code, address); 1419 1419 instrumentation_end(); 1420 1420 1421 - idtentry_exit(regs, state); 1421 + irqentry_exit(regs, state); 1422 1422 }