Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

i387: move TS_USEDFPU flag from thread_info to task_struct

This moves the bit that indicates whether a thread has ownership of the
FPU from the TS_USEDFPU bit in thread_info->status to a word of its own
(called 'has_fpu') in task_struct->thread.has_fpu.

This fixes two independent bugs at the same time:

- changing 'thread_info->status' from the scheduler causes nasty
problems for the other users of that variable, since it is defined to
be thread-synchronous (that's what the "TS_" part of the naming was
supposed to indicate).

So perfectly valid code could (and did) do

ti->status |= TS_RESTORE_SIGMASK;

and the compiler was free to do that as separate load, 'or' and store
instructions. That can cause problems with preemption, since a task
switch could happen in between and change the TS_USEDFPU bit. The
change to TS_USEDFPU would then be overwritten by the final store.

In practice, this seldom happened, though, because the 'status' field
was seldom used more than once, so gcc would generally tend to
generate code that used a read-modify-write instruction and thus
happened to avoid this problem - RMW instructions are naturally low
fat and preemption-safe.

- On x86-32, the current_thread_info() pointer would, during interrupts
and softirqs, point to a *copy* of the real thread_info, because
x86-32 uses %esp to calculate the thread_info address, and thus the
separate irq (and softirq) stacks would cause these kinds of odd
thread_info copy aliases.

This is normally not a problem, since interrupts aren't supposed to
look at thread information anyway (what thread is running at
interrupt time really isn't very well-defined), but it confused the
heck out of irq_fpu_usable() and the code that tried to squirrel
away the FPU state.

(It also caused untold confusion for us poor kernel developers).

It also turns out that using 'task_struct' is actually much more natural
for most of the call sites that care about the FPU state, since they
tend to work with the task struct for other reasons anyway (i.e.
scheduling). And the FPU data that we are going to save/restore is
found there too.

Thanks to Arjan van de Ven <arjan@linux.intel.com> for pointing us to
the %esp issue.

Cc: Arjan van de Ven <arjan@linux.intel.com>
Reported-and-tested-by: Raphael Prevost <raphael@buro.asia>
Acked-and-tested-by: Suresh Siddha <suresh.b.siddha@intel.com>
Tested-by: Peter Anvin <hpa@zytor.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

+30 -32
+22 -22
arch/x86/include/asm/i387.h
··· 264 264 * be preemption protection *and* they need to be 265 265 * properly paired with the CR0.TS changes! 266 266 */ 267 - static inline int __thread_has_fpu(struct thread_info *ti) 267 + static inline int __thread_has_fpu(struct task_struct *tsk) 268 268 { 269 - return ti->status & TS_USEDFPU; 269 + return tsk->thread.has_fpu; 270 270 } 271 271 272 272 /* Must be paired with an 'stts' after! */ 273 - static inline void __thread_clear_has_fpu(struct thread_info *ti) 273 + static inline void __thread_clear_has_fpu(struct task_struct *tsk) 274 274 { 275 - ti->status &= ~TS_USEDFPU; 275 + tsk->thread.has_fpu = 0; 276 276 } 277 277 278 278 /* Must be paired with a 'clts' before! */ 279 - static inline void __thread_set_has_fpu(struct thread_info *ti) 279 + static inline void __thread_set_has_fpu(struct task_struct *tsk) 280 280 { 281 - ti->status |= TS_USEDFPU; 281 + tsk->thread.has_fpu = 1; 282 282 } 283 283 284 284 /* ··· 288 288 * These generally need preemption protection to work, 289 289 * do try to avoid using these on their own. 
290 290 */ 291 - static inline void __thread_fpu_end(struct thread_info *ti) 291 + static inline void __thread_fpu_end(struct task_struct *tsk) 292 292 { 293 - __thread_clear_has_fpu(ti); 293 + __thread_clear_has_fpu(tsk); 294 294 stts(); 295 295 } 296 296 297 - static inline void __thread_fpu_begin(struct thread_info *ti) 297 + static inline void __thread_fpu_begin(struct task_struct *tsk) 298 298 { 299 299 clts(); 300 - __thread_set_has_fpu(ti); 300 + __thread_set_has_fpu(tsk); 301 301 } 302 302 303 303 /* ··· 308 308 309 309 static inline void __unlazy_fpu(struct task_struct *tsk) 310 310 { 311 - if (__thread_has_fpu(task_thread_info(tsk))) { 311 + if (__thread_has_fpu(tsk)) { 312 312 __save_init_fpu(tsk); 313 - __thread_fpu_end(task_thread_info(tsk)); 313 + __thread_fpu_end(tsk); 314 314 } else 315 315 tsk->fpu_counter = 0; 316 316 } 317 317 318 318 static inline void __clear_fpu(struct task_struct *tsk) 319 319 { 320 - if (__thread_has_fpu(task_thread_info(tsk))) { 320 + if (__thread_has_fpu(tsk)) { 321 321 /* Ignore delayed exceptions from user space */ 322 322 asm volatile("1: fwait\n" 323 323 "2:\n" 324 324 _ASM_EXTABLE(1b, 2b)); 325 - __thread_fpu_end(task_thread_info(tsk)); 325 + __thread_fpu_end(tsk); 326 326 } 327 327 } 328 328 ··· 337 337 */ 338 338 static inline bool interrupted_kernel_fpu_idle(void) 339 339 { 340 - return !__thread_has_fpu(current_thread_info()) && 340 + return !__thread_has_fpu(current) && 341 341 (read_cr0() & X86_CR0_TS); 342 342 } 343 343 ··· 371 371 372 372 static inline void kernel_fpu_begin(void) 373 373 { 374 - struct thread_info *me = current_thread_info(); 374 + struct task_struct *me = current; 375 375 376 376 WARN_ON_ONCE(!irq_fpu_usable()); 377 377 preempt_disable(); 378 378 if (__thread_has_fpu(me)) { 379 - __save_init_fpu(me->task); 379 + __save_init_fpu(me); 380 380 __thread_clear_has_fpu(me); 381 381 /* We do 'stts()' in kernel_fpu_end() */ 382 382 } else ··· 441 441 */ 442 442 static inline int user_has_fpu(void) 
443 443 { 444 - return __thread_has_fpu(current_thread_info()); 444 + return __thread_has_fpu(current); 445 445 } 446 446 447 447 static inline void user_fpu_end(void) 448 448 { 449 449 preempt_disable(); 450 - __thread_fpu_end(current_thread_info()); 450 + __thread_fpu_end(current); 451 451 preempt_enable(); 452 452 } 453 453 ··· 455 455 { 456 456 preempt_disable(); 457 457 if (!user_has_fpu()) 458 - __thread_fpu_begin(current_thread_info()); 458 + __thread_fpu_begin(current); 459 459 preempt_enable(); 460 460 } 461 461 ··· 464 464 */ 465 465 static inline void save_init_fpu(struct task_struct *tsk) 466 466 { 467 - WARN_ON_ONCE(!__thread_has_fpu(task_thread_info(tsk))); 467 + WARN_ON_ONCE(!__thread_has_fpu(tsk)); 468 468 preempt_disable(); 469 469 __save_init_fpu(tsk); 470 - __thread_fpu_end(task_thread_info(tsk)); 470 + __thread_fpu_end(tsk); 471 471 preempt_enable(); 472 472 } 473 473
+1
arch/x86/include/asm/processor.h
··· 454 454 unsigned long trap_no; 455 455 unsigned long error_code; 456 456 /* floating point and extended processor state */ 457 + unsigned long has_fpu; 457 458 struct fpu fpu; 458 459 #ifdef CONFIG_X86_32 459 460 /* Virtual 86 mode info */
-2
arch/x86/include/asm/thread_info.h
··· 247 247 * ever touches our thread-synchronous status, so we don't 248 248 * have to worry about atomic accesses. 249 249 */ 250 - #define TS_USEDFPU 0x0001 /* FPU was used by this task 251 - this quantum (SMP) */ 252 250 #define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/ 253 251 #define TS_POLLING 0x0004 /* idle task polling need_resched, 254 252 skip sending interrupt */
+5 -6
arch/x86/kernel/traps.c
··· 582 582 */ 583 583 void math_state_restore(void) 584 584 { 585 - struct thread_info *thread = current_thread_info(); 586 - struct task_struct *tsk = thread->task; 585 + struct task_struct *tsk = current; 587 586 588 587 /* We need a safe address that is cheap to find and that is already 589 - in L1. We just brought in "thread->task", so use that */ 590 - #define safe_address (thread->task) 588 + in L1. We're just bringing in "tsk->thread.has_fpu", so use that */ 589 + #define safe_address (tsk->thread.has_fpu) 591 590 592 591 if (!tsk_used_math(tsk)) { 593 592 local_irq_enable(); ··· 603 604 local_irq_disable(); 604 605 } 605 606 606 - __thread_fpu_begin(thread); 607 + __thread_fpu_begin(tsk); 607 608 608 609 /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception 609 610 is pending. Clear the x87 state here by setting it to fixed ··· 619 620 * Paranoid restore. send a SIGSEGV if we fail to restore the state. 620 621 */ 621 622 if (unlikely(restore_fpu_checking(tsk))) { 622 - __thread_fpu_end(thread); 623 + __thread_fpu_end(tsk); 623 624 force_sig(SIGSEGV, tsk); 624 625 return; 625 626 }
+1 -1
arch/x86/kernel/xsave.c
··· 47 47 if (!fx) 48 48 return; 49 49 50 - BUG_ON(__thread_has_fpu(task_thread_info(tsk))); 50 + BUG_ON(__thread_has_fpu(tsk)); 51 51 52 52 xstate_bv = tsk->thread.fpu.state->xsave.xsave_hdr.xstate_bv; 53 53
+1 -1
arch/x86/kvm/vmx.c
··· 1457 1457 #ifdef CONFIG_X86_64 1458 1458 wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); 1459 1459 #endif 1460 - if (__thread_has_fpu(current_thread_info())) 1460 + if (__thread_has_fpu(current)) 1461 1461 clts(); 1462 1462 load_gdt(&__get_cpu_var(host_gdt)); 1463 1463 }