Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'locking-urgent-for-linus.patch' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull rtmutex fixes from Thomas Gleixner:
"Another three patches to make the rtmutex code more robust. That's
the last urgent fallout from the big futex/rtmutex investigation"

* 'locking-urgent-for-linus.patch' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
rtmutex: Plug slow unlock race
rtmutex: Detect changes in the pi lock chain
rtmutex: Handle deadlock detection smarter

+218 -35
+5
kernel/locking/rtmutex-debug.h
··· 31 31 { 32 32 return (waiter != NULL); 33 33 } 34 + 35 + static inline void rt_mutex_print_deadlock(struct rt_mutex_waiter *w) 36 + { 37 + debug_rt_mutex_print_deadlock(w); 38 + }
+208 -35
kernel/locking/rtmutex.c
··· 83 83 owner = *p; 84 84 } while (cmpxchg(p, owner, owner | RT_MUTEX_HAS_WAITERS) != owner); 85 85 } 86 + 87 + /* 88 + * Safe fastpath aware unlock: 89 + * 1) Clear the waiters bit 90 + * 2) Drop lock->wait_lock 91 + * 3) Try to unlock the lock with cmpxchg 92 + */ 93 + static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock) 94 + __releases(lock->wait_lock) 95 + { 96 + struct task_struct *owner = rt_mutex_owner(lock); 97 + 98 + clear_rt_mutex_waiters(lock); 99 + raw_spin_unlock(&lock->wait_lock); 100 + /* 101 + * If a new waiter comes in between the unlock and the cmpxchg 102 + * we have two situations: 103 + * 104 + * unlock(wait_lock); 105 + * lock(wait_lock); 106 + * cmpxchg(p, owner, 0) == owner 107 + * mark_rt_mutex_waiters(lock); 108 + * acquire(lock); 109 + * or: 110 + * 111 + * unlock(wait_lock); 112 + * lock(wait_lock); 113 + * mark_rt_mutex_waiters(lock); 114 + * 115 + * cmpxchg(p, owner, 0) != owner 116 + * enqueue_waiter(); 117 + * unlock(wait_lock); 118 + * lock(wait_lock); 119 + * wake waiter(); 120 + * unlock(wait_lock); 121 + * lock(wait_lock); 122 + * acquire(lock); 123 + */ 124 + return rt_mutex_cmpxchg(lock, owner, NULL); 125 + } 126 + 86 127 #else 87 128 # define rt_mutex_cmpxchg(l,c,n) (0) 88 129 static inline void mark_rt_mutex_waiters(struct rt_mutex *lock) 89 130 { 90 131 lock->owner = (struct task_struct *) 91 132 ((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS); 133 + } 134 + 135 + /* 136 + * Simple slow path only version: lock->owner is protected by lock->wait_lock. 137 + */ 138 + static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock) 139 + __releases(lock->wait_lock) 140 + { 141 + lock->owner = NULL; 142 + raw_spin_unlock(&lock->wait_lock); 143 + return true; 92 144 } 93 145 #endif 94 146 ··· 312 260 */ 313 261 int max_lock_depth = 1024; 314 262 263 + static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p) 264 + { 265 + return p->pi_blocked_on ? 
p->pi_blocked_on->lock : NULL; 266 + } 267 + 315 268 /* 316 269 * Adjust the priority chain. Also used for deadlock detection. 317 270 * Decreases task's usage by one - may thus free the task. 318 271 * 319 - * @task: the task owning the mutex (owner) for which a chain walk is probably 320 - * needed 272 + * @task: the task owning the mutex (owner) for which a chain walk is 273 + * probably needed 321 274 * @deadlock_detect: do we have to carry out deadlock detection? 322 - * @orig_lock: the mutex (can be NULL if we are walking the chain to recheck 323 - * things for a task that has just got its priority adjusted, and 324 - * is waiting on a mutex) 275 + * @orig_lock: the mutex (can be NULL if we are walking the chain to recheck 276 + * things for a task that has just got its priority adjusted, and 277 + * is waiting on a mutex) 278 + * @next_lock: the mutex on which the owner of @orig_lock was blocked before 279 + * we dropped its pi_lock. Is never dereferenced, only used for 280 + * comparison to detect lock chain changes. 325 281 * @orig_waiter: rt_mutex_waiter struct for the task that has just donated 326 - * its priority to the mutex owner (can be NULL in the case 327 - * depicted above or if the top waiter is gone away and we are 328 - * actually deboosting the owner) 329 - * @top_task: the current top waiter 282 + * its priority to the mutex owner (can be NULL in the case 283 + * depicted above or if the top waiter is gone away and we are 284 + * actually deboosting the owner) 285 + * @top_task: the current top waiter 330 286 * 331 287 * Returns 0 or -EDEADLK. 332 288 */ 333 289 static int rt_mutex_adjust_prio_chain(struct task_struct *task, 334 290 int deadlock_detect, 335 291 struct rt_mutex *orig_lock, 292 + struct rt_mutex *next_lock, 336 293 struct rt_mutex_waiter *orig_waiter, 337 294 struct task_struct *top_task) 338 295 { ··· 375 314 } 376 315 put_task_struct(task); 377 316 378 - return deadlock_detect ? 
-EDEADLK : 0; 317 + return -EDEADLK; 379 318 } 380 319 retry: 381 320 /* ··· 397 336 * the previous owner of the lock might have released the lock. 398 337 */ 399 338 if (orig_waiter && !rt_mutex_owner(orig_lock)) 339 + goto out_unlock_pi; 340 + 341 + /* 342 + * We dropped all locks after taking a refcount on @task, so 343 + * the task might have moved on in the lock chain or even left 344 + * the chain completely and blocks now on an unrelated lock or 345 + * on @orig_lock. 346 + * 347 + * We stored the lock on which @task was blocked in @next_lock, 348 + * so we can detect the chain change. 349 + */ 350 + if (next_lock != waiter->lock) 400 351 goto out_unlock_pi; 401 352 402 353 /* ··· 450 377 if (lock == orig_lock || rt_mutex_owner(lock) == top_task) { 451 378 debug_rt_mutex_deadlock(deadlock_detect, orig_waiter, lock); 452 379 raw_spin_unlock(&lock->wait_lock); 453 - ret = deadlock_detect ? -EDEADLK : 0; 380 + ret = -EDEADLK; 454 381 goto out_unlock_pi; 455 382 } 456 383 ··· 495 422 __rt_mutex_adjust_prio(task); 496 423 } 497 424 425 + /* 426 + * Check whether the task which owns the current lock is pi 427 + * blocked itself. If yes we store a pointer to the lock for 428 + * the lock chain change detection above. After we dropped 429 + * task->pi_lock next_lock cannot be dereferenced anymore. 430 + */ 431 + next_lock = task_blocked_on_lock(task); 432 + 498 433 raw_spin_unlock_irqrestore(&task->pi_lock, flags); 499 434 500 435 top_waiter = rt_mutex_top_waiter(lock); 501 436 raw_spin_unlock(&lock->wait_lock); 437 + 438 + /* 439 + * We reached the end of the lock chain. Stop right here. No 440 + * point to go back just to figure that out. 
441 + */ 442 + if (!next_lock) 443 + goto out_put_task; 502 444 503 445 if (!detect_deadlock && waiter != top_waiter) 504 446 goto out_put_task; ··· 624 536 { 625 537 struct task_struct *owner = rt_mutex_owner(lock); 626 538 struct rt_mutex_waiter *top_waiter = waiter; 627 - unsigned long flags; 539 + struct rt_mutex *next_lock; 628 540 int chain_walk = 0, res; 541 + unsigned long flags; 629 542 630 543 /* 631 544 * Early deadlock detection. We really don't want the task to ··· 637 548 * which is wrong, as the other waiter is not in a deadlock 638 549 * situation. 639 550 */ 640 - if (detect_deadlock && owner == task) 551 + if (owner == task) 641 552 return -EDEADLK; 642 553 643 554 raw_spin_lock_irqsave(&task->pi_lock, flags); ··· 658 569 if (!owner) 659 570 return 0; 660 571 572 + raw_spin_lock_irqsave(&owner->pi_lock, flags); 661 573 if (waiter == rt_mutex_top_waiter(lock)) { 662 - raw_spin_lock_irqsave(&owner->pi_lock, flags); 663 574 rt_mutex_dequeue_pi(owner, top_waiter); 664 575 rt_mutex_enqueue_pi(owner, waiter); 665 576 666 577 __rt_mutex_adjust_prio(owner); 667 578 if (owner->pi_blocked_on) 668 579 chain_walk = 1; 669 - raw_spin_unlock_irqrestore(&owner->pi_lock, flags); 670 - } 671 - else if (debug_rt_mutex_detect_deadlock(waiter, detect_deadlock)) 580 + } else if (debug_rt_mutex_detect_deadlock(waiter, detect_deadlock)) { 672 581 chain_walk = 1; 582 + } 673 583 674 - if (!chain_walk) 584 + /* Store the lock on which owner is blocked or NULL */ 585 + next_lock = task_blocked_on_lock(owner); 586 + 587 + raw_spin_unlock_irqrestore(&owner->pi_lock, flags); 588 + /* 589 + * Even if full deadlock detection is on, if the owner is not 590 + * blocked itself, we can avoid finding this out in the chain 591 + * walk. 
592 + */ 593 + if (!chain_walk || !next_lock) 675 594 return 0; 676 595 677 596 /* ··· 691 594 692 595 raw_spin_unlock(&lock->wait_lock); 693 596 694 - res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, waiter, 695 - task); 597 + res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, 598 + next_lock, waiter, task); 696 599 697 600 raw_spin_lock(&lock->wait_lock); 698 601 ··· 702 605 /* 703 606 * Wake up the next waiter on the lock. 704 607 * 705 - * Remove the top waiter from the current tasks waiter list and wake it up. 608 + * Remove the top waiter from the current task's pi waiter list and 609 + * wake it up. 706 610 * 707 611 * Called with lock->wait_lock held. 708 612 */ ··· 724 626 */ 725 627 rt_mutex_dequeue_pi(current, waiter); 726 628 727 - rt_mutex_set_owner(lock, NULL); 629 + /* 630 + * As we are waking up the top waiter, and the waiter stays 631 + * queued on the lock until it gets the lock, this lock 632 + * obviously has waiters. Just set the bit here and this has 633 + * the added benefit of forcing all new tasks into the 634 + * slow path making sure no task of lower priority than 635 + * the top waiter can steal this lock. 636 + */ 637 + lock->owner = (void *) RT_MUTEX_HAS_WAITERS; 728 638 729 639 raw_spin_unlock_irqrestore(&current->pi_lock, flags); 730 640 641 + /* 642 + * It's safe to dereference waiter as it cannot go away as 643 + * long as we hold lock->wait_lock. The waiter task needs to 644 + * acquire it in order to dequeue the waiter. 
645 + */ 731 646 wake_up_process(waiter->task); 732 647 } 733 648 ··· 755 644 { 756 645 int first = (waiter == rt_mutex_top_waiter(lock)); 757 646 struct task_struct *owner = rt_mutex_owner(lock); 647 + struct rt_mutex *next_lock = NULL; 758 648 unsigned long flags; 759 - int chain_walk = 0; 760 649 761 650 raw_spin_lock_irqsave(&current->pi_lock, flags); 762 651 rt_mutex_dequeue(lock, waiter); ··· 780 669 } 781 670 __rt_mutex_adjust_prio(owner); 782 671 783 - if (owner->pi_blocked_on) 784 - chain_walk = 1; 672 + /* Store the lock on which owner is blocked or NULL */ 673 + next_lock = task_blocked_on_lock(owner); 785 674 786 675 raw_spin_unlock_irqrestore(&owner->pi_lock, flags); 787 676 } 788 677 789 - if (!chain_walk) 678 + if (!next_lock) 790 679 return; 791 680 792 681 /* gets dropped in rt_mutex_adjust_prio_chain()! */ ··· 794 683 795 684 raw_spin_unlock(&lock->wait_lock); 796 685 797 - rt_mutex_adjust_prio_chain(owner, 0, lock, NULL, current); 686 + rt_mutex_adjust_prio_chain(owner, 0, lock, next_lock, NULL, current); 798 687 799 688 raw_spin_lock(&lock->wait_lock); 800 689 } ··· 807 696 void rt_mutex_adjust_pi(struct task_struct *task) 808 697 { 809 698 struct rt_mutex_waiter *waiter; 699 + struct rt_mutex *next_lock; 810 700 unsigned long flags; 811 701 812 702 raw_spin_lock_irqsave(&task->pi_lock, flags); ··· 818 706 raw_spin_unlock_irqrestore(&task->pi_lock, flags); 819 707 return; 820 708 } 821 - 709 + next_lock = waiter->lock; 822 710 raw_spin_unlock_irqrestore(&task->pi_lock, flags); 823 711 824 712 /* gets dropped in rt_mutex_adjust_prio_chain()! 
*/ 825 713 get_task_struct(task); 826 - rt_mutex_adjust_prio_chain(task, 0, NULL, NULL, task); 714 + 715 + rt_mutex_adjust_prio_chain(task, 0, NULL, next_lock, NULL, task); 827 716 } 828 717 829 718 /** ··· 876 763 return ret; 877 764 } 878 765 766 + static void rt_mutex_handle_deadlock(int res, int detect_deadlock, 767 + struct rt_mutex_waiter *w) 768 + { 769 + /* 770 + * If the result is not -EDEADLOCK or the caller requested 771 + * deadlock detection, nothing to do here. 772 + */ 773 + if (res != -EDEADLOCK || detect_deadlock) 774 + return; 775 + 776 + /* 777 + * Yell loudly and stop the task right here. 778 + */ 779 + rt_mutex_print_deadlock(w); 780 + while (1) { 781 + set_current_state(TASK_INTERRUPTIBLE); 782 + schedule(); 783 + } 784 + } 785 + 879 786 /* 880 787 * Slow path lock function: 881 788 */ ··· 935 802 936 803 set_current_state(TASK_RUNNING); 937 804 938 - if (unlikely(ret)) 805 + if (unlikely(ret)) { 939 806 remove_waiter(lock, &waiter); 807 + rt_mutex_handle_deadlock(ret, detect_deadlock, &waiter); 808 + } 940 809 941 810 /* 942 811 * try_to_take_rt_mutex() sets the waiter bit ··· 994 859 995 860 rt_mutex_deadlock_account_unlock(current); 996 861 997 - if (!rt_mutex_has_waiters(lock)) { 998 - lock->owner = NULL; 999 - raw_spin_unlock(&lock->wait_lock); 1000 - return; 862 + /* 863 + * We must be careful here if the fast path is enabled. If we 864 + * have no waiters queued we cannot set owner to NULL here 865 + * because of: 866 + * 867 + * foo->lock->owner = NULL; 868 + * rtmutex_lock(foo->lock); <- fast path 869 + * free = atomic_dec_and_test(foo->refcnt); 870 + * rtmutex_unlock(foo->lock); <- fast path 871 + * if (free) 872 + * kfree(foo); 873 + * raw_spin_unlock(foo->lock->wait_lock); 874 + * 875 + * So for the fastpath enabled kernel: 876 + * 877 + * Nothing can set the waiters bit as long as we hold 878 + * lock->wait_lock. 
So we do the following sequence: 879 + * 880 + * owner = rt_mutex_owner(lock); 881 + * clear_rt_mutex_waiters(lock); 882 + * raw_spin_unlock(&lock->wait_lock); 883 + * if (cmpxchg(&lock->owner, owner, 0) == owner) 884 + * return; 885 + * goto retry; 886 + * 887 + * The fastpath disabled variant is simple as all access to 888 + * lock->owner is serialized by lock->wait_lock: 889 + * 890 + * lock->owner = NULL; 891 + * raw_spin_unlock(&lock->wait_lock); 892 + */ 893 + while (!rt_mutex_has_waiters(lock)) { 894 + /* Drops lock->wait_lock ! */ 895 + if (unlock_rt_mutex_safe(lock) == true) 896 + return; 897 + /* Relock the rtmutex and try again */ 898 + raw_spin_lock(&lock->wait_lock); 1001 899 } 1002 900 901 + /* 902 + * The wakeup next waiter path does not suffer from the above 903 + * race. See the comments there. 904 + */ 1003 905 wakeup_next_waiter(lock); 1004 906 1005 907 raw_spin_unlock(&lock->wait_lock); ··· 1284 1112 return 1; 1285 1113 } 1286 1114 1287 - ret = task_blocks_on_rt_mutex(lock, waiter, task, detect_deadlock); 1115 + /* We enforce deadlock detection for futexes */ 1116 + ret = task_blocks_on_rt_mutex(lock, waiter, task, 1); 1288 1117 1289 1118 if (ret && !rt_mutex_owner(lock)) { 1290 1119 /*
+5
kernel/locking/rtmutex.h
··· 24 24 #define debug_rt_mutex_print_deadlock(w) do { } while (0) 25 25 #define debug_rt_mutex_detect_deadlock(w,d) (d) 26 26 #define debug_rt_mutex_reset_waiter(w) do { } while (0) 27 + 28 + static inline void rt_mutex_print_deadlock(struct rt_mutex_waiter *w) 29 + { 30 + WARN(1, "rtmutex deadlock detected\n"); 31 + }