Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

futex: revert back to the explicit waiter counting code

Srikar Dronamraju reports that commit b0c29f79ecea ("futexes: Avoid
taking the hb->lock if there's nothing to wake up") causes java threads
getting stuck on futexes when runing specjbb on a power7 numa box.

The cause appears to be that the powerpc spinlocks aren't using the same
ticket lock model that we use on x86 (and other) architectures, which in
turn result in the "spin_is_locked()" test in hb_waiters_pending()
occasionally reporting an unlocked spinlock even when there are pending
waiters.

So this reinstates Davidlohr Bueso's original explicit waiter counting
code, which I had convinced Davidlohr to drop in favor of figuring out
the pending waiters by just using the existing state of the spinlock and
the wait queue.

Reported-and-tested-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Original-code-by: Davidlohr Bueso <davidlohr@hp.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

+43 -10
+43 -10
kernel/futex.c
··· 234 234 * waiting on a futex. 235 235 */ 236 236 struct futex_hash_bucket { 237 + atomic_t waiters; 237 238 spinlock_t lock; 238 239 struct plist_head chain; 239 240 } ____cacheline_aligned_in_smp; ··· 254 253 smp_mb__after_atomic_inc(); 255 254 } 256 255 257 - static inline bool hb_waiters_pending(struct futex_hash_bucket *hb) 256 + /* 257 + * Reflects a new waiter being added to the waitqueue. 258 + */ 259 + static inline void hb_waiters_inc(struct futex_hash_bucket *hb) 258 260 { 259 261 #ifdef CONFIG_SMP 262 + atomic_inc(&hb->waiters); 260 263 /* 261 - * Tasks trying to enter the critical region are most likely 262 - * potential waiters that will be added to the plist. Ensure 263 - * that wakers won't miss to-be-slept tasks in the window between 264 - * the wait call and the actual plist_add. 264 + * Full barrier (A), see the ordering comment above. 265 265 */ 266 - if (spin_is_locked(&hb->lock)) 267 - return true; 268 - smp_rmb(); /* Make sure we check the lock state first */ 266 + smp_mb__after_atomic_inc(); 267 + #endif 268 + } 269 269 270 - return !plist_head_empty(&hb->chain); 270 + /* 271 + * Reflects a waiter being removed from the waitqueue by wakeup 272 + * paths. 273 + */ 274 + static inline void hb_waiters_dec(struct futex_hash_bucket *hb) 275 + { 276 + #ifdef CONFIG_SMP 277 + atomic_dec(&hb->waiters); 278 + #endif 279 + } 280 + 281 + static inline int hb_waiters_pending(struct futex_hash_bucket *hb) 282 + { 283 + #ifdef CONFIG_SMP 284 + return atomic_read(&hb->waiters); 271 285 #else 272 - return true; 286 + return 1; 273 287 #endif 274 288 } 275 289 ··· 970 954 971 955 hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock); 972 956 plist_del(&q->list, &hb->chain); 957 + hb_waiters_dec(hb); 973 958 } 974 959 975 960 /* ··· 1274 1257 */ 1275 1258 if (likely(&hb1->chain != &hb2->chain)) { 1276 1259 plist_del(&q->list, &hb1->chain); 1260 + hb_waiters_dec(hb1); 1277 1261 plist_add(&q->list, &hb2->chain); 1262 + hb_waiters_inc(hb2); 1278 1263 q->lock_ptr = &hb2->lock; 1279 1264 } 1280 1265 get_futex_key_refs(key2); ··· 1619 1600 struct futex_hash_bucket *hb; 1620 1601 1621 1602 hb = hash_futex(&q->key); 1603 + 1604 + /* 1605 + * Increment the counter before taking the lock so that 1606 + * a potential waker won't miss a to-be-slept task that is 1607 + * waiting for the spinlock. This is safe as all queue_lock() 1608 + * users end up calling queue_me(). Similarly, for housekeeping, 1609 + * decrement the counter at queue_unlock() when some error has 1610 + * occurred and we don't end up adding the task to the list. 1611 + */ 1612 + hb_waiters_inc(hb); 1613 + 1622 1614 q->lock_ptr = &hb->lock; 1623 1615 1624 1616 spin_lock(&hb->lock); /* implies MB (A) */ ··· 1641 1611 __releases(&hb->lock) 1642 1612 { 1643 1613 spin_unlock(&hb->lock); 1614 + hb_waiters_dec(hb); 1644 1615 } 1645 1616 1646 1617 /** ··· 2373 2342 * Unqueue the futex_q and determine which it was. 2374 2343 */ 2375 2344 plist_del(&q->list, &hb->chain); 2345 + hb_waiters_dec(hb); 2376 2346 2377 2347 /* Handle spurious wakeups gracefully */ 2378 2348 ret = -EWOULDBLOCK; ··· 2907 2875 futex_cmpxchg_enabled = 1; 2908 2876 2909 2877 for (i = 0; i < futex_hashsize; i++) { 2878 + atomic_set(&futex_queues[i].waiters, 0); 2910 2879 plist_head_init(&futex_queues[i].chain); 2911 2880 spin_lock_init(&futex_queues[i].lock); 2912 2881 }