Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

locking/rwsem: Remove the list_head from struct rw_semaphore

Instead of embedding a list_head in struct rw_semaphore, store a pointer
to the first waiter. The list of waiters remains a doubly linked list
so we can efficiently add to the tail of the list and remove from the
front (or middle) of it.

Some of the list manipulation becomes more complicated, but it's a
reasonable tradeoff on the slow paths to shrink some core data structures
like struct inode.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260305195545.3707590-2-willy@infradead.org

authored by

Matthew Wilcox (Oracle) and committed by
Peter Zijlstra
1ea4b473 b91d5d4b

+62 -36
+4 -4
include/linux/rwsem.h
··· 57 57 struct optimistic_spin_queue osq; /* spinner MCS lock */ 58 58 #endif 59 59 raw_spinlock_t wait_lock; 60 - struct list_head wait_list; 60 + struct rwsem_waiter *first_waiter; 61 61 #ifdef CONFIG_DEBUG_RWSEMS 62 62 void *magic; 63 63 #endif ··· 106 106 .owner = ATOMIC_LONG_INIT(0), \ 107 107 __RWSEM_OPT_INIT(name) \ 108 108 .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock),\ 109 - .wait_list = LIST_HEAD_INIT((name).wait_list), \ 109 + .first_waiter = NULL, \ 110 110 __RWSEM_DEBUG_INIT(name) \ 111 111 __RWSEM_DEP_MAP_INIT(name) } 112 112 ··· 129 129 * rwsem to see if somebody from an incompatible type is wanting access to the 130 130 * lock. 131 131 */ 132 - static inline int rwsem_is_contended(struct rw_semaphore *sem) 132 + static inline bool rwsem_is_contended(struct rw_semaphore *sem) 133 133 { 134 - return !list_empty(&sem->wait_list); 134 + return sem->first_waiter != NULL; 135 135 } 136 136 137 137 #if defined(CONFIG_DEBUG_RWSEMS) || defined(CONFIG_DETECT_HUNG_TASK_BLOCKER)
+58 -32
kernel/locking/rwsem.c
··· 72 72 #c, atomic_long_read(&(sem)->count), \ 73 73 (unsigned long) sem->magic, \ 74 74 atomic_long_read(&(sem)->owner), (long)current, \ 75 - list_empty(&(sem)->wait_list) ? "" : "not ")) \ 75 + (sem)->first_waiter ? "" : "not ")) \ 76 76 debug_locks_off(); \ 77 77 } while (0) 78 78 #else ··· 321 321 #endif 322 322 atomic_long_set(&sem->count, RWSEM_UNLOCKED_VALUE); 323 323 raw_spin_lock_init(&sem->wait_lock); 324 - INIT_LIST_HEAD(&sem->wait_list); 324 + sem->first_waiter = NULL; 325 325 atomic_long_set(&sem->owner, 0L); 326 326 #ifdef CONFIG_RWSEM_SPIN_ON_OWNER 327 327 osq_lock_init(&sem->osq); ··· 341 341 unsigned long timeout; 342 342 bool handoff_set; 343 343 }; 344 - #define rwsem_first_waiter(sem) \ 345 - list_first_entry(&sem->wait_list, struct rwsem_waiter, list) 346 344 347 345 enum rwsem_wake_type { 348 346 RWSEM_WAKE_ANY, /* Wake whatever's at head of wait list */ ··· 363 365 */ 364 366 #define MAX_READERS_WAKEUP 0x100 365 367 366 - static inline void 367 - rwsem_add_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter) 368 + static inline 369 + bool __rwsem_del_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter) 368 370 { 369 - lockdep_assert_held(&sem->wait_lock); 370 - list_add_tail(&waiter->list, &sem->wait_list); 371 - /* caller will set RWSEM_FLAG_WAITERS */ 371 + if (list_empty(&waiter->list)) { 372 + sem->first_waiter = NULL; 373 + return true; 374 + } 375 + 376 + if (sem->first_waiter == waiter) { 377 + sem->first_waiter = list_first_entry(&waiter->list, 378 + struct rwsem_waiter, list); 379 + } 380 + list_del(&waiter->list); 381 + 382 + return false; 372 383 } 373 384 374 385 /* ··· 392 385 rwsem_del_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter) 393 386 { 394 387 lockdep_assert_held(&sem->wait_lock); 395 - list_del(&waiter->list); 396 - if (likely(!list_empty(&sem->wait_list))) 388 + if (__rwsem_del_waiter(sem, waiter)) 397 389 return true; 398 - 399 390 atomic_long_andnot(RWSEM_FLAG_HANDOFF | 
RWSEM_FLAG_WAITERS, &sem->count); 400 391 return false; 392 + } 393 + 394 + static inline 395 + struct rwsem_waiter *next_waiter(const struct rw_semaphore *sem, 396 + const struct rwsem_waiter *waiter) 397 + { 398 + struct rwsem_waiter *next = list_first_entry(&waiter->list, 399 + struct rwsem_waiter, list); 400 + if (next == sem->first_waiter) 401 + return NULL; 402 + return next; 401 403 } 402 404 403 405 /* ··· 427 411 enum rwsem_wake_type wake_type, 428 412 struct wake_q_head *wake_q) 429 413 { 430 - struct rwsem_waiter *waiter, *tmp; 414 + struct rwsem_waiter *waiter, *next; 431 415 long oldcount, woken = 0, adjustment = 0; 432 416 struct list_head wlist; 433 417 ··· 437 421 * Take a peek at the queue head waiter such that we can determine 438 422 * the wakeup(s) to perform. 439 423 */ 440 - waiter = rwsem_first_waiter(sem); 424 + waiter = sem->first_waiter; 441 425 442 426 if (waiter->type == RWSEM_WAITING_FOR_WRITE) { 443 427 if (wake_type == RWSEM_WAKE_ANY) { ··· 522 506 * put them into wake_q to be woken up later. 523 507 */ 524 508 INIT_LIST_HEAD(&wlist); 525 - list_for_each_entry_safe(waiter, tmp, &sem->wait_list, list) { 509 + do { 510 + next = next_waiter(sem, waiter); 526 511 if (waiter->type == RWSEM_WAITING_FOR_WRITE) 527 512 continue; 528 513 529 514 woken++; 530 515 list_move_tail(&waiter->list, &wlist); 516 + if (sem->first_waiter == waiter) 517 + sem->first_waiter = next; 531 518 532 519 /* 533 520 * Limit # of readers that can be woken up per wakeup call. 534 521 */ 535 522 if (unlikely(woken >= MAX_READERS_WAKEUP)) 536 523 break; 537 - } 524 + } while ((waiter = next) != NULL); 538 525 539 526 adjustment = woken * RWSEM_READER_BIAS - adjustment; 540 527 lockevent_cond_inc(rwsem_wake_reader, woken); 541 528 542 529 oldcount = atomic_long_read(&sem->count); 543 - if (list_empty(&sem->wait_list)) { 530 + if (!sem->first_waiter) { 544 531 /* 545 532 * Combined with list_move_tail() above, this implies 546 533 * rwsem_del_waiter(). 
··· 564 545 atomic_long_add(adjustment, &sem->count); 565 546 566 547 /* 2nd pass */ 567 - list_for_each_entry_safe(waiter, tmp, &wlist, list) { 548 + list_for_each_entry_safe(waiter, next, &wlist, list) { 568 549 struct task_struct *tsk; 569 550 570 551 tsk = waiter->task; ··· 596 577 struct wake_q_head *wake_q) 597 578 __releases(&sem->wait_lock) 598 579 { 599 - bool first = rwsem_first_waiter(sem) == waiter; 580 + bool first = sem->first_waiter == waiter; 600 581 601 582 wake_q_init(wake_q); 602 583 ··· 622 603 static inline bool rwsem_try_write_lock(struct rw_semaphore *sem, 623 604 struct rwsem_waiter *waiter) 624 605 { 625 - struct rwsem_waiter *first = rwsem_first_waiter(sem); 606 + struct rwsem_waiter *first = sem->first_waiter; 626 607 long count, new; 627 608 628 609 lockdep_assert_held(&sem->wait_lock); ··· 658 639 new |= RWSEM_WRITER_LOCKED; 659 640 new &= ~RWSEM_FLAG_HANDOFF; 660 641 661 - if (list_is_singular(&sem->wait_list)) 642 + if (list_empty(&first->list)) 662 643 new &= ~RWSEM_FLAG_WAITERS; 663 644 } 664 645 } while (!atomic_long_try_cmpxchg_acquire(&sem->count, &count, new)); ··· 678 659 * Have rwsem_try_write_lock() fully imply rwsem_del_waiter() on 679 660 * success. 
680 661 */ 681 - list_del(&waiter->list); 662 + __rwsem_del_waiter(sem, waiter); 663 + 682 664 rwsem_set_owner(sem); 683 665 return true; 684 666 } ··· 1014 994 { 1015 995 long adjustment = -RWSEM_READER_BIAS; 1016 996 long rcnt = (count >> RWSEM_READER_SHIFT); 1017 - struct rwsem_waiter waiter; 997 + struct rwsem_waiter waiter, *first; 1018 998 DEFINE_WAKE_Q(wake_q); 1019 999 1020 1000 /* ··· 1039 1019 */ 1040 1020 if ((rcnt == 1) && (count & RWSEM_FLAG_WAITERS)) { 1041 1021 raw_spin_lock_irq(&sem->wait_lock); 1042 - if (!list_empty(&sem->wait_list)) 1022 + if (sem->first_waiter) 1043 1023 rwsem_mark_wake(sem, RWSEM_WAKE_READ_OWNED, 1044 1024 &wake_q); 1045 1025 raw_spin_unlock_irq(&sem->wait_lock); ··· 1055 1035 waiter.handoff_set = false; 1056 1036 1057 1037 raw_spin_lock_irq(&sem->wait_lock); 1058 - if (list_empty(&sem->wait_list)) { 1038 + first = sem->first_waiter; 1039 + if (!first) { 1059 1040 /* 1060 1041 * In case the wait queue is empty and the lock isn't owned 1061 1042 * by a writer, this reader can exit the slowpath and return ··· 1072 1051 return sem; 1073 1052 } 1074 1053 adjustment += RWSEM_FLAG_WAITERS; 1054 + INIT_LIST_HEAD(&waiter.list); 1055 + sem->first_waiter = &waiter; 1056 + } else { 1057 + list_add_tail(&waiter.list, &first->list); 1075 1058 } 1076 - rwsem_add_waiter(sem, &waiter); 1077 1059 1078 1060 /* we're now waiting on the lock, but no longer actively locking */ 1079 1061 count = atomic_long_add_return(adjustment, &sem->count); ··· 1134 1110 static struct rw_semaphore __sched * 1135 1111 rwsem_down_write_slowpath(struct rw_semaphore *sem, int state) 1136 1112 { 1137 - struct rwsem_waiter waiter; 1113 + struct rwsem_waiter waiter, *first; 1138 1114 DEFINE_WAKE_Q(wake_q); 1139 1115 1140 1116 /* do optimistic spinning and steal lock if possible */ ··· 1153 1129 waiter.handoff_set = false; 1154 1130 1155 1131 raw_spin_lock_irq(&sem->wait_lock); 1156 - rwsem_add_waiter(sem, &waiter); 1157 1132 1158 - /* we're now waiting on the lock */ 
1159 - if (rwsem_first_waiter(sem) != &waiter) { 1133 + first = sem->first_waiter; 1134 + if (first) { 1135 + list_add_tail(&waiter.list, &first->list); 1160 1136 rwsem_cond_wake_waiter(sem, atomic_long_read(&sem->count), 1161 1137 &wake_q); 1162 1138 if (!wake_q_empty(&wake_q)) { ··· 1169 1145 raw_spin_lock_irq(&sem->wait_lock); 1170 1146 } 1171 1147 } else { 1148 + INIT_LIST_HEAD(&waiter.list); 1149 + sem->first_waiter = &waiter; 1172 1150 atomic_long_or(RWSEM_FLAG_WAITERS, &sem->count); 1173 1151 } 1174 1152 ··· 1244 1218 1245 1219 raw_spin_lock_irqsave(&sem->wait_lock, flags); 1246 1220 1247 - if (!list_empty(&sem->wait_list)) 1221 + if (sem->first_waiter) 1248 1222 rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q); 1249 1223 1250 1224 raw_spin_unlock_irqrestore(&sem->wait_lock, flags); ··· 1265 1239 1266 1240 raw_spin_lock_irqsave(&sem->wait_lock, flags); 1267 1241 1268 - if (!list_empty(&sem->wait_list)) 1242 + if (sem->first_waiter) 1269 1243 rwsem_mark_wake(sem, RWSEM_WAKE_READ_OWNED, &wake_q); 1270 1244 1271 1245 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);