Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

locking: Add task::blocked_lock to serialize blocked_on state

So far we have been able to use mutex::wait_lock to serialize
the blocked_on state, but once we move to proxying across
runqueues we will need additional state, and a way to serialize
changes to that state in contexts where mutex::wait_lock is not
held.

So introduce task::blocked_lock, which nests under
mutex::wait_lock in the locking order, and rework the locking
to use it.

Signed-off-by: John Stultz <jstultz@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: K Prateek Nayak <kprateek.nayak@amd.com>
Link: https://patch.msgid.link/20260324191337.1841376-5-jstultz@google.com

Authored by John Stultz, committed by Peter Zijlstra
fa4a1ff8 f4fe6be8
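The patch boils down to one nesting rule: task::blocked_lock is always taken inside mutex::wait_lock, never the other way around. A minimal sketch of that convention, using a hypothetical helper name (the real call sites are in the hunks below):

/*
 * Illustrative only: shows the nesting order this commit establishes.
 * blocked_on_example() is a hypothetical helper, not part of the patch.
 */
static void blocked_on_example(struct task_struct *p, struct mutex *m)
{
	guard(raw_spinlock_irqsave)(&m->wait_lock);	/* outer: mutex::wait_lock */
	guard(raw_spinlock)(&p->blocked_lock);		/* inner: task::blocked_lock */

	__set_task_blocked_on(p, m);	/* asserts p->blocked_lock is held */
}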

8 files changed: +58 -50
include/linux/sched.h  +17 -31
···
 #endif
 
 	struct mutex			*blocked_on;	/* lock we're blocked on */
+	raw_spinlock_t			blocked_lock;
 
 #ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER
 	/*
···
 #ifndef CONFIG_PREEMPT_RT
 static inline struct mutex *__get_task_blocked_on(struct task_struct *p)
 {
-	struct mutex *m = p->blocked_on;
-
-	if (m)
-		lockdep_assert_held_once(&m->wait_lock);
-	return m;
+	lockdep_assert_held_once(&p->blocked_lock);
+	return p->blocked_on;
 }
 
 static inline void __set_task_blocked_on(struct task_struct *p, struct mutex *m)
 {
-	struct mutex *blocked_on = READ_ONCE(p->blocked_on);
-
 	WARN_ON_ONCE(!m);
 	/* The task should only be setting itself as blocked */
 	WARN_ON_ONCE(p != current);
-	/* Currently we serialize blocked_on under the mutex::wait_lock */
-	lockdep_assert_held_once(&m->wait_lock);
+	/* Currently we serialize blocked_on under the task::blocked_lock */
+	lockdep_assert_held_once(&p->blocked_lock);
 	/*
 	 * Check ensure we don't overwrite existing mutex value
 	 * with a different mutex. Note, setting it to the same
 	 * lock repeatedly is ok.
 	 */
-	WARN_ON_ONCE(blocked_on && blocked_on != m);
-	WRITE_ONCE(p->blocked_on, m);
-}
-
-static inline void set_task_blocked_on(struct task_struct *p, struct mutex *m)
-{
-	guard(raw_spinlock_irqsave)(&m->wait_lock);
-	__set_task_blocked_on(p, m);
+	WARN_ON_ONCE(p->blocked_on && p->blocked_on != m);
+	p->blocked_on = m;
 }
 
 static inline void __clear_task_blocked_on(struct task_struct *p, struct mutex *m)
 {
-	if (m) {
-		struct mutex *blocked_on = READ_ONCE(p->blocked_on);
-
-		/* Currently we serialize blocked_on under the mutex::wait_lock */
-		lockdep_assert_held_once(&m->wait_lock);
-		/*
-		 * There may be cases where we re-clear already cleared
-		 * blocked_on relationships, but make sure we are not
-		 * clearing the relationship with a different lock.
-		 */
-		WARN_ON_ONCE(blocked_on && blocked_on != m);
-	}
-	WRITE_ONCE(p->blocked_on, NULL);
+	/* Currently we serialize blocked_on under the task::blocked_lock */
+	lockdep_assert_held_once(&p->blocked_lock);
+	/*
+	 * There may be cases where we re-clear already cleared
+	 * blocked_on relationships, but make sure we are not
+	 * clearing the relationship with a different lock.
+	 */
+	WARN_ON_ONCE(m && p->blocked_on && p->blocked_on != m);
+	p->blocked_on = NULL;
 }
 
 static inline void clear_task_blocked_on(struct task_struct *p, struct mutex *m)
 {
-	guard(raw_spinlock_irqsave)(&m->wait_lock);
+	guard(raw_spinlock_irqsave)(&p->blocked_lock);
 	__clear_task_blocked_on(p, m);
 }
 #else
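The header keeps the usual kernel pairing: the __-prefixed helpers assume task::blocked_lock is already held (and lockdep-assert it), while the plain wrappers take the lock themselves. Note that re-clearing an already cleared blocked_on relationship is explicitly tolerated. A hedged usage sketch, with a hypothetical caller name:

/* Hypothetical caller, for illustration only. */
static void blocked_on_clear_example(struct task_struct *p, struct mutex *m)
{
	/* Locked variant: takes p->blocked_lock internally. */
	clear_task_blocked_on(p, m);

	/* Lockless variant, for when blocked_lock is already held
	 * (re-clearing is ok, per the comment in the hunk above): */
	scoped_guard(raw_spinlock_irqsave, &p->blocked_lock)
		__clear_task_blocked_on(p, m);
}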
init/init_task.c  +1
···
 	.journal_info	= NULL,
 	INIT_CPU_TIMERS(init_task)
 	.pi_lock	= __RAW_SPIN_LOCK_UNLOCKED(init_task.pi_lock),
+	.blocked_lock	= __RAW_SPIN_LOCK_UNLOCKED(init_task.blocked_lock),
 	.timer_slack_ns	= 50000, /* 50 usec default slack */
 	.thread_pid	= &init_struct_pid,
 	.thread_node	= LIST_HEAD_INIT(init_signals.thread_head),
kernel/fork.c  +1
···
 	ftrace_graph_init_task(p);
 
 	rt_mutex_init_task(p);
+	raw_spin_lock_init(&p->blocked_lock);
 
 	lockdep_assert_irqs_enabled();
 #ifdef CONFIG_PROVE_LOCKING
kernel/locking/mutex-debug.c  +2 -2
···
 	lockdep_assert_held(&lock->wait_lock);
 
 	/* Current thread can't be already blocked (since it's executing!) */
-	DEBUG_LOCKS_WARN_ON(__get_task_blocked_on(task));
+	DEBUG_LOCKS_WARN_ON(get_task_blocked_on(task));
 }
 
 void debug_mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
 			       struct task_struct *task)
 {
-	struct mutex *blocked_on = __get_task_blocked_on(task);
+	struct mutex *blocked_on = get_task_blocked_on(task);
 
 	DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list));
 	DEBUG_LOCKS_WARN_ON(waiter->task != task);
kernel/locking/mutex.c  +26 -14
···
 		goto err_early_kill;
 	}
 
+	raw_spin_lock(&current->blocked_lock);
 	__set_task_blocked_on(current, lock);
 	set_current_state(state);
 	trace_contention_begin(lock, LCB_F_MUTEX);
···
 		 * the handoff.
 		 */
 		if (__mutex_trylock(lock))
-			goto acquired;
+			break;
 
+		raw_spin_unlock(&current->blocked_lock);
 		/*
 		 * Check for signals and kill conditions while holding
 		 * wait_lock. This ensures the lock cancellation is ordered
···
 
 		first = __mutex_waiter_is_first(lock, &waiter);
 
+		raw_spin_lock_irqsave(&lock->wait_lock, flags);
+		raw_spin_lock(&current->blocked_lock);
 		/*
 		 * As we likely have been woken up by task
 		 * that has cleared our blocked_on state, re-set
 		 * it to the lock we are trying to acquire.
 		 */
-		set_task_blocked_on(current, lock);
+		__set_task_blocked_on(current, lock);
 		set_current_state(state);
 		/*
 		 * Here we order against unlock; we must either see it change
···
 			break;
 
 		if (first) {
-			trace_contention_begin(lock, LCB_F_MUTEX | LCB_F_SPIN);
+			bool opt_acquired;
+
 			/*
 			 * mutex_optimistic_spin() can call schedule(), so
-			 * clear blocked on so we don't become unselectable
+			 * we need to release these locks before calling it,
+			 * and clear blocked on so we don't become unselectable
 			 * to run.
 			 */
-			clear_task_blocked_on(current, lock);
-			if (mutex_optimistic_spin(lock, ww_ctx, &waiter))
+			__clear_task_blocked_on(current, lock);
+			raw_spin_unlock(&current->blocked_lock);
+			raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
+
+			trace_contention_begin(lock, LCB_F_MUTEX | LCB_F_SPIN);
+			opt_acquired = mutex_optimistic_spin(lock, ww_ctx, &waiter);
+
+			raw_spin_lock_irqsave(&lock->wait_lock, flags);
+			raw_spin_lock(&current->blocked_lock);
+			__set_task_blocked_on(current, lock);
+
+			if (opt_acquired)
 				break;
-			set_task_blocked_on(current, lock);
 			trace_contention_begin(lock, LCB_F_MUTEX);
 		}
-
-		raw_spin_lock_irqsave(&lock->wait_lock, flags);
 	}
-	raw_spin_lock_irqsave(&lock->wait_lock, flags);
-acquired:
 	__clear_task_blocked_on(current, lock);
 	__set_current_state(TASK_RUNNING);
+	raw_spin_unlock(&current->blocked_lock);
 
 	if (ww_ctx) {
 		/*
···
 	return 0;
 
 err:
-	__clear_task_blocked_on(current, lock);
+	clear_task_blocked_on(current, lock);
 	__set_current_state(TASK_RUNNING);
 	__mutex_remove_waiter(lock, &waiter);
 err_early_kill:
-	WARN_ON(__get_task_blocked_on(current));
+	WARN_ON(get_task_blocked_on(current));
 	trace_contention_end(lock, ret);
 	raw_spin_unlock_irqrestore_wake(&lock->wait_lock, flags, &wake_q);
 	debug_mutex_free_waiter(&waiter);
···
 		next = waiter->task;
 
 		debug_mutex_wake_waiter(lock, waiter);
-		__clear_task_blocked_on(next, lock);
+		clear_task_blocked_on(next, lock);
 		wake_q_add(&wake_q, next);
 	}
 
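The core of the mutex.c change is the dance around mutex_optimistic_spin(): the spin can call schedule(), so both locks are dropped first (inner before outer) and retaken afterwards in nesting order, with blocked_on cleared across the window so the task stays selectable to run. Condensed from the hunk above; the surrounding wait loop and error paths are elided:

	/* Condensed sketch of the optimistic-spin window, not the full loop. */
	__clear_task_blocked_on(current, lock);		/* stay selectable to run */
	raw_spin_unlock(&current->blocked_lock);	/* inner lock first */
	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);

	opt_acquired = mutex_optimistic_spin(lock, ww_ctx, &waiter);	/* may schedule() */

	raw_spin_lock_irqsave(&lock->wait_lock, flags);	/* outer lock first */
	raw_spin_lock(&current->blocked_lock);
	__set_task_blocked_on(current, lock);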
kernel/locking/mutex.h  +6
···
 	return (struct task_struct *)(atomic_long_read(&lock->owner) & ~MUTEX_FLAGS);
 }
 
+static inline struct mutex *get_task_blocked_on(struct task_struct *p)
+{
+	guard(raw_spinlock_irqsave)(&p->blocked_lock);
+	return __get_task_blocked_on(p);
+}
+
 #ifdef CONFIG_DEBUG_MUTEXES
 extern void debug_mutex_lock_common(struct mutex *lock,
 				    struct mutex_waiter *waiter);
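get_task_blocked_on() gives callers that hold neither lock a snapshot: guard() takes blocked_lock for the read and drops it on return, so the value may be stale by the time the caller inspects it. That is acceptable for its users here, the debug warnings in mutex-debug.c and the mutex slow path. A hypothetical check in the same spirit:

/* Hypothetical debug check mirroring the mutex-debug.c call sites. */
static void blocked_on_check_example(struct task_struct *task)
{
	/* Snapshot taken under task->blocked_lock; stale once returned. */
	struct mutex *blocked_on = get_task_blocked_on(task);

	/* e.g. the currently executing task should not be marked blocked */
	WARN_ON_ONCE(task == current && blocked_on);
}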
kernel/locking/ww_mutex.h  +2 -2
···
 	 * blocked_on pointer. Otherwise we can see circular
 	 * blocked_on relationships that can't resolve.
 	 */
-	__clear_task_blocked_on(waiter->task, lock);
+	clear_task_blocked_on(waiter->task, lock);
 	wake_q_add(wake_q, waiter->task);
 }
 
···
 		 * are waking the mutex owner, who may be currently
 		 * blocked on a different mutex.
 		 */
-		__clear_task_blocked_on(owner, NULL);
+		clear_task_blocked_on(owner, NULL);
 		wake_q_add(wake_q, owner);
 	}
 	return true;
kernel/sched/core.c  +3 -1
···
  *  p->pi_lock
  *    rq->lock
  *      mutex->wait_lock
+ *        p->blocked_lock
  *
  * Returns the task that is going to be used as execution context (the one
  * that is actually going to be run on cpu_of(rq)).
···
 	 * and ensure @owner sticks around.
 	 */
 	guard(raw_spinlock)(&mutex->wait_lock);
+	guard(raw_spinlock)(&p->blocked_lock);
 
-	/* Check again that p is blocked with wait_lock held */
+	/* Check again that p is blocked with blocked_lock held */
 	if (mutex != __get_task_blocked_on(p)) {
 		/*
 		 * Something changed in the blocked_on chain and
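The proxy path in sched/core.c follows the documented order exactly: rq->lock is already held, then mutex::wait_lock, then task::blocked_lock, then a revalidation that the blocked_on relationship still stands, since it may have changed before the locks were taken. The shape of that pattern, as an illustrative sketch (the bail-out action is assumed for illustration, not quoted from the function):

	/* Illustrative shape of the revalidation; not the full function. */
	guard(raw_spinlock)(&mutex->wait_lock);	/* nests under rq->lock */
	guard(raw_spinlock)(&p->blocked_lock);

	/* Check again that p is blocked with blocked_lock held */
	if (mutex != __get_task_blocked_on(p)) {
		/* blocked_on chain changed under us; assumed: give up and retry */
		return NULL;
	}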