Merge tag 'locking-core-2024-01-08' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

+18

Documentation/locking/mutex-design.rst

··· 101 101 - Detects multi-task circular deadlocks and prints out all affected 102 102 locks and tasks (and only those tasks). 103 103 104 + Mutexes - and most other sleeping locks like rwsems - do not provide an 105 + implicit reference for the memory they occupy, i.e. a reference that would 106 + be released by mutex_unlock(). 107 + 108 + [ This is in contrast with spin_unlock() [or completion_done()], which 109 + can be used to guarantee that the memory is not touched by the 110 + lock implementation after spin_unlock()/completion_done() releases 111 + the lock. ] 112 + 113 + mutex_unlock() may access the mutex structure even after it has internally 114 + released the lock already - so it's not safe for another context to 115 + acquire the mutex and assume that the mutex_unlock() context is not using 116 + the structure anymore. 117 + 118 + The mutex user must ensure that the mutex is not destroyed while a 119 + release operation is still in progress - in other words, callers of 120 + mutex_unlock() must ensure that the mutex stays alive until mutex_unlock() 121 + has returned. 104 122 105 123 Interfaces 106 124 ----------

+1 -1

MAINTAINERS

··· 12424 12424 T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking/core 12425 12425 F: Documentation/locking/ 12426 12426 F: arch/*/include/asm/spinlock*.h 12427 - F: include/linux/lockdep.h 12427 + F: include/linux/lockdep*.h 12428 12428 F: include/linux/mutex*.h 12429 12429 F: include/linux/rwlock*.h 12430 12430 F: include/linux/rwsem*.h

+49 -3

include/linux/cleanup.h

··· 125 125 * trivial wrapper around DEFINE_CLASS() above specifically 126 126 * for locks. 127 127 * 128 + * DEFINE_GUARD_COND(name, ext, condlock) 129 + * wrapper around EXTEND_CLASS above to add conditional lock 130 + * variants to a base class, e.g. mutex_trylock() or 131 + * mutex_lock_interruptible(). 132 + * 128 133 * guard(name): 129 - * an anonymous instance of the (guard) class 134 + * an anonymous instance of the (guard) class, not recommended for 135 + * conditional locks. 130 136 * 131 137 * scoped_guard (name, args...) { }: 132 138 * similar to CLASS(name, scope)(args), except the variable (with the 133 139 * explicit name 'scope') is declared in a for-loop such that its scope is 134 140 * bound to the next (compound) statement. 135 141 * 142 + * for conditional locks the loop body is skipped when the lock is not 143 + * acquired. 144 + * 145 + * scoped_cond_guard (name, fail, args...) { }: 146 + * similar to scoped_guard(), except it executes the 'fail' statement 147 + * when the lock acquisition fails. 148 + * 136 149 */ 137 150 138 151 #define DEFINE_GUARD(_name, _type, _lock, _unlock) \ 139 - DEFINE_CLASS(_name, _type, _unlock, ({ _lock; _T; }), _type _T) 152 + DEFINE_CLASS(_name, _type, if (_T) { _unlock; }, ({ _lock; _T; }), _type _T); \ 153 + static inline void * class_##_name##_lock_ptr(class_##_name##_t *_T) \ 154 + { return *_T; } 155 + 156 + #define DEFINE_GUARD_COND(_name, _ext, _condlock) \ 157 + EXTEND_CLASS(_name, _ext, \ 158 + ({ void *_t = _T; if (_T && !(_condlock)) _t = NULL; _t; }), \ 159 + class_##_name##_t _T) \ 160 + static inline void * class_##_name##_ext##_lock_ptr(class_##_name##_t *_T) \ 161 + { return class_##_name##_lock_ptr(_T); } 140 162 141 163 #define guard(_name) \ 142 164 CLASS(_name, __UNIQUE_ID(guard)) 143 165 166 + #define __guard_ptr(_name) class_##_name##_lock_ptr 167 + 144 168 #define scoped_guard(_name, args...) 
\ 145 169 for (CLASS(_name, scope)(args), \ 146 - *done = NULL; !done; done = (void *)1) 170 + *done = NULL; __guard_ptr(_name)(&scope) && !done; done = (void *)1) 171 + 172 + #define scoped_cond_guard(_name, _fail, args...) \ 173 + for (CLASS(_name, scope)(args), \ 174 + *done = NULL; !done; done = (void *)1) \ 175 + if (!__guard_ptr(_name)(&scope)) _fail; \ 176 + else 147 177 148 178 /* 149 179 * Additional helper macros for generating lock guards with types, either for ··· 182 152 * 183 153 * DEFINE_LOCK_GUARD_0(name, lock, unlock, ...) 184 154 * DEFINE_LOCK_GUARD_1(name, type, lock, unlock, ...) 155 + * DEFINE_LOCK_GUARD_1_COND(name, ext, condlock) 185 156 * 186 157 * will result in the following type: 187 158 * ··· 204 173 static inline void class_##_name##_destructor(class_##_name##_t *_T) \ 205 174 { \ 206 175 if (_T->lock) { _unlock; } \ 176 + } \ 177 + \ 178 + static inline void *class_##_name##_lock_ptr(class_##_name##_t *_T) \ 179 + { \ 180 + return _T->lock; \ 207 181 } 208 182 209 183 ··· 236 200 #define DEFINE_LOCK_GUARD_0(_name, _lock, _unlock, ...) \ 237 201 __DEFINE_UNLOCK_GUARD(_name, void, _unlock, __VA_ARGS__) \ 238 202 __DEFINE_LOCK_GUARD_0(_name, _lock) 203 + 204 + #define DEFINE_LOCK_GUARD_1_COND(_name, _ext, _condlock) \ 205 + EXTEND_CLASS(_name, _ext, \ 206 + ({ class_##_name##_t _t = { .lock = l }, *_T = &_t;\ 207 + if (_T->lock && !(_condlock)) _T->lock = NULL; \ 208 + _t; }), \ 209 + typeof_member(class_##_name##_t, lock) l) \ 210 + static inline void * class_##_name##_ext##_lock_ptr(class_##_name##_t *_T) \ 211 + { return class_##_name##_lock_ptr(_T); } 212 + 239 213 240 214 #endif /* __LINUX_GUARDS_H */

+1 -1

include/linux/lockdep_types.h

··· 127 127 unsigned long usage_mask; 128 128 const struct lock_trace *usage_traces[LOCK_TRACE_STATES]; 129 129 130 + const char *name; 130 131 /* 131 132 * Generation counter, when doing certain classes of graph walking, 132 133 * to ensure that we check one node only once: 133 134 */ 134 135 int name_version; 135 - const char *name; 136 136 137 137 u8 wait_type_inner; 138 138 u8 wait_type_outer;

+2 -1

include/linux/mutex.h

··· 221 221 extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); 222 222 223 223 DEFINE_GUARD(mutex, struct mutex *, mutex_lock(_T), mutex_unlock(_T)) 224 - DEFINE_FREE(mutex, struct mutex *, if (_T) mutex_unlock(_T)) 224 + DEFINE_GUARD_COND(mutex, _try, mutex_trylock(_T)) 225 + DEFINE_GUARD_COND(mutex, _intr, mutex_lock_interruptible(_T) == 0) 225 226 226 227 #endif /* __LINUX_MUTEX_H */

+4 -4

include/linux/rwsem.h

··· 203 203 extern void up_write(struct rw_semaphore *sem); 204 204 205 205 DEFINE_GUARD(rwsem_read, struct rw_semaphore *, down_read(_T), up_read(_T)) 206 + DEFINE_GUARD_COND(rwsem_read, _try, down_read_trylock(_T)) 207 + DEFINE_GUARD_COND(rwsem_read, _intr, down_read_interruptible(_T) == 0) 208 + 206 209 DEFINE_GUARD(rwsem_write, struct rw_semaphore *, down_write(_T), up_write(_T)) 207 - 208 - DEFINE_FREE(up_read, struct rw_semaphore *, if (_T) up_read(_T)) 209 - DEFINE_FREE(up_write, struct rw_semaphore *, if (_T) up_write(_T)) 210 - 210 + DEFINE_GUARD_COND(rwsem_write, _try, down_write_trylock(_T)) 211 211 212 212 /* 213 213 * downgrade write lock to read lock

+2

include/linux/sched/task.h

··· 226 226 spin_unlock(&p->alloc_lock); 227 227 } 228 228 229 + DEFINE_GUARD(task_lock, struct task_struct *, task_lock(_T), task_unlock(_T)) 230 + 229 231 #endif /* _LINUX_SCHED_TASK_H */

+41

include/linux/spinlock.h

··· 507 507 raw_spin_lock(_T->lock), 508 508 raw_spin_unlock(_T->lock)) 509 509 510 + DEFINE_LOCK_GUARD_1_COND(raw_spinlock, _try, raw_spin_trylock(_T->lock)) 511 + 510 512 DEFINE_LOCK_GUARD_1(raw_spinlock_nested, raw_spinlock_t, 511 513 raw_spin_lock_nested(_T->lock, SINGLE_DEPTH_NESTING), 512 514 raw_spin_unlock(_T->lock)) ··· 517 515 raw_spin_lock_irq(_T->lock), 518 516 raw_spin_unlock_irq(_T->lock)) 519 517 518 + DEFINE_LOCK_GUARD_1_COND(raw_spinlock_irq, _try, raw_spin_trylock_irq(_T->lock)) 519 + 520 520 DEFINE_LOCK_GUARD_1(raw_spinlock_irqsave, raw_spinlock_t, 521 521 raw_spin_lock_irqsave(_T->lock, _T->flags), 522 522 raw_spin_unlock_irqrestore(_T->lock, _T->flags), 523 523 unsigned long flags) 524 524 525 + DEFINE_LOCK_GUARD_1_COND(raw_spinlock_irqsave, _try, 526 + raw_spin_trylock_irqsave(_T->lock, _T->flags)) 527 + 525 528 DEFINE_LOCK_GUARD_1(spinlock, spinlock_t, 526 529 spin_lock(_T->lock), 527 530 spin_unlock(_T->lock)) 531 + 532 + DEFINE_LOCK_GUARD_1_COND(spinlock, _try, spin_trylock(_T->lock)) 528 533 529 534 DEFINE_LOCK_GUARD_1(spinlock_irq, spinlock_t, 530 535 spin_lock_irq(_T->lock), 531 536 spin_unlock_irq(_T->lock)) 532 537 538 + DEFINE_LOCK_GUARD_1_COND(spinlock_irq, _try, 539 + spin_trylock_irq(_T->lock)) 540 + 533 541 DEFINE_LOCK_GUARD_1(spinlock_irqsave, spinlock_t, 534 542 spin_lock_irqsave(_T->lock, _T->flags), 535 543 spin_unlock_irqrestore(_T->lock, _T->flags), 544 + unsigned long flags) 545 + 546 + DEFINE_LOCK_GUARD_1_COND(spinlock_irqsave, _try, 547 + spin_trylock_irqsave(_T->lock, _T->flags)) 548 + 549 + DEFINE_LOCK_GUARD_1(read_lock, rwlock_t, 550 + read_lock(_T->lock), 551 + read_unlock(_T->lock)) 552 + 553 + DEFINE_LOCK_GUARD_1(read_lock_irq, rwlock_t, 554 + read_lock_irq(_T->lock), 555 + read_unlock_irq(_T->lock)) 556 + 557 + DEFINE_LOCK_GUARD_1(read_lock_irqsave, rwlock_t, 558 + read_lock_irqsave(_T->lock, _T->flags), 559 + read_unlock_irqrestore(_T->lock, _T->flags), 560 + unsigned long flags) 561 + 562 + 
DEFINE_LOCK_GUARD_1(write_lock, rwlock_t, 563 + write_lock(_T->lock), 564 + write_unlock(_T->lock)) 565 + 566 + DEFINE_LOCK_GUARD_1(write_lock_irq, rwlock_t, 567 + write_lock_irq(_T->lock), 568 + write_unlock_irq(_T->lock)) 569 + 570 + DEFINE_LOCK_GUARD_1(write_lock_irqsave, rwlock_t, 571 + write_lock_irqsave(_T->lock, _T->flags), 572 + write_unlock_irqrestore(_T->lock, _T->flags), 536 573 unsigned long flags) 537 574 538 575 #undef __LINUX_INSIDE_SPINLOCK_H

+5

kernel/locking/mutex.c

··· 532 532 * This function must not be used in interrupt context. Unlocking 533 533 * of a not locked mutex is not allowed. 534 534 * 535 + * The caller must ensure that the mutex stays alive until this function has 536 + * returned - mutex_unlock() can NOT directly be used to release an object such 537 + * that another concurrent task can free it. 538 + * Mutexes are different from spinlocks & refcounts in this respect. 539 + * 535 540 * This function is similar to (but not equivalent to) up(). 536 541 */ 537 542 void __sched mutex_unlock(struct mutex *lock)

+74 -80

kernel/ptrace.c

··· 386 386 return 0; 387 387 } 388 388 389 - static int ptrace_attach(struct task_struct *task, long request, 390 - unsigned long addr, 391 - unsigned long flags) 389 + static inline void ptrace_set_stopped(struct task_struct *task) 392 390 { 393 - bool seize = (request == PTRACE_SEIZE); 394 - int retval; 395 - 396 - retval = -EIO; 397 - if (seize) { 398 - if (addr != 0) 399 - goto out; 400 - /* 401 - * This duplicates the check in check_ptrace_options() because 402 - * ptrace_attach() and ptrace_setoptions() have historically 403 - * used different error codes for unknown ptrace options. 404 - */ 405 - if (flags & ~(unsigned long)PTRACE_O_MASK) 406 - goto out; 407 - retval = check_ptrace_options(flags); 408 - if (retval) 409 - return retval; 410 - flags = PT_PTRACED | PT_SEIZED | (flags << PT_OPT_FLAG_SHIFT); 411 - } else { 412 - flags = PT_PTRACED; 413 - } 414 - 415 - audit_ptrace(task); 416 - 417 - retval = -EPERM; 418 - if (unlikely(task->flags & PF_KTHREAD)) 419 - goto out; 420 - if (same_thread_group(task, current)) 421 - goto out; 422 - 423 - /* 424 - * Protect exec's credential calculations against our interference; 425 - * SUID, SGID and LSM creds get determined differently 426 - * under ptrace. 
427 - */ 428 - retval = -ERESTARTNOINTR; 429 - if (mutex_lock_interruptible(&task->signal->cred_guard_mutex)) 430 - goto out; 431 - 432 - task_lock(task); 433 - retval = __ptrace_may_access(task, PTRACE_MODE_ATTACH_REALCREDS); 434 - task_unlock(task); 435 - if (retval) 436 - goto unlock_creds; 437 - 438 - write_lock_irq(&tasklist_lock); 439 - retval = -EPERM; 440 - if (unlikely(task->exit_state)) 441 - goto unlock_tasklist; 442 - if (task->ptrace) 443 - goto unlock_tasklist; 444 - 445 - task->ptrace = flags; 446 - 447 - ptrace_link(task, current); 448 - 449 - /* SEIZE doesn't trap tracee on attach */ 450 - if (!seize) 451 - send_sig_info(SIGSTOP, SEND_SIG_PRIV, task); 452 - 453 - spin_lock(&task->sighand->siglock); 391 + guard(spinlock)(&task->sighand->siglock); 454 392 455 393 /* 456 394 * If the task is already STOPPED, set JOBCTL_TRAP_STOP and ··· 412 474 task->jobctl &= ~JOBCTL_STOPPED; 413 475 signal_wake_up_state(task, __TASK_STOPPED); 414 476 } 477 + } 415 478 416 - spin_unlock(&task->sighand->siglock); 479 + static int ptrace_attach(struct task_struct *task, long request, 480 + unsigned long addr, 481 + unsigned long flags) 482 + { 483 + bool seize = (request == PTRACE_SEIZE); 484 + int retval; 417 485 418 - retval = 0; 419 - unlock_tasklist: 420 - write_unlock_irq(&tasklist_lock); 421 - unlock_creds: 422 - mutex_unlock(&task->signal->cred_guard_mutex); 423 - out: 424 - if (!retval) { 486 + if (seize) { 487 + if (addr != 0) 488 + return -EIO; 425 489 /* 426 - * We do not bother to change retval or clear JOBCTL_TRAPPING 427 - * if wait_on_bit() was interrupted by SIGKILL. The tracer will 428 - * not return to user-mode, it will exit and clear this bit in 429 - * __ptrace_unlink() if it wasn't already cleared by the tracee; 430 - * and until then nobody can ptrace this task. 
490 + * This duplicates the check in check_ptrace_options() because 491 + * ptrace_attach() and ptrace_setoptions() have historically 492 + * used different error codes for unknown ptrace options. 431 493 */ 432 - wait_on_bit(&task->jobctl, JOBCTL_TRAPPING_BIT, TASK_KILLABLE); 433 - proc_ptrace_connector(task, PTRACE_ATTACH); 494 + if (flags & ~(unsigned long)PTRACE_O_MASK) 495 + return -EIO; 496 + 497 + retval = check_ptrace_options(flags); 498 + if (retval) 499 + return retval; 500 + flags = PT_PTRACED | PT_SEIZED | (flags << PT_OPT_FLAG_SHIFT); 501 + } else { 502 + flags = PT_PTRACED; 434 503 } 435 504 436 - return retval; 505 + audit_ptrace(task); 506 + 507 + if (unlikely(task->flags & PF_KTHREAD)) 508 + return -EPERM; 509 + if (same_thread_group(task, current)) 510 + return -EPERM; 511 + 512 + /* 513 + * Protect exec's credential calculations against our interference; 514 + * SUID, SGID and LSM creds get determined differently 515 + * under ptrace. 516 + */ 517 + scoped_cond_guard (mutex_intr, return -ERESTARTNOINTR, 518 + &task->signal->cred_guard_mutex) { 519 + 520 + scoped_guard (task_lock, task) { 521 + retval = __ptrace_may_access(task, PTRACE_MODE_ATTACH_REALCREDS); 522 + if (retval) 523 + return retval; 524 + } 525 + 526 + scoped_guard (write_lock_irq, &tasklist_lock) { 527 + if (unlikely(task->exit_state)) 528 + return -EPERM; 529 + if (task->ptrace) 530 + return -EPERM; 531 + 532 + task->ptrace = flags; 533 + 534 + ptrace_link(task, current); 535 + 536 + /* SEIZE doesn't trap tracee on attach */ 537 + if (!seize) 538 + send_sig_info(SIGSTOP, SEND_SIG_PRIV, task); 539 + 540 + ptrace_set_stopped(task); 541 + } 542 + } 543 + 544 + /* 545 + * We do not bother to change retval or clear JOBCTL_TRAPPING 546 + * if wait_on_bit() was interrupted by SIGKILL. The tracer will 547 + * not return to user-mode, it will exit and clear this bit in 548 + * __ptrace_unlink() if it wasn't already cleared by the tracee; 549 + * and until then nobody can ptrace this task. 
550 + */ 551 + wait_on_bit(&task->jobctl, JOBCTL_TRAPPING_BIT, TASK_KILLABLE); 552 + proc_ptrace_connector(task, PTRACE_ATTACH); 553 + 554 + return 0; 437 555 } 438 556 439 557 /**

Configure Feed

Configure Feed