Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'locking_urgent_for_v5.15_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull locking fixes from Borislav Petkov:

- Fix the futex PI requeue machinery so that it does not return to
userspace in an inconsistent state

- Avoid a potential NULL pointer dereference in the ww_mutex deadlock
check

- Other smaller cleanups and optimizations

* tag 'locking_urgent_for_v5.15_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
locking/rtmutex: Fix ww_mutex deadlock check
futex: Remove unused variable 'vpid' in futex_proxy_trylock_atomic()
futex: Avoid redundant task lookup
futex: Clarify comment for requeue_pi_wake_futex()
futex: Prevent inconsistent state and exit race
futex: Return error code instead of assigning it without effect
locking/rwsem: Add missing __init_rwsem() for PREEMPT_RT

+120 -94
+2 -10
include/linux/rwsem.h
··· 142 142 #define DECLARE_RWSEM(lockname) \ 143 143 struct rw_semaphore lockname = __RWSEM_INITIALIZER(lockname) 144 144 145 - #ifdef CONFIG_DEBUG_LOCK_ALLOC 146 - extern void __rwsem_init(struct rw_semaphore *rwsem, const char *name, 145 + extern void __init_rwsem(struct rw_semaphore *rwsem, const char *name, 147 146 struct lock_class_key *key); 148 - #else 149 - static inline void __rwsem_init(struct rw_semaphore *rwsem, const char *name, 150 - struct lock_class_key *key) 151 - { 152 - } 153 - #endif 154 147 155 148 #define init_rwsem(sem) \ 156 149 do { \ 157 150 static struct lock_class_key __key; \ 158 151 \ 159 - init_rwbase_rt(&(sem)->rwbase); \ 160 - __rwsem_init((sem), #sem, &__key); \ 152 + __init_rwsem((sem), #sem, &__key); \ 161 153 } while (0) 162 154 163 155 static __always_inline int rwsem_is_locked(struct rw_semaphore *sem)
+111 -79
kernel/futex.c
··· 1263 1263 return -ESRCH; 1264 1264 } 1265 1265 1266 + static void __attach_to_pi_owner(struct task_struct *p, union futex_key *key, 1267 + struct futex_pi_state **ps) 1268 + { 1269 + /* 1270 + * No existing pi state. First waiter. [2] 1271 + * 1272 + * This creates pi_state, we have hb->lock held, this means nothing can 1273 + * observe this state, wait_lock is irrelevant. 1274 + */ 1275 + struct futex_pi_state *pi_state = alloc_pi_state(); 1276 + 1277 + /* 1278 + * Initialize the pi_mutex in locked state and make @p 1279 + * the owner of it: 1280 + */ 1281 + rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p); 1282 + 1283 + /* Store the key for possible exit cleanups: */ 1284 + pi_state->key = *key; 1285 + 1286 + WARN_ON(!list_empty(&pi_state->list)); 1287 + list_add(&pi_state->list, &p->pi_state_list); 1288 + /* 1289 + * Assignment without holding pi_state->pi_mutex.wait_lock is safe 1290 + * because there is no concurrency as the object is not published yet. 1291 + */ 1292 + pi_state->owner = p; 1293 + 1294 + *ps = pi_state; 1295 + } 1266 1296 /* 1267 1297 * Lookup the task for the TID provided from user space and attach to 1268 1298 * it after doing proper sanity checks. ··· 1302 1272 struct task_struct **exiting) 1303 1273 { 1304 1274 pid_t pid = uval & FUTEX_TID_MASK; 1305 - struct futex_pi_state *pi_state; 1306 1275 struct task_struct *p; 1307 1276 1308 1277 /* ··· 1353 1324 return ret; 1354 1325 } 1355 1326 1356 - /* 1357 - * No existing pi state. First waiter. [2] 1358 - * 1359 - * This creates pi_state, we have hb->lock held, this means nothing can 1360 - * observe this state, wait_lock is irrelevant. 
1361 - */ 1362 - pi_state = alloc_pi_state(); 1363 - 1364 - /* 1365 - * Initialize the pi_mutex in locked state and make @p 1366 - * the owner of it: 1367 - */ 1368 - rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p); 1369 - 1370 - /* Store the key for possible exit cleanups: */ 1371 - pi_state->key = *key; 1372 - 1373 - WARN_ON(!list_empty(&pi_state->list)); 1374 - list_add(&pi_state->list, &p->pi_state_list); 1375 - /* 1376 - * Assignment without holding pi_state->pi_mutex.wait_lock is safe 1377 - * because there is no concurrency as the object is not published yet. 1378 - */ 1379 - pi_state->owner = p; 1327 + __attach_to_pi_owner(p, key, ps); 1380 1328 raw_spin_unlock_irq(&p->pi_lock); 1381 1329 1382 1330 put_task_struct(p); 1383 - 1384 - *ps = pi_state; 1385 1331 1386 1332 return 0; 1387 1333 } ··· 1458 1454 newval |= FUTEX_WAITERS; 1459 1455 1460 1456 ret = lock_pi_update_atomic(uaddr, uval, newval); 1461 - /* If the take over worked, return 1 */ 1462 - return ret < 0 ? ret : 1; 1457 + if (ret) 1458 + return ret; 1459 + 1460 + /* 1461 + * If the waiter bit was requested the caller also needs PI 1462 + * state attached to the new owner of the user space futex. 1463 + * 1464 + * @task is guaranteed to be alive and it cannot be exiting 1465 + * because it is either sleeping or waiting in 1466 + * futex_requeue_pi_wakeup_sync(). 1467 + * 1468 + * No need to do the full attach_to_pi_owner() exercise 1469 + * because @task is known and valid. 1470 + */ 1471 + if (set_waiters) { 1472 + raw_spin_lock_irq(&task->pi_lock); 1473 + __attach_to_pi_owner(task, key, ps); 1474 + raw_spin_unlock_irq(&task->pi_lock); 1475 + } 1476 + return 1; 1463 1477 } 1464 1478 1465 1479 /* ··· 1961 1939 * @hb: the hash_bucket of the requeue target futex 1962 1940 * 1963 1941 * During futex_requeue, with requeue_pi=1, it is possible to acquire the 1964 - * target futex if it is uncontended or via a lock steal. 
Set the futex_q key 1965 - * to the requeue target futex so the waiter can detect the wakeup on the right 1966 - * futex, but remove it from the hb and NULL the rt_waiter so it can detect 1967 - * atomic lock acquisition. Set the q->lock_ptr to the requeue target hb->lock 1968 - * to protect access to the pi_state to fixup the owner later. Must be called 1969 - * with both q->lock_ptr and hb->lock held. 1942 + * target futex if it is uncontended or via a lock steal. 1943 + * 1944 + * 1) Set @q::key to the requeue target futex key so the waiter can detect 1945 + * the wakeup on the right futex. 1946 + * 1947 + * 2) Dequeue @q from the hash bucket. 1948 + * 1949 + * 3) Set @q::rt_waiter to NULL so the woken up task can detect atomic lock 1950 + * acquisition. 1951 + * 1952 + * 4) Set the q->lock_ptr to the requeue target hb->lock for the case that 1953 + * the waiter has to fixup the pi state. 1954 + * 1955 + * 5) Complete the requeue state so the waiter can make progress. After 1956 + * this point the waiter task can return from the syscall immediately in 1957 + * case that the pi state does not have to be fixed up. 1958 + * 1959 + * 6) Wake the waiter task. 1960 + * 1961 + * Must be called with both q->lock_ptr and hb->lock held. 1970 1962 */ 1971 1963 static inline 1972 1964 void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key, ··· 2034 1998 { 2035 1999 struct futex_q *top_waiter = NULL; 2036 2000 u32 curval; 2037 - int ret, vpid; 2001 + int ret; 2038 2002 2039 2003 if (get_futex_value_locked(&curval, pifutex)) 2040 2004 return -EFAULT; ··· 2061 2025 * and waiting on the 'waitqueue' futex which is always !PI. 2062 2026 */ 2063 2027 if (!top_waiter->rt_waiter || top_waiter->pi_state) 2064 - ret = -EINVAL; 2028 + return -EINVAL; 2065 2029 2066 2030 /* Ensure we requeue to the expected futex. */ 2067 2031 if (!match_futex(top_waiter->requeue_pi_key, key2)) ··· 2072 2036 return -EAGAIN; 2073 2037 2074 2038 /* 2075 - * Try to take the lock for top_waiter. 
Set the FUTEX_WAITERS bit in 2076 - * the contended case or if set_waiters is 1. The pi_state is returned 2077 - * in ps in contended cases. 2039 + * Try to take the lock for top_waiter and set the FUTEX_WAITERS bit 2040 + * in the contended case or if @set_waiters is true. 2041 + * 2042 + * In the contended case PI state is attached to the lock owner. If 2043 + * the user space lock can be acquired then PI state is attached to 2044 + * the new owner (@top_waiter->task) when @set_waiters is true. 2078 2045 */ 2079 - vpid = task_pid_vnr(top_waiter->task); 2080 2046 ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task, 2081 2047 exiting, set_waiters); 2082 2048 if (ret == 1) { 2083 - /* Dequeue, wake up and update top_waiter::requeue_state */ 2049 + /* 2050 + * Lock was acquired in user space and PI state was 2051 + * attached to @top_waiter->task. That means state is fully 2052 + * consistent and the waiter can return to user space 2053 + * immediately after the wakeup. 2054 + */ 2084 2055 requeue_pi_wake_futex(top_waiter, key2, hb2); 2085 - return vpid; 2086 2056 } else if (ret < 0) { 2087 2057 /* Rewind top_waiter::requeue_state */ 2088 2058 futex_requeue_pi_complete(top_waiter, ret); ··· 2250 2208 &exiting, nr_requeue); 2251 2209 2252 2210 /* 2253 - * At this point the top_waiter has either taken uaddr2 or is 2254 - * waiting on it. If the former, then the pi_state will not 2255 - * exist yet, look it up one more time to ensure we have a 2256 - * reference to it. If the lock was taken, @ret contains the 2257 - * VPID of the top waiter task. 2258 - * If the lock was not taken, we have pi_state and an initial 2259 - * refcount on it. In case of an error we have nothing. 2211 + * At this point the top_waiter has either taken uaddr2 or 2212 + * is waiting on it. In both cases pi_state has been 2213 + * established and an initial refcount on it. In case of an 2214 + * error there's nothing. 
2260 2215 * 2261 2216 * The top waiter's requeue_state is up to date: 2262 2217 * 2263 - * - If the lock was acquired atomically (ret > 0), then 2218 + * - If the lock was acquired atomically (ret == 1), then 2264 2219 * the state is Q_REQUEUE_PI_LOCKED. 2220 + * 2221 + * The top waiter has been dequeued and woken up and can 2222 + * return to user space immediately. The kernel/user 2223 + * space state is consistent. In case that there must be 2224 + * more waiters requeued the WAITERS bit in the user 2225 + * space futex is set so the top waiter task has to go 2226 + * into the syscall slowpath to unlock the futex. This 2227 + * will block until this requeue operation has been 2228 + * completed and the hash bucket locks have been 2229 + * dropped. 2265 2230 * 2266 2231 * - If the trylock failed with an error (ret < 0) then 2267 2232 * the state is either Q_REQUEUE_PI_NONE, i.e. "nothing ··· 2283 2234 * the same sanity checks for requeue_pi as the loop 2284 2235 * below does. 2285 2236 */ 2286 - if (ret > 0) { 2287 - WARN_ON(pi_state); 2288 - task_count++; 2289 - /* 2290 - * If futex_proxy_trylock_atomic() acquired the 2291 - * user space futex, then the user space value 2292 - * @uaddr2 has been set to the @hb1's top waiter 2293 - * task VPID. This task is guaranteed to be alive 2294 - * and cannot be exiting because it is either 2295 - * sleeping or blocked on @hb2 lock. 2296 - * 2297 - * The @uaddr2 futex cannot have waiters either as 2298 - * otherwise futex_proxy_trylock_atomic() would not 2299 - * have succeeded. 2300 - * 2301 - * In order to requeue waiters to @hb2, pi state is 2302 - * required. Hand in the VPID value (@ret) and 2303 - * allocate PI state with an initial refcount on 2304 - * it. 2305 - */ 2306 - ret = attach_to_pi_owner(uaddr2, ret, &key2, &pi_state, 2307 - &exiting); 2308 - WARN_ON(ret); 2309 - } 2310 - 2311 2237 switch (ret) { 2312 2238 case 0: 2313 2239 /* We hold a reference on the pi state. 
*/ 2240 + break; 2241 + 2242 + case 1: 2243 + /* 2244 + * futex_proxy_trylock_atomic() acquired the user space 2245 + * futex. Adjust task_count. 2246 + */ 2247 + task_count++; 2248 + ret = 0; 2314 2249 break; 2315 2250 2316 2251 /* ··· 2428 2395 } 2429 2396 2430 2397 /* 2431 - * We took an extra initial reference to the pi_state either in 2432 - * futex_proxy_trylock_atomic() or in attach_to_pi_owner(). We need 2433 - * to drop it here again. 2398 + * We took an extra initial reference to the pi_state in 2399 + * futex_proxy_trylock_atomic(). We need to drop it here again. 2434 2400 */ 2435 2401 put_pi_state(pi_state); 2436 2402
+1 -1
kernel/locking/rtmutex.c
··· 753 753 * other configuration and we fail to report; also, see 754 754 * lockdep. 755 755 */ 756 - if (IS_ENABLED(CONFIG_PREEMPT_RT) && orig_waiter->ww_ctx) 756 + if (IS_ENABLED(CONFIG_PREEMPT_RT) && orig_waiter && orig_waiter->ww_ctx) 757 757 ret = 0; 758 758 759 759 raw_spin_unlock(&lock->wait_lock);
+6 -4
kernel/locking/rwsem.c
··· 1376 1376 1377 1377 #include "rwbase_rt.c" 1378 1378 1379 - #ifdef CONFIG_DEBUG_LOCK_ALLOC 1380 - void __rwsem_init(struct rw_semaphore *sem, const char *name, 1379 + void __init_rwsem(struct rw_semaphore *sem, const char *name, 1381 1380 struct lock_class_key *key) 1382 1381 { 1382 + init_rwbase_rt(&(sem)->rwbase); 1383 + 1384 + #ifdef CONFIG_DEBUG_LOCK_ALLOC 1383 1385 debug_check_no_locks_freed((void *)sem, sizeof(*sem)); 1384 1386 lockdep_init_map_wait(&sem->dep_map, name, key, 0, LD_WAIT_SLEEP); 1385 - } 1386 - EXPORT_SYMBOL(__rwsem_init); 1387 1387 #endif 1388 + } 1389 + EXPORT_SYMBOL(__init_rwsem); 1388 1390 1389 1391 static inline void __down_read(struct rw_semaphore *sem) 1390 1392 {