Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

bcachefs: Fix deadlock in __wait_on_freeing_inode()

We can't call __wait_on_freeing_inode() with btree locks held: we'd be
waiting on another thread that's in evict(), and before that thread
clears the bit we're waiting on it needs to write that inode to flush
timestamps - which in turn needs the btree locks we're holding - deadlock.

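Fixing this involves a fair amount of re-jiggering to plumb a new
transaction restart: bch2_inode_hash_find() now takes the transaction,
drops btree locks before waiting, and relocks afterwards, returning the
relock error to the caller if that fails.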

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>

+47 -20
+47 -20
fs/bcachefs/fs.c
··· 190 190 return rhashtable_lookup_fast(&c->vfs_inodes_table, &inum, bch2_vfs_inodes_params); 191 191 } 192 192 193 - static struct bch_inode_info *bch2_inode_hash_find(struct bch_fs *c, subvol_inum inum) 193 + static struct bch_inode_info *bch2_inode_hash_find(struct bch_fs *c, struct btree_trans *trans, 194 + subvol_inum inum) 194 195 { 195 196 struct bch_inode_info *inode; 196 197 repeat: ··· 203 202 return NULL; 204 203 } 205 204 if ((inode->v.i_state & (I_FREEING|I_WILL_FREE))) { 206 - __wait_on_freeing_inode(&inode->v); 205 + if (!trans) { 206 + __wait_on_freeing_inode(&inode->v); 207 + } else { 208 + bch2_trans_unlock(trans); 209 + __wait_on_freeing_inode(&inode->v); 210 + int ret = bch2_trans_relock(trans); 211 + if (ret) 212 + return ERR_PTR(ret); 213 + } 207 214 goto repeat; 208 215 } 209 216 __iget(&inode->v); ··· 235 226 } 236 227 } 237 228 238 - static struct bch_inode_info *bch2_inode_hash_insert(struct bch_fs *c, struct bch_inode_info *inode) 229 + static struct bch_inode_info *bch2_inode_hash_insert(struct bch_fs *c, 230 + struct btree_trans *trans, 231 + struct bch_inode_info *inode) 239 232 { 240 233 struct bch_inode_info *old = inode; 241 234 ··· 246 235 if (unlikely(rhashtable_lookup_insert_fast(&c->vfs_inodes_table, 247 236 &inode->hash, 248 237 bch2_vfs_inodes_params))) { 249 - old = bch2_inode_hash_find(c, inode->ei_inum); 238 + old = bch2_inode_hash_find(c, trans, inode->ei_inum); 250 239 if (!old) 251 240 goto retry; 252 241 ··· 265 254 */ 266 255 set_nlink(&inode->v, 1); 267 256 discard_new_inode(&inode->v); 268 - inode = old; 257 + return old; 269 258 } else { 270 259 inode_fake_hash(&inode->v); 271 260 ··· 274 263 mutex_lock(&c->vfs_inodes_lock); 275 264 list_add(&inode->ei_vfs_inode_list, &c->vfs_inodes_list); 276 265 mutex_unlock(&c->vfs_inodes_lock); 266 + return inode; 277 267 } 278 - 279 - return inode; 280 268 } 281 269 282 270 #define memalloc_flags_do(_flags, _do) \ ··· 335 325 return inode; 336 326 } 337 327 328 + static 
struct bch_inode_info *bch2_inode_hash_init_insert(struct btree_trans *trans, 329 + subvol_inum inum, 330 + struct bch_inode_unpacked *bi, 331 + struct bch_subvolume *subvol) 332 + { 333 + struct bch_inode_info *inode = bch2_new_inode(trans); 334 + if (IS_ERR(inode)) 335 + return inode; 336 + 337 + bch2_vfs_inode_init(trans, inum, inode, bi, subvol); 338 + 339 + return bch2_inode_hash_insert(trans->c, trans, inode); 340 + 341 + } 342 + 338 343 struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum) 339 344 { 340 - struct bch_inode_info *inode = bch2_inode_hash_find(c, inum); 345 + struct bch_inode_info *inode = bch2_inode_hash_find(c, NULL, inum); 341 346 if (inode) 342 347 return &inode->v; 343 348 ··· 363 338 int ret = lockrestart_do(trans, 364 339 bch2_subvolume_get(trans, inum.subvol, true, 0, &subvol) ?: 365 340 bch2_inode_find_by_inum_trans(trans, inum, &inode_u)) ?: 366 - PTR_ERR_OR_ZERO(inode = bch2_new_inode(trans)); 367 - if (!ret) { 368 - bch2_vfs_inode_init(trans, inum, inode, &inode_u, &subvol); 369 - inode = bch2_inode_hash_insert(c, inode); 370 - } 341 + PTR_ERR_OR_ZERO(inode = bch2_inode_hash_init_insert(trans, inum, &inode_u, &subvol)); 371 342 bch2_trans_put(trans); 372 343 373 344 return ret ? ERR_PTR(ret) : &inode->v; ··· 454 433 * we must insert the new inode into the inode cache before calling 455 434 * bch2_trans_exit() and dropping locks, else we could race with another 456 435 * thread pulling the inode in and modifying it: 436 + * 437 + * also, calling bch2_inode_hash_insert() without passing in the 438 + * transaction object is sketchy - if we could ever end up in 439 + * __wait_on_freeing_inode(), we'd risk deadlock. 440 + * 441 + * But that shouldn't be possible, since we still have the inode locked 442 + * that we just created, and we _really_ can't take a transaction 443 + * restart here. 
457 444 */ 458 - inode = bch2_inode_hash_insert(c, inode); 445 + inode = bch2_inode_hash_insert(c, NULL, inode); 459 446 bch2_trans_put(trans); 460 447 err: 461 448 posix_acl_release(default_acl); ··· 503 474 if (ret) 504 475 goto err; 505 476 506 - struct bch_inode_info *inode = bch2_inode_hash_find(c, inum); 477 + struct bch_inode_info *inode = bch2_inode_hash_find(c, trans, inum); 507 478 if (inode) 508 479 goto out; 509 480 ··· 511 482 struct bch_inode_unpacked inode_u; 512 483 ret = bch2_subvolume_get(trans, inum.subvol, true, 0, &subvol) ?: 513 484 bch2_inode_find_by_inum_nowarn_trans(trans, inum, &inode_u) ?: 514 - PTR_ERR_OR_ZERO(inode = bch2_new_inode(trans)); 485 + PTR_ERR_OR_ZERO(inode = bch2_inode_hash_init_insert(trans, inum, &inode_u, &subvol)); 515 486 516 487 bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), 517 488 c, "dirent to missing inode:\n %s", ··· 531 502 ret = -ENOENT; 532 503 goto err; 533 504 } 534 - 535 - bch2_vfs_inode_init(trans, inum, inode, &inode_u, &subvol); 536 - inode = bch2_inode_hash_insert(c, inode); 537 505 out: 538 506 bch2_trans_iter_exit(trans, &dirent_iter); 539 507 printbuf_exit(&buf); ··· 1571 1545 .get_name = bch2_get_name, 1572 1546 }; 1573 1547 1574 - static void bch2_vfs_inode_init(struct btree_trans *trans, subvol_inum inum, 1548 + static void bch2_vfs_inode_init(struct btree_trans *trans, 1549 + subvol_inum inum, 1575 1550 struct bch_inode_info *inode, 1576 1551 struct bch_inode_unpacked *bi, 1577 1552 struct bch_subvolume *subvol)