···267267 * XXX: we're doing two index lookups when we end up reading the268268 * folio269269 */270270- ret = range_has_data(c, inode->ei_subvol,270270+ ret = range_has_data(c, inode->ei_inum.subvol,271271 POS(inode->v.i_ino, (index << PAGE_SECTORS_SHIFT)),272272 POS(inode->v.i_ino, (index << PAGE_SECTORS_SHIFT) + PAGE_SECTORS));273273 if (ret <= 0)···618618 bch2_trans_begin(trans);619619620620 ret = bch2_subvolume_get_snapshot(trans,621621- inode->ei_subvol, &snapshot);621621+ inode->ei_inum.subvol, &snapshot);622622 if (ret)623623 goto bkey_err;624624···823823retry:824824 bch2_trans_begin(trans);825825826826- ret = bch2_subvolume_get_snapshot(trans, inode->ei_subvol, &snapshot);826826+ ret = bch2_subvolume_get_snapshot(trans, inode->ei_inum.subvol, &snapshot);827827 if (ret)828828 goto err;829829
+2-2
fs/bcachefs/fs-ioctl.c
···100100 }101101102102 mutex_lock(&inode->ei_update_lock);103103- ret = bch2_subvol_is_ro(c, inode->ei_subvol) ?:103103+ ret = bch2_subvol_is_ro(c, inode->ei_inum.subvol) ?:104104 bch2_write_inode(c, inode, bch2_inode_flags_set, &s,105105 ATTR_CTIME);106106 mutex_unlock(&inode->ei_update_lock);···184184 }185185186186 mutex_lock(&inode->ei_update_lock);187187- ret = bch2_subvol_is_ro(c, inode->ei_subvol) ?:187187+ ret = bch2_subvol_is_ro(c, inode->ei_inum.subvol) ?:188188 bch2_set_projid(c, inode, fa.fsx_projid) ?:189189 bch2_write_inode(c, inode, fssetxattr_inode_update_fn, &s,190190 ATTR_CTIME);
+133-72
fs/bcachefs/fs.c
···108108 goto retry;109109110110 bch2_fs_fatal_err_on(bch2_err_matches(ret, ENOENT), c,111111- "%s: inode %u:%llu not found when updating",111111+ "%s: inode %llu:%llu not found when updating",112112 bch2_err_str(ret),113113 inode_inum(inode).subvol,114114 inode_inum(inode).inum);···152152 return ret;153153}154154155155-static int bch2_iget5_test(struct inode *vinode, void *p)155155+static bool subvol_inum_eq(subvol_inum a, subvol_inum b)156156{157157- struct bch_inode_info *inode = to_bch_ei(vinode);158158- subvol_inum *inum = p;159159-160160- return inode->ei_subvol == inum->subvol &&161161- inode->ei_inode.bi_inum == inum->inum;157157+ return a.subvol == b.subvol && a.inum == b.inum;162158}163159164164-static int bch2_iget5_set(struct inode *vinode, void *p)160160+static int bch2_vfs_inode_cmp_fn(struct rhashtable_compare_arg *arg,161161+ const void *obj)165162{166166- struct bch_inode_info *inode = to_bch_ei(vinode);167167- subvol_inum *inum = p;163163+ const struct bch_inode_info *inode = obj;164164+ const subvol_inum *v = arg->key;168165169169- inode->v.i_ino = inum->inum;170170- inode->ei_subvol = inum->subvol;171171- inode->ei_inode.bi_inum = inum->inum;172172- return 0;166166+ return !subvol_inum_eq(inode->ei_inum, *v);173167}174168175175-static unsigned bch2_inode_hash(subvol_inum inum)169169+static const struct rhashtable_params bch2_vfs_inodes_params = {170170+ .head_offset = offsetof(struct bch_inode_info, hash),171171+ .key_offset = offsetof(struct bch_inode_info, ei_inum),172172+ .key_len = sizeof(subvol_inum),173173+ .obj_cmpfn = bch2_vfs_inode_cmp_fn,174174+ .automatic_shrinking = true,175175+};176176+177177+static void __wait_on_freeing_inode(struct inode *inode)176178{177177- return jhash_3words(inum.subvol, inum.inum >> 32, inum.inum, JHASH_INITVAL);179179+ wait_queue_head_t *wq;180180+ DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW);181181+ wq = bit_waitqueue(&inode->i_state, __I_NEW);182182+ prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);183183+ spin_unlock(&inode->i_lock);184184+ schedule();185185+ finish_wait(wq, &wait.wq_entry);178186}179187180188struct bch_inode_info *__bch2_inode_hash_find(struct bch_fs *c, subvol_inum inum)181189{182182- return to_bch_ei(ilookup5_nowait(c->vfs_sb,183183- bch2_inode_hash(inum),184184- bch2_iget5_test,185185- &inum));190190+ return rhashtable_lookup_fast(&c->vfs_inodes_table, &inum, bch2_vfs_inodes_params);186191}187192188188-static struct bch_inode_info *bch2_inode_insert(struct bch_fs *c, struct bch_inode_info *inode)193193+static struct bch_inode_info *bch2_inode_hash_find(struct bch_fs *c, subvol_inum inum)189194{190190- subvol_inum inum = inode_inum(inode);191191- struct bch_inode_info *old = to_bch_ei(inode_insert5(&inode->v,192192- bch2_inode_hash(inum),193193- bch2_iget5_test,194194- bch2_iget5_set,195195- &inum));196196- BUG_ON(!old);195195+ struct bch_inode_info *inode;196196+repeat:197197+ inode = __bch2_inode_hash_find(c, inum);198198+ if (inode) {199199+ spin_lock(&inode->v.i_lock);200200+ if (!test_bit(EI_INODE_HASHED, &inode->ei_flags)) {201201+ spin_unlock(&inode->v.i_lock);202202+ return NULL;203203+ }204204+ if ((inode->v.i_state & (I_FREEING|I_WILL_FREE))) {205205+ __wait_on_freeing_inode(&inode->v);206206+ goto repeat;207207+ }208208+ __iget(&inode->v);209209+ spin_unlock(&inode->v.i_lock);210210+ }197211198198- if (unlikely(old != inode)) {212212+ return inode;213213+}214214+215215+static void bch2_inode_hash_remove(struct bch_fs *c, struct bch_inode_info *inode)216216+{217217+ spin_lock(&inode->v.i_lock);218218+ bool remove = test_and_clear_bit(EI_INODE_HASHED, &inode->ei_flags);219219+ spin_unlock(&inode->v.i_lock);220220+221221+ if (remove) {222222+ int ret = rhashtable_remove_fast(&c->vfs_inodes_table,223223+ &inode->hash, bch2_vfs_inodes_params);224224+ BUG_ON(ret);225225+ inode->v.i_hash.pprev = NULL;226226+ }227227+}228228+229229+static struct bch_inode_info *bch2_inode_hash_insert(struct bch_fs *c, struct bch_inode_info *inode)230230+{231231+ struct bch_inode_info *old = inode;232232+233233+ set_bit(EI_INODE_HASHED, &inode->ei_flags);234234+retry:235235+ if (unlikely(rhashtable_lookup_insert_fast(&c->vfs_inodes_table,236236+ &inode->hash,237237+ bch2_vfs_inodes_params))) {238238+ old = bch2_inode_hash_find(c, inode->ei_inum);239239+ if (!old)240240+ goto retry;241241+242242+ clear_bit(EI_INODE_HASHED, &inode->ei_flags);243243+199244 /*200245 * bcachefs doesn't use I_NEW; we have no use for it since we201246 * only insert fully created inodes in the inode hash table. But···256211 discard_new_inode(&inode->v);257212 inode = old;258213 } else {214214+ inode_fake_hash(&inode->v);215215+216216+ inode_sb_list_add(&inode->v);217217+259218 mutex_lock(&c->vfs_inodes_lock);260219 list_add(&inode->ei_vfs_inode_list, &c->vfs_inodes_list);261220 mutex_unlock(&c->vfs_inodes_lock);262262- /*263263- * Again, I_NEW makes no sense for bcachefs. This is only needed264264- * for clearing I_NEW, but since the inode was already fully265265- * created and initialized we didn't actually want266266- * inode_insert5() to set it for us.267267- */268268- unlock_new_inode(&inode->v);269221 }270222271223 return inode;···327285328286struct inode *bch2_vfs_inode_get(struct bch_fs *c, subvol_inum inum)329287{330330- struct bch_inode_info *inode =331331- to_bch_ei(ilookup5_nowait(c->vfs_sb,332332- bch2_inode_hash(inum),333333- bch2_iget5_test,334334- &inum));288288+ struct bch_inode_info *inode = bch2_inode_hash_find(c, inum);335289 if (inode)336290 return &inode->v;337291···341303 PTR_ERR_OR_ZERO(inode = bch2_new_inode(trans));342304 if (!ret) {343305 bch2_vfs_inode_init(trans, inum, inode, &inode_u, &subvol);344344- inode = bch2_inode_insert(c, inode);306306+ inode = bch2_inode_hash_insert(c, inode);345307 }346308 bch2_trans_put(trans);347309···389351retry:390352 bch2_trans_begin(trans);391353392392- ret = bch2_subvol_is_ro_trans(trans, dir->ei_subvol) ?:354354+ ret = bch2_subvol_is_ro_trans(trans, dir->ei_inum.subvol) ?:393355 bch2_create_trans(trans,394356 inode_inum(dir), &dir_u, &inode_u,395357 !(flags & BCH_CREATE_TMPFILE)···403365 if (unlikely(ret))404366 goto err_before_quota;405367406406- inum.subvol = inode_u.bi_subvol ?: dir->ei_subvol;368368+ inum.subvol = inode_u.bi_subvol ?: dir->ei_inum.subvol;407369 inum.inum = inode_u.bi_inum;408370409371 ret = bch2_subvolume_get(trans, inum.subvol, true,···434396 * bch2_trans_exit() and dropping locks, else we could race with another435397 * thread pulling the inode in and modifying it:436398 */437437- inode = bch2_inode_insert(c, inode);399399+ inode = bch2_inode_hash_insert(c, inode);438400 bch2_trans_put(trans);439401err:440402 posix_acl_release(default_acl);···474436 if (ret)475437 goto err;476438477477- struct bch_inode_info *inode =478478- to_bch_ei(ilookup5_nowait(c->vfs_sb,479479- bch2_inode_hash(inum),480480- bch2_iget5_test,481481- &inum));439439+ struct bch_inode_info *inode = bch2_inode_hash_find(c, inum);482440 if (inode)483441 goto out;484442···504470 }505471506472 bch2_vfs_inode_init(trans, inum, inode, &inode_u, &subvol);507507- inode = bch2_inode_insert(c, inode);473473+ inode = bch2_inode_hash_insert(c, inode);508474out:509475 bch2_trans_iter_exit(trans, &dirent_iter);510476 printbuf_exit(&buf);···591557592558 lockdep_assert_held(&inode->v.i_rwsem);593559594594- ret = bch2_subvol_is_ro(c, dir->ei_subvol) ?:595595- bch2_subvol_is_ro(c, inode->ei_subvol) ?:560560+ ret = bch2_subvol_is_ro(c, dir->ei_inum.subvol) ?:561561+ bch2_subvol_is_ro(c, inode->ei_inum.subvol) ?:596562 __bch2_link(c, inode, dir, dentry);597563 if (unlikely(ret))598564 return bch2_err_class(ret);···648614 struct bch_inode_info *dir= to_bch_ei(vdir);649615 struct bch_fs *c = dir->v.i_sb->s_fs_info;650616651651- int ret = bch2_subvol_is_ro(c, dir->ei_subvol) ?:617617+ int ret = bch2_subvol_is_ro(c, dir->ei_inum.subvol) ?:652618 __bch2_unlink(vdir, dentry, false);653619 return bch2_err_class(ret);654620}···731697732698 trans = bch2_trans_get(c);733699734734- ret = bch2_subvol_is_ro_trans(trans, src_dir->ei_subvol) ?:735735- bch2_subvol_is_ro_trans(trans, dst_dir->ei_subvol);700700+ ret = bch2_subvol_is_ro_trans(trans, src_dir->ei_inum.subvol) ?:701701+ bch2_subvol_is_ro_trans(trans, dst_dir->ei_inum.subvol);736702 if (ret)737703 goto err;738704···933899 stat->blksize = block_bytes(c);934900 stat->blocks = inode->v.i_blocks;935901936936- stat->subvol = inode->ei_subvol;902902+ stat->subvol = inode->ei_inum.subvol;937903 stat->result_mask |= STATX_SUBVOL;938904939905 if ((request_mask & STATX_DIOALIGN) && S_ISREG(inode->v.i_mode)) {···975941976942 lockdep_assert_held(&inode->v.i_rwsem);977943978978- ret = bch2_subvol_is_ro(c, inode->ei_subvol) ?:944944+ ret = bch2_subvol_is_ro(c, inode->ei_inum.subvol) ?:979945 setattr_prepare(idmap, dentry, iattr);980946 if (ret)981947 return ret;···10871053retry:10881054 bch2_trans_begin(trans);1089105510901090- ret = bch2_subvolume_get_snapshot(trans, ei->ei_subvol, &snapshot);10561056+ ret = bch2_subvolume_get_snapshot(trans, ei->ei_inum.subvol, &snapshot);10911057 if (ret)10921058 goto err;10931059···12071173 struct bch_inode_info *inode = to_bch_ei(vinode);12081174 struct bch_fs *c = inode->v.i_sb->s_fs_info;1209117512101210- int ret = bch2_subvol_is_ro(c, inode->ei_subvol);11761176+ int ret = bch2_subvol_is_ro(c, inode->ei_inum.subvol);12111177 if (ret)12121178 return ret;12131179 }···13391305static struct bcachefs_fid bch2_inode_to_fid(struct bch_inode_info *inode)13401306{13411307 return (struct bcachefs_fid) {13421342- .inum = inode->ei_inode.bi_inum,13431343- .subvol = inode->ei_subvol,13081308+ .inum = inode->ei_inum.inum,13091309+ .subvol = inode->ei_inum.subvol,13441310 .gen = inode->ei_inode.bi_generation,13451311 };13461312}···14251391 struct bch_fs *c = inode->v.i_sb->s_fs_info;14261392 subvol_inum parent_inum = {14271393 .subvol = inode->ei_inode.bi_parent_subvol ?:14281428- inode->ei_subvol,13941394+ inode->ei_inum.subvol,14291395 .inum = inode->ei_inode.bi_dir,14301396 };14311397···14611427retry:14621428 bch2_trans_begin(trans);1463142914641464- ret = bch2_subvolume_get_snapshot(trans, dir->ei_subvol, &snapshot);14301430+ ret = bch2_subvolume_get_snapshot(trans, dir->ei_inum.subvol, &snapshot);14651431 if (ret)14661432 goto err;14671433···14921458 if (ret)14931459 goto err;1494146014951495- if (target.subvol == inode->ei_subvol &&14961496- target.inum == inode->ei_inode.bi_inum)14611461+ if (subvol_inum_eq(target, inode->ei_inum))14971462 goto found;14981463 } else {14991464 /*···15131480 if (ret)15141481 continue;1515148215161516- if (target.subvol == inode->ei_subvol &&15171517- target.inum == inode->ei_inode.bi_inum)14831483+ if (subvol_inum_eq(target, inode->ei_inum))15181484 goto found;15191485 }15201486 }···15501518 struct bch_inode_unpacked *bi,15511519 struct bch_subvolume *subvol)15521520{15531553- bch2_iget5_set(&inode->v, &inum);15211521+ inode->v.i_ino = inum.inum;15221522+ inode->ei_inum = inum;15231523+ inode->ei_inode.bi_inum = inum.inum;15541524 bch2_inode_update_after_write(trans, inode, bi, ~0);1555152515561526 inode->v.i_blocks = bi->bi_sectors;···15641530 inode->ei_flags = 0;15651531 inode->ei_quota_reserved = 0;15661532 inode->ei_qid = bch_qid(bi);15671567- inode->ei_subvol = inum.subvol;1568153315691534 if (BCH_SUBVOLUME_SNAP(subvol))15701535 set_bit(EI_INODE_SNAPSHOT, &inode->ei_flags);···16301597{16311598 struct bch_fs *c = vinode->i_sb->s_fs_info;16321599 struct bch_inode_info *inode = to_bch_ei(vinode);16001600+ bool delete = !inode->v.i_nlink && !is_bad_inode(&inode->v);16011601+16021602+ /*16031603+ * evict() has waited for outstanding writeback, we'll do no more IO16041604+ * through this inode: it's safe to remove from VFS inode hashtable here16051605+ *16061606+ * Do that now so that other threads aren't blocked from pulling it back16071607+ * in, there's no reason for them to be:16081608+ */16091609+ if (!delete)16101610+ bch2_inode_hash_remove(c, inode);1633161116341612 truncate_inode_pages_final(&inode->v.i_data);16351613···1648160416491605 BUG_ON(!is_bad_inode(&inode->v) && inode->ei_quota_reserved);1650160616511651- if (!inode->v.i_nlink && !is_bad_inode(&inode->v)) {16071607+ if (delete) {16521608 bch2_quota_acct(c, inode->ei_qid, Q_SPC, -((s64) inode->v.i_blocks),16531609 KEY_TYPE_QUOTA_WARN);16541610 bch2_quota_acct(c, inode->ei_qid, Q_INO, -1,16551611 KEY_TYPE_QUOTA_WARN);16561612 bch2_inode_rm(c, inode_inum(inode));16131613+16141614+ /*16151615+ * If we are deleting, we need it present in the vfs hash table16161616+ * so that fsck can check if unlinked inodes are still open:16171617+ */16181618+ bch2_inode_hash_remove(c, inode);16571619 }1658162016591621 mutex_lock(&c->vfs_inodes_lock);···1689163916901640 mutex_lock(&c->vfs_inodes_lock);16911641 list_for_each_entry(inode, &c->vfs_inodes_list, ei_vfs_inode_list) {16921692- if (!snapshot_list_has_id(s, inode->ei_subvol))16421642+ if (!snapshot_list_has_id(s, inode->ei_inum.subvol))16931643 continue;1694164416951645 if (!(inode->v.i_state & I_DONTCACHE) &&···21752125 fc->fs_private = opts;2176212621772127 return 0;21282128+}21292129+21302130+void bch2_fs_vfs_exit(struct bch_fs *c)21312131+{21322132+ if (c->vfs_inodes_table.tbl)21332133+ rhashtable_destroy(&c->vfs_inodes_table);21342134+}21352135+21362136+int bch2_fs_vfs_init(struct bch_fs *c)21372137+{21382138+ return rhashtable_init(&c->vfs_inodes_table, &bch2_vfs_inodes_params);21782139}2179214021802141static struct file_system_type bcache_fs_type = {
···365365 subvol_inum inum, unsigned flags)366366{367367 int ret = bch2_inode_peek_nowarn(trans, iter, inode, inum, flags);368368- bch_err_msg(trans->c, ret, "looking up inum %u:%llu:", inum.subvol, inum.inum);368368+ bch_err_msg(trans->c, ret, "looking up inum %llu:%llu:", inum.subvol, inum.inum);369369 return ret;370370}371371
+2-1
fs/bcachefs/subvolume_types.h
···3030};31313232typedef struct {3333- u32 subvol;3333+ /* we can't have padding in this struct: */3434+ u64 subvol;3435 u64 inum;3536} subvol_inum;3637