Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

fs: rework I_NEW handling to operate without fences

In the inode hash code grab the state while ->i_lock is held. If found
to be set, synchronize the sleep once more with the lock held.

In the real world the flag is not set most of the time.

Apart from being simpler to reason about, it comes with a minor speed up
as now clearing the flag does not require the smp_mb() fence.

While here rename wait_on_inode() to wait_on_new_inode() to line it up
with __wait_on_freeing_inode().

Christian Brauner <brauner@kernel.org> says:

As per the discussion in [1] I folded in the diff sent in [2].

Link: https://lore.kernel.org/69238e4d.a70a0220.d98e3.006e.GAE@google.com [1]
Link: https://lore.kernel.org/c2kpawomkbvtahjm7y5mposbhckb7wxthi3iqy5yr22ggpucrm@ufvxwy233qxo [2]
Signed-off-by: Mateusz Guzik <mjguzik@gmail.com>
Link: https://patch.msgid.link/20251010221737.1403539-1-mjguzik@gmail.com
Signed-off-by: Christian Brauner <brauner@kernel.org>

authored by

Mateusz Guzik and committed by
Christian Brauner
a27628f4 11f2af2a

+66 -60
+2 -2
fs/afs/dir.c
··· 779 779 struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode; 780 780 struct inode *inode = NULL, *ti; 781 781 afs_dataversion_t data_version = READ_ONCE(dvnode->status.data_version); 782 - bool supports_ibulk; 782 + bool supports_ibulk, isnew; 783 783 long ret; 784 784 int i; 785 785 ··· 850 850 * callback counters. 851 851 */ 852 852 ti = ilookup5_nowait(dir->i_sb, vp->fid.vnode, 853 - afs_ilookup5_test_by_fid, &vp->fid); 853 + afs_ilookup5_test_by_fid, &vp->fid, &isnew); 854 854 if (!IS_ERR_OR_NULL(ti)) { 855 855 vnode = AFS_FS_I(ti); 856 856 vp->dv_before = vnode->status.data_version;
-10
fs/dcache.c
··· 1981 1981 spin_lock(&inode->i_lock); 1982 1982 __d_instantiate(entry, inode); 1983 1983 WARN_ON(!(inode_state_read(inode) & I_NEW)); 1984 - /* 1985 - * Pairs with smp_rmb in wait_on_inode(). 1986 - */ 1987 - smp_wmb(); 1988 1984 inode_state_clear(inode, I_NEW | I_CREATING); 1989 - /* 1990 - * Pairs with the barrier in prepare_to_wait_event() to make sure 1991 - * ___wait_var_event() either sees the bit cleared or 1992 - * waitqueue_active() check in wake_up_var() sees the waiter. 1993 - */ 1994 - smp_mb(); 1995 1985 inode_wake_up_bit(inode, __I_NEW); 1996 1986 spin_unlock(&inode->i_lock); 1997 1987 }
+1 -1
fs/gfs2/glock.c
··· 957 957 ip = NULL; 958 958 spin_unlock(&gl->gl_lockref.lock); 959 959 if (ip) { 960 - wait_on_inode(&ip->i_inode); 960 + wait_on_new_inode(&ip->i_inode); 961 961 if (is_bad_inode(&ip->i_inode)) { 962 962 iput(&ip->i_inode); 963 963 ip = NULL;
+61 -37
fs/inode.c
··· 558 558 } 559 559 EXPORT_SYMBOL(inode_bit_waitqueue); 560 560 561 + void wait_on_new_inode(struct inode *inode) 562 + { 563 + struct wait_bit_queue_entry wqe; 564 + struct wait_queue_head *wq_head; 565 + 566 + spin_lock(&inode->i_lock); 567 + if (!(inode_state_read(inode) & I_NEW)) { 568 + spin_unlock(&inode->i_lock); 569 + return; 570 + } 571 + 572 + wq_head = inode_bit_waitqueue(&wqe, inode, __I_NEW); 573 + for (;;) { 574 + prepare_to_wait_event(wq_head, &wqe.wq_entry, TASK_UNINTERRUPTIBLE); 575 + if (!(inode_state_read(inode) & I_NEW)) 576 + break; 577 + spin_unlock(&inode->i_lock); 578 + schedule(); 579 + spin_lock(&inode->i_lock); 580 + } 581 + finish_wait(wq_head, &wqe.wq_entry); 582 + WARN_ON(inode_state_read(inode) & I_NEW); 583 + spin_unlock(&inode->i_lock); 584 + } 585 + EXPORT_SYMBOL(wait_on_new_inode); 586 + 561 587 /* 562 588 * Add inode to LRU if needed (inode is unused and clean). 563 589 * ··· 1034 1008 static struct inode *find_inode(struct super_block *sb, 1035 1009 struct hlist_head *head, 1036 1010 int (*test)(struct inode *, void *), 1037 - void *data, bool is_inode_hash_locked) 1011 + void *data, bool is_inode_hash_locked, 1012 + bool *isnew) 1038 1013 { 1039 1014 struct inode *inode = NULL; 1040 1015 ··· 1062 1035 return ERR_PTR(-ESTALE); 1063 1036 } 1064 1037 __iget(inode); 1038 + *isnew = !!(inode_state_read(inode) & I_NEW); 1065 1039 spin_unlock(&inode->i_lock); 1066 1040 rcu_read_unlock(); 1067 1041 return inode; ··· 1077 1049 */ 1078 1050 static struct inode *find_inode_fast(struct super_block *sb, 1079 1051 struct hlist_head *head, unsigned long ino, 1080 - bool is_inode_hash_locked) 1052 + bool is_inode_hash_locked, bool *isnew) 1081 1053 { 1082 1054 struct inode *inode = NULL; 1083 1055 ··· 1104 1076 return ERR_PTR(-ESTALE); 1105 1077 } 1106 1078 __iget(inode); 1079 + *isnew = !!(inode_state_read(inode) & I_NEW); 1107 1080 spin_unlock(&inode->i_lock); 1108 1081 rcu_read_unlock(); 1109 1082 return inode; ··· 1210 1181 lockdep_annotate_inode_mutex_key(inode); 1211 1182 spin_lock(&inode->i_lock); 1212 1183 WARN_ON(!(inode_state_read(inode) & I_NEW)); 1213 - /* 1214 - * Pairs with smp_rmb in wait_on_inode(). 1215 - */ 1216 - smp_wmb(); 1217 1184 inode_state_clear(inode, I_NEW | I_CREATING); 1218 - /* 1219 - * Pairs with the barrier in prepare_to_wait_event() to make sure 1220 - * ___wait_var_event() either sees the bit cleared or 1221 - * waitqueue_active() check in wake_up_var() sees the waiter. 1222 - */ 1223 - smp_mb(); 1224 1185 inode_wake_up_bit(inode, __I_NEW); 1225 1186 spin_unlock(&inode->i_lock); 1226 1187 } ··· 1221 1202 lockdep_annotate_inode_mutex_key(inode); 1222 1203 spin_lock(&inode->i_lock); 1223 1204 WARN_ON(!(inode_state_read(inode) & I_NEW)); 1224 - /* 1225 - * Pairs with smp_rmb in wait_on_inode(). 1226 - */ 1227 - smp_wmb(); 1228 1205 inode_state_clear(inode, I_NEW); 1229 - /* 1230 - * Pairs with the barrier in prepare_to_wait_event() to make sure 1231 - * ___wait_var_event() either sees the bit cleared or 1232 - * waitqueue_active() check in wake_up_var() sees the waiter. 1233 - */ 1234 - smp_mb(); 1235 1206 inode_wake_up_bit(inode, __I_NEW); 1236 1207 spin_unlock(&inode->i_lock); 1237 1208 iput(inode); ··· 1277 1268 * @test: callback used for comparisons between inodes 1278 1269 * @set: callback used to initialize a new struct inode 1279 1270 * @data: opaque data pointer to pass to @test and @set 1271 + * @isnew: pointer to a bool which will indicate whether I_NEW is set 1280 1272 * 1281 1273 * Search for the inode specified by @hashval and @data in the inode cache, 1282 1274 * and if present return it with an increased reference count. This is a ··· 1296 1286 { 1297 1287 struct hlist_head *head = inode_hashtable + hash(inode->i_sb, hashval); 1298 1288 struct inode *old; 1289 + bool isnew; 1299 1290 1300 1291 might_sleep(); 1301 1292 1302 1293 again: 1303 1294 spin_lock(&inode_hash_lock); 1304 - old = find_inode(inode->i_sb, head, test, data, true); 1295 + old = find_inode(inode->i_sb, head, test, data, true, &isnew); 1305 1296 if (unlikely(old)) { 1306 1297 /* 1307 1298 * Uhhuh, somebody else created the same inode under us. ··· 1311 1300 spin_unlock(&inode_hash_lock); 1312 1301 if (IS_ERR(old)) 1313 1302 return NULL; 1314 - wait_on_inode(old); 1303 + if (unlikely(isnew)) 1304 + wait_on_new_inode(old); 1315 1305 if (unlikely(inode_unhashed(old))) { 1316 1306 iput(old); 1317 1307 goto again; ··· 1403 1391 { 1404 1392 struct hlist_head *head = inode_hashtable + hash(sb, hashval); 1405 1393 struct inode *inode, *new; 1394 + bool isnew; 1406 1395 1407 1396 might_sleep(); 1408 1397 1409 1398 again: 1410 - inode = find_inode(sb, head, test, data, false); 1399 + inode = find_inode(sb, head, test, data, false, &isnew); 1411 1400 if (inode) { 1412 1401 if (IS_ERR(inode)) 1413 1402 return NULL; 1414 - wait_on_inode(inode); 1403 + if (unlikely(isnew)) 1404 + wait_on_new_inode(inode); 1415 1405 if (unlikely(inode_unhashed(inode))) { 1416 1406 iput(inode); 1417 1407 goto again; ··· 1448 1434 { 1449 1435 struct hlist_head *head = inode_hashtable + hash(sb, ino); 1450 1436 struct inode *inode; 1437 + bool isnew; 1451 1438 1452 1439 might_sleep(); 1453 1440 1454 1441 again: 1455 - inode = find_inode_fast(sb, head, ino, false); 1442 + inode = find_inode_fast(sb, head, ino, false, &isnew); 1456 1443 if (inode) { 1457 1444 if (IS_ERR(inode)) 1458 1445 return NULL; 1459 - wait_on_inode(inode); 1446 + if (unlikely(isnew)) 1447 + wait_on_new_inode(inode); 1460 1448 if (unlikely(inode_unhashed(inode))) { 1461 1449 iput(inode); 1462 1450 goto again; ··· 1472 1456 1473 1457 spin_lock(&inode_hash_lock); 1474 1458 /* We released the lock, so.. */ 1475 - old = find_inode_fast(sb, head, ino, true); 1459 + old = find_inode_fast(sb, head, ino, true, &isnew); 1476 1460 if (!old) { 1477 1461 inode->i_ino = ino; 1478 1462 spin_lock(&inode->i_lock); ··· 1498 1482 if (IS_ERR(old)) 1499 1483 return NULL; 1500 1484 inode = old; 1501 - wait_on_inode(inode); 1485 + if (unlikely(isnew)) 1486 + wait_on_new_inode(inode); 1502 1487 if (unlikely(inode_unhashed(inode))) { 1503 1488 iput(inode); 1504 1489 goto again; ··· 1603 1586 * Note2: @test is called with the inode_hash_lock held, so can't sleep. 1604 1587 */ 1605 1588 struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval, 1606 - int (*test)(struct inode *, void *), void *data) 1589 + int (*test)(struct inode *, void *), void *data, bool *isnew) 1607 1590 { 1608 1591 struct hlist_head *head = inode_hashtable + hash(sb, hashval); 1609 1592 struct inode *inode; 1610 1593 1611 1594 spin_lock(&inode_hash_lock); 1612 - inode = find_inode(sb, head, test, data, true); 1595 + inode = find_inode(sb, head, test, data, true, isnew); 1613 1596 spin_unlock(&inode_hash_lock); 1614 1597 1615 1598 return IS_ERR(inode) ? NULL : inode; ··· 1637 1620 int (*test)(struct inode *, void *), void *data) 1638 1621 { 1639 1622 struct inode *inode; 1623 + bool isnew; 1640 1624 1641 1625 might_sleep(); 1642 1626 1643 1627 again: 1644 - inode = ilookup5_nowait(sb, hashval, test, data); 1628 + inode = ilookup5_nowait(sb, hashval, test, data, &isnew); 1645 1629 if (inode) { 1646 - wait_on_inode(inode); 1630 + if (unlikely(isnew)) 1631 + wait_on_new_inode(inode); 1647 1632 if (unlikely(inode_unhashed(inode))) { 1648 1633 iput(inode); 1649 1634 goto again; ··· 1667 1648 { 1668 1649 struct hlist_head *head = inode_hashtable + hash(sb, ino); 1669 1650 struct inode *inode; 1651 + bool isnew; 1670 1652 1671 1653 might_sleep(); 1672 1654 1673 1655 again: 1674 - inode = find_inode_fast(sb, head, ino, false); 1656 + inode = find_inode_fast(sb, head, ino, false, &isnew); 1675 1657 1676 1658 if (inode) { 1677 1659 if (IS_ERR(inode)) 1678 1660 return NULL; 1679 - wait_on_inode(inode); 1661 + if (unlikely(isnew)) 1662 + wait_on_new_inode(inode); 1680 1663 if (unlikely(inode_unhashed(inode))) { 1681 1664 iput(inode); 1682 1665 goto again; ··· 1821 1800 struct super_block *sb = inode->i_sb; 1822 1801 ino_t ino = inode->i_ino; 1823 1802 struct hlist_head *head = inode_hashtable + hash(sb, ino); 1803 + bool isnew; 1824 1804 1825 1805 might_sleep(); 1826 1806 ··· 1854 1832 return -EBUSY; 1855 1833 } 1856 1834 __iget(old); 1835 + isnew = !!(inode_state_read(old) & I_NEW); 1857 1836 spin_unlock(&old->i_lock); 1858 1837 spin_unlock(&inode_hash_lock); 1859 - wait_on_inode(old); 1838 + if (isnew) 1839 + wait_on_new_inode(old); 1860 1840 if (unlikely(!inode_unhashed(old))) { 1861 1841 iput(old); 1862 1842 return -EBUSY;
+2 -10
include/linux/fs.h
··· 1030 1030 hlist_add_fake(&inode->i_hash); 1031 1031 } 1032 1032 1033 - static inline void wait_on_inode(struct inode *inode) 1034 - { 1035 - wait_var_event(inode_state_wait_address(inode, __I_NEW), 1036 - !(inode_state_read_once(inode) & I_NEW)); 1037 - /* 1038 - * Pairs with routines clearing I_NEW. 1039 - */ 1040 - smp_rmb(); 1041 - } 1033 + void wait_on_new_inode(struct inode *inode); 1042 1034 1043 1035 /* 1044 1036 * inode->i_rwsem nesting subclasses for the lock validator: ··· 3409 3417 3410 3418 extern struct inode *ilookup5_nowait(struct super_block *sb, 3411 3419 unsigned long hashval, int (*test)(struct inode *, void *), 3412 - void *data); 3420 + void *data, bool *isnew); 3413 3421 extern struct inode *ilookup5(struct super_block *sb, unsigned long hashval, 3414 3422 int (*test)(struct inode *, void *), void *data); 3415 3423 extern struct inode *ilookup(struct super_block *sb, unsigned long ino);