Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'pull-dcache-busy-wait' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs

Pull dcache busy loop updates from Al Viro:
"Fix livelocks in shrink_dcache_tree()

If shrink_dcache_tree() finds a dentry in the middle of being killed
by another thread, it has to wait until the victim finishes dying,
gets detached from the tree and ceases to pin its parent.

The way we used to deal with that amounted to busy-wait;
unfortunately, it's not just inefficient but can lead to reliably
reproducible hard livelocks.

Solved by having shrink_dcache_tree() attach a completion to such
dentry, with dentry_unlist() calling complete() on all objects
attached to it. With a bit of care it can be done without growing
struct dentry or adding overhead in normal case"

* tag 'pull-dcache-busy-wait' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
get rid of busy-waiting in shrink_dcache_tree()
dcache.c: more idiomatic "positives are not allowed" sanity checks
struct dentry: make ->d_u anonymous
for_each_alias(): helper macro for iterating through dentries of given inode

+140 -47
+10
Documentation/filesystems/porting.rst
··· 1368 1368 1369 1369 lookup_one_qstr_excl() is no longer exported - use start_creating() or 1370 1370 similar. 1371 + 1371 1372 --- 1372 1373 1373 1374 **mandatory** ··· 1376 1375 lock_rename(), lock_rename_child(), unlock_rename() are no 1377 1376 longer available. Use start_renaming() or similar. 1378 1377 1378 + --- 1379 + 1380 + **recommended** 1381 + 1382 + If you really need to iterate through dentries for given inode, use 1383 + for_each_alias(dentry, inode) instead of hlist_for_each_entry; better 1384 + yet, see if any of the exported primitives could be used instead of 1385 + the entire loop. You still need to hold ->i_lock of the inode over 1386 + either form of manual loop.
+1 -1
fs/affs/amigaffs.c
··· 126 126 { 127 127 struct dentry *dentry; 128 128 spin_lock(&inode->i_lock); 129 - hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) { 129 + for_each_alias(dentry, inode) { 130 130 if (entry_ino == (u32)(long)dentry->d_fsdata) { 131 131 dentry->d_fsdata = (void *)(unsigned long)inode->i_ino; 132 132 break;
+2 -2
fs/ceph/mds_client.c
··· 4608 4608 goto out_unlock; 4609 4609 4610 4610 if (S_ISDIR(inode->i_mode)) { 4611 - alias = hlist_entry(inode->i_dentry.first, struct dentry, d_u.d_alias); 4611 + alias = hlist_entry(inode->i_dentry.first, struct dentry, d_alias); 4612 4612 if (!IS_ROOT(alias)) 4613 4613 dn = dget(alias); 4614 4614 goto out_unlock; 4615 4615 } 4616 4616 4617 - hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) { 4617 + for_each_alias(alias, inode) { 4618 4618 spin_lock(&alias->d_lock); 4619 4619 if (!d_unhashed(alias) && 4620 4620 (ceph_dentry(alias)->flags & CEPH_DENTRY_PRIMARY_LINK)) {
+98 -31
fs/dcache.c
··· 40 40 /* 41 41 * Usage: 42 42 * dcache->d_inode->i_lock protects: 43 - * - i_dentry, d_u.d_alias, d_inode of aliases 43 + * - i_dentry, d_alias, d_inode of aliases 44 44 * dcache_hash_bucket lock protects: 45 45 * - the dcache hash table 46 46 * s_roots bl list spinlock protects: ··· 55 55 * - d_unhashed() 56 56 * - d_parent and d_chilren 57 57 * - childrens' d_sib and d_parent 58 - * - d_u.d_alias, d_inode 58 + * - d_alias, d_inode 59 59 * 60 60 * Ordering: 61 61 * dentry->d_inode->i_lock ··· 341 341 342 342 static void __d_free(struct rcu_head *head) 343 343 { 344 - struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu); 344 + struct dentry *dentry = container_of(head, struct dentry, d_rcu); 345 345 346 346 kmem_cache_free(dentry_cache, dentry); 347 347 } 348 348 349 349 static void __d_free_external(struct rcu_head *head) 350 350 { 351 - struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu); 351 + struct dentry *dentry = container_of(head, struct dentry, d_rcu); 352 352 kfree(external_name(dentry)); 353 353 kmem_cache_free(dentry_cache, dentry); 354 354 } ··· 428 428 429 429 static void dentry_free(struct dentry *dentry) 430 430 { 431 - WARN_ON(!hlist_unhashed(&dentry->d_u.d_alias)); 431 + WARN_ON(d_really_is_positive(dentry)); 432 432 if (unlikely(dname_external(dentry))) { 433 433 struct external_name *p = external_name(dentry); 434 434 if (likely(atomic_dec_and_test(&p->count))) { 435 - call_rcu(&dentry->d_u.d_rcu, __d_free_external); 435 + call_rcu(&dentry->d_rcu, __d_free_external); 436 436 return; 437 437 } 438 438 } 439 439 /* if dentry was never visible to RCU, immediate free is OK */ 440 440 if (dentry->d_flags & DCACHE_NORCU) 441 - __d_free(&dentry->d_u.d_rcu); 441 + __d_free(&dentry->d_rcu); 442 442 else 443 - call_rcu(&dentry->d_u.d_rcu, __d_free); 443 + call_rcu(&dentry->d_rcu, __d_free); 444 444 } 445 445 446 446 /* ··· 455 455 456 456 raw_write_seqcount_begin(&dentry->d_seq); 457 457 
__d_clear_type_and_inode(dentry); 458 - hlist_del_init(&dentry->d_u.d_alias); 458 + hlist_del_init(&dentry->d_alias); 459 + /* 460 + * dentry becomes negative, so the space occupied by ->d_alias 461 + * belongs to ->waiters now; we could use __hlist_del() instead 462 + * of hlist_del_init(), if not for the stunt pulled by nfs 463 + * dummy root dentries - positive dentry *not* included into 464 + * the alias list of its inode. Open-coding hlist_del_init() 465 + * and removing zeroing would be too clumsy... 466 + */ 467 + dentry->waiters = NULL; 459 468 raw_write_seqcount_end(&dentry->d_seq); 460 469 spin_unlock(&dentry->d_lock); 461 470 spin_unlock(&inode->i_lock); ··· 614 605 } 615 606 EXPORT_SYMBOL(d_drop); 616 607 608 + struct completion_list { 609 + struct completion_list *next; 610 + struct completion completion; 611 + }; 612 + 613 + /* 614 + * shrink_dcache_tree() needs to be notified when dentry in process of 615 + * being evicted finally gets unlisted. Such dentries are 616 + * already with negative ->d_count 617 + * already negative 618 + * already not in in-lookup hash 619 + * reachable only via ->d_sib. 620 + * 621 + * Use ->waiters for a single-linked list of struct completion_list of 622 + * waiters. 
623 + */ 624 + static inline void d_add_waiter(struct dentry *dentry, struct completion_list *p) 625 + { 626 + struct completion_list *v = dentry->waiters; 627 + init_completion(&p->completion); 628 + p->next = v; 629 + dentry->waiters = p; 630 + } 631 + 632 + static inline void d_complete_waiters(struct dentry *dentry) 633 + { 634 + struct completion_list *v = dentry->waiters; 635 + if (unlikely(v)) { 636 + /* some shrink_dcache_tree() instances are waiting */ 637 + dentry->waiters = NULL; 638 + while (v) { 639 + struct completion *r = &v->completion; 640 + v = v->next; 641 + complete(r); 642 + } 643 + } 644 + } 645 + 617 646 static inline void dentry_unlist(struct dentry *dentry) 618 647 { 619 648 struct dentry *next; ··· 660 613 * attached to the dentry tree 661 614 */ 662 615 dentry->d_flags |= DCACHE_DENTRY_KILLED; 616 + d_complete_waiters(dentry); 663 617 if (unlikely(hlist_unhashed(&dentry->d_sib))) 664 618 return; 665 619 __hlist_del(&dentry->d_sib); ··· 838 790 struct dentry *de; 839 791 840 792 spin_lock(&inode->i_lock); 841 - hlist_for_each_entry(de, &inode->i_dentry, d_u.d_alias) { 793 + for_each_alias(de, inode) { 842 794 spin_lock(&de->d_lock); 843 795 de->d_flags |= DCACHE_DONTCACHE; 844 796 spin_unlock(&de->d_lock); ··· 1058 1010 1059 1011 if (hlist_empty(&inode->i_dentry)) 1060 1012 return NULL; 1061 - alias = hlist_entry(inode->i_dentry.first, struct dentry, d_u.d_alias); 1013 + alias = hlist_entry(inode->i_dentry.first, struct dentry, d_alias); 1062 1014 lockref_get(&alias->d_lockref); 1063 1015 return alias; 1064 1016 } ··· 1088 1040 if (S_ISDIR(inode->i_mode)) 1089 1041 return __d_find_any_alias(inode); 1090 1042 1091 - hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) { 1043 + for_each_alias(alias, inode) { 1092 1044 spin_lock(&alias->d_lock); 1093 1045 if (!d_unhashed(alias)) { 1094 1046 dget_dlock(alias); ··· 1141 1093 // used without having I_FREEING set, which means no aliases left 1142 1094 if (likely(!(inode_state_read(inode) & 
I_FREEING) && !hlist_empty(l))) { 1143 1095 if (S_ISDIR(inode->i_mode)) { 1144 - de = hlist_entry(l->first, struct dentry, d_u.d_alias); 1096 + de = hlist_entry(l->first, struct dentry, d_alias); 1145 1097 } else { 1146 - hlist_for_each_entry(de, l, d_u.d_alias) 1098 + hlist_for_each_entry(de, l, d_alias) 1147 1099 if (!d_unhashed(de)) 1148 1100 break; 1149 1101 } ··· 1181 1133 struct dentry *dentry; 1182 1134 1183 1135 spin_lock(&inode->i_lock); 1184 - hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) 1136 + for_each_alias(dentry, inode) 1185 1137 d_dispose_if_unused(dentry, &dispose); 1186 1138 spin_unlock(&inode->i_lock); 1187 1139 shrink_dentry_list(&dispose); ··· 1617 1569 return D_WALK_QUIT; 1618 1570 } 1619 1571 to_shrink_list(dentry, &data->dispose); 1572 + } else if (dentry->d_lockref.count < 0) { 1573 + rcu_read_lock(); 1574 + data->victim = dentry; 1575 + return D_WALK_QUIT; 1620 1576 } 1621 1577 /* 1622 1578 * We can return to the caller if we have found some (this ··· 1660 1608 data.victim = NULL; 1661 1609 d_walk(parent, &data, select_collect2); 1662 1610 if (data.victim) { 1663 - spin_lock(&data.victim->d_lock); 1664 - if (!lock_for_kill(data.victim)) { 1665 - spin_unlock(&data.victim->d_lock); 1611 + struct dentry *v = data.victim; 1612 + 1613 + spin_lock(&v->d_lock); 1614 + if (v->d_lockref.count < 0 && 1615 + !(v->d_flags & DCACHE_DENTRY_KILLED)) { 1616 + struct completion_list wait; 1617 + // It's busy dying; have it notify us once 1618 + // it becomes invisible to d_walk(). 
1619 + d_add_waiter(v, &wait); 1620 + spin_unlock(&v->d_lock); 1621 + rcu_read_unlock(); 1622 + if (!list_empty(&data.dispose)) 1623 + shrink_dentry_list(&data.dispose); 1624 + wait_for_completion(&wait.completion); 1625 + continue; 1626 + } 1627 + if (!lock_for_kill(v)) { 1628 + spin_unlock(&v->d_lock); 1666 1629 rcu_read_unlock(); 1667 1630 } else { 1668 - shrink_kill(data.victim); 1631 + shrink_kill(v); 1669 1632 } 1670 1633 } 1671 1634 if (!list_empty(&data.dispose)) ··· 1854 1787 INIT_HLIST_BL_NODE(&dentry->d_hash); 1855 1788 INIT_LIST_HEAD(&dentry->d_lru); 1856 1789 INIT_HLIST_HEAD(&dentry->d_children); 1857 - INIT_HLIST_NODE(&dentry->d_u.d_alias); 1790 + dentry->waiters = NULL; 1858 1791 INIT_HLIST_NODE(&dentry->d_sib); 1859 1792 1860 1793 if (dentry->d_op && dentry->d_op->d_init) { ··· 2047 1980 if ((dentry->d_flags & 2048 1981 (DCACHE_LRU_LIST|DCACHE_SHRINK_LIST)) == DCACHE_LRU_LIST) 2049 1982 this_cpu_dec(nr_dentry_negative); 2050 - hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry); 1983 + hlist_add_head(&dentry->d_alias, &inode->i_dentry); 2051 1984 raw_write_seqcount_begin(&dentry->d_seq); 2052 1985 __d_set_inode_and_type(dentry, inode, add_flags); 2053 1986 raw_write_seqcount_end(&dentry->d_seq); ··· 2071 2004 2072 2005 void d_instantiate(struct dentry *entry, struct inode * inode) 2073 2006 { 2074 - BUG_ON(!hlist_unhashed(&entry->d_u.d_alias)); 2007 + BUG_ON(d_really_is_positive(entry)); 2075 2008 if (inode) { 2076 2009 security_d_instantiate(entry, inode); 2077 2010 spin_lock(&inode->i_lock); ··· 2091 2024 */ 2092 2025 void d_instantiate_new(struct dentry *entry, struct inode *inode) 2093 2026 { 2094 - BUG_ON(!hlist_unhashed(&entry->d_u.d_alias)); 2027 + BUG_ON(d_really_is_positive(entry)); 2095 2028 BUG_ON(!inode); 2096 2029 lockdep_annotate_inode_mutex_key(inode); 2097 2030 security_d_instantiate(entry, inode); ··· 2154 2087 2155 2088 spin_lock(&new->d_lock); 2156 2089 __d_set_inode_and_type(new, inode, add_flags); 2157 - 
hlist_add_head(&new->d_u.d_alias, &inode->i_dentry); 2090 + hlist_add_head(&new->d_alias, &inode->i_dentry); 2158 2091 if (!disconnected) { 2159 2092 hlist_bl_lock(&sb->s_roots); 2160 2093 hlist_bl_add_head(&new->d_hash, &sb->s_roots); ··· 2725 2658 * we unlock the chain. All fields are stable in everything 2726 2659 * we encounter. 2727 2660 */ 2728 - hlist_bl_for_each_entry(dentry, node, b, d_u.d_in_lookup_hash) { 2661 + hlist_bl_for_each_entry(dentry, node, b, d_in_lookup_hash) { 2729 2662 if (dentry->d_name.hash != hash) 2730 2663 continue; 2731 2664 if (dentry->d_parent != parent) ··· 2767 2700 } 2768 2701 rcu_read_unlock(); 2769 2702 new->d_wait = wq; 2770 - hlist_bl_add_head(&new->d_u.d_in_lookup_hash, b); 2703 + hlist_bl_add_head(&new->d_in_lookup_hash, b); 2771 2704 hlist_bl_unlock(b); 2772 2705 return new; 2773 2706 mismatch: ··· 2792 2725 b = in_lookup_hash(dentry->d_parent, dentry->d_name.hash); 2793 2726 hlist_bl_lock(b); 2794 2727 dentry->d_flags &= ~DCACHE_PAR_LOOKUP; 2795 - __hlist_bl_del(&dentry->d_u.d_in_lookup_hash); 2728 + __hlist_bl_del(&dentry->d_in_lookup_hash); 2796 2729 d_wait = dentry->d_wait; 2797 2730 dentry->d_wait = NULL; 2798 2731 hlist_bl_unlock(b); 2799 - INIT_HLIST_NODE(&dentry->d_u.d_alias); 2732 + dentry->waiters = NULL; 2800 2733 INIT_LIST_HEAD(&dentry->d_lru); 2801 2734 return d_wait; 2802 2735 } ··· 2827 2760 d_set_d_op(dentry, ops); 2828 2761 if (inode) { 2829 2762 unsigned add_flags = d_flags_for_inode(inode); 2830 - hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry); 2763 + hlist_add_head(&dentry->d_alias, &inode->i_dentry); 2831 2764 raw_write_seqcount_begin(&dentry->d_seq); 2832 2765 __d_set_inode_and_type(dentry, inode, add_flags); 2833 2766 raw_write_seqcount_end(&dentry->d_seq); ··· 2862 2795 2863 2796 struct dentry *d_make_persistent(struct dentry *dentry, struct inode *inode) 2864 2797 { 2865 - WARN_ON(!hlist_unhashed(&dentry->d_u.d_alias)); 2798 + WARN_ON(d_really_is_positive(dentry)); 2866 2799 WARN_ON(!inode); 
2867 2800 security_d_instantiate(dentry, inode); 2868 2801 spin_lock(&inode->i_lock); ··· 3252 3185 struct dentry *dentry = file->f_path.dentry; 3253 3186 3254 3187 BUG_ON(dname_external(dentry) || 3255 - !hlist_unhashed(&dentry->d_u.d_alias) || 3188 + d_really_is_positive(dentry) || 3256 3189 !d_unlinked(dentry)); 3257 3190 spin_lock(&dentry->d_parent->d_lock); 3258 3191 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
+1 -1
fs/exportfs/expfs.c
··· 52 52 53 53 inode = result->d_inode; 54 54 spin_lock(&inode->i_lock); 55 - hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) { 55 + for_each_alias(dentry, inode) { 56 56 dget(dentry); 57 57 spin_unlock(&inode->i_lock); 58 58 if (toput)
+1 -1
fs/inode.c
··· 750 750 return; 751 751 } 752 752 753 - dentry_ptr = container_of(dentry_first, struct dentry, d_u.d_alias); 753 + dentry_ptr = container_of(dentry_first, struct dentry, d_alias); 754 754 if (get_kernel_nofault(dentry, dentry_ptr) || 755 755 !dentry.d_parent || !dentry.d_name.name) { 756 756 pr_warn("aops:%ps ino:%llx invalid dentry:%px\n",
+2 -2
fs/nfs/dir.c
··· 1470 1470 struct dentry *alias; 1471 1471 struct inode *dir; 1472 1472 1473 - hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) { 1473 + for_each_alias(alias, inode) { 1474 1474 spin_lock(&alias->d_lock); 1475 1475 dir = d_inode_rcu(alias->d_parent); 1476 1476 if (!dir || ··· 1489 1489 if (hlist_empty(&dir->i_dentry)) 1490 1490 return; 1491 1491 this_parent = 1492 - hlist_entry(dir->i_dentry.first, struct dentry, d_u.d_alias); 1492 + hlist_entry(dir->i_dentry.first, struct dentry, d_alias); 1493 1493 1494 1494 spin_lock(&this_parent->d_lock); 1495 1495 nfs_unset_verifier_delegated(&this_parent->d_time);
+1 -1
fs/nfs/getroot.c
··· 54 54 */ 55 55 spin_lock(&d_inode(sb->s_root)->i_lock); 56 56 spin_lock(&sb->s_root->d_lock); 57 - hlist_del_init(&sb->s_root->d_u.d_alias); 57 + hlist_del_init(&sb->s_root->d_alias); 58 58 spin_unlock(&sb->s_root->d_lock); 59 59 spin_unlock(&d_inode(sb->s_root)->i_lock); 60 60 }
+1 -1
fs/notify/fsnotify.c
··· 76 76 spin_lock(&inode->i_lock); 77 77 /* run all of the dentries associated with this inode. Since this is a 78 78 * directory, there damn well better only be one item on this list */ 79 - hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) { 79 + for_each_alias(alias, inode) { 80 80 struct dentry *child; 81 81 82 82 /* run all of the children of the original inode and fix their
+1 -1
fs/ocfs2/dcache.c
··· 145 145 struct dentry *dentry; 146 146 147 147 spin_lock(&inode->i_lock); 148 - hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) { 148 + for_each_alias(dentry, inode) { 149 149 spin_lock(&dentry->d_lock); 150 150 if (ocfs2_match_dentry(dentry, parent_blkno, skip_unhashed)) { 151 151 trace_ocfs2_find_local_alias(dentry->d_name.len,
+1 -1
fs/overlayfs/dir.c
··· 919 919 920 920 /* Try to find another, hashed alias */ 921 921 spin_lock(&inode->i_lock); 922 - hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) { 922 + for_each_alias(alias, inode) { 923 923 if (alias != dentry && !d_unhashed(alias)) 924 924 break; 925 925 }
+1 -1
fs/smb/client/inode.c
··· 1595 1595 struct dentry *dentry; 1596 1596 1597 1597 spin_lock(&inode->i_lock); 1598 - hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) { 1598 + for_each_alias(dentry, inode) { 1599 1599 if (!d_unhashed(dentry) || IS_ROOT(dentry)) { 1600 1600 spin_unlock(&inode->i_lock); 1601 1601 return true;
+20 -4
include/linux/dcache.h
··· 88 88 89 89 #define d_lock d_lockref.lock 90 90 #define d_iname d_shortname.string 91 + struct completion_list; 91 92 92 93 struct dentry { 93 94 /* RCU lookup touched fields */ ··· 123 122 struct hlist_node d_sib; /* child of parent list */ 124 123 struct hlist_head d_children; /* our children */ 125 124 /* 126 - * d_alias and d_rcu can share memory 125 + * the following members can share memory - their uses are 126 + * mutually exclusive. 127 127 */ 128 128 union { 129 - struct hlist_node d_alias; /* inode alias list */ 130 - struct hlist_bl_node d_in_lookup_hash; /* only for in-lookup ones */ 129 + /* positives: inode alias list */ 130 + struct hlist_node d_alias; 131 + /* in-lookup ones (all negative, live): hash chain */ 132 + struct hlist_bl_node d_in_lookup_hash; 133 + /* killed ones: (already negative) used to schedule freeing */ 131 134 struct rcu_head d_rcu; 132 - } d_u; 135 + /* 136 + * live non-in-lookup negatives: used if shrink_dcache_tree() 137 + * races with eviction by another thread and needs to wait for 138 + * this dentry to get killed . Remains NULL for almost all 139 + * negative dentries. 140 + */ 141 + struct completion_list *waiters; 142 + }; 133 143 }; 134 144 135 145 /* ··· 627 615 void set_default_d_op(struct super_block *, const struct dentry_operations *); 628 616 struct dentry *d_make_persistent(struct dentry *, struct inode *); 629 617 void d_make_discardable(struct dentry *dentry); 618 + 619 + /* inode->i_lock must be held over that */ 620 + #define for_each_alias(dentry, inode) \ 621 + hlist_for_each_entry(dentry, &(inode)->i_dentry, d_alias) 630 622 631 623 #endif /* __LINUX_DCACHE_H */