Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge branch 'for-linus-2' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs

Pull vfs fixes from Al Viro:
"Switch mnt_hash to hlist, turning the races between __lookup_mnt() and
hash modifications into false negatives from __lookup_mnt() (instead
of hangs)"

On the false negatives from __lookup_mnt():
"The *only* thing we care about is not getting stuck in __lookup_mnt().
If it misses an entry because something in front of it just got moved
around, etc, we are fine. We'll notice that mount_lock mismatch and
that'll be it"

* 'for-linus-2' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
switch mnt_hash to hlist
don't bother with propagate_mnt() unless the target is shared
keep shadowed vfsmounts together
resizable namespace.c hashes

+134 -77
+2 -2
fs/mount.h
···
 };
 
 struct mountpoint {
-	struct list_head m_hash;
+	struct hlist_node m_hash;
 	struct dentry *m_dentry;
 	int m_count;
 };
 
 struct mount {
-	struct list_head mnt_hash;
+	struct hlist_node mnt_hash;
 	struct mount *mnt_parent;
 	struct dentry *mnt_mountpoint;
 	struct vfsmount mnt;
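
A note on the structural change above: a struct hlist_head is a single
pointer where a struct list_head is two, so a table of the same size holds
twice as many hash chains, which matters once the tables are sized by
alloc_large_system_hash() below. A minimal userspace sketch (struct shapes
copied from the kernel's include/linux/types.h; the program itself is
illustrative only, not kernel code):

#include <stdio.h>

/* Same layouts as the kernel's include/linux/types.h. */
struct list_head  { struct list_head *next, *prev; };
struct hlist_head { struct hlist_node *first; };
struct hlist_node { struct hlist_node *next, **pprev; };

int main(void)
{
	/* On 64-bit: 16 vs 8 bytes per hash bucket head. */
	printf("list_head bucket:  %zu bytes\n", sizeof(struct list_head));
	printf("hlist_head bucket: %zu bytes\n", sizeof(struct hlist_head));
	return 0;
}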
+115 -62
fs/namespace.c
···
 #include <linux/uaccess.h>
 #include <linux/proc_ns.h>
 #include <linux/magic.h>
+#include <linux/bootmem.h>
 #include "pnode.h"
 #include "internal.h"
 
-#define HASH_SHIFT ilog2(PAGE_SIZE / sizeof(struct list_head))
-#define HASH_SIZE (1UL << HASH_SHIFT)
+static unsigned int m_hash_mask __read_mostly;
+static unsigned int m_hash_shift __read_mostly;
+static unsigned int mp_hash_mask __read_mostly;
+static unsigned int mp_hash_shift __read_mostly;
+
+static __initdata unsigned long mhash_entries;
+static int __init set_mhash_entries(char *str)
+{
+	if (!str)
+		return 0;
+	mhash_entries = simple_strtoul(str, &str, 0);
+	return 1;
+}
+__setup("mhash_entries=", set_mhash_entries);
+
+static __initdata unsigned long mphash_entries;
+static int __init set_mphash_entries(char *str)
+{
+	if (!str)
+		return 0;
+	mphash_entries = simple_strtoul(str, &str, 0);
+	return 1;
+}
+__setup("mphash_entries=", set_mphash_entries);
 
 static int event;
 static DEFINE_IDA(mnt_id_ida);
···
 static int mnt_id_start = 0;
 static int mnt_group_start = 1;
 
-static struct list_head *mount_hashtable __read_mostly;
-static struct list_head *mountpoint_hashtable __read_mostly;
+static struct hlist_head *mount_hashtable __read_mostly;
+static struct hlist_head *mountpoint_hashtable __read_mostly;
 static struct kmem_cache *mnt_cache __read_mostly;
 static DECLARE_RWSEM(namespace_sem);
···
  */
 __cacheline_aligned_in_smp DEFINE_SEQLOCK(mount_lock);
 
-static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
+static inline struct hlist_head *m_hash(struct vfsmount *mnt, struct dentry *dentry)
 {
 	unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES);
 	tmp += ((unsigned long)dentry / L1_CACHE_BYTES);
-	tmp = tmp + (tmp >> HASH_SHIFT);
-	return tmp & (HASH_SIZE - 1);
+	tmp = tmp + (tmp >> m_hash_shift);
+	return &mount_hashtable[tmp & m_hash_mask];
+}
+
+static inline struct hlist_head *mp_hash(struct dentry *dentry)
+{
+	unsigned long tmp = ((unsigned long)dentry / L1_CACHE_BYTES);
+	tmp = tmp + (tmp >> mp_hash_shift);
+	return &mountpoint_hashtable[tmp & mp_hash_mask];
 }
 
 /*
···
 		mnt->mnt_writers = 0;
 #endif
 
-		INIT_LIST_HEAD(&mnt->mnt_hash);
+		INIT_HLIST_NODE(&mnt->mnt_hash);
 		INIT_LIST_HEAD(&mnt->mnt_child);
 		INIT_LIST_HEAD(&mnt->mnt_mounts);
 		INIT_LIST_HEAD(&mnt->mnt_list);
···
  */
 struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
 {
-	struct list_head *head = mount_hashtable + hash(mnt, dentry);
+	struct hlist_head *head = m_hash(mnt, dentry);
 	struct mount *p;
 
-	list_for_each_entry_rcu(p, head, mnt_hash)
+	hlist_for_each_entry_rcu(p, head, mnt_hash)
 		if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry)
 			return p;
 	return NULL;
···
  */
 struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry)
 {
-	struct list_head *head = mount_hashtable + hash(mnt, dentry);
-	struct mount *p;
-
-	list_for_each_entry_reverse(p, head, mnt_hash)
-		if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry)
-			return p;
-	return NULL;
+	struct mount *p, *res;
+	res = p = __lookup_mnt(mnt, dentry);
+	if (!p)
+		goto out;
+	hlist_for_each_entry_continue(p, mnt_hash) {
+		if (&p->mnt_parent->mnt != mnt || p->mnt_mountpoint != dentry)
+			break;
+		res = p;
+	}
+out:
+	return res;
 }
 
 /*
···
 static struct mountpoint *new_mountpoint(struct dentry *dentry)
 {
-	struct list_head *chain = mountpoint_hashtable + hash(NULL, dentry);
+	struct hlist_head *chain = mp_hash(dentry);
 	struct mountpoint *mp;
 	int ret;
 
-	list_for_each_entry(mp, chain, m_hash) {
+	hlist_for_each_entry(mp, chain, m_hash) {
 		if (mp->m_dentry == dentry) {
 			/* might be worth a WARN_ON() */
 			if (d_unlinked(dentry))
···
 	mp->m_dentry = dentry;
 	mp->m_count = 1;
-	list_add(&mp->m_hash, chain);
+	hlist_add_head(&mp->m_hash, chain);
 	return mp;
 }
···
 		spin_lock(&dentry->d_lock);
 		dentry->d_flags &= ~DCACHE_MOUNTED;
 		spin_unlock(&dentry->d_lock);
-		list_del(&mp->m_hash);
+		hlist_del(&mp->m_hash);
 		kfree(mp);
 	}
 }
···
 	mnt->mnt_parent = mnt;
 	mnt->mnt_mountpoint = mnt->mnt.mnt_root;
 	list_del_init(&mnt->mnt_child);
-	list_del_init(&mnt->mnt_hash);
+	hlist_del_init_rcu(&mnt->mnt_hash);
 	put_mountpoint(mnt->mnt_mp);
 	mnt->mnt_mp = NULL;
 }
···
 			struct mountpoint *mp)
 {
 	mnt_set_mountpoint(parent, mp, mnt);
-	list_add_tail(&mnt->mnt_hash, mount_hashtable +
-			hash(&parent->mnt, mp->m_dentry));
+	hlist_add_head_rcu(&mnt->mnt_hash, m_hash(&parent->mnt, mp->m_dentry));
 	list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
 }
 
 /*
  * vfsmount lock must be held for write
  */
-static void commit_tree(struct mount *mnt)
+static void commit_tree(struct mount *mnt, struct mount *shadows)
 {
 	struct mount *parent = mnt->mnt_parent;
 	struct mount *m;
···
 	list_splice(&head, n->list.prev);
 
-	list_add_tail(&mnt->mnt_hash, mount_hashtable +
-			hash(&parent->mnt, mnt->mnt_mountpoint));
+	if (shadows)
+		hlist_add_after_rcu(&shadows->mnt_hash, &mnt->mnt_hash);
+	else
+		hlist_add_head_rcu(&mnt->mnt_hash,
+				m_hash(&parent->mnt, mnt->mnt_mountpoint));
 	list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
 	touch_mnt_namespace(n);
 }
···
 EXPORT_SYMBOL(may_umount);
 
-static LIST_HEAD(unmounted);	/* protected by namespace_sem */
+static HLIST_HEAD(unmounted);	/* protected by namespace_sem */
 
 static void namespace_unlock(void)
 {
 	struct mount *mnt;
-	LIST_HEAD(head);
+	struct hlist_head head = unmounted;
 
-	if (likely(list_empty(&unmounted))) {
+	if (likely(hlist_empty(&head))) {
 		up_write(&namespace_sem);
 		return;
 	}
 
-	list_splice_init(&unmounted, &head);
+	head.first->pprev = &head.first;
+	INIT_HLIST_HEAD(&unmounted);
+
 	up_write(&namespace_sem);
 
 	synchronize_rcu();
 
-	while (!list_empty(&head)) {
-		mnt = list_first_entry(&head, struct mount, mnt_hash);
-		list_del_init(&mnt->mnt_hash);
+	while (!hlist_empty(&head)) {
+		mnt = hlist_entry(head.first, struct mount, mnt_hash);
+		hlist_del_init(&mnt->mnt_hash);
 		if (mnt->mnt_ex_mountpoint.mnt)
 			path_put(&mnt->mnt_ex_mountpoint);
 		mntput(&mnt->mnt);
···
  */
 void umount_tree(struct mount *mnt, int how)
 {
-	LIST_HEAD(tmp_list);
+	HLIST_HEAD(tmp_list);
 	struct mount *p;
+	struct mount *last = NULL;
 
-	for (p = mnt; p; p = next_mnt(p, mnt))
-		list_move(&p->mnt_hash, &tmp_list);
+	for (p = mnt; p; p = next_mnt(p, mnt)) {
+		hlist_del_init_rcu(&p->mnt_hash);
+		hlist_add_head(&p->mnt_hash, &tmp_list);
+	}
 
 	if (how)
 		propagate_umount(&tmp_list);
 
-	list_for_each_entry(p, &tmp_list, mnt_hash) {
+	hlist_for_each_entry(p, &tmp_list, mnt_hash) {
 		list_del_init(&p->mnt_expire);
 		list_del_init(&p->mnt_list);
 		__touch_mnt_namespace(p->mnt_ns);
···
 			p->mnt_mp = NULL;
 		}
 		change_mnt_propagation(p, MS_PRIVATE);
+		last = p;
 	}
-	list_splice(&tmp_list, &unmounted);
+	if (last) {
+		last->mnt_hash.next = unmounted.first;
+		unmounted.first = tmp_list.first;
+		unmounted.first->pprev = &unmounted.first;
+	}
 }
 
 static void shrink_submounts(struct mount *mnt);
···
 				 struct mountpoint *dest_mp,
 				 struct path *parent_path)
 {
-	LIST_HEAD(tree_list);
+	HLIST_HEAD(tree_list);
 	struct mount *child, *p;
+	struct hlist_node *n;
 	int err;
 
 	if (IS_MNT_SHARED(dest_mnt)) {
 		err = invent_group_ids(source_mnt, true);
 		if (err)
 			goto out;
-	}
-	err = propagate_mnt(dest_mnt, dest_mp, source_mnt, &tree_list);
-	if (err)
-		goto out_cleanup_ids;
-
-	lock_mount_hash();
-
-	if (IS_MNT_SHARED(dest_mnt)) {
+		err = propagate_mnt(dest_mnt, dest_mp, source_mnt, &tree_list);
+		if (err)
+			goto out_cleanup_ids;
+		lock_mount_hash();
 		for (p = source_mnt; p; p = next_mnt(p, source_mnt))
 			set_mnt_shared(p);
+	} else {
+		lock_mount_hash();
 	}
 	if (parent_path) {
 		detach_mnt(source_mnt, parent_path);
···
 		touch_mnt_namespace(source_mnt->mnt_ns);
 	} else {
 		mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt);
-		commit_tree(source_mnt);
+		commit_tree(source_mnt, NULL);
 	}
 
-	list_for_each_entry_safe(child, p, &tree_list, mnt_hash) {
-		list_del_init(&child->mnt_hash);
-		commit_tree(child);
+	hlist_for_each_entry_safe(child, n, &tree_list, mnt_hash) {
+		struct mount *q;
+		hlist_del_init(&child->mnt_hash);
+		q = __lookup_mnt_last(&child->mnt_parent->mnt,
+				      child->mnt_mountpoint);
+		commit_tree(child, q);
 	}
 	unlock_mount_hash();
 
 	return 0;
 
 out_cleanup_ids:
-	if (IS_MNT_SHARED(dest_mnt))
-		cleanup_group_ids(source_mnt, NULL);
+	cleanup_group_ids(source_mnt, NULL);
 out:
 	return err;
 }
···
 	mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct mount),
 			0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
 
-	mount_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC);
-	mountpoint_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC);
+	mount_hashtable = alloc_large_system_hash("Mount-cache",
+				sizeof(struct hlist_head),
+				mhash_entries, 19,
+				0,
+				&m_hash_shift, &m_hash_mask, 0, 0);
+	mountpoint_hashtable = alloc_large_system_hash("Mountpoint-cache",
+				sizeof(struct hlist_head),
+				mphash_entries, 19,
+				0,
+				&mp_hash_shift, &mp_hash_mask, 0, 0);
 
 	if (!mount_hashtable || !mountpoint_hashtable)
 		panic("Failed to allocate mount hash table\n");
 
-	printk(KERN_INFO "Mount-cache hash table entries: %lu\n", HASH_SIZE);
-
-	for (u = 0; u < HASH_SIZE; u++)
-		INIT_LIST_HEAD(&mount_hashtable[u]);
-	for (u = 0; u < HASH_SIZE; u++)
-		INIT_LIST_HEAD(&mountpoint_hashtable[u]);
+	for (u = 0; u <= m_hash_mask; u++)
+		INIT_HLIST_HEAD(&mount_hashtable[u]);
+	for (u = 0; u <= mp_hash_mask; u++)
+		INIT_HLIST_HEAD(&mountpoint_hashtable[u]);
 
 	kernfs_init();
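
The two __setup() hooks added above mean the new table sizes can be pinned
from the kernel command line, e.g. booting with mhash_entries=1048576
mphash_entries=65536. Left unset, alloc_large_system_hash() sizes each table
from available memory (19 is the scale argument to that heuristic) and logs
the resulting "Mount-cache" and "Mountpoint-cache" entry counts itself, which
is why the old printk of HASH_SIZE can go.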
+15 -11
fs/pnode.c
···
  *	@tree_list : list of heads of trees to be attached.
  */
 int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp,
-		    struct mount *source_mnt, struct list_head *tree_list)
+		    struct mount *source_mnt, struct hlist_head *tree_list)
 {
 	struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns;
 	struct mount *m, *child;
 	int ret = 0;
 	struct mount *prev_dest_mnt = dest_mnt;
 	struct mount *prev_src_mnt = source_mnt;
-	LIST_HEAD(tmp_list);
+	HLIST_HEAD(tmp_list);
 
 	for (m = propagation_next(dest_mnt, dest_mnt); m;
 			m = propagation_next(m, dest_mnt)) {
···
 		child = copy_tree(source, source->mnt.mnt_root, type);
 		if (IS_ERR(child)) {
 			ret = PTR_ERR(child);
-			list_splice(tree_list, tmp_list.prev);
+			tmp_list = *tree_list;
+			tmp_list.first->pprev = &tmp_list.first;
+			INIT_HLIST_HEAD(tree_list);
 			goto out;
 		}
 
 		if (is_subdir(dest_mp->m_dentry, m->mnt.mnt_root)) {
 			mnt_set_mountpoint(m, dest_mp, child);
-			list_add_tail(&child->mnt_hash, tree_list);
+			hlist_add_head(&child->mnt_hash, tree_list);
 		} else {
 			/*
 			 * This can happen if the parent mount was bind mounted
 			 * on some subdirectory of a shared/slave mount.
 			 */
-			list_add_tail(&child->mnt_hash, &tmp_list);
+			hlist_add_head(&child->mnt_hash, &tmp_list);
 		}
 		prev_dest_mnt = m;
 		prev_src_mnt = child;
 	}
 out:
 	lock_mount_hash();
-	while (!list_empty(&tmp_list)) {
-		child = list_first_entry(&tmp_list, struct mount, mnt_hash);
+	while (!hlist_empty(&tmp_list)) {
+		child = hlist_entry(tmp_list.first, struct mount, mnt_hash);
 		umount_tree(child, 0);
 	}
 	unlock_mount_hash();
···
 		 * umount the child only if the child has no
 		 * other children
 		 */
-		if (child && list_empty(&child->mnt_mounts))
-			list_move_tail(&child->mnt_hash, &mnt->mnt_hash);
+		if (child && list_empty(&child->mnt_mounts)) {
+			hlist_del_init_rcu(&child->mnt_hash);
+			hlist_add_before_rcu(&child->mnt_hash, &mnt->mnt_hash);
+		}
 	}
 }
···
  *
  * vfsmount lock must be held for write
  */
-int propagate_umount(struct list_head *list)
+int propagate_umount(struct hlist_head *list)
 {
 	struct mount *mnt;
 
-	list_for_each_entry(mnt, list, mnt_hash)
+	hlist_for_each_entry(mnt, list, mnt_hash)
 		__propagate_umount(mnt);
 	return 0;
 }
+2 -2
fs/pnode.h
···
 
 void change_mnt_propagation(struct mount *, int);
 int propagate_mnt(struct mount *, struct mountpoint *, struct mount *,
-		struct list_head *);
-int propagate_umount(struct list_head *);
+		struct hlist_head *);
+int propagate_umount(struct hlist_head *);
 int propagate_mount_busy(struct mount *, int);
 void mnt_release_group_id(struct mount *);
 int get_dominating_id(struct mount *mnt, const struct path *root);