Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

mnt: support ns lookup

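Move the mount namespace to the generic ns lookup infrastructure.
This allows us to drop several members from struct mnt_namespace
(seq, mnt_ns_rcu, mnt_ns_tree_node and mnt_ns_list), since the id, RCU
head and tree linkage now live in the embedded struct ns_common.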

Signed-off-by: Christian Brauner <brauner@kernel.org>

+33 -120
+2 -8
fs/mount.h
··· 17 17 }; 18 18 struct user_namespace *user_ns; 19 19 struct ucounts *ucounts; 20 - u64 seq; /* Sequence number to prevent loops */ 21 - union { 22 - wait_queue_head_t poll; 23 - struct rcu_head mnt_ns_rcu; 24 - }; 20 + wait_queue_head_t poll; 25 21 u64 seq_origin; /* Sequence number of origin mount namespace */ 26 22 u64 event; 27 23 #ifdef CONFIG_FSNOTIFY ··· 26 30 #endif 27 31 unsigned int nr_mounts; /* # of mounts in the namespace */ 28 32 unsigned int pending_mounts; 29 - struct rb_node mnt_ns_tree_node; /* node in the mnt_ns_tree */ 30 - struct list_head mnt_ns_list; /* entry in the sequential list of mounts namespace */ 31 33 refcount_t passive; /* number references not pinning @mounts */ 32 34 } __randomize_layout; 33 35 ··· 167 173 168 174 static inline bool is_anon_ns(struct mnt_namespace *ns) 169 175 { 170 - return ns->seq == 0; 176 + return ns->ns.ns_id == 0; 171 177 } 172 178 173 179 static inline bool anon_ns_root(const struct mount *m)
+29 -110
fs/namespace.c
··· 33 33 #include <linux/shmem_fs.h> 34 34 #include <linux/mnt_idmapping.h> 35 35 #include <linux/pidfs.h> 36 + #include <linux/nstree.h> 36 37 37 38 #include "pnode.h" 38 39 #include "internal.h" ··· 81 80 static HLIST_HEAD(unmounted); /* protected by namespace_sem */ 82 81 static LIST_HEAD(ex_mountpoints); /* protected by namespace_sem */ 83 82 static struct mnt_namespace *emptied_ns; /* protected by namespace_sem */ 84 - static DEFINE_SEQLOCK(mnt_ns_tree_lock); 85 83 86 84 #ifdef CONFIG_FSNOTIFY 87 85 LIST_HEAD(notify_list); /* protected by namespace_sem */ 88 86 #endif 89 - static struct rb_root mnt_ns_tree = RB_ROOT; /* protected by mnt_ns_tree_lock */ 90 - static LIST_HEAD(mnt_ns_list); /* protected by mnt_ns_tree_lock */ 91 87 92 88 enum mount_kattr_flags_t { 93 89 MOUNT_KATTR_RECURSE = (1 << 0), ··· 117 119 118 120 static inline struct mnt_namespace *node_to_mnt_ns(const struct rb_node *node) 119 121 { 122 + struct ns_common *ns; 123 + 120 124 if (!node) 121 125 return NULL; 122 - return rb_entry(node, struct mnt_namespace, mnt_ns_tree_node); 123 - } 124 - 125 - static int mnt_ns_cmp(struct rb_node *a, const struct rb_node *b) 126 - { 127 - struct mnt_namespace *ns_a = node_to_mnt_ns(a); 128 - struct mnt_namespace *ns_b = node_to_mnt_ns(b); 129 - u64 seq_a = ns_a->seq; 130 - u64 seq_b = ns_b->seq; 131 - 132 - if (seq_a < seq_b) 133 - return -1; 134 - if (seq_a > seq_b) 135 - return 1; 136 - return 0; 137 - } 138 - 139 - static inline void mnt_ns_tree_write_lock(void) 140 - { 141 - write_seqlock(&mnt_ns_tree_lock); 142 - } 143 - 144 - static inline void mnt_ns_tree_write_unlock(void) 145 - { 146 - write_sequnlock(&mnt_ns_tree_lock); 147 - } 148 - 149 - static void mnt_ns_tree_add(struct mnt_namespace *ns) 150 - { 151 - struct rb_node *node, *prev; 152 - 153 - mnt_ns_tree_write_lock(); 154 - node = rb_find_add_rcu(&ns->mnt_ns_tree_node, &mnt_ns_tree, mnt_ns_cmp); 155 - /* 156 - * If there's no previous entry simply add it after the 157 - * head and if there 
is add it after the previous entry. 158 - */ 159 - prev = rb_prev(&ns->mnt_ns_tree_node); 160 - if (!prev) 161 - list_add_rcu(&ns->mnt_ns_list, &mnt_ns_list); 162 - else 163 - list_add_rcu(&ns->mnt_ns_list, &node_to_mnt_ns(prev)->mnt_ns_list); 164 - mnt_ns_tree_write_unlock(); 165 - 166 - WARN_ON_ONCE(node); 126 + ns = rb_entry(node, struct ns_common, ns_tree_node); 127 + return container_of(ns, struct mnt_namespace, ns); 167 128 } 168 129 169 130 static void mnt_ns_release(struct mnt_namespace *ns) ··· 138 181 139 182 static void mnt_ns_release_rcu(struct rcu_head *rcu) 140 183 { 141 - mnt_ns_release(container_of(rcu, struct mnt_namespace, mnt_ns_rcu)); 184 + mnt_ns_release(container_of(rcu, struct mnt_namespace, ns.ns_rcu)); 142 185 } 143 186 144 187 static void mnt_ns_tree_remove(struct mnt_namespace *ns) 145 188 { 146 189 /* remove from global mount namespace list */ 147 - if (!RB_EMPTY_NODE(&ns->mnt_ns_tree_node)) { 148 - mnt_ns_tree_write_lock(); 149 - rb_erase(&ns->mnt_ns_tree_node, &mnt_ns_tree); 150 - list_bidir_del_rcu(&ns->mnt_ns_list); 151 - mnt_ns_tree_write_unlock(); 152 - } 190 + if (ns_tree_active(ns)) 191 + ns_tree_remove(ns); 153 192 154 - call_rcu(&ns->mnt_ns_rcu, mnt_ns_release_rcu); 155 - } 156 - 157 - static int mnt_ns_find(const void *key, const struct rb_node *node) 158 - { 159 - const u64 mnt_ns_id = *(u64 *)key; 160 - const struct mnt_namespace *ns = node_to_mnt_ns(node); 161 - 162 - if (mnt_ns_id < ns->seq) 163 - return -1; 164 - if (mnt_ns_id > ns->seq) 165 - return 1; 166 - return 0; 193 + call_rcu(&ns->ns.ns_rcu, mnt_ns_release_rcu); 167 194 } 168 195 169 196 /* ··· 166 225 */ 167 226 static struct mnt_namespace *lookup_mnt_ns(u64 mnt_ns_id) 168 227 { 169 - struct mnt_namespace *ns; 170 - struct rb_node *node; 171 - unsigned int seq; 228 + struct mnt_namespace *mnt_ns; 229 + struct ns_common *ns; 172 230 173 231 guard(rcu)(); 174 - do { 175 - seq = read_seqbegin(&mnt_ns_tree_lock); 176 - node = rb_find_rcu(&mnt_ns_id, &mnt_ns_tree, 
mnt_ns_find); 177 - if (node) 178 - break; 179 - } while (read_seqretry(&mnt_ns_tree_lock, seq)); 180 - 181 - if (!node) 232 + ns = ns_tree_lookup_rcu(mnt_ns_id, CLONE_NEWNS); 233 + if (!ns) 182 234 return NULL; 183 235 184 236 /* 185 237 * The last reference count is put with RCU delay so we can 186 238 * unconditonally acquire a reference here. 187 239 */ 188 - ns = node_to_mnt_ns(node); 189 - refcount_inc(&ns->passive); 190 - return ns; 240 + mnt_ns = container_of(ns, struct mnt_namespace, ns); 241 + refcount_inc(&mnt_ns->passive); 242 + return mnt_ns; 191 243 } 192 244 193 245 static inline void lock_mount_hash(void) ··· 951 1017 return false; 952 1018 953 1019 seq = mnt->mnt_ns->seq_origin; 954 - return !seq || (seq == current->nsproxy->mnt_ns->seq); 1020 + return !seq || (seq == current->nsproxy->mnt_ns->ns.ns_id); 955 1021 } 956 1022 957 1023 /* ··· 2086 2152 2087 2153 struct mnt_namespace *get_sequential_mnt_ns(struct mnt_namespace *mntns, bool previous) 2088 2154 { 2155 + struct ns_common *ns; 2156 + 2089 2157 guard(rcu)(); 2090 2158 2091 2159 for (;;) { 2092 - struct list_head *list; 2160 + ns = ns_tree_adjoined_rcu(mntns, previous); 2161 + if (IS_ERR(ns)) 2162 + return ERR_CAST(ns); 2093 2163 2094 - if (previous) 2095 - list = rcu_dereference(list_bidir_prev_rcu(&mntns->mnt_ns_list)); 2096 - else 2097 - list = rcu_dereference(list_next_rcu(&mntns->mnt_ns_list)); 2098 - if (list_is_head(list, &mnt_ns_list)) 2099 - return ERR_PTR(-ENOENT); 2100 - 2101 - mntns = list_entry_rcu(list, struct mnt_namespace, mnt_ns_list); 2164 + mntns = to_mnt_ns(ns); 2102 2165 2103 2166 /* 2104 2167 * The last passive reference count is put with RCU ··· 2135 2204 if (!mnt_ns) 2136 2205 return false; 2137 2206 2138 - return current->nsproxy->mnt_ns->seq >= mnt_ns->seq; 2207 + return current->nsproxy->mnt_ns->ns.ns_id >= mnt_ns->ns.ns_id; 2139 2208 } 2140 2209 2141 2210 struct mount *copy_tree(struct mount *src_root, struct dentry *dentry, ··· 3011 3080 if 
(is_anon_ns(src_mnt_ns)) 3012 3081 ns->seq_origin = src_mnt_ns->seq_origin; 3013 3082 else 3014 - ns->seq_origin = src_mnt_ns->seq; 3083 + ns->seq_origin = src_mnt_ns->ns.ns_id; 3015 3084 } 3016 3085 3017 3086 mnt = __do_loopback(path, recursive); ··· 4087 4156 mnt_ns_tree_remove(ns); 4088 4157 } 4089 4158 4090 - /* 4091 - * Assign a sequence number so we can detect when we attempt to bind 4092 - * mount a reference to an older mount namespace into the current 4093 - * mount namespace, preventing reference counting loops. A 64bit 4094 - * number incrementing at 10Ghz will take 12,427 years to wrap which 4095 - * is effectively never, so we can ignore the possibility. 4096 - */ 4097 - static atomic64_t mnt_ns_seq = ATOMIC64_INIT(1); 4098 - 4099 4159 static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool anon) 4100 4160 { 4101 4161 struct mnt_namespace *new_ns; ··· 4110 4188 return ERR_PTR(ret); 4111 4189 } 4112 4190 if (!anon) 4113 - new_ns->seq = atomic64_inc_return(&mnt_ns_seq); 4191 + ns_tree_gen_id(&new_ns->ns); 4114 4192 refcount_set(&new_ns->passive, 1); 4115 4193 new_ns->mounts = RB_ROOT; 4116 - INIT_LIST_HEAD(&new_ns->mnt_ns_list); 4117 - RB_CLEAR_NODE(&new_ns->mnt_ns_tree_node); 4118 4194 init_waitqueue_head(&new_ns->poll); 4119 4195 new_ns->user_ns = get_user_ns(user_ns); 4120 4196 new_ns->ucounts = ucounts; ··· 4198 4278 if (pwdmnt) 4199 4279 mntput(pwdmnt); 4200 4280 4201 - mnt_ns_tree_add(new_ns); 4281 + ns_tree_add_raw(new_ns); 4202 4282 return new_ns; 4203 4283 } 4204 4284 ··· 5317 5397 static void statmount_mnt_ns_id(struct kstatmount *s, struct mnt_namespace *ns) 5318 5398 { 5319 5399 s->sm.mask |= STATMOUNT_MNT_NS_ID; 5320 - s->sm.mnt_ns_id = ns->seq; 5400 + s->sm.mnt_ns_id = ns->ns.ns_id; 5321 5401 } 5322 5402 5323 5403 static int statmount_mnt_opts(struct kstatmount *s, struct seq_file *seq) ··· 6022 6102 ns = alloc_mnt_ns(&init_user_ns, true); 6023 6103 if (IS_ERR(ns)) 6024 6104 panic("Can't allocate initial namespace"); 
6025 - ns->seq = atomic64_inc_return(&mnt_ns_seq); 6026 6105 ns->ns.inum = PROC_MNT_INIT_INO; 6027 6106 m = real_mount(mnt); 6028 6107 ns->root = m; ··· 6036 6117 set_fs_pwd(current->fs, &root); 6037 6118 set_fs_root(current->fs, &root); 6038 6119 6039 - mnt_ns_tree_add(ns); 6120 + ns_tree_add(ns); 6040 6121 } 6041 6122 6042 6123 void __init mnt_init(void)
+2 -2
fs/nsfs.c
··· 139 139 * the size value will be set to the size the kernel knows about. 140 140 */ 141 141 kinfo->size = min(usize, sizeof(*kinfo)); 142 - kinfo->mnt_ns_id = mnt_ns->seq; 142 + kinfo->mnt_ns_id = mnt_ns->ns.ns_id; 143 143 kinfo->nr_mounts = READ_ONCE(mnt_ns->nr_mounts); 144 144 /* Subtract the root mount of the mount namespace. */ 145 145 if (kinfo->nr_mounts) ··· 221 221 222 222 mnt_ns = container_of(ns, struct mnt_namespace, ns); 223 223 idp = (__u64 __user *)arg; 224 - id = mnt_ns->seq; 224 + id = mnt_ns->ns.ns_id; 225 225 return put_user(id, idp); 226 226 } 227 227 case NS_GET_PID_FROM_PIDNS: