Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

nstree: maintain list of owned namespaces

The namespace tree doesn't express the concept of namespace ownership
appropriately. Maintain a list of directly owned namespaces per user
namespace. This will allow userspace and the kernel to use the listns()
system call to walk the namespace tree by owning user namespace. The
rbtree is used to find the relevant namespace entry point from which
iteration can continue, and the owner list can be used to walk the tree
completely lock-free.

Link: https://patch.msgid.link/20251029-work-namespace-nstree-listns-v4-16-2e6f823ebdc0@kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>

+79 -1
+8
include/linux/ns_common.h
··· 116 116 struct rb_node ns_tree_node; 117 117 struct list_head ns_list_node; 118 118 }; 119 + struct /* namespace ownership rbtree and list */ { 120 + struct rb_root ns_owner_tree; /* rbtree of namespaces owned by this namespace */ 121 + struct list_head ns_owner; /* list of namespaces owned by this namespace */ 122 + struct rb_node ns_owner_tree_node; /* node in the owner namespace's rbtree */ 123 + struct list_head ns_owner_entry; /* node in the owner namespace's ns_owned list */ 124 + }; 119 125 atomic_t __ns_ref_active; /* do not use directly */ 120 126 }; 121 127 struct rcu_head ns_rcu; ··· 222 216 .__ns_ref = REFCOUNT_INIT(refs), \ 223 217 .__ns_ref_active = ATOMIC_INIT(1), \ 224 218 .ns_list_node = LIST_HEAD_INIT(nsname.ns.ns_list_node), \ 219 + .ns_owner_entry = LIST_HEAD_INIT(nsname.ns.ns_owner_entry), \ 220 + .ns_owner = LIST_HEAD_INIT(nsname.ns.ns_owner), \ 225 221 } 226 222 227 223 #define ns_common_init(__ns) \
+4
kernel/nscommon.c
··· 63 63 ns->ns_type = ns_type; 64 64 RB_CLEAR_NODE(&ns->ns_tree_node); 65 65 RB_CLEAR_NODE(&ns->ns_unified_tree_node); 66 + RB_CLEAR_NODE(&ns->ns_owner_tree_node); 66 67 INIT_LIST_HEAD(&ns->ns_list_node); 68 + ns->ns_owner_tree = RB_ROOT; 69 + INIT_LIST_HEAD(&ns->ns_owner); 70 + INIT_LIST_HEAD(&ns->ns_owner_entry); 67 71 68 72 #ifdef CONFIG_DEBUG_VFS 69 73 ns_debug(ns, ops);
+67 -1
kernel/nstree.c
··· 3 3 4 4 #include <linux/nstree.h> 5 5 #include <linux/proc_ns.h> 6 + #include <linux/rculist.h> 6 7 #include <linux/vfsdebug.h> 8 + #include <linux/user_namespace.h> 7 9 8 10 static __cacheline_aligned_in_smp DEFINE_SEQLOCK(ns_tree_lock); 9 11 static struct rb_root ns_unified_tree = RB_ROOT; /* protected by ns_tree_lock */ ··· 85 83 return rb_entry(node, struct ns_common, ns_unified_tree_node); 86 84 } 87 85 86 + static inline struct ns_common *node_to_ns_owner(const struct rb_node *node) 87 + { 88 + if (!node) 89 + return NULL; 90 + return rb_entry(node, struct ns_common, ns_owner_tree_node); 91 + } 92 + 88 93 static inline int ns_cmp(struct rb_node *a, const struct rb_node *b) 89 94 { 90 95 struct ns_common *ns_a = node_to_ns(a); ··· 120 111 return 0; 121 112 } 122 113 114 + static inline int ns_cmp_owner(struct rb_node *a, const struct rb_node *b) 115 + { 116 + struct ns_common *ns_a = node_to_ns_owner(a); 117 + struct ns_common *ns_b = node_to_ns_owner(b); 118 + u64 ns_id_a = ns_a->ns_id; 119 + u64 ns_id_b = ns_b->ns_id; 120 + 121 + if (ns_id_a < ns_id_b) 122 + return -1; 123 + if (ns_id_a > ns_id_b) 124 + return 1; 125 + return 0; 126 + } 127 + 123 128 void __ns_tree_add_raw(struct ns_common *ns, struct ns_tree *ns_tree) 124 129 { 125 130 struct rb_node *node, *prev; 131 + const struct proc_ns_operations *ops = ns->ops; 126 132 127 133 VFS_WARN_ON_ONCE(!ns->ns_id); 134 + VFS_WARN_ON_ONCE(ns->ns_type != ns_tree->type); 128 135 129 136 write_seqlock(&ns_tree_lock); 130 137 ··· 156 131 list_add_rcu(&ns->ns_list_node, &node_to_ns(prev)->ns_list_node); 157 132 158 133 rb_find_add_rcu(&ns->ns_unified_tree_node, &ns_unified_tree, ns_cmp_unified); 134 + 135 + if (ops) { 136 + struct user_namespace *user_ns; 137 + 138 + VFS_WARN_ON_ONCE(!ops->owner); 139 + user_ns = ops->owner(ns); 140 + if (user_ns) { 141 + struct ns_common *owner = &user_ns->ns; 142 + VFS_WARN_ON_ONCE(owner->ns_type != CLONE_NEWUSER); 143 + 144 + /* Insert into owner's rbtree */ 145 + 
rb_find_add_rcu(&ns->ns_owner_tree_node, &owner->ns_owner_tree, ns_cmp_owner); 146 + 147 + /* Insert into owner's list in sorted order */ 148 + prev = rb_prev(&ns->ns_owner_tree_node); 149 + if (!prev) 150 + list_add_rcu(&ns->ns_owner_entry, &owner->ns_owner); 151 + else 152 + list_add_rcu(&ns->ns_owner_entry, &node_to_ns_owner(prev)->ns_owner_entry); 153 + } else { 154 + /* Only the initial user namespace doesn't have an owner. */ 155 + VFS_WARN_ON_ONCE(ns != to_ns_common(&init_user_ns)); 156 + } 157 + } 159 158 write_sequnlock(&ns_tree_lock); 160 159 161 160 VFS_WARN_ON_ONCE(node); ··· 195 146 196 147 void __ns_tree_remove(struct ns_common *ns, struct ns_tree *ns_tree) 197 148 { 149 + const struct proc_ns_operations *ops = ns->ops; 150 + struct user_namespace *user_ns; 151 + 198 152 VFS_WARN_ON_ONCE(RB_EMPTY_NODE(&ns->ns_tree_node)); 199 153 VFS_WARN_ON_ONCE(list_empty(&ns->ns_list_node)); 200 154 VFS_WARN_ON_ONCE(ns->ns_type != ns_tree->type); ··· 205 153 write_seqlock(&ns_tree_lock); 206 154 rb_erase(&ns->ns_tree_node, &ns_tree->ns_tree); 207 155 rb_erase(&ns->ns_unified_tree_node, &ns_unified_tree); 208 - list_bidir_del_rcu(&ns->ns_list_node); 209 156 RB_CLEAR_NODE(&ns->ns_tree_node); 157 + 158 + list_bidir_del_rcu(&ns->ns_list_node); 159 + 160 + /* Remove from owner's rbtree if this namespace has an owner */ 161 + if (ops) { 162 + user_ns = ops->owner(ns); 163 + if (user_ns) { 164 + struct ns_common *owner = &user_ns->ns; 165 + rb_erase(&ns->ns_owner_tree_node, &owner->ns_owner_tree); 166 + RB_CLEAR_NODE(&ns->ns_owner_tree_node); 167 + } 168 + 169 + list_bidir_del_rcu(&ns->ns_owner_entry); 170 + } 171 + 210 172 write_sequnlock(&ns_tree_lock); 211 173 } 212 174 EXPORT_SYMBOL_GPL(__ns_tree_remove);