Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

kernfs: Use RCU to access kernfs_node::parent.

kernfs_rename_lock is used to obtain a stable kernfs_node::{name|parent}
pointer. This change is a preparation for accessing kernfs_node::parent
under RCU and for ensuring that the pointer remains stable under the RCU
lifetime guarantees.

For a complete path, as it is done in kernfs_path_from_node(), the
kernfs_rename_lock is still required in order to obtain a stable parent
relationship while computing the relevant node depth. This relationship
must not change while the nodes are inspected in order to build the path.
If the kernfs user never moves the nodes (changes the parent), then the
kernfs_rename_lock is not required and the RCU guarantees are
sufficient. This "restriction" can be set with
KERNFS_ROOT_INVARIANT_PARENT. Otherwise the lock is required.

Rename kernfs_node::parent to kernfs_node::__parent to denote the RCU
access, and use RCU accessors while accessing the node.
Make cgroup use KERNFS_ROOT_INVARIANT_PARENT since the parent here
cannot change.

Acked-by: Tejun Heo <tj@kernel.org>
Cc: Yonghong Song <yonghong.song@linux.dev>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Link: https://lore.kernel.org/r/20250213145023.2820193-6-bigeasy@linutronix.de
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

authored by

Sebastian Andrzej Siewior and committed by
Greg Kroah-Hartman
63348894 9aab10a0

+195 -93
+44 -21
arch/x86/kernel/cpu/resctrl/rdtgroup.c
··· 956 956 return 0; 957 957 } 958 958 959 + static void *rdt_kn_parent_priv(struct kernfs_node *kn) 960 + { 961 + /* 962 + * The parent pointer is only valid within RCU section since it can be 963 + * replaced. 964 + */ 965 + guard(rcu)(); 966 + return rcu_dereference(kn->__parent)->priv; 967 + } 968 + 959 969 static int rdt_num_closids_show(struct kernfs_open_file *of, 960 970 struct seq_file *seq, void *v) 961 971 { 962 - struct resctrl_schema *s = of->kn->parent->priv; 972 + struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); 963 973 964 974 seq_printf(seq, "%u\n", s->num_closid); 965 975 return 0; ··· 978 968 static int rdt_default_ctrl_show(struct kernfs_open_file *of, 979 969 struct seq_file *seq, void *v) 980 970 { 981 - struct resctrl_schema *s = of->kn->parent->priv; 971 + struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); 982 972 struct rdt_resource *r = s->res; 983 973 984 974 seq_printf(seq, "%x\n", r->default_ctrl); ··· 988 978 static int rdt_min_cbm_bits_show(struct kernfs_open_file *of, 989 979 struct seq_file *seq, void *v) 990 980 { 991 - struct resctrl_schema *s = of->kn->parent->priv; 981 + struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); 992 982 struct rdt_resource *r = s->res; 993 983 994 984 seq_printf(seq, "%u\n", r->cache.min_cbm_bits); ··· 998 988 static int rdt_shareable_bits_show(struct kernfs_open_file *of, 999 989 struct seq_file *seq, void *v) 1000 990 { 1001 - struct resctrl_schema *s = of->kn->parent->priv; 991 + struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); 1002 992 struct rdt_resource *r = s->res; 1003 993 1004 994 seq_printf(seq, "%x\n", r->cache.shareable_bits); ··· 1022 1012 static int rdt_bit_usage_show(struct kernfs_open_file *of, 1023 1013 struct seq_file *seq, void *v) 1024 1014 { 1025 - struct resctrl_schema *s = of->kn->parent->priv; 1015 + struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); 1026 1016 /* 1027 1017 * Use unsigned long even though only 32 bits are used to ensure 1028 1018 * 
test_bit() is used safely. ··· 1104 1094 static int rdt_min_bw_show(struct kernfs_open_file *of, 1105 1095 struct seq_file *seq, void *v) 1106 1096 { 1107 - struct resctrl_schema *s = of->kn->parent->priv; 1097 + struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); 1108 1098 struct rdt_resource *r = s->res; 1109 1099 1110 1100 seq_printf(seq, "%u\n", r->membw.min_bw); ··· 1114 1104 static int rdt_num_rmids_show(struct kernfs_open_file *of, 1115 1105 struct seq_file *seq, void *v) 1116 1106 { 1117 - struct rdt_resource *r = of->kn->parent->priv; 1107 + struct rdt_resource *r = rdt_kn_parent_priv(of->kn); 1118 1108 1119 1109 seq_printf(seq, "%d\n", r->num_rmid); 1120 1110 ··· 1124 1114 static int rdt_mon_features_show(struct kernfs_open_file *of, 1125 1115 struct seq_file *seq, void *v) 1126 1116 { 1127 - struct rdt_resource *r = of->kn->parent->priv; 1117 + struct rdt_resource *r = rdt_kn_parent_priv(of->kn); 1128 1118 struct mon_evt *mevt; 1129 1119 1130 1120 list_for_each_entry(mevt, &r->evt_list, list) { ··· 1139 1129 static int rdt_bw_gran_show(struct kernfs_open_file *of, 1140 1130 struct seq_file *seq, void *v) 1141 1131 { 1142 - struct resctrl_schema *s = of->kn->parent->priv; 1132 + struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); 1143 1133 struct rdt_resource *r = s->res; 1144 1134 1145 1135 seq_printf(seq, "%u\n", r->membw.bw_gran); ··· 1149 1139 static int rdt_delay_linear_show(struct kernfs_open_file *of, 1150 1140 struct seq_file *seq, void *v) 1151 1141 { 1152 - struct resctrl_schema *s = of->kn->parent->priv; 1142 + struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); 1153 1143 struct rdt_resource *r = s->res; 1154 1144 1155 1145 seq_printf(seq, "%u\n", r->membw.delay_linear); ··· 1167 1157 static int rdt_thread_throttle_mode_show(struct kernfs_open_file *of, 1168 1158 struct seq_file *seq, void *v) 1169 1159 { 1170 - struct resctrl_schema *s = of->kn->parent->priv; 1160 + struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); 1171 1161 
struct rdt_resource *r = s->res; 1172 1162 1173 1163 if (r->membw.throttle_mode == THREAD_THROTTLE_PER_THREAD) ··· 1232 1222 static int rdt_has_sparse_bitmasks_show(struct kernfs_open_file *of, 1233 1223 struct seq_file *seq, void *v) 1234 1224 { 1235 - struct resctrl_schema *s = of->kn->parent->priv; 1225 + struct resctrl_schema *s = rdt_kn_parent_priv(of->kn); 1236 1226 struct rdt_resource *r = s->res; 1237 1227 1238 1228 seq_printf(seq, "%u\n", r->cache.arch_has_sparse_bitmasks); ··· 1644 1634 static int mbm_total_bytes_config_show(struct kernfs_open_file *of, 1645 1635 struct seq_file *seq, void *v) 1646 1636 { 1647 - struct rdt_resource *r = of->kn->parent->priv; 1637 + struct rdt_resource *r = rdt_kn_parent_priv(of->kn); 1648 1638 1649 1639 mbm_config_show(seq, r, QOS_L3_MBM_TOTAL_EVENT_ID); 1650 1640 ··· 1654 1644 static int mbm_local_bytes_config_show(struct kernfs_open_file *of, 1655 1645 struct seq_file *seq, void *v) 1656 1646 { 1657 - struct rdt_resource *r = of->kn->parent->priv; 1647 + struct rdt_resource *r = rdt_kn_parent_priv(of->kn); 1658 1648 1659 1649 mbm_config_show(seq, r, QOS_L3_MBM_LOCAL_EVENT_ID); 1660 1650 ··· 1760 1750 char *buf, size_t nbytes, 1761 1751 loff_t off) 1762 1752 { 1763 - struct rdt_resource *r = of->kn->parent->priv; 1753 + struct rdt_resource *r = rdt_kn_parent_priv(of->kn); 1764 1754 int ret; 1765 1755 1766 1756 /* Valid input requires a trailing newline */ ··· 1786 1776 char *buf, size_t nbytes, 1787 1777 loff_t off) 1788 1778 { 1789 - struct rdt_resource *r = of->kn->parent->priv; 1779 + struct rdt_resource *r = rdt_kn_parent_priv(of->kn); 1790 1780 int ret; 1791 1781 1792 1782 /* Valid input requires a trailing newline */ ··· 2450 2440 * resource. "info" and its subdirectories don't 2451 2441 * have rdtgroup structures, so return NULL here. 
2452 2442 */ 2453 - if (kn == kn_info || kn->parent == kn_info) 2443 + if (kn == kn_info || 2444 + rcu_access_pointer(kn->__parent) == kn_info) 2454 2445 return NULL; 2455 2446 else 2456 2447 return kn->priv; 2457 2448 } else { 2458 - return kn->parent->priv; 2449 + return rdt_kn_parent_priv(kn); 2459 2450 } 2460 2451 } 2461 2452 ··· 3782 3771 return 0; 3783 3772 } 3784 3773 3774 + static struct kernfs_node *rdt_kn_parent(struct kernfs_node *kn) 3775 + { 3776 + /* 3777 + * Valid within the RCU section it was obtained or while rdtgroup_mutex 3778 + * is held. 3779 + */ 3780 + return rcu_dereference_check(kn->__parent, lockdep_is_held(&rdtgroup_mutex)); 3781 + } 3782 + 3785 3783 static int rdtgroup_rmdir(struct kernfs_node *kn) 3786 3784 { 3787 - struct kernfs_node *parent_kn = kn->parent; 3785 + struct kernfs_node *parent_kn; 3788 3786 struct rdtgroup *rdtgrp; 3789 3787 cpumask_var_t tmpmask; 3790 3788 int ret = 0; ··· 3806 3786 ret = -EPERM; 3807 3787 goto out; 3808 3788 } 3789 + parent_kn = rdt_kn_parent(kn); 3809 3790 3810 3791 /* 3811 3792 * If the rdtgroup is a ctrl_mon group and parent directory ··· 3875 3854 static int rdtgroup_rename(struct kernfs_node *kn, 3876 3855 struct kernfs_node *new_parent, const char *new_name) 3877 3856 { 3857 + struct kernfs_node *kn_parent; 3878 3858 struct rdtgroup *new_prdtgrp; 3879 3859 struct rdtgroup *rdtgrp; 3880 3860 cpumask_var_t tmpmask; ··· 3910 3888 goto out; 3911 3889 } 3912 3890 3913 - if (rdtgrp->type != RDTMON_GROUP || !kn->parent || 3914 - !is_mon_groups(kn->parent, kn->name)) { 3891 + kn_parent = rdt_kn_parent(kn); 3892 + if (rdtgrp->type != RDTMON_GROUP || !kn_parent || 3893 + !is_mon_groups(kn_parent, kn->name)) { 3915 3894 rdt_last_cmd_puts("Source must be a MON group\n"); 3916 3895 ret = -EPERM; 3917 3896 goto out;
+60 -36
fs/kernfs/dir.c
··· 17 17 18 18 #include "kernfs-internal.h" 19 19 20 - static DEFINE_RWLOCK(kernfs_rename_lock); /* kn->parent and ->name */ 20 + DEFINE_RWLOCK(kernfs_rename_lock); /* kn->parent and ->name */ 21 21 /* 22 22 * Don't use rename_lock to piggy back on pr_cont_buf. We don't want to 23 23 * call pr_cont() while holding rename_lock. Because sometimes pr_cont() ··· 56 56 if (!kn) 57 57 return strscpy(buf, "(null)", buflen); 58 58 59 - return strscpy(buf, kn->parent ? kn->name : "/", buflen); 59 + return strscpy(buf, rcu_access_pointer(kn->__parent) ? kn->name : "/", buflen); 60 60 } 61 61 62 62 /* kernfs_node_depth - compute depth from @from to @to */ ··· 64 64 { 65 65 size_t depth = 0; 66 66 67 - while (to->parent && to != from) { 67 + while (rcu_dereference(to->__parent) && to != from) { 68 68 depth++; 69 - to = to->parent; 69 + to = rcu_dereference(to->__parent); 70 70 } 71 71 return depth; 72 72 } ··· 84 84 db = kernfs_depth(rb->kn, b); 85 85 86 86 while (da > db) { 87 - a = a->parent; 87 + a = rcu_dereference(a->__parent); 88 88 da--; 89 89 } 90 90 while (db > da) { 91 - b = b->parent; 91 + b = rcu_dereference(b->__parent); 92 92 db--; 93 93 } 94 94 95 95 /* worst case b and a will be the same at root */ 96 96 while (b != a) { 97 - b = b->parent; 98 - a = a->parent; 97 + b = rcu_dereference(b->__parent); 98 + a = rcu_dereference(a->__parent); 99 99 } 100 100 101 101 return a; ··· 168 168 169 169 /* Calculate how many bytes we need for the rest */ 170 170 for (i = depth_to - 1; i >= 0; i--) { 171 + 171 172 for (kn = kn_to, j = 0; j < i; j++) 172 - kn = kn->parent; 173 + kn = rcu_dereference(kn->__parent); 173 174 174 175 len += scnprintf(buf + len, buflen - len, "/%s", kn->name); 175 176 } ··· 227 226 unsigned long flags; 228 227 int ret; 229 228 229 + guard(rcu)(); 230 230 read_lock_irqsave(&kernfs_rename_lock, flags); 231 231 ret = kernfs_path_from_node_locked(to, from, buf, buflen); 232 232 read_unlock_irqrestore(&kernfs_rename_lock, flags); ··· 297 295 unsigned 
long flags; 298 296 299 297 read_lock_irqsave(&kernfs_rename_lock, flags); 300 - parent = kn->parent; 298 + parent = kernfs_parent(kn); 301 299 kernfs_get(parent); 302 300 read_unlock_irqrestore(&kernfs_rename_lock, flags); 303 301 ··· 362 360 */ 363 361 static int kernfs_link_sibling(struct kernfs_node *kn) 364 362 { 365 - struct rb_node **node = &kn->parent->dir.children.rb_node; 366 363 struct rb_node *parent = NULL; 364 + struct kernfs_node *kn_parent; 365 + struct rb_node **node; 366 + 367 + kn_parent = kernfs_parent(kn); 368 + node = &kn_parent->dir.children.rb_node; 367 369 368 370 while (*node) { 369 371 struct kernfs_node *pos; ··· 386 380 387 381 /* add new node and rebalance the tree */ 388 382 rb_link_node(&kn->rb, parent, node); 389 - rb_insert_color(&kn->rb, &kn->parent->dir.children); 383 + rb_insert_color(&kn->rb, &kn_parent->dir.children); 390 384 391 385 /* successfully added, account subdir number */ 392 386 down_write(&kernfs_root(kn)->kernfs_iattr_rwsem); 393 387 if (kernfs_type(kn) == KERNFS_DIR) 394 - kn->parent->dir.subdirs++; 395 - kernfs_inc_rev(kn->parent); 388 + kn_parent->dir.subdirs++; 389 + kernfs_inc_rev(kn_parent); 396 390 up_write(&kernfs_root(kn)->kernfs_iattr_rwsem); 397 391 398 392 return 0; ··· 413 407 */ 414 408 static bool kernfs_unlink_sibling(struct kernfs_node *kn) 415 409 { 410 + struct kernfs_node *kn_parent; 411 + 416 412 if (RB_EMPTY_NODE(&kn->rb)) 417 413 return false; 418 414 415 + kn_parent = kernfs_parent(kn); 419 416 down_write(&kernfs_root(kn)->kernfs_iattr_rwsem); 420 417 if (kernfs_type(kn) == KERNFS_DIR) 421 - kn->parent->dir.subdirs--; 422 - kernfs_inc_rev(kn->parent); 418 + kn_parent->dir.subdirs--; 419 + kernfs_inc_rev(kn_parent); 423 420 up_write(&kernfs_root(kn)->kernfs_iattr_rwsem); 424 421 425 - rb_erase(&kn->rb, &kn->parent->dir.children); 422 + rb_erase(&kn->rb, &kn_parent->dir.children); 426 423 RB_CLEAR_NODE(&kn->rb); 427 424 return true; 428 425 } ··· 571 562 * Moving/renaming is always done while 
holding reference. 572 563 * kn->parent won't change beneath us. 573 564 */ 574 - parent = kn->parent; 565 + parent = kernfs_parent(kn); 575 566 576 567 WARN_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS, 577 568 "kernfs_put: %s/%s: released with incorrect active_ref %d\n", ··· 710 701 name, mode, uid, gid, flags); 711 702 if (kn) { 712 703 kernfs_get(parent); 713 - kn->parent = parent; 704 + rcu_assign_pointer(kn->__parent, parent); 714 705 } 715 706 return kn; 716 707 } ··· 778 769 */ 779 770 int kernfs_add_one(struct kernfs_node *kn) 780 771 { 781 - struct kernfs_node *parent = kn->parent; 782 - struct kernfs_root *root = kernfs_root(parent); 772 + struct kernfs_root *root = kernfs_root(kn); 783 773 struct kernfs_iattrs *ps_iattr; 774 + struct kernfs_node *parent; 784 775 bool has_ns; 785 776 int ret; 786 777 787 778 down_write(&root->kernfs_rwsem); 779 + parent = kernfs_parent(kn); 788 780 789 781 ret = -EINVAL; 790 782 has_ns = kernfs_ns_enabled(parent); ··· 959 949 return kn; 960 950 } 961 951 952 + unsigned int kernfs_root_flags(struct kernfs_node *kn) 953 + { 954 + return kernfs_root(kn)->flags; 955 + } 956 + 962 957 /** 963 958 * kernfs_create_root - create a new kernfs hierarchy 964 959 * @scops: optional syscall operations for the hierarchy ··· 1127 1112 static int kernfs_dop_revalidate(struct inode *dir, const struct qstr *name, 1128 1113 struct dentry *dentry, unsigned int flags) 1129 1114 { 1130 - struct kernfs_node *kn; 1115 + struct kernfs_node *kn, *parent; 1131 1116 struct kernfs_root *root; 1132 1117 1133 1118 if (flags & LOOKUP_RCU) ··· 1178 1163 if (!kernfs_active(kn)) 1179 1164 goto out_bad; 1180 1165 1166 + parent = kernfs_parent(kn); 1181 1167 /* The kernfs node has been moved? 
*/ 1182 - if (kernfs_dentry_node(dentry->d_parent) != kn->parent) 1168 + if (kernfs_dentry_node(dentry->d_parent) != parent) 1183 1169 goto out_bad; 1184 1170 1185 1171 /* The kernfs node has been renamed */ ··· 1188 1172 goto out_bad; 1189 1173 1190 1174 /* The kernfs node has been moved to a different namespace */ 1191 - if (kn->parent && kernfs_ns_enabled(kn->parent) && 1175 + if (parent && kernfs_ns_enabled(parent) && 1192 1176 kernfs_info(dentry->d_sb)->ns != kn->ns) 1193 1177 goto out_bad; 1194 1178 ··· 1381 1365 return kernfs_leftmost_descendant(rb_to_kn(rbn)); 1382 1366 1383 1367 /* no sibling left, visit parent */ 1384 - return pos->parent; 1368 + return kernfs_parent(pos); 1385 1369 } 1386 1370 1387 1371 static void kernfs_activate_one(struct kernfs_node *kn) ··· 1393 1377 if (kernfs_active(kn) || (kn->flags & (KERNFS_HIDDEN | KERNFS_REMOVING))) 1394 1378 return; 1395 1379 1396 - WARN_ON_ONCE(kn->parent && RB_EMPTY_NODE(&kn->rb)); 1380 + WARN_ON_ONCE(rcu_access_pointer(kn->__parent) && RB_EMPTY_NODE(&kn->rb)); 1397 1381 WARN_ON_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS); 1398 1382 1399 1383 atomic_sub(KN_DEACTIVATED_BIAS, &kn->active); ··· 1463 1447 1464 1448 static void __kernfs_remove(struct kernfs_node *kn) 1465 1449 { 1466 - struct kernfs_node *pos; 1450 + struct kernfs_node *pos, *parent; 1467 1451 1468 1452 /* Short-circuit if non-root @kn has already finished removal. */ 1469 1453 if (!kn) ··· 1475 1459 * This is for kernfs_remove_self() which plays with active ref 1476 1460 * after removal. 1477 1461 */ 1478 - if (kn->parent && RB_EMPTY_NODE(&kn->rb)) 1462 + if (kernfs_parent(kn) && RB_EMPTY_NODE(&kn->rb)) 1479 1463 return; 1480 1464 1481 1465 pr_debug("kernfs %s: removing\n", kn->name); ··· 1501 1485 kernfs_get(pos); 1502 1486 1503 1487 kernfs_drain(pos); 1504 - 1488 + parent = kernfs_parent(pos); 1505 1489 /* 1506 1490 * kernfs_unlink_sibling() succeeds once per node. Use it 1507 1491 * to decide who's responsible for cleanups. 
1508 1492 */ 1509 - if (!pos->parent || kernfs_unlink_sibling(pos)) { 1493 + if (!parent || kernfs_unlink_sibling(pos)) { 1510 1494 struct kernfs_iattrs *ps_iattr = 1511 - pos->parent ? pos->parent->iattr : NULL; 1495 + parent ? parent->iattr : NULL; 1512 1496 1513 1497 /* update timestamps on the parent */ 1514 1498 down_write(&kernfs_root(kn)->kernfs_iattr_rwsem); ··· 1738 1722 int error; 1739 1723 1740 1724 /* can't move or rename root */ 1741 - if (!kn->parent) 1725 + if (!rcu_access_pointer(kn->__parent)) 1742 1726 return -EINVAL; 1743 1727 1744 1728 root = kernfs_root(kn); ··· 1749 1733 (new_parent->flags & KERNFS_EMPTY_DIR)) 1750 1734 goto out; 1751 1735 1736 + old_parent = kernfs_parent(kn); 1737 + if (root->flags & KERNFS_ROOT_INVARIANT_PARENT) { 1738 + error = -EINVAL; 1739 + if (WARN_ON_ONCE(old_parent != new_parent)) 1740 + goto out; 1741 + } 1742 + 1752 1743 error = 0; 1753 - if ((kn->parent == new_parent) && (kn->ns == new_ns) && 1744 + if ((old_parent == new_parent) && (kn->ns == new_ns) && 1754 1745 (strcmp(kn->name, new_name) == 0)) 1755 1746 goto out; /* nothing to rename */ 1756 1747 ··· 1784 1761 /* rename_lock protects ->parent and ->name accessors */ 1785 1762 write_lock_irq(&kernfs_rename_lock); 1786 1763 1787 - old_parent = kn->parent; 1788 - kn->parent = new_parent; 1764 + old_parent = kernfs_parent(kn); 1765 + rcu_assign_pointer(kn->__parent, new_parent); 1789 1766 1790 1767 kn->ns = new_ns; 1791 1768 if (new_name) { ··· 1818 1795 { 1819 1796 if (pos) { 1820 1797 int valid = kernfs_active(pos) && 1821 - pos->parent == parent && hash == pos->hash; 1798 + rcu_access_pointer(pos->__parent) == parent && 1799 + hash == pos->hash; 1822 1800 kernfs_put(pos); 1823 1801 if (!valid) 1824 1802 pos = NULL;
+29 -3
fs/kernfs/kernfs-internal.h
··· 19 19 #include <linux/kernfs.h> 20 20 #include <linux/fs_context.h> 21 21 22 + extern rwlock_t kernfs_rename_lock; 23 + 22 24 struct kernfs_iattrs { 23 25 kuid_t ia_uid; 24 26 kgid_t ia_gid; ··· 66 64 * 67 65 * Return: the kernfs_root @kn belongs to. 68 66 */ 69 - static inline struct kernfs_root *kernfs_root(struct kernfs_node *kn) 67 + static inline struct kernfs_root *kernfs_root(const struct kernfs_node *kn) 70 68 { 69 + const struct kernfs_node *knp; 71 70 /* if parent exists, it's always a dir; otherwise, @sd is a dir */ 72 - if (kn->parent) 73 - kn = kn->parent; 71 + guard(rcu)(); 72 + knp = rcu_dereference(kn->__parent); 73 + if (knp) 74 + kn = knp; 74 75 return kn->dir.root; 75 76 } 76 77 ··· 101 96 struct list_head node; 102 97 }; 103 98 #define kernfs_info(SB) ((struct kernfs_super_info *)(SB->s_fs_info)) 99 + 100 + static inline bool kernfs_root_is_locked(const struct kernfs_node *kn) 101 + { 102 + return lockdep_is_held(&kernfs_root(kn)->kernfs_rwsem); 103 + } 104 + 105 + static inline struct kernfs_node *kernfs_parent(const struct kernfs_node *kn) 106 + { 107 + /* 108 + * The kernfs_node::__parent remains valid within a RCU section. The kn 109 + * can be reparented (and renamed) which changes the entry. This can be 110 + * avoided by locking kernfs_root::kernfs_rwsem or kernfs_rename_lock. 111 + * Both locks can be used to obtain a reference on __parent. Once the 112 + * reference count reaches 0 then the node is about to be freed 113 + * and can not be renamed (or become a different parent) anymore. 114 + */ 115 + return rcu_dereference_check(kn->__parent, 116 + kernfs_root_is_locked(kn) || 117 + lockdep_is_held(&kernfs_rename_lock) || 118 + !atomic_read(&kn->count)); 119 + } 104 120 105 121 static inline struct kernfs_node *kernfs_dentry_node(struct dentry *dentry) 106 122 {
+5 -5
fs/kernfs/mount.c
··· 148 148 struct kernfs_root *root = kernfs_root(kn); 149 149 150 150 guard(rwsem_read)(&root->kernfs_rwsem); 151 - return d_obtain_alias(kernfs_get_inode(child->d_sb, kn->parent)); 151 + return d_obtain_alias(kernfs_get_inode(child->d_sb, kernfs_parent(kn))); 152 152 } 153 153 154 154 static const struct export_operations kernfs_export_ops = { ··· 188 188 return NULL; 189 189 } 190 190 191 - while (child->parent != parent) { 192 - if (!child->parent) 191 + while (kernfs_parent(child) != parent) { 192 + child = kernfs_parent(child); 193 + if (!child) 193 194 return NULL; 194 - child = child->parent; 195 195 } 196 196 197 197 return child; ··· 216 216 dentry = dget(sb->s_root); 217 217 218 218 /* Check if this is the root kernfs_node */ 219 - if (!kn->parent) 219 + if (!rcu_access_pointer(kn->__parent)) 220 220 return dentry; 221 221 222 222 root = kernfs_root(kn);
+12 -11
fs/kernfs/symlink.c
··· 62 62 63 63 /* go up to the root, stop at the base */ 64 64 base = parent; 65 - while (base->parent) { 66 - kn = target->parent; 67 - while (kn->parent && base != kn) 68 - kn = kn->parent; 65 + while (kernfs_parent(base)) { 66 + kn = kernfs_parent(target); 67 + while (kernfs_parent(kn) && base != kn) 68 + kn = kernfs_parent(kn); 69 69 70 70 if (base == kn) 71 71 break; ··· 75 75 76 76 strcpy(s, "../"); 77 77 s += 3; 78 - base = base->parent; 78 + base = kernfs_parent(base); 79 79 } 80 80 81 81 /* determine end of target string for reverse fillup */ 82 82 kn = target; 83 - while (kn->parent && kn != base) { 83 + while (kernfs_parent(kn) && kn != base) { 84 84 len += strlen(kn->name) + 1; 85 - kn = kn->parent; 85 + kn = kernfs_parent(kn); 86 86 } 87 87 88 88 /* check limits */ ··· 94 94 95 95 /* reverse fillup of target string from target to base */ 96 96 kn = target; 97 - while (kn->parent && kn != base) { 97 + while (kernfs_parent(kn) && kn != base) { 98 98 int slen = strlen(kn->name); 99 99 100 100 len -= slen; ··· 102 102 if (len) 103 103 s[--len] = '/'; 104 104 105 - kn = kn->parent; 105 + kn = kernfs_parent(kn); 106 106 } 107 107 108 108 return 0; ··· 111 111 static int kernfs_getlink(struct inode *inode, char *path) 112 112 { 113 113 struct kernfs_node *kn = inode->i_private; 114 - struct kernfs_node *parent = kn->parent; 114 + struct kernfs_node *parent; 115 115 struct kernfs_node *target = kn->symlink.target_kn; 116 - struct kernfs_root *root = kernfs_root(parent); 116 + struct kernfs_root *root = kernfs_root(kn); 117 117 int error; 118 118 119 119 down_read(&root->kernfs_rwsem); 120 + parent = kernfs_parent(kn); 120 121 error = kernfs_get_target_path(parent, target, path); 121 122 up_read(&root->kernfs_rwsem); 122 123
+15 -9
fs/sysfs/file.c
··· 19 19 20 20 #include "sysfs.h" 21 21 22 + static struct kobject *sysfs_file_kobj(struct kernfs_node *kn) 23 + { 24 + guard(rcu)(); 25 + return rcu_dereference(kn->__parent)->priv; 26 + } 27 + 22 28 /* 23 29 * Determine ktype->sysfs_ops for the given kernfs_node. This function 24 30 * must be called while holding an active reference. 25 31 */ 26 32 static const struct sysfs_ops *sysfs_file_ops(struct kernfs_node *kn) 27 33 { 28 - struct kobject *kobj = kn->parent->priv; 34 + struct kobject *kobj = sysfs_file_kobj(kn); 29 35 30 36 if (kn->flags & KERNFS_LOCKDEP) 31 37 lockdep_assert_held(kn); ··· 46 40 static int sysfs_kf_seq_show(struct seq_file *sf, void *v) 47 41 { 48 42 struct kernfs_open_file *of = sf->private; 49 - struct kobject *kobj = of->kn->parent->priv; 43 + struct kobject *kobj = sysfs_file_kobj(of->kn); 50 44 const struct sysfs_ops *ops = sysfs_file_ops(of->kn); 51 45 ssize_t count; 52 46 char *buf; ··· 84 78 size_t count, loff_t pos) 85 79 { 86 80 struct bin_attribute *battr = of->kn->priv; 87 - struct kobject *kobj = of->kn->parent->priv; 81 + struct kobject *kobj = sysfs_file_kobj(of->kn); 88 82 loff_t size = file_inode(of->file)->i_size; 89 83 90 84 if (!count) ··· 111 105 size_t count, loff_t pos) 112 106 { 113 107 const struct sysfs_ops *ops = sysfs_file_ops(of->kn); 114 - struct kobject *kobj = of->kn->parent->priv; 108 + struct kobject *kobj = sysfs_file_kobj(of->kn); 115 109 ssize_t len; 116 110 117 111 /* ··· 137 131 size_t count, loff_t pos) 138 132 { 139 133 const struct sysfs_ops *ops = sysfs_file_ops(of->kn); 140 - struct kobject *kobj = of->kn->parent->priv; 134 + struct kobject *kobj = sysfs_file_kobj(of->kn); 141 135 142 136 if (!count) 143 137 return 0; ··· 150 144 size_t count, loff_t pos) 151 145 { 152 146 struct bin_attribute *battr = of->kn->priv; 153 - struct kobject *kobj = of->kn->parent->priv; 147 + struct kobject *kobj = sysfs_file_kobj(of->kn); 154 148 loff_t size = file_inode(of->file)->i_size; 155 149 156 150 if (size) 
{ ··· 174 168 struct vm_area_struct *vma) 175 169 { 176 170 struct bin_attribute *battr = of->kn->priv; 177 - struct kobject *kobj = of->kn->parent->priv; 171 + struct kobject *kobj = sysfs_file_kobj(of->kn); 178 172 179 173 return battr->mmap(of->file, kobj, battr, vma); 180 174 } ··· 183 177 int whence) 184 178 { 185 179 struct bin_attribute *battr = of->kn->priv; 186 - struct kobject *kobj = of->kn->parent->priv; 180 + struct kobject *kobj = sysfs_file_kobj(of->kn); 187 181 188 182 if (battr->llseek) 189 183 return battr->llseek(of->file, kobj, battr, offset, whence); ··· 500 494 */ 501 495 void sysfs_unbreak_active_protection(struct kernfs_node *kn) 502 496 { 503 - struct kobject *kobj = kn->parent->priv; 497 + struct kobject *kobj = sysfs_file_kobj(kn); 504 498 505 499 kernfs_unbreak_active_protection(kn); 506 500 kernfs_put(kn);
+9 -1
include/linux/kernfs.h
··· 147 147 * Support user xattrs to be written to nodes rooted at this root. 148 148 */ 149 149 KERNFS_ROOT_SUPPORT_USER_XATTR = 0x0008, 150 + 151 + /* 152 + * Renames must not change the parent node. 153 + */ 154 + KERNFS_ROOT_INVARIANT_PARENT = 0x0010, 150 155 }; 151 156 152 157 /* type-specific structures for kernfs_node union members */ ··· 204 199 * never moved to a different parent, it is safe to access the 205 200 * parent directly. 206 201 */ 207 - struct kernfs_node *parent; 208 202 const char *name; 203 + struct kernfs_node __rcu *__parent; 209 204 210 205 struct rb_node rb; 211 206 ··· 421 416 struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops, 422 417 unsigned int flags, void *priv); 423 418 void kernfs_destroy_root(struct kernfs_root *root); 419 + unsigned int kernfs_root_flags(struct kernfs_node *kn); 424 420 425 421 struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent, 426 422 const char *name, umode_t mode, ··· 520 514 { return ERR_PTR(-ENOSYS); } 521 515 522 516 static inline void kernfs_destroy_root(struct kernfs_root *root) { } 517 + static inline unsigned int kernfs_root_flags(struct kernfs_node *kn) 518 + { return 0; } 523 519 524 520 static inline struct kernfs_node * 525 521 kernfs_create_dir_ns(struct kernfs_node *parent, const char *name,
+1 -1
kernel/cgroup/cgroup-v1.c
··· 844 844 845 845 if (kernfs_type(kn) != KERNFS_DIR) 846 846 return -ENOTDIR; 847 - if (kn->parent != new_parent) 847 + if (rcu_access_pointer(kn->__parent) != new_parent) 848 848 return -EIO; 849 849 850 850 /*
+19 -5
kernel/cgroup/cgroup.c
··· 633 633 return count; 634 634 } 635 635 636 + static struct cgroup *kn_priv(struct kernfs_node *kn) 637 + { 638 + struct kernfs_node *parent; 639 + /* 640 + * The parent can not be replaced due to KERNFS_ROOT_INVARIANT_PARENT. 641 + * Therefore it is always safe to dereference this pointer outside of a 642 + * RCU section. 643 + */ 644 + parent = rcu_dereference_check(kn->__parent, 645 + kernfs_root_flags(kn) & KERNFS_ROOT_INVARIANT_PARENT); 646 + return parent->priv; 647 + } 648 + 636 649 struct cgroup_subsys_state *of_css(struct kernfs_open_file *of) 637 650 { 638 - struct cgroup *cgrp = of->kn->parent->priv; 651 + struct cgroup *cgrp = kn_priv(of->kn); 639 652 struct cftype *cft = of_cft(of); 640 653 641 654 /* ··· 1625 1612 if (kernfs_type(kn) == KERNFS_DIR) 1626 1613 cgrp = kn->priv; 1627 1614 else 1628 - cgrp = kn->parent->priv; 1615 + cgrp = kn_priv(kn); 1629 1616 1630 1617 cgroup_unlock(); 1631 1618 ··· 1657 1644 if (kernfs_type(kn) == KERNFS_DIR) 1658 1645 cgrp = kn->priv; 1659 1646 else 1660 - cgrp = kn->parent->priv; 1647 + cgrp = kn_priv(kn); 1661 1648 1662 1649 /* 1663 1650 * We're gonna grab cgroup_mutex which nests outside kernfs ··· 2131 2118 root->kf_root = kernfs_create_root(kf_sops, 2132 2119 KERNFS_ROOT_CREATE_DEACTIVATED | 2133 2120 KERNFS_ROOT_SUPPORT_EXPORTOP | 2134 - KERNFS_ROOT_SUPPORT_USER_XATTR, 2121 + KERNFS_ROOT_SUPPORT_USER_XATTR | 2122 + KERNFS_ROOT_INVARIANT_PARENT, 2135 2123 root_cgrp); 2136 2124 if (IS_ERR(root->kf_root)) { 2137 2125 ret = PTR_ERR(root->kf_root); ··· 4133 4119 size_t nbytes, loff_t off) 4134 4120 { 4135 4121 struct cgroup_file_ctx *ctx = of->priv; 4136 - struct cgroup *cgrp = of->kn->parent->priv; 4122 + struct cgroup *cgrp = kn_priv(of->kn); 4137 4123 struct cftype *cft = of_cft(of); 4138 4124 struct cgroup_subsys_state *css; 4139 4125 int ret;
+1 -1
tools/testing/selftests/bpf/progs/profiler.inc.h
··· 223 223 if (bpf_cmp_likely(filepart_length, <=, MAX_PATH)) { 224 224 payload += filepart_length; 225 225 } 226 - cgroup_node = BPF_CORE_READ(cgroup_node, parent); 226 + cgroup_node = BPF_CORE_READ(cgroup_node, __parent); 227 227 } 228 228 return payload; 229 229 }