Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

xattr: remove rbtree-based simple_xattr infrastructure

Now that all consumers (shmem, kernfs, pidfs) have been converted to
use the rhashtable-based simple_xattrs with pointer-based lazy
allocation, remove the legacy rbtree code path. The rhashtable
implementation provides O(1) average-case lookup with RCU-based lockless
reads, replacing the O(log n) rbtree lookup and its reader-writer
spinlock contention.

Link: https://patch.msgid.link/20260216-work-xattr-socket-v1-6-c2efa4f74cb7@kernel.org
Acked-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>

+101 -294
+99 -284
fs/xattr.c
··· 1200 1200 1201 1201 static void simple_xattr_rcu_free(struct rcu_head *head) 1202 1202 { 1203 - struct simple_xattr *xattr; 1203 + struct simple_xattr *xattr = container_of(head, struct simple_xattr, rcu); 1204 1204 1205 - xattr = container_of(head, struct simple_xattr, rcu); 1206 1205 simple_xattr_free(xattr); 1207 1206 } 1208 1207 1209 1208 /** 1210 - * simple_xattr_free_rcu - free an xattr object after an RCU grace period 1209 + * simple_xattr_free_rcu - free an xattr object with RCU delay 1211 1210 * @xattr: the xattr object 1212 1211 * 1213 - * Schedule RCU-deferred freeing of an xattr entry. This is used by 1214 - * rhashtable-based callers of simple_xattr_set() that replace or remove 1215 - * an existing entry while concurrent RCU readers may still be accessing 1216 - * it. 1212 + * Free the xattr object after an RCU grace period. This must be used when 1213 + * the xattr was removed from a data structure that concurrent RCU readers 1214 + * may still be traversing. Can handle @xattr being NULL. 1217 1215 */ 1218 1216 void simple_xattr_free_rcu(struct simple_xattr *xattr) 1219 1217 { ··· 1250 1252 new_xattr->size = size; 1251 1253 memcpy(new_xattr->value, value, size); 1252 1254 return new_xattr; 1253 - } 1254 - 1255 - /** 1256 - * rbtree_simple_xattr_cmp - compare xattr name with current rbtree xattr entry 1257 - * @key: xattr name 1258 - * @node: current node 1259 - * 1260 - * Compare the xattr name with the xattr name attached to @node in the rbtree. 1261 - * 1262 - * Return: Negative value if continuing left, positive if continuing right, 0 1263 - * if the xattr attached to @node matches @key. 
1264 - */ 1265 - static int rbtree_simple_xattr_cmp(const void *key, const struct rb_node *node) 1266 - { 1267 - const char *xattr_name = key; 1268 - const struct simple_xattr *xattr; 1269 - 1270 - xattr = rb_entry(node, struct simple_xattr, rb_node); 1271 - return strcmp(xattr->name, xattr_name); 1272 - } 1273 - 1274 - /** 1275 - * rbtree_simple_xattr_node_cmp - compare two xattr rbtree nodes 1276 - * @new_node: new node 1277 - * @node: current node 1278 - * 1279 - * Compare the xattr attached to @new_node with the xattr attached to @node. 1280 - * 1281 - * Return: Negative value if continuing left, positive if continuing right, 0 1282 - * if the xattr attached to @new_node matches the xattr attached to @node. 1283 - */ 1284 - static int rbtree_simple_xattr_node_cmp(struct rb_node *new_node, 1285 - const struct rb_node *node) 1286 - { 1287 - struct simple_xattr *xattr; 1288 - xattr = rb_entry(new_node, struct simple_xattr, rb_node); 1289 - return rbtree_simple_xattr_cmp(xattr->name, node); 1290 1255 } 1291 1256 1292 1257 static u32 simple_xattr_hashfn(const void *data, u32 len, u32 seed) ··· 1297 1336 int simple_xattr_get(struct simple_xattrs *xattrs, const char *name, 1298 1337 void *buffer, size_t size) 1299 1338 { 1300 - struct simple_xattr *xattr = NULL; 1339 + struct simple_xattr *xattr; 1301 1340 int ret = -ENODATA; 1302 1341 1303 - if (xattrs->use_rhashtable) { 1304 - guard(rcu)(); 1305 - xattr = rhashtable_lookup(&xattrs->ht, name, 1306 - simple_xattr_params); 1307 - if (xattr) { 1308 - ret = xattr->size; 1309 - if (buffer) { 1310 - if (size < xattr->size) 1311 - ret = -ERANGE; 1312 - else 1313 - memcpy(buffer, xattr->value, 1314 - xattr->size); 1315 - } 1342 + guard(rcu)(); 1343 + xattr = rhashtable_lookup(&xattrs->ht, name, simple_xattr_params); 1344 + if (xattr) { 1345 + ret = xattr->size; 1346 + if (buffer) { 1347 + if (size < xattr->size) 1348 + ret = -ERANGE; 1349 + else 1350 + memcpy(buffer, xattr->value, xattr->size); 1316 1351 } 1317 - } else { 
1318 - struct rb_node *rbp; 1319 - 1320 - read_lock(&xattrs->lock); 1321 - rbp = rb_find(name, &xattrs->rb_root, 1322 - rbtree_simple_xattr_cmp); 1323 - if (rbp) { 1324 - xattr = rb_entry(rbp, struct simple_xattr, rb_node); 1325 - ret = xattr->size; 1326 - if (buffer) { 1327 - if (size < xattr->size) 1328 - ret = -ERANGE; 1329 - else 1330 - memcpy(buffer, xattr->value, 1331 - xattr->size); 1332 - } 1333 - } 1334 - read_unlock(&xattrs->lock); 1335 1352 } 1336 1353 return ret; 1337 1354 } ··· 1337 1398 * nothing if XATTR_CREATE is specified in @flags or @flags is zero. For 1338 1399 * XATTR_REPLACE we fail as mentioned above. 1339 1400 * 1401 + * Note: Callers must externally serialize writes. All current callers hold 1402 + * the inode lock for write operations. The lookup->replace/remove sequence 1403 + * is not atomic with respect to the rhashtable's per-bucket locking, but 1404 + * is safe because writes are serialized by the caller. 1405 + * 1340 1406 * Return: On success, the removed or replaced xattr is returned, to be freed 1341 1407 * by the caller; or NULL if none. On failure a negative error code is returned. 1342 1408 */ ··· 1350 1406 size_t size, int flags) 1351 1407 { 1352 1408 struct simple_xattr *old_xattr = NULL; 1353 - int err = 0; 1409 + int err; 1354 1410 1355 1411 CLASS(simple_xattr, new_xattr)(value, size); 1356 1412 if (IS_ERR(new_xattr)) ··· 1362 1418 return ERR_PTR(-ENOMEM); 1363 1419 } 1364 1420 1365 - if (xattrs->use_rhashtable) { 1366 - /* 1367 - * Lookup is safe without RCU here since writes are 1368 - * serialized by the caller. 1369 - */ 1370 - old_xattr = rhashtable_lookup_fast(&xattrs->ht, name, 1371 - simple_xattr_params); 1421 + /* Lookup is safe without RCU here since writes are serialized. */ 1422 + old_xattr = rhashtable_lookup_fast(&xattrs->ht, name, 1423 + simple_xattr_params); 1372 1424 1373 - if (old_xattr) { 1374 - /* Fail if XATTR_CREATE is requested and the xattr exists. 
*/ 1375 - if (flags & XATTR_CREATE) 1376 - return ERR_PTR(-EEXIST); 1425 + if (old_xattr) { 1426 + /* Fail if XATTR_CREATE is requested and the xattr exists. */ 1427 + if (flags & XATTR_CREATE) 1428 + return ERR_PTR(-EEXIST); 1377 1429 1378 - if (new_xattr) { 1379 - err = rhashtable_replace_fast(&xattrs->ht, 1380 - &old_xattr->hash_node, 1381 - &new_xattr->hash_node, 1382 - simple_xattr_params); 1383 - if (err) 1384 - return ERR_PTR(err); 1385 - } else { 1386 - err = rhashtable_remove_fast(&xattrs->ht, 1387 - &old_xattr->hash_node, 1388 - simple_xattr_params); 1389 - if (err) 1390 - return ERR_PTR(err); 1391 - } 1430 + if (new_xattr) { 1431 + err = rhashtable_replace_fast(&xattrs->ht, 1432 + &old_xattr->hash_node, 1433 + &new_xattr->hash_node, 1434 + simple_xattr_params); 1435 + if (err) 1436 + return ERR_PTR(err); 1392 1437 } else { 1393 - /* Fail if XATTR_REPLACE is requested but no xattr is found. */ 1394 - if (flags & XATTR_REPLACE) 1395 - return ERR_PTR(-ENODATA); 1396 - 1397 - /* 1398 - * If XATTR_CREATE or no flags are specified together 1399 - * with a new value simply insert it. 1400 - */ 1401 - if (new_xattr) { 1402 - err = rhashtable_insert_fast(&xattrs->ht, 1403 - &new_xattr->hash_node, 1404 - simple_xattr_params); 1405 - if (err) 1406 - return ERR_PTR(err); 1407 - } 1408 - 1409 - /* 1410 - * If XATTR_CREATE or no flags are specified and 1411 - * neither an old or new xattr exist then we don't 1412 - * need to do anything. 1413 - */ 1438 + err = rhashtable_remove_fast(&xattrs->ht, 1439 + &old_xattr->hash_node, 1440 + simple_xattr_params); 1441 + if (err) 1442 + return ERR_PTR(err); 1414 1443 } 1415 1444 } else { 1416 - struct rb_node *parent = NULL, **rbp; 1417 - int ret; 1445 + /* Fail if XATTR_REPLACE is requested but no xattr is found. 
*/ 1446 + if (flags & XATTR_REPLACE) 1447 + return ERR_PTR(-ENODATA); 1418 1448 1419 - write_lock(&xattrs->lock); 1420 - rbp = &xattrs->rb_root.rb_node; 1421 - while (*rbp) { 1422 - parent = *rbp; 1423 - ret = rbtree_simple_xattr_cmp(name, *rbp); 1424 - if (ret < 0) 1425 - rbp = &(*rbp)->rb_left; 1426 - else if (ret > 0) 1427 - rbp = &(*rbp)->rb_right; 1428 - else 1429 - old_xattr = rb_entry(*rbp, struct simple_xattr, 1430 - rb_node); 1431 - if (old_xattr) 1432 - break; 1449 + /* 1450 + * If XATTR_CREATE or no flags are specified together with a 1451 + * new value simply insert it. 1452 + */ 1453 + if (new_xattr) { 1454 + err = rhashtable_insert_fast(&xattrs->ht, 1455 + &new_xattr->hash_node, 1456 + simple_xattr_params); 1457 + if (err) 1458 + return ERR_PTR(err); 1433 1459 } 1434 1460 1435 - if (old_xattr) { 1436 - /* Fail if XATTR_CREATE is requested and the xattr exists. */ 1437 - if (flags & XATTR_CREATE) { 1438 - err = -EEXIST; 1439 - goto out_unlock; 1440 - } 1441 - 1442 - if (new_xattr) 1443 - rb_replace_node(&old_xattr->rb_node, 1444 - &new_xattr->rb_node, 1445 - &xattrs->rb_root); 1446 - else 1447 - rb_erase(&old_xattr->rb_node, 1448 - &xattrs->rb_root); 1449 - } else { 1450 - /* Fail if XATTR_REPLACE is requested but no xattr is found. */ 1451 - if (flags & XATTR_REPLACE) { 1452 - err = -ENODATA; 1453 - goto out_unlock; 1454 - } 1455 - 1456 - /* 1457 - * If XATTR_CREATE or no flags are specified together 1458 - * with a new value simply insert it. 1459 - */ 1460 - if (new_xattr) { 1461 - rb_link_node(&new_xattr->rb_node, parent, rbp); 1462 - rb_insert_color(&new_xattr->rb_node, 1463 - &xattrs->rb_root); 1464 - } 1465 - 1466 - /* 1467 - * If XATTR_CREATE or no flags are specified and 1468 - * neither an old or new xattr exist then we don't 1469 - * need to do anything. 
1470 - */ 1471 - } 1472 - 1473 - out_unlock: 1474 - write_unlock(&xattrs->lock); 1475 - if (err) 1476 - return ERR_PTR(err); 1461 + /* 1462 + * If XATTR_CREATE or no flags are specified and neither an 1463 + * old or new xattr exist then we don't need to do anything. 1464 + */ 1477 1465 } 1466 + 1478 1467 retain_and_null_ptr(new_xattr); 1479 1468 return old_xattr; 1480 1469 } ··· 1449 1572 char *buffer, size_t size) 1450 1573 { 1451 1574 bool trusted = ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN); 1575 + struct rhashtable_iter iter; 1452 1576 struct simple_xattr *xattr; 1453 1577 ssize_t remaining_size = size; 1454 1578 int err = 0; ··· 1473 1595 if (!xattrs) 1474 1596 return size - remaining_size; 1475 1597 1476 - if (xattrs->use_rhashtable) { 1477 - struct rhashtable_iter iter; 1598 + rhashtable_walk_enter(&xattrs->ht, &iter); 1599 + rhashtable_walk_start(&iter); 1478 1600 1479 - rhashtable_walk_enter(&xattrs->ht, &iter); 1480 - rhashtable_walk_start(&iter); 1481 - 1482 - while ((xattr = rhashtable_walk_next(&iter)) != NULL) { 1483 - if (IS_ERR(xattr)) { 1484 - if (PTR_ERR(xattr) == -EAGAIN) 1485 - continue; 1486 - err = PTR_ERR(xattr); 1487 - break; 1488 - } 1489 - 1490 - /* skip "trusted." attributes for unprivileged callers */ 1491 - if (!trusted && xattr_is_trusted(xattr->name)) 1601 + while ((xattr = rhashtable_walk_next(&iter)) != NULL) { 1602 + if (IS_ERR(xattr)) { 1603 + if (PTR_ERR(xattr) == -EAGAIN) 1492 1604 continue; 1493 - 1494 - /* skip MAC labels; these are provided by LSM above */ 1495 - if (xattr_is_maclabel(xattr->name)) 1496 - continue; 1497 - 1498 - err = xattr_list_one(&buffer, &remaining_size, 1499 - xattr->name); 1500 - if (err) 1501 - break; 1605 + err = PTR_ERR(xattr); 1606 + break; 1502 1607 } 1503 1608 1504 - rhashtable_walk_stop(&iter); 1505 - rhashtable_walk_exit(&iter); 1506 - } else { 1507 - struct rb_node *rbp; 1609 + /* skip "trusted." 
attributes for unprivileged callers */ 1610 + if (!trusted && xattr_is_trusted(xattr->name)) 1611 + continue; 1508 1612 1509 - read_lock(&xattrs->lock); 1510 - for (rbp = rb_first(&xattrs->rb_root); rbp; 1511 - rbp = rb_next(rbp)) { 1512 - xattr = rb_entry(rbp, struct simple_xattr, rb_node); 1613 + /* skip MAC labels; these are provided by LSM above */ 1614 + if (xattr_is_maclabel(xattr->name)) 1615 + continue; 1513 1616 1514 - /* skip "trusted." attributes for unprivileged callers */ 1515 - if (!trusted && xattr_is_trusted(xattr->name)) 1516 - continue; 1517 - 1518 - /* skip MAC labels; these are provided by LSM above */ 1519 - if (xattr_is_maclabel(xattr->name)) 1520 - continue; 1521 - 1522 - err = xattr_list_one(&buffer, &remaining_size, 1523 - xattr->name); 1524 - if (err) 1525 - break; 1526 - } 1527 - read_unlock(&xattrs->lock); 1617 + err = xattr_list_one(&buffer, &remaining_size, xattr->name); 1618 + if (err) 1619 + break; 1528 1620 } 1529 1621 1530 - return err ? err : size - remaining_size; 1531 - } 1622 + rhashtable_walk_stop(&iter); 1623 + rhashtable_walk_exit(&iter); 1532 1624 1533 - /** 1534 - * rbtree_simple_xattr_less - compare two xattr rbtree nodes 1535 - * @new_node: new node 1536 - * @node: current node 1537 - * 1538 - * Compare the xattr attached to @new_node with the xattr attached to @node. 1539 - * Note that this function technically tolerates duplicate entries. 1540 - * 1541 - * Return: True if insertion point in the rbtree is found. 1542 - */ 1543 - static bool rbtree_simple_xattr_less(struct rb_node *new_node, 1544 - const struct rb_node *node) 1545 - { 1546 - return rbtree_simple_xattr_node_cmp(new_node, node) < 0; 1625 + return err ? err : size - remaining_size; 1547 1626 } 1548 1627 1549 1628 /** ··· 1511 1676 * Add an xattr object to @xattrs. This assumes no replacement or removal 1512 1677 * of matching xattrs is wanted. 
Should only be called during inode 1513 1678 * initialization when a few distinct initial xattrs are supposed to be set. 1679 + * 1680 + * Return: On success zero is returned. On failure a negative error code is 1681 + * returned. 1514 1682 */ 1515 1683 int simple_xattr_add(struct simple_xattrs *xattrs, 1516 1684 struct simple_xattr *new_xattr) 1517 1685 { 1518 - if (xattrs->use_rhashtable) 1519 - return rhashtable_insert_fast(&xattrs->ht, 1520 - &new_xattr->hash_node, 1521 - simple_xattr_params); 1522 - 1523 - write_lock(&xattrs->lock); 1524 - rb_add(&new_xattr->rb_node, &xattrs->rb_root, 1525 - rbtree_simple_xattr_less); 1526 - write_unlock(&xattrs->lock); 1527 - return 0; 1686 + return rhashtable_insert_fast(&xattrs->ht, &new_xattr->hash_node, 1687 + simple_xattr_params); 1528 1688 } 1529 1689 1530 1690 /** 1531 1691 * simple_xattrs_init - initialize new xattr header 1532 1692 * @xattrs: header to initialize 1533 1693 * 1534 - * Initialize relevant fields of a an xattr header. 1694 + * Initialize the rhashtable used to store xattr objects. 1695 + * 1696 + * Return: On success zero is returned. On failure a negative error code is 1697 + * returned. 1535 1698 */ 1536 - void simple_xattrs_init(struct simple_xattrs *xattrs) 1699 + int simple_xattrs_init(struct simple_xattrs *xattrs) 1537 1700 { 1538 - xattrs->use_rhashtable = false; 1539 - xattrs->rb_root = RB_ROOT; 1540 - rwlock_init(&xattrs->lock); 1701 + return rhashtable_init(&xattrs->ht, &simple_xattr_params); 1541 1702 } 1542 1703 1543 1704 /** ··· 1541 1710 * 1542 1711 * Dynamically allocate a simple_xattrs header and initialize the 1543 1712 * underlying rhashtable. This is intended for consumers that want 1544 - * rhashtable-based xattr storage. 1713 + * to lazily allocate xattr storage only when the first xattr is set, 1714 + * avoiding the per-inode rhashtable overhead when no xattrs are used. 1545 1715 * 1546 1716 * Return: On success a new simple_xattrs is returned. 
On failure an 1547 1717 * ERR_PTR is returned. ··· 1550 1718 struct simple_xattrs *simple_xattrs_alloc(void) 1551 1719 { 1552 1720 struct simple_xattrs *xattrs __free(kfree) = NULL; 1721 + int ret; 1553 1722 1554 1723 xattrs = kzalloc(sizeof(*xattrs), GFP_KERNEL); 1555 1724 if (!xattrs) 1556 1725 return ERR_PTR(-ENOMEM); 1557 1726 1558 - xattrs->use_rhashtable = true; 1559 - if (rhashtable_init(&xattrs->ht, &simple_xattr_params)) 1560 - return ERR_PTR(-ENOMEM); 1727 + ret = simple_xattrs_init(xattrs); 1728 + if (ret) 1729 + return ERR_PTR(ret); 1561 1730 1562 1731 return no_free_ptr(xattrs); 1563 1732 } ··· 1617 1784 */ 1618 1785 void simple_xattrs_free(struct simple_xattrs *xattrs, size_t *freed_space) 1619 1786 { 1787 + might_sleep(); 1788 + 1620 1789 if (freed_space) 1621 1790 *freed_space = 0; 1622 - 1623 - if (xattrs->use_rhashtable) { 1624 - rhashtable_free_and_destroy(&xattrs->ht, 1625 - simple_xattr_ht_free, freed_space); 1626 - } else { 1627 - struct rb_node *rbp; 1628 - 1629 - rbp = rb_first(&xattrs->rb_root); 1630 - while (rbp) { 1631 - struct simple_xattr *xattr; 1632 - struct rb_node *rbp_next; 1633 - 1634 - rbp_next = rb_next(rbp); 1635 - xattr = rb_entry(rbp, struct simple_xattr, rb_node); 1636 - rb_erase(&xattr->rb_node, &xattrs->rb_root); 1637 - if (freed_space) 1638 - *freed_space += simple_xattr_space(xattr->name, 1639 - xattr->size); 1640 - simple_xattr_free(xattr); 1641 - rbp = rbp_next; 1642 - } 1643 - } 1791 + rhashtable_free_and_destroy(&xattrs->ht, simple_xattr_ht_free, 1792 + freed_space); 1644 1793 }
+2 -10
include/linux/xattr.h
··· 107 107 } 108 108 109 109 struct simple_xattrs { 110 - bool use_rhashtable; 111 - union { 112 - struct { 113 - struct rb_root rb_root; 114 - rwlock_t lock; 115 - }; 116 - struct rhashtable ht; 117 - }; 110 + struct rhashtable ht; 118 111 }; 119 112 120 113 struct simple_xattr { 121 - struct rb_node rb_node; 122 114 struct rhash_head hash_node; 123 115 struct rcu_head rcu; 124 116 char *name; ··· 118 126 char value[] __counted_by(size); 119 127 }; 120 128 121 - void simple_xattrs_init(struct simple_xattrs *xattrs); 129 + int simple_xattrs_init(struct simple_xattrs *xattrs); 122 130 struct simple_xattrs *simple_xattrs_alloc(void); 123 131 struct simple_xattrs *simple_xattrs_lazy_alloc(struct simple_xattrs **xattrsp, 124 132 const void *value, int flags);