Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

mm/mempolicy: support memory hotplug in weighted interleave

The weighted interleave policy distributes page allocations across
multiple NUMA nodes based on their performance weight, thereby improving
memory bandwidth utilization. The weight values for each node are
configured through sysfs.

Previously, sysfs entries for configuring weighted interleave were created
for all possible nodes (N_POSSIBLE) at initialization, including nodes
that might not have memory. However, not all nodes in N_POSSIBLE are
usable at runtime, as some may remain memoryless or offline. This led to
sysfs entries being created for unusable nodes, causing potential
misconfiguration issues.

To address this issue, this patch modifies the sysfs creation logic to:
1) Limit sysfs entries to nodes that are online and have memory, avoiding
the creation of sysfs entries for nodes that cannot be used.
2) Support memory hotplug by dynamically adding and removing sysfs entries
based on whether a node transitions into or out of the N_MEMORY state.

Additionally, the patch ensures that sysfs attributes are properly managed
when nodes go offline, preventing stale or redundant entries from
persisting in the system.

By making these changes, the weighted interleave policy now manages its
sysfs entries more efficiently, ensuring that only relevant nodes are
considered for interleaving, and dynamically adapting to memory hotplug
events.

[dan.carpenter@linaro.org: fix error code in sysfs_wi_node_add()]
Link: https://lkml.kernel.org/r/aBjL7Bwc0QBzgajK@stanley.mountain
Link: https://lkml.kernel.org/r/20250417072839.711-4-rakie.kim@sk.com
Co-developed-by: Honggyu Kim <honggyu.kim@sk.com>
Signed-off-by: Honggyu Kim <honggyu.kim@sk.com>
Co-developed-by: Yunjeong Mun <yunjeong.mun@sk.com>
Signed-off-by: Yunjeong Mun <yunjeong.mun@sk.com>
Signed-off-by: Rakie Kim <rakie.kim@sk.com>
Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org>
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Reviewed-by: Joshua Hahn <joshua.hahnjy@gmail.com>
Reviewed-by: Gregory Price <gourry@gourry.net>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: "Huang, Ying" <ying.huang@linux.alibaba.com>
Cc: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Cc: Dan Carpenter <dan.carpenter@linaro.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

Authored by Rakie Kim and committed by Andrew Morton
dec92bf9 cf8cecf2

+84 -23
+84 -23
mm/mempolicy.c
··· 113 113 #include <asm/tlbflush.h> 114 114 #include <asm/tlb.h> 115 115 #include <linux/uaccess.h> 116 + #include <linux/memory.h> 116 117 117 118 #include "internal.h" 118 119 ··· 3430 3429 3431 3430 struct sysfs_wi_group { 3432 3431 struct kobject wi_kobj; 3432 + struct mutex kobj_lock; 3433 3433 struct iw_node_attr *nattrs[]; 3434 3434 }; 3435 3435 ··· 3480 3478 3481 3479 static void sysfs_wi_node_delete(int nid) 3482 3480 { 3483 - if (!wi_group->nattrs[nid]) 3481 + struct iw_node_attr *attr; 3482 + 3483 + if (nid < 0 || nid >= nr_node_ids) 3484 3484 return; 3485 3485 3486 - sysfs_remove_file(&wi_group->wi_kobj, 3487 - &wi_group->nattrs[nid]->kobj_attr.attr); 3488 - kfree(wi_group->nattrs[nid]->kobj_attr.attr.name); 3489 - kfree(wi_group->nattrs[nid]); 3486 + mutex_lock(&wi_group->kobj_lock); 3487 + attr = wi_group->nattrs[nid]; 3488 + if (!attr) { 3489 + mutex_unlock(&wi_group->kobj_lock); 3490 + return; 3491 + } 3492 + 3493 + wi_group->nattrs[nid] = NULL; 3494 + mutex_unlock(&wi_group->kobj_lock); 3495 + 3496 + sysfs_remove_file(&wi_group->wi_kobj, &attr->kobj_attr.attr); 3497 + kfree(attr->kobj_attr.attr.name); 3498 + kfree(attr); 3490 3499 } 3491 3500 3492 3501 static void sysfs_wi_node_delete_all(void) ··· 3539 3526 3540 3527 static int sysfs_wi_node_add(int nid) 3541 3528 { 3542 - struct iw_node_attr *node_attr; 3529 + int ret; 3543 3530 char *name; 3531 + struct iw_node_attr *new_attr; 3544 3532 3545 - node_attr = kzalloc(sizeof(*node_attr), GFP_KERNEL); 3546 - if (!node_attr) 3533 + if (nid < 0 || nid >= nr_node_ids) { 3534 + pr_err("invalid node id: %d\n", nid); 3535 + return -EINVAL; 3536 + } 3537 + 3538 + new_attr = kzalloc(sizeof(*new_attr), GFP_KERNEL); 3539 + if (!new_attr) 3547 3540 return -ENOMEM; 3548 3541 3549 3542 name = kasprintf(GFP_KERNEL, "node%d", nid); 3550 3543 if (!name) { 3551 - kfree(node_attr); 3544 + kfree(new_attr); 3552 3545 return -ENOMEM; 3553 3546 } 3554 3547 3555 - sysfs_attr_init(&node_attr->kobj_attr.attr); 3556 - 
node_attr->kobj_attr.attr.name = name; 3557 - node_attr->kobj_attr.attr.mode = 0644; 3558 - node_attr->kobj_attr.show = node_show; 3559 - node_attr->kobj_attr.store = node_store; 3560 - node_attr->nid = nid; 3548 + sysfs_attr_init(&new_attr->kobj_attr.attr); 3549 + new_attr->kobj_attr.attr.name = name; 3550 + new_attr->kobj_attr.attr.mode = 0644; 3551 + new_attr->kobj_attr.show = node_show; 3552 + new_attr->kobj_attr.store = node_store; 3553 + new_attr->nid = nid; 3561 3554 3562 - if (sysfs_create_file(&wi_group->wi_kobj, &node_attr->kobj_attr.attr)) { 3563 - kfree(node_attr->kobj_attr.attr.name); 3564 - kfree(node_attr); 3565 - pr_err("failed to add attribute to weighted_interleave\n"); 3566 - return -ENOMEM; 3555 + mutex_lock(&wi_group->kobj_lock); 3556 + if (wi_group->nattrs[nid]) { 3557 + mutex_unlock(&wi_group->kobj_lock); 3558 + ret = -EEXIST; 3559 + goto out; 3567 3560 } 3568 3561 3569 - wi_group->nattrs[nid] = node_attr; 3562 + ret = sysfs_create_file(&wi_group->wi_kobj, &new_attr->kobj_attr.attr); 3563 + if (ret) { 3564 + mutex_unlock(&wi_group->kobj_lock); 3565 + goto out; 3566 + } 3567 + wi_group->nattrs[nid] = new_attr; 3568 + mutex_unlock(&wi_group->kobj_lock); 3570 3569 return 0; 3570 + 3571 + out: 3572 + kfree(new_attr->kobj_attr.attr.name); 3573 + kfree(new_attr); 3574 + return ret; 3575 + } 3576 + 3577 + static int wi_node_notifier(struct notifier_block *nb, 3578 + unsigned long action, void *data) 3579 + { 3580 + int err; 3581 + struct memory_notify *arg = data; 3582 + int nid = arg->status_change_nid; 3583 + 3584 + if (nid < 0) 3585 + return NOTIFY_OK; 3586 + 3587 + switch (action) { 3588 + case MEM_ONLINE: 3589 + err = sysfs_wi_node_add(nid); 3590 + if (err) 3591 + pr_err("failed to add sysfs for node%d during hotplug: %d\n", 3592 + nid, err); 3593 + break; 3594 + case MEM_OFFLINE: 3595 + sysfs_wi_node_delete(nid); 3596 + break; 3597 + } 3598 + 3599 + return NOTIFY_OK; 3571 3600 } 3572 3601 3573 3602 static int __init 
add_weighted_interleave_group(struct kobject *mempolicy_kobj) ··· 3620 3565 GFP_KERNEL); 3621 3566 if (!wi_group) 3622 3567 return -ENOMEM; 3568 + mutex_init(&wi_group->kobj_lock); 3623 3569 3624 3570 err = kobject_init_and_add(&wi_group->wi_kobj, &wi_ktype, mempolicy_kobj, 3625 3571 "weighted_interleave"); 3626 3572 if (err) 3627 3573 goto err_put_kobj; 3628 3574 3629 - for_each_node_state(nid, N_POSSIBLE) { 3575 + for_each_online_node(nid) { 3576 + if (!node_state(nid, N_MEMORY)) 3577 + continue; 3578 + 3630 3579 err = sysfs_wi_node_add(nid); 3631 3580 if (err) { 3632 - pr_err("failed to add sysfs [node%d]\n", nid); 3581 + pr_err("failed to add sysfs for node%d during init: %d\n", 3582 + nid, err); 3633 3583 goto err_cleanup_kobj; 3634 3584 } 3635 3585 } 3636 3586 3587 + hotplug_memory_notifier(wi_node_notifier, DEFAULT_CALLBACK_PRI); 3637 3588 return 0; 3638 3589 3639 3590 err_cleanup_kobj: