Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

cpuset: Update HK_TYPE_DOMAIN cpumask from cpuset

Until now, HK_TYPE_DOMAIN only included the boot-defined isolated CPUs
passed through the isolcpus= boot option. Users also interested in the
runtime-defined isolated CPUs set up through cpuset had to use different
APIs: cpuset_cpu_is_isolated(), cpu_is_isolated(), etc...

There are many drawbacks to that approach:

1) Most interested subsystems want to know about all isolated CPUs, not
just those defined at boot time.

2) cpuset_cpu_is_isolated() / cpu_is_isolated() are not synchronized with
concurrent cpuset changes.

3) Further cpuset modifications are not propagated to subsystems.

Solve 1) and 2) by centralizing all isolated CPUs, both boot-defined and
cpuset-defined, within the HK_TYPE_DOMAIN housekeeping cpumask.

Subsystems can rely on RCU to synchronize against concurrent changes.

The propagation mentioned in 3) will be handled in further patches.

[Chen Ridong: Fix cpu_hotplug_lock deadlock and use correct static
branch API]

Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Reviewed-by: Waiman Long <longman@redhat.com>
Reviewed-by: Chen Ridong <chenridong@huawei.com>
Signed-off-by: Chen Ridong <chenridong@huawei.com>
Cc: "Michal Koutný" <mkoutny@suse.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Marco Crivellari <marco.crivellari@suse.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Waiman Long <longman@redhat.com>
Cc: cgroups@vger.kernel.org

+80 -8
+7
include/linux/sched/isolation.h
··· 9 9 enum hk_type { 10 10 /* Inverse of boot-time isolcpus= argument */ 11 11 HK_TYPE_DOMAIN_BOOT, 12 + /* 13 + * Same as HK_TYPE_DOMAIN_BOOT but also includes the 14 + * inverse of cpuset isolated partitions. As such it 15 + * is always a subset of HK_TYPE_DOMAIN_BOOT. 16 + */ 12 17 HK_TYPE_DOMAIN, 13 18 /* Inverse of boot-time isolcpus=managed_irq argument */ 14 19 HK_TYPE_MANAGED_IRQ, ··· 40 35 extern bool housekeeping_enabled(enum hk_type type); 41 36 extern void housekeeping_affine(struct task_struct *t, enum hk_type type); 42 37 extern bool housekeeping_test_cpu(int cpu, enum hk_type type); 38 + extern int housekeeping_update(struct cpumask *isol_mask); 43 39 extern void __init housekeeping_init(void); 44 40 45 41 #else ··· 68 62 return true; 69 63 } 70 64 65 + static inline int housekeeping_update(struct cpumask *isol_mask) { return 0; } 71 66 static inline void housekeeping_init(void) { } 72 67 #endif /* CONFIG_CPU_ISOLATION */ 73 68
+3 -2
kernel/cgroup/cpuset.c
··· 1482 1482 if (!isolated_cpus_updating) 1483 1483 return; 1484 1484 1485 - lockdep_assert_cpus_held(); 1486 - 1487 1485 ret = workqueue_unbound_exclude_cpumask(isolated_cpus); 1488 1486 WARN_ON_ONCE(ret < 0); 1489 1487 1490 1488 ret = tmigr_isolated_exclude_cpumask(isolated_cpus); 1489 + WARN_ON_ONCE(ret < 0); 1490 + 1491 + ret = housekeeping_update(isolated_cpus); 1491 1492 WARN_ON_ONCE(ret < 0); 1492 1493 1493 1494 isolated_cpus_updating = false;
+69 -6
kernel/sched/isolation.c
··· 29 29 30 30 bool housekeeping_enabled(enum hk_type type) 31 31 { 32 - return !!(housekeeping.flags & BIT(type)); 32 + return !!(READ_ONCE(housekeeping.flags) & BIT(type)); 33 33 } 34 34 EXPORT_SYMBOL_GPL(housekeeping_enabled); 35 35 36 + static bool housekeeping_dereference_check(enum hk_type type) 37 + { 38 + if (IS_ENABLED(CONFIG_LOCKDEP) && type == HK_TYPE_DOMAIN) { 39 + /* Cpuset isn't even writable yet? */ 40 + if (system_state <= SYSTEM_SCHEDULING) 41 + return true; 42 + 43 + /* CPU hotplug write locked, so cpuset partition can't be overwritten */ 44 + if (IS_ENABLED(CONFIG_HOTPLUG_CPU) && lockdep_is_cpus_write_held()) 45 + return true; 46 + 47 + /* Cpuset lock held, partitions not writable */ 48 + if (IS_ENABLED(CONFIG_CPUSETS) && lockdep_is_cpuset_held()) 49 + return true; 50 + 51 + return false; 52 + } 53 + 54 + return true; 55 + } 56 + 57 + static inline struct cpumask *housekeeping_cpumask_dereference(enum hk_type type) 58 + { 59 + return rcu_dereference_all_check(housekeeping.cpumasks[type], 60 + housekeeping_dereference_check(type)); 61 + } 62 + 36 63 const struct cpumask *housekeeping_cpumask(enum hk_type type) 37 64 { 65 + const struct cpumask *mask = NULL; 66 + 38 67 if (static_branch_unlikely(&housekeeping_overridden)) { 39 - if (housekeeping.flags & BIT(type)) { 40 - return rcu_dereference_check(housekeeping.cpumasks[type], 1); 41 - } 68 + if (READ_ONCE(housekeeping.flags) & BIT(type)) 69 + mask = housekeeping_cpumask_dereference(type); 42 70 } 43 - return cpu_possible_mask; 71 + if (!mask) 72 + mask = cpu_possible_mask; 73 + return mask; 44 74 } 45 75 EXPORT_SYMBOL_GPL(housekeeping_cpumask); 46 76 ··· 110 80 111 81 bool housekeeping_test_cpu(int cpu, enum hk_type type) 112 82 { 113 - if (static_branch_unlikely(&housekeeping_overridden) && housekeeping.flags & BIT(type)) 83 + if (static_branch_unlikely(&housekeeping_overridden) && 84 + READ_ONCE(housekeeping.flags) & BIT(type)) 114 85 return cpumask_test_cpu(cpu, housekeeping_cpumask(type)); 
115 86 return true; 116 87 } 117 88 EXPORT_SYMBOL_GPL(housekeeping_test_cpu); 89 + 90 + int housekeeping_update(struct cpumask *isol_mask) 91 + { 92 + struct cpumask *trial, *old = NULL; 93 + 94 + lockdep_assert_cpus_held(); 95 + 96 + trial = kmalloc(cpumask_size(), GFP_KERNEL); 97 + if (!trial) 98 + return -ENOMEM; 99 + 100 + cpumask_andnot(trial, housekeeping_cpumask(HK_TYPE_DOMAIN_BOOT), isol_mask); 101 + if (!cpumask_intersects(trial, cpu_online_mask)) { 102 + kfree(trial); 103 + return -EINVAL; 104 + } 105 + 106 + if (!housekeeping.flags) 107 + static_branch_enable_cpuslocked(&housekeeping_overridden); 108 + 109 + if (housekeeping.flags & HK_FLAG_DOMAIN) 110 + old = housekeeping_cpumask_dereference(HK_TYPE_DOMAIN); 111 + else 112 + WRITE_ONCE(housekeeping.flags, housekeeping.flags | HK_FLAG_DOMAIN); 113 + rcu_assign_pointer(housekeeping.cpumasks[HK_TYPE_DOMAIN], trial); 114 + 115 + synchronize_rcu(); 116 + 117 + kfree(old); 118 + 119 + return 0; 120 + } 118 121 119 122 void __init housekeeping_init(void) 120 123 {
+1
kernel/sched/sched.h
··· 30 30 #include <linux/context_tracking.h> 31 31 #include <linux/cpufreq.h> 32 32 #include <linux/cpumask_api.h> 33 + #include <linux/cpuset.h> 33 34 #include <linux/ctype.h> 34 35 #include <linux/file.h> 35 36 #include <linux/fs_api.h>