Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'cgroup-for-7.0-rc3-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup

Pull cgroup fixes from Tejun Heo:

- Hide PF_EXITING tasks from cgroup.procs to avoid exposing dead tasks
that haven't been removed yet, fixing a systemd timeout issue on
PREEMPT_RT

- Call rebuild_sched_domains() directly in CPU hotplug instead of
deferring to a workqueue, fixing a race where online/offline CPUs
could briefly appear in stale sched domains

* tag 'cgroup-for-7.0-rc3-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
cgroup: Don't expose dead tasks in cgroup
cgroup/cpuset: Call rebuild_sched_domains() directly in hotplug

+37 -28
+6
kernel/cgroup/cgroup.c
··· 5109 5109 return; 5110 5110 5111 5111 task = list_entry(it->task_pos, struct task_struct, cg_list); 5112 + /* 5113 + * Hide tasks that are exiting but not yet removed. Keep zombie 5114 + * leaders with live threads visible. 5115 + */ 5116 + if ((task->flags & PF_EXITING) && !atomic_read(&task->signal->live)) 5117 + goto repeat; 5112 5118 5113 5119 if (it->flags & CSS_TASK_ITER_PROCS) { 5114 5120 /* if PROCS, skip over tasks which aren't group leaders */
+31 -28
kernel/cgroup/cpuset.c
··· 879 879 /* 880 880 * Cgroup v2 doesn't support domain attributes, just set all of them 881 881 * to SD_ATTR_INIT. Also non-isolating partition root CPUs are a 882 - * subset of HK_TYPE_DOMAIN housekeeping CPUs. 882 + * subset of HK_TYPE_DOMAIN_BOOT housekeeping CPUs. 883 883 */ 884 884 for (i = 0; i < ndoms; i++) { 885 885 /* ··· 888 888 */ 889 889 if (!csa || csa[i] == &top_cpuset) 890 890 cpumask_and(doms[i], top_cpuset.effective_cpus, 891 - housekeeping_cpumask(HK_TYPE_DOMAIN)); 891 + housekeeping_cpumask(HK_TYPE_DOMAIN_BOOT)); 892 892 else 893 893 cpumask_copy(doms[i], csa[i]->effective_cpus); 894 894 if (dattr) ··· 1329 1329 } 1330 1330 1331 1331 /* 1332 - * update_hk_sched_domains - Update HK cpumasks & rebuild sched domains 1332 + * cpuset_update_sd_hk_unlock - Rebuild sched domains, update HK & unlock 1333 1333 * 1334 - * Update housekeeping cpumasks and rebuild sched domains if necessary. 1335 - * This should be called at the end of cpuset or hotplug actions. 1334 + * Update housekeeping cpumasks and rebuild sched domains if necessary and 1335 + * then do a cpuset_full_unlock(). 1336 + * This should be called at the end of cpuset operation. 1336 1337 */ 1337 - static void update_hk_sched_domains(void) 1338 + static void cpuset_update_sd_hk_unlock(void) 1339 + __releases(&cpuset_mutex) 1340 + __releases(&cpuset_top_mutex) 1338 1341 { 1342 + /* force_sd_rebuild will be cleared in rebuild_sched_domains_locked() */ 1343 + if (force_sd_rebuild) 1344 + rebuild_sched_domains_locked(); 1345 + 1339 1346 if (update_housekeeping) { 1340 - /* Updating HK cpumasks implies rebuild sched domains */ 1341 1347 update_housekeeping = false; 1342 - force_sd_rebuild = true; 1343 1348 cpumask_copy(isolated_hk_cpus, isolated_cpus); 1344 1349 1345 1350 /* ··· 1355 1350 mutex_unlock(&cpuset_mutex); 1356 1351 cpus_read_unlock(); 1357 1352 WARN_ON_ONCE(housekeeping_update(isolated_hk_cpus)); 1358 - cpus_read_lock(); 1359 - mutex_lock(&cpuset_mutex); 1353 + mutex_unlock(&cpuset_top_mutex); 1354 + } else { 1355 + cpuset_full_unlock(); 1360 1356 } 1361 - /* force_sd_rebuild will be cleared in rebuild_sched_domains_locked() */ 1362 - if (force_sd_rebuild) 1363 - rebuild_sched_domains_locked(); 1364 1357 } 1365 1358 1366 1359 /* 1367 - * Work function to invoke update_hk_sched_domains() 1360 + * Work function to invoke cpuset_update_sd_hk_unlock() 1368 1361 */ 1369 1362 static void hk_sd_workfn(struct work_struct *work) 1370 1363 { 1371 1364 cpuset_full_lock(); 1372 - update_hk_sched_domains(); 1373 - cpuset_full_unlock(); 1365 + cpuset_update_sd_hk_unlock(); 1374 1366 } 1375 1367 1376 1368 /** ··· 3232 3230 3233 3231 free_cpuset(trialcs); 3234 3232 out_unlock: 3235 - update_hk_sched_domains(); 3236 - cpuset_full_unlock(); 3233 + cpuset_update_sd_hk_unlock(); 3237 3234 if (of_cft(of)->private == FILE_MEMLIST) 3238 3235 schedule_flush_migrate_mm(); 3239 3236 return retval ?: nbytes; ··· 3339 3338 cpuset_full_lock(); 3340 3339 if (is_cpuset_online(cs)) 3341 3340 retval = update_prstate(cs, val); 3342 - update_hk_sched_domains(); 3343 - cpuset_full_unlock(); 3341 + cpuset_update_sd_hk_unlock(); 3344 3342 return retval ?: nbytes; 3345 3343 } 3346 3344 ··· 3513 3513 /* Reset valid partition back to member */ 3514 3514 if (is_partition_valid(cs)) 3515 3515 update_prstate(cs, PRS_MEMBER); 3516 - update_hk_sched_domains(); 3517 - cpuset_full_unlock(); 3516 + cpuset_update_sd_hk_unlock(); 3518 3517 } 3519 3518 3520 3519 static void cpuset_css_free(struct cgroup_subsys_state *css) ··· 3922 3923 rcu_read_unlock(); 3923 3924 } 3924 3925 3925 - 3926 3926 /* 3927 - * Queue a work to call housekeeping_update() & rebuild_sched_domains() 3928 - * There will be a slight delay before the HK_TYPE_DOMAIN housekeeping 3929 - * cpumask can correctly reflect what is in isolated_cpus. 3927 + * rebuild_sched_domains() will always be called directly if needed 3928 + * to make sure that newly added or removed CPU will be reflected in 3929 + * the sched domains. However, if isolated partition invalidation 3930 + * or recreation is being done (update_housekeeping set), a work item 3931 + * will be queued to call housekeeping_update() to update the 3932 + * corresponding housekeeping cpumasks after some slight delay. 3930 3933 * 3931 3934 * We rely on WORK_STRUCT_PENDING_BIT to not requeue a work item that 3932 3935 * is still pending. Before the pending bit is cleared, the work data ··· 3937 3936 * previously queued work. Since hk_sd_workfn() doesn't use the work 3938 3937 * item at all, this is not a problem. 3939 3938 */ 3940 - if (update_housekeeping || force_sd_rebuild) 3941 - queue_work(system_unbound_wq, &hk_sd_work); 3939 + if (force_sd_rebuild) 3940 + rebuild_sched_domains_cpuslocked(); 3941 + if (update_housekeeping) 3942 + queue_work(system_dfl_wq, &hk_sd_work); 3942 3943 3943 3944 free_tmpmasks(ptmp); 3944 3945 }