Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'cgroup-for-6.17-rc6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup

Pull cgroup fixes from Tejun Heo:
"This contains two cgroup changes. Both are pretty low risk.

- Fix a deadlock in cgroup destruction when repeatedly
mounting/unmounting the perf_event and net_prio controllers.

The issue occurs because cgroup_destroy_wq has max_active=1, causing
root destruction to wait for CSS offline operations that are queued
behind it.

The fix splits cgroup_destroy_wq into three separate workqueues to
eliminate the blocking.

- Set of->priv to NULL upon file release so that potential bugs
manifest as NULL pointer dereferences rather than use-after-free
errors"

* tag 'cgroup-for-6.17-rc6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
cgroup/psi: Set of->priv to NULL upon file release
cgroup: split cgroup_destroy_wq into 3 workqueues

+37 -7
+37 -7
kernel/cgroup/cgroup.c
··· 126 126 * of concurrent destructions. Use a separate workqueue so that cgroup 127 127 * destruction work items don't end up filling up max_active of system_wq 128 128 * which may lead to deadlock. 129 + * 130 + * A cgroup destruction should enqueue work sequentially to: 131 + * cgroup_offline_wq: use for css offline work 132 + * cgroup_release_wq: use for css release work 133 + * cgroup_free_wq: use for free work 134 + * 135 + * Rationale for using separate workqueues: 136 + * The cgroup root free work may depend on completion of other css offline 137 + * operations. If all tasks were enqueued to a single workqueue, this could 138 + * create a deadlock scenario where: 139 + * - Free work waits for other css offline work to complete. 140 + * - But other css offline work is queued after free work in the same queue. 141 + * 142 + * Example deadlock scenario with single workqueue (cgroup_destroy_wq): 143 + * 1. umount net_prio 144 + * 2. net_prio root destruction enqueues work to cgroup_destroy_wq (CPUx) 145 + * 3. perf_event CSS A offline enqueues work to same cgroup_destroy_wq (CPUx) 146 + * 4. net_prio cgroup_destroy_root->cgroup_lock_and_drain_offline. 147 + * 5. net_prio root destruction blocks waiting for perf_event CSS A offline, 148 + * which can never complete as it's behind in the same queue and 149 + * workqueue's max_active is 1. 
129 150 */ 130 - static struct workqueue_struct *cgroup_destroy_wq; 151 + static struct workqueue_struct *cgroup_offline_wq; 152 + static struct workqueue_struct *cgroup_release_wq; 153 + static struct workqueue_struct *cgroup_free_wq; 131 154 132 155 /* generate an array of cgroup subsystem pointers */ 133 156 #define SUBSYS(_x) [_x ## _cgrp_id] = &_x ## _cgrp_subsys, ··· 4182 4159 cft->release(of); 4183 4160 put_cgroup_ns(ctx->ns); 4184 4161 kfree(ctx); 4162 + of->priv = NULL; 4185 4163 } 4186 4164 4187 4165 static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf, ··· 5582 5558 cgroup_unlock(); 5583 5559 5584 5560 INIT_RCU_WORK(&css->destroy_rwork, css_free_rwork_fn); 5585 - queue_rcu_work(cgroup_destroy_wq, &css->destroy_rwork); 5561 + queue_rcu_work(cgroup_free_wq, &css->destroy_rwork); 5586 5562 } 5587 5563 5588 5564 static void css_release(struct percpu_ref *ref) ··· 5591 5567 container_of(ref, struct cgroup_subsys_state, refcnt); 5592 5568 5593 5569 INIT_WORK(&css->destroy_work, css_release_work_fn); 5594 - queue_work(cgroup_destroy_wq, &css->destroy_work); 5570 + queue_work(cgroup_release_wq, &css->destroy_work); 5595 5571 } 5596 5572 5597 5573 static void init_and_link_css(struct cgroup_subsys_state *css, ··· 5725 5701 list_del_rcu(&css->sibling); 5726 5702 err_free_css: 5727 5703 INIT_RCU_WORK(&css->destroy_rwork, css_free_rwork_fn); 5728 - queue_rcu_work(cgroup_destroy_wq, &css->destroy_rwork); 5704 + queue_rcu_work(cgroup_free_wq, &css->destroy_rwork); 5729 5705 return ERR_PTR(err); 5730 5706 } 5731 5707 ··· 5963 5939 5964 5940 if (atomic_dec_and_test(&css->online_cnt)) { 5965 5941 INIT_WORK(&css->destroy_work, css_killed_work_fn); 5966 - queue_work(cgroup_destroy_wq, &css->destroy_work); 5942 + queue_work(cgroup_offline_wq, &css->destroy_work); 5967 5943 } 5968 5944 } 5969 5945 ··· 6349 6325 * We would prefer to do this in cgroup_init() above, but that 6350 6326 * is called before init_workqueues(): so leave this until after. 
6351 6327 */ 6352 - cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", 0, 1); 6353 - BUG_ON(!cgroup_destroy_wq); 6328 + cgroup_offline_wq = alloc_workqueue("cgroup_offline", 0, 1); 6329 + BUG_ON(!cgroup_offline_wq); 6330 + 6331 + cgroup_release_wq = alloc_workqueue("cgroup_release", 0, 1); 6332 + BUG_ON(!cgroup_release_wq); 6333 + 6334 + cgroup_free_wq = alloc_workqueue("cgroup_free", 0, 1); 6335 + BUG_ON(!cgroup_free_wq); 6354 6336 return 0; 6355 6337 } 6356 6338 core_initcall(cgroup_wq_init);