Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge branch 'for-3.13-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup

Pull cgroup fixes from Tejun Heo:
"Fixes for three issues.

- cgroup destruction path could swamp system_wq possibly leading to
deadlock. This actually seems to happen in the wild with memcg
because memcg destruction path adds nested dependency on system_wq.

Resolved by isolating cgroup destruction work items on its
dedicated workqueue.

- Possible locking context deadlock through seqcount reported by
lockdep

- Memory leak under certain conditions"

* 'for-3.13-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
cgroup: fix cgroup_subsys_state leak for seq_files
cpuset: Fix memory allocator deadlock
cgroup: use a dedicated workqueue for cgroup destruction

2 files changed, +37 -6

kernel/cgroup.c (+31 -4)
···
 static DEFINE_MUTEX(cgroup_root_mutex);

 /*
+ * cgroup destruction makes heavy use of work items and there can be a lot
+ * of concurrent destructions.  Use a separate workqueue so that cgroup
+ * destruction work items don't end up filling up max_active of system_wq
+ * which may lead to deadlock.
+ */
+static struct workqueue_struct *cgroup_destroy_wq;
+
+/*
  * Generate an array of cgroup subsystem pointers. At boot time, this is
  * populated with the built in subsystems, and modular subsystems are
  * registered after that. The mutable section of this array is protected by
···
 static int cgroup_destroy_locked(struct cgroup *cgrp);
 static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[],
                               bool is_add);
+static int cgroup_file_release(struct inode *inode, struct file *file);

 /**
  * cgroup_css - obtain a cgroup's css for the specified subsystem
···
        struct cgroup *cgrp = container_of(head, struct cgroup, rcu_head);

        INIT_WORK(&cgrp->destroy_work, cgroup_free_fn);
-       schedule_work(&cgrp->destroy_work);
+       queue_work(cgroup_destroy_wq, &cgrp->destroy_work);
 }

 static void cgroup_diput(struct dentry *dentry, struct inode *inode)
···
        .read = seq_read,
        .write = cgroup_file_write,
        .llseek = seq_lseek,
-       .release = single_release,
+       .release = cgroup_file_release,
 };

 static int cgroup_file_open(struct inode *inode, struct file *file)
···
        ret = cft->release(inode, file);
        if (css->ss)
                css_put(css);
+       if (file->f_op == &cgroup_seqfile_operations)
+               single_release(inode, file);
        return ret;
 }
···
         * css_put().  dput() requires process context which we don't have.
         */
        INIT_WORK(&css->destroy_work, css_free_work_fn);
-       schedule_work(&css->destroy_work);
+       queue_work(cgroup_destroy_wq, &css->destroy_work);
 }

 static void css_release(struct percpu_ref *ref)
···
                container_of(ref, struct cgroup_subsys_state, refcnt);

        INIT_WORK(&css->destroy_work, css_killed_work_fn);
-       schedule_work(&css->destroy_work);
+       queue_work(cgroup_destroy_wq, &css->destroy_work);
 }

 /**
···
        return err;
 }
+
+static int __init cgroup_wq_init(void)
+{
+       /*
+        * There isn't much point in executing destruction path in
+        * parallel.  Good chunk is serialized with cgroup_mutex anyway.
+        * Use 1 for @max_active.
+        *
+        * We would prefer to do this in cgroup_init() above, but that
+        * is called before init_workqueues(): so leave this until after.
+        */
+       cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", 0, 1);
+       BUG_ON(!cgroup_destroy_wq);
+       return 0;
+}
+core_initcall(cgroup_wq_init);

 /*
  * proc_cgroup_show()
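The queue_work() hunks above apply a general pattern: destruction items queued on system_wq can fill its max_active budget and deadlock against work they depend on, so they get their own queue. A minimal sketch of that pattern as a hypothetical standalone module (all demo_* names are illustrative, not from the patch):

/*
 * Sketch only: a private workqueue with max_active = 1 so a burst of
 * destruction items cannot consume system_wq's concurrency limit.
 */
#include <linux/module.h>
#include <linux/workqueue.h>

static struct workqueue_struct *demo_destroy_wq;

static void demo_destroy_fn(struct work_struct *work)
{
        /* teardown that may itself queue or flush system_wq work */
        pr_info("demo: destruction item ran\n");
}

static DECLARE_WORK(demo_work, demo_destroy_fn);

static int __init demo_init(void)
{
        /* flags = 0, max_active = 1: items run serially, off system_wq */
        demo_destroy_wq = alloc_workqueue("demo_destroy", 0, 1);
        if (!demo_destroy_wq)
                return -ENOMEM;

        /* queue_work() on the private queue replaces schedule_work() */
        queue_work(demo_destroy_wq, &demo_work);
        return 0;
}

static void __exit demo_exit(void)
{
        /* destroy_workqueue() drains remaining items before freeing */
        destroy_workqueue(demo_destroy_wq);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");

The .release change in the same diff fixes a reference-pairing leak: cgroup's open path pins a css and uses single_open(), so pointing .release straight at single_release() freed the seq_file but leaked the css pin; the new cgroup_file_release() drops both. A hypothetical sketch of the same open/release pairing with a generic refcounted object:

#include <linux/fs.h>
#include <linux/kref.h>
#include <linux/seq_file.h>
#include <linux/slab.h>

struct demo_obj {
        struct kref ref;
        int value;
};

static void demo_obj_free(struct kref *ref)
{
        kfree(container_of(ref, struct demo_obj, ref));
}

static int demo_show(struct seq_file *m, void *v)
{
        struct demo_obj *obj = m->private;

        seq_printf(m, "%d\n", obj->value);
        return 0;
}

static int demo_open(struct inode *inode, struct file *file)
{
        struct demo_obj *obj = inode->i_private;
        int ret;

        kref_get(&obj->ref);            /* pin across open..release */
        ret = single_open(file, demo_show, obj);
        if (ret)
                kref_put(&obj->ref, demo_obj_free);
        return ret;
}

static int demo_release(struct inode *inode, struct file *file)
{
        struct demo_obj *obj = inode->i_private;

        kref_put(&obj->ref, demo_obj_free);     /* drop the pin... */
        return single_release(inode, file);     /* ...and the seq_file */
}

static const struct file_operations demo_fops = {
        .open           = demo_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = demo_release,
};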
kernel/cpuset.c (+6 -2)
···
        need_loop = task_has_mempolicy(tsk) ||
                    !nodes_intersects(*newmems, tsk->mems_allowed);

-       if (need_loop)
+       if (need_loop) {
+               local_irq_disable();
                write_seqcount_begin(&tsk->mems_allowed_seq);
+       }

        nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems);
        mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP1);
···
        mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP2);
        tsk->mems_allowed = *newmems;

-       if (need_loop)
+       if (need_loop) {
                write_seqcount_end(&tsk->mems_allowed_seq);
+               local_irq_enable();
+       }

        task_unlock(tsk);
 }
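These hunks encode a general seqcount rule: when the read side can run in interrupt context (here the allocator reads mems_allowed under the seqcount), the write side must run with local interrupts disabled, otherwise an IRQ landing between write_seqcount_begin() and write_seqcount_end() on the same CPU spins forever waiting for an even sequence count. A minimal sketch of the pattern, with hypothetical demo_* names:

#include <linux/seqlock.h>
#include <linux/irqflags.h>

static seqcount_t demo_seq;
static int demo_a, demo_b;

/* call once before readers or writers run */
static void demo_seq_setup(void)
{
        seqcount_init(&demo_seq);
}

/* writer: publish both values atomically with respect to readers */
static void demo_update(int a, int b)
{
        local_irq_disable();    /* readers may run from IRQ context */
        write_seqcount_begin(&demo_seq);
        demo_a = a;
        demo_b = b;
        write_seqcount_end(&demo_seq);
        local_irq_enable();
}

/* reader: safe even in interrupt context; retries on concurrent write */
static int demo_read_sum(void)
{
        unsigned int seq;
        int sum;

        do {
                seq = read_seqcount_begin(&demo_seq);
                sum = demo_a + demo_b;
        } while (read_seqcount_retry(&demo_seq, seq));

        return sum;
}

With interrupts off on the writing CPU, no reader can preempt the writer mid-update on that CPU, which is exactly the scenario the lockdep report flagged.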