Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'cgroup-for-7.1' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup

Pull cgroup updates from Tejun Heo:

- cgroup_file_notify() locking converted from a global lock to a
per-cgroup_file spinlock with a lockless fast-path when no
notification is needed

- Misc changes including exposing cgroup helpers for sched_ext and
minor fixes

* tag 'cgroup-for-7.1' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
cgroup/rdma: fix swapped arguments in pr_warn() format string
cgroup/dmem: remove region parameter from dmemcg_parse_limit
cgroup: replace global cgroup_file_kn_lock with per-cgroup_file lock
cgroup: add lockless fast-path checks to cgroup_file_notify()
cgroup: reduce cgroup_file_kn_lock hold time in cgroup_file_notify()
cgroup: Expose some cgroup helpers

+96 -93
+1
include/linux/cgroup-defs.h
··· 167 167 struct kernfs_node *kn; 168 168 unsigned long notified_at; 169 169 struct timer_list notify_timer; 170 + spinlock_t lock; 170 171 }; 171 172 172 173 /*
+63 -2
include/linux/cgroup.h
··· 42 42 43 43 #ifdef CONFIG_CGROUPS 44 44 45 + /* 46 + * To avoid confusing the compiler (and generating warnings) with code 47 + * that attempts to access what would be a 0-element array (i.e. sized 48 + * to a potentially empty array when CGROUP_SUBSYS_COUNT == 0), this 49 + * constant expression can be added. 50 + */ 51 + #define CGROUP_HAS_SUBSYS_CONFIG (CGROUP_SUBSYS_COUNT > 0) 52 + 45 53 enum css_task_iter_flags { 46 54 CSS_TASK_ITER_PROCS = (1U << 0), /* walk only threadgroup leaders */ 47 55 CSS_TASK_ITER_THREADED = (1U << 1), /* walk all threaded css_sets in the domain */ ··· 84 76 extern struct file_system_type cgroup_fs_type; 85 77 extern struct cgroup_root cgrp_dfl_root; 86 78 extern struct css_set init_css_set; 79 + extern struct mutex cgroup_mutex; 87 80 extern spinlock_t css_set_lock; 88 81 extern struct blocking_notifier_head cgroup_lifetime_notifier; 89 82 ··· 111 102 */ 112 103 #define cgroup_subsys_on_dfl(ss) \ 113 104 static_branch_likely(&ss ## _on_dfl_key) 105 + 106 + bool cgroup_on_dfl(const struct cgroup *cgrp); 114 107 115 108 bool css_has_online_children(struct cgroup_subsys_state *css); 116 109 struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss); ··· 285 274 for ((pos) = css_next_descendant_post(NULL, (css)); (pos); \ 286 275 (pos) = css_next_descendant_post((pos), (css))) 287 276 277 + /* iterate over child cgrps, lock should be held throughout iteration */ 278 + #define cgroup_for_each_live_child(child, cgrp) \ 279 + list_for_each_entry((child), &(cgrp)->self.children, self.sibling) \ 280 + if (({ lockdep_assert_held(&cgroup_mutex); \ 281 + cgroup_is_dead(child); })) \ 282 + ; \ 283 + else 284 + 285 + /* walk live descendants in pre order */ 286 + #define cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) \ 287 + css_for_each_descendant_pre((d_css), cgroup_css((cgrp), NULL)) \ 288 + if (({ lockdep_assert_held(&cgroup_mutex); \ 289 + (dsct) = (d_css)->cgroup; \ 290 + cgroup_is_dead(dsct); })) \ 291 + ; \ 292 + 
else 293 + 294 + /* walk live descendants in postorder */ 295 + #define cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) \ 296 + css_for_each_descendant_post((d_css), cgroup_css((cgrp), NULL)) \ 297 + if (({ lockdep_assert_held(&cgroup_mutex); \ 298 + (dsct) = (d_css)->cgroup; \ 299 + cgroup_is_dead(dsct); })) \ 300 + ; \ 301 + else 302 + 288 303 /** 289 304 * cgroup_taskset_for_each - iterate cgroup_taskset 290 305 * @task: the loop cursor ··· 374 337 } 375 338 376 339 /** 340 + * cgroup_css - obtain a cgroup's css for the specified subsystem 341 + * @cgrp: the cgroup of interest 342 + * @ss: the subsystem of interest (%NULL returns @cgrp->self) 343 + * 344 + * Return @cgrp's css (cgroup_subsys_state) associated with @ss. This 345 + * function must be called either under cgroup_mutex or rcu_read_lock() and 346 + * the caller is responsible for pinning the returned css if it wants to 347 + * keep accessing it outside the said locks. This function may return 348 + * %NULL if @cgrp doesn't have @subsys_id enabled. 349 + */ 350 + static inline struct cgroup_subsys_state *cgroup_css(struct cgroup *cgrp, 351 + struct cgroup_subsys *ss) 352 + { 353 + if (CGROUP_HAS_SUBSYS_CONFIG && ss) 354 + return rcu_dereference_check(cgrp->subsys[ss->id], 355 + lockdep_is_held(&cgroup_mutex)); 356 + else 357 + return &cgrp->self; 358 + } 359 + 360 + /** 377 361 * css_is_dying - test whether the specified css is dying 378 362 * @css: target css 379 363 * ··· 430 372 return false; 431 373 } 432 374 375 + static inline bool cgroup_is_dead(const struct cgroup *cgrp) 376 + { 377 + return !(cgrp->self.flags & CSS_ONLINE); 378 + } 379 + 433 380 static inline void cgroup_get(struct cgroup *cgrp) 434 381 { 435 382 css_get(&cgrp->self); ··· 449 386 { 450 387 css_put(&cgrp->self); 451 388 } 452 - 453 - extern struct mutex cgroup_mutex; 454 389 455 390 static inline void cgroup_lock(void) 456 391 {
-6
kernel/cgroup/cgroup-internal.h
··· 184 184 for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT && \ 185 185 (((ss) = cgroup_subsys[ssid]) || true); (ssid)++) 186 186 187 - static inline bool cgroup_is_dead(const struct cgroup *cgrp) 188 - { 189 - return !(cgrp->self.flags & CSS_ONLINE); 190 - } 191 - 192 187 static inline bool notify_on_release(const struct cgroup *cgrp) 193 188 { 194 189 return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); ··· 217 222 } 218 223 219 224 bool cgroup_ssid_enabled(int ssid); 220 - bool cgroup_on_dfl(const struct cgroup *cgrp); 221 225 222 226 struct cgroup_root *cgroup_root_from_kf(struct kernfs_root *kf_root); 223 227 struct cgroup *task_cgroup_from_root(struct task_struct *task,
+29 -81
kernel/cgroup/cgroup.c
··· 69 69 #define CGROUP_FILE_NOTIFY_MIN_INTV DIV_ROUND_UP(HZ, 100) 70 70 71 71 /* 72 - * To avoid confusing the compiler (and generating warnings) with code 73 - * that attempts to access what would be a 0-element array (i.e. sized 74 - * to a potentially empty array when CGROUP_SUBSYS_COUNT == 0), this 75 - * constant expression can be added. 76 - */ 77 - #define CGROUP_HAS_SUBSYS_CONFIG (CGROUP_SUBSYS_COUNT > 0) 78 - 79 - /* 80 72 * cgroup_mutex is the master lock. Any modification to cgroup or its 81 73 * hierarchy must be performed while holding it. 82 74 * ··· 98 106 * grabbing cgroup_mutex. 99 107 */ 100 108 static DEFINE_SPINLOCK(cgroup_idr_lock); 101 - 102 - /* 103 - * Protects cgroup_file->kn for !self csses. It synchronizes notifications 104 - * against file removal/re-creation across css hiding. 105 - */ 106 - static DEFINE_SPINLOCK(cgroup_file_kn_lock); 107 109 108 110 DEFINE_PERCPU_RWSEM(cgroup_threadgroup_rwsem); 109 111 ··· 496 510 } 497 511 498 512 /** 499 - * cgroup_css - obtain a cgroup's css for the specified subsystem 500 - * @cgrp: the cgroup of interest 501 - * @ss: the subsystem of interest (%NULL returns @cgrp->self) 502 - * 503 - * Return @cgrp's css (cgroup_subsys_state) associated with @ss. This 504 - * function must be called either under cgroup_mutex or rcu_read_lock() and 505 - * the caller is responsible for pinning the returned css if it wants to 506 - * keep accessing it outside the said locks. This function may return 507 - * %NULL if @cgrp doesn't have @subsys_id enabled. 
508 - */ 509 - static struct cgroup_subsys_state *cgroup_css(struct cgroup *cgrp, 510 - struct cgroup_subsys *ss) 511 - { 512 - if (CGROUP_HAS_SUBSYS_CONFIG && ss) 513 - return rcu_dereference_check(cgrp->subsys[ss->id], 514 - lockdep_is_held(&cgroup_mutex)); 515 - else 516 - return &cgrp->self; 517 - } 518 - 519 - /** 520 513 * cgroup_e_css_by_mask - obtain a cgroup's effective css for the specified ss 521 514 * @cgrp: the cgroup of interest 522 515 * @ss: the subsystem of interest (%NULL returns @cgrp->self) ··· 705 740 } \ 706 741 } \ 707 742 } while (false) 708 - 709 - /* iterate over child cgrps, lock should be held throughout iteration */ 710 - #define cgroup_for_each_live_child(child, cgrp) \ 711 - list_for_each_entry((child), &(cgrp)->self.children, self.sibling) \ 712 - if (({ lockdep_assert_held(&cgroup_mutex); \ 713 - cgroup_is_dead(child); })) \ 714 - ; \ 715 - else 716 - 717 - /* walk live descendants in pre order */ 718 - #define cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) \ 719 - css_for_each_descendant_pre((d_css), cgroup_css((cgrp), NULL)) \ 720 - if (({ lockdep_assert_held(&cgroup_mutex); \ 721 - (dsct) = (d_css)->cgroup; \ 722 - cgroup_is_dead(dsct); })) \ 723 - ; \ 724 - else 725 - 726 - /* walk live descendants in postorder */ 727 - #define cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) \ 728 - css_for_each_descendant_post((d_css), cgroup_css((cgrp), NULL)) \ 729 - if (({ lockdep_assert_held(&cgroup_mutex); \ 730 - (dsct) = (d_css)->cgroup; \ 731 - cgroup_is_dead(dsct); })) \ 732 - ; \ 733 - else 734 743 735 744 /* 736 745 * The default css_set - used by init and its children prior to any ··· 1687 1748 struct cgroup_subsys_state *css = cgroup_css(cgrp, cft->ss); 1688 1749 struct cgroup_file *cfile = (void *)css + cft->file_offset; 1689 1750 1690 - spin_lock_irq(&cgroup_file_kn_lock); 1691 - cfile->kn = NULL; 1692 - spin_unlock_irq(&cgroup_file_kn_lock); 1751 + spin_lock_irq(&cfile->lock); 1752 + WRITE_ONCE(cfile->kn, NULL); 
1753 + spin_unlock_irq(&cfile->lock); 1693 1754 1694 1755 timer_delete_sync(&cfile->notify_timer); 1695 1756 } ··· 4368 4429 struct cgroup_file *cfile = (void *)css + cft->file_offset; 4369 4430 4370 4431 timer_setup(&cfile->notify_timer, cgroup_file_notify_timer, 0); 4371 - 4372 - spin_lock_irq(&cgroup_file_kn_lock); 4432 + spin_lock_init(&cfile->lock); 4373 4433 cfile->kn = kn; 4374 - spin_unlock_irq(&cgroup_file_kn_lock); 4375 4434 } 4376 4435 4377 4436 return 0; ··· 4624 4687 */ 4625 4688 void cgroup_file_notify(struct cgroup_file *cfile) 4626 4689 { 4627 - unsigned long flags; 4690 + unsigned long flags, last, next; 4691 + struct kernfs_node *kn = NULL; 4628 4692 4629 - spin_lock_irqsave(&cgroup_file_kn_lock, flags); 4630 - if (cfile->kn) { 4631 - unsigned long last = cfile->notified_at; 4632 - unsigned long next = last + CGROUP_FILE_NOTIFY_MIN_INTV; 4693 + if (!READ_ONCE(cfile->kn)) 4694 + return; 4633 4695 4634 - if (time_in_range(jiffies, last, next)) { 4635 - timer_reduce(&cfile->notify_timer, next); 4636 - } else { 4637 - kernfs_notify(cfile->kn); 4638 - cfile->notified_at = jiffies; 4639 - } 4696 + last = READ_ONCE(cfile->notified_at); 4697 + next = last + CGROUP_FILE_NOTIFY_MIN_INTV; 4698 + if (time_in_range(jiffies, last, next)) { 4699 + timer_reduce(&cfile->notify_timer, next); 4700 + if (timer_pending(&cfile->notify_timer)) 4701 + return; 4640 4702 } 4641 - spin_unlock_irqrestore(&cgroup_file_kn_lock, flags); 4703 + 4704 + spin_lock_irqsave(&cfile->lock, flags); 4705 + if (cfile->kn) { 4706 + kn = cfile->kn; 4707 + kernfs_get(kn); 4708 + WRITE_ONCE(cfile->notified_at, jiffies); 4709 + } 4710 + spin_unlock_irqrestore(&cfile->lock, flags); 4711 + 4712 + if (kn) { 4713 + kernfs_notify(kn); 4714 + kernfs_put(kn); 4715 + } 4642 4716 } 4643 4717 EXPORT_SYMBOL_GPL(cgroup_file_notify); 4644 4718 ··· 4662 4714 { 4663 4715 struct kernfs_node *kn; 4664 4716 4665 - spin_lock_irq(&cgroup_file_kn_lock); 4717 + spin_lock_irq(&cfile->lock); 4666 4718 kn = cfile->kn; 
4667 4719 kernfs_get(kn); 4668 - spin_unlock_irq(&cgroup_file_kn_lock); 4720 + spin_unlock_irq(&cfile->lock); 4669 4721 4670 4722 if (kn) 4671 4723 kernfs_show(kn, show);
+2 -3
kernel/cgroup/dmem.c
··· 707 707 return 0; 708 708 } 709 709 710 - static int dmemcg_parse_limit(char *options, struct dmem_cgroup_region *region, 711 - u64 *new_limit) 710 + static int dmemcg_parse_limit(char *options, u64 *new_limit) 712 711 { 713 712 char *end; 714 713 ··· 761 762 if (!region) 762 763 return -EINVAL; 763 764 764 - err = dmemcg_parse_limit(options, region, &new_limit); 765 + err = dmemcg_parse_limit(options, &new_limit); 765 766 if (err < 0) 766 767 goto out_put; 767 768
+1 -1
kernel/cgroup/rdma.c
··· 173 173 * the system. 174 174 */ 175 175 if (unlikely(!rpool)) { 176 - pr_warn("Invalid device %p or rdma cgroup %p\n", cg, device); 176 + pr_warn("Invalid device %p or rdma cgroup %p\n", device, cg); 177 177 return; 178 178 } 179 179