cpuset: separate generate_sched_domains for v1 and v2

+23

kernel/cgroup/cpuset-internal.h

··· 9 9 #include <linux/cpuset.h> 10 10 #include <linux/spinlock.h> 11 11 #include <linux/union_find.h> 12 + #include <linux/sched/isolation.h> 12 13 13 14 /* See "Frequency meter" comments, below. */ 14 15 ··· 186 185 #endif 187 186 }; 188 187 188 + extern struct cpuset top_cpuset; 189 + 189 190 static inline struct cpuset *css_cs(struct cgroup_subsys_state *css) 190 191 { 191 192 return css ? container_of(css, struct cpuset, css) : NULL; ··· 243 240 static inline int is_spread_slab(const struct cpuset *cs) 244 241 { 245 242 return test_bit(CS_SPREAD_SLAB, &cs->flags); 243 + } 244 + 245 + /* 246 + * Helper routine for generate_sched_domains(). 247 + * Do cpusets a, b have overlapping effective cpus_allowed masks? 248 + */ 249 + static inline int cpusets_overlap(struct cpuset *a, struct cpuset *b) 250 + { 251 + return cpumask_intersects(a->effective_cpus, b->effective_cpus); 252 + } 253 + 254 + static inline int nr_cpusets(void) 255 + { 256 + /* jump label reference count + the top-level cpuset */ 257 + return static_key_count(&cpusets_enabled_key.key) + 1; 246 258 } 247 259 248 260 /** ··· 316 298 void cpuset1_online_css(struct cgroup_subsys_state *css); 317 299 void update_domain_attr_tree(struct sched_domain_attr *dattr, 318 300 struct cpuset *root_cs); 301 + int cpuset1_generate_sched_domains(cpumask_var_t **domains, 302 + struct sched_domain_attr **attributes); 303 + 319 304 #else 320 305 static inline void cpuset1_update_task_spread_flags(struct cpuset *cs, 321 306 struct task_struct *tsk) {} ··· 332 311 static inline void cpuset1_online_css(struct cgroup_subsys_state *css) {} 333 312 static inline void update_domain_attr_tree(struct sched_domain_attr *dattr, 334 313 struct cpuset *root_cs) {} 314 + static inline int cpuset1_generate_sched_domains(cpumask_var_t **domains, 315 + struct sched_domain_attr **attributes) { return 0; }; 335 316 336 317 #endif /* CONFIG_CPUSETS_V1 */ 337 318

+158

kernel/cgroup/cpuset-v1.c

··· 581 581 } 582 582 583 583 /* 584 + * cpuset1_generate_sched_domains() 585 + * 586 + * Finding the best partition (set of domains): 587 + * The double nested loops below over i, j scan over the load 588 + * balanced cpusets (using the array of cpuset pointers in csa[]) 589 + * looking for pairs of cpusets that have overlapping cpus_allowed 590 + * and merging them using a union-find algorithm. 591 + * 592 + * The union of the cpus_allowed masks from the set of all cpusets 593 + * having the same root then form the one element of the partition 594 + * (one sched domain) to be passed to partition_sched_domains(). 595 + */ 596 + int cpuset1_generate_sched_domains(cpumask_var_t **domains, 597 + struct sched_domain_attr **attributes) 598 + { 599 + struct cpuset *cp; /* top-down scan of cpusets */ 600 + struct cpuset **csa; /* array of all cpuset ptrs */ 601 + int csn; /* how many cpuset ptrs in csa so far */ 602 + int i, j; /* indices for partition finding loops */ 603 + cpumask_var_t *doms; /* resulting partition; i.e. sched domains */ 604 + struct sched_domain_attr *dattr; /* attributes for custom domains */ 605 + int ndoms = 0; /* number of sched domains in result */ 606 + int nslot; /* next empty doms[] struct cpumask slot */ 607 + struct cgroup_subsys_state *pos_css; 608 + bool root_load_balance = is_sched_load_balance(&top_cpuset); 609 + int nslot_update; 610 + 611 + lockdep_assert_cpuset_lock_held(); 612 + 613 + doms = NULL; 614 + dattr = NULL; 615 + csa = NULL; 616 + 617 + /* Special case for the 99% of systems with one, full, sched domain */ 618 + if (root_load_balance) { 619 + ndoms = 1; 620 + doms = alloc_sched_domains(ndoms); 621 + if (!doms) 622 + goto done; 623 + 624 + dattr = kmalloc(sizeof(struct sched_domain_attr), GFP_KERNEL); 625 + if (dattr) { 626 + *dattr = SD_ATTR_INIT; 627 + update_domain_attr_tree(dattr, &top_cpuset); 628 + } 629 + cpumask_and(doms[0], top_cpuset.effective_cpus, 630 + housekeeping_cpumask(HK_TYPE_DOMAIN)); 631 + 632 + goto done; 633 + } 634 + 635 + csa = kmalloc_array(nr_cpusets(), sizeof(cp), GFP_KERNEL); 636 + if (!csa) 637 + goto done; 638 + csn = 0; 639 + 640 + rcu_read_lock(); 641 + if (root_load_balance) 642 + csa[csn++] = &top_cpuset; 643 + cpuset_for_each_descendant_pre(cp, pos_css, &top_cpuset) { 644 + if (cp == &top_cpuset) 645 + continue; 646 + 647 + /* 648 + * Continue traversing beyond @cp iff @cp has some CPUs and 649 + * isn't load balancing. The former is obvious. The 650 + * latter: All child cpusets contain a subset of the 651 + * parent's cpus, so just skip them, and then we call 652 + * update_domain_attr_tree() to calc relax_domain_level of 653 + * the corresponding sched domain. 654 + */ 655 + if (!cpumask_empty(cp->cpus_allowed) && 656 + !(is_sched_load_balance(cp) && 657 + cpumask_intersects(cp->cpus_allowed, 658 + housekeeping_cpumask(HK_TYPE_DOMAIN)))) 659 + continue; 660 + 661 + if (is_sched_load_balance(cp) && 662 + !cpumask_empty(cp->effective_cpus)) 663 + csa[csn++] = cp; 664 + 665 + /* skip @cp's subtree */ 666 + pos_css = css_rightmost_descendant(pos_css); 667 + continue; 668 + } 669 + rcu_read_unlock(); 670 + 671 + for (i = 0; i < csn; i++) 672 + uf_node_init(&csa[i]->node); 673 + 674 + /* Merge overlapping cpusets */ 675 + for (i = 0; i < csn; i++) { 676 + for (j = i + 1; j < csn; j++) { 677 + if (cpusets_overlap(csa[i], csa[j])) 678 + uf_union(&csa[i]->node, &csa[j]->node); 679 + } 680 + } 681 + 682 + /* Count the total number of domains */ 683 + for (i = 0; i < csn; i++) { 684 + if (uf_find(&csa[i]->node) == &csa[i]->node) 685 + ndoms++; 686 + } 687 + 688 + /* 689 + * Now we know how many domains to create. 690 + * Convert <csn, csa> to <ndoms, doms> and populate cpu masks. 691 + */ 692 + doms = alloc_sched_domains(ndoms); 693 + if (!doms) 694 + goto done; 695 + 696 + /* 697 + * The rest of the code, including the scheduler, can deal with 698 + * dattr==NULL case. No need to abort if alloc fails. 699 + */ 700 + dattr = kmalloc_array(ndoms, sizeof(struct sched_domain_attr), 701 + GFP_KERNEL); 702 + 703 + for (nslot = 0, i = 0; i < csn; i++) { 704 + nslot_update = 0; 705 + for (j = i; j < csn; j++) { 706 + if (uf_find(&csa[j]->node) == &csa[i]->node) { 707 + struct cpumask *dp = doms[nslot]; 708 + 709 + if (i == j) { 710 + nslot_update = 1; 711 + cpumask_clear(dp); 712 + if (dattr) 713 + *(dattr + nslot) = SD_ATTR_INIT; 714 + } 715 + cpumask_or(dp, dp, csa[j]->effective_cpus); 716 + cpumask_and(dp, dp, housekeeping_cpumask(HK_TYPE_DOMAIN)); 717 + if (dattr) 718 + update_domain_attr_tree(dattr + nslot, csa[j]); 719 + } 720 + } 721 + if (nslot_update) 722 + nslot++; 723 + } 724 + BUG_ON(nslot != ndoms); 725 + 726 + done: 727 + kfree(csa); 728 + 729 + /* 730 + * Fallback to the default domain if kmalloc() failed. 731 + * See comments in partition_sched_domains(). 732 + */ 733 + if (doms == NULL) 734 + ndoms = 1; 735 + 736 + *domains = doms; 737 + *attributes = dattr; 738 + return ndoms; 739 + } 740 + 741 + /* 584 742 * for the common functions, 'private' gives the type of file 585 743 */ 586 744

+4 -27

kernel/cgroup/cpuset.c

··· 211 211 * If cpu_online_mask is used while a hotunplug operation is happening in 212 212 * parallel, we may leave an offline CPU in cpu_allowed or some other masks. 213 213 */ 214 - static struct cpuset top_cpuset = { 214 + struct cpuset top_cpuset = { 215 215 .flags = BIT(CS_CPU_EXCLUSIVE) | 216 216 BIT(CS_MEM_EXCLUSIVE) | BIT(CS_SCHED_LOAD_BALANCE), 217 217 .partition_root_state = PRS_ROOT, ··· 744 744 } 745 745 746 746 #ifdef CONFIG_SMP 747 - /* 748 - * Helper routine for generate_sched_domains(). 749 - * Do cpusets a, b have overlapping effective cpus_allowed masks? 750 - */ 751 - static int cpusets_overlap(struct cpuset *a, struct cpuset *b) 752 - { 753 - return cpumask_intersects(a->effective_cpus, b->effective_cpus); 754 - } 755 - 756 - /* Must be called with cpuset_mutex held. */ 757 - static inline int nr_cpusets(void) 758 - { 759 - /* jump label reference count + the top-level cpuset */ 760 - return static_key_count(&cpusets_enabled_key.key) + 1; 761 - } 762 747 763 748 /* 764 749 * generate_sched_domains() ··· 783 798 * convenient format, that can be easily compared to the prior 784 799 * value to determine what partition elements (sched domains) 785 800 * were changed (added or removed.) 786 - * 787 - * Finding the best partition (set of domains): 788 - * The double nested loops below over i, j scan over the load 789 - * balanced cpusets (using the array of cpuset pointers in csa[]) 790 - * looking for pairs of cpusets that have overlapping cpus_allowed 791 - * and merging them using a union-find algorithm. 792 - * 793 - * The union of the cpus_allowed masks from the set of all cpusets 794 - * having the same root then form the one element of the partition 795 - * (one sched domain) to be passed to partition_sched_domains(). 796 - * 797 801 */ 798 802 static int generate_sched_domains(cpumask_var_t **domains, 799 803 struct sched_domain_attr **attributes) ··· 799 825 bool root_load_balance = is_sched_load_balance(&top_cpuset); 800 826 bool cgrpv2 = cpuset_v2(); 801 827 int nslot_update; 828 + 829 + if (!cgrpv2) 830 + return cpuset1_generate_sched_domains(domains, attributes); 802 831 803 832 doms = NULL; 804 833 dattr = NULL;

Configure Feed

Configure Feed