Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

cpuset: separate generate_sched_domains for v1 and v2

The generate_sched_domains() function currently handles both v1 and v2
logic. However, the underlying mechanisms for building scheduler domains
differ significantly between the two versions. For cpuset v2, scheduler
domains are straightforwardly derived from valid partitions, whereas
cpuset v1 employs a more complex union-find algorithm to merge overlapping
cpusets. Co-locating these implementations complicates maintenance.

This patch, along with subsequent ones, aims to separate the v1 and v2
logic. For ease of review, this patch first copies the
generate_sched_domains() function into cpuset-v1.c as
cpuset1_generate_sched_domains() and removes the v2-specific code from
that copy. The common helpers cpusets_overlap() and nr_cpusets() move to
cpuset-internal.h, where top_cpuset (no longer static) is also declared.
When operating in v1 mode, generate_sched_domains() now calls
cpuset1_generate_sched_domains().

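For now, generate_sched_domains() in cpuset.c still contains the v1 code
paths, so some code is duplicated between cpuset.c and cpuset-v1.c. This
duplication will be largely eliminated once the v1-specific code is
removed from the v2 function in the following patch.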

Signed-off-by: Chen Ridong <chenridong@huawei.com>
Reviewed-by: Waiman Long <longman@redhat.com>
Signed-off-by: Tejun Heo <tj@kernel.org>

authored by

Chen Ridong and committed by
Tejun Heo
6e1d31ce cb33f881

+185 -27
+23
kernel/cgroup/cpuset-internal.h
··· 9 9 #include <linux/cpuset.h> 10 10 #include <linux/spinlock.h> 11 11 #include <linux/union_find.h> 12 + #include <linux/sched/isolation.h> 12 13 13 14 /* See "Frequency meter" comments, below. */ 14 15 ··· 186 185 #endif 187 186 }; 188 187 188 + extern struct cpuset top_cpuset; 189 + 189 190 static inline struct cpuset *css_cs(struct cgroup_subsys_state *css) 190 191 { 191 192 return css ? container_of(css, struct cpuset, css) : NULL; ··· 243 240 static inline int is_spread_slab(const struct cpuset *cs) 244 241 { 245 242 return test_bit(CS_SPREAD_SLAB, &cs->flags); 243 + } 244 + 245 + /* 246 + * Helper routine for generate_sched_domains(). 247 + * Do cpusets a, b have overlapping effective cpus_allowed masks? 248 + */ 249 + static inline int cpusets_overlap(struct cpuset *a, struct cpuset *b) 250 + { 251 + return cpumask_intersects(a->effective_cpus, b->effective_cpus); 252 + } 253 + 254 + static inline int nr_cpusets(void) 255 + { 256 + /* jump label reference count + the top-level cpuset */ 257 + return static_key_count(&cpusets_enabled_key.key) + 1; 246 258 } 247 259 248 260 /** ··· 316 298 void cpuset1_online_css(struct cgroup_subsys_state *css); 317 299 void update_domain_attr_tree(struct sched_domain_attr *dattr, 318 300 struct cpuset *root_cs); 301 + int cpuset1_generate_sched_domains(cpumask_var_t **domains, 302 + struct sched_domain_attr **attributes); 303 + 319 304 #else 320 305 static inline void cpuset1_update_task_spread_flags(struct cpuset *cs, 321 306 struct task_struct *tsk) {} ··· 332 311 static inline void cpuset1_online_css(struct cgroup_subsys_state *css) {} 333 312 static inline void update_domain_attr_tree(struct sched_domain_attr *dattr, 334 313 struct cpuset *root_cs) {} 314 + static inline int cpuset1_generate_sched_domains(cpumask_var_t **domains, 315 + struct sched_domain_attr **attributes) { return 0; }; 335 316 336 317 #endif /* CONFIG_CPUSETS_V1 */ 337 318
+158
kernel/cgroup/cpuset-v1.c
··· 581 581 } 582 582 583 583 /* 584 + * cpuset1_generate_sched_domains() 585 + * 586 + * Finding the best partition (set of domains): 587 + * The double nested loops below over i, j scan over the load 588 + * balanced cpusets (using the array of cpuset pointers in csa[]) 589 + * looking for pairs of cpusets that have overlapping cpus_allowed 590 + * and merging them using a union-find algorithm. 591 + * 592 + * The union of the cpus_allowed masks from the set of all cpusets 593 + * having the same root then form the one element of the partition 594 + * (one sched domain) to be passed to partition_sched_domains(). 595 + */ 596 + int cpuset1_generate_sched_domains(cpumask_var_t **domains, 597 + struct sched_domain_attr **attributes) 598 + { 599 + struct cpuset *cp; /* top-down scan of cpusets */ 600 + struct cpuset **csa; /* array of all cpuset ptrs */ 601 + int csn; /* how many cpuset ptrs in csa so far */ 602 + int i, j; /* indices for partition finding loops */ 603 + cpumask_var_t *doms; /* resulting partition; i.e. 
sched domains */ 604 + struct sched_domain_attr *dattr; /* attributes for custom domains */ 605 + int ndoms = 0; /* number of sched domains in result */ 606 + int nslot; /* next empty doms[] struct cpumask slot */ 607 + struct cgroup_subsys_state *pos_css; 608 + bool root_load_balance = is_sched_load_balance(&top_cpuset); 609 + int nslot_update; 610 + 611 + lockdep_assert_cpuset_lock_held(); 612 + 613 + doms = NULL; 614 + dattr = NULL; 615 + csa = NULL; 616 + 617 + /* Special case for the 99% of systems with one, full, sched domain */ 618 + if (root_load_balance) { 619 + ndoms = 1; 620 + doms = alloc_sched_domains(ndoms); 621 + if (!doms) 622 + goto done; 623 + 624 + dattr = kmalloc(sizeof(struct sched_domain_attr), GFP_KERNEL); 625 + if (dattr) { 626 + *dattr = SD_ATTR_INIT; 627 + update_domain_attr_tree(dattr, &top_cpuset); 628 + } 629 + cpumask_and(doms[0], top_cpuset.effective_cpus, 630 + housekeeping_cpumask(HK_TYPE_DOMAIN)); 631 + 632 + goto done; 633 + } 634 + 635 + csa = kmalloc_array(nr_cpusets(), sizeof(cp), GFP_KERNEL); 636 + if (!csa) 637 + goto done; 638 + csn = 0; 639 + 640 + rcu_read_lock(); 641 + if (root_load_balance) 642 + csa[csn++] = &top_cpuset; 643 + cpuset_for_each_descendant_pre(cp, pos_css, &top_cpuset) { 644 + if (cp == &top_cpuset) 645 + continue; 646 + 647 + /* 648 + * Continue traversing beyond @cp iff @cp has some CPUs and 649 + * isn't load balancing. The former is obvious. The 650 + * latter: All child cpusets contain a subset of the 651 + * parent's cpus, so just skip them, and then we call 652 + * update_domain_attr_tree() to calc relax_domain_level of 653 + * the corresponding sched domain. 
654 + */ 655 + if (!cpumask_empty(cp->cpus_allowed) && 656 + !(is_sched_load_balance(cp) && 657 + cpumask_intersects(cp->cpus_allowed, 658 + housekeeping_cpumask(HK_TYPE_DOMAIN)))) 659 + continue; 660 + 661 + if (is_sched_load_balance(cp) && 662 + !cpumask_empty(cp->effective_cpus)) 663 + csa[csn++] = cp; 664 + 665 + /* skip @cp's subtree */ 666 + pos_css = css_rightmost_descendant(pos_css); 667 + continue; 668 + } 669 + rcu_read_unlock(); 670 + 671 + for (i = 0; i < csn; i++) 672 + uf_node_init(&csa[i]->node); 673 + 674 + /* Merge overlapping cpusets */ 675 + for (i = 0; i < csn; i++) { 676 + for (j = i + 1; j < csn; j++) { 677 + if (cpusets_overlap(csa[i], csa[j])) 678 + uf_union(&csa[i]->node, &csa[j]->node); 679 + } 680 + } 681 + 682 + /* Count the total number of domains */ 683 + for (i = 0; i < csn; i++) { 684 + if (uf_find(&csa[i]->node) == &csa[i]->node) 685 + ndoms++; 686 + } 687 + 688 + /* 689 + * Now we know how many domains to create. 690 + * Convert <csn, csa> to <ndoms, doms> and populate cpu masks. 691 + */ 692 + doms = alloc_sched_domains(ndoms); 693 + if (!doms) 694 + goto done; 695 + 696 + /* 697 + * The rest of the code, including the scheduler, can deal with 698 + * dattr==NULL case. No need to abort if alloc fails. 
699 + */ 700 + dattr = kmalloc_array(ndoms, sizeof(struct sched_domain_attr), 701 + GFP_KERNEL); 702 + 703 + for (nslot = 0, i = 0; i < csn; i++) { 704 + nslot_update = 0; 705 + for (j = i; j < csn; j++) { 706 + if (uf_find(&csa[j]->node) == &csa[i]->node) { 707 + struct cpumask *dp = doms[nslot]; 708 + 709 + if (i == j) { 710 + nslot_update = 1; 711 + cpumask_clear(dp); 712 + if (dattr) 713 + *(dattr + nslot) = SD_ATTR_INIT; 714 + } 715 + cpumask_or(dp, dp, csa[j]->effective_cpus); 716 + cpumask_and(dp, dp, housekeeping_cpumask(HK_TYPE_DOMAIN)); 717 + if (dattr) 718 + update_domain_attr_tree(dattr + nslot, csa[j]); 719 + } 720 + } 721 + if (nslot_update) 722 + nslot++; 723 + } 724 + BUG_ON(nslot != ndoms); 725 + 726 + done: 727 + kfree(csa); 728 + 729 + /* 730 + * Fallback to the default domain if kmalloc() failed. 731 + * See comments in partition_sched_domains(). 732 + */ 733 + if (doms == NULL) 734 + ndoms = 1; 735 + 736 + *domains = doms; 737 + *attributes = dattr; 738 + return ndoms; 739 + } 740 + 741 + /* 584 742 * for the common functions, 'private' gives the type of file 585 743 */ 586 744
+4 -27
kernel/cgroup/cpuset.c
··· 211 211 * If cpu_online_mask is used while a hotunplug operation is happening in 212 212 * parallel, we may leave an offline CPU in cpu_allowed or some other masks. 213 213 */ 214 - static struct cpuset top_cpuset = { 214 + struct cpuset top_cpuset = { 215 215 .flags = BIT(CS_CPU_EXCLUSIVE) | 216 216 BIT(CS_MEM_EXCLUSIVE) | BIT(CS_SCHED_LOAD_BALANCE), 217 217 .partition_root_state = PRS_ROOT, ··· 744 744 } 745 745 746 746 #ifdef CONFIG_SMP 747 - /* 748 - * Helper routine for generate_sched_domains(). 749 - * Do cpusets a, b have overlapping effective cpus_allowed masks? 750 - */ 751 - static int cpusets_overlap(struct cpuset *a, struct cpuset *b) 752 - { 753 - return cpumask_intersects(a->effective_cpus, b->effective_cpus); 754 - } 755 - 756 - /* Must be called with cpuset_mutex held. */ 757 - static inline int nr_cpusets(void) 758 - { 759 - /* jump label reference count + the top-level cpuset */ 760 - return static_key_count(&cpusets_enabled_key.key) + 1; 761 - } 762 747 763 748 /* 764 749 * generate_sched_domains() ··· 783 798 * convenient format, that can be easily compared to the prior 784 799 * value to determine what partition elements (sched domains) 785 800 * were changed (added or removed.) 786 - * 787 - * Finding the best partition (set of domains): 788 - * The double nested loops below over i, j scan over the load 789 - * balanced cpusets (using the array of cpuset pointers in csa[]) 790 - * looking for pairs of cpusets that have overlapping cpus_allowed 791 - * and merging them using a union-find algorithm. 792 - * 793 - * The union of the cpus_allowed masks from the set of all cpusets 794 - * having the same root then form the one element of the partition 795 - * (one sched domain) to be passed to partition_sched_domains(). 
796 - * 797 801 */ 798 802 static int generate_sched_domains(cpumask_var_t **domains, 799 803 struct sched_domain_attr **attributes) ··· 799 825 bool root_load_balance = is_sched_load_balance(&top_cpuset); 800 826 bool cgrpv2 = cpuset_v2(); 801 827 int nslot_update; 828 + 829 + if (!cgrpv2) 830 + return cpuset1_generate_sched_domains(domains, attributes); 802 831 803 832 doms = NULL; 804 833 dattr = NULL;