Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'sched-urgent-2020-11-15' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler fixes from Thomas Gleixner:
"A set of scheduler fixes:

- Address a load balancer regression by making the load balancer use
the same logic as the wakeup path to spread tasks in the LLC domain

- Prefer the CPU on which a task ran last over the local CPU in the
fast wakeup path for asymmetric CPU capacity systems to align with
the symmetric case. This ensures more locality and prevents massive
migration overhead on those asymmetric systems

- Fix a memory corruption bug in the scheduler debug code caused by
handing a modified buffer pointer to kfree()"

* tag 'sched-urgent-2020-11-15' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
sched/debug: Fix memory corruption caused by multiple small reads of flags
sched/fair: Prefer prev cpu in asymmetric wakeup path
sched/fair: Ensure tasks spreading in LLC during LB

+51 -31
+6 -6
kernel/sched/debug.c
··· 251 251 unsigned long flags = *(unsigned long *)table->data; 252 252 size_t data_size = 0; 253 253 size_t len = 0; 254 - char *tmp; 254 + char *tmp, *buf; 255 255 int idx; 256 256 257 257 if (write) ··· 269 269 return 0; 270 270 } 271 271 272 - tmp = kcalloc(data_size + 1, sizeof(*tmp), GFP_KERNEL); 273 - if (!tmp) 272 + buf = kcalloc(data_size + 1, sizeof(*buf), GFP_KERNEL); 273 + if (!buf) 274 274 return -ENOMEM; 275 275 276 276 for_each_set_bit(idx, &flags, __SD_FLAG_CNT) { 277 277 char *name = sd_flag_debug[idx].name; 278 278 279 - len += snprintf(tmp + len, strlen(name) + 2, "%s ", name); 279 + len += snprintf(buf + len, strlen(name) + 2, "%s ", name); 280 280 } 281 281 282 - tmp += *ppos; 282 + tmp = buf + *ppos; 283 283 len -= *ppos; 284 284 285 285 if (len > *lenp) ··· 294 294 *lenp = len; 295 295 *ppos += len; 296 296 297 - kfree(tmp); 297 + kfree(buf); 298 298 299 299 return 0; 300 300 }
+45 -25
kernel/sched/fair.c
··· 6172 6172 static int 6173 6173 select_idle_capacity(struct task_struct *p, struct sched_domain *sd, int target) 6174 6174 { 6175 - unsigned long best_cap = 0; 6175 + unsigned long task_util, best_cap = 0; 6176 6176 int cpu, best_cpu = -1; 6177 6177 struct cpumask *cpus; 6178 6178 6179 - sync_entity_load_avg(&p->se); 6180 - 6181 6179 cpus = this_cpu_cpumask_var_ptr(select_idle_mask); 6182 6180 cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr); 6181 + 6182 + task_util = uclamp_task_util(p); 6183 6183 6184 6184 for_each_cpu_wrap(cpu, cpus, target) { 6185 6185 unsigned long cpu_cap = capacity_of(cpu); 6186 6186 6187 6187 if (!available_idle_cpu(cpu) && !sched_idle_cpu(cpu)) 6188 6188 continue; 6189 - if (task_fits_capacity(p, cpu_cap)) 6189 + if (fits_capacity(task_util, cpu_cap)) 6190 6190 return cpu; 6191 6191 6192 6192 if (cpu_cap > best_cap) { ··· 6198 6198 return best_cpu; 6199 6199 } 6200 6200 6201 + static inline bool asym_fits_capacity(int task_util, int cpu) 6202 + { 6203 + if (static_branch_unlikely(&sched_asym_cpucapacity)) 6204 + return fits_capacity(task_util, capacity_of(cpu)); 6205 + 6206 + return true; 6207 + } 6208 + 6201 6209 /* 6202 6210 * Try and locate an idle core/thread in the LLC cache domain. 6203 6211 */ 6204 6212 static int select_idle_sibling(struct task_struct *p, int prev, int target) 6205 6213 { 6206 6214 struct sched_domain *sd; 6215 + unsigned long task_util; 6207 6216 int i, recent_used_cpu; 6208 6217 6209 6218 /* 6210 - * For asymmetric CPU capacity systems, our domain of interest is 6211 - * sd_asym_cpucapacity rather than sd_llc. 6219 + * On asymmetric system, update task utilization because we will check 6220 + * that the task fits with cpu's capacity. 6212 6221 */ 6213 6222 if (static_branch_unlikely(&sched_asym_cpucapacity)) { 6214 - sd = rcu_dereference(per_cpu(sd_asym_cpucapacity, target)); 6215 - /* 6216 - * On an asymmetric CPU capacity system where an exclusive 6217 - * cpuset defines a symmetric island (i.e. 
one unique 6218 - * capacity_orig value through the cpuset), the key will be set 6219 - * but the CPUs within that cpuset will not have a domain with 6220 - * SD_ASYM_CPUCAPACITY. These should follow the usual symmetric 6221 - * capacity path. 6222 - */ 6223 - if (!sd) 6224 - goto symmetric; 6225 - 6226 - i = select_idle_capacity(p, sd, target); 6227 - return ((unsigned)i < nr_cpumask_bits) ? i : target; 6223 + sync_entity_load_avg(&p->se); 6224 + task_util = uclamp_task_util(p); 6228 6225 } 6229 6226 6230 - symmetric: 6231 - if (available_idle_cpu(target) || sched_idle_cpu(target)) 6227 + if ((available_idle_cpu(target) || sched_idle_cpu(target)) && 6228 + asym_fits_capacity(task_util, target)) 6232 6229 return target; 6233 6230 6234 6231 /* 6235 6232 * If the previous CPU is cache affine and idle, don't be stupid: 6236 6233 */ 6237 6234 if (prev != target && cpus_share_cache(prev, target) && 6238 - (available_idle_cpu(prev) || sched_idle_cpu(prev))) 6235 + (available_idle_cpu(prev) || sched_idle_cpu(prev)) && 6236 + asym_fits_capacity(task_util, prev)) 6239 6237 return prev; 6240 6238 6241 6239 /* ··· 6256 6258 recent_used_cpu != target && 6257 6259 cpus_share_cache(recent_used_cpu, target) && 6258 6260 (available_idle_cpu(recent_used_cpu) || sched_idle_cpu(recent_used_cpu)) && 6259 - cpumask_test_cpu(p->recent_used_cpu, p->cpus_ptr)) { 6261 + cpumask_test_cpu(p->recent_used_cpu, p->cpus_ptr) && 6262 + asym_fits_capacity(task_util, recent_used_cpu)) { 6260 6263 /* 6261 6264 * Replace recent_used_cpu with prev as it is a potential 6262 6265 * candidate for the next wake: 6263 6266 */ 6264 6267 p->recent_used_cpu = prev; 6265 6268 return recent_used_cpu; 6269 + } 6270 + 6271 + /* 6272 + * For asymmetric CPU capacity systems, our domain of interest is 6273 + * sd_asym_cpucapacity rather than sd_llc. 
6274 + */ 6275 + if (static_branch_unlikely(&sched_asym_cpucapacity)) { 6276 + sd = rcu_dereference(per_cpu(sd_asym_cpucapacity, target)); 6277 + /* 6278 + * On an asymmetric CPU capacity system where an exclusive 6279 + * cpuset defines a symmetric island (i.e. one unique 6280 + * capacity_orig value through the cpuset), the key will be set 6281 + * but the CPUs within that cpuset will not have a domain with 6282 + * SD_ASYM_CPUCAPACITY. These should follow the usual symmetric 6283 + * capacity path. 6284 + */ 6285 + if (sd) { 6286 + i = select_idle_capacity(p, sd, target); 6287 + return ((unsigned)i < nr_cpumask_bits) ? i : target; 6288 + } 6266 6289 } 6267 6290 6268 6291 sd = rcu_dereference(per_cpu(sd_llc, target)); ··· 9050 9031 * emptying busiest. 9051 9032 */ 9052 9033 if (local->group_type == group_has_spare) { 9053 - if (busiest->group_type > group_fully_busy) { 9034 + if ((busiest->group_type > group_fully_busy) && 9035 + !(env->sd->flags & SD_SHARE_PKG_RESOURCES)) { 9054 9036 /* 9055 9037 * If busiest is overloaded, try to fill spare 9056 9038 * capacity. This might end up creating spare capacity