Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge branch 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler updates from Thomas Gleixner:
"Third more careful attempt for this set of fixes:

- Prevent a 32-bit math overflow in the cpufreq code

- Fix a buffer overflow when scanning the cgroup2 cpu.max property

- A set of fixes for the NOHZ scheduler logic to prevent waking up
  CPUs when the capacity of the busy CPUs is already sufficient, along
  with other tweaks optimizing the behaviour for asymmetric (big/little)
  systems"

* 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
sched/fair: Skip LLC NOHZ logic for asymmetric systems
sched/fair: Tune down misfit NOHZ kicks
sched/fair: Comment some nohz_balancer_kick() kick conditions
sched/core: Fix buffer overflow in cgroup2 property cpu.max
sched/cpufreq: Fix 32-bit math overflow

3 files changed: +89 -56

kernel/sched/core.c (+1 -1)
···
 {
 	char tok[21];	/* U64_MAX */
 
-	if (!sscanf(buf, "%s %llu", tok, periodp))
+	if (sscanf(buf, "%20s %llu", tok, periodp) < 1)
 		return -EINVAL;
 
 	*periodp *= NSEC_PER_USEC;
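
The core.c fix is classic sscanf hardening: tok holds 21 bytes, enough for "18446744073709551615" (U64_MAX) plus a NUL, but a bare "%s" places no limit on how much it writes, so a longer token overruns the stack buffer. The "%20s" width modifier caps the copy, and checking "< 1" also rejects an EOF return. A minimal userspace sketch of the same pattern (not the kernel context; the input string is made up):

#include <stdio.h>

int main(void)
{
	char tok[21];	/* sized for "18446744073709551615" + NUL */
	unsigned long long period = 0;
	const char *input = "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 1000";

	/* Unsafe: "%s" would copy all 32 'A's into the 21-byte buffer. */
	/* sscanf(input, "%s %llu", tok, &period); */

	/* Safe: "%20s" stops after 20 characters, leaving room for the NUL.
	 * Requiring only one successful conversion mirrors the kernel check,
	 * where a lone token such as "max" is still a valid cpu.max value. */
	if (sscanf(input, "%20s %llu", tok, &period) < 1)
		return 1;

	printf("tok=\"%s\" period=%llu\n", tok, period);
	return 0;
}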
kernel/sched/cpufreq_schedutil.c (+25 -34)
···
 
 	bool			iowait_boost_pending;
 	unsigned int		iowait_boost;
-	unsigned int		iowait_boost_max;
 	u64			last_update;
 
 	unsigned long		bw_dl;
+	unsigned long		min;
 	unsigned long		max;
 
 	/* The field below is for single-CPU policies only: */
···
 	if (delta_ns <= TICK_NSEC)
 		return false;
 
-	sg_cpu->iowait_boost = set_iowait_boost
-			       ? sg_cpu->sg_policy->policy->min : 0;
+	sg_cpu->iowait_boost = set_iowait_boost ? sg_cpu->min : 0;
 	sg_cpu->iowait_boost_pending = set_iowait_boost;
 
 	return true;
···
 
 	/* Double the boost at each request */
 	if (sg_cpu->iowait_boost) {
-		sg_cpu->iowait_boost <<= 1;
-		if (sg_cpu->iowait_boost > sg_cpu->iowait_boost_max)
-			sg_cpu->iowait_boost = sg_cpu->iowait_boost_max;
+		sg_cpu->iowait_boost =
+			min_t(unsigned int, sg_cpu->iowait_boost << 1, SCHED_CAPACITY_SCALE);
 		return;
 	}
 
 	/* First wakeup after IO: start with minimum boost */
-	sg_cpu->iowait_boost = sg_cpu->sg_policy->policy->min;
+	sg_cpu->iowait_boost = sg_cpu->min;
 }
···
  * This mechanism is designed to boost high frequently IO waiting tasks, while
  * being more conservative on tasks which does sporadic IO operations.
  */
-static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time,
-			       unsigned long *util, unsigned long *max)
+static unsigned long sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time,
+					unsigned long util, unsigned long max)
 {
-	unsigned int boost_util, boost_max;
+	unsigned long boost;
 
 	/* No boost currently required */
 	if (!sg_cpu->iowait_boost)
-		return;
+		return util;
 
 	/* Reset boost if the CPU appears to have been idle enough */
 	if (sugov_iowait_reset(sg_cpu, time, false))
-		return;
+		return util;
 
-	/*
-	 * An IO waiting task has just woken up:
-	 * allow to further double the boost value
-	 */
-	if (sg_cpu->iowait_boost_pending) {
-		sg_cpu->iowait_boost_pending = false;
-	} else {
+	if (!sg_cpu->iowait_boost_pending) {
 		/*
-		 * Otherwise: reduce the boost value and disable it when we
-		 * reach the minimum.
+		 * No boost pending; reduce the boost value.
 		 */
 		sg_cpu->iowait_boost >>= 1;
-		if (sg_cpu->iowait_boost < sg_cpu->sg_policy->policy->min) {
+		if (sg_cpu->iowait_boost < sg_cpu->min) {
 			sg_cpu->iowait_boost = 0;
-			return;
+			return util;
 		}
 	}
 
+	sg_cpu->iowait_boost_pending = false;
+
 	/*
-	 * Apply the current boost value: a CPU is boosted only if its current
-	 * utilization is smaller then the current IO boost level.
+	 * @util is already in capacity scale; convert iowait_boost
+	 * into the same scale so we can compare.
 	 */
-	boost_util = sg_cpu->iowait_boost;
-	boost_max = sg_cpu->iowait_boost_max;
-	if (*util * boost_max < *max * boost_util) {
-		*util = boost_util;
-		*max = boost_max;
-	}
+	boost = (sg_cpu->iowait_boost * max) >> SCHED_CAPACITY_SHIFT;
+	return max(boost, util);
 }
···
 
 	util = sugov_get_util(sg_cpu);
 	max = sg_cpu->max;
-	sugov_iowait_apply(sg_cpu, time, &util, &max);
+	util = sugov_iowait_apply(sg_cpu, time, util, max);
 	next_f = get_next_freq(sg_policy, util, max);
 	/*
 	 * Do not reduce the frequency if the CPU has not been idle
···
 
 		j_util = sugov_get_util(j_sg_cpu);
 		j_max = j_sg_cpu->max;
-		sugov_iowait_apply(j_sg_cpu, time, &j_util, &j_max);
+		j_util = sugov_iowait_apply(j_sg_cpu, time, j_util, j_max);
 
 		if (j_util * max > j_max * util) {
 			util = j_util;
···
 		memset(sg_cpu, 0, sizeof(*sg_cpu));
 		sg_cpu->cpu = cpu;
 		sg_cpu->sg_policy = sg_policy;
-		sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq;
+		sg_cpu->min =
+			(SCHED_CAPACITY_SCALE * policy->cpuinfo.min_freq) /
+			policy->cpuinfo.max_freq;
 	}
 
 	for_each_cpu(cpu, policy->cpus) {
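
The schedutil change is best read as a units change. Previously iowait_boost was tracked in frequency units and applied via cross-multiplication (*util * boost_max < *max * boost_util); with kHz-range values those products can exceed 32 bits and wrap on platforms where unsigned long is 32 bits wide. The patch keeps the boost in capacity units (0..SCHED_CAPACITY_SCALE, i.e. 0..1024), where products stay comfortably small. A rough userspace illustration with made-up frequencies, using uint32_t to model a 32-bit unsigned long:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Old scheme: util/max carried frequency units (kHz). */
	uint32_t util = 1800000;	/* boosted toward 1.8 GHz */
	uint32_t max  = 2000000;	/* 2.0 GHz */

	/* 1800000 * 2000000 = 3.6e12, far beyond UINT32_MAX (~4.3e9),
	 * so the product wraps and the comparison goes wrong. */
	printf("wrapped product: %u\n", util * max);

	/* New scheme: the boost is capped at SCHED_CAPACITY_SCALE (1024)
	 * and max is a capacity (<= 1024), so boost * max <= 2^20 and
	 * always fits in 32 bits. */
	uint32_t boost = (512u * 1024u) >> 10;	/* iowait_boost * max >> SCHED_CAPACITY_SHIFT */
	printf("capacity-scale boost: %u\n", boost);
	return 0;
}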
kernel/sched/fair.c (+63 -21)
···
 }
 
 /*
+ * Check whether a rq has a misfit task and if it looks like we can actually
+ * help that task: we can migrate the task to a CPU of higher capacity, or
+ * the task's current CPU is heavily pressured.
+ */
+static inline int check_misfit_status(struct rq *rq, struct sched_domain *sd)
+{
+	return rq->misfit_task_load &&
+		(rq->cpu_capacity_orig < rq->rd->max_cpu_capacity ||
+		 check_cpu_capacity(rq, sd));
+}
+
+/*
  * Group imbalance indicates (and tries to solve) the problem where balancing
  * groups is inadequate due to ->cpus_allowed constraints.
  *
···
 	if (time_before(now, nohz.next_balance))
 		goto out;
 
-	if (rq->nr_running >= 2 || rq->misfit_task_load) {
+	if (rq->nr_running >= 2) {
 		flags = NOHZ_KICK_MASK;
 		goto out;
 	}
 
 	rcu_read_lock();
+
+	sd = rcu_dereference(rq->sd);
+	if (sd) {
+		/*
+		 * If there's a CFS task and the current CPU has reduced
+		 * capacity; kick the ILB to see if there's a better CPU to run
+		 * on.
+		 */
+		if (rq->cfs.h_nr_running >= 1 && check_cpu_capacity(rq, sd)) {
+			flags = NOHZ_KICK_MASK;
+			goto unlock;
+		}
+	}
+
+	sd = rcu_dereference(per_cpu(sd_asym_packing, cpu));
+	if (sd) {
+		/*
+		 * When ASYM_PACKING; see if there's a more preferred CPU
+		 * currently idle; in which case, kick the ILB to move tasks
+		 * around.
+		 */
+		for_each_cpu_and(i, sched_domain_span(sd), nohz.idle_cpus_mask) {
+			if (sched_asym_prefer(i, cpu)) {
+				flags = NOHZ_KICK_MASK;
+				goto unlock;
+			}
+		}
+	}
+
+	sd = rcu_dereference(per_cpu(sd_asym_cpucapacity, cpu));
+	if (sd) {
+		/*
+		 * When ASYM_CPUCAPACITY; see if there's a higher capacity CPU
+		 * to run the misfit task on.
+		 */
+		if (check_misfit_status(rq, sd)) {
+			flags = NOHZ_KICK_MASK;
+			goto unlock;
+		}
+
+		/*
+		 * For asymmetric systems, we do not want to nicely balance
+		 * cache use, instead we want to embrace asymmetry and only
+		 * ensure tasks have enough CPU capacity.
+		 *
+		 * Skip the LLC logic because it's not relevant in that case.
+		 */
+		goto unlock;
+	}
+
 	sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
 	if (sds) {
 		/*
···
 		if (nr_busy > 1) {
 			flags = NOHZ_KICK_MASK;
 			goto unlock;
-		}
-
-	}
-
-	sd = rcu_dereference(rq->sd);
-	if (sd) {
-		if ((rq->cfs.h_nr_running >= 1) &&
-				check_cpu_capacity(rq, sd)) {
-			flags = NOHZ_KICK_MASK;
-			goto unlock;
-		}
-	}
-
-	sd = rcu_dereference(per_cpu(sd_asym_packing, cpu));
-	if (sd) {
-		for_each_cpu_and(i, sched_domain_span(sd), nohz.idle_cpus_mask) {
-			if (sched_asym_prefer(i, cpu)) {
-				flags = NOHZ_KICK_MASK;
-				goto unlock;
-			}
 		}
 	}
 unlock:
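
The fair.c reordering changes which condition wins in nohz_balancer_kick(): the capacity-related checks now run before the LLC "nr_busy" heuristic, and on asymmetric-capacity (big/little) systems the LLC logic is skipped outright. The following condensed sketch mirrors that decision order; the struct and predicate names are illustrative stand-ins for the kernel helpers, not real kernel APIs:

#include <stdbool.h>

struct cpu_state {
	int nr_running;
	bool cfs_task_and_reduced_capacity;	/* stands in for check_cpu_capacity() */
	bool asym_packing_prefers_idle_cpu;	/* stands in for the sched_asym_prefer() loop */
	bool has_asym_cpucapacity_domain;	/* big.LITTLE-style system */
	bool misfit_task_needs_help;		/* stands in for check_misfit_status() */
	bool llc_busy;				/* nr_busy > 1 within the LLC */
};

static bool should_kick_ilb(const struct cpu_state *c)
{
	if (c->nr_running >= 2)
		return true;
	if (c->cfs_task_and_reduced_capacity)
		return true;
	if (c->asym_packing_prefers_idle_cpu)
		return true;
	if (c->has_asym_cpucapacity_domain) {
		/* Asymmetric systems only care about capacity; the LLC
		 * "spread across caches" heuristic is skipped entirely. */
		return c->misfit_task_needs_help;
	}
	return c->llc_busy;
}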