Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'sched-urgent-2024-04-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler fixes from Ingo Molnar:

- Fix EEVDF corner cases

- Fix two nohz_full= related bugs that can cause boot crashes
  and warnings

* tag 'sched-urgent-2024-04-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched/isolation: Fix boot crash when maxcpus < first housekeeping CPU
  sched/isolation: Prevent boot crash when the boot CPU is nohz_full
  sched/eevdf: Prevent vlag from going out of bounds in reweight_eevdf()
  sched/eevdf: Fix miscalculation in reweight_entity() when se is not curr
  sched/eevdf: Always update V if se->on_rq when reweighting

+38 -21
+2 -5
Documentation/timers/no_hz.rst
··· 129 129 online to handle timekeeping tasks in order to ensure that system 130 130 calls like gettimeofday() returns accurate values on adaptive-tick CPUs. 131 131 (This is not an issue for CONFIG_NO_HZ_IDLE=y because there are no running 132 - user processes to observe slight drifts in clock rate.) Therefore, the 133 - boot CPU is prohibited from entering adaptive-ticks mode. Specifying a 134 - "nohz_full=" mask that includes the boot CPU will result in a boot-time 135 - error message, and the boot CPU will be removed from the mask. Note that 136 - this means that your system must have at least two CPUs in order for 132 + user processes to observe slight drifts in clock rate.) Note that this 133 + means that your system must have at least two CPUs in order for 137 134 CONFIG_NO_HZ_FULL=y to do anything for you. 138 135 139 136 Finally, adaptive-ticks CPUs must have their RCU callbacks offloaded.
+20 -14
kernel/sched/fair.c
··· 696 696 * 697 697 * XXX could add max_slice to the augmented data to track this. 698 698 */ 699 + static s64 entity_lag(u64 avruntime, struct sched_entity *se) 700 + { 701 + s64 vlag, limit; 702 + 703 + vlag = avruntime - se->vruntime; 704 + limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se); 705 + 706 + return clamp(vlag, -limit, limit); 707 + } 708 + 699 709 static void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se) 700 710 { 701 - s64 lag, limit; 702 - 703 711 SCHED_WARN_ON(!se->on_rq); 704 - lag = avg_vruntime(cfs_rq) - se->vruntime; 705 712 706 - limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se); 707 - se->vlag = clamp(lag, -limit, limit); 713 + se->vlag = entity_lag(avg_vruntime(cfs_rq), se); 708 714 } 709 715 710 716 /* ··· 3682 3676 dequeue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) { } 3683 3677 #endif 3684 3678 3685 - static void reweight_eevdf(struct cfs_rq *cfs_rq, struct sched_entity *se, 3679 + static void reweight_eevdf(struct sched_entity *se, u64 avruntime, 3686 3680 unsigned long weight) 3687 3681 { 3688 3682 unsigned long old_weight = se->load.weight; 3689 - u64 avruntime = avg_vruntime(cfs_rq); 3690 3683 s64 vlag, vslice; 3691 3684 3692 3685 /* ··· 3766 3761 * = V - vl' 3767 3762 */ 3768 3763 if (avruntime != se->vruntime) { 3769 - vlag = (s64)(avruntime - se->vruntime); 3764 + vlag = entity_lag(avruntime, se); 3770 3765 vlag = div_s64(vlag * old_weight, weight); 3771 3766 se->vruntime = avruntime - vlag; 3772 3767 } ··· 3792 3787 unsigned long weight) 3793 3788 { 3794 3789 bool curr = cfs_rq->curr == se; 3790 + u64 avruntime; 3795 3791 3796 3792 if (se->on_rq) { 3797 3793 /* commit outstanding execution time */ 3798 - if (curr) 3799 - update_curr(cfs_rq); 3800 - else 3794 + update_curr(cfs_rq); 3795 + avruntime = avg_vruntime(cfs_rq); 3796 + if (!curr) 3801 3797 __dequeue_entity(cfs_rq, se); 3802 3798 update_load_sub(&cfs_rq->load, se->load.weight); 3803 3799 } 3804 3800 
dequeue_load_avg(cfs_rq, se); 3805 3801 3806 - if (!se->on_rq) { 3802 + if (se->on_rq) { 3803 + reweight_eevdf(se, avruntime, weight); 3804 + } else { 3807 3805 /* 3808 3806 * Because we keep se->vlag = V - v_i, while: lag_i = w_i*(V - v_i), 3809 3807 * we need to scale se->vlag when w_i changes. 3810 3808 */ 3811 3809 se->vlag = div_s64(se->vlag * se->load.weight, weight); 3812 - } else { 3813 - reweight_eevdf(cfs_rq, se, weight); 3814 3810 } 3815 3811 3816 3812 update_load_set(&se->load, weight);
+16 -2
kernel/sched/isolation.c
··· 46 46 if (cpu < nr_cpu_ids) 47 47 return cpu; 48 48 49 - return cpumask_any_and(housekeeping.cpumasks[type], cpu_online_mask); 49 + cpu = cpumask_any_and(housekeeping.cpumasks[type], cpu_online_mask); 50 + if (likely(cpu < nr_cpu_ids)) 51 + return cpu; 52 + /* 53 + * Unless we have another problem this can only happen 54 + * at boot time before start_secondary() brings the 1st 55 + * housekeeping CPU up. 56 + */ 57 + WARN_ON_ONCE(system_state == SYSTEM_RUNNING || 58 + type != HK_TYPE_TIMER); 50 59 } 51 60 } 52 61 return smp_processor_id(); ··· 118 109 static int __init housekeeping_setup(char *str, unsigned long flags) 119 110 { 120 111 cpumask_var_t non_housekeeping_mask, housekeeping_staging; 112 + unsigned int first_cpu; 121 113 int err = 0; 122 114 123 115 if ((flags & HK_FLAG_TICK) && !(housekeeping.flags & HK_FLAG_TICK)) { ··· 139 129 cpumask_andnot(housekeeping_staging, 140 130 cpu_possible_mask, non_housekeeping_mask); 141 131 142 - if (!cpumask_intersects(cpu_present_mask, housekeeping_staging)) { 132 + first_cpu = cpumask_first_and(cpu_present_mask, housekeeping_staging); 133 + if (first_cpu >= nr_cpu_ids || first_cpu >= setup_max_cpus) { 143 134 __cpumask_set_cpu(smp_processor_id(), housekeeping_staging); 144 135 __cpumask_clear_cpu(smp_processor_id(), non_housekeeping_mask); 145 136 if (!housekeeping.flags) { ··· 148 137 "using boot CPU:%d\n", smp_processor_id()); 149 138 } 150 139 } 140 + 141 + if (cpumask_empty(non_housekeeping_mask)) 142 + goto free_housekeeping_staging; 151 143 152 144 if (!housekeeping.flags) { 153 145 /* First setup call ("nohz_full=" or "isolcpus=") */