Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler fixes from Ingo Molnar:
"The biggest commit is an irqtime accounting loop latency fix; the rest
are misc fixes all over the place: deadline scheduling, docs, NUMA,
balancer and a bad to-idle latency fix"

* 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
sched/numa: Initialize newidle balance stats in sd_numa_init()
sched: Fix updating rq->max_idle_balance_cost and rq->next_balance in idle_balance()
sched: Skip double execution of pick_next_task_fair()
sched: Use CPUPRI_NR_PRIORITIES instead of MAX_RT_PRIO in cpupri check
sched/deadline: Fix memory leak
sched/deadline: Fix sched_yield() behavior
sched: Sanitize irq accounting madness
sched/docbook: Fix 'make htmldocs' warnings caused by missing description

+47 -35
+5 -2
include/linux/sched.h
··· 1153 1153 * 1154 1154 * @dl_boosted tells if we are boosted due to DI. If so we are 1155 1155 * outside bandwidth enforcement mechanism (but only until we 1156 - * exit the critical section). 1156 + * exit the critical section); 1157 + * 1158 + * @dl_yielded tells if task gave up the cpu before consuming 1159 + * all its available runtime during the last job. 1157 1160 */ 1158 - int dl_throttled, dl_new, dl_boosted; 1161 + int dl_throttled, dl_new, dl_boosted, dl_yielded; 1159 1162 1160 1163 /* 1161 1164 * Bandwidth enforcement timer. Each -deadline task has its
+13 -2
kernel/sched/core.c
··· 2592 2592 if (likely(prev->sched_class == class && 2593 2593 rq->nr_running == rq->cfs.h_nr_running)) { 2594 2594 p = fair_sched_class.pick_next_task(rq, prev); 2595 - if (likely(p && p != RETRY_TASK)) 2596 - return p; 2595 + if (unlikely(p == RETRY_TASK)) 2596 + goto again; 2597 + 2598 + /* assumes fair_sched_class->next == idle_sched_class */ 2599 + if (unlikely(!p)) 2600 + p = idle_sched_class.pick_next_task(rq, prev); 2601 + 2602 + return p; 2597 2603 } 2598 2604 2599 2605 again: ··· 3130 3124 dl_se->dl_bw = to_ratio(dl_se->dl_period, dl_se->dl_runtime); 3131 3125 dl_se->dl_throttled = 0; 3132 3126 dl_se->dl_new = 1; 3127 + dl_se->dl_yielded = 0; 3133 3128 } 3134 3129 3135 3130 static void __setscheduler_params(struct task_struct *p, ··· 3646 3639 * sys_sched_setattr - same as above, but with extended sched_attr 3647 3640 * @pid: the pid in question. 3648 3641 * @uattr: structure containing the extended parameters. 3642 + * @flags: for future extension. 3649 3643 */ 3650 3644 SYSCALL_DEFINE3(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr, 3651 3645 unsigned int, flags) ··· 3791 3783 * @pid: the pid in question. 3792 3784 * @uattr: structure containing the extended parameters. 3793 3785 * @size: sizeof(attr) for fwd/bwd comp. 3786 + * @flags: for future extension. 3794 3787 */ 3795 3788 SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr, 3796 3789 unsigned int, size, unsigned int, flags) ··· 6026 6017 , 6027 6018 .last_balance = jiffies, 6028 6019 .balance_interval = sd_weight, 6020 + .max_newidle_lb_cost = 0, 6021 + .next_decay_max_lb_cost = jiffies, 6029 6022 }; 6030 6023 SD_INIT_NAME(sd, NUMA); 6031 6024 sd->private = &tl->data;
+1 -3
kernel/sched/cpudeadline.c
··· 210 210 */ 211 211 void cpudl_cleanup(struct cpudl *cp) 212 212 { 213 - /* 214 - * nothing to do for the moment 215 - */ 213 + free_cpumask_var(cp->free_cpus); 216 214 }
+1 -2
kernel/sched/cpupri.c
··· 70 70 int idx = 0; 71 71 int task_pri = convert_prio(p->prio); 72 72 73 - if (task_pri >= MAX_RT_PRIO) 74 - return 0; 73 + BUG_ON(task_pri >= CPUPRI_NR_PRIORITIES); 75 74 76 75 for (idx = 0; idx < task_pri; idx++) { 77 76 struct cpupri_vec *vec = &cp->pri_to_cpu[idx];
+16 -16
kernel/sched/cputime.c
··· 332 332 * softirq as those do not count in task exec_runtime any more. 333 333 */ 334 334 static void irqtime_account_process_tick(struct task_struct *p, int user_tick, 335 - struct rq *rq) 335 + struct rq *rq, int ticks) 336 336 { 337 - cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy); 337 + cputime_t scaled = cputime_to_scaled(cputime_one_jiffy); 338 + u64 cputime = (__force u64) cputime_one_jiffy; 338 339 u64 *cpustat = kcpustat_this_cpu->cpustat; 339 340 340 341 if (steal_account_process_tick()) 341 342 return; 342 343 344 + cputime *= ticks; 345 + scaled *= ticks; 346 + 343 347 if (irqtime_account_hi_update()) { 344 - cpustat[CPUTIME_IRQ] += (__force u64) cputime_one_jiffy; 348 + cpustat[CPUTIME_IRQ] += cputime; 345 349 } else if (irqtime_account_si_update()) { 346 - cpustat[CPUTIME_SOFTIRQ] += (__force u64) cputime_one_jiffy; 350 + cpustat[CPUTIME_SOFTIRQ] += cputime; 347 351 } else if (this_cpu_ksoftirqd() == p) { 348 352 /* 349 353 * ksoftirqd time do not get accounted in cpu_softirq_time. 350 354 * So, we have to handle it separately here. 351 355 * Also, p->stime needs to be updated for ksoftirqd. 
352 356 */ 353 - __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled, 354 - CPUTIME_SOFTIRQ); 357 + __account_system_time(p, cputime, scaled, CPUTIME_SOFTIRQ); 355 358 } else if (user_tick) { 356 - account_user_time(p, cputime_one_jiffy, one_jiffy_scaled); 359 + account_user_time(p, cputime, scaled); 357 360 } else if (p == rq->idle) { 358 - account_idle_time(cputime_one_jiffy); 361 + account_idle_time(cputime); 359 362 } else if (p->flags & PF_VCPU) { /* System time or guest time */ 360 - account_guest_time(p, cputime_one_jiffy, one_jiffy_scaled); 363 + account_guest_time(p, cputime, scaled); 361 364 } else { 362 - __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled, 363 - CPUTIME_SYSTEM); 365 + __account_system_time(p, cputime, scaled, CPUTIME_SYSTEM); 364 366 } 365 367 } 366 368 367 369 static void irqtime_account_idle_ticks(int ticks) 368 370 { 369 - int i; 370 371 struct rq *rq = this_rq(); 371 372 372 - for (i = 0; i < ticks; i++) 373 - irqtime_account_process_tick(current, 0, rq); 373 + irqtime_account_process_tick(current, 0, rq, ticks); 374 374 } 375 375 #else /* CONFIG_IRQ_TIME_ACCOUNTING */ 376 376 static inline void irqtime_account_idle_ticks(int ticks) {} 377 377 static inline void irqtime_account_process_tick(struct task_struct *p, int user_tick, 378 - struct rq *rq) {} 378 + struct rq *rq, int nr_ticks) {} 379 379 #endif /* CONFIG_IRQ_TIME_ACCOUNTING */ 380 380 381 381 /* ··· 464 464 return; 465 465 466 466 if (sched_clock_irqtime) { 467 - irqtime_account_process_tick(p, user_tick, rq); 467 + irqtime_account_process_tick(p, user_tick, rq, 1); 468 468 return; 469 469 } 470 470
+3 -2
kernel/sched/deadline.c
··· 528 528 sched_clock_tick(); 529 529 update_rq_clock(rq); 530 530 dl_se->dl_throttled = 0; 531 + dl_se->dl_yielded = 0; 531 532 if (p->on_rq) { 532 533 enqueue_task_dl(rq, p, ENQUEUE_REPLENISH); 533 534 if (task_has_dl_policy(rq->curr)) ··· 894 893 * We make the task go to sleep until its current deadline by 895 894 * forcing its runtime to zero. This way, update_curr_dl() stops 896 895 * it and the bandwidth timer will wake it up and will give it 897 - * new scheduling parameters (thanks to dl_new=1). 896 + * new scheduling parameters (thanks to dl_yielded=1). 898 897 */ 899 898 if (p->dl.runtime > 0) { 900 - rq->curr->dl.dl_new = 1; 899 + rq->curr->dl.dl_yielded = 1; 901 900 p->dl.runtime = 0; 902 901 } 903 902 update_curr_dl(rq);
+8 -8
kernel/sched/fair.c
··· 6653 6653 int this_cpu = this_rq->cpu; 6654 6654 6655 6655 idle_enter_fair(this_rq); 6656 + 6656 6657 /* 6657 6658 * We must set idle_stamp _before_ calling idle_balance(), such that we 6658 6659 * measure the duration of idle_balance() as idle time. ··· 6706 6705 6707 6706 raw_spin_lock(&this_rq->lock); 6708 6707 6708 + if (curr_cost > this_rq->max_idle_balance_cost) 6709 + this_rq->max_idle_balance_cost = curr_cost; 6710 + 6709 6711 /* 6710 - * While browsing the domains, we released the rq lock. 6711 - * A task could have be enqueued in the meantime 6712 + * While browsing the domains, we released the rq lock, a task could 6713 + * have been enqueued in the meantime. Since we're not going idle, 6714 + * pretend we pulled a task. 6712 6715 */ 6713 - if (this_rq->cfs.h_nr_running && !pulled_task) { 6716 + if (this_rq->cfs.h_nr_running && !pulled_task) 6714 6717 pulled_task = 1; 6715 - goto out; 6716 - } 6717 6718 6718 6719 if (pulled_task || time_after(jiffies, this_rq->next_balance)) { 6719 6720 /* ··· 6724 6721 */ 6725 6722 this_rq->next_balance = next_balance; 6726 6723 } 6727 - 6728 - if (curr_cost > this_rq->max_idle_balance_cost) 6729 - this_rq->max_idle_balance_cost = curr_cost; 6730 6724 6731 6725 out: 6732 6726 /* Is there a task of a high priority class? */