sched/deadline: Correctly account for allocated bandwidth during hotplug

For hotplug operations, DEADLINE needs to check that there is still enough
bandwidth left after removing the CPU that is going offline. However, we
currently fail to do so.

Restore the correct behavior by restructuring dl_bw_manage() a bit, so
that overflow conditions (not enough bandwidth left) are properly
checked. Also account for dl_server bandwidth, i.e. discount such
bandwidth in the calculation, since NORMAL tasks will be moved away
from the CPU anyway as a result of the hotplug operation.

Signed-off-by: Juri Lelli <juri.lelli@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Phil Auld <pauld@redhat.com>
Tested-by: Waiman Long <longman@redhat.com>
Link: https://lore.kernel.org/r/20241114142810.794657-3-juri.lelli@redhat.com

authored by

Juri Lelli and committed by

Peter Zijlstra 2 years ago d4742f6e 41d4200b

+41 -11

3 changed files

expand all

kernel

sched

core.c

deadline.c

sched.h

+1 -1

kernel/sched/core.c

··· 8185 8185 static int cpuset_cpu_inactive(unsigned int cpu) 8186 8186 { 8187 8187 if (!cpuhp_tasks_frozen) { 8188 - int ret = dl_bw_check_overflow(cpu); 8188 + int ret = dl_bw_deactivate(cpu); 8189 8189 8190 8190 if (ret) 8191 8191 return ret;

+39 -9

kernel/sched/deadline.c

··· 3460 3460 } 3461 3461 3462 3462 enum dl_bw_request { 3463 - dl_bw_req_check_overflow = 0, 3463 + dl_bw_req_deactivate = 0, 3464 3464 dl_bw_req_alloc, 3465 3465 dl_bw_req_free 3466 3466 }; 3467 3467 3468 3468 static int dl_bw_manage(enum dl_bw_request req, int cpu, u64 dl_bw) 3469 3469 { 3470 - unsigned long flags; 3470 + unsigned long flags, cap; 3471 3471 struct dl_bw *dl_b; 3472 3472 bool overflow = 0; 3473 + u64 fair_server_bw = 0; 3473 3474 3474 3475 rcu_read_lock_sched(); 3475 3476 dl_b = dl_bw_of(cpu); 3476 3477 raw_spin_lock_irqsave(&dl_b->lock, flags); 3477 3478 3478 - if (req == dl_bw_req_free) { 3479 + cap = dl_bw_capacity(cpu); 3480 + switch (req) { 3481 + case dl_bw_req_free: 3479 3482 __dl_sub(dl_b, dl_bw, dl_bw_cpus(cpu)); 3480 - } else { 3481 - unsigned long cap = dl_bw_capacity(cpu); 3482 - 3483 + break; 3484 + case dl_bw_req_alloc: 3483 3485 overflow = __dl_overflow(dl_b, cap, 0, dl_bw); 3484 3486 3485 - if (req == dl_bw_req_alloc && !overflow) { 3487 + if (!overflow) { 3486 3488 /* 3487 3489 * We reserve space in the destination 3488 3490 * root_domain, as we can't fail after this point. ··· 3493 3491 */ 3494 3492 __dl_add(dl_b, dl_bw, dl_bw_cpus(cpu)); 3495 3493 } 3494 + break; 3495 + case dl_bw_req_deactivate: 3496 + /* 3497 + * cpu is going offline and NORMAL tasks will be moved away 3498 + * from it. We can thus discount dl_server bandwidth 3499 + * contribution as it won't need to be servicing tasks after 3500 + * the cpu is off. 3501 + */ 3502 + if (cpu_rq(cpu)->fair_server.dl_server) 3503 + fair_server_bw = cpu_rq(cpu)->fair_server.dl_bw; 3504 + 3505 + /* 3506 + * Not much to check if no DEADLINE bandwidth is present. 3507 + * dl_servers we can discount, as tasks will be moved out the 3508 + * offlined CPUs anyway. 3509 + */ 3510 + if (dl_b->total_bw - fair_server_bw > 0) { 3511 + /* 3512 + * Leaving at least one CPU for DEADLINE tasks seems a 3513 + * wise thing to do. 
3514 + */ 3515 + if (dl_bw_cpus(cpu)) 3516 + overflow = __dl_overflow(dl_b, cap, fair_server_bw, 0); 3517 + else 3518 + overflow = 1; 3519 + } 3520 + 3521 + break; 3496 3522 } 3497 3523 3498 3524 raw_spin_unlock_irqrestore(&dl_b->lock, flags); ··· 3529 3499 return overflow ? -EBUSY : 0; 3530 3500 } 3531 3501 3532 - int dl_bw_check_overflow(int cpu) 3502 + int dl_bw_deactivate(int cpu) 3533 3503 { 3534 - return dl_bw_manage(dl_bw_req_check_overflow, cpu, 0); 3504 + return dl_bw_manage(dl_bw_req_deactivate, cpu, 0); 3535 3505 } 3536 3506 3537 3507 int dl_bw_alloc(int cpu, u64 dl_bw)

+1 -1

kernel/sched/sched.h

··· 362 362 extern bool __checkparam_dl(const struct sched_attr *attr); 363 363 extern bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr); 364 364 extern int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpumask *trial); 365 - extern int dl_bw_check_overflow(int cpu); 365 + extern int dl_bw_deactivate(int cpu); 366 366 extern s64 dl_scaled_delta_exec(struct rq *rq, struct sched_dl_entity *dl_se, s64 delta_exec); 367 367 /* 368 368 * SCHED_DEADLINE supports servers (nested scheduling) with the following

Configure Feed

Configure Feed