Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

sched_ext: Factor out nldsq_cursor_next_task() and nldsq_cursor_lost_task()

Factor out cursor-based DSQ iteration from bpf_iter_scx_dsq_next() into
nldsq_cursor_next_task() and the task-lost check from scx_dsq_move() into
nldsq_cursor_lost_task() to prepare for reuse.

As ->priv is only used to record dsq->seq for cursors, update
INIT_DSQ_LIST_CURSOR() to take the DSQ pointer and set ->priv from dsq->seq
so that users don't have to read it manually. Move the scx_dsq_iter_flags
enum earlier so that nldsq_cursor_next_task() can use SCX_DSQ_ITER_REV.

As a side effect, bypass_lb_cpu() now sets cursor.priv to donor_dsq->seq, but it never reads the value.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Andrea Righi <arighi@nvidia.com>

Tejun Heo 35250720 30b05153

+102 -58
+3 -3
include/linux/sched/ext.h
··· 157 157 u32 priv; /* can be used by iter cursor */ 158 158 }; 159 159 160 - #define INIT_DSQ_LIST_CURSOR(__node, __flags, __priv) \ 160 + #define INIT_DSQ_LIST_CURSOR(__cursor, __dsq, __flags) \ 161 161 (struct scx_dsq_list_node) { \ 162 - .node = LIST_HEAD_INIT((__node).node), \ 162 + .node = LIST_HEAD_INIT((__cursor).node), \ 163 163 .flags = SCX_DSQ_LNODE_ITER_CURSOR | (__flags), \ 164 - .priv = (__priv), \ 164 + .priv = READ_ONCE((__dsq)->seq), \ 165 165 } 166 166 167 167 struct scx_sched;
+99 -55
kernel/sched/ext.c
··· 570 570 return true; 571 571 } 572 572 573 + enum scx_dsq_iter_flags { 574 + /* iterate in the reverse dispatch order */ 575 + SCX_DSQ_ITER_REV = 1U << 16, 576 + 577 + __SCX_DSQ_ITER_HAS_SLICE = 1U << 30, 578 + __SCX_DSQ_ITER_HAS_VTIME = 1U << 31, 579 + 580 + __SCX_DSQ_ITER_USER_FLAGS = SCX_DSQ_ITER_REV, 581 + __SCX_DSQ_ITER_ALL_FLAGS = __SCX_DSQ_ITER_USER_FLAGS | 582 + __SCX_DSQ_ITER_HAS_SLICE | 583 + __SCX_DSQ_ITER_HAS_VTIME, 584 + }; 585 + 573 586 /** 574 587 * nldsq_next_task - Iterate to the next task in a non-local DSQ 575 - * @dsq: user dsq being iterated 588 + * @dsq: non-local dsq being iterated 576 589 * @cur: current position, %NULL to start iteration 577 590 * @rev: walk backwards 578 591 * ··· 625 612 for ((p) = nldsq_next_task((dsq), NULL, false); (p); \ 626 613 (p) = nldsq_next_task((dsq), (p), false)) 627 614 615 + /** 616 + * nldsq_cursor_next_task - Iterate to the next task given a cursor in a non-local DSQ 617 + * @cursor: scx_dsq_list_node initialized with INIT_DSQ_LIST_CURSOR() 618 + * @dsq: non-local dsq being iterated 619 + * 620 + * Find the next task in a cursor based iteration. The caller must have 621 + * initialized @cursor using INIT_DSQ_LIST_CURSOR() and can release the DSQ lock 622 + * between the iteration steps. 623 + * 624 + * Only tasks which were queued before @cursor was initialized are visible. This 625 + * bounds the iteration and guarantees that vtime never jumps in the other 626 + * direction while iterating. 
627 + */ 628 + static struct task_struct *nldsq_cursor_next_task(struct scx_dsq_list_node *cursor, 629 + struct scx_dispatch_q *dsq) 630 + { 631 + bool rev = cursor->flags & SCX_DSQ_ITER_REV; 632 + struct task_struct *p; 633 + 634 + lockdep_assert_held(&dsq->lock); 635 + BUG_ON(!(cursor->flags & SCX_DSQ_LNODE_ITER_CURSOR)); 636 + 637 + if (list_empty(&cursor->node)) 638 + p = NULL; 639 + else 640 + p = container_of(cursor, struct task_struct, scx.dsq_list); 641 + 642 + /* skip cursors and tasks that were queued after @cursor init */ 643 + do { 644 + p = nldsq_next_task(dsq, p, rev); 645 + } while (p && unlikely(u32_before(cursor->priv, p->scx.dsq_seq))); 646 + 647 + if (p) { 648 + if (rev) 649 + list_move_tail(&cursor->node, &p->scx.dsq_list.node); 650 + else 651 + list_move(&cursor->node, &p->scx.dsq_list.node); 652 + } else { 653 + list_del_init(&cursor->node); 654 + } 655 + 656 + return p; 657 + } 658 + 659 + /** 660 + * nldsq_cursor_lost_task - Test whether someone else took the task since iteration 661 + * @cursor: scx_dsq_list_node initialized with INIT_DSQ_LIST_CURSOR() 662 + * @rq: rq @p was on 663 + * @dsq: dsq @p was on 664 + * @p: target task 665 + * 666 + * @p is a task returned by nldsq_cursor_next_task(). The locks may have been 667 + * dropped and re-acquired inbetween. Verify that no one else took or is in the 668 + * process of taking @p from @dsq. 669 + * 670 + * On %false return, the caller can assume full ownership of @p. 671 + */ 672 + static bool nldsq_cursor_lost_task(struct scx_dsq_list_node *cursor, 673 + struct rq *rq, struct scx_dispatch_q *dsq, 674 + struct task_struct *p) 675 + { 676 + lockdep_assert_rq_held(rq); 677 + lockdep_assert_held(&dsq->lock); 678 + 679 + /* 680 + * @p could have already left $src_dsq, got re-enqueud, or be in the 681 + * process of being consumed by someone else. 
682 + */ 683 + if (unlikely(p->scx.dsq != dsq || 684 + u32_before(cursor->priv, p->scx.dsq_seq) || 685 + p->scx.holding_cpu >= 0)) 686 + return true; 687 + 688 + /* if @p has stayed on @dsq, its rq couldn't have changed */ 689 + if (WARN_ON_ONCE(rq != task_rq(p))) 690 + return true; 691 + 692 + return false; 693 + } 628 694 629 695 /* 630 696 * BPF DSQ iterator. Tasks in a non-local DSQ can be iterated in [reverse] ··· 711 619 * changes without breaking backward compatibility. Can be used with 712 620 * bpf_for_each(). See bpf_iter_scx_dsq_*(). 713 621 */ 714 - enum scx_dsq_iter_flags { 715 - /* iterate in the reverse dispatch order */ 716 - SCX_DSQ_ITER_REV = 1U << 16, 717 - 718 - __SCX_DSQ_ITER_HAS_SLICE = 1U << 30, 719 - __SCX_DSQ_ITER_HAS_VTIME = 1U << 31, 720 - 721 - __SCX_DSQ_ITER_USER_FLAGS = SCX_DSQ_ITER_REV, 722 - __SCX_DSQ_ITER_ALL_FLAGS = __SCX_DSQ_ITER_USER_FLAGS | 723 - __SCX_DSQ_ITER_HAS_SLICE | 724 - __SCX_DSQ_ITER_HAS_VTIME, 725 - }; 726 - 727 622 struct bpf_iter_scx_dsq_kern { 728 623 struct scx_dsq_list_node cursor; 729 624 struct scx_dispatch_q *dsq; ··· 4576 4497 struct rq *donor_rq = cpu_rq(donor); 4577 4498 struct scx_dispatch_q *donor_dsq = bypass_dsq(sch, donor); 4578 4499 struct task_struct *p, *n; 4579 - struct scx_dsq_list_node cursor = INIT_DSQ_LIST_CURSOR(cursor, 0, 0); 4500 + struct scx_dsq_list_node cursor = INIT_DSQ_LIST_CURSOR(cursor, donor_dsq, 0); 4580 4501 s32 delta = READ_ONCE(donor_dsq->nr) - nr_donor_target; 4581 4502 u32 nr_balanced = 0, min_delta_us; 4582 4503 ··· 7621 7542 locked_rq = src_rq; 7622 7543 raw_spin_lock(&src_dsq->lock); 7623 7544 7624 - /* 7625 - * Did someone else get to it? @p could have already left $src_dsq, got 7626 - * re-enqueud, or be in the process of being consumed by someone else. 
7627 - */ 7628 - if (unlikely(p->scx.dsq != src_dsq || 7629 - u32_before(kit->cursor.priv, p->scx.dsq_seq) || 7630 - p->scx.holding_cpu >= 0) || 7631 - WARN_ON_ONCE(src_rq != task_rq(p))) { 7545 + /* did someone else get to it while we dropped the locks? */ 7546 + if (nldsq_cursor_lost_task(&kit->cursor, src_rq, src_dsq, p)) { 7632 7547 raw_spin_unlock(&src_dsq->lock); 7633 7548 goto out; 7634 7549 } ··· 8261 8188 if (!kit->dsq) 8262 8189 return -ENOENT; 8263 8190 8264 - kit->cursor = INIT_DSQ_LIST_CURSOR(kit->cursor, flags, 8265 - READ_ONCE(kit->dsq->seq)); 8191 + kit->cursor = INIT_DSQ_LIST_CURSOR(kit->cursor, kit->dsq, flags); 8266 8192 8267 8193 return 0; 8268 8194 } ··· 8275 8203 __bpf_kfunc struct task_struct *bpf_iter_scx_dsq_next(struct bpf_iter_scx_dsq *it) 8276 8204 { 8277 8205 struct bpf_iter_scx_dsq_kern *kit = (void *)it; 8278 - bool rev = kit->cursor.flags & SCX_DSQ_ITER_REV; 8279 - struct task_struct *p; 8280 - unsigned long flags; 8281 8206 8282 8207 if (!kit->dsq) 8283 8208 return NULL; 8284 8209 8285 - raw_spin_lock_irqsave(&kit->dsq->lock, flags); 8210 + guard(raw_spinlock_irqsave)(&kit->dsq->lock); 8286 8211 8287 - if (list_empty(&kit->cursor.node)) 8288 - p = NULL; 8289 - else 8290 - p = container_of(&kit->cursor, struct task_struct, scx.dsq_list); 8291 - 8292 - /* 8293 - * Only tasks which were queued before the iteration started are 8294 - * visible. This bounds BPF iterations and guarantees that vtime never 8295 - * jumps in the other direction while iterating. 
8296 - */ 8297 - do { 8298 - p = nldsq_next_task(kit->dsq, p, rev); 8299 - } while (p && unlikely(u32_before(kit->cursor.priv, p->scx.dsq_seq))); 8300 - 8301 - if (p) { 8302 - if (rev) 8303 - list_move_tail(&kit->cursor.node, &p->scx.dsq_list.node); 8304 - else 8305 - list_move(&kit->cursor.node, &p->scx.dsq_list.node); 8306 - } else { 8307 - list_del_init(&kit->cursor.node); 8308 - } 8309 - 8310 - raw_spin_unlock_irqrestore(&kit->dsq->lock, flags); 8311 - 8312 - return p; 8212 + return nldsq_cursor_next_task(&kit->cursor, kit->dsq); 8313 8213 } 8314 8214 8315 8215 /**