sched/fair: Fix lag clamp

Vincent reported that he was seeing undue lag clamping in a mixed
slice workload. Implement the max_slice tracking as per the todo
comment.

Fixes: 147f3efaa241 ("sched/fair: Implement an EEVDF-like scheduling policy")
Reported-by: Vincent Guittot <vincent.guittot@linaro.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Vincent Guittot <vincent.guittot@linaro.org>
Tested-by: K Prateek Nayak <kprateek.nayak@amd.com>
Tested-by: Shubhang Kaushik <shubhang@os.amperecomputing.com>
Link: https://patch.msgid.link/20250422101628.GA33555@noisy.programming.kicks-ass.net

Peter Zijlstra 3 months ago 6e3c0a4e ff384240

+36 -4

2 changed files

expand all

include

linux

sched.h

kernel

sched

fair.c

include/linux/sched.h

··· 579 579 u64 deadline; 580 580 u64 min_vruntime; 581 581 u64 min_slice; 582 + u64 max_slice; 582 583 583 584 struct list_head group_node; 584 585 unsigned char on_rq;

+35 -4

kernel/sched/fair.c

··· 748 748 return cfs_rq->zero_vruntime; 749 749 } 750 750 751 + static inline u64 cfs_rq_max_slice(struct cfs_rq *cfs_rq); 752 + 751 753 /* 752 754 * lag_i = S - s_i = w_i * (V - v_i) 753 755 * ··· 763 761 * EEVDF gives the following limit for a steady state system: 764 762 * 765 763 * -r_max < lag < max(r_max, q) 766 - * 767 - * XXX could add max_slice to the augmented data to track this. 768 764 */ 769 765 static void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se) 770 766 { 767 + u64 max_slice = cfs_rq_max_slice(cfs_rq) + TICK_NSEC; 771 768 s64 vlag, limit; 772 769 773 770 WARN_ON_ONCE(!se->on_rq); 774 771 775 772 vlag = avg_vruntime(cfs_rq) - se->vruntime; 776 - limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se); 773 + limit = calc_delta_fair(max_slice, se); 777 774 778 775 se->vlag = clamp(vlag, -limit, limit); 779 776 } ··· 830 829 return min_slice; 831 830 } 832 831 832 + static inline u64 cfs_rq_max_slice(struct cfs_rq *cfs_rq) 833 + { 834 + struct sched_entity *root = __pick_root_entity(cfs_rq); 835 + struct sched_entity *curr = cfs_rq->curr; 836 + u64 max_slice = 0ULL; 837 + 838 + if (curr && curr->on_rq) 839 + max_slice = curr->slice; 840 + 841 + if (root) 842 + max_slice = max(max_slice, root->max_slice); 843 + 844 + return max_slice; 845 + } 846 + 833 847 static inline bool __entity_less(struct rb_node *a, const struct rb_node *b) 834 848 { 835 849 return entity_before(__node_2_se(a), __node_2_se(b)); ··· 869 853 } 870 854 } 871 855 856 + static inline void __max_slice_update(struct sched_entity *se, struct rb_node *node) 857 + { 858 + if (node) { 859 + struct sched_entity *rse = __node_2_se(node); 860 + if (rse->max_slice > se->max_slice) 861 + se->max_slice = rse->max_slice; 862 + } 863 + } 864 + 872 865 /* 873 866 * se->min_vruntime = min(se->vruntime, {left,right}->min_vruntime) 874 867 */ ··· 885 860 { 886 861 u64 old_min_vruntime = se->min_vruntime; 887 862 u64 old_min_slice = se->min_slice; 863 + u64 
old_max_slice = se->max_slice; 888 864 struct rb_node *node = &se->run_node; 889 865 890 866 se->min_vruntime = se->vruntime; ··· 896 870 __min_slice_update(se, node->rb_right); 897 871 __min_slice_update(se, node->rb_left); 898 872 873 + se->max_slice = se->slice; 874 + __max_slice_update(se, node->rb_right); 875 + __max_slice_update(se, node->rb_left); 876 + 899 877 return se->min_vruntime == old_min_vruntime && 900 - se->min_slice == old_min_slice; 878 + se->min_slice == old_min_slice && 879 + se->max_slice == old_max_slice; 901 880 } 902 881 903 882 RB_DECLARE_CALLBACKS(static, min_vruntime_cb, struct sched_entity,

Configure Feed

Configure Feed