Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge tag 'sched-urgent-2020-11-22' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler fixes from Thomas Gleixner:
"A couple of scheduler fixes:

- Make the conditional update of the overutilized state work
correctly by caching the relevant flags state before overwriting
them and checking them afterwards.

- Fix a data race in the wakeup path which caused loadavg on ARM64
platforms to become a random number generator.

- Fix the ordering of the iowaiter accounting operations so it can't
be decremented before it is incremented.

- Fix a bug in the deadline scheduler vs. priority inheritance when a
non-deadline task A has inherited the parameters of a deadline task
B and then blocks on a non-deadline task C.

The second inheritance step used the static deadline parameters of
task A, which are usually 0, instead of further propagating task
B's parameters. The zero initialized parameters trigger a bug in
the deadline scheduler"

* tag 'sched-urgent-2020-11-22' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
sched/deadline: Fix priority inheritance with multiple scheduling classes
sched: Fix rq->nr_iowait ordering
sched: Fix data-race in wakeup
sched/fair: Fix overutilized update in enqueue_task_fair()
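
To make the last fix concrete, here is a minimal user-space sketch of the two-step inheritance chain it describes. This is not the original reproducer: the thread names, lock names, and timings are invented, actually tripping the old bug depends on scheduler timing, and SCHED_DEADLINE admission requires root. Task C holds lock L2, task A holds L1 and then blocks on L2, and deadline task B blocks on L1; B's parameters must propagate through A to C. Build with gcc -pthread.

#define _GNU_SOURCE
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef SCHED_DEADLINE
#define SCHED_DEADLINE 6        /* value from the UAPI headers */
#endif

struct sched_attr {             /* no glibc wrapper; local copy */
        uint32_t size;
        uint32_t sched_policy;
        uint64_t sched_flags;
        int32_t  sched_nice;
        uint32_t sched_priority;
        uint64_t sched_runtime;
        uint64_t sched_deadline;
        uint64_t sched_period;
};

static pthread_mutex_t L1, L2;  /* both PTHREAD_PRIO_INHERIT */

static void *task_c(void *arg)  /* SCHED_OTHER, holds L2 */
{
        (void)arg;
        pthread_mutex_lock(&L2);
        usleep(200000);         /* hold L2 while A and B queue up */
        pthread_mutex_unlock(&L2);
        return NULL;
}

static void *task_a(void *arg)  /* SCHED_OTHER, holds L1, wants L2 */
{
        (void)arg;
        pthread_mutex_lock(&L1);
        usleep(50000);          /* let B block on L1 first */
        pthread_mutex_lock(&L2);        /* boosted A now blocks on C */
        pthread_mutex_unlock(&L2);
        pthread_mutex_unlock(&L1);
        return NULL;
}

static void *task_b(void *arg)  /* SCHED_DEADLINE donor, wants L1 */
{
        struct sched_attr attr = {
                .size           = sizeof(attr),
                .sched_policy   = SCHED_DEADLINE,
                .sched_runtime  =  10 * 1000 * 1000,    /*  10 ms */
                .sched_deadline = 100 * 1000 * 1000,    /* 100 ms */
                .sched_period   = 100 * 1000 * 1000,    /* 100 ms */
        };

        (void)arg;
        if (syscall(SYS_sched_setattr, 0, &attr, 0))
                perror("sched_setattr (needs root)");
        pthread_mutex_lock(&L1);        /* boosts A; must reach C through A */
        pthread_mutex_unlock(&L1);
        return NULL;
}

int main(void)
{
        pthread_mutexattr_t ma;
        pthread_t a, b, c;

        pthread_mutexattr_init(&ma);
        pthread_mutexattr_setprotocol(&ma, PTHREAD_PRIO_INHERIT);
        pthread_mutex_init(&L1, &ma);
        pthread_mutex_init(&L2, &ma);

        pthread_create(&c, NULL, task_c, NULL);
        usleep(10000);                  /* C owns L2 */
        pthread_create(&a, NULL, task_a, NULL);
        usleep(10000);                  /* A owns L1 */
        pthread_create(&b, NULL, task_b, NULL);

        pthread_join(a, NULL);
        pthread_join(b, NULL);
        pthread_join(c, NULL);
        return 0;
}

Before the fix, the second inheritance step (A blocking on C) handed C the static, zero-initialized deadline parameters of A instead of B's; the diffs below replace the dl_boosted flag with a pi_se pointer so the donor's parameters follow the chain.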

4 files changed: +95 -57
include/linux/sched.h  +24 -2
···
 	 * overruns.
 	 */
 	unsigned int			dl_throttled      : 1;
-	unsigned int			dl_boosted        : 1;
 	unsigned int			dl_yielded        : 1;
 	unsigned int			dl_non_contending : 1;
 	unsigned int			dl_overrun        : 1;
···
 	 * time.
 	 */
 	struct hrtimer inactive_timer;
+
+#ifdef CONFIG_RT_MUTEXES
+	/*
+	 * Priority Inheritance. When a DEADLINE scheduling entity is boosted
+	 * pi_se points to the donor, otherwise points to the dl_se it belongs
+	 * to (the original one/itself).
+	 */
+	struct sched_dl_entity *pi_se;
+#endif
 };
 
 #ifdef CONFIG_UCLAMP_TASK
···
 	unsigned			sched_reset_on_fork:1;
 	unsigned			sched_contributes_to_load:1;
 	unsigned			sched_migrated:1;
-	unsigned			sched_remote_wakeup:1;
 #ifdef CONFIG_PSI
 	unsigned			sched_psi_wake_requeue:1;
 #endif
···
 	unsigned			:0;
 
 	/* Unserialized, strictly 'current' */
+
+	/*
+	 * This field must not be in the scheduler word above due to wakelist
+	 * queueing no longer being serialized by p->on_cpu. However:
+	 *
+	 * p->XXX = X;			ttwu()
+	 * schedule()			  if (p->on_rq && ..) // false
+	 *   smp_mb__after_spinlock();	  if (smp_load_acquire(&p->on_cpu) && //true
+	 *   deactivate_task()		      ttwu_queue_wakelist())
+	 *     p->on_rq = 0;			p->sched_remote_wakeup = Y;
+	 *
+	 * guarantees all stores of 'current' are visible before
+	 * ->sched_remote_wakeup gets used, so it can be in this word.
+	 */
+	unsigned			sched_remote_wakeup:1;
 
 	/* Bit to tell LSMs we're in execve(): */
 	unsigned			in_execve:1;
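
The comment added above is the heart of the data-race fix: sched_remote_wakeup is now written by a remote ttwu() while schedule() updates its neighbours, and adjacent C bitfields share one machine word, so unserialized read-modify-writes can erase each other. A small user-space demonstration of that effect follows; it is illustrative only (the race is formally undefined behaviour, and volatile here merely stops the compiler from caching the re-read). Build with gcc -pthread.

#include <pthread.h>
#include <stdio.h>

/* Two bitfields packed into one word: every write is a non-atomic
 * read-modify-write of the whole word. */
struct flags {
        unsigned a:1;
        unsigned b:1;
};

static volatile struct flags f;

static void *flip_a(void *arg)          /* hammers the neighbouring bit */
{
        (void)arg;
        for (int i = 0; i < 10000000; i++) {
                f.a = 1;
                f.a = 0;
        }
        return NULL;
}

int main(void)
{
        pthread_t t;
        int lost = 0;

        pthread_create(&t, NULL, flip_a, NULL);
        for (int i = 0; i < 10000000; i++) {
                f.b = 1;                /* store the word with bit b set */
                if (f.b == 0)           /* the other thread's RMW erased it */
                        lost++;
                f.b = 0;
        }
        pthread_join(t, NULL);
        printf("lost %d updates to f.b\n", lost);       /* nonzero = race */
        return 0;
}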
kernel/sched/core.c  +16 -10
···
 #ifdef CONFIG_SMP
 	if (wake_flags & WF_MIGRATED)
 		en_flags |= ENQUEUE_MIGRATED;
+	else
 #endif
+	if (p->in_iowait) {
+		delayacct_blkio_end(p);
+		atomic_dec(&task_rq(p)->nr_iowait);
+	}
 
 	activate_task(rq, p, en_flags);
 	ttwu_do_wakeup(rq, p, wake_flags, rf);
···
 	if (READ_ONCE(p->on_rq) && ttwu_runnable(p, wake_flags))
 		goto unlock;
 
-	if (p->in_iowait) {
-		delayacct_blkio_end(p);
-		atomic_dec(&task_rq(p)->nr_iowait);
-	}
-
 #ifdef CONFIG_SMP
 	/*
 	 * Ensure we load p->on_cpu _after_ p->on_rq, otherwise it would be
···
 
 	cpu = select_task_rq(p, p->wake_cpu, SD_BALANCE_WAKE, wake_flags);
 	if (task_cpu(p) != cpu) {
+		if (p->in_iowait) {
+			delayacct_blkio_end(p);
+			atomic_dec(&task_rq(p)->nr_iowait);
+		}
+
 		wake_flags |= WF_MIGRATED;
 		psi_ttwu_dequeue(p);
 		set_task_cpu(p, cpu);
···
 		if (!dl_prio(p->normal_prio) ||
 		    (pi_task && dl_prio(pi_task->prio) &&
 		     dl_entity_preempt(&pi_task->dl, &p->dl))) {
-			p->dl.dl_boosted = 1;
+			p->dl.pi_se = pi_task->dl.pi_se;
 			queue_flag |= ENQUEUE_REPLENISH;
-		} else
-			p->dl.dl_boosted = 0;
+		} else {
+			p->dl.pi_se = &p->dl;
+		}
 		p->sched_class = &dl_sched_class;
 	} else if (rt_prio(prio)) {
 		if (dl_prio(oldprio))
-			p->dl.dl_boosted = 0;
+			p->dl.pi_se = &p->dl;
 		if (oldprio < prio)
 			queue_flag |= ENQUEUE_HEAD;
 		p->sched_class = &rt_sched_class;
 	} else {
 		if (dl_prio(oldprio))
-			p->dl.dl_boosted = 0;
+			p->dl.pi_se = &p->dl;
 		if (rt_prio(oldprio))
 			p->rt.timeout = 0;
 		p->sched_class = &fair_sched_class;
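
The three nr_iowait hunks above all enforce one pairing rule: the sleeper bumps the counter on its way into schedule(), and the waker may only drop it once the matching increment is guaranteed to have happened; otherwise the count goes transiently negative and loadavg samples garbage. A toy model of the invariant (names invented, not kernel code):

#include <assert.h>
#include <stdatomic.h>

/* Toy model: rq->nr_iowait must never be observed negative, so the
 * decrement in the wakeup path must be ordered after the increment
 * in the sleep path. */
static atomic_int nr_iowait;

static void io_sleep(void)      /* schedule(): task blocks in iowait */
{
        atomic_fetch_add(&nr_iowait, 1);
}

static void io_wake(void)       /* ttwu(): task leaves iowait */
{
        int old = atomic_fetch_sub(&nr_iowait, 1);
        assert(old > 0);        /* trips if the dec ran before the inc */
}

int main(void)
{
        io_sleep();             /* correct order: increment first ... */
        io_wake();              /* ... then the matching decrement */
        return 0;
}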
kernel/sched/deadline.c  +53 -44
···
 	return !RB_EMPTY_NODE(&dl_se->rb_node);
 }
 
+#ifdef CONFIG_RT_MUTEXES
+static inline struct sched_dl_entity *pi_of(struct sched_dl_entity *dl_se)
+{
+	return dl_se->pi_se;
+}
+
+static inline bool is_dl_boosted(struct sched_dl_entity *dl_se)
+{
+	return pi_of(dl_se) != dl_se;
+}
+#else
+static inline struct sched_dl_entity *pi_of(struct sched_dl_entity *dl_se)
+{
+	return dl_se;
+}
+
+static inline bool is_dl_boosted(struct sched_dl_entity *dl_se)
+{
+	return false;
+}
+#endif
+
 #ifdef CONFIG_SMP
 static inline struct dl_bw *dl_bw_of(int i)
 {
···
 	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
 	struct rq *rq = rq_of_dl_rq(dl_rq);
 
-	WARN_ON(dl_se->dl_boosted);
+	WARN_ON(is_dl_boosted(dl_se));
 	WARN_ON(dl_time_before(rq_clock(rq), dl_se->deadline));
 
 	/*
···
 * could happen are, typically, a entity voluntarily trying to overcome its
 * runtime, or it just underestimated it during sched_setattr().
 */
-static void replenish_dl_entity(struct sched_dl_entity *dl_se,
-				struct sched_dl_entity *pi_se)
+static void replenish_dl_entity(struct sched_dl_entity *dl_se)
 {
 	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
 	struct rq *rq = rq_of_dl_rq(dl_rq);
 
-	BUG_ON(pi_se->dl_runtime <= 0);
+	BUG_ON(pi_of(dl_se)->dl_runtime <= 0);
 
 	/*
 	 * This could be the case for a !-dl task that is boosted.
 	 * Just go with full inherited parameters.
 	 */
 	if (dl_se->dl_deadline == 0) {
-		dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
-		dl_se->runtime = pi_se->dl_runtime;
+		dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline;
+		dl_se->runtime = pi_of(dl_se)->dl_runtime;
 	}
 
 	if (dl_se->dl_yielded && dl_se->runtime > 0)
···
 	 * arbitrary large.
 	 */
 	while (dl_se->runtime <= 0) {
-		dl_se->deadline += pi_se->dl_period;
-		dl_se->runtime += pi_se->dl_runtime;
+		dl_se->deadline += pi_of(dl_se)->dl_period;
+		dl_se->runtime += pi_of(dl_se)->dl_runtime;
 	}
 
 	/*
···
 	 */
 	if (dl_time_before(dl_se->deadline, rq_clock(rq))) {
 		printk_deferred_once("sched: DL replenish lagged too much\n");
-		dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
-		dl_se->runtime = pi_se->dl_runtime;
+		dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline;
+		dl_se->runtime = pi_of(dl_se)->dl_runtime;
 	}
 
 	if (dl_se->dl_yielded)
···
 * task with deadline equal to period this is the same of using
 * dl_period instead of dl_deadline in the equation above.
 */
-static bool dl_entity_overflow(struct sched_dl_entity *dl_se,
-			       struct sched_dl_entity *pi_se, u64 t)
+static bool dl_entity_overflow(struct sched_dl_entity *dl_se, u64 t)
 {
 	u64 left, right;
 
···
 	 * of anything below microseconds resolution is actually fiction
 	 * (but still we want to give the user that illusion >;).
 	 */
-	left = (pi_se->dl_deadline >> DL_SCALE) * (dl_se->runtime >> DL_SCALE);
+	left = (pi_of(dl_se)->dl_deadline >> DL_SCALE) * (dl_se->runtime >> DL_SCALE);
 	right = ((dl_se->deadline - t) >> DL_SCALE) *
-		(pi_se->dl_runtime >> DL_SCALE);
+		(pi_of(dl_se)->dl_runtime >> DL_SCALE);
 
 	return dl_time_before(right, left);
 }
···
 * Please refer to the comments update_dl_revised_wakeup() function to find
 * more about the Revised CBS rule.
 */
-static void update_dl_entity(struct sched_dl_entity *dl_se,
-			     struct sched_dl_entity *pi_se)
+static void update_dl_entity(struct sched_dl_entity *dl_se)
 {
 	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
 	struct rq *rq = rq_of_dl_rq(dl_rq);
 
 	if (dl_time_before(dl_se->deadline, rq_clock(rq)) ||
-	    dl_entity_overflow(dl_se, pi_se, rq_clock(rq))) {
+	    dl_entity_overflow(dl_se, rq_clock(rq))) {
 
 		if (unlikely(!dl_is_implicit(dl_se) &&
 			     !dl_time_before(dl_se->deadline, rq_clock(rq)) &&
-			     !dl_se->dl_boosted)){
+			     !is_dl_boosted(dl_se))) {
 			update_dl_revised_wakeup(dl_se, rq);
 			return;
 		}
 
-		dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
-		dl_se->runtime = pi_se->dl_runtime;
+		dl_se->deadline = rq_clock(rq) + pi_of(dl_se)->dl_deadline;
+		dl_se->runtime = pi_of(dl_se)->dl_runtime;
 	}
 }
···
 	 * The task might have been boosted by someone else and might be in the
 	 * boosting/deboosting path, its not throttled.
 	 */
-	if (dl_se->dl_boosted)
+	if (is_dl_boosted(dl_se))
 		goto unlock;
 
 	/*
···
 	 * but do not enqueue -- wait for our wakeup to do that.
 	 */
 	if (!task_on_rq_queued(p)) {
-		replenish_dl_entity(dl_se, dl_se);
+		replenish_dl_entity(dl_se);
 		goto unlock;
 	}
···
 
 	if (dl_time_before(dl_se->deadline, rq_clock(rq)) &&
 	    dl_time_before(rq_clock(rq), dl_next_period(dl_se))) {
-		if (unlikely(dl_se->dl_boosted || !start_dl_timer(p)))
+		if (unlikely(is_dl_boosted(dl_se) || !start_dl_timer(p)))
 			return;
 		dl_se->dl_throttled = 1;
 		if (dl_se->runtime > 0)
···
 		dl_se->dl_overrun = 1;
 
 		__dequeue_task_dl(rq, curr, 0);
-		if (unlikely(dl_se->dl_boosted || !start_dl_timer(curr)))
+		if (unlikely(is_dl_boosted(dl_se) || !start_dl_timer(curr)))
 			enqueue_task_dl(rq, curr, ENQUEUE_REPLENISH);
 
 		if (!is_leftmost(curr, &rq->dl))
···
 }
 
 static void
-enqueue_dl_entity(struct sched_dl_entity *dl_se,
-		  struct sched_dl_entity *pi_se, int flags)
+enqueue_dl_entity(struct sched_dl_entity *dl_se, int flags)
 {
 	BUG_ON(on_dl_rq(dl_se));
 
···
 	 */
 	if (flags & ENQUEUE_WAKEUP) {
 		task_contending(dl_se, flags);
-		update_dl_entity(dl_se, pi_se);
+		update_dl_entity(dl_se);
 	} else if (flags & ENQUEUE_REPLENISH) {
-		replenish_dl_entity(dl_se, pi_se);
+		replenish_dl_entity(dl_se);
 	} else if ((flags & ENQUEUE_RESTORE) &&
 		  dl_time_before(dl_se->deadline,
 				 rq_clock(rq_of_dl_rq(dl_rq_of_se(dl_se))))) {
···
 
 static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
 {
-	struct task_struct *pi_task = rt_mutex_get_top_task(p);
-	struct sched_dl_entity *pi_se = &p->dl;
-
-	/*
-	 * Use the scheduling parameters of the top pi-waiter task if:
-	 * - we have a top pi-waiter which is a SCHED_DEADLINE task AND
-	 * - our dl_boosted is set (i.e. the pi-waiter's (absolute) deadline is
-	 *   smaller than our deadline OR we are a !SCHED_DEADLINE task getting
-	 *   boosted due to a SCHED_DEADLINE pi-waiter).
-	 * Otherwise we keep our runtime and deadline.
-	 */
-	if (pi_task && dl_prio(pi_task->normal_prio) && p->dl.dl_boosted) {
-		pi_se = &pi_task->dl;
+	if (is_dl_boosted(&p->dl)) {
 		/*
 		 * Because of delays in the detection of the overrun of a
 		 * thread's runtime, it might be the case that a thread
···
 		 * the throttle.
 		 */
 		p->dl.dl_throttled = 0;
-		BUG_ON(!p->dl.dl_boosted || flags != ENQUEUE_REPLENISH);
+		BUG_ON(!is_dl_boosted(&p->dl) || flags != ENQUEUE_REPLENISH);
 		return;
 	}
···
 		return;
 	}
 
-	enqueue_dl_entity(&p->dl, pi_se, flags);
+	enqueue_dl_entity(&p->dl, flags);
 
 	if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
 		enqueue_pushable_dl_task(rq, p);
···
 	dl_se->dl_bw = 0;
 	dl_se->dl_density = 0;
 
-	dl_se->dl_boosted = 0;
 	dl_se->dl_throttled = 0;
 	dl_se->dl_yielded = 0;
 	dl_se->dl_non_contending = 0;
 	dl_se->dl_overrun = 0;
+
+#ifdef CONFIG_RT_MUTEXES
+	dl_se->pi_se = dl_se;
+#endif
 }
 
 bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr)
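
The shape of the new scheme, extracted into a stand-alone model (the struct is a stub, not the kernel's sched_dl_entity): not-boosted is encoded as a self-pointer, and rt_mutex_setprio() copies the waiter's pi_se rather than taking the waiter's address, so a multi-step chain collapses to the original deadline donor instead of picking up an intermediate task's zeroed static parameters.

#include <assert.h>
#include <stdbool.h>

/* Stub modelling the pi_se encoding; the field names mirror the patch,
 * everything else is invented for illustration. */
struct dl_entity {
        unsigned long long dl_runtime;  /* static parameters; 0 for !dl tasks */
        struct dl_entity *pi_se;        /* self when not boosted */
};

static bool is_boosted(const struct dl_entity *se)
{
        return se->pi_se != se;         /* mirrors is_dl_boosted() */
}

/* Mirrors the rt_mutex_setprio() change: copy the waiter's pi_se,
 * do not take the waiter's own address. */
static void boost(struct dl_entity *owner, struct dl_entity *waiter)
{
        owner->pi_se = waiter->pi_se;
}

int main(void)
{
        struct dl_entity B = { .dl_runtime = 10000000 };        /* deadline donor */
        struct dl_entity A = { 0 }, C = { 0 };                  /* !dl tasks */
        B.pi_se = &B; A.pi_se = &A; C.pi_se = &C;

        boost(&A, &B);  /* B blocks on a lock held by A */
        boost(&C, &A);  /* A blocks on a lock held by C */

        /* C now uses B's parameters, not A's zeroed static ones. */
        assert(is_boosted(&C) && C.pi_se == &B && C.pi_se->dl_runtime > 0);
        return 0;
}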
kernel/sched/fair.c  +2 -1
···
 	struct cfs_rq *cfs_rq;
 	struct sched_entity *se = &p->se;
 	int idle_h_nr_running = task_has_idle_policy(p);
+	int task_new = !(flags & ENQUEUE_WAKEUP);
 
 	/*
 	 * The code below (indirectly) updates schedutil which looks at
···
 	 * into account, but that is not straightforward to implement,
 	 * and the following generally works well enough in practice.
 	 */
-	if (flags & ENQUEUE_WAKEUP)
+	if (!task_new)
 		update_overutilized_status(rq);
 
 enqueue_throttle:
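
This change follows a generic pattern worth naming: the enqueue loop in enqueue_task_fair() reuses flags as scratch (each parent entity is enqueued as a wakeup), so any property of the caller's flags has to be latched before the loop, exactly the "caching the relevant flags state before overwriting them" from the merge message. A stripped-down sketch with invented names:

#include <stdbool.h>
#include <stdio.h>

#define ENQUEUE_WAKEUP 0x01

static void update_overutilized_status(void)    /* stub */
{
        puts("overutilized state updated");
}

/* Shape of the bug: checking `flags` after the loop always saw
 * ENQUEUE_WAKEUP, so brand-new tasks also triggered the update. */
static void enqueue_task(int flags)
{
        bool task_new = !(flags & ENQUEUE_WAKEUP);      /* latch before the loop */

        for (int depth = 0; depth < 3; depth++) {
                /* ... enqueue one level of the hierarchy ... */
                flags = ENQUEUE_WAKEUP;         /* parents count as woken */
        }

        if (!task_new)          /* correct: the latched value, not `flags` */
                update_overutilized_status();
}

int main(void)
{
        enqueue_task(0);                /* new task: no update */
        enqueue_task(ENQUEUE_WAKEUP);   /* real wakeup: update */
        return 0;
}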