Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'timers-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull timer fixes from Ingo Molnar:
"Fix four timer locking races: two were noticed by Linus while
reviewing the code while chasing a corruption bug, and two
were found while fixing spurious USB timeouts"

* 'timers-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
timers: Prevent base clock corruption when forwarding
timers: Prevent base clock rewind when forwarding clock
timers: Lock base for same bucket optimization
timers: Plug locking race vs. timer migration

+46 -32
+46 -32
kernel/time/timer.c
··· 878 878 879 879 #ifdef CONFIG_NO_HZ_COMMON 880 880 static inline struct timer_base * 881 - __get_target_base(struct timer_base *base, unsigned tflags) 881 + get_target_base(struct timer_base *base, unsigned tflags) 882 882 { 883 883 #ifdef CONFIG_SMP 884 884 if ((tflags & TIMER_PINNED) || !base->migration_enabled) ··· 891 891 892 892 static inline void forward_timer_base(struct timer_base *base) 893 893 { 894 + unsigned long jnow = READ_ONCE(jiffies); 895 + 894 896 /* 895 897 * We only forward the base when it's idle and we have a delta between 896 898 * base clock and jiffies. 897 899 */ 898 - if (!base->is_idle || (long) (jiffies - base->clk) < 2) 900 + if (!base->is_idle || (long) (jnow - base->clk) < 2) 899 901 return; 900 902 901 903 /* 902 904 * If the next expiry value is > jiffies, then we fast forward to 903 905 * jiffies otherwise we forward to the next expiry value. 904 906 */ 905 - if (time_after(base->next_expiry, jiffies)) 906 - base->clk = jiffies; 907 + if (time_after(base->next_expiry, jnow)) 908 + base->clk = jnow; 907 909 else 908 910 base->clk = base->next_expiry; 909 911 } 910 912 #else 911 913 static inline struct timer_base * 912 - __get_target_base(struct timer_base *base, unsigned tflags) 914 + get_target_base(struct timer_base *base, unsigned tflags) 913 915 { 914 916 return get_timer_this_cpu_base(tflags); 915 917 } ··· 919 917 static inline void forward_timer_base(struct timer_base *base) { } 920 918 #endif 921 919 922 - static inline struct timer_base * 923 - get_target_base(struct timer_base *base, unsigned tflags) 924 - { 925 - struct timer_base *target = __get_target_base(base, tflags); 926 - 927 - forward_timer_base(target); 928 - return target; 929 - } 930 920 931 921 /* 932 922 * We are using hashed locking: Holding per_cpu(timer_bases[x]).lock means ··· 937 943 { 938 944 for (;;) { 939 945 struct timer_base *base; 940 - u32 tf = timer->flags; 946 + u32 tf; 947 + 948 + /* 949 + * We need to use READ_ONCE() here, otherwise the 
compiler 950 + * might re-read @tf between the check for TIMER_MIGRATING 951 + * and spin_lock(). 952 + */ 953 + tf = READ_ONCE(timer->flags); 941 954 942 955 if (!(tf & TIMER_MIGRATING)) { 943 956 base = get_timer_base(tf); ··· 965 964 unsigned long clk = 0, flags; 966 965 int ret = 0; 967 966 967 + BUG_ON(!timer->function); 968 + 968 969 /* 969 970 * This is a common optimization triggered by the networking code - if 970 971 * the timer is re-modified to have the same timeout or ends up in the ··· 975 972 if (timer_pending(timer)) { 976 973 if (timer->expires == expires) 977 974 return 1; 978 - /* 979 - * Take the current timer_jiffies of base, but without holding 980 - * the lock! 981 - */ 982 - base = get_timer_base(timer->flags); 983 - clk = base->clk; 984 975 976 + /* 977 + * We lock timer base and calculate the bucket index right 978 + * here. If the timer ends up in the same bucket, then we 979 + * just update the expiry time and avoid the whole 980 + * dequeue/enqueue dance. 981 + */ 982 + base = lock_timer_base(timer, &flags); 983 + 984 + clk = base->clk; 985 985 idx = calc_wheel_index(expires, clk); 986 986 987 987 /* ··· 994 988 */ 995 989 if (idx == timer_get_idx(timer)) { 996 990 timer->expires = expires; 997 - return 1; 991 + ret = 1; 992 + goto out_unlock; 998 993 } 994 + } else { 995 + base = lock_timer_base(timer, &flags); 999 996 } 1000 997 1001 998 timer_stats_timer_set_start_info(timer); 1002 - BUG_ON(!timer->function); 1003 - 1004 - base = lock_timer_base(timer, &flags); 1005 999 1006 1000 ret = detach_if_pending(timer, base, false); 1007 1001 if (!ret && pending_only) ··· 1031 1025 } 1032 1026 } 1033 1027 1028 + /* Try to forward a stale timer base clock */ 1029 + forward_timer_base(base); 1030 + 1034 1031 timer->expires = expires; 1035 1032 /* 1036 1033 * If 'idx' was calculated above and the base time did not advance 1037 - * between calculating 'idx' and taking the lock, only enqueue_timer() 1038 - * and trigger_dyntick_cpu() is required. 
Otherwise we need to 1039 - * (re)calculate the wheel index via internal_add_timer(). 1034 + * between calculating 'idx' and possibly switching the base, only 1035 + * enqueue_timer() and trigger_dyntick_cpu() is required. Otherwise 1036 + * we need to (re)calculate the wheel index via 1037 + * internal_add_timer(). 1040 1038 */ 1041 1039 if (idx != UINT_MAX && clk == base->clk) { 1042 1040 enqueue_timer(base, timer, idx); ··· 1520 1510 is_max_delta = (nextevt == base->clk + NEXT_TIMER_MAX_DELTA); 1521 1511 base->next_expiry = nextevt; 1522 1512 /* 1523 - * We have a fresh next event. Check whether we can forward the base: 1513 + * We have a fresh next event. Check whether we can forward the 1514 + * base. We can only do that when @basej is past base->clk 1515 + * otherwise we might rewind base->clk. 1524 1516 */ 1525 - if (time_after(nextevt, jiffies)) 1526 - base->clk = jiffies; 1527 - else if (time_after(nextevt, base->clk)) 1528 - base->clk = nextevt; 1517 + if (time_after(basej, base->clk)) { 1518 + if (time_after(nextevt, basej)) 1519 + base->clk = basej; 1520 + else if (time_after(nextevt, base->clk)) 1521 + base->clk = nextevt; 1522 + } 1529 1523 1530 1524 if (time_before_eq(nextevt, basej)) { 1531 1525 expires = basem;