Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'rcu-fixes.v7.0-20260325a' of git://git.kernel.org/pub/scm/linux/kernel/git/rcu/linux

Pull RCU fixes from Boqun Feng:
"Fix a regression introduced by commit c27cea4416a3 ("rcu: Re-implement
RCU Tasks Trace in terms of SRCU-fast"): BPF contexts can run with
preemption disabled or scheduler locks held, so call_srcu() must work
in all such contexts.

Fix this by converting SRCU's spinlocks to raw spinlocks and avoiding
scheduler lock acquisition in call_srcu() by deferring to an irq_work
(similar to call_rcu_tasks_generic()), for both tree SRCU and tiny
SRCU.

Also fix a follow-on lockdep splat caused by srcu_node allocation
under the newly introduced raw spinlock by deferring the allocation to
grace-period worker context"

* tag 'rcu-fixes.v7.0-20260325a' of git://git.kernel.org/pub/scm/linux/kernel/git/rcu/linux:
srcu: Use irq_work to start GP in tiny SRCU
rcu: Use an intermediate irq_work to start process_srcu()
srcu: Push srcu_node allocation to GP when non-preemptible
srcu: Use raw spinlocks so call_srcu() can be used under preempt_disable()
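
The user-visible effect of the series is that call_srcu() becomes safe to invoke from contexts that cannot take sleeping or scheduler locks. A minimal sketch of the usage pattern this enables (the SRCU domain, structure, and function names below are illustrative, not taken from the patches):

#include <linux/srcu.h>
#include <linux/slab.h>

DEFINE_SRCU(example_srcu);          /* illustrative SRCU domain */

struct example_obj {
    struct rcu_head rh;
    int payload;
};

static void example_free_cb(struct rcu_head *rh)
{
    kfree(container_of(rh, struct example_obj, rh));
}

/*
 * With this series applied, queueing the callback is legal even when the
 * caller runs with preemption disabled or with scheduler locks held, e.g.
 * from a BPF program attached to a scheduling tracepoint.
 */
static void example_defer_free(struct example_obj *obj)
{
    call_srcu(&example_srcu, &obj->rh, example_free_cb);
}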

+138 -114
+4
include/linux/srcutiny.h
···
 #ifndef _LINUX_SRCU_TINY_H
 #define _LINUX_SRCU_TINY_H

+#include <linux/irq_work_types.h>
 #include <linux/swait.h>

 struct srcu_struct {
···
     struct rcu_head *srcu_cb_head;   /* Pending callbacks: Head. */
     struct rcu_head **srcu_cb_tail;  /* Pending callbacks: Tail. */
     struct work_struct srcu_work;    /* For driving grace periods. */
+    struct irq_work srcu_irq_work;   /* Defer schedule_work() to irq work. */
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
     struct lockdep_map dep_map;
 #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 };

 void srcu_drive_gp(struct work_struct *wp);
+void srcu_tiny_irq_work(struct irq_work *irq_work);

 #define __SRCU_STRUCT_INIT(name, __ignored, ___ignored, ____ignored) \
 { \
     .srcu_wq = __SWAIT_QUEUE_HEAD_INITIALIZER(name.srcu_wq), \
     .srcu_cb_tail = &name.srcu_cb_head, \
     .srcu_work = __WORK_INITIALIZER(name.srcu_work, srcu_drive_gp), \
+    .srcu_irq_work = { .func = srcu_tiny_irq_work }, \
     __SRCU_DEP_MAP_INIT(name) \
 }
+5 -4
include/linux/srcutree.h
···
                                      /* Values: SRCU_READ_FLAVOR_.* */

     /* Update-side state. */
-    spinlock_t __private lock ____cacheline_internodealigned_in_smp;
+    raw_spinlock_t __private lock ____cacheline_internodealigned_in_smp;
     struct rcu_segcblist srcu_cblist;      /* List of callbacks.*/
     unsigned long srcu_gp_seq_needed;      /* Furthest future GP needed. */
     unsigned long srcu_gp_seq_needed_exp;  /* Furthest future exp GP. */
···
  * Node in SRCU combining tree, similar in function to rcu_data.
  */
 struct srcu_node {
-    spinlock_t __private lock;
+    raw_spinlock_t __private lock;
     unsigned long srcu_have_cbs[4];        /* GP seq for children having CBs, but only */
                                            /*  if greater than ->srcu_gp_seq. */
     unsigned long srcu_data_have_cbs[4];   /* Which srcu_data structs have CBs for given GP? */
···
                                            /* First node at each level. */
     int srcu_size_state;                   /* Small-to-big transition state. */
     struct mutex srcu_cb_mutex;            /* Serialize CB preparation. */
-    spinlock_t __private lock;             /* Protect counters and size state. */
+    raw_spinlock_t __private lock;         /* Protect counters and size state. */
     struct mutex srcu_gp_mutex;            /* Serialize GP work. */
     unsigned long srcu_gp_seq;             /* Grace-period seq #. */
     unsigned long srcu_gp_seq_needed;      /* Latest gp_seq needed. */
···
     unsigned long reschedule_jiffies;
     unsigned long reschedule_count;
     struct delayed_work work;
+    struct irq_work irq_work;
     struct srcu_struct *srcu_ssp;
 };
···

 #define __SRCU_USAGE_INIT(name) \
 { \
-    .lock = __SPIN_LOCK_UNLOCKED(name.lock), \
+    .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \
     .srcu_gp_seq = SRCU_GP_SEQ_INITIAL_VAL, \
     .srcu_gp_seq_needed = SRCU_GP_SEQ_INITIAL_VAL_WITH_STATE, \
     .srcu_gp_seq_needed_exp = SRCU_GP_SEQ_INITIAL_VAL, \
+9
kernel/rcu/rcu.h
···
     ___locked; \
 })

+#define raw_spin_trylock_irqsave_rcu_node(p, flags) \
+({ \
+    bool ___locked = raw_spin_trylock_irqsave(&ACCESS_PRIVATE(p, lock), flags); \
+ \
+    if (___locked) \
+        smp_mb__after_unlock_lock(); \
+    ___locked; \
+})
+
 #define raw_lockdep_assert_held_rcu_node(p) \
     lockdep_assert_held(&ACCESS_PRIVATE(p, lock))

+18 -1
kernel/rcu/srcutiny.c
···
  */

 #include <linux/export.h>
+#include <linux/irq_work.h>
 #include <linux/mutex.h>
 #include <linux/preempt.h>
 #include <linux/rcupdate_wait.h>
···
     ssp->srcu_idx_max = 0;
     INIT_WORK(&ssp->srcu_work, srcu_drive_gp);
     INIT_LIST_HEAD(&ssp->srcu_work.entry);
+    init_irq_work(&ssp->srcu_irq_work, srcu_tiny_irq_work);
     return 0;
 }
···
 void cleanup_srcu_struct(struct srcu_struct *ssp)
 {
     WARN_ON(ssp->srcu_lock_nesting[0] || ssp->srcu_lock_nesting[1]);
+    irq_work_sync(&ssp->srcu_irq_work);
     flush_work(&ssp->srcu_work);
     WARN_ON(ssp->srcu_gp_running);
     WARN_ON(ssp->srcu_gp_waiting);
···
 }
 EXPORT_SYMBOL_GPL(srcu_drive_gp);

+/*
+ * Use an irq_work to defer schedule_work() to avoid acquiring the workqueue
+ * pool->lock while the caller might hold scheduler locks, causing lockdep
+ * splats due to workqueue_init() doing a wakeup.
+ */
+void srcu_tiny_irq_work(struct irq_work *irq_work)
+{
+    struct srcu_struct *ssp;
+
+    ssp = container_of(irq_work, struct srcu_struct, srcu_irq_work);
+    schedule_work(&ssp->srcu_work);
+}
+EXPORT_SYMBOL_GPL(srcu_tiny_irq_work);
+
 static void srcu_gp_start_if_needed(struct srcu_struct *ssp)
 {
     unsigned long cookie;
···
     WRITE_ONCE(ssp->srcu_idx_max, cookie);
     if (!READ_ONCE(ssp->srcu_gp_running)) {
         if (likely(srcu_init_done))
-            schedule_work(&ssp->srcu_work);
+            irq_work_queue(&ssp->srcu_irq_work);
         else if (list_empty(&ssp->srcu_work.entry))
             list_add(&ssp->srcu_work.entry, &srcu_boot_list);
     }
+102 -109
kernel/rcu/srcutree.c
···
 #include <linux/mutex.h>
 #include <linux/percpu.h>
 #include <linux/preempt.h>
+#include <linux/irq_work.h>
 #include <linux/rcupdate_wait.h>
 #include <linux/sched.h>
 #include <linux/smp.h>
···
 static void srcu_invoke_callbacks(struct work_struct *work);
 static void srcu_reschedule(struct srcu_struct *ssp, unsigned long delay);
 static void process_srcu(struct work_struct *work);
+static void srcu_irq_work(struct irq_work *work);
 static void srcu_delay_timer(struct timer_list *t);
-
-/* Wrappers for lock acquisition and release, see raw_spin_lock_rcu_node(). */
-#define spin_lock_rcu_node(p) \
-do { \
-    spin_lock(&ACCESS_PRIVATE(p, lock)); \
-    smp_mb__after_unlock_lock(); \
-} while (0)
-
-#define spin_unlock_rcu_node(p) spin_unlock(&ACCESS_PRIVATE(p, lock))
-
-#define spin_lock_irq_rcu_node(p) \
-do { \
-    spin_lock_irq(&ACCESS_PRIVATE(p, lock)); \
-    smp_mb__after_unlock_lock(); \
-} while (0)
-
-#define spin_unlock_irq_rcu_node(p) \
-    spin_unlock_irq(&ACCESS_PRIVATE(p, lock))
-
-#define spin_lock_irqsave_rcu_node(p, flags) \
-do { \
-    spin_lock_irqsave(&ACCESS_PRIVATE(p, lock), flags); \
-    smp_mb__after_unlock_lock(); \
-} while (0)
-
-#define spin_trylock_irqsave_rcu_node(p, flags) \
-({ \
-    bool ___locked = spin_trylock_irqsave(&ACCESS_PRIVATE(p, lock), flags); \
- \
-    if (___locked) \
-        smp_mb__after_unlock_lock(); \
-    ___locked; \
-})
-
-#define spin_unlock_irqrestore_rcu_node(p, flags) \
-    spin_unlock_irqrestore(&ACCESS_PRIVATE(p, lock), flags) \

 /*
  * Initialize SRCU per-CPU data.  Note that statically allocated
···
  */
     for_each_possible_cpu(cpu) {
         sdp = per_cpu_ptr(ssp->sda, cpu);
-        spin_lock_init(&ACCESS_PRIVATE(sdp, lock));
+        raw_spin_lock_init(&ACCESS_PRIVATE(sdp, lock));
         rcu_segcblist_init(&sdp->srcu_cblist);
         sdp->srcu_cblist_invoking = false;
         sdp->srcu_gp_seq_needed = ssp->srcu_sup->srcu_gp_seq;
···

     /* Each pass through this loop initializes one srcu_node structure. */
     srcu_for_each_node_breadth_first(ssp, snp) {
-        spin_lock_init(&ACCESS_PRIVATE(snp, lock));
+        raw_spin_lock_init(&ACCESS_PRIVATE(snp, lock));
         BUILD_BUG_ON(ARRAY_SIZE(snp->srcu_have_cbs) !=
                      ARRAY_SIZE(snp->srcu_data_have_cbs));
         for (i = 0; i < ARRAY_SIZE(snp->srcu_have_cbs); i++) {
···
     if (!ssp->srcu_sup)
         return -ENOMEM;
     if (!is_static)
-        spin_lock_init(&ACCESS_PRIVATE(ssp->srcu_sup, lock));
+        raw_spin_lock_init(&ACCESS_PRIVATE(ssp->srcu_sup, lock));
     ssp->srcu_sup->srcu_size_state = SRCU_SIZE_SMALL;
     ssp->srcu_sup->node = NULL;
     mutex_init(&ssp->srcu_sup->srcu_cb_mutex);
···
     mutex_init(&ssp->srcu_sup->srcu_barrier_mutex);
     atomic_set(&ssp->srcu_sup->srcu_barrier_cpu_cnt, 0);
     INIT_DELAYED_WORK(&ssp->srcu_sup->work, process_srcu);
+    init_irq_work(&ssp->srcu_sup->irq_work, srcu_irq_work);
     ssp->srcu_sup->sda_is_static = is_static;
     if (!is_static) {
         ssp->sda = alloc_percpu(struct srcu_data);
···
     ssp->srcu_sup->srcu_gp_seq_needed_exp = SRCU_GP_SEQ_INITIAL_VAL;
     ssp->srcu_sup->srcu_last_gp_end = ktime_get_mono_fast_ns();
     if (READ_ONCE(ssp->srcu_sup->srcu_size_state) == SRCU_SIZE_SMALL && SRCU_SIZING_IS_INIT()) {
-        if (!init_srcu_struct_nodes(ssp, is_static ? GFP_ATOMIC : GFP_KERNEL))
+        if (!preemptible())
+            WRITE_ONCE(ssp->srcu_sup->srcu_size_state, SRCU_SIZE_ALLOC);
+        else if (init_srcu_struct_nodes(ssp, GFP_KERNEL))
+            WRITE_ONCE(ssp->srcu_sup->srcu_size_state, SRCU_SIZE_BIG);
+        else
             goto err_free_sda;
-        WRITE_ONCE(ssp->srcu_sup->srcu_size_state, SRCU_SIZE_BIG);
     }
     ssp->srcu_sup->srcu_ssp = ssp;
     smp_store_release(&ssp->srcu_sup->srcu_gp_seq_needed,
···
     /* Double-checked locking on ->srcu_size-state. */
     if (smp_load_acquire(&ssp->srcu_sup->srcu_size_state) != SRCU_SIZE_SMALL)
         return;
-    spin_lock_irqsave_rcu_node(ssp->srcu_sup, flags);
+    raw_spin_lock_irqsave_rcu_node(ssp->srcu_sup, flags);
     if (smp_load_acquire(&ssp->srcu_sup->srcu_size_state) != SRCU_SIZE_SMALL) {
-        spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags);
+        raw_spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags);
         return;
     }
     __srcu_transition_to_big(ssp);
-    spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags);
+    raw_spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags);
 }

 /*
  * Check to see if the just-encountered contention event justifies
  * a transition to SRCU_SIZE_BIG.
  */
-static void spin_lock_irqsave_check_contention(struct srcu_struct *ssp)
+static void raw_spin_lock_irqsave_check_contention(struct srcu_struct *ssp)
 {
     unsigned long j;

···
  * to SRCU_SIZE_BIG.  But only if the srcutree.convert_to_big module
  * parameter permits this.
  */
-static void spin_lock_irqsave_sdp_contention(struct srcu_data *sdp, unsigned long *flags)
+static void raw_spin_lock_irqsave_sdp_contention(struct srcu_data *sdp, unsigned long *flags)
 {
     struct srcu_struct *ssp = sdp->ssp;

-    if (spin_trylock_irqsave_rcu_node(sdp, *flags))
+    if (raw_spin_trylock_irqsave_rcu_node(sdp, *flags))
         return;
-    spin_lock_irqsave_rcu_node(ssp->srcu_sup, *flags);
-    spin_lock_irqsave_check_contention(ssp);
-    spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, *flags);
-    spin_lock_irqsave_rcu_node(sdp, *flags);
+    raw_spin_lock_irqsave_rcu_node(ssp->srcu_sup, *flags);
+    raw_spin_lock_irqsave_check_contention(ssp);
+    raw_spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, *flags);
+    raw_spin_lock_irqsave_rcu_node(sdp, *flags);
 }

 /*
···
  * to SRCU_SIZE_BIG.  But only if the srcutree.convert_to_big module
  * parameter permits this.
  */
-static void spin_lock_irqsave_ssp_contention(struct srcu_struct *ssp, unsigned long *flags)
+static void raw_spin_lock_irqsave_ssp_contention(struct srcu_struct *ssp, unsigned long *flags)
 {
-    if (spin_trylock_irqsave_rcu_node(ssp->srcu_sup, *flags))
+    if (raw_spin_trylock_irqsave_rcu_node(ssp->srcu_sup, *flags))
         return;
-    spin_lock_irqsave_rcu_node(ssp->srcu_sup, *flags);
-    spin_lock_irqsave_check_contention(ssp);
+    raw_spin_lock_irqsave_rcu_node(ssp->srcu_sup, *flags);
+    raw_spin_lock_irqsave_check_contention(ssp);
 }

 /*
···
     /* The smp_load_acquire() pairs with the smp_store_release(). */
     if (!rcu_seq_state(smp_load_acquire(&ssp->srcu_sup->srcu_gp_seq_needed))) /*^^^*/
         return; /* Already initialized. */
-    spin_lock_irqsave_rcu_node(ssp->srcu_sup, flags);
+    raw_spin_lock_irqsave_rcu_node(ssp->srcu_sup, flags);
     if (!rcu_seq_state(ssp->srcu_sup->srcu_gp_seq_needed)) {
-        spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags);
+        raw_spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags);
         return;
     }
     init_srcu_struct_fields(ssp, true);
-    spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags);
+    raw_spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags);
 }

 /*
···
     unsigned long delay;
     struct srcu_usage *sup = ssp->srcu_sup;

-    spin_lock_irq_rcu_node(ssp->srcu_sup);
+    raw_spin_lock_irq_rcu_node(ssp->srcu_sup);
     delay = srcu_get_delay(ssp);
-    spin_unlock_irq_rcu_node(ssp->srcu_sup);
+    raw_spin_unlock_irq_rcu_node(ssp->srcu_sup);
     if (WARN_ON(!delay))
         return; /* Just leak it! */
     if (WARN_ON(srcu_readers_active(ssp)))
         return; /* Just leak it! */
+    /* Wait for irq_work to finish first as it may queue a new work. */
+    irq_work_sync(&sup->irq_work);
     flush_delayed_work(&sup->work);
     for_each_possible_cpu(cpu) {
         struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu);
···
     mutex_lock(&sup->srcu_cb_mutex);

     /* End the current grace period. */
-    spin_lock_irq_rcu_node(sup);
+    raw_spin_lock_irq_rcu_node(sup);
     idx = rcu_seq_state(sup->srcu_gp_seq);
     WARN_ON_ONCE(idx != SRCU_STATE_SCAN2);
     if (srcu_gp_is_expedited(ssp))
···
     gpseq = rcu_seq_current(&sup->srcu_gp_seq);
     if (ULONG_CMP_LT(sup->srcu_gp_seq_needed_exp, gpseq))
         WRITE_ONCE(sup->srcu_gp_seq_needed_exp, gpseq);
-    spin_unlock_irq_rcu_node(sup);
+    raw_spin_unlock_irq_rcu_node(sup);
     mutex_unlock(&sup->srcu_gp_mutex);
     /* A new grace period can start at this point.  But only one. */
···
     } else {
         idx = rcu_seq_ctr(gpseq) % ARRAY_SIZE(snp->srcu_have_cbs);
         srcu_for_each_node_breadth_first(ssp, snp) {
-            spin_lock_irq_rcu_node(snp);
+            raw_spin_lock_irq_rcu_node(snp);
             cbs = false;
             last_lvl = snp >= sup->level[rcu_num_lvls - 1];
             if (last_lvl)
···
             else
                 mask = snp->srcu_data_have_cbs[idx];
             snp->srcu_data_have_cbs[idx] = 0;
-            spin_unlock_irq_rcu_node(snp);
+            raw_spin_unlock_irq_rcu_node(snp);
             if (cbs)
                 srcu_schedule_cbs_snp(ssp, snp, mask, cbdelay);
         }
···
     if (!(gpseq & counter_wrap_check))
         for_each_possible_cpu(cpu) {
             sdp = per_cpu_ptr(ssp->sda, cpu);
-            spin_lock_irq_rcu_node(sdp);
+            raw_spin_lock_irq_rcu_node(sdp);
             if (ULONG_CMP_GE(gpseq, sdp->srcu_gp_seq_needed + 100))
                 sdp->srcu_gp_seq_needed = gpseq;
             if (ULONG_CMP_GE(gpseq, sdp->srcu_gp_seq_needed_exp + 100))
                 sdp->srcu_gp_seq_needed_exp = gpseq;
-            spin_unlock_irq_rcu_node(sdp);
+            raw_spin_unlock_irq_rcu_node(sdp);
         }

     /* Callback initiation done, allow grace periods after next. */
     mutex_unlock(&sup->srcu_cb_mutex);

     /* Start a new grace period if needed. */
-    spin_lock_irq_rcu_node(sup);
+    raw_spin_lock_irq_rcu_node(sup);
     gpseq = rcu_seq_current(&sup->srcu_gp_seq);
     if (!rcu_seq_state(gpseq) &&
         ULONG_CMP_LT(gpseq, sup->srcu_gp_seq_needed)) {
         srcu_gp_start(ssp);
-        spin_unlock_irq_rcu_node(sup);
+        raw_spin_unlock_irq_rcu_node(sup);
         srcu_reschedule(ssp, 0);
     } else {
-        spin_unlock_irq_rcu_node(sup);
+        raw_spin_unlock_irq_rcu_node(sup);
     }

     /* Transition to big if needed. */
···
         if (WARN_ON_ONCE(rcu_seq_done(&ssp->srcu_sup->srcu_gp_seq, s)) ||
             (!srcu_invl_snp_seq(sgsne) && ULONG_CMP_GE(sgsne, s)))
             return;
-        spin_lock_irqsave_rcu_node(snp, flags);
+        raw_spin_lock_irqsave_rcu_node(snp, flags);
         sgsne = snp->srcu_gp_seq_needed_exp;
         if (!srcu_invl_snp_seq(sgsne) && ULONG_CMP_GE(sgsne, s)) {
-            spin_unlock_irqrestore_rcu_node(snp, flags);
+            raw_spin_unlock_irqrestore_rcu_node(snp, flags);
             return;
         }
         WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s);
-        spin_unlock_irqrestore_rcu_node(snp, flags);
+        raw_spin_unlock_irqrestore_rcu_node(snp, flags);
     }
-    spin_lock_irqsave_ssp_contention(ssp, &flags);
+    raw_spin_lock_irqsave_ssp_contention(ssp, &flags);
     if (ULONG_CMP_LT(ssp->srcu_sup->srcu_gp_seq_needed_exp, s))
         WRITE_ONCE(ssp->srcu_sup->srcu_gp_seq_needed_exp, s);
-    spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags);
+    raw_spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags);
 }

 /*
···
     for (snp = snp_leaf; snp != NULL; snp = snp->srcu_parent) {
         if (WARN_ON_ONCE(rcu_seq_done(&sup->srcu_gp_seq, s)) && snp != snp_leaf)
             return; /* GP already done and CBs recorded. */
-        spin_lock_irqsave_rcu_node(snp, flags);
+        raw_spin_lock_irqsave_rcu_node(snp, flags);
         snp_seq = snp->srcu_have_cbs[idx];
         if (!srcu_invl_snp_seq(snp_seq) && ULONG_CMP_GE(snp_seq, s)) {
             if (snp == snp_leaf && snp_seq == s)
                 snp->srcu_data_have_cbs[idx] |= sdp->grpmask;
-            spin_unlock_irqrestore_rcu_node(snp, flags);
+            raw_spin_unlock_irqrestore_rcu_node(snp, flags);
             if (snp == snp_leaf && snp_seq != s) {
                 srcu_schedule_cbs_sdp(sdp, do_norm ? SRCU_INTERVAL : 0);
                 return;
···
         sgsne = snp->srcu_gp_seq_needed_exp;
         if (!do_norm && (srcu_invl_snp_seq(sgsne) || ULONG_CMP_LT(sgsne, s)))
             WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s);
-        spin_unlock_irqrestore_rcu_node(snp, flags);
+        raw_spin_unlock_irqrestore_rcu_node(snp, flags);
     }

     /* Top of tree, must ensure the grace period will be started. */
-    spin_lock_irqsave_ssp_contention(ssp, &flags);
+    raw_spin_lock_irqsave_ssp_contention(ssp, &flags);
     if (ULONG_CMP_LT(sup->srcu_gp_seq_needed, s)) {
         /*
          * Record need for grace period s.  Pair with load
···
         // it isn't.  And it does not have to be.  After all, it
         // can only be executed during early boot when there is only
         // the one boot CPU running with interrupts still disabled.
+        //
+        // Use an irq_work here to avoid acquiring runqueue lock with
+        // srcu rcu_node::lock held. BPF instrument could introduce the
+        // opposite dependency, hence we need to break the possible
+        // locking dependency here.
         if (likely(srcu_init_done))
-            queue_delayed_work(rcu_gp_wq, &sup->work,
-                               !!srcu_get_delay(ssp));
+            irq_work_queue(&sup->irq_work);
         else if (list_empty(&sup->work.work.entry))
             list_add(&sup->work.work.entry, &srcu_boot_list);
     }
-    spin_unlock_irqrestore_rcu_node(sup, flags);
+    raw_spin_unlock_irqrestore_rcu_node(sup, flags);
 }

 /*
···
 {
     unsigned long curdelay;

-    spin_lock_irq_rcu_node(ssp->srcu_sup);
+    raw_spin_lock_irq_rcu_node(ssp->srcu_sup);
     curdelay = !srcu_get_delay(ssp);
-    spin_unlock_irq_rcu_node(ssp->srcu_sup);
+    raw_spin_unlock_irq_rcu_node(ssp->srcu_sup);

     for (;;) {
         if (srcu_readers_active_idx_check(ssp, idx))
···
         return false;
     /* If the local srcu_data structure has callbacks, not idle. */
     sdp = raw_cpu_ptr(ssp->sda);
-    spin_lock_irqsave_rcu_node(sdp, flags);
+    raw_spin_lock_irqsave_rcu_node(sdp, flags);
     if (rcu_segcblist_pend_cbs(&sdp->srcu_cblist)) {
-        spin_unlock_irqrestore_rcu_node(sdp, flags);
+        raw_spin_unlock_irqrestore_rcu_node(sdp, flags);
         return false; /* Callbacks already present, so not idle. */
     }
-    spin_unlock_irqrestore_rcu_node(sdp, flags);
+    raw_spin_unlock_irqrestore_rcu_node(sdp, flags);

     /*
      * No local callbacks, so probabilistically probe global state.
···
         sdp = per_cpu_ptr(ssp->sda, get_boot_cpu_id());
     else
         sdp = raw_cpu_ptr(ssp->sda);
-    spin_lock_irqsave_sdp_contention(sdp, &flags);
+    raw_spin_lock_irqsave_sdp_contention(sdp, &flags);
     if (rhp)
         rcu_segcblist_enqueue(&sdp->srcu_cblist, rhp);
     /*
···
         sdp->srcu_gp_seq_needed_exp = s;
         needexp = true;
     }
-    spin_unlock_irqrestore_rcu_node(sdp, flags);
+    raw_spin_unlock_irqrestore_rcu_node(sdp, flags);

     /* Ensure that snp node tree is fully initialized before traversing it */
     if (ss_state < SRCU_SIZE_WAIT_BARRIER)
···

     /*
      * Make sure that later code is ordered after the SRCU grace
-     * period.  This pairs with the spin_lock_irq_rcu_node()
+     * period.  This pairs with the raw_spin_lock_irq_rcu_node()
      * in srcu_invoke_callbacks().  Unlike Tree RCU, this is needed
      * because the current CPU might have been totally uninvolved with
      * (and thus unordered against) that grace period.
···
  */
 static void srcu_barrier_one_cpu(struct srcu_struct *ssp, struct srcu_data *sdp)
 {
-    spin_lock_irq_rcu_node(sdp);
+    raw_spin_lock_irq_rcu_node(sdp);
     atomic_inc(&ssp->srcu_sup->srcu_barrier_cpu_cnt);
     sdp->srcu_barrier_head.func = srcu_barrier_cb;
     debug_rcu_head_queue(&sdp->srcu_barrier_head);
···
         debug_rcu_head_unqueue(&sdp->srcu_barrier_head);
         atomic_dec(&ssp->srcu_sup->srcu_barrier_cpu_cnt);
     }
-    spin_unlock_irq_rcu_node(sdp);
+    raw_spin_unlock_irq_rcu_node(sdp);
 }

 /**
···
     bool needcb = false;
     struct srcu_data *sdp = container_of(rhp, struct srcu_data, srcu_ec_head);

-    spin_lock_irqsave_sdp_contention(sdp, &flags);
+    raw_spin_lock_irqsave_sdp_contention(sdp, &flags);
     if (sdp->srcu_ec_state == SRCU_EC_IDLE) {
         WARN_ON_ONCE(1);
     } else if (sdp->srcu_ec_state == SRCU_EC_PENDING) {
···
         sdp->srcu_ec_state = SRCU_EC_PENDING;
         needcb = true;
     }
-    spin_unlock_irqrestore_rcu_node(sdp, flags);
+    raw_spin_unlock_irqrestore_rcu_node(sdp, flags);
     // If needed, requeue ourselves as an expedited SRCU callback.
     if (needcb)
         __call_srcu(sdp->ssp, &sdp->srcu_ec_head, srcu_expedite_current_cb, false);
···

     migrate_disable();
     sdp = this_cpu_ptr(ssp->sda);
-    spin_lock_irqsave_sdp_contention(sdp, &flags);
+    raw_spin_lock_irqsave_sdp_contention(sdp, &flags);
     if (sdp->srcu_ec_state == SRCU_EC_IDLE) {
         sdp->srcu_ec_state = SRCU_EC_PENDING;
         needcb = true;
···
     } else {
         WARN_ON_ONCE(sdp->srcu_ec_state != SRCU_EC_REPOST);
     }
-    spin_unlock_irqrestore_rcu_node(sdp, flags);
+    raw_spin_unlock_irqrestore_rcu_node(sdp, flags);
     // If needed, queue an expedited SRCU callback.
     if (needcb)
         __call_srcu(ssp, &sdp->srcu_ec_head, srcu_expedite_current_cb, false);
···
      */
     idx = rcu_seq_state(smp_load_acquire(&ssp->srcu_sup->srcu_gp_seq)); /* ^^^ */
     if (idx == SRCU_STATE_IDLE) {
-        spin_lock_irq_rcu_node(ssp->srcu_sup);
+        raw_spin_lock_irq_rcu_node(ssp->srcu_sup);
         if (ULONG_CMP_GE(ssp->srcu_sup->srcu_gp_seq, ssp->srcu_sup->srcu_gp_seq_needed)) {
             WARN_ON_ONCE(rcu_seq_state(ssp->srcu_sup->srcu_gp_seq));
-            spin_unlock_irq_rcu_node(ssp->srcu_sup);
+            raw_spin_unlock_irq_rcu_node(ssp->srcu_sup);
             mutex_unlock(&ssp->srcu_sup->srcu_gp_mutex);
             return;
         }
         idx = rcu_seq_state(READ_ONCE(ssp->srcu_sup->srcu_gp_seq));
         if (idx == SRCU_STATE_IDLE)
             srcu_gp_start(ssp);
-        spin_unlock_irq_rcu_node(ssp->srcu_sup);
+        raw_spin_unlock_irq_rcu_node(ssp->srcu_sup);
         if (idx != SRCU_STATE_IDLE) {
             mutex_unlock(&ssp->srcu_sup->srcu_gp_mutex);
             return; /* Someone else started the grace period. */
···
             return; /* readers present, retry later. */
         }
         srcu_flip(ssp);
-        spin_lock_irq_rcu_node(ssp->srcu_sup);
+        raw_spin_lock_irq_rcu_node(ssp->srcu_sup);
         rcu_seq_set_state(&ssp->srcu_sup->srcu_gp_seq, SRCU_STATE_SCAN2);
         ssp->srcu_sup->srcu_n_exp_nodelay = 0;
-        spin_unlock_irq_rcu_node(ssp->srcu_sup);
+        raw_spin_unlock_irq_rcu_node(ssp->srcu_sup);
     }

     if (rcu_seq_state(READ_ONCE(ssp->srcu_sup->srcu_gp_seq)) == SRCU_STATE_SCAN2) {
···

     ssp = sdp->ssp;
     rcu_cblist_init(&ready_cbs);
-    spin_lock_irq_rcu_node(sdp);
+    raw_spin_lock_irq_rcu_node(sdp);
     WARN_ON_ONCE(!rcu_segcblist_segempty(&sdp->srcu_cblist, RCU_NEXT_TAIL));
     rcu_segcblist_advance(&sdp->srcu_cblist,
                           rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq));
···
      */
     if (sdp->srcu_cblist_invoking ||
         !rcu_segcblist_ready_cbs(&sdp->srcu_cblist)) {
-        spin_unlock_irq_rcu_node(sdp);
+        raw_spin_unlock_irq_rcu_node(sdp);
         return; /* Someone else on the job or nothing to do. */
     }
···
     sdp->srcu_cblist_invoking = true;
     rcu_segcblist_extract_done_cbs(&sdp->srcu_cblist, &ready_cbs);
     len = ready_cbs.len;
-    spin_unlock_irq_rcu_node(sdp);
+    raw_spin_unlock_irq_rcu_node(sdp);
     rhp = rcu_cblist_dequeue(&ready_cbs);
     for (; rhp != NULL; rhp = rcu_cblist_dequeue(&ready_cbs)) {
         debug_rcu_head_unqueue(rhp);
···
      * Update counts, accelerate new callbacks, and if needed,
      * schedule another round of callback invocation.
      */
-    spin_lock_irq_rcu_node(sdp);
+    raw_spin_lock_irq_rcu_node(sdp);
     rcu_segcblist_add_len(&sdp->srcu_cblist, -len);
     sdp->srcu_cblist_invoking = false;
     more = rcu_segcblist_ready_cbs(&sdp->srcu_cblist);
-    spin_unlock_irq_rcu_node(sdp);
+    raw_spin_unlock_irq_rcu_node(sdp);
     /* An SRCU barrier or callbacks from previous nesting work pending */
     if (more)
         srcu_schedule_cbs_sdp(sdp, 0);
···
 {
     bool pushgp = true;

-    spin_lock_irq_rcu_node(ssp->srcu_sup);
+    raw_spin_lock_irq_rcu_node(ssp->srcu_sup);
     if (ULONG_CMP_GE(ssp->srcu_sup->srcu_gp_seq, ssp->srcu_sup->srcu_gp_seq_needed)) {
         if (!WARN_ON_ONCE(rcu_seq_state(ssp->srcu_sup->srcu_gp_seq))) {
             /* All requests fulfilled, time to go idle. */
             pushgp = false;
         }
     } else if (!rcu_seq_state(ssp->srcu_sup->srcu_gp_seq)) {
         /* Outstanding request and no GP.  Start one. */
         srcu_gp_start(ssp);
     }
-    spin_unlock_irq_rcu_node(ssp->srcu_sup);
+    raw_spin_unlock_irq_rcu_node(ssp->srcu_sup);

     if (pushgp)
         queue_delayed_work(rcu_gp_wq, &ssp->srcu_sup->work, delay);
···
     ssp = sup->srcu_ssp;

     srcu_advance_state(ssp);
-    spin_lock_irq_rcu_node(ssp->srcu_sup);
+    raw_spin_lock_irq_rcu_node(ssp->srcu_sup);
     curdelay = srcu_get_delay(ssp);
-    spin_unlock_irq_rcu_node(ssp->srcu_sup);
+    raw_spin_unlock_irq_rcu_node(ssp->srcu_sup);
     if (curdelay) {
         WRITE_ONCE(sup->reschedule_count, 0);
     } else {
···
         }
     }
     srcu_reschedule(ssp, curdelay);
+}
+
+static void srcu_irq_work(struct irq_work *work)
+{
+    struct srcu_struct *ssp;
+    struct srcu_usage *sup;
+    unsigned long delay;
+    unsigned long flags;
+
+    sup = container_of(work, struct srcu_usage, irq_work);
+    ssp = sup->srcu_ssp;
+
+    raw_spin_lock_irqsave_rcu_node(ssp->srcu_sup, flags);
+    delay = srcu_get_delay(ssp);
+    raw_spin_unlock_irqrestore_rcu_node(ssp->srcu_sup, flags);
+
+    queue_delayed_work(rcu_gp_wq, &sup->work, !!delay);
 }

 void srcutorture_get_gp_data(struct srcu_struct *ssp, int *flags,
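
The deferral trick shared by the tiny and tree variants above can be read in isolation. The following stripped-down sketch (all names are illustrative, not from the patches) shows the same pattern: rather than calling schedule_work() directly from a context that may hold runqueue or other scheduler locks, the caller queues an irq_work; its handler runs later in hard-interrupt context with none of the caller's locks held, where waking a kworker is safe. Combined with the raw_spinlock_t conversion (raw spinlocks never become sleeping locks, even on PREEMPT_RT), this keeps sleeping locks and scheduler locks out of the call_srcu() path.

#include <linux/irq_work.h>
#include <linux/workqueue.h>

struct deferred_kick {
    struct irq_work irq_work;
    struct work_struct work;
};

static struct deferred_kick dk;

static void deferred_kick_workfn(struct work_struct *work)
{
    /* Sleepable processing happens here, in process context. */
}

static void deferred_kick_irqfn(struct irq_work *irq_work)
{
    struct deferred_kick *d = container_of(irq_work, struct deferred_kick, irq_work);

    /* None of the original caller's locks are held here. */
    schedule_work(&d->work);
}

static void deferred_kick_setup(void)
{
    init_irq_work(&dk.irq_work, deferred_kick_irqfn);
    INIT_WORK(&dk.work, deferred_kick_workfn);
}

/* Safe to call with preemption disabled or runqueue locks held. */
static void deferred_kick(void)
{
    irq_work_queue(&dk.irq_work);
}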