Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1/* SPDX-License-Identifier: GPL-2.0+ */
2/*
3 * Sleepable Read-Copy Update mechanism for mutual exclusion,
4 * tree variant.
5 *
6 * Copyright (C) IBM Corporation, 2017
7 *
8 * Author: Paul McKenney <paulmck@linux.ibm.com>
9 */
10
11#ifndef _LINUX_SRCU_TREE_H
12#define _LINUX_SRCU_TREE_H
13
14#include <linux/rcu_node_tree.h>
15#include <linux/completion.h>
16
/* Forward declarations: both types are referenced by pointer before they are defined below. */
struct srcu_node;
struct srcu_struct;
19
20/* One element of the srcu_data srcu_ctrs array. */
21struct srcu_ctr {
22 atomic_long_t srcu_locks; /* Locks per CPU. */
23 atomic_long_t srcu_unlocks; /* Unlocks per CPU. */
24};
25
26/*
27 * Per-CPU structure feeding into leaf srcu_node, similar in function
28 * to rcu_node.
29 */
30struct srcu_data {
31 /* Read-side state. */
32 struct srcu_ctr srcu_ctrs[2]; /* Locks and unlocks per CPU. */
33 int srcu_reader_flavor; /* Reader flavor for srcu_struct structure? */
34 /* Values: SRCU_READ_FLAVOR_.* */
35
36 /* Update-side state. */
37 raw_spinlock_t __private lock ____cacheline_internodealigned_in_smp;
38 struct rcu_segcblist srcu_cblist; /* List of callbacks.*/
39 unsigned long srcu_gp_seq_needed; /* Furthest future GP needed. */
40 unsigned long srcu_gp_seq_needed_exp; /* Furthest future exp GP. */
41 bool srcu_cblist_invoking; /* Invoking these CBs? */
42 struct timer_list delay_work; /* Delay for CB invoking */
43 struct work_struct work; /* Context for CB invoking. */
44 struct rcu_head srcu_barrier_head; /* For srcu_barrier() use. */
45 struct rcu_head srcu_ec_head; /* For srcu_expedite_current() use. */
46 int srcu_ec_state; /* State for srcu_expedite_current(). */
47 struct srcu_node *mynode; /* Leaf srcu_node. */
48 unsigned long grpmask; /* Mask for leaf srcu_node */
49 /* ->srcu_data_have_cbs[]. */
50 int cpu;
51 struct srcu_struct *ssp;
52};
53
54/*
55 * Node in SRCU combining tree, similar in function to rcu_data.
56 */
57struct srcu_node {
58 raw_spinlock_t __private lock;
59 unsigned long srcu_have_cbs[4]; /* GP seq for children having CBs, but only */
60 /* if greater than ->srcu_gp_seq. */
61 unsigned long srcu_data_have_cbs[4]; /* Which srcu_data structs have CBs for given GP? */
62 unsigned long srcu_gp_seq_needed_exp; /* Furthest future exp GP. */
63 struct srcu_node *srcu_parent; /* Next up in tree. */
64 int grplo; /* Least CPU for node. */
65 int grphi; /* Biggest CPU for node. */
66};
67
68/*
69 * Per-SRCU-domain structure, update-side data linked from srcu_struct.
70 */
71struct srcu_usage {
72 struct srcu_node *node; /* Combining tree. */
73 struct srcu_node *level[RCU_NUM_LVLS + 1];
74 /* First node at each level. */
75 int srcu_size_state; /* Small-to-big transition state. */
76 struct mutex srcu_cb_mutex; /* Serialize CB preparation. */
77 raw_spinlock_t __private lock; /* Protect counters and size state. */
78 struct mutex srcu_gp_mutex; /* Serialize GP work. */
79 unsigned long srcu_gp_seq; /* Grace-period seq #. */
80 unsigned long srcu_gp_seq_needed; /* Latest gp_seq needed. */
81 unsigned long srcu_gp_seq_needed_exp; /* Furthest future exp GP. */
82 unsigned long srcu_gp_start; /* Last GP start timestamp (jiffies) */
83 unsigned long srcu_last_gp_end; /* Last GP end timestamp (ns) */
84 unsigned long srcu_size_jiffies; /* Current contention-measurement interval. */
85 unsigned long srcu_n_lock_retries; /* Contention events in current interval. */
86 unsigned long srcu_n_exp_nodelay; /* # expedited no-delays in current GP phase. */
87 bool sda_is_static; /* May ->sda be passed to free_percpu()? */
88 unsigned long srcu_barrier_seq; /* srcu_barrier seq #. */
89 struct mutex srcu_barrier_mutex; /* Serialize barrier ops. */
90 struct completion srcu_barrier_completion;
91 /* Awaken barrier rq at end. */
92 atomic_t srcu_barrier_cpu_cnt; /* # CPUs not yet posting a */
93 /* callback for the barrier */
94 /* operation. */
95 unsigned long reschedule_jiffies;
96 unsigned long reschedule_count;
97 struct delayed_work work;
98 struct irq_work irq_work;
99 struct srcu_struct *srcu_ssp;
100};
101
102/*
103 * Per-SRCU-domain structure, similar in function to rcu_state.
104 */
105struct srcu_struct {
106 struct srcu_ctr __percpu *srcu_ctrp;
107 struct srcu_data __percpu *sda; /* Per-CPU srcu_data array. */
108 u8 srcu_reader_flavor;
109 struct lockdep_map dep_map;
110 struct srcu_usage *srcu_sup; /* Update-side data. */
111};
112
// Values for size state variable (->srcu_size_state).  Once the state
// has been set to SRCU_SIZE_ALLOC, the grace-period code advances through
// this state machine one step per grace period until the SRCU_SIZE_BIG state
// is reached.  Otherwise, the state machine remains in the SRCU_SIZE_SMALL
// state indefinitely.
#define SRCU_SIZE_SMALL		0	// No srcu_node combining tree, ->node == NULL
#define SRCU_SIZE_ALLOC		1	// An srcu_node tree is being allocated, initialized,
					//  and then referenced by ->node.  It will not be used.
#define SRCU_SIZE_WAIT_BARRIER	2	// The srcu_node tree starts being used by everything
					//  except call_srcu(), especially by srcu_barrier().
					//  By the end of this state, all CPUs and threads
					//  are aware of this tree's existence.
#define SRCU_SIZE_WAIT_CALL	3	// The srcu_node tree starts being used by call_srcu().
					//  By the end of this state, all of the call_srcu()
					//  invocations that were running on a non-boot CPU
					//  and using the boot CPU's callback queue will have
					//  completed.
#define SRCU_SIZE_WAIT_CBS1	4	// Don't trust the ->srcu_have_cbs[] grace-period
#define SRCU_SIZE_WAIT_CBS2	5	//  sequence elements or the ->srcu_data_have_cbs[]
#define SRCU_SIZE_WAIT_CBS3	6	//  CPU-bitmask elements until all four elements of
#define SRCU_SIZE_WAIT_CBS4	7	//  each array have been initialized.
#define SRCU_SIZE_BIG		8	// The srcu_node combining tree is fully initialized
					//  and all aspects of it are being put to use.
136
/* Values for state variable (bottom bits of ->srcu_gp_seq). */
#define SRCU_STATE_IDLE		0
#define SRCU_STATE_SCAN1	1
#define SRCU_STATE_SCAN2	2
141
/* Values for srcu_expedite_current() state (->srcu_ec_state). */
#define SRCU_EC_IDLE		0
#define SRCU_EC_PENDING		1
#define SRCU_EC_REPOST		2
146
/*
 * Values for initializing gp sequence fields.  Higher values allow wraparounds
 * to occur earlier.
 *
 * The second value, with state, is useful in the case of static initialization
 * of srcu_usage, where srcu_gp_seq_needed is expected to have some state value
 * in its lower bits (or else it will appear to be already initialized within
 * the call check_init_srcu_struct()).  Subtracting one from a value whose
 * bits below RCU_SEQ_CTR_SHIFT are all zero sets every one of those bits.
 */
#define SRCU_GP_SEQ_INITIAL_VAL ((0UL - 100UL) << RCU_SEQ_CTR_SHIFT)
#define SRCU_GP_SEQ_INITIAL_VAL_WITH_STATE (SRCU_GP_SEQ_INITIAL_VAL - 1)
157
/*
 * Static initializer for a struct srcu_usage named "name".  Note that
 * ->srcu_gp_seq_needed gets the _WITH_STATE value while the other two
 * sequence fields get the plain initial value.
 */
#define __SRCU_USAGE_INIT(name)							\
{										\
	.lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock),				\
	.srcu_gp_seq = SRCU_GP_SEQ_INITIAL_VAL,					\
	.srcu_gp_seq_needed = SRCU_GP_SEQ_INITIAL_VAL_WITH_STATE,		\
	.srcu_gp_seq_needed_exp = SRCU_GP_SEQ_INITIAL_VAL,			\
	.work = __DELAYED_WORK_INITIALIZER(name.work, NULL, 0),			\
}
166
/*
 * Designated initializers shared by the module and non-module srcu_struct
 * initializers: the update-side data pointer, the reader flavor, and
 * whatever __SRCU_DEP_MAP_INIT() expands to.
 */
#define __SRCU_STRUCT_INIT_COMMON(name, usage_name, fast)			\
	.srcu_sup = &usage_name,						\
	.srcu_reader_flavor = fast,						\
	__SRCU_DEP_MAP_INIT(name)
171
/*
 * srcu_struct initializer for the MODULE case: only the common fields are
 * set, so ->sda and ->srcu_ctrp are left zero-initialized here.
 */
#define __SRCU_STRUCT_INIT_MODULE(name, usage_name, fast)			\
{										\
	__SRCU_STRUCT_INIT_COMMON(name, usage_name, fast)			\
}
176
/*
 * srcu_struct initializer for the non-MODULE case: in addition to the
 * common fields, ->sda points at the per-CPU variable pcpu_name and
 * ->srcu_ctrp at element zero of its ->srcu_ctrs[] array.
 */
#define __SRCU_STRUCT_INIT(name, usage_name, pcpu_name, fast)			\
{										\
	.sda = &pcpu_name,							\
	.srcu_ctrp = &pcpu_name.srcu_ctrs[0],					\
	__SRCU_STRUCT_INIT_COMMON(name, usage_name, fast)			\
}
183
/*
 * Define and initialize a srcu struct at build time.
 * Do -not- call init_srcu_struct() nor cleanup_srcu_struct() on it.
 *
 * Note that although DEFINE_STATIC_SRCU() hides the name from other
 * files, the per-CPU variable rules nevertheless require that the
 * chosen name be globally unique.  These rules also prohibit use of
 * DEFINE_STATIC_SRCU() within a function.  If these rules are too
 * restrictive, declare the srcu_struct manually.  For example, in
 * each file:
 *
 *	static struct srcu_struct my_srcu;
 *
 * Then, before the first use of each my_srcu, manually initialize it:
 *
 *	init_srcu_struct(&my_srcu);
 *
 * See include/linux/percpu-defs.h for the rules on per-CPU variables.
 *
 * DEFINE_SRCU_FAST() and DEFINE_STATIC_SRCU_FAST() create an srcu_struct
 * and associated structures whose readers must be of the SRCU-fast variety.
 * DEFINE_SRCU_FAST_UPDOWN() and DEFINE_STATIC_SRCU_FAST_UPDOWN() create
 * an srcu_struct and associated structures whose readers must be of the
 * SRCU-fast-updown variety.  The key point (aside from error checking) with
 * both varieties is that the grace periods must use synchronize_rcu()
 * instead of smp_mb(), and given that the first (for example)
 * srcu_read_lock_fast() might race with the first synchronize_srcu(),
 * this difference must be specified at initialization time.
 *
 * In the MODULE case no per-CPU srcu_data variable is defined; instead a
 * constant pointer to the srcu_struct is emitted into the
 * "___srcu_struct_ptrs" section.
 */
#ifdef MODULE
# define __DEFINE_SRCU(name, fast, is_static)							\
	static struct srcu_usage name##_srcu_usage = __SRCU_USAGE_INIT(name##_srcu_usage);	\
	is_static struct srcu_struct name = __SRCU_STRUCT_INIT_MODULE(name, name##_srcu_usage,	\
								      fast);			\
	extern struct srcu_struct * const __srcu_struct_##name;					\
	struct srcu_struct * const __srcu_struct_##name						\
		__section("___srcu_struct_ptrs") = &name
#else
# define __DEFINE_SRCU(name, fast, is_static)							\
	static DEFINE_PER_CPU(struct srcu_data, name##_srcu_data);				\
	static struct srcu_usage name##_srcu_usage = __SRCU_USAGE_INIT(name##_srcu_usage);	\
	is_static struct srcu_struct name =							\
		__SRCU_STRUCT_INIT(name, name##_srcu_usage, name##_srcu_data, fast)
#endif
#define DEFINE_SRCU(name)		__DEFINE_SRCU(name, 0, /* not static */)
#define DEFINE_STATIC_SRCU(name)	__DEFINE_SRCU(name, 0, static)
#define DEFINE_SRCU_FAST(name)		__DEFINE_SRCU(name, SRCU_READ_FLAVOR_FAST, /* not static */)
#define DEFINE_STATIC_SRCU_FAST(name)	__DEFINE_SRCU(name, SRCU_READ_FLAVOR_FAST, static)
#define DEFINE_SRCU_FAST_UPDOWN(name)	__DEFINE_SRCU(name, SRCU_READ_FLAVOR_FAST_UPDOWN, \
						      /* not static */)
#define DEFINE_STATIC_SRCU_FAST_UPDOWN(name) \
	__DEFINE_SRCU(name, SRCU_READ_FLAVOR_FAST_UPDOWN, static)
236
237int __srcu_read_lock(struct srcu_struct *ssp) __acquires_shared(ssp);
238void synchronize_srcu_expedited(struct srcu_struct *ssp);
239void srcu_barrier(struct srcu_struct *ssp);
240void srcu_expedite_current(struct srcu_struct *ssp);
241void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf);
242
243// Converts a per-CPU pointer to an ->srcu_ctrs[] array element to that
244// element's index.
245static inline bool __srcu_ptr_to_ctr(struct srcu_struct *ssp, struct srcu_ctr __percpu *scpp)
246{
247 return scpp - &ssp->sda->srcu_ctrs[0];
248}
249
250// Converts an integer to a per-CPU pointer to the corresponding
251// ->srcu_ctrs[] array element.
252static inline struct srcu_ctr __percpu *__srcu_ctr_to_ptr(struct srcu_struct *ssp, int idx)
253{
254 return &ssp->sda->srcu_ctrs[idx];
255}
256
257/*
258 * Counts the new reader in the appropriate per-CPU element of the
259 * srcu_struct. Returns a pointer that must be passed to the matching
260 * srcu_read_unlock_fast().
261 *
262 * Note that both this_cpu_inc() and atomic_long_inc() are RCU read-side
263 * critical sections either because they disable interrupts, because
264 * they are a single instruction, or because they are read-modify-write
265 * atomic operations, depending on the whims of the architecture.
266 * This matters because the SRCU-fast grace-period mechanism uses either
267 * synchronize_rcu() or synchronize_rcu_expedited(), that is, RCU,
268 * *not* SRCU, in order to eliminate the need for the read-side smp_mb()
269 * invocations that are used by srcu_read_lock() and srcu_read_unlock().
270 * The __srcu_read_unlock_fast() function also relies on this same RCU
271 * (again, *not* SRCU) trick to eliminate the need for smp_mb().
272 *
273 * The key point behind this RCU trick is that if any part of a given
274 * RCU reader precedes the beginning of a given RCU grace period, then
275 * the entirety of that RCU reader and everything preceding it happens
276 * before the end of that same RCU grace period. Similarly, if any part
277 * of a given RCU reader follows the end of a given RCU grace period,
278 * then the entirety of that RCU reader and everything following it
279 * happens after the beginning of that same RCU grace period. Therefore,
280 * the operations labeled Y in __srcu_read_lock_fast() and those labeled Z
281 * in __srcu_read_unlock_fast() are ordered against the corresponding SRCU
282 * read-side critical section from the viewpoint of the SRCU grace period.
283 * This is all the ordering that is required, hence no calls to smp_mb().
284 *
285 * This means that __srcu_read_lock_fast() is not all that fast
286 * on architectures that support NMIs but do not supply NMI-safe
287 * implementations of this_cpu_inc().
288 */
289static inline struct srcu_ctr __percpu notrace *__srcu_read_lock_fast(struct srcu_struct *ssp)
290 __acquires_shared(ssp)
291{
292 struct srcu_ctr __percpu *scp = READ_ONCE(ssp->srcu_ctrp);
293
294 if (!IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE))
295 this_cpu_inc(scp->srcu_locks.counter); // Y, and implicit RCU reader.
296 else
297 atomic_long_inc(raw_cpu_ptr(&scp->srcu_locks)); // Y, and implicit RCU reader.
298 barrier(); /* Avoid leaking the critical section. */
299 __acquire_shared(ssp);
300 return scp;
301}
302
303/*
304 * Removes the count for the old reader from the appropriate
305 * per-CPU element of the srcu_struct. Note that this may well be a
306 * different CPU than that which was incremented by the corresponding
307 * srcu_read_lock_fast(), but it must be within the same task.
308 *
309 * Please see the __srcu_read_lock_fast() function's header comment for
310 * information on implicit RCU readers and NMI safety.
311 */
312static inline void notrace
313__srcu_read_unlock_fast(struct srcu_struct *ssp, struct srcu_ctr __percpu *scp)
314 __releases_shared(ssp)
315{
316 __release_shared(ssp);
317 barrier(); /* Avoid leaking the critical section. */
318 if (!IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE))
319 this_cpu_inc(scp->srcu_unlocks.counter); // Z, and implicit RCU reader.
320 else
321 atomic_long_inc(raw_cpu_ptr(&scp->srcu_unlocks)); // Z, and implicit RCU reader.
322}
323
324/*
325 * Counts the new reader in the appropriate per-CPU element of the
326 * srcu_struct. Returns a pointer that must be passed to the matching
327 * srcu_read_unlock_fast_updown(). This type of reader is compatible
328 * with srcu_down_read_fast() and srcu_up_read_fast().
329 *
330 * See the __srcu_read_lock_fast() comment for more details.
331 */
332static inline
333struct srcu_ctr __percpu notrace *__srcu_read_lock_fast_updown(struct srcu_struct *ssp)
334 __acquires_shared(ssp)
335{
336 struct srcu_ctr __percpu *scp = READ_ONCE(ssp->srcu_ctrp);
337
338 if (!IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE))
339 this_cpu_inc(scp->srcu_locks.counter); // Y, and implicit RCU reader.
340 else
341 atomic_long_inc(raw_cpu_ptr(&scp->srcu_locks)); // Y, and implicit RCU reader.
342 barrier(); /* Avoid leaking the critical section. */
343 __acquire_shared(ssp);
344 return scp;
345}
346
347/*
348 * Removes the count for the old reader from the appropriate
349 * per-CPU element of the srcu_struct. Note that this may well be a
350 * different CPU than that which was incremented by the corresponding
351 * srcu_read_lock_fast(), but it must be within the same task.
352 *
353 * Please see the __srcu_read_lock_fast() function's header comment for
354 * information on implicit RCU readers and NMI safety.
355 */
356static inline void notrace
357__srcu_read_unlock_fast_updown(struct srcu_struct *ssp, struct srcu_ctr __percpu *scp)
358 __releases_shared(ssp)
359{
360 __release_shared(ssp);
361 barrier(); /* Avoid leaking the critical section. */
362 if (!IS_ENABLED(CONFIG_NEED_SRCU_NMI_SAFE))
363 this_cpu_inc(scp->srcu_unlocks.counter); // Z, and implicit RCU reader.
364 else
365 atomic_long_inc(raw_cpu_ptr(&scp->srcu_unlocks)); // Z, and implicit RCU reader.
366}
367
368void __srcu_check_read_flavor(struct srcu_struct *ssp, int read_flavor);
369
370// Record SRCU-reader usage type only for CONFIG_PROVE_RCU=y kernels.
371static inline void srcu_check_read_flavor(struct srcu_struct *ssp, int read_flavor)
372{
373 if (IS_ENABLED(CONFIG_PROVE_RCU))
374 __srcu_check_read_flavor(ssp, read_flavor);
375}
376
377#endif