Merge tag 'locking-debug-2021-09-01' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

+12

Documentation/dev-tools/kcsan.rst

··· 127 127 causes KCSAN to not report data races due to conflicts where the only plain 128 128 accesses are aligned writes up to word size. 129 129 130 + * ``CONFIG_KCSAN_PERMISSIVE``: Enable additional permissive rules to ignore 131 + certain classes of common data races. Unlike the above, the rules are more 132 + complex, involving value-change patterns, access type, and address. This 133 + option depends on ``CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY=y``. For details 134 + please see ``kernel/kcsan/permissive.h``. Testers and maintainers that 135 + only focus on reports from specific subsystems and not the whole kernel are 136 + recommended to disable this option. 137 + 138 + To use the strictest possible rules, select ``CONFIG_KCSAN_STRICT=y``, which 139 + configures KCSAN to follow the Linux-kernel memory consistency model (LKMM) as 140 + closely as possible. 141 + 130 142 DebugFS interface 131 143 ~~~~~~~~~~~~~~~~~ 132 144

-23

kernel/kcsan/atomic.h

··· 1 - /* SPDX-License-Identifier: GPL-2.0 */ 2 - /* 3 - * Rules for implicitly atomic memory accesses. 4 - * 5 - * Copyright (C) 2019, Google LLC. 6 - */ 7 - 8 - #ifndef _KERNEL_KCSAN_ATOMIC_H 9 - #define _KERNEL_KCSAN_ATOMIC_H 10 - 11 - #include <linux/types.h> 12 - 13 - /* 14 - * Special rules for certain memory where concurrent conflicting accesses are 15 - * common, however, the current convention is to not mark them; returns true if 16 - * access to @ptr should be considered atomic. Called from slow-path. 17 - */ 18 - static bool kcsan_is_atomic_special(const volatile void *ptr) 19 - { 20 - return false; 21 - } 22 - 23 - #endif /* _KERNEL_KCSAN_ATOMIC_H */

+49 -28

kernel/kcsan/core.c

··· 20 20 #include <linux/sched.h> 21 21 #include <linux/uaccess.h> 22 22 23 - #include "atomic.h" 24 23 #include "encoding.h" 25 24 #include "kcsan.h" 25 + #include "permissive.h" 26 26 27 27 static bool kcsan_early_enable = IS_ENABLED(CONFIG_KCSAN_EARLY_ENABLE); 28 28 unsigned int kcsan_udelay_task = CONFIG_KCSAN_UDELAY_TASK; ··· 301 301 this_cpu_write(kcsan_skip, skip_count); 302 302 } 303 303 304 - static __always_inline bool kcsan_is_enabled(void) 304 + static __always_inline bool kcsan_is_enabled(struct kcsan_ctx *ctx) 305 305 { 306 - return READ_ONCE(kcsan_enabled) && get_ctx()->disable_count == 0; 306 + return READ_ONCE(kcsan_enabled) && !ctx->disable_count; 307 307 } 308 308 309 309 /* Introduce delay depending on context and configuration. */ ··· 353 353 atomic_long_t *watchpoint, 354 354 long encoded_watchpoint) 355 355 { 356 + const bool is_assert = (type & KCSAN_ACCESS_ASSERT) != 0; 357 + struct kcsan_ctx *ctx = get_ctx(); 356 358 unsigned long flags; 357 359 bool consumed; 358 360 359 - if (!kcsan_is_enabled()) 361 + /* 362 + * We know a watchpoint exists. Let's try to keep the race-window 363 + * between here and finally consuming the watchpoint below as small as 364 + * possible -- avoid unnecessarily complex code until consumed. 365 + */ 366 + 367 + if (!kcsan_is_enabled(ctx)) 360 368 return; 361 369 362 370 /* ··· 372 364 * reporting a race where e.g. the writer set up the watchpoint, but the 373 365 * reader has access_mask!=0, we have to ignore the found watchpoint. 374 366 */ 375 - if (get_ctx()->access_mask != 0) 367 + if (ctx->access_mask) 376 368 return; 377 369 378 370 /* 379 - * Consume the watchpoint as soon as possible, to minimize the chances 380 - * of !consumed. Consuming the watchpoint must always be guarded by 381 - * kcsan_is_enabled() check, as otherwise we might erroneously 382 - * triggering reports when disabled. 
371 + * If the other thread does not want to ignore the access, and there was 372 + * a value change as a result of this thread's operation, we will still 373 + * generate a report of unknown origin. 374 + * 375 + * Use CONFIG_KCSAN_REPORT_RACE_UNKNOWN_ORIGIN=n to filter. 376 + */ 377 + if (!is_assert && kcsan_ignore_address(ptr)) 378 + return; 379 + 380 + /* 381 + * Consuming the watchpoint must be guarded by kcsan_is_enabled() to 382 + * avoid erroneously triggering reports if the context is disabled. 383 383 */ 384 384 consumed = try_consume_watchpoint(watchpoint, encoded_watchpoint); 385 385 ··· 407 391 atomic_long_inc(&kcsan_counters[KCSAN_COUNTER_REPORT_RACES]); 408 392 } 409 393 410 - if ((type & KCSAN_ACCESS_ASSERT) != 0) 394 + if (is_assert) 411 395 atomic_long_inc(&kcsan_counters[KCSAN_COUNTER_ASSERT_FAILURES]); 412 396 else 413 397 atomic_long_inc(&kcsan_counters[KCSAN_COUNTER_DATA_RACES]); ··· 425 409 unsigned long access_mask; 426 410 enum kcsan_value_change value_change = KCSAN_VALUE_CHANGE_MAYBE; 427 411 unsigned long ua_flags = user_access_save(); 412 + struct kcsan_ctx *ctx = get_ctx(); 428 413 unsigned long irq_flags = 0; 429 414 430 415 /* ··· 434 417 */ 435 418 reset_kcsan_skip(); 436 419 437 - if (!kcsan_is_enabled()) 420 + if (!kcsan_is_enabled(ctx)) 438 421 goto out; 439 422 440 423 /* 441 - * Special atomic rules: unlikely to be true, so we check them here in 442 - * the slow-path, and not in the fast-path in is_atomic(). Call after 443 - * kcsan_is_enabled(), as we may access memory that is not yet 444 - * initialized during early boot. 424 + * Check to-ignore addresses after kcsan_is_enabled(), as we may access 425 + * memory that is not yet initialized during early boot. 
445 426 */ 446 - if (!is_assert && kcsan_is_atomic_special(ptr)) 427 + if (!is_assert && kcsan_ignore_address(ptr)) 447 428 goto out; 448 429 449 430 if (!check_encodable((unsigned long)ptr, size)) { ··· 494 479 break; /* ignore; we do not diff the values */ 495 480 } 496 481 497 - if (IS_ENABLED(CONFIG_KCSAN_DEBUG)) { 498 - kcsan_disable_current(); 499 - pr_err("watching %s, size: %zu, addr: %px [slot: %d, encoded: %lx]\n", 500 - is_write ? "write" : "read", size, ptr, 501 - watchpoint_slot((unsigned long)ptr), 502 - encode_watchpoint((unsigned long)ptr, size, is_write)); 503 - kcsan_enable_current(); 504 - } 505 - 506 482 /* 507 483 * Delay this thread, to increase probability of observing a racy 508 484 * conflicting access. ··· 504 498 * Re-read value, and check if it is as expected; if not, we infer a 505 499 * racy access. 506 500 */ 507 - access_mask = get_ctx()->access_mask; 501 + access_mask = ctx->access_mask; 508 502 new = 0; 509 503 switch (size) { 510 504 case 1: ··· 527 521 if (access_mask) 528 522 diff &= access_mask; 529 523 530 - /* Were we able to observe a value-change? */ 531 - if (diff != 0) 524 + /* 525 + * Check if we observed a value change. 526 + * 527 + * Also check if the data race should be ignored (the rules depend on 528 + * non-zero diff); if it is to be ignored, the below rules for 529 + * KCSAN_VALUE_CHANGE_MAYBE apply. 530 + */ 531 + if (diff && !kcsan_ignore_data_race(size, type, old, new, diff)) 532 532 value_change = KCSAN_VALUE_CHANGE_TRUE; 533 533 534 534 /* Check if this access raced with another. 
*/ ··· 655 643 if (kcsan_early_enable) { 656 644 pr_info("enabled early\n"); 657 645 WRITE_ONCE(kcsan_enabled, true); 646 + } 647 + 648 + if (IS_ENABLED(CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY) || 649 + IS_ENABLED(CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC) || 650 + IS_ENABLED(CONFIG_KCSAN_PERMISSIVE) || 651 + IS_ENABLED(CONFIG_KCSAN_IGNORE_ATOMICS)) { 652 + pr_warn("non-strict mode configured - use CONFIG_KCSAN_STRICT=y to see all data races\n"); 653 + } else { 654 + pr_info("strict mode configured\n"); 658 655 } 659 656 } 660 657

+32

kernel/kcsan/kcsan_test.c

··· 414 414 __atomic_load_n(&test_var, __ATOMIC_RELAXED); 415 415 } 416 416 417 + static noinline void test_kernel_xor_1bit(void) 418 + { 419 + /* Do not report data races between the read-writes. */ 420 + kcsan_nestable_atomic_begin(); 421 + test_var ^= 0x10000; 422 + kcsan_nestable_atomic_end(); 423 + } 424 + 417 425 /* ===== Test cases ===== */ 418 426 419 427 /* Simple test with normal data race. */ ··· 960 952 KUNIT_EXPECT_FALSE(test, match_never); 961 953 } 962 954 955 + __no_kcsan 956 + static void test_1bit_value_change(struct kunit *test) 957 + { 958 + const struct expect_report expect = { 959 + .access = { 960 + { test_kernel_read, &test_var, sizeof(test_var), 0 }, 961 + { test_kernel_xor_1bit, &test_var, sizeof(test_var), __KCSAN_ACCESS_RW(KCSAN_ACCESS_WRITE) }, 962 + }, 963 + }; 964 + bool match = false; 965 + 966 + begin_test_checks(test_kernel_read, test_kernel_xor_1bit); 967 + do { 968 + match = IS_ENABLED(CONFIG_KCSAN_PERMISSIVE) 969 + ? report_available() 970 + : report_matches(&expect); 971 + } while (!end_test_checks(match)); 972 + if (IS_ENABLED(CONFIG_KCSAN_PERMISSIVE)) 973 + KUNIT_EXPECT_FALSE(test, match); 974 + else 975 + KUNIT_EXPECT_TRUE(test, match); 976 + } 977 + 963 978 /* 964 979 * Generate thread counts for all test cases. Values generated are in interval 965 980 * [2, 5] followed by exponentially increasing thread counts from 8 to 32. ··· 1055 1024 KCSAN_KUNIT_CASE(test_jiffies_noreport), 1056 1025 KCSAN_KUNIT_CASE(test_seqlock_noreport), 1057 1026 KCSAN_KUNIT_CASE(test_atomic_builtins), 1027 + KCSAN_KUNIT_CASE(test_1bit_value_change), 1058 1028 {}, 1059 1029 }; 1060 1030

+94

kernel/kcsan/permissive.h

··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + /* 3 + * Special rules for ignoring entire classes of data-racy memory accesses. None 4 + * of the rules here imply that such data races are generally safe! 5 + * 6 + * All rules in this file can be configured via CONFIG_KCSAN_PERMISSIVE. Keep 7 + * them separate from core code to make it easier to audit. 8 + * 9 + * Copyright (C) 2019, Google LLC. 10 + */ 11 + 12 + #ifndef _KERNEL_KCSAN_PERMISSIVE_H 13 + #define _KERNEL_KCSAN_PERMISSIVE_H 14 + 15 + #include <linux/bitops.h> 16 + #include <linux/sched.h> 17 + #include <linux/types.h> 18 + 19 + /* 20 + * Access ignore rules based on address. 21 + */ 22 + static __always_inline bool kcsan_ignore_address(const volatile void *ptr) 23 + { 24 + if (!IS_ENABLED(CONFIG_KCSAN_PERMISSIVE)) 25 + return false; 26 + 27 + /* 28 + * Data-racy bitops on current->flags are too common, ignore completely 29 + * for now. 30 + */ 31 + return ptr == &current->flags; 32 + } 33 + 34 + /* 35 + * Data race ignore rules based on access type and value change patterns. 36 + */ 37 + static bool 38 + kcsan_ignore_data_race(size_t size, int type, u64 old, u64 new, u64 diff) 39 + { 40 + if (!IS_ENABLED(CONFIG_KCSAN_PERMISSIVE)) 41 + return false; 42 + 43 + /* 44 + * Rules here are only for plain read accesses, so that we still report 45 + * data races between plain read-write accesses. 46 + */ 47 + if (type || size > sizeof(long)) 48 + return false; 49 + 50 + /* 51 + * A common pattern is checking/setting just 1 bit in a variable; for 52 + * example: 53 + * 54 + * if (flags & SOME_FLAG) { ... } 55 + * 56 + * and elsewhere flags is updated concurrently: 57 + * 58 + * flags |= SOME_OTHER_FLAG; // just 1 bit 59 + * 60 + * While it is still recommended that such accesses be marked 61 + * appropriately, in many cases these types of data races are so common 62 + * that marking them all is often unrealistic and left to maintainer 63 + * preference. 
64 + * 65 + * The assumption in all cases is that with all known compiler 66 + * optimizations (including those that tear accesses), because no more 67 + * than 1 bit changed, the plain accesses are safe despite the presence 68 + * of data races. 69 + * 70 + * The rules here will ignore the data races if we observe no more than 71 + * 1 bit changed. 72 + * 73 + * Of course many operations can effectively change just 1 bit, but the 74 + * general assumption that data races involving 1-bit changes can be 75 + * tolerated still applies. 76 + * 77 + * And in case a true bug is missed, the bug likely manifests as a 78 + * reportable data race elsewhere. 79 + */ 80 + if (hweight64(diff) == 1) { 81 + /* 82 + * Exception: Report data races where the values look like 83 + * ordinary booleans (one of them was 0 and the 0th bit was 84 + * changed). More often than not, they come with interesting 85 + * memory ordering requirements, so let's report them. 86 + */ 87 + if (!((!old || !new) && diff == 1)) 88 + return true; 89 + } 90 + 91 + return false; 92 + } 93 + 94 + #endif /* _KERNEL_KCSAN_PERMISSIVE_H */

+30 -12

lib/Kconfig.kcsan

··· 40 40 41 41 if KCSAN 42 42 43 - # Compiler capabilities that should not fail the test if they are unavailable. 44 43 config CC_HAS_TSAN_COMPOUND_READ_BEFORE_WRITE 45 44 def_bool (CC_IS_CLANG && $(cc-option,-fsanitize=thread -mllvm -tsan-compound-read-before-write=1)) || \ 46 45 (CC_IS_GCC && $(cc-option,-fsanitize=thread --param tsan-compound-read-before-write=1)) 46 + help 47 + The compiler instruments plain compound read-write operations 48 + differently (++, --, +=, -=, |=, &=, etc.), which allows KCSAN to 49 + distinguish them from other plain accesses. This is currently 50 + supported by Clang 12 or later. 47 51 48 52 config KCSAN_VERBOSE 49 53 bool "Show verbose reports with more information about system state" ··· 61 57 external functions on report generation; if a race report is 62 58 generated from any one of them, system stability may suffer due to 63 59 deadlocks or recursion. If in doubt, say N. 64 - 65 - config KCSAN_DEBUG 66 - bool "Debugging of KCSAN internals" 67 60 68 61 config KCSAN_SELFTEST 69 62 bool "Perform short selftests on boot" ··· 150 149 KCSAN_WATCH_SKIP. 151 150 152 151 config KCSAN_INTERRUPT_WATCHER 153 - bool "Interruptible watchers" 152 + bool "Interruptible watchers" if !KCSAN_STRICT 153 + default KCSAN_STRICT 154 154 help 155 155 If enabled, a task that set up a watchpoint may be interrupted while 156 156 delayed. This option will allow KCSAN to detect races between ··· 171 169 reporting to avoid flooding the console with reports. Setting this 172 170 to 0 disables rate limiting. 173 171 174 - # The main purpose of the below options is to control reported data races (e.g. 175 - # in fuzzer configs), and are not expected to be switched frequently by other 176 - # users. We could turn some of them into boot parameters, but given they should 177 - # not be switched normally, let's keep them here to simplify configuration. 178 - # 179 - # The defaults below are chosen to be very conservative, and may miss certain 180 - # bugs. 
172 + # The main purpose of the below options is to control reported data races, and 173 + # are not expected to be switched frequently by non-testers or at runtime. 174 + # The defaults are chosen to be conservative, and can miss certain bugs. 181 175 182 176 config KCSAN_REPORT_RACE_UNKNOWN_ORIGIN 183 177 bool "Report races of unknown origin" ··· 184 186 reported if it was only possible to infer a race due to a data value 185 187 change while an access is being delayed on a watchpoint. 186 188 189 + config KCSAN_STRICT 190 + bool "Strict data-race checking" 191 + help 192 + KCSAN will report data races with the strictest possible rules, which 193 + closely aligns with the rules defined by the Linux-kernel memory 194 + consistency model (LKMM). 195 + 187 196 config KCSAN_REPORT_VALUE_CHANGE_ONLY 188 197 bool "Only report races where watcher observed a data value change" 189 198 default y 199 + depends on !KCSAN_STRICT 190 200 help 191 201 If enabled and a conflicting write is observed via a watchpoint, but 192 202 the data value of the memory location was observed to remain ··· 203 197 config KCSAN_ASSUME_PLAIN_WRITES_ATOMIC 204 198 bool "Assume that plain aligned writes up to word size are atomic" 205 199 default y 200 + depends on !KCSAN_STRICT 206 201 help 207 202 Assume that plain aligned writes up to word size are atomic by 208 203 default, and also not subject to other unsafe compiler optimizations ··· 216 209 217 210 config KCSAN_IGNORE_ATOMICS 218 211 bool "Do not instrument marked atomic accesses" 212 + depends on !KCSAN_STRICT 219 213 help 220 214 Never instrument marked atomic accesses. This option can be used for 221 215 additional filtering. Conflicting marked atomic reads and plain ··· 231 223 be reported as data races; however, unlike that option, data races 232 224 due to two conflicting plain writes will be reported (aligned and 233 225 unaligned, if CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC=n). 
226 + 227 + config KCSAN_PERMISSIVE 228 + bool "Enable all additional permissive rules" 229 + depends on KCSAN_REPORT_VALUE_CHANGE_ONLY 230 + help 231 + Enable additional permissive rules to ignore certain classes of data 232 + races (also see kernel/kcsan/permissive.h). None of the permissive 233 + rules imply that such data races are generally safe, but can be used 234 + to further reduce reported data races due to data-racy patterns 235 + common across the kernel. 234 236 235 237 endif # KCSAN

+135 -16

tools/memory-model/Documentation/access-marking.txt

··· 37 37 Therefore, if a given access is involved in an intentional data race, 38 38 using READ_ONCE() for loads and WRITE_ONCE() for stores is usually 39 39 preferable to data_race(), which in turn is usually preferable to plain 40 - C-language accesses. 40 + C-language accesses. It is permissible to combine #2 and #3, for example, 41 + data_race(READ_ONCE(a)), which will both restrict compiler optimizations 42 + and disable KCSAN diagnostics. 41 43 42 44 KCSAN will complain about many types of data races involving plain 43 45 C-language accesses, but marking all accesses involved in a given data ··· 88 86 data_race() for the diagnostic reads because otherwise KCSAN would give 89 87 false-positive warnings about these diagnostic reads. 90 88 89 + If it is necessary to both restrict compiler optimizations and disable 90 + KCSAN diagnostics, use both data_race() and READ_ONCE(), for example, 91 + data_race(READ_ONCE(a)). 92 + 91 93 In theory, plain C-language loads can also be used for this use case. 92 94 However, in practice this will have the disadvantage of causing KCSAN 93 95 to generate false positives because KCSAN will have no way of knowing ··· 131 125 consistent errors, which in turn are quite capable of breaking heuristics. 132 126 Therefore use of data_race() should be limited to cases where some other 133 127 code (such as a barrier() call) will force the occasional reload. 128 + 129 + Note that this use case requires that the heuristic be able to handle 130 + any possible error. In contrast, if the heuristics might be fatally 131 + confused by one or more of the possible erroneous values, use READ_ONCE() 132 + instead of data_race(). 134 133 135 134 In theory, plain C-language loads can also be used for this use case. 
136 135 However, in practice this will have the disadvantage of causing KCSAN ··· 270 259 return ret; 271 260 } 272 261 273 - int read_foo_diagnostic(void) 262 + void read_foo_diagnostic(void) 274 263 { 275 - return data_race(foo); 264 + pr_info("Current value of foo: %d\n", data_race(foo)); 276 265 } 277 266 278 267 The reader-writer lock prevents the compiler from introducing concurrency ··· 285 274 ignored. This data_race() also tells the human reading the code that 286 275 read_foo_diagnostic() might sometimes return a bogus value. 287 276 288 - However, please note that your kernel must be built with 289 - CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC=n in order for KCSAN to 290 - detect a buggy lockless write. If you need KCSAN to detect such a 291 - write even if that write did not change the value of foo, you also 292 - need CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY=n. If you need KCSAN to 293 - detect such a write happening in an interrupt handler running on the 294 - same CPU doing the legitimate lock-protected write, you also need 295 - CONFIG_KCSAN_INTERRUPT_WATCHER=y. With some or all of these Kconfig 296 - options set properly, KCSAN can be quite helpful, although it is not 297 - necessarily a full replacement for hardware watchpoints. On the other 298 - hand, neither are hardware watchpoints a full replacement for KCSAN 299 - because it is not always easy to tell hardware watchpoint to conditionally 300 - trap on accesses. 
277 + If it is necessary to suppress compiler optimization and also detect 278 + buggy lockless writes, read_foo_diagnostic() can be updated as follows: 279 + 280 + void read_foo_diagnostic(void) 281 + { 282 + pr_info("Current value of foo: %d\n", data_race(READ_ONCE(foo))); 283 + } 284 + 285 + Alternatively, given that KCSAN is to ignore all accesses in this function, 286 + this function can be marked __no_kcsan and the data_race() can be dropped: 287 + 288 + void __no_kcsan read_foo_diagnostic(void) 289 + { 290 + pr_info("Current value of foo: %d\n", READ_ONCE(foo)); 291 + } 292 + 293 + However, in order for KCSAN to detect buggy lockless writes, your kernel 294 + must be built with CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC=n. If you 295 + need KCSAN to detect such a write even if that write did not change 296 + the value of foo, you also need CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY=n. 297 + If you need KCSAN to detect such a write happening in an interrupt handler 298 + running on the same CPU doing the legitimate lock-protected write, you 299 + also need CONFIG_KCSAN_INTERRUPT_WATCHER=y. With some or all of these 300 + Kconfig options set properly, KCSAN can be quite helpful, although 301 + it is not necessarily a full replacement for hardware watchpoints. 302 + On the other hand, neither are hardware watchpoints a full replacement 303 + for KCSAN because it is not always easy to tell hardware watchpoint to 304 + conditionally trap on accesses. 301 305 302 306 303 307 Lock-Protected Writes With Lockless Reads ··· 343 317 Because foo is read locklessly, all accesses are marked. The purpose 344 318 of the ASSERT_EXCLUSIVE_WRITER() is to allow KCSAN to check for a buggy 345 319 concurrent lockless write. 
320 + 321 + 322 + Lock-Protected Writes With Heuristic Lockless Reads 323 + --------------------------------------------------- 324 + 325 + For another example, suppose that the code can normally make use of 326 + a per-data-structure lock, but there are times when a global lock 327 + is required. These times are indicated via a global flag. The code 328 + might look as follows, and is based loosely on nf_conntrack_lock(), 329 + nf_conntrack_all_lock(), and nf_conntrack_all_unlock(): 330 + 331 + bool global_flag; 332 + DEFINE_SPINLOCK(global_lock); 333 + struct foo { 334 + spinlock_t f_lock; 335 + int f_data; 336 + }; 337 + 338 + /* All foo structures are in the following array. */ 339 + int nfoo; 340 + struct foo *foo_array; 341 + 342 + void do_something_locked(struct foo *fp) 343 + { 344 + /* This works even if data_race() returns nonsense. */ 345 + if (!data_race(global_flag)) { 346 + spin_lock(&fp->f_lock); 347 + if (!smp_load_acquire(&global_flag)) { 348 + do_something(fp); 349 + spin_unlock(&fp->f_lock); 350 + return; 351 + } 352 + spin_unlock(&fp->f_lock); 353 + } 354 + spin_lock(&global_lock); 355 + /* global_lock held, thus global flag cannot be set. */ 356 + spin_lock(&fp->f_lock); 357 + spin_unlock(&global_lock); 358 + /* 359 + * global_flag might be set here, but begin_global() 360 + * will wait for ->f_lock to be released. 361 + */ 362 + do_something(fp); 363 + spin_unlock(&fp->f_lock); 364 + } 365 + 366 + void begin_global(void) 367 + { 368 + int i; 369 + 370 + spin_lock(&global_lock); 371 + WRITE_ONCE(global_flag, true); 372 + for (i = 0; i < nfoo; i++) { 373 + /* 374 + * Wait for pre-existing local locks. One at 375 + * a time to avoid lockdep limitations. 
376 + */ 377 + spin_lock(&fp->f_lock); 378 + spin_unlock(&fp->f_lock); 379 + } 380 + } 381 + 382 + void end_global(void) 383 + { 384 + smp_store_release(&global_flag, false); 385 + spin_unlock(&global_lock); 386 + } 387 + 388 + All code paths leading from the do_something_locked() function's first 389 + read from global_flag acquire a lock, so endless load fusing cannot 390 + happen. 391 + 392 + If the value read from global_flag is false, then global_flag is 393 + rechecked while holding ->f_lock, which, if global_flag is still false, 394 + prevents begin_global() from completing. It is therefore safe to invoke 395 + do_something(). 396 + 397 + Otherwise, if either value read from global_flag is true, then after 398 + global_lock is acquired global_flag must be false. The acquisition of 399 + ->f_lock will prevent any call to begin_global() from returning, which 400 + means that it is safe to release global_lock and invoke do_something(). 401 + 402 + For this to work, only those foo structures in foo_array[] may be passed 403 + to do_something_locked(). The reason for this is that the synchronization 404 + with begin_global() relies on momentarily holding the lock of each and 405 + every foo structure. 406 + 407 + The smp_load_acquire() and smp_store_release() are required because 408 + changes to a foo structure between calls to begin_global() and 409 + end_global() are carried out without holding that structure's ->f_lock. 410 + The smp_load_acquire() and smp_store_release() ensure that the next 411 + invocation of do_something() from do_something_locked() will see those 412 + changes. 346 413 347 414 348 415 Lockless Reads and Writes

Configure Feed

Configure Feed