Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'rcu-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull RCU fixes from Ingo Molnar:
"This fixes sporadic ACPI related hangs in synchronize_rcu() that were
caused by the ACPI code mistakenly relying on an aspect of RCU that
was neither promised to work nor reliable but which happened to work -
until in v4.9 we changed the RCU implementation, which made the hangs
more prominent.

Since the misuse of the RCU facility wasn't properly detected and
prevented either, these fixes make the RCU side work reliably instead
of working around the problem in the ACPI code.

Hence the slightly larger diffstat that goes beyond the normal scope
of RCU fixes in -rc kernels"

* 'rcu-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
rcu: Narrow early boot window of illegal synchronous grace periods
rcu: Remove cond_resched() from Tiny synchronize_sched()

+104 -39
+4
include/linux/rcupdate.h
··· 444 444 #error "Unknown RCU implementation specified to kernel configuration" 445 445 #endif 446 446 447 + #define RCU_SCHEDULER_INACTIVE 0 448 + #define RCU_SCHEDULER_INIT 1 449 + #define RCU_SCHEDULER_RUNNING 2 450 + 447 451 /* 448 452 * init_rcu_head_on_stack()/destroy_rcu_head_on_stack() are needed for dynamic 449 453 * initialization and destruction of rcu_head on the stack. rcu_head structures
+1
kernel/rcu/rcu.h
··· 136 136 #define TPS(x) tracepoint_string(x) 137 137 138 138 void rcu_early_boot_tests(void); 139 + void rcu_test_sync_prims(void); 139 140 140 141 /* 141 142 * This function really isn't for public consumption, but RCU is special in
-4
kernel/rcu/tiny.c
··· 185 185 * benefits of doing might_sleep() to reduce latency.) 186 186 * 187 187 * Cool, huh? (Due to Josh Triplett.) 188 - * 189 - * But we want to make this a static inline later. The cond_resched() 190 - * currently makes this problematic. 191 188 */ 192 189 void synchronize_sched(void) 193 190 { ··· 192 195 lock_is_held(&rcu_lock_map) || 193 196 lock_is_held(&rcu_sched_lock_map), 194 197 "Illegal synchronize_sched() in RCU read-side critical section"); 195 - cond_resched(); 196 198 } 197 199 EXPORT_SYMBOL_GPL(synchronize_sched); 198 200
+7 -2
kernel/rcu/tiny_plugin.h
··· 60 60 61 61 /* 62 62 * During boot, we forgive RCU lockdep issues. After this function is 63 - * invoked, we start taking RCU lockdep issues seriously. 63 + * invoked, we start taking RCU lockdep issues seriously. Note that unlike 64 + * Tree RCU, Tiny RCU transitions directly from RCU_SCHEDULER_INACTIVE 65 + * to RCU_SCHEDULER_RUNNING, skipping the RCU_SCHEDULER_INIT stage. 66 + * The reason for this is that Tiny RCU does not need kthreads, so does 67 + * not have to care about the fact that the scheduler is half-initialized 68 + * at a certain phase of the boot process. 64 69 */ 65 70 void __init rcu_scheduler_starting(void) 66 71 { 67 72 WARN_ON(nr_context_switches() > 0); 68 - rcu_scheduler_active = 1; 73 + rcu_scheduler_active = RCU_SCHEDULER_RUNNING; 69 74 } 70 75 71 76 #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+20 -13
kernel/rcu/tree.c
··· 127 127 int sysctl_panic_on_rcu_stall __read_mostly; 128 128 129 129 /* 130 - * The rcu_scheduler_active variable transitions from zero to one just 131 - * before the first task is spawned. So when this variable is zero, RCU 132 - * can assume that there is but one task, allowing RCU to (for example) 130 + * The rcu_scheduler_active variable is initialized to the value 131 + * RCU_SCHEDULER_INACTIVE and transitions to RCU_SCHEDULER_INIT just before the 132 + * first task is spawned. So when this variable is RCU_SCHEDULER_INACTIVE, 133 + * RCU can assume that there is but one task, allowing RCU to (for example) 133 134 * optimize synchronize_rcu() to a simple barrier(). When this variable 134 - * is one, RCU must actually do all the hard work required to detect real 135 - * grace periods. This variable is also used to suppress boot-time false 136 - * positives from lockdep-RCU error checking. 135 + * is RCU_SCHEDULER_INIT, RCU must actually do all the hard work required 136 + * to detect real grace periods. This variable is also used to suppress 137 + * boot-time false positives from lockdep-RCU error checking. Finally, it 138 + * transitions from RCU_SCHEDULER_INIT to RCU_SCHEDULER_RUNNING after RCU 139 + * is fully initialized, including all of its kthreads having been spawned. 137 140 */ 138 141 int rcu_scheduler_active __read_mostly; 139 142 EXPORT_SYMBOL_GPL(rcu_scheduler_active); ··· 3983 3980 early_initcall(rcu_spawn_gp_kthread); 3984 3981 3985 3982 /* 3986 - * This function is invoked towards the end of the scheduler's initialization 3987 - * process. Before this is called, the idle task might contain 3988 - * RCU read-side critical sections (during which time, this idle 3989 - * task is booting the system). After this function is called, the 3990 - * idle tasks are prohibited from containing RCU read-side critical 3991 - * sections. This function also enables RCU lockdep checking. 
3983 + * This function is invoked towards the end of the scheduler's 3984 + * initialization process. Before this is called, the idle task might 3985 + * contain synchronous grace-period primitives (during which time, this idle 3986 + * task is booting the system, and such primitives are no-ops). After this 3987 + * function is called, any synchronous grace-period primitives are run as 3988 + * expedited, with the requesting task driving the grace period forward. 3989 + * A later core_initcall() rcu_exp_runtime_mode() will switch to full 3990 + * runtime RCU functionality. 3992 3991 */ 3993 3992 void rcu_scheduler_starting(void) 3994 3993 { 3995 3994 WARN_ON(num_online_cpus() != 1); 3996 3995 WARN_ON(nr_context_switches() > 0); 3997 - rcu_scheduler_active = 1; 3996 + rcu_test_sync_prims(); 3997 + rcu_scheduler_active = RCU_SCHEDULER_INIT; 3998 + rcu_test_sync_prims(); 3998 3999 } 3999 4000 4000 4001 /*
+41 -11
kernel/rcu/tree_exp.h
··· 532 532 }; 533 533 534 534 /* 535 + * Common code to drive an expedited grace period forward, used by 536 + * workqueues and mid-boot-time tasks. 537 + */ 538 + static void rcu_exp_sel_wait_wake(struct rcu_state *rsp, 539 + smp_call_func_t func, unsigned long s) 540 + { 541 + /* Initialize the rcu_node tree in preparation for the wait. */ 542 + sync_rcu_exp_select_cpus(rsp, func); 543 + 544 + /* Wait and clean up, including waking everyone. */ 545 + rcu_exp_wait_wake(rsp, s); 546 + } 547 + 548 + /* 535 549 * Work-queue handler to drive an expedited grace period forward. 536 550 */ 537 551 static void wait_rcu_exp_gp(struct work_struct *wp) 538 552 { 539 553 struct rcu_exp_work *rewp; 540 554 541 - /* Initialize the rcu_node tree in preparation for the wait. */ 542 555 rewp = container_of(wp, struct rcu_exp_work, rew_work); 543 - sync_rcu_exp_select_cpus(rewp->rew_rsp, rewp->rew_func); 544 - 545 - /* Wait and clean up, including waking everyone. */ 546 - rcu_exp_wait_wake(rewp->rew_rsp, rewp->rew_s); 556 + rcu_exp_sel_wait_wake(rewp->rew_rsp, rewp->rew_func, rewp->rew_s); 547 557 } 548 558 549 559 /* ··· 579 569 if (exp_funnel_lock(rsp, s)) 580 570 return; /* Someone else did our work for us. */ 581 571 582 - /* Marshall arguments and schedule the expedited grace period. */ 583 - rew.rew_func = func; 584 - rew.rew_rsp = rsp; 585 - rew.rew_s = s; 586 - INIT_WORK_ONSTACK(&rew.rew_work, wait_rcu_exp_gp); 587 - schedule_work(&rew.rew_work); 572 + /* Ensure that load happens before action based on it. */ 573 + if (unlikely(rcu_scheduler_active == RCU_SCHEDULER_INIT)) { 574 + /* Direct call during scheduler init and early_initcalls(). */ 575 + rcu_exp_sel_wait_wake(rsp, func, s); 576 + } else { 577 + /* Marshall arguments & schedule the expedited grace period. 
*/ 578 + rew.rew_func = func; 579 + rew.rew_rsp = rsp; 580 + rew.rew_s = s; 581 + INIT_WORK_ONSTACK(&rew.rew_work, wait_rcu_exp_gp); 582 + schedule_work(&rew.rew_work); 583 + } 588 584 589 585 /* Wait for expedited grace period to complete. */ 590 586 rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id()); ··· 692 676 { 693 677 struct rcu_state *rsp = rcu_state_p; 694 678 679 + if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE) 680 + return; 695 681 _synchronize_rcu_expedited(rsp, sync_rcu_exp_handler); 696 682 } 697 683 EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); ··· 711 693 EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); 712 694 713 695 #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ 696 + 697 + /* 698 + * Switch to run-time mode once Tree RCU has fully initialized. 699 + */ 700 + static int __init rcu_exp_runtime_mode(void) 701 + { 702 + rcu_test_sync_prims(); 703 + rcu_scheduler_active = RCU_SCHEDULER_RUNNING; 704 + rcu_test_sync_prims(); 705 + return 0; 706 + } 707 + core_initcall(rcu_exp_runtime_mode);
+1 -1
kernel/rcu/tree_plugin.h
··· 670 670 lock_is_held(&rcu_lock_map) || 671 671 lock_is_held(&rcu_sched_lock_map), 672 672 "Illegal synchronize_rcu() in RCU read-side critical section"); 673 - if (!rcu_scheduler_active) 673 + if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE) 674 674 return; 675 675 if (rcu_gp_is_expedited()) 676 676 synchronize_rcu_expedited();
+30 -8
kernel/rcu/update.c
··· 121 121 * Should expedited grace-period primitives always fall back to their 122 122 * non-expedited counterparts? Intended for use within RCU. Note 123 123 * that if the user specifies both rcu_expedited and rcu_normal, then 124 - * rcu_normal wins. 124 + * rcu_normal wins. (Except during the time period during boot from 125 + * when the first task is spawned until the rcu_exp_runtime_mode() 126 + * core_initcall() is invoked, at which point everything is expedited.) 125 127 */ 126 128 bool rcu_gp_is_normal(void) 127 129 { 128 - return READ_ONCE(rcu_normal); 130 + return READ_ONCE(rcu_normal) && 131 + rcu_scheduler_active != RCU_SCHEDULER_INIT; 129 132 } 130 133 EXPORT_SYMBOL_GPL(rcu_gp_is_normal); 131 134 ··· 138 135 /* 139 136 * Should normal grace-period primitives be expedited? Intended for 140 137 * use within RCU. Note that this function takes the rcu_expedited 141 - * sysfs/boot variable into account as well as the rcu_expedite_gp() 142 - * nesting. So looping on rcu_unexpedite_gp() until rcu_gp_is_expedited() 143 - * returns false is a -really- bad idea. 138 + * sysfs/boot variable and rcu_scheduler_active into account as well 139 + * as the rcu_expedite_gp() nesting. So looping on rcu_unexpedite_gp() 140 + * until rcu_gp_is_expedited() returns false is a -really- bad idea. 
144 141 */ 145 142 bool rcu_gp_is_expedited(void) 146 143 { 147 - return rcu_expedited || atomic_read(&rcu_expedited_nesting); 144 + return rcu_expedited || atomic_read(&rcu_expedited_nesting) || 145 + rcu_scheduler_active == RCU_SCHEDULER_INIT; 148 146 } 149 147 EXPORT_SYMBOL_GPL(rcu_gp_is_expedited); 150 148 ··· 261 257 262 258 int notrace debug_lockdep_rcu_enabled(void) 263 259 { 264 - return rcu_scheduler_active && debug_locks && 260 + return rcu_scheduler_active != RCU_SCHEDULER_INACTIVE && debug_locks && 265 261 current->lockdep_recursion == 0; 266 262 } 267 263 EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled); ··· 595 591 void synchronize_rcu_tasks(void) 596 592 { 597 593 /* Complain if the scheduler has not started. */ 598 - RCU_LOCKDEP_WARN(!rcu_scheduler_active, 594 + RCU_LOCKDEP_WARN(rcu_scheduler_active == RCU_SCHEDULER_INACTIVE, 599 595 "synchronize_rcu_tasks called too soon"); 600 596 601 597 /* Wait for the grace period. */ ··· 817 813 818 814 #endif /* #ifdef CONFIG_TASKS_RCU */ 819 815 816 + /* 817 + * Test each non-SRCU synchronous grace-period wait API. This is 818 + * useful just after a change in mode for these primitives, and 819 + * during early boot. 820 + */ 821 + void rcu_test_sync_prims(void) 822 + { 823 + if (!IS_ENABLED(CONFIG_PROVE_RCU)) 824 + return; 825 + synchronize_rcu(); 826 + synchronize_rcu_bh(); 827 + synchronize_sched(); 828 + synchronize_rcu_expedited(); 829 + synchronize_rcu_bh_expedited(); 830 + synchronize_sched_expedited(); 831 + } 832 + 820 833 #ifdef CONFIG_PROVE_RCU 821 834 822 835 /* ··· 886 865 early_boot_test_call_rcu_bh(); 887 866 if (rcu_self_test_sched) 888 867 early_boot_test_call_rcu_sched(); 868 + rcu_test_sync_prims(); 889 869 } 890 870 891 871 static int rcu_verify_early_boot_tests(void)