Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

rcu: Re-implement RCU Tasks Trace in terms of SRCU-fast

This commit saves more than 500 lines of RCU code by re-implementing
RCU Tasks Trace in terms of SRCU-fast. Follow-up work will remove
more code that does not cause problems by its presence, but that is no
longer required.

This variant places smp_mb() calls in rcu_read_{,un}lock_trace(), in the
same places that srcu_read_{,un}lock() would put them. These smp_mb()
calls will be removed on common-case architectures in a later commit.
In the meantime, they serve to enforce ordering between the underlying
srcu_read_{,un}lock_fast() markers and the intervening critical section,
even on architectures that permit attaching tracepoints on regions of
code not watched by RCU. Such architectures defeat SRCU-fast's use of
implicit single-instruction, interrupts-disabled, and atomic-operation
RCU read-side critical sections, which have no effect when RCU is not
watching. The aforementioned later commit will insert these smp_mb()
calls only on architectures that have not used noinstr to prevent
attaching tracepoints to code where RCU is not watching.

[ paulmck: Apply kernel test robot, Boqun Feng, and Zqiang feedback. ]
[ paulmck: Split out Tiny SRCU fixes per Andrii Nakryiko feedback. ]

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Tested-by: kernel test robot <oliver.sang@intel.com>
Cc: Andrii Nakryiko <andrii@kernel.org>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: bpf@vger.kernel.org
Reviewed-by: Joel Fernandes <joelagnelf@nvidia.com>
Signed-off-by: Boqun Feng <boqun.feng@gmail.com>

authored by

Paul E. McKenney and committed by
Boqun Feng
c27cea44 8f0b4cce

+99 -630
+79 -28
include/linux/rcupdate_trace.h
··· 12 12 #include <linux/rcupdate.h> 13 13 #include <linux/cleanup.h> 14 14 15 - extern struct lockdep_map rcu_trace_lock_map; 15 + #ifdef CONFIG_TASKS_TRACE_RCU 16 + extern struct srcu_struct rcu_tasks_trace_srcu_struct; 17 + #endif // #ifdef CONFIG_TASKS_TRACE_RCU 16 18 17 - #ifdef CONFIG_DEBUG_LOCK_ALLOC 19 + #if defined(CONFIG_DEBUG_LOCK_ALLOC) && defined(CONFIG_TASKS_TRACE_RCU) 18 20 19 21 static inline int rcu_read_lock_trace_held(void) 20 22 { 21 - return lock_is_held(&rcu_trace_lock_map); 23 + return srcu_read_lock_held(&rcu_tasks_trace_srcu_struct); 22 24 } 23 25 24 - #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ 26 + #else // #if defined(CONFIG_DEBUG_LOCK_ALLOC) && defined(CONFIG_TASKS_TRACE_RCU) 25 27 26 28 static inline int rcu_read_lock_trace_held(void) 27 29 { 28 30 return 1; 29 31 } 30 32 31 - #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ 33 + #endif // #else // #if defined(CONFIG_DEBUG_LOCK_ALLOC) && defined(CONFIG_TASKS_TRACE_RCU) 32 34 33 35 #ifdef CONFIG_TASKS_TRACE_RCU 34 - 35 - void rcu_read_unlock_trace_special(struct task_struct *t); 36 36 37 37 /** 38 38 * rcu_read_lock_trace - mark beginning of RCU-trace read-side critical section ··· 50 50 { 51 51 struct task_struct *t = current; 52 52 53 - WRITE_ONCE(t->trc_reader_nesting, READ_ONCE(t->trc_reader_nesting) + 1); 54 - barrier(); 55 - if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB) && 56 - t->trc_reader_special.b.need_mb) 57 - smp_mb(); // Pairs with update-side barriers 58 - rcu_lock_acquire(&rcu_trace_lock_map); 53 + if (t->trc_reader_nesting++) { 54 + // In case we interrupted a Tasks Trace RCU reader. 55 + rcu_try_lock_acquire(&rcu_tasks_trace_srcu_struct.dep_map); 56 + return; 57 + } 58 + barrier(); // nesting before scp to protect against interrupt handler. 
59 + t->trc_reader_scp = srcu_read_lock_fast(&rcu_tasks_trace_srcu_struct); 60 + smp_mb(); // Placeholder for more selective ordering 59 61 } 60 62 61 63 /** ··· 71 69 */ 72 70 static inline void rcu_read_unlock_trace(void) 73 71 { 74 - int nesting; 72 + struct srcu_ctr __percpu *scp; 75 73 struct task_struct *t = current; 76 74 77 - rcu_lock_release(&rcu_trace_lock_map); 78 - nesting = READ_ONCE(t->trc_reader_nesting) - 1; 79 - barrier(); // Critical section before disabling. 80 - // Disable IPI-based setting of .need_qs. 81 - WRITE_ONCE(t->trc_reader_nesting, INT_MIN + nesting); 82 - if (likely(!READ_ONCE(t->trc_reader_special.s)) || nesting) { 83 - WRITE_ONCE(t->trc_reader_nesting, nesting); 84 - return; // We assume shallow reader nesting. 85 - } 86 - WARN_ON_ONCE(nesting != 0); 87 - rcu_read_unlock_trace_special(t); 75 + smp_mb(); // Placeholder for more selective ordering 76 + scp = t->trc_reader_scp; 77 + barrier(); // scp before nesting to protect against interrupt handler. 78 + if (!--t->trc_reader_nesting) 79 + srcu_read_unlock_fast(&rcu_tasks_trace_srcu_struct, scp); 80 + else 81 + srcu_lock_release(&rcu_tasks_trace_srcu_struct.dep_map); 88 82 } 89 83 90 - void call_rcu_tasks_trace(struct rcu_head *rhp, rcu_callback_t func); 91 - void synchronize_rcu_tasks_trace(void); 92 - void rcu_barrier_tasks_trace(void); 84 + /** 85 + * call_rcu_tasks_trace() - Queue a callback trace task-based grace period 86 + * @rhp: structure to be used for queueing the RCU updates. 87 + * @func: actual callback function to be invoked after the grace period 88 + * 89 + * The callback function will be invoked some time after a trace rcu-tasks 90 + * grace period elapses, in other words after all currently executing 91 + * trace rcu-tasks read-side critical sections have completed. These 92 + * read-side critical sections are delimited by calls to rcu_read_lock_trace() 93 + * and rcu_read_unlock_trace(). 
94 + * 95 + * See the description of call_rcu() for more detailed information on 96 + * memory ordering guarantees. 97 + */ 98 + static inline void call_rcu_tasks_trace(struct rcu_head *rhp, rcu_callback_t func) 99 + { 100 + call_srcu(&rcu_tasks_trace_srcu_struct, rhp, func); 101 + } 102 + 103 + /** 104 + * synchronize_rcu_tasks_trace - wait for a trace rcu-tasks grace period 105 + * 106 + * Control will return to the caller some time after a trace rcu-tasks 107 + * grace period has elapsed, in other words after all currently executing 108 + * trace rcu-tasks read-side critical sections have elapsed. These read-side 109 + * critical sections are delimited by calls to rcu_read_lock_trace() 110 + * and rcu_read_unlock_trace(). 111 + * 112 + * This is a very specialized primitive, intended only for a few uses in 113 + * tracing and other situations requiring manipulation of function preambles 114 + * and profiling hooks. The synchronize_rcu_tasks_trace() function is not 115 + * (yet) intended for heavy use from multiple CPUs. 116 + * 117 + * See the description of synchronize_rcu() for more detailed information 118 + * on memory ordering guarantees. 119 + */ 120 + static inline void synchronize_rcu_tasks_trace(void) 121 + { 122 + synchronize_srcu(&rcu_tasks_trace_srcu_struct); 123 + } 124 + 125 + /** 126 + * rcu_barrier_tasks_trace - Wait for in-flight call_rcu_tasks_trace() callbacks. 127 + * 128 + * Note that rcu_barrier_tasks_trace() is not obligated to actually wait, 129 + * for example, if there are no pending callbacks. 130 + */ 131 + static inline void rcu_barrier_tasks_trace(void) 132 + { 133 + srcu_barrier(&rcu_tasks_trace_srcu_struct); 134 + } 135 + 136 + // Placeholders to enable stepwise transition. 
137 + void rcu_tasks_trace_get_gp_data(int *flags, unsigned long *gp_seq); 138 + void __init rcu_tasks_trace_suppress_unused(void); 93 139 struct task_struct *get_rcu_tasks_trace_gp_kthread(void); 140 + 94 141 #else 95 142 /* 96 143 * The BPF JIT forms these addresses even when it doesn't call these
+1
include/linux/sched.h
··· 945 945 946 946 #ifdef CONFIG_TASKS_TRACE_RCU 947 947 int trc_reader_nesting; 948 + struct srcu_ctr __percpu *trc_reader_scp; 948 949 int trc_ipi_to_cpu; 949 950 union rcu_special trc_reader_special; 950 951 struct list_head trc_holdout_list;
+19 -602
kernel/rcu/tasks.h
··· 718 718 #endif /* #ifdef CONFIG_TASKS_TRACE_RCU */ 719 719 } 720 720 721 - 722 721 /* Dump out rcutorture-relevant state common to all RCU-tasks flavors. */ 723 722 static void show_rcu_tasks_generic_gp_kthread(struct rcu_tasks *rtp, char *s) 724 723 { ··· 802 803 803 804 static void exit_tasks_rcu_finish_trace(struct task_struct *t); 804 805 805 - #if defined(CONFIG_TASKS_RCU) || defined(CONFIG_TASKS_TRACE_RCU) 806 + #if defined(CONFIG_TASKS_RCU) 806 807 807 808 //////////////////////////////////////////////////////////////////////// 808 809 // ··· 897 898 rtp->postgp_func(rtp); 898 899 } 899 900 900 - #endif /* #if defined(CONFIG_TASKS_RCU) || defined(CONFIG_TASKS_TRACE_RCU) */ 901 + #endif /* #if defined(CONFIG_TASKS_RCU) */ 901 902 902 903 #ifdef CONFIG_TASKS_RCU 903 904 ··· 1452 1453 // 1453 1454 // Tracing variant of Tasks RCU. This variant is designed to be used 1454 1455 // to protect tracing hooks, including those of BPF. This variant 1455 - // therefore: 1456 - // 1457 - // 1. Has explicit read-side markers to allow finite grace periods 1458 - // in the face of in-kernel loops for PREEMPT=n builds. 1459 - // 1460 - // 2. Protects code in the idle loop, exception entry/exit, and 1461 - // CPU-hotplug code paths, similar to the capabilities of SRCU. 1462 - // 1463 - // 3. Avoids expensive read-side instructions, having overhead similar 1464 - // to that of Preemptible RCU. 1465 - // 1466 - // There are of course downsides. For example, the grace-period code 1467 - // can send IPIs to CPUs, even when those CPUs are in the idle loop or 1468 - // in nohz_full userspace. If needed, these downsides can be at least 1469 - // partially remedied. 1470 - // 1471 - // Perhaps most important, this variant of RCU does not affect the vanilla 1472 - // flavors, rcu_preempt and rcu_sched. The fact that RCU Tasks Trace 1473 - // readers can operate from idle, offline, and exception entry/exit in no 1474 - // way allows rcu_preempt and rcu_sched readers to also do so. 
1475 - // 1476 - // The implementation uses rcu_tasks_wait_gp(), which relies on function 1477 - // pointers in the rcu_tasks structure. The rcu_spawn_tasks_trace_kthread() 1478 - // function sets these function pointers up so that rcu_tasks_wait_gp() 1479 - // invokes these functions in this order: 1480 - // 1481 - // rcu_tasks_trace_pregp_step(): 1482 - // Disables CPU hotplug, adds all currently executing tasks to the 1483 - // holdout list, then checks the state of all tasks that blocked 1484 - // or were preempted within their current RCU Tasks Trace read-side 1485 - // critical section, adding them to the holdout list if appropriate. 1486 - // Finally, this function re-enables CPU hotplug. 1487 - // The ->pertask_func() pointer is NULL, so there is no per-task processing. 1488 - // rcu_tasks_trace_postscan(): 1489 - // Invokes synchronize_rcu() to wait for late-stage exiting tasks 1490 - // to finish exiting. 1491 - // check_all_holdout_tasks_trace(), repeatedly until holdout list is empty: 1492 - // Scans the holdout list, attempting to identify a quiescent state 1493 - // for each task on the list. If there is a quiescent state, the 1494 - // corresponding task is removed from the holdout list. Once this 1495 - // list is empty, the grace period has completed. 1496 - // rcu_tasks_trace_postgp(): 1497 - // Provides the needed full memory barrier and does debug checks. 1498 - // 1499 - // The exit_tasks_rcu_finish_trace() synchronizes with exiting tasks. 1500 - // 1501 - // Pre-grace-period update-side code is ordered before the grace period 1502 - // via the ->cbs_lock and barriers in rcu_tasks_kthread(). Pre-grace-period 1503 - // read-side code is ordered before the grace period by atomic operations 1504 - // on .b.need_qs flag of each task involved in this process, or by scheduler 1505 - // context-switch ordering (for locked-down non-running readers). 1506 - 1507 - // The lockdep state must be outside of #ifdef to be useful. 
1508 - #ifdef CONFIG_DEBUG_LOCK_ALLOC 1509 - static struct lock_class_key rcu_lock_trace_key; 1510 - struct lockdep_map rcu_trace_lock_map = 1511 - STATIC_LOCKDEP_MAP_INIT("rcu_read_lock_trace", &rcu_lock_trace_key); 1512 - EXPORT_SYMBOL_GPL(rcu_trace_lock_map); 1513 - #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ 1456 + // is implemented via a straightforward mapping onto SRCU-fast. 1514 1457 1515 1458 #ifdef CONFIG_TASKS_TRACE_RCU 1516 1459 1517 - // Record outstanding IPIs to each CPU. No point in sending two... 1518 - static DEFINE_PER_CPU(bool, trc_ipi_to_cpu); 1460 + DEFINE_SRCU_FAST(rcu_tasks_trace_srcu_struct); 1461 + EXPORT_SYMBOL_GPL(rcu_tasks_trace_srcu_struct); 1519 1462 1520 - // The number of detections of task quiescent state relying on 1521 - // heavyweight readers executing explicit memory barriers. 1522 - static unsigned long n_heavy_reader_attempts; 1523 - static unsigned long n_heavy_reader_updates; 1524 - static unsigned long n_heavy_reader_ofl_updates; 1525 - static unsigned long n_trc_holdouts; 1526 - 1527 - void call_rcu_tasks_trace(struct rcu_head *rhp, rcu_callback_t func); 1528 - DEFINE_RCU_TASKS(rcu_tasks_trace, rcu_tasks_wait_gp, call_rcu_tasks_trace, 1529 - "RCU Tasks Trace"); 1530 - 1531 - /* Load from ->trc_reader_special.b.need_qs with proper ordering. */ 1532 - static u8 rcu_ld_need_qs(struct task_struct *t) 1463 + // Placeholder to suppress build errors through transition period. 1464 + void __init rcu_tasks_trace_suppress_unused(void) 1533 1465 { 1534 - smp_mb(); // Enforce full grace-period ordering. 1535 - return smp_load_acquire(&t->trc_reader_special.b.need_qs); 1536 - } 1537 - 1538 - /* Store to ->trc_reader_special.b.need_qs with proper ordering. */ 1539 - static void rcu_st_need_qs(struct task_struct *t, u8 v) 1540 - { 1541 - smp_store_release(&t->trc_reader_special.b.need_qs, v); 1542 - smp_mb(); // Enforce full grace-period ordering. 
1466 + #ifndef CONFIG_TINY_RCU 1467 + show_rcu_tasks_generic_gp_kthread(NULL, NULL); 1468 + #endif // #ifndef CONFIG_TINY_RCU 1469 + rcu_spawn_tasks_kthread_generic(NULL); 1470 + synchronize_rcu_tasks_generic(NULL); 1471 + call_rcu_tasks_generic(NULL, NULL, NULL); 1472 + call_rcu_tasks_iw_wakeup(NULL); 1473 + cblist_init_generic(NULL); 1474 + #ifndef CONFIG_TINY_RCU 1475 + rcu_tasks_torture_stats_print_generic(NULL, NULL, NULL, NULL); 1476 + #endif // #ifndef CONFIG_TINY_RCU 1543 1477 } 1544 1478 1545 1479 /* ··· 1487 1555 } 1488 1556 EXPORT_SYMBOL_GPL(rcu_trc_cmpxchg_need_qs); 1489 1557 1490 - /* 1491 - * If we are the last reader, signal the grace-period kthread. 1492 - * Also remove from the per-CPU list of blocked tasks. 1493 - */ 1494 - void rcu_read_unlock_trace_special(struct task_struct *t) 1495 - { 1496 - unsigned long flags; 1497 - struct rcu_tasks_percpu *rtpcp; 1498 - union rcu_special trs; 1499 - 1500 - // Open-coded full-word version of rcu_ld_need_qs(). 1501 - smp_mb(); // Enforce full grace-period ordering. 1502 - trs = smp_load_acquire(&t->trc_reader_special); 1503 - 1504 - if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB) && t->trc_reader_special.b.need_mb) 1505 - smp_mb(); // Pairs with update-side barriers. 1506 - // Update .need_qs before ->trc_reader_nesting for irq/NMI handlers. 
1507 - if (trs.b.need_qs == (TRC_NEED_QS_CHECKED | TRC_NEED_QS)) { 1508 - u8 result = rcu_trc_cmpxchg_need_qs(t, TRC_NEED_QS_CHECKED | TRC_NEED_QS, 1509 - TRC_NEED_QS_CHECKED); 1510 - 1511 - WARN_ONCE(result != trs.b.need_qs, "%s: result = %d", __func__, result); 1512 - } 1513 - if (trs.b.blocked) { 1514 - rtpcp = per_cpu_ptr(rcu_tasks_trace.rtpcpu, t->trc_blkd_cpu); 1515 - raw_spin_lock_irqsave_rcu_node(rtpcp, flags); 1516 - list_del_init(&t->trc_blkd_node); 1517 - WRITE_ONCE(t->trc_reader_special.b.blocked, false); 1518 - raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags); 1519 - } 1520 - WRITE_ONCE(t->trc_reader_nesting, 0); 1521 - } 1522 - EXPORT_SYMBOL_GPL(rcu_read_unlock_trace_special); 1523 - 1524 1558 /* Add a newly blocked reader task to its CPU's list. */ 1525 1559 void rcu_tasks_trace_qs_blkd(struct task_struct *t) 1526 1560 { 1527 - unsigned long flags; 1528 - struct rcu_tasks_percpu *rtpcp; 1529 - 1530 - local_irq_save(flags); 1531 - rtpcp = this_cpu_ptr(rcu_tasks_trace.rtpcpu); 1532 - raw_spin_lock_rcu_node(rtpcp); // irqs already disabled 1533 - t->trc_blkd_cpu = smp_processor_id(); 1534 - if (!rtpcp->rtp_blkd_tasks.next) 1535 - INIT_LIST_HEAD(&rtpcp->rtp_blkd_tasks); 1536 - list_add(&t->trc_blkd_node, &rtpcp->rtp_blkd_tasks); 1537 - WRITE_ONCE(t->trc_reader_special.b.blocked, true); 1538 - raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags); 1539 1561 } 1540 1562 EXPORT_SYMBOL_GPL(rcu_tasks_trace_qs_blkd); 1541 - 1542 - /* Add a task to the holdout list, if it is not already on the list. */ 1543 - static void trc_add_holdout(struct task_struct *t, struct list_head *bhp) 1544 - { 1545 - if (list_empty(&t->trc_holdout_list)) { 1546 - get_task_struct(t); 1547 - list_add(&t->trc_holdout_list, bhp); 1548 - n_trc_holdouts++; 1549 - } 1550 - } 1551 - 1552 - /* Remove a task from the holdout list, if it is in fact present. 
*/ 1553 - static void trc_del_holdout(struct task_struct *t) 1554 - { 1555 - if (!list_empty(&t->trc_holdout_list)) { 1556 - list_del_init(&t->trc_holdout_list); 1557 - put_task_struct(t); 1558 - n_trc_holdouts--; 1559 - } 1560 - } 1561 - 1562 - /* IPI handler to check task state. */ 1563 - static void trc_read_check_handler(void *t_in) 1564 - { 1565 - int nesting; 1566 - struct task_struct *t = current; 1567 - struct task_struct *texp = t_in; 1568 - 1569 - // If the task is no longer running on this CPU, leave. 1570 - if (unlikely(texp != t)) 1571 - goto reset_ipi; // Already on holdout list, so will check later. 1572 - 1573 - // If the task is not in a read-side critical section, and 1574 - // if this is the last reader, awaken the grace-period kthread. 1575 - nesting = READ_ONCE(t->trc_reader_nesting); 1576 - if (likely(!nesting)) { 1577 - rcu_trc_cmpxchg_need_qs(t, 0, TRC_NEED_QS_CHECKED); 1578 - goto reset_ipi; 1579 - } 1580 - // If we are racing with an rcu_read_unlock_trace(), try again later. 1581 - if (unlikely(nesting < 0)) 1582 - goto reset_ipi; 1583 - 1584 - // Get here if the task is in a read-side critical section. 1585 - // Set its state so that it will update state for the grace-period 1586 - // kthread upon exit from that critical section. 1587 - rcu_trc_cmpxchg_need_qs(t, 0, TRC_NEED_QS | TRC_NEED_QS_CHECKED); 1588 - 1589 - reset_ipi: 1590 - // Allow future IPIs to be sent on CPU and for task. 1591 - // Also order this IPI handler against any later manipulations of 1592 - // the intended task. 1593 - smp_store_release(per_cpu_ptr(&trc_ipi_to_cpu, smp_processor_id()), false); // ^^^ 1594 - smp_store_release(&texp->trc_ipi_to_cpu, -1); // ^^^ 1595 - } 1596 - 1597 - /* Callback function for scheduler to check locked-down task. 
*/ 1598 - static int trc_inspect_reader(struct task_struct *t, void *bhp_in) 1599 - { 1600 - struct list_head *bhp = bhp_in; 1601 - int cpu = task_cpu(t); 1602 - int nesting; 1603 - bool ofl = cpu_is_offline(cpu); 1604 - 1605 - if (task_curr(t) && !ofl) { 1606 - // If no chance of heavyweight readers, do it the hard way. 1607 - if (!IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB)) 1608 - return -EINVAL; 1609 - 1610 - // If heavyweight readers are enabled on the remote task, 1611 - // we can inspect its state despite its currently running. 1612 - // However, we cannot safely change its state. 1613 - n_heavy_reader_attempts++; 1614 - // Check for "running" idle tasks on offline CPUs. 1615 - if (!rcu_watching_zero_in_eqs(cpu, &t->trc_reader_nesting)) 1616 - return -EINVAL; // No quiescent state, do it the hard way. 1617 - n_heavy_reader_updates++; 1618 - nesting = 0; 1619 - } else { 1620 - // The task is not running, so C-language access is safe. 1621 - nesting = t->trc_reader_nesting; 1622 - WARN_ON_ONCE(ofl && task_curr(t) && (t != idle_task(task_cpu(t)))); 1623 - if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB) && ofl) 1624 - n_heavy_reader_ofl_updates++; 1625 - } 1626 - 1627 - // If not exiting a read-side critical section, mark as checked 1628 - // so that the grace-period kthread will remove it from the 1629 - // holdout list. 1630 - if (!nesting) { 1631 - rcu_trc_cmpxchg_need_qs(t, 0, TRC_NEED_QS_CHECKED); 1632 - return 0; // In QS, so done. 1633 - } 1634 - if (nesting < 0) 1635 - return -EINVAL; // Reader transitioning, try again later. 1636 - 1637 - // The task is in a read-side critical section, so set up its 1638 - // state so that it will update state upon exit from that critical 1639 - // section. 1640 - if (!rcu_trc_cmpxchg_need_qs(t, 0, TRC_NEED_QS | TRC_NEED_QS_CHECKED)) 1641 - trc_add_holdout(t, bhp); 1642 - return 0; 1643 - } 1644 - 1645 - /* Attempt to extract the state for the specified task. 
*/ 1646 - static void trc_wait_for_one_reader(struct task_struct *t, 1647 - struct list_head *bhp) 1648 - { 1649 - int cpu; 1650 - 1651 - // If a previous IPI is still in flight, let it complete. 1652 - if (smp_load_acquire(&t->trc_ipi_to_cpu) != -1) // Order IPI 1653 - return; 1654 - 1655 - // The current task had better be in a quiescent state. 1656 - if (t == current) { 1657 - rcu_trc_cmpxchg_need_qs(t, 0, TRC_NEED_QS_CHECKED); 1658 - WARN_ON_ONCE(READ_ONCE(t->trc_reader_nesting)); 1659 - return; 1660 - } 1661 - 1662 - // Attempt to nail down the task for inspection. 1663 - get_task_struct(t); 1664 - if (!task_call_func(t, trc_inspect_reader, bhp)) { 1665 - put_task_struct(t); 1666 - return; 1667 - } 1668 - put_task_struct(t); 1669 - 1670 - // If this task is not yet on the holdout list, then we are in 1671 - // an RCU read-side critical section. Otherwise, the invocation of 1672 - // trc_add_holdout() that added it to the list did the necessary 1673 - // get_task_struct(). Either way, the task cannot be freed out 1674 - // from under this code. 1675 - 1676 - // If currently running, send an IPI, either way, add to list. 1677 - trc_add_holdout(t, bhp); 1678 - if (task_curr(t) && 1679 - time_after(jiffies + 1, rcu_tasks_trace.gp_start + rcu_task_ipi_delay)) { 1680 - // The task is currently running, so try IPIing it. 1681 - cpu = task_cpu(t); 1682 - 1683 - // If there is already an IPI outstanding, let it happen. 1684 - if (per_cpu(trc_ipi_to_cpu, cpu) || t->trc_ipi_to_cpu >= 0) 1685 - return; 1686 - 1687 - per_cpu(trc_ipi_to_cpu, cpu) = true; 1688 - t->trc_ipi_to_cpu = cpu; 1689 - rcu_tasks_trace.n_ipis++; 1690 - if (smp_call_function_single(cpu, trc_read_check_handler, t, 0)) { 1691 - // Just in case there is some other reason for 1692 - // failure than the target CPU being offline. 
1693 - WARN_ONCE(1, "%s(): smp_call_function_single() failed for CPU: %d\n", 1694 - __func__, cpu); 1695 - rcu_tasks_trace.n_ipis_fails++; 1696 - per_cpu(trc_ipi_to_cpu, cpu) = false; 1697 - t->trc_ipi_to_cpu = -1; 1698 - } 1699 - } 1700 - } 1701 - 1702 - /* 1703 - * Initialize for first-round processing for the specified task. 1704 - * Return false if task is NULL or already taken care of, true otherwise. 1705 - */ 1706 - static bool rcu_tasks_trace_pertask_prep(struct task_struct *t, bool notself) 1707 - { 1708 - // During early boot when there is only the one boot CPU, there 1709 - // is no idle task for the other CPUs. Also, the grace-period 1710 - // kthread is always in a quiescent state. In addition, just return 1711 - // if this task is already on the list. 1712 - if (unlikely(t == NULL) || (t == current && notself) || !list_empty(&t->trc_holdout_list)) 1713 - return false; 1714 - 1715 - rcu_st_need_qs(t, 0); 1716 - t->trc_ipi_to_cpu = -1; 1717 - return true; 1718 - } 1719 - 1720 - /* Do first-round processing for the specified task. */ 1721 - static void rcu_tasks_trace_pertask(struct task_struct *t, struct list_head *hop) 1722 - { 1723 - if (rcu_tasks_trace_pertask_prep(t, true)) 1724 - trc_wait_for_one_reader(t, hop); 1725 - } 1726 - 1727 - /* Initialize for a new RCU-tasks-trace grace period. */ 1728 - static void rcu_tasks_trace_pregp_step(struct list_head *hop) 1729 - { 1730 - LIST_HEAD(blkd_tasks); 1731 - int cpu; 1732 - unsigned long flags; 1733 - struct rcu_tasks_percpu *rtpcp; 1734 - struct task_struct *t; 1735 - 1736 - // There shouldn't be any old IPIs, but... 1737 - for_each_possible_cpu(cpu) 1738 - WARN_ON_ONCE(per_cpu(trc_ipi_to_cpu, cpu)); 1739 - 1740 - // Disable CPU hotplug across the CPU scan for the benefit of 1741 - // any IPIs that might be needed. This also waits for all readers 1742 - // in CPU-hotplug code paths. 
1743 - cpus_read_lock(); 1744 - 1745 - // These rcu_tasks_trace_pertask_prep() calls are serialized to 1746 - // allow safe access to the hop list. 1747 - for_each_online_cpu(cpu) { 1748 - rcu_read_lock(); 1749 - // Note that cpu_curr_snapshot() picks up the target 1750 - // CPU's current task while its runqueue is locked with 1751 - // an smp_mb__after_spinlock(). This ensures that either 1752 - // the grace-period kthread will see that task's read-side 1753 - // critical section or the task will see the updater's pre-GP 1754 - // accesses. The trailing smp_mb() in cpu_curr_snapshot() 1755 - // does not currently play a role other than simplify 1756 - // that function's ordering semantics. If these simplified 1757 - // ordering semantics continue to be redundant, that smp_mb() 1758 - // might be removed. 1759 - t = cpu_curr_snapshot(cpu); 1760 - if (rcu_tasks_trace_pertask_prep(t, true)) 1761 - trc_add_holdout(t, hop); 1762 - rcu_read_unlock(); 1763 - cond_resched_tasks_rcu_qs(); 1764 - } 1765 - 1766 - // Only after all running tasks have been accounted for is it 1767 - // safe to take care of the tasks that have blocked within their 1768 - // current RCU tasks trace read-side critical section. 
1769 - for_each_possible_cpu(cpu) { 1770 - rtpcp = per_cpu_ptr(rcu_tasks_trace.rtpcpu, cpu); 1771 - raw_spin_lock_irqsave_rcu_node(rtpcp, flags); 1772 - list_splice_init(&rtpcp->rtp_blkd_tasks, &blkd_tasks); 1773 - while (!list_empty(&blkd_tasks)) { 1774 - rcu_read_lock(); 1775 - t = list_first_entry(&blkd_tasks, struct task_struct, trc_blkd_node); 1776 - list_del_init(&t->trc_blkd_node); 1777 - list_add(&t->trc_blkd_node, &rtpcp->rtp_blkd_tasks); 1778 - raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags); 1779 - rcu_tasks_trace_pertask(t, hop); 1780 - rcu_read_unlock(); 1781 - raw_spin_lock_irqsave_rcu_node(rtpcp, flags); 1782 - } 1783 - raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags); 1784 - cond_resched_tasks_rcu_qs(); 1785 - } 1786 - 1787 - // Re-enable CPU hotplug now that the holdout list is populated. 1788 - cpus_read_unlock(); 1789 - } 1790 - 1791 - /* 1792 - * Do intermediate processing between task and holdout scans. 1793 - */ 1794 - static void rcu_tasks_trace_postscan(struct list_head *hop) 1795 - { 1796 - // Wait for late-stage exiting tasks to finish exiting. 1797 - // These might have passed the call to exit_tasks_rcu_finish(). 1798 - 1799 - // If you remove the following line, update rcu_trace_implies_rcu_gp()!!! 1800 - synchronize_rcu(); 1801 - // Any tasks that exit after this point will set 1802 - // TRC_NEED_QS_CHECKED in ->trc_reader_special.b.need_qs. 1803 - } 1804 1563 1805 1564 /* Communicate task state back to the RCU tasks trace stall warning request. */ 1806 1565 struct trc_stall_chk_rdr { ··· 1500 1877 u8 needqs; 1501 1878 }; 1502 1879 1503 - static int trc_check_slow_task(struct task_struct *t, void *arg) 1504 - { 1505 - struct trc_stall_chk_rdr *trc_rdrp = arg; 1506 - 1507 - if (task_curr(t) && cpu_online(task_cpu(t))) 1508 - return false; // It is running, so decline to inspect it. 
1509 - trc_rdrp->nesting = READ_ONCE(t->trc_reader_nesting); 1510 - trc_rdrp->ipi_to_cpu = READ_ONCE(t->trc_ipi_to_cpu); 1511 - trc_rdrp->needqs = rcu_ld_need_qs(t); 1512 - return true; 1513 - } 1514 - 1515 - /* Show the state of a task stalling the current RCU tasks trace GP. */ 1516 - static void show_stalled_task_trace(struct task_struct *t, bool *firstreport) 1517 - { 1518 - int cpu; 1519 - struct trc_stall_chk_rdr trc_rdr; 1520 - bool is_idle_tsk = is_idle_task(t); 1521 - 1522 - if (*firstreport) { 1523 - pr_err("INFO: rcu_tasks_trace detected stalls on tasks:\n"); 1524 - *firstreport = false; 1525 - } 1526 - cpu = task_cpu(t); 1527 - if (!task_call_func(t, trc_check_slow_task, &trc_rdr)) 1528 - pr_alert("P%d: %c%c\n", 1529 - t->pid, 1530 - ".I"[t->trc_ipi_to_cpu >= 0], 1531 - ".i"[is_idle_tsk]); 1532 - else 1533 - pr_alert("P%d: %c%c%c%c nesting: %d%c%c cpu: %d%s\n", 1534 - t->pid, 1535 - ".I"[trc_rdr.ipi_to_cpu >= 0], 1536 - ".i"[is_idle_tsk], 1537 - ".N"[cpu >= 0 && tick_nohz_full_cpu(cpu)], 1538 - ".B"[!!data_race(t->trc_reader_special.b.blocked)], 1539 - trc_rdr.nesting, 1540 - " !CN"[trc_rdr.needqs & 0x3], 1541 - " ?"[trc_rdr.needqs > 0x3], 1542 - cpu, cpu_online(cpu) ? "" : "(offline)"); 1543 - sched_show_task(t); 1544 - } 1545 - 1546 - /* List stalled IPIs for RCU tasks trace. */ 1547 - static void show_stalled_ipi_trace(void) 1548 - { 1549 - int cpu; 1550 - 1551 - for_each_possible_cpu(cpu) 1552 - if (per_cpu(trc_ipi_to_cpu, cpu)) 1553 - pr_alert("\tIPI outstanding to CPU %d\n", cpu); 1554 - } 1555 - 1556 - /* Do one scan of the holdout list. */ 1557 - static void check_all_holdout_tasks_trace(struct list_head *hop, 1558 - bool needreport, bool *firstreport) 1559 - { 1560 - struct task_struct *g, *t; 1561 - 1562 - // Disable CPU hotplug across the holdout list scan for IPIs. 1563 - cpus_read_lock(); 1564 - 1565 - list_for_each_entry_safe(t, g, hop, trc_holdout_list) { 1566 - // If safe and needed, try to check the current task. 
1567 - if (READ_ONCE(t->trc_ipi_to_cpu) == -1 && 1568 - !(rcu_ld_need_qs(t) & TRC_NEED_QS_CHECKED)) 1569 - trc_wait_for_one_reader(t, hop); 1570 - 1571 - // If check succeeded, remove this task from the list. 1572 - if (smp_load_acquire(&t->trc_ipi_to_cpu) == -1 && 1573 - rcu_ld_need_qs(t) == TRC_NEED_QS_CHECKED) 1574 - trc_del_holdout(t); 1575 - else if (needreport) 1576 - show_stalled_task_trace(t, firstreport); 1577 - cond_resched_tasks_rcu_qs(); 1578 - } 1579 - 1580 - // Re-enable CPU hotplug now that the holdout list scan has completed. 1581 - cpus_read_unlock(); 1582 - 1583 - if (needreport) { 1584 - if (*firstreport) 1585 - pr_err("INFO: rcu_tasks_trace detected stalls? (Late IPI?)\n"); 1586 - show_stalled_ipi_trace(); 1587 - } 1588 - } 1589 - 1590 - static void rcu_tasks_trace_empty_fn(void *unused) 1591 - { 1592 - } 1593 - 1594 - /* Wait for grace period to complete and provide ordering. */ 1595 - static void rcu_tasks_trace_postgp(struct rcu_tasks *rtp) 1596 - { 1597 - int cpu; 1598 - 1599 - // Wait for any lingering IPI handlers to complete. Note that 1600 - // if a CPU has gone offline or transitioned to userspace in the 1601 - // meantime, all IPI handlers should have been drained beforehand. 1602 - // Yes, this assumes that CPUs process IPIs in order. If that ever 1603 - // changes, there will need to be a recheck and/or timed wait. 1604 - for_each_online_cpu(cpu) 1605 - if (WARN_ON_ONCE(smp_load_acquire(per_cpu_ptr(&trc_ipi_to_cpu, cpu)))) 1606 - smp_call_function_single(cpu, rcu_tasks_trace_empty_fn, NULL, 1); 1607 - 1608 - smp_mb(); // Caller's code must be ordered after wakeup. 1609 - // Pairs with pretty much every ordering primitive. 1610 - } 1611 - 1612 1880 /* Report any needed quiescent state for this exiting task. 
*/ 1613 1881 static void exit_tasks_rcu_finish_trace(struct task_struct *t) 1614 1882 { 1615 - union rcu_special trs = READ_ONCE(t->trc_reader_special); 1616 - 1617 - rcu_trc_cmpxchg_need_qs(t, 0, TRC_NEED_QS_CHECKED); 1618 - WARN_ON_ONCE(READ_ONCE(t->trc_reader_nesting)); 1619 - if (WARN_ON_ONCE(rcu_ld_need_qs(t) & TRC_NEED_QS || trs.b.blocked)) 1620 - rcu_read_unlock_trace_special(t); 1621 - else 1622 - WRITE_ONCE(t->trc_reader_nesting, 0); 1623 1883 } 1624 - 1625 - /** 1626 - * call_rcu_tasks_trace() - Queue a callback trace task-based grace period 1627 - * @rhp: structure to be used for queueing the RCU updates. 1628 - * @func: actual callback function to be invoked after the grace period 1629 - * 1630 - * The callback function will be invoked some time after a trace rcu-tasks 1631 - * grace period elapses, in other words after all currently executing 1632 - * trace rcu-tasks read-side critical sections have completed. These 1633 - * read-side critical sections are delimited by calls to rcu_read_lock_trace() 1634 - * and rcu_read_unlock_trace(). 1635 - * 1636 - * See the description of call_rcu() for more detailed information on 1637 - * memory ordering guarantees. 1638 - */ 1639 - void call_rcu_tasks_trace(struct rcu_head *rhp, rcu_callback_t func) 1640 - { 1641 - call_rcu_tasks_generic(rhp, func, &rcu_tasks_trace); 1642 - } 1643 - EXPORT_SYMBOL_GPL(call_rcu_tasks_trace); 1644 - 1645 - /** 1646 - * synchronize_rcu_tasks_trace - wait for a trace rcu-tasks grace period 1647 - * 1648 - * Control will return to the caller some time after a trace rcu-tasks 1649 - * grace period has elapsed, in other words after all currently executing 1650 - * trace rcu-tasks read-side critical sections have elapsed. These read-side 1651 - * critical sections are delimited by calls to rcu_read_lock_trace() 1652 - * and rcu_read_unlock_trace(). 
1653 - * 1654 - * This is a very specialized primitive, intended only for a few uses in 1655 - * tracing and other situations requiring manipulation of function preambles 1656 - * and profiling hooks. The synchronize_rcu_tasks_trace() function is not 1657 - * (yet) intended for heavy use from multiple CPUs. 1658 - * 1659 - * See the description of synchronize_rcu() for more detailed information 1660 - * on memory ordering guarantees. 1661 - */ 1662 - void synchronize_rcu_tasks_trace(void) 1663 - { 1664 - RCU_LOCKDEP_WARN(lock_is_held(&rcu_trace_lock_map), "Illegal synchronize_rcu_tasks_trace() in RCU Tasks Trace read-side critical section"); 1665 - synchronize_rcu_tasks_generic(&rcu_tasks_trace); 1666 - } 1667 - EXPORT_SYMBOL_GPL(synchronize_rcu_tasks_trace); 1668 - 1669 - /** 1670 - * rcu_barrier_tasks_trace - Wait for in-flight call_rcu_tasks_trace() callbacks. 1671 - * 1672 - * Although the current implementation is guaranteed to wait, it is not 1673 - * obligated to, for example, if there are no pending callbacks. 
1674 - */ 1675 - void rcu_barrier_tasks_trace(void) 1676 - { 1677 - rcu_barrier_tasks_generic(&rcu_tasks_trace); 1678 - } 1679 - EXPORT_SYMBOL_GPL(rcu_barrier_tasks_trace); 1680 1884 1681 1885 int rcu_tasks_trace_lazy_ms = -1; 1682 1886 module_param(rcu_tasks_trace_lazy_ms, int, 0444); 1683 1887 1684 1888 static int __init rcu_spawn_tasks_trace_kthread(void) 1685 1889 { 1686 - if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB)) { 1687 - rcu_tasks_trace.gp_sleep = HZ / 10; 1688 - rcu_tasks_trace.init_fract = HZ / 10; 1689 - } else { 1690 - rcu_tasks_trace.gp_sleep = HZ / 200; 1691 - if (rcu_tasks_trace.gp_sleep <= 0) 1692 - rcu_tasks_trace.gp_sleep = 1; 1693 - rcu_tasks_trace.init_fract = HZ / 200; 1694 - if (rcu_tasks_trace.init_fract <= 0) 1695 - rcu_tasks_trace.init_fract = 1; 1696 - } 1697 - if (rcu_tasks_trace_lazy_ms >= 0) 1698 - rcu_tasks_trace.lazy_jiffies = msecs_to_jiffies(rcu_tasks_trace_lazy_ms); 1699 - rcu_tasks_trace.pregp_func = rcu_tasks_trace_pregp_step; 1700 - rcu_tasks_trace.postscan_func = rcu_tasks_trace_postscan; 1701 - rcu_tasks_trace.holdouts_func = check_all_holdout_tasks_trace; 1702 - rcu_tasks_trace.postgp_func = rcu_tasks_trace_postgp; 1703 - rcu_spawn_tasks_kthread_generic(&rcu_tasks_trace); 1704 1890 return 0; 1705 1891 } 1706 1892 1707 1893 #if !defined(CONFIG_TINY_RCU) 1708 1894 void show_rcu_tasks_trace_gp_kthread(void) 1709 1895 { 1710 - char buf[64]; 1711 - 1712 - snprintf(buf, sizeof(buf), "N%lu h:%lu/%lu/%lu", 1713 - data_race(n_trc_holdouts), 1714 - data_race(n_heavy_reader_ofl_updates), 1715 - data_race(n_heavy_reader_updates), 1716 - data_race(n_heavy_reader_attempts)); 1717 - show_rcu_tasks_generic_gp_kthread(&rcu_tasks_trace, buf); 1718 1896 } 1719 1897 EXPORT_SYMBOL_GPL(show_rcu_tasks_trace_gp_kthread); 1720 1898 1721 1899 void rcu_tasks_trace_torture_stats_print(char *tt, char *tf) 1722 1900 { 1723 - rcu_tasks_torture_stats_print_generic(&rcu_tasks_trace, tt, tf, ""); 1724 1901 } 1725 1902 
EXPORT_SYMBOL_GPL(rcu_tasks_trace_torture_stats_print); 1726 1903 #endif // !defined(CONFIG_TINY_RCU) 1727 1904 1728 1905 struct task_struct *get_rcu_tasks_trace_gp_kthread(void) 1729 1906 { 1730 - return rcu_tasks_trace.kthread_ptr; 1907 + return NULL; 1731 1908 } 1732 1909 EXPORT_SYMBOL_GPL(get_rcu_tasks_trace_gp_kthread); 1733 1910 1734 1911 void rcu_tasks_trace_get_gp_data(int *flags, unsigned long *gp_seq) 1735 1912 { 1736 - *flags = 0; 1737 - *gp_seq = rcu_seq_current(&rcu_tasks_trace.tasks_gp_seq); 1738 1913 } 1739 1914 EXPORT_SYMBOL_GPL(rcu_tasks_trace_get_gp_data); 1740 1915 ··· 1671 2250 1672 2251 #ifdef CONFIG_TASKS_RUDE_RCU 1673 2252 cblist_init_generic(&rcu_tasks_rude); 1674 - #endif 1675 - 1676 - #ifdef CONFIG_TASKS_TRACE_RCU 1677 - cblist_init_generic(&rcu_tasks_trace); 1678 2253 #endif 1679 2254 } 1680 2255