Merge tag 'trace-ringbuffer-v6.14-2' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace

Pull trace ring buffer fix from Steven Rostedt:
"Fix atomic64 operations on some architectures for the tracing ring
buffer:

- Have emulating atomic64 use arch_spin_locks instead of
raw_spin_locks

The tracing ring buffer events have a small timestamp that holds
the delta between itself and the event before it. But this can be
tricky to update when interrupts come in. It originally just set
the deltas to zero for events that interrupted the adding of
another event which made all the events in the interrupt have the
same timestamp as the event it interrupted. This was not suitable
for many tools, so it was eventually fixed. But that fix required
adding an atomic64 cmpxchg on the timestamp in cases where an event
was added while another event was in the process of being added.

Originally, for 32 bit architectures, the manipulation of the 64
bit timestamp was done by a structure that held multiple 32bit
words to hold parts of the timestamp and a counter. But as updates
to the ring buffer were done, maintaining this became too complex
and was replaced by the atomic64 generic operations which are now
used by both 64bit and 32bit architectures. Shortly after that, it
was reported that riscv32 and other 32 bit architectures that just
used the generic atomic64 were locking up. This was because the
generic atomic64 operations defined in lib/atomic64.c use a
raw_spin_lock() to emulate an atomic64 operation. The problem here
was that raw_spin_lock() can also be traced by the function tracer
(which is commonly used for debugging raw spin locks). Since the
function tracer uses the tracing ring buffer, which now is being
traced internally, this was triggering a recursion and setting off
a warning that the spin locks were recursing.

There's no reason for the code that emulates atomic64 operations to
be using raw_spin_locks which have a lot of debugging
infrastructure attached to them (depending on the config options).
Instead it should be using the arch_spin_lock() which does not have
any infrastructure attached to it and is used by low level
infrastructure like RCU locks, lockdep and of course tracing. Using
arch_spin_lock()s fixes this issue.

- Do not trace in NMI if the architecture uses emulated atomic64
operations

Another issue with using the emulated atomic64 operations that use
spin locks to emulate the atomic64 operations is that they cannot
be used in NMI context. As an NMI can trigger while holding the
atomic64 spin locks it can try to take the same lock and cause a
deadlock.

Have the ring buffer fail recording events if in NMI context and
the architecture uses the emulated atomic64 operations"

* tag 'trace-ringbuffer-v6.14-2' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace:
atomic64: Use arch_spin_locks instead of raw_spin_locks
ring-buffer: Do not allow events in NMI with generic atomic64 cmpxchg()

Linus Torvalds 1 year ago 606489db 7c1badb2

+55 -32

2 changed files

expand all

kernel

trace

ring_buffer.c

lib

atomic64.c

+7 -2

kernel/trace/ring_buffer.c

··· 4398 4398 int nr_loops = 0; 4399 4399 int add_ts_default; 4400 4400 4401 - /* ring buffer does cmpxchg, make sure it is safe in NMI context */ 4402 - if (!IS_ENABLED(CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG) && 4401 + /* 4402 + * ring buffer does cmpxchg as well as atomic64 operations 4403 + * (which some archs use locking for atomic64), make sure this 4404 + * is safe in NMI context 4405 + */ 4406 + if ((!IS_ENABLED(CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG) || 4407 + IS_ENABLED(CONFIG_GENERIC_ATOMIC64)) && 4403 4408 (unlikely(in_nmi()))) { 4404 4409 return NULL; 4405 4410 }

+48 -30

lib/atomic64.c

··· 25 25 * Ensure each lock is in a separate cacheline. 26 26 */ 27 27 static union { 28 - raw_spinlock_t lock; 28 + arch_spinlock_t lock; 29 29 char pad[L1_CACHE_BYTES]; 30 30 } atomic64_lock[NR_LOCKS] __cacheline_aligned_in_smp = { 31 31 [0 ... (NR_LOCKS - 1)] = { 32 - .lock = __RAW_SPIN_LOCK_UNLOCKED(atomic64_lock.lock), 32 + .lock = __ARCH_SPIN_LOCK_UNLOCKED, 33 33 }, 34 34 }; 35 35 36 - static inline raw_spinlock_t *lock_addr(const atomic64_t *v) 36 + static inline arch_spinlock_t *lock_addr(const atomic64_t *v) 37 37 { 38 38 unsigned long addr = (unsigned long) v; 39 39 ··· 45 45 s64 generic_atomic64_read(const atomic64_t *v) 46 46 { 47 47 unsigned long flags; 48 - raw_spinlock_t *lock = lock_addr(v); 48 + arch_spinlock_t *lock = lock_addr(v); 49 49 s64 val; 50 50 51 - raw_spin_lock_irqsave(lock, flags); 51 + local_irq_save(flags); 52 + arch_spin_lock(lock); 52 53 val = v->counter; 53 - raw_spin_unlock_irqrestore(lock, flags); 54 + arch_spin_unlock(lock); 55 + local_irq_restore(flags); 54 56 return val; 55 57 } 56 58 EXPORT_SYMBOL(generic_atomic64_read); ··· 60 58 void generic_atomic64_set(atomic64_t *v, s64 i) 61 59 { 62 60 unsigned long flags; 63 - raw_spinlock_t *lock = lock_addr(v); 61 + arch_spinlock_t *lock = lock_addr(v); 64 62 65 - raw_spin_lock_irqsave(lock, flags); 63 + local_irq_save(flags); 64 + arch_spin_lock(lock); 66 65 v->counter = i; 67 - raw_spin_unlock_irqrestore(lock, flags); 66 + arch_spin_unlock(lock); 67 + local_irq_restore(flags); 68 68 } 69 69 EXPORT_SYMBOL(generic_atomic64_set); 70 70 ··· 74 70 void generic_atomic64_##op(s64 a, atomic64_t *v) \ 75 71 { \ 76 72 unsigned long flags; \ 77 - raw_spinlock_t *lock = lock_addr(v); \ 73 + arch_spinlock_t *lock = lock_addr(v); \ 78 74 \ 79 - raw_spin_lock_irqsave(lock, flags); \ 75 + local_irq_save(flags); \ 76 + arch_spin_lock(lock); \ 80 77 v->counter c_op a; \ 81 - raw_spin_unlock_irqrestore(lock, flags); \ 78 + arch_spin_unlock(lock); \ 79 + local_irq_restore(flags); \ 82 80 } \ 83 81 
EXPORT_SYMBOL(generic_atomic64_##op); 84 82 ··· 88 82 s64 generic_atomic64_##op##_return(s64 a, atomic64_t *v) \ 89 83 { \ 90 84 unsigned long flags; \ 91 - raw_spinlock_t *lock = lock_addr(v); \ 85 + arch_spinlock_t *lock = lock_addr(v); \ 92 86 s64 val; \ 93 87 \ 94 - raw_spin_lock_irqsave(lock, flags); \ 88 + local_irq_save(flags); \ 89 + arch_spin_lock(lock); \ 95 90 val = (v->counter c_op a); \ 96 - raw_spin_unlock_irqrestore(lock, flags); \ 91 + arch_spin_unlock(lock); \ 92 + local_irq_restore(flags); \ 97 93 return val; \ 98 94 } \ 99 95 EXPORT_SYMBOL(generic_atomic64_##op##_return); ··· 104 96 s64 generic_atomic64_fetch_##op(s64 a, atomic64_t *v) \ 105 97 { \ 106 98 unsigned long flags; \ 107 - raw_spinlock_t *lock = lock_addr(v); \ 99 + arch_spinlock_t *lock = lock_addr(v); \ 108 100 s64 val; \ 109 101 \ 110 - raw_spin_lock_irqsave(lock, flags); \ 102 + local_irq_save(flags); \ 103 + arch_spin_lock(lock); \ 111 104 val = v->counter; \ 112 105 v->counter c_op a; \ 113 - raw_spin_unlock_irqrestore(lock, flags); \ 106 + arch_spin_unlock(lock); \ 107 + local_irq_restore(flags); \ 114 108 return val; \ 115 109 } \ 116 110 EXPORT_SYMBOL(generic_atomic64_fetch_##op); ··· 141 131 s64 generic_atomic64_dec_if_positive(atomic64_t *v) 142 132 { 143 133 unsigned long flags; 144 - raw_spinlock_t *lock = lock_addr(v); 134 + arch_spinlock_t *lock = lock_addr(v); 145 135 s64 val; 146 136 147 - raw_spin_lock_irqsave(lock, flags); 137 + local_irq_save(flags); 138 + arch_spin_lock(lock); 148 139 val = v->counter - 1; 149 140 if (val >= 0) 150 141 v->counter = val; 151 - raw_spin_unlock_irqrestore(lock, flags); 142 + arch_spin_unlock(lock); 143 + local_irq_restore(flags); 152 144 return val; 153 145 } 154 146 EXPORT_SYMBOL(generic_atomic64_dec_if_positive); ··· 158 146 s64 generic_atomic64_cmpxchg(atomic64_t *v, s64 o, s64 n) 159 147 { 160 148 unsigned long flags; 161 - raw_spinlock_t *lock = lock_addr(v); 149 + arch_spinlock_t *lock = lock_addr(v); 162 150 s64 val; 163 151 
164 - raw_spin_lock_irqsave(lock, flags); 152 + local_irq_save(flags); 153 + arch_spin_lock(lock); 165 154 val = v->counter; 166 155 if (val == o) 167 156 v->counter = n; 168 - raw_spin_unlock_irqrestore(lock, flags); 157 + arch_spin_unlock(lock); 158 + local_irq_restore(flags); 169 159 return val; 170 160 } 171 161 EXPORT_SYMBOL(generic_atomic64_cmpxchg); ··· 175 161 s64 generic_atomic64_xchg(atomic64_t *v, s64 new) 176 162 { 177 163 unsigned long flags; 178 - raw_spinlock_t *lock = lock_addr(v); 164 + arch_spinlock_t *lock = lock_addr(v); 179 165 s64 val; 180 166 181 - raw_spin_lock_irqsave(lock, flags); 167 + local_irq_save(flags); 168 + arch_spin_lock(lock); 182 169 val = v->counter; 183 170 v->counter = new; 184 - raw_spin_unlock_irqrestore(lock, flags); 171 + arch_spin_unlock(lock); 172 + local_irq_restore(flags); 185 173 return val; 186 174 } 187 175 EXPORT_SYMBOL(generic_atomic64_xchg); ··· 191 175 s64 generic_atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u) 192 176 { 193 177 unsigned long flags; 194 - raw_spinlock_t *lock = lock_addr(v); 178 + arch_spinlock_t *lock = lock_addr(v); 195 179 s64 val; 196 180 197 - raw_spin_lock_irqsave(lock, flags); 181 + local_irq_save(flags); 182 + arch_spin_lock(lock); 198 183 val = v->counter; 199 184 if (val != u) 200 185 v->counter += a; 201 - raw_spin_unlock_irqrestore(lock, flags); 186 + arch_spin_unlock(lock); 187 + local_irq_restore(flags); 202 188 203 189 return val; 204 190 }

Configure Feed

Configure Feed