Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

unwind_user/deferred: Make unwind deferral requests NMI-safe

Make unwind_deferred_request() NMI-safe so tracers in NMI context can
call it and safely request a user space stacktrace when the task exits
back to user space.

Note, this is only allowed for architectures that implement a safe
cmpxchg in NMI context. If an architecture that does not support an
NMI-safe cmpxchg requests a deferred stack trace from NMI context, it
will get -EINVAL and trigger a warning. Such architectures would need
another method (perhaps an irq_work) to request a deferred user space
stack trace. That can be dealt with later if one of these architectures
requires this feature.

Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Josh Poimboeuf <jpoimboe@kernel.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Andrii Nakryiko <andrii@kernel.org>
Cc: Indu Bhagat <indu.bhagat@oracle.com>
Cc: "Jose E. Marchesi" <jemarch@gnu.org>
Cc: Beau Belgrave <beaub@linux.microsoft.com>
Cc: Jens Remus <jremus@linux.ibm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Florian Weimer <fweimer@redhat.com>
Cc: Sam James <sam@gentoo.org>
Link: https://lore.kernel.org/20250729182405.657072238@kernel.org
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>

+44 -8
+44 -8
kernel/unwind/deferred.c
··· 12 12 #include <linux/slab.h> 13 13 #include <linux/mm.h> 14 14 15 + /* 16 + * For requesting a deferred user space stack trace from NMI context 17 + * the architecture must support a safe cmpxchg in NMI context. 18 + * For those architectures that do not have that, then it cannot ask 19 + * for a deferred user space stack trace from an NMI context. If it 20 + * does, then it will get -EINVAL. 21 + */ 22 + #if defined(CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG) 23 + # define CAN_USE_IN_NMI 1 24 + static inline bool try_assign_cnt(struct unwind_task_info *info, u32 cnt) 25 + { 26 + u32 old = 0; 27 + 28 + return try_cmpxchg(&info->id.cnt, &old, cnt); 29 + } 30 + #else 31 + # define CAN_USE_IN_NMI 0 32 + /* When NMIs are not allowed, this always succeeds */ 33 + static inline bool try_assign_cnt(struct unwind_task_info *info, u32 cnt) 34 + { 35 + info->id.cnt = cnt; 36 + return true; 37 + } 38 + #endif 39 + 15 40 /* Make the cache fit in a 4K page */ 16 41 #define UNWIND_MAX_ENTRIES \ 17 42 ((SZ_4K - sizeof(struct unwind_cache)) / sizeof(long)) ··· 67 42 static u64 get_cookie(struct unwind_task_info *info) 68 43 { 69 44 u32 cnt = 1; 70 - u32 old = 0; 71 45 72 46 if (info->id.cpu) 73 47 return info->id.id; 74 48 75 49 /* LSB is always set to ensure 0 is an invalid value */ 76 50 cnt |= __this_cpu_read(unwind_ctx_ctr) + 2; 77 - if (try_cmpxchg(&info->id.cnt, &old, cnt)) { 51 + if (try_assign_cnt(info, cnt)) { 78 52 /* Update the per cpu counter */ 79 53 __this_cpu_write(unwind_ctx_ctr, cnt); 80 54 } ··· 191 167 int unwind_deferred_request(struct unwind_work *work, u64 *cookie) 192 168 { 193 169 struct unwind_task_info *info = &current->unwind_info; 170 + long pending; 194 171 int ret; 195 172 196 173 *cookie = 0; 197 174 198 - if (WARN_ON_ONCE(in_nmi())) 199 - return -EINVAL; 200 - 201 175 if ((current->flags & (PF_KTHREAD | PF_EXITING)) || 202 176 !user_mode(task_pt_regs(current))) 177 + return -EINVAL; 178 + 179 + /* 180 + * NMI requires having safe cmpxchg operations. 
181 + * Trigger a warning to make it obvious that an architecture 182 + * is using this in NMI when it should not be. 183 + */ 184 + if (WARN_ON_ONCE(!CAN_USE_IN_NMI && in_nmi())) 203 185 return -EINVAL; 204 186 205 187 guard(irqsave)(); ··· 213 183 *cookie = get_cookie(info); 214 184 215 185 /* callback already pending? */ 216 - if (info->pending) 186 + pending = READ_ONCE(info->pending); 187 + if (pending) 188 + return 1; 189 + 190 + /* Claim the work unless an NMI just now swooped in to do so. */ 191 + if (!try_cmpxchg(&info->pending, &pending, 1)) 217 192 return 1; 218 193 219 194 /* The work has been claimed, now schedule it. */ 220 195 ret = task_work_add(current, &info->work, TWA_RESUME); 221 - if (WARN_ON_ONCE(ret)) 196 + if (WARN_ON_ONCE(ret)) { 197 + WRITE_ONCE(info->pending, 0); 222 198 return ret; 199 + } 223 200 224 - info->pending = 1; 225 201 return 0; 226 202 } 227 203