Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'trace-deferred-unwind-v6.17' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace

Pull initial deferred unwind infrastructure from Steven Rostedt:
"This is the core infrastructure for the deferred unwinder that is
required for sframes[1]. Several other patch series are based on this
work, although those patch series are not dependent on each other. To
simplify development, having this core series upstream will allow the
other series to be worked on in parallel. The other
series are:

- The two patches to implement x86 support [2] [3]

- The s390 work [4]

- The perf work [5]

- The ftrace work [6]

- The sframe work [7]

And more is on the way.

The core infrastructure adds the following in-kernel APIs:

- int unwind_user_faultable(struct unwind_stacktrace *trace);

Performs a user space stack trace that may fault user pages in.

- int unwind_deferred_init(struct unwind_work *work, unwind_callback_t func);

Allows a tracer to register with the unwind deferred
infrastructure.

- int unwind_deferred_request(struct unwind_work *work, u64 *cookie);

Used when a tracer requests a deferred trace. Can be called from
interrupt or NMI context.

- void unwind_deferred_cancel(struct unwind_work *work);

Called by a tracer to unregister from the deferred unwind
infrastructure.

- void unwind_deferred_task_exit(struct task_struct *task);

Called by task exit code to flush any pending unwind requests.

- void unwind_task_init(struct task_struct *task);

Called by do_fork() to initialize the task struct for the
deferred unwinder.

- void unwind_task_free(struct task_struct *task);

Called by do_exit() to free up any resources used by the
deferred unwinder.

None of the above is actually compiled unless an architecture enables it,
which none currently do"

Link: https://sourceware.org/binutils/wiki/sframe [1]
Link: https://lore.kernel.org/linux-trace-kernel/20250717004958.260781923@kernel.org/ [2]
Link: https://lore.kernel.org/linux-trace-kernel/20250717004958.432327787@kernel.org/ [3]
Link: https://lore.kernel.org/linux-trace-kernel/20250710163522.3195293-1-jremus@linux.ibm.com/ [4]
Link: https://lore.kernel.org/linux-trace-kernel/20250718164119.089692174@kernel.org/ [5]
Link: https://lore.kernel.org/linux-trace-kernel/20250424192612.505622711@goodmis.org/ [6]
Link: https://lore.kernel.org/linux-trace-kernel/20250717012848.927473176@kernel.org/ [7]

* tag 'trace-deferred-unwind-v6.17' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace:
unwind: Finish up unwind when a task exits
unwind deferred: Use SRCU unwind_deferred_task_work()
unwind: Add USED bit to only have one conditional on way back to user space
unwind deferred: Add unwind_completed mask to stop spurious callbacks
unwind deferred: Use bitmask to determine which callbacks to call
unwind_user/deferred: Make unwind deferral requests NMI-safe
unwind_user/deferred: Add deferred unwinding interface
unwind_user/deferred: Add unwind cache
unwind_user/deferred: Add unwind_user_faultable()
unwind_user: Add user space unwinding API with frame pointer support

+703
+7
MAINTAINERS
··· 26253 26253 F: drivers/uio/ 26254 26254 F: include/linux/uio_driver.h 26255 26255 26256 + USERSPACE STACK UNWINDING 26257 + M: Josh Poimboeuf <jpoimboe@kernel.org> 26258 + M: Steven Rostedt <rostedt@goodmis.org> 26259 + S: Maintained 26260 + F: include/linux/unwind*.h 26261 + F: kernel/unwind/ 26262 + 26256 26263 UTIL-LINUX PACKAGE 26257 26264 M: Karel Zak <kzak@redhat.com> 26258 26265 L: util-linux@vger.kernel.org
+7
arch/Kconfig
··· 444 444 It uses the same command line parameters, and sysctl interface, 445 445 as the generic hardlockup detectors. 446 446 447 + config UNWIND_USER 448 + bool 449 + 450 + config HAVE_UNWIND_USER_FP 451 + bool 452 + select UNWIND_USER 453 + 447 454 config HAVE_PERF_REGS 448 455 bool 449 456 help
+1
include/asm-generic/Kbuild
··· 59 59 mandatory-y += topology.h 60 60 mandatory-y += trace_clock.h 61 61 mandatory-y += uaccess.h 62 + mandatory-y += unwind_user.h 62 63 mandatory-y += vermagic.h 63 64 mandatory-y += vga.h 64 65 mandatory-y += video.h
+5
include/asm-generic/unwind_user.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef _ASM_GENERIC_UNWIND_USER_H 3 + #define _ASM_GENERIC_UNWIND_USER_H 4 + 5 + #endif /* _ASM_GENERIC_UNWIND_USER_H */
+2
include/linux/irq-entry-common.h
··· 7 7 #include <linux/context_tracking.h> 8 8 #include <linux/tick.h> 9 9 #include <linux/kmsan.h> 10 + #include <linux/unwind_deferred.h> 10 11 11 12 #include <asm/entry-common.h> 12 13 ··· 257 256 lockdep_hardirqs_on_prepare(); 258 257 instrumentation_end(); 259 258 259 + unwind_reset_info(); 260 260 user_enter_irqoff(); 261 261 arch_exit_to_user_mode(); 262 262 lockdep_hardirqs_on(CALLER_ADDR0);
+5
include/linux/sched.h
··· 47 47 #include <linux/rv.h> 48 48 #include <linux/uidgid_types.h> 49 49 #include <linux/tracepoint-defs.h> 50 + #include <linux/unwind_deferred_types.h> 50 51 #include <asm/kmap_size.h> 51 52 52 53 /* task_struct member predeclarations (sorted alphabetically): */ ··· 1645 1644 1646 1645 #ifdef CONFIG_USER_EVENTS 1647 1646 struct user_event_mm *user_event_mm; 1647 + #endif 1648 + 1649 + #ifdef CONFIG_UNWIND_USER 1650 + struct unwind_task_info unwind_info; 1648 1651 #endif 1649 1652 1650 1653 /* CPU-specific state of this task: */
+81
include/linux/unwind_deferred.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef _LINUX_UNWIND_USER_DEFERRED_H 3 + #define _LINUX_UNWIND_USER_DEFERRED_H 4 + 5 + #include <linux/task_work.h> 6 + #include <linux/unwind_user.h> 7 + #include <linux/unwind_deferred_types.h> 8 + 9 + struct unwind_work; 10 + 11 + typedef void (*unwind_callback_t)(struct unwind_work *work, struct unwind_stacktrace *trace, u64 cookie); 12 + 13 + struct unwind_work { 14 + struct list_head list; 15 + unwind_callback_t func; 16 + int bit; 17 + }; 18 + 19 + #ifdef CONFIG_UNWIND_USER 20 + 21 + enum { 22 + UNWIND_PENDING_BIT = 0, 23 + UNWIND_USED_BIT, 24 + }; 25 + 26 + enum { 27 + UNWIND_PENDING = BIT(UNWIND_PENDING_BIT), 28 + 29 + /* Set if the unwinding was used (directly or deferred) */ 30 + UNWIND_USED = BIT(UNWIND_USED_BIT) 31 + }; 32 + 33 + void unwind_task_init(struct task_struct *task); 34 + void unwind_task_free(struct task_struct *task); 35 + 36 + int unwind_user_faultable(struct unwind_stacktrace *trace); 37 + 38 + int unwind_deferred_init(struct unwind_work *work, unwind_callback_t func); 39 + int unwind_deferred_request(struct unwind_work *work, u64 *cookie); 40 + void unwind_deferred_cancel(struct unwind_work *work); 41 + 42 + void unwind_deferred_task_exit(struct task_struct *task); 43 + 44 + static __always_inline void unwind_reset_info(void) 45 + { 46 + struct unwind_task_info *info = &current->unwind_info; 47 + unsigned long bits; 48 + 49 + /* Was there any unwinding? 
*/ 50 + if (unlikely(info->unwind_mask)) { 51 + bits = info->unwind_mask; 52 + do { 53 + /* Is a task_work going to run again before going back */ 54 + if (bits & UNWIND_PENDING) 55 + return; 56 + } while (!try_cmpxchg(&info->unwind_mask, &bits, 0UL)); 57 + current->unwind_info.id.id = 0; 58 + 59 + if (unlikely(info->cache)) { 60 + info->cache->nr_entries = 0; 61 + info->cache->unwind_completed = 0; 62 + } 63 + } 64 + } 65 + 66 + #else /* !CONFIG_UNWIND_USER */ 67 + 68 + static inline void unwind_task_init(struct task_struct *task) {} 69 + static inline void unwind_task_free(struct task_struct *task) {} 70 + 71 + static inline int unwind_user_faultable(struct unwind_stacktrace *trace) { return -ENOSYS; } 72 + static inline int unwind_deferred_init(struct unwind_work *work, unwind_callback_t func) { return -ENOSYS; } 73 + static inline int unwind_deferred_request(struct unwind_work *work, u64 *timestamp) { return -ENOSYS; } 74 + static inline void unwind_deferred_cancel(struct unwind_work *work) {} 75 + 76 + static inline void unwind_deferred_task_exit(struct task_struct *task) {} 77 + static inline void unwind_reset_info(void) {} 78 + 79 + #endif /* !CONFIG_UNWIND_USER */ 80 + 81 + #endif /* _LINUX_UNWIND_USER_DEFERRED_H */
+39
include/linux/unwind_deferred_types.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef _LINUX_UNWIND_USER_DEFERRED_TYPES_H 3 + #define _LINUX_UNWIND_USER_DEFERRED_TYPES_H 4 + 5 + struct unwind_cache { 6 + unsigned long unwind_completed; 7 + unsigned int nr_entries; 8 + unsigned long entries[]; 9 + }; 10 + 11 + /* 12 + * The unwind_task_id is a unique identifier that maps to a user space 13 + * stacktrace. It is generated the first time a deferred user space 14 + * stacktrace is requested after a task has entered the kerenl and 15 + * is cleared to zero when it exits. The mapped id will be a non-zero 16 + * number. 17 + * 18 + * To simplify the generation of the 64 bit number, 32 bits will be 19 + * the CPU it was generated on, and the other 32 bits will be a per 20 + * cpu counter that gets incremented by two every time a new identifier 21 + * is generated. The LSB will always be set to keep the value 22 + * from being zero. 23 + */ 24 + union unwind_task_id { 25 + struct { 26 + u32 cpu; 27 + u32 cnt; 28 + }; 29 + u64 id; 30 + }; 31 + 32 + struct unwind_task_info { 33 + unsigned long unwind_mask; 34 + struct unwind_cache *cache; 35 + struct callback_head work; 36 + union unwind_task_id id; 37 + }; 38 + 39 + #endif /* _LINUX_UNWIND_USER_DEFERRED_TYPES_H */
+14
include/linux/unwind_user.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef _LINUX_UNWIND_USER_H 3 + #define _LINUX_UNWIND_USER_H 4 + 5 + #include <linux/unwind_user_types.h> 6 + #include <asm/unwind_user.h> 7 + 8 + #ifndef ARCH_INIT_USER_FP_FRAME 9 + #define ARCH_INIT_USER_FP_FRAME 10 + #endif 11 + 12 + int unwind_user(struct unwind_stacktrace *trace, unsigned int max_entries); 13 + 14 + #endif /* _LINUX_UNWIND_USER_H */
+44
include/linux/unwind_user_types.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef _LINUX_UNWIND_USER_TYPES_H 3 + #define _LINUX_UNWIND_USER_TYPES_H 4 + 5 + #include <linux/types.h> 6 + 7 + /* 8 + * Unwind types, listed in priority order: lower numbers are attempted first if 9 + * available. 10 + */ 11 + enum unwind_user_type_bits { 12 + UNWIND_USER_TYPE_FP_BIT = 0, 13 + 14 + NR_UNWIND_USER_TYPE_BITS, 15 + }; 16 + 17 + enum unwind_user_type { 18 + /* Type "none" for the start of stack walk iteration. */ 19 + UNWIND_USER_TYPE_NONE = 0, 20 + UNWIND_USER_TYPE_FP = BIT(UNWIND_USER_TYPE_FP_BIT), 21 + }; 22 + 23 + struct unwind_stacktrace { 24 + unsigned int nr; 25 + unsigned long *entries; 26 + }; 27 + 28 + struct unwind_user_frame { 29 + s32 cfa_off; 30 + s32 ra_off; 31 + s32 fp_off; 32 + bool use_fp; 33 + }; 34 + 35 + struct unwind_user_state { 36 + unsigned long ip; 37 + unsigned long sp; 38 + unsigned long fp; 39 + enum unwind_user_type current_type; 40 + unsigned int available_types; 41 + bool done; 42 + }; 43 + 44 + #endif /* _LINUX_UNWIND_USER_TYPES_H */
+1
kernel/Makefile
··· 54 54 obj-y += livepatch/ 55 55 obj-y += dma/ 56 56 obj-y += entry/ 57 + obj-y += unwind/ 57 58 obj-$(CONFIG_MODULES) += module/ 58 59 59 60 obj-$(CONFIG_KCMP) += kcmp.o
+2
kernel/exit.c
··· 68 68 #include <linux/rethook.h> 69 69 #include <linux/sysfs.h> 70 70 #include <linux/user_events.h> 71 + #include <linux/unwind_deferred.h> 71 72 #include <linux/uaccess.h> 72 73 #include <linux/pidfs.h> 73 74 ··· 939 938 940 939 tsk->exit_code = code; 941 940 taskstats_exit(tsk, group_dead); 941 + unwind_deferred_task_exit(tsk); 942 942 trace_sched_process_exit(tsk, group_dead); 943 943 944 944 /*
+4
kernel/fork.c
··· 105 105 #include <uapi/linux/pidfd.h> 106 106 #include <linux/pidfs.h> 107 107 #include <linux/tick.h> 108 + #include <linux/unwind_deferred.h> 108 109 109 110 #include <asm/pgalloc.h> 110 111 #include <linux/uaccess.h> ··· 733 732 WARN_ON(refcount_read(&tsk->usage)); 734 733 WARN_ON(tsk == current); 735 734 735 + unwind_task_free(tsk); 736 736 sched_ext_free(tsk); 737 737 io_uring_free(tsk); 738 738 cgroup_free(tsk); ··· 2136 2134 RCU_INIT_POINTER(p->bpf_storage, NULL); 2137 2135 p->bpf_ctx = NULL; 2138 2136 #endif 2137 + 2138 + unwind_task_init(p); 2139 2139 2140 2140 /* Perform scheduler related setup. Assign this task to a CPU. */ 2141 2141 retval = sched_fork(clone_flags, p);
+1
kernel/unwind/Makefile
··· 1 + obj-$(CONFIG_UNWIND_USER) += user.o deferred.o
+362
kernel/unwind/deferred.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * Deferred user space unwinding 4 + */ 5 + #include <linux/sched/task_stack.h> 6 + #include <linux/unwind_deferred.h> 7 + #include <linux/sched/clock.h> 8 + #include <linux/task_work.h> 9 + #include <linux/kernel.h> 10 + #include <linux/sched.h> 11 + #include <linux/sizes.h> 12 + #include <linux/slab.h> 13 + #include <linux/mm.h> 14 + 15 + /* 16 + * For requesting a deferred user space stack trace from NMI context 17 + * the architecture must support a safe cmpxchg in NMI context. 18 + * For those architectures that do not have that, then it cannot ask 19 + * for a deferred user space stack trace from an NMI context. If it 20 + * does, then it will get -EINVAL. 21 + */ 22 + #if defined(CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG) 23 + # define CAN_USE_IN_NMI 1 24 + static inline bool try_assign_cnt(struct unwind_task_info *info, u32 cnt) 25 + { 26 + u32 old = 0; 27 + 28 + return try_cmpxchg(&info->id.cnt, &old, cnt); 29 + } 30 + #else 31 + # define CAN_USE_IN_NMI 0 32 + /* When NMIs are not allowed, this always succeeds */ 33 + static inline bool try_assign_cnt(struct unwind_task_info *info, u32 cnt) 34 + { 35 + info->id.cnt = cnt; 36 + return true; 37 + } 38 + #endif 39 + 40 + /* Make the cache fit in a 4K page */ 41 + #define UNWIND_MAX_ENTRIES \ 42 + ((SZ_4K - sizeof(struct unwind_cache)) / sizeof(long)) 43 + 44 + /* Guards adding to or removing from the list of callbacks */ 45 + static DEFINE_MUTEX(callback_mutex); 46 + static LIST_HEAD(callbacks); 47 + 48 + #define RESERVED_BITS (UNWIND_PENDING | UNWIND_USED) 49 + 50 + /* Zero'd bits are available for assigning callback users */ 51 + static unsigned long unwind_mask = RESERVED_BITS; 52 + DEFINE_STATIC_SRCU(unwind_srcu); 53 + 54 + static inline bool unwind_pending(struct unwind_task_info *info) 55 + { 56 + return test_bit(UNWIND_PENDING_BIT, &info->unwind_mask); 57 + } 58 + 59 + /* 60 + * This is a unique percpu identifier for a given task entry context. 
61 + * Conceptually, it's incremented every time the CPU enters the kernel from 62 + * user space, so that each "entry context" on the CPU gets a unique ID. In 63 + * reality, as an optimization, it's only incremented on demand for the first 64 + * deferred unwind request after a given entry-from-user. 65 + * 66 + * It's combined with the CPU id to make a systemwide-unique "context cookie". 67 + */ 68 + static DEFINE_PER_CPU(u32, unwind_ctx_ctr); 69 + 70 + /* 71 + * The context cookie is a unique identifier that is assigned to a user 72 + * space stacktrace. As the user space stacktrace remains the same while 73 + * the task is in the kernel, the cookie is an identifier for the stacktrace. 74 + * Although it is possible for the stacktrace to get another cookie if another 75 + * request is made after the cookie was cleared and before reentering user 76 + * space. 77 + */ 78 + static u64 get_cookie(struct unwind_task_info *info) 79 + { 80 + u32 cnt = 1; 81 + 82 + if (info->id.cpu) 83 + return info->id.id; 84 + 85 + /* LSB is always set to ensure 0 is an invalid value */ 86 + cnt |= __this_cpu_read(unwind_ctx_ctr) + 2; 87 + if (try_assign_cnt(info, cnt)) { 88 + /* Update the per cpu counter */ 89 + __this_cpu_write(unwind_ctx_ctr, cnt); 90 + } 91 + /* Interrupts are disabled, the CPU will always be same */ 92 + info->id.cpu = smp_processor_id() + 1; /* Must be non zero */ 93 + 94 + return info->id.id; 95 + } 96 + 97 + /** 98 + * unwind_user_faultable - Produce a user stacktrace in faultable context 99 + * @trace: The descriptor that will store the user stacktrace 100 + * 101 + * This must be called in a known faultable context (usually when entering 102 + * or exiting user space). Depending on the available implementations 103 + * the @trace will be loaded with the addresses of the user space stacktrace 104 + * if it can be found. 
105 + * 106 + * Return: 0 on success and negative on error 107 + * On success @trace will contain the user space stacktrace 108 + */ 109 + int unwind_user_faultable(struct unwind_stacktrace *trace) 110 + { 111 + struct unwind_task_info *info = &current->unwind_info; 112 + struct unwind_cache *cache; 113 + 114 + /* Should always be called from faultable context */ 115 + might_fault(); 116 + 117 + if (!current->mm) 118 + return -EINVAL; 119 + 120 + if (!info->cache) { 121 + info->cache = kzalloc(struct_size(cache, entries, UNWIND_MAX_ENTRIES), 122 + GFP_KERNEL); 123 + if (!info->cache) 124 + return -ENOMEM; 125 + } 126 + 127 + cache = info->cache; 128 + trace->entries = cache->entries; 129 + 130 + if (cache->nr_entries) { 131 + /* 132 + * The user stack has already been previously unwound in this 133 + * entry context. Skip the unwind and use the cache. 134 + */ 135 + trace->nr = cache->nr_entries; 136 + return 0; 137 + } 138 + 139 + trace->nr = 0; 140 + unwind_user(trace, UNWIND_MAX_ENTRIES); 141 + 142 + cache->nr_entries = trace->nr; 143 + 144 + /* Clear nr_entries on way back to user space */ 145 + set_bit(UNWIND_USED_BIT, &info->unwind_mask); 146 + 147 + return 0; 148 + } 149 + 150 + static void process_unwind_deferred(struct task_struct *task) 151 + { 152 + struct unwind_task_info *info = &task->unwind_info; 153 + struct unwind_stacktrace trace; 154 + struct unwind_work *work; 155 + unsigned long bits; 156 + u64 cookie; 157 + 158 + if (WARN_ON_ONCE(!unwind_pending(info))) 159 + return; 160 + 161 + /* Clear pending bit but make sure to have the current bits */ 162 + bits = atomic_long_fetch_andnot(UNWIND_PENDING, 163 + (atomic_long_t *)&info->unwind_mask); 164 + /* 165 + * From here on out, the callback must always be called, even if it's 166 + * just an empty trace. 
167 + */ 168 + trace.nr = 0; 169 + trace.entries = NULL; 170 + 171 + unwind_user_faultable(&trace); 172 + 173 + if (info->cache) 174 + bits &= ~(info->cache->unwind_completed); 175 + 176 + cookie = info->id.id; 177 + 178 + guard(srcu)(&unwind_srcu); 179 + list_for_each_entry_srcu(work, &callbacks, list, 180 + srcu_read_lock_held(&unwind_srcu)) { 181 + if (test_bit(work->bit, &bits)) { 182 + work->func(work, &trace, cookie); 183 + if (info->cache) 184 + info->cache->unwind_completed |= BIT(work->bit); 185 + } 186 + } 187 + } 188 + 189 + static void unwind_deferred_task_work(struct callback_head *head) 190 + { 191 + process_unwind_deferred(current); 192 + } 193 + 194 + void unwind_deferred_task_exit(struct task_struct *task) 195 + { 196 + struct unwind_task_info *info = &current->unwind_info; 197 + 198 + if (!unwind_pending(info)) 199 + return; 200 + 201 + process_unwind_deferred(task); 202 + 203 + task_work_cancel(task, &info->work); 204 + } 205 + 206 + /** 207 + * unwind_deferred_request - Request a user stacktrace on task kernel exit 208 + * @work: Unwind descriptor requesting the trace 209 + * @cookie: The cookie of the first request made for this task 210 + * 211 + * Schedule a user space unwind to be done in task work before exiting the 212 + * kernel. 213 + * 214 + * The returned @cookie output is the generated cookie of the very first 215 + * request for a user space stacktrace for this task since it entered the 216 + * kernel. It can be from a request by any caller of this infrastructure. 217 + * Its value will also be passed to the callback function. It can be 218 + * used to stitch kernel and user stack traces together in post-processing. 219 + * 220 + * It's valid to call this function multiple times for the same @work within 221 + * the same task entry context. Each call will return the same cookie 222 + * while the task hasn't left the kernel. 
If the callback is not pending 223 + * because it has already been previously called for the same entry context, 224 + * it will be called again with the same stack trace and cookie. 225 + * 226 + * Return: 0 if the callback successfully was queued. 227 + * 1 if the callback is pending or was already executed. 228 + * Negative if there's an error. 229 + * @cookie holds the cookie of the first request by any user 230 + */ 231 + int unwind_deferred_request(struct unwind_work *work, u64 *cookie) 232 + { 233 + struct unwind_task_info *info = &current->unwind_info; 234 + unsigned long old, bits; 235 + unsigned long bit; 236 + int ret; 237 + 238 + *cookie = 0; 239 + 240 + if ((current->flags & (PF_KTHREAD | PF_EXITING)) || 241 + !user_mode(task_pt_regs(current))) 242 + return -EINVAL; 243 + 244 + /* 245 + * NMI requires having safe cmpxchg operations. 246 + * Trigger a warning to make it obvious that an architecture 247 + * is using this in NMI when it should not be. 248 + */ 249 + if (WARN_ON_ONCE(!CAN_USE_IN_NMI && in_nmi())) 250 + return -EINVAL; 251 + 252 + /* Do not allow cancelled works to request again */ 253 + bit = READ_ONCE(work->bit); 254 + if (WARN_ON_ONCE(bit < 0)) 255 + return -EINVAL; 256 + 257 + /* Only need the mask now */ 258 + bit = BIT(bit); 259 + 260 + guard(irqsave)(); 261 + 262 + *cookie = get_cookie(info); 263 + 264 + old = READ_ONCE(info->unwind_mask); 265 + 266 + /* Is this already queued or executed */ 267 + if (old & bit) 268 + return 1; 269 + 270 + /* 271 + * This work's bit hasn't been set yet. Now set it with the PENDING 272 + * bit and fetch the current value of unwind_mask. If ether the 273 + * work's bit or PENDING was already set, then this is already queued 274 + * to have a callback. 
275 + */ 276 + bits = UNWIND_PENDING | bit; 277 + old = atomic_long_fetch_or(bits, (atomic_long_t *)&info->unwind_mask); 278 + if (old & bits) { 279 + /* 280 + * If the work's bit was set, whatever set it had better 281 + * have also set pending and queued a callback. 282 + */ 283 + WARN_ON_ONCE(!(old & UNWIND_PENDING)); 284 + return old & bit; 285 + } 286 + 287 + /* The work has been claimed, now schedule it. */ 288 + ret = task_work_add(current, &info->work, TWA_RESUME); 289 + 290 + if (WARN_ON_ONCE(ret)) 291 + WRITE_ONCE(info->unwind_mask, 0); 292 + 293 + return ret; 294 + } 295 + 296 + void unwind_deferred_cancel(struct unwind_work *work) 297 + { 298 + struct task_struct *g, *t; 299 + int bit; 300 + 301 + if (!work) 302 + return; 303 + 304 + bit = work->bit; 305 + 306 + /* No work should be using a reserved bit */ 307 + if (WARN_ON_ONCE(BIT(bit) & RESERVED_BITS)) 308 + return; 309 + 310 + guard(mutex)(&callback_mutex); 311 + list_del_rcu(&work->list); 312 + 313 + /* Do not allow any more requests and prevent callbacks */ 314 + work->bit = -1; 315 + 316 + __clear_bit(bit, &unwind_mask); 317 + 318 + synchronize_srcu(&unwind_srcu); 319 + 320 + guard(rcu)(); 321 + /* Clear this bit from all threads */ 322 + for_each_process_thread(g, t) { 323 + clear_bit(bit, &t->unwind_info.unwind_mask); 324 + if (t->unwind_info.cache) 325 + clear_bit(bit, &t->unwind_info.cache->unwind_completed); 326 + } 327 + } 328 + 329 + int unwind_deferred_init(struct unwind_work *work, unwind_callback_t func) 330 + { 331 + memset(work, 0, sizeof(*work)); 332 + 333 + guard(mutex)(&callback_mutex); 334 + 335 + /* See if there's a bit in the mask available */ 336 + if (unwind_mask == ~0UL) 337 + return -EBUSY; 338 + 339 + work->bit = ffz(unwind_mask); 340 + __set_bit(work->bit, &unwind_mask); 341 + 342 + list_add_rcu(&work->list, &callbacks); 343 + work->func = func; 344 + return 0; 345 + } 346 + 347 + void unwind_task_init(struct task_struct *task) 348 + { 349 + struct unwind_task_info *info = 
&task->unwind_info; 350 + 351 + memset(info, 0, sizeof(*info)); 352 + init_task_work(&info->work, unwind_deferred_task_work); 353 + info->unwind_mask = 0; 354 + } 355 + 356 + void unwind_task_free(struct task_struct *task) 357 + { 358 + struct unwind_task_info *info = &task->unwind_info; 359 + 360 + kfree(info->cache); 361 + task_work_cancel(task, &info->work); 362 + }
+128
kernel/unwind/user.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * Generic interfaces for unwinding user space 4 + */ 5 + #include <linux/kernel.h> 6 + #include <linux/sched.h> 7 + #include <linux/sched/task_stack.h> 8 + #include <linux/unwind_user.h> 9 + #include <linux/uaccess.h> 10 + 11 + static const struct unwind_user_frame fp_frame = { 12 + ARCH_INIT_USER_FP_FRAME 13 + }; 14 + 15 + #define for_each_user_frame(state) \ 16 + for (unwind_user_start(state); !(state)->done; unwind_user_next(state)) 17 + 18 + static int unwind_user_next_fp(struct unwind_user_state *state) 19 + { 20 + const struct unwind_user_frame *frame = &fp_frame; 21 + unsigned long cfa, fp, ra; 22 + unsigned int shift; 23 + 24 + if (frame->use_fp) { 25 + if (state->fp < state->sp) 26 + return -EINVAL; 27 + cfa = state->fp; 28 + } else { 29 + cfa = state->sp; 30 + } 31 + 32 + /* Get the Canonical Frame Address (CFA) */ 33 + cfa += frame->cfa_off; 34 + 35 + /* stack going in wrong direction? */ 36 + if (cfa <= state->sp) 37 + return -EINVAL; 38 + 39 + /* Make sure that the address is word aligned */ 40 + shift = sizeof(long) == 4 ? 
2 : 3; 41 + if (cfa & ((1 << shift) - 1)) 42 + return -EINVAL; 43 + 44 + /* Find the Return Address (RA) */ 45 + if (get_user(ra, (unsigned long *)(cfa + frame->ra_off))) 46 + return -EINVAL; 47 + 48 + if (frame->fp_off && get_user(fp, (unsigned long __user *)(cfa + frame->fp_off))) 49 + return -EINVAL; 50 + 51 + state->ip = ra; 52 + state->sp = cfa; 53 + if (frame->fp_off) 54 + state->fp = fp; 55 + return 0; 56 + } 57 + 58 + static int unwind_user_next(struct unwind_user_state *state) 59 + { 60 + unsigned long iter_mask = state->available_types; 61 + unsigned int bit; 62 + 63 + if (state->done) 64 + return -EINVAL; 65 + 66 + for_each_set_bit(bit, &iter_mask, NR_UNWIND_USER_TYPE_BITS) { 67 + enum unwind_user_type type = BIT(bit); 68 + 69 + state->current_type = type; 70 + switch (type) { 71 + case UNWIND_USER_TYPE_FP: 72 + if (!unwind_user_next_fp(state)) 73 + return 0; 74 + continue; 75 + default: 76 + WARN_ONCE(1, "Undefined unwind bit %d", bit); 77 + break; 78 + } 79 + break; 80 + } 81 + 82 + /* No successful unwind method. 
*/ 83 + state->current_type = UNWIND_USER_TYPE_NONE; 84 + state->done = true; 85 + return -EINVAL; 86 + } 87 + 88 + static int unwind_user_start(struct unwind_user_state *state) 89 + { 90 + struct pt_regs *regs = task_pt_regs(current); 91 + 92 + memset(state, 0, sizeof(*state)); 93 + 94 + if ((current->flags & PF_KTHREAD) || !user_mode(regs)) { 95 + state->done = true; 96 + return -EINVAL; 97 + } 98 + 99 + if (IS_ENABLED(CONFIG_HAVE_UNWIND_USER_FP)) 100 + state->available_types |= UNWIND_USER_TYPE_FP; 101 + 102 + state->ip = instruction_pointer(regs); 103 + state->sp = user_stack_pointer(regs); 104 + state->fp = frame_pointer(regs); 105 + 106 + return 0; 107 + } 108 + 109 + int unwind_user(struct unwind_stacktrace *trace, unsigned int max_entries) 110 + { 111 + struct unwind_user_state state; 112 + 113 + trace->nr = 0; 114 + 115 + if (!max_entries) 116 + return -EINVAL; 117 + 118 + if (current->flags & PF_KTHREAD) 119 + return 0; 120 + 121 + for_each_user_frame(&state) { 122 + trace->entries[trace->nr++] = state.ip; 123 + if (trace->nr >= max_entries) 124 + break; 125 + } 126 + 127 + return 0; 128 + }